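# NOTE: Assumed module header. The import paths and the test constants below are
# placeholders sketched from how they are used in this test case; adjust them to
# the repository's actual module layout and shared test values.
import random
import time
import unittest

from search import Design
from search.nodeestimator import NodeEstimator
from workload import Session
from inputs.mongodb import MongoSniffConverter
from util import constants
from tests import MongoDBTestCase

COLLECTION_NAMES = ["alpha", "beta"]  # placeholder collection names
NUM_SESSIONS = 100                    # placeholder
NUM_FIELDS = 6                        # placeholder
NUM_NODES = 8                         # placeholder
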
class TestNodeEstimator(MongoDBTestCase):
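    """Unit tests for NodeEstimator, which estimates which shard nodes a
    workload operation touches under a given Design."""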
    def setUp(self):
        MongoDBTestCase.setUp(self)

        # WORKLOAD
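        # Build NUM_SESSIONS synthetic sessions, each containing one query
        # operation per collection. Even-numbered fields get integer values and
        # equality predicates; odd-numbered fields get string values that only
        # appear in the response.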
        self.workload = []
        timestamp = time.time()
        for i in xrange(0, NUM_SESSIONS):
            sess = self.metadata_db.Session()
            sess['session_id'] = i
            sess['ip_client'] = "client:%d" % (1234 + i)
            sess['ip_server'] = "server:5678"
            sess['start_time'] = timestamp

            for j in xrange(0, len(COLLECTION_NAMES)):
                _id = str(random.random())
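                # Pack the session id into the high bits and the collection index
                # into the low bits so each query id in the workload is unique;
                # the response id is derived from it by a further shift.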
                queryId = long((i << 16) + j)
                queryContent = {}
                queryPredicates = {}

                responseContent = {"_id": _id}
                responseId = (queryId << 8)
                for f in xrange(0, NUM_FIELDS):
                    f_name = "field%02d" % f
                    if f % 2 == 0:
                        responseContent[f_name] = random.randint(0, 100)
                        queryContent[f_name] = responseContent[f_name]
                        queryPredicates[f_name] = constants.PRED_TYPE_EQUALITY
                    else:
                        responseContent[f_name] = str(
                            random.randint(1000, 100000))
                ## FOR

                queryContent = {
                    constants.REPLACE_KEY_DOLLAR_PREFIX + "query": queryContent
                }
                op = Session.operationFactory()
                op['collection'] = COLLECTION_NAMES[j]
                op['type'] = constants.OP_TYPE_QUERY
                op['query_id'] = queryId
                op['query_content'] = [queryContent]
                op['resp_content'] = [responseContent]
                op['resp_id'] = responseId
                op['predicates'] = queryPredicates

                op['query_time'] = timestamp
                timestamp += 1
                op['resp_time'] = timestamp

                sess['operations'].append(op)
            ## FOR (ops)
            sess['end_time'] = timestamp
            timestamp += 2
            sess.save()
            self.workload.append(sess)
        ## FOR (sess)

        # Use the MongoSniffConverter to populate our metadata
        converter = MongoSniffConverter(self.metadata_db, self.dataset_db)
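        # The Session records were written directly above, so skip the converter's
        # trace-parsing and sessionization stages and only have it reconstruct the
        # Collection metadata that the NodeEstimator needs.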
        converter.no_mongo_parse = True
        converter.no_mongo_sessionizer = True
        converter.process()
        self.assertEqual(NUM_SESSIONS, self.metadata_db.Session.find().count())

        self.collections = dict([(c['name'], c)
                                 for c in self.metadata_db.Collection.fetch()])
        self.assertEqual(len(COLLECTION_NAMES), len(self.collections))

        self.estimator = NodeEstimator(self.collections, NUM_NODES)

    ## DEF

    def testEstimateNodesEquality(self):
        """Check the estimating touched nodes for a equality predicate op"""

        d = Design()
        for i in xrange(0, len(COLLECTION_NAMES)):
            col_info = self.collections[COLLECTION_NAMES[i]]
            d.addCollection(col_info['name'])
            # Only use the first field in the collection's 'interesting' list as
            # the sharding key. We'll worry about compound sharding keys later.
            d.addShardKey(col_info['name'], col_info['interesting'][:1])
        ## FOR

        sess = self.metadata_db.Session.fetch_one()
        op = sess['operations'][0]
        #        pprint(op)

        # If we execute it twice, we should get back the exact same node ids
        touched0 = list(self.estimator.estimateNodes(d, op))
        touched1 = list(self.estimator.estimateNodes(d, op))
        self.assertListEqual(touched0, touched1)

    ## DEF

    def testEstimateNodesRange(self):
        """Check the estimating touched nodes for a range predicate op"""

        col_info = self.collections[COLLECTION_NAMES[0]]
        shard_key = col_info['interesting'][0]
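        # Give the shard key a moderate selectivity so that a range predicate on it
        # is expected to span more than one node.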
        col_info['fields'][shard_key]['selectivity'] = 0.5

        d = Design()
        d.addCollection(col_info['name'])
        d.addShardKey(col_info['name'], [shard_key])

        sess = self.metadata_db.Session.fetch_one()
        op = sess['operations'][0]
        op['query_content'] = [
            {constants.REPLACE_KEY_DOLLAR_PREFIX + "query":
                {shard_key: {constants.REPLACE_KEY_DOLLAR_PREFIX + "gt": 10000L}}}
        ]
        op['predicates'] = {shard_key: constants.PRED_TYPE_RANGE}

        # The list of estimated touched nodes should contain more than one entry
        touched0 = list(self.estimator.estimateNodes(d, op))
        #        print "touched0:", touched0
        self.assertGreater(len(touched0), 1)

    ## DEF

    def testEstimateNodesNullValue(self):
        """Check the estimating touched nodes when the sharding key value is null"""

        d = Design()
        for i in xrange(0, len(COLLECTION_NAMES)):
            col_info = self.collections[COLLECTION_NAMES[i]]
            d.addCollection(col_info['name'])
            # This key won't be in the operation's fields, but we should still
            # be able to get back a value
            d.addShardKey(col_info['name'], ['XXXX'])
        ## FOR

        # A query that looks up on a non-sharding key should always be
        # broadcast to every node
        sess = self.metadata_db.Session.fetch_one()
        op = sess['operations'][0]
        touched0 = list(self.estimator.estimateNodes(d, op))
        #        print "touched0:", touched0
        self.assertListEqual(range(NUM_NODES), touched0)

        # But if we insert into that collection with a document that doesn't
        # have the sharding key, it should only go to one node
        op['type'] = constants.OP_TYPE_INSERT
        op['query_content'] = op['resp_content']
        op['predicates'] = []
        #        pprint(op)
        touched1 = list(self.estimator.estimateNodes(d, op))
        #        print "touched1:", touched1
        self.assertEqual(1, len(touched1))

        # And if we insert another one, then we should get the same value back
        op = Session.operationFactory()
        op['collection'] = COLLECTION_NAMES[0]
        op['type'] = constants.OP_TYPE_INSERT
        op['query_id'] = 10000
        op['query_content'] = [{"parkinglot": 1234}]
        op['resp_content'] = [{"ok": 1}]
        op['resp_id'] = 10001
        #        pprint(op)
        touched2 = list(self.estimator.estimateNodes(d, op))
        self.assertEqual(1, len(touched2))
        self.assertListEqual(touched1, touched2)
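
    ## DEF
## CLASS


# Assumed standalone entry point (relies on the unittest import in the assumed
# header above); drop this if the suite is only run through an external test runner.
if __name__ == '__main__':
    unittest.main()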