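# NOTE: Assumed module header. The import paths and the test constants below are
# placeholders sketched from how they are used in this test case; adjust them to
# the repository's actual module layout and shared test values.
import random
import time
import unittest

from search import Design
from search.nodeestimator import NodeEstimator
from workload import Session
from inputs.mongodb import MongoSniffConverter
from util import constants
from tests import MongoDBTestCase

COLLECTION_NAMES = ["alpha", "beta"]  # placeholder collection names
NUM_SESSIONS = 100                    # placeholder
NUM_FIELDS = 6                        # placeholder
NUM_NODES = 8                         # placeholder
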
class TestNodeEstimator(MongoDBTestCase):
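    """Unit tests for NodeEstimator, which estimates which shard nodes a
    workload operation touches under a given Design."""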
    def setUp(self):
        MongoDBTestCase.setUp(self)

        # WORKLOAD
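        # Build NUM_SESSIONS synthetic sessions, each containing one query
        # operation per collection. Even-numbered fields get integer values and
        # equality predicates; odd-numbered fields get string values that only
        # appear in the response.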
        self.workload = []
        timestamp = time.time()
        for i in xrange(0, NUM_SESSIONS):
            sess = self.metadata_db.Session()
            sess['session_id'] = i
            sess['ip_client'] = "client:%d" % (1234 + i)
            sess['ip_server'] = "server:5678"
            sess['start_time'] = timestamp

            for j in xrange(0, len(COLLECTION_NAMES)):
                _id = str(random.random())
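                # Pack the session id into the high bits and the collection index
                # into the low bits so each query id in the workload is unique;
                # the response id is derived from it by a further shift.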
                queryId = long((i << 16) + j)
                queryContent = {}
                queryPredicates = {}

                responseContent = {"_id": _id}
                responseId = (queryId << 8)
                for f in xrange(0, NUM_FIELDS):
                    f_name = "field%02d" % f
                    if f % 2 == 0:
                        responseContent[f_name] = random.randint(0, 100)
                        queryContent[f_name] = responseContent[f_name]
                        queryPredicates[f_name] = constants.PRED_TYPE_EQUALITY
                    else:
                        responseContent[f_name] = str(
                            random.randint(1000, 100000))
                ## FOR

                queryContent = {
                    constants.REPLACE_KEY_DOLLAR_PREFIX + "query": queryContent
                }
                op = Session.operationFactory()
                op['collection'] = COLLECTION_NAMES[j]
                op['type'] = constants.OP_TYPE_QUERY
                op['query_id'] = queryId
                op['query_content'] = [queryContent]
                op['resp_content'] = [responseContent]
                op['resp_id'] = responseId
                op['predicates'] = queryPredicates

                op['query_time'] = timestamp
                timestamp += 1
                op['resp_time'] = timestamp

                sess['operations'].append(op)
            ## FOR (ops)
            sess['end_time'] = timestamp
            timestamp += 2
            sess.save()
            self.workload.append(sess)
        ## FOR (sess)

        # Use the MongoSniffConverter to populate our metadata
        converter = MongoSniffConverter(self.metadata_db, self.dataset_db)
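        # The Session records were written directly above, so skip the converter's
        # trace-parsing and sessionization stages and only have it reconstruct the
        # Collection metadata that the NodeEstimator needs.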
        converter.no_mongo_parse = True
        converter.no_mongo_sessionizer = True
        converter.process()
        self.assertEqual(NUM_SESSIONS, self.metadata_db.Session.find().count())

        self.collections = dict([(c['name'], c)
                                 for c in self.metadata_db.Collection.fetch()])
        self.assertEqual(len(COLLECTION_NAMES), len(self.collections))

        self.estimator = NodeEstimator(self.collections, NUM_NODES)

    ## DEF

    def testEstimateNodesEquality(self):
        """Check the estimating touched nodes for a equality predicate op"""

        d = Design()
        for i in xrange(0, len(COLLECTION_NAMES)):
            col_info = self.collections[COLLECTION_NAMES[i]]
            d.addCollection(col_info['name'])
            # Only use the first field in the collection's 'interesting' list as
            # the sharding key. We'll worry about compound sharding keys later.
            d.addShardKey(col_info['name'], col_info['interesting'][:1])
        ## FOR

        sess = self.metadata_db.Session.fetch_one()
        op = sess['operations'][0]
        #        pprint(op)

        # If we execute it twice, we should get back the exact same node ids
        touched0 = list(self.estimator.estimateNodes(d, op))
        touched1 = list(self.estimator.estimateNodes(d, op))
        self.assertListEqual(touched0, touched1)

    ## DEF

    def testEstimateNodesRange(self):
        """Check the estimating touched nodes for a range predicate op"""

        col_info = self.collections[COLLECTION_NAMES[0]]
        shard_key = col_info['interesting'][0]
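        # Give the shard key a moderate selectivity so that a range predicate on it
        # is expected to span more than one node.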
        col_info['fields'][shard_key]['selectivity'] = 0.5

        d = Design()
        d.addCollection(col_info['name'])
        d.addShardKey(col_info['name'], [shard_key])

        sess = self.metadata_db.Session.fetch_one()
        op = sess['operations'][0]
        op['query_content'] = [
            {constants.REPLACE_KEY_DOLLAR_PREFIX + "query":
                {shard_key: {constants.REPLACE_KEY_DOLLAR_PREFIX + "gt": 10000L}}}
        ]
        op['predicates'] = {shard_key: constants.PRED_TYPE_RANGE}

        # The list of estimated touched nodes should contain more than one entry
        touched0 = list(self.estimator.estimateNodes(d, op))
        #        print "touched0:", touched0
        self.assertGreater(len(touched0), 1)

    ## DEF

    def testEstimateNodesNullValue(self):
        """Check the estimating touched nodes when the sharding key value is null"""

        d = Design()
        for i in xrange(0, len(COLLECTION_NAMES)):
            col_info = self.collections[COLLECTION_NAMES[i]]
            d.addCollection(col_info['name'])
            # This key won't be in the operation's fields, but we should still
            # be able to get back a value
            d.addShardKey(col_info['name'], ['XXXX'])
        ## FOR

        # A query that looks up on a non-sharding key should always be
        # broadcast to every node
        sess = self.metadata_db.Session.fetch_one()
        op = sess['operations'][0]
        touched0 = list(self.estimator.estimateNodes(d, op))
        #        print "touched0:", touched0
        self.assertListEqual(range(NUM_NODES), touched0)

        # But if we insert into that collection with a document that doesn't
        # have the sharding key, it should only go to one node
        op['type'] = constants.OP_TYPE_INSERT
        op['query_content'] = op['resp_content']
        op['predicates'] = []
        #        pprint(op)
        touched1 = list(self.estimator.estimateNodes(d, op))
        #        print "touched1:", touched1
        self.assertEqual(1, len(touched1))

        # And if we insert another one, then we should get the same value back
        op = Session.operationFactory()
        op['collection'] = COLLECTION_NAMES[0]
        op['type'] = constants.OP_TYPE_INSERT
        op['query_id'] = 10000
        op['query_content'] = [{"parkinglot": 1234}]
        op['resp_content'] = [{"ok": 1}]
        op['resp_id'] = 10001
        #        pprint(op)
        touched2 = list(self.estimator.estimateNodes(d, op))
        self.assertEqual(1, len(touched2))
        self.assertListEqual(touched1, touched2)
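
    ## DEF
## CLASS


# Assumed standalone entry point (relies on the unittest import in the assumed
# header above); drop this if the suite is only run through an external test runner.
if __name__ == '__main__':
    unittest.main()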