Example #1
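    # setUp for a TPC-C-based test case: seeds the RNG, uses the TPC-C Executor to
    # generate one transaction (DELIVERY, NEW_ORDER, ORDER_STATUS, PAYMENT, or
    # STOCK_LEVEL) per session, converts the trace with MongoSniffConverter, inflates
    # the collection statistics, links ORDER_LINE to ORDERS, and builds the cost-model State.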
    def setUp(self):
        MongoDBTestCase.setUp(self)

        random.seed(0)  # Needed for TPC-C code
        self.rng = random.Random(0)
        self.timestamp = time.time()
        self.query_id = 0L
        self.resp_id = 0L

        sp = scaleparameters.makeWithScaleFactor(TPCCTestCase.NUM_WAREHOUSES,
                                                 TPCCTestCase.SCALEFACTOR)
        executor = Executor(sp)

        # WORKLOAD
        for i in xrange(TPCCTestCase.NUM_SESSIONS):
            sess = self.metadata_db.Session()
            sess['session_id'] = i
            sess['ip_client'] = "client:%d" % (1234 + i)
            sess['ip_server'] = "server:5678"
            sess['start_time'] = self.timestamp

            txn, params = executor.doOne()
            if tpccConstants.TransactionTypes.DELIVERY == txn:
                sess['operations'] = self.createDelivery(params)
            elif tpccConstants.TransactionTypes.NEW_ORDER == txn:
                sess['operations'] = self.createNewOrder(params)
            elif tpccConstants.TransactionTypes.ORDER_STATUS == txn:
                sess['operations'] = self.createOrderStatus(params)
            elif tpccConstants.TransactionTypes.PAYMENT == txn:
                sess['operations'] = self.createPayment(params)
            elif tpccConstants.TransactionTypes.STOCK_LEVEL == txn:
                sess['operations'] = self.createStockLevel(params)
            else:
                assert False, "Unexpected TransactionType: " + txn

            sess['end_time'] = self.nextTimestamp(2)
            sess.save()
        ## FOR (sess)

        # Use the MongoSniffConverter to populate our metadata
        converter = MongoSniffConverter(self.metadata_db, self.dataset_db)
        converter.no_mongo_parse = True
        converter.no_mongo_sessionizer = True
        converter.process()
        self.assertEqual(TPCCTestCase.NUM_SESSIONS,
                         self.metadata_db.Session.find().count())

        self.collections = dict([(c['name'], c)
                                 for c in self.metadata_db.Collection.fetch()])

        populated_workload = list(c for c in self.metadata_db.Session.fetch())
        self.workload = populated_workload

        # Increase the database size beyond what the converter derived from the workload
        for col_name, col_info in self.collections.iteritems():
            col_info['doc_count'] = 10000
            col_info['avg_doc_size'] = 1024  # bytes
            col_info['max_pages'] = col_info['doc_count'] * col_info[
                'avg_doc_size'] / (4 * 1024)
            for k, v in col_info['fields'].iteritems():
                if col_name == tpccConstants.TABLENAME_ORDER_LINE:
                    v['parent_col'] = tpccConstants.TABLENAME_ORDERS
            col_info.save()
            # print pformat(col_info)

        self.costModelConfig = {
            'max_memory': 1024,  # MB
            'skew_intervals': 10,
            'address_size': 64,
            'nodes': 10,
            'window_size': 10
        }

        self.state = State(self.collections, populated_workload,
                           self.costModelConfig)
Example #2
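    # setUp for a cost-model test case: each session issues one equality query per
    # collection, with the queried field name fixed by the collection index
    # (field00, field01, or field02). The trace is then converted, the collection
    # statistics are inflated, and a cost-model State is built.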
    def setUp(self):
        MongoDBTestCase.setUp(self)

        # WORKLOAD
        timestamp = time.time()
        for i in xrange(CostModelTestCase.NUM_SESSIONS):
            sess = self.metadata_db.Session()
            sess['session_id'] = i
            sess['ip_client'] = "client:%d" % (1234 + i)
            sess['ip_server'] = "server:5678"
            sess['start_time'] = timestamp

            for j in xrange(0, len(CostModelTestCase.COLLECTION_NAMES)):
                _id = str(random.random())
                queryId = long((i << 16) + j)
                queryContent = {}
                queryPredicates = {}

                responseContent = {"_id": _id}
                responseId = (queryId << 8)
                for f in xrange(0, CostModelTestCase.NUM_FIELDS):
                    if j == 0:
                        f_name = "field%02d" % 0
                    elif j == 1:
                        f_name = "field%02d" % 1
                    else:
                        f_name = "field%02d" % 2
                    responseContent[f_name] = random.randint(0, 100)
                    queryContent[f_name] = responseContent[f_name]
                    queryPredicates[f_name] = constants.PRED_TYPE_EQUALITY

                queryContent = {
                    constants.REPLACE_KEY_DOLLAR_PREFIX + "query": queryContent
                }
                op = Session.operationFactory()
                op['collection'] = CostModelTestCase.COLLECTION_NAMES[j]
                op['type'] = constants.OP_TYPE_QUERY
                op['query_id'] = queryId
                op['query_content'] = [queryContent]
                op['resp_content'] = [responseContent]
                op['resp_id'] = responseId
                op['predicates'] = queryPredicates
                op['query_time'] = timestamp
                timestamp += 1
                op['resp_time'] = timestamp
                sess['operations'].append(op)
                ## FOR (ops)
            sess['end_time'] = timestamp
            timestamp += 2
            sess.save()
            ## FOR (sess)

        # Use the MongoSniffConverter to populate our metadata
        converter = MongoSniffConverter(self.metadata_db, self.dataset_db)
        converter.no_mongo_parse = True
        converter.no_mongo_sessionizer = True
        converter.process()
        self.assertEqual(CostModelTestCase.NUM_SESSIONS,
                         self.metadata_db.Session.find().count())

        self.collections = dict([(c['name'], c)
                                 for c in self.metadata_db.Collection.fetch()])
        self.assertEqual(len(CostModelTestCase.COLLECTION_NAMES),
                         len(self.collections))

        populated_workload = list(c for c in self.metadata_db.Session.fetch())
        self.workload = populated_workload

        # Increase the database size beyond what the converter derived from the workload
        for col_name, col_info in self.collections.iteritems():
            col_info['doc_count'] = CostModelTestCase.NUM_DOCUMENTS
            col_info['avg_doc_size'] = 1024  # bytes
            col_info['max_pages'] = col_info['doc_count'] * col_info[
                'avg_doc_size'] / (4 * 1024)
            col_info.save()
            #            print pformat(col_info)

        self.costModelConfig = {
            'max_memory': 1024,  # MB
            'skew_intervals': CostModelTestCase.NUM_INTERVALS,
            'address_size': 64,
            'nodes': CostModelTestCase.NUM_NODES,
            'window_size': 3
        }

        self.state = State(self.collections, populated_workload,
                           self.costModelConfig)
Example #3
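    # Variant of the workload above: every query carries equality predicates on all
    # NUM_FIELDS fields, and the field values come from running counters (field00 and
    # field01 increase, field02 decreases), so successive queries hit different values.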
    def setUp(self):
        MongoDBTestCase.setUp(self)
        field00_value = 0
        field01_value = 0
        field02_value = 9999999
        
        # WORKLOAD
        timestamp = time.time()
        for i in xrange(CostModelTestCase.NUM_SESSIONS):
            sess = self.metadata_db.Session()
            sess['session_id'] = i
            sess['ip_client'] = "client:%d" % (1234+i)
            sess['ip_server'] = "server:5678"
            sess['start_time'] = timestamp

            for j in xrange(0, len(CostModelTestCase.COLLECTION_NAMES)):
                _id = str(random.random())
                queryId = long((i<<16) + j)
                queryContent = { }
                queryPredicates = { }

                responseContent = {"_id": _id}
                responseId = (queryId<<8)
                for f in xrange(0, CostModelTestCase.NUM_FIELDS):
                    f_name = "field%02d" % f
                    if f == 0:
                        responseContent[f_name] = field00_value
                        queryContent[f_name] = responseContent[f_name]
                        queryPredicates[f_name] = constants.PRED_TYPE_EQUALITY
                        field00_value += 1
                    elif f == 1:
                        responseContent[f_name] = field01_value
                        queryContent[f_name] = responseContent[f_name]
                        queryPredicates[f_name] = constants.PRED_TYPE_EQUALITY
                        field01_value += 1
                    else:
                        responseContent[f_name] = field02_value
                        queryContent[f_name] = responseContent[f_name]
                        queryPredicates[f_name] = constants.PRED_TYPE_EQUALITY
                        field02_value -= 1
                    ## ELSE
                ## FOR

                queryContent = { constants.REPLACE_KEY_DOLLAR_PREFIX + "query": queryContent }
                op = Session.operationFactory()
                op['collection']    = CostModelTestCase.COLLECTION_NAMES[j]
                op['type']          = constants.OP_TYPE_QUERY
                op['query_id']      = queryId
                op['query_content'] = [ queryContent ]
                op['resp_content']  = [ responseContent ]
                op['resp_id']       = responseId
                op['predicates']    = queryPredicates
                op['query_time']    = timestamp
                timestamp += 1
                op['resp_time']    = timestamp
                sess['operations'].append(op)
                ## FOR (ops)
                
            sess['end_time'] = timestamp
            timestamp += 2
            sess.save()
        ## FOR (sess)

        # Use the MongoSniffConverter to populate our metadata
        converter = MongoSniffConverter(self.metadata_db, self.dataset_db)
        converter.no_mongo_parse = True
        converter.no_mongo_sessionizer = True
        converter.process()
        self.assertEqual(CostModelTestCase.NUM_SESSIONS, self.metadata_db.Session.find().count())

        self.collections = dict([ (c['name'], c) for c in self.metadata_db.Collection.fetch()])
        self.assertEqual(len(CostModelTestCase.COLLECTION_NAMES), len(self.collections))

        populated_workload = list(c for c in self.metadata_db.Session.fetch())
        self.workload = populated_workload
        # Increase the database size beyond what the converter derived from the workload
        for col_name, col_info in self.collections.iteritems():
            col_info['doc_count'] = CostModelTestCase.NUM_DOCUMENTS
            col_info['avg_doc_size'] = 1024 # bytes
            col_info['max_pages'] = col_info['doc_count'] * col_info['avg_doc_size'] / (4 * 1024)
            col_info.save()
        #            print pformat(col_info)

        self.costModelConfig = {
            'max_memory':     1024, # MB
            'skew_intervals': CostModelTestCase.NUM_INTERVALS,
            'address_size':   64,
            'nodes':          CostModelTestCase.NUM_NODES,
            'window_size':    1024
        }

        self.state = State(self.collections, populated_workload, self.costModelConfig)
    ## DEF
## CLASS
Example #4
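    # setUp for a NodeEstimator test: each session issues one query per collection with
    # equality predicates on the even-numbered fields; odd-numbered fields appear only
    # in the response content. Ends by constructing a NodeEstimator rather than a State.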
    def setUp(self):
        MongoDBTestCase.setUp(self)

        # WORKLOAD
        self.workload = []
        timestamp = time.time()
        for i in xrange(0, NUM_SESSIONS):
            sess = self.metadata_db.Session()
            sess['session_id'] = i
            sess['ip_client'] = "client:%d" % (1234 + i)
            sess['ip_server'] = "server:5678"
            sess['start_time'] = timestamp

            for j in xrange(0, len(COLLECTION_NAMES)):
                _id = str(random.random())
                queryId = long((i << 16) + j)
                queryContent = {}
                queryPredicates = {}

                responseContent = {"_id": _id}
                responseId = (queryId << 8)
                for f in xrange(0, NUM_FIELDS):
                    f_name = "field%02d" % f
                    if f % 2 == 0:
                        responseContent[f_name] = random.randint(0, 100)
                        queryContent[f_name] = responseContent[f_name]
                        queryPredicates[f_name] = constants.PRED_TYPE_EQUALITY
                    else:
                        responseContent[f_name] = str(
                            random.randint(1000, 100000))
                    ## FOR

                queryContent = {
                    constants.REPLACE_KEY_DOLLAR_PREFIX + "query": queryContent
                }
                op = Session.operationFactory()
                op['collection'] = COLLECTION_NAMES[j]
                op['type'] = constants.OP_TYPE_QUERY
                op['query_id'] = queryId
                op['query_content'] = [queryContent]
                op['resp_content'] = [responseContent]
                op['resp_id'] = responseId
                op['predicates'] = queryPredicates

                op['query_time'] = timestamp
                timestamp += 1
                op['resp_time'] = timestamp

                sess['operations'].append(op)
            ## FOR (ops)
            sess['end_time'] = timestamp
            timestamp += 2
            sess.save()
            self.workload.append(sess)
        ## FOR (sess)

        # Use the MongoSniffConverter to populate our metadata
        converter = MongoSniffConverter(self.metadata_db, self.dataset_db)
        converter.no_mongo_parse = True
        converter.no_mongo_sessionizer = True
        converter.process()
        self.assertEqual(NUM_SESSIONS, self.metadata_db.Session.find().count())

        self.collections = dict([(c['name'], c)
                                 for c in self.metadata_db.Collection.fetch()])
        self.assertEqual(len(COLLECTION_NAMES), len(self.collections))

        self.estimator = NodeEstimator(self.collections, NUM_NODES)
Example #5
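    # Same TPC-C-based setUp as Example #1: one TPC-C transaction per session, converted
    # with MongoSniffConverter, followed by inflated statistics and a cost-model State.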
    def setUp(self):
        MongoDBTestCase.setUp(self)

        random.seed(0) # Needed for TPC-C code
        self.rng = random.Random(0)
        self.timestamp = time.time()
        self.query_id = 0L
        self.resp_id = 0L
        
        sp = scaleparameters.makeWithScaleFactor(TPCCTestCase.NUM_WAREHOUSES, TPCCTestCase.SCALEFACTOR)
        executor = Executor(sp)
        
        # WORKLOAD
        for i in xrange(TPCCTestCase.NUM_SESSIONS):
            sess = self.metadata_db.Session()
            sess['session_id'] = i
            sess['ip_client'] = "client:%d" % (1234+i)
            sess['ip_server'] = "server:5678"
            sess['start_time'] = self.timestamp
            
            txn, params = executor.doOne()
            if tpccConstants.TransactionTypes.DELIVERY == txn:
                sess['operations'] = self.createDelivery(params)
            elif tpccConstants.TransactionTypes.NEW_ORDER == txn:
                sess['operations'] = self.createNewOrder(params)
            elif tpccConstants.TransactionTypes.ORDER_STATUS == txn:
                sess['operations'] = self.createOrderStatus(params)
            elif tpccConstants.TransactionTypes.PAYMENT == txn:
                sess['operations'] = self.createPayment(params)
            elif tpccConstants.TransactionTypes.STOCK_LEVEL == txn:
                sess['operations'] = self.createStockLevel(params)
            else:
                assert False, "Unexpected TransactionType: " + txn

            sess['end_time'] = self.nextTimestamp(2)
            sess.save()
        ## FOR (sess)

        # Use the MongoSniffConverter to populate our metadata
        converter = MongoSniffConverter(self.metadata_db, self.dataset_db)
        converter.no_mongo_parse = True
        converter.no_mongo_sessionizer = True
        converter.process()
        self.assertEqual(TPCCTestCase.NUM_SESSIONS, self.metadata_db.Session.find().count())

        self.collections = dict([ (c['name'], c) for c in self.metadata_db.Collection.fetch()])
        
        populated_workload = list(c for c in self.metadata_db.Session.fetch())
        self.workload = populated_workload
        
        # Increase the database size beyond what the converter derived from the workload
        for col_name, col_info in self.collections.iteritems():
            col_info['doc_count'] = 10000
            col_info['avg_doc_size'] = 1024 # bytes
            col_info['max_pages'] = col_info['doc_count'] * col_info['avg_doc_size'] / (4 * 1024)
            for k,v in col_info['fields'].iteritems():
                if col_name == tpccConstants.TABLENAME_ORDER_LINE:
                    v['parent_col'] = tpccConstants.TABLENAME_ORDERS
            col_info.save()
            # print pformat(col_info)
            
        self.costModelConfig = {
            'max_memory':     1024, # MB
            'skew_intervals': 10,
            'address_size':   64,
            'nodes':          10,
            'window_size':    10
        }

                    
        self.state = State(self.collections, populated_workload, self.costModelConfig)
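Example #6
    # Same workload generator as Example #4: equality predicates on even-numbered fields,
    # response-only values for odd-numbered fields, ending with a NodeEstimator.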
    def setUp(self):
        MongoDBTestCase.setUp(self)

        # WORKLOAD
        self.workload = []
        timestamp = time.time()
        for i in xrange(0, NUM_SESSIONS):
            sess = self.metadata_db.Session()
            sess["session_id"] = i
            sess["ip_client"] = "client:%d" % (1234 + i)
            sess["ip_server"] = "server:5678"
            sess["start_time"] = timestamp

            for j in xrange(0, len(COLLECTION_NAMES)):
                _id = str(random.random())
                queryId = long((i << 16) + j)
                queryContent = {}
                queryPredicates = {}

                responseContent = {"_id": _id}
                responseId = queryId << 8
                for f in xrange(0, NUM_FIELDS):
                    f_name = "field%02d" % f
                    if f % 2 == 0:
                        responseContent[f_name] = random.randint(0, 100)
                        queryContent[f_name] = responseContent[f_name]
                        queryPredicates[f_name] = constants.PRED_TYPE_EQUALITY
                    else:
                        responseContent[f_name] = str(random.randint(1000, 100000))
                    ## FOR

                queryContent = {constants.REPLACE_KEY_DOLLAR_PREFIX + "query": queryContent}
                op = Session.operationFactory()
                op["collection"] = COLLECTION_NAMES[j]
                op["type"] = constants.OP_TYPE_QUERY
                op["query_id"] = queryId
                op["query_content"] = [queryContent]
                op["resp_content"] = [responseContent]
                op["resp_id"] = responseId
                op["predicates"] = queryPredicates

                op["query_time"] = timestamp
                timestamp += 1
                op["resp_time"] = timestamp

                sess["operations"].append(op)
            ## FOR (ops)
            sess["end_time"] = timestamp
            timestamp += 2
            sess.save()
            self.workload.append(sess)
        ## FOR (sess)

        # Use the MongoSniffConverter to populate our metadata
        converter = MongoSniffConverter(self.metadata_db, self.dataset_db)
        converter.no_mongo_parse = True
        converter.no_mongo_sessionizer = True
        converter.process()
        self.assertEqual(NUM_SESSIONS, self.metadata_db.Session.find().count())

        self.collections = dict([(c["name"], c) for c in self.metadata_db.Collection.fetch()])
        self.assertEqual(len(COLLECTION_NAMES), len(self.collections))

        self.estimator = NodeEstimator(self.collections, NUM_NODES)
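Example #7
    # setUp with a single session of six hand-built queries: queries 0-3 target
    # COLLECTION_NAME with varying predicate and projection (query_fields) combinations,
    # query 4 targets COLLECTION_NAME_2, and query 5 targets COLLECTION_NAME_3. The trace
    # is converted, the collection statistics are inflated, and a cost-model State is built.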
    def setUp(self):
        MongoDBTestCase.setUp(self)

        # WORKLOAD
        timestamp = time.time()

        sess = self.metadata_db.Session()
        sess['session_id'] = 0
        sess['ip_client'] = "client:%d" % (1234+0)
        sess['ip_server'] = "server:5678"
        sess['start_time'] = timestamp

        # generate query 0 querying field00
        _id = str(random.random())
        queryId = long((0<<16) + 0)
        queryContent = { }
        queryPredicates = { }
        projectionField = { }

        responseContent = {"_id": _id}
        responseId = (queryId<<8)

        responseContent['field00'] = random.randint(0, 100)
        queryContent['field00'] = responseContent['field00']
        queryPredicates['field00'] = constants.PRED_TYPE_EQUALITY
        projectionField['field02'] = random.randint(0, 100)

        queryContent = { constants.REPLACE_KEY_DOLLAR_PREFIX + "query": queryContent }
        op = Session.operationFactory()
        op['collection']    = CostModelTestCase.COLLECTION_NAME
        op['type']          = constants.OP_TYPE_QUERY
        op['query_id']      = queryId
        op['query_content'] = [ queryContent ]
        op['resp_content']  = [ responseContent ]
        op['resp_id']       = responseId
        op['predicates']    = queryPredicates
        op['query_time']    = timestamp
        op['query_fields']   = projectionField
        timestamp += 1
        op['resp_time']    = timestamp

        sess['operations'].append(op)

        # generate query 1 querying field01
        _id = str(random.random())
        queryId = long((1<<16) + 1)
        queryContent = { }
        queryPredicates = { }

        responseContent = {"_id": _id}
        responseId = (queryId<<8)
        projectionField = { }

        responseContent['field01'] = random.randint(0, 100)
        queryContent['field01'] = responseContent['field01']
        queryPredicates['field01'] = constants.PRED_TYPE_EQUALITY
        projectionField['field02'] = random.randint(0, 100)

        queryContent = { constants.REPLACE_KEY_DOLLAR_PREFIX + "query": queryContent }
        op = Session.operationFactory()
        op['collection']    = CostModelTestCase.COLLECTION_NAME
        op['type']          = constants.OP_TYPE_QUERY
        op['query_id']      = queryId
        op['query_content'] = [ queryContent ]
        op['resp_content']  = [ responseContent ]
        op['resp_id']       = responseId
        op['predicates']    = queryPredicates
        op['query_time']    = timestamp
        op['query_fields']   = projectionField
        timestamp += 1
        op['resp_time']    = timestamp

        sess['operations'].append(op)

        # generate query 2 querying field00, field01
        _id = str(random.random())
        queryId = long((2<<16) + 2)
        queryContent = { }
        queryPredicates = { }
        projectionField = { }

        responseContent = {"_id": _id}
        responseId = (queryId<<8)

        responseContent['field01'] = random.randint(0, 100)
        queryContent['field01'] = responseContent['field01']
        queryPredicates['field01'] = constants.PRED_TYPE_EQUALITY

        responseContent['field00'] = random.randint(0, 100)
        queryContent['field00'] = responseContent['field00']
        queryPredicates['field00'] = constants.PRED_TYPE_EQUALITY

        projectionField['field02'] = random.randint(0, 100)

        queryContent = { constants.REPLACE_KEY_DOLLAR_PREFIX + "query": queryContent }
        op = Session.operationFactory()
        op['collection']    = CostModelTestCase.COLLECTION_NAME
        op['type']          = constants.OP_TYPE_QUERY
        op['query_id']      = queryId
        op['query_content'] = [ queryContent ]
        op['resp_content']  = [ responseContent ]
        op['resp_id']       = responseId
        op['predicates']    = queryPredicates
        op['query_time']    = timestamp
        op['query_fields']   = projectionField
        timestamp += 1
        op['resp_time']    = timestamp

        sess['operations'].append(op)

        sess['end_time'] = timestamp
        timestamp += 1

        # generate query 3 querying field00, field01 but without projection field
        _id = str(random.random())
        queryId = long((2<<16) + 3)
        queryContent = { }
        queryPredicates = { }
        projectionField = { }

        responseContent = {"_id": _id}
        responseId = (queryId<<8)

        responseContent['field01'] = random.randint(0, 100)
        queryContent['field01'] = responseContent['field01']
        queryPredicates['field01'] = constants.PRED_TYPE_EQUALITY

        responseContent['field00'] = random.randint(0, 100)
        queryContent['field00'] = responseContent['field00']
        queryPredicates['field00'] = constants.PRED_TYPE_EQUALITY

        queryContent = { constants.REPLACE_KEY_DOLLAR_PREFIX + "query": queryContent }
        op = Session.operationFactory()
        op['collection']    = CostModelTestCase.COLLECTION_NAME
        op['type']          = constants.OP_TYPE_QUERY
        op['query_id']      = queryId
        op['query_content'] = [ queryContent ]
        op['resp_content']  = [ responseContent ]
        op['resp_id']       = responseId
        op['predicates']    = queryPredicates
        op['query_time']    = timestamp
        op['query_fields']   = projectionField
        timestamp += 1
        op['resp_time']    = timestamp

        sess['operations'].append(op)

        sess['end_time'] = timestamp
        timestamp += 1

        # generate query 4 querying field00, field01 but it goes to collection 2
        _id = str(random.random())
        queryId = long((2<<16) + 4)
        queryContent = { }
        queryPredicates = { }
        projectionField = { }

        responseContent = {"_id": _id}
        responseId = (queryId<<8)

        responseContent['field00'] = random.randint(0, 100)
        queryContent['field00'] = responseContent['field00']
        queryPredicates['field00'] = constants.PRED_TYPE_EQUALITY

        queryContent = { constants.REPLACE_KEY_DOLLAR_PREFIX + "query": queryContent }
        op = Session.operationFactory()
        op['collection']    = CostModelTestCase.COLLECTION_NAME_2
        op['type']          = constants.OP_TYPE_QUERY
        op['query_id']      = queryId
        op['query_content'] = [ queryContent ]
        op['resp_content']  = [ responseContent ]
        op['resp_id']       = responseId
        op['predicates']    = queryPredicates
        op['query_time']    = timestamp
        op['query_fields']   = projectionField
        timestamp += 1
        op['resp_time']    = timestamp

        sess['operations'].append(op)

        sess['end_time'] = timestamp
        timestamp += 1
        
        # generate query 5 querying field00 but it goes to collection 3
        _id = str(random.random())
        queryId = long((2<<16) + 5)
        queryContent = { }
        queryPredicates = { }
        projectionField = { }

        responseContent = {"_id": _id}
        responseId = (queryId<<8)

        responseContent['field00'] = random.randint(0, 100)
        queryContent['field00'] = responseContent['field00']
        queryPredicates['field00'] = constants.PRED_TYPE_EQUALITY

        queryContent = { constants.REPLACE_KEY_DOLLAR_PREFIX + "query": queryContent }
        op = Session.operationFactory()
        op['collection']    = CostModelTestCase.COLLECTION_NAME_3
        op['type']          = constants.OP_TYPE_QUERY
        op['query_id']      = queryId
        op['query_content'] = [ queryContent ]
        op['resp_content']  = [ responseContent ]
        op['resp_id']       = responseId
        op['predicates']    = queryPredicates
        op['query_time']    = timestamp
        op['query_fields']   = projectionField
        timestamp += 1
        op['resp_time']    = timestamp

        sess['operations'].append(op)

        sess['end_time'] = timestamp
        timestamp += 1
        
        sess.save()

        # Use the MongoSniffConverter to populate our metadata
        converter = MongoSniffConverter(self.metadata_db, self.dataset_db)
        converter.no_mongo_parse = True
        converter.no_mongo_sessionizer = True
        converter.process()
        self.assertEqual(CostModelTestCase.NUM_SESSIONS, self.metadata_db.Session.find().count())

        self.collections = dict([ (c['name'], c) for c in self.metadata_db.Collection.fetch()])

        populated_workload = list(c for c in self.metadata_db.Session.fetch())
        self.workload = populated_workload
        # Increase the database size beyond what the converter derived from the workload
        for col_name, col_info in self.collections.iteritems():
            col_info['doc_count'] = CostModelTestCase.NUM_DOCUMENTS
            col_info['avg_doc_size'] = 1024 # bytes
            col_info['max_pages'] = col_info['doc_count'] * col_info['avg_doc_size'] / (4 * 1024)
            col_info.save()
        #            print pformat(col_info)

        self.costModelConfig = {
            'max_memory':     1024, # MB
            'skew_intervals': CostModelTestCase.NUM_INTERVALS,
            'address_size':   64,
            'nodes':          CostModelTestCase.NUM_NODES,
            'window_size':    10
        }

        self.state = State(self.collections, populated_workload, self.costModelConfig)
Example #8
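    # Same single-session, six-query projection workload as Example #7 (equality predicates
    # on field00/field01, optional field02 projection, spread over three collections),
    # ending with the converter pass, inflated statistics, and a cost-model State.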
    def setUp(self):
        MongoDBTestCase.setUp(self)

        # WORKLOAD
        timestamp = time.time()

        sess = self.metadata_db.Session()
        sess['session_id'] = 0
        sess['ip_client'] = "client:%d" % (1234 + 0)
        sess['ip_server'] = "server:5678"
        sess['start_time'] = timestamp

        # generate query 0 querying field00
        _id = str(random.random())
        queryId = long((0 << 16) + 0)
        queryContent = {}
        queryPredicates = {}
        projectionField = {}

        responseContent = {"_id": _id}
        responseId = (queryId << 8)

        responseContent['field00'] = random.randint(0, 100)
        queryContent['field00'] = responseContent['field00']
        queryPredicates['field00'] = constants.PRED_TYPE_EQUALITY
        projectionField['field02'] = random.randint(0, 100)

        queryContent = {
            constants.REPLACE_KEY_DOLLAR_PREFIX + "query": queryContent
        }
        op = Session.operationFactory()
        op['collection'] = CostModelTestCase.COLLECTION_NAME
        op['type'] = constants.OP_TYPE_QUERY
        op['query_id'] = queryId
        op['query_content'] = [queryContent]
        op['resp_content'] = [responseContent]
        op['resp_id'] = responseId
        op['predicates'] = queryPredicates
        op['query_time'] = timestamp
        op['query_fields'] = projectionField
        timestamp += 1
        op['resp_time'] = timestamp

        sess['operations'].append(op)

        # generate query 1 querying field01
        _id = str(random.random())
        queryId = long((1 << 16) + 1)
        queryContent = {}
        queryPredicates = {}

        responseContent = {"_id": _id}
        responseId = (queryId << 8)
        projectionField = {}

        responseContent['field01'] = random.randint(0, 100)
        queryContent['field01'] = responseContent['field01']
        queryPredicates['field01'] = constants.PRED_TYPE_EQUALITY
        projectionField['field02'] = random.randint(0, 100)

        queryContent = {
            constants.REPLACE_KEY_DOLLAR_PREFIX + "query": queryContent
        }
        op = Session.operationFactory()
        op['collection'] = CostModelTestCase.COLLECTION_NAME
        op['type'] = constants.OP_TYPE_QUERY
        op['query_id'] = queryId
        op['query_content'] = [queryContent]
        op['resp_content'] = [responseContent]
        op['resp_id'] = responseId
        op['predicates'] = queryPredicates
        op['query_time'] = timestamp
        op['query_fields'] = projectionField
        timestamp += 1
        op['resp_time'] = timestamp

        sess['operations'].append(op)

        # generate query 2 querying field00, field01
        _id = str(random.random())
        queryId = long((2 << 16) + 2)
        queryContent = {}
        queryPredicates = {}
        projectionField = {}

        responseContent = {"_id": _id}
        responseId = (queryId << 8)

        responseContent['field01'] = random.randint(0, 100)
        queryContent['field01'] = responseContent['field01']
        queryPredicates['field01'] = constants.PRED_TYPE_EQUALITY

        responseContent['field00'] = random.randint(0, 100)
        queryContent['field00'] = responseContent['field00']
        queryPredicates['field00'] = constants.PRED_TYPE_EQUALITY

        projectionField['field02'] = random.randint(0, 100)

        queryContent = {
            constants.REPLACE_KEY_DOLLAR_PREFIX + "query": queryContent
        }
        op = Session.operationFactory()
        op['collection'] = CostModelTestCase.COLLECTION_NAME
        op['type'] = constants.OP_TYPE_QUERY
        op['query_id'] = queryId
        op['query_content'] = [queryContent]
        op['resp_content'] = [responseContent]
        op['resp_id'] = responseId
        op['predicates'] = queryPredicates
        op['query_time'] = timestamp
        op['query_fields'] = projectionField
        timestamp += 1
        op['resp_time'] = timestamp

        sess['operations'].append(op)

        sess['end_time'] = timestamp
        timestamp += 1

        # generate query 3 querying field00, field01 but without projection field
        _id = str(random.random())
        queryId = long((2 << 16) + 3)
        queryContent = {}
        queryPredicates = {}
        projectionField = {}

        responseContent = {"_id": _id}
        responseId = (queryId << 8)

        responseContent['field01'] = random.randint(0, 100)
        queryContent['field01'] = responseContent['field01']
        queryPredicates['field01'] = constants.PRED_TYPE_EQUALITY

        responseContent['field00'] = random.randint(0, 100)
        queryContent['field00'] = responseContent['field00']
        queryPredicates['field00'] = constants.PRED_TYPE_EQUALITY

        queryContent = {
            constants.REPLACE_KEY_DOLLAR_PREFIX + "query": queryContent
        }
        op = Session.operationFactory()
        op['collection'] = CostModelTestCase.COLLECTION_NAME
        op['type'] = constants.OP_TYPE_QUERY
        op['query_id'] = queryId
        op['query_content'] = [queryContent]
        op['resp_content'] = [responseContent]
        op['resp_id'] = responseId
        op['predicates'] = queryPredicates
        op['query_time'] = timestamp
        op['query_fields'] = projectionField
        timestamp += 1
        op['resp_time'] = timestamp

        sess['operations'].append(op)

        sess['end_time'] = timestamp
        timestamp += 1

        # generate query 4 querying field00, field01 but it goes to collection 2
        _id = str(random.random())
        queryId = long((2 << 16) + 4)
        queryContent = {}
        queryPredicates = {}
        projectionField = {}

        responseContent = {"_id": _id}
        responseId = (queryId << 8)

        responseContent['field00'] = random.randint(0, 100)
        queryContent['field00'] = responseContent['field00']
        queryPredicates['field00'] = constants.PRED_TYPE_EQUALITY

        queryContent = {
            constants.REPLACE_KEY_DOLLAR_PREFIX + "query": queryContent
        }
        op = Session.operationFactory()
        op['collection'] = CostModelTestCase.COLLECTION_NAME_2
        op['type'] = constants.OP_TYPE_QUERY
        op['query_id'] = queryId
        op['query_content'] = [queryContent]
        op['resp_content'] = [responseContent]
        op['resp_id'] = responseId
        op['predicates'] = queryPredicates
        op['query_time'] = timestamp
        op['query_fields'] = projectionField
        timestamp += 1
        op['resp_time'] = timestamp

        sess['operations'].append(op)

        sess['end_time'] = timestamp
        timestamp += 1

        # generate query 5 querying field00 but it goes to collection 3
        _id = str(random.random())
        queryId = long((2 << 16) + 5)
        queryContent = {}
        queryPredicates = {}
        projectionField = {}

        responseContent = {"_id": _id}
        responseId = (queryId << 8)

        responseContent['field00'] = random.randint(0, 100)
        queryContent['field00'] = responseContent['field00']
        queryPredicates['field00'] = constants.PRED_TYPE_EQUALITY

        queryContent = {
            constants.REPLACE_KEY_DOLLAR_PREFIX + "query": queryContent
        }
        op = Session.operationFactory()
        op['collection'] = CostModelTestCase.COLLECTION_NAME_3
        op['type'] = constants.OP_TYPE_QUERY
        op['query_id'] = queryId
        op['query_content'] = [queryContent]
        op['resp_content'] = [responseContent]
        op['resp_id'] = responseId
        op['predicates'] = queryPredicates
        op['query_time'] = timestamp
        op['query_fields'] = projectionField
        timestamp += 1
        op['resp_time'] = timestamp

        sess['operations'].append(op)

        sess['end_time'] = timestamp
        timestamp += 1

        sess.save()

        # Use the MongoSniffConverter to populate our metadata
        converter = MongoSniffConverter(self.metadata_db, self.dataset_db)
        converter.no_mongo_parse = True
        converter.no_mongo_sessionizer = True
        converter.process()
        self.assertEqual(CostModelTestCase.NUM_SESSIONS,
                         self.metadata_db.Session.find().count())

        self.collections = dict([(c['name'], c)
                                 for c in self.metadata_db.Collection.fetch()])

        populated_workload = list(c for c in self.metadata_db.Session.fetch())
        self.workload = populated_workload
        # Increase the database size beyond what the converter derived from the workload
        for col_name, col_info in self.collections.iteritems():
            col_info['doc_count'] = CostModelTestCase.NUM_DOCUMENTS
            col_info['avg_doc_size'] = 1024  # bytes
            col_info['max_pages'] = col_info['doc_count'] * col_info[
                'avg_doc_size'] / (4 * 1024)
            col_info.save()
        #            print pformat(col_info)

        self.costModelConfig = {
            'max_memory': 1024,  # MB
            'skew_intervals': CostModelTestCase.NUM_INTERVALS,
            'address_size': 64,
            'nodes': CostModelTestCase.NUM_NODES,
            'window_size': 10
        }

        self.state = State(self.collections, populated_workload,
                           self.costModelConfig)