def setUp(self):
    """Build a deterministic TPC-C workload trace and the cost-model state.

    Seeds the RNGs, drives the TPC-C Executor for NUM_SESSIONS transactions,
    converts the resulting sessions into catalog metadata, then inflates the
    per-collection statistics before constructing the State object.
    """
    MongoDBTestCase.setUp(self)
    random.seed(0)  # the TPC-C driver code uses the module-level RNG
    self.rng = random.Random(0)
    self.timestamp = time.time()
    self.query_id = long(0)
    self.resp_id = long(0)

    sp = scaleparameters.makeWithScaleFactor(TPCCTestCase.NUM_WAREHOUSES,
                                             TPCCTestCase.SCALEFACTOR)
    executor = Executor(sp)

    # Map each TPC-C transaction type to the method that synthesizes its ops.
    op_builders = {
        tpccConstants.TransactionTypes.DELIVERY:     self.createDelivery,
        tpccConstants.TransactionTypes.NEW_ORDER:    self.createNewOrder,
        tpccConstants.TransactionTypes.ORDER_STATUS: self.createOrderStatus,
        tpccConstants.TransactionTypes.PAYMENT:      self.createPayment,
        tpccConstants.TransactionTypes.STOCK_LEVEL:  self.createStockLevel,
    }

    # WORKLOAD: one session per synthetic TPC-C transaction
    for sess_idx in xrange(TPCCTestCase.NUM_SESSIONS):
        sess = self.metadata_db.Session()
        sess['session_id'] = sess_idx
        sess['ip_client'] = "client:%d" % (1234 + sess_idx)
        sess['ip_server'] = "server:5678"
        sess['start_time'] = self.timestamp

        txn, params = executor.doOne()
        builder = op_builders.get(txn)
        assert builder is not None, "Unexpected TransactionType: " + txn
        sess['operations'] = builder(params)

        sess['end_time'] = self.nextTimestamp(2)
        sess.save()
    ## FOR (sess)

    # Use the MongoSniffConverter to populate our metadata catalog
    converter = MongoSniffConverter(self.metadata_db, self.dataset_db)
    converter.no_mongo_parse = True
    converter.no_mongo_sessionizer = True
    converter.process()
    self.assertEqual(TPCCTestCase.NUM_SESSIONS,
                     self.metadata_db.Session.find().count())

    self.collections = dict((c['name'], c)
                            for c in self.metadata_db.Collection.fetch())
    populated_workload = [s for s in self.metadata_db.Session.fetch()]
    self.workload = populated_workload

    # Inflate the database statistics beyond what the converter derived
    # from the (small) generated workload.
    for col_name, col_info in self.collections.iteritems():
        col_info['doc_count'] = 10000
        col_info['avg_doc_size'] = 1024  # bytes
        col_info['max_pages'] = col_info['doc_count'] * col_info['avg_doc_size'] / (4 * 1024)
        # Mark ORDER_LINE fields as children of ORDERS (condition hoisted
        # out of the field loop; it does not depend on the field).
        if col_name == tpccConstants.TABLENAME_ORDER_LINE:
            for field_name, field_info in col_info['fields'].iteritems():
                field_info['parent_col'] = tpccConstants.TABLENAME_ORDERS
        col_info.save()
        # print pformat(col_info)
    ## FOR

    self.costModelConfig = {
        'max_memory':     1024,  # MB
        'skew_intervals': 10,
        'address_size':   64,
        'nodes':          10,
        'window_size':    10,
    }
    self.state = State(self.collections, populated_workload, self.costModelConfig)
def setUp(self):
    """Synthesize a uniform equality-query workload over the test collections.

    Each session issues one query per collection; collection j queries a
    single field (field00 for j==0, field01 for j==1, field02 otherwise).
    The sessions are then converted into catalog metadata and the cost-model
    State is constructed.
    """
    MongoDBTestCase.setUp(self)

    # WORKLOAD
    timestamp = time.time()
    for sess_idx in xrange(CostModelTestCase.NUM_SESSIONS):
        sess = self.metadata_db.Session()
        sess['session_id'] = sess_idx
        sess['ip_client'] = "client:%d" % (1234 + sess_idx)
        sess['ip_server'] = "server:5678"
        sess['start_time'] = timestamp

        for col_idx in xrange(0, len(CostModelTestCase.COLLECTION_NAMES)):
            doc_id = str(random.random())
            queryId = long((sess_idx << 16) + col_idx)
            responseId = (queryId << 8)
            responseContent = {"_id": doc_id}
            criteria = {}
            predicates = {}

            # The queried field depends only on the collection index
            # (hoisted out of the loop below — it never changes with f).
            f_name = "field%02d" % min(col_idx, 2)
            # NOTE(review): every iteration overwrites the same key, so only
            # the final draw survives; the loop is kept so the RNG stream
            # matches the original fixture exactly.
            for _ in xrange(0, CostModelTestCase.NUM_FIELDS):
                responseContent[f_name] = random.randint(0, 100)
                criteria[f_name] = responseContent[f_name]
                predicates[f_name] = constants.PRED_TYPE_EQUALITY
            queryContent = {
                constants.REPLACE_KEY_DOLLAR_PREFIX + "query": criteria
            }

            op = Session.operationFactory()
            op['collection'] = CostModelTestCase.COLLECTION_NAMES[col_idx]
            op['type'] = constants.OP_TYPE_QUERY
            op['query_id'] = queryId
            op['query_content'] = [queryContent]
            op['resp_content'] = [responseContent]
            op['resp_id'] = responseId
            op['predicates'] = predicates
            op['query_time'] = timestamp
            timestamp += 1
            op['resp_time'] = timestamp
            sess['operations'].append(op)
        ## FOR (ops)

        sess['end_time'] = timestamp
        timestamp += 2
        sess.save()
    ## FOR (sess)

    # Use the MongoSniffConverter to populate our metadata
    converter = MongoSniffConverter(self.metadata_db, self.dataset_db)
    converter.no_mongo_parse = True
    converter.no_mongo_sessionizer = True
    converter.process()
    self.assertEqual(CostModelTestCase.NUM_SESSIONS,
                     self.metadata_db.Session.find().count())

    self.collections = dict((c['name'], c)
                            for c in self.metadata_db.Collection.fetch())
    self.assertEqual(len(CostModelTestCase.COLLECTION_NAMES), len(self.collections))

    populated_workload = [s for s in self.metadata_db.Session.fetch()]
    self.workload = populated_workload

    # Inflate the database statistics beyond what the converter derived
    # from the generated workload.
    for col_name, col_info in self.collections.iteritems():
        col_info['doc_count'] = CostModelTestCase.NUM_DOCUMENTS
        col_info['avg_doc_size'] = 1024  # bytes
        col_info['max_pages'] = col_info['doc_count'] * col_info['avg_doc_size'] / (4 * 1024)
        col_info.save()
        # print pformat(col_info)
    ## FOR

    self.costModelConfig = {
        'max_memory':     1024,  # MB
        'skew_intervals': CostModelTestCase.NUM_INTERVALS,
        'address_size':   64,
        'nodes':          CostModelTestCase.NUM_NODES,
        'window_size':    3,
    }
    self.state = State(self.collections, populated_workload, self.costModelConfig)
def setUp(self):
    """Build a workload whose field values follow deterministic counters.

    field00 and field01 count upward from 0 while field02 counts downward
    from 9999999, so every generated query value is unique and predictable.
    All fields are queried with equality predicates.
    """
    MongoDBTestCase.setUp(self)

    # Deterministic per-field value generators.
    next_field00 = 0
    next_field01 = 0
    next_field02 = 9999999

    # WORKLOAD
    timestamp = time.time()
    for sess_idx in xrange(CostModelTestCase.NUM_SESSIONS):
        sess = self.metadata_db.Session()
        sess['session_id'] = sess_idx
        sess['ip_client'] = "client:%d" % (1234 + sess_idx)
        sess['ip_server'] = "server:5678"
        sess['start_time'] = timestamp

        for col_idx in xrange(0, len(CostModelTestCase.COLLECTION_NAMES)):
            doc_id = str(random.random())
            queryId = long((sess_idx << 16) + col_idx)
            responseId = (queryId << 8)
            responseContent = {"_id": doc_id}
            criteria = {}
            predicates = {}

            for f in xrange(0, CostModelTestCase.NUM_FIELDS):
                f_name = "field%02d" % f
                # Pick the next counter value for this field and advance it.
                if f == 0:
                    value = next_field00
                    next_field00 += 1
                elif f == 1:
                    value = next_field01
                    next_field01 += 1
                else:
                    value = next_field02
                    next_field02 -= 1
                responseContent[f_name] = value
                criteria[f_name] = value
                predicates[f_name] = constants.PRED_TYPE_EQUALITY
            ## FOR
            queryContent = {
                constants.REPLACE_KEY_DOLLAR_PREFIX + "query": criteria
            }

            op = Session.operationFactory()
            op['collection'] = CostModelTestCase.COLLECTION_NAMES[col_idx]
            op['type'] = constants.OP_TYPE_QUERY
            op['query_id'] = queryId
            op['query_content'] = [queryContent]
            op['resp_content'] = [responseContent]
            op['resp_id'] = responseId
            op['predicates'] = predicates
            op['query_time'] = timestamp
            timestamp += 1
            op['resp_time'] = timestamp
            sess['operations'].append(op)
        ## FOR (ops)

        sess['end_time'] = timestamp
        timestamp += 2
        sess.save()
    ## FOR (sess)

    # Use the MongoSniffConverter to populate our metadata
    converter = MongoSniffConverter(self.metadata_db, self.dataset_db)
    converter.no_mongo_parse = True
    converter.no_mongo_sessionizer = True
    converter.process()
    self.assertEqual(CostModelTestCase.NUM_SESSIONS,
                     self.metadata_db.Session.find().count())

    self.collections = dict((c['name'], c)
                            for c in self.metadata_db.Collection.fetch())
    self.assertEqual(len(CostModelTestCase.COLLECTION_NAMES), len(self.collections))

    populated_workload = [s for s in self.metadata_db.Session.fetch()]
    self.workload = populated_workload

    # Inflate the database statistics beyond what the converter derived
    # from the generated workload.
    for col_name, col_info in self.collections.iteritems():
        col_info['doc_count'] = CostModelTestCase.NUM_DOCUMENTS
        col_info['avg_doc_size'] = 1024  # bytes
        col_info['max_pages'] = col_info['doc_count'] * col_info['avg_doc_size'] / (4 * 1024)
        col_info.save()
        # print pformat(col_info)
    ## FOR

    self.costModelConfig = {
        'max_memory':     1024,  # MB
        'skew_intervals': CostModelTestCase.NUM_INTERVALS,
        'address_size':   64,
        'nodes':          CostModelTestCase.NUM_NODES,
        'window_size':    1024,
    }
    self.state = State(self.collections, populated_workload, self.costModelConfig)
## DEF
## CLASS
def setUp(self): MongoDBTestCase.setUp(self) # WORKLOAD self.workload = [] timestamp = time.time() for i in xrange(0, NUM_SESSIONS): sess = self.metadata_db.Session() sess['session_id'] = i sess['ip_client'] = "client:%d" % (1234 + i) sess['ip_server'] = "server:5678" sess['start_time'] = timestamp for j in xrange(0, len(COLLECTION_NAMES)): _id = str(random.random()) queryId = long((i << 16) + j) queryContent = {} queryPredicates = {} responseContent = {"_id": _id} responseId = (queryId << 8) for f in xrange(0, NUM_FIELDS): f_name = "field%02d" % f if f % 2 == 0: responseContent[f_name] = random.randint(0, 100) queryContent[f_name] = responseContent[f_name] queryPredicates[f_name] = constants.PRED_TYPE_EQUALITY else: responseContent[f_name] = str( random.randint(1000, 100000)) ## FOR queryContent = { constants.REPLACE_KEY_DOLLAR_PREFIX + "query": queryContent } op = Session.operationFactory() op['collection'] = COLLECTION_NAMES[j] op['type'] = constants.OP_TYPE_QUERY op['query_id'] = queryId op['query_content'] = [queryContent] op['resp_content'] = [responseContent] op['resp_id'] = responseId op['predicates'] = queryPredicates op['query_time'] = timestamp timestamp += 1 op['resp_time'] = timestamp sess['operations'].append(op) ## FOR (ops) sess['end_time'] = timestamp timestamp += 2 sess.save() self.workload.append(sess) ## FOR (sess) # Use the MongoSniffConverter to populate our metadata converter = MongoSniffConverter(self.metadata_db, self.dataset_db) converter.no_mongo_parse = True converter.no_mongo_sessionizer = True converter.process() self.assertEqual(NUM_SESSIONS, self.metadata_db.Session.find().count()) self.collections = dict([(c['name'], c) for c in self.metadata_db.Collection.fetch()]) self.assertEqual(len(COLLECTION_NAMES), len(self.collections)) self.estimator = NodeEstimator(self.collections, NUM_NODES)
def setUp(self):
    """Generate a deterministic TPC-C trace and seed the cost-model state.

    Drives the TPC-C Executor for NUM_SESSIONS transactions, turns each one
    into a Session document, runs the MongoSniffConverter over the result,
    and finally inflates the catalog statistics before building State.
    """
    MongoDBTestCase.setUp(self)
    random.seed(0)  # Needed for TPC-C code
    self.rng = random.Random(0)
    self.timestamp = time.time()
    self.query_id = long(0)
    self.resp_id = long(0)

    sp = scaleparameters.makeWithScaleFactor(TPCCTestCase.NUM_WAREHOUSES,
                                             TPCCTestCase.SCALEFACTOR)
    executor = Executor(sp)

    # WORKLOAD
    for i in xrange(TPCCTestCase.NUM_SESSIONS):
        sess = self.metadata_db.Session()
        sess['session_id'] = i
        sess['ip_client'] = "client:%d" % (1234 + i)
        sess['ip_server'] = "server:5678"
        sess['start_time'] = self.timestamp

        # Ask the executor for the next transaction and translate it into
        # a list of workload operations.
        txn, params = executor.doOne()
        if txn == tpccConstants.TransactionTypes.DELIVERY:
            ops = self.createDelivery(params)
        elif txn == tpccConstants.TransactionTypes.NEW_ORDER:
            ops = self.createNewOrder(params)
        elif txn == tpccConstants.TransactionTypes.ORDER_STATUS:
            ops = self.createOrderStatus(params)
        elif txn == tpccConstants.TransactionTypes.PAYMENT:
            ops = self.createPayment(params)
        elif txn == tpccConstants.TransactionTypes.STOCK_LEVEL:
            ops = self.createStockLevel(params)
        else:
            assert False, "Unexpected TransactionType: " + txn
        sess['operations'] = ops

        sess['end_time'] = self.nextTimestamp(2)
        sess.save()
    ## FOR (sess)

    # Use the MongoSniffConverter to populate our metadata
    converter = MongoSniffConverter(self.metadata_db, self.dataset_db)
    converter.no_mongo_parse = True
    converter.no_mongo_sessionizer = True
    converter.process()
    self.assertEqual(TPCCTestCase.NUM_SESSIONS,
                     self.metadata_db.Session.find().count())

    self.collections = dict((c['name'], c)
                            for c in self.metadata_db.Collection.fetch())
    populated_workload = [s for s in self.metadata_db.Session.fetch()]
    self.workload = populated_workload

    # Inflate the database statistics beyond what the converter derived
    # from the generated workload.
    for col_name, col_info in self.collections.iteritems():
        col_info['doc_count'] = 10000
        col_info['avg_doc_size'] = 1024  # bytes
        col_info['max_pages'] = col_info['doc_count'] * col_info['avg_doc_size'] / (4 * 1024)
        # ORDER_LINE fields are children of ORDERS (check hoisted out of the
        # field loop; it does not depend on the field).
        if col_name == tpccConstants.TABLENAME_ORDER_LINE:
            for field_name, field_info in col_info['fields'].iteritems():
                field_info['parent_col'] = tpccConstants.TABLENAME_ORDERS
        col_info.save()
        # print pformat(col_info)
    ## FOR

    self.costModelConfig = {
        'max_memory':     1024,  # MB
        'skew_intervals': 10,
        'address_size':   64,
        'nodes':          10,
        'window_size':    10,
    }
    self.state = State(self.collections, populated_workload, self.costModelConfig)
def setUp(self): MongoDBTestCase.setUp(self) # WORKLOAD self.workload = [] timestamp = time.time() for i in xrange(0, NUM_SESSIONS): sess = self.metadata_db.Session() sess["session_id"] = i sess["ip_client"] = "client:%d" % (1234 + i) sess["ip_server"] = "server:5678" sess["start_time"] = timestamp for j in xrange(0, len(COLLECTION_NAMES)): _id = str(random.random()) queryId = long((i << 16) + j) queryContent = {} queryPredicates = {} responseContent = {"_id": _id} responseId = queryId << 8 for f in xrange(0, NUM_FIELDS): f_name = "field%02d" % f if f % 2 == 0: responseContent[f_name] = random.randint(0, 100) queryContent[f_name] = responseContent[f_name] queryPredicates[f_name] = constants.PRED_TYPE_EQUALITY else: responseContent[f_name] = str(random.randint(1000, 100000)) ## FOR queryContent = {constants.REPLACE_KEY_DOLLAR_PREFIX + "query": queryContent} op = Session.operationFactory() op["collection"] = COLLECTION_NAMES[j] op["type"] = constants.OP_TYPE_QUERY op["query_id"] = queryId op["query_content"] = [queryContent] op["resp_content"] = [responseContent] op["resp_id"] = responseId op["predicates"] = queryPredicates op["query_time"] = timestamp timestamp += 1 op["resp_time"] = timestamp sess["operations"].append(op) ## FOR (ops) sess["end_time"] = timestamp timestamp += 2 sess.save() self.workload.append(sess) ## FOR (sess) # Use the MongoSniffConverter to populate our metadata converter = MongoSniffConverter(self.metadata_db, self.dataset_db) converter.no_mongo_parse = True converter.no_mongo_sessionizer = True converter.process() self.assertEqual(NUM_SESSIONS, self.metadata_db.Session.find().count()) self.collections = dict([(c["name"], c) for c in self.metadata_db.Collection.fetch()]) self.assertEqual(len(COLLECTION_NAMES), len(self.collections)) self.estimator = NodeEstimator(self.collections, NUM_NODES)
def setUp(self):
    """Build one session holding six hand-crafted queries for projection tests.

    Queries 0-2 hit COLLECTION_NAME with a 'field02' projection; query 3 is
    the same shape but with an empty projection; queries 4 and 5 target
    COLLECTION_NAME_2 / COLLECTION_NAME_3 with empty projections.  The
    shared construction logic lives in the local helper below so each query
    is a single call.
    """
    MongoDBTestCase.setUp(self)

    # WORKLOAD: a single session
    timestamp = time.time()
    sess = self.metadata_db.Session()
    sess['session_id'] = 0
    sess['ip_client'] = "client:%d" % (1234 + 0)
    sess['ip_server'] = "server:5678"
    sess['start_time'] = timestamp

    def appendQuery(queryId, collection, fields, projected, ts):
        """Append one equality-query op to sess; returns the advanced timestamp.

        `fields` lists the equality-predicate fields in the order their
        random values are drawn (preserving the original RNG sequence);
        `projected` adds a random 'field02' projection when True.
        """
        doc = {"_id": str(random.random())}
        criteria = {}
        predicates = {}
        for f_name in fields:
            doc[f_name] = random.randint(0, 100)
            criteria[f_name] = doc[f_name]
            predicates[f_name] = constants.PRED_TYPE_EQUALITY
        projection = {}
        if projected:
            projection['field02'] = random.randint(0, 100)
        op = Session.operationFactory()
        op['collection'] = collection
        op['type'] = constants.OP_TYPE_QUERY
        op['query_id'] = queryId
        op['query_content'] = [
            {constants.REPLACE_KEY_DOLLAR_PREFIX + "query": criteria}
        ]
        op['resp_content'] = [doc]
        op['resp_id'] = (queryId << 8)
        op['predicates'] = predicates
        op['query_time'] = ts
        op['query_fields'] = projection
        ts += 1
        op['resp_time'] = ts
        sess['operations'].append(op)
        return ts

    # query 0: field00 equality, projected
    timestamp = appendQuery(long((0 << 16) + 0), CostModelTestCase.COLLECTION_NAME,
                            ['field00'], True, timestamp)
    # query 1: field01 equality, projected
    timestamp = appendQuery(long((1 << 16) + 1), CostModelTestCase.COLLECTION_NAME,
                            ['field01'], True, timestamp)
    # query 2: field01 + field00 equality, projected
    timestamp = appendQuery(long((2 << 16) + 2), CostModelTestCase.COLLECTION_NAME,
                            ['field01', 'field00'], True, timestamp)
    sess['end_time'] = timestamp
    timestamp += 1
    # query 3: field01 + field00 equality, no projection field
    timestamp = appendQuery(long((2 << 16) + 3), CostModelTestCase.COLLECTION_NAME,
                            ['field01', 'field00'], False, timestamp)
    sess['end_time'] = timestamp
    timestamp += 1
    # query 4: field00 equality against collection 2, no projection
    timestamp = appendQuery(long((2 << 16) + 4), CostModelTestCase.COLLECTION_NAME_2,
                            ['field00'], False, timestamp)
    sess['end_time'] = timestamp
    timestamp += 1
    # query 5: field00 equality against collection 3, no projection
    timestamp = appendQuery(long((2 << 16) + 5), CostModelTestCase.COLLECTION_NAME_3,
                            ['field00'], False, timestamp)
    sess['end_time'] = timestamp
    timestamp += 1
    sess.save()

    # Use the MongoSniffConverter to populate our metadata
    converter = MongoSniffConverter(self.metadata_db, self.dataset_db)
    converter.no_mongo_parse = True
    converter.no_mongo_sessionizer = True
    converter.process()
    self.assertEqual(CostModelTestCase.NUM_SESSIONS,
                     self.metadata_db.Session.find().count())

    self.collections = dict((c['name'], c)
                            for c in self.metadata_db.Collection.fetch())
    populated_workload = [s for s in self.metadata_db.Session.fetch()]
    self.workload = populated_workload

    # Inflate the database statistics beyond what the converter derived
    # from the generated workload.
    for col_name, col_info in self.collections.iteritems():
        col_info['doc_count'] = CostModelTestCase.NUM_DOCUMENTS
        col_info['avg_doc_size'] = 1024  # bytes
        col_info['max_pages'] = col_info['doc_count'] * col_info['avg_doc_size'] / (4 * 1024)
        col_info.save()
        # print pformat(col_info)
    ## FOR

    self.costModelConfig = {
        'max_memory':     1024,  # MB
        'skew_intervals': CostModelTestCase.NUM_INTERVALS,
        'address_size':   64,
        'nodes':          CostModelTestCase.NUM_NODES,
        'window_size':    10,
    }
    self.state = State(self.collections, populated_workload, self.costModelConfig)
def setUp(self):
    """Create the six-query projection-test session and the cost-model state.

    The six queries exercise combinations of queried fields, projections,
    and target collections; a local factory function removes the sixfold
    copy/paste of the op-construction code while drawing random values in
    exactly the original order.
    """
    MongoDBTestCase.setUp(self)

    # WORKLOAD: everything goes into a single session
    timestamp = time.time()
    sess = self.metadata_db.Session()
    sess['session_id'] = 0
    sess['ip_client'] = "client:%d" % (1234 + 0)
    sess['ip_server'] = "server:5678"
    sess['start_time'] = timestamp

    def _build_query(query_id, col_name, pred_fields, with_projection, ts):
        """Append a query op for `pred_fields` to sess and return the new ts.

        Random draws happen in order: _id, one randint per predicate field,
        then (optionally) the 'field02' projection value.
        """
        response = {"_id": str(random.random())}
        query_filter = {}
        pred_types = {}
        for name in pred_fields:
            response[name] = random.randint(0, 100)
            query_filter[name] = response[name]
            pred_types[name] = constants.PRED_TYPE_EQUALITY
        projection = {}
        if with_projection:
            projection['field02'] = random.randint(0, 100)
        op = Session.operationFactory()
        op['collection'] = col_name
        op['type'] = constants.OP_TYPE_QUERY
        op['query_id'] = query_id
        op['query_content'] = [
            {constants.REPLACE_KEY_DOLLAR_PREFIX + "query": query_filter}
        ]
        op['resp_content'] = [response]
        op['resp_id'] = (query_id << 8)
        op['predicates'] = pred_types
        op['query_time'] = ts
        op['query_fields'] = projection
        ts += 1
        op['resp_time'] = ts
        sess['operations'].append(op)
        return ts

    # query 0: field00, with projection
    timestamp = _build_query(long((0 << 16) + 0), CostModelTestCase.COLLECTION_NAME,
                             ['field00'], True, timestamp)
    # query 1: field01, with projection
    timestamp = _build_query(long((1 << 16) + 1), CostModelTestCase.COLLECTION_NAME,
                             ['field01'], True, timestamp)
    # query 2: field01 and field00, with projection
    timestamp = _build_query(long((2 << 16) + 2), CostModelTestCase.COLLECTION_NAME,
                             ['field01', 'field00'], True, timestamp)
    sess['end_time'] = timestamp
    timestamp += 1
    # query 3: field01 and field00, without projection field
    timestamp = _build_query(long((2 << 16) + 3), CostModelTestCase.COLLECTION_NAME,
                             ['field01', 'field00'], False, timestamp)
    sess['end_time'] = timestamp
    timestamp += 1
    # query 4: field00 against collection 2, without projection
    timestamp = _build_query(long((2 << 16) + 4), CostModelTestCase.COLLECTION_NAME_2,
                             ['field00'], False, timestamp)
    sess['end_time'] = timestamp
    timestamp += 1
    # query 5: field00 against collection 3, without projection
    timestamp = _build_query(long((2 << 16) + 5), CostModelTestCase.COLLECTION_NAME_3,
                             ['field00'], False, timestamp)
    sess['end_time'] = timestamp
    timestamp += 1
    sess.save()

    # Use the MongoSniffConverter to populate our metadata
    converter = MongoSniffConverter(self.metadata_db, self.dataset_db)
    converter.no_mongo_parse = True
    converter.no_mongo_sessionizer = True
    converter.process()
    self.assertEqual(CostModelTestCase.NUM_SESSIONS,
                     self.metadata_db.Session.find().count())

    self.collections = dict((c['name'], c)
                            for c in self.metadata_db.Collection.fetch())
    populated_workload = [s for s in self.metadata_db.Session.fetch()]
    self.workload = populated_workload

    # Inflate the database statistics beyond what the converter derived
    # from the generated workload.
    for col_name, col_info in self.collections.iteritems():
        col_info['doc_count'] = CostModelTestCase.NUM_DOCUMENTS
        col_info['avg_doc_size'] = 1024  # bytes
        col_info['max_pages'] = col_info['doc_count'] * col_info['avg_doc_size'] / (4 * 1024)
        col_info.save()
        # print pformat(col_info)
    ## FOR

    self.costModelConfig = {
        'max_memory':     1024,  # MB
        'skew_intervals': CostModelTestCase.NUM_INTERVALS,
        'address_size':   64,
        'nodes':          CostModelTestCase.NUM_NODES,
        'window_size':    10,
    }
    self.state = State(self.collections, populated_workload, self.costModelConfig)