def setUp(self):
    """Populate the metadata DB with synthetic workload sessions.

    Creates NUM_SESSIONS sessions, each holding NUM_OPS_PER_SESSION query
    operations whose responses carry NUM_FIELDS generated fields, then
    builds the Reconstructor under test.
    """
    MongoDBTestCase.setUp(self)

    for sess_idx in xrange(NUM_SESSIONS):
        sess = self.metadata_db.Session()
        sess['session_id'] = sess_idx
        sess['ip_client'] = "client:%d" % (1234 + sess_idx)
        sess['ip_server'] = "server:5678"
        sess['start_time'] = time.time()
        sess['end_time'] = time.time() + 5

        for op_idx in xrange(NUM_OPS_PER_SESSION):
            _id = str(random.random())
            # Encode (session, op) into a single unique query identifier
            queryId = long((sess_idx << 16) + op_idx)
            queryContent = {
                constants.REPLACE_KEY_DOLLAR_PREFIX + "query": {"_id": [_id]}
            }
            responseContent = {"_id": _id}
            responseId = queryId << 8

            # Even-numbered fields get ints, odd-numbered get strings,
            # so the catalog sees a mix of field types
            for f in xrange(NUM_FIELDS):
                f_name = "field%02d" % f
                if f % 2 == 0:
                    responseContent[f_name] = random.randint(0, 100)
                else:
                    responseContent[f_name] = str(random.randint(1000, 100000))
            ## FOR

            op = workload.Session.operationFactory()
            op['collection'] = COLLECTION_NAME
            op['type'] = constants.OP_TYPE_QUERY
            op['query_id'] = queryId
            op['query_content'] = [queryContent]
            op['resp_content'] = [responseContent]
            op['resp_id'] = responseId
            sess['operations'].append(op)
        ## FOR (ops)

        sess.save()
    ## FOR (sess)

    self.assertEqual(NUM_SESSIONS, self.metadata_db.Session.find().count())
    self.reconstructor = Reconstructor(self.metadata_db, self.dataset_db)
def testPostProcess(self):
    """
    Check whether we can successfully extract the database schema
    into our internal catalog
    """
    # Rebuild the dataset from the traces, then derive catalog statistics
    Reconstructor(self.metadata_db, self.dataset_db).process()
    self.converter.postProcess()

    col_info = self.metadata_db.Collection.one({"name": COLLECTION_NAME})
    # pprint(col_info)

    expected_queries = NUM_SESSIONS * NUM_OPS_PER_SESSION

    # Workload-derived attributes: every generated op targets this collection
    self.assertEqual(expected_queries, col_info['workload_queries'])
    self.assertAlmostEqual(1.0, col_info['workload_percent'])

    # Fields: the NUM_FIELDS generated fields plus the implicit '_id'
    self.assertEqual(NUM_FIELDS + 1, len(col_info['fields']))
    for field in col_info['fields'].itervalues():
        self.assertEqual(expected_queries, field['query_use_count'])
class TestReconstructor(MongoDBTestCase):
    """Tests for Reconstructor: rebuilding a dataset from a captured workload.

    setUp() generates a synthetic workload of query operations; testProcess()
    verifies that replaying the workload reconstructs one document per
    operation with all response fields present.
    """

    def setUp(self):
        """Populate the metadata DB with synthetic workload sessions.

        Creates NUM_SESSIONS sessions, each holding NUM_OPS_PER_SESSION query
        operations whose responses carry NUM_FIELDS generated fields, then
        builds the Reconstructor under test.
        """
        MongoDBTestCase.setUp(self)

        # Generate some fake workload sessions
        for i in xrange(0, NUM_SESSIONS):
            sess = self.metadata_db.Session()
            sess['session_id'] = i
            sess['ip_client'] = "client:%d" % (1234 + i)
            sess['ip_server'] = "server:5678"
            sess['start_time'] = time.time()
            sess['end_time'] = time.time() + 5

            for j in xrange(0, NUM_OPS_PER_SESSION):
                _id = str(random.random())
                # Encode (session, op) into a single unique query identifier
                queryId = long((i << 16) + j)
                queryContent = {
                    constants.REPLACE_KEY_DOLLAR_PREFIX + "query": {"_id": [_id]}
                }
                responseContent = {"_id": _id}
                responseId = (queryId << 8)

                # Even-numbered fields get ints, odd-numbered get strings,
                # so the reconstructed schema sees a mix of field types
                for f in xrange(0, NUM_FIELDS):
                    f_name = "field%02d" % f
                    if f % 2 == 0:
                        responseContent[f_name] = random.randint(0, 100)
                    else:
                        responseContent[f_name] = str(random.randint(1000, 100000))
                ## FOR

                op = workload.Session.operationFactory()
                op['collection'] = COLLECTION_NAME
                op['type'] = constants.OP_TYPE_QUERY
                op['query_id'] = queryId
                op['query_content'] = [queryContent]
                op['resp_content'] = [responseContent]
                op['resp_id'] = responseId
                sess['operations'].append(op)
            ## FOR (ops)

            sess.save()
        ## FOR (sess)

        self.assertEqual(NUM_SESSIONS, self.metadata_db.Session.find().count())
        self.reconstructor = Reconstructor(self.metadata_db, self.dataset_db)
    ## DEF

    def testProcess(self):
        """
        Check whether the reconstructed database includes the fields that
        were returned in the query responses
        """
        self.reconstructor.process()

        fields = ["field%02d" % f for f in xrange(0, NUM_FIELDS)]
        num_docs = 0
        for doc in self.dataset_db[COLLECTION_NAME].find():
            # pprint(doc)
            # Membership test directly on the dict; no need to build keys()
            for f in fields:
                self.assertIn(f, doc)
            num_docs += 1

        # We should always have one reconstructed document per operation.
        # (was assertEquals — deprecated alias, inconsistent with the
        # assertEqual used in setUp)
        self.assertEqual(num_docs, NUM_SESSIONS * NUM_OPS_PER_SESSION)
    ## DEF