def setUp(self):
        """
            Seed the metadata database with synthetic workload sessions
            and create the Reconstructor instance used by the tests
        """
        MongoDBTestCase.setUp(self)

        # Every session carries NUM_OPS_PER_SESSION query operations
        for sess_idx in xrange(NUM_SESSIONS):
            session = self.metadata_db.Session()
            session['session_id'] = sess_idx
            session['ip_client'] = "client:%d" % (1234 + sess_idx)
            session['ip_server'] = "server:5678"
            session['start_time'] = time.time()
            session['end_time'] = time.time() + 5

            for op_idx in xrange(NUM_OPS_PER_SESSION):
                doc_id = str(random.random())
                # Session index shifted left 16 bits plus the operation index
                query_id = long((sess_idx << 16) + op_idx)
                query_key = constants.REPLACE_KEY_DOLLAR_PREFIX + "query"
                query_doc = {query_key: {"_id": [doc_id]}}

                # Even-numbered fields hold ints, odd-numbered ones hold
                # the string form of an int
                response_doc = {"_id": doc_id}
                for field_idx in xrange(NUM_FIELDS):
                    if field_idx % 2 == 0:
                        value = random.randint(0, 100)
                    else:
                        value = str(random.randint(1000, 100000))
                    response_doc["field%02d" % field_idx] = value
                ## FOR (fields)

                operation = workload.Session.operationFactory()
                operation['collection'] = COLLECTION_NAME
                operation['type'] = constants.OP_TYPE_QUERY
                operation['query_id'] = query_id
                operation['query_content'] = [query_doc]
                operation['resp_content'] = [response_doc]
                operation['resp_id'] = query_id << 8
                session['operations'].append(operation)
            ## FOR (ops)

            session.save()
        ## FOR (sess)

        # Sanity check: one stored document per generated session
        stored_sessions = self.metadata_db.Session.find().count()
        self.assertEqual(NUM_SESSIONS, stored_sessions)

        self.reconstructor = Reconstructor(self.metadata_db, self.dataset_db)
    def testPostProcess(self):
        """
            Check whether we can successfully extract the database schema
            into our internal catalog
        """
        # Run the reconstructor first, then the converter's post-processing step
        rebuilder = Reconstructor(self.metadata_db, self.dataset_db)
        rebuilder.process()
        # NOTE(review): no setUp visible in this chunk assigns self.converter --
        # confirm that the enclosing class's fixture provides it
        self.converter.postProcess()

        catalog_entry = self.metadata_db.Collection.one({"name": COLLECTION_NAME})
        expected_queries = NUM_SESSIONS * NUM_OPS_PER_SESSION

        # Workload-derived Attributes
        self.assertEqual(expected_queries, catalog_entry['workload_queries'])
        self.assertAlmostEqual(1.0, catalog_entry['workload_percent'])

        # Fields
        # Add one for the '_id' field count
        catalog_fields = catalog_entry['fields']
        self.assertEqual(NUM_FIELDS + 1, len(catalog_fields))
        for _name, field_info in catalog_fields.iteritems():
            self.assertEqual(expected_queries, field_info['query_use_count'])
class TestReconstructor(MongoDBTestCase):
    """
        Tests for the Reconstructor: fake workload sessions are written to
        the metadata database and process() is expected to populate the
        dataset database from the recorded query responses
    """

    def setUp(self):
        """
            Store NUM_SESSIONS fake sessions, each holding
            NUM_OPS_PER_SESSION query operations, and create the
            Reconstructor under test
        """
        MongoDBTestCase.setUp(self)

        # Generate some fake workload sessions
        for i in xrange(0, NUM_SESSIONS):
            sess = self.metadata_db.Session()
            sess['session_id'] = i
            sess['ip_client'] = "client:%d" % (1234+i)
            sess['ip_server'] = "server:5678"
            sess['start_time'] = time.time()
            sess['end_time'] = time.time() + 5
            for j in xrange(0, NUM_OPS_PER_SESSION):
                _id = str(random.random())
                # Session index shifted left 16 bits plus the operation index
                queryId = long((i<<16) + j)
                queryContent = { constants.REPLACE_KEY_DOLLAR_PREFIX + "query":
                                 { "_id": [ _id ] } }

                # The response echoes the queried _id and adds NUM_FIELDS
                # generated fields: ints for even indexes, the string form
                # of an int for odd indexes
                responseContent = {"_id": _id}
                responseId = (queryId<<8)
                for f in xrange(0, NUM_FIELDS):
                    f_name = "field%02d" % f
                    if f % 2 == 0:
                        responseContent[f_name] = random.randint(0, 100)
                    else:
                        responseContent[f_name] = str(random.randint(1000, 100000))
                ## FOR

                op = workload.Session.operationFactory()
                op['collection']    = COLLECTION_NAME
                op['type']          = constants.OP_TYPE_QUERY
                op['query_id']      = queryId
                op['query_content'] = [ queryContent ]
                op['resp_content']  = [ responseContent ]
                op['resp_id']       = responseId
                sess['operations'].append(op)
            ## FOR (ops)

            sess.save()
        ## FOR (sess)

        # Sanity check: every generated session must have been stored
        self.assertEqual(NUM_SESSIONS, self.metadata_db.Session.find().count())

        self.reconstructor = Reconstructor(self.metadata_db, self.dataset_db)
    ## DEF

    def testProcess(self):
        """
            Check whether the reconstructed database includes the fields
            that were returned in the query responses
        """
        self.reconstructor.process()

        fields = [ "field%02d" % f for f in xrange(0, NUM_FIELDS) ]
        num_docs = 0
        for doc in self.dataset_db[COLLECTION_NAME].find():
            # pprint(doc)
            for f in fields:
                # Membership is tested on the document itself rather than
                # on a freshly built list of its keys
                self.assertIn(f, doc)
            num_docs += 1
        # We should always have one per operation
        # (assertEqual replaces the deprecated assertEquals alias; expected
        # value comes first like the other assertions in this file)
        self.assertEqual(NUM_SESSIONS * NUM_OPS_PER_SESSION, num_docs)
    ## DEF