Esempio n. 1
0
 def createOrderStatus(self, params):
     """Build the operations of an order-status transaction.

     Three query operations are generated, in this order: one on the
     customer collection, one on the orders collection and one on the
     order-line collection. The query predicates are taken from ``params``
     (the order-line lookup uses a randomly drawn order id), while every
     response document is filled with random placeholder values.

     ``params`` must provide the keys "w_id", "d_id", "c_id" and "c_last".
     Returns the list of generated operations.
     """
     warehouse = params["w_id"]
     district = params["d_id"]
     customer = params["c_id"]
     # Looked up but not used by any query below; a missing key still raises
     last_name = params["c_last"]
     order = self.rng.randint(0, 10000)

     def _complete(op, response, collection, predicate, projection):
         # Fills in one query operation. The query id is drawn before the
         # response id, and the query timestamp before the response one.
         op['resp_content']  = [response]
         op['collection']    = collection
         op['type']          = constants.OP_TYPE_QUERY
         op['query_id']      = self.nextQueryId()
         op['query_content'] = [{"#query" : predicate}]
         op['query_fields']  = projection
         op['resp_id']       = self.nextResponseId()
         op['query_time']    = self.nextTimestamp()
         op['resp_time']     = self.nextTimestamp()
         return op

     # Customer lookup
     customer_op = Session.operationFactory()
     customer_resp = {
         "C_W_ID": self.rng.randint(0, 100),
         "C_D_ID": self.rng.randint(0, 100),
         "C_ID": self.rng.randint(0, 100),
     }
     _complete(customer_op, customer_resp, tpccConstants.TABLENAME_CUSTOMER,
               {"C_W_ID": warehouse, "C_D_ID": district, "C_ID": customer},
               {"C_ID": 1, "C_FIRST": 1, "C_MIDDLE": 1, "C_LAST": 1, "C_BALANCE": 1})

     # Orders placed by that customer
     orders_op = Session.operationFactory()
     orders_resp = {
         "O_W_ID": self.rng.randint(0, 100),
         "O_D_ID": self.rng.randint(0, 100),
         "O_C_ID": self.rng.randint(0, 100),
     }
     _complete(orders_op, orders_resp, tpccConstants.TABLENAME_ORDERS,
               {"O_W_ID": warehouse, "O_D_ID": district, "O_C_ID": customer},
               {"O_ID": 1, "O_CARRIER_ID": 1, "O_ENTRY_D": 1})

     # Order lines of the (randomly chosen) order
     lines_op = Session.operationFactory()
     lines_resp = {
         "OL_W_ID": self.rng.randint(0, 100),
         "OL_D_ID": self.rng.randint(0, 100),
         "OL_O_ID": self.rng.randint(0, 100),
     }
     _complete(lines_op, lines_resp, tpccConstants.TABLENAME_ORDER_LINE,
               {"OL_W_ID": warehouse, "OL_D_ID": district, "OL_O_ID": order},
               {"OL_SUPPLY_W_ID": 1, "OL_I_ID": 1, "OL_QUANTITY": 1, "OL_AMOUNT": 1, "OL_DELIVERY_D": 1})

     return [customer_op, orders_op, lines_op]
Esempio n. 2
0
    def createStockLevel(self, params):
        """Build the operations of a stock-level transaction.

        Three query operations are generated, in this order: one on the
        district collection, one on the order-line collection (a range of 20
        order ids below a randomly drawn order id) and one on the stock
        collection (ten randomly drawn item ids whose quantity is below the
        threshold). Response documents hold random placeholder values.

        ``params`` must provide the keys "w_id", "d_id" and "threshold".
        Returns the list of generated operations.
        """
        warehouse = params["w_id"]
        district = params["d_id"]
        newest_order = self.rng.randint(0, 10000)
        item_ids = [ self.rng.randint(0, 1000) for _ in xrange(10) ]
        min_quantity = params["threshold"]

        def _complete(op, response, collection, predicate, projection):
            # Fills in one query operation. The query id is drawn before the
            # response id, and the query timestamp before the response one.
            op['resp_content']  = [response]
            op['collection']    = collection
            op['type']          = constants.OP_TYPE_QUERY
            op['query_id']      = self.nextQueryId()
            op['query_content'] = [{"#query" : predicate}]
            op['query_fields']  = projection
            op['resp_id']       = self.nextResponseId()
            op['query_time']    = self.nextTimestamp()
            op['resp_time']     = self.nextTimestamp()
            return op

        # District lookup
        district_op = Session.operationFactory()
        district_resp = {
            "D_W_ID": self.rng.randint(0, 100),
            "D_ID": self.rng.randint(0, 100),
        }
        _complete(district_op, district_resp, tpccConstants.TABLENAME_DISTRICT,
                  {"D_W_ID": warehouse, "D_ID": district},
                  {"D_NEXT_O_ID": 1})

        # Order lines of the 20 orders preceding the drawn order id
        lines_op = Session.operationFactory()
        lines_resp = {
            "OL_W_ID": self.rng.randint(0, 100),
            "OL_D_ID": self.rng.randint(0, 100),
            "OL_O_ID": self.rng.randint(0, 100),
        }
        _complete(lines_op, lines_resp, tpccConstants.TABLENAME_ORDER_LINE,
                  {"OL_W_ID": warehouse, "OL_D_ID": district,
                   "OL_O_ID": {"#lt": newest_order, "#gte": newest_order-20}},
                  {"OL_I_ID": 1})

        # Stock entries of the drawn items that are below the threshold
        stock_op = Session.operationFactory()
        stock_resp = {
            "S_W_ID": self.rng.randint(0, 100),
            "S_I_ID": self.rng.randint(0, 100),
            "S_QUANTITY": self.rng.randint(0, 100),
        }
        _complete(stock_op, stock_resp, tpccConstants.TABLENAME_STOCK,
                  {"S_W_ID": warehouse, "S_I_ID": {"#in": list(item_ids)},
                   "S_QUANTITY": {"#lt": min_quantity}},
                  None)

        return [district_op, lines_op, stock_op]
    def testEstimateNodesNullValue(self):
        """Check the estimated touched nodes when the operation has no value for the sharding key"""

        design = Design()
        for col_name in COLLECTION_NAMES:
            name = self.collections[col_name]['name']
            design.addCollection(name)
            # Shard on a key that does not show up in the operation's fields;
            # the estimator is still expected to hand back a result
            design.addShardKey(name, ['XXXX'])
        ## FOR

        # Expectation: a query that does not filter on the sharding key is
        # broadcast, so it touches every node
        op = self.metadata_db.Session.fetch_one()['operations'][0]
        broadcast_nodes = list(self.estimator.estimateNodes(design, op))
        self.assertListEqual(range(NUM_NODES), broadcast_nodes)

        # Expectation: turning the same operation into an insert of a document
        # without the sharding key sends it to exactly one node
        op['type'] = constants.OP_TYPE_INSERT
        op['query_content'] = op['resp_content']
        op['predicates'] = []
        first_insert_nodes = list(self.estimator.estimateNodes(design, op))
        self.assertEqual(1, len(first_insert_nodes))

        # Expectation: a second, unrelated insert without the sharding key
        # lands on that very same node
        other = Session.operationFactory()
        other['collection'] = COLLECTION_NAMES[0]
        other['type'] = constants.OP_TYPE_INSERT
        other['query_id'] = 10000
        other['query_content'] = [{"parkinglot": 1234}]
        other['resp_content'] = [{"ok": 1}]
        other['resp_id'] = 10001
        second_insert_nodes = list(self.estimator.estimateNodes(design, other))
        self.assertEqual(1, len(second_insert_nodes))
        self.assertListEqual(first_insert_nodes, second_insert_nodes)
    def testEstimateNodesNullValue(self):
        """Check the estimated touched nodes when the operation has no value for the sharding key"""

        design = Design()
        for col_name in COLLECTION_NAMES:
            name = self.collections[col_name]["name"]
            design.addCollection(name)
            # Shard on a key that does not show up in the operation's fields;
            # the estimator is still expected to hand back a result
            design.addShardKey(name, ["XXXX"])
        ## FOR

        # Expectation: a query that does not filter on the sharding key is
        # broadcast, so it touches every node
        op = self.metadata_db.Session.fetch_one()["operations"][0]
        broadcast_nodes = list(self.estimator.estimateNodes(design, op))
        self.assertListEqual(range(NUM_NODES), broadcast_nodes)

        # Expectation: turning the same operation into an insert of a document
        # without the sharding key sends it to exactly one node
        op["type"] = constants.OP_TYPE_INSERT
        op["query_content"] = op["resp_content"]
        op["predicates"] = []
        first_insert_nodes = list(self.estimator.estimateNodes(design, op))
        self.assertEqual(1, len(first_insert_nodes))

        # Expectation: a second, unrelated insert without the sharding key
        # lands on that very same node
        other = Session.operationFactory()
        other["collection"] = COLLECTION_NAMES[0]
        other["type"] = constants.OP_TYPE_INSERT
        other["query_id"] = 10000
        other["query_content"] = [{"parkinglot": 1234}]
        other["resp_content"] = [{"ok": 1}]
        other["resp_id"] = 10001
        second_insert_nodes = list(self.estimator.estimateNodes(design, other))
        self.assertEqual(1, len(second_insert_nodes))
        self.assertListEqual(first_insert_nodes, second_insert_nodes)
Esempio n. 5
0
    def createNewOrder(self, params):
        """Build the operations of a new-order transaction.

        The generated sequence is: item query, warehouse query, district
        query, district update (increments D_NEXT_O_ID), customer query,
        new-order insert, orders insert, stock query, and then one stock
        update per ordered item. Query/update predicates come from ``params``;
        response documents hold random placeholder values.

        ``params`` must provide the keys "w_id", "d_id", "c_id", "o_entry_d",
        "i_ids", "i_w_ids" and "i_qtys" (the last three are indexed in
        parallel, one entry per order line).

        Returns the list of generated operations.
        """
        ops = []
        w_id = params["w_id"]
        d_id = params["d_id"]
        c_id = params["c_id"]
        o_entry_d = params["o_entry_d"]
        i_ids = params["i_ids"]
        i_w_ids = params["i_w_ids"]
        i_qtys = params["i_qtys"]
        s_dist_col = "S_DIST_%02d" % d_id
        # w_tax, d_tax and c_discount are not used by any operation below; the
        # draws are kept so the sequence of values taken from self.rng (and
        # therefore everything generated afterwards) stays the same.
        w_tax = self.rng.random()
        d_tax = self.rng.random()
        d_next_o_id = self.rng.randint(0, 1000)
        c_discount = self.rng.randint(0, 10)
        ol_cnt = len(i_ids)
        o_carrier_id = tpccConstants.NULL_CARRIER_ID
        # True when no supplying warehouses are given or all of them are w_id
        all_local = (not i_w_ids or [w_id] * len(i_w_ids) == i_w_ids)

        # ITEM: look up all ordered items
        op = Session.operationFactory()
        responseContent = {"I_ID": self.rng.randint(0, 100)}
        op['resp_content'] = [responseContent]
        op['collection'] = tpccConstants.TABLENAME_ITEM
        op['type'] = constants.OP_TYPE_QUERY
        op['query_id'] = self.nextQueryId()
        op['query_content'] = [{"#query": {"I_ID": {"#in": i_ids}}}]
        op['query_fields'] = {
            "I_ID": 1,
            "I_PRICE": 1,
            "I_NAME": 1,
            "I_DATA": 1
        }
        op['resp_id'] = self.nextResponseId()
        op['query_time'] = self.nextTimestamp()
        op['resp_time'] = self.nextTimestamp()
        ops.append(op)

        # WAREHOUSE: tax rate lookup
        op = Session.operationFactory()
        responseContent = {}
        responseContent["W_ID"] = self.rng.randint(0, 100)
        op['resp_content'] = [responseContent]
        op['collection'] = tpccConstants.TABLENAME_WAREHOUSE
        op['type'] = constants.OP_TYPE_QUERY
        op['query_id'] = self.nextQueryId()
        op['query_content'] = [{"#query": {"W_ID": w_id}}]
        op['query_fields'] = {"W_TAX": 1}
        op['resp_id'] = self.nextResponseId()
        op['query_time'] = self.nextTimestamp()
        op['resp_time'] = self.nextTimestamp()
        ops.append(op)

        # DISTRICT: tax rate and next order id lookup
        op = Session.operationFactory()
        responseContent = {}
        responseContent["D_ID"] = self.rng.randint(0, 100)
        responseContent["D_W_ID"] = self.rng.randint(0, 100)
        op['resp_content'] = [responseContent]
        op['collection'] = tpccConstants.TABLENAME_DISTRICT
        op['type'] = constants.OP_TYPE_QUERY
        op['query_id'] = self.nextQueryId()
        op['query_content'] = [{"#query": {"D_ID": d_id, "D_W_ID": w_id}}]
        op['query_fields'] = {"D_TAX": 1, "D_NEXT_O_ID": 1}
        op['resp_id'] = self.nextResponseId()
        op['query_time'] = self.nextTimestamp()
        op['resp_time'] = self.nextTimestamp()
        ops.append(op)

        # DISTRICT: increment the next order id
        op = Session.operationFactory()
        responseContent = {}
        responseContent["D_ID"] = self.rng.randint(0, 100)
        responseContent["D_W_ID"] = self.rng.randint(0, 100)
        op['resp_content'] = [responseContent]
        op['collection'] = tpccConstants.TABLENAME_DISTRICT
        op['type'] = constants.OP_TYPE_UPDATE
        op['query_id'] = self.nextQueryId()
        op['query_content'] = [{
            "D_ID": d_id,
            "D_W_ID": w_id
        }, {
            "#inc": {
                "D_NEXT_O_ID": 1
            }
        }]
        op['query_fields'] = None
        op['resp_id'] = self.nextResponseId()
        op['query_time'] = self.nextTimestamp()
        op['resp_time'] = self.nextTimestamp()
        op['update_multi'] = False
        op['update_upsert'] = True
        ops.append(op)

        # CUSTOMER: discount / credit lookup
        op = Session.operationFactory()
        responseContent = {}
        responseContent["C_ID"] = self.rng.randint(0, 100)
        responseContent["C_D_ID"] = self.rng.randint(0, 100)
        responseContent["C_W_ID"] = self.rng.randint(0, 100)
        op['resp_content'] = [responseContent]
        op['collection'] = tpccConstants.TABLENAME_CUSTOMER
        op['type'] = constants.OP_TYPE_QUERY
        op['query_id'] = self.nextQueryId()
        op['query_content'] = [{
            "#query": {
                "C_ID": c_id,
                "C_D_ID": d_id,
                "C_W_ID": w_id
            }
        }]
        op['query_fields'] = {"C_DISCOUNT": 1, "C_LAST": 1, "C_CREDIT": 1}
        op['resp_id'] = self.nextResponseId()
        op['query_time'] = self.nextTimestamp()
        op['resp_time'] = self.nextTimestamp()
        ops.append(op)

        # NEW_ORDER: insert
        op = Session.operationFactory()
        responseContent = {}
        responseContent["NO_O_ID"] = self.rng.randint(0, 100)
        responseContent["NO_D_ID"] = self.rng.randint(0, 100)
        responseContent["NO_W_ID"] = self.rng.randint(0, 100)
        op['resp_content'] = [responseContent]
        op['collection'] = tpccConstants.TABLENAME_NEW_ORDER
        op['type'] = constants.OP_TYPE_INSERT
        op['query_id'] = self.nextQueryId()
        op['query_content'] = [{
            "NO_O_ID": d_next_o_id,
            "NO_D_ID": d_id,
            "NO_W_ID": w_id
        }]
        op['query_fields'] = None
        op['resp_id'] = self.nextResponseId()
        op['query_time'] = self.nextTimestamp()
        op['resp_time'] = self.nextTimestamp()
        ops.append(op)

        # ORDERS: insert
        # A fresh operation object is required here. Without it the
        # assignments below overwrite the NEW_ORDER insert that was just
        # appended, and the same object ends up in ``ops`` twice.
        op = Session.operationFactory()
        o = {
            "O_D_ID": d_id,
            "O_W_ID": w_id,
            "O_C_ID": c_id,
            "O_ID": d_next_o_id,
            "O_ENTRY_D": o_entry_d,
            "O_CARRIER_ID": o_carrier_id,
            "O_OL_CNT": ol_cnt,
            "O_ALL_LOCAL": all_local
        }
        responseContent = {
            "O_D_ID": self.rng.randint(0, 100),
            "O_W_ID": self.rng.randint(0, 100),
            "O_C_ID": self.rng.randint(0, 100),
            "O_ID": self.rng.randint(0, 100),
            "O_ENTRY_D": self.rng.randint(0, 100),
            "O_CARRIER_ID": self.rng.randint(0, 100),
            "O_OL_CNT": self.rng.randint(0, 100),
            "O_ALL_LOCAL": self.rng.randint(0, 100)
        }
        op['resp_content'] = [responseContent]
        op['collection'] = tpccConstants.TABLENAME_ORDERS
        op['type'] = constants.OP_TYPE_INSERT
        op['query_id'] = self.nextQueryId()
        op['query_content'] = [o]
        op['query_fields'] = None
        op['resp_id'] = self.nextResponseId()
        op['query_time'] = self.nextTimestamp()
        op['resp_time'] = self.nextTimestamp()
        ops.append(op)

        # STOCK: look up the stock entries of all ordered items
        op = Session.operationFactory()
        responseContent = {}
        responseContent["S_I_ID"] = self.rng.randint(0, 100)
        responseContent["S_W_ID"] = self.rng.randint(0, 100)
        op['resp_content'] = [responseContent]
        op['collection'] = tpccConstants.TABLENAME_STOCK
        op['type'] = constants.OP_TYPE_QUERY
        op['query_id'] = self.nextQueryId()
        op['query_content'] = [{
            "#query": {
                "S_I_ID": {
                    "#in": i_ids
                },
                "S_W_ID": w_id
            }
        }]
        op['query_fields'] = {
            "S_I_ID": 1,
            "S_QUANTITY": 1,
            "S_DATA": 1,
            "S_YTD": 1,
            "S_ORDER_CNT": 1,
            "S_REMOTE_CNT": 1,
            s_dist_col: 1
        }
        op['resp_id'] = self.nextResponseId()
        op['query_time'] = self.nextTimestamp()
        op['resp_time'] = self.nextTimestamp()
        ops.append(op)

        # STOCK: one update per order line
        for i in range(ol_cnt):
            s = {"S_I_ID": i_ids[i], "S_W_ID": w_id}
            # The order-line document is built but no operation is emitted for
            # it. It is kept because building it consumes values from self.rng
            # and indexes i_w_ids / i_qtys exactly as before.
            # NOTE(review): i_w_ids[i] raises IndexError when i_w_ids is
            # shorter than i_ids, although all_local above tolerates an empty
            # i_w_ids -- confirm which is intended.
            ol = {
                "OL_D_ID": d_id,
                "OL_W_ID": w_id,
                "OL_O_ID": d_next_o_id,
                "OL_NUMBER": i + 1,
                "OL_I_ID": i_ids[i],
                "OL_SUPPLY_W_ID": i_w_ids[i],
                "OL_DELIVERY_D": o_entry_d,
                "OL_QUANTITY": i_qtys[i],
                "OL_AMOUNT": self.rng.random() * 100,
                "OL_DIST_INFO": ''.join(
                    self.rng.choice(string.ascii_uppercase) for x in range(24))
            }
            s_remote_cnt = self.rng.randint(0, 10)
            s_order_cnt = self.rng.randint(0, 10)
            s_quantity = self.rng.randint(0, 10)
            s_ytd = self.rng.random()

            op = Session.operationFactory()
            responseContent = {}
            responseContent["S_I_ID"] = self.rng.randint(0, 100)
            responseContent["S_W_ID"] = self.rng.randint(0, 100)
            op['resp_content'] = [responseContent]
            op['collection'] = tpccConstants.TABLENAME_STOCK
            op['type'] = constants.OP_TYPE_UPDATE
            op['query_id'] = self.nextQueryId()
            op['query_content'] = [
                s, {
                    "#set": {
                        "S_QUANTITY": s_quantity,
                        "S_YTD": s_ytd,
                        "S_ORDER_CNT": s_order_cnt,
                        "S_REMOTE_CNT": s_remote_cnt
                    }
                }
            ]
            op['query_fields'] = None
            op['resp_id'] = self.nextResponseId()
            op['query_time'] = self.nextTimestamp()
            op['resp_time'] = self.nextTimestamp()
            op['update_upsert'] = True
            ops.append(op)
        ## FOR
        return ops
Esempio n. 6
0
    def setUp(self):
        """Store a synthetic workload in the metadata database and build the cost-model State.

        Every session gets one equality query per collection name. field00
        and field01 take values counting up from 0 and all remaining fields
        share one value counting down from 9999999; each counter keeps
        running across operations and sessions. After the converter has
        processed the sessions, the collection statistics are overwritten
        with fixed sizes and ``self.state`` is created.
        """
        MongoDBTestCase.setUp(self)
        # Next value to hand out for field00, field01 and "any other field",
        # together with the amount each one moves after being used
        next_value = [0, 0, 9999999]
        step = (1, 1, -1)

        # WORKLOAD
        clock = time.time()
        for sess_idx in xrange(CostModelTestCase.NUM_SESSIONS):
            sess = self.metadata_db.Session()
            sess['session_id'] = sess_idx
            sess['ip_client'] = "client:%d" % (1234+sess_idx)
            sess['ip_server'] = "server:5678"
            sess['start_time'] = clock

            for col_idx, col_name in enumerate(CostModelTestCase.COLLECTION_NAMES):
                doc_id = str(random.random())
                queryId = long((sess_idx<<16) + col_idx)
                responseId = (queryId<<8)

                response = {"_id": doc_id}
                predicate_values = { }
                predicate_types = { }
                for f in xrange(0, CostModelTestCase.NUM_FIELDS):
                    f_name = "field%02d" % f
                    slot = min(f, 2)
                    current = next_value[slot]
                    next_value[slot] += step[slot]

                    response[f_name] = current
                    predicate_values[f_name] = current
                    predicate_types[f_name] = constants.PRED_TYPE_EQUALITY
                ## FOR

                query = { constants.REPLACE_KEY_DOLLAR_PREFIX + "query": predicate_values }
                op = Session.operationFactory()
                op['collection']    = col_name
                op['type']          = constants.OP_TYPE_QUERY
                op['query_id']      = queryId
                op['query_content'] = [ query ]
                op['resp_content']  = [ response ]
                op['resp_id']       = responseId
                op['predicates']    = predicate_types
                # The response arrives one tick after the query
                op['query_time']    = clock
                clock += 1
                op['resp_time']    = clock
                sess['operations'].append(op)
            ## FOR (ops)

            sess['end_time'] = clock
            clock += 2
            sess.save()
        ## FOR (sess)

        # Use the MongoSniffConverter to populate our metadata
        converter = MongoSniffConverter(self.metadata_db, self.dataset_db)
        converter.no_mongo_parse = True
        converter.no_mongo_sessionizer = True
        converter.process()
        self.assertEqual(CostModelTestCase.NUM_SESSIONS, self.metadata_db.Session.find().count())

        self.collections = dict((c['name'], c) for c in self.metadata_db.Collection.fetch())
        self.assertEqual(len(CostModelTestCase.COLLECTION_NAMES), len(self.collections))

        self.workload = list(self.metadata_db.Session.fetch())
        # Increase the database size beyond what the converter derived from the workload
        for col_info in self.collections.itervalues():
            col_info['doc_count'] = CostModelTestCase.NUM_DOCUMENTS
            col_info['avg_doc_size'] = 1024 # bytes
            col_info['max_pages'] = col_info['doc_count'] * col_info['avg_doc_size'] / (4 * 1024)
            col_info.save()
        ## FOR

        self.costModelConfig = {
            'max_memory':     1024, # MB
            'skew_intervals': CostModelTestCase.NUM_INTERVALS,
            'address_size':   64,
            'nodes':          CostModelTestCase.NUM_NODES,
            'window_size':    1024
        }

        self.state = State(self.collections, self.workload, self.costModelConfig)
    ## DEF
## CLASS
Esempio n. 7
0
    def reconstructMetaData(self, changed_fields, fieldscol2col):
        """Split stored operations whose fields were moved to another collection.

        For every session in the metadata database, the operations are walked
        from last to first. If the operation's collection has entries in the
        mapping produced by ``self.generateDict(changed_fields)``, its
        ``query_content`` documents are scanned for references to those
        fields. Each reference is popped from the original operation and
        re-issued as a new operation against the collection found in
        ``fieldscol2col``; the new operation is inserted right after the
        original one (or in its place if the original became empty and was
        removed). Each session is saved after its operations were processed.

        changed_fields -- passed to ``self.generateDict``; nothing is done
                          when it is empty
        fieldscol2col  -- mapping from ``(collection name, field)`` tuples to
                          the name of the target collection

        Any exception raised while processing an operation or saving a
        session is logged and re-raised.
        """
        LOG.info("Reconstructing metadata!")
        if len(changed_fields) == 0:
            return

        col2fields = self.generateDict(changed_fields)
        for sess in self.metadata_db.Session.fetch():
            # Walk backwards so that insertions/removals do not disturb the
            # indexes that are still to be visited
            for i in xrange(len(sess['operations']) - 1, - 1, -1):
                offset = 1 # indicate where we should insert the splitted operation. It depends on if we remove the current operation
                op = sess['operations'][i]
                col_name = op['collection']
                fields = col2fields.get(col_name, None)
                # If this op's collection has no changed fields, skip it
                try:
                    if fields:
                        payload = op["query_content"] # payload is a list type
                        # (document index, outer key, matched field) triples
                        changed_query = [ ]
                        counter = 0
                        while counter < len(payload):
                            doc = payload[counter] # doc is a dict type
                            for key, value in doc.iteritems():
                                if type(value) == dict:
                                    for k in value.iterkeys():
                                        if k in fields:
                                            LOG.debug("counter: %d, key: %s, value: %s", counter, key, k)
                                            changed_query.append((counter, key, k))
                                        ## IF
                                    ## FOR
                                ## IF
                                else:
                                    if value in fields:
                                        # Log the matched value itself: the inner loop variable
                                        # ``k`` is unbound (or stale) in this branch
                                        LOG.info("counter: %d, key: %s, value: %s", counter, key, value)
                                        changed_query.append((counter, key, value))
                                    ## IF
                                ## ELSE
                            ## FOR
                            counter += 1
                        # WHILE
                        # If we have queries to split
                        if len(changed_query) > 0:
                            # construct new queries
                            # NOTE(review): removing a document from the payload shifts the
                            # indexes recorded in the remaining tuples -- confirm that several
                            # matches in one operation are handled as intended
                            for tup in changed_query:
                                old_query_content = payload[tup[0]][tup[1]].pop(tup[2])
                                # If the doc is empty after the pop, remove it from the payload
                                if len(payload[tup[0]][tup[1]]) == 0:
                                    payload[tup[0]].pop(tup[1])
                                    if len(payload[tup[0]]) == 0:
                                        payload.remove(payload[tup[0]])
                                        # If the payload is empty, we remove the op from the session queue
                                        if len(payload) == 0:
                                            sess['operations'].remove(op)
                                            offset -= 1
                                        ## IF
                                ## IF
                                new_op = Session.operationFactory()
                                new_col = fieldscol2col[(col_name, tup[2])]
                                LOG.debug("Creating a new operation to collection: %s", new_col)
                                new_op['collection'] = new_col
                                new_op['type']  = op['type']
                                # The id is derived from the current time
                                new_op['query_id']      = long(hash(time.time()))
                                new_op['query_content'] = [ {tup[1] : {tup[2] : old_query_content}} ]
                                new_op['resp_content']  = new_op['query_content']
                                new_op['resp_id']       = new_op['query_id'] + 1
                                new_op['predicates']    = op['predicates']
                                new_op['query_time']    = op['query_time']
                                new_op['resp_time']    = op['resp_time']

                                # add the new query after the current one of the session queue
                                sess['operations'].insert(i + offset, new_op)
                            ## FOR
                        ## IF
                    ## IF
                except:
                    LOG.error("Error happened when process op: %s", pformat(op))
                    raise
            ## FOR
            try:
                sess.save()
            except:
                LOG.error("Failed to save session!\n%s", pformat(sess))
                raise
Esempio n. 8
0
    def setUp(self):
        """Store a synthetic workload in the metadata database and build the cost-model State.

        Every session gets one equality query per collection name. The
        query on the first collection uses only "field00", the one on the
        second only "field01" and all others only "field02"; the value is
        re-drawn NUM_FIELDS times and the last draw is the one that sticks.
        After the converter has processed the sessions, the collection
        statistics are overwritten with fixed sizes and ``self.state`` is
        created.
        """
        MongoDBTestCase.setUp(self)

        # WORKLOAD
        clock = time.time()
        for sess_idx in xrange(CostModelTestCase.NUM_SESSIONS):
            sess = self.metadata_db.Session()
            sess['session_id'] = sess_idx
            sess['ip_client'] = "client:%d" % (1234 + sess_idx)
            sess['ip_server'] = "server:5678"
            sess['start_time'] = clock

            for col_idx, col_name in enumerate(
                    CostModelTestCase.COLLECTION_NAMES):
                doc_id = str(random.random())
                queryId = long((sess_idx << 16) + col_idx)
                responseId = (queryId << 8)

                response = {"_id": doc_id}
                predicate_values = {}
                predicate_types = {}

                # The field name depends on the collection only, not on the
                # field index
                f_name = "field%02d" % min(col_idx, 2)
                for _ in xrange(0, CostModelTestCase.NUM_FIELDS):
                    drawn = random.randint(0, 100)
                    response[f_name] = drawn
                    predicate_values[f_name] = drawn
                    predicate_types[f_name] = constants.PRED_TYPE_EQUALITY
                ## FOR

                query = {
                    constants.REPLACE_KEY_DOLLAR_PREFIX + "query":
                    predicate_values
                }
                op = Session.operationFactory()
                op['collection'] = col_name
                op['type'] = constants.OP_TYPE_QUERY
                op['query_id'] = queryId
                op['query_content'] = [query]
                op['resp_content'] = [response]
                op['resp_id'] = responseId
                op['predicates'] = predicate_types
                # The response arrives one tick after the query
                op['query_time'] = clock
                clock += 1
                op['resp_time'] = clock
                sess['operations'].append(op)
            ## FOR (ops)
            sess['end_time'] = clock
            clock += 2
            sess.save()
        ## FOR (sess)

        # Use the MongoSniffConverter to populate our metadata
        converter = MongoSniffConverter(self.metadata_db, self.dataset_db)
        converter.no_mongo_parse = True
        converter.no_mongo_sessionizer = True
        converter.process()
        self.assertEqual(CostModelTestCase.NUM_SESSIONS,
                         self.metadata_db.Session.find().count())

        self.collections = dict(
            (c['name'], c) for c in self.metadata_db.Collection.fetch())
        self.assertEqual(len(CostModelTestCase.COLLECTION_NAMES),
                         len(self.collections))

        self.workload = list(self.metadata_db.Session.fetch())

        # Increase the database size beyond what the converter derived from the workload
        for col_info in self.collections.itervalues():
            col_info['doc_count'] = CostModelTestCase.NUM_DOCUMENTS
            col_info['avg_doc_size'] = 1024  # bytes
            col_info['max_pages'] = col_info['doc_count'] * col_info[
                'avg_doc_size'] / (4 * 1024)
            col_info.save()
        ## FOR

        self.costModelConfig = {
            'max_memory': 1024,  # MB
            'skew_intervals': CostModelTestCase.NUM_INTERVALS,
            'address_size': 64,
            'nodes': CostModelTestCase.NUM_NODES,
            'window_size': 3
        }

        self.state = State(self.collections, self.workload,
                           self.costModelConfig)
    def setUp(self):
        """Store a synthetic workload in the metadata database and create the NodeEstimator.

        Every session gets one query per collection name. Even-numbered
        fields receive a random integer that is both returned and used as an
        equality predicate; odd-numbered fields receive a random numeric
        string that only shows up in the response. The saved sessions are
        collected in ``self.workload``.
        """
        MongoDBTestCase.setUp(self)

        # WORKLOAD
        self.workload = []
        clock = time.time()
        for sess_idx in xrange(0, NUM_SESSIONS):
            sess = self.metadata_db.Session()
            sess['session_id'] = sess_idx
            sess['ip_client'] = "client:%d" % (1234 + sess_idx)
            sess['ip_server'] = "server:5678"
            sess['start_time'] = clock

            for col_idx, col_name in enumerate(COLLECTION_NAMES):
                doc_id = str(random.random())
                queryId = long((sess_idx << 16) + col_idx)
                responseId = (queryId << 8)

                response = {"_id": doc_id}
                predicate_values = {}
                predicate_types = {}
                for f in xrange(0, NUM_FIELDS):
                    f_name = "field%02d" % f
                    if f % 2:
                        # Odd field: response only
                        response[f_name] = str(random.randint(1000, 100000))
                        continue
                    # Even field: response and equality predicate
                    drawn = random.randint(0, 100)
                    response[f_name] = drawn
                    predicate_values[f_name] = drawn
                    predicate_types[f_name] = constants.PRED_TYPE_EQUALITY
                ## FOR

                query = {
                    constants.REPLACE_KEY_DOLLAR_PREFIX + "query":
                    predicate_values
                }
                op = Session.operationFactory()
                op['collection'] = col_name
                op['type'] = constants.OP_TYPE_QUERY
                op['query_id'] = queryId
                op['query_content'] = [query]
                op['resp_content'] = [response]
                op['resp_id'] = responseId
                op['predicates'] = predicate_types

                # The response arrives one tick after the query
                op['query_time'] = clock
                clock += 1
                op['resp_time'] = clock

                sess['operations'].append(op)
            ## FOR (ops)
            sess['end_time'] = clock
            clock += 2
            sess.save()
            self.workload.append(sess)
        ## FOR (sess)

        # Use the MongoSniffConverter to populate our metadata
        converter = MongoSniffConverter(self.metadata_db, self.dataset_db)
        converter.no_mongo_parse = True
        converter.no_mongo_sessionizer = True
        converter.process()
        self.assertEqual(NUM_SESSIONS, self.metadata_db.Session.find().count())

        self.collections = dict(
            (c['name'], c) for c in self.metadata_db.Collection.fetch())
        self.assertEqual(len(COLLECTION_NAMES), len(self.collections))

        self.estimator = NodeEstimator(self.collections, NUM_NODES)
Esempio n. 10
0
    def storeCurrentOpInSession(self):
        """Fold self.currentOp into the session for its client/server pair.

        Query, insert, delete and update messages become new operation
        records that are appended to the session and remembered in
        query_response_map under their query_id. A reply is matched back to
        the remembered operation through its reply_id and fills in that
        operation's response fields; a reply without a match is counted and
        skipped. GETMORE and KILLCURSORS messages are ignored.

        A message without a 'type' is logged and dropped, or raises an
        Exception when stop_on_error is set. Any other unrecognised type
        raises an Exception.
        """

        # A collection name that fails the ASCII decode is treated as busted.
        # Swap in our marker for now and remember the operation so that the
        # real name can be figured out after we recreate the schema.
        try:
            self.currentOp['collection'].decode('ascii')
        except Exception:
            if self.debug:
                LOG.warn("Operation %(query_id)d has an invalid collection name '%(collection)s'. Will fix later... [opCtr=%(op_ctr)d / lineCtr=%(line_ctr)d]" % self.currentOp)
            self.currentOp['collection'] = constants.INVALID_COLLECTION_MARKER
            self.bustedOps.append(self.currentOp)

        # The arrow tells us whether this is an outgoing query from the client
        # or an incoming response from the server
        if self.currentOp['arrow'] == '-->>':
            ip_client, ip_server = self.currentOp['ip1'], self.currentOp['ip2']
        else:
            ip_client, ip_server = self.currentOp['ip2'], self.currentOp['ip1']
            # No type on a message in this direction means it is a reply
            if 'type' not in self.currentOp:
                self.currentOp['type'] = constants.OP_TYPE_REPLY
        ## IF

        # Still untyped: there is nothing sensible we can do with it
        if 'type' not in self.currentOp:
            msg = "Current operation is incomplete on line %d: Missing 'type' field" % self.line_ctr
            LOG.warn("%s [opCtr=%d]\n%s" % (msg, self.op_ctr, pformat(self.currentOp)))
            if self.stop_on_error:
                raise Exception(msg)
            return
        ## IF

        # Look up (or open) the session; the first timestamped message it
        # receives sets its start time
        session = self.getOrCreateSession(ip_client, ip_server)
        if session["start_time"] is None and "timestamp" in self.currentOp:
            session["start_time"] = self.currentOp['timestamp']

        # Escape any invalid key names, replacing each entry in place
        contents = self.currentContent
        for idx, entry in enumerate(contents):
            # HACK: Rename the 'query' key to '$query'
            if 'query' in entry:
                entry[constants.OP_TYPE_QUERY] = entry['query']
                del entry['query']
            contents[idx] = util.escapeFieldNames(entry)
        ## FOR

        current = self.currentOp
        op_type = current['type']

        # QUERY: $query, $delete, $insert, $update
        if op_type in (constants.OP_TYPE_QUERY,
                       constants.OP_TYPE_INSERT,
                       constants.OP_TYPE_DELETE,
                       constants.OP_TYPE_UPDATE):
            if self.debug:
                LOG.debug("Current Operation %d Content:\n%s" % (current['query_id'], pformat(contents)))

            # Build the operation record that corresponds to this message
            op = Session.operationFactory()
            op['collection'] = current['collection']
            op['type'] = op_type
            op['query_time'] = current['timestamp']
            op['query_size'] = current['size']
            op['query_content'] = contents
            op['query_id'] = long(current['query_id'])
            op['query_aggregate'] = False  # not an aggregate unless shown otherwise

            if op['type'] == constants.OP_TYPE_UPDATE:
                # UPDATE flags
                op['update_upsert'] = current['update_upsert']
                op['update_multi'] = current['update_multi']
            elif op['type'] == constants.OP_TYPE_QUERY:
                # QUERY flags: skip / limit
                op['query_limit'] = current['ntoreturn']
                op['query_offset'] = current['ntoskip']
                if current['hasfields']:
                    # HACK: Convert dot notation into '*'
                    # FIXME: This should really be broken out into a dictionary of 'include' 'exclude'
                    op['query_fields'] = dict(
                        (k.replace(".", "*"), v)
                        for k, v in current['hasfields'].iteritems())

                # A "$cmd" collection marks an aggregate. The real collection
                # name has to be extracted at the end, after the first pass,
                # because the collection name is hashed up.
                if op['collection'].find("$cmd") > 0:
                    op['query_aggregate'] = True

            # Remember the operation by its id so that the response can be
            # attached to it later on
            self.query_response_map[current['query_id']] = op

            # TODO: Large traces will cause the sessions to get too big.
            #       We need to split out the operations into a separate
            #       collection, or use multiple sessions
            session['operations'].append(op)
            self.op_ctr += 1
            if self.debug:
                LOG.debug("Added %s operation %d to session %s from line %d:\n%s" % (op['type'], current['query_id'], session['session_id'], self.line_ctr, pformat(op)))

            # Record the collection name in known_collections for later use.
            # HACK: We have to cut off the db name here. We may not want
            #       to do that if the application is querying multiple databases.
            self.known_collections.add(op['collection'].split(".", 1)[-1])
            return

        # RESPONSE - add information to the matching query
        if op_type == constants.OP_TYPE_REPLY:
            self.resp_ctr += 1
            reply_id = current['reply_id']
            if reply_id not in self.query_response_map:
                self.skip_ctr += 1
                if self.debug:
                    LOG.warn("Skipping response on line %d - No matching query_id '%s' [skipCtr=%d/%d]" % (self.line_ctr, reply_id, self.skip_ctr, self.resp_ctr))
                return

            # Fill in the response half of the matching query, then forget it
            query_op = self.query_response_map[reply_id]
            query_op['resp_content'] = contents
            query_op['resp_size'] = current['size']
            query_op['resp_time'] = current['timestamp']
            query_op['resp_id'] = long(current['query_id'])
            del self.query_response_map[reply_id]
            return

        # These can be safely ignored
        if op_type in (constants.OP_TYPE_GETMORE, constants.OP_TYPE_KILLCURSORS):
            if self.debug:
                LOG.warn("Skipping '%s' operation %d on line %d" % (current['type'], current['query_id'], self.line_ctr))
            return

        # UNKNOWN
        raise Exception("Unexpected message type '%s'" % current['type'])
Esempio n. 11
0
    def createStockLevel(self, params):
        """Build the three query operations of a StockLevel transaction.

        params must provide "w_id", "d_id" and "threshold". The order id and
        the item ids used in the predicates, as well as all response values,
        are drawn from self.rng.

        Returns the list of operations: a DISTRICT lookup, an ORDER_LINE
        range query over the 20 order ids below the drawn one, and a STOCK
        query for items whose quantity is under the threshold.
        """
        ops = []
        w_id = params["w_id"]
        d_id = params["d_id"]
        o_id = self.rng.randint(0, 10000)
        ol_ids = [self.rng.randint(0, 1000) for _ in xrange(10)]
        threshold = params["threshold"]

        def add_query(collection, resp_keys, predicate, fields):
            # Append one query op whose response holds a random value per key
            op = Session.operationFactory()
            response = {}
            for key in resp_keys:
                response[key] = self.rng.randint(0, 100)
            op['resp_content'] = [response]
            op['collection'] = collection
            op['type'] = constants.OP_TYPE_QUERY
            op['query_id'] = self.nextQueryId()
            op['query_content'] = [{"#query": predicate}]
            op['query_fields'] = fields
            op['resp_id'] = self.nextResponseId()
            op['query_time'] = self.nextTimestamp()
            op['resp_time'] = self.nextTimestamp()
            ops.append(op)

        # Next order id of the district
        add_query(
            tpccConstants.TABLENAME_DISTRICT,
            ("D_W_ID", "D_ID"),
            {"D_W_ID": w_id, "D_ID": d_id},
            {"D_NEXT_O_ID": 1},
        )

        # Items on the order lines of the preceding 20 orders
        add_query(
            tpccConstants.TABLENAME_ORDER_LINE,
            ("OL_W_ID", "OL_D_ID", "OL_O_ID"),
            {
                "OL_W_ID": w_id,
                "OL_D_ID": d_id,
                "OL_O_ID": {"#lt": o_id, "#gte": o_id - 20},
            },
            {"OL_I_ID": 1},
        )

        # Stock entries of those items that are below the threshold
        add_query(
            tpccConstants.TABLENAME_STOCK,
            ("S_W_ID", "S_I_ID", "S_QUANTITY"),
            {
                "S_W_ID": w_id,
                "S_I_ID": {"#in": list(ol_ids)},
                "S_QUANTITY": {"#lt": threshold},
            },
            None,
        )

        return ops
Esempio n. 12
0
    def createDelivery(self, params):
        """Build the operations of a Delivery transaction.

        params must provide "w_id", "o_carrier_id" and "ol_delivery_d". For
        every district from 1 to tpccConstants.DISTRICTS_PER_WAREHOUSE the
        method draws a customer id, an order id and an order total from
        self.rng and emits seven operations: three queries (NEW_ORDER,
        ORDERS, ORDER_LINE), three updates (ORDERS, ORDER_LINE, CUSTOMER)
        and a NEW_ORDER delete.

        Returns the list of all operations in emission order.
        """
        ops = []
        w_id = params["w_id"]
        o_carrier_id = params["o_carrier_id"]
        ol_delivery_d = params["ol_delivery_d"]

        def record(collection, op_type, resp_keys, content, fields,
                   update_multi=None):
            # Append one op; update_multi is only passed for update ops,
            # which are always flagged as upserts as well.
            op = Session.operationFactory()
            response = {}
            for key in resp_keys:
                response[key] = self.rng.randint(0, 100)
            op['resp_content'] = [response]
            op['collection'] = collection
            op['type'] = op_type
            op['query_id'] = self.nextQueryId()
            op['query_content'] = content
            op['query_fields'] = fields
            op['resp_id'] = self.nextResponseId()
            op['query_time'] = self.nextTimestamp()
            op['resp_time'] = self.nextTimestamp()
            if update_multi is not None:
                op['update_multi'] = update_multi
                op['update_upsert'] = True
            ops.append(op)

        new_order_keys = ("NO_D_ID", "NO_W_ID")
        orders_keys = ("O_ID", "O_D_ID", "O_W_ID")
        order_line_keys = ("OL_O_ID", "OL_D_ID", "OL_W_ID")
        customer_keys = ("C_ID", "C_D_ID", "C_W_ID")

        last_district = tpccConstants.DISTRICTS_PER_WAREHOUSE + 1
        for d_id in xrange(1, last_district):
            c_id = self.rng.randint(0, 10000)
            o_id = self.rng.randint(0, 10000)
            ol_total = self.rng.random() * 100

            # --- Lookups ---------------------------------------------------
            record(tpccConstants.TABLENAME_NEW_ORDER,
                   constants.OP_TYPE_QUERY,
                   new_order_keys,
                   [{"#query": {"NO_D_ID": d_id, "NO_W_ID": w_id}}],
                   {"NO_O_ID": 1})

            record(tpccConstants.TABLENAME_ORDERS,
                   constants.OP_TYPE_QUERY,
                   orders_keys,
                   [{"#query": {"O_ID": o_id, "O_D_ID": d_id, "O_W_ID": w_id}}],
                   {"O_C_ID": 1})

            record(tpccConstants.TABLENAME_ORDER_LINE,
                   constants.OP_TYPE_QUERY,
                   order_line_keys,
                   [{"#query": {"OL_O_ID": o_id, "OL_D_ID": d_id, "OL_W_ID": w_id}}],
                   {"OL_AMOUNT": 1})

            # --- Updates ---------------------------------------------------
            record(tpccConstants.TABLENAME_ORDERS,
                   constants.OP_TYPE_UPDATE,
                   orders_keys,
                   [{"O_ID": o_id, "O_D_ID": d_id, "O_W_ID": w_id},
                    {"#set": {"O_CARRIER_ID": o_carrier_id}}],
                   None,
                   update_multi=False)

            record(tpccConstants.TABLENAME_ORDER_LINE,
                   constants.OP_TYPE_UPDATE,
                   order_line_keys,
                   [{"OL_O_ID": o_id, "OL_D_ID": d_id, "OL_W_ID": w_id},
                    {"#set": {"OL_DELIVERY_D": ol_delivery_d}}],
                   None,
                   update_multi=True)

            record(tpccConstants.TABLENAME_CUSTOMER,
                   constants.OP_TYPE_UPDATE,
                   customer_keys,
                   [{"C_ID": c_id, "C_D_ID": d_id, "C_W_ID": w_id},
                    {"#inc": {"C_BALANCE": ol_total}}],
                   None,
                   update_multi=False)

            # --- Delete ----------------------------------------------------
            record(tpccConstants.TABLENAME_NEW_ORDER,
                   constants.OP_TYPE_DELETE,
                   new_order_keys,
                   [{"NO_D_ID": d_id, "NO_W_ID": w_id}],
                   None)
        ## FOR
        return ops
Esempio n. 13
0
    def createDelivery(self, params):
        """Generate the operation list for one Delivery transaction.

        Reads "w_id", "o_carrier_id" and "ol_delivery_d" from params. Each
        district in 1..tpccConstants.DISTRICTS_PER_WAREHOUSE contributes
        seven operations, described by a per-district plan table: queries on
        NEW_ORDER, ORDERS and ORDER_LINE, updates on ORDERS, ORDER_LINE and
        CUSTOMER, and a delete on NEW_ORDER. Customer id, order id, order
        total and all response values come from self.rng.

        Returns the operations in the order they were generated.
        """
        ops = [ ]
        w_id = params["w_id"]
        o_carrier_id = params["o_carrier_id"]
        ol_delivery_d = params["ol_delivery_d"]

        QUERY = constants.OP_TYPE_QUERY
        UPDATE = constants.OP_TYPE_UPDATE
        DELETE = constants.OP_TYPE_DELETE

        for d_id in xrange(1, tpccConstants.DISTRICTS_PER_WAREHOUSE+1):
            c_id = self.rng.randint(0, 10000)
            o_id = self.rng.randint(0, 10000)
            ol_total = self.rng.random() * 100

            # Each row: (collection, type, response keys, query content,
            #            query fields, update_multi or None for non-updates)
            plan = [
                (tpccConstants.TABLENAME_NEW_ORDER, QUERY,
                 ["NO_D_ID", "NO_W_ID"],
                 [{"#query" : {"NO_D_ID": d_id, "NO_W_ID": w_id}}],
                 {"NO_O_ID": 1}, None),
                (tpccConstants.TABLENAME_ORDERS, QUERY,
                 ["O_ID", "O_D_ID", "O_W_ID"],
                 [{"#query" : {"O_ID": o_id, "O_D_ID": d_id, "O_W_ID": w_id}}],
                 {"O_C_ID": 1}, None),
                (tpccConstants.TABLENAME_ORDER_LINE, QUERY,
                 ["OL_O_ID", "OL_D_ID", "OL_W_ID"],
                 [{"#query" : {"OL_O_ID": o_id, "OL_D_ID": d_id, "OL_W_ID": w_id}}],
                 {"OL_AMOUNT": 1}, None),
                (tpccConstants.TABLENAME_ORDERS, UPDATE,
                 ["O_ID", "O_D_ID", "O_W_ID"],
                 [{"O_ID": o_id, "O_D_ID": d_id, "O_W_ID": w_id},
                  {"#set": {"O_CARRIER_ID": o_carrier_id}}],
                 None, False),
                (tpccConstants.TABLENAME_ORDER_LINE, UPDATE,
                 ["OL_O_ID", "OL_D_ID", "OL_W_ID"],
                 [{"OL_O_ID": o_id, "OL_D_ID": d_id, "OL_W_ID": w_id},
                  {"#set": {"OL_DELIVERY_D": ol_delivery_d}}],
                 None, True),
                (tpccConstants.TABLENAME_CUSTOMER, UPDATE,
                 ["C_ID", "C_D_ID", "C_W_ID"],
                 [{"C_ID": c_id, "C_D_ID": d_id, "C_W_ID": w_id},
                  {"#inc": {"C_BALANCE": ol_total}}],
                 None, False),
                (tpccConstants.TABLENAME_NEW_ORDER, DELETE,
                 ["NO_D_ID", "NO_W_ID"],
                 [{"NO_D_ID": d_id, "NO_W_ID": w_id}],
                 None, None),
            ]

            for collection, op_type, resp_keys, content, fields, multi in plan:
                op = Session.operationFactory()

                # The response carries one random value per listed key
                responseContent = {}
                for key in resp_keys:
                    responseContent[key] = self.rng.randint(0, 100)

                op['resp_content']  = [responseContent]
                op['collection']    = collection
                op['type']          = op_type
                op['query_id']      = self.nextQueryId()
                op['query_content'] = content
                op['query_fields']  = fields
                op['resp_id']       = self.nextResponseId()
                op['query_time']    = self.nextTimestamp()
                op['resp_time']     = self.nextTimestamp()
                if multi is not None:
                    # Update ops carry their flags; all of them are upserts
                    op['update_multi']  = multi
                    op['update_upsert'] = True
                ops.append(op)
            ## FOR (plan)
        ## FOR
        return ops
Esempio n. 14
0
    def createNewOrder(self, params):
        """Build the operations of a NewOrder transaction.

        params must provide "w_id", "d_id", "c_id", "o_entry_d", "i_ids",
        "i_w_ids" and "i_qtys"; the three item lists are indexed in
        parallel. The new order id and all response values are drawn from
        self.rng.

        Returns the list of operations in this order: ITEM query, WAREHOUSE
        query, DISTRICT query, DISTRICT update, CUSTOMER query, NEW_ORDER
        insert, ORDERS insert, STOCK query, followed by one STOCK update
        per entry in i_ids.
        """
        ops = [ ]
        w_id = params["w_id"]
        d_id = params["d_id"]
        c_id = params["c_id"]
        o_entry_d = params["o_entry_d"]
        i_ids = params["i_ids"]
        i_w_ids = params["i_w_ids"]
        i_qtys = params["i_qtys"]
        s_dist_col = "S_DIST_%02d" % d_id
        # NOTE: w_tax, d_tax and c_discount are never read, but their draws
        # are kept so that every later value taken from self.rng is unchanged.
        w_tax = self.rng.random()
        d_tax = self.rng.random()
        d_next_o_id = self.rng.randint(0, 1000)
        c_discount = self.rng.randint(0, 10)
        ol_cnt = len(i_ids)
        o_carrier_id = tpccConstants.NULL_CARRIER_ID
        # True when no supplying warehouses are given or all of them are w_id
        all_local = (not i_w_ids or [w_id] * len(i_w_ids) == i_w_ids)

        # ITEM: look up all ordered items
        op = Session.operationFactory()
        responseContent = {"I_ID": self.rng.randint(0, 100)}
        op['resp_content']  = [responseContent]
        op['collection']    = tpccConstants.TABLENAME_ITEM
        op['type']          = constants.OP_TYPE_QUERY
        op['query_id']      = self.nextQueryId()
        op['query_content'] = [{"#query" : {"I_ID": {"#in": i_ids}}}]
        op['query_fields']  = {"I_ID": 1, "I_PRICE": 1, "I_NAME": 1, "I_DATA": 1}
        op['resp_id']       = self.nextResponseId()
        op['query_time']    = self.nextTimestamp()
        op['resp_time']     = self.nextTimestamp()
        ops.append(op)

        # WAREHOUSE: tax rate
        op = Session.operationFactory()
        responseContent = {}
        responseContent["W_ID"] = self.rng.randint(0, 100)
        op['resp_content']  = [responseContent]
        op['collection']    = tpccConstants.TABLENAME_WAREHOUSE
        op['type']          = constants.OP_TYPE_QUERY
        op['query_id']      = self.nextQueryId()
        op['query_content'] = [{"#query" : {"W_ID": w_id}}]
        op['query_fields']  = {"W_TAX": 1}
        op['resp_id']       = self.nextResponseId()
        op['query_time']    = self.nextTimestamp()
        op['resp_time']     = self.nextTimestamp()
        ops.append(op)

        # DISTRICT: tax rate and next order id
        op = Session.operationFactory()
        responseContent = {}
        responseContent["D_ID"] = self.rng.randint(0, 100)
        responseContent["D_W_ID"] = self.rng.randint(0, 100)
        op['resp_content']  = [responseContent]
        op['collection']    = tpccConstants.TABLENAME_DISTRICT
        op['type']          = constants.OP_TYPE_QUERY
        op['query_id']      = self.nextQueryId()
        op['query_content'] = [{"#query" : {"D_ID": d_id, "D_W_ID": w_id}}]
        op['query_fields']  = {"D_TAX": 1, "D_NEXT_O_ID": 1}
        op['resp_id']       = self.nextResponseId()
        op['query_time']    = self.nextTimestamp()
        op['resp_time']     = self.nextTimestamp()
        ops.append(op)

        # DISTRICT: bump the next order id
        op = Session.operationFactory()
        responseContent = {}
        responseContent["D_ID"] = self.rng.randint(0, 100)
        responseContent["D_W_ID"] = self.rng.randint(0, 100)
        op['resp_content']  = [responseContent]
        op['collection']    = tpccConstants.TABLENAME_DISTRICT
        op['type']          = constants.OP_TYPE_UPDATE
        op['query_id']      = self.nextQueryId()
        op['query_content'] = [{"D_ID": d_id, "D_W_ID": w_id}, {"#inc": {"D_NEXT_O_ID": 1}}]
        op['query_fields']  = None
        op['resp_id']       = self.nextResponseId()
        op['query_time']    = self.nextTimestamp()
        op['resp_time']     = self.nextTimestamp()
        op['update_multi']  = False
        op['update_upsert'] = True
        ops.append(op)

        # CUSTOMER: discount, name and credit
        op = Session.operationFactory()
        responseContent = {}
        responseContent["C_ID"] = self.rng.randint(0, 100)
        responseContent["C_D_ID"] = self.rng.randint(0, 100)
        responseContent["C_W_ID"] = self.rng.randint(0, 100)
        op['resp_content']  = [responseContent]
        op['collection']    = tpccConstants.TABLENAME_CUSTOMER
        op['type']          = constants.OP_TYPE_QUERY
        op['query_id']      = self.nextQueryId()
        op['query_content'] = [{"#query" : {"C_ID": c_id, "C_D_ID": d_id, "C_W_ID": w_id}}]
        op['query_fields']  = {"C_DISCOUNT": 1, "C_LAST": 1, "C_CREDIT": 1}
        op['resp_id']       = self.nextResponseId()
        op['query_time']    = self.nextTimestamp()
        op['resp_time']     = self.nextTimestamp()
        ops.append(op)

        # NEW_ORDER: insert the new order marker
        op = Session.operationFactory()
        responseContent = {}
        responseContent["NO_O_ID"] = self.rng.randint(0, 100)
        responseContent["NO_D_ID"] = self.rng.randint(0, 100)
        responseContent["NO_W_ID"] = self.rng.randint(0, 100)
        op['resp_content']  = [responseContent]
        op['collection']    = tpccConstants.TABLENAME_NEW_ORDER
        op['type']          = constants.OP_TYPE_INSERT
        op['query_id']      = self.nextQueryId()
        op['query_content'] = [{"NO_O_ID": d_next_o_id, "NO_D_ID": d_id, "NO_W_ID": w_id}]
        op['query_fields']  = None
        op['resp_id']       = self.nextResponseId()
        op['query_time']    = self.nextTimestamp()
        op['resp_time']     = self.nextTimestamp()
        ops.append(op)

        # ORDERS: insert the order itself.
        # This needs its own operation object: reusing the previous one would
        # overwrite the NEW_ORDER insert that is already stored in ops and
        # append the very same object a second time.
        op = Session.operationFactory()
        o = {
            "O_D_ID": d_id,
            "O_W_ID": w_id,
            "O_C_ID": c_id,
            "O_ID": d_next_o_id,
            "O_ENTRY_D": o_entry_d,
            "O_CARRIER_ID": o_carrier_id,
            "O_OL_CNT": ol_cnt,
            "O_ALL_LOCAL": all_local
        }
        responseContent = {
            "O_D_ID": self.rng.randint(0, 100),
            "O_W_ID": self.rng.randint(0, 100),
            "O_C_ID": self.rng.randint(0, 100),
            "O_ID": self.rng.randint(0, 100),
            "O_ENTRY_D": self.rng.randint(0, 100),
            "O_CARRIER_ID": self.rng.randint(0, 100),
            "O_OL_CNT": self.rng.randint(0, 100),
            "O_ALL_LOCAL": self.rng.randint(0, 100)
        }
        op['resp_content']  = [responseContent]
        op['collection']    = tpccConstants.TABLENAME_ORDERS
        op['type']          = constants.OP_TYPE_INSERT
        op['query_id']      = self.nextQueryId()
        op['query_content'] = [o]
        op['query_fields']  = None
        op['resp_id']       = self.nextResponseId()
        op['query_time']    = self.nextTimestamp()
        op['resp_time']     = self.nextTimestamp()
        ops.append(op)

        # STOCK: fetch the stock entries of all ordered items
        op = Session.operationFactory()
        responseContent = {}
        responseContent["S_I_ID"] = self.rng.randint(0, 100)
        responseContent["S_W_ID"] = self.rng.randint(0, 100)
        op['resp_content']  = [responseContent]
        op['collection']    = tpccConstants.TABLENAME_STOCK
        op['type']          = constants.OP_TYPE_QUERY
        op['query_id']      = self.nextQueryId()
        op['query_content'] = [{"#query" : {"S_I_ID": {"#in": i_ids}, "S_W_ID": w_id}}]
        op['query_fields']  = {"S_I_ID": 1, "S_QUANTITY": 1, "S_DATA": 1, "S_YTD": 1, "S_ORDER_CNT": 1, "S_REMOTE_CNT": 1, s_dist_col: 1}
        op['resp_id']       = self.nextResponseId()
        op['query_time']    = self.nextTimestamp()
        op['resp_time']     = self.nextTimestamp()
        ops.append(op)

        # STOCK: one update per ordered item
        for i in range(ol_cnt):
            s = {"S_I_ID": i_ids[i], "S_W_ID": w_id}
            # NOTE(review): this order line is built but never turned into an
            # operation. It is kept as-is because building it draws from
            # self.rng (and indexes i_w_ids / i_qtys); removing it would
            # shift every value drawn afterwards. Confirm whether an
            # ORDER_LINE insert was intended here.
            ol = {
                "OL_D_ID": d_id,
                "OL_W_ID": w_id,
                "OL_O_ID": d_next_o_id,
                "OL_NUMBER": i + 1,
                "OL_I_ID": i_ids[i],
                "OL_SUPPLY_W_ID": i_w_ids[i],
                "OL_DELIVERY_D": o_entry_d,
                "OL_QUANTITY": i_qtys[i],
                "OL_AMOUNT": self.rng.random() * 100,
                "OL_DIST_INFO": ''.join(self.rng.choice(string.ascii_uppercase) for x in range(24))
            }
            s_remote_cnt = self.rng.randint(0, 10)
            s_order_cnt = self.rng.randint(0, 10)
            s_quantity = self.rng.randint(0, 10)
            s_ytd = self.rng.random()

            op = Session.operationFactory()
            responseContent = {}
            responseContent["S_I_ID"] = self.rng.randint(0, 100)
            responseContent["S_W_ID"] = self.rng.randint(0, 100)
            op['resp_content']  = [responseContent]
            op['collection']    = tpccConstants.TABLENAME_STOCK
            op['type']          = constants.OP_TYPE_UPDATE
            op['query_id']      = self.nextQueryId()
            op['query_content'] = [s, {"#set": {"S_QUANTITY": s_quantity, "S_YTD": s_ytd, "S_ORDER_CNT": s_order_cnt, "S_REMOTE_CNT": s_remote_cnt}}]
            op['query_fields']  = None
            op['resp_id']       = self.nextResponseId()
            op['query_time']    = self.nextTimestamp()
            op['resp_time']     = self.nextTimestamp()
            op['update_upsert'] = True
            ops.append(op)
        ## FOR
        return ops
    def setUp(self):
        """Generate a synthetic workload and build the NodeEstimator fixture.

        Creates NUM_SESSIONS sessions, each holding one equality query per
        entry in COLLECTION_NAMES, saves them through the metadata database,
        runs the MongoSniffConverter over them and finally instantiates the
        estimator from the resulting collection catalog.
        """
        MongoDBTestCase.setUp(self)

        # WORKLOAD
        self.workload = []
        clock = time.time()
        for sess_idx in xrange(NUM_SESSIONS):
            session = self.metadata_db.Session()
            session["session_id"] = sess_idx
            session["ip_client"] = "client:%d" % (1234 + sess_idx)
            session["ip_server"] = "server:5678"
            session["start_time"] = clock

            for col_idx, col_name in enumerate(COLLECTION_NAMES):
                doc_id = str(random.random())
                query_id = long((sess_idx << 16) + col_idx)
                predicate_values = {}
                predicate_types = {}
                response_doc = {"_id": doc_id}

                for field_idx in xrange(NUM_FIELDS):
                    field = "field%02d" % field_idx
                    if field_idx % 2:
                        # Odd fields only show up in the response, as strings
                        response_doc[field] = str(random.randint(1000, 100000))
                        continue
                    # Even fields are returned AND used as equality predicates
                    value = random.randint(0, 100)
                    response_doc[field] = value
                    predicate_values[field] = value
                    predicate_types[field] = constants.PRED_TYPE_EQUALITY
                ## FOR (fields)

                op = Session.operationFactory()
                op["collection"] = col_name
                op["type"] = constants.OP_TYPE_QUERY
                op["query_id"] = query_id
                op["query_content"] = [
                    {constants.REPLACE_KEY_DOLLAR_PREFIX + "query": predicate_values}
                ]
                op["resp_content"] = [response_doc]
                op["resp_id"] = query_id << 8
                op["predicates"] = predicate_types

                # Every query takes exactly one tick of the synthetic clock
                op["query_time"] = clock
                clock += 1
                op["resp_time"] = clock

                session["operations"].append(op)
            ## FOR (ops)

            session["end_time"] = clock
            # Leave a two-tick gap before the next session starts
            clock += 2
            session.save()
            self.workload.append(session)
        ## FOR (sess)

        # Use the MongoSniffConverter to populate our metadata
        # (both no_mongo_* flags are switched on before processing)
        converter = MongoSniffConverter(self.metadata_db, self.dataset_db)
        converter.no_mongo_parse = True
        converter.no_mongo_sessionizer = True
        converter.process()
        self.assertEqual(NUM_SESSIONS, self.metadata_db.Session.find().count())

        # Index the catalog entries by collection name
        catalog = {}
        for col_info in self.metadata_db.Collection.fetch():
            catalog[col_info["name"]] = col_info
        self.collections = catalog
        self.assertEqual(len(COLLECTION_NAMES), len(self.collections))

        self.estimator = NodeEstimator(self.collections, NUM_NODES)
Esempio n. 16
0
    def storeCurrentOpInSession(self):
        """Store the operation held in self.currentOp in its session.

        The session is looked up (or created) via getOrCreateSession() using
        the client/server addresses derived from the op's 'arrow' direction.

        Depending on the op's 'type':
          * query / insert / delete / update: a new operation document is
            built, appended to the session, and remembered in
            self.query_response_map under its query_id.
          * reply: the matching operation (looked up by 'reply_id') is
            completed with the response data; unmatched replies are counted
            in self.skip_ctr.
          * getmore / killcursors: ignored.
          * anything else: an Exception is raised.

        If the op has no 'type' and is not treated as a reply, a warning is
        logged and the method returns without storing anything (or raises
        when self.stop_on_error is set).
        """

        # Check whether it has a busted collection name
        # For now we'll just change the name to our marker so that we can figure out
        # what it really should be after we recreate the schema
        # Note that *any* exception lands in the handler below, including a
        # KeyError when the op has no 'collection' entry at all.
        try:
            self.currentOp['collection'].decode('ascii')
        except Exception as err:
            if self.debug:
                # NOTE(review): the message is formatted from self.currentOp,
                # so it needs 'op_ctr' and 'line_ctr' keys in that dict (not
                # just the attributes on self) -- confirm the caller sets them.
                LOG.warn(
                    "Operation %(query_id)d has an invalid collection name '%(collection)s'. Will fix later... [opCtr=%(op_ctr)d / lineCtr=%(line_ctr)d]"
                    % self.currentOp)
            self.currentOp['collection'] = constants.INVALID_COLLECTION_MARKER
            self.bustedOps.append(self.currentOp)
            pass

        # Figure out whether this is a outgoing query from the client
        # Or an incoming response from the server
        if self.currentOp['arrow'] == '-->>':
            ip_client = self.currentOp['ip1']
            ip_server = self.currentOp['ip2']
        else:
            ip_client = self.currentOp['ip2']
            ip_server = self.currentOp['ip1']

            # If this doesn't have a type here, then we know that it's a reply
            if not 'type' in self.currentOp:
                self.currentOp['type'] = constants.OP_TYPE_REPLY
        ## IF

        # Only a '-->>' op can still be missing its type at this point
        if not 'type' in self.currentOp:
            msg = "Current operation is incomplete on line %d: Missing 'type' field" % self.line_ctr
            LOG.warn("%s [opCtr=%d]\n%s" %
                     (msg, self.op_ctr, pformat(self.currentOp)))
            if self.stop_on_error: raise Exception(msg)
            return
        ## IF

        # Get the session to store this operation in
        session = self.getOrCreateSession(ip_client, ip_server)
        # The first timestamped op seen for a session fixes its start time
        if session["start_time"] is None and "timestamp" in self.currentOp:
            session["start_time"] = self.currentOp['timestamp']

        # Escape any invalid key names
        for i in xrange(0, len(self.currentContent)):
            # HACK: Rename the 'query' key to '$query'
            # (the new key is the value of constants.OP_TYPE_QUERY)
            if 'query' in self.currentContent[i]:
                self.currentContent[i][
                    constants.OP_TYPE_QUERY] = self.currentContent[i]['query']
                del self.currentContent[i]['query']
            self.currentContent[i] = util.escapeFieldNames(
                self.currentContent[i])
        ## FOR

        # QUERY: $query, $delete, $insert, $update:
        # Create the operation, add it to the session
        if self.currentOp['type'] in [
                constants.OP_TYPE_QUERY, constants.OP_TYPE_INSERT,
                constants.OP_TYPE_DELETE, constants.OP_TYPE_UPDATE
        ]:
            # create the operation -- corresponds to current
            if self.debug:
                LOG.debug(
                    "Current Operation %d Content:\n%s" %
                    (self.currentOp['query_id'], pformat(self.currentContent)))

            op = Session.operationFactory()
            op['collection'] = self.currentOp['collection']
            op['type'] = self.currentOp['type']
            op['query_time'] = self.currentOp['timestamp']
            op['query_size'] = self.currentOp['size']
            op['query_content'] = self.currentContent
            op['query_id'] = long(self.currentOp['query_id'])
            op['query_aggregate'] = False  # false -not aggregate- by default

            # UPDATE Flags
            if op['type'] == constants.OP_TYPE_UPDATE:
                op['update_upsert'] = self.currentOp['update_upsert']
                op['update_multi'] = self.currentOp['update_multi']

            # QUERY Flags
            elif op['type'] == constants.OP_TYPE_QUERY:
                # SKIP, LIMIT
                op['query_limit'] = self.currentOp['ntoreturn']
                op['query_offset'] = self.currentOp['ntoskip']
                # 'query_fields' is only assigned when 'hasfields' is truthy
                if self.currentOp['hasfields']:
                    # HACK: Convert dot notation into '*'
                    # FIXME: This should really be broke out into a dictionary of 'include' 'exclude'
                    op['query_fields'] = dict([
                        (k.replace(".", "*"), v)
                        for k, v in self.currentOp['hasfields'].iteritems()
                    ])

                # check for aggregate
                # update collection name, set aggregate type
                # ("> 0" means "$cmd" must occur after the first character,
                #  i.e. a name that *starts* with "$cmd" is not flagged)
                if op['collection'].find("$cmd") > 0:
                    op['query_aggregate'] = True
                    # extract the real collection name
                    ## --> This has to be done at the end after the first pass, because the collection name is hashed up

            # Keep track of operations by their ids so that we can add
            # the response to it later on
            # (keyed by the raw currentOp value, not the long() stored in op)
            self.query_response_map[self.currentOp['query_id']] = op

            # Append it to the current session
            # TODO: Large traces will cause the sessions to get too big.
            #       We need to split out the operations into a seperate collection
            #       Or use multiple sessions
            session['operations'].append(op)
            self.op_ctr += 1
            if self.debug:
                LOG.debug(
                    "Added %s operation %d to session %s from line %d:\n%s" %
                    (op['type'], self.currentOp['query_id'],
                     session['session_id'], self.line_ctr, pformat(op)))

            # store the collection name in known_collections. This will be useful later.
            # see the comment at known_collections
            # HACK: We have to cut off the db name here. We may not want
            #       to do that if the application is querying multiple databases.
            # When the name contains no ".", find() returns -1 and the slice
            # starts at 0, so the full name is kept unchanged.
            full_name = op['collection']
            col_name = full_name[full_name.find(".") +
                                 1:]  # cut off the db name
            self.known_collections.add(col_name)

        # RESPONSE - add information to the matching query
        elif self.currentOp['type'] == constants.OP_TYPE_REPLY:
            self.resp_ctr += 1
            reply_id = self.currentOp['reply_id']
            # see if the matching query is in the map
            if reply_id in self.query_response_map:
                # fill in missing information
                query_op = self.query_response_map[reply_id]
                query_op['resp_content'] = self.currentContent
                query_op['resp_size'] = self.currentOp['size']
                query_op['resp_time'] = self.currentOp['timestamp']
                query_op['resp_id'] = long(self.currentOp['query_id'])
                # Each query is matched with at most one reply
                del self.query_response_map[reply_id]
            else:
                self.skip_ctr += 1
                if self.debug:
                    LOG.warn(
                        "Skipping response on line %d - No matching query_id '%s' [skipCtr=%d/%d]"
                        % (self.line_ctr, reply_id, self.skip_ctr,
                           self.resp_ctr))

        # These can be safely ignored
        elif self.currentOp['type'] in [
                constants.OP_TYPE_GETMORE, constants.OP_TYPE_KILLCURSORS
        ]:
            if self.debug:
                LOG.warn("Skipping '%s' operation %d on line %d" %
                         (self.currentOp['type'], self.currentOp['query_id'],
                          self.line_ctr))

        # UNKNOWN
        else:
            raise Exception("Unexpected message type '%s'" %
                            self.currentOp['type'])

        return
    def setUp(self):
        """Create a single-session workload of six queries and the cost-model State.

        The session is saved through the metadata database and processed by
        the MongoSniffConverter. Afterwards every collection's size
        statistics are overwritten and a State object is built from the
        collections, the stored sessions and a fixed cost-model config.
        """
        MongoDBTestCase.setUp(self)

        # WORKLOAD
        clock = time.time()

        session = self.metadata_db.Session()
        session['session_id'] = 0
        session['ip_client'] = "client:%d" % (1234 + 0)
        session['ip_server'] = "server:5678"
        session['start_time'] = clock

        # One row per generated query:
        #   (query id, target collection, predicate fields in generation order,
        #    project on field02?, update the session end_time afterwards?)
        main_collection = CostModelTestCase.COLLECTION_NAME
        query_plan = (
            # query 0 querying field00
            (long((0 << 16) + 0), main_collection, ('field00',), True, False),
            # query 1 querying field01
            (long((1 << 16) + 1), main_collection, ('field01',), True, False),
            # query 2 querying field00, field01
            (long((2 << 16) + 2), main_collection,
             ('field01', 'field00'), True, True),
            # query 3 querying field00, field01 but without projection field
            (long((2 << 16) + 3), main_collection,
             ('field01', 'field00'), False, True),
            # query 4 querying field00, it goes to collection 2
            (long((2 << 16) + 4), CostModelTestCase.COLLECTION_NAME_2,
             ('field00',), False, True),
            # query 5 querying field00, it goes to collection 3
            (long((2 << 16) + 5), CostModelTestCase.COLLECTION_NAME_3,
             ('field00',), False, True),
        )

        for query_id, collection, pred_fields, with_projection, closes in query_plan:
            doc_id = str(random.random())
            predicate_values = {}
            predicate_types = {}
            projection = {}
            response_doc = {"_id": doc_id}

            # Each predicate field gets a random value that is both queried
            # for and returned in the response document
            for field in pred_fields:
                value = random.randint(0, 100)
                response_doc[field] = value
                predicate_values[field] = value
                predicate_types[field] = constants.PRED_TYPE_EQUALITY
            if with_projection:
                projection['field02'] = random.randint(0, 100)

            op = Session.operationFactory()
            op['collection'] = collection
            op['type'] = constants.OP_TYPE_QUERY
            op['query_id'] = query_id
            op['query_content'] = [
                {constants.REPLACE_KEY_DOLLAR_PREFIX + "query": predicate_values}
            ]
            op['resp_content'] = [response_doc]
            op['resp_id'] = query_id << 8
            op['predicates'] = predicate_types
            op['query_time'] = clock
            op['query_fields'] = projection
            # Each query takes one tick of the synthetic clock
            clock += 1
            op['resp_time'] = clock

            session['operations'].append(op)

            if closes:
                # Move the session end forward and leave a one-tick gap
                session['end_time'] = clock
                clock += 1
        ## FOR (queries)

        session.save()

        # Use the MongoSniffConverter to populate our metadata
        # (both no_mongo_* flags are switched on before processing)
        converter = MongoSniffConverter(self.metadata_db, self.dataset_db)
        converter.no_mongo_parse = True
        converter.no_mongo_sessionizer = True
        converter.process()
        self.assertEqual(CostModelTestCase.NUM_SESSIONS, self.metadata_db.Session.find().count())

        # Index the catalog entries by collection name
        catalog = {}
        for entry in self.metadata_db.Collection.fetch():
            catalog[entry['name']] = entry
        self.collections = catalog

        populated_workload = list(self.metadata_db.Session.fetch())
        self.workload = populated_workload

        # Increase the database size beyond what the converter derived from the workload
        for col_info in self.collections.itervalues():
            col_info['doc_count'] = CostModelTestCase.NUM_DOCUMENTS
            col_info['avg_doc_size'] = 1024 # bytes
            col_info['max_pages'] = col_info['doc_count'] * col_info['avg_doc_size'] / (4 * 1024)
            col_info.save()
        ## FOR

        self.costModelConfig = {
            'max_memory':     1024, # MB
            'skew_intervals': CostModelTestCase.NUM_INTERVALS,
            'address_size':   64,
            'nodes':          CostModelTestCase.NUM_NODES,
            'window_size':    10
        }

        self.state = State(self.collections, populated_workload, self.costModelConfig)
Esempio n. 18
0
    def createOrderStatus(self, params):
        """Return the three query operations of a synthetic order-status transaction.

        The operations target the CUSTOMER, ORDERS and ORDER_LINE tables, in
        that order. Response documents are filled with random integers in
        [0, 100]; the order id used for the ORDER_LINE lookup is a random
        integer in [0, 10000] rather than a value taken from the ORDERS
        response.

        params must provide "w_id", "d_id", "c_id" and "c_last".
        """
        warehouse = params["w_id"]
        district = params["d_id"]
        customer = params["c_id"]
        c_last = params["c_last"]  # looked up but not used by any query below
        order_id = self.rng.randint(0, 10000)

        def build_query(collection, resp_keys, predicate, projection):
            # One QUERY operation with a random-valued response document
            op = Session.operationFactory()
            fake_row = {}
            for key in resp_keys:
                fake_row[key] = self.rng.randint(0, 100)
            op['resp_content'] = [fake_row]
            op['collection'] = collection
            op['type'] = constants.OP_TYPE_QUERY
            op['query_id'] = self.nextQueryId()
            op['query_content'] = [{"#query": predicate}]
            op['query_fields'] = projection
            op['resp_id'] = self.nextResponseId()
            op['query_time'] = self.nextTimestamp()
            op['resp_time'] = self.nextTimestamp()
            return op

        return [
            # Customer record
            build_query(
                tpccConstants.TABLENAME_CUSTOMER,
                ("C_W_ID", "C_D_ID", "C_ID"),
                {"C_W_ID": warehouse, "C_D_ID": district, "C_ID": customer},
                {"C_ID": 1, "C_FIRST": 1, "C_MIDDLE": 1, "C_LAST": 1,
                 "C_BALANCE": 1}),
            # Orders of that customer
            build_query(
                tpccConstants.TABLENAME_ORDERS,
                ("O_W_ID", "O_D_ID", "O_C_ID"),
                {"O_W_ID": warehouse, "O_D_ID": district, "O_C_ID": customer},
                {"O_ID": 1, "O_CARRIER_ID": 1, "O_ENTRY_D": 1}),
            # Order lines of the (randomly chosen) order
            build_query(
                tpccConstants.TABLENAME_ORDER_LINE,
                ("OL_W_ID", "OL_D_ID", "OL_O_ID"),
                {"OL_W_ID": warehouse, "OL_D_ID": district, "OL_O_ID": order_id},
                {"OL_SUPPLY_W_ID": 1, "OL_I_ID": 1, "OL_QUANTITY": 1,
                 "OL_AMOUNT": 1, "OL_DELIVERY_D": 1}),
        ]
Esempio n. 19
0
 def createPayment(self, params):
     """Return the five operations of a synthetic payment transaction.

     In order: a CUSTOMER query, a WAREHOUSE query, a WAREHOUSE update
     (flagged as upsert), a DISTRICT query and a second DISTRICT operation
     that is typed as a query but also carries an "#inc" document.
     Response documents hold random integers in [0, 100].

     params must provide "w_id", "d_id", "h_amount", "c_w_id", "c_d_id",
     "c_id", "c_last" and "h_date".
     """
     warehouse = params["w_id"]
     district = params["d_id"]
     amount = params["h_amount"]
     # The next two and the last two values are looked up but never used below
     c_w_id = params["c_w_id"]
     c_d_id = params["c_d_id"]
     customer = params["c_id"]
     c_last = params["c_last"]
     h_date = params["h_date"]

     def build_op(collection, op_type, resp_keys, content, fields, upsert=False):
         # One operation whose response document holds random integers
         op = Session.operationFactory()
         fake_row = {}
         for key in resp_keys:
             fake_row[key] = self.rng.randint(0, 100)
         op['resp_content']  = [fake_row]
         op['collection']    = collection
         op['type']          = op_type
         op['query_id']      = self.nextQueryId()
         op['query_content'] = content
         op['query_fields']  = fields
         op['resp_id']       = self.nextResponseId()
         op['query_time']    = self.nextTimestamp()
         op['resp_time']     = self.nextTimestamp()
         if upsert:
             op['update_upsert'] = True
         return op

     ops = [ ]

     # Customer lookup
     ops.append(build_op(
         tpccConstants.TABLENAME_CUSTOMER,
         constants.OP_TYPE_QUERY,
         ("C_W_ID", "C_D_ID", "C_ID"),
         [{"#query" : {"C_W_ID": warehouse, "C_D_ID": district, "C_ID": customer}}],
         {"C_BALANCE": 0, "C_YTD_PAYMENT": 0, "C_PAYMENT_CNT": 0}))

     # Warehouse lookup
     ops.append(build_op(
         tpccConstants.TABLENAME_WAREHOUSE,
         constants.OP_TYPE_QUERY,
         ("W_ID",),
         [{"#query" : {"W_ID": warehouse}}],
         {"W_NAME": 1, "W_STREET_1": 1, "W_STREET_2": 1, "W_CITY": 1, "W_STATE": 1, "W_ZIP": 1}))

     # Warehouse update with a hard-coded name predicate and increment
     ops.append(build_op(
         tpccConstants.TABLENAME_WAREHOUSE,
         constants.OP_TYPE_UPDATE,
         ("W_NAME",),
         [ { "W_NAME" : "igmrhawo" }, { "#inc" : { "W_YTD" : 123 }} ],
         None,
         upsert=True))

     # District lookup
     ops.append(build_op(
         tpccConstants.TABLENAME_DISTRICT,
         constants.OP_TYPE_QUERY,
         ("D_W_ID", "D_ID"),
         [{"#query" : {"D_W_ID": warehouse, "D_ID": district}}],
         {"D_NAME": 1, "D_STREET_1": 1, "D_STREET_2": 1, "D_CITY": 1, "D_STATE": 1, "D_ZIP": 1}))

     # District operation typed as a query that also carries an "#inc" document
     ops.append(build_op(
         tpccConstants.TABLENAME_DISTRICT,
         constants.OP_TYPE_QUERY,
         ("D_ID",),
         [{"#query" : {"D_ID": district}}, {"#inc": {"D_YTD": amount}} ],
         None))

     return ops
Esempio n. 20
0
    def createPayment(self, params):
        """Build the operation list for a synthetic payment transaction.

        Five operations are produced, in this order: CUSTOMER query,
        WAREHOUSE query, WAREHOUSE update (with update_upsert set), DISTRICT
        query, and a DISTRICT operation of type query whose content also
        includes an "#inc" document. Every response document is populated
        with random integers in [0, 100].

        params must provide "w_id", "d_id", "h_amount", "c_w_id", "c_d_id",
        "c_id", "c_last" and "h_date".
        """
        warehouse = params["w_id"]
        district = params["d_id"]
        amount = params["h_amount"]
        # Looked up but never used by the operations below
        c_w_id = params["c_w_id"]
        c_d_id = params["c_d_id"]
        customer = params["c_id"]
        # Looked up but never used by the operations below
        c_last = params["c_last"]
        h_date = params["h_date"]

        query = constants.OP_TYPE_QUERY
        update = constants.OP_TYPE_UPDATE

        # (collection, type, response keys, query content, projection, upsert?)
        plan = (
            (tpccConstants.TABLENAME_CUSTOMER, query,
             ("C_W_ID", "C_D_ID", "C_ID"),
             [{"#query": {"C_W_ID": warehouse,
                          "C_D_ID": district,
                          "C_ID": customer}}],
             {"C_BALANCE": 0, "C_YTD_PAYMENT": 0, "C_PAYMENT_CNT": 0},
             False),
            (tpccConstants.TABLENAME_WAREHOUSE, query,
             ("W_ID",),
             [{"#query": {"W_ID": warehouse}}],
             {"W_NAME": 1, "W_STREET_1": 1, "W_STREET_2": 1,
              "W_CITY": 1, "W_STATE": 1, "W_ZIP": 1},
             False),
            (tpccConstants.TABLENAME_WAREHOUSE, update,
             ("W_NAME",),
             [{"W_NAME": "igmrhawo"}, {"#inc": {"W_YTD": 123}}],
             None,
             True),
            (tpccConstants.TABLENAME_DISTRICT, query,
             ("D_W_ID", "D_ID"),
             [{"#query": {"D_W_ID": warehouse, "D_ID": district}}],
             {"D_NAME": 1, "D_STREET_1": 1, "D_STREET_2": 1,
              "D_CITY": 1, "D_STATE": 1, "D_ZIP": 1},
             False),
            (tpccConstants.TABLENAME_DISTRICT, query,
             ("D_ID",),
             [{"#query": {"D_ID": district}}, {"#inc": {"D_YTD": amount}}],
             None,
             False),
        )

        ops = []
        for collection, op_type, resp_keys, content, projection, upsert in plan:
            op = Session.operationFactory()

            # Random-valued stand-in for the document the server returned
            fake_row = {}
            for key in resp_keys:
                fake_row[key] = self.rng.randint(0, 100)

            op['resp_content'] = [fake_row]
            op['collection'] = collection
            op['type'] = op_type
            op['query_id'] = self.nextQueryId()
            op['query_content'] = content
            op['query_fields'] = projection
            op['resp_id'] = self.nextResponseId()
            op['query_time'] = self.nextTimestamp()
            op['resp_time'] = self.nextTimestamp()
            if upsert:
                op['update_upsert'] = True
            ops.append(op)
        ## FOR

        return ops
Esempio n. 21
0
    @staticmethod
    def _buildEqualityQueryOp(queryId, collection, fields, withProjection,
                              queryTime):
        """Build one synthetic equality-query operation for the fixture.

        A random ``_id`` and one random integer in [0, 100] per entry of
        ``fields`` make up the response document; the query content asks
        for exactly those values, so the query matches its own response.

        Args:
            queryId: id stored as ``query_id``; ``resp_id`` is ``queryId << 8``.
            collection: name of the collection the operation targets.
            fields: field names to put an equality predicate on, in the
                order their random values should be drawn.
            withProjection: when true, ``query_fields`` holds a ``field02``
                entry; otherwise it is an empty dict.
            queryTime: value stored as ``query_time``; ``resp_time`` is one
                second later.

        Returns:
            The operation produced by ``Session.operationFactory()`` with
            the fields above filled in.
        """
        responseContent = {"_id": str(random.random())}
        predicateValues = {}
        queryPredicates = {}
        for field in fields:
            value = random.randint(0, 100)
            responseContent[field] = value
            predicateValues[field] = value
            queryPredicates[field] = constants.PRED_TYPE_EQUALITY

        projectionField = {}
        if withProjection:
            projectionField['field02'] = random.randint(0, 100)

        op = Session.operationFactory()
        op['collection'] = collection
        op['type'] = constants.OP_TYPE_QUERY
        op['query_id'] = queryId
        op['query_content'] = [{
            constants.REPLACE_KEY_DOLLAR_PREFIX + "query": predicateValues
        }]
        op['resp_content'] = [responseContent]
        op['resp_id'] = queryId << 8
        op['predicates'] = queryPredicates
        op['query_time'] = queryTime
        op['query_fields'] = projectionField
        op['resp_time'] = queryTime + 1
        return op

    def setUp(self):
        """Create a one-session synthetic workload and the design State.

        Steps:
          1. Save a single Session holding six equality queries spread
             over three collections.
          2. Run MongoSniffConverter (with parsing and sessionizing
             disabled) to populate the metadata from that session.
          3. Overwrite each collection's size statistics with fixed values.
          4. Build ``self.costModelConfig`` and ``self.state``.
        """
        MongoDBTestCase.setUp(self)

        # WORKLOAD
        timestamp = time.time()

        sess = self.metadata_db.Session()
        sess['session_id'] = 0
        sess['ip_client'] = "client:%d" % (1234 + 0)
        sess['ip_server'] = "server:5678"
        sess['start_time'] = timestamp

        col1 = CostModelTestCase.COLLECTION_NAME
        col2 = CostModelTestCase.COLLECTION_NAME_2
        col3 = CostModelTestCase.COLLECTION_NAME_3

        # One row per query, in workload order; the row index is the query
        # number and forms the low part of the query id.
        # (id prefix, collection, predicate fields, has projection)
        # NOTE(review): queries 3-5 reuse id prefix 2 -- confirm intended.
        querySpecs = [
            (0, col1, ('field00',), True),
            (1, col1, ('field01',), True),
            (2, col1, ('field01', 'field00'), True),
            (2, col1, ('field01', 'field00'), False),
            (2, col2, ('field00',), False),
            (2, col3, ('field00',), False),
        ]
        for queryNum, spec in enumerate(querySpecs):
            idPrefix, collection, fields, withProjection = spec
            queryId = long((idPrefix << 16) + queryNum)
            op = self._buildEqualityQueryOp(queryId, collection, fields,
                                            withProjection, timestamp)
            sess['operations'].append(op)

            # The response arrives one second after the query was issued.
            timestamp += 1
            sess['end_time'] = timestamp
            # Queries 0 and 1 are back-to-back; from query 2 onward each
            # query is followed by a one-second idle gap.
            if queryNum >= 2:
                timestamp += 1

        sess.save()

        # Use the MongoSniffConverter to populate our metadata
        converter = MongoSniffConverter(self.metadata_db, self.dataset_db)
        converter.no_mongo_parse = True
        converter.no_mongo_sessionizer = True
        converter.process()
        self.assertEqual(CostModelTestCase.NUM_SESSIONS,
                         self.metadata_db.Session.find().count())

        self.collections = dict(
            (c['name'], c) for c in self.metadata_db.Collection.fetch())

        populated_workload = list(self.metadata_db.Session.fetch())
        self.workload = populated_workload
        # Increase the database size beyond what the converter derived from the workload
        for col_info in self.collections.itervalues():
            col_info['doc_count'] = CostModelTestCase.NUM_DOCUMENTS
            col_info['avg_doc_size'] = 1024  # bytes
            # presumably 4 KB pages -- TODO confirm the page size assumption
            col_info['max_pages'] = col_info['doc_count'] * col_info[
                'avg_doc_size'] / (4 * 1024)
            col_info.save()

        self.costModelConfig = {
            'max_memory': 1024,  # MB
            'skew_intervals': CostModelTestCase.NUM_INTERVALS,
            'address_size': 64,
            'nodes': CostModelTestCase.NUM_NODES,
            'window_size': 10
        }

        self.state = State(self.collections, populated_workload,
                           self.costModelConfig)