class TestDiskCostIndexesWithProjection(CostModelTestCase): def setUp(self): CostModelTestCase.setUp(self) self.cm = DiskCostComponent(self.state) self.cm.no_index_insertion_penalty = True ## DEF def testDiskCostIndexes(self): """Check whether disk cost calculations work correctly""" # First get the disk cost when there are no indexes d = Design() col_info = self.collections[CostModelTestCase.COLLECTION_NAME] d.addCollection(col_info['name']) cost0 = self.cm.getCost(d) print "diskCost0:", cost0 # The cost should be exactly equal to one, which means that every operation # has to perform a full sequential scan on the collection self.assertEqual(cost0, 1.0) # Now add one index. The disk cost should be lower d = Design() col_info = self.collections[CostModelTestCase.COLLECTION_NAME] d.addCollection(col_info['name']) d.addIndex(col_info['name'], ["field01"]) self.state.invalidateCache(col_info['name']) self.cm.reset() self.cm.state.reset() cost1 = self.cm.getCost(d) print "diskCost1:", cost1 self.assertGreater(cost0, cost1) # Now add one more index. The disk cost should be lower again d = Design() col_info = self.collections[CostModelTestCase.COLLECTION_NAME] d.addCollection(col_info['name']) d.addIndex(col_info['name'], ["field01", "field00"]) self.state.invalidateCache(col_info['name']) self.cm.reset() self.cm.state.reset() cost2 = self.cm.getCost(d) print "diskCost2:", cost2 # Now add the one index. The disk cost should be much lower d = Design() col_info = self.collections[CostModelTestCase.COLLECTION_NAME] d.addCollection(col_info['name']) d.addIndex(col_info['name'], ["field01", "field00", "field02"]) self.state.invalidateCache(col_info['name']) self.cm.reset() self.cm.state.reset() cost3 = self.cm.getCost(d) print "diskCost3:", cost3 self.assertGreater(cost2, cost3)
class TestWorkloadCombiner(CostModelTestCase): def setUp(self): CostModelTestCase.setUp(self) self.cm = DiskCostComponent(self.state) self.cmn = NetworkCostComponent(self.state) self.col_names = [ x for x in self.collections.iterkeys()] ## DEF def testQueriesCombination(self): """Test if the total number of queries are reduced""" original_number_of_queries = 0 for sess in self.workload: for op in sess["operations"]: original_number_of_queries += 1 print "orignal number of queries: " + str(original_number_of_queries) # Initialize a combiner combiner = WorkloadCombiner(self.col_names, self.workload) # initialize a design with denormalization d = Design() for i in xrange(len(CostModelTestCase.COLLECTION_NAMES)): col_info = self.collections[CostModelTestCase.COLLECTION_NAMES[i]] d.addCollection(col_info['name']) d.setDenormalizationParent("koalas", "apples") combinedWorkload = combiner.process(d) number_of_queries_from_combined_workload = 0 for sess in combinedWorkload: for op in sess["operations"]: number_of_queries_from_combined_workload += 1 print "number of queries after query combination: " + str(number_of_queries_from_combined_workload) self.assertGreater(original_number_of_queries, number_of_queries_from_combined_workload) ## DEF def testDiskCostChangesAfterQueryCombination(self): """ Assume we have collection A, B, C and we want to embed C to A If we build index on field00 of A and field02 of C The cost after query combination should be lower """ d0 = Design() for i in xrange(len(CostModelTestCase.COLLECTION_NAMES)): col_info = self.collections[CostModelTestCase.COLLECTION_NAMES[i]] d0.addCollection(col_info['name']) d0.addIndex(col_info['name'], ['field00', 'field02']) cost0 = self.cm.getCost(d0) print "cost0 " + str(cost0) # Initialize a combiner combiner = WorkloadCombiner(self.col_names, self.workload) # initialize a design with denormalization d1 = Design() for i in xrange(len(CostModelTestCase.COLLECTION_NAMES)): col_info = self.collections[CostModelTestCase.COLLECTION_NAMES[i]] d1.addCollection(col_info['name']) d1.addIndex(col_info['name'], ['field00', 'field02']) self.state.invalidateCache(col_info['name']) d1.setDenormalizationParent("koalas", "apples") combinedWorkload = combiner.process(d1) self.state.updateWorkload(combinedWorkload) self.cm.reset() self.cm.state.reset() cost1 = self.cm.getCost(d1) print "cost1 " + str(cost1) self.assertGreater(cost0, cost1) # Cost should remain the same after restoring the original workload self.state.restoreOriginalWorkload() self.cm.reset() print "child collection ", self.cm.child_collections self.cm.state.reset() cost2 = self.cm.getCost(d0) print "cost2 " + str(cost2) self.assertEqual(cost2, cost0) ## def def testNetworkCostShouldReduceAfterQueryCombination(self): """ Network cost should be reduce after embedding collections """ d0 = Design() for i in xrange(len(CostModelTestCase.COLLECTION_NAMES)): col_info = self.collections[CostModelTestCase.COLLECTION_NAMES[i]] d0.addCollection(col_info['name']) d0.addIndex(col_info['name'], ['field00', 'field02']) cost0 = self.cmn.getCost(d0) print "cost0 " + str(cost0) # Initialize a combiner combiner = WorkloadCombiner(self.col_names, self.workload) # initialize a design with denormalization d1 = Design() for i in xrange(len(CostModelTestCase.COLLECTION_NAMES)): col_info = self.collections[CostModelTestCase.COLLECTION_NAMES[i]] d1.addCollection(col_info['name']) d1.addIndex(col_info['name'], ['field00', 'field02']) self.state.invalidateCache(col_info['name']) d1.setDenormalizationParent("koalas", "apples") combinedWorkload = combiner.process(d1) self.state.updateWorkload(combinedWorkload) self.cmn.reset() self.cmn.state.reset() cost1 = self.cmn.getCost(d1) print "cost1 " + str(cost1) self.assertGreater(cost0, cost1) # Cost should remain the same after restoring the original workload self.state.restoreOriginalWorkload() self.cmn.reset() self.cmn.state.reset() cost2 = self.cmn.getCost(d0) print "cost2 " + str(cost2) self.assertEqual(cost2, cost0) ## def def testNotCollectionEmbeddingProcessShouldReturnNone(self): """ If the given design has no collection embedding, we should return right away """ d0 = Design() for i in xrange(len(CostModelTestCase.COLLECTION_NAMES)): col_info = self.collections[CostModelTestCase.COLLECTION_NAMES[i]] d0.addCollection(col_info['name']) d0.addIndex(col_info['name'], ['field00', 'field02']) # Initialize a combiner combiner = WorkloadCombiner(self.col_names, self.workload) combinedWorkload = combiner.process(d0) self.assertEqual(None, combinedWorkload)
class TestWorkloadCombiner(CostModelTestCase): def setUp(self): CostModelTestCase.setUp(self) self.cm = DiskCostComponent(self.state) self.col_names = [ x for x in self.collections.iterkeys()] ## DEF def testQueriesCombination(self): """Test if the total number of queries are reduced""" original_number_of_queries = 0 for sess in self.workload: for op in sess["operations"]: original_number_of_queries += 1 print "orignal number of queries: " + str(original_number_of_queries) # Initialize a combiner combiner = WorkloadCombiner(self.col_names, self.workload) # initialize a design with denormalization d = Design() for col_name in self.collections.iterkeys(): d.addCollection(col_name) d.setDenormalizationParent(tpccConstants.TABLENAME_ORDER_LINE, tpccConstants.TABLENAME_ORDERS) combinedWorkload = combiner.process(d) number_of_queries_from_combined_workload = 0 for sess in combinedWorkload: for op in sess["operations"]: number_of_queries_from_combined_workload += 1 print "number of queries after query combination: " + str(number_of_queries_from_combined_workload) self.assertGreater(original_number_of_queries, number_of_queries_from_combined_workload) def testDiskCostNotChangedAfterQueryCombination(self): """Disk cost should not be changed after query combination""" d = Design() d = Design() for col_name in self.collections.iterkeys(): d.addCollection(col_name) cost0 = self.cm.getCost(d) print "cost0 " + str(cost0) # Initialize a combiner combiner = WorkloadCombiner(self.col_names, self.workload) # initialize a design with denormalization d = Design() d = Design() for col_name in self.collections.iterkeys(): d.addCollection(col_name) self.state.invalidateCache(col_name) d.setDenormalizationParent(tpccConstants.TABLENAME_ORDER_LINE, tpccConstants.TABLENAME_ORDERS) combinedWorkload = combiner.process(d) self.state.updateWorkload(combinedWorkload) self.cm.reset() self.cm.state.reset() cost1 = self.cm.getCost(d) print "cost1 " + str(cost1) self.assertEqual(cost0, cost1)
class TestDiskCostIndexes(CostModelTestCase): def setUp(self): CostModelTestCase.setUp(self) self.cm = DiskCostComponent(self.state) self.cm.no_index_insertion_penalty = True # DEF def testDiskCostIndexes(self): """Check whether disk cost calculations work correctly""" # First get the disk cost when there are no indexes d = Design() col_info = self.collections[CostModelTestCase.COLLECTION_NAME] d.addCollection(col_info['name']) cost0 = self.cm.getCost(d) print "diskCost0:", cost0 # The cost should be exactly equal to one, which means that every operation # has to perform a full sequential scan on the collection self.assertEqual(cost0, 1.0) # Now add the all indexes. The disk cost should be lower d = Design() col_info = self.collections[CostModelTestCase.COLLECTION_NAME] d.addCollection(col_info['name']) d.addIndex(col_info['name'], col_info['interesting']) self.state.invalidateCache(col_info['name']) self.cm.reset() self.cm.state.reset() cost1 = self.cm.getCost(d) print "diskCost1:", cost1 self.assertGreater(cost0, cost1) def testDiskCostOnDifferentIndexes(self): """Check how indexes will affect the disk cost""" # 1. Put index on both of the fields seperately d = Design() d.addCollection(CostModelTestCase.COLLECTION_NAME) d.addIndex(CostModelTestCase.COLLECTION_NAME, ["field00"]) d.addIndex(CostModelTestCase.COLLECTION_NAME, ["field01"]) self.cm.reset() self.cm.state.reset() cost0 = self.cm.getCost(d) print "diskCost0:", cost0 # 3. Put indexes on both field together d = Design() col_info = self.collections[CostModelTestCase.COLLECTION_NAME] d.addCollection(CostModelTestCase.COLLECTION_NAME) d.addIndex(CostModelTestCase.COLLECTION_NAME, ["field01", "field00"]) self.state.invalidateCache(col_info['name']) self.cm.reset() self.cm.state.reset() cost1 = self.cm.getCost(d) print "diskCost1:", cost1 self.assertGreater(cost0, cost1) def testDiskCostCaching(self): """Check whether disk cost calculations work correctly with caching enabled""" self.cm.cache_enable = True # Give the mofo a full Design with indexes d = Design() col_info = self.collections[CostModelTestCase.COLLECTION_NAME] d.addCollection(col_info['name']) d.addIndex(col_info['name'], col_info['interesting']) ## FOR cost0 = self.cm.getCost(d) print "diskCost0:", cost0 # FIXME self.assertGreater(cost0, 0.0) # We should get the same cost back after we execute it a second time cost1 = self.cm.getCost(d) print "diskCost1:", cost1
class TestWorkloadCombiner(CostModelTestCase): def setUp(self): CostModelTestCase.setUp(self) self.cm = DiskCostComponent(self.state) self.cmn = NetworkCostComponent(self.state) self.col_names = [x for x in self.collections.iterkeys()] ## DEF def testQueriesCombination(self): """Test if the total number of queries are reduced""" original_number_of_queries = 0 for sess in self.workload: for op in sess["operations"]: original_number_of_queries += 1 print "orignal number of queries: " + str(original_number_of_queries) # Initialize a combiner combiner = WorkloadCombiner(self.col_names, self.workload) # initialize a design with denormalization d = Design() for i in xrange(len(CostModelTestCase.COLLECTION_NAMES)): col_info = self.collections[CostModelTestCase.COLLECTION_NAMES[i]] d.addCollection(col_info['name']) d.setDenormalizationParent("koalas", "apples") combinedWorkload = combiner.process(d) number_of_queries_from_combined_workload = 0 for sess in combinedWorkload: for op in sess["operations"]: number_of_queries_from_combined_workload += 1 print "number of queries after query combination: " + str( number_of_queries_from_combined_workload) self.assertGreater(original_number_of_queries, number_of_queries_from_combined_workload) ## DEF def testDiskCostChangesAfterQueryCombination(self): """ Assume we have collection A, B, C and we want to embed C to A If we build index on field00 of A and field02 of C The cost after query combination should be lower """ d0 = Design() for i in xrange(len(CostModelTestCase.COLLECTION_NAMES)): col_info = self.collections[CostModelTestCase.COLLECTION_NAMES[i]] d0.addCollection(col_info['name']) d0.addIndex(col_info['name'], ['field00', 'field02']) cost0 = self.cm.getCost(d0) print "cost0 " + str(cost0) # Initialize a combiner combiner = WorkloadCombiner(self.col_names, self.workload) # initialize a design with denormalization d1 = Design() for i in xrange(len(CostModelTestCase.COLLECTION_NAMES)): col_info = self.collections[CostModelTestCase.COLLECTION_NAMES[i]] d1.addCollection(col_info['name']) d1.addIndex(col_info['name'], ['field00', 'field02']) self.state.invalidateCache(col_info['name']) d1.setDenormalizationParent("koalas", "apples") combinedWorkload = combiner.process(d1) self.state.updateWorkload(combinedWorkload) self.cm.reset() self.cm.state.reset() cost1 = self.cm.getCost(d1) print "cost1 " + str(cost1) self.assertGreater(cost0, cost1) # Cost should remain the same after restoring the original workload self.state.restoreOriginalWorkload() self.cm.reset() print "child collection ", self.cm.child_collections self.cm.state.reset() cost2 = self.cm.getCost(d0) print "cost2 " + str(cost2) self.assertEqual(cost2, cost0) ## def def testNetworkCostShouldReduceAfterQueryCombination(self): """ Network cost should be reduce after embedding collections """ d0 = Design() for i in xrange(len(CostModelTestCase.COLLECTION_NAMES)): col_info = self.collections[CostModelTestCase.COLLECTION_NAMES[i]] d0.addCollection(col_info['name']) d0.addIndex(col_info['name'], ['field00', 'field02']) cost0 = self.cmn.getCost(d0) print "cost0 " + str(cost0) # Initialize a combiner combiner = WorkloadCombiner(self.col_names, self.workload) # initialize a design with denormalization d1 = Design() for i in xrange(len(CostModelTestCase.COLLECTION_NAMES)): col_info = self.collections[CostModelTestCase.COLLECTION_NAMES[i]] d1.addCollection(col_info['name']) d1.addIndex(col_info['name'], ['field00', 'field02']) self.state.invalidateCache(col_info['name']) d1.setDenormalizationParent("koalas", "apples") combinedWorkload = combiner.process(d1) self.state.updateWorkload(combinedWorkload) self.cmn.reset() self.cmn.state.reset() cost1 = self.cmn.getCost(d1) print "cost1 " + str(cost1) self.assertGreater(cost0, cost1) # Cost should remain the same after restoring the original workload self.state.restoreOriginalWorkload() self.cmn.reset() self.cmn.state.reset() cost2 = self.cmn.getCost(d0) print "cost2 " + str(cost2) self.assertEqual(cost2, cost0) ## def def testNotCollectionEmbeddingProcessShouldReturnNone(self): """ If the given design has no collection embedding, we should return right away """ d0 = Design() for i in xrange(len(CostModelTestCase.COLLECTION_NAMES)): col_info = self.collections[CostModelTestCase.COLLECTION_NAMES[i]] d0.addCollection(col_info['name']) d0.addIndex(col_info['name'], ['field00', 'field02']) # Initialize a combiner combiner = WorkloadCombiner(self.col_names, self.workload) combinedWorkload = combiner.process(d0) self.assertEqual(None, combinedWorkload)
class TestDiskCost_IndexInsertionPenalty(CostModelTestCase): def setUp(self): CostModelTestCase.setUp(self) self.cm = DiskCostComponent(self.state) # DEF def testDiskCost_IndexInsertionPenalty(self): """ IndexInsertionPenalty should be high if we build bad indexes """ # 1 d = Design() for col_name in CostModelTestCase.COLLECTION_NAMES: d.addCollection(col_name) d.addIndex(col_name, ["field00"]) ## FOR self.cm.reset() self.cm.state.reset() self.cm.getCost(d) p0 = self.cm.total_index_insertion_penalty # 2 d = Design() for col_name in CostModelTestCase.COLLECTION_NAMES: d.addCollection(col_name) d.addIndex(col_name, ["field01"]) ## FOR self.cm.reset() self.cm.state.reset() self.cm.getCost(d) p1 = self.cm.total_index_insertion_penalty self.assertEqual(p0, p1) #3 d = Design() for col_name in CostModelTestCase.COLLECTION_NAMES: d.addCollection(col_name) d.addIndex(col_name, ["field00", "field01"]) ## FOR self.cm.reset() self.cm.state.reset() self.cm.getCost(d) p2 = self.cm.total_index_insertion_penalty self.assertEqual(p0, p2) #4 d = Design() for col_name in CostModelTestCase.COLLECTION_NAMES: d.addCollection(col_name) d.addIndex(col_name, ["field00", "field02"]) ## FOR self.cm.reset() self.cm.state.reset() self.cm.getCost(d) p3 = self.cm.total_index_insertion_penalty self.assertGreater(p3, p0) #5 d = Design() for col_name in CostModelTestCase.COLLECTION_NAMES: d.addCollection(col_name) d.addIndex(col_name, ["field01", "field02"]) ## FOR self.cm.reset() self.cm.state.reset() self.cm.getCost(d) p4 = self.cm.total_index_insertion_penalty self.assertGreater(p4, p0) #6 d = Design() for col_name in CostModelTestCase.COLLECTION_NAMES: d.addCollection(col_name) d.addIndex(col_name, ["field00", "field01", "field02"]) ## FOR self.cm.reset() self.cm.state.reset() self.cm.getCost(d) p5 = self.cm.total_index_insertion_penalty self.assertGreater(p5, p0) ## DEF def testDiskCost_IndexInsertionPenalty_integrated_to_cost_component(self): """