def setUp(self): self.connector = MockConnector() self.algo = DropHeuristicAlgorithm(database_connector=self.connector) self.column_0 = Column("Col0") self.column_1 = Column("Col1") self.column_2 = Column("Col2") self.all_columns = [self.column_0, self.column_1, self.column_2] self.table = Table("TableA") self.table.add_columns(self.all_columns) self.index_0 = Index([self.column_0]) self.index_1 = Index([self.column_1]) self.index_2 = Index([self.column_2]) query_0 = Query(0, "SELECT * FROM TableA WHERE Col0 = 4;", [self.column_0]) query_1 = Query( 1, "SELECT * FROM TableA WHERE Col0 = 1 AND Col1 = 2 AND Col2 = 3;", self.all_columns, ) self.database_name = "test_DB" self.workload = Workload([query_0, query_1]) self.algo.workload = self.workload self.algo.cost_evaluation.calculate_cost = MagicMock( side_effect=self._calculate_cost_mock)
def setUp(self): self.connector = MockConnector() self.algo = IBMAlgorithm(database_connector=self.connector) self.column_0 = Column("Col0") self.column_1 = Column("Col1") self.column_2 = Column("Col2") self.column_3 = Column("Col3") self.column_4 = Column("Col4") self.column_5 = Column("Col5") self.column_6 = Column("Col6") self.column_7 = Column("Col7") self.all_columns = [ self.column_0, self.column_1, self.column_2, self.column_3, self.column_4, self.column_5, self.column_6, self.column_7, ] self.table = Table("Table0") self.table.add_columns(self.all_columns) self.query_0 = Query( 0, "SELECT * FROM Table0 WHERE Col0 = 1 AND Col1 = 2 AND Col2 = 3;", self.all_columns, )
def test_possible_indexes(self): column_0_table_1 = Column("Col0") table_1 = Table("Table1") table_1.add_column(column_0_table_1) query = Query( 17, """SELECT * FROM Table0 as t0, Table1 as t1 WHERE t0.Col0 = 1" AND t0.Col1 = 2 AND t0.Col2 = 3 AND t1.Col0 = 17;""", [self.column_0, self.column_1, self.column_2, column_0_table_1], ) indexes = self.algo._possible_indexes(query) self.assertIn(Index([column_0_table_1]), indexes) self.assertIn(Index([self.column_0]), indexes) self.assertIn(Index([self.column_1]), indexes) self.assertIn(Index([self.column_2]), indexes) self.assertIn(Index([self.column_0, self.column_1]), indexes) self.assertIn(Index([self.column_0, self.column_2]), indexes) self.assertIn(Index([self.column_1, self.column_0]), indexes) self.assertIn(Index([self.column_1, self.column_2]), indexes) self.assertIn(Index([self.column_2, self.column_0]), indexes) self.assertIn(Index([self.column_2, self.column_1]), indexes) self.assertIn(Index([self.column_0, self.column_1, self.column_2]), indexes) self.assertIn(Index([self.column_0, self.column_2, self.column_1]), indexes) self.assertIn(Index([self.column_1, self.column_0, self.column_2]), indexes) self.assertIn(Index([self.column_1, self.column_2, self.column_0]), indexes) self.assertIn(Index([self.column_2, self.column_0, self.column_1]), indexes) self.assertIn(Index([self.column_2, self.column_1, self.column_0]), indexes)
def setUp(self): self.connector = MockConnector() self.algo = EPICAlgorithm(database_connector=self.connector) self.column_1 = Column("ColA") self.column_2 = Column("ColB") self.column_3 = Column("ColC") self.all_columns = [self.column_1, self.column_2, self.column_3] self.table = Table("TableA") self.table.add_columns(self.all_columns) self.index_1 = Index([self.column_1]) self.index_1.estimated_size = 5 self.index_2 = Index([self.column_2]) self.index_2.estimated_size = 1 self.index_3 = Index([self.column_3]) self.index_3.estimated_size = 3 query_1 = Query(0, "SELECT * FROM TableA WHERE ColA = 4;", [self.column_1]) query_2 = Query( 1, "SELECT * FROM TableA WHERE ColA = 1 AND ColB = 2 AND ColC = 3;", self.all_columns, ) self.database_name = "test_DB" self.workload = Workload([query_1, query_2], self.database_name) self.algo.workload = self.workload
def test_column_eq(self): table_1 = Table("TableA") table_2 = Table("TableA") column_1 = Column(name="ColA") column_2 = Column(name="ColA") column_3 = Column(name="ColB") # Column name equal but table (for both) is None with self.assertRaises(AssertionError): column_1 == column_2 # Column name different but table (for both) is None with self.assertRaises(AssertionError): column_1 == column_3 table_1.add_column(column_1) # Column name equal but table of column_2 is None with self.assertRaises(AssertionError): column_1 == column_2 # Column name equal but table of column_2 is None with self.assertRaises(AssertionError): column_2 == column_1 table_2.add_column(column_2) self.assertTrue(column_1 == column_2) table_2.add_column(column_3) self.assertFalse(column_1 == column_3) # Check comparing object of different class self.assertFalse(column_1 == int(3))
def test_table_eq_no_columns(self): table_1 = Table("TableA") table_2 = Table("TableA") table_3 = Table("TableB") self.assertTrue(table_1 == table_2) self.assertFalse(table_1 == table_3) # Check comparing object of different class self.assertFalse(table_1 == int(3))
def test_appendable_by_other_table(self): column = Column("ColZ") table = Table("TableZ") table.add_column(column) index_on_other_table = Index([column]) index_0 = Index([self.column_0]) self.assertFalse(index_0.appendable_by(index_on_other_table))
def setUpClass(cls): cls.column_a_0 = Column("Col0") cls.column_a_1 = Column("Col1") cls.table_a = Table("TableA") cls.table_a.add_columns([cls.column_a_0, cls.column_a_1]) cls.column_b_0 = Column("Col0") cls.table_b = Table("TableB") cls.table_b.add_columns([cls.column_b_0]) cls.index_0 = Index([cls.column_a_0]) cls.index_1 = Index([cls.column_b_0]) cls.index_2 = Index([cls.column_a_1])
def setUpClass(cls): cls.db_name = "TestDB" cls.table = Table("TestTableA") cls.columns = [ Column("Col0"), Column("Col1"), Column("Col2"), Column("Col3"), Column("Col4"), ] cls.table.add_columns(cls.columns) cls.queries = [ Query(0, "SELECT * FROM TestTableA WHERE Col0 = 4", [cls.columns[0]]), Query(1, "SELECT * FROM TestTableA WHERE Col1 = 3", [cls.columns[1]]), Query( 2, "SELECT * FROM TestTableA WHERE Col0 = 14 AND Col1 = 13", [cls.columns[0], cls.columns[1]], ), ] cls.workload = Workload(cls.queries, cls.db_name)
def setUpClass(cls): cls.column_0 = Column("Col0") cls.column_1 = Column("Col1") cls.column_2 = Column("Col2") columns = [cls.column_0, cls.column_1, cls.column_2] cls.table = Table("TableA") cls.table.add_columns(columns)
def setUpClass(cls): cls.column_0 = Column("Col0") cls.column_1 = Column("Col1") cls.column_2 = Column("Col2") cls.table = Table("Table0") cls.table.add_columns([cls.column_0, cls.column_1, cls.column_2]) cls.column_0_table_1 = Column("Col0") cls.table_1 = Table("Table1") cls.table_1.add_column(cls.column_0_table_1) cls.query_0 = Query( 17, """SELECT * FROM Table0 as t0, Table1 as t1 WHERE t0.Col0 = 1" AND t0.Col1 = 2 AND t0.Col2 = 3 AND t1.Col0 = 17;""", [cls.column_0, cls.column_1, cls.column_2, cls.column_0_table_1], )
def test_merge(self): index_0 = Index([self.column_0]) index_1 = Index([self.column_1]) result = index_merge(index_0, index_1) expected = Index([self.column_0, self.column_1]) self.assertEqual(result, expected) index_0 = Index([self.column_0, self.column_1]) index_1 = Index([self.column_1, self.column_2]) result = index_merge(index_0, index_1) expected = Index([self.column_0, self.column_1, self.column_2]) self.assertEqual(result, expected) index_0 = Index([self.column_0, self.column_1]) index_1 = Index([self.column_1, self.column_0]) result = index_merge(index_0, index_1) expected = Index([self.column_0, self.column_1]) self.assertEqual(result, expected) # Example from Bruno's paper column_a = Column("a") column_b = Column("b") column_c = Column("c") column_d = Column("d") column_e = Column("e") column_f = Column("f") column_g = Column("g") columns = [ column_a, column_b, column_c, column_d, column_e, column_f, column_g ] table = Table("TableB") table.add_columns(columns) index_1 = Index( [column_a, column_b, column_c, column_d, column_e, column_f]) index_2 = Index([column_c, column_d, column_g, column_e]) result = index_merge(index_1, index_2) expected = Index([ column_a, column_b, column_c, column_d, column_e, column_f, column_g ]) self.assertEqual(result, expected)
def test_runtime_data_logging(self): db = PostgresDatabaseConnector(self.db_name, "postgres") query = Query(17, "SELECT count(*) FROM nation;") db.get_cost(query) self.assertEqual(db.cost_estimations, 1) self.assertGreater(db.cost_estimation_duration, 0) column_n_name = Column("n_name") nation_table = Table("nation") nation_table.add_column(column_n_name) index = Index([column_n_name]) index_oid = db.simulate_index(index)[0] self.assertGreater(db.index_simulation_duration, 0) self.assertEqual(db.simulated_indexes, 1) previou_simulation_duration = db.index_simulation_duration db.drop_simulated_index(index_oid) self.assertGreater(db.index_simulation_duration, previou_simulation_duration)
def test_workload_indexable_columns(self): table = Table("TableA") column_1 = Column(name="ColA") column_2 = Column(name="ColB") column_3 = Column(name="ColC") table.add_column(column_1) table.add_column(column_2) table.add_column(column_3) query_1 = Query( 17, "SELECT * FROM TableA WHERE ColA = 4 AND ColB = 5;", columns=[column_1, column_2], ) query_2 = Query( 18, "SELECT * FROM TableA WHERE ColA = 3 AND ColC = 2;", columns=[column_1, column_3], ) database_name = "test_DB" workload = Workload([query_1, query_2], database_name) indexable_columns = workload.indexable_columns() self.assertEqual(sorted(indexable_columns), sorted([column_1, column_2, column_3]))
def setUpClass(cls): cls.db_name = "TestDB" cls.table = Table("TestTableA") cls.columns = [ Column("Col0"), Column("Col1"), Column("Col2"), Column("Col3"), Column("Col4"), ] cls.table.add_columns(cls.columns) cls.index_0 = Index([cls.columns[0]]) cls.index_1 = Index([cls.columns[1]]) cls.index_2 = Index([cls.columns[2]])
def test_table_add_column(self): table = Table("TableA") column_1 = Column("ColA") table.add_column(column_1) self.assertEqual(table.columns, [column_1]) self.assertEqual(column_1.table, table) column_2 = Column("ColB") column_3 = Column("ColC") table.add_columns([column_2, column_3]) self.assertEqual(table.columns, [column_1, column_2, column_3]) self.assertEqual(column_2.table, table) self.assertEqual(column_3.table, table)
class TestDropHeuristicAlgorithm(unittest.TestCase): def setUp(self): self.connector = MockConnector() self.algo = DropHeuristicAlgorithm(database_connector=self.connector) self.column_0 = Column("Col0") self.column_1 = Column("Col1") self.column_2 = Column("Col2") self.all_columns = [self.column_0, self.column_1, self.column_2] self.table = Table("TableA") self.table.add_columns(self.all_columns) self.index_0 = Index([self.column_0]) self.index_1 = Index([self.column_1]) self.index_2 = Index([self.column_2]) query_0 = Query(0, "SELECT * FROM TableA WHERE Col0 = 4;", [self.column_0]) query_1 = Query( 1, "SELECT * FROM TableA WHERE Col0 = 1 AND Col1 = 2 AND Col2 = 3;", self.all_columns, ) self.database_name = "test_DB" self.workload = Workload([query_0, query_1]) self.algo.workload = self.workload self.algo.cost_evaluation.calculate_cost = MagicMock( side_effect=self._calculate_cost_mock) def test_drop_heuristic_algoritm(self): # Should use default parameters if none are specified self.assertEqual(self.algo.parameters["max_indexes"], 15) self.assertEqual(self.algo.cost_evaluation.cost_estimation, "whatif") def _calculate_cost_mock(self, workload, remaining_indexes): assert isinstance( remaining_indexes, set ), "The cost mock for the drop heuristic should only be called with index sets" index_combination_str = utils.index_combination_to_str( remaining_indexes) # In the first round, the lowest cost is achieved, if col1 is dropped. # In the second round, if col0 is dropped. index_combination_cost = { "tablea_col0_idx||tablea_col1_idx": 80, "tablea_col0_idx||tablea_col2_idx": 60, "tablea_col1_idx||tablea_col2_idx": 70, "tablea_col0_idx": 110, "tablea_col2_idx": 100, } return index_combination_cost[index_combination_str] def test_calculate_best_indexes_all_fit(self): self.algo.parameters["max_indexes"] = 3 indexes = self.algo._calculate_best_indexes(self.workload) expected_indexes = frozenset( [self.index_0, self.index_1, self.index_2]) self.assertEqual(indexes, expected_indexes) def test_calculate_best_indexes_two_fit(self): self.algo.parameters["max_indexes"] = 2 indexes = self.algo._calculate_best_indexes(self.workload) expected_indexes = frozenset([self.index_0, self.index_2]) self.assertEqual(indexes, expected_indexes) self.algo.cost_evaluation.calculate_cost.assert_any_call( self.workload, set([self.index_0, self.index_1])) self.algo.cost_evaluation.calculate_cost.assert_any_call( self.workload, set([self.index_0, self.index_2])) self.algo.cost_evaluation.calculate_cost.assert_any_call( self.workload, set([self.index_1, self.index_2])) self.assertEqual(self.algo.cost_evaluation.calculate_cost.call_count, 3) def test_calculate_best_indexes_one_fits(self): self.algo.parameters["max_indexes"] = 1 indexes = self.algo._calculate_best_indexes(self.workload) expected_indexes = frozenset([self.index_2]) self.assertEqual(indexes, expected_indexes) self.algo.cost_evaluation.calculate_cost.assert_any_call( self.workload, set([self.index_0, self.index_1])) self.algo.cost_evaluation.calculate_cost.assert_any_call( self.workload, set([self.index_0, self.index_2])) self.algo.cost_evaluation.calculate_cost.assert_any_call( self.workload, set([self.index_1, self.index_2])) self.algo.cost_evaluation.calculate_cost.assert_any_call( self.workload, set([self.index_0])) self.algo.cost_evaluation.calculate_cost.assert_any_call( self.workload, set([self.index_2])) self.assertEqual(self.algo.cost_evaluation.calculate_cost.call_count, 5) def test_calculate_best_indexes_none_fits(self): self.algo.parameters["max_indexes"] = 0 with self.assertRaises(AssertionError): self.algo._calculate_best_indexes(self.workload)
class TestIBMAlgorithm(unittest.TestCase): def setUp(self): self.connector = MockConnector() self.algo = IBMAlgorithm(database_connector=self.connector) self.column_0 = Column("Col0") self.column_1 = Column("Col1") self.column_2 = Column("Col2") self.column_3 = Column("Col3") self.column_4 = Column("Col4") self.column_5 = Column("Col5") self.column_6 = Column("Col6") self.column_7 = Column("Col7") self.all_columns = [ self.column_0, self.column_1, self.column_2, self.column_3, self.column_4, self.column_5, self.column_6, self.column_7, ] self.table = Table("Table0") self.table.add_columns(self.all_columns) self.query_0 = Query( 0, "SELECT * FROM Table0 WHERE Col0 = 1 AND Col1 = 2 AND Col2 = 3;", self.all_columns, ) # query_1 = Query(1, 'SELECT * FROM TableA WHERE ColA = 4;', [self.column_0]) def test_ibm_algorithm(self): # Should use default parameters if none are specified budget_in_mb = 500 self.assertEqual(self.algo.disk_constraint, budget_in_mb * MB_TO_BYTES) self.assertEqual(self.algo.cost_evaluation.cost_estimation, "whatif") self.assertEqual(self.algo.seconds_limit, 10) self.assertEqual(self.algo.maximum_remove, 4) def test_possible_indexes(self): column_0_table_1 = Column("Col0") table_1 = Table("Table1") table_1.add_column(column_0_table_1) query = Query( 17, """SELECT * FROM Table0 as t0, Table1 as t1 WHERE t0.Col0 = 1" AND t0.Col1 = 2 AND t0.Col2 = 3 AND t1.Col0 = 17;""", [self.column_0, self.column_1, self.column_2, column_0_table_1], ) indexes = self.algo._possible_indexes(query) self.assertIn(Index([column_0_table_1]), indexes) self.assertIn(Index([self.column_0]), indexes) self.assertIn(Index([self.column_1]), indexes) self.assertIn(Index([self.column_2]), indexes) self.assertIn(Index([self.column_0, self.column_1]), indexes) self.assertIn(Index([self.column_0, self.column_2]), indexes) self.assertIn(Index([self.column_1, self.column_0]), indexes) self.assertIn(Index([self.column_1, self.column_2]), indexes) self.assertIn(Index([self.column_2, self.column_0]), indexes) self.assertIn(Index([self.column_2, self.column_1]), indexes) self.assertIn(Index([self.column_0, self.column_1, self.column_2]), indexes) self.assertIn(Index([self.column_0, self.column_2, self.column_1]), indexes) self.assertIn(Index([self.column_1, self.column_0, self.column_2]), indexes) self.assertIn(Index([self.column_1, self.column_2, self.column_0]), indexes) self.assertIn(Index([self.column_2, self.column_0, self.column_1]), indexes) self.assertIn(Index([self.column_2, self.column_1, self.column_0]), indexes) def test_recommended_indexes(self): def _simulate_index_mock(index, store_size): index.hypopg_name = f"<1337>btree_{index.columns}" # For some reason, the database decides to only use an index for one of # the filters def _simulate_get_plan(query): plan = { "Total Cost": 17, "Plans": [{ "Index Name": "<1337>btree_(C table0.col1,)", "Filter": "(Col0 = 1)", }], } return plan query = Query( 17, "SELECT * FROM Table0 WHERE Col0 = 1 AND Col1 = 2;", [self.column_0, self.column_1], ) self.algo.database_connector.get_plan = MagicMock( side_effect=_simulate_get_plan) self.algo.what_if.simulate_index = MagicMock( side_effect=_simulate_index_mock) self.algo.what_if.drop_all_simulated_indexes = MagicMock() indexes, cost = self.algo._recommended_indexes(query) self.assertEqual(cost, 17) self.assertEqual(indexes, {Index([self.column_1])}) self.assertEqual(self.algo.what_if.simulate_index.call_count, 4) self.algo.what_if.drop_all_simulated_indexes.assert_called_once() self.algo.database_connector.get_plan.assert_called_once_with(query) def test_exploit_virtual_indexes(self): def _simulate_index_mock(index, store_size): index.hypopg_name = f"<1337>btree_{index.columns}" # For some reason, the database decides to only use an index for one of # the filters def _simulate_get_plan(query): if "Table0" in query.text: return { "Total Cost": 17, "Plans": [{ "Index Name": "<1337>btree_(C table0.col1,)" }], } return { "Total Cost": 5, "Plans": [{ "Simple Table Retrieve": "table1" }] } query_0 = Query( 0, "SELECT * FROM Table0 WHERE Col0 = 1 AND Col1 = 2;", [self.column_0, self.column_1], ) query_1 = Query(1, "SELECT * FROM Table1;", []) workload = Workload([query_0, query_1], "database_name") self.algo.database_connector.get_plan = MagicMock( side_effect=_simulate_get_plan) self.algo.what_if.simulate_index = MagicMock( side_effect=_simulate_index_mock) self.algo.what_if.drop_all_simulated_indexes = MagicMock() query_results, index_candidates = self.algo._exploit_virtual_indexes( workload) self.assertEqual(len(query_results), len(workload.queries)) expected_first_result = { "cost_without_indexes": 17, "cost_with_recommended_indexes": 17, "recommended_indexes": set([Index([self.column_1])]), } expected_second_result = { "cost_without_indexes": 5, "cost_with_recommended_indexes": 5, "recommended_indexes": set(), } self.assertEqual(query_results[query_0], expected_first_result) self.assertEqual(query_results[query_1], expected_second_result) self.assertEqual(index_candidates, set([Index([self.column_1])])) def test_calculate_index_benefits(self): index_0 = Index([self.column_0]) index_0.estimated_size = 5 index_1 = Index([self.column_1]) index_1.estimated_size = 1 index_2 = Index([self.column_2]) index_2.estimated_size = 3 query_result_0 = { "cost_without_indexes": 100, "cost_with_recommended_indexes": 50, "recommended_indexes": [index_0, index_1], } # Yes, negative benefit is possible query_result_1 = { "cost_without_indexes": 50, "cost_with_recommended_indexes": 60, "recommended_indexes": [index_1], } query_result_2 = { "cost_without_indexes": 60, "cost_with_recommended_indexes": 57, "recommended_indexes": [index_2], } query_result_3 = { "cost_without_indexes": 60, "cost_with_recommended_indexes": 60, "recommended_indexes": [], } query_results = { "q0": query_result_0, "q1": query_result_1, "q2": query_result_2, "q3": query_result_3, } index_benefits = self.algo._calculate_index_benefits( [index_0, index_1, index_2], query_results) expected_index_benefits = [ IndexBenefit(index_1, 40), IndexBenefit(index_0, 50), IndexBenefit(index_2, 3), ] self.assertEqual(index_benefits, expected_index_benefits) def test_combine_subsumed(self): index_0_1 = Index([self.column_0, self.column_1]) index_0_1.estimated_size = 2 index_0 = Index([self.column_0]) index_0.estimated_size = 1 index_1 = Index([self.column_1]) index_1.estimated_size = 1 # Scenario 1. Index subsumed because better ratio for larger index index_benefits = [ IndexBenefit(index_0_1, 21), IndexBenefit(index_0, 10) ] subsumed = self.algo._combine_subsumed(index_benefits) expected = [IndexBenefit(index_0_1, 31)] self.assertEqual(subsumed, expected) # Scenario 2. Index not subsumed because better index has fewer attributes index_benefits = [ IndexBenefit(index_0, 11), IndexBenefit(index_0_1, 20) ] subsumed = self.algo._combine_subsumed(index_benefits) expected = [IndexBenefit(index_0, 11), IndexBenefit(index_0_1, 20)] self.assertEqual(subsumed, expected) # Scenario 3. Index not subsumed because last element does not match # attribute even though better ratio index_0_1_2 = Index([self.column_0, self.column_1, self.column_2]) index_0_1_2.estimated_size = 3 index_0_2 = Index([self.column_0, self.column_2]) index_0_2.estimated_size = 2 index_benefits = [ IndexBenefit(index_0_1_2, 31), IndexBenefit(index_0_2, 20) ] subsumed = self.algo._combine_subsumed(index_benefits) expected = [IndexBenefit(index_0_1_2, 31), IndexBenefit(index_0_2, 20)] self.assertEqual(subsumed, expected) # Scenario 4. Multi Index subsumed index_benefits = [ IndexBenefit(index_0_1_2, 31), IndexBenefit(index_0_1, 20) ] subsumed = self.algo._combine_subsumed(index_benefits) expected = [IndexBenefit(index_0_1_2, 51)] self.assertEqual(subsumed, expected) # Scenario 5. Multiple Indexes subsumed index_benefits = [ IndexBenefit(index_0_1_2, 31), IndexBenefit(index_0_1, 20), IndexBenefit(index_0, 10), ] subsumed = self.algo._combine_subsumed(index_benefits) expected = [IndexBenefit(index_0_1_2, 61)] self.assertEqual(subsumed, expected) # Scenario 6. Input returned if len(input) < 2 subsumed = self.algo._combine_subsumed([IndexBenefit(index_0_1, 21)]) expected = [IndexBenefit(index_0_1, 21)] self.assertEqual(subsumed, expected) # Scenario 7. Input not sorted by ratio throws with self.assertRaises(AssertionError): subsumed = self.algo._combine_subsumed( [IndexBenefit(index_0, 10), IndexBenefit(index_0_1, 21)]) def test_evaluate_workload(self): index_0 = Index([self.column_0]) index_1 = Index([self.column_1]) self.algo.cost_evaluation.calculate_cost = MagicMock() self.algo._evaluate_workload( [IndexBenefit(index_0, 10), IndexBenefit(index_1, 9)], workload=[]) self.algo.cost_evaluation.calculate_cost.assert_called_once_with( [], [index_0, index_1]) def test_try_variations_time_limit(self): index_0 = Index([self.column_0]) index_0.estimated_size = 1 index_1 = Index([self.column_1]) index_1.estimated_size = 1 index_2 = Index([self.column_2]) index_2.estimated_size = 1 index_3 = Index([self.column_3]) index_3.estimated_size = 1 index_4 = Index([self.column_4]) index_4.estimated_size = 1 index_5 = Index([self.column_5]) index_5.estimated_size = 1 index_6 = Index([self.column_6]) index_6.estimated_size = 1 index_7 = Index([self.column_7]) index_7.estimated_size = 5 self.algo.cost_evaluation.calculate_cost = MagicMock(return_value=17) self.algo.seconds_limit = 0.2 time_before = time.time() self.algo._try_variations( selected_index_benefits=frozenset([IndexBenefit(index_0, 1)]), index_benefits=frozenset([IndexBenefit(index_1, 1)]), workload=[], ) self.assertGreaterEqual(time.time(), time_before + self.algo.seconds_limit) def fake(selected, workload): cost = 10 if IndexBenefit(index_3, 1.5) in selected: cost -= 0.5 if IndexBenefit(index_4, 0.5) in selected: cost += 0.5 if IndexBenefit(index_1, 0.5) in selected: cost += 0.5 if IndexBenefit(index_1, 0.5) in selected: cost += 0.5 return cost # In this scenario a good index has not been selected (index_3). # We test three things: # (i) That index_3 gets chosen by variation. # (ii) That the weakest index from the original selection gets # removed (index_1). # (iii) That index_4 does not get chosen even though it is better than index_1. self.algo._evaluate_workload = fake self.algo.maximum_remove = 1 self.algo.disk_constraint = 3 new = self.algo._try_variations( selected_index_benefits=frozenset([ IndexBenefit(index_0, 1), IndexBenefit(index_1, 0.5), IndexBenefit(index_2, 1), ]), index_benefits=frozenset([ IndexBenefit(index_0, 1), IndexBenefit(index_1, 0.5), IndexBenefit(index_2, 1), IndexBenefit(index_3, 1.5), IndexBenefit(index_4, 0.6), ]), workload=[], ) self.assertIn(IndexBenefit(index_3, 1.5), new) self.assertNotIn(IndexBenefit(index_4, 0.5), new) self.assertNotIn(IndexBenefit(index_1, 0.5), new) # Test that good index is not chosen because of storage restrictions new = self.algo._try_variations( selected_index_benefits=frozenset([IndexBenefit(index_0, 1)]), index_benefits=frozenset( [IndexBenefit(index_0, 1), IndexBenefit(index_7, 5)]), workload=[], ) self.assertEqual(new, set([IndexBenefit(index_0, 1)]))
def test_table_eq_with_columns(self): table_1 = Table("TableA") table_1.add_column(Column("ColA")) table_2 = Table("TableA") self.assertFalse(table_1 == table_2) table_2.add_column(Column("ColA")) self.assertTrue(table_1 == table_2) table_1.add_column(Column("ColB")) table_1.add_column(Column("ColC")) self.assertFalse(table_1 == table_2) table_2.add_column(Column("ColB")) table_2.add_column(Column("ColC")) self.assertTrue(table_1 == table_2) # Testing same column names, but different order table_3 = Table("TableA") table_3.add_column(Column("ColC")) table_3.add_column(Column("ColB")) table_3.add_column(Column("ColA")) self.assertFalse(table_1 == table_3)
def test_table_repr(self): table = Table("TableA") self.assertEqual(repr(table), "tablea")
def test_column_added_to_table(self): column = Column(name="ColA") table = Table("TableA") table.add_column(column) self.assertEqual(column.table, table)
def test_split(self): # If there are no common columns, index splits are undefined index_0 = Index([self.column_0]) index_1 = Index([self.column_1]) result = index_split(index_0, index_1) expected = None self.assertEqual(result, expected) index_0 = Index([self.column_0, self.column_1]) index_1 = Index([self.column_1]) result = index_split(index_0, index_1) common_column_index = Index([self.column_1]) residual_column_index_0 = Index([self.column_0]) expected = {common_column_index, residual_column_index_0} self.assertEqual(result, expected) index_0 = Index([self.column_1]) index_1 = Index([self.column_1, self.column_2]) result = index_split(index_0, index_1) common_column_index = Index([self.column_1]) residual_column_index_1 = Index([self.column_2]) expected = {common_column_index, residual_column_index_1} self.assertEqual(result, expected) index_0 = Index([self.column_0, self.column_1]) index_1 = Index([self.column_1, self.column_2]) result = index_split(index_0, index_1) common_column_index = Index([self.column_1]) residual_column_index_0 = Index([self.column_0]) residual_column_index_1 = Index([self.column_2]) expected = { common_column_index, residual_column_index_0, residual_column_index_1, } self.assertEqual(result, expected) # Example from Bruno's paper column_a = Column("a") column_b = Column("b") column_c = Column("c") column_d = Column("d") column_e = Column("e") column_f = Column("f") column_g = Column("g") columns = [ column_a, column_b, column_c, column_d, column_e, column_f, column_g ] table = Table("TableB") table.add_columns(columns) index_1 = Index( [column_a, column_b, column_c, column_d, column_e, column_f]) index_2 = Index([column_c, column_a, column_e]) index_3 = Index([column_a, column_b, column_d, column_g]) result = index_split(index_1, index_2) # expected is different from the paper, because there was an error for I_R2 expected = { Index([column_a, column_c, column_e]), Index([column_b, column_d, column_f]), } self.assertEqual(result, expected) result = index_split(index_1, index_3) # expected is different from the paper, # because all columns are part of the key (there is no suffix) expected = { Index([column_a, column_b, column_d]), Index([column_c, column_e, column_f]), Index([column_g]), } self.assertEqual(result, expected)
class TestDB2AdvisAlgorithm(unittest.TestCase): def setUp(self): self.connector = MockConnector() self.algo = DB2AdvisAlgorithm(database_connector=self.connector) self.column_0 = Column("Col0") self.column_1 = Column("Col1") self.column_2 = Column("Col2") self.column_3 = Column("Col3") self.column_4 = Column("Col4") self.column_5 = Column("Col5") self.column_6 = Column("Col6") self.column_7 = Column("Col7") self.all_columns = [ self.column_0, self.column_1, self.column_2, self.column_3, self.column_4, self.column_5, self.column_6, self.column_7, ] self.table = Table("Table0") self.table.add_columns(self.all_columns) self.query_0 = Query( 0, "SELECT * FROM Table0 WHERE Col0 = 1 AND Col1 = 2 AND Col2 = 3;", self.all_columns, ) # query_1 = Query(1, 'SELECT * FROM TableA WHERE ColA = 4;', [self.column_0]) def test_db2advis_algorithm(self): # Should use default parameters if none are specified budget_in_mb = 500 self.assertEqual(self.algo.disk_constraint, budget_in_mb * MB_TO_BYTES) self.assertEqual(self.algo.cost_evaluation.cost_estimation, "whatif") self.assertEqual(self.algo.try_variations_seconds, 10) self.assertEqual(self.algo.try_variations_max_removals, 4) def test_index_benefit__lt__(self): index_0 = Index([self.column_0]) index_0.estimated_size = 1 index_1 = Index([self.column_1]) index_1.estimated_size = 2 # Due to its size, index_0 has the better ratio index_benefit_0 = IndexBenefit(index_0, 10) index_benefit_1 = IndexBenefit(index_1, 10) self.assertTrue(index_benefit_1 < index_benefit_0) # The ratios are equal, the columns are taken into consideration index_benefit_1 = IndexBenefit(index_1, 20) self.assertTrue(index_benefit_0 < index_benefit_1) def test_calculate_index_benefits(self): index_0 = Index([self.column_0]) index_0.estimated_size = 5 index_1 = Index([self.column_1]) index_1.estimated_size = 1 index_2 = Index([self.column_2]) index_2.estimated_size = 3 query_result_0 = { "cost_without_indexes": 100, "cost_with_indexes": 50, "utilized_indexes": [index_0, index_1], } # Yes, negative benefit is possible query_result_1 = { "cost_without_indexes": 50, "cost_with_indexes": 60, "utilized_indexes": [index_1], } query_result_2 = { "cost_without_indexes": 60, "cost_with_indexes": 57, "utilized_indexes": [index_2], } query_result_3 = { "cost_without_indexes": 60, "cost_with_indexes": 60, "utilized_indexes": [], } query_results = { "q0": query_result_0, "q1": query_result_1, "q2": query_result_2, "q3": query_result_3, } index_benefits = self.algo._calculate_index_benefits( [index_0, index_1, index_2], query_results) expected_index_benefits = [ IndexBenefit(index_1, 40), IndexBenefit(index_0, 50), IndexBenefit(index_2, 3), ] self.assertEqual(index_benefits, expected_index_benefits) def test_combine_subsumed(self): index_0_1 = Index([self.column_0, self.column_1]) index_0_1.estimated_size = 2 index_0 = Index([self.column_0]) index_0.estimated_size = 1 index_1 = Index([self.column_1]) index_1.estimated_size = 1 # Scenario 1. Index subsumed because better ratio for larger index index_benefits = [ IndexBenefit(index_0_1, 21), IndexBenefit(index_0, 10) ] subsumed = self.algo._combine_subsumed(index_benefits) expected = [IndexBenefit(index_0_1, 31)] self.assertEqual(subsumed, expected) # Scenario 2. Index not subsumed because better index has fewer attributes index_benefits = [ IndexBenefit(index_0, 11), IndexBenefit(index_0_1, 20) ] subsumed = self.algo._combine_subsumed(index_benefits) expected = [IndexBenefit(index_0, 11), IndexBenefit(index_0_1, 20)] self.assertEqual(subsumed, expected) # Scenario 3. Index not subsumed because last element does not match # attribute even though better ratio index_0_1_2 = Index([self.column_0, self.column_1, self.column_2]) index_0_1_2.estimated_size = 3 index_0_2 = Index([self.column_0, self.column_2]) index_0_2.estimated_size = 2 index_benefits = [ IndexBenefit(index_0_1_2, 31), IndexBenefit(index_0_2, 20) ] subsumed = self.algo._combine_subsumed(index_benefits) expected = [IndexBenefit(index_0_1_2, 31), IndexBenefit(index_0_2, 20)] self.assertEqual(subsumed, expected) # Scenario 4. Multi Index subsumed index_benefits = [ IndexBenefit(index_0_1_2, 31), IndexBenefit(index_0_1, 20) ] subsumed = self.algo._combine_subsumed(index_benefits) expected = [IndexBenefit(index_0_1_2, 51)] self.assertEqual(subsumed, expected) # Scenario 5. Multiple Indexes subsumed index_benefits = [ IndexBenefit(index_0_1_2, 31), IndexBenefit(index_0_1, 20), IndexBenefit(index_0, 10), ] subsumed = self.algo._combine_subsumed(index_benefits) expected = [IndexBenefit(index_0_1_2, 61)] self.assertEqual(subsumed, expected) # Scenario 6. Input returned if len(input) < 2 subsumed = self.algo._combine_subsumed([IndexBenefit(index_0_1, 21)]) expected = [IndexBenefit(index_0_1, 21)] self.assertEqual(subsumed, expected) # Scenario 7. Input not sorted by ratio throws with self.assertRaises(AssertionError): subsumed = self.algo._combine_subsumed( [IndexBenefit(index_0, 10), IndexBenefit(index_0_1, 21)]) def test_evaluate_workload(self): index_0 = Index([self.column_0]) index_1 = Index([self.column_1]) self.algo.cost_evaluation.calculate_cost = MagicMock() self.algo._evaluate_workload( [IndexBenefit(index_0, 10), IndexBenefit(index_1, 9)], workload=[]) self.algo.cost_evaluation.calculate_cost.assert_called_once_with( [], [index_0, index_1]) def test_try_variations_time_limit(self): index_0 = Index([self.column_0]) index_0.estimated_size = 1 index_1 = Index([self.column_1]) index_1.estimated_size = 1 index_2 = Index([self.column_2]) index_2.estimated_size = 1 index_3 = Index([self.column_3]) index_3.estimated_size = 1 index_4 = Index([self.column_4]) index_4.estimated_size = 1 index_5 = Index([self.column_5]) index_5.estimated_size = 1 index_6 = Index([self.column_6]) index_6.estimated_size = 1 index_7 = Index([self.column_7]) index_7.estimated_size = 5 self.algo.cost_evaluation.calculate_cost = MagicMock(return_value=17) self.algo.try_variations_seconds = 0.2 time_before = time.time() self.algo._try_variations( selected_index_benefits=frozenset([IndexBenefit(index_0, 1)]), index_benefits=frozenset([IndexBenefit(index_1, 1)]), workload=[], ) self.assertGreaterEqual(time.time(), time_before + self.algo.try_variations_seconds) def fake(selected, workload): cost = 10 if IndexBenefit(index_3, 1.5) in selected: cost -= 0.5 if IndexBenefit(index_4, 0.5) in selected: cost += 0.5 if IndexBenefit(index_1, 0.5) in selected: cost += 0.5 if IndexBenefit(index_1, 0.5) in selected: cost += 0.5 return cost # In this scenario a good index has not been selected (index_3). # We test three things: # (i) That index_3 gets chosen by variation. # (ii) That the weakest index from the original selection gets # removed (index_1). # (iii) That index_4 does not get chosen even though it is better than index_1. self.algo._evaluate_workload = fake self.algo.try_variations_max_removals = 1 self.algo.disk_constraint = 3 new = self.algo._try_variations( selected_index_benefits=frozenset([ IndexBenefit(index_0, 1), IndexBenefit(index_1, 0.5), IndexBenefit(index_2, 1), ]), index_benefits=frozenset([ IndexBenefit(index_0, 1), IndexBenefit(index_1, 0.5), IndexBenefit(index_2, 1), IndexBenefit(index_3, 1.5), IndexBenefit(index_4, 0.6), ]), workload=[], ) self.assertIn(IndexBenefit(index_3, 1.5), new) self.assertNotIn(IndexBenefit(index_4, 0.5), new) self.assertNotIn(IndexBenefit(index_1, 0.5), new) # Test that good index is not chosen because of storage restrictions new = self.algo._try_variations( selected_index_benefits=frozenset([IndexBenefit(index_0, 1)]), index_benefits=frozenset( [IndexBenefit(index_0, 1), IndexBenefit(index_7, 5)]), workload=[], ) self.assertEqual(new, set([IndexBenefit(index_0, 1)]))
def test_table(self): table = Table("TableA") self.assertEqual(table.name, "tablea") self.assertEqual(table.columns, [])
def test_column_repr(self): column = Column(name="ColA") table = Table("TableA") table.add_column(column) self.assertEqual(repr(column), "C tablea.cola")
class TestEpicAlgorithm(unittest.TestCase): def setUp(self): self.connector = MockConnector() self.algo = EPICAlgorithm(database_connector=self.connector) self.column_1 = Column("ColA") self.column_2 = Column("ColB") self.column_3 = Column("ColC") self.all_columns = [self.column_1, self.column_2, self.column_3] self.table = Table("TableA") self.table.add_columns(self.all_columns) self.index_1 = Index([self.column_1]) self.index_1.estimated_size = 5 self.index_2 = Index([self.column_2]) self.index_2.estimated_size = 1 self.index_3 = Index([self.column_3]) self.index_3.estimated_size = 3 query_1 = Query(0, "SELECT * FROM TableA WHERE ColA = 4;", [self.column_1]) query_2 = Query( 1, "SELECT * FROM TableA WHERE ColA = 1 AND ColB = 2 AND ColC = 3;", self.all_columns, ) self.database_name = "test_DB" self.workload = Workload([query_1, query_2], self.database_name) self.algo.workload = self.workload def test_attach_to_indexes(self): index_combination = [self.index_1, self.index_2] candidate = self.index_3 self.algo.initial_cost = 10 best = {"combination": [], "benefit_to_size_ratio": 0} self.algo._evaluate_combination = MagicMock() self.algo._attach_to_indexes( index_combination, candidate, best, self.algo.initial_cost ) first_new_combination = [ index_combination[1], Index(index_combination[0].columns + candidate.columns), ] self.algo._evaluate_combination.assert_any_call( first_new_combination, best, self.algo.initial_cost, 5 ) second_new_combination = [ index_combination[0], Index(index_combination[1].columns + candidate.columns), ] self.algo._evaluate_combination.assert_any_call( second_new_combination, best, self.algo.initial_cost, 1 ) multi_column_candidate = Index([self.column_2, self.column_3]) with self.assertRaises(AssertionError): self.algo._attach_to_indexes( index_combination, multi_column_candidate, best, self.algo.initial_cost ) def test_remove_impossible_canidates(self): # All Fit candidates = [self.index_1, self.index_2, self.index_3] self.algo.budget = 10 new_candidates = self.algo._get_candidates_within_budget( index_combination_size=0, candidates=candidates ) expected_candidates = candidates self.assertEqual(new_candidates, expected_candidates) # None fit because of algorithm budget self.algo.budget = 0 new_candidates = self.algo._get_candidates_within_budget( index_combination_size=0, candidates=candidates ) expected_candidates = [] self.assertEqual(new_candidates, expected_candidates) # None fit because of index_combination_size self.algo.budget = 10 new_candidates = self.algo._get_candidates_within_budget( index_combination_size=10, candidates=candidates ) expected_candidates = [] self.assertEqual(new_candidates, expected_candidates) # Some do not fit self.algo.budget = 4 new_candidates = self.algo._get_candidates_within_budget( index_combination_size=1, candidates=candidates ) expected_candidates = [self.index_2, self.index_3] self.assertEqual(new_candidates, expected_candidates) # Index with size none is not removed even though there is no space left self.index_1.estimated_size = None self.algo.budget = 0 new_candidates = self.algo._get_candidates_within_budget( index_combination_size=0, candidates=candidates ) expected_candidates = [self.index_1] self.assertEqual(new_candidates, expected_candidates) def test_evaluate_combination_worse_ratio(self): # Mock the internal algorithm state best_old = {"combination": [self.index_2], "benefit_to_size_ratio": 10} best_input = best_old.copy() new_index_combination = [self.index_1] # Mock internally called cost function self.algo.cost_evaluation.calculate_cost = MagicMock(return_value=4) self.algo.initial_cost = 14 cost = self.algo.cost_evaluation.calculate_cost( self.workload, new_index_combination, store_size=True ) benefit = self.algo.initial_cost - cost size = sum(x.estimated_size for x in new_index_combination) ratio = benefit / size assert best_old["benefit_to_size_ratio"] >= ratio # Above's specification leads to a benefit of 10. The index cost is 5. # The ratio is 2 which is worse than above's 10. Hence, best_input # should not change self.algo._evaluate_combination( new_index_combination, best_input, self.algo.initial_cost ) expected_best = best_old self.assertEqual(expected_best, best_input) def test_evaluate_combination_better_ratio_too_large(self): # Mock the internal algorithm state self.algo.budget = 2 best_old = { "combination": [self.index_2], "benefit_to_size_ratio": 1, "cost": 4, } best_input = best_old.copy() new_index_combination = [self.index_1] # Mock internally called cost function self.algo.cost_evaluation.calculate_cost = MagicMock(return_value=4) self.algo.initial_cost = 14 cost = self.algo.cost_evaluation.calculate_cost( self.workload, new_index_combination, store_size=True ) benefit = self.algo.initial_cost - cost size = sum(x.estimated_size for x in new_index_combination) ratio = benefit / size assert best_old["benefit_to_size_ratio"] < ratio # Above's specification leads to a benefit of 10. The index cost is 5. # The ratio is 2 which is better than above's 1. But the remaining budget # is not sufficient. self.algo._evaluate_combination( new_index_combination, best_input, self.algo.initial_cost ) expected_best = best_old self.assertEqual(expected_best, best_input) def test_evaluate_combination_better_ratio(self): # Mock the internal algorithm state best_old = { "combination": [self.index_2], "benefit_to_size_ratio": 1, } best_input = best_old.copy() new_index_combination = [self.index_1] # Mock internally called cost function self.algo.cost_evaluation.calculate_cost = MagicMock(return_value=4) self.algo.initial_cost = 14 # Above's specification leads to a benefit of 10. The index cost is 5. # The ratio is 2 which is better than above's 1. Hence, best_input should change self.algo._evaluate_combination( new_index_combination, best_input, self.algo.initial_cost ) expected_best = { "combination": new_index_combination, "benefit_to_size_ratio": 2, "cost": 4, } self.assertEqual(expected_best, best_input) def test_epic_algoritm(self): # Should use default parameters if none are specified budget_in_mb = 10 self.assertEqual(self.algo.budget, budget_in_mb * MB_TO_BYTES) self.assertEqual(self.algo.cost_evaluation.cost_estimation, "whatif") def _assign_size_1(self, index): index_sizes = { "tablea_cola_idx": 1, "tablea_colb_idx": 1, "tablea_colc_idx": 1, "tablea_colb_cola_idx": 2, "tablea_colb_colc_idx": 2, "tablea_colc_cola_idx": 2, "tablea_colc_colb_idx": 2, } # Assume large size if index not in mocked size table if index.index_idx() not in index_sizes: return sys.maxsize index.estimated_size = index_sizes[index.index_idx()] def _calculate_cost_mock_1(self, workload, indexes, store_size): for index in indexes: self._assign_size_1(index) index_combination_str = index_combination_to_str(indexes) index_combination_cost = { "": 100, "tablea_cola_idx": 90, "tablea_colb_idx": 80, "tablea_colc_idx": 70, "tablea_cola_idx||tablea_colc_idx": 60, "tablea_colb_idx||tablea_colc_idx": 50, "tablea_colc_idx||tablea_cola_idx": 60, "tablea_colc_idx||tablea_colb_idx": 50, "tablea_cola_idx||tablea_colb_idx||tablea_colc_idx": 40, # Below here multi, they do not result in benefit "tablea_colb_idx||tablea_colc_cola_idx": 1000, "tablea_colb_idx||tablea_colc_colb_idx": 1000, "tablea_colb_cola_idx||tablea_colc_idx": 1000, "tablea_colb_colc_idx||tablea_colc_idx": 1000, "tablea_colc_cola_idx": 1000, "tablea_colc_colb_idx": 1000, "tablea_colc_idx||tablea_colb_cola_idx": 1000, "tablea_colc_idx||tablea_colb_colc_idx": 1000, "tablea_colc_cola_idx||tablea_colb_idx": 1000, "tablea_colc_colb_idx||tablea_colb_idx": 1000, } # Assume high cost if index not in mocked cost table if index_combination_str not in index_combination_cost: return sys.maxsize return index_combination_cost[index_combination_str] # In this scenario, only single column indexes make sense. # They all have the same size but different benefits. def test_calculate_best_indexes_scenario_1(self): self.algo.cost_evaluation.calculate_cost = MagicMock( side_effect=self._calculate_cost_mock_1 ) # Each one alone of the single column indexes would fit, # but the one with the best benefit/cost ratio is chosen self.algo.budget = 1 indexes = self.algo._calculate_best_indexes(self.workload) expected_indexes = [Index([self.column_3])] self.assertEqual(indexes, expected_indexes) # Two single column indexes would fit, but the two best ones are chosen self.algo.budget = 2 indexes = self.algo._calculate_best_indexes(self.workload) expected_indexes = [Index([self.column_3]), Index([self.column_2])] self.assertEqual(indexes, expected_indexes) # All single column indexes are chosen self.algo.budget = 3 indexes = self.algo._calculate_best_indexes(self.workload) expected_indexes = [ Index([self.column_3]), Index([self.column_2]), Index([self.column_1]), ] self.assertEqual(indexes, expected_indexes) def _assign_size_2(self, index): index_sizes = { "tablea_cola_idx": 1, "tablea_colb_idx": 3, "tablea_colc_idx": 5, "tablea_cola_colb_idx": 20, "tablea_cola_colc_idx": 20, "tablea_colb_cola_idx": 20, "tablea_colb_colc_idx": 20, "tablea_colc_cola_idx": 20, "tablea_colc_colb_idx": 20, } index.estimated_size = index_sizes[index.index_idx()] def _calculate_cost_mock_2(self, workload, indexes, store_size): for index in indexes: self._assign_size_2(index) index_combination_str = index_combination_to_str(indexes) index_combination_cost = { "": 100, "tablea_cola_idx": 90, "tablea_colb_idx": 80, "tablea_colc_idx": 70, "tablea_cola_idx||tablea_colb_idx": 70, "tablea_cola_idx||tablea_colc_idx": 60, "tablea_colb_idx||tablea_colc_idx": 50, "tablea_cola_idx||tablea_colb_idx||tablea_colc_idx": 40, # Below here multi, they do not result in benefit "tablea_cola_colb_idx": 1000, "tablea_cola_colc_idx": 1000, "tablea_colb_colc_idx": 1000, "tablea_cola_idx||tablea_colb_cola_idx": 1000, "tablea_cola_idx||tablea_colb_colc_idx": 1000, "tablea_cola_idx||tablea_colc_cola_idx": 1000, "tablea_cola_idx||tablea_colc_colb_idx": 1000, "tablea_cola_colb_idx||tablea_colb_idx": 1000, "tablea_cola_colb_idx||tablea_colc_idx": 1000, "tablea_cola_colc_idx||tablea_colb_idx": 1000, } # Assume high cost if index not in mocked cost table if index_combination_str not in index_combination_cost: return sys.maxsize return index_combination_cost[index_combination_str] # In this scenario, only single column indexes make sense. # Size and benefit are antiproportional. def test_calculate_best_indexes_scenario_2(self): self.algo.cost_evaluation.calculate_cost = MagicMock( side_effect=self._calculate_cost_mock_2 ) # There is only one index fitting the budget self.algo.budget = 1 indexes = self.algo._calculate_best_indexes(self.workload) expected_indexes = [Index([self.column_1])] self.assertEqual(indexes, expected_indexes) # Theoretically, two indexes fit, but one has a better benefit/cost ratio self.algo.budget = 3 indexes = self.algo._calculate_best_indexes(self.workload) expected_indexes = [Index([self.column_1])] self.assertEqual(indexes, expected_indexes) # The two indexes with the best ratio should be chosen self.algo.budget = 5 indexes = self.algo._calculate_best_indexes(self.workload) expected_indexes = [Index([self.column_1]), Index([self.column_2])] self.assertEqual(indexes, expected_indexes) # All single column indexes are chosen self.algo.budget = 9 indexes = self.algo._calculate_best_indexes(self.workload) expected_indexes = [ Index([self.column_1]), Index([self.column_2]), Index([self.column_3]), ] self.assertEqual(indexes, expected_indexes) def _assign_size_3(self, index): index_sizes = { "tablea_cola_idx": 2, "tablea_colb_idx": 1.9, "tablea_cola_colb_idx": 4, "tablea_colb_cola_idx": 3, } index.estimated_size = index_sizes[index.index_idx()] def _calculate_cost_mock_3(self, workload, indexes, store_size): for index in indexes: self._assign_size_3(index) index_combination_str = index_combination_to_str(indexes) index_combination_cost = { "": 100, "tablea_cola_idx": 80, "tablea_colb_idx": 80, "tablea_cola_idx||tablea_colb_idx": 70, # Below here multi, they do not result in benefit "tablea_cola_colb_idx": 60, "tablea_colb_cola_idx": 60, "tablea_colb_cola_idx||tablea_colb_idx": 60, } return index_combination_cost[index_combination_str] # In this scenario, multi column indexes dominate single column indexes. def test_calculate_best_indexes_scenario_3(self): query_1 = Query( 0, "SELECT * FROM TableA WHERE ColA = 1 AND ColB = 2;", [self.column_1, self.column_2], ) workload = Workload([query_1], self.database_name) self.algo.cost_evaluation.calculate_cost = MagicMock( side_effect=self._calculate_cost_mock_3 ) # Budget too small for multi self.algo.budget = 2 indexes = self.algo._calculate_best_indexes(workload) expected_indexes = [Index([self.column_2])] self.assertEqual(indexes, expected_indexes) # Picks multi with best ratio self.algo.budget = 4 indexes = self.algo._calculate_best_indexes(workload) expected_indexes = [Index([self.column_2, self.column_1])] self.assertEqual(indexes, expected_indexes)
from selection.cost_evaluation import CostEvaluation from selection.index import Index from selection.workload import Column, Query, Table import itertools table_A = Table("TableA") column_A_0 = Column("Col0") column_A_1 = Column("Col1") column_A_2 = Column("Col2") table_A.add_columns([column_A_0, column_A_1, column_A_2]) query_0 = Query(0, "SELECT * FROM TableA WHERE Col0 = 4;", [column_A_0]) query_1 = Query( 1, "SELECT * FROM TableA WHERE Col0 = 1 AND Col1 = 2 AND Col2 = 3;", [column_A_0, column_A_1, column_A_2], ) mock_cache = {} table_A_potential_indexes = [] # Calculate potential indexes for TableA for number_of_columns in range(1, len(table_A.columns) + 1): for column_list in itertools.permutations(table_A.columns, number_of_columns): table_A_potential_indexes.append(Index(column_list)) # Calculate relevant indexes for query_0 based on potential indexes relevant_indexes_query1_table_A = CostEvaluation._relevant_indexes( query_0, table_A_potential_indexes )