def setUpClass(cls):
    """Create the class-wide fixture: a five-column table and a workload.

    The workload holds three queries on ``TestTableA``: one filtering on
    Col0, one on Col1, and one on both.
    """
    cls.db_name = "TestDB"

    # Table with the columns Col0 .. Col4.
    cls.table = Table("TestTableA")
    cls.columns = [
        Column(name) for name in ("Col0", "Col1", "Col2", "Col3", "Col4")
    ]
    cls.table.add_columns(cls.columns)

    single_col0 = Query(
        0, "SELECT * FROM TestTableA WHERE Col0 = 4", [cls.columns[0]]
    )
    single_col1 = Query(
        1, "SELECT * FROM TestTableA WHERE Col1 = 3", [cls.columns[1]]
    )
    both_columns = Query(
        2,
        "SELECT * FROM TestTableA WHERE Col0 = 14 AND Col1 = 13",
        [cls.columns[0], cls.columns[1]],
    )
    cls.queries = [single_col0, single_col1, both_columns]
    cls.workload = Workload(cls.queries, cls.db_name)
def test_column_eq(self):
    """Check ``==`` on columns before and after they are attached to tables.

    The test expects an ``AssertionError`` from the comparison as long as
    at least one of the two columns has no table.
    """
    first_table = Table("TableA")
    second_table = Table("TableA")
    col_a = Column(name="ColA")
    col_a_twin = Column(name="ColA")
    col_b = Column(name="ColB")

    # Neither operand has a table yet: equal names (col_a_twin) and
    # different names (col_b) must both raise.
    for other in (col_a_twin, col_b):
        with self.assertRaises(AssertionError):
            col_a == other

    first_table.add_column(col_a)

    # Only col_a has a table now; the comparison must raise regardless of
    # which operand is on the left-hand side.
    with self.assertRaises(AssertionError):
        col_a == col_a_twin
    with self.assertRaises(AssertionError):
        col_a_twin == col_a

    # Same column name on two tables that share the same name.
    second_table.add_column(col_a_twin)
    self.assertTrue(col_a == col_a_twin)

    # Different column name.
    second_table.add_column(col_b)
    self.assertFalse(col_a == col_b)

    # Check comparing object of different class
    self.assertFalse(col_a == 3)
def test_workload_indexable_columns(self):
    """``Workload.indexable_columns`` yields every column used by any query.

    ColA appears in both queries; the expected result lists it once.
    """
    table = Table("TableA")
    col_a = Column(name="ColA")
    col_b = Column(name="ColB")
    col_c = Column(name="ColC")
    for column in (col_a, col_b, col_c):
        table.add_column(column)

    queries = [
        Query(
            17,
            "SELECT * FROM TableA WHERE ColA = 4 AND ColB = 5;",
            columns=[col_a, col_b],
        ),
        Query(
            18,
            "SELECT * FROM TableA WHERE ColA = 3 AND ColC = 2;",
            columns=[col_a, col_c],
        ),
    ]
    workload = Workload(queries, "test_DB")

    # Compare in sorted order so the order returned by the workload is irrelevant.
    actual = sorted(workload.indexable_columns())
    self.assertEqual(actual, sorted([col_a, col_b, col_c]))
def setUp(self):
    """Prepare a DropHeuristicAlgorithm with a mocked cost function.

    Builds a three-column table, one single-column index per column and a
    two-query workload, then replaces ``cost_evaluation.calculate_cost``
    with a ``MagicMock`` that delegates to ``self._calculate_cost_mock``.
    """
    self.connector = MockConnector()
    self.algo = DropHeuristicAlgorithm(database_connector=self.connector)

    self.all_columns = [Column(name) for name in ("Col0", "Col1", "Col2")]
    self.column_0, self.column_1, self.column_2 = self.all_columns

    self.table = Table("TableA")
    self.table.add_columns(self.all_columns)

    self.index_0 = Index([self.column_0])
    self.index_1 = Index([self.column_1])
    self.index_2 = Index([self.column_2])

    narrow_query = Query(
        0, "SELECT * FROM TableA WHERE Col0 = 4;", [self.column_0]
    )
    wide_query = Query(
        1,
        "SELECT * FROM TableA WHERE Col0 = 1 AND Col1 = 2 AND Col2 = 3;",
        self.all_columns,
    )

    # The database name is stored on the test case but is not passed to Workload.
    self.database_name = "test_DB"
    self.workload = Workload([narrow_query, wide_query])
    self.algo.workload = self.workload

    cost_mock = MagicMock(side_effect=self._calculate_cost_mock)
    self.algo.cost_evaluation.calculate_cost = cost_mock
def test_column_lt(self):
    """Check ``<`` on columns: false for equal names, true for ColA < ColB."""
    col_a = Column(name="ColA")
    same_name = Column(name="ColA")
    col_b = Column(name="ColB")

    self.assertFalse(col_a < same_name)
    self.assertTrue(col_a < col_b)
def setUp(self):
    """Prepare an EPICAlgorithm with a three-column table and a workload.

    Each column gets a single-column index with a fixed ``estimated_size``
    (5, 1 and 3 for ColA, ColB and ColC respectively).
    """
    self.connector = MockConnector()
    self.algo = EPICAlgorithm(database_connector=self.connector)

    self.all_columns = [Column(name) for name in ("ColA", "ColB", "ColC")]
    self.column_1, self.column_2, self.column_3 = self.all_columns

    self.table = Table("TableA")
    self.table.add_columns(self.all_columns)

    self.index_1 = Index([self.column_1])
    self.index_1.estimated_size = 5

    self.index_2 = Index([self.column_2])
    self.index_2.estimated_size = 1

    self.index_3 = Index([self.column_3])
    self.index_3.estimated_size = 3

    only_col_a = Query(
        0, "SELECT * FROM TableA WHERE ColA = 4;", [self.column_1]
    )
    every_column = Query(
        1,
        "SELECT * FROM TableA WHERE ColA = 1 AND ColB = 2 AND ColC = 3;",
        self.all_columns,
    )

    self.database_name = "test_DB"
    self.workload = Workload([only_col_a, every_column], self.database_name)
    self.algo.workload = self.workload
def setUpClass(cls):
    """Create three columns (Col0..Col2) and attach them to ``TableA``."""
    cls.column_0 = Column("Col0")
    cls.column_1 = Column("Col1")
    cls.column_2 = Column("Col2")

    cls.table = Table("TableA")
    cls.table.add_columns([cls.column_0, cls.column_1, cls.column_2])
def setUpClass(cls):
    """Create two tables with overlapping column names and three indexes.

    TableA owns Col0 and Col1, TableB owns its own Col0. ``index_0`` and
    ``index_2`` are on TableA's columns, ``index_1`` is on TableB's Col0.
    """
    # TableA and its two columns.
    cls.column_a_0 = Column("Col0")
    cls.column_a_1 = Column("Col1")
    cls.table_a = Table("TableA")
    cls.table_a.add_columns([cls.column_a_0, cls.column_a_1])

    # TableB reuses the column name Col0 on a different table.
    cls.column_b_0 = Column("Col0")
    cls.table_b = Table("TableB")
    cls.table_b.add_columns([cls.column_b_0])

    # One single-column index per column.
    cls.index_0 = Index([cls.column_a_0])
    cls.index_1 = Index([cls.column_b_0])
    cls.index_2 = Index([cls.column_a_1])
def setUp(self):
    """Prepare an IBMAlgorithm, an eight-column table and one query.

    ``query_0`` is registered with all eight columns even though its SQL
    text only mentions Col0, Col1 and Col2.
    """
    self.connector = MockConnector()
    self.algo = IBMAlgorithm(database_connector=self.connector)

    column_names = (
        "Col0", "Col1", "Col2", "Col3", "Col4", "Col5", "Col6", "Col7",
    )
    self.all_columns = [Column(name) for name in column_names]
    (
        self.column_0,
        self.column_1,
        self.column_2,
        self.column_3,
        self.column_4,
        self.column_5,
        self.column_6,
        self.column_7,
    ) = self.all_columns

    self.table = Table("Table0")
    self.table.add_columns(self.all_columns)

    self.query_0 = Query(
        0,
        "SELECT * FROM Table0 WHERE Col0 = 1 AND Col1 = 2 AND Col2 = 3;",
        self.all_columns,
    )
def test_query(self):
    """A ``Query`` exposes its number, text and columns.

    ``columns`` is an empty list when no columns are passed.
    """
    # Query without columns.
    bare_query = Query(17, "SELECT * FROM lineitem;")
    self.assertEqual(bare_query.nr, 17)
    self.assertEqual(bare_query.text, "SELECT * FROM lineitem;")
    self.assertEqual(bare_query.columns, [])

    # Query with explicit columns.
    col_a = Column(name="ColA")
    col_b = Column(name="ColB")
    query_with_columns = Query(
        18, "SELECT * FROM nation;", columns=[col_a, col_b]
    )
    self.assertEqual(query_with_columns.nr, 18)
    self.assertEqual(query_with_columns.text, "SELECT * FROM nation;")
    self.assertEqual(query_with_columns.columns, [col_a, col_b])
def test_table_add_column(self):
    """``add_column`` / ``add_columns`` append columns and set ``column.table``."""
    table = Table("TableA")

    # Single column via add_column.
    col_a = Column("ColA")
    table.add_column(col_a)
    self.assertEqual(table.columns, [col_a])
    self.assertEqual(col_a.table, table)

    # Several columns at once via add_columns; insertion order is kept.
    col_b = Column("ColB")
    col_c = Column("ColC")
    table.add_columns([col_b, col_c])
    self.assertEqual(table.columns, [col_a, col_b, col_c])
    self.assertEqual(col_b.table, table)
    self.assertEqual(col_c.table, table)
def setUpClass(cls):
    """Create a five-column table and single-column indexes on Col0..Col2."""
    cls.db_name = "TestDB"

    cls.table = Table("TestTableA")
    cls.columns = [
        Column(name) for name in ("Col0", "Col1", "Col2", "Col3", "Col4")
    ]
    cls.table.add_columns(cls.columns)

    # Only the first three columns get an index.
    first, second, third = cls.columns[:3]
    cls.index_0 = Index([first])
    cls.index_1 = Index([second])
    cls.index_2 = Index([third])
def test_possible_indexes(self):
    """``_possible_indexes`` yields all column permutations per table.

    The query touches three columns of Table0 and one column of Table1.
    The test expects the single-column index on Table1 plus every ordered
    permutation (width 1 to 3) of the three Table0 columns.
    """
    column_0_table_1 = Column("Col0")
    table_1 = Table("Table1")
    table_1.add_column(column_0_table_1)

    # The original SQL text carried a stray, unbalanced double quote after
    # "t0.Col0 = 1"; it is removed here so the text is well-formed SQL.
    query = Query(
        17,
        "SELECT * FROM Table0 as t0, Table1 as t1 WHERE t0.Col0 = 1 "
        "AND t0.Col1 = 2 AND t0.Col2 = 3 AND t1.Col0 = 17;",
        [self.column_0, self.column_1, self.column_2, column_0_table_1],
    )
    indexes = self.algo._possible_indexes(query)

    self.assertIn(Index([column_0_table_1]), indexes)

    # Every ordered permutation of the Table0 columns, listed explicitly so
    # the expectation does not depend on the permutation logic under test.
    expected_table_0_orders = [
        [self.column_0],
        [self.column_1],
        [self.column_2],
        [self.column_0, self.column_1],
        [self.column_0, self.column_2],
        [self.column_1, self.column_0],
        [self.column_1, self.column_2],
        [self.column_2, self.column_0],
        [self.column_2, self.column_1],
        [self.column_0, self.column_1, self.column_2],
        [self.column_0, self.column_2, self.column_1],
        [self.column_1, self.column_0, self.column_2],
        [self.column_1, self.column_2, self.column_0],
        [self.column_2, self.column_0, self.column_1],
        [self.column_2, self.column_1, self.column_0],
    ]
    for column_order in expected_table_0_orders:
        self.assertIn(Index(column_order), indexes)
def setUpClass(cls):
    """Create two tables and a join query spanning both.

    Table0 owns Col0..Col2, Table1 owns its own Col0. ``query_0`` lists all
    four columns.
    """
    cls.column_0 = Column("Col0")
    cls.column_1 = Column("Col1")
    cls.column_2 = Column("Col2")
    cls.table = Table("Table0")
    cls.table.add_columns([cls.column_0, cls.column_1, cls.column_2])

    cls.column_0_table_1 = Column("Col0")
    cls.table_1 = Table("Table1")
    cls.table_1.add_column(cls.column_0_table_1)

    # The original SQL text carried a stray, unbalanced double quote after
    # "t0.Col0 = 1"; it is removed here so the text is well-formed SQL.
    cls.query_0 = Query(
        17,
        "SELECT * FROM Table0 as t0, Table1 as t1 WHERE t0.Col0 = 1 "
        "AND t0.Col1 = 2 AND t0.Col2 = 3 AND t1.Col0 = 17;",
        [cls.column_0, cls.column_1, cls.column_2, cls.column_0_table_1],
    )
def test_appendable_by_other_table(self):
    """An index is not appendable by an index on a different table."""
    foreign_table = Table("TableZ")
    foreign_column = Column("ColZ")
    foreign_table.add_column(foreign_column)
    foreign_index = Index([foreign_column])

    local_index = Index([self.column_0])
    self.assertFalse(local_index.appendable_by(foreign_index))
def test_generate_tpds(self):
    """Generate a TPC-DS database (scale 0.001) and verify its contents.

    Checks that the generator knows the ``item`` table and its
    ``i_item_sk`` column, and that every TPC-DS table exists in the
    generated database. Both connectors are closed even when an assertion
    fails, so a failing run does not leak connections.
    """
    try:
        table_generator = TableGenerator(
            "tpcds", 0.001, self.generating_connector)

        # Check that item table exists in TableGenerator
        item_table = next(
            (table for table in table_generator.tables
             if table.name == "item"),
            None,
        )
        self.assertIsNotNone(item_table)

        # Check that i_item_sk column exists in TableGenerator and Table object.
        # NOTE(review): add_column presumably also appends the probe column
        # to item_table, which would make the second assertIn trivially
        # true -- confirm the intent.
        i_item_sk = Column("i_item_sk")
        item_table.add_column(i_item_sk)
        self.assertIn(i_item_sk, table_generator.columns)
        # Use item_table explicitly rather than the leaked loop variable.
        self.assertIn(i_item_sk, item_table.columns)

        database_connect = PostgresDatabaseConnector(
            table_generator.database_name(), autocommit=True)
        try:
            tpcds_tables = [
                "call_center",
                "catalog_page",
                "catalog_returns",
                "catalog_sales",
                "customer",
                "customer_address",
                "customer_demographics",
                "date_dim",
                "household_demographics",
                "income_band",
                "inventory",
                "item",
                "promotion",
                "reason",
                "ship_mode",
                "store",
                "store_returns",
                "store_sales",
                "time_dim",
                "warehouse",
                "web_page",
                "web_returns",
                "web_sales",
                "web_site",
            ]
            for tpcds_table in tpcds_tables:
                self.assertTrue(
                    database_connect.table_exists(tpcds_table),
                    "missing table: " + tpcds_table,
                )
        finally:
            database_connect.close()
    finally:
        self.generating_connector.close()
def test_table_eq_with_columns(self):
    """Table equality depends on the name, the columns and their order."""
    left = Table("TableA")
    left.add_column(Column("ColA"))
    right = Table("TableA")

    # Same name, but right has no columns yet.
    self.assertFalse(left == right)

    right.add_column(Column("ColA"))
    self.assertTrue(left == right)

    # Grow left only, then catch right up.
    for name in ("ColB", "ColC"):
        left.add_column(Column(name))
    self.assertFalse(left == right)

    for name in ("ColB", "ColC"):
        right.add_column(Column(name))
    self.assertTrue(left == right)

    # Testing same column names, but different order
    reordered = Table("TableA")
    for name in ("ColC", "ColB", "ColA"):
        reordered.add_column(Column(name))
    self.assertFalse(left == reordered)
def test_runtime_data_logging(self):
    """The connector records counts and durations of its operations.

    Covers cost estimation, index simulation and dropping a simulated
    index; dropping is expected to add to ``index_simulation_duration``.
    The connector is closed in a ``finally`` block so the connection does
    not leak, whether the assertions pass or fail.
    """
    db = PostgresDatabaseConnector(self.db_name, "postgres")
    try:
        query = Query(17, "SELECT count(*) FROM nation;")
        db.get_cost(query)
        self.assertEqual(db.cost_estimations, 1)
        self.assertGreater(db.cost_estimation_duration, 0)

        column_n_name = Column("n_name")
        nation_table = Table("nation")
        nation_table.add_column(column_n_name)
        index = Index([column_n_name])

        # simulate_index returns a sequence whose first element is the oid.
        index_oid = db.simulate_index(index)[0]
        self.assertGreater(db.index_simulation_duration, 0)
        self.assertEqual(db.simulated_indexes, 1)

        previous_simulation_duration = db.index_simulation_duration
        db.drop_simulated_index(index_oid)
        self.assertGreater(
            db.index_simulation_duration, previous_simulation_duration)
    finally:
        db.close()
def test_merge(self):
    """``index_merge`` appends the second index's unseen columns to the first."""
    col_0, col_1, col_2 = self.column_0, self.column_1, self.column_2

    # Disjoint single-column indexes.
    self.assertEqual(
        index_merge(Index([col_0]), Index([col_1])),
        Index([col_0, col_1]),
    )

    # Overlap on Col1: it is not repeated in the result.
    self.assertEqual(
        index_merge(Index([col_0, col_1]), Index([col_1, col_2])),
        Index([col_0, col_1, col_2]),
    )

    # Same columns in a different order: the first index's order is kept.
    self.assertEqual(
        index_merge(Index([col_0, col_1]), Index([col_1, col_0])),
        Index([col_0, col_1]),
    )

    # Example from Bruno's paper
    paper_columns = [
        Column(name) for name in ("a", "b", "c", "d", "e", "f", "g")
    ]
    (column_a, column_b, column_c, column_d,
     column_e, column_f, column_g) = paper_columns
    paper_table = Table("TableB")
    paper_table.add_columns(paper_columns)

    wide_index = Index(
        [column_a, column_b, column_c, column_d, column_e, column_f]
    )
    narrow_index = Index([column_c, column_d, column_g, column_e])
    merged = index_merge(wide_index, narrow_index)
    self.assertEqual(
        merged,
        Index(
            [
                column_a,
                column_b,
                column_c,
                column_d,
                column_e,
                column_f,
                column_g,
            ]
        ),
    )
def test_generate_tpch(self):
    """Generate a TPC-H database (scale 0.001) and verify its contents.

    Checks that the generator knows the ``lineitem`` table and its
    ``l_receiptdate`` column, and that every TPC-H table exists in the
    generated database. Both connectors are closed even when an assertion
    fails, so a failing run does not leak connections.
    """
    try:
        table_generator = TableGenerator(
            "tpch", 0.001, self.generating_connector)

        # Check that lineitem table exists in TableGenerator
        lineitem_table = next(
            (table for table in table_generator.tables
             if table.name == "lineitem"),
            None,
        )
        self.assertIsNotNone(lineitem_table)

        # Check that l_receiptdate column exists in TableGenerator and Table
        # object.
        # NOTE(review): add_column presumably also appends the probe column
        # to lineitem_table, which would make the second assertIn trivially
        # true -- confirm the intent.
        l_receiptdate = Column("l_receiptdate")
        lineitem_table.add_column(l_receiptdate)
        self.assertIn(l_receiptdate, table_generator.columns)
        # Use lineitem_table explicitly rather than the leaked loop variable.
        self.assertIn(l_receiptdate, lineitem_table.columns)

        database_connect = PostgresDatabaseConnector(
            table_generator.database_name(), autocommit=True)
        try:
            tpch_tables = [
                "customer",
                "lineitem",
                "nation",
                "orders",
                "part",
                "partsupp",
                "region",
                "supplier",
            ]
            for tpch_table in tpch_tables:
                self.assertTrue(
                    database_connect.table_exists(tpch_table),
                    "missing table: " + tpch_table,
                )
        finally:
            database_connect.close()
    finally:
        self.generating_connector.close()
def test_column(self):
    """A new ``Column`` has a lower-cased name and no table."""
    fresh_column = Column(name="ColA")

    self.assertEqual(fresh_column.name, "cola")
    self.assertIsNone(fresh_column.table)
def test_split(self):
    """``index_split`` returns a common-columns index plus residual indexes.

    The result is compared as a set of indexes; ``None`` is expected when
    the two inputs share no column.
    """
    col_0, col_1, col_2 = self.column_0, self.column_1, self.column_2

    # If there are no common columns, index splits are undefined
    self.assertEqual(index_split(Index([col_0]), Index([col_1])), None)

    # Common column Col1, residual only from the first index.
    self.assertEqual(
        index_split(Index([col_0, col_1]), Index([col_1])),
        {Index([col_1]), Index([col_0])},
    )

    # Common column Col1, residual only from the second index.
    self.assertEqual(
        index_split(Index([col_1]), Index([col_1, col_2])),
        {Index([col_1]), Index([col_2])},
    )

    # Common column Col1, residuals from both indexes.
    self.assertEqual(
        index_split(Index([col_0, col_1]), Index([col_1, col_2])),
        {Index([col_1]), Index([col_0]), Index([col_2])},
    )

    # Example from Bruno's paper
    paper_columns = [
        Column(name) for name in ("a", "b", "c", "d", "e", "f", "g")
    ]
    (column_a, column_b, column_c, column_d,
     column_e, column_f, column_g) = paper_columns
    paper_table = Table("TableB")
    paper_table.add_columns(paper_columns)

    index_1 = Index(
        [column_a, column_b, column_c, column_d, column_e, column_f]
    )
    index_2 = Index([column_c, column_a, column_e])
    index_3 = Index([column_a, column_b, column_d, column_g])

    # expected is different from the paper, because there was an error for I_R2
    self.assertEqual(
        index_split(index_1, index_2),
        {
            Index([column_a, column_c, column_e]),
            Index([column_b, column_d, column_f]),
        },
    )

    # expected is different from the paper,
    # because all columns are part of the key (there is no suffix)
    self.assertEqual(
        index_split(index_1, index_3),
        {
            Index([column_a, column_b, column_d]),
            Index([column_c, column_e, column_f]),
            Index([column_g]),
        },
    )
def test_column_added_to_table(self):
    """``Table.add_column`` sets the column's ``table`` attribute."""
    owner = Table("TableA")
    member = Column(name="ColA")
    owner.add_column(member)

    self.assertEqual(member.table, owner)
def test_column_repr(self):
    """``repr`` of an attached column is ``"C <table>.<column>"`` in lower case."""
    owner = Table("TableA")
    member = Column(name="ColA")
    owner.add_column(member)

    self.assertEqual(repr(member), "C tablea.cola")
import itertools

from selection.cost_evaluation import CostEvaluation
from selection.index import Index
from selection.workload import Column, Query, Table

# Shared fixture: TableA with the columns Col0, Col1 and Col2.
table_A = Table("TableA")
column_A_0 = Column("Col0")
column_A_1 = Column("Col1")
column_A_2 = Column("Col2")
table_A.add_columns([column_A_0, column_A_1, column_A_2])

# One query on Col0 only and one query touching all three columns.
query_0 = Query(
    0,
    "SELECT * FROM TableA WHERE Col0 = 4;",
    [column_A_0],
)
query_1 = Query(
    1,
    "SELECT * FROM TableA WHERE Col0 = 1 AND Col1 = 2 AND Col2 = 3;",
    [column_A_0, column_A_1, column_A_2],
)

mock_cache = {}

# Potential indexes for TableA: every ordered permutation of its columns,
# for each width from 1 up to the number of columns.
table_A_potential_indexes = []
for number_of_columns in range(1, len(table_A.columns) + 1):
    for column_list in itertools.permutations(
        table_A.columns, number_of_columns
    ):
        table_A_potential_indexes.append(Index(column_list))

# Relevant indexes for query_0, filtered from the potential indexes.
# NOTE(review): the variable name says "query1" but the input is query_0.
relevant_indexes_query1_table_A = CostEvaluation._relevant_indexes(
    query_0, table_A_potential_indexes
)