def test_workload_indexable_columns(self):
        table = Table("TableA")
        column_1 = Column(name="ColA")
        column_2 = Column(name="ColB")
        column_3 = Column(name="ColC")
        table.add_column(column_1)
        table.add_column(column_2)
        table.add_column(column_3)

        query_1 = Query(
            17,
            "SELECT * FROM TableA WHERE ColA = 4 AND ColB = 5;",
            columns=[column_1, column_2],
        )
        query_2 = Query(
            18,
            "SELECT * FROM TableA WHERE ColA = 3 AND ColC = 2;",
            columns=[column_1, column_3],
        )
        database_name = "test_DB"

        workload = Workload([query_1, query_2], database_name)
        indexable_columns = workload.indexable_columns()
        self.assertEqual(sorted(indexable_columns),
                         sorted([column_1, column_2, column_3]))
Exemple #2
0
    def setUpClass(cls):
        cls.db_name = "TestDB"

        cls.table = Table("TestTableA")
        cls.columns = [
            Column("Col0"),
            Column("Col1"),
            Column("Col2"),
            Column("Col3"),
            Column("Col4"),
        ]
        cls.table.add_columns(cls.columns)

        cls.queries = [
            Query(0, "SELECT * FROM TestTableA WHERE Col0 = 4",
                  [cls.columns[0]]),
            Query(1, "SELECT * FROM TestTableA WHERE Col1 = 3",
                  [cls.columns[1]]),
            Query(
                2,
                "SELECT * FROM TestTableA WHERE Col0 = 14 AND Col1 = 13",
                [cls.columns[0], cls.columns[1]],
            ),
        ]

        cls.workload = Workload(cls.queries, cls.db_name)
Exemple #3
0
    def setUp(self):
        self.connector = MockConnector()
        self.algo = DropHeuristicAlgorithm(database_connector=self.connector)

        self.column_0 = Column("Col0")
        self.column_1 = Column("Col1")
        self.column_2 = Column("Col2")
        self.all_columns = [self.column_0, self.column_1, self.column_2]

        self.table = Table("TableA")
        self.table.add_columns(self.all_columns)

        self.index_0 = Index([self.column_0])
        self.index_1 = Index([self.column_1])
        self.index_2 = Index([self.column_2])

        query_0 = Query(0, "SELECT * FROM TableA WHERE Col0 = 4;",
                        [self.column_0])
        query_1 = Query(
            1,
            "SELECT * FROM TableA WHERE Col0 = 1 AND Col1 = 2 AND Col2 = 3;",
            self.all_columns,
        )
        self.database_name = "test_DB"

        self.workload = Workload([query_0, query_1])
        self.algo.workload = self.workload
        self.algo.cost_evaluation.calculate_cost = MagicMock(
            side_effect=self._calculate_cost_mock)
    def setUp(self):
        self.connector = MockConnector()
        self.algo = EPICAlgorithm(database_connector=self.connector)

        self.column_1 = Column("ColA")
        self.column_2 = Column("ColB")
        self.column_3 = Column("ColC")
        self.all_columns = [self.column_1, self.column_2, self.column_3]

        self.table = Table("TableA")
        self.table.add_columns(self.all_columns)

        self.index_1 = Index([self.column_1])
        self.index_1.estimated_size = 5
        self.index_2 = Index([self.column_2])
        self.index_2.estimated_size = 1
        self.index_3 = Index([self.column_3])
        self.index_3.estimated_size = 3

        query_1 = Query(0, "SELECT * FROM TableA WHERE ColA = 4;", [self.column_1])
        query_2 = Query(
            1,
            "SELECT * FROM TableA WHERE ColA = 1 AND ColB = 2 AND ColC = 3;",
            self.all_columns,
        )
        self.database_name = "test_DB"

        self.workload = Workload([query_1, query_2], self.database_name)
        self.algo.workload = self.workload
Exemple #5
0
    def test_get_utilized_indexes(self):
        class CostEvaluationMock:
            def which_indexes_utilized_and_cost(_, query, indexes):
                if query.nr == 0:
                    return [{self.index_0}, 17]
                if query.nr == 1:
                    return [{self.index_0, self.index_2}, 14]

            def calculate_cost(_, workload, indexes):
                assert len(workload.queries) == 1, (
                    "get_utilized_indexes' calculate_cost_mock should not be "
                    "called with workloads that contain more than one query"
                )
                assert indexes == [], (
                    "get_utilized_indexes' calculate_cost_mock should not be "
                    "called with indexes"
                )

                query = workload.queries[0]

                if query.nr == 0:
                    return 170
                if query.nr == 1:
                    return 140

        query_0 = Query(0, "SELECT * FROM tablea WHERE col0 = 4;", [self.column_a_0])
        query_1 = Query(
            1,
            (
                "SELECT * FROM tablea as a, tableb as b WHERE a.col0 = 4 AND "
                "a.col1 = 17AND b.col0 = 3;"
            ),
            [self.column_a_0, self.column_a_1, self.column_b_0],
        )
        workload = Workload([query_0, query_1])
        candidates = candidates_per_query(workload, 2, syntactically_relevant_indexes)

        utilized_indexes, query_details = get_utilized_indexes(
            workload, candidates, CostEvaluationMock()
        )
        self.assertEqual(query_details, {})
        self.assertEqual(utilized_indexes, {self.index_0, self.index_2})

        expected_first_result = {
            "cost_without_indexes": 170,
            "cost_with_indexes": 17,
            "utilized_indexes": {self.index_0},
        }
        expected_second_result = {
            "cost_without_indexes": 140,
            "cost_with_indexes": 14,
            "utilized_indexes": {self.index_0, self.index_2},
        }
        utilized_indexes, query_details = get_utilized_indexes(
            workload, candidates, CostEvaluationMock(), detailed_query_information=True
        )
        self.assertEqual(query_details[query_0], expected_first_result)
        self.assertEqual(query_details[query_1], expected_second_result)
        self.assertEqual(utilized_indexes, {self.index_0, self.index_2})
    def test_workload(self):
        query_1 = Query(17, "SELECT * FROM TableA;")
        query_2 = Query(18, "SELECT * FROM nation;")
        database_name = "test_DB"

        workload = Workload([query_1, query_2], database_name)
        self.assertEqual(workload.queries, [query_1, query_2])
        self.assertEqual(workload.database_name, database_name)
Exemple #7
0
    def test_exploit_virtual_indexes(self):
        def _simulate_index_mock(index, store_size):
            index.hypopg_name = f"<1337>btree_{index.columns}"

        # For some reason, the database decides to only use an index for one of
        # the filters
        def _simulate_get_plan(query):
            if "Table0" in query.text:
                return {
                    "Total Cost": 17,
                    "Plans": [{
                        "Index Name": "<1337>btree_(C table0.col1,)"
                    }],
                }

            return {
                "Total Cost": 5,
                "Plans": [{
                    "Simple Table Retrieve": "table1"
                }]
            }

        query_0 = Query(
            0,
            "SELECT * FROM Table0 WHERE Col0 = 1 AND Col1 = 2;",
            [self.column_0, self.column_1],
        )
        query_1 = Query(1, "SELECT * FROM Table1;", [])
        workload = Workload([query_0, query_1], "database_name")

        self.algo.database_connector.get_plan = MagicMock(
            side_effect=_simulate_get_plan)
        self.algo.what_if.simulate_index = MagicMock(
            side_effect=_simulate_index_mock)
        self.algo.what_if.drop_all_simulated_indexes = MagicMock()
        query_results, index_candidates = self.algo._exploit_virtual_indexes(
            workload)
        self.assertEqual(len(query_results), len(workload.queries))
        expected_first_result = {
            "cost_without_indexes": 17,
            "cost_with_recommended_indexes": 17,
            "recommended_indexes": set([Index([self.column_1])]),
        }
        expected_second_result = {
            "cost_without_indexes": 5,
            "cost_with_recommended_indexes": 5,
            "recommended_indexes": set(),
        }
        self.assertEqual(query_results[query_0], expected_first_result)
        self.assertEqual(query_results[query_1], expected_second_result)
        self.assertEqual(index_candidates, set([Index([self.column_1])]))
Exemple #8
0
    def test_query(self):
        query = Query(17, "SELECT * FROM lineitem;")
        self.assertEqual(query.nr, 17)
        self.assertEqual(query.text, "SELECT * FROM lineitem;")
        self.assertEqual(query.columns, [])

        column_1 = Column(name="ColA")
        column_2 = Column(name="ColB")
        query_2 = Query(18,
                        "SELECT * FROM nation;",
                        columns=[column_1, column_2])
        self.assertEqual(query_2.nr, 18)
        self.assertEqual(query_2.text, "SELECT * FROM nation;")
        self.assertEqual(query_2.columns, [column_1, column_2])
Exemple #9
0
 def test_possible_indexes(self):
     column_0_table_1 = Column("Col0")
     table_1 = Table("Table1")
     table_1.add_column(column_0_table_1)
     query = Query(
         17,
         """SELECT * FROM Table0 as t0, Table1 as t1 WHERE t0.Col0 = 1"
             AND t0.Col1 = 2 AND t0.Col2 = 3 AND t1.Col0 = 17;""",
         [self.column_0, self.column_1, self.column_2, column_0_table_1],
     )
     indexes = self.algo._possible_indexes(query)
     self.assertIn(Index([column_0_table_1]), indexes)
     self.assertIn(Index([self.column_0]), indexes)
     self.assertIn(Index([self.column_1]), indexes)
     self.assertIn(Index([self.column_2]), indexes)
     self.assertIn(Index([self.column_0, self.column_1]), indexes)
     self.assertIn(Index([self.column_0, self.column_2]), indexes)
     self.assertIn(Index([self.column_1, self.column_0]), indexes)
     self.assertIn(Index([self.column_1, self.column_2]), indexes)
     self.assertIn(Index([self.column_2, self.column_0]), indexes)
     self.assertIn(Index([self.column_2, self.column_1]), indexes)
     self.assertIn(Index([self.column_0, self.column_1, self.column_2]),
                   indexes)
     self.assertIn(Index([self.column_0, self.column_2, self.column_1]),
                   indexes)
     self.assertIn(Index([self.column_1, self.column_0, self.column_2]),
                   indexes)
     self.assertIn(Index([self.column_1, self.column_2, self.column_0]),
                   indexes)
     self.assertIn(Index([self.column_2, self.column_0, self.column_1]),
                   indexes)
     self.assertIn(Index([self.column_2, self.column_1, self.column_0]),
                   indexes)
Exemple #10
0
    def setUp(self):
        self.connector = MockConnector()
        self.algo = IBMAlgorithm(database_connector=self.connector)

        self.column_0 = Column("Col0")
        self.column_1 = Column("Col1")
        self.column_2 = Column("Col2")
        self.column_3 = Column("Col3")
        self.column_4 = Column("Col4")
        self.column_5 = Column("Col5")
        self.column_6 = Column("Col6")
        self.column_7 = Column("Col7")
        self.all_columns = [
            self.column_0,
            self.column_1,
            self.column_2,
            self.column_3,
            self.column_4,
            self.column_5,
            self.column_6,
            self.column_7,
        ]

        self.table = Table("Table0")
        self.table.add_columns(self.all_columns)

        self.query_0 = Query(
            0,
            "SELECT * FROM Table0 WHERE Col0 = 1 AND Col1 = 2 AND Col2 = 3;",
            self.all_columns,
        )
    def setUpClass(cls):
        cls.column_0 = Column("Col0")
        cls.column_1 = Column("Col1")
        cls.column_2 = Column("Col2")

        cls.table = Table("Table0")
        cls.table.add_columns([cls.column_0, cls.column_1, cls.column_2])

        cls.column_0_table_1 = Column("Col0")
        cls.table_1 = Table("Table1")
        cls.table_1.add_column(cls.column_0_table_1)
        cls.query_0 = Query(
            17,
            """SELECT * FROM Table0 as t0, Table1 as t1 WHERE t0.Col0 = 1"
                AND t0.Col1 = 2 AND t0.Col2 = 3 AND t1.Col0 = 17;""",
            [cls.column_0, cls.column_1, cls.column_2, cls.column_0_table_1],
        )
    def test_runtime_data_logging(self):
        db = PostgresDatabaseConnector(self.db_name, "postgres")

        query = Query(17, "SELECT count(*) FROM nation;")
        db.get_cost(query)
        self.assertEqual(db.cost_estimations, 1)
        self.assertGreater(db.cost_estimation_duration, 0)

        column_n_name = Column("n_name")
        nation_table = Table("nation")
        nation_table.add_column(column_n_name)
        index = Index([column_n_name])
        index_oid = db.simulate_index(index)[0]
        self.assertGreater(db.index_simulation_duration, 0)
        self.assertEqual(db.simulated_indexes, 1)

        previou_simulation_duration = db.index_simulation_duration
        db.drop_simulated_index(index_oid)
        self.assertGreater(db.index_simulation_duration, previou_simulation_duration)
    def test_candidates_per_query(self):
        MAX_INDEX_WIDTH = 2
        query_1 = Query(18, """SELECT * FROM 1;""")
        workload = Workload([self.query_0, query_1])

        syntactically_relevant_indexes_mock = MagicMock(
            return_value=syntactically_relevant_indexes)

        result = candidates_per_query(
            workload,
            max_index_width=MAX_INDEX_WIDTH,
            candidate_generator=syntactically_relevant_indexes_mock,
        )

        self.assertEqual(len(result), len(workload.queries))
        syntactically_relevant_indexes_mock.assert_called_with(
            query_1, MAX_INDEX_WIDTH)
        syntactically_relevant_indexes_mock.assert_any_call(
            self.query_0, MAX_INDEX_WIDTH)
Exemple #14
0
    def test_calculate_best_indexes_scenario_3(self):
        query_1 = Query(
            0,
            "SELECT * FROM TableA WHERE ColA = 1 AND ColB = 2;",
            [self.column_1, self.column_2],
        )
        workload = Workload([query_1])
        self.algo.cost_evaluation.calculate_cost = MagicMock(
            side_effect=self._calculate_cost_mock_3)

        # Budget too small for multi
        self.algo.budget = 2
        indexes = self.algo._calculate_best_indexes(workload)
        expected_indexes = [Index([self.column_2])]
        self.assertEqual(indexes, expected_indexes)

        # Picks multi with best ratio
        self.algo.budget = 4
        indexes = self.algo._calculate_best_indexes(workload)
        expected_indexes = [Index([self.column_2, self.column_1])]
        self.assertEqual(indexes, expected_indexes)
Exemple #15
0
    def test_recommended_indexes(self):
        def _simulate_index_mock(index, store_size):
            index.hypopg_name = f"<1337>btree_{index.columns}"

        # For some reason, the database decides to only use an index for one of
        # the filters
        def _simulate_get_plan(query):
            plan = {
                "Total Cost":
                17,
                "Plans": [{
                    "Index Name": "<1337>btree_(C table0.col1,)",
                    "Filter": "(Col0 = 1)",
                }],
            }

            return plan

        query = Query(
            17,
            "SELECT * FROM Table0 WHERE Col0 = 1 AND Col1 = 2;",
            [self.column_0, self.column_1],
        )

        self.algo.database_connector.get_plan = MagicMock(
            side_effect=_simulate_get_plan)
        self.algo.what_if.simulate_index = MagicMock(
            side_effect=_simulate_index_mock)
        self.algo.what_if.drop_all_simulated_indexes = MagicMock()

        indexes, cost = self.algo._recommended_indexes(query)
        self.assertEqual(cost, 17)
        self.assertEqual(indexes, {Index([self.column_1])})

        self.assertEqual(self.algo.what_if.simulate_index.call_count, 4)
        self.algo.what_if.drop_all_simulated_indexes.assert_called_once()
        self.algo.database_connector.get_plan.assert_called_once_with(query)
from selection.cost_evaluation import CostEvaluation
from selection.index import Index
from selection.workload import Column, Query, Table
import itertools

table_A = Table("TableA")
column_A_0 = Column("Col0")
column_A_1 = Column("Col1")
column_A_2 = Column("Col2")
table_A.add_columns([column_A_0, column_A_1, column_A_2])

query_0 = Query(0, "SELECT * FROM TableA WHERE Col0 = 4;", [column_A_0])
query_1 = Query(
    1,
    "SELECT * FROM TableA WHERE Col0 = 1 AND Col1 = 2 AND Col2 = 3;",
    [column_A_0, column_A_1, column_A_2],
)

mock_cache = {}

table_A_potential_indexes = []
# Calculate potential indexes for TableA
for number_of_columns in range(1, len(table_A.columns) + 1):
    for column_list in itertools.permutations(table_A.columns, number_of_columns):
        table_A_potential_indexes.append(Index(column_list))

# Calculate relevant indexes for query_0 based on potential indexes
relevant_indexes_query1_table_A = CostEvaluation._relevant_indexes(
    query_0, table_A_potential_indexes
)
Exemple #17
0
    def test_workload(self):
        query_1 = Query(17, "SELECT * FROM TableA;")
        query_2 = Query(18, "SELECT * FROM nation;")

        workload = Workload([query_1, query_2])
        self.assertEqual(workload.queries, [query_1, query_2])
Exemple #18
0
 def test_query_repr(self):
     query = Query(17, "SELECT * FROM lineitem;")
     self.assertEqual(repr(query), "Q17")