def test_attach_to_indexes(self):
        # Attaching a single-column candidate is expected to evaluate, for
        # each existing index, a combination in which that index is replaced
        # by a copy extended with the candidate's columns.
        existing = [self.index_1, self.index_2]
        extension = self.index_3
        self.algo.initial_cost = 10
        best = {"combination": [], "benefit_to_size_ratio": 0}
        evaluate_mock = MagicMock()
        self.algo._evaluate_combination = evaluate_mock
        self.algo._attach_to_indexes(
            existing, extension, best, self.algo.initial_cost
        )

        # (position of the widened index, expected trailing argument).
        # NOTE(review): 5 and 1 presumably are the estimated sizes of the
        # replaced indexes -- confirm against the fixture.
        for position, trailing_argument in ((0, 5), (1, 1)):
            untouched = existing[1 - position]
            widened = Index(existing[position].columns + extension.columns)
            evaluate_mock.assert_any_call(
                [untouched, widened],
                best,
                self.algo.initial_cost,
                trailing_argument,
            )

        # A candidate with more than one column must trip an assertion.
        wide_candidate = Index([self.column_2, self.column_3])
        with self.assertRaises(AssertionError):
            self.algo._attach_to_indexes(
                existing, wide_candidate, best, self.algo.initial_cost
            )
Esempio n. 2
0
    def test_calculate_indexes_1MB_2column(self, get_utilized_indexes_mock):
        # NOTE(review): get_utilized_indexes_mock is presumably injected by a
        # patch decorator that is not visible in this excerpt -- confirm.
        relaxation = RelaxationAlgorithm(
            database_connector=self.connector,
            parameters={"max_index_width": 2, "budget_MB": 1},
        )
        evaluation = relaxation.cost_evaluation
        evaluation.cache = mock_cache
        evaluation._prepare_cost_calculation = self.set_estimated_index_sizes
        evaluation.estimate_size = self.set_estimated_index_size

        # The mocked lookup reports one single-column and one two-column index.
        single_column = Index([column_A_0], 1000 * 1000)
        two_column = Index([column_A_0, column_A_1], 2000 * 1000)
        get_utilized_indexes_mock.return_value = (
            {single_column, two_column},
            None,
        )

        selected = relaxation.calculate_best_indexes(
            Workload([query_0, query_1]))
        # The single column index is dropped first, because of the lower penalty.
        # The multi column index is prefixed second.
        self.assertEqual(set(selected), {Index([column_A_0])})
Esempio n. 3
0
    def setUp(self):
        # Fixture: a mock connector, the algorithm under test, one table with
        # three columns, one single-column index per column, and a two-query
        # workload whose cost calculation is routed to a mock.
        self.connector = MockConnector()
        self.algo = DropHeuristicAlgorithm(database_connector=self.connector)

        self.all_columns = [Column(name) for name in ("Col0", "Col1", "Col2")]
        self.column_0, self.column_1, self.column_2 = self.all_columns

        self.table = Table("TableA")
        self.table.add_columns(self.all_columns)

        self.index_0, self.index_1, self.index_2 = (
            Index([column]) for column in self.all_columns
        )

        self.database_name = "test_DB"
        single_filter_query = Query(
            0, "SELECT * FROM TableA WHERE Col0 = 4;", [self.column_0])
        triple_filter_query = Query(
            1,
            "SELECT * FROM TableA WHERE Col0 = 1 AND Col1 = 2 AND Col2 = 3;",
            self.all_columns,
        )

        self.workload = Workload([single_filter_query, triple_filter_query])
        self.algo.workload = self.workload
        # Cost requests are answered by the test's own cost function.
        self.algo.cost_evaluation.calculate_cost = MagicMock(
            side_effect=self._calculate_cost_mock)
Esempio n. 4
0
    def test_calculate_indexes_1MB_2column(self):
        relaxation = RelaxationAlgorithm(
            database_connector=self.connector,
            parameters={"max_index_columns": 2, "budget": 1},
        )
        evaluation = relaxation.cost_evaluation
        evaluation.cache = mock_cache
        evaluation._prepare_cost_calculation = self.set_estimated_index_sizes
        evaluation.estimate_size = self.set_estimated_index_size

        def _canned_candidates(workload):
            # Stand-in for the exploitation step: no query results, plus a
            # fixed single-column and two-column candidate.
            return (
                None,
                {
                    Index([column_A_0], 1000 * 1000),
                    Index([column_A_0, column_A_1], 2000 * 1000),
                },
            )

        relaxation._exploit_virtual_indexes = _canned_candidates

        selected = relaxation.calculate_best_indexes(
            Workload([query_0, query_1], self.database_name)
        )
        # The single column index is dropped first, because of the lower penalty.
        # The multi column index is prefixed second.
        self.assertEqual(set(selected), {Index([column_A_0])})
Esempio n. 5
0
    def test_appendable_by_index_with_already_present_column(self):
        # An index must not be appendable by another index whose column it
        # already contains.
        duplicate_column_index = Index([self.column_0])
        two_column_index = Index([self.column_0, self.column_1])

        appendable = two_column_index.appendable_by(duplicate_column_index)
        self.assertFalse(appendable)
Esempio n. 6
0
    def test_calculate_indexes_3000MB_2column(self):
        relaxation = RelaxationAlgorithm(
            database_connector=self.connector,
            parameters={"max_index_columns": 2, "budget": 3},
        )
        evaluation = relaxation.cost_evaluation
        evaluation.cache = mock_cache
        evaluation._prepare_cost_calculation = self.set_estimated_index_sizes
        evaluation.estimate_size = self.set_estimated_index_size

        def _canned_candidates(workload):
            # Stand-in for the exploitation step: no query results, plus a
            # fixed single-column and two-column candidate.
            return (
                None,
                {
                    Index([column_A_0], 1000 * 1000),
                    Index([column_A_0, column_A_1], 2000 * 1000),
                },
            )

        relaxation._exploit_virtual_indexes = _canned_candidates

        selected = relaxation.calculate_best_indexes(
            Workload([query_0, query_1], self.database_name)
        )
        # With this budget both candidates are expected to be kept.
        expected = {Index([column_A_0]), Index([column_A_0, column_A_1])}
        self.assertEqual(set(selected), expected)
    def test_potential_indexes(self):
        # potential_indexes() is compared, as a set, against the expected
        # single-column indexes for three different workloads.
        first_column_only = {Index([column_A_0])}
        all_three_columns = {
            Index([column_A_0]),
            Index([column_A_1]),
            Index([column_A_2]),
        }

        # (queries of the workload, expected potential indexes)
        cases = (
            ([query_0], first_column_only),
            ([query_1], all_three_columns),
            ([query_0, query_1], all_three_columns),
        )
        for queries, expected in cases:
            workload = Workload(queries, database_name="test_DB")
            self.assertEqual(set(workload.potential_indexes()), expected)
    def setUp(self):
        # Fixture: a mock connector, the algorithm under test, one table with
        # three columns, one sized single-column index per column, and a
        # two-query workload.
        self.connector = MockConnector()
        self.algo = EPICAlgorithm(database_connector=self.connector)

        self.all_columns = [Column(name) for name in ("ColA", "ColB", "ColC")]
        self.column_1, self.column_2, self.column_3 = self.all_columns

        self.table = Table("TableA")
        self.table.add_columns(self.all_columns)

        self.index_1, self.index_2, self.index_3 = (
            Index([column]) for column in self.all_columns
        )
        # Fixed size estimates for index_1, index_2 and index_3.
        sized = zip((self.index_1, self.index_2, self.index_3), (5, 1, 3))
        for index, estimated_size in sized:
            index.estimated_size = estimated_size

        self.database_name = "test_DB"
        single_filter_query = Query(
            0, "SELECT * FROM TableA WHERE ColA = 4;", [self.column_1]
        )
        triple_filter_query = Query(
            1,
            "SELECT * FROM TableA WHERE ColA = 1 AND ColB = 2 AND ColC = 3;",
            self.all_columns,
        )

        self.workload = Workload(
            [single_filter_query, triple_filter_query], self.database_name
        )
        self.algo.workload = self.workload
Esempio n. 9
0
    def test_calculate_indexes_3000MB_2column(self, get_utilized_indexes_mock):
        # NOTE(review): get_utilized_indexes_mock is presumably injected by a
        # patch decorator that is not visible in this excerpt -- confirm.
        relaxation = RelaxationAlgorithm(
            database_connector=self.connector,
            parameters={"max_index_width": 2, "budget_MB": 3},
        )
        evaluation = relaxation.cost_evaluation
        evaluation.cache = mock_cache
        evaluation._prepare_cost_calculation = self.set_estimated_index_sizes
        evaluation.estimate_size = self.set_estimated_index_size

        # The mocked lookup reports one single-column and one two-column index.
        single_column = Index([column_A_0], 1000 * 1000)
        two_column = Index([column_A_0, column_A_1], 2000 * 1000)
        get_utilized_indexes_mock.return_value = (
            {single_column, two_column},
            None,
        )

        selected = relaxation.calculate_best_indexes(
            Workload([query_0, query_1]))
        # With this budget both indexes are expected to be kept.
        expected = {Index([column_A_0]), Index([column_A_0, column_A_1])}
        self.assertEqual(set(selected), expected)
Esempio n. 10
0
    def test_index(self):
        # A freshly built index exposes its columns as a tuple and starts
        # without a size estimate or a hypopg name.
        columns = [self.column_0, self.column_1]
        index = Index(columns)
        self.assertEqual(index.columns, tuple(columns))
        # Identity checks: unlike assertEqual(..., None), assertIsNone cannot
        # be satisfied by an object that merely compares equal to None.
        self.assertIsNone(index.estimated_size)
        self.assertIsNone(index.hypopg_name)

        # An index without any column must be rejected.
        with self.assertRaises(ValueError):
            Index([])
Esempio n. 11
0
    def test_appendable_by_other_table(self):
        # An index must not be appendable by an index whose column was added
        # to a separate table ("TableZ").
        foreign_column = Column("ColZ")
        foreign_table = Table("TableZ")
        foreign_table.add_column(foreign_column)
        foreign_index = Index([foreign_column])

        local_index = Index([self.column_0])

        appendable = local_index.appendable_by(foreign_index)
        self.assertFalse(appendable)
Esempio n. 12
0
    def test_evaluate_workload(self):
        # _evaluate_workload must request the cost exactly once, passing the
        # workload and the bare indexes taken from the given benefits.
        first_index = Index([self.column_0])
        second_index = Index([self.column_1])
        calculate_cost_mock = MagicMock()
        self.algo.cost_evaluation.calculate_cost = calculate_cost_mock

        benefits = [
            IndexBenefit(first_index, 10),
            IndexBenefit(second_index, 9),
        ]
        self.algo._evaluate_workload(benefits, workload=[])

        calculate_cost_mock.assert_called_once_with(
            [], [first_index, second_index])
Esempio n. 13
0
    def setUpClass(cls):
        # NOTE(review): no @classmethod decorator is visible directly above
        # this def in this excerpt -- confirm it exists in the full file.
        # TableA gets two columns; TableB gets one column that shares the
        # name "Col0" with TableA's first column.
        table_a_columns = [Column("Col0"), Column("Col1")]
        cls.column_a_0, cls.column_a_1 = table_a_columns
        cls.table_a = Table("TableA")
        cls.table_a.add_columns(table_a_columns)

        cls.column_b_0 = Column("Col0")
        cls.table_b = Table("TableB")
        cls.table_b.add_columns([cls.column_b_0])

        # index_0 and index_2 are on TableA columns, index_1 on TableB's.
        cls.index_0, cls.index_1, cls.index_2 = (
            Index([column])
            for column in (cls.column_a_0, cls.column_b_0, cls.column_a_1)
        )
Esempio n. 14
0
    def test_index_eq(self):
        # Exercises the == operator of Index directly.
        on_column_0 = Index([self.column_0])
        on_column_1 = Index([self.column_1])
        also_on_column_0 = Index([self.column_0])

        # Different columns compare unequal, identical columns compare equal.
        self.assertFalse(on_column_0 == on_column_1)
        self.assertTrue(on_column_0 == also_on_column_0)

        two_column_index = Index([self.column_0, self.column_1])
        two_column_twin = Index([self.column_0, self.column_1])
        self.assertTrue(two_column_index == two_column_twin)

        # Check comparing object of different class
        self.assertFalse(two_column_index == 3)
Esempio n. 15
0
    def test_exploit_virtual_indexes(self):
        def _fake_simulate_index(index, store_size):
            # Give the index a name derived from its columns.
            index.hypopg_name = f"<1337>btree_{index.columns}"

        # For some reason, the database decides to only use an index for one of
        # the filters
        def _fake_get_plan(query):
            if "Table0" not in query.text:
                return {
                    "Total Cost": 5,
                    "Plans": [{"Simple Table Retrieve": "table1"}],
                }

            return {
                "Total Cost": 17,
                "Plans": [{"Index Name": "<1337>btree_(C table0.col1,)"}],
            }

        filtered_query = Query(
            0,
            "SELECT * FROM Table0 WHERE Col0 = 1 AND Col1 = 2;",
            [self.column_0, self.column_1],
        )
        unfiltered_query = Query(1, "SELECT * FROM Table1;", [])
        workload = Workload([filtered_query, unfiltered_query], "database_name")

        # Replace the plan lookup and the index simulation with the fakes.
        self.algo.database_connector.get_plan = MagicMock(
            side_effect=_fake_get_plan)
        self.algo.what_if.simulate_index = MagicMock(
            side_effect=_fake_simulate_index)
        self.algo.what_if.drop_all_simulated_indexes = MagicMock()

        query_results, index_candidates = self.algo._exploit_virtual_indexes(
            workload)
        self.assertEqual(len(query_results), len(workload.queries))

        # Only the index on column_1 appears in the faked plan.
        used_index = Index([self.column_1])
        self.assertEqual(
            query_results[filtered_query],
            {
                "cost_without_indexes": 17,
                "cost_with_recommended_indexes": 17,
                "recommended_indexes": {used_index},
            },
        )
        self.assertEqual(
            query_results[unfiltered_query],
            {
                "cost_without_indexes": 5,
                "cost_with_recommended_indexes": 5,
                "recommended_indexes": set(),
            },
        )
        self.assertEqual(index_candidates, {used_index})
Esempio n. 16
0
    def test_index_lt(self):
        # Exercises the < operator of Index in both directions for each pair.
        on_0 = Index([self.column_0])
        on_1 = Index([self.column_1])
        on_0_1_2 = Index([self.column_0, self.column_1, self.column_2])
        on_0_1 = Index([self.column_0, self.column_1])
        on_0_2 = Index([self.column_0, self.column_2])

        # (index expected to sort first, index expected to sort second)
        ordered_pairs = (
            (on_0, on_1),
            (on_0, on_0_1_2),
            (on_0_1, on_0_2),
        )
        for smaller, larger in ordered_pairs:
            self.assertTrue(smaller < larger)
            self.assertFalse(larger < smaller)
    def test_cache_hit_different_index_same_columns(self):
        evaluation = self.cost_evaluation
        self.assertEqual(evaluation.cost_requests, 0)
        self.assertEqual(evaluation.cache_hits, 0)

        workload = Workload([self.queries[0]])

        # Two requests, each with a freshly built index on the same column:
        # the second one must be a cache hit, so the connector is asked for a
        # cost only once.
        for expected_requests, expected_hits in ((1, 0), (2, 1)):
            evaluation.calculate_cost(workload, {Index([self.columns[0]])})
            self.assertEqual(evaluation.cost_requests, expected_requests)
            self.assertEqual(evaluation.cache_hits, expected_hits)
            self.assertEqual(self.connector.get_cost.call_count, 1)
    def setUpClass(cls):
        # NOTE(review): no @classmethod decorator is visible directly above
        # this def in this excerpt -- confirm it exists in the full file.
        # Shared fixture: one table with five columns and single-column
        # indexes on the first three of them.
        cls.db_name = "TestDB"

        cls.table = Table("TestTableA")
        column_names = ("Col0", "Col1", "Col2", "Col3", "Col4")
        cls.columns = [Column(name) for name in column_names]
        cls.table.add_columns(cls.columns)

        cls.index_0, cls.index_1, cls.index_2 = (
            Index([column]) for column in cls.columns[:3]
        )
    def test_calculate_best_indexes_scenario_2(self):
        self.algo.cost_evaluation.calculate_cost = MagicMock(
            side_effect=self._calculate_cost_mock_2
        )

        # (budget, columns of the expected single-column indexes, in order)
        cases = (
            # There is only one index fitting the budget
            (1, (self.column_1,)),
            # Theoretically, two indexes fit, but one has a better benefit/cost ratio
            (3, (self.column_1,)),
            # The two indexes with the best ratio should be chosen
            (5, (self.column_1, self.column_2)),
            # All single column indexes are chosen
            (9, (self.column_1, self.column_2, self.column_3)),
        )
        for budget, expected_columns in cases:
            self.algo.budget = budget
            chosen = self.algo._calculate_best_indexes(self.workload)
            expected = [Index([column]) for column in expected_columns]
            self.assertEqual(chosen, expected)
    def test_relevant_indexes(self):
        # _relevant_indexes is expected to return, as a frozenset, only those
        # of the offered indexes that the test considers relevant to the query.
        on_column_0 = Index([self.columns[0]])
        on_column_1 = Index([self.columns[1]])
        first_query = self.queries[0]

        nothing_offered = self.cost_evaluation._relevant_indexes(
            first_query, indexes=set()
        )
        self.assertEqual(nothing_offered, frozenset())

        # (query, offered indexes, expected relevant indexes)
        cases = (
            (first_query, {on_column_0}, frozenset([on_column_0])),
            (first_query, {on_column_1, on_column_0}, frozenset([on_column_0])),
            (
                self.queries[2],
                {on_column_1, on_column_0},
                frozenset([on_column_1, on_column_0]),
            ),
        )
        for query, offered, expected in cases:
            relevant = self.cost_evaluation._relevant_indexes(query, offered)
            self.assertEqual(relevant, expected)
Esempio n. 21
0
    def test_calculate_best_indexes_scenario_3(self):
        two_filter_query = Query(
            0,
            "SELECT * FROM TableA WHERE ColA = 1 AND ColB = 2;",
            [self.column_1, self.column_2],
        )
        workload = Workload([two_filter_query])
        self.algo.cost_evaluation.calculate_cost = MagicMock(
            side_effect=self._calculate_cost_mock_3)

        # (budget, columns of the one expected index)
        cases = (
            # Budget too small for multi
            (2, [self.column_2]),
            # Picks multi with best ratio
            (4, [self.column_2, self.column_1]),
        )
        for budget, expected_columns in cases:
            self.algo.budget = budget
            chosen = self.algo._calculate_best_indexes(workload)
            self.assertEqual(chosen, [Index(expected_columns)])
    def test_calculate_indexes_2indexes_2columns(self):
        auto_admin = AutoAdminAlgorithm(
            database_connector=self.connector,
            parameters={"max_indexes": 2, "max_index_width": 2},
        )

        def _skip_preparation(indexes, store_size=False):
            # Cost preparation is a no-op; costs come from the mocked cache.
            return None

        evaluation = auto_admin.cost_evaluation
        evaluation.cache = mock_cache
        evaluation._prepare_cost_calculation = _skip_preparation

        workload = Workload([query_0, query_1])
        selected = auto_admin.calculate_best_indexes(workload)
        self.assertEqual(set(selected), {Index([column_A_0, column_A_1])})
Esempio n. 23
0
    def test_calculate_index_benefits(self):
        # Three single-column indexes with estimated sizes 5, 1 and 3.
        sized_indexes = []
        for column, estimated_size in (
            (self.column_0, 5),
            (self.column_1, 1),
            (self.column_2, 3),
        ):
            index = Index([column])
            index.estimated_size = estimated_size
            sized_indexes.append(index)
        index_0, index_1, index_2 = sized_indexes

        query_results = {
            "q0": {
                "cost_without_indexes": 100,
                "cost_with_recommended_indexes": 50,
                "recommended_indexes": [index_0, index_1],
            },
            # Yes, negative benefit is possible
            "q1": {
                "cost_without_indexes": 50,
                "cost_with_recommended_indexes": 60,
                "recommended_indexes": [index_1],
            },
            "q2": {
                "cost_without_indexes": 60,
                "cost_with_recommended_indexes": 57,
                "recommended_indexes": [index_2],
            },
            "q3": {
                "cost_without_indexes": 60,
                "cost_with_recommended_indexes": 60,
                "recommended_indexes": [],
            },
        }

        actual_benefits = self.algo._calculate_index_benefits(
            sized_indexes, query_results)

        # The expected list is compared in this exact order.
        self.assertEqual(
            actual_benefits,
            [
                IndexBenefit(index_1, 40),
                IndexBenefit(index_0, 50),
                IndexBenefit(index_2, 3),
            ],
        )
    def test_calculate_best_indexes_scenario_1(self):
        self.algo.cost_evaluation.calculate_cost = MagicMock(
            side_effect=self._calculate_cost_mock_1
        )

        # (budget, columns of the expected single-column indexes, in order)
        cases = (
            # Each one alone of the single column indexes would fit,
            # but the one with the best benefit/cost ratio is chosen
            (1, (self.column_3,)),
            # Two single column indexes would fit, but the two best ones are chosen
            (2, (self.column_3, self.column_2)),
            # All single column indexes are chosen
            (3, (self.column_3, self.column_2, self.column_1)),
        )
        for budget, expected_columns in cases:
            self.algo.budget = budget
            chosen = self.algo._calculate_best_indexes(self.workload)
            expected = [Index([column]) for column in expected_columns]
            self.assertEqual(chosen, expected)
Esempio n. 25
0
    def test_prefixes(self):
        # For a three-column index, the two-column prefix is expected first,
        # followed by the single-column prefix.
        three_column_index = Index(
            [self.column_0, self.column_1, self.column_2])
        self.assertEqual(
            three_column_index.prefixes(),
            [
                Index([self.column_0, self.column_1]),
                Index([self.column_0]),
            ],
        )

        # A single-column index has no prefixes.
        single_column_index = Index([self.column_0])
        self.assertEqual(single_column_index.prefixes(), [])
Esempio n. 26
0
    def test_no_cache_hit_unseen(self):
        def _assert_counters(requests, hits, get_cost_calls):
            # Check both evaluation counters and the connector call count.
            self.assertEqual(self.cost_evaluation.cost_requests, requests)
            self.assertEqual(self.cost_evaluation.cache_hits, hits)
            self.assertEqual(
                self.connector.get_cost.call_count, get_cost_calls)

        self.assertEqual(self.cost_evaluation.cost_requests, 0)
        self.assertEqual(self.cost_evaluation.cache_hits, 0)

        workload = Workload([self.queries[0]], self.db_name)
        unseen_index = Index([self.columns[0]])

        # First request, without any index: nothing can be cached yet.
        self.cost_evaluation.calculate_cost(workload, indexes=set())
        _assert_counters(requests=1, hits=0, get_cost_calls=1)

        # A request with the new index must reach the connector again.
        self.cost_evaluation.calculate_cost(workload, {unseen_index})
        _assert_counters(requests=2, hits=0, get_cost_calls=2)
        self.connector.simulate_index.assert_called_with(unseen_index)
    def test_cache_hit_non_relevant_index(self):
        def _assert_counters(requests, hits, get_cost_calls):
            # Check both evaluation counters and the connector call count.
            self.assertEqual(self.cost_evaluation.cost_requests, requests)
            self.assertEqual(self.cost_evaluation.cache_hits, hits)
            self.assertEqual(
                self.connector.get_cost.call_count, get_cost_calls)

        self.assertEqual(self.cost_evaluation.cost_requests, 0)
        self.assertEqual(self.cost_evaluation.cache_hits, 0)

        workload = Workload([self.queries[0]])
        other_column_index = Index([self.columns[1]])

        # First request, without any index: nothing can be cached yet.
        self.cost_evaluation.calculate_cost(workload, indexes=set())
        _assert_counters(requests=1, hits=0, get_cost_calls=1)

        # The second request is expected to be served from the cache, so the
        # connector's cost call count stays at one; the index is still
        # simulated.
        self.cost_evaluation.calculate_cost(workload, {other_column_index})
        _assert_counters(requests=2, hits=1, get_cost_calls=1)
        self.connector.simulate_index.assert_called_with(other_column_index)
    def test_runtime_data_logging(self):
        # The connector must count cost estimations and simulated indexes and
        # accumulate the time spent on both.
        connector = PostgresDatabaseConnector(self.db_name, "postgres")

        count_query = Query(17, "SELECT count(*) FROM nation;")
        connector.get_cost(count_query)
        self.assertEqual(connector.cost_estimations, 1)
        self.assertGreater(connector.cost_estimation_duration, 0)

        name_column = Column("n_name")
        nation = Table("nation")
        nation.add_column(name_column)
        name_index = Index([name_column])
        simulation_result = connector.simulate_index(name_index)
        simulated_oid = simulation_result[0]
        self.assertGreater(connector.index_simulation_duration, 0)
        self.assertEqual(connector.simulated_indexes, 1)

        # Dropping the simulated index must add to the same duration counter.
        duration_before_drop = connector.index_simulation_duration
        connector.drop_simulated_index(simulated_oid)
        self.assertGreater(
            connector.index_simulation_duration, duration_before_drop)
    def test_index_benefit__lt__(self):
        # Exercises the < operator of IndexBenefit.
        small_index = Index([self.column_0])
        small_index.estimated_size = 1
        large_index = Index([self.column_1])
        large_index.estimated_size = 2

        # Due to its size, index_0 has the better ratio
        small_benefit = IndexBenefit(small_index, 10)
        large_benefit = IndexBenefit(large_index, 10)
        self.assertTrue(large_benefit < small_benefit)

        # The ratios are equal, the columns are taken into consideration
        doubled_large_benefit = IndexBenefit(large_index, 20)
        self.assertTrue(small_benefit < doubled_large_benefit)
    def test_which_indexes_utilized_and_cost(self):
        def _fake_simulate_index(index, store_size):
            # Give the index a name derived from its columns.
            index.hypopg_name = f"<1337>btree_{index.columns}"

        # For some reason, the database decides to only use an index for one of
        # the filters
        def _fake_get_plan(query):
            return {
                "Total Cost": 17,
                "Plans": [
                    {
                        "Index Name": "<1337>btree_(C testtablea.col1,)",
                        "Filter": "(Col0 = 14)",
                    }
                ],
            }

        query = self.queries[2]

        get_plan_mock = MagicMock(side_effect=_fake_get_plan)
        simulate_index_mock = MagicMock(side_effect=_fake_simulate_index)
        self.cost_evaluation.db_connector.get_plan = get_plan_mock
        self.cost_evaluation.what_if.simulate_index = simulate_index_mock

        candidates = syntactically_relevant_indexes(query, max_index_width=2)
        utilized, cost = self.cost_evaluation.which_indexes_utilized_and_cost(
            query, candidates
        )
        self.assertEqual(cost, 17)
        self.assertEqual(utilized, {Index([self.columns[1]])})

        # Every candidate is simulated once in total and the plan is fetched
        # a single time.
        self.assertEqual(simulate_index_mock.call_count, len(candidates))
        get_plan_mock.assert_called_once_with(query)
        self.assertCountEqual(self.cost_evaluation.current_indexes, candidates)