Example n. 1
0
 def get_execution_time_for_indexes_configuration(indexes):
     """Return the total estimated execution time of the query pool under
     a given index configuration.

     Builds each index in *indexes* on the table, sums the estimated
     execution time of every query in the surrounding ``queries`` pool,
     then drops all indexes again so the table is left clean.

     NOTE(review): relies on ``connector``, ``queries``, ``table_name``
     and the index helpers from the enclosing scope — confirm they are
     in scope where this snippet is used.
     """
     total_time = 0
     for index in indexes:
         add_index(connector, index, table_name)
     # Fix: removed a redundant `total_time = 0` that sat inside the
     # index-building loop above — it reset a value that was still 0 and
     # only invited confusion about whether accumulation had started.
     for query in queries:
         total_time += get_estimated_execution_time(
             connector, query['query'])
     drop_indexes(connector, table_name)
     return total_time
 def reset(self):
     """Reset the environment for a new episode.

     Every ``max_episodes`` episodes a fresh query batch is sampled from
     the pull; all indexes are dropped and the state vector is cleared.
     Returns the state stacked with each batch query's ``sf_array``.
     """
     self.step_number = 0
     # Time to rotate in a new batch of queries?
     if self.episode_number >= self.max_episodes:
         self.query_batch = np.random.choice(self.query_pull,
                                             self.batch_size)
         self.episode_number = 0
     # First episode on this batch: report which indexes it touches.
     if self.episode_number == 0:
         batch_indexes = touched_indexes(self.query_batch)
         print("New query batch, touched indexes: " + str(batch_indexes))
     self.episode_number += 1
     drop_indexes(self.connector, self.table_name)
     size = len(self.state)
     self.state = [False] * size
     self.action_space = Dynamic(size)
     sf_arrays = [query['sf_array'] for query in self.query_batch]
     return np.array([self.state, *sf_arrays])
 def reset(self):
     """Reset the episode: clear the step counter, drop every index on
     the table, and return the all-False state vector."""
     self.step_number = 0
     drop_indexes(self.connector, self.table_name)
     size = len(self.state)
     self.state = [False] * size
     self.action_space = Dynamic(size)
     return self.state
Example n. 4
0
    def __test_results(self, columns_participating):
        """Compare index-selection methods (heuristic, Q-learning, DQN,
        supervised, random) on freshly generated query pools and print
        the mean estimated execution time per method.

        Repeats with a new query pool each iteration until the stop
        condition is met (see NOTE(review) at the break below).
        """
        def get_execution_time_for_indexes_configuration(indexes):
            # Build the candidate indexes, estimate the total cost of the
            # current query pool, then drop the indexes again.
            total_time = 0
            for index in indexes:
                add_index(connector, index, table_name)
                total_time = 0  # NOTE(review): redundant — still 0 here;
                # looks like a leftover from an earlier edit.
            for query in queries:
                total_time += get_estimated_execution_time(
                    connector, query['query'])
            drop_indexes(connector, table_name)
            return total_time

        def add_execution_time_for_method_and_indexes_configuration(
                method, indexes):
            # Append this iteration's measured time to the per-method
            # history (methods maps name -> list of times across tries).
            if method in methods:
                methods[method].append(
                    get_execution_time_for_indexes_configuration(indexes))
            else:
                methods[method] = [
                    get_execution_time_for_indexes_configuration(indexes)
                ]

        def get_indexes_dqn():
            # Run a pre-trained DQN agent for one test episode and return
            # the positions it switched on in the env's boolean state.
            env = DatabaseIndexesEnv(n=const.COLUMNS_AMOUNT,
                                     table_name=table_name,
                                     query_pull=queries,
                                     batch_size=const.BATCH_SIZE,
                                     connector=connector,
                                     k=3,
                                     max_episodes=1)
            dqn = load_agent(
                path.join(
                    "..",
                    "dqn_{}_weights_6_4_2_1_50000_episodes_estimated.h5f".
                    format(ENV_NAME)))
            dqn.test(env, nb_episodes=1)
            return [i for i, x in enumerate(env.state) if x]

        connector = PostgresConnector()
        drop_indexes(connector, table_name)  # start from a clean table
        methods = {}  # method name -> list of total estimated times
        i = 0  # iteration counter; also salts the query-pull cache name
        np.warnings.filterwarnings('ignore')
        while True:
            # Fresh query pool for this iteration (cached on disk under a
            # name derived from columns_participating and i).
            queries = generate_query_pull(
                '../.test_query_pull_' + str(columns_participating) + '_' +
                str(i), self.__queries_amount, columns_participating,
                table_column_types, table_column_names, table_name, connector)
            i += 1
            # Element-wise sum of selectivity-factor arrays over the pool.
            sf_array = np.array([query['sf_array']
                                 for query in queries]).sum(axis=0)

            # Heuristic: index the columns with the lowest summed
            # selectivity factor. NOTE(review): the comprehension variable
            # `i` shadows the outer counter (scoped in Py3, but confusing).
            indexes_to_add = [
                i[0] for i in (sorted(enumerate(sf_array), key=lambda x: x[1])
                               )[:self.__index_amount]
            ]
            add_execution_time_for_method_and_indexes_configuration(
                'heuristic', indexes_to_add)

            indexes_to_add = get_indexes_qagent(self.__index_amount, queries,
                                                True)
            add_execution_time_for_method_and_indexes_configuration(
                'qlearning', indexes_to_add)
            # #extra clean up to make sure no indices left from the agent
            drop_indexes(connector, table_name)

            # dqn
            indexes_to_add = get_indexes_dqn()
            drop_indexes(connector, table_name)
            add_execution_time_for_method_and_indexes_configuration(
                'dqn', indexes_to_add)
            drop_indexes(connector, table_name)

            indexes_to_add = get_indexes_supervised(self.__index_amount,
                                                    queries)
            add_execution_time_for_method_and_indexes_configuration(
                'supervised', indexes_to_add)
            drop_indexes(connector, table_name)

            # Random baseline: sample index_amount distinct columns.
            indexes_to_add = random.sample(range(COLUMNS_AMOUNT),
                                           self.__index_amount)
            add_execution_time_for_method_and_indexes_configuration(
                'random', indexes_to_add)

            # Pairwise Welch/Student t-tests between every pair of methods'
            # time samples; keep only the p-values.
            times_combinations = list(
                itertools.combinations(methods.values(), 2))
            p_values = [
                stats.ttest_ind(time[0], time[1])[1]
                for time in times_combinations
            ]
            print(p_values)
            # NOTE(review): `A and i >= 5 or i >= 5` reduces to `i >= 5`,
            # so the significance check never affects the stop condition.
            # Likely intended as `all(...) and i >= 5` alone — confirm
            # before changing, since that could loop indefinitely.
            if all(p_value < 0.01
                   for p_value in p_values) and i >= 5 or i >= 5:
                break
            print('try #' + str(i))
            for method, times in methods.items():
                print('{}: {}'.format(method, np.mean(times)))
        print('')

        # Final report: mean estimated time per method over all tries.
        for method, times in methods.items():
            print('{}: {}'.format(method, np.mean(times)))
Example n. 5
0
    def __test_against_tpch(self):
        """test our heuristic algorithm, Q-learning, supervised and random approach with TPC-H Queries"""
        def get_execution_time_for_indexes_configuration(indexes):
            # Build the candidate indexes, estimate the total cost of the
            # TPC-H query set, then drop the indexes again.
            total_time = 0
            for index in indexes:
                add_index(connector, index, table_name)
                total_time = 0  # NOTE(review): redundant — still 0 here;
                # looks like a leftover from an earlier edit.
            for query in queries:
                total_time += get_estimated_execution_time(
                    connector, query['query'])
            drop_indexes(connector, table_name)
            return total_time

        def add_execution_time_for_method_and_indexes_configuration(
                method, indexes):
            # Single run per method here (scalar, not a list as in the
            # pool-based test) — overwrites any previous entry.
            methods[method] = get_execution_time_for_indexes_configuration(
                indexes)

        def get_indexes_dqn():
            # Run a pre-trained DQN agent for one test episode and return
            # the positions it switched on in the env's boolean state.
            env = DatabaseIndexesEnv(n=const.COLUMNS_AMOUNT,
                                     table_name=table_name,
                                     query_pull=queries,
                                     batch_size=const.BATCH_SIZE,
                                     connector=connector,
                                     k=3,
                                     max_episodes=1)
            dqn = load_agent(
                path.join(
                    "..",
                    "dqn_{}_weights_6_4_2_1_50000_episodes_estimated.h5f".
                    format(ENV_NAME)))
            dqn.test(env, nb_episodes=1)
            return [i for i, x in enumerate(env.state) if x]

        connector = PostgresConnector()
        drop_indexes(connector, table_name)  # start from a clean table
        methods = {}  # method name -> total estimated time (scalar)
        np.warnings.filterwarnings('ignore')
        # Table row count, used to turn per-column match counts into
        # selectivity factors below.
        total_amount_of_rows = connector.query("select count (*) from " +
                                               table_name + ";").fetchone()[0]
        queries = []
        with open("../tpc_h_queries/tpch.json") as infile:
            json_obj = json.load(infile)
        for elem in json_obj:
            # One selectivity factor per column; columns a query does not
            # touch keep the neutral value 1.
            # NOTE(review): 17 is presumably the table's column count —
            # confirm against const.COLUMNS_AMOUNT.
            sf_list = [1] * 17
            for subquery, included_col in zip(elem["subquery"], elem["cols"]):
                subquery = subquery.format(table_name)
                # NOTE(review): subquery was already formatted on the line
                # above — the second .format(table_name) here is a no-op on
                # the formatted string; confirm and simplify.
                sf_list[int(included_col)] = float(
                    connector.query(subquery.format(table_name)).fetchone()
                    [0]) / float(total_amount_of_rows)
            queries.append({
                'query': elem["query"].format(table_name),
                'sf_array': sf_list
            })

        # Column-wise sum of selectivity factors over all TPC-H queries.
        sf_array = np.array([query["sf_array"] for query in queries])
        sf_array = [sum(i) for i in zip(*sf_array)]

        # Heuristic: index the columns with the lowest summed selectivity.
        indexes_to_add = [
            i[0] for i in (sorted(enumerate(sf_array), key=lambda x: x[1])
                           )[:self.__index_amount]
        ]

        add_execution_time_for_method_and_indexes_configuration(
            'heuristic', indexes_to_add)

        indexes_to_add = get_indexes_qagent(self.__index_amount, queries, True)
        add_execution_time_for_method_and_indexes_configuration(
            'qlearning', indexes_to_add)
        drop_indexes(connector, table_name)

        indexes_to_add = get_indexes_dqn()
        drop_indexes(connector, table_name)
        add_execution_time_for_method_and_indexes_configuration(
            'dqn', indexes_to_add)
        drop_indexes(connector, table_name)

        indexes_to_add = get_indexes_supervised(self.__index_amount, queries)
        add_execution_time_for_method_and_indexes_configuration(
            'supervised', indexes_to_add)

        # Random baseline: sample index_amount distinct columns.
        indexes_to_add = random.sample(range(COLUMNS_AMOUNT),
                                       self.__index_amount)
        add_execution_time_for_method_and_indexes_configuration(
            'random', indexes_to_add)

        # Final report: one estimated total time per method.
        for method, extime in methods.items():
            print('{}: {}'.format(method, extime))