def get_execution_time_for_indexes_configuration(indexes):
    """Return the summed estimated execution time of ``queries`` for ``indexes``.

    Every entry of ``indexes`` is passed to ``add_index``, then
    ``get_estimated_execution_time`` is summed over the ``'query'`` field of
    each item in ``queries``.  ``connector``, ``table_name`` and ``queries``
    are not parameters; they are resolved from the enclosing scope.

    Args:
        indexes: iterable of index identifiers accepted by ``add_index``.

    Returns:
        The sum of the per-query estimates (``0`` when ``queries`` is empty).
    """
    try:
        for index in indexes:
            add_index(connector, index, table_name)
        return sum(
            get_estimated_execution_time(connector, query['query'])
            for query in queries)
    finally:
        # Always clean up, even if index creation or estimation fails, so a
        # failed measurement cannot leave indexes behind for the next one.
        drop_indexes(connector, table_name)
def reset(self):
    """Start a new episode and return the initial observation.

    Once ``episode_number`` has reached ``max_episodes`` a fresh query batch
    of ``batch_size`` items is drawn from ``query_pull`` and the episode
    counter restarts.  The first episode on a batch prints the indexes
    reported by ``touched_indexes``.  All indexes are dropped, the state is
    cleared to all ``False`` and the action space is rebuilt.

    Returns:
        ``np.array`` whose first row is the cleared state and whose
        remaining rows are the ``'sf_array'`` entries of the current batch.
    """
    self.step_number = 0

    batch_exhausted = self.episode_number >= self.max_episodes
    if batch_exhausted:
        self.query_batch = np.random.choice(self.query_pull,
                                            self.batch_size)
        self.episode_number = 0

    if self.episode_number == 0:
        touched = touched_indexes(self.query_batch)
        print("New query batch, touched indexes: " + str(touched))
    self.episode_number += 1

    drop_indexes(self.connector, self.table_name)

    # The state keeps its length; every flag is simply cleared.
    column_count = len(self.state)
    self.state = [False] * column_count
    self.action_space = Dynamic(column_count)

    observation_rows = [self.state]
    observation_rows.extend(
        query['sf_array'] for query in self.query_batch)
    return np.array(observation_rows)
def reset(self):
    """Reset the environment and return the cleared state.

    Resets the step counter, drops the indexes via ``drop_indexes``,
    replaces the state with an all-``False`` list of the same length and
    rebuilds the action space for that length.

    Returns:
        The new state list (all ``False``).
    """
    self.step_number = 0
    drop_indexes(self.connector, self.table_name)

    # The state keeps its length; every flag is simply cleared.
    column_count = len(self.state)
    self.state = [False] * column_count
    self.action_space = Dynamic(column_count)
    return self.state
def __test_results(self, columns_participating):
    """Compare index-selection methods on generated query pulls and print results.

    Each trial generates a query pull, lets every method ('heuristic',
    'qlearning', 'dqn', 'supervised', 'random') choose indexes, and records
    the summed estimated execution time of the pull under that choice.
    Pairwise t-test p-values over the recorded timings and the per-method
    means are printed after every trial; the final means are printed once
    more when the loop ends after five trials.

    Args:
        columns_participating: forwarded to ``generate_query_pull`` and used
            in the name of the query-pull file.
    """
    # ``np.warnings`` was only an accidental alias of the stdlib module and
    # is missing from newer NumPy releases, so use ``warnings`` directly.
    import warnings

    def get_execution_time_for_indexes_configuration(indexes):
        """Return the summed estimate for ``queries`` with ``indexes`` added."""
        try:
            for index in indexes:
                add_index(connector, index, table_name)
            return sum(
                get_estimated_execution_time(connector, query['query'])
                for query in queries)
        finally:
            # Never leave indexes behind, even when a measurement fails.
            drop_indexes(connector, table_name)

    def add_execution_time_for_method_and_indexes_configuration(
            method, indexes):
        """Append the timing of ``indexes`` to the samples of ``method``."""
        elapsed = get_execution_time_for_indexes_configuration(indexes)
        methods.setdefault(method, []).append(elapsed)

    def get_indexes_dqn():
        """Run the stored DQN agent for one episode and return its indexes."""
        env = DatabaseIndexesEnv(n=const.COLUMNS_AMOUNT,
                                 table_name=table_name,
                                 query_pull=queries,
                                 batch_size=const.BATCH_SIZE,
                                 connector=connector,
                                 k=3,
                                 max_episodes=1)
        dqn = load_agent(
            path.join(
                "..",
                "dqn_{}_weights_6_4_2_1_50000_episodes_estimated.h5f".
                format(ENV_NAME)))
        dqn.test(env, nb_episodes=1)
        return [column for column, chosen in enumerate(env.state) if chosen]

    connector = PostgresConnector()
    drop_indexes(connector, table_name)
    methods = {}
    trial = 0
    warnings.filterwarnings('ignore')

    while True:
        queries = generate_query_pull(
            '../.test_query_pull_' + str(columns_participating) + '_' +
            str(trial), self.__queries_amount, columns_participating,
            table_column_types, table_column_names, table_name, connector)
        trial += 1

        # Heuristic: the columns with the smallest summed 'sf_array' value.
        sf_array = np.array([query['sf_array']
                             for query in queries]).sum(axis=0)
        ranked_columns = sorted(enumerate(sf_array),
                                key=lambda pair: pair[1])
        indexes_to_add = [
            column for column, _ in ranked_columns[:self.__index_amount]
        ]
        add_execution_time_for_method_and_indexes_configuration(
            'heuristic', indexes_to_add)

        indexes_to_add = get_indexes_qagent(self.__index_amount, queries,
                                            True)
        add_execution_time_for_method_and_indexes_configuration(
            'qlearning', indexes_to_add)
        # Extra clean up to make sure no indexes are left from the agent.
        drop_indexes(connector, table_name)

        # dqn
        indexes_to_add = get_indexes_dqn()
        drop_indexes(connector, table_name)
        add_execution_time_for_method_and_indexes_configuration(
            'dqn', indexes_to_add)
        drop_indexes(connector, table_name)

        indexes_to_add = get_indexes_supervised(self.__index_amount,
                                                queries)
        add_execution_time_for_method_and_indexes_configuration(
            'supervised', indexes_to_add)
        drop_indexes(connector, table_name)

        indexes_to_add = random.sample(range(COLUMNS_AMOUNT),
                                       self.__index_amount)
        add_execution_time_for_method_and_indexes_configuration(
            'random', indexes_to_add)

        times_combinations = list(
            itertools.combinations(methods.values(), 2))
        p_values = [
            stats.ttest_ind(first, second)[1]
            for first, second in times_combinations
        ]
        print(p_values)

        # The previous condition, ``all(p < 0.01 ...) and i >= 5 or i >= 5``,
        # reduces to ``i >= 5``: the p-values are only reported and the loop
        # always stops after five trials.
        # NOTE(review): confirm significance was not meant to gate the stop.
        if trial >= 5:
            break

        print('try #' + str(trial))
        for method, times in methods.items():
            print('{}: {}'.format(method, np.mean(times)))
        print('')

    for method, times in methods.items():
        print('{}: {}'.format(method, np.mean(times)))
def __test_against_tpch(self):
    """Compare the index-selection methods on the TPC-H queries and print results.

    Loads ``../tpc_h_queries/tpch.json``, builds one ``'sf_array'`` per query
    from the row counts of its sub-queries divided by the table's total row
    count, then lets each method ('heuristic', 'qlearning', 'dqn',
    'supervised', 'random') choose indexes and prints the summed estimated
    execution time of the queries under each choice.
    """
    # ``np.warnings`` was only an accidental alias of the stdlib module and
    # is missing from newer NumPy releases, so use ``warnings`` directly.
    import warnings

    def get_execution_time_for_indexes_configuration(indexes):
        """Return the summed estimate for ``queries`` with ``indexes`` added."""
        try:
            for index in indexes:
                add_index(connector, index, table_name)
            return sum(
                get_estimated_execution_time(connector, query['query'])
                for query in queries)
        finally:
            # Never leave indexes behind, even when a measurement fails.
            drop_indexes(connector, table_name)

    def add_execution_time_for_method_and_indexes_configuration(
            method, indexes):
        """Store the timing of ``indexes`` as the result of ``method``."""
        methods[method] = get_execution_time_for_indexes_configuration(
            indexes)

    def get_indexes_dqn():
        """Run the stored DQN agent for one episode and return its indexes."""
        env = DatabaseIndexesEnv(n=const.COLUMNS_AMOUNT,
                                 table_name=table_name,
                                 query_pull=queries,
                                 batch_size=const.BATCH_SIZE,
                                 connector=connector,
                                 k=3,
                                 max_episodes=1)
        dqn = load_agent(
            path.join(
                "..",
                "dqn_{}_weights_6_4_2_1_50000_episodes_estimated.h5f".
                format(ENV_NAME)))
        dqn.test(env, nb_episodes=1)
        return [column for column, chosen in enumerate(env.state) if chosen]

    connector = PostgresConnector()
    drop_indexes(connector, table_name)
    methods = {}
    warnings.filterwarnings('ignore')

    total_amount_of_rows = connector.query("select count (*) from " +
                                           table_name + ";").fetchone()[0]
    total_rows = float(total_amount_of_rows)

    queries = []
    with open("../tpc_h_queries/tpch.json") as infile:
        json_obj = json.load(infile)
        for elem in json_obj:
            # Columns not covered by a sub-query keep the value 1.
            # NOTE(review): 17 is presumably the table's column count;
            # confirm against const.COLUMNS_AMOUNT.
            sf_list = [1] * 17
            for subquery, included_col in zip(elem["subquery"],
                                              elem["cols"]):
                # Format exactly once: formatting the already-formatted SQL
                # again would fail if it contained a literal brace.
                sql = subquery.format(table_name)
                matching_rows = connector.query(sql).fetchone()[0]
                sf_list[int(included_col)] = (float(matching_rows) /
                                              total_rows)
            queries.append({
                'query': elem["query"].format(table_name),
                'sf_array': sf_list
            })

    # Heuristic: the columns with the smallest summed 'sf_array' value.
    sf_array = np.array([query["sf_array"] for query in queries])
    sf_array = [sum(column) for column in zip(*sf_array)]
    ranked_columns = sorted(enumerate(sf_array), key=lambda pair: pair[1])
    indexes_to_add = [
        column for column, _ in ranked_columns[:self.__index_amount]
    ]
    add_execution_time_for_method_and_indexes_configuration(
        'heuristic', indexes_to_add)

    indexes_to_add = get_indexes_qagent(self.__index_amount, queries, True)
    add_execution_time_for_method_and_indexes_configuration(
        'qlearning', indexes_to_add)
    drop_indexes(connector, table_name)

    indexes_to_add = get_indexes_dqn()
    drop_indexes(connector, table_name)
    add_execution_time_for_method_and_indexes_configuration(
        'dqn', indexes_to_add)
    drop_indexes(connector, table_name)

    indexes_to_add = get_indexes_supervised(self.__index_amount, queries)
    add_execution_time_for_method_and_indexes_configuration(
        'supervised', indexes_to_add)

    indexes_to_add = random.sample(range(COLUMNS_AMOUNT),
                                   self.__index_amount)
    add_execution_time_for_method_and_indexes_configuration(
        'random', indexes_to_add)

    for method, extime in methods.items():
        print('{}: {}'.format(method, extime))