def step(self, action_num):
    """Apply the join action with index ``action_num`` and return the transition.

    ``self.action_list[action_num]`` is read as a pair of slot indices into
    ``self.action_obj``.  When neither slot holds an ``EmptyQuery`` the left
    slot is replaced by ``Query(left, right)`` and every other slot that
    compares equal to one of the two operands (normally the right one) is
    replaced by an ``EmptyQuery`` carrying an all-zero mask.  Otherwise the
    slots are left untouched and the step yields no reward.

    The episode is flagged as done once all slots but one are neither a
    ``Relation`` nor a ``Query``.  The reward is then derived from
    ``cm1(subquery, self.cursor)`` of the remaining plan, scaled with
    ``self.cost['min']`` / ``self.cost['max']`` on a square-root scale and
    clipped so that it never drops below -10.

    Args:
        action_num: Index into ``self.action_list``.

    Returns:
        A 4-tuple ``(observation, costs, is_done, info)``: the masks of all
        slots flattened into one list, the reward (0 unless the episode just
        finished), ``self.is_done`` and an empty dict.
    """
    action = self.action_list[action_num]
    left = self.action_obj[action[0]]
    right = self.action_obj[action[1]]

    costs = 0
    done_counter = 0

    if type(left) is not EmptyQuery and type(right) is not EmptyQuery:
        new_action_space = []
        for subquery in self.action_obj:
            if subquery is left:
                # The joined plan takes over the slot of its left operand.
                new_action_space.append(Query(left, right))
            elif subquery not in (left, right):
                new_action_space.append(subquery)
            else:
                # Consumed operand: blank the slot with an all-zero mask of
                # the same width as the current observation rows.
                new_action_space.append(
                    EmptyQuery(list(np.zeros(len(self.obs[0]), dtype=int))))
        self.action_obj = new_action_space

        # Count the slots that no longer hold a joinable plan.
        for subquery in self.action_obj:
            if type(subquery) is not Relation and type(subquery) is not Query:
                done_counter += 1

    self.obs = [obj.mask for obj in self.action_obj]

    # `==`, not `is`: identity comparison of ints only works by accident of
    # CPython's small-integer cache.
    if done_counter == len(self.action_obj) - 1:
        for subquery in self.action_obj:
            if type(subquery) is Relation or type(subquery) is Query:
                # Query the cost once; the error path below reuses the value
                # instead of running cm1 a second time.
                raw_cost = cm1(subquery, self.cursor)
                try:
                    # Square-root scaling of the cost into the range [-10, 0].
                    costs = -1 * (
                        sqrt(raw_cost - self.cost['min'])
                        / sqrt(self.cost['max'] - self.cost['min'])
                        * 10
                    )
                except (ArithmeticError, ValueError, TypeError):
                    # e.g. cost below the recorded minimum or max == min:
                    # report the offending cost and fall back to no reward.
                    print("costs: " + str(raw_cost))
                    costs = 0
                if costs < -10.:
                    costs = -10.
        self.is_done = True

    return np.matrix(
        self.obs).flatten().tolist()[0], costs, self.is_done, {}
def _dp_evaluate(query, cursor, lambda_f, sql, schema, primary):
    """Return ``(name, cost)`` for a joinable candidate, ``None`` for a cross join.

    ``Query_Init`` is called for every candidate, joinable or not, because
    the original code reset the global state after each constructed query.
    """
    # NOTE(review): assumes join_condition is a list that is empty for a
    # cross join -- confirm against the Query class.
    joinable = query.join_condition != []
    cost = cm1(query, cursor, lambda_f) if joinable else None
    name = query.name
    Query_Init(sql, schema, primary)  # to reset global variables
    return (name, cost) if joinable else None


def _dp_keep_cheapest(candidates, name, cost, relations, query):
    """Store the plan under ``name`` unless a cheaper one is already kept."""
    best = candidates.get(name)
    if best is None or cost < best['cost']:
        candidates[name] = {'cost': cost, 'rel': relations, 'obj': query}


def dynamic_programming_right_deep(sql, schema, primary, lambda_f=1):
    """Search right-deep join orders level by level, keeping the cheapest plan per name.

    Level 1 builds every ordered pair of relations; each further level joins
    one additional relation on top of every kept plan.  Plans sharing the same
    ``Query.name`` are treated as equivalent and only the cheapest one (by
    ``cm1``) survives.  Candidates without a join condition (cross joins) are
    skipped.

    Args:
        sql, schema, primary: Passed through unchanged to ``Query_Init``.
        lambda_f: Passed through unchanged as third argument of ``cm1``.

    Returns:
        ``[plan_as_string, num_of_checks, cost]`` for the cheapest complete
        plan, where ``num_of_checks`` counts every candidate that was built.

    Raises:
        psycopg2.Error: If the database connection cannot be opened.
        ValueError: If no plan could be built (fewer than two relations, or
            no way to connect them without a cross join).
    """
    from contextlib import closing

    try:
        conn = psycopg2.connect(host="localhost", database="imdbload",
                                user="******", password="******")
    except psycopg2.Error:
        print("I am unable to connect to the database")
        raise  # continuing would only fail later on the unbound connection

    # Close cursor and connection on every exit path.
    with closing(conn), closing(conn.cursor()) as cursor:
        query_env = Query_Init(sql, schema, primary)
        relations = [rel for rel in query_env.actions if type(rel) is Relation]
        query_env.actions = relations
        num_of_checks = 0

        # Step 1: all 2-way joins, cheapest per equivalent outcome.
        queries = {}
        for left, right in permutations(relations, 2):
            num_of_checks += 1
            query = Query(left, right)
            evaluated = _dp_evaluate(query, cursor, lambda_f,
                                     sql, schema, primary)
            if evaluated is None:
                continue  # no cross-join rule
            name, cost = evaluated
            _dp_keep_cheapest(queries, name, cost, [left, right], query)

        # Steps 2..n-1: extend every kept plan by one more relation.
        level = 0  # stays 0 when there are only two relations
        for level in range(1, len(relations) - 1):
            print("STEP: " + str(level) + " | #candidates: " + str(len(queries)))
            new_queries = {}
            for relation in relations:
                for subquery in queries.values():
                    joined = subquery['rel']
                    if relation in joined:
                        continue
                    num_of_checks += 1
                    # Rebuild the right-deep tree from its relation list,
                    # then put the new relation on top.
                    query = Query(joined[0], joined[1])
                    for inner in joined[2:]:
                        query = Query(inner, query)
                    query = Query(relation, query)
                    evaluated = _dp_evaluate(query, cursor, lambda_f,
                                             sql, schema, primary)
                    if evaluated is None:
                        continue  # no cross-join rule
                    name, cost = evaluated
                    _dp_keep_cheapest(new_queries, name, cost,
                                      joined + [relation], query)
            queries = new_queries

        print("STEP: " + str(level + 1) + " | #candidates: " + str(len(queries)))

        if not queries:
            raise ValueError(
                "no right-deep join order could be built without cross joins")

        # Pick the cheapest surviving plan (the first one on ties).
        best = min(queries.values(), key=lambda plan: plan['cost'])
        plan_text = str(best['obj'])
        display('dynamic prog. right deep', plan_text, num_of_checks,
                best['cost'])
        return [plan_text, num_of_checks, best['cost']]
def greedy_left_deep(sql, schema, primary):
    """Build a left-deep join order greedily, one cheapest extension at a time.

    The cheapest ordered pair of relations (by ``cm1``) seeds the plan; each
    further level appends the single not-yet-joined relation whose resulting
    left-deep plan is cheapest.  Candidates without a join condition (cross
    joins) are skipped.

    Args:
        sql, schema, primary: Passed through unchanged to ``Query_Init``.

    Returns:
        ``[plan_as_string, num_of_checks, cost]`` for the final plan, where
        ``num_of_checks`` counts every candidate that was built.

    Raises:
        psycopg2.Error: If the database connection cannot be opened.
        ValueError: If some level yields no viable candidate (fewer than two
            relations, or nothing joinable without a cross join).
    """
    from contextlib import closing

    no_plan_msg = "no left-deep join order could be built without cross joins"

    try:
        conn = psycopg2.connect(host="localhost", database="imdbload",
                                user="******", password="******")
    except psycopg2.Error:
        print("I am unable to connect to the database")
        raise  # continuing would only fail later on the unbound connection

    # Close cursor and connection on every exit path.
    with closing(conn), closing(conn.cursor()) as cursor:
        query_env = Query_Init(sql, schema, primary)
        relations = [rel for rel in query_env.actions if type(rel) is Relation]
        query_env.actions = relations
        num_of_checks = 0

        # Step 1: cheapest 2-way join.
        best = {'cost': float('inf')}
        for left, right in permutations(relations, 2):
            num_of_checks += 1
            query = Query(left, right)
            # NOTE(review): assumes join_condition is a list that is empty
            # for a cross join -- confirm against the Query class.
            joinable = query.join_condition != []
            cost = cm1(query, cursor) if joinable else None
            # Reset after every candidate, as the original code did.
            Query_Init(sql, schema, primary)  # to reset global variables
            if joinable and cost < best['cost']:
                best = {'cost': cost, 'rel': [left, right], 'obj': query}
        if 'rel' not in best:
            raise ValueError(no_plan_msg)

        # Steps 2..n-1: append the relation giving the cheapest extension.
        level = 0  # stays 0 when there are only two relations
        for level in range(1, len(relations) - 1):
            # NOTE: len(best) is the number of keys of the plan dict, kept
            # so the log output matches the previous implementation.
            print("STEP: " + str(level) + " | #candidates: " + str(len(best)))
            joined = best['rel']
            level_best = {'cost': float('inf')}
            for relation in relations:
                if relation in joined:
                    continue
                num_of_checks += 1
                # Rebuild the left-deep tree from its relation list, then
                # append the new relation as the right-most leaf.
                query = Query(joined[0], joined[1])
                for nxt in joined[2:]:
                    query = Query(query, nxt)
                query = Query(query, relation)
                joinable = query.join_condition != []  # no cross-join rule
                cost = cm1(query, cursor) if joinable else None
                Query_Init(sql, schema, primary)  # to reset global variables
                if joinable and cost < level_best['cost']:
                    level_best = {'cost': cost,
                                  'rel': joined + [relation],
                                  'obj': query}
            if 'rel' not in level_best:
                raise ValueError(no_plan_msg)
            best = level_best

        print("STEP: " + str(level + 1) + " | #candidates: " + str(len(best)))

        plan_text = str(best['obj'])
        display('greedy left deep', plan_text, num_of_checks, best['cost'])
        return [plan_text, num_of_checks, best['cost']]