Example #1
0
    def step(self, action_num):
        """Apply the join action ``action_num`` and return the transition.

        ``self.action_list[action_num]`` yields a pair of positions into
        ``self.action_obj``.  If neither position holds an ``EmptyQuery``,
        the two entries are merged into one ``Query`` stored at the left
        position, and every other entry that compares equal to the left or
        right operand is replaced by an ``EmptyQuery`` with an all-zero mask.
        If either position is already an ``EmptyQuery`` the state is left
        untouched.

        The episode ends once all entries but one are neither a ``Relation``
        nor a ``Query``.  The remaining plan is then costed with ``cm1`` and
        converted into a non-positive reward that is clamped at ``-10``.

        Args:
            action_num: Index into ``self.action_list``.

        Returns:
            A 4-tuple ``(observation, costs, is_done, info)`` where
            ``observation`` is the masks of all entries flattened into one
            list, ``costs`` is ``0`` until the episode ends, ``is_done`` is
            ``self.is_done`` and ``info`` is an empty dict.
        """
        action = self.action_list[action_num]
        left_pos = action[0]
        right_pos = action[1]

        costs = 0
        done_counter = 0

        # Exact type checks are kept on purpose: the class hierarchy of
        # Relation / Query / EmptyQuery is not visible from here.
        left = self.action_obj[left_pos]
        if (type(left) is not EmptyQuery
                and type(self.action_obj[right_pos]) is not EmptyQuery):
            right = self.action_obj[right_pos]

            merged_space = []
            for subquery in self.action_obj:
                if subquery is left:
                    # The joined sub-plan takes the place of its left operand.
                    merged_space.append(Query(left, right))
                elif subquery not in (left, right):
                    merged_space.append(subquery)
                else:
                    # Consumed operand: blank it out with an all-zero mask as
                    # wide as the current observation rows.
                    empty_mask = list(np.zeros(len(self.obs[0]), dtype=int))
                    merged_space.append(EmptyQuery(empty_mask))
            self.action_obj = merged_space

            # Count the slots that no longer hold a joinable sub-plan.
            for subquery in self.action_obj:
                if not (type(subquery) is Relation
                        or type(subquery) is Query):
                    done_counter += 1

        self.obs = [obj.mask for obj in self.action_obj]

        # Value comparison: the previous ``is`` only worked by accident
        # through CPython's small-integer cache.
        if done_counter == len(self.action_obj) - 1:
            for subquery in self.action_obj:
                if type(subquery) is Relation or type(subquery) is Query:
                    # Square-root normalisation of the plan cost between
                    # cost['min'] and cost['max'], scaled to 10 and negated.
                    try:
                        costs = -1 * (
                            sqrt(cm1(subquery, self.cursor)
                                 - self.cost['min'])
                            / sqrt(self.cost['max'] - self.cost['min']) * 10
                        )
                    except Exception:
                        # Best effort: e.g. a cost below cost['min'] or
                        # cost['max'] == cost['min'] makes the formula fail;
                        # report the raw cost and fall back to a zero reward.
                        print("costs: " + str(cm1(subquery, self.cursor)))
                        costs = 0
                    if costs < -10.:
                        costs = -10.
            self.is_done = True

        return np.matrix(
            self.obs).flatten().tolist()[0], costs, self.is_done, {}
def dynamic_programming_right_deep(sql, schema, primary, lambda_f=1):
    """Search a right-deep join order for ``sql`` with dynamic programming.

    A PostgreSQL connection is opened, every ordered pair of relations is
    costed with ``cm1``, and the candidates are then extended one relation
    at a time (new relation on the left, existing plan on the right).  For
    each ``Query.name`` only the cheapest candidate is kept.  Candidates
    whose ``join_condition`` is empty are skipped (no cross joins).

    Args:
        sql: Passed through to ``Query_Init``.
        schema: Passed through to ``Query_Init``.
        primary: Passed through to ``Query_Init``.
        lambda_f: Passed through to ``cm1`` as its third argument.

    Returns:
        ``[plan, num_of_checks, cost]`` where ``plan`` is the string form of
        the first remaining candidate, ``num_of_checks`` the number of
        candidates that were built, and ``cost`` that candidate's cost.

    Raises:
        ValueError: If no candidate plan is left (fewer than two relations,
            or every candidate would need a cross join).
        Exception: Whatever ``psycopg2.connect`` raised, after the
            connection-failure message has been printed.
    """
    try:
        conn = psycopg2.connect(host="localhost", database="imdbload", user="******", password="******")
    except Exception:
        print("I am unable to connect to the database")
        # Re-raise the real cause instead of failing later on an unbound
        # ``conn``.
        raise

    # Release the cursor and the connection on success and on failure.
    try:
        cursor = conn.cursor()
        try:
            return _dp_right_deep_search(cursor, sql, schema, primary,
                                         lambda_f)
        finally:
            cursor.close()
    finally:
        conn.close()


def _dp_right_deep_search(cursor, sql, schema, primary, lambda_f):
    """Run the right-deep enumeration on an already opened cursor."""
    query_env = Query_Init(sql, schema, primary)
    num_of_checks = 0

    # Only plain relations take part in the enumeration.
    relations = [rel for rel in query_env.actions if type(rel) is Relation]
    query_env.actions = relations

    # Step 1: cost all ordered 2-way joins, keep the cheapest per name.
    queries = {}
    for left, right in permutations(relations, 2):
        num_of_checks += 1
        query = Query(left, right)
        # ``x is not []`` is always true, so test for emptiness instead.
        joinable = bool(query.join_condition)
        if joinable:
            cost = cm1(query, cursor, lambda_f)
            query_name = query.name
        # Reset after every candidate, including skipped ones, so that a
        # rejected cross join cannot leak state into the next candidate.
        Query_Init(sql, schema, primary)  # to reset global variables
        if not joinable:
            continue  # no cross-join rule
        known = queries.get(query_name)
        if known is None or cost < known['cost']:
            queries[query_name] = {
                'cost': cost,
                'rel': [left, right],
                'obj': query,
            }

    # Steps 2..n-1: grow every surviving candidate by one more relation and
    # drop equivalent plans (same name) that are more expensive.
    step = 0  # stays 0 when the loop below never runs (two relations)
    for step in range(1, len(relations) - 1):
        print("STEP: " + str(step) + " | #candidates: " + str(len(queries)))
        new_queries = {}
        for relation in relations:
            for subquery in queries.values():
                joined = subquery['rel']
                if relation in joined:
                    continue
                num_of_checks += 1
                # Rebuild the right-deep tree: each further relation becomes
                # the left input and the plan so far the right input.
                query = Query(joined[0], joined[1])
                for inner in joined[2:]:
                    query = Query(inner, query)
                query = Query(relation, query)
                joinable = bool(query.join_condition)
                if joinable:
                    cost = cm1(query, cursor, lambda_f)
                    query_name = query.name
                Query_Init(sql, schema, primary)  # to reset global variables
                if not joinable:
                    continue  # no cross-join rule
                known = new_queries.get(query_name)
                if known is None or cost < known['cost']:
                    new_queries[query_name] = {
                        'cost': cost,
                        'rel': joined + [relation],
                        'obj': query,
                    }
        queries = new_queries
    print("STEP: " + str(step + 1) + " | #candidates: " + str(len(queries)))

    if not queries:
        raise ValueError(
            "no join order without cross joins could be built for this query")

    # As before, the first remaining candidate is reported.
    best = next(iter(queries.values()))
    plan = best['obj'].__str__()
    display('dynamic prog. right deep', plan, num_of_checks, best['cost'])
    return [plan, num_of_checks, best['cost']]
def greedy_left_deep(sql, schema, primary):
    """Build a left-deep join order for ``sql`` greedily.

    A PostgreSQL connection is opened, the cheapest ordered pair of
    relations (according to ``cm1``) is chosen, and the plan is then grown
    by repeatedly appending, on the right, the relation that gives the
    cheapest next plan.  Candidates whose ``join_condition`` is empty are
    skipped (no cross joins).

    Args:
        sql: Passed through to ``Query_Init``.
        schema: Passed through to ``Query_Init``.
        primary: Passed through to ``Query_Init``.

    Returns:
        ``[plan, num_of_checks, cost]`` where ``plan`` is the string form of
        the final plan, ``num_of_checks`` the number of candidates that were
        built, and ``cost`` the final plan's cost.

    Raises:
        ValueError: If no plan could be built (fewer than two relations, or
            some step had no candidate that avoids a cross join).
        Exception: Whatever ``psycopg2.connect`` raised, after the
            connection-failure message has been printed.
    """
    try:
        conn = psycopg2.connect(host="localhost", database="imdbload", user="******", password="******")
    except Exception:
        print("I am unable to connect to the database")
        # Re-raise the real cause instead of failing later on an unbound
        # ``conn``.
        raise

    # Release the cursor and the connection on success and on failure.
    try:
        cursor = conn.cursor()
        try:
            return _greedy_left_deep_search(cursor, sql, schema, primary)
        finally:
            cursor.close()
    finally:
        conn.close()


def _greedy_left_deep_search(cursor, sql, schema, primary):
    """Run the greedy left-deep construction on an already opened cursor."""
    query_env = Query_Init(sql, schema, primary)
    num_of_checks = 0

    # Only plain relations take part in the search.
    relations = [rel for rel in query_env.actions if type(rel) is Relation]
    query_env.actions = relations

    # Step 1: cost all ordered 2-way joins and keep the single cheapest one.
    queries = {'cost': float('inf')}
    for left, right in permutations(relations, 2):
        num_of_checks += 1
        query = Query(left, right)
        # ``x is not []`` is always true, so test for emptiness instead.
        joinable = bool(query.join_condition)
        if joinable:
            cost = cm1(query, cursor)
        # Reset after every candidate, including skipped ones, so that a
        # rejected cross join cannot leak state into the next candidate.
        Query_Init(sql, schema, primary)  # to reset global variables
        if joinable and cost < queries['cost']:
            queries = {'cost': cost, 'rel': [left, right], 'obj': query}

    # Steps 2..n-1: append the relation that yields the cheapest next plan.
    step = 0  # stays 0 when the loop below never runs (two relations)
    for step in range(1, len(relations) - 1):
        if 'rel' not in queries:
            break  # nothing to extend; reported as ValueError below
        new_queries = {'cost': float('inf')}
        # NOTE: ``queries`` is a single plan record here, so the printed
        # number is its key count, not a number of plans.
        print("STEP: "+str(step)+" | #candidates: "+str(len(queries)))
        joined = queries['rel']
        for relation in relations:
            if relation in joined:
                continue
            num_of_checks += 1
            # Rebuild the left-deep tree: the plan so far is always the left
            # input and the next relation the right input.
            query = Query(joined[0], joined[1])
            for inner in joined[2:]:
                query = Query(query, inner)
            query = Query(query, relation)
            joinable = bool(query.join_condition)
            if joinable:
                cost = cm1(query, cursor)
            Query_Init(sql, schema, primary)  # to reset global variables
            if joinable and cost < new_queries['cost']:
                new_queries = {
                    'cost': cost,
                    'rel': joined + [relation],
                    'obj': query,
                }
        queries = new_queries

    if 'obj' not in queries:
        raise ValueError(
            "no join order without cross joins could be built for this query")

    print("STEP: " + str(step + 1) + " | #candidates: " + str(len(queries)))
    plan = queries['obj'].__str__()
    display('greedy left deep', plan, num_of_checks, queries['cost'])
    return [plan, num_of_checks, queries['cost']]