Beispiel #1
0
def dict_to_logical(plan_dict):
    """Recursively turn a nested plan dictionary into a ``LogicalPlan``.

    Keys that are interpreted:

    * ``'left'`` / ``'right'`` -- nested plan dictionaries, converted
      recursively.
    * ``'type'`` -- ``'NLJ'`` selects ``Xnjoin``; every other value selects
      ``Fjoin``.
    * ``'tpf'`` -- a space-separated triple pattern string.  Reaching this
      key during iteration ends the function immediately with a leaf plan.

    Any other key is ignored.  Without a ``'tpf'`` key the result is
    ``LogicalPlan(left, right, join)``, where missing parts stay ``None``.
    """
    lhs = rhs = operator = None

    for name, entry in plan_dict.items():
        if name == 'tpf':
            # The last space-separated token is dropped (presumably a
            # trailing "." terminator -- confirm against the plan producer).
            terms = entry.split(" ")[:-1]
            pattern = TriplePattern(Argument(terms[0]),
                                    Argument(terms[1]),
                                    Argument(terms[2]))
            return LogicalPlan(pattern)
        elif name == 'right':
            rhs = dict_to_logical(entry)
        elif name == 'left':
            lhs = dict_to_logical(entry)
        elif name == 'type':
            operator = Xnjoin if entry == 'NLJ' else Fjoin

    return LogicalPlan(lhs, rhs, operator)
Beispiel #2
0
def custom_plan(sources):
    """Build a fixed, hand-written physical plan over three triple patterns.

    The three patterns all share the subject variable ``?v3`` and are combined
    left-deep as ``((tp_1 JOIN tp_2) JOIN tp_3)``, using ``Xnjoin`` for both
    joins.

    :param sources: passed through unchanged as the first argument of
        ``PhysicalPlan``.
    :return: the ``PhysicalPlan`` wrapping the logical plan.
    """
    tp_1 = TriplePattern(Argument("?v3"),
                         Argument("<http://schema.org/trailer>"),
                         Argument("?v5"))
    # The predicate URI needs both angle brackets, like every other URI here.
    tp_2 = TriplePattern(
        Argument("?v3"),
        Argument("<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>"),
        Argument("<http://db.uwaterloo.ca/~galuc/wsdbm/ProductCategory2>"))
    tp_3 = TriplePattern(
        Argument("?v3"),
        Argument("<http://db.uwaterloo.ca/~galuc/wsdbm/hasGenre>"),
        Argument("?v0"))

    # XNJoin = Nested Loop Join
    # FJoin: Hash Join
    l_plan = LogicalPlan(LogicalPlan(LogicalPlan(tp_1),
                                     LogicalPlan(tp_2),
                                     operator=Xnjoin),
                         LogicalPlan(tp_3),
                         operator=Xnjoin)

    # NOTE(review): the meaning of the positional ``2`` is defined by
    # PhysicalPlan, which is not visible here -- confirm before changing it.
    plan = PhysicalPlan(sources, 2, l_plan, poly_operator=False)
    return plan
Beispiel #3
0
def decomposition_to_plan(decomposition):
    """Greedily combine the members of ``decomposition`` into one plan.

    Each member is wrapped in a leaf ``LogicalPlan``.  The leaf with the
    smallest ``cardinality`` seeds the plan; afterwards the first remaining
    leaf that shares at least one variable with the current plan is joined
    in.  If no remaining leaf shares a variable, the first remaining leaf is
    joined anyway.  The join operator comes from ``get_physical_operator`` and
    the cardinality of each new plan is recomputed with
    ``cardinality_estimation``.

    Raises ``IndexError`` for an empty ``decomposition``.
    """
    pending = sorted((LogicalPlan(tp) for tp in decomposition),
                     key=lambda leaf: leaf.cardinality)
    plan = pending[0]
    del pending[0]

    while pending:
        # First joinable leaf, or -- when none shares a variable -- simply
        # the next leaf in cardinality order.
        partner = next(
            (leaf for leaf in pending
             if plan.variables.intersection(leaf.variables)),
            pending[0])
        plan = LogicalPlan(plan, partner,
                           get_physical_operator(plan, partner))
        plan.compute_cardinality(cardinality_estimation)
        pending.remove(partner)

    return plan
Beispiel #4
0
    def joinPlans(self, L, R, best=False, operators=[Fjoin, Xnjoin]):
        """Enumerate join plans for ``L`` and ``R`` and cost them.

        One ``LogicalPlan(L, R, operator)`` is built per entry of
        ``operators``; an ``Xnjoin`` plan is only built when at least one side
        is a triple pattern.  Every plan gets its ``cost`` attribute set from
        ``compute_cost(self.cost_model)``.

        :param best: when truthy, return only the cheapest plan (not wrapped
            in a list).
        :return: the cheapest plan if ``best``; otherwise a list -- holding
            just the cheapest plan when both sides are triple patterns, or
            all candidate plans in the remaining cases.
        """
        candidates = []

        for op in operators:
            # For Xnjoin: Either side must be a triple pattern to be the
            # dependent operator in the plan
            if op == Xnjoin and not (L.is_triple_pattern
                                     or R.is_triple_pattern):
                continue
            candidate = LogicalPlan(L, R, op)
            candidate.cost = candidate.compute_cost(self.cost_model)
            candidates.append(candidate)

        if not best and not (L.is_triple_pattern and R.is_triple_pattern):
            return candidates

        # If the leafs are triple patterns, the cheapest plan can be
        # selected only
        cheapest = sorted(candidates, key=lambda p: p.cost)[0]
        if best:
            return cheapest
        return [cheapest]
Beispiel #5
0
def dict_to_logical(plan_dict, sources):
    """Recursively convert a nested plan dictionary into a ``LogicalPlan``.

    Keys that are interpreted:

    * ``'left'`` / ``'right'`` -- nested plan dictionaries, converted
      recursively with the same ``sources``.
    * ``'type'`` -- ``'NLJ'`` selects ``Xnjoin``; any other value ``Fjoin``.
    * ``'tpf'`` -- a triple pattern string.  Reaching this key ends the
      function with a leaf plan whose cardinality is read from
      ``plan_dict['cardinality']`` (default 0) and attributed to
      ``sources[0]``.

    For inner nodes the plan cardinality is taken from
    ``plan_dict['estimated_cardinality']`` (default 0).

    The function prints debugging output to stdout for every node.

    :param plan_dict: dictionary describing one plan node.
    :param sources: indexable collection; only ``sources[0]`` is used.
    :return: the ``LogicalPlan`` for this node.
    """
    left = None
    right = None
    join = None

    for key, value in plan_dict.items():
        if key == 'right':
            right = dict_to_logical(plan_dict['right'], sources)
        if key == 'left':
            left = dict_to_logical(plan_dict['left'], sources)
        if key == 'type':
            if value == 'NLJ':
                join = Xnjoin
            else:
                join = Fjoin

        if key == 'tpf':
            # ?name tokens, <...> bracketed URIs, and quoted literals with an
            # optional @lang-style suffix.
            pattern_var = re.compile(r'\?\w+')
            pattern_uri = re.compile(r'\<[^<^>]+\>')
            pattern_literal = re.compile(r'[\'"].*[\'"]@?\w*')

            # Keep the start offset next to each match so the position of
            # every term inside the string is available downstream.
            matches_var = [(m.start(), m.group(0))
                           for m in pattern_var.finditer(value)]
            matches_uri = [(m.start(), m.group(0))
                           for m in pattern_uri.finditer(value)]
            matches_literal = [(m.start(), m.group(0))
                               for m in pattern_literal.finditer(value)]

            arguments = [matches_var, matches_uri, matches_literal]

            # NOTE(review): proc_arguments is defined elsewhere; presumably it
            # merges the three match lists into subject/predicate/object
            # order -- confirm.
            arguments = proc_arguments(arguments)

            triple_pattern = TriplePattern(Argument(arguments[0]),
                                           Argument(arguments[1]),
                                           Argument(arguments[2]))
            cardinality = int(plan_dict.get("cardinality", 0))
            triple_pattern.cardinality = cardinality
            triple_pattern.sources = {sources[0]: cardinality}
            print('--- Now printing Triple Pattern: ---')
            print(triple_pattern)
            print('------')
            return LogicalPlan(triple_pattern)

    # Function-call form prints the same text on Python 2 and is valid
    # syntax on Python 3.
    print(plan_dict)
    logical_plan = LogicalPlan(left, right, join)
    logical_plan.cardinality = int(plan_dict.get("estimated_cardinality", 0))
    return logical_plan
Beispiel #6
0
    def get_logical_plan(self, body):
        """Translate a parsed query block into a logical plan.

        * ``UnionBlock`` -- every member of ``body.triples`` is planned
          recursively and falsy sub-plans are dropped.  A single surviving
          sub-plan is returned as-is; otherwise the sub-plans are wrapped in
          ``LogicalUnion(subplans, Xunion)``.
        * ``JoinBlock`` -- a block flagged ``bgp`` is handed to
          ``iterative_dynamic_programming1``; a block with one member is
          planned recursively; otherwise the first two members are planned
          and combined with ``Fjoin`` (further members are not looked at).
        * ``Optional`` -- ``body.triples`` is planned recursively.

        Any other input yields ``None``.
        """
        if isinstance(body, UnionBlock):
            branch_plans = [
                branch
                for branch in (self.get_logical_plan(ggp)
                               for ggp in body.triples)
                if branch
            ]
            if len(branch_plans) == 1:
                # No need for an additional union here
                return branch_plans[0]
            return LogicalUnion(branch_plans, Xunion)

        if isinstance(body, JoinBlock):
            if body.bgp:
                return self.iterative_dynamic_programming1(body.triples)
            if len(body.triples) == 1:
                return self.get_logical_plan(body.triples[0])
            lhs = self.get_logical_plan(body.triples[0])
            rhs = self.get_logical_plan(body.triples[1])
            return LogicalPlan(lhs, rhs, Fjoin)

        if isinstance(body, Optional):
            return self.get_logical_plan(body.triples)

        return None
Beispiel #7
0
    def lw_plan_from_tree(self,
                          node_id,
                          out_edges,
                          leafs,
                          leaf_map,
                          prefix=""):
        """Build a logical plan for the binary tree rooted at ``node_id``.

        ``out_edges[node_id]`` supplies the two children.  A join operator is
        drawn at random from ``Xnjoin``/``Fjoin`` for this node; leaf plans
        receive a ``node_id`` made of ``prefix`` plus a suffix that depends on
        the drawn operator (``011``/``100`` for ``Xnjoin``, ``001``/``010``
        otherwise).

        * Both children are leaves: they must be ``compatible`` or an
          ``Exception("Incompatible leafs")`` is raised.
        * Exactly one child is a leaf: the leaf plan becomes the first
          operand and the recursively built subtree the second.
        * Neither child is a leaf: both subtrees are built with the fixed
          prefixes ``"001"`` and ``"010"`` and always joined with ``Fjoin``.

        NOTE(review): the last case does not extend the incoming ``prefix``;
        confirm that resetting it is intended.
        """
        left_child = out_edges[node_id][0]
        right_child = out_edges[node_id][1]
        operator = choice([Xnjoin, Fjoin])

        if operator == Xnjoin:
            left_suffix, right_suffix = "011", "100"
        else:
            left_suffix, right_suffix = "001", "010"
        lid = prefix + left_suffix
        rid = prefix + right_suffix

        left_is_leaf = left_child in leafs
        right_is_leaf = right_child in leafs

        if left_is_leaf and right_is_leaf:
            first = LogicalPlan(leaf_map[left_child], node_id=lid)
            second = LogicalPlan(leaf_map[right_child], node_id=rid)
            if not leaf_map[left_child].compatible(leaf_map[right_child]):
                raise Exception("Incompatible leafs")
        elif left_is_leaf:
            first = LogicalPlan(leaf_map[left_child], node_id=lid)
            second = self.lw_plan_from_tree(right_child,
                                            out_edges,
                                            leafs,
                                            leaf_map,
                                            prefix=rid)
        elif right_is_leaf:
            first = LogicalPlan(leaf_map[right_child], node_id=rid)
            second = self.lw_plan_from_tree(left_child,
                                            out_edges,
                                            leafs,
                                            leaf_map,
                                            prefix=lid)
        else:
            first = self.lw_plan_from_tree(left_child,
                                           out_edges,
                                           leafs,
                                           leaf_map,
                                           prefix="001")
            second = self.lw_plan_from_tree(right_child,
                                            out_edges,
                                            leafs,
                                            leaf_map,
                                            prefix="010")
            operator = Fjoin

        return LogicalPlan(first, second, operator)
Beispiel #8
0
    def decompostion_to_plan(self, decomposition):
        """Greedily combine the sub-plans of ``decomposition`` into one plan.

        ``Filter`` members are collected and attached to the final plan as
        ``plan.filters``.  Every other member is wrapped in a leaf
        ``LogicalPlan`` and its length is appended (as a float) to
        ``self.bgp_count``.

        The leaf with the smallest ``cardinality`` seeds the plan.  Then the
        first remaining leaf sharing a variable with the current plan is
        joined in; if none shares a variable, the first remaining leaf is
        joined regardless.  Before the first variable-sharing join, a seed
        that is a basic graph pattern and is about to be joined via ``Xnjoin``
        is wrapped in a ``LogicalUnion``.

        Raises ``IndexError`` when there is no non-filter member.
        """
        filters = []
        leaf_plans = []
        for member in decomposition:
            if isinstance(member, Filter):
                filters.append(member)
                continue
            leaf_plans.append(LogicalPlan(member))
            self.bgp_count.append(float(len(member)))

        pending = sorted(leaf_plans, key=lambda leaf: leaf.cardinality)
        plan = pending[0]
        del pending[0]

        root = True

        while pending:
            partner = next(
                (leaf for leaf in pending
                 if plan.variables.intersection(leaf.variables)),
                None)
            shares_variable = partner is not None
            if not shares_variable:
                # In case we cannot find another join able triple pattern
                partner = pending[0]

            join_operator = self.get_physical_join_operator(plan, partner)

            if shares_variable:
                if (root and plan.is_basic_graph_pattern
                        and join_operator == Xnjoin):
                    plan = LogicalUnion([plan])
                # Only a variable-sharing join clears the flag.
                root = False

            plan = LogicalPlan(plan, partner, join_operator)
            plan.compute_cardinality(self.cardinality_estimation)
            pending.remove(partner)

        plan.filters = filters
        return plan
Beispiel #9
0
    def get_logical_plan_simple(self, body):
        """Translate a parsed query block into a logical plan.

        * ``UnionBlock`` -- members of ``body.triples`` are planned via
          ``get_logical_plan``; falsy sub-plans are dropped.  One surviving
          sub-plan is returned directly, otherwise all are wrapped in
          ``LogicalUnion(subplans, Xunion)``.
        * ``JoinBlock`` -- a ``bgp`` block goes to ``optimize_bgp``; a
          single-member block is planned via ``get_logical_plan``; otherwise
          the first two members are planned and joined.  The join operator is
          ``get_optional_operator(left, right)`` when there are exactly two
          members and the second is an ``Optional``, else ``Fjoin``.
        * ``Optional`` -- ``body.triples`` is planned via
          ``get_logical_plan``.

        Anything else yields ``None``.

        NOTE(review): recursion goes through ``get_logical_plan`` rather than
        this method -- confirm that is intended.
        """
        if isinstance(body, UnionBlock):
            branch_plans = [
                branch
                for branch in (self.get_logical_plan(ggp)
                               for ggp in body.triples)
                if branch
            ]
            if len(branch_plans) == 1:
                # No need for an additional union here
                return branch_plans[0]
            return LogicalUnion(branch_plans, Xunion)

        if isinstance(body, JoinBlock):
            if body.bgp:
                return self.optimize_bgp(body.triples)
            if len(body.triples) == 1:
                return self.get_logical_plan(body.triples[0])

            # TODO: Handle case with several optionals
            optional_pair = (len(body.triples) == 2
                             and isinstance(body.triples[1], Optional))
            lhs = self.get_logical_plan(body.triples[0])
            rhs = self.get_logical_plan(body.triples[1])
            if optional_pair:
                # Get operator for Optional
                operator = self.get_optional_operator(lhs, rhs)
            else:
                operator = Fjoin
            return LogicalPlan(lhs, rhs, operator)

        if isinstance(body, Optional):
            return self.get_logical_plan(body.triples)

        return None
Beispiel #10
0
    def optimize_subquery(self, subquery, filters):
        """Plan every combination of alternatives in ``subquery``.

        ``subquery`` is an iterable of iterables; one plan is built for each
        element of their Cartesian product.  Within a combination the member
        with the smallest ``cardinality`` seeds the plan, then the first
        remaining member sharing a variable with the plan is joined in (or
        simply the first remaining member when none shares a variable).
        ``filters`` is attached to every plan as ``plan.filters``.

        :return: ``None`` when the product is empty, otherwise the result of
            ``self.union_subplans`` over all plans.
        """
        plans = []
        for combination in product(*subquery):
            pending = sorted((LogicalPlan(tp) for tp in combination),
                             key=lambda leaf: leaf.cardinality)
            plan = pending[0]
            del pending[0]

            while pending:
                # First joinable leaf, else fall back to the next leaf.
                partner = next(
                    (leaf for leaf in pending
                     if plan.variables.intersection(leaf.variables)),
                    pending[0])
                plan = LogicalPlan(
                    plan, partner,
                    self.get_physical_join_operator(plan, partner))
                plan.compute_cardinality(self.cardinality_estimation)
                pending.remove(partner)

            plan.filters = filters
            plans.append(plan)

        if not plans:
            return None
        return self.union_subplans(plans)
Beispiel #11
0
    def iterative_dynamic_programming1(self, triple_patterns):
        """Plan ``triple_patterns`` with iterative dynamic programming.

        A single triple pattern is returned directly as a leaf plan.
        Otherwise plans are built bottom-up in blocks of up to ``k`` patterns
        (``self.k``, or 2 when ``self.adaptive_k`` is set and there are at
        least six patterns).  After each block the cheapest plan of size ``k``
        replaces its member patterns in the to-do set, until one entry
        remains.

        Finally the plans of the last round are compared by cost and by
        ``average_cost(self.robust_model)``.  The method stores
        ``self.cost_robust_ratio``, ``self.cost_cost_ratio`` and
        ``self.robust_over_cost`` and returns the robust alternative only if
        ``self.enable_robustplan`` and ``self.robust_over_cost`` are both
        truthy; otherwise the cheapest plan.

        ``self.planning_requests`` is increased by ``len(self.sources)`` for
        every triple pattern whose metadata is fetched.

        :raises Exception: ``"IDP Error: No best plan"`` when a round yields
            no candidate plan of size ``k``.
        """
        if len(triple_patterns) == 1:
            # For each server, we need one request to get the metadata
            self.planning_requests += len(self.sources)
            get_metadata(self.sources, triple_patterns[0])
            return LogicalPlan(triple_patterns[0])

        # Hard-wired off: the "best row" extension below never runs.
        best_row = False
        opt_plan = {}
        toDo = set()

        k = min(len(triple_patterns), self.k)
        if self.adaptive_k and len(triple_patterns) >= 6:
            k = 2

        for triple_pattern in triple_patterns:
            # For each server, we need one request to get the metadata
            self.planning_requests += len(self.sources)
            get_metadata(self.sources, triple_pattern)
            accessPlan = set([LogicalPlan(triple_pattern)])
            opt_plan[(triple_pattern, )] = accessPlan
            toDo.add(triple_pattern)

        # NOTE(review): if this loop never runs (fewer than two distinct
        # entries in toDo), ``best_plans`` is unbound below.
        while len(toDo) > 1:
            k = min(k, len(toDo))
            for i in range(2, k + 1):
                for S in combinations(toDo, i):

                    opt_plan[S] = set()

                    for O in self.true_subset(S):
                        opt_plan_O = opt_plan[O]
                        S_minus_O = tuple(set(S).difference(set(O)))

                        opt_plan_S_minus_O = opt_plan.get(S_minus_O, None)
                        if not opt_plan_S_minus_O or not opt_plan_O:
                            continue

                        for opt_plan_o in opt_plan_O:
                            for opt_plan_s_minus_o in opt_plan_S_minus_O:
                                # NOTE(review): relies on the plan type's
                                # ``+`` operator; presumably it yields the
                                # number of shared join variables -- confirm.
                                join_vars = opt_plan_o + opt_plan_s_minus_o
                                if join_vars > 0:
                                    join_plans = self.joinPlans(
                                        opt_plan_o, opt_plan_s_minus_o)
                                    join_plans_S = opt_plan[S].union(
                                        join_plans)
                                    # Keep only the top plans per subset.
                                    opt_plan[S] = self.best_n_plans(
                                        list(join_plans_S), self.top_t)

            # Collect every plan of exactly k entries still drawn from toDo.
            best_plans = []
            V = set()
            for key, values in opt_plan.items():
                for value in values:
                    k_len = len(key)
                    if k_len == k and value and set(key).issubset(toDo):
                        V.add(key)
                        rob = value.cost
                        best_plans.append((value, value.cost, rob, key))

            if len(best_plans) == 0:
                raise Exception("IDP Error: No best plan")

            for v in V:
                del opt_plan[v]

            # In intermediate steps of IDP: Take best plan only
            best_plan = sorted(best_plans, key=lambda x: (x[1], x[2]))[0]

            tps = best_plan[3]
            opt_plan[(tps, )] = set([best_plan[0]])

            if best_row:
                best_plans.remove(best_plan)
                # Best Row
                for bp in best_plans:
                    if bp[3] == tps:
                        opt_plan[(tps, )].add(bp[0])

            # Remove triple patterns from todo list and put the combined
            # key in their place.
            for tp in tps:
                toDo.remove(tp)
            toDo.add(best_plan[3])

        # Entries: (best_plans tuple, cost, robust cost, cost / robust cost).
        tmp_plans = []
        for plan in best_plans:
            cost = plan[0].cost
            rob = plan[0].average_cost(self.robust_model)
            tmp_plans.append((plan, cost, rob, cost / rob))

        cheap_plan = sorted(tmp_plans, key=lambda x: (x[1], x[3]))[0]

        # Decision rule for robust plan
        self.robust_over_cost = False
        rob_cost_ratio = cheap_plan[1] / cheap_plan[2]
        self.cost_robust_ratio = rob_cost_ratio

        if len(tmp_plans) > 1:
            tmp_plans.remove(cheap_plan)
            # A real list, so emptiness can be tested on Python 2 and 3.
            plans_over_thrshld = [
                candidate for candidate in tmp_plans
                if candidate[3] >= self.robustness_threshold
            ]
            if not plans_over_thrshld:
                plans_over_thrshld = tmp_plans
            robust_plan = sorted(plans_over_thrshld,
                                 key=lambda x: (x[1], x[2]))[0]
        else:
            robust_plan = cheap_plan

        # What is the cost ratio of the cheapest and the most robust plan
        cost_cost_ratio = cheap_plan[1] / robust_plan[1]
        self.cost_cost_ratio = cost_cost_ratio

        self.robust_over_cost = (
            rob_cost_ratio <= self.robustness_threshold
            and cost_cost_ratio >= self.cost_threshold)

        if self.enable_robustplan and self.robust_over_cost:
            logger.debug("IDP: Robust Plan over Cheapest Plan")
            return robust_plan[0][0]

        return cheap_plan[0][0]
Beispiel #12
0
    def optimize_subquery(self, triples):
        """Build a join tree for ``triples`` in two stages.

        Stage 1 groups the leaves into "stars": the remaining leaf with the
        lowest cardinality seeds a star, and every other remaining leaf that
        shares a variable with the seed's variables is joined onto it.
        Stage 2 repeatedly takes the first star and joins it with the first
        other star that shares a variable with it.

        Join operators are chosen from cardinality comparisons (``Xnjoin``
        when the left side is small relative to the right, else ``Fjoin``);
        the cardinality of each join is set from ``self.estimate_card`` or the
        fixed values assigned below.

        ``self.planning_requests`` is increased by ``len(self.sources)`` per
        triple.

        :param triples: triple patterns to plan.
        :return: the root ``LogicalPlan``.
        :raises IndexError: when ``triples`` is empty.
        """
        subtrees = []
        for triple in triples:
            # For each server, we need one request to get the metadata
            self.planning_requests += len(self.sources)
            get_metadata(self.sources, triple)
            subtrees.append(LogicalPlan(triple))

        subtrees.sort(key=lambda x: x.cardinality)

        # Stage 1: group leaves into stars around the cheapest leaf.
        stars = []
        while len(subtrees) > 0:

            to_delete = []
            star_tree = subtrees.pop(0)
            # Fixed to the seed's variables; it does not grow as leaves are
            # joined onto the star.
            star_vars = star_tree.variables

            for subtree_j in subtrees:
                join_variables = set(star_vars).intersection(
                    subtree_j.variables)

                # Case: There is no join.
                if len(join_variables) == 0:
                    continue

                to_delete.append(subtree_j)

                # Place physical operator estimating cardinality.
                res = self.estimate_card(star_tree.cardinality,
                                         subtree_j.cardinality)

                if star_tree.is_triple_pattern:
                    # Paper: if (tpi.count / tpi.pagesize) <= s.count then
                    # place a Nested Loop join.
                    use_nlj = star_tree.cardinality < (
                        subtree_j.cardinality / 100.0)
                else:
                    use_nlj = (
                        (star_tree.cardinality /
                         float(subtree_j.cardinality) < 0.30)
                        or (subtree_j.cardinality > 100 * 1000
                            and star_tree.cardinality < 100 * 1000)
                        or (subtree_j.cardinality < 100 * 5))

                if use_nlj:
                    join_type = Xnjoin
                    # If NLJ is placed, set res = 0/1 to force NLJs later
                    res = 1
                else:
                    # Place a Symmetric Hash join.
                    join_type = Fjoin

                star_tree = LogicalPlan(star_tree, subtree_j, join_type)
                star_tree.cardinality = res

            # Add current tree to the list of stars and
            # remove from the list of subtrees to process.
            stars.append(star_tree)
            for elem in to_delete:
                subtrees.remove(elem)

        # Stage 2: Build bushy tree to combine SSGs with common variables.
        # NOTE(review): a star that shares no variable with any other star is
        # popped and not re-added, so it is absent from the final tree --
        # confirm whether disconnected stars should be kept.
        while len(stars) > 1:

            subtree_i = stars.pop(0)
            star_vars = subtree_i.variables

            for j, subtree_j in enumerate(stars):
                join_variables = set(star_vars).intersection(
                    subtree_j.variables)

                # Case: There is a join between stars.
                if len(join_variables) > 0:

                    stars.pop(j)

                    # Estimate from the two stars actually being joined.
                    res = self.estimate_card(subtree_i.cardinality,
                                             subtree_j.cardinality)
                    # Place physical operators between stars.
                    if subtree_j.is_triple_pattern:
                        # This case models a satellite, therefore apply
                        # cardinality estimation.
                        if subtree_i.cardinality < (
                                subtree_j.cardinality / 100.0):
                            join_type = Xnjoin
                        else:
                            join_type = Fjoin
                    else:
                        res = (subtree_i.cardinality +
                               subtree_j.cardinality) / 2
                        join_type = Fjoin

                    star_tree = LogicalPlan(subtree_i, subtree_j, join_type)
                    star_tree.cardinality = res
                    stars.append(star_tree)

                    break

        tree = stars.pop()
        return tree
Beispiel #13
0
def decomposition_to_plan(decomposition):
    """Merge a source decomposition into leaves and join them greedily.

    ``decomposition`` yields ``(subquery, source, cardinality)`` triples.
    Entries with the same subquery are merged into one leaf:

    * ``TriplePattern`` -- a copy is created on first sight; later entries
      add ``source -> cardinality`` to its ``sources`` and add to its
      ``cardinality``.
    * ``BGP`` -- a copy with copied triple patterns is created on first
      sight; later entries record the source on every matching stored triple
      pattern and add to the stored cardinality.

    Entries of any other type are ignored.  The leaves are then joined
    starting with the lowest cardinality, each time taking the first
    remaining leaf that shares a variable with the plan (or the first
    remaining leaf if none does).

    Raises ``IndexError`` when no leaf was produced.
    """
    leafs = {}

    for subquery, source, cardinality in decomposition:

        if isinstance(subquery, TriplePattern):
            if subquery not in leafs:
                leaf = TriplePattern(subquery[0],
                                     subquery[1],
                                     subquery[2],
                                     sources={source: cardinality})
                leaf.cardinality = cardinality
                leafs[subquery] = leaf
            else:
                known = leafs[subquery]
                known.sources[source] = cardinality
                known.cardinality += cardinality

        elif isinstance(subquery, BGP):
            if subquery not in leafs:
                copied_tps = [
                    TriplePattern(tp[0],
                                  tp[1],
                                  tp[2],
                                  sources={source: cardinality})
                    for tp in subquery
                ]
                bgp_leaf = BGP(copied_tps)
                bgp_leaf.cardinality = cardinality
                leafs[subquery] = bgp_leaf
            else:
                for stored_tp in leafs[subquery]:
                    for incoming_tp in subquery:
                        if stored_tp == incoming_tp:
                            stored_tp.sources[source] = \
                                incoming_tp.cardinality
                leafs[subquery].cardinality += cardinality

    pending = sorted((LogicalPlan(leaf) for leaf in leafs.values()),
                     key=lambda node: node.cardinality)
    plan = pending[0]
    del pending[0]

    while pending:
        # First joinable leaf, or -- when none shares a variable -- simply
        # the next leaf in cardinality order.
        partner = next(
            (node for node in pending
             if plan.variables.intersection(node.variables)),
            pending[0])
        plan = LogicalPlan(plan, partner,
                           get_physical_operator(plan, partner))
        plan.compute_cardinality(cardinality_estimation)
        pending.remove(partner)

    return plan
Beispiel #14
0
    def plan_from_tree(self, node_id, out_edges, leafs, leaf_map):
        """Build and cost a logical plan for the tree rooted at ``node_id``.

        ``out_edges[node_id]`` supplies the two children.

        * Both children are leaves: they must be ``compatible`` (otherwise a
          bare ``Exception`` is raised) and are joined with a randomly drawn
          operator.
        * Exactly one child is a leaf: the leaf plan becomes the first
          operand, the recursively built subtree the second, joined with a
          randomly drawn operator.
        * Neither child is a leaf: both subtrees are built recursively and
          joined with ``Fjoin``.

        ``compute_cost(self.cost_model)`` is called on every plan built here
        before it is returned.
        """
        left_child = out_edges[node_id][0]
        right_child = out_edges[node_id][1]
        left_is_leaf = left_child in leafs
        right_is_leaf = right_child in leafs

        # The operator is drawn only after both operands exist, so recursive
        # calls consume random draws before this node does.
        if left_is_leaf and right_is_leaf:
            first = LogicalPlan(leaf_map[left_child])
            second = LogicalPlan(leaf_map[right_child])
            if not leaf_map[left_child].compatible(leaf_map[right_child]):
                raise Exception
            operator = choice([Xnjoin, Fjoin])
        elif left_is_leaf:
            first = LogicalPlan(leaf_map[left_child])
            second = self.plan_from_tree(right_child, out_edges, leafs,
                                         leaf_map)
            operator = choice([Xnjoin, Fjoin])
        elif right_is_leaf:
            first = LogicalPlan(leaf_map[right_child])
            second = self.plan_from_tree(left_child, out_edges, leafs,
                                         leaf_map)
            operator = choice([Xnjoin, Fjoin])
        else:
            first = self.plan_from_tree(left_child, out_edges, leafs,
                                        leaf_map)
            second = self.plan_from_tree(right_child, out_edges, leafs,
                                         leaf_map)
            operator = Fjoin

        plan = LogicalPlan(first, second, operator)
        plan.compute_cost(self.cost_model)
        return plan