Esempio n. 1
0
    def select_sources(self, triple_pattern):
        """Assign sources from the statistics and log what they miss.

        The reference set of sources for the pattern's predicate (index 1)
        is taken from the full statistics when they are set; otherwise
        ``get_metadata`` is called for all configured sources and the names
        found on ``triple_pattern.sources`` are used with ``tpf@`` removed.

        The sources the statistics report for the predicate are then
        compared with that reference set. For URI predicates the missing
        sources are stored in ``missed_predicates`` (statistics report
        nothing at all; a list is stored and ``missed_predicate_cnt`` is
        incremented) or in ``missed_sources`` (statistics report only some
        of the sources; a set is stored).

        :param triple_pattern: pattern to process; its ``sources`` attribute
            is overwritten with the statistics' result for the predicate.
        """
        predicate = triple_pattern[1]

        if self.__full_stats:
            reference = set(
                self.__full_stats.predicate_counts(predicate).keys())
        else:
            # No full statistics: ask the sources themselves.
            get_metadata(self.__sources, triple_pattern)
            reference = {
                name.replace("tpf@", "")
                for name in triple_pattern.sources.keys()
            }

        self.processed_predicates += 1
        sources = self.__stats.predicate_counts(predicate)

        # Reference sources that the statistics do not list.
        delta = reference.difference(sources)

        nothing_reported = len(sources) == 0
        if (nothing_reported or delta) and predicate.isuri():
            if nothing_reported:
                self.missed_predicate_cnt += 1
                self.missed_predicates[predicate] = list(delta)
            else:
                self.missed_sources[predicate] = delta

        triple_pattern.sources = sources
Esempio n. 2
0
 def select_sources(self, triple_pattern):
     """Fetch the metadata of ``triple_pattern`` from the configured sources.

     Every pattern takes the same path: the statistics-update step that was
     once applied to URI predicates is disabled, so no per-predicate work
     is done here.

     :param triple_pattern: the triple pattern to look up.
     :return: the value returned by ``get_metadata`` (presumably the
         pattern's cardinality -- confirm against ``get_metadata``).
     """
     # NOTE(review): the disabled update called get_metadata_tpf_stats(...)
     # and self.__stats.update_authorities(predicate, ...). If it is switched
     # back on, it needs a branch for URI predicates only.
     return get_metadata(self.__sources, triple_pattern)
Esempio n. 3
0
    def optimize_bgp(self, triple_patterns):
        """Turn a basic graph pattern into a plan and record quality ratios.

        Steps, in order: fetch metadata for every ``TriplePattern``, compute
        the baseline completeness and cost, optionally prune sources,
        optionally decompose, and finally build the plan. Two ratios are
        appended to the instance lists: completeness after pruning relative
        to the baseline (``decompostion_completeness``) and cost of the
        decomposition relative to the baseline cost (``decompostion_cost``).
        A ratio falls back to ``1.0`` when its computation raises
        ``ZeroDivisionError``.

        :param triple_patterns: the elements of the basic graph pattern.
        :return: the result of ``self.decompostion_to_plan``.
        """
        for pattern in triple_patterns:
            if not isinstance(pattern, TriplePattern):
                continue
            # One metadata request per server is needed for each pattern.
            self.planning_requests += len(self.sources)
            get_metadata(self.sources, pattern)

        # Baseline values, taken before pruning and decomposition.
        baseline_completeness = self.compute_completeness(triple_patterns)
        self.max_cost = self.compute_cost(triple_patterns)

        if self.prune_sources:
            triple_patterns = self.prune_relevant_source(triple_patterns)

        # Completeness that is left after (optional) pruning.
        remaining_completeness = self.compute_completeness(triple_patterns)
        try:
            completeness_ratio = remaining_completeness / baseline_completeness
        except ZeroDivisionError:
            completeness_ratio = 1.0
        self.decompostion_completeness.append(completeness_ratio)

        decomposition = (LDFF_Decomposer.get_decomposition(triple_patterns)
                         if self.decomposer else triple_patterns)

        # Cost of what will be executed, relative to the baseline cost.
        try:
            cost_ratio = self.compute_cost(decomposition) / self.max_cost
        except ZeroDivisionError:
            cost_ratio = 1.0
        self.decompostion_cost.append(cost_ratio)

        return self.decompostion_to_plan(decomposition)
Esempio n. 4
0
    def select_sources(self, triple_pattern):
        """Select sources for ``triple_pattern`` and return its cardinality.

        When ``variable_position`` equals 5 the answer comes from the
        statistics alone: the per-source counts of the predicate become the
        pattern's ``sources`` and their sum becomes its ``cardinality``.
        Otherwise ``get_metadata`` is called, restricted to the sources the
        statistics list for the predicate, or to ``self.sources`` when the
        statistics list none.

        :param triple_pattern: pattern whose predicate is at index 1.
        :return: the summed counts, or the result of ``get_metadata``.
        """
        # Drop the first and last character of the predicate value
        # (presumably the angle brackets around the URI -- confirm).
        predicate = triple_pattern[1].value[1:-1]

        if triple_pattern.variable_position != 5:
            candidates = self.__stats.sources_by_predicate(predicate)
            if len(candidates) > 0:
                return get_metadata(candidates, triple_pattern)
            return get_metadata(self.sources, triple_pattern)

        counts = self.__stats.predicate_counts(predicate)
        triple_pattern.sources = counts
        triple_pattern.cardinality = sum(counts.values())
        return triple_pattern.cardinality
Esempio n. 5
0
def capability_aware_decomp(decomposition, sparql_exclusive=True):
    """Flatten a decomposition into ``(subquery, source, count)`` tuples.

    Each value of ``decomposition`` is indexed as ``[0]`` (a sequence of
    triple patterns) and ``[1]`` (an iterable of source names). For a source
    whose name starts with ``sparql@`` and a group of more than one pattern,
    the group is sent to ``get_metadata`` as a whole: a positive count
    yields a single ``BGP`` entry, otherwise every pattern is listed on its
    own with its per-source count. For every other source, only the
    patterns that list the source in their ``sources`` are emitted.

    :param decomposition: mapping whose values are indexable as described.
    :param sparql_exclusive: accepted but not used by this function.
    :return: list of ``(BGP or triple pattern, source, count)`` tuples.
    """
    entries = []
    for _key, subquery in decomposition.items():
        patterns = subquery[0]
        for source in subquery[1]:
            if source.startswith("sparql@") and len(patterns) > 1:
                count = get_metadata([source], patterns)
                if count > 0:
                    entries.append((BGP(patterns), source, count))
                    continue
                # The group as a whole has no positive count at this source:
                # fall back to one entry per pattern.
                entries.extend(
                    (pattern, source, pattern.sources[source])
                    for pattern in patterns)
            else:
                entries.extend(
                    (pattern, source, pattern.sources[source])
                    for pattern in patterns
                    if source in pattern.sources.keys())

    return entries
Esempio n. 6
0
    def get_decomposition(triple_patterns):
        """Merge compatible subqueries that share one ``sparql@`` source.

        Pairs of elements are inspected repeatedly. Two elements are merged
        into a single ``BGP`` when neither is a ``Filter``, they are
        ``compatible``, each has exactly one source, that source is the same
        for both, and its name starts with ``sparql@``. After a merge the
        scan restarts; it stops once a full pass finds nothing to merge.
        Finally the cardinality of every ``BGP`` in the result is refreshed
        through ``get_metadata``.

        The input list is modified in place and is also the return value.

        :param triple_patterns: list of triple patterns, BGPs and filters.
        :return: the same list, with merged elements replaced by BGPs.
        """
        Q = triple_patterns
        merged = True
        while merged:
            # combinations() snapshots Q, and the scan is left right after Q
            # is modified, so changing Q inside the loop is safe.
            for sq_i, sq_j in combinations(Q, 2):
                if isinstance(sq_i, Filter) or isinstance(sq_j, Filter):
                    continue
                if not sq_i.compatible(sq_j):
                    continue

                # list(...) instead of keys()[0]: dict views cannot be
                # indexed on Python 3.
                sources_i = list(sq_i.sources.keys())
                sources_j = list(sq_j.sources.keys())
                # Both sides must have exactly one source, and the same one.
                if len(sources_i) != 1 or sources_i != sources_j:
                    continue
                if not sources_i[0].startswith("sparql@"):
                    continue

                Q.remove(sq_j)
                Q.remove(sq_i)

                # Copy the pattern list so the replaced BGP is not extended
                # behind the back of anything still referring to it.
                if isinstance(sq_i, BGP):
                    tps = list(sq_i.triple_patterns)
                else:
                    tps = [sq_i]

                if isinstance(sq_j, BGP):
                    tps.extend(sq_j.triple_patterns)
                else:
                    tps.append(sq_j)

                Q.append(BGP(tps))
                break
            else:
                # A complete pass without a merge: nothing left to do.
                merged = False

        # Update Cardinalities if a BGP was created
        for sq in Q:
            if isinstance(sq, BGP):
                sq.cardinality = get_metadata(sq.sources.keys(), sq)

        return Q
Esempio n. 7
0
    def iterative_dynamic_programming1(self, triple_patterns):
        """Choose a logical plan with iterative dynamic programming (IDP).

        A single pattern is answered directly with its access plan. For
        more patterns, plans for combinations of up to ``k`` open elements
        are built by joining plans of smaller subsets (keeping the
        ``self.top_t`` best via ``self.best_n_plans``). The cheapest plan of
        size ``k`` then replaces its elements in the to-do set, and the
        procedure repeats until one element is left. ``k`` is
        ``min(len(triple_patterns), self.k)``, or 2 when ``self.adaptive_k``
        is set and there are at least six patterns.

        From the plans of the last round the cheapest one is compared with
        an alternative: the cheapest remaining plan whose cost/robustness
        ratio reaches ``self.robustness_threshold`` (or the cheapest
        remaining plan if none does). ``self.cost_robust_ratio``,
        ``self.cost_cost_ratio`` and ``self.robust_over_cost`` are updated
        as a side effect.

        :param triple_patterns: sequence of triple patterns to join.
        :return: the alternative plan when ``self.enable_robustplan`` and
            ``self.robust_over_cost`` are both true, else the cheapest plan.
        :raises Exception: ``"IDP Error: No best plan"`` when a round, or
            the input itself, yields no candidate plan.
        """
        if len(triple_patterns) == 1:
            # For each server, we need one requests to get the metadata
            self.planning_requests += len(self.sources)
            get_metadata(self.sources, triple_patterns[0])
            return LogicalPlan(triple_patterns[0])

        # Switch for additionally keeping all best plans over the chosen
        # elements ("best row"); it is hard-wired off.
        best_row = False
        opt_plan = {}
        toDo = set()

        k = min(len(triple_patterns), self.k)
        if self.adaptive_k and len(triple_patterns) >= 6:
            k = 2

        for triple_pattern in triple_patterns:
            # For each server, we need one requests to get the metadata
            self.planning_requests += len(self.sources)
            get_metadata(self.sources, triple_pattern)
            accessPlan = set([LogicalPlan(triple_pattern)])
            opt_plan[(triple_pattern, )] = accessPlan
            toDo.add(triple_pattern)

        # Bound up front so that the code after the loop can tell "no round
        # was executed" apart from a regular result.
        best_plans = []

        while len(toDo) > 1:
            k = min(k, len(toDo))
            for i in range(2, k + 1):
                for S in combinations(toDo, i):

                    opt_plan[S] = set()

                    for O in self.true_subset(S):
                        opt_plan_O = opt_plan[O]
                        S_minus_O = tuple(set(S).difference(set(O)))

                        opt_plan_S_minus_O = opt_plan.get(S_minus_O, None)
                        if not opt_plan_S_minus_O or not opt_plan_O:
                            continue

                        for opt_plan_o in opt_plan_O:
                            for opt_plan_s_minus_o in opt_plan_S_minus_O:
                                # Relies on the plan type's "+" operator;
                                # presumably the number of shared join
                                # variables -- confirm against LogicalPlan.
                                join_vars = opt_plan_o + opt_plan_s_minus_o
                                if join_vars > 0:
                                    join_plans = self.joinPlans(
                                        opt_plan_o, opt_plan_s_minus_o)
                                    join_plans_S = opt_plan[S].union(
                                        join_plans)
                                    opt_plan[S] = self.best_n_plans(
                                        list(join_plans_S), self.top_t)

            # Collect every plan of size k that only covers open elements.
            best_plans = []
            V = set()
            for key, values in opt_plan.items():
                for value in values:
                    k_len = len(key)
                    if k_len == k and value and set(key).issubset(toDo):
                        V.add(key)
                        rob = value.cost
                        best_plans.append((value, value.cost, rob, key))

            if len(best_plans) == 0:
                raise Exception("IDP Error: No best plan")

            for v in V:
                del opt_plan[v]

            # In intermediate steps of IDP: Take best plan only
            best_plan = sorted(best_plans, key=lambda x: (x[1], x[2]))[0]

            tps = best_plan[3]
            opt_plan[(tps, )] = set([best_plan[0]])

            if best_row:
                best_plans.remove(best_plan)
                # Best Row
                for bp in best_plans:
                    if bp[3] == tps:
                        opt_plan[(tps, )].add(bp[0])

            # Remove triple patterns from todo list and put the combined
            # element in their place.
            for tp in tps:
                toDo.remove(tp)
            toDo.add(best_plan[3])

        if not best_plans:
            # No round was executed (empty input, or fewer than two distinct
            # patterns): fail with the planner's own error rather than with
            # an unbound local variable.
            raise Exception("IDP Error: No best plan")

        tmp_plans = []
        for plan in best_plans:
            cost = plan[0].cost
            rob = plan[0].average_cost(self.robust_model)
            tmp_plans.append((plan, cost, rob, cost / rob))

        cheap_plan = sorted(tmp_plans, key=lambda x: (x[1], x[3]))[0]

        # Decision rule for robust plan
        self.robust_over_cost = False
        rob_cost_ratio = cheap_plan[1] / cheap_plan[2]
        self.cost_robust_ratio = rob_cost_ratio

        if len(tmp_plans) > 1:
            tmp_plans.remove(cheap_plan)
            # A list, not filter(): on Python 3 filter() returns an iterator
            # that is always truthy and has no len().
            plans_over_thrshld = [
                candidate for candidate in tmp_plans
                if candidate[3] >= self.robustness_threshold
            ]
            if not plans_over_thrshld:
                plans_over_thrshld = tmp_plans
            robust_plan = sorted(plans_over_thrshld,
                                 key=lambda x: (x[1], x[2]))[0]
        else:
            robust_plan = cheap_plan

        # What is the cost ratio of the cheapest and the most robust plan
        cost_cost_ratio = cheap_plan[1] / robust_plan[1]
        self.cost_cost_ratio = cost_cost_ratio

        self.robust_over_cost = (
            rob_cost_ratio <= self.robustness_threshold
            and cost_cost_ratio >= self.cost_threshold)

        # Entries of tmp_plans wrap the (plan, cost, rob, key) tuple, hence
        # the double index to reach the plan itself.
        if self.enable_robustplan and self.robust_over_cost:
            logger.debug("IDP: Robust Plan over Cheapest Plan")
            return robust_plan[0][0]

        return cheap_plan[0][0]
Esempio n. 8
0
    def join_subplans(self,
                      left,
                      right,
                      join_type=None,
                      card=-1,
                      logial_plan=None):
        """Join two sub-plans with a physical operator.

        Leaves are created for inputs that are ``TriplePattern`` or ``BGP``
        instances (``TreePlan`` inputs are used as they are), the routing
        tables of the planner (``source_by_operator``, ``operators_desc``,
        ``eofs_desc``, ``operators_vars``, ``plan_order``, ``operators_sym``)
        are updated for the new operator, and the operator is appended to
        ``self.operators``. ``self.id_operator`` is incremented at the end.

        :param left: left input (``TriplePattern``, ``BGP`` or ``TreePlan``).
        :param right: right input, same kinds as ``left``.
        :param join_type: operator class deciding the join. Subclasses of
            ``Xnjoin``, ``Xnoptional`` and ``Xgoptional`` select those
            operators, any other class selects a hash join (``Fjoin``).
            When falsy, the join is chosen from the input cardinalities.
        :param card: cardinality stored on the resulting ``TreePlan``.
        :param logial_plan: optional logical plan; it is registered under
            the operator id, which is also stored on it as ``operator_id``.
        :return: the ``TreePlan`` joining both inputs.
        :raises Exception: if the operator is an ``Xnjoin`` and both leaves
            are ``TreePlan`` instances.
        """
        # Set Operator ID
        if logial_plan:
            self.operator_id2logical_plan[self.id_operator] = logial_plan
            logial_plan.operator_id = self.id_operator

        # Get Metadata for operator
        if isinstance(left, TriplePattern):
            # Get cardinality; Query only if necessary
            if left.count is not None:
                left_card = left.count
            else:
                left_card = get_metadata(self.source, left)
        else:
            left_card = left.total_res

        if isinstance(right, TriplePattern):
            # Get cardinality; Query only if necessary
            if right.count is not None:
                right_card = right.count
            else:
                right_card = get_metadata(self.source, right)
        else:
            right_card = right.total_res

        # The optional flags are only derived from an explicit join type.
        # They must still be bound on the heuristic path, because they are
        # read unconditionally further down.
        xn_optional = False
        xg_optional = False

        # Pre-decided Join Type
        if join_type:
            xn_join = issubclass(join_type, Xnjoin)
            xn_optional = issubclass(join_type, Xnoptional)
            xg_optional = issubclass(join_type, Xgoptional)
            if xn_join or xn_optional:
                # Switch sides for NLJ
                if left_card > right_card:
                    left, right = right, left
                    # The cardinalities travel with their operands; they are
                    # written to the leaves' total_res below.
                    left_card, right_card = right_card, left_card

        # Decide based in heursitics
        else:
            # Decide Join Type: xn = NLJ, FJ = SHJ
            if isinstance(left, IndependentOperator):
                xn_join = left_card < (right_card / 100.0)
            else:
                xn_join = left_card <= right_card

        # Joins Variable info
        join_vars = set(left.variables).intersection(right.variables)
        all_variables = set(left.variables).union(right.variables)

        # If the subplans have no varibale in common,
        # always place a Hash Join to handle the Cross-Product
        if len(join_vars) == 0:
            xn_join = False

        # Tree Plans as Leafs
        if isinstance(left, TreePlan):
            leaf_left = left
            for source in left.sources.keys():
                self.source_by_operator[source] = (
                    self.source_by_operator[source]
                    | pow(2, self.id_operator))
                self.operators_desc.setdefault(self.id_operator,
                                               {})[source] = 0

            # NOTE(review): this also registers the *next* source id as a
            # left input; the right-hand TreePlan branch has no counterpart.
            # Kept as is -- confirm that it is intended.
            self.operators_desc.setdefault(self.id_operator,
                                           {})[self.source_id] = 0

        if isinstance(right, TreePlan):
            leaf_right = right
            for source in right.sources.keys():
                self.source_by_operator[source] = (
                    self.source_by_operator[source]
                    | pow(2, self.id_operator))
                self.operators_desc.setdefault(self.id_operator,
                                               {})[source] = 1

        if xn_join and isinstance(left, TreePlan) and isinstance(
                right, TreePlan):
            print("Invalid plan")
        if xn_optional and isinstance(left, TreePlan) and isinstance(
                right, TreePlan):
            print("Invalid plan")

        # Operator Leafs
        if isinstance(left, TriplePattern) or isinstance(left, BGP):

            self.eofs_desc.update({self.source_id: 0})
            self.sources[self.source_id] = left.variables
            self.source_by_operator[self.source_id] = pow(2, self.id_operator)
            self.eofs_desc[self.source_id] = pow(2, self.id_operator)
            self.operators_desc.setdefault(self.id_operator,
                                           {})[self.source_id] = 0

            # Base on operator, create operator
            # If SHJ(FJ), use IO
            # Or if it is a NLJ(XN) and left_plan is a TP, then use IO
            if (not xn_join) or (xn_join and (isinstance(right, TriplePattern)
                                              or isinstance(right, BGP))):
                leaf_left = IndependentOperator(self.source_id,
                                                self.source,
                                                left,
                                                self.source_by_operator,
                                                left.variables,
                                                self.eddies,
                                                self.source_by_operator,
                                                sparql_limit=self.sparql_limit)
                self.independent_sources += 1

            elif (xn_join or xn_optional) and isinstance(right, TreePlan):
                leaf_left = DependentOperator(self.source_id, self.source,
                                              left, self.source_by_operator,
                                              left.variables,
                                              self.source_by_operator)
                self.dependent_sources += 1

            leaf_left.total_res = left_card
            self.source_id += 1

        if isinstance(right, TriplePattern) or isinstance(right, BGP):

            self.eofs_desc.update({self.source_id: 0})
            self.sources[self.source_id] = right.variables
            self.source_by_operator[self.source_id] = pow(2, self.id_operator)
            self.eofs_desc[self.source_id] = pow(2, self.id_operator)
            self.operators_desc.setdefault(self.id_operator,
                                           {})[self.source_id] = 1

            # Base on operator, create operator
            if xn_join or xn_optional:
                leaf_right = DependentOperator(self.source_id, self.source,
                                               right, self.source_by_operator,
                                               right.variables,
                                               self.source_by_operator)
                self.dependent_sources += 1

            else:
                leaf_right = IndependentOperator(
                    self.source_id,
                    self.source,
                    right,
                    self.source_by_operator,
                    right.variables,
                    self.eddies,
                    self.source_by_operator,
                    sparql_limit=self.sparql_limit)
                self.independent_sources += 1

            leaf_right.total_res = right_card
            self.source_id += 1

        self.operators_vars[self.id_operator] = join_vars

        self.plan_order[self.id_operator] = max(leaf_left.height,
                                                leaf_right.height)

        # Place Join
        if xn_join:  # NLJ
            if (isinstance(right, TriplePattern)
                    or isinstance(right, BGP)) and self.poly:
                logger.debug("Placing Poly XN Join")
                op = Poly_Xnjoin(self.id_operator,
                                 join_vars,
                                 self.eddies,
                                 brtpf_mappings=self.brtpf_mappings,
                                 sparql_mappings=self.sparql_mappings)
            else:
                logger.debug("Placing XN Join")
                op = Poly_Xnjoin(self.id_operator,
                                 join_vars,
                                 self.eddies,
                                 brtpf_mappings=1,
                                 sparql_mappings=1)

            self.operators_sym.update({self.id_operator: True})

            # If Right side has to be DP
            if not isinstance(leaf_right, DependentOperator):
                # Switch Leafs
                leaf_left, leaf_right = leaf_right, leaf_left

                # Update operators_descs for current operator id.
                # Only values of existing keys are rewritten, so iterating
                # over a snapshot of the keys is sufficient.
                for key in list(self.operators_desc[self.id_operator]):
                    # Leaf Right is now the DP and needs to be input Right,
                    # i.e. 1. list(...)[0] instead of keys()[0]: dict views
                    # cannot be indexed on Python 3.
                    if key == list(leaf_right.sources.keys())[0]:
                        self.operators_desc[self.id_operator][key] = 1
                    # All other will be on the left_plan input
                    else:
                        self.operators_desc[self.id_operator][key] = 0

        elif not xn_optional and not xg_optional:  # SHJ
            # A polymorphic hash join for (TreePlan, TriplePattern) inputs
            # is disabled; every input combination gets a plain Fjoin.
            op = Fjoin(self.id_operator, join_vars, self.eddies)
            self.operators_sym.update({self.id_operator: False})

        elif not xg_optional:  # XN Optional
            op = Xnoptional(self.id_operator, left.variables, right.variables,
                            self.eddies)
            self.operators_sym.update({self.id_operator: True})

        else:  # XG Optional
            op = Xgoptional(self.id_operator, left.variables, right.variables,
                            self.eddies)
            self.operators_sym.update({self.id_operator: False})

        # Add Operator
        self.operators.append(op)

        tree_height = max(leaf_left.height, leaf_right.height) + 1
        # 2020-03-04: Changed here to route everything properly
        tree_sources = dict(leaf_left.sources)
        tree_sources.update(dict(leaf_right.sources))
        # Create Tree Plan
        join_card = card
        tree_plan = TreePlan(op, all_variables, join_vars, tree_sources,
                             leaf_left, leaf_right, tree_height, join_card)

        if isinstance(op, Xnjoin) and isinstance(
                leaf_left, TreePlan) and isinstance(leaf_right, TreePlan):
            raise Exception(
                "Invalid plan: Xnjoin placed between two tree plans")

        self.id_operator += 1
        return tree_plan
Esempio n. 9
0
    def optimize_subquery(self, triples):
        """Build a logical plan for ``triples`` in two stages.

        Metadata is fetched for every triple and the resulting access plans
        are sorted by cardinality. Stage 1 repeatedly takes the first
        remaining plan and joins to it every other plan that shares a
        variable with that *initial* plan, forming "stars". Stage 2 joins
        stars that share variables with each other. Each join gets a type
        (``Xnjoin`` or ``Fjoin``) and an estimated cardinality.

        :param triples: iterable of triple patterns.
        :return: the last plan left after stage 2.
        :raises IndexError: if no plan is left at the end (e.g. ``triples``
            is empty).
        """
        subtrees = []
        for triple in triples:
            # For each server, we need one requests to get the metadata
            self.planning_requests += len(self.sources)
            get_metadata(self.sources, triple)
            subtrees.append(LogicalPlan(triple))

        subtrees.sort(key=lambda x: x.cardinality)

        # Stage 1: Grow a star around the first remaining plan.
        stars = []
        while len(subtrees) > 0:

            to_delete = []
            star_tree = subtrees.pop(0)
            # Fixed for the whole star: later members must share a variable
            # with the initial plan, not with the growing join.
            star_vars = star_tree.variables

            for subtree_j in subtrees:
                join_variables = set(star_vars).intersection(
                    subtree_j.variables)

                # Only plans that join with the star are merged into it.
                if len(join_variables) == 0:
                    continue

                to_delete.append(subtree_j)

                # Place physical operator estimating cardinality.
                res = self.estimate_card(star_tree.cardinality,
                                         subtree_j.cardinality)

                if star_tree.is_triple_pattern:
                    # Paper; if (tpi.count / tpi.pagesize) <= s.count then
                    nested_loop = star_tree.cardinality < (
                        subtree_j.cardinality / 100.0)
                else:
                    # The ratio test comes last: when the right side has
                    # cardinality 0 the first test already succeeds and the
                    # division is never evaluated.
                    nested_loop = (
                        subtree_j.cardinality < 100 * 5
                        or (subtree_j.cardinality > 100 * 1000
                            and star_tree.cardinality < 100 * 1000)
                        or (star_tree.cardinality
                            / float(subtree_j.cardinality) < 0.30))

                if nested_loop:
                    # Place a Nested Loop join.
                    join_type = Xnjoin
                    # If NLJ is placed, set res = 0/1 to force NLJs later
                    res = 1
                else:
                    # Place a Symmetric Hash join.
                    join_type = Fjoin

                star_tree = LogicalPlan(star_tree, subtree_j, join_type)
                star_tree.cardinality = res

            # Add current tree to the list of stars and
            # remove from the list of subtrees to process.
            stars.append(star_tree)
            for elem in to_delete:
                subtrees.remove(elem)

        # Stage 2: Build bushy tree to combine SSGs with common variables.
        # NOTE(review): a star that shares no variable with any other star
        # is popped and not put back, so it is absent from the final plan.
        while len(stars) > 1:

            subtree_i = stars.pop(0)
            star_vars = subtree_i.variables

            for j, subtree_j in enumerate(stars):
                join_variables = set(star_vars).intersection(
                    subtree_j.variables)

                # Case: There is a join between stars.
                if len(join_variables) > 0:

                    stars.pop(j)

                    # Estimate from the two stars that are actually joined.
                    res = self.estimate_card(subtree_i.cardinality,
                                             subtree_j.cardinality)
                    # Place physical operators between stars.
                    if subtree_j.is_triple_pattern:
                        # This case models a satellite, therefore apply
                        # cardinality estimation.
                        if subtree_i.cardinality < (
                                subtree_j.cardinality / 100.0):
                            join_type = Xnjoin
                        else:
                            join_type = Fjoin
                    else:
                        res = (subtree_i.cardinality
                               + subtree_j.cardinality) / 2
                        join_type = Fjoin

                    star_tree = LogicalPlan(subtree_i, subtree_j, join_type)
                    star_tree.cardinality = res
                    stars.append(star_tree)

                    # stars was modified: restart with the next first star.
                    break

        return stars.pop()
Esempio n. 10
0
    def create_plan_original(self, query, eddies, source):

        # Plan structures.
        tree_height = 0
        id_operator = 0
        operators = []
        operators_desc = {}
        plan_order = {}
        operators_vars = {}
        ordered_subtrees = []
        independent_sources = 0
        eofs_operators_desc = {}
        operators_sym = {}
        sources_desc = {}
        eofs_desc = {}
        subtrees = []

        # Create initial signatures and leaves of the plan.
        for subquery in query.where.left.triple_patterns:
            sources_desc.update({id_operator: 0})
            eofs_desc.update({id_operator: 0})
            leaf = IndependentOperator(id_operator, source, subquery, sources_desc, subquery.get_variables(), eddies, eofs_desc)
            leaf.total_res = get_metadata(leaf.server, leaf.query)
            subtrees.append(leaf)
            ordered_subtrees.append(leaf.total_res)
            id_operator += 1

        # Order leaves depending on the cardinality of fragments.
        keydict = dict(zip(subtrees, ordered_subtrees))
        subtrees.sort(key=keydict.get)

        # Stage 1: Generate left_plan-linear index nested stars.
        stars = []
        id_operator = 0
        while len(subtrees) > 0:

            to_delete = []
            star_tree = subtrees.pop(0)
            star_vars = star_tree.vars
            tree_height = 0
            independent_sources = independent_sources + 1

            for j in range(0, len(subtrees)):
                subtree_j = subtrees[j]
                join_variables = set(star_vars) & set(subtree_j.join_vars)
                all_variables = set(star_tree.vars) | set(subtree_j.vars)

                # Case: There is a join.
                if len(join_variables) > 0:

                    to_delete.append(subtree_j)

                    # Update signatures.
                    sources = {}
                    sources.update(star_tree.sources)
                    sources.update(subtree_j.sources)
                    operators_desc[id_operator] = {}
                    operators_vars[id_operator] = join_variables
                    eofs_operators_desc[id_operator] = {}

                    # The current tree is the left_plan argument of the plan.
                    for source in star_tree.sources.keys():
                        if len(set(sources[source]) & join_variables) > 0:
                            # TODO: Change the next 0 for len of something
                            operators_desc[id_operator].update({source: 0})
                        sources_desc[source] = sources_desc[source] | pow(2, id_operator)

                        # TODO: check this.
                        eofs_operators_desc[id_operator].update({source: 0})
                        eofs_desc[source] = eofs_desc[source] | pow(2, id_operator)

                    # The subtree j is the right_plan argument of the plan.
                    for source in subtree_j.sources.keys():
                        if len(set(sources[source]) & join_variables) > 0:
                            # TODO: Change the next q for len of something
                            operators_desc[id_operator].update({source: 1})
                        sources_desc[source] = sources_desc[source] | pow(2, id_operator)

                        # TODO: check this.
                        eofs_operators_desc[id_operator].update({source: 1})
                        eofs_desc[source] = eofs_desc[source] | pow(2, id_operator)

                    plan_order[id_operator] = tree_height
                    operators_vars[id_operator] = join_variables
                    tree_height = tree_height + 1

                    # Place physical operator estimating cardinality.
                    if isinstance(star_tree, IndependentOperator):
                        res = self.estimate_card(star_tree.total_res, subtree_j.total_res)
                        # Place a Nested Loop join.
                        if star_tree.total_res < (subtree_j.total_res / 100.0):
                            subtree_j = DependentOperator(subtree_j.sources, subtree_j.server, subtree_j.query,
                                                          subtree_j.sources_desc, subtree_j.vars, subtree_j.total_res)
                            op = Xnjoin(id_operator, join_variables, eddies)
                            operators.append(op)
                            star_tree = TreePlan(op, all_variables, join_variables, sources,
                                                 star_tree, subtree_j, tree_height, 0)
                            operators_sym.update({id_operator: False})

                        # Place a Symmetric Hash join.
                        else:
                            op = Fjoin(id_operator, join_variables, eddies)
                            operators.append(op)
                            star_tree = TreePlan(op, all_variables, join_variables, sources,
                                                 star_tree, subtree_j, tree_height, res)
                            independent_sources = independent_sources + 1
                            operators_sym.update({id_operator: True})
                    else:
                        # TODO: new change here
                        res = self.estimate_card(star_tree.total_res, subtree_j.total_res)
                        #res = (2.0 * star_tree.total_res * subtree_j.total_res) / (star_tree.total_res + subtree_j.total_res)
                        #res = (star_tree.total_res + subtree_j.total_res) / 2
                        if (star_tree.total_res / float(subtree_j.total_res) < 0.30) or (subtree_j.total_res > 100*1000 and star_tree.total_res < 100*1000) or (subtree_j.total_res < 100*5):
                            subtree_j = DependentOperator(subtree_j.sources, subtree_j.server, subtree_j.query,
                                                          subtree_j.sources_desc, subtree_j.vars, subtree_j.total_res)
                            op = Xnjoin(id_operator, join_variables, eddies)
                            operators.append(op)
                            star_tree = TreePlan(op, all_variables, join_variables, sources,
                                                 star_tree, subtree_j, tree_height)
                            operators_sym.update({id_operator: False})
                        else:
                            op = Fjoin(id_operator, join_variables, eddies)
                            operators.append(op)
                            star_tree = TreePlan(op, all_variables,
                                                 join_variables, sources, star_tree, subtree_j, tree_height, res)
                            independent_sources = independent_sources + 1
                            operators_sym.update({id_operator: True})
                    id_operator += 1

            # Add current tree to the list of stars and
            # remove from the list of subtrees to process.
            stars.append(star_tree)
            for elem in to_delete:
                subtrees.remove(elem)

        # Stage 2: Build bushy tree to combine SSGs with common variables.
        while len(stars) > 1:

            subtree_i = stars.pop(0)

            for j in range(0, len(stars)):
                subtree_j = stars[j]

                all_variables = set(subtree_i.vars) | set(subtree_j.vars)
                join_variables = set(subtree_i.join_vars) & set(subtree_j.join_vars)

                # Case: There is a join between stars.
                if len(join_variables) > 0:

                    # Update signatures.
                    sources = {}
                    sources.update(subtree_i.sources)
                    sources.update(subtree_j.sources)

                    operators_desc[id_operator] = {}
                    operators_vars[id_operator] = join_variables
                    eofs_operators_desc[id_operator] = {}

                    for source in subtree_i.sources.keys():
                        # This models the restriction: a tuple must have the join
                        # variable instantiated to be routed to a certain join.
                        if len(set(sources[source]) & join_variables) > 0:
                            # TODO: Change the next 0 for len of something
                            operators_desc[id_operator].update({source: 0})
                        sources_desc[source] = sources_desc[source] | pow(2, id_operator)

                        # TODO: Check this.
                        eofs_operators_desc[id_operator].update({source: 0})
                        eofs_desc[source] = eofs_desc[source] | pow(2, id_operator)

                    for source in subtree_j.sources.keys():
                        # This models the restriction: a tuple must have the join
                        # variable instantiated to be routed to a certain join.
                        if len(set(sources[source]) & join_variables) > 0:
                            # TODO: Change the next 1 for len of something
                            operators_desc[id_operator].update({source: 1})
                        sources_desc[source] = sources_desc[source] | pow(2, id_operator)

                        # TODO: Check this.
                        eofs_operators_desc[id_operator].update({source: 1})
                        eofs_desc[source] = eofs_desc[source] | pow(2, id_operator)

                    plan_order[id_operator] = max(subtree_i.height, subtree_j.height)
                    stars.pop(j)

                    # Place physical operators between stars.
                    if isinstance(subtree_j, IndependentOperator):
                        res = self.estimate_card(star_tree.total_res, subtree_j.total_res)

                        # This case models a satellite, therefore apply cardinality estimation.
                        if subtree_i.total_res < (subtree_j.total_res/100.0):
                            subtree_j = DependentOperator(subtree_j.sources, subtree_j.server, subtree_j.query,
                                                          subtree_j.sources_desc, subtree_j.vars, subtree_j.total_res)
                            op = Xnjoin(id_operator, join_variables, eddies)
                            operators.append(op)
                            stars.append(TreePlan(op, all_variables,
                                                  join_variables, sources, subtree_i, subtree_j,
                                                  max(subtree_i.height, subtree_j.height, res)))
                            # Adjust number of asynchronous leaves.
                            independent_sources = independent_sources - 1
                            operators_sym.update({id_operator: False})
                        else:
                            op = Fjoin(id_operator, join_variables, eddies)
                            operators.append(op)
                            stars.append(TreePlan(op, all_variables, join_variables,
                                                  sources, subtree_i, subtree_j,
                                                  max(subtree_i.height, subtree_j.height, res)))
                            operators_sym.update({id_operator: True})
                    else:
                        res = (subtree_i.total_res + subtree_j.total_res) / 2
                        op = Fjoin(id_operator, join_variables, eddies)
                        operators.append(op)
                        stars.append(TreePlan(op, all_variables, join_variables,
                                              sources, subtree_i, subtree_j,
                                              max(subtree_i.height, subtree_j.height, res)))
                        operators_sym.update({id_operator: True})
                    id_operator += 1
                    break

            if len(subtrees) % 2 == 0:
                tree_height += 1

        tree_height += 1
        tree = stars.pop()


        # Adds the projection operator to the plan.
        if query.projection:
            op = Xproject(id_operator, query.projection, eddies)
            operators.append(op)
            tree = TreePlan(op,
                            tree.vars, tree.join_vars, tree.sources, tree, None, tree_height+1, tree.total_res)

            # Update signature of tuples.
            operators_sym.update({id_operator: False})
            operators_desc[id_operator] = {}
            eofs_operators_desc[id_operator] = {}
            for source in tree.sources:
                operators_desc[id_operator].update({source: 0})
                eofs_operators_desc[id_operator].update({source: 0})
                eofs_desc[source] = eofs_desc[source] | pow(2, id_operator)
                sources_desc[source] = sources_desc[source] | pow(2, id_operator)
            plan_order[id_operator] = tree_height
            operators_vars[id_operator] = tree.vars
            id_operator += 1
            tree_height += 1

        # Adds the distinct operator to the plan.
        if query.distinct:
            op = Xdistinct(id_operator, eddies)
            operators.append(op)
            tree = TreePlan(op, tree.vars, tree.join_vars, tree.sources,
                            tree, None, tree_height + 1, tree.total_res)

            # Update signature of tuples.
            operators_sym.update ({id_operator: False})
            operators_desc[id_operator] = {}
            eofs_operators_desc[id_operator] = {}
            for source in tree.sources:
                operators_desc[id_operator].update({source: 0})
                eofs_operators_desc[id_operator].update({source: 0})
                eofs_desc[source] = eofs_desc[source] | pow(2, id_operator)
                sources_desc[source] = sources_desc[source] | pow(2, id_operator)
            plan_order[id_operator] = tree_height
            operators_vars[id_operator] = tree.vars
            id_operator += 1
            tree_height += 1

        physical_plan = Plan(query_tree=tree, tree_height=tree.height,
                                  operators_desc=operators_desc, sources_desc=sources_desc,
                                  plan_order=plan_order, operators_vars=operators_vars,
                                  independent_sources=independent_sources,
                                  operators_sym=operators_sym, operators=operators)

        return physical_plan
Esempio n. 11
0
    # Load the benchmark query text. The context manager guarantees the file
    # handle is closed as soon as the text has been read, instead of relying
    # on garbage collection to release it.
    with open(
        "/Users/larsheling/Documents/Development/crop.nosync/queries/fedbench/LD9.rq"
    ) as query_file:
        query_str = query_file.read()
    query_parsed = parse_new(query_str)

    # Attach a per-query log file (opened in 'w' mode, so a previous log of
    # the same name is overwritten) that receives INFO-and-above records.
    fhandler = logging.FileHandler('../../logs/{}.log'.format(queryname), 'w')
    fhandler.setLevel(logging.INFO)
    logger.addHandler(fhandler)

    tps = get_tps(query_parsed.body.triples)

    # Number every TriplePattern consecutively from 0 and request its
    # metadata; entries of `tps` that are not TriplePattern instances are
    # skipped and do not consume an index.
    id2tp = {}
    index = 0
    for tp in tps:
        if isinstance(tp, TriplePattern):
            get_metadata(sources, tp)
            id2tp[index] = tp
            index += 1
    print("Got Metadata")

    tp_sources = set()
    graph = {}
    # Visit each unordered pair (i, j) with i < j of the numbered patterns.
    for i in range(index):
        for j in range(i, index):
            if i != j:
                a = id2tp[i]
                b = id2tp[j]

                # True when at least one source name of `a` does NOT start
                # with "sparql@"; False when all do (or when there are none).
                a_just_tp = any(
                    not s_i.startswith("sparql@") for s_i in a.sources.keys()
                )
Esempio n. 12
0
    def create_plan(self, query):
        """Collect metadata for the query's triple patterns and build a plan.

        The triple patterns are read from ``query.where.left.triple_patterns``
        and materialised into a list before anything else happens, so the
        very same list is used for the metadata pass and for planning.

        :param query: parsed query object exposing
            ``where.left.triple_patterns``.
        :return: whatever ``self.create_best`` returns for the collected
            patterns and ``query``.
        """
        # Snapshot the patterns up front; the metadata pass only starts once
        # the full list exists.
        patterns = [pattern for pattern in query.where.left.triple_patterns]

        # The return value of get_metadata is ignored here -- presumably it
        # annotates each pattern in place (TODO confirm against its definition).
        for pattern in patterns:
            get_metadata(self.source, pattern)

        best_plan = self.create_best(patterns, query)
        return best_plan