def select_sources(self, triple_pattern):
    """Assign the statistics-derived sources to ``triple_pattern``.

    The sources that ``self.__stats`` reports for the predicate are compared
    with a reference set: the keys reported by ``self.__full_stats`` when
    that attribute is truthy, otherwise the source names that
    ``get_metadata`` leaves on the pattern (with every "tpf@" removed).
    For URI predicates, reference sources that the statistics do not know
    are recorded in ``self.missed_predicates`` (statistics returned nothing)
    or in ``self.missed_sources`` (statistics returned only a part).

    The pattern's ``sources`` attribute is always overwritten with the
    statistics' answer; nothing is returned.
    """
    if self.__full_stats:
        reference = set(
            self.__full_stats.predicate_counts(triple_pattern[1]).keys())
    else:
        get_metadata(self.__sources, triple_pattern)
        reference = {
            name.replace("tpf@", "")
            for name in triple_pattern.sources.keys()
        }

    predicate = triple_pattern[1]
    self.processed_predicates += 1
    sources = self.__stats.predicate_counts(predicate)

    # Reference sources the statistics failed to report.
    missing = reference.difference(sources)

    stats_know_predicate = len(sources) != 0
    if (not stats_know_predicate or missing) and predicate.isuri():
        if stats_know_predicate:
            self.missed_sources[predicate] = missing
        else:
            self.missed_predicate_cnt += 1
            self.missed_predicates[predicate] = list(missing)

    triple_pattern.sources = sources
def select_sources(self, triple_pattern):
    """Fetch the metadata of ``triple_pattern`` from every configured source.

    Returns whatever ``get_metadata`` returns for ``self.__sources`` and
    the given pattern.
    """
    # NOTE: a statistics update (get_metadata_tpf_stats followed by
    # self.__stats.update_authorities) was sketched here for URI predicates
    # but is disabled.  Both former branches issued this same call, so the
    # branch and its unused ``predicate`` local were removed.
    return get_metadata(self.__sources, triple_pattern)
def optimize_bgp(self, triple_patterns):
    """Turn the elements of a basic graph pattern into a plan.

    Steps, in order:
      1. request metadata for every ``TriplePattern`` element and count
         one planning request per entry of ``self.sources``;
      2. record completeness and cost of the unpruned input
         (``self.max_cost`` is set here);
      3. optionally prune sources (``self.prune_sources``);
      4. append the completeness ratio after/before pruning to
         ``self.decompostion_completeness``;
      5. optionally decompose (``self.decomposer``);
      6. append the cost ratio relative to ``self.max_cost`` to
         ``self.decompostion_cost``.

    Either ratio falls back to 1.0 when a ZeroDivisionError is raised.
    Returns the result of ``self.decompostion_to_plan``.
    """
    for pattern in (tp for tp in triple_patterns
                    if isinstance(tp, TriplePattern)):
        # One request per server is needed to obtain the metadata.
        self.planning_requests += len(self.sources)
        get_metadata(self.sources, pattern)

    # Baseline figures, taken before any pruning.
    baseline_completeness = self.compute_completeness(triple_patterns)
    self.max_cost = self.compute_cost(triple_patterns)

    if self.prune_sources:
        triple_patterns = self.prune_relevant_source(triple_patterns)

    achieved_completeness = self.compute_completeness(triple_patterns)
    try:
        completeness_ratio = achieved_completeness / baseline_completeness
    except ZeroDivisionError:
        completeness_ratio = 1.0
    self.decompostion_completeness.append(completeness_ratio)

    decomposition = (LDFF_Decomposer.get_decomposition(triple_patterns)
                     if self.decomposer else triple_patterns)

    try:
        cost_ratio = self.compute_cost(decomposition) / self.max_cost
    except ZeroDivisionError:
        cost_ratio = 1.0
    self.decompostion_cost.append(cost_ratio)

    return self.decompostion_to_plan(decomposition)
def select_sources(self, triple_pattern):
    """Select sources for ``triple_pattern`` and return its cardinality.

    The predicate is the pattern's second term with the first and last
    character of its ``value`` stripped.

    * ``variable_position == 5``: the per-source counts from
      ``self.__stats.predicate_counts`` become the pattern's ``sources``
      and their sum becomes (and is returned as) its ``cardinality``.
    * otherwise: ``get_metadata`` is asked, using the sources that
      ``self.__stats.sources_by_predicate`` reports, or ``self.sources``
      when that result is empty.
    """
    predicate = triple_pattern[1].value[1:-1]

    if triple_pattern.variable_position != 5:
        candidates = self.__stats.sources_by_predicate(predicate)
        if len(candidates) > 0:
            sources = candidates
        else:
            sources = self.sources
        return get_metadata(sources, triple_pattern)

    # Answer straight from the statistics, without calling get_metadata.
    counts = self.__stats.predicate_counts(predicate)
    triple_pattern.sources = counts
    triple_pattern.cardinality = sum(counts.values())
    return triple_pattern.cardinality
def capability_aware_decomp(decomposition, sparql_exclusive=True):
    """Expand a decomposition into ``(subquery, source, count)`` triples.

    ``decomposition`` maps an identifier to a pair whose first element is a
    list of triple patterns and whose second element is an iterable of
    source names.  For every (subquery, source) combination:

    * a source whose name starts with "sparql@" and a subquery with more
      than one pattern are evaluated together: if ``get_metadata`` reports
      a positive count, a single ``BGP`` entry is emitted;
    * in every other case one entry per triple pattern is emitted, using
      the count stored in ``triple_pattern.sources[source]``.

    Patterns that do not list the source are skipped.  The original
    fallback after a non-positive BGP count indexed the mapping without
    that check and could raise ``KeyError``; it now applies the same
    check as the per-pattern branch.

    ``sparql_exclusive`` is accepted for interface compatibility and is not
    used.  Returns the list of emitted triples.
    """
    result = []
    for subquery in decomposition.values():
        patterns = subquery[0]
        for source in subquery[1]:
            if source.startswith("sparql@") and len(patterns) > 1:
                count = get_metadata([source], patterns)
                if count > 0:
                    result.append((BGP(patterns), source, count))
                    continue
            # Per-pattern entries: either the source cannot answer the
            # whole subquery at once, or the combined count was not positive.
            for triple_pattern in patterns:
                if source in triple_pattern.sources:
                    result.append((triple_pattern, source,
                                   triple_pattern.sources[source]))
    return result
def get_decomposition(triple_patterns):
    """Merge compatible subqueries bound to one "sparql@" source into BGPs.

    Repeatedly looks for a pair of non-``Filter`` elements that are
    ``compatible``, each have exactly one source, share that source, and
    whose source name starts with "sparql@".  Such a pair is removed from
    the list and replaced by one ``BGP`` holding the triple patterns of
    both.  The search restarts after every merge and stops once a full
    pass finds nothing to merge.

    Afterwards the ``cardinality`` of every ``BGP`` in the list is set from
    ``get_metadata``.

    The input list is modified in place and also returned.
    """
    Q = triple_patterns
    change = True
    while change:
        # combinations() snapshots Q, so removing from Q right before the
        # ``break`` below does not disturb the iteration.
        for sq_i, sq_j in combinations(Q, 2):
            if isinstance(sq_i, Filter) or isinstance(sq_j, Filter):
                continue
            if not sq_i.compatible(sq_j):
                continue

            sources_i = list(sq_i.sources.keys())
            sources_j = list(sq_j.sources.keys())
            # Both sides must be pinned to the very same single source.
            if len(sources_i) != 1 or sources_i != sources_j:
                continue
            # list(...)[0] instead of .keys()[0]: dict views cannot be
            # indexed on Python 3.
            if not sources_i[0].startswith("sparql@"):
                continue

            Q.remove(sq_j)
            Q.remove(sq_i)
            if isinstance(sq_i, BGP):
                tps = sq_i.triple_patterns
            else:
                tps = [sq_i]
            if isinstance(sq_j, BGP):
                tps.extend(sq_j.triple_patterns)
            else:
                tps.append(sq_j)
            Q.append(BGP(tps))
            break
        else:
            # A complete pass without a merge: fixed point reached.
            change = False

    # Update cardinalities if a BGP was created
    for sq in Q:
        if isinstance(sq, BGP):
            sq.cardinality = get_metadata(list(sq.sources.keys()), sq)
    return Q
def iterative_dynamic_programming1(self, triple_patterns):
    """Build a plan for ``triple_patterns`` with iterative dynamic programming.

    A single pattern is returned directly as a ``LogicalPlan``.  Otherwise
    each round
      * enumerates combinations of 2..k open entries and joins the stored
        plans of every split of a combination (``self.joinPlans``), keeping
        the ``self.top_t`` best per combination (``self.best_n_plans``);
      * collects the plans covering exactly ``k`` open entries, picks the
        cheapest one, and replaces its entries in the to-do set by one
        combined entry.
    ``k`` starts as ``min(len(triple_patterns), self.k)`` and is forced to 2
    when ``self.adaptive_k`` is set and there are at least 6 patterns.

    After the last round the cheapest plan is compared with a "robust"
    alternative; ``self.cost_robust_ratio``, ``self.cost_cost_ratio`` and
    ``self.robust_over_cost`` are updated, and the robust plan is returned
    only if ``self.enable_robustplan`` and ``self.robust_over_cost`` hold.

    Raises ``Exception("IDP Error: No best plan")`` when a round yields no
    candidate, or when no round runs at all (fewer than two distinct
    patterns, e.g. an empty input).
    """
    if len(triple_patterns) == 1:
        # For each server, we need one request to get the metadata
        self.planning_requests += len(self.sources)
        get_metadata(self.sources, triple_patterns[0])
        return LogicalPlan(triple_patterns[0])

    best_row = False
    opt_plan = {}
    toDo = set()
    k = min(len(triple_patterns), self.k)
    if self.adaptive_k and len(triple_patterns) >= 6:
        k = 2

    for triple_pattern in triple_patterns:
        # For each server, we need one request to get the metadata
        self.planning_requests += len(self.sources)
        get_metadata(self.sources, triple_pattern)
        opt_plan[(triple_pattern, )] = set([LogicalPlan(triple_pattern)])
        toDo.add(triple_pattern)

    # Bound up front so that the code after the loop never reads an
    # unassigned name when the loop body does not execute.
    best_plans = []
    while len(toDo) > 1:
        k = min(k, len(toDo))
        for i in range(2, k + 1):
            for S in combinations(toDo, i):
                opt_plan[S] = set()
                for O in self.true_subset(S):
                    opt_plan_O = opt_plan[O]
                    S_minus_O = tuple(set(S).difference(set(O)))
                    opt_plan_S_minus_O = opt_plan.get(S_minus_O, None)
                    if not opt_plan_S_minus_O or not opt_plan_O:
                        continue
                    for opt_plan_o in opt_plan_O:
                        for opt_plan_s_minus_o in opt_plan_S_minus_O:
                            # NOTE(review): ``+`` on two plans is compared
                            # with 0; presumably it counts shared join
                            # variables -- confirm against LogicalPlan.
                            join_vars = opt_plan_o + opt_plan_s_minus_o
                            if join_vars > 0:
                                join_plans = self.joinPlans(
                                    opt_plan_o, opt_plan_s_minus_o)
                                join_plans_S = opt_plan[S].union(join_plans)
                                opt_plan[S] = self.best_n_plans(
                                    list(join_plans_S), self.top_t)

        # Candidates of this round: plans covering exactly k open entries.
        best_plans = []
        V = set()
        for key, values in opt_plan.items():
            for value in values:
                if len(key) == k and value and set(key).issubset(toDo):
                    V.add(key)
                    rob = value.cost
                    best_plans.append((value, value.cost, rob, key))
        if len(best_plans) == 0:
            raise Exception("IDP Error: No best plan")
        for v in V:
            del opt_plan[v]

        # In intermediate steps of IDP: take the best plan only
        best_plan = min(best_plans, key=lambda x: (x[1], x[2]))
        tps = best_plan[3]
        opt_plan[(tps, )] = set([best_plan[0]])
        if best_row:
            best_plans.remove(best_plan)
            # Best row: keep the other plans over the same entries as well
            for bp in best_plans:
                if bp[3] == tps:
                    opt_plan[(tps, )].add(bp[0])
        # Replace the covered entries in the to-do set by the combined one
        for tp in tps:
            toDo.remove(tp)
        toDo.add(best_plan[3])

    if not best_plans:
        raise Exception("IDP Error: No best plan")

    tmp_plans = []
    for plan in best_plans:
        cost = plan[0].cost
        rob = plan[0].average_cost(self.robust_model)
        tmp_plans.append((plan, cost, rob, cost / rob))
    cheap_plan = min(tmp_plans, key=lambda x: (x[1], x[3]))

    # Decision rule for robust plan
    self.robust_over_cost = False
    rob_cost_ratio = cheap_plan[1] / cheap_plan[2]
    self.cost_robust_ratio = rob_cost_ratio
    if len(tmp_plans) > 1:
        tmp_plans.remove(cheap_plan)
        # A list (not a lazy ``filter`` object) so that emptiness can be
        # tested on Python 3 as well.
        plans_over_thrshld = [
            candidate for candidate in tmp_plans
            if candidate[3] >= self.robustness_threshold
        ]
        if not plans_over_thrshld:
            plans_over_thrshld = tmp_plans
        robust_plan = min(plans_over_thrshld, key=lambda x: (x[1], x[2]))
    else:
        robust_plan = cheap_plan

    # What is the cost ratio of the cheapest and the most robust plan
    cost_cost_ratio = cheap_plan[1] / robust_plan[1]
    self.cost_cost_ratio = cost_cost_ratio
    self.robust_over_cost = (rob_cost_ratio <= self.robustness_threshold
                             and cost_cost_ratio >= self.cost_threshold)
    if self.enable_robustplan and self.robust_over_cost:
        logger.debug("IDP: Robust Plan over Cheapest Plan")
        return robust_plan[0][0]
    return cheap_plan[0][0]
def join_subplans(self,
                  left,
                  right,
                  join_type=None,
                  card=-1,
                  logial_plan=None):
    """Join two sub-plans with a physical operator and return a ``TreePlan``.

    Args:
        left, right: ``TriplePattern``/``BGP`` leaves or existing
            ``TreePlan`` sub-plans.
        join_type: operator class fixing the join kind (checked with
            ``issubclass`` against ``Xnjoin``, ``Xnoptional`` and
            ``Xgoptional``).  When falsy, the join kind is chosen from the
            cardinalities of both sides.
        card: cardinality stored on the resulting ``TreePlan``.
        logial_plan: optional logical plan to associate with the operator
            id used for this join (the spelling is part of the interface).

    Side effects: registers sources, operators and routing descriptors on
    ``self`` and advances ``self.source_id`` / ``self.id_operator``.

    Raises:
        Exception: if the placed operator is an ``Xnjoin`` instance and
            both inputs are ``TreePlan`` objects.
    """
    # Set Operator ID
    if logial_plan:
        self.operator_id2logical_plan[self.id_operator] = logial_plan
        logial_plan.operator_id = self.id_operator

    # Get cardinalities; query the source only if necessary
    if isinstance(left, TriplePattern):
        left_card = (left.count if left.count is not None else
                     get_metadata(self.source, left))
    else:
        left_card = left.total_res
    if isinstance(right, TriplePattern):
        right_card = (right.count if right.count is not None else
                      get_metadata(self.source, right))
    else:
        right_card = right.total_res

    # The optional flags are read further down regardless of how the join
    # kind was chosen, so they need a value on the heuristic path too.
    xn_optional = False
    xg_optional = False
    if join_type:
        # Pre-decided join type
        xn_join = issubclass(join_type, Xnjoin)
        xn_optional = issubclass(join_type, Xnoptional)
        xg_optional = issubclass(join_type, Xgoptional)
        if xn_join or xn_optional:
            # Switch sides for NLJ
            # NOTE(review): left_card/right_card are not swapped with the
            # operands, so the leaves created below receive each other's
            # cardinality after a swap -- confirm whether that is intended.
            if left_card > right_card:
                left, right = right, left
    else:
        # Decide based on heuristics: xn = NLJ, otherwise FJ = SHJ
        if isinstance(left, IndependentOperator):
            xn_join = left_card < (right_card / 100.0)
        else:
            xn_join = left_card <= right_card

    # Join variable info
    join_vars = set(left.variables).intersection(right.variables)
    all_variables = set(left.variables).union(right.variables)

    # If the subplans have no variable in common,
    # always place a Hash Join to handle the Cross-Product
    if len(join_vars) == 0:
        xn_join = False

    # Tree Plans as Leafs
    if isinstance(left, TreePlan):
        leaf_left = left
        for source in left.sources.keys():
            self.source_by_operator[source] = (
                self.source_by_operator[source] | pow(2, self.id_operator))
            self.operators_desc.setdefault(self.id_operator, {})[source] = 0
        self.operators_desc.setdefault(self.id_operator,
                                       {})[self.source_id] = 0
    if isinstance(right, TreePlan):
        leaf_right = right
        for source in right.sources.keys():
            self.source_by_operator[source] = (
                self.source_by_operator[source] | pow(2, self.id_operator))
            self.operators_desc.setdefault(self.id_operator, {})[source] = 1

    if xn_join and isinstance(left, TreePlan) and isinstance(
            right, TreePlan):
        print("Invalid plan")
    if xn_optional and isinstance(left, TreePlan) and isinstance(
            right, TreePlan):
        print("Invalid plan")

    # Operator Leafs
    if isinstance(left, TriplePattern) or isinstance(left, BGP):
        self.eofs_desc.update({self.source_id: 0})
        self.sources[self.source_id] = left.variables
        self.source_by_operator[self.source_id] = pow(2, self.id_operator)
        self.eofs_desc[self.source_id] = pow(2, self.id_operator)
        self.operators_desc.setdefault(self.id_operator,
                                       {})[self.source_id] = 0

        # Based on the operator, create the access operator:
        # if SHJ (FJ), use an independent operator;
        # also if it is a NLJ (XN) and the right side is a leaf.
        if (not xn_join) or (xn_join and
                             (isinstance(right, TriplePattern)
                              or isinstance(right, BGP))):
            leaf_left = IndependentOperator(self.source_id,
                                            self.source,
                                            left,
                                            self.source_by_operator,
                                            left.variables,
                                            self.eddies,
                                            self.source_by_operator,
                                            sparql_limit=self.sparql_limit)
            self.independent_sources += 1
        elif (xn_join or xn_optional) and isinstance(right, TreePlan):
            leaf_left = DependentOperator(self.source_id, self.source, left,
                                          self.source_by_operator,
                                          left.variables,
                                          self.source_by_operator)
            self.dependent_sources += 1
        leaf_left.total_res = left_card
        self.source_id += 1

    if isinstance(right, TriplePattern) or isinstance(right, BGP):
        self.eofs_desc.update({self.source_id: 0})
        self.sources[self.source_id] = right.variables
        self.source_by_operator[self.source_id] = pow(2, self.id_operator)
        self.eofs_desc[self.source_id] = pow(2, self.id_operator)
        self.operators_desc.setdefault(self.id_operator,
                                       {})[self.source_id] = 1

        # Based on the operator, create the access operator
        if xn_join or xn_optional:
            leaf_right = DependentOperator(self.source_id, self.source,
                                           right, self.source_by_operator,
                                           right.variables,
                                           self.source_by_operator)
            self.dependent_sources += 1
        else:
            leaf_right = IndependentOperator(self.source_id,
                                             self.source,
                                             right,
                                             self.source_by_operator,
                                             right.variables,
                                             self.eddies,
                                             self.source_by_operator,
                                             sparql_limit=self.sparql_limit)
            self.independent_sources += 1
        leaf_right.total_res = right_card
        self.source_id += 1

    self.operators_vars[self.id_operator] = join_vars
    self.plan_order[self.id_operator] = max(leaf_left.height,
                                            leaf_right.height)

    # Place Join
    if xn_join:
        # NLJ
        if (isinstance(right, TriplePattern)
                or isinstance(right, BGP)) and self.poly:
            logger.debug("Placing Poly XN Join")
            op = Poly_Xnjoin(self.id_operator,
                             join_vars,
                             self.eddies,
                             brtpf_mappings=self.brtpf_mappings,
                             sparql_mappings=self.sparql_mappings)
        else:
            logger.debug("Placing XN Join")
            op = Poly_Xnjoin(self.id_operator,
                             join_vars,
                             self.eddies,
                             brtpf_mappings=1,
                             sparql_mappings=1)
        self.operators_sym.update({self.id_operator: True})

        # The right side has to be the dependent operator
        if not isinstance(leaf_right, DependentOperator):
            # Switch leafs
            leaf_left, leaf_right = leaf_right, leaf_left
            # next(iter(...)) instead of .keys()[0]: dict views cannot be
            # indexed on Python 3.
            dependent_source = next(iter(leaf_right.sources.keys()))
            # Update operators_desc for the current operator id: the new
            # right leaf becomes input 1, every other source input 0.
            routing = self.operators_desc[self.id_operator]
            for key in list(routing.keys()):
                routing[key] = 1 if key == dependent_source else 0
    elif not xn_optional and not xg_optional:
        # SHJ (the polymorphic hash join variant is currently disabled, so
        # every case places a plain Fjoin)
        op = Fjoin(self.id_operator, join_vars, self.eddies)
        self.operators_sym.update({self.id_operator: False})
    elif not xg_optional:
        # XN Optional
        op = Xnoptional(self.id_operator, left.variables, right.variables,
                        self.eddies)
        self.operators_sym.update({self.id_operator: True})
    else:
        # XG Optional
        op = Xgoptional(self.id_operator, left.variables, right.variables,
                        self.eddies)
        self.operators_sym.update({self.id_operator: False})

    # Add Operator
    self.operators.append(op)
    tree_height = max(leaf_left.height, leaf_right.height) + 1
    # 2020-03-04: Changed here to route everything properly
    tree_sources = dict(leaf_left.sources)
    tree_sources.update(dict(leaf_right.sources))

    # Create Tree Plan
    join_card = card
    tree_plan = TreePlan(op, all_variables, join_vars, tree_sources,
                         leaf_left, leaf_right, tree_height, join_card)
    if isinstance(op, Xnjoin) and isinstance(
            leaf_left, TreePlan) and isinstance(leaf_right, TreePlan):
        raise Exception
    self.id_operator += 1
    return tree_plan
def optimize_subquery(self, triples):
    """Build a logical join tree for ``triples`` in two stages.

    Metadata is requested for every triple (counting one planning request
    per entry of ``self.sources``) and the resulting ``LogicalPlan`` leaves
    are sorted by cardinality.

    Stage 1 grows one "star" per remaining leaf: every later leaf sharing a
    variable with the star's initial variables is joined onto it, using
    ``Xnjoin`` or ``Fjoin`` depending on the cardinalities.
    Stage 2 joins the stars with each other until one tree remains.

    Returns the final ``LogicalPlan``.  Raises ``IndexError`` (from
    ``list.pop``) when ``triples`` is empty.
    """
    subtrees = []
    for triple in triples:
        # For each server, we need one request to get the metadata
        self.planning_requests += len(self.sources)
        get_metadata(self.sources, triple)
        subtrees.append(LogicalPlan(triple))
    subtrees.sort(key=lambda x: x.cardinality)

    # Stage 1: build stars around the cheapest remaining leaf.
    stars = []
    while len(subtrees) > 0:
        to_delete = []
        star_tree = subtrees.pop(0)
        star_vars = star_tree.variables
        for subtree_j in subtrees:
            join_variables = set(star_vars).intersection(subtree_j.variables)
            # Case: There is a join.
            if len(join_variables) == 0:
                continue
            to_delete.append(subtree_j)

            # Place physical operator estimating cardinality.
            res = self.estimate_card(star_tree.cardinality,
                                     subtree_j.cardinality)
            if star_tree.is_triple_pattern:
                # Paper: if (tpi.count / tpi.pagesize) <= s.count
                use_nested_loop = star_tree.cardinality < (
                    subtree_j.cardinality / 100.0)
            else:
                use_nested_loop = (
                    (star_tree.cardinality / float(subtree_j.cardinality) <
                     0.30)
                    or (subtree_j.cardinality > 100 * 1000
                        and star_tree.cardinality < 100 * 1000)
                    or (subtree_j.cardinality < 100 * 5))
            if use_nested_loop:
                join_type = Xnjoin
                # If NLJ is placed, set res = 1 to force NLJs later
                res = 1
            else:
                # Place a Symmetric Hash join.
                join_type = Fjoin
            star_tree = LogicalPlan(star_tree, subtree_j, join_type)
            star_tree.cardinality = res

        # Add current tree to the list of stars and
        # remove the joined leaves from the list of subtrees to process.
        stars.append(star_tree)
        for elem in to_delete:
            subtrees.remove(elem)

    # Stage 2: Build bushy tree to combine SSGs with common variables.
    # NOTE(review): a star that shares no variable with any other star is
    # popped and never re-added, i.e. its patterns vanish from the plan --
    # confirm callers only pass connected subqueries.
    while len(stars) > 1:
        subtree_i = stars.pop(0)
        star_vars = subtree_i.variables
        for j in range(0, len(stars)):
            subtree_j = stars[j]
            join_variables = set(star_vars).intersection(subtree_j.variables)
            # Case: There is a join between stars.
            if len(join_variables) > 0:
                stars.pop(j)
                # Estimate from the two operands actually being joined
                # (previously the stale ``star_tree`` of an earlier
                # iteration was used for the left side).
                res = self.estimate_card(subtree_i.cardinality,
                                         subtree_j.cardinality)
                # Place physical operators between stars.
                if subtree_j.is_triple_pattern:
                    # This case models a satellite, therefore apply
                    # cardinality estimation.
                    if subtree_i.cardinality < (subtree_j.cardinality /
                                                100.0):
                        join_type = Xnjoin
                    else:
                        join_type = Fjoin
                else:
                    res = (subtree_i.cardinality + subtree_j.cardinality) / 2
                    join_type = Fjoin
                star_tree = LogicalPlan(subtree_i, subtree_j, join_type)
                star_tree.cardinality = res
                stars.append(star_tree)
                break

    tree = stars.pop()
    return tree
def create_plan_original(self, query, eddies, source):
    """Create a physical ``Plan`` for ``query`` (original planning routine).

    Args:
        query: parsed query; ``query.where.left.triple_patterns``,
            ``query.projection`` and ``query.distinct`` are read.
        eddies: passed through to every operator that is created.
        source: passed to each ``IndependentOperator`` leaf.

    Procedure:
        * one ``IndependentOperator`` leaf per triple pattern, with
          ``total_res`` taken from ``get_metadata``; leaves are sorted by it;
        * Stage 1 grows stars, Stage 2 joins the stars into one tree;
        * optional ``Xproject`` / ``Xdistinct`` operators go on top.
    While doing so the routing descriptors (``operators_desc``,
    ``sources_desc``, ``eofs_desc`` ...) are filled as bit masks over
    operator ids.

    Returns the assembled ``Plan``.  Raises ``IndexError`` (from
    ``list.pop``) when the query has no triple patterns.
    """
    # Plan structures.
    tree_height = 0
    id_operator = 0
    operators = []
    operators_desc = {}
    plan_order = {}
    operators_vars = {}
    ordered_subtrees = []
    independent_sources = 0
    eofs_operators_desc = {}
    operators_sym = {}
    sources_desc = {}
    eofs_desc = {}
    subtrees = []

    # Create initial signatures and leaves of the plan.
    for subquery in query.where.left.triple_patterns:
        sources_desc.update({id_operator: 0})
        eofs_desc.update({id_operator: 0})
        leaf = IndependentOperator(id_operator, source, subquery,
                                   sources_desc, subquery.get_variables(),
                                   eddies, eofs_desc)
        leaf.total_res = get_metadata(leaf.server, leaf.query)
        subtrees.append(leaf)
        ordered_subtrees.append(leaf.total_res)
        id_operator += 1

    # Order leaves depending on the cardinality of fragments.
    keydict = dict(zip(subtrees, ordered_subtrees))
    subtrees.sort(key=keydict.get)

    # Stage 1: Generate left-linear index nested stars.
    stars = []
    id_operator = 0
    while len(subtrees) > 0:
        to_delete = []
        star_tree = subtrees.pop(0)
        star_vars = star_tree.vars
        tree_height = 0
        independent_sources = independent_sources + 1
        for j in range(0, len(subtrees)):
            subtree_j = subtrees[j]
            join_variables = set(star_vars) & set(subtree_j.join_vars)
            all_variables = set(star_tree.vars) | set(subtree_j.vars)

            # Case: There is a join.
            if len(join_variables) > 0:
                to_delete.append(subtree_j)

                # Update signatures.
                sources = {}
                sources.update(star_tree.sources)
                sources.update(subtree_j.sources)
                operators_desc[id_operator] = {}
                operators_vars[id_operator] = join_variables
                eofs_operators_desc[id_operator] = {}

                # The current tree is the left argument of the plan.
                # (``src`` rather than ``source`` so the parameter of the
                # same name is not overwritten.)
                for src in star_tree.sources.keys():
                    if len(set(sources[src]) & join_variables) > 0:
                        # TODO: Change the next 0 for len of something
                        operators_desc[id_operator].update({src: 0})
                        sources_desc[src] = sources_desc[src] | pow(
                            2, id_operator)
                    # TODO: check this.
                    eofs_operators_desc[id_operator].update({src: 0})
                    eofs_desc[src] = eofs_desc[src] | pow(2, id_operator)

                # The subtree j is the right argument of the plan.
                for src in subtree_j.sources.keys():
                    if len(set(sources[src]) & join_variables) > 0:
                        # TODO: Change the next 1 for len of something
                        operators_desc[id_operator].update({src: 1})
                        sources_desc[src] = sources_desc[src] | pow(
                            2, id_operator)
                    # TODO: check this.
                    eofs_operators_desc[id_operator].update({src: 1})
                    eofs_desc[src] = eofs_desc[src] | pow(2, id_operator)

                plan_order[id_operator] = tree_height
                operators_vars[id_operator] = join_variables
                tree_height = tree_height + 1

                # Place physical operator estimating cardinality.
                if isinstance(star_tree, IndependentOperator):
                    res = self.estimate_card(star_tree.total_res,
                                             subtree_j.total_res)
                    # Place a Nested Loop join.
                    if star_tree.total_res < (subtree_j.total_res / 100.0):
                        subtree_j = DependentOperator(
                            subtree_j.sources, subtree_j.server,
                            subtree_j.query, subtree_j.sources_desc,
                            subtree_j.vars, subtree_j.total_res)
                        op = Xnjoin(id_operator, join_variables, eddies)
                        operators.append(op)
                        star_tree = TreePlan(op, all_variables,
                                             join_variables, sources,
                                             star_tree, subtree_j,
                                             tree_height, 0)
                        operators_sym.update({id_operator: False})
                    # Place a Symmetric Hash join.
                    else:
                        op = Fjoin(id_operator, join_variables, eddies)
                        operators.append(op)
                        star_tree = TreePlan(op, all_variables,
                                             join_variables, sources,
                                             star_tree, subtree_j,
                                             tree_height, res)
                        independent_sources = independent_sources + 1
                        operators_sym.update({id_operator: True})
                else:
                    # TODO: new change here
                    res = self.estimate_card(star_tree.total_res,
                                             subtree_j.total_res)
                    if (star_tree.total_res / float(subtree_j.total_res) <
                            0.30) or (subtree_j.total_res > 100 * 1000
                                      and star_tree.total_res < 100 * 1000
                                      ) or (subtree_j.total_res < 100 * 5):
                        subtree_j = DependentOperator(
                            subtree_j.sources, subtree_j.server,
                            subtree_j.query, subtree_j.sources_desc,
                            subtree_j.vars, subtree_j.total_res)
                        op = Xnjoin(id_operator, join_variables, eddies)
                        operators.append(op)
                        star_tree = TreePlan(op, all_variables,
                                             join_variables, sources,
                                             star_tree, subtree_j,
                                             tree_height)
                        operators_sym.update({id_operator: False})
                    else:
                        op = Fjoin(id_operator, join_variables, eddies)
                        operators.append(op)
                        star_tree = TreePlan(op, all_variables,
                                             join_variables, sources,
                                             star_tree, subtree_j,
                                             tree_height, res)
                        independent_sources = independent_sources + 1
                        operators_sym.update({id_operator: True})
                id_operator += 1

        # Add current tree to the list of stars and
        # remove from the list of subtrees to process.
        stars.append(star_tree)
        for elem in to_delete:
            subtrees.remove(elem)

    # Stage 2: Build bushy tree to combine SSGs with common variables.
    # NOTE(review): a star without a join partner is popped and never
    # re-added, so its patterns are missing from the plan -- confirm that
    # only connected queries reach this routine.
    while len(stars) > 1:
        subtree_i = stars.pop(0)
        for j in range(0, len(stars)):
            subtree_j = stars[j]
            all_variables = set(subtree_i.vars) | set(subtree_j.vars)
            join_variables = set(subtree_i.join_vars) & set(
                subtree_j.join_vars)

            # Case: There is a join between stars.
            if len(join_variables) > 0:
                # Update signatures.
                sources = {}
                sources.update(subtree_i.sources)
                sources.update(subtree_j.sources)
                operators_desc[id_operator] = {}
                operators_vars[id_operator] = join_variables
                eofs_operators_desc[id_operator] = {}

                for src in subtree_i.sources.keys():
                    # This models the restriction: a tuple must have the
                    # join variable instantiated to be routed to a
                    # certain join.
                    if len(set(sources[src]) & join_variables) > 0:
                        # TODO: Change the next 0 for len of something
                        operators_desc[id_operator].update({src: 0})
                        sources_desc[src] = sources_desc[src] | pow(
                            2, id_operator)
                    # TODO: Check this.
                    eofs_operators_desc[id_operator].update({src: 0})
                    eofs_desc[src] = eofs_desc[src] | pow(2, id_operator)

                for src in subtree_j.sources.keys():
                    # Same restriction as above, for the right input.
                    if len(set(sources[src]) & join_variables) > 0:
                        # TODO: Change the next 1 for len of something
                        operators_desc[id_operator].update({src: 1})
                        sources_desc[src] = sources_desc[src] | pow(
                            2, id_operator)
                    # TODO: Check this.
                    eofs_operators_desc[id_operator].update({src: 1})
                    eofs_desc[src] = eofs_desc[src] | pow(2, id_operator)

                plan_order[id_operator] = max(subtree_i.height,
                                              subtree_j.height)
                stars.pop(j)

                # Place physical operators between stars.
                # In the TreePlan calls below, height and cardinality are
                # passed as two separate arguments, as in Stage 1; the
                # cardinality used to sit inside ``max(...)`` by mistake.
                if isinstance(subtree_j, IndependentOperator):
                    # Estimate from the operands being joined (the stale
                    # Stage 1 ``star_tree`` was used here before).
                    res = self.estimate_card(subtree_i.total_res,
                                             subtree_j.total_res)
                    # This case models a satellite, therefore apply
                    # cardinality estimation.
                    if subtree_i.total_res < (subtree_j.total_res / 100.0):
                        subtree_j = DependentOperator(
                            subtree_j.sources, subtree_j.server,
                            subtree_j.query, subtree_j.sources_desc,
                            subtree_j.vars, subtree_j.total_res)
                        op = Xnjoin(id_operator, join_variables, eddies)
                        operators.append(op)
                        stars.append(
                            TreePlan(op, all_variables, join_variables,
                                     sources, subtree_i, subtree_j,
                                     max(subtree_i.height, subtree_j.height),
                                     res))
                        # Adjust number of asynchronous leaves.
                        independent_sources = independent_sources - 1
                        operators_sym.update({id_operator: False})
                    else:
                        op = Fjoin(id_operator, join_variables, eddies)
                        operators.append(op)
                        stars.append(
                            TreePlan(op, all_variables, join_variables,
                                     sources, subtree_i, subtree_j,
                                     max(subtree_i.height, subtree_j.height),
                                     res))
                        operators_sym.update({id_operator: True})
                else:
                    res = (subtree_i.total_res + subtree_j.total_res) / 2
                    op = Fjoin(id_operator, join_variables, eddies)
                    operators.append(op)
                    stars.append(
                        TreePlan(op, all_variables, join_variables, sources,
                                 subtree_i, subtree_j,
                                 max(subtree_i.height, subtree_j.height),
                                 res))
                    operators_sym.update({id_operator: True})
                id_operator += 1
                break

        if len(subtrees) % 2 == 0:
            tree_height += 1

    tree_height += 1
    tree = stars.pop()

    # Adds the projection operator to the plan.
    if query.projection:
        op = Xproject(id_operator, query.projection, eddies)
        operators.append(op)
        tree = TreePlan(op, tree.vars, tree.join_vars, tree.sources, tree,
                        None, tree_height + 1, tree.total_res)

        # Update signature of tuples.
        operators_sym.update({id_operator: False})
        operators_desc[id_operator] = {}
        eofs_operators_desc[id_operator] = {}
        for src in tree.sources:
            operators_desc[id_operator].update({src: 0})
            eofs_operators_desc[id_operator].update({src: 0})
            eofs_desc[src] = eofs_desc[src] | pow(2, id_operator)
            sources_desc[src] = sources_desc[src] | pow(2, id_operator)
        plan_order[id_operator] = tree_height
        operators_vars[id_operator] = tree.vars
        id_operator += 1
        tree_height += 1

    # Adds the distinct operator to the plan.
    if query.distinct:
        op = Xdistinct(id_operator, eddies)
        operators.append(op)
        tree = TreePlan(op, tree.vars, tree.join_vars, tree.sources, tree,
                        None, tree_height + 1, tree.total_res)

        # Update signature of tuples.
        operators_sym.update({id_operator: False})
        operators_desc[id_operator] = {}
        eofs_operators_desc[id_operator] = {}
        for src in tree.sources:
            operators_desc[id_operator].update({src: 0})
            eofs_operators_desc[id_operator].update({src: 0})
            eofs_desc[src] = eofs_desc[src] | pow(2, id_operator)
            sources_desc[src] = sources_desc[src] | pow(2, id_operator)
        plan_order[id_operator] = tree_height
        operators_vars[id_operator] = tree.vars
        id_operator += 1
        tree_height += 1

    physical_plan = Plan(query_tree=tree,
                         tree_height=tree.height,
                         operators_desc=operators_desc,
                         sources_desc=sources_desc,
                         plan_order=plan_order,
                         operators_vars=operators_vars,
                         independent_sources=independent_sources,
                         operators_sym=operators_sym,
                         operators=operators)
    return physical_plan
# Read the query text; the context manager closes the file handle again
# (the bare open(...).read() left that to the garbage collector).
with open(
        "/Users/larsheling/Documents/Development/crop.nosync/queries/fedbench/LD9.rq"
) as query_file:
    query_str = query_file.read()
query_parsed = parse_new(query_str)

# Log to a per-query file.
fhandler = logging.FileHandler('../../logs/{}.log'.format(queryname), 'w')
fhandler.setLevel(logging.INFO)
logger.addHandler(fhandler)

# Number the triple patterns consecutively and fetch their metadata.
tps = get_tps(query_parsed.body.triples)
id2tp = {}
index = 0
for tp in tps:
    if isinstance(tp, TriplePattern):
        get_metadata(sources, tp)
        id2tp[index] = tp
        index += 1
print("Got Metadata")

tp_sources = set()
graph = {}
# Visit every unordered pair of distinct triple patterns.
for i in range(index):
    for j in range(i, index):
        if i != j:
            a = id2tp[i]
            b = id2tp[j]
            # True when at least one source of ``a`` does not start
            # with "sparql@".
            a_just_tp = any(not s_i.startswith("sparql@")
                            for s_i in a.sources.keys())
def create_plan(self, query):
    """Fetch metadata for the query's triple patterns, then plan them.

    The patterns in ``query.where.left.triple_patterns`` are copied into a
    new list, ``get_metadata`` is called once per pattern against
    ``self.source``, and the list is handed to ``self.create_best``
    together with the query.  Returns what ``create_best`` returns.
    """
    patterns = [pattern for pattern in query.where.left.triple_patterns]
    for pattern in patterns:
        get_metadata(self.source, pattern)
    return self.create_best(patterns, query)