def dict_to_logical(plan_dict):
    """Recursively convert a nested plan dictionary into a LogicalPlan.

    The keys of *plan_dict* are visited in the dictionary's iteration order:

    * ``'left'`` / ``'right'`` hold nested plan dictionaries, which are
      converted recursively.
    * ``'type'`` selects the join operator: ``'NLJ'`` maps to ``Xnjoin``,
      any other value to ``Fjoin``.
    * ``'tpf'`` holds a space-separated triple pattern string.  As soon as
      this key is encountered a leaf plan for that pattern is returned.

    Returns the leaf ``LogicalPlan`` for a ``'tpf'`` entry, otherwise
    ``LogicalPlan(left, right, join)`` built from whatever was found.
    """
    left_child = None
    right_child = None
    operator = None
    for key, value in plan_dict.items():
        if key == 'tpf':
            # The last space-separated token is discarded (presumably a
            # trailing terminator -- TODO confirm against the producer).
            terms = value.split(" ")[:-1]
            pattern = TriplePattern(
                Argument(terms[0]), Argument(terms[1]), Argument(terms[2]))
            return LogicalPlan(pattern)
        elif key == 'left':
            left_child = dict_to_logical(value)
        elif key == 'right':
            right_child = dict_to_logical(value)
        elif key == 'type':
            operator = Xnjoin if value == 'NLJ' else Fjoin
    return LogicalPlan(left_child, right_child, operator)
def custom_plan(sources):
    """Build a fixed, hand-written physical plan over three triple patterns.

    All three patterns share the subject variable ``?v3``.  The first two
    are joined with ``Xnjoin`` and the result is joined with the third
    pattern, again using ``Xnjoin``.

    :param sources: passed through unchanged as the first argument of
        ``PhysicalPlan``.
    :return: the ``PhysicalPlan`` wrapping the hard-coded logical plan.
    """
    tp_1 = TriplePattern(
        Argument("?v3"),
        Argument("<http://schema.org/trailer>"),
        Argument("?v5"))
    tp_2 = TriplePattern(
        Argument("?v3"),
        # The opening "<" was missing here, leaving a malformed IRI that was
        # inconsistent with every other IRI in this plan.
        Argument("<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>"),
        Argument("<http://db.uwaterloo.ca/~galuc/wsdbm/ProductCategory2>"))
    tp_3 = TriplePattern(
        Argument("?v3"),
        Argument("<http://db.uwaterloo.ca/~galuc/wsdbm/hasGenre>"),
        Argument("?v0"))

    # XNJoin = Nested Loop Join
    # FJoin: Hash Join
    l_plan = LogicalPlan(
        LogicalPlan(LogicalPlan(tp_1), LogicalPlan(tp_2), operator=Xnjoin),
        LogicalPlan(tp_3),
        operator=Xnjoin)

    # NOTE(review): the meaning of the literal 2 is not visible from here;
    # it is forwarded unchanged as PhysicalPlan's second argument.
    plan = PhysicalPlan(sources, 2, l_plan, poly_operator=False)
    return plan
def decomposition_to_plan(decomposition):
    """Greedily combine the elements of *decomposition* into one join plan.

    Every element is wrapped in a ``LogicalPlan``.  The plan with the
    smallest cardinality seeds the result; the remaining plans are then
    joined in one at a time.  At each step the first pending plan (in
    ascending cardinality order) that shares a variable with the current
    plan is chosen; if none shares a variable, the first pending plan is
    joined anyway.

    Returns the final ``LogicalPlan``.  Raises ``IndexError`` when
    *decomposition* is empty.
    """
    pending = sorted(
        [LogicalPlan(tp) for tp in decomposition],
        key=lambda leaf: leaf.cardinality)
    plan = pending[0]
    del pending[0]

    while pending:
        for candidate in pending:
            if plan.variables.intersection(candidate.variables):
                break
        else:
            # No pending plan is joinable: fall back to the first one.
            candidate = pending[0]

        operator = get_physical_operator(plan, candidate)
        plan = LogicalPlan(plan, candidate, operator)
        plan.compute_cardinality(cardinality_estimation)
        pending.remove(candidate)
    return plan
def joinPlans(self, L, R, best=False, operators=[Fjoin, Xnjoin]):
    """Enumerate the join plans of *L* and *R* for the given operators.

    One ``LogicalPlan`` is built per operator and its ``cost`` attribute is
    set from ``compute_cost(self.cost_model)``.  An ``Xnjoin`` plan is only
    built when at least one side is a triple pattern.

    :param best: when true, return only the single cheapest plan.
    :param operators: the join operators to try, in order.
    :return: the cheapest plan itself if *best* is true; a one-element list
        with the cheapest plan if both sides are triple patterns; otherwise
        the list of all candidate plans.
    """
    candidates = []
    for operator in operators:
        # For Xnjoin, one side has to be a triple pattern so that it can act
        # as the dependent operator of the plan.
        if operator == Xnjoin and not (L.is_triple_pattern or R.is_triple_pattern):
            continue
        candidate = LogicalPlan(L, R, operator)
        candidate.cost = candidate.compute_cost(self.cost_model)
        candidates.append(candidate)

    def cheapest():
        return sorted(candidates, key=lambda p: p.cost)[0]

    if best:
        return cheapest()
    # With two triple-pattern leaves only the cheapest plan needs to be kept.
    if L.is_triple_pattern and R.is_triple_pattern:
        return [cheapest()]
    return candidates
def dict_to_logical(plan_dict, sources):
    """Recursively convert a nested plan dictionary into a LogicalPlan.

    Keys of *plan_dict* are processed in the dictionary's iteration order:

    * ``'left'`` / ``'right'``: nested plan dictionaries, converted
      recursively with the same *sources*.
    * ``'type'``: ``'NLJ'`` selects ``Xnjoin``; any other value ``Fjoin``.
    * ``'tpf'``: a triple pattern string.  Variables (``?name``), ``<...>``
      terms and quoted literals are located with regular expressions, the
      ``(position, text)`` matches are handed to ``proc_arguments`` and the
      first three entries of its result become the pattern's arguments.
      A leaf plan is returned as soon as this key is seen.

    A leaf takes its cardinality from ``plan_dict['cardinality']`` and is
    attributed entirely to ``sources[0]``; a join takes its cardinality
    from ``plan_dict['estimated_cardinality']``.  Both default to 0.

    The triple pattern (for leaves) or the plan dictionary (for joins) is
    printed to stdout as debug output.

    :return: the ``LogicalPlan`` for this (sub)tree.
    """
    left = None
    right = None
    join = None
    for key, value in plan_dict.items():
        if key == 'right':
            right = dict_to_logical(plan_dict['right'], sources)
        if key == 'left':
            left = dict_to_logical(plan_dict['left'], sources)
        if key == 'type':
            if value == 'NLJ':
                join = Xnjoin
            else:
                join = Fjoin
        if key == 'tpf':
            pattern_var = re.compile(r'\?\w+')
            pattern_uri = re.compile(r'\<[^<^>]+\>')
            pattern_literal = re.compile(r'[\'"].*[\'"]@?\w*')

            # Each group keeps (start offset, matched text) pairs.
            # proc_arguments is defined elsewhere; presumably it uses the
            # offsets to restore subject/predicate/object order -- TODO confirm.
            matches_var = [(m.start(), m.group(0))
                           for m in pattern_var.finditer(value)]
            matches_uri = [(m.start(), m.group(0))
                           for m in pattern_uri.finditer(value)]
            matches_literal = [(m.start(), m.group(0))
                               for m in pattern_literal.finditer(value)]
            arguments = [matches_var, matches_uri, matches_literal]
            arguments = proc_arguments(arguments)

            triple_pattern = TriplePattern(Argument(arguments[0]),
                                           Argument(arguments[1]),
                                           Argument(arguments[2]))
            cardinality = int(plan_dict.get("cardinality", 0))
            triple_pattern.cardinality = cardinality
            triple_pattern.sources = {sources[0]: cardinality}
            print('--- Now printing Triple Pattern: ---')
            print(triple_pattern)
            print('------')
            return LogicalPlan(triple_pattern)

    # Call form instead of the Python 2 print statement: the output is the
    # same for a single argument on Python 2, it is valid syntax on Python 3,
    # and it matches the other prints in this function.
    print(plan_dict)
    logical_plan = LogicalPlan(left, right, join)
    logical_plan.cardinality = int(plan_dict.get("estimated_cardinality", 0))
    return logical_plan
def get_logical_plan(self, body):
    """Translate a parsed query *body* into a logical plan.

    * ``UnionBlock``: each entry of ``body.triples`` is planned
      recursively; falsy sub-plans are dropped.  A single remaining
      sub-plan is returned as is, otherwise the sub-plans are wrapped in
      ``LogicalUnion(subplans, Xunion)``.
    * ``JoinBlock``: a block flagged ``bgp`` is planned by
      ``iterative_dynamic_programming1``; a block with one entry is
      planned recursively; otherwise the first two entries are planned
      and combined with ``Fjoin``.
    * ``Optional``: ``body.triples`` is planned recursively.

    Returns ``None`` for any other kind of *body*.
    """
    if isinstance(body, UnionBlock):
        planned = (self.get_logical_plan(ggp) for ggp in body.triples)
        subplans = [subplan for subplan in planned if subplan]
        if len(subplans) == 1:
            # A single branch does not need a union on top of it.
            return subplans[0]
        return LogicalUnion(subplans, Xunion)

    if isinstance(body, JoinBlock):
        if body.bgp:
            return self.iterative_dynamic_programming1(body.triples)
        if len(body.triples) == 1:
            return self.get_logical_plan(body.triples[0])
        # Only the first two entries of body.triples are used here.
        left_plan = self.get_logical_plan(body.triples[0])
        right_plan = self.get_logical_plan(body.triples[1])
        return LogicalPlan(left_plan, right_plan, Fjoin)

    if isinstance(body, Optional):
        return self.get_logical_plan(body.triples)

    return None
def lw_plan_from_tree(self, node_id, out_edges, leafs, leaf_map, prefix=""):
    """Recursively turn a binary tree into a plan with labelled leaves.

    ``out_edges[node_id]`` provides the two children of *node_id*.  A child
    contained in *leafs* is looked up in *leaf_map* and wrapped in a
    ``LogicalPlan`` carrying a ``node_id`` label; any other child is
    expanded recursively.

    The join operator is picked at random between ``Xnjoin`` and ``Fjoin``.
    The label suffixes depend on that pick: ``"011"`` / ``"100"`` for
    ``Xnjoin``, ``"001"`` / ``"010"`` for ``Fjoin`` (left / right), each
    appended to *prefix*.  When neither child is a leaf, ``Fjoin`` is used
    regardless of the pick and the recursion restarts with the fixed
    prefixes ``"001"`` and ``"010"``.

    Raises ``Exception("Incompatible leafs")`` if both children are leaves
    and ``compatible`` reports that they do not fit together.
    """
    children = out_edges[node_id]
    left = children[0]
    right = children[1]

    operator = choice([Xnjoin, Fjoin])
    if operator == Xnjoin:
        left_suffix, right_suffix = "011", "100"
    else:
        left_suffix, right_suffix = "001", "010"
    lid = prefix + left_suffix
    rid = prefix + right_suffix

    left_is_leaf = left in leafs
    right_is_leaf = right in leafs

    if left_is_leaf and right_is_leaf:
        left_plan = LogicalPlan(leaf_map[left], node_id=lid)
        right_plan = LogicalPlan(leaf_map[right], node_id=rid)
        if not leaf_map[left].compatible(leaf_map[right]):
            raise Exception("Incompatible leafs")
        return LogicalPlan(left_plan, right_plan, operator)

    if left_is_leaf:
        leaf_plan = LogicalPlan(leaf_map[left], node_id=lid)
        subtree = self.lw_plan_from_tree(
            right, out_edges, leafs, leaf_map, prefix=rid)
        return LogicalPlan(leaf_plan, subtree, operator)

    if right_is_leaf:
        # The leaf plan becomes the first argument even though it is the
        # right child of the tree node.
        leaf_plan = LogicalPlan(leaf_map[right], node_id=rid)
        subtree = self.lw_plan_from_tree(
            left, out_edges, leafs, leaf_map, prefix=lid)
        return LogicalPlan(leaf_plan, subtree, operator)

    left_subtree = self.lw_plan_from_tree(
        left, out_edges, leafs, leaf_map, prefix="001")
    right_subtree = self.lw_plan_from_tree(
        right, out_edges, leafs, leaf_map, prefix="010")
    return LogicalPlan(left_subtree, right_subtree, Fjoin)
def decompostion_to_plan(self, decomposition):
    """Greedily build one join plan from the subplans of *decomposition*.

    ``Filter`` entries are collected and attached to the final plan; every
    other entry is wrapped in a ``LogicalPlan``.  Starting from the access
    plan with the smallest cardinality, the remaining access plans are
    joined in one at a time, preferring the first one that shares a
    variable with the plan built so far.

    Returns the resulting plan with ``plan.filters`` set to the collected
    filters.  Raises ``IndexError`` if *decomposition* contains no
    non-filter entry.
    """
    access_plans = []
    filters = []
    for subplan in decomposition:
        if isinstance(subplan, Filter):
            filters.append(subplan)
        else:
            # Both branches currently do the same thing.
            if isinstance(subplan, BGP):
                access_plans.append(LogicalPlan(subplan))
            else:
                access_plans.append(LogicalPlan(subplan))
            # NOTE(review): the nesting level of this statement could not be
            # recovered with certainty; it is assumed to run for every
            # non-filter subplan -- confirm against the upstream file.
            self.bgp_count.append(float(len(subplan)))
    # Cheapest access plan first.
    todo = sorted(access_plans, key=lambda x: x.cardinality)
    plan = todo[0]
    todo.remove(plan)
    # True until the first variable-sharing join has been placed.
    root = True
    while len(todo):
        for i in range(len(todo)):
            if len(plan.variables.intersection(todo[i].variables)) > 0:
                join_operator = self.get_physical_join_operator(
                    plan, todo[i])
                # Only on the first such join: a basic graph pattern that is
                # about to be joined with Xnjoin is wrapped in a
                # single-element LogicalUnion first.
                if root and plan.is_basic_graph_pattern and join_operator == Xnjoin:
                    plan = LogicalUnion([plan])
                plan = LogicalPlan(plan, todo[i], join_operator)
                plan.compute_cardinality(self.cardinality_estimation)
                todo.remove(todo[i])
                root = False
                break
        else:
            # No remaining access plan shares a variable with the current
            # plan: join the first one anyway.  This path leaves `root`
            # untouched.
            next_tp = todo[0]
            join_operator = self.get_physical_join_operator(plan, next_tp)
            plan = LogicalPlan(plan, next_tp, join_operator)
            plan.compute_cardinality(self.cardinality_estimation)
            todo.remove(next_tp)
    plan.filters = filters
    return plan
def get_logical_plan_simple(self, body):
    """Translate a parsed query *body* into a logical plan.

    * ``UnionBlock``: each entry of ``body.triples`` is planned; falsy
      sub-plans are dropped.  A single remaining sub-plan is returned as
      is, otherwise ``LogicalUnion(subplans, Xunion)`` is returned.
    * ``JoinBlock``: a block flagged ``bgp`` is planned by
      ``optimize_bgp``; a block with one entry is planned directly; a
      two-entry block whose second entry is an ``Optional`` is combined
      with the operator from ``get_optional_operator``; otherwise the
      first two entries are combined with ``Fjoin``.
    * ``Optional``: ``body.triples`` is planned.

    All nested planning goes through ``self.get_logical_plan`` rather than
    through this method.  Returns ``None`` for any other kind of *body*.
    """
    if isinstance(body, UnionBlock):
        planned = (self.get_logical_plan(ggp) for ggp in body.triples)
        subplans = [subplan for subplan in planned if subplan]
        if len(subplans) == 1:
            # A single branch does not need a union on top of it.
            return subplans[0]
        return LogicalUnion(subplans, Xunion)

    if isinstance(body, JoinBlock):
        if body.bgp:
            return self.optimize_bgp(body.triples)
        if len(body.triples) == 1:
            return self.get_logical_plan(body.triples[0])

        has_optional = (len(body.triples) == 2
                        and isinstance(body.triples[1], Optional))
        left_plan = self.get_logical_plan(body.triples[0])
        right_plan = self.get_logical_plan(body.triples[1])
        if has_optional:
            # TODO: Handle case with several optionals
            operator = self.get_optional_operator(left_plan, right_plan)
        else:
            operator = Fjoin
        return LogicalPlan(left_plan, right_plan, operator)

    if isinstance(body, Optional):
        return self.get_logical_plan(body.triples)

    return None
def optimize_subquery(self, subquery, filters):
    """Plan every combination of triple-pattern alternatives in *subquery*.

    *subquery* is a sequence of iterables; one element is drawn from each
    (Cartesian product) to form a combination.  For every combination a
    join plan is built greedily: the access plan with the smallest
    cardinality seeds the plan and the remaining ones are joined in one at
    a time, preferring the first that shares a variable with the plan built
    so far.  *filters* is attached to each resulting plan.

    :return: ``self.union_subplans(plans)`` over all combination plans, or
        ``None`` when there is nothing to plan (an empty *subquery*, or one
        containing an empty alternative list).
    """
    plans = []
    for tp_combination in product(*subquery):
        if not tp_combination:
            # product() of zero iterables yields a single empty tuple; there
            # is nothing to plan for it, and indexing todo[0] below would
            # raise IndexError instead of reaching the "no plans" path.
            continue

        todo = sorted(
            [LogicalPlan(tp) for tp in tp_combination],
            key=lambda x: x.cardinality)
        plan = todo[0]
        todo.remove(plan)

        while todo:
            for candidate in todo:
                if len(plan.variables.intersection(candidate.variables)) > 0:
                    break
            else:
                # No remaining pattern is joinable: take the first one.
                candidate = todo[0]

            plan = LogicalPlan(
                plan, candidate,
                self.get_physical_join_operator(plan, candidate))
            plan.compute_cardinality(self.cardinality_estimation)
            todo.remove(candidate)

        plan.filters = filters
        plans.append(plan)

    if not plans:
        return None
    return self.union_subplans(plans)
def iterative_dynamic_programming1(self, triple_patterns):
    """Plan *triple_patterns* with iterative dynamic programming (IDP).

    Metadata is fetched for every triple pattern via ``get_metadata`` and
    ``self.planning_requests`` is increased by ``len(self.sources)`` per
    pattern.  Sub-plans for groups of up to ``k`` pending items are then
    enumerated with ``joinPlans``, pruned with ``best_n_plans`` and, per
    round, the cheapest plan of size ``k`` replaces its constituents in the
    pending set until one item is left.

    After the last round the candidates are compared by cost and by
    ``average_cost(self.robust_model)``.  The attributes
    ``self.robust_over_cost``, ``self.cost_robust_ratio`` and
    ``self.cost_cost_ratio`` are updated as a side effect.

    :return: the selected ``LogicalPlan``: the robust alternative when
        ``self.enable_robustplan`` is set and the decision rule fires,
        otherwise the cheapest plan.
    :raises Exception: ``"IDP Error: No best plan"`` when a round yields no
        candidate of size ``k``.
    """
    if len(triple_patterns) == 1:
        # For each server, we need one request to get the metadata
        self.planning_requests += len(self.sources)
        get_metadata(self.sources, triple_patterns[0])
        return LogicalPlan(triple_patterns[0])

    best_row = False
    opt_plan = {}
    toDo = set()
    k = min(len(triple_patterns), self.k)
    if self.adaptive_k and len(triple_patterns) >= 6:
        k = 2

    for triple_pattern in triple_patterns:
        # For each server, we need one request to get the metadata
        self.planning_requests += len(self.sources)
        get_metadata(self.sources, triple_pattern)
        accessPlan = set([LogicalPlan(triple_pattern)])
        opt_plan[(triple_pattern, )] = accessPlan
        toDo.add(triple_pattern)

    while len(toDo) > 1:
        k = min(k, len(toDo))
        for i in range(2, k + 1):
            for S in combinations(toDo, i):
                opt_plan[S] = set()
                for O in self.true_subset(S):
                    opt_plan_O = opt_plan[O]
                    # NOTE(review): the tuple order below comes from set
                    # iteration; whether it always matches an existing
                    # opt_plan key for more than one element is not visible
                    # from here.
                    S_minus_O = tuple(set(S).difference(set(O)))
                    opt_plan_S_minus_O = opt_plan.get(S_minus_O, None)
                    if not opt_plan_S_minus_O or not opt_plan_O:
                        continue
                    for opt_plan_o in opt_plan_O:
                        for opt_plan_s_minus_o in opt_plan_S_minus_O:
                            # Presumably LogicalPlan.__add__ yields the
                            # number of shared join variables -- TODO confirm.
                            join_vars = opt_plan_o + opt_plan_s_minus_o
                            if join_vars > 0:
                                join_plans = self.joinPlans(
                                    opt_plan_o, opt_plan_s_minus_o)
                                join_plans_S = opt_plan[S].union(join_plans)
                                opt_plan[S] = self.best_n_plans(
                                    list(join_plans_S), self.top_t)

        # Collect every plan of size k that only covers pending items.
        best_plans = []
        V = set()
        for key, values in opt_plan.items():
            for value in values:
                k_len = len(key)
                if k_len == k and value and set(key).issubset(toDo):
                    V.add(key)
                    rob = value.cost
                    best_plans.append((value, value.cost, rob, key))
        if len(best_plans) == 0:
            raise Exception("IDP Error: No best plan")
        for v in V:
            del opt_plan[v]

        # In intermediate steps of IDP: Take best plan only
        best_plan = sorted(best_plans, key=lambda x: (x[1], x[2]))[0]
        tps = best_plan[3]
        opt_plan[(tps, )] = set([best_plan[0]])
        if best_row:
            best_plans.remove(best_plan)
            # Best Row
            for bp in best_plans:
                if bp[3] == tps:
                    opt_plan[(tps, )].add(bp[0])
        # Remove triple patterns from todo list; the merged group re-enters
        # the pending set as a single item.
        for tp in tps:
            toDo.remove(tp)
        toDo.add(best_plan[3])

    # Candidates of the final round: (entry, cost, robust cost, cost/robust).
    tmp_plans = []
    for plan in best_plans:
        cost = plan[0].cost
        rob = plan[0].average_cost(self.robust_model)
        tmp_plans.append((plan, cost, rob, cost / rob))
    cheap_plan = sorted(tmp_plans, key=lambda x: (x[1], x[3]))[0]

    # Decision rule for robust plan
    self.robust_over_cost = False
    rob_cost_ratio = cheap_plan[1] / cheap_plan[2]
    self.cost_robust_ratio = rob_cost_ratio
    if len(tmp_plans) > 1:
        tmp_plans.remove(cheap_plan)
        # A real list is required: the result is tested for emptiness and
        # sorted.  filter() only returns a list on Python 2; on Python 3 its
        # iterator is always truthy and has no len().
        plans_over_thrshld = [
            candidate for candidate in tmp_plans
            if candidate[3] >= self.robustness_threshold
        ]
        if not plans_over_thrshld:
            plans_over_thrshld = tmp_plans
        robust_plan = sorted(plans_over_thrshld,
                             key=lambda x: (x[1], x[2]))[0]
    else:
        robust_plan = cheap_plan

    # What is the cost ratio of the cheapest and the most robust plan
    cost_cost_ratio = cheap_plan[1] / robust_plan[1]
    self.cost_cost_ratio = cost_cost_ratio
    self.robust_over_cost = (
        rob_cost_ratio <= self.robustness_threshold
        and cost_cost_ratio >= self.cost_threshold)
    if self.enable_robustplan and self.robust_over_cost:
        logger.debug("IDP: Robust Plan over Cheapest Plan")
        return robust_plan[0][0]
    return cheap_plan[0][0]
def optimize_subquery(self, triples):
    """Build a join tree for *triples* in two stages.

    Metadata is fetched for every triple via ``get_metadata`` and
    ``self.planning_requests`` is increased by ``len(self.sources)`` per
    triple.

    Stage 1 processes the leaves in ascending cardinality order.  Each
    round pops the first remaining leaf and joins into it every other
    remaining leaf that shares a variable with that first leaf, forming one
    "star".  Stage 2 repeatedly takes the first star and joins it with the
    first other star sharing a variable with it.

    :return: the last tree left in the list of stars.
    :raises IndexError: if *triples* is empty.
    """
    subtrees = []
    for triple in triples:
        # For each server, we need one request to get the metadata
        self.planning_requests += len(self.sources)
        get_metadata(self.sources, triple)
        subtrees.append(LogicalPlan(triple))
    subtrees.sort(key=lambda x: x.cardinality)

    # Stage 1: Group triple patterns into stars.
    stars = []
    while len(subtrees) > 0:
        to_delete = []
        star_tree = subtrees.pop(0)
        # The star is defined by the variables of its first leaf; this set
        # is not extended as more leaves are joined in.
        star_vars = star_tree.variables
        for subtree_j in subtrees:
            join_variables = set(star_vars).intersection(subtree_j.variables)
            # Case: There is a join.
            if len(join_variables) > 0:
                to_delete.append(subtree_j)
                # Place physical operator estimating cardinality.
                if star_tree.is_triple_pattern:
                    res = self.estimate_card(star_tree.cardinality,
                                             subtree_j.cardinality)
                    # Place a Nested Loop join.
                    # Paper: if (tpi.count / tpi.pagesize) <= s.count then
                    if star_tree.cardinality < (subtree_j.cardinality / 100.0):
                        join_type = Xnjoin
                        # If NLJ is placed, set res = 0/1 to force NLJs later
                        res = 1
                    # Place a Symmetric Hash join.
                    else:
                        join_type = Fjoin
                else:
                    res = self.estimate_card(star_tree.cardinality,
                                             subtree_j.cardinality)
                    if (star_tree.cardinality / float(subtree_j.cardinality) < 0.30) or \
                            (subtree_j.cardinality > 100*1000 and star_tree.cardinality < 100*1000) or \
                            (subtree_j.cardinality < 100*5):
                        join_type = Xnjoin
                        # If NLJ is placed, set res = 0/1 to force NLJs later
                        res = 1
                    else:
                        join_type = Fjoin
                star_tree = LogicalPlan(star_tree, subtree_j, join_type)
                star_tree.cardinality = res
        # Add current tree to the list of stars and
        # remove from the list of subtrees to process.
        stars.append(star_tree)
        for elem in to_delete:
            subtrees.remove(elem)

    # Stage 2: Build bushy tree to combine SSGs with common variables.
    # NOTE(review): a star that shares no variable with any other star is
    # popped and not re-added, so it does not appear in the result.
    while len(stars) > 1:
        subtree_i = stars.pop(0)
        star_vars = subtree_i.variables
        for j in range(0, len(stars)):
            subtree_j = stars[j]
            join_variables = set(star_vars).intersection(subtree_j.variables)
            # Case: There is a join between stars.
            if len(join_variables) > 0:
                stars.pop(j)
                # Estimate from the two trees actually being joined.  The
                # previous code read `star_tree`, a leftover from stage 1 or
                # from the previous iteration, which is never subtree_i.
                res = self.estimate_card(subtree_i.cardinality,
                                         subtree_j.cardinality)
                # Place physical operators between stars.
                if subtree_j.is_triple_pattern:
                    # This case models a satellite, therefore apply
                    # cardinality estimation.
                    if subtree_i.cardinality < (subtree_j.cardinality / 100.0):
                        join_type = Xnjoin
                    else:
                        join_type = Fjoin
                else:
                    res = (subtree_i.cardinality + subtree_j.cardinality) / 2
                    join_type = Fjoin
                star_tree = LogicalPlan(subtree_i, subtree_j, join_type)
                star_tree.cardinality = res
                stars.append(star_tree)
                break

    tree = stars.pop()
    return tree
def decomposition_to_plan(decomposition):
    """Merge per-source subqueries into leaves and join them greedily.

    *decomposition* yields ``(subquery, source, cardinality)`` triples.
    Entries with the same subquery are merged into one leaf:

    * ``TriplePattern``: the first occurrence creates a copy whose
      ``sources`` maps *source* to *cardinality*; later occurrences add
      their source and increase the leaf's cardinality.
    * ``BGP``: the first occurrence creates a new ``BGP`` of copied triple
      patterns; later occurrences record the source on every matching
      triple pattern and increase the BGP's cardinality.

    Subqueries of any other type are ignored.  The leaves are then joined
    greedily, starting with the smallest cardinality and preferring the
    first leaf that shares a variable with the plan built so far.

    Returns the final ``LogicalPlan``.  Raises ``IndexError`` when no leaf
    was produced.
    """
    leafs = {}
    for subquery, source, cardinality in decomposition:
        if isinstance(subquery, TriplePattern):
            if subquery in leafs.keys():
                known = leafs[subquery]
                known.sources[source] = cardinality
                known.cardinality += cardinality
            else:
                leaf = TriplePattern(
                    subquery[0], subquery[1], subquery[2],
                    sources={source: cardinality})
                leaf.cardinality = cardinality
                leafs[subquery] = leaf
        elif isinstance(subquery, BGP):
            if subquery in leafs.keys():
                for tp in leafs[subquery]:
                    for bgp_tp in subquery:
                        if tp == bgp_tp:
                            tp.sources[source] = bgp_tp.cardinality
                leafs[subquery].cardinality += cardinality
            else:
                copies = [
                    TriplePattern(
                        triple_pattern[0], triple_pattern[1],
                        triple_pattern[2], sources={source: cardinality})
                    for triple_pattern in subquery
                ]
                merged = BGP(copies)
                merged.cardinality = cardinality
                leafs[subquery] = merged

    pending = sorted(
        [LogicalPlan(leaf) for leaf in leafs.values()],
        key=lambda access_plan: access_plan.cardinality)
    plan = pending[0]
    del pending[0]

    while pending:
        for candidate in pending:
            if plan.variables.intersection(candidate.variables):
                break
        else:
            # No pending leaf is joinable: fall back to the first one.
            candidate = pending[0]

        operator = get_physical_operator(plan, candidate)
        plan = LogicalPlan(plan, candidate, operator)
        plan.compute_cardinality(cardinality_estimation)
        pending.remove(candidate)
    return plan
def plan_from_tree(self, node_id, out_edges, leafs, leaf_map):
    """Recursively turn a binary tree into a costed plan.

    ``out_edges[node_id]`` provides the two children of *node_id*.  A child
    contained in *leafs* is looked up in *leaf_map* and wrapped in a
    ``LogicalPlan``; any other child is expanded recursively.  If at least
    one child is a leaf the join operator is picked at random between
    ``Xnjoin`` and ``Fjoin``; two inner nodes are always combined with
    ``Fjoin``.  ``compute_cost(self.cost_model)`` is called on every plan
    before it is returned.

    Raises a bare ``Exception`` if both children are leaves and
    ``compatible`` reports that they do not fit together.
    """
    children = out_edges[node_id]
    left = children[0]
    right = children[1]
    left_is_leaf = left in leafs
    right_is_leaf = right in leafs

    if left_is_leaf and right_is_leaf:
        left_plan = LogicalPlan(leaf_map[left])
        right_plan = LogicalPlan(leaf_map[right])
        if not leaf_map[left].compatible(leaf_map[right]):
            raise Exception
        plan = LogicalPlan(left_plan, right_plan, choice([Xnjoin, Fjoin]))
    elif left_is_leaf:
        leaf_plan = LogicalPlan(leaf_map[left])
        subtree = self.plan_from_tree(right, out_edges, leafs, leaf_map)
        # The operator is drawn after the subtree has been built.
        plan = LogicalPlan(leaf_plan, subtree, choice([Xnjoin, Fjoin]))
    elif right_is_leaf:
        # The leaf plan becomes the first argument even though it is the
        # right child of the tree node.
        leaf_plan = LogicalPlan(leaf_map[right])
        subtree = self.plan_from_tree(left, out_edges, leafs, leaf_map)
        plan = LogicalPlan(leaf_plan, subtree, choice([Xnjoin, Fjoin]))
    else:
        left_subtree = self.plan_from_tree(left, out_edges, leafs, leaf_map)
        right_subtree = self.plan_from_tree(right, out_edges, leafs, leaf_map)
        plan = LogicalPlan(left_subtree, right_subtree, Fjoin)

    plan.compute_cost(self.cost_model)
    return plan