def _order_to_tree_topology(order: List[int], pattern: Pattern):
    """
    Converts a flat evaluation order into a tree topology.
    A leaf is created for the first index of the order; every following index gets its own
    leaf, which is combined with the tree accumulated so far into a new binary node.
    Raises IndexError if the order is empty.
    """
    first_index, remaining_indices = order[0], order[1:]
    topology = TreePlanLeafNode(first_index)
    for event_index in remaining_indices:
        # the accumulated tree is always passed before the freshly created leaf
        topology = TreePlanBuilder._instantiate_binary_node(
            pattern, topology, TreePlanLeafNode(event_index))
    return topology
def __create_nested_structure(nested_operator: PatternStructure):
    """
    Builds a plan for a nested operator by chaining its arguments in their declared order.
    This method is a temporary hack, hopefully it will be removed soon.
    # TODO: calculate the evaluation order in the way it should work - using a tree plan builder
    """
    # a composite structure contributes one leaf per argument, anything else a single leaf
    if isinstance(nested_operator, CompositeStructure):
        leaf_indices = list(range(len(nested_operator.args)))
    else:
        leaf_indices = [0]

    # operators other than AND / SEQ leave the binary nodes without an operator type
    if isinstance(nested_operator, AndOperator):
        operator_type = OperatorTypes.AND
    elif isinstance(nested_operator, SeqOperator):
        operator_type = OperatorTypes.SEQ
    else:
        operator_type = None

    nested_plan = TreePlanLeafNode(leaf_indices[0])
    for leaf_index in leaf_indices[1:]:
        nested_plan = TreePlanBinaryNode(operator_type, nested_plan, TreePlanLeafNode(leaf_index))
    return nested_plan
def __init_tree_leaves(pattern: Pattern, nested_topologies: List[TreePlanNode] = None,
                       nested_args: List[PatternStructure] = None, nested_cost: List[float] = None):
    """
    Creates one leaf per positive top-level argument of the pattern.
    When a nested topology is supplied for an argument, a nested node wrapping that topology is
    created instead of a regular leaf. Arguments that are unary structures additionally get
    their leaf wrapped in a unary node.
    """
    leaves = []
    positive_args = pattern.get_top_level_structure_args(positive_only=True)
    for index, arg in enumerate(positive_args):
        has_nested_topology = nested_topologies is not None and nested_topologies[index] is not None
        if has_nested_topology:
            # an already-wrapped nested node is unwrapped so that only its sub-plan is nested again
            topology = nested_topologies[index]
            if isinstance(topology, TreePlanNestedNode):
                topology = topology.sub_tree_plan
            leaf = TreePlanNestedNode(index, topology, nested_args[index], nested_cost[index])
        else:
            # the argument is either a PrimitiveEventStructure or a UnaryOperator surrounding one
            if isinstance(arg, PrimitiveEventStructure):
                event_structure = arg
            else:
                event_structure = arg.child
            leaf = TreePlanLeafNode(index, event_structure.type, event_structure.name)

        if isinstance(arg, UnaryStructure):
            leaf = TreePlanBuilder._instantiate_unary_node(
                TreePlanBuilder.__create_dummy_subpattern(pattern, arg), leaf)
        leaves.append(leaf)
    return leaves
def _create_tree_topology(self, pattern: Pattern):
    """
    Finds the cheapest tree topology for the pattern using dynamic programming over subsets
    of its arguments.
    For every subset of argument indices, in increasing subset size, all splits produced by
    get_all_disjoint_sets are examined; the two halves' best known topologies are joined by a
    binary node and the cheapest join (according to self._get_plan_cost) is remembered.
    On equal costs the split produced first wins.
    Returns the best topology found for the full set of arguments. For a pattern with a single
    argument this is a lone leaf node (previously the list [0] was returned, which is not a
    tree node like every other return value of this method).
    Raises MissingStatisticsException unless the pattern carries a selectivity matrix together
    with arrival rates.
    """
    if pattern.statistics_type != StatisticsTypes.SELECTIVITY_MATRIX_AND_ARRIVAL_RATES:
        raise MissingStatisticsException()
    # the arrival rates are not read directly by this method; only the matrix size is needed
    selectivity_matrix, _ = pattern.statistics

    args_num = len(selectivity_matrix)
    if args_num == 1:
        return TreePlanLeafNode(0)

    items = frozenset(range(args_num))
    # maps every subset solved so far to its best known (topology, cost) pair
    sub_trees = {frozenset({i}): (TreePlanLeafNode(i),
                                  self._get_plan_cost(pattern, TreePlanLeafNode(i)))
                 for i in items}

    # for each subset of size i, find the optimal topology based on the smaller subsets
    for i in range(2, args_num + 1):
        for combination in combinations(items, i):
            subset = frozenset(combination)
            best_tree, best_cost = None, None
            is_first_split = True
            for set1, set2 in get_all_disjoint_sets(subset):
                tree1, _ = sub_trees[set1]
                tree2, _ = sub_trees[set2]
                new_tree = TreePlanBuilder._instantiate_binary_node(pattern, tree1, tree2)
                new_cost = self._get_plan_cost(pattern, new_tree)
                # the first split is the speculative best; later ones must be strictly cheaper
                if is_first_split or new_cost < best_cost:
                    best_tree, best_cost = new_tree, new_cost
                is_first_split = False
            sub_trees[subset] = best_tree, best_cost

    # return the best topology (index 0 of the pair) for the set of all arguments
    return sub_trees[items][0]
def build_tree_plan(self, pattern: Pattern, statistics: Dict):
    """
    Creates a tree-based evaluation plan for the given pattern.
    Returns a pair: the plan wrapping the computed topology, and the invariants produced
    alongside it by _create_tree_topology.
    """
    # as of now, the invariant-based method can only work on composite non-nested patterns,
    # hence one plain leaf per argument of the full structure
    leaf_count = len(pattern.full_structure.args)
    initial_leaves = list(map(TreePlanLeafNode, range(leaf_count)))
    topology_and_invariants = self._create_tree_topology(pattern, statistics, initial_leaves)
    root, invariants = topology_and_invariants
    return TreePlan(root), invariants
def _create_tree_topology(self, pattern: Pattern):
    """
    Finds the cheapest tree topology over contiguous slices of an initial evaluation order.
    The initial order comes from self._get_initial_order. For every contiguous slice of that
    order, from shortest to longest, each split point is tried: the best known topologies of
    the two parts are joined by a binary node and the cheapest join (according to
    self._get_plan_cost) is kept. On equal costs the earliest split point wins.
    Returns the topology stored for the entire order.
    Raises MissingStatisticsException unless the pattern carries a selectivity matrix together
    with arrival rates.
    """
    if pattern.statistics_type != StatisticsTypes.SELECTIVITY_MATRIX_AND_ARRIVAL_RATES:
        raise MissingStatisticsException()
    selectivity_matrix, arrival_rates = pattern.statistics

    order = self._get_initial_order(selectivity_matrix, arrival_rates)
    args_num = len(order)
    items = tuple(order)

    # maps a slice of the order (as a tuple) to its best known (topology, cost) pair
    best_plans = {}
    for event_index in items:
        best_plans[(event_index,)] = (TreePlanLeafNode(event_index),
                                      self._get_plan_cost(pattern, TreePlanLeafNode(event_index)))

    for length in range(2, args_num + 1):
        for start in range(args_num - length + 1):
            window = tuple(order[position] for position in range(start, start + length))
            best_tree, best_cost = None, None
            for split_at in range(1, length):
                left_tree, _ = best_plans[window[:split_at]]
                right_tree, _ = best_plans[window[split_at:]]
                candidate = TreePlanBuilder._instantiate_binary_node(pattern, left_tree, right_tree)
                candidate_cost = self._get_plan_cost(pattern, candidate)
                # the first split is the speculative best; later ones must be strictly cheaper
                if split_at == 1 or candidate_cost < best_cost:
                    best_tree, best_cost = candidate, candidate_cost
            best_plans[window] = best_tree, best_cost

    # the entry keyed by the whole order holds the final topology at index 0
    return best_plans[items][0]
def _order_to_tree_topology(order: List[int], pattern: Pattern, leaves: List[TreePlanNode] = None):
    """
    Converts a flat evaluation order into a tree topology.
    The entries of the order are used as indices into the given leaves. When no leaves are
    supplied, plain leaf nodes are created for every index from 0 up to the largest index
    appearing in the order. The leaf of the first entry seeds the tree; each following leaf is
    combined with the tree accumulated so far into a new binary node.
    """
    if leaves is None:
        leaves = list(map(TreePlanLeafNode, range(max(order) + 1)))
    topology = leaves[order[0]]
    for leaf_index in order[1:]:
        # the accumulated tree is always passed before the next leaf
        topology = TreePlanBuilder._instantiate_binary_node(pattern, topology, leaves[leaf_index])
    return topology
def _create_tree_topology(self, pattern: Pattern, statistics: Dict, leaves: List[TreePlanNode]):
    """
    Builds a tree topology over contiguous slices of an initial evaluation order and, alongside
    it, a collection of invariants pairing sub-trees of the chosen topology with their runner-up.
    For every slice of the order (from shortest to longest) all split points are tried; the
    cheapest join according to self._get_plan_cost is kept as the best topology of the slice,
    and a second candidate is tracked next to it.
    Returns a pair: the topology stored for the entire order, and the invariants object.
    Raises MissingStatisticsException unless the statistics contain exactly the arrival rates
    and the selectivity matrix.
    NOTE(review): the 'leaves' parameter is not read in this body - fresh leaf nodes are
    created from the order instead; confirm whether that is intended.
    """
    # exactly two statistics entries are accepted: arrival rates and selectivity matrix
    if StatisticsTypes.ARRIVAL_RATES in statistics and \
            StatisticsTypes.SELECTIVITY_MATRIX in statistics and \
            len(statistics) == 2:
        selectivity_matrix = statistics[StatisticsTypes.SELECTIVITY_MATRIX]
        arrival_rates = statistics[StatisticsTypes.ARRIVAL_RATES]
    else:
        raise MissingStatisticsException()

    order = self._get_initial_order(selectivity_matrix, arrival_rates)
    args_num = len(order)
    items = tuple(order)
    # maps a slice of the order (as a tuple) to its best known (topology, cost) pair
    suborders = {(i, ): (TreePlanLeafNode(i), self._get_plan_cost(pattern, TreePlanLeafNode(i), statistics)) for i in items}
    # maps the best topology of a slice to the runner-up topology recorded for the same slice
    tree_to_second_min_tree_map = {}
    invariants = ZStreamTreeInvariants(self._get_plan_cost)
    # filled in by __get_relevant_sub_trees below
    all_sub_trees = []

    # iterate over suborders sizes
    for i in range(2, args_num + 1):
        # iterate over suborders of size i
        for j in range(args_num - i + 1):
            # create the suborder (slice) to find its optimum.
            suborder = tuple(order[t] for t in range(j, j + i))
            # use first split of suborder as speculative best.
            order1_, order2_ = suborder[:1], suborder[1:]
            tree1_, _ = suborders[order1_]
            tree2_, _ = suborders[order2_]
            tree = TreePlanBuilder._instantiate_binary_node(
                pattern, tree1_, tree2_)
            cost = self._get_plan_cost(pattern, tree, statistics)
            suborders[suborder] = tree, cost
            # the runner-up starts out as the first-split tree itself
            second_prev_cost = cost
            second_min_tree = tree
            # iterate over splits of suborder
            for k in range(2, i):
                # find the optimal topology of this split, according to optimal topologies of subsplits.
                order1, order2 = suborder[:k], suborder[k:]
                tree1, _ = suborders[order1]
                tree2, _ = suborders[order2]
                _, prev_cost = suborders[suborder]
                new_tree = TreePlanBuilder._instantiate_binary_node(
                    pattern, tree1, tree2)
                new_cost = self._get_plan_cost(pattern, new_tree, statistics)
                if new_cost < prev_cost:
                    # a strictly cheaper split: the previous best is demoted to runner-up
                    second_prev_cost = prev_cost
                    second_min_tree = suborders[suborder][0]
                    suborders[suborder] = new_tree, new_cost
                elif new_cost < second_prev_cost or second_min_tree == tree:
                    # not better than the best: becomes the runner-up if it is cheaper than the
                    # current runner-up, or if the runner-up still compares equal to the
                    # first-split tree (the comparison relies on the tree nodes' equality)
                    second_prev_cost = new_cost
                    second_min_tree = new_tree
            # slices of size 2 have a single split, so no runner-up is recorded for them
            if i != 2:
                tree_to_second_min_tree_map[suborders[suborder][0]] = second_min_tree

    # Collects the sub-trees of the final topology into all_sub_trees, so that only trees
    # that are part of the best tree get an invariant
    InvariantAwareZStreamTreeBuilder.__get_relevant_sub_trees(
        suborders[items][0], all_sub_trees)
    for tree in all_sub_trees:
        invariants.add(Invariant(tree, tree_to_second_min_tree_map[tree]))
    # return the topology (index 0 at tuple) of the entire order, indexed to 'items'
    return suborders[items][0], invariants