def _create_evaluation_order(self, pattern: Pattern):
    """
    Compute an evaluation order by delegating to the dynamic-programming left-deep builder.

    The pattern's statistics are expected to be a (selectivity matrix, arrival rates) pair. Both are
    forwarded to DynamicProgrammingLeftDeepTreeBuilder.find_order together with the value of
    pattern.window.total_seconds().

    Raises:
        MissingStatisticsException: if the pattern's statistics type is anything other than
            SELECTIVITY_MATRIX_AND_ARRIVAL_RATES.
    """
    # Guard clause: this builder cannot work without both statistics.
    if pattern.statistics_type != StatisticsTypes.SELECTIVITY_MATRIX_AND_ARRIVAL_RATES:
        raise MissingStatisticsException()
    selectivity_matrix, arrival_rates = pattern.statistics
    return DynamicProgrammingLeftDeepTreeBuilder.find_order(
        selectivity_matrix,
        arrival_rates,
        pattern.window.total_seconds(),
    )
def build_single_pattern_eval_mechanism(self, pattern: Pattern):
    """
    Build a tree-based evaluation mechanism for a single pattern.

    The tree structure is obtained from self._find_tree, which receives the pattern's selectivity
    matrix, its arrival rates and the value of pattern.window.total_seconds(). The result is wrapped
    in a TreeBasedEvaluationMechanism together with the pattern.

    Raises:
        MissingStatisticsException: if the pattern's statistics type is anything other than
            SELECTIVITY_MATRIX_AND_ARRIVAL_RATES.
    """
    if pattern.statistics_type != StatisticsTypes.SELECTIVITY_MATRIX_AND_ARRIVAL_RATES:
        raise MissingStatisticsException()
    selectivity_matrix, arrival_rates = pattern.statistics
    tree = self._find_tree(
        selectivity_matrix,
        arrival_rates,
        pattern.window.total_seconds(),
    )
    return TreeBasedEvaluationMechanism(pattern, tree)
def get_plan_cost(self, pattern: Pattern, plan: TreePlanNode):
    """
    Return the cost of the given tree plan for the given pattern.

    The pattern's selectivity matrix and arrival rates, plus the value of
    pattern.window.total_seconds(), are handed to the private recursive helper; the helper returns
    three values, of which only the last one (the cost) is returned to the caller.

    Raises:
        MissingStatisticsException: if the pattern's statistics type is anything other than
            SELECTIVITY_MATRIX_AND_ARRIVAL_RATES.
    """
    if pattern.statistics_type != StatisticsTypes.SELECTIVITY_MATRIX_AND_ARRIVAL_RATES:
        raise MissingStatisticsException()
    matrix, rates = pattern.statistics
    # NOTE(review): the double-underscore helper is resolved through Python's private-name mangling,
    # so this call presumably relies on the method living inside IntermediateResultsTreeCostModel.
    helper_result = IntermediateResultsTreeCostModel.__get_plan_cost_aux(
        plan, matrix, rates, pattern.window.total_seconds())
    # Exactly three values are expected; the first two are not needed here.
    _first, _second, plan_cost = helper_result
    return plan_cost
def _create_evaluation_order(self, pattern: Pattern):
    """
    Produce an evaluation order by running iterative improvement from an initial order.

    The initial order depends on the configured init type:
      * RANDOM - obtained from the private random-order helper, sized by the number of arrival rates;
      * GREEDY - obtained from GreedyLeftDeepTreeBuilder.calculate_greedy_order.
    The improvement routine is then executed with the configured step limit, the initial order and
    a callback that costs a candidate order through self._get_order_cost.

    Raises:
        MissingStatisticsException: if the pattern's statistics type is anything other than
            SELECTIVITY_MATRIX_AND_ARRIVAL_RATES.
    """
    if pattern.statistics_type != StatisticsTypes.SELECTIVITY_MATRIX_AND_ARRIVAL_RATES:
        raise MissingStatisticsException()
    selectivity_matrix, arrival_rates = pattern.statistics

    # NOTE(review): any init type other than RANDOM/GREEDY leaves the initial order as None,
    # which is passed to execute() unchanged - confirm that this is intended.
    initial_order = None
    if self.__initType == IterativeImprovementInitType.RANDOM:
        initial_order = self.__get_random_order(len(arrival_rates))
    elif self.__initType == IterativeImprovementInitType.GREEDY:
        initial_order = GreedyLeftDeepTreeBuilder.calculate_greedy_order(
            selectivity_matrix, arrival_rates)

    def get_cost_callback(candidate_order):
        # Cost of a candidate order for this specific pattern.
        return self._get_order_cost(pattern, candidate_order)

    return self.__iterative_improvement.execute(
        self.__step_limit, initial_order, get_cost_callback)
def _create_evaluation_order(self, pattern: Pattern):
    """
    Produce an evaluation order from frequency or arrival-rate statistics.

    * FREQUENCY_DICT: the order is whatever get_order_by_occurrences returns for the arguments of
      the pattern's positive structure and the frequency dictionary.
    * ARRIVAL_RATES: the order is the list of indices into the arrival rates, sorted by ascending
      arrival rate (indices with equal rates keep their original relative order).

    Raises:
        MissingStatisticsException: for any other statistics type.
    """
    if pattern.statistics_type == StatisticsTypes.FREQUENCY_DICT:
        frequencies = pattern.statistics
        return get_order_by_occurrences(pattern.positive_structure.args, frequencies)

    if pattern.statistics_type == StatisticsTypes.ARRIVAL_RATES:
        rates = pattern.statistics
        # Sort the indices themselves, keyed by the rate found at each index.
        return sorted(range(len(rates)), key=lambda index: rates[index])

    raise MissingStatisticsException()
def _create_evaluation_order(self, pattern: Pattern):
    """
    Find an evaluation order with dynamic programming over subsets of event indices.

    Starting from all single-item orders, every round extends the best known order of each subset
    by one remaining item and keeps, for each resulting larger subset, the cheapest candidate
    according to self._get_order_cost. After the last round only the full set remains and its
    order is returned.

    Each candidate is costed *including* the item that was just appended, so that candidates for
    the same subset are compared on the order that is actually stored.

    Returns:
        A list of event indices; [0] when the selectivity matrix has a single row.

    Raises:
        MissingStatisticsException: if the pattern's statistics type is anything other than
            SELECTIVITY_MATRIX_AND_ARRIVAL_RATES.
    """
    if pattern.statistics_type != StatisticsTypes.SELECTIVITY_MATRIX_AND_ARRIVAL_RATES:
        raise MissingStatisticsException()
    # Only the matrix size is used here; costs are obtained through self._get_order_cost.
    selectivity_matrix, _arrival_rates = pattern.statistics

    args_num = len(selectivity_matrix)
    if args_num == 1:  # boring extreme case
        return [0]

    items = frozenset(range(args_num))
    # subset -> (best order found, its cost, items that can still be appended).
    sub_orders = {
        frozenset({i}): ([i], self._get_order_cost(pattern, [i]), items.difference({i}))
        for i in items
    }

    # One round per target subset size (2 .. args_num); only the round count matters.
    for _size in range(2, args_num + 1):
        next_orders = {}
        for subset, (order, _, left_to_add) in sub_orders.items():
            for item in left_to_add:
                new_subset = subset.union({item})
                new_order = order + [item]
                # Cost the extended order, not the prefix it was built from.
                new_cost = self._get_order_cost(pattern, new_order)
                if new_subset in next_orders:
                    # Not the first candidate for this subset: keep it only if strictly cheaper.
                    _, best_cost, best_left = next_orders[new_subset]
                    if new_cost < best_cost:
                        next_orders[new_subset] = new_order, new_cost, best_left
                else:
                    # First candidate seen for this subset.
                    next_orders[new_subset] = new_order, new_cost, left_to_add.difference({item})
        # The larger subsets become the starting point of the next round.
        sub_orders = next_orders

    # Only the full set is left; return its order (index 0 of the stored tuple).
    return list(sub_orders.values())[0][0]
def _create_tree_topology(self, pattern: Pattern):
    """
    Find a tree topology with dynamic programming over all subsets of event indices.

    Every single index starts as a leaf node. For each larger subset (in increasing size), every
    split yielded by get_all_disjoint_sets is combined into a binary node from the best trees of
    its two parts, costed with self._get_plan_cost, and the cheapest candidate is kept (the first
    one wins ties).

    Returns:
        The best tree found for the set of all indices. For a single-event pattern this is a
        TreePlanLeafNode, matching the node type returned on every other path.

    Raises:
        MissingStatisticsException: if the pattern's statistics type is anything other than
            SELECTIVITY_MATRIX_AND_ARRIVAL_RATES.
    """
    if pattern.statistics_type != StatisticsTypes.SELECTIVITY_MATRIX_AND_ARRIVAL_RATES:
        raise MissingStatisticsException()
    # Only the matrix size is used here; costs are obtained through self._get_plan_cost.
    selectivity_matrix, _arrival_rates = pattern.statistics

    args_num = len(selectivity_matrix)
    if args_num == 1:
        # A lone event is a plan consisting of a single leaf (previously a bare list was returned).
        return TreePlanLeafNode(0)

    items = frozenset(range(args_num))
    # subset -> (best topology, its cost, items outside the subset).
    # The third element is bookkeeping only; it is not read in this method.
    sub_trees = {}
    for i in items:
        leaf = TreePlanLeafNode(i)
        sub_trees[frozenset({i})] = (leaf, self._get_plan_cost(pattern, leaf), items.difference({i}))

    def plan_for_split(part1, part2):
        # Join the best known trees of both parts and cost the resulting plan.
        tree = TreePlanBuilder._instantiate_binary_node(
            pattern, sub_trees[part1][0], sub_trees[part2][0])
        return tree, self._get_plan_cost(pattern, tree)

    # Subsets of size `size` are solved from the already-solved smaller subsets.
    for size in range(2, args_num + 1):
        for members in combinations(items, size):
            subset = frozenset(members)
            splits = get_all_disjoint_sets(subset)  # iterator over the splits of the subset
            # The first split is the speculative best; later ones must be strictly cheaper.
            best_tree, best_cost = plan_for_split(*next(splits))
            for part1, part2 in splits:
                tree, cost = plan_for_split(part1, part2)
                if cost < best_cost:
                    best_tree, best_cost = tree, cost
            # difference(subset) removes the members; difference({subset}) removed nothing.
            sub_trees[subset] = best_tree, best_cost, items.difference(subset)

    # Best topology (index 0 of the tuple) for the set of all arguments.
    return sub_trees[items][0]
def _create_tree_topology(self, pattern: Pattern):
    """
    Find a tree topology over contiguous slices of an initial order.

    The initial order comes from self._get_initial_order. Every single item starts as a leaf node;
    then, for each contiguous slice of the order (in increasing length), every way of cutting the
    slice into a left and a right part is combined into a binary node from the best trees of the
    two parts, costed with self._get_plan_cost, and the cheapest candidate is kept (the first one
    wins ties).

    Returns:
        The tree stored for the slice covering the entire order.

    Raises:
        MissingStatisticsException: if the pattern's statistics type is anything other than
            SELECTIVITY_MATRIX_AND_ARRIVAL_RATES.
    """
    if pattern.statistics_type != StatisticsTypes.SELECTIVITY_MATRIX_AND_ARRIVAL_RATES:
        raise MissingStatisticsException()
    matrix, rates = pattern.statistics

    order = self._get_initial_order(matrix, rates)
    args_num = len(order)
    items = tuple(order)

    # slice (as a tuple of indices) -> (best tree, its cost); seeded with the leaves.
    suborders = {}
    for event_index in items:
        stored_leaf = TreePlanLeafNode(event_index)
        leaf_cost = self._get_plan_cost(pattern, TreePlanLeafNode(event_index))
        suborders[(event_index,)] = (stored_leaf, leaf_cost)

    # Slice lengths from 2 up to the whole order.
    for length in range(2, args_num + 1):
        # Every start position that leaves room for a slice of this length.
        for start in range(args_num - length + 1):
            window = items[start:start + length]
            best_tree = None
            best_cost = None
            # Cut positions: the left part gets `cut` items, the right part the rest.
            for cut in range(1, length):
                left_tree, _ = suborders[window[:cut]]
                right_tree, _ = suborders[window[cut:]]
                candidate = TreePlanBuilder._instantiate_binary_node(pattern, left_tree, right_tree)
                candidate_cost = self._get_plan_cost(pattern, candidate)
                # The first cut is taken as is; later cuts must be strictly cheaper.
                if cut == 1 or candidate_cost < best_cost:
                    best_tree, best_cost = candidate, candidate_cost
            suborders[window] = best_tree, best_cost

    # The topology (index 0 of the tuple) of the entire order, keyed by 'items'.
    return suborders[items][0]
def _create_evaluation_order(self, pattern: Pattern):
    """
    Compute an evaluation order with the greedy strategy.

    The pattern's selectivity matrix and arrival rates are passed to self.calculate_greedy_order
    and its result is returned unchanged.

    Raises:
        MissingStatisticsException: if the pattern's statistics type is anything other than
            SELECTIVITY_MATRIX_AND_ARRIVAL_RATES.
    """
    if pattern.statistics_type != StatisticsTypes.SELECTIVITY_MATRIX_AND_ARRIVAL_RATES:
        raise MissingStatisticsException()
    selectivity_matrix, arrival_rates = pattern.statistics
    return self.calculate_greedy_order(selectivity_matrix, arrival_rates)