def generate_func(self, serialized_fn):
    """Assemble the group aggregate function from the serialized UDFs.

    Every entry of ``serialized_fn.udfs`` is turned into an aggregate
    function plus an input extractor, except the entry located at
    ``self.index_of_count_star``, which is replaced by a
    ``Count1AggFunction`` whose extractor yields no arguments. The
    resulting ``GroupAggFunction`` is stored on
    ``self.group_agg_function``.

    :param serialized_fn: object exposing the serialized aggregate
        functions through its ``udfs`` sequence.
    :return: a 2-tuple of a callable that lazily maps
        ``self.process_element_or_timer`` over an iterable, and an
        empty list.
    """

    def dummy_input_extractor(value):
        # The count(*) placeholder consumes nothing from the input value.
        return []

    aggregates = []
    extractors = []
    for position, udf_proto in enumerate(serialized_fn.udfs):
        if position == self.index_of_count_star:
            aggregate = Count1AggFunction()
            extractor = dummy_input_extractor
        else:
            aggregate, extractor = extract_user_defined_aggregate_function(udf_proto)
        aggregates.append(aggregate)
        extractors.append(extractor)

    handler = SimpleAggsHandleFunction(
        aggregates,
        extractors,
        self.index_of_count_star,
        self.data_view_specs)
    selector = RowKeySelector(self.grouping)

    # Only use the data-view-aware coder when data view specs are present.
    coder = (
        DataViewFilterCoder(self.data_view_specs)
        if len(self.data_view_specs) > 0
        else PickleCoder()
    )

    self.group_agg_function = GroupAggFunction(
        handler,
        selector,
        self.keyed_state_backend,
        coder,
        self.generate_update_before,
        self.state_cleaning_enabled,
        self.index_of_count_star)

    return (lambda it: map(self.process_element_or_timer, it)), []
def generate_func(self, serialized_fn):
    """Assemble the group aggregate function, including distinct views.

    Every entry of ``serialized_fn.udfs`` is turned into an aggregate
    function, an input extractor, a filter argument and a distinct view
    index. The entry located at ``self.index_of_count_star`` is replaced
    by a ``Count1AggFunction`` with an extractor yielding no arguments
    and with ``-1`` for both its filter argument and distinct index.
    One ``DistinctViewDescriptor`` is then created per group of
    functions recorded as sharing a distinct view, and the resulting
    ``GroupAggFunction`` is stored on ``self.group_agg_function``.

    :param serialized_fn: object exposing the serialized aggregate
        functions through its ``udfs`` sequence.
    :return: a 2-tuple of ``self.process_element_or_timer`` and an
        empty list.
    """

    def dummy_input_extractor(value):
        # The count(*) placeholder consumes nothing from the input value.
        return []

    aggregates = []
    extractors = []
    filters = []
    # Per aggregate: index of the distinct view that aggregate uses.
    view_indexes = []
    # Filled in by the extraction helper: groups the indexes of the
    # functions sharing one distinct view together with their filter args.
    shared_view_info = {}

    for position, udf_proto in enumerate(serialized_fn.udfs):
        if position == self.index_of_count_star:
            aggregate = Count1AggFunction()
            filter_arg = -1
            view_index = -1
            extractor = dummy_input_extractor
        else:
            aggregate, extractor, filter_arg, view_index = (
                extract_user_defined_aggregate_function(
                    position, udf_proto, shared_view_info))
        aggregates.append(aggregate)
        extractors.append(extractor)
        filters.append(filter_arg)
        view_indexes.append(view_index)

    descriptors = {}
    for sharing_aggs, sharing_filters in shared_view_info.values():
        # The shared view is keyed by the index of its first aggregate.
        owner = sharing_aggs[0]
        # A -1 marks a call without a filter; in that case the filter
        # check before writing the distinct data view is unnecessary.
        effective_filters = [] if -1 in sharing_filters else sharing_filters
        descriptors[owner] = DistinctViewDescriptor(
            extractors[owner], effective_filters)

    handler = SimpleAggsHandleFunction(
        aggregates,
        extractors,
        self.index_of_count_star,
        self.data_view_specs,
        filters,
        view_indexes,
        descriptors)
    selector = RowKeySelector(self.grouping)

    # Only use the data-view-aware coder when data view specs are present.
    coder = (
        DataViewFilterCoder(self.data_view_specs)
        if len(self.data_view_specs) > 0
        else PickleCoder()
    )

    self.group_agg_function = GroupAggFunction(
        handler,
        selector,
        self.keyed_state_backend,
        coder,
        self.generate_update_before,
        self.state_cleaning_enabled,
        self.index_of_count_star)

    return self.process_element_or_timer, []
def generate_func(self, serialized_fn):
    """Assemble the process function from the serialized UDFs.

    Every entry of ``serialized_fn.udfs`` is turned into an aggregate
    function, an input extractor, a filter argument and a distinct view
    index. One ``DistinctViewDescriptor`` is created per group of
    functions recorded as sharing a distinct view. The collected pieces
    are handed to ``self.create_process_function`` and its result is
    stored on ``self.group_agg_function``.

    :param serialized_fn: object exposing the serialized aggregate
        functions through its ``udfs`` sequence.
    :return: a 2-tuple of ``self.process_element_or_timer`` and an
        empty list.
    """
    aggregates = []
    extractors = []
    filters = []
    # Per aggregate: index of the distinct view that aggregate uses.
    view_indexes = []
    # Filled in by the extraction helper: groups the indexes of the
    # functions sharing one distinct view together with their filter args.
    shared_view_info = {}

    for position, udf_proto in enumerate(serialized_fn.udfs):
        aggregate, extractor, filter_arg, view_index = (
            extract_user_defined_aggregate_function(
                position, udf_proto, shared_view_info))
        aggregates.append(aggregate)
        extractors.append(extractor)
        filters.append(filter_arg)
        view_indexes.append(view_index)

    descriptors = {}
    for sharing_aggs, sharing_filters in shared_view_info.values():
        # The shared view is keyed by the index of its first aggregate.
        owner = sharing_aggs[0]
        # A -1 marks a call without a filter; in that case the filter
        # check before writing the distinct data view is unnecessary.
        effective_filters = [] if -1 in sharing_filters else sharing_filters
        descriptors[owner] = DistinctViewDescriptor(
            extractors[owner], effective_filters)

    selector = RowKeySelector(self.grouping)

    # Only use the data-view-aware coder when data view specs are present.
    coder = (
        DataViewFilterCoder(self.data_view_specs)
        if len(self.data_view_specs) > 0
        else PickleCoder()
    )

    self.group_agg_function = self.create_process_function(
        aggregates,
        extractors,
        filters,
        view_indexes,
        descriptors,
        selector,
        coder)

    return self.process_element_or_timer, []