def reduce(self, operator):
    """
    Applies a Reduce transformation to this DataSet.

    The transformation consecutively calls a ReduceFunction until only a single
    element remains, which is the result of the transformation. A ReduceFunction
    combines two elements into one new element of the same type.

    The keys of the first entry of the child chain are handed to the operator as
    its grouping keys, and every entry of the child chain is registered with the
    environment before the new reduce operation itself is registered.

    :param operator: The ReduceFunction that is applied on the DataSet. It is
        modified in place: its grouping keys are set and it is flagged as the
        combiner. A deep copy of it (flagged as non-combining) is the reducer.
    :return: An OperatorSet wrapping the newly created reduce operation.
    """
    chain = self._child_chain
    operator._set_grouping_keys(chain[0].keys)
    for link in chain:
        self._env._sets.append(link)

    info = OperationInfo()
    result = OperatorSet(self._env, info)
    info.identifier = _Identifier.REDUCE
    info.parent = self._info

    # The copy must be taken before the original is flagged as the combiner,
    # so reducer and combiner end up as two independent objects.
    reducer = copy.deepcopy(operator)
    reducer._combine = False
    info.operator = reducer

    # "<module>|<class name>" of the user-supplied operator.
    info.meta = "|".join((
        str(inspect.getmodule(operator)),
        str(operator.__class__.__name__),
    ))

    info.combine = True
    operator._combine = True
    info.combineop = operator

    info.name = "PythonReduce"
    info.types = deduct_output_type(self._info)

    self._info.children.append(info)
    self._env._sets.append(info)
    return result
def reduce(self, operator):
    """
    Applies a Reduce transformation to this DataSet.

    The transformation consecutively calls a ReduceFunction until only a single
    element remains, which is the result of the transformation. A ReduceFunction
    combines two elements into one new element of the same type.

    The keys of the first entry of the child chain are handed to the operator as
    its grouping keys, and every entry of the child chain is registered with the
    environment before the new reduce operation itself is registered.

    :param operator: The ReduceFunction that is applied on the DataSet. It is
        modified in place: its grouping keys are set and it is flagged as the
        combiner. A deep copy of it (flagged as non-combining) is the reducer.
    :return: An OperatorSet wrapping the newly created reduce operation.
    """
    chain = self._child_chain
    operator._set_grouping_keys(chain[0].keys)
    for link in chain:
        self._env._sets.append(link)

    info = OperationInfo()
    result = OperatorSet(self._env, info)
    info.identifier = _Identifier.REDUCE
    info.parent = self._info

    # The copy must be taken before the original is flagged as the combiner,
    # so reducer and combiner end up as two independent objects.
    reducer = copy.deepcopy(operator)
    reducer._combine = False
    info.operator = reducer

    # "<module>|<class name>" of the user-supplied operator.
    info.meta = "|".join((
        str(inspect.getmodule(operator)),
        str(operator.__class__.__name__),
    ))

    info.combine = True
    operator._combine = True
    info.combineop = operator

    info.name = "PythonReduce"
    info.types = deduct_output_type(self._info)

    self._info.children.append(info)
    self._env._sets.append(info)
    return result
def reduce_group(self, operator, types, combinable=False):
    """
    Applies a GroupReduce transformation.

    The transformation calls a GroupReduceFunction once for each group of the
    DataSet, or once when applied on a non-grouped DataSet. The
    GroupReduceFunction can iterate over all elements of the DataSet and emit
    any number of output elements, including none.

    :param operator: The GroupReduceFunction that is applied on the DataSet. A
        plain function is also accepted; it is installed as the ``reduce``
        attribute of a fresh GroupReduceFunction. The operator object is
        flagged as the combiner; a deep copy of it is used as the reducer.
    :param types: The type of the resulting DataSet.
    :param combinable: Stored as the ``combine`` flag of the new operation.
    :return: An OperatorSet wrapping the newly created group-reduce operation.
    """
    if isinstance(operator, TYPES.FunctionType):
        # Wrap a bare function so the rest of the method can treat it like
        # any other GroupReduceFunction instance.
        user_function = operator
        operator = GroupReduceFunction()
        operator.reduce = user_function

    info = OperationInfo()
    result = OperatorSet(self._env, info)
    info.identifier = _Identifier.GROUPREDUCE
    info.parent = self._info

    # The copy must be taken before the original is flagged as the combiner,
    # so reducer and combiner end up as two independent objects.
    reducer = copy.deepcopy(operator)
    reducer._combine = False
    info.operator = reducer

    # "<module>|<class name>" of the (possibly wrapped) operator.
    info.meta = "|".join((
        str(inspect.getmodule(operator)),
        str(operator.__class__.__name__),
    ))

    info.types = types
    info.combine = combinable
    operator._combine = True
    info.combineop = operator
    info.name = "PythonGroupReduce"

    self._info.children.append(info)
    self._env._sets.append(info)
    return result
def reduce_group(self, operator, types, combinable=False):
    """
    Applies a GroupReduce transformation.

    The transformation calls a GroupReduceFunction once for each group of the
    DataSet, or once when applied on a non-grouped DataSet. The
    GroupReduceFunction can iterate over all elements of the DataSet and emit
    any number of output elements, including none.

    Before the operation is created, the operator receives the keys of the
    first entry of the child chain as its grouping keys and the
    ``(field, order)`` pairs of all remaining chain entries as its sort ops.

    :param operator: The GroupReduceFunction that is applied on the DataSet. A
        plain function is also accepted; it is installed as the ``reduce``
        attribute of a fresh GroupReduceFunction. The operator object is
        modified in place and flagged as the combiner; a deep copy of it is
        used as the reducer.
    :param types: The type of the resulting DataSet.
    :param combinable: Stored as the ``combine`` flag of the new operation.
    :return: An OperatorSet wrapping the newly created group-reduce operation.
    """
    if isinstance(operator, TYPES.FunctionType):
        # Wrap a bare function so the rest of the method can treat it like
        # any other GroupReduceFunction instance.
        user_function = operator
        operator = GroupReduceFunction()
        operator.reduce = user_function

    # First chain entry carries the grouping keys; the rest describe sorting.
    operator._set_grouping_keys(self._child_chain[0].keys)
    sort_ops = []
    for entry in self._child_chain[1:]:
        sort_ops.append((entry.field, entry.order))
    operator._set_sort_ops(sort_ops)

    info = OperationInfo()
    result = OperatorSet(self._env, info)
    info.identifier = _Identifier.GROUPREDUCE
    info.parent = self._info

    # The copy must be taken after keys/sort ops are set but before the
    # original is flagged as the combiner.
    reducer = copy.deepcopy(operator)
    reducer._combine = False
    info.operator = reducer

    # "<module>|<class name>" of the (possibly wrapped) operator.
    info.meta = "|".join((
        str(inspect.getmodule(operator)),
        str(operator.__class__.__name__),
    ))

    info.types = types
    info.combine = combinable
    operator._combine = True
    info.combineop = operator
    info.name = "PythonGroupReduce"

    self._info.children.append(info)
    self._env._sets.append(info)
    return result