def map_partition(self, operator):
    """
    Applies a MapPartition transformation on a DataSet.

    The transformation calls a MapPartitionFunction once per parallel partition of the
    DataSet. The entire partition is made available through an iterator, and the function
    may return an arbitrary number of results. The number of elements each function
    instance sees is non-deterministic and depends on the degree of parallelism of the
    operation.

    :param operator: A MapPartitionFunction, or a plain function; a plain function is
        wrapped in a new MapPartitionFunction and installed as its map_partition attribute.
    :return: An OperatorSet that represents the transformed DataSet
    """
    if isinstance(operator, TYPES.FunctionType):
        # A bare function was supplied: attach it to a fresh MapPartitionFunction.
        wrapper = MapPartitionFunction()
        wrapper.map_partition = operator
        operator = wrapper

    info = OperationInfo()
    # The result set is created first and shares the very same info object that is
    # populated below.
    result_set = OperatorSet(self._env, info)

    info.identifier = _Identifier.MAPPARTITION
    info.parent = self._info
    info.operator = operator
    info.types = _createArrayTypeInfo()
    info.name = "PythonMapPartition"

    # Register the new operation with its parent and with the environment.
    self._info.children.append(info)
    self._env._sets.append(info)
    return result_set
def map_partition(self, operator, types):
    """
    Applies a MapPartition transformation on a DataSet.

    The transformation calls a MapPartitionFunction once per parallel partition of the
    DataSet. The entire partition is made available through an iterator, and the function
    may return an arbitrary number of results. The number of elements each function
    instance sees is non-deterministic and depends on the degree of parallelism of the
    operation.

    :param operator: A MapPartitionFunction, or a plain function; a plain function is
        wrapped in a new MapPartitionFunction and installed as its map_partition attribute.
    :param types: The type of the resulting DataSet
    :return: An OperatorSet that represents the transformed DataSet
    """
    if isinstance(operator, TYPES.FunctionType):
        # A bare function was supplied: attach it to a fresh MapPartitionFunction.
        wrapper = MapPartitionFunction()
        wrapper.map_partition = operator
        operator = wrapper

    info = dict()
    # The result set is created while the dict is still empty and shares the very same
    # dict that is populated below.
    result_set = OperatorSet(self._env, info)

    # META has the form "<module of the operator>|<class name of the operator>".
    module_text = str(inspect.getmodule(operator))
    class_text = str(operator.__class__.__name__)

    info.update({
        _Fields.IDENTIFIER: _Identifier.MAPPARTITION,
        _Fields.PARENT: self._info,
        _Fields.OPERATOR: operator,
        _Fields.META: module_text + "|" + class_text,
        _Fields.TYPES: types,
        _Fields.NAME: "PythonMapPartition",
    })

    # Register the new operation with its parent and with the environment.
    self._info[_Fields.CHILDREN].append(info)
    self._env._sets.append(info)
    return result_set
def map_partition(self, operator, types):
    """
    Applies a MapPartition transformation on a DataSet.

    A MapPartitionFunction is called once per parallel partition of the DataSet and
    receives the entire partition through an iterator. It may return an arbitrary number
    of results. How many elements a single function instance sees is non-deterministic
    and depends on the degree of parallelism of the operation.

    :param operator: A MapPartitionFunction, or a plain function; a plain function is
        wrapped in a new MapPartitionFunction and installed as its map_partition attribute.
    :param types: The type of the resulting DataSet
    :return: An OperatorSet that represents the transformed DataSet
    """
    if isinstance(operator, TYPES.FunctionType):
        # Promote a bare function to a MapPartitionFunction carrying it.
        partition_function = MapPartitionFunction()
        partition_function.map_partition = operator
        operator = partition_function

    operation = dict()
    # Construct the returned set before filling the dict; both refer to the same object.
    new_set = OperatorSet(self._env, operation)

    operation[_Fields.IDENTIFIER] = _Identifier.MAPPARTITION
    operation[_Fields.PARENT] = self._info
    operation[_Fields.OPERATOR] = operator
    # META is the operator's module and class name, separated by "|".
    operation[_Fields.META] = "|".join(
        (str(inspect.getmodule(operator)), str(operator.__class__.__name__))
    )
    operation[_Fields.TYPES] = types
    operation[_Fields.NAME] = "PythonMapPartition"

    # Link the operation into the parent's children and the environment's set list.
    self._info[_Fields.CHILDREN].append(operation)
    self._env._sets.append(operation)
    return new_set