def _configure(self, input_file, output_file, port, env, info):
    """Configure the operator and select its run routine.

    All arguments are forwarded unchanged to the parent class's
    ``_configure``. Afterwards ``info.key1`` decides the mode:

    * ``None``  -> ``self._run`` is bound to ``_run_all_group_reduce``.
    * otherwise -> ``self._run`` is bound to ``_run_grouped_group_reduce``
      and ``self._group_iterator`` is built over ``self._iterator`` using
      ``info.key1``.
    """
    super(GroupReduceFunction, self)._configure(
        input_file, output_file, port, env, info)

    grouping_keys = info.key1
    if grouping_keys is not None:
        # Grouped mode: bind the runner first, then build the group iterator.
        self._run = self._run_grouped_group_reduce
        self._group_iterator = Iterator.GroupIterator(
            self._iterator, grouping_keys)
        return

    # No keys: a group iterator is not created in this mode.
    self._run = self._run_all_group_reduce
def _configure(self, input_file, output_file, port, env, info, subtask_index):
    """Configure the operator and select its run routine.

    All arguments are forwarded unchanged to the parent class's
    ``_configure``. Afterwards the length of ``info.key1`` decides the mode:

    * length 0  -> ``self._run`` is bound to ``_run_all_reduce``.
    * otherwise -> ``self._run`` is bound to ``_run_grouped_reduce`` and
      ``self._group_iterator`` is built over ``self._iterator`` using
      ``info.key1``.
    """
    super(ReduceFunction, self)._configure(
        input_file, output_file, port, env, info, subtask_index)

    grouping_keys = info.key1
    # An explicit length check is kept (not truthiness) so that a key1
    # value without a length still raises exactly as before.
    if len(grouping_keys) != 0:
        # Grouped mode: bind the runner first, then build the group iterator.
        self._run = self._run_grouped_reduce
        self._group_iterator = Iterator.GroupIterator(
            self._iterator, grouping_keys)
        return

    # No keys: a group iterator is not created in this mode.
    self._run = self._run_all_reduce
def _configure(self, input_file, output_file, port):
    """Wire up the connection, iterators and runtime context, then open.

    ``input_file``, ``output_file`` and ``port`` are handed to
    ``Connection.BufferingTCPMappedFileConnection``. Both modes create that
    connection and an ``Iterator.Iterator`` over it; ``self._combine`` then
    selects the remaining setup:

    * truthy -> a ``Collector`` on the connection is stored in
      ``self._collector``, the runtime context is built from it, and
      ``self._run`` is bound to ``_run_combine``.
    * falsy  -> a ``GroupIterator`` keyed by ``self._keys`` is created, the
      runtime context is built, and a fresh ``Collector`` is passed to
      ``_configure_chain``.

    ``self._open()`` is called last in either mode.
    """
    # Read the mode flag first, matching the original evaluation order.
    combine_mode = self._combine

    # Setup shared by both modes.
    self._connection = Connection.BufferingTCPMappedFileConnection(
        input_file, output_file, port)
    self._iterator = Iterator.Iterator(self._connection)

    if combine_mode:
        self._collector = Collector.Collector(self._connection)
        self.context = RuntimeContext.RuntimeContext(
            self._iterator, self._collector)
        self._run = self._run_combine
    else:
        self._group_iterator = Iterator.GroupIterator(
            self._iterator, self._keys)
        # NOTE(review): the context is built from self._collector as it
        # stands *before* _configure_chain runs - confirm this is intended.
        self.context = RuntimeContext.RuntimeContext(
            self._iterator, self._collector)
        self._configure_chain(Collector.Collector(self._connection))

    self._open()