def run(self,
         job_name,
         mapper_spec,
         reducer_spec,
         input_reader_spec,
         output_writer_spec=None,
         mapper_params=None,
         reducer_params=None,
         shards=None):
    # Map stage: run the mapper over the input reader's data.
    map_pipeline = yield MapPipeline(job_name,
                                     mapper_spec,
                                     input_reader_spec,
                                     params=mapper_params,
                                     shards=shards)
    # Shuffle stage: hash, sort, and merge the map output by key.
    shuffler_pipeline = yield ShufflePipeline(job_name, map_pipeline)
    # Reduce stage: apply the reducer and write via the output writer.
    reducer_pipeline = yield ReducePipeline(job_name,
                                            reducer_spec,
                                            output_writer_spec,
                                            reducer_params,
                                            shuffler_pipeline)
    # Once the reducer finishes, delete the intermediate map/shuffle files.
    with pipeline.After(reducer_pipeline):
      all_temp_files = yield pipeline_common.Extend(
          map_pipeline, shuffler_pipeline)
      yield mapper_pipeline._CleanupPipeline(all_temp_files)
    # The reducer's output is the result of the whole job.
    yield pipeline_common.Return(reducer_pipeline)
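A minimal sketch of how a pipeline with this signature is typically started, modeled on the classic App Engine word-count demo; the class name, handler paths, reader/writer specs, and parameters below are illustrative assumptions, not part of the snippet above.

 # Hypothetical caller (names and specs assumed):
 job = MapreducePipeline(
     "word_count",
     "main.word_count_map",
     "main.word_count_reduce",
     "mapreduce.input_readers.BlobstoreZipInputReader",
     "mapreduce.output_writers.BlobstoreOutputWriter",
     mapper_params={"blob_key": blob_key},
     reducer_params={"mime_type": "text/plain"},
     shards=16)
 job.start()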
Example #2
    def run(self, map_result_status, reduce_result_status, reduce_outputs):
        if (map_result_status == model.MapreduceState.RESULT_ABORTED or
                reduce_result_status == model.MapreduceState.RESULT_ABORTED):
            result_status = model.MapreduceState.RESULT_ABORTED
        elif (map_result_status == model.MapreduceState.RESULT_FAILED
              or reduce_result_status == model.MapreduceState.RESULT_FAILED):
            result_status = model.MapreduceState.RESULT_FAILED
        else:
            result_status = model.MapreduceState.RESULT_SUCCESS

        self.fill(self.outputs.result_status, result_status)
        if result_status == model.MapreduceState.RESULT_SUCCESS:
            yield pipeline_common.Return(reduce_outputs)
        else:
            yield pipeline_common.Return([])
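The self.fill call above writes into a named output slot, which the enclosing class must declare and which callers can read back once the pipeline finalizes. A minimal sketch of both sides, assuming pipeline.Pipeline as the base class and a hypothetical _ResultPipeline name:

 class _ResultPipeline(pipeline.Pipeline):
     # Declares the slot that run() fills via self.fill() above.
     output_names = ["result_status"]

 # Later, e.g. in a status handler:
 stage = _ResultPipeline.from_id(pipeline_id)
 if stage.has_finalized:
     status = stage.outputs.result_status.value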
Example #3
 def run(self, job_name, filenames):
     hashed_files = yield _HashPipeline(job_name, filenames)
     sorted_files = yield _SortChunksPipeline(job_name, hashed_files)
     merged_files = yield _MergePipeline(job_name, sorted_files)
     with pipeline.After(merged_files):
         all_temp_files = yield pipeline_common.Extend(
             hashed_files, sorted_files)
         yield mapper_pipeline._CleanupPipeline(all_temp_files)
     yield pipeline_common.Return(merged_files)
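These three child pipelines implement a disk-based shuffle: hash-partition the input files by key, sort each partition, then merge the sorted runs so all values for a key sit together. A self-contained toy illustration of the same hash/sort/merge flow in plain Python (deliberately not the library's file-based implementation):

 from itertools import groupby
 from operator import itemgetter

 def toy_shuffle(pairs, num_partitions=4):
     # Hash-partition the (key, value) pairs, like _HashPipeline.
     partitions = [[] for _ in range(num_partitions)]
     for key, value in pairs:
         partitions[hash(key) % num_partitions].append((key, value))
     # Sort each partition (_SortChunksPipeline), then merge adjacent
     # records with equal keys (_MergePipeline).
     merged = []
     for part in partitions:
         part.sort(key=itemgetter(0))
         for key, group in groupby(part, key=itemgetter(0)):
             merged.append((key, [value for _, value in group]))
     return merged  # result order depends on the hash partitioning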
Example #4
  def run(self, job_name, filenames, shards=None):
    if files.shuffler.available():
      yield _ShuffleServicePipeline(job_name, filenames)
    else:
      hashed_files = yield _HashPipeline(job_name, filenames, shards=shards)
      sorted_files = yield _SortChunksPipeline(job_name, hashed_files)
      temp_files = [hashed_files, sorted_files]

      merged_files = yield _MergePipeline(job_name, sorted_files)

      with pipeline.After(merged_files):
        all_temp_files = yield pipeline_common.Extend(*temp_files)
        yield mapper_pipeline._CleanupPipeline(all_temp_files)

      yield pipeline_common.Return(merged_files)
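This variant is the same shuffle as Example #3 with one addition: a capability check. When the built-in shuffler service is available, the hash/sort/merge fan-out and its temp-file cleanup are skipped entirely and _ShuffleServicePipeline does the work in a single step. Collecting the intermediate futures in a temp_files list, rather than naming them inline in the Extend call, also makes the cleanup easier to extend if more stages are added.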
Example #5
 def run(self, job_name, filenames):
     sort_mappers = []
     for i, filename in enumerate(filenames):
         sort_mapper = yield mapper_pipeline.MapperPipeline(
             "%s-shuffle-sort-%d" % (job_name, i),
             __name__ + "._sort_records_map",
             __name__ + "._BatchRecordsReader",
             None, {
                 "files": [filename],
                 "processing_rate": 1000000,
             },
             shards=1)
         sort_mappers.append(sort_mapper)
     with pipeline.After(*sort_mappers):
         job_ids = yield pipeline_common.Append(
             *[mapper.job_id for mapper in sort_mappers])
         result = yield _CollectOutputFiles(job_ids)
         with pipeline.After(result):
             yield _CleanupOutputFiles(job_ids)
         yield pipeline_common.Return(result)
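This example fans out one single-shard sort mapper per input file, then uses pipeline.After twice: first as a barrier so output collection starts only after every sort has finished, and again so cleanup runs only after collection. A stripped-down sketch of that fan-out/barrier idiom, with a hypothetical Child pipeline standing in for MapperPipeline:

 def run(self, items):
     children = []
     for item in items:
         children.append((yield Child(item)))  # fan out: one child per item
     with pipeline.After(*children):  # barrier: wait for every child
         results = yield pipeline_common.Append(*children)
         yield pipeline_common.Return(results)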
Example #6
 def run(self, toEmail, documentId, credentialsAsJson):
     revisions = yield DocumentRevisionsPipeline(toEmail, documentId,
                                                 credentialsAsJson)
     revisionsAnalysis = yield RevisionsAnalysisPipeline(
         revisions, credentialsAsJson)
     yield common.Return(revisionsAnalysis)
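A sketch of how a chain like this is typically kicked off from a request handler; the enclosing class name and the status-page path (the Pipeline API's default /_ah/pipeline base path) are assumptions:

 # Hypothetical kickoff from a webapp2 handler:
 stage = DocumentAnalysisPipeline(toEmail, documentId, credentialsAsJson)
 stage.start()
 self.redirect("/_ah/pipeline/status?root=%s" % stage.pipeline_id)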