Code example #1
    def run(self, map_result_status, reduce_result_status, reduce_outputs):

        if (map_result_status == model.MapreduceState.RESULT_ABORTED or
                reduce_result_status == model.MapreduceState.RESULT_ABORTED):
            result_status = model.MapreduceState.RESULT_ABORTED
        elif (map_result_status == model.MapreduceState.RESULT_FAILED
              or reduce_result_status == model.MapreduceState.RESULT_FAILED):
            result_status = model.MapreduceState.RESULT_FAILED
        else:
            result_status = model.MapreduceState.RESULT_SUCCESS

        self.fill(self.outputs.result_status, result_status)
        if result_status == model.MapreduceState.RESULT_SUCCESS:
            yield pipeline_common.Return(reduce_outputs)
        else:
            yield pipeline_common.Return([])
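For context, a hedged sketch of how a caller could read the slots this run() fills once the pipeline has finalized (the class name and pipeline_id are assumptions; from_id, has_finalized, and outputs.<slot>.value are standard Pipeline API accessors):

stage = ReturnStatusPipeline.from_id(pipeline_id)  # hypothetical class containing the run() above
if stage and stage.has_finalized:
    result_status = stage.outputs.result_status.value  # slot filled via self.fill(...) above
    output_files = stage.outputs.default.value          # reduce_outputs on success, [] otherwise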
Code example #2
File: mapreduce_pipeline.py Project: girum11/hang
 def run(self,
         job_name,
         mapper_spec,
         reducer_spec,
         input_reader_spec,
         output_writer_spec=None,
         mapper_params=None,
         reducer_params=None,
         shards=None,
         combiner_spec=None):
     map_pipeline = yield MapPipeline(job_name,
                                      mapper_spec,
                                      input_reader_spec,
                                      params=mapper_params,
                                      shards=shards)
     shuffler_pipeline = yield ShufflePipeline(job_name, map_pipeline)
     reducer_pipeline = yield ReducePipeline(job_name,
                                             reducer_spec,
                                             output_writer_spec,
                                             reducer_params,
                                             shuffler_pipeline,
                                             combiner_spec=combiner_spec)
     with pipeline.After(reducer_pipeline):
         all_temp_files = yield pipeline_common.Extend(
             map_pipeline, shuffler_pipeline)
         yield CleanupPipeline(all_temp_files)
     yield pipeline_common.Return(reducer_pipeline)
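A hedged usage sketch for the MapreducePipeline.run() above (the handler paths, reader/writer names, and blob_key are illustrative assumptions, not taken from the girum11/hang project):

job = MapreducePipeline(
    "word_count",                                       # job_name
    "main.word_count_map",                              # mapper_spec (hypothetical handler)
    "main.word_count_reduce",                           # reducer_spec (hypothetical handler)
    "mapreduce.input_readers.BlobstoreZipInputReader",  # input_reader_spec
    "mapreduce.output_writers.BlobstoreOutputWriter",   # output_writer_spec
    mapper_params={"blob_key": blob_key},               # blob_key assumed to be in scope
    reducer_params={"mime_type": "text/plain"},
    shards=16)
job.start()
# job.pipeline_id can later be used to poll the job for completion.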
Code example #3
 def run(self, job_name, bucket_name, filenames):
   sort_mappers = []
   for i in range(len(filenames)):
     filenames_only = util.strip_prefix_from_items("/%s/" % bucket_name,
                                                   filenames[i])
     sort_mapper = yield mapper_pipeline.MapperPipeline(
         "%s-shuffle-sort-%s" % (job_name, str(i)),
         __name__ + "._sort_records_map",
         __name__ + "._BatchGCSRecordsReader",
         None,
         {
             "input_reader": {
                 "bucket_name": bucket_name,
                 "objects": filenames_only,
             },
         },
         shards=1)
     sort_mappers.append(sort_mapper)
   with pipeline.After(*sort_mappers):
     job_ids = yield pipeline_common.Append(*[mapper.job_id for mapper in
                                              sort_mappers])
     result = yield _CollectOutputFiles(job_ids)
     with pipeline.After(result):
       yield _CleanupOutputFiles(job_ids)
     yield pipeline_common.Return(result)
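The helper used above strips the "/<bucket_name>/" prefix from each GCS path so the input reader receives bucket-relative object names; a rough behavioral sketch (not the library's exact implementation) looks like this:

def _strip_prefix_from_items(prefix, items):
  # Drop the prefix where present, leave other items untouched (sketch).
  return [item[len(prefix):] if item.startswith(prefix) else item
          for item in items]

# _strip_prefix_from_items("/my-bucket/", ["/my-bucket/shard-0-output"])
# -> ["shard-0-output"]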
Code example #4
File: shuffler.py Project: guisup/appengine-python
 def run(self, job_name, filenames):
     hashed_files = yield _HashPipeline(job_name, filenames)
     sorted_files = yield _SortChunksPipeline(job_name, hashed_files)
     merged_files = yield _MergePipeline(job_name, sorted_files)
     with pipeline.After(merged_files):
         all_temp_files = yield pipeline_common.Extend(
             hashed_files, sorted_files)
         yield mapper_pipeline._CleanupPipeline(all_temp_files)
     yield pipeline_common.Return(merged_files)
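As a point of reference, pipeline_common.Extend concatenates its argument lists into a single slot, so one cleanup pipeline sees every temporary file from both the hash and sort stages; a plain-Python equivalent of that concatenation (a sketch, not the library code) is:

def _extend_equivalent(*file_lists):
    # Concatenate the per-stage file lists into one flat list (sketch).
    combined = []
    for file_list in file_lists:
        combined.extend(file_list)
    return combined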
Code example #5
  def run(self, job_name, filenames, shards=None):
    if files.shuffler.available():
      yield _ShuffleServicePipeline(job_name, filenames)
    else:
      hashed_files = yield _HashPipeline(job_name, filenames, shards=shards)
      sorted_files = yield _SortChunksPipeline(job_name, hashed_files)
      temp_files = [hashed_files, sorted_files]

      merged_files = yield _MergePipeline(job_name, sorted_files)

      with pipeline.After(merged_files):
        all_temp_files = yield pipeline_common.Extend(*temp_files)
        yield mapper_pipeline._CleanupPipeline(all_temp_files)

      yield pipeline_common.Return(merged_files)
Code example #6
  def run(self, job_name, mapper_params, filenames, shards=None):
    bucket_name = mapper_params["bucket_name"]
    hashed_files = yield _HashPipeline(job_name, bucket_name,
                                       filenames, shards=shards)
    sorted_files = yield _SortChunksPipeline(job_name, bucket_name,
                                             hashed_files)
    temp_files = [hashed_files, sorted_files]

    merged_files = yield _MergePipeline(job_name, bucket_name, sorted_files)

    with pipeline.After(merged_files):
      all_temp_files = yield pipeline_common.Extend(*temp_files)
      yield _GCSCleanupPipeline(all_temp_files)

    yield pipeline_common.Return(merged_files)
Code example #7
 def run(self, job_name, filenames):
     sort_mappers = []
     for i in range(len(filenames)):
         filename = filenames[i]
         sort_mapper = yield mapper_pipeline.MapperPipeline(
             "%s-shuffle-sort-%s" % (job_name, str(i)),
             __name__ + "._sort_records_map",
             __name__ + "._BatchRecordsReader",
             None, {
                 "files": [filename],
                 "processing_rate": 1000000,
             },
             shards=1)
         sort_mappers.append(sort_mapper)
     with pipeline.After(*sort_mappers):
         job_ids = yield pipeline_common.Append(
             *[mapper.job_id for mapper in sort_mappers])
         result = yield _CollectOutputFiles(job_ids)
         with pipeline.After(result):
             yield _CleanupOutputFiles(job_ids)
         yield pipeline_common.Return(result)