def testReturn(self):
    """common.Return's default output holds the value passed to it."""
    self.assertEqual(
        1234, self.run_pipeline(common.Return(1234)).default.value)
    self.assertEqual(
        'hi there',
        self.run_pipeline(common.Return('hi there')).default.value)
    self.assertIsNone(self.run_pipeline(common.Return()).default.value)
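For orientation, a minimal sketch (illustrative only, assuming the App Engine pipeline package is importable as pipeline) of the pattern these assertions exercise: common.Return's default output carries the value given to it, and a generator pipeline yields it to hand back a result, as the examples below do.

# Illustrative sketch only; EchoValue is not part of the test module.
import pipeline
from pipeline import common

class EchoValue(pipeline.Pipeline):
    def run(self, value=None):
        # Yielding common.Return hands `value` back as this pipeline's result.
        yield common.Return(value)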
Example 2
def run(self, map_result_status, reduce_result_status, reduce_outputs):
    # Propagate the worst of the two stage statuses: aborted wins over
    # failed, which wins over success.
    if (map_result_status == model.MapreduceState.RESULT_ABORTED or
            reduce_result_status == model.MapreduceState.RESULT_ABORTED):
        result_status = model.MapreduceState.RESULT_ABORTED
    elif (map_result_status == model.MapreduceState.RESULT_FAILED or
          reduce_result_status == model.MapreduceState.RESULT_FAILED):
        result_status = model.MapreduceState.RESULT_FAILED
    else:
        result_status = model.MapreduceState.RESULT_SUCCESS

    self.fill(self.outputs.result_status, result_status)
    # Only expose the reducer's output files when the whole job succeeded.
    if result_status == model.MapreduceState.RESULT_SUCCESS:
        yield pipeline_common.Return(reduce_outputs)
    else:
        yield pipeline_common.Return([])
Example 3
def run(self, job_keys):
    project = get_shub_project()
    jobs = [project.job(x) for x in job_keys]
    unfinished = [x for x in jobs if x.info['state'] != 'finished']
    logging.info("Waiting for %s unfinished spiders", len(unfinished))
    if unfinished:
        # Try again in 30 seconds.
        with pipeline.InOrder():
            yield common.Delay(seconds=30)
            yield WaitForJobs(job_keys)
    else:
        yield common.Return(True)
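A brief usage sketch (an assumption, not part of the original snippet): WaitForJobs is presumed to subclass pipeline.Pipeline, so the polling loop above is driven by starting the stage once.

# Usage sketch (assumption): start the pipeline once; the InOrder/Delay block
# re-yields WaitForJobs every 30 seconds until all jobs finish, at which point
# common.Return(True) provides the result.
stage = WaitForJobs(job_keys)
stage.start(queue_name='default')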
Example 4
def run(self, job_name, mapper_params, filenames, shards=None):
    bucket_name = mapper_params["bucket_name"]
    # Hash, then sort the input files; both stages produce temporary GCS files.
    hashed_files = yield _HashPipeline(job_name,
                                       bucket_name,
                                       filenames,
                                       shards=shards)
    sorted_files = yield _SortChunksPipeline(job_name, bucket_name,
                                             hashed_files)
    temp_files = [hashed_files, sorted_files]

    merged_files = yield _MergePipeline(job_name, bucket_name,
                                        sorted_files)

    # Delete the intermediate files only after the merge that reads them
    # has completed.
    with pipeline.After(merged_files):
        all_temp_files = yield pipeline_common.Extend(*temp_files)
        yield _GCSCleanupPipeline(all_temp_files)

    yield pipeline_common.Return(merged_files)
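For completeness, a hedged caller-side sketch (the class names here are hypothetical) of how another generator pipeline could consume this stage's merged output:

# Hypothetical caller: DedupePipeline stands in for the class owning the
# run() method above; ProcessMergedFiles is an invented downstream stage.
def run(self, job_name, mapper_params, filenames):
    merged = yield DedupePipeline(job_name, mapper_params, filenames)
    yield ProcessMergedFiles(merged)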
Example 5
def run(self, job_name, bucket_name, filenames):
    sort_mappers = []
    # Launch one single-shard sort mapper per group of input files.
    for i in range(len(filenames)):
        filenames_only = util.strip_prefix_from_items(
            "/%s/" % bucket_name, filenames[i])
        sort_mapper = yield mapper_pipeline.MapperPipeline(
            "%s-shuffle-sort-%s" % (job_name, str(i)),
            __name__ + "._sort_records_map",
            __name__ + "._BatchGCSRecordsReader",
            None, {
                "input_reader": {
                    "bucket_name": bucket_name,
                    "objects": filenames_only,
                },
            },
            shards=1)
        sort_mappers.append(sort_mapper)
    # Collect the sorted output files once every sort mapper has finished,
    # then clean up the per-job outputs after they have been collected.
    with pipeline.After(*sort_mappers):
        job_ids = yield pipeline_common.Append(
            *[mapper.job_id for mapper in sort_mappers])
        result = yield _CollectOutputFiles(job_ids)
        with pipeline.After(result):
            yield _CleanupOutputFiles(job_ids)
        yield pipeline_common.Return(result)