def testReturn(self):
  """Tests common.Return with explicit and default values."""
  self.assertEqual(
      1234,
      self.run_pipeline(common.Return(1234)).default.value)
  self.assertEqual(
      'hi there',
      self.run_pipeline(common.Return('hi there')).default.value)
  self.assertIsNone(self.run_pipeline(common.Return()).default.value)
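# Context note: run_pipeline is a harness helper defined elsewhere in the
# test class, not shown above. A minimal sketch of what it is assumed to
# do, using the public appengine-pipeline API (Pipeline.start,
# Pipeline.from_id, .outputs); the task-queue draining helper named here
# is hypothetical:
def run_pipeline(self, stage):
  stage.start()
  _drain_task_queue(self.taskqueue_stub)  # hypothetical: run all queued pipeline tasks
  return pipeline.Pipeline.from_id(stage.pipeline_id).outputs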
def run(self, map_result_status, reduce_result_status, reduce_outputs):
  """Resolves the overall job status from the map and reduce phases.

  An abort in either phase aborts the whole job; otherwise a failure in
  either phase fails it; only if both succeeded is the job a success.
  """
  if (map_result_status == model.MapreduceState.RESULT_ABORTED or
      reduce_result_status == model.MapreduceState.RESULT_ABORTED):
    result_status = model.MapreduceState.RESULT_ABORTED
  elif (map_result_status == model.MapreduceState.RESULT_FAILED or
        reduce_result_status == model.MapreduceState.RESULT_FAILED):
    result_status = model.MapreduceState.RESULT_FAILED
  else:
    result_status = model.MapreduceState.RESULT_SUCCESS

  self.fill(self.outputs.result_status, result_status)
  if result_status == model.MapreduceState.RESULT_SUCCESS:
    yield pipeline_common.Return(reduce_outputs)
  else:
    yield pipeline_common.Return([])
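# Context sketch: in the MapReduce library this stage is yielded last by
# the top-level MapreducePipeline, which passes in the result_status
# futures of the map and reduce phases so the overall status resolves
# only after both finish. Roughly (argument lists elided):
#
#   map_pipeline = yield MapPipeline(...)
#   reducer_pipeline = yield ReducePipeline(...)
#   yield _ReturnPipeline(map_pipeline.result_status,
#                         reducer_pipeline.result_status,
#                         reducer_pipeline)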
def run(self, job_keys):
  project = get_shub_project()
  jobs = [project.job(x) for x in job_keys]
  unfinished = [x for x in jobs if x.info['state'] != 'finished']
  logging.info("Waiting for %s unfinished spiders", len(unfinished))
  if unfinished:
    # Try again in 30 seconds
    with pipeline.InOrder():
      yield common.Delay(seconds=30)
      yield WaitForJobs(job_keys)
  else:
    yield common.Return(True)
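# A minimal usage sketch, assuming WaitForJobs subclasses
# pipeline.Pipeline: start the poller and log the library's status UI
# URL (/_ah/pipeline is the appengine-pipeline default base path).
stage = WaitForJobs(job_keys)
stage.start()
logging.info("Polling started: /_ah/pipeline/status?root=%s",
             stage.pipeline_id)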
def run(self, job_name, mapper_params, filenames, shards=None):
  bucket_name = mapper_params["bucket_name"]
  # Hash each input file's records into buckets, then sort each bucket.
  hashed_files = yield _HashPipeline(job_name, bucket_name,
                                     filenames, shards=shards)
  sorted_files = yield _SortChunksPipeline(job_name, bucket_name,
                                           hashed_files)
  temp_files = [hashed_files, sorted_files]

  merged_files = yield _MergePipeline(job_name, bucket_name, sorted_files)

  # Delete the intermediate files only after the merge has consumed them.
  with pipeline.After(merged_files):
    all_temp_files = yield pipeline_common.Extend(*temp_files)
    yield _GCSCleanupPipeline(all_temp_files)

  yield pipeline_common.Return(merged_files)
def run(self, job_name, bucket_name, filenames):
  sort_mappers = []
  # Fan out: one single-shard sort mapper per group of hashed files.
  for i in range(len(filenames)):
    filenames_only = util.strip_prefix_from_items("/%s/" % bucket_name,
                                                  filenames[i])
    sort_mapper = yield mapper_pipeline.MapperPipeline(
        "%s-shuffle-sort-%s" % (job_name, str(i)),
        __name__ + "._sort_records_map",
        __name__ + "._BatchGCSRecordsReader",
        None,
        {
            "input_reader": {
                "bucket_name": bucket_name,
                "objects": filenames_only,
            },
        },
        shards=1)
    sort_mappers.append(sort_mapper)
  # Join: wait for every sort mapper before gathering its output files.
  with pipeline.After(*sort_mappers):
    job_ids = yield pipeline_common.Append(
        *[mapper.job_id for mapper in sort_mappers])
    result = yield _CollectOutputFiles(job_ids)
    with pipeline.After(result):
      yield _CleanupOutputFiles(job_ids)
    yield pipeline_common.Return(result)
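# Barrier semantics used in the two snippets above, as a generic sketch:
# anything yielded inside pipeline.After(*futures) is blocked until
# every listed future resolves, which is how these pipelines fan out
# child stages and then join on them.
#
#   first = yield StageA()
#   second = yield StageB()
#   with pipeline.After(first, second):
#       yield StageC()  # runs only after both StageA and StageB finish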