def testSuccessfulRun(self):
    """Run the shuffle-service pipeline on two inputs through to completion.

    Starts the pipeline, inspects the shuffle request found on
    ``self.file_service``, replays that request's callback as a task, and
    then checks that the reloaded pipeline has finalized with two outputs.
    """
    p = shuffler._ShuffleServicePipeline("testjob", ["file1", "file2"])
    p.start()
    test_support.execute_until_empty(self.taskqueue)

    # The request must exist and describe both inputs in order.
    request = self.file_service.shuffle_request
    self.assertTrue(request)
    self.assertTrue(request.shuffle_name().startswith("testjob-"))
    self.assertEqual(2, len(request.input_list()))
    # NOTE(review): 1 is presumably the expected input-format enum value;
    # confirm against the shuffle request definition.
    self.assertEqual(1, request.input(0).format())
    self.assertEqual("file1", request.input(0).path())
    self.assertEqual(1, request.input(1).format())
    self.assertEqual("file2", request.input(1).path())
    self.assertEqual(2, len(request.output().path_list()))

    # The callback must point back at the pipeline callback handler.
    callback = request.callback()
    self.assertTrue(callback.url().startswith(
        "/mapreduce/pipeline/callback?pipeline_id="))
    self.assertEqual(self.version_id, callback.app_version_id())
    self.assertEqual("GET", callback.method())
    self.assertEqual("default", callback.queue())

    # Replay the callback by hand, then drain whatever work it enqueues.
    callback_task = {
        "url": callback.url(),
        "method": callback.method(),
    }
    test_support.execute_task(callback_task)
    test_support.execute_until_empty(self.taskqueue)

    # Reload from storage so the assertions see the persisted state.
    p = shuffler._ShuffleServicePipeline.from_id(p.pipeline_id)
    self.assertTrue(p.has_finalized)
    output_files = p.outputs.default.value
    self.assertEqual(2, len(output_files))
    self.assertTrue(output_files[0].startswith("/blobstore/"))
    self.assertTrue(output_files[1].startswith("/blobstore/"))
def testNoInputFile(self):
    """An empty input list yields no shuffle request and an empty output."""
    started = shuffler._ShuffleServicePipeline("testjob", [])
    started.start()
    test_support.execute_until_empty(self.taskqueue)

    # Nothing should have been submitted for shuffling.
    self.assertEqual(None, self.file_service.shuffle_request)

    # Reload by id and check the stored default output.
    reloaded = shuffler._ShuffleServicePipeline.from_id(started.pipeline_id)
    self.assertEqual([], reloaded.outputs.default.value)
def testSuccessfulRun_CallbackOnDefaultVersion(self):
    """Check the shuffle callback when ``CURRENT_MODULE_ID`` is "default".

    With that environment variable set, the callback attached to the shuffle
    request is expected to carry ``self.major_version_id`` as its app
    version id, and to use GET on the "default" queue.
    """
    # NOTE(review): this writes to the process environment and does not undo
    # it here; presumably the fixture's setUp/tearDown resets it - confirm.
    os.environ["CURRENT_MODULE_ID"] = "default"

    input_file1 = self._CreateInputFile()
    input_file2 = self._CreateInputFile()
    p = shuffler._ShuffleServicePipeline("testjob", [input_file1, input_file2])
    p.start()
    test_support.execute_until_empty(self.taskqueue)

    request = self.file_service.shuffle_request
    callback = request.callback()
    self.assertTrue(callback.url().startswith(
        "/mapreduce/pipeline/callback?pipeline_id="))
    self.assertEqual(self.major_version_id, callback.app_version_id())
    self.assertEqual("GET", callback.method())
    self.assertEqual("default", callback.queue())
def testNoData(self):
    """Inputs finalized with nothing written produce no shuffle request.

    A blobstore file is created and finalized without any writes, then
    passed twice as the pipeline input; the pipeline's default output is
    expected to be an empty list.
    """
    blob_file = files.blobstore.create()
    files.finalize(blob_file)
    blob_key = files.blobstore.get_blob_key(blob_file)
    readable_name = files.blobstore.get_file_name(blob_key)

    job = shuffler._ShuffleServicePipeline(
        "testjob", [readable_name, readable_name])
    job.start()
    test_support.execute_until_empty(self.taskqueue)

    # Nothing should have been submitted for shuffling.
    self.assertEqual(None, self.file_service.shuffle_request)

    reloaded = shuffler._ShuffleServicePipeline.from_id(job.pipeline_id)
    self.assertEqual([], reloaded.outputs.default.value)
def run(self, job_name, shuffler_params, filenames, shards=None):
    """Yield the child pipelines that shuffle ``filenames``.

    If ``files.shuffler.available()`` is true, the work is delegated to a
    single ``_ShuffleServicePipeline``. Otherwise the inputs go through
    ``_HashGSPipeline``, ``_SortChunksPipeline`` and ``_MergeGSPipeline``,
    the intermediate files are handed to a cleanup pipeline once the merge
    is done, and the merged files are returned.

    Args:
      job_name: passed through to every child pipeline.
      shuffler_params: accepted but not read by this method.
      filenames: input files to shuffle.
      shards: forwarded to ``_HashGSPipeline`` as ``shards``.
    """
    if files.shuffler.available():
        yield _ShuffleServicePipeline(job_name, filenames)
        return

    # Fallback path: hash, sort, then merge.
    hashed = yield _HashGSPipeline(job_name, filenames, shards=shards)
    sorted_chunks = yield _SortChunksPipeline(job_name, hashed)
    merged = yield _MergeGSPipeline(job_name, sorted_chunks)

    # Cleanup of the intermediates is ordered after the merge result.
    with pipeline.After(merged):
        intermediates = yield pipeline_common.Extend(hashed, sorted_chunks)
        yield mapper_pipeline._CleanupPipeline(intermediates)

    yield pipeline_common.Return(merged)
def testError(self):
    """A callback whose URL carries ``&error=1`` leaves the pipeline aborted."""
    job = shuffler._ShuffleServicePipeline("testjob", ["file1", "file2"])
    job.start()
    test_support.execute_until_empty(self.taskqueue)

    # Replay the shuffle callback with the error marker appended to its URL.
    callback = self.file_service.shuffle_request.callback()
    failing_url = callback.url() + "&error=1"
    test_support.execute_task({
        "url": failing_url,
        "method": callback.method(),
    })
    test_support.execute_until_empty(self.taskqueue)

    reloaded = shuffler._ShuffleServicePipeline.from_id(job.pipeline_id)
    self.assertTrue(reloaded.was_aborted)
def testError(self):
    """An error callback advances the pipeline's ``current_attempt`` to 2.

    The pipeline starts at attempt 1. After the shuffle callback is replayed
    with ``&error=1`` appended to its URL and the task queue is drained, the
    reloaded pipeline is expected to report attempt 2.
    """
    input_file1 = self._CreateInputFile()
    input_file2 = self._CreateInputFile()
    p = shuffler._ShuffleServicePipeline("testjob", [input_file1, input_file2])
    self.assertEqual(1, p.current_attempt)
    p.start()
    test_support.execute_until_empty(self.taskqueue)

    request = self.file_service.shuffle_request
    callback = request.callback()

    # Replay the callback with the error marker appended.
    callback_task = {
        "url": callback.url() + "&error=1",
        "method": callback.method(),
    }
    test_support.execute_task(callback_task)
    test_support.execute_until_empty(self.taskqueue)

    # Reload from storage so the assertion sees the persisted attempt count.
    p = shuffler._ShuffleServicePipeline.from_id(p.pipeline_id)
    self.assertEqual(2, p.current_attempt)