def testSuccessfulRun(self):
  """Tests a successful end-to-end run of the shuffle service pipeline."""
  p = shuffler._ShuffleServicePipeline("testjob", ["file1", "file2"])
  p.start()
  test_support.execute_until_empty(self.taskqueue)

  request = self.file_service.shuffle_request
  self.assertTrue(request)
  self.assertTrue(request.shuffle_name().startswith("testjob-"))
  self.assertEquals(2, len(request.input_list()))
  self.assertEquals(1, request.input(0).format())
  self.assertEquals("file1", request.input(0).path())
  self.assertEquals(1, request.input(1).format())
  self.assertEquals("file2", request.input(1).path())
  self.assertEquals(2, len(request.output().path_list()))

  callback = request.callback()
  self.assertTrue(callback.url().startswith(
      "/mapreduce/pipeline/callback?pipeline_id="))
  self.assertEquals(self.version_id, callback.app_version_id())
  self.assertEquals("GET", callback.method())
  self.assertEquals("default", callback.queue())

  callback_task = {
      "url": callback.url(),
      "method": callback.method(),
      }
  test_support.execute_task(callback_task)
  test_support.execute_until_empty(self.taskqueue)

  p = shuffler._ShuffleServicePipeline.from_id(p.pipeline_id)
  self.assertTrue(p.has_finalized)
  output_files = p.outputs.default.value
  self.assertEquals(2, len(output_files))
  self.assertTrue(output_files[0].startswith("/blobstore/"))
  self.assertTrue(output_files[1].startswith("/blobstore/"))

def validate_map_started(self):
  """Validates mapreduce state after the kickoff task has run."""
  # Only one kickoff task.
  tasks = self.taskqueue.GetTasks(self.config.queue_name)
  self.assertEqual(1, len(tasks))
  self.taskqueue.FlushQueue(self.config.queue_name)

  # Hook was run.
  self.assertEqual(1, len(TestHooks.enqueue_kickoff_task_calls))

  # Check the task.
  task = tasks[0]
  self.assertTrue(task["url"].startswith(self.config._base_path))

  # Check task header.
  headers = dict(task["headers"])
  self.assertEqual(self.config.job_id, headers[util._MR_ID_TASK_HEADER])

  # Check task payload.
  task_mr_id = test_support.decode_task_payload(task).get("mapreduce_id")
  self.assertEqual(self.config.job_id, task_mr_id)

  # Check state.
  state = model.MapreduceState.get_by_job_id(self.config.job_id)
  self.assertTrue(state.active)
  self.assertEqual(0, state.active_shards)

  test_support.execute_task(task)

  # controller + shard tasks.
  tasks = self.taskqueue.GetTasks(self.config.queue_name)
  self.assertEqual(1 + self.config.shard_count, len(tasks))

  state = model.MapreduceState.get_by_job_id(self.config.job_id)
  self.assertEqual("__main__.TestHooks",
                   state.mapreduce_spec.hooks_class_name)

  # Verify mapreduce_spec.mapper_spec.
  mapper_spec = state.mapreduce_spec.mapper
  self.assertEqual("mapreduce.api.map_job.mapper.Mapper",
                   mapper_spec.handler_spec)
  self.assertEqual(
      "mapreduce.api.map_job.sample_input_reader.SampleInputReader",
      mapper_spec.input_reader_spec)
  self.assertEqual(self.config.input_reader_params,
                   {"count": TEST_SAMPLE_INPUT_READER_COUNT})

  # Verify mapreduce_spec.params.
  self.assertEqual(self.config.queue_name,
                   state.mapreduce_spec.params["queue_name"])
  self.assertEqual(self.config._force_writes,
                   state.mapreduce_spec.params["force_writes"])
  self.assertEqual(self.config.done_callback_url,
                   state.mapreduce_spec.params["done_callback"])
  self.assertEqual(self.config._base_path,
                   state.mapreduce_spec.params["base_path"])
  self.assertEqual(self.config.shard_max_attempts,
                   state.mapreduce_spec.params["shard_max_attempts"])
  self.assertEqual(self.config._task_max_attempts,
                   state.mapreduce_spec.params["task_max_attempts"])
  self.assertEqual(self.config._api_version,
                   state.mapreduce_spec.params["api_version"])
  self.assertEqual(self.config._api_version, map_job_config._API_VERSION)

def validate_map_started(self, mapreduce_id, queue_name=None):
  """Tests that the map has been started."""
  queue_name = queue_name or self.QUEUE_NAME
  self.assertTrue(mapreduce_id)

  # Note: only a kickoff job is pending at this stage, shards come later.
  tasks = self.taskqueue.GetTasks(queue_name)
  self.assertEquals(1, len(tasks))

  # Checks that tasks are scheduled into the future.
  task = tasks[0]
  self.assertEqual(
      "/mapreduce_base_path/kickoffjob_callback/" + mapreduce_id,
      task["url"])
  handler = test_support.execute_task(task)

  self.assertEqual(mapreduce_id, handler.request.get("mapreduce_id"))

  state = model.MapreduceState.get_by_job_id(mapreduce_id)
  params = map_job.JobConfig._get_default_mr_params()
  params.update({
      "foo": "bar",
      "base_path": "/mapreduce_base_path",
      "queue_name": queue_name
  })
  self.assertEqual(state.mapreduce_spec.params, params)

  job_config = map_job.JobConfig._to_map_job_config(
      state.mapreduce_spec, queue_name)
  self.assertEqual(0, job_config._api_version)

  return task["eta"]

def testError(self):
  """Tests that a shuffle service error aborts the pipeline."""
  p = shuffler._ShuffleServicePipeline("testjob", ["file1", "file2"])
  p.start()
  test_support.execute_until_empty(self.taskqueue)

  request = self.file_service.shuffle_request
  callback = request.callback()
  callback_task = {
      "url": callback.url() + "&error=1",
      "method": callback.method(),
      }
  test_support.execute_task(callback_task)
  test_support.execute_until_empty(self.taskqueue)

  p = shuffler._ShuffleServicePipeline.from_id(p.pipeline_id)
  self.assertTrue(p.was_aborted)

def testGetStatus(self):
  """Tests job status before, during, and after execution."""
  job = map_job.Job.submit(self.config)
  self.assertEqual(map_job.Job.RUNNING, job.get_status())
  self.assertEqual(self.config, job.job_config)

  # Execute Kickoff task.
  tasks = self.taskqueue.GetTasks(self.config.queue_name)
  self.taskqueue.FlushQueue(self.config.queue_name)
  task = tasks[0]
  test_support.execute_task(task)

  job = map_job.Job.get_job_by_id(job_id=self.config.job_id)
  self.assertEqual(map_job.Job.RUNNING, job.get_status())
  self.assertEqual(self.config, job.job_config)

  # Execute all tasks.
  test_support.execute_until_empty(self.taskqueue)
  # The old job instance gets the most up-to-date status from the db.
  self.assertEqual(map_job.Job.SUCCESS, job.get_status())

def testError(self):
  """Tests that the shuffle pipeline is retried on a service error."""
  input_file1 = self._CreateInputFile()
  input_file2 = self._CreateInputFile()

  p = shuffler._ShuffleServicePipeline("testjob", [input_file1, input_file2])
  self.assertEquals(1, p.current_attempt)
  p.start()
  test_support.execute_until_empty(self.taskqueue)

  request = self.file_service.shuffle_request
  callback = request.callback()
  callback_task = {
      "url": callback.url() + "&error=1",
      "method": callback.method(),
      }
  test_support.execute_task(callback_task)
  test_support.execute_until_empty(self.taskqueue)

  p = shuffler._ShuffleServicePipeline.from_id(p.pipeline_id)
  self.assertEquals(2, p.current_attempt)

def validate_map_started(self, mapreduce_id, queue_name=None):
  """Tests that the map has been started."""
  queue_name = queue_name or self.QUEUE_NAME
  self.assertTrue(mapreduce_id)

  # Note: only a kickoff job is pending at this stage, shards come later.
  tasks = self.taskqueue.GetTasks(queue_name)
  self.assertEquals(1, len(tasks))

  # Checks that tasks are scheduled into the future.
  task = tasks[0]
  self.assertEquals("/mapreduce_base_path/kickoffjob_callback", task["url"])
  handler = test_support.execute_task(task)

  mapreduce_spec = model.MapreduceSpec.from_json_str(
      handler.request.get("mapreduce_spec"))
  self.assertTrue(mapreduce_spec)
  self.assertEquals(mapreduce_id, mapreduce_spec.mapreduce_id)
  self.assertEquals({"foo": "bar"}, mapreduce_spec.params)

  return task["eta"]