def testSuccessfulRun(self):
    """Run the shuffle-service pipeline end to end and check its outputs.

    Starts a ``_ShuffleServicePipeline`` for job "testjob" with two input
    paths, inspects the shuffle request recorded on ``self.file_service``,
    then runs the request's callback as a task and verifies that the
    pipeline finalized with two output paths starting with "/blobstore/".
    """
    p = shuffler._ShuffleServicePipeline("testjob", ["file1", "file2"])
    p.start()
    test_support.execute_until_empty(self.taskqueue)

    # The recorded request must name the job and list both inputs.
    request = self.file_service.shuffle_request
    self.assertTrue(request)
    self.assertTrue(request.shuffle_name().startswith("testjob-"))
    self.assertEqual(2, len(request.input_list()))
    self.assertEqual(1, request.input(0).format())
    self.assertEqual("file1", request.input(0).path())
    self.assertEqual(1, request.input(1).format())
    self.assertEqual("file2", request.input(1).path())
    self.assertEqual(2, len(request.output().path_list()))

    # The callback must point back at the pipeline callback handler.
    callback = request.callback()
    self.assertTrue(callback.url().startswith(
        "/mapreduce/pipeline/callback?pipeline_id="))
    self.assertEqual(self.version_id, callback.app_version_id())
    self.assertEqual("GET", callback.method())
    self.assertEqual("default", callback.queue())

    # Replay the callback by hand as a task, then drain any follow-up work.
    callback_task = {
        "url": callback.url(),
        "method": callback.method(),
    }
    test_support.execute_task(callback_task)
    test_support.execute_until_empty(self.taskqueue)

    # Reload the pipeline by id to observe its final state.
    p = shuffler._ShuffleServicePipeline.from_id(p.pipeline_id)
    self.assertTrue(p.has_finalized)
    output_files = p.outputs.default.value
    self.assertEqual(2, len(output_files))
    self.assertTrue(output_files[0].startswith("/blobstore/"))
    self.assertTrue(output_files[1].startswith("/blobstore/"))
  def testSuccessfulRun(self):
    """Run the shuffle-service pipeline end to end and check its outputs.

    Starts a ``_ShuffleServicePipeline`` for job "testjob" with two input
    paths, inspects the shuffle request recorded on ``self.file_service``,
    then runs the request's callback as a task and verifies that the
    pipeline finalized with two output paths starting with "/blobstore/".
    """
    p = shuffler._ShuffleServicePipeline("testjob", ["file1", "file2"])
    p.start()
    test_support.execute_until_empty(self.taskqueue)

    # The recorded request must name the job and list both inputs.
    request = self.file_service.shuffle_request
    self.assertTrue(request)
    self.assertTrue(request.shuffle_name().startswith("testjob-"))
    self.assertEqual(2, len(request.input_list()))
    self.assertEqual(1, request.input(0).format())
    self.assertEqual("file1", request.input(0).path())
    self.assertEqual(1, request.input(1).format())
    self.assertEqual("file2", request.input(1).path())
    self.assertEqual(2, len(request.output().path_list()))

    # The callback must point back at the pipeline callback handler.
    callback = request.callback()
    self.assertTrue(callback.url().startswith(
        "/mapreduce/pipeline/callback?pipeline_id="))
    self.assertEqual(self.version_id, callback.app_version_id())
    self.assertEqual("GET", callback.method())
    self.assertEqual("default", callback.queue())

    # Replay the callback by hand as a task, then drain any follow-up work.
    callback_task = {
        "url": callback.url(),
        "method": callback.method(),
    }
    test_support.execute_task(callback_task)
    test_support.execute_until_empty(self.taskqueue)

    # Reload the pipeline by id to observe its final state.
    p = shuffler._ShuffleServicePipeline.from_id(p.pipeline_id)
    self.assertTrue(p.has_finalized)
    output_files = p.outputs.default.value
    self.assertEqual(2, len(output_files))
    self.assertTrue(output_files[0].startswith("/blobstore/"))
    self.assertTrue(output_files[1].startswith("/blobstore/"))
예제 #3
0
  def validate_map_started(self):
    """Check the queued kickoff task, execute it, and verify the stored spec.

    Asserts that a single kickoff task is queued for ``self.config``, that it
    carries the job id in both a header and its payload, and that the job
    state is active with no active shards. After executing the kickoff task
    it checks the number of queued tasks and compares the persisted
    mapreduce spec against ``self.config``.
    """
    # Exactly one task (the kickoff) should be queued.
    queued = self.taskqueue.GetTasks(self.config.queue_name)
    self.assertEqual(1, len(queued))
    self.taskqueue.FlushQueue(self.config.queue_name)
    # The enqueue hook must have been called exactly once.
    self.assertEqual(1, len(TestHooks.enqueue_kickoff_task_calls))

    kickoff = queued[0]
    kickoff_url = kickoff["url"]
    self.assertTrue(kickoff_url.startswith(self.config._base_path))
    # The job id is expected in a task header...
    task_headers = dict(kickoff["headers"])
    self.assertEqual(self.config.job_id,
                     task_headers[util._MR_ID_TASK_HEADER])
    # ...and in the decoded task payload.
    payload = test_support.decode_task_payload(kickoff)
    payload_mr_id = payload.get("mapreduce_id")
    self.assertEqual(self.config.job_id, payload_mr_id)

    # Before kickoff runs, the job is active but has no active shards.
    job_state = model.MapreduceState.get_by_job_id(self.config.job_id)
    self.assertTrue(job_state.active)
    self.assertEqual(0, job_state.active_shards)

    test_support.execute_task(kickoff)

    # Kickoff enqueues one controller task plus one task per shard.
    queued = self.taskqueue.GetTasks(self.config.queue_name)
    expected_task_count = 1 + self.config.shard_count
    self.assertEqual(expected_task_count, len(queued))

    job_state = model.MapreduceState.get_by_job_id(self.config.job_id)
    spec = job_state.mapreduce_spec
    self.assertEqual("__main__.TestHooks", spec.hooks_class_name)

    # Mapper spec: handler and input reader class paths.
    mapper = spec.mapper
    self.assertEqual("mapreduce.api.map_job.mapper.Mapper",
                     mapper.handler_spec)
    self.assertEqual(
        "mapreduce.api.map_job.sample_input_reader.SampleInputReader",
        mapper.input_reader_spec)
    self.assertEqual(self.config.input_reader_params,
                     {"count": TEST_SAMPLE_INPUT_READER_COUNT})

    # Spec params: each config attribute must match its stored param.
    for config_attr, param_key in (
        ("queue_name", "queue_name"),
        ("_force_writes", "force_writes"),
        ("done_callback_url", "done_callback"),
        ("_base_path", "base_path"),
        ("shard_max_attempts", "shard_max_attempts"),
        ("_task_max_attempts", "task_max_attempts"),
        ("_api_version", "api_version"),
    ):
      self.assertEqual(getattr(self.config, config_attr),
                       spec.params[param_key])
    self.assertEqual(self.config._api_version, map_job_config._API_VERSION)
    def validate_map_started(self, mapreduce_id, queue_name=None):
        """Verify the kickoff task of a started map job and execute it.

        Args:
          mapreduce_id: id of the job expected to have been started; must be
            truthy.
          queue_name: name of the queue to inspect. Falls back to
            ``self.QUEUE_NAME`` when falsy.

        Returns:
          The "eta" value of the kickoff task.
        """
        queue_name = queue_name or self.QUEUE_NAME
        self.assertTrue(mapreduce_id)

        # Note: only a kickoff job is pending at this stage, shards come later.
        tasks = self.taskqueue.GetTasks(queue_name)
        self.assertEqual(1, len(tasks))
        # The task's eta is not checked here; it is returned to the caller.
        task = tasks[0]
        self.assertEqual(
            "/mapreduce_base_path/kickoffjob_callback/" + mapreduce_id,
            task["url"])
        handler = test_support.execute_task(task)
        self.assertEqual(mapreduce_id, handler.request.get("mapreduce_id"))

        # The stored params are the defaults plus the overrides listed below.
        state = model.MapreduceState.get_by_job_id(mapreduce_id)
        params = map_job.JobConfig._get_default_mr_params()
        params.update({
            "foo": "bar",
            "base_path": "/mapreduce_base_path",
            "queue_name": queue_name
        })
        self.assertEqual(state.mapreduce_spec.params, params)

        # Converting the stored spec to a map job config must report api
        # version 0.
        job_config = map_job.JobConfig._to_map_job_config(
            state.mapreduce_spec, queue_name)
        self.assertEqual(0, job_config._api_version)
        return task["eta"]
예제 #5
0
  def validate_map_started(self, mapreduce_id, queue_name=None):
    """Verify the kickoff task of a started map job and execute it.

    Args:
      mapreduce_id: id of the job expected to have been started; must be
        truthy.
      queue_name: name of the queue to inspect. Falls back to
        ``self.QUEUE_NAME`` when falsy.

    Returns:
      The "eta" value of the kickoff task.
    """
    queue_name = queue_name or self.QUEUE_NAME
    self.assertTrue(mapreduce_id)

    # Note: only a kickoff job is pending at this stage, shards come later.
    tasks = self.taskqueue.GetTasks(queue_name)
    self.assertEqual(1, len(tasks))
    # The task's eta is not checked here; it is returned to the caller.
    task = tasks[0]
    self.assertEqual("/mapreduce_base_path/kickoffjob_callback/" + mapreduce_id,
                     task["url"])
    handler = test_support.execute_task(task)
    self.assertEqual(mapreduce_id, handler.request.get("mapreduce_id"))

    # The stored params are the defaults plus the overrides listed below.
    state = model.MapreduceState.get_by_job_id(mapreduce_id)
    params = map_job.JobConfig._get_default_mr_params()
    params.update({"foo": "bar",
                   "base_path": "/mapreduce_base_path",
                   "queue_name": queue_name})
    self.assertEqual(state.mapreduce_spec.params, params)

    # Converting the stored spec to a map job config must report api
    # version 0.
    job_config = map_job.JobConfig._to_map_job_config(state.mapreduce_spec,
                                                      queue_name)
    self.assertEqual(0, job_config._api_version)
    return task["eta"]
    def testError(self):
        """Deliver the shuffle callback with an error flag; expect an abort."""
        pipeline = shuffler._ShuffleServicePipeline(
            "testjob", ["file1", "file2"])
        pipeline.start()
        test_support.execute_until_empty(self.taskqueue)

        # Replay the callback taken from the recorded shuffle request, with
        # "&error=1" appended to its URL.
        shuffle_callback = self.file_service.shuffle_request.callback()
        failing_url = shuffle_callback.url() + "&error=1"
        test_support.execute_task(
            {"url": failing_url, "method": shuffle_callback.method()})
        test_support.execute_until_empty(self.taskqueue)

        # Reload the pipeline by id to observe its final state.
        reloaded = shuffler._ShuffleServicePipeline.from_id(
            pipeline.pipeline_id)
        self.assertTrue(reloaded.was_aborted)
예제 #7
0
  def testError(self):
    """Deliver the shuffle callback with an error flag; expect an abort."""
    pipeline = shuffler._ShuffleServicePipeline(
        "testjob", ["file1", "file2"])
    pipeline.start()
    test_support.execute_until_empty(self.taskqueue)

    # Replay the callback taken from the recorded shuffle request, with
    # "&error=1" appended to its URL.
    shuffle_callback = self.file_service.shuffle_request.callback()
    failing_url = shuffle_callback.url() + "&error=1"
    test_support.execute_task(
        {"url": failing_url, "method": shuffle_callback.method()})
    test_support.execute_until_empty(self.taskqueue)

    # Reload the pipeline by id to observe its final state.
    reloaded = shuffler._ShuffleServicePipeline.from_id(pipeline.pipeline_id)
    self.assertTrue(reloaded.was_aborted)
예제 #8
0
  def testGetStatus(self):
    """Follow a job's reported status from submission to completion."""
    submitted = map_job.Job.submit(self.config)
    self.assertEqual(map_job.Job.RUNNING, submitted.get_status())
    self.assertEqual(self.config, submitted.job_config)

    # Take the queued tasks, clear the queue, and run the first (kickoff)
    # task by hand.
    pending = self.taskqueue.GetTasks(self.config.queue_name)
    self.taskqueue.FlushQueue(self.config.queue_name)
    test_support.execute_task(pending[0])

    # A job looked up by id still reports RUNNING after kickoff alone.
    reloaded = map_job.Job.get_job_by_id(job_id=self.config.job_id)
    self.assertEqual(map_job.Job.RUNNING, reloaded.get_status())
    self.assertEqual(self.config, reloaded.job_config)

    # Run everything that is left; the instance loaded earlier is expected
    # to pick up the latest status without being re-fetched.
    test_support.execute_until_empty(self.taskqueue)
    self.assertEqual(map_job.Job.SUCCESS, reloaded.get_status())
예제 #9
0
  def testError(self):
    """Deliver the shuffle callback with an error flag; expect a retry.

    The pipeline starts at attempt 1; after the callback is executed with
    "&error=1" appended to its URL, the reloaded pipeline must be at
    attempt 2.
    """
    input_file1 = self._CreateInputFile()
    input_file2 = self._CreateInputFile()

    p = shuffler._ShuffleServicePipeline("testjob", [input_file1, input_file2])
    self.assertEqual(1, p.current_attempt)
    p.start()
    test_support.execute_until_empty(self.taskqueue)

    request = self.file_service.shuffle_request
    callback = request.callback()

    # Replay the recorded callback as a task, flagged as an error.
    callback_task = {
        "url": callback.url() + "&error=1",
        "method": callback.method(),
    }
    test_support.execute_task(callback_task)
    test_support.execute_until_empty(self.taskqueue)

    # Reload the pipeline by id to observe the updated attempt counter.
    p = shuffler._ShuffleServicePipeline.from_id(p.pipeline_id)
    self.assertEqual(2, p.current_attempt)
    def testError(self):
        """Deliver the shuffle callback with an error flag; expect a retry.

        The pipeline starts at attempt 1; after the callback is executed with
        "&error=1" appended to its URL, the reloaded pipeline must be at
        attempt 2.
        """
        input_file1 = self._CreateInputFile()
        input_file2 = self._CreateInputFile()

        p = shuffler._ShuffleServicePipeline("testjob",
                                             [input_file1, input_file2])
        self.assertEqual(1, p.current_attempt)
        p.start()
        test_support.execute_until_empty(self.taskqueue)

        request = self.file_service.shuffle_request
        callback = request.callback()

        # Replay the recorded callback as a task, flagged as an error.
        callback_task = {
            "url": callback.url() + "&error=1",
            "method": callback.method(),
        }
        test_support.execute_task(callback_task)
        test_support.execute_until_empty(self.taskqueue)

        # Reload the pipeline by id to observe the updated attempt counter.
        p = shuffler._ShuffleServicePipeline.from_id(p.pipeline_id)
        self.assertEqual(2, p.current_attempt)
  def validate_map_started(self, mapreduce_id, queue_name=None):
    """Verify the kickoff task of a started map job and execute it.

    Args:
      mapreduce_id: id of the job expected to have been started; must be
        truthy.
      queue_name: name of the queue to inspect. Falls back to
        ``self.QUEUE_NAME`` when falsy.

    Returns:
      The "eta" value of the kickoff task.
    """
    queue_name = queue_name or self.QUEUE_NAME
    self.assertTrue(mapreduce_id)

    # Note: only a kickoff job is pending at this stage, shards come later.
    tasks = self.taskqueue.GetTasks(queue_name)
    self.assertEqual(1, len(tasks))
    # The task's eta is not checked here; it is returned to the caller.
    task = tasks[0]
    self.assertEqual("/mapreduce_base_path/kickoffjob_callback", task["url"])
    handler = test_support.execute_task(task)

    # The kickoff request carries the full spec as a JSON string.
    mapreduce_spec = model.MapreduceSpec.from_json_str(
        handler.request.get("mapreduce_spec"))
    self.assertTrue(mapreduce_spec)
    self.assertEqual(mapreduce_id, mapreduce_spec.mapreduce_id)
    self.assertEqual({"foo": "bar"}, mapreduce_spec.params)

    return task["eta"]