Example 1
0
    def test_stop_node_wait_for_inactivation_timeout(self):
        """stop_node times out with DEADLINE_EXCEEDED yet still stop-initiates the node."""
        proto_path = os.path.join(
            os.path.dirname(__file__), 'testdata', 'async_pipeline.pbtxt')
        pipeline = pipeline_pb2.Pipeline()
        self.load_proto_from_text(proto_path, pipeline)
        trainer = pipeline.nodes[2].pipeline_node
        # Keep the trainer execution active so stop_node's wait can never finish.
        test_utils.fake_trainer_output(
            self._mlmd_connection, trainer, active=True)
        pipeline_uid = task_lib.PipelineUid.from_pipeline(pipeline)
        node_uid = task_lib.NodeUid(
            node_id='my_trainer', pipeline_uid=pipeline_uid)
        with self._mlmd_connection as m:
            pstate.PipelineState.new(m, pipeline).commit()
            with self.assertRaisesRegex(
                    status_lib.StatusNotOkError,
                    'Timed out.*waiting for execution inactivation.'
            ) as exception_context:
                pipeline_ops.stop_node(m, node_uid, timeout_secs=1.0)
            self.assertEqual(status_lib.Code.DEADLINE_EXCEEDED,
                             exception_context.exception.code)

            # Even though the wait timed out, the node must already be
            # stop-initiated so that future triggers are prevented.
            pipeline_state = pstate.PipelineState.load(m, pipeline_uid)
            self.assertEqual(
                status_lib.Code.CANCELLED,
                pipeline_state.node_stop_initiated_reason(node_uid).code)
Example 2
0
    def test_stop_node_wait_for_inactivation(self):
        """stop_node returns once another thread inactivates the execution."""
        proto_path = os.path.join(
            os.path.dirname(__file__), 'testdata', 'async_pipeline.pbtxt')
        pipeline = pipeline_pb2.Pipeline()
        self.load_proto_from_text(proto_path, pipeline)
        trainer = pipeline.nodes[2].pipeline_node
        test_utils.fake_trainer_output(
            self._mlmd_connection, trainer, active=True)
        pipeline_uid = task_lib.PipelineUid.from_pipeline(pipeline)
        node_uid = task_lib.NodeUid(
            node_id='my_trainer', pipeline_uid=pipeline_uid)
        with self._mlmd_connection as m:
            pstate.PipelineState.new(m, pipeline).commit()

            def _inactivate(execution):
                # Give stop_node a head start before completing the execution.
                time.sleep(2.0)
                with pipeline_ops._PIPELINE_OPS_LOCK:
                    execution.last_known_state = metadata_store_pb2.Execution.COMPLETE
                    m.store.put_executions([execution])

            execution = task_gen_utils.get_executions(m, trainer)[0]
            inactivation_thread = threading.Thread(
                target=_inactivate, args=(copy.deepcopy(execution), ))
            inactivation_thread.start()
            # Must unblock within the timeout because of the background thread.
            pipeline_ops.stop_node(m, node_uid, timeout_secs=5.0)
            inactivation_thread.join()

            pipeline_state = pstate.PipelineState.load(m, pipeline_uid)
            self.assertTrue(pipeline_state.is_node_stop_initiated(node_uid))

            # The node can be restarted afterwards, clearing stop-initiation.
            pipeline_state = pipeline_ops.initiate_node_start(m, node_uid)
            self.assertFalse(pipeline_state.is_node_stop_initiated(node_uid))
Example 3
0
    def test_generate_task_from_active_execution(self):
        """A task is generated only while the trainer execution is RUNNING."""
        generate = task_gen_utils.generate_task_from_active_execution

        with self._mlmd_connection as m:
            # Without any execution there is nothing to generate.
            executions = task_gen_utils.get_executions(m, self._trainer)
            self.assertIsNone(
                generate(m, self._pipeline, self._trainer, executions))

        # Create a trainer execution and force it into the RUNNING state.
        otu.fake_trainer_output(self._mlmd_connection, self._trainer)
        with self._mlmd_connection as m:
            execution = m.store.get_executions()[0]
            execution.last_known_state = metadata_store_pb2.Execution.RUNNING
            m.store.put_executions([execution])

            # The active execution now yields a task bound to its id.
            executions = task_gen_utils.get_executions(m, self._trainer)
            task = generate(m, self._pipeline, self._trainer, executions)
            self.assertEqual(execution.id, task.execution.id)

            # Once the execution completes, generation stops again.
            execution = m.store.get_executions()[0]
            execution.last_known_state = metadata_store_pb2.Execution.COMPLETE
            m.store.put_executions([execution])
            executions = task_gen_utils.get_executions(m, self._trainer)
            self.assertIsNone(
                generate(m, self._pipeline, self._trainer, executions))
Example 4
0
    def test_task_generation(self, use_task_queue):
        """Tests async pipeline task generation.

        Args:
          use_task_queue: If task queue is enabled, new tasks are only generated
            if a task with the same task_id does not already exist in the queue.
            `use_task_queue=False` is useful to test the case of task generation
            when task queue is empty (for eg: due to orchestrator restart).
        """
        # NOTE(review): subTest labels are renumbered so every generation round
        # has a unique `generate=` id (the original reused 4 twice), keeping
        # failure reports unambiguous.

        # Simulate that ExampleGen has already completed successfully.
        otu.fake_example_gen_run(self._mlmd_connection, self._example_gen, 1,
                                 1)

        # Before generation, there's 1 execution in MLMD.
        with self._mlmd_connection as m:
            executions = m.store.get_executions()
        self.assertLen(executions, 1)

        # Generate once.
        with self.subTest(generate=1):
            tasks, active_executions = self._generate_and_test(
                use_task_queue,
                num_initial_executions=1,
                num_tasks_generated=1,
                num_new_executions=1,
                num_active_executions=1)
            self._verify_exec_node_task(self._transform,
                                        active_executions[0].id, tasks[0])

        # No new effects if generate called again.
        with self.subTest(generate=2):
            tasks, active_executions = self._generate_and_test(
                use_task_queue,
                num_initial_executions=2,
                num_tasks_generated=0 if use_task_queue else 1,
                num_new_executions=0,
                num_active_executions=1)
            execution_id = active_executions[0].id
            if not use_task_queue:
                self._verify_exec_node_task(self._transform, execution_id,
                                            tasks[0])

        # Mark transform execution complete.
        otu.fake_transform_output(self._mlmd_connection, self._transform,
                                  active_executions[0])
        # Dequeue the corresponding task if task queue is enabled.
        self._dequeue_and_test(use_task_queue, self._transform,
                               active_executions[0].id)

        # Trainer execution task should be generated next.
        with self.subTest(generate=3):
            tasks, active_executions = self._generate_and_test(
                use_task_queue,
                num_initial_executions=2,
                num_tasks_generated=1,
                num_new_executions=1,
                num_active_executions=1)
            execution_id = active_executions[0].id
            self._verify_exec_node_task(self._trainer, execution_id, tasks[0])

        # Mark the trainer execution complete.
        otu.fake_trainer_output(self._mlmd_connection, self._trainer,
                                active_executions[0])
        # Dequeue the corresponding task if task queue is enabled.
        self._dequeue_and_test(use_task_queue, self._trainer, execution_id)

        # No more tasks should be generated as there are no new inputs.
        with self.subTest(generate=4):
            self._generate_and_test(use_task_queue,
                                    num_initial_executions=3,
                                    num_tasks_generated=0,
                                    num_new_executions=0,
                                    num_active_executions=0)

        # Fake another ExampleGen run.
        otu.fake_example_gen_run(self._mlmd_connection, self._example_gen, 1,
                                 1)

        # Both transform and trainer tasks should be generated as they both find
        # new inputs.
        with self.subTest(generate=5):
            tasks, active_executions = self._generate_and_test(
                use_task_queue,
                num_initial_executions=4,
                num_tasks_generated=2,
                num_new_executions=2,
                num_active_executions=2)
            self._verify_exec_node_task(self._transform,
                                        active_executions[0].id, tasks[0])
            self._verify_exec_node_task(self._trainer, active_executions[1].id,
                                        tasks[1])

        # Re-generation will produce the same tasks when task queue enabled.
        with self.subTest(generate=6):
            tasks, active_executions = self._generate_and_test(
                use_task_queue,
                num_initial_executions=6,
                num_tasks_generated=0 if use_task_queue else 2,
                num_new_executions=0,
                num_active_executions=2)
            if not use_task_queue:
                self._verify_exec_node_task(self._transform,
                                            active_executions[0].id, tasks[0])
                self._verify_exec_node_task(self._trainer,
                                            active_executions[1].id, tasks[1])

        # Mark transform execution complete.
        otu.fake_transform_output(self._mlmd_connection, self._transform,
                                  active_executions[0])
        # Dequeue the corresponding task.
        self._dequeue_and_test(use_task_queue, self._transform,
                               active_executions[0].id)

        # Mark the trainer execution complete.
        otu.fake_trainer_output(self._mlmd_connection, self._trainer,
                                active_executions[1])
        self._dequeue_and_test(use_task_queue, self._trainer,
                               active_executions[1].id)

        # Trainer should be triggered again due to transform producing new output.
        with self.subTest(generate=7):
            tasks, active_executions = self._generate_and_test(
                use_task_queue,
                num_initial_executions=6,
                num_tasks_generated=1,
                num_new_executions=1,
                num_active_executions=1)
            self._verify_exec_node_task(self._trainer, active_executions[0].id,
                                        tasks[0])

        # Finally, no new tasks once trainer completes.
        otu.fake_trainer_output(self._mlmd_connection, self._trainer,
                                active_executions[0])
        # Dequeue corresponding task.
        self._dequeue_and_test(use_task_queue, self._trainer,
                               active_executions[0].id)
        with self.subTest(generate=8):
            self._generate_and_test(use_task_queue,
                                    num_initial_executions=7,
                                    num_tasks_generated=0,
                                    num_new_executions=0,
                                    num_active_executions=0)
        if use_task_queue:
            self.assertTrue(self._task_queue.is_empty())
Example 5
0
    def test_tasks_generated_when_upstream_done(self, use_task_queue):
        """Tests that tasks are generated when upstream is done.

        Args:
          use_task_queue: If task queue is enabled, new tasks are only generated
            if a task with the same task_id does not already exist in the queue.
            `use_task_queue=False` is useful to test the case of task generation
            when task queue is empty (for eg: due to orchestrator restart).
        """
        # Simulate that ExampleGen has already completed successfully.
        otu.fake_example_gen_run(self._mlmd_connection, self._example_gen, 1,
                                 1)

        # Before generation, there's 1 execution.
        with self._mlmd_connection as m:
            executions = m.store.get_executions()
        self.assertLen(executions, 1)

        # Generate once.
        with self.subTest(generate=1):
            tasks, active_executions = self._generate_and_test(
                use_task_queue,
                num_initial_executions=1,
                num_tasks_generated=1,
                num_new_executions=1,
                num_active_executions=1)
            # The first task must target transform, bound to the new execution.
            self._verify_exec_node_task(self._transform,
                                        active_executions[0].id, tasks[0])

        # Should be fine to regenerate multiple times. There should be no new
        # effects.
        with self.subTest(generate=2):
            tasks, active_executions = self._generate_and_test(
                use_task_queue,
                num_initial_executions=2,
                num_tasks_generated=0 if use_task_queue else 1,
                num_new_executions=0,
                num_active_executions=1)
            if not use_task_queue:
                self._verify_exec_node_task(self._transform,
                                            active_executions[0].id, tasks[0])
        with self.subTest(generate=3):
            tasks, active_executions = self._generate_and_test(
                use_task_queue,
                num_initial_executions=2,
                num_tasks_generated=0 if use_task_queue else 1,
                num_new_executions=0,
                num_active_executions=1)
            # Saved for the dequeue check below, after marking completion.
            execution_id = active_executions[0].id
            if not use_task_queue:
                self._verify_exec_node_task(self._transform, execution_id,
                                            tasks[0])

        # Mark transform execution complete.
        otu.fake_transform_output(self._mlmd_connection, self._transform,
                                  active_executions[0])
        # Dequeue the corresponding task if task queue is enabled.
        self._dequeue_and_test(use_task_queue, self._transform, execution_id)

        # Trainer execution task should be generated when generate called again.
        with self.subTest(generate=4):
            tasks, active_executions = self._generate_and_test(
                use_task_queue,
                num_initial_executions=2,
                num_tasks_generated=1,
                num_new_executions=1,
                num_active_executions=1)
            execution_id = active_executions[0].id
            self._verify_exec_node_task(self._trainer, execution_id, tasks[0])

        # Mark the trainer execution complete.
        otu.fake_trainer_output(self._mlmd_connection, self._trainer,
                                active_executions[0])
        # Dequeue the corresponding task if task queue is enabled.
        self._dequeue_and_test(use_task_queue, self._trainer, execution_id)

        # No more components to execute so no tasks are generated.
        with self.subTest(generate=5):
            self._generate_and_test(use_task_queue,
                                    num_initial_executions=3,
                                    num_tasks_generated=0,
                                    num_new_executions=0,
                                    num_active_executions=0)
        if use_task_queue:
            self.assertTrue(self._task_queue.is_empty())
Example 6
0
    def test_task_generation(self):
        """Tests task generation as transform and trainer produce outputs."""
        # NOTE(review): subTest labels are renumbered so every generation round
        # has a unique `generate=` id (the original reused 4 twice), keeping
        # failure reports unambiguous.

        # Simulate that ExampleGen has already completed successfully.
        otu.fake_example_gen_run(self._mlmd_connection, self._example_gen, 1,
                                 1)

        # Before generation, there's 1 execution in MLMD.
        with self._mlmd_connection as m:
            executions = m.store.get_executions()
        self.assertLen(executions, 1)

        # Generate once.
        with self.subTest(generate=1):
            tasks, active_executions = self._generate_and_test(
                num_initial_executions=1,
                num_tasks_generated=1,
                num_new_executions=1,
                num_active_executions=1)
            self._verify_node_execution_task(self._transform,
                                             active_executions[0], tasks[0])

        # No new effects if generate called again.
        with self.subTest(generate=2):
            tasks, active_executions = self._generate_and_test(
                num_initial_executions=2,
                num_tasks_generated=1,
                num_new_executions=0,
                num_active_executions=1)
            self._verify_node_execution_task(self._transform,
                                             active_executions[0], tasks[0])

        # Mark transform execution complete.
        otu.fake_transform_output(self._mlmd_connection, self._transform,
                                  active_executions[0])

        # Trainer execution task should be generated next.
        with self.subTest(generate=3):
            tasks, active_executions = self._generate_and_test(
                num_initial_executions=2,
                num_tasks_generated=1,
                num_new_executions=1,
                num_active_executions=1)
            self._verify_node_execution_task(self._trainer,
                                             active_executions[0], tasks[0])

        # Mark the trainer execution complete.
        otu.fake_trainer_output(self._mlmd_connection, self._trainer,
                                active_executions[0])

        # No more tasks should be generated as there are no new inputs.
        with self.subTest(generate=4):
            self._generate_and_test(num_initial_executions=3,
                                    num_tasks_generated=0,
                                    num_new_executions=0,
                                    num_active_executions=0)

        # Fake another ExampleGen run.
        otu.fake_example_gen_run(self._mlmd_connection, self._example_gen, 1,
                                 1)

        # Both transform and trainer tasks should be generated as they both find
        # new inputs.
        with self.subTest(generate=5):
            tasks, active_executions = self._generate_and_test(
                num_initial_executions=4,
                num_tasks_generated=2,
                num_new_executions=2,
                num_active_executions=2)
            self._verify_node_execution_task(self._transform,
                                             active_executions[0], tasks[0])
            self._verify_node_execution_task(self._trainer,
                                             active_executions[1], tasks[1])

        # Re-generation will produce the same tasks again.
        with self.subTest(generate=6):
            tasks, active_executions = self._generate_and_test(
                num_initial_executions=6,
                num_tasks_generated=2,
                num_new_executions=0,
                num_active_executions=2)
            self._verify_node_execution_task(self._transform,
                                             active_executions[0], tasks[0])
            self._verify_node_execution_task(self._trainer,
                                             active_executions[1], tasks[1])

        # Mark transform execution complete.
        otu.fake_transform_output(self._mlmd_connection, self._transform,
                                  active_executions[0])

        # Mark the trainer execution complete.
        otu.fake_trainer_output(self._mlmd_connection, self._trainer,
                                active_executions[1])

        # Trainer should be triggered again due to transform producing new output.
        with self.subTest(generate=7):
            tasks, active_executions = self._generate_and_test(
                num_initial_executions=6,
                num_tasks_generated=1,
                num_new_executions=1,
                num_active_executions=1)
            self._verify_node_execution_task(self._trainer,
                                             active_executions[0], tasks[0])

        # Finally, no new tasks once trainer completes.
        otu.fake_trainer_output(self._mlmd_connection, self._trainer,
                                active_executions[0])
        with self.subTest(generate=8):
            self._generate_and_test(num_initial_executions=7,
                                    num_tasks_generated=0,
                                    num_new_executions=0,
                                    num_active_executions=0)
  def test_tasks_generated_when_upstream_done(self, use_task_queue):
    """Tests that tasks are generated when upstream is done.

    Args:
      use_task_queue: If task queue is enabled, new tasks are only generated if
        a task with the same task_id does not already exist in the queue.
        `use_task_queue=False` is useful to test the case of task generation
        when task queue is empty (for eg: due to orchestrator restart).
    """
    # Simulate that ExampleGen has already completed successfully.
    otu.fake_example_gen_run(self._mlmd_connection, self._example_gen, 1, 1)

    # Stub the service job manager: it should only be asked about ExampleGen,
    # and it reports that node's services as successful.
    def _ensure_node_services(unused_pipeline_state, node_id):
      self.assertEqual(self._example_gen.node_info.id, node_id)
      return service_jobs.ServiceStatus.SUCCESS

    self._mock_service_job_manager.ensure_node_services.side_effect = (
        _ensure_node_services)

    # Generate once.
    with self.subTest(generate=1):
      tasks, active_executions = self._generate_and_test(
          use_task_queue,
          num_initial_executions=1,
          num_tasks_generated=1,
          num_new_executions=1,
          num_active_executions=1)
      # The first task must target transform, bound to the new execution.
      self._verify_exec_node_task(self._transform, active_executions[0].id,
                                  tasks[0])

    # Task generation must have consulted the service job manager.
    self._mock_service_job_manager.ensure_node_services.assert_called()

    # Should be fine to regenerate multiple times. There should be no new
    # effects.
    with self.subTest(generate=2):
      tasks, active_executions = self._generate_and_test(
          use_task_queue,
          num_initial_executions=2,
          num_tasks_generated=0 if use_task_queue else 1,
          num_new_executions=0,
          num_active_executions=1)
      if not use_task_queue:
        self._verify_exec_node_task(self._transform, active_executions[0].id,
                                    tasks[0])
    with self.subTest(generate=3):
      tasks, active_executions = self._generate_and_test(
          use_task_queue,
          num_initial_executions=2,
          num_tasks_generated=0 if use_task_queue else 1,
          num_new_executions=0,
          num_active_executions=1)
      # Saved for the dequeue check below, after marking completion.
      execution_id = active_executions[0].id
      if not use_task_queue:
        self._verify_exec_node_task(self._transform, execution_id, tasks[0])

    # Mark transform execution complete.
    otu.fake_transform_output(self._mlmd_connection, self._transform,
                              active_executions[0])
    # Dequeue the corresponding task if task queue is enabled.
    self._dequeue_and_test(use_task_queue, self._transform, execution_id)

    # Trainer execution task should be generated when generate called again.
    with self.subTest(generate=4):
      tasks, active_executions = self._generate_and_test(
          use_task_queue,
          num_initial_executions=2,
          num_tasks_generated=1,
          num_new_executions=1,
          num_active_executions=1)
      execution_id = active_executions[0].id
      self._verify_exec_node_task(self._trainer, execution_id, tasks[0])

    # Mark the trainer execution complete.
    otu.fake_trainer_output(self._mlmd_connection, self._trainer,
                            active_executions[0])
    # Dequeue the corresponding task if task queue is enabled.
    self._dequeue_and_test(use_task_queue, self._trainer, execution_id)

    # No more components to execute, FinalizePipelineTask should be generated.
    with self.subTest(generate=5):
      tasks, _ = self._generate_and_test(
          use_task_queue,
          num_initial_executions=3,
          num_tasks_generated=1,
          num_new_executions=0,
          num_active_executions=0)
    # The sole remaining task finalizes the pipeline with an OK status.
    self.assertLen(tasks, 1)
    self.assertTrue(task_lib.is_finalize_pipeline_task(tasks[0]))
    self.assertEqual(status_lib.Code.OK, tasks[0].status.code)
    if use_task_queue:
      self.assertTrue(self._task_queue.is_empty())
    def test_tasks_generated_when_upstream_done(self):
        """Tests that tasks are generated once the upstream node is done."""
        # Simulate that ExampleGen has already completed successfully.
        otu.fake_example_gen_run(self._mlmd_connection, self._example_gen, 1,
                                 1)

        # Before generation, there's 1 execution.
        with self._mlmd_connection as m:
            executions = m.store.get_executions()
        self.assertLen(executions, 1)

        # Generate once.
        with self.subTest(generate=1):
            tasks, active_executions = self._generate_and_test(
                num_initial_executions=1,
                num_tasks_generated=1,
                num_new_executions=1,
                num_active_executions=1)
            # The first task must target transform, bound to the new execution.
            self._verify_node_execution_task(self._transform,
                                             active_executions[0], tasks[0])

        # Should be fine to regenerate multiple times. There should be no new
        # effects.
        with self.subTest(generate=2):
            tasks, active_executions = self._generate_and_test(
                num_initial_executions=2,
                num_tasks_generated=1,
                num_new_executions=0,
                num_active_executions=1)
            self._verify_node_execution_task(self._transform,
                                             active_executions[0], tasks[0])
        with self.subTest(generate=3):
            tasks, active_executions = self._generate_and_test(
                num_initial_executions=2,
                num_tasks_generated=1,
                num_new_executions=0,
                num_active_executions=1)
            self._verify_node_execution_task(self._transform,
                                             active_executions[0], tasks[0])

        # Mark transform execution complete.
        otu.fake_transform_output(self._mlmd_connection, self._transform,
                                  active_executions[0])

        # Trainer execution task should be generated when generate called again.
        with self.subTest(generate=4):
            tasks, active_executions = self._generate_and_test(
                num_initial_executions=2,
                num_tasks_generated=1,
                num_new_executions=1,
                num_active_executions=1)
            self._verify_node_execution_task(self._trainer,
                                             active_executions[0], tasks[0])

        # Mark the trainer execution complete.
        otu.fake_trainer_output(self._mlmd_connection, self._trainer,
                                active_executions[0])

        # No more components to execute so no tasks are generated.
        with self.subTest(generate=5):
            self._generate_and_test(num_initial_executions=3,
                                    num_tasks_generated=0,
                                    num_new_executions=0,
                                    num_active_executions=0)