Example #1
0
 def _finish_node_execution(self, use_task_queue, exec_node_task):
   """Fakes a successful execution of the node behind `exec_node_task`.

   Args:
     use_task_queue: When truthy, one task is additionally dequeued from
       `self._task_queue`, marked done, and its id is compared against the
       id of `exec_node_task`.
     exec_node_task: The task whose node execution is faked through
       `test_utils.fake_execute_node`.
   """
   test_utils.fake_execute_node(self._mlmd_connection, exec_node_task)
   if not use_task_queue:
     return
   # `task_done` runs before the id comparison, so it is called even when
   # the ids turn out to differ.
   queue = self._task_queue
   head_task = queue.dequeue()
   queue.task_done(head_task)
   self.assertEqual(exec_node_task.task_id, head_task.task_id)
  def test_cached_execution(self):
    """Checks that an available cached execution is reused by a new run."""
    mlmd_connection = self._mlmd_connection

    # Seed MLMD with a fake ExampleGen run.
    fake_example_gen_execution = otu.fake_example_gen_run(
        mlmd_connection, self._example_gen, 1, 1)

    # The first generator pass must yield exactly one ExecNodeTask, and it
    # must target StatsGen.
    first_pass_tasks = self._generate_and_test(
        False,
        num_initial_executions=1,
        num_tasks_generated=1,
        num_new_executions=1,
        num_active_executions=1)
    [stats_gen_task] = first_pass_tasks
    self.assertEqual('my_statistics_gen', stats_gen_task.node_uid.node_id)

    # Complete the StatsGen execution.
    otu.fake_execute_node(mlmd_connection, stats_gen_task)

    # Build a second pipeline under a freshly generated pipeline_run_id.
    second_run_id = str(uuid.uuid4())
    second_pipeline = self._make_pipeline(self._pipeline_root, second_run_id)

    with mlmd_connection as handle:
      example_gen_contexts = handle.store.get_contexts_by_execution(
          fake_example_gen_execution.id)
      # For ease of testing, the node context doubles as the cache context.
      node_contexts = [
          ctx for ctx in example_gen_contexts if ctx.name == 'my_example_gen'
      ]
      cache_context = node_contexts[0]

    # Fake a cached ExampleGen execution for the second pipeline.
    second_example_gen = otu.get_node(second_pipeline, 'my_example_gen')
    otu.fake_cached_execution(mlmd_connection, cache_context,
                              second_example_gen)

    second_stats_gen = otu.get_node(second_pipeline, 'my_statistics_gen')

    # Running the generator on the second pipeline is expected to:
    # 1. Complete StatsGen in state "CACHED" without emitting an ExecNodeTask
    #    for it.
    # 2. Emit an ExecNodeTask for SchemaGen (the component downstream of
    #    StatsGen), backed by an active execution in MLMD.
    second_pass_tasks = self._generate_and_test(
        False,
        pipeline=second_pipeline,
        num_initial_executions=3,
        num_tasks_generated=1,
        num_new_executions=2,
        num_active_executions=1)
    [schema_gen_task] = second_pass_tasks
    self.assertEqual('my_schema_gen', schema_gen_task.node_uid.node_id)

    # StatsGen must have a single, successful execution in state "CACHED".
    with mlmd_connection as handle:
      stats_gen_executions = task_gen_utils.get_executions(
          handle, second_stats_gen)
      self.assertLen(stats_gen_executions, 1)
      cached_execution = stats_gen_executions[0]
      self.assertTrue(
          execution_lib.is_execution_successful(cached_execution))
      self.assertEqual(metadata_store_pb2.Execution.CACHED,
                       cached_execution.last_known_state)
Example #3
0
 def _finish_node_execution(self,
                            use_task_queue,
                            exec_node_task,
                            artifact_custom_properties=None):
     """Drives `exec_node_task` through a faked, successful node execution.

     The task is first handed to `_start_processing`, then its node execution
     is faked with `test_utils.fake_execute_node`, and finally the task is
     handed to `_finish_processing`.

     Args:
       use_task_queue: Forwarded unchanged to `_start_processing` and
         `_finish_processing`.
       exec_node_task: The task to run through the faked execution.
       artifact_custom_properties: Optional value forwarded to
         `fake_execute_node` under the same keyword; defaults to `None`.
     """
     # Both processing hooks receive the exact same positional arguments.
     task_args = (use_task_queue, exec_node_task)
     self._start_processing(*task_args)
     mlmd_connection = self._mlmd_connection
     test_utils.fake_execute_node(
         mlmd_connection, exec_node_task,
         artifact_custom_properties=artifact_custom_properties)
     self._finish_processing(*task_args)
Example #4
0
    def test_pipeline_succeeds_when_terminal_nodes_succeed(
            self, use_task_queue, fail_fast):
        """Verifies the pipeline is finalized only once terminal nodes succeed.

        Args:
          use_task_queue: If task queue is enabled, new tasks are only
            generated if a task with the same task_id does not already exist
            in the queue. `use_task_queue=False` is useful to test task
            generation when the task queue is empty (e.g. due to an
            orchestrator restart).
          fail_fast: If `True`, the pipeline is aborted immediately if any
            node fails.
        """
        mlmd_connection = self._mlmd_connection
        validator = self._example_validator

        # Confirm which nodes are considered terminal.
        expected_terminal_ids = {
            validator.node_info.id,
            self._chore_b.node_info.id,
            # The evaluator runs conditionally and its condition always
            # evaluates to False in this test, so its execution is skipped.
            self._evaluator.node_info.id,
        }
        topsorted_layers = sptg._topsorted_layers(self._pipeline)
        self.assertEqual(expected_terminal_ids,
                         sptg._terminal_node_ids(topsorted_layers))

        # Begin pipeline execution.
        test_utils.fake_example_gen_run(mlmd_connection, self._example_gen,
                                        1, 1)

        self._run_next(use_task_queue, expect_nodes=[self._stats_gen])
        self._run_next(use_task_queue, expect_nodes=[self._schema_gen])

        # Example-validator and transform become runnable together.
        example_validator_task, transform_task = self._generate(
            use_task_queue, True, fail_fast=fail_fast)
        self.assertEqual(validator.node_info.id,
                         example_validator_task.node_uid.node_id)
        self.assertEqual(self._transform.node_info.id,
                         transform_task.node_uid.node_id)
        # Example-validator is started but deliberately left unfinished ...
        self._start_processing(use_task_queue, example_validator_task)
        # ... while transform, from the same layer, is run to completion.
        self._finish_node_execution(use_task_queue, transform_task)

        # With transform done, trainer and the nodes after it can run one at
        # a time. Without a task queue the example-validator task keeps being
        # regenerated because its execution is still active.
        for downstream_node in (self._trainer, self._chore_a, self._chore_b):
            if use_task_queue:
                expected_nodes = [downstream_node]
            else:
                expected_nodes = [validator, downstream_node]
            self._run_next(use_task_queue,
                           expect_nodes=expected_nodes,
                           finish_nodes=[downstream_node],
                           fail_fast=fail_fast)

        # Nothing new is left to run; only the still-active example-validator
        # task may show up (when no task queue is used).
        remaining_nodes = [] if use_task_queue else [validator]
        self._run_next(use_task_queue,
                       expect_nodes=remaining_nodes,
                       finish_nodes=[],
                       fail_fast=fail_fast)

        # A FinalizePipelineTask appears only after example-validator is done.
        test_utils.fake_execute_node(mlmd_connection, example_validator_task)
        self._finish_processing(use_task_queue, example_validator_task)
        final_tasks = self._generate(use_task_queue, True,
                                     fail_fast=fail_fast)
        [finalize_task] = final_tasks
        self.assertTrue(task_lib.is_finalize_pipeline_task(finalize_task))
        self.assertEqual(status_lib.Code.OK, finalize_task.status.code)