Ejemplo n.º 1
0
    def test_launch_recovery_actionruns_for_job_runs(self, mock_filter):
        """Check that each run handed back by the filter is recovered once.

        ``mock_filter`` is injected from outside this snippet (presumably by a
        patch decorator); its return value is an ``(ssh_runs, mesos_runs)``
        pair built below.
        """
        plain_ssh_run = mock.Mock(
            spec=SSHActionRun,
            action_runner=NoActionRunnerFactory(),
        )
        subprocess_ssh_run = mock.Mock(
            spec=SSHActionRun,
            action_runner=SubprocessActionRunnerFactory(
                status_path='/tmp/foo',
                exec_path='/tmp/foo',
            ),
        )
        mesos_run = mock.Mock(
            spec=MesosActionRun,
            action_runner=NoActionRunnerFactory(),
        )
        ssh_runs = [plain_ssh_run, subprocess_ssh_run]
        mock_filter.return_value = (ssh_runs, [mesos_run])

        # NOTE(review): ``autospec=True`` on Mock() only sets an attribute
        # named ``autospec``; it does not enable auto-speccing. Kept as-is.
        master_action_runner = mock.Mock(autospec=True)
        job_run = mock.Mock()

        launch_recovery_actionruns_for_job_runs(
            [job_run],
            master_action_runner,
        )

        # Both SSH runs, regardless of their runner factory, get recovered.
        assert all(run.recover.call_count == 1 for run in ssh_runs)
        # The single Mesos run is recovered as well.
        assert mesos_run.recover.call_count == 1
Ejemplo n.º 2
0
    def test_launch_recovery_actionruns_for_job_runs(self, mock_filter,
                                                     mock_recover_action_run):
        """Check how SSH and Mesos runs are dispatched for recovery.

        Both mock arguments are injected from outside this snippet (presumably
        by patch decorators). SSH runs must be passed to the patched
        ``recover_action_run``; the Mesos run must have ``recover`` called.
        """
        plain_ssh_run = mock.Mock(
            spec=SSHActionRun,
            action_runner=NoActionRunnerFactory(),
        )
        subprocess_ssh_run = mock.Mock(
            spec=SSHActionRun,
            action_runner=SubprocessActionRunnerFactory(
                status_path='/tmp/foo',
                exec_path='/tmp/foo',
            ),
        )
        mesos_run = mock.Mock(
            spec=MesosActionRun,
            action_runner=NoActionRunnerFactory(),
        )
        mock_filter.return_value = (
            [plain_ssh_run, subprocess_ssh_run],
            [mesos_run],
        )

        # NOTE(review): ``autospec=True`` on Mock() only sets an attribute
        # named ``autospec``; it does not enable auto-speccing. Kept as-is.
        master_action_runner = mock.Mock(autospec=True)
        job_run = mock.Mock()

        launch_recovery_actionruns_for_job_runs(
            [job_run],
            master_action_runner,
        )

        # The run without a runner of its own is expected to use the runner
        # passed in; the run with a subprocess runner is expected to use its
        # own.
        expected_calls = [
            call(plain_ssh_run, master_action_runner),
            call(subprocess_ssh_run, subprocess_ssh_run.action_runner),
        ]
        mock_recover_action_run.assert_has_calls(
            expected_calls,
            any_order=True,
        )

        assert mesos_run.recover.call_count == 1
Ejemplo n.º 3
0
    def test_launch_recovery_actionruns_for_job_runs(self):
        """Check how SSH and Mesos runs are dispatched for recovery.

        The filter and ``recover_action_run`` in ``tron.core.recovery`` are
        patched for the duration of the test; the filter returns a flat list
        of two SSH runs followed by one Mesos run.
        """
        filter_patch = mock.patch(
            'tron.core.recovery.filter_action_runs_needing_recovery',
            autospec=True,
        )
        recover_patch = mock.patch(
            'tron.core.recovery.recover_action_run',
            autospec=True,
        )
        with filter_patch as mock_filter, \
                recover_patch as mock_recover_action_run:
            plain_ssh_run = mock.Mock(
                spec=SSHActionRun,
                action_runner=NoActionRunnerFactory(),
            )
            subprocess_ssh_run = mock.Mock(
                spec=SSHActionRun,
                action_runner=SubprocessActionRunnerFactory(
                    status_path='/tmp/foo',
                    exec_path='/tmp/foo',
                ),
            )
            mesos_run = mock.Mock(
                spec=MesosActionRun,
                action_runner=NoActionRunnerFactory(),
            )
            mock_filter.return_value = [
                plain_ssh_run,
                subprocess_ssh_run,
                mesos_run,
            ]

            # NOTE(review): ``autospec=True`` on Mock() only sets an attribute
            # named ``autospec``; it does not enable auto-speccing. Kept as-is.
            master_action_runner = mock.Mock(autospec=True)
            job_run = mock.Mock()

            launch_recovery_actionruns_for_job_runs(
                [job_run],
                master_action_runner,
            )

            # The run without a runner of its own is expected to use the
            # runner passed in; the other is expected to use its own.
            expected_calls = [
                call(plain_ssh_run, master_action_runner),
                call(subprocess_ssh_run, subprocess_ssh_run.action_runner),
            ]
            mock_recover_action_run.assert_has_calls(
                expected_calls,
                any_order=True,
            )

            assert mesos_run.recover.call_count == 1
Ejemplo n.º 4
0
 def test_recover_action_run_action_runner(self):
     """Recovering an UNKNOWN SSH run resubmits a command and resets it.

     After ``recover_action_run`` the node must have received exactly one
     ``submit_command`` call, and the run must be RUNNING with its end time
     and exit status cleared.
     """
     runner_factory = SubprocessActionRunnerFactory(
         status_path='/tmp/foo',
         exec_path='/bin/foo',
     )
     node_mock = mock.Mock()
     # Start from a run that looks finished (end time and exit status set).
     finished_run = SSHActionRun(
         job_run_id="test.succeeded",
         name="test.succeeded",
         node=node_mock,
         action_runner=runner_factory,
         end_time=timeutils.current_time(),
         exit_status=0,
     )
     finished_run.machine.state = ActionRun.UNKNOWN

     recover_action_run(finished_run, runner_factory)

     node_mock.submit_command.assert_called_once()
     assert finished_run.machine.state == ActionRun.RUNNING
     assert finished_run.end_time is None
     assert finished_run.exit_status is None
Ejemplo n.º 5
0
    def from_state(
        cls,
        state_data,
        parent_context,
        output_path,
        job_run_node,
        cleanup=False,
    ):
        """Rebuild an action run from its serialized ``state_data``.

        Args:
            state_data: mapping of saved fields. ``command``, ``start_time``,
                ``end_time`` and ``state`` are required; the run identity
                comes either from a legacy ``id`` key
                (``"<job_run_id>.<action_name>"``) or from separate
                ``job_run_id`` / ``action_name`` keys.
            parent_context: forwarded unchanged to the constructor.
            output_path: forwarded unchanged to the constructor.
            job_run_node: passed to the node pool repository together with
                the saved ``node_name`` (presumably as a fallback; confirm
                against ``get_node``).
            cleanup: forwarded unchanged to the constructor.

        Returns:
            The restored run. A run restored as running is moved through
            ``fail_unknown``; a run restored as starting is exited as
            unsuccessful.
        """
        repo = node.NodePoolRepository.get_instance()

        # Legacy state stored a single dotted 'id' instead of two fields.
        if 'id' not in state_data:
            job_run_id = state_data['job_run_id']
            action_name = state_data['action_name']
        else:
            job_run_id, action_name = state_data['id'].rsplit('.', 1)

        lookup = state_data.get
        restored_node = repo.get_node(lookup('node_name'), job_run_node)

        # Missing or empty runner settings select the no-op runner factory.
        runner_kwargs = lookup('action_runner')
        action_runner = (
            SubprocessActionRunnerFactory(**runner_kwargs)
            if runner_kwargs
            else NoActionRunnerFactory()
        )

        run = cls(
            job_run_id=job_run_id,
            name=action_name,
            node=restored_node,
            parent_context=parent_context,
            output_path=output_path,
            rendered_command=lookup('rendered_command'),
            bare_command=state_data['command'],
            cleanup=cleanup,
            start_time=state_data['start_time'],
            end_time=state_data['end_time'],
            run_state=state_data['state'],
            exit_status=lookup('exit_status'),
            retries_remaining=lookup('retries_remaining'),
            retries_delay=lookup('retries_delay'),
            exit_statuses=lookup('exit_statuses'),
            action_runner=action_runner,
            executor=lookup('executor', ExecutorTypes.ssh),
            cpus=lookup('cpus'),
            mem=lookup('mem'),
            constraints=lookup('constraints'),
            docker_image=lookup('docker_image'),
            docker_parameters=lookup('docker_parameters'),
            env=lookup('env'),
            extra_volumes=lookup('extra_volumes'),
            mesos_task_id=lookup('mesos_task_id'),
            trigger_downstreams=lookup('trigger_downstreams'),
            triggered_by=lookup('triggered_by'),
            on_upstream_rerun=lookup('on_upstream_rerun'),
        )

        # The exit status of an in-flight run was missed, so it cannot be
        # restored as still running or starting. The two checks are kept
        # separate and in this order, as in the original.
        if run.is_running:
            run._done('fail_unknown')
        if run.is_starting:
            run._exit_unsuccessful(None)
        return run
Ejemplo n.º 6
0
    def from_state(
        cls,
        state_data,
        parent_context,
        output_path,
        job_run_node,
        action_graph,
        cleanup=False,
    ):
        """Rebuild an action run from its serialized ``state_data``.

        Args:
            state_data: mapping of saved fields. ``start_time``, ``end_time``
                and ``state`` are required; the run identity comes either
                from a legacy ``id`` key (``"<job_run_id>.<action_name>"``)
                or from separate ``job_run_id`` / ``action_name`` keys.
            parent_context: forwarded unchanged to the constructor.
            output_path: forwarded unchanged to the constructor.
            job_run_node: passed to the node pool repository together with
                the saved ``node_name`` (presumably as a fallback; confirm
                against ``get_node``).
            action_graph: its ``action_map`` is searched by action name for
                the command configuration.
            cleanup: forwarded unchanged to the constructor.

        Returns:
            The restored run. A run restored in an active state is
            transitioned through ``fail_unknown``.
        """
        repo = node.NodePoolRepository.get_instance()

        # Legacy state stored a single dotted 'id' instead of two fields.
        if 'id' not in state_data:
            job_run_id = state_data['job_run_id']
            action_name = state_data['action_name']
        else:
            job_run_id, action_name = state_data['id'].rsplit('.', 1)

        lookup = state_data.get
        restored_node = repo.get_node(lookup('node_name'), job_run_node)

        # Missing or empty runner settings select the no-op runner factory.
        runner_kwargs = lookup('action_runner')
        action_runner = (
            SubprocessActionRunnerFactory(**runner_kwargs)
            if runner_kwargs
            else NoActionRunnerFactory()
        )

        # If the graph has no (truthy) entry for this action, fall back to
        # an empty command configuration.
        known_action = action_graph.action_map.get(action_name)
        command_config = (
            known_action.command_config
            if known_action
            else action.ActionCommandConfig(command='')
        )

        run = cls(
            job_run_id=job_run_id,
            name=action_name,
            node=restored_node,
            parent_context=parent_context,
            output_path=output_path,
            command_config=command_config,
            original_command=lookup('original_command'),
            cleanup=cleanup,
            start_time=state_data['start_time'],
            end_time=state_data['end_time'],
            run_state=state_data['state'],
            exit_status=lookup('exit_status'),
            attempts=cls.attempts_from_state(state_data, command_config),
            retries_remaining=lookup('retries_remaining'),
            retries_delay=lookup('retries_delay'),
            action_runner=action_runner,
            executor=lookup('executor', ExecutorTypes.ssh.value),
            trigger_downstreams=lookup('trigger_downstreams'),
            triggered_by=lookup('triggered_by'),
            on_upstream_rerun=lookup('on_upstream_rerun'),
            trigger_timeout_timestamp=lookup('trigger_timeout_timestamp'),
        )

        # The exit status of an active run was missed while state was saved,
        # so mark it unknown; recovery will look for unknown runs.
        if run.is_active:
            run.transition_and_notify('fail_unknown')
        return run