Exemple #1
0
    def test_launch_recovery_actionruns_for_job_runs(self, mock_filter):
        """Each run handed back by the mocked filter is recovered exactly once.

        ``mock_filter`` is set up to return a pair of lists: two mocked runs
        specced on ``SSHActionRun`` and one specced on ``MesosActionRun``.
        """
        plain_ssh_run = mock.Mock(
            action_runner=NoActionRunnerFactory(),
            spec=SSHActionRun,
        )
        subprocess_ssh_run = mock.Mock(
            action_runner=SubprocessActionRunnerFactory(
                status_path='/tmp/foo',
                exec_path='/tmp/foo',
            ),
            spec=SSHActionRun,
        )
        mesos_run = mock.Mock(
            action_runner=NoActionRunnerFactory(),
            spec=MesosActionRun,
        )
        ssh_runs = [plain_ssh_run, subprocess_ssh_run]
        mock_filter.return_value = (ssh_runs, [mesos_run])

        # NOTE(review): `autospec` is not a Mock() constructor option, so this
        # only sets an attribute named `autospec` on the mock.
        mock_action_runner = mock.Mock(autospec=True)

        launch_recovery_actionruns_for_job_runs(
            [mock.Mock()],
            mock_action_runner,
        )

        assert all(run.recover.call_count == 1 for run in ssh_runs)
        assert mesos_run.recover.call_count == 1
Exemple #2
0
    def test_launch_recovery_actionruns_for_job_runs(self, mock_filter,
                                                     mock_recover_action_run):
        """SSH runs go through ``recover_action_run``; the Mesos run recovers itself.

        The expected calls encode which runner each SSH run is recovered with:
        the run built with ``NoActionRunnerFactory`` gets the runner passed to
        the function under test, while the run built with
        ``SubprocessActionRunnerFactory`` gets its own ``action_runner``.
        """
        run_without_runner = mock.Mock(
            action_runner=NoActionRunnerFactory(),
            spec=SSHActionRun,
        )
        run_with_runner = mock.Mock(
            action_runner=SubprocessActionRunnerFactory(
                status_path='/tmp/foo',
                exec_path='/tmp/foo',
            ),
            spec=SSHActionRun,
        )
        mesos_run = mock.Mock(
            action_runner=NoActionRunnerFactory(),
            spec=MesosActionRun,
        )
        mock_filter.return_value = (
            [run_without_runner, run_with_runner],
            [mesos_run],
        )

        # NOTE(review): `autospec` is not a Mock() constructor option, so this
        # only sets an attribute named `autospec` on the mock.
        mock_action_runner = mock.Mock(autospec=True)

        launch_recovery_actionruns_for_job_runs(
            [mock.Mock()],
            mock_action_runner,
        )

        expected_recover_calls = [
            call(run_without_runner, mock_action_runner),
            call(run_with_runner, run_with_runner.action_runner),
        ]
        mock_recover_action_run.assert_has_calls(
            expected_recover_calls,
            any_order=True,
        )

        assert mesos_run.recover.call_count == 1
Exemple #3
0
    def test_launch_recovery_actionruns_for_job_runs(self):
        """SSH runs go through ``recover_action_run``; the Mesos run recovers itself.

        Both ``filter_action_runs_needing_recovery`` and ``recover_action_run``
        are patched in ``tron.core.recovery``. The filter returns a flat list
        of three mocked runs: two specced on ``SSHActionRun`` followed by one
        specced on ``MesosActionRun``.
        """
        filter_patch = mock.patch(
            'tron.core.recovery.filter_action_runs_needing_recovery',
            autospec=True,
        )
        recover_patch = mock.patch(
            'tron.core.recovery.recover_action_run',
            autospec=True,
        )

        with filter_patch as mock_filter, recover_patch as mock_recover_action_run:
            run_without_runner = mock.Mock(
                action_runner=NoActionRunnerFactory(),
                spec=SSHActionRun,
            )
            run_with_runner = mock.Mock(
                action_runner=SubprocessActionRunnerFactory(
                    status_path='/tmp/foo',
                    exec_path='/tmp/foo',
                ),
                spec=SSHActionRun,
            )
            mesos_run = mock.Mock(
                action_runner=NoActionRunnerFactory(),
                spec=MesosActionRun,
            )
            mock_filter.return_value = [
                run_without_runner,
                run_with_runner,
                mesos_run,
            ]

            # NOTE(review): `autospec` is not a Mock() constructor option, so
            # this only sets an attribute named `autospec` on the mock.
            mock_action_runner = mock.Mock(autospec=True)

            launch_recovery_actionruns_for_job_runs(
                [mock.Mock()],
                mock_action_runner,
            )

            # The runner-less run is expected to use the runner passed in;
            # the other one is expected to keep its own.
            expected_recover_calls = [
                call(run_without_runner, mock_action_runner),
                call(run_with_runner, run_with_runner.action_runner),
            ]
            mock_recover_action_run.assert_has_calls(
                expected_recover_calls,
                any_order=True,
            )

            assert mesos_run.recover.call_count == 1
Exemple #4
0
    def __init__(
        self,
        job_run_id,
        name,
        node,
        command_config,
        parent_context=None,
        output_path=None,
        cleanup=False,
        start_time=None,
        end_time=None,
        run_state=SCHEDULED,
        exit_status=None,
        attempts=None,
        action_runner=None,
        retries_remaining=None,
        retries_delay=None,
        machine=None,
        executor=None,
        trigger_downstreams=None,
        triggered_by=None,
        on_upstream_rerun=None,
        trigger_timeout_timestamp=None,
        original_command=None,
    ):
        """Initialise the attributes of an action run.

        Most arguments are stored unchanged on the instance under the same
        name. The exceptions are:

        Args:
            job_run_id: Stored as ``self.job_run_id`` after ``maybe_decode``.
            name: Stored as ``self.action_name`` after ``maybe_decode``.
            command_config: Stored as given; its ``command`` attribute is the
                fallback for ``original_command``.
            parent_context: Only passed to ``command_context.build_context``;
                not stored directly.
            output_path: Falls back to a new ``filehandler.OutputPath()`` when
                falsy. The action name is appended to it, so a caller-supplied
                object is modified in place.
            cleanup: Stored as ``self.is_cleanup``.
            run_state: Only used to build the state machine when ``machine``
                is not supplied.
            attempts: Falls back to a new empty list when falsy.
            action_runner: Falls back to ``NoActionRunnerFactory()`` when
                falsy.
            machine: Pre-built state machine; when falsy, one is created from
                ``ActionRun.STATE_MACHINE`` and ``run_state``.
            original_command: Falls back to ``command_config.command`` when
                falsy.
        """
        super().__init__()
        # NOTE(review): maybe_decode presumably normalises bytes ids/names to
        # str -- confirm against its definition.
        self.job_run_id = maybe_decode(job_run_id)
        self.action_name = maybe_decode(name)
        self.node = node
        self.start_time = start_time
        self.end_time = end_time
        self.exit_status = exit_status
        self.action_runner = action_runner or NoActionRunnerFactory()
        # An explicitly supplied machine wins; run_state is ignored in that case.
        self.machine = machine or Machine.from_machine(
            ActionRun.STATE_MACHINE, None, run_state
        )
        self.is_cleanup = cleanup

        self.executor = executor
        self.command_config = command_config
        self.original_command = original_command or command_config.command
        self.attempts = attempts or []
        self.output_path = output_path or filehandler.OutputPath()
        # Mutates output_path in place, including one passed in by the caller.
        self.output_path.append(self.action_name)
        # build_context receives this partially initialised instance: the
        # attributes assigned below are not set yet at this point.
        self.context = command_context.build_context(self, parent_context)
        self.retries_remaining = retries_remaining
        self.retries_delay = retries_delay
        self.trigger_downstreams = trigger_downstreams
        self.triggered_by = triggered_by
        self.on_upstream_rerun = on_upstream_rerun
        self.trigger_timeout_timestamp = trigger_timeout_timestamp
        self.trigger_timeout_call = None

        # Not configurable through the constructor; always start out unset.
        self.action_command = None
        self.in_delay = None
Exemple #5
0
    def from_state(
        cls,
        state_data,
        parent_context,
        output_path,
        job_run_node,
        cleanup=False,
    ):
        """Rebuild an action run from its serialized ``state_data`` mapping.

        Required keys are ``command``, ``start_time``, ``end_time`` and
        ``state``, plus either ``id`` or both ``job_run_id`` and
        ``action_name``. Every other key is optional and read with ``.get``.

        A restored run that reports ``is_running`` is moved on with
        ``_done('fail_unknown')``; one that reports ``is_starting`` is passed
        to ``_exit_unsuccessful(None)``.
        """
        node_pools = node.NodePoolRepository.get_instance()

        # Older state stores a single dotted 'id'; newer state stores the two
        # parts separately.
        if 'id' not in state_data:
            job_run_id = state_data['job_run_id']
            action_name = state_data['action_name']
        else:
            job_run_id, action_name = state_data['id'].rsplit('.', 1)

        resolved_node = node_pools.get_node(
            state_data.get('node_name'),
            job_run_node,
        )

        runner_kwargs = state_data.get('action_runner')
        action_runner = (
            SubprocessActionRunnerFactory(**runner_kwargs)
            if runner_kwargs
            else NoActionRunnerFactory()
        )

        # Optional fields whose constructor keyword matches the state key.
        passthrough_keys = (
            'rendered_command',
            'exit_status',
            'retries_remaining',
            'retries_delay',
            'exit_statuses',
            'cpus',
            'mem',
            'constraints',
            'docker_image',
            'docker_parameters',
            'env',
            'extra_volumes',
            'mesos_task_id',
            'trigger_downstreams',
            'triggered_by',
            'on_upstream_rerun',
        )
        passthrough = {key: state_data.get(key) for key in passthrough_keys}

        run = cls(
            job_run_id=job_run_id,
            name=action_name,
            node=resolved_node,
            parent_context=parent_context,
            output_path=output_path,
            bare_command=state_data['command'],
            cleanup=cleanup,
            start_time=state_data['start_time'],
            end_time=state_data['end_time'],
            run_state=state_data['state'],
            action_runner=action_runner,
            executor=state_data.get('executor', ExecutorTypes.ssh),
            **passthrough
        )

        # The exit status of a run that was in flight was missed, so it cannot
        # stay in an in-progress state. The two checks are deliberately
        # separate: the second is evaluated after the first has acted.
        if run.is_running:
            run._done('fail_unknown')
        if run.is_starting:
            run._exit_unsuccessful(None)
        return run
Exemple #6
0
    def __init__(
        self,
        job_run_id,
        name,
        node,
        bare_command=None,
        parent_context=None,
        output_path=None,
        cleanup=False,
        start_time=None,
        end_time=None,
        run_state=SCHEDULED,
        rendered_command=None,
        exit_status=None,
        action_runner=None,
        retries_remaining=None,
        retries_delay=None,
        exit_statuses=None,
        machine=None,
        executor=None,
        cpus=None,
        mem=None,
        constraints=None,
        docker_image=None,
        docker_parameters=None,
        env=None,
        extra_volumes=None,
        mesos_task_id=None,
        trigger_downstreams=None,
        triggered_by=None,
        on_upstream_rerun=None,
    ):
        """Initialise the attributes of an action run.

        Most arguments are stored unchanged on the instance under the same
        name. The exceptions are:

        Args:
            job_run_id: Stored as ``self.job_run_id`` after ``maybe_decode``.
            name: Stored as ``self.action_name`` after ``maybe_decode``.
            bare_command: Stored after ``maybe_decode``.
            parent_context: Only passed to ``command_context.build_context``;
                not stored directly.
            output_path: Falls back to a new ``filehandler.OutputPath()`` when
                falsy. ``self.id`` is appended to it, so a caller-supplied
                object is modified in place.
            cleanup: Stored as ``self.is_cleanup``.
            run_state: Only used to build the state machine when ``machine``
                is not supplied.
            action_runner: Falls back to ``NoActionRunnerFactory()`` when
                falsy.
            exit_statuses: ``None`` is replaced by a new empty list; any other
                value (including an empty list) is kept as passed.
            machine: Pre-built state machine; when falsy, one is created from
                ``ActionRun.STATE_MACHINE`` and ``run_state``.
        """
        super().__init__()
        # NOTE(review): maybe_decode presumably normalises bytes values to
        # str -- confirm against its definition.
        self.job_run_id = maybe_decode(job_run_id)
        self.action_name = maybe_decode(name)
        self.node = node
        self.start_time = start_time
        self.end_time = end_time
        self.exit_status = exit_status
        self.bare_command = maybe_decode(bare_command)
        self.rendered_command = rendered_command
        self.action_runner = action_runner or NoActionRunnerFactory()
        # An explicitly supplied machine wins; run_state is ignored in that case.
        self.machine = machine or Machine.from_machine(ActionRun.STATE_MACHINE,
                                                       None, run_state)
        self.is_cleanup = cleanup
        self.executor = executor
        self.cpus = cpus
        self.mem = mem
        self.constraints = constraints
        self.docker_image = docker_image
        self.docker_parameters = docker_parameters
        self.env = env
        self.extra_volumes = extra_volumes
        self.mesos_task_id = mesos_task_id
        self.output_path = output_path or filehandler.OutputPath()
        # `self.id` is not assigned in this method; presumably a property
        # derived from job_run_id/action_name -- confirm on the class.
        # Mutates output_path in place, including one passed in by the caller.
        self.output_path.append(self.id)
        # build_context receives this partially initialised instance: the
        # attributes assigned below are not set yet at this point.
        self.context = command_context.build_context(self, parent_context)
        self.retries_remaining = retries_remaining
        self.retries_delay = retries_delay
        self.exit_statuses = exit_statuses
        self.trigger_downstreams = trigger_downstreams
        self.triggered_by = triggered_by
        self.on_upstream_rerun = on_upstream_rerun

        # Only None is defaulted, giving each instance its own list.
        if self.exit_statuses is None:
            self.exit_statuses = []

        # Not configurable through the constructor; always start out unset.
        self.action_command = None
        self.in_delay = None
Exemple #7
0
    def from_state(
        cls,
        state_data,
        parent_context,
        output_path,
        job_run_node,
        action_graph,
        cleanup=False,
    ):
        """Rebuild an action run from its serialized ``state_data`` mapping.

        Required keys are ``start_time``, ``end_time`` and ``state``, plus
        either ``id`` or both ``job_run_id`` and ``action_name``. Every other
        key is optional and read with ``.get``.

        The command configuration is looked up by action name in
        ``action_graph.action_map``; when there is no (truthy) entry, an
        ``ActionCommandConfig`` with an empty command is used instead.

        A restored run that reports ``is_active`` is transitioned with
        ``fail_unknown`` before being returned.
        """
        node_pools = node.NodePoolRepository.get_instance()

        # Older state stores a single dotted 'id'; newer state stores the two
        # parts separately.
        if 'id' not in state_data:
            job_run_id = state_data['job_run_id']
            action_name = state_data['action_name']
        else:
            job_run_id, action_name = state_data['id'].rsplit('.', 1)

        resolved_node = node_pools.get_node(
            state_data.get('node_name'),
            job_run_node,
        )

        runner_kwargs = state_data.get('action_runner')
        action_runner = (
            SubprocessActionRunnerFactory(**runner_kwargs)
            if runner_kwargs
            else NoActionRunnerFactory()
        )

        known_action = action_graph.action_map.get(action_name)
        if not known_action:
            command_config = action.ActionCommandConfig(command='')
        else:
            command_config = known_action.command_config

        # Optional fields whose constructor keyword matches the state key.
        passthrough_keys = (
            'original_command',
            'exit_status',
            'retries_remaining',
            'retries_delay',
            'trigger_downstreams',
            'triggered_by',
            'on_upstream_rerun',
            'trigger_timeout_timestamp',
        )

        restored_attempts = cls.attempts_from_state(state_data, command_config)
        passthrough = {key: state_data.get(key) for key in passthrough_keys}

        run = cls(
            job_run_id=job_run_id,
            name=action_name,
            node=resolved_node,
            parent_context=parent_context,
            output_path=output_path,
            command_config=command_config,
            cleanup=cleanup,
            start_time=state_data['start_time'],
            end_time=state_data['end_time'],
            run_state=state_data['state'],
            attempts=restored_attempts,
            action_runner=action_runner,
            executor=state_data.get('executor', ExecutorTypes.ssh.value),
            **passthrough
        )

        # The exit status of a run that was in flight was missed, so mark it
        # as unknown; recovery will look for unknown runs.
        if run.is_active:
            run.transition_and_notify('fail_unknown')
        return run