def test_launch_recovery_actionruns_for_job_runs(self, mock_filter):
    """Every action run returned by the recovery filter gets recovered.

    The filter yields two SSH runs (one with no action runner, one with a
    SubprocessActionRunnerFactory) and one Mesos run; each must have its
    ``recover`` method invoked exactly once.
    """
    ssh_no_runner = mock.Mock(
        action_runner=NoActionRunnerFactory(),
        spec=SSHActionRun,
    )
    ssh_with_runner = mock.Mock(
        action_runner=SubprocessActionRunnerFactory(
            status_path='/tmp/foo',
            exec_path=('/tmp/foo'),
        ),
        spec=SSHActionRun,
    )
    mesos_run = mock.Mock(
        action_runner=NoActionRunnerFactory(),
        spec=MesosActionRun,
    )
    mock_filter.return_value = (
        [ssh_no_runner, ssh_with_runner],
        [mesos_run],
    )
    # NOTE(review): autospec is not a Mock() kwarg — it just sets an
    # `autospec` attribute on the mock; consider mock.create_autospec.
    mock_action_runner = mock.Mock(autospec=True)
    mock_job_run = mock.Mock()

    launch_recovery_actionruns_for_job_runs([mock_job_run], mock_action_runner)

    # Both SSH runs and the Mesos run must each be recovered once.
    for run in (ssh_no_runner, ssh_with_runner, mesos_run):
        assert run.recover.call_count == 1
def test_launch_recovery_actionruns_for_job_runs(self, mock_filter, mock_recover_action_run):
    """SSH runs are recovered through ``recover_action_run``.

    A run with no action runner is recovered using the supplied master
    action runner, while a run that carries its own
    SubprocessActionRunnerFactory is recovered with that factory.  The
    Mesos run is recovered via its own ``recover`` method instead.
    """
    ssh_no_runner = mock.Mock(
        action_runner=NoActionRunnerFactory(),
        spec=SSHActionRun,
    )
    ssh_with_runner = mock.Mock(
        action_runner=SubprocessActionRunnerFactory(
            status_path='/tmp/foo',
            exec_path=('/tmp/foo'),
        ),
        spec=SSHActionRun,
    )
    mesos_run = mock.Mock(
        action_runner=NoActionRunnerFactory(),
        spec=MesosActionRun,
    )
    mock_filter.return_value = (
        [ssh_no_runner, ssh_with_runner],
        [mesos_run],
    )
    # NOTE(review): autospec is not a Mock() kwarg — it just sets an
    # `autospec` attribute on the mock; consider mock.create_autospec.
    mock_action_runner = mock.Mock(autospec=True)
    mock_job_run = mock.Mock()

    launch_recovery_actionruns_for_job_runs([mock_job_run], mock_action_runner)

    mock_recover_action_run.assert_has_calls(
        [
            call(ssh_no_runner, mock_action_runner),
            call(ssh_with_runner, ssh_with_runner.action_runner),
        ],
        any_order=True,
    )
    assert mesos_run.recover.call_count == 1
def test_launch_recovery_actionruns_for_job_runs(self):
    """SSH runs use ``recover_action_run``; Mesos runs use ``recover``.

    Patches the recovery filter and ``recover_action_run`` inline, feeds
    a flat list of three mocked action runs through
    ``launch_recovery_actionruns_for_job_runs``, and checks each run is
    recovered through the appropriate mechanism.
    """
    with mock.patch(
        'tron.core.recovery.filter_action_runs_needing_recovery',
        autospec=True,
    ) as mock_filter:
        with mock.patch(
            'tron.core.recovery.recover_action_run',
            autospec=True,
        ) as mock_recover_action_run:
            ssh_no_runner = mock.Mock(
                action_runner=NoActionRunnerFactory(),
                spec=SSHActionRun,
            )
            ssh_with_runner = mock.Mock(
                action_runner=SubprocessActionRunnerFactory(
                    status_path='/tmp/foo',
                    exec_path=('/tmp/foo'),
                ),
                spec=SSHActionRun,
            )
            mesos_run = mock.Mock(
                action_runner=NoActionRunnerFactory(),
                spec=MesosActionRun,
            )
            mock_filter.return_value = [
                ssh_no_runner,
                ssh_with_runner,
                mesos_run,
            ]
            # NOTE(review): autospec is not a Mock() kwarg — it just sets
            # an `autospec` attribute; consider mock.create_autospec.
            mock_action_runner = mock.Mock(autospec=True)
            mock_job_run = mock.Mock()

            launch_recovery_actionruns_for_job_runs(
                [mock_job_run], mock_action_runner,
            )

            # The run without its own runner uses the master action
            # runner; the one with a factory keeps its own.
            mock_recover_action_run.assert_has_calls(
                [
                    call(ssh_no_runner, mock_action_runner),
                    call(ssh_with_runner, ssh_with_runner.action_runner),
                ],
                any_order=True,
            )
            assert mesos_run.recover.call_count == 1
def __init__(
    self,
    job_run_id,
    name,
    node,
    command_config,
    parent_context=None,
    output_path=None,
    cleanup=False,
    start_time=None,
    end_time=None,
    run_state=SCHEDULED,
    exit_status=None,
    attempts=None,
    action_runner=None,
    retries_remaining=None,
    retries_delay=None,
    machine=None,
    executor=None,
    trigger_downstreams=None,
    triggered_by=None,
    on_upstream_rerun=None,
    trigger_timeout_timestamp=None,
    original_command=None,
):
    """Initialize an action run.

    Most arguments are stored directly as attributes; ``None`` values for
    optional collaborators are replaced with neutral defaults
    (NoActionRunnerFactory, a fresh state machine, an empty attempts list,
    a fresh OutputPath).
    """
    super().__init__()
    # Identity; ids may arrive as bytes from older serialized state —
    # TODO confirm, maybe_decode suggests so.
    self.job_run_id = maybe_decode(job_run_id)
    self.action_name = maybe_decode(name)
    self.node = node
    self.start_time = start_time
    self.end_time = end_time
    self.exit_status = exit_status
    # Fall back to a runner-less factory when none was provided.
    self.action_runner = action_runner if action_runner else NoActionRunnerFactory()
    # Reuse an existing state machine or start a new one in run_state.
    if machine:
        self.machine = machine
    else:
        self.machine = Machine.from_machine(
            ActionRun.STATE_MACHINE, None, run_state,
        )
    self.is_cleanup = cleanup
    self.executor = executor
    self.command_config = command_config
    # Preserve the command as originally configured, defaulting to the
    # current command_config's command.
    self.original_command = original_command if original_command else command_config.command
    self.attempts = attempts if attempts else []
    self.output_path = output_path if output_path else filehandler.OutputPath()
    # Namespace this run's output under its action name.
    self.output_path.append(self.action_name)
    self.context = command_context.build_context(self, parent_context)
    self.retries_remaining = retries_remaining
    self.retries_delay = retries_delay
    self.trigger_downstreams = trigger_downstreams
    self.triggered_by = triggered_by
    self.on_upstream_rerun = on_upstream_rerun
    self.trigger_timeout_timestamp = trigger_timeout_timestamp
    # Runtime-only state, never passed in.
    self.trigger_timeout_call = None
    self.action_command = None
    self.in_delay = None
def from_state(
    cls,
    state_data,
    parent_context,
    output_path,
    job_run_node,
    cleanup=False,
):
    """Restore the state of this ActionRun from a serialized state."""
    repo = node.NodePoolRepository.get_instance()

    # Older state versions stored a single "<job_run_id>.<action_name>" id.
    if 'id' in state_data:
        job_run_id, action_name = state_data['id'].rsplit('.', 1)
    else:
        job_run_id = state_data['job_run_id']
        action_name = state_data['action_name']

    # Resolve the node by saved name, falling back to the provided node.
    job_run_node = repo.get_node(state_data.get('node_name'), job_run_node)

    # Rebuild the action runner factory from its saved kwargs, if any.
    runner_state = state_data.get('action_runner')
    action_runner = (
        SubprocessActionRunnerFactory(**runner_state)
        if runner_state
        else NoActionRunnerFactory()
    )

    rendered_command = state_data.get('rendered_command')
    run = cls(
        job_run_id=job_run_id,
        name=action_name,
        node=job_run_node,
        parent_context=parent_context,
        output_path=output_path,
        rendered_command=rendered_command,
        bare_command=state_data['command'],
        cleanup=cleanup,
        start_time=state_data['start_time'],
        end_time=state_data['end_time'],
        run_state=state_data['state'],
        exit_status=state_data.get('exit_status'),
        retries_remaining=state_data.get('retries_remaining'),
        retries_delay=state_data.get('retries_delay'),
        exit_statuses=state_data.get('exit_statuses'),
        action_runner=action_runner,
        executor=state_data.get('executor', ExecutorTypes.ssh),
        cpus=state_data.get('cpus'),
        mem=state_data.get('mem'),
        constraints=state_data.get('constraints'),
        docker_image=state_data.get('docker_image'),
        docker_parameters=state_data.get('docker_parameters'),
        env=state_data.get('env'),
        extra_volumes=state_data.get('extra_volumes'),
        mesos_task_id=state_data.get('mesos_task_id'),
        trigger_downstreams=state_data.get('trigger_downstreams'),
        triggered_by=state_data.get('triggered_by'),
        on_upstream_rerun=state_data.get('on_upstream_rerun'),
    )

    # The exit status of a run that was mid-flight when state was saved
    # was necessarily missed, so transition it to an unknown failure.
    if run.is_running:
        run._done('fail_unknown')
    if run.is_starting:
        run._exit_unsuccessful(None)

    return run
def __init__(
    self,
    job_run_id,
    name,
    node,
    bare_command=None,
    parent_context=None,
    output_path=None,
    cleanup=False,
    start_time=None,
    end_time=None,
    run_state=SCHEDULED,
    rendered_command=None,
    exit_status=None,
    action_runner=None,
    retries_remaining=None,
    retries_delay=None,
    exit_statuses=None,
    machine=None,
    executor=None,
    cpus=None,
    mem=None,
    constraints=None,
    docker_image=None,
    docker_parameters=None,
    env=None,
    extra_volumes=None,
    mesos_task_id=None,
    trigger_downstreams=None,
    triggered_by=None,
    on_upstream_rerun=None,
):
    """Initialize an action run.

    Arguments are stored as attributes; ``None`` values for optional
    collaborators get neutral defaults (NoActionRunnerFactory, a fresh
    state machine in ``run_state``, a fresh OutputPath, an empty
    exit-status history).
    """
    super().__init__()
    # Identity; ids may arrive as bytes from older serialized state —
    # TODO confirm, maybe_decode suggests so.
    self.job_run_id = maybe_decode(job_run_id)
    self.action_name = maybe_decode(name)
    self.node = node
    self.start_time = start_time
    self.end_time = end_time
    self.exit_status = exit_status
    self.bare_command = maybe_decode(bare_command)
    self.rendered_command = rendered_command
    # Fall back to a runner-less factory when none was provided.
    self.action_runner = action_runner if action_runner else NoActionRunnerFactory()
    # Reuse an existing state machine or start a new one in run_state.
    if machine:
        self.machine = machine
    else:
        self.machine = Machine.from_machine(
            ActionRun.STATE_MACHINE, None, run_state,
        )
    self.is_cleanup = cleanup
    self.executor = executor
    # Mesos resource / container settings.
    self.cpus = cpus
    self.mem = mem
    self.constraints = constraints
    self.docker_image = docker_image
    self.docker_parameters = docker_parameters
    self.env = env
    self.extra_volumes = extra_volumes
    self.mesos_task_id = mesos_task_id
    self.output_path = output_path if output_path else filehandler.OutputPath()
    # Namespace this run's output under its id.
    self.output_path.append(self.id)
    self.context = command_context.build_context(self, parent_context)
    self.retries_remaining = retries_remaining
    self.retries_delay = retries_delay
    # History of exit statuses across retries; only None (not any falsy
    # value) is replaced with a fresh list.
    self.exit_statuses = exit_statuses if exit_statuses is not None else []
    self.trigger_downstreams = trigger_downstreams
    self.triggered_by = triggered_by
    self.on_upstream_rerun = on_upstream_rerun
    # Runtime-only state, never passed in.
    self.action_command = None
    self.in_delay = None
def from_state(
    cls,
    state_data,
    parent_context,
    output_path,
    job_run_node,
    action_graph,
    cleanup=False,
):
    """Restore the state of this ActionRun from a serialized state."""
    repo = node.NodePoolRepository.get_instance()

    # Older state versions stored a single "<job_run_id>.<action_name>" id.
    if 'id' in state_data:
        job_run_id, action_name = state_data['id'].rsplit('.', 1)
    else:
        job_run_id = state_data['job_run_id']
        action_name = state_data['action_name']

    # Resolve the node by saved name, falling back to the provided node.
    job_run_node = repo.get_node(state_data.get('node_name'), job_run_node)

    # Rebuild the action runner factory from its saved kwargs, if any.
    runner_state = state_data.get('action_runner')
    action_runner = (
        SubprocessActionRunnerFactory(**runner_state)
        if runner_state
        else NoActionRunnerFactory()
    )

    # Look up the current command config for this action; if the action
    # no longer exists in the graph, fall back to an empty command.
    action_config = action_graph.action_map.get(action_name)
    if action_config:
        command_config = action_config.command_config
    else:
        command_config = action.ActionCommandConfig(command='')

    attempts = cls.attempts_from_state(state_data, command_config)
    run = cls(
        job_run_id=job_run_id,
        name=action_name,
        node=job_run_node,
        parent_context=parent_context,
        output_path=output_path,
        command_config=command_config,
        original_command=state_data.get('original_command'),
        cleanup=cleanup,
        start_time=state_data['start_time'],
        end_time=state_data['end_time'],
        run_state=state_data['state'],
        exit_status=state_data.get('exit_status'),
        attempts=attempts,
        retries_remaining=state_data.get('retries_remaining'),
        retries_delay=state_data.get('retries_delay'),
        action_runner=action_runner,
        executor=state_data.get('executor', ExecutorTypes.ssh.value),
        trigger_downstreams=state_data.get('trigger_downstreams'),
        triggered_by=state_data.get('triggered_by'),
        on_upstream_rerun=state_data.get('on_upstream_rerun'),
        trigger_timeout_timestamp=state_data.get('trigger_timeout_timestamp'),
    )

    # The exit status of a run that was active when state was saved was
    # necessarily missed, so transition it to an unknown failure;
    # recovery later looks for unknown runs.
    if run.is_active:
        run.transition_and_notify('fail_unknown')

    return run