def from_config(cls, actions_config, cleanup_action_config=None):
    """Build this graph from a job config.

    Each configured action (plus the optional cleanup action) is turned
    into an Action instance keyed by its decoded name, and the DAG is
    derived from the same config.
    """
    actions = {}
    for name, conf in actions_config.items():
        actions[maybe_decode(name)] = action.Action.from_config(conf)
    if cleanup_action_config:
        cleanup_action = action.Action.from_config(cleanup_action_config)
        actions[maybe_decode(cleanup_action.name)] = cleanup_action
    return cls(cls._build_dag(actions, actions_config), actions)
def __init__(
    self,
    job_name,
    run_num,
    run_time,
    node,
    output_path=None,
    base_context=None,
    action_runs=None,
    action_graph=None,
    manual=None,
):
    """Initialize a job run with its identity, node, and optional state.

    NOTE(review): assignment order matters — `self.output_path.append(self.id)`
    reads attributes assigned just above it; keep identity fields first.
    """
    super(JobRun, self).__init__()
    self.job_name = maybe_decode(job_name)
    self.run_num = run_num
    self.run_time = run_time
    self.node = node
    # Give this run its own output directory under the (possibly shared) path.
    self.output_path = output_path if output_path else filehandler.OutputPath()
    self.output_path.append(self.id)
    self.action_runs_proxy = None
    self._action_runs = None
    self.action_graph = action_graph
    self.manual = manual
    # Only assign when non-empty: the action_runs setter is skipped otherwise.
    if action_runs:
        self.action_runs = action_runs
    self.context = command_context.build_context(self, base_context)
def state_data(self):
    """This data is used to serialize the state of this action run."""
    runner = self.action_runner
    if isinstance(runner, NoActionRunnerFactory):
        action_runner = None
    else:
        action_runner = {
            'status_path': runner.status_path,
            'exec_path': runner.exec_path,
        }
    # Freeze command after it's run
    command = self.rendered_command or self.bare_command
    return {
        'job_run_id': self.job_run_id,
        'action_name': self.action_name,
        'state': self.state,
        'start_time': self.start_time,
        'end_time': self.end_time,
        'command': maybe_decode(command),
        'rendered_command': maybe_decode(self.rendered_command),
        'node_name': self.node.get_name() if self.node else None,
        'exit_status': self.exit_status,
        'retries_remaining': self.retries_remaining,
        'retries_delay': self.retries_delay,
        'exit_statuses': self.exit_statuses,
        'action_runner': action_runner,
        'executor': self.executor,
        'cpus': self.cpus,
        'mem': self.mem,
        'disk': self.disk,
        'constraints': self.constraints,
        'docker_image': self.docker_image,
        'docker_parameters': self.docker_parameters,
        'env': self.env,
        'extra_volumes': self.extra_volumes,
        'mesos_task_id': self.mesos_task_id,
        'trigger_downstreams': self.trigger_downstreams,
        'triggered_by': self.triggered_by,
        'on_upstream_rerun': self.on_upstream_rerun,
        'trigger_timeout_timestamp': self.trigger_timeout_timestamp,
    }
def build_action_run_collection(cls, job_run, action_runner):
    """Create an ActionRunCollection covering every action in the graph."""
    graph = job_run.action_graph
    run_map = {}
    for name, action_inst in graph.action_map.items():
        run_map[maybe_decode(name)] = cls.build_run_for_action(
            job_run,
            action_inst,
            action_runner,
        )
    return ActionRunCollection(graph, run_map)
def getChild(self, action_name, _):
    """Resolve the resource for a named action run of this job run."""
    if not action_name:
        return self
    action_name = maybe_decode(action_name)
    if action_name not in self.job_run.action_runs:
        return ErrorResource(
            f"Cannot find action '{action_name}' for " f"'{self.job_run}'"
        )
    return ActionRunResource(
        self.job_run.action_runs[action_name],
        self.job_run,
    )
def getChild(self, run_id, _):
    """Route to a specific job run, an action's run history, or an error."""
    if not run_id:
        return self
    run_id = maybe_decode(run_id)
    run = self.get_run_from_identifier(run_id)
    if run:
        return JobRunResource(run, self.job_scheduler)
    # Not a run identifier — maybe it names an action in this job's graph.
    job = self.job_scheduler.get_job()
    if run_id not in job.action_graph.names():
        return ErrorResource(f"Cannot find job run '{run_id}' for '{job}'")
    return ActionRunHistoryResource(job.runs.get_action_runs(run_id))
def __init__(
    self,
    name,
    scheduler,
    queueing=True,
    all_nodes=False,
    monitoring=None,
    node_pool=None,
    enabled=True,
    action_graph=None,
    run_collection=None,
    parent_context=None,
    output_path=None,
    allow_overlap=None,
    action_runner=None,
    max_runtime=None,
    time_zone=None,
    expected_runtime=None
):
    """Initialize a Job with its scheduler, graph, runs, and settings."""
    super(Job, self).__init__()
    self.name = maybe_decode(name)
    self.monitoring = monitoring
    self.action_graph = action_graph
    self.scheduler = scheduler
    self.runs = run_collection
    self.queueing = queueing
    self.all_nodes = all_nodes
    self.enabled = enabled  # current enabled setting
    self.config_enabled = enabled  # enabled attribute from file
    self.node_pool = node_pool
    self.allow_overlap = allow_overlap
    self.action_runner = action_runner
    self.max_runtime = max_runtime
    self.time_zone = time_zone
    self.expected_runtime = expected_runtime
    # Each job gets its own subdirectory of the output path.
    self.output_path = output_path if output_path else filehandler.OutputPath()
    self.output_path.append(name)
    self.context = command_context.build_context(self, parent_context)
    log.info(f'{self} created')
def action_run_collection_from_state(
    cls,
    job_run,
    runs_state_data,
    cleanup_action_state_data,
):
    """Rebuild an ActionRunCollection from serialized run state.

    The cleanup action (if any) is restored alongside the regular runs,
    flagged as cleanup.
    """
    restored = [
        cls.action_run_from_state(job_run, state)
        for state in runs_state_data
    ]
    if cleanup_action_state_data:
        restored.append(
            cls.action_run_from_state(
                job_run,
                cleanup_action_state_data,
                cleanup=True,
            ),
        )
    run_map = {maybe_decode(run.action_name): run for run in restored}
    return ActionRunCollection(job_run.action_graph, run_map)
def _get_dependencies(cls, actions_config, action_name):
    """Return the names the given action requires; cleanup depends on nothing."""
    if action_name != action.CLEANUP_ACTION_NAME:
        return actions_config[maybe_decode(action_name)].requires
    return []
def write_raw(path, content):
    """Write content (decoded to str if needed) to the file at path."""
    with open(path, 'w') as out_file:
        out_file.write(maybe_decode(content))
def getChild(self, name, request):
    """Look up the named job resource within the collection."""
    if name:
        return resource_from_collection(
            self.job_collection,
            maybe_decode(name),
            JobResource,
        )
    return self
def from_state(
    cls,
    state_data,
    parent_context,
    output_path,
    job_run_node,
    cleanup=False,
):
    """Restore the state of this ActionRun from a serialized state."""
    repo = node.NodePoolRepository.get_instance()

    # Support state from older version
    if 'id' in state_data:
        job_run_id, action_name = state_data['id'].rsplit('.', 1)
    else:
        job_run_id = state_data['job_run_id']
        action_name = state_data['action_name']

    job_run_node = repo.get_node(
        state_data.get('node_name'),
        job_run_node,
    )

    runner_data = state_data.get('action_runner')
    action_runner = (
        SubprocessActionRunnerFactory(**runner_data)
        if runner_data
        else NoActionRunnerFactory()
    )

    run = cls(
        job_run_id=job_run_id,
        name=action_name,
        node=job_run_node,
        parent_context=parent_context,
        output_path=output_path,
        rendered_command=maybe_decode(state_data.get('rendered_command')),
        bare_command=maybe_decode(state_data['command']),
        cleanup=cleanup,
        start_time=state_data['start_time'],
        end_time=state_data['end_time'],
        run_state=state_data['state'],
        exit_status=state_data.get('exit_status'),
        retries_remaining=state_data.get('retries_remaining'),
        retries_delay=state_data.get('retries_delay'),
        exit_statuses=state_data.get('exit_statuses'),
        action_runner=action_runner,
        executor=state_data.get('executor', ExecutorTypes.ssh.value),
        cpus=state_data.get('cpus'),
        mem=state_data.get('mem'),
        disk=state_data.get('disk'),
        constraints=state_data.get('constraints'),
        docker_image=state_data.get('docker_image'),
        docker_parameters=state_data.get('docker_parameters'),
        env=state_data.get('env'),
        extra_volumes=state_data.get('extra_volumes'),
        mesos_task_id=state_data.get('mesos_task_id'),
        trigger_downstreams=state_data.get('trigger_downstreams'),
        triggered_by=state_data.get('triggered_by'),
        on_upstream_rerun=state_data.get('on_upstream_rerun'),
        trigger_timeout_timestamp=state_data.get('trigger_timeout_timestamp'),
    )

    # Transition running to fail unknown because exit status was missed
    # Recovery will look for unknown runs
    if run.is_active:
        run.transition_and_notify('fail_unknown')
    return run
def __init__(
    self,
    job_run_id,
    name,
    node,
    bare_command=None,
    parent_context=None,
    output_path=None,
    cleanup=False,
    start_time=None,
    end_time=None,
    run_state=SCHEDULED,
    rendered_command=None,
    exit_status=None,
    action_runner=None,
    retries_remaining=None,
    retries_delay=None,
    exit_statuses=None,
    machine=None,
    executor=None,
    cpus=None,
    mem=None,
    disk=None,
    constraints=None,
    docker_image=None,
    docker_parameters=None,
    env=None,
    extra_volumes=None,
    mesos_task_id=None,
    trigger_downstreams=None,
    triggered_by=None,
    on_upstream_rerun=None,
    trigger_timeout_timestamp=None,
):
    """Initialize an action run's identity, command, state, and resources.

    NOTE(review): assignment order matters — `self.output_path.append(self.id)`
    reads attributes set earlier in this method; keep identity fields first.
    """
    super().__init__()
    # Identity fields are decoded so downstream code always sees str names.
    self.job_run_id = maybe_decode(job_run_id)
    self.action_name = maybe_decode(name)
    self.node = node
    self.start_time = start_time
    self.end_time = end_time
    self.exit_status = exit_status
    self.bare_command = maybe_decode(bare_command)
    self.rendered_command = rendered_command
    self.action_runner = action_runner or NoActionRunnerFactory()
    # Reuse the caller's machine when restoring; otherwise start one fresh.
    self.machine = machine or Machine.from_machine(
        ActionRun.STATE_MACHINE, None, run_state
    )
    self.is_cleanup = cleanup
    self.executor = executor
    self.cpus = cpus
    self.mem = mem
    self.disk = disk
    self.constraints = constraints
    self.docker_image = docker_image
    self.docker_parameters = docker_parameters
    self.env = env
    self.extra_volumes = extra_volumes
    self.mesos_task_id = mesos_task_id
    self.output_path = output_path if output_path else filehandler.OutputPath()
    self.output_path.append(self.id)
    self.context = command_context.build_context(self, parent_context)
    self.retries_remaining = retries_remaining
    self.retries_delay = retries_delay
    self.exit_statuses = [] if exit_statuses is None else exit_statuses
    self.trigger_downstreams = trigger_downstreams
    self.triggered_by = triggered_by
    self.on_upstream_rerun = on_upstream_rerun
    self.trigger_timeout_timestamp = trigger_timeout_timestamp
    self.trigger_timeout_call = None
    self.action_command = None
    self.in_delay = None
def __init__(self, type, iden):
    """Store a (type, identifier) pair, decoding each to str if needed."""
    self.type, self.iden = maybe_decode(type), maybe_decode(iden)
def __init__(
    self,
    job_run_id,
    name,
    node,
    bare_command=None,
    parent_context=None,
    output_path=None,
    cleanup=False,
    start_time=None,
    end_time=None,
    run_state=SCHEDULED,
    rendered_command=None,
    exit_status=None,
    action_runner=None,
    retries_remaining=None,
    retries_delay=None,
    exit_statuses=None,
    machine=None,
    executor=None,
    cpus=None,
    mem=None,
    constraints=None,
    docker_image=None,
    docker_parameters=None,
    env=None,
    extra_volumes=None,
    mesos_task_id=None,
    trigger_downstreams=None,
    triggered_by=None,
    on_upstream_rerun=None,
):
    """Initialize an action run's identity, command, state, and resources.

    NOTE(review): assignment order matters — `self.output_path.append(self.id)`
    reads attributes set earlier in this method; keep identity fields first.
    """
    super().__init__()
    # Identity fields are decoded so downstream code always sees str names.
    self.job_run_id = maybe_decode(job_run_id)
    self.action_name = maybe_decode(name)
    self.node = node
    self.start_time = start_time
    self.end_time = end_time
    self.exit_status = exit_status
    self.bare_command = maybe_decode(bare_command)
    self.rendered_command = rendered_command
    self.action_runner = action_runner or NoActionRunnerFactory()
    # Reuse the caller's machine when restoring; otherwise start one fresh.
    self.machine = machine or Machine.from_machine(
        ActionRun.STATE_MACHINE, None, run_state
    )
    self.is_cleanup = cleanup
    self.executor = executor
    self.cpus = cpus
    self.mem = mem
    self.constraints = constraints
    self.docker_image = docker_image
    self.docker_parameters = docker_parameters
    self.env = env
    self.extra_volumes = extra_volumes
    self.mesos_task_id = mesos_task_id
    self.output_path = output_path if output_path else filehandler.OutputPath()
    self.output_path.append(self.id)
    self.context = command_context.build_context(self, parent_context)
    self.retries_remaining = retries_remaining
    self.retries_delay = retries_delay
    self.exit_statuses = [] if exit_statuses is None else exit_statuses
    self.trigger_downstreams = trigger_downstreams
    self.triggered_by = triggered_by
    self.on_upstream_rerun = on_upstream_rerun
    self.action_command = None
    self.in_delay = None
def _save_action(self, action_name, job_name, config):
    """Register an action under its fully-qualified name and return that name."""
    decoded_name = maybe_decode(action_name)
    full_name = f'{job_name}.{decoded_name}'
    self.action_map[full_name] = Action.from_config(config)
    self._actions_for_job[job_name].append(full_name)
    return full_name
def from_state(
    cls,
    state_data,
    parent_context,
    output_path,
    job_run_node,
    cleanup=False,
):
    """Restore the state of this ActionRun from a serialized state."""
    repo = node.NodePoolRepository.get_instance()

    # Support state from older version
    if 'id' in state_data:
        job_run_id, action_name = state_data['id'].rsplit('.', 1)
    else:
        job_run_id = state_data['job_run_id']
        action_name = state_data['action_name']

    job_run_node = repo.get_node(
        state_data.get('node_name'),
        job_run_node,
    )

    runner_data = state_data.get('action_runner')
    action_runner = (
        SubprocessActionRunnerFactory(**runner_data)
        if runner_data
        else NoActionRunnerFactory()
    )

    run = cls(
        job_run_id=job_run_id,
        name=action_name,
        node=job_run_node,
        parent_context=parent_context,
        output_path=output_path,
        rendered_command=maybe_decode(state_data.get('rendered_command')),
        bare_command=maybe_decode(state_data['command']),
        cleanup=cleanup,
        start_time=state_data['start_time'],
        end_time=state_data['end_time'],
        run_state=state_data['state'],
        exit_status=state_data.get('exit_status'),
        retries_remaining=state_data.get('retries_remaining'),
        retries_delay=state_data.get('retries_delay'),
        exit_statuses=state_data.get('exit_statuses'),
        action_runner=action_runner,
        executor=state_data.get('executor', ExecutorTypes.ssh.value),
        cpus=state_data.get('cpus'),
        mem=state_data.get('mem'),
        disk=state_data.get('disk'),
        constraints=state_data.get('constraints'),
        docker_image=state_data.get('docker_image'),
        docker_parameters=state_data.get('docker_parameters'),
        env=state_data.get('env'),
        extra_volumes=state_data.get('extra_volumes'),
        mesos_task_id=state_data.get('mesos_task_id'),
        trigger_downstreams=state_data.get('trigger_downstreams'),
        triggered_by=state_data.get('triggered_by'),
        on_upstream_rerun=state_data.get('on_upstream_rerun'),
        trigger_timeout_timestamp=state_data.get('trigger_timeout_timestamp'),
    )

    # Transition running to fail unknown because exit status was missed
    # Recovery will look for unknown runs
    if run.is_active:
        run.transition_and_notify('fail_unknown')
    return run