Beispiel #1
0
    def from_config(cls, actions_config, cleanup_action_config=None):
        """Build the graph from a job's action configuration.

        Each ``(name, conf)`` entry of ``actions_config`` is turned into an
        ``action.Action`` and stored under ``maybe_decode(name)``.  When a
        truthy ``cleanup_action_config`` is given, the action built from it
        is added to the same mapping under its own (decoded) ``name``.

        Returns:
            ``cls`` constructed from the result of ``cls._build_dag`` and
            the name -> action mapping.
        """
        actions = {}
        for raw_name, action_conf in actions_config.items():
            # Resolve the key before building the action, as the mapping
            # was originally populated key-first.
            key = maybe_decode(raw_name)
            actions[key] = action.Action.from_config(action_conf)

        if cleanup_action_config:
            cleanup = action.Action.from_config(cleanup_action_config)
            actions[maybe_decode(cleanup.name)] = cleanup

        dag = cls._build_dag(actions, actions_config)
        return cls(dag, actions)
Beispiel #2
0
    def __init__(
        self,
        job_name,
        run_num,
        run_time,
        node,
        output_path=None,
        base_context=None,
        action_runs=None,
        action_graph=None,
        manual=None,
    ):
        """Set up a single run of a job.

        Args:
            job_name: name of the job; stored after being passed through
                ``maybe_decode``.
            run_num: stored unchanged on ``self.run_num``.
            run_time: stored unchanged on ``self.run_time``.
            node: stored unchanged on ``self.node``.
            output_path: object that ``self.id`` is appended to; a new
                ``filehandler.OutputPath()`` is used when this is falsy.
            base_context: handed to ``command_context.build_context``
                together with this instance.
            action_runs: when truthy, assigned to ``self.action_runs``.
            action_graph: stored unchanged on ``self.action_graph``.
            manual: stored unchanged on ``self.manual``.
        """
        super(JobRun, self).__init__()
        self.job_name = maybe_decode(job_name)
        self.run_num = run_num
        self.run_time = run_time
        self.node = node
        self.output_path = output_path or filehandler.OutputPath()
        # NOTE: ``self.id`` is not assigned in this method -- presumably a
        # property derived from the fields set above; confirm in the class.
        # ``append`` is called on the caller's object when one was passed in.
        self.output_path.append(self.id)
        self.action_runs_proxy = None
        self._action_runs = None
        self.action_graph = action_graph
        self.manual = manual

        # NOTE(review): ``_action_runs`` and ``action_runs_proxy`` are reset
        # just above, so ``action_runs`` is presumably a property whose
        # setter fills them in -- confirm against the class definition.
        if action_runs:
            self.action_runs = action_runs

        # Built last, once the attributes above exist on the instance.
        self.context = command_context.build_context(self, base_context)
Beispiel #3
0
    def state_data(self):
        """Return the dict used to serialize the state of this action run.

        The ``action_runner`` entry is ``None`` when the runner is a
        ``NoActionRunnerFactory``; otherwise it holds the runner's
        ``status_path`` and ``exec_path``.  ``command`` is the rendered
        command when there is one and the bare command otherwise.  All
        remaining entries mirror the attribute of the same name, except
        ``node_name`` which comes from ``self.node.get_name()`` (or is
        ``None`` when there is no node).
        """
        rendered_command = self.rendered_command

        action_runner = None
        if not isinstance(self.action_runner, NoActionRunnerFactory):
            action_runner = {
                'status_path': self.action_runner.status_path,
                'exec_path': self.action_runner.exec_path,
            }

        # Freeze the command once it has been rendered.
        command = rendered_command or self.bare_command

        data = {
            'job_run_id': self.job_run_id,
            'action_name': self.action_name,
            'state': self.state,
            'start_time': self.start_time,
            'end_time': self.end_time,
            'command': maybe_decode(command),
            'rendered_command': maybe_decode(self.rendered_command),
            'node_name': self.node.get_name() if self.node else None,
        }

        # Entries below are copied from the attribute of the same name; the
        # runner entry is inserted in between to keep the key order stable.
        for attr in (
            'exit_status',
            'retries_remaining',
            'retries_delay',
            'exit_statuses',
        ):
            data[attr] = getattr(self, attr)

        data['action_runner'] = action_runner

        for attr in (
            'executor',
            'cpus',
            'mem',
            'disk',
            'constraints',
            'docker_image',
            'docker_parameters',
            'env',
            'extra_volumes',
            'mesos_task_id',
            'trigger_downstreams',
            'triggered_by',
            'on_upstream_rerun',
            'trigger_timeout_timestamp',
        ):
            data[attr] = getattr(self, attr)

        return data
Beispiel #4
0
 def build_action_run_collection(cls, job_run, action_runner):
     """Create an ActionRunCollection for every action of a job run.

     One run is built through ``cls.build_run_for_action`` for each entry
     of ``job_run.action_graph.action_map`` and stored under the decoded
     action name.  The resulting mapping is wrapped, together with the
     job run's action graph, in an ``ActionRunCollection``.
     """
     action_run_map = {}
     for raw_name, action_inst in job_run.action_graph.action_map.items():
         # Decode the key before building the run, as the mapping was
         # originally populated key-first.
         key = maybe_decode(raw_name)
         action_run_map[key] = cls.build_run_for_action(
             job_run,
             action_inst,
             action_runner,
         )
     return ActionRunCollection(job_run.action_graph, action_run_map)
Beispiel #5
0
    def getChild(self, action_name, _):
        """Resolve a child resource for the given action name.

        A falsy ``action_name`` resolves to this resource itself.
        Otherwise the name is passed through ``maybe_decode`` and looked up
        in ``self.job_run.action_runs``: a hit yields an
        ``ActionRunResource``, a miss yields an ``ErrorResource``.
        """
        if not action_name:
            return self

        decoded_name = maybe_decode(action_name)
        if decoded_name not in self.job_run.action_runs:
            return ErrorResource(
                f"Cannot find action '{decoded_name}' for '{self.job_run}'"
            )

        found_run = self.job_run.action_runs[decoded_name]
        return ActionRunResource(found_run, self.job_run)
Beispiel #6
0
    def getChild(self, run_id, _):
        """Resolve a child resource for a run identifier or an action name.

        A falsy ``run_id`` resolves to this resource itself.  Otherwise the
        decoded identifier is first tried as a job run through
        ``get_run_from_identifier``; failing that, it is checked against
        the names of the job's action graph, in which case the history of
        that action's runs is returned.  Anything else yields an
        ``ErrorResource``.
        """
        if not run_id:
            return self

        identifier = maybe_decode(run_id)
        matched_run = self.get_run_from_identifier(identifier)
        if matched_run:
            return JobRunResource(matched_run, self.job_scheduler)

        job = self.job_scheduler.get_job()
        if identifier not in job.action_graph.names():
            return ErrorResource(
                f"Cannot find job run '{identifier}' for '{job}'"
            )

        history = job.runs.get_action_runs(identifier)
        return ActionRunHistoryResource(history)
Beispiel #7
0
Datei: job.py Projekt: Yelp/Tron
 def __init__(
     self,
     name,
     scheduler,
     queueing=True,
     all_nodes=False,
     monitoring=None,
     node_pool=None,
     enabled=True,
     action_graph=None,
     run_collection=None,
     parent_context=None,
     output_path=None,
     allow_overlap=None,
     action_runner=None,
     max_runtime=None,
     time_zone=None,
     expected_runtime=None
 ):
     """Set up a job.

     Args:
         name: job name; stored on ``self.name`` after being passed
             through ``maybe_decode``.
         scheduler: stored unchanged on ``self.scheduler``.
         run_collection: stored on ``self.runs``.
         enabled: stored twice -- as the current ``enabled`` setting and
             as ``config_enabled`` (the value from the file).
         parent_context: handed to ``command_context.build_context``
             together with this instance.
         output_path: object the job name is appended to; a new
             ``filehandler.OutputPath()`` is used when this is falsy.

     Every other argument is stored unchanged on the attribute of the
     same name.
     """
     super(Job, self).__init__()
     self.name = maybe_decode(name)
     self.monitoring = monitoring
     self.action_graph = action_graph
     self.scheduler = scheduler
     self.runs = run_collection
     self.queueing = queueing
     self.all_nodes = all_nodes
     self.enabled = enabled  # current enabled setting
     self.config_enabled = enabled  # enabled attribute from file
     self.node_pool = node_pool
     self.allow_overlap = allow_overlap
     self.action_runner = action_runner
     self.max_runtime = max_runtime
     self.time_zone = time_zone
     self.expected_runtime = expected_runtime
     self.output_path = output_path or filehandler.OutputPath()
     # Append the decoded name so the path component always matches
     # ``self.name`` instead of whatever raw form the caller passed in.
     self.output_path.append(self.name)
     # Built last, once the attributes above exist on the instance.
     self.context = command_context.build_context(self, parent_context)
     log.info(f'{self} created')
Beispiel #8
0
    def action_run_collection_from_state(
        cls,
        job_run,
        runs_state_data,
        cleanup_action_state_data,
    ):
        """Restore an ActionRunCollection from serialized action run state.

        Every item of ``runs_state_data`` is restored through
        ``cls.action_run_from_state``.  When ``cleanup_action_state_data``
        is truthy it is restored the same way with ``cleanup=True`` and
        added after the regular runs.  The runs are then keyed by their
        decoded ``action_name`` and wrapped, together with the job run's
        action graph, in an ``ActionRunCollection``.
        """
        restored_runs = []
        for run_state in runs_state_data:
            restored_runs.append(cls.action_run_from_state(job_run, run_state))

        if cleanup_action_state_data:
            cleanup_run = cls.action_run_from_state(
                job_run,
                cleanup_action_state_data,
                cleanup=True,
            )
            restored_runs.append(cleanup_run)

        runs_by_name = {}
        for restored in restored_runs:
            runs_by_name[maybe_decode(restored.action_name)] = restored
        return ActionRunCollection(job_run.action_graph, runs_by_name)
Beispiel #9
0
 def _get_dependencies(cls, actions_config, action_name):
     """Return what the named action requires.

     The name is passed through ``maybe_decode`` once, up front, so the
     cleanup check and the config lookup both see the same value.  Doing
     the comparison on the raw name would let an undecoded cleanup name
     slip past the guard and fall through to the config lookup.
     (Assumes ``maybe_decode`` turns bytes into str -- confirm.)

     Returns:
         An empty list for the cleanup action, otherwise the ``requires``
         attribute of the action's config entry.

     Raises:
         KeyError: if the (decoded) name is not in ``actions_config``.
     """
     action_name = maybe_decode(action_name)
     # The cleanup action is reported as having no dependencies.
     if action_name == action.CLEANUP_ACTION_NAME:
         return []
     return actions_config[action_name].requires
Beispiel #10
0
 def _get_dependencies(cls, actions_config, action_name):
     """Return what the named action requires.

     The name is passed through ``maybe_decode`` once, up front, so the
     cleanup check and the config lookup both see the same value.  Doing
     the comparison on the raw name would let an undecoded cleanup name
     slip past the guard and fall through to the config lookup.
     (Assumes ``maybe_decode`` turns bytes into str -- confirm.)

     Returns:
         An empty list for the cleanup action, otherwise the ``requires``
         attribute of the action's config entry.

     Raises:
         KeyError: if the (decoded) name is not in ``actions_config``.
     """
     action_name = maybe_decode(action_name)
     # The cleanup action is reported as having no dependencies.
     if action_name == action.CLEANUP_ACTION_NAME:
         return []
     return actions_config[action_name].requires
Beispiel #11
0
def write_raw(path, content):
    """Write ``content`` to ``path`` in text mode, replacing the file.

    The content is passed through ``maybe_decode`` before being written
    (presumably so that bytes are accepted as well -- confirm).
    """
    with open(path, mode='w') as out_file:
        text = maybe_decode(content)
        out_file.write(text)
Beispiel #12
0
    def getChild(self, name, request):
        """Resolve a child resource for the given job name.

        A falsy ``name`` resolves to this resource itself; otherwise the
        decoded name is looked up in ``self.job_collection`` through
        ``resource_from_collection`` with ``JobResource`` as the resource
        class.
        """
        if name:
            job_name = maybe_decode(name)
            return resource_from_collection(
                self.job_collection,
                job_name,
                JobResource,
            )
        return self
Beispiel #13
0
def write_raw(path, content):
    """Write ``content`` to ``path`` in text mode, replacing the file.

    The content is passed through ``maybe_decode`` before being written
    (presumably so that bytes are accepted as well -- confirm).
    """
    with open(path, mode='w') as out_file:
        text = maybe_decode(content)
        out_file.write(text)
Beispiel #14
0
    def from_state(
        cls,
        state_data,
        parent_context,
        output_path,
        job_run_node,
        cleanup=False,
    ):
        """Rebuild an action run from previously serialized ``state_data``.

        The job run id and action name are taken from the ``job_run_id`` /
        ``action_name`` entries, or split out of a combined ``id`` entry
        when the state was written by an older version.  The node is
        resolved through the node pool repository from ``node_name`` and
        ``job_run_node``.  An ``action_runner`` entry, when present and
        truthy, is expanded into a ``SubprocessActionRunnerFactory``;
        otherwise a ``NoActionRunnerFactory`` is used.

        A restored run that is still active is moved through the
        ``fail_unknown`` transition before being returned.

        Raises:
            KeyError: if ``command``, ``start_time``, ``end_time`` or
                ``state`` (or the id entries) are missing.
        """
        pool_repo = node.NodePoolRepository.get_instance()

        # Support state from older versions, which stored a combined id.
        if 'id' not in state_data:
            job_run_id = state_data['job_run_id']
            action_name = state_data['action_name']
        else:
            job_run_id, action_name = state_data['id'].rsplit('.', 1)

        restored_node = pool_repo.get_node(
            state_data.get('node_name'),
            job_run_node,
        )

        runner_kwargs = state_data.get('action_runner')
        action_runner = (
            SubprocessActionRunnerFactory(**runner_kwargs)
            if runner_kwargs
            else NoActionRunnerFactory()
        )

        run_kwargs = dict(
            job_run_id=job_run_id,
            name=action_name,
            node=restored_node,
            parent_context=parent_context,
            output_path=output_path,
            rendered_command=maybe_decode(state_data.get('rendered_command')),
            bare_command=maybe_decode(state_data['command']),
            cleanup=cleanup,
            start_time=state_data['start_time'],
            end_time=state_data['end_time'],
            run_state=state_data['state'],
            action_runner=action_runner,
            executor=state_data.get('executor', ExecutorTypes.ssh.value),
        )
        # The remaining constructor arguments share their name with the
        # state key and fall back to None when the key is absent.
        for field in (
            'exit_status',
            'retries_remaining',
            'retries_delay',
            'exit_statuses',
            'cpus',
            'mem',
            'disk',
            'constraints',
            'docker_image',
            'docker_parameters',
            'env',
            'extra_volumes',
            'mesos_task_id',
            'trigger_downstreams',
            'triggered_by',
            'on_upstream_rerun',
            'trigger_timeout_timestamp',
        ):
            run_kwargs[field] = state_data.get(field)

        run = cls(**run_kwargs)

        # A run that was active when the state was saved missed its exit
        # status; mark it unknown so recovery can pick it up.
        if run.is_active:
            run.transition_and_notify('fail_unknown')
        return run
Beispiel #15
0
    def __init__(
        self,
        job_run_id,
        name,
        node,
        bare_command=None,
        parent_context=None,
        output_path=None,
        cleanup=False,
        start_time=None,
        end_time=None,
        run_state=SCHEDULED,
        rendered_command=None,
        exit_status=None,
        action_runner=None,
        retries_remaining=None,
        retries_delay=None,
        exit_statuses=None,
        machine=None,
        executor=None,
        cpus=None,
        mem=None,
        disk=None,
        constraints=None,
        docker_image=None,
        docker_parameters=None,
        env=None,
        extra_volumes=None,
        mesos_task_id=None,
        trigger_downstreams=None,
        triggered_by=None,
        on_upstream_rerun=None,
        trigger_timeout_timestamp=None,
    ):
        """Set up a run of a single action.

        Args:
            job_run_id: stored on ``self.job_run_id`` after ``maybe_decode``.
            name: stored on ``self.action_name`` after ``maybe_decode``.
            bare_command: stored after ``maybe_decode``.
            rendered_command: stored as given.
            parent_context: handed to ``command_context.build_context``
                together with this instance.
            output_path: object that ``self.id`` is appended to; a new
                ``filehandler.OutputPath()`` is used when this is falsy.
            cleanup: stored on ``self.is_cleanup``.
            run_state: initial state for the state machine; only used when
                ``machine`` is falsy.
            action_runner: falls back to ``NoActionRunnerFactory()`` when
                falsy.
            exit_statuses: replaced by a new empty list when ``None``.
            machine: state machine to use; built from
                ``ActionRun.STATE_MACHINE`` and ``run_state`` when falsy.

        Every other argument is stored unchanged on the attribute of the
        same name.
        """
        super().__init__()
        self.job_run_id = maybe_decode(job_run_id)
        self.action_name = maybe_decode(name)
        self.node = node
        self.start_time = start_time
        self.end_time = end_time
        self.exit_status = exit_status
        self.bare_command = maybe_decode(bare_command)
        # Unlike bare_command, this is not passed through maybe_decode here.
        self.rendered_command = rendered_command
        self.action_runner = action_runner or NoActionRunnerFactory()
        self.machine = machine or Machine.from_machine(
            ActionRun.STATE_MACHINE, None, run_state
        )
        self.is_cleanup = cleanup
        self.executor = executor
        self.cpus = cpus
        self.mem = mem
        self.disk = disk
        self.constraints = constraints
        self.docker_image = docker_image
        self.docker_parameters = docker_parameters
        self.env = env
        self.extra_volumes = extra_volumes
        self.mesos_task_id = mesos_task_id
        self.output_path = output_path or filehandler.OutputPath()
        # NOTE: ``self.id`` is not assigned in this method -- presumably a
        # property derived from job_run_id / action_name; confirm in the
        # class.  ``append`` is called on the caller's object when one was
        # passed in.
        self.output_path.append(self.id)
        # NOTE(review): the context is built before the attributes below
        # are assigned -- keep this order unless build_context is confirmed
        # not to read them eagerly.
        self.context = command_context.build_context(self, parent_context)
        self.retries_remaining = retries_remaining
        self.retries_delay = retries_delay
        self.exit_statuses = exit_statuses
        self.trigger_downstreams = trigger_downstreams
        self.triggered_by = triggered_by
        self.on_upstream_rerun = on_upstream_rerun
        self.trigger_timeout_timestamp = trigger_timeout_timestamp
        self.trigger_timeout_call = None

        # Give every instance its own list rather than sharing a default.
        if self.exit_statuses is None:
            self.exit_statuses = []

        self.action_command = None
        self.in_delay = None
Beispiel #16
0
    def getChild(self, name, request):
        """Resolve a child resource for the given job name.

        A falsy ``name`` resolves to this resource itself; otherwise the
        decoded name is looked up in ``self.job_collection`` through
        ``resource_from_collection`` with ``JobResource`` as the resource
        class.
        """
        if name:
            job_name = maybe_decode(name)
            return resource_from_collection(
                self.job_collection,
                job_name,
                JobResource,
            )
        return self
Beispiel #17
0
 def __init__(self, type, iden):
     """Store ``type`` and ``iden`` after passing each through maybe_decode.

     The parameter name ``type`` shadows the builtin inside this method;
     it is kept because callers may pass it by keyword.
     """
     decoded_type = maybe_decode(type)
     self.type = decoded_type
     decoded_iden = maybe_decode(iden)
     self.iden = decoded_iden
Beispiel #18
0
    def __init__(
        self,
        job_run_id,
        name,
        node,
        bare_command=None,
        parent_context=None,
        output_path=None,
        cleanup=False,
        start_time=None,
        end_time=None,
        run_state=SCHEDULED,
        rendered_command=None,
        exit_status=None,
        action_runner=None,
        retries_remaining=None,
        retries_delay=None,
        exit_statuses=None,
        machine=None,
        executor=None,
        cpus=None,
        mem=None,
        constraints=None,
        docker_image=None,
        docker_parameters=None,
        env=None,
        extra_volumes=None,
        mesos_task_id=None,
        trigger_downstreams=None,
        triggered_by=None,
        on_upstream_rerun=None,
    ):
        """Set up a run of a single action.

        Args:
            job_run_id: stored on ``self.job_run_id`` after ``maybe_decode``.
            name: stored on ``self.action_name`` after ``maybe_decode``.
            bare_command: stored after ``maybe_decode``.
            rendered_command: stored as given.
            parent_context: handed to ``command_context.build_context``
                together with this instance.
            output_path: object that ``self.id`` is appended to; a new
                ``filehandler.OutputPath()`` is used when this is falsy.
            cleanup: stored on ``self.is_cleanup``.
            run_state: initial state for the state machine; only used when
                ``machine`` is falsy.
            action_runner: falls back to ``NoActionRunnerFactory()`` when
                falsy.
            exit_statuses: replaced by a new empty list when ``None``.
            machine: state machine to use; built from
                ``ActionRun.STATE_MACHINE`` and ``run_state`` when falsy.

        Every other argument is stored unchanged on the attribute of the
        same name.
        """
        super().__init__()
        self.job_run_id = maybe_decode(job_run_id)
        self.action_name = maybe_decode(name)
        self.node = node
        self.start_time = start_time
        self.end_time = end_time
        self.exit_status = exit_status
        self.bare_command = maybe_decode(bare_command)
        # Unlike bare_command, this is not passed through maybe_decode here.
        self.rendered_command = rendered_command
        self.action_runner = action_runner or NoActionRunnerFactory()
        self.machine = machine or Machine.from_machine(
            ActionRun.STATE_MACHINE, None, run_state
        )
        self.is_cleanup = cleanup
        self.executor = executor
        self.cpus = cpus
        self.mem = mem
        self.constraints = constraints
        self.docker_image = docker_image
        self.docker_parameters = docker_parameters
        self.env = env
        self.extra_volumes = extra_volumes
        self.mesos_task_id = mesos_task_id
        self.output_path = output_path or filehandler.OutputPath()
        # NOTE: ``self.id`` is not assigned in this method -- presumably a
        # property derived from job_run_id / action_name; confirm in the
        # class.  ``append`` is called on the caller's object when one was
        # passed in.
        self.output_path.append(self.id)
        # NOTE(review): the context is built before the attributes below
        # are assigned -- keep this order unless build_context is confirmed
        # not to read them eagerly.
        self.context = command_context.build_context(self, parent_context)
        self.retries_remaining = retries_remaining
        self.retries_delay = retries_delay
        self.exit_statuses = exit_statuses
        self.trigger_downstreams = trigger_downstreams
        self.triggered_by = triggered_by
        self.on_upstream_rerun = on_upstream_rerun

        # Give every instance its own list rather than sharing a default.
        if self.exit_statuses is None:
            self.exit_statuses = []

        self.action_command = None
        self.in_delay = None
Beispiel #19
0
 def _save_action(self, action_name, job_name, config):
     """Register an action under its job-qualified name and return that name.

     The qualified name is ``<job_name>.<decoded action_name>``.  The
     action built from ``config`` is stored in ``self.action_map`` under
     it, and the name is appended to the job's entry in
     ``self._actions_for_job``.
     """
     qualified = '{}.{}'.format(job_name, maybe_decode(action_name))
     built_action = Action.from_config(config)
     self.action_map[qualified] = built_action
     self._actions_for_job[job_name].append(qualified)
     return qualified
Beispiel #20
0
    def from_state(
        cls,
        state_data,
        parent_context,
        output_path,
        job_run_node,
        cleanup=False,
    ):
        """Rebuild an action run from previously serialized ``state_data``.

        The job run id and action name are taken from the ``job_run_id`` /
        ``action_name`` entries, or split out of a combined ``id`` entry
        when the state was written by an older version.  The node is
        resolved through the node pool repository from ``node_name`` and
        ``job_run_node``.  An ``action_runner`` entry, when present and
        truthy, is expanded into a ``SubprocessActionRunnerFactory``;
        otherwise a ``NoActionRunnerFactory`` is used.

        A restored run that is still active is moved through the
        ``fail_unknown`` transition before being returned.

        Raises:
            KeyError: if ``command``, ``start_time``, ``end_time`` or
                ``state`` (or the id entries) are missing.
        """
        pool_repo = node.NodePoolRepository.get_instance()

        # Support state from older versions, which stored a combined id.
        if 'id' not in state_data:
            job_run_id = state_data['job_run_id']
            action_name = state_data['action_name']
        else:
            job_run_id, action_name = state_data['id'].rsplit('.', 1)

        restored_node = pool_repo.get_node(
            state_data.get('node_name'),
            job_run_node,
        )

        runner_kwargs = state_data.get('action_runner')
        action_runner = (
            SubprocessActionRunnerFactory(**runner_kwargs)
            if runner_kwargs
            else NoActionRunnerFactory()
        )

        run_kwargs = dict(
            job_run_id=job_run_id,
            name=action_name,
            node=restored_node,
            parent_context=parent_context,
            output_path=output_path,
            rendered_command=maybe_decode(state_data.get('rendered_command')),
            bare_command=maybe_decode(state_data['command']),
            cleanup=cleanup,
            start_time=state_data['start_time'],
            end_time=state_data['end_time'],
            run_state=state_data['state'],
            action_runner=action_runner,
            executor=state_data.get('executor', ExecutorTypes.ssh.value),
        )
        # The remaining constructor arguments share their name with the
        # state key and fall back to None when the key is absent.
        for field in (
            'exit_status',
            'retries_remaining',
            'retries_delay',
            'exit_statuses',
            'cpus',
            'mem',
            'disk',
            'constraints',
            'docker_image',
            'docker_parameters',
            'env',
            'extra_volumes',
            'mesos_task_id',
            'trigger_downstreams',
            'triggered_by',
            'on_upstream_rerun',
            'trigger_timeout_timestamp',
        ):
            run_kwargs[field] = state_data.get(field)

        run = cls(**run_kwargs)

        # A run that was active when the state was saved missed its exit
        # status; mark it unknown so recovery can pick it up.
        if run.is_active:
            run.transition_and_notify('fail_unknown')
        return run