Ejemplo n.º 1
0
 def __init__(self, blocking_runner=cwltool_run):
     self.runner = blocking_runner
     self.jobs = JobStore()
Ejemplo n.º 2
0
class Engine(object):
    def __init__(self, blocking_runner=cwltool_run):
        self.runner = blocking_runner
        self.jobs = JobStore()

    def submit(self, process, inputs, job_id=None):
        job = Job(job_id or str(uuid.uuid4()), process)
        job.state = Job.READY
        self.jobs.add(job)
        self.jobs.vars.update(prefix_keys(job.id, inputs))
        return job

    def get(self, job_id):
        return self.jobs.get(job_id)

    def run_all(self):
        ready = self.jobs.get_by_state(Job.READY)
        while ready:
            for job in ready:
                self.run_job(job)
                self.update_parent(job)
            ready = self.jobs.get_by_state(Job.READY)

    def update_parent(self, finished_job):
        if not finished_job.parent_id:
            return
        parent = self.jobs.get(finished_job.parent_id)
        workflow = parent.process
        for candidate in workflow.successors(finished_job.step_id):
            prereqs = [self.jobs.get_for_step(parent.id, s.id) for s in workflow.predecessors(candidate.id)]
            if all(job.state == Job.FINISHED for job in prereqs):
                self.job_ready(self.jobs.get_for_step(parent.id, candidate.id))
        if all(j.state == Job.FINISHED for j in self.jobs.get_children(parent.id)):
            outputs = {out_id: self.value_for_port(parent, out_id) for out_id in parent.process.outputs}
            self.jobs.vars.update(prefix_keys(parent.id, outputs))
            self.jobs.set_state(parent.id, Job.FINISHED)

    def run_job(self, job):
        self.jobs.set_state(job.id, Job.ACTIVE)
        if isinstance(job.process, CLITool):
            self.run_cli_tool(job)
        if isinstance(job.process, Workflow):
            self.run_workflow(job)

    def run_cli_tool(self, job):
        inputs = {k: self.jobs.vars.get(join_id(job.id, k)) for k in job.process.inputs}
        outputs = self.runner(job, inputs)
        self.jobs.vars.update(prefix_keys(job.id, outputs))
        self.jobs.set_state(job.id, Job.FINISHED)

    def run_workflow(self, job):
        for step in job.process.steps.itervalues():
            step_job = Job('.'.join([job.id, step.id]), step.process, step_id=step.id, parent_id=job.id)
            self.jobs.add(step_job)
            if not job.process.predecessors(step.id):
                self.job_ready(step_job)

    def value_for_port(self, wf_job, port_id):
        workflow = wf_job.process
        deps = {k: self.jobs.vars[join_id(wf_job.id, k)] for k in workflow.dependencies(port_id)}
        return workflow.value_for_port(port_id, deps)

    def job_ready(self, job):
        self.jobs.set_state(job.id, Job.READY)
        if not job.step_id:
            return
        parent = self.jobs.get(job.parent_id)
        step = parent.process.steps[job.step_id]
        inputs = {join_id(parent.id, inp_id): self.value_for_port(parent, inp_id) for inp_id in step.inputs}
        self.jobs.vars.update(inputs)