def _init_job(self, joborder, input_basedir, **kwargs):
    """Prepare a Builder holding the validated job inputs and command bindings.

    Raises WorkflowException if the job order fails schema validation or the
    tool declares an unsupported process requirement.
    """
    builder = Builder()
    builder.job = copy.deepcopy(joborder)
    # Fill in declared defaults for inputs absent from the job order.
    for i in self.tool["inputs"]:
        (_, d) = urlparse.urldefrag(i["id"])
        if d not in builder.job and "default" in i:
            builder.job[d] = i["default"]
    # Validate job order
    try:
        validate.validate_ex(self.names.get_name("input_record_schema", ""), builder.job)
    except validate.ValidationException as e:
        raise WorkflowException("Error validating input record, " + str(e))
    # Reject requirements this runner does not implement.
    for r in self.tool.get("requirements", []):
        if r["class"] not in supportedProcessRequirements:
            raise WorkflowException("Unsupported process requirement %s" % (r["class"]))
    # Caller-supplied (inherited) requirements/hints first, then the tool's own.
    self.requirements = kwargs.get("requirements", []) + self.tool.get("requirements", [])
    self.hints = kwargs.get("hints", []) + self.tool.get("hints", [])
    builder.input_basedir = input_basedir
    builder.files = []
    builder.bindings = []
    builder.schemaDefs = self.schemaDefs
    builder.docpath = self.docpath
    builder.names = self.names
    # Bind job values against the input record schema to produce bindings.
    builder.bindings.extend(builder.bind_input(self.inputs_record_schema, builder.job))
    return builder
def validate_hints(self, hints, strict):
    """Validate each hint against its schema; unknown hint classes are
    logged rather than treated as errors."""
    for hint in hints:
        try:
            schema = self.names.get_name(hint["class"], "")
            if schema is None:
                _logger.info(validate.ValidationException("Unknown hint %s" % (hint["class"])))
            else:
                validate.validate_ex(schema, hint, strict=strict)
        except validate.ValidationException as v:
            raise validate.ValidationException("Validating hint `%s`: %s" % (hint["class"], str(v)))
def job(self, joborder, basedir, output_callback, **kwargs):
    """Yield a WorkflowJob for this workflow, then each job it generates."""
    # Validate job order
    validate.validate_ex(self.names.get_name("input_record_schema", ""), joborder)
    kwargs["part_of"] = "workflow %s" % (id(self))
    workflow_job = WorkflowJob(self, **kwargs)
    yield workflow_job
    for produced in workflow_job.job(joborder, basedir, output_callback, **kwargs):
        yield produced
def job(self, joborder, basedir, output_callback, **kwargs):
    """Drive the workflow: yield runnable step jobs until every step completes.

    Yields jobs as their inputs become available, and None when no step can
    make progress on a pass.  Invokes output_callback(outputs, status) once
    all steps are done.  Raises WorkflowException on missing inputs or a
    dangling "connect" source.
    """
    # Validate job order
    validate.validate_ex(self.names.get_name("input_record_schema", ""), joborder)
    requirements = kwargs.get("requirements", []) + self.tool.get("requirements", [])
    hints = kwargs.get("hints", []) + self.tool.get("hints", [])
    steps = [makeTool(step, basedir) for step in self.tool.get("steps", [])]
    # Shuffle so scheduling does not silently depend on document order.
    random.shuffle(steps)
    self.state = {}
    self.processStatus = "success"
    # Seed workflow state from the job order, falling back to declared defaults.
    for i in self.tool["inputs"]:
        (_, iid) = urlparse.urldefrag(i["id"])
        if iid in joborder:
            self.state[i["id"]] = WorkflowStateItem(i, copy.deepcopy(joborder[iid]))
        elif "default" in i:
            self.state[i["id"]] = WorkflowStateItem(i, copy.deepcopy(i["default"]))
        else:
            raise WorkflowException("Input '%s' not in input object and does not have a default value." % (i["id"]))
    for s in steps:
        for out in s.tool["outputs"]:
            self.state[out["id"]] = None
        s.completed = False
    completed = 0
    while completed < len(steps):
        made_progress = False
        completed = 0
        for step in steps:
            if step.completed:
                completed += 1
            else:
                for newjob in self.try_make_job(step, basedir, requirements=requirements, hints=hints, **kwargs):
                    if newjob:
                        made_progress = True
                        yield newjob
        if not made_progress and completed < len(steps):
            # Nothing runnable this pass; yield None so the caller can wait.
            yield None
    wo = {}
    for i in self.tool["outputs"]:
        if "connect" in i:
            (_, src) = urlparse.urldefrag(i['id'])
            if i["connect"]["source"] not in self.state:
                # BUG FIX: the message previously interpolated the undefined
                # name `inp`, raising NameError instead of this exception.
                raise WorkflowException("Connect source '%s' on parameter '%s' does not exist" % (i["connect"]["source"], i["id"]))
            wo[src] = self.state[i["connect"]["source"]].value
    output_callback(wo, self.processStatus)
def validate_hints(self, hints, strict):
    """Check hints against their schemas.  An unrecognized hint class is
    merely logged; a malformed known hint raises ValidationException."""
    for hint in hints:
        try:
            hint_schema = self.names.get_name(hint["class"], "")
            if hint_schema is not None:
                validate.validate_ex(hint_schema, hint, strict=strict)
            else:
                _logger.info(validate.ValidationException("Unknown hint %s" % (hint["class"])))
        except validate.ValidationException as v:
            raise validate.ValidationException("Validating hint `%s`: %s" % (hint["class"], str(v)))
def collect_output_ports(self, ports, builder, outdir):
    """Collect the tool's output object from outdir.

    If the tool wrote a cwl.output.json file, that document is used
    verbatim; otherwise each port's output binding is evaluated.
    Raises WorkflowException when the result fails schema validation.
    """
    # Initialized before the try so the except clause can always report it.
    ret = {}
    try:
        custom_output = os.path.join(outdir, "cwl.output.json")
        if builder.fs_access.exists(custom_output):
            # BUG FIX: yaml.load() was given the path string, so it parsed
            # the file *name* rather than the file contents.
            # NOTE(review): yaml.load is unsafe on untrusted documents;
            # consider yaml.safe_load if tool outputs are not trusted.
            with open(custom_output) as f:
                ret = yaml.load(f)
            validate.validate_ex(self.names.get_name("outputs_record_schema", ""), ret)
            return ret
        for port in ports:
            doc_url, fragment = urlparse.urldefrag(port["id"])
            ret[fragment] = self.collect_output(port, builder, outdir)
        validate.validate_ex(self.names.get_name("outputs_record_schema", ""), ret)
        return ret if ret is not None else {}
    except validate.ValidationException as e:
        # BUG FIX: `ret` could previously be unbound here (NameError) when
        # validation of the custom output document failed.
        raise WorkflowException("Error validating output record, " + str(e) + "\n in " + json.dumps(ret, indent=4))
def validate_requirements(self, tool, field):
    """Validate every entry of tool[field], recursing into nested
    requirements/hints.  Failures on hints only warn; failures on
    requirements raise ValidationException."""
    for req in tool.get(field, []):
        try:
            req_schema = self.names.get_name(req["class"], "")
            if req_schema is None:
                raise validate.ValidationException("Unknown requirement %s" % (req["class"]))
            validate.validate_ex(req_schema, req)
            # A requirement may itself carry nested requirements and hints.
            for nested in ("requirements", "hints"):
                if nested in req:
                    self.validate_requirements(req, nested)
        except validate.ValidationException as v:
            err = "While validating %s %s\n%s" % (field, req["class"], validate.indent(str(v)))
            if field == "hints":
                _logger.warn(err)
            else:
                raise validate.ValidationException(err)
def _init_job(self, joborder, input_basedir, **kwargs):
    """Prepare a Builder: apply defaults, validate the job order, choose
    output/tmp directories, and compute command-line bindings."""
    b = Builder()
    b.job = copy.deepcopy(joborder)
    # Apply declared defaults for inputs absent from the job order.
    for param in self.tool["inputs"]:
        (_, frag) = urlparse.urldefrag(param["id"])
        if frag not in b.job and "default" in param:
            b.job[frag] = param["default"]
    # Validate job order
    try:
        validate.validate_ex(self.names.get_name("input_record_schema", ""), b.job)
    except validate.ValidationException as e:
        raise WorkflowException("Error validating input record, " + str(e))
    for req in self.requirements:
        if req["class"] not in supportedProcessRequirements:
            raise WorkflowException("Unsupported process requirement %s" % (req["class"]))
    b.files = []
    b.bindings = []
    b.schemaDefs = self.schemaDefs
    b.names = self.names
    b.requirements = self.requirements
    docker_req, _ = self.get_requirement("DockerRequirement")
    if docker_req and kwargs.get("use_container"):
        # Paths as seen from inside the container.
        b.outdir = kwargs.get("docker_outdir") or "/tmp/job_output"
        b.tmpdir = kwargs.get("docker_tmpdir") or "/tmp/job_tmp"
    else:
        b.outdir = kwargs.get("outdir") or tempfile.mkdtemp()
        b.tmpdir = kwargs.get("tmpdir") or tempfile.mkdtemp()
    b.fs_access = kwargs.get("fs_access") or StdFsAccess(input_basedir)
    b.bindings.extend(b.bind_input(self.inputs_record_schema, b.job))
    return b
def collect_output_ports(self, ports, builder, outdir):
    """Collect the tool's output object from outdir.

    A cwl.output.json file written by the tool takes precedence over
    evaluating the individual port bindings.  Raises WorkflowException
    when the collected object fails schema validation.
    """
    # Initialized before the try so the except clause can always report it.
    ret = {}
    try:
        custom_output = os.path.join(outdir, "cwl.output.json")
        if builder.fs_access.exists(custom_output):
            # BUG FIX: yaml.load() was given the path string, so it parsed
            # the file *name* rather than the file contents.
            # NOTE(review): yaml.load is unsafe on untrusted documents;
            # consider yaml.safe_load if tool outputs are not trusted.
            with open(custom_output) as f:
                ret = yaml.load(f)
            validate.validate_ex(self.names.get_name("outputs_record_schema", ""), ret)
            return ret
        for port in ports:
            doc_url, fragment = urlparse.urldefrag(port['id'])
            ret[fragment] = self.collect_output(port, builder, outdir)
        validate.validate_ex(self.names.get_name("outputs_record_schema", ""), ret)
        return ret if ret is not None else {}
    except validate.ValidationException as e:
        # BUG FIX: `ret` could previously be unbound here (NameError) when
        # validation of the custom output document failed.
        raise WorkflowException("Error validating output record, " + str(e) + "\n in " + json.dumps(ret, indent=4))
def _init_job(self, joborder, input_basedir, **kwargs):
    """Prepare a Builder: defaults, validation, work directories, bindings.

    Raises WorkflowException if the job order fails schema validation or the
    tool declares an unsupported process requirement.
    """
    builder = Builder()
    builder.job = copy.deepcopy(joborder)
    # Fill in declared defaults for inputs absent from the job order.
    for i in self.tool["inputs"]:
        (_, d) = urlparse.urldefrag(i["id"])
        if d not in builder.job and "default" in i:
            builder.job[d] = i["default"]
    # Validate job order
    try:
        validate.validate_ex(self.names.get_name("input_record_schema", ""), builder.job)
    except validate.ValidationException as e:
        raise WorkflowException("Error validating input record, " + str(e))
    for r in self.requirements:
        if r["class"] not in supportedProcessRequirements:
            raise WorkflowException("Unsupported process requirement %s" % (r["class"]))
    builder.files = []
    builder.bindings = []
    builder.schemaDefs = self.schemaDefs
    builder.names = self.names
    builder.requirements = self.requirements
    dockerReq, _ = self.get_requirement("DockerRequirement")
    if dockerReq and kwargs.get("use_container"):
        # Running in a container: use the in-container output/tmp paths.
        builder.outdir = kwargs.get("docker_outdir") or "/tmp/job_output"
        builder.tmpdir = kwargs.get("docker_tmpdir") or "/tmp/job_tmp"
    else:
        builder.outdir = kwargs.get("outdir") or tempfile.mkdtemp()
        builder.tmpdir = kwargs.get("tmpdir") or tempfile.mkdtemp()
    builder.fs_access = kwargs.get("fs_access") or StdFsAccess(input_basedir)
    # Bind job values against the input record schema to produce bindings.
    builder.bindings.extend(builder.bind_input(self.inputs_record_schema, builder.job))
    return builder
def job(self, joborder, basedir, output_callback, **kwargs):
    """Drive the workflow: yield runnable step jobs until every step completes
    (or processStatus is no longer "success"), then optionally move outputs
    into the final output directory and invoke output_callback.

    Yields None when no step can make progress on a pass, so the caller can
    wait for running jobs to finish.
    """
    # Validate job order
    validate.validate_ex(self.names.get_name("input_record_schema", ""), joborder)
    self.adjust_for_scatter(self.steps)
    # Shuffle so scheduling does not silently depend on document order.
    random.shuffle(self.steps)
    self.state = {}
    self.processStatus = "success"
    # Seed workflow state from the job order, falling back to declared defaults.
    for i in self.tool["inputs"]:
        (_, iid) = urlparse.urldefrag(i["id"])
        if iid in joborder:
            self.state[i["id"]] = WorkflowStateItem(i, copy.deepcopy(joborder[iid]))
        elif "default" in i:
            self.state[i["id"]] = WorkflowStateItem(i, copy.deepcopy(i["default"]))
        else:
            raise WorkflowException("Input '%s' not in input object and does not have a default value." % (i["id"]))
    for s in self.steps:
        for out in s.tool["outputs"]:
            self.state[out["id"]] = None
        s.submitted = False
        s.completed = False
    if "outdir" in kwargs:
        # Consume "outdir" so step jobs get their own temp directories.
        outdir = kwargs["outdir"]
        del kwargs["outdir"]
    else:
        outdir = tempfile.mkdtemp()
    actual_jobs = []
    completed = 0
    while completed < len(self.steps) and self.processStatus == "success":
        made_progress = False
        completed = 0
        for step in self.steps:
            if step.completed:
                completed += 1
            else:
                for newjob in self.try_make_job(step, basedir, **kwargs):
                    if newjob:
                        made_progress = True
                        actual_jobs.append(newjob)
                        yield newjob
        if not made_progress and completed < len(self.steps):
            # Nothing runnable this pass; yield None so the caller can wait.
            yield None
    wo = self.object_from_state(self.tool["outputs"], True)
    if kwargs.get("move_outputs", True):
        # First pass: compute destinations and detect filename collisions.
        targets = set()
        conflicts = set()
        for f in findfiles(wo):
            for a in actual_jobs:
                if a.outdir and f["path"].startswith(a.outdir):
                    src = f["path"]
                    dst = os.path.join(outdir, src[len(a.outdir)+1:])
                    if dst in targets:
                        conflicts.add(dst)
                    else:
                        targets.add(dst)
        # Second pass: move files, renaming any conflicting destinations.
        for f in findfiles(wo):
            for a in actual_jobs:
                if a.outdir and f["path"].startswith(a.outdir):
                    src = f["path"]
                    dst = os.path.join(outdir, src[len(a.outdir)+1:])
                    if dst in conflicts:
                        # Disambiguate with a random suffix before the extension.
                        sp = os.path.splitext(dst)
                        dst = "%s-%s%s" % (sp[0], str(random.randint(1, 1000000000)), sp[1])
                    dirname = os.path.dirname(dst)
                    if not os.path.exists(dirname):
                        os.makedirs(dirname)
                    _logger.info("Moving '%s' to '%s'", src, dst)
                    shutil.move(src, dst)
                    f["path"] = dst
        for a in actual_jobs:
            if a.outdir:
                _logger.info("Removing intermediate output directory %s", a.outdir)
                shutil.rmtree(a.outdir, True)
    output_callback(wo, self.processStatus)
def job(self, joborder, basedir, output_callback, **kwargs):
    """Drive the workflow: yield runnable step jobs until every step completes
    (or processStatus is no longer "success"), then optionally move outputs
    into the final output directory and invoke output_callback.

    Yields None when no step can make progress on a pass, so the caller can
    wait for running jobs to finish.
    """
    # Validate job order
    validate.validate_ex(self.names.get_name("input_record_schema", ""), joborder)
    self.adjust_for_scatter(self.steps)
    # Shuffle so scheduling does not silently depend on document order.
    random.shuffle(self.steps)
    self.state = {}
    self.processStatus = "success"
    # Seed workflow state from the job order, falling back to declared defaults.
    for i in self.tool["inputs"]:
        (_, iid) = urlparse.urldefrag(i["id"])
        if iid in joborder:
            self.state[i["id"]] = WorkflowStateItem(
                i, copy.deepcopy(joborder[iid]))
        elif "default" in i:
            self.state[i["id"]] = WorkflowStateItem(
                i, copy.deepcopy(i["default"]))
        else:
            raise WorkflowException(
                "Input '%s' not in input object and does not have a default value." % (i["id"]))
    for s in self.steps:
        for out in s.tool["outputs"]:
            self.state[out["id"]] = None
        s.submitted = False
        s.completed = False
    if "outdir" in kwargs:
        # Consume "outdir" so step jobs get their own temp directories.
        outdir = kwargs["outdir"]
        del kwargs["outdir"]
    else:
        outdir = tempfile.mkdtemp()
    actual_jobs = []
    completed = 0
    while completed < len(self.steps) and self.processStatus == "success":
        made_progress = False
        completed = 0
        for step in self.steps:
            if step.completed:
                completed += 1
            else:
                for newjob in self.try_make_job(step, basedir, **kwargs):
                    if newjob:
                        made_progress = True
                        actual_jobs.append(newjob)
                        yield newjob
        if not made_progress and completed < len(self.steps):
            # Nothing runnable this pass; yield None so the caller can wait.
            yield None
    wo = self.object_from_state(self.tool["outputs"], True)
    if kwargs.get("move_outputs", True):
        # First pass: compute destinations and detect filename collisions.
        targets = set()
        conflicts = set()
        for f in findfiles(wo):
            for a in actual_jobs:
                if a.outdir and f["path"].startswith(a.outdir):
                    src = f["path"]
                    dst = os.path.join(outdir, src[len(a.outdir) + 1:])
                    if dst in targets:
                        conflicts.add(dst)
                    else:
                        targets.add(dst)
        # Second pass: move files, renaming any conflicting destinations.
        for f in findfiles(wo):
            for a in actual_jobs:
                if a.outdir and f["path"].startswith(a.outdir):
                    src = f["path"]
                    dst = os.path.join(outdir, src[len(a.outdir) + 1:])
                    if dst in conflicts:
                        # Disambiguate with a random suffix before the extension.
                        sp = os.path.splitext(dst)
                        dst = "%s-%s%s" % (
                            sp[0], str(random.randint(1, 1000000000)), sp[1])
                    dirname = os.path.dirname(dst)
                    if not os.path.exists(dirname):
                        os.makedirs(dirname)
                    _logger.info("Moving '%s' to '%s'", src, dst)
                    shutil.move(src, dst)
                    f["path"] = dst
        for a in actual_jobs:
            if a.outdir:
                _logger.info("Removing intermediate output directory %s", a.outdir)
                shutil.rmtree(a.outdir, True)
    output_callback(wo, self.processStatus)
def __init__(self, toolpath_object, validateAs, do_validate=True, **kwargs):
    """Initialize a process from a parsed tool document.

    Validates the document against the `validateAs` schema (unless
    do_validate is False), collects requirements/hints, registers any
    SchemaDefRequirement types, and builds Avro record schemas for the
    tool's inputs and outputs.  Raises ValidationException on any
    schema violation.
    """
    (_, self.names) = get_schema()
    self.tool = toolpath_object
    if do_validate:
        try:
            # Validate tool document
            validate.validate_ex(self.names.get_name(validateAs, ""), self.tool, strict=kwargs.get("strict"))
        except validate.ValidationException as v:
            raise validate.ValidationException("Could not validate %s as %s:\n%s" % (self.tool.get("id"), validateAs, validate.indent(str(v))))
    # Caller-supplied (inherited) requirements/hints first, then the tool's own.
    self.requirements = kwargs.get("requirements", []) + self.tool.get("requirements", [])
    self.hints = kwargs.get("hints", []) + self.tool.get("hints", [])
    self.validate_hints(self.tool.get("hints", []), strict=kwargs.get("strict"))
    self.schemaDefs = {}
    sd, _ = self.get_requirement("SchemaDefRequirement")
    if sd:
        for i in sd["types"]:
            avro.schema.make_avsc_object(i, self.names)
            self.schemaDefs[i["name"]] = i

    def build_record_schema(name, params):
        # Build and register an Avro record schema from a list of CWL
        # parameters.  (Extracted: this loop was duplicated verbatim for
        # inputs and outputs.)
        schema = {"name": name, "type": "record", "fields": []}
        for p in params:
            c = copy.copy(p)
            doc_url, fragment = urlparse.urldefrag(c['id'])
            c["name"] = fragment
            del c["id"]
            if "type" not in c:
                raise validate.ValidationException("Missing `type` in parameter `%s`" % c["name"])
            if "default" in c:
                # A default implies the value may be omitted, so accept null.
                c["type"] = ["null"] + aslist(c["type"])
            # (The original `else: c["type"] = c["type"]` branch was a no-op
            # and has been removed.)
            schema["fields"].append(c)
        avro.schema.make_avsc_object(schema, self.names)
        return schema

    # Build record schemas from the tool's inputs and outputs.
    self.inputs_record_schema = build_record_schema("input_record_schema", self.tool["inputs"])
    self.outputs_record_schema = build_record_schema("outputs_record_schema", self.tool["outputs"])
def __init__(self, toolpath_object, validateAs, do_validate=True, **kwargs):
    """Initialize a process from a parsed tool document.

    Validates the document against the `validateAs` schema (unless
    do_validate is False), collects requirements/hints, registers any
    SchemaDefRequirement types, and builds Avro record schemas for the
    tool's inputs and outputs.  Raises ValidationException on any
    schema violation.
    """
    (_, self.names) = get_schema()
    self.tool = toolpath_object
    if do_validate:
        try:
            # Validate tool document
            validate.validate_ex(self.names.get_name(validateAs, ""),
                                 self.tool,
                                 strict=kwargs.get("strict"))
        except validate.ValidationException as v:
            raise validate.ValidationException(
                "Could not validate %s as %s:\n%s" %
                (self.tool.get("id"), validateAs, validate.indent(str(v))))
    # Caller-supplied (inherited) requirements/hints first, then the tool's own.
    self.requirements = kwargs.get("requirements", []) + self.tool.get(
        "requirements", [])
    self.hints = kwargs.get("hints", []) + self.tool.get("hints", [])
    self.validate_hints(self.tool.get("hints", []),
                        strict=kwargs.get("strict"))
    # Register any custom types declared via SchemaDefRequirement.
    self.schemaDefs = {}
    sd, _ = self.get_requirement("SchemaDefRequirement")
    if sd:
        for i in sd["types"]:
            avro.schema.make_avsc_object(i, self.names)
            self.schemaDefs[i["name"]] = i
    # Build record schema from inputs
    self.inputs_record_schema = {
        "name": "input_record_schema",
        "type": "record",
        "fields": []
    }
    for i in self.tool["inputs"]:
        c = copy.copy(i)
        doc_url, fragment = urlparse.urldefrag(c['id'])
        c["name"] = fragment
        del c["id"]
        if "type" not in c:
            raise validate.ValidationException(
                "Missing `type` in parameter `%s`" % c["name"])
        if "default" in c:
            # A default implies the value may be omitted, so accept null.
            c["type"] = ["null"] + aslist(c["type"])
        else:
            c["type"] = c["type"]
        self.inputs_record_schema["fields"].append(c)
    avro.schema.make_avsc_object(self.inputs_record_schema, self.names)
    # Build the matching record schema from outputs.
    self.outputs_record_schema = {
        "name": "outputs_record_schema",
        "type": "record",
        "fields": []
    }
    for i in self.tool["outputs"]:
        c = copy.copy(i)
        doc_url, fragment = urlparse.urldefrag(c['id'])
        c["name"] = fragment
        del c["id"]
        if "type" not in c:
            raise validate.ValidationException(
                "Missing `type` in parameter `%s`" % c["name"])
        if "default" in c:
            # A default implies the value may be omitted, so accept null.
            c["type"] = ["null"] + aslist(c["type"])
        else:
            c["type"] = c["type"]
        self.outputs_record_schema["fields"].append(c)
    avro.schema.make_avsc_object(self.outputs_record_schema, self.names)
def __init__(self, toolpath_object, validateAs, docpath, **kwargs):
    """Initialize a process from a parsed tool document.

    Validates the document and its requirements/hints, tags them with
    the document path, registers an "Any" placeholder type plus any
    SchemaDefRequirement types, and builds Avro record schemas for the
    tool's inputs and outputs.  Raises ValidationException on any
    schema violation.
    """
    (_, self.names) = get_schema()
    self.docpath = docpath
    self.tool = toolpath_object
    try:
        # Validate tool document
        validate.validate_ex(self.names.get_name(validateAs, ""), self.tool, **kwargs)
    except validate.ValidationException as v:
        raise validate.ValidationException("Could not validate %s:\n%s" % (self.tool.get("id"), validate.indent(str(v))))
    self.validate_requirements(self.tool, "requirements")
    self.validate_requirements(self.tool, "hints")
    # Tag each requirement/hint with the source document path.
    for t in self.tool.get("requirements", []):
        t["_docpath"] = docpath
    for t in self.tool.get("hints", []):
        t["_docpath"] = docpath
    # Register a single-symbol "Any" enum as a wildcard type.
    avro.schema.make_avsc_object({
        "name": "Any",
        "type": "enum",
        "symbols": ["Any"]
    }, self.names)
    # Register any custom types declared via SchemaDefRequirement.
    self.schemaDefs = {}
    sd, _ = get_feature("SchemaDefRequirement", requirements=self.tool.get("requirements"), hints=self.tool.get("hints"))
    if sd:
        for i in sd["types"]:
            avro.schema.make_avsc_object(i, self.names)
            self.schemaDefs[i["name"]] = i
    # Build record schema from inputs
    self.inputs_record_schema = {"name": "input_record_schema", "type": "record", "fields": []}
    for i in self.tool["inputs"]:
        c = copy.copy(i)
        doc_url, fragment = urlparse.urldefrag(c['id'])
        c["name"] = fragment
        del c["id"]
        if "type" not in c:
            raise validate.ValidationException("Missing `type` in parameter `%s`" % c["name"])
        if "default" in c:
            # A default implies the value may be omitted, so accept null.
            c["type"] = ["null"] + aslist(c["type"])
        else:
            c["type"] = c["type"]
        self.inputs_record_schema["fields"].append(c)
    avro.schema.make_avsc_object(self.inputs_record_schema, self.names)
    # Build the matching record schema from outputs.
    self.outputs_record_schema = {"name": "outputs_record_schema", "type": "record", "fields": []}
    for i in self.tool["outputs"]:
        c = copy.copy(i)
        doc_url, fragment = urlparse.urldefrag(c['id'])
        c["name"] = fragment
        del c["id"]
        if "type" not in c:
            raise validate.ValidationException("Missing `type` in parameter `%s`" % c["name"])
        if "default" in c:
            # A default implies the value may be omitted, so accept null.
            c["type"] = ["null"] + aslist(c["type"])
        else:
            c["type"] = c["type"]
        self.outputs_record_schema["fields"].append(c)
    avro.schema.make_avsc_object(self.outputs_record_schema, self.names)