def __init__(
    self,
    tag: str,
    output_type: ParseableType,
    glob: Optional[Union[Selector, str]] = None,
    doc: Optional[str] = None,
):
    """
    Describe how the engine collects one output and how a workflow may refer to it.

    :param tag: Identifier of the output; must be unique among the inputs and outputs.
    :param output_type: Type of the output being collected. It is passed through
        ``get_instantiated_type`` before being stored.
    :param glob: How to collect this output; accepts any :class:`janis.Selector` or a string.
    :param doc: Description of the output, used to generate docs.
    :raises Exception: if ``Validators.validate_identifier`` rejects ``tag``.
    """
    tag_is_valid = Validators.validate_identifier(tag)
    if not tag_is_valid:
        reason = Validators.reason_for_failure(tag)
        raise Exception(f"The identifier '{tag}' was invalid because {reason}")

    self.tag = tag
    self.output_type: ParseableType = get_instantiated_type(output_type)
    self.glob = glob
    self.doc = doc
def __init__(self, **connections):
    """
    Initialise an empty workflow graph, then hand over to ``self.constructor()``.

    :param connections: Arbitrary keyword arguments, stored unchanged on ``self.connections``.
    :raises Exception: if ``Validators.validate_identifier`` rejects ``self.id()``.
    """
    super().__init__(metadata_class=WorkflowMetadata)
    self.connections = connections

    Logger.log(f"Creating workflow with identifier: '{self.id()}'")

    identifier_ok = Validators.validate_identifier(self.id())
    if not identifier_ok:
        raise Exception(
            f"The identifier '{self.id()}' was invalid because {Validators.reason_for_failure(self.id())}"
        )

    # Lookup tables over the graph: every node by id, plus one table per node kind.
    self.nodes: Dict[str, Node] = {}
    self.input_nodes: Dict[str, InputNode] = {}
    self.step_nodes: Dict[str, StepNode] = {}
    self.output_nodes: Dict[str, OutputNode] = {}

    # Requirement flags; all start out False.
    self.has_scatter = False
    self.has_subworkflow = False
    self.has_multiple_inputs = False

    # All state is in place, so the subclass hook can now build the workflow.
    # (Per the original note, for the WorkflowBuilder this does nothing and
    # content is added later.)
    self.constructor()
def __init__(
    self,
    tag: str,
    output_type: ParseableType,
    glob: Optional[Union[Selector, str]] = None,
    presents_as: Optional[str] = None,
    secondaries_present_as: Optional[Dict[str, str]] = None,
    doc: Optional[Union[str, OutputDocumentation]] = None,
):
    """
    A ToolOutput instructs the engine how to collect an output and how it may be referenced in a workflow.

    :param tag: The identifier of an output, must be unique in the inputs and outputs.
    :param output_type: The type of output that is being collected.
    :param glob: How to collect this output, can accept any :class:`janis.Selector`.
        Required unless the output type is ``Stdout`` or ``Stderr``.
    :param presents_as: Stored unchanged on ``self.presents_as``.
    :param secondaries_present_as: Optional mapping keyed by secondary file; every key must be one
        of the secondary files declared by ``output_type``.
    :param doc: Documentation on what the output is, used to generate docs. A plain string (or None)
        is wrapped in an ``OutputDocumentation``.
    :raises Exception: if ``tag`` is not a valid identifier, if ``glob`` is missing for a type other
        than Stdout / Stderr, or if ``secondaries_present_as`` does not match the secondary files
        of the output type.
    """
    if not Validators.validate_identifier(tag):
        raise Exception(
            f"The identifier '{tag}' was invalid because {Validators.reason_for_failure(tag)}"
        )

    self.tag = tag
    self.output_type: ParseableType = get_instantiated_type(output_type)

    if not glob and not isinstance(self.output_type, (Stdout, Stderr)):
        raise Exception(
            "ToolOutput expects a glob when the output type is not Stdout / Stderr"
        )

    self.glob = glob
    self.presents_as = presents_as
    self.secondaries_present_as = secondaries_present_as
    self.doc = (
        doc if isinstance(doc, OutputDocumentation) else OutputDocumentation(doc=doc)
    )

    if self.secondaries_present_as:
        declared = self.output_type.secondary_files()
        if not declared:
            raise Exception(
                f"The ToolOutput '{self.id()}' requested a rewrite of secondary file extension through "
                f"'secondaries_present_as', but the type {self.output_type.id()} does not have any "
                f"secondary files."
            )

        secs = set(declared)
        invalid = set(self.secondaries_present_as.keys()) - secs
        if invalid:
            # Sort before joining: set iteration order is not stable between runs,
            # which made this message differ from one execution to the next.
            raise Exception(
                f"Error when constructing output '{self.id()}', the secondaries_present_as contained secondary "
                f"files ({', '.join(sorted(invalid))}) that were not found in the output "
                f"type '{self.output_type.id()}' ({', '.join(sorted(secs))})"
            )
def get_tool_tag_from_identifier(cls, identifier):
    """
    Derive a tool tag from ``identifier``, asking on stdin until the tag validates.

    The starting tag comes from ``cls.get_tag_from_identifier(identifier)``. While
    ``Validators.validate_identifier`` rejects the current tag, the user is prompted
    through ``input`` for a replacement.

    :param identifier: The full identifier the tag is derived from (echoed in the prompt).
    :return: The first tag that passes validation.
    """
    candidate = cls.get_tag_from_identifier(identifier)
    while True:
        if Validators.validate_identifier(candidate):
            return candidate
        prompt = (
            f"The tag for tool: '{candidate}' (fullID: {identifier}) was invalid, please choose another: "
        )
        candidate = str(input(prompt))
def __init__(
    self,
    tag: str,
    input_type: ParseableType,
    position: Optional[int] = None,
    prefix: Optional[str] = None,
    separate_value_from_prefix: bool = None,
    prefix_applies_to_all_elements: bool = None,
    separator: str = None,
    shell_quote: bool = None,
    localise_file: bool = None,
    default: Any = None,
    doc: Optional[str] = None,
):
    """
    A ``ToolInput`` is one input of a tool together with the options that control how it is
    bound on the command line. It needs either a position or a prefix to be bound.

    :param tag: Identifier of the input (unique among the inputs and outputs of a tool).
    :param input_type: Data type accepted by this input.
    :type input_type: ``janis.ParseableType``
    :param position: Position at which the input is applied. (Default = 0, after the base_command).
    :param prefix: Prefix placed before the element. (A space is also applied by default, see
        ``separate_value_from_prefix``.)
    :param separate_value_from_prefix: (Default: True) When ``True``, put a space between the prefix
        and the value.
    :param prefix_applies_to_all_elements: Apply the prefix to every element of the array
        (Array inputs only).
    :param shell_quote: Stops shell quotes from being applied in all circumstances; useful when
        joining multiple commands together.
    :param separator: Separator between the elements of an array (defaults to ' ').
    :param localise_file: Ensure the file(s) are localised into the execution directory.
    :param default: Value applied when the input is not defined.
    :param doc: Documentation string for the ToolInput; used to generate the tool documentation
        and to provide hints to the user.
    :raises Exception: if ``Validators.validate_identifier`` rejects ``tag``.
    """
    super().__init__(
        value=None,
        position=position,
        prefix=prefix,
        separate_value_from_prefix=separate_value_from_prefix,
        shell_quote=shell_quote,
        doc=doc,
    )

    tag_is_valid = Validators.validate_identifier(tag)
    if not tag_is_valid:
        reason = Validators.reason_for_failure(tag)
        raise Exception(f"The identifier '{tag}' was not validated because {reason}")

    self.tag: str = tag
    self.input_type: ParseableType = get_instantiated_type(input_type)
    self.default = default
    self.prefix_applies_to_all_elements = prefix_applies_to_all_elements
    self.separator = separator
    self.localise_file = localise_file
def verify_identifier(self, identifier: str, component: str):
    """
    Reject ``identifier`` if it cannot be used for a new component of this workflow.

    The checks run in this order and the first one that fails raises:

    1. the identifier is already a key of ``self.__dict__``;
    2. the identifier is already a key of ``self.nodes``;
    3. ``Validators.validate_identifier`` rejects the identifier.

    :param identifier: The identifier to check.
    :param component: The component being added; only used in the clash error message.
    :raises Exception: when any of the checks above fails.
    """
    if identifier in self.__dict__:
        problem = f"'{identifier}' is a protected keyword for a janis workflow"
    elif identifier in self.nodes:
        clash = self.nodes[identifier]
        problem = (
            f"There already exists a node (and component) with id '{identifier}'. The added "
            f"component ('{component}') clashes with '{clash!r}')."
        )
    elif not Validators.validate_identifier(identifier):
        problem = (
            f"The identifier '{identifier}' was invalid because {Validators.reason_for_failure(identifier)}"
        )
    else:
        return

    raise Exception(problem)
def parse_str(helpstr, option_marker: str = None, requires_prev_line_blank_or_param=False):
    """
    Split a tool's help text into a leading description and a list of ``ToolInput`` objects.

    Literal ``\\n`` sequences in ``helpstr`` are first turned into real newlines. Every non-blank
    line before the first line that starts with one of the option markers is collected as the
    description. The lines after the marker line are then scanned: a line whose first
    space-separated token starts with ``-`` becomes a new ``ToolInput``; other non-blank lines are
    appended to the documentation of the most recent input, if there is one.

    :param helpstr: The raw help text.
    :param option_marker: Optional extra marker (compared in lower case) added to the
        module-level ``option_markers`` for this call.
    :param requires_prev_line_blank_or_param: When True, a ``-`` line only starts a new input
        while ``last_line_was_blank_or_param`` is set. That flag starts True, is set on blank
        lines, and is cleared by a non-parameter line that has no preceding input to attach to.
    :return: A tuple ``(doc, args)`` of the description string and the list of ``ToolInput``.
    :raises Exception: if no line starts with any marker, or if a non-blank line yields no tokens.
    """
    lines = helpstr.replace("\\n", "\n").split("\n")

    markers = option_markers
    if option_marker:
        markers = markers.union({option_marker.lower()})

    # Phase 1: gather the description up to (not including) the options marker line.
    doc_lines = []
    options_idx = None
    for idx, line in enumerate(lines):
        if not line.lstrip():
            continue
        lowered = line.strip().lower()
        if any(lowered.startswith(m) for m in markers):
            options_idx = idx
            break
        doc_lines.append(line + "\n")
    doc = "".join(doc_lines)

    if options_idx is None:
        raise Exception("Couldn't find the start of the inputs")

    # Phase 2: walk the lines after the marker and build the inputs.
    args = []
    prev_arg = None
    last_line_was_blank_or_param = True

    for line in lines[options_idx + 1:]:
        if not line.lstrip():
            # A blank line ends the current argument's documentation.
            prev_arg = None
            last_line_was_blank_or_param = True
            continue

        line_args = [part.strip() for part in line.lstrip().split(" ") if part]
        largs = len(line_args)
        if largs == 0:
            raise Exception("No args when should have been filtered by previous step")

        may_start_param = (
            not requires_prev_line_blank_or_param or last_line_was_blank_or_param
        )

        if may_start_param and line_args[0].startswith("-"):
            # The first token sometimes holds several comma-separated flags.
            # Each processed entry is a (prefix, tag, has_equal, guessed_type) tuple, or None.
            processed_tags = [
                get_tag_and_cleanup_prefix(p) for p in line_args[0].split(",")
            ]
            processed_tags = [t for t in processed_tags if t is not None]
            if not processed_tags:
                continue

            # The entry with the longest tag is used for the input; the rest go into its doc.
            tags = sorted(processed_tags, key=lambda t: len(t[1]), reverse=True)
            potential_type = first_or_default([p[3] for p in processed_tags])

            tool_doc = ""
            if len(tags) > 1:
                tool_doc += "(" + ", ".join(t[0] for t in tags[1:]) + ") "
            if largs > 1:
                tool_doc += " ".join(line_args[1:])

            prefix, tag, has_equal, guessed_type = tags[0]
            eqifrequired = "=" if has_equal else ""

            if not potential_type:
                potential_type = Boolean

            if len(tag) == 1:
                while not Validators.validate_identifier(tag):
                    print(
                        f"The tag for '{prefix}' was invalid, we need you to come up with a new identifier for:"
                    )
                    print("\t" + tool_doc if tool_doc else line)
                    tag = str(input("New identifier: "))

            try:
                new_arg = ToolInput(
                    tag,
                    potential_type(optional=True),
                    prefix=prefix + eqifrequired,
                    separate_value_from_prefix=not has_equal,
                    doc=tool_doc.replace('"', "'"),
                )
            except Exception:
                # Previously the stale ``prev_arg`` (None, or the previous input) was still
                # appended after a failure. Skip the line and forget the previous input so
                # that following continuation lines are not attached to the wrong one.
                print(f"Skipping '{tag}' as it wasn't validated correctly")
                prev_arg = None
                continue

            prev_arg = new_arg
            args.append(new_arg)

        elif prev_arg:
            # Continuation line: extend the documentation of the most recent input.
            prev_arg.doc.doc += " " + line.lstrip()
        else:
            last_line_was_blank_or_param = False

    return doc, args
def __init__(
    self,
    tag: str,
    input_type: ParseableType,
    position: Optional[int] = None,
    prefix: Optional[str] = None,
    separate_value_from_prefix: Optional[bool] = None,
    prefix_applies_to_all_elements: Optional[bool] = None,
    presents_as: Optional[str] = None,
    secondaries_present_as: Optional[Dict[str, str]] = None,
    separator: Optional[str] = None,
    shell_quote: Optional[bool] = None,
    localise_file: Optional[bool] = None,
    default: Any = None,
    doc: Optional[Union[str, InputDocumentation]] = None,
):
    """
    A ``ToolInput`` represents an input to a tool, with parameters that allow it to be bound on the command line.
    The ToolInput must have either a position or prefix set to be bound onto the command line.

    :param tag: The identifier of the input (unique to inputs and outputs of a tool)
    :param input_type: The data type that this input accepts
    :type input_type: ``janis.ParseableType``
    :param position: The position of the input to be applied. (Default = 0, after the base_command).
    :param prefix: The prefix to be appended before the element. (By default, a space will also be applied,
        see ``separate_value_from_prefix`` for more information)
    :param separate_value_from_prefix: (Default: True) Add a space between the prefix and value when ``True``.
    :param prefix_applies_to_all_elements: Applies the prefix to each element of the array (Array inputs only)
    :param presents_as: Stored unchanged on ``self.presents_as``.
    :param secondaries_present_as: Optional mapping keyed by secondary file; every key must be one of the
        secondary files declared by ``input_type``.
    :param shell_quote: Stops shell quotes from being applied in all circumstances, useful when joining
        multiple commands together.
    :param separator: The separator between each element of an array (defaults to ' ')
    :param localise_file: Ensures that the file(s) are localised into the execution directory.
    :param default: The default value to be applied if the input is not defined.
    :param doc: Documentation string for the ToolInput, this is used to generate the tool documentation and
        provide hints to the user. Anything that is not a ``DocumentationMeta`` is wrapped in an
        ``InputDocumentation``.
    :raises Exception: if ``tag`` is not a valid identifier, or if ``secondaries_present_as`` does not match
        the secondary files of the input type.
    """
    super().__init__(
        value=None,
        prefix=prefix,
        position=position,
        separate_value_from_prefix=separate_value_from_prefix,
        doc=None,
        shell_quote=shell_quote,
    )

    # The parent receives doc=None; the documentation object is stored here instead.
    self.doc: InputDocumentation = (
        doc if isinstance(doc, DocumentationMeta) else InputDocumentation(doc=doc)
    )

    if not Validators.validate_identifier(tag):
        raise Exception(
            f"The identifier '{tag}' was not validated because {Validators.reason_for_failure(tag)}"
        )

    self.tag: str = tag
    self.input_type: ParseableType = get_instantiated_type(input_type)
    self.default = default
    self.prefix_applies_to_all_elements = prefix_applies_to_all_elements
    self.separator = separator
    self.localise_file = localise_file
    self.presents_as = presents_as
    self.secondaries_present_as = secondaries_present_as

    if self.secondaries_present_as:
        declared = self.input_type.secondary_files()
        if not declared:
            # The messages below used to say "ToolOutput" / "output" (copied from ToolOutput).
            raise Exception(
                f"The ToolInput '{self.id()}' requested a rewrite of secondary file extension through "
                f"'secondaries_present_as', but the type {self.input_type.id()} does not have any "
                f"secondary files."
            )

        secs = set(declared)
        invalid = set(self.secondaries_present_as.keys()) - secs
        if invalid:
            # Sort before joining so the message is the same on every run.
            raise Exception(
                f"Error when constructing input '{self.id()}', the secondaries_present_as contained secondary "
                f"files ({', '.join(sorted(invalid))}) that were not found in the input "
                f"type '{self.input_type.id()}' ({', '.join(sorted(secs))})"
            )
def __init__(
    self,
    tag: str,
    output_type: ParseableType,
    selector: Optional[Union[Selector, str]] = None,
    presents_as: Optional[str] = None,
    secondaries_present_as: Optional[Dict[str, str]] = None,
    doc: Optional[Union[str, OutputDocumentation]] = None,
    glob: Optional[Union[Selector, str]] = None,
    _skip_output_quality_check=False,
):
    """
    A ToolOutput instructs the engine how to collect an output and how it may be referenced in a workflow.

    :param tag: The identifier of an output, must be unique in the inputs and outputs.
    :param output_type: The type of output that is being collected.
    :param selector: How to collect this output, can accept any :class:`janis.Selector`.
        Required unless the output type is ``Stdout`` or ``Stderr``.
    :param presents_as: Stored unchanged on ``self.presents_as``; cannot be combined with an
        ``Operator`` selector.
    :param secondaries_present_as: Optional mapping keyed by secondary file; every key must be one
        of the secondary files declared by ``output_type``.
    :param glob: (DEPRECATED) An alias for `selector`; used only when ``selector`` is None.
    :param doc: Documentation on what the output is, used to generate docs. A plain string (or None)
        is wrapped in an ``OutputDocumentation``.
    :param _skip_output_quality_check: DO NOT USE THIS PARAMETER, it's a scapegoat for parsing CWL
        ExpressionTools when a cwl.output.json is generated. When True, the missing-selector check
        is skipped.
    :raises TypeError: if both ``selector`` and ``glob`` are given.
    :raises Exception: if ``tag`` is not a valid identifier, if no selector is given for a type other
        than Stdout / Stderr, if ``presents_as`` is combined with an ``Operator`` selector, or if
        ``secondaries_present_as`` does not match the secondary files of the output type.
    """
    if not Validators.validate_identifier(tag):
        raise Exception(
            f"The identifier '{tag}' was invalid because {Validators.reason_for_failure(tag)}"
        )

    self.tag = tag
    self.output_type: ParseableType = get_instantiated_type(output_type)
    self._skip_output_quality_check = _skip_output_quality_check

    if glob is not None:
        if selector is not None:
            # This message used to name ToolInput and recommend the deprecated ``glob``.
            raise TypeError(
                f"ToolOutput({tag}) received inputs for both selector and glob. "
                f"Please only use selector"
            )
        selector = glob

    if (
        not _skip_output_quality_check
        and selector is None
        and not isinstance(self.output_type, (Stdout, Stderr))
    ):
        raise Exception(
            "ToolOutput expects a 'selector=' param when the output type is not Stdout / Stderr"
        )

    self.selector = selector
    self.presents_as = presents_as
    self.secondaries_present_as = secondaries_present_as
    self.doc = (
        doc if isinstance(doc, OutputDocumentation) else OutputDocumentation(doc=doc)
    )

    if isinstance(selector, Operator) and self.presents_as:
        raise Exception(
            f"Error when constructing output '{self.id()}', Janis does not support 'presents_as' AND "
            "operators within a ToolOutput selector. Please raise an issue if you think this is in error."
        )

    if self.secondaries_present_as:
        declared = self.output_type.secondary_files()
        if not declared:
            raise Exception(
                f"The ToolOutput '{self.id()}' requested a rewrite of secondary file extension through "
                f"'secondaries_present_as', but the type {self.output_type.id()} does not have any "
                f"secondary files."
            )

        secs = set(declared)
        invalid = set(self.secondaries_present_as.keys()) - secs
        if invalid:
            # Sort before joining so the message is the same on every run.
            raise Exception(
                f"Error when constructing output '{self.id()}', the secondaries_present_as contained secondary "
                f"files ({', '.join(sorted(invalid))}) that were not found in the output "
                f"type '{self.output_type.id()}' ({', '.join(sorted(secs))})"
            )
def test_invalid_identifiers(self):
    """The identifier ``test-workflow`` is expected to fail validation."""
    is_valid = Validators.validate_identifier("test-workflow")
    self.assertFalse(is_valid)
def test_valid_identifiers(self):
    """The identifier ``test_workflow`` is expected to pass validation."""
    is_valid = Validators.validate_identifier("test_workflow")
    self.assertTrue(is_valid)
def test_invalid_sample_name(self):
    """The sample-style name ``fastqs_CDG-025-156R_PDX`` is expected to fail validation."""
    sample_name = "fastqs_CDG-025-156R_PDX"
    is_valid = Validators.validate_identifier(sample_name)
    self.assertFalse(is_valid)