def _set_status_channels(self):
    """Compiles all status channels for the status compiler process

    Gathers the ``status_strs`` of every process in ``self.processes``
    whose class is not listed in ``self.skip_class``, hands them to a new
    ``StatusCompiler`` instance and appends that instance to
    ``self.processes``. When no status channel is found, the method
    returns without adding the status compiler.

    Raises
    ------
    eh.ProcessError
        When the same status channel name is collected more than once.
    """

    status_inst = pc.StatusCompiler(template="status_compiler")

    # isinstance accepts a tuple of classes, which avoids building a
    # temporary list of booleans for every process
    skip_classes = tuple(self.skip_class)

    # Compile status channels from pipeline processes
    status_channels = []
    for p in self.processes:
        if not isinstance(p, skip_classes):
            status_channels.extend(p.status_strs)

    if not status_channels:
        logger.debug("No status channels found. Skipping status compiler "
                     "process")
        return

    logger.debug("Setting status channels: {}".format(status_channels))

    # Check for duplicate channels. Raise exception if found.
    if len(status_channels) != len(set(status_channels)):
        raise eh.ProcessError(
            "Duplicate status channels detected. Please ensure that "
            "the 'status_channels' attributes of each process are "
            "unique. Here are the status channels:\n\n{}".format(
                ", ".join(status_channels)))

    status_inst.set_status_channels(status_channels)
    self.processes.append(status_inst)
def update_attributes(self, attr_dict):
    """Applies a dictionary of overrides to directives and attributes.

    Keys that name a known directive (``cpus``, ``memory``,
    ``container``, ``version`` or ``queue``) are written into every
    entry of ``self.directives``. Any other key must match an attribute
    that already exists on the instance, which is then overwritten.

    Parameters
    ----------
    attr_dict : dict
        Maps directive or attribute names to their new values.

    Raises
    ------
    eh.ProcessError
        When a key is neither a known directive nor an existing
        attribute of the instance.
    """

    directive_keys = ("cpus", "memory", "container", "version", "queue")

    for key, value in attr_dict.items():

        # Directive names take precedence over same-named attributes
        if key in directive_keys:
            for name in self.directives:
                self.directives[name][key] = value
            continue

        if not hasattr(self, key):
            raise eh.ProcessError(
                "Invalid attribute '{}'".format(key))

        setattr(self, key, value)
def set_compiler_channels(self, channel_list, operator="mix"):
    """General method for setting the input channels of a compiler process

    Given a list of channel names, this method builds a single channel
    expression and stores it under the ``compile_channels`` key of the
    ``_context`` attribute. A single channel is stored as is. Multiple
    channels are combined with the requested operator::

        CH_1.mix(CH_2,CH_3,...)
        CH_1.join(CH_2).join(CH_3).map{ ot -> [ ot[0], ot[1..-1] ] }

    Parameters
    ----------
    channel_list : list
        List of strings with the final name of the channels
    operator : str
        Specifies the operator used to join the compiler channels.
        Available options are 'mix' and 'join'. It is only used when
        more than one channel is provided.

    Raises
    ------
    eh.ProcessError
        When ``channel_list`` is empty, or when more than one channel is
        provided and ``operator`` is neither 'mix' nor 'join'.
    """

    if not channel_list:
        raise eh.ProcessError("At least one status channel must be "
                              "provided to include this process in the "
                              "pipeline")

    if len(channel_list) == 1:
        logger.debug("Setting only one status channel: {}".format(
            channel_list[0]))
        self._context = {"compile_channels": channel_list[0]}
        return

    first_status = channel_list[0]
    remaining = channel_list[1:]

    if operator == "mix":
        s = "{}.mix({})".format(first_status, ",".join(remaining))
    elif operator == "join":
        joined = "".join(".join({})".format(ch) for ch in remaining)
        s = "{}{}.map{{ ot -> [ ot[0], ot[1..-1] ] }}".format(
            first_status, joined)
    else:
        # An unknown operator used to leave the channel string unbound
        # and crash with an UnboundLocalError further down
        raise eh.ProcessError(
            "Invalid channel operator '{}'. Available options are "
            "'mix' and 'join'".format(operator))

    logger.debug("Status channel string: {}".format(s))
    self._context = {"compile_channels": s}
def _parse_process_name(name_str):
    """Parses the process string and returns the process name and its
    directives

    Process strings may contain directive information with the following
    syntax::

        proc_name={'directive':'val'}

    This method parses this string and returns the process name as a
    string and the directives information as a dictionary. Everything
    after the first ``=`` is treated as the directives, so directive
    values may themselves contain ``=`` characters.

    Parameters
    ----------
    name_str : str
        Raw string with process name and, potentially, directive
        information

    Returns
    -------
    str
        Process name
    dict or None
        Process directives. None when ``name_str`` has no ``=``.

    Raises
    ------
    eh.ProcessError
        When the text after the first ``=`` cannot be parsed as JSON
        once single quotes are replaced by double quotes.
    """

    # Split on the first '=' only. Splitting on every '=' silently
    # dropped the directives when one of their values contained '='.
    process_name, separator, raw_directives = name_str.partition("=")

    if not separator:
        return process_name, None

    try:
        directives = json.loads(raw_directives.replace("'", '"'))
    except json.decoder.JSONDecodeError as err:
        raise eh.ProcessError(
            "Could not parse directives for process '{}'. The raw"
            " string is: {}\n"
            "Possible causes include:\n"
            "\t1. Spaces inside directives\n"
            "\t2. Missing '=' symbol before directives\n"
            "\t3. Missing quotes (' or \") around directives\n"
            "A valid example: process_name={{'cpus':'2'}}".format(
                process_name, name_str)) from err

    return process_name, directives
def update_attributes(self, attr_dict):
    """Updates attributes, parameter defaults and directives from a dict.

    Each key of ``attr_dict`` is handled by the first rule that matches:

    1. A writable attribute name (``pid``, ``ignore_type``,
       ``ignore_pid``, ``extra_input``, ``group`` or ``input_type``)
       that already exists on the instance is overwritten.
    2. The ``params`` key must map parameter names to values. Each value
       replaces the ``default`` entry of that parameter in
       ``self.params``.
    3. Anything else is written as a directive into every entry of
       ``self.directives``.

    Parameters
    ----------
    attr_dict : dict
        Dictionary containing the attributes that will be used to update
        the process attributes and/or directives.

    Raises
    ------
    eh.ProcessError
        When a name under ``params`` is not present in ``self.params``.
        Parameters processed before the offending one stay updated.
    """

    # Attributes that may be overwritten directly on the instance
    writable_attrs = (
        "pid",
        "ignore_type",
        "ignore_pid",
        "extra_input",
        "group",
        "input_type",
    )

    for key, value in attr_dict.items():

        if key in writable_attrs and hasattr(self, key):
            setattr(self, key, value)
            continue

        # Every key other than 'params' is stored as a directive
        if key != "params":
            for name in self.directives:
                self.directives[name][key] = value
            continue

        # The params key only overrides defaults of known parameters
        for param, default in value.items():
            if param not in self.params:
                raise eh.ProcessError(
                    "The parameter name '{}' does not exist for "
                    "component '{}'".format(param, self.template))
            self.params[param]["default"] = default
def _set_template(self, template):
    """Sets the path to the appropriate template file

    The template is looked up as ``<template>.nf`` inside the
    ``templates`` directory that sits next to this module. When the file
    exists, its path is stored in the ``_template_path`` attribute.

    Parameters
    ----------
    template : str
        Name of the template file, without the ``.nf`` extension.

    Raises
    ------
    eh.ProcessError
        When the template file does not exist.
    """

    # Templates live in a directory alongside this source file
    templates_dir = join(dirname(abspath(__file__)), "templates")
    template_file = join(templates_dir, template + ".nf")

    if os.path.exists(template_file):
        self._template_path = template_file
        return

    raise eh.ProcessError(
        "Template {} does not exist".format(template_file))
def template_str(self):
    """Returns the populated template string of the process

    The template located at ``self._template_path`` is rendered through
    ``self.render`` using the ``self._context`` dictionary. This is meant
    to be read as ``Process.template_str``.

    Returns
    -------
    str
        String with the complete and populated process template

    Raises
    ------
    eh.ProcessError
        When ``self._context`` is empty or not set.
    """

    context = self._context

    # Without a context there is nothing to populate the template with
    if not context:
        raise eh.ProcessError("Channels must be setup first using the "
                              "set_channels method")

    logger.debug("Setting context for template {}: {}".format(
        self.template, context))

    return self.render(self._template_path, context)
def set_status_channels(self, channel_list):
    """Sets the input channel expression for the status process

    The provided channel names are combined into a single channel
    expression, which is stored under the ``status_channels`` key of the
    ``_context`` attribute. A single channel is stored as is. Several
    channels are merged with the ``mix`` operator of nextflow::

        STATUS_1.mix(STATUS_2,STATUS_3,...)

    Parameters
    ----------
    channel_list : list
        List of strings with the final name of the status channels

    Raises
    ------
    eh.ProcessError
        When ``channel_list`` is empty.
    """

    if not channel_list:
        raise eh.ProcessError("At least one status channel must be "
                              "provided to include this process in the "
                              "pipeline")

    head, tail = channel_list[0], channel_list[1:]

    # A lone channel needs no operator
    if not tail:
        logger.debug("Setting only one status channel: {}".format(head))
        self._context = {"status_channels": head}
        return

    mixed = "{}.mix({})".format(head, ",".join(tail))
    logger.debug("Status channel string: {}".format(mixed))
    self._context = {"status_channels": mixed}