Ejemplo n.º 1
0
def assign_strings(paths, tags):
    '''
    Assign N strings (path names, for example) to N tags. Example:

    - paths = ['RIB0000794-cutadapt-R1.fastq.gz', 'RIB0000794-cutadapt-R2.fastq.gz']
    - tags = ['R1', 'R2']
    - result = { 'R1': 'RIB0000794-cutadapt-R1.fastq.gz', 'R2': 'RIB0000794-cutadapt-R2.fastq.gz' }

    Every occurrence of every tag in every path is tried as a split point:
    the text before the tag (head) and after it (tail) must be shared by all
    paths, and what remains of each path must be exactly the given tags.

    A UAPError is raised if the number of paths differs from the number of
    tags, or if there is not exactly one distinct mapping.
    '''

    def check_candidate(paths, tags, head, tail):
        # Strip head and tail from every path; the candidate is rejected as
        # soon as one path does not start with head or end with tail.
        chopped = []
        for path in paths:
            if not (path.startswith(head) and path.endswith(tail)):
                return None
            chopped.append((path[len(head):len(path) - len(tail)], path))

        # The stripped middle parts must be exactly the tags.
        chopped.sort()
        if [middle for middle, _ in chopped] != sorted(tags):
            return None
        return dict(chopped)

    if len(paths) != len(tags):
        raise UAPError("Number of tags must be equal to number of paths")

    # Keyed by the JSON dump so identical mappings found via different
    # split points collapse into a single entry.
    results = {}
    for tag in tags:
        for path in paths:
            # visit all occurrences of tag in path
            index = path.find(tag)
            while index >= 0:
                head = path[:index]
                tail = path[index + len(tag):]
                candidate = check_candidate(paths, tags, head, tail)
                if candidate:
                    results[json.dumps(candidate, sort_keys=True)] = candidate
                index = path.find(tag, index + 1)

    if len(results) != 1:
        raise UAPError("Unable to find an unambiguous mapping.")

    # dict views cannot be indexed in Python 3, so take the single value.
    return next(iter(results.values()))
Ejemplo n.º 2
0
 def get_cluster_job_ids(self):
     '''
     Return the set of cluster job ids of all submitted jobs.

     For every task the queued ping file is read first; if it cannot be
     read and does not exist, the same path with a '.bad' suffix is tried.
     A UAPError is raised when a ping file exists but could not be read.
     '''
     job_ids = set()
     for task in self.all_tasks_topologically_sorted:
         primary = task.get_run().get_queued_ping_file()
         fallback = primary + '.bad'  # alternative location
         for ping_file in (primary, fallback):
             try:
                 with open(ping_file, 'r') as handle:
                     info = yaml.load(handle, Loader=yaml.FullLoader)
                 job_ids.add(info['cluster job id'])
             except (IOError, TypeError) as err:
                 # An existing but unreadable file is an error; a missing
                 # one just means the next location is tried.
                 if os.path.exists(ping_file):
                     raise UAPError('Could not read ping file %s: %s' %
                                    (ping_file, err))
             else:
                 break
     return job_ids
Ejemplo n.º 3
0
    def look_for_unique(self, connection, include=None):
        '''
        Looks for a unique file in the connection and returns it.
        E.g., to find a reference assembly among all parent runs.

        Returns ``include`` if no run comes with the connection, otherwise
        the single file passed through the connection by the single run.

        Raises UAPError if more than one run comes with the connection, if
        that run passes more than one file, or if a value is supplied both
        by the connection and through ``include``.
        '''
        if self.connection_exists(connection) and include is not None:
            raise UAPError('In step %s runs come with %s but it is set '
                           'to %s through an option.' %
                           (self.step_name, connection, include))

        ref_run = self.get_runs_with_connections(connection, with_empty=False)
        if len(ref_run) > 1:
            # The exception used to be constructed without being raised,
            # so this case silently fell through to ``return include``.
            raise UAPError('More then one but not all runs come with %s.' %
                           connection)
        elif len(ref_run) == 1:
            if include is not None:
                raise UAPError(
                    'In step %s, value supplied by connection %s but '
                    'option is set to %s.' %
                    (self.step_name, connection, include))
            ref_run = ref_run.pop()  # ref_run is a temporary set
            con_value = self.connections[ref_run][connection]
            if len(con_value) > 1:
                raise UAPError(
                    'In step %s more than one file is passed through %s.' %
                    (self.step_name, connection))
            return con_value[0]
        return include
Ejemplo n.º 4
0
    def add_empty_output_connection(self, tag):
        '''
        Register ``None -> None`` as the only "file" of the output connection
        ``out/<tag>`` (deprecated; a deprecation warning is logged).

        Raises UAPError if ``out/<tag>`` is not among the step's out
        connections or if the connection already holds a ``None`` entry.
        '''
        step_type = self.get_step().get_step_type()
        logger.warning(
            '[Deprecation] %s: add_empty_output_connection is depricated. '
            'Please make the connection "out/%s" optional and do not add '
            'anything instead.' % (step_type, tag))

        # make sure tag was declared with an outgoing connection
        declared = self._step.get_out_connections()
        if 'out/' + tag not in declared:
            raise UAPError(
                "Invalid output_file tag '%s' in %s. "
                "You might want to add self.add_connection('out/%s') "
                "to the constructor of %s." %
                (tag, str(self._step), tag, self._step.__module__))

        # reuse the connection if it is already known, else create it
        try:
            connection = self.get_out_connection(tag)
        except KeyError:
            connection = self.add_out_connection(tag)

        files = self._output_files[connection]
        if None in files:
            raise UAPError(
                "You're trying to re-declare %s as an empty output connection "
                % connection)

        files[None] = None
Ejemplo n.º 5
0
def exec_pre_post_calls(tool_id, info_key, info_command,
                        tool_check_info):
    '''
    Run the command(s) in ``info_command`` and record the outcome in
    ``tool_check_info``.

    tool_id -- name of the tool, used in log and error messages only
    info_key -- key the results are stored under; 'module_load' and
                'module_unload' additionally get their stdout executed
    info_command -- a command string, or an iterable of command strings
                    and/or argument lists
    tool_check_info -- dict that is updated in place and also returned

    Raises UAPError if a command contains a non-string argument, if the
    process cannot be started, or if executing module output raises a
    NameError.
    '''
    # a single command string is treated as a one-element list of commands
    if isinstance(info_command, str):
        info_command = [info_command]
    for command in info_command:
        # string commands are split on whitespace into an argument list
        if isinstance(command, str):
            command = command.split()
        for argument in command:
            if not isinstance(argument, str):
                raise UAPError(
                    "The command to be launched '%s' contains non-string "
                    "argument '%s'. Therefore the command will fail. Please "
                    "fix this type issue." % (command, argument))
        logger.info("Executing command: %s" % " ".join(command))
        try:
            proc = subprocess.Popen(
                command,
                stdin=None,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                close_fds=True)

        except OSError as e:
            raise UAPError(
                "Error while executing '%s' for %s: %s "
                "Error no.: %s Error message: %s" %
                (info_key, tool_id,
                 " ".join(command), e.errno, e.strerror)
            )

        # keys under which this command's results are stored; note that
        # the response key is spelled '-respone'
        command_call = info_key
        command_exit_code = '%s-exit-code' % info_key
        command_response = '%s-respone' % info_key
        # wait for the process to finish and collect its stdout and stderr
        (output, error) = proc.communicate()
        if info_key in ['module_load', 'module_unload']:
            logger.info("Try '%s' for '%s': %s" % (
                info_key, tool_id, " ".join(command))
            )
            try:
                # NOTE(review): the command's stdout is executed as Python
                # code in this process -- only safe for trusted commands.
                exec(output)
            except NameError:
                msg = "Error while loading module '%s': \n%s"
                raise UAPError(msg % (tool_id, error.decode('utf-8')))

            # for module commands only call and exit code are recorded
            tool_check_info.update({
                command_call: (' '.join(command)).strip(),
                command_exit_code: proc.returncode
            })
            if error:
                logger.info('Loading tool %s: %s' %
                            (tool_id, error.decode('utf-8')))
        else:
            # all other commands also record stdout followed by stderr
            tool_check_info.update({
                command_call: (' '.join(command)).strip(),
                command_exit_code: proc.returncode,
                command_response: (output + error)
            })

    return tool_check_info
Ejemplo n.º 6
0
def main(args):
    '''
    Build the pipeline from ``args`` and process every task returned by
    ``get_task_with_list()`` according to its current state.

    Raises UAPError for a changed task when ``args.force`` is not set and
    for any task in a state that is not handled below.
    '''
    p = pipeline.Pipeline(arguments=args)

    # ``task`` is rebound by the loop below; the handler reads whatever
    # task is current at the time the signal arrives.
    task = None
    def handle_signal(signum, frame):
        logger.warning("Catching %s!" %
                       process_pool.ProcessPool.SIGNAL_NAMES[signum])
        p.caught_signal = signum
        process_pool.ProcessPool.kill()
        if task:
            signame = process_pool.ProcessPool.SIGNAL_NAMES[signum]
            error = 'UAP stopped because it caught signal %d - %s' % \
                        (signum, signame)
            log_task_error(task, error, True, True)
    signal.signal(signal.SIGTERM, handle_signal)
    signal.signal(signal.SIGINT, handle_signal)

    # execute all tasks
    # with args.ignore, changed tasks count as finished and are skipped
    finished_states = [p.states.FINISHED]
    if args.ignore:
        finished_states += [p.states.CHANGED]

    # states in which a task is handed to check_parents_and_run()
    accepted_states = [p.states.BAD, p.states.READY, p.states.QUEUED,
                       p.states.VOLATILIZED]
    for task in p.get_task_with_list():
        task_state = task.get_task_state()
        if task_state in finished_states:
            task.move_ping_file()
            sys.stderr.write("Skipping %s because it's already %s.\n" %
                             (task, task_state))
        elif task_state == p.states.VOLATILIZED and not args.run:
            # volatilized tasks only run when tasks were given via args.run
            task.move_ping_file()
            sys.stderr.write("Skipping %s because it's already %s and not "
                             "specified as argument.\n" %
                             (task, task_state))
        elif task_state == p.states.CHANGED:
            # only reached without args.ignore (see finished_states above)
            if not args.force:
                task.move_ping_file()
                raise UAPError(
                    "Task %s has changed. "
                    "Run 'uap %s status --details' to see what changed or "
                    "'uap %s run-locally --force' to force overwrite "
                    "of the results." %
                    (task, args.config.name, args.config.name))
            else:
                check_parents_and_run(task, finished_states, args.debugging)
        elif task_state in accepted_states:
            check_parents_and_run(task, finished_states, args.debugging)
        else:
            task.move_ping_file()
            raise UAPError(
                "Unexpected task state for %s: %s\n"
                "Expected state to be 'READY'. Probably an upstream "
                "run crashed." %
                (task, task_state))
Ejemplo n.º 7
0
def assign_string(s, tags):
    '''
    Return the single tag from ``tags`` that is contained in ``s``.

    Raises UAPError if no tag or more than one tag is contained in ``s``.
    '''
    found = None
    for candidate in tags:
        if candidate not in s:
            continue
        if found is not None:
            raise UAPError("Could not unambiguously match %s to %s."
                           % (s, tags))
        found = candidate
    if found is not None:
        return found
    raise UAPError("Could not match %s to %s." % (s, tags))
Ejemplo n.º 8
0
    def add_dependency(self, parent):
        '''
        Register ``parent`` as a step this step depends on.

        The parent is appended to this step's dependencies and this step's
        string form is added to the parent's children step names.

        parent -- parent step this step depends on

        Raises UAPError if ``parent`` is not an AbstractStep or equals this
        step itself.
        '''
        parent_is_step = isinstance(parent, AbstractStep)
        if not parent_is_step:
            raise UAPError("Error: parent argument must be an AbstractStep.")
        if parent == self:
            raise UAPError("Cannot add a node as its own dependency.")

        # link both directions: child -> parent and parent -> child name
        self.dependencies.append(parent)
        parent.children_step_names.add(str(self))
Ejemplo n.º 9
0
 def require_tool(self, tool):
     """
     Declare that this step requires an external tool. Query it later with
     *get_tool()*.

     Without a pipeline the tool is only marked as required (``True``).
     With a pipeline, the tool's 'path' from the configuration is stored,
     together with any of the optional 'pre_command', 'module_load',
     'module_unload' and 'post_command' entries that are configured.

     Raises UAPError if the tool is not declared in the configuration.
     """
     pipeline = self.get_pipeline()
     if pipeline is None:
         self._tools[tool] = True
         return

     tools_config = pipeline.config['tools']
     if tool not in tools_config:
         raise UAPError(
             "%s requires the tool %s but it's not declared in "
             "the configuration." % (self, tool))

     tool_config = tools_config[tool]
     self._tools[tool] = tool_config['path']
     # each optional key is stored in the attribute named '_<key>'
     for option in ('pre_command', 'module_load',
                    'module_unload', 'post_command'):
         if option in tool_config:
             getattr(self, '_' + option)[tool] = tool_config[option]
Ejemplo n.º 10
0
 def ping_on_term(signum, frame):
     '''
     Signal handler: clean up and abort after a TERM signal.

     Logs a warning, calls ``kill_exec_ping()``, removes the queued ping
     file with ``bad_copy=True``, stores the signal number on
     ``p.caught_signal``, kills the process pool and finally raises
     UAPError. ``self``, ``p``, ``queued_ping_path`` and ``kill_exec_ping``
     are taken from the enclosing scope, which is not visible here.
     '''
     logger.warning('Recived SIGTERM and moving execution ping file...')
     kill_exec_ping()
     self.remove_ping_file(queued_ping_path, bad_copy=True)
     p.caught_signal = signum
     process_pool.ProcessPool.kill()
     raise UAPError('Recived TERM signal (canceled job).')
Ejemplo n.º 11
0
 def ping_on_int(signum, frame):
     '''
     Signal handler: clean up and abort after an INT signal.

     Logs a warning, calls ``kill_exec_ping()``, removes the queued ping
     file with ``bad_copy=True``, stores the signal number on
     ``p.caught_signal``, kills the process pool and finally raises
     UAPError. ``self``, ``p``, ``queued_ping_path`` and ``kill_exec_ping``
     are taken from the enclosing scope, which is not visible here.
     '''
     logger.warning('Recived SIGINT and moving execution ping file...')
     kill_exec_ping()
     self.remove_ping_file(queued_ping_path, bad_copy=True)
     p.caught_signal = signum
     process_pool.ProcessPool.kill()
     raise UAPError('Recived INT signal (keybord interrupt).')
Ejemplo n.º 12
0
 def autodetect_cluster_type(self):
     """
     Return the first configured cluster type whose identity test matches.

     For every cluster type in the cluster configuration the command in
     'identity_test' is run once; the type matches if the decoded output
     starts with one of the values in 'identity_answer' (a single value
     or a list). Returns None, after logging a warning, if nothing
     matches.

     Raises UAPError if a cluster type lacks 'identity_test' or
     'identity_answer'.
     """
     cluster_config = self.get_cluster_config()
     # Test all configured cluster types
     for cluster_type, settings in cluster_config.items():
         # Do we have an identity test command and an expected answer?
         identity = dict()
         for key in ['test', 'answer']:
             try:
                 identity[key] = settings['identity_%s' % key]
             except KeyError:
                 raise UAPError(
                     "%s: Missing 'identity_%s' for %s "
                     "cluster type." %
                     (self._cluster_config_path, key, cluster_type))

         answers = identity['answer']
         if not isinstance(answers, list):
             answers = [answers]
         if not answers:
             continue

         # Run the test once per cluster type, not once per answer. A
         # command that is missing or exits non-zero only means that this
         # is not the cluster we are running on.
         try:
             output = subprocess.check_output(
                 identity['test']).decode('utf-8')
         except (OSError, subprocess.CalledProcessError):
             continue
         if output.startswith(tuple(answers)):
             return cluster_type
     logger.warning('Cluster type could not be detected.')
     return None
Ejemplo n.º 13
0
 def get_task_with_list(self, as_string=False, exclusive=False):
     '''
     Returns a list of tasks, specified with the run argument.

     Each entry of ``self.args.run`` is either an exact task id or a
     prefix; a prefix selects every task whose string form starts with it.

     as_string -- return the string form of the tasks instead of the tasks
     exclusive -- if False and no tasks were specified, all tasks are
                  returned; otherwise only the selected ones (possibly none)

     Raises UAPError if tasks were specified but none of them matched.
     '''
     # ``run`` may be absent (or empty) for commands that do not take it;
     # both cases mean "no tasks specified".
     specified_tasks = getattr(self.args, 'run', None) or []

     task_wish_list = list()
     for task_id in specified_tasks:
         if task_id in self.task_for_task_id:
             matches = [self.task_for_task_id[task_id]]
         else:
             matches = [task for task in self.all_tasks_topologically_sorted
                        if str(task).startswith(task_id)]
         task_wish_list.extend(
             str(task) if as_string else task for task in matches)

     if specified_tasks and not task_wish_list:
         raise UAPError("No task matches the requested pattern(s) '%s'." %
                        ' '.join(specified_tasks))
     if not specified_tasks and exclusive is False:
         if not as_string:
             return self.all_tasks_topologically_sorted
         return [str(t) for t in self.all_tasks_topologically_sorted]
     return task_wish_list
Ejemplo n.º 14
0
 def get_input_files_for_output_file(self, output_file):
     '''
     Return what is stored for ``output_file`` in the first out connection
     that lists it among its output files.

     Raises UAPError if no out connection lists ``output_file``.
     '''
     for out_con in self.get_out_connections():
         known_files = self.get_output_files_for_out_connection(out_con)
         if output_file not in known_files:
             continue
         return self._output_files[out_con][output_file]
     raise UAPError("Sorry, your output '%s' file couldn't be found" %
                    output_file)
Ejemplo n.º 15
0
 def get_cluster_command(self, key):
     '''
     Return the entry ``key`` from the configuration of the current
     cluster type.

     Raises UAPError if the cluster type's configuration has no such key.
     '''
     cluster_type = self.get_cluster_type()
     available = self.get_cluster_config()[cluster_type].keys()
     if key in available:
         return self.get_cluster_config()[cluster_type][key]
     raise UAPError(
         'The option "%s" is not available for the cluster "%s".' %
         (key, cluster_type))
Ejemplo n.º 16
0
    def load_unload_module(self, module_cmd):
        '''
        Run each module command and execute its standard output as Python
        code; the command's standard error is forwarded to sys.stderr.

        module_cmd -- a command string, or an iterable of command strings
                      and/or argument lists

        Raises UAPError if a command cannot be started (and whatever
        ``check_subprocess_command`` raises for invalid commands).
        '''
        # isinstance (rather than an exact class comparison) so that str
        # subclasses are not iterated character by character below.
        if isinstance(module_cmd, str):
            module_cmd = [module_cmd]

        for command in module_cmd:
            if isinstance(command, str):
                command = command.split()
            self.check_subprocess_command(command)

            try:
                proc = subprocess.Popen(command,
                                        stdin=None,
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.PIPE,
                                        close_fds=True)

            except OSError as e:
                raise UAPError("Error while executing '%s' "
                               "Error no.: %s Error message: %s" %
                               (" ".join(command), e.errno, e.strerror))

            (output, error) = proc.communicate()
            # NOTE(review): the command's stdout is executed as Python code
            # in this process -- only safe for trusted module commands.
            exec(output)
            sys.stderr.write(error.decode('utf-8'))
            sys.stderr.flush()
Ejemplo n.º 17
0
 def get_option(self, key):
     """
     Return the value stored for the option ``key``.

     Raises UAPError if ``key`` is not among the defined options.
     """
     if key in self._defined_options:
         return self._options[key]
     raise UAPError("Cannot query undefined option %s in step %s." %
                    (key, self.__module__))
Ejemplo n.º 18
0
 def _init_run_id(self, run_id=None):
     '''
     Resolve the run id to work on.

     An explicitly given ``run_id`` gets an (initially empty) entry in
     ``self.connections`` and is returned; otherwise the current run id is
     returned. Raises UAPError if neither is available.
     '''
     if run_id is not None:
         self.connections.setdefault(run_id, dict())
         return run_id
     current = self._current_run_id
     if current is None:
         raise UAPError('No run id given.')
     return current
Ejemplo n.º 19
0
 def get_output_directory_du_jour_placeholder(self):
     '''
     Deprecated: always raises UAPError.

     This used to return a placeholder for the temporary output directory;
     the error message tells callers to use the string '.' instead.
     '''
     message = ("Using run.get_output_directory_du_jour_placeholder() "
                "is deprecated. Just use the string '.' instead.")
     raise UAPError(message)
Ejemplo n.º 20
0
 def get_tool(self, key):
     """
     Return what was stored for the tool ``key`` by *require_tool()*.

     Raises UAPError if the tool is unknown.
     """
     if key in self._tools:
         return self._tools[key]
     raise UAPError("Tool %s unknown. Maybe you forgot to use "
                    "self.require_tool('%s')" % (key, key))
Ejemplo n.º 21
0
 def check_subprocess_command(self, command):
     '''
     Make sure every argument of ``command`` is a string.

     Raises UAPError naming the command and the first non-string argument.
     '''
     for argument in command:
         if not isinstance(argument, str):
             # Format with an explicit tuple: a bare ``% command`` or
             # ``% argument`` raises TypeError when the value is itself a
             # tuple, which would hide the intended error.
             raise UAPError(
                 "The command to be launched '%s' contains non-string "
                 "argument '%s'. Therefore the command will fail. Please "
                 "fix this type issue." % (command, argument))
Ejemplo n.º 22
0
    def add_task_for_output_file(self, output_path, task_id):
        '''
        Record that ``task_id`` creates ``output_path`` in both lookup
        tables (output file -> task id, task id -> set of output files).

        Raises UAPError if ``output_path`` is already registered.
        '''
        if output_path in self.task_id_for_output_file:
            raise UAPError("More than one step is trying to create the "
                           "same output file: %s." % output_path)
        self.task_id_for_output_file[output_path] = task_id

        # create the task's set on first use, then add the file to it
        files_of_task = self.output_files_for_task_id.setdefault(
            task_id, set())
        files_of_task.add(output_path)
Ejemplo n.º 23
0
 def set_cores(self, cores):
     """
     Store the number of CPU cores this step will use.

     Raises UAPError unless ``cores`` is an int of at least 1.
     """
     if isinstance(cores, int) and cores >= 1:
         self._cores = cores
         return
     raise UAPError(
         '[%s] Cores need to be a positive integer, not %s.' %
         (self.get_step_name(), cores))
Ejemplo n.º 24
0
 def get_connection(self, connection, run_id=None):
     '''
     Returns a list of file names for the ``connection``.

     ``run_id`` is resolved through ``_init_run_id``. Raises UAPError if
     that run has no such connection.
     '''
     run_id = self._init_run_id(run_id)
     cons = self.connections[run_id]
     if connection not in cons:
         raise UAPError('The input run %s of %s has no connection %s.' %
                        (run_id, self.step_name, connection))
     # ``cons`` is already the run's mapping, so it is indexed by the
     # connection (it used to be indexed by the run id again).
     return cons[connection]
Ejemplo n.º 25
0
    def add_file_dependencies(self, output_path, input_paths):
        '''
        Record that ``output_path`` depends on ``input_paths`` and, in the
        reverse table, that each input path is needed by ``output_path``.

        Raises UAPError if ``output_path`` is already registered.
        '''
        if output_path in self.file_dependencies:
            raise UAPError("Different steps/runs/tags want to create "
                           "the same output file: %s." % output_path)
        self.file_dependencies[output_path] = set(input_paths)

        reverse = self.file_dependencies_reverse
        for input_path in input_paths:
            if input_path not in reverse:
                reverse[input_path] = set()
            reverse[input_path].add(output_path)
Ejemplo n.º 26
0
 def add_connection(self, connection, files, run_id=None):
     '''
     Append the names in ``files`` to ``connection`` of the given run.

     ``run_id`` is resolved through ``_init_run_id``. Besides the file
     list, the run is recorded for the connection, the connection is added
     to the existing connections and the cached ``_con_of_all_runs`` is
     reset.

     Raises UAPError if ``files`` is not a list, or if ``connection`` is
     not a string starting with "in/".
     '''
     if not isinstance(files, list):
         raise UAPError('"files" must be a list but is a %s' %
                        files.__class__.__name__)
     run_id = self._init_run_id(run_id)
     if not isinstance(connection, str):
         raise UAPError('The passed connection must be a string.')
     if not connection.startswith('in/'):
         raise UAPError('Input connections muss start with "in/".')

     run_connections = self.connections[run_id]
     run_connections.setdefault(connection, list()).extend(files)
     self._by_cons_none_empty.setdefault(connection, set()).add(run_id)
     self.existing_connections.add(connection)
     self._con_of_all_runs = None  # reset cache
     logger.debug("Found %s to connect %s with run %s." %
                  (self.step_name, connection, run_id))
Ejemplo n.º 27
0
 def add_command(self, command, stdout_path=None, stderr_path=None):
     '''
     Wrap ``command`` in a CommandInfo, append it to this object's pipes
     and commands, and return it.

     A TypeError from the CommandInfo constructor is re-raised as a
     UAPError that names the step.
     '''
     try:
         info = command_info.CommandInfo(self,
                                         command,
                                         stdout_path=stdout_path,
                                         stderr_path=stderr_path)
     except TypeError as err:
         step_name = str(self._run.get_step())
         raise UAPError('During declaration of step "%s": %s' %
                        (step_name, str(err)))
     self._pipes_and_commands.append(info)
     return info
Ejemplo n.º 28
0
 def add_public_info(self, key, value):
     '''
     Store ``value`` under ``key`` in this run's public information. For
     example, a FASTQ reader may store the index barcode here for
     subsequent steps to query via ``AbstractStep.find_upstream_info()``.

     Storing the same value again is allowed; a UAPError is raised when a
     different value is already stored for ``key``.
     '''
     stored = self._public_info
     if key in stored and value != stored[key]:
         raise UAPError(
             "You're trying to overwrite public info %s with %s, "
             "but there's already a different value stored: %s." %
             (key, value, stored[key]))
     stored[key] = value
Ejemplo n.º 29
0
Archivo: command.py Proyecto: yigbt/uap
 def repl(text):
     '''
     Replace ``abs_dest`` by ``rel_path`` in a string, or in every element
     of a list or set (the result is always a list); None is passed through.

     In a string that ends with ``abs_dest`` every occurrence is replaced;
     otherwise only occurrences followed by the path separator are.
     ``abs_dest``, ``rel_path`` and ``func`` come from the enclosing scope,
     which is not visible here. Raises UAPError for any other type.
     '''
     if text is None:
         return None
     if isinstance(text, str):
         if text.endswith(abs_dest):
             return text.replace(abs_dest, rel_path)
         return text.replace(abs_dest + os.sep, rel_path + os.sep)
     if isinstance(text, (list, set)):
         return [repl(element) for element in text]
     raise UAPError("Function %s does not return string or "
                    "list of strings." % func.__name__)
Ejemplo n.º 30
0
 def add_out_connection(self, out_connection):
     '''
     Start an empty file mapping for an output connection and return the
     connection's full name.

     The 'out/' prefix is added if missing. Any mapping already stored
     for the connection is replaced by an empty dict. Raises UAPError if
     the connection is not among the step's out connections.
     '''
     prefix = 'out/'
     if not out_connection.startswith(prefix):
         out_connection = prefix + out_connection

     declared = self._step.get_out_connections()
     if out_connection not in declared:
         raise UAPError(
             "Invalid output connection '%s' in %s. "
             "You might want to add self.add_connection('%s') "
             "to the constructor of %s." % (out_connection, str(
                 self._step), out_connection, self._step.__module__))

     logger.debug('Adding %s to %s in run %s.' %
                  (out_connection, str(self.get_step()), self.get_run_id()))
     self._output_files[out_connection] = dict()
     return out_connection