コード例 #1
0
ファイル: pipeline.py プロジェクト: schae234/pypeline
    def _poll_running_nodes(cls, running, nodegraph):
        """Wait until at least one running node finishes or fails.

        running   -- dict mapping node -> multiprocessing async result;
                     finished/failed nodes are popped from it in place.
        nodegraph -- graph whose node states are set to DONE or ERROR.

        Sleeps with exponential back-off (0.05s doubling, capped at 1s)
        between polls while nothing has completed.

        NOTE(review): this excerpt ends without an explicit return statement,
        but callers elsewhere test the return value (``not self._poll...``);
        the full function presumably returns a success flag -- confirm
        against the complete file.
        """
        sleep_time = 0.05
        # 'errors' doubles as a loop-exit flag and as the binding target of
        # the 'except Exception, errors' clause below; any truthy value
        # (i.e. a caught exception) stops the outer while-loop.
        changes = errors = False
        while running and not (errors or changes):
            # Python 2 dict.items() returns a list copy, so popping entries
            # from 'running' inside this loop is safe.
            for (node, proc) in running.items():
                if not proc.ready():
                    continue
                changes = True

                try:
                    # get() re-raises any exception raised in the worker.
                    proc.get()
                except (KeyboardInterrupt, SystemExit):
                    raise
                except Exception, errors:
                    nodegraph.set_node_state(node, nodegraph.ERROR)
                    running.pop(node)
                    # Indent every line of the error message for readability.
                    ui.print_err("%s: Error occurred running command:\n%s\n" \
                                     % (node, "\n".join(("\t" + line) for line in str(errors).strip().split("\n"))),
                                 file = sys.stderr)
                    continue
                nodegraph.set_node_state(node, nodegraph.DONE)
                running.pop(node)

            if not (errors or changes):
                time.sleep(sleep_time)
                # Exponential back-off, capped at one second.
                sleep_time = min(1, sleep_time * 2)
コード例 #2
0
    def _update_node_state(self, node):
        """Determine the state of 'node', recursing into sub-nodes first.

        Returns the cached state when one exists. Otherwise the state starts
        as the max over all subnodes/dependencies (state constants are
        assumed ordered so max() yields the "least finished" state -- TODO
        confirm the ordering of the NodeGraph state constants).

        NOTE(review): this excerpt neither stores the computed state into
        self._states nor returns it; the full function presumably does both
        -- confirm against the complete file.
        """
        if node in self._states:
            return self._states[node]

        # Update sub-nodes before checking for fixed states
        state = NodeGraph.DONE
        for subnode in (node.subnodes | node.dependencies):
            state = max(state, self._update_node_state(subnode))

        try:
            if isinstance(node, MetaNode):
                # MetaNodes are never executed themselves; demote any
                # runnable/running aggregate state to QUEUED.
                if state in (NodeGraph.RUNNING, NodeGraph.RUNABLE):
                    state = NodeGraph.QUEUED
            elif state == NodeGraph.DONE:
                # All prerequisites finished: runnable unless output is current.
                if not node.is_done or node.is_outdated:
                    state = NodeGraph.RUNABLE
            elif state in (NodeGraph.RUNNING, NodeGraph.RUNABLE, NodeGraph.QUEUED):
                if node.is_done:
                    # Output exists but a prerequisite is re-running.
                    state = NodeGraph.OUTDATED
                else:
                    state = NodeGraph.QUEUED
        except OSError, error:
            # Typically happens if base input files are removed, causing a node
            # that 'is_done' to call modified_after on missing files in 'is_outdated'
            ui.print_err("OSError checking state of Node: %s" % error, file = sys.stderr)
            state = NodeGraph.ERROR
コード例 #3
0
    def _poll_running_nodes(cls, running, nodegraph):
        """Wait until at least one running node finishes or fails.

        running   -- dict mapping node -> multiprocessing async result;
                     finished/failed nodes are popped from it in place.
        nodegraph -- graph whose node states are set to DONE or ERROR.

        Sleeps with exponential back-off (0.05s doubling, capped at 1s)
        between polls while nothing has completed.

        NOTE(review): this excerpt ends without an explicit return statement,
        but callers elsewhere test the return value (``not self._poll...``);
        the full function presumably returns a success flag -- confirm
        against the complete file.
        """
        sleep_time = 0.05
        # 'errors' doubles as a loop-exit flag and as the binding target of
        # the 'except Exception, errors' clause below; any truthy value
        # (i.e. a caught exception) stops the outer while-loop.
        changes = errors = False
        while running and not (errors or changes):
            # Python 2 dict.items() returns a list copy, so popping entries
            # from 'running' inside this loop is safe.
            for (node, proc) in running.items():
                if not proc.ready():
                    continue
                changes = True

                try:
                    # get() re-raises any exception raised in the worker.
                    proc.get()
                except (KeyboardInterrupt, SystemExit):
                    raise
                except Exception, errors:
                    nodegraph.set_node_state(node, nodegraph.ERROR)
                    running.pop(node)
                    # Indent every line of the error message for readability.
                    ui.print_err("%s: Error occurred running command:\n%s\n" \
                                     % (node, "\n".join(("\t" + line) for line in str(errors).strip().split("\n"))),
                                 file = sys.stderr)
                    continue
                nodegraph.set_node_state(node, nodegraph.DONE)
                running.pop(node)

            if not (errors or changes):
                time.sleep(sleep_time)
                # Exponential back-off, capped at one second.
                sleep_time = min(1, sleep_time * 2)
コード例 #4
0
def main(argv):
    config_args = parse_config(argv)
    if not config_args:
        return 1

    config, args = config_args

    try:
        ui.print_info("Building BAM pipeline ...", file = sys.stderr)
        makefiles = read_makefiles(args)
        if not makefiles:
            ui.print_err("Plase specify at least one makefile!", file = sys.stderr)
            return 1
    except MakefileError, e:
        ui.print_err("Error reading makefile:\n\t%s" % \
                         "\n\t".join(str(e).split("\n")),
                         file = sys.stderr)
        return 1
コード例 #5
0
ファイル: mkfile.py プロジェクト: schae234/pypeline
def main(argv):
    """Generate a makefile skeleton from Illumina SampleSheet files.

    Each path is either a directory containing a SampleSheet (_FILENAME) or
    a SampleSheet file given directly. Records are grouped by SampleID and
    Index (barcode) and printed as a makefile template.

    Returns 1 if a SampleSheet file is missing, 0 otherwise.
    """
    options, paths = parse_args(argv)
    records = {}
    for root in paths:
        if os.path.isdir(root):
            filename = os.path.join(root, _FILENAME)
        else:
            # A file was given directly; its directory becomes the root.
            root, filename = os.path.split(root)[0], root

        if not os.path.exists(filename):
            ui.print_err("ERROR: Could not find SampleSheet file: %r" % filename)
            return 1

        for record in read_alignment_records(filename):
            # Grouping: SampleID -> Index (barcode) -> list of records.
            libraries = records.setdefault(record["SampleID"], {})
            barcodes  = libraries.setdefault(record["Index"], [])

            record["Lane"] = int(record["Lane"])
            # %-formatting fills SampleID/Index/Lane now; "{Pair}" is
            # presumably left for later str.format substitution -- confirm
            # against select_path / downstream consumers.
            path = "%(SampleID)s_%(Index)s_L%(Lane)03i_R{Pair}_*.fastq.gz" % record
            record["Path"] = select_path(os.path.join(root, path))
            barcodes.append(record)

    _print_header(timestamp   = datetime.datetime.now().isoformat(),
                  full_mkfile = (os.path.basename(sys.argv[0]) != "trim_pipeline"),
                  sample_tmpl = not bool(records),
                  minimal     = options.minimal)
    # Emit one YAML-like section per sample/library/barcode.
    for (sample, libraries) in records.iteritems():
        print "%s:" % sample
        print "  %s:" % sample
        for (library, barcodes) in libraries.iteritems():
            print "    %s:" % library
            for record in barcodes:
                print "      {FCID}_{Lane}: {Path}".format(**record)
            print
        print

    if not argv:
        # No arguments at all: only the empty template header was printed.
        ui.print_info("No directories specified, empty table printed:", file = sys.stderr)
        ui.print_info("\tUsage: %s [directory ...]" % sys.argv[0], file = sys.stderr)
        ui.print_info("Each directory must contain a '%s' file." % _FILENAME, file = sys.stderr)
    else:
        ui.print_info("Makefile printed. Please check for correctness before running pipeline.", file = sys.stderr)
    return 0
コード例 #6
0
def main(argv):
    """Move a target/sample/library/lane from one location to another.

    Requires exactly two positional arguments, SRC and DST, each a path of
    the form TARGET/reads/SAMPLE/LIBRARY/LANE. Prints usage and returns 1
    when the argument count is wrong; returns 0 after the move completes.
    """
    default_jar_root = os.path.join(os.path.expanduser('~'), "install",
                                    "picard-tools")
    parser = optparse.OptionParser()
    parser.add_option(
        "--jar-root",
        default=default_jar_root,
        help="Folder containing Picard JARs (http://picard.sf.net)")
    opts, positional = parser.parse_args(argv)

    if len(positional) != 2:
        # Wrong number of arguments: print the usage text and bail out.
        usage_lines = (
            "Usage: bam_pipeline move SRC DST",
            "  where: SRC and DST are paths in the form TARGET/reads/SAMPLE/LIBRARY/LANE",
            "Note that the second folder of the path (here \"reads/\") is ignored.",
        )
        for line in usage_lines:
            ui.print_err(line)

        return 1

    src = parse_args(positional[0])
    dst = parse_args(positional[1])

    # Relocate reads and BAMs, update read-group tags, then clean up both ends.
    move_reads(src, dst)
    move_bams(src, dst)
    retag_bams(opts, src, dst)
    rm_files(src)
    rm_files(dst)
    print

    return 0
コード例 #7
0
ファイル: pipeline.py プロジェクト: schae234/pypeline
 def run(self, max_running = 6, dry_run = False, collapse = True, verbose = True):
     """Execute the pipeline (excerpt).

     NOTE(review): this excerpt shows only the initial NodeGraph
     construction; the full method continues past the visible lines.
     """
     try:
         nodegraph = NodeGraph(self._nodes)
     except NodeGraphError, error:
         # An invalid dependency graph means nothing can be executed.
         ui.print_err(error, file = sys.stderr)
         return False
コード例 #8
0
def parse_config(argv):
    """Parse pipeline configuration from config files and the command line.

    Reads defaults from the first existing file among ~/.pypeline.conf and
    /etc/pypeline.conf, uses its [Defaults] section to seed option defaults,
    then parses 'argv'. Returns (config, args) on success, or None when an
    invalid option combination is given or the temp root cannot be created.

    NOTE(review): this excerpt ends inside the temp-root check; the full
    function continues (permission check, final return) past the visible
    lines.
    """
    config = ConfigParser.SafeConfigParser()
    config_paths = (os.path.join(os.path.expanduser('~'), ".pypeline.conf"),
                    "/etc/pypeline.conf")

    # Only the first existing config file is read; later paths are ignored.
    for config_path in config_paths:
        if os.path.exists(config_path):
            config.read(config_path)
            break

    try:
        defaults = dict(config.items("Defaults"))
    except ConfigParser.NoSectionError:
        # No [Defaults] section: fall back to the hard-coded defaults below.
        defaults = {}

    parser = optparse.OptionParser()
    parser.add_option("--verbose", action = "store_true", default = defaults.get("verbose", False),
                      help = "Print the full dependency-tree every time a node is updated.")
    parser.add_option("--allow-missing-input-files", action = "store_true", default = False,
                      help = "Allow processing of lanes, even if the original input files are no-longer " \
                             "accesible, if for example a network drive is down. This option should be " \
                             "used with care!")

    group  = optparse.OptionGroup(parser, "Scheduling")
    # NOTE(review): help text says "BWA" but the option is bowtie2 -- looks
    # like a copy-paste from --bwa-max-threads; confirm intended wording.
    group.add_option("--bowtie2-max-threads", type = int, default = defaults.get("bowtie2_max_threads", 4),
                     help = "Maximum number of threads to use per BWA instance [%default]")
    group.add_option("--bwa-max-threads", type = int, default = defaults.get("bwa_max_threads", 4),
                     help = "Maximum number of threads to use per BWA instance [%default]")
    group.add_option("--max-threads", type = int, default = defaults.get("max_threads", 14),
                     help = "Maximum number of threads to use in total [%default]")
    group.add_option("--dry-run", action = "store_true", default = False,
                     help = "If passed, only a dry-run in performed, the dependency tree is printed, and no tasks are executed.")
    parser.add_option_group(group)

    group  = optparse.OptionGroup(parser, "Required paths")
    group.add_option("--jar-root", default = os.path.expanduser(defaults.get("jar_root", os.path.join('~', "install", "jar_root"))),
                     help = "Folder containing Picard JARs (http://picard.sf.net), " \
                            "and GATK (www.broadinstitute.org/gatk). " \
                            "The latter is only required if realigning is enabled. " \
                            "[%default]")
    group.add_option("--temp-root", default = os.path.expanduser(defaults.get("temp_root", os.path.join('~', "scratch", "bam_pypeline"))),
                     help = "Location for temporary files and folders [%default/]")
    group.add_option("--destination", default = None,
                     help = "The destination folder for result files. By default, files will be "
                            "placed in the same folder as the makefile which generated it.")
    parser.add_option_group(group)

    group  = optparse.OptionGroup(parser, "Output files and orphan files")
    group.add_option("--list-output-files", action = "store_true", default = False,
                     help = "List all files generated by pipeline for the makefile(s).")
    group.add_option("--list-orphan-files", action = "store_true", default = False,
                     help = "List all files at destination not generated by the pipeline. " \
                            "Useful for cleaning up after making changes to a makefile.")
    parser.add_option_group(group)

    group  = optparse.OptionGroup(parser, "Targets")
    group.add_option("--target", dest = "targets", action = "append", default = [],
                     help = "Only execute nodes required to build specified target.")
    group.add_option("--list-targets", default = None,
                     help = "List all targets at a given resolution (target, sample, library, lane, reads)")
    parser.add_option_group(group)

    config, args = parser.parse_args(argv)

    # Validation: --list-targets must be one of the known resolutions.
    config.targets = set(config.targets)
    targets_by_name = ("targets", "prefixes", "samples", "libraries", "lanes", "mapping", "trimming")
    if (config.list_targets is not None) and (config.list_targets not in targets_by_name):
        ui.print_err("ERROR: Invalid value for --list-targets (%s), valid values are '%s'." \
                     % (repr(config.list_targets), "', '".join(targets_by_name)), file = sys.stderr)
        return None

    # The two listing modes are mutually exclusive.
    if config.list_output_files and config.list_orphan_files:
        ui.print_err("ERROR: Both --list-output-files and --list-orphan-files set!", file = sys.stderr)
        return None

    # Ensure the temp root exists (created on demand).
    if not os.path.exists(config.temp_root):
        try:
            os.makedirs(config.temp_root)
        except OSError, e:
            ui.print_err("ERROR: Could not create temp root:\n\t%s" % (e,), file = sys.stderr)
            return None
コード例 #9
0
                     % (repr(config.list_targets), "', '".join(targets_by_name)), file = sys.stderr)
        return None

    if config.list_output_files and config.list_orphan_files:
        ui.print_err("ERROR: Both --list-output-files and --list-orphan-files set!", file = sys.stderr)
        return None

    if not os.path.exists(config.temp_root):
        try:
            os.makedirs(config.temp_root)
        except OSError, e:
            ui.print_err("ERROR: Could not create temp root:\n\t%s" % (e,), file = sys.stderr)
            return None

    if not os.access(config.temp_root, os.R_OK | os.W_OK | os.X_OK):
        ui.print_err("ERROR: Insufficient permissions for temp root: '%s'" % config.temp_root, file = sys.stderr)
        return None

    return config, args


def walk_nodes(nodes, func, skip_nodes = None):
    """Apply 'func' to each node, aborting as soon as it returns falsy.

    skip_nodes -- optional set of nodes to ignore (empty set when None).

    Returns False when 'func' requests early termination.

    NOTE(review): this excerpt ends inside the loop body; the full function
    presumably also recurses into subnodes/dependencies and returns a value
    after the loop -- confirm against the complete file.
    """
    if skip_nodes is None:
        skip_nodes = set()

    for node in nodes:
        if node in skip_nodes:
            continue
        elif not func(node):
            # Propagate the early-termination request from 'func'.
            return False
コード例 #10
0
 def run(self, max_running=6, dry_run=False, collapse=True, verbose=True):
     """Execute the pipeline (excerpt).

     NOTE(review): this excerpt shows only the initial NodeGraph
     construction; the full method continues past the visible lines.
     """
     try:
         nodegraph = NodeGraph(self._nodes)
     except NodeGraphError, error:
         # An invalid dependency graph means nothing can be executed.
         ui.print_err(error, file=sys.stderr)
         return False
コード例 #11
0
class Pypeline:
    def __init__(self, config):
        """Create an empty pipeline bound to the given configuration."""
        self._config = config
        self._nodes = []

    def add_nodes(self, *nodes):
        """Add one or more Node instances (possibly nested in sequences).

        Raises TypeError if any element is not a Node.
        """
        for subnodes in safe_coerce_to_tuple(nodes):
            for node in safe_coerce_to_tuple(subnodes):
                if not isinstance(node, Node):
                    # (typo fixed: "recieved" -> "received")
                    raise TypeError("Node object expected, received %s" %
                                    repr(node))
                self._nodes.append(node)

    def run(self, max_running=6, dry_run=False, collapse=True, verbose=True):
        """Execute all added nodes using a pool of worker processes.

        max_running -- worker-pool size / maximum concurrently running nodes.
        dry_run     -- only print the dependency tree; execute nothing.
        collapse    -- forwarded to ui.print_node_tree (display option).
        verbose     -- forwarded to ui.print_node_tree while nodes run.

        Returns True on success (including dry-run), False when any node
        failed or the run was forcibly terminated via a second CTRL-C.
        """
        try:
            nodegraph = NodeGraph(self._nodes)
        except NodeGraphError, error:
            # An invalid dependency graph means nothing can be executed.
            ui.print_err(error, file=sys.stderr)
            return False

        # calculate remaining nodes
        remaining = set(nodegraph.iterflat())
        for node in remaining:
            # A node needing more threads than the pool size could never start.
            if node.threads > max_running:
                ui.print_err("Node requires more threads than the maximum allowed:\n\t%s" \
                             % str(node), file = sys.stderr)
                return False

        if dry_run:
            ui.print_node_tree(nodegraph, collapse)
            ui.print_info("Dry run done ...", file=sys.stderr)
            return True

        running = {}
        interrupted_once = errors = has_refreshed = has_started_any = False
        pool = multiprocessing.Pool(max_running, _init_worker)
        # Run node commands
        while running or remaining:
            try:
                errors |= not self._poll_running_nodes(running, nodegraph)
                if not interrupted_once:  # Prevent starting of new nodes
                    if self._start_new_tasks(remaining, running, nodegraph,
                                             max_running, pool):
                        has_started_any = True
                        has_refreshed = False
                    elif has_started_any and not has_refreshed:
                        # Double-check that everything is in order
                        remaining = set(nodegraph.iterflat())
                        nodegraph.refresh_states()
                        has_refreshed = True

                if running:
                    ui.print_node_tree(nodegraph, collapse, verbose)
            except KeyboardInterrupt:
                # Second CTRL-C: kill the pool immediately and give up.
                if interrupted_once:
                    ui.print_err("\nTerminating now!\n", file=sys.stderr)
                    pool.terminate()
                    pool.join()
                    return False

                # First CTRL-C: stop scheduling new nodes but let the
                # currently running tasks finish gracefully.
                remaining, interrupted_once = set(), True
                ui.print_err(
                    "\nKeyboard interrupt detected, waiting for current tasks to complete ...",
                    file=sys.stderr)
                ui.print_err("\t- Press CTRL-C again to force termination.\n",
                             file=sys.stderr)

        ui.print_node_tree(nodegraph, collapse)
        pool.close()
        pool.join()

        if errors:
            ui.print_err("Errors were detected ...", file=sys.stderr)
        ui.print_msg("Done ...", file=sys.stderr)
        return not errors