@classmethod
def _poll_running_nodes(cls, running, nodegraph):
    sleep_time = 0.05
    changes = errors = False
    while running and not (errors or changes):
        # In Python 2, dict.items() returns a list, so it is safe to pop
        # entries from 'running' while iterating over the snapshot.
        for (node, proc) in running.items():
            if not proc.ready():
                continue
            changes = True

            try:
                proc.get()
            except (KeyboardInterrupt, SystemExit):
                raise
            except Exception, errors:
                # 'errors' doubles as the error flag; any caught exception
                # is truthy and terminates the outer while-loop.
                nodegraph.set_node_state(node, nodegraph.ERROR)
                running.pop(node)
                message = "\n".join(("\t" + line)
                                    for line in str(errors).strip().split("\n"))
                ui.print_err("%s: Error occurred running command:\n%s\n"
                             % (node, message), file=sys.stderr)
                continue

            nodegraph.set_node_state(node, nodegraph.DONE)
            running.pop(node)

        if not (errors or changes):
            # Exponential backoff, capped at one second between polls
            time.sleep(sleep_time)
            sleep_time = min(1, sleep_time * 2)

    # The caller treats a False return value as an error (see Pypeline.run)
    return not errors
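# A minimal, self-contained sketch (not part of the pipeline) of the polling
# pattern used by _poll_running_nodes above: multiprocessing.Pool.apply_async
# returns an AsyncResult whose .ready() can be polled without blocking, and
# whose .get() re-raises any exception thrown inside the worker process.
import multiprocessing
import time

def _square(value):
    return value * value

if __name__ == "__main__":
    pool = multiprocessing.Pool(2)
    running = dict((key, pool.apply_async(_square, (key,)))
                   for key in (1, 2, 3))

    sleep_time = 0.05
    while running:
        changes = False
        for (key, proc) in running.items():
            if proc.ready():
                print "task %i -> %i" % (key, proc.get())
                running.pop(key)
                changes = True

        if not changes:
            # Same capped exponential backoff as used by the pipeline
            time.sleep(sleep_time)
            sleep_time = min(1, sleep_time * 2)

    pool.close()
    pool.join()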
def _update_node_state(self, node):
    if node in self._states:
        return self._states[node]

    # Update sub-nodes before checking for fixed states
    state = NodeGraph.DONE
    for subnode in (node.subnodes | node.dependencies):
        state = max(state, self._update_node_state(subnode))

    try:
        if isinstance(node, MetaNode):
            if state in (NodeGraph.RUNNING, NodeGraph.RUNABLE):
                state = NodeGraph.QUEUED
        elif state == NodeGraph.DONE:
            if not node.is_done or node.is_outdated:
                state = NodeGraph.RUNABLE
        elif state in (NodeGraph.RUNNING, NodeGraph.RUNABLE, NodeGraph.QUEUED):
            if node.is_done:
                state = NodeGraph.OUTDATED
            else:
                state = NodeGraph.QUEUED
    except OSError, error:
        # Typically happens if base input files are removed, causing a node
        # that 'is_done' to call modified_after on missing files in 'is_outdated'
        ui.print_err("OSError checking state of Node: %s" % error,
                     file=sys.stderr)
        state = NodeGraph.ERROR

    # Cache and return the computed state; without this, the recursive
    # max(...) calls above would receive None for every sub-node.
    self._states[node] = state
    return state
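# A minimal sketch, using a plain dict DAG and integer states, of the
# propagation rule above: a node can be no "more done" than anything it
# depends on, so its state starts as the maximum over the (memoised) states
# of its dependencies. The state names and ordering here are illustrative
# stand-ins, not the actual NodeGraph constants.
DONE, OUTDATED, RUNABLE, QUEUED, RUNNING, ERROR = range(6)

def update_state(node, graph, states):
    if node in states:
        return states[node]

    state = DONE
    for dependency in graph.get(node, ()):
        state = max(state, update_state(dependency, graph, states))

    states[node] = state
    return state

graph = {"align": ["trim"], "trim": ["fetch"], "fetch": []}
states = {"fetch": DONE, "trim": RUNNING}
print update_state("align", graph, states)  # prints 4 (RUNNING)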
def main(argv):
    config_args = parse_config(argv)
    if not config_args:
        return 1
    config, args = config_args

    try:
        ui.print_info("Building BAM pipeline ...", file=sys.stderr)
        makefiles = read_makefiles(args)
        if not makefiles:
            ui.print_err("Please specify at least one makefile!",
                         file=sys.stderr)
            return 1
    except MakefileError, e:
        ui.print_err("Error reading makefile:\n\t%s"
                     % "\n\t".join(str(e).split("\n")),
                     file=sys.stderr)
        return 1
def main(argv):
    options, paths = parse_args(argv)

    records = {}
    for root in paths:
        if os.path.isdir(root):
            filename = os.path.join(root, _FILENAME)
        else:
            root, filename = os.path.split(root)[0], root

        if not os.path.exists(filename):
            ui.print_err("ERROR: Could not find SampleSheet file: %r" % filename)
            return 1

        for record in read_alignment_records(filename):
            libraries = records.setdefault(record["SampleID"], {})
            barcodes = libraries.setdefault(record["Index"], [])

            record["Lane"] = int(record["Lane"])
            path = "%(SampleID)s_%(Index)s_L%(Lane)03i_R{Pair}_*.fastq.gz" \
                % record
            record["Path"] = select_path(os.path.join(root, path))
            barcodes.append(record)

    _print_header(timestamp=datetime.datetime.now().isoformat(),
                  full_mkfile=(os.path.basename(sys.argv[0]) != "trim_pipeline"),
                  sample_tmpl=not bool(records),
                  minimal=options.minimal)

    for (sample, libraries) in records.iteritems():
        print "%s:" % sample
        print "  %s:" % sample
        for (library, barcodes) in libraries.iteritems():
            print "    %s:" % library
            for record in barcodes:
                print "      {FCID}_{Lane}: {Path}".format(**record)
            print
        print

    if not argv:
        ui.print_info("No directories specified, empty table printed:",
                      file=sys.stderr)
        ui.print_info("\tUsage: %s [directory ...]" % sys.argv[0],
                      file=sys.stderr)
        ui.print_info("Each directory must contain a '%s' file." % _FILENAME,
                      file=sys.stderr)
    else:
        ui.print_info("Makefile printed. Please check for correctness "
                      "before running pipeline.", file=sys.stderr)

    return 0
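# select_path is not shown in this excerpt; below is a plausible sketch,
# assuming it resolves the '{Pair}' placeholder against the files actually on
# disk: keep the paired-end template if both mates match, collapse to a
# single-end path if only R1 exists, and otherwise return the template
# unchanged so a missing file remains visible in the generated makefile.
import glob

def select_path(path):
    if glob.glob(path.format(Pair=1)):
        if glob.glob(path.format(Pair=2)):
            return path                 # paired-end reads; keep '{Pair}'
        return path.format(Pair=1)      # single-end reads; only R1 found
    return path                         # no match; leave for manual review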
def main(argv):
    parser = optparse.OptionParser()
    parser.add_option("--jar-root",
                      default=os.path.join(os.path.expanduser('~'),
                                           "install", "picard-tools"),
                      help="Folder containing Picard JARs (http://picard.sf.net)")

    options, args = parser.parse_args(argv)
    if len(args) != 2:
        ui.print_err("Usage: bam_pipeline move SRC DST")
        ui.print_err("  where: SRC and DST are paths in the form "
                     "TARGET/reads/SAMPLE/LIBRARY/LANE")
        ui.print_err("Note that the second folder of the path "
                     "(here \"reads/\") is ignored.")
        return 1

    source = parse_args(args[0])
    destination = parse_args(args[1])

    move_reads(source, destination)
    move_bams(source, destination)
    retag_bams(options, source, destination)
    rm_files(source)
    rm_files(destination)
    print

    return 0
def parse_config(argv):
    config = ConfigParser.SafeConfigParser()
    config_paths = (os.path.join(os.path.expanduser('~'), ".pypeline.conf"),
                    "/etc/pypeline.conf")
    for config_path in config_paths:
        if os.path.exists(config_path):
            config.read(config_path)
            break

    try:
        defaults = dict(config.items("Defaults"))
    except ConfigParser.NoSectionError:
        defaults = {}

    parser = optparse.OptionParser()
    parser.add_option("--verbose", action="store_true",
                      default=defaults.get("verbose", False),
                      help="Print the full dependency-tree every time a node is updated.")
    parser.add_option("--allow-missing-input-files", action="store_true",
                      default=False,
                      help="Allow processing of lanes, even if the original input files "
                           "are no longer accessible, if for example a network drive "
                           "is down. This option should be used with care!")

    group = optparse.OptionGroup(parser, "Scheduling")
    group.add_option("--bowtie2-max-threads", type=int,
                     default=defaults.get("bowtie2_max_threads", 4),
                     help="Maximum number of threads to use per Bowtie2 instance [%default]")
    group.add_option("--bwa-max-threads", type=int,
                     default=defaults.get("bwa_max_threads", 4),
                     help="Maximum number of threads to use per BWA instance [%default]")
    group.add_option("--max-threads", type=int,
                     default=defaults.get("max_threads", 14),
                     help="Maximum number of threads to use in total [%default]")
    group.add_option("--dry-run", action="store_true", default=False,
                     help="If passed, only a dry-run is performed: the dependency "
                          "tree is printed, and no tasks are executed.")
    parser.add_option_group(group)

    group = optparse.OptionGroup(parser, "Required paths")
    group.add_option("--jar-root",
                     default=os.path.expanduser(
                         defaults.get("jar_root",
                                      os.path.join('~', "install", "jar_root"))),
                     help="Folder containing Picard JARs (http://picard.sf.net), "
                          "and GATK (www.broadinstitute.org/gatk). "
                          "The latter is only required if realigning is enabled. "
                          "[%default]")
    group.add_option("--temp-root",
                     default=os.path.expanduser(
                         defaults.get("temp_root",
                                      os.path.join('~', "scratch", "bam_pypeline"))),
                     help="Location for temporary files and folders [%default/]")
    group.add_option("--destination", default=None,
                     help="The destination folder for result files. By default, "
                          "files will be placed in the same folder as the makefile "
                          "which generated them.")
    parser.add_option_group(group)

    group = optparse.OptionGroup(parser, "Output files and orphan files")
    group.add_option("--list-output-files", action="store_true", default=False,
                     help="List all files generated by pipeline for the makefile(s).")
    group.add_option("--list-orphan-files", action="store_true", default=False,
                     help="List all files at destination not generated by the pipeline. "
                          "Useful for cleaning up after making changes to a makefile.")
    parser.add_option_group(group)

    group = optparse.OptionGroup(parser, "Targets")
    group.add_option("--target", dest="targets", action="append", default=[],
                     help="Only execute nodes required to build specified target.")
    group.add_option("--list-targets", default=None,
                     help="List all targets at a given resolution (targets, prefixes, "
                          "samples, libraries, lanes, mapping, trimming)")
    parser.add_option_group(group)

    config, args = parser.parse_args(argv)
    config.targets = set(config.targets)

    targets_by_name = ("targets", "prefixes", "samples", "libraries",
                       "lanes", "mapping", "trimming")
    if (config.list_targets is not None) and (config.list_targets not in targets_by_name):
        ui.print_err("ERROR: Invalid value for --list-targets (%s), valid values are '%s'."
                     % (repr(config.list_targets), "', '".join(targets_by_name)),
                     file=sys.stderr)
        return None

    if config.list_output_files and config.list_orphan_files:
        ui.print_err("ERROR: Both --list-output-files and --list-orphan-files set!",
                     file=sys.stderr)
        return None

    if not os.path.exists(config.temp_root):
        try:
            os.makedirs(config.temp_root)
        except OSError, e:
            ui.print_err("ERROR: Could not create temp root:\n\t%s" % (e,),
                         file=sys.stderr)
            return None
    if not os.access(config.temp_root, os.R_OK | os.W_OK | os.X_OK):
        ui.print_err("ERROR: Insufficient permissions for temp root: '%s'"
                     % config.temp_root, file=sys.stderr)
        return None

    return config, args


def walk_nodes(nodes, func, skip_nodes=None):
    if skip_nodes is None:
        skip_nodes = set()

    for node in nodes:
        if node in skip_nodes:
            continue
        elif not func(node):
            return False

        # Record the node as visited, then descend into its sub-nodes and
        # dependencies (the same edge set used by _update_node_state above)
        skip_nodes.add(node)
        if not walk_nodes(node.subnodes | node.dependencies, func, skip_nodes):
            return False

    return True
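# A minimal sketch of how parse_config above layers its settings: values from
# the [Defaults] section of ~/.pypeline.conf (or /etc/pypeline.conf) override
# the hard-coded fallbacks, and command-line options override both. One
# caveat: optparse does not apply 'type=int' conversion to defaults, so a
# value read from the config file is passed through as a string.
import ConfigParser
import StringIO

config = ConfigParser.SafeConfigParser()
config.readfp(StringIO.StringIO("[Defaults]\nmax_threads = 8\n"))
defaults = dict(config.items("Defaults"))
print defaults.get("max_threads", 14)  # "8" (a string, not an int)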
class Pypeline:
    def __init__(self, config):
        self._nodes = []
        self._config = config

    def add_nodes(self, *nodes):
        for subnodes in safe_coerce_to_tuple(nodes):
            for node in safe_coerce_to_tuple(subnodes):
                if not isinstance(node, Node):
                    raise TypeError("Node object expected, received %s" % repr(node))
                self._nodes.append(node)

    def run(self, max_running=6, dry_run=False, collapse=True, verbose=True):
        try:
            nodegraph = NodeGraph(self._nodes)
        except NodeGraphError, error:
            ui.print_err(error, file=sys.stderr)
            return False

        # Calculate remaining nodes
        remaining = set(nodegraph.iterflat())
        for node in remaining:
            if node.threads > max_running:
                ui.print_err("Node requires more threads than the maximum allowed:\n\t%s"
                             % str(node), file=sys.stderr)
                return False

        if dry_run:
            ui.print_node_tree(nodegraph, collapse)
            ui.print_info("Dry run done ...", file=sys.stderr)
            return True

        running = {}
        interrupted_once = errors = has_refreshed = has_started_any = False
        pool = multiprocessing.Pool(max_running, _init_worker)

        # Run node commands
        while running or remaining:
            try:
                errors |= not self._poll_running_nodes(running, nodegraph)

                # An interrupt prevents the starting of new nodes
                if not interrupted_once:
                    if self._start_new_tasks(remaining, running, nodegraph,
                                             max_running, pool):
                        has_started_any = True
                        has_refreshed = False
                    elif has_started_any and not has_refreshed:
                        # Double-check that everything is in order
                        remaining = set(nodegraph.iterflat())
                        nodegraph.refresh_states()
                        has_refreshed = True

                if running:
                    ui.print_node_tree(nodegraph, collapse, verbose)
            except KeyboardInterrupt:
                if interrupted_once:
                    ui.print_err("\nTerminating now!\n", file=sys.stderr)
                    pool.terminate()
                    pool.join()
                    return False

                remaining, interrupted_once = set(), True
                ui.print_err("\nKeyboard interrupt detected, waiting for "
                             "current tasks to complete ...", file=sys.stderr)
                ui.print_err("\t- Press CTRL-C again to force termination.\n",
                             file=sys.stderr)
                ui.print_node_tree(nodegraph, collapse)

        pool.close()
        pool.join()

        if errors:
            ui.print_err("Errors were detected ...", file=sys.stderr)
        ui.print_msg("Done ...", file=sys.stderr)

        return not errors
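# A hypothetical usage sketch of the Pypeline class above; 'build_nodes' is a
# stand-in for project-specific setup code that returns Node instances, and
# 'config' is the options object returned by parse_config.
def run_pipeline(config, argv):
    pipeline = Pypeline(config)
    pipeline.add_nodes(build_nodes(config, argv))

    if not pipeline.run(max_running=config.max_threads,
                        dry_run=config.dry_run):
        return 1
    return 0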