def _update_and_check_max_read_depth(options, mkfile):
    if any(subdd["VCF_Filter"]["MaxReadDepth"] == "auto"
           for subdd in mkfile["Genotyping"].itervalues()):
        print_info(" - Determining max-depth from depth-histograms ...")

    for (key, settings) in mkfile["Genotyping"].iteritems():
        required_keys = set()
        for sample in mkfile["Project"]["Samples"].itervalues():
            if sample["GenotypingMethod"].lower() == "samtools":
                required_keys.add(sample["Name"])

        max_depths = settings["VCF_Filter"]["MaxReadDepth"]
        if isinstance(max_depths, types.DictType):
            # Extra keys are allowed, to make it easier
            # to temporarily disable a sample
            missing_keys = required_keys - set(max_depths)
            if missing_keys:
                missing_keys = "\n - ".join(sorted(missing_keys))
                message = "MaxReadDepth not specified for the following " \
                          "samples for %r:\n - %s" % (key, missing_keys)
                raise MakefileError(message)
        elif isinstance(max_depths, types.StringTypes):
            assert max_depths.lower() == "auto", max_depths
            prefix = mkfile["Project"]["Regions"][key]["Prefix"]

            settings["VCF_Filter"]["MaxReadDepth"] \
                = _read_max_depths(options, prefix, required_keys)
        else:
            max_depths = dict.fromkeys(required_keys, max_depths)
            settings["VCF_Filter"]["MaxReadDepth"] = max_depths
def flush(self):
    """See BaseUI.flush."""
    if BaseUI.flush(self) and self._running_nodes:
        self._print_header(self.states, self.threads)
        for node in sorted(map(str, self._running_nodes)):
            print_info(" - %s" % (node,), file=sys.stdout)
        print_info(file=sys.stdout)
def run_admix_pipeline(config):
    print_info("\nBuilding %i Zonkey pipeline(s):" % (len(config.samples),))
    config.temp_root = os.path.join(config.destination, "temp")
    if not config.dry_run:
        fileutils.make_dirs(config.temp_root)

    cache = {}
    nodes = []
    items = config.samples.iteritems()
    for idx, (name, sample) in enumerate(sorted(items), start=1):
        root = sample["Root"]
        nuc_bam = sample["Files"].get("Nuc")
        mito_bam = sample["Files"].get("Mito")

        genomes = []
        if mito_bam:
            genomes.append("MT")
        if nuc_bam:
            genomes.append("Nuclear")

        print_info(" %i. %s: %s DNA" % (idx, name, ' and '.join(genomes)))

        nodes.extend(build_pipeline(config, root, nuc_bam, mito_bam, cache))

    if config.multisample and not config.admixture_only:
        nodes = [summary.SummaryNode(config, nodes)]

    if not run_pipeline(config, nodes, "\nRunning Zonkey:"):
        return 1
def finalize(self):
    """Called by the pipeline at the termination of a run. By default,
    this function prints the location of the log-file if one was created
    during the run (e.g. if there were errors), and a summary of all nodes.
    """
    runtime = (self._end_time or 0) - (self.start_time or 0)

    if self.states[self.ERROR]:
        print_err("Done; but errors were detected ...")
    else:
        print_info("Done ...")

    print_info()
    rows = [(" Number of nodes:", sum(self.states)),
            (" Number of done nodes:", self.states[self.DONE]),
            (" Number of runnable nodes:", self.states[self.RUNABLE]),
            (" Number of queued nodes:", self.states[self.QUEUED]),
            (" Number of outdated nodes:", self.states[self.OUTDATED]),
            (" Number of failed nodes:", self.states[self.ERROR]),
            (" Pipeline runtime:", _fmt_runtime(runtime))]

    for line in text.padded_table(rows):
        print_info(line)

    print_info("\nUse --list-output-files to view status of output files.")

    logfile = paleomix.logger.get_logfile()
    if logfile:
        print_debug("Log-file located at %r" % (logfile,))

    print_info()
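# Hypothetical sketch only: finalize() above relies on a _fmt_runtime() helper
# that is not shown here. A minimal formatter along these lines would turn the
# runtime in seconds into a short human-readable string; the actual helper in
# the codebase may format the value differently.
def _fmt_runtime(seconds):
    seconds = int(seconds)
    hours, remainder = divmod(seconds, 60 * 60)
    minutes, seconds = divmod(remainder, 60)
    return "%i:%02i:%02is" % (hours, minutes, seconds)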
def flush(self):
    """See BaseUI.flush."""
    if BaseUI.flush(self) and self._running_nodes:
        self._print_header()
        for node in sorted(map(str, self._running_nodes)):
            print_info(" - %s" % (node,), file=sys.stdout)
        print_info(file=sys.stdout)
def setup_example(config):
    root = os.path.join(config.destination, 'zonkey_pipeline')

    with tarfile.TarFile(config.tablefile) as tar_handle:
        example_files = []
        existing_files = []
        for member in tar_handle.getmembers():
            if os.path.dirname(member.name) == 'examples' and member.isfile():
                example_files.append(member)

                destination = fileutils.reroot_path(root, member.name)
                if os.path.exists(destination):
                    existing_files.append(destination)

        if existing_files:
            print_err("Output files already exist at destination:\n - %s"
                      % ("\n - ".join(map(repr, existing_files))))
            return 1
        elif not example_files:
            print_err("Sample database %r does not contain example data; "
                      "cannot proceed." % (config.tablefile,))
            return 1

        if not os.path.exists(root):
            fileutils.make_dirs(root)

        for member in example_files:
            destination = fileutils.reroot_path(root, member.name)
            src_handle = tar_handle.extractfile(member)
            with open(destination, 'w') as out_handle:
                shutil.copyfileobj(src_handle, out_handle)

    print_info("Successfully saved example data in %r" % (root,))

    return 0
def finalize(self):
    """Called by the pipeline at the termination of a run. By default,
    this function prints the location of the log-file if one was created
    during the run (e.g. if there were errors), and a summary of all nodes.
    """
    runtime = (self._end_time or 0) - (self._start_time or 0)

    if self.states[self.ERROR]:
        print_err("Done; but errors were detected ...")
    else:
        print_info("Done ...")

    print_info()
    rows = [(" Number of nodes:", sum(self.states)),
            (" Number of done nodes:", self.states[self.DONE]),
            (" Number of runnable nodes:", self.states[self.RUNABLE]),
            (" Number of queued nodes:", self.states[self.QUEUED]),
            (" Number of outdated nodes:", self.states[self.OUTDATED]),
            (" Number of failed nodes:", self.states[self.ERROR]),
            (" Pipeline runtime:", _fmt_runtime(round(runtime)))]

    for line in text.padded_table(rows):
        print_info(line)

    print_info("\nUse --list-output-files to view status of output files.")

    logfile = paleomix.logger.get_logfile()
    if logfile:
        print_debug("Log-file located at %r" % (logfile,))

    print_info()
def build_pipeline_full(config, makefile, return_nodes=True):
    result = []
    features = makefile["Options"]["Features"]
    for (target_name, sample_records) in makefile["Targets"].iteritems():
        print_info(".", end='')

        prefixes = []
        for (_, prefix) in makefile["Prefixes"].iteritems():
            samples = []
            for (sample_name, library_records) in sample_records.iteritems():
                libraries = []
                for (library_name, barcode_records) in library_records.iteritems():
                    lanes = []
                    for (barcode, record) in barcode_records.iteritems():
                        lane = parts.Lane(config, prefix, record, barcode)

                        # ExcludeReads settings may exclude entire lanes
                        if lane.bams:
                            lanes.append(lane)

                    if lanes:
                        libraries.append(parts.Library(config=config,
                                                       target=target_name,
                                                       prefix=prefix,
                                                       lanes=lanes,
                                                       name=library_name))

                if libraries:
                    samples.append(parts.Sample(config=config,
                                                prefix=prefix,
                                                libraries=libraries,
                                                name=sample_name))

            if samples:
                prefixes.append(parts.Prefix(config=config,
                                             prefix=prefix,
                                             samples=samples,
                                             features=features,
                                             target=target_name))

        if prefixes:
            target = parts.Target(config, prefixes, target_name)

            # Construct coverage, depth-histogram, and summary nodes, etc.
            parts.add_statistics_nodes(config, makefile, target)

            if return_nodes:
                # Extra tasks (e.g. coverage, depth-histograms, etc.)
                result.extend(target.nodes)
                # Output BAM files (raw, realigned)
                result.extend(target.bams.itervalues())
            else:
                result.append(target)

    return result
def read_makefiles(options, filenames, commands):
    print_info("Reading makefile(s):")
    steps = frozenset(key for (key, _) in commands)

    makefiles = []
    for filename in filenames:
        makefile = paleomix.common.makefile.read_makefile(filename, _VALIDATION)
        makefile = _mangle_makefile(options, makefile["Makefile"], steps)
        makefiles.append(makefile)

    return makefiles
def _collect_fasta_contigs(filename, cache={}):
    # Mutable default argument is used intentionally as a per-process cache
    if filename in cache:
        return cache[filename]

    if not os.path.exists(filename + ".fai"):
        print_info(" - Index does not exist for %r; this may "
                   "take a while ..." % (filename,))

    cache[filename] = contigs = dict(FASTA.index_and_collect_contigs(filename))
    return contigs
def _validate_prefixes(makefiles):
    """Validates prefixes and regions-of-interest, including an implementation
    of the checks included in GATK, which require that the FASTA for the human
    genome is ordered 1 .. 23. This is required since GATK will not run with
    human genomes in a different order.
    """
    already_validated = {}
    print_info(" - Validating prefixes ...")
    for makefile in makefiles:
        uses_gatk = makefile["Options"]["Features"]["RealignedBAM"]
        for prefix in makefile["Prefixes"].itervalues():
            path = prefix["Path"]
            if path in already_validated:
                prefix["IndexFormat"] = already_validated[path]["IndexFormat"]
                continue

            # Must be set to a valid value, even if FASTA file does not exist
            prefix["IndexFormat"] = ".bai"

            if not os.path.exists(path):
                print_warn(" - Reference FASTA file does not exist:\n"
                           " %r" % (path,))
                continue
            elif not os.path.exists(path + ".fai"):
                print_info(" - Index does not exist for %r; this may "
                           "take a while ..." % (path,))

            try:
                contigs = FASTA.index_and_collect_contigs(path)
            except FASTAError, error:
                raise MakefileError("Error indexing FASTA:\n %s" % (error,))

            # Implementation of GATK checks for the human genome
            _do_validate_hg_prefix(makefile, prefix, contigs, fatal=uses_gatk)

            contigs = dict(contigs)
            regions_of_interest = prefix.get("RegionsOfInterest", {})
            for (name, fpath) in regions_of_interest.iteritems():
                try:
                    # read_bed_file returns iterator
                    for _ in bedtools.read_bed_file(fpath, contigs=contigs):
                        pass
                except (bedtools.BEDError, IOError), error:
                    raise MakefileError("Error reading regions-of-"
                                        "interest %r for prefix %r:\n%s"
                                        % (name, prefix["Name"], error))

            if max(contigs.itervalues()) > _BAM_MAX_SEQUENCE_LENGTH:
                print_warn(" - FASTA file %r contains sequences longer "
                           "than %i! CSI index files will be used instead "
                           "of BAI index files."
                           % (path, _BAM_MAX_SEQUENCE_LENGTH))
                prefix["IndexFormat"] = ".csi"

            already_validated[path] = prefix
def _print_usage(pipeline):
    basename = "%s_pipeline" % (pipeline,)

    usage = \
        "BAM Pipeline v{version}\n" \
        "Usage:\n" \
        " -- {cmd} help -- Display this message.\n" \
        " -- {cmd} example [...] -- Create example project.\n" \
        " -- {cmd} makefile [...] -- Print makefile template.\n" \
        " -- {cmd} dryrun [...] -- Perform dry run of pipeline.\n" \
        " -- {cmd} run [...] -- Run pipeline on provided makefiles.\n" \
        " -- {cmd} remap [...] -- Re-map hits from previous alignment."

    print_info(usage.format(version=paleomix.__version__,
                            cmd=basename,
                            pad=" " * len(basename)))
def main(argv, pipeline="bam"): assert pipeline in ("bam", "trim"), pipeline options, paths = parse_args(argv) records = {} for root in paths: if os.path.isdir(root): filename = os.path.join(root, _FILENAME) else: root, filename = os.path.split(root)[0], root if not os.path.exists(filename): print_err("ERROR: Could not find SampleSheet file: %r" % filename) return 1 for record in read_alignment_records(filename): libraries = records.setdefault(record["SampleID"], {}) barcodes = libraries.setdefault(record["Index"], []) record["Lane"] = int(record["Lane"]) path = "%(SampleID)s_%(Index)s_L%(Lane)03i_R{Pair}_*.fastq.gz" \ % record record["Path"] = select_path(os.path.join(root, path)) barcodes.append(record) template = build_makefile(add_full_options=(pipeline == "bam"), add_prefix_tmpl=(pipeline == "bam")) if options.minimal: template = strip_comments(template) print(template) for (sample, libraries) in records.iteritems(): print("%s:" % sample) print(" %s:" % sample) for (library, barcodes) in libraries.iteritems(): print(" %s:" % library) for record in barcodes: print(" {FCID}_{Lane}: {Path}".format(**record)) print() print() if argv: print_info("Automatically generated makefile printed.\n" "Please check for correctness before running pipeline.") return 0
def build_pipeline_trimming(config, makefile):
    """Builds only the nodes required to produce trimmed reads.
    This reduces the required complexity of the makefile to a minimum."""

    nodes = []
    for (_, samples) in makefile["Targets"].iteritems():
        print_info(".", end='')

        for libraries in samples.itervalues():
            for barcodes in libraries.itervalues():
                for record in barcodes.itervalues():
                    if record["Type"] in ("Raw", "Trimmed"):
                        offset = record["Options"]["QualityOffset"]
                        reads = Reads(config, record, offset)
                        nodes.extend(reads.nodes)

    return nodes
def _update_regions(options, mkfile):
    print_info(" - Validating regions of interest ...")
    mkfile["Project"]["Regions"] = mkfile["Project"].pop("RegionsOfInterest")

    if not mkfile["Project"]["Regions"]:
        raise MakefileError('No regions of interest have been specified; '
                            'no analyses will be performed.')

    for (name, subdd) in mkfile["Project"]["Regions"].iteritems():
        if "Prefix" not in subdd:
            raise MakefileError("No genome specified for regions %r" % (name,))

        subdd["Name"] = name
        subdd["Desc"] = "{Prefix}.{Name}".format(**subdd)
        subdd["BED"] = os.path.join(options.regions_root,
                                    subdd["Desc"] + ".bed")
        subdd["FASTA"] = os.path.join(options.prefix_root,
                                      subdd["Prefix"] + ".fasta")

        required_files = (
            ("Regions file", subdd["BED"]),
            ("Reference sequence", subdd["FASTA"]),
        )

        for (desc, path) in required_files:
            if not os.path.isfile(path):
                raise MakefileError("%s does not exist for %r:\n Path = %r"
                                    % (desc, name, path))

        # Collects seq. names / validate regions
        try:
            sequences = _collect_sequence_names(bed_file=subdd["BED"],
                                                fasta_file=subdd["FASTA"])
        except (IOError, BEDError), error:
            raise MakefileError("Error reading regions-of-interest %r:\n%s"
                                % (name, error))

        subdd["Sequences"] = {None: sequences}
        subdd["SubsetFiles"] = {None: ()}

        sampledd = subdd["Genotypes"] = {}
        for sample_name in mkfile["Project"]["Samples"]:
            fasta_file = ".".join((sample_name, subdd["Desc"], "fasta"))
            sampledd[sample_name] = os.path.join(options.destination,
                                                 mkfile["Project"]["Title"],
                                                 "genotypes",
                                                 fasta_file)
def process_key_presses(self, nodegraph, max_threads, ui):
    if not self._tty_settings:
        return max_threads

    help_printed = False
    old_max_threads = max_threads
    while self.poll_stdin():
        character = sys.stdin.read(1)
        if character == "+":
            max_threads = min(multiprocessing.cpu_count(), max_threads + 1)
        elif character == "-":
            max_threads = max(1, max_threads - 1)
        elif character in "lL":
            print_info(file=sys.stdout)
            progress_printer = RunningUI()
            progress_printer.max_threads = max_threads
            progress_printer.start_time = ui.start_time
            progress_printer.refresh(nodegraph)
            progress_printer.flush()
        elif character in "hH":
            if help_printed:
                continue

            help_printed = True
            print_info("""
Commands:
  Key   Function
  h     Prints this message.
  l     Lists the currently running nodes.
  +     Increases the maximum number of threads by one.
  -     Decreases the maximum number of threads by one; already running
        tasks are NOT terminated if the number of threads currently used
        exceeds the resulting maximum.
""", file=sys.stdout)
        else:
            continue

    if max_threads != old_max_threads:
        print_debug("Maximum number of threads changed from %i to %i."
                    % (old_max_threads, max_threads), file=sys.stdout)

    return max_threads
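# Hypothetical sketch only: process_key_presses() above depends on a
# poll_stdin() method that is not shown here. A non-blocking check along these
# lines, using select.select() with a zero timeout, would let the loop read
# key presses without stalling the pipeline; the actual implementation (and
# its handling of the saved TTY settings) may differ.
import select
import sys

def _poll_stdin():
    readable, _, _ = select.select([sys.stdin], [], [], 0)
    return bool(readable)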
def _check_bam_sequences(options, mkfile, steps):
    """Check that the BAM files contain the reference sequences found in the
    FASTA file, matched by name and length; extra sequences are permitted. This
    check is only done if genotyping is to be carried out, to reduce the
    overhead of reading the BAM file headers.
    """
    if ("genotype" not in steps) and ("genotyping" not in steps):
        return

    print_info(" - Validating BAM files ...")
    bam_files = {}
    for regions in mkfile["Project"]["Regions"].itervalues():
        for sample in mkfile["Project"]["Samples"].itervalues():
            filename = os.path.join(options.samples_root,
                                    "%s.%s.bam" % (sample["Name"],
                                                   regions["Prefix"]))
            if regions["Realigned"]:
                filename = add_postfix(filename, ".realigned")

            if os.path.exists(filename):
                bam_files[filename] = _collect_fasta_contigs(regions["FASTA"])

    for (filename, contigs) in bam_files.iteritems():
        with pysam.Samfile(filename) as handle:
            bam_contigs = dict(zip(handle.references, handle.lengths))

            for (contig, length) in contigs.iteritems():
                bam_length = bam_contigs.get(contig)

                if bam_length is None:
                    message = ("Reference sequence missing from BAM file; "
                               "BAM file aligned against different prefix?\n"
                               " BAM file = %s\n Sequence name = %s") \
                        % (filename, contig)
                    raise MakefileError(message)
                elif bam_length != length:
                    message = ("Length of reference sequence in FASTA differs "
                               "from length of sequence in BAM file; BAM file "
                               "aligned against different prefix?\n"
                               " BAM file = %s\n"
                               " Length in FASTA = %s\n"
                               " Length in BAM = %s") \
                        % (filename, length, bam_length)
                    raise MakefileError(message)
def _write_config_file(self, config, defaults):
    """Writes a basic config file, using the values previously found in the
    config files, and specified on the command-line."""
    defaults_cfg = ConfigParser.SafeConfigParser()
    defaults_cfg.add_section("Defaults")
    for key in defaults:
        value = getattr(config, key)
        if isinstance(value, (types.ListType, types.TupleType)):
            value = ";".join(value)

        defaults_cfg.set("Defaults", key, str(value))

    filename = self._filenames[-1]
    make_dirs(os.path.dirname(filename))

    with open(filename, "w") as handle:
        defaults_cfg.write(handle)

    print_info("Wrote config file %r" % (filename,))
    sys.exit(0)
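# Hypothetical sketch only: a config file written by _write_config_file()
# above can be read back with the same ConfigParser machinery, e.g. to seed
# option defaults on a later run. The file name below is an example, not a
# path used by the project.
import ConfigParser

parser = ConfigParser.SafeConfigParser()
parser.read("example.ini")
if parser.has_section("Defaults"):
    defaults = dict(parser.items("Defaults"))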
def parse_config(argv):
    migrate_config()

    options, args = _run_config_parser(argv)
    paleomix.ui.set_ui_colors(options.ui_colors)

    if args and args[0] in ("example", "examples"):
        return options, args
    elif (len(args) < 2) and (args != ["mkfile"] and args != ["makefile"]):
        description = _DESCRIPTION.replace("%prog", "phylo_pipeline").strip()
        console.print_info("Phylogeny Pipeline v%s\n" % (paleomix.__version__,))
        console.print_info(description)
        return options, args

    commands = select_commands(args[0] if args else ())
    if any((func is None) for (_, func) in commands):
        unknown_commands = ", ".join(repr(key) for (key, func) in commands
                                     if func is None)
        raise ConfigError("Unknown analysis step(s): %s" % (unknown_commands,))

    return options, args
def _update_regions(options, mkfile):
    print_info(" - Validating regions of interest ...")
    mkfile["Project"]["Regions"] = mkfile["Project"].pop("RegionsOfInterest")

    for (name, subdd) in mkfile["Project"]["Regions"].iteritems():
        if "Prefix" not in subdd:
            raise MakefileError("No genome specified for regions %r" % (name,))

        subdd["Name"] = name
        subdd["Desc"] = "{Prefix}.{Name}".format(**subdd)
        subdd["BED"] = os.path.join(options.regions_root,
                                    subdd["Desc"] + ".bed")
        subdd["FASTA"] = os.path.join(options.prefix_root,
                                      subdd["Prefix"] + ".fasta")

        required_files = (
            ("Regions file", subdd["BED"]),
            ("Reference sequence", subdd["FASTA"]),
        )

        for (desc, path) in required_files:
            if not os.path.isfile(path):
                raise MakefileError("%s does not exist for %r:\n Path = %r"
                                    % (desc, name, path))

        # Collects seq. names / validate regions
        try:
            sequences = _collect_sequence_names(bed_file=subdd["BED"],
                                                fasta_file=subdd["FASTA"])
        except (IOError, BEDError), error:
            raise MakefileError("Error reading regions-of-interest %r:\n%s"
                                % (name, error))

        subdd["Sequences"] = {None: sequences}
        subdd["SubsetFiles"] = {None: ()}

        sampledd = subdd["Genotypes"] = {}
        for sample_name in mkfile["Project"]["Samples"]:
            fasta_file = ".".join((sample_name, subdd["Desc"], "fasta"))
            sampledd[sample_name] = os.path.join(options.destination,
                                                 mkfile["Project"]["Title"],
                                                 "genotypes",
                                                 fasta_file)
def main(argv, pipeline="bam"): assert pipeline in ("bam", "trim"), pipeline options, filenames = parse_args(argv) records = read_sample_sheets(filenames) if records is None: return 1 template = build_makefile(add_full_options=(pipeline == "bam"), add_prefix_tmpl=(pipeline == "bam"), add_sample_tmpl=not records) if options.minimal: template = strip_comments(template) print(template) print_samples(records) if argv: print_info("Automatically generated makefile printed.\n" "Please check for correctness before running pipeline.") return 0
    if max_depth is None:
        raise MakefileError("MaxDepth for %r not found in depth-histogram: %r"
                            % (sample, filename))
    elif max_depth == "NA":
        raise MakefileError("MaxDepth is not calculated for sample %r; "
                            "cannot determine MaxDepth values automatically."
                            % (filename,))
    elif not max_depth.isdigit():
        raise MakefileError("MaxDepth is not a valid value for sample %r in %r; "
                            "expected integer, found %r."
                            % (sample, filename, max_depth))

    max_depth = int(max_depth)
    print_info(" - %s.%s = %i" % (sample, prefix, max_depth))

    _DEPTHS_CACHE[filename] = max_depth
    return max_depth


_DEPTHS_CACHE = {}


def _check_indels_and_msa(mkfile):
    msa = mkfile["MultipleSequenceAlignment"]
    regions = mkfile["Project"]["Regions"]
    for (name, subdd) in regions.iteritems():
        msa_enabled = msa[name]["Enabled"]

        if subdd["IncludeIndels"] and not msa_enabled:
            raise MakefileError("Regions %r includes indels, but MSA is disabled!"
                                % (name,))
    try:
        os.makedirs(config.temp_root)
    except OSError, error:
        print_err("ERROR: Could not create temp root:\n\t%s" % (error,))
        return 1

    if not os.access(config.temp_root, os.R_OK | os.W_OK | os.X_OK):
        print_err("ERROR: Insufficient permissions for temp root: '%s'"
                  % (config.temp_root,))
        return 1

    # Init worker-threads before reading in any more data
    pipeline = Pypeline(config)

    try:
        print_info("Reading makefiles ...")
        makefiles = read_makefiles(config, args, pipeline_variant)
    except (MakefileError, paleomix.yaml.YAMLError, IOError), error:
        print_err("Error reading makefiles:",
                  "\n %s:\n " % (error.__class__.__name__,),
                  "\n ".join(str(error).split("\n")))
        return 1

    logfile_template = time.strftime("bam_pipeline.%Y%m%d_%H%M%S_%%02i.log")
    paleomix.logger.initialize(config, logfile_template)
    logger = logging.getLogger(__name__)

    pipeline_func = build_pipeline_trimming
    if pipeline_variant == "bam":
        # Build .fai files for reference .fasta files
        index_references(config, makefiles)