def _run(self, _config, _temp):
    for filename in self.input_files:
        check_fasta_file(filename)

    output_file, = self.output_files
    make_dirs(os.path.dirname(output_file))
    with open(output_file, "w"):
        pass
def setup_example(config):
    root = os.path.join(config.destination, 'zonkey_pipeline')

    with tarfile.TarFile(config.tablefile) as tar_handle:
        example_files = []
        existing_files = []
        for member in tar_handle.getmembers():
            if os.path.dirname(member.name) == 'examples' and member.isfile():
                example_files.append(member)

                destination = fileutils.reroot_path(root, member.name)
                if os.path.exists(destination):
                    existing_files.append(destination)

        if existing_files:
            print_err("Output files already exist at destination:\n    - %s"
                      % ("\n    - ".join(map(repr, existing_files))))
            return 1
        elif not example_files:
            print_err("Sample database %r does not contain example data; "
                      "cannot proceed." % (config.tablefile,))
            return 1

        if not os.path.exists(root):
            fileutils.make_dirs(root)

        for member in example_files:
            destination = fileutils.reroot_path(root, member.name)
            src_handle = tar_handle.extractfile(member)
            with open(destination, 'w') as out_handle:
                shutil.copyfileobj(src_handle, out_handle)

    print_info("Successfully saved example data in %r" % (root,))

    return 0
def run(self, _):
    handles = []
    try:
        last_pos = None
        observed_reads = collections.defaultdict(list)
        for (record, filename) in self._open_samfiles(handles, self.input_files):
            curr_pos = (record.pos, record.tid)
            if curr_pos != last_pos:
                self._process_reads(observed_reads, self.output_files)
                observed_reads.clear()
                last_pos = curr_pos

                # Stop once the trailing, unmapped reads are reached
                if record.tid == -1:
                    break

            observed_reads[record.qname].append((record, filename))
        self._process_reads(observed_reads, self.output_files)

        # Everything is ok, touch the output files
        for fpath in self.output_files:
            make_dirs(os.path.dirname(fpath))
            with open(fpath, "w"):
                pass
    finally:
        for handle in handles:
            handle.close()
def run_admix_pipeline(config):
    print_info("\nBuilding %i Zonkey pipeline(s):" % (len(config.samples),))
    config.temp_root = os.path.join(config.destination, "temp")
    if not config.dry_run:
        fileutils.make_dirs(config.temp_root)

    cache = {}
    nodes = []
    items = config.samples.iteritems()
    for idx, (name, sample) in enumerate(sorted(items), start=1):
        root = sample["Root"]
        nuc_bam = sample["Files"].get("Nuc")
        mito_bam = sample["Files"].get("Mito")

        genomes = []
        if mito_bam:
            genomes.append("MT")
        if nuc_bam:
            genomes.append("Nuclear")

        print_info("  %i. %s: %s DNA" % (idx, name, ' and '.join(genomes)))

        nodes.extend(build_pipeline(config, root, nuc_bam, mito_bam, cache))

    if config.multisample and not config.admixture_only:
        nodes = [summary.SummaryNode(config, nodes)]

    if not run_pipeline(config, nodes, "\nRunning Zonkey:"):
        return 1
def _create_temp_dir(self, _config):
    """Called by 'run' in order to create a temporary folder. To allow
    restarting from checkpoints, we use a fixed folder determined by the
    output_template."""
    temp = os.path.join(self._dirname, self._template % ("temp",))
    fileutils.make_dirs(temp)
    return temp
def run_admix_pipeline(config):
    log = logging.getLogger(__name__)
    log.info("Building %i Zonkey pipeline(s):", len(config.samples))
    config.temp_root = os.path.join(config.destination, "temp")
    if not config.dry_run:
        fileutils.make_dirs(config.temp_root)

    cache = {}
    nodes = []
    items = config.samples.items()
    for idx, (name, sample) in enumerate(sorted(items), start=1):
        root = sample["Root"]
        nuc_bam = sample["Files"].get("Nuc")
        mito_bam = sample["Files"].get("Mito")

        genomes = []
        if mito_bam:
            genomes.append("MT")
        if nuc_bam:
            genomes.append("Nuclear")

        log.info("  %i. %s: %s DNA", idx, name, " and ".join(genomes))

        nodes.extend(build_pipeline(config, root, nuc_bam, mito_bam, cache))

    if config.multisample and not config.admixture_only:
        nodes = [summary.SummaryNode(config, nodes)]

    if not run_pipeline(config, nodes, "Running Zonkey"):
        return 1
def process_bam(args, data, bam_handle):
    raw_references = bam_handle.references
    references = map(common.contig_name_to_plink_name, raw_references)

    if args.downsample:
        sys.stderr.write("Downsampling to at most %i BAM records ...\n"
                         % (args.downsample,))
        bam_handle = DownsampledBAM(bam_handle, args.downsample, references)

    statistics = {"n_reads": 0,
                  "n_reads_used": 0,
                  "n_sites_incl_ts": 0,
                  "n_sites_excl_ts": 0}

    fileutils.make_dirs(args.root)

    with open(os.path.join(args.root, 'incl_ts.tped'), 'w') as output_incl:
        with open(os.path.join(args.root, 'excl_ts.tped'), 'w') as output_excl:
            with GenotypeReader(args.data) as reader:
                for ref, sites in reader:
                    raw_ref = raw_references[references.index(ref)]
                    sys.stderr.write("Reading %r from BAM ...\n" % (raw_ref,))

                    raw_sites = bam_handle.fetch(raw_ref)
                    for pos, line, nucleotides in sites.process(raw_sites,
                                                                statistics):
                        process_record(ref, pos, line, nucleotides,
                                       out_incl_ts=output_incl,
                                       out_excl_ts=output_excl,
                                       statistics=statistics)

                write_summary(args, os.path.join(args.root, "common.summary"),
                              statistics=statistics)
                write_tfam(os.path.join(args.root, "common.tfam"),
                           data, reader.samples, args.name)
def _run(self, _config, _temp):
    for filename in self.input_files:
        check_fasta_file(filename)

    (output_file,) = self.output_files
    if os.path.dirname(output_file):
        make_dirs(os.path.dirname(output_file))
    with open(output_file, "w"):
        pass
def _teardown(self, config, temp):
    fileutils.make_dirs(self._root)
    fileutils.move_file(os.path.join(temp, "report.html"),
                        os.path.join(self._root, "report.html"))

    css_path = paleomix.resources.report("zonkey", "report.css")
    fileutils.copy_file(css_path, os.path.join(self._root, "report.css"))
def _teardown(self, config, temp):
    fileutils.make_dirs(self._root)
    fileutils.move_file(os.path.join(temp, "summary.html"),
                        os.path.join(self._root, "summary.html"))

    css_path = paleomix.resources.report("zonkey", "report.css")
    fileutils.copy_file(css_path, os.path.join(self._root, "summary.css"))
def _run(self, _config, _temp):
    stats = check_fastq_files(self._files, self._offset, True)
    output_file = tuple(self.output_files)[0]
    if os.path.dirname(output_file):
        make_dirs(os.path.dirname(output_file))

    data = json.dumps(stats)
    with open(output_file, "w") as handle:
        handle.write(data)
def test_move_file__move_to_existing_folder(temp_folder):
    assert make_dirs(os.path.join(temp_folder, "src"))
    assert make_dirs(os.path.join(temp_folder, "dst"))
    file_1 = os.path.join(temp_folder, "src", "file_1")
    file_2 = os.path.join(temp_folder, "dst", "file_2")
    set_file_contents(file_1, "2")
    move_file(file_1, file_2)
    assert_equal(os.listdir(os.path.dirname(file_1)), [])
    assert_equal(os.listdir(os.path.dirname(file_2)), ["file_2"])
    assert_equal(get_file_contents(file_2), "2")
def run(self, _):
    check_bam_files(self.input_files, self._throw_node_error)

    # Everything is ok, touch the output files
    for fpath in self.output_files:
        if os.path.dirname(fpath):
            make_dirs(os.path.dirname(fpath))
        with open(fpath, "w"):
            pass
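# Illustration only: the "touch the outputs" idiom shared by the validation
# nodes above, extracted into a hypothetical stand-alone helper (not part of
# the code base). make_dirs tolerates directories that already exist, so the
# helper is safe to call repeatedly.
import os

from paleomix.common.fileutils import make_dirs

def touch_output_files(output_files):
    for fpath in output_files:
        dirname = os.path.dirname(fpath)
        if dirname:  # fpath may be a bare filename in the working directory
            make_dirs(dirname)
        with open(fpath, "w"):  # create (or truncate) an empty marker file
            pass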
def process_bam(args, data, bam_handle, mapping):
    reverse_mapping = dict(zip(mapping.values(), mapping))
    raw_references = bam_handle.references
    references = [reverse_mapping.get(name, name) for name in raw_references]

    if args.downsample:
        sys.stderr.write("Downsampling to at most %i BAM records\n"
                         % (args.downsample,))
        bam_handle = DownsampledBAM(bam_handle, args.downsample, references)

    statistics = {
        "n_reads": 0,
        "n_reads_used": 0,
        "n_sites_incl_ts": 0,
        "n_sites_excl_ts": 0,
    }

    fileutils.make_dirs(args.root)

    with open(os.path.join(args.root, "incl_ts.tped"), "w") as output_incl:
        with open(os.path.join(args.root, "excl_ts.tped"), "w") as output_excl:
            with GenotypeReader(args.database) as reader:
                for ref, sites in reader:
                    records = set()
                    raw_ref = raw_references[references.index(ref)]
                    sys.stderr.write("Reading %r from BAM\n" % (raw_ref,))

                    raw_sites = bam_handle.fetch(raw_ref)
                    for pos, line, nucleotides in sites.process(raw_sites,
                                                                statistics):
                        process_record(
                            ref,
                            pos,
                            line,
                            nucleotides,
                            out_incl_ts=output_incl,
                            out_excl_ts=output_excl,
                            statistics=statistics,
                            records=records,
                        )

                write_summary(
                    args,
                    os.path.join(args.root, "common.summary"),
                    statistics=statistics,
                )
                write_tfam(
                    os.path.join(args.root, "common.tfam"),
                    data,
                    reader.samples,
                    args.name,
                )
def _open_logfile(folder, template, start=0):
    """Try to open a new logfile, taking steps to ensure that
    existing logfiles using the same template are not clobbered."""
    if not os.path.exists(folder):
        _fs.make_dirs(folder)

    flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL
    while True:
        filename = os.path.join(folder, template % (start,))
        try:
            if not os.path.exists(filename):
                return filename, os.fdopen(os.open(filename, flags), "w")
        except OSError as error:
            if error.errno != errno.EEXIST:
                raise

        start += 1
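# Usage sketch for _open_logfile above; the folder name and the "%02i"
# template are invented for illustration. The helper bumps the counter until
# an unused name is found, so concurrent runs get distinct logfiles; closing
# the returned handle is the caller's responsibility.
def example_open_logfile():
    filename, handle = _open_logfile("logs", "pipeline_%02i.txt")
    try:
        handle.write("logging to %s\n" % (filename,))
    finally:
        handle.close()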
def main(argv):
    args = parse_args(argv)
    args.revision = datetime.datetime.today().strftime('%Y%m%d')
    data = _collect_samples(args.reference, args.samples)
    if not data:
        return 1

    fileutils.make_dirs(args.root)

    _write_contigs(args, os.path.join(args.root, 'contigs.txt'))
    _write_samples(args, data['samples'], os.path.join(args.root, 'samples.txt'))
    _write_settings(args, data['contigs'], os.path.join(args.root, 'settings.yaml'))
    _write_genotypes(args, data, os.path.join(args.root, 'genotypes.txt'))
    _write_build_sh(args, os.path.join(args.root, 'build.sh'))
def main(argv):
    args = parse_args(argv)
    args.revision = datetime.datetime.today().strftime("%Y%m%d")
    data = _collect_samples(args.reference, args.samples)
    if not data:
        return 1

    fileutils.make_dirs(args.root)

    _write_contigs(args, os.path.join(args.root, "contigs.txt"))
    _write_samples(args, data["samples"], os.path.join(args.root, "samples.txt"))
    _write_settings(args, data["contigs"], os.path.join(args.root, "settings.yaml"))
    _write_genotypes(args, data, os.path.join(args.root, "genotypes.txt"))
    _write_build_sh(args, os.path.join(args.root, "build.sh"))
def test_copy_file__copy_to_new_folder(temp_folder):
    assert make_dirs(os.path.join(temp_folder, "src"))
    file_1 = os.path.join(temp_folder, "src", "file_1")
    file_2 = os.path.join(temp_folder, "dst", "file_2")
    set_file_contents(file_1, "2")
    copy_file(file_1, file_2)
    assert_equal(os.listdir(os.path.dirname(file_1)), ["file_1"])
    assert_equal(os.listdir(os.path.dirname(file_2)), ["file_2"])
    assert_equal(get_file_contents(file_1), "2")
    assert_equal(get_file_contents(file_2), "2")
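# Sketch of the copy_file behaviour exercised by the test above, assuming it
# creates missing destination folders before copying (as the test implies);
# the real implementation in paleomix.common.fileutils may differ in detail.
import os
import shutil

def copy_file_sketch(source, destination):
    dirname = os.path.dirname(destination)
    if dirname and not os.path.exists(dirname):
        os.makedirs(dirname)
    shutil.copy(source, destination)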
def process_bam(args, data, bam_handle):
    raw_references = bam_handle.references
    references = map(common.contig_name_to_plink_name, raw_references)

    if args.downsample:
        sys.stderr.write("Downsampling to at most %i BAM records ...\n"
                         % (args.downsample,))
        bam_handle = DownsampledBAM(bam_handle, args.downsample, references)

    statistics = {
        "n_reads": 0,
        "n_reads_used": 0,
        "n_sites_incl_ts": 0,
        "n_sites_excl_ts": 0,
    }

    fileutils.make_dirs(args.root)

    with open(os.path.join(args.root, 'incl_ts.tped'), 'w') as output_incl:
        with open(os.path.join(args.root, 'excl_ts.tped'), 'w') as output_excl:
            with GenotypeReader(args.database) as reader:
                for ref, sites in reader:
                    records = set()
                    raw_ref = raw_references[references.index(ref)]
                    sys.stderr.write("Reading %r from BAM ...\n" % (raw_ref,))

                    raw_sites = bam_handle.fetch(raw_ref)
                    for pos, line, nucleotides in sites.process(raw_sites,
                                                                statistics):
                        process_record(ref, pos, line, nucleotides,
                                       out_incl_ts=output_incl,
                                       out_excl_ts=output_excl,
                                       statistics=statistics,
                                       records=records)

                write_summary(args, os.path.join(args.root, "common.summary"),
                              statistics=statistics)
                write_tfam(os.path.join(args.root, "common.tfam"),
                           data, reader.samples, args.name)
def run_admix_pipeline(config):
    config.temp_root = os.path.join(config.destination, "temp")
    if not config.dry_run:
        fileutils.make_dirs(config.temp_root)

    cache = {}
    nodes = []
    for sample in config.samples.itervalues():
        root = sample["Root"]
        nuc_bam = sample["Files"].get("Nuc")
        mito_bam = sample["Files"].get("Mito")

        nodes.extend(build_pipeline(config, root, nuc_bam, mito_bam, cache))

    if config.multisample and not config.admixture_only:
        nodes = [summary.SummaryNode(config, nodes)]

    if not run_pipeline(config, nodes, "\nRunning Zonkey ..."):
        return 1
def _write_config_file(self, config, defaults):
    """Writes a basic config file, using the values previously found in the
    config files and specified on the command-line."""
    defaults_cfg = ConfigParser.SafeConfigParser()
    defaults_cfg.add_section("Defaults")
    for key in defaults:
        value = getattr(config, key)
        if isinstance(value, (types.ListType, types.TupleType)):
            value = ";".join(value)

        defaults_cfg.set("Defaults", key, str(value))

    filename = self._filenames[-1]
    make_dirs(os.path.dirname(filename))

    with open(filename, "w") as handle:
        defaults_cfg.write(handle)

    print_info("Wrote config file %r" % (filename,))
    sys.exit(0)
def with_temp_folder(func):
    """Decorator for unit-tests: Creates a unique temporary folder before
    running 'func'. The function is assumed to take at least one parameter,
    the first of which is assumed to represent the temporary folder."""
    temp_root = os.path.join(tempfile.gettempdir(), os.getlogin())
    make_dirs(temp_root)  # Ensure that this subdirectory exists

    @nose.tools.istest
    def _wrapper(*args, **kwargs):
        try:
            temp_folder = None
            temp_folder = tempfile.mkdtemp(dir=temp_root,
                                           prefix="paleomix_unit")
            func(temp_folder, *args, **kwargs)
        finally:
            if temp_folder:
                shutil.rmtree(temp_folder)

    _wrapper.__name__ = func.__name__ + "__wrapped_by_with_temp_folder"
    return _wrapper
def setup_example(config):
    root = os.path.join(config.destination, "zonkey_pipeline")

    log = logging.getLogger(__name__)
    log.info("Copying example project to %r", root)

    with tarfile.TarFile(config.database.filename) as tar_handle:
        example_files = []
        existing_files = []
        for member in tar_handle.getmembers():
            if os.path.dirname(member.name) == "examples" and member.isfile():
                example_files.append(member)

                destination = fileutils.reroot_path(root, member.name)
                if os.path.exists(destination):
                    existing_files.append(destination)

        if existing_files:
            log.error("Output files already exist at destination:")
            for filename in sorted(existing_files):
                log.error(" - %r", filename)

            return 1
        elif not example_files:
            log.error(
                "Sample database %r does not contain example data; cannot proceed.",
                config.database.filename,
            )
            return 1

        if not os.path.exists(root):
            fileutils.make_dirs(root)

        for member in example_files:
            destination = fileutils.reroot_path(root, member.name)
            src_handle = tar_handle.extractfile(member)
            with open(destination, "wb") as out_handle:
                shutil.copyfileobj(src_handle, out_handle)

    log.info("Successfully saved example data in %r", root)

    return 0
def with_temp_folder(func):
    """Decorator for unit-tests: Creates a unique temporary folder before
    running 'func'. The function is assumed to take at least one parameter,
    the first of which is assumed to represent the temporary folder."""
    name = pwd.getpwuid(os.geteuid()).pw_name
    temp_root = os.path.join(tempfile.gettempdir(), name)
    make_dirs(temp_root)

    @nose.tools.istest
    def _wrapper(*args, **kwargs):
        try:
            temp_folder = None
            temp_folder = tempfile.mkdtemp(dir=temp_root,
                                           prefix="paleomix_unit")
            func(temp_folder, *args, **kwargs)
        finally:
            if temp_folder:
                shutil.rmtree(temp_folder)

    _wrapper.__name__ = func.__name__ + "__wrapped_by_with_temp_folder"
    return _wrapper
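# Hypothetical test (not from the code base) showing how the with_temp_folder
# decorator above is meant to be used: the wrapper passes the freshly created
# temporary folder as the first argument and removes it again once the test
# returns, pass or fail.
@with_temp_folder
def test_example__file_is_created(temp_folder):
    filename = os.path.join(temp_folder, "example.txt")
    set_file_contents(filename, "example")
    assert os.path.exists(filename)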
def test_move_dirs__permission_denied(temp_folder):
    dst_folder = os.path.join(temp_folder, "dst")
    file_1 = os.path.join(temp_folder, "file")
    file_2 = os.path.join(dst_folder, "file")
    set_file_contents(file_1, "1")

    # Make destination folder read-only
    assert make_dirs(os.path.join(temp_folder, "dst"))
    mode = os.stat(dst_folder).st_mode
    ro_mode = mode & ~(stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH)
    os.chmod(dst_folder, ro_mode)

    # Non-ENOENT errors should be re-raised:
    assert_raises(IOError, move_file, file_1, file_2)
def convert_reads(config, destination, record, sink_cache):
    # The source name is used to re-merge split lanes
    name = record.tags.get("PU_src")
    destination = os.path.join(destination, name)
    make_dirs(os.path.join(config.destination, destination))

    def _open_se_sink(reads_type):
        key = (name, reads_type)
        if not get_in(sink_cache, key):
            filename = ReadSink.get_filename(destination, reads_type.lower())
            set_in(sink_cache, key, ReadSink.open(config.destination, filename))
        return key

    for (reads_type, bam_files) in record.bams.iteritems():
        # Processed reads are pre-aligned BAMs which have been cleaned up
        if reads_type in ("Paired", "Processed"):
            # Record "Single" reads; these may result from orphan SE reads
            _open_se_sink("Singleton")

            key = (name, "Paired")
            if not get_in(sink_cache, key):
                set_in(sink_cache, key,
                       PEReadSink.open(config.destination, destination))
        else:
            key = _open_se_sink(reads_type)

        sink = get_in(sink_cache, key)
        for filename in bam_files:
            print("%sProcessing file %r" % (_INDENTATION * 4, filename))
            with pysam.Samfile(filename) as handle:
                def _keep_record(record):
                    return (record.qual >= config.min_quality) and \
                        (len(record.seq) >= config.min_length)

                sink.write_records(record for record in handle
                                   if _keep_record(record))
def test_make_dirs__creation_preempted(temp_folder):
    unwrapped, preempted = os.makedirs, []

    def _wrap_os_makedirs(*args, **kwargs):
        # Simulate somebody else creating the directory first; the second
        # call raises EEXIST, which make_dirs is expected to swallow
        preempted.append(True)
        unwrapped(*args, **kwargs)
        unwrapped(*args, **kwargs)

    with Monkeypatch("os.makedirs", _wrap_os_makedirs):
        work_folder = os.path.join(temp_folder, "test")
        assert not make_dirs(work_folder)
        assert os.path.exists(work_folder)
        assert_equal(os.listdir(temp_folder), ["test"])
        assert_equal(preempted, [True])
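# Minimal sketch of the Monkeypatch helper assumed by the test above: a
# context manager that replaces an attribute named by a dotted path (for
# example "os.makedirs") and restores it on exit. The real test utility may
# differ in detail.
import importlib

class Monkeypatch(object):
    def __init__(self, path, replacement):
        module_name, self._name = path.rsplit(".", 1)
        self._module = importlib.import_module(module_name)
        self._replacement = replacement

    def __enter__(self):
        self._original = getattr(self._module, self._name)
        setattr(self._module, self._name, self._replacement)
        return self

    def __exit__(self, *exc_info):
        setattr(self._module, self._name, self._original)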
def test_make_dirs__sub_directories(temp_folder):
    assert not os.listdir(temp_folder)
    assert make_dirs(os.path.join(temp_folder, "test", "123"))
    assert_equal(os.listdir(temp_folder), ["test"])
    assert_equal(os.listdir(os.path.join(temp_folder, "test")), ["123"])
def test_make_dirs__permissions(temp_folder):
    work_dir = os.path.join(temp_folder, "test_1")
    assert make_dirs(work_dir, mode=0o511)
    stats = os.stat(work_dir)
    assert_equal(oct(stats.st_mode & 0o777), oct(0o511))
def test_make_dirs__empty_directory():
    make_dirs("")
def test_make_dirs__create_dir(temp_folder):
    assert not os.listdir(temp_folder)
    assert make_dirs(os.path.join(temp_folder, "test123"))
    assert_equal(os.listdir(temp_folder), ["test123"])
def _run(self, _config, _temp):
    check_fastq_files(self.input_files, self._offset, True)
    output_file = tuple(self.output_files)[0]
    make_dirs(os.path.dirname(output_file))
    with open(output_file, "w"):
        pass
def main(argv):
    config, args = parse_options(argv)
    if config is None:
        return 1

    # Get default options for bam_pipeline
    bam_config, _ = bam_cfg.parse_config(args, "bam")
    makefiles = bam_pipeline.read_makefiles(bam_config, args)
    # Build .fai files for reference .fasta files
    bam_pipeline.index_references(bam_config, makefiles)

    for makefile in makefiles:
        mkfile_fname = makefile["Statistics"]["Filename"]
        bam_config.destination = os.path.dirname(mkfile_fname)
        tasks = bam_pipeline.build_pipeline_full(bam_config, makefile,
                                                 return_nodes=False)

        make_dirs(config.destination)
        makefile_name = add_postfix(makefile["Statistics"]["Filename"],
                                    config.postfix)
        makefile_path = reroot_path(config.destination, makefile_name)
        if samefile(makefile["Statistics"]["Filename"], makefile_path):
            sys.stderr.write("ERROR: Would overwrite source makefile at %r\n"
                             % (makefile_path,))
            sys.stderr.write("       Please set --destination and/or "
                             "--output-name-postfix\n")
            sys.stderr.write("       before continuing.\n")
            return 1

        print("Writing makefile", makefile_path)

        found_prefix = False
        for prefix in makefile["Prefixes"]:
            if prefix != config.prefix:
                print("%sSkipping %s" % (_INDENTATION, prefix))
            else:
                found_prefix = True

        if not found_prefix:
            sys.stderr.write("\nERROR:\n")
            sys.stderr.write("Could not find prefix %r in %r! Aborting ...\n"
                             % (config.prefix, mkfile_fname))
            return 1

        with open(makefile_path, "w") as makefile_handle:
            template = bam_mkfile.build_makefile(add_sample_tmpl=False)
            makefile_handle.write(template)
            makefile_handle.write("\n" * 3)

            for target in tasks:
                target_name = add_postfix(target.name, config.postfix)
                print("%sTarget: %s -> %s" % (_INDENTATION,
                                              target.name, target_name))

                makefile_handle.write('%s"%s":\n' % (_INDENTATION * 0,
                                                     target_name))
                for prefix in target.prefixes:
                    if prefix.name != config.prefix:
                        continue

                    for sample in prefix.samples:
                        print("%sSample: %s" % (_INDENTATION * 2, sample.name))
                        makefile_handle.write('%s"%s":\n' % (_INDENTATION * 1,
                                                             sample.name))

                        for library in sample.libraries:
                            print("%sLibrary: %s" % (_INDENTATION * 3,
                                                     library.name))
                            makefile_handle.write('%s"%s":\n'
                                                  % (_INDENTATION * 2,
                                                     library.name))

                            sink_cache = {}
                            destination = os.path.join(target_name, "reads",
                                                       sample.name,
                                                       library.name)

                            for lane in library.lanes:
                                convert_reads(config, destination, lane,
                                              sink_cache)
                            ReadSink.close_all_sinks()

                            for lane_name in sorted(sink_cache):
                                makefile_handle.write('%s"%s":\n'
                                                      % (_INDENTATION * 3,
                                                         lane_name))
                                items = sorted(sink_cache[lane_name].items())
                                for (reads_type, sink) in items:
                                    key = ("%s:" % (reads_type,)).ljust(20)
                                    makefile_handle.write('%s%s "%s"\n'
                                                          % (_INDENTATION * 4,
                                                             key,
                                                             sink.filename))
                                makefile_handle.write("\n")

        print("\tDone ...")
        print()

    return 0
def setup_mito_mapping(config):
    genomes_root = os.path.join(config.destination, "genomes")
    if not os.path.exists(genomes_root):
        fileutils.make_dirs(genomes_root)

    mkfile_fpath = os.path.join(config.destination, "makefile.yaml")

    filenames = [mkfile_fpath]
    for name, record in sorted(config.database.mitochondria.iteritems()):
        filenames.append(os.path.join(genomes_root,
                                      "%s.fasta" % (record.name,)))

    existing_filenames = [filename for filename in filenames
                          if os.path.exists(filename)]

    # A bit strict, but avoids accidental overwrites
    if existing_filenames:
        print_err("ERROR: Output file(s) already exist, "
                  "cannot proceed:\n    %s"
                  % ("\n    ".join(map(repr, existing_filenames))))
        return 1

    with open(mkfile_fpath, "w") as mkfile:
        mkfile.write(bam_mkfile.build_makefile(add_prefix_tmpl=False,
                                               add_sample_tmpl=False))

        mkfile.write("\n\nPrefixes:\n")

        for name, record in sorted(config.database.mitochondria.iteritems()):
            meta = (record.meta or "").upper()
            if "EXCLUDE" in meta:
                continue

            mkfile.write("  %s:\n" % (record.name,))
            mkfile.write("    Path: genomes/%s.fasta\n" % (record.name,))

            info = config.database.samples.get(record.name)
            if info is not None:
                mkfile.write("    # Group: %s\n"
                             % (info.get('Group(3)', 'NA'),))
                mkfile.write("    # Species: %s\n"
                             % (info.get('Species', 'NA'),))
                mkfile.write("    # Sex: %s\n" % (info.get('Sex', 'NA'),))
                mkfile.write("    # Publication: %s\n"
                             % (info.get('Publication', 'NA'),))
                mkfile.write("    # Sample ID: %s\n"
                             % (info.get('SampleID', 'NA'),))
            mkfile.write('\n')

            fasta_fpath = os.path.join(genomes_root,
                                       "%s.fasta" % (record.name,))

            with open(fasta_fpath, "w") as fasta_handle:
                record = FASTA(name=record.name,
                               meta=None,
                               sequence=record.sequence.replace('-', ''))
                fasta_handle.write(str(record))
                fasta_handle.write("\n")

        mkfile.write("\n")

    return 0
def test_make_dirs__subdirs_return_values(temp_folder):
    assert make_dirs(os.path.join(temp_folder, "test"))
    assert make_dirs(os.path.join(temp_folder, "test", "234"))
    assert not make_dirs(os.path.join(temp_folder, "test", "234"))
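# Sketch of the make_dirs semantics exercised by the tests above, assuming a
# thin wrapper around os.makedirs: True when a new directory was created,
# False when it already existed, including when another process created it
# first (the "preempted" case); other errors propagate. The actual
# paleomix.common.fileutils implementation may differ in detail.
import errno
import os

def make_dirs_sketch(directory, mode=0o777):
    try:
        os.makedirs(directory, mode)
        return True
    except OSError as error:
        if error.errno != errno.EEXIST:
            raise
        return False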
def setup_mito_mapping(config):
    genomes_root = os.path.join(config.destination, "genomes")
    if not os.path.exists(genomes_root):
        fileutils.make_dirs(genomes_root)

    mkfile_fpath = os.path.join(config.destination, "makefile.yaml")

    filenames = [mkfile_fpath]
    for name, record in sorted(config.database.mitochondria.items()):
        filenames.append(os.path.join(genomes_root,
                                      "%s.fasta" % (record.name,)))

    existing_filenames = [
        filename for filename in filenames if os.path.exists(filename)
    ]

    # A bit strict, but avoids accidental overwrites
    if existing_filenames:
        log = logging.getLogger(__name__)
        log.error("Output file(s) already exist, cannot proceed:")
        for filename in sorted(existing_filenames):
            log.error(" - %r", filename)

        return 1

    with open(mkfile_fpath, "w") as mkfile:
        mkfile.write(
            bam_mkfile.build_makefile(add_prefix_tmpl=False,
                                      add_sample_tmpl=False)
        )

        mkfile.write("\n\nPrefixes:\n")

        for name, record in sorted(config.database.mitochondria.items()):
            if "EXCLUDE" in record.meta.upper():
                continue

            mkfile.write("  %s:\n" % (record.name,))
            mkfile.write("    Path: genomes/%s.fasta\n" % (record.name,))

            info = config.database.samples.get(record.name)
            if info is not None:
                mkfile.write("    # Species: %s\n"
                             % (info.get("Species", "NA"),))
                mkfile.write("    # Sex: %s\n" % (info.get("Sex", "NA"),))
                mkfile.write("    # Publication: %s\n"
                             % (info.get("Publication", "NA"),))
                mkfile.write("    # Sample ID: %s\n"
                             % (info.get("SampleID", "NA"),))

            mkfile.write("\n")

            fasta_fpath = os.path.join(genomes_root,
                                       "%s.fasta" % (record.name,))

            with open(fasta_fpath, "w") as fasta_handle:
                record = FASTA(
                    name=record.name,
                    meta=None,
                    sequence=record.sequence.replace("-", ""),
                )
                record.write(fasta_handle)

        mkfile.write("\n")

    return 0
def setup_mito_mapping(config):
    genomes_root = os.path.join(config.destination, "genomes")
    if not os.path.exists(genomes_root):
        fileutils.make_dirs(genomes_root)

    mkfile_fpath = os.path.join(config.destination, "makefile.yaml")

    filenames = [mkfile_fpath]
    for name, record in sorted(config.database.mitochondria.iteritems()):
        filenames.append(os.path.join(genomes_root,
                                      "%s.fasta" % (record.name,)))

    existing_filenames = [filename for filename in filenames
                          if os.path.exists(filename)]

    # A bit strict, but avoids accidental overwrites
    if existing_filenames:
        print_err("ERROR: Output file(s) already exist, "
                  "cannot proceed:\n    %s"
                  % ("\n    ".join(map(repr, existing_filenames))))
        return 1

    with open(mkfile_fpath, "w") as mkfile:
        mkfile.write(bam_mkfile.build_makefile(add_prefix_tmpl=False,
                                               add_sample_tmpl=False))

        mkfile.write("\n\nPrefixes:\n")

        for name, record in sorted(config.database.mitochondria.iteritems()):
            meta = (record.meta or "").upper()
            if "EXCLUDE" in meta:
                continue

            mkfile.write("  %s:\n" % (record.name,))
            mkfile.write("    Path: genomes/%s.fasta\n" % (record.name,))

            info = config.database.samples.get(record.name)
            if info is not None:
                mkfile.write("    # Group: %s\n"
                             % (info.get('Group(3)', 'NA'),))
                mkfile.write("    # Species: %s\n"
                             % (info.get('Species', 'NA'),))
                mkfile.write("    # Sex: %s\n" % (info.get('Sex', 'NA'),))
                mkfile.write("    # Publication: %s\n"
                             % (info.get('Publication', 'NA'),))
                mkfile.write("    # Sample ID: %s\n"
                             % (info.get('SampleID', 'NA'),))
            mkfile.write('\n')

            fasta_fpath = os.path.join(genomes_root,
                                       "%s.fasta" % (record.name,))

            with open(fasta_fpath, "w") as fasta_handle:
                fasta_handle.write(str(record))
                fasta_handle.write("\n")

        mkfile.write("\n")

    return 0