예제 #1
0
    def _run(self, _config, temp):
        """Write one multi-sample FASTA file per sequence into `temp`.

        For every name in `self._sequences`, the file
        `<temp>/<sequence_name>.fasta` is created, and that sequence is
        fetched from each input listed in `self._infiles` (a mapping of
        sample name to FASTA filename) and written as one record per
        sample, in sorted order of sample name.

        `_config` is accepted but not used by this method.
        """
        fasta_files = []
        try:
            # Opening happens inside the `try` so that handles opened before
            # a failing `pysam.FastaFile(...)` call are still closed below.
            for (name, filename) in sorted(self._infiles.items()):
                fasta_files.append((name, pysam.FastaFile(filename)))

            for sequence_name in sorted(self._sequences):
                filename = os.path.join(temp, sequence_name + ".fasta")
                with open(filename, "w") as out_handle:
                    for (sample, fasta_file) in fasta_files:
                        sequence = fasta_file.fetch(sequence_name)
                        fasta = FASTA(sample, sequence_name, sequence)
                        fasta.write(out_handle)
        finally:
            # Release the input handles explicitly instead of leaving them
            # open until garbage collection, also when an error occurred.
            for (_, fasta_file) in fasta_files:
                fasta_file.close()
예제 #2
0
def setup_mito_mapping(config):
    """Create a BAM pipeline makefile and per-record FASTA files.

    Under `config.destination` this writes `makefile.yaml` and, inside a
    `genomes` sub-directory, one `<name>.fasta` file for every record in
    `config.database.mitochondria` whose `meta` text does not contain
    "EXCLUDE" (case-insensitive). Gap characters ("-") are stripped from
    the written sequences. When `config.database.samples` has an entry
    for a record, its species, sex, publication and sample ID are added
    to the makefile as comments.

    Nothing is written if the makefile or any candidate FASTA file
    (including those of excluded records) already exists.

    Returns 1 if existing output files prevented the setup, 0 otherwise.
    """
    destination = config.destination
    genomes_dir = os.path.join(destination, "genomes")
    if not os.path.exists(genomes_dir):
        fileutils.make_dirs(genomes_dir)

    makefile_path = os.path.join(destination, "makefile.yaml")

    # Only the records themselves are needed; sorting the items orders
    # them by their key in the mitochondria table.
    records = [
        record
        for _, record in sorted(config.database.mitochondria.items())
    ]

    candidates = [makefile_path]
    candidates.extend(
        os.path.join(genomes_dir, "%s.fasta" % (record.name, ))
        for record in records
    )

    # Deliberately strict: refuse to run rather than overwrite anything.
    clashes = sorted(filter(os.path.exists, candidates))
    if clashes:
        log = logging.getLogger(__name__)
        log.error("Output file(s) already exists, cannot proceed:")
        for path in clashes:
            log.error(" - %r", path)

        return 1

    # Comment lines emitted for records with known sample information,
    # as (line template, key in the sample information) pairs.
    sample_comments = (
        ("    # Species: %s\n", "Species"),
        ("    # Sex: %s\n", "Sex"),
        ("    # Publication: %s\n", "Publication"),
        ("    # Sample ID: %s\n", "SampleID"),
    )

    with open(makefile_path, "w") as makefile:
        makefile.write(
            bam_mkfile.build_makefile(add_prefix_tmpl=False,
                                      add_sample_tmpl=False))

        makefile.write("\n\nPrefixes:\n")

        for record in records:
            if "EXCLUDE" in record.meta.upper():
                continue

            prefix = record.name
            makefile.write("  %s:\n" % (prefix, ))
            makefile.write("    Path: genomes/%s.fasta\n" % (prefix, ))

            sample_info = config.database.samples.get(prefix)
            if sample_info is not None:
                for template, key in sample_comments:
                    makefile.write(template % (sample_info.get(key, "NA"), ))

            makefile.write("\n")

            fasta_path = os.path.join(genomes_dir, "%s.fasta" % (prefix, ))
            with open(fasta_path, "w") as fasta_handle:
                ungapped = FASTA(
                    name=prefix,
                    meta=None,
                    sequence=record.sequence.replace("-", ""),
                )

                ungapped.write(fasta_handle)

        makefile.write("\n")

    return 0