Ejemplo n.º 1
0
    def create_ref_id_dict(self, ref_genomes):
        """Build a lookup from reference id to reference genome path.

        Each entry of ``ref_genomes`` is keyed by the value that
        ``misc.get_refid_from_path`` returns for it.

        Args:
            ref_genomes: Sized collection of reference genome paths.

        Returns:
            dict mapping every reference id to the path it was derived from.

        Raises:
            TypeError: If two or more entries yield the same reference id,
                i.e. the dict ends up smaller than ``ref_genomes``.
        """
        paths_by_id = {
            misc.get_refid_from_path(genome_path): genome_path
            for genome_path in ref_genomes
        }

        # A colliding id silently overwrites an earlier entry, so a size
        # mismatch is the signal that the ids were not unique.
        if len(ref_genomes) != len(paths_by_id):
            raise TypeError("The given reference genomes must have unique base names in their paths.")

        return paths_by_id
Ejemplo n.º 2
0
def align_to_references(state):
    """Align the forward and reverse FASTQ files to every reference genome.

    For each path in ``state.paths.reference_genomes`` this runs
    ``bowtie2.build`` on the genome and then ``bowtie2.align_to_genome`` for
    ``state.paths.fastq_files[0]`` (forward reads) and
    ``state.paths.fastq_files[1]`` (reverse reads). The alignments are
    written into ``state.paths.full_sams_dir`` as
    ``fastq1_to_genome_<suffix>.sam`` and ``fastq2_to_genome_<suffix>.sam``,
    where ``<suffix>`` is the genome's base name up to its first dot.

    After each alignment the result of ``misc.get_sam_header`` for the new
    SAM file is appended to ``state.settings.reference_headers_dict`` under
    the genome's reference id (``misc.get_refid_from_path``).

    Args:
        state: Pipeline state exposing ``logger``, ``paths`` and ``settings``
            with the attributes named above.

    Returns:
        dict mapping each reference genome path to a
        ``(forward_sam, reverse_sam)`` tuple. The very same dict is stored in
        ``state.paths.fastq_to_genome_algnmnts``.
    """
    logger = state.logger
    bowtie2_version = '2.2.9'

    ref_sams = {}
    # Publish the result dict once, before the loop: it is filled in place,
    # and the attribute is now also set (to an empty dict) when there are no
    # reference genomes at all.
    state.paths.fastq_to_genome_algnmnts = ref_sams

    for refpath in state.paths.reference_genomes:
        ref_name = basename(refpath)
        suffix = ref_name.split('.')[0]
        # NOTE(review): assumes get_refid_from_path is a pure function of the
        # path, so computing it once per genome is equivalent.
        ref_id = misc.get_refid_from_path(refpath)

        logger.info("Building the genome fasta file for the file %s..." %
                    ref_name)
        bowtie2.build(bowtie2_version, refpath, logger)

        outsams = []
        # fastq_files[0] holds the forward reads, fastq_files[1] the reverse
        # reads; the 1-based number is what appears in the output file name.
        for read_number, direction in enumerate(("forward", "reverse"),
                                                start=1):
            fastq = state.paths.fastq_files[read_number - 1]

            logger.info("Aligning the %s reads in %s to the genome %s..." %
                        (direction, basename(fastq), ref_name))
            outsam = bowtie2.align_to_genome(
                bowtie2_version, refpath, fastq,
                os.path.join(state.paths.full_sams_dir,
                             'fastq%d_to_genome_%s.sam' % (read_number,
                                                           suffix)),
                state.settings.threads)
            logger.info("Output saved to %s" % basename(outsam))
            logger.debug("Saving reference header for %s under %s" %
                         (ref_name, ref_id))
            state.settings.reference_headers_dict[ref_id].append(
                misc.get_sam_header(outsam))

            outsams.append(outsam)

        ref_sams[refpath] = tuple(outsams)

    return ref_sams
Ejemplo n.º 3
0
def genome(path):
    """Validate one reference genome path and return it as an absolute path.

    The path must be an existing file, and its reference id (as returned by
    ``misc.get_refid_from_path``) must not have been seen by an earlier call.
    Accepted ids are recorded in the module-level ``ref_basenames`` set.
    Failures are reported as ``argparse.ArgumentTypeError``, which suggests
    this is meant to be used as an argparse ``type=`` converter.

    Args:
        path: Path to a reference genome fasta file.

    Returns:
        The absolute form of ``path``.

    Raises:
        argparse.ArgumentTypeError: If ``path`` is not a file, its reference
            id was already registered, or validation fails for any other
            reason.
    """
    try:
        if not os.path.isfile(path):
            raise TypeError("Genome fasta is not a file.")
        # Use the same id for the duplicate check and for the registration;
        # otherwise a duplicate can slip through whenever the two ways of
        # deriving the id disagree.
        ref_id = misc.get_refid_from_path(path)
        if ref_id in ref_basenames:
            raise TypeError("Duplicate Basename")
        ref_basenames.add(ref_id)
        return os.path.abspath(path)
    except Exception:
        # Only ordinary errors are translated; KeyboardInterrupt and
        # SystemExit are allowed to propagate unchanged.
        raise argparse.ArgumentTypeError(
            "Genomes must be entered as \"<reference-fasta1> <reference-fasta2>\" and "
            "they must have unique basenames.")