Beispiel #1
0
    def untar_index(  # pylint: disable=too-many-locals,too-many-arguments
            self, genome_file_name, genome_idx, bt2_1_file, bt2_2_file,
            bt2_3_file, bt2_4_file, bt2_rev1_file, bt2_rev2_file):
        """
        Unpack the Bowtie2 index files from the genome index tar file.

        The extraction itself is delegated to
        ``alignerUtils.bowtie2_untar_index``. Nothing is unpacked when the
        configuration entry ``"no-untar"`` is set to ``True``.

        Parameters
        ----------
        genome_file_name : str
            Location string of the genome fasta file; only the part after the
            last "/" is handed to the untar helper
        genome_idx : str
            Location of the Bowtie2 index file
        bt2_1_file : str
            Location of the <genome>.1.bt2 index file
        bt2_2_file : str
            Location of the <genome>.2.bt2 index file
        bt2_3_file : str
            Location of the <genome>.3.bt2 index file
        bt2_4_file : str
            Location of the <genome>.4.bt2 index file
        bt2_rev1_file : str
            Location of the <genome>.rev.1.bt2 index file
        bt2_rev2_file : str
            Location of the <genome>.rev.2.bt2 index file

        Returns
        -------
        bool
            Always True, whether or not the index was unpacked
        """
        skip_untar = (
            "no-untar" in self.configuration
            and self.configuration["no-untar"] is True
        )

        if not skip_untar:
            # The helper is given the bare file name, not the full path.
            genome_name = genome_file_name.split("/")[-1]
            alignerUtils().bowtie2_untar_index(
                genome_name, genome_idx,
                bt2_1_file, bt2_2_file, bt2_3_file, bt2_4_file,
                bt2_rev1_file, bt2_rev2_file)

        return True
Beispiel #2
0
    def gem_indexer(self, genome_file, index_loc):  # pylint: disable=unused-argument, no-self-use
        """
        GEM Indexer

        Builds the GEM index through ``alignerUtils.gem_index_genome``,
        compresses the resulting ``<genome_file>.gem`` with pigz (falling back
        to gzip when pigz cannot be launched) and, when ``index_loc`` differs
        from ``<genome_file>.gem.gz``, copies the compressed index there.

        Parameters
        ----------
        genome_file : str
            Location of the genome assembly FASTA file
        index_loc : str
            Location of the output index file

        Returns
        -------
        bool
            False if the indexing step raised an I/O error, True otherwise
        """
        try:
            au_handle = alignerUtils()
            au_handle.gem_index_genome(genome_file)
        except (IOError, OSError) as msg:
            logger.fatal("I/O error({0}): {1}".format(
                msg.errno, msg.strerror))
            return False

        gem_file = genome_file + ".gem"
        gem_gz_file = gem_file + ".gz"

        try:
            # Launch pigz from an argument list rather than through a shell:
            # paths containing spaces or shell metacharacters are passed
            # through untouched, and a missing executable raises OSError so
            # the gzip fallback below is actually reachable.
            command_line = ['pigz', gem_file]
            logger.info("args for pigz:" + " ".join(command_line))
            process = subprocess.Popen(command_line)
            process.wait()
        except OSError:
            logger.warn("OSERROR: pigz not installed, using gzip")
            process = subprocess.Popen(['gzip', gem_file])
            process.wait()

        if gem_gz_file != index_loc:
            # The compressed index is binary data: read it in binary mode and
            # stream it in chunks instead of loading the whole file at once.
            chunk_size = 1024 * 1024
            with open(index_loc, "wb") as f_out:
                with open(gem_gz_file, "rb") as f_in:
                    while True:
                        chunk = f_in.read(chunk_size)
                        if not chunk:
                            break
                        f_out.write(chunk)

        return True
    def bwa_aligner_single(  # pylint: disable=too-many-arguments, no-self-use
            self, genome_file_loc, read_file_loc, bam_loc, genome_idx,
            mem_params):  # pylint: disable=unused-argument
        """
        BWA MEM Aligner - Single Ended

        Unless the configuration entry ``"no-untar"`` is ``True``, the index
        archive is extracted next to itself. The genome FASTA is then copied
        into the index directory, the reads are aligned through
        ``alignerUtils.bwa_mem_align_reads`` and the resulting BAM is copied
        to ``bam_loc``.

        Parameters
        ----------
        genome_file_loc : str
            Location of the genomic fasta
        read_file_loc : str
            Location of the FASTQ file
        bam_loc : str
            Location of the output aligned bam file
        genome_idx : str
            Location of the BWA index file (``.tar.gz`` archive)
        mem_params : dict
            Alignment parameters, handed unchanged to the alignment helper

        Returns
        -------
        bool
            False if the index archive could not be read, if the genome or
            read file is missing or empty, or if the BAM could not be copied
            to ``bam_loc``; True otherwise
        """
        # Directory that holds the index archive; the archive is unpacked here.
        g_dir = "/".join(genome_idx.split("/")[:-1])

        untar_idx = not (
            "no-untar" in self.configuration
            and self.configuration["no-untar"] is True)

        if untar_idx:
            try:
                # The context manager closes the archive even when the
                # extraction fails part-way.
                with tarfile.open(genome_idx) as tar:
                    tar.extractall(path=g_dir)
            except IOError:
                return False

        # The genome FASTA is placed inside the extracted index directory
        # ("<index>.tar.gz" -> "<index>/").
        gfl = genome_file_loc.split("/")
        genome_fa_ln = genome_idx.replace('.tar.gz', '/') + gfl[-1]
        shutil.copy(genome_file_loc, genome_fa_ln)

        if (os.path.isfile(genome_fa_ln) is False
                or os.path.getsize(genome_fa_ln) == 0):
            return False
        if (os.path.isfile(read_file_loc) is False
                or os.path.getsize(read_file_loc) == 0):
            return False

        out_bam = read_file_loc + '.out.bam'

        au_handle = alignerUtils()
        logger.info("BWA FINISHED: " + str(
            au_handle.bwa_mem_align_reads(genome_fa_ln, out_bam, mem_params,
                                          read_file_loc)))

        try:
            # Stream the BAM to its destination rather than reading the whole
            # file into memory.
            with open(bam_loc, "wb") as f_out:
                with open(out_bam, "rb") as f_in:
                    shutil.copyfileobj(f_in, f_out)
        except IOError:
            return False

        os.remove(out_bam)

        return True
Beispiel #4
0
    def bowtie2_aligner_paired(  # pylint: disable=too-many-arguments, no-self-use
            self, genome_file_loc, read_file_loc1, read_file_loc2, bam_loc,
            genome_idx, aln_params):  # pylint: disable=unused-argument
        """
        Bowtie2 Aligner - Paired End

        Unless the configuration entry ``"no-untar"`` is ``True``, the index
        archive is extracted next to itself. The genome FASTA is copied into
        the index directory if it is not already there, the read pairs are
        aligned through ``alignerUtils.bowtie2_align_reads`` and the resulting
        BAM is copied to ``bam_loc``.

        Parameters
        ----------
        genome_file_loc : str
            Location of the genomic fasta
        read_file_loc1 : str
            Location of the first FASTQ file
        read_file_loc2 : str
            Location of the second FASTQ file
        bam_loc : str
            Location of the output aligned bam file
        genome_idx : str
            Location of the Bowtie2 index file (``.tar.gz`` archive)
        aln_params : dict
            Alignment parameters, handed unchanged to the alignment helper

        Returns
        -------
        bool
            False if the index archive could not be read or the BAM could not
            be copied to ``bam_loc``; True otherwise
        """
        # Directory that holds the index archive; the archive is unpacked here.
        g_dir = "/".join(genome_idx.split("/")[:-1])

        untar_idx = not (
            "no-untar" in self.configuration
            and self.configuration["no-untar"] is True)

        if untar_idx:
            try:
                # The context manager closes the archive even when the
                # extraction fails part-way.
                with tarfile.open(genome_idx) as tar:
                    tar.extractall(path=g_dir)
            except IOError:
                return False

        # The genome FASTA lives inside the extracted index directory
        # ("<index>.tar.gz" -> "<index>/").
        gfl = genome_file_loc.split("/")
        genome_fa_ln = genome_idx.replace('.tar.gz', '/') + gfl[-1]

        if os.path.isfile(genome_fa_ln) is False:
            shutil.copy(genome_file_loc, genome_fa_ln)

        out_bam = read_file_loc1 + '.out.bam'
        au_handle = alignerUtils()
        logger.info("BOWTIE2 FINISHED: " + str(
            au_handle.bowtie2_align_reads(genome_fa_ln, out_bam, aln_params,
                                          read_file_loc1, read_file_loc2)))

        try:
            # Stream the BAM to its destination rather than reading the whole
            # file into memory.
            with open(bam_loc, "wb") as f_out:
                with open(out_bam, "rb") as f_in:
                    shutil.copyfileobj(f_in, f_out)
        except IOError:
            return False

        # The intermediate BAM has been copied to bam_loc; remove it so it
        # does not linger next to the input reads.
        os.remove(out_bam)

        return True
    def bowtie2_indexer(self, file_loc, index_loc):  # pylint: disable=unused-argument, no-self-use
        """
        Bowtie2 Indexer

        Builds the Bowtie2 index through ``alignerUtils.bowtie_index_genome``,
        moves the six ``.bt2`` files into a directory named after
        ``index_loc`` (without ``.tar.gz``), packs that directory into a tar
        file and compresses it with pigz (falling back to gzip when pigz
        cannot be launched).

        Parameters
        ----------
        file_loc : str
            Location of the genome assembly FASTA file
        index_loc : str
            Location of the output index file (``.tar.gz``)

        Returns
        -------
        bool
            False if creating the index directory, moving the index files or
            writing the tar file raised an I/O error; True otherwise
        """

        file_name = file_loc.split('/')
        file_name[-1] = file_name[-1].replace('.fasta', '')
        # NOTE(review): the original also evaluated
        # file_name[-1].replace('.fa', '') but discarded the result, so a
        # ".fa" suffix has never actually been stripped. That behaviour is
        # kept here because changing the index base name could break code
        # that consumes the index - confirm before "fixing" it.
        file_name = "/".join(file_name)

        au_handle = alignerUtils()
        au_handle.bowtie_index_genome(file_loc, file_name)

        idx_out_pregz = index_loc.replace('.tar.gz', '.tar')
        index_dir = index_loc.replace('.tar.gz', '')

        try:
            # tar.gz the index
            # Format the message up front: passing the paths as extra
            # positional arguments without placeholders never logged them.
            logger.info("BOWTIE2 - index_loc: {0} {1}".format(
                index_loc, index_dir))

            os.mkdir(index_dir)

            for suffix in (".1.bt2", ".2.bt2", ".3.bt2", ".4.bt2",
                           ".rev.1.bt2", ".rev.2.bt2"):
                shutil.move(file_name + suffix, index_dir)

            # Inside the archive the directory is stored under its own name
            # only, not under its full path.
            index_folder = index_dir.split("/")[-1]

            # The context manager closes the archive even if adding fails.
            with tarfile.open(idx_out_pregz, "w") as tar:
                tar.add(index_dir, arcname=index_folder)

        except (IOError, OSError) as error:
            logger.fatal("I/O error({0}): {1}".format(error.errno,
                                                      error.strerror))
            return False

        try:
            command_line = 'pigz ' + idx_out_pregz
            args = shlex.split(command_line)
            process = subprocess.Popen(args)
            process.wait()
        except OSError:
            logger.warn("OSERROR: pigz not installed, using gzip")
            command_line = 'gzip ' + idx_out_pregz
            args = shlex.split(command_line)
            process = subprocess.Popen(args)
            process.wait()

        return True
Beispiel #6
0
    def bwa_indexer(self, file_loc, idx_out):  # pylint: disable=no-self-use
        """
        BWA Indexer

        Builds the BWA index through ``alignerUtils.bwa_index_genome``, moves
        the five index files it reports into a directory named after
        ``idx_out`` (without ``.tar.gz``), packs that directory into a tar
        file, compresses it with pigz (falling back to gzip when pigz cannot
        be launched) and finally removes the directory.

        Parameters
        ----------
        file_loc : str
            Location of the genome assembly FASTA file
        idx_out : str
            Location of the output index file (``.tar.gz``)

        Returns
        -------
        bool
            False if creating the index directory, moving the index files or
            writing the tar file raised an I/O error; True otherwise
        """

        au_handler = alignerUtils()
        amb_loc, ann_loc, bwt_loc, pac_loc, sa_loc = au_handler.bwa_index_genome(
            file_loc)

        idx_out_pregz = idx_out.replace('.tar.gz', '.tar')
        index_dir = idx_out.replace('.tar.gz', '')

        try:
            # tar.gz the index
            # Format the message up front: passing the paths as extra
            # positional arguments without placeholders never logged them.
            logger.info("BWA - idx_out: {0} {1}".format(idx_out, index_dir))

            os.mkdir(index_dir)

            for index_file in (amb_loc, ann_loc, bwt_loc, pac_loc, sa_loc):
                shutil.move(index_file, index_dir)

            # Inside the archive the directory is stored under its own name
            # only, not under its full path.
            index_folder = index_dir.split("/")[-1]

            # The context manager closes the archive even if adding fails.
            with tarfile.open(idx_out_pregz, "w") as tar:
                tar.add(index_dir, arcname=index_folder)

        except (IOError, OSError) as msg:
            logger.fatal("I/O error({0}) - BWA INDEXER: {1}".format(
                msg.errno, msg.strerror))
            return False

        try:
            command_line = 'pigz ' + idx_out_pregz
            args = shlex.split(command_line)
            process = subprocess.Popen(args)
            process.wait()
        except OSError:
            logger.warn("OSERROR: pigz not installed, using gzip")
            command_line = 'gzip ' + idx_out_pregz
            args = shlex.split(command_line)
            process = subprocess.Popen(args)
            process.wait()

        # The index now lives in the archive; the staging directory is no
        # longer needed.
        shutil.rmtree(index_dir)

        return True