def untar_index(  # pylint: disable=too-many-locals,too-many-arguments
        self, genome_file_name, genome_idx,
        bt2_1_file, bt2_2_file, bt2_3_file, bt2_4_file,
        bt2_rev1_file, bt2_rev2_file):
    """
    Unpack the Bowtie2 index files from the genome index tar file.

    The work is delegated to ``alignerUtils.bowtie2_untar_index``. Nothing
    is unpacked when ``self.configuration["no-untar"]`` is ``True``.

    Parameters
    ----------
    genome_file_name : str
        Location string of the genome fasta file (only the final path
        component is handed to the untar helper)
    genome_idx : str
        Location of the Bowtie2 index file
    bt2_1_file : str
        Location of the <genome>.1.bt2 index file
    bt2_2_file : str
        Location of the <genome>.2.bt2 index file
    bt2_3_file : str
        Location of the <genome>.3.bt2 index file
    bt2_4_file : str
        Location of the <genome>.4.bt2 index file
    bt2_rev1_file : str
        Location of the <genome>.rev.1.bt2 index file
    bt2_rev2_file : str
        Location of the <genome>.rev.2.bt2 index file

    Returns
    -------
    bool
        Always True once the (optional) extraction has been requested
    """
    config = self.configuration
    skip_untar = "no-untar" in config and config["no-untar"] is True

    if not skip_untar:
        # Only the file name (not its directory) is passed to the helper.
        genome_name = genome_file_name.split("/")[-1]
        aligner_utils = alignerUtils()
        aligner_utils.bowtie2_untar_index(
            genome_name, genome_idx,
            bt2_1_file, bt2_2_file, bt2_3_file, bt2_4_file,
            bt2_rev1_file, bt2_rev2_file)

    return True
def gem_indexer(self, genome_file, index_loc):  # pylint: disable=unused-argument, no-self-use
    """
    GEM Indexer

    Builds the GEM index via ``alignerUtils.gem_index_genome``, compresses
    ``<genome_file>.gem`` with pigz (falling back to gzip when pigz cannot
    be launched) and, when ``index_loc`` differs from
    ``<genome_file>.gem.gz``, copies the compressed index to ``index_loc``.

    Parameters
    ----------
    genome_file : str
        Location of the genome assembly FASTA file
    index_loc : str
        Location of the output index file

    Returns
    -------
    bool
        True on success, False if building the index raised an I/O error
    """
    try:
        au_handle = alignerUtils()
        au_handle.gem_index_genome(genome_file)
    except (IOError, OSError) as msg:
        logger.fatal("I/O error({0}): {1}".format(
            msg.errno, msg.strerror))
        return False

    gem_file = genome_file + ".gem"

    # The command is passed as an argument list (no shell) so that a missing
    # pigz binary raises OSError and the gzip fallback is actually reached,
    # and so that unusual characters in the path are not shell-interpreted.
    try:
        command_line = ["pigz", gem_file]
        logger.info("args for pigz:" + " ".join(command_line))
        process = subprocess.Popen(command_line)
        process.wait()
    except OSError:
        logger.warn("OSERROR: pigz not installed, using gzip")
        process = subprocess.Popen(["gzip", gem_file])
        process.wait()

    gzipped_index = gem_file + ".gz"
    if gzipped_index != index_loc:
        with open(index_loc, "wb") as f_out:
            # The gzip archive is binary data: read it in binary mode and
            # stream it rather than loading the whole index into memory.
            with open(gzipped_index, "rb") as f_in:
                shutil.copyfileobj(f_in, f_out)

    return True
def bwa_aligner_single(  # pylint: disable=too-many-arguments, no-self-use
        self, genome_file_loc, read_file_loc, bam_loc, genome_idx, mem_params):  # pylint: disable=unused-argument
    """
    BWA MEM Aligner - Single Ended

    Unless ``self.configuration["no-untar"]`` is ``True`` the index archive
    is extracted next to ``genome_idx``. The genome FASTA is then copied
    into the index directory (``genome_idx`` with ``.tar.gz`` replaced by
    ``/``), the reads are aligned into ``<read_file_loc>.out.bam`` and that
    file is copied to ``bam_loc`` before being removed.

    Parameters
    ----------
    genome_file_loc : str
        Location of the genomic fasta
    read_file_loc : str
        Location of the FASTQ file
    bam_loc : str
        Location of the output aligned bam file
    genome_idx : str
        Location of the BWA index file
    mem_params : list
        Alignment parameters, passed through unchanged to
        ``alignerUtils.bwa_mem_align_reads`` (exact type is whatever that
        helper expects - TODO confirm)

    Returns
    -------
    bool
        True on success; False if the index cannot be extracted, the genome
        or read file is missing or empty, or the BAM cannot be copied
    """
    g_dir = "/".join(genome_idx.split("/")[:-1])

    skip_untar = (
        "no-untar" in self.configuration
        and self.configuration["no-untar"] is True
    )

    if not skip_untar:
        try:
            # Context manager guarantees the archive handle is released even
            # when extraction fails part-way through.
            with tarfile.open(genome_idx) as tar:
                tar.extractall(path=g_dir)
        except IOError:
            return False

    # The aligner expects the FASTA to sit alongside the extracted index.
    genome_name = genome_file_loc.split("/")[-1]
    genome_fa_ln = genome_idx.replace('.tar.gz', '/') + genome_name
    shutil.copy(genome_file_loc, genome_fa_ln)

    for required_file in (genome_fa_ln, read_file_loc):
        if (os.path.isfile(required_file) is False or
                os.path.getsize(required_file) == 0):
            return False

    out_bam = read_file_loc + '.out.bam'
    au_handle = alignerUtils()
    logger.info("BWA FINISHED: " + str(
        au_handle.bwa_mem_align_reads(
            genome_fa_ln, out_bam, mem_params, read_file_loc)))

    try:
        with open(bam_loc, "wb") as f_out:
            with open(out_bam, "rb") as f_in:
                # Stream the copy: BAM files can be far larger than memory.
                shutil.copyfileobj(f_in, f_out)
    except IOError:
        return False

    os.remove(out_bam)

    return True
def bowtie2_aligner_paired(  # pylint: disable=too-many-arguments, no-self-use
        self, genome_file_loc, read_file_loc1, read_file_loc2,
        bam_loc, genome_idx, aln_params):  # pylint: disable=unused-argument
    """
    Bowtie2 Aligner - Paired End

    Unless ``self.configuration["no-untar"]`` is ``True`` the index archive
    is extracted next to ``genome_idx``. The genome FASTA is copied into the
    index directory if it is not already there, the read pairs are aligned
    into ``<read_file_loc1>.out.bam`` and that file is copied to
    ``bam_loc``. The intermediate ``.out.bam`` file is left in place.

    Parameters
    ----------
    genome_file_loc : str
        Location of the genomic fasta
    read_file_loc1 : str
        Location of the FASTQ file
    read_file_loc2 : str
        Location of the FASTQ file
    bam_loc : str
        Location of the output aligned bam file
    genome_idx : str
        Location of the Bowtie2 index file
    aln_params : dict
        Alignment parameters

    Returns
    -------
    bool
        True on success; False if the index cannot be extracted or the BAM
        cannot be copied to ``bam_loc``
    """
    g_dir = "/".join(genome_idx.split("/")[:-1])

    skip_untar = (
        "no-untar" in self.configuration
        and self.configuration["no-untar"] is True
    )

    if not skip_untar:
        try:
            # Context manager guarantees the archive handle is released even
            # when extraction fails part-way through.
            with tarfile.open(genome_idx) as tar:
                tar.extractall(path=g_dir)
        except IOError:
            return False

    # The aligner expects the FASTA to sit alongside the extracted index.
    genome_name = genome_file_loc.split("/")[-1]
    genome_fa_ln = genome_idx.replace('.tar.gz', '/') + genome_name
    if os.path.isfile(genome_fa_ln) is False:
        shutil.copy(genome_file_loc, genome_fa_ln)

    out_bam = read_file_loc1 + '.out.bam'
    au_handle = alignerUtils()
    logger.info("BOWTIE2 FINISHED: " + str(
        au_handle.bowtie2_align_reads(
            genome_fa_ln, out_bam, aln_params,
            read_file_loc1, read_file_loc2)))

    try:
        with open(bam_loc, "wb") as f_out:
            with open(out_bam, "rb") as f_in:
                # Stream the copy: BAM files can be far larger than memory.
                shutil.copyfileobj(f_in, f_out)
    except IOError:
        return False

    return True
def bowtie2_indexer(self, file_loc, index_loc):  # pylint: disable=unused-argument, no-self-use
    """
    Bowtie2 Indexer

    Builds the Bowtie2 index via ``alignerUtils.bowtie_index_genome``, moves
    the six ``.bt2`` files into a directory named after ``index_loc``
    (without ``.tar.gz``), tars that directory and compresses the tar with
    pigz, falling back to gzip when pigz cannot be launched.

    Parameters
    ----------
    file_loc : str
        Location of the genome assembly FASTA file
    index_loc : str
        Location of the output index file

    Returns
    -------
    bool
        True on success, False if preparing the tar archive raised an
        I/O error
    """
    path_parts = file_loc.split('/')
    path_parts[-1] = path_parts[-1].replace('.fasta', '')
    # NOTE(review): the original also evaluated
    # ``path_parts[-1].replace('.fa', '')`` but discarded the result, so a
    # ".fa" suffix has never been stripped from the index prefix. That
    # behaviour is kept here because the generated file names may be relied
    # on elsewhere - confirm before changing.
    file_name = "/".join(path_parts)

    au_handle = alignerUtils()
    au_handle.bowtie_index_genome(file_loc, file_name)

    idx_out_pregz = index_loc.replace('.tar.gz', '.tar')
    index_dir = index_loc.replace('.tar.gz', '')

    try:
        # tar.gz the index
        # Build the message up front: extra positional arguments without
        # placeholders are never rendered into the log line.
        logger.info("BOWTIE2 - index_loc " + index_loc + " " + index_dir)

        os.mkdir(index_dir)

        for suffix in (".1.bt2", ".2.bt2", ".3.bt2", ".4.bt2",
                       ".rev.1.bt2", ".rev.2.bt2"):
            shutil.move(file_name + suffix, index_dir)

        index_folder = index_dir.split("/")[-1]

        # Context manager closes the archive even if adding files fails.
        with tarfile.open(idx_out_pregz, "w") as tar:
            tar.add(index_dir, arcname=index_folder)
    except (IOError, OSError) as error:
        logger.fatal("I/O error({0}): {1}".format(error.errno, error.strerror))
        return False

    # Argument lists (rather than shlex-splitting a concatenated string)
    # keep paths containing whitespace intact.
    try:
        process = subprocess.Popen(['pigz', idx_out_pregz])
        process.wait()
    except OSError:
        logger.warn("OSERROR: pigz not installed, using gzip")
        process = subprocess.Popen(['gzip', idx_out_pregz])
        process.wait()

    return True
def bwa_indexer(self, file_loc, idx_out):  # pylint: disable=no-self-use
    """
    BWA Indexer

    Builds the BWA index via ``alignerUtils.bwa_index_genome``, moves the
    five index files it returns into a directory named after ``idx_out``
    (without ``.tar.gz``), tars that directory, compresses the tar with
    pigz (falling back to gzip when pigz cannot be launched) and finally
    removes the temporary index directory.

    Parameters
    ----------
    file_loc : str
        Location of the genome assembly FASTA file
    idx_out : str
        Location of the output index file

    Returns
    -------
    bool
        True on success, False if preparing the tar archive raised an
        I/O error
    """
    au_handler = alignerUtils()
    amb_loc, ann_loc, bwt_loc, pac_loc, sa_loc = au_handler.bwa_index_genome(
        file_loc)

    idx_out_pregz = idx_out.replace('.tar.gz', '.tar')
    index_dir = idx_out.replace('.tar.gz', '')

    try:
        # tar.gz the index
        # Build the message up front: extra positional arguments without
        # placeholders are never rendered into the log line.
        logger.info("BWA - idx_out " + idx_out + " " + index_dir)

        os.mkdir(index_dir)

        for index_file in (amb_loc, ann_loc, bwt_loc, pac_loc, sa_loc):
            shutil.move(index_file, index_dir)

        index_folder = index_dir.split("/")[-1]

        # Context manager closes the archive even if adding files fails.
        with tarfile.open(idx_out_pregz, "w") as tar:
            tar.add(index_dir, arcname=index_folder)
    except (IOError, OSError) as msg:
        logger.fatal("I/O error({0}) - BWA INDEXER: {1}".format(
            msg.errno, msg.strerror))
        return False

    # Argument lists (rather than shlex-splitting a concatenated string)
    # keep paths containing whitespace intact.
    try:
        process = subprocess.Popen(['pigz', idx_out_pregz])
        process.wait()
    except OSError:
        logger.warn("OSERROR: pigz not installed, using gzip")
        process = subprocess.Popen(['gzip', idx_out_pregz])
        process.wait()

    # The directory contents now live in the archive.
    shutil.rmtree(index_dir)

    return True