def __gen_db_path(self, sball, bucket_path):
    """Build the BLAST database output path for one sample.

    The stem is the basename of ``sball.export_fasta`` truncated at its
    first ``"."``. The directory ``<bucket_path><stem>_db/`` is handed to
    ``plumber.force_dir`` (presumably to create it -- confirm against
    plumber), and ``<directory><stem>_genome.fasta`` is returned.

    Paths are joined by plain string concatenation, so ``bucket_path`` is
    expected to already end with a path separator.
    """
    # Both the directory and the database name share the same stem.
    stem = os.path.basename(sball.export_fasta).partition(".")[0]
    db_dir = bucket_path + stem + "_db/"
    plumber.force_dir(db_dir)
    return db_dir + stem + "_genome.fasta"
def __split_queries(self):
    """Split the query FASTA into one single-record FASTA file per record.

    Records are read from ``self.query_fpath``. Each one is written to
    ``<base_blast_path><args.query_dir><formatted id>.fasta``, where the
    id is passed through ``self.__format_id``.

    Returns:
        The written file paths, in the order the records were read.
    """
    # Read every record before touching the output directory, so a parse
    # failure happens before any file is written.
    records = list(SeqIO.parse(self.query_fpath, "fasta"))

    out_dir = self.base_blast_path + args.query_dir
    plumber.force_dir(out_dir)

    written = []
    for record in records:
        target = out_dir + self.__format_id(record.id) + ".fasta"
        SeqIO.write([record], target, "fasta")
        written.append(target)
    return written
def bucket_blast(self, archive_path):
    """Run ``blastn`` for this query against every database in ``self.bucket``.

    One result file is written per database under
    ``<archive_path><archive subdir>`` using ``-outfmt 11`` and an e-value
    cut-off of ``1e-20``. Each result path is appended to
    ``self.archives`` once its search has succeeded.

    Args:
        archive_path: Parent directory for this query's archive sub-directory
            (joined by string concatenation, so it should end with a path
            separator).

    Raises:
        RuntimeError: If ``blastn`` exits with a non-zero status. The message
            includes the tool's stderr output when there is any, and the
            original ``CalledProcessError`` is attached as ``__cause__``.
    """
    blast_path = archive_path + self.__gen_archive_subdir()
    plumber.force_dir(blast_path)
    for db_path in self.bucket:
        out_path = blast_path + self.__gen_out_name(db_path)
        blast_cmd = [
            "blastn",
            "-db", db_path,
            "-query", self.query,
            "-out", out_path,
            "-evalue", "1e-20",
            "-outfmt", "11",
        ]
        try:
            subprocess.run(
                blast_cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                check=True,
                universal_newlines=True,
            )
        except subprocess.CalledProcessError as perror:
            # stderr is captured above, so surface it here; otherwise the
            # reason for the failure would be silently dropped.
            message = "ERROR: BLAST failed!"
            detail = (perror.stderr or "").strip()
            if detail:
                message += "\n" + detail
            raise RuntimeError(message) from perror
        self.archives.append(out_path)
def __build_bucket(self):
    """Build one nucleotide BLAST database per sample in ``self.sample_pit``.

    For each sample, ``makeblastdb`` is run on ``sball.export_fasta`` with
    ``-dbtype nucl``; the output path comes from ``self.__gen_db_path``.
    Each database path is appended to ``self.db_bucket`` once its build has
    succeeded.

    Raises:
        RuntimeError: If ``makeblastdb`` exits with a non-zero status. The
            message includes the tool's stderr output when there is any, and
            the original ``CalledProcessError`` is attached as ``__cause__``.
    """
    bucket_path = self.base_blast_path + args.bucket_dir
    plumber.force_dir(bucket_path)
    for sball in self.sample_pit:
        print("\t\tMaking database for sample", sball.sample_id)
        out_path = self.__gen_db_path(sball, bucket_path)
        blast_cmd = [
            "makeblastdb",
            "-in", sball.export_fasta,
            "-dbtype", "nucl",
            "-out", out_path,
        ]
        try:
            # Capture stderr (the original passed stdin=PIPE here, which
            # left stderr uncaptured, unlike the other subprocess calls).
            subprocess.run(
                blast_cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                check=True,
                universal_newlines=True,
            )
        except subprocess.CalledProcessError as perror:
            # Surface the captured stderr so the failure reason is not lost.
            message = "ERROR: makeblastdb failed!"
            detail = (perror.stderr or "").strip()
            if detail:
                message += "\n" + detail
            raise RuntimeError(message) from perror
        self.db_bucket.append(out_path)
def convert_hr_reports(self, reports_path):
    """Render every archive in ``self.archives`` with ``blast_formatter``.

    Each archive is converted with ``-outfmt 0`` into a file under
    ``<reports_path><report subdir>``. Each report path is appended to
    ``self.reports`` once its conversion has succeeded.

    Args:
        reports_path: Parent directory for this query's report sub-directory
            (joined by string concatenation, so it should end with a path
            separator).

    Raises:
        RuntimeError: If ``blast_formatter`` exits with a non-zero status.
            The message includes the tool's stderr output when there is any,
            and the original ``CalledProcessError`` is attached as
            ``__cause__``.
    """
    convert_path = reports_path + self.__gen_report_subdir()
    plumber.force_dir(convert_path)
    for arch_path in self.archives:
        out_path = convert_path + self.__gen_report_name(arch_path)
        bconvert_cmd = [
            "blast_formatter",
            "-archive", arch_path,
            "-outfmt", "0",
            "-out", out_path,
        ]
        try:
            subprocess.run(
                bconvert_cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                check=True,
                universal_newlines=True,
            )
        except subprocess.CalledProcessError as perror:
            # stderr is captured above, so surface it here; otherwise the
            # reason for the failure would be silently dropped.
            message = "ERROR: Report conversion failed!"
            detail = (perror.stderr or "").strip()
            if detail:
                message += "\n" + detail
            raise RuntimeError(message) from perror
        self.reports.append(out_path)
def get_sub_seqs(self, sub_seq_path):
    """Export the aligned subject sequences of every archive as FASTA.

    For each archive in ``self.archives``, ``blast_formatter`` writes a
    tab-separated table (``qseqid``, ``sseqid``, ``sseq``) to a temporary
    file in ``sub_seq_path``. Every row becomes a record with id
    ``<qseqid>.<sseqid>`` and a sequence produced by ``self.seq_format``.
    The records are written to a FASTA file in
    ``<sub_seq_path><query_name>_sseq/``. Archives with no rows produce no
    FASTA file.

    Args:
        sub_seq_path: Directory holding the temporary table and the
            per-query output sub-directory (joined by string concatenation,
            so it should end with a path separator).

    Raises:
        RuntimeError: If ``blast_formatter`` exits with a non-zero status.
            The message includes the tool's stderr output when there is any,
            and the original ``CalledProcessError`` is attached as
            ``__cause__``.
    """
    sub_dir = sub_seq_path + self.query_name + "_sseq/"
    plumber.force_dir(sub_dir)
    # The same scratch file is reused for every archive.
    temp_path = sub_seq_path + "sseq_tab_tempBLASTn6"
    for arch_path in self.archives:
        bconvert_cmd = [
            "blast_formatter",
            "-archive", arch_path,
            "-outfmt", "6 qseqid sseqid sseq",
            "-out", temp_path,
        ]
        try:
            try:
                subprocess.run(
                    bconvert_cmd,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    check=True,
                    universal_newlines=True,
                )
            except subprocess.CalledProcessError as perror:
                # Surface the captured stderr so the failure reason is
                # not lost.
                message = "ERROR: Tab conversion failed!"
                detail = (perror.stderr or "").strip()
                if detail:
                    message += "\n" + detail
                raise RuntimeError(message) from perror
            with open(temp_path) as tf:
                dat = tf.readlines()
        finally:
            # Always clean up the scratch file, including when conversion
            # or reading fails part-way through.
            if os.path.exists(temp_path):
                os.remove(temp_path)

        if not dat:
            continue

        records = []
        for line in dat:
            # Drop the line terminator so it cannot leak into the last
            # (sequence) field, then split the row only once.
            fields = line.rstrip("\n").split("\t")
            qseqid, sseqid, sseq = fields[0], fields[1], fields[2]
            records.append(
                SeqRecord(
                    Seq(self.seq_format(sseq)),
                    id=qseqid + "." + sseqid,
                    description="",
                )
            )
        output_file = sub_dir + self.__gen_sseq_name(arch_path)
        SeqIO.write(records, output_file, "fasta")
def __extract_sub_seqs(self):
    """Call ``get_sub_seqs`` on every query in ``self.qSeq_list``.

    The shared target directory is ``base_blast_path + args.sub_seq_dir``;
    it is passed to ``plumber.force_dir`` before any query is processed.
    """
    target_dir = self.base_blast_path + args.sub_seq_dir
    plumber.force_dir(target_dir)
    for entry in self.qSeq_list:
        # NOTE(review): this progress text is identical to the one printed
        # by __gen_hr_reports -- confirm the wording is intended here.
        print("\t\tBuilding reports for %s ..." % entry.query_name)
        entry.get_sub_seqs(target_dir)
def __gen_hr_reports(self):
    """Call ``convert_hr_reports`` on every query in ``self.qSeq_list``.

    The shared target directory is ``base_blast_path + args.report_dir``;
    it is passed to ``plumber.force_dir`` before any query is processed.
    """
    target_dir = self.base_blast_path + args.report_dir
    plumber.force_dir(target_dir)
    for entry in self.qSeq_list:
        print("\t\tBuilding reports for %s ..." % entry.query_name)
        entry.convert_hr_reports(target_dir)
def __blast_all(self):
    """Call ``bucket_blast`` on every query in ``self.qSeq_list``.

    The shared target directory is ``base_blast_path + args.archive_dir``;
    it is passed to ``plumber.force_dir`` before any query is processed.
    """
    target_dir = self.base_blast_path + args.archive_dir
    plumber.force_dir(target_dir)
    for entry in self.qSeq_list:
        print("\t\tBlasting %s against bucket ..." % entry.query_name)
        entry.bucket_blast(target_dir)