Exemple #1
0
    def __gen_db_path(self, sball, bucket_path):
        """Build the database path for one sample inside the bucket directory.

        The stem is the base name of ``sball.export_fasta`` truncated at its
        first ".". The directory ``<bucket_path><stem>_db/`` is handed to
        ``plumber.force_dir`` and the returned path is
        ``<bucket_path><stem>_db/<stem>_genome.fasta``.

        Args:
            sball: Sample object exposing an ``export_fasta`` file path.
            bucket_path: Directory prefix. It is joined by plain string
                concatenation, so a trailing separator is presumably expected.

        Returns:
            The database path as a string.
        """
        # Everything after the first "." of the file name is dropped, so
        # "sample.v2.fasta" yields the stem "sample".
        stem = os.path.basename(sball.export_fasta).split(".")[0]
        db_dir = bucket_path + stem + "_db/"
        plumber.force_dir(db_dir)

        return db_dir + stem + "_genome.fasta"
Exemple #2
0
    def __split_queries(self):
        """Write every record of the query FASTA to its own FASTA file.

        Records are read from ``self.query_fpath``. Each one is written to
        ``<self.base_blast_path><args.query_dir><formatted id>.fasta``, where
        the formatted id comes from ``self.__format_id(rec.id)``.

        Returns:
            List of the written file paths, in the order the records appear
            in the input file.
        """
        # Parse the whole input first so that a parse failure happens before
        # the output directory is touched.
        records = list(SeqIO.parse(self.query_fpath, "fasta"))

        out_dir = self.base_blast_path + args.query_dir
        plumber.force_dir(out_dir)

        written = []
        for record in records:
            target = out_dir + self.__format_id(record.id) + ".fasta"
            SeqIO.write([record], target, "fasta")
            written.append(target)

        return written
Exemple #3
0
    def bucket_blast(self, archive_path):
        """Run ``blastn`` for this query against every database in the bucket.

        One result file per database is written below
        ``archive_path + self.__gen_archive_subdir()`` using ``-outfmt 11``
        (BLAST archive format, which ``blast_formatter`` can re-render
        later). Each result path is appended to ``self.archives``.

        Args:
            archive_path: Directory prefix for the archives. It is joined by
                plain string concatenation.

        Raises:
            RuntimeError: If ``blastn`` exits with a non-zero status. The
                message names the database and carries the exit code and the
                captured stderr; the original ``CalledProcessError`` is
                attached as ``__cause__``.
        """
        blast_path = archive_path + self.__gen_archive_subdir()
        plumber.force_dir(blast_path)
        for db_path in self.bucket:
            out_path = blast_path + self.__gen_out_name(db_path)
            blast_cmd = [
                "blastn", "-db", db_path, "-query", self.query, "-out",
                out_path, "-evalue", "1e-20", "-outfmt", "11"
            ]
            try:
                # Output is captured only so it can be reported on failure;
                # the actual results go to out_path.
                subprocess.run(blast_cmd,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               check=True,
                               universal_newlines=True)
            except subprocess.CalledProcessError as perror:
                raise RuntimeError(
                    "ERROR: BLAST failed! (db: %s, exit code %d)\n%s" %
                    (db_path, perror.returncode, perror.stderr)) from perror

            self.archives.append(out_path)
Exemple #4
0
    def __build_bucket(self):
        """Create a nucleotide BLAST database for every sample.

        For each sample in ``self.sample_pit``, ``makeblastdb`` is run on
        ``sball.export_fasta`` with the output path produced by
        ``self.__gen_db_path``. Each database path is appended to
        ``self.db_bucket``.

        Raises:
            RuntimeError: If ``makeblastdb`` exits with a non-zero status.
                The message names the sample and carries the exit code and
                the captured stderr; the original ``CalledProcessError`` is
                attached as ``__cause__``.
        """
        bucket_path = self.base_blast_path + args.bucket_dir
        plumber.force_dir(bucket_path)
        for sball in self.sample_pit:
            print("\t\tMaking database for sample", sball.sample_id)
            out_path = self.__gen_db_path(sball, bucket_path)
            blast_cmd = [
                "makeblastdb", "-in", sball.export_fasta, "-dbtype", "nucl",
                "-out", out_path
            ]
            try:
                # stderr (not stdin) is piped so the tool's diagnostics can
                # be reported on failure, matching the other BLAST calls.
                subprocess.run(blast_cmd,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               check=True,
                               universal_newlines=True)
            except subprocess.CalledProcessError as perror:
                raise RuntimeError(
                    "ERROR: makeblastdb failed! (sample: %s, exit code %d)\n%s"
                    % (sball.sample_id, perror.returncode,
                       perror.stderr)) from perror

            self.db_bucket.append(out_path)
Exemple #5
0
    def convert_hr_reports(self, reports_path):
        """Convert every stored BLAST archive into a pairwise text report.

        Each path in ``self.archives`` is passed to ``blast_formatter`` with
        ``-outfmt 0`` (the pairwise report format). Reports are written
        below ``reports_path + self.__gen_report_subdir()`` and their paths
        are appended to ``self.reports``.

        Args:
            reports_path: Directory prefix for the reports. It is joined by
                plain string concatenation.

        Raises:
            RuntimeError: If ``blast_formatter`` exits with a non-zero
                status. The message names the archive and carries the exit
                code and the captured stderr; the original
                ``CalledProcessError`` is attached as ``__cause__``.
        """
        convert_path = reports_path + self.__gen_report_subdir()
        plumber.force_dir(convert_path)
        for arch_path in self.archives:
            out_path = convert_path + self.__gen_report_name(arch_path)
            bconvert_cmd = [
                "blast_formatter", "-archive", arch_path, "-outfmt", "0",
                "-out", out_path
            ]
            try:
                # Output is captured only so it can be reported on failure;
                # the report itself goes to out_path.
                subprocess.run(bconvert_cmd,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               check=True,
                               universal_newlines=True)
            except subprocess.CalledProcessError as perror:
                raise RuntimeError(
                    "ERROR: Report conversion failed! "
                    "(archive: %s, exit code %d)\n%s" %
                    (arch_path, perror.returncode, perror.stderr)) from perror

            self.reports.append(out_path)
Exemple #6
0
    def get_sub_seqs(self, sub_seq_path):
        """Extract the aligned subject sequences of every archive as FASTA.

        Each path in ``self.archives`` is rendered by ``blast_formatter``
        into a tab-separated scratch file with the columns ``qseqid``,
        ``sseqid`` and ``sseq``. Every row becomes one record with the id
        ``<qseqid>.<sseqid>``; the records of one archive are written to
        ``<sub_seq_path><query_name>_sseq/<name from __gen_sseq_name>``.
        Archives that yield no rows produce no output file.

        Args:
            sub_seq_path: Directory prefix for the output and the scratch
                file. It is joined by plain string concatenation.

        Raises:
            RuntimeError: If ``blast_formatter`` exits with a non-zero
                status. The message names the archive and carries the exit
                code and the captured stderr; the original
                ``CalledProcessError`` is attached as ``__cause__``.
        """
        sub_dir = sub_seq_path + self.query_name + "_sseq/"
        plumber.force_dir(sub_dir)
        # A single scratch file is reused (overwritten) for every archive.
        temp_path = sub_seq_path + "sseq_tab_tempBLASTn6"
        try:
            for arch_path in self.archives:
                bconvert_cmd = [
                    "blast_formatter", "-archive", arch_path, "-outfmt",
                    "6 qseqid sseqid sseq", "-out", temp_path
                ]
                try:
                    subprocess.run(bconvert_cmd,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE,
                                   check=True,
                                   universal_newlines=True)
                except subprocess.CalledProcessError as perror:
                    raise RuntimeError(
                        "ERROR: Tab conversion failed! "
                        "(archive: %s, exit code %d)\n%s" %
                        (arch_path, perror.returncode,
                         perror.stderr)) from perror

                output_file = sub_dir + self.__gen_sseq_name(arch_path)
                records = []
                with open(temp_path) as tf:
                    for line in tf:
                        fields = line.split("\t")
                        record_id = fields[0] + "." + fields[1]
                        # The sequence field still carries the line
                        # terminator; it is handed to seq_format unchanged.
                        record_seq = Seq(self.seq_format(fields[2]))
                        records.append(
                            SeqRecord(record_seq,
                                      id=record_id,
                                      description=""))

                if records:
                    SeqIO.write(records, output_file, "fasta")
        finally:
            # The scratch file does not exist when there were no archives or
            # the first conversion failed before writing it, so only remove
            # it when present; clean up on failure as well.
            if os.path.exists(temp_path):
                os.remove(temp_path)
Exemple #7
0
 def __extract_sub_seqs(self):
     """Extract the aligned subject sequences for every query.

     The output root is ``self.base_blast_path + args.sub_seq_dir``; it is
     handed to ``plumber.force_dir`` and then to ``get_sub_seqs`` of every
     entry in ``self.qSeq_list``.
     """
     sub_seq_path = self.base_blast_path + args.sub_seq_dir
     plumber.force_dir(sub_seq_path)
     for query in self.qSeq_list:
         # The message used to read "Building reports", copied from the
         # report step; it now names the step that is actually running.
         print("\t\tExtracting sub-sequences for %s ..." % query.query_name)
         query.get_sub_seqs(sub_seq_path)
Exemple #8
0
 def __gen_hr_reports(self):
     """Convert the BLAST archives of every query into reports.

     The report root is ``self.base_blast_path + args.report_dir``; it is
     handed to ``plumber.force_dir`` and then to ``convert_hr_reports`` of
     every entry in ``self.qSeq_list``, with a progress line printed first.
     """
     report_root = self.base_blast_path + args.report_dir
     plumber.force_dir(report_root)
     for q_seq in self.qSeq_list:
         progress = "\t\tBuilding reports for %s ..." % q_seq.query_name
         print(progress)
         q_seq.convert_hr_reports(report_root)
Exemple #9
0
 def __blast_all(self):
     """Run BLAST for every query against the database bucket.

     The archive root is ``self.base_blast_path + args.archive_dir``; it is
     handed to ``plumber.force_dir`` and then to ``bucket_blast`` of every
     entry in ``self.qSeq_list``, with a progress line printed first.
     """
     archive_root = self.base_blast_path + args.archive_dir
     plumber.force_dir(archive_root)
     for q_seq in self.qSeq_list:
         progress = "\t\tBlasting %s against bucket ..." % q_seq.query_name
         print(progress)
         q_seq.bucket_blast(archive_root)