def run_tblastn(db_path, q_file, file_location):
    """Run ``tblastn`` and return the parsed result.

    The search uses ``q_file`` as the query and ``db_path`` as the database,
    with an e-value cutoff of ``1e-1``. Output is written to
    ``file_location`` in output format 7 with the columns listed in
    ``outfmt_spec`` below.

    :param db_path: BLAST database passed to ``-db``.
    :param q_file: query file passed to ``-query``.
    :param file_location: path the tblastn report is written to.
    :return: whatever ``read_blast_output(file_location, db_path)`` returns.
    """
    outfmt_spec = "7 qacc sacc evalue qstart qend sstart send qlen sframe"
    tblastn_args = ["tblastn"]
    tblastn_args += ["-query", q_file]
    tblastn_args += ["-db", db_path]
    tblastn_args += ["-outfmt", outfmt_spec]
    tblastn_args += ["-evalue", "1e-1"]
    tblastn_args += ["-out", file_location]

    # Block until tblastn has finished so the report exists before parsing.
    run_cmd(command=tblastn_args, wait=True)
    return read_blast_output(file_location, db_path)
def make_blast_db(genome, temp_dir):
    """Build a nucleotide BLAST database for ``genome`` inside ``temp_dir``.

    The database prefix is ``temp_dir`` joined with the last
    ``/``-separated component of ``genome``. If ``<prefix>.nhr`` already
    exists, a notice is printed and ``makeblastdb`` is not run.

    :param genome: path to the genome file handed to ``makeblastdb -in``.
    :param temp_dir: directory that receives the database files.
    :return: the database prefix (``temp_dir``/``<genome file name>``).
    """
    name = genome.split("/")[-1]
    out_dir = os.path.join(temp_dir, name)

    if os.path.exists(out_dir + ".nhr"):
        print("\n\t[-] BLAST db already exists:\t{}\n".format(out_dir))
        return out_dir

    # Pass an argument list, as the other wrappers in this file do, instead
    # of a concatenated string: paths containing spaces or shell
    # metacharacters then reach makeblastdb as single, unmodified arguments.
    command = [
        "makeblastdb",
        "-in", genome,
        "-dbtype", "nucl",
        "-parse_seqids",
        "-out", out_dir,
    ]
    run_cmd(command=command, wait=True)
    return out_dir
def parse_blastdb(db_path, contig, start, end):
    """Return the sequence of ``contig`` in the range ``start``-``end``.

    Runs ``blastdbcmd`` against ``db_path`` and concatenates every output
    line that does not start with ``>`` (i.e. everything except FASTA
    header lines) into a single string.

    :param db_path: BLAST database passed to ``-db``.
    :param contig: entry identifier passed to ``-entry``.
    :param start: first coordinate of the ``-range`` argument.
    :param end: last coordinate of the ``-range`` argument.
    :return: the concatenated sequence lines; ``""`` if there were none.
    """
    region = "{}-{}".format(start, end)

    # Pass an argument list, as the other wrappers in this file do, instead
    # of a concatenated string: identifiers such as "gi|1|ref|X" or paths
    # with spaces must reach blastdbcmd as single, unmodified arguments.
    command = [
        "blastdbcmd",
        "-db", db_path,
        "-dbtype", "nucl",
        "-entry", contig,
        "-range", region,
    ]

    stripped_lines = (
        line.strip("\n") for line in run_cmd(command=command, wait=False)
    )
    return "".join(
        line for line in stripped_lines if not line.startswith(">")
    )
def get_exonerate_object(output_path, command):
    """Run ``command``, save its output and wrap it in an ``ExonerateObject``.

    Every line produced by ``run_cmd`` is written to ``output_path``. Output
    shorter than ten lines is treated as "no result".

    :param output_path: file that receives the command output.
    :param command: command handed unchanged to ``run_cmd``.
    :return: ``ExonerateObject(output_path)``, or ``None`` when fewer than
        ten lines were written.
    """
    written = 0
    with open(output_path, "w") as handle:
        for written, record in enumerate(
                run_cmd(command=command, wait=False), start=1):
            handle.write(record)

    # The file is closed (and flushed) here, before anything reads it back.
    return ExonerateObject(output_path) if written >= 10 else None
def msa_operations(command):
    """Run ``command`` and flatten its FASTA-style output into a list.

    Lines before the first ``>`` header are ignored. From then on each
    header contributes its first whitespace-separated token (including the
    ``>``), followed by the record's sequence lines joined into one string.

    :param command: command handed unchanged to ``run_cmd``.
    :return: ``[header1, seq1, header2, seq2, ...]``. When no header was
        seen, the result is ``[""]``.
    """
    records = []
    chunks = []
    in_fasta = False

    for raw in run_cmd(command=command, wait=False):
        is_header = raw.startswith(">")

        if not in_fasta:
            if not is_header:
                # Still in the preamble before the first record.
                continue
            in_fasta = True

        if not is_header:
            chunks.append(raw.rstrip("\n"))
            continue

        # A new header closes the previous record, if there was one.
        if records:
            records.append("".join(chunks))
        records.append(raw.rstrip("\n").split()[0])
        chunks = []

    # Flush the sequence of the last record (empty string if none was read).
    records.append("".join(chunks))
    return records
def many_hmm_scores(hmm_file, query_file, test_this_function=None):
    """Collect the score column from an ``hmmsearch`` hit table.

    Runs ``hmmsearch --noali hmm_file query_file`` (or, when given, the
    command in ``test_this_function``) and scans its output. The line
    containing ``"E-value"`` and the line after it are skipped; every
    following non-empty line contributes its second whitespace-separated
    field as a float. Parsing stops at the first empty line or at the first
    row whose second field is not a number.

    :param hmm_file: profile HMM passed to ``hmmsearch``.
    :param query_file: sequence file passed to ``hmmsearch``.
    :param test_this_function: optional replacement command handed to
        ``run_cmd`` instead of the default ``hmmsearch`` call.
    :return: list of float scores; empty when no table row was parsed.
    """
    if test_this_function is None:
        command = ["hmmsearch", "--noali", hmm_file, query_file]
    else:
        command = test_this_function

    # 0: header not seen yet, 1: header seen (skip the next line),
    # 2: inside the table body.
    read_count = 0
    score_list = []

    for line in run_cmd(command=command, wait=False):
        if "E-value" in line or read_count == 1:
            read_count += 1
        elif read_count == 2:
            # Per the original note, field 8 of a row is the protein name.
            fields = line.strip("\n").split()
            if not fields:
                # A blank line ends the table.
                return score_list
            try:
                score_list.append(float(fields[1]))
            except ValueError:
                print("VALUE ERROR", fields)
                return score_list

    # Output ended without a terminating blank line (or never contained a
    # table): still return a list rather than falling through to None.
    return score_list
def grap_output(protein_id, orthologous_file):
    """Look up ``protein_id`` in ``orthologous_file`` with ``grep``.

    Only the first matching line is used; the text before its first ``:``
    is returned.

    :param protein_id: pattern handed to ``grep``.
    :param orthologous_file: file that is searched.
    :return: the prefix of the first matching line up to the first ``:``,
        or ``None`` when ``grep`` produced no output.
    """
    grep_cmd = ["grep", "{}".format(protein_id), orthologous_file]
    first_hit = next(iter(run_cmd(command=grep_cmd, wait=False)), None)
    if first_hit is None:
        return None
    return first_hit.strip().split(":")[0]