def split_to_exons():
    """Split best-hit loci into exons and cut the matching probe regions.

    Operates on module-level names (``separat_exons``, ``best_hits``,
    ``best_separate_exons``, ``probes``, ``blast_task``, ``result_file``,
    ``result_file2``) and returns nothing. Steps:

    1. Write to ``best_separate_exons`` every record of ``separat_exons``
       whose id contains the locus of a best hit, prefixing the record id
       with the probe name.
    2. Build a nucleotide BLAST database from ``probes`` and blast the
       written exons against it.
    3. Keep only hits whose query id prefix (text before the first ``_``)
       equals the subject id, keep the first hit per query after ranking,
       and overwrite the BLAST table with the surviving rows.
    4. For each surviving hit, write the subject slice of the probe to
       ``result_file`` and the query slice of the exon to ``result_file2``.
    """

    def _exon_number(row):
        # The exon number sits in the 4th '-'-separated piece of the query
        # id, as the 2nd '_'-separated token of that piece.
        return int(row.split()[0].split('-')[3].split('_')[1])

    def _oriented_subseq(record, first, second):
        # Coordinates are treated as 1-based and inclusive. A descending
        # pair is sliced from the lower coordinate up and the slice is then
        # reverse-complemented.
        if first > second:
            return str(record[second - 1:first].seq.reverse_complement())
        return str(record[first - 1:second].seq)

    print('Splitting best hits to exons...')

    with open(separat_exons) as exon_handle:
        all_exons_parsed = SeqIO.to_dict(
            SeqIO.parse(exon_handle, 'fasta', generic_dna))

    # Collect, per best hit, every exon whose id mentions the hit's locus.
    with open(best_separate_exons, 'w') as best_exons:
        for best_hit_line in best_hits:
            columns = best_hit_line.split()
            locus = columns[1].split('-')[0]
            probe = columns[0]
            for exon_id, exon in all_exons_parsed.items():
                if locus in exon_id:
                    best_exons.write(f'>{probe}_{exon.id!s}\n{exon.seq!s}\n')

    make_db = NcbimakeblastdbCommandline(
        dbtype='nucl', input_file=probes, out=probes, parse_seqids=True)
    make_db()

    blast_table = f'{best_separate_exons}_against_{probes}.txt'
    blastn = NcbiblastnCommandline(
        task=blast_task,
        query=best_separate_exons,
        db=probes,
        out=blast_table,
        num_threads=4,
        outfmt='6 qaccver saccver pident qcovhsp evalue bitscore sstart send qstart qend',
    )
    blastn()

    with open(blast_table) as table_handle:
        hits = table_handle.readlines()

    # Keep only rows where the query's probe prefix is the subject itself.
    cleaned_hits = [
        hit for hit in hits
        if hit.split()[0].split('_')[0] == hit.split()[1]
    ]

    # Successive stable sorts: the LAST pass is the primary ordering, the
    # earlier passes act as tie-breakers in reverse order of application.
    sort_passes = (
        (lambda row: float(row.split()[5]), True),    # bitscore, high first
        (lambda row: float(row.split()[4]), False),   # evalue, low first
        (lambda row: float(row.split()[2]), True),    # pident, high first
        (lambda row: float(row.split()[3]), True),    # qcovhsp, high first
        (_exon_number, False),
        (lambda row: row.split()[0].split('-')[2], False),
    )
    for key_func, descending in sort_passes:
        cleaned_hits.sort(key=key_func, reverse=descending)

    # First row per query id wins, i.e. the best one after the ranking above.
    seen_queries = set()
    cleaned_dedup_hits = []
    for row in cleaned_hits:
        query_id = row.split()[0]
        if query_id in seen_queries:
            continue
        seen_queries.add(query_id)
        cleaned_dedup_hits.append(row)

    cleaned_dedup_hits.sort(key=_exon_number)
    cleaned_dedup_hits.sort(key=lambda row: row.split()[1].split('-')[1])

    # Replace the raw BLAST table with the filtered, de-duplicated rows.
    with open(blast_table, 'w') as table_handle:
        table_handle.writelines(cleaned_dedup_hits)

    with open(probes) as probes_to_parse:
        probes_as_dict = SeqIO.to_dict(
            SeqIO.parse(probes_to_parse, 'fasta', generic_dna))
    with open(best_separate_exons) as best_exons:
        best_exons_as_dict = SeqIO.to_dict(
            SeqIO.parse(best_exons, 'fasta', generic_dna))

    with open(result_file, 'w') as resultfile, \
            open(result_file2, 'w') as resultfile2:
        for row in cleaned_dedup_hits:
            fields = row.split()
            name_of_locus = fields[1]
            name_of_exon = fields[0]
            num_exon = name_of_exon.split('-')[3].split('_')[1]
            fasta_header = f'>{name_of_locus}_exon_{num_exon}\n'

            # Subject (probe) coordinates: sstart / send.
            sstart, send = int(fields[6]), int(fields[7])
            sequence = _oriented_subseq(
                probes_as_dict[name_of_locus], sstart, send)
            resultfile.write(f'{fasta_header}{sequence}\n')

            # Query (exon) coordinates: qstart / qend.
            qstart, qend = int(fields[8]), int(fields[9])
            sequence_opt = _oriented_subseq(
                best_exons_as_dict[name_of_exon], qstart, qend)
            resultfile2.write(f'{fasta_header}{sequence_opt}\n')

    print('Done')
def test_primers(args):
    """BLAST primers against a reference and optionally score the hits.

    Builds a nucleotide BLAST database from the reference, runs a
    ``blastn-short`` search of the primers against it, and writes the hits
    with a header line to ``<primers basename>.blast.tsv`` in the output
    directory of the ``TFH`` file handler. Unless ``args.skip_tm`` is truthy,
    the hits are then passed through ``run_thermodynamics`` and the returned
    lines are written to ``<primers basename>.blast.TM.tsv``.

    Args:
        args: Parsed command-line namespace. ``Reference``, ``Primers``,
            ``Output``, ``processes``, ``tm_offset`` and ``tm_size`` are
            indexed with ``[0]``; ``skip_tm`` is used as a flag.

    Returns:
        None. All results are written to files.
    """
    ref = args.Reference[0]
    primers = args.Primers[0]
    out = args.Output[0]
    nproc = args.processes[0]
    tm_offset = args.tm_offset[0]
    tm_size = args.tm_size[0]
    skip_tm = args.skip_tm

    # File Handler
    iTFH = TFH(ref, primers, out)
    primers_basename = os.path.basename(iTFH.primers)

    # 2. Run blastmakedb
    db = os.path.join(iTFH.outdir, os.path.basename(iTFH.ref) + ".db")
    cline = NcbimakeblastdbCommandline(dbtype="nucl",
                                       input_file=iTFH.ref,
                                       out=db)
    print("Building BLAST Database...")
    print(cline)
    run(str(cline))

    # 3. Run short-blast
    result = os.path.join(iTFH.outdir, primers_basename + ".blast.tsv")
    result_tmp = os.path.join(iTFH.outdir, primers_basename + ".tmp")
    cline = NcbiblastnCommandline(
        query=iTFH.primers,
        db=db,
        task="blastn-short",
        num_threads=nproc,
        outfmt="6 qseqid sseqid sstart send mismatch qlen length pident qseq sseq",
        out=result_tmp)
    print("Running short-BLAST...")
    print(cline)
    run(str(cline))

    # Prepend the header to the raw BLAST table. Both handles are closed by
    # the ``with`` block before the temporary file is removed, and the raw
    # table is streamed line by line instead of being loaded into memory.
    with open(result, "w") as report, open(result_tmp, "r") as raw_hits:
        report.write(
            "#PrimerName\tTargetName\tTargetStart\tTargetEnd\t#Mismatches\tPrimerLength\tAlignedLength\t%Identity\tPrimerSeq\tContigSeq\n"
        )
        report.writelines(raw_hits)
    os.remove(result_tmp)

    if skip_tm:
        return

    # 4. Thermodynamics of BLAST results
    print("Running thermodynamic check on blast results...")
    tm_result_file = os.path.join(iTFH.outdir,
                                  primers_basename + ".blast.TM.tsv")
    chunks = to_chunks(result, nproc)
    tm_result = run_thermodynamics(chunks, nproc, iTFH.ref, tm_offset,
                                   tm_size)

    # Columns: the ten BLAST columns above followed by
    # PrimerTM HeteroDimerTM HeteroDimerDG 3EndStabilityTM 3EndStabilityDG
    with open(tm_result_file, "w") as tm_report:
        tm_report.write(
            "#PrimerName\tTargetName\tTargetStart\tTargetEnd\t#Mismatches\tPrimerLength\tAlignedLength\t%Identity\tPrimerSeq\tContigSeq\tPrimerTM\tHeteroDimerTM\tHeteroDimerDG\t3EndStabilityTM\t3EndStabilityDG\n"
        )
        for line in tm_result:
            tm_report.write(line + "\n")
def blastdb(in_file, db_file):
    """Create a nucleotide BLAST database for ``in_file`` named ``db_file``.

    Assembles a ``makeblastdb`` command line through
    ``NcbimakeblastdbCommandline`` and invokes it immediately; nothing is
    returned.
    """
    options = {
        'cmd': 'makeblastdb',
        'dbtype': 'nucl',
        'input_file': in_file,
        'out': db_file,
    }
    NcbimakeblastdbCommandline(**options)()