def execute_megablast(self, fasta1, fasta2):
    """Run a megablast search of ``fasta1`` against every fasta in ``fasta2``.

    For each subject fasta, ``formatdb`` is run on it and then ``blastn -task
    megablast`` is run with ``fasta1`` as the query. Tabular results (outfmt 6)
    go to ``blast_result_<prefix>.tab`` in the current directory, where
    ``<prefix>`` is the subject file name up to its first dot.

    The values returned by ``shell_command`` are not inspected, so a failing
    command does not stop the loop.

    :param fasta1: path of the query fasta
    :param fasta2: iterable of subject fasta paths
    :return: None
    """
    import os
    from TPutils import shell_command

    for subject_fasta in fasta2:
        label = os.path.basename(subject_fasta).split('.')[0]
        index_cmd = "formatdb -i %s -p F" % subject_fasta
        search_cmd = 'blastn -task megablast -query %s -db %s -evalue 1e-5 -outfmt 6 -out blast_result_%s.tab' % (
            fasta1, subject_fasta, label)
        # Index the subject first, then search it; outputs are deliberately ignored.
        for command in (index_cmd, search_cmd):
            _out, _err, _status = shell_command.shell_command(command)
def delta_file2start_stop_list(delta_input, contigs_add, algo='nucmer', minimum_identity=85):
    """Turn a MUMmer delta file into per-contig lists of alignment starts and stops.

    ``show-coords`` is run on ``delta_input`` and its output is redirected to
    ``gaps_<name>.coords``, where ``<name>`` is ``delta_input`` up to its first
    dot. The coords file is then read, filtered through ``justLinks`` (defined
    elsewhere in this file; presumably it drops the header lines -- confirm),
    and every remaining row contributes one start and one stop to the contig
    named in that row.

    :param delta_input: path of the delta file
    :param contigs_add: mapping from contig name to a sequence whose first item
        is the offset added to both coordinates of that contig
    :param algo: 'nucmer' or 'promer'; the contig name is read from column 9
        for nucmer and column 13 for promer (0-based, tab separated)
    :param minimum_identity: value passed to ``show-coords -I``
    :return: tuple ``(contig2start_stop_list, coords_file)`` where the first
        item maps contig name to ``{"start": [...], "stop": [...]}``
    :raises ValueError: if ``algo`` is neither 'nucmer' nor 'promer'
    :raises SystemExit: with status 1 if ``show-coords`` returns non-zero
    """
    import re
    import sys
    from TPutils import shell_command

    column_shift = {'nucmer': 0, 'promer': 4}
    if algo not in column_shift:
        # Previously this surfaced as an UnboundLocalError on the first row.
        raise ValueError("unsupported algo %r (expected 'nucmer' or 'promer')" % (algo,))
    name_column = 9 + column_shift[algo]

    out_name = delta_input.split('.')[0]
    coords_file = 'gaps_%s.coords' % out_name
    cmd2 = 'show-coords -T -r -c -L 100 -I %s %s > %s' % (
        minimum_identity, delta_input, coords_file)
    print(cmd2)
    a, b, c = shell_command.shell_command(cmd2)
    if c != 0:
        print('nucmer error!!!')
        print(a, b, c)
        print(cmd2)
        # A bare sys.exit() would report success (status 0) to the caller's shell.
        sys.exit(1)

    # 'rU' was removed in Python 3.11; plain text mode already translates newlines.
    with open(coords_file, 'r') as infile:
        rawLinks = justLinks(infile.readlines())

    contig2start_stop_list = {}
    for row in rawLinks:
        fields = re.split(r'\t+', row.rstrip('\n'))
        contig_ref = fields[name_column]
        start = int(fields[0]) + contigs_add[contig_ref][0]
        stop = int(fields[1]) + contigs_add[contig_ref][0]
        positions = contig2start_stop_list.setdefault(
            contig_ref, {"start": [], "stop": []})
        positions["start"].append(start)
        positions["stop"].append(stop)
    return contig2start_stop_list, coords_file
def run_circos(self, config_file="circos.config", out_prefix="circos"):
    """Invoke circos on ``config_file`` and write the plot to ``<out_prefix>.svg``.

    The three values returned by ``shell_command`` are echoed to stdout in
    order, without separators, and stdout is flushed.

    :param config_file: circos configuration file passed with ``-conf``
    :param out_prefix: output name without extension, passed with ``-outputfile``
    :raises CircosException: if the third value returned by ``shell_command``
        (used here as the exit status) equals 255
    """
    from TPutils import shell_command

    circos_cmd = 'circos -outputfile %s.svg -conf %s' % (out_prefix, config_file)
    captured_out, captured_err, status = shell_command.shell_command(circos_cmd)

    for piece in (captured_out, captured_err, status):
        sys.stdout.write(str(piece))
    sys.stdout.flush()

    if status == 255:
        raise CircosException("Circos problem, check files... quitting")
def blast2circos_file(self, blast, reference, blastn=False, identity_cutoff=80):
    """Search ``blast`` against ``reference`` and write the hits as circos tracks.

    ``reference`` is formatted with ``formatdb`` and searched with tblastn by
    default, or with blastn when ``blastn`` is true. The tabular result is
    written to ``blast.tmp`` and reduced with
    ``blast_utils.remove_blast_redundancy``. Every retained hit whose first
    value is at least ``identity_cutoff`` is written to two files in the
    current directory:

    * ``circos_blast.txt``: contig, start, end
    * ``circos_blast_labels.txt``: contig, start, end, gene

    Start and end are the two sorted hit coordinates, each shifted by
    ``self.contigs_add[<contig name without '|'>][0]``.

    :param blast: query fasta handed to the BLAST command line as ``query``
    :param reference: fasta formatted with ``formatdb -p F`` and used as ``db``
    :param blastn: use blastn (evalue 0.001) instead of tblastn
        (evalue 1e-8, max_target_seqs 1)
    :param identity_cutoff: threshold compared with ``float(hit[0])``;
        presumably a percent identity -- confirm against ``blast_utils``
    :return: None
    """
    from TPutils import shell_command
    from TPutils import blast_utils
    # NOTE(review): Bio.Blast.Applications is deprecated in recent Biopython
    # releases -- confirm the version this project pins.
    from Bio.Blast.Applications import NcbitblastnCommandline
    from Bio.Blast.Applications import NcbiblastnCommandline

    # TODO: catch IO errors and other potential errors
    _out, _err, status = shell_command.shell_command('formatdb -i %s -p F' % (reference))
    print(status)

    if not blastn:
        blast_cline = NcbitblastnCommandline(query=blast,
                                             db=reference,
                                             evalue=0.00000001,
                                             outfmt=6,
                                             out="blast.tmp",
                                             max_target_seqs=1)
        print(blast_cline)
    else:
        blast_cline = NcbiblastnCommandline(query=blast,
                                            db=reference,
                                            evalue=0.001,
                                            outfmt=6,
                                            out="blast.tmp")
    blast_cline()

    print('############## BLAST ###################')

    blast2data, _queries = blast_utils.remove_blast_redundancy(
        ["blast.tmp"], check_overlap=False)

    # Both outputs are context-managed so the labels file is flushed and closed
    # too (it used to be left open), even if a lookup below raises.
    with open('circos_blast.txt', "w") as links_out, \
            open('circos_blast_labels.txt', "w") as labels_out:
        for contig in blast2data:
            # contigs_add is keyed by the contig name without pipe characters.
            cname = contig.replace("|", "")
            for gene in blast2data[contig]:
                hit = blast2data[contig][gene]
                if float(hit[0]) >= identity_cutoff:
                    location = sorted(hit[1:3])
                    start = location[0] + self.contigs_add[cname][0]
                    end = location[1] + self.contigs_add[cname][0]
                    links_out.write("%s\t%s\t%s\n" % (contig, start, end))
                    labels_out.write("%s\t%s\t%s\t%s\n" % (contig, start, end, gene))
def execute_promer(fasta1, fasta2, algo="nucmer", coords=True, minimum_align_length=100, minimum_identity=30):
    """Align each query fasta against the reference with nucmer or promer.

    Output files are named after each query file name up to its first dot:
    ``<prefix>.delta`` from the aligner and, when ``coords`` is true,
    ``<prefix>.coords`` from ``show-coords``.

    :param fasta1: reference fasta
    :param fasta2: iterable of query fasta paths
    :param algo: 'nucmer' or 'promer'; with any other value no command is run
        but the expected file names are still returned
    :param coords: also run ``show-coords`` on every delta file
    :param minimum_align_length: value for ``show-coords -L``, default: 100
    :param minimum_identity: value for ``show-coords -I``, default: 30
    :return: coords file list, delta file list
    :raises Exception: if a command's third returned value is non-zero; the
        message is the second value returned by ``shell_command``
    """
    from TPutils import shell_command
    import os

    delta_files = []
    coord_files = []

    for query_fasta in fasta2:
        prefix = os.path.basename(query_fasta).split('.')[0]

        if algo == 'nucmer':
            align_cmd = 'nucmer -b 200 -c 65 -g 90 -l 20 -p %s %s %s' % (
                prefix, fasta1, query_fasta)
            # Only the nucmer command line is echoed.
            print(align_cmd)
        elif algo == 'promer':
            align_cmd = 'promer -l 5 -p %s %s %s' % (prefix, fasta1, query_fasta)
        else:
            align_cmd = None

        if align_cmd is not None:
            _out, err, status = shell_command.shell_command(align_cmd)
            if status != 0:
                raise Exception("%s" % err)
            if coords:
                coords_cmd = 'show-coords -T -r -c -L %s -I %s %s.delta > %s.coords' % (
                    minimum_align_length, minimum_identity, prefix, prefix)
                print(coords_cmd)
                _out, err, status = shell_command.shell_command(coords_cmd)
                if status != 0:
                    raise Exception("%s" % err)

        delta_files.append('%s.delta' % prefix)
        if coords:
            coord_files.append('%s.coords' % prefix)

    return coord_files, delta_files