def align_strand(al_info):
    """Align an input sequence file against a reference with EMBOSS needle.

    The command line is built with Biopython's ``NeedleCommandline`` using
    gap-open 10.0 and gap-extend 1.0, output format ``markx10``, and the
    extra flags ``-adesshow3 -auto`` appended verbatim. The command and its
    outcome are logged through the module-level ``logfun`` logger.

    :param al_info: mapping with the keys ``'ref_file'`` (used as
        ``asequence``), ``'in_file'`` (used as ``bsequence``) and
        ``'out_file'`` (where needle writes the alignment).
    :return: None
    """
    from Bio.Emboss.Applications import NeedleCommandline
    import subprocess

    ref_file = al_info['ref_file']
    in_file = al_info['in_file']
    out_file = al_info['out_file']

    cline = NeedleCommandline(gapopen=10.0, gapextend=1.0)
    cline.asequence = ref_file
    cline.bsequence = in_file
    cline.outfile = out_file
    cline.aformat = 'markx10'
    cml = str(cline) + ' -adesshow3 -auto'
    logfun.info(cml)

    try:
        retcode = subprocess.call(cml, shell=True)
    except OSError as ee:
        # logfun.exception() appends the traceback; format the exception
        # instead of concatenating it to a str (which raises TypeError).
        logfun.exception('Execution of needle failed: %s' % ee)
    else:
        if retcode < 0:
            # A negative return code is the negated number of the signal
            # that terminated the child.
            logfun.info('Child needle was terminated by signal %d' % -retcode)
        else:
            logfun.info('Child needle returned %d' % retcode)
def out():
    """Run needle on the two files chosen in the GUI and display the result.

    Reads the two input paths and the output path from the entry widgets
    ``e1``, ``e2`` and ``e3``, runs EMBOSS needle with the module-level
    ``gapopen`` / ``gapextend`` values (converted to ``int``), writes the
    alignment to ``needle.txt``, copies it (prefixed with two newlines) to
    the chosen output file, and shows it in a new scrollable Tk text window.
    Blocks in ``mainloop()`` and then destroys ``win``.

    :return: None
    """
    filename1 = e1.get()
    filename2 = e2.get()
    outfile = e3.get()

    needle_cline = NeedleCommandline()
    needle_cline.asequence = filename1
    needle_cline.bsequence = filename2
    needle_cline.gapopen = int(gapopen)
    needle_cline.gapextend = int(gapextend)
    needle_cline.outfile = "needle.txt"
    print(needle_cline)
    print(needle_cline.outfile)
    stdout, stderr = needle_cline()
    print(stdout + stderr)

    # NOTE(review): the parsed alignment is not used; the call is kept
    # because it presumably acts as a sanity check that needle produced a
    # parseable EMBOSS alignment -- confirm before removing.
    AlignIO.read("needle.txt", "emboss")

    # Read the raw report through a context manager so the handle is closed.
    with open("needle.txt", "r") as needle_report:
        view = "\n\n%s" % needle_report.read()
    with open(outfile, "w") as f:
        f.write(view)

    root = Tk()
    S = Scrollbar(root)
    T = Text(root, height=50, width=500)
    S.pack(side=RIGHT, fill=Y)
    T.pack(side=LEFT, fill=Y)
    # The scrollbar is vertical (packed RIGHT, fill=Y), so it is wired to the
    # text widget's vertical view only. Binding it to xview as well would
    # overwrite this command and make it scroll horizontally.
    S.config(command=T.yview)
    T.config(yscrollcommand=S.set)
    T.insert(END, view, 'color')
    mainloop()
    win.destroy()
def each_seq_align(each_id=0, record_list=(), pair_aln_dir=''):
    """Align one pair of sequences with needle and store their identity.

    The first two records of ``record_list`` are written to temporary FASTA
    files in ``pair_aln_dir``, aligned with EMBOSS needle (gap-open 10,
    gap-extend 0.5), and the needle report ``<each_id>.txt`` is then replaced
    by a single line ``"<each_id>\\t<identity>\\n"``. If the report contains
    no parseable ``Identity`` line, the file is left empty.

    :param each_id: identifier used to name the temporary and result files.
    :param record_list: sequence whose first two items are the records to
        align (passed to ``SeqIO.write``).
    :param pair_aln_dir: directory in which all files are created.
    :return: None. The process exits with status 1 if launching needle
        raises ``OSError``.
    """
    # prepare pairwise sequence alignment files
    tmp_a_seq = os.path.join(pair_aln_dir, '{0}_a.fasta'.format(str(each_id)))
    SeqIO.write(record_list[0], tmp_a_seq, 'fasta')
    tmp_b_seq = os.path.join(pair_aln_dir, '{0}_b.fasta'.format(str(each_id)))
    SeqIO.write(record_list[1], tmp_b_seq, 'fasta')
    result_file = os.path.join(pair_aln_dir, "{0}.txt".format(str(each_id)))

    needle_cline = NeedleCommandline()
    needle_cline.asequence = tmp_a_seq
    needle_cline.bsequence = tmp_b_seq
    needle_cline.gapopen = 10
    needle_cline.gapextend = 0.5
    needle_cline.outfile = result_file

    # Discard needle's console output. The context manager closes devnull,
    # and the finally clause removes the temporary FASTA files even when the
    # call fails.
    with open(os.devnull, 'w') as devnull:
        try:
            subprocess.call(str(needle_cline), shell=True,
                            stdout=devnull, stderr=devnull)
        except OSError:
            sys.exit(1)
        finally:
            os.remove(tmp_a_seq)
            os.remove(tmp_b_seq)

    in_pattern = re.compile(r'Identity.*\((\d+\.\d+)%\)')
    gene_alignment_result = ''
    with open(result_file, 'r') as f1:
        for a_line in f1:
            if 'Identity' not in a_line:
                continue
            m = in_pattern.search(a_line.strip())
            if m is None:
                # Mentions "Identity" but carries no percentage; ignore it
                # rather than crash on m.group().
                continue
            gene_alignment_result = '{0}\t{1}\n'.format(
                str(each_id), str(m.group(1)))

    # Replace the verbose needle report with the one-line summary.
    os.remove(result_file)
    with open(result_file, 'w') as f2:
        f2.write(gene_alignment_result)
def needleAlign(seq1, seq2, gapopen, gapextend):
    """Run EMBOSS needle on two sequence inputs and print its stdout.

    The alignment itself is written by needle to ``needle.txt`` in the
    current working directory.

    :param seq1: value passed to needle as ``asequence``.
    :param seq2: value passed to needle as ``bsequence``.
    :param gapopen: gap opening penalty.
    :param gapextend: gap extension penalty.
    :return: None
    """
    aligner = NeedleCommandline(
        asequence=seq1,
        bsequence=seq2,
        gapopen=gapopen,
        gapextend=gapextend,
        outfile="needle.txt",
    )
    # Calling the command object runs needle and returns (stdout, stderr).
    captured_out, _captured_err = aligner()
    print(captured_out)
def needle_alignment_emboss(s1, s2):
    """Globally align two sequences with EMBOSS needle.

    Both sequences are passed inline using the ``asis:`` prefix, with
    ``sprotein=True``, gap-open 10 and gap-extend 1. Needle writes the
    alignment to its standard output, which is parsed in ``"emboss"`` format.

    :param s1: first sequence as a plain string.
    :param s2: second sequence as a plain string.
    :return: the alignment object returned by ``AlignIO.read``.
    """
    import subprocess
    from Bio.Emboss.Applications import NeedleCommandline
    from Bio import AlignIO

    cline = NeedleCommandline(auto=True, sprotein=True, stdout=True,
                              gapopen=10, gapextend=1)
    cline.asequence = "asis:" + s1
    cline.bsequence = "asis:" + s2
    # NOTE(review): the command is run through the shell; s1/s2 must not come
    # from untrusted input unless they are validated first.
    process = subprocess.Popen(str(cline), shell=True,
                               stdout=subprocess.PIPE,
                               universal_newlines=True)
    try:
        return AlignIO.read(process.stdout, "emboss")
    finally:
        # Close the pipe and reap the child so neither a file descriptor nor
        # a zombie process is left behind.
        process.stdout.close()
        process.wait()
def needle_alignment_emboss(s1, s2):
    """Globally align two sequences with EMBOSS needle.

    Both sequences are passed inline using the ``asis:`` prefix, with
    ``sprotein=True``, gap-open 10 and gap-extend 1. Needle writes the
    alignment to its standard output, which is parsed in ``"emboss"`` format.

    :param s1: first sequence as a plain string.
    :param s2: second sequence as a plain string.
    :return: the alignment object returned by ``AlignIO.read``.
    """
    import subprocess
    from Bio.Emboss.Applications import NeedleCommandline
    from Bio import AlignIO

    cline = NeedleCommandline(auto=True, sprotein=True, stdout=True,
                              gapopen=10, gapextend=1)
    cline.asequence = "asis:" + s1
    cline.bsequence = "asis:" + s2
    # NOTE(review): the command is run through the shell; s1/s2 must not come
    # from untrusted input unless they are validated first.
    process = subprocess.Popen(str(cline), shell=True,
                               stdout=subprocess.PIPE,
                               universal_newlines=True)
    try:
        return AlignIO.read(process.stdout, "emboss")
    finally:
        # Close the pipe and reap the child so neither a file descriptor nor
        # a zombie process is left behind.
        process.stdout.close()
        process.wait()
def build_target_matrix(self):
    """Fill ``self.assignments`` with the best needle hits per subject.

    The subject and target files (and their names) are swapped if needed so
    that the subject file is the one containing fewer ``>`` characters. If
    ``<outdir>/matrix`` already exists it is unpickled into
    ``self.assignments`` and nothing else is done. Otherwise every subject
    sequence is aligned with EMBOSS needle against the whole target file,
    the ``(target name, gap percentage, score)`` tuples are extracted from
    the report, sorted by score (highest first), and the top ``self.assign``
    of them are appended to ``self.assignments[subject.id]``. The result is
    then pickled to ``<outdir>/matrix``.

    :return: None
    """
    # Ensures the subject file is the smaller one
    if self.mysubject_file.read().count('>') > self.mytarget_file.read().count('>'):
        (self.mysubject_file, self.mytarget_file) = (self.mytarget_file,
                                                     self.mysubject_file)
        (self.subjectname, self.targetname) = (self.targetname,
                                               self.subjectname)
    # The counting above consumed the file; rewind before it is read again.
    self.mysubject_file.seek(0)

    matrixfile = self.outdir + '/matrix'
    if os.path.exists(matrixfile):
        # NOTE(review): unpickling can execute arbitrary code; this is only
        # safe while the output directory is trusted.
        # Pickle data is binary, so the cache is opened in 'rb'.
        with open(matrixfile, 'rb') as cached:
            self.assignments = pickle.load(cached)
        return

    count = self.mysubject_file.read().count('>')
    # Per alignment: name after "# 2:", gap percentage, and score.
    results = re.compile(
        r'# 2: (\w+).+?# Gaps:\s+\d+/\d+ \((\d+\.\d+)%\).+?# Score: (\d+\.\d+)',
        re.DOTALL)

    needle = NeedleCommandline()
    needle.gapopen = self.gapopen
    needle.gapextend = self.gapextend
    needle.outfile = 'stdout'
    needle.bsequence = self.mytarget_file.name

    # NOTE(review): this dict is not used below; kept because building it
    # presumably validates the target FASTA (e.g. duplicate ids) -- confirm
    # before removing.
    mytargets = SeqIO.parse(self.mytarget_file.name, 'fasta')
    mytargets = SeqIO.to_dict(mytargets)
    mysubjects = SeqIO.parse(self.mysubject_file.name, 'fasta')

    print("Creating alignment matrix. Please wait...")
    pbar = ProgressBar(widgets=[Percentage(), Bar()], maxval=count).start()
    for i, subject in enumerate(mysubjects):
        needle.asequence = 'asis:%s' % str(subject.seq)
        (stdout, stderr) = needle()
        gaps = results.findall(stdout)
        # Best score first; keep only the top self.assign hits.
        gaps.sort(key=lambda x: float(x[2]), reverse=True)
        self.assignments.setdefault(subject.id, []).extend(gaps[0:self.assign])
        pbar.update(i + 1)

    with open(matrixfile, 'wb') as cache:
        pickle.dump(self.assignments, cache)
    return
def needle_alignment(s1, s2):
    '''
    DESCRIPTION

    Does a Needleman-Wunsch alignment of sequence s1 and s2 with EMBOSS
    needle (sprotein=True, gap-open 10, gap-extend 1) and returns the
    alignment object produced by ``AlignIO.read`` in "emboss" format.

    s1, s2: sequences as plain strings; they are passed to needle inline
    using the "asis:" prefix.
    '''
    import subprocess
    from Bio.Emboss.Applications import NeedleCommandline
    from Bio import AlignIO

    cline = NeedleCommandline(auto=True, sprotein=True, stdout=True,
                              gapopen=10, gapextend=1)
    cline.asequence = "asis:" + s1
    cline.bsequence = "asis:" + s2
    # NOTE(review): the command is run through the shell; s1/s2 must not come
    # from untrusted input unless they are validated first.
    # universal_newlines=True makes the pipe yield text rather than bytes,
    # which the text-based "emboss" parser needs on Python 3.
    process = subprocess.Popen(str(cline), shell=True,
                               stdout=subprocess.PIPE,
                               universal_newlines=True)
    try:
        return AlignIO.read(process.stdout, "emboss")
    finally:
        # Close the pipe and reap the child so neither a file descriptor nor
        # a zombie process is left behind.
        process.stdout.close()
        process.wait()
def needle_alignment(s1, s2):
    '''
    DESCRIPTION

    Does a Needleman-Wunsch alignment of sequence s1 and s2 with EMBOSS
    needle (sprotein=True, gap-open 10, gap-extend 1) and returns the
    alignment object produced by ``AlignIO.read`` in "emboss" format.

    s1, s2: sequences as plain strings; they are passed to needle inline
    using the "asis:" prefix.
    '''
    import subprocess
    from Bio.Emboss.Applications import NeedleCommandline
    from Bio import AlignIO

    cline = NeedleCommandline(auto=True, sprotein=True, stdout=True,
                              gapopen=10, gapextend=1)
    cline.asequence = "asis:" + s1
    cline.bsequence = "asis:" + s2
    # NOTE(review): the command is run through the shell; s1/s2 must not come
    # from untrusted input unless they are validated first.
    # universal_newlines=True makes the pipe yield text rather than bytes,
    # which the text-based "emboss" parser needs on Python 3.
    process = subprocess.Popen(str(cline), shell=True,
                               stdout=subprocess.PIPE,
                               universal_newlines=True)
    try:
        return AlignIO.read(process.stdout, "emboss")
    finally:
        # Close the pipe and reap the child so neither a file descriptor nor
        # a zombie process is left behind.
        process.stdout.close()
        process.wait()
Entrez.email = "*****@*****.**" if __name__ == "__main__": with open(os.path.join('data', 'rosalind_need.txt')) as dataset: ids = dataset.read().split() handle = Entrez.efetch(db='nucleotide', id=ids, rettype="fasta") records = list(SeqIO.parse(handle, 'fasta')) for i, r in enumerate(records): with open(ids[i], 'w') as f: SeqIO.write(r, f, 'fasta') needle_cline = NeedleCommandline() needle_cline.asequence = ids[0] needle_cline.bsequence = ids[1] needle_cline.outfile = "need.txt" needle_cline.gapopen = 11 needle_cline.gapextend = 1 needle_cline.endopen = 11 needle_cline.endextend = 1 needle_cline.endweight = True needle_cline() with open('need.txt') as f: output = f.readlines() for line in output: if 'Score:' in line:
def each_needle_run(pair_gene_dir, tmp_gene_converted_dir,
                    pair_gene_alignment_dir, og_id, strain_dict):
    """
    Call the Needle program to do a pairwise sequence alignment of the first
    two genes of one orthologous group and summarise the result.

    The records of ``<pair_gene_dir>/<og_id>.fasta`` are re-labelled with the
    strain label taken from ``strain_dict``; the first two are written to
    ``a.fasta`` / ``b.fasta`` in ``tmp_gene_converted_dir``, aligned with
    needle (gap-open 10, gap-extend 0.5) into
    ``<pair_gene_alignment_dir>/<og_id>.txt``, and the identity percentage is
    read back from that report.

    :param pair_gene_dir: each homologous gene directory
    :param tmp_gene_converted_dir: used to put some temporary files and will
        be deleted in the end
    :param pair_gene_alignment_dir: each orthologous gene pair-wised
        alignment directory
    :param og_id: each orthologous gene id
    :param strain_dict: inherit from load_strains_label function with strain
        information; indexed as ``strain_dict[strain_id][0]`` here
    :return: the alignment result of each gene as
        ``"og_id\\t[gene_a|gene_b]\\tidentity\\tannotation\\n"``, or an empty
        string when the report has no parseable Identity line. The
        annotation is the one of the last record in the FASTA file.
    """
    if not os.path.exists(pair_gene_dir):
        logger.error("There is no directory contains gene file, please check.")
        logger.error(last_exception())
        sys.exit(1)

    tmp_gene_fasta = os.path.join(pair_gene_dir, og_id + '.fasta')
    converted_records = []
    # Description layout: fig|<strain id>.peg.<gene number> <annotation>
    re_pattern = re.compile(r'fig\|(\d+\.\d+)\.peg\.(\d+)\s(.*)')
    in_pattern = re.compile(r'Identity.*\((\d+\.\d+)%\)')
    annotation = ''
    og_list = []
    for record in SeqIO.parse(tmp_gene_fasta, 'fasta'):
        m = re.search(re_pattern, record.description)
        strain_id = m.group(1)
        gene_id = '{0}.peg.{1}'.format(strain_id, m.group(2))
        og_list.append(gene_id)
        annotation = m.group(3)
        record.id = strain_dict[strain_id][0]
        final_record = SeqRecord(record.seq, record.id, description='')
        converted_records.append(final_record)

    the_strain_fasta = os.path.join(tmp_gene_converted_dir, 'a.fasta')
    other_strain_fasta = os.path.join(tmp_gene_converted_dir, 'b.fasta')
    SeqIO.write(converted_records[0], the_strain_fasta, 'fasta')
    SeqIO.write(converted_records[1], other_strain_fasta, 'fasta')
    result_file = os.path.join(pair_gene_alignment_dir,
                               "{0}.txt".format(og_id))

    needle_cline = NeedleCommandline()
    needle_cline.asequence = the_strain_fasta
    needle_cline.bsequence = other_strain_fasta
    needle_cline.gapopen = 10
    needle_cline.gapextend = 0.5
    needle_cline.outfile = result_file

    # Discard needle's console output; the context manager closes devnull.
    with open(os.devnull, 'w') as devnull:
        try:
            subprocess.call(str(needle_cline), shell=True,
                            stdout=devnull, stderr=devnull)
        except OSError:
            logger.info(
                'Try to call Needle program failed, please check if Needle has been installed successfully.'
            )
            logger.error(last_exception())
            sys.exit(1)

    os.remove(the_strain_fasta)
    os.remove(other_strain_fasta)

    gene_alignment_result = ''
    with open(result_file, 'r') as f:
        for a_line in f:
            if 'Identity' not in a_line:
                continue
            m = in_pattern.search(a_line.strip())
            if m is None:
                # Mentions "Identity" but carries no percentage; ignore it
                # rather than crash on m.group().
                continue
            identity = m.group(1)
            gene_alignment_result = '{0}\t[{1}|{2}]\t{3}\t{4}\n'.format(
                og_id, og_list[0], og_list[1], identity, annotation)
    return gene_alignment_result
Entrez.email = "*****@*****.**"

if __name__ == "__main__":
    # The dataset holds whitespace-separated nucleotide record ids.
    with open('rosalind_need.txt') as dataset:
        ids = dataset.read().split()

    # Fetch all records in one request; close the handle once parsed.
    handle = Entrez.efetch(db='nucleotide', id=ids, rettype="fasta")
    try:
        records = list(SeqIO.parse(handle, 'fasta'))
    finally:
        handle.close()

    # Store each record in a FASTA file named after its id so that needle
    # can read the sequences from disk.
    for i, r in enumerate(records):
        with open(ids[i], 'w') as f:
            SeqIO.write(r, f, 'fasta')

    needle_cline = NeedleCommandline()
    needle_cline.asequence = ids[0]
    needle_cline.bsequence = ids[1]
    needle_cline.outfile = "rosalind_need_output.txt"
    needle_cline.gapopen = 10
    needle_cline.gapextend = 1
    # Apply the same penalties to end gaps.
    needle_cline.endopen = 10
    needle_cline.endextend = 1
    needle_cline.endweight = True
    needle_cline()

    # Print the alignment score from the needle report as an integer.
    with open('rosalind_need_output.txt') as f:
        for line in f:
            if 'Score:' in line:
                # strip() already drops the newline; slicing off the last
                # character would lose a digit on an unterminated last line.
                print(int(float(line.split(':')[-1].strip())))
print "continue" continue else: print "good length = %i" % seqlength print "break" break newrefrec = temprec SeqIO.write(newrefrec, refoutfile, "fasta") x=0 for item in protlist: x = x + len(item.seq) avg = x/len(protlist) print avg needle_cline = NeedleCommandline() needle_cline.asequence=refoutfile needle_cline.bsequence=prot_outfile needle_cline.gapopen=10 needle_cline.gapextend=0.5 needle_cline.outfile=alignment_out print needle_cline #stdout, stderr = needle_cline() #logstring = stdout+stderr #logout = open(logfile, "w") #logout.write(logstring) #logout.close()
Entrez.email = '*****@*****.**'

# The dataset holds whitespace-separated nucleotide record ids.
with open('rosalind_need.txt', 'r') as f:
    ids = f.read().split()

# Fetch all records in one request; close the handle once parsed.
handle = Entrez.efetch(db='nucleotide', id=ids, rettype='fasta')
try:
    records = list(SeqIO.parse(handle, 'fasta'))
finally:
    handle.close()

# Store each record in a FASTA file named after its id.
for i, record in enumerate(records):
    with open(ids[i], 'w') as f:
        SeqIO.write(record, f, 'fasta')

# Following step can be done in command line
# using 'needle'
needle_cl = NeedleCommandline()
needle_cl.asequence = ids[0]
needle_cl.bsequence = ids[1]
needle_cl.outfile = 'need_output.txt'
needle_cl.gapopen = 10
needle_cl.gapextend = 1
# Apply the same penalties to end gaps.
needle_cl.endopen = 10
needle_cl.endextend = 1
needle_cl.endweight = True
needle_cl()

# Score
with open('need_output.txt', 'r') as f:
    content = f.readlines()