def test_properties(self):
    """Check that options assigned as attributes are honoured.

    The command is created with only the executable; every option is then
    set through attribute assignment before the shared test procedure runs.
    """
    command = ClustalwCommandline(clustalw_exe)
    assignments = (
        ("infile", "Fasta/f002"),
        ("outfile", "temp_test.aln"),
        ("align", True),
    )
    # setattr goes through the same attribute hook as ``command.x = value``.
    for option_name, option_value in assignments:
        setattr(command, option_name, option_value)
    self.standard_test_procedure(command)
def test_newtree(self):
    """Exercise the ``newtree`` option with two different tree file names.

    The shared procedure runs once with a plain tree file name and once more
    after ``newtree`` is switched to a name containing spaces.
    """
    options = {
        "infile": "Registry/seqs.fasta",
        "outfile": "temp_test.aln",
        "newtree": "temp_test.dnd",
        "align": True,
    }
    command = ClustalwCommandline(clustalw_exe, **options)
    self.standard_test_procedure(command)

    # Same command object, only the tree file name changes.
    command.newtree = "temp with space.dnd"
    self.standard_test_procedure(command)
def win_alignment(self, path):
    """Align the sequences in *path* with ClustalW, writing FASTA output.

    The executable location comes from ``CLUSTAL_PATH`` and the alignment is
    written to a fixed file below ``BASE_DIR`` (see ``outfile`` below).

    Args:
        path: input sequence file handed to ClustalW as ``infile``.

    Raises:
        AssertionError: if ``CLUSTAL_PATH`` is not an existing file.
    """
    clustalw_exe = CLUSTAL_PATH
    # Check the executable before building the command so a bad path is
    # reported before anything else can fail.
    assert os.path.isfile(clustalw_exe), "Clustal W executable missing"
    clustalw_cline = ClustalwCommandline(
        clustalw_exe,
        infile=path,
        output='FASTA',
        # Raw string: the Windows separators are literal backslashes and
        # must not be read as (invalid) escape sequences.
        outfile=BASE_DIR + r"\secuences\secuence.fasta_aln.fasta",
    )
    clustalw_cline()
def run_clustalw(clustalw_infile, qid, sid):
    """Run clustalw on *clustalw_infile* and return the parsed alignment.

    Works with Biopython version 1.56 or newer.

    Args:
        clustalw_infile: path of the sequence file to align.
        qid: identifier passed through unchanged to the result.
        sid: identifier passed through unchanged to the result.

    Returns:
        A ``(qid, sid, alignment)`` tuple.
    """
    import os

    from Bio import AlignIO
    from Bio.Align.Applications import ClustalwCommandline

    cline = ClustalwCommandline("clustalw", infile=clustalw_infile)
    cline()
    # Swap only the final extension for ".aln". A plain str.replace would
    # rewrite every ".fasta" in the path (directories included) and would
    # return the input file itself for any other extension.
    alignment_path = os.path.splitext(clustalw_infile)[0] + '.aln'
    alignment = AlignIO.read(alignment_path, "clustal")
    return (qid, sid, alignment)
def runClustal(filePath):
    """Run clustalw on *filePath* and return the tree read from its .dnd file.

    The tree file is looked up next to the input, with the input's extension
    replaced by ``.dnd``, and parsed as Newick.
    """
    print("[INFO] Running clustalw on {}".format(filePath))
    ClustalwCommandline("clustalw", infile=filePath)()

    stem, _extension = os.path.splitext(filePath)
    dndFilePath = stem + ".dnd"
    print("[INFO] Creating phylogenetic tree from {}".format(dndFilePath))
    return Phylo.read(dndFilePath, "newick")
def executeClustalW(inputString):
    """Align *inputString* with ClustalW and return the raw output text.

    The input is staged through ``getInputTempFile`` and the result is read
    back from the handle returned by ``getOutputTempFile``.

    Args:
        inputString: sequence data handed to ``getInputTempFile``.

    Returns:
        Whatever ``read()`` on the output handle yields after the run.
    """
    # NOTE(review): assumes both helpers return file-like objects exposing
    # ``name``, ``read`` and ``close`` -- confirm against their definitions.
    inFH = getInputTempFile(inputString)
    try:
        outFH = getOutputTempFile()
        try:
            clustalw_cline = ClustalwCommandline(infile=inFH.name, outfile=outFH.name)
            clustalw_cline()
            return outFH.read()
        finally:
            # Release the handle even when ClustalW or the read fails.
            outFH.close()
    finally:
        inFH.close()
def test_output_filename_with_spaces(self):
    """Run the shared procedure with an output file name that has spaces."""
    options = {"infile": "GFF/multi.fna", "outfile": "temp with space.aln"}
    command = ClustalwCommandline(clustalw_exe, **options)
    self.standard_test_procedure(command)
def test_simple_fasta(self):
    """Run the shared procedure on a plain FASTA input file."""
    options = {"infile": "Fasta/f002", "outfile": "temp_test.aln"}
    command = ClustalwCommandline(clustalw_exe, **options)
    self.standard_test_procedure(command)
def clustalAlignment(filename, command="clustalw"):
    """Align the sequences in *filename* and return the parsed alignment.

    Args:
        filename: path of the sequence file to align.
        command: name or path of the ClustalW executable.

    Returns:
        The alignment read from ``<filename without extension>.aln``.
    """
    from Bio import AlignIO
    from Bio.Align.Applications import ClustalwCommandline

    name = os.path.splitext(filename)[0]
    cline = ClustalwCommandline(command, infile=filename)
    # Single-argument call form: valid on both Python 2 and Python 3.
    print('performing clustal alignment..')
    cline()
    return AlignIO.read(name + '.aln', "clustal")
def clustal_tree(self, ids_seqs):
    '''
    Build the phylogenetic tree for all sequences held by the manager,
    with the help of the ClustalW program, and draw it as ASCII art.
    '''
    fasta_name = 'All_seqs.fasta'
    self.write_fasta(ids_seqs, file_name=fasta_name)
    ClustalwCommandline('clustalw2', infile=fasta_name)()
    # The tree is read from a fixed file name sitting next to the FASTA.
    Phylo.draw_ascii(Phylo.read('All_seqs.dnd', 'newick'))
def realign_me(self):
    """Re-run clustalw2 on ``self.path`` and reload the sequences.

    The output goes to ``self.path`` itself when it already ends in
    ``.clw2``, otherwise to ``self.path + ".clw2"``. Afterwards
    ``self.path`` points at the output and ``self.seq_dict`` is refreshed
    from it via ``read_fa``.
    """
    if self.path.endswith(".clw2"):
        outpath = self.path
    else:
        outpath = self.path + ".clw2"
    cline = ClustalwCommandline("clustalw2", infile=self.path, outfile=outpath)
    cline()
    self.path = outpath
    # Reload from the instance attribute; no local named ``path`` exists
    # in this method.
    self.seq_dict = read_fa(self.path)
def run_aln(in_file, out_file, **args):
    """Align *in_file* with clustalw2 and save the result as FASTA.

    ClustalW writes to ``out_file + '.aln'``; that file is parsed, removed,
    and the alignment is then written to *out_file* in FASTA format. Extra
    keyword arguments are forwarded to the command-line wrapper.

    Returns:
        The parsed alignment.
    """
    clustal_path = out_file + '.aln'
    command = ClustalwCommandline(
        'clustalw2', infile=in_file, outfile=clustal_path, **args
    )
    command()

    align = AlignIO.read(clustal_path, 'clustal')
    os.remove(clustal_path)
    with open(out_file, "w") as fasta_handle:
        AlignIO.write(align, fasta_handle, 'fasta')
    return align
def RunClustalw2(self):
    """Run ./clustalw2 on every file in ``self.file_list``.

    For each file the command object is stored on ``self.clustalw_cline``,
    executed, and a status line is appended to ``self.textbox``.
    """
    # NOTE(review): every iteration writes to the same ``self.output``
    # path -- confirm that overwriting earlier results is intended.
    for fasta_file in self.file_list:
        # Kept as instance attributes, as the loop has always exposed them.
        self.fasta_file = fasta_file
        self.clustalw_cline = ClustalwCommandline(
            './clustalw2', infile=self.fasta_file, outfile=self.output)
        self.clustalw_cline()
        # A space after "for" keeps the file name from running into the
        # preceding word in the message.
        self.textbox.insert(
            INSERT,
            'Your MSA results for ' + self.fasta_file
            + ' can be found in ' + self.output + '!' + '\n')
def clustal(*id, out='comp.aln'):
    """Align the given ids with clustalw and return the alignment and stdout.

    ``mkfasx`` is called with *out* and the ids, clustalw is run with *out*
    as its input file and percent scoring, and *out* is then parsed as a
    Clustal alignment.

    Returns:
        An ``(alignment, stdout)`` tuple.
    """
    # NOTE(review): *out* is used both as the clustalw input and as the file
    # parsed afterwards -- presumably this relies on clustalw writing its
    # result to the same ".aln" name; confirm.
    mkfasx(out, *id)
    aligner = ClustalwCommandline("clustalw", infile=out, score='PERCENT')
    run_stdout, _run_stderr = aligner()
    parsed = AlignIO.read(out, "clustal")
    return parsed, run_stdout
def get_phylo_tree(fasta_file):
    """Run clustalw2 on *fasta_file* and save an ASCII drawing of its tree.

    Args:
        fasta_file: path of the sequence file to align.

    Returns:
        The path of the written ``<fasta_file>.tree`` file on success, or a
        string starting with ``'Error generating phylo tree: '`` when
        anything goes wrong (errors are reported, not raised).
    """
    import os

    try:
        cline = ClustalwCommandline("clustalw2", infile=fasta_file)
        cline()
        # The guide tree is named after the input with its extension
        # replaced by ".dnd", not appended to the full file name.
        tree_path = os.path.splitext(fasta_file)[0] + ".dnd"
        tree = Phylo.read(tree_path, "newick")
        representation_file = '%s.tree' % fasta_file
        with open(representation_file, 'w') as output_file:
            Phylo.draw_ascii(tree, output_file)
        return representation_file
    except Exception as e:
        # Deliberately broad: callers receive an error string, never an
        # exception.
        return 'Error generating phylo tree: %s' % str(e)
def create_distmat(fname, method=1):
    """Align *fname* with clustalw, then run ``distmat`` on the alignment.

    Both tools run with their stdout and stderr discarded. ``distmat``
    reads ``<stem>.aln`` and writes ``<stem>.distmat``, where ``<stem>`` is
    *fname* without its final extension.

    Args:
        fname: path of the sequence file to align.
        method: integer passed to distmat as ``-protmethod``.
    """
    cline = ClustalwCommandline('clustalw', infile=fname)
    # splitext strips only the last extension, so dots in directory names
    # (e.g. "./data/x.fasta") no longer truncate the path.
    better = os.path.splitext(fname)[0]
    distmat_line = "distmat %s.aln -outfile %s.distmat -protmethod %i" % (better, better, method)
    use_shell = sys.platform != "win32"
    # One writable null device shared by both calls and closed afterwards.
    with open(os.devnull, 'w') as devnull:
        subprocess.call(str(cline), stdout=devnull, stderr=devnull, shell=use_shell)
        subprocess.call(distmat_line, stdout=devnull, stderr=devnull, shell=use_shell)
def perform_alignment(self):
    """Run ClustalW on ``self.clustal_input`` and extract the sequences.

    The command line is printed before it runs; the alignment is then read
    from ``self.clustal_output`` and passed to ``self.extract_seqs``.

    Returns:
        Whatever ``self.extract_seqs`` returns for the parsed alignment.
    """
    command = ClustalwCommandline(
        self.clustalw,
        infile=self.clustal_input,
        outfile=self.clustal_output,
    )
    print(command)
    command()
    alignment = AlignIO.read(self.clustal_output, "clustal")
    return self.extract_seqs(alignment)
def example():
    """Align a sample FASTA file and print ClustalW's output and the result.

    Runs ``clustalw2.exe`` from ``path_to_programs`` on a fixed input file,
    prints the captured stdout, then reads and prints the ``.aln`` file using
    the format name returned by ``file_extension``.
    """
    fasta_path = "../data/myfasta.fasta"
    aligner = ClustalwCommandline(path_to_programs + "clustalw2.exe", infile=fasta_path)
    clustal_stdout, _clustal_stderr = aligner()
    print(clustal_stdout)

    alignment_path = "../data/myfasta.aln"
    parsed = AlignIO.read(alignment_path, file_extension(alignment_path))
    print(parsed)
def clustalAlignment(filename):
    """Align the sequences in *filename* with clustalw and return the result.

    Args:
        filename: path of the sequence file to align.

    Returns:
        The alignment read from ``<filename without extension>.aln``.
    """
    import os

    from Bio.Align.Applications import ClustalwCommandline

    cline = ClustalwCommandline("clustalw", infile=filename)
    # Single-argument call form: valid on both Python 2 and Python 3.
    print('performing alignment..')
    cline()
    # Read the alignment that belongs to *filename* rather than a fixed
    # "test.aln", which only matched inputs named "test.<ext>".
    alignment_path = os.path.splitext(filename)[0] + '.aln'
    return AlignIO.read(alignment_path, "clustal")
def do_clustalw(file_for_clustalw,
                clustalw_exe=r"/zzh_gpfs/apps/clustalw-2.1-linux-x86_64-libcppstatic/clustalw2"):
    """Run ClustalW on every file matching a glob pattern.

    Args:
        file_for_clustalw: glob pattern selecting the input files.
        clustalw_exe: path of the ClustalW executable; defaults to the
            cluster installation this function has always used.

    Raises:
        AssertionError: if *clustalw_exe* is not an existing file.
    """
    infiles = glob.glob(file_for_clustalw)
    assert os.path.isfile(clustalw_exe), "Clustal W executable missing"
    for in_file in infiles:
        # Single-argument call form: valid on both Python 2 and Python 3.
        print("Processing %s ......." % in_file)
        # Backslash-escape "&" and "*" in the path before it is placed on
        # the command line. Raw strings keep the literal backslash without
        # relying on an invalid escape sequence.
        in_file = in_file.replace('&', r'\&').replace('*', r'\*')
        clustalw_cline = ClustalwCommandline(clustalw_exe, infile=in_file)
        clustalw_cline()
def clustal_alignment(infile, outfile):
    """Create an alignment file with the bundled Clustal executable.

    The executable is chosen from ``user_OS``; input and output names are
    resolved below ``static/data/sauvegardes/`` + ``dirName``.

    Args:
        infile: input file name, appended to the save directory prefix.
        outfile: output file name, appended to the save directory prefix.

    Raises:
        RuntimeError: if ``user_OS`` is not one of the supported platforms.
    """
    # NOTE(review): the bundled executables are Clustal Omega builds, yet the
    # command is assembled with the ClustalW wrapper -- confirm the generated
    # options are accepted by these binaries.
    if user_OS == 'darwin':
        clustal_exe = "static/tools/MacOS/clustal-omega-1.2.3-macosx"
    elif user_OS == 'linux':
        clustal_exe = "static/tools/Linux/clustalo-1.2.4-Ubuntu-x86_64"
    elif user_OS == 'win32':
        clustal_exe = current_path + "/static/tools/Windows/clustal-omega-1.2.2-win64/clustalo.exe"
    else:
        # Fail with a clear message instead of an unbound local further down.
        raise RuntimeError("No Clustal executable configured for platform %r" % (user_OS,))

    save_prefix = "static/data/sauvegardes/" + dirName
    cline = ClustalwCommandline(clustal_exe,
                                infile=save_prefix + infile,
                                outfile=save_prefix + outfile)
    cline()
def Clustal_alignment(xmlfile=None, fastafile=None, alnfile=None):
    """Align sequences with the ``clustalo`` command and report progress.

    Args:
        xmlfile: XML input; converted with ``xml2fasta`` when *fastafile* is
            not given.
        fastafile: FASTA input; defaults to *xmlfile* with its extension
            replaced by ``.fasta``.
        alnfile: alignment output; defaults to *fastafile* with its
            extension replaced by ``.aln``.

    Raises:
        ValueError: if neither *xmlfile* nor *fastafile* is given.
    """
    import os

    if fastafile is None:
        if xmlfile is None:
            raise ValueError("either xmlfile or fastafile must be given")
        # splitext guarantees the FASTA name differs from the XML name even
        # when the latter does not contain a literal ".xml".
        fastafile = os.path.splitext(xmlfile)[0] + '.fasta'
        xml2fasta(infile=xmlfile, outfile=fastafile)
    if alnfile is None:
        # Without a name the final message could not be built.
        alnfile = os.path.splitext(fastafile)[0] + '.aln'

    # Run the command line version of clustal on the FASTA file and output
    # to an alignment file.
    # NOTE(review): "clustalo" is driven through the ClustalW wrapper --
    # confirm the generated options are what the executable expects.
    print('\nAligning ' + fastafile + ' with clustal...')
    cmd = ClustalwCommandline("clustalo", infile=fastafile, outfile=alnfile)
    cmd()
    print('\tDone: writing to ' + alnfile)
def performAlignSequences(filename):
    """Align *filename* with clustalw2.exe and load the resulting alignment.

    The command (gap open and gap extension both 0) is rendered to a string
    and run through ``subprocess.call``; a shell is used on every platform
    except win32.

    Returns:
        The result of ``loadAlignmentFromFile`` for ``alignOutput.aln``.

    Raises:
        AssertionError: if the process exits with a non-zero status.
    """
    command = ClustalwCommandline(
        r"clustalw2.exe",
        infile=filename,
        outfile='alignOutput.aln',
        gapopen=0,
        gapext=0,
    )
    exit_status = subprocess.call(str(command), shell=(sys.platform != "win32"))
    assert exit_status == 0, "Calling ClustalW failed"
    return loadAlignmentFromFile('alignOutput.aln', 'clustal')
def alignseq():
    """Align ``test.fasta`` with ClustalW and return the parsed alignment.

    Sequences are kept in input order; the result is read from ``test.aln``.

    Raises:
        AssertionError: if the ClustalW executable is missing.
    """
    # Alternative install location: r"/home/wangdi/apps/clustalw2"
    clustalw_exe = r"/Users/wangdi/apps/anaconda3/envs/snp2str/bin/clustalw"
    # Verify the executable before building the command.
    assert isfile(clustalw_exe), "Clustal W executable missing"
    cline = ClustalwCommandline(clustalw_exe, infile="test.fasta", outorder="input")
    cline()
    # The context manager closes the file even if parsing fails.
    with open("test.aln") as alignfile:
        return AlignIO.read(alignfile, "clustal")
def test_empty_file(self):
    """Test a non-existing input file.

    Running the tool on a missing file must raise ``ApplicationError`` with
    one of the known error messages; a successful run fails the test.
    """
    input_file = "does_not_exist.fasta"
    self.assertFalse(os.path.isfile(input_file))
    cline = ClustalwCommandline(clustalw_exe, infile=input_file)
    try:
        stdout, stderr = cline()
    except ApplicationError as err:
        message = str(err)
        self.assertTrue(
            "Cannot open sequence file" in message
            or "Cannot open input file" in message
            or "non-zero exit status" in message
        )
    else:
        # Without this branch the test would pass silently when the tool
        # unexpectedly succeeds.
        self.fail("Should have failed, returned:\n%s\n%s" % (stdout, stderr))
def clustal_alignment(filename=None, seqs=None, command="clustalw"):
    """Align sequences with clustal and return the parsed alignment.

    Args:
        filename: FASTA file to align. When omitted, *seqs* is written to
            ``temp.faa`` and that file is aligned instead.
        seqs: sequence records, used only when *filename* is omitted.
        command: name or path of the ClustalW executable.

    Returns:
        The alignment read from ``<filename without extension>.aln``.
    """
    from Bio.Align.Applications import ClustalwCommandline

    if filename is None:
        filename = 'temp.faa'
        SeqIO.write(seqs, filename, "fasta")
    name = os.path.splitext(filename)[0]
    cline = ClustalwCommandline(command, infile=filename)
    cline()
    return AlignIO.read(name + '.aln', 'clustal')
def clustal(archivo, tipo, matriz):
    """Run the bundled clustalw2 on *archivo* and print its output.

    Args:
        archivo: input file passed as ``infile``.
        tipo: value passed as the ``type`` option.
        matriz: value passed as the ``matrix`` option.

    stderr is printed first, then stdout.
    """
    # Original note (translated): utility from a folder of FASTA sequences
    # to a FASTA file.
    clustalw_exe = "Algoritmos/programs/clustalw2"
    options = {"matrix": matriz, "type": tipo, "infile": archivo}
    aligner = ClustalwCommandline(clustalw_exe, **options)
    captured_out, captured_err = aligner()
    print(captured_err)
    print(captured_out)
def alignFunction(folderName, files, tkWindow):
    """Align the given FASTA files and show the alignment and a tree.

    Each file is read as a single FASTA record, all records are written to
    ``TOALIGN.fasta`` and aligned with clustalw2. The alignment text is drawn
    on a canvas in a new top-level window and a parsimony tree built from it
    is displayed with ``Phylo.draw``.

    Args:
        folderName: directory containing the input files.
        files: file names inside *folderName*.
        tkWindow: parent Tk widget for the result window.
    """
    records = []
    for filename in files:
        # Close each input as soon as its record has been read.
        with open(folderName + "/" + filename) as handle:
            records.append(SeqIO.read(handle, "fasta"))
    SeqIO.write(records, "TOALIGN.fasta", "fasta")

    # On Linux the executable is run from the current directory.
    if platform.system() == "Linux":
        cline = ClustalwCommandline("./clustalw2", infile="TOALIGN.fasta")
    else:
        cline = ClustalwCommandline("clustalw2", infile="TOALIGN.fasta")

    window = Toplevel(tkWindow)
    window.title("Alineamiento")
    canvas = Canvas(window, width=800, height=650, bg='#afeeee')

    cline()
    with open("TOALIGN.aln") as alignment_handle:
        alignment = AlignIO.read(alignment_handle, "clustal")

    canvas.create_text(10, 10, anchor=NW, fill="darkblue",
                       font="Courier 12", text=str(alignment))
    canvas.pack()

    scorer = ParsimonyScorer()
    searcher = NNITreeSearcher(scorer)
    constructor = ParsimonyTreeConstructor(searcher)
    pars_tree = constructor.build_tree(alignment)
    Phylo.draw(pars_tree)

    window.mainloop()
    return
def clustalW(infil, config_file):
    """Run ClustalW on a multi-FASTA file to produce an alignment.

    Args:
        infil: path of the multiple-FASTA input file.
        config_file: accepted but not read here; the executable path is
            currently hard-coded below.

    Raises:
        AssertionError: if the ClustalW executable is missing.
    """
    # NOTE(review): the path was presumably meant to come from *config_file*
    # (a ``parse_config(config_file, "clustalw")`` lookup) -- confirm.
    clustalw2 = r'/Applications/clustalw2'
    options = {
        "infile": infil,
        "align": "input",
        "seqnos": "ON",
        "outorder": "input",
        "type": "PROTEIN",
    }
    cline = ClustalwCommandline(clustalw2, **options)
    assert os.path.isfile(clustalw2), "Clustal W executable missing"
    cline()
def test_input_filename_with_space(self):
    """Run the shared procedure on an input file whose name has a space.

    The input is created on the fly by converting a PHYLIP file to FASTA and
    is registered for clean-up before the procedure runs.
    """
    input_file = "Clustalw/temp horses.fasta"
    with open(input_file, "w") as handle:
        SeqIO.write(SeqIO.parse("Phylip/hennigian.phy", "phylip"), handle, "fasta")

    options = {"infile": input_file, "outfile": "temp with space.aln"}
    command = ClustalwCommandline(clustalw_exe, **options)
    self.add_file_to_clean(input_file)
    self.standard_test_procedure(command)
def __init__(self, inputSequences):
    """Build a neighbour-joining tree from the sequences in a FASTA file.

    The sequences are aligned with ClustalW2, the alignment is dumped to
    ``alignment.txt``, an identity distance matrix is computed from it and
    the tree is built by ``self.neighborJoin``.

    Args:
        inputSequences: path of the FASTA file to process.
    """
    self.__dists = {}

    print("reading fastas")
    # Only the number of sequences is needed, to size the distance matrix.
    num_seqs = sum(1 for _ in SeqIO.parse(inputSequences, "fasta"))
    self.__dm = np.zeros((num_seqs, num_seqs))

    # Distances come from a multiple alignment: scoring sequence pairs
    # directly was tried earlier and gave poor topologies.
    print("MSA")
    cline = ClustalwCommandline(
        # Raw string: the Windows separators are literal backslashes.
        r"C:\Program Files (x86)\ClustalW2\clustalw2",
        infile=inputSequences,
        outfile="outAlign.aln",
    )
    cline()
    aln = AlignIO.read('outAlign.aln', 'clustal')

    print("Alignment output to 'alignment.txt'")
    with open('alignment.txt', 'w') as f:
        for record in aln:
            # Ids are padded with spaces to at least 10 characters.
            f.write(str(record.id).ljust(10) + '\t' + str(record.seq) + '\n')

    calculator = DistanceCalculator('identity')
    dm = calculator.get_distance(aln)
    self.__nodes = dm.names
    # Mirror the entries at or below the diagonal into both the lookup dict
    # and the dense matrix.
    for i in range(len(dm.matrix)):
        for j in range(i + 1):
            distance = dm.matrix[i][j]
            self.__dists[(dm.names[i], dm.names[j])] = distance
            self.__dists[(dm.names[j], dm.names[i])] = distance
            self.__dm[i, j] = distance
            self.__dm[j, i] = distance

    print("Neighbour joining")
    nodes = [Node(leaf.id, str(leaf.seq)) for leaf in aln]
    self.__root = self.neighborJoin(nodes, self.__dm, 0)
def get_alignments(self):
    """Run a multiple alignment for the sequences in each input file.

    Every path in ``self.seqs_paths`` is aligned with the executable at
    ``self.clustal_path``. When ``self.custom_matrix`` is set, the matrix at
    ``self.matrix_path`` is used with ``transweight`` 0. stdout and stderr
    of each run are printed.
    """
    for path in self.seqs_paths:
        options = {'infile': path}
        if self.custom_matrix:
            options.update(transweight=0, matrix=self.matrix_path)
        command = ClustalwCommandline(self.clustal_path, **options)
        stdout, stderr = command()
        print(stdout, '\n', stderr)
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 21 22:34:11 2016

@author: sbassi
"""

import os

from Bio.Align.Applications import ClustalwCommandline

# The clustalw2 executable is expected in the current working directory.
base_dir = os.getcwd()
clustalw_exe = os.path.join(base_dir, 'clustalw2')

# Sample input, relative to the working directory.
data = os.path.join('samples', 'conglycinin.fasta')

# Build the command with both options up front, show it, then run it.
cl = ClustalwCommandline(clustalw_exe, infile=data, outfile='cltest.aln')
print('Command line: {}'.format(cl))
cl()
# Probe for a "clustalw" command by inspecting what "clustalw --version"
# prints; it is accepted only if the output looks like a CLUSTAL banner.
output = commands.getoutput("clustalw --version")
if "not found" not in output and "CLUSTAL" in output and "Multiple Sequence Alignments" in output:
    clustalw_exe = "clustalw"

# NOTE(review): clustalw_exe is presumably initialised earlier in the file
# (outside this excerpt) -- confirm. With no usable executable the tests
# cannot run.
if not clustalw_exe:
    raise MissingExternalDependencyError("Install clustalw or clustalw2 if you want to use it from Biopython.")

#################################################################

print "Checking error conditions"
print "========================="

# A missing input file must make the command fail with ApplicationError.
print "Empty file"
input_file = "does_not_exist.fasta"
assert not os.path.isfile(input_file)
cline = ClustalwCommandline(clustalw_exe, infile=input_file)
try:
    stdout, stderr = cline()
    # Reaching this line means the tool did not report an error.
    assert False, "Should have failed, returned:\n%s\n%s" % (stdout, stderr)
except ApplicationError, err:
    print "Failed (good)"
    # Python 2.3 on Windows gave (0, 'Error')
    # Python 2.5 on Windows gives [Errno 0] Error
    # Any one of the known error messages is accepted.
    assert (
        "Cannot open sequence file" in str(err)
        or "Cannot open input file" in str(err)
        or "non-zero exit status" in str(err)
    ), str(err)

print
print "Single sequence"
# GetExec(optList, frame): build and return the ClustalW command object for
# this plugin.
#
# What the code below does:
#   * stores ``frame`` and ``optList`` on the instance (``self.frame``,
#     ``self.boxList``);
#   * calls ``self.GetOutFile()`` and ``self.GetOutType()`` and discards the
#     results (presumably they set ``self.outfile`` and related state --
#     TODO confirm);
#   * creates a ClustalwCommandline for a fixed clustalw2.exe path with a
#     fixed input file and ``self.outfile`` as the output;
#   * sets ``cline.type`` to "protein" when ``self.frame.abet`` is "AA",
#     otherwise to "dna";
#   * copies pairwise / gap settings from ``self.frame.paramBoxes`` and,
#     when ``self.frame.options`` is set, further options from
#     ``self.boxList``;
#   * returns the configured command object.
#
# NOTE(review): ``sfbd`` in the '1PairwFastCheck' membership test is not
# defined anywhere in this method -- it looks like it should be
# ``self.frame.paramBoxes``; confirm.
# NOTE(review): the '1DiagCheck' test uses ``GetValue`` without calling it,
# unlike every other check here, so the condition does not depend on the
# checkbox state -- confirm whether ``GetValue()`` was intended.
# NOTE(review): this definition is stored flattened onto two physical lines,
# so the intended if/else nesting cannot be recovered from the text; the
# code is left exactly as found.
def GetExec(self, optList, frame): # Respond to the "clustalw" type command. self.frame = frame self.boxList = optList plugin_exe = r"C:/Program Files (x86)/ClustalW2/clustalw2.exe" dummy = self.GetOutFile() dummy = self.GetOutType() cline = ClustalwCommandline(plugin_exe,infile=r".\plugins\my_seq.fasta", outfile=self.outfile) if self.frame.abet=="AA": cline.type="protein" else: cline.type="dna" if '1PairCheck' in self.frame.paramBoxes: if self.frame.paramBoxes['1PairCheck'].GetValue(): if '1PairwFastCheck' in sfbd: if self.frame.paramBoxes['1PairwFastCheck'].GetValue(): if 'FastPairGapPenSpin' in self.frame.paramBoxes: cline.pairgap = int(self.frame.paramBoxes['FastPairGapPenSpin'].GetValue()) if 'FastKTupleSpin' in self.frame.paramBoxes: cline.ktuple = int(self.frame.paramBoxes['FastKTupleSpin'].GetValue()) if '1DiagCheck' in self.frame.paramBoxes: if self.frame.paramBoxes['1DiagCheck'].GetValue: if 'DFastTopDiagsSpin' in self.frame.paramBoxes: cline.topdiags = int(self.frame.paramBoxes['DFastTopDiagsSpin'].GetValue()) if 'DFastDiagWinSpin' in self.frame.paramBoxes: cline.window = int(self.frame.paramBoxes['DFastDiagWinSpin'].GetValue()) else: if 'SlowPairGapPenSpin' in self.frame.paramBoxes: cline.pwgapopen = int(self.frame.paramBoxes['SlowPairGapPenSpin'].GetValue()) if 'SlowPairGapExtPenSpin' in self.frame.paramBoxes: cline.pwgapext = int(self.frame.paramBoxes['SlowPairGapExtPenSpin'].GetValue()) else: if '1ProfileCheck' in self.frame.paramBoxes: if not self.frame.paramBoxes['1ProfileCheck'].GetValue(): if 'SlowPairGapPenSpin' in self.frame.paramBoxes: cline.gapopen = int(self.frame.paramBoxes['SlowPairGapPenSpin'].GetValue()) if 'SlowPairGapExtPenSpin' in self.frame.paramBoxes: cline.gapext = int(self.frame.paramBoxes['SlowPairGapExtPenSpin'].GetValue()) if self.frame.options: cline.output = str(self.boxList[1].GetValue()) cline.outorder = str(self.boxList[3].GetValue()) if '1PairCheck' in self.frame.paramBoxes: if 
self.frame.paramBoxes['1PairCheck'].GetValue(): if '1PairwFastCheck' in self.frame.paramBoxes: if self.frame.paramBoxes['1PairwFastCheck'].GetValue(): cline.score = str(self.boxList[5].GetValue()) elif self.frame.abet=="AA": cline.pwmatrix = str(self.boxList[5].GetValue()) else: cline.pwdnamatrix = str(self.boxList[5].GetValue()) else: if '1ProfileCheck' in self.frame.paramBoxes: if not self.frame.paramBoxes['1ProfileCheck'].GetValue(): if self.frame.abet=="AA": cline.matrix = str(self.boxList[5].GetValue()) else: cline.dnamatrix = str(self.boxList[5].GetValue()) cline.nopgap = str(self.boxList[7].GetValue()) cline.nohgap = str(self.boxList[9].GetValue()) cline.maxdiv = int(self.boxList[11].GetValue()) cline.transweight = int(self.boxList[13].GetValue()) cline.iteration = str(self.boxList[17].GetValue()) cline.numiter = int(self.boxList[15].GetValue()) return cline