def get_alignment_commands(fastafile_name, outdir, aligner, threads):
    """Build the aligner command object for one gene FASTA file.

    Args:
        fastafile_name: Path of the FASTA file to align. The gene name is the
            last "/"-separated component, cut at its first ".".
        outdir: Prefix of the clustal output path. It is concatenated as-is;
            no path separator is inserted.
        aligner: "prank", "mafft" or "clustal".
        threads: Thread budget. It is forwarded to mafft/clustal only when it
            is 3 or less; prank never receives it.

    Returns:
        tuple: ``(command, fastafile_name)``.

    Raises:
        ValueError: If ``aligner`` is not a supported name. (This used to
            surface as an UnboundLocalError at the return statement.)
    """
    supported = ("prank", "mafft", "clustal")
    if aligner not in supported:
        raise ValueError(
            "Unsupported aligner %r; expected one of: %s"
            % (aligner, ", ".join(supported))
        )

    gene_name = fastafile_name.split('/')[-1].split('.')[0]

    if aligner == "prank":
        command = PrankCommandline(d=fastafile_name, o=gene_name, f=8,
                                   codon=True)
        return (command, fastafile_name)

    # NOTE(review): thread counts above 3 are deliberately not forwarded to
    # the aligner, presumably because the caller parallelises across genes
    # in that case -- confirm against the call site.
    forward_threads = threads <= 3

    if aligner == "mafft":
        options = {"input": fastafile_name, "auto": True, "nuc": True}
        if forward_threads:
            options["thread"] = threads
        command = MafftCommandline(**options)
    else:  # aligner == "clustal"
        options = {
            "infile": fastafile_name,
            "outfile": outdir + "aligned_gene_sequences/" + gene_name + ".aln.fas",
            "seqtype": "DNA",
        }
        if forward_threads:
            options["threads"] = threads
        command = ClustalOmegaCommandline(**options)

    return (command, fastafile_name)
def test_properties(self):
    """Check that options assigned as attributes reach the command line."""
    cline = ClustalOmegaCommandline(clustalo_exe)
    # Assign each option after construction rather than via keyword arguments.
    assignments = (
        ("infile", "Registry/seqs.fasta"),
        ("outfile", "temp_test.aln"),
        ("outfmt", "clustal"),
    )
    for option, value in assignments:
        setattr(cline, option, value)
    self.standard_test_procedure(cline)
def test_newtree_files(self):
    """Run the standard procedure with a guide tree output, twice.

    The second run re-uses the same command object after switching the guide
    tree file to a name that contains spaces.
    """
    cline = ClustalOmegaCommandline(
        clustalo_exe,
        infile="Fasta/f002",
        outfile="temp_test.aln",
        guidetree_out="temp_test.dnd",
        outfmt="clustal",
    )
    self.standard_test_procedure(cline)

    cline.guidetree_out = "temp with space.dnd"
    self.standard_test_procedure(cline)
def clustalo(geneSeq_file_path, treeid, alignment_out_path="",
             dist_matrix_out_path="", aligned=False,
             cmd_path="utils/clustalo-1.2.0"):
    """Run Clustal Omega to produce an alignment, a distance matrix, or both.

    Args:
        geneSeq_file_path: Input sequence file passed as ``infile``.
        treeid: Not used by this function; kept for interface compatibility.
        alignment_out_path: If non-empty, the alignment is written here in
            "clu" format with ``auto=True``.
        dist_matrix_out_path: If non-empty, a full distance matrix is written
            here.
        aligned: Only consulted when just the distance matrix is requested;
            when false, ``dealign=True`` is passed as well.
        cmd_path: Path of the Clustal Omega executable.

    Raises:
        ValueError: If neither output path is given. (Previously this hit an
            UnboundLocalError when the never-assigned command was called.)
    """
    if not alignment_out_path and not dist_matrix_out_path:
        raise ValueError(
            "clustalo() needs alignment_out_path and/or dist_matrix_out_path")

    # Imported after validation so bad arguments fail fast without Biopython.
    from Bio.Align.Applications import ClustalOmegaCommandline

    options = {
        "cmd": cmd_path,
        "infile": geneSeq_file_path,
        "verbose": False,
    }
    if alignment_out_path:
        options.update(outfile=alignment_out_path, outfmt="clu", auto=True)
    if dist_matrix_out_path:
        options.update(distmat_full=True, distmat_out=dist_matrix_out_path)
    if not alignment_out_path:
        # Distance matrix only.
        options["max_hmm_iterations"] = -1
        if not aligned:
            options["dealign"] = True

    run_clustalo = ClustalOmegaCommandline(**options)
    run_clustalo()
def test_newtree_files(self):
    """Exercise guide tree output, first plain and then with a spaced name."""
    options = {
        "infile": "Fasta/f002",
        "outfile": "temp_test.aln",
        "guidetree_out": "temp_test.dnd",
        "outfmt": "clustal",
    }
    cline = ClustalOmegaCommandline(clustalo_exe, **options)
    self.standard_test_procedure(cline)

    # Same command object, guide tree redirected to a file name with spaces.
    cline.guidetree_out = "temp with space.dnd"
    self.standard_test_procedure(cline)
def align_db(hxb2, ali_root, db_content):
    """Align every database sequence pairwise against the HXB2 reference.

    Args:
        hxb2: FASTA file whose first record is used as the reference.
        ali_root: Output prefix; each alignment is written to
            ``ali_root + record.id`` (plain concatenation, no separator is
            added). The directory is created if it does not exist.
        db_content: FASTA file with the sequences to align. Records sharing
            an id collapse to the last one read.
    """
    os.makedirs(ali_root, exist_ok=True)

    hxb2_seq = next(SeqIO.parse(hxb2, "fasta"))
    print("HXB2 sequence loaded (%s, %sbp)" % (hxb2_seq.id, len(hxb2_seq.seq)))

    db_content_seqs = {
        record.id: record for record in SeqIO.parse(db_content, "fasta")
    }
    print("Sequence database loaded (%s records)" % len(db_content_seqs))

    print("Aligning Sequences to HXB2 Reference")
    for record in tqdm(db_content_seqs.values()):
        ali_outfile = ali_root + record.id
        # NOTE(review): ali_infile is not assigned anywhere in this function;
        # it must come from module scope, otherwise this raises NameError.
        # Confirm where the scratch input path is meant to be defined.
        with open(ali_infile, 'w') as handle:
            SeqIO.write([hxb2_seq, record], handle, "fasta")
        clustalomega_cline = ClustalOmegaCommandline(
            infile=ali_infile, outfile=ali_outfile, verbose=True, auto=True,
            force=True, threads=16)
        clustalomega_cline()

    print("Done, pairwise alignments written out to %s" % ali_root)
def BuildOutputAlignments(options, region_name, AllRegionSequences, TemplateProtein):
    """Write the region's protein FASTA files and align them with Clustal Omega.

    Files are created in ``options.WorkingDirectory``:
    ``Protein_<region>.fasta`` (all region sequences), ``Protein_<region>.aln``
    and ``Protein_<region>.dnd`` (alignment and guide tree requested from the
    aligner), and ``Protein_ref_<region>.fasta`` (the template protein).
    """
    work_dir = options.WorkingDirectory
    prefix = os.path.join(work_dir, "Protein_%s" % region_name)
    protein_fasta = prefix + ".fasta"

    with open(protein_fasta, 'w') as handle:
        SeqIO.write(AllRegionSequences, handle, format="fasta")

    cmd = ClustalOmegaCommandline(
        Definitions.ClustalCommand,
        infile=protein_fasta,
        outfile=prefix + ".aln",
        guidetree_out=prefix + ".dnd",
        outfmt="clustal",
        force=True,
    )

    # The reference file is written before the aligner is launched.
    reference_fasta = os.path.join(work_dir, "Protein_ref_%s.fasta" % region_name)
    with open(reference_fasta, 'w') as handle:
        SeqIO.write(TemplateProtein, handle, format="fasta")

    cmd()
def clustal_align(infasta, outclustal):
    """Align ``infasta`` into ``outclustal`` (clustal format) through the shell.

    Raises whatever ``check_call`` raises when the command exits non-zero.
    """
    options = dict(infile=infasta, outfile=outclustal, outfmt='clustal',
                   verbose=True, auto=False)
    command = str(ClustalOmegaCommandline(**options))
    sp.check_call(command, shell=True)
def aln_struct_to_core(alnf, outf, seqf, resmap, cwd, merinfo, query, totmer, clustalopath, updates=False, cores=None):
    """Add sequences to an existing profile alignment and map the core residues.

    ``seqf`` is aligned against the profile ``alnf`` (both relative to
    ``cwd``) into ``outf``. The result is split into reference rows (index
    matching "refseq_") and structure rows (index matching ".pdb"). The core
    is ``cores`` when ``updates`` is true, otherwise ``find_core(refaln)``.

    ``query``, ``totmer`` and ``clustalopath`` are accepted but not used here.

    Returns:
        tuple: ``(completemers, resid, broken, refaln, structaln, core)``,
        where ``completemers`` maps each structure id to its chains that are
        not listed in ``broken``.
    """
    def in_cwd(name):
        return cwd + "/" + name

    aligned_path = in_cwd(outf)
    run_alignment = ClustalOmegaCommandline(
        infile=in_cwd(seqf),
        profile1=in_cwd(alnf),
        outfile=aligned_path,
        verbose=False,
        auto=True,
        force=True,
    )
    run_alignment()

    alndata, _ = parse_fasta_aln_multi(aligned_path)
    refaln = alndata.filter(regex="refseq_", axis=0)
    structaln = alndata.filter(regex=".pdb", axis=0)

    core = cores if updates else find_core(refaln)
    resid, broken = find_resid_onetoone(structaln, in_cwd(resmap), core)

    completemers = {}
    structure_ids = list({key.split("|")[0] for key in resid.index})
    for pdb in structure_ids:
        pdbid, mer = pdb.split(".")[0].split("_")
        chains = merinfo[pdbid][1][int(mer) - 1]
        # Keep only the chains that were not reported as broken.
        completemers[pdb] = [
            ch for ch in chains if pdb + "|" + ch + "|" not in broken
        ]
    return (completemers, resid, broken, refaln, structaln, core)
def dist_mat(entries, file, out_list):
    """Append to ``out_list`` the entry with the lowest mean pairwise distance.

    Records of ``file`` whose id has a second "|"-separated field contained in
    ``entries`` are written to ``temp.fasta``; Clustal Omega then writes the
    alignment to ``out.fasta`` and a full distance matrix to ``matrix`` (all in
    the current working directory).

    Args:
        entries: Indexable collection of wanted accessions.
        file: Path of the FASTA file to filter.
        out_list: List that receives the selected entry (mutated in place).

    Raises:
        ValueError: If the matrix contains no rows.
    """
    in_file, out_file, matrix = 'temp.fasta', 'out.fasta', 'matrix'

    # Parse the FASTA file for wanted sequences. The filter is lazy, so the
    # source handle has to stay open until the records have been written.
    with open(file, 'r') as source, open(in_file, 'w') as f:
        wanted = (x for x in SeqIO.parse(source, 'fasta')
                  if x.id.split('|')[1] in entries)
        SeqIO.write(wanted, f, 'fasta')

    # Run distance matrix/alignment on the selected sequences with clustalo.
    clustalomega_cline = ClustalOmegaCommandline(
        infile=in_file, outfile=out_file, distmat_out=matrix, force=True,
        distmat_full=True)
    clustalomega_cline()

    # Read the distance matrix: the first line is the sequence count. Each
    # following line is assumed to be a label followed by that many
    # distances, so the last `num` tokens are taken. This avoids guessing
    # which tokens are numbers from their text.
    with open(matrix, 'r') as m:
        num = int(m.readline())
        rows = [[float(x) for x in line.split()[-num:]]
                for line in m if line.strip()]

    # Mean distance of every sequence to all sequences.
    means = [sum(row) / num for row in rows]

    # NOTE(review): the row index is used to index `entries`, which assumes
    # the matrix rows are in the same order as `entries` -- confirm, since
    # rows follow the order of records in `file`.
    want_ind = means.index(min(means))
    out_list.append(entries[want_ind])
def make_alignment(self, method):
    """Align ``example.fasta`` and run a PHYLIP program on the result.

    Steps, all in the current working directory:
      1. Clustal Omega writes ``alignment.aln`` (any existing file is removed
         first).
      2. The alignment is converted to ``alignment.phy`` (phylip format).
      3. ``phylip <method>`` is run with the phylip file name and "y" fed on
         stdin.
      4. ``outfile`` and ``outtree`` are renamed to ``<method>.out`` and
         ``<method>.tree``.

    Args:
        method: PHYLIP program name (e.g. proml, dnaml).
    """
    in_file = "example.fasta"
    out_file = "alignment.aln"

    # Multiple sequence alignment
    if os.path.isfile(out_file):
        os.remove(out_file)
    clustalomega_cline = ClustalOmegaCommandline(
        infile=in_file, outfile=out_file, verbose=True, iterations=1,
        max_guidetree_iterations=1, max_hmm_iterations=1, dealign=True,
        outfmt="clu")
    print(clustalomega_cline)
    clustalomega_cline()

    # Convert to phylip format
    SeqIO.convert(out_file, "clustal", "alignment.phy", "phylip")

    # Phylogenetic analysis
    # NOTE(review): `method` is interpolated into shell commands below; it
    # must come from a trusted source.
    instructions = bytes("alignment.phy\ny\n", 'utf-8')
    phylip = Popen("phylip " + method, stdin=PIPE, shell=True)
    phylip.communicate(instructions)

    # Rename the generic PHYLIP output files after the method used.
    for produced, suffix in (("outfile", ".out"), ("outtree", ".tree")):
        mover = Popen("mv " + produced + " " + method + suffix, stdin=PIPE,
                      shell=True)
        mover.communicate()
def alignClustalSequences(inFile, outFile):
    """Return, without running it, the Clustal Omega command for this pair.

    The command reads ``inFile``, writes ``outFile`` and sets ``verbose`` and
    ``auto``.
    """
    options = {
        "infile": inFile,
        "outfile": outFile,
        "verbose": True,
        "auto": True,
    }
    return ClustalOmegaCommandline(**options)
def align_genes(gene1, gene2):
    """Align the two genes with clustal-omega.

    Args:
        gene1: First sequence record.
        gene2: Second sequence record.

    Returns:
        The open ``NamedTemporaryFile`` that Clustal Omega wrote the alignment
        to. The caller owns it and must close it.

    Raises:
        Anything raised while writing the input or running Clustal Omega; in
        that case both temporary files are closed before re-raising.
    """
    # Make temp files for clustal in and out
    clust_in = tempfile.NamedTemporaryFile(prefix='CO_in_', suffix='.fasta',
                                           mode='w+t')
    try:
        clust_out = tempfile.NamedTemporaryFile(prefix='CO_out_',
                                                suffix='.fasta', mode='w+t')
        try:
            # Write the sequences into the temp file
            SeqIO.write([gene1, gene2], clust_in, 'fasta')
            # Seek to the beginning else the file will appear empty
            clust_in.seek(0)
            # Run the command
            cline = ClustalOmegaCommandline(
                infile=clust_in.name, outfile=clust_out.name,
                seqtype='protein', force=True, iterations=10,
                distmat_full=True, distmat_full_iter=True)
            cline()
        except Exception:
            # Nobody will receive the output handle, so release it here.
            clust_out.close()
            raise
    finally:
        clust_in.close()
    # Return the handle to the output file
    return clust_out
def allelealigner(self):
    """Perform a multiple sequence alignment of the allele sequences.

    Starts ``self.cpus`` daemon worker threads running ``self.alignthreads``,
    queues one Clustal Omega command per allele file of every sample, and
    blocks until the queue has been fully processed.

    For each sample this sets ``alignpath``, ``alignedalleles`` and
    ``clustalomega`` (the string form of the last command built).
    """
    logging.info('Aligning alleles')
    # Create the threads for the analysis
    for _ in range(self.cpus):
        worker = Thread(target=self.alignthreads, args=())
        # Thread.setDaemon() is deprecated; set the attribute instead.
        worker.daemon = True
        worker.start()
    for sample in self.samples:
        sample.alignpath = os.path.join(self.path, 'alignedalleles',
                                        sample.organism)
        make_path(sample.alignpath)
        # Create a list to store objects
        sample.alignedalleles = list()
        for outputfile in sample.allelefiles:
            aligned = os.path.join(sample.alignpath,
                                   os.path.basename(outputfile))
            sample.alignedalleles.append(aligned)
            # Create the command line call
            clustalomega = ClustalOmegaCommandline(infile=outputfile,
                                                   outfile=aligned,
                                                   threads=4, auto=True)
            sample.clustalomega = str(clustalomega)
            self.queue.put((sample, clustalomega, outputfile, aligned))
    self.queue.join()
def runclustalomega(self):
    """Run clustalomega once on ``self.infile`` and log the outcome.

    The alignment is written to ``self.outfile`` in ``self.outfmt`` format.
    Standard output goes to ``self.clustalolog`` at info level; an
    ``ApplicationError`` is logged at error level and not re-raised.
    """
    try:
        # Run clustal omega using the multifasta file
        clustalo_cline = ClustalOmegaCommandline(
            infile=self.infile,
            cmd="clustalo",
            outfile=self.outfile,
            # "RNA"/"DNA"
            seqtype="PROTEIN",
            max_hmm_iterations=2,
            infmt="fasta",
            # "aln", "phy"
            outfmt=self.outfmt,
            iterations=3,  # Notable
            verbose=True,
            force=True,
            log=self.logpath)
        # Execute exactly once; the command used to be invoked twice, which
        # repeated the whole alignment.
        stdout, _ = clustalo_cline()
        self.clustalolog.info(stdout)
    except ApplicationError as err:
        self.clustalolog.error(err)
def alignSeqs(unalignedFastaPath: str, alignedFastaName: str) -> list:
    """Align the sequences of a FASTA file with Clustal Omega.

    This function requires the installation of the standalone ClustalOmega
    alignment software.

    Args:
        unalignedFastaPath: Path of the input FASTA file.
        alignedFastaName: Path the aligned FASTA file is written to.

    Returns:
        The lines of the aligned file that contain no ">" character, in file
        order and with their line endings kept.
    """
    from Bio.Align.Applications import ClustalOmegaCommandline

    cOmegaCommand = ClustalOmegaCommandline(infile=unalignedFastaPath,
                                            outfile=alignedFastaName,
                                            verbose=True, auto=True)
    cOmegaCommand()

    with open(alignedFastaName, 'r') as fasta_file:
        # Skip header lines; everything else is returned verbatim.
        return [line for line in fasta_file if ">" not in line]
def run_clustal_omega(msa_file):
    """Align ``msa_file`` into ``output/msa_output_carbapemenase.fasta``.

    The command string gets ``--force`` appended, is printed, and is then
    executed through ``os.system``.
    """
    cline = ClustalOmegaCommandline(
        infile=msa_file,
        outfile='output/msa_output_carbapemenase.fasta',
        auto=False,
    )
    cmd = "%s --force" % cline
    print(cmd)
    os.system(cmd)
def build_phylogeny_trees():
    """Align each homologous gene file and build a UPGMA tree from it.

    For every file in ``out/homologous_gene_sequences/`` the alignment is
    written under ``out/aligned_homologous_gene_sequences/``, an identity
    distance matrix is computed, and the resulting tree is drawn, printed as
    ASCII and saved as nexus under ``out/phylogenetic_trees/``.
    """
    source_dir = "out/homologous_gene_sequences/"
    aligned_dir = "out/aligned_homologous_gene_sequences/"

    for gene_file in os.listdir(source_dir):
        aligned_path = aligned_dir + gene_file
        command = ClustalOmegaCommandline(infile=source_dir + gene_file,
                                          outfile=aligned_path,
                                          verbose=True, auto=True)
        os.system(str(command))
        alignment = AlignIO.read(aligned_path, 'fasta')

        # Distance matrix
        distances = DistanceCalculator('identity').get_distance(alignment)
        tree = DistanceTreeConstructor().upgma(distances)

        Phylo.draw(tree)
        print('\nPhylogenetic Tree\n', gene_file)
        Phylo.draw_ascii(tree)
        Phylo.write([tree],
                    'out/phylogenetic_trees/{}_tree.nex'.format(gene_file),
                    'nexus')
def jalview(request, contig):
    """Align the reads of a contig and render the Jalview page.

    The reads of every cluster of ``contig`` are written as FASTA to
    ``<module dir>/jalview/<client ip>/clustalIN.fasta``, aligned into
    ``clustalOUT.aln`` next to it, and the ``jalview.html`` template is
    rendered with the contig id.

    Print calls use the single-argument parenthesised form so the block
    parses under both Python 2 and Python 3.
    """
    print("\n\nJALVIEW! \n\n")
    path = os.path.dirname(os.path.abspath(__file__))
    # NOTE(review): `ip` ends up in a filesystem path and, through the file
    # names, in a shell command. If get_client_ip() can return a
    # client-controlled header value this needs validation -- confirm.
    ip = get_client_ip(request)
    idcluster = Cluster2.objects.values('idcluster').filter(idcontig=contig)
    sequences = Read2.objects.values('idread', 'readseq').filter(
        idcluster__in=idcluster)

    work_dir = path + "/jalview/" + ip
    try:
        os.makedirs(work_dir)
    except OSError:
        # An already existing folder is fine; any other failure is not.
        if not os.path.isdir(work_dir):
            raise

    in_file = work_dir + '/clustalIN.fasta'
    out_file = work_dir + '/clustalOUT.aln'

    with open(in_file, 'wb+') as destination:
        # Leftover debug output: the file was just truncated on open.
        print("REAAAAAAAAAAAAAD" + destination.read())
        for sequence in sequences:
            destination.write(">" + str(sequence["idread"]) + "\n")
            destination.write(sequence["readseq"] + "\n")

    clustalomega_cline = ClustalOmegaCommandline(infile=in_file,
                                                 outfile=out_file,
                                                 verbose=True, auto=False)
    print(clustalomega_cline)
    os.system(str(clustalomega_cline) + ' --force ')
    return render(request, 'chromevaloaAPP/jalview.html', {'idcontig': contig})
def cdsAlign(CDSfile, outfile):
    """Print and run the Clustal Omega command aligning ``CDSfile``.

    The command is executed through the shell; its exit status is not checked.
    """
    command_line = str(ClustalOmegaCommandline(
        infile=CDSfile,
        outfile=outfile,
        verbose=True,
        auto=True,
    ))
    print(command_line)
    subprocess.run(command_line, shell=True)
def clustalW():
    """Align ``teste.fasta`` into ``out.txt``, overwriting any existing output."""
    options = dict(infile="teste.fasta", outfile="out.txt", verbose=True,
                   auto=True, force=True)
    run_alignment = ClustalOmegaCommandline(**options)
    run_alignment()
def weblogo(request):
    """Align the submitted sequences and build a WebLogo from the alignment.

    The ``seq`` query parameter is URL-unquoted and written to
    ``unaligned.fasta``; Clustal Omega (Windows build under ``crmapp``) writes
    ``out_filename.fasta``; the text logo is written to ``weblogo.txt``. Both
    FASTA scratch files are removed afterwards.

    Returns:
        JsonResponse: ``{'data': <alignment text>}`` where the wrapped
        sequence lines of each record have been joined.
    """
    # Performs clustal alignment in order to use it in the weblogo analysis.
    seqs = unquote(request.GET.get('seq'))
    in_file = "unaligned.fasta"
    out_file = "out_filename.fasta"

    with open(in_file, "w") as unaligned:
        unaligned.write(seqs)

    clustalomega_cline = ClustalOmegaCommandline(infile=in_file,
                                                 outfile=out_file,
                                                 verbose=True, auto=False)
    print(clustalomega_cline)
    # Backslashes are escaped explicitly; the resulting command text is
    # unchanged.
    os.system('cmd /c crmapp\\clustal-omega-1.2.2-win64\\'
              + str(clustalomega_cline) + ' --force')

    with open(out_file, "r") as file_out:
        seqs_aligned = file_out.readlines()
        # readlines() leaves the handle at end of file; rewind so the parser
        # is handed the alignment from its start.
        file_out.seek(0)
        seqs = read_seq_data(file_out)

    logodata = LogoData.from_seqs(seqs)
    logooptions = LogoOptions()
    logooptions.title = "VFP WEBSERVER"
    logoformat = LogoFormat(logodata, logooptions)
    weblogo_txt = txt_formatter(logodata, logoformat)
    # NOTE(review): the JPEG rendering is computed but never used; the call
    # is kept in case its failure modes are relied upon -- confirm and drop.
    jpeg_formatter(logodata, logoformat)

    # Strip the b'...' wrapper of the bytes repr and restore the escapes.
    logo_text = str(weblogo_txt)
    data_weblogo = logo_text[2:len(logo_text) - 1].replace(
        '\\n', '\n').replace('\\t', '\t')
    with open("weblogo.txt", "w") as logo_file:
        logo_file.write(data_weblogo)

    os.remove(in_file)
    os.remove(out_file)

    # Join wrapped sequence lines: a line keeps its newline only when it is
    # the last line before the next header, or directly follows such a line.
    output = seqs_aligned[0]
    seq_found = False
    for i in range(1, len(seqs_aligned) - 1):
        if seqs_aligned[i + 1][0] == '>':
            output += seqs_aligned[i]
            seq_found = True
        elif seq_found:
            output += seqs_aligned[i]
            seq_found = False
        else:
            output += seqs_aligned[i][:-1]
    output += seqs_aligned[-1]
    return JsonResponse({'data': output}, safe=False)
def clustalo():
    """Align every file listed in the module-level ``files`` sequence.

    For each entry the part before the first "." is printed and used as the
    stem of the ``.aln`` output file.
    """
    # Iterate over a copy so changes to `files` do not affect the loop.
    for fasta_files in files[:]:
        fasta_file_s = fasta_files.split('.')[0]
        # Single-argument parenthesised print parses under Python 2 and 3.
        print(fasta_file_s)
        cline = ClustalOmegaCommandline('clustalo', infile=fasta_files,
                                        outfile=fasta_file_s + '.aln',
                                        verbose=False, auto=True)
        cline()
def cluster_align(self, clusterID, allowedOrganisms=None):
    """Align the DNA sequences of one homology cluster with Clustal Omega.

    Args:
        clusterID: Identifier passed to ``self.homDB.get_cluster``.
        allowedOrganisms: Optional container of organisms to keep; ``None``
            keeps all organisms of the cluster.

    Returns:
        The alignment read back (FASTA format) from Clustal Omega's output.
    """
    homCluster = self.homDB.get_cluster(clusterID)

    alignSeqs = []
    for org in homCluster:
        if allowedOrganisms is None or org in allowedOrganisms:
            for seqid in homCluster[org]:
                alignSeqs.append((org, seqid))

    seqRecords = []
    # NOTE(review): this mapping is never read; it is kept because
    # get_element() may validate the pair -- confirm and drop.
    seqID2Element = {}
    for org, seqid in alignSeqs:
        genSeq = self.genomDB.get_sequence(org, seqid)
        seqRecID = "_".join([org, seqid])
        seqID2Element[seqRecID] = self.genomDB.get_element(org, seqid)
        seqRecords.append(
            SeqRecord(Seq(genSeq, generic_dna), id=seqRecID, description=""))

    with tempfile.NamedTemporaryFile('w', delete=True) as tmpFastaFile, \
            tempfile.NamedTemporaryFile('w', delete=True) as tmpMSAFile:
        SeqIO.write(seqRecords, tmpFastaFile, "fasta")
        # Make sure the records are on disk before the aligner reads them.
        tmpFastaFile.flush()

        clustalomega_cline = ClustalOmegaCommandline(
            infile=tmpFastaFile.name, outfile=tmpMSAFile.name, force=True,
            outfmt='fa', verbose=True, auto=True)
        command = str(clustalomega_cline) + " --full"
        print(command)
        # getoutput() expects the command as one string, not a list.
        subprocess.getoutput(command)
        print("Clustalomega finished")

        with open(tmpMSAFile.name, 'r') as fin:
            return AlignIO.read(fin, "fasta")
def algo_msa(msa_type: str, seq_id: List[int], consensus: bool = None):
    """Run a multiple sequence alignment over up to ten stored sequences.

    Args:
        msa_type: "muscle", "clustalo", or anything else for the mview
            rendering of a Clustal Omega alignment.
        seq_id: Ids of the ``Virus`` rows to align; the FASTA input keeps
            this order.
        consensus: Truthy to switch mview's consensus on (mview branch only).

    Returns:
        An error message string when more than ten ids are given; otherwise
        the result of the aligner call, or of ``runCommand`` for mview.

    The scratch files under ``tmp/`` are removed even when a step fails.
    """
    if len(seq_id) > 10:
        return "Cannot process more than 10 sequences for MSA. Operation aborted."

    result = Virus.query.with_entities("id", "fasta").filter(
        Virus.id.in_(seq_id))
    result_dict = {r[0]: r[1] for r in result}

    fasta_file = "tmp/%s" % str(uuid.uuid4())
    clustal_file = None
    try:
        with open(fasta_file, "w") as fasta:
            # Ensure ordering of sequences based on input
            for i in seq_id:
                fasta.write(result_dict[i] + "\n\n")

        if msa_type == "muscle":
            msa_command = MuscleCommandline("muscle", input=fasta_file,
                                            html=True, quiet=True)
            ret = msa_command()
        elif msa_type == "clustalo":
            msa_command = ClustalOmegaCommandline(infile=fasta_file)
            ret = msa_command()
        else:
            # msa_type == "mview"
            clustal_file = "tmp/%s" % str(uuid.uuid4())
            msa_command = ClustalOmegaCommandline(infile=fasta_file,
                                                  outfile=clustal_file)
            msa_command()
            con = "on" if consensus else "off"
            ret = runCommand([
                "mview", "--css", "on", "--pcid", "aligned", "--ruler", "on",
                "--width", "80", "-coloring", "mismatch", "-colormap", "pink",
                "-consensus", con, "-con_threshold", "100", "-html", "head",
                "-in", "fasta", clustal_file
            ])
    finally:
        # Clean up whatever scratch files were actually created.
        for scratch in (clustal_file, fasta_file):
            if scratch is not None and os.path.exists(scratch):
                os.remove(scratch)
    return ret
def clustal_all(request):
    """Align posted sequences with Clustal Omega and return the result text.

    Reads ``seqs``, ``type`` and optionally ``os`` from ``request.data``.
    ``type`` "fasta" and "phylip" are passed through as the output format;
    anything else falls back to "clu". ``os`` defaults to "linux"; "windows"
    runs the bundled executable through ``cmd``.

    Returns:
        HttpResponse with the alignment as ``text/plain`` for POST requests;
        ``None`` for any other method (unchanged behaviour).
    """
    if request.method != "POST":
        # NOTE(review): non-POST requests get no response object -- confirm
        # whether an explicit error response is wanted.
        return None

    data = request.data
    seqs = data['seqs']
    out_format = data['type']
    try:
        type_os = data['os']
    except KeyError:
        type_os = "linux"

    if out_format == "fasta":
        out_file = "aligned.fasta"
    elif out_format == "phylip":
        out_file = "aligned_clustal.phy"
    else:
        out_file = "aligned_clustal.alm"
        out_format = "clu"

    in_file = "unaligned_clustal.fasta"
    with open(in_file, "w") as unaligned:
        unaligned.write(seqs)

    clustalomega_cline = ClustalOmegaCommandline(
        infile=in_file, outfile=out_file, verbose=True, auto=False,
        outfmt=out_format, guidetree_out="tree.dnd")
    print(clustalomega_cline)

    if type_os == "windows":
        # Backslashes are escaped explicitly; the command text is unchanged.
        os.system('cmd /c crmapp\\clustal-omega-1.2.2-win64\\'
                  + str(clustalomega_cline) + ' --force')
    else:
        cmd = str(clustalomega_cline) + ' --force'
        p = subprocess.Popen(['/bin/bash', '-c', cmd])
        p.communicate()

    with open(out_file, "r") as file_out:
        data_send = file_out.read()
    return HttpResponse(data_send, content_type="text/plain")
def check_what_algorithm(alg, in_file, out_file):
    """Select the alignment command for the requested algorithm.

    The algorithm name is matched case-insensitively for both supported
    values (previously only "KALIGN" was).

    Args:
        alg: Algorithm name.
        in_file: Input file for the aligner.
        out_file: Output file for the aligner.

    Returns:
        A Clustal Omega command object for "CLUSTAL", ``0`` for "KALIGN",
        and ``None`` for any other name.
    """
    algorithm = alg.upper()
    if algorithm == "CLUSTAL":
        return ClustalOmegaCommandline(infile=in_file, outfile=out_file,
                                       verbose=True, auto=True)
    if algorithm == "KALIGN":
        return 0
    return None
def COP7(in_file, out_file, outfmt, logfile):
    """Run Clustal Omega on a DNA FASTA file with ``auto`` set to True.

    Args:
        in_file: Input FASTA file.
        out_file: Alignment output file.
        outfmt: Output format name passed to the aligner.
        logfile: File passed as the aligner's ``log`` option.

    The command's stdout and stderr are printed, followed by a confirmation
    message. Returns ``None``.
    """
    clustalo_cline = ClustalOmegaCommandline(
        infile=in_file, outfile=out_file, seqtype="DNA", infmt="fasta",
        outfmt=outfmt, iterations=1, verbose=True, threads=8, auto=True,
        log=logfile)
    # Execute exactly once; a second, redundant invocation used to repeat the
    # whole alignment.
    stdout, stderr = clustalo_cline()
    print(stdout, stderr)
    print("\n" + "File has been created." + "\n")
def test_simple_fasta(self):
    """Run the standard procedure on a plain FASTA input."""
    options = {
        "infile": "Registry/seqs.fasta",
        "outfile": "temp_test.aln",
        "outfmt": "clustal",
    }
    self.standard_test_procedure(
        ClustalOmegaCommandline(clustalo_exe, **options))
def test_output_filename_with_spaces(self):
    """Run the standard procedure with spaces in the output file name."""
    spaced_output = "temp with spaces.aln"
    cline = ClustalOmegaCommandline(
        clustalo_exe,
        infile="Registry/seqs.fasta",
        outfile=spaced_output,
        outfmt="clustal",
    )
    self.standard_test_procedure(cline)
def clustal(in_file="unaligned.fasta", out_file="out_filename.fasta"):
    """Align ``in_file`` into ``out_file`` in clustal format (Windows build).

    The command is run through ``cmd /c`` using the executable bundled under
    ``crmapp\\clustal-omega-1.2.2-win64``, with ``--outfmt clustal --force``
    appended.
    """
    clustalomega_cline = ClustalOmegaCommandline(infile=in_file,
                                                 outfile=out_file,
                                                 verbose=True, auto=False)
    # Backslashes are escaped explicitly ("\c" is not a valid escape
    # sequence); the resulting command text is unchanged.
    os.system('cmd /c crmapp\\clustal-omega-1.2.2-win64\\'
              + str(clustalomega_cline) + ' --outfmt clustal --force')
if output.startswith("Clustal Omega"): clustalo_exe = "clustalo" if not clustalo_exe: raise MissingExternalDependencyError(\ "Install clustalo if you want to use Clustal Omega from Biopython.") ################################################################# print "Checking error conditions" print "=========================" print "Empty file" input_file = "does_not_exist.fasta" assert not os.path.isfile(input_file) cline = ClustalOmegaCommandline(clustalo_exe, infile=input_file) try: stdout, stderr = cline() assert False, "Should have failed, returned:\n%s\n%s" % (stdout, stderr) except ApplicationError, err: print "Failed (good)" #Python 2.3 on Windows gave (0, 'Error') #Python 2.5 on Windows gives [Errno 0] Error assert "Cannot open sequence file" in str(err) or \ "Cannot open input file" in str(err) or \ "non-zero exit status" in str(err), str(err) print print "Single sequence" input_file = "Fasta/f001" assert os.path.isfile(input_file)