def test_properties(self):
    """Check that options can be set as properties after construction."""
    aligner = ClustalOmegaCommandline(clustalo_exe)
    # Each option is assigned as an attribute instead of being passed to
    # the constructor.
    aligner.infile = "Registry/seqs.fasta"
    aligner.outfile = "temp_test.aln"
    aligner.outfmt = "clustal"
    self.standard_test_procedure(aligner)
def global_msa(
    matches,
    search_seq,
    file_name="msa",
    *,
    paralog_files=("synapsinIIa.fasta", "synapsinIIb.fasta", "synapsinIII.fasta"),
    clustalo_path="./clustalo",
):
    """Generate a global multiple sequence alignment with Clustal Omega.

    Writes the search sequence, the reference sequences and one record per
    match to ``<file_name>_in.fasta``, runs Clustal Omega on that file and
    asks it to store the alignment in ``<file_name>_out.aln`` using the
    "clustal" output format. Progress and the tool's output are printed.

    Args:
        matches: Iterable of mappings read via the keys ``"subject"`` (a
            string; ``-`` gap characters are stripped), ``"gi"`` (a string,
            prefixed with ``gi|`` to form the record id) and ``"title"``
            (used as the record description).
        search_seq: Record for the original search; it is written first in
            the input FASTA file. Presumably a ``SeqRecord`` -- confirm
            against the caller.
        file_name: Prefix for the generated input and output file names.
        paralog_files: Paths of FASTA files to read with ``SeqIO.read`` and
            include right after ``search_seq``. Defaults to the three
            synapsin paralog files that used to be hard-coded here.
        clustalo_path: Program name/path used to invoke Clustal Omega.

    Returns:
        The name of the alignment output file.
    """
    # The original search comes first, followed by the sequences it is
    # compared against.
    seqs = [search_seq]
    seqs.extend(SeqIO.read(path, "fasta") for path in paralog_files)

    for match in matches:
        # Remove gaps from the AA sequence before building the Seq object.
        # HACK: "L*R" somehow breaks things, so it is rewritten to "L-R".
        # A better approach might be:
        #   "".join(re.split(r"\w\*", match["subject"])).replace("-", "")
        residues = match["subject"].replace("-", "").replace("L*R", "L-R")
        seqs.append(
            SeqRecord(
                Seq(residues, IUPAC.protein),
                id="gi|" + match["gi"],
                description=match["title"],
            )
        )

    # Make the FASTA input file from all collected sequences.
    infile = f"{file_name}_in.fasta"
    SeqIO.write(seqs, infile, "fasta")

    # Set up Clustal Omega.
    outfile = f"{file_name}_out.aln"
    clustal = ClustalOmegaCommandline()
    clustal.program_name = clustalo_path
    clustal.outfmt = "clustal"
    clustal.infile = infile
    clustal.outfile = outfile
    clustal.force = True  # Allows overwriting an existing output file

    # Run the alignment and echo whatever the tool reported.
    print(f"Performing multiple sequence alignment on {len(seqs)} sequences")
    stdout, stderr = clustal()
    print(stdout + stderr)
    print(f"Results saved as '{outfile}' to {os.getcwd()}\n")

    return outfile