def test_properties(self):
        """Configure Clustal Omega through attribute assignment, then run the shared checks."""
        cline = ClustalOmegaCommandline(clustalo_exe)

        # Every option is assigned as an attribute after construction; that
        # assignment path is what this test exercises.
        cline.outfmt = "clustal"
        cline.outfile = "temp_test.aln"
        cline.infile = "Registry/seqs.fasta"

        self.standard_test_procedure(cline)
Example #2
0
    def test_properties(self):
        """Configure Clustal Omega through attribute assignment, then run the shared checks."""
        cline = ClustalOmegaCommandline(clustalo_exe)

        # Every option is assigned as an attribute after construction; that
        # assignment path is what this test exercises.
        cline.outfmt = "clustal"
        cline.outfile = "temp_test.aln"
        cline.infile = "Registry/seqs.fasta"

        self.standard_test_procedure(cline)
Example #3
0
def global_msa(matches, search_seq, file_name="msa"):
    """
    Run a global multiple sequence alignment with Clustal Omega.

    The input FASTA is assembled in this order: ``search_seq``, the three
    synapsin paralogs read from FASTA files given by relative path
    (currently hard-coded), then one record per entry of ``matches``.

    Each entry of ``matches`` is indexed with the keys "subject" (sequence
    text, possibly containing "-" gap characters), "gi" (a string appended
    to "gi|" to form the record id) and "title" (used as the description).
    ``search_seq`` is written out alongside the other records, so it is
    presumably a SeqRecord as well -- confirm against the caller.

    The sequences are written to ``<file_name>_in.fasta`` and the alignment
    is requested in Clustal format at ``<file_name>_out.aln``. The name of
    that output file is returned.
    """
    # NOTE: synapsin paralogs are currently hard-coded.
    # Disabled extras: "GFPclover.fasta", "FireflyLuciferase.fasta".
    paralog_files = (
        "synapsinIIa.fasta",
        "synapsinIIb.fasta",
        "synapsinIII.fasta",
    )

    # The original search sequence goes first, followed by the paralogs.
    records = [search_seq]
    records.extend(SeqIO.read(path, "fasta") for path in paralog_files)

    for hit in matches:
        # Drop the gap characters to recover the plain amino-acid sequence.
        # "L*R" somehow breaks things, so it is rewritten as "L-R" (a hack).
        # A better approach might be:
        # ''.join(re.split("\w\*", match["subject"])).replace("-", "")
        residues = hit["subject"].replace("-", "").replace("L*R", "L-R")
        records.append(
            SeqRecord(
                Seq(residues, IUPAC.protein),
                id="gi|" + hit["gi"],
                description=hit["title"],
            )
        )

    # Write the combined records as the aligner's input file.
    fasta_path = f"{file_name}_in.fasta"
    SeqIO.write(records, fasta_path, "fasta")

    # Configure Clustal Omega in one go via constructor keywords.
    aln_path = f"{file_name}_out.aln"
    run_clustal = ClustalOmegaCommandline(
        cmd="./clustalo",
        infile=fasta_path,
        outfile=aln_path,
        outfmt="clustal",
        force=True,  # Allows overwriting file
    )

    # Run the alignment and echo whatever the tool printed.
    print(f"Performing multiple sequence alignment on {len(records)} sequences")
    out_text, err_text = run_clustal()
    print(out_text + err_text)
    print(f"Results saved as '{aln_path}' to {os.getcwd()}\n")

    return aln_path