Exemple #1
0
 def it_raises_on_bad_fasta():
     """
     Sequence text with no preceding header line must make fasta_split raise
     a ValueError whose message mentions "data before the header".
     """
     headerless_fasta = """
     MKSGSGGGSPTSLWGLLFLSAALSLWPTSGEICGPGIDIRNDYQQLKRLENCTVIEGYLH
     """
     with zest.raises(ValueError) as raised:
         uniprot.fasta_split(headerless_fasta)

     message = str(raised.exception)
     assert "data before the header" in message
Exemple #2
0
def protein_fasta(fasta_str, override_name=None):
    """
    Parse fasta format as defined by Uniprot.
    https://www.uniprot.org/help/fasta-headers

    Each header is cut at its first space and the remaining token is split
    on "|". When that yields two or more fields the second field is used as
    the id (e.g. "sp|P08069|IGF1R_HUMAN ..." -> "P08069"); otherwise the
    whole token is used as the id.

    Arguments:
        fasta_str: The string (not the file path!) of the fasta
        override_name: If non-None will replace the found protein name.
            The fasta must then not contain more than one record.

    Returns:
        List(Dict(id, seqstr))

    Raises:
        AssertionError: If override_name is given and the fasta contains
            more than one record.
    """

    seqs = uniprot.fasta_split(fasta_str)

    # Checked explicitly rather than with an `assert` statement so the rule
    # still holds under `python -O`, where assert statements are removed.
    # AssertionError is kept as the exception type so existing callers see
    # the same failure as before.
    if override_name is not None and len(seqs) > 1:
        raise AssertionError(
            f"override_name requires a single fasta record, found {len(seqs)}"
        )

    ret = []
    for header, seqstr in seqs:
        # Only the first space-delimited token of the header carries the id.
        token = header.split(" ")[0]
        parts = token.split("|")
        protein_id = parts[1] if len(parts) >= 2 else parts[0]

        if override_name is not None:
            protein_id = override_name

        ret.append(dict(id=protein_id, seqstr=seqstr))

    return ret
Exemple #3
0
 def it_returns_multiple_lines_and_headers():
     """
     Two records, each with a two-line sequence, are expected back as
     (header, sequence) tuples: the header without its ">" marker and the
     sequence lines joined into one string.
     """
     first_header = "sp|P08069|IGF1R_HUMAN Insulin-like growth factor 1 receptor OS=H**o sapiens OX=9606 GN=IGF1R PE=1 SV=1"
     first_seq = "MKSGSGGGSPTSLWGLLFLSAALSLWPTSGEICGPGIDIRNDYQQLKRLENCTVIEGYLHILLISKAEDYRSYRFPKLTVITEYLLLFRVAGLESLGDLFPNLTVIRGWKLFYNYALVIF"
     second_header = "sp|P1010|IGF1R_HUMAN something else OS=H**o sapiens OX=9606 GN=IGF1R PE=1 SV=1"
     second_seq = "ZZZZZZZAAAAAAA"
     expected = [
         (first_header, first_seq),
         (second_header, second_seq),
     ]

     two_record_fasta = """
     >sp|P08069|IGF1R_HUMAN Insulin-like growth factor 1 receptor OS=H**o sapiens OX=9606 GN=IGF1R PE=1 SV=1
     MKSGSGGGSPTSLWGLLFLSAALSLWPTSGEICGPGIDIRNDYQQLKRLENCTVIEGYLH
     ILLISKAEDYRSYRFPKLTVITEYLLLFRVAGLESLGDLFPNLTVIRGWKLFYNYALVIF
     >sp|P1010|IGF1R_HUMAN something else OS=H**o sapiens OX=9606 GN=IGF1R PE=1 SV=1
     ZZZZZZZ
     AAAAAAA
     """
     assert uniprot.fasta_split(two_record_fasta) == expected
Exemple #4
0
 def it_returns_on_none():
     """Passing None to fasta_split is expected to yield an empty list."""
     assert uniprot.fasta_split(None) == []
Exemple #5
0
 def it_returns_empty():
     """Passing an empty string to fasta_split is expected to yield an empty list."""
     no_records = []
     assert uniprot.fasta_split("") == no_records