Beispiel #1
0
    def get_alignment(cls, seq1: str, seq2: str, local: bool = True):
        """
        Generate an alignment between two protein sequences.

        Parameters
        ----------
        seq1: str
            The first sequence to be aligned
        seq2: str
            The second sequence to be aligned
        local: bool
            If false, a global alignment is performed
            (based on the Needleman-Wunsch algorithm),
            otherwise a local alignment is performed
            (based on the Smith–Waterman algorithm).
            (Default: True)

        Returns
        -------
        Alignment
            An instance of ``cls`` built from the first alignment
            returned by ``align_optimal``, with ``metadata`` holding its
            ``score``, ``sequence_identity``, ``symbols`` and ``codes``.
        """

        # biotite is imported at call time rather than at module import
        import biotite.sequence as seq
        import biotite.sequence.align as align

        # create the default matrix
        # TODO add more options for the choice of matrix
        matrix = align.SubstitutionMatrix.std_protein_matrix()

        alignments = align.align_optimal(
            seq.ProteinSequence(seq1),
            seq.ProteinSequence(seq2),
            matrix,
            local=local,
        )

        # Only the first of the returned alignments is kept
        alignment = alignments[0]

        return cls(
            alignment=alignment,
            metadata={
                "score": alignment.score,
                "sequence_identity": align.get_sequence_identity(alignment),
                "symbols": align.get_symbols(alignment),
                "codes": align.get_codes(alignment),
            },
        )
Beispiel #2
0
def test_conversion_to_symbols():
    """
    Check that an alignment built from gapped strings is converted back
    to the very same gapped strings by ``get_symbols()``.
    """
    gapped_strings = [
        "HAKLPRDD--WKL--",
        "HA--PRDDADWKLHH",
        "HA----DDADWKLHH",
    ]

    # The alignment is built from the ungapped sequences plus a trace
    sequences = []
    for gapped in gapped_strings:
        sequences.append(seq.ProteinSequence(gapped.replace("-", "")))
    trace = align.Alignment.trace_from_strings(gapped_strings)
    alignment = align.Alignment(sequences, trace, score=None)

    # Convert back to strings of symbols, rendering ``None`` entries
    # as the gap character
    restored = []
    for sym_list in align.get_symbols(alignment):
        restored.append(
            "".join("-" if sym is None else sym for sym in sym_list)
        )

    assert restored == gapped_strings
Beispiel #3
0
########################################################################
# If you are interested in more advanced visualization examples, have a
# look at the :doc:`example gallery <../examples/gallery/index>`.
#
# You can also do some simple analysis on these objects, like
# determining the sequence identity or calculating the score.
# For further custom analysis, it can be convenient to directly obtain
# the aligned symbol codes instead of the trace.

# Inspect the first entry of `alignments`
alignment = alignments[0]
print("Score: ", alignment.score)
# Recompute the score from the alignment itself, passing `matrix`
print("Recalculated score:", align.score(alignment, matrix=matrix))
print("Sequence identity:", align.get_sequence_identity(alignment))
print("Symbols:")
print(align.get_symbols(alignment))
print("symbols codes:")
print(align.get_codes(alignment))

########################################################################
#
# .. currentmodule:: biotite.sequence.io.fasta
#
# You may ask why you should recalculate the score when it has already
# been calculated directly via :func:`align_optimal()`.
# The answer is that you might load an alignment from an external
# alignment program as a FASTA file using :func:`get_alignment()`.
#
# .. currentmodule:: biotite.sequence.align
#
# If you want to perform a multiple sequence alignment, have a look at