def test_needleman_wunsch_align_DNA(self):
        """Test the Needleman-Wunsch sequence alignment for two DNA sequences.

        The sequence 'GCATTACT' is aligned against 'GATTACT' using the SIMILARITY_DNA substitution matrix with gap opening and gap extension penalties of 1.  The second sequence is expected to receive a single gap at its second position.
        """

        # The two input sequences.
        first_seq = 'GCATTACT'
        second_seq = 'GATTACT'
        print("\nIn:")
        for sequence in (first_seq, second_seq):
            print(sequence)

        # Run the alignment.
        score, aligned_first, aligned_second, gaps = needleman_wunsch_align(
            first_seq,
            second_seq,
            sub_matrix=SIMILARITY_DNA,
            sub_seq=SIMILARITY_DNA_SEQ,
            gap_open_penalty=1,
            gap_extend_penalty=1,
        )
        print("\nOut:")
        for result in (score, aligned_first, aligned_second, gaps):
            print(result)
        print("\n")

        # The aligned strings must match the expected alignment.
        self.assertEqual(aligned_first, 'GCATTACT')
        self.assertEqual(aligned_second, 'G-ATTACT')

        # The expected gap matrix (one row per sequence, one column per alignment position).
        expected_gaps = [
            [0, 0, 0, 0, 0, 0, 0, 0],
            [0, 1, 0, 0, 0, 0, 0, 0],
        ]

        # Element-wise comparison against the returned gap matrix.
        for row_index, expected_row in enumerate(expected_gaps):
            for col_index, expected in enumerate(expected_row):
                self.assertEqual(gaps[row_index, col_index], expected)
Example #2
0
    def test_needleman_wunsch_align_DNA(self):
        """Test the Needleman-Wunsch sequence alignment for two DNA sequences.

        'GCATTACT' and 'GATTACT' are aligned with the SIMILARITY_DNA substitution matrix, using 1 for both the gap opening and gap extension penalties.  The aligned strings and every element of the returned gap matrix are checked.
        """

        # The input sequences, echoed for debugging.
        inputs = ('GCATTACT', 'GATTACT')
        print("\nIn:")
        print(inputs[0])
        print(inputs[1])

        # Run the alignment.
        alignment_kwargs = {
            'sub_matrix': SIMILARITY_DNA,
            'sub_seq': SIMILARITY_DNA_SEQ,
            'gap_open_penalty': 1,
            'gap_extend_penalty': 1,
        }
        score, top, bottom, gaps = needleman_wunsch_align(inputs[0], inputs[1], **alignment_kwargs)
        print("\nOut:")
        print(score)
        print(top)
        print(bottom)
        print(gaps)
        print("\n")

        # Verify the aligned strings.
        self.assertEqual(top, 'GCATTACT')
        self.assertEqual(bottom, 'G-ATTACT')

        # Verify the gap matrix, element by element.
        expected_gaps = [
            [0, 0, 0, 0, 0, 0, 0, 0],
            [0, 1, 0, 0, 0, 0, 0, 0],
        ]
        for seq_index, expected_row in enumerate(expected_gaps):
            for pos, flag in enumerate(expected_row):
                self.assertEqual(gaps[seq_index, pos], flag)
Example #3
0
    def test_needleman_wunsch_align_NUC_4_4(self):
        """Test the Needleman-Wunsch sequence alignment for two DNA sequences using the NUC 4.4 matrix.

        From online servers, the results with a gap open penalty of 5 and gap extend of 1 should be::

            https://www.ebi.ac.uk/Tools/psa/emboss_needle/
            EMBOSS_001         1 GAAAAAAT      8
                                 |    |||
            EMBOSS_001         1 G----AAT      4

        The test checks both aligned strings and every element of the returned gap matrix against this reference alignment.
        """

        # The two input sequences.
        long_seq = 'GAAAAAAT'
        short_seq = 'GAAT'
        print("\nIn:")
        for sequence in (long_seq, short_seq):
            print(sequence)

        # Run the alignment with the reference penalties.
        score, aligned_long, aligned_short, gaps = needleman_wunsch_align(
            long_seq,
            short_seq,
            sub_matrix=NUC_4_4,
            sub_seq=NUC_4_4_SEQ,
            gap_open_penalty=5,
            gap_extend_penalty=1,
        )
        print("\nOut:")
        for result in (score, aligned_long, aligned_short, gaps):
            print(result)
        print("\n")

        # The aligned strings must reproduce the reference alignment.
        self.assertEqual(aligned_long, 'GAAAAAAT')
        self.assertEqual(aligned_short, 'G----AAT')

        # The expected gap matrix:  four consecutive gaps in the second sequence.
        expected_gaps = [
            [0, 0, 0, 0, 0, 0, 0, 0],
            [0, 1, 1, 1, 1, 0, 0, 0],
        ]
        for row_index, expected_row in enumerate(expected_gaps):
            for col_index, expected in enumerate(expected_row):
                self.assertEqual(gaps[row_index, col_index], expected)
    def test_needleman_wunsch_align_NUC_4_4(self):
        """Test the Needleman-Wunsch sequence alignment for two DNA sequences using the NUC 4.4 matrix.

        From online servers, the results with a gap open penalty of 5 and gap extend of 1 should be::

            https://www.ebi.ac.uk/Tools/psa/emboss_needle/
            EMBOSS_001         1 GAAAAAAT      8
                                 |    |||
            EMBOSS_001         1 G----AAT      4

        Both aligned strings and the full gap matrix are compared with this reference result.
        """

        # The input sequences, echoed for debugging.
        inputs = ('GAAAAAAT', 'GAAT')
        print("\nIn:")
        print(inputs[0])
        print(inputs[1])

        # Run the alignment with the reference penalties.
        alignment_kwargs = {
            'sub_matrix': NUC_4_4,
            'sub_seq': NUC_4_4_SEQ,
            'gap_open_penalty': 5,
            'gap_extend_penalty': 1,
        }
        score, top, bottom, gaps = needleman_wunsch_align(inputs[0], inputs[1], **alignment_kwargs)
        print("\nOut:")
        print(score)
        print(top)
        print(bottom)
        print(gaps)
        print("\n")

        # Verify the aligned strings.
        self.assertEqual(top, 'GAAAAAAT')
        self.assertEqual(bottom, 'G----AAT')

        # Verify the gap matrix, element by element.
        expected_gaps = [
            [0, 0, 0, 0, 0, 0, 0, 0],
            [0, 1, 1, 1, 1, 0, 0, 0],
        ]
        for seq_index, expected_row in enumerate(expected_gaps):
            for pos, flag in enumerate(expected_row):
                self.assertEqual(gaps[seq_index, pos], flag)
Example #5
0
def align_pairwise(sequence1, sequence2, algorithm='NW70', matrix='BLOSUM62', gap_open_penalty=1.0, gap_extend_penalty=1.0, end_gap_open_penalty=0.0, end_gap_extend_penalty=0.0, verbosity=1):
    """Perform a pairwise alignment of two protein sequences.

    The algorithm and substitution matrix names are validated, both sequences are converted to upper case, and the alignment is delegated to needleman_wunsch_align().  Unless silenced, the settings, the input sequences and the aligned sequences are written to STDOUT, followed by a line marking identical alignment positions with '*'.

    @param sequence1:                   The first protein sequence as one letter codes.
    @type sequence1:                    str
    @param sequence2:                   The second protein sequence as one letter codes.
    @type sequence2:                    str
    @keyword algorithm:                 The pairwise sequence alignment algorithm to use.  Only 'NW70' is accepted.
    @type algorithm:                    str
    @keyword matrix:                    The substitution matrix to use, either 'BLOSUM62' or 'PAM250'.
    @type matrix:                       str
    @keyword gap_open_penalty:          The penalty for introducing gaps, as a positive number.
    @type gap_open_penalty:             float
    @keyword gap_extend_penalty:        The penalty for extending a gap, as a positive number.
    @type gap_extend_penalty:           float
    @keyword end_gap_open_penalty:      The optional penalty for opening a gap at the end of a sequence.
    @type end_gap_open_penalty:         float
    @keyword end_gap_extend_penalty:    The optional penalty for extending a gap at the end of a sequence.
    @type end_gap_extend_penalty:       float
    @keyword verbosity:                 The level of verbosity.  Setting this to zero silences all printouts.
    @type verbosity:                    int
    @raises RelaxError:                 If the algorithm or the substitution matrix name is not recognised.
    @return:                            The alignment score, two alignment strings and the gap matrix.
    @rtype:                             float, str, str, numpy rank-2 int array
    """

    # Validate the algorithm name first, then the substitution matrix name.
    allowed_algor = ['NW70']
    if algorithm not in allowed_algor:
        raise RelaxError("The sequence alignment algorithm '%s' is unknown, it must be one of %s." % (algorithm, allowed_algor))
    allowed_matrices = ['BLOSUM62', 'PAM250']
    if matrix not in allowed_matrices:
        raise RelaxError("The substitution matrix '%s' is unknown, it must be one of %s." % (matrix, allowed_matrices))

    # Work with upper case sequences only.
    sequence1, sequence2 = sequence1.upper(), sequence2.upper()

    # The shared layout for the labelled printout rows.
    row = "%-30s %s\n"
    out = sys.stdout.write

    # Report the settings and the input sequences.
    if verbosity:
        out("\nPairwise protein alignment.\n")
        out(row % ("Substitution matrix:", matrix))
        out(row % ("Gap opening penalty:", gap_open_penalty))
        out(row % ("Gap extend penalty:", gap_extend_penalty))
        out("\n" + row % ("Input sequence 1:", sequence1))
        out(row % ("Input sequence 2:", sequence2))

    # Pick the substitution matrix together with its residue ordering.
    if matrix == 'PAM250':
        sub_matrix, sub_seq = PAM250, PAM250_SEQ
    elif matrix == 'BLOSUM62':
        sub_matrix, sub_seq = BLOSUM62, BLOSUM62_SEQ

    # Run the selected alignment algorithm.
    if algorithm == 'NW70':
        score, align1, align2, gaps = needleman_wunsch_align(
            sequence1,
            sequence2,
            sub_matrix=sub_matrix,
            sub_seq=sub_seq,
            gap_open_penalty=gap_open_penalty,
            gap_extend_penalty=gap_extend_penalty,
            end_gap_open_penalty=end_gap_open_penalty,
            end_gap_extend_penalty=end_gap_extend_penalty,
        )

    # Report the aligned sequences, followed by a '*' under each identical position.
    if verbosity:
        out("\n" + row % ("Aligned sequence 1:", align1))
        out(row % ("Aligned sequence 2:", align2))
        out("%-30s " % "")
        for pos, residue in enumerate(align1):
            out("*" if residue == align2[pos] else " ")
        out("\n\n")

    # The score, both alignment strings and the gap matrix.
    return score, align1, align2, gaps