コード例 #1
0
ファイル: test_pairwise.py プロジェクト: 7924102/scikit-bio
    def test_nucleotide_aligners_use_substitution_matrices(self):
        # Each aligner (local, then global) is run twice on the same pair of
        # sequences: once with match/mismatch scoring only and once with an
        # explicit substitution matrix. Every comparison is made within a
        # single aligner so the substitution matrix is the only variable.
        alt_sub = make_identity_substitution_matrix(10, -10)
        # alternate substitution matrix yields different alignment (the
        # aligned sequences and the scores are different) with local alignment
        actual_no_sub = local_pairwise_align_nucleotide(
            "GACCTTGACCAGGTACC", "GAACTTTGACGTAAC", gap_open_penalty=10.,
            gap_extend_penalty=5., match_score=5, mismatch_score=-4)
        actual_alt_sub = local_pairwise_align_nucleotide(
            "GACCTTGACCAGGTACC", "GAACTTTGACGTAAC", gap_open_penalty=10.,
            gap_extend_penalty=5., match_score=5, mismatch_score=-4,
            substitution_matrix=alt_sub)
        self.assertNotEqual(str(actual_no_sub[0]), str(actual_alt_sub[0]))
        self.assertNotEqual(str(actual_no_sub[1]), str(actual_alt_sub[1]))
        self.assertNotEqual(actual_no_sub.score(),
                            actual_alt_sub.score())

        # alternate substitution matrix yields different alignment (the
        # aligned sequences and the scores are different) with global
        # alignment. The baseline has to come from the global aligner as
        # well: comparing a local result with a global one would pass even
        # if the global aligner ignored substitution_matrix.
        actual_no_sub = global_pairwise_align_nucleotide(
            "GACCTTGACCAGGTACC", "GAACTTTGACGTAAC", gap_open_penalty=10.,
            gap_extend_penalty=5., match_score=5, mismatch_score=-4)
        actual_alt_sub = global_pairwise_align_nucleotide(
            "GACCTTGACCAGGTACC", "GAACTTTGACGTAAC", gap_open_penalty=10.,
            gap_extend_penalty=5., match_score=5, mismatch_score=-4,
            substitution_matrix=alt_sub)
        # Compare the aligned pair as a whole: the claim under test is that
        # the alignment changes, not that each row changes individually.
        self.assertNotEqual(
            (str(actual_no_sub[0]), str(actual_no_sub[1])),
            (str(actual_alt_sub[0]), str(actual_alt_sub[1])))
        self.assertNotEqual(actual_no_sub.score(),
                            actual_alt_sub.score())
コード例 #2
0
    def test_nucleotide_aligners_use_substitution_matrices(self):
        # Each aligner (local, then global) is run twice on the same pair of
        # sequences: once with match/mismatch scoring only and once with an
        # explicit substitution matrix. Every comparison is made within a
        # single aligner so the substitution matrix is the only variable.
        # Warnings raised by the aligners are silenced while they run.
        alt_sub = make_identity_substitution_matrix(10, -10)
        # alternate substitution matrix yields different alignment (the
        # aligned sequences and the scores are different) with local alignment
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            actual_no_sub = local_pairwise_align_nucleotide(
                "GACCTTGACCAGGTACC", "GAACTTTGACGTAAC", gap_open_penalty=10.,
                gap_extend_penalty=5., match_score=5, mismatch_score=-4)
            actual_alt_sub = local_pairwise_align_nucleotide(
                "GACCTTGACCAGGTACC", "GAACTTTGACGTAAC", gap_open_penalty=10.,
                gap_extend_penalty=5., match_score=5, mismatch_score=-4,
                substitution_matrix=alt_sub)
        self.assertNotEqual(str(actual_no_sub[0]), str(actual_alt_sub[0]))
        self.assertNotEqual(str(actual_no_sub[1]), str(actual_alt_sub[1]))
        self.assertNotEqual(actual_no_sub.score(),
                            actual_alt_sub.score())

        # alternate substitution matrix yields different alignment (the
        # aligned sequences and the scores are different) with global
        # alignment. The baseline has to come from the global aligner as
        # well: comparing a local result with a global one would pass even
        # if the global aligner ignored substitution_matrix.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            actual_no_sub = global_pairwise_align_nucleotide(
                "GACCTTGACCAGGTACC", "GAACTTTGACGTAAC", gap_open_penalty=10.,
                gap_extend_penalty=5., match_score=5, mismatch_score=-4)
            actual_alt_sub = global_pairwise_align_nucleotide(
                "GACCTTGACCAGGTACC", "GAACTTTGACGTAAC", gap_open_penalty=10.,
                gap_extend_penalty=5., match_score=5, mismatch_score=-4,
                substitution_matrix=alt_sub)
        # Compare the aligned pair as a whole: the claim under test is that
        # the alignment changes, not that each row changes individually.
        self.assertNotEqual(
            (str(actual_no_sub[0]), str(actual_no_sub[1])),
            (str(actual_alt_sub[0]), str(actual_alt_sub[1])))
        self.assertNotEqual(actual_no_sub.score(),
                            actual_alt_sub.score())
コード例 #3
0
ファイル: test_pairwise.py プロジェクト: bctaylor/scikit-bio
    def test_local_pairwise_align_nucleotide(self):
        # Covers local nucleotide alignment with str and DNA inputs, the ids
        # reported on the result, and rejection of unsupported input types.
        first = "GACCTTGACCAGGTACC"
        second = "GAACTTTGACGTAAC"
        # Scoring used by every call except the first one below.
        costly_gaps = {'gap_open_penalty': 10., 'gap_extend_penalty': 5.,
                       'match_score': 5, 'mismatch_score': -4}

        # lower gap penalties: the alignment contains a gap
        result = local_pairwise_align_nucleotide(
            first, second, gap_open_penalty=5., gap_extend_penalty=0.5,
            match_score=5, mismatch_score=-4)
        self.assertEqual(str(result[0]), "ACCTTGACCAGGTACC")
        self.assertEqual(str(result[1]), "ACTTTGAC---GTAAC")
        self.assertEqual(result.score(), 41.0)
        self.assertEqual(result.start_end_positions(), [(1, 16), (2, 14)])
        self.assertEqual(result.ids(), ['0', '1'])

        # higher gap penalties: a shorter, ungapped alignment
        result = local_pairwise_align_nucleotide(first, second,
                                                 **costly_gaps)
        self.assertEqual(str(result[0]), "ACCTTGAC")
        self.assertEqual(str(result[1]), "ACTTTGAC")
        self.assertEqual(result.score(), 31.0)
        self.assertEqual(result.start_end_positions(), [(1, 8), (2, 9)])
        self.assertEqual(result.ids(), ['0', '1'])

        # DNA (rather than str) as input
        result = local_pairwise_align_nucleotide(
            DNA(first, "s1"), DNA(second, "s2"), **costly_gaps)
        self.assertEqual(str(result[0]), "ACCTTGAC")
        self.assertEqual(str(result[1]), "ACTTTGAC")
        self.assertEqual(result.score(), 31.0)
        self.assertEqual(result.start_end_positions(), [(1, 8), (2, 9)])
        self.assertEqual(result.ids(), ["s1", "s2"])

        # Fails when either input is passed as an Alignment. The inputs are
        # built outside the ``with`` block so that only the aligner call can
        # satisfy the expected TypeError.
        wrapped = Alignment([DNA(first, "s1")])
        plain = DNA(second, "s2")
        with self.assertRaises(TypeError):
            local_pairwise_align_nucleotide(wrapped, plain, **costly_gaps)

        plain = DNA(first, "s1")
        wrapped = Alignment([DNA(second, "s2")])
        with self.assertRaises(TypeError):
            local_pairwise_align_nucleotide(plain, wrapped, **costly_gaps)

        # ids are provided if they're not passed in
        result = local_pairwise_align_nucleotide(
            DNA(first), DNA(second), **costly_gaps)
        self.assertEqual(result.ids(), ['0', '1'])

        # TypeError on invalid input
        with self.assertRaises(TypeError):
            local_pairwise_align_nucleotide(42, "HEAGAWGHEE")
        with self.assertRaises(TypeError):
            local_pairwise_align_nucleotide("HEAGAWGHEE", 42)
コード例 #4
0
    def test_local_pairwise_align_nucleotide(self):
        # Covers local nucleotide alignment of DNA inputs (with and without
        # metadata) and rejection of unsupported input types.
        first = "GACCTTGACCAGGTACC"
        second = "GAACTTTGACGTAAC"
        # Scoring used by every scored call except the first one below.
        costly_gaps = {'gap_open_penalty': 10., 'gap_extend_penalty': 5.,
                       'match_score': 5, 'mismatch_score': -4}

        # lower gap penalties: the alignment contains a gap
        outcome = local_pairwise_align_nucleotide(
            DNA(first), DNA(second), gap_open_penalty=5.,
            gap_extend_penalty=0.5, match_score=5, mismatch_score=-4)
        msa, score, positions = outcome

        gapped = TabularMSA([DNA("ACCTTGACCAGGTACC"),
                             DNA("ACTTTGAC---GTAAC")])
        self.assertEqual(msa, gapped)
        self.assertEqual(score, 41.0)
        self.assertEqual(positions, [(1, 16), (2, 14)])

        # higher gap penalties: a shorter, ungapped alignment
        outcome = local_pairwise_align_nucleotide(
            DNA(first), DNA(second), **costly_gaps)
        msa, score, positions = outcome

        ungapped = TabularMSA([DNA("ACCTTGAC"), DNA("ACTTTGAC")])
        self.assertEqual(msa, ungapped)
        self.assertEqual(score, 31.0)
        self.assertEqual(positions, [(1, 8), (2, 9)])

        # DNA sequences with metadata
        outcome = local_pairwise_align_nucleotide(
            DNA(first, metadata={'id': "s1"}),
            DNA(second, metadata={'id': "s2"}),
            **costly_gaps)
        msa, score, positions = outcome

        tagged = TabularMSA([DNA("ACCTTGAC", metadata={'id': "s1"}),
                             DNA("ACTTTGAC", metadata={'id': "s2"})])
        self.assertEqual(msa, tagged)
        self.assertEqual(score, 31.0)
        self.assertEqual(positions, [(1, 8), (2, 9)])

        # Fails when either input is passed as a TabularMSA. The inputs are
        # built outside the ``with`` block so that only the aligner call can
        # satisfy the expected TypeError.
        wrapped = TabularMSA([DNA(first, metadata={'id': "s1"})])
        plain = DNA(second, metadata={'id': "s2"})
        with self.assertRaises(TypeError):
            local_pairwise_align_nucleotide(wrapped, plain, **costly_gaps)

        plain = DNA(first, metadata={'id': "s1"})
        wrapped = TabularMSA([DNA(second, metadata={'id': "s2"})])
        with self.assertRaises(TypeError):
            local_pairwise_align_nucleotide(plain, wrapped, **costly_gaps)

        # TypeError on invalid input
        not_a_sequence = 42
        valid = DNA("ACGT")
        with self.assertRaises(TypeError):
            local_pairwise_align_nucleotide(not_a_sequence, valid)
        valid = DNA("ACGT")
        with self.assertRaises(TypeError):
            local_pairwise_align_nucleotide(valid, not_a_sequence)
コード例 #5
0
    def test_nucleotide_aligners_use_substitution_matrices(self):
        # Runs each aligner (local, then global) with and without an explicit
        # substitution matrix and checks how the two results relate.
        alt_sub = make_identity_substitution_matrix(10, -10)
        first = "GACCTTGACCAGGTACC"
        second = "GAACTTTGACGTAAC"
        scoring = {'gap_open_penalty': 10., 'gap_extend_penalty': 5.,
                   'match_score': 5, 'mismatch_score': -4}

        # alternate substitution matrix yields different alignment (the
        # aligned sequences and the scores are different) with local alignment
        default_run = local_pairwise_align_nucleotide(
            DNA(first), DNA(second), **scoring)
        default_msa, default_score, default_pos = default_run

        matrix_run = local_pairwise_align_nucleotide(
            DNA(first), DNA(second), substitution_matrix=alt_sub, **scoring)
        matrix_msa, matrix_score, matrix_pos = matrix_run

        self.assertNotEqual(default_msa, matrix_msa)
        self.assertNotEqual(default_score, matrix_score)
        self.assertNotEqual(default_pos, matrix_pos)

        # alternate substitution matrix yields different alignment (the
        # aligned sequences and the scores are different) with global alignment
        default_run = global_pairwise_align_nucleotide(
            DNA(first), DNA(second), **scoring)
        default_msa, default_score, default_pos = default_run

        matrix_run = global_pairwise_align_nucleotide(
            DNA(first), DNA(second), substitution_matrix=alt_sub, **scoring)
        matrix_msa, matrix_score, matrix_pos = matrix_run

        self.assertNotEqual(default_msa, matrix_msa)
        self.assertNotEqual(default_score, matrix_score)
        # unlike the local case, the start/end positions are expected to be
        # the same for both global runs
        self.assertEqual(default_pos, matrix_pos)
コード例 #6
0
def dnaAlign(seq1, seq2, gap_open_penalty, gap_extend_penalty, local=False):
    """Align two nucleotide strings and summarise the result as a dict.

    Both inputs are upper-cased and wrapped in ``DNA`` before being handed
    to the pairwise aligner.

    Parameters
    ----------
    seq1, seq2
        Sequences to align; ``.upper()`` is called on each.
    gap_open_penalty, gap_extend_penalty
        Forwarded positionally to the aligner as its third and fourth
        arguments.
    local
        When truthy, ``local_pairwise_align_nucleotide`` is used; otherwise
        ``global_pairwise_align_nucleotide``.

    Returns
    -------
    dict
        ``'aln1'`` and ``'aln2'`` hold the two aligned rows as strings,
        ``'score'`` the score returned by the aligner, and ``'similarity'``
        the relative match frequency of the two rows times 100, rounded to
        two decimals via string formatting.
    """
    upper1 = seq1.upper()
    upper2 = seq2.upper()

    aligner = (local_pairwise_align_nucleotide if local
               else global_pairwise_align_nucleotide)
    # The third element of the aligner's result is not needed here.
    aln, score, _ = aligner(DNA(upper1), DNA(upper2),
                            gap_open_penalty, gap_extend_penalty)

    row1 = str(aln[0])
    row2 = str(aln[1])
    percent = aln[0].match_frequency(aln[1], relative=True) * 100
    similarity = float('{:.2f}'.format(percent))

    return {'aln1': row1, 'aln2': row2, 'score': score,
            'similarity': similarity}
コード例 #7
0
    def test_local_pairwise_align_nucleotide(self):
        # Covers local nucleotide alignment with str and DNA inputs, the ids
        # reported on the result, and rejection of unsupported input types.
        seq_a = "GACCTTGACCAGGTACC"
        seq_b = "GAACTTTGACGTAAC"
        # Scoring used by every call except the first one below.
        costly_gaps = dict(gap_open_penalty=10., gap_extend_penalty=5.,
                           match_score=5, mismatch_score=-4)

        def verify(aln, rows, score, positions, ids):
            # Same five checks, in the same order, for every alignment.
            self.assertEqual(str(aln[0]), rows[0])
            self.assertEqual(str(aln[1]), rows[1])
            self.assertEqual(aln.score(), score)
            self.assertEqual(aln.start_end_positions(), positions)
            self.assertEqual(aln.ids(), ids)

        # lower gap penalties: the alignment contains a gap
        verify(
            local_pairwise_align_nucleotide(
                seq_a, seq_b, gap_open_penalty=5., gap_extend_penalty=0.5,
                match_score=5, mismatch_score=-4),
            ("ACCTTGACCAGGTACC", "ACTTTGAC---GTAAC"), 41.0,
            [(1, 16), (2, 14)], ['0', '1'])

        # higher gap penalties: a shorter, ungapped alignment
        verify(
            local_pairwise_align_nucleotide(seq_a, seq_b, **costly_gaps),
            ("ACCTTGAC", "ACTTTGAC"), 31.0, [(1, 8), (2, 9)], ['0', '1'])

        # DNA (rather than str) as input
        verify(
            local_pairwise_align_nucleotide(
                DNA(seq_a, "s1"), DNA(seq_b, "s2"), **costly_gaps),
            ("ACCTTGAC", "ACTTTGAC"), 31.0, [(1, 8), (2, 9)], ["s1", "s2"])

        # Fails when either input is passed as an Alignment. The inputs are
        # built outside the ``with`` block so that only the aligner call can
        # satisfy the expected TypeError.
        boxed = Alignment([DNA(seq_a, "s1")])
        bare = DNA(seq_b, "s2")
        with self.assertRaises(TypeError):
            local_pairwise_align_nucleotide(boxed, bare, **costly_gaps)

        bare = DNA(seq_a, "s1")
        boxed = Alignment([DNA(seq_b, "s2")])
        with self.assertRaises(TypeError):
            local_pairwise_align_nucleotide(bare, boxed, **costly_gaps)

        # ids are provided if they're not passed in
        unnamed = local_pairwise_align_nucleotide(
            DNA(seq_a), DNA(seq_b), **costly_gaps)
        self.assertEqual(unnamed.ids(), ['0', '1'])

        # TypeError on invalid input
        with self.assertRaises(TypeError):
            local_pairwise_align_nucleotide(42, "HEAGAWGHEE")
        with self.assertRaises(TypeError):
            local_pairwise_align_nucleotide("HEAGAWGHEE", 42)