Ejemplo n.º 1
0
    def test_paralinear_variance(self):
        """Paralinear variance agrees with an independent hand calculation.

        The expected value is rebuilt from the raw sequences with plain numpy
        and compared against ``ParalinearPair.variances`` to within 1e-3.
        """
        data = [
            (
                "seq1",
                "GGGGGGGGGGGCCCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGCGGTTTTTTTTTTTTTTTTTT",
            ),
            (
                "seq2",
                "TAAAAAAAAAAGGGGGGGGGGGGGGGGGGTTTTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCC",
            ),
        ]
        aln = make_aligned_seqs(data=data, moltype=DNA)
        paralinear_calc = ParalinearPair(moltype=DNA, alignment=aln)
        paralinear_calc.run(show_progress=False)

        first_seq, second_seq = data[0][1], data[1][1]
        base_index = {base: position for position, base in enumerate("ACGT")}

        # Joint count matrix: rows follow the seq1 base, columns the seq2 base.
        joint = numpy.zeros((4, 4))
        for base1, base2 in zip(first_seq, second_seq):
            joint[base_index[base1], base_index[base2]] += 1

        # Any empty diagonal cell receives a pseudo-count of 0.5 before
        # normalising (presumably mirroring the calculator's own adjustment
        # -- confirm against the implementation).
        for k in range(4):
            if joint[k, k] == 0:
                joint[k, k] += 0.5
        joint /= joint.sum()

        inverse = numpy.linalg.inv(joint)
        row_freqs = joint.sum(1)
        col_freqs = joint.sum(0)

        expected = 0.0
        for i in range(4):
            for j in range(4):
                expected += inverse[j, i] ** 2 * joint[i, j]
            expected -= 1 / numpy.sqrt(row_freqs[i] * col_freqs[i])
        expected /= 16 * len(first_seq)

        assert_allclose(paralinear_calc.variances[1, 1], expected, atol=1e-3)
Ejemplo n.º 2
0
    def test_paralinear_distance(self):
        """Paralinear distance agrees with an independent hand calculation.

        The expected value is rebuilt from the raw sequences with plain numpy
        as ``-0.25 * log(det(J) / sqrt(prod(row freqs) * prod(col freqs)))``
        and compared against ``ParalinearPair.dists["seq1", "seq2"]``.
        """
        data = [
            (
                "seq1",
                "GGGGGGGGGGGCCCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGCGGTTTTTTTTTTTTTTTTTT",
            ),
            (
                "seq2",
                "TAAAAAAAAAAGGGGGGGGGGGGGGGGGGTTTTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCC",
            ),
        ]
        aln = make_aligned_seqs(data=data, moltype=DNA)
        paralinear_calc = ParalinearPair(moltype=DNA, alignment=aln)
        paralinear_calc.run(show_progress=False)

        index = {base: position for position, base in enumerate("ACGT")}

        # Joint count matrix: rows follow the seq1 base, columns the seq2 base.
        J = numpy.zeros((4, 4))
        for base1, base2 in zip(data[0][1], data[1][1]):
            J[index[base1], index[base2]] += 1

        # Any empty diagonal cell receives a pseudo-count of 0.5 before
        # normalising the counts to frequencies.
        for i in range(4):
            if J[i, i] == 0:
                J[i, i] += 0.5
        J /= J.sum()

        # Marginal frequencies of seq1 (row sums) and seq2 (column sums).
        row_freqs, col_freqs = J.sum(1), J.sum(0)
        dist = -0.25 * numpy.log(
            numpy.linalg.det(J) / numpy.sqrt(row_freqs.prod() * col_freqs.prod())
        )

        assert_allclose(paralinear_calc.dists["seq1", "seq2"], dist)
Ejemplo n.º 3
0
 def test_paralinear_pair_aa(self):
     """Paralinear runs to completion on amino-acid sequences.

     This is a smoke test: it makes no assertion on the values and passes
     as long as no step raises.
     """
     protein_aln = load_aligned_seqs(
         "data/brca1_5.paml", moltype=DNA
     ).get_translation()
     calc = ParalinearPair(moltype=PROTEIN, alignment=protein_aln)
     calc.run(show_progress=False)
     # The result is deliberately discarded; only successful completion matters.
     calc.get_pairwise_distances()
Ejemplo n.º 4
0
 def test_paralinear_pair_dna(self):
     """Paralinear and LogDet give equal distance and variance entries.

     Both calculators are run on the same two-sequence DNA alignment and
     their ``dists`` and ``variances`` entries at ``[1, 1]`` are compared
     for exact equality.
     """
     data = [
         (
             "seq1",
             "TAATTCATTGGGACGTCGAATCCGGCAGTCCTGCCGCAAAAGCTTCCGGAATCGAATTTTGGCA",
         ),
         (
             "seq2",
             "AAAAAAAAAAAAAAAACCCCCCCCCCCCCCCCTTTTTTTTTTTTTTTTGGGGGGGGGGGGGGGG",
         ),
     ]
     aln = make_aligned_seqs(data=data, moltype=DNA)

     paralinear_calc = ParalinearPair(moltype=DNA, alignment=aln)
     paralinear_calc.run(show_progress=False)

     logdet_calc = LogDetPair(moltype=DNA, alignment=aln)
     logdet_calc.run(show_progress=False)

     # NOTE(review): if integer indices are positional, [1, 1] addresses the
     # seq2-vs-seq2 entry rather than the seq1-vs-seq2 pair -- confirm that
     # this is the intended comparison.
     logdet_dist = logdet_calc.dists[1, 1]
     paralinear_dist = paralinear_calc.dists[1, 1]
     self.assertEqual(logdet_dist, paralinear_dist)

     paralinear_var = paralinear_calc.variances[1, 1]
     logdet_var = logdet_calc.variances[1, 1]
     self.assertEqual(paralinear_var, logdet_var)
Ejemplo n.º 5
0
    def test_paralinear_for_determinant_lte_zero(self):
        """Distance is NaN for this pair, on both a first and a repeated run.

        Per the test name, the input is meant to give a determinant <= 0
        (not verified here). The check is made after each of two consecutive
        ``run`` calls on the same calculator.
        """
        aln = make_aligned_seqs(
            data={
                "seq1": "AGGGGGGGGGGCCCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGCGGTTTTTTTTTTTTTTTTTT",
                "seq2": "TAAAAAAAAAAGGGGGGGGGGGGGGGGGGTTTTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCC",
            },
            moltype=DNA,
        )
        paralinear_calc = ParalinearPair(moltype=DNA, alignment=aln)

        # Two passes: the second re-runs the same calculator and repeats the check.
        for _ in range(2):
            paralinear_calc.run(show_progress=False)
            pairwise = paralinear_calc.get_pairwise_distances().to_dict()
            first_distance = list(pairwise.values())[0]
            self.assertTrue(numpy.isnan(first_distance))
Ejemplo n.º 6
0
 def get_calc(data):
     """Return a ParalinearPair calculator for ``data`` after invoking it.

     ``data`` is passed to ``make_aligned_seqs`` as DNA; the resulting
     calculator is called with ``show_progress=False`` before being returned.
     """
     pair_calc = ParalinearPair(
         moltype=DNA,
         alignment=make_aligned_seqs(data=data, moltype=DNA),
     )
     pair_calc(show_progress=False)
     return pair_calc