예제 #1
0
 def test_names(self):
     """names lists every label, before and after dropping invalid data"""
     data = {
         ("ABAYE2984", "Atu3667"): 0.25,
         ("ABAYE2984", "Avin_42730"): 0.638,
         ("ABAYE2984", "BAA10469"): None,
         ("Atu3667", "ABAYE2984"): 0.25,
         ("Atu3667", "Avin_42730"): 2.368,
         ("Atu3667", "BAA10469"): 0.25,
         ("Avin_42730", "ABAYE2984"): 0.638,
         ("Avin_42730", "Atu3667"): 2.368,
         ("Avin_42730", "BAA10469"): 1.85,
         ("BAA10469", "ABAYE2984"): 0.25,
         ("BAA10469", "Atu3667"): 0.25,
         ("BAA10469", "Avin_42730"): 1.85,
     }
     # every label that occurs in any key of data
     expected = {label for pair in data for label in pair}
     dmat = DistanceMatrix(data)
     self.assertEqual(set(dmat.names), expected)

     # both members of the pair whose distance is None should be gone
     cleaned = dmat.drop_invalid()
     expected = expected - {"ABAYE2984", "BAA10469"}
     self.assertEqual(set(cleaned.names), expected)
예제 #2
0
    def test_deserialise_tabular_distancematrix(self):
        """a DistanceMatrix survives a round trip through its json form"""
        from cogent3.evolve.fast_distance import DistanceMatrix

        data = {
            ("ABAYE2984", "Atu3667"): None,
            ("ABAYE2984", "Avin_42730"): 0.638,
            ("ABAYE2984", "BAA10469"): None,
            ("Atu3667", "ABAYE2984"): None,
            ("Atu3667", "Avin_42730"): 2.368,
            ("Atu3667", "BAA10469"): None,
            ("Avin_42730", "ABAYE2984"): 0.638,
            ("Avin_42730", "Atu3667"): 2.368,
            ("Avin_42730", "BAA10469"): 1.85,
            ("BAA10469", "ABAYE2984"): None,
            ("BAA10469", "Atu3667"): None,
            ("BAA10469", "Avin_42730"): 1.85,
        }

        original = DistanceMatrix(data)
        restored = deserialise_object(original.to_json())
        expected = original.to_dict()
        observed = restored.to_dict()
        for pair, dist in expected.items():
            if dist is not None:
                assert_allclose(dist, observed[pair])
                continue
            # a missing distance must come back as nan
            assert numpy.isnan(observed[pair])
예제 #3
0
    def test_build_phylogeny(self):
        """quick_tree on a distance matrix gives the expected NJ topology"""
        from cogent3 import make_tree

        pairwise = {
            ("DogFaced", "FlyingFox"): 0.05,
            ("DogFaced", "FreeTaile"): 0.14,
            ("DogFaced", "LittleBro"): 0.16,
            ("DogFaced", "TombBat"): 0.15,
            ("FlyingFox", "DogFaced"): 0.05,
            ("FlyingFox", "FreeTaile"): 0.12,
            ("FlyingFox", "LittleBro"): 0.13,
            ("FlyingFox", "TombBat"): 0.14,
            ("FreeTaile", "DogFaced"): 0.14,
            ("FreeTaile", "FlyingFox"): 0.12,
            ("FreeTaile", "LittleBro"): 0.09,
            ("FreeTaile", "TombBat"): 0.1,
            ("LittleBro", "DogFaced"): 0.16,
            ("LittleBro", "FlyingFox"): 0.13,
            ("LittleBro", "FreeTaile"): 0.09,
            ("LittleBro", "TombBat"): 0.12,
            ("TombBat", "DogFaced"): 0.15,
            ("TombBat", "FlyingFox"): 0.14,
            ("TombBat", "FreeTaile"): 0.1,
            ("TombBat", "LittleBro"): 0.12,
        }
        got = DistanceMatrix(pairwise).quick_tree(show_progress=False)
        newick = "((TombBat,(DogFaced,FlyingFox)),LittleBro,FreeTaile)"
        expect = make_tree(treestring=newick)
        self.assertTrue(expect.same_topology(got))
예제 #4
0
파일: test_io.py 프로젝트: aglucaci/cogent3
 def test_load_tabular_distance_matrix(self):
     """a DistanceMatrix written as tsv reloads unchanged with as_type='distances'"""
     matrix = DistanceMatrix({(0, 0): 0, (0, 1): 4, (1, 0): 4, (1, 1): 0})
     loader = io_app.load_tabular(sep="\t", as_type="distances")
     with TemporaryDirectory(dir=".") as dirname:
         writer = io_app.write_tabular(data_path=dirname, format="tsv")
         target = join(dirname, "delme.tsv")
         writer.write(matrix, identifier=target)
         reloaded = loader(target)
         # compare while the temporary directory still exists
         self.assertEqual(matrix.to_dict(), reloaded.to_dict())
예제 #5
0
    def test_matrix_dtype(self):
        """DistanceMatrix accepts None entries but rejects str entries"""
        data = {
            ("ABAYE2984", "Atu3667"): None,
            ("ABAYE2984", "Avin_42730"): 0.638,
            ("ABAYE2984", "BAA10469"): None,
            ("Atu3667", "ABAYE2984"): None,
            ("Atu3667", "Avin_42730"): 2.368,
            ("Atu3667", "BAA10469"): None,
            ("Avin_42730", "ABAYE2984"): 0.638,
            ("Avin_42730", "Atu3667"): 2.368,
            ("Avin_42730", "BAA10469"): 1.85,
            ("BAA10469", "ABAYE2984"): None,
            ("BAA10469", "Atu3667"): None,
            ("BAA10469", "Avin_42730"): 1.85,
        }
        expected_names = {label for pair in data for label in pair}

        # None values: the matrix is built and None is read back as nan
        dmat = DistanceMatrix(data)
        self.assertEqual(dmat.shape, (4, 4))
        self.assertEqual(set(dmat.names), expected_names)
        for pair, dist in data.items():
            if dist is not None:
                assert_allclose(dist, dmat[pair])
                continue
            assert numpy.isnan(dmat[pair])

        data = {
            ("ABAYE2984", "Atu3667"): "None",
            ("ABAYE2984", "Avin_42730"): 0.638,
            ("ABAYE2984", "BAA10469"): None,
            ("Atu3667", "ABAYE2984"): None,
            ("Atu3667", "Avin_42730"): 2.368,
            ("Atu3667", "BAA10469"): "None",
            ("Avin_42730", "ABAYE2984"): 0.638,
            ("Avin_42730", "Atu3667"): 2.368,
            ("Avin_42730", "BAA10469"): 1.85,
            ("BAA10469", "ABAYE2984"): None,
            ("BAA10469", "Atu3667"): None,
            ("BAA10469", "Avin_42730"): 1.85,
        }

        # str values (here the text "None") must be refused
        with self.assertRaises(ValueError):
            DistanceMatrix(data)
예제 #6
0
파일: io.py 프로젝트: jbw900/cogent3
    def load(self, path):
        """Parse tabular data at ``path`` and convert it according to ``self.as_type``.

        Parameters
        ----------
        path
            a plain ``str`` is wrapped in a ``SingleReadDataStore`` and its
            first member is used; any other object is handed to
            ``self._parse`` unchanged.

        Returns
        -------
        Depending on ``self.as_type``: a ``Table`` ("table"), a
        ``DistanceMatrix`` ("distances"), or the result of the matching
        ``make_*_from_tabular`` function ("motif_counts", "motif_freqs",
        "pssm"). ``None`` for any other value. If parsing fails, a
        ``NotCompleted`` instance describing the error is returned.

        Raises
        ------
        AssertionError
            if ``as_type`` is not "table" and the parsed data does not have
            exactly 3 columns.
        """
        # exact type check kept on purpose: str subclasses are passed through
        # untouched (presumably data store members -- confirm)
        if type(path) is str:
            # we use a data store as its read() handles compression
            path = SingleReadDataStore(path)[0]

        try:
            header, data, title = self._parse(path)
        except Exception as err:
            # return the failure record; previously it was built but never
            # returned, so the code below ran with header/data/title unbound
            message = err.args[0] if err.args else type(err).__name__
            return NotCompleted("ERROR", self, message, source=str(path))

        if self.as_type == "table":
            return Table(header, rows=data, title=title)

        assert data.shape[1] == 3, "Invalid tabular data"

        if self.as_type == "distances":
            # records is of the form [ [dim-1, dim-2, value] for entries in DistanceMatrix ]
            return DistanceMatrix({(e[0], e[1]): e[2] for e in data})

        if self.as_type == "motif_counts":
            return make_motif_counts_from_tabular(data)
        if self.as_type == "motif_freqs":
            return make_motif_freqs_from_tabular(data)
        if self.as_type == "pssm":
            return make_pssm_from_tabular(data)

        return None
예제 #7
0
 def test_to_table(self):
     """to_table gives a Table with a names column plus one column per name"""
     dmat = DistanceMatrix(
         {
             ("A", "B"): 2,
             ("A", "C"): 3,
             ("B", "C"): 1,
             ("B", "A"): 2,
             ("C", "A"): 3,
             ("C", "B"): 1,
         }
     )
     table = dmat.to_table()
     self.assertEqual(table.shape, (3, 4))
     self.assertEqual(table.columns["names"].tolist(), list(dmat.names))
     # an off-diagonal entry, then a diagonal one
     for (row, col), expected in ((("A", "B"), 2), (("A", "A"), 0)):
         self.assertEqual(table[row, col], expected)
예제 #8
0
파일: test_io.py 프로젝트: aglucaci/cogent3
 def test_write_tabular_distance_matrix(self):
     """a DistanceMatrix is written as dim-1 / dim-2 / value rows"""
     matrix = DistanceMatrix({(0, 0): 0, (0, 1): 4, (1, 0): 4, (1, 1): 0})
     loader = io_app.load_tabular(sep="\t")
     # the reloaded table has dim-1, dim-2 and value as its column labels;
     # in dict form only these two rows are expected
     expected = {
         0: {"dim-1": 0, "dim-2": 1, "value": 4},
         1: {"dim-1": 1, "dim-2": 0, "value": 4},
     }
     with TemporaryDirectory(dir=".") as dirname:
         writer = io_app.write_tabular(data_path=dirname, format="tsv")
         target = join(dirname, "delme.tsv")
         writer.write(matrix, identifier=target)
         reloaded = loader(target)
         self.assertEqual(expected, reloaded.to_dict())
예제 #9
0
파일: dist.py 프로젝트: cogent3/cogent3
    def calc_distance(self, aln):
        """Return the pairwise distance matrix for the sequences in ``aln``.

        ``aln`` is first converted to ``self._moltype`` when that is set and
        differs from the alignment's moltype. Distances come from
        ``self.fast_calc`` when it is truthy, otherwise from an all-zero
        matrix over ``aln.names``. When ``self._sm`` is truthy, every
        off-diagonal entry that is still falsy is replaced (symmetrically)
        by ``self._est_dist_pair_slow`` on the two sequences involved.
        """
        wanted = self._moltype
        if wanted and wanted != aln.moltype:
            aln = aln.to_moltype(wanted)

        if not self.fast_calc:
            # no fast calculator: start with zero for every ordered pair
            all_pairs = itertools.product(aln.names, aln.names)
            dists = DistanceMatrix(dict.fromkeys(all_pairs, 0))
        else:
            self.fast_calc(aln, show_progress=False)
            dists = self.fast_calc.get_pairwise_distances()
        dists.source = aln.info.source

        if not self._sm:
            return dists

        for a in dists.template.names[0]:
            for b in dists.template.names[1]:
                if dists[a, b] or a == b:
                    # already has a value, or is a diagonal entry
                    continue
                pair_aln = aln.take_seqs([a, b])
                estimate = self._est_dist_pair_slow(pair_aln)
                dists[a, b] = dists[b, a] = estimate
        return dists
예제 #10
0
 def test_take_dists(self):
     """take_dists by name matches take_dists with the complement negated"""
     data = {
         ("ABAYE2984", "Atu3667"): 0.25,
         ("ABAYE2984", "Avin_42730"): 0.638,
         ("ABAYE2984", "BAA10469"): None,
         ("Atu3667", "ABAYE2984"): 0.25,
         ("Atu3667", "Avin_42730"): 2.368,
         ("Atu3667", "BAA10469"): 0.25,
         ("Avin_42730", "ABAYE2984"): 0.638,
         ("Avin_42730", "Atu3667"): 2.368,
         ("Avin_42730", "BAA10469"): 1.85,
         ("BAA10469", "ABAYE2984"): 0.25,
         ("BAA10469", "Atu3667"): 0.25,
         ("BAA10469", "Avin_42730"): 1.85,
     }
     dmat = DistanceMatrix(data)
     # selecting three names and excluding the fourth should be the same subset
     selected = dmat.take_dists(["ABAYE2984", "Atu3667", "Avin_42730"])
     excluded = dmat.take_dists("BAA10469", negate=True)
     selected_vals = selected.array.astype(float)
     excluded_vals = excluded.array.astype(float)
     assert_allclose(selected_vals, excluded_vals)
예제 #11
0
    def get_pairwise_distances(self, summary_function="mean", **kwargs):
        """Return the pairwise "length" estimates as a DistanceMatrix.

        Convenience interface to get_pairwise_param.

        Parameters
        ----------
        summary_function
            a string naming the function used for estimating param from
            threeway distances. Valid values are 'mean' (default) and
            'median'.
        **kwargs
            passed through to get_pairwise_param

        Returns
        -------
        A DistanceMatrix built from the get_pairwise_param result, or None
        when that result is empty / falsy.
        """
        lengths = self.get_pairwise_param(
            "length", summary_function=summary_function, **kwargs
        )
        if not lengths:
            return None
        return DistanceMatrix(lengths)
예제 #12
0
 def test_slice_dmatrix(self):
     """slicing a distance matrix keeps the corresponding names"""
     data = {
         ("ABAYE2984", "Atu3667"): 0.25,
         ("ABAYE2984", "Avin_42730"): 0.638,
         ("ABAYE2984", "BAA10469"): None,
         ("Atu3667", "ABAYE2984"): 0.25,
         ("Atu3667", "Avin_42730"): 2.368,
         ("Atu3667", "BAA10469"): 0.25,
         ("Avin_42730", "ABAYE2984"): 0.638,
         ("Avin_42730", "Atu3667"): 2.368,
         ("Avin_42730", "BAA10469"): 1.85,
         ("BAA10469", "ABAYE2984"): 0.25,
         ("BAA10469", "Atu3667"): 0.25,
         ("BAA10469", "Avin_42730"): 1.85,
     }
     dmat = DistanceMatrix(data)
     first_three = slice(3)
     expected = dmat.template.names[0][first_three]
     sliced = dmat[first_three, first_three]
     self.assertEqual(list(sliced.template.names[0]), expected)
예제 #13
0
    def test_dropping_from_matrix(self):
        """pairwise distances should have method for dropping invalid data"""
        data = {
            ("ABAYE2984", "Atu3667"): None,
            ("ABAYE2984", "Avin_42730"): 0.638,
            ("ABAYE2984", "BAA10469"): None,
            ("Atu3667", "ABAYE2984"): None,
            ("Atu3667", "Avin_42730"): 2.368,
            ("Atu3667", "BAA10469"): None,
            ("Avin_42730", "ABAYE2984"): 0.638,
            ("Avin_42730", "Atu3667"): 2.368,
            ("Avin_42730", "BAA10469"): 1.85,
            ("BAA10469", "ABAYE2984"): None,
            ("BAA10469", "Atu3667"): None,
            ("BAA10469", "Avin_42730"): 1.85,
        }

        darr = DistanceMatrix(data)
        new = darr.drop_invalid()
        # identity check: assertEqual(new, None) would go through __eq__,
        # which array-like objects may not answer with a plain bool
        self.assertIsNone(new)

        data = {
            ("ABAYE2984", "Atu3667"): 0.25,
            ("ABAYE2984", "Avin_42730"): 0.638,
            ("ABAYE2984", "BAA10469"): None,
            ("Atu3667", "ABAYE2984"): 0.25,
            ("Atu3667", "Avin_42730"): 2.368,
            ("Atu3667", "BAA10469"): 0.25,
            ("Avin_42730", "ABAYE2984"): 0.638,
            ("Avin_42730", "Atu3667"): 2.368,
            ("Avin_42730", "BAA10469"): 1.85,
            ("BAA10469", "ABAYE2984"): 0.25,
            ("BAA10469", "Atu3667"): 0.25,
            ("BAA10469", "Avin_42730"): 1.85,
        }
        darr = DistanceMatrix(data)
        new = darr.drop_invalid()
        # a single None entry: a 2 x 2 matrix is expected to remain
        self.assertEqual(new.shape, (2, 2))
예제 #14
0
 def test_to_dict(self):
     """to_dict gives back the flat (name, name) -> distance mapping"""
     expected = {("s1", "s2"): 0.25, ("s2", "s1"): 0.25}
     observed = DistanceMatrix(expected).to_dict()
     self.assertEqual(observed, expected)
예제 #15
0
파일: test_tree.py 프로젝트: wjjmjh/cogent3
    def test_quick_tree_taking_distance_matrix(self):
        """quick_tree should take a distance matrix"""

        def assert_builds_tree(app, data):
            # build a tree from data with app and check that it is a
            # PhyloNode whose tips are exactly the names used in data
            tree = app.quick_tree(DistanceMatrix(data))
            self.assertIsInstance(tree, PhyloNode)
            self.assertIsNotNone(tree.children)
            self.assertEqual(
                set(tree.get_tip_names()),
                {name for pair in data for name in pair},
            )

        quick_tree = tree_app.quick_tree()

        # only pairs involving Avin_42730 are provided
        sparse = {
            ("ABAYE2984", "Avin_42730"): 0.638,
            ("Atu3667", "Avin_42730"): 2.368,
            ("Avin_42730", "ABAYE2984"): 0.638,
            ("Avin_42730", "Atu3667"): 2.368,
            ("Avin_42730", "BAA10469"): 1.85,
            ("BAA10469", "Avin_42730"): 1.85,
        }
        assert_builds_tree(quick_tree, sparse)

        # every off-diagonal pair present, no missing values
        complete = {
            ("DogFaced", "FlyingFox"): 0.05,
            ("DogFaced", "FreeTaile"): 0.14,
            ("DogFaced", "LittleBro"): 0.16,
            ("DogFaced", "TombBat"): 0.15,
            ("FlyingFox", "DogFaced"): 0.05,
            ("FlyingFox", "FreeTaile"): 0.12,
            ("FlyingFox", "LittleBro"): 0.13,
            ("FlyingFox", "TombBat"): 0.14,
            ("FreeTaile", "DogFaced"): 0.14,
            ("FreeTaile", "FlyingFox"): 0.12,
            ("FreeTaile", "LittleBro"): 0.09,
            ("FreeTaile", "TombBat"): 0.1,
            ("LittleBro", "DogFaced"): 0.16,
            ("LittleBro", "FlyingFox"): 0.13,
            ("LittleBro", "FreeTaile"): 0.09,
            ("LittleBro", "TombBat"): 0.12,
            ("TombBat", "DogFaced"): 0.15,
            ("TombBat", "FlyingFox"): 0.14,
            ("TombBat", "FreeTaile"): 0.1,
            ("TombBat", "LittleBro"): 0.12,
        }
        assert_builds_tree(quick_tree, complete)

        # a single None entry
        one_missing = {
            ("ABAYE2984", "Atu3667"): 0.25,
            ("ABAYE2984", "Avin_42730"): 0.638,
            ("ABAYE2984", "BAA10469"): None,
            ("Atu3667", "ABAYE2984"): 0.25,
            ("Atu3667", "Avin_42730"): 2.368,
            ("Atu3667", "BAA10469"): 0.25,
            ("Avin_42730", "ABAYE2984"): 0.638,
            ("Avin_42730", "Atu3667"): 2.368,
            ("Avin_42730", "BAA10469"): 1.85,
            ("BAA10469", "ABAYE2984"): 0.25,
            ("BAA10469", "Atu3667"): 0.25,
            ("BAA10469", "Avin_42730"): 1.85,
        }
        assert_builds_tree(quick_tree, one_missing)

        many_missing = {
            ("ABAYE2984", "Atu3667"): None,
            ("ABAYE2984", "Avin_42730"): 0.638,
            ("ABAYE2984", "BAA10469"): None,
            ("Atu3667", "ABAYE2984"): None,
            ("Atu3667", "Avin_42730"): 2.368,
            ("Atu3667", "BAA10469"): None,
            ("Avin_42730", "ABAYE2984"): 0.638,
            ("Avin_42730", "Atu3667"): 2.368,
            ("Avin_42730", "BAA10469"): 1.85,
            ("BAA10469", "ABAYE2984"): None,
            ("BAA10469", "Atu3667"): None,
            ("BAA10469", "Avin_42730"): 1.85,
        }
        darr = DistanceMatrix(many_missing)
        with self.assertRaises(KeyError):
            quick_tree.quick_tree(darr)

        # when distance_matrix is None after dropping invalid
        # (the app is built outside assertRaises so that only the tree
        # building call can satisfy the expected ValueError)
        quick_tree = tree_app.quick_tree(drop_invalid=True)
        with self.assertRaises(ValueError):
            quick_tree.quick_tree(darr)

        # complete data must still work with drop_invalid=True
        assert_builds_tree(quick_tree, complete)