Beispiel #1
0
    def test_construct_fails(self):
        """PSSM construction raises ValueError for invalid input matrices"""
        # every entry is zero
        all_zero = [[0.0] * 4 for _ in range(3)]

        # a single numpy.nan entry in otherwise numeric data
        with_nan = [
            [nan, -0.263, -1.0, -1.322],
            [-2.322, -1.678, -2.322, -1.322],
            [-1.485, -1.322, -1.322, -1.322],
            [-1.263, -0.737, -2.322, -0.322],
        ]

        # every entry is a negative number
        all_negative = [
            [-1.322, -0.263, -1.0, -1.322],
            [-2.322, -1.678, -2.322, -1.322],
            [-1.485, -1.322, -1.322, -1.322],
            [-1.263, -0.737, -2.322, -0.322],
        ]

        # checked in the same order as listed; the first one that does not
        # raise fails the test
        for invalid in (all_zero, with_nan, all_negative):
            with self.assertRaises(ValueError):
                PSSM(invalid, "ACTG")
Beispiel #2
0
    def test_score_seq_obj(self):
        """scoring a cogent3 sequence object gives the expected scores"""
        from cogent3 import DNA

        freqs = [
            [0.1, 0.3, 0.5, 0.1],
            [0.25, 0.25, 0.25, 0.25],
            [0.05, 0.8, 0.05, 0.1],
            [0.7, 0.1, 0.1, 0.1],
            [0.6, 0.15, 0.05, 0.2],
        ]
        pssm = PSSM(freqs, "ACTG")

        # "GCTATTG" is "ACTG" indexed by [3, 1, 2, 0, 2, 2, 3]
        seq = DNA.make_seq("GCTATTG")

        # three scores are expected for this 7-symbol sequence
        expected = [-4.481, -5.703, -2.966]
        assert_allclose(pssm.score_seq(seq), expected, atol=1e-3)
Beispiel #3
0
 def test_score_str(self):
     """scoring a plain string gives the expected scores"""
     freqs = [
         [0.1, 0.3, 0.5, 0.1],
         [0.25, 0.25, 0.25, 0.25],
         [0.05, 0.8, 0.05, 0.1],
         [0.7, 0.1, 0.1, 0.1],
         [0.6, 0.15, 0.05, 0.2],
     ]
     motifs = "ACTG"
     pssm = PSSM(freqs, motifs)

     # build the string by looking up each index in the motif order
     text = "".join([motifs[index] for index in (3, 1, 2, 0, 2, 2, 3)])
     expected = [-4.481, -5.703, -2.966]
     assert_allclose(pssm.score_seq(text), expected, atol=1e-3)

     # a 3-character string must be rejected
     too_short = text[:3]
     with self.assertRaises(ValueError):
         pssm.score_seq(too_short)
Beispiel #4
0
    def test_write_tabular_pssm(self):
        """correctly writes tabular data for PSSM

        The PSSM is written to a tsv file, read back as a generic table and
        every written value is compared with
        ``safe_log(data) - safe_log(background)`` for a uniform background.
        """

        # data from test_profile
        data = numpy.array([
            [0.1, 0.3, 0.5, 0.1],
            [0.25, 0.25, 0.25, 0.25],
            [0.05, 0.8, 0.05, 0.1],
            [0.7, 0.1, 0.1, 0.1],
            [0.6, 0.15, 0.05, 0.2],
        ])
        pssm = PSSM(data, "ACTG")
        loader = io_app.load_tabular(sep="\t")
        with TemporaryDirectory(dir=".") as dirname:
            writer = io_app.write_tabular(data_path=dirname, format="tsv")
            outpath = join(dirname, "delme.tsv")
            writer.write(pssm, identifier=outpath)
            new = loader(outpath)
            expected = safe_log(data) - safe_log(
                numpy.array([0.25, 0.25, 0.25, 0.25]))
            # NOTE(review): assumes the written table holds one row per
            # matrix cell, in row-major order, with the value in the third
            # column -- confirm against the write_tabular output format.
            num_cols = expected.shape[1]
            # visit every cell, not just the first len(expected) of them, and
            # map the flat table row back to (row, column) with divmod
            for flat_index in range(expected.size):
                row, col = divmod(flat_index, num_cols)
                self.assertTrue(
                    numpy.isclose(new.array[flat_index][2],
                                  expected[row][col],
                                  atol=0.0001))
Beispiel #5
0
 def test_construct_succeeds(self):
     """construction from frequencies produces the expected matrix"""
     freqs = [
         [0.1, 0.3, 0.5, 0.1],
         [0.25, 0.25, 0.25, 0.25],
         [0.05, 0.8, 0.05, 0.1],
         [0.7, 0.1, 0.1, 0.1],
         [0.6, 0.15, 0.05, 0.2],
     ]
     got = PSSM(freqs, "ACTG").array

     # reference values, given to three decimal places
     expected = array([
         [-1.322, 0.263, 1.0, -1.322],
         [0.0, 0.0, 0.0, 0.0],
         [-2.322, 1.678, -2.322, -1.322],
         [1.485, -1.322, -1.322, -1.322],
         [1.263, -0.737, -2.322, -0.322],
     ])
     assert_allclose(got, expected, atol=1e-3)
Beispiel #6
0
    def test_load_tabular_pssm(self):
        """a PSSM written as tsv is recovered by load_tabular(as_type='pssm')"""

        # data from test_profile
        freqs = [
            [0.1, 0.3, 0.5, 0.1],
            [0.25, 0.25, 0.25, 0.25],
            [0.05, 0.8, 0.05, 0.1],
            [0.7, 0.1, 0.1, 0.1],
            [0.6, 0.15, 0.05, 0.2],
        ]
        original = PSSM(freqs, "ACTG")
        with TemporaryDirectory(dir=".") as tmp_dir:
            target = join(tmp_dir, "delme.tsv")
            io_app.write_tabular(data_path=tmp_dir, format="tsv").write(
                original, identifier=target
            )
            # read the file straight back, asking for a PSSM
            reader = io_app.load_tabular(sep="\t", as_type="pssm")
            recovered = reader(target)
            assert_allclose(original.array, recovered.array, atol=0.0001)
Beispiel #7
0
    def test_construct_fails(self):
        """construction raises ValueError for wrong data type or no data"""
        invalid_counts = (
            # can't use strings
            [["A", "A"], ["A", "A"], ["A", "A"]],
            # or floats
            [[1.1, 2.1], [0.0, 2.1], [3.0, 4.5]],
            # or be empty
            [],
            [[], []],
        )
        for bad in invalid_counts:
            with self.assertRaises(ValueError):
                MotifCountsArray(bad, "AB")

        # two data columns but four motifs -- presumably the mismatch is what
        # PSSM rejects here
        two_columns = [[2, 4], [3, 5], [4, 8]]
        with self.assertRaises(ValueError):
            PSSM(two_columns, "ACGT")
Beispiel #8
0
    def test_score_indices(self):
        """scoring an index-encoded sequence gives the expected scores"""
        freqs = [
            [0.1, 0.3, 0.5, 0.1],
            [0.25, 0.25, 0.25, 0.25],
            [0.05, 0.8, 0.05, 0.1],
            [0.7, 0.1, 0.1, 0.1],
            [0.6, 0.15, 0.05, 0.2],
        ]
        pssm = PSSM(freqs, "ACTG")

        plain = [3, 1, 2, 0, 2, 2, 3]
        assert_allclose(
            pssm.score_indexed_seq(plain), [-4.481, -5.703, -2.966], atol=1e-3
        )

        # same sequence with the first index replaced by 4; only the first
        # score is expected to change:
        # log2 of (0.25 * 0.05 * 0.7 * 0.05) / .25**4 = -3.158...
        first_is_four = [4] + plain[1:]
        assert_allclose(
            pssm.score_indexed_seq(first_is_four),
            [-3.158, -5.703, -2.966],
            atol=1e-3,
        )

        # fails if sequence too short
        with self.assertRaises(ValueError):
            pssm.score_indexed_seq(first_is_four[:3])