Exemple #1
0
 def test_to_freqs(self):
     """Counts in each row are divided by that row's total."""
     counts = array([[2, 4], [3, 5], [4, 8]])
     # column vector of row totals so the division broadcasts per row
     row_totals = vstack(counts.sum(axis=1))
     expected = counts / row_totals
     freqs = MotifCountsArray(array(counts), "AB").to_freq_array()
     assert_allclose(freqs.array, expected)
Exemple #2
0
 def test_to_freqs_1d(self):
     """1D counts convert to each count's fraction of the overall total."""
     counts = [43, 48, 114, 95]
     grand_total = sum(counts)
     expected = array([c / grand_total for c in counts], dtype=float)
     profile = MotifCountsArray(counts, motifs=("T", "C", "A", "G"))
     assert_allclose(profile.to_freq_array().array, expected)
Exemple #3
0
 def test_sliced_range(self):
     """Slicing rows with a range keeps the corresponding row names."""
     row_names = ["FlyingFox", "DogFaced", "FreeTaile"]
     table = [[316, 134, 133, 317], [321, 136, 123, 314], [331, 143, 127, 315]]
     full = MotifCountsArray(table, ("A", "C", "G", "T"), row_indices=row_names)
     self.assertEqual(full.keys(), row_names)
     # the first two rows must carry the first two names
     first_two = full[:2]
     self.assertEqual(first_two.keys(), row_names[:2])
Exemple #4
0
 def test_slicing(self):
     """Indexing a freqs array by row name returns that row's values."""
     row_names = ["DogFaced", "FlyingFox"]
     motifs = ["A", "C", "G", "T"]
     table = [[3, 2, 3, 2], [3, 2, 3, 2]]
     freqs = MotifCountsArray(table, motifs, row_indices=row_names).to_freq_array()
     selected = freqs["FlyingFox"]
     assert_allclose(selected.to_array(), [0.3, 0.2, 0.3, 0.2])
Exemple #5
0
    def test_load_tabular_motif_counts_array(self):
        """A MotifCountsArray written as TSV loads back with equal contents."""
        original = MotifCountsArray([[2, 4], [3, 5], [4, 8]], "AB")
        reader = io_app.load_tabular(sep="\t", as_type="motif_counts")
        with TemporaryDirectory(dir=".") as tmp_dir:
            tsv_writer = io_app.write_tabular(data_path=tmp_dir, format="tsv")
            target = join(tmp_dir, "delme.tsv")
            tsv_writer.write(original, identifier=target)
            # compare while the temporary directory still exists
            restored = reader(target)
            self.assertEqual(original.to_dict(), restored.to_dict())
Exemple #6
0
    def test_to_freqs_pseudocount(self):
        """Frequencies are computed from counts shifted by the pseudocount."""
        counts = array([[2, 4], [3, 5], [0, 8]])
        marr = MotifCountsArray(array(counts), "AB")
        # an integer pseudocount first, then a fractional one
        for pseudo in (1, 0.5):
            observed = marr.to_freq_array(pseudocount=pseudo)
            shifted = counts + pseudo
            expected = shifted / vstack(shifted.sum(axis=1))
            assert_allclose(observed.array, expected)
Exemple #7
0
 def test_to_pssm_pseudocount(self):
     """produces a PSSM array with pseudocount

     The expected values are log2 of the pseudocount-adjusted
     frequencies divided by 0.25 (presumably the uniform background
     for the four motifs).
     """
     # two cells are zero, so without a pseudocount their frequency
     # would be 0 and log2 of it would be -inf
     data = array([
         [10, 30, 50, 10],
         [25, 25, 25, 25],
         [5, 80, 0, 10],
         [70, 10, 10, 10],
         [60, 15, 0, 20],
     ])
     marr = MotifCountsArray(array(data), "ACGT")
     got = marr.to_pssm(pseudocount=1)
     freqs = marr._to_freqs(pseudocount=1)
     expect = log2(freqs / 0.25)
     assert_allclose(got.array, expect, atol=1e-3)
Exemple #8
0
    def test_construct_succeeds(self):
        """Construction accepts integer counts given as lists or arrays."""
        from cogent3.maths.stats.number import CategoryCounter

        alphabet = "ACGT"
        # one counter per base, each holding 20 copies of that base
        counters = [CategoryCounter([base] * 20) for base in "ACGT"]
        counter_rows = [counter.tolist(alphabet) for counter in counters]
        # only checks that construction does not raise
        MotifCountsArray(counter_rows, alphabet)

        table = [[2, 4], [3, 5], [4, 8]]
        as_array = MotifCountsArray(array(table), "AB")
        self.assertEqual(as_array.array.tolist(), table)

        as_list = MotifCountsArray(table, "AB")
        self.assertEqual(as_list.array.tolist(), table)
Exemple #9
0
def read(filepath):
    """returns matrixid and MotifCountsArray matrix

    Parameters
    ----------
    filepath
        path to a text file. A line starting with ">" is the header; every
        other non-blank line is a state label followed by integer counts
        (text matching the module-level ``_brackets`` pattern is removed
        before the line is split on whitespace).

    Returns
    -------
    (identifier, pwm)
        identifier is the list of whitespace-separated fields following
        ">" on the last header line seen, or None if the file has no
        header line. pwm is a MotifCountsArray with one column per state,
        ordered as produced by iterating the selected moltype.
    """
    # bound up front so a file lacking a ">" line does not fail with
    # UnboundLocalError at the return statement
    identifier = None
    states = []
    rows = []
    with open(filepath) as infile:
        for line in infile:
            line = line.strip()
            if line.startswith(">"):
                identifier = line[1:].split()
            elif line:
                fields = _brackets.sub("", line).split()
                states.append(fields.pop(0).upper())
                rows.append([int(i) for i in fields])

    counts_by_state = dict(zip(states, rows))
    # four states means a nucleic acid, "U" picks rna over dna;
    # any other number of states is treated as protein
    if len(states) == 4:
        name = "rna" if "U" in states else "dna"
    else:
        name = "protein"

    # reorder rows to the moltype's own state order, then transpose so
    # that states become columns
    states = list(get_moltype(name))
    matrix = array([counts_by_state[s] for s in states], dtype=int).T

    pwm = MotifCountsArray(matrix, states)
    return identifier, pwm
Exemple #10
0
 def test_getitem(self):
     """Indexing by position or by name returns the expected values."""
     counts = array([[2, 4], [3, 5], [4, 8]])
     # same checks twice: integer row keys, then named row keys
     for row_kwargs, first, second in (
         ({}, 0, 1),
         ({"row_indices": ["a", "b", "c"]}, "a", "b"),
     ):
         profile = MotifCountsArray(array(counts), "AB", **row_kwargs)
         self.assertEqual(profile[first].array.tolist(), [2, 4])
         self.assertEqual(profile[first, "B"], 4)
         self.assertEqual(profile[first, :].array.tolist(), [2, 4])
         self.assertEqual(profile[:, "A"].array.tolist(), [[2], [3], [4]])
         self.assertEqual(profile[:, "A":"B"].array.tolist(), [[2], [3], [4]])
         self.assertEqual(profile[second, "A"], 3)
Exemple #11
0
 def test_to_pssm(self):
     """Counts convert to the expected PSSM values (to 3 decimals)."""
     counts = array([
         [10, 30, 50, 10],
         [25, 25, 25, 25],
         [5, 80, 5, 10],
         [70, 10, 10, 10],
         [60, 15, 5, 20],
     ])
     expected = array([
         [-1.322, 0.263, 1.0, -1.322],
         [0.0, 0.0, 0.0, 0.0],
         [-2.322, 1.678, -2.322, -1.322],
         [1.485, -1.322, -1.322, -1.322],
         [1.263, -0.737, -2.322, -0.322],
     ])
     pssm = MotifCountsArray(array(counts), "ACGT").to_pssm()
     # expected values are rounded, hence the absolute tolerance
     assert_allclose(pssm.array, expected, atol=1e-3)
Exemple #12
0
    def test_construct_fails(self):
        """Invalid or empty input is rejected with ValueError."""
        invalid_counts = (
            # strings are not counts
            [["A", "A"], ["A", "A"], ["A", "A"]],
            # floats are not counts either
            [[1.1, 2.1], [0.0, 2.1], [3.0, 4.5]],
            # no data at all
            [],
            [[], []],
        )
        for bad in invalid_counts:
            with self.assertRaises(ValueError):
                MotifCountsArray(bad, "AB")

        # a PSSM built from these two-column counts with "ACGT" must fail too
        with self.assertRaises(ValueError):
            PSSM([[2, 4], [3, 5], [4, 8]], "ACGT")
Exemple #13
0
    def test_write_tabular_motif_counts_array(self):
        """A written MotifCountsArray loads back as one table row per cell."""
        data = [[2, 4], [3, 5], [4, 8]]
        mca = MotifCountsArray(data, "AB")
        loader = io_app.load_tabular(sep="\t")

        # when written to file in tabular form the loaded table has
        # dim-1 and dim-2 as column labels plus a value column, one table
        # row per cell, listed row by row
        cells = (
            (row_index, motif, count)
            for row_index, row in enumerate(data)
            for motif, count in zip("AB", row)
        )
        expected = {
            table_row: {"dim-1": row_index, "dim-2": motif, "value": count}
            for table_row, (row_index, motif, count) in enumerate(cells)
        }

        with TemporaryDirectory(dir=".") as tmp_dir:
            tsv_writer = io_app.write_tabular(data_path=tmp_dir, format="tsv")
            target = join(tmp_dir, "delme.tsv")
            tsv_writer.write(mca, identifier=target)
            loaded = loader(target)
            self.assertEqual(expected, loaded.to_dict())
Exemple #14
0
 def test_to_dict(self):
     """to_dict maps each row index to a motif -> count mapping."""
     profile = MotifCountsArray([[4, 0, 0]], ["A", "C", "D"])
     expected = {0: {"A": 4, "C": 0, "D": 0}}
     self.assertEqual(profile.to_dict(), expected)
Exemple #15
0
 def test_str_repr(self):
     """str() and repr() run on a counts array without raising."""
     counts = array([[2, 4], [3, 5], [4, 8]])
     profile = MotifCountsArray(array(counts), "AB")
     # output is not inspected; this only exercises both code paths
     for render in (str, repr):
         render(profile)
Exemple #16
0
    def test_take(self):
        """take selects rows/columns by key or index, optionally negated."""
        data = array([[2, 4, 9, 2], [3, 5, 8, 0], [4, 8, 25, 13]])
        motifs = ["A", "B", "C", "D"]
        negated = {"negate": True}

        by_column = MotifCountsArray(data, motifs)
        # a bare int is not an indexable collection of indices
        with self.assertRaises(ValueError):
            by_column.take(1, axis=1)

        # (selection, extra kwargs, column positions expected to remain)
        column_cases = (
            (["A", "D"], {}, [0, 3]),
            (["A", "D"], negated, [1, 2]),
            ([0, 3], {}, [0, 3]),
            ([0, 3], negated, [1, 2]),
        )
        for selection, extra, kept in column_cases:
            taken = by_column.take(selection, axis=1, **extra)
            assert_allclose(taken.array, data.take(kept, axis=1))

        by_row = MotifCountsArray(data, motifs, row_indices=["a", "b", "c"])
        # (selection, extra kwargs, row positions expected to remain)
        row_cases = (
            (["a", "c"], {}, [0, 2]),
            (["a"], negated, [1, 2]),
            ([0, 2], {}, [0, 2]),
            ([0], negated, [1, 2]),
        )
        for selection, extra, kept in row_cases:
            taken = by_row.take(selection, axis=0, **extra)
            assert_allclose(taken.array, data.take(kept, axis=0))

        # 1D profile
        flat = MotifCountsArray(data[0], motifs)
        remaining = flat.take([0], negate=True, axis=1)
        assert_allclose(remaining.array, data[0].take([1, 2, 3]))
Exemple #17
0
 def test_iter(self):
     """Each item yielded by iterating a counts array has shape (2,)."""
     counts = MotifCountsArray(array([[2, 4], [3, 5], [4, 8]]), "AB")
     for position in counts:
         self.assertEqual(position.shape, (2,))