def test_construct_fails(self):
    """PSSM construction raises ValueError for invalid input data"""
    # every entry is zero
    all_zero = [[0.0] * 4 for _ in range(3)]
    # contains a numpy.nan entry
    has_nan = [
        [nan, -0.263, -1.0, -1.322],
        [-2.322, -1.678, -2.322, -1.322],
        [-1.485, -1.322, -1.322, -1.322],
        [-1.263, -0.737, -2.322, -0.322],
    ]
    # every entry is a negative number
    all_negative = [
        [-1.322, -0.263, -1.0, -1.322],
        [-2.322, -1.678, -2.322, -1.322],
        [-1.485, -1.322, -1.322, -1.322],
        [-1.263, -0.737, -2.322, -0.322],
    ]
    # checked in the same order as listed; the first case that does not
    # raise fails the test
    for invalid in (all_zero, has_nan, all_negative):
        with self.assertRaises(ValueError):
            PSSM(invalid, "ACTG")
def test_score_seq_obj(self):
    """scoring a sequence object gives the expected per-window scores"""
    from cogent3 import DNA

    freqs = [
        [0.1, 0.3, 0.5, 0.1],
        [0.25, 0.25, 0.25, 0.25],
        [0.05, 0.8, 0.05, 0.1],
        [0.7, 0.1, 0.1, 0.1],
        [0.6, 0.15, 0.05, 0.2],
    ]
    motifs = "ACTG"
    pssm = PSSM(freqs, motifs)
    # spell the sequence from motif indices
    motif_indices = (3, 1, 2, 0, 2, 2, 3)
    raw = "".join(motifs[index] for index in motif_indices)
    seq = DNA.make_seq(raw)
    got = pssm.score_seq(seq)
    # 7 characters against 5 PSSM rows: three scores are expected
    assert_allclose(got, [-4.481, -5.703, -2.966], atol=1e-3)
def test_score_str(self):
    """scoring a plain string gives the expected per-window scores"""
    freqs = [
        [0.1, 0.3, 0.5, 0.1],
        [0.25, 0.25, 0.25, 0.25],
        [0.05, 0.8, 0.05, 0.1],
        [0.7, 0.1, 0.1, 0.1],
        [0.6, 0.15, 0.05, 0.2],
    ]
    motifs = "ACTG"
    pssm = PSSM(freqs, motifs)
    motif_indices = (3, 1, 2, 0, 2, 2, 3)
    text = "".join(motifs[index] for index in motif_indices)
    got = pssm.score_seq(text)
    assert_allclose(got, [-4.481, -5.703, -2.966], atol=1e-3)

    # a 3-character string (the PSSM has 5 rows) must be rejected
    too_short = text[:3]
    with self.assertRaises(ValueError):
        pssm.score_seq(too_short)
def test_write_tabular_pssm(self):
    """correctly writes tabular data for PSSM

    Writes a PSSM to a tsv file, reloads it as a plain table and compares
    every written value against the log-odds computed directly from the
    input frequencies with a uniform 0.25 background.
    """
    # data from test_profile
    data = numpy.array(
        [
            [0.1, 0.3, 0.5, 0.1],
            [0.25, 0.25, 0.25, 0.25],
            [0.05, 0.8, 0.05, 0.1],
            [0.7, 0.1, 0.1, 0.1],
            [0.6, 0.15, 0.05, 0.2],
        ]
    )
    pssm = PSSM(data, "ACTG")
    loader = io_app.load_tabular(sep="\t")
    with TemporaryDirectory(dir=".") as dirname:
        writer = io_app.write_tabular(data_path=dirname, format="tsv")
        outpath = join(dirname, "delme.tsv")
        writer.write(pssm, identifier=outpath)
        new = loader(outpath)
        expected = safe_log(data) - safe_log(numpy.array([0.25, 0.25, 0.25, 0.25]))
        # NOTE(review): assumes the file holds one record per PSSM cell in
        # row-major order with the value in the third column -- this is what
        # the previous (partial) check relied on; confirm against the writer.
        flat_expected = expected.ravel()
        self.assertEqual(len(new.array), flat_expected.size)
        # Check every cell. The earlier loop ran only len(expected) == 5
        # times and indexed the column as ``i - j`` instead of ``i % 4``.
        for i, want in enumerate(flat_expected):
            self.assertTrue(numpy.isclose(new.array[i][2], want, atol=0.0001))
def test_construct_succeeds(self):
    """PSSM built from frequencies holds the expected score matrix"""
    freqs = [
        [0.1, 0.3, 0.5, 0.1],
        [0.25, 0.25, 0.25, 0.25],
        [0.05, 0.8, 0.05, 0.1],
        [0.7, 0.1, 0.1, 0.1],
        [0.6, 0.15, 0.05, 0.2],
    ]
    # expected contents of pssm.array, one row per input row
    wanted = array(
        [
            [-1.322, 0.263, 1.0, -1.322],
            [0.0, 0.0, 0.0, 0.0],
            [-2.322, 1.678, -2.322, -1.322],
            [1.485, -1.322, -1.322, -1.322],
            [1.263, -0.737, -2.322, -0.322],
        ]
    )
    built = PSSM(freqs, "ACTG")
    assert_allclose(built.array, wanted, atol=1e-3)
def test_load_tabular_pssm(self):
    """a PSSM written to tsv and loaded back as a pssm has the same array"""
    # data from test_profile
    freqs = [
        [0.1, 0.3, 0.5, 0.1],
        [0.25, 0.25, 0.25, 0.25],
        [0.05, 0.8, 0.05, 0.1],
        [0.7, 0.1, 0.1, 0.1],
        [0.6, 0.15, 0.05, 0.2],
    ]
    original = PSSM(freqs, "ACTG")
    load = io_app.load_tabular(sep="\t", as_type="pssm")
    with TemporaryDirectory(dir=".") as tmp_dir:
        save = io_app.write_tabular(data_path=tmp_dir, format="tsv")
        target = join(tmp_dir, "delme.tsv")
        save.write(original, identifier=target)
        reloaded = load(target)
        assert_allclose(original.array, reloaded.array, atol=0.0001)
def test_construct_fails(self):
    """construction raises ValueError for wrong data type or no data"""
    invalid_counts = (
        # can't use a string
        [["A", "A"], ["A", "A"], ["A", "A"]],
        # or a float
        [[1.1, 2.1], [0.0, 2.1], [3.0, 4.5]],
        # or be empty
        [],
        [[], []],
    )
    for bad in invalid_counts:
        with self.assertRaises(ValueError):
            MotifCountsArray(bad, "AB")

    # a PSSM from this 3x2 integer data with a 4-letter alphabet must
    # also be rejected
    with self.assertRaises(ValueError):
        PSSM([[2, 4], [3, 5], [4, 8]], "ACGT")
def test_score_indices(self):
    """scoring an index-encoded sequence gives the expected scores"""
    freqs = [
        [0.1, 0.3, 0.5, 0.1],
        [0.25, 0.25, 0.25, 0.25],
        [0.05, 0.8, 0.05, 0.1],
        [0.7, 0.1, 0.1, 0.1],
        [0.6, 0.15, 0.05, 0.2],
    ]
    pssm = PSSM(freqs, "ACTG")

    encoded = [3, 1, 2, 0, 2, 2, 3]
    got = pssm.score_indexed_seq(encoded)
    assert_allclose(got, [-4.481, -5.703, -2.966], atol=1e-3)

    # first index replaced by 4, which is outside the 4-letter alphabet
    with_extra_index = [4, 1, 2, 0, 2, 2, 3]
    got = pssm.score_indexed_seq(with_extra_index)
    # log2 of (0.25 * 0.05 * 0.7 * 0.05) / .25**4 = -3.158...
    assert_allclose(got, [-3.158, -5.703, -2.966], atol=1e-3)

    # fails if sequence too short
    with self.assertRaises(ValueError):
        pssm.score_indexed_seq(with_extra_index[:3])