def test_baddata(self):
    """inference should raise when input is bad

    A plain integer is handed to ``inference`` in place of a model; the
    call is expected to raise rather than return a result.
    """
    rs = ReferenceSet(dict(refA=0.5, refB=0.5), ids=["a", "b", "c"])
    rs.prep()
    # NOTE(review): Exception is the broadest possible match; narrow it to
    # the concrete error type once that is confirmed against ReferenceSet.
    with self.assertRaises(Exception):
        rs.inference(5)
def test_prep_row_priors(self):
    """prepare row priors.

    Builds a two-reference set over the ids in ``zerovals``, runs
    ``prep()``, and checks ``row_names`` plus selected ``row_priors``.
    """
    # let ref universe have two annotations (this set declares no null)
    refA = Representation(data=dict(a=1), name="refA")
    refA.defaults(zerovals)
    refB = Representation(data=dict(a=1, b=0.8), name="refB")
    refB.defaults(zerovals)
    rs = ReferenceSet(dict(refA=0.5, refB=0.5), ids=zerovals.keys())
    rs.add(refA).add(refB)
    # compute feature priors
    rs.prep()
    # row_priors should gain key/values for all features
    expected_features = set(zerovals.keys())
    self.assertEqual(set(rs.row_names), expected_features)
    # features declared in representations should get reasonable priors;
    # the expected values are floats, so compare with a tolerance instead
    # of exact equality
    a_index = rs.rows["a"]
    b_index = rs.rows["b"]
    d_index = rs.rows["d"]
    self.assertAlmostEqual(rs.row_priors[a_index], 1,
                           msg="refA and refB both have a")
    self.assertAlmostEqual(rs.row_priors[b_index], 0.4,
                           msg="only refB has b, so 0.8/2")
    self.assertAlmostEqual(rs.row_priors[d_index], 0.2,
                           msg="value is 1/num features")
def test_between2(self):
    """Inference for a model that is equally similar to two references."""
    # reference universe: two annotations carrying identical priors
    column_priors = dict(refA=0.5, refB=0.5)
    refset = ReferenceSet(
        column_priors,
        ids=self.obo.ids(),
        row_priors=self.obodefaults,
    )
    refset.add(self.refA).add(self.refB)
    refset.learn_obo(self.obo)
    refset.prep()

    result = refset.inference(self.y3model)
    score_a = result["refA"]
    score_b = result["refB"]
    self.assertAlmostEqual(score_a, score_b, msg="equally likely")
def test_between_refs_and_null(self):
    """Inference when the model is similar to two refs and there is a null."""
    # reference universe: two annotations plus a null entry
    column_priors = dict(null=0.8, refA=0.15, refB=0.15)
    refset = ReferenceSet(
        column_priors,
        ids=self.obo.ids(),
        row_priors=self.obodefaults,
    )
    refset.add(self.refA).add(self.refB)
    refset.learn_obo(self.obo)
    refset.prep()

    result = refset.inference(self.y3model)
    score_a = result["refA"]
    score_b = result["refB"]
    self.assertAlmostEqual(score_a, score_b, msg="equally likely")
def test_model_nodata(self):
    """Inference for a model that carries no data values at all."""
    # the model is created with a name only; nothing is set on it
    empty_model = Representation(name="nodata")

    # reference universe: two annotations plus a null entry
    column_priors = dict(null=0.8, refA=0.1, refB=0.1)
    refset = ReferenceSet(
        column_priors,
        ids=self.obo.ids(),
        row_priors=self.obodefaults,
    )
    refset.add(self.refA).add(self.refB)
    refset.learn_obo(self.obo)
    refset.prep()

    result = refset.inference(empty_model)
    score_a = result["refA"]
    score_b = result["refB"]
    self.assertAlmostEqual(score_a, score_b, msg="equally likely")
def test_difference_in_priors(self):
    """Inference when two references are given different priors."""
    # reference universe: a null entry plus two annotations, where refA
    # is given half the prior of refB
    column_priors = dict(null=0.85, refA=0.05, refB=0.1)
    refset = ReferenceSet(
        column_priors,
        ids=self.obo.ids(),
        row_priors=self.obodefaults,
    )
    refset.add(self.refA).add(self.refB)
    refset.learn_obo(self.obo)
    refset.prep()

    result = refset.inference(self.y3model)
    score_a = result["refA"]
    score_b = result["refB"]
    self.assertLess(score_a, score_b,
                    msg="equal match, but A has weaker prior")
def test_underflow(self):
    """attempt to get underflow in individual p.

    Uses a model with extreme values together with very small reference
    priors, then checks that the scores returned by ``inference`` still
    compare as >= 0.
    """
    # let model have very sure values
    model = Representation(name="underflow")
    model.set("Y:007", 0.00001).set("Y:004", 1).set("Y:003", 1)
    # let ref universe have two annotations and one null
    refA = Representation(name="refA").set("Y:003", 1)
    refB = Representation(name="refB").set("Y:003", 1)
    rs = ReferenceSet(dict(null=0.98, refA=0.001, refB=0.001),
                      ids=self.obo.ids())
    rs.add(refA).add(refB)
    rs.learn_obo(self.obo)
    rs.prep()
    result = rs.inference(model, verbose=True)
    # NOTE(review): the original asserted on "refB" twice; the duplicate
    # may have been meant for "null" -- confirm that inference() returns
    # a "null" entry before adding that check.
    for name in ("refA", "refB"):
        self.assertGreaterEqual(
            result[name], 0,
            msg="must always be a number, even if zero")
class ReferenceSetWriteTests(unittest.TestCase):
    """Tests for saving a ReferenceSet to disk."""

    def setUp(self):
        """Build a prepared reference set holding two references."""
        # reference universe: two annotations plus a null entry
        ref_a = Representation(data=dict(a=1, b=0.8), name="refA")
        ref_a.defaults(zerovals)
        ref_b = Representation(data=dict(a=1, d=0.2), name="refB")
        ref_b.defaults(zerovals)
        column_priors = dict(null=0.7, refA=0.15, refB=0.15)
        self.rs = ReferenceSet(column_priors, ids=zerovals.keys())
        self.rs.add(ref_a).add(ref_b)
        self.rs.prep()

    def tearDown(self):
        """Remove any output files a test may have left behind."""
        suffixes = (
            "_row_priors.json",
            "_column_priors.json",
            "_data.tsv.gz",
        )
        for suffix in suffixes:
            remove_if_exists(outfile + suffix)

    def test_write(self):
        """Saving produces three files with the expected structure."""
        self.rs.save(outfile)
        row_path = join(outfile + "_row_priors.json")
        col_path = join(outfile + "_column_priors.json")
        data_path = join(outfile + "_data.tsv.gz")
        for path in (row_path, col_path, data_path):
            self.assertTrue(exists(path))

        # read everything back before checking the structure
        with open(row_path, "r") as handle:
            row_priors = json.load(handle)
        with open(col_path, "r") as handle:
            col_priors = json.load(handle)
        with gzip.open(data_path, "rt") as handle:
            lines = handle.readlines()

        num_features = len(zerovals)
        self.assertEqual(len(row_priors), num_features)
        self.assertEqual(len(col_priors), 3, "null, refA, refB")
        # one header line followed by one line per feature
        self.assertEqual(len(lines), num_features + 1)
        self.assertEqual(lines[0], "\tnull\trefA\trefB\n")

    def test_write_header(self):
        """Saving with a label puts it in the first header column."""
        # second argument to save() names the first column of the matrix
        self.rs.save(outfile, "phenotype")
        data_path = join(outfile + "_data.tsv.gz")
        self.assertTrue(exists(data_path))

        with gzip.open(data_path, "rt") as handle:
            lines = handle.readlines()

        self.assertEqual(len(lines), len(zerovals) + 1)
        self.assertEqual(lines[0], "phenotype\tnull\trefA\trefB\n")