Ejemplo n.º 1
0
 def test_baddata(self):
     """Inference should raise when the input is bad.

     A reference set is prepared without adding any representations and is
     then asked to run inference on a plain integer.
     """

     rs = ReferenceSet(dict(refA=0.5, refB=0.5), ids=["a", "b", "c"])
     rs.prep()
     # the raised exception is not inspected, so it is not bound to a name
     with self.assertRaises(Exception):
         rs.inference(5)
Ejemplo n.º 2
0
    def test_prep_row_priors(self):
        """Preparing a reference set computes a prior for every feature.

        Two references are registered, prep() is called, and the resulting
        row names and row priors are checked for three features.
        """

        # let ref universe have two annotations (no null reference here)
        refA = Representation(data=dict(a=1), name="refA")
        refA.defaults(zerovals)
        refB = Representation(data=dict(a=1, b=0.8), name="refB")
        refB.defaults(zerovals)
        rs = ReferenceSet(dict(refA=0.5, refB=0.5), ids=zerovals.keys())
        rs.add(refA).add(refB)
        # compute feature priors
        rs.prep()
        # row_priors should gain key/values for all features
        expected_features = set(zerovals.keys())
        self.assertEqual(set(rs.row_names), expected_features)
        # features declared in representations should get reasonable priors
        a_index = rs.rows["a"]
        b_index = rs.rows["b"]
        d_index = rs.rows["d"]
        # priors are computed floating-point values, so compare with a
        # tolerance instead of exact equality; msg must be passed by keyword
        # because the third positional argument of assertAlmostEqual is places
        self.assertAlmostEqual(rs.row_priors[a_index], 1,
                               msg="refA and refB both have a")
        self.assertAlmostEqual(rs.row_priors[b_index], 0.4,
                               msg="only refB has b, so 0.8/2")
        self.assertAlmostEqual(rs.row_priors[d_index], 0.2,
                               msg="value is 1/num features")
Ejemplo n.º 3
0
 def test_between2(self):
     """Inference scores two references equally for the y3 model."""

     # reference universe with two equally weighted annotations
     priors = {"refA": 0.5, "refB": 0.5}
     refset = ReferenceSet(priors, ids=self.obo.ids(),
                           row_priors=self.obodefaults)
     refset.add(self.refA).add(self.refB)
     refset.learn_obo(self.obo)
     refset.prep()

     scores = refset.inference(self.y3model)
     score_a = scores["refA"]
     score_b = scores["refB"]
     self.assertAlmostEqual(score_a, score_b, msg="equally likely")
Ejemplo n.º 4
0
 def test_between_refs_and_null(self):
     """Inference when the model is similar to two refs and there is a null."""

     # reference universe with two annotations and one null
     # NOTE(review): these priors sum to 1.1 rather than 1 - confirm intended
     priors = {"null": 0.8, "refA": 0.15, "refB": 0.15}
     refset = ReferenceSet(priors, ids=self.obo.ids(),
                           row_priors=self.obodefaults)
     refset.add(self.refA).add(self.refB)
     refset.learn_obo(self.obo)
     refset.prep()

     scores = refset.inference(self.y3model)
     score_a = scores["refA"]
     score_b = scores["refB"]
     self.assertAlmostEqual(score_a, score_b, msg="equally likely")
Ejemplo n.º 5
0
 def test_model_nodata(self):
     """Inference on a model that carries no data.

     Both references have the same prior, and the test expects them to
     receive the same score.
     """

     empty_model = Representation(name="nodata")
     # reference universe with two annotations and one null
     priors = {"null": 0.8, "refA": 0.1, "refB": 0.1}
     refset = ReferenceSet(priors, ids=self.obo.ids(),
                           row_priors=self.obodefaults)
     refset.add(self.refA).add(self.refB)
     refset.learn_obo(self.obo)
     refset.prep()

     scores = refset.inference(empty_model)
     score_a = scores["refA"]
     score_b = scores["refB"]
     self.assertAlmostEqual(score_a, score_b, msg="equally likely")
Ejemplo n.º 6
0
 def test_difference_in_priors(self):
     """Inference when the model matches two references with unequal priors.

     refA is given a smaller prior than refB, and the test expects refA to
     end up with the smaller score.
     """

     # reference universe with two annotations and one null
     priors = {"null": 0.85, "refA": 0.05, "refB": 0.1}
     refset = ReferenceSet(priors, ids=self.obo.ids(),
                           row_priors=self.obodefaults)
     refset.add(self.refA).add(self.refB)
     refset.learn_obo(self.obo)
     refset.prep()

     scores = refset.inference(self.y3model)
     weaker = scores["refA"]
     stronger = scores["refB"]
     self.assertLess(weaker, stronger,
                     msg="equal match, but A has weaker prior")
Ejemplo n.º 7
0
 def test_underflow(self):
     """Attempt to provoke numeric underflow in individual probabilities.

     The model combines one very small value with two values of 1, and the
     reference priors are tiny compared to the null prior. The scores for
     both references must still compare as >= 0.
     """

     # let model have very sure values
     model = Representation(name="underflow")
     model.set("Y:007", 0.00001).set("Y:004", 1).set("Y:003", 1)
     # let ref universe have two annotations and one null
     refA = Representation(name="refA").set("Y:003", 1)
     refB = Representation(name="refB").set("Y:003", 1)
     rs = ReferenceSet(dict(null=0.98, refA=0.001, refB=0.001),
                       ids=self.obo.ids())
     rs.add(refA).add(refB)
     rs.learn_obo(self.obo)
     rs.prep()

     result = rs.inference(model, verbose=True)
     # one assertion per reference; a NaN would fail the >= comparison
     self.assertGreaterEqual(result["refA"], 0,
                     msg="must always be a number, even if zero")
     self.assertGreaterEqual(result["refB"], 0,
                     msg="must always be a number, even if zero")
Ejemplo n.º 8
0
class ReferenceSetWriteTests(unittest.TestCase):
    """Tests for saving a ReferenceSet to disk files."""

    @staticmethod
    def _read_gzip_lines(path):
        """Return all lines of a gzip-compressed text file."""
        with gzip.open(path, "rt") as handle:
            return handle.readlines()

    def setUp(self):
        """Build and prepare a reference set holding two references."""

        # reference universe with two annotations and one null
        ref_a = Representation(data={"a": 1, "b": 0.8}, name="refA")
        ref_a.defaults(zerovals)
        ref_b = Representation(data={"a": 1, "d": 0.2}, name="refB")
        ref_b.defaults(zerovals)
        priors = {"null": 0.7, "refA": 0.15, "refB": 0.15}
        self.rs = ReferenceSet(priors, ids=zerovals.keys())
        self.rs.add(ref_a).add(ref_b)
        self.rs.prep()

    def tearDown(self):
        """Remove the output files that a test may have written."""

        for suffix in ("_row_priors.json",
                       "_column_priors.json",
                       "_data.tsv.gz"):
            remove_if_exists(outfile + suffix)

    def test_write(self):
        """Saving writes row priors, column priors, and a data matrix."""

        self.rs.save(outfile)

        row_path = join(outfile + "_row_priors.json")
        col_path = join(outfile + "_column_priors.json")
        data_path = join(outfile + "_data.tsv.gz")
        for path in (row_path, col_path, data_path):
            self.assertTrue(exists(path))

        # load files and check expected structure
        with open(row_path, "r") as handle:
            row_priors = json.load(handle)
        with open(col_path, "r") as handle:
            col_priors = json.load(handle)
        data_lines = self._read_gzip_lines(data_path)

        num_features = len(zerovals)
        self.assertEqual(len(row_priors), num_features)
        self.assertEqual(len(col_priors), 3, "null, refA, refB")
        # one header line plus one line per feature
        self.assertEqual(len(data_lines), num_features + 1)
        self.assertEqual(data_lines[0], "\tnull\trefA\trefB\n")

    def test_write_header(self):
        """Saving with a label puts that label in the first header cell."""

        # perform a save with a name in the first column
        self.rs.save(outfile, "phenotype")

        data_path = join(outfile + "_data.tsv.gz")
        self.assertTrue(exists(data_path))

        data_lines = self._read_gzip_lines(data_path)

        self.assertEqual(len(data_lines), len(zerovals) + 1)
        self.assertEqual(data_lines[0], "phenotype\tnull\trefA\trefB\n")