def test_chi_square_model_reject(self):
    """chi_square_model should be falsy for models fit on differing data.

    The reference model is fit on two binary columns that are each 50 ones
    followed by 50 zeros. It is compared against a model whose first column
    is 30 ones / 70 zeros, and against a model whose second column is
    1 one / 99 zeros. Both comparisons are asserted to be falsy.
    """

    def binary_column(num_ones, num_zeros):
        # One feature column: all the ones first, then all the zeros.
        return np.concatenate([np.ones(num_ones), np.zeros(num_zeros)], axis=0)

    def fitted_model(columns):
        # Place the columns side by side (one feature per column) and fit.
        fitted = trepan.DiscreteModel()
        fitted.fit(np.stack(columns, axis=1))
        return fitted

    reference = fitted_model([binary_column(50, 50), binary_column(50, 50)])
    first_shifted = fitted_model([binary_column(30, 70), binary_column(50, 50)])
    second_shifted = fitted_model([binary_column(50, 50), binary_column(1, 99)])

    for other in (first_shifted, second_shifted):
        self.assertFalse(trepan.chi_square_model(reference, other, set()))
def test_chi_square_model_accept(self):
    """chi_square_model should be truthy for models fit on similar data.

    The reference model is fit on two binary columns that are each 50 ones
    followed by 50 zeros. It is compared against a model fit on identical
    data, and against a model whose first column is 40 ones / 60 zeros.
    Both comparisons are asserted to be truthy.
    """

    def binary_column(num_ones, num_zeros):
        # One feature column: all the ones first, then all the zeros.
        return np.concatenate([np.ones(num_ones), np.zeros(num_zeros)], axis=0)

    def fitted_model(columns):
        # Place the columns side by side (one feature per column) and fit.
        fitted = trepan.DiscreteModel()
        fitted.fit(np.stack(columns, axis=1))
        return fitted

    reference = fitted_model([binary_column(50, 50), binary_column(50, 50)])
    identical = fitted_model([binary_column(50, 50), binary_column(50, 50)])
    slightly_off = fitted_model([binary_column(40, 60), binary_column(50, 50)])

    for other in (identical, slightly_off):
        self.assertTrue(trepan.chi_square_model(reference, other, set()))
def test_sample(self):
    """Empirical frequencies of DiscreteModel.sample() should track the data.

    The model is fit on two float32 features: feature 0 is 75 zeros and
    25 ones, feature 1 is 1 zero and 99 ones. After 1000 draws the observed
    share of each value must fall inside a tolerance band around the
    training share.
    """

    def column(num_zeros, num_ones):
        # One float32 feature column: zeros first, then ones.
        return np.concatenate(
            [
                np.zeros(num_zeros, dtype=np.float32),
                np.ones(num_ones, dtype=np.float32),
            ],
            axis=0,
        )

    model = trepan.DiscreteModel()
    model.fit(np.stack([column(75, 25), column(1, 99)], axis=1))

    # Fixed seed for NumPy's global RNG (presumably what model.sample()
    # draws from); set after fitting and before the first draw.
    np.random.seed(2018)
    draws = np.stack([model.sample() for _ in range(1000)], axis=0)
    total = draws.shape[0]

    def frequency(feature, value):
        # Share of draws in which `feature` took `value`.
        return np.sum(draws[:, feature] == value) / total

    # (feature, value, lower bound, upper bound)
    expected_bands = [
        (0, 0, 0.7, 0.8),
        (0, 1, 0.2, 0.3),
        (1, 0, 0.001, 0.1),
        (1, 1, 0.8, 1.0),
    ]
    for feature, value, lower, upper in expected_bands:
        self.assertTrue(lower <= frequency(feature, value) <= upper)
def test_sample_with_hard_constraints(self):
    """Constrained sampling must pin constrained features and leave others free.

    The model is fit on three float32 features with zero/one splits of
    75/25, 50/50 and 20/80. Two single-split constraints are applied:
    ("left", feature 0 BELOW 0.5) and ("right", feature 2 BELOW 0.5).
    Every one of the 1000 samples must then have feature 0 == 0 and
    feature 2 == 1, while the unconstrained feature 1 stays close to 50/50.
    """

    def column(num_zeros, num_ones):
        # One float32 feature column: zeros first, then ones.
        return np.concatenate(
            [
                np.zeros(num_zeros, dtype=np.float32),
                np.ones(num_ones, dtype=np.float32),
            ],
            axis=0,
        )

    model = trepan.DiscreteModel()
    model.fit(np.stack([column(75, 25), column(50, 50), column(20, 80)], axis=1))

    below = trepan.Rule.SplitType.BELOW
    constraints = [
        ("left", trepan.Rule(0, 0.5, below)),
        ("right", trepan.Rule(2, 0.5, below)),
    ]
    oracle = trepan.Oracle(
        lambda x: x[:, 0], trepan.Oracle.DataType.DISCRETE, 0.05, 0.05
    )

    draws = np.stack(
        [oracle.sample_with_constraints(model, constraints) for _ in range(1000)]
    )

    # Constrained features are fully determined.
    self.assertTrue(np.all(draws[:, 0] == 0))
    self.assertTrue(np.all(draws[:, 2] == 1))

    # The unconstrained feature keeps its roughly even split.
    total = draws.shape[0]
    for value in (0, 1):
        share = np.sum(draws[:, 1] == value) / total
        self.assertTrue(0.4 <= share <= 0.6)
def test_fit(self):
    """fit() should record per-feature value frequencies and the values seen.

    Feature 0 is 75 zeros / 25 ones and feature 1 is 1 zero / 99 ones, so
    the fitted distributions are expected to be [0.75, 0.25] and
    [0.01, 0.99], with values [0.0, 1.0] for both features.
    """

    def column(num_zeros, num_ones):
        # One float32 feature column: zeros first, then ones.
        return np.concatenate(
            [
                np.zeros(num_zeros, dtype=np.float32),
                np.ones(num_ones, dtype=np.float32),
            ],
            axis=0,
        )

    model = trepan.DiscreteModel()
    model.fit(np.stack([column(75, 25), column(1, 99)], axis=1))

    expected_distributions = ([0.75, 0.25], [0.01, 0.99])
    for feature, expected in enumerate(expected_distributions):
        np.testing.assert_array_almost_equal(
            np.array(expected, dtype=np.float32), model.distributions[feature]
        )

    self.assertEqual([[0.0, 1.0], [0.0, 1.0]], model.values)
def test_sample_failure_mode(self):
    """Smoke test: constrained sampling on one specific model/constraint setup.

    The model is assembled by hand (no fit) with 15 features, twelve of
    which have a single possible value and three of which (5, 8, 9) are an
    even choice between 0.0 and 1.0. The test only checks that
    sample_with_constraints() completes; it makes no assertion about the
    returned sample.
    """
    # test with a specific failure mode
    above = trepan.Rule.SplitType.ABOVE
    below = trepan.Rule.SplitType.BELOW

    feature_values = [
        [0.0],
        [1.0],
        [1.0],
        [1.0],
        [1.0],
        [0.0, 1.0],
        [1.0],
        [1.0],
        [0.0, 1.0],
        [0.0, 1.0],
        [1.0],
        [1.0],
        [1.0],
        [0.0],
        [1.0],
    ]

    model = trepan.DiscreteModel()
    # Each feature is uniform over its listed values: [1.] for a single
    # value, [0.5, 0.5] for two.
    model.distributions = [
        np.array([1.0 / len(values)] * len(values), dtype=np.float32)
        for values in feature_values
    ]
    model.values = feature_values
    model.num_features = len(model.values)

    def build_rule(splits, num_required=None):
        # First split goes through the constructor, the rest via add_split;
        # num_required is only touched when explicitly requested.
        (first_feature, first_type), *remaining = splits
        rule = trepan.Rule(first_feature, 0.5, first_type)
        for feature, split_type in remaining:
            rule.add_split(feature, 0.5, split_type)
        if num_required is not None:
            rule.num_required = num_required
        return rule

    rule1 = build_rule([(1, above), (5, below)], num_required=2)
    rule2 = build_rule([(14, above), (4, below)], num_required=2)
    rule3 = build_rule([(13, above), (7, above), (8, below)])
    rule4 = build_rule([(3, above), (10, above)], num_required=2)
    rule5 = build_rule([(2, above), (9, above)])

    constraints = [
        ("left", trepan.Rule(6, 0.5, above)),
        ("right", trepan.Rule(0, 0.5, above)),
        ("left", rule1),
        ("left", rule2),
        ("left", rule3),
        ("left", trepan.Rule(12, 0.5, above)),
        ("left", rule4),
        ("left", rule5),
    ]

    oracle = trepan.Oracle(
        lambda x: x[:, 0], trepan.Oracle.DataType.DISCRETE, 0.05, 0.05
    )
    oracle.sample_with_constraints(model, constraints)
def test_split_probability(self):
    """split_probability() should match the training share for a split.

    Feature 0 is 75 zeros / 25 ones, so a BELOW split at 0.5 is expected to
    have probability 0.75. Feature 1 is 50/50, so an ABOVE split at 0.5 is
    expected to have probability 0.5.
    """

    def column(num_zeros, num_ones):
        # One float32 feature column: zeros first, then ones.
        return np.concatenate(
            [
                np.zeros(num_zeros, dtype=np.float32),
                np.ones(num_ones, dtype=np.float32),
            ],
            axis=0,
        )

    model = trepan.DiscreteModel()
    model.fit(np.stack([column(75, 25), column(50, 50)], axis=1))

    # ((feature, threshold, split type), expected probability)
    cases = [
        ((0, 0.5, trepan.Rule.SplitType.BELOW), 0.75),
        ((1, 0.5, trepan.Rule.SplitType.ABOVE), 0.5),
    ]
    for split, expected in cases:
        self.assertEqual(model.split_probability(*split), expected)
def test_set_zero(self):
    """After set_zero(0, 1) and set_zero(1, 0) every sample must be [0., 1.].

    The model is fit on two binary float32 features (75/25 and 50/50
    zeros/ones); the two set_zero calls are expected to leave exactly one
    outcome possible per feature, which 100 draws then confirm.
    """

    def column(num_zeros, num_ones):
        # One float32 feature column: zeros first, then ones.
        return np.concatenate(
            [
                np.zeros(num_zeros, dtype=np.float32),
                np.ones(num_ones, dtype=np.float32),
            ],
            axis=0,
        )

    model = trepan.DiscreteModel()
    model.fit(np.stack([column(75, 25), column(50, 50)], axis=1))

    model.set_zero(0, 1)
    model.set_zero(1, 0)

    only_possible_sample = [0., 1.]
    for _ in range(100):
        np.testing.assert_array_almost_equal(model.sample(), only_possible_sample)
def test_zero_by_split(self):
    """After zero_by_split on both features every sample must be [0., 1.].

    The model is fit on two binary float32 features (75/25 and 50/50
    zeros/ones). zero_by_split is applied with (feature 0, 0.5, BELOW) and
    (feature 1, 0.5, ABOVE); 100 draws must then all equal [0., 1.].
    """

    def column(num_zeros, num_ones):
        # One float32 feature column: zeros first, then ones.
        return np.concatenate(
            [
                np.zeros(num_zeros, dtype=np.float32),
                np.ones(num_ones, dtype=np.float32),
            ],
            axis=0,
        )

    model = trepan.DiscreteModel()
    model.fit(np.stack([column(75, 25), column(50, 50)], axis=1))

    splits = (
        (0, 0.5, trepan.Rule.SplitType.BELOW),
        (1, 0.5, trepan.Rule.SplitType.ABOVE),
    )
    for split in splits:
        model.zero_by_split(*split)

    only_possible_sample = [0., 1.]
    for _ in range(100):
        np.testing.assert_array_almost_equal(model.sample(), only_possible_sample)
def test_sample_with_disj_constraints(self):
    """Smoke test: sampling under a multi-split rule with num_required = 2.

    The model is fit on three binary float32 features (75/25, 50/50 and
    20/80 zeros/ones). A single "left" constraint carries a rule with three
    splits and num_required set to 2. The test only checks that
    sample_with_constraints() completes; the returned sample is not
    inspected.
    """

    def column(num_zeros, num_ones):
        # One float32 feature column: zeros first, then ones.
        return np.concatenate(
            [
                np.zeros(num_zeros, dtype=np.float32),
                np.ones(num_ones, dtype=np.float32),
            ],
            axis=0,
        )

    model = trepan.DiscreteModel()
    model.fit(np.stack([column(75, 25), column(50, 50), column(20, 80)], axis=1))

    split_type = trepan.Rule.SplitType
    rule = trepan.Rule(0, 0.5, split_type.BELOW)
    for feature, kind in ((1, split_type.ABOVE), (2, split_type.BELOW)):
        rule.add_split(feature, 0.5, kind)
    rule.num_required = 2

    oracle = trepan.Oracle(
        lambda x: x[:, 0], trepan.Oracle.DataType.DISCRETE, 0.05, 0.05
    )
    oracle.sample_with_constraints(model, [("left", rule)])