def test_label_model_sparse(self) -> None:
    """Test the LabelModel's estimate of P and Y on a sparse synthetic dataset.

    This tests the common setting where LFs abstain most of the time, which can
    cause issues, e.g., if the parameter clamping is set too high (see Issue #1422).
    """
    np.random.seed(123)
    P, Y, L = generate_simple_label_matrix(
        self.n, self.m, self.cardinality, abstain_multiplier=1000.0
    )

    # Train LabelModel
    label_model = LabelModel(cardinality=self.cardinality, verbose=False)
    label_model.fit(L, n_epochs=1000, lr=0.01, seed=123)

    # Test estimated LF conditional probabilities
    P_lm = label_model.get_conditional_probs()
    np.testing.assert_array_almost_equal(P, P_lm, decimal=2)

    # Test predicted labels *only on non-abstained data points*
    Y_pred = label_model.predict(L, tie_break_policy="abstain")
    (idx,) = np.where(Y_pred != -1)
    acc = np.where(Y_pred[idx] == Y[idx], 1, 0).sum() / len(idx)
    self.assertGreaterEqual(acc, 0.65)

    # Make sure that we don't output abstain when an LF votes, per Issue #1422
    self.assertEqual(len(idx), np.where((L + 1).sum(axis=1) != 0, 1, 0).sum())
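# Editor's hedged sketch (not an original test): with tie_break_policy="abstain",
# predict() returns -1 for data points whose predicted class probabilities tie.
# Under a uniform class balance, a row where every LF abstains tends to yield a
# (near-)uniform posterior and is therefore abstained on, while any row with at
# least one vote gets a label (the behavior the Issue #1422 check above guards).
# The helper name `_sketch_abstain_policy` and the tiny matrix are illustrative
# assumptions, not part of the test suite.
def _sketch_abstain_policy() -> None:
    import numpy as np
    from snorkel.labeling.model import LabelModel

    # Two voting data points and one all-abstain point (-1 = abstain).
    L_demo = np.array([[0, 1, 0], [1, 1, -1], [-1, -1, -1]])
    lm = LabelModel(cardinality=2, verbose=False)
    lm.fit(L_demo, n_epochs=100, seed=0)
    preds = lm.predict(L_demo, tie_break_policy="abstain")
    # Expect preds[2] == -1 (the all-abstain row ties); the voting rows get labels.
    print(preds)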
def test_set_mu_eps(self) -> None:
    mu_eps = 0.0123

    # Construct a label matrix such that P(\lambda_i = 0 | Y) = 0.0 for each LF,
    # so the learned parameters hit the mu_eps floor
    L = np.array([[1, 1, 1], [1, 1, 1]])
    label_model = LabelModel(verbose=False)
    label_model.fit(L, mu_eps=mu_eps)
    self.assertAlmostEqual(label_model.get_conditional_probs()[0, 1, 0], mu_eps)
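# Editor's hedged sketch (not an original test): get_conditional_probs() returns
# an (m, cardinality + 1, cardinality) array where entry [i, j + 1, y] estimates
# P(\lambda_i = j | Y = y) and row [i, 0, y] holds the abstain probability, so
# each LF's column should sum to ~1; mu_eps floors the learned entries away from
# zero during training. The helper name below is an illustrative assumption.
def _sketch_conditional_prob_layout() -> None:
    import numpy as np
    from snorkel.labeling.model import LabelModel

    L = np.array([[1, 1, 1], [1, 1, 1]])
    lm = LabelModel(verbose=False)
    lm.fit(L, mu_eps=0.0123)
    probs = lm.get_conditional_probs()  # shape (3, 3, 2) for 3 binary LFs
    # Abstain + vote probabilities should sum to ~1 for each LF and each Y.
    print(probs.shape, probs.sum(axis=1))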
def test_label_model_basic(self) -> None:
    """Test the LabelModel's estimate of P and Y on a simple synthetic dataset."""
    np.random.seed(123)
    P, Y, L = generate_simple_label_matrix(self.n, self.m, self.cardinality)

    # Train LabelModel
    label_model = LabelModel(cardinality=self.cardinality, verbose=False)
    label_model.fit(L, n_epochs=200, lr=0.01, seed=123)

    # Test estimated LF conditional probabilities
    P_lm = label_model.get_conditional_probs()
    np.testing.assert_array_almost_equal(P, P_lm, decimal=2)

    # Test predicted labels
    score = label_model.score(L, Y)
    self.assertGreaterEqual(score["accuracy"], 0.9)
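# Editor's hedged sketch (not an original test): score() above reports accuracy
# computed from hard predictions; roughly equivalently, one can derive hard
# labels from the soft labels of predict_proba() by hand. The helper name
# `_sketch_manual_accuracy` is an illustrative assumption, and this simplified
# version ignores score()'s tie-breaking/abstain handling.
def _sketch_manual_accuracy(label_model, L, Y) -> float:
    import numpy as np
    from snorkel.utils import probs_to_preds

    probs = label_model.predict_proba(L)  # soft labels, shape (n, cardinality)
    preds = probs_to_preds(probs)  # hard labels via (tie-broken) argmax
    return float(np.mean(preds == Y))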