def test_label_model_sparse(self) -> None:
    """Check P and Y estimates on a sparse synthetic dataset.

    Covers the common regime where LFs abstain most of the time, which
    previously broke when parameter clamping was set too high (see
    Issue #1422).
    """
    np.random.seed(123)
    P, Y, L = generate_simple_label_matrix(
        self.n, self.m, self.cardinality, abstain_multiplier=1000.0
    )

    # Fit the LabelModel on the sparse label matrix.
    model = LabelModel(cardinality=self.cardinality, verbose=False)
    model.fit(L, n_epochs=1000, lr=0.01, seed=123)

    # Learned LF conditional probabilities should be close to the truth.
    np.testing.assert_array_almost_equal(P, model.get_conditional_probs(), decimal=2)

    # Accuracy is evaluated only on the points where the model did not abstain.
    preds = model.predict(L, tie_break_policy="abstain")
    (covered,) = np.where(preds != -1)
    accuracy = (preds[covered] == Y[covered]).mean()
    self.assertGreaterEqual(accuracy, 0.65)

    # Per issue #1422: the model must never abstain on a data point
    # where at least one LF voted.
    n_with_votes = np.where((L + 1).sum(axis=1) != 0, 1, 0).sum()
    self.assertEqual(len(covered), n_with_votes)
def test_sparse_and_regular_make_same_probs(self) -> None:
    """Test the LabelModel's estimate of P and Y on a simple synthetic dataset."""
    np.random.seed(123)
    P, Y, L = generate_simple_label_matrix(
        self.known_dimensions.num_examples,
        self.known_dimensions.num_functions,
        self.known_dimensions.num_classes,
    )

    # Translate the dense label matrix into per-example event-id lists,
    # skipping abstains (-1). An event id encodes (function, class) as
    # func_id * num_classes + cls_id.
    num_classes = self.known_dimensions.num_classes
    example_event_lists: List[ExampleEventListOccurence] = [
        ExampleEventListOccurence(
            [
                func_id * num_classes + cls_id
                for func_id, cls_id in enumerate(row)
                if cls_id > -1
            ]
        )
        for row in L
    ]

    # Fit the sparse model from the event lists.
    sparse_model = SparseExampleEventListLabelModel()
    sparse_model.fit_from_sparse_example_event_list(
        example_event_list=example_event_lists,
        known_dimensions=self.known_dimensions,
        n_epochs=200,
        lr=0.01,
        seed=123,
    )

    # Fit the regular model on the dense matrix with identical settings.
    dense_model = LabelModel(cardinality=num_classes)
    dense_model.fit(L, n_epochs=200, lr=0.01, seed=123)

    # Both fits must produce the same conditional probabilities.
    np.testing.assert_array_almost_equal(
        sparse_model.get_conditional_probs(),
        dense_model.get_conditional_probs(),
    )
def test_set_mu_eps(self):
    """A mu value clamped at the mu_eps floor is reported exactly as mu_eps."""
    mu_eps = 0.0123

    # Every LF votes 1 on every example, so P(\lambda_1 = 0 | Y) = 0.0 and
    # the corresponding mu entry is guaranteed to hit the mu_eps floor.
    L = np.array([[1, 1, 1], [1, 1, 1]])
    model = LabelModel(verbose=False)
    model.fit(L, mu_eps=mu_eps)
    self.assertAlmostEqual(model.get_conditional_probs()[0, 1, 0], mu_eps)
def test_label_model_basic(self) -> None:
    """Test the LabelModel's estimate of P and Y on a simple synthetic dataset."""
    np.random.seed(123)
    P, Y, L = generate_simple_label_matrix(self.n, self.m, self.cardinality)

    # Fit the LabelModel.
    model = LabelModel(cardinality=self.cardinality, verbose=False)
    model.fit(L, n_epochs=200, lr=0.01, seed=123)

    # Learned LF conditional probabilities should be close to the truth.
    np.testing.assert_array_almost_equal(P, model.get_conditional_probs(), decimal=2)

    # Predicted labels should be reasonably accurate.
    metrics = model.score(L, Y)
    self.assertGreaterEqual(metrics["accuracy"], 0.9)
def test_label_model_basic(self) -> None:
    """Test the LabelModel's estimate of P and Y on a simple synthetic dataset."""
    np.random.seed(123)
    P, Y, L = generate_simple_label_matrix(self.n, self.m, self.cardinality)

    # Fit the LabelModel.
    model = LabelModel(cardinality=self.cardinality, verbose=False)
    model.fit(L, n_epochs=200, lr=0.01, seed=123)

    # Mean absolute error between the true and estimated LF conditional
    # probabilities must be small.
    P_est = model.get_conditional_probs()
    mean_abs_err = np.linalg.norm(P.flatten() - P_est.flatten(), ord=1) / P.size
    self.assertLessEqual(mean_abs_err, 0.01)

    # Predicted labels should be reasonably accurate.
    metrics = model.score(L, Y)
    self.assertGreaterEqual(metrics["accuracy"], 0.9)
def test_sparse_and_regular_make_same_probs(self) -> None:
    """Test the LabelModel's estimate of P and Y on a simple synthetic dataset."""
    np.random.seed(123)
    P, Y, L = generate_simple_label_matrix(
        self.known_dimensions.num_examples,
        self.known_dimensions.num_functions,
        self.known_dimensions.num_classes,
    )

    # Derive event co-occurrence counts from the dense matrix via its
    # one-hot indicator form: co_oc = L_ind^T @ L_ind.
    helper = LabelModel(cardinality=self.known_dimensions.num_classes)
    helper._set_constants(L)
    L_ind = helper._create_L_ind(L + 1)
    co_oc = L_ind.T @ L_ind
    sparse_event_occurence: List[EventCooccurence] = [
        EventCooccurence(a_id, b_id, frequency=freq)
        for a_id, row in enumerate(co_oc)
        for b_id, freq in enumerate(row)
    ]

    # Fit the sparse model from the pairwise co-occurrence counts.
    sparse_model = SparseEventPairLabelModel()
    sparse_model.fit_from_sparse_event_cooccurrence(
        sparse_event_occurence=sparse_event_occurence,
        known_dimensions=self.known_dimensions,
        n_epochs=200,
        lr=0.01,
        seed=123,
    )

    # Fit the regular model on the dense matrix with identical settings.
    dense_model = LabelModel(cardinality=self.known_dimensions.num_classes)
    dense_model.fit(L, n_epochs=200, lr=0.01, seed=123)

    # Both fits must produce the same conditional probabilities.
    np.testing.assert_array_almost_equal(
        sparse_model.get_conditional_probs(),
        dense_model.get_conditional_probs(),
    )