    def test_label_model_sparse(self) -> None:
        """Test the LabelModel's estimate of P and Y on a sparse synthetic dataset.

        This tests the common setting where LFs abstain most of the time, which can
        cause issues if, for example, the parameter clamping floor (mu_eps) is set
        too high (see Issue #1422).
        """
        np.random.seed(123)
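        # abstain_multiplier scales up the LFs' abstain rate, so most entries of L
        # are -1 (abstains) and the label matrix is sparse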
        P, Y, L = generate_simple_label_matrix(
            self.n, self.m, self.cardinality, abstain_multiplier=1000.0
        )

        # Train LabelModel
        label_model = LabelModel(cardinality=self.cardinality, verbose=False)
        label_model.fit(L, n_epochs=1000, lr=0.01, seed=123)

        # Test estimated LF conditional probabilities
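        # cprobs has shape (m, cardinality + 1, cardinality), where
        # cprobs[i, j, y] = P(\lambda_i = j - 1 | Y = y); the j = 0 slice holds
        # the abstain probabilities P(\lambda_i = -1 | Y = y)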
        P_lm = label_model.get_conditional_probs()
        np.testing.assert_array_almost_equal(P, P_lm, decimal=2)

        # Test predicted labels *only on non-abstained data points*
        Y_pred = label_model.predict(L, tie_break_policy="abstain")
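        # predict() returns -1 where the model abstains under
        # tie_break_policy="abstain"; keep only the rows with a real prediction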
        (idx,) = np.where(Y_pred != -1)
        acc = (Y_pred[idx] == Y[idx]).mean()
        self.assertGreaterEqual(acc, 0.65)

        # Make sure that we don't output abstain when an LF votes, per issue #1422
        self.assertEqual(len(idx), np.where((L + 1).sum(axis=1) != 0, 1, 0).sum())

    def test_set_mu_eps(self) -> None:
        """Test that the mu_eps parameter clamps the estimated probabilities from below."""
        mu_eps = 0.0123

        # Construct a label matrix such that P(\lambda_1 = 0 | Y) = 0.0, so it will hit
        # the mu_eps floor
        L = np.array([[1, 1, 1], [1, 1, 1]])
        label_model = LabelModel(verbose=False)
        label_model.fit(L, mu_eps=mu_eps)
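        # cprobs[0, 1, 0] is the first LF's P(\lambda_1 = 0 | Y = 0): every vote
        # in L is 1, so this probability is estimated at 0 and should be clamped
        # up to the mu_eps floor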
        self.assertAlmostEqual(label_model.get_conditional_probs()[0, 1, 0], mu_eps)

    def test_label_model_basic(self) -> None:
        """Test the LabelModel's estimate of P and Y on a simple synthetic dataset."""
        np.random.seed(123)
        P, Y, L = generate_simple_label_matrix(self.n, self.m, self.cardinality)

        # Train LabelModel
        label_model = LabelModel(cardinality=self.cardinality, verbose=False)
        label_model.fit(L, n_epochs=200, lr=0.01, seed=123)

        # Test estimated LF conditional probabilities
        P_lm = label_model.get_conditional_probs()
        np.testing.assert_array_almost_equal(P, P_lm, decimal=2)

        # Test predicted labels
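        # score() defaults to metrics=["accuracy"], computed over the
        # non-abstained predictions only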
        score = label_model.score(L, Y)
        self.assertGreaterEqual(score["accuracy"], 0.9)