Example 1
    def test_label_model_sparse(self) -> None:
        """Test the LabelModel's estimate of P and Y on a sparse synthetic dataset.

        This tests the common setting where LFs abstain most of the time, which can
        cause issues, for example, if parameter clamping is set too high (see Issue
        #1422).
        """
        np.random.seed(123)
        P, Y, L = generate_simple_label_matrix(self.n,
                                               self.m,
                                               self.cardinality,
                                               abstain_multiplier=1000.0)

        # Train LabelModel
        label_model = LabelModel(cardinality=self.cardinality, verbose=False)
        label_model.fit(L, n_epochs=1000, lr=0.01, seed=123)

        # Test estimated LF conditional probabilities
        P_lm = label_model.get_conditional_probs()
        np.testing.assert_array_almost_equal(P, P_lm, decimal=2)

        # Test predicted labels *only on non-abstained data points*
        Y_pred = label_model.predict(L, tie_break_policy="abstain")
        (idx,) = np.where(Y_pred != -1)
        acc = np.where(Y_pred[idx] == Y[idx], 1, 0).sum() / len(idx)
        self.assertGreaterEqual(acc, 0.65)

        # Make sure that we don't output abstain when an LF votes, per issue #1422
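        # After shifting abstains from -1 to 0, a row sum of 0 means every LF abstained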
        self.assertEqual(len(idx),
                         np.where((L + 1).sum(axis=1) != 0, 1, 0).sum())
Example 2
    def test_sparse_and_regular_make_same_probs(self) -> None:
        """Test the LabelModel's estimate of P and Y on a simple synthetic dataset."""
        np.random.seed(123)
        P, Y, L = generate_simple_label_matrix(
            self.known_dimensions.num_examples,
            self.known_dimensions.num_functions,
            self.known_dimensions.num_classes,
        )
        example_event_lists: List[ExampleEventListOccurence] = []

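        # Convert the dense label matrix into one event list per example, dropping abstains (-1)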
        for example in L:
            event_list = []
            for func_id, cls_id in enumerate(example):
                if cls_id > -1:
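                    # Encode each non-abstain (LF, class) vote as a single flattened
                    # event index: func_id * num_classes + cls_id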
                    event_id = func_id * self.known_dimensions.num_classes + cls_id
                    event_list.append(event_id)
            example_event_lists.append(ExampleEventListOccurence(event_list))

        sparse_model = SparseExampleEventListLabelModel()
        sparse_model.fit_from_sparse_example_event_list(
            example_event_list=example_event_lists,
            known_dimensions=self.known_dimensions,
            n_epochs=200,
            lr=0.01,
            seed=123,
        )
        label_model = LabelModel(cardinality=self.known_dimensions.num_classes)
        label_model.fit(L, n_epochs=200, lr=0.01, seed=123)
        P_lm = label_model.get_conditional_probs()
        P_slm = sparse_model.get_conditional_probs()
        np.testing.assert_array_almost_equal(
            P_slm,
            P_lm,
        )
Example 3
    def test_set_mu_eps(self):
        mu_eps = 0.0123

        # Construct a label matrix such that P(\lambda_1 = 0 | Y) = 0.0, so it will hit
        # the mu_eps floor
        L = np.array([[1, 1, 1], [1, 1, 1]])
        label_model = LabelModel(verbose=False)
        label_model.fit(L, mu_eps=mu_eps)
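        # get_conditional_probs() returns an (m, cardinality + 1, cardinality) array;
        # entry [0, 1, 0] is LF 0's estimate of P(lambda = 0 | Y = 0), which should be
        # clamped to mu_eps since LF 0 never votes 0 above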
        self.assertAlmostEqual(label_model.get_conditional_probs()[0, 1, 0], mu_eps)
Example 4
    def test_label_model_basic(self) -> None:
        """Test the LabelModel's estimate of P and Y on a simple synthetic dataset."""
        np.random.seed(123)
        P, Y, L = generate_simple_label_matrix(self.n, self.m,
                                               self.cardinality)

        # Train LabelModel
        label_model = LabelModel(cardinality=self.cardinality, verbose=False)
        label_model.fit(L, n_epochs=200, lr=0.01, seed=123)

        # Test estimated LF conditional probabilities
        P_lm = label_model.get_conditional_probs()
        np.testing.assert_array_almost_equal(P, P_lm, decimal=2)

        # Test predicted labels
        score = label_model.score(L, Y)
        self.assertGreaterEqual(score["accuracy"], 0.9)
Example 5
    def test_label_model_basic(self) -> None:
        """Test the LabelModel's estimate of P and Y on a simple synthetic dataset."""
        np.random.seed(123)
        P, Y, L = generate_simple_label_matrix(self.n, self.m,
                                               self.cardinality)

        # Train LabelModel
        label_model = LabelModel(cardinality=self.cardinality, verbose=False)
        label_model.fit(L, n_epochs=200, lr=0.01, seed=123)

        # Test estimated LF conditional probabilities
        P_lm = label_model.get_conditional_probs()
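        # Mean absolute error between the true and estimated conditional probability tables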
        conditional_probs_err = (
            np.linalg.norm(P.flatten() - P_lm.flatten(), ord=1) / P.size)
        self.assertLessEqual(conditional_probs_err, 0.01)

        # Test predicted labels
        score = label_model.score(L, Y)
        self.assertGreaterEqual(score["accuracy"], 0.9)
Example 6
    def test_sparse_and_regular_make_same_probs(self) -> None:
        """Test the LabelModel's estimate of P and Y on a simple synthetic dataset."""
        np.random.seed(123)
        P, Y, L = generate_simple_label_matrix(
            self.known_dimensions.num_examples,
            self.known_dimensions.num_functions,
            self.known_dimensions.num_classes,
        )
        sparse_event_occurence: List[EventCooccurence] = []
        label_model = LabelModel(cardinality=self.known_dimensions.num_classes)
        label_model._set_constants(L)
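        # Shift labels so abstains (-1) map to 0 before building the one-hot indicator matrix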
        L_shift = L + 1
        label_model_lind = label_model._create_L_ind(L_shift)
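        # co_oc_matrix[a, b] counts the examples on which events a and b both occur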
        co_oc_matrix = label_model_lind.T @ label_model_lind
        for a_id, cols in enumerate(co_oc_matrix):
            for b_id, freq in enumerate(cols):
                sparse_event_occurence.append(
                    EventCooccurence(a_id, b_id, frequency=freq))

        sparse_model = SparseEventPairLabelModel()

        sparse_model.fit_from_sparse_event_cooccurrence(
            sparse_event_occurence=sparse_event_occurence,
            known_dimensions=self.known_dimensions,
            n_epochs=200,
            lr=0.01,
            seed=123,
        )
        label_model = LabelModel(cardinality=self.known_dimensions.num_classes)
        label_model.fit(L, n_epochs=200, lr=0.01, seed=123)
        P_lm = label_model.get_conditional_probs()
        P_slm = sparse_model.get_conditional_probs()
        np.testing.assert_array_almost_equal(
            P_slm,
            P_lm,
        )