Exemple #1
0
    def test_sparse_and_regular_make_same_objective(self):
        """Compare the sparse event-pair objective against ``LabelModel.O``.

        A synthetic label matrix is run through ``LabelModel._create_L_ind``;
        every cell of ``L_ind.T @ L_ind`` is then handed to
        ``SparseEventPairLabelModel`` as an ``EventCooccurence``. Both models
        must agree on ``n``, ``m`` and ``cardinality``, and the sparse
        objective must match ``LabelModel.O`` to ``assert_almost_equal``
        precision.
        """
        dims = self.known_dimensions
        np.random.seed(123)
        _, _, L = generate_simple_label_matrix(
            dims.num_examples, dims.num_functions, dims.num_classes
        )

        # Dense reference built by the regular model.
        dense_model = LabelModel(cardinality=dims.num_classes)
        dense_model._set_constants(L)
        L_shift = L + 1
        dense_l_ind = dense_model._create_L_ind(L_shift)
        pair_counts = dense_l_ind.T @ dense_l_ind

        # One record per (row, column) cell of the count matrix; no cell is
        # filtered out.
        sparse_event_occurence: List[EventCooccurence] = [
            EventCooccurence(row_id, col_id, frequency=count)
            for row_id, row in enumerate(pair_counts)
            for col_id, count in enumerate(row)
        ]

        sparse_model = SparseEventPairLabelModel()
        sparse_model._set_constants(known_dimensions=dims)
        sparse_objective = sparse_model._prepare_objective_from_sparse_event_cooccurence(
            known_dimensions=dims,
            sparse_event_occurence=sparse_event_occurence,
        )

        # The derived constants have to line up before comparing objectives.
        for attr in ("n", "m", "cardinality"):
            self.assertEqual(getattr(dense_model, attr), getattr(sparse_model, attr))

        dense_model._generate_O(L_shift)
        dense_objective = dense_model.O.detach().numpy()
        np.testing.assert_almost_equal(dense_objective, sparse_objective)
    def test_augmented_L_construction(self):
        """Check shape, total and individual entries of the augmented label matrix.

        Uses a fixed 3 x 5 label matrix with cardinality 2 and inspects the
        result of ``_get_augmented_label_matrix(..., higher_order=True)``.
        """
        # 3 examples, 5 LFs, 2 classes
        n, m, k = 3, 5, 2
        L = np.array([[0, 0, 0, 1, 0], [0, 1, 1, 0, -1], [0, 0, 0, 0, -1]])
        L_shift = L + 1

        lm = LabelModel(cardinality=k, verbose=False)
        lm._set_constants(L_shift)
        lm._create_tree()
        L_aug = lm._get_augmented_label_matrix(L_shift, higher_order=True)

        # Expect 5 * 2 = 10 columns for the sources
        self.assertEqual(L_aug.shape, (n, m * k))

        # L_shift has 13 entries greater than zero, so L_aug should sum to 13
        self.assertEqual(L_aug.sum(), 13)

        # Singleton entries: each positive shifted label sets exactly the
        # column j * k + label - 1 in its row.
        for i, j in np.ndindex(n, m):
            label = L_shift[i, j]
            if label <= 0:
                continue
            self.assertEqual(L_aug[i, j * k + label - 1], 1)

        # Clique entries
        # Singleton clique 1
        clique = lm.c_tree.node[1]
        self.assertEqual(len(clique["members"]), 1)
        start = clique["start_index"]
        self.assertEqual(L_aug[0, start], 1)

        # Singleton clique 2
        clique = lm.c_tree.node[2]
        self.assertEqual(len(clique["members"]), 1)
        start = clique["start_index"]
        self.assertEqual(L_aug[0, start + 1], 0)
 def _set_up_model(self, L: np.ndarray, class_balance: List[float] = [0.5, 0.5]):
     """Return a cardinality-2 ``LabelModel`` prepared from ``L`` without fitting.

     Args:
         L: Label matrix; it is shifted by +1 before being given to the model.
         class_balance: Class balance forwarded to ``_set_class_balance``.

     Returns:
         The ``LabelModel`` after ``_set_constants``, ``_create_tree``,
         ``_generate_O``, ``_build_mask``, ``_get_augmented_label_matrix``,
         ``_set_class_balance`` and ``_init_params`` have been called, in
         that order.
     """
     # The default list in the signature is created once and shared by every
     # call. Hand the model its own copy so that nothing it does to the list
     # can leak into later calls (or into the caller's list).
     balance = None if class_balance is None else list(class_balance)

     label_model = LabelModel(cardinality=2, verbose=False)
     label_model.train_config = TrainConfig()  # type: ignore
     L_aug = L + 1
     label_model._set_constants(L_aug)
     label_model._create_tree()
     label_model._generate_O(L_aug)
     label_model._build_mask()
     # Return value intentionally unused here.
     # NOTE(review): presumably called for its effect on the model - confirm.
     label_model._get_augmented_label_matrix(L_aug)
     label_model._set_class_balance(class_balance=balance, Y_dev=None)
     label_model._init_params()
     return label_model
    def test_L_form(self):
        """Check the constants derived from ``L`` and the validation done by ``fit``."""
        label_model = LabelModel(cardinality=2, verbose=False)

        # A 4 x 3 matrix: _set_constants should record n == 4 and m == 3.
        well_formed = np.array([[-1, 1, -1], [-1, 1, -1], [1, -1, -1], [-1, 1, -1]])
        label_model._set_constants(well_formed)
        self.assertEqual(label_model.n, 4)
        self.assertEqual(label_model.m, 3)

        # Each malformed matrix is paired with the error text fit() must raise.
        rejected = [
            # Contains the label value 2 while the model has cardinality 2.
            (
                np.array([[-1, 0, 1], [-1, 0, 2], [0, -1, 2], [-1, 0, -1]]),
                "L_train has cardinality",
            ),
            # Only two columns.
            (
                np.array([[0, 1], [1, 1], [0, 1]]),
                "L_train should have at least 3",
            ),
        ]
        for bad_L, expected_message in rejected:
            with self.assertRaisesRegex(ValueError, expected_message):
                label_model.fit(bad_L, n_epochs=1)
Exemple #5
0
    def test_sparse_and_regular_make_same_l_ind_and_o(self):
        """Check the example-event-list model against ``LabelModel``.

        A synthetic label matrix is converted, row by row, into lists of
        event ids (``func_id * num_classes + cls_id`` for every label greater
        than -1). The sparse model built from those lists must reproduce
        ``LabelModel``'s ``_create_L_ind`` output exactly, agree on ``n``,
        ``m`` and ``cardinality``, and produce the same objective as
        ``LabelModel.O`` to ``assert_almost_equal`` precision.
        """
        np.random.seed(123)
        _, _, L = generate_simple_label_matrix(
            self.known_dimensions.num_examples,
            self.known_dimensions.num_functions,
            self.known_dimensions.num_classes,
        )
        label_model = LabelModel(cardinality=self.known_dimensions.num_classes)
        label_model._set_constants(L)
        L_shift = L + 1
        label_model_lind = label_model._create_L_ind(L_shift)

        # One event list per example; labels of -1 contribute no event.
        num_classes = self.known_dimensions.num_classes
        example_event_lists: List[ExampleEventListOccurence] = [
            ExampleEventListOccurence(
                [
                    func_id * num_classes + cls_id
                    for func_id, cls_id in enumerate(example)
                    if cls_id > -1
                ]
            )
            for example in L
        ]

        sparse_model = SparseExampleEventListLabelModel()
        sparse_model._set_constants(known_dimensions=self.known_dimensions)
        sparse_model_lind = sparse_model.get_l_ind(
            known_dimensions=self.known_dimensions,
            example_events_list=example_event_lists,
            return_array=True,
        )
        sparse_model_objective = sparse_model._prepare_objective_from_sparse_example_eventlist(
            known_dimensions=self.known_dimensions,
            example_events_list=example_event_lists,
        )

        # The indicator matrices must be identical, not merely close.
        np.testing.assert_equal(label_model_lind, sparse_model_lind)
        self.assertEqual(label_model.n, sparse_model.n)
        self.assertEqual(label_model.m, sparse_model.m)
        self.assertEqual(label_model.cardinality, sparse_model.cardinality)

        label_model._generate_O(L_shift)
        label_model_O = label_model.O.detach().numpy()
        np.testing.assert_almost_equal(label_model_O, sparse_model_objective)
Exemple #6
0
    @classmethod
    def setUpClass(cls) -> None:
        """Build the random one-hot fixture shared by the tests of this class.

        unittest invokes this hook on the class itself, so it has to be a
        classmethod; as a plain function the call would fail for lack of the
        ``cls`` argument.

        Sets on the class:
            known_dimensions: 7 classes, 1000 examples, 10 functions.
            L_ind: one-hot matrix of shape
                (num_examples, num_functions * num_classes).
            O_counts: ``L_ind.T @ L_ind``.
            O: ``O_counts`` divided by ``num_examples``.
            model_O: ``model.O`` as a NumPy array after
                ``_generate_O_from_L_aug(L_ind)``.
        """
        known_dimensions = KnownDimensions(num_classes=7,
                                           num_examples=1000,
                                           num_functions=10)

        # Row c of the identity matrix is the one-hot encoding of class c.
        possible_function_values = np.eye(known_dimensions.num_classes)
        # Random class index for every (function, example) pair.
        choice_set = np.random.choice(
            known_dimensions.num_classes,
            size=[
                known_dimensions.num_functions, known_dimensions.num_examples
            ],
        )
        # Indexing gives (num_functions, num_examples, num_classes); hstack
        # places the per-function (num_examples, num_classes) blocks side by
        # side.
        cooccurence = np.hstack(possible_function_values[choice_set])
        cls.known_dimensions = known_dimensions

        cls.L_ind = cooccurence
        cls.O_counts = cls.L_ind.T @ cls.L_ind
        cls.O = cls.O_counts / cls.known_dimensions.num_examples
        model = LabelModel()
        model._set_constants(known_dimensions=cls.known_dimensions)
        model._create_tree()
        model._generate_O_from_L_aug(cls.L_ind)
        cls.model_O = model.O.detach().numpy()
Exemple #7
0
    def test_sparse_and_regular_make_same_probs(self) -> None:
        """Fit the sparse and the regular model and compare conditional probs.

        The sparse model is fitted from the cells of ``L_ind.T @ L_ind``
        (one ``EventCooccurence`` per cell) and a fresh ``LabelModel`` is
        fitted on the label matrix itself, both with the same epochs,
        learning rate and seed. Their ``get_conditional_probs()`` results
        must agree to ``assert_array_almost_equal`` precision.
        """
        dims = self.known_dimensions
        np.random.seed(123)
        _, _, L = generate_simple_label_matrix(
            dims.num_examples, dims.num_functions, dims.num_classes
        )

        # This first model is only used to obtain the L_ind matrix.
        indicator_model = LabelModel(cardinality=dims.num_classes)
        indicator_model._set_constants(L)
        L_shift = L + 1
        l_ind = indicator_model._create_L_ind(L_shift)
        pair_counts = l_ind.T @ l_ind

        sparse_event_occurence: List[EventCooccurence] = [
            EventCooccurence(row_id, col_id, frequency=count)
            for row_id, row in enumerate(pair_counts)
            for col_id, count in enumerate(row)
        ]

        # Same hyper-parameters for both fits.
        fit_kwargs = dict(n_epochs=200, lr=0.01, seed=123)

        sparse_model = SparseEventPairLabelModel()
        sparse_model.fit_from_sparse_event_cooccurrence(
            sparse_event_occurence=sparse_event_occurence,
            known_dimensions=dims,
            **fit_kwargs,
        )

        dense_model = LabelModel(cardinality=dims.num_classes)
        dense_model.fit(L, **fit_kwargs)

        P_lm = dense_model.get_conditional_probs()
        P_slm = sparse_model.get_conditional_probs()
        np.testing.assert_array_almost_equal(P_slm, P_lm)