def test_sparse_and_regular_make_same_objective(self):
    """Check that the event-pair sparse model reproduces the dense model's O.

    A synthetic label matrix is generated, the dense ``LabelModel`` turns it
    into an indicator matrix, and the resulting co-occurrence counts are fed
    to ``SparseEventPairLabelModel`` as ``EventCooccurence`` records. The
    objective prepared by the sparse model must match ``LabelModel.O``.
    """
    np.random.seed(123)
    dims = self.known_dimensions
    _, _, L = generate_simple_label_matrix(
        dims.num_examples, dims.num_functions, dims.num_classes
    )

    dense_model = LabelModel(cardinality=dims.num_classes)
    dense_model._set_constants(L)

    # Labels are shifted by one before being handed to the dense helpers
    # (presumably so the -1 "no vote" marker becomes 0 -- confirm).
    shifted = L + 1
    indicator = dense_model._create_L_ind(shifted)
    pair_counts = indicator.T @ indicator

    # One record per (row event, column event) cell of the count matrix,
    # emitted in row-major order.
    pair_events: List[EventCooccurence] = [
        EventCooccurence(row_id, col_id, frequency=count)
        for row_id, row in enumerate(pair_counts)
        for col_id, count in enumerate(row)
    ]

    sparse_model = SparseEventPairLabelModel()
    sparse_model._set_constants(known_dimensions=dims)
    sparse_objective = sparse_model._prepare_objective_from_sparse_event_cooccurence(
        known_dimensions=dims,
        sparse_event_occurence=pair_events,
    )

    # Both models must agree on their basic dimensions ...
    self.assertEqual(dense_model.n, sparse_model.n)
    self.assertEqual(dense_model.m, sparse_model.m)
    self.assertEqual(dense_model.cardinality, sparse_model.cardinality)

    # ... and on the objective matrix itself.
    dense_model._generate_O(shifted)
    dense_objective = dense_model.O.detach().numpy()
    np.testing.assert_almost_equal(dense_objective, sparse_objective)
def test_augmented_L_construction(self):
    """Verify the augmented label matrix built for a small hand-written L.

    Checks the overall shape and entry count of the augmented matrix, the
    one-hot position of every non-zero shifted label, and the entries at
    the start indices recorded on clique-tree nodes 1 and 2.
    """
    # 5 LFs
    n = 3
    m = 5
    k = 2
    L = np.array([[0, 0, 0, 1, 0], [0, 1, 1, 0, -1], [0, 0, 0, 0, -1]])
    L_shift = L + 1
    lm = LabelModel(cardinality=k, verbose=False)
    lm._set_constants(L_shift)
    lm._create_tree()
    L_aug = lm._get_augmented_label_matrix(L_shift, higher_order=True)

    # Should have 10 columns:
    # - 5 * 2 = 10 for the sources
    self.assertEqual(L_aug.shape, (3, 10))

    # 13 total nonzero entries
    self.assertEqual(L_aug.sum(), 13)

    # Next, check the singleton entries: a shifted label v > 0 from source j
    # must set column j * k + v - 1.
    for i in range(n):
        for j in range(m):
            if L_shift[i, j] > 0:
                self.assertEqual(L_aug[i, j * k + L_shift[i, j] - 1], 1)

    # Finally, check the clique entries.
    # NOTE: `.nodes[...]` is used instead of the `.node[...]` alias, which
    # networkx deprecated and removed in 2.4 (assumes c_tree is a networkx
    # graph, which is how it is indexed here).
    # Singleton clique 1
    self.assertEqual(len(lm.c_tree.nodes[1]["members"]), 1)
    j = lm.c_tree.nodes[1]["start_index"]
    self.assertEqual(L_aug[0, j], 1)

    # Singleton clique 2
    self.assertEqual(len(lm.c_tree.nodes[2]["members"]), 1)
    j = lm.c_tree.nodes[2]["start_index"]
    self.assertEqual(L_aug[0, j + 1], 0)
def _set_up_model(self, L: np.ndarray, class_balance: List[float] = [0.5, 0.5]):
    """Return a binary ``LabelModel`` prepared from ``L`` without calling ``fit``.

    The private setup steps are invoked by hand, in order, on ``L + 1``.

    Args:
        L: Label matrix; it is shifted by one before use.
        class_balance: Forwarded to ``_set_class_balance``.

    Returns:
        The initialised ``LabelModel`` instance.
    """
    # NOTE(review): the list default is shared across calls; it is only
    # forwarded here -- confirm `_set_class_balance` never mutates it.
    model = LabelModel(cardinality=2, verbose=False)
    model.train_config = TrainConfig()  # type: ignore

    shifted = L + 1

    # The steps below run in the original order.
    model._set_constants(shifted)
    model._create_tree()
    model._generate_O(shifted)
    model._build_mask()
    model._get_augmented_label_matrix(shifted)
    model._set_class_balance(class_balance=class_balance, Y_dev=None)
    model._init_params()
    return model
def test_L_form(self):
    """Check dimension bookkeeping and input validation of ``LabelModel``.

    A 4 x 3 matrix must yield ``n == 4`` and ``m == 3``. Two further
    matrices must each make ``fit`` raise a ``ValueError`` with the
    expected message.
    """
    lm = LabelModel(cardinality=2, verbose=False)

    accepted = np.array([[-1, 1, -1], [-1, 1, -1], [1, -1, -1], [-1, 1, -1]])
    lm._set_constants(accepted)
    self.assertEqual(lm.n, 4)
    self.assertEqual(lm.m, 3)

    # (matrix, expected error pattern) pairs that `fit` must reject.
    rejected = [
        # Contains the label value 2 while the model has cardinality 2.
        (
            np.array([[-1, 0, 1], [-1, 0, 2], [0, -1, 2], [-1, 0, -1]]),
            "L_train has cardinality",
        ),
        # Only two columns.
        (
            np.array([[0, 1], [1, 1], [0, 1]]),
            "L_train should have at least 3",
        ),
    ]
    for bad_matrix, message in rejected:
        with self.assertRaisesRegex(ValueError, message):
            lm.fit(bad_matrix, n_epochs=1)
def test_sparse_and_regular_make_same_l_ind_and_o(self):
    """Check the example-event-list sparse model against the dense model.

    A synthetic label matrix is converted into one ``ExampleEventListOccurence``
    per example. ``SparseExampleEventListLabelModel`` must then produce the
    same indicator matrix as ``LabelModel._create_L_ind`` and the same
    objective matrix as ``LabelModel.O``.
    """
    np.random.seed(123)
    # Only the label matrix is needed; the other two outputs are unused.
    _, _, L = generate_simple_label_matrix(
        self.known_dimensions.num_examples,
        self.known_dimensions.num_functions,
        self.known_dimensions.num_classes,
    )
    num_classes = self.known_dimensions.num_classes

    label_model = LabelModel(cardinality=num_classes)
    label_model._set_constants(L)
    L_shift = L + 1
    label_model_lind = label_model._create_L_ind(L_shift)

    # Encode each example as the list of events that fired for it. A vote
    # of class `cls_id` by function `func_id` is event
    # `func_id * num_classes + cls_id`; entries of -1 produce no event.
    example_event_lists: List[ExampleEventListOccurence] = []
    for example in L:
        event_ids = [
            func_id * num_classes + cls_id
            for func_id, cls_id in enumerate(example)
            if cls_id > -1
        ]
        example_event_lists.append(ExampleEventListOccurence(event_ids))

    sparse_model = SparseExampleEventListLabelModel()
    sparse_model._set_constants(known_dimensions=self.known_dimensions)
    sparse_model_lind = sparse_model.get_l_ind(
        known_dimensions=self.known_dimensions,
        example_events_list=example_event_lists,
        return_array=True,
    )
    sparse_model_objective = sparse_model._prepare_objective_from_sparse_example_eventlist(
        known_dimensions=self.known_dimensions,
        example_events_list=example_event_lists,
    )

    # Indicator matrices must match exactly (the original asserted this
    # twice with identical arguments; once is sufficient).
    np.testing.assert_equal(label_model_lind, sparse_model_lind)

    self.assertEqual(label_model.n, sparse_model.n)
    self.assertEqual(label_model.m, sparse_model.m)
    self.assertEqual(label_model.cardinality, sparse_model.cardinality)

    # Objective matrices must match up to floating-point tolerance.
    label_model._generate_O(L_shift)
    label_model_O = label_model.O.detach().numpy()
    np.testing.assert_almost_equal(label_model_O, sparse_model_objective)
def setUpClass(cls) -> None:
    """Create the fixtures stored on the test class.

    Stores ``known_dimensions``, a randomly drawn indicator matrix
    ``L_ind``, its co-occurrence counts ``O_counts``, the counts divided by
    the number of examples ``O``, and ``model_O`` as produced by
    ``LabelModel._generate_O_from_L_aug``.

    NOTE(review): no ``@classmethod`` decorator is visible in this view --
    confirm it is present in the file.
    """
    dims = KnownDimensions(num_classes=7, num_examples=1000, num_functions=10)

    # Row c of the identity matrix is the one-hot encoding of class c.
    one_hot_rows = np.eye(dims.num_classes)

    # One random class index per (function, example) pair.
    drawn_classes = np.random.choice(
        dims.num_classes,
        size=[dims.num_functions, dims.num_examples],
    )

    # Look up the one-hot row for every draw, then stack the per-function
    # blocks horizontally into a single indicator matrix.
    indicator = np.hstack(one_hot_rows[drawn_classes])
    counts = indicator.T @ indicator

    cls.known_dimensions = dims
    cls.L_ind = indicator
    cls.O_counts = counts
    cls.O = counts / dims.num_examples

    model = LabelModel()
    model._set_constants(known_dimensions=dims)
    model._create_tree()
    model._generate_O_from_L_aug(indicator)
    cls.model_O = model.O.detach().numpy()
def test_sparse_and_regular_make_same_probs(self) -> None:
    """Check that sparse and dense training yield the same conditional probabilities.

    ``SparseEventPairLabelModel`` is fitted from event co-occurrence records
    and ``LabelModel`` from the label matrix itself, both with the same
    epochs, learning rate and seed. Their ``get_conditional_probs`` outputs
    must agree.
    """
    np.random.seed(123)
    dims = self.known_dimensions
    _, _, L = generate_simple_label_matrix(
        dims.num_examples, dims.num_functions, dims.num_classes
    )

    # A throwaway dense model is used only to derive the indicator matrix
    # from the shifted labels.
    helper_model = LabelModel(cardinality=dims.num_classes)
    helper_model._set_constants(L)
    indicator = helper_model._create_L_ind(L + 1)
    pair_counts = indicator.T @ indicator

    # One record per cell of the co-occurrence count matrix, row-major.
    pair_events: List[EventCooccurence] = [
        EventCooccurence(row_id, col_id, frequency=count)
        for row_id, row in enumerate(pair_counts)
        for col_id, count in enumerate(row)
    ]

    sparse_model = SparseEventPairLabelModel()
    sparse_model.fit_from_sparse_event_cooccurrence(
        sparse_event_occurence=pair_events,
        known_dimensions=dims,
        n_epochs=200,
        lr=0.01,
        seed=123,
    )

    # A fresh dense model is trained on the raw label matrix.
    dense_model = LabelModel(cardinality=dims.num_classes)
    dense_model.fit(L, n_epochs=200, lr=0.01, seed=123)

    P_lm = dense_model.get_conditional_probs()
    P_slm = sparse_model.get_conditional_probs()
    np.testing.assert_array_almost_equal(P_slm, P_lm)