Ejemplo n.º 1
0
 def apply(self, tasks):
     print('Create regions...')
     random.shuffle(tasks)
     regions = self.create_regions(tasks[:100])
     print(f'Num regions: {len(regions)}')
     L_train = self.applier.apply(regions)
     lfa = LFAnalysis(L=L_train, lfs=self.lfs)
     confl = lfa.lf_conflicts()
     cov = lfa.lf_coverages()
     confli = np.argsort(confl)
     lfs_sorted = [self.lfs[i] for i in confli]
     out = []
     for lf, cf, cv in zip(lfs_sorted, confl[confli], cov[confli]):
         print(lf.name, cf, cv)
         out.append({'lop': lf.name, 'conflict': cf, 'coverage': cv})
     return out
class TestAnalysis(unittest.TestCase):
    def setUp(self) -> None:
        self.lfa = LFAnalysis(np.array(L))
        self.lfa_wo_abstain = LFAnalysis(np.array(L_wo_abstain))
        self.Y = np.array(Y)

    def test_label_coverage(self) -> None:
        self.assertEqual(self.lfa.label_coverage(), 5 / 6)

    def test_label_overlap(self) -> None:
        self.assertEqual(self.lfa.label_overlap(), 4 / 6)

    def test_label_conflict(self) -> None:
        self.assertEqual(self.lfa.label_conflict(), 3 / 6)

    def test_lf_polarities(self) -> None:
        polarities = self.lfa.lf_polarities()
        self.assertEqual(polarities, [[1, 2], [], [0, 2], [2], [0, 1], [0]])

    def test_lf_coverages(self) -> None:
        coverages = self.lfa.lf_coverages()
        coverages_expected = [3 / 6, 0, 3 / 6, 2 / 6, 2 / 6, 4 / 6]
        np.testing.assert_array_almost_equal(coverages,
                                             np.array(coverages_expected))

    def test_lf_overlaps(self) -> None:
        overlaps = self.lfa.lf_overlaps(normalize_by_coverage=False)
        overlaps_expected = [3 / 6, 0, 3 / 6, 1 / 6, 2 / 6, 4 / 6]
        np.testing.assert_array_almost_equal(overlaps,
                                             np.array(overlaps_expected))

        overlaps = self.lfa.lf_overlaps(normalize_by_coverage=True)
        overlaps_expected = [1, 0, 1, 1 / 2, 1, 1]
        np.testing.assert_array_almost_equal(overlaps,
                                             np.array(overlaps_expected))

    def test_lf_conflicts(self) -> None:
        conflicts = self.lfa.lf_conflicts(normalize_by_overlaps=False)
        conflicts_expected = [3 / 6, 0, 2 / 6, 1 / 6, 2 / 6, 3 / 6]
        np.testing.assert_array_almost_equal(conflicts,
                                             np.array(conflicts_expected))

        conflicts = self.lfa.lf_conflicts(normalize_by_overlaps=True)
        conflicts_expected = [1, 0, 2 / 3, 1, 1, 3 / 4]
        np.testing.assert_array_almost_equal(conflicts,
                                             np.array(conflicts_expected))

    def test_lf_empirical_accuracies(self) -> None:
        accs = self.lfa.lf_empirical_accuracies(self.Y)
        accs_expected = [1 / 3, 0, 1 / 3, 1 / 2, 1 / 2, 2 / 4]
        np.testing.assert_array_almost_equal(accs, np.array(accs_expected))

    def test_lf_empirical_probs(self) -> None:
        P_emp = self.lfa.lf_empirical_probs(self.Y, 3)
        P = np.array([
            [[1 / 2, 1, 0], [0, 0, 0], [1 / 2, 0, 1 / 2], [0, 0, 1 / 2]],
            [[1, 1, 1], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
            [[0, 1, 1 / 2], [1 / 2, 0, 1 / 2], [0, 0, 0], [1 / 2, 0, 0]],
            [[1, 1 / 2, 1 / 2], [0, 0, 0], [0, 0, 0], [0, 1 / 2, 1 / 2]],
            [[1 / 2, 1, 1 / 2], [1 / 2, 0, 0], [0, 0, 1 / 2], [0, 0, 0]],
            [[0, 1, 0], [1, 0, 1], [0, 0, 0], [0, 0, 0]],
        ])
        np.testing.assert_array_almost_equal(P, P_emp)

    def test_lf_summary(self) -> None:
        df = self.lfa.lf_summary(self.Y, est_weights=None)
        df_expected = pd.DataFrame({
            "Polarity": [[1, 2], [], [0, 2], [2], [0, 1], [0]],
            "Coverage": [3 / 6, 0, 3 / 6, 2 / 6, 2 / 6, 4 / 6],
            "Overlaps": [3 / 6, 0, 3 / 6, 1 / 6, 2 / 6, 4 / 6],
            "Conflicts": [3 / 6, 0, 2 / 6, 1 / 6, 2 / 6, 3 / 6],
            "Correct": [1, 0, 1, 1, 1, 2],
            "Incorrect": [2, 0, 2, 1, 1, 2],
            "Emp. Acc.": [1 / 3, 0, 1 / 3, 1 / 2, 1 / 2, 2 / 4],
        })
        pd.testing.assert_frame_equal(df.round(6), df_expected.round(6))

        df = self.lfa.lf_summary(Y=None, est_weights=None)
        df_expected = pd.DataFrame({
            "Polarity": [[1, 2], [], [0, 2], [2], [0, 1], [0]],
            "Coverage": [3 / 6, 0, 3 / 6, 2 / 6, 2 / 6, 4 / 6],
            "Overlaps": [3 / 6, 0, 3 / 6, 1 / 6, 2 / 6, 4 / 6],
            "Conflicts": [3 / 6, 0, 2 / 6, 1 / 6, 2 / 6, 3 / 6],
        })
        pd.testing.assert_frame_equal(df.round(6), df_expected.round(6))

        est_weights = [1, 0, 1, 1, 1, 0.5]
        names = list("abcdef")
        lfs = [LabelingFunction(s, f) for s in names]
        lfa = LFAnalysis(np.array(L), lfs)
        df = lfa.lf_summary(self.Y, est_weights=est_weights)
        df_expected = pd.DataFrame({
            "j": [0, 1, 2, 3, 4, 5],
            "Polarity": [[1, 2], [], [0, 2], [2], [0, 1], [0]],
            "Coverage": [3 / 6, 0, 3 / 6, 2 / 6, 2 / 6, 4 / 6],
            "Overlaps": [3 / 6, 0, 3 / 6, 1 / 6, 2 / 6, 4 / 6],
            "Conflicts": [3 / 6, 0, 2 / 6, 1 / 6, 2 / 6, 3 / 6],
            "Correct": [1, 0, 1, 1, 1, 2],
            "Incorrect": [2, 0, 2, 1, 1, 2],
            "Emp. Acc.": [1 / 3, 0, 1 / 3, 1 / 2, 1 / 2, 2 / 4],
            "Learned Weight": [1, 0, 1, 1, 1, 0.5],
        }).set_index(pd.Index(names))
        pd.testing.assert_frame_equal(df.round(6), df_expected.round(6))

    def test_wrong_number_of_lfs(self) -> None:
        with self.assertRaisesRegex(ValueError, "Number of LFs"):
            LFAnalysis(np.array(L), [LabelingFunction(s, f) for s in "ab"])

    def test_lf_summary_without_abstain(self) -> None:
        df = self.lfa_wo_abstain.lf_summary(self.Y + 4, est_weights=None)
        df_expected = pd.DataFrame({
            "Polarity": [[3, 4, 5], [3, 4], [3, 4, 5], [4, 5], [3, 4, 5], [3]],
            "Coverage": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            "Overlaps": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            "Conflicts": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            "Correct": [1, 1, 1, 3, 1, 0],
            "Incorrect": [5, 5, 5, 3, 5, 6],
            "Emp. Acc.": [1 / 6, 1 / 6, 1 / 6, 3 / 6, 1 / 6, 0],
        })
        pd.testing.assert_frame_equal(df.round(6), df_expected.round(6))