예제 #1
0
 def test_lf_applier(self) -> None:
     """Check that applying ``f`` and ``g`` to ``DATA`` yields ``L_EXPECTED``.

     The same applier is run once with the progress bar disabled and once
     with it enabled; both runs must produce the expected label matrix.
     """
     points = [SimpleNamespace(num=value) for value in DATA]
     applier = LFApplier([f, g])
     # The result must not depend on whether a progress bar is displayed.
     for show_bar in (False, True):
         label_matrix = applier.apply(points, progress_bar=show_bar)
         np.testing.assert_equal(label_matrix, L_EXPECTED)
 def test_lf_applier(self) -> None:
     """Check ``LFApplier.apply`` output with and without metadata.

     Runs ``f`` and ``g`` over ``DATA`` with the progress bar off and on,
     then once more with ``return_meta=True``; every run must yield
     ``L_EXPECTED`` and the metadata must equal an empty ``ApplierMetadata``.
     """
     points = [SimpleNamespace(num=value) for value in DATA]
     applier = LFApplier([f, g])
     # The result must not depend on whether a progress bar is displayed.
     for show_bar in (False, True):
         label_matrix = applier.apply(points, progress_bar=show_bar)
         np.testing.assert_equal(label_matrix, L_EXPECTED)

     # With return_meta=True the call returns a (labels, metadata) pair.
     result = applier.apply(points, return_meta=True)
     label_matrix, metadata = result
     np.testing.assert_equal(label_matrix, L_EXPECTED)
     self.assertEqual(metadata, ApplierMetadata({}))
 def test_lf_applier_fault(self) -> None:
     """Check how the applier behaves when one labeling function fails.

     Without ``fault_tolerant`` the ``AttributeError`` must propagate.  With
     ``fault_tolerant=True`` the call must return ``L_EXPECTED_BAD`` and,
     when metadata is requested, report ``f_bad`` with a value of 5.
     """
     points = [SimpleNamespace(num=value) for value in DATA]
     applier = LFApplier([f, f_bad])

     # Strict mode: the failure surfaces to the caller.
     self.assertRaises(AttributeError, applier.apply, points, progress_bar=False)

     # Fault-tolerant mode: same keyword arguments for both remaining calls.
     tolerant = {"progress_bar": False, "fault_tolerant": True}
     label_matrix = applier.apply(points, **tolerant)
     np.testing.assert_equal(label_matrix, L_EXPECTED_BAD)

     label_matrix, metadata = applier.apply(points, return_meta=True, **tolerant)
     np.testing.assert_equal(label_matrix, L_EXPECTED_BAD)
     self.assertEqual(metadata, ApplierMetadata({"f_bad": 5}))
예제 #4
0
def main():
    """Apply three labeling functions to ``src`` and print the results.

    Prints, in order: the label matrix, the ``LFAnalysis`` summary, the
    label buckets built from the first two columns of the label matrix, and
    the predictions of a ``LabelModel`` fitted on that matrix.
    """
    labeling_functions = [lf_contains_link, lf_contains_co, lf_contains_sub]
    label_matrix = LFApplier(labeling_functions).apply(src)
    print(label_matrix)

    summary = LFAnalysis(label_matrix, labeling_functions).lf_summary()
    print(summary)

    # Bucket the data points by the outputs of the first two labeling functions.
    first_column = label_matrix[:, 0]
    second_column = label_matrix[:, 1]
    print(get_label_buckets(first_column, second_column))

    model = LabelModel(cardinality=2, verbose=True)
    model.fit(label_matrix, n_epochs=500, log_freq=50, seed=123)
    predictions = model.predict(L=label_matrix, tie_break_policy="abstain")
    print(predictions)
    def test_lf_applier_preprocessor_memoized(self) -> None:
        """Check that a memoized preprocessor is invoked the expected number of times.

        A ``SquareHitTracker`` is called from inside a preprocessor declared
        with ``memoize=True``; after applying the labeling functions the
        tracker must have recorded exactly 4 hits and the label matrix must
        equal ``L_PREPROCESS_EXPECTED``.
        """
        tracker = SquareHitTracker()
        points = [SimpleNamespace(num=value) for value in DATA]

        @preprocessor(memoize=True)
        def square_memoize(x: DataPoint) -> DataPoint:
            # Every real (non-cached) invocation goes through the tracker.
            x.num_squared = tracker(x.num)
            return x

        @labeling_function(pre=[square_memoize])
        def fp_memoized(x: DataPoint) -> int:
            if x.num_squared > 42:
                return 0
            return -1

        label_matrix = LFApplier([f, fp_memoized]).apply(points, progress_bar=False)
        np.testing.assert_equal(label_matrix, L_PREPROCESS_EXPECTED)
        self.assertEqual(tracker.n_hits, 4)
예제 #6
0
    def _create_model(self, ctx):
        """Fit a keyword model on ``ctx`` and build labeling functions from it.

        A TF-IDF (1- to 3-grams, whitespace tokenizer, case preserved) +
        logistic-regression pipeline is fitted on the texts in ``ctx``.  For
        each label the three highest-weighted n-grams are taken as candidate
        keywords, and any keyword that is also a candidate for another label
        is discarded.  The remaining keywords are passed to ``get_lfs``.

        Args:
            ctx: Mapping from label to an iterable of ``(text, text_orig)``
                pairs.  Only ``text`` is used for training.

        Side effects:
            Sets ``self.lfs`` and ``self.idx_label_map`` from the result of
            ``get_lfs(keywords)`` and ``self.applier`` to ``LFApplier(self.lfs)``.
        """
        top_k = 3  # number of candidate keywords kept per label

        X, y = [], []
        for label in sorted(ctx):
            # Sort the pairs so the training order is deterministic.
            for text, _text_orig in sorted(ctx[label]):
                X.append(text)
                y.append(label)

        le = LabelEncoder()
        y_idx = le.fit_transform(y)
        m = make_pipeline(
            TfidfVectorizer(ngram_range=(1, 3),
                            max_features=None,
                            tokenizer=lambda s: s.split(),
                            lowercase=False), LogisticRegression())
        m.fit(X, y_idx)

        # Invert the vectorizer vocabulary: feature index -> n-gram.
        voc = {idx: word for word, idx in m.steps[0][1].vocabulary_.items()}

        coef = m.steps[1][1].coef_
        if coef.shape[0] == 1 and len(le.classes_) == 2:
            # Binary problems yield a single coefficient row that describes
            # classes_[1]; its negation describes classes_[0].  Expanding it
            # keeps every class paired with its own weights in the zip below.
            coef = np.vstack((-coef[0], coef[0]))

        candidates = {}
        for label, weights in zip(le.classes_, coef):
            top_indices = np.argsort(weights)[-top_k:]
            candidates[label] = [voc[i] for i in top_indices]

        # Keep only keywords that are not candidates for any other label.
        keywords = defaultdict(list)
        for label, kws in candidates.items():
            other_kws = {
                kw
                for other_label, other in candidates.items()
                if other_label != label
                for kw in other
            }
            for kw in kws:
                if kw not in other_kws:
                    keywords[label].append(kw)

        self.lfs, self.idx_label_map = get_lfs(keywords)
        self.applier = LFApplier(self.lfs)
 def test_lf_applier_numpy(self) -> None:
     """Check that the applier accepts a NumPy array as its data points.

     The input is ``DATA`` stacked twice and transposed; applying ``f_np``
     and ``g_np`` to it must yield ``L_EXPECTED``.
     """
     stacked = np.vstack([DATA, DATA])
     X = stacked.T
     label_matrix = LFApplier([f_np, g_np]).apply(X, progress_bar=False)
     np.testing.assert_equal(label_matrix, L_EXPECTED)
 def test_lf_applier_no_labels(self) -> None:
     """Check that applying only ``h`` to ``DATA`` yields -1 everywhere."""
     points = [SimpleNamespace(num=value) for value in DATA]
     label_matrix = LFApplier([h]).apply(points, progress_bar=False)
     # Comparing against a scalar asserts that every entry equals -1.
     np.testing.assert_equal(label_matrix, -1)