예제 #1
0
        def subroutine(df_dict):
            """Build a text finder and a text annotator from ``df_dict`` and link them.

            The finder is plotted, its "raw" dataframe is replaced with a copy of
            ``example_raw_df`` (taken from the enclosing scope) and its sources are
            refreshed. Then the finder's "raw" selection and its x/y range are
            linked to the annotator before the finder view is built.
            """
            finder_cls = get_explorer_class("finder", "text")
            explorer = finder_cls(df_dict)
            annotator_cls = get_explorer_class("annotator", "text")
            annotator = annotator_cls(df_dict)

            explorer.plot()

            # swap in fresh raw data, then push it to the plot sources
            raw_copy = example_raw_df.copy()
            explorer.dfs["raw"] = raw_copy
            explorer._update_sources()

            # link the two explorers: selection on "raw", then axis ranges
            explorer.link_selection("raw", annotator, "raw")
            explorer.link_xy_range(annotator)

            # building the view is the final check; its result is not needed
            explorer.view()
예제 #2
0
 def test_init(example_raw_df, example_dev_df):
     """Smoke-test the "snorkel" explorer class of every main feature.

     Each explorer is built from a raw and a labeled dataframe, then has
     ``plot``, ``plot_lf`` (with ``RANDOM_LABEL_LF``) and ``view`` called on it.
     """
     lf_include = ("C", "I", "M", "H")
     for feature in MAIN_FEATURES:
         explorer_cls = get_explorer_class("snorkel", feature)
         data = {"raw": example_raw_df, "labeled": example_dev_df}
         explorer = explorer_cls(data)
         explorer.plot()
         explorer.plot_lf(RANDOM_LABEL_LF, include=lf_include)
         # the view itself is discarded; only successful construction matters
         explorer.view()
예제 #3
0
 def test_init(example_margin_df):
     """Smoke-test the "margin" explorer class of every main feature.

     Each explorer is built from a raw dataframe plus the two column names
     "label_1" and "label_2", plotted once for "A" and once for "B", then viewed.
     """
     for feature in MAIN_FEATURES:
         explorer_cls = get_explorer_class("margin", feature)
         explorer = explorer_cls({"raw": example_margin_df}, "label_1", "label_2")
         for plot_arg in ("A", "B"):
             explorer.plot(plot_arg)
         # the view itself is discarded; only successful construction matters
         explorer.view()
예제 #4
0
    def test_labeling(example_raw_df):
        """Exercise selection storing and the apply button of an annotator explorer.

        The feature is picked at random from ``MAIN_FEATURES``. The test clicks
        apply with nothing selected, checks non-cumulative and cumulative
        selection storing, and finally applies the label "A".
        """
        chosen_feature = random.choice(MAIN_FEATURES)
        annotator_cls = get_explorer_class("annotator", chosen_feature)
        explorer = annotator_cls({"raw": example_raw_df})

        def raw_selection():
            # look the selection up afresh each time rather than caching it
            return explorer.sources["raw"].selected

        def click_apply():
            explorer.annotator_apply._trigger_event(apply_event)

        # empty click
        apply_event = ButtonClick(explorer.annotator_apply)
        click_apply()

        # test non-cumulative selection: each new pick replaces the previous one
        for picked in (0, 1):
            raw_selection().indices = [picked]
            explorer._store_selection()
            assert raw_selection().indices == [picked]

        # test cumulative selection: the new pick is merged with the stored one
        explorer.selection_option_box.active = [0]
        raw_selection().indices = [0]
        explorer._store_selection()
        assert raw_selection().indices == [0, 1]

        # actual labeling
        explorer.annotator_input.value = "A"
        click_apply()
예제 #5
0
 def test_init(example_raw_df):
     """Smoke-test the "annotator" explorer class of every main feature.

     Each explorer is built from the raw dataframe only, then plotted and viewed.
     """
     # test most methods for the class corresponding to each kind of feature
     for feature in MAIN_FEATURES:
         explorer = get_explorer_class("annotator", feature)({"raw": example_raw_df})
         explorer.plot()
         # the view itself is discarded; only successful construction matters
         explorer.view()
예제 #6
0
 def test_init(example_soft_label_df):
     """Smoke-test the "softlabel" explorer class of every main feature.

     Each explorer receives the fixture as "raw" and a copy of it as "train",
     together with the column names "pred_label" and "pred_score", and is then
     plotted and viewed.
     """
     for feature in MAIN_FEATURES:
         explorer_cls = get_explorer_class("softlabel", feature)
         data = {
             "raw": example_soft_label_df,
             "train": example_soft_label_df.copy(),
         }
         explorer = explorer_cls(data, "pred_label", "pred_score")
         explorer.plot()
         # the view itself is discarded; only successful construction matters
         explorer.view()
예제 #7
0
    def test_filter_text(example_raw_df):
        """Check the text finder's search filter with patterns built from the data.

        A broad and a narrow case-insensitive pattern are assembled from the
        first tokens of the raw texts, such that the narrow token set is a
        subset of the broad one. The broad pattern is used as the positive
        search and the narrow pattern as the negative search, and both
        conditions are handed to ``subroutine_selection_filter``.
        """
        explorer = get_explorer_class("finder", "text")({"raw": example_raw_df})
        explorer.plot()

        # dynamically construct patterns with predictable outcome
        texts = explorer.dfs["raw"]["text"].tolist()
        first_token_of_ten = set()
        first_token_of_two = set()
        for i, _text in enumerate(texts):
            # terminate when the two sets are different
            if i >= 10 and first_token_of_ten != first_token_of_two:
                assert first_token_of_two.issubset(first_token_of_ten)
                break

            # guard against empty text case: "".split(" ") yields [""], never
            # an empty list, so the first token itself has to be checked; an
            # empty token would make the alternation below match everything
            _first_token = _text.split(" ")[0]
            if not _first_token:
                continue

            # add token to sets
            first_token_of_ten.add(_first_token)
            if i < 2:
                first_token_of_two.add(_first_token)

        def _alternation(tokens):
            # escape tokens so regex metacharacters are matched literally;
            # sort them so the generated pattern is reproducible across runs
            return "(?i)(" + "|".join(re.escape(_tok) for _tok in sorted(tokens)) + ")"

        broad_pattern = _alternation(first_token_of_ten)
        narrow_pattern = _alternation(first_token_of_two)
        assert re.search(narrow_pattern, texts[0])
        assert re.search(broad_pattern, texts[2])

        def first_condition():
            explorer.search_pos.value = broad_pattern

        def second_condition():
            explorer.search_neg.value = narrow_pattern

        subroutine_selection_filter(
            explorer,
            explorer.search_filter_box,
            [first_condition, second_condition],
        )
예제 #8
0
    def test_filter_score(example_soft_label_df):
        """Check the softlabel text explorer's score filter with two score ranges.

        The explorer is built from the raw dataframe with the column names
        "pred_label" and "pred_score". Two callbacks that set ``score_range`` to
        (0.3, 0.8) and then (0.5, 0.6) are handed to
        ``subroutine_selection_filter`` together with the score filter box.
        """
        softlabel_cls = get_explorer_class("softlabel", "text")
        explorer = softlabel_cls(
            {"raw": example_soft_label_df},
            "pred_label",
            "pred_score",
        )
        explorer.plot()

        def first_condition():
            # first score window
            explorer.score_range.value = (0.3, 0.8)

        def second_condition():
            # second score window, lying inside the first one
            explorer.score_range.value = (0.5, 0.6)

        conditions = [first_condition, second_condition]
        subroutine_selection_filter(explorer, explorer.score_filter_box, conditions)
예제 #9
0
    def test_lf_labeling(example_raw_df, example_dev_df):
        """Exercise labeling-function menus, filtering and applying on the snorkel text explorer.

        Four labeling functions are defined over the first six / first ten raw
        texts, returning "A" or "B". The test checks that the apply/filter
        menus list the plotted functions (re-plotting an existing one does not
        add a menu entry), triggers the menu events with and without a
        selection, and asserts that the first six raw rows end up labeled "A"
        and later "B".
        """
        explorer = get_explorer_class("snorkel", "text")(
            {
                "raw": example_raw_df,
                "labeled": example_dev_df,
            }
        )
        explorer.plot()

        # create some dummy rules for predictable outcome
        texts = explorer.dfs["raw"]["text"].tolist()
        first_six_texts = set(texts[:6])
        first_ten_texts = set(texts[:10])

        @labeling_function(targets=["A"])
        def narrow_rule_a(row):
            if row["text"] in first_six_texts:
                return "A"
            return module_config.ABSTAIN_DECODED

        @labeling_function(targets=["A"])
        def broad_rule_a(row):
            if row["text"] in first_ten_texts:
                return "A"
            return module_config.ABSTAIN_DECODED

        @labeling_function(targets=["B"])
        def narrow_rule_b(row):
            if row["text"] in first_six_texts:
                return "B"
            return module_config.ABSTAIN_DECODED

        @labeling_function(targets=["B"])
        def broad_rule_b(row):
            if row["text"] in first_ten_texts:
                return "B"
            return module_config.ABSTAIN_DECODED

        # add two rules, check menu
        explorer.plot_lf(narrow_rule_a)
        explorer.plot_lf(broad_rule_a)
        lf_names_so_far = ["narrow_rule_a", "broad_rule_a"]
        assert explorer.lf_apply_trigger.menu == lf_names_so_far
        assert explorer.lf_filter_trigger.menu == lf_names_so_far

        # add an existing rule, should trigger overwrite
        explorer.plot_lf(narrow_rule_a)
        assert explorer.lf_apply_trigger.menu == lf_names_so_far
        assert explorer.lf_filter_trigger.menu == lf_names_so_far

        # empty click: nothing selected
        # each trigger gets its own event; sharing one variable would send the
        # apply event (item="narrow_rule_a") to the filter trigger as well
        filter_event = MenuItemClick(explorer.lf_filter_trigger, item="broad_rule_a")
        apply_event = MenuItemClick(explorer.lf_apply_trigger, item="narrow_rule_a")
        explorer.lf_filter_trigger._trigger_event(filter_event)
        explorer.lf_apply_trigger._trigger_event(apply_event)

        # emulate selection by user
        # slice to first ten, then assign A to first six
        all_raw_idx = list(range(explorer.dfs["raw"].shape[0]))
        explorer.sources["raw"].selected.indices = all_raw_idx[:]
        # TODO: ideally should select by event, but this is not working
        select_event = almost_global_select(explorer.figure)
        explorer.figure._trigger_event(select_event)
        assert explorer.sources["raw"].selected.indices == all_raw_idx

        # actually triggering LFs on a valid selection
        explorer.lf_filter_trigger._trigger_event(filter_event)
        explorer.lf_apply_trigger._trigger_event(apply_event)

        first_six_labels = explorer.dfs["raw"]["label"].iloc[:6].tolist()
        assert first_six_labels == ["A"] * 6

        # add more rules, check menu again
        explorer.plot_lf(narrow_rule_b)
        explorer.plot_lf(broad_rule_b)

        lf_names_so_far = [
            "narrow_rule_a",
            "broad_rule_a",
            "narrow_rule_b",
            "broad_rule_b",
        ]
        assert explorer.lf_apply_trigger.menu == lf_names_so_far
        assert explorer.lf_filter_trigger.menu == lf_names_so_far

        # slice to first ten, then assign B to first six
        explorer.sources["raw"].selected.indices = all_raw_idx[:]
        # TODO: ideally should select by event, but this is not working
        explorer.figure._trigger_event(select_event)
        filter_event = MenuItemClick(explorer.lf_filter_trigger, item="broad_rule_b")
        explorer.lf_filter_trigger._trigger_event(filter_event)
        apply_event = MenuItemClick(explorer.lf_apply_trigger, item="narrow_rule_b")
        explorer.lf_apply_trigger._trigger_event(apply_event)

        first_six_labels = explorer.dfs["raw"]["label"].iloc[:6].tolist()
        assert first_six_labels == ["B"] * 6
예제 #10
0
 def test_init(example_raw_df):
     """Smoke-test the "finder" explorer class of every main feature.

     Each explorer is built from the raw dataframe only, then plotted and viewed.
     """
     for feature in MAIN_FEATURES:
         finder_cls = get_explorer_class("finder", feature)
         explorer = finder_cls({"raw": example_raw_df})
         explorer.plot()
         # the view itself is discarded; only successful construction matters
         explorer.view()