Example #1
import os
import pickle
from pathlib import Path
from typing import List, Optional, Union

import pandas as pd
from snorkel.labeling import (LabelingFunction, LabelModel, MajorityLabelVoter,
                              PandasLFApplier, filter_unlabeled_dataframe)

# build_event_role_examples, merge_event_role_examples, get_role_list_lfs,
# utils and logger are assumed to be defined/imported elsewhere in this module.


def get_role_probs(lf_train: pd.DataFrame,
                   filter_abstains: bool = False,
                   lfs: Optional[List[LabelingFunction]] = None,
                   lf_dev: Optional[pd.DataFrame] = None,
                   seed: Optional[int] = None,
                   tmp_path: Optional[Union[str, Path]] = None,
                   use_majority_label_voter: bool = False) -> pd.DataFrame:
    """
    Takes "raw" data frame, builds argument role examples, (trains LabelModel), calculates event_argument_probs
    and returns merged argument role examples with event_argument_probs.
    :param use_majority_label_voter: Whether to use a majority label voter instead of the snorkel label model
    :param seed: Seed for use in label model (mu initialization)
    :param filter_abstains: Filters rows where all labeling functions abstained
    :param lf_train: Training dataset which will be labeled using Snorkel
    :param lfs: List of labeling functions
    :param lf_dev: Optional development dataset that can be used to set a prior for the class balance
    :param tmp_path: Path to temporarily store variables that are shared during random repeats
    :return: Labeled lf_train, labeling function applier, label model
    """
    df_train, L_train = None, None
    df_dev, Y_dev, L_dev = None, None, None
    tmp_train_path, tmp_dev_path = None, None

    # For random repeats, try to load the pickled variables from the first run, since they are shared across repeats
    if tmp_path:
        tmp_train_path = Path(tmp_path).joinpath("role_train.pkl")
        os.makedirs(os.path.dirname(tmp_train_path), exist_ok=True)
        if tmp_train_path.exists():
            with open(tmp_train_path, 'rb') as pickled_train:
                df_train, L_train = pickle.load(pickled_train)
        if lf_dev is not None:
            tmp_dev_path = Path(tmp_path).joinpath("role_dev.pkl")
            os.makedirs(os.path.dirname(tmp_dev_path), exist_ok=True)
            if tmp_dev_path.exists():
                with open(tmp_dev_path, 'rb') as pickled_dev:
                    df_dev, Y_dev, L_dev = pickle.load(pickled_dev)

    if lfs is None:
        lfs = get_role_list_lfs()
    applier = PandasLFApplier(lfs)

    if L_train is None or df_train is None:
        df_train, _ = build_event_role_examples(lf_train)
        logger.info("Running Event Role Labeling Function Applier")
        L_train = applier.apply(df_train)
        if tmp_path:
            with open(tmp_train_path, 'wb') as pickled_train:
                pickle.dump((df_train, L_train), pickled_train)
    if lf_dev is not None and any(element is None
                                  for element in [df_dev, Y_dev, L_dev]):
        df_dev, Y_dev = build_event_role_examples(lf_dev)
        logger.info("Running Event Role Labeling Function Applier on dev set")
        L_dev = applier.apply(df_dev)
        if tmp_path:
            with open(tmp_dev_path, 'wb') as pickled_dev:
                pickle.dump((df_dev, Y_dev, L_dev), pickled_dev)

    if use_majority_label_voter:
        logger.info(
            "Using MajorityLabelVoter to calculate role class probabilities")
        label_model = MajorityLabelVoter(cardinality=11)
    else:
        label_model = LabelModel(cardinality=11, verbose=True)
        logger.info(
            "Fitting LabelModel on the data and predicting role class probabilities"
        )
        if seed is not None:
            label_model.fit(L_train=L_train,
                            n_epochs=5000,
                            log_freq=500,
                            seed=seed,
                            Y_dev=Y_dev)
        else:
            label_model.fit(L_train=L_train,
                            n_epochs=5000,
                            log_freq=500,
                            Y_dev=Y_dev)

    # Evaluate label model on development data
    if df_dev is not None and Y_dev is not None:
        metrics = ["accuracy", "f1_micro", "f1_macro"]
        logger.info("Evaluate on the dev set")
        label_model_metrics = label_model.score(L=L_dev,
                                                Y=Y_dev,
                                                tie_break_policy="random",
                                                metrics=metrics)
        if use_majority_label_voter:
            logger.info('Role Majority Label Voter Metrics')
        else:
            logger.info('Role Label Model Metrics')
        logger.info(
            f"{'Accuracy:':<25} {label_model_metrics['accuracy'] * 100:.1f}%")
        logger.info(
            f"{'F1 (micro averaged):':<25} {label_model_metrics['f1_micro'] * 100:.1f}%"
        )
        logger.info(
            f"{'F1 (macro averaged):':<25} {label_model_metrics['f1_macro'] * 100:.1f}%"
        )

    event_role_probs = label_model.predict_proba(L_train)

    if filter_abstains:
        df_train_filtered, probs_train_filtered = filter_unlabeled_dataframe(
            X=df_train, y=event_role_probs, L=L_train)

        merged_event_role_examples = merge_event_role_examples(
            df_train_filtered, probs_train_filtered)
    else:
        # Multiply the probabilities of all-abstain examples by zero so that they are treated as padding in the end model
        merged_event_role_examples = merge_event_role_examples(
            df_train, utils.zero_out_abstains(event_role_probs, L_train))
    return merged_event_role_examples
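
# utils.zero_out_abstains above is a project-specific helper that is not shown here.
# A minimal sketch of what such a helper could look like, assuming Snorkel's
# convention that -1 in the label matrix means "abstain" (hypothetical stand-in,
# not the project's actual implementation):
import numpy as np


def zero_out_abstains_sketch(probs: np.ndarray, L: np.ndarray) -> np.ndarray:
    """Zero the probability rows of examples on which every LF abstained,
    so that the end model can treat them as padding."""
    all_abstained = (L == -1).all(axis=1)          # shape: (n_examples,)
    return probs * ~all_abstained[:, np.newaxis]   # 0/1 mask broadcast over classes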
Example #2
# This model will ultimately produce a single set of noise-aware training labels, which are probabilistic or confidence-weighted labels. We will then use these labels to train a classifier for our task. For more technical details of this overall approach, see our [NeurIPS 2016](https://arxiv.org/abs/1605.07723) and [AAAI 2019](https://arxiv.org/abs/1810.02840) papers. For more info on the API, see the [`LabelModel` documentation](https://snorkel.readthedocs.io/en/master/packages/_autosummary/labeling/snorkel.labeling.LabelModel.html#snorkel.labeling.LabelModel).
#
# Note that no gold labels are used during the training process.
# The only information we need is the label matrix, which contains the output of the LFs on our training set.
# The `LabelModel` is able to learn weights for the labeling functions using only the label matrix as input.
# We also specify the `cardinality`, or number of classes.
# The `LabelModel` trains much more quickly than typical discriminative models since we only need the label matrix as input.
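
# %% [markdown]
# As a quick reminder of where that label matrix comes from: `L_train` is an integer matrix of shape (num examples, num LFs), with -1 marking abstains.
# A minimal sketch of how it is typically produced, assuming `df_train` and the list of labeling functions `lfs` defined earlier in this tutorial:

# %% {"tags": ["md-exclude-output"]}
from snorkel.labeling import PandasLFApplier

applier = PandasLFApplier(lfs=lfs)    # applies every LF to each row of the DataFrame
L_train = applier.apply(df=df_train)  # label matrix: one column per LF, -1 = abstain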

# %% {"tags": ["md-exclude-output"]}
from snorkel.labeling import LabelModel

label_model = LabelModel(cardinality=2, verbose=True)
label_model.fit(L_train=L_train, n_epochs=500, lr=0.001, log_freq=100, seed=123)

# %%
majority_acc = majority_model.score(L=L_valid,
                                    Y=Y_valid,
                                    tie_break_policy="random")["accuracy"]
print(f"{'Majority Vote Accuracy:':<25} {majority_acc * 100:.1f}%")

label_model_acc = label_model.score(L=L_valid,
                                    Y=Y_valid,
                                    tie_break_policy="random")["accuracy"]
print(f"{'Label Model Accuracy:':<25} {label_model_acc * 100:.1f}%")

# %% [markdown]
# So our `LabelModel` improves over the majority vote baseline!
# However, it is typically **not suitable as an inference-time model** to make predictions for unseen data points, due to (among other things) some data points having all abstain labels.
# In the next section, we will use the output of the label model as training labels to train a
# discriminative classifier to see if we can improve performance further.
# This classifier will only need the text of the comment to make predictions, making it much more suitable
# for inference over unseen comments.
# For more information on the properties of the label model and when to use it, see the [Snorkel guides]().
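
# %% [markdown]
# As a preview of that next step, the probabilistic training labels come from `predict_proba`, and data points on which every LF abstained can be dropped with `filter_unlabeled_dataframe`.
# This is a sketch, assuming `df_train` is the DataFrame the label matrix `L_train` was built from:

# %%
from snorkel.labeling import filter_unlabeled_dataframe

probs_train = label_model.predict_proba(L=L_train)  # noise-aware, probabilistic labels
df_train_filtered, probs_train_filtered = filter_unlabeled_dataframe(
    X=df_train, y=probs_train, L=L_train
)  # keep only rows with at least one non-abstain LF vote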
