def get_role_probs(lf_train: pd.DataFrame,
                   filter_abstains: bool = False,
                   lfs: Optional[List[labeling_function]] = None,
                   lf_dev: Optional[pd.DataFrame] = None,
                   seed: Optional[int] = None,
                   tmp_path: Optional[Union[str, Path]] = None,
                   use_majority_label_voter: bool = False) -> pd.DataFrame:
    """
    Takes "raw" data frame, builds argument role examples, (trains LabelModel),
    calculates event_argument_probs and returns merged argument role examples
    with event_argument_probs.

    :param lf_train: Training dataset which will be labeled using Snorkel
    :param filter_abstains: Filters rows where all labeling functions abstained
    :param lfs: List of labeling functions; defaults to ``get_role_list_lfs()``
    :param lf_dev: Optional development dataset that can be used to set a prior
        for the class balance; when given, the label model is also scored on it
    :param seed: Seed for use in label model (mu initialization). Any integer,
        including 0, is forwarded; ``None`` leaves the choice to the label model
    :param tmp_path: Path to temporarily store variables that are shared during
        random repeats. The files are read with ``pickle``, so this must be a
        trusted location
    :param use_majority_label_voter: Whether to use a majority label voter
        instead of the snorkel label model
    :return: Result of ``merge_event_role_examples`` for the training examples
        and their role class probabilities
    """
    df_train, L_train = None, None
    df_dev, Y_dev, L_dev = None, None, None
    tmp_train_path, tmp_dev_path = None, None

    # For random repeats try to load pickled variables from first run as they
    # are shared.
    # NOTE: the cache is looked up by path only; it is reused even if `lfs` or
    # the input frames differ from the run that wrote it.
    # NOTE: pickle.load can execute arbitrary code - only point tmp_path at
    # files written by this function.
    if tmp_path:
        cache_dir = Path(tmp_path)
        cache_dir.mkdir(parents=True, exist_ok=True)

        tmp_train_path = cache_dir.joinpath("role_train.pkl")
        if tmp_train_path.exists():
            with open(tmp_train_path, 'rb') as pickled_train:
                df_train, L_train = pickle.load(pickled_train)

        if lf_dev is not None:
            tmp_dev_path = cache_dir.joinpath("role_dev.pkl")
            if tmp_dev_path.exists():
                with open(tmp_dev_path, 'rb') as pickled_dev:
                    df_dev, Y_dev, L_dev = pickle.load(pickled_dev)

    if lfs is None:
        lfs = get_role_list_lfs()
    applier = PandasLFApplier(lfs)

    # Build the training examples and label matrix unless both came from cache
    if L_train is None or df_train is None:
        df_train, _ = build_event_role_examples(lf_train)
        logger.info("Running Event Role Labeling Function Applier")
        L_train = applier.apply(df_train)
        if tmp_path:
            with open(tmp_train_path, 'wb') as pickled_train:
                pickle.dump((df_train, L_train), pickled_train)

    # Same for the dev set, which additionally carries gold labels (Y_dev)
    if lf_dev is not None and any(element is None
                                  for element in [df_dev, Y_dev, L_dev]):
        df_dev, Y_dev = build_event_role_examples(lf_dev)
        logger.info("Running Event Role Labeling Function Applier on dev set")
        L_dev = applier.apply(df_dev)
        if tmp_path:
            with open(tmp_dev_path, 'wb') as pickled_dev:
                pickle.dump((df_dev, Y_dev, L_dev), pickled_dev)

    if use_majority_label_voter:
        logger.info(
            "Using MajorityLabelVoter to calculate role class probabilities")
        label_model = MajorityLabelVoter(cardinality=11)
    else:
        label_model = LabelModel(cardinality=11, verbose=True)
        logger.info(
            "Fitting LabelModel on the data and predicting role class probabilities"
        )
        fit_kwargs = {"n_epochs": 5000, "log_freq": 500}
        # Compare against None so that seed=0 is honoured instead of being
        # silently replaced by the label model's own default seed.
        if seed is not None:
            fit_kwargs["seed"] = seed
        label_model.fit(L_train=L_train, Y_dev=Y_dev, **fit_kwargs)

    # Evaluate label model on development data
    if df_dev is not None and Y_dev is not None:
        metrics = ["accuracy", "f1_micro", "f1_macro"]
        logger.info("Evaluate on the dev set")
        label_model_metrics = label_model.score(L=L_dev,
                                                Y=Y_dev,
                                                tie_break_policy="random",
                                                metrics=metrics)
        if use_majority_label_voter:
            logger.info('Role Majority Label Voter Metrics')
        else:
            logger.info('Role Label Model Metrics')
        logger.info(
            f"{'Accuracy:':<25} {label_model_metrics['accuracy'] * 100:.1f}%")
        logger.info(
            f"{'F1 (micro averaged):':<25} {label_model_metrics['f1_micro'] * 100:.1f}%"
        )
        logger.info(
            f"{'F1 (macro averaged):':<25} {label_model_metrics['f1_macro'] * 100:.1f}%"
        )

    event_role_probs = label_model.predict_proba(L_train)

    if filter_abstains:
        df_train_filtered, probs_train_filtered = filter_unlabeled_dataframe(
            X=df_train, y=event_role_probs, L=L_train)
        merged_event_role_examples = merge_event_role_examples(
            df_train_filtered, probs_train_filtered)
    else:
        # Multiplies probabilities of abstains with zero so that the example is
        # treated as padding in the end model
        merged_event_role_examples = merge_event_role_examples(
            df_train, utils.zero_out_abstains(event_role_probs, L_train))
    return merged_event_role_examples
# This model will ultimately produce a single set of noise-aware training labels, which are probabilistic or confidence-weighted labels. We will then use these labels to train a classifier for our task. For more technical details of this overall approach, see our [NeurIPS 2016](https://arxiv.org/abs/1605.07723) and [AAAI 2019](https://arxiv.org/abs/1810.02840) papers. For more info on the API, see the [`LabelModel` documentation](https://snorkel.readthedocs.io/en/master/packages/_autosummary/labeling/snorkel.labeling.LabelModel.html#snorkel.labeling.LabelModel).
#
# Note that no gold labels are used during the training process.
# The only information we need is the label matrix, which contains the output of the LFs on our training set.
# The `LabelModel` is able to learn weights for the labeling functions using only the label matrix as input.
# We also specify the `cardinality`, or number of classes.
# The `LabelModel` trains much more quickly than typical discriminative models since we only need the label matrix as input.

# %% {"tags": ["md-exclude-output"]}
from snorkel.labeling import LabelModel

label_model = LabelModel(cardinality=2, verbose=True)
label_model.fit(L_train=L_train, n_epochs=500, lr=0.001, log_freq=100, seed=123)

# %%
# Break ties with the "random" policy so that tied data points still receive a
# prediction and accuracy is computed over the whole validation set.
majority_acc = majority_model.score(L=L_valid, Y=Y_valid, tie_break_policy="random")["accuracy"]
print(f"{'Majority Vote Accuracy:':<25} {majority_acc * 100:.1f}%")

label_model_acc = label_model.score(L=L_valid, Y=Y_valid, tie_break_policy="random")["accuracy"]
print(f"{'Label Model Accuracy:':<25} {label_model_acc * 100:.1f}%")

# %% [markdown]
# So our `LabelModel` improves over the majority vote baseline!
# However, it is typically **not suitable as an inference-time model** to make predictions for unseen data points, due to (among other things) some data points having all abstain labels.
# In the next section, we will use the output of the label model as training labels to train a
# discriminative classifier to see if we can improve performance further.
# This classifier will only need the text of the comment to make predictions, making it much more suitable
# for inference over unseen comments.
# For more information on the properties of the label model and when to use it, see the Snorkel guides.

# %% [markdown]
# We also specify the `cardinality`, or number of classes.
# The `LabelModel` trains much more quickly than typical discriminative models since we only need the label matrix as input.

# %% {"tags": ["md-exclude-output"]}
from snorkel.labeling import LabelModel

label_model = LabelModel(cardinality=2, verbose=True)
label_model.fit(
    L_train=L_train,
    n_epochs=500,
    lr=0.001,
    log_freq=100,
    seed=123,
)

# %%
# Score the majority-vote baseline first, then the trained label model, both on
# the validation split and with the same tie-break policy.
majority_scores = majority_model.score(
    L=L_valid, Y=Y_valid, tie_break_policy="random"
)
majority_acc = majority_scores["accuracy"]
print(f"{'Majority Vote Accuracy:':<25} {majority_acc * 100:.1f}%")

label_model_scores = label_model.score(
    L=L_valid, Y=Y_valid, tie_break_policy="random"
)
label_model_acc = label_model_scores["accuracy"]
print(f"{'Label Model Accuracy:':<25} {label_model_acc * 100:.1f}%")

# %% [markdown]
# So our `LabelModel` improves over the majority vote baseline!
# However, it is typically **not suitable as an inference-time model** to make predictions for unseen data points, due to (among other things) some data points having all abstain labels.
# In the next section, we will use the output of the label model as training labels to train a
# discriminative classifier to see if we can improve performance further.
# This classifier will only need the text of the comment to make predictions, making it much more suitable
# for inference over unseen comments.