def test_get_label_buckets(self) -> None:
        y1 = np.array([[2], [1], [3], [1], [1], [3]])
        y2 = np.array([1, 2, 3, 1, 2, 3])
        buckets = get_label_buckets(y1, y2)
        expected_buckets = {(2, 1): [0], (1, 2): [1, 4], (3, 3): [2, 5], (1, 1): [3]}
        expected_buckets = {k: np.array(v) for k, v in expected_buckets.items()}
        np.testing.assert_equal(buckets, expected_buckets)

        y1_1d = np.array([2, 1, 3, 1, 1, 3])
        buckets = get_label_buckets(y1_1d, y2)
        np.testing.assert_equal(buckets, expected_buckets)
def mistakes_df(df, label_model, L_test, y_test):
    """Compute a DataFrame of all the mistakes we've seen."""
    out_dfs = []

    probs_test = label_model.predict_proba(L=L_test)
    preds_test = probs_test >= 0.5

    buckets = get_label_buckets(y_test, L_test[:, 1])
    print(buckets)

    for (actual, predicted) in buckets.keys():

        # Only show mistakes that we actually voted on
        if actual != predicted:

            actual_name = number_to_name_dict[actual]
            predicted_name = number_to_name_dict[predicted]

            out_dfs.append(
                get_mistakes(df,
                             probs_test,
                             buckets=buckets,
                             labels=(actual, predicted),
                             label_names=(actual_name, predicted_name)))

    # DataFrame.append was removed in pandas 2.0; pd.concat handles one or more frames
    return pd.concat(out_dfs)
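A hypothetical usage sketch (not from the original source), assuming a labeled test DataFrame `df`, a fitted `label_model`, the test label matrix `L_test`, and gold labels `y_test` from the surrounding pipeline, plus the `get_mistakes` and `number_to_name_dict` helpers the function relies on:

# Hypothetical call: collect every disagreement bucket into a single DataFrame
errors = mistakes_df(df, label_model, L_test, y_test)
print(errors.head())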
Example #3
def main():
    # Apply the labeling functions to the source data and summarize their behavior
    lfs = [lf_contains_link, lf_contains_co, lf_contains_sub]
    applier = LFApplier(lfs)
    labels = applier.apply(src)
    print(labels)
    print(LFAnalysis(labels, lfs).lf_summary())
    buckets = get_label_buckets(labels[:, 0], labels[:, 1])
    print(buckets)

    # Fit a label model over the LF outputs and predict labels, abstaining on ties
    label_model = LabelModel(cardinality=2, verbose=True)
    label_model.fit(labels, n_epochs=500, log_freq=50, seed=123)
    pred_labels = label_model.predict(L=labels, tie_break_policy="abstain")
    print(pred_labels)
Example #4
def test_get_label_buckets_bad_shape(self) -> None:
    with self.assertRaisesRegex(ValueError, "same number of elements"):
        get_label_buckets(np.array([0, 1, 1]), np.array([1, 1]))
def model_analysis(label_model: LabelModel,
                   training_set: pd.DataFrame,
                   L_train: np.ndarray,
                   L_test: np.ndarray,
                   Y_test: np.ndarray,
                   lfs: list,
                   output_file="output") -> None:
    # TODO: consider using **kwargs instead of this painful list of arguments
    """Output analysis for the label model to a file

    :param label_model: The current label model which we want to output analysis for
    :type label_model: LabelModel
    :param training_set: A dataframe containing the training dataset
    :type training_set: pd.DataFrame
    :param L_train: The matrix of labels generated by the labeling functions on the training data
    :type L_train: np.ndarray
    :param L_test: The matrix of labels generated by the labeling functions on the testing data
    :type L_test: np.ndarray
    :param Y_test: Gold labels associated with data points in L_test
    :type Y_test: np.ndarray
    :param lfs: List of labeling functions
    :type lfs: list
    :param output_file: Base name for the output file, written under `../output/logs/`; defaults to "output"
    :type output_file: str, optional
    """
    Y_train = label_model.predict_proba(L=L_train)
    Y_pred = label_model.predict(L=L_test, tie_break_policy="abstain")
    lf_analysis_train = LFAnalysis(L=L_train, lfs=lfs).lf_summary()

    # TODO: Write this df to an output file. Ask Jennifer about how to handle this
    print(lf_analysis_train)

    # build majority label voter model
    majority_model = MajorityLabelVoter()
    majority_acc = majority_model.score(L=L_test,
                                        Y=Y_test,
                                        tie_break_policy="abstain",
                                        metrics=["f1", "accuracy"])
    label_model_acc = label_model.score(L=L_test,
                                        Y=Y_test,
                                        tie_break_policy="abstain",
                                        metrics=["f1", "accuracy"])

    # get precision and recall scores
    p_score = precision_score(y_true=Y_test, y_pred=Y_pred, average='weighted')
    r_score = recall_score(y_true=Y_test,
                           y_pred=Y_pred,
                           average='weighted',
                           labels=np.unique(Y_pred))

    # how many documents abstained
    probs_train = majority_model.predict_proba(L=L_train)
    df_train_filtered, probs_train_filtered = filter_unlabeled_dataframe(
        X=training_set, y=probs_train, L=L_train)

    # bucket test points by (gold label, predicted label) to build a confusion breakdown
    buckets = get_label_buckets(Y_test, Y_pred)
    true_positives = buckets.get((1, 1))
    false_positives = buckets.get((0, 1))
    false_negatives = buckets.get((1, 0))
    true_negatives = buckets.get((0, 0))
    # write analysis to file
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

    with open(f"{'../output/logs/'}{output_file}_run_{timestamp}.txt",
              "w") as output_file:
        output_file.write(
            f"{'Majority Vote Accuracy:':<25} {majority_acc['accuracy'] * 100:.2f}%"
        )
        output_file.write(
            f"\n{'Majority Vote F1 Score:':<25} {majority_acc['f1'] * 100:.2f}%"
        )
        output_file.write(
            f"\n{'Label Model Accuracy:':<25} {label_model_acc['accuracy'] * 100:.2f}%"
        )
        output_file.write(
            f"\n{'Label Model F1 Score:':<25} {label_model_acc['f1'] * 100:.2f}%"
        )
        output_file.write(f"\n{'Precision Score:':<25} {p_score * 100:.2f}%")
        output_file.write(f"\n{'Recall Score:':<25} {r_score * 100:.2f}%")
        output_file.write(
            f"\n{'Abstained Data Points:':<25} {len(training_set) - len(df_train_filtered)}")
        output_file.write(
            f"\n{'True Positives:':<25} {len(true_positives) if true_positives is not None else 0}"
        )
        output_file.write(
            f"\n{'False Positives:':<25} {len(false_positives) if false_positives is not None else 0}"
        )
        output_file.write(
            f"\n{'False Negatives:':<25} {len(false_negatives) if false_negatives is not None else 0}"
        )
        output_file.write(
            f"\n{'True Negatives:':<25} {len(true_negatives) if true_negatives is not None else 0}"
        )
        abstained_positives = buckets.get((1, -1))
        abstained_negatives = buckets.get((0, -1))
        output_file.write(
            f"\n{'Abstained Positives:':<25} {len(abstained_positives) if abstained_positives is not None else 0}"
        )
        output_file.write(
            f"\n{'Abstained Negatives:':<25} {len(abstained_negatives) if abstained_negatives is not None else 0}"
        )
Example #6
# %%
LFAnalysis(L=L_dev, lfs=lfs).lf_summary(Y=Y_dev)

# %% [markdown]
# So even these very simple rules do quite well!
# We might want to pick the `check` rule, since both have high precision and `check` has higher coverage.
# But let's look at our data to be sure.
#
# The helper method `get_label_buckets(...)` groups data points by their predicted label and true label.
# For example, we can find the indices of data points that the LF labeled `SPAM` but that actually belong to class `HAM`.
# This may give ideas for where the LF could be made more specific.

# %%
from snorkel.analysis import get_label_buckets

buckets = get_label_buckets(Y_dev, L_dev[:, 1])
df_dev.iloc[buckets[(HAM, SPAM)]]

# %% [markdown]
# There's only one row here because `check` produced only one false positive on the `dev` set.
# Now let's take a look at 10 random `train` set data points where `check` labeled `SPAM` to see if it matches our intuition or if we can identify some false positives.

# %%
df_train.iloc[L_train[:, 1] == SPAM].sample(10, random_state=1)

# %% [markdown]
# No clear false positives here, but many look like they could be labeled by `check_out` as well.
#
# Let's see 10 data points where `check_out` abstained, but `check` labeled. We can use `get_label_buckets(...)` to group data points by their predicted and/or true labels.

# %%
buckets = get_label_buckets(L_train[:, 0], L_train[:, 1])
df_train.iloc[buckets[(ABSTAIN, SPAM)]].sample(10, random_state=1)

# %% [markdown]
# Most of these seem like small modifications of "check out", like "check me out" or "check it out".
# Can we get the best of both worlds?

# %% [markdown]
# ### d) Balance accuracy and coverage

# %% [markdown]
# Let's see if we can use regular expressions to account for modifications of "check out" and get the coverage of `check` plus the accuracy of `check_out`.

# %%
import re
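# A sketch of one possible next step (illustrative, not necessarily the tutorial's exact LF):
# a regex-based labeling function that also matches variants like "check it out" or
# "check me out". It assumes the SPAM/ABSTAIN constants from earlier in the tutorial and
# that the comment text is available as `x.text`.
from snorkel.labeling import labeling_function


@labeling_function()
def lf_regex_check_out(x):
    # Label SPAM whenever "check" is eventually followed by "out" in the comment text
    return SPAM if re.search(r"check.*out", x.text, flags=re.I) else ABSTAIN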