Example 1
    def score(self, X, y, device=None):
        """
        Evaluate the classifier with macro-averaged F1.

        Note: this departs from `sklearn`, where classifiers use
        accuracy as their scoring function. Macro-F1 is more consistent
        with our course. Although this function can evaluate models
        directly, its primary use is in cross-validation and
        hyperparameter tuning.

        Parameters
        ----------
        X: np.array, shape `(n_examples, n_features)`

        y: iterable, shape `len(n_examples)`
            These can be the raw labels. They will converted internally
            as needed. See `build_dataset`.

        device: str or None
            Temporarily overrides the device used during prediction.
            This is useful if predictions require a lot of memory and
            so are better done on the CPU. After prediction is done,
            the model is returned to `self.device`.

        Returns
        -------
        float

        """
        predictions = self.predict(X, device=device)
        return utils.safe_macro_f1(y, predictions)
def wordentail_experiment(
    train_data,
    assess_data,
    vector_func,
    vector_combo_func,
    model,
    featurize_func=word_entail_featurize,
):
    """Train and evaluation code for the word-level entailment task.

    Parameters
    ----------
    train_data : list
    assess_data : list
    vector_func : function
        Any function mapping words in the vocab for `wordentail_data`
        to vector representations.
    vector_combo_func : function
        Any function for combining two vectors into a new vector
        of fixed dimensionality.
    model : class with `fit` and `predict` methods
    featurize_func : function
        Maps a dataset (with `vector_func` and `vector_combo_func`)
        to a feature pair `(X, y)` in the tensor format the model
        expects.

    Prints
    ------
    To standard output
        An sklearn classification report for `assess_data`.

    Returns
    -------
    dict with structure

        'model': the trained model
        'train_data': train_data
        'assess_data': assess_data
        'macro-F1': score for `assess_data`
        'vector_func': vector_func
        'vector_combo_func': vector_combo_func

    We pass 'vector_func' and 'vector_combo_func' through to ensure alignment
    between these experiments and the bake-off evaluation.

    """
    X_train, y_train = featurize_func(
        train_data, vector_func, vector_combo_func)
    X_dev, y_dev = featurize_func(
        assess_data, vector_func, vector_combo_func)
    model.fit(X_train, y_train)
    predictions = model.predict(X_dev)
    # Report:
    print(classification_report(y_dev, predictions, digits=3))
    macrof1 = utils.safe_macro_f1(y_dev, predictions)
    return {
        'model': model,
        'train_data': train_data,
        'assess_data': assess_data,
        'macro-F1': macrof1,
        'vector_func': vector_func,
        'vector_combo_func': vector_combo_func,
    }
Example 3
def encoder_experiment(train_data, assess_data, model):
    """Train and evaluation code for the word-level entailment task.

    Parameters
    ----------
    train_data : list
    assess_data : list
        Iterable of `(example, label)` pairs used for evaluation.
    model : class with `fit` and `predict` methods

    Prints
    ------
    To standard output
        An sklearn classification report for `assess_data`.

    Returns
    -------
    dict with structure

        'model': the trained model
        'train_data': train_data
        'assess_data': assess_data
        'macro-F1': score for `assess_data`

    """
    model.fit(train_data)
    predictions = model.predict(assess_data)
    # Gold labels are the second element of each assessment pair.
    # NOTE(review): the original iterated `assess_data()` (calling it),
    # which fails for the documented list input and is inconsistent with
    # passing `assess_data` itself to `model.predict` above; we iterate
    # the sequence directly.
    y = [label for _, label in assess_data]
    print(classification_report(y, predictions))
    macrof1 = utils.safe_macro_f1(y, predictions)
    return {
        'model': model,
        'train_data': train_data,
        'assess_data': assess_data,
        'macro-F1': macrof1,
    }
Example 4
def test_safe_macro_f1():
    """`safe_macro_f1` should match the hand-computed macro-average F1.

    The original test called the function but asserted nothing, so it
    could only catch crashes, not wrong scores.
    """
    y = [1, 1, 2, 2, 1]
    y_pred = [1, 2, 2, 1, 1]
    # Class 1: precision = recall = 2/3, so F1 = 2/3.
    # Class 2: precision = recall = 1/2, so F1 = 1/2.
    # Macro-F1 = (2/3 + 1/2) / 2 = 7/12.
    result = utils.safe_macro_f1(y, y_pred)
    assert abs(result - 7 / 12) < 1e-8
Example 5
 def score(self, X, y):
     """Score predictions for `X` against gold labels `y` using macro-F1."""
     return utils.safe_macro_f1(y, self.predict(X))