def score(self, X, y, device=None):
    """Score the model using macro-F1.

    Note: this departs from `sklearn`, where classifiers use accuracy
    as their scoring function. Using macro-F1 is more consistent with
    our course. This function can be used to evaluate models, but its
    primary use is in cross-validation and hyperparameter tuning.

    Parameters
    ----------
    X : np.array, shape `(n_examples, n_features)`
    y : iterable, shape `len(n_examples)`
        These can be the raw labels. They will be converted internally
        as needed. See `build_dataset`.
    device : str or None
        Allows the user to temporarily change the device used during
        prediction. This is useful if predictions require a lot of
        memory and so are better done on the CPU. After prediction is
        done, the model is returned to `self.device`.

    Returns
    -------
    float
        Macro-averaged F1 over the model's predictions for `X`.
    """
    return utils.safe_macro_f1(y, self.predict(X, device=device))
def wordentail_experiment(
        train_data,
        assess_data,
        vector_func,
        vector_combo_func,
        model,
        featurize_func=word_entail_featurize,
):
    """Train and evaluation code for the word-level entailment task.

    Parameters
    ----------
    train_data : list
    assess_data : list
    vector_func : function
        Any function mapping words in the vocab for `wordentail_data`
        to vector representations.
    vector_combo_func : function
        Any function for combining two vectors into a new vector of
        fixed dimensionality.
    model : class with `fit` and `predict` methods
    featurize_func : function
        Returns features `(X, y)` in the intended tensor form.

    Prints
    ------
    To standard output
        An sklearn classification report for the assessment split.

    Returns
    -------
    dict with structure
        'model': the trained model
        'train_data': train_data
        'assess_data': assess_data
        'macro-F1': score for `assess_data`
        'vector_func': vector_func
        'vector_combo_func': vector_combo_func

    We pass 'vector_func' and 'vector_combo_func' through to ensure
    alignment between these experiments and the bake-off evaluation.
    """
    X_train, y_train = featurize_func(
        train_data, vector_func, vector_combo_func)
    X_assess, y_assess = featurize_func(
        assess_data, vector_func, vector_combo_func)
    model.fit(X_train, y_train)
    assess_preds = model.predict(X_assess)
    # Human-readable per-class report for the assessment split:
    print(classification_report(y_assess, assess_preds, digits=3))
    return {
        'model': model,
        'train_data': train_data,
        'assess_data': assess_data,
        'macro-F1': utils.safe_macro_f1(y_assess, assess_preds),
        'vector_func': vector_func,
        'vector_combo_func': vector_combo_func}
def encoder_experiment(train_data, assess_data, model):
    """Train and evaluation code for the word-level entailment task.

    Parameters
    ----------
    train_data : list
    assess_data : list
        Iterable of `(example, label)` pairs; the labels are used as
        the gold values for the classification report.
    model : class with `fit` and `predict` methods

    Prints
    ------
    To standard output
        An sklearn classification report for the assessment split.

    Returns
    -------
    dict with structure
        'model': the trained model
        'train_data': train_data
        'assess_data': assess_data
        'macro-F1': score for `assess_data`
    """
    model.fit(train_data)
    predictions = model.predict(assess_data)
    # Bug fix: `assess_data` is a dataset, not a callable, so iterate
    # it directly (the original called `assess_data()`, which raises
    # TypeError for a list). Also avoid shadowing the builtin `input`.
    y = [label for _, label in assess_data]
    print(classification_report(y, predictions))
    macrof1 = utils.safe_macro_f1(y, predictions)
    return {
        'model': model,
        'train_data': train_data,
        'assess_data': assess_data,
        'macro-F1': macrof1}
def test_safe_macro_f1():
    """`safe_macro_f1` should return the macro-averaged F1 score.

    Per-class F1 for this fixture: class 1 has P = R = 2/3 (F1 = 2/3)
    and class 2 has P = R = 1/2 (F1 = 1/2), so the macro average is
    (2/3 + 1/2) / 2 = 7/12.
    """
    y = [1, 1, 2, 2, 1]
    y_pred = [1, 2, 2, 1, 1]
    result = utils.safe_macro_f1(y, y_pred)
    # The original test only checked that the call didn't raise; pin
    # the expected value so regressions in the scoring are caught.
    assert abs(result - 7 / 12) < 1e-8
def score(self, X, y):
    """Return the macro-F1 of the model's predictions on `X` against `y`."""
    return utils.safe_macro_f1(y, self.predict(X))