def test_model_graph_dimensions(digits, attr, layer_index, weight_dim):
    X_train, X_test, y_train, y_test = digits
    mod = TorchShallowNeuralClassifier(max_iter=1)
    mod.fit(X_train, y_train)
    mod_attr_val = getattr(mod, attr)
    graph_dim = mod.model[layer_index].weight.shape[weight_dim]
    assert mod_attr_val == graph_dim
def test_hidden_activation_in_graph(digits):
    X_train, X_test, y_train, y_test = digits
    mod = TorchShallowNeuralClassifier(max_iter=1, hidden_activation=nn.ReLU())
    mod.fit(X_train, y_train)
    mod_hidden_activation = mod.hidden_activation.__class__
    graph_activation_class = mod.model[1].__class__
    assert mod_hidden_activation == graph_activation_class
def test_predict_functions_honor_device(digits, func):
    X_train, X_test, y_train, y_test = digits
    mod = TorchShallowNeuralClassifier(max_iter=2)
    mod.fit(X_train, y_train)
    prediction_func = getattr(mod, func)
    with pytest.raises(RuntimeError):
        prediction_func(X_test, device="FAKE_DEVICE")
def test_simple_example_params(digits, param, expected):
    X_train, X_test, y_train, y_test = digits
    mod = TorchShallowNeuralClassifier(**{param: expected})
    mod.fit(X_train, y_train)
    preds = mod.predict(X_test)
    acc = accuracy_score(y_test, preds)
    if not (param in ["max_iter", "batch_size"] and expected <= 1):
        assert acc >= 0.86
def test_build_dataset(digits, with_y, expected):
    X_train, X_test, y_train, y_test = digits
    mod = TorchShallowNeuralClassifier()
    if with_y:
        dataset = mod.build_dataset(X_train, y_train)
    else:
        dataset = mod.build_dataset(X_train)
    result = next(iter(dataset))
    assert len(result) == expected
def test_predict_restores_device(digits, func):
    X_train, X_test, y_train, y_test = digits
    mod = TorchShallowNeuralClassifier(max_iter=2)
    mod.fit(X_train, y_train)
    current_device = mod.device
    assert current_device != torch.device("cpu:0")
    prediction_func = getattr(mod, func)
    prediction_func(X_test, device="cpu:0")
    assert mod.device == current_device
def fit_basic_rnn(X, y, hidden_dim, max_iter, hidden_activation, eta):
    if hidden_dim is None:
        hidden_dim = 50
    if max_iter is None:
        max_iter = 100
    if hidden_activation is None:
        hidden_activation = nn.Tanh()
    if eta is None:
        eta = 0.01
    mod = TorchShallowNeuralClassifier(
        hidden_dim=hidden_dim,
        max_iter=max_iter,
        hidden_activation=hidden_activation,
        eta=eta)
    mod.fit(X, y)
    return mod
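# A minimal usage sketch for fit_basic_rnn, assuming sklearn is available.
# The digits data here is illustrative, not part of the original setup;
# passing None for every hyperparameter selects the defaults defined above:
from sklearn.datasets import load_digits

digits_X, digits_y = load_digits(return_X_y=True)
basic_mod = fit_basic_rnn(digits_X, digits_y, hidden_dim=None,
                          max_iter=None, hidden_activation=None, eta=None)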
def test_wordentail_experiment(wordentail_data, condition):
    nli.wordentail_experiment(
        train_data=wordentail_data[condition]['train'],
        assess_data=wordentail_data[condition]['dev'],
        vector_func=lambda x: np.ones(10),
        vector_combo_func=lambda u, v: np.concatenate((u, v)),
        model=TorchShallowNeuralClassifier(hidden_dim=5, max_iter=1))
def fit_shallow_neural_classifier_with_crossvalidation(X, y):
    basemod = TorchShallowNeuralClassifier(max_iter=50)
    cv = 3
    param_grid = {'hidden_dim': [25, 50, 100]}
    best_mod = utils.fit_classifier_with_crossvalidation(
        X, y, basemod, cv, param_grid)
    return best_mod
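# A rough sketch of what the utils helper above is assumed to do, written
# directly against sklearn's GridSearchCV. This is an assumption about the
# helper's behavior, not a copy of its implementation; it relies on
# TorchShallowNeuralClassifier following the sklearn estimator protocol:
from sklearn.model_selection import GridSearchCV

def fit_shallow_neural_classifier_with_gridsearchcv(X, y):
    basemod = TorchShallowNeuralClassifier(max_iter=50)
    searcher = GridSearchCV(basemod, {'hidden_dim': [25, 50, 100]}, cv=3)
    searcher.fit(X, y)
    return searcher.best_estimator_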
def system_1():
    # Data------------
    with open(wordentail_filename) as f:
        wordentail_data = json.load(f)
    print("Distribution of labels:\n{0}".format(
        pd.DataFrame(
            wordentail_data['word_disjoint']['train'])[1].value_counts()))

    def vec_merge(u, v):
        """Merge feature representations: concatenation of `u` and `v`
        plus their elementwise difference and maximum."""
        return np.concatenate((u, v, vec_diff(u, v), vec_max(u, v)))

    # Model-----------
    net = TorchShallowNeuralClassifier(hidden_dim=50, max_iter=100)
    print(net)

    # Exp-------------
    result = nli.wordentail_experiment(
        train_data=wordentail_data['word_disjoint']['train'],
        assess_data=wordentail_data['word_disjoint']['dev'],
        model=net,
        vector_func=glove_vec,
        vector_combo_func=vec_merge)
    return result['macro-F1']
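# `vec_merge` above relies on `vec_diff` and `vec_max`, which are defined
# elsewhere in the original notebook. These are minimal versions consistent
# with how they are used there (assumed reconstructions):

def vec_diff(u, v):
    """Elementwise difference of `u` and `v`."""
    return u - v


def vec_max(u, v):
    """Elementwise maximum of `u` and `v`."""
    return np.maximum(u, v)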
def test_save_load(XOR):
    X, y = XOR
    mod = TorchShallowNeuralClassifier(
        hidden_dim=4,
        hidden_activation=nn.ReLU(),
        max_iter=100,
        eta=0.01)
    mod.fit(X, y)
    mod.predict(X)
    with tempfile.NamedTemporaryFile(mode='wb') as f:
        name = f.name
        mod.to_pickle(name)
        mod2 = TorchShallowNeuralClassifier.from_pickle(name)
        mod2.predict(X)
        mod2.fit(X, y)
def test_model(XOR):
    """Just makes sure that this code will run; it doesn't check that
    it is creating good models.
    """
    X, y = XOR
    model = TorchShallowNeuralClassifier(
        hidden_dim=4,
        hidden_activation=nn.ReLU(),
        max_iter=100,
        eta=0.01)
    model.fit(X, y)
    model.predict(X)
    model.predict_proba(X)
def test_bakeoff_experiment(wordentail_data):
    word_disjoint_experiment = nli.wordentail_experiment(
        train_data=wordentail_data['train'],
        assess_data=wordentail_data['dev'],
        vector_func=lambda x: np.ones(10),
        vector_combo_func=lambda u, v: np.concatenate((u, v)),
        model=TorchShallowNeuralClassifier(hidden_dim=5, max_iter=1))
    test_data_filename = os.path.join(
        'data', 'nlidata', 'bakeoff-wordentail-data',
        'nli_wordentail_bakeoff_data-test.json')
    nli.bake_off_evaluation(word_disjoint_experiment, test_data_filename)
def system_2():
    # Data------------
    with open(wordentail_filename) as f:
        wordentail_data = json.load(f)
    X_glove = pd.DataFrame(GLOVE).T
    print(X_glove.shape)

    def convert_edges_to_indices(edges, Q):
        lookup = dict(zip(Q.index, range(Q.shape[0])))
        index_edges = defaultdict(set)
        for start, finish_nodes in edges.items():
            s = lookup.get(start)
            # Check against None explicitly: index 0 is falsy, so `if s:`
            # would silently drop the word in the first row.
            if s is not None:
                f = {lookup[n] for n in finish_nodes if n in lookup}
                if f:
                    index_edges[s] = f
        return index_edges

    wn_index_edges = convert_edges_to_indices(wn_edges, X_glove)
    wn_retro = Retrofitter(verbose=True)
    X_retro = wn_retro.fit(X_glove, wn_index_edges)
    print(X_retro.shape)

    def retro_vec(w):
        """Return `w`'s retrofitted representation if available,
        else a random vector."""
        return X_retro.loc[w].values if w in X_retro.index else randvec(
            w, n=glove_dim)

    # Model-----------
    net = TorchShallowNeuralClassifier(hidden_dim=50, max_iter=100)
    print(net)

    # Exp-------------
    result = nli.wordentail_experiment(
        train_data=wordentail_data['word_disjoint']['train'],
        assess_data=wordentail_data['word_disjoint']['dev'],
        model=net,
        vector_func=retro_vec,
        vector_combo_func=vec_concatenate)
    return result['macro-F1']
def test_build_dataset_input_dim(digits, early_stopping):
    X_train, X_test, y_train, y_test = digits
    mod = TorchShallowNeuralClassifier(early_stopping=early_stopping)
    dataset = mod.build_dataset(X_train, y_train)
    assert mod.input_dim == X_train.shape[1]
def fit_nn_classifier(X, y):
    mod = TorchShallowNeuralClassifier(hidden_dim=50, max_iter=100)
    mod.fit(X, y)
    return mod
def vec_concatenate(u, v):
    """Concatenate np.array instances `u` and `v` into a new np.array."""
    return np.concatenate((u, v))


# `vector_combo_func` could instead be vector average, vector difference,
# etc. (even combinations of those) – there's lots of space for
# experimentation here; [homework question 2](#Alternatives-to-concatenation-[1-point])
# below pushes you to do some exploration. A couple of illustrative
# alternatives are sketched after the baseline results below.

# ### Classifier model
#
# For a baseline model, I chose `TorchShallowNeuralClassifier`:

# In[20]:

net = TorchShallowNeuralClassifier(hidden_dim=50, max_iter=100)


# ### Baseline results
#
# The following puts the above pieces together, using `vector_func=glove_vec`,
# since `vector_func=randvec` seems so hopelessly misguided for `word_disjoint`!

# In[21]:

word_disjoint_experiment = nli.wordentail_experiment(
    train_data=wordentail_data['word_disjoint']['train'],
    assess_data=wordentail_data['word_disjoint']['dev'],
    model=net,
    vector_func=glove_vec,
    vector_combo_func=vec_concatenate)

print("macro-F1: {0}".format(word_disjoint_experiment['macro-F1']))
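# Two illustrative alternatives to `vec_concatenate`, as invited above.
# These helpers are hypothetical examples, not part of the course code:

def vec_average(u, v):
    """Elementwise average of `u` and `v`."""
    return (u + v) / 2.0


def vec_diff_concat(u, v):
    """Concatenate `u` and `v` along with their elementwise difference."""
    return np.concatenate((u, v, u - v))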
def test_optimizer_keywords(XOR, expected):
    X, y = XOR
    mod = TorchShallowNeuralClassifier(amsgrad=expected)
    mod.fit(X, y)
    assert mod.amsgrad == expected
    assert mod.optimizer.param_groups[0]['amsgrad'] == expected
def fit_hf_shallow_network(X, y):
    mod = TorchShallowNeuralClassifier(
        max_iter=100,
        hidden_dim=300)
    mod.fit(X, y)
    return mod
class Shallow_Neural_Classifier:
    '''
    Wrapper class around TorchShallowNeuralClassifier for initial
    fitting and subsequent fine-tuning of weights.
    '''

    def __init__(self, sent140_train_X, sent140_dev_X, sent140_train_Y,
                 sent140_dev_Y, emoji_train_X, emoji_dev_X, emoji_test_X,
                 emoji_train_Y, emoji_dev_Y, emoji_test_Y,
                 emojiless_train_X, emojiless_dev_X, emojiless_test_X,
                 emojiless_train_Y, emojiless_dev_Y, emojiless_test_Y,
                 testing):
        '''
        Pass the initial data for fitting to the constructor.
        TODO: also pass logistic regression parameters into the constructor.
        '''
        self.testing = testing

        self.sent140_train_X = sent140_train_X
        self.sent140_train_Y = sent140_train_Y
        self.sent140_dev_X = sent140_dev_X
        self.sent140_dev_Y = sent140_dev_Y

        self.emoji_train_X = emoji_train_X
        self.emoji_train_Y = emoji_train_Y
        self.emoji_dev_X = emoji_dev_X
        self.emoji_dev_Y = emoji_dev_Y
        if self.testing:
            self.emoji_test_X = emoji_test_X
            self.emoji_test_Y = emoji_test_Y

        self.emojiless_train_X = emojiless_train_X
        self.emojiless_train_Y = emojiless_train_Y
        self.emojiless_dev_X = emojiless_dev_X
        self.emojiless_dev_Y = emojiless_dev_Y
        if self.testing:
            self.emojiless_test_X = emojiless_test_X
            self.emojiless_test_Y = emojiless_test_Y

        # TODO: pass model parameters into the constructor?

    def run_sent140(self):
        '''
        Train on sent140, predict on emoji.
        Report the score on sent140 too, just because it's interesting.
        '''
        # model
        self.model_sent140 = TorchShallowNeuralClassifier()

        # train
        self.model_sent140.fit(self.sent140_train_X, self.sent140_train_Y)

        # test on sent140
        sent140_train_preds = self.model_sent140.predict(self.sent140_train_X)
        sent140_dev_preds = self.model_sent140.predict(self.sent140_dev_X)

        # test on emoji
        emoji_train_preds = self.model_sent140.predict(self.emoji_train_X)
        emoji_dev_preds = self.model_sent140.predict(self.emoji_dev_X)
        if self.testing:
            emoji_test_preds = self.model_sent140.predict(self.emoji_test_X)
        else:
            emoji_test_preds = None

        return (sent140_train_preds, sent140_dev_preds,
                emoji_train_preds, emoji_dev_preds, emoji_test_preds)

    def run_sent140_emojiless(self):
        '''
        Train on sent140 combined with emojiless data (in place of true
        fine-tuning), predict on emoji.
        Report the score on sent140 too, just because it's interesting.
        '''
        # model
        self.model_sent140_emojiless = TorchShallowNeuralClassifier()

        # train: combine features
        combined_train_X = np.vstack((self.sent140_train_X,
                                      self.emojiless_train_X))
        combined_train_Y = self.sent140_train_Y + self.emojiless_train_Y
        self.model_sent140_emojiless.fit(combined_train_X, combined_train_Y)

        # test on sent140
        sent140_train_preds = self.model_sent140_emojiless.predict(self.sent140_train_X)
        sent140_dev_preds = self.model_sent140_emojiless.predict(self.sent140_dev_X)

        # test on emoji
        emoji_train_preds = self.model_sent140_emojiless.predict(self.emoji_train_X)
        emoji_dev_preds = self.model_sent140_emojiless.predict(self.emoji_dev_X)
        if self.testing:
            emoji_test_preds = self.model_sent140_emojiless.predict(self.emoji_test_X)
        else:
            emoji_test_preds = None

        return (sent140_train_preds, sent140_dev_preds,
                emoji_train_preds, emoji_dev_preds, emoji_test_preds)

    def run_sent140_emoji(self):
        '''
        Train on sent140 combined with emoji data (in place of true
        fine-tuning), predict on emoji.
        Report the score on sent140 too, just because it's interesting.
        '''
        # model
        self.model_sent140_emoji = TorchShallowNeuralClassifier()

        # train: combine features
        combined_train_X = np.vstack((self.sent140_train_X,
                                      self.emoji_train_X))
        combined_train_Y = self.sent140_train_Y + self.emoji_train_Y
        self.model_sent140_emoji.fit(combined_train_X, combined_train_Y)

        # test on sent140
        sent140_train_preds = self.model_sent140_emoji.predict(self.sent140_train_X)
        sent140_dev_preds = self.model_sent140_emoji.predict(self.sent140_dev_X)

        # test on emoji
        emoji_train_preds = self.model_sent140_emoji.predict(self.emoji_train_X)
        emoji_dev_preds = self.model_sent140_emoji.predict(self.emoji_dev_X)
        if self.testing:
            emoji_test_preds = self.model_sent140_emoji.predict(self.emoji_test_X)
        else:
            emoji_test_preds = None

        return (sent140_train_preds, sent140_dev_preds,
                emoji_train_preds, emoji_dev_preds, emoji_test_preds)
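# A minimal usage sketch for the wrapper class (hypothetical variable names;
# assumes the feature matrices and label lists were built upstream):
#
#     clf = Shallow_Neural_Classifier(
#         sent140_train_X, sent140_dev_X, sent140_train_Y, sent140_dev_Y,
#         emoji_train_X, emoji_dev_X, emoji_test_X,
#         emoji_train_Y, emoji_dev_Y, emoji_test_Y,
#         emojiless_train_X, emojiless_dev_X, emojiless_test_X,
#         emojiless_train_Y, emojiless_dev_Y, emojiless_test_Y,
#         testing=False)
#     (sent140_train_preds, sent140_dev_preds,
#      emoji_train_preds, emoji_dev_preds, emoji_test_preds) = clf.run_sent140()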
def system_3():
    # Data------------
    with open(wordentail_filename) as f:
        wordentail_data = json.load(f)
    x_train = wordentail_data['word_disjoint']['train']
    print("Existing distribution of labels:\n{0}".format(
        pd.DataFrame(x_train)[1].value_counts()))

    # get WordNet edges
    def get_wordnet_edges():
        edges = defaultdict(set)
        for ss in wn.all_synsets():
            lem_names = {lem.name() for lem in ss.lemmas()}
            for lem in lem_names:
                edges[lem] |= lem_names
        return edges

    wn_edges = get_wordnet_edges()

    # Data augmentation of positive entailments.
    positive_entailments = []
    for premise_hypothesis, label in x_train:
        if label == 1:
            positive_entailments.append(premise_hypothesis)
    print("Current count of positives: {0}".format(
        len(positive_entailments)))

    positive_entailments_ex = []
    for premise_hypothesis in positive_entailments:
        premise = premise_hypothesis[0]
        hypothesis = premise_hypothesis[1]
        for wn_premise in wn_edges[premise]:
            if premise == wn_premise:
                continue
            for wn_hypothesis in wn_edges[hypothesis]:
                if wn_hypothesis == hypothesis:
                    continue
                positive_entailments_ex.append([wn_premise, wn_hypothesis])
    print("New count of positives to add: {0}".format(
        len(positive_entailments_ex)))

    x_train.extend([[item, 1] for item in positive_entailments_ex])
    print("New distribution of labels:\n{0}".format(
        pd.DataFrame(
            wordentail_data['word_disjoint']['train'])[1].value_counts()))

    # Model-----------
    net = TorchShallowNeuralClassifier(hidden_dim=50, max_iter=100)

    # Exp-------------
    result = nli.wordentail_experiment(
        train_data=wordentail_data['word_disjoint']['train'],
        assess_data=wordentail_data['word_disjoint']['dev'],
        model=net,
        vector_func=glove_vec,
        vector_combo_func=vec_concatenate)
    return result['macro-F1']
def test_params(param, expected):
    mod = TorchShallowNeuralClassifier(**{param: expected})
    result = getattr(mod, param)
    assert result == expected
get_ipython().run_line_magic('time', 'X_hf_train = [hugging_face_bert_classifier_phi(tree) for tree in X_hf_tree_train]')


# In[24]:

get_ipython().run_line_magic('time', 'X_hf_dev = [hugging_face_bert_classifier_phi(tree) for tree in X_hf_tree_dev]')


# Now that all the examples are featurized, we can fit a model and evaluate it:

# In[25]:

hf_mod = TorchShallowNeuralClassifier(max_iter=100, hidden_dim=300)


# In[26]:

get_ipython().run_line_magic('time', '_ = hf_mod.fit(X_hf_train, y_hf_train)')


# In[27]:

hf_preds = hf_mod.predict(X_hf_dev)


# In[28]:
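# One way to evaluate the dev predictions, assuming `y_hf_dev` holds the
# gold dev labels (a sketch; the notebook's own evaluation cell is not
# shown above):

from sklearn.metrics import classification_report

print(classification_report(y_hf_dev, hf_preds, digits=3))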