def run_epoch(self, sess): train_se = 0.0 prog = Progbar(target=1 + self.train_x.shape[0] / self.config.batch_size) for i, (train_x, train_y, train_sentLen, mask) in enumerate( minibatches(self.train_x, self.train_y, self.train_sentLen, self.train_mask, self.config.batch_size)): loss = self.train_on_batch(sess, train_x, train_y, mask, train_sentLen) train_se += self.evaluate_on_batch(sess, train_x, train_y, mask, train_sentLen) prog.update(i + 1, [("train loss", loss)]) train_obs = self.train_x.shape[0] train_mse = train_se / train_obs print 'Training MSE is {0}'.format(train_mse) print "Evaluating on dev set", dev_se = 0.0 for i, (dev_x, dev_y, dev_sentLen, dev_mask) in enumerate( minibatches(self.dev_x, self.dev_y, self.dev_sentLen, self.dev_mask, self.config.batch_size)): dev_se += self.evaluate_on_batch(sess, dev_x, dev_y, dev_mask, dev_sentLen) dev_obs = self.dev_x.shape[0] dev_mse = dev_se / dev_obs print "- dev MSE: {:.2f}".format(dev_mse) return dev_mse
def run_epoch(self, sess): train_se = 0.0 prog = Progbar(target=1 + self.train_x.shape[0] / self.config.batch_size) for i, (train_x, train_y, train_sentLen, mask) in enumerate( minibatches(self.train_x, self.train_y, self.train_sentLen, self.train_mask, self.config.batch_size)): loss = self.train_on_batch(sess, train_x, train_y, mask, train_sentLen) train_se += self.evaluate_on_batch(sess, train_x, train_y, mask, train_sentLen) prog.update(i + 1, [("train loss", loss)]) train_obs = self.train_x.shape[0] train_mse = train_se / train_obs print 'Training MSE is {0}'.format(train_mse) print "Evaluating on dev set", dev_se = 0.0 for i, (dev_x, dev_y, dev_sentLen, dev_mask) in enumerate( minibatches(self.dev_x, self.dev_y, self.dev_sentLen, self.dev_mask, self.config.batch_size)): dev_se += self.evaluate_on_batch(sess, dev_x, dev_y, dev_mask, dev_sentLen) dev_obs = self.dev_x.shape[0] dev_mse = dev_se / dev_obs print "- dev MSE: {0}".format(dev_mse) print 'Evaluating on test set' test_se = 0.0 test_correct = 0 test_totalPred = 0 for i, (test_x, test_y, test_sentLen, test_mask, test_rat) in enumerate( get_minibatches_test(self.test_x, self.test_y, self.test_sentLen, self.test_mask, self.rationals, self.config.batch_size, False)): se, predCorrect, predTotal = self.run_test_batch( sess, test_x, test_y, test_mask, test_sentLen, test_rat) test_se += se test_correct += predCorrect test_totalPred += predTotal precision = float(predCorrect) / float(predTotal) test_obs = self.test_x.shape[0] test_mse = test_se / test_obs print '- test MSE: {0}'.format(test_mse) print '- test precision: {0}'.format(precision) print '- test predictions count: {0}'.format(test_totalPred) return dev_mse
def train_for_epoch(model, train_data, dev_data, optimizer, loss_func, batch_size):
    """Train the model for a single epoch, then evaluate UAS on the dev set.

    Dropout is enabled for training (`model.train()`) and disabled for the
    dev evaluation (`model.eval()`).

    @param model: the neural network module being trained
    @param train_data: training examples consumed by `minibatches`
    @param dev_data: dev examples
    @param optimizer: torch optimizer over the model's parameters
    @param loss_func: CrossEntropyLoss-style callable on (logits, targets)
    @param batch_size (int): minibatch size
    @return dev_UAS (float): Unlabeled Attachment Score on the dev set
    """
    model.train()  # Places model in "train" mode, i.e. apply dropout layer
    n_minibatches = math.ceil(len(train_data) / batch_size)
    loss_meter = AverageMeter()
    with tqdm(total=(n_minibatches)) as prog:
        for i, (train_x, train_y) in enumerate(minibatches(train_data, batch_size)):
            optimizer.zero_grad()  # remove any baggage in the optimizer
            loss = 0.  # store loss for this batch here
            train_x = torch.from_numpy(train_x).long()
            # One-hot target rows -> class indices for CrossEntropyLoss.
            train_y = torch.from_numpy(train_y.nonzero()[1]).long()
            # BUG FIX: was `parser.model.forward(train_x)`, but `parser` is
            # not a parameter of this function — use the `model` argument.
            logits = model(train_x)
            loss = loss_func(logits, train_y)
            loss.backward()
            optimizer.step()
            prog.update(1)
            loss_meter.update(loss.item())
    print("Average Train Loss: {}".format(loss_meter.avg))
    print("Evaluating on dev set", )
    model.eval()  # Places model in "eval" mode, i.e. don't apply dropout layer
    # NOTE(review): `parser` is still not in scope here — this raises
    # NameError unless a module-level `parser` exists; confirm intent.
    dev_UAS, _ = parser.parse(dev_data)
    print("- dev UAS: {:.2f}".format(dev_UAS * 100.0))
    return dev_UAS
def train_for_epoch(parser, train_data, dev_data, optimizer, loss_func, batch_size):
    """Train the neural dependency parser for a single epoch.

    Note: In PyTorch we signify train versus test mode so the Dropout layer
    is applied during training (`model.train()`) and removed during
    evaluation (`model.eval()`).

    @param parser (Parser): Neural Dependency Parser
    @param train_data (): training examples consumed by `minibatches`
    @param dev_data (): dev examples
    @param optimizer (nn.Optimizer): Adam Optimizer
    @param loss_func (nn.CrossEntropyLoss): Cross Entropy Loss Function
    @param batch_size (int): batch size

    @return dev_UAS (float): Unlabeled Attachment Score (UAS) for dev data
    """
    model = parser.model
    model.train()  # enable dropout for the training pass
    n_minibatches = math.ceil(len(train_data) / batch_size)
    loss_meter = AverageMeter()

    with tqdm(total=n_minibatches) as prog:
        for batch_x, batch_y in minibatches(train_data, batch_size):
            # Clear any gradients left over from the previous step.
            optimizer.zero_grad()
            inputs = torch.from_numpy(batch_x).long()
            # One-hot target rows -> class indices for CrossEntropyLoss.
            targets = torch.from_numpy(batch_y.nonzero()[1]).long()
            batch_loss = loss_func(model.forward(inputs), targets)
            batch_loss.backward()
            optimizer.step()
            prog.update(1)
            loss_meter.update(batch_loss.item())

    print("Average Train Loss: {}".format(loss_meter.avg))
    print("Evaluating on dev set", )
    model.eval()  # disable dropout for evaluation
    dev_UAS, _ = parser.parse(dev_data)
    print("- dev UAS: {:.2f}".format(dev_UAS * 100.0))
    return dev_UAS
def run_epoch(self, sess, parser, train_examples, dev_set): prog = Progbar(target=1 + len(train_examples) / self.config.batch_size) for i, (train_x, train_y) in enumerate(minibatches(train_examples, self.config.batch_size)): loss = self.train_on_batch(sess, train_x, train_y) prog.update(i + 1, [("train loss", loss)]) print "Evaluating on dev set", dev_UAS, _ = parser.parse(dev_set) print "- dev UAS: {:.2f}".format(dev_UAS * 100.0) return dev_UAS
def run_epoch(self, sess, parser, train_examples, dev_set): prog = Progbar(target=1 + len(train_examples) / self.config.batch_size) for i, (train_x, train_y) in enumerate( minibatches(train_examples, self.config.batch_size)): loss = self.train_on_batch(sess, train_x, train_y) prog.update(i + 1, [("train loss", loss)]) print "Evaluating on dev set", dev_UAS, _ = parser.parse(dev_set) print "- dev UAS: {:.2f}".format(dev_UAS * 100.0) return dev_UAS
def run_epoch(self, sess, parser, train_examples, dev_set):
    """Train over all minibatches once, then report dev UAS.

    @param sess: active TensorFlow session
    @param parser: parser used to evaluate on dev_set
    @param train_examples: training examples fed to `minibatches`
    @param dev_set: dev examples
    @return dev UAS as a fraction in [0, 1]
    """
    # BUG FIX: `/` is float division in Python 3 and tf.keras Progbar expects
    # an integer target; use floor division instead.
    n_minibatches = 1 + len(train_examples) // self.config.batch_size
    prog = tf.keras.utils.Progbar(target=n_minibatches)
    for i, (train_x, train_y) in enumerate(minibatches(train_examples, self.config.batch_size)):
        loss = self.train_on_batch(sess, train_x, train_y)
        prog.update(i + 1, [("train loss", loss)])
    print ("Evaluating on dev set")
    dev_UAS, _ = parser.parse(dev_set)
    print ("- dev UAS: {:.2f}".format(dev_UAS * 100.0))
    return dev_UAS
def train_for_epoch(parser, train_data, dev_data, batch_size):
    """Train the neural dependency parser for a single epoch (NumPy version).

    Runs a manual forward/backward pass through a two-hidden-layer ReLU
    network and applies plain SGD updates directly to the model's weight
    matrices, then evaluates UAS on the dev set.

    @param parser (Parser): Neural Dependency Parser; its `.model` exposes
        forward(), embedding_lookup(), the weight matrices and `lr`
    @param train_data (): training examples consumed by `minibatches`
    @param dev_data (): dev examples passed to `parser.parse`
    @param batch_size (int): batch size

    @return dev_UAS (float): Unlabeled Attachment Score (UAS) for dev data
    """
    n_minibatches = math.ceil(len(train_data) / batch_size)
    loss_meter = AverageMeter()
    for i, (train_x, train_y) in enumerate(minibatches(train_data, batch_size)):
        loss = 0.  # store loss for this batch here
        ### YOUR CODE HERE (~11+ Lines)
        ### 1) Run train_x forward through model to produce outputs
        ### 2) Calculate the cross-entropy loss
        ### 3) Backprop losses
        ### 4) Update the model weights
        model = parser.model
        # Forward pass: layer activations plus the softmax output y_hat.
        hidden1_output, hidden2_output, y_hat = model.forward(train_x)
        x_input = model.embedding_lookup(train_x)
        # Append a constant-1 column — presumably folds the bias into the
        # weight matrix (matches the [:-1, :] slicing below); confirm
        # against the model's weight layout.
        x_input = np.insert(x_input, x_input.shape[1], 1, axis=1)
        # cross-entropy loss, averaged over the batch
        loss -= np.sum(train_y * np.log(y_hat), axis=1).mean()
        # Backprop losses: (y_hat - y) is the softmax+CE gradient wrt logits.
        outputs_delta = y_hat - train_y
        output_gradient = np.dot(hidden2_output.T, outputs_delta)
        # [:-1, :] drops the bias row so the delta matches the pre-bias
        # activation width.
        hidden2_delta = (np.dot(outputs_delta, model.output_weights[:-1, :].T)
                         * d_relu(np.dot(hidden1_output, model.hidden_weights2)))
        hidden2_gradient = np.dot(hidden1_output.T, hidden2_delta)
        hidden1_delta = (np.dot(hidden2_delta, model.hidden_weights2[:-1, :].T)
                         * d_relu(np.dot(x_input, model.hidden_weights1)))
        hidden1_gradient = np.dot(x_input.T, hidden1_delta)
        # Update the model weights (plain SGD step, in place on the model)
        model.output_weights -= model.lr * output_gradient
        model.hidden_weights2 -= model.lr * hidden2_gradient
        model.hidden_weights1 -= model.lr * hidden1_gradient
        ### END YOUR CODE
        loss_meter.update(loss)
    print ("Average Train Loss: {}".format(loss_meter.avg))
    print("Evaluating on dev set",)
    dev_UAS, _ = parser.parse(dev_data)
    print("- dev UAS: {:.2f}".format(dev_UAS * 100.0))
    return dev_UAS
def run_epoch(self, parser, train_examples, dev_set): for i, (train_x, train_y) in enumerate( minibatches(train_examples, self.config.batch_size)): dy.renew_cg() loss = self.train_on_batch(train_x, train_y) loss.forward() loss.backward() self.trainer.update() print "Training Loss: ", loss.value() print "Evaluating on dev set", dev_UAS, _ = parser.parse(dev_set) print "- dev UAS: {:.2f}".format(dev_UAS * 100.0) return dev_UAS
def train_for_epoch(parser, train_data, dev_data, optimizer, loss_func, batch_size):
    """Run one training epoch of the neural dependency parser.

    The model is switched into train mode (dropout applied) for the
    minibatch loop and into eval mode (dropout removed) before the
    dev-set parse.

    @param parser (Parser): Neural Dependency Parser
    @param train_data (): training examples
    @param dev_data (): dev examples
    @param optimizer (nn.Optimizer): Adam Optimizer
    @param loss_func (nn.CrossEntropyLoss): Cross Entropy Loss Function
    @param batch_size (int): batch size

    @return dev_UAS (float): Unlabeled Attachment Score (UAS) for dev data
    """
    parser.model.train()  # dropout active while training
    total_batches = math.ceil(len(train_data) / batch_size)
    loss_meter = AverageMeter()
    with tqdm(total=total_batches) as prog:
        for batch_x, batch_y in minibatches(train_data, batch_size):
            optimizer.zero_grad()  # reset gradients from the previous batch
            inputs = torch.from_numpy(batch_x).long()
            # One-hot targets -> class indices for the loss function.
            targets = torch.from_numpy(batch_y.nonzero()[1]).long()
            batch_loss = loss_func(parser.model(inputs), targets)
            batch_loss.backward()
            optimizer.step()
            prog.update(1)
            loss_meter.update(batch_loss.item())
    print ("Average Train Loss: {}".format(loss_meter.avg))
    print("Evaluating on dev set",)
    parser.model.eval()  # dropout removed for evaluation
    dev_UAS, _ = parser.parse(dev_data)
    print("- dev UAS: {:.2f}".format(dev_UAS * 100.0))
    return dev_UAS
def run_epoch(self, sess, parser, train_examples, dev_set): n_minibatches = 1 + len(train_examples) / self.config.batch_size # TODO: check why prog is causing bug # https://www.tensorflow.org/api_docs/python/tf/keras/utils/Progbar prog = tf.keras.utils.Progbar(target=n_minibatches) for i, (train_x, train_y) in enumerate(minibatches(train_examples, self.config.batch_size)): loss = self.train_on_batch(sess, train_x, train_y) prog.update(i + 1, [("train loss", loss)]) #, force=i + 1 == n_minibatches) # TODO delete after testing print "Evaluating on dev set", dev_UAS, _ = parser.parse(dev_set) print "- dev UAS: {:.2f}".format(dev_UAS * 100.0) return dev_UAS
def train_for_epoch(parser, train_data, dev_data, batch_size):
    """Train the neural dependency parser for a single epoch (NumPy version).

    Performs a manual forward pass, cross-entropy loss, backprop through two
    ReLU hidden layers, and SGD updates of the weight matrices, then
    evaluates UAS on the dev set.

    @param parser (Parser): Neural Dependency Parser; `.model` exposes
        forward(), the weight matrices w1/w2/u and the learning rate lr
    @param train_data (): training examples consumed by `minibatches`
    @param dev_data (): dev examples passed to `parser.parse`
    @param batch_size (int): batch size

    @return dev_UAS (float): Unlabeled Attachment Score (UAS) for dev data
    """
    n_minibatches = math.ceil(len(train_data) / batch_size)
    loss_meter = AverageMeter()
    for i, (train_x, train_y) in enumerate(minibatches(train_data, batch_size)):
        loss = 0.  # store loss for this batch here
        ### YOUR CODE HERE (~11+ Lines)
        ### TODO:
        ### 1) Run train_x forward through model to produce outputs
        ### 2) Calculate the cross-entropy loss
        ### 3) Backprop losses
        ### 4) Update the model weights
        model = parser.model
        # outputs holds each layer's activations; the last entry is y_hat.
        outputs = model.forward(train_x)
        predicted_y = outputs[-1]
        # Cross-entropy loss averaged over the batch.
        loss = np.sum(train_y * np.log(predicted_y) * -1, axis=1).mean()
        ### derivative matrix of the output layer
        # (y_hat - y) is the softmax + cross-entropy gradient wrt the logits.
        delta_out = predicted_y - train_y
        derivative_out = np.dot(outputs[2].T, delta_out)
        ### derivative matrix of the 2nd hidden layer
        # NOTE(review): d_relu is applied to the post-activation outputs[2]
        # rather than the pre-activations — presumably equivalent for ReLU
        # since the sign is preserved; confirm against d_relu's definition.
        delta_2 = np.dot(delta_out, model.u.T) * d_relu(outputs[2])
        derivative_2 = np.dot(outputs[1].T, delta_2)
        ### derivative matrix of the 1st hidden layer
        delta_1 = np.dot(delta_2, model.w2.T) * d_relu(outputs[1])
        derivative_1 = np.dot(outputs[0].T, delta_1)
        ### update weights in all layers (plain SGD step)
        model.u = model.u - model.lr * derivative_out
        model.w2 = model.w2 - model.lr * derivative_2
        model.w1 = model.w1 - model.lr * derivative_1
        ### END YOUR CODE
        loss_meter.update(loss)
    print("Average Train Loss: {}".format(loss_meter.avg))
    print("Evaluating on dev set", )
    dev_UAS, _ = parser.parse(dev_data)
    print("- dev UAS: {:.2f}".format(dev_UAS * 100.0))
    return dev_UAS
def train(self, train_data):
    """Build the TF graph ops and run 10 epochs of minibatch training.

    Assembles prediction -> loss -> train ops, initializes variables, and
    logs the batch loss every 500 steps.

    @param train_data: training examples consumed by `minibatches`
    """
    with tf.Session() as session:
        # Graph assembly: prediction feeds the loss, loss feeds the train op.
        predictions = self.add_prediction()
        loss_op = self.add_loss(predictions)
        train_op = self.add_train(loss_op)
        session.run(tf.global_variables_initializer())
        for epoch in tqdm(range(10)):
            total_loss = 0
            batches = minibatches(train_data, batch_size=1024)
            for step, (x_batch, y_batch) in enumerate(batches):
                feed = self.create_feed_dict(x_batch, y_batch, self.dropout_prob)
                _, batch_loss = session.run([train_op, loss_op], feed)
                # Log only occasionally to keep the output readable.
                if step % 500 == 0:
                    print("epoch {}, train loss: {}".format(epoch, batch_loss))
def run_epoch(self, optimizer, parser, train_examples, dev_set): losses = [] for i, (train_x, train_y) in tqdm( enumerate(minibatches(train_examples, self.config.batch_size)), total=1 + len(train_examples) / self.config.batch_size): pred = self(train_x) loss = torch.sum(-train_y * F.log_softmax(pred, -1), -1).mean() loss.backward() optimizer.step() losses.append(np.mean(loss.detach().numpy())) print "loss: {:.3f}".format(np.mean(losses)) print "Evaluating on dev set", dev_UAS, _ = parser.parse(dev_set) print "- dev UAS: {:.2f}".format(dev_UAS * 100.0) return dev_UAS
def run_epoch(self, sess, parser, train_examples, dev_set):
    """Train over all minibatches once, then report dev UAS.

    @param sess: active TensorFlow session
    @param parser: parser used to evaluate on dev_set
    @param train_examples: training examples fed to `minibatches`
    @param dev_set: dev examples
    @return dev UAS as a fraction in [0, 1]
    """
    # BUG FIX: `/` is float division in Python 3; Progbar's target must be
    # an integer number of minibatches, so use floor division.
    n_minibatches = 1 + len(train_examples) // self.config.batch_size
    prog = tf.keras.utils.Progbar(target=n_minibatches)
    for i, (train_x, train_y) in enumerate(
            minibatches(train_examples, self.config.batch_size)):
        loss = self.train_on_batch(sess, train_x, train_y)
        # The last batch can be short; left-pad the per-example loss vector
        # with zeros so Progbar always receives a (batch_size,) array.
        # NOTE(review): zero-padding biases the displayed mean loss low for
        # the final batch — display-only, but confirm this is intended.
        # Refer:
        # 1. Updated tf.keras: https://github.com/tensorflow/tensorflow/blob/r1.12/tensorflow/python/keras/utils/generic_utils.py#L188;
        # 2. numpy.pad: https://docs.scipy.org/doc/numpy/reference/generated/numpy.pad.html.
        prog.update(i + 1, [("train loss",
                             np.pad(loss,
                                    (self.config.batch_size - len(loss), 0),
                                    'constant', constant_values=(0, ))
                             if loss.shape != (self.config.batch_size, )
                             else loss)])
    print("Evaluating on dev set")
    dev_UAS, _ = parser.parse(dev_set)
    print("- dev UAS: {:.2f}".format(dev_UAS * 100.0))
    return dev_UAS
def train_for_epoch(parser, train_data, dev_data, optimizer, loss_func, batch_size):
    """Train the dependency parser for one epoch and report dev UAS.

    @parser (Parser): Neural Dependency Parser
    @train_data (): training examples
    @dev_data (): dev examples
    @optimizer (nn.Optimizer): Adam Optimizer
    @loss_func (nn.CrossEntropyLoss): Cross Entropy Loss Function
    @batch_size (int): batch size

    @return dev_UAS (float): Unlabeled Attachment Score (UAS) for dev data
    """
    net = parser.model
    net.train()  # enable dropout during training
    batch_total = math.ceil(len(train_data) / batch_size)
    loss_meter = AverageMeter()
    with tqdm(total=batch_total) as prog:
        for batch_x, batch_y in minibatches(train_data, batch_size):
            optimizer.zero_grad()  # clear stale gradients
            xs = torch.from_numpy(batch_x).long()
            # One-hot labels -> index labels for CrossEntropyLoss.
            ys = torch.from_numpy(batch_y.nonzero()[1]).long()
            step_loss = loss_func(net.forward(xs), ys)
            step_loss.backward()
            optimizer.step()
            prog.update(1)
            loss_meter.update(step_loss.item())
    print("Average Train Loss: {}".format(loss_meter.avg))
    print("Evaluating on dev set", )
    net.eval()  # disable dropout for evaluation
    dev_UAS, _ = parser.parse(dev_data)
    print("- dev UAS: {:.2f}".format(dev_UAS * 100.0))
    return dev_UAS
def run_epoch(model, config, parser, train_examples, dev_set): prog = Progbar(target=1 + len(train_examples) / config.batch_size) flag = False for i, (train_x, train_y) in enumerate( minibatches(train_examples, config.batch_size)): dy.renew_cg() losses = [] for x, y in zip(train_x, train_y): pred = model.create_network_return_pred(np.array(x).reshape( 1, config.n_features), drop=True) loss = model.create_network_return_loss(pred, y) losses.append(loss) loss = dy.esum(losses) / config.batch_size loss.forward() loss.backward() model.trainer.update() print "Training Loss: ", loss.value() print "Evaluating on dev set", dev_UAS, _ = parser.parse(dev_set) print "- dev UAS: {:.2f}".format(dev_UAS * 100.0) return dev_UAS