def run_epoch(self, sess): train_se = 0.0 prog = Progbar(target=1 + self.train_x.shape[0] / self.config.batch_size) for i, (train_x, train_y, train_sentLen, mask) in enumerate( minibatches(self.train_x, self.train_y, self.train_sentLen, self.train_mask, self.config.batch_size)): loss = self.train_on_batch(sess, train_x, train_y, mask, train_sentLen) train_se += self.evaluate_on_batch(sess, train_x, train_y, mask, train_sentLen) prog.update(i + 1, [("train loss", loss)]) train_obs = self.train_x.shape[0] train_mse = train_se / train_obs print 'Training MSE is {0}'.format(train_mse) print "Evaluating on dev set", dev_se = 0.0 for i, (dev_x, dev_y, dev_sentLen, dev_mask) in enumerate( minibatches(self.dev_x, self.dev_y, self.dev_sentLen, self.dev_mask, self.config.batch_size)): dev_se += self.evaluate_on_batch(sess, dev_x, dev_y, dev_mask, dev_sentLen) dev_obs = self.dev_x.shape[0] dev_mse = dev_se / dev_obs print "- dev MSE: {:.2f}".format(dev_mse) return dev_mse
def run_train_epoch(self, sess, train_inputs, train_labels):
    """Run one training pass over the given inputs and labels.

    The data is split into minibatches of ``self.config.batch_size`` by
    ``get_minibatches``; each one is handed to ``train_on_batch`` and the
    returned loss is shown on the progress bar.

    Args:
        sess: session object forwarded to ``train_on_batch``.
        train_inputs: training inputs, batched together with the labels.
        train_labels: training labels; their length sizes the progress bar.
    """
    batch_size = self.config.batch_size
    progress = Progbar(target=1 + len(train_labels) / batch_size)
    batches = get_minibatches([train_inputs, train_labels], batch_size)
    for step, (batch_x, batch_y) in enumerate(batches, 1):
        batch_loss = self.train_on_batch(sess, batch_x, batch_y)
        progress.update(step, [("train loss", batch_loss)])
def run_epoch(self, sess, parser, train_examples, dev_set): prog = Progbar(target=1 + len(train_examples) / self.config.batch_size) for i, (train_x, train_y) in enumerate( minibatches(train_examples, self.config.batch_size)): loss = self.train_on_batch(sess, train_x, train_y) prog.update(i + 1, [("train loss", loss)]) print "Evaluating on dev set", dev_UAS, _ = parser.parse(dev_set) print "- dev UAS: {:.2f}".format(dev_UAS * 100.0) return dev_UAS
def run_epoch(self, sess, config, dataset, train_writer, merged):
    """Run one training epoch, minibatch by minibatch.

    Args:
        sess: session object forwarded to ``train_on_batch``.
        config: configuration object; only ``batch_size`` is read here.
        dataset: object providing ``train_inputs`` and ``train_targets``.
        train_writer: currently unused; the ``add_summary`` call that would
            consume it is disabled below.
        merged: forwarded unchanged to ``train_on_batch``.

    Returns:
        ``(summary, loss)`` of the LAST minibatch, or ``(None, None)`` when
        the minibatch iterator yields nothing.
    """
    # Pre-initialise so that an empty epoch returns cleanly instead of
    # failing on unbound locals at the return statement.
    summary, loss = None, None

    prog = Progbar(target=1 + len(dataset.train_inputs[0]) / config.batch_size)
    batches = get_minibatches(
        [dataset.train_inputs, dataset.train_targets],
        config.batch_size,
        is_multi_feature_input=True)
    for i, (train_x, train_y) in enumerate(batches):
        # Main training step.
        summary, loss = self.train_on_batch(sess, train_x, train_y, merged)
        prog.update(i + 1, [("train loss", loss)])
        # Summary logging is disabled:
        # train_writer.add_summary(summary, global_step=i)

    return summary, loss  # values from the last batch
def run_epoch(self, sess, parser, train_examples, dev_set): prog = Progbar(target=1 + len(train_examples) / self.config.batch_size) for i, (train_x, train_y) in enumerate(minibatches(train_examples, self.config.batch_size)): loss = self.train_on_batch(sess, train_x, train_y) prog.update(i + 1, [("train loss", loss)]) print "Evaluating on dev set", dev_UAS, _ = parser.parse(dev_set) print "- dev UAS: {:.2f}".format(dev_UAS * 100.0) return dev_UAS
def run_epoch(self, sess): train_se = 0.0 prog = Progbar(target=1 + self.train_x.shape[0] / self.config.batch_size) for i, (train_x, train_y, train_sentLen, mask) in enumerate( minibatches(self.train_x, self.train_y, self.train_sentLen, self.train_mask, self.config.batch_size)): loss = self.train_on_batch(sess, train_x, train_y, mask, train_sentLen) train_se += self.evaluate_on_batch(sess, train_x, train_y, mask, train_sentLen) prog.update(i + 1, [("train loss", loss)]) train_obs = self.train_x.shape[0] train_mse = train_se / train_obs print 'Training MSE is {0}'.format(train_mse) print "Evaluating on dev set", dev_se = 0.0 for i, (dev_x, dev_y, dev_sentLen, dev_mask) in enumerate( minibatches(self.dev_x, self.dev_y, self.dev_sentLen, self.dev_mask, self.config.batch_size)): dev_se += self.evaluate_on_batch(sess, dev_x, dev_y, dev_mask, dev_sentLen) dev_obs = self.dev_x.shape[0] dev_mse = dev_se / dev_obs print "- dev MSE: {0}".format(dev_mse) print 'Evaluating on test set' test_se = 0.0 test_correct = 0 test_totalPred = 0 for i, (test_x, test_y, test_sentLen, test_mask, test_rat) in enumerate( get_minibatches_test(self.test_x, self.test_y, self.test_sentLen, self.test_mask, self.rationals, self.config.batch_size, False)): se, predCorrect, predTotal = self.run_test_batch( sess, test_x, test_y, test_mask, test_sentLen, test_rat) test_se += se test_correct += predCorrect test_totalPred += predTotal precision = float(predCorrect) / float(predTotal) test_obs = self.test_x.shape[0] test_mse = test_se / test_obs print '- test MSE: {0}'.format(test_mse) print '- test precision: {0}'.format(precision) print '- test predictions count: {0}'.format(test_totalPred) return dev_mse
def run_dev_epoch(self, sess, dev_inputs, dev_labels): # Iterate through the dev inputs and print the accuracy prf = [] print "Evaluating on dev set" prog = Progbar(target=1 + len(dev_labels) / self.config.batch_size) iterator = get_minibatches([dev_inputs, dev_labels], self.config.batch_size) for i, (train_x, train_y) in enumerate(iterator): prf.append(self.evaluate_on_batch(sess, train_x, train_y)) prog.update(i + 1) prf = np.mean(np.array(prf), axis=0) print "Precision={:.2f}, Recall={:.2f}, F1={:.2f}".format( prf[0], prf[1], prf[2]) return prf[2]
def run_epoch(self, sess, dataset): train_examples, dev_set = dataset.get_train_and_dev_data( self.config.dev_sample_percentage) prog = Progbar(target=1 + len(train_examples[1]) / self.config.batch_size) for i, (train_x, train_y) in enumerate( get_minibatches(train_examples, self.config.batch_size)): loss = self.train_on_batch(sess, train_x, train_y) prog.update(i + 1, [("train loss", loss)]) print "Evaluating on dev set", dev_precision = self.evaluate(sess, dev_set) print "- dev precision: {:.2f}".format(dev_precision * 100.0) return dev_precision
def run_epoch(self, sess, parser, train_examples, dev_set):
    """Train for one epoch, log summaries, and evaluate on the dev set.

    For each minibatch the loss and summary returned by ``train_on_batch``
    are used: the summary is written through ``self.writer`` at step
    ``self.batch_cnt``, and the loss is shown on the progress bar and kept
    for the epoch average.

    Args:
        sess: session object forwarded to ``train_on_batch``.
        parser: provides ``pad_instance`` (handed to ``minibatches``) and
            ``parse``.
        train_examples: training examples to batch.
        dev_set: data passed to ``parser.parse``.

    Returns:
        Tuple ``(dev_UAS, dev_LAS, POS_acc, mean_train_loss)``.
    """
    batch_size = self.config.batch_size
    progress = Progbar(target=1 + len(train_examples) / batch_size)
    batches = minibatches(train_examples, batch_size, parser.pad_instance,
                          self.config)

    epoch_losses = []
    for step, batch in enumerate(batches, 1):
        batch_loss, batch_summary = self.train_on_batch(sess, batch)
        self.writer.add_summary(batch_summary, self.batch_cnt)
        progress.update(step, [("train loss", batch_loss)])
        epoch_losses.append(batch_loss)

    print()
    print("Evaluating on dev set")
    result_path = pjoin(self.args.model_path, "dev.result")
    dev_UAS, dev_LAS, POS_acc, _ = parser.parse(dev_set, result_path)
    print("- dev UAS: {:.2f}".format(dev_UAS * 100.0))
    print("- dev LAS: {:.2f}".format(dev_LAS * 100.0))
    print("- dev POS acc: {:.2f}".format(POS_acc * 100.0))

    return dev_UAS, dev_LAS, POS_acc, np.mean(epoch_losses)
def run_epoch(self, sess, inputs, labels):
    """Run one epoch of training and return the mean minibatch loss.

    Args:
        sess: tf.Session() object
        inputs: np.ndarray of shape (n_samples, n_features)
        labels: np.ndarray of shape (n_samples, n_classes)

    Returns:
        average_loss: scalar; the sum of the per-minibatch losses divided by
            the number of minibatches processed.
    """
    batch_size = self.config.batch_size
    progress = Progbar(target=1 + inputs.shape[0] / batch_size)

    batch_losses = []
    for batch_inputs, batch_labels in get_minibatches([inputs, labels],
                                                      batch_size):
        batch_loss = self.train_on_batch(sess, batch_inputs, batch_labels)
        batch_losses.append(batch_loss)
        # The bar position is the number of minibatches seen so far.
        progress.update(len(batch_losses), [("train loss", batch_loss)])

    return sum(batch_losses) / len(batch_losses)
def run_epoch(self, sess, config, dataset, train_writer, merged): prog = Progbar(target=1 + len(dataset.train_inputs[0]) / config.batch_size) for i, (train_x, train_y) in enumerate( get_minibatches([dataset.train_inputs, dataset.train_targets], config.batch_size, is_multi_feature_input=True)): print "word input, char input, outout: {}, {}, {}".format( np.array(train_x[0]).shape, np.array(train_x[1]).shape, np.array(train_y).shape) summary, loss = self.train_on_batch(sess, train_x, train_y, merged) prog.update(i + 1, [("train loss", loss)]) # feed = self.create_feed_dict(dataset.train_inputs, labels_batch=dataset.train_targets, # keep_prob_word=self.config.keep_prob, keep_prob_fc=self.config.keep_prob_fc, # is_training=False) # train_accuracy = sess.run(self.accuracy, feed_dict=feed) # print "- train Accuracy: {:.2f}".format(train_accuracy * 100.0) return summary, loss # returns for Last batch
def run_epoch(model, config, parser, train_examples, dev_set): prog = Progbar(target=1 + len(train_examples) / config.batch_size) flag = False for i, (train_x, train_y) in enumerate( minibatches(train_examples, config.batch_size)): dy.renew_cg() losses = [] for x, y in zip(train_x, train_y): pred = model.create_network_return_pred(np.array(x).reshape( 1, config.n_features), drop=True) loss = model.create_network_return_loss(pred, y) losses.append(loss) loss = dy.esum(losses) / config.batch_size loss.forward() loss.backward() model.trainer.update() print "Training Loss: ", loss.value() print "Evaluating on dev set", dev_UAS, _ = parser.parse(dev_set) print "- dev UAS: {:.2f}".format(dev_UAS * 100.0) return dev_UAS
def sgd(f, x0, lr, iterations, postprocessing=None, useSaved=False,
        save_every=False, save_path='./'):
    """ Stochastic Gradient Descent

    Minimise ``f`` starting from ``x0``. The learning rate is halved every
    ANNEAL_EVERY iterations.

    Arguments:
    f -- the function to optimize, it should take a single argument
         and yield two outputs, a cost and the gradient
         with respect to the arguments
    x0 -- the initial point to start SGD from; updated with ``-=``, so a
          mutable array passed here is modified in place
    lr -- the initial learning rate (step size)
    iterations -- total iterations to run SGD for
    postprocessing -- postprocessing function for the parameters
                      if necessary. In the case of word2vec we will need to
                      normalize the word vectors to have unit length. It is
                      applied to the parameters after every update; if it
                      returns None the parameters are assumed to have been
                      modified in place.
    useSaved -- if true, resume from the state returned by
                load_saved_params()
    save_every -- if true, call save_params every SAVE_EVERY iterations
    save_path -- location passed to save_params

    Return:
    x -- the parameter value after SGD finishes
    """
    # Anneal learning rate every several iterations
    ANNEAL_EVERY = 20000
    # Auto-save period, used when save_every is true
    SAVE_EVERY = 10000

    start_iter = 0
    if useSaved:
        start_iter, oldx, state = load_saved_params()
        if start_iter > 0:
            x0 = oldx
            # Replay the halvings that already happened before the
            # checkpoint: one factor of 0.5 per completed annealing period.
            lr *= 0.5 ** (start_iter // ANNEAL_EVERY)
        if state:
            random.setstate(state)

    x = x0

    if not postprocessing:
        postprocessing = lambda params: params

    # Exponentially smoothed cost, shown on the progress bar.
    expcost = None
    prog = Progbar(target=iterations)

    for step in xrange(start_iter + 1, iterations + 1):
        cost, grad = f(x)
        x -= grad * lr

        # Post-process the PARAMETERS (not the gradient) after every step.
        processed = postprocessing(x)
        if processed is not None:
            x = processed

        # ``is None`` rather than falsiness: a smoothed cost of exactly 0
        # must not restart the running average.
        if expcost is None:
            expcost = cost
        else:
            expcost = .95 * expcost + .05 * cost
        prog.update(step, [("expcost", expcost)])

        if save_every and step % SAVE_EVERY == 0:
            save_params(save_path, step, x)

        if step % ANNEAL_EVERY == 0:
            lr *= 0.5

    return x
def run_epoch(self, sess, parser, train_examples, dev_set):
    """Create the progress bar for one epoch over ``train_examples``.

    NOTE(review): only the progress-bar construction is present in this
    definition; it looks like a truncated excerpt, so nothing else is done
    and nothing is returned.
    """
    n_steps = 1 + len(train_examples) / self.config.batch_size
    prog = Progbar(target=n_steps)
def run_iter(data, learning_rate=0.05, x_max=100, alpha=0.75):
    """
    Perform one pass of GloVe training over `data` with online AdaGrad.

    `data` is a pre-fetched list whose elements have the form

        (v_main, v_context,
         b_main, b_context,
         gradsq_W_main, gradsq_W_context,
         gradsq_b_main, gradsq_b_context,
         cooccurrence)

    as produced by the `train_glove` function. Every array in the tuple is
    updated in place, so the caller's underlying structures change. See
    `train_glove` for the shapes and initialisation of `W`, `biases`,
    `gradient_squared` and `gradient_squared_biases`.

    `x_max` and `alpha` parameterise the weighting function applied to each
    co-occurrence count (see the GloVe paper).

    `data` is shuffled in place before the pass. Returns the accumulated
    weighted squared error over all samples.
    """
    shuffle(data)

    progress = Progbar(target=len(data) // 1000)
    total_cost = 0

    for count, sample in enumerate(data, 1):
        (v_main, v_context, b_main, b_context,
         gradsq_W_main, gradsq_W_context,
         gradsq_b_main, gradsq_b_context, cooccurrence) = sample

        # Weight of this sample.
        w = weight(cooccurrence, x_max, alpha)

        # Residual of the model against the log co-occurrence count.
        L = (np.dot(v_main.T, v_context) + b_main[0] + b_context[0]
             - np.log(cooccurrence))
        sample_cost = (L ** 2) * w

        # Gradients; all are computed before any parameter is modified.
        grad_v_main = 2 * L * v_context * w
        grad_v_context = 2 * L * v_main * w
        grad_b = 2 * L * w  # same expression for both bias terms

        # AdaGrad caches are updated first, then used to scale the step.
        gradsq_W_main += grad_v_main ** 2
        gradsq_W_context += grad_v_context ** 2
        gradsq_b_main += grad_b ** 2
        gradsq_b_context += grad_b ** 2

        v_main -= learning_rate * grad_v_main / np.sqrt(gradsq_W_main)
        v_context -= learning_rate * grad_v_context / np.sqrt(gradsq_W_context)
        b_main -= learning_rate * grad_b / np.sqrt(gradsq_b_main)
        b_context -= learning_rate * grad_b / np.sqrt(gradsq_b_context)

        total_cost += sample_cost

        # Report the running mean cost once every 1000 samples.
        if count % 1000 == 0:
            progress.update(count // 1000,
                            [("train loss", total_cost / count)])

    return total_cost