def _run_feg(self, X, X_val):
    """Calculate difference between average free energies of subsets of
    validation and training sets to monitor overfitting, as proposed in [2].
    If the model is not overfitting at all, this quantity should be close
    to zero. Once this value starts growing, the model is overfitting and
    the value ("free energy gap") represents the amount of overfitting.
    """
    self._free_energy_op = tf.get_collection('free_energy_op')[0]
    train_fes = []
    for _, X_b in zip(range(self.metrics_config['n_batches_for_feg']),
                      batch_iter(X, batch_size=self.batch_size)):
        train_fe = self._tf_session.run(self._free_energy_op,
                                        feed_dict=self._make_tf_feed_dict(X_b))
        train_fes.append(train_fe)
    val_fes = []
    for _, X_vb in zip(range(self.metrics_config['n_batches_for_feg']),
                       batch_iter(X_val, batch_size=self.batch_size)):
        val_fe = self._tf_session.run(self._free_energy_op,
                                      feed_dict=self._make_tf_feed_dict(X_vb))
        val_fes.append(val_fe)
    feg = np.mean(val_fes) - np.mean(train_fes)
    summary_value = [
        summary_pb2.Summary.Value(tag=self._metrics_names_map['feg'],
                                  simple_value=feg)
    ]
    feg_s = summary_pb2.Summary(value=summary_value)
    self._tf_val_writer.add_summary(feg_s, self.iter_)
    return feg
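# A tiny worked illustration of the free energy gap computed above (the
# per-batch free energies below are made up): FEG is the mean validation
# free energy minus the mean training free energy, so a value near zero
# means little overfitting and a growing positive value quantifies it.
import numpy as np

train_fes_example = np.array([-85.2, -84.9, -85.1])
val_fes_example = np.array([-82.0, -81.7, -82.3])
feg_example = val_fes_example.mean() - train_fes_example.mean()
print(feg_example)  # ~3.07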
def evaluate_ppl(self, dev_data, batch_size: int = 32,
                 encoder_only=False, decoder_only=False, **kwargs):
    cum_loss = 0.
    cum_tgt_words = 0.

    # you may want to wrap the following code using a context manager provided
    # by the NN library to signal the backend not to keep gradient information,
    # e.g., `torch.no_grad()`
    if encoder_only:
        cum_src_words = 0.
        for src_sents, tgt_sents, key in batch_iter(dev_data, batch_size):
            loss = self.encode_to_loss(src_sents, update_params=False)
            src_word_num_to_predict = sum(len(s[1:]) for s in src_sents)  # omitting the leading `<s>`
            cum_src_words += src_word_num_to_predict
            cum_loss += loss
        ppl = np.exp(cum_loss / cum_src_words)
        return ppl

    for src_sents, tgt_sents, key in batch_iter(dev_data, batch_size):
        if decoder_only:
            loss = self.decode_to_loss(tgt_sents, update_params=False)
        else:
            loss = self(src_sents, tgt_sents, key=key, update_params=False)
        cum_loss += loss
        tgt_word_num_to_predict = sum(len(s[1:]) for s in tgt_sents)  # omitting the leading `<s>`
        cum_tgt_words += tgt_word_num_to_predict

    ppl = np.exp(cum_loss / cum_tgt_words)
    return ppl
def train_per_epoch(model, sess, train_data, train_labels, test_data,
                    test_labels, epoch, loss, batch_size=64, model_type='rnn'):
    loss_meter = AverageMeter()
    n_minibatches = math.ceil(len(train_data) / batch_size)
    print(f'Epoch {epoch}')
    if model_type == 'rnn':
        with tqdm(total=n_minibatches) as prog:
            for i, (train_x, train_x_lengths, train_y) in enumerate(
                    batch_iter(train_data, train_labels, batch_size, use_for=model_type)):
                loss_train, train_acc, _ = sess.run(
                    [loss, 'accuracy:0', 'train_step'],
                    feed_dict={
                        'sent:0': train_x,
                        'sent_lengths:0': train_x_lengths,
                        'y_true:0': train_y
                    })
                prog.update(1)
                loss_meter.update(loss_train.item())
    else:
        with tqdm(total=n_minibatches) as prog:
            for i, (train_x, train_y) in enumerate(
                    batch_iter(train_data, train_labels, batch_size, use_for=model_type)):
                loss_train, train_acc, _ = sess.run(
                    [loss, 'accuracy:0', 'train_step'],
                    feed_dict={
                        'sent:0': train_x,
                        'y_true:0': train_y
                    })
                prog.update(1)
                loss_meter.update(loss_train.item())
    print("Average Train Loss: {}".format(loss_meter.avg))
    print('- train_accuracy: {:.2f}'.format(train_acc * 100.0))
    print("Evaluating on dev set")
    if model.train:
        model.train = False
    valid_acc, valid_loss = evaluate(sess, test_data, test_labels, loss,
                                     model_type=model_type)
    model.train = True
    print("- valid_accuracy: {:.2f}".format(valid_acc * 100.0))
    print("- valid_loss: {:.2f}".format(valid_loss))
    return loss_meter.avg, train_acc, valid_loss, valid_acc
def main(_):
    embeddings, train_vec, test_vec = init()
    bz = config.batch_size
    with tf.Graph().as_default():
        with tf.name_scope("Train"):
            with tf.variable_scope("Model", reuse=None):
                m_train = Model(config, embeddings, is_training=True)
                # tf.summary.scalar("Training_Loss", m_train.loss)
                # tf.summary.scalar("Training_acc", m_train.acc)
        with tf.name_scope("Valid"):
            with tf.variable_scope("Model", reuse=True):
                m_test = Model(config, embeddings, is_training=False)
                # tf.summary.scalar("test_acc", m_test.acc)
        sv = tf.train.Supervisor(logdir=config.save_path,
                                 global_step=m_train.global_step)
        with sv.managed_session() as session:
            if config.test_only:
                test_iter = utils.batch_iter(list(zip(*test_vec)), bz, shuffle=False)
                test_acc = run_epoch(session, m_test, test_iter, is_training=False)
                print("test acc: %.3f" % test_acc)
            else:
                for epoch in range(config.num_epoches):
                    # lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0)
                    # m.assign_lr(session, config.learning_rate * lr_decay)
                    train_iter = utils.batch_iter(list(zip(*train_vec)), bz, shuffle=True)
                    test_iter = utils.batch_iter(list(zip(*test_vec)), bz, shuffle=False)
                    train_acc = run_epoch(session, m_train, train_iter, verbose=False)
                    test_acc = run_epoch(session, m_test, test_iter, is_training=False)
                    logging.info("Epoch: %d Train: %.2f%% Test: %.2f%%" %
                                 (epoch + 1, train_acc * 100, test_acc * 100))
                    if config.save_path:
                        sv.saver.save(session, config.save_path,
                                      global_step=sv.global_step)
def load_data(path):
    if os.path.exists(path):
        batches_train, batches_val, batches_test = pickle.load(open(path, 'rb'))
    else:
        batches_train = process('train')
        batches_val = process('valid')
        batches_test = process('test')
        # batches_train=batches_val=batches_test = process('valid')
        pickle.dump([batches_train, batches_val, batches_test], open(path, 'wb'))
    global train_step_per_epoch, val_step_per_epoch, test_step_per_epoch
    # `int((n - .1) / batch_size) + 1` computes ceil(n / batch_size) for n > 0
    train_step_per_epoch = int((len(batches_train) - .1) / model_config.batch_size) + 1
    val_step_per_epoch = int((len(batches_val) - .1) / model_config.batch_size) + 1
    test_step_per_epoch = int((len(batches_test) - .1) / model_config.batch_size) + 1
    batches_train = utils.batch_iter(batches_train, model_config.batch_size, num_epochs)
    batches_val = utils.batch_iter(batches_val, model_config.batch_size,
                                   num_epochs, shuffle=False)
    batches_test = utils.batch_iter(batches_test, model_config.batch_size,
                                    num_epochs, shuffle=False)
    law_list = utils.law_to_list(law_path)
    laws = utils.cut_law(law_list, filter=law_class, cut_sentence=True)
    # pickle.dump(law_list,open('data/law.pkl','wb'))
    model_config.n_law = len(laws)
    laws = list(zip(*laws))
    # pickle.dump({laws[1][i]:laws[0][i] for i in range(len(laws[0]))},open('data/accu2law_dict.pkl','wb'))
    # law_set=laws[0]
    laws_doc_len = [min(len(i), model_config.law_doc_len) for i in laws[-1]]
    laws_sent_len = utils.trun_n_words(laws[-1], model_config.law_sent_len)
    laws_sent_len = utils.align_flatten2d(laws_sent_len, model_config.law_doc_len,
                                          flatten=False)
    laws = utils.lookup_index_for_sentences(laws[-2], word2id,
                                            model_config.law_doc_len,
                                            model_config.law_sent_len)
    return batches_train, batches_val, batches_test, laws, laws_doc_len, laws_sent_len
def test_batch_iter_3(self):
    """ Check that successive calls shuffle in a different order. """
    data = list(range(16))
    out1 = []
    for x in batch_iter(data, batch_size=4, shuffle=True):
        out1.extend(x)
    out2 = []
    for x in batch_iter(data, batch_size=4, shuffle=True):
        out2.extend(x)
    self.assertEqual(set(out1), set(out2))
    with self.assertRaises(AssertionError):
        np.testing.assert_array_equal(out1, out2)
def test(args):
    print("load model from {}".format(args["MODEL_PATH"]), file=sys.stderr)
    model = NMT.load(args["MODEL_PATH"])
    if args["--cuda"]:
        model = model.to(torch.device("cuda:0"))
    binary = int(args["--num-classes"]) == 2
    test_data = load_test_data(binary=binary)
    batch_size = int(args["--batch-size"])
    cum_correct = 0
    cum_score = 0
    with torch.no_grad():
        for sentences, sentiments in batch_iter(test_data, batch_size):
            correct = model.compute_accuracy(sentences, sentiments) * len(sentences)
            cum_correct += correct
            score = -model(sentences, sentiments).sum()
            cum_score += score
    print("test dataset size: %d" % len(test_data))
    print("accuracy: %f" % (cum_correct / len(test_data)))
    print("loss: %f" % (cum_score / len(test_data)))
def validate(model, dev_src, dev_tgt, lang, batch_size=32):
    """ Validate model on the dev set
    @param model: the model being validated
    @param dev_src (list(list[str])): list of source sentences (list of tokens)
    @param dev_tgt (list[str]): list of target sentences
    @param lang: target language
    @return dev_loss (float): cross entropy loss on dev set
    """
    was_training = model.training
    model.eval()
    cum_loss = 0.
    cum_tgt_words = 0
    with torch.no_grad():
        for src_sents, tgt_nodes, tgt_tokens, tgt_actions in batch_iter(
                dev_src, dev_tgt, lang, batch_size):
            num_words_to_predict = sum(len(actions) for actions in tgt_actions)
            loss = -model(src_sents, tgt_nodes, tgt_tokens, tgt_actions).sum()
            cum_loss += loss
            cum_tgt_words += num_words_to_predict
        dev_loss = cum_loss / cum_tgt_words
    if was_training:
        model.train()
    return dev_loss
def test(test_data, labels, model, device, batch=1, training=0, embeddings=None):
    model.eval()
    if embeddings is None:
        embeddings = loadEmbeddings(model.vocab, model.embed_size,
                                    './data/word2vec.6B.100d.txt')
    count, correct_count = 0, 0
    with torch.no_grad():
        for test_x, test_y in batch_iter(test_data, labels, batch):
            test_x = model.vocab.to_input_tensor(test_x)
            test_x = embeddings(test_x).to(device)
            output = model.search(test_x)
            test_y = test_y[0]
            for i in range(len(test_y)):
                count += 1
                if test_y[i] == output[i]:
                    correct_count += 1
    correct_rate = 1. * correct_count / count
    print('the correct rate is : ', correct_rate)
    if training:
        model.train()
    return correct_rate
def train():
    # save = tf.train.Saver()
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    global_steps = 0
    if not os.path.exists(model_dir):
        os.mkdir(model_dir)
    for epoch in range(config.num_epochs):
        print("Epoch: {}".format(epoch + 1))
        batch_data = batch_iter(x_train, y_train, config.batch_size)
        for batch_x, batch_y in batch_data:
            feed_dict = feed_data(batch_x, batch_y, config.dropout_prob)
            if global_steps % config.print_per_batch == 0:
                # evaluate with dropout disabled (keep probability 1.0)
                feed_dict[model.dropout] = 1.0
                train_acc, train_loss = session.run([model.acc, model.loss],
                                                    feed_dict=feed_dict)
                print("train acc: {0}, train loss: {1}".format(train_acc, train_loss))
                # restore the training dropout probability
                feed_dict[model.dropout] = config.dropout_prob
            session.run(model.optm, feed_dict=feed_dict)
            global_steps += 1
def _parallel_train(self, fold_n, x_train, x_test):
    print('fold_n:{}'.format(fold_n))
    opt = Adam(0.01)
    self.model, self.emb = self.create_model()
    self.model.compile(optimizer=opt, loss=self.loss)
    # self.model.compile(optimizer=opt, loss='binary_crossentropy')
    patient = 0
    best_score = 0
    for epoch in range(self.epoch):
        # batch
        generator = utils.batch_iter(x_train, self.batch_size, 1)
        for index in generator:
            self.model.train_on_batch(x_train[index], x_train[index])
        # save the best reconstruction model and embedding model
        score, best_score, patient = self.save_best_model(
            best_score, x_test, patient, fold_n)
        if (patient > 25 and best_score > 0.7) or patient > 50:
            break
        print(score, best_score)
    print("fold_n:{}, score:{}".format(fold_n + 1, best_score))
    self.model = load_model('dataset/output/model' + str(fold_n) + '.h5',
                            custom_objects={'loss_high_order': self.loss})
    return self.embedding(fold_n)
def evaluate_ppl(self, dev_data: List[PairedData], batch_size: int = 32):
    """ Evaluate perplexity on dev sentences

    Args:
        dev_data: a list of dev sentences
        batch_size: batch size

    Returns:
        ppl: the perplexity on dev sentences
    """
    cum_loss = 0.
    cum_tgt_words = 0.
    output = []
    all_tgt_sents = []
    with torch.no_grad():
        for src_lang, tgt_lang, src_sents, tgt_sents in batch_iter(dev_data, batch_size):
            loss, best_sents = self(src_lang, tgt_lang, src_sents, tgt_sents)
            output += best_sents
            all_tgt_sents += tgt_sents
            cum_loss += loss.sum()
            tgt_word_num_to_predict = sum(len(s[1:]) for s in tgt_sents)  # omitting the leading `<s>`
            cum_tgt_words += tgt_word_num_to_predict
    ppl = np.exp(cum_loss / cum_tgt_words)
    return ppl, output, all_tgt_sents
def test_batch_iter_1(self):
    """ Check that batch_iter gives us exactly the right data back. """
    l1 = list(range(16))
    l2 = list(range(15))
    l3 = list(range(13))
    for l in [l1, l2, l3]:
        for shuffle in [True, False]:
            expected_data = l
            actual_data = set()
            expected_n_batches = ceil(len(l) / 4)
            actual_n_batches = 0
            for batch_n, x in enumerate(batch_iter(l, batch_size=4, shuffle=shuffle)):
                if batch_n == expected_n_batches - 1 and len(l) % 4 != 0:
                    self.assertEqual(len(x), len(l) % 4)
                else:
                    self.assertEqual(len(x), 4)
                self.assertEqual(len(actual_data.intersection(set(x))), 0)
                actual_data = actual_data.union(set(x))
                actual_n_batches += 1
            self.assertEqual(actual_n_batches, expected_n_batches)
            np.testing.assert_array_equal(list(actual_data), expected_data)
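# The snippets in this section all assume some `batch_iter` helper. Its exact
# signature varies across snippets (some variants take labels, an epoch count,
# or extra keyword arguments), so the following is only a minimal sketch that
# covers the behaviour exercised by the two tests above: full batches of
# `batch_size`, a smaller final batch, and a fresh order on each shuffled call.
import math

import numpy as np


def batch_iter(data, batch_size, shuffle=False):
    """Yield successive batches of `data`; the last batch may be smaller."""
    indices = np.arange(len(data))
    if shuffle:
        np.random.shuffle(indices)
    for i in range(math.ceil(len(data) / batch_size)):
        batch_indices = indices[i * batch_size:(i + 1) * batch_size]
        yield [data[j] for j in batch_indices]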
def test(args):
    test_path = args['--test-src']
    model_path = args['--model-path']
    batch_size = int(args['--batch-size'])
    total_examples = 0
    total_correct = 0
    vocab_path = args['--vocab-src']
    softmax = torch.nn.Softmax(dim=1)
    if args['--data'] == 'quora':
        test_data = utils.read_data(test_path, 'quora')
    vocab_data = utils.load_vocab(vocab_path)
    network = Model(args, vocab_data, 2)
    network.model = torch.load(model_path)
    if args['--cuda'] == str(1):
        network.model = network.model.cuda()
        softmax = softmax.cuda()
    network.model.eval()
    for labels, p1, p2, idx in utils.batch_iter(test_data, batch_size):
        total_examples += len(labels)
        print(total_examples)
        pred, _ = network.forward(labels, p1, p2)
        pred = softmax(pred)
        _, pred = pred.max(dim=1)
        label = network.get_label(labels)
        total_correct += (pred == label).sum().float()
    final_acc = total_correct / total_examples
    print('Accuracy of the model is %.2f' % final_acc, file=sys.stderr)
def _train_epoch(self, X):
    results = [[] for _ in range(len(self._train_metrics_map))]
    for X_batch in batch_iter(X, self.batch_size, verbose=self.verbose):
        self.iter_ += 1
        # print_op = tf.print('Batch iteration = ', self.iter_)
        if self.iter_ % self.metrics_config['train_metrics_every_iter'] == 0:
            # if train metrics are wanted, combine the metric operations with
            # the training operations; `self._train_op` is the main training step
            run_ops = [v for _, v in sorted(self._train_metrics_map.items())]
            run_ops += [self._tf_merged_summaries, self._train_op]
            # run_ops += [print_op]
            outputs = self._tf_session.run(run_ops,
                                           feed_dict=self._make_tf_feed_dict(X_batch))
            values = outputs[:len(self._train_metrics_map)]
            for i, v in enumerate(values):
                results[i].append(v)
            train_s = outputs[len(self._train_metrics_map)]
            self._tf_train_writer.add_summary(train_s, self.iter_)
        else:
            # otherwise, only run the training operations
            run_ops = [self._train_op]
            # run_ops += [print_op]
            self._tf_session.run(run_ops,
                                 feed_dict=self._make_tf_feed_dict(X_batch))
    # aggregate and return metrics values
    results = map(lambda r: np.mean(r) if r else None, results)
    return dict(zip(sorted(self._train_metrics_map), results))
def evaluate_ppl(model, dev_data, batch_size=32):
    """ Evaluate perplexity on dev sentences
    :param NMT model: NMT Model
    :param List[tuple(src_sent, tgt_sent)] dev_data: list of tuples containing
        source and target sentences
    :param int batch_size: size of the batch
    :return float: perplexity on dev sentences
    """
    was_training = model.training
    model.eval()
    cum_loss = 0.
    cum_tgt_words = 0.
    # no_grad() signals backend to throw away all gradients
    with torch.no_grad():
        for src_sents, tgt_sents in batch_iter(dev_data, batch_size):
            loss = -model(src_sents, tgt_sents).sum()
            cum_loss += loss.item()
            tgt_word_num_to_predict = sum(len(s[1:]) for s in tgt_sents)  # omitting leading `<s>`
            cum_tgt_words += tgt_word_num_to_predict
        ppl = np.exp(cum_loss / cum_tgt_words)
    if was_training:
        model.train()
    return ppl
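# Worked example of the perplexity formula shared by the `evaluate_ppl`
# variants in this section (the numbers are hypothetical):
# ppl = exp(cum_loss / cum_tgt_words), i.e. the exponential of the average
# per-word negative log-likelihood over the dev set.
import numpy as np

cum_loss_example = 120.0     # summed negative log-likelihood over the dev set
cum_tgt_words_example = 50   # target words predicted, excluding the leading <s>
print(np.exp(cum_loss_example / cum_tgt_words_example))  # e^2.4, ~11.02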
def evaluate_ppl(self, dev_data: List[Any], batch_size: int = 32):
    """ Evaluate perplexity on dev sentences

    Args:
        dev_data: a list of dev sentences
        batch_size: batch size

    Returns:
        ppl: the perplexity on dev sentences
    """
    self.set_model_to_eval()
    cum_loss = 0.
    cum_tgt_words = 0.
    # you may want to wrap the following code using a context manager provided
    # by the NN library to signal the backend not to keep gradient information,
    # e.g., `torch.no_grad()`
    for src_sents, tgt_sents in batch_iter(dev_data, batch_size):
        loss, num_words = self.__call__(src_sents, tgt_sents, keep_grad=False)
        loss = loss.detach().cpu().numpy()
        cum_loss += loss * num_words
        tgt_word_num_to_predict = sum(len(s[1:]) for s in tgt_sents)  # omitting the leading `<s>`
        cum_tgt_words += tgt_word_num_to_predict
    ppl = np.exp(cum_loss / cum_tgt_words)
    return cum_loss, ppl
def train_step(model, loss_fn, optimizer, train_data, batch_size=32, device="cpu"):
    """ Train the model for 1 epoch. """
    total_loss = 0.0
    model.train()
    start_time = time.time()
    total_step = math.ceil(len(train_data) / batch_size)
    for step, batch in enumerate(batch_iter(train_data, batch_size=batch_size, shuffle=True)):
        if step % 250 == 0 and step != 0:
            elapsed_since = time.time() - start_time
            logger.info("Batch {}/{}\tElapsed since: {}".format(
                step, total_step,
                str(datetime.timedelta(seconds=round(elapsed_since)))))
        # batch = (b.to(device) for b in batch)
        sents, tags = batch
        optimizer.zero_grad()
        train_loss = model.loss(sents, tags)
        total_loss += train_loss.item()
        train_loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
    avg_train_loss = total_loss / total_step
    return avg_train_loss
def test(test_data, model, device, weight, training=0):
    # use the perplexity as an evaluation indicator
    model.eval()
    Hp = 0.
    with torch.no_grad():
        for ndata in batch_iter(test_data, 1):
            # ndata = [[model.vocab.start_token] + sent + [model.vocab.end_token] for sent in ndata]
            ndata = model.vocab.to_input_tensor(ndata).to(device)
            test_x = ndata[:, :-1]
            test_y = ndata[:, 1:]
            output = model(test_x)
            loss = loss_function(output, test_y, weight).item()
            Hp = Hp + loss
    m = len(test_data)
    # Hp = 1. / m * Hp
    Hp = math.pow(math.e, 1. / m * Hp)
    print('the perplexity is : ', Hp)
    if training:
        model.train()
    return Hp
def evaluate_ppl(model, valid_data, batch_size=32):
    """ Evaluate the perplexity on valid sentences
    model: Seq2Seq Model
    valid_data: list of tuples containing source and target sentence
    batch_size: batch size
    """
    was_training = model.training
    model.eval()
    cum_loss = 0.
    cum_tgt_words = 0.
    # no_grad() signals backend to throw away all gradients
    with torch.no_grad():
        for src_sents, tgt_sents in batch_iter(valid_data, batch_size):
            loss = -model(src_sents, tgt_sents).sum()
            cum_loss += float(loss)
            tgt_word_num_to_predict = sum(len(s[1:]) for s in tgt_sents)
            cum_tgt_words += tgt_word_num_to_predict
        ppl = np.exp(cum_loss / cum_tgt_words)
    if was_training:
        model.train()
    return ppl
def evaluate_ppl(model, dev_data, batch_size=32):
    """ Evaluate perplexity on dev sentences
    @param model (NMT): NMT Model
    @param dev_data (list of (src_sent, tgt_sent)): list of tuples containing
        source and target sentence
    @param batch_size (int): batch size
    @returns ppl (float): perplexity on dev sentences
    """
    was_training = model.training
    model.eval()
    cum_loss = 0.
    cum_examples = 0.
    cum_tgt_words = 0.
    # no_grad() signals backend to throw away all gradients
    with torch.no_grad():
        for src_sents, tgt_sents in batch_iter(dev_data, batch_size):
            loss = -model(src_sents, tgt_sents).sum()
            cum_loss += loss.item()
            cum_examples += batch_size
            tgt_word_num_to_predict = sum(len(s[1:]) for s in tgt_sents)  # omitting leading `<s>`
            cum_tgt_words += tgt_word_num_to_predict
        ppl = np.exp(cum_loss / cum_tgt_words)
    if was_training:
        model.train()
    return ppl
def train(model, train_data, val_data, args):
    model.train()
    optimizer = torch.optim.SGD(model.parameters(), args.lr)
    loss_fn = nn.CrossEntropyLoss()
    for epoch in range(args.max_epochs):
        train_iter = 0
        for contexts, words, senses in batch_iter(train_data, args.batch_size, shuffle=True):
            # forward pass
            scores = model(contexts, words)
            example_losses = loss_fn(scores, senses)
            batch_loss = example_losses.sum()
            loss = batch_loss / args.batch_size

            # backprop and weight update; clear stale gradients before each step
            optimizer.zero_grad()
            loss.backward()
            # gradient clipping
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip_grad)
            optimizer.step()

            if train_iter % args.print_iter == 0:
                print("Epoch {}, iter {}: loss {}".format(epoch, train_iter, loss))
            if train_iter % args.val_iter == 0:
                validate(model, val_data, epoch, train_iter)
            train_iter += 1
def evaluate_ppl(self, dev_data, batch_size: int = 32):
    """ Evaluate perplexity on dev sentences

    Args:
        dev_data (List[Any]): a list of dev sentences
        batch_size: batch size

    Returns:
        ppl: the perplexity on dev sentences
    """
    cum_loss = 0.
    cum_tgt_words = 0.
    # you may want to wrap the following code using a context manager provided
    # by the NN library to signal the backend not to keep gradient information,
    # e.g., `torch.no_grad()`
    for src_sents, tgt_sents in batch_iter(dev_data, batch_size):
        # loss = -model(src_sents, tgt_sents).sum()
        src_encodings, decoder_init_state = self.encode(src_sents)
        loss = self.decode(src_encodings, decoder_init_state, tgt_sents).sum()
        cum_loss += loss
        tgt_word_num_to_predict = sum(len(s[1:]) for s in tgt_sents)  # omitting the leading `<s>`
        cum_tgt_words += tgt_word_num_to_predict
    ppl = np.exp(cum_loss / cum_tgt_words)
    return ppl
def evaluate_ppl(self, dev_data, batch_size=32):
    """ Evaluate perplexity on dev sentences

    Args:
        dev_data: a list of dev sentences
        batch_size: batch size

    Returns:
        ppl: the perplexity on dev sentences
    """
    cum_loss = 0.
    cum_tgt_words = 0.
    # `torch.no_grad()` signals the backend not to keep gradient information
    with torch.no_grad():
        for src_sents, tgt_sents in batch_iter(dev_data, batch_size):
            loss = self.forward(src_sents, tgt_sents, is_training=False)
            cum_loss += loss.item()
            tgt_word_num_to_predict = sum(len(s[1:]) for s in tgt_sents)  # omitting the leading `<s>`
            cum_tgt_words += tgt_word_num_to_predict
    ppl = np.exp(cum_loss / cum_tgt_words)
    return ppl
def evaluate(args, criterion, model, dev_data, vocab):
    model.eval()
    total_loss = 0.
    total_step = 0.
    preds = None
    out_label_ids = None
    with torch.no_grad():  # no model updates and no gradients needed
        for src_sents, labels in batch_iter(dev_data, args.train_batch_size):
            src_sents = split_sents(src_sents, vocab, args.device)
            logits = model(src_sents)
            labels = torch.tensor(labels, device=args.device)
            example_losses = criterion(logits, labels)
            total_loss += example_losses.item()
            total_step += 1
            if preds is None:
                preds = logits.detach().cpu().numpy()
                out_label_ids = labels.detach().cpu().numpy()
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                out_label_ids = np.append(out_label_ids,
                                          labels.detach().cpu().numpy(), axis=0)
            torch.cuda.empty_cache()
    preds = np.argmax(preds, axis=1)
    result = acc_and_f1(preds, out_label_ids)
    model.train()
    print("Evaluation loss", total_loss / total_step)
    print('Evaluation result', result)
    return total_loss / total_step, result
def test(args, criterion, model, te_data, vocab):
    model.eval()
    # total_loss = 0.
    # total_step = 0.
    preds = None
    # out_label_ids = None
    with torch.no_grad():  # no model updates and no gradients needed
        for src_sents in batch_iter(te_data, args.test_batch_size, test_batch=True):
            src_sents = split_sents(src_sents, vocab, args.device)
            logits = model(src_sents)
            # labels = torch.tensor(labels, device=args.device)
            # example_losses = criterion(logits, labels)
            # total_loss += example_losses.item()
            # total_step += 1
            if preds is None:
                preds = logits.detach().cpu().numpy()
                # out_label_ids = labels.detach().cpu().numpy()
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                # out_label_ids = np.append(out_label_ids, labels.detach().cpu().numpy(), axis=0)
            torch.cuda.empty_cache()
    # preds = np.argmax(preds, axis=1)
    # result = acc_and_f1(preds, out_label_ids)
    # model.train()
    # print("Evaluation loss", total_loss / total_step)
    # print('Evaluation result', result)
    return preds
def evaluate_ppl(model, dev_data, batch_size=32):
    """ Evaluate perplexity on the dev set
    @param model (NMT): NMT model
    @param dev_data (list of (src_sent, tgt_sent)): list of tuples containing
        source and target sentences
    @param batch_size (int): batch size
    @returns ppl (float): perplexity on the dev set
    """
    was_training = model.training
    model.eval()
    cum_loss = 0.
    cum_tgt_words = 0.
    # no_grad() signals backend to throw away all gradients
    with torch.no_grad():
        for src_sents, tgt_sents in batch_iter(dev_data, batch_size):
            loss = -model(src_sents, tgt_sents).sum()
            cum_loss += loss.item()
            tgt_word_num_to_predict = sum(len(s[1:]) for s in tgt_sents)  # omitting leading `<s>`
            cum_tgt_words += tgt_word_num_to_predict
        ppl = np.exp(cum_loss / cum_tgt_words)  # perplexity = exp(cumulative loss / cumulative word count)
    if was_training:
        model.train()
    return ppl
def main():
    """ Main func. """
    # args = '1d'
    # Check Python & PyTorch versions
    assert sys.version_info >= (3, 5), \
        "Please update your installation of Python to version >= 3.5"
    # assert(torch.__version__ == "1.0.0"), "Please update your installation of PyTorch. You have {} and you should have version 1.0.0".format(torch.__version__)

    # Seed the random number generators
    seed = 1234
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    np.random.seed(seed * 13 // 7)

    # Load training data & vocabulary
    train_data_src = read_corpus('/Users/Pannu/Desktop/Python/AI/NLP/CS224N-2019-master/Assignment/a4/sanity_check_en_es_data/train_sanity_check.es', 'src')
    train_data_tgt = read_corpus('/Users/Pannu/Desktop/Python/AI/NLP/CS224N-2019-master/Assignment/a4/sanity_check_en_es_data/train_sanity_check.en', 'tgt')
    train_data = list(zip(train_data_src, train_data_tgt))

    # grab the first batch for the sanity check
    for src_sents, tgt_sents in batch_iter(train_data, batch_size=BATCH_SIZE, shuffle=True):
        break

    vocab = Vocab.load('/Users/Pannu/Desktop/Python/AI/NLP/CS224N-2019-master/Assignment/a4/sanity_check_en_es_data/vocab_sanity_check.json')

    # Create NMT Model
    model = NMT(
        embed_size=EMBED_SIZE,
        hidden_size=HIDDEN_SIZE,
        dropout_rate=DROPOUT_RATE,
        vocab=vocab)
def cal_dev_loss(model, dev_data, batch_size, sent_vocab, tag_vocab, device):
    """ Calculate loss on the development data

    Args:
        model: the model being trained
        dev_data: development data
        batch_size: batch size
        sent_vocab: sentence vocab
        tag_vocab: tag vocab
        device: torch.device on which the model is trained

    Returns:
        the average loss on the dev data
    """
    is_training = model.training
    model.eval()
    loss, n_sentences = 0, 0
    with torch.no_grad():
        for sentences, tags in utils.batch_iter(dev_data, batch_size, shuffle=False):
            sentences, sent_lengths = utils.pad(sentences, sent_vocab[sent_vocab.PAD], device)
            tags, _ = utils.pad(tags, tag_vocab[sent_vocab.PAD], device)
            batch_loss = model(sentences, tags, sent_lengths)  # shape: (b,)
            loss += batch_loss.sum().item()
            n_sentences += len(sentences)
    model.train(is_training)
    return loss / n_sentences
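# `cal_dev_loss` above relies on a `utils.pad` helper whose source is not
# shown here. The following is a hypothetical sketch of what such a helper
# could look like, assuming it pads a batch of index sequences to the length
# of the longest one and returns the padded tensor with the original lengths.
import torch


def pad_sketch(sequences, pad_id, device):
    """Pad `sequences` to equal length with `pad_id` (hypothetical helper)."""
    lengths = [len(s) for s in sequences]
    max_len = max(lengths)
    padded = [list(s) + [pad_id] * (max_len - len(s)) for s in sequences]
    return torch.tensor(padded, device=device), lengths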
def evaluate(model, data, batch_size):
    """ Evaluate the model on the data
    @param model (AvgSim): AvgSim Model
    @param data (list[tuple(sent1, sent2, score)]): list of sent_pairs, sim_score
    @param batch_size (int): batch size
    @return mean_loss (float): MSE loss on the scores_pred vs scores
    @return corr (float): correlation b/w scores_pred vs scores
    """
    was_training = model.training
    model.eval()
    total_loss = .0
    cum_scores = []
    cum_scores_pred = []
    with torch.no_grad():
        for sents1, sents2, scores in batch_iter(data, batch_size, shuffle=False, result=True):
            scores = torch.tensor(scores, dtype=torch.float, device=device)
            scores_pred = model(sents1, sents2)
            loss = F.mse_loss(scores_pred, scores, reduction='sum')
            total_loss += loss.item()
            cum_scores.extend(scores.tolist())
            cum_scores_pred.extend(scores_pred.tolist())
    mean_loss = total_loss / len(data)
    corr, p_val = pearsonr(cum_scores_pred, cum_scores)
    if was_training:
        model.train()
    return mean_loss, corr
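# Small illustration of the correlation metric returned by `evaluate` above,
# using scipy's pearsonr on made-up similarity scores.
from scipy.stats import pearsonr

gold_scores = [0.0, 1.5, 3.0, 4.5]
pred_scores = [0.2, 1.4, 2.8, 4.9]
corr_example, p_val_example = pearsonr(pred_scores, gold_scores)
print(corr_example)  # close to 1.0 when predictions track the gold scores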
def evaluate(self, dataset, batch_size, is_devset=True):
    accuracies = []
    for words, labels in batch_iter(dataset, batch_size):
        feed_dict = self._get_feed_dict(words, labels, lr=None, is_train=False)
        accuracy = self.sess.run(self.accuracy, feed_dict=feed_dict)
        accuracies.append(accuracy)
    # note: the unweighted mean over per-batch accuracies is only exact when
    # every batch has the same size (the final batch may be smaller)
    acc = np.mean(accuracies) * 100
    self.logger.info("Testing model over {} dataset: accuracy - {:05.3f}".format(
        'DEVELOPMENT' if is_devset else 'TEST', acc))
    return acc
def evaluate_ppl(model, dev_data, batch_size=32):
    """ Evaluate perplexity on dev sentences

    Args:
        dev_data: a list of dev sentences
        batch_size: batch size

    Returns:
        ppl: the perplexity on dev sentences
    """
    was_training = model.training
    model.eval()
    cum_loss = 0.
    cum_tgt_words = 0.
    # `torch.no_grad()` signals the backend not to keep gradient information
    with torch.no_grad():
        for src_sents, tgt_sents in batch_iter(dev_data, batch_size):
            loss = -model(src_sents, tgt_sents).sum()
            cum_loss += loss.item()
            tgt_word_num_to_predict = sum(len(s[1:]) for s in tgt_sents)  # omitting leading `<s>`
            cum_tgt_words += tgt_word_num_to_predict
    ppl = np.exp(cum_loss / cum_tgt_words)
    if was_training:
        model.train()
    return ppl
def train(self, trainset, devset, testset, batch_size=64, epochs=50, shuffle=True):
    self.logger.info('Start training...')
    init_lr = self.cfg.lr  # initial learning rate, used for learning rate decay
    best_score = 0.0  # record the best score
    best_score_epoch = 1  # record the epoch of the best score obtained
    no_imprv_epoch = 0  # no improvement patience counter
    for epoch in range(self.start_epoch, epochs + 1):
        self.logger.info('Epoch %2d/%2d:' % (epoch, epochs))
        progbar = Progbar(target=(len(trainset) + batch_size - 1) // batch_size)  # number of batches
        if shuffle:
            np.random.shuffle(trainset)  # shuffle the training dataset each epoch
        # train one epoch
        for i, (words, labels) in enumerate(batch_iter(trainset, batch_size)):
            feed_dict = self._get_feed_dict(words, labels, lr=self.cfg.lr, is_train=True)
            _, train_loss = self.sess.run([self.train_op, self.loss], feed_dict=feed_dict)
            progbar.update(i + 1, [("train loss", train_loss)])
        if devset is not None:
            self.evaluate(devset, batch_size)
        cur_score = self.evaluate(testset, batch_size, is_devset=False)
        # learning rate decay
        if self.cfg.decay_lr:
            self.cfg.lr = init_lr / (1 + self.cfg.lr_decay * epoch)
        # perform model saving and evaluation on the test dataset
        if cur_score > best_score:
            no_imprv_epoch = 0
            self.save_session(epoch)
            best_score = cur_score
            best_score_epoch = epoch
            self.logger.info(' -- new BEST score on TEST dataset: {:05.3f}'.format(best_score))
        else:
            no_imprv_epoch += 1
            if no_imprv_epoch >= self.cfg.no_imprv_patience:
                self.logger.info('early stop at {}th epoch without improvement for {} epochs, '
                                 'BEST score: {:05.3f} at epoch {}'.format(
                                     epoch, no_imprv_epoch, best_score, best_score_epoch))
                break
    self.logger.info('Training process done...')
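# Worked example of the inverse-time learning-rate decay used in `train`
# above (values are hypothetical): lr_epoch = init_lr / (1 + lr_decay * epoch).
init_lr_example, lr_decay_example = 0.01, 0.05
for epoch_example in (1, 10, 50):
    print(epoch_example, init_lr_example / (1 + lr_decay_example * epoch_example))
# 1  -> ~0.00952
# 10 -> ~0.00667
# 50 -> ~0.00286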
def train(args: Dict):
    train_data_src = read_corpus(args['--train-src'], source='src')
    train_data_tgt = read_corpus(args['--train-tgt'], source='tgt')
    dev_data_src = read_corpus(args['--dev-src'], source='src')
    dev_data_tgt = read_corpus(args['--dev-tgt'], source='tgt')

    train_data = list(zip(train_data_src, train_data_tgt))
    dev_data = list(zip(dev_data_src, dev_data_tgt))

    train_batch_size = int(args['--batch-size'])
    clip_grad = float(args['--clip-grad'])
    valid_niter = int(args['--valid-niter'])
    log_every = int(args['--log-every'])
    model_save_path = args['--save-to']

    vocab = Vocab.load(args['--vocab'])

    model = NMT(embed_size=int(args['--embed-size']),
                hidden_size=int(args['--hidden-size']),
                dropout_rate=float(args['--dropout']),
                input_feed=args['--input-feed'],
                label_smoothing=float(args['--label-smoothing']),
                vocab=vocab)
    model.train()

    uniform_init = float(args['--uniform-init'])
    if np.abs(uniform_init) > 0.:
        print('uniformly initialize parameters [-%f, +%f]' % (uniform_init, uniform_init),
              file=sys.stderr)
        for p in model.parameters():
            p.data.uniform_(-uniform_init, uniform_init)

    vocab_mask = torch.ones(len(vocab.tgt))
    vocab_mask[vocab.tgt['<pad>']] = 0

    device = torch.device("cuda:0" if args['--cuda'] else "cpu")
    print('use device: %s' % device, file=sys.stderr)
    model = model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=float(args['--lr']))

    num_trial = 0
    train_iter = patience = cum_loss = report_loss = cum_tgt_words = report_tgt_words = 0
    cum_examples = report_examples = epoch = valid_num = 0
    hist_valid_scores = []
    train_time = begin_time = time.time()
    print('begin Maximum Likelihood training')

    while True:
        epoch += 1
        for src_sents, tgt_sents in batch_iter(train_data, batch_size=train_batch_size, shuffle=True):
            train_iter += 1
            optimizer.zero_grad()
            batch_size = len(src_sents)

            example_losses = -model(src_sents, tgt_sents)  # (batch_size,)
            batch_loss = example_losses.sum()
            loss = batch_loss / batch_size
            loss.backward()

            # clip gradient
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), clip_grad)
            optimizer.step()

            batch_losses_val = batch_loss.item()
            report_loss += batch_losses_val
            cum_loss += batch_losses_val

            tgt_words_num_to_predict = sum(len(s[1:]) for s in tgt_sents)  # omitting leading `<s>`
            report_tgt_words += tgt_words_num_to_predict
            cum_tgt_words += tgt_words_num_to_predict
            report_examples += batch_size
            cum_examples += batch_size

            if train_iter % log_every == 0:
                print('epoch %d, iter %d, avg. loss %.2f, avg. ppl %.2f '
                      'cum. examples %d, speed %.2f words/sec, time elapsed %.2f sec' %
                      (epoch, train_iter,
                       report_loss / report_examples,
                       math.exp(report_loss / report_tgt_words),
                       cum_examples,
                       report_tgt_words / (time.time() - train_time),
                       time.time() - begin_time), file=sys.stderr)
                train_time = time.time()
                report_loss = report_tgt_words = report_examples = 0.

            # perform validation
            if train_iter % valid_niter == 0:
                print('epoch %d, iter %d, cum. loss %.2f, cum. ppl %.2f cum. examples %d' %
                      (epoch, train_iter,
                       cum_loss / cum_examples,
                       np.exp(cum_loss / cum_tgt_words),
                       cum_examples), file=sys.stderr)
                cum_loss = cum_examples = cum_tgt_words = 0.
                valid_num += 1

                print('begin validation ...', file=sys.stderr)
                # compute dev. ppl and bleu
                dev_ppl = evaluate_ppl(model, dev_data, batch_size=128)  # dev batch size can be a bit larger
                valid_metric = -dev_ppl
                print('validation: iter %d, dev. ppl %f' % (train_iter, dev_ppl), file=sys.stderr)

                is_better = len(hist_valid_scores) == 0 or valid_metric > max(hist_valid_scores)
                hist_valid_scores.append(valid_metric)

                if is_better:
                    patience = 0
                    print('save currently the best model to [%s]' % model_save_path, file=sys.stderr)
                    model.save(model_save_path)
                    # also save the optimizer's state
                    torch.save(optimizer.state_dict(), model_save_path + '.optim')
                elif patience < int(args['--patience']):
                    patience += 1
                    print('hit patience %d' % patience, file=sys.stderr)

                    if patience == int(args['--patience']):
                        num_trial += 1
                        print('hit #%d trial' % num_trial, file=sys.stderr)
                        if num_trial == int(args['--max-num-trial']):
                            print('early stop!', file=sys.stderr)
                            exit(0)

                        # decay lr, and restore from previously best checkpoint
                        lr = optimizer.param_groups[0]['lr'] * float(args['--lr-decay'])
                        print('load previously best model and decay learning rate to %f' % lr,
                              file=sys.stderr)

                        # load model
                        params = torch.load(model_save_path, map_location=lambda storage, loc: storage)
                        model.load_state_dict(params['state_dict'])
                        model = model.to(device)

                        print('restore parameters of the optimizers', file=sys.stderr)
                        optimizer.load_state_dict(torch.load(model_save_path + '.optim'))

                        # set new lr
                        for param_group in optimizer.param_groups:
                            param_group['lr'] = lr

                        # reset patience
                        patience = 0

                if epoch == int(args['--max-epoch']):
                    print('reached maximum number of epochs!', file=sys.stderr)
                    exit(0)