# training step (inside the per-epoch batch loop)
loss = base_model.update(batch)
train_loss += loss
if global_step % opt['log_step'] == 0:
    duration = time.time() - start_time
    print(format_str.format(datetime.now(), global_step, max_steps, epoch,
                            opt['num_epoch'], loss, duration, current_lr))

# eval on dev
print("Evaluating on dev set...")
predictions = []
dev_loss = 0
for batch in dev_batch:
    preds, _, loss = base_model.predict(batch)
    predictions += preds
    dev_loss += loss
predictions = [id2label[p] for p in predictions]
dev_p, dev_r, dev_f1 = scorer.score(dev_batch.gold(), predictions)

# keep track of the best dev F1 and the epoch at which it was reached
max_dev_f1, max_dev_id = (dev_f1, epoch) if max_dev_f1 < dev_f1 else (max_dev_f1, max_dev_id)

train_loss = train_loss / train_batch.num_examples * opt['batch_size']  # avg loss per batch
dev_loss = dev_loss / dev_batch.num_examples * opt['batch_size']

# eval on test
print("Evaluating on test set...")
predictions = []
print("Loading data from {} with batch size {}...".format( data_file, opt['batch_size'])) batch = DataLoader(data_file, opt['batch_size'], opt, vocab, evaluation=True, pattern_file=opt['pattern_file']) helper.print_config(opt) id2label = dict([(v, k) for k, v in constant.LABEL_TO_ID.items()]) predictions = [] all_probs = [] for i, b in enumerate(batch): preds, probs, _ = base_model.predict(b) predictions += preds all_probs += probs predictions = [id2label[p] for p in predictions] _, _, _ = scorer.score(batch.gold(), predictions, verbose=True) # save probability scores # if len(args.out) > 0: # outfile = 'saved_models/' + args.model_id + '/' + args.out # with open(outfile, 'w') as fw: # for prob in all_probs: # fw.write(json.dumps([round(p, 4) for p in prob])) # fw.write('\r\n') # print("Prediction scores saved to {}.".format(outfile)) print("Evaluation ended.")