def evaluate(model, data_loader, global_stats, mode='train'):
    """Evaluate classification accuracy of `model` over `data_loader`.

    Args:
        model: project model exposing `predict(ex)`; returns per-batch predictions.
        data_loader: iterable of batch tuples; ex[0] is a tensor whose dim 0 is
            the batch size, ex[5] holds the gold answers (a tensor —
            `.cpu().data.numpy()` is called on it).
        global_stats: dict carrying at least 'epoch' (used for logging).
        mode: when 'train', evaluation is cut off after ~10k examples so the
            unofficial train-set check stays cheap.

    Returns:
        dict with key 'acc': running average accuracy over the examples seen.
    """
    # Use precision for classify
    eval_time = util.Timer()
    start_acc = util.AverageMeter()

    # Make predictions
    examples = 0
    for ex in data_loader:
        batch_size = ex[0].size(0)
        pred_s = model.predict(ex)
        answer = ex[5]

        # We get metrics for independent start/end and joint start/end
        start_acc.update(Evaluate.accuracies(pred_s, answer.cpu().data.numpy()), 1)

        # If getting train accuracies, sample max 10k
        examples += batch_size
        if mode == 'train' and examples >= 1e4:
            break

    # BUG FIX: the middle fragment was ' = %d | ' — the label for the example
    # count was missing, yielding a garbled log line. Now 'examples = %d | '.
    logger.info('%s valid unofficial use Accuracy: Epoch = %d | acc = %.2f | ' %
                (mode, global_stats['epoch'], start_acc.avg) +
                'examples = %d | ' % (examples) +
                'valid time = %.2f (s)' % eval_time.time())

    return {'acc': start_acc.avg}
def main(args):
    """Entry point: load data, build the model, then run the train/valid loop,
    checkpointing whenever the validation metric improves."""
    # data, word2ids = util.load_train_data(train_file, word2vec_file)
    dataset, vocab, pretrained_embeddings = util.load_data()
    features = util.build_feature_dict(args, dataset)

    net = init_model(words_dict=vocab, feature_dict=features, args=args)
    net.quick_load_embed(pretrained_embeddings)
    loader = make_dataset(dataset, net)

    first_epoch = 0

    # TRAIN/VALID LOOP
    logger.info('-' * 100)
    logger.info('Train now! Output loss every %d batch...' % args.display_iter)

    stats = {'timer': util.Timer(), 'epoch': 0, 'best_valid': 0}
    for ep in range(first_epoch, args.num_epochs):
        stats['epoch'] = ep
        train(args, loader, net, stats)

        scores = evaluate(net, loader, global_stats=stats)
        metric = scores[args.valid_metric]
        if metric <= stats['best_valid']:
            continue  # no improvement — skip checkpointing

        # New best validation score: log it and persist the model.
        logger.info('Best valid: %s = %.2f (epoch %d, %d updates)' %
                    (args.valid_metric, metric, stats['epoch'], net.updates))
        net.save(args.model_file)
        stats['best_valid'] = metric
def train(args, data_loader, model, global_stats):
    """Run through one epoch of model training with the provided data loader."""
    # Meters + timers for this epoch.
    loss_meter = util.AverageMeter()
    timer = util.Timer()

    n_batches = len(data_loader)
    # One pass over the data; log (and reset) the running loss every
    # `args.display_iter` batches.
    for batch_idx, batch in enumerate(data_loader):
        loss_meter.update(*model.update(batch))  # run on one batch

        if batch_idx % args.display_iter != 0:
            continue
        header = 'train: Epoch = %d | iter = %d/%d | ' % (
            global_stats['epoch'], batch_idx, n_batches)
        tail = 'loss = %.2f | elapsed time = %.2f (s)' % (
            loss_meter.avg, global_stats['timer'].time())
        logger.info(header + tail)
        loss_meter.reset()

    logger.info('train: Epoch %d done. Time for epoch = %.2f (s)' %
                (global_stats['epoch'], timer.time()))

    # Checkpoint
    if args.checkpoint:
        model.checkpoint(args.model_file + '.checkpoint',
                         global_stats['epoch'] + 1)