def evaluation(args, model, data_loader, metric):
    """Run one full pass over ``data_loader`` in eval mode and report the metric.

    Args:
        args: namespace providing ``task_name`` and ``test_batch_size``.
        model: callable taking ``(input_ids, segment_ids)`` and returning logits;
            must expose ``eval()`` / ``train()`` mode switches.
        data_loader: iterable of ``(input_ids, segment_ids, labels)`` batches
            with a ``__len__`` (number of batches).
        metric: task metric exposing ``reset`` / ``update`` / ``accumulate``
            (and ``compute`` for the classification tasks).

    Returns:
        The headline metric for the task (R1@10 for udc; the scalar metric
        for dstc2 / atis_slot / atis_intent / mrda / swda), or None for an
        unrecognized task name.
    """
    model.eval()
    metric.reset()
    # These classification tasks pre-process logits/labels via metric.compute();
    # the remaining tasks feed raw (logits, labels) into update() directly.
    needs_compute = args.task_name in ['atis_intent', 'mrda', 'swda']
    for input_ids, segment_ids, labels in data_loader:
        logits = model(input_ids, segment_ids)
        if needs_compute:
            metric.update(metric.compute(logits, labels))
        else:
            metric.update(logits, labels)
    model.train()
    metric_out = metric.accumulate()
    # NOTE(review): assumes every batch is full-sized — the last partial batch
    # would make this an over-count; confirm against the loader's drop_last.
    print('Total samples: %d' % (len(data_loader) * args.test_batch_size))
    task = args.task_name
    if task == 'udc':
        r1, r2, r5 = metric_out[0], metric_out[1], metric_out[2]
        print('R1@10: %.4f - R2@10: %.4f - R5@10: %.4f\n' % (r1, r2, r5))
        return r1
    if task == 'dstc2':
        print('Joint_acc: %.4f\n' % metric_out)
        return metric_out
    if task == 'atis_slot':
        print('F1_micro: %.4f\n' % metric_out)
        return metric_out
    if task in ['atis_intent', 'mrda', 'swda']:
        print('Acc: %.4f\n' % metric_out)
        return metric_out
def print_logs(args, step, logits, labels, loss, total_time, metric):
    """Print a one-line training log for the current step.

    Computes the step metric on the current batch only (the metric is reset
    first) and reports loss, metric, and average seconds per step since the
    last log.

    Args:
        args: namespace providing ``task_name`` and ``logging_steps``.
        step: current global training step.
        logits: model outputs for the current batch.
        labels: ground-truth labels for the current batch.
        loss: current loss value.
        total_time: wall time accumulated over the last ``logging_steps`` steps.
        metric: task metric exposing ``reset`` / ``update`` / ``accumulate``
            (and ``compute`` for the accuracy-style tasks).
    """
    secs_per_step = total_time / args.logging_steps
    task = args.task_name
    if task in ['udc', 'atis_intent', 'mrda', 'swda']:
        # udc trains with a retrieval metric; for step-level logging a plain
        # batch accuracy is computed instead of the passed-in metric.
        if task == 'udc':
            metric = Accuracy()
        metric.reset()
        metric.update(metric.compute(logits, labels))
        acc = metric.accumulate()
        print('step %d - loss: %.4f - acc: %.4f - %.3fs/step'
              % (step, loss, acc, secs_per_step))
    elif task == 'dstc2':
        metric.reset()
        metric.update(logits, labels)
        joint_acc = metric.accumulate()
        print('step %d - loss: %.4f - joint_acc: %.4f - %.3fs/step'
              % (step, loss, joint_acc, secs_per_step))
    elif task == 'atis_slot':
        metric.reset()
        metric.update(logits, labels)
        f1_micro = metric.accumulate()
        print('step %d - loss: %.4f - f1_micro: %.4f - %.3fs/step'
              % (step, loss, f1_micro, secs_per_step))