# NOTE(review): this chunk is the tail of an (not visible here) epoch loop;
# the epoch/validation `for` headers are above this view. `total += len(y)`
# presumably is the last statement of the validation batch loop — confirm
# nesting against the full file.
total += len(y)

# Mean loss across validation batches; max(1, ...) guards against an empty
# `losses` list (avoids ZeroDivisionError when the val set is empty).
val_loss = sum(losses) / max(1, len(losses))
# Log per-epoch validation metrics to TensorBoard.
writer.add_scalar('val_loss', val_loss, epoch)
writer.add_scalar('val_acc', correct / total, epoch)
print("\tValidation: Loss={:.2f}\t Accuracy={:.2f}\t".format(
    val_loss, correct / total))
# Evaluation Loop End

# Update "best.pth" model if val_loss in current epoch is lower than the best validation loss
if val_loss < best_val:
    best_val = val_loss
    # Checkpoint payload: 1-based epoch, model/optimizer state, and the
    # backbone name so the architecture can be rebuilt on load.
    torch.save(
        {
            "epoch": epoch + 1,
            "model_state_dict": model.state_dict(),
            "backbone": args.backbone,
            "optimizer_state_dict": optimizer.state_dict()
        }, os.path.join(args.out_path, "best.pth"))

# Save model based on the frequency defined by "args.save_after"
if (epoch + 1) % args.save_after == 0:
    torch.save(
        {
            "epoch": epoch + 1,
            "model_state_dict": model.state_dict(),
            "backbone": args.backbone,
            "optimizer_state_dict": optimizer.state_dict()
        }, os.path.join(args.out_path, "epoch_{}.pth".format(epoch + 1)))
# NOTE(review): end-of-epoch reporting for a metric-learning (triplet-style)
# training loop; the epoch `for` header is outside this view. `running_num`
# is presumably the number of batches (or samples) accumulated during the
# epoch — confirm against the accumulation code above this chunk.
running_loss = running_loss / running_num
positive_dist = positive_dist / running_num
negative_dist = negative_dist / running_num
print('Epoch: {:d}, training loss {:.5f}'.format(epoch + 1, running_loss))
print('Epoch: {:d}, positive distance {:.3f}, negative distance {:.3f}'.
      format(epoch + 1, positive_dist, negative_dist))
# Ratio of mean negative to mean positive distance; the small epsilon
# prevents division by zero when positive_dist is 0. Higher is better
# (negatives farther apart than positives).
dist_ratio = negative_dist / (positive_dist + 0.000001)
print('Epoch: {:d}, training distance ratio {:.2f}'.format(
    epoch + 1, dist_ratio))

# save model
# Periodic checkpoint every 10 epochs (epoch is 0-based, saved as 1-based).
if (epoch + 1) % 10 == 0:
    save_checkpoint(
        {
            'epoch': epoch + 1,
            'state_dict': net.state_dict(),
            'optimizer': optimizer.state_dict()
        }, save_name)
    print('save model to : {}'.format(save_name))

# move net to cpu
# NOTE(review): the lines below look like they run once AFTER the epoch loop
# finishes (final CPU transfer + final checkpoint) — confirm indentation
# against the original file. Moving to CPU first makes the saved state_dict
# device-independent.
net = net.to('cpu')
save_checkpoint(
    {
        'epoch': epoch + 1,
        'state_dict': net.state_dict(),
        'optimizer': optimizer.state_dict()
    }, save_name)
print('save model to : {}'.format(save_name))
print('Finished training')