예제 #1
0
            total += len(y)

        # Mean of the collected validation losses; max(1, ...) keeps the
        # division defined when `losses` is empty.
        val_loss = sum(losses) / max(1, len(losses))
        # Guard the accuracy denominator the same way as the loss so a
        # validation pass that counted no samples does not divide by zero.
        val_acc = correct / max(1, total)
        writer.add_scalar('val_loss', val_loss, epoch)
        writer.add_scalar('val_acc', val_acc, epoch)

        print("\tValidation: Loss={:.2f}\t Accuracy={:.2f}\t".format(
            val_loss, val_acc))
        # Evaluation Loop End

        # Overwrite "best.pth" whenever this epoch's validation loss is lower
        # than the lowest one recorded so far.
        if val_loss < best_val:
            best_val = val_loss
            # The checkpoint stores epoch + 1 together with the model and
            # optimizer state and the backbone name.
            best_checkpoint = {
                "epoch": epoch + 1,
                "model_state_dict": model.state_dict(),
                "backbone": args.backbone,
                "optimizer_state_dict": optimizer.state_dict()
            }
            best_path = os.path.join(args.out_path, "best.pth")
            torch.save(best_checkpoint, best_path)

        # Write a numbered snapshot every "args.save_after" epochs; the count
        # uses epoch + 1, the same value stored in the checkpoint.
        epochs_done = epoch + 1
        if epochs_done % args.save_after == 0:
            snapshot = {
                "epoch": epochs_done,
                "model_state_dict": model.state_dict(),
                "backbone": args.backbone,
                "optimizer_state_dict": optimizer.state_dict()
            }
            snapshot_name = "epoch_{}.pth".format(epochs_done)
            torch.save(snapshot, os.path.join(args.out_path, snapshot_name))
예제 #2
0
    # Normalise the accumulated loss and both distances by running_num.
    running_loss, positive_dist, negative_dist = (
        running_loss / running_num,
        positive_dist / running_num,
        negative_dist / running_num,
    )

    # Epoch number as reported in the log lines (epoch + 1).
    epoch_no = epoch + 1
    print('Epoch: {:d}, training loss {:.5f}'.format(epoch_no, running_loss))
    print('Epoch: {:d}, positive distance {:.3f}, negative distance {:.3f}'
          .format(epoch_no, positive_dist, negative_dist))

    # The small epsilon keeps the denominator non-zero when positive_dist is 0.
    dist_ratio = negative_dist / (positive_dist + 1e-6)
    print('Epoch: {:d}, training distance ratio {:.2f}'.format(
        epoch_no, dist_ratio))

    # Checkpoint the network and optimizer after every 10th epoch.
    # NOTE(review): save_name is not changed in the visible code, so each
    # periodic save presumably overwrites the previous one - confirm.
    completed_epochs = epoch + 1
    if completed_epochs % 10 == 0:
        periodic_state = {
            'epoch': completed_epochs,
            'state_dict': net.state_dict(),
            'optimizer': optimizer.state_dict()
        }
        save_checkpoint(periodic_state, save_name)
        print('save model to : {}'.format(save_name))

# Move the network to the CPU, then write the final checkpoint to save_name.
net = net.to('cpu')
final_state = {
    'epoch': epoch + 1,
    'state_dict': net.state_dict(),
    'optimizer': optimizer.state_dict()
}
save_checkpoint(final_state, save_name)
print('save model to : {}'.format(save_name))
print('Finished training')