Example #1
def train(args, io):
    train_loader = DataLoader(ModelNet40(partition='train',
                                         num_points=args.num_points),
                              num_workers=8,
                              batch_size=args.batch_size,
                              shuffle=True,
                              drop_last=True)
    test_loader = DataLoader(ModelNet40(partition='test',
                                        num_points=args.num_points),
                             num_workers=8,
                             batch_size=args.test_batch_size,
                             shuffle=True,
                             drop_last=False)

    device = torch.device("cuda" if args.cuda else "cpu")

    # Try to load models
    if args.model == 'pointnet':
        model = PointNet(args).to(device)
    elif args.model == 'dgcnn':
        model = DGCNN(args).to(device)
    else:
        raise Exception("Not implemented")
    print(str(model))

    model = nn.DataParallel(model)
    print("Let's use", torch.cuda.device_count(), "GPUs!")

    if args.use_sgd:
        print("Use SGD")
        opt = optim.SGD(model.parameters(),
                        lr=args.lr * 100,
                        momentum=args.momentum,
                        weight_decay=1e-4)
    else:
        print("Use Adam")
        opt = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-4)

    # with SGD the LR anneals from 100*args.lr down to args.lr; on the Adam
    # path lr == eta_min, so this schedule keeps the LR constant
    scheduler = CosineAnnealingLR(opt, args.epochs, eta_min=args.lr)

    criterion = cal_loss

    best_test_acc = 0
    for epoch in range(args.epochs):
        # note: since PyTorch 1.1, scheduler.step() is expected after the
        # epoch's optimizer steps; calling it first skips one LR value
        scheduler.step()
        ####################
        # Train
        ####################
        train_loss = 0.0
        count = 0.0
        model.train()
        train_pred = []
        train_true = []
        for data, label in train_loader:
            data, label = data.to(device), label.to(device).squeeze()
            data = data.permute(0, 2, 1)
            batch_size = data.size()[0]
            opt.zero_grad()
            logits = model(data)
            loss = criterion(logits, label)
            loss.backward()
            opt.step()
            preds = logits.max(dim=1)[1]
            count += batch_size
            train_loss += loss.item() * batch_size
            train_true.append(label.cpu().numpy())
            train_pred.append(preds.detach().cpu().numpy())
        train_true = np.concatenate(train_true)
        train_pred = np.concatenate(train_pred)
        outstr = 'Train %d, loss: %.6f, train acc: %.6f, train avg acc: %.6f' % (
            epoch, train_loss * 1.0 / count,
            metrics.accuracy_score(train_true, train_pred),
            metrics.balanced_accuracy_score(train_true, train_pred))
        io.cprint(outstr)

        ####################
        # Test
        ####################
        test_loss = 0.0
        count = 0.0
        model.eval()
        test_pred = []
        test_true = []
        for data, label in test_loader:
            data, label = data.to(device), label.to(device).squeeze()
            data = data.permute(0, 2, 1)
            batch_size = data.size()[0]
            logits = model(data)
            loss = criterion(logits, label)
            preds = logits.max(dim=1)[1]
            count += batch_size
            test_loss += loss.item() * batch_size
            test_true.append(label.cpu().numpy())
            test_pred.append(preds.detach().cpu().numpy())
        test_true = np.concatenate(test_true)
        test_pred = np.concatenate(test_pred)
        test_acc = metrics.accuracy_score(test_true, test_pred)
        avg_per_class_acc = metrics.balanced_accuracy_score(
            test_true, test_pred)
        outstr = 'Test %d, loss: %.6f, test acc: %.6f, test avg acc: %.6f' % (
            epoch, test_loss * 1.0 / count, test_acc, avg_per_class_acc)
        io.cprint(outstr)
        if test_acc >= best_test_acc:
            best_test_acc = test_acc
            torch.save(model.state_dict(),
                       'checkpoints/%s/models/model.t7' % args.exp_name)
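
A minimal driver sketch for calling the train() above. The real repository builds `args` with argparse and `io` with its own IOStream logger; the flag names below match what train() reads, but the defaults and the IOStream stand-in are assumptions for illustration only.

import argparse
import os
import torch

class IOStream:
    """Tiny stand-in for the repo's logger: print and append to a file."""
    def __init__(self, path):
        self.f = open(path, 'a')
    def cprint(self, text):
        print(text)
        self.f.write(text + '\n')
        self.f.flush()

parser = argparse.ArgumentParser()
parser.add_argument('--exp_name', type=str, default='exp')
parser.add_argument('--model', type=str, default='dgcnn', choices=['pointnet', 'dgcnn'])
parser.add_argument('--num_points', type=int, default=1024)
parser.add_argument('--batch_size', type=int, default=32)
parser.add_argument('--test_batch_size', type=int, default=16)
parser.add_argument('--epochs', type=int, default=250)
parser.add_argument('--use_sgd', action='store_true')
parser.add_argument('--lr', type=float, default=0.001)
parser.add_argument('--momentum', type=float, default=0.9)
args = parser.parse_args()
args.cuda = torch.cuda.is_available()

# train() writes its best model under checkpoints/<exp_name>/models/
os.makedirs('checkpoints/%s/models' % args.exp_name, exist_ok=True)
io = IOStream('checkpoints/%s/run.log' % args.exp_name)
train(args, io)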
Example #2
def train(modelin=args.model, modelout=args.out, device=args.device, opt=args.opt):

    # define model, dataloader, 3dmm eigenvectors, optimization method
    calib_net = PointNet(n=1)
    sfm_net = PointNet(n=199)
    if modelin != "":
        calib_path = os.path.join('model','calib_' + modelin)
        sfm_path = os.path.join('model','sfm_' + modelin)
        pretrained1 = torch.load(calib_path)
        pretrained2 = torch.load(sfm_path)
        calib_dict = calib_net.state_dict()
        sfm_dict = sfm_net.state_dict()

        pretrained1 = {k: v for k,v in pretrained1.items() if k in calib_dict}
        pretrained2 = {k: v for k,v in pretrained2.items() if k in sfm_dict}
        calib_dict.update(pretrained1)
        sfm_dict.update(pretrained2)

        # load the merged dicts; loading the filtered pretrained dicts
        # directly would discard the update above and drop missing keys
        calib_net.load_state_dict(calib_dict)
        sfm_net.load_state_dict(sfm_dict)

    calib_net.to(device=device)
    sfm_net.to(device=device)
    opt1 = torch.optim.Adam(calib_net.parameters(),lr=1e-3)
    opt2 = torch.optim.Adam(sfm_net.parameters(),lr=1e-3)

    # dataloader
    data = dataloader.Data()
    loader = data.batchloader
    batch_size = data.batchsize

    # mean shape and eigenvectors for 3dmm
    mu_lm = torch.from_numpy(data.mu_lm).float()#.to(device=device)
    mu_lm[:,2] = mu_lm[:,2] * -1
    mu_lm = torch.stack(batch_size * [mu_lm.to(device=device)])
    shape = mu_lm
    lm_eigenvec = torch.from_numpy(data.lm_eigenvec).float().to(device=device)
    lm_eigenvec = torch.stack(batch_size * [lm_eigenvec])

    M = data.M
    N = data.N

    # main training loop (itertools.count() runs indefinitely; interrupt to stop)
    for epoch in itertools.count():
        for j,batch in enumerate(loader):

            # get the input and gt values
            x_cam_gt = batch['x_cam_gt'].to(device=device)
            shape_gt = batch['x_w_gt'].to(device=device)
            fgt = batch['f_gt'].to(device=device)
            x_img = batch['x_img'].to(device=device)
            #beta_gt = batch['beta_gt'].to(device=device)
            #x_img_norm = batch['x_img_norm']
            x_img_gt = batch['x_img_gt'].to(device=device).permute(0,2,1,3)
            batch_size = fgt.shape[0]

            one = torch.ones(batch_size,M*N,1).to(device=device)
            x_img_one = torch.cat([x_img,one],dim=2)
            x_cam_pt = x_cam_gt.permute(0,1,3,2).reshape(batch_size,6800,3)
            x = x_img.permute(0,2,1)
            #x = x_img.permute(0,2,1).reshape(batch_size,2,M,N)

            ptsI = x_img_one.reshape(batch_size,M,N,3).permute(0,1,3,2)[:,:,:2,:]

            # if just optimizing
            if not opt:
                # calibration
                f = calib_net(x) + 300
                K = torch.zeros((batch_size,3,3)).float().to(device=device)
                K[:,0,0] = f.squeeze()
                K[:,1,1] = f.squeeze()
                K[:,2,2] = 1

                # sfm
                betas = sfm_net(x)
                betas = betas.unsqueeze(-1)
                shape = mu_lm + torch.bmm(lm_eigenvec,betas).squeeze().view(batch_size,N,3)

                opt1.zero_grad()
                opt2.zero_grad()
                f_error = torch.mean(torch.abs(f - fgt))
                #error2d = torch.mean(torch.abs(pred - x_img_gt))
                error3d = torch.mean(torch.abs(shape - shape_gt))
                error = f_error + error3d
                error.backward()
                opt1.step()
                opt2.step()

                print(f"f_error: {f_error.item():.3f} | error3d: {error3d.item():.3f} | f/fgt: {f[0].item():.1f}/{fgt[0].item():.1f} | f/fgt: {f[1].item():.1f}/{fgt[1].item():.1f} | f/fgt: {f[2].item():.1f}/{fgt[2].item():.1f} | f/fgt: {f[3].item():.1f}/{fgt[3].item():.1f} ")
                continue

            # get shape error from image projection
            # (the optimization branch that computes rmse, f_error, loss1, and
            # loss2 is truncated in this snippet; the print below assumes those
            # values were produced above)
            print(f"f/fgt: {f[0].item():.3f}/{fgt[0].item():.3f} | rmse: {rmse:.3f} | f_rel: {f_error.item():.4f}  | loss1: {loss1.item():.3f} | loss2: {loss2.item():.3f}")

        # save both models after each pass over the data, then evaluate
        print("saving!")
        torch.save(sfm_net.state_dict(), os.path.join('model','sfm_'+modelout))
        torch.save(calib_net.state_dict(), os.path.join('model','calib_'+modelout))
        test(modelin=args.out, outfile=args.out, optimize=False)
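
The filtered-update idiom at the top of Example #2 (load a checkpoint, keep only the keys the current model also has, merge into the model's own state dict, then load the merged dict) is reusable on its own. A minimal generic sketch; the helper name `load_matching_weights` and its arguments are placeholders, not part of the original code:

import torch

def load_matching_weights(model, ckpt_path, device='cpu'):
    pretrained = torch.load(ckpt_path, map_location=device)
    own = model.state_dict()
    # keep only entries that exist in the model with the same shape
    matched = {k: v for k, v in pretrained.items()
               if k in own and v.shape == own[k].shape}
    own.update(matched)
    model.load_state_dict(own)  # load the merged dict, never `matched` alone
    return matched.keys()       # report what was actually transferred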
Example #3
def startcustomtraining(args, io):
    ft_loader = DataLoader(FT10(num_points=args.num_points),
                           num_workers=8,
                           batch_size=args.test_batch_size,
                           shuffle=True,
                           drop_last=True)
    ft_test_loader = DataLoader(FT11(num_points=args.num_points),
                                num_workers=8,
                                batch_size=args.test_batch_size,
                                shuffle=True,
                                drop_last=False)

    device = torch.device("cuda" if args.cuda else "cpu")

    # Try to load models
    if args.model == 'pointnet':
        model = PointNet(args).to(device)
    elif args.model == 'dgcnn':
        model = DGCNN(args).to(device)
    else:
        raise Exception("Not implemented")
    print(str(model))

    model = nn.DataParallel(model)
    print("Let's use", torch.cuda.device_count(), "GPUs!")

    if args.use_sgd:
        print("Use SGD")
        opt = optim.SGD(model.parameters(),
                        lr=args.lr * 100,
                        momentum=args.momentum,
                        weight_decay=1e-4)
    else:
        print("Use Adam")
        opt = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-4)

    scheduler = CosineAnnealingLR(opt, args.epochs, eta_min=args.lr)

    criterion = cal_loss
    best_ft_test_acc = 0.0

    i = 0
    train_accs = []
    test_accs = []
    epochs = []

    for epoch in range(args.epochs):
        i += 1
        scheduler.step()  # see the scheduler.step() note in Example #1
        ft_loss = 0.0
        count = 0
        model.train()
        ft_pred = []
        ft_true = []
        for data, label in ft_loader:
            data, label = data.to(device), label.to(device).squeeze()
            data = data.permute(0, 2, 1)
            batch_size = data.size()[0]
            opt.zero_grad()
            logits = model(data)
            loss = criterion(logits, label)
            loss.backward()
            opt.step()
            preds = logits.max(dim=1)[1]
            count += batch_size
            ft_loss += loss.item() * batch_size
            ft_true.append(label.cpu().numpy())
            ft_pred.append(preds.detach().cpu().numpy())
            #print(data.shape, label.shape, logits.shape, preds.shape)
            #print('LABELS:', label)
            #print('PREDS:', preds)
            #print('LOGITS:', logits)
        ft_true = np.concatenate(ft_true)
        ft_pred = np.concatenate(ft_pred)
        ft_acc = metrics.accuracy_score(ft_true, ft_pred)
        avg_per_class_acc = metrics.balanced_accuracy_score(ft_true, ft_pred)
        outstr = 'Train %d, loss: %.6f, train acc: %.6f, train avg acc: %.6f' % (
            epoch, ft_loss * 1.0 / count, ft_acc, avg_per_class_acc)
        io.cprint(outstr)
        train_accs.append(ft_acc)

        ft_test_loss = 0.0
        count = 0
        model.eval()
        ft_test_pred = []
        ft_test_true = []
        for data, label in ft_test_loader:
            data, label = data.to(device), label.to(device).squeeze()
            data = data.permute(0, 2, 1)
            batch_size = data.size()[0]
            logits = model(data)
            loss = criterion(logits, label)
            preds = logits.max(dim=1)[1]
            count += batch_size
            ft_test_loss += loss.item() * batch_size
            ft_test_true.append(label.cpu().numpy())
            ft_test_pred.append(preds.detach().cpu().numpy())
            #print(data.shape, label.shape, logits.shape, preds.shape)
            #print('LABELS:', label)
            #print('PREDS:', preds)
            #print('LOGITS:', logits)
        ft_test_true = np.concatenate(ft_test_true)
        ft_test_pred = np.concatenate(ft_test_pred)
        ft_test_acc = metrics.accuracy_score(ft_test_true, ft_test_pred)
        avg_per_class_acc = metrics.balanced_accuracy_score(
            ft_test_true, ft_test_pred)
        outstr = 'Test %d, loss: %.6f, test acc: %.6f, test avg acc: %.6f' % (
            epoch, ft_test_loss * 1.0 / count, ft_test_acc, avg_per_class_acc)
        io.cprint(outstr)
        if ft_test_acc > best_ft_test_acc:
            print('save now')
            best_ft_test_acc = ft_test_acc
            torch.save(model.state_dict(), 'pretrained/custommodel.t7')
        #torch.save(model.state_dict(), 'pretrained/custommodel.t7')

        epochs.append(i)
        test_accs.append(ft_test_acc)

        fig, ax = plt.subplots()
        ax.plot(epochs, train_accs, color='blue', label='train acc')
        ax.plot(epochs, test_accs, color='red', label='test acc')
        ax.set(xlabel='epoch',
               ylabel='accuracy',
               title='accuracy values per epoch')
        ax.grid()
        ax.legend()
        fig.savefig("accuracy.png")
        plt.close(fig)  # close instead of show(): show() blocks each epoch and open figures would accumulate
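
FT10 and FT11 in Example #3 are custom fine-tuning datasets whose code is not shown. Any substitute only has to match the interface the loop expects: items of (points, label) with points as float32 of shape [num_points, 3] and label as a length-1 int64 array, as in ModelNet40. A hypothetical stand-in with random data, useful only for wiring tests:

import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader

class RandomPointCloudSet(Dataset):
    """Hypothetical FT10/FT11 replacement with the same item layout."""
    def __init__(self, num_points=1024, num_items=100, num_classes=10):
        self.num_points = num_points
        self.num_items = num_items
        self.num_classes = num_classes
    def __len__(self):
        return self.num_items
    def __getitem__(self, idx):
        points = np.random.randn(self.num_points, 3).astype(np.float32)
        label = np.array([idx % self.num_classes], dtype=np.int64)
        return points, label

# default_collate yields data of shape [B, num_points, 3] and labels [B, 1],
# which the loop's .squeeze() and .permute(0, 2, 1) expect
loader = DataLoader(RandomPointCloudSet(), batch_size=8, shuffle=True)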
Example #4
def train(args, io):
    train_loader = DataLoader(ModelNet40(partition='train', num_points=args.num_points), num_workers=8,
                              batch_size=args.batch_size, shuffle=True, drop_last=True)
    test_loader = DataLoader(ModelNet40(partition='test', num_points=args.num_points), num_workers=8,
                             batch_size=args.test_batch_size, shuffle=True, drop_last=False)

    device = torch.device("cuda" if args.cuda else "cpu")

    # Try to load models
    if args.model == 'pointnet':
        model = PointNet(args).to(device)
    elif args.model == 'dgcnn':
        model = DGCNN(args).to(device)
    elif args.model == 'semigcn':
        model = SemiGCN(args).to(device)
    else:
        raise Exception("Not implemented")
    print(str(model))

    model = nn.DataParallel(model)
    print("Let's use", torch.cuda.device_count(), "GPUs!")

    if args.use_sgd:
        print("Use SGD")
        opt = optim.SGD(model.parameters(), lr=args.lr*100, momentum=args.momentum, weight_decay=1e-4)
    else:
        print("Use Adam")
        opt = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-4)
        
    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            opt.load_state_dict(checkpoint['opt'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
            
    #scheduler = CosineAnnealingLR(opt, args.epochs, eta_min=args.lr, last_epoch=args.start_epoch-1)
    scheduler = torch.optim.lr_scheduler.StepLR(opt, step_size=20, gamma=0.8)#0.7
    #scheduler = torch.optim.lr_scheduler.ExponentialLR(opt, gamma=0.9825, last_epoch=args.start_epoch-1)
    
    criterion = cal_loss

    best_test_acc = 0
    for epoch in range(args.start_epoch, args.epochs):
        #scheduler.step()
        ####################
        # Train
        ####################
        train_loss = 0.0
        count = 0.0
        model.train()
        train_pred = []
        train_true = []
        for data, label in train_loader:
            data, label = data.to(device), label.to(device).squeeze()
            data = data.permute(0, 2, 1)
            batch_size = data.size()[0]
            opt.zero_grad()
            logits = model(data)
            loss = criterion(logits, label)
            loss.backward()
            opt.step()
            preds = logits.max(dim=1)[1]
            count += batch_size
            train_loss += loss.item() * batch_size
            train_true.append(label.cpu().numpy())
            train_pred.append(preds.detach().cpu().numpy())
        scheduler.step()
        train_true = np.concatenate(train_true)
        train_pred = np.concatenate(train_pred)
        outstr = 'Train %d, loss: %.6f, train acc: %.6f, train avg acc: %.6f' % (
            epoch, train_loss * 1.0 / count,
            metrics.accuracy_score(train_true, train_pred),
            metrics.balanced_accuracy_score(train_true, train_pred))
        io.cprint(outstr)
        if epoch % 10 == 0:
            # save a running checkpoint every 10 epochs
            torch.save({'epoch': epoch + 1,
                        'arch': args.model,
                        'state_dict': model.state_dict(),
                        'opt': opt.state_dict()},
                       'checkpoints/%s/models/checkpoint_latest.pth.tar' % args.exp_name)
        ####################
        # Test
        ####################
        test_loss = 0.0
        count = 0.0
        model.eval()
        test_pred = []
        test_true = []
        for data, label in test_loader:
            data, label = data.to(device), label.to(device).squeeze()
            data = data.permute(0, 2, 1)
            batch_size = data.size()[0]
            logits = model(data)
            loss = criterion(logits, label)
            preds = logits.max(dim=1)[1]
            count += batch_size
            test_loss += loss.item() * batch_size
            test_true.append(label.cpu().numpy())
            test_pred.append(preds.detach().cpu().numpy())
        test_true = np.concatenate(test_true)
        test_pred = np.concatenate(test_pred)
        test_acc = metrics.accuracy_score(test_true, test_pred)
        avg_per_class_acc = metrics.balanced_accuracy_score(test_true, test_pred)
        outstr = 'Test %d, loss: %.6f, test acc: %.6f, test avg acc: %.6f' % (
            epoch, test_loss * 1.0 / count, test_acc, avg_per_class_acc)
        io.cprint(outstr)
        if test_acc >= best_test_acc:
            best_test_acc = test_acc
            torch.save({'epoch': epoch + 1,
                        'arch': args.model,
                        'state_dict': model.state_dict(),
                        'opt': opt.state_dict()},
                       'checkpoints/%s/models/checkpoint_best.pth.tar' % args.exp_name)
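
Example #4 checkpoints the epoch, architecture, model, and optimizer, but not the StepLR scheduler, so a resumed run restarts the decay schedule at the base learning rate (the commented-out last_epoch=args.start_epoch-1 lines hint at the same concern). A sketch of the same checkpoint layout extended with scheduler state; the helper names are assumptions, not part of the snippet:

import torch

def save_checkpoint(path, epoch, arch, model, opt, scheduler):
    # same keys as Example #4, plus the scheduler so StepLR resumes mid-decay
    torch.save({'epoch': epoch + 1,
                'arch': arch,
                'state_dict': model.state_dict(),
                'opt': opt.state_dict(),
                'scheduler': scheduler.state_dict()}, path)

def load_checkpoint(path, model, opt, scheduler):
    # restore all three states and return the epoch to resume from
    ckpt = torch.load(path, map_location='cpu')
    model.load_state_dict(ckpt['state_dict'])
    opt.load_state_dict(ckpt['opt'])
    scheduler.load_state_dict(ckpt['scheduler'])
    return ckpt['epoch']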
Example #5
def train(args, io):
    train_loader = DataLoader(ModelNet40(partition='train',
                                         num_points=args.num_points),
                              num_workers=8,
                              batch_size=args.batch_size,
                              shuffle=True,
                              drop_last=True)
    test_loader = DataLoader(ModelNet40(partition='test',
                                        num_points=args.num_points),
                             num_workers=8,
                             batch_size=args.test_batch_size,
                             shuffle=True,
                             drop_last=False)

    device = torch.device("cuda" if args.cuda else "cpu")

    # Try to load models
    if args.model == 'pointnet':
        model = PointNet(args).to(device)
    elif args.model == 'dgcnn':
        model = DGCNN(args).to(device)
    elif args.model == 'ssg':
        model = PointNet2SSG(output_classes=40, dropout_prob=args.dropout)
        model.to(device)
    elif args.model == 'msg':
        model = PointNet2MSG(output_classes=40, dropout_prob=args.dropout)
        model.to(device)
    elif args.model == 'ognet':
        # [64,128,256,512]
        if args.efficient:
            model = ModelE_dense(20,
                                 args.feature_dims, [512],
                                 output_classes=40,
                                 init_points=768,
                                 input_dims=3,
                                 dropout_prob=args.dropout,
                                 id_skip=args.id_skip,
                                 drop_connect_rate=args.drop_connect_rate,
                                 cluster='xyzrgb',
                                 pre_act=args.pre_act,
                                 norm=args.norm_layer,
                                 gem=args.gem,
                                 ASPP=args.ASPP)
        else:
            model = Model_dense(20,
                                args.feature_dims, [512],
                                output_classes=40,
                                init_points=768,
                                input_dims=3,
                                dropout_prob=args.dropout,
                                id_skip=args.id_skip,
                                drop_connect_rate=args.drop_connect_rate,
                                cluster='xyzrgb',
                                pre_act=args.pre_act,
                                norm=args.norm_layer)
        model.to(device)
    elif args.model == 'ognet-small':
        # [48,96,192,384]
        model = Model_dense(20,
                            args.feature_dims, [512],
                            output_classes=40,
                            init_points=768,
                            input_dims=3,
                            dropout_prob=args.dropout,
                            id_skip=args.id_skip,
                            drop_connect_rate=args.drop_connect_rate,
                            cluster='xyzrgb',
                            pre_act=args.pre_act,
                            norm=args.norm_layer)
        model.to(device)
    else:
        raise Exception("Not implemented")
    print(str(model))

    model = nn.DataParallel(model)
    print("Let's use", torch.cuda.device_count(), "GPUs!")

    if args.use_sgd:
        print("Use SGD")
        opt = optim.SGD(model.parameters(),
                        lr=args.lr * 100,
                        momentum=args.momentum,
                        weight_decay=1e-4)
        scheduler = CosineAnnealingLR(opt, args.epochs, eta_min=args.lr)
    else:
        print("Use Adam")
        opt = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-4)
        scheduler = CosineAnnealingLR(opt, args.epochs, eta_min=0.01 * args.lr)

    criterion = cal_loss

    best_test_acc = 0
    best_avg_per_class_acc = 0

    warm_up = 0.1  # loss scale starts at 0.1 and ramps to 1.0
    warm_iteration = round(
        len(ModelNet40(partition='train', num_points=args.num_points)) /
        args.batch_size) * args.warm_epoch  # ramp spans the first args.warm_epoch epochs
    for epoch in range(args.epochs):
        scheduler.step()  # see the scheduler.step() note in Example #1
        ####################
        # Train
        ####################
        train_loss = 0.0
        count = 0.0
        model.train()
        train_pred = []
        train_true = []
        for data, label in train_loader:
            data, label = data.to(device), label.to(device).squeeze()
            batch_size = data.size()[0]
            opt.zero_grad()
            if args.model in ('ognet', 'ognet-small', 'ssg', 'msg'):
                logits = model(data, data)
            else:
                data = data.permute(0, 2, 1)
                logits = model(data)
            loss = criterion(logits, label)
            if epoch < args.warm_epoch:
                warm_up = min(1.0, warm_up + 0.9 / warm_iteration)
                loss *= warm_up
            loss.backward()
            opt.step()
            preds = logits.max(dim=1)[1]
            count += batch_size
            train_loss += loss.item() * batch_size
            train_true.append(label.cpu().numpy())
            train_pred.append(preds.detach().cpu().numpy())
        train_true = np.concatenate(train_true)
        train_pred = np.concatenate(train_pred)
        outstr = 'Train %d, loss: %.6f, train acc: %.6f, train avg acc: %.6f' % (
            epoch, train_loss * 1.0 / count,
            metrics.accuracy_score(train_true, train_pred),
            metrics.balanced_accuracy_score(train_true, train_pred))
        io.cprint(outstr)

        ####################
        # Test
        ####################
        test_loss = 0.0
        count = 0.0
        model.eval()
        test_pred = []
        test_true = []
        for data, label in test_loader:
            data, label = data.to(device), label.to(device).squeeze()
            batch_size = data.size()[0]
            if args.model in ('ognet', 'ognet-small', 'ssg', 'msg'):
                logits = model(data, data)
            else:
                data = data.permute(0, 2, 1)
                logits = model(data)
            loss = criterion(logits, label)
            preds = logits.max(dim=1)[1]
            count += batch_size
            test_loss += loss.item() * batch_size
            test_true.append(label.cpu().numpy())
            test_pred.append(preds.detach().cpu().numpy())
        test_true = np.concatenate(test_true)
        test_pred = np.concatenate(test_pred)
        test_acc = metrics.accuracy_score(test_true, test_pred)
        avg_per_class_acc = metrics.balanced_accuracy_score(
            test_true, test_pred)
        outstr = 'Test %d, loss: %.6f, test acc: %.6f, test avg acc: %.6f' % (
            epoch, test_loss * 1.0 / count, test_acc, avg_per_class_acc)
        io.cprint(outstr)
        if test_acc + avg_per_class_acc >= best_test_acc + best_avg_per_class_acc:
            best_test_acc = test_acc
            best_avg_per_class_acc = avg_per_class_acc
            print('This is the current best.')
            torch.save(model.state_dict(),
                       'checkpoints/%s/models/model.t7' % args.exp_name)
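
The warm-up in Example #5 scales the loss by a factor that ramps linearly from 0.1 to 1.0 over the first args.warm_epoch epochs, one increment of 0.9/warm_iteration per batch. A self-contained sketch of that factor, using assumed values (batch size 32 and the 9,843-sample ModelNet40 train split):

warm_up = 0.1                       # loss is scaled by 0.1 at the start
warm_epoch = 5
iters_per_epoch = round(9843 / 32)  # assumed: ModelNet40 train size / batch size
warm_iteration = iters_per_epoch * warm_epoch

factors = []
for _ in range(warm_iteration):
    warm_up = min(1.0, warm_up + 0.9 / warm_iteration)
    factors.append(warm_up)

print(factors[0], factors[len(factors) // 2], factors[-1])
# ~0.1006, ~0.55, ~1.0: a linear ramp, matching the in-loop update above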
Example #6
def train(args, io):
    train_loader = DataLoader(ModelNet40(partition='train'),
                              num_workers=8,
                              batch_size=args.batch_size,
                              shuffle=True,
                              drop_last=True)
    test_loader = DataLoader(ModelNet40(partition='test'),
                             num_workers=8,
                             batch_size=args.batch_size,
                             shuffle=True,
                             drop_last=False)

    device = torch.device("cuda:0")

    model = PointNet().to(device)
    print(str(model))

    print("Use Adam")
    opt = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-6)

    # note: eta_min equals the initial lr here, so this cosine schedule keeps
    # the learning rate constant; use a smaller eta_min to actually anneal
    scheduler = CosineAnnealingLR(opt, args.epochs, eta_min=args.lr)

    criterion = cal_loss

    best_test_acc = 0
    for epoch in range(args.epochs):
        scheduler.step()  # see the scheduler.step() note in Example #1
        ####################
        # Train
        ####################
        train_loss = 0.0
        count = 0.0
        model.train()
        train_pred = []
        train_true = []
        for data, label in train_loader:
            data, label = data.to(device), label.to(device).squeeze()
            batch_size = data.size()[0]
            opt.zero_grad()
            logits = model(data.float())
            loss = criterion(logits, label)
            loss.backward()
            opt.step()
            preds = logits.max(dim=1)[1]
            count += batch_size
            train_loss += loss.item() * batch_size
            train_true.append(label.cpu().numpy())
            train_pred.append(preds.detach().cpu().numpy())
        train_true = np.concatenate(train_true)
        train_pred = np.concatenate(train_pred)
        outstr = 'Train %d, loss: %.6f, train acc: %.6f, train avg acc: %.6f' % (
            epoch, train_loss * 1.0 / count,
            metrics.accuracy_score(train_true, train_pred),
            metrics.balanced_accuracy_score(train_true, train_pred))
        io.cprint(outstr)

        ####################
        # Test
        ####################
        test_loss = 0.0
        count = 0.0
        model.eval()
        test_pred = []
        test_true = []
        for data, label in test_loader:
            data, label = data.to(device), label.to(device).squeeze()
            batch_size = data.size()[0]
            logits = model(data.float())
            loss = criterion(logits, label)
            preds = logits.max(dim=1)[1]
            count += batch_size
            test_loss += loss.item() * batch_size
            test_true.append(label.cpu().numpy())
            test_pred.append(preds.detach().cpu().numpy())
        test_true = np.concatenate(test_true)
        test_pred = np.concatenate(test_pred)
        test_acc = metrics.accuracy_score(test_true, test_pred)
        avg_per_class_acc = metrics.balanced_accuracy_score(
            test_true, test_pred)
        outstr = 'Test %d, loss: %.6f, test acc: %.6f, test avg acc: %.6f' % (
            epoch, test_loss * 1.0 / count, test_acc, avg_per_class_acc)
        io.cprint(outstr)
        if test_acc >= best_test_acc:
            best_test_acc = test_acc
            torch.save(model.state_dict(),
                       'checkpoints/%s/models/model.t7' % args.exp_name)
            print('Saving ckpt with acc: %f' % best_test_acc)
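
Every classification example above sets criterion = cal_loss without defining it. In the DGCNN reference implementation this is a cross entropy with label smoothing; the sketch below reproduces that form (eps = 0.2) from memory and should be treated as an approximation of, not a substitute for, the repo's own util.cal_loss:

import torch
import torch.nn.functional as F

def cal_loss(pred, gold, smoothing=True):
    """Cross entropy with optional label smoothing (assumed eps = 0.2)."""
    gold = gold.contiguous().view(-1)
    if smoothing:
        eps = 0.2
        n_class = pred.size(1)
        # soften the one-hot targets: 1-eps on the true class,
        # eps/(n_class-1) spread over the remaining classes
        one_hot = torch.zeros_like(pred).scatter(1, gold.view(-1, 1), 1)
        one_hot = one_hot * (1 - eps) + (1 - one_hot) * eps / (n_class - 1)
        log_prb = F.log_softmax(pred, dim=1)
        loss = -(one_hot * log_prb).sum(dim=1).mean()
    else:
        loss = F.cross_entropy(pred, gold, reduction='mean')
    return loss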