Example #1
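The objective below relies on several objects defined elsewhere in the script (device, gamma, epochs, train_loader, test_loader, and the train/test helpers). A minimal set of imports it needs, assuming the ViT class comes from the vit_pytorch package:

import optuna
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
import wandb
from vit_pytorch import ViT  # assumption: the ViT used here is the vit_pytorch implementation
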
def Objective(trial):

    dim = trial.suggest_categorical('dim', [32, 64, 128])
    #patch_size = trial.suggest_int('patch_size',7, 14, 7)
    patch_size = 7
    depth = trial.suggest_categorical('depth', [8, 16, 32])
    heads = trial.suggest_categorical('heads', [8, 16, 32])
    mlp_dim = trial.suggest_categorical('mlp_dim', [128, 512, 1024])
    optimizer_name = trial.suggest_categorical("optimizer",
                                               ["Adam", "RMSprop"])
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    print('dim:', dim, 'mlp_dim:', mlp_dim, 'depth:', depth, 'heads:', heads)
    model = ViT(
        dim=dim,
        image_size=28,
        patch_size=patch_size,
        num_classes=10,
        depth=depth,  # number of transformer blocks
        heads=heads,  # number of attention heads
        mlp_dim=mlp_dim,
        channels=1,
        #dropout=0.2,
    )

    # vanilla CNN baseline: 0.96
    # model = Net()
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    # optimizer
    #optimizer = optim.Adam(model.parameters(), lr=0.001)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

    # scheduler
    scheduler = StepLR(optimizer, step_size=1, gamma=gamma)

    for epoch in range(1, epochs + 1):
        train(model, criterion, device, train_loader, optimizer, epoch)
        val_acc = test(model, device, test_loader)
        scheduler.step()

        # Checkpointing is disabled in this example; uncomment to save after every epoch:
        # torch.save(model.state_dict(), "mnist_cnn.pt")

        trial.report(val_acc, epoch)

        # Handle pruning based on the intermediate value.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    wandb.log({'val_acc': val_acc})
    return val_acc
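
The objective only scores a single trial; a minimal sketch of how it would typically be driven by an Optuna study (the pruner choice and trial count below are assumptions, not part of the original example):

study = optuna.create_study(direction="maximize",
                            pruner=optuna.pruners.MedianPruner())
study.optimize(Objective, n_trials=20)
print("Best trial params:", study.best_trial.params)
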
                update_freq_schedule=args.kfac_update_freq_schedule)
    else:
        preconditioner = None

    print(f"======== optimizer={optimizer}\n\n======== MODEL={model.name_()}\n======== preconditioner={preconditioner}")
    # KFAC guarantees grads are equal across ranks before opt.step() is called,
    # so if we do not use KFAC we need to wrap the optimizer with Horovod's DistributedOptimizer
    if isHVD:
        compression = hvd.Compression.fp16 if args.fp16_allreduce else hvd.Compression.none
        optimizer = hvd.DistributedOptimizer(optimizer, 
                                            named_parameters=model.named_parameters(),
                                            compression=compression,
                                            op=hvd.Average,
                                            backward_passes_per_step=args.batches_per_allreduce)
        hvd.broadcast_optimizer_state(optimizer, root_rank=0)
        hvd.broadcast_parameters(model.state_dict(), root_rank=0)

    if len(lr_scheduler) == 0:
        # default schedule: warm up for args.warmup_epochs, then decay at the epochs given in args.lr_decay
        lrs = create_lr_schedule(num_replicas, args.warmup_epochs, args.lr_decay)
        lr_scheduler = [LambdaLR(optimizer, lrs)]
    if use_kfac:
        lr_scheduler.append(LambdaLR(preconditioner, lrs))
    for ls in lr_scheduler:
        print(f"======== lr_scheduler={ls.state_dict()}")
    start = time.time()

    for epoch in range(config.epochs):
        train(epoch)
        test(epoch)
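
The standalone training script that follows needs its own imports; a plausible set is sketched below, where the local modules providing ViT, ViT_sphere, SMNIST and DVSC are hypothetical names:

import argparse
import os

import numpy as np
import torch
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from tqdm import tqdm

from models import ViT, ViT_sphere   # hypothetical local module
from datasets import SMNIST, DVSC    # hypothetical local module
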
def main():

    parser = argparse.ArgumentParser(description='ViT')
    parser.add_argument('--data_dir', default='data/sph_dogs_vs_cats')
    parser.add_argument('--dataset', default='dvsc')
    parser.add_argument('--exp_id', default='sdvsc-adam')
    parser.add_argument('--mode', default='normal')
    parser.add_argument('--batch', default=128, type=int)
    parser.add_argument('--epochs', default=10, type=int)
    parser.add_argument('--cuda', default=True)
    parser.add_argument('--optim', default='SGD')
    args = parser.parse_args()

    os.makedirs('weights', exist_ok=True)

    dataset = {'smnist': SMNIST, 'dvsc': DVSC}
    if args.dataset == 'smnist':
        image_size = 60
        patch_size = 10
        num_classes = 10
        samp = 6
    elif args.dataset == 'dvsc':
        image_size = 384
        patch_size = 32
        num_classes = 2
        samp = 12

    if args.mode == 'normal':
        model = ViT(image_size=image_size,
                    patch_size=patch_size,
                    num_classes=num_classes,
                    dim=512,
                    depth=4,
                    heads=8,
                    mlp_dim=512,
                    dropout=0.1,
                    emb_dropout=0.1)
    else:
        model = ViT_sphere(
            image_size=image_size,
            patch_size=patch_size,
            num_classes=num_classes,
            dim=512,
            depth=4,
            heads=8,
            mlp_dim=512,
            base_order=1,
            mode=args.mode,  # face, vertex and regular
            samp=samp,
            dropout=0.1,
            emb_dropout=0.1)

    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])

    print("Trainable parameters", params)

    cuda = args.cuda
    epochs = args.epochs
    batch = args.batch
    path = 'weights/'

    train_data = dataset[args.dataset](args.data_dir, 'train', image_size,
                                       image_size, None)
    valid_data = dataset[args.dataset](args.data_dir, 'valid', image_size,
                                       image_size, None)

    train_loader = DataLoader(dataset=train_data,
                              batch_size=batch,
                              shuffle=True)
    valid_loader = DataLoader(dataset=valid_data,
                              batch_size=batch,
                              shuffle=True)

    if cuda:
        model = model.cuda()
    model.train()
    if args.optim == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
    else:
        optimizer = optim.Adam(model.parameters(), lr=1e-3)

    cla_loss = torch.nn.CrossEntropyLoss()

    valid_loss = 1000
    valid_acc = 0

    print("Training Start")
    T_L = []
    V_L = []
    V_a = []
    for epoch in range(epochs):
        print("Epoch", epoch + 1)
        model.train()
        L = []
        for i, data in enumerate(tqdm(train_loader)):
            img, target = data
            if cuda:
                img = img.cuda()
                target = target.cuda()
            preds = model(img)
            output = cla_loss(preds, target)
            L.append(output.cpu().item())
            output.backward()
            optimizer.step()
            optimizer.zero_grad()

        T_L.append(np.mean(L))
        print("train loss:", np.mean(L))

        sum_acc = 0
        total = len(valid_data)
        model.eval()
        V = []  # validation losses, kept separate from the training losses in L
        with torch.no_grad():
            for i, data in enumerate(tqdm(valid_loader)):
                img, target = data
                if cuda:
                    img = img.cuda()
                    target = target.cuda()
                preds = model(img)
                V.append(cla_loss(preds, target).item())
                probabilities = torch.nn.functional.softmax(preds, dim=1)
                preds = torch.argmax(probabilities, dim=1)
                sum_acc += (preds == target).sum().item()

        v_l = np.mean(V)
        v_a = sum_acc / total * 100

        if v_a > valid_acc:
            valid_acc = v_a
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                }, path + args.exp_id + 'model_acc.pth')

        if v_l < valid_loss:
            valid_loss = v_l
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                }, path + args.exp_id + 'model_loss.pth')

        V_L.append(v_l)
        V_a.append(v_a)
        print("val loss:", v_l)
        print("val acc:", v_a)

    print(T_L)
    plt.plot(T_L, label='Train_loss', color='blue')
    plt.plot(V_L, label='Valid_loss', color='red')
    plt.legend(loc="upper left")
    plt.xlabel("num of epochs")
    plt.ylabel("loss")
    plt.savefig(path + args.exp_id + 'Learning_Curves.png')
    plt.clf()
    plt.plot(V_a, label='Valid_acc', color='cyan')
    plt.legend(loc="upper left")
    plt.xlabel("num of epochs")
    plt.ylabel("accuracy")
    plt.savefig(path + args.exp_id + 'Val_acc.png')

    torch.save(
        {
            'epoch': epochs,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
        }, path + args.exp_id + 'model_last.pth')
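
To make the script directly executable, the usual entry-point guard can be added; the file name in the example command is hypothetical:

if __name__ == '__main__':
    main()

# Example invocation (hypothetical file name):
#   python train_sphere_vit.py --dataset dvsc --mode normal --optim Adam --epochs 10
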
Example #4
        test_preds = v(test_img).view(-1, 32, 2)

        for k in range(test_preds.shape[0]):
            test_pred, test_pred_label = confident_strategy(test_preds[k])
            all_test_preds.append(test_pred_label)
            all_labels.append(int(test_label[k].cpu()))
            total_test_loss += bce(test_pred, test_label[k]).detach() #total_test_loss += bce(sigmoid(test_pred), test_label[k]).detach()
            TP_test, FN_test, FP_test, TN_test = confusion_matrix_c(test_pred_label, int(test_label[k].cpu()), TP_test,
                                                                    FN_test, FP_test, TN_test)
    '''       
    test_loss = criterion(test_preds, test_label)
    test_output = torch.argmax(test_preds, dim=1)
    test_correct = (test_output == test_label).float().sum()
    '''
    if best_log_loss > (total_test_loss/len(test_dataloader)):
        torch.save(v.state_dict(), 'best_model.pt')
        best_log_loss = (total_test_loss/len(test_dataloader))

    test_accuracy = (TP_test + TN_test) / (TP_test + FN_test + FP_test + TN_test + 2e-5)
    test_precision = TP_test / (TP_test + FP_test + 2e-5)
    test_recall = TP_test / (TP_test + FN_test + 2e-5)
    test_f1_score = 2 * ((test_precision * test_recall) / (test_precision + test_recall + 2e-5))
    # writer
    print("{} Test Log Loss: {:.3f}, Accuracy: {:.3f}, Precision: {:.3f}, Recall: {:.3f}".format(i+1,
                                                                                                 (total_test_loss/len(test_dataloader)),
                                                                                                 test_accuracy,
                                                                                                 test_precision,
                                                                                                 test_recall))
    writer.add_scalar('test_epoch_log_loss', (total_test_loss/len(test_dataloader)), i + 1)
    writer.add_scalar('test_epoch_accuracy', test_accuracy, i + 1)
    writer.add_scalar('test_epoch_precision', test_precision, i + 1)
                    val_loss += criterion(val_preds, val_labels)

                    ##### TP, FN, FP, TN #####
                    TP_val, FN_val, FP_val, TN_val = confusion_matrix_c(
                        val_preds, val_labels)

                    total_acc += TP_val + TN_val
                    val_accuracy = (TP_val + TN_val) / (TP_val + FN_val +
                                                        FP_val + TN_val)
                    print("[Validation] {} Loss: {:.3f}, Accuracy: {:.3f}".
                          format(k, val_loss.data, val_accuracy))

                ##### Save best model #####:
                total_acc /= len(val_set)
                if best_acc < total_acc:
                    torch.save(v.state_dict(),
                               'best_model+' + str(iter) + '.pt')
                    best_acc = total_acc
                    print("===> Best model saved in epoch:", i, ", iter:",
                          iter, ", acc:", total_acc)

                # writer
                # writer.add_scalar('test_epoch_loss', test_loss.data, j)
                writer.add_scalar('val_accuracy', total_acc, iter)
                # writer.add_scalar('test_epoch_precision', test_precision, j)
                # writer.add_scalar('test_epoch_recall', test_recall, j)
                # writer.add_scalar('test_epoch_f1score', test_f1_score, j)
                # writer.add_hparams({"test_TP": TP_test, "test_TN": TN_test, "test_FP": FP_test, "test_FN": FN_test})

train_iter = iter
##### TEST #####
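
The snippet calls a project-specific confusion_matrix_c helper to accumulate confusion-matrix counts. A minimal sketch of what the six-argument form used above could look like, treating label 1 as the positive class (this is an assumption, not the original implementation):

def confusion_matrix_c(pred_label, true_label, TP, FN, FP, TN):
    # Accumulate binary confusion-matrix counts for one sample.
    if true_label == 1:
        if pred_label == 1:
            TP += 1
        else:
            FN += 1
    else:
        if pred_label == 1:
            FP += 1
        else:
            TN += 1
    return TP, FN, FP, TN
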
Example #6
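This snippet converts a pretrained JAX/TensorFlow ViT checkpoint into a PyTorch state dict. It assumes input_size and pretain_tf_model (the loaded checkpoint, e.g. read from an .npz file) are defined earlier in the script; the imports it needs are roughly the following, where the exact ViT import path is an assumption:

import numpy as np
import jax.numpy as jnp
import torch
from vit_pytorch import ViT  # or the local ViT implementation used by this project
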
patch_size = 16
num_layers = 12
# print(pretain_tf_model.keys())
# print_size(pretain_tf_model['pre_logits'])

v = ViT(image_size=input_size,
        patch_size=patch_size,
        num_classes=1000,
        depth=num_layers,
        heads=12,
        mlp_dim=3072,
        dropout=0.1,
        emb_dropout=0.1)

print("Model's state_dict:")
for param_tensor in v.state_dict():
    print(param_tensor, "\t", v.state_dict()[param_tensor].size())

## copy embedding
tf_dict = {}

embedding_weight_shape = pretain_tf_model['embedding']['kernel'].shape
embedding_weight = np.array(
    jnp.transpose(pretain_tf_model['embedding']['kernel'], (3, 2, 0, 1)))
# embedding_weight = pretain_tf_model['embedding']['kernel'].reshape([embedding_weight_shape[3],embedding_weight_shape[2],embedding_weight_shape[1],embedding_weight_shape[0]])
tf_dict['embedding.weight'] = torch.from_numpy(embedding_weight)
tf_dict['embedding.bias'] = torch.from_numpy(
    pretain_tf_model['embedding']['bias'])
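
As a sanity check at any point during the conversion, the partially filled tf_dict can be loaded with strict=False, which reports the keys that are still missing (a usage sketch, not part of the original snippet):

missing, unexpected = v.load_state_dict(tf_dict, strict=False)
print("missing keys:", missing)
print("unexpected keys:", unexpected)
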

## copy mlp_head