        model_frozen.eval()
        model.freeze_encoder()
        loss_function = return_loss_function(model_frozen)
        mean_epoch_loss, validation_loss = \
            decoder_step(model, loss_function, optimizer, data_loader_train, data_loader_val, device)
        print('         autoencoder loss: {0:2.5f}, BCE val: {1:2.5f}'.format(
            mean_epoch_loss, validation_loss))


if __name__ == "__main__":
    device = conf['train']['device']

    model = AutoEncoder(in_channels=1,
                        dec_channels=1,
                        latent_size=conf['model']['latent_size'])
    model = model.to(device)
    model.load_state_dict(torch.load(load_path))

    dataset_path = os.path.join(conf['data']['dataset_path'],
                                conf['data']['dataset_file'])
    dspites_dataset = Dspites(dataset_path)
    train_val = train_val_split(dspites_dataset)
    val_test = train_val_split(train_val['val'], val_split=0.2)

    data_loader_train = DataLoader(train_val['train'],
                                   batch_size=conf['train']['batch_size'],
                                   shuffle=True,
                                   num_workers=2)
    data_loader_val = DataLoader(val_test['val'],
                                 batch_size=200,
                                 shuffle=False,
                                 num_workers=1)
Example #2
# imports assumed by this example (Config, AutoEncoder and SiameseNetwork
# are project-specific)
import torch
from torchvision import transforms

config = Config()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 

autoencoder = AutoEncoder(config)
siamese_network = SiameseNetwork(config)

autoencoder_file = '/autoencoder_epoch175_loss1.1991.pth'
siamese_file = '/siamese_network_epoch175_loss1.1991.pth'

if config.load_model:
    autoencoder.load_state_dict(torch.load(config.saved_models_folder + autoencoder_file))
    siamese_network.load_state_dict(torch.load(config.saved_models_folder + siamese_file))

autoencoder.to(device)
autoencoder.train()

siamese_network.to(device)
siamese_network.train()

params = list(autoencoder.parameters()) + list(siamese_network.parameters())

optimizer = torch.optim.Adam(params, lr=config.lr, betas=(0.9, 0.999))

transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    # transforms.RandomCrop(size=128),
    # transforms.RandomRotation(degrees=10),
    transforms.ToTensor(),
])
Example #3
# imports assumed by this example (AutoEncoder, Dspites, train_val_split,
# autoencoder_step, autoencoder_validation, image_batch_transformation,
# augment_transform_list1, load_path, save_path and freeze_encoder are
# defined elsewhere in the project)
import json
import os

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader


def main():
    with open("config.json") as json_file:
        conf = json.load(json_file)
    dataset_path = os.path.join(conf['data']['dataset_path'],
                                conf['data']['dataset_file'])
    device = conf['train']['device']

    model = AutoEncoder(in_channels=1,
                        dec_channels=1,
                        latent_size=conf['model']['latent_size'])
    model = model.to(device)
    model.load_state_dict(torch.load(load_path))

    dspites_dataset = Dspites(dataset_path)
    train_val = train_val_split(dspites_dataset)
    val_test = train_val_split(train_val['val'], val_split=0.2)

    data_loader_train = DataLoader(train_val['train'],
                                   batch_size=conf['train']['batch_size'],
                                   shuffle=True,
                                   num_workers=2)
    data_loader_val = DataLoader(val_test['val'],
                                 batch_size=200,
                                 shuffle=False,
                                 num_workers=1)
    data_loader_test = DataLoader(val_test['train'],
                                  batch_size=200,
                                  shuffle=False,
                                  num_workers=1)

    print('autoencoder training')
    print('frozen encoder: ', freeze_encoder)
    print('train dataset length: ', len(train_val['train']))
    print('val dataset length: ', len(val_test['val']))
    print('test dataset length: ', len(val_test['train']))

    print('latent space size:', conf['model']['latent_size'])
    print('batch size:', conf['train']['batch_size'])

    loss_function = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    model.train()
    if freeze_encoder:
        model.freeze_encoder()

    for epoch in range(25):
        if epoch > 15:
            for param_group in optimizer.param_groups:
                param_group['lr'] = max(0.00001,
                                        param_group['lr'] / conf['train']['lr_decay'])
                print('lr: ', param_group['lr'])

        loss_list = []
        model.train()

        for batch_i, batch in enumerate(data_loader_train):
            augment_transform = np.random.choice(augment_transform_list1)
            batch1 = image_batch_transformation(batch, augment_transform)
            # train on the augmented batch
            loss = autoencoder_step(model, batch1, device, loss_function)
            loss_list.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        mean_epoch_loss = sum(loss_list) / len(loss_list)
        model.eval()
        validation_loss = autoencoder_validation(data_loader_val, model,
                                                 device, loss_function)
        if epoch == 0:
            min_validation_loss = validation_loss
        else:
            min_validation_loss = min(min_validation_loss, validation_loss)
        print('epoch {0}, loss: {1:2.5f}, validation: {2:2.5f}'.format(
            epoch, mean_epoch_loss, validation_loss))
        if min_validation_loss == validation_loss:
            torch.save(model.state_dict(), save_path)

    model.load_state_dict(torch.load(save_path))
    test_results = autoencoder_validation(data_loader_test, model, device,
                                          loss_function)
    print('test result: ', test_results)
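The helpers autoencoder_step and autoencoder_validation are project-specific and not shown here. A minimal sketch of plausible implementations, assuming each batch is a dict whose 'image' entry holds the images and the model returns sigmoid reconstructions (both names and shapes are assumptions):

# Hypothetical sketch; the real helpers live elsewhere in the project.
def autoencoder_step(model, batch, device, loss_function):
    images = batch['image'].type(torch.FloatTensor).to(device)
    reconstruction = model(images)  # assumed to return values in [0, 1] for BCELoss
    return loss_function(reconstruction, images)

def autoencoder_validation(data_loader, model, device, loss_function):
    loss_list = []
    with torch.no_grad():
        for batch in data_loader:
            loss_list.append(
                autoencoder_step(model, batch, device, loss_function).item())
    return sum(loss_list) / len(loss_list)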
Example #4
# imports assumed by this example (MNIST, AutoEncoder and train_autoencoder
# are project-specific)
import os
from datetime import datetime

import numpy as np
import torch
import torchvision
from torch.utils.tensorboard import SummaryWriter


def main():

    save = "./experiments"
    log_dir = "./experiments"
    input_path = "./data/mnist"
    batch_size = 16
    lr = 1e-3
    latent_size = 12
    n_iter = 10

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    checkpoint_dir = f"{save}/checkpoints/autoencoder"
    os.makedirs(checkpoint_dir, exist_ok=True)

    if device.type == 'cuda':
        log_dir = f"{log_dir}/logs/autoencoder/cuda"
    else:
        log_dir = f"{log_dir}/logs/autoencoder/cpu"
    os.makedirs(log_dir, exist_ok=True)

    checkpoint_path = f'{checkpoint_dir}/checkpoint_' + datetime.now(
    ).strftime('%d_%m_%Y_%H:%M:%S')

    writer = SummaryWriter(log_dir)

    data = MNIST(transform=True,
                 test_size=0.1,
                 train_batch_size=batch_size,
                 input_path=input_path)
    traindata, valdata, testdata = data.data()
    train, val, test = data.loader()

    n = 300
    x, labels = testdata[np.random.randint(0, len(testdata), n)]
    images = torch.from_numpy(x.reshape(n, 1, 28, 28))
    labels = torch.from_numpy(labels).to(device)
    img_grid = torchvision.utils.make_grid(images)
    # matplotlib_imshow(img_grid, one_channel=True)
    writer.add_image(f'{n}_mnist_images', img_grid)

    images, labels = images.to(device), labels.to(device)

    model = AutoEncoder(28 * 28, latent_size)
    optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)
    criterion = torch.nn.MSELoss()

    model = model.to(device)

    writer.add_graph(model, images.view(len(images), 28 * 28))

    losses = train_autoencoder(train,
                               test,
                               model,
                               criterion,
                               optimizer,
                               device,
                               checkpoint_path,
                               writer,
                               n_iter=n_iter)

    with torch.no_grad():
        projection = model.encodeur(images.view(len(images), 28 * 28))

    writer.add_embedding(projection, metadata=labels, label_img=images)
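train_autoencoder is also project-specific; its signature can be read off the call above. A hedged sketch of what such a loop might look like, assuming the loaders yield (image, label) tensor batches and the model maps flattened 784-dim inputs to reconstructions:

# Hypothetical sketch of a train_autoencoder-style loop.
def train_autoencoder(train, test, model, criterion, optimizer, device,
                      checkpoint_path, writer, n_iter=10):
    losses = []
    for epoch in range(n_iter):
        model.train()
        epoch_loss = 0.0
        for x, _ in train:
            x = x.view(len(x), 28 * 28).to(device)
            optimizer.zero_grad()
            loss = criterion(model(x), x)  # reconstruction error
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        losses.append(epoch_loss / len(train))
        writer.add_scalar('train_loss', losses[-1], epoch)
        torch.save(model.state_dict(), checkpoint_path)  # checkpoint each epoch
    return losses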
Example #5
# imports assumed by this example (CustomDatasetFromImages, AutoEncoder and
# AutoTrainer are project-specific)
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torchvision import models


def run(batch_size, epochs, val_split, num_workers, print_every,
        trainval_csv_path, test_csv_path, model_type, tasks, lr, weight_decay,
        momentum, dataset_dir):

    train_dataset = CustomDatasetFromImages(trainval_csv_path,
                                            data_dir=dataset_dir)
    # test_dataset = CustomDatasetFromImages(test_csv_path, data_dir = dataset_dir)

    dset_len = len(train_dataset)
    val_size = int(val_split * dset_len)
    test_size = int(0.15 * dset_len)
    train_size = dset_len - val_size - test_size

    train_data, val_dataset, test_dataset = torch.utils.data.random_split(
        train_dataset, [train_size, val_size, test_size])
    train_loader_small = torch.utils.data.DataLoader(dataset=train_data,
                                                     batch_size=batch_size,
                                                     pin_memory=False,
                                                     drop_last=True,
                                                     shuffle=True,
                                                     num_workers=num_workers)

    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=2 * batch_size,
                                               pin_memory=False,
                                               drop_last=True,
                                               shuffle=True,
                                               num_workers=num_workers)
    val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                             batch_size=batch_size,
                                             pin_memory=False,
                                             drop_last=True,
                                             shuffle=True,
                                             num_workers=num_workers)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=batch_size,
                                              pin_memory=False,
                                              drop_last=True,
                                              shuffle=True,
                                              num_workers=num_workers)

    if model_type == 'densenet121':
        model = models.densenet121(pretrained=False)
    elif model_type == 'resnet101':
        model = models.resnet101(pretrained=False)
    elif model_type == 'resnet50':
        model = models.resnet50(pretrained=False)
    elif model_type == 'resnet34':
        model = models.resnet34(pretrained=False)
    elif model_type == 'vgg19':
        model = models.vgg19(pretrained=False)
    else:
        raise ValueError(f'unsupported model_type: {model_type}')

    model = AutoEncoder(model_type, model=model)
    model = nn.DataParallel(model)

    print(model)

    model = model.to('cuda')

    criterion = nn.MSELoss(reduction='sum')

    # =============================== PRE-TRAIN MODEL ========================
    optimizer = torch.optim.SGD(model.parameters(),
                                weight_decay=weight_decay,
                                momentum=momentum,
                                lr=lr,
                                nesterov=True)
    scheduler = ReduceLROnPlateau(optimizer,
                                  factor=0.5,
                                  patience=3,
                                  min_lr=1e-7,
                                  verbose=True)
    trainset_percent = (1 - val_split - 0.15)
    trainer = AutoTrainer(model,
                          optimizer,
                          scheduler,
                          criterion,
                          epochs,
                          print_every=print_every,
                          trainset_split=trainset_percent)
    trainer.train(train_loader, val_loader)
    test_loss = trainer.validate(test_loader)

    with open(trainer.output_log, 'a+') as out:
        print('Test Loss', test_loss, file=out)
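AutoTrainer is not shown here; note that the ReduceLROnPlateau scheduler above only has an effect if the trainer steps it with the monitored validation loss once per epoch, roughly as follows (evaluate is a hypothetical helper):

# Hypothetical call inside the AutoTrainer epoch loop:
val_loss = evaluate(model, val_loader)  # hypothetical validation pass
scheduler.step(val_loss)  # halves the lr after `patience` epochs without improvement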
Example #6
                            shuffle=False,
                            num_workers=opt.workers,
                            pin_memory=True,
                            drop_last=True)

    if opt.model == 'ae':
        net = AutoEncoder(3, n_classes=1, filters=opt.filters)
    elif opt.model == 'unet':
        net = UNet(3, n_classes=1, filters=opt.filters)
    elif opt.model == 'unet3plus':
        net = UNet3Plus(3, n_classes=1, filters=opt.filters)
    else:
        raise ValueError(f'unknown model: {opt.model}')

    if device == torch.device('cuda'):
        net = nn.DataParallel(net, device_ids=[0, 1, 2, 3])
        logger.info(f'use gpu: {net.device_ids}')
    net.to(device=device)

    # optimizer = optim.RMSprop(net.parameters(), lr=opt.lr, weight_decay=1e-8, momentum=0.9)
    optimizer = optim.SGD(net.parameters(), lr=opt.lr, momentum=0.9)
    criterion = nn.BCEWithLogitsLoss()
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     'max',
                                                     patience=opt.patience)

    train_net(net=net,
              device=device,
              train_loader=train_loader,
              val_loader=val_loader,
              epochs=opt.epochs,
              optimizer=optimizer,
              criterion=criterion,
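Note that the scheduler is created with mode 'max', so train_net (cut off above) is expected to step it with a score that should increase, e.g. a validation dice or IoU, rather than the BCE loss:

# Hypothetical call inside train_net:
scheduler.step(val_score)  # 'max' mode: lr drops when the score stops improving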
Example #7
# imports assumed by this example (AutoEncoder, Dspites, train_val_split,
# triplet_step, recall_validation, transform1, transform2, load_path and
# save_path are defined elsewhere in the project)
import json
import os

import torch
import torch.nn as nn
from torch.utils.data import DataLoader


def main():
    loss_function = nn.BCELoss()

    with open("config.json") as json_file:
        conf = json.load(json_file)
    device = conf['train']['device']

    dataset_path = os.path.join(conf['data']['dataset_path'],
                                conf['data']['dataset_file'])
    dspites_dataset = Dspites(dataset_path)
    train_val = train_val_split(dspites_dataset)
    val_test = train_val_split(train_val['val'], val_split=0.2)

    data_loader_train = DataLoader(train_val['train'],
                                   batch_size=conf['train']['batch_size'],
                                   shuffle=True,
                                   num_workers=2)
    data_loader_val = DataLoader(val_test['val'],
                                 batch_size=200,
                                 shuffle=False,
                                 num_workers=1)
    data_loader_test = DataLoader(val_test['train'],
                                  batch_size=200,
                                  shuffle=False,
                                  num_workers=1)

    print('metric learning')
    print('train dataset length: ', len(train_val['train']))
    print('val dataset length: ', len(val_test['val']))
    print('test dataset length: ', len(val_test['train']))

    print('latent space size:', conf['model']['latent_size'])
    print('batch size:', conf['train']['batch_size'])
    print('margin:', conf['train']['margin'])

    loss_list = []
    model = AutoEncoder(in_channels=1,
                        dec_channels=1,
                        latent_size=conf['model']['latent_size'])
    model = model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=conf['train']['lr'])

    model.train()
    if load_path:
        model.load_state_dict(torch.load(load_path))

    for epoch in range(10):
        for param_group in optimizer.param_groups:
            param_group['lr'] = max(0.00001, param_group['lr'] / conf['train']['lr_decay'])
            print('lr: ', param_group['lr'])
        loss_list = []

        for batch_i, batch in enumerate(data_loader_train):
            # if batch_i == 1000:
            #     break
            batch = batch['image']
            batch = batch.type(torch.FloatTensor)
            batch = batch.to(device)
            loss = triplet_step(model, batch, transform1, transform2)
            loss_list.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        recall, recall10 = recall_validation(model, data_loader_val,
                                             transform1, transform2, device)
        # recall is a score to maximize, so keep the checkpoint with the best value
        if epoch == 0:
            best_validation_recall = recall
        else:
            best_validation_recall = max(best_validation_recall, recall)
        if best_validation_recall == recall and save_path:
            torch.save(model.state_dict(), save_path)
        print('epoch {0}, loss {1:2.4f}'.format(
            epoch,
            sum(loss_list) / len(loss_list)))
        print('recall@3: {0:2.4f}, recall 10%: {1:2.4f}'.format(
            recall, recall10))

    model.load_state_dict(torch.load(save_path))
    recall, recall10 = recall_validation(model, data_loader_test, transform1,
                                         transform2, device)
    print('test recall@3: {0:2.4f}, recall@3 10%: {1:2.4f}'.format(
        recall, recall10))
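triplet_step, transform1 and transform2 are project-specific. A hedged sketch of a plausible triplet_step, assuming the model exposes an encode method, the two transforms produce two augmented views of the batch, and negatives are drawn by shuffling the batch (all of these are assumptions):

# Hypothetical sketch; the margin would come from the config.
triplet_loss = nn.TripletMarginLoss(margin=1.0)

def triplet_step(model, batch, transform1, transform2):
    anchor = model.encode(transform1(batch))    # assumed encoder accessor
    positive = model.encode(transform2(batch))  # second view of the same images
    # in-batch negatives (may occasionally pair an image with itself)
    negative = positive[torch.randperm(len(positive))]
    return triplet_loss(anchor, positive, negative)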
Example #8
# imports assumed by this example (Classifier, AutoEncoder, train_step and
# test_step are project-specific)
import argparse
from pathlib import Path, PurePosixPath

import numpy as np
import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--filename', required=True, help='Name/path of file')
    parser.add_argument('--save_dir', default='./outputs', help='Directory where results will be saved.')

    parser.add_argument('--pretrain_epochs', type=int, default=100, help="Number of epochs to pretrain the AE")
    parser.add_argument('--epochs', type=int, default=100, help="Number of epochs to train the AE and classifier")
    parser.add_argument('--dims_layers_ae', type=int, nargs='+', default=[500, 100, 10],
                        help="Dimensions of the AE layers")
    parser.add_argument('--dims_layers_classifier', type=int, nargs='+', default=[10, 5],
                        help="Dimensions of the classifier layers")
    parser.add_argument('--batch_size', type=int, default=50)
    parser.add_argument('--lr', type=float, default=0.001, help="Learning rate")
    parser.add_argument('--use_dropout', action='store_true', help="Use dropout")

    parser.add_argument('--no-cuda', action='store_true', help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1234, help='random seed (default: 1234)')

    parser.add_argument('--procedure', nargs='+', choices=['pre-training_ae', 'training_classifier', 'training_all'],
                        help='Training procedure(s) to run: pre-training_ae, training_classifier, training_all')
    parser.add_argument('--criterion_classifier', default='BCELoss', choices=['BCELoss', 'HingeLoss'],
                        help='Loss function for the classifier')
    parser.add_argument('--scale_loss', type=float, default=1., help='Weight of the classifier loss term')
    parser.add_argument('--earlyStopping', type=int, default=None,
                        help='Number of epochs without improvement before early stopping')
    parser.add_argument('--use_scheduler', action='store_true')
    args = parser.parse_args()
    print(args)

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    loaded = np.load(args.filename)
    x_train = loaded['data_train']
    x_test = loaded['data_test']
    y_train = loaded['lab_train']
    y_test = loaded['lab_test']
    del loaded

    name_target = PurePosixPath(args.filename).parent.stem
    n_split = PurePosixPath(args.filename).stem
    save_dir = f'{args.save_dir}/tensorboard/{name_target}_{n_split}'
    Path(save_dir).mkdir(parents=True, exist_ok=True)

    if args.dims_layers_classifier[0] == -1:
        args.dims_layers_classifier[0] = x_test.shape[1]

    model_classifier = Classifier(args.dims_layers_classifier, args.use_dropout).to(device)
    if args.criterion_classifier == 'HingeLoss':
        criterion_classifier = nn.HingeEmbeddingLoss()
        print('Use "Hinge" loss.')
    else:
        criterion_classifier = nn.BCEWithLogitsLoss()

    model_ae = None
    criterion_ae = None
    if 'training_classifier' != args.procedure[0]:
        args.dims_layers_ae = [x_train.shape[1]] + args.dims_layers_ae
        assert args.dims_layers_ae[-1] == args.dims_layers_classifier[0], \
            'Dimension of latent space must be equal with dimension of input classifier!'

        model_ae = AutoEncoder(args.dims_layers_ae, args.use_dropout).to(device)
        criterion_ae = nn.MSELoss()
        optimizer = torch.optim.Adam(list(model_ae.parameters()) + list(model_classifier.parameters()), lr=args.lr)
    else:
        optimizer = torch.optim.Adam(model_classifier.parameters(), lr=args.lr)

    scheduler = None
    if args.use_scheduler:
        # LambdaLR scales the base lr by the lambda's return value, so a constant
        # 0.95 would never decay; 0.95 ** ep gives exponential decay instead
        scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda ep: 0.95 ** ep)

    writer = SummaryWriter(save_dir)

    total_scores = {'roc_auc': 0, 'acc': 0, 'mcc': 0, 'bal': 0, 'recall': 0,
                    'max_roc_auc': 0, 'max_acc': 0, 'max_mcc': 0, 'max_bal': 0, 'max_recall': 0,
                    'pre-fit_time': 0, 'pre-score_time': 0, 'fit_time': 0, 'score_time': 0
                    }

    dir_model_ae = f'{args.save_dir}/models_AE'
    Path(dir_model_ae).mkdir(parents=True, exist_ok=True)
    # dir_model_classifier = f'{args.save_dir}/models_classifier'
    # Path(dir_model_classifier).mkdir(parents=True, exist_ok=True)

    path_ae = f'{dir_model_ae}/{name_target}_{n_split}.pth'
    # path_classifier = f'{dir_model_classifier}/{name_target}_{n_split}.pth'

    if 'pre-training_ae' in args.procedure:
        min_val_loss = np.Inf
        epochs_no_improve = 0

        epoch_tqdm = tqdm(range(args.pretrain_epochs), desc="Epoch pre-train loss")
        for epoch in epoch_tqdm:
            loss_train, time_trn = train_step(model_ae, None, criterion_ae, None, optimizer, scheduler, x_train,
                                              y_train, device, writer, epoch, args.batch_size, 'pre-training_ae')
            loss_test, _ = test_step(model_ae, None, criterion_ae, None, x_test, y_test, device, writer, epoch,
                                     args.batch_size, 'pre-training_ae')

            if not np.isfinite(loss_train):
                break

            total_scores['pre-fit_time'] += time_trn

            if loss_test < min_val_loss:
                torch.save(model_ae.state_dict(), path_ae)
                epochs_no_improve = 0
                min_val_loss = loss_test
            else:
                epochs_no_improve += 1
            epoch_tqdm.set_description(
                f"Epoch pre-train loss: {loss_train:.5f}, test loss: {loss_test:.5f} (minimal val-loss: {min_val_loss:.5f}, stop: {epochs_no_improve}|{args.earlyStopping})")
            if args.earlyStopping is not None and epoch >= args.earlyStopping and epochs_no_improve == args.earlyStopping:
                print('\033[1;31mEarly stopping in pre-training model\033[0m')
                break
        print(f"\033[1;5;33mLoad model AE form '{path_ae}'\033[0m")
        if device.type == "cpu":
            model_ae.load_state_dict(torch.load(path_ae, map_location=lambda storage, loc: storage))
        else:
            model_ae.load_state_dict(torch.load(path_ae))
        model_ae = model_ae.to(device)
        model_ae.eval()

    min_val_loss = np.Inf
    epochs_no_improve = 0

    epoch = None
    stage = 'training_classifier' if 'training_classifier' in args.procedure else 'training_all'
    epoch_tqdm = tqdm(range(args.epochs), desc="Epoch train loss")
    for epoch in epoch_tqdm:
        loss_train, time_trn = train_step(model_ae, model_classifier, criterion_ae, criterion_classifier, optimizer,
                                          scheduler, x_train, y_train, device, writer, epoch, args.batch_size,
                                          stage, args.scale_loss)
        loss_test, scores_val, time_tst = test_step(model_ae, model_classifier, criterion_ae, criterion_classifier,
                                                    x_test, y_test, device, writer, epoch, args.batch_size, stage,
                                                    args.scale_loss)

        if not np.isfinite(loss_train):
            break

        total_scores['fit_time'] += time_trn
        total_scores['score_time'] += time_tst
        if total_scores['max_roc_auc'] < scores_val['roc_auc']:
            for key, val in scores_val.items():
                total_scores[f'max_{key}'] = val

        if loss_test < min_val_loss:
            # torch.save(model_ae.state_dict(), path_ae)
            # torch.save(model_classifier.state_dict(), path_classifier)
            epochs_no_improve = 0
            min_val_loss = loss_test
            for key, val in scores_val.items():
                total_scores[key] = val
        else:
            epochs_no_improve += 1
        epoch_tqdm.set_description(
            f"Epoch train loss: {loss_train:.5f}, test loss: {loss_test:.5f} (minimal val-loss: {min_val_loss:.5f}, stop: {epochs_no_improve}|{args.earlyStopping})")
        if args.earlyStopping is not None and epoch >= args.earlyStopping and epochs_no_improve == args.earlyStopping:
            print('\033[1;31mEarly stopping!\033[0m')
            break
    total_scores['score_time'] /= epoch + 1
    writer.close()

    save_file = f'{args.save_dir}/{name_target}.txt'
    head = 'idx;params'
    temp = f'{n_split};pretrain_epochs:{args.pretrain_epochs},dims_layers_ae:{args.dims_layers_ae},' \
           f'dims_layers_classifier:{args.dims_layers_classifier},batch_size:{args.batch_size},lr:{args.lr},' \
           f'use_dropout:{args.use_dropout},procedure:{args.procedure},scale_loss:{args.scale_loss},' \
           f'earlyStopping:{args.earlyStopping}'
    for key, val in total_scores.items():
        head = head + f';{key}'
        temp = temp + f';{val}'

    not_exists = not Path(save_file).exists()
    with open(save_file, 'a') as f:
        if not_exists:
            f.write(f'{head}\n')
        f.write(f'{temp}\n')
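train_step and test_step are not shown; for the 'training_all' stage, scale_loss weights the classifier loss against the reconstruction loss. A hedged sketch of how the two terms are likely combined (the encoder/decoder attributes are assumptions):

# Hypothetical core of train_step in the 'training_all' stage:
optimizer.zero_grad()
latent = model_ae.encoder(x)      # assumed encoder attribute
x_rec = model_ae.decoder(latent)  # assumed decoder attribute
loss = criterion_ae(x_rec, x) + scale_loss * criterion_classifier(model_classifier(latent), y)
loss.backward()
optimizer.step()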