def main(_run, _log):
    args = argparse.Namespace(**_run.config)
    args = post_config_hook(args, _run)

    args.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    root = "./datasets"

    train_sampler = None

    if args.dataset == "STL10":
        train_dataset = torchvision.datasets.STL10(
            root, split="unlabeled", download=True, transform=TransformsSimCLR()
        )
    elif args.dataset == "CIFAR10":
        train_dataset = torchvision.datasets.CIFAR10(
            root, download=True, transform=TransformsSimCLR()
        )
    else:
        raise NotImplementedError

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=(train_sampler is None),
        drop_last=True,
        num_workers=args.workers,
        sampler=train_sampler,
    )

    model, optimizer, scheduler = load_model(args, train_loader)

    tb_dir = os.path.join(args.out_dir, _run.experiment_info["name"])
    os.makedirs(tb_dir, exist_ok=True)
    writer = SummaryWriter(log_dir=tb_dir)

    mask = mask_correlated_samples(args)
    criterion = NT_Xent(args.batch_size, args.temperature, mask, args.device)

    args.global_step = 0
    args.current_epoch = 0
    for epoch in range(args.start_epoch, args.epochs):
        lr = optimizer.param_groups[0]['lr']
        loss_epoch = train(args, train_loader, model, criterion, optimizer, writer)

        if scheduler:
            scheduler.step()

        if epoch % 10 == 0:
            save_model(args, model, optimizer)

        writer.add_scalar("Loss/train", loss_epoch / len(train_loader), epoch)
        writer.add_scalar("Misc/learning_rate", lr, epoch)
        print(
            f"Epoch [{epoch}/{args.epochs}]\t Loss: {loss_epoch / len(train_loader)}\t lr: {round(lr, 5)}"
        )
        args.current_epoch += 1

    ## end training
    save_model(args, model, optimizer)
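
Note: mask_correlated_samples and NT_Xent come from the surrounding SimCLR codebase. A minimal sketch of the mask helper, assuming it marks the negative pairs in the (2N x 2N) similarity matrix over the 2N augmented views:

import torch

def mask_correlated_samples(args):
    # True wherever the similarity matrix holds a negative pair, i.e.
    # everywhere except the diagonal and the two positive off-diagonals.
    N = 2 * args.batch_size
    mask = torch.ones((N, N), dtype=torch.bool)
    mask.fill_diagonal_(False)
    for i in range(args.batch_size):
        mask[i, args.batch_size + i] = False
        mask[args.batch_size + i, i] = False
    return mask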
Example #2
File: main.py Project: kmalhotra30/SimCLR
def main(_run, _log):
    args = argparse.Namespace(**_run.config)
    args = post_config_hook(args, _run)

    args.device = torch.device(
        "cuda:0" if torch.cuda.is_available() else "cpu")

    root = "./datasets"
    model = load_model(args)
    model = model.to(args.device)
    optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)  # TODO: LARS

    train_sampler = None
    train_dataset = torchvision.datasets.STL10(root,
                                               split="unlabeled",
                                               download=True,
                                               transform=TransformsSimCLR())

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=(train_sampler is None),
        drop_last=True,
        num_workers=args.workers,
        sampler=train_sampler,
    )

    tb_dir = os.path.join(args.out_dir, _run.experiment_info["name"])
    os.makedirs(tb_dir, exist_ok=True)
    writer = SummaryWriter(log_dir=tb_dir)

    mask = mask_correlated_samples(args)
    criterion = NT_Xent(args.batch_size, args.temperature, mask, args.device)

    args.global_step = 0
    args.current_epoch = 0
    for epoch in range(args.start_epoch, args.epochs):
        loss_epoch = train(args, train_loader, model, criterion, optimizer,
                           writer)

        writer.add_scalar("Loss/train", loss_epoch / len(train_loader), epoch)
        if epoch % 10 == 0:
            save_model(args, model, optimizer)

        print(
            f"Epoch [{epoch}/{args.epochs}]\t Loss: {loss_epoch / len(train_loader)}"
        )
        args.current_epoch += 1

    ## end training
    save_model(args, model, optimizer)
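
All of these examples rely on TransformsSimCLR to produce two augmented views of each image. A minimal sketch, assuming the standard SimCLR augmentation pipeline and a default crop size:

import torchvision.transforms as T

class TransformsSimCLR:
    # __call__ returns two independently augmented views of the same image;
    # test_transform is a plain resize used for linear evaluation.
    def __init__(self, size=224):
        color_jitter = T.ColorJitter(0.8, 0.8, 0.8, 0.2)
        self.train_transform = T.Compose([
            T.RandomResizedCrop(size=size),
            T.RandomHorizontalFlip(),
            T.RandomApply([color_jitter], p=0.8),
            T.RandomGrayscale(p=0.2),
            T.ToTensor(),
        ])
        self.test_transform = T.Compose([
            T.Resize(size=size),
            T.ToTensor(),
        ])

    def __call__(self, x):
        return self.train_transform(x), self.train_transform(x)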
Example #3
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="SimCLR")
    config = yaml_config_hook("./config/config.yaml")
    for k, v in config.items():
        parser.add_argument(f"--{k}", default=v, type=type(v))

    args = parser.parse_args()
    args.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    if args.dataset == "STL10":
        train_dataset = torchvision.datasets.STL10(
            args.dataset_dir,
            split="train",
            download=True,
            transform=TransformsSimCLR(size=args.image_size).test_transform,
        )
        test_dataset = torchvision.datasets.STL10(
            args.dataset_dir,
            split="test",
            download=True,
            transform=TransformsSimCLR(size=args.image_size).test_transform,
        )
    elif args.dataset == "CIFAR10":
        train_dataset = torchvision.datasets.CIFAR10(
            args.dataset_dir,
            train=True,
            download=True,
            transform=TransformsSimCLR(size=args.image_size).test_transform,
        )
        test_dataset = torchvision.datasets.CIFAR10(
            args.dataset_dir,
            train=False,
            download=True,
            transform=TransformsSimCLR(size=args.image_size).test_transform,
        )
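
The yaml_config_hook used above turns a YAML config file into argparse defaults. A minimal sketch, assuming it simply flattens the file into a dict (the full helper may also merge files listed under a "defaults" key):

import yaml

def yaml_config_hook(config_file):
    # Return a flat dict of option name -> default value so each key can be
    # registered as a --flag on the argument parser.
    with open(config_file) as f:
        cfg = yaml.safe_load(f)
    cfg.pop("defaults", None)  # assumed to be handled separately, if present
    return cfg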
Example #4
#pprint(vars(args))

image_folder = 'data'
annotation_csv = 'data/annotation.csv'

unlabeled_scene_index = np.arange(106)

root = "./datasets"

train_sampler = None

if args.dataset == "STL10":
    train_dataset = torchvision.datasets.STL10(root,
                                               split="unlabeled",
                                               download=True,
                                               transform=TransformsSimCLR())
elif args.dataset == "CIFAR10":
    train_dataset = torchvision.datasets.CIFAR10(root,
                                                 download=True,
                                                 transform=TransformsSimCLR())
elif args.dataset == "road":
    train_dataset = SimclrUnlabeledDataset(image_folder=image_folder,
                                           scene_index=unlabeled_scene_index,
                                           first_dim='sample',
                                           transform=TransformsSimCLR())
else:
    raise NotImplementedError

train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=args.batch_size,
    shuffle=(train_sampler is None),
    drop_last=True,
    num_workers=args.workers,
    sampler=train_sampler,
)
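
SimclrUnlabeledDataset comes from the project's data_helper module; the version below is a hypothetical sketch, assuming a scene_<i>/sample_<j>/*.jpeg folder layout and a paired transform like TransformsSimCLR:

import glob
import os
from PIL import Image
from torch.utils.data import Dataset

class SimclrUnlabeledDataset(Dataset):
    # Hypothetical sketch: serves unlabeled camera images one sample at a
    # time and applies the transform, which returns two augmented views.
    def __init__(self, image_folder, scene_index, first_dim='sample', transform=None):
        self.image_paths = sorted(
            p for i in scene_index
            for p in glob.glob(os.path.join(image_folder, f"scene_{i}", "*", "*.jpeg"))
        )
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image = Image.open(self.image_paths[idx]).convert("RGB")
        return self.transform(image), 0  # dummy label, like torchvision datasets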
Example #5
def main(_run, _log):
    args = argparse.Namespace(**_run.config)
    args = post_config_hook(args, _run)

    args.device = torch.device(
        "cuda:0" if torch.cuda.is_available() else "cpu")

    root = "./datasets"
    train_sampler = None
    valid_sampler = None

    if args.dataset == "STL10":
        dataset = torchvision.datasets.STL10(
            root,
            split="train",
            download=True,
            transform=TransformsSimCLR(size=224).test_transform,
        )
        test_dataset = torchvision.datasets.STL10(
            root,
            split="test",
            download=True,
            transform=TransformsSimCLR(size=224).test_transform,
        )
    elif args.dataset == "CIFAR10":
        dataset = torchvision.datasets.CIFAR10(
            root,
            train=True,
            download=True,
            transform=TransformsSimCLR(size=224).test_transform,
        )
        test_dataset = torchvision.datasets.CIFAR10(
            root,
            train=False,
            download=True,
            transform=TransformsSimCLR(size=224).test_transform,
        )
    elif args.dataset == "MATEK":
        dataset, train_sampler, valid_sampler = MatekDataset(
            root=root,
            transforms=TransformsSimCLR(size=128).test_transform,
            test_size=args.test_size).get_dataset()
    elif args.dataset == "JURKAT":
        dataset, train_sampler, valid_sampler = JurkatDataset(
            root=root,
            transforms=TransformsSimCLR(size=64).test_transform,
            test_size=args.test_size).get_dataset()
    elif args.dataset == "PLASMODIUM":
        dataset, train_sampler, valid_sampler = PlasmodiumDataset(
            root=root,
            transforms=TransformsSimCLR(size=128).test_transform,
            test_size=args.test_size).get_dataset()
    else:
        raise NotImplementedError

    train_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=args.logistic_batch_size,
        shuffle=(train_sampler is None),
        drop_last=True,
        num_workers=args.workers,
        sampler=train_sampler,
    )

    test_loader = torch.utils.data.DataLoader(
        # use the held-out test set unless the dataset provides its own
        # validation sampler over the merged dataset
        dataset if valid_sampler is not None else test_dataset,
        batch_size=args.logistic_batch_size,
        shuffle=(valid_sampler is None),
        drop_last=True,
        num_workers=args.workers,
        sampler=valid_sampler,
    )

    simclr_model, _, _ = load_model(args, train_loader, reload_model=True)
    simclr_model = simclr_model.to(args.device)
    simclr_model.eval()

    # Logistic Regression
    n_classes = args.n_classes  # stl-10
    model = LogisticRegression(simclr_model.n_features, n_classes)
    model = model.to(args.device)

    optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
    criterion = torch.nn.CrossEntropyLoss()

    print("### Creating features from pre-trained context model ###")
    (train_X, train_y, test_X, test_y) = get_features(simclr_model,
                                                      train_loader,
                                                      test_loader, args.device)

    arr_train_loader, arr_test_loader = create_data_loaders_from_arrays(
        train_X, train_y, test_X, test_y, args.logistic_batch_size)

    for epoch in range(args.logistic_epochs):
        loss_epoch, accuracy_epoch = train(args, arr_train_loader,
                                           simclr_model, model, criterion,
                                           optimizer)
        print(
            f"Epoch [{epoch}/{args.logistic_epochs}]\t Loss: {loss_epoch / len(arr_train_loader)}\t Accuracy: {accuracy_epoch / len(arr_train_loader)}"
        )

    # final testing
    loss_epoch, accuracy_epoch, report = test(args, arr_test_loader,
                                              simclr_model, model, criterion,
                                              optimizer)
    print(
        f"[FINAL]\t Loss: {loss_epoch / len(arr_test_loader)}\t Accuracy: {accuracy_epoch / len(arr_test_loader)}"
    )

    print(report)
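
get_features runs the frozen encoder once over both loaders and returns numpy arrays; create_data_loaders_from_arrays then wraps those arrays for the logistic head. A plausible minimal version of the latter:

import torch

def create_data_loaders_from_arrays(X_train, y_train, X_test, y_test, batch_size):
    # Wrap the pre-computed feature arrays in TensorDatasets so the logistic
    # regression head can be trained without re-running the encoder.
    train = torch.utils.data.TensorDataset(
        torch.from_numpy(X_train), torch.from_numpy(y_train)
    )
    test = torch.utils.data.TensorDataset(
        torch.from_numpy(X_test), torch.from_numpy(y_test)
    )
    train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=False)
    test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False)
    return train_loader, test_loader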
Example #6
File: main.py Project: AhmadQasim/SimCLR
def main(_run, _log):
    args = argparse.Namespace(**_run.config)
    args = post_config_hook(args, _run)

    args.device = torch.device(
        "cuda:0" if torch.cuda.is_available() else "cpu")
    args.n_gpu = torch.cuda.device_count()

    train_sampler = None

    if args.dataset == "STL10":
        train_dataset = torchvision.datasets.STL10(
            root=args.dataset_root,
            split="unlabeled",
            download=True,
            transform=TransformsSimCLR(size=96))
    elif args.dataset == "CIFAR10":
        train_dataset = torchvision.datasets.CIFAR10(
            root=args.dataset_root,
            download=True,
            transform=TransformsSimCLR(size=32))
    elif args.dataset == "MATEK":
        train_dataset, _ = MatekDataset(
            root=args.dataset_root,
            transforms=TransformsSimCLR(size=128)).get_dataset()
    elif args.dataset == "JURKAT":
        train_dataset, _ = JurkatDataset(
            root=args.dataset_root,
            transforms=TransformsSimCLR(size=64)).get_dataset()
    elif args.dataset == "PLASMODIUM":
        train_dataset, _ = PlasmodiumDataset(
            root=args.dataset_root,
            transforms=TransformsSimCLR(size=128)).get_dataset()
    else:
        raise NotImplementedError

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=(train_sampler is None),
        drop_last=True,
        num_workers=args.workers,
        sampler=train_sampler,
    )

    model, optimizer, scheduler = load_model(args, train_loader)

    print(f"Using {args.n_gpu}'s")
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)
        model = convert_model(model)
        model = model.to(args.device)

    print(model)

    tb_dir = os.path.join(args.out_dir, _run.experiment_info["name"])
    os.makedirs(tb_dir, exist_ok=True)
    writer = SummaryWriter(log_dir=tb_dir)

    criterion = NT_Xent(args.batch_size, args.temperature, args.device)

    args.global_step = 0
    args.current_epoch = 0
    for epoch in range(args.start_epoch, args.epochs):
        lr = optimizer.param_groups[0]['lr']
        loss_epoch = train(args, train_loader, model, criterion, optimizer,
                           writer)

        if scheduler:
            scheduler.step()

        if epoch % 10 == 0:
            save_model(args, model, optimizer)

        writer.add_scalar("Loss/train", loss_epoch / len(train_loader), epoch)
        writer.add_scalar("Misc/learning_rate", lr, epoch)
        print(
            f"Epoch [{epoch}/{args.epochs}]\t Loss: {loss_epoch / len(train_loader)}\t lr: {round(lr, 5)}"
        )
        args.current_epoch += 1

    save_model(args, model, optimizer)
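
Unlike Example #1, this variant builds the negative mask inside the criterion, so NT_Xent takes only (batch_size, temperature, device). A minimal single-process sketch of the loss, with no distributed gathering:

import torch
import torch.nn as nn
import torch.nn.functional as F

class NT_Xent(nn.Module):
    # Sketch of the normalized temperature-scaled cross-entropy loss.
    def __init__(self, batch_size, temperature, device):
        super().__init__()
        self.batch_size = batch_size
        self.temperature = temperature
        self.device = device

    def forward(self, z_i, z_j):
        N = 2 * self.batch_size
        z = torch.cat((z_i, z_j), dim=0)
        sim = F.cosine_similarity(z.unsqueeze(1), z.unsqueeze(0), dim=2) / self.temperature
        # positive pairs sit on the off-diagonals at offset batch_size
        positives = torch.cat(
            (torch.diag(sim, self.batch_size), torch.diag(sim, -self.batch_size))
        ).reshape(N, 1)
        # mask out self-similarity and the positives, keeping negatives only
        mask = torch.ones((N, N), dtype=torch.bool, device=sim.device)
        mask.fill_diagonal_(False)
        for i in range(self.batch_size):
            mask[i, self.batch_size + i] = False
            mask[self.batch_size + i, i] = False
        negatives = sim[mask].reshape(N, -1)
        # each row's correct "class" is its positive, placed in column 0
        logits = torch.cat((positives, negatives), dim=1)
        labels = torch.zeros(N, dtype=torch.long, device=sim.device)
        return F.cross_entropy(logits, labels)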
Example #7
def main(gpu, args):
    rank = args.nr * args.gpus + gpu
    dist.init_process_group("nccl", rank=rank, world_size=args.world_size)

    torch.manual_seed(0)
    torch.cuda.set_device(gpu)

    # dataset
    train_dataset = datasets.CIFAR10(
        args.dataset_dir,
        download=True,
        transform=TransformsSimCLR(size=args.image_size),  # paper 224
    )

    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset, num_replicas=args.world_size, rank=rank)

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        drop_last=True,
        num_workers=args.num_workers,
        pin_memory=True,
        sampler=train_sampler,
    )

    # model
    if args.resnet_version == "resnet18":
        resnet = models.resnet18(pretrained=False)
    elif args.resnet_version == "resnet50":
        resnet = models.resnet50(pretrained=False)
    else:
        raise NotImplementedError("ResNet not implemented")

    model = BYOL(resnet, image_size=args.image_size, hidden_layer="avgpool")
    model = model.cuda(gpu)

    # distributed data parallel
    model = DDP(model, device_ids=[gpu], find_unused_parameters=True)

    # optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    # TensorBoard writer

    if gpu == 0:
        writer = SummaryWriter()

    # solver
    global_step = 0
    for epoch in range(args.num_epochs):
        metrics = defaultdict(list)
        for step, ((x_i, x_j), _) in enumerate(train_loader):
            x_i = x_i.cuda(non_blocking=True)
            x_j = x_j.cuda(non_blocking=True)

            loss = model(x_i, x_j)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            model.module.update_moving_average()  # update moving average of target encoder

            if gpu == 0:
                print(f"Step [{step}/{len(train_loader)}]:\tLoss: {loss.item()}")
                writer.add_scalar("Loss/train_step", loss.item(), global_step)
                metrics["Loss/train"].append(loss.item())
                global_step += 1

        if gpu == 0:
            # write metrics to TensorBoard
            for k, v in metrics.items():
                writer.add_scalar(k, np.array(v).mean(), epoch)

            if epoch % args.checkpoint_epochs == 0:
                print(f"Saving model at epoch {epoch}")
                torch.save(resnet.state_dict(), f"./model-{epoch}.pt")

                # let other workers wait until model is finished
                # dist.barrier()

    # save your improved network
    if gpu == 0:
        torch.save(resnet.state_dict(), "./model-final.pt")

    cleanup()
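
This worker function expects one spawned process per GPU. A sketch of the launcher it assumes (parse_args is a hypothetical stand-in for however the script builds args):

import os
import torch.multiprocessing as mp

if __name__ == "__main__":
    args = parse_args()  # hypothetical: must provide nr, gpus, nodes, image_size, ...
    args.world_size = args.gpus * args.nodes
    # rendezvous address for the NCCL process group initialized inside main()
    os.environ["MASTER_ADDR"] = "127.0.0.1"
    os.environ["MASTER_PORT"] = "8010"
    mp.spawn(main, args=(args,), nprocs=args.gpus, join=True)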
Example #8
def main(gpu, args):
    rank = args.nr * args.gpus + gpu

    if args.nodes > 1:
        dist.init_process_group("nccl", rank=rank, world_size=args.world_size)
        torch.cuda.set_device(gpu)

    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    if args.dataset == "STL10":
        train_dataset = torchvision.datasets.STL10(
            args.dataset_dir,
            split="unlabeled",
            download=True,
            transform=TransformsSimCLR(size=args.image_size),
        )
    elif args.dataset == "CIFAR10":
        train_dataset = torchvision.datasets.CIFAR10(
            args.dataset_dir,
            download=True,
            transform=TransformsSimCLR(size=args.image_size),
        )
    else:
        raise NotImplementedError

    if args.nodes > 1:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset,
            num_replicas=args.world_size,
            rank=rank,
            shuffle=True)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=(train_sampler is None),
        drop_last=True,
        num_workers=args.workers,
        sampler=train_sampler,
    )

    # initialize ResNet
    encoder = get_resnet(args.resnet, pretrained=False)
    n_features = encoder.fc.in_features  # get dimensions of fc layer

    # initialize model
    model = SimCLR(args, encoder, n_features)
    if args.reload:
        model_fp = os.path.join(args.model_path,
                                "checkpoint_{}.tar".format(args.epoch_num))
        model.load_state_dict(
            torch.load(model_fp, map_location=args.device.type))
    model = model.to(args.device)

    # optimizer / loss
    optimizer, scheduler = load_optimizer(args, model)
    criterion = NT_Xent(args.batch_size, args.temperature, args.device,
                        args.world_size)

    # DDP / DP
    if args.dataparallel:
        model = convert_model(model)
        model = DataParallel(model)
    else:
        if args.nodes > 1:
            model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
            model = DDP(model, device_ids=[gpu])

    model = model.to(args.device)

    writer = None
    if args.nr == 0:
        writer = SummaryWriter()

    args.global_step = 0
    args.current_epoch = 0
    for epoch in range(args.start_epoch, args.epochs):
        lr = optimizer.param_groups[0]["lr"]
        loss_epoch = train(args, train_loader, model, criterion, optimizer,
                           writer)

        if args.nr == 0 and scheduler:
            scheduler.step()

        if args.nr == 0 and epoch % 10 == 0:
            save_model(args, model, optimizer)

        if args.nr == 0:
            writer.add_scalar("Loss/train", loss_epoch / len(train_loader),
                              epoch)
            writer.add_scalar("Misc/learning_rate", lr, epoch)
            print(
                f"Epoch [{epoch}/{args.epochs}]\t Loss: {loss_epoch / len(train_loader)}\t lr: {round(lr, 5)}"
            )
            args.current_epoch += 1

    ## end training
    save_model(args, model, optimizer)
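
load_optimizer returns an (optimizer, scheduler) pair. A minimal sketch, assuming the plain Adam path with an optional cosine schedule (large-batch SimCLR runs typically swap in LARS):

import torch

def load_optimizer(args, model):
    optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
    # decay the learning rate to zero over the full run
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=args.epochs, eta_min=0
    )
    return optimizer, scheduler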
Example #9
def main(gpu, args):
    rank = args.nr * args.gpus + gpu

    if args.nodes > 1:
        dist.init_process_group("nccl", rank=rank, world_size=args.world_size)
        torch.cuda.set_device(gpu)

    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    if args.dataset == "STL10":
        train_dataset = torchvision.datasets.STL10(
            args.dataset_dir,
            split="unlabeled",
            download=True,
            transform=TransformsSimCLR(size=args.image_size),
        )
    elif args.dataset == "CIFAR10":
        train_dataset = torchvision.datasets.CIFAR10(
            args.dataset_dir,
            download=True,
            transform=TransformsSimCLR(size=args.image_size),
        )
    else:
        raise NotImplementedError

    if args.nodes > 1:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset,
            num_replicas=args.world_size,
            rank=rank,
            shuffle=True)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=(train_sampler is None),
        drop_last=True,
        num_workers=args.workers,
        sampler=train_sampler,
    )

    # initialize ResNet
    encoder = get_resnet(args.resnet, pretrained=False)
    n_features = encoder.fc.in_features  # get dimensions of fc layer

    # initialize model
    model = SimCLR(args, encoder, n_features)
    if args.reload:
        model_fp = os.path.join(args.model_path,
                                "checkpoint_{}.tar".format(args.epoch_num))
        print(model_fp)
        model.load_state_dict(
            torch.load(model_fp, map_location=args.device.type))
    model = model.to(args.device)

    # optimizer / loss
    optimizer, scheduler = load_optimizer(args, model)
    criterion = NT_Xent(args.batch_size, args.temperature, args.device,
                        args.world_size)

    # DDP / DP
    if args.dataparallel:
        model = convert_model(model)
        model = DataParallel(model)
    else:
        if args.nodes > 1:
            model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
            model = DDP(model, device_ids=[gpu])

    model = model.to(args.device)

    writer = None
    if args.nr == 0:
        writer = SummaryWriter()

    #added by @IvanKruzhilov
    decoder = Decoder(3, 3, args.image_size)
    optimizer_decoder = torch.optim.Adam(decoder.parameters(), lr=0.001)
    #decoder.load_state_dict(torch.load('save/decoder_my_algorithm_augmented.pt'))
    decoder = decoder.to(args.device)

    args.global_step = 0
    args.current_epoch = 0
    for epoch in range(args.start_epoch, args.epochs):
        lr = optimizer.param_groups[0]["lr"]
        scatter_radius = 0.2
        random_fake = None  # set in train function now

        loss_epoch, loss_epoch_decoder, penalty_epoch = \
            train(args, train_loader, model, decoder, criterion, optimizer, \
            optimizer_decoder, writer, random_fake, scatter_radius)

        loss_mean, bce_mean = train_autoencoder(model, decoder, train_loader, None, \
                                                optimizer_decoder, freeze_encoder=True)

        if args.nr == 0 and scheduler:
            scheduler.step()

        if args.nr == 0 and epoch % 5 == 0:
            save_model(args, model, optimizer)
            torch.save(
                decoder.state_dict(),
                os.path.join(args.model_path, 'decoder{0}.pt'.format(epoch)))

        if epoch % 10 == 0:
            decoder = Decoder(3, 3, args.image_size)
            optimizer_decoder = torch.optim.Adam(decoder.parameters(),
                                                 lr=0.001)
            decoder = decoder.to(args.device)

        if args.nr == 0:
            writer.add_scalar("Loss/train", loss_epoch / len(train_loader),
                              epoch)
            writer.add_scalar("Misc/learning_rate", lr, epoch)
            mean_loss = loss_epoch / len(train_loader)
            mean_loss_decoder = loss_epoch_decoder / len(train_loader)
            mean_penalty = penalty_epoch / len(train_loader)
            print(
                f"Epoch [{epoch}/{args.epochs}]\t Loss: {mean_loss}\t "
                f"decoder loss: {mean_loss_decoder}\t penalty: {mean_penalty}\t lr: {round(lr, 5)}"
            )
            print('loss: ', loss_mean, 'mse: ', bce_mean)
            args.current_epoch += 1

    ## end training
    save_model(args, model, optimizer)
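
save_model checkpoints the network every few epochs and once more at the end. A plausible minimal version, unwrapping DataParallel/DDP so the checkpoint reloads cleanly on a single device:

import os
import torch

def save_model(args, model, optimizer):
    out = os.path.join(args.model_path, "checkpoint_{}.tar".format(args.current_epoch))
    # strip the "module." prefix added by (Distributed)DataParallel wrappers
    state = model.module.state_dict() if hasattr(model, "module") else model.state_dict()
    torch.save(state, out)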
Example #10
    # load pre-trained model from checkpoint
    simclr_model = SimCLR(args, encoder, n_features)
    model_fp = os.path.join(args.model_path,
                            "model{}.tar".format(args.model_num))
    simclr_model.load_state_dict(
        torch.load(model_fp, map_location=args.device.type))
    simclr_model = simclr_model.to(args.device)
    simclr_model.eval()

    n_classes = 3  # covid, healthy, other
    patience = 20

    # 5-fold cross validation
    merge_data = torchvision.datasets.ImageFolder(
        '/home/opticho/source/SimCLR/datasets/dataset2(1)/train',
        transform=TransformsSimCLR(size=(args.image_size,
                                         args.image_size)).test_transform)
    test_dataset = torchvision.datasets.ImageFolder(
        '/home/opticho/source/SimCLR/datasets/dataset2(1)/test',
        transform=TransformsSimCLR(size=(args.image_size,
                                         args.image_size)).test_transform)

    k_fold_split = KFoldSplit(5, merge_data, random_state=args.seed)
    test_loss, test_accuracy = [], []
    models = []

    for fold, (train_dataset, valid_dataset) in enumerate(k_fold_split):

        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.logistic_batch_size,
            shuffle=True,
        )
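
KFoldSplit is project-specific; a hypothetical sketch that yields (train, valid) subset pairs per fold using sklearn's KFold:

from sklearn.model_selection import KFold
from torch.utils.data import Subset

def KFoldSplit(n_splits, dataset, random_state=None):
    # Hypothetical sketch: each iteration yields disjoint train/validation
    # views over the merged dataset for one cross-validation fold.
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    for train_idx, valid_idx in kf.split(range(len(dataset))):
        yield Subset(dataset, train_idx), Subset(dataset, valid_idx)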
Example #11
import numpy as np
from data_helper import SimclrUnlabeledDataset
from helper import convert_map_to_lane_map, convert_map_to_road_map, collate_fn, draw_box

unlabeled_scene_index = np.arange(106)

"""### Load dataset into train loader"""

root = "./datasets"

train_sampler = None

if args.dataset == "STL10":
    train_dataset = torchvision.datasets.STL10(
        root, split="unlabeled", download=True, transform=TransformsSimCLR()
    )
elif args.dataset == "CIFAR10":
    train_dataset = torchvision.datasets.CIFAR10(
        root, download=True, transform=TransformsSimCLR()
    )
elif args.dataset == "road":
    train_dataset = SimclrUnlabeledDataset(image_folder=image_folder, 
      scene_index=unlabeled_scene_index, first_dim='sample', transform=TransformsSimCLR())
else:
    raise NotImplementedError

train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=args.batch_size,
    shuffle=(train_sampler is None),