def main(_run, _log):
    args = argparse.Namespace(**_run.config)
    args = post_config_hook(args, _run)

    args.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    root = "./datasets"
    train_sampler = None

    if args.dataset == "STL10":
        train_dataset = torchvision.datasets.STL10(
            root, split="unlabeled", download=True, transform=TransformsSimCLR()
        )
    elif args.dataset == "CIFAR10":
        train_dataset = torchvision.datasets.CIFAR10(
            root, download=True, transform=TransformsSimCLR()
        )
    else:
        raise NotImplementedError

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=(train_sampler is None),
        drop_last=True,
        num_workers=args.workers,
        sampler=train_sampler,
    )

    model, optimizer, scheduler = load_model(args, train_loader)

    tb_dir = os.path.join(args.out_dir, _run.experiment_info["name"])
    os.makedirs(tb_dir)
    writer = SummaryWriter(log_dir=tb_dir)

    mask = mask_correlated_samples(args)
    criterion = NT_Xent(args.batch_size, args.temperature, mask, args.device)

    args.global_step = 0
    args.current_epoch = 0
    for epoch in range(args.start_epoch, args.epochs):
        lr = optimizer.param_groups[0]["lr"]
        loss_epoch = train(args, train_loader, model, criterion, optimizer, writer)

        if scheduler:
            scheduler.step()

        if epoch % 10 == 0:
            save_model(args, model, optimizer)

        writer.add_scalar("Loss/train", loss_epoch / len(train_loader), epoch)
        writer.add_scalar("Misc/learning_rate", lr, epoch)
        print(
            f"Epoch [{epoch}/{args.epochs}]\t Loss: {loss_epoch / len(train_loader)}\t lr: {round(lr, 5)}"
        )
        args.current_epoch += 1

    ## end training
    save_model(args, model, optimizer)
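# A hedged sketch of how a Sacred-style entry point like main(_run, _log) above
# is typically registered; the experiment name and observer path are
# assumptions, not from the original code.
from sacred import Experiment
from sacred.observers import FileStorageObserver

ex = Experiment("SimCLR")
ex.observers.append(FileStorageObserver("./logs"))
ex.add_config("./config/config.yaml")  # becomes _run.config inside main

@ex.automain  # Sacred injects _run and _log when the script is executed
def run(_run, _log):
    main(_run, _log)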
def main(_run, _log):
    args = argparse.Namespace(**_run.config)
    args = post_config_hook(args, _run)

    args.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    root = "./datasets"

    model = load_model(args)
    model = model.to(args.device)
    optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)  # TODO: LARS

    train_sampler = None
    train_dataset = torchvision.datasets.STL10(
        root, split="unlabeled", download=True, transform=TransformsSimCLR()
    )

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=(train_sampler is None),
        drop_last=True,
        num_workers=args.workers,
        sampler=train_sampler,
    )

    tb_dir = os.path.join(args.out_dir, _run.experiment_info["name"])
    os.makedirs(tb_dir)
    writer = SummaryWriter(log_dir=tb_dir)

    mask = mask_correlated_samples(args)
    criterion = NT_Xent(args.batch_size, args.temperature, mask, args.device)

    args.global_step = 0
    args.current_epoch = 0
    for epoch in range(args.start_epoch, args.epochs):
        loss_epoch = train(args, train_loader, model, criterion, optimizer, writer)
        writer.add_scalar("Loss/train", loss_epoch / len(train_loader), epoch)

        if epoch % 10 == 0:
            save_model(args, model, optimizer)

        print(
            f"Epoch [{epoch}/{args.epochs}]\t Loss: {loss_epoch / len(train_loader)}"
        )
        args.current_epoch += 1

    ## end training
    save_model(args, model, optimizer)
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="SimCLR")
    config = yaml_config_hook("./config/config.yaml")
    for k, v in config.items():
        parser.add_argument(f"--{k}", default=v, type=type(v))

    args = parser.parse_args()
    args.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    if args.dataset == "STL10":
        train_dataset = torchvision.datasets.STL10(
            args.dataset_dir,
            split="train",
            download=True,
            transform=TransformsSimCLR(size=args.image_size).test_transform,
        )
        test_dataset = torchvision.datasets.STL10(
            args.dataset_dir,
            split="test",
            download=True,
            transform=TransformsSimCLR(size=args.image_size).test_transform,
        )
    elif args.dataset == "CIFAR10":
        train_dataset = torchvision.datasets.CIFAR10(
            args.dataset_dir,
            train=True,
            download=True,
            transform=TransformsSimCLR(size=args.image_size).test_transform,
        )
        test_dataset = torchvision.datasets.CIFAR10(
            args.dataset_dir,
            train=False,
            download=True,
            transform=TransformsSimCLR(size=args.image_size).test_transform,
        )
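# A plausible sketch of the yaml_config_hook helper used above: it flattens a
# YAML config into a dict so every key can be exposed as an argparse flag.
# The "defaults" include handling is an assumption about the config layout.
import os
import yaml

def yaml_config_hook(config_file):
    with open(config_file) as f:
        cfg = yaml.safe_load(f)
    for d in cfg.get("defaults", []):
        config_dir, cf = d.popitem()
        cf = os.path.join(os.path.dirname(config_file), config_dir, cf + ".yaml")
        with open(cf) as f:
            cfg.update(yaml.safe_load(f))
    cfg.pop("defaults", None)
    return cfg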
# pprint(vars(args))

image_folder = 'data'
annotation_csv = 'data/annotation.csv'
unlabeled_scene_index = np.arange(106)

root = "./datasets"
train_sampler = None

if args.dataset == "STL10":
    train_dataset = torchvision.datasets.STL10(
        root, split="unlabeled", download=True, transform=TransformsSimCLR()
    )
elif args.dataset == "CIFAR10":
    train_dataset = torchvision.datasets.CIFAR10(
        root, download=True, transform=TransformsSimCLR()
    )
elif args.dataset == "road":
    train_dataset = SimclrUnlabeledDataset(
        image_folder=image_folder,
        scene_index=unlabeled_scene_index,
        first_dim='sample',
        transform=TransformsSimCLR(),
    )
else:
    raise NotImplementedError

train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=args.batch_size,
    shuffle=(train_sampler is None),
    drop_last=True,
    num_workers=args.workers,
    sampler=train_sampler,
)
def main(_run, _log):
    args = argparse.Namespace(**_run.config)
    args = post_config_hook(args, _run)

    args.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    root = "./datasets"
    train_sampler = None
    valid_sampler = None

    if args.dataset == "STL10":
        dataset = torchvision.datasets.STL10(
            root,
            split="train",
            download=True,
            transform=TransformsSimCLR(size=224).test_transform,
        )
        test_dataset = torchvision.datasets.STL10(
            root,
            split="test",
            download=True,
            transform=TransformsSimCLR(size=224).test_transform,
        )
    elif args.dataset == "CIFAR10":
        dataset = torchvision.datasets.CIFAR10(
            root,
            train=True,
            download=True,
            transform=TransformsSimCLR(size=224).test_transform,
        )
        test_dataset = torchvision.datasets.CIFAR10(
            root,
            train=False,
            download=True,
            transform=TransformsSimCLR(size=224).test_transform,
        )
    elif args.dataset == "MATEK":
        dataset, train_sampler, valid_sampler = MatekDataset(
            root=root,
            transforms=TransformsSimCLR(size=128).test_transform,
            test_size=args.test_size,
        ).get_dataset()
        test_dataset = dataset  # held-out split is selected by valid_sampler
    elif args.dataset == "JURKAT":
        dataset, train_sampler, valid_sampler = JurkatDataset(
            root=root,
            transforms=TransformsSimCLR(size=64).test_transform,
            test_size=args.test_size,
        ).get_dataset()
        test_dataset = dataset  # held-out split is selected by valid_sampler
    elif args.dataset == "PLASMODIUM":
        dataset, train_sampler, valid_sampler = PlasmodiumDataset(
            root=root,
            transforms=TransformsSimCLR(size=128).test_transform,
            test_size=args.test_size,
        ).get_dataset()
        test_dataset = dataset  # held-out split is selected by valid_sampler
    else:
        raise NotImplementedError

    train_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=args.logistic_batch_size,
        shuffle=(train_sampler is None),
        drop_last=True,
        num_workers=args.workers,
        sampler=train_sampler,
    )

    # evaluate on the test split (or the sampler-selected validation split),
    # not on the training data
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=args.logistic_batch_size,
        shuffle=(valid_sampler is None),
        drop_last=True,
        num_workers=args.workers,
        sampler=valid_sampler,
    )

    simclr_model, _, _ = load_model(args, train_loader, reload_model=True)
    simclr_model = simclr_model.to(args.device)
    simclr_model.eval()

    # logistic regression on top of the frozen encoder
    n_classes = args.n_classes  # e.g. 10 for STL-10
    model = LogisticRegression(simclr_model.n_features, n_classes)
    model = model.to(args.device)

    optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
    criterion = torch.nn.CrossEntropyLoss()

    print("### Creating features from pre-trained context model ###")
    (train_X, train_y, test_X, test_y) = get_features(
        simclr_model, train_loader, test_loader, args.device
    )

    arr_train_loader, arr_test_loader = create_data_loaders_from_arrays(
        train_X, train_y, test_X, test_y, args.logistic_batch_size
    )

    for epoch in range(args.logistic_epochs):
        loss_epoch, accuracy_epoch = train(
            args, arr_train_loader, simclr_model, model, criterion, optimizer
        )
        # average over the loader that was actually iterated
        print(
            f"Epoch [{epoch}/{args.logistic_epochs}]\t Loss: {loss_epoch / len(arr_train_loader)}\t Accuracy: {accuracy_epoch / len(arr_train_loader)}"
        )

    # final testing
    loss_epoch, accuracy_epoch, report = test(
        args, arr_test_loader, simclr_model, model, criterion, optimizer
    )
    print(
        f"[FINAL]\t Loss: {loss_epoch / len(arr_test_loader)}\t Accuracy: {accuracy_epoch / len(arr_test_loader)}"
    )
    print(report)
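# A plausible sketch of the create_data_loaders_from_arrays helper assumed
# above: it wraps the extracted feature arrays in TensorDatasets so the
# logistic head trains on cached features instead of raw images.
import torch

def create_data_loaders_from_arrays(X_train, y_train, X_test, y_test, batch_size):
    train = torch.utils.data.TensorDataset(
        torch.from_numpy(X_train), torch.from_numpy(y_train)
    )
    train_loader = torch.utils.data.DataLoader(
        train, batch_size=batch_size, shuffle=True
    )

    test = torch.utils.data.TensorDataset(
        torch.from_numpy(X_test), torch.from_numpy(y_test)
    )
    test_loader = torch.utils.data.DataLoader(
        test, batch_size=batch_size, shuffle=False
    )
    return train_loader, test_loader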
def main(_run, _log):
    args = argparse.Namespace(**_run.config)
    args = post_config_hook(args, _run)

    args.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    args.n_gpu = torch.cuda.device_count()

    train_sampler = None

    if args.dataset == "STL10":
        train_dataset = torchvision.datasets.STL10(
            root=args.dataset_root,
            split="unlabeled",
            download=True,
            transform=TransformsSimCLR(size=96),
        )
    elif args.dataset == "CIFAR10":
        train_dataset = torchvision.datasets.CIFAR10(
            root=args.dataset_root,
            download=True,
            transform=TransformsSimCLR(size=32),
        )
    elif args.dataset == "MATEK":
        train_dataset, _ = MatekDataset(
            root=args.dataset_root,
            transforms=TransformsSimCLR(size=128),
        ).get_dataset()
    elif args.dataset == "JURKAT":
        train_dataset, _ = JurkatDataset(
            root=args.dataset_root,
            transforms=TransformsSimCLR(size=64),
        ).get_dataset()
    elif args.dataset == "PLASMODIUM":
        train_dataset, _ = PlasmodiumDataset(
            root=args.dataset_root,
            transforms=TransformsSimCLR(size=128),
        ).get_dataset()
    else:
        raise NotImplementedError

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=(train_sampler is None),
        drop_last=True,
        num_workers=args.workers,
        sampler=train_sampler,
    )

    model, optimizer, scheduler = load_model(args, train_loader)

    print(f"Using {args.n_gpu} GPU(s)")
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)
        model = convert_model(model)  # sync-batchnorm conversion for multi-GPU

    model = model.to(args.device)
    print(model)

    tb_dir = os.path.join(args.out_dir, _run.experiment_info["name"])
    os.makedirs(tb_dir)
    writer = SummaryWriter(log_dir=tb_dir)

    criterion = NT_Xent(args.batch_size, args.temperature, args.device)

    args.global_step = 0
    args.current_epoch = 0
    for epoch in range(args.start_epoch, args.epochs):
        lr = optimizer.param_groups[0]["lr"]
        loss_epoch = train(args, train_loader, model, criterion, optimizer, writer)

        if scheduler:
            scheduler.step()

        if epoch % 10 == 0:
            save_model(args, model, optimizer)

        writer.add_scalar("Loss/train", loss_epoch / len(train_loader), epoch)
        writer.add_scalar("Misc/learning_rate", lr, epoch)
        print(
            f"Epoch [{epoch}/{args.epochs}]\t Loss: {loss_epoch / len(train_loader)}\t lr: {round(lr, 5)}"
        )
        args.current_epoch += 1

    save_model(args, model, optimizer)
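# A minimal sketch of the NT-Xent (normalized temperature-scaled cross entropy)
# criterion constructed above, here without the precomputed mask argument some
# variants take; z_i and z_j are the projections of the two augmented views,
# shape (batch_size, dim).
import torch
import torch.nn as nn
import torch.nn.functional as F

class NTXentSketch(nn.Module):
    def __init__(self, batch_size, temperature):
        super().__init__()
        self.batch_size = batch_size
        self.temperature = temperature

    def forward(self, z_i, z_j):
        N = 2 * self.batch_size
        z = F.normalize(torch.cat([z_i, z_j], dim=0), dim=1)
        sim = z @ z.t() / self.temperature  # (N, N) scaled cosine similarities
        # exclude self-similarity from the softmax denominator
        diag = torch.eye(N, dtype=torch.bool, device=sim.device)
        sim = sim.masked_fill(diag, float("-inf"))
        # the positive for sample i in one view is sample i in the other view
        targets = torch.cat(
            [torch.arange(self.batch_size, N), torch.arange(0, self.batch_size)]
        ).to(sim.device)
        return F.cross_entropy(sim, targets)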
def main(gpu, args):
    rank = args.nr * args.gpus + gpu
    dist.init_process_group("nccl", rank=rank, world_size=args.world_size)
    torch.manual_seed(0)
    torch.cuda.set_device(gpu)

    # dataset
    train_dataset = datasets.CIFAR10(
        args.dataset_dir,
        download=True,
        transform=TransformsSimCLR(size=args.image_size),  # paper uses 224
    )
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset, num_replicas=args.world_size, rank=rank
    )
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        drop_last=True,
        num_workers=args.num_workers,
        pin_memory=True,
        sampler=train_sampler,
    )

    # model
    if args.resnet_version == "resnet18":
        resnet = models.resnet18(pretrained=False)
    elif args.resnet_version == "resnet50":
        resnet = models.resnet50(pretrained=False)
    else:
        raise NotImplementedError("ResNet not implemented")

    model = BYOL(resnet, image_size=args.image_size, hidden_layer="avgpool")
    model = model.cuda(gpu)

    # distributed data parallel
    model = DDP(model, device_ids=[gpu], find_unused_parameters=True)

    # optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    # TensorBoard writer
    if gpu == 0:
        writer = SummaryWriter()

    # solver
    global_step = 0
    for epoch in range(args.num_epochs):
        metrics = defaultdict(list)
        for step, ((x_i, x_j), _) in enumerate(train_loader):
            x_i = x_i.cuda(non_blocking=True)
            x_j = x_j.cuda(non_blocking=True)

            loss = model(x_i, x_j)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # update moving average of target encoder
            model.module.update_moving_average()

            if gpu == 0:
                print(f"Step [{step}/{len(train_loader)}]:\tLoss: {loss.item()}")
                writer.add_scalar("Loss/train_step", loss, global_step)
                metrics["Loss/train"].append(loss.item())

            global_step += 1

        if gpu == 0:
            # write epoch metrics to TensorBoard
            for k, v in metrics.items():
                writer.add_scalar(k, np.array(v).mean(), epoch)

        if epoch % args.checkpoint_epochs == 0:
            if gpu == 0:
                print(f"Saving model at epoch {epoch}")
                torch.save(resnet.state_dict(), f"./model-{epoch}.pt")

            # let other workers wait until model is finished
            # dist.barrier()

    # save your improved network
    if gpu == 0:
        torch.save(resnet.state_dict(), "./model-final.pt")

    cleanup()
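# A hedged sketch of how a per-GPU worker like main(gpu, args) above is
# typically launched; the flag names mirror the args used in the worker, but
# parse_args() and the rendezvous address/port are assumptions.
import os
import torch.multiprocessing as mp

if __name__ == "__main__":
    args = parse_args()  # hypothetical: builds the argparse.Namespace used above
    args.world_size = args.gpus * args.nodes
    os.environ["MASTER_ADDR"] = "127.0.0.1"  # rendezvous for init_process_group
    os.environ["MASTER_PORT"] = "8010"
    # fork one process per GPU; each receives its local gpu index as first arg
    mp.spawn(main, args=(args,), nprocs=args.gpus, join=True)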
def main(gpu, args):
    rank = args.nr * args.gpus + gpu

    if args.nodes > 1:
        dist.init_process_group("nccl", rank=rank, world_size=args.world_size)
        torch.cuda.set_device(gpu)

    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    if args.dataset == "STL10":
        train_dataset = torchvision.datasets.STL10(
            args.dataset_dir,
            split="unlabeled",
            download=True,
            transform=TransformsSimCLR(size=args.image_size),
        )
    elif args.dataset == "CIFAR10":
        train_dataset = torchvision.datasets.CIFAR10(
            args.dataset_dir,
            download=True,
            transform=TransformsSimCLR(size=args.image_size),
        )
    else:
        raise NotImplementedError

    if args.nodes > 1:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset, num_replicas=args.world_size, rank=rank, shuffle=True
        )
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=(train_sampler is None),
        drop_last=True,
        num_workers=args.workers,
        sampler=train_sampler,
    )

    # initialize ResNet
    encoder = get_resnet(args.resnet, pretrained=False)
    n_features = encoder.fc.in_features  # get dimensions of fc layer

    # initialize model
    model = SimCLR(args, encoder, n_features)
    if args.reload:
        model_fp = os.path.join(
            args.model_path, "checkpoint_{}.tar".format(args.epoch_num)
        )
        model.load_state_dict(torch.load(model_fp, map_location=args.device.type))
    model = model.to(args.device)

    # optimizer / loss
    optimizer, scheduler = load_optimizer(args, model)
    criterion = NT_Xent(args.batch_size, args.temperature, args.device, args.world_size)

    # DDP / DP
    if args.dataparallel:
        model = convert_model(model)
        model = DataParallel(model)
    else:
        if args.nodes > 1:
            model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
            model = DDP(model, device_ids=[gpu])

    model = model.to(args.device)

    writer = None
    if args.nr == 0:
        writer = SummaryWriter()

    args.global_step = 0
    args.current_epoch = 0
    for epoch in range(args.start_epoch, args.epochs):
        lr = optimizer.param_groups[0]["lr"]
        loss_epoch = train(args, train_loader, model, criterion, optimizer, writer)

        if args.nr == 0 and scheduler:
            scheduler.step()

        if args.nr == 0 and epoch % 10 == 0:
            save_model(args, model, optimizer)

        if args.nr == 0:
            writer.add_scalar("Loss/train", loss_epoch / len(train_loader), epoch)
            writer.add_scalar("Misc/learning_rate", lr, epoch)
            print(
                f"Epoch [{epoch}/{args.epochs}]\t Loss: {loss_epoch / len(train_loader)}\t lr: {round(lr, 5)}"
            )
            args.current_epoch += 1

    ## end training
    save_model(args, model, optimizer)
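# A hedged sketch of load_optimizer as called above: Adam with an optional
# cosine-decay schedule. The SimCLR paper uses LARS for large-batch training;
# which branch the real helper takes depends on args and is an assumption here.
import torch

def load_optimizer(args, model):
    optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=args.epochs, eta_min=0, last_epoch=-1
    )
    return optimizer, scheduler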
def main(gpu, args):
    rank = args.nr * args.gpus + gpu

    if args.nodes > 1:
        dist.init_process_group("nccl", rank=rank, world_size=args.world_size)
        torch.cuda.set_device(gpu)

    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    if args.dataset == "STL10":
        train_dataset = torchvision.datasets.STL10(
            args.dataset_dir,
            split="unlabeled",
            download=True,
            transform=TransformsSimCLR(size=args.image_size),
        )
    elif args.dataset == "CIFAR10":
        train_dataset = torchvision.datasets.CIFAR10(
            args.dataset_dir,
            download=True,
            transform=TransformsSimCLR(size=args.image_size),
        )
    else:
        raise NotImplementedError

    if args.nodes > 1:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset, num_replicas=args.world_size, rank=rank, shuffle=True
        )
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=(train_sampler is None),
        drop_last=True,
        num_workers=args.workers,
        sampler=train_sampler,
    )

    # initialize ResNet
    encoder = get_resnet(args.resnet, pretrained=False)
    n_features = encoder.fc.in_features  # get dimensions of fc layer

    # initialize model
    model = SimCLR(args, encoder, n_features)
    if args.reload:
        model_fp = os.path.join(
            args.model_path, "checkpoint_{}.tar".format(args.epoch_num)
        )
        print(model_fp)
        model.load_state_dict(torch.load(model_fp, map_location=args.device.type))
    model = model.to(args.device)

    # optimizer / loss
    optimizer, scheduler = load_optimizer(args, model)
    criterion = NT_Xent(args.batch_size, args.temperature, args.device, args.world_size)

    # DDP / DP
    if args.dataparallel:
        model = convert_model(model)
        model = DataParallel(model)
    else:
        if args.nodes > 1:
            model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
            model = DDP(model, device_ids=[gpu])

    model = model.to(args.device)

    writer = None
    if args.nr == 0:
        writer = SummaryWriter()

    # added by @IvanKruzhilov
    decoder = Decoder(3, 3, args.image_size)
    optimizer_decoder = torch.optim.Adam(decoder.parameters(), lr=0.001)
    # decoder.load_state_dict(torch.load('save/decoder_my_algorithm_augmented.pt'))
    decoder = decoder.to(args.device)

    args.global_step = 0
    args.current_epoch = 0
    for epoch in range(args.start_epoch, args.epochs):
        lr = optimizer.param_groups[0]["lr"]
        scatter_radius = 0.2
        random_fake = None  # set in the train function now

        loss_epoch, loss_epoch_decoder, penalty_epoch = train(
            args,
            train_loader,
            model,
            decoder,
            criterion,
            optimizer,
            optimizer_decoder,
            writer,
            random_fake,
            scatter_radius,
        )
        loss_mean, bce_mean = train_autoencoder(
            model, decoder, train_loader, None, optimizer_decoder, freeze_encoder=True
        )

        if args.nr == 0 and scheduler:
            scheduler.step()

        if args.nr == 0 and epoch % 5 == 0:
            save_model(args, model, optimizer)
            torch.save(
                decoder.state_dict(),
                os.path.join(args.model_path, "decoder{0}.pt".format(epoch)),
            )

        if epoch % 10 == 0:
            # re-initialize the decoder every 10 epochs
            decoder = Decoder(3, 3, args.image_size)
            optimizer_decoder = torch.optim.Adam(decoder.parameters(), lr=0.001)
            decoder = decoder.to(args.device)

        if args.nr == 0:
            writer.add_scalar("Loss/train", loss_epoch / len(train_loader), epoch)
            writer.add_scalar("Misc/learning_rate", lr, epoch)

            mean_loss = loss_epoch / len(train_loader)
            mean_loss_decoder = loss_epoch_decoder / len(train_loader)
            mean_penalty = penalty_epoch / len(train_loader)
            print(
                f"Epoch [{epoch}/{args.epochs}]\t Loss: {mean_loss}\t decoder loss: {mean_loss_decoder}\t penalty: {mean_penalty}\t lr: {round(lr, 5)}"
            )
            print("loss: ", loss_mean, "mse: ", bce_mean)

        args.current_epoch += 1

    ## end training
    save_model(args, model, optimizer)
# load pre-trained model from checkpoint
simclr_model = SimCLR(args, encoder, n_features)
model_fp = os.path.join(args.model_path, "model{}.tar".format(args.model_num))
simclr_model.load_state_dict(torch.load(model_fp, map_location=args.device.type))
simclr_model = simclr_model.to(args.device)
simclr_model.eval()

n_classes = 3  # covid, healthy, other
patience = 20

# 5-fold cross validation
merge_data = torchvision.datasets.ImageFolder(
    '/home/opticho/source/SimCLR/datasets/dataset2(1)/train',
    transform=TransformsSimCLR(size=(args.image_size, args.image_size)).test_transform,
)
test_dataset = torchvision.datasets.ImageFolder(
    '/home/opticho/source/SimCLR/datasets/dataset2(1)/test',
    transform=TransformsSimCLR(size=(args.image_size, args.image_size)).test_transform,
)

k_fold_split = KFoldSplit(5, merge_data, random_state=args.seed)

test_loss, test_accuracy = [], []
models = []
for fold, (train_dataset, valid_dataset) in enumerate(k_fold_split):
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.logistic_batch_size,
        shuffle=True,
    )
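# A hypothetical sketch of the KFoldSplit generator iterated above, built on
# sklearn's KFold; it yields (train, valid) Subset pairs over merge_data. The
# implementation is an assumption consistent with how it is called.
import numpy as np
from sklearn.model_selection import KFold
from torch.utils.data import Subset

def KFoldSplit(n_splits, dataset, random_state=None):
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    for train_idx, valid_idx in kf.split(np.arange(len(dataset))):
        yield Subset(dataset, train_idx), Subset(dataset, valid_idx)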
import torch
import torchvision
import numpy as np

from data_helper import SimclrUnlabeledDataset
from helper import convert_map_to_lane_map, convert_map_to_road_map, collate_fn, draw_box

unlabeled_scene_index = np.arange(106)

"""### Load dataset into train loader"""

root = "./datasets"
train_sampler = None
image_folder = 'data'  # same image folder as in the snippet above

if args.dataset == "STL10":
    train_dataset = torchvision.datasets.STL10(
        root, split="unlabeled", download=True, transform=TransformsSimCLR()
    )
elif args.dataset == "CIFAR10":
    train_dataset = torchvision.datasets.CIFAR10(
        root, download=True, transform=TransformsSimCLR()
    )
elif args.dataset == "road":
    train_dataset = SimclrUnlabeledDataset(
        image_folder=image_folder,
        scene_index=unlabeled_scene_index,
        first_dim='sample',
        transform=TransformsSimCLR(),
    )
else:
    raise NotImplementedError

train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=args.batch_size,
    shuffle=(train_sampler is None),
    drop_last=True,
    num_workers=args.workers,
    sampler=train_sampler,
)
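# For reference, a minimal sketch of the TransformsSimCLR callable assumed by
# every snippet above: one call returns two independently augmented views of
# the same image; the default size and jitter strengths are assumptions.
import torchvision.transforms as T

class TransformsSimCLRSketch:
    def __init__(self, size=224):
        color_jitter = T.ColorJitter(0.8, 0.8, 0.8, 0.2)
        self.train_transform = T.Compose([
            T.RandomResizedCrop(size=size),
            T.RandomHorizontalFlip(),
            T.RandomApply([color_jitter], p=0.8),
            T.RandomGrayscale(p=0.2),
            T.ToTensor(),
        ])
        # deterministic transform used for linear evaluation / testing
        self.test_transform = T.Compose([
            T.Resize(size=size),
            T.ToTensor(),
        ])

    def __call__(self, x):
        # two stochastic passes give the positive pair (x_i, x_j)
        return self.train_transform(x), self.train_transform(x)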