def convert_vectors_for_contrastive(
        cfg: OmegaConf, data_loader, model: ContrastiveModel, device: torch.device
):
    """
    Convert experiment to feature representations.

    Only the first view of every batch is fed to the model; the second view
    yielded by the loader is ignored.

    :param cfg: Hydra's config instance. ``cfg["parameter"]["use_full_encoder"]``
        selects between ``model(view0)`` (truthy) and ``model.encode(view0)``.
    :param data_loader: Data loader for raw experiment. It must yield
        ``((view0, view1), labels)`` batches.
    :param model: Pre-trained instance
    :param device: PyTorch's device instance
    :return: Tuple of numpy array and labels.
    """
    model.eval()

    # The choice of forward function does not change between batches.
    extract = model if cfg["parameter"]["use_full_encoder"] else model.encode

    new_X = []
    new_y = []
    with torch.no_grad():
        for (view0, _), y_batches in data_loader:
            # Move each batch of features to the CPU right away so that the
            # whole dataset is never resident on the accelerator at once.
            new_X.append(extract(view0.to(device)).cpu())
            new_y.append(y_batches)

    X = torch.cat(new_X)
    y = torch.cat(new_y).cpu()

    return X.numpy(), y.numpy()
def main(cfg: OmegaConf):
    """
    Run distributed contrastive pre-training on CIFAR-10 or CIFAR-100.

    Sets up logging, initialises the distributed state, seeds the random
    number generators, builds the augmented training loader and the
    ``DistributedDataParallel``-wrapped model, and finally calls ``train``.

    :param cfg: Hydra's config instance
    :raises ValueError: if ``cfg["experiment"]["name"]`` is neither
        ``"cifar10"`` nor ``"cifar100"``.
    :return: None
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(logging.INFO)
    # Records are emitted without a trailing newline.
    stream_handler.terminator = ""
    logger.addHandler(stream_handler)

    init_ddp(cfg)
    check_hydra_conf(cfg)

    seed = cfg["parameter"]["seed"]
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    rank = cfg["distributed"]["local_rank"]
    logger.info("Using {}".format(rank))

    transform = SimCLRTransforms(strength=cfg["experiment"]["strength"])

    root = "~/pytorch_datasets"
    dataset_classes = {
        "cifar10": torchvision.datasets.CIFAR10,
        "cifar100": torchvision.datasets.CIFAR100,
    }
    dataset_name = cfg["experiment"]["name"]
    if dataset_name not in dataset_classes:
        # An explicit exception survives ``python -O``, unlike ``assert``.
        raise ValueError(
            "Unsupported dataset {!r}; expected one of {}".format(
                dataset_name, sorted(dataset_classes)))
    training_dataset = dataset_classes[dataset_name](
        root=root, train=True, download=True, transform=transform)

    sampler = torch.utils.data.distributed.DistributedSampler(training_dataset, shuffle=True)
    training_data_loader = DataLoader(
        dataset=training_dataset,
        sampler=sampler,
        num_workers=cfg["parameter"]["num_workers"],
        batch_size=cfg["experiment"]["batches"],
        pin_memory=True,
        drop_last=True,
    )

    model = ContrastiveModel(base_cnn=cfg["experiment"]["base_cnn"], d=cfg["parameter"]["d"])
    model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
    model = model.to(rank)
    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[rank])

    train(cfg, training_data_loader, model)
def main(cfg: OmegaConf):
    """
    Extract and save feature representations for every saved checkpoint.

    For each ``*.pt`` file found in ``cfg["experiment"]["target_dir"]`` the
    weights are loaded into a fresh ``ContrastiveModel`` and the following
    ``.npy`` files are written with names relative to the working directory:

    * ``<ckpt>.feature.{train,val}.npy`` / ``<ckpt>.label.{train,val}.npy``:
      features and labels obtained without data augmentation;
    * ``<ckpt>.aug-<t>.feature.{train,val}.npy`` for ``t`` in ``(1, 5, 20)``:
      features averaged over the first ``t`` augmented passes.

    :param cfg: Hydra's config instance
    :return: None
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(logging.INFO)
    # Records are emitted without a trailing newline.
    stream_handler.terminator = ""
    logger.addHandler(stream_handler)

    check_hydra_conf(cfg)

    seed = cfg["parameter"]["seed"]
    torch.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    use_cuda = cfg["parameter"]["use_cuda"] and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    logger.info("Using {}".format(device))

    for weights_path in Path(cfg["experiment"]["target_dir"]).glob("*.pt"):
        # ``Path.name`` yields the file name regardless of the OS separator.
        key = weights_path.name
        logger.info("Save features extracted by using {}".format(key))

        model = ContrastiveModel(base_cnn=cfg["experiment"]["base_cnn"], d=cfg["parameter"]["d"]).to(device)

        # load weights trained on self-supervised task
        # Always remap storages to the selected device so a checkpoint saved
        # on another GPU index (or on GPU at all) can still be restored.
        model.load_state_dict(torch.load(weights_path, map_location=device))

        # no data-augmentation
        training_data_loader, val_data_loader = get_data_loaders(cfg, False)
        X_train, y_train = convert_vectors(cfg, training_data_loader, model, device)
        X_val, y_val = convert_vectors(cfg, val_data_loader, model, device)

        fname = "{}.feature.train.npy".format(key)
        np.save(fname, X_train)
        fname = "{}.label.train.npy".format(key)
        np.save(fname, y_train)
        fname = "{}.feature.val.npy".format(key)
        np.save(fname, X_val)
        fname = "{}.label.val.npy".format(key)
        np.save(fname, y_val)

        # average of data augmentation
        size_of_iterations = (1, 5, 20)
        training_data_loader, val_data_loader = get_data_loaders(cfg, True)
        X_trains = []
        X_vals = []
        for t in range(1, size_of_iterations[-1] + 1):
            # Only the features ([0]) are kept; labels were saved above.
            X_trains.append(convert_vectors_for_contrastive(cfg, training_data_loader, model, device)[0])
            X_vals.append(convert_vectors_for_contrastive(cfg, val_data_loader, model, device)[0])

            if t in size_of_iterations:
                # Mean over the ``t`` augmented passes collected so far.
                fname = "{}.aug-{}.feature.train.npy".format(key, t)
                np.save(fname, np.array(X_trains).mean(axis=0))
                fname = "{}.aug-{}.feature.val.npy".format(key, t)
                np.save(fname, np.array(X_vals).mean(axis=0))
from torchvision.models import resnet18
from model import ContrastiveModel, get_backbone
from dataset_loader import get_clf_train_test_transform
from dataset_loader import get_clf_train_test_dataloaders

# Select the compute device: CUDA when available, otherwise CPU.
if torch.cuda.is_available():
    dtype = torch.cuda.FloatTensor
    device = torch.device("cuda")
    # torch.cuda.set_device(device_id)
    print('GPU')
else:
    dtype = torch.FloatTensor
    device = torch.device("cpu")

# Rebuild the contrastive model around a ResNet-18 created without
# pretrained weights, then restore the 'state_dict' entry of a checkpoint
# loaded from a hard-coded absolute path.
backbone = get_backbone(resnet18(pretrained=False))
model = ContrastiveModel(backbone).to(device)
obj = torch.load(
    "/home/octo/Desktop/clr/ckpt/0417151425/SimCLR_0417175433.pth")
model.load_state_dict(obj['state_dict'])

# The fine-tuning encoder is the backbone followed by the first two child
# modules of the projection head.
encoder = model.backbone
last_layers = torch.nn.Sequential(
    *(list(model.projectionhead.children())[0:2]))
encoder = nn.Sequential(encoder, last_layers)
new_model = FineTunedModel(encoder, model.output_dim).to(device)

criterion = nn.CrossEntropyLoss()
# NOTE(review): the remaining arguments of this call are missing here.
optimizer = optim.SGD(new_model.parameters(),
def train(
        cfg: OmegaConf,
        training_data_loader: torch.utils.data.DataLoader,
        model: ContrastiveModel,
) -> None:
    """
    Optimise the contrastive model with NT-Xent loss, SGD wrapped in LARC,
    linear learning-rate warm-up and cosine annealing afterwards.

    :param cfg: Hydra's config instance
    :param training_data_loader: Training data loader for contrastive learning
    :param model: Contrastive model based on resnet
    :return: None
    """
    distributed_cfg = cfg["distributed"]
    local_rank = distributed_cfg["local_rank"]
    world_size = distributed_cfg["world_size"]
    epochs = cfg["parameter"]["epochs"]

    # The loader drops the last incomplete batch, hence the truncation.
    dataset_size = len(training_data_loader.dataset.data)
    global_batch_size = cfg["experiment"]["batches"] * world_size
    steps_per_epoch = int(dataset_size / global_batch_size)
    total_steps = epochs * steps_per_epoch
    warmup_steps = cfg["parameter"]["warmup_epochs"] * steps_per_epoch

    model.train()
    criterion = NT_Xent(temperature=cfg["parameter"]["temperature"], device=local_rank)

    parameter_groups = exclude_from_wt_decay(
        model.named_parameters(), weight_decay=cfg["experiment"]["decay"])
    base_optimizer = torch.optim.SGD(
        params=parameter_groups,
        lr=calculate_initial_lr(cfg),
        momentum=cfg["parameter"]["momentum"],
        nesterov=False,
        weight_decay=0.,
    )

    # https://github.com/google-research/simclr/blob/master/lars_optimizer.py#L26
    optimizer = LARC(optimizer=base_optimizer, trust_coefficient=0.001, clip=False)

    # The scheduler drives the wrapped optimizer and only covers the steps
    # that follow the warm-up phase.
    cos_lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer.optim,
        T_max=total_steps - warmup_steps,
    )

    global_step = 0
    for epoch in range(1, epochs + 1):
        # Pass the epoch to the sampler before iterating the loader.
        training_data_loader.sampler.set_epoch(epoch)

        for (view0, view1), _ in training_data_loader:
            in_warmup = global_step <= warmup_steps

            # adjust learning rate by applying linear warming
            if in_warmup:
                warmup_lr = calculate_lr(cfg, warmup_steps, global_step)
                for group in optimizer.param_groups:
                    group["lr"] = warmup_lr

            optimizer.zero_grad()
            z0 = model(view0.to(local_rank))
            z1 = model(view1.to(local_rank))
            loss = criterion(z0, z1)
            loss.backward()
            optimizer.step()

            # adjust learning rate by applying cosine annealing
            if not in_warmup:
                cos_lr_scheduler.step()

            global_step += 1

        if local_rank != 0:
            continue

        # Only rank 0 reports progress and writes checkpoints.
        current_lr = optimizer.param_groups[0]["lr"]
        logging.info(
            "Epoch:{}/{} progress:{:.3f} loss:{:.3f}, lr:{:.7f}".format(
                epoch, epochs, epoch / epochs, loss.item(), current_lr))

        if epoch % cfg["experiment"]["save_model_epoch"] == 0:
            save_fname = "epoch={}-{}".format(
                epoch, cfg["experiment"]["output_model_name"])
            torch.save(model.state_dict(), save_fname)
# Log directory named after the current month/day/hour/minute/second.
log_dir = "runs/" + datetime.now().strftime('%m%d%H%M%S')

# Create the dataset, checkpoint and log folders when they are missing.
for _asset_dir in (data_dir, ckpt_dir, log_dir):
    if not os.path.exists(_asset_dir):
        os.makedirs(_asset_dir)
del _asset_dir

logger = Logger(log_dir=log_dir, tensorboard=True, matplotlib=True)

# Contrastive model on a ResNet-18 backbone created without pretrained weights.
backbone = get_backbone(resnet18(pretrained=False))
model = ContrastiveModel(backbone).to(device)
loss_func = ContrastiveLoss().to(device)

# hyperparams
features = 128
batch = 2048
batch_size = batch
# Use num_epochs if you have time and resources to train. Else, for a POC,
# 25 epochs should yield a decreasing loss.
epochs = 25
lr = 1e-4
device_id = 0
weight_decay = 1e-6
image_size = (32, 32)
momentum = 0.9
warmup_epochs = 10
warmup_lr = 0
def main(cfg: OmegaConf):
    """
    Evaluate every saved checkpoint on a downstream classification task.

    For each ``*.pt`` file in ``cfg["experiment"]["target_dir"]`` the weights
    are loaded into a ``ContrastiveModel``, the CIFAR training/validation
    images are converted to feature vectors, and a classifier selected by
    ``cfg["parameter"]["classifier"]`` (``"centroid"``, ``"linear"`` or
    ``"nonlinear"``/``"non-linear"``) is evaluated on them. All results are
    written as JSON to ``cfg["parameter"]["classification_results_json_fname"]``.

    :param cfg: Hydra's config instance
    :raises ValueError: if the dataset name or the classifier name is not
        one of the supported values.
    :return: None
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(logging.INFO)
    # Records are emitted without a trailing newline.
    stream_handler.terminator = ""
    logger.addHandler(stream_handler)

    check_hydra_conf(cfg)

    seed = cfg["parameter"]["seed"]
    torch.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    use_cuda = cfg["parameter"]["use_cuda"] and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    logger.info("Using {}".format(device))

    transform = transforms.Compose([
        transforms.ToTensor(),
    ])

    root = "~/pytorch_datasets"
    # dataset name -> (dataset class, number of classes)
    supported_datasets = {
        "cifar10": (torchvision.datasets.CIFAR10, 10),
        "cifar100": (torchvision.datasets.CIFAR100, 100),
    }
    dataset_name = cfg["experiment"]["name"]
    if dataset_name not in supported_datasets:
        # An explicit exception survives ``python -O``, unlike ``assert``.
        raise ValueError(
            "Unsupported dataset {!r}; expected one of {}".format(
                dataset_name, sorted(supported_datasets)))
    dataset_class, num_classes = supported_datasets[dataset_name]
    training_dataset = dataset_class(root=root, train=True, download=True, transform=transform)
    val_dataset = dataset_class(root=root, train=False, download=True, transform=transform)

    training_data_loader = DataLoader(
        dataset=training_dataset,
        batch_size=cfg["experiment"]["batches"],
        shuffle=True,
    )
    val_data_loader = DataLoader(
        dataset=val_dataset,
        batch_size=cfg["experiment"]["batches"],
        shuffle=False,
    )

    classification_results = {}
    top_k = cfg["parameter"]["top_k"]
    classifier_name = cfg["parameter"]["classifier"]

    for weights_path in Path(cfg["experiment"]["target_dir"]).glob("*.pt"):
        # ``Path.name`` yields the file name regardless of the OS separator.
        key = weights_path.name
        logger.info("Evaluation by using {}".format(key))

        model = ContrastiveModel(base_cnn=cfg["experiment"]["base_cnn"], d=cfg["parameter"]["d"])
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
        model = model.to(device)

        # load weights trained on self-supervised task
        # ``map_location`` is an argument of ``torch.load``; passing it to
        # ``load_state_dict`` raises ``TypeError``.
        state_dict = torch.load(weights_path, map_location=device)
        # Drop the "module." fragment that DistributedDataParallel puts in
        # parameter names.
        state_dict = {
            k.replace("module.", ""): v for k, v in state_dict.items()
        }
        model.load_state_dict(state_dict, strict=False)

        downstream_training_dataset = DownstreamDataset(
            *convert_vectors(cfg, training_data_loader, model, device))
        downstream_val_dataset = DownstreamDataset(
            *convert_vectors(cfg, val_data_loader, model, device))

        downstream_training_data_loader = DataLoader(
            dataset=downstream_training_dataset,
            batch_size=cfg["experiment"]["batches"],
            shuffle=True,
        )
        downstream_val_data_loader = DataLoader(
            dataset=downstream_val_dataset,
            batch_size=cfg["experiment"]["batches"],
            shuffle=False,
        )

        if classifier_name == "centroid":
            classifier = CentroidClassifier(
                weights=CentroidClassifier.create_weights(
                    downstream_training_dataset, num_classes=num_classes).to(device))
            train_acc, train_top_k_acc = centroid_eval(
                downstream_training_data_loader, device, classifier, top_k)
            val_acc, val_top_k_acc = centroid_eval(
                downstream_val_data_loader, device, classifier, top_k)

            classification_results[key] = {
                "train_acc": train_acc,
                "train_top_{}_acc".format(top_k): train_top_k_acc,
                "val_acc": val_acc,
                "val_top_{}_acc".format(top_k): val_top_k_acc
            }
            logger.info("train acc: {}, val acc: {}".format(train_acc, val_acc))

        else:
            # Input width of the classifier: output of the projection head
            # ``g`` when the full encoder is used, otherwise its input.
            if cfg["parameter"]["use_full_encoder"]:
                num_last_units = model.g[-1].out_features
            else:
                num_last_units = model.g[0].in_features

            if classifier_name == "linear":
                classifier = LinearClassifier(num_last_units, num_classes).to(device)
            elif classifier_name.replace("-", "") == "nonlinear":
                classifier = NonLinearClassifier(num_last_units, num_classes).to(device)
            else:
                # Fail with a clear message instead of an unbound-variable error.
                raise ValueError(
                    "Unsupported classifier {!r}; expected 'centroid', "
                    "'linear' or 'nonlinear'".format(classifier_name))

            train_accuracies, train_top_k_accuracies, train_losses, \
                val_accuracies, val_top_k_accuracies, val_losses = learnable_eval(
                    cfg, classifier, downstream_training_data_loader,
                    downstream_val_data_loader, device)

            classification_results[key] = {
                "train_accuracies": train_accuracies,
                "val_accuracies": val_accuracies,
                "train_losses": train_losses,
                "val_losses": val_losses,
                "train_top_{}_accuracies".format(top_k): train_top_k_accuracies,
                "val_top_{}_accuracies".format(top_k): val_top_k_accuracies,
                "lowest_val_loss": min(val_losses),
                "highest_val_acc": max(val_accuracies),
                "highest_val_top_k_acc": max(val_top_k_accuracies)
            }
            logger.info("train acc: {}, val acc: {}".format(
                max(train_accuracies), max(val_accuracies)))

    fname = cfg["parameter"]["classification_results_json_fname"]
    with open(fname, "w") as f:
        json.dump(classification_results, f)