def gen_training_accuracy(args):
    """Evaluate SVM train/test accuracy over saved checkpoints.

    For every 5th checkpoint epoch, loads the network, extracts features on
    the train and test splits, fits/evaluates an SVM, and appends one row
    ``(epoch, acc_train, acc_test)`` to ``accuracy.csv`` in ``args.model_dir``.

    Args:
        args: namespace with at least ``model_dir`` (plus whatever ``svm``
            reads from it).
    """
    # load model hyper-parameters and enumerate available checkpoint epochs
    params = utils.load_params(args.model_dir)
    ckpt_dir = os.path.join(args.model_dir, 'checkpoints')
    # filenames carry the epoch number after an 11-char prefix, ending in '.pt'
    # NOTE(review): assumes the checkpoint naming scheme is fixed — confirm.
    ckpt_epochs = np.sort([int(e[11:-3]) for e in os.listdir(ckpt_dir)
                           if e[-3:] == ".pt"])

    # csv
    headers = ["epoch", "acc_train", "acc_test"]
    csv_path = utils.create_csv(args.model_dir, 'accuracy.csv', headers)

    # Load the datasets ONCE, outside the checkpoint loop. The original code
    # rebuilt both datasets and loaders on every iteration even though they do
    # not depend on the checkpoint being evaluated.
    train_transforms = tf.load_transforms('test')
    trainset = tf.load_trainset(params['data'], train_transforms, train=True)
    trainloader = DataLoader(trainset, batch_size=500, num_workers=4)
    test_transforms = tf.load_transforms('test')
    testset = tf.load_trainset(params['data'], test_transforms, train=False)
    testloader = DataLoader(testset, batch_size=500, num_workers=4)

    # FIX: the original loop variable was named `ckpt_paths`, shadowing the
    # checkpoint list itself; use `_` since only the index is used.
    for epoch, _ in enumerate(ckpt_epochs):
        if epoch % 5 != 0:  # evaluate only every 5th epoch
            continue
        net, epoch = tf.load_checkpoint(args.model_dir, epoch=epoch, eval_=True)
        train_features, train_labels = tf.get_features(net, trainloader, verbose=False)
        test_features, test_labels = tf.get_features(net, testloader, verbose=False)
        acc_train, acc_test = svm(args, train_features, train_labels,
                                  test_features, test_labels)
        utils.save_state(args.model_dir, epoch, acc_train, acc_test,
                         filename='accuracy.csv')
    print("Finished generating accuracy.")
def gen_testloss(args):
    """Compute the MCR^2 test loss for every saved checkpoint.

    Iterates over checkpoint epochs, runs the test set through the network,
    and appends per-step loss terms to ``losses_test.csv`` in
    ``args.model_dir``.

    Args:
        args: namespace with at least ``model_dir``.
    """
    # load model hyper-parameters and enumerate available checkpoint epochs
    params = utils.load_params(args.model_dir)
    ckpt_dir = os.path.join(args.model_dir, 'checkpoints')
    # NOTE(review): assumes 11-char filename prefix before the epoch — confirm.
    ckpt_epochs = np.sort([int(e[11:-3]) for e in os.listdir(ckpt_dir)
                           if e[-3:] == ".pt"])

    # csv
    headers = ["epoch", "step", "loss", "discrimn_loss_e", "compress_loss_e",
               "discrimn_loss_t", "compress_loss_t"]
    csv_path = utils.create_csv(args.model_dir, 'losses_test.csv', headers)
    print('writing to:', csv_path)

    # load test data once; it does not change across checkpoints
    test_transforms = tf.load_transforms('test')
    testset = tf.load_trainset(params['data'], test_transforms, train=False)
    testloader = DataLoader(testset, batch_size=params['bs'], shuffle=False,
                            num_workers=4)

    # save loss
    criterion = MaximalCodingRateReduction(gam1=params['gam1'],
                                           gam2=params['gam2'],
                                           eps=params['eps'])
    for epoch, _ in enumerate(ckpt_epochs):
        net, epoch = tf.load_checkpoint(args.model_dir, epoch=epoch, eval_=True)
        for step, (batch_imgs, batch_lbls) in enumerate(testloader):
            features = net(batch_imgs.cuda())
            # FIX: num_classes is an int (see the training script, which passes
            # `num_classes=trainset.num_classes`); the original called
            # len(testset.num_classes), which raises TypeError on an int.
            loss, loss_empi, loss_theo = criterion(
                features, batch_lbls, num_classes=testset.num_classes)
            utils.save_state(args.model_dir, epoch, step, loss.item(),
                             *loss_empi, *loss_theo, filename='losses_test.csv')
    print("Finished generating test loss.")
if __name__ == '__main__':
    # CLI entry point: extract features from a trained model over the training
    # set and save them (plus labels) as .npy files, then tar the directory.
    parser = argparse.ArgumentParser(
        description='Extract features from model and data')
    parser.add_argument('--model_dir', type=str,
                        help='base directory for saving PyTorch model.')
    parser.add_argument('--epoch', type=int, default=None,
                        help='which epoch for evaluation')
    parser.add_argument('--save_dir', type=str, default="./extractions/")
    parser.add_argument('--tail', type=str, default='',
                        help='extra information to add to file name')
    args = parser.parse_args()

    params = utils.load_params(args.model_dir)
    net, epoch = tf.load_checkpoint(args.model_dir, args.epoch, eval_=True)

    # extract features on the (non-augmented, 'test'-transformed) training set
    train_transforms = tf.load_transforms('test')
    trainset = tf.load_trainset(params['data'], train_transforms, train=True)
    trainloader = DataLoader(trainset, batch_size=200, num_workers=4)
    features, labels = tf.get_features(net, trainloader)

    # FIX: os.makedirs(..., exist_ok=True) also creates intermediate
    # directories and avoids the exists-check race; the original os.mkdir
    # failed for nested --save_dir paths.
    os.makedirs(args.save_dir, exist_ok=True)
    np.save(os.path.join(args.save_dir, "features.npy"),
            features.cpu().detach().numpy())
    np.save(os.path.join(args.save_dir, "labels.npy"), labels.numpy())
    make_tarfile("./extractions.tgz", args.save_dir)
default=10, help='number of classes in each learning batch (default: 10)')
# NOTE(review): the line above closes a parser.add_argument(...) call opened
# before this chunk (presumably for --cpb) — confirm against the full file.
parser.add_argument('--save', action='store_true', help='save labels')
parser.add_argument('--data_dir', default='./data/', help='path to dataset')
args = parser.parse_args()
print("evaluate using label_batch: {}".format(args.label_batch))
params = utils.load_params(args.model_dir)

# get train features and labels
train_transforms = tf.load_transforms('test')
trainset = tf.load_trainset(params['data'], train_transforms,
                            train=True, path=args.data_dir)
if 'lcr' in params.keys():  # supervised corruption case
    # training was done with corrupted labels; reproduce the same corruption
    trainset = tf.corrupt_labels(trainset, params['lcr'], params['lcs'])
new_labels = trainset.targets
# class-batch evaluation requires the classes to split evenly into batches
assert (trainset.num_classes % args.cpb == 0), "Number of classes not divisible by cpb"

## load model
net, epoch = tf.load_checkpoint_ce(args.model_dir, trainset.num_classes,
                                   args.epoch, eval_=True,
                                   label_batch_id=args.label_batch)
net = net.cuda().eval()
classes = np.unique(trainset.targets)
## Build a unique run directory name from the hyper-parameters
## (self-supervised MCR^2 training).
model_dir = os.path.join(
    args.save_dir,
    'selfsup_{}+{}_{}_epo{}_bs{}_aug{}+{}_lr{}_mom{}_wd{}_gam1{}_gam2{}_eps{}{}'
    .format(args.arch, args.fd, args.data, args.epo, args.bs, args.aug,
            args.transform, args.lr, args.mom, args.wd, args.gam1,
            args.gam2, args.eps, args.tail))
utils.init_pipeline(model_dir)

## Prepare for Training
if args.pretrain_dir is not None:
    # warm-start from a pretrained checkpoint and record its params
    net, _ = tf.load_checkpoint(args.pretrain_dir, args.pretrain_epo)
    utils.update_params(model_dir, args.pretrain_dir)
else:
    net = tf.load_architectures(args.arch, args.fd)
transforms = tf.load_transforms(args.transform)
trainset = tf.load_trainset(args.data, path=args.data_dir)
# AugmentLoader yields each sample together with `num_aug` augmentations
trainloader = AugmentLoader(trainset,
                            transforms=transforms,
                            sampler=args.sampler,
                            batch_size=args.bs,
                            num_aug=args.aug)
criterion = MaximalCodingRateReduction(gam1=args.gam1, gam2=args.gam2, eps=args.eps)
optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.mom,
                      weight_decay=args.wd)
# step LR decay at epochs 30 and 60
scheduler = lr_scheduler.MultiStepLR(optimizer, [30, 60], gamma=0.1)
utils.save_params(model_dir, vars(args))
## per model functions def lr_schedule(epoch, optimizer): """decrease the learning rate""" lr = args.lr if epoch >= 400: lr = args.lr * 0.01 elif epoch >= 200: lr = args.lr * 0.1 for param_group in optimizer.param_groups: param_group['lr'] = lr ## Prepare for Training transforms = tf.load_transforms(args.transform) trainset = tf.load_trainset(args.data, transforms, path=args.data_dir) #trainset = tf.corrupt_labels(trainset, args.lcr, args.lcs) if args.pretrain_dir is not None: net, _ = tf.load_checkpoint(args.pretrain_dir, args.pretrain_epo) utils.update_params(model_dir, args.pretrain_dir) else: net = tf.load_architectures_ce(args.arch, trainset.num_classes) assert (trainset.num_classes % args.cpb == 0), "Number of classes not divisible by cpb" classes = np.unique(trainset.targets) class_batch_num = trainset.num_classes // args.cpb class_batch_list = classes.reshape(class_batch_num, args.cpb) #trainloader = DataLoader(trainset, batch_size=args.bs, drop_last=True, num_workers=4) criterion = nn.CrossEntropyLoss() optimizer = SGD(net.parameters(),
# NOTE(review): chunk starts mid-function — the `elif` below continues an
# lr_schedule-style function whose `if` branch lies outside this view.
    elif epoch >= 200:
        lr = args.lr * 0.1
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

## Prepare for Training
if args.pretrain_dir is not None:
    # warm-start from a supervised "expert" checkpoint trained on source_name
    pretrain_model_dir = os.path.join(
        args.pretrain_dir,
        'sup_expert_resnet18+128_{}_epo200_bs1000_lr0.001_mom0.9_wd0.0005_gam11.0_gam21.0_eps0.5_lcr0.0'.format(source_name))
    net, _ = tf.load_checkpoint(pretrain_model_dir, args.pretrain_epo)
    utils.update_params(model_dir, pretrain_model_dir)
else:
    net = tf.load_architectures(args.arch, args.fd)
transforms = tf.load_transforms(args.transform)
trainset = tf.load_trainset(ds_name, transforms, path=args.data_dir)
print("Number of classes in {} is: {}".format(ds_name,trainset.num_classes))
# optionally corrupt a fraction of the labels (label corruption experiment)
trainset = tf.corrupt_labels(trainset, args.lcr, args.lcs)
trainloader = DataLoader(trainset, batch_size=args.bs, drop_last=True, num_workers=4)
criterion = MaximalCodingRateReduction(gam1=args.gam1, gam2=args.gam2, eps=args.eps)
optimizer = SGD(net.parameters(), lr=args.lr, momentum=args.mom, weight_decay=args.wd)

## Training
for epoch in range(args.epo):
    lr_schedule(epoch, optimizer)
    for step, (batch_imgs, batch_lbls) in enumerate(trainloader):
        features = net(batch_imgs.cuda())
        loss, loss_empi, loss_theo = criterion(features, batch_lbls,
                                               num_classes=trainset.num_classes)
        optimizer.zero_grad()
        loss.backward()
        # NOTE(review): truncated at chunk boundary — optimizer step / logging
        # presumably follow outside this view.
def plot_pca_epoch(args):
    """Plot PCA singular-value spectra for several training epochs in one figure.

    For epochs in ``EPOCHS``, extracts features from the training set (epoch 0
    uses a randomly initialized network), fits PCA (optionally per-class when
    ``args.class_`` is given), and plots the singular values. Saves the raw
    singular values (``sig_vals_epoch.npy``) and the figure as .png and .pdf
    under ``<model_dir>/figures/pca``.

    Args:
        args: namespace with ``model_dir``, ``class_`` (int or None) and
            ``comp`` (max number of PCA components).
    """
    EPOCHS = [0, 10, 100, 500]
    params = utils.load_params(args.model_dir)
    transforms = tf.load_transforms('test')
    trainset = tf.load_trainset(params['data'], transforms)
    trainloader = DataLoader(trainset, batch_size=200, num_workers=4)

    sig_vals = []
    for epoch in EPOCHS:
        epoch_ = epoch - 1  # checkpoints are 0-indexed
        if epoch_ == -1:  # epoch 0 => randomly initialized network
            net = tf.load_architectures(params['arch'], params['fd'])
        else:
            net, epoch = tf.load_checkpoint(args.model_dir, epoch=epoch_, eval_=True)
        features, labels = tf.get_features(net, trainloader)
        if args.class_ is not None:
            # per-class spectrum: sort features by label, pick one class
            features_sort, _ = utils.sort_dataset(
                features.numpy(), labels.numpy(),
                num_classes=trainset.num_classes, stack=False)
            features_ = features_sort[args.class_]
        else:
            features_ = features.numpy()
        n_comp = np.min([args.comp, features.shape[1]])
        pca = PCA(n_components=n_comp).fit(features_)
        sig_vals.append(pca.singular_values_)

    ## plot singular values
    plt.rc('text', usetex=True)
    plt.rcParams['font.family'] = 'serif'
    plt.rcParams['font.serif'] = ['Times New Roman']
    fig, ax = plt.subplots(1, 1, figsize=(7, 5), dpi=400)
    # truncate every curve to the shortest spectrum so they share an x-axis
    x_min = np.min([len(sig_val) for sig_val in sig_vals])
    if args.class_ is not None:
        ax.set_xticks(np.arange(0, x_min, 10))
        ax.set_yticks(np.linspace(0, 40, 9))
        ax.set_ylim(0, 40)
    else:
        ax.set_xticks(np.arange(0, x_min, 10))
        ax.set_yticks(np.linspace(0, 80, 9))
        ax.set_ylim(0, 90)
    for epoch, sig_val in zip(EPOCHS, sig_vals):
        ax.plot(np.arange(x_min), sig_val[:x_min], marker='', markersize=5,
                label=f'epoch - {epoch}', alpha=0.6)
    ax.legend(loc='upper right', frameon=True, fancybox=True,
              prop={"size": 8}, ncol=1, framealpha=0.5)
    ax.set_xlabel("components")
    ax.set_ylabel("sigular values")
    for side in ('top', 'right', 'bottom', 'left'):
        ax.spines[side].set_visible(False)
    [tick.label.set_fontsize(12) for tick in ax.xaxis.get_major_ticks()]
    [tick.label.set_fontsize(12) for tick in ax.yaxis.get_major_ticks()]
    ax.grid(True, color='white')
    ax.set_facecolor('whitesmoke')
    fig.tight_layout()

    ## save
    save_dir = os.path.join(args.model_dir, 'figures', 'pca')
    # FIX: the directory must exist BEFORE np.save writes into it; the
    # original saved first and created the directory afterwards, crashing on
    # a fresh model_dir.
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    np.save(os.path.join(save_dir, "sig_vals_epoch.npy"), sig_vals)
    file_name = os.path.join(save_dir, f"pca_class{args.class_}.png")
    fig.savefig(file_name)
    print("Plot saved to: {}".format(file_name))
    file_name = os.path.join(save_dir, f"pca_class{args.class_}.pdf")
    fig.savefig(file_name)
    print("Plot saved to: {}".format(file_name))
    plt.close()
# NOTE(review): this chunk is part of a CLI plotting dispatch; the guard for
# this first section (presumably `if args.traintest:` or similar) lies
# outside this view.
path = os.path.join(args.model_dir, 'losses_test.csv')
if not os.path.exists(path):  # generate the test-loss csv lazily
    gen_testloss(args)
plot_traintest(args, path)
if args.acc:
    path = os.path.join(args.model_dir, 'accuracy.csv')
    if not os.path.exists(path):  # generate the accuracy csv lazily
        gen_training_accuracy(args)
    plot_accuracy(args, path)
if args.pca or args.hist or args.heat or args.nearcomp_sup or args.nearcomp_unsup or args.nearcomp_class:
    ## load data and model (shared by all feature-based plots)
    params = utils.load_params(args.model_dir)
    net, epoch = tf.load_checkpoint(args.model_dir, args.epoch, eval_=True)
    transforms = tf.load_transforms('test')
    trainset = tf.load_trainset(params['data'], transforms)
    if 'lcr' in params.keys():  # supervised corruption case
        trainset = tf.corrupt_labels(trainset, params['lcr'], params['lcs'])
    trainloader = DataLoader(trainset, batch_size=200, num_workers=4)
    features, labels = tf.get_features(net, trainloader)
    if args.pca:
        plot_pca(args, features, labels, epoch)
    if args.nearcomp_sup:
        plot_nearest_component_supervised(args, features, labels, epoch, trainset)
    if args.nearcomp_unsup:
        plot_nearest_component_unsupervised(args, features, labels, epoch, trainset)
    if args.nearcomp_class:
        # NOTE(review): truncated at chunk boundary — the body of this branch
        # continues outside this view.
# Prepare data loaders transform_train = transforms.Compose([ transforms.Resize(72), transforms.RandomCrop(64), transforms.RandomHorizontalFlip(), transforms.ToTensor(), ]) transform_test = transforms.Compose([ transforms.Resize(72), transforms.CenterCrop(64), transforms.ToTensor(), ]) trainset = tf.load_trainset(args.dataset[0], transform_train, train=True, path=args.datadir) testset = tf.load_trainset(args.dataset[0], transform_test, train=False, path=args.datadir) trainloader = DataLoader(trainset, batch_size=128) testloader = DataLoader(testset, batch_size=100) train_loaders, val_loaders = [trainloader], [testloader] args.num_classes = [trainset.num_classes] # Load checkpoint and initialize the networks with the weights of a pretrained network print('==> Resuming from checkpoint..') net_old, _ = tf.load_checkpoint(args.source, None, eval_=True)
print("Use pretrained network on: {}".format(source_ds_name))
# Substitute the source dataset name into the '{}' placeholder of
# args.model_dir to locate the pretrained model directory.
pretrained_model_dir = args.model_dir.split(
    "{}")[0] + source_ds_name + args.model_dir.split("{}")[1]

## load model
params = utils.load_params(pretrained_model_dir)
net, epoch = tf.load_checkpoint(pretrained_model_dir, args.epoch, eval_=True)
net = net.cuda().eval()

# evaluate transfer from the pretrained source model to each target dataset
for j, target_ds_name in enumerate(dataset_list):
    stats_dict = {}
    # get train features and labels
    train_transforms = tf.load_transforms('transfer') #('test')
    trainset = tf.load_trainset(target_ds_name, train_transforms,
                                train=True, path=args.data_dir)
    if 'lcr' in params.keys():  # supervised corruption case
        trainset = tf.corrupt_labels(trainset, params['lcr'], params['lcs'])
    new_labels = trainset.targets
    trainloader = DataLoader(trainset, batch_size=200)
    print("Target task on: {}".format(target_ds_name))
    train_features, train_labels = tf.get_features(net, trainloader, verbose=False)
    # get test features and labels
    test_transforms = tf.load_transforms('transfer') #('test')
    # NOTE(review): call truncated at chunk boundary; its remaining arguments
    # continue outside this view.
    testset = tf.load_trainset(target_ds_name, test_transforms,