def gen_training_accuracy(args):
    # load data and model
    params = utils.load_params(args.model_dir)
    ckpt_dir = os.path.join(args.model_dir, 'checkpoints')
    ckpt_epochs = [int(e[11:-3]) for e in os.listdir(ckpt_dir) if e[-3:] == ".pt"]
    ckpt_epochs = np.sort(ckpt_epochs)

    # csv
    headers = ["epoch", "acc_train", "acc_test"]
    csv_path = utils.create_csv(args.model_dir, 'accuracy.csv', headers)

    for epoch in ckpt_epochs:  # iterate the saved epochs directly; do not shadow the list
        if epoch % 5 != 0:
            continue
        net, epoch = tf.load_checkpoint(args.model_dir, epoch=epoch, eval_=True)

        # load data
        train_transforms = tf.load_transforms('test')
        trainset = tf.load_trainset(params['data'], train_transforms, train=True)
        trainloader = DataLoader(trainset, batch_size=500, num_workers=4)
        train_features, train_labels = tf.get_features(net, trainloader, verbose=False)

        test_transforms = tf.load_transforms('test')
        testset = tf.load_trainset(params['data'], test_transforms, train=False)
        testloader = DataLoader(testset, batch_size=500, num_workers=4)
        test_features, test_labels = tf.get_features(net, testloader, verbose=False)

        acc_train, acc_test = svm(args, train_features, train_labels,
                                  test_features, test_labels)
        utils.save_state(args.model_dir, epoch, acc_train, acc_test,
                         filename='accuracy.csv')
    print("Finished generating accuracy.")
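# The svm helper called above is assumed to fit a linear classifier on the
# extracted features and return (train accuracy, test accuracy). A minimal
# sketch using scikit-learn's LinearSVC follows; the repo's actual solver and
# hyperparameters may differ, and torch tensors are assumed as inputs.
from sklearn.svm import LinearSVC

def svm(args, train_features, train_labels, test_features, test_labels):
    clf = LinearSVC(random_state=10)
    clf.fit(train_features.numpy(), train_labels.numpy())
    acc_train = clf.score(train_features.numpy(), train_labels.numpy())
    acc_test = clf.score(test_features.numpy(), test_labels.numpy())
    print("SVM: acc_train={:.4f}, acc_test={:.4f}".format(acc_train, acc_test))
    return acc_train, acc_test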
train_transforms = tf.load_transforms('test')
trainset = tf.load_trainset(params['data'], train_transforms,
                            train=True, path=args.data_dir)
if 'lcr' in params.keys():  # supervised label-corruption case
    trainset = tf.corrupt_labels(trainset, params['lcr'], params['lcs'])
    new_labels = trainset.targets
assert (trainset.num_classes % args.cpb == 0), \
    "Number of classes not divisible by cpb"

## load model
net, epoch = tf.load_checkpoint_ce(args.model_dir, trainset.num_classes,
                                   args.epoch, eval_=True,
                                   label_batch_id=args.label_batch)
net = net.cuda().eval()

classes = np.unique(trainset.targets)
class_batch_num = trainset.num_classes // args.cpb
class_batch_list = classes.reshape(class_batch_num, args.cpb)

# get test features and labels
test_transforms = tf.load_transforms('test')
testset = tf.load_trainset(params['data'], test_transforms, train=False)
subtestset = tf.get_subset(class_batch_list[0, :], testset)
testloader = DataLoader(subtestset, batch_size=200)
test_features, test_labels = tf.get_features(net, testloader)
calc_acc(test_features, test_labels)
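# Hedged sketch of the calc_acc helper called above. Since the model here is
# the cross-entropy head loaded via load_checkpoint_ce, the features are
# assumed to be class logits (CPU torch tensors), so top-1 accuracy is an
# argmax match; the repo's version may compute this differently.
def calc_acc(features, labels):
    preds = features.argmax(dim=1)  # predicted class per sample
    acc = (preds == labels).float().mean().item()
    print("Accuracy: {:.4f}".format(acc))
    return acc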
net, epoch = tf.load_checkpoint(args.model_dir, args.epoch, eval_=True)
net = net.cuda().eval()

# get train features and labels
train_transforms = tf.load_transforms('test')
trainset = tf.load_trainset(params['data'], train_transforms,
                            train=True, path=args.data_dir)
if 'lcr' in params.keys():  # supervised label-corruption case
    trainset = tf.corrupt_labels(params['corrupt'])(trainset, params['lcr'], params['lcs'])
    new_labels = trainset.targets
trainloader = DataLoader(trainset, batch_size=200)
train_features, train_labels = tf.get_features(net, trainloader)

# get test features and labels
test_transforms = tf.load_transforms('test')
testset = tf.load_trainset(params['data'], test_transforms, train=False)
testloader = DataLoader(testset, batch_size=200)
test_features, test_labels = tf.get_features(net, testloader)

if args.svm:
    svm(args, train_features, train_labels, test_features, test_labels)
if args.knn:
    knn(args, train_features, train_labels, test_features, test_labels)
if args.nearsub:
    nearsub(args, train_features, train_labels, test_features, test_labels)
if args.kmeans:
    kmeans(args, train_features, train_labels)
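# Hedged sketch of the knn evaluation toggled above, using scikit-learn's
# KNeighborsClassifier. The neighborhood size is assumed to live in a
# hypothetical args.k flag; the repo may use its own implementation instead.
from sklearn.neighbors import KNeighborsClassifier

def knn(args, train_features, train_labels, test_features, test_labels):
    clf = KNeighborsClassifier(n_neighbors=args.k)  # args.k is an assumed flag
    clf.fit(train_features.numpy(), train_labels.numpy())
    acc = clf.score(test_features.numpy(), test_labels.numpy())
    print("kNN: acc={:.4f}".format(acc))
    return acc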
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Extract features from model and data')
    parser.add_argument('--model_dir', type=str,
                        help='base directory for saving PyTorch model.')
    parser.add_argument('--epoch', type=int, default=None,
                        help='which epoch for evaluation')
    parser.add_argument('--save_dir', type=str, default="./extractions/")
    parser.add_argument('--tail', type=str, default='',
                        help='extra information to add to file name')
    args = parser.parse_args()

    params = utils.load_params(args.model_dir)
    net, epoch = tf.load_checkpoint(args.model_dir, args.epoch, eval_=True)
    train_transforms = tf.load_transforms('test')
    trainset = tf.load_trainset(params['data'], train_transforms, train=True)
    trainloader = DataLoader(trainset, batch_size=200, num_workers=4)
    features, labels = tf.get_features(net, trainloader)

    if not os.path.exists(args.save_dir):
        os.mkdir(args.save_dir)
    np.save(os.path.join(args.save_dir, "features.npy"),
            features.cpu().detach().numpy())
    np.save(os.path.join(args.save_dir, "labels.npy"), labels.numpy())
    make_tarfile("./extractions.tgz", args.save_dir)
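# make_tarfile is called above but not defined in this chunk; a minimal
# standard-library sketch matching that call (gzipped tarball of the
# extraction directory) would look like this:
import tarfile

def make_tarfile(output_filename, source_dir):
    # write source_dir into a .tgz archive, keeping the directory name as root
    with tarfile.open(output_filename, "w:gz") as tar:
        tar.add(source_dir, arcname=os.path.basename(os.path.normpath(source_dir)))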
def plot_pca_epoch(args):
    """Plot PCA singular values for several training epochs in one figure."""
    EPOCHS = [0, 10, 100, 500]

    params = utils.load_params(args.model_dir)
    transforms = tf.load_transforms('test')
    trainset = tf.load_trainset(params['data'], transforms)
    trainloader = DataLoader(trainset, batch_size=200, num_workers=4)

    sig_vals = []
    for epoch in EPOCHS:
        epoch_ = epoch - 1
        if epoch_ == -1:  # randomly initialized model
            net = tf.load_architectures(params['arch'], params['fd'])
        else:
            net, _ = tf.load_checkpoint(args.model_dir, epoch=epoch_, eval_=True)
        features, labels = tf.get_features(net, trainloader)
        if args.class_ is not None:
            features_sort, _ = utils.sort_dataset(features.numpy(), labels.numpy(),
                                                  num_classes=trainset.num_classes,
                                                  stack=False)
            features_ = features_sort[args.class_]
        else:
            features_ = features.numpy()
        n_comp = np.min([args.comp, features.shape[1]])
        pca = PCA(n_components=n_comp).fit(features_)
        sig_vals.append(pca.singular_values_)

    ## plot singular values
    plt.rc('text', usetex=True)
    plt.rcParams['font.family'] = 'serif'
    plt.rcParams['font.serif'] = ['Times New Roman']
    fig, ax = plt.subplots(1, 1, figsize=(7, 5), dpi=400)
    x_min = np.min([len(sig_val) for sig_val in sig_vals])
    ax.set_xticks(np.arange(0, x_min, 10))
    if args.class_ is not None:
        ax.set_yticks(np.linspace(0, 40, 9))
        ax.set_ylim(0, 40)
    else:
        ax.set_yticks(np.linspace(0, 80, 9))
        ax.set_ylim(0, 90)
    for epoch, sig_val in zip(EPOCHS, sig_vals):
        ax.plot(np.arange(x_min), sig_val[:x_min], marker='', markersize=5,
                label=f'epoch {epoch}', alpha=0.6)
    ax.legend(loc='upper right', frameon=True, fancybox=True,
              prop={"size": 8}, ncol=1, framealpha=0.5)
    ax.set_xlabel("components")
    ax.set_ylabel("singular values")
    for side in ['top', 'right', 'bottom', 'left']:
        ax.spines[side].set_visible(False)
    ax.tick_params(axis='both', labelsize=12)
    ax.grid(True, color='white')
    ax.set_facecolor('whitesmoke')
    fig.tight_layout()

    ## save (create the directory before writing into it)
    save_dir = os.path.join(args.model_dir, 'figures', 'pca')
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    np.save(os.path.join(save_dir, "sig_vals_epoch.npy"), sig_vals)
    file_name = os.path.join(save_dir, f"pca_class{args.class_}.png")
    fig.savefig(file_name)
    print("Plot saved to: {}".format(file_name))
    file_name = os.path.join(save_dir, f"pca_class{args.class_}.pdf")
    fig.savefig(file_name)
    print("Plot saved to: {}".format(file_name))
    plt.close()
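# Hedged sketch of utils.sort_dataset as used in plot_pca_epoch: with
# stack=False it is assumed to return the feature rows grouped per class
# (a list of arrays) plus the matching label groups; the repo's version may
# differ in details such as ordering.
def sort_dataset(data, labels, num_classes=10, stack=False):
    sorted_data = [data[labels == c] for c in range(num_classes)]
    sorted_labels = [labels[labels == c] for c in range(num_classes)]
    if stack:  # optionally concatenate the groups back into single arrays
        return np.vstack(sorted_data), np.hstack(sorted_labels)
    return sorted_data, sorted_labels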
for j, target_ds_name in enumerate(dataset_list):
    stats_dict = {}
    # get train features and labels
    train_transforms = tf.load_transforms('transfer')  # ('test')
    trainset = tf.load_trainset(target_ds_name, train_transforms,
                                train=True, path=args.data_dir)
    if 'lcr' in params.keys():  # supervised label-corruption case
        trainset = tf.corrupt_labels(trainset, params['lcr'], params['lcs'])
        new_labels = trainset.targets
    trainloader = DataLoader(trainset, batch_size=200)
    print("Target task on: {}".format(target_ds_name))
    train_features, train_labels = tf.get_features(net, trainloader, verbose=False)

    # get test features and labels
    test_transforms = tf.load_transforms('transfer')  # ('test')
    testset = tf.load_trainset(target_ds_name, test_transforms,
                               train=False, path=args.data_dir)
    testloader = DataLoader(testset, batch_size=200)
    test_features, test_labels = tf.get_features(net, testloader, verbose=False)

    trainloader_for_mcr = DataLoader(trainset, batch_size=1000,