def plot_sample_angle_combined(train_features, train_labels, test_features, test_labels, model_dir, title1, title2, tail=""):
    """Plot one pooled histogram of train/test inner products across classes.

    For every ordered pair of *distinct* classes ``(a, b)``, the inner
    products between the training features of class ``a`` and the test
    features of class ``b`` are flattened and pooled. The pooled values are
    drawn as a single histogram and written to
    ``<model_dir>/figures/sample_angle_combined/
    sample_angle_combined-<title1>-vs-<title2><tail>.pdf``.

    Args:
        train_features: training features, forwarded to ``utils.sort_dataset``.
        train_labels: labels of ``train_features``; also defines the class set.
        test_features: test features, forwarded to ``utils.sort_dataset``.
        test_labels: labels of ``test_features``.
        model_dir: root directory under which the figure folder is created.
        title1: first tag used in the output file name.
        title2: second tag used in the output file name.
        tail: optional suffix appended to the output file name.
    """
    save_dir = os.path.join(model_dir, "figures", "sample_angle_combined")
    os.makedirs(save_dir, exist_ok=True)

    # 20 equal-width bins covering [-0.05, 1.05].
    _bins = np.linspace(-0.05, 1.05, 21)

    # The class set comes from the training labels (the original code read an
    # undefined name ``y_train`` here and failed with NameError).
    classes = np.unique(train_labels)
    fs_train, _ = utils.sort_dataset(train_features, train_labels, classes=classes, stack=False)
    fs_test, _ = utils.sort_dataset(test_features, test_labels, classes=classes, stack=False)

    # NOTE(review): the per-class containers are indexed with the class
    # *value*; this presumably assumes labels are 0..K-1 - confirm against
    # utils.sort_dataset.
    angles = [
        (fs_train[class_train] @ fs_test[class_test].T).reshape(-1)
        for class_train in classes
        for class_test in classes
        if class_train != class_test
    ]

    plt.rcParams['font.family'] = 'serif'
    plt.rcParams['font.serif'] = ['Times New Roman']
    fig, ax = plt.subplots(figsize=(7, 5))
    ax.hist(np.hstack(angles), bins=_bins, alpha=0.5, color='red', edgecolor='black')
    ax.set_xlabel('Similarity', fontsize=38)
    ax.set_ylabel('Count', fontsize=38)
    ax.ticklabel_format(style='sci', scilimits=(0, 3))
    # ``Tick.label`` no longer exists in current Matplotlib; tick_params is
    # the supported way to size tick labels.
    ax.tick_params(axis='both', which='major', labelsize=22)
    fig.tight_layout()
    fig.savefig(
        os.path.join(save_dir, f'sample_angle_combined-{title1}-vs-{title2}{tail}.pdf'))
    plt.close()
def plot_nearest_component_class(args, features, labels, epoch, trainset):
    """Show, per class, the samples that project most strongly onto each of
    the class's leading SVD components.

    For every class a 10-component ``TruncatedSVD`` is fitted on that class's
    features. For each of the first 8 components the 10 samples with the
    largest absolute projection are selected and drawn as one row of an
    8 x 10 image grid, each captioned with its signed projection value.
    One PNG and one PDF per class are written to
    ``<args.model_dir>/figures/nearcomp_class``.

    ``epoch`` is accepted but not used by this function.
    """
    shown_components = 8
    class_count = trainset.num_classes
    feats_by_class, _ = utils.sort_dataset(features.numpy(), labels.numpy(),
                                           num_classes=class_count, stack=False)
    imgs_by_class, _ = utils.sort_dataset(trainset.data, labels.numpy(),
                                          num_classes=class_count, stack=False)

    for class_ in range(class_count):
        class_feats = feats_by_class[class_]
        svd = TruncatedSVD(n_components=10, random_state=10).fit(class_feats)

        top_imgs, top_vals = [], []
        for comp in range(shown_components):
            proj = class_feats @ svd.components_.T[:, comp]
            # Indices of the 10 largest |projection| values, strongest first.
            order = np.argsort(np.abs(proj), axis=0)[::-1][:10]
            top_vals.append(proj[order])
            top_imgs.append(np.array(imgs_by_class[class_])[order])

        fig, ax = plt.subplots(ncols=10, nrows=8, figsize=(10, 10))
        # ndenumerate walks the axes grid row by row: r = component, c = rank.
        for (r, c), cell in np.ndenumerate(ax):
            cell.imshow(top_imgs[r][c])
            cell.set_xticks([])
            cell.set_yticks([])
            for side in ('top', 'right'):
                cell.spines[side].set_visible(False)
            for side in ('bottom', 'left'):
                cell.spines[side].set_linewidth(False)
            cell.set_xlabel(f"proj: {top_vals[r][c]:.2f}")
            if c == 0:
                cell.set_ylabel(f"comp {r}")
        fig.tight_layout()

        ## save
        save_dir = os.path.join(args.model_dir, 'figures', 'nearcomp_class')
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        for ext in ("png", "pdf"):
            file_name = os.path.join(save_dir, f"nearest_class{class_}.{ext}")
            fig.savefig(file_name)
            print(f"Plot saved to: {file_name}")
        plt.close()
def nearsub(train_features, train_labels, test_features, test_labels, num_classes, n_comp=10, return_pred=False):
    """Nearest-subspace classification with per-class SVD bases (torch).

    For each class ``j`` the first ``n_comp`` right singular vectors of that
    class's training features span a subspace. Every train and test sample is
    scored by the 2-norm of its component outside that subspace, and the class
    with the smallest score is predicted.

    Args:
        train_features, train_labels: training data used to build the bases.
        test_features, test_labels: held-out data to classify.
        num_classes: number of classes; classes are ``0..num_classes-1``.
        n_comp: number of singular vectors kept per class.
        return_pred: when true, return predictions instead of accuracies.

    Returns:
        ``(train_pred, test_pred)`` as numpy arrays if ``return_pred`` is
        true, otherwise ``(train_acc, test_acc)`` from ``compute_accuracy``.
    """
    classes = np.arange(num_classes)
    feats_by_class, _ = utils.sort_dataset(train_features, train_labels,
                                           classes=classes, stack=False)
    fd = feats_by_class[0].shape[1]

    def _residual_norms(projector, feats):
        # Column-wise 2-norm of the part of each sample outside the subspace.
        return torch.linalg.norm(projector @ feats.T, ord=2, axis=0)

    train_scores = []
    test_scores = []
    for j in classes:
        _, _, right_vecs = torch.svd(feats_by_class[j])
        components = right_vecs[:, :n_comp].T
        # Projector onto the orthogonal complement of the class subspace.
        complement = torch.eye(fd) - components.T @ components
        train_scores.append(_residual_norms(complement, train_features))
        test_scores.append(_residual_norms(complement, test_features))

    train_pred = torch.stack(train_scores).argmin(0)
    test_pred = torch.stack(test_scores).argmin(0)
    if return_pred:
        return train_pred.numpy(), test_pred.numpy()

    train_acc = compute_accuracy(classes[train_pred], train_labels.numpy())
    test_acc = compute_accuracy(classes[test_pred], test_labels.numpy())
    print('SVD: {}, {}'.format(train_acc, test_acc))
    return train_acc, test_acc
def plot_heatmap(model_dir, name, features, labels, num_classes):
    """Plot a heatmap of absolute pairwise inner products of class-sorted features.

    The features are grouped by class via ``utils.sort_dataset``, stacked, and
    the absolute Gram matrix is drawn with the ``Blues`` colormap. The figure
    is saved to ``<model_dir>/figures/heatmaps/<name>.png``.

    Args:
        model_dir: root directory under which the figure folder is created.
        name: file stem of the saved PNG.
        features: feature matrix, one row per sample.
        labels: labels of ``features``; ``len(labels)`` sets the tick range.
        num_classes: number of classes; ``num_classes + 1`` ticks are drawn.
    """
    # NOTE(review): ``num_classes`` is forwarded as the ``classes`` keyword,
    # whereas other callers pass an array of class ids - confirm that
    # utils.sort_dataset accepts an int here.
    features_sort, _ = utils.sort_dataset(features, labels, classes=num_classes, stack=False)
    features_sort_ = np.vstack(features_sort)
    sim_mat = np.abs(features_sort_ @ features_sort_.T)

    fig, ax = plt.subplots(figsize=(7, 5), sharey=True, sharex=True)
    im = ax.imshow(sim_mat, cmap='Blues')
    fig.colorbar(im, pad=0.02, drawedges=0, ticks=[0, 0.5, 1])
    tick_positions = np.linspace(0, len(labels), num_classes + 1)
    ax.set_xticks(tick_positions)
    ax.set_yticks(tick_positions)
    # ``Tick.label`` no longer exists in current Matplotlib; tick_params is
    # the supported way to size tick labels.
    ax.tick_params(axis='both', which='major', labelsize=10)
    fig.tight_layout()

    save_dir = os.path.join(model_dir, 'figures', 'heatmaps')
    os.makedirs(save_dir, exist_ok=True)
    file_name = os.path.join(save_dir, f"{name}.png")
    fig.savefig(file_name)
    print("Plot saved to: {}".format(file_name))
    plt.close()
def nearsub(args, train_features, train_labels, test_features, test_labels):
    """Perform nearest subspace classification with PCA and SVD bases.

    For every class a PCA basis and a TruncatedSVD basis with ``args.n_comp``
    components are fitted on that class's training features. Each test sample
    is scored by the 2-norm of its residual outside the class subspace (the
    PCA variant subtracts the class mean first), and the class with the
    smallest residual is predicted. Both accuracies are printed.

    Args:
        args: object providing ``n_comp`` (number of components per class).
        train_features, train_labels: tensors exposing ``.numpy()``.
        test_features, test_labels: tensors exposing ``.numpy()``.

    Returns:
        The PCA accuracy only; the SVD accuracy is printed but not returned.
    """
    train_np = train_features.numpy()
    train_labels_np = train_labels.numpy()
    test_np = test_features.numpy()
    test_labels_np = test_labels.numpy()

    num_classes = train_labels_np.max() + 1  # should be correct most of the time
    features_sort, _ = utils.sort_dataset(train_np, train_labels_np,
                                          num_classes=num_classes, stack=False)

    # Feature dimension is read from the data itself; the original looked it
    # up in a ``params`` dict that is not defined inside this function.
    fd = train_np.shape[1]
    identity = np.eye(fd)

    scores_pca = []
    scores_svd = []
    for j in range(num_classes):
        class_feats = features_sort[j]

        pca_subspace = PCA(n_components=args.n_comp).fit(class_feats).components_.T
        mean = np.mean(class_feats, axis=0)
        pca_j = (identity - pca_subspace @ pca_subspace.T) @ (test_np - mean).T
        scores_pca.append(np.linalg.norm(pca_j, ord=2, axis=0))

        svd_subspace = TruncatedSVD(n_components=args.n_comp).fit(class_feats).components_.T
        svd_j = (identity - svd_subspace @ svd_subspace.T) @ test_np.T
        scores_svd.append(np.linalg.norm(svd_j, ord=2, axis=0))

    test_predict_pca = np.argmin(scores_pca, axis=0)
    test_predict_svd = np.argmin(scores_svd, axis=0)
    acc_pca = utils.compute_accuracy(test_predict_pca, test_labels_np)
    acc_svd = utils.compute_accuracy(test_predict_svd, test_labels_np)
    print('PCA: {}'.format(acc_pca))
    print('SVD: {}'.format(acc_svd))
    return acc_pca
def plot_heatmap(features, labels, title, model_dir):
    """Plot a heatmap of absolute pairwise inner products of class-sorted features.

    The features are sorted and stacked by class via ``utils.sort_dataset``.
    The absolute Gram matrix is drawn with the ``Blues`` colormap and a
    colorbar on the right, and saved to
    ``<model_dir>/figures/heatmaps/heatmap-<title>.pdf``. The minimum and
    maximum of the matrix are printed.

    Args:
        features: feature matrix, one row per sample.
        labels: labels of ``features``; the number of distinct labels sets
            the number of ticks.
        title: tag used in the output file name.
        model_dir: root directory under which the figure folder is created.
    """
    num_samples = features.shape[0]
    classes = np.arange(np.unique(labels).size)
    features_sort_, _ = utils.sort_dataset(features, labels, classes=classes, stack=True)
    sim_mat = np.abs(features_sort_ @ features_sort_.T)
    print(sim_mat.min(), sim_mat.max())

    plt.rcParams['font.family'] = 'serif'
    plt.rcParams['font.serif'] = ['Times New Roman']

    fig, ax = plt.subplots(figsize=(8, 7), sharey=True, sharex=True)
    im = ax.imshow(sim_mat, cmap='Blues')
    # Dedicated colorbar axes keep the bar the same height as the image.
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", size="5%", pad=0.1)
    cbar = fig.colorbar(im, cax=cax, drawedges=0, ticks=[0, 0.5, 1])
    cbar.ax.tick_params(labelsize=18)

    tick_positions = np.linspace(0, num_samples, len(classes) + 1)
    ax.set_xticks(tick_positions)
    ax.set_yticks(tick_positions)
    # ``Tick.label`` no longer exists in current Matplotlib; tick_params is
    # the supported way to size tick labels.
    ax.tick_params(axis='both', which='major', labelsize=24)
    fig.tight_layout()

    save_dir = os.path.join(model_dir, "figures", "heatmaps")
    os.makedirs(save_dir, exist_ok=True)
    # Save through the figure object rather than the implicit current figure.
    fig.savefig(os.path.join(save_dir, f"heatmap-{title}.pdf"))
    plt.close()
def nearsub(train_features, train_labels, test_features, test_labels, n_comp=10):
    """Nearest-subspace classification using per-class TruncatedSVD bases.

    The class set is taken from the distinct values of ``test_labels``. For
    each class a ``TruncatedSVD`` basis is fitted on that class's training
    features; every test sample is scored by the 2-norm of its residual
    outside the class subspace and assigned to the class with the smallest
    score. The accuracy is printed and returned.

    Options:
        n_comp (int): number of SVD components; lowered to ``fd - 1`` when it
            is not smaller than the feature dimension ``fd``.
    """
    classes = np.unique(test_labels)
    feats_by_class, _ = utils.sort_dataset(train_features, train_labels,
                                           classes=classes, stack=False)
    fd = feats_by_class[0].shape[1]
    if fd <= n_comp:
        n_comp = fd - 1

    identity = np.eye(fd)
    residual_norms = []
    for idx in range(len(classes)):
        fitted = TruncatedSVD(n_components=n_comp).fit(feats_by_class[idx])
        basis = fitted.components_.T
        # Part of every test sample lying outside the class subspace.
        outside = (identity - basis @ basis.T) @ (test_features).T
        residual_norms.append(np.linalg.norm(outside, ord=2, axis=0))

    predicted = np.argmin(residual_norms, axis=0)
    acc_svd = compute_accuracy(classes[predicted], test_labels)
    print('SVD: {}'.format(acc_svd))
    return acc_svd
def nearsub_pca(train_features, train_labels, test_features, test_labels, n_comp=10):
    """Perform nearest subspace classification with per-class PCA bases.

    The class set is taken from the distinct values of ``test_labels``. For
    each class a PCA basis is fitted on that class's training features; every
    test sample is centred by the class mean and scored by the 2-norm of its
    residual outside the class subspace. The class with the smallest score is
    predicted. The accuracy is printed and returned.

    Options:
        n_comp (int): number of PCA components; lowered to ``fd - 1`` when it
            is not smaller than the feature dimension ``fd``.

    Returns:
        The PCA accuracy as computed by ``compute_accuracy``.
    """
    scores_pca = []
    classes = np.unique(test_labels)
    features_sort, _ = utils.sort_dataset(train_features, train_labels,
                                          classes=classes, stack=False)
    fd = features_sort[0].shape[1]
    if n_comp >= fd:
        n_comp = fd - 1
    identity = np.eye(fd)
    for j in np.arange(len(classes)):
        pca = PCA(n_components=n_comp).fit(features_sort[j])
        pca_subspace = pca.components_.T
        mean = np.mean(features_sort[j], axis=0)
        pca_j = (identity - pca_subspace @ pca_subspace.T) @ (test_features - mean).T
        scores_pca.append(np.linalg.norm(pca_j, ord=2, axis=0))
    test_predict_pca = np.argmin(scores_pca, axis=0)
    acc_pca = compute_accuracy(classes[test_predict_pca], test_labels)
    print('PCA: {}'.format(acc_pca))
    # The original returned ``acc_svd``, a name that does not exist in this
    # function, so every call ended in NameError.
    return acc_pca
def get_dataset(self):
    """Read ``self.file_path`` and return its sentences, sort indices and label counts.

    The file is read as UTF-8. Blank lines separate sentences; every other
    line is split on whitespace into one row. A byte-order mark is stripped
    from the first field of each row, and the last field of each row is
    counted as its label. Each stored sentence is wrapped with a
    ``[START_TAG, START_TAG]`` row in front and an ``[END_TAG, END_TAG]`` row
    at the end.

    Returns:
        ``(new_dataset, orig_idx, label_counts)`` where the first two values
        come from ``sort_dataset(new_dataset, sort=True)`` and
        ``label_counts`` is a ``Counter`` over the last column.
    """
    # Context manager so the file handle is closed (the original leaked it).
    with open(self.file_path, encoding='utf-8') as handle:
        dataset = handle.readlines()

    new_dataset = []
    sent = []
    label_counts = Counter()
    root = [START_TAG, START_TAG]
    for line in dataset:
        if len(line.strip()) == 0:
            if len(sent) > 0:
                sent.append([END_TAG, END_TAG])
                # After the end row is appended, a length above 2 means the
                # sentence holds at least two data rows; shorter ones are
                # dropped.
                if len(sent) > 2:
                    new_dataset.append([root] + sent)
                sent = []
        else:
            row = line.rstrip().split()
            row[0] = row[0].replace("\ufeff", "")
            sent.append(row)
            label_counts.update([row[-1]])

    # A final sentence not followed by a blank line is kept regardless of
    # its length (unlike the in-loop case above).
    if len(sent) > 0:
        sent.append([END_TAG, END_TAG])
        new_dataset.append([root] + sent)

    print("Number of sentences : {} ".format(len(new_dataset)))
    new_dataset, orig_idx = sort_dataset(new_dataset, sort=True)
    return new_dataset, orig_idx, label_counts
def plot_hist(args, features, labels, epoch):
    """Draw one inner-product histogram for every unordered pair of classes.

    The number of classes is ``labels.max() + 1``. For each pair ``(i, j)``
    with ``i <= j`` a 40-bin histogram of the inner products between the
    features of class ``i`` and class ``j`` is saved as ``hist_<i>v<j>``
    under ``<args.model_dir>/figures/hist``. For ``i == j`` only the strict
    upper triangle of the Gram matrix is used, so self-products and duplicate
    pairs are excluded.

    ``epoch`` is accepted but not used by this function.
    """
    ## create save folder
    out_dir = os.path.join(args.model_dir, 'figures', 'hist')
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    num_classes = labels.numpy().max() + 1
    by_class, _ = utils.sort_dataset(features.numpy(), labels.numpy(),
                                     num_classes=num_classes, stack=False)

    class_pairs = ((a, b) for a in range(num_classes) for b in range(a, num_classes))
    for i, j in class_pairs:
        fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(7, 5), dpi=250)
        gram = by_class[i] @ by_class[j].T
        if i != j:
            values = gram.reshape(-1)
        else:
            values = gram[np.triu_indices(gram.shape[0], k=1)]
        ax.hist(values, bins=40, color='red', alpha=0.5)
        ax.set_xlabel("cosine similarity")
        ax.set_ylabel("count")
        ax.set_title(f"Class {i} vs. Class {j}")
        for side in ('right', 'top'):
            ax.spines[side].set_visible(False)
        fig.tight_layout()

        file_name = os.path.join(out_dir, f"hist_{i}v{j}")
        fig.savefig(file_name)
        plt.close()
        print(f"Plot saved to: {file_name}")
def plot_heatmap(args, features, labels, epoch, trainset=None):
    """Plot a heatmap of absolute pairwise inner products of class-sorted features.

    The features are grouped by class, stacked, and the absolute Gram matrix
    is drawn with the ``Blues`` colormap. The figure is saved as PNG and PDF
    named ``heatmat_epoch<epoch>`` under ``<args.model_dir>/figures/heatmaps``.

    Args:
        args: object providing ``model_dir``.
        features: tensor of features exposing ``.numpy()``.
        labels: tensor of labels exposing ``.numpy()``.
        epoch: epoch tag used in the output file names.
        trainset: optional dataset providing ``num_classes``. When omitted the
            class count is ``labels.max() + 1``. The original read an
            undefined name ``trainset`` and failed unless a module-level
            global happened to exist.
    """
    labels_np = labels.numpy()
    if trainset is not None:
        num_classes = trainset.num_classes
    else:
        num_classes = int(labels_np.max()) + 1
    features_sort, _ = utils.sort_dataset(features.numpy(), labels_np,
                                          num_classes=num_classes, stack=False)
    features_sort_ = np.vstack(features_sort)
    sim_mat = np.abs(features_sort_ @ features_sort_.T)

    plt.rc('text', usetex=False)
    plt.rcParams['font.family'] = 'serif'
    plt.rcParams['font.serif'] = ['Times New Roman']
    fig, ax = plt.subplots(figsize=(7, 5), sharey=True, sharex=True, dpi=400)
    im = ax.imshow(sim_mat, cmap='Blues')
    fig.colorbar(im, pad=0.02, drawedges=0, ticks=[0, 0.5, 1])
    # Six ticks spanning the real sample count; the original hard-coded
    # 50000, which is only correct for a 50000-sample dataset.
    tick_positions = np.linspace(0, sim_mat.shape[0], 6)
    ax.set_xticks(tick_positions)
    ax.set_yticks(tick_positions)
    # ``Tick.label`` no longer exists in current Matplotlib; tick_params is
    # the supported way to size tick labels.
    ax.tick_params(axis='both', which='major', labelsize=10)
    fig.tight_layout()

    save_dir = os.path.join(args.model_dir, 'figures', 'heatmaps')
    os.makedirs(save_dir, exist_ok=True)
    for ext in ("png", "pdf"):
        file_name = os.path.join(save_dir, f"heatmat_epoch{epoch}.{ext}")
        fig.savefig(file_name)
        print("Plot saved to: {}".format(file_name))
    plt.close()
def plot_pca(args, features, labels, epoch, trainset=None):
    """Plot PCA singular values of the learned features, overall and per class.

    A PCA with ``min(args.comp, feature_dim)`` components is fitted on all
    features and on each class separately. The singular values are plotted
    (all features in ``tomato``, classes in shades of blue), stored in
    ``sig_vals.npy`` and the figure is saved as PNG and PDF named
    ``pca_classVclass_epoch<epoch>`` under ``<args.model_dir>/figures/pca``.

    Args:
        args: object providing ``model_dir`` and ``comp``.
        features: tensor of features exposing ``.numpy()`` and ``.shape``.
        labels: tensor of labels exposing ``.numpy()``.
        epoch: epoch tag used in the output file names.
        trainset: optional dataset providing ``num_classes``. When omitted the
            class count is ``labels.max() + 1``. The original read an
            undefined name ``trainset`` and failed unless a module-level
            global happened to exist.
    """
    ## create save folder
    pca_dir = os.path.join(args.model_dir, 'figures', 'pca')
    os.makedirs(pca_dir, exist_ok=True)

    features_np = features.numpy()
    labels_np = labels.numpy()
    if trainset is not None:
        num_classes = trainset.num_classes
    else:
        num_classes = int(labels_np.max()) + 1

    ## perform PCA on features
    n_comp = np.min([args.comp, features.shape[1]])
    features_sort, _ = utils.sort_dataset(features_np, labels_np,
                                          num_classes=num_classes, stack=False)
    # Entry 0 holds the spectrum of all features; entries 1.. are per class.
    sig_vals = [PCA(n_components=n_comp).fit(features_np).singular_values_]
    for c in range(num_classes):
        sig_vals.append(PCA(n_components=n_comp).fit(features_sort[c]).singular_values_)

    ## plot features
    fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(7, 5), dpi=500)
    x_min = np.min([len(sig_val) for sig_val in sig_vals])
    ax.plot(np.arange(x_min), sig_vals[0][:x_min], '-p', markersize=3,
            markeredgecolor='black', linewidth=1.5, color='tomato')
    # ``plt.cm.get_cmap`` is gone from current Matplotlib; pyplot.get_cmap
    # takes the same (name, lut) arguments.
    map_vir = plt.get_cmap('Blues', 6)
    norm = plt.Normalize(-10, 10)
    # One colour per class; equals the original fixed 1..10 table for ten
    # classes and no longer raises IndexError when there are more.
    color = map_vir(norm(np.arange(1, num_classes + 1)))
    for c, sig_val in enumerate(sig_vals[1:]):
        ax.plot(np.arange(x_min), sig_val[:x_min], '-o', markersize=3,
                markeredgecolor='black', alpha=0.6, linewidth=1.0, color=color[c])
    ax.set_xticks(np.arange(0, x_min, 5))
    ax.set_yticks(np.arange(0, 35, 5))
    ax.set_xlabel("components", fontsize=14)
    ax.set_ylabel("sigular values", fontsize=14)
    # ``Tick.label`` no longer exists in current Matplotlib; tick_params is
    # the supported way to size tick labels.
    ax.tick_params(axis='both', which='major', labelsize=12)
    fig.tight_layout()

    np.save(os.path.join(pca_dir, "sig_vals.npy"), sig_vals)
    for ext in ("png", "pdf"):
        file_name = os.path.join(pca_dir, f"pca_classVclass_epoch{epoch}.{ext}")
        fig.savefig(file_name)
        print("Plot saved to: {}".format(file_name))
    plt.close()
def plot_nearest_component_supervised(args, features, labels, epoch, trainset):
    """Show, for each class, the sample that projects most strongly onto each
    of that class's 10 SVD components.

    For every class a 10-component ``TruncatedSVD`` is fitted on the class
    features and, per component, the sample with the largest absolute
    projection is selected. The picks are drawn in a fixed 10 x 10 grid and
    saved as ``nearest_data.png`` / ``nearest_data.pdf`` under
    ``<args.model_dir>/figures/nearcomp_sup``.

    NOTE(review): grid rows are indexed by class, yet the row caption reads
    ``comp <r>`` - confirm which labelling is intended. ``epoch`` is unused.
    """
    class_count = trainset.num_classes
    feats_by_class, _ = utils.sort_dataset(features.numpy(), labels.numpy(),
                                           num_classes=class_count, stack=False)
    imgs_by_class, _ = utils.sort_dataset(trainset.data, labels.numpy(),
                                          num_classes=class_count, stack=False)

    def _strongest_images(cls):
        # One image per component: the sample with the largest |projection|.
        svd = TruncatedSVD(n_components=10, random_state=10).fit(feats_by_class[cls])
        proj = feats_by_class[cls] @ svd.components_.T
        picks = np.argmax(np.abs(proj), axis=0)
        return np.array(imgs_by_class[cls])[picks]

    nearest_data = [_strongest_images(cls) for cls in range(class_count)]

    fig, ax = plt.subplots(ncols=10, nrows=10, figsize=(10, 10))
    for (r, c), cell in np.ndenumerate(ax):
        cell.imshow(nearest_data[r][c])
        cell.set_xticks([])
        cell.set_yticks([])
        for side in ('top', 'right'):
            cell.spines[side].set_visible(False)
        for side in ('bottom', 'left'):
            cell.spines[side].set_linewidth(False)
        if c == 0:
            cell.set_ylabel(f"comp {r}")

    ## save
    save_dir = os.path.join(args.model_dir, 'figures', 'nearcomp_sup')
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    for ext in ("png", "pdf"):
        file_name = os.path.join(save_dir, f"nearest_data.{ext}")
        fig.savefig(file_name)
        print(f"Plot saved to: {file_name}")
    plt.close()
def plot_nearest_component(args, features, labels, epoch, trainset):
    """Show, for each class, the sample that projects most strongly onto each
    of that class's 10 SVD components.

    The class count is ``len(trainset.classes)``. For every class a
    10-component ``TruncatedSVD`` is fitted and, per component, the sample
    with the largest absolute projection is selected. The picks are drawn in
    a fixed 10 x 10 grid with axes switched off and saved as
    ``nearest_data.png`` / ``nearest_data.pdf`` under
    ``<args.model_dir>/figures/pca``.

    ``epoch`` is accepted but not used by this function.
    """
    class_count = len(trainset.classes)
    ## perform PCA on features
    feats_by_class, _ = utils.sort_dataset(features.numpy(), labels.numpy(),
                                           num_classes=class_count, stack=False)
    imgs_by_class, _ = utils.sort_dataset(trainset.data, labels.numpy(),
                                          num_classes=class_count, stack=False)

    def _strongest_images(cls):
        # One image per component: the sample with the largest |projection|.
        svd = TruncatedSVD(n_components=10, random_state=10).fit(feats_by_class[cls])
        proj = feats_by_class[cls] @ svd.components_.T
        picks = np.argmax(np.abs(proj), axis=0)
        return np.array(imgs_by_class[cls])[picks]

    nearest_data = [_strongest_images(cls) for cls in range(class_count)]

    fig, ax = plt.subplots(ncols=10, nrows=10, figsize=(10, 10))
    for (r, c), cell in np.ndenumerate(ax):
        cell.imshow(nearest_data[r][c])
        cell.set_axis_off()
    # These pyplot calls act on the current axes, not on the whole figure.
    plt.xlabel("per component")
    plt.ylabel("per class")

    ## save
    save_dir = os.path.join(args.model_dir, 'figures', 'pca')
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    for ext in ("png", "pdf"):
        file_name = os.path.join(save_dir, f"nearest_data.{ext}")
        fig.savefig(file_name)
        print(f"Plot saved to: {file_name}")
    plt.close()
def plot_heatmap(args, features, labels, epoch, trainset=None):
    """Plot a heatmap of absolute pairwise inner products and dump the matrix.

    The features are grouped by class, stacked, and the absolute Gram matrix
    is drawn with the ``Blues`` colormap. The matrix is written in 100-row
    chunks to ``<save_dir>/sim_mat/sim_mat<i>.npy`` and the figure is saved as
    PNG and PDF named ``heatmat_epoch<epoch>``, where ``save_dir`` is
    ``<args.model_dir>/figures/heatmaps``.

    Args:
        args: object providing ``model_dir``.
        features: tensor of features exposing ``.numpy()``.
        labels: tensor of labels exposing ``.numpy()``.
        epoch: epoch tag used in the output file names.
        trainset: optional dataset providing ``classes``. When omitted the
            class count is ``labels.max() + 1``. The original read an
            undefined name ``trainset`` and failed unless a module-level
            global happened to exist.
    """
    labels_np = labels.numpy()
    if trainset is not None:
        num_classes = len(trainset.classes)
    else:
        num_classes = int(labels_np.max()) + 1
    features_sort, _ = utils.sort_dataset(features.numpy(), labels_np,
                                          num_classes=num_classes, stack=False)
    features_sort_ = np.vstack(features_sort)
    sim_mat = np.abs(features_sort_ @ features_sort_.T)
    n_samples = sim_mat.shape[0]

    plt.rc('text', usetex=False)
    plt.rcParams['font.family'] = 'serif'
    plt.rcParams['font.serif'] = ['Times New Roman']

    fig, ax = plt.subplots(figsize=(7, 5), sharey=True, sharex=True, dpi=400)
    im = ax.imshow(sim_mat, cmap='Blues')
    fig.colorbar(im, pad=0.02, drawedges=0, ticks=[0, 0.5, 1])
    # Six ticks spanning the real sample count; the original hard-coded
    # 50000, which is only correct for a 50000-sample dataset.
    tick_positions = np.linspace(0, n_samples, 6)
    ax.set_xticks(tick_positions)
    ax.set_yticks(tick_positions)
    # ``Tick.label`` no longer exists in current Matplotlib; tick_params is
    # the supported way to size tick labels.
    ax.tick_params(axis='both', which='major', labelsize=10)
    fig.tight_layout()

    save_dir = os.path.join(args.model_dir, 'figures', 'heatmaps')
    chunk_dir = os.path.join(save_dir, "sim_mat")
    # Create both directories before any write: the original saved the
    # chunks into a ``sim_mat`` folder that was never created.
    os.makedirs(chunk_dir, exist_ok=True)

    rows_per_chunk = 100
    # Ceil division covers every row; equals the original 500 chunks for a
    # 50000-sample matrix.
    n_chunks = -(-n_samples // rows_per_chunk)
    for i in range(n_chunks):
        np.save(os.path.join(chunk_dir, f"sim_mat{i}.npy"),
                sim_mat[i * rows_per_chunk:(i + 1) * rows_per_chunk])

    for ext in ("png", "pdf"):
        file_name = os.path.join(save_dir, f"heatmat_epoch{epoch}.{ext}")
        fig.savefig(file_name)
        print("Plot saved to: {}".format(file_name))
    plt.close()
def plot_pca(args, features, labels, epoch, trainset=None):
    """Plot PCA singular values of the learned features, overall and per class.

    A PCA with ``min(args.comp, feature_dim)`` components is fitted on all
    features and on each class separately. The singular values are plotted
    (all features in ``tomato``, classes in shades of blue), stored in
    ``sig_vals.npy`` and the figure is saved as PNG and PDF named
    ``pca_classVclass_epoch<epoch>`` under ``<args.model_dir>/figures/pca``.

    Args:
        args: object providing ``model_dir`` and ``comp``.
        features: tensor of features exposing ``.numpy()`` and ``.shape``.
        labels: tensor of labels exposing ``.numpy()``.
        epoch: epoch tag used in the output file names.
        trainset: optional dataset providing ``classes``. When omitted the
            class count is ``labels.max() + 1``. The original read an
            undefined name ``trainset`` and failed unless a module-level
            global happened to exist.
    """
    ## create save folder
    pca_dir = os.path.join(args.model_dir, 'figures', 'pca')
    os.makedirs(pca_dir, exist_ok=True)

    features_np = features.numpy()
    labels_np = labels.numpy()
    if trainset is not None:
        num_classes = len(trainset.classes)
    else:
        num_classes = int(labels_np.max()) + 1

    ## perform PCA on features
    n_comp = np.min([args.comp, features.shape[1]])
    features_sort, _ = utils.sort_dataset(features_np, labels_np,
                                          num_classes=num_classes, stack=False)
    # Entry 0 holds the spectrum of all features; entries 1.. are per class.
    sig_vals = [PCA(n_components=n_comp).fit(features_np).singular_values_]
    for c in range(num_classes):
        sig_vals.append(PCA(n_components=n_comp).fit(features_sort[c]).singular_values_)

    ## plot features
    plt.rc('text', usetex=False)
    plt.rcParams['font.family'] = 'serif'
    plt.rcParams['font.serif'] = ['Times New Roman']
    fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(7, 5), dpi=500)
    x_min = np.min([len(sig_val) for sig_val in sig_vals])
    ax.plot(np.arange(x_min), sig_vals[0][:x_min], '-p', markersize=3,
            markeredgecolor='black', linewidth=1.5, color='tomato')
    # ``plt.cm.get_cmap`` is gone from current Matplotlib; pyplot.get_cmap
    # takes the same (name, lut) arguments.
    map_vir = plt.get_cmap('Blues', 6)
    norm = plt.Normalize(-10, 10)
    # One colour per class; equals the original fixed 1..10 table for ten
    # classes and no longer raises IndexError when there are more.
    color = map_vir(norm(np.arange(1, num_classes + 1)))
    for c, sig_val in enumerate(sig_vals[1:]):
        ax.plot(np.arange(x_min), sig_val[:x_min], '-o', markersize=3,
                markeredgecolor='black', alpha=0.6, linewidth=1.0, color=color[c])
    ax.set_xticks(np.arange(0, x_min, 5))
    ax.set_yticks(np.arange(0, 35, 5))
    ax.set_xlabel("components", fontsize=14)
    ax.set_ylabel("sigular values", fontsize=14)
    # ``Tick.label`` no longer exists in current Matplotlib; tick_params is
    # the supported way to size tick labels.
    ax.tick_params(axis='both', which='major', labelsize=12)
    fig.tight_layout()

    np.save(os.path.join(pca_dir, "sig_vals.npy"), sig_vals)
    for ext in ("png", "pdf"):
        file_name = os.path.join(pca_dir, f"pca_classVclass_epoch{epoch}.{ext}")
        fig.savefig(file_name)
        print("Plot saved to: {}".format(file_name))
    plt.close()
def plot_hist_all(args, features, labels, epoch, trainset=None):
    """Draw a class-by-class grid of inner-product histograms.

    Cell ``(i, j)`` of the grid shows a density histogram of the inner
    products between the features of class ``i`` and class ``j``. Diagonal
    cells (green) use only the strict upper triangle of the Gram matrix;
    off-diagonal cells (red) use every entry. The values behind each cell are
    also saved to ``<hist_folder>/sim_mat/sim_mat<i>_<j>.npy``, and the figure
    is saved as PNG and PDF named ``hist_all_epoch<epoch>``, where
    ``hist_folder`` is ``<args.model_dir>/figures/hist_all``.

    Args:
        args: object providing ``model_dir``.
        features: tensor of features exposing ``.numpy()``.
        labels: tensor of labels exposing ``.numpy()``.
        epoch: epoch tag used in the output file names.
        trainset: optional dataset providing ``classes``. When omitted the
            class count is ``labels.max() + 1``. The original read an
            undefined name ``trainset`` and failed unless a module-level
            global happened to exist.
    """
    ## create save folder
    hist_folder = os.path.join(args.model_dir, 'figures', 'hist_all')
    sim_dir = os.path.join(hist_folder, "sim_mat")
    os.makedirs(sim_dir, exist_ok=True)  # also creates hist_folder

    labels_np = labels.numpy()
    if trainset is not None:
        num_classes = len(trainset.classes)
    else:
        num_classes = int(labels_np.max()) + 1
    features_sort, _ = utils.sort_dataset(features.numpy(), labels_np,
                                          num_classes=num_classes, stack=False)

    # At most the first ten classes are drawn, as before; with fewer classes
    # the grid shrinks instead of raising IndexError.
    grid = min(10, num_classes)
    # NOTE(review): unit-wide bins (-0.05, 0.95, 1.95, ...) combined with an
    # x-range of [0, 1] put nearly every value into one bar - confirm
    # whether finer bins were intended.
    bins = np.arange(10) - 0.05

    plt.rc('text', usetex=False)
    plt.rcParams['font.family'] = 'serif'
    plt.rcParams['font.serif'] = ['Times New Roman']
    # squeeze=False keeps ``ax`` two-dimensional even for a 1 x 1 grid.
    fig, ax = plt.subplots(ncols=grid, nrows=grid, figsize=(10, 10), dpi=250,
                           sharex=True, sharey=True, squeeze=False)
    for j in range(grid):
        for i in range(grid):
            cell = ax[i, j]
            sim_mat = features_sort[i] @ features_sort[j].T
            if i == j:
                sim_mat = sim_mat[np.triu_indices(sim_mat.shape[0], k=1)]
                bar_color = 'green'
            else:
                sim_mat = sim_mat.reshape(-1)
                bar_color = 'red'
            cell.hist(sim_mat, bins=bins, color=bar_color, alpha=0.3,
                      histtype='bar', density=True)
            for side in ('right', 'top', 'bottom', 'left'):
                cell.spines[side].set_visible(False)
            cell.grid(True, color='white', axis='y')
            cell.set_xlim(0, 1.)
            cell.set_facecolor('whitesmoke')
            if i == 0:
                cell.set_title(f'class {j}')
            if j == 0:
                cell.set_ylabel(f'class {i}')
            np.save(os.path.join(sim_dir, f"sim_mat{i}_{j}.npy"), sim_mat)

    fig.text(0.5, -0.01, 'cosine similarity', ha='center')
    fig.text(-0.01, 0.5, 'count', va='center', rotation='vertical')
    plt.tight_layout()
    plt.show()

    file_name = os.path.join(hist_folder, f"hist_all_epoch{epoch}.png")
    fig.savefig(file_name)
    print("Plot saved to: {}".format(file_name))
    file_name = os.path.join(hist_folder, f"hist_all_epoch{epoch}.pdf")
    fig.savefig(file_name)
    plt.close()
    print("Plot saved to: {}".format(file_name))
def plot_pca_epoch(args):
    """Plot PCA singular values for several training epochs in one figure.

    For each epoch in ``[0, 10, 100, 500]`` a network is obtained (freshly
    built for epoch 0, otherwise loaded from the checkpoint of ``epoch - 1``).
    Features of the training set are extracted and a PCA with
    ``min(args.comp, feature_dim)`` components is fitted, either on all
    features or only on class ``args.class_`` when that is not ``None``. The
    singular values are stored in ``sig_vals_epoch.npy`` and plotted to
    ``pca_class<args.class_>.png`` / ``.pdf`` under
    ``<args.model_dir>/figures/pca``.

    Args:
        args: object providing ``model_dir``, ``comp`` and ``class_``.
    """
    EPOCHS = [0, 10, 100, 500]

    params = utils.load_params(args.model_dir)
    transforms = tf.load_transforms('test')
    trainset = tf.load_trainset(params['data'], transforms)
    trainloader = DataLoader(trainset, batch_size=200, num_workers=4)

    sig_vals = []
    for epoch in EPOCHS:
        epoch_ = epoch - 1
        if epoch_ == -1:  # randomly initialized
            net = tf.load_architectures(params['arch'], params['fd'])
        else:
            # The epoch returned by the loader is discarded so the loop
            # variable is not overwritten.
            net, _ = tf.load_checkpoint(args.model_dir, epoch=epoch_, eval_=True)
        features, labels = tf.get_features(net, trainloader)
        if args.class_ is not None:
            features_sort, _ = utils.sort_dataset(
                features.numpy(), labels.numpy(),
                num_classes=trainset.num_classes, stack=False)
            features_ = features_sort[args.class_]
        else:
            features_ = features.numpy()
        n_comp = np.min([args.comp, features.shape[1]])
        pca = PCA(n_components=n_comp).fit(features_)
        sig_vals.append(pca.singular_values_)

    ## plot singular values
    plt.rc('text', usetex=True)
    plt.rcParams['font.family'] = 'serif'
    plt.rcParams['font.serif'] = ['Times New Roman']
    fig, ax = plt.subplots(1, 1, figsize=(7, 5), dpi=400)
    x_min = np.min([len(sig_val) for sig_val in sig_vals])
    ax.set_xticks(np.arange(0, x_min, 10))
    # The y-range depends on whether one class or all features are shown.
    if args.class_ is not None:
        ax.set_yticks(np.linspace(0, 40, 9))
        ax.set_ylim(0, 40)
    else:
        ax.set_yticks(np.linspace(0, 80, 9))
        ax.set_ylim(0, 90)
    for epoch, sig_val in zip(EPOCHS, sig_vals):
        ax.plot(np.arange(x_min), sig_val[:x_min], marker='', markersize=5,
                label=f'epoch - {epoch}', alpha=0.6)
    ax.legend(loc='upper right', frameon=True, fancybox=True,
              prop={"size": 8}, ncol=1, framealpha=0.5)
    ax.set_xlabel("components")
    ax.set_ylabel("sigular values")
    for side in ('top', 'right', 'bottom', 'left'):
        ax.spines[side].set_visible(False)
    # ``Tick.label`` no longer exists in current Matplotlib; tick_params is
    # the supported way to size tick labels.
    ax.tick_params(axis='both', which='major', labelsize=12)
    ax.grid(True, color='white')
    ax.set_facecolor('whitesmoke')
    fig.tight_layout()

    ## save
    save_dir = os.path.join(args.model_dir, 'figures', 'pca')
    # Create the directory first: the original wrote the .npy file before
    # making sure the folder existed.
    os.makedirs(save_dir, exist_ok=True)
    np.save(os.path.join(save_dir, "sig_vals_epoch.npy"), sig_vals)
    for ext in ("png", "pdf"):
        file_name = os.path.join(save_dir, f"pca_class{args.class_}.{ext}")
        fig.savefig(file_name)
        print("Plot saved to: {}".format(file_name))
    plt.close()
def plot_hist_paper(args, features, labels, epoch, trainset=None):
    """Save two density histograms of inner products for class 0.

    * ``hist_0v0``: the last 500 entries of the strict upper triangle of the
      class-0 Gram matrix (green).
    * ``hist_0vall``: inner products between class 0 and every class,
      class 0 itself included (red).

    Both are written as PNG and PDF to
    ``<args.model_dir>/figures/hist_paper``.

    Args:
        args: object providing ``model_dir``.
        features: tensor of features exposing ``.numpy()``.
        labels: tensor of labels exposing ``.numpy()``.
        epoch: accepted but not used by this function.
        trainset: optional dataset providing ``classes``. When omitted the
            class count is ``labels.max() + 1``. The original read an
            undefined name ``trainset`` and failed unless a module-level
            global happened to exist.
    """
    ## create save folder
    hist_folder = os.path.join(args.model_dir, 'figures', 'hist_paper')
    os.makedirs(hist_folder, exist_ok=True)

    labels_np = labels.numpy()
    if trainset is not None:
        num_classes = len(trainset.classes)
    else:
        num_classes = int(labels_np.max()) + 1
    features_sort, _ = utils.sort_dataset(features.numpy(), labels_np,
                                          num_classes=num_classes, stack=False)
    i = 0  # reference class for both figures

    def _save_density_hist(values, bar_color, stem):
        # Draw one styled density histogram and save it as PNG and PDF.
        plt.rc('text', usetex=False)
        plt.rcParams['font.family'] = 'serif'
        plt.rcParams['font.serif'] = ['Times New Roman']
        fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(7, 5), dpi=250)
        ax.hist(values, bins=30, color=bar_color, alpha=0.4, density=True)
        ax.grid(True, color='white', axis='y')
        ax.set_xlim(0, 1.)
        ax.set_facecolor('whitesmoke')
        ax.set_xlabel("cosine similarity")
        ax.set_ylabel("density")
        for side in ('right', 'top', 'bottom', 'left'):
            ax.spines[side].set_visible(False)
        fig.tight_layout()
        for ext in ("png", "pdf"):
            file_name = os.path.join(hist_folder, f"{stem}.{ext}")
            fig.savefig(file_name)
            print("Plot saved to: {}".format(file_name))
        plt.close()

    ## inside class
    sim_mat = features_sort[i] @ features_sort[i].T
    inside = sim_mat[np.triu_indices(sim_mat.shape[0], k=1)][-500:]
    _save_density_hist(inside, 'green', "hist_0v0")

    ## outside class
    outside = np.hstack([
        (features_sort[i] @ features_sort[j].T).reshape(-1)
        for j in range(num_classes)
    ])
    _save_density_hist(outside, 'red', "hist_0vall")