def main():
    """Draw one angle-ratio figure per selected dataset.

    Every dataset directory under ``args.root_dir`` (optionally filtered by
    ``args.datasets``) is visited, and the manifold directories found under
    ``<dataset>/flipp_0.0000/<loss_fn_str>`` are iterated. Manifolds can be
    filtered by name (``args.manifolds``) and by total dimension
    (``args.dims``). The samples of the remaining manifolds are drawn on one
    shared axis, which is then passed to ``configure_and_save_plots``.

    NOTE(review): ``args`` and ``loss_fn_str`` are not defined inside this
    function; presumably they are module-level globals -- confirm.
    """
    for dataset_path in fullpath_list(args.root_dir):
        ds_name = os.path.basename(dataset_path)
        dataset_selected = not args.datasets or ds_name in args.datasets
        if not dataset_selected:
            continue

        experiments_path = os.path.join(
            dataset_path, 'flipp_0.0000', loss_fn_str)

        # the figure is slightly narrower when the y label is dropped
        if args.no_ylabel:
            fig_width = 6.5
        else:
            fig_width = 7
        fig, ax = plt.subplots(figsize=(fig_width, 5))

        for manifold_path in fullpath_list(experiments_path):
            man_name = os.path.basename(manifold_path)
            manifold_selected = (
                not args.manifolds or man_name in args.manifolds)
            if not manifold_selected:
                continue

            factor_names = manifold_factors_from_path_label(man_name)
            factors = build_manifold(*factor_names)
            display_label = manifold_label_for_display(*factor_names)

            # the dimension of a product manifold is the sum over factors
            total_dim = sum(factor.dim for factor in factors)
            if args.dims and total_dim not in args.dims:
                continue

            # load the samples and draw them on the shared axis
            angle_samples = load_angle_ratio_samples(manifold_path)
            plot_angle_ratios(
                ax,
                angle_samples,
                label=display_label,
                color=get_color_for_manifold(man_name))

        # one figure per dataset
        configure_and_save_plots(ax, fig, ds_name, loss_fn_str)
def main():
    """Plot per-layer F1 scores, one figure per (dataset, flipp, loss_fn).

    The directory tree under ``args.root_dir`` is walked in the order
    dataset / flip probability / loss function / manifold. Each level can be
    filtered through the matching ``args`` list (``datasets``,
    ``flip_probabilities``, ``loss_fns``, ``manifolds``, ``dims``). For each
    remaining manifold the metric is obtained from ``comp_metric`` and added
    to the current figure via ``plot_f1_scores``.

    NOTE(review): ``args`` is not defined here; presumably a module-level
    namespace of parsed command-line options -- confirm.
    """
    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # use double precision by default
    torch.set_default_dtype(torch.float64)

    for dataset_path in fullpath_list(args.root_dir):
        ds_name = os.path.basename(dataset_path)
        if not (not args.datasets or ds_name in args.datasets):
            continue

        # the dataset graph is needed in order to compute F1 scores
        edges_path = os.path.join('../data', ds_name + '.edges.gz')
        _, g = load_graph_pdists(edges_path, cache_dir='.cached_pdists')
        n_nodes = g.number_of_nodes()
        with Timer('constructing FastPrecision'):
            fp = FastPrecision(g)

        # drop the first entry, then normalize so the entries sum to one
        layer_sizes = fp.nodes_per_layer()[1:]
        layer_fractions = layer_sizes / np.sum(layer_sizes)

        for flipp_path in fullpath_list(dataset_path):
            # directory names look like '<prefix>_<flipp>'
            flipp = os.path.basename(flipp_path).split('_')[1]
            if (args.flip_probabilities
                    and flipp not in args.flip_probabilities):
                continue

            for loss_fn_path in fullpath_list(flipp_path):
                loss_fn_str = os.path.basename(loss_fn_path)
                if args.loss_fns and loss_fn_str not in args.loss_fns:
                    continue

                # create one plot per (dataset, flipp, loss_fn) combination
                is_edge_panel = args.leftmost or args.rightmost
                fig_width = 6 if is_edge_panel else 5
                fig, ax = plt.subplots(figsize=(fig_width, 5))

                # counts only the manifolds that were actually plotted
                n_plotted = 0
                for manifold_path in fullpath_list(loss_fn_path):
                    man_name = os.path.basename(manifold_path)
                    if args.manifolds and man_name not in args.manifolds:
                        continue

                    factor_names = manifold_factors_from_path_label(man_name)
                    factors = build_manifold(*factor_names)
                    display_label = manifold_label_for_display(*factor_names)
                    total_dim = sum(factor.dim for factor in factors)
                    if args.dims and total_dim not in args.dims:
                        continue

                    # compute the metric
                    means, stds = comp_metric(
                        ds_name, n_nodes, fp, flipp, loss_fn_str,
                        manifold_path, factors, display_label)
                    if means is None:
                        continue

                    # add them to the plot
                    plot_f1_scores(
                        ax, means, stds, n_plotted, label=display_label)
                    n_plotted += 1

                # save the figure
                configure_and_save_plots(
                    ax, fig, ds_name, flipp, loss_fn_str, layer_fractions)
def main():
    """Save one histogram figure of angle ratios per dimension directory.

    Each sub-directory of ``args.root_dir`` is named after an integer
    dimension (optionally filtered by ``args.dims``, compared as strings).
    For every manifold sub-directory (optionally filtered by
    ``args.manifolds``) the array stored in ``angle_ratios.npy`` is drawn as
    a density histogram on a shared axis. The figure is written to
    ``<args.save_dir>/angles-<dim>.pdf``.

    Only the ``dim == 2`` figure gets a y label and a wider canvas; the
    others have their y tick labels removed.

    NOTE(review): ``args``, ``xmin`` and ``xmax`` are not defined in this
    function; presumably module-level globals -- confirm.
    """
    for dim_dir in fullpath_list(args.root_dir, only_dirs=True):
        dim = os.path.basename(dim_dir)
        # the filter is applied on the directory name, i.e. before int()
        if args.dims and dim not in args.dims:
            continue
        dim = int(dim)

        width = 7 if dim == 2 else 6
        fig, ax = plt.subplots(figsize=(width, 5))
        bins = np.linspace(xmin, xmax, 100)

        for man_dir in fullpath_list(dim_dir, only_dirs=True):
            man_name = os.path.basename(man_dir)
            if args.manifolds and man_name not in args.manifolds:
                continue

            values = np.load(os.path.join(man_dir, 'angle_ratios.npy'))
            # draw on the axis created above instead of pyplot's implicit one
            ax.hist(
                values,
                bins,
                density=True,
                label=manifold_label_for_display(man_name),
                color=get_color_for_manifold(man_name),
                alpha=0.5)

        ax.grid(color='lightgray', lw=2, alpha=0.5)
        ax.set_axisbelow(True)
        ax.set_xlabel('Normalized Sum of Angles')
        if dim == 2:
            ax.set_ylabel('PDF')
        else:
            ax.set_yticklabels([])
        ax.set_title('Triangles Thickness (n={})'.format(dim), y=1.18)
        ax.set_xlim(xmin, xmax)
        ax.set_ylim(top=args.ymax)

        # sort the legend entries by label; when nothing was drawn there is
        # nothing to unpack from zip(), so the legend is skipped in that case
        hs, labels = ax.get_legend_handles_labels()
        if hs:
            hs, labels = zip(*sorted(zip(hs, labels), key=lambda t: t[1]))
            ax.legend(
                hs,
                labels,
                bbox_to_anchor=(0, 1.02, 1, 0.2),
                loc='lower left',
                mode='expand',
                borderaxespad=0,
                ncol=4)

        plt.tight_layout()
        figpath = os.path.join(args.save_dir, f'angles-{dim}.pdf')
        fig.savefig(figpath, bbox_inches='tight')
        plt.close(fig)
def main():
    """Run ``grid_fn`` for every selected manifold directory in a process pool.

    Directories are discovered two levels below ``args.root_dir``
    (dimension / manifold). A manifold is skipped when ``args.manifolds`` is
    non-empty and does not contain its directory name. All submitted jobs are
    awaited, so an exception raised in a worker is re-raised here.

    NOTE(review): ``args`` is not defined here; presumably a module-level
    namespace -- confirm.
    """
    torch.set_default_dtype(torch.float64)

    with ProcessPoolExecutor(max_workers=args.num_cpus) as executor:
        pending = []
        for dim_path in fullpath_list(args.root_dir, only_dirs=True):
            for manifold_path in fullpath_list(dim_path, only_dirs=True):
                man_name = os.path.basename(manifold_path)
                selected = not args.manifolds or man_name in args.manifolds
                if not selected:
                    continue
                pending.append(
                    executor.submit(grid_fn, manifold_path, man_name))

        # block without a timeout until each job is done
        for job in pending:
            job.result(timeout=None)
def load_angle_ratio_samples(man_dir):
    """Return the samples of all runs found in ``man_dir`` as one array.

    For each entry yielded by ``fullpath_list(man_dir)`` the samples are
    loaded with ``load_samples_for_best_embedding``; the per-run results are
    joined with ``np.concatenate``.
    """
    per_run_samples = [
        load_samples_for_best_embedding(run_path)
        for run_path in fullpath_list(man_dir)
    ]
    return np.concatenate(per_run_samples)
def comp_metric(ds_name, n_nodes, fp, flipp, loss_fn_str, man_dir,
                man_factors, man_label):
    """Compute per-layer F1 statistics over all runs stored in ``man_dir``.

    For every run directory the best embedding is loaded, its pairwise
    distances are computed, and the distances of all runs are stored back to
    back in one flat buffer that is passed to
    ``fp.layer_mean_average_f1_scores``.

    Args:
        ds_name: Dataset name; used only to build the logged run id.
        n_nodes: Number of embedded nodes; each run contributes
            ``n_nodes * (n_nodes - 1) // 2`` distances.
        fp: Object providing ``layer_mean_average_f1_scores(pdists, n_runs)``.
        flipp: Flip probability label; used only for the run id.
        loss_fn_str: Loss function label; used only for the run id.
        man_dir: Directory whose entries are the individual runs.
        man_factors: Manifold factors passed to ``ManifoldEmbedding``.
        man_label: Display label of the manifold; used only for the run id.

    Returns:
        A ``(means, stds)`` pair, each truncated to the first
        ``args.max_layers`` entries, or ``(None, None)`` when ``man_dir``
        contains no runs.

    NOTE(review): ``args`` is not defined here; presumably a module-level
    namespace -- confirm.
    """
    run_dirs = list(fullpath_list(man_dir))
    n_runs = len(run_dirs)
    if n_runs == 0:
        # nothing to evaluate; signal it with None instead of handing an
        # empty buffer and a zero run count to the scorer
        logging.warning('No runs found in %s; skipping', man_dir)
        return None, None

    num_pdists = n_nodes * (n_nodes - 1) // 2
    # every element is overwritten below, so no initialization is needed
    all_pdists = np.empty(num_pdists * n_runs)

    for i, run_dir in enumerate(run_dirs):
        # load the embedding
        embedding = ManifoldEmbedding(n_nodes, man_factors)
        emb_state_dict, _, _ = load_best_embedding(run_dir)
        embedding.load_state_dict(emb_state_dict)

        # compute the pairwise distances
        with Timer('computing pdists'), torch.no_grad():
            man_pdists = embedding.compute_dists(None)
            # in-place square root of the values returned by compute_dists
            # NOTE(review): presumably those are squared distances -- confirm
            man_pdists.sqrt_()

        # run i owns the contiguous slice [i * num_pdists, (i+1) * num_pdists)
        start = i * num_pdists
        all_pdists[start:start + num_pdists] = man_pdists.numpy()

    # compute the f1 scores
    run_id = make_run_id(
        dataset=ds_name, fp=flipp, loss_fn=loss_fn_str, manifold=man_label)
    logging.info('Computing F1 scores for (%s)', run_id)
    with Timer('computing F1 scores'):
        means, stds = fp.layer_mean_average_f1_scores(all_pdists, n_runs)

    return means[:args.max_layers], stds[:args.max_layers]
def main():
    """Run ``grid_fn`` once per selected dimension directory in a process pool.

    Each sub-directory of ``args.root_dir`` is named after a dimension. The
    ``args.dims`` filter is applied to the directory name as a string, and the
    name is converted to ``int`` only when the job is submitted. All jobs are
    awaited, so an exception raised in a worker is re-raised here.

    NOTE(review): ``args`` is not defined here; presumably a module-level
    namespace -- confirm.
    """
    with ProcessPoolExecutor(max_workers=args.num_cpus) as executor:
        pending = []
        for dim_path in fullpath_list(args.root_dir, only_dirs=True):
            dim_name = os.path.basename(dim_path)
            selected = not args.dims or dim_name in args.dims
            if not selected:
                continue
            pending.append(executor.submit(grid_fn, dim_path, int(dim_name)))

        # block without a timeout until each job is done
        for job in pending:
            job.result(timeout=None)
def grid_fn(input_dir, dim):
    """Plot each requested quantity against the distance threshold.

    The directory layout read here is ``input_dir/<manifold>/<threshold>``.
    For every quantity in ``args.plots`` one figure is produced in which each
    manifold contributes a line (second component of the stored triples) and
    a shaded band between the first and third components. Figures are saved
    as ``<args.save_dir>/<quantity>-<dim>.pdf``.

    Args:
        input_dir: Directory containing one sub-directory per manifold.
        dim: Integer dimension; used in the title and file name. Only the
            ``dim == 2`` figure gets a y label and a wider canvas.

    NOTE(review): ``args`` is not defined here; presumably a module-level
    namespace -- confirm. The triples are unpacked as ``p25, p50, p75``,
    presumably percentiles -- confirm against ``read_quantity_results``.
    """
    # results[manifold][threshold][quantity] -> read_quantity_results(...)
    results = {}
    for man_dir in fullpath_list(input_dir, only_dirs=True):
        man_name = os.path.basename(man_dir)
        results[man_name] = {}
        for thresh_dir in fullpath_list(man_dir, only_dirs=True):
            thresh = os.path.basename(thresh_dir)
            results[man_name][thresh] = {}
            for quantity in args.plots:
                ret = read_quantity_results(thresh_dir, quantity)
                if ret is not None:
                    results[man_name][thresh][quantity] = ret

    quantity_labels = dict(
        degrees='Node Degree', seccurvs='Graph Sectional Curvature')
    quantity_titles = dict(
        degrees='Degree Distributions', seccurvs='Curvature Estimates')
    ls = ['-', '--', '-.', ':']
    ms = ['o', 'v', '*', 'd', 'x', '1']

    for quantity in args.plots:
        width = 7 if dim == 2 else 6
        height = 5.5 if quantity == 'degrees' else 5
        fig, ax = plt.subplots(figsize=(width, height))

        # i also advances for skipped manifolds, so each manifold keeps the
        # same line style in every figure
        for i, (man_name, values) in enumerate(results.items()):
            if man_name.startswith('euc') and quantity != 'degrees':
                continue

            # (threshold, triple) pairs ordered by numeric threshold
            points = sorted(
                ((float(k), thresh_values[quantity])
                 for k, thresh_values in values.items()
                 if quantity in thresh_values),
                key=lambda e: e[0])
            if not points:
                # no data for this quantity; unpacking zip() would fail
                continue
            xs, ys = zip(*points)
            p25, p50, p75 = zip(*ys)

            line, = ax.plot(
                xs,
                p50,
                label=manifold_label_for_display(man_name),
                color=get_color_for_manifold(man_name),
                # thinner for later manifolds, but never zero or negative
                lw=max(6 - i, 1),
                ls=ls[i % 4],
                marker=ms[i % 6],
                ms=10)
            ax.fill_between(
                xs, p25, p75, facecolor=line.get_color(), alpha=0.3)

        ax.set_ylim(bottom=args.ymin, top=args.ymax)
        ax.set_xlim(left=0, right=args.xmax)
        ax.xaxis.set_major_locator(MaxNLocator(integer=True))
        ax.grid(color='lightgray', lw=2, alpha=0.5)
        ax.set_xlabel('Distance Threshold')
        if dim == 2:
            ax.set_ylabel(quantity_labels[quantity])
        else:
            ax.set_yticklabels([])
        ax.set_title(
            '{} (n={})'.format(quantity_titles[quantity], dim),
            y=1.32 if quantity == 'degrees' else 1.18)
        ax.set_axisbelow(True)

        # sort the legend entries by label; skipped when nothing was drawn
        hs, labels = ax.get_legend_handles_labels()
        if hs:
            hs, labels = zip(*sorted(zip(hs, labels), key=lambda t: t[1]))
            ax.legend(
                hs,
                labels,
                bbox_to_anchor=(0, 1.02, 1, 0.2),
                loc='lower left',
                mode='expand',
                borderaxespad=0,
                ncol=2 if quantity == 'degrees' else 3)

        plt.tight_layout()
        fig_name = os.path.join(args.save_dir, f'{quantity}-{dim}.pdf')
        fig.savefig(fig_name, bbox_inches='tight')
        plt.close(fig)
def main():
    """Save one histogram figure of angle ratios per dimension directory.

    Each sub-directory of ``args.root_dir`` is named after an integer
    dimension (optionally filtered by ``args.dims``, compared as strings).
    For every manifold sub-directory, visited in sorted order and optionally
    filtered by ``args.manifolds``, the array stored in ``angle_ratios.npy``
    is drawn as a density histogram on a shared axis. The figure is written
    to ``<args.save_dir>/angles-<dim>.pdf``.

    The ``dim == 3`` figure gets a y label and a wider canvas. The other
    figures get an annotation that reports the tallest histogram bin, and
    their y tick labels are removed.

    NOTE(review): ``args``, ``xmin`` and ``xmax`` are not defined in this
    function; presumably module-level globals -- confirm.
    """
    colors = ['tab:orange', 'tab:green']

    for dim_dir in fullpath_list(args.root_dir, only_dirs=True):
        dim = os.path.basename(dim_dir)
        # the filter is applied on the directory name, i.e. before int()
        if args.dims and dim not in args.dims:
            continue
        dim = int(dim)

        width = 7 if dim == 3 else 6
        fig, ax = plt.subplots(figsize=(width, 5))
        bins = np.linspace(xmin, xmax, 100)

        i = 0
        ymax = 0
        for man_dir in sorted(fullpath_list(dim_dir, only_dirs=True)):
            man_name = os.path.basename(man_dir)
            if args.manifolds and man_name not in args.manifolds:
                continue

            values = np.load(os.path.join(man_dir, 'angle_ratios.npy'))
            ret = ax.hist(
                values,
                bins,
                density=True,
                label=manifold_label_for_display(man_name),
                # cycle through the palette so that a third manifold does
                # not index past its end
                color=colors[i % len(colors)],
                alpha=0.5)
            # ret[0] holds the bin heights of the histogram just drawn
            ymax = max(ymax, ret[0].max())
            i += 1

        ax.grid(color='lightgray', lw=2, alpha=0.5)
        ax.set_axisbelow(True)
        ax.set_xlabel('Normalized Sum of Angles')
        if dim == 3:
            ax.set_ylabel('PDF')
        else:
            # the y axis may be clipped below; report the real maximum
            ax.annotate(
                'cut off (max {:.2f})'.format(ymax),
                xy=(-0.47, 19.5),
                xytext=(-0.38, 17),
                arrowprops=dict(facecolor='k', arrowstyle='-|>'),
                bbox=dict(boxstyle='round', facecolor='none'))
            ax.set_yticklabels([])
        ax.set_title('Triangles Thickness (n={})'.format(dim), y=1.18)
        ax.set_xlim(xmin, xmax)
        if args.ymax:
            ax.set_ylim(top=args.ymax + 0.1)

        # sort the legend entries by label; when nothing was drawn there is
        # nothing to unpack from zip(), so the legend is skipped in that case
        hs, labels = ax.get_legend_handles_labels()
        if hs:
            hs, labels = zip(*sorted(zip(hs, labels), key=lambda t: t[1]))
            ax.legend(
                hs,
                labels,
                bbox_to_anchor=(0, 1.02, 1, 0.2),
                loc='lower left',
                mode='expand',
                borderaxespad=0,
                ncol=4)

        plt.tight_layout()
        figpath = os.path.join(args.save_dir, f'angles-{dim}.pdf')
        fig.savefig(figpath, bbox_inches='tight')
        plt.close(fig)