Ejemplo n.º 1
0
def main():
    """Draw the angle-ratio samples of the selected manifolds, one figure per dataset.

    Every entry of ``args.root_dir`` is treated as a dataset directory. Its
    ``flipp_0.0000/<loss_fn_str>`` sub-directory is scanned for manifold
    directories (``loss_fn_str`` is read from the enclosing module scope).
    ``args.datasets``, ``args.manifolds`` and ``args.dims`` restrict what is
    plotted; an empty filter keeps everything.
    """
    for dataset_path in fullpath_list(args.root_dir):
        ds_name = os.path.basename(dataset_path)
        if args.datasets and ds_name not in args.datasets:
            continue
        experiment_path = os.path.join(dataset_path, 'flipp_0.0000',
                                       loss_fn_str)

        # the figure is slightly narrower when the y label is left out
        if args.no_ylabel:
            fig_width = 6.5
        else:
            fig_width = 7
        fig, ax = plt.subplots(figsize=(fig_width, 5))

        for manifold_path in fullpath_list(experiment_path):
            manifold = os.path.basename(manifold_path)
            if args.manifolds and manifold not in args.manifolds:
                continue

            # the directory name encodes the factors of the product manifold
            factors = manifold_factors_from_path_label(manifold)
            components = build_manifold(*factors)
            display_label = manifold_label_for_display(*factors)

            # the dimension filter applies to the sum over all factors
            total_dim = sum(component.dim for component in components)
            if args.dims and total_dim not in args.dims:
                continue

            # load the samples and add them to the shared axes
            plot_angle_ratios(
                    ax,
                    load_angle_ratio_samples(manifold_path),
                    label=display_label,
                    color=get_color_for_manifold(manifold))

        # one output figure per dataset
        configure_and_save_plots(ax, fig, ds_name, loss_fn_str)
Ejemplo n.º 2
0
def main():
    """Create one F1-score figure per (dataset, flip probability, loss function).

    The directory tree below ``args.root_dir`` is walked as
    dataset / flip probability / loss function / manifold. Each level can be
    restricted through the matching ``args`` filter (``datasets``,
    ``flip_probabilities``, ``loss_fns``, ``manifolds``, ``dims``); an empty
    filter keeps everything.
    """
    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # torch tensors are created in double precision from here on
    torch.set_default_dtype(torch.float64)

    for dataset_path in fullpath_list(args.root_dir):
        ds_name = os.path.basename(dataset_path)
        if args.datasets and ds_name not in args.datasets:
            continue

        # The dataset graph is needed for computing the F1 scores.
        edges_path = os.path.join('../data', ds_name + '.edges.gz')
        _, graph = load_graph_pdists(edges_path, cache_dir='.cached_pdists')
        n_nodes = graph.number_of_nodes()
        with Timer('constructing FastPrecision'):
            fp = FastPrecision(graph)
            # drop the first entry, then rescale the rest to sum to one
            layer_sizes = fp.nodes_per_layer()[1:]
            nodes_per_layer = layer_sizes / np.sum(layer_sizes)

        for flipp_path in fullpath_list(dataset_path):
            # second '_'-separated token of the directory name, kept as a str
            flipp = os.path.basename(flipp_path).split('_')[1]
            if args.flip_probabilities and flipp not in args.flip_probabilities:
                continue

            for loss_path in fullpath_list(flipp_path):
                loss_fn_str = os.path.basename(loss_path)
                if args.loss_fns and loss_fn_str not in args.loss_fns:
                    continue

                # a fresh figure for this (dataset, flipp, loss_fn) triple
                is_edge_panel = args.leftmost or args.rightmost
                fig_width = 6 if is_edge_panel else 5
                fig, ax = plt.subplots(figsize=(fig_width, 5))
                n_plotted = 0

                for manifold_path in fullpath_list(loss_path):
                    manifold = os.path.basename(manifold_path)
                    if args.manifolds and manifold not in args.manifolds:
                        continue
                    factors = manifold_factors_from_path_label(manifold)
                    man_factors = build_manifold(*factors)
                    display_label = manifold_label_for_display(*factors)
                    total_dim = sum(factor.dim for factor in man_factors)
                    if args.dims and total_dim not in args.dims:
                        continue

                    # a ``None`` mean signals that there is nothing to plot
                    means, stds = comp_metric(ds_name, n_nodes, fp, flipp,
                                              loss_fn_str, manifold_path,
                                              man_factors, display_label)
                    if means is None:
                        continue

                    # the running index counts only the curves actually drawn
                    plot_f1_scores(ax, means, stds, n_plotted,
                                   label=display_label)
                    n_plotted += 1

                # finalize and write the figure
                configure_and_save_plots(ax, fig, ds_name, flipp, loss_fn_str,
                                         nodes_per_layer)
Ejemplo n.º 3
0
def main():
    """Plot one figure of angle-ratio histograms per dimension directory.

    Every sub-directory of ``args.root_dir`` must be named after an integer
    dimension. For each of them (optionally restricted by ``args.dims``,
    matched against the directory name as a string), the ``angle_ratios.npy``
    file of every manifold sub-directory (optionally restricted by
    ``args.manifolds``) is drawn as a semi-transparent density histogram.
    The figure is written to ``args.save_dir`` as ``angles-<dim>.pdf``.

    ``xmin`` and ``xmax`` are read from the enclosing module scope.
    """
    for dim_dir in fullpath_list(args.root_dir, only_dirs=True):
        dim = os.path.basename(dim_dir)
        # the filter is applied before the conversion, i.e. on the raw name
        if args.dims and dim not in args.dims:
            continue
        dim = int(dim)

        # only the dim == 2 figure carries a y label (see below) and is wider
        width = 7 if dim == 2 else 6
        fig, ax = plt.subplots(figsize=(width, 5))
        bins = np.linspace(xmin, xmax, 100)

        for man_dir in fullpath_list(dim_dir, only_dirs=True):
            man_name = os.path.basename(man_dir)
            if args.manifolds and man_name not in args.manifolds:
                continue

            values = np.load(os.path.join(man_dir, 'angle_ratios.npy'))
            # draw on the explicit axes rather than pyplot's implicit
            # "current axes"
            ax.hist(
                    values,
                    bins,
                    density=True,
                    label=manifold_label_for_display(man_name),
                    color=get_color_for_manifold(man_name),
                    alpha=0.5)

        ax.grid(color='lightgray', lw=2, alpha=0.5)
        ax.set_axisbelow(True)
        ax.set_xlabel('Normalized Sum of Angles')
        if dim == 2:
            ax.set_ylabel('PDF')
        else:
            ax.set_yticklabels([])
        ax.set_title('Triangles Thickness (n={})'.format(dim), y=1.18)
        ax.set_xlim(xmin, xmax)
        ax.set_ylim(top=args.ymax)

        # Legend entries are sorted by label. When every manifold was filtered
        # out there are no handles, and unpacking ``zip(*[])`` would raise a
        # ValueError, so the legend is skipped in that case.
        hs, labels = ax.get_legend_handles_labels()
        if hs:
            hs, labels = zip(*sorted(zip(hs, labels), key=lambda t: t[1]))
            ax.legend(
                    hs,
                    labels,
                    bbox_to_anchor=(0, 1.02, 1, 0.2),
                    loc='lower left',
                    mode='expand',
                    borderaxespad=0,
                    ncol=4)

        fig.tight_layout()
        figpath = os.path.join(args.save_dir, f'angles-{dim}.pdf')
        fig.savefig(figpath, bbox_inches='tight')
        # close this specific figure so figures do not pile up across dims
        plt.close(fig)
Ejemplo n.º 4
0
def main():
    """Run ``grid_fn`` for every selected manifold directory in a process pool.

    The tree below ``args.root_dir`` is walked two levels deep
    (dimension directory / manifold directory). ``args.manifolds``, when
    non-empty, restricts the manifold directory names that are processed.
    The pool uses ``args.num_cpus`` workers.
    """
    torch.set_default_dtype(torch.float64)

    with ProcessPoolExecutor(max_workers=args.num_cpus) as executor:
        pending = []
        for dim_path in fullpath_list(args.root_dir, only_dirs=True):
            for manifold_path in fullpath_list(dim_path, only_dirs=True):
                manifold = os.path.basename(manifold_path)
                selected = not args.manifolds or manifold in args.manifolds
                if selected:
                    job = executor.submit(grid_fn, manifold_path, manifold)
                    pending.append(job)

        # Wait (without timeout) for every job, in submission order;
        # ``result`` re-raises any exception raised in the worker.
        for job in pending:
            job.result(None)
Ejemplo n.º 5
0
def load_angle_ratio_samples(man_dir):
    """Return the samples of all runs below ``man_dir`` joined into one array.

    Each entry yielded by ``fullpath_list(man_dir)`` is passed to
    ``load_samples_for_best_embedding``; the per-run results are concatenated
    in iteration order with ``np.concatenate``.
    """
    per_run_samples = [
        load_samples_for_best_embedding(run_path)
        for run_path in fullpath_list(man_dir)
    ]
    return np.concatenate(per_run_samples)
Ejemplo n.º 6
0
def comp_metric(ds_name, n_nodes, fp, flipp, loss_fn_str, man_dir, man_factors,
                man_label):
    """Compute per-layer F1 statistics over all runs stored below ``man_dir``.

    For every run directory the best embedding is loaded, its pairwise
    distances are computed, and the distances of all runs are stored back to
    back in a single flat buffer that is handed to
    ``fp.layer_mean_average_f1_scores``.

    Args:
        ds_name: Dataset name; only used for the log message.
        n_nodes: Number of embedded nodes.
        fp: Object providing ``layer_mean_average_f1_scores``.
        flipp: Flip probability label; only used for the log message.
        loss_fn_str: Loss function label; only used for the log message.
        man_dir: Directory whose entries are the individual runs.
        man_factors: Manifold factors passed to ``ManifoldEmbedding``.
        man_label: Manifold label; only used for the log message.

    Returns:
        A ``(means, stds)`` pair, each truncated to the first
        ``args.max_layers`` entries, or ``(None, None)`` when ``man_dir``
        contains no runs.
    """
    run_dirs = list(fullpath_list(man_dir))
    n_runs = len(run_dirs)
    if n_runs == 0:
        # Nothing to aggregate; a ``None`` mean tells the caller to skip.
        return None, None

    # number of unordered node pairs
    num_pdists = n_nodes * (n_nodes - 1) // 2
    all_pdists = np.empty(num_pdists * n_runs)

    for i, run_dir in enumerate(run_dirs):
        # load the embedding
        embedding = ManifoldEmbedding(n_nodes, man_factors)
        emb_state_dict, _, _ = load_best_embedding(run_dir)
        embedding.load_state_dict(emb_state_dict)

        # compute the pairwise distances
        with Timer('computing pdists'), torch.no_grad():
            man_pdists = embedding.compute_dists(None)
        # NOTE(review): the in-place square root suggests ``compute_dists``
        # returns squared distances -- confirm against its implementation.
        man_pdists.sqrt_()

        # Run ``i`` owns a contiguous chunk of the buffer; a plain slice
        # writes into it without materializing an index array.
        start = i * num_pdists
        all_pdists[start:start + num_pdists] = man_pdists.numpy()

    # compute the f1 scores
    run_id = make_run_id(dataset=ds_name,
                         fp=flipp,
                         loss_fn=loss_fn_str,
                         manifold=man_label)
    logging.info('Computing F1 scores for (%s)', run_id)
    with Timer('computing F1 scores'):
        means, stds = fp.layer_mean_average_f1_scores(all_pdists, n_runs)

    return means[:args.max_layers], stds[:args.max_layers]
Ejemplo n.º 7
0
def main():
    """Run ``grid_fn`` once per selected dimension directory in a process pool.

    Each sub-directory of ``args.root_dir`` must be named after an integer
    dimension. ``args.dims``, when non-empty, is matched against the
    directory name as a string. The pool uses ``args.num_cpus`` workers.
    """
    with ProcessPoolExecutor(max_workers=args.num_cpus) as executor:
        pending = []
        for dim_path in fullpath_list(args.root_dir, only_dirs=True):
            dim_name = os.path.basename(dim_path)
            selected = not args.dims or dim_name in args.dims
            if selected:
                # the worker receives the dimension as an int
                job = executor.submit(grid_fn, dim_path, int(dim_name))
                pending.append(job)

        # Wait (without timeout) for every job, in submission order;
        # ``result`` re-raises any exception raised in the worker.
        for job in pending:
            job.result(None)
Ejemplo n.º 8
0
def grid_fn(input_dir, dim):
    """Plot every quantity in ``args.plots`` against the distance threshold.

    ``input_dir`` is scanned two levels deep: one sub-directory per manifold,
    each containing one sub-directory per threshold whose name must parse as
    a float. For every quantity one figure is produced, with one curve per
    manifold, and saved to ``args.save_dir`` as ``<quantity>-<dim>.pdf``.

    Args:
        input_dir: Directory holding the manifold sub-directories.
        dim: Dimension, used for the figure layout, title and file name.
    """
    # results[manifold][threshold][quantity] -> value read from disk
    results = {}
    for man_dir in fullpath_list(input_dir, only_dirs=True):
        man_name = os.path.basename(man_dir)
        results[man_name] = {}
        for thresh_dir in fullpath_list(man_dir, only_dirs=True):
            thresh = os.path.basename(thresh_dir)

            results[man_name][thresh] = {}
            for quantity in args.plots:
                ret = read_quantity_results(thresh_dir, quantity)
                # missing results are simply left out
                if ret is not None:
                    results[man_name][thresh][quantity] = ret

    quantity_labels = dict(
            degrees='Node Degree', seccurvs='Graph Sectional Curvature')
    quantity_titles = dict(
            degrees='Degree Distributions', seccurvs='Curvature Estimates')
    # line styles and markers are cycled over the manifolds
    ls = ['-', '--', '-.', ':']
    ms = ['o', 'v', '*', 'd', 'x', '1']

    for quantity in args.plots:
        # only the dim == 2 figure carries a y label (see below) and is wider
        width = 7 if dim == 2 else 6
        height = 5.5 if quantity == 'degrees' else 5
        fig, ax = plt.subplots(figsize=(width, height))

        for i, (man_name, values) in enumerate(results.items()):
            # manifolds whose name starts with 'euc' only appear in the
            # 'degrees' plot
            if man_name.startswith('euc') and quantity != 'degrees':
                continue

            points = [
                (float(thresh), thresh_values[quantity])
                for thresh, thresh_values in values.items()
                if quantity in thresh_values
            ]
            if not points:
                # No data for this manifold/quantity: unpacking ``zip(*[])``
                # below would raise a ValueError, so skip the curve.
                continue
            points.sort(key=lambda e: e[0])
            xs, ys = zip(*points)

            # Each stored result unpacks into three series: the middle one is
            # the curve, the outer two bound the shaded band.
            # NOTE(review): the names suggest 25/50/75 percentiles -- confirm
            # against ``read_quantity_results``.
            p25, p50, p75 = zip(*ys)
            line, = ax.plot(
                    xs,
                    p50,
                    label=manifold_label_for_display(man_name),
                    color=get_color_for_manifold(man_name),
                    # later curves are thinner so earlier ones stay visible
                    # underneath; never let the width drop to zero or below
                    lw=max(6 - i, 1),
                    ls=ls[i % len(ls)],
                    marker=ms[i % len(ms)],
                    ms=10)
            ax.fill_between(xs, p25, p75, facecolor=line.get_color(), alpha=0.3)

        ax.set_ylim(bottom=args.ymin, top=args.ymax)
        ax.set_xlim(left=0, right=args.xmax)
        ax.xaxis.set_major_locator(MaxNLocator(integer=True))
        ax.grid(color='lightgray', lw=2, alpha=0.5)
        ax.set_xlabel('Distance Threshold')
        if dim == 2:
            ax.set_ylabel(quantity_labels[quantity])
        else:
            ax.set_yticklabels([])
        ax.set_title(
                '{} (n={})'.format(quantity_titles[quantity], dim),
                y=1.32 if quantity == 'degrees' else 1.18)
        ax.set_axisbelow(True)

        # Legend entries are sorted by label; skipped when nothing was drawn
        # because unpacking ``zip(*[])`` would raise a ValueError.
        hs, labels = ax.get_legend_handles_labels()
        if hs:
            hs, labels = zip(*sorted(zip(hs, labels), key=lambda t: t[1]))
            ax.legend(
                    hs,
                    labels,
                    bbox_to_anchor=(0, 1.02, 1, 0.2),
                    loc='lower left',
                    mode='expand',
                    borderaxespad=0,
                    ncol=2 if quantity == 'degrees' else 3)

        fig.tight_layout()
        fig_name = os.path.join(args.save_dir, f'{quantity}-{dim}.pdf')
        fig.savefig(fig_name, bbox_inches='tight')
        # close this specific figure so figures do not pile up
        plt.close(fig)
Ejemplo n.º 9
0
def main():
    """Plot one figure of angle-ratio histograms per dimension directory.

    Every sub-directory of ``args.root_dir`` must be named after an integer
    dimension. For each of them (optionally restricted by ``args.dims``,
    matched against the directory name as a string), the ``angle_ratios.npy``
    file of every manifold sub-directory (visited in sorted order, optionally
    restricted by ``args.manifolds``) is drawn as a semi-transparent density
    histogram. Figures for dimensions other than 3 additionally get a
    "cut off" annotation stating the tallest histogram bar. The figure is
    written to ``args.save_dir`` as ``angles-<dim>.pdf``.

    ``xmin`` and ``xmax`` are read from the enclosing module scope.
    """
    for dim_dir in fullpath_list(args.root_dir, only_dirs=True):
        dim = os.path.basename(dim_dir)
        # the filter is applied before the conversion, i.e. on the raw name
        if args.dims and dim not in args.dims:
            continue
        dim = int(dim)

        # only the dim == 3 figure carries a y label (see below) and is wider
        width = 7 if dim == 3 else 6
        fig, ax = plt.subplots(figsize=(width, 5))
        bins = np.linspace(xmin, xmax, 100)
        colors = ['tab:orange', 'tab:green']

        i = 0
        ymax = 0
        for man_dir in sorted(fullpath_list(dim_dir, only_dirs=True)):
            man_name = os.path.basename(man_dir)
            if args.manifolds and man_name not in args.manifolds:
                continue

            values = np.load(os.path.join(man_dir, 'angle_ratios.npy'))
            # draw on the explicit axes rather than pyplot's implicit
            # "current axes"
            ret = ax.hist(
                    values,
                    bins,
                    density=True,
                    label=manifold_label_for_display(man_name),
                    # wrap around so that more manifolds than colors do not
                    # raise an IndexError
                    color=colors[i % len(colors)],
                    alpha=0.5)
            # first element of the hist result holds the bar heights
            ymax = max(ymax, ret[0].max())
            i += 1

        ax.grid(color='lightgray', lw=2, alpha=0.5)
        ax.set_axisbelow(True)
        ax.set_xlabel('Normalized Sum of Angles')
        if dim == 3:
            ax.set_ylabel('PDF')
        else:
            # the arrow and text positions are fixed data coordinates
            ax.annotate(
                    'cut off (max {:.2f})'.format(ymax),
                    xy=(-0.47, 19.5),
                    xytext=(-0.38, 17),
                    arrowprops=dict(facecolor='k', arrowstyle='-|>'),
                    bbox=dict(boxstyle='round', facecolor='none'))
            ax.set_yticklabels([])
        ax.set_title('Triangles Thickness (n={})'.format(dim), y=1.18)
        ax.set_xlim(xmin, xmax)
        if args.ymax:
            ax.set_ylim(top=args.ymax + 0.1)

        # Legend entries are sorted by label. When every manifold was filtered
        # out there are no handles, and unpacking ``zip(*[])`` would raise a
        # ValueError, so the legend is skipped in that case.
        hs, labels = ax.get_legend_handles_labels()
        if hs:
            hs, labels = zip(*sorted(zip(hs, labels), key=lambda t: t[1]))
            ax.legend(
                    hs,
                    labels,
                    bbox_to_anchor=(0, 1.02, 1, 0.2),
                    loc='lower left',
                    mode='expand',
                    borderaxespad=0,
                    ncol=4)

        fig.tight_layout()
        figpath = os.path.join(args.save_dir, f'angles-{dim}.pdf')
        fig.savefig(figpath, bbox_inches='tight')
        # close this specific figure so figures do not pile up across dims
        plt.close(fig)