Esempio n. 1
0
    # Cost function parameters
    r_eps = 0.05

    # Phase 2 cost function parameters
    lambdas_2 = [1, 1.2, 0]
    if args.star:
        lambdas_2[1] = 0.1

    # Phase 3 cost function parameters
    lambdas_3 = [1, 0.01, 0.6]

    # Read input graph
    print('Reading graph: {0}...'.format(args.input_graph),
          end=' ',
          flush=True)
    g = graph_io.load_graph(args.input_graph)
    print('Done.')

    print('Input graph: {0}, (|V|, |E|) = ({1}, {2})'.format(
        graph_name, g.num_vertices(), g.num_edges()))

    # Load the PivotMDS layout for initial placement
    if args.star:
        print('Reading PivotMDS layout...', end=' ', flush=True)
        _, Y_init = layout_io.load_layout(
            './pivotmds_layouts/{0}.vna'.format(graph_name))
        print('Done.')
    else:
        Y_init = None

    # Time the method including SPDM calculations
Esempio n. 2
0
def _build_arg_parser():
    """Build the command-line parser used by :func:`main`.

    Most hyperparameter options accept one or more values (``nargs='+'``);
    ``main`` later forms the Cartesian product of all of them.

    Returns:
        argparse.ArgumentParser: the fully configured parser.
    """
    from argparse import ArgumentParser

    parser = ArgumentParser(
        description='Read a graph, and produce a layout with t-SNE.')

    # Input
    parser.add_argument(
        'graphs',
        nargs='+',
        help='(List of) input graph(s). Or a folder with graphs.')

    # Output
    parser.add_argument('-o',
                        default='./output',
                        help='Folder to write output to. Default: ./output')
    parser.add_argument('--save_every',
                        type=int,
                        help='Save a jpg snapshot every x epochs.')
    parser.add_argument(
        '--render_video',
        action='store_true',
        help='Render a video of the layout evolution. '
        'Needs ImageMagick and ffmpeg.')
    parser.add_argument(
        '--retain_snaps',
        action='store_true',
        help='Retain the snapshots. '
        'This argument is ignored if no video is rendered.')
    parser.add_argument(
        '--save_layout_data',
        action='store_true',
        help='Save all layout coordinates in a .pickle file and a .txt file.')
    parser.add_argument('--opacity',
                        type=float,
                        default=0.3,
                        help='Edge opacity.')

    # Manipulations to graph
    parser.add_argument(
        '--strip_graph',
        action='store_true',
        help='Retain only the largest connected component in the graph.')
    parser.add_argument('--rnd_seed',
                        '-r',
                        type=int,
                        nargs='+',
                        default=[None],
                        help='Seed for random state. (Default: Random seed)')
    parser.add_argument(
        '--pre_sfdp',
        action='store_true',
        help='If this flag is given, '
        'the vertices will be pre-initialized with SFDP.')
    parser.add_argument('--only_sfdp',
                        action='store_true',
                        help='If this flag is given, only SFDP will be done.')
    parser.add_argument(
        '--accept_all_sfdp',
        action='store_true',
        help='If this flag is given, '
        'no confirmation is asked for the SFDP layouts.')
    parser.add_argument(
        '--remove_rnd_edges',
        nargs='+',
        type=float,
        default=[0],
        help='Mutate the graph by removing random edges. '
        'If this is used without a random seed, '
        'a random random seed will be generated. '
        'The value given to this argument is the fraction of edges '
        'that will be removed.')

    # Hyperparameters
    parser.add_argument('--n_epochs',
                        '-e',
                        nargs='+',
                        type=int,
                        default=[1000],
                        help='One or more numbers of t-SNE epochs.')
    parser.add_argument('--lr_init',
                        nargs='+',
                        type=float,
                        default=[80],
                        help='One or more initial learning rates.')
    parser.add_argument(
        '--lr_final',
        nargs='+',
        type=float,
        default=[None],
        help='One or more final learning rates. Default: Same as lr_init.')
    parser.add_argument('--lr_switch',
                        nargs='+',
                        type=int,
                        default=[None],
                        help='One or more learning rate switch-points.')
    parser.add_argument('--momentum_init',
                        nargs='+',
                        type=float,
                        default=[0.5],
                        help='One or more initial momenta.')
    # The help text used to be a copy of the --momentum_init one.
    parser.add_argument('--momentum_final',
                        nargs='+',
                        type=float,
                        default=[0.5],
                        help='One or more final momenta.')
    parser.add_argument('--momentum_switch',
                        nargs='+',
                        type=int,
                        default=[None],
                        help='One or more momentum switch-points.')

    # Distance metric parameters
    parser.add_argument(
        '--distance_metric',
        '-d',
        choices=['shortest_path', 'spdm', 'modified_adjacency', 'mam'],
        default='spdm',
        help='The distance metric that is used for the pairwise distances.')
    parser.add_argument('-k',
                        nargs='+',
                        type=float,
                        default=[1],
                        help='Exponent for transfer function.')

    # Cost function parameters
    parser.add_argument('--perplexity',
                        '-p',
                        nargs='+',
                        type=float,
                        default=[80],
                        help='One or more perplexities.')
    # Each cost term has an initial factor, a final factor and a switch-point.
    # Rows: (option stem, initial default, final default, label in help text).
    cost_terms = (
        ('l_kl', 1, 1, 'KL'),
        ('l_e', 0, 0, 'edge contraction'),
        ('l_c', 1.2, 0, 'compression'),
        ('l_r', 0, 0.5, 'repulsion'),
    )
    for stem, init_default, final_default, label in cost_terms:
        factor_help = 'One or more {0} factors.'.format(label)
        parser.add_argument('--{0}_init'.format(stem),
                            nargs='+',
                            type=float,
                            default=[init_default],
                            help=factor_help)
        parser.add_argument('--{0}_final'.format(stem),
                            nargs='+',
                            type=float,
                            default=[final_default],
                            help=factor_help)
        parser.add_argument(
            '--{0}_switch'.format(stem),
            nargs='+',
            type=int,
            default=[None],
            help='One or more {0} switch-points'.format(label))
    parser.add_argument(
        '--r_eps',
        nargs='+',
        type=float,
        default=[0.2],
        help='Additional term in denominator to prevent near-singularities.')

    return parser


def _remove_random_edges(gv, rmv_edge_frac):
    """Hide a random fraction of the edges of ``gv`` behind an edge filter.

    Edge indices are drawn *without* replacement, so exactly the returned
    number of distinct edges is filtered out. (Drawing with replacement could
    pick the same edge twice and remove fewer edges than reported.)

    Args:
        gv: graph view to manipulate in place; its filters are cleared first.
        rmv_edge_frac: fraction of the edges to remove. Values above 1 are
            capped at "all edges".

    Returns:
        int: the number of edges that were filtered out.

    Raises:
        ValueError: if the fraction yields a negative number of edges.
    """
    gv.clear_filters()
    gv.reindex_edges()
    edge_list = list(gv.edges())
    keep_edge = gv.new_edge_property('bool', val=True)

    n_edges = gv.num_edges()
    n_remove_edges = min(int(rmv_edge_frac * n_edges), n_edges)
    if n_remove_edges < 0:
        raise ValueError(
            'Cannot remove a negative number of edges (fraction {0}).'.format(
                rmv_edge_frac))

    # Skip sampling altogether when nothing is removed; this also keeps
    # edgeless graphs from tripping over an empty sampling range.
    if n_remove_edges > 0:
        for idx in np.random.choice(n_edges, n_remove_edges, replace=False):
            keep_edge[edge_list[idx]] = False
    gv.set_edge_filter(keep_edge)
    return n_remove_edges


def main():
    """Command-line entry point: lay out one or more graphs with t-SNE.

    Parses the command line, then for every input graph and every requested
    edge-removal fraction it optionally strips the graph to its largest
    component, optionally runs an SFDP placement, computes the distance
    matrix, and runs ``thesne.tsne`` once per combination of the supplied
    hyperparameters. Drawings (and optionally snapshots, a video and layout
    data) are written to a per-graph folder below the ``-o`` output folder.

    Raises:
        FileNotFoundError: if one of the given graph paths is not a file.
    """
    import glob
    import itertools
    import os.path
    import shutil

    args = _build_arg_parser().parse_args()

    # Retrieve a list of all files in the directory, if args.graphs[0] is a
    # directory.
    if len(args.graphs) == 1 and os.path.isdir(args.graphs[0]):
        args.graphs = glob.glob(args.graphs[0] + '/*')

    # Check graph input
    for g_file in args.graphs:
        if not os.path.isfile(g_file):
            raise FileNotFoundError(g_file + ' is not a file.')

    # Generate random random seed if none is given.
    if args.rnd_seed == [None]:
        args.rnd_seed = [np.random.randint(int(1e8))]

    # Ignore retain_snaps argument if no video is rendered: without a video
    # the snapshots are the only product of --save_every, so keep them.
    if not args.render_video:
        args.retain_snaps = True

    # Get names of the graphs (by splitting of path and extension)
    names = [
        os.path.split(os.path.splitext(path)[0])[1] for path in args.graphs
    ]

    # Determine output folders. One is created in the specified output folder
    # for every graph that is supplied.
    output_folders = [args.o + '/' + name for name in names]

    # Make sure the output folders exist.
    for folder in [args.o] + output_folders:
        os.makedirs(folder, exist_ok=True)

    # At least everything is fine for now.
    there_were_exceptions = False

    # Loop over all graphs (and their respective output folders)
    for g_file, g_name, output_folder in zip(args.graphs, names,
                                             output_folders):
        # Load the graph
        g = graph_io.load_graph(g_file)
        print(
            '[tsnetwork] Loaded graph {0} (|V| = {1}, |E| = {2}) into memory.'.
            format(g_name, g.num_vertices(), g.num_edges()))

        # Add graph name as property in the internal representation
        g.graph_properties['name'] = g.new_graph_property('string', g_name)

        # Usually this loop has just one iteration, with only 0 as the value
        # for rmv_edge_frac (that is, no edges are removed).
        for rmv_edge_frac in args.remove_rnd_edges:
            print(
                '[tsnetwork] Original graph: (|V|, |E|) = ({0}, {1}).'.format(
                    g.num_vertices(), g.num_edges()))

            # Create a temporary view of the graph that will be manipulated.
            gv = gt.GraphView(g)

            # Remove rmv_edge_frac of the graph's edges from gv.
            n_remove_edges = _remove_random_edges(gv, rmv_edge_frac)
            if n_remove_edges > 0:
                print(
                    '[tsnetwork] Removed {2} random edges: (|V|, |E|) = ({0}, {1}).'
                    .format(gv.num_vertices(), gv.num_edges(), n_remove_edges))

            # Filter the graph s.t. only the largest connected component
            # remains.
            if args.strip_graph:
                largest_connected_component = gt.label_largest_component(gv)
                gv.set_vertex_filter(largest_connected_component)
                gv.purge_vertices()
                print(
                    '[tsnetwork] Filtered largest component: (|V|, |E|) = ({0}, {1}).'
                    .format(gv.num_vertices(), gv.num_edges()))

            if args.pre_sfdp or args.only_sfdp:
                # Perform a SFDP layout (either as the only layout or as a
                # starting point for t-SNE.)
                Y_init, _ = sfdp_placement(
                    gv,
                    output_folder,
                    ask_for_acceptance=not args.accept_all_sfdp,
                    opacity=args.opacity)
                if args.only_sfdp:
                    continue
            else:
                # Random positions will be generated
                Y_init = None

            # Compute distance matrix of this graph with the specified metric
            X = distance_matrix.get_distance_matrix(gv, args.distance_metric)

            # Retrieve the adjacency matrix of the graph as a dense array
            Adj_sparse = gt.adjacency(gv)
            Adj = np.zeros(Adj_sparse.shape, dtype='float32')
            for i, j in zip(*Adj_sparse.nonzero()):
                Adj[i, j] = Adj_sparse[i, j]

            # Make list of tsnetwork configuration objects. These are objects
            # that represent a configuration for a t-SNE layout. One is made
            # for every combination of the supplied hyperparameter values.
            tsn_configs = []
            for perplexity, n_epochs, initial_lr, final_lr, lr_switch, initial_momentum,\
                final_momentum, momentum_switch,\
                initial_l_kl, final_l_kl, l_kl_switch,\
                initial_l_e, final_l_e, l_e_switch,\
                initial_l_c, final_l_c, l_c_switch,\
                initial_l_r, final_l_r, l_r_switch,\
                r_eps, k, rnd_seed in itertools.product(
                    args.perplexity, args.n_epochs, args.lr_init, args.lr_final,
                    args.lr_switch, args.momentum_init, args.momentum_final,
                    args.momentum_switch,
                    args.l_kl_init, args.l_kl_final, args.l_kl_switch,
                    args.l_e_init, args.l_e_final, args.l_e_switch,
                    args.l_c_init, args.l_c_final, args.l_c_switch,
                    args.l_r_init, args.l_r_final, args.l_r_switch,
                    args.r_eps, args.k, args.rnd_seed):

                # Use 50% for the switching points if no argument is given
                halfway = int(n_epochs * 0.5)
                if lr_switch is None:
                    lr_switch = halfway
                if momentum_switch is None:
                    momentum_switch = halfway
                if l_kl_switch is None:
                    l_kl_switch = halfway
                if l_e_switch is None:
                    l_e_switch = halfway
                if l_c_switch is None:
                    l_c_switch = halfway
                if l_r_switch is None:
                    l_r_switch = halfway

                # The final learning rate defaults to the initial one.
                if final_lr is None:
                    final_lr = initial_lr

                cfg = TsnConfig(perplexity=perplexity,
                                n_epochs=n_epochs,
                                initial_lr=initial_lr,
                                final_lr=final_lr,
                                lr_switch=lr_switch,
                                initial_momentum=initial_momentum,
                                final_momentum=final_momentum,
                                momentum_switch=momentum_switch,
                                initial_l_kl=initial_l_kl,
                                final_l_kl=final_l_kl,
                                l_kl_switch=l_kl_switch,
                                initial_l_e=initial_l_e,
                                final_l_e=final_l_e,
                                l_e_switch=l_e_switch,
                                initial_l_c=initial_l_c,
                                final_l_c=final_l_c,
                                l_c_switch=l_c_switch,
                                initial_l_r=initial_l_r,
                                final_l_r=final_l_r,
                                l_r_switch=l_r_switch,
                                r_eps=r_eps,
                                k=k,
                                pre_sfdp=args.pre_sfdp,
                                rmv_edge_frac=rmv_edge_frac,
                                rnd_seed=rnd_seed,
                                distance_matrix=args.distance_metric)

                # Do not add the configurations that already have files
                # matching the description, unless the user confirms to
                # overwrite.
                description_prefix = cfg.get_description() + '.'
                if any(
                        fname.startswith(description_prefix)
                        for fname in os.listdir(output_folder)):
                    if not usr_input.confirm('[tsnetwork] ' +
                                             cfg.get_description() +
                                             ' files exists! Overwrite?'):
                        continue
                tsn_configs.append(cfg)

            # Loop over the t-SNE configurations for a single graph
            for cfg in tsn_configs:
                print('[tsnetwork] Processing: ' + cfg.get_description())

                # String that has the path to the directory where the snapshots
                # will come. (If --save_every is given)
                snaps_dir = output_folder + '/snaps_' + cfg.get_description()

                if args.save_every is not None:
                    if os.path.exists(snaps_dir):
                        # Clean out existing snaps directory, if the user
                        # agrees. Deletion is best-effort: failures are
                        # reported but do not abort the run.
                        if usr_input.confirm('[tsnetwork] ' + snaps_dir +
                                             ' exists. Delete contents?'):
                            for fname in os.listdir(snaps_dir):
                                file_path = os.path.join(snaps_dir, fname)
                                try:
                                    if os.path.isfile(file_path):
                                        os.unlink(file_path)
                                    elif os.path.isdir(file_path):
                                        shutil.rmtree(file_path)
                                except OSError as e:
                                    print(e)
                    else:
                        # Make folder for snaps, since it is necessary and it
                        # doesn't exist yet.
                        os.makedirs(snaps_dir)

                # Apply the transfer function
                X_transfered = X**cfg.k

                # Try to do the tsne layout.
                try:
                    Y, costs = thesne.tsne(
                        X_transfered,
                        random_state=cfg.rnd_seed,
                        perplexity=cfg.perplexity,
                        n_epochs=cfg.n_epochs,
                        Y=Y_init,
                        initial_lr=cfg.initial_lr,
                        final_lr=cfg.final_lr,
                        lr_switch=cfg.lr_switch,
                        initial_momentum=cfg.initial_momentum,
                        final_momentum=cfg.final_momentum,
                        momentum_switch=cfg.momentum_switch,
                        initial_l_kl=cfg.initial_l_kl,
                        final_l_kl=cfg.final_l_kl,
                        l_kl_switch=cfg.l_kl_switch,
                        initial_l_e=cfg.initial_l_e,
                        final_l_e=cfg.final_l_e,
                        l_e_switch=cfg.l_e_switch,
                        initial_l_c=cfg.initial_l_c,
                        final_l_c=cfg.final_l_c,
                        l_c_switch=cfg.l_c_switch,
                        initial_l_r=cfg.initial_l_r,
                        final_l_r=cfg.final_l_r,
                        l_r_switch=cfg.l_r_switch,
                        r_eps=cfg.r_eps,
                        Adj=Adj,
                        g=gv,
                        snaps_output_folder=snaps_dir,
                        save_every=args.save_every)
                except (thesne.NaNException, thesne.SigmaTooLowException) as e:
                    there_were_exceptions = True
                    print('[exception] {0}'.format(e))

                    # Also write exception to a file. The with-statement
                    # closes the file; no explicit close() is needed.
                    with open(
                            output_folder + '/exception_' +
                            cfg.get_description() + '.out', 'w') as f:
                        print('{0}'.format(e), file=f)
                    print('[tsnetwork] Continuing with next TsnConfig.')
                    continue

                # Render an animation of the snapshots
                if args.render_video:
                    animations.save_animation(snaps_dir, cfg.get_description())

                # Remove the directory with snapshots.
                if (args.save_every is not None and not args.retain_snaps
                        and os.path.exists(snaps_dir)):
                    print('[tsnetwork] Cleaning up snaps directory.')
                    shutil.rmtree(snaps_dir)

                # Save the data (graph, vertex coordinates)
                if args.save_layout_data:
                    layout_io.save_vna_layout(
                        output_folder + '/layout_' + cfg.get_description() +
                        '.vna', gv, Y)
                    layout_io.save_layout_txt(
                        output_folder + '/layout_edges_' +
                        cfg.get_description() + '.txt', gv, Y)

                # Save final drawing of the layout
                layout_io.save_drawing(output_folder,
                                       gv,
                                       Y.T,
                                       cfg.get_description(),
                                       formats=['jpg', 'pdf'],
                                       edge_colors="rgb",
                                       draw_vertices=False,
                                       opacity=args.opacity)

    if there_were_exceptions:
        print('[tsnetwork] Done! However, be wary. There were exceptions.')
    else:
        print('[tsnetwork] Done!')