# Esempio n. 1 (Example 1)
# 0
def coarsening_test2(seed=None):
    """Visualize coarsening: record the node->aggregate map at every
    coarsening level, then color each original node by its aggregate in the
    final level and render the graph with sfdp.

    seed -- optional RNG seed (int); a random one is drawn when omitted.
    """
    import matplotlib as mpl
    if seed is None:  # 'is None' rather than '== None' (PEP 8)
        seed = npr.randint(10**6)  # integer bound instead of the float 1E6
    print('rnd seed: %d'%seed)
    npr.seed(seed)
    random.seed(seed)

    G = graphutils.load_graph('data/mesh33.gml')
    c_tree = []
    def store_aggregation_chain(G, G_coarse, c_data):
        # Callback invoked at every coarsening level during generation;
        # records a copy of the node->home-aggregate map for that level.
        store_aggregation_chain.static_c_tree.append(c_data['home_nodes'].copy())

    store_aggregation_chain.static_c_tree = c_tree

    params = {}
    params['do_coarsen_tester'] = store_aggregation_chain
    params['node_edit_rate']    = [0, 0, 0, 0]  #change to force coarsening

    # generation is run only for its side effect: populating c_tree
    dummy_replica = algorithms.generate_graph(G, params=params)

    node_colors = {}
    # one random RGBA color per final-level aggregate; the loop variable is
    # named 'agg' so it no longer shadows the 'seed' parameter
    aggregate_colors = {agg: (npr.rand(), npr.rand(), npr.rand(), 1.)
                        for agg in list(c_tree[-1].values())}
    for node in G:
        # follow the aggregation chain from the finest to the coarsest level
        my_final_agg = node
        for c_set in c_tree:
            my_final_agg = c_set[my_final_agg]  #this could be faster with union-find structure
        clr = aggregate_colors[my_final_agg]
        node_colors[node] = clr
        # NOTE(review): G.node is the pre-2.0 networkx attribute API; migrate
        # to G.nodes[...] if the project moves to networkx >= 2.0
        G.node[node]['color'] = '%.3f %.3f %.3f'%(clr[0],clr[1],clr[2])
        G.node[node]['label'] = ''

    all_nodes = G.nodes()
    color_array = np.ones((len(all_nodes),4))
    for i,node in enumerate(all_nodes):
        color_array[i,:] *= node_colors[node]

    gpath     = 'output/coarsening_test_'+timeNow()+'.dot'
    gpath_fig = gpath+'.pdf'
    graphutils.write_graph(G=G, path=gpath)
    print('Writing graph image: %s ..'%gpath_fig)
    # '&' backgrounds sfdp; the sleep below gives it a head start
    visualizer_cmdl = 'sfdp -Nwidth=0.10 -Nheight=0.10 -Nfixedsize=true -Nstyle=filled -Tpdf %s > %s &'%(gpath,gpath_fig)
    retCode = os.system(visualizer_cmdl)
    time.sleep(1)
    subprocess.call(['xdg-open', gpath_fig])
# Esempio n. 2 (Example 2)
# 0
def coarsening_test2(seed=None):
    """Visualize coarsening: store the per-level node aggregation, then label
    and color the original nodes by their final-level aggregates.

    seed -- optional RNG seed (int); drawn at random when None.
    """
    import matplotlib as mpl
    if seed is None:  # PEP 8: compare to None with 'is'
        seed = npr.randint(10**6)  # integer bound instead of the float 1E6
    print('rnd seed: %d'%seed)
    npr.seed(seed)
    random.seed(seed)

    G = graphutils.load_graph('data/mesh33.gml')
    c_tree = []
    def store_aggregation_chain(G, G_coarse, c_data):
        # per-level callback: save a copy of the node->home-aggregate map
        store_aggregation_chain.static_c_tree.append(c_data['home_nodes'].copy())

    store_aggregation_chain.static_c_tree = c_tree

    params = {}
    params['do_coarsen_tester'] = store_aggregation_chain
    params['node_edit_rate']    = [0, 0, 0, 0]  #change to force coarsening

    # run only for the side effect of filling c_tree via the callback
    dummy_replica = algorithms.generate_graph(G, params=params)

    node_colors = {}
    # random RGBA color per final-level aggregate; loop variable renamed so it
    # does not shadow the 'seed' parameter
    aggregate_colors = {agg: (npr.rand(), npr.rand(), npr.rand(), 1.)
                        for agg in list(c_tree[-1].values())}
    for node in G:
        my_final_agg = node
        for c_set in c_tree:
            my_final_agg = c_set[my_final_agg]  #this could be faster with union-find structure
        clr = aggregate_colors[my_final_agg]
        node_colors[node] = clr
        # NOTE(review): G.node is the networkx < 2.0 API (G.nodes[...] in 2.x)
        G.node[node]['color'] = '%.3f %.3f %.3f'%(clr[0],clr[1],clr[2])
        G.node[node]['label'] = ''

    all_nodes = G.nodes()
    color_array = np.ones((len(all_nodes),4))
    for i,node in enumerate(all_nodes):
        color_array[i,:] *= node_colors[node]

    gpath     = 'output/coarsening_test_'+timeNow()+'.dot'
    gpath_fig = gpath+'.pdf'
    graphutils.write_graph(G=G, path=gpath)
    print('Writing graph image: %s ..'%gpath_fig)
    # sfdp is backgrounded ('&'); sleep gives it time before the viewer opens
    visualizer_cmdl = 'sfdp -Nwidth=0.10 -Nheight=0.10 -Nfixedsize=true -Nstyle=filled -Tpdf %s > %s &'%(gpath,gpath_fig)
    retCode = os.system(visualizer_cmdl)
    time.sleep(1)
    subprocess.call(['xdg-open', gpath_fig])
# Esempio n. 3 (Example 3)
# 0
def statistical_tests(seed=8):
    """Run a systematic replica-vs-original comparison over a collection of
    problems (graphs plus generation parameters).

    seed -- RNG seed (default 8); pass None to draw a random one.
    """
    if seed is None:  # 'is None' rather than '== None' (PEP 8)
        seed = npr.randint(10**6)  # integer bound instead of the float 1E6
    print('rand seed: %d'%seed)
    npr.seed(seed)
    random.seed(seed)

    default_num_replicas = 20

    params_default  = {'verbose':False, 'edge_edit_rate':[0.08, 0.07], 'node_edit_rate':[0.08, 0.07], 'node_growth_rate':[0],
            'dont_cutoff_leafs':False,
            'new_edge_horizon':10, 'num_deletion_trials':20, 'locality_bias_correction':[0,], 'edit_method':'sequential',
            }

    metrics_default = graphutils.default_metrics[:]
    # some metrics are removed because of long running time
    metrics_default  = [met for met in metrics_default if met['name'] not in ['avg flow closeness', 'avg eigvec centrality', 'degree connectivity', 'degree assortativity',  'average shortest path', 'mean ecc', 'powerlaw exp', ]]
    # each problem: a graph (path or object) with optional per-problem overrides
    problems = [{'graph_data':nx.erdos_renyi_graph(n=300, p=0.04, seed=42), 'name':'ER300', 'num_replicas':20},
                {'graph_data':'data-samples/ftp3c.elist'},
                {'graph_data':'data-samples/mesh33.edges'},
                {'graph_data':'data-samples/newman06_netscience.gml', 'num_replicas':10},

                {'graph_data':'data-samples/watts_strogatz98_power.elist', 'num_replicas':10},
               ]

    for problem in problems:
        graph_data    = problem['graph_data']
        params        = problem.get('params', params_default)
        metrics       = problem.get('metrics', metrics_default)
        num_replicas  = problem.get('num_replicas', default_num_replicas)

        # a string is a path to load from disk; anything else is a ready graph
        if isinstance(graph_data, str):  # isinstance instead of 'type(...) is str'
            base_graph = graphutils.load_graph(path=graph_data)
            base_graph.name = os.path.split(graph_data)[1]
        else:
            base_graph = graph_data
            if not hasattr(base_graph, 'name'):
                base_graph.name = problem.get('name', str(npr.randint(10000)))

        gpath     = 'output/'+os.path.split(base_graph.name)[1]+'_'+timeNow()+'.dot'
        gpath_fig = gpath[:-3]+'eps'  # swap the 'dot' extension for 'eps'
        graphutils.write_graph(G=base_graph, path=gpath)
        visualizer_cmdl = 'sfdp  -Nlabel="" -Nwidth=0.03 -Nfixedsize=true -Nheight=0.03 -Teps %s > %s &'%(gpath,gpath_fig)
        print('Writing graph image: %s ..'%gpath_fig)
        retCode = os.system(visualizer_cmdl)

        replica_vs_original(G=base_graph, num_replicas=num_replicas, seed=1, params=params, metrics=metrics, title_infix='musketeer')
# Esempio n. 4 (Example 4)
# 0
def statistical_tests(seed=8):
    """Systematically compare replicas against originals for a fixed list of
    benchmark problems (graphs and parameter sets).

    seed -- RNG seed (default 8); None draws a random seed.
    """
    if seed is None:  # PEP 8: compare to None with 'is'
        seed = npr.randint(10**6)  # integer bound instead of the float 1E6
    print('rand seed: %d'%seed)
    npr.seed(seed)
    random.seed(seed)

    default_num_replicas = 20

    params_default  = {'verbose':False, 'edge_edit_rate':[0.08, 0.07], 'node_edit_rate':[0.08, 0.07], 'node_growth_rate':[0],
            'dont_cutoff_leafs':False,
            'new_edge_horizon':10, 'num_deletion_trials':20, 'locality_bias_correction':[0,], 'edit_method':'sequential',
            }

    metrics_default = graphutils.default_metrics[:]
    # some metrics are removed because of long running time
    metrics_default  = [met for met in metrics_default if met['name'] not in ['avg flow closeness', 'avg eigvec centrality', 'degree connectivity', 'degree assortativity',  'average shortest path', 'mean ecc', 'powerlaw exp', ]]
    # one dict per benchmark problem; missing keys fall back to the defaults
    problems = [{'graph_data':nx.erdos_renyi_graph(n=300, p=0.04, seed=42), 'name':'ER300', 'num_replicas':20},
                {'graph_data':'data-samples/ftp3c.elist'},
                {'graph_data':'data-samples/mesh33.edges'},
                {'graph_data':'data-samples/newman06_netscience.gml', 'num_replicas':10},

                {'graph_data':'data-samples/watts_strogatz98_power.elist', 'num_replicas':10},
               ]

    for problem in problems:
        graph_data    = problem['graph_data']
        params        = problem.get('params', params_default)
        metrics       = problem.get('metrics', metrics_default)
        num_replicas  = problem.get('num_replicas', default_num_replicas)

        # string -> load from path; otherwise the graph object itself
        if isinstance(graph_data, str):  # isinstance instead of 'type(...) is str'
            base_graph = graphutils.load_graph(path=graph_data)
            base_graph.name = os.path.split(graph_data)[1]
        else:
            base_graph = graph_data
            if not hasattr(base_graph, 'name'):
                base_graph.name = problem.get('name', str(npr.randint(10000)))

        gpath     = 'output/'+os.path.split(base_graph.name)[1]+'_'+timeNow()+'.dot'
        gpath_fig = gpath[:-3]+'eps'  # replace the 'dot' extension with 'eps'
        graphutils.write_graph(G=base_graph, path=gpath)
        visualizer_cmdl = 'sfdp  -Nlabel="" -Nwidth=0.03 -Nfixedsize=true -Nheight=0.03 -Teps %s > %s &'%(gpath,gpath_fig)
        print('Writing graph image: %s ..'%gpath_fig)
        retCode = os.system(visualizer_cmdl)

        replica_vs_original(G=base_graph, num_replicas=num_replicas, seed=1, params=params, metrics=metrics, title_infix='musketeer')
# Esempio n. 5 (Example 5)
# 0
def show_usage():
    """Print the command-line usage/help text for MUSKETEER 2."""
    # Converted from Python 2 'print' statements to the print() function for
    # consistency with the rest of this file; also fixed the 'paremeters' typo.
    print('Multiscale Entropic Network Generator 2 (MUSKETEER2)')
    print('Allowed options are:')
    print('-c, --citation    Citation information for MUSKETEER 2')
    print('-f, --input_path  Input graph file path')
    print('-h, --help        Shows these options')
    print('-M, --metrics     Compare the replica to the original.  Computing intensive. (Default: -M False).')
    print('-o, --output_path Path to the output file for the graph.')
    print('                  Output format is chosen automatically based on the extension.')
    print('-p, --params      Input parameters.  Surround the argument with double quotes:')
    print('                  e.g. -p "{\'p1_name\':p1_value, \'p2_name\':p2_value}"')
    print('                  Key parameters: edge_edit_rate, node_edit_rate, node_growth_rate, edge_growth_rate (all are lists of values e.g. [0.01, 0.02])')
    print('-s, --seed        Random seed (integer)')
    print('-T, --test        Run a quick self-test')
    print('-t, --graph_type  Specify the format of the input graph (Default: -t AUTODETECT)')
    print('-v, --visualizer  Visualization command to call after the replica has been prepared (Default: -v None). Try -v sfdp or -v sfdp3d')
    print('--verbose         Verbose output (Default: --verbose True)')
    print('-w, --write_graph Write replica to disc (Default: -w True).')
    print('                  For interactive Python make False to speed up generation (disables visualization).')
    print()
    print('For reading graphs with -t, the supported graph types are: \n%s' % graphutils.load_graph(
        path=None, list_types_and_exit=True))
    print()
    print('For writing graphs with -o, the supported graph extensions are: \n%s' % graphutils.write_graph(
        G=None, path=None, list_types_and_exit=True))
    print()
    print()
    print('Example call format:')
    print(graphutils.MUSKETEER_EXAMPLE_CMD)
# Esempio n. 6 (Example 6)
# 0
def show_usage():
    """Print the command-line usage/help text for MUSKETEER 2."""
    # Python 2 'print' statements converted to print() calls so this block is
    # consistent with the Python 3 style used elsewhere in the file; the
    # user-facing typo 'paremeters' is corrected as well.
    print("Multiscale Entropic Network Generator 2 (MUSKETEER2)")
    print("Allowed options are:")
    print("-c, --citation    Citation information for MUSKETEER 2")
    print("-f, --input_path  Input graph file path")
    print("-h, --help        Shows these options")
    print("-M, --metrics     Compare the replica to the original.  Computing intensive. (Default: -M False).")
    print("-o, --output_path Path to the output file for the graph.")
    print("                  Output format is chosen automatically based on the extension.")
    print("-p, --params      Input parameters.  Surround the argument with double quotes:")
    print("                  e.g. -p \"{'p1_name':p1_value, 'p2_name':p2_value}\"")
    print("                  Key parameters: edge_edit_rate, node_edit_rate, node_growth_rate, edge_growth_rate (all are lists of values e.g. [0.01, 0.02])")
    print("-s, --seed        Random seed (integer)")
    print("-T, --test        Run a quick self-test")
    print("-t, --graph_type  Specify the format of the input graph (Default: -t AUTODETECT)")
    print("-v, --visualizer  Visualization command to call after the replica has been prepared (Default: -v None). Try -v sfdp or -v sfdp3d")
    print("--verbose         Verbose output (Default: --verbose True)")
    print("-w, --write_graph Write replica to disc (Default: -w True).")
    print("                  For interactive Python make False to speed up generation (disables visualization).")
    print()
    print("For reading graphs with -t, the supported graph types are: \n%s" % graphutils.load_graph(
        path=None, list_types_and_exit=True))
    print()
    print("For writing graphs with -o, the supported graph extensions are: \n%s" % graphutils.write_graph(
        G=None, path=None, list_types_and_exit=True))
    print()
    print()
    print("Example call format:")
    print(graphutils.MUSKETEER_EXAMPLE_CMD)
        # NOTE(review): orphan fragment -- the enclosing function's header is
        # not visible in this chunk. input_path, t_str, write_graph, verbose,
        # new_G, init_options, params and G come from that enclosing scope;
        # the indentation jump at 'if write_graph:' below suggests this spans
        # the end of an inner block. Left byte-identical.
        if not os.path.isdir('output'):
            raise ValueError('Cannot write to directory "output"')
        # derive all output paths from the input file's base name + timestamp
        output_base = 'output/' + os.path.splitext(
            os.path.basename(input_path))[0]
        output_path = output_base + '__' + t_str + '.dot'
        output_path_adj = output_base + '__' + t_str + '.adjlist'
        if write_graph:
            if verbose:
                print('Saving graph: %s' % output_path_adj)
            sys.stdout.flush()
            nx.write_adjlist(new_G, output_path_adj)
    if write_graph:
        if verbose:
            print('Saving graph: %s' % output_path)
        sys.stdout.flush()
        graphutils.write_graph(new_G, output_path)
    image_path = output_path + '.pdf'
    stderr_path = output_path + '.err.txt'

    if init_options['compare_replica']:
        if verbose:
            print('Generator Report')
        print('Comparing replica')
        sys.stdout.flush()
        # compare metric values, check planarity, and draw the replica
        graphutils.compare_nets(G, new_G, params=params)
        print(planarity.is_planar(new_G.edges()))
        # nx.graphviz_layout is the pre-2.0 networkx API -- TODO confirm version
        pos = nx.graphviz_layout(new_G)
        nx.draw(new_G, pos, with_labels=False, node_size=1)
        plt.show()
        benchmarks.find_differences(G, new_G)
def replica_vs_original(seed=None,
                        figpath=None,
                        generator_func=None,
                        G=None,
                        params=None,
                        num_replicas=150,
                        title_infix='',
                        metrics=None,
                        intermediates=False,
                        n_jobs=-1,
                        store_replicas=False):
    """Generate one or more replicas and compare them to the original graph.

    seed            -- RNG seed; a random one is drawn when None
    figpath         -- base path for the deviation plot (plot_deviation
                       returns the final path actually used)
    generator_func  -- replica generator; algorithms.generate_graph when None
    G               -- original graph; a sample graph is loaded when None
    params          -- generation parameters; a default edit schedule when None
    num_replicas    -- number of replicas (recorded in the returned data)
    title_infix     -- text inserted into plot titles
    metrics         -- metric descriptors; graphutils.default_metrics when None
    intermediates   -- also evaluate the coarser (intermediate) levels
    n_jobs          -- parallelism passed to the evaluation helpers
    store_replicas  -- additionally write every replica to output/ as .gml

    Returns a dict keyed by level (0 = finest) with replica statistics, plus
    bookkeeping entries ('metrics', 'name', 'params', 'num_replicas',
    'figpath').
    """
    if seed is None:
        seed = npr.randint(10**6)  # integer bound instead of the float 1E6
    print('rand seed: %d' % seed)
    npr.seed(seed)
    random.seed(seed)

    if generator_func is None:
        generator_func = algorithms.generate_graph

    if G is None:
        G = graphutils.load_graph(path='data-social/potterat_Hiv250.elist')

    # BUGFIX: the params defaults must be set *before* params is read below.
    # Previously "'metric_runningtime_bound' in params" ran first and raised
    # TypeError whenever params was left at its default of None.
    if params is None:
        params = {
            'verbose': False,
            'node_edit_rate': [0.05, 0.04, 0.03, 0.02, 0.01],
            'edge_edit_rate': [0.05, 0.04, 0.03, 0.02, 0.01],
            'node_growth_rate': [0],
            'locality_bias_correction': 0.,
            'enforce_connected': True,
            'accept_chance_edges': 1.0,
            'retain_intermediates': intermediates
        }
    if intermediates:
        params['retain_intermediates'] = True

    if metrics is None:
        metrics = graphutils.default_metrics[:]
    metrics = [m for m in metrics if m['optional'] < 2]
    if 'metric_runningtime_bound' in params:
        mrtb = params['metric_runningtime_bound']
        metrics = [m for m in metrics if m['runningtime'] <= mrtb]
    metrics = [m for m in metrics
               if m['name'] not in ['avg flow closeness']]  #broken in NX 1.6
    metrics.reverse()

    print('Params:')
    print(params)
    print('Metrics:')
    print([metric['name'] for metric in metrics])

    # NOTE(review): replica generation via generator_func is bypassed here;
    # replicas are read from disk instead -- confirm read_all_files() yields
    # num_replicas graphs.
    replicas = read_all_files()
    jaccard_edges = evaluate_similarity(
        base_graphs=G, graphs=replicas,
        n_jobs=n_jobs)  #this is actually a mean
    # index 0 of each metric's values is the original graph, the rest replicas
    vals_of_all = evaluate_metrics(graphs=[G] + replicas,
                                   metrics=metrics,
                                   n_jobs=n_jobs)
    vals_of_graph = [metric_data[0] for metric_data in vals_of_all]
    vals_of_replicas = [metric_data[1:] for metric_data in vals_of_all]
    replica_statistics, figpath = plot_deviation(vals_of_replicas,
                                                 vals_of_graph, metrics,
                                                 figpath, jaccard_edges,
                                                 title_infix, seed,
                                                 getattr(G, 'name', ''))
    data = {
        'metrics': [met['name'] for met in metrics],
        'name': getattr(G, 'name', ''),
        'params': params,
        'num_replicas': num_replicas,
        'figpath': figpath
    }
    data[0] = replica_statistics
    data[0].update({
        'vals_of_replicas': vals_of_replicas,
        'val_of_models': vals_of_graph,
        'avg_jaccard_edges': jaccard_edges
    })

    # FIXME: hard-coded, machine-specific output path -- this fails on any
    # other machine; should be parameterized or moved under output/
    out_dir = "/home/varsha/Documents/final_results/Krongen/Boeing_normalized" + timeNow()
    # dump replica metric values normalized by the original graph's value,
    # one tab-separated row per metric; 'with' guarantees the file is closed
    with open(out_dir, 'w') as norm_file:
        for i, repl in enumerate(vals_of_replicas):
            for elem in repl:
                nor_value = elem
                if vals_of_graph[i] != 0:
                    nor_value = float(elem) / vals_of_graph[i]
                norm_file.write(str(nor_value))
                norm_file.write('\t')
            norm_file.write('\n')

    if intermediates:
        # walk down the coarsening hierarchy, comparing models vs replicas at
        # every level retained during generation
        current_replicas = replicas
        for level in range(
                1,
                max(len(params.get('node_edit_rate', [])),
                    len(params.get('edge_edit_rate', [])),
                    len(params.get('node_growth_rate', [])),
                    len(params.get('edge_growth_rate', [])))):
            print('LEVEL: %d' % level)
            coarse_models = [
                r.coarser_graph.model_graph for r in current_replicas
            ]
            coarse_replicas = [r.coarser_graph for r in current_replicas]
            vals_of_models = evaluate_metrics(graphs=coarse_models,
                                              metrics=metrics,
                                              n_jobs=n_jobs)
            vals_of_replicas = evaluate_metrics(graphs=coarse_replicas,
                                                metrics=metrics,
                                                n_jobs=n_jobs)
            jaccard_edges = evaluate_similarity(base_graphs=coarse_models,
                                                graphs=coarse_replicas,
                                                n_jobs=n_jobs)

            replica_statistics, dummy \
                 = plot_deviation(vals_of_replicas=vals_of_replicas, vals_of_graph=vals_of_models,
                                  metrics=metrics, figpath=figpath + 'level%d'%level, jaccard_edges=jaccard_edges)
            current_replicas = coarse_replicas
            data[level] = replica_statistics
            data[level].update({
                'vals_of_replicas': vals_of_replicas,
                'vals_of_models': vals_of_models,
                'avg_jaccard_edges': jaccard_edges
            })
    graphutils.safe_pickle(path=figpath + '.pkl', data=data)
    save_param_set(params, seed, figpath)
    save_stats_csv(path=figpath + '.csv', seed=seed, data=data)

    # optionally store replica graphs in files
    if store_replicas:
        out_dir = "output/replicas_{0}_{1}".format(getattr(G, "name", ""),
                                                   timeNow())
        os.mkdir(out_dir)
        # enumerate, with a loop variable that no longer clobbers G (the
        # original zip(...) rebound G to each replica)
        for replica_no, replica in enumerate(replicas):
            graphutils.write_graph(replica,
                                   path="{0}/{1}.gml".format(out_dir, replica_no))

    return data
# Esempio n. 9 (Example 9)
# 0
            # NOTE(review): orphan Python 2 fragment, cut at both ends -- the
            # enclosing function header and the body of the final 'if verbose:'
            # are outside this chunk. input_path, t_str, write_graph, verbose,
            # new_G, init_options, params, G and visualizer belong to that
            # enclosing scope. Uses Python 2 syntax ('print' statements and
            # 'raise ValueError, ...'). Left byte-identical.
            os.mkdir("output")
        if not os.path.isdir("output"):
            raise ValueError, 'Cannot write to directory "output"'
        # output paths derive from the input file's base name + timestamp
        output_base = "output/" + os.path.splitext(os.path.basename(input_path))[0]
        output_path = output_base + "__" + t_str + ".dot"
        output_path_adj = output_base + "__" + t_str + ".adjlist"
        if write_graph:
            if verbose:
                print "Saving graph: %s" % output_path_adj
            sys.stdout.flush()
            nx.write_adjlist(new_G, output_path_adj)
    if write_graph:
        if verbose:
            print "Saving graph: %s" % output_path
        sys.stdout.flush()
        graphutils.write_graph(new_G, output_path)
    image_path = output_path + ".pdf"
    stderr_path = output_path + ".err.txt"

    if init_options["compare_replica"]:
        if verbose:
            print "Generator Report"
        sys.stdout.flush()
        graphutils.compare_nets(G, new_G, params=params)

    # 0.03 is too small for Linux
    # sfdp_default_cmd = 'sfdp -Goverlap="prism100" -Goverlap_scaling=-100 -Nlabel="" -Nwidth=0.01 -Nfixedsize=true -Nheight=0.01'
    sfdp_default_cmd = 'sfdp -Nlabel="" -Nwidth=0.06 -Nfixedsize=true -Nheight=0.06 -Nstyle=filled'
    if write_graph and visualizer == "sfdp" and output_path[-3:] == "dot":
        # render the .dot output to PDF, capturing sfdp's stderr to a file
        visualizer_cmdl = sfdp_default_cmd + " -Tpdf %s > %s 2> %s " % (output_path, image_path, stderr_path)
        if verbose:
import pickle
import algorithms
import graphutils
import simpletesters

np.seterr(all='raise')


def timeNow():
    """Filename-safe timestamp string (PEP 8 E731: def, not an assigned lambda)."""
    return time.strftime('%Y_%m_%d__%H_%M_%S', time.localtime())


version = 'Beta 1'

# Script: merge the two edge-list graphs named on the command line into a
# single connected graph and write it out in two formats.
G = nx.read_edgelist(sys.argv[1])
H = nx.read_edgelist(sys.argv[2])

X = nx.disjoint_union(G, H)

# NOTE(review): connected_component_subgraphs returning an indexable list is
# the pre-2.1 networkx API (removed in 2.4) -- confirm the pinned version
C1 = nx.connected_component_subgraphs(X)[0]
C2 = nx.connected_component_subgraphs(X)[1]

# bridge the first two components with one random edge
n1 = random.choice(C1.nodes())
n2 = random.choice(C2.nodes())
X.add_edge(n1, n2)

# with probability 0.5, add one extra random edge anywhere in the graph
r = random.uniform(0, 1)
if r < 0.5:
    n1 = random.choice(X.nodes())
    n2 = random.choice(X.nodes())
    X.add_edge(n1, n2)

nx.write_edgelist(X, "outgraph.elist")
graphutils.write_graph(X, "outgraph.dot")