def coarsening_test2(seed=None):
    """Visualize coarsening: store the node-aggregation chain produced during
    replica generation, then color the original nodes by their aggregate in the
    final (coarsest) level and render the graph with sfdp.

    seed -- RNG seed (int); a random seed is drawn and printed when None.
    Side effects: writes a .dot file and a .pdf under output/, opens the PDF.
    """
    if seed is None:  # idiom fix: was `seed==None`
        seed = npr.randint(1E6)
    print('rnd seed: %d' % seed)
    npr.seed(seed)
    random.seed(seed)

    G = graphutils.load_graph('data/mesh33.gml')
    #G = graphutils.load_graph('data-engineering/watts_strogatz98_power.elist')

    c_tree = []
    def store_aggregation_chain(G, G_coarse, c_data):
        # Coarsening callback: record the node -> aggregate map of each level.
        store_aggregation_chain.static_c_tree.append(c_data['home_nodes'].copy())
    store_aggregation_chain.static_c_tree = c_tree

    params = {}
    params['do_coarsen_tester'] = store_aggregation_chain
    params['node_edit_rate'] = [0, 0, 0, 0]  # change to force coarsening
    dummy_replica = algorithms.generate_graph(G, params=params)

    node_colors = {}
    # BUG FIX: the comprehension variable used to be named `seed`, shadowing the
    # function parameter; renamed to `agg` for clarity.
    aggregate_colors = {agg: (npr.rand(), npr.rand(), npr.rand(), 1.)
                        for agg in list(c_tree[-1].values())}
    for node in G:
        my_final_agg = node
        for c_set in c_tree:
            my_final_agg = c_set[my_final_agg]  # this could be faster with union-find structure
        clr = aggregate_colors[my_final_agg]
        node_colors[node] = clr
        G.node[node]['color'] = '%.3f %.3f %.3f' % (clr[0], clr[1], clr[2])
        G.node[node]['label'] = ''

    all_nodes = G.nodes()
    color_array = np.ones((len(all_nodes), 4))
    for i, node in enumerate(all_nodes):
        color_array[i, :] *= node_colors[node]

    gpath = 'output/coarsening_test_' + timeNow() + '.dot'
    gpath_fig = gpath + '.pdf'
    graphutils.write_graph(G=G, path=gpath)
    print('Writing graph image: %s ..' % gpath_fig)
    visualizer_cmdl = 'sfdp -Nwidth=0.10 -Nheight=0.10 -Nfixedsize=true -Nstyle=filled -Tpdf %s > %s &' % (gpath, gpath_fig)
    #visualizer_cmdl = 'sfdp -Nwidth=0.03 -Nheight=0.03 -Nfixedsize=true -Nstyle=solid -Tpdf %s > %s &'%(gpath,gpath_fig)
    retCode = os.system(visualizer_cmdl)
    time.sleep(1)  # sfdp is backgrounded with '&'; give it a moment before opening the viewer
    subprocess.call(['xdg-open', gpath_fig])
def statistical_tests(seed=8):
    """Systematic comparison of a collection of problems (graphs and parameters).

    For each problem, loads or takes the base graph, writes a .dot snapshot,
    kicks off an sfdp rendering, and runs replica_vs_original on it.
    seed -- RNG seed (int); a random seed is drawn and printed when None.
    """
    if seed is None:  # idiom fix: was `seed==None`
        seed = npr.randint(1E6)
    print('rand seed: %d' % seed)
    npr.seed(seed)
    random.seed(seed)

    default_num_replicas = 20
    params_default = {'verbose': False,
                      'edge_edit_rate': [0.08, 0.07],
                      'node_edit_rate': [0.08, 0.07],
                      'node_growth_rate': [0],
                      'dont_cutoff_leafs': False,
                      'new_edge_horizon': 10,
                      'num_deletion_trials': 20,
                      'locality_bias_correction': [0, ],
                      'edit_method': 'sequential',
                      }
    #params_default['algorithm'] = algorithms.musketeer_on_subgraphs

    metrics_default = graphutils.default_metrics[:]
    # some metrics are removed because of long running time
    metrics_default = [met for met in metrics_default
                       if met['name'] not in ['avg flow closeness', 'avg eigvec centrality',
                                              'degree connectivity', 'degree assortativity',
                                              'average shortest path', 'mean ecc', 'powerlaw exp', ]]

    problems = [{'graph_data': nx.erdos_renyi_graph(n=300, p=0.04, seed=42), 'name': 'ER300', 'num_replicas': 20},
                {'graph_data': 'data-samples/ftp3c.elist'},
                {'graph_data': 'data-samples/mesh33.edges'},
                {'graph_data': 'data-samples/newman06_netscience.gml', 'num_replicas': 10},
                {'graph_data': 'data-samples/watts_strogatz98_power.elist', 'num_replicas': 10},
                ]

    for problem in problems:
        graph_data = problem['graph_data']
        params = problem.get('params', params_default)
        metrics = problem.get('metrics', metrics_default)
        num_replicas = problem.get('num_replicas', default_num_replicas)
        if isinstance(graph_data, str):  # idiom fix: was `type(graph_data) is str`
            base_graph = graphutils.load_graph(path=graph_data)
            base_graph.name = os.path.split(graph_data)[1]
        else:
            base_graph = graph_data
            if not hasattr(base_graph, 'name'):
                base_graph.name = problem.get('name', str(npr.randint(10000)))

        gpath = 'output/' + os.path.split(base_graph.name)[1] + '_' + timeNow() + '.dot'
        gpath_fig = gpath[:-3] + 'eps'  # replace the 'dot' extension with 'eps'
        graphutils.write_graph(G=base_graph, path=gpath)
        visualizer_cmdl = 'sfdp -Nlabel="" -Nwidth=0.03 -Nfixedsize=true -Nheight=0.03 -Teps %s > %s &' % (gpath, gpath_fig)
        print('Writing graph image: %s ..' % gpath_fig)
        retCode = os.system(visualizer_cmdl)

        replica_vs_original(G=base_graph, num_replicas=num_replicas, seed=1, params=params,
                            metrics=metrics, title_infix='musketeer')
def show_usage():
    """Print the command-line usage/help text for MUSKETEER 2.

    FIX: converted Python 2 `print` statements to `print()` calls -- the
    statement form is a SyntaxError under Python 3, which the surrounding
    code in this file targets. All printed strings are unchanged.
    """
    print('Multiscale Entropic Network Generator 2 (MUSKETEER2)')
    print('Allowed options are:')
    print('-c, --citation Citation information for MUSKETEER 2')
    print('-f, --input_path Input graph file path')
    print('-h, --help Shows these options')
    print('-M, --metrics Compare the replica to the original. Computing intensive. (Default: -M False).')
    print('-o, --output_path Path to the output file for the graph.')
    print(' Output format is chosen automatically based on the extension.')
    print('-p, --params Input paremeters. Surround the argument with double quotes:')
    print(' e.g. -p "{\'p1_name\':p1_value, \'p2_name\':p2_value}"')
    print(' Key parameters: edge_edit_rate, node_edit_rate, node_growth_rate, edge_growth_rate (all are lists of values e.g. [0.01, 0.02])')
    print('-s, --seed Random seed (integer)')
    print('-T, --test Run a quick self-test')
    print('-t, --graph_type Specify the format of the input graph (Default: -t AUTODETECT)')
    print('-v, --visualizer Visualization command to call after the replica has been prepared (Default: -v None). Try -v sfdp or -v sfdp3d')
    print('--verbose Verbose output (Default: --verbose True)')
    print('-w, --write_graph Write replica to disc (Default: -w True).')
    print(' For interactive Python make False to speed up generation (disables visualization).')
    print()
    print('For reading graphs with -t, the supported graph types are: \n%s' % graphutils.load_graph(path=None, list_types_and_exit=True))
    print()
    print('For writing graphs with -o, the supported graph extensions are: \n%s' % graphutils.write_graph(G=None, path=None, list_types_and_exit=True))
    print()
    print()
    print('Example call format:')
    print(graphutils.MUSKETEER_EXAMPLE_CMD)
def show_usage():
    """Print the command-line usage/help text for MUSKETEER 2.

    NOTE(review): this is a DUPLICATE definition of show_usage (another one
    appears earlier in the file); at import time the later definition wins.
    Consider deleting one of the two.
    FIX: converted Python 2 `print` statements to `print()` calls -- the
    statement form is a SyntaxError under Python 3. Printed strings unchanged.
    """
    print("Multiscale Entropic Network Generator 2 (MUSKETEER2)")
    print("Allowed options are:")
    print("-c, --citation Citation information for MUSKETEER 2")
    print("-f, --input_path Input graph file path")
    print("-h, --help Shows these options")
    print("-M, --metrics Compare the replica to the original. Computing intensive. (Default: -M False).")
    print("-o, --output_path Path to the output file for the graph.")
    print(" Output format is chosen automatically based on the extension.")
    print("-p, --params Input paremeters. Surround the argument with double quotes:")
    print(" e.g. -p \"{'p1_name':p1_value, 'p2_name':p2_value}\"")
    print(" Key parameters: edge_edit_rate, node_edit_rate, node_growth_rate, edge_growth_rate (all are lists of values e.g. [0.01, 0.02])")
    print("-s, --seed Random seed (integer)")
    print("-T, --test Run a quick self-test")
    print("-t, --graph_type Specify the format of the input graph (Default: -t AUTODETECT)")
    print("-v, --visualizer Visualization command to call after the replica has been prepared (Default: -v None). Try -v sfdp or -v sfdp3d")
    print("--verbose Verbose output (Default: --verbose True)")
    print("-w, --write_graph Write replica to disc (Default: -w True).")
    print(" For interactive Python make False to speed up generation (disables visualization).")
    print()
    print("For reading graphs with -t, the supported graph types are: \n%s" % graphutils.load_graph(path=None, list_types_and_exit=True))
    print()
    print("For writing graphs with -o, the supported graph extensions are: \n%s" % graphutils.write_graph(G=None, path=None, list_types_and_exit=True))
    print()
    print()
    print("Example call format:")
    print(graphutils.MUSKETEER_EXAMPLE_CMD)
# NOTE(review): fragment of a larger driver routine -- input_path, new_G,
# write_graph, verbose, t_str, init_options, G and params are all defined
# before this view; indentation below is a reconstruction of the mangled
# original and should be confirmed against the full source.
if not os.path.isdir('output'):
    raise ValueError('Cannot write to directory "output"')
# Output names are derived from the input file's base name plus a timestamp.
output_base = 'output/' + os.path.splitext(os.path.basename(input_path))[0]
output_path = output_base + '__' + t_str + '.dot'
output_path_adj = output_base + '__' + t_str + '.adjlist'
if write_graph:
    if verbose:
        print('Saving graph: %s' % output_path_adj)
        sys.stdout.flush()
    # Replica is saved twice: adjacency-list format here, .dot format below.
    nx.write_adjlist(new_G, output_path_adj)
if write_graph:
    if verbose:
        print('Saving graph: %s' % output_path)
        sys.stdout.flush()
    graphutils.write_graph(new_G, output_path)
image_path = output_path + '.pdf'
stderr_path = output_path + '.err.txt'
if init_options['compare_replica']:
    if verbose:
        print('Generator Report')
        print('Comparing replica')
        sys.stdout.flush()
    # Metric-by-metric comparison of the original G and the replica new_G.
    graphutils.compare_nets(G, new_G, params=params)
    print(planarity.is_planar(new_G.edges()))
    # Layout and display the replica interactively (blocks until closed).
    pos = nx.graphviz_layout(new_G)
    nx.draw(new_G, pos, with_labels=False, node_size=1)
    plt.show()
    benchmarks.find_differences(G, new_G)
def replica_vs_original(seed=None, figpath=None, generator_func=None, G=None, params=None, num_replicas=150,
                        title_infix='', metrics=None, intermediates=False, n_jobs=-1, store_replicas=False):
    """Generate (here: load) one or more replicas and compare them to the original graph.

    seed           -- RNG seed; a random seed is drawn and printed when None.
    figpath        -- base path for the deviation plot (chosen by plot_deviation when None).
    generator_func -- replica generator (default algorithms.generate_graph); currently
                      unused because replicas are read from disk (see NOTE below).
    G              -- base graph; a sample graph is loaded when None.
    params         -- generation parameters dict; a default set is used when None.
    metrics        -- metric descriptors; defaults to graphutils.default_metrics filtered
                      by 'optional' level and optional running-time bound.
    intermediates  -- if True, also compare the coarse (intermediate) levels.
    store_replicas -- if True, write each replica as GML under output/.
    Returns a dict keyed by level (0 = finest) with per-level statistics.
    """
    if seed is None:  # idiom fix: was `seed == None`
        seed = npr.randint(1E6)
    print('rand seed: %d' % seed)
    npr.seed(seed)
    random.seed(seed)

    if generator_func is None:
        generator_func = algorithms.generate_graph
    if G is None:
        G = graphutils.load_graph(path='data-social/potterat_Hiv250.elist')

    # BUG FIX: the params default must be assigned BEFORE params is read below;
    # previously `'metric_runningtime_bound' in params` ran first, so calling
    # with params=None raised TypeError.
    if params is None:
        params = {
            'verbose': False,
            'node_edit_rate': [0.05, 0.04, 0.03, 0.02, 0.01],
            'edge_edit_rate': [0.05, 0.04, 0.03, 0.02, 0.01],
            'node_growth_rate': [0],
            'locality_bias_correction': 0.,
            'enforce_connected': True,
            'accept_chance_edges': 1.0,
            'retain_intermediates': intermediates
        }
    if intermediates:
        params['retain_intermediates'] = True

    if metrics is None:
        metrics = graphutils.default_metrics[:]
    metrics = [m for m in metrics if m['optional'] < 2]
    if 'metric_runningtime_bound' in params:
        mrtb = params['metric_runningtime_bound']
        metrics = [m for m in metrics if m['runningtime'] <= mrtb]
    metrics = [m for m in metrics if m['name'] not in ['avg flow closeness']]  # broken in NX 1.6
    metrics.reverse()

    print('Params:')
    print(params)
    print('Metrics:')
    print([metric['name'] for metric in metrics])

    # NOTE(review): replicas are read from disk rather than generated, so
    # generator_func and num_replicas are effectively unused here -- confirm
    # this is intentional (the replicate_graph call was commented out).
    replicas = read_all_files()
    jaccard_edges = evaluate_similarity(base_graphs=G, graphs=replicas, n_jobs=n_jobs)  # this is actually a mean
    vals_of_all = evaluate_metrics(graphs=[G] + replicas, metrics=metrics, n_jobs=n_jobs)
    vals_of_graph = [metric_data[0] for metric_data in vals_of_all]
    vals_of_replicas = [metric_data[1:] for metric_data in vals_of_all]

    replica_statistics, figpath = plot_deviation(vals_of_replicas, vals_of_graph, metrics, figpath,
                                                 jaccard_edges, title_infix, seed, getattr(G, 'name', ''))
    #pylab.show()
    data = {
        'metrics': [met['name'] for met in metrics],
        'name': getattr(G, 'name', ''),
        'params': params,
        'num_replicas': num_replicas,
        'figpath': figpath
    }
    data[0] = replica_statistics
    # NOTE(review): the level-0 key is 'val_of_models' while deeper levels use
    # 'vals_of_models' -- preserved for backward compatibility with consumers.
    data[0].update({
        'vals_of_replicas': vals_of_replicas,
        'val_of_models': vals_of_graph,
        'avg_jaccard_edges': jaccard_edges
    })

    # FIXME: hard-coded, user-specific output path; fails for other users.
    out_dir = "/home/varsha/Documents/final_results/Krongen/Boeing_normalized" + timeNow()
    # FIX: use a context manager so the file is closed even on error.
    with open(out_dir, 'w') as myfile:
        i = 0
        for repl in vals_of_replicas:
            # One row per metric: each replica's value normalized by the
            # original graph's value (left unnormalized when the original is 0).
            for elem in repl:
                nor_value = elem
                if vals_of_graph[i] != 0:
                    nor_value = float(elem) / vals_of_graph[i]
                myfile.write(str(nor_value))
                myfile.write('\t')
            i += 1
            myfile.write('\n')

    if intermediates:
        # Walk down the coarsening hierarchy and compare models vs replicas
        # at every level retained during generation.
        current_replicas = replicas
        for level in range(1, max(len(params.get('node_edit_rate', [])),
                                  len(params.get('edge_edit_rate', [])),
                                  len(params.get('node_growth_rate', [])),
                                  len(params.get('edge_growth_rate', [])))):
            print('LEVEL: %d' % level)
            coarse_models = [r.coarser_graph.model_graph for r in current_replicas]
            coarse_replicas = [r.coarser_graph for r in current_replicas]
            vals_of_models = evaluate_metrics(graphs=coarse_models, metrics=metrics, n_jobs=n_jobs)
            vals_of_replicas = evaluate_metrics(graphs=coarse_replicas, metrics=metrics, n_jobs=n_jobs)
            jaccard_edges = evaluate_similarity(base_graphs=coarse_models, graphs=coarse_replicas, n_jobs=n_jobs)
            replica_statistics, dummy \
                = plot_deviation(vals_of_replicas=vals_of_replicas, vals_of_graph=vals_of_models,
                                 metrics=metrics, figpath=figpath + 'level%d' % level,
                                 jaccard_edges=jaccard_edges)
            current_replicas = coarse_replicas
            data[level] = replica_statistics
            data[level].update({
                'vals_of_replicas': vals_of_replicas,
                'vals_of_models': vals_of_models,
                'avg_jaccard_edges': jaccard_edges
            })
    graphutils.safe_pickle(path=figpath + '.pkl', data=data)
    save_param_set(params, seed, figpath)
    save_stats_csv(path=figpath + '.csv', seed=seed, data=data)

    # optionally store replica graphs in files
    if store_replicas:
        out_dir = "output/replicas_{0}_{1}".format(getattr(G, "name", ""), timeNow())  # FIXME: add graph name
        os.mkdir(out_dir)
        # BUG FIX: the loop variable used to be named G, clobbering the base graph.
        for replica_no, replica in enumerate(replicas):
            graphutils.write_graph(replica, path="{0}/{1}.gml".format(out_dir, replica_no))
    return data
# NOTE(review): Python 2 fragment (print statements, comma-form `raise`) of a
# larger driver -- input_path, new_G, write_graph, verbose, t_str, init_options,
# G, params and visualizer are defined before this view, and the trailing
# `if verbose:` body continues beyond it. Indentation below reconstructs the
# mangled original; confirm against the full source.
os.mkdir("output")
if not os.path.isdir("output"):
    raise ValueError, 'Cannot write to directory "output"'
# Output names are derived from the input file's base name plus a timestamp.
output_base = "output/" + os.path.splitext(os.path.basename(input_path))[0]
output_path = output_base + "__" + t_str + ".dot"
output_path_adj = output_base + "__" + t_str + ".adjlist"
if write_graph:
    if verbose:
        print "Saving graph: %s" % output_path_adj
        sys.stdout.flush()
    # Replica is saved twice: adjacency-list format here, .dot format below.
    nx.write_adjlist(new_G, output_path_adj)
if write_graph:
    if verbose:
        print "Saving graph: %s" % output_path
        sys.stdout.flush()
    graphutils.write_graph(new_G, output_path)
image_path = output_path + ".pdf"
stderr_path = output_path + ".err.txt"
if init_options["compare_replica"]:
    if verbose:
        print "Generator Report"
        sys.stdout.flush()
    # Metric-by-metric comparison of the original G and the replica new_G.
    graphutils.compare_nets(G, new_G, params=params)
# 0.03 is too small for Linux
# sfdp_default_cmd = 'sfdp -Goverlap="prism100" -Goverlap_scaling=-100 -Nlabel="" -Nwidth=0.01 -Nfixedsize=true -Nheight=0.01'
sfdp_default_cmd = 'sfdp -Nlabel="" -Nwidth=0.06 -Nfixedsize=true -Nheight=0.06 -Nstyle=filled'
# Render the .dot output with sfdp only when a graph was written and requested.
if write_graph and visualizer == "sfdp" and output_path[-3:] == "dot":
    visualizer_cmdl = sfdp_default_cmd + " -Tpdf %s > %s 2> %s " % (output_path, image_path, stderr_path)
    if verbose:
import pickle
import algorithms
import graphutils
import simpletesters

# Fail fast on numerical problems instead of silently propagating NaN/inf.
np.seterr(all='raise')

timeNow = lambda: time.strftime('%Y_%m_%d__%H_%M_%S', time.localtime())
version = 'Beta 1'

# Read the two edge-list graphs named on the command line and merge them into
# one graph whose node sets are disjoint (nodes relabeled by disjoint_union).
G = nx.read_edgelist(sys.argv[1])
H = nx.read_edgelist(sys.argv[2])
X = nx.disjoint_union(G, H)

# Bridge the first two connected components with a single random edge.
first_component = nx.connected_component_subgraphs(X)[0]
second_component = nx.connected_component_subgraphs(X)[1]
endpoint_a = random.choice(first_component.nodes())
endpoint_b = random.choice(second_component.nodes())
X.add_edge(endpoint_a, endpoint_b)

# With probability 0.5, add one more edge between two uniformly random nodes.
if random.uniform(0, 1) < 0.5:
    endpoint_a = random.choice(X.nodes())
    endpoint_b = random.choice(X.nodes())
    X.add_edge(endpoint_a, endpoint_b)

# Persist the merged graph in both edge-list and .dot formats.
nx.write_edgelist(X, "outgraph.elist")
graphutils.write_graph(X, "outgraph.dot")