def show_usage():
    print('Multiscale Entropic Network Generator 2 (MUSKETEER2)')
    print('Allowed options are:')
    print('-c, --citation     Citation information for MUSKETEER 2')
    print('-f, --input_path   Input graph file path')
    print('-h, --help         Shows these options')
    print('-M, --metrics      Compare the replica to the original. Computationally intensive. (Default: -M False)')
    print('-o, --output_path  Path to the output file for the graph.')
    print('                   Output format is chosen automatically based on the extension.')
    print('-p, --params       Input parameters. Surround the argument with double quotes:')
    print('                   e.g. -p "{\'p1_name\':p1_value, \'p2_name\':p2_value}"')
    print('                   Key parameters: edge_edit_rate, node_edit_rate, node_growth_rate, edge_growth_rate (all are lists of values, e.g. [0.01, 0.02])')
    print('-s, --seed         Random seed (integer)')
    print('-T, --test         Run a quick self-test')
    print('-t, --graph_type   Specify the format of the input graph (Default: -t AUTODETECT)')
    print('-v, --visualizer   Visualization command to call after the replica has been prepared (Default: -v None). Try -v sfdp or -v sfdp3d')
    print('--verbose          Verbose output (Default: --verbose True)')
    print('-w, --write_graph  Write the replica to disk (Default: -w True).')
    print('                   In interactive Python, set to False to speed up generation (disables visualization).')
    print()
    print('For reading graphs with -t, the supported graph types are: \n%s' % graphutils.load_graph(path=None, list_types_and_exit=True))
    print()
    print('For writing graphs with -o, the supported graph extensions are: \n%s' % graphutils.write_graph(G=None, path=None, list_types_and_exit=True))
    print()
    print('Example call format:')
    print(graphutils.MUSKETEER_EXAMPLE_CMD)
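#For reference, a typical command line assembled from the options above. This is a
#sketch: the entry-point script name (musketeer.py) is assumed, not named in this file,
#and the input path and edit rates are illustrative.
#  python musketeer.py -f data-samples/mesh33.edges \
#      -p "{'edge_edit_rate':[0.01, 0.02], 'node_edit_rate':[0.01]}" \
#      -o output/mesh33_replica.dot -s 42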
def show_usage(): print "Multiscale Entropic Network Generator 2 (MUSKETEER2)" print "Allowed options are:" print "-c, --citation Citation information for MUSKETEER 2" print "-f, --input_path Input graph file path" print "-h, --help Shows these options" print "-M, --metrics Compare the replica to the original. Computing intensive. (Default: -M False)." print "-o, --output_path Path to the output file for the graph." print " Output format is chosen automatically based on the extension." print "-p, --params Input paremeters. Surround the argument with double quotes:" print " e.g. -p \"{'p1_name':p1_value, 'p2_name':p2_value}\"" print " Key parameters: edge_edit_rate, node_edit_rate, node_growth_rate, edge_growth_rate (all are lists of values e.g. [0.01, 0.02])" print "-s, --seed Random seed (integer)" print "-T, --test Run a quick self-test" print "-t, --graph_type Specify the format of the input graph (Default: -t AUTODETECT)" print "-v, --visualizer Visualization command to call after the replica has been prepared (Default: -v None). Try -v sfdp or -v sfdp3d" print "--verbose Verbose output (Default: --verbose True)" print "-w, --write_graph Write replica to disc (Default: -w True)." print " For interactive Python make False to speed up generation (disables visualization)." print print "For reading graphs with -t, the supported graph types are: \n%s" % graphutils.load_graph( path=None, list_types_and_exit=True ) print print "For writing graphs with -o, the supported graph extensions are: \n%s" % graphutils.write_graph( G=None, path=None, list_types_and_exit=True ) print print print "Example call format:" print graphutils.MUSKETEER_EXAMPLE_CMD
def coarsening_test2(seed=None):
    #visualizes coarsening: stores the aggregation chain of the nodes, then colors the original nodes by their aggregates in the final level
    import matplotlib as mpl
    if seed == None:
        seed = npr.randint(1E6)
    print('rnd seed: %d' % seed)
    npr.seed(seed)
    random.seed(seed)

    G = graphutils.load_graph('data/mesh33.gml')
    #G = graphutils.load_graph('data-engineering/watts_strogatz98_power.elist')
    c_tree = []

    def store_aggregation_chain(G, G_coarse, c_data):
        store_aggregation_chain.static_c_tree.append(c_data['home_nodes'].copy())
        #print(c_data['home_nodes'])
        #print(store_aggregation_chain.static_c_tree)
    store_aggregation_chain.static_c_tree = c_tree

    params = {}
    params['do_coarsen_tester'] = store_aggregation_chain
    params['node_edit_rate'] = [0, 0, 0, 0]  #change to force coarsening

    dummy_replica = algorithms.generate_graph(G, params=params)

    #assign a random color to every final-level aggregate
    node_colors = {}
    aggregate_colors = {agg: (npr.rand(), npr.rand(), npr.rand(), 1.) for agg in list(c_tree[-1].values())}
    for node in G:
        my_final_agg = node
        for c_set in c_tree:
            my_final_agg = c_set[my_final_agg]  #this could be faster with a union-find structure
        node_colors[node] = aggregate_colors[my_final_agg]
        clr = aggregate_colors[my_final_agg]
        G.node[node]['color'] = '%.3f %.3f %.3f' % (clr[0], clr[1], clr[2])
        G.node[node]['label'] = ''

    all_nodes = G.nodes()
    color_array = np.ones((len(all_nodes), 4))
    for i, node in enumerate(all_nodes):
        color_array[i, :] *= node_colors[node]
    #pos = nx.fruchterman_reingold_layout(G)
    #nx.draw_networkx_nodes(G, pos=pos, nodelist=G.nodes(), node_color=color_array, cmap=pylab.hot, node_size=500, with_labels=True, node_shape='s')
    #nx.draw_networkx_edges(G, pos=pos, alpha=1.0)
    #nx.draw_networkx_labels(G, pos=pos)
    #pylab.show()

    gpath = 'output/coarsening_test_' + timeNow() + '.dot'
    gpath_fig = gpath + '.pdf'
    graphutils.write_graph(G=G, path=gpath)
    print('Writing graph image: %s ..' % gpath_fig)
    visualizer_cmdl = 'sfdp -Nwidth=0.10 -Nheight=0.10 -Nfixedsize=true -Nstyle=filled -Tpdf %s > %s &' % (gpath, gpath_fig)
    #visualizer_cmdl = 'sfdp -Nwidth=0.03 -Nheight=0.03 -Nfixedsize=true -Nstyle=solid -Tpdf %s > %s &' % (gpath, gpath_fig)
    retCode = os.system(visualizer_cmdl)
    time.sleep(1)
    subprocess.call(['xdg-open', gpath_fig])
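#The do_coarsen_tester hook is called at every coarsening level with the fine graph, the
#coarse graph, and the coarsening data. A minimal sketch of an alternative hook, assuming
#only the c_data['home_nodes'] fine-node-to-aggregate mapping used above:
def report_coarsening_level(G, G_coarse, c_data):
    #report how much this level shrinks the graph
    num_aggregates = len(set(c_data['home_nodes'].values()))
    print('coarsening: %d nodes -> %d aggregates' % (G.number_of_nodes(), num_aggregates))

#usage sketch: algorithms.generate_graph(G, params={'do_coarsen_tester': report_coarsening_level,
#                                                   'node_edit_rate': [0, 0, 0, 0]})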
def statistical_tests(seed=8):
    #systematic comparison of a collection of problems (graphs and parameters)
    if seed == None:
        seed = npr.randint(1E6)
    print('rand seed: %d' % seed)
    npr.seed(seed)
    random.seed(seed)

    default_num_replicas = 20
    params_default = {'verbose': False,
                      'edge_edit_rate': [0.08, 0.07],
                      'node_edit_rate': [0.08, 0.07],
                      'node_growth_rate': [0],
                      'dont_cutoff_leafs': False,
                      'new_edge_horizon': 10,
                      'num_deletion_trials': 20,
                      'locality_bias_correction': [0, ],
                      'edit_method': 'sequential',
                      }
    #params_default['algorithm'] = algorithms.musketeer_on_subgraphs

    #some metrics are removed because of long running time
    metrics_default = graphutils.default_metrics[:]
    metrics_default = [met for met in metrics_default
                       if met['name'] not in ['avg flow closeness', 'avg eigvec centrality', 'degree connectivity',
                                              'degree assortativity', 'average shortest path', 'mean ecc',
                                              'powerlaw exp', ]]

    problems = [{'graph_data': nx.erdos_renyi_graph(n=300, p=0.04, seed=42), 'name': 'ER300', 'num_replicas': 20},
                {'graph_data': 'data-samples/ftp3c.elist'},
                {'graph_data': 'data-samples/mesh33.edges'},
                {'graph_data': 'data-samples/newman06_netscience.gml', 'num_replicas': 10},
                {'graph_data': 'data-samples/watts_strogatz98_power.elist', 'num_replicas': 10},
                ]

    for problem in problems:
        graph_data = problem['graph_data']
        params = problem.get('params', params_default)
        metrics = problem.get('metrics', metrics_default)
        num_replicas = problem.get('num_replicas', default_num_replicas)
        if type(graph_data) is str:
            base_graph = graphutils.load_graph(path=graph_data)
            base_graph.name = os.path.split(graph_data)[1]
        else:
            base_graph = graph_data
            if not hasattr(base_graph, 'name'):
                base_graph.name = problem.get('name', str(npr.randint(10000)))

        gpath = 'output/' + os.path.split(base_graph.name)[1] + '_' + timeNow() + '.dot'
        gpath_fig = gpath[:-3] + 'eps'
        graphutils.write_graph(G=base_graph, path=gpath)
        visualizer_cmdl = 'sfdp -Nlabel="" -Nwidth=0.03 -Nfixedsize=true -Nheight=0.03 -Teps %s > %s &' % (gpath, gpath_fig)
        print('Writing graph image: %s ..' % gpath_fig)
        retCode = os.system(visualizer_cmdl)

        replica_vs_original(G=base_graph, num_replicas=num_replicas, seed=1, params=params,
                            metrics=metrics, title_infix='musketeer')
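#Each entry of problems is a plain dict, so the suite is easy to extend. A sketch of a
#hypothetical extra entry (the file path is a placeholder; params copies the defaults
#and overrides a single rate):
#problems.append({'graph_data': 'data-samples/my_graph.elist',
#                 'num_replicas': 5,
#                 'params': dict(params_default, edge_edit_rate=[0.02, 0.01])})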
def __main():
    """Main function to mimic C++ version behavior"""
    try:
        import graphutils
        filename = sys.argv[1]
        #graph = __load_binary(filename)
        graph = graphutils.load_graph(path=filename)
        partition = best_partition(graph)
        print(str(modularity(partition, graph)), file=sys.stderr)
        for elem, part in partition.items():
            print(str(elem) + " " + str(part))
    except (IndexError, IOError):
        print("Usage : ./community filename")
        print("find the communities in graph filename and display the dendrogram")
        print("Parameters:")
        print("filename is a binary file as generated by the")
        print("convert utility distributed with the C implementation")
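#For interactive checks, best_partition and modularity can be exercised directly on a
#small graph. A minimal sketch, assuming this module's functions are in scope and using
#NetworkX's built-in karate-club graph:
def _partition_smoke_test():
    import networkx as nx
    Gk = nx.karate_club_graph()              #small built-in test graph
    part = best_partition(Gk)                #dict: node -> community id
    print('modularity: %.3f' % modularity(part, Gk))
    print('communities: %d' % len(set(part.values())))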
def drake_hougardy_test():
    import new_algs, graphutils
    #total weight of a matching; each edge is seen from both endpoints, hence the division by 2
    matching_weight = lambda G, mat: sum(G.edge[u][mat[u]].get('weight', 1.0) for u in mat) / 2.0

    def is_matching(mat):
        G = nx.Graph()
        G.add_edges_from(list(mat.items()))
        for cc in nx.connected_components(G):
            if len(cc) not in [0, 2]:
                return False
        return True

    def is_maximal(G, mat):
        for edge in G.edges():
            if (edge[0] not in mat) and (edge[1] not in mat):
                return False
        return True

    path = nx.path_graph(11)
    for u, v, d in path.edges(data=True):
        d['weight'] = max(u, v)**2

    matching = graphutils.drake_hougardy_slow(path)
    print('Matching slow: ' + str(matching))
    print('  wt: ' + str(matching_weight(path, matching)))

    matching = graphutils.drake_hougardy(path)
    assert is_matching(matching)
    assert is_maximal(path, matching)
    print('Matching: ' + str(matching))
    print('  wt: ' + str(matching_weight(path, matching)))

    path_opt_m = nx.max_weight_matching(path)
    print(' Opt Mat: ' + str(path_opt_m))
    print('  wt: ' + str(matching_weight(path, path_opt_m)))

    Gr2 = graphutils.load_graph('data-cyber-small/gr2.gml')
    matching = graphutils.drake_hougardy_slow(Gr2)
    print('Matching slow: ' + str(matching))
    print('  wt: ' + str(matching_weight(Gr2, matching)))

    matching = graphutils.drake_hougardy(Gr2)
    assert is_matching(matching)
    assert is_maximal(Gr2, matching)
    print('Matching: ' + str(matching))
    print('  wt: ' + str(matching_weight(Gr2, matching)))

    gr2_opt_m = nx.max_weight_matching(Gr2)
    print(' Opt Mat: ' + str(gr2_opt_m))
    print('  wt: ' + str(matching_weight(Gr2, gr2_opt_m)))

    #matching = graphutils.drake_hougardy(nx.erdos_renyi_graph(1000, 0.02))
    num_test_graphs = 100
    num_nodes = 400
    edge_density = 0.02
    seed = 0
    for trial in range(num_test_graphs):
        seed += 1
        Gnp = nx.erdos_renyi_graph(num_nodes, edge_density, seed=seed)
        print('Seed: %d' % seed)
        matching = graphutils.drake_hougardy(Gnp)
        assert is_matching(matching)
        assert is_maximal(Gnp, matching)
        wtDH = matching_weight(Gnp, matching)
        print('  wt DH:  ' + str(wtDH))
        gnp_opt_m = nx.max_weight_matching(Gnp)
        wtOpt = matching_weight(Gnp, gnp_opt_m)
        print('  wt Opt: ' + str(wtOpt))
        #Drake-Hougardy is a 1/2-approximation: the optimum weighs at most twice the heuristic's matching
        assert wtOpt <= 2 * wtDH
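#Note: the test above assumes the NetworkX 1.x API, where G.edge[u][v] is valid and
#nx.max_weight_matching returns a node-to-mate dict. On NetworkX 2+ the matching comes
#back as a set of edge tuples, so an adapter like this sketch would be needed:
def matching_as_dict(matching):
    #convert a set of (u, v) pairs into the node -> mate dict that matching_weight expects
    mate = {}
    for u, v in matching:
        mate[u] = v
        mate[v] = u
    return mate

#e.g. wt = matching_weight(G, matching_as_dict(nx.max_weight_matching(G)))
#(on NetworkX 2+, G.edge[u][mat[u]] inside matching_weight also becomes G.edges[u, mat[u]])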
def replica_vs_original(seed=None, figpath=None, graph_name="Generated", generator_func=None, G=None, params=None,
                        num_replicas=100, title_infix='', metrics=None, generator="musketeer_all",
                        intermediates=False, n_jobs=-1, store_replicas=False, output_path="Results"):
    """generate one or more replicas and compare them to the original graph"""
    if seed == None:
        seed = npr.randint(1E6)
    print('rand seed: %d' % seed)
    npr.seed(seed)
    random.seed(seed)

    if generator_func == None:
        generator_func = algorithms.generate_graph
    if G == None:
        G = graphutils.load_graph(path='data-social/potterat_Hiv250.elist')
    if metrics == None:
        metrics = graphutils.default_metrics[:]
    metrics = [m for m in metrics if m['optional'] < 2]
    if 'metric_runningtime_bound' in params:
        mrtb = params['metric_runningtime_bound']
        metrics = [m for m in metrics if m['runningtime'] <= mrtb]
    metrics = [m for m in metrics if m['name'] not in ['avg flow closeness']]  #broken in NX 1.6
    metrics.reverse()
    if params == None:
        params = {'verbose': False,
                  'node_edit_rate': [0.05, 0.04, 0.03, 0.02, 0.01],
                  'edge_edit_rate': [0.05, 0.04, 0.03, 0.02, 0.01],
                  'node_growth_rate': [0],
                  'locality_bias_correction': 0.,
                  'enforce_connected': True,
                  'accept_chance_edges': 1.0,
                  'retain_intermediates': intermediates}
    if intermediates:
        params['retain_intermediates'] = True
    print('Params:')
    print(params)
    print('Metrics:')
    print([metric['name'] for metric in metrics])

    data = {}
    #in this variant, replicas are read back from disk rather than regenerated:
    #replicas = replicate_graph(G=G, generator_func=generator_func, num_replicas=num_replicas, params=params,
    #                           title_infix=title_infix, n_jobs=n_jobs)
    replicas = read_all_files(output_path + generator + "/")
    out_dir = output_path + generator + "/" + graph_name + "computation_results"
    print(out_dir)
    myfile = open(out_dir + generator, 'w')

    #jaccard_edges = evaluate_similarity(base_graphs=G, graphs=replicas, n_jobs=n_jobs)  #this is actually a mean
    vals_of_all = evaluate_metrics(graphs=[G] + replicas, metrics=metrics, n_jobs=n_jobs)
    vals_of_graph = [metric_data[0] for metric_data in vals_of_all]
    vals_of_replicas = [metric_data[1:] for metric_data in vals_of_all]
    #replica_statistics, figpath = plot_deviation(vals_of_replicas, vals_of_graph, metrics, figpath, jaccard_edges,
    #                                             title_infix, seed, getattr(G, 'name', ''))

    #one row per metric: replica values normalized by the original graph's value
    i = 0
    for (vals, met) in zip(vals_of_replicas, metrics):
        attribute = met['name'].replace(" ", "_")
        myfile.write(attribute + '\t')
        for elem in vals:
            nor_value = elem
            if (vals_of_graph[i] != 0):
                nor_value = float(elem) / vals_of_graph[i]
            myfile.write(str(nor_value))
            myfile.write('\t')
        i += 1
        myfile.write('\n')

    myfile.write('deleted_edges' + '\t')
    i = 1
    for replica in replicas:
        myfile.write(str(find_deleted(G, replica)) + '\t')
        #nx.write_edgelist(replica, output_path + generator + '/' + graph_name + str(i) + ".edgelist")
        i += 1
    myfile.write('\nnew_edges' + '\t')
    for replica in replicas:
        myfile.write(str(find_new(G, replica)) + '\t')
    myfile.close()

    #the intermediates comparison and optional replica storage are disabled in this variant;
    #see the full replica_vs_original below for the live version of that code
    return data
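#Each metric row above holds ratios of replica values to the original graph's value, so
#1.0 means exact agreement. A self-contained sketch of the same normalization rule:
def normalize_row(replica_vals, original_val):
    #ratio of each replica value to the original; left unscaled when the original is zero,
    #matching the guard in the loop above
    return [float(v) / original_val if original_val != 0 else v for v in replica_vals]

#e.g. normalize_row([0.42, 0.40], 0.41) -> [1.024..., 0.975...]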
if __name__ == '__main__':
    init_options = initialize()
    input_path = init_options['input_path']
    params = init_options['params']
    graph_type = init_options['graph_type']
    output_path = init_options['output_path']
    visualizer = init_options['visualizer']
    verbose = init_options['verbose']
    write_graph = init_options['write_graph']
    planar = init_options['planar']

    if verbose:
        print('Loading: %s' % input_path)
    G = graphutils.load_graph(path=input_path, params={'graph_type': graph_type, 'verbose': verbose})
    #G = generatesubgraphs.bfs_tree_custom(G, 1000)

    if verbose:
        print('Generating ...')
    new_G = algorithms.generate_graph(G, params=params, planar=planar)

    #optional
    #print(graphutils.graph_graph_delta(G=G, new_G=new_G))
    #new_G = nx.convert_node_labels_to_integers(new_G, 1, 'default', True)

    #TODO: too many reports
    if params.get('stats_report_on_all_levels', False):
        model_Gs = [new_G.model_graph]
        Gs = [new_G]
        current_G = new_G.coarser_graph
        while current_G.coarser_graph != None:
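#The same pipeline can be driven from a Python session. A minimal sketch using only calls
#that appear in this file; the path and edit rates are illustrative:
#  G = graphutils.load_graph(path='data-samples/mesh33.edges')
#  new_G = algorithms.generate_graph(G, params={'edge_edit_rate': [0.01, 0.01], 'verbose': True})
#  graphutils.write_graph(G=new_G, path='output/mesh33_replica.dot')   #format chosen by the extension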
def replica_vs_original(seed=None, figpath=None, generator_func=None, G=None, params=None, num_replicas=150,
                        title_infix='', metrics=None, intermediates=False, n_jobs=-1, store_replicas=False):
    """generate one or more replicas and compare them to the original graph"""
    if seed == None:
        seed = npr.randint(1E6)
    print('rand seed: %d' % seed)
    npr.seed(seed)
    random.seed(seed)

    if generator_func == None:
        generator_func = algorithms.generate_graph
    if G == None:
        G = graphutils.load_graph(path='data-social/potterat_Hiv250.elist')
    if metrics == None:
        metrics = graphutils.default_metrics[:]
    metrics = [m for m in metrics if m['optional'] < 2]
    if 'metric_runningtime_bound' in params:
        mrtb = params['metric_runningtime_bound']
        metrics = [m for m in metrics if m['runningtime'] <= mrtb]
    metrics = [m for m in metrics if m['name'] not in ['avg flow closeness']]  #broken in NX 1.6
    metrics.reverse()
    if params == None:
        params = {'verbose': False,
                  'node_edit_rate': [0.05, 0.04, 0.03, 0.02, 0.01],
                  'edge_edit_rate': [0.05, 0.04, 0.03, 0.02, 0.01],
                  'node_growth_rate': [0],
                  'locality_bias_correction': 0.,
                  'enforce_connected': True,
                  'accept_chance_edges': 1.0,
                  'retain_intermediates': intermediates}
    if intermediates:
        params['retain_intermediates'] = True
    print('Params:')
    print(params)
    print('Metrics:')
    print([metric['name'] for metric in metrics])

    #replicas = replicate_graph(G=G, generator_func=generator_func, num_replicas=num_replicas, params=params,
    #                           title_infix=title_infix, n_jobs=n_jobs)
    replicas = read_all_files()  #load pre-generated replicas from disk (see read_all_files below)
    jaccard_edges = evaluate_similarity(base_graphs=G, graphs=replicas, n_jobs=n_jobs)  #this is actually a mean
    vals_of_all = evaluate_metrics(graphs=[G] + replicas, metrics=metrics, n_jobs=n_jobs)
    vals_of_graph = [metric_data[0] for metric_data in vals_of_all]
    vals_of_replicas = [metric_data[1:] for metric_data in vals_of_all]
    replica_statistics, figpath = plot_deviation(vals_of_replicas, vals_of_graph, metrics, figpath, jaccard_edges,
                                                 title_infix, seed, getattr(G, 'name', ''))
    #pylab.show()
    data = {'metrics': [met['name'] for met in metrics],
            'name': getattr(G, 'name', ''),
            'params': params,
            'num_replicas': num_replicas,
            'figpath': figpath}
    data[0] = replica_statistics
    data[0].update({'vals_of_replicas': vals_of_replicas,
                    'val_of_models': vals_of_graph,
                    'avg_jaccard_edges': jaccard_edges})

    #write replica metric values normalized by the original graph's value (hardcoded experiment path)
    out_dir = "/home/varsha/Documents/final_results/Krongen/Boeing_normalized" + timeNow()
    myfile = open(out_dir, 'w')
    i = 0
    for repl in vals_of_replicas:
        for elem in repl:
            nor_value = elem
            if (vals_of_graph[i] != 0):
                nor_value = float(elem) / vals_of_graph[i]
            myfile.write(str(nor_value))
            myfile.write('\t')
        i += 1
        myfile.write('\n')
    myfile.close()

    if intermediates:
        current_replicas = replicas
        for level in range(1, max(len(params.get('node_edit_rate', [])),
                                  len(params.get('edge_edit_rate', [])),
                                  len(params.get('node_growth_rate', [])),
                                  len(params.get('edge_growth_rate', [])))):
            print('LEVEL: %d' % level)
            coarse_models = [r.coarser_graph.model_graph for r in current_replicas]
            coarse_replicas = [r.coarser_graph for r in current_replicas]
            vals_of_models = evaluate_metrics(graphs=coarse_models, metrics=metrics, n_jobs=n_jobs)
            vals_of_replicas = evaluate_metrics(graphs=coarse_replicas, metrics=metrics, n_jobs=n_jobs)
            jaccard_edges = evaluate_similarity(base_graphs=coarse_models, graphs=coarse_replicas, n_jobs=n_jobs)
            replica_statistics, dummy \
                = plot_deviation(vals_of_replicas=vals_of_replicas, vals_of_graph=vals_of_models,
                                 metrics=metrics, figpath=figpath + 'level%d' % level, jaccard_edges=jaccard_edges)
            current_replicas = coarse_replicas
            data[level] = replica_statistics
            data[level].update({'vals_of_replicas': vals_of_replicas,
                                'vals_of_models': vals_of_models,
                                'avg_jaccard_edges': jaccard_edges})
    graphutils.safe_pickle(path=figpath + '.pkl', data=data)
    save_param_set(params, seed, figpath)
    save_stats_csv(path=figpath + '.csv', seed=seed, data=data)

    #optionally store replica graphs in files
    if store_replicas:
        out_dir = "output/replicas_{0}_{1}".format(getattr(G, "name", ""), timeNow())  #FIXME: add graph name
        os.mkdir(out_dir)
        for (replica, replica_no) in zip(replicas, range(len(replicas))):
            graphutils.write_graph(replica, path="{0}/{1}.gml".format(out_dir, replica_no))
    return data
def replica_vs_original(seed=None, figpath=None, generator_func=None, G=None, params=None, num_replicas=150,
                        title_infix='', metrics=None, intermediates=False, n_jobs=-1):
    #generate one or more replicas and compare them to the original graph
    if seed == None:
        seed = npr.randint(1E6)
    print('rand seed: %d' % seed)
    npr.seed(seed)
    random.seed(seed)

    if generator_func == None:
        generator_func = algorithms.generate_graph
    if G == None:
        G = graphutils.load_graph(path='data-social/potterat_Hiv250.elist')
    if metrics == None:
        metrics = graphutils.default_metrics[:]
    metrics = [m for m in metrics if m['optional'] < 2]
    if 'metric_runningtime_bound' in params:
        mrtb = params['metric_runningtime_bound']
        metrics = [m for m in metrics if m['runningtime'] <= mrtb]
    metrics = [m for m in metrics if m['name'] not in ['avg flow closeness']]  #broken in NX 1.6
    metrics.reverse()
    if params == None:
        params = {'verbose': False,
                  'node_edit_rate': [0.05, 0.04, 0.03, 0.02, 0.01],
                  'edge_edit_rate': [0.05, 0.04, 0.03, 0.02, 0.01],
                  'node_growth_rate': [0],
                  'locality_bias_correction': 0.,
                  'enforce_connected': True,
                  'accept_chance_edges': 1.0,
                  'retain_intermediates': intermediates}
    if intermediates:
        params['retain_intermediates'] = True
    print('Params:')
    print(params)
    print('Metrics:')
    print([metric['name'] for metric in metrics])

    replicas = replicate_graph(G=G, generator_func=generator_func, num_replicas=num_replicas, params=params,
                               title_infix=title_infix, n_jobs=n_jobs)
    jaccard_edges = evaluate_similarity(base_graphs=G, graphs=replicas, n_jobs=n_jobs)  #this is actually a mean
    vals_of_all = evaluate_metrics(graphs=[G] + replicas, metrics=metrics, n_jobs=n_jobs)
    vals_of_graph = [metric_data[0] for metric_data in vals_of_all]
    vals_of_replicas = [metric_data[1:] for metric_data in vals_of_all]
    replica_statistics, figpath = plot_deviation(vals_of_replicas, vals_of_graph, metrics, figpath, jaccard_edges,
                                                 title_infix, seed, getattr(G, 'name', ''))
    #pylab.show()
    data = {'metrics': [met['name'] for met in metrics],
            'name': getattr(G, 'name', ''),
            'params': params,
            'num_replicas': num_replicas,
            'figpath': figpath}
    data[0] = replica_statistics
    data[0].update({'vals_of_replicas': vals_of_replicas,
                    'val_of_models': vals_of_graph,
                    'avg_jaccard_edges': jaccard_edges})

    if intermediates:
        current_replicas = replicas
        for level in range(1, max(len(params.get('node_edit_rate', [])),
                                  len(params.get('edge_edit_rate', [])),
                                  len(params.get('node_growth_rate', [])),
                                  len(params.get('edge_growth_rate', [])))):
            print('LEVEL: %d' % level)
            coarse_models = [r.coarser_graph.model_graph for r in current_replicas]
            coarse_replicas = [r.coarser_graph for r in current_replicas]
            vals_of_models = evaluate_metrics(graphs=coarse_models, metrics=metrics, n_jobs=n_jobs)
            vals_of_replicas = evaluate_metrics(graphs=coarse_replicas, metrics=metrics, n_jobs=n_jobs)
            jaccard_edges = evaluate_similarity(base_graphs=coarse_models, graphs=coarse_replicas, n_jobs=n_jobs)
            replica_statistics, dummy \
                = plot_deviation(vals_of_replicas=vals_of_replicas, vals_of_graph=vals_of_models,
                                 metrics=metrics, figpath=figpath + 'level%d' % level, jaccard_edges=jaccard_edges)
            current_replicas = coarse_replicas
            data[level] = replica_statistics
            data[level].update({'vals_of_replicas': vals_of_replicas,
                                'vals_of_models': vals_of_models,
                                'avg_jaccard_edges': jaccard_edges})
    graphutils.safe_pickle(path=figpath + '.pkl', data=data)
    save_param_set(params, seed, figpath)
    save_stats_csv(path=figpath + '.csv', seed=seed, data=data)

    return data
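#Typical call, matching the self-test at the bottom of this file: two quick replicas of a
#small mesh with light edge edits.
#  data = replica_vs_original(G=graphutils.load_graph('data-samples/mesh33.edges'),
#                             params={'edge_edit_rate': [0.01, 0.01]},
#                             num_replicas=2, n_jobs=1)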
def read_all_files():
    #load previously generated replicas from a fixed directory (hardcoded experiment path)
    import os
    replicas = []
    files = os.listdir("/home/varsha/Documents/final_results/Krongen/planar/Boeing/")
    for file in files:
        tmp = nx.read_gml("/home/varsha/Documents/final_results/Krongen/planar/Boeing/" + file)
        replica = tmp.to_undirected()
        replicas.append(replica)
    return replicas
def read_all_files(dir):
    #load previously generated replicas (edge lists) from the given directory
    import os
    print(dir)
    replicas = []
    files = os.listdir(dir)
    for file in files:
        tmp = nx.read_edgelist(dir + file)
        replica = tmp.to_undirected()
        replicas.append(replica)
    return replicas


if __name__ == '__main__':
    pass
    #drake_hougardy_test()
    #coarsening_test()
    #coarsening_test2(1)
    #edge_attachment_test(seed=None)
    #print('Statistical tests: this would take time ...')
    #statistical_tests()
    replica_vs_original(G=graphutils.load_graph('data-samples/mesh33.edges'),
                        params={'edge_edit_rate': [0.01, 0.01]},
                        num_replicas=2, n_jobs=25)