def subgraphs_exploding_trees(orig, sub_graph_lst):
    # edl -> dimacs -> treeX -> CliqueTreeX
    # union the prod rules?
    prs_paths_lst = []
    for sbg_edl_fname in sub_graph_lst:
        dimacsFname = edgelist_to_dimacs(sbg_edl_fname)
        # variable-elimination orderings (nddgo heuristics)
        varElimLst = ['mcs', 'mind', 'minf', 'mmd', 'lexm', 'mcsm']
        # dict where values are the file paths of the written trees
        dimacsTrees_d = tree_decomposition_with_varelims(dimacsFname, varElimLst)
        trees_lst = []
        for x in dimacsTrees_d.values():
            for f in x:
                trees_lst.append(f[0])
        # to HRG clique tree, in stacked pd df form / returns individual filenames
        prs_paths_lst.append(
            convert_dimacs_tree_objs_to_hrg_clique_trees(orig, trees_lst))

    # stack production rules from each subgraph (typically k = 2 sampled subgraphs)
    prs_stacked_dfs = [get_hrg_prod_rules(multi_paths_lst)
                       for multi_paths_lst in prs_paths_lst]
    prs_stacked_df = pd.concat(prs_stacked_dfs)
    gb = list(prs_stacked_df.groupby(['cate']).groups.keys())

    # Jaccard similarity between rule sets
    get_isom_overlap_in_stacked_prod_rules(gb, prs_stacked_df)

    # isomorph_intersection_2dfstacked
    iso_union, iso_interx = isoint.isomorph_intersection_2dfstacked(prs_stacked_df)
    gname = graph_name(orig)
    iso_interx[[1, 2, 3, 4]].to_csv('Results/{}_isom_interxn.tsv'.format(gname),
                                    sep="\t", header=False, index=False)
    if os.path.exists('Results/{}_isom_interxn.tsv'.format(gname)):
        print("\t", 'Written:', 'Results/{}_isom_interxn.tsv'.format(gname))
        print("\t", 'Next step is to generate graphs using this subset of production rules.')
    else:
        print("!! Unable to save file")

    print("Done")
    print(gb, "\n---------------<>---------------<>---------------")
    exit()
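# A minimal sketch of the pairwise Jaccard similarity that
# get_isom_overlap_in_stacked_prod_rules computes over the stacked rules
# (illustrative only; the real routine also accounts for rule isomorphism).
# The column name 'rhs' is an assumption standing in for whatever hashable
# rule representation the stacked DataFrame carries.
def jaccard_similarity_sketch(df_a, df_b, col='rhs'):
    """Jaccard similarity |A & B| / |A | B| between two rule sets."""
    a, b = set(df_a[col]), set(df_b[col])
    if not a and not b:
        return 1.0
    return len(a & b) / float(len(a | b))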
def transform_edgelist_to_dimacs(files, f_str=None):
    """
    Edgelist to DIMACS graph format.
    :param files: list of file path(s) to gpickled graphs
    :return: list of results from convert_nx_gObjs_to_dimacs_gObjs, one per input file
    """
    Info("edgelist to dimacs graph format\n" + "-" * 40)
    rslt = []
    for f in files:
        g = nx.read_gpickle(f)
        g.name = graph_name(f)
        rslt.append(convert_nx_gObjs_to_dimacs_gObjs([g]))
    return rslt
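# For reference, the DIMACS graph format that the conversion above targets is
# plain text: optional 'c' comment lines, one 'p edge <num_nodes> <num_edges>'
# header, then one 'e <u> <v>' line per edge with 1-based node ids. A 4-node
# path graph, for example:
#
#   c toy example
#   p edge 4 3
#   e 1 2
#   e 2 3
#   e 3 4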
def synth_checks_network_metrics(orig_graph):
    gname = graph_name(orig_graph)
    files = glob("./FakeGraphs/" + gname + "*")
    shl_db = shelve.open(files[0])  # open for read
    origG = load_edgelist(orig_graph)
    print("%%")
    print("%%", gname)
    print("%%")
    for k in shl_db.keys():
        synthGs = shl_db[k]
        # print(synthGs[0].number_of_edges(), synthGs[0].number_of_nodes())
        metricx = ['degree']
        metrics.network_properties([origG], metricx, synthGs,
                                   name="hstars_" + origG.name, out_tsv=False)
    shl_db.close()
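# A minimal sketch of the kind of comparison metrics.network_properties makes
# when metricx = ['degree'] (an assumption; the real routine computes more
# properties and can emit TSV): contrast the original graph's degree histogram
# with each synthetic graph's histogram.
def degree_histogram_check_sketch(origG, synthGs):
    ref = nx.degree_histogram(origG)  # frequency of each degree value
    for i, sg in enumerate(synthGs):
        print(i, nx.degree_histogram(sg), "vs ref:", ref)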
def ref_graph_largest_conn_componet(fname):
    df = Pandas_DataFrame_From_Edgelist([fname])[0]
    G = nx.from_pandas_dataframe(df, source='src', target='trg')
    Gc = max(nx.connected_component_subgraphs(G), key=len)
    gname = graph_name(fname)
    num_nodes = Gc.number_of_nodes()
    subg_fnm_lst = []

    # sample the graph if it is larger than 500 nodes
    if num_nodes >= 500:
        cnt = 0
        for Gprime in gs.rwr_sample(G, 2, 300):
            subg_fnm_lst.append('.{}_lcc_{}.edl'.format(gname, cnt))
            try:
                nx.write_edgelist(Gprime,
                                  '.{}_lcc_{}.edl'.format(gname, cnt),
                                  data=False)
                cnt += 1
            except Exception as e:
                print(str(e), '\n!! Error writing to disk')
                return ""
    else:
        # reconstructed small-graph branch (assumption): write the largest
        # connected component itself, so callers such as xplodingTree always
        # receive at least one filename
        subg_fnm_lst.append('.{}_lcc.edl'.format(gname))
        nx.write_edgelist(Gc, '.{}_lcc.edl'.format(gname), data=False)
    return subg_fnm_lst
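# A minimal, self-contained sketch of random-walk-with-restart sampling in the
# spirit of gs.rwr_sample used above (illustrative only; the project's sampler
# may differ in restart probability, subgraph count, and termination logic).
import random

def rwr_sample_sketch(G, num_nodes=300, restart_p=0.15):
    """Grow a node set by a random walk that jumps back to its seed with
    probability restart_p, then return the induced subgraph. Assumes the
    component containing the seed has at least num_nodes nodes and that the
    walk never lands on an isolated node."""
    seed = random.choice(list(G.nodes()))
    visited, cur = {seed}, seed
    target = min(num_nodes, G.number_of_nodes())
    while len(visited) < target:
        if random.random() < restart_p:
            cur = seed  # restart at the seed
        else:
            cur = random.choice(list(G.neighbors(cur)))
            visited.add(cur)
    return G.subgraph(visited).copy()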
def new_main(args):
    if not (args['base'] is None):
        Info("<- converts to dimacs")
        gn = graph_name(args['base'][0])
        f = "../datasets/" + gn + "*.p"
        files = glob(f)
        dimacs_lst = transform_edgelist_to_dimacs(files)
        results = []
        trees = explode_to_trees(dimacs_lst, results)
        pp.pprint(files)
        pp.pprint(dimacs_lst)
        pp.pprint(trees)
        pp.pprint(results)
        exit(0)
    elif not (args['orig'] is None):
        Info("<- converts edgelist to gpickle")
        f = args['orig'][0]
        g = load_edgelist(f)  # full graph
        Info("# of conn comp: %d" % len(list(nx.connected_component_subgraphs(g))))
        g = largest_conn_comp(f)  # largest conn comp
        if isinstance(g, list):
            for k, Gprime in enumerate(g):
                subg_out_fname = max(graph_name(f).split("."), key=len)
                subg_out_fname = "../datasets/" + subg_out_fname
                subg_out_fname += "_{}.p".format(k)
                nx.write_gpickle(Gprime, subg_out_fname)  # subgraph to temp gpickle
                if os.path.exists(subg_out_fname):
                    Info("Wrote %s" % subg_out_fname)
        else:
            subg_out_fname = max(graph_name(f).split("."), key=len)
            subg_out_fname = "../datasets/" + subg_out_fname
            subg_out_fname += ".p"
            nx.write_gpickle(g, subg_out_fname)
            if os.path.exists(subg_out_fname):
                Info("Wrote %s" % subg_out_fname)
        print("done")
        exit()
    elif not (args['edgelist2dimacs'] is None):
        f = args['edgelist2dimacs'][0]
        pfname = graph_name(f)
        pfname = "../datasets/{}.p".format(pfname)
        if not os.path.exists(pfname):
            Info("File not found, please run:")
            Info("  python explodingTree.py --orig path/to/edgelist")
        G = load_edgelist(f)
        subgraph = max(nx.connected_component_subgraphs(G), key=len)
        gprime_lst = []
        if subgraph.number_of_nodes() > 500:
            for j, Gprime in enumerate(gs.rwr_sample(subgraph, 2, 300)):
                Gprime.name = G.name + "_%d" % j
                gprime_lst.append(convert_graph_obj_2dimacs([Gprime]))
        print([x for x in gprime_lst])
    elif not (args['prules'] is None):
        gn = graph_name(args['prules'][0])
        print(gn)
        f = "../datasets/" + gn + "*.tree"
        files = glob(f)
        f = "../datasets/" + gn + "*.p"
        graphs = glob(f)
        for g in graphs:
            for f in files:
                dimacs_td_ct_fast(g, f)  # dimacs to tree (decomposition)
        exit(0)
    elif not (args['td'] is None):
        origG = args['td'][0]
        dimacs_f = glob("../datasets/" + graph_name(args['td'][0]) + "*.dimacs")
        # "Explode to trees"  # ToDo
        var_els = ['mcs', 'mind', 'minf', 'mmd', 'lexm', 'mcsm']
        for j, f in enumerate(dimacs_f):
            print(f)
            gn = graph_name(f)
            dimacs_file = "../datasets/{}.dimacs".format(gn)
            p = mp.Pool(processes=2)
            for vael in var_els:
                p.apply_async(dimacs_nddgo_tree_simple,
                              args=(dimacs_file, vael,),
                              callback=collect_results_trees)
                # xt.dimacs_nddgo_tree_simple(f, vael)
            p.close()
            p.join()
        # dimacs_td_ct_fast(origG, tdfname)  # dimacs to tree (decomposition)
    else:
        sys.exit(0)
        # dimacs_convert_orig_graph(args['orig'])

    # fall-through path (reached only by branches that do not exit above)
    pickle_fname = "../datasets/" + f + ".p"
    g = nx.read_gpickle(pickle_fname)
    subgraph = max(nx.connected_component_subgraphs(g), key=len)
    if subgraph.number_of_nodes() > 500:
        for Gprime in gs.rwr_sample(subgraph, 2, 300):
            edgelist_in_dimacs_out(Gprime)
def xplodingTree(argsd):
    """
    Run a full set of tests.

    explodingTree.py
    Flagship function to run a complete test.

    Parameters
    ----------
    argsd : dict
        The whole set of args needed

    Returns
    -------
    None
    """
    sub_graphs_fnames_lst = ref_graph_largest_conn_componet(
        argsd['orig'][0])  # max largest conn component
    if len(sub_graphs_fnames_lst) > 1:
        print('process subgraphs from sampling')
        print(sub_graphs_fnames_lst)
        subgraphs_exploding_trees(argsd['orig'][0], sub_graphs_fnames_lst)
        exit()

    dimacsFname = edgelist_to_dimacs(sub_graphs_fnames_lst[0])
    # variable-elimination orderings (nddgo heuristics)
    varElimLst = ['mcs', 'mind', 'minf', 'mmd', 'lexm', 'mcsm']

    # dict where values are the file paths of the written trees
    dimacsTrees_d = tree_decomposition_with_varelims(dimacsFname, varElimLst)
    trees_lst = []
    for x in dimacsTrees_d.values():
        for f in x:
            trees_lst.append(f[0])

    # to HRG clique tree, in stacked pd df form / returns individual filenames
    prs_paths_lst = convert_dimacs_tree_objs_to_hrg_clique_trees(
        argsd['orig'][0], trees_lst)

    # stack production rules
    prs_stacked_df = get_hrg_prod_rules(prs_paths_lst)
    gb = list(prs_stacked_df.groupby(['cate']).groups.keys())

    # Jaccard similarity between rule sets
    get_isom_overlap_in_stacked_prod_rules(gb, prs_stacked_df)

    # isomorph_intersection_2dfstacked
    iso_union, iso_interx = isoint.isomorph_intersection_2dfstacked(prs_stacked_df)
    gname = graph_name(argsd['orig'][0])
    iso_interx[[1, 2, 3, 4]].to_csv('Results/{}_isom_interxn.tsv'.format(gname),
                                    sep="\t", header=False, index=False)
    if os.path.exists('Results/{}_isom_interxn.tsv'.format(gname)):
        print("\t", 'Written:', 'Results/{}_isom_interxn.tsv'.format(gname))
        print("\t", 'Next step is to generate graphs using this subset of production rules.')
    else:
        print("!! Unable to save file")
    print("Done")
    exit()
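# A minimal sketch of the "intersection up to isomorphism" idea behind
# isoint.isomorph_intersection_2dfstacked (illustrative only; the real routine
# operates on the stacked production-rule DataFrame rather than raw graphs).
def isomorph_intersection_sketch(graphs_a, graphs_b):
    """Return members of graphs_a that have an isomorphic counterpart in graphs_b."""
    return [ga for ga in graphs_a
            if any(nx.is_isomorphic(ga, gb) for gb in graphs_b)]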
if __name__ == '__main__':
    if len(sys.argv) < 2:
        Info("Usage:")
        Info("python xplotree_subgraphs_prs.py path/to/orig_net_edgelist")
        sys.exit(1)
    elif sys.argv[1] == "-ut":
        fname = "/Users/sal.aguinaga/KynKon/datasets/out.as20000102"
    else:
        fname = sys.argv[1]

    if not os.path.exists(fname):
        Info("Path to edgelist does not exist.")
        sys.exit(1)

    gn = graph_name(fname)
    prsfname = '../ProdRules/{}.tsv.phrg.prs'.format(gn)
    if os.path.exists(prsfname):
        Info('{} already exists'.format(prsfname))
        sys.exit(0)

    og = load_edgelist(fname)
    og.name = gn
    # sgp = glob("../datasets/" + gn + "*.p")
    print("--")
    print("-- derive subgraphs")
    print("--")
    Info("sample 2 subgraphs of 300 nodes and derive the set of production rules")
    prod_rules = get_sampled_gpickled_graphs(og)
# df = recompute_probabilities(staked_prs_df)  # from core.baseball
# # test if stacked prs can fire
# stck_fired = probe_stacked_prs_likelihood_tofire(df, graph_name(f),
#                                                  el_base_info_d[graph_name(f)])
# print(stck_fired)
# # break

if __name__ == '__main__':
    import sys
    from core.utils import graph_name
    import networkx as nx

    if len(sys.argv) < 2:
        Info("add an out.* dataset with its full path")
        exit()

    f = sys.argv[1]
    f = "../datasets/" + graph_name(f) + "*.tree"
    ftrees = glob(f)

    orig = sys.argv[1]  # e.g. "/Users/sal.aguinaga/KynKon/datasets/out.karate_club_graph"
    gn = graph_name(orig)
    f = "../datasets/" + gn + "*.p"
    results = []
    for p in glob(f):
        pp.pprint(p)
        g = nx.read_gpickle(p)
        for tf in ftrees:
            print("\t", tf)