def hrg_clique_tree(G):
    """Tree-decompose ``G`` and return the binarized clique tree.

    Graphs with 500 or more nodes are decomposed through the subgraphs
    yielded by ``gs.rwr_sample(G, 2, 300)``; smaller graphs are decomposed
    whole.  Every tree is rooted at its first key, binarized, and passed to
    ``td.new_visit`` together with the *original* graph ``G``.

    Args:
        G: graph object exposing ``number_of_nodes()``; ``None`` is tolerated.

    Returns:
        ``(root, children)`` of the last tree processed, or ``None`` when
        ``G`` is ``None``.
    """
    if G is None:
        return

    # ------------------ ##
    # tree decomposition
    # ------------------ ##
    num_nodes = G.number_of_nodes()
    prod_rules = {}  # filled by td.new_visit; not returned to the caller

    if num_nodes >= 500:
        graphs = gs.rwr_sample(G, 2, 300)
    else:
        graphs = [G]

    # Both size classes run the same pipeline; only the input graphs differ.
    # NOTE(review): if the sampler yields nothing, `root`/`children` are
    # unbound at the return below (same as before this change).
    for graph in graphs:
        T = td.quickbb(graph)
        T = td.make_rooted(T, list(T)[0])
        T = phrg.binarize(T)
        root, children = T
        td.new_visit(T, G, prod_rules)

    # pprint.pprint (children)
    return root, children
def Hstar_Graphs_Ignore_Time(df, graph_name, tslices, axs): if len(df.columns) == 3: G = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr='ts') else: G = nx.from_pandas_dataframe(df, 'src', 'trg') # force to unrepeated edgesA if 0: print nx.info(G) G = G.to_undirected() if 0: print nx.info(G) exit() # Derive the prod rules in a naive way, where prod_rules = PHRG.probabilistic_hrg_learning(G) g = pcfg.Grammar('S') for (id, lhs, rhs, prob) in prod_rules: g.add_rule(pcfg.Rule(id, lhs, rhs, prob)) num_nodes = G.number_of_nodes() print "Starting max size" g.set_max_size(num_nodes) print "Done with max size" Hstars = [] num_samples = 20 print '*' * 40 for i in range(0, num_samples): rule_list = g.sample(num_nodes) hstar = PHRG.grow(rule_list, g)[0] Hstars.append(hstar) # if 0: # g = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr=['ts']) # draw_degree_whole_graph(g,axs) # draw_degree(Hstars, axs=axs, col='r') # #axs.set_title('Rules derived by ignoring time') # axs.set_ylabel('Frequency') # axs.set_xlabel('degree') if 1: # metricx = [ 'degree','hops', 'clust', 'assort', 'kcore','eigen','gcd'] metricx = ['eigen'] g = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr=['ts']) # graph_name = os.path.basename(f_path).rstrip('.tel') print ">", graph_name metrics.network_properties([g], metricx, Hstars, name=graph_name, out_tsv=True)
def synth_plots():
    """Compare graph generators on five random reference graphs.

    For each of 5 Erdos-Renyi graphs (100 nodes, p=0.1) this collects
    5 Chung-Lu, 5 stochastic-HRG, 5 probabilistic-HRG and 5 Kronecker
    graphs, then passes all collections to ``metrics.draw_network_value``.

    NOTE(review): the loop nesting was reconstructed from flattened source;
    the generator loops are assumed to sit inside the per-reference loop.
    """
    num_nodes = 100
    samples = 5
    kron_power = int(math.floor(math.log(num_nodes, 2)))

    G_M = []
    chunglu_M = []
    HRG_M = []
    pHRG_M = []
    kron_M = []

    for _ref in range(samples):
        # Erdos-Renyi reference graph
        G = nx.erdos_renyi_graph(num_nodes, .1)
        G_M.append(G)

        for _draw in range(samples):
            chunglu_M.append(nx.expected_degree_graph(G.degree().values()))

        hrg_batch, degree = HRG.stochastic_hrg(G, samples)
        HRG_M = HRG_M + hrg_batch

        phrg_batch = PHRG.probabilistic_hrg(G, samples)
        pHRG_M = pHRG_M + phrg_batch

        for _draw in range(samples):
            # the initiator matrix is re-fitted for every Kronecker sample
            P = kronfit(G)
            kron_M.append(
                product.kronecker_random_graph(kron_power, P, directed=False))

    metrics.draw_network_value(G_M, chunglu_M, HRG_M, pHRG_M, kron_M)
def derive_prules_from(list_of_graphs):
    """Derive production rules for every non-empty graph.

    Args:
        list_of_graphs: iterable of graphs exposing ``number_of_nodes()``.

    Returns:
        A list with one ``PHRG.probabilistic_hrg_deriving_prod_rules``
        result per graph that has at least one node, in input order.
    """
    return [
        PHRG.probabilistic_hrg_deriving_prod_rules(graph)
        for graph in list_of_graphs
        if graph.number_of_nodes() > 0
    ]
def grow_exact_size_hrg_graphs_from_prod_rules(prod_rules, gname, n, runs=1): """ Args: rules: production rules (model) gname: graph name n: target graph order (number of nodes) runs: how many graphs to generate Returns: list of synthetic graphs """ if n <=0: sys.exit(1) print runs print n print gname for i,x in enumerate(prod_rules): print i,' ', x[:1] g = pcfg.Grammar('S') for (id, lhs, rhs, prob) in prod_rules: g.add_rule(pcfg.Rule(id, lhs, rhs, prob)) print '... pcfg.Grammar' g.set_max_size(n) print "Done with max size" if DBG: print '*' * 40 hstars_lst = [] for i in range(0, runs): rule_list = g.sample(n) print 'g.sample' hstar = PHRG.grow(rule_list, g)[0] hstars_lst.append(hstar) return hstars_lst
def Hstar_Graphs_Control(G, graph_name, axs): print '-', Hstar_Graphs_Control, '-' # Derive the prod rules in a naive way, where prod_rules = PHRG.probabilistic_hrg_learning(G) print prod_rules g = pcfg.Grammar('S') for (id, lhs, rhs, prob) in prod_rules: g.add_rule(pcfg.Rule(id, lhs, rhs, prob)) num_nodes = G.number_of_nodes() print "Starting max size", 'n=', num_nodes g.set_max_size(num_nodes) print "Done with max size" Hstars = [] num_samples = 20 print '*' * 40 for i in range(0, num_samples): rule_list = g.sample(num_nodes) hstar = PHRG.grow(rule_list, g)[0] Hstars.append(hstar) # if 0: # g = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr=['ts']) # draw_degree_whole_graph(g,axs) # draw_degree(Hstars, axs=axs, col='r') # #axs.set_title('Rules derived by ignoring time') # axs.set_ylabel('Frequency') # axs.set_xlabel('degree') if 1: # metricx = [ 'degree','hops', 'clust', 'assort', 'kcore','eigen','gcd'] metricx = ['degree', 'gcd'] # g = nx.from_pandas_dataframe(df, 'src', 'trg',edge_attr=['ts']) # graph_name = os.path.basename(f_path).rstrip('.tel') if DBG: print ">", graph_name metrics.network_properties([G], metricx, Hstars, name=graph_name, out_tsv=True)
def main(argsD): runs = argsD['runs'] print print 'dataset: {}\nruns: {},'.format(argsD['orig'][0], runs), G = read_load_graph(argsD['orig'][0]) print "(V,E): {},{}".format(G.number_of_nodes(), G.number_of_edges()) ## if metrix if argsD['netstats']: compute_netstats(G, G.name) exit(0) if argsD['peek']: compute_netstats_peek(G, G.name, piikshl=True) exit(0) ofname = "Results/" + G.name + ".shl" # if argsD['rods']: ofname = ofname.split(".")[0] + "_rods.shl" database = shelve.open(ofname) if argsD['rods']: print '% --> Control Rods' start_time = time.time() HRG_M, degree = HRG.stochastic_hrg(G, runs) print(" %d, %s seconds ---" % (G.number_of_nodes(), time.time() - start_time)) database['rods_hstars'] = HRG_M else: print '% --> PHRG' start_time = time.time() A = PHRG.probabilistic_hrg(G, runs) # returns a list of Hstar graphs # print(" --- Total %s seconds ---" % (time.time() - start_time)) print(" %d, %s seconds ---" % (G.number_of_nodes(), time.time() - start_time)) database['prob_hstars'] = A print start_time = time.time() print '% --> CHLU' clgs = [] z = G.degree().values() for i in range(runs): clgs.append(nx.expected_degree_graph(z)) database['clgs'] = clgs print(" %d, %s seconds ---" % (G.number_of_nodes(), time.time() - start_time)) # -- Kron Prod Graphs print '% --> Kron' start_time = time.time() database['kpgs'] = grow_graphs_using_krongen(G, gn=G.name, nbr_runs=runs) print(" %d, %s seconds ---" % (G.number_of_nodes(), time.time() - start_time)) database.close() return
def get_clique_tree(g):
    """Build the binarized clique tree of ``g``'s giant component.

    Self-loops are removed from the graph passed in (the caller's object is
    modified), the largest connected component is extracted, and its tree
    decomposition is rooted at the first key, binarized, and handed to
    ``unfold_2wide_tuple``.

    Args:
        g: graph to decompose.

    Returns:
        None.
    """
    g.remove_edges_from(g.selfloop_edges())
    giant_nodes = max(nx.connected_component_subgraphs(g), key=len)
    g = nx.subgraph(g, giant_nodes)

    # Decompose the local giant component `g`; the previous code referenced
    # an undefined name `G` here.
    T = td.quickbb(g)
    root = list(T)[0]
    T = td.make_rooted(T, root)
    T = phrg.binarize(T)

    unfold_2wide_tuple(T)
    return
def synthetic_graph_generator(ref, graph_model):
    """Generate one synthetic graph modelled on ``ref``.

    Args:
        ref: reference graph exposing ``number_of_nodes()``.
        graph_model: model selector; only values containing ``'hrg'`` are
            handled.

    Returns:
        A graph grown from PHRG production rules with the same order as
        ``ref``, or ``None`` when ``'hrg'`` is not in ``graph_model``.
    """
    # The order is queried up front regardless of the model, as before.
    ref.number_of_nodes()

    if 'hrg' not in graph_model:
        return None

    rules = PHRG.probabilistic_hrg_deriving_prod_rules(ref)  # derive rules
    grammar = pcfg.Grammar('S')
    for rule_id, lhs, rhs, prob in rules:
        grammar.add_rule(pcfg.Rule(rule_id, lhs, rhs, prob))

    # Read the order after rule derivation, matching the original sequence.
    order = ref.number_of_nodes()
    grammar.set_max_size(order)

    derivation = grammar.sample(order)
    return PHRG.grow(derivation, grammar)[0]
def Growing_Network_Using_Final_State_ProdRules(pddf, prod_rules, nSlices, kSlice, axs):
    '''
    Grow a synthetic graph up to the end of block kSlice using HRG rules
    from the final (whole) state of the graph, and plot degree counts of the
    synthetic and original graphs on log-scaled x axes.

    pddf: pandas df with 'src', 'trg' and 'ts' columns
    prod_rules: production rules learned on the entire graph
    nSlices: total number of blocks (pseudo-states of the graph)
    kSlice: the current slice
    axs: axes to plot to
    '''
    # NOTE(review): statement nesting was reconstructed from flattened
    # source text -- verify against the original file.

    # Width of one time block.
    span = (pddf['ts'].max() - pddf['ts'].min()) / nSlices

    g = pcfg.Grammar('S')
    for (id, lhs, rhs, prob) in prod_rules:
        g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

    # mask = (pddf['ts'] >= pddf['ts'].min()+ span*kSlice) & (pddf['ts'] < pddf['ts'].min()+ span*(kSlice +1))
    # Cumulative window: every edge from the first timestamp up to the end
    # of block kSlice.
    mask = (pddf['ts'] >= pddf['ts'].min()) & (
        pddf['ts'] < pddf['ts'].min() + span * (kSlice + 1))
    ldf = pddf.loc[mask]

    G = nx.from_pandas_dataframe(ldf, 'src', 'trg', ['ts'])

    num_nodes = G.number_of_nodes()
    print "Starting max size"
    g.set_max_size(num_nodes)
    print "Done with max size"

    num_samples = 20
    print '*' * 40

    tdf = pd.DataFrame()
    for i in range(0, num_samples):
        rule_list = g.sample(num_nodes)
        hstar = PHRG.grow(rule_list, g)[0]
        df = pd.DataFrame.from_dict(hstar.degree().items())
        #
        # NOTE(review): the flattened source does not show whether the next
        # statement is live or commented out.  As written it concatenates
        # the current sample with itself, so `tdf` only ever reflects the
        # last sample; accumulating into `tdf` was presumably intended.
        tdf = pd.concat([df.groupby([1]).count(), df.groupby([1]).count()], axis=1)

    tdf = tdf[0].mean(axis=1)
    # NOTE(review): this synthetic curve is labelled 'Orig', same as the
    # original-graph curve below -- confirm the intended label.
    tdf.plot(ax=axs, color='r', label='Orig')
    # Orig Graph
    tdf = pd.DataFrame.from_dict(G.degree().items())
    gb = tdf.groupby([1]).count()
    gb[0].plot(ax=axs, color='b', label='Orig')
    axs.set_xscale('log')
    # NOTE(review): a triple-quoted block opens here; its contents are not
    # visible in this view.
    '''
def grow_exact_size_hrg_graphs_from_prod_rules(prod_rules, gname, n, runs=1): """ Args: rules: production rules (model) gname: graph name n: target graph order (number of nodes) runs: how many graphs to generate Returns: list of synthetic graphs """ if n <= 0: sys.exit(1) # print runs # for i,x in enumerate(prod_rules): # print i,' ', x g = pcfg.Grammar('S') for (id, lhs, rhs, prob) in prod_rules: rhs = [f[1:-1] for f in re.findall("'.+?'", rhs)] prob = float(prob) g.add_rule(pcfg.Rule(id, lhs, rhs, prob)) # # mask = (pddf['ts'] >= pddf['ts'].min()+ span*kSlice) & (pddf['ts'] < pddf['ts'].min()+ span*(kSlice +1)) # mask = (pddf['ts'] >= pddf['ts'].min()) & (pddf['ts'] < pddf['ts'].min() + span * (kSlice + 1)) # ldf = pddf.loc[mask] # # G = nx.from_pandas_dataframe(ldf, 'src', 'trg', ['ts']) # num_nodes = n if DBG: print "Starting max size" g.set_max_size(num_nodes) if DBG: print "Done with max size" # # num_samples = 20 if DBG: print '*' * 40 hstars_lst = [] for i in range(0, runs): rule_list = g.sample(num_nodes) hstar = PHRG.grow(rule_list, g)[0] hstars_lst.append(hstar) # print rule_list return hstars_lst
def hstar_fixed_graph_gen(args): import networkx as nx orig_fname = args['grow'][0] gn = graph_name(orig_fname) if os.path.exists("../datasets/{}.p".format(gn)): origG = nx.read_gpickle("../datasets/{}.p".format(gn)) else: print("we load edgelist into an nx.obj") prs_files = glob("../ProdRules/{}*prs".format(gn)) for f in prs_files: prod_rules = get_prod_rules(f) g = pcfg.Grammar('S') for (id, lhs, rhs, prob) in prod_rules: # print (id, lhs, rhs, prob) g.add_rule(pcfg.Rule(id, lhs, rhs, prob)) # exit() # Takes this out # ToDo: We nee to get these rules in the right format num_nodes = origG.number_of_nodes() print "Starting max size" g.set_max_size(num_nodes) print "Done with max size" Hstars = [] num_samples = 20 print '*' * 40 for i in range(0, num_samples): rule_list = g.sample(num_nodes) hstar = PHRG.grow(rule_list, g)[0] Hstars.append(hstar) import pickle pickle.dump({ 'origG': origG, 'hstars': Hstars }, open('../Results/{}_hstars.p'.format(gn), "wb")) if os.path.exists('../Results/{}_hstars.p'.format(gn)): print("Pickle written")
def grow_exact_size_hrg_graphs_from_prod_rules(prod_rules, gname, n, runs=1): """ Args: rules: production rules (model) gname: graph name n: target graph order (number of nodes) runs: how many graphs to generate Returns: list of synthetic graphs """ DBG = True if n <= 0: sys.exit(1) g = pcfg.Grammar('S') for (id, lhs, rhs, prob) in prod_rules: g.add_rule(pcfg.Rule(id, lhs, rhs, prob)) print print "Added rules HRG (pr", len(prod_rules), ", n,", n, ")" num_nodes = n if DBG: print "Starting max size ..." t_start = time.time() g.set_max_size(num_nodes) print "Done with max size, took %s seconds" % (time.time() - t_start) hstars_lst = [] print " ", for i in range(0, runs): print '>', rule_list = g.sample(num_nodes) hstar = phrg.grow(rule_list, g)[0] hstars_lst.append(hstar) return hstars_lst
def get_hrg_production_rules(edgelist_data_frame, graph_name, tw=False,
                             n_subg=2, n_nodes=300, nstats=False):
    """Derive HRG production rules from an edge list and grow 10 graphs.

    The graph is built from the data frame, reduced to its giant component
    without self-loops, tree-decomposed (via ``gs.rwr_sample`` subgraphs
    when it has 500+ nodes), and the visited rules are normalized into
    probabilities.  Ten graphs of the same order are then grown and pickled
    to ``Results/<graph_name>_hstars.pickle``.

    Args:
        edgelist_data_frame: pandas frame with 'src'/'trg' columns and, for
            3 columns, 'ts'; with 4 columns all extra columns become edge
            attributes.
        graph_name: name assigned to the graph and used in the output path.
        tw: not referenced in the visible code.
        n_subg: passed to ``gs.rwr_sample`` as ``K``.
        n_nodes: passed to ``gs.rwr_sample`` as ``n``.
        nstats: only referenced in a trailing block that is cut off here.

    NOTE(review): statement nesting was reconstructed from flattened source
    text -- verify against the original file.
    """
    from growing import derive_prules_from

    t_start = time.time()
    df = edgelist_data_frame
    if df.shape[1] == 4:
        G = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr=True)  # whole graph
    elif df.shape[1] == 3:
        G = nx.from_pandas_dataframe(df, 'src', 'trg', ['ts'])  # whole graph
    else:
        G = nx.from_pandas_dataframe(df, 'src', 'trg')
    G.name = graph_name
    print "==> read in graph took: {} seconds".format(time.time() - t_start)

    # Keep only the largest connected component, without self-loops.
    G.remove_edges_from(G.selfloop_edges())
    giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
    G = nx.subgraph(G, giant_nodes)

    num_nodes = G.number_of_nodes()

    phrg.graph_checks(G)

    if DBG: print
    if DBG: print "--------------------"
    # NOTE(review): this banner line is guarded by `not DBG`, unlike its
    # neighbours -- looks like a typo, confirm.
    if not DBG: print "-Tree Decomposition-"
    if DBG: print "--------------------"

    prod_rules = {}
    K = n_subg
    n = n_nodes
    if num_nodes >= 500:
        print 'Grande'
        t_start = time.time()
        for Gprime in gs.rwr_sample(G, K, n):
            T = td.quickbb(Gprime)
            root = list(T)[0]
            T = td.make_rooted(T, root)
            T = phrg.binarize(T)
            root = list(T)[0]
            root, children = T
            # td.new_visit(T, G, prod_rules, TD)
            td.new_visit(T, G, prod_rules)
            # NOTE(review): the same visit is started again in a separate
            # process that is never joined; presumably its updates to
            # `prod_rules` do not reach this process -- confirm intent.
            Process(target=td.new_visit, args=(
                T,
                G,
                prod_rules,
            )).start()
    else:
        T = td.quickbb(G)
        root = list(T)[0]
        T = td.make_rooted(T, root)
        T = phrg.binarize(T)
        root = list(T)[0]
        root, children = T
        # td.new_visit(T, G, prod_rules, TD)
        td.new_visit(T, G, prod_rules)

        # NOTE(review): exits the interpreter on the small-graph path.  The
        # flattened source does not show whether these two statements sit
        # inside this `else` or after the whole if/else -- confirm.
        print_treewidth(T)
        exit()

    if DBG: print
    if DBG: print "--------------------"
    if DBG: print "- Production Rules -"
    if DBG: print "--------------------"

    for k in prod_rules.iterkeys():
        if DBG: print k
        s = 0
        for d in prod_rules[k]:
            s += prod_rules[k][d]
        for d in prod_rules[k]:
            prod_rules[k][d] = float(prod_rules[k][d]) / float(
                s)  # normalization step to create probs not counts.
            if DBG: print '\t -> ', d, prod_rules[k][d]

    # Flatten {lhs: {rhs: prob}} into (rule id, lhs, rhs parts, prob) tuples.
    rules = []
    id = 0
    for k, v in prod_rules.iteritems():
        sid = 0
        for x in prod_rules[k]:
            rhs = re.findall("[^()]+", x)
            rules.append(
                ("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs,
                 prod_rules[k][x]))
            if DBG: print("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x])
            sid += 1
        id += 1

    df = pd.DataFrame(rules)
    '''print "++++++++++"
    df.to_csv('ProdRules/{}_prs.tsv'.format(G.name), header=False, index=False, sep="\t")
    if os.path.exists('ProdRules/{}_prs.tsv'.format(G.name)):
        print 'Saved', 'ProdRules/{}_prs.tsv'.format(G.name)
    else:
        print "Trouble saving"
    print "-----------"
    print [type(x) for x in rules[0]] '''

    '''
    Graph Generation of Synthetic Graphs
    Grow graphs usigng the union of rules from sampled sugbgraphs to predict the target order of the
    original graph
    '''
    hStars = grow_exact_size_hrg_graphs_from_prod_rules(
        rules, graph_name, G.number_of_nodes(), 10)
    print '... hStart graphs:', len(hStars)
    d = {graph_name + "_hstars": hStars}
    with open(r"Results/{}_hstars.pickle".format(graph_name),
              "wb") as output_file:
        cPickle.dump(d, output_file)
    if os.path.exists(r"Results/{}_hstars.pickle".format(graph_name)):
        print "File saved"
    # NOTE(review): a triple-quoted block opens here; its contents are not
    # visible in this view.
    '''if nstats:
G = nx.subgraph(G, giant_nodes) num_nodes = G.number_of_nodes() graph_checks(G) print print "--------------------" print "-Tree Decomposition-" print "--------------------" if num_nodes >= 500: for Gprime in gs.rwr_sample(G, 2, 100): T = td.quickbb(Gprime) root = list(T)[0] T = td.make_rooted(T, root) T = hrg.binarize(T) root = list(T)[0] root, children = T td.new_visit(T, G, prod_rules) else: T = td.quickbb(G) root = list(T)[0] T = td.make_rooted(T, root) T = hrg.binarize(T) root = list(T)[0] root, children = T td.new_visit(T, G, prod_rules) def flatten(tup): if type(tup) == frozenset:
def sampled_subgraphs_cliquetree(orig, tree_path):
    """Merge production rules from several tree decompositions of one graph.

    Every ``<tree_path>*.dimacs.tree`` file is parsed into a bag tree,
    rooted at bag 1, binarized and visited against the graph loaded from
    ``orig``; the visited rules accumulate in one dict.  The merged rules
    are normalized into probabilities and written to
    ``./ProdRules/<basename of orig>.prules.bz2`` unless that file exists.

    Args:
        orig: path to the original graph's edge list.
        tree_path: path prefix of the tree decomposition files.

    NOTE(review): statement nesting was reconstructed from flattened source
    text; normalization and output are assumed to run once, after the loop
    over files -- verify against the original file.
    """
    files = glob(tree_path + "*.dimacs.tree")
    prod_rules = {}
    graph_name = orig

    for fname in files:
        print '... input file:', fname

        # The reference graph is re-read from `orig` for every tree file.
        df = Pandas_DataFrame_From_Edgelist([orig])[0]
        if df.shape[1] == 3:
            G = nx.from_pandas_dataframe(df, 'src', 'trg', ['ts'])
        else:
            G = nx.from_pandas_dataframe(df, 'src', 'trg')
        print nx.info(G)

        with open(fname, 'r') as f:  # read tree decomp from inddgo
            lines = f.readlines()
            lines = [x.rstrip('\r\n') for x in lines]

        # Lines starting with 'B' are bags: field 1 is the bag id, fields
        # from index 3 on are read as the bag's vertices.
        cbags = {}
        bags = [x.split() for x in lines if x.startswith('B')]

        for b in bags:
            cbags[int(b[1])] = [int(x) for x in b[3:]]  # what to do with bag size?

        # Lines starting with 'e' are tree edges between bag ids.
        edges = [x.split()[1:] for x in lines if x.startswith('e')]
        edges = [[int(k) for k in x] for x in edges]

        tree = defaultdict(set)
        for s, t in edges:
            tree[frozenset(cbags[s])].add(frozenset(cbags[t]))
        if DEBUG: print '.. # of keys in `tree`:', len(tree.keys())
        if DEBUG: print tree.keys()
        # root = list(tree)[0]
        root = frozenset(cbags[1])
        if DEBUG: print '.. Root:', root
        T = td.make_rooted(tree, root)
        if DEBUG: print '.. T rooted:', len(T)
        # nfld.unfold_2wide_tuple(T) # lets me display the tree's frozen sets

        T = phrg.binarize(T)

        td.new_visit(
            T, G,
            prod_rules)  # ToDo: here is where something funny is going on.

    if DEBUG: print "--------------------"
    if DEBUG: print "- Production Rules -"
    if DEBUG: print "--------------------"

    for k in prod_rules.iterkeys():
        if DEBUG: print k
        s = 0
        for d in prod_rules[k]:
            s += prod_rules[k][d]
        for d in prod_rules[k]:
            prod_rules[k][d] = float(prod_rules[k][d]) / float(
                s)  # normalization step to create probs not counts.
            if DEBUG: print '\t -> ', d, prod_rules[k][d]
    print '... prod_rules size', len(prod_rules.keys())
    # - production rules number -
    # Flatten {lhs: {rhs: prob}} into (rule id, lhs, rhs parts, prob) tuples.
    rules = []
    id = 0
    for k, v in prod_rules.iteritems():
        sid = 0
        for x in prod_rules[k]:
            rhs = re.findall("[^()]+", x)
            rules.append(
                ("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs,
                 prod_rules[k][x]))
            if DEBUG: print("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x])
            sid += 1
        id += 1

    df = pd.DataFrame(rules)
    print graph_name
    graph_name = os.path.basename(graph_name)
    print graph_name
    outdf_fname = "./ProdRules/" + graph_name + ".prules"
    if not os.path.isfile(outdf_fname + ".bz2"):
        # the "written" message is printed before the file is written
        print '...', outdf_fname, "written"
        df.to_csv(outdf_fname + ".bz2", compression="bz2")
    else:
        print '...', outdf_fname, "file exists"

    return
def dimacs_td_ct(tdfname): """ tree decomp to clique-tree """ print '... input file:', tdfname fname = tdfname graph_name = os.path.basename(fname) gname = graph_name.split('.')[0] gfname = "datasets/out." + gname tdh = os.path.basename(fname).split('.')[1] # tree decomp heuristic tfname = gname + "." + tdh G = load_edgelist(gfname) if DEBUG: print nx.info(G) print with open(fname, 'r') as f: # read tree decomp from inddgo lines = f.readlines() lines = [x.rstrip('\r\n') for x in lines] cbags = {} bags = [x.split() for x in lines if x.startswith('B')] for b in bags: cbags[int(b[1])] = [int(x) for x in b[3:]] # what to do with bag size? edges = [x.split()[1:] for x in lines if x.startswith('e')] edges = [[int(k) for k in x] for x in edges] tree = defaultdict(set) for s, t in edges: tree[frozenset(cbags[s])].add(frozenset(cbags[t])) if DEBUG: print '.. # of keys in `tree`:', len(tree.keys()) if DEBUG: print tree.keys() root = list(tree)[0] if DEBUG: print '.. Root:', root root = frozenset(cbags[1]) if DEBUG: print '.. Root:', root T = td.make_rooted(tree, root) if DEBUG: print '.. T rooted:', len(T) # nfld.unfold_2wide_tuple(T) # lets me display the tree's frozen sets T = phrg.binarize(T) prod_rules = {} td.new_visit(T, G, prod_rules) if DEBUG: print "--------------------" if DEBUG: print "- Production Rules -" if DEBUG: print "--------------------" for k in prod_rules.iterkeys(): if DEBUG: print k s = 0 for d in prod_rules[k]: s += prod_rules[k][d] for d in prod_rules[k]: prod_rules[k][d] = float(prod_rules[k][d]) / float( s) # normailization step to create probs not counts. 
if DEBUG: print '\t -> ', d, prod_rules[k][d] rules = [] id = 0 for k, v in prod_rules.iteritems(): sid = 0 for x in prod_rules[k]: rhs = re.findall("[^()]+", x) rules.append( ("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x])) if DEBUG: print("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x]) sid += 1 id += 1 df = pd.DataFrame(rules) outdf_fname = "./ProdRules/" + tfname + ".prules" if not os.path.isfile(outdf_fname + ".bz2"): print '...', outdf_fname, "written" df.to_csv(outdf_fname + ".bz2", compression="bz2") else: print '...', outdf_fname, "file exists" return
def isomorphic_test_from_dimacs_tree(orig, tdfname, gname=""): """" orig: path to original/refernce input graph tdfname: path fragment for a set of td pro rules gname: graph name (str) returns: """ # if whole tree path # else, assume a path fragment print '... input graph :', os.path.basename(orig) print '... td path frag :', tdfname G = load_edgelist(orig) # load edgelist into a graph obj N = G.number_of_nodes() M = G.number_of_edges() # +++ Graph Checks if G is None: sys.exit(1) G.remove_edges_from(G.selfloop_edges()) giant_nodes = max(nx.connected_component_subgraphs(G), key=len) G = nx.subgraph(G, giant_nodes) graph_checks(G) # --- graph checks G.name = gname files = glob(tdfname + "*.dimacs.tree") prod_rules = {} stacked_df = pd.DataFrame() mat_dict = {} for i, x in enumerate(sorted(files)): mat_dict[os.path.basename(x).split(".")[0].split("_")[-1]] = i if DBG: print os.path.basename(x).split(".")[0].split("_")[-1] for tfname in sorted(files): tname = os.path.basename(tfname).split(".") tname = "_".join(tname[:2]) with open(tfname, 'r') as f: # read tree decomp from inddgo lines = f.readlines() lines = [x.rstrip('\r\n') for x in lines] cbags = {} bags = [x.split() for x in lines if x.startswith('B')] for b in bags: cbags[int(b[1])] = [int(x) for x in b[3:]] # what to do with bag size? edges = [x.split()[1:] for x in lines if x.startswith('e')] edges = [[int(k) for k in x] for x in edges] tree = defaultdict(set) for s, t in edges: tree[frozenset(cbags[s])].add(frozenset(cbags[t])) if DBG: print '.. 
# of keys in `tree`:', len(tree.keys()) root = list(tree)[0] root = frozenset(cbags[1]) T = td.make_rooted(tree, root) # nfld.unfold_2wide_tuple(T) # lets me display the tree's frozen sets T = phrg.binarize(T) # root = list(T)[0] # root, children = T # td.new_visit(T, G, prod_rules, TD) # print ">>",len(T) td.new_visit(T, G, prod_rules) from json import dumps # print dumps(prod_rules, indent=4, sort_keys=True) for k in prod_rules.iterkeys(): if DBG: print k s = 0 for d in prod_rules[k]: s += prod_rules[k][d] for d in prod_rules[k]: prod_rules[k][d] = float(prod_rules[k][d]) / float(s) # normailization step to create probs not counts. if DBG: print '\t -> ', d, prod_rules[k][d] if DBG: print "--------------------" if DBG: print '- Prod. Rules' if DBG: print "--------------------" rules = [] # print dumps(prod_rules, indent=4, sort_keys=True) id = 0 for k, v in prod_rules.iteritems(): sid = 0 for x in prod_rules[k]: rhs = re.findall("[^()]+", x) rules.append(("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x])) if DBG: print "r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x] sid += 1 id += 1 df = pd.DataFrame(rules) df['cate'] = tname stacked_df = pd.concat([df, stacked_df]) # print df.shape print "\nStacked prod rules\n", "~" * 20 print " ", stacked_df.shape if args['verb']: print stacked_df.to_string() stacked_df.to_csv("../Results/{}_stacked_df.tsv".format(gname), sep="\t") if os.path.exists( "../Results/{}_stacked_df.tsv".format(gname)): print 'Wrote:', "../Results/{}_stacked_df.tsv".format(gname) print "\nisomorphic union of the rules (_mod probs)\n", "~" * 20 stacked_df.columns = ['rnbr', 'lhs', 'rhs', 'pr', df['cate'].name] iso_union, iso_interx = isomorph_intersection_2dfstacked(stacked_df) print " ", iso_union.shape if args['verb']: print iso_union.to_string() print "\nIsomorphic intersection of the prod rules\n", "~" * 20 print " ", iso_interx.shape 
iso_interx.to_csv('../Results/{}_isom_interxn.tsv'.format(gname)) if os.path.exists( '../Results/{}_isom_interxn.tsv'.format(gname)): print 'Wrote:', '../Results/{}_isom_interxn.tsv'.format(gname)
def dimacs_td_ct_fast(oriG, tdfname):
    """ tree decomp to clique-tree
    parameters:
      oriG: original (input) graph object; when None the function returns 1
      tdfname: filepath to tree decomposition from INDDGO
    returns:
      the rules file name ``<basename of tdfname>.prs``; returned early,
      without any work, when ``../ProdRules/<that name>`` already exists
    Todo:
      currently not handling synthetic graphs in this version of dimacs_td_ct

    NOTE(review): statement nesting was reconstructed from flattened source
    text -- verify against the original file.
    """
    G = oriG
    if G is None: return (1)
    prod_rules = {}

    t_basename = os.path.basename(tdfname)
    out_tdfname = os.path.basename(t_basename) + ".prs"
    if os.path.exists("../ProdRules/" + out_tdfname):
        # print "==> exists:", out_tdfname
        return out_tdfname
    # else:
    #   print ("create folder ../ProdRules")

    print "../ProdRules/" + out_tdfname, tdfname

    with open(tdfname, 'r') as f:  # read tree decomp from inddgo
        lines = f.readlines()
        lines = [x.rstrip('\r\n') for x in lines]

    # 'B' lines are bags (field 1 = bag id, fields 3.. = vertices).
    cbags = {}
    bags = [x.split() for x in lines if x.startswith('B')]

    for b in bags:
        cbags[int(b[1])] = [int(x) for x in b[3:]]  # what to do with bag size?

    # 'e' lines are tree edges between bag ids.
    edges = [x.split()[1:] for x in lines if x.startswith('e')]
    edges = [[int(k) for k in x] for x in edges]

    tree = defaultdict(set)
    for s, t in edges:
        tree[frozenset(cbags[s])].add(frozenset(cbags[t]))
    if DEBUG: print '.. # of keys in `tree`:', len(tree.keys())

    root = list(tree)[0]
    # The decomposition is always rooted at bag 1.
    root = frozenset(cbags[1])
    T = td.make_rooted(tree, root)
    # nfld.unfold_2wide_tuple(T) # lets me display the tree's frozen sets

    T = phrg.binarize(T)
    root = list(T)[0]
    root, children = T
    # td.new_visit(T, G, prod_rules, TD)
    # print ">>",len(T)

    # NOTE(review): this looks like leftover debugging -- the interpreter
    # exits here, so nothing below runs and no rules are produced.
    print type(G)
    exit()
    td.new_visit(T, G, prod_rules)

    if 0: print "--------------------"
    if 0: print "- Production Rules -"
    if 0: print "--------------------"

    for k in prod_rules.iterkeys():
        if DEBUG: print k
        s = 0
        for d in prod_rules[k]:
            s += prod_rules[k][d]
        for d in prod_rules[k]:
            prod_rules[k][d] = float(prod_rules[k][d]) / float(
                s)  # normalization step to create probs not counts.
            if DEBUG: print '\t -> ', d, prod_rules[k][d]

    # Flatten {lhs: {rhs: prob}} into (rule id, lhs, rhs parts, prob) tuples.
    rules = []
    id = 0
    for k, v in prod_rules.iteritems():
        sid = 0
        for x in prod_rules[k]:
            rhs = re.findall("[^()]+", x)
            rules.append(
                ("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs,
                 prod_rules[k][x]))
            if 0: print("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x])
            sid += 1
        id += 1

    # print rules
    if 0: print "--------------------"
    if 0: print '- P. Rules', len(rules)
    if 0: print "--------------------"

    # ToDo.
    # Let's save these rules to file or print proper
    # NOTE(review): the call below is presumably still live; the flattened
    # source does not show whether it was commented out -- confirm.
    write_prod_rules_to_tsv(rules, out_tdfname)

    # g = pcfg.Grammar('S')
    # for (id, lhs, rhs, prob) in rules:
    #   g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

    # Synthetic Graphs
    #   hStars = grow_exact_size_hrg_graphs_from_prod_rules(rules, graph_name, G.number_of_nodes(), 20)
    #   # metricx = ['degree', 'hops', 'clust', 'assort', 'kcore', 'gcd'] # 'eigen'
    #   metricx = ['gcd','avgdeg']
    #   metrics.network_properties([G], metricx, hStars, name=graph_name, out_tsv=True)

    return out_tdfname
def gcd(): num_nodes = 1000 ba_G = nx.barabasi_albert_graph(num_nodes, 3) er_G = nx.erdos_renyi_graph(num_nodes, .1) ws_G = nx.watts_strogatz_graph(num_nodes, 8, .1) nws_G = nx.newman_watts_strogatz_graph(num_nodes, 8, .1) graphs = [ba_G, er_G, ws_G, nws_G] samples = 50 for G in graphs: chunglu_M = [] for i in range(0, samples): chunglu_M.append(nx.expected_degree_graph(G.degree())) HRG_M, degree = HRG.stochastic_hrg(G, samples) pHRG_M = PHRG.probabilistic_hrg(G, samples) kron_M = [] rmat_M = [] for i in range(0, samples): P = kronfit(G) k = math.log(num_nodes, 2) kron_M.append( product.kronecker_random_graph(int(math.floor(k)), P, directed=False)) df_g = metrics.external_rage(G) gcd_chunglu = [] gcd_phrg = [] gcd_hrg = [] gcd_kron = [] for chunglu_M_s in chunglu_M: df_chunglu = metrics.external_rage(chunglu_M_s) rgfd = metrics.tijana_eval_rgfd(df_g, df_chunglu) gcm_g = metrics.tijana_eval_compute_gcm(df_g) gcm_h = metrics.tijana_eval_compute_gcm(df_chunglu) gcd_chunglu.append(metrics.tijana_eval_compute_gcd(gcm_g, gcm_h)) for HRG_M_s in HRG_M: df_hrg = metrics.external_rage(HRG_M_s) rgfd = metrics.tijana_eval_rgfd(df_g, df_hrg) gcm_g = metrics.tijana_eval_compute_gcm(df_g) gcm_h = metrics.tijana_eval_compute_gcm(df_hrg) gcd_hrg.append(metrics.tijana_eval_compute_gcd(gcm_g, gcm_h)) for pHRG_M_s in pHRG_M: df_phrg = metrics.external_rage(pHRG_M_s) rgfd = metrics.tijana_eval_rgfd(df_g, df_phrg) gcm_g = metrics.tijana_eval_compute_gcm(df_g) gcm_h = metrics.tijana_eval_compute_gcm(df_phrg) gcd_phrg.append(metrics.tijana_eval_compute_gcd(gcm_g, gcm_h)) for kron_M_s in kron_M: df_kron = metrics.external_rage(kron_M_s) rgfd = metrics.tijana_eval_rgfd(df_g, df_kron) gcm_g = metrics.tijana_eval_compute_gcm(df_g) gcm_h = metrics.tijana_eval_compute_gcm(df_kron) gcd_kron.append(metrics.tijana_eval_compute_gcd(gcm_g, gcm_h)) print gcd_chunglu print gcd_hrg print gcd_phrg print gcd_kron print print