def ba_control_hrg(v_lst):
    grow_graphs = False
    v_lst = [int(n) for n in v_lst]  # set of nodes to generate BA graphs
    data = []
    prules_lst = []

    for n_v in v_lst:
        # nxgobj = nx.barabasi_albert_graph(n_v, np.random.choice(range(1, n_v)))
        nxgobj = nx.barabasi_albert_graph(n_v, 3)
        nxgobj.name = "ba_%d_%d" % (nxgobj.number_of_nodes(), nxgobj.number_of_edges())
        print "ba", nxgobj.number_of_nodes(), nxgobj.number_of_edges()
        data.append(nxgobj)

        prod_rules = phrg.probabilistic_hrg_deriving_prod_rules(nxgobj)
        df = pd.DataFrame(list(prod_rules))
        out_base_fname = "ba_cntrl_%d" % (n_v)
        ofname = "Results/" + out_base_fname + ".tsv"
        df.to_csv(ofname, sep="\t", header=False, index=False)
        prules_lst.append(prod_rules)

        g = pcfg.Grammar('S')
        for (id, lhs, rhs, prob) in df.values:
            g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

        num_nodes = nxgobj.number_of_nodes()
        print " ", "Starting max size", 'n=', num_nodes
        g.set_max_size(num_nodes)
        print " ", "Done with max size"

        Hstars = []
        num_samples = 10
        for i in range(0, num_samples):
            try:
                rule_list = g.sample(num_nodes)
            except Exception, e:
                print str(e)
                traceback.print_exc()
                continue
            hstar = phrg.grow(rule_list, g)[0]
            Hstars.append(hstar)

        print " ", 'Save BA production rules'
        if os.path.exists(ofname):
            print '\tSaved to disk:', ofname

        if 0:
            metricx = ['degree', 'clust', 'hop', 'gcd']
            metrics.network_properties([nxgobj], metricx, Hstars,
                                       name=nxgobj.name, out_tsv=False)
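# Usage sketch (sizes are illustrative): each entry in v_lst yields one
# Barabasi-Albert graph, its derived HRG rules saved under
# Results/ba_cntrl_<n>.tsv, and 10 sampled replicas.
#
#   ba_control_hrg([100, 500, 1000])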
def grow_exact_size_hrg_graphs_from_prod_rules(prod_rules, gname, n, runs=1):
    """
    Args:
        prod_rules: production rules (model)
        gname: graph name
        n: target graph order (number of nodes)
        runs: how many graphs to generate

    Returns:
        list of synthetic graphs
    """
    if n <= 0:
        sys.exit(1)

    g = pcfg.Grammar('S')
    for (id, lhs, rhs, prob) in prod_rules:
        g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

    num_nodes = n
    if DEBUG: print "Starting max size"
    g.set_max_size(num_nodes)
    if DEBUG: print "Done with max size"

    hstars_lst = []
    for i in range(0, runs):
        rule_list = g.sample(num_nodes)
        hstar = phrg.grow(rule_list, g)[0]
        hstars_lst.append(hstar)

    return hstars_lst
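# Usage sketch (the rule tuple is illustrative; real tuples come from the
# rule-derivation functions in this module and follow the same
# (id, lhs, rhs, prob) shape):
#
#   rules = [("r0.0", "S", ["0,1:T"], 1.0)]
#   hstars = grow_exact_size_hrg_graphs_from_prod_rules(rules, "toy", 100, runs=5)
#   for h in hstars:
#       print h.number_of_nodes(), h.number_of_edges()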
def main():
    gname = graph_name(sys.argv[1])
    print gname

    concat_prs = "ProdRules/{}_concat.prs".format(gname)
    if not os.path.exists(concat_prs):
        G = load_edgelist(sys.argv[1])
        print "[<>]", "read the graph"
        lcc = max(nx.connected_component_subgraphs(G), key=len)  # largest connected component
        Glst = sample_rand_subgraphs_in(lcc)
        concat_phrg_prod_rules([x for x in Glst], G.name)  # base prod rules from subgraphs

        dimacs_files = glob("datasets/{}*.dimacs".format(gname))
        var_el_lst = ['mcs', 'mind', 'minf', 'mmd', 'lexm', 'mcsm']
        for gfname in dimacs_files:
            for ve in var_el_lst:
                multiprocessing.Process(target=dimacs_inddgo_tree_decomps,
                                        args=(ve, gfname,)).start()
        print "[<>]", "checks on the edgelist vs the orig graph"

        ## --
        convert_dimacs_trees_to_cliquetrees(gname)
        print "[<>]", "convert_dimacs_trees_to_cliquetrees"

        ## --
        elfiles = glob(".tmp_edgelists/{}*tsv".format(gname))
        subgraphs = [load_edgelist(f) for f in elfiles]
        prod_rules = [phrg.probabilistic_hrg_deriving_prod_rules(G) for G in subgraphs]
        import itertools
        prod_rules = list(itertools.chain.from_iterable(prod_rules))
        pd.DataFrame(prod_rules).to_csv(concat_prs, sep="\t", header=False, index=False)

    print "[<>]", "concat hrg prod_rules:", concat_prs
def Hstar_Graphs_Control(G, graph_name, axs=None):
    # Derive the prod rules in a naive way, where the graph is treated as
    # static (edge timestamps are ignored)
    prod_rules = phrg.probabilistic_hrg_learning(G)
    pp.pprint(prod_rules)

    g = pcfg.Grammar('S')
    for (id, lhs, rhs, prob) in prod_rules:
        g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

    num_nodes = G.number_of_nodes()
    print "Starting max size", 'n=', num_nodes
    g.set_max_size(num_nodes)
    print "Done with max size"

    Hstars = []
    num_samples = 20
    print '*' * 40
    for i in range(0, num_samples):
        rule_list = g.sample(num_nodes)
        hstar = phrg.grow(rule_list, g)[0]
        Hstars.append(hstar)

    # if 0:
    #     g = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr=['ts'])
    #     draw_degree_whole_graph(g, axs)
    #     draw_degree(Hstars, axs=axs, col='r')
    #     # axs.set_title('Rules derived by ignoring time')
    #     axs.set_ylabel('Frequency')
    #     axs.set_xlabel('degree')

    if 0:
        # metricx = ['degree', 'hops', 'clust', 'assort', 'kcore', 'eigen', 'gcd']
        metricx = ['gcd']
        # g = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr=['ts'])
        # graph_name = os.path.basename(f_path).rstrip('.tel')
        if DBG:
            print ">", graph_name
        metrics.network_properties([G], metricx, Hstars,
                                   name=graph_name, out_tsv=True)
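# Usage sketch (illustrative; any connected NetworkX graph works):
#
#   G = nx.karate_club_graph()
#   Hstar_Graphs_Control(G, 'karate')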
def exec_call(arg_a):
    """
    Earlier subprocess-based version, kept for reference:

        call_out = ""
        args = ["python exact_phrg.py --orig {} --prs".format(arg_a)]
        print args
        while not call_out:
            popen = subprocess.Popen(args, stdout=subprocess.PIPE, shell=True)
            popen.wait()
            out, err = popen.communicate()
            call_out = out.split('\n')
        print call_out, out, err
    """
    G = load_edgelist(arg_a)
    prod_rules = phrg.probabilistic_hrg_deriving_prod_rules(G)
    pp.pprint(prod_rules)
def grow_exact_size_hrg_graphs_from_prod_rules(prod_rules, gname, n, runs=1):
    """
    Args:
        prod_rules: production rules (model)
        gname: graph name
        n: target graph order (number of nodes)
        runs: how many graphs to generate

    Returns:
        list of synthetic graphs
    """
    nslog("grow_exact_size_hrg_graphs_from_prod_rules")
    DBG = True
    if n <= 0:
        sys.exit(1)

    g = pcfg.Grammar('S')
    for (id, lhs, rhs, prob) in prod_rules:
        g.add_rule(pcfg.Rule(id, lhs, rhs, prob))
    print
    print "Added rules to HRG (pr:", len(prod_rules), ", n:", n, ")"

    num_nodes = n
    if DBG: print "Starting max size"
    g.set_max_size(num_nodes)
    if DBG: print "Done with max size"

    hstars_lst = []
    print " ",
    for i in range(0, runs):
        print '>',
        rule_list = g.sample(num_nodes)
        hstar = phrg.grow(rule_list, g)[0]
        hstars_lst.append(hstar)

    return hstars_lst
    # ['r8.1', 'A,B,C,D,E,F',
    #  ['0,B:T', '0,C:T', '0,D:T', '0,E:T', '0,F:T', '0,A:T', 'A,0,D,E,F:N'], 0.5]
    # ]
    g = pcfg.Grammar('S')
    for (id, lhs, rhs, prob) in rules:
        g.add_rule(pcfg.Rule(id, lhs, rhs, prob))
    print 'Grammar g loaded.'

    # Synthetic Graphs
    # num_nodes = int(sys.argv[-1])
    g.set_max_size(num_nodes)

    hStars = []
    for i in range(20):
        rule_list = g.sample(num_nodes)
        hstar = phrg.grow(rule_list, g)[0]
        hStars.append(hstar)
        print i, hstar.number_of_nodes(), hstar.number_of_edges()

    metricx = ['degree', 'hops', 'clust', 'gcd']
    metrics.network_properties([G], metricx, hStars, name=graph_name, out_tsv=True)

    # parser = get_parser()
    # args = vars(parser.parse_args())
    # try:
    #     main(args)
    # except Exception, e:
    #     print str(e)
    #     traceback.print_exc()
    #     sys.exit(1)
    # sys.exit(0)
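# Anatomy of a rule tuple, as suggested by the commented sample above (an
# interpretation, not a spec): 'r8.1' is the rule id, 'A,B,C,D,E,F' the LHS
# nonterminal with its external nodes, each RHS entry lists an edge's
# endpoints with ':T' apparently marking a terminal edge and ':N' a
# nonterminal to be expanded further, and 0.5 is the rule probability.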
def dimacs_td_ct(tdfname, synthg=False):
    """ tree decomposition to clique tree """
    if isinstance(tdfname, list):
        return [dimacs_td_ct(f) for f in tdfname]

    fname = tdfname
    graph_name = os.path.basename(fname)
    gname = graph_name.split('.')[0]
    if synthg:
        gfname = 'datasets/' + gname + ".dimacs"
    else:
        gfname = "datasets/out." + gname
    print os.path.basename(fname).split('.')[-2]
    tdh = os.path.basename(fname).split('.')[-2]  # tree decomposition heuristic
    tfname = gname + "." + tdh

    if synthg:
        G = load_edgelist(tdfname.split('.')[0] + ".dimacs")
    else:
        G = load_edgelist(gfname)
    if DEBUG: print nx.info(G)

    if not os.path.exists(fname):
        print fname, 'this file does not exist (possible failure in the TD step)'
        return ''

    with open(fname, 'r') as f:  # read tree decomposition from INDDGO
        lines = f.readlines()
        lines = [x.rstrip('\r\n') for x in lines]

    cbags = {}
    bags = [x.split() for x in lines if x.startswith('B')]
    for b in bags:
        cbags[int(b[1])] = [int(x) for x in b[3:]]  # what to do with bag size?

    edges = [x.split()[1:] for x in lines if x.startswith('e')]
    edges = [[int(k) for k in x] for x in edges]

    tree = defaultdict(set)
    for s, t in edges:
        tree[frozenset(cbags[s])].add(frozenset(cbags[t]))
    if DEBUG: print '.. # of keys in `tree`:', len(tree.keys())
    if DEBUG: print tree.keys()

    root = list(tree)[0]
    if DEBUG: print '.. Root:', root
    root = frozenset(cbags[1])
    if DEBUG: print '.. Root:', root
    T = td.make_rooted(tree, root)
    if DEBUG: print '.. T rooted:', len(T)
    # nfld.unfold_2wide_tuple(T)  # lets me display the tree's frozen sets
    T = phrg.binarize(T)

    prod_rules = {}
    td.new_visit(T, G, prod_rules)

    if DEBUG: print "--------------------"
    if DEBUG: print "- Production Rules -"
    if DEBUG: print "--------------------"

    for k in prod_rules.iterkeys():
        if DEBUG: print k
        s = 0
        for d in prod_rules[k]:
            s += prod_rules[k][d]
        for d in prod_rules[k]:
            prod_rules[k][d] = float(prod_rules[k][d]) / float(s)  # normalization: counts -> probabilities
            if DEBUG: print '\t -> ', d, prod_rules[k][d]

    rules = []
    id = 0
    for k, v in prod_rules.iteritems():
        sid = 0
        for x in prod_rules[k]:
            rhs = re.findall("[^()]+", x)
            rules.append(("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0],
                          rhs, prod_rules[k][x]))
            if DEBUG:
                print("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0],
                      rhs, prod_rules[k][x])
            sid += 1
        id += 1

    df = pd.DataFrame(rules)
    outdf_fname = "ProdRules/" + tfname + "_iprules.tsv"
    if not os.path.isfile(outdf_fname):
        df.to_csv(outdf_fname, header=False, index=False, sep="\t")
    else:
        print '\t', outdf_fname, "file exists"
    return outdf_fname
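# Format sketch of the INDDGO tree-decomposition files parsed above (inferred
# from the parsing code, not from an INDDGO spec): 'B <bag-id> <bag-size>
# <member nodes...>' lines define bags, and 'e <s> <t>' lines connect bags:
#
#   B 1 3 2 5 7
#   B 2 2 5 9
#   e 1 2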
def isomorphic_test_from_dimacs_tree(orig, tdfname, gname="", iargs=""):
    # iargs is expected to be dict-like with a 'cnts' key (see below)
    # if given the whole tree path, use it;
    # else, assume a path fragment
    print '... path fragment:', tdfname
    print '... input graph :', orig

    G = load_edgelist(orig)  # load edgelist into a graph object
    N = G.number_of_nodes()
    M = G.number_of_edges()

    # +++ graph checks
    if G is None:
        sys.exit(1)
    G.remove_edges_from(G.selfloop_edges())
    giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
    G = nx.subgraph(G, giant_nodes)
    graph_checks(G)
    # --- graph checks

    G.name = gname

    files = glob(tdfname + "*.dimacs.tree")
    prod_rules = {}
    stacked_df = pd.DataFrame()

    mat_dict = {}
    for i, x in enumerate(sorted(files)):
        mat_dict[os.path.basename(x).split(".")[0].split("_")[-1]] = i
        if DBG: print os.path.basename(x).split(".")[0].split("_")[-1]

    for tfname in files:
        tname = os.path.basename(tfname).split(".")
        tname = "_".join(tname[:2])

        with open(tfname, 'r') as f:  # read tree decomposition from INDDGO
            lines = f.readlines()
            lines = [x.rstrip('\r\n') for x in lines]

        cbags = {}
        bags = [x.split() for x in lines if x.startswith('B')]
        for b in bags:
            cbags[int(b[1])] = [int(x) for x in b[3:]]  # what to do with bag size?

        edges = [x.split()[1:] for x in lines if x.startswith('e')]
        edges = [[int(k) for k in x] for x in edges]

        tree = defaultdict(set)
        for s, t in edges:
            tree[frozenset(cbags[s])].add(frozenset(cbags[t]))
        if DBG: print '.. # of keys in `tree`:', len(tree.keys())

        root = list(tree)[0]
        root = frozenset(cbags[1])
        T = td.make_rooted(tree, root)
        # nfld.unfold_2wide_tuple(T)  # lets me display the tree's frozen sets
        T = phrg.binarize(T)
        # root = list(T)[0]
        # root, children = T
        # td.new_visit(T, G, prod_rules, TD)
        td.new_visit(T, G, prod_rules)

        for k in prod_rules.iterkeys():
            if DBG: print k
            s = 0
            for d in prod_rules[k]:
                s += prod_rules[k][d]
            for d in prod_rules[k]:
                prod_rules[k][d] = float(prod_rules[k][d]) / float(s)  # normalization: counts -> probabilities
                if DBG: print '\t -> ', d, prod_rules[k][d]

        if DBG: print "--------------------"
        if DBG: print '- Prod. Rules'
        if DBG: print "--------------------"

        rules = []
        id = 0
        for k, v in prod_rules.iteritems():
            sid = 0
            for x in prod_rules[k]:
                rhs = re.findall("[^()]+", x)
                rules.append(("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0],
                              rhs, prod_rules[k][x]))
                if DBG:
                    print "r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x]
                sid += 1
            id += 1

        df = pd.DataFrame(rules)
        print df.shape
        df['cate'] = tname
        stacked_df = pd.concat([df, stacked_df])

    if iargs['cnts']:
        return stacked_df, mat_dict

    np_sqr_mtrx = jaccard_coeff_isomorphic_rules_check(stacked_df, mat_dict)
    print gname
    df = pd.DataFrame(np_sqr_mtrx, columns=[x for x in sorted(mat_dict.keys())])
    df.index = sorted(mat_dict.keys())
    df.to_csv("Results/{}_isom_jaccardsim.tsv".format(gname), sep=",")

    return stacked_df, mat_dict  # ToDo: not sure if I want to return this
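# For reference, a minimal Jaccard-coefficient sketch (an assumption about the
# set-overlap idea behind jaccard_coeff_isomorphic_rules_check, which is
# defined elsewhere in this codebase and, per its name, also checks rule
# isomorphism):
#
#   def jaccard(a, b):
#       a, b = set(a), set(b)
#       return len(a & b) / float(len(a | b)) if (a or b) else 0.0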
def get_hrg_production_rules(edgelist_data_frame, graph_name, tw=False, n_subg=2, n_nodes=300):
    from tdec.growing import derive_prules_from
    nslog("get_hrg_production_rules")

    df = edgelist_data_frame
    if df.shape[1] == 4:
        G = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr=True)  # whole graph
    elif df.shape[1] == 3:
        G = nx.from_pandas_dataframe(df, 'src', 'trg', ['ts'])  # whole graph
    else:
        G = nx.from_pandas_dataframe(df, 'src', 'trg')
    G.name = graph_name

    G.remove_edges_from(G.selfloop_edges())
    giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
    G = nx.subgraph(G, giant_nodes)
    num_nodes = G.number_of_nodes()

    phrg.graph_checks(G)

    if DBG: print
    if DBG: print "--------------------"
    if DBG: print "-Tree Decomposition-"
    if DBG: print "--------------------"

    prod_rules = {}
    K = n_subg
    n = n_nodes
    if num_nodes >= 500:
        print 'Grande'
        for Gprime in gs.rwr_sample(G, K, n):
            T = td.quickbb(Gprime)
            root = list(T)[0]
            T = td.make_rooted(T, root)
            T = phrg.binarize(T)
            root = list(T)[0]
            root, children = T
            # td.new_visit(T, G, prod_rules, TD)
            td.new_visit(T, G, prod_rules)
    else:
        T = td.quickbb(G)
        root = list(T)[0]
        T = td.make_rooted(T, root)
        T = phrg.binarize(T)
        root = list(T)[0]
        root, children = T
        # td.new_visit(T, G, prod_rules, TD)
        td.new_visit(T, G, prod_rules)

    if tw:
        print_treewidth(T)
        exit()

    ## --
    print("prod_rules:", len(prod_rules), type(prod_rules))
    if DBG: print
    if DBG: print "--------------------"
    if DBG: print "- Production Rules -"
    if DBG: print "--------------------"

    for k in prod_rules.iterkeys():
        if DBG: print k
        s = 0
        for d in prod_rules[k]:
            s += prod_rules[k][d]
        for d in prod_rules[k]:
            prod_rules[k][d] = float(prod_rules[k][d]) / float(s)  # normalization: counts -> probabilities
            if DBG: print '\t -> ', d, prod_rules[k][d]

    rules = []
    id = 0
    for k, v in prod_rules.iteritems():
        sid = 0
        for x in prod_rules[k]:
            rhs = re.findall("[^()]+", x)
            rules.append(("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0],
                          rhs, prod_rules[k][x]))
            if DBG:
                print("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0],
                      rhs, prod_rules[k][x])
            sid += 1
        id += 1

    df = pd.DataFrame(rules)
    print "++++++++++"
    df.to_csv('ProdRules/{}_prs.tsv'.format(G.name), header=False, index=False, sep="\t")
    if os.path.exists('ProdRules/{}_prs.tsv'.format(G.name)):
        print 'Saved', 'ProdRules/{}_prs.tsv'.format(G.name)
    else:
        print "Trouble saving"
    print "-----------"
    print [type(x) for x in rules[0]]

    '''
    Graph Generation of Synthetic Graphs
    Grow graphs using the union of rules from sampled subgraphs to predict
    the target order of the original graph.
    '''
    hStars = grow_exact_size_hrg_graphs_from_prod_rules(rules, graph_name,
                                                        G.number_of_nodes(), 10)
    print '... hStar graphs:', len(hStars)

    if 0:
        metricx = ['degree', 'hops', 'clust', 'assort', 'kcore', 'eigen', 'gcd']
        metricx = ['gcd']
        metrics.network_properties([G], metricx, hStars,
                                   name=graph_name, out_tsv=False)
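# Usage sketch (a hypothetical toy edgelist; 'src'/'trg' are the column names
# this function expects, and n_subg/n_nodes only matter for graphs with 500+
# nodes, where subgraphs are sampled first):
#
#   df = pd.DataFrame([(0, 1), (1, 2), (2, 0), (2, 3)], columns=['src', 'trg'])
#   get_hrg_production_rules(df, 'toy_graph')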
def dimacs_td_ct_fast(oriG, tdfname):
    """ tree decomposition to clique tree

    Parameters:
        oriG:    the original (input) graph, as a NetworkX graph object
        tdfname: filepath to the tree decomposition from INDDGO

    ToDo: this version of dimacs_td_ct does not yet handle synthetic
    (synthg) input graphs.
    """
    G = oriG
    if G is None:
        return 1

    graph_checks(G)  # --- graph checks

    prod_rules = {}

    t_basename = os.path.basename(tdfname)
    out_tdfname = t_basename + ".prs"
    if os.path.exists("ProdRules/" + out_tdfname):
        print "==> exists:", out_tdfname
        return out_tdfname
    if 0: print "ProdRules/" + out_tdfname, tdfname

    with open(tdfname, 'r') as f:  # read tree decomposition from INDDGO
        lines = f.readlines()
        lines = [x.rstrip('\r\n') for x in lines]

    cbags = {}
    bags = [x.split() for x in lines if x.startswith('B')]
    for b in bags:
        cbags[int(b[1])] = [int(x) for x in b[3:]]  # what to do with bag size?

    edges = [x.split()[1:] for x in lines if x.startswith('e')]
    edges = [[int(k) for k in x] for x in edges]

    tree = defaultdict(set)
    for s, t in edges:
        tree[frozenset(cbags[s])].add(frozenset(cbags[t]))
    if DEBUG: print '.. # of keys in `tree`:', len(tree.keys())

    root = list(tree)[0]
    root = frozenset(cbags[1])
    T = td.make_rooted(tree, root)
    # nfld.unfold_2wide_tuple(T)  # lets me display the tree's frozen sets
    T = phrg.binarize(T)
    root = list(T)[0]
    root, children = T
    # td.new_visit(T, G, prod_rules, TD)
    td.new_visit(T, G, prod_rules)

    if 0: print "--------------------"
    if 0: print "- Production Rules -"
    if 0: print "--------------------"

    for k in prod_rules.iterkeys():
        if DEBUG: print k
        s = 0
        for d in prod_rules[k]:
            s += prod_rules[k][d]
        for d in prod_rules[k]:
            prod_rules[k][d] = float(prod_rules[k][d]) / float(s)  # normalization: counts -> probabilities
            if DEBUG: print '\t -> ', d, prod_rules[k][d]

    rules = []
    id = 0
    for k, v in prod_rules.iteritems():
        sid = 0
        for x in prod_rules[k]:
            rhs = re.findall("[^()]+", x)
            rules.append(("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0],
                          rhs, prod_rules[k][x]))
            if 0:
                print("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0],
                      rhs, prod_rules[k][x])
            sid += 1
        id += 1

    if 0: print "--------------------"
    if 0: print '- P. Rules', len(rules)
    if 0: print "--------------------"

    '''
    # ToDo: save these rules to file or print properly
    df = DataFrame(rules)
    print "out_tdfname:", out_tdfname
    df.to_csv("ProdRules/" + out_tdfname, sep="\t", header=False, index=False)
    '''

    # g = pcfg.Grammar('S')
    # for (id, lhs, rhs, prob) in rules:
    #     g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

    # Synthetic graphs
    # hStars = grow_exact_size_hrg_graphs_from_prod_rules(rules, graph_name, G.number_of_nodes(), 20)
    # metricx = ['degree', 'hops', 'clust', 'assort', 'kcore', 'gcd']  # 'eigen'
    # metricx = ['gcd', 'avgdeg']
    # metrics.network_properties([G], metricx, hStars, name=graph_name, out_tsv=True)

    return ""