def will_prod_rules_fire(prs_files_lst, nbr_nodes):
    if not len(prs_files_lst): return
    from td_isom_jaccard_sim import listify_rhs
    ret_val = []
    for fname in prs_files_lst:
        # Read the subset of prod rules; the rhs columns are read as strings
        # (pandas cannot parse a `list` dtype) and split into lists below
        df = pd.read_csv(fname, header=None, sep="\t",
                         dtype={0: str, 1: str, 2: str, 3: float})
        g = pcfg.Grammar('S')
        for (id, lhs, rhs, prob) in df.values:
            rhs = listify_rhs(rhs)
            g.add_rule(pcfg.Rule(id, lhs, rhs, float(prob)))
        num_nodes = nbr_nodes[0]
        # print "Starting max size", 'n=', num_nodes, type(num_nodes)
        # Record whether this rule set can fire at the target size
        # (mirrors probe_stacked_prs_likelihood_tofire below)
        try:
            g.set_max_size(num_nodes)
            ret_val.append(True)
        except Exception:
            ret_val.append(False)
    return ret_val
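# Usage sketch for the probe above. Hedged assumptions: the "../ProdRules"
# layout and the .prs extension follow hstar_fixed_graph_gen below, and the
# files are tab-separated stacked rules; 64 is an arbitrary target size.
def _demo_will_prod_rules_fire():
    prs_files = glob("../ProdRules/*.prs")
    fired = will_prod_rules_fire(prs_files, [64])
    for fname, ok in zip(prs_files, fired):
        print fname, '=>', ok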
def grow_exact_size_hrg_graphs_from_prod_rules(prod_rules, gname, n, runs=1):
    """
    Args:
        prod_rules: production rules (model)
        gname: graph name
        n: target graph order (number of nodes)
        runs: how many graphs to generate

    Returns:
        list of synthetic graphs
    """
    if n <= 0: sys.exit(1)
    print runs
    print n
    print gname
    for i, x in enumerate(prod_rules):
        print i, ' ', x[:1]

    g = pcfg.Grammar('S')
    for (id, lhs, rhs, prob) in prod_rules:
        g.add_rule(pcfg.Rule(id, lhs, rhs, prob))
    print '... pcfg.Grammar'

    g.set_max_size(n)
    print "Done with max size"

    if DBG: print '*' * 40
    hstars_lst = []
    for i in range(0, runs):
        rule_list = g.sample(n)
        print 'g.sample'
        hstar = PHRG.grow(rule_list, g)[0]
        hstars_lst.append(hstar)
    return hstars_lst
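# Usage sketch for the grower above: derive rules from a small built-in graph
# with PHRG.probabilistic_hrg_learning (the same entry point used in
# Hstar_Graphs_Control below), then regrow two graphs of the same order.
def _demo_grow_exact_size():
    G = nx.karate_club_graph()
    rules = PHRG.probabilistic_hrg_learning(G)
    hstars = grow_exact_size_hrg_graphs_from_prod_rules(
        rules, 'karate', G.number_of_nodes(), runs=2)
    for h in hstars:
        print h.number_of_nodes(), h.number_of_edges()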
def Hstar_Graphs_Ignore_Time(df, graph_name, tslices, axs):
    if len(df.columns) == 3:
        G = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr='ts')
    else:
        G = nx.from_pandas_dataframe(df, 'src', 'trg')

    # force to unrepeated edges
    if 0: print nx.info(G)
    G = G.to_undirected()
    if 0:
        print nx.info(G)
        exit()

    # Derive the prod rules in a naive way, ignoring the timestamps
    prod_rules = PHRG.probabilistic_hrg_learning(G)
    g = pcfg.Grammar('S')
    for (id, lhs, rhs, prob) in prod_rules:
        g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

    num_nodes = G.number_of_nodes()
    print "Starting max size"
    g.set_max_size(num_nodes)
    print "Done with max size"

    Hstars = []
    num_samples = 20
    print '*' * 40
    for i in range(0, num_samples):
        rule_list = g.sample(num_nodes)
        hstar = PHRG.grow(rule_list, g)[0]
        Hstars.append(hstar)

    # if 0:
    #     g = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr=['ts'])
    #     draw_degree_whole_graph(g, axs)
    #     draw_degree(Hstars, axs=axs, col='r')
    #     # axs.set_title('Rules derived by ignoring time')
    #     axs.set_ylabel('Frequency')
    #     axs.set_xlabel('degree')

    if 1:
        # metricx = ['degree', 'hops', 'clust', 'assort', 'kcore', 'eigen', 'gcd']
        metricx = ['eigen']
        g = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr=['ts'])
        # graph_name = os.path.basename(f_path).rstrip('.tel')
        print ">", graph_name
        metrics.network_properties([g], metricx, Hstars, name=graph_name, out_tsv=True)
def Growing_Network_Using_Final_State_ProdRules(pddf, prod_rules, nSlices, kSlice, axs):
    '''
    Grow a synthetic graph up to the end of block kSlice using HRG rules
    learned from the final (whole) state of the graph.

    pddf:       pandas df
    prod_rules: production rules learned on the entire graph
    nSlices:    total number of blocks (pseudo-states of the graph)
    kSlice:     the current slice
    axs:        axes to plot to
    '''
    span = (pddf['ts'].max() - pddf['ts'].min()) / nSlices
    g = pcfg.Grammar('S')
    for (id, lhs, rhs, prob) in prod_rules:
        g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

    # Cumulative mask: everything from the start through the end of block kSlice
    # mask = (pddf['ts'] >= pddf['ts'].min() + span * kSlice) & (pddf['ts'] < pddf['ts'].min() + span * (kSlice + 1))
    mask = (pddf['ts'] >= pddf['ts'].min()) & (pddf['ts'] < pddf['ts'].min() + span * (kSlice + 1))
    ldf = pddf.loc[mask]
    G = nx.from_pandas_dataframe(ldf, 'src', 'trg', ['ts'])

    num_nodes = G.number_of_nodes()
    print "Starting max size"
    g.set_max_size(num_nodes)
    print "Done with max size"

    num_samples = 20
    print '*' * 40
    tdf = pd.DataFrame()
    for i in range(0, num_samples):
        rule_list = g.sample(num_nodes)
        hstar = PHRG.grow(rule_list, g)[0]
        df = pd.DataFrame.from_dict(hstar.degree().items())
        # accumulate one degree-histogram column per sample
        tdf = pd.concat([tdf, df.groupby([1]).count()], axis=1)
    tdf = tdf[0].mean(axis=1)  # average the per-sample degree histograms
    tdf.plot(ax=axs, color='r', label='Synth')

    # Orig Graph
    tdf = pd.DataFrame.from_dict(G.degree().items())
    gb = tdf.groupby([1]).count()
    gb[0].plot(ax=axs, color='b', label='Orig')
    axs.set_xscale('log')
def probe_stacked_prs_likelihood_tofire(df, fname="", nbr_nodes=0):
    Info("probe stacked prs likelihood tofire")
    g = pcfg.Grammar('S')
    df = df[['rnbr', 'lhs', 'rhs', 'prob']]  # ToDo: need to drop the gname column
    for (id, lhs, rhs, prob) in df.values.tolist():  # 21Nov17
        g.add_rule(pcfg.Rule(id, lhs, rhs, float(prob)))

    num_nodes = int(nbr_nodes)
    try:
        g.set_max_size(num_nodes)
    except Exception, e:
        # print "Done with max size"
        print "\t:", e
        # return False
        os._exit(1)
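# Calling sketch for the probe above. Hedged: the column names follow the
# df[['rnbr', 'lhs', 'rhs', 'prob']] projection in the function itself, but the
# literal rule values are hypothetical placeholders, not a documented format.
# Note that on failure the probe exits the whole process via os._exit(1).
def _demo_probe_stacked_prs():
    stacked = pd.DataFrame(
        [['r0.0', 'S', ['0,1'], 1.0]],
        columns=['rnbr', 'lhs', 'rhs', 'prob'])
    probe_stacked_prs_likelihood_tofire(stacked, fname='stacked.prs', nbr_nodes=32)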
def Hstar_Graphs_Control(G, graph_name, axs):
    print '-', 'Hstar_Graphs_Control', '-'

    # Derive the prod rules in a naive way, ignoring any edge timestamps
    prod_rules = PHRG.probabilistic_hrg_learning(G)
    print prod_rules
    g = pcfg.Grammar('S')
    for (id, lhs, rhs, prob) in prod_rules:
        g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

    num_nodes = G.number_of_nodes()
    print "Starting max size", 'n=', num_nodes
    g.set_max_size(num_nodes)
    print "Done with max size"

    Hstars = []
    num_samples = 20
    print '*' * 40
    for i in range(0, num_samples):
        rule_list = g.sample(num_nodes)
        hstar = PHRG.grow(rule_list, g)[0]
        Hstars.append(hstar)

    # if 0:
    #     g = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr=['ts'])
    #     draw_degree_whole_graph(g, axs)
    #     draw_degree(Hstars, axs=axs, col='r')
    #     # axs.set_title('Rules derived by ignoring time')
    #     axs.set_ylabel('Frequency')
    #     axs.set_xlabel('degree')

    if 1:
        # metricx = ['degree', 'hops', 'clust', 'assort', 'kcore', 'eigen', 'gcd']
        metricx = ['degree', 'gcd']
        # g = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr=['ts'])
        # graph_name = os.path.basename(f_path).rstrip('.tel')
        if DBG: print ">", graph_name
        metrics.network_properties([G], metricx, Hstars, name=graph_name, out_tsv=True)
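# Usage sketch: exercising the control pipeline on a small built-in graph.
# Assumption: matplotlib is available; a bare subplot satisfies the axs
# argument, which is only carried through to match the sibling functions.
def _demo_hstar_graphs_control():
    import matplotlib.pyplot as plt
    fig, axs = plt.subplots(1, 1)
    Hstar_Graphs_Control(nx.karate_club_graph(), 'karate', axs)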
def grow_exact_size_hrg_graphs_from_prod_rules(prod_rules, gname, n, runs=1):
    """
    Args:
        prod_rules: production rules (model)
        gname: graph name
        n: target graph order (number of nodes)
        runs: how many graphs to generate

    Returns:
        list of synthetic graphs
    """
    if n <= 0: sys.exit(1)
    # print runs
    # for i, x in enumerate(prod_rules):
    #     print i, ' ', x

    g = pcfg.Grammar('S')
    for (id, lhs, rhs, prob) in prod_rules:
        rhs = [f[1:-1] for f in re.findall("'.+?'", rhs)]  # "'a','b'" -> ['a', 'b']
        prob = float(prob)
        g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

    num_nodes = n
    if DBG: print "Starting max size"
    g.set_max_size(num_nodes)
    if DBG: print "Done with max size"

    if DBG: print '*' * 40
    hstars_lst = []
    for i in range(0, runs):
        rule_list = g.sample(num_nodes)
        hstar = PHRG.grow(rule_list, g)[0]
        hstars_lst.append(hstar)
        # print rule_list
    return hstars_lst
def hstar_fixed_graph_gen(args):
    import networkx as nx
    orig_fname = args['grow'][0]
    gn = graph_name(orig_fname)
    if os.path.exists("../datasets/{}.p".format(gn)):
        origG = nx.read_gpickle("../datasets/{}.p".format(gn))
    else:
        print("we load edgelist into an nx.obj")
        return  # without the pickled graph, origG is undefined below

    prs_files = glob("../ProdRules/{}*prs".format(gn))
    for f in prs_files:
        prod_rules = get_prod_rules(f)
        g = pcfg.Grammar('S')
        for (id, lhs, rhs, prob) in prod_rules:
            # print (id, lhs, rhs, prob)
            g.add_rule(pcfg.Rule(id, lhs, rhs, prob))
    # ToDo: we need to get these rules in the right format

    num_nodes = origG.number_of_nodes()
    print "Starting max size"
    g.set_max_size(num_nodes)
    print "Done with max size"

    Hstars = []
    num_samples = 20
    print '*' * 40
    for i in range(0, num_samples):
        rule_list = g.sample(num_nodes)
        hstar = PHRG.grow(rule_list, g)[0]
        Hstars.append(hstar)

    import pickle
    pickle.dump({'origG': origG, 'hstars': Hstars},
                open('../Results/{}_hstars.p'.format(gn), "wb"))
    if os.path.exists('../Results/{}_hstars.p'.format(gn)):
        print("Pickle written")
def grow_graphs_using_rules(production_rules, n=0, recrncs=1):
    from PHRG import grow
    if n == 0: return
    prod_rules = production_rules
    rules = []
    id = 0
    for k, v in prod_rules.iteritems():
        sid = 0
        for x in prod_rules[k]:
            rhs = re.findall("[^()]+", x)
            rules.append(("r%d.%d" % (id, sid),
                          "%s" % re.findall("[^()]+", k)[0],
                          rhs,
                          prod_rules[k][x]))
            # print ("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x])
            sid += 1
        id += 1

    g = pcfg.Grammar('S')
    for (id, lhs, rhs, prob) in rules:
        g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

    print "Starting max size"
    num_nodes = n
    g.set_max_size(num_nodes)
    print "Done with max size"

    Hstars = []
    for i in range(0, recrncs):
        rule_list = g.sample(num_nodes)
        hstar = grow(rule_list, g)[0]
        print '\tPHRG -> run:', i, str(hstar.number_of_nodes()), str(hstar.number_of_edges())
        Hstars.append(hstar)
    return Hstars
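# Sketch of the expected input for grow_graphs_using_rules: a dict of dicts as
# populated by td.new_visit in probabilistic_hrg below, mapping an "(lhs)" key
# to RHS strings and their probabilities. The literal rule text here is a
# hypothetical placeholder, not a documented format.
def _demo_grow_graphs_using_rules():
    toy_prod_rules = {"(S)": {"(0,1)": 1.0}}
    hstars = grow_graphs_using_rules(toy_prod_rules, n=16, recrncs=1)
    print hstars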
def synthetic_graph_generator(ref, graph_model):
    G = ref
    synth_graph = None
    n = ref.number_of_nodes()

    if 'hrg' in graph_model:
        prod_rules = PHRG.probabilistic_hrg_deriving_prod_rules(G)  # derive rules
        g = pcfg.Grammar('S')
        for (id, lhs, rhs, prob) in prod_rules:
            g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

        num_nodes = G.number_of_nodes()
        # print "Starting max size", 'n=', num_nodes
        g.set_max_size(num_nodes)
        # print "Done with max size"

        rule_list = g.sample(num_nodes)
        synth_graph = PHRG.grow(rule_list, g)[0]

    return synth_graph
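# One-line usage sketch for the generator above, on a small built-in graph.
def _demo_synthetic_graph_generator():
    synth = synthetic_graph_generator(nx.karate_club_graph(), 'hrg')
    print synth.number_of_nodes(), synth.number_of_edges()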
def grow_exact_size_hrg_graphs_from_prod_rules(prod_rules, gname, n, runs=1):
    """
    Args:
        prod_rules: production rules (model)
        gname: graph name
        n: target graph order (number of nodes)
        runs: how many graphs to generate

    Returns:
        list of synthetic graphs
    """
    DBG = True
    if n <= 0: sys.exit(1)

    g = pcfg.Grammar('S')
    for (id, lhs, rhs, prob) in prod_rules:
        g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

    print
    print "Added rules HRG (pr", len(prod_rules), ", n,", n, ")"

    num_nodes = n
    if DBG: print "Starting max size ..."
    t_start = time.time()
    g.set_max_size(num_nodes)
    print "Done with max size, took %s seconds" % (time.time() - t_start)

    hstars_lst = []
    print " ",
    for i in range(0, runs):
        print '>',
        rule_list = g.sample(num_nodes)
        hstar = phrg.grow(rule_list, g)[0]
        hstars_lst.append(hstar)
    return hstars_lst
def probabilistic_hrg(G, num_samples=1):
    graphletG = []

    # print G.number_of_nodes()
    # print G.number_of_edges()

    G.remove_edges_from(G.selfloop_edges())
    giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
    G = nx.subgraph(G, giant_nodes)

    num_nodes = G.number_of_nodes()
    graph_checks(G)

    print
    print "--------------------"
    print "-Tree Decomposition-"
    print "--------------------"

    if num_nodes >= 500:
        for Gprime in gs.rwr_sample(G, 2, 100):
            T = td.quickbb(Gprime)
            root = list(T)[0]
            T = td.make_rooted(T, root)
            T = binarize(T)
            root = list(T)[0]
            root, children = T
            td.new_visit(T, G, prod_rules)
    else:
        T = td.quickbb(G)
        root = list(T)[0]
        T = td.make_rooted(T, root)
        T = binarize(T)
        root = list(T)[0]
        root, children = T
        td.new_visit(T, G, prod_rules)

    print
    print "--------------------"
    print "- Production Rules -"
    print "--------------------"

    for k in prod_rules.iterkeys():
        # print k
        s = 0
        for d in prod_rules[k]:
            s += prod_rules[k][d]
        for d in prod_rules[k]:
            # normalization step to create probs, not counts
            prod_rules[k][d] = float(prod_rules[k][d]) / float(s)
            # print '\t -> ', d, prod_rules[k][d]

    rules = []
    id = 0
    for k, v in prod_rules.iteritems():
        sid = 0
        for x in prod_rules[k]:
            rhs = re.findall("[^()]+", x)
            rules.append(("r%d.%d" % (id, sid),
                          "%s" % re.findall("[^()]+", k)[0],
                          rhs,
                          prod_rules[k][x]))
            # print ("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x])
            sid += 1
        id += 1

    g = pcfg.Grammar('S')
    for (id, lhs, rhs, prob) in rules:
        g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

    print "Starting max size"
    g.set_max_size(num_nodes)
    print "Done with max size"

    Hstars = []
    for i in range(0, num_samples):
        rule_list = g.sample(num_nodes)
        # print rule_list
        hstar = grow(rule_list, g)[0]
        # print "H* nodes: " + str(hstar.number_of_nodes())
        # print "H* edges: " + str(hstar.number_of_edges())
        Hstars.append(hstar)

    return Hstars
G = nx.Graph()
G.add_edge(1, 2)
G.add_edge(2, 3)
G.add_edge(2, 4)
G.add_edge(3, 4)
G.add_edge(3, 5)
G.add_edge(4, 6)
G.add_edge(5, 6)
G.add_edge(1, 5)

num_nodes = G.number_of_nodes()
prod_rules = {}

# probabilistic_hrg() above returns sampled graphs, not rules, so learn the
# (id, lhs, rhs, prob) rule tuples directly, as the loop below expects
p_rules = PHRG.probabilistic_hrg_learning(G)

g = pcfg.Grammar('S')
for (id, lhs, rhs, prob) in p_rules:
    g.add_rule(pcfg.Rule(id, lhs, rhs, prob, True))
print '> prod rules added to Grammar g'

g.set_max_size(num_nodes * 4)
print '> max-size set.'

rids = g.sample(num_nodes * 4)
print rids

new_graph = pg.gen(rids, g)
print "nodes: ", new_graph.number_of_nodes()
print "edges: ", new_graph.number_of_edges()
print
print