Ejemplo n.º 1
0
def will_prod_rules_fire(prs_files_lst, nbr_nodes):
    # Load each production-rules TSV into a pcfg.Grammar to probe whether
    # the rules can be assembled into a grammar at all.
    #   prs_files_lst: list of *.prs TSV paths; rows are (id, lhs, rhs, prob)
    #   nbr_nodes:     sequence whose first element is the target node count
    # NOTE(review): ret_val is never appended to, so this returns an empty
    # list (or None on empty input) -- confirm the intended contract.
    if not len(prs_files_lst): return
    ret_val = []

    for fname in prs_files_lst:
        # Read the subset of prod rules
        # NOTE(review): `list` is not a valid pandas column dtype for
        # columns 1/2 -- verify this against the actual data files.
        df = pd.read_csv(fname,
                         header=None,
                         sep="\t",
                         dtype={
                             0: str,
                             1: list,
                             2: list,
                             3: float
                         })
        g = pcfg.Grammar('S')
        from td_isom_jaccard_sim import listify_rhs
        for (id, lhs, rhs, prob) in df.values:
            # rhs is serialized as a string; rebuild the list of symbols.
            rhs = listify_rhs(rhs)
            g.add_rule(pcfg.Rule(id, lhs, rhs, float(prob)))

        num_nodes = nbr_nodes[0]  # NOTE(review): computed but never used
        # 		print "Starting max size", 'n=', num_nodes[0], type(num_nodes)

    return ret_val
Ejemplo n.º 2
0
def grow_exact_size_hrg_graphs_from_prod_rules(prod_rules, gname, n, runs=1):
  """
  Args:
    rules: production rules (model)
    gname: graph name
    n:     target graph order (number of nodes)
    runs:  how many graphs to generate
  Returns: list of synthetic graphs

  """
  if n <=0: sys.exit(1)

  print runs
  print n
  print gname
  for i,x in enumerate(prod_rules):
    print i,'  ', x[:1]
  
  g = pcfg.Grammar('S')
  for (id, lhs, rhs, prob) in prod_rules:
    g.add_rule(pcfg.Rule(id, lhs, rhs, prob))
  print '... pcfg.Grammar'

  g.set_max_size(n)
  print "Done with max size"

  if DBG: print '*' * 40
  hstars_lst = []
  for i in range(0, runs):
    rule_list = g.sample(n)
    print 'g.sample'
    hstar = PHRG.grow(rule_list, g)[0]
    hstars_lst.append(hstar)

  return hstars_lst
Ejemplo n.º 3
0
def Hstar_Graphs_Ignore_Time(df, graph_name, tslices, axs):
    if len(df.columns) == 3:
        G = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr='ts')
    else:
        G = nx.from_pandas_dataframe(df, 'src', 'trg')
    # force to unrepeated edgesA
    if 0: print nx.info(G)
    G = G.to_undirected()
    if 0: print nx.info(G)
    exit()
    # Derive the prod rules in a naive way, where
    prod_rules = PHRG.probabilistic_hrg_learning(G)
    g = pcfg.Grammar('S')
    for (id, lhs, rhs, prob) in prod_rules:
        g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

    num_nodes = G.number_of_nodes()

    print "Starting max size"
    g.set_max_size(num_nodes)

    print "Done with max size"

    Hstars = []

    num_samples = 20
    print '*' * 40
    for i in range(0, num_samples):
        rule_list = g.sample(num_nodes)
        hstar = PHRG.grow(rule_list, g)[0]
        Hstars.append(hstar)

    # if 0:
    #   g = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr=['ts'])
    #   draw_degree_whole_graph(g,axs)
    #   draw_degree(Hstars, axs=axs, col='r')
    #   #axs.set_title('Rules derived by ignoring time')
    #   axs.set_ylabel('Frequency')
    #   axs.set_xlabel('degree')

    if 1:
        # metricx = [ 'degree','hops', 'clust', 'assort', 'kcore','eigen','gcd']
        metricx = ['eigen']
        g = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr=['ts'])
        # graph_name = os.path.basename(f_path).rstrip('.tel')
        print ">", graph_name
        metrics.network_properties([g],
                                   metricx,
                                   Hstars,
                                   name=graph_name,
                                   out_tsv=True)
Ejemplo n.º 4
0
def Growing_Network_Using_Final_State_ProdRules(pddf, prod_rules, nSlices,
                                                kSlice, axs):
    '''
    Grow a synthetic graph up to the end of block kSlice using HRG rules
    from the final (whole) state of the graph, then plot degree histograms.

          pddf: pandas edge-list df with 'src', 'trg', 'ts' columns
    prod_rules: production rules learned on the entire graph
       nSlices: total number of blocks (pseudo-states of the graph)
        kSlice: the current slice
           axs: matplotlib axes to plot to
    '''

    # Width (in timestamp units) of one temporal block.
    span = (pddf['ts'].max() - pddf['ts'].min()) / nSlices

    g = pcfg.Grammar('S')
    for (id, lhs, rhs, prob) in prod_rules:
        g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

    # Cumulative mask: all edges from the start of time through the end of
    # slice kSlice (the commented-out variant selected only slice kSlice).
    # mask = (pddf['ts'] >= pddf['ts'].min()+ span*kSlice) & (pddf['ts'] < pddf['ts'].min()+ span*(kSlice +1))
    mask = (pddf['ts'] >= pddf['ts'].min()) & (
        pddf['ts'] < pddf['ts'].min() + span * (kSlice + 1))
    ldf = pddf.loc[mask]

    G = nx.from_pandas_dataframe(ldf, 'src', 'trg', ['ts'])

    num_nodes = G.number_of_nodes()
    print "Starting max size"
    g.set_max_size(num_nodes)
    print "Done with max size"

    num_samples = 20
    print '*' * 40
    tdf = pd.DataFrame()
    for i in range(0, num_samples):
        rule_list = g.sample(num_nodes)
        hstar = PHRG.grow(rule_list, g)[0]
        # Degree table of the sampled graph: column 1 holds the degree.
        df = pd.DataFrame.from_dict(hstar.degree().items())
        # NOTE(review): tdf is reassigned on every iteration, so only the
        # final sample survives the loop; concatenating the same groupby
        # count twice also looks like a copy/paste slip -- confirm intent.
        tdf = pd.concat([df.groupby([1]).count(),
                         df.groupby([1]).count()],
                        axis=1)

    # NOTE(review): labeled 'Orig' but this plots the synthetic graph's
    # mean degree counts -- likely a mislabel.
    tdf = tdf[0].mean(axis=1)
    tdf.plot(ax=axs, color='r', label='Orig')
    # Orig Graph
    tdf = pd.DataFrame.from_dict(G.degree().items())
    gb = tdf.groupby([1]).count()
    gb[0].plot(ax=axs, color='b', label='Orig')
    axs.set_xscale('log')
    '''
Ejemplo n.º 5
0
def probe_stacked_prs_likelihood_tofire(df, fname="", nbr_nodes=0):
    Info("probe stacked prs likelihood tofire")
    g = pcfg.Grammar('S')
    df = df[['rnbr', 'lhs', 'rhs',
             'prob']]  # ToDo: need to drop the gname column
    for (id, lhs, rhs, prob) in df.values.tolist():  # 21Nov17
        g.add_rule(pcfg.Rule(id, lhs, rhs, float(prob)))
    num_nodes = int(nbr_nodes)
    g.set_max_size(num_nodes)
    try:
        g.set_max_size(num_nodes)
    except Exception, e:  # print "Done with max size"
        print "\t:", e
        # return False
        os._exit(1)
Ejemplo n.º 6
0
def Hstar_Graphs_Control(G, graph_name, axs):
    print '-', Hstar_Graphs_Control, '-'
    # Derive the prod rules in a naive way, where
    prod_rules = PHRG.probabilistic_hrg_learning(G)
    print prod_rules
    g = pcfg.Grammar('S')
    for (id, lhs, rhs, prob) in prod_rules:
        g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

    num_nodes = G.number_of_nodes()

    print "Starting max size", 'n=', num_nodes
    g.set_max_size(num_nodes)

    print "Done with max size"

    Hstars = []

    num_samples = 20
    print '*' * 40
    for i in range(0, num_samples):
        rule_list = g.sample(num_nodes)
        hstar = PHRG.grow(rule_list, g)[0]
        Hstars.append(hstar)

    # if 0:
    #   g = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr=['ts'])
    #   draw_degree_whole_graph(g,axs)
    #   draw_degree(Hstars, axs=axs, col='r')
    #   #axs.set_title('Rules derived by ignoring time')
    #   axs.set_ylabel('Frequency')
    #   axs.set_xlabel('degree')

    if 1:
        # metricx = [ 'degree','hops', 'clust', 'assort', 'kcore','eigen','gcd']
        metricx = ['degree', 'gcd']
        # g = nx.from_pandas_dataframe(df, 'src', 'trg',edge_attr=['ts'])
        # graph_name = os.path.basename(f_path).rstrip('.tel')
        if DBG: print ">", graph_name
        metrics.network_properties([G],
                                   metricx,
                                   Hstars,
                                   name=graph_name,
                                   out_tsv=True)
Ejemplo n.º 7
0
def grow_exact_size_hrg_graphs_from_prod_rules(prod_rules, gname, n, runs=1):
    """
  Args:
    rules: production rules (model)
    gname: graph name
    n:     target graph order (number of nodes)
    runs:  how many graphs to generate
  Returns: list of synthetic graphs

  """
    if n <= 0: sys.exit(1)

    # print runs
    # for i,x in enumerate(prod_rules):
    #   print i,'  ', x

    g = pcfg.Grammar('S')
    for (id, lhs, rhs, prob) in prod_rules:
        rhs = [f[1:-1] for f in re.findall("'.+?'", rhs)]
        prob = float(prob)
        g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

    # # mask = (pddf['ts'] >= pddf['ts'].min()+ span*kSlice) & (pddf['ts'] < pddf['ts'].min()+ span*(kSlice +1))
    # mask = (pddf['ts'] >= pddf['ts'].min()) & (pddf['ts'] < pddf['ts'].min() + span * (kSlice + 1))
    # ldf = pddf.loc[mask]
    #
    # G = nx.from_pandas_dataframe(ldf, 'src', 'trg', ['ts'])
    #
    num_nodes = n
    if DBG: print "Starting max size"
    g.set_max_size(num_nodes)
    if DBG: print "Done with max size"
    #
    # num_samples = 20
    if DBG: print '*' * 40
    hstars_lst = []
    for i in range(0, runs):
        rule_list = g.sample(num_nodes)
        hstar = PHRG.grow(rule_list, g)[0]
        hstars_lst.append(hstar)

    # print rule_list

    return hstars_lst
Ejemplo n.º 8
0
def hstar_fixed_graph_gen(args):
    """
    Load a pickled original graph, sample HRG graphs for each matching
    production-rules file, and pickle {origG, hstars} under ../Results/.

    args: dict-like CLI arguments; args['grow'][0] is the edgelist path.
    """
    import networkx as nx

    orig_fname = args['grow'][0]
    gn = graph_name(orig_fname)
    if os.path.exists("../datasets/{}.p".format(gn)):
        origG = nx.read_gpickle("../datasets/{}.p".format(gn))
    else:
        # NOTE(review): origG is never bound on this branch, so
        # origG.number_of_nodes() below raises NameError -- the edgelist
        # loading announced here is not implemented yet.
        print("we load edgelist into an nx.obj")

    prs_files = glob("../ProdRules/{}*prs".format(gn))
    for f in prs_files:
        prod_rules = get_prod_rules(f)
        g = pcfg.Grammar('S')
        for (id, lhs, rhs, prob) in prod_rules:
            # print (id, lhs, rhs, prob)
            g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

        # exit() # Takes this out
        # ToDo: We nee to get these rules in the right format

        num_nodes = origG.number_of_nodes()

        print "Starting max size"
        g.set_max_size(num_nodes)

        print "Done with max size"

        # NOTE(review): Hstars is reset for every prs file, so only the
        # last file's samples get pickled below; an empty prs_files list
        # leaves Hstars unbound (NameError at dump time).
        Hstars = []

        num_samples = 20
        print '*' * 40
        for i in range(0, num_samples):
            rule_list = g.sample(num_nodes)
            hstar = PHRG.grow(rule_list, g)[0]
            Hstars.append(hstar)
    import pickle
    pickle.dump({
        'origG': origG,
        'hstars': Hstars
    }, open('../Results/{}_hstars.p'.format(gn), "wb"))
    if os.path.exists('../Results/{}_hstars.p'.format(gn)):
        print("Pickle written")
Ejemplo n.º 9
0
def grow_graphs_using_rules(production_rules, n=0, recrncs=1):
    """
    Build a pcfg.Grammar from a nested dict of production rules and grow
    graph(s) of order n.

    production_rules: dict {lhs-string: {rhs-string: probability}}
                   n: target number of nodes (0 -> no-op, returns None)
             recrncs: number of grow iterations
    """
    from PHRG import grow

    if n == 0:
        return
    prod_rules = production_rules
    # Flatten the nested dict into (rule-id, lhs, rhs-list, prob) tuples.
    rules = []
    id = 0  # NOTE(review): shadows the builtin `id`
    for k, v in prod_rules.iteritems():
        sid = 0
        for x in prod_rules[k]:
            rhs = re.findall("[^()]+", x)
            rules.append(
                ("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs,
                 prod_rules[k][x]))
            #print ("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x])
            sid += 1
        id += 1

    g = pcfg.Grammar('S')
    for (id, lhs, rhs, prob) in rules:
        g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

    print "Starting max size"
    num_nodes = n
    g.set_max_size(num_nodes)

    print "Done with max size"

    Hstars = []

    for i in range(0, recrncs):
        rule_list = g.sample(num_nodes)
        hstar = grow(rule_list, g)[0]
        print '\tPHRG -> run:', i, str(hstar.number_of_nodes()), str(
            hstar.number_of_edges())
        # NOTE(review): rebinding g to the sampled graph breaks the next
        # iteration (g.sample would be called on a graph) when recrncs > 1;
        # Hstars.append also sits outside the loop, so only the final
        # graph is returned -- confirm intent.
        g = hstar
    Hstars.append(hstar)

    return Hstars
Ejemplo n.º 10
0
def synthetic_graph_generator(ref, graph_model):
    """
    Generate one synthetic graph of the same order as `ref`.

    Only the 'hrg' model is implemented; any other graph_model value
    returns None.
    """
    synth_graph = None

    if 'hrg' in graph_model:
        # Learn production rules from the reference graph.
        rules = PHRG.probabilistic_hrg_deriving_prod_rules(ref)

        grammar = pcfg.Grammar('S')
        for (rid, lhs, rhs, prob) in rules:
            grammar.add_rule(pcfg.Rule(rid, lhs, rhs, prob))

        order = ref.number_of_nodes()
        grammar.set_max_size(order)
        derivation = grammar.sample(order)
        synth_graph = PHRG.grow(derivation, grammar)[0]

    return synth_graph
def grow_exact_size_hrg_graphs_from_prod_rules(prod_rules, gname, n, runs=1):
    """
    Args:
        rules: production rules (model)
        gname: graph name
        n:		 target graph order (number of nodes)
        runs:	how many graphs to generate

    Returns: list of synthetic graphs

    """
    DBG = True
    if n <= 0: sys.exit(1)

    g = pcfg.Grammar('S')
    for (id, lhs, rhs, prob) in prod_rules:
        g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

    print
    print "Added rules HRG (pr", len(prod_rules), ", n,", n, ")"

    num_nodes = n
    if DBG: print "Starting max size ..."
    t_start = time.time()
    g.set_max_size(num_nodes)
    print "Done with max size, took %s seconds" % (time.time() - t_start)

    hstars_lst = []
    print "	",
    for i in range(0, runs):
        print '>',
        rule_list = g.sample(num_nodes)
        hstar = phrg.grow(rule_list, g)[0]
        hstars_lst.append(hstar)

    return hstars_lst
Ejemplo n.º 12
0
def probabilistic_hrg(G, num_samples=1):
		"""
		Learn HRG production rules from G's tree decomposition and sample
		num_samples synthetic graphs from the resulting grammar.

		Accumulates counts into the module-level `prod_rules` dict via
		td.new_visit, then normalizes them into probabilities.
		"""

		graphletG = []

		#print G.number_of_nodes()
		#print G.number_of_edges()

		# Work on the giant connected component, with self-loops removed.
		G.remove_edges_from(G.selfloop_edges())
		giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
		G = nx.subgraph(G, giant_nodes)

		num_nodes = G.number_of_nodes()

		# print G.number_of_nodes()
		# print G.number_of_edges()

		graph_checks(G)

		print
		print "--------------------"
		print "-Tree Decomposition-"
		print "--------------------"

		# Large graphs: decompose random-walk samples instead of the whole graph.
		if num_nodes >= 500:
				for Gprime in gs.rwr_sample(G, 2, 100):
						T = td.quickbb(Gprime)
						root = list(T)[0]
						T = td.make_rooted(T, root)
						T = binarize(T)
						root = list(T)[0]
						root, children = T
						td.new_visit(T, G, prod_rules)
		else:
				T = td.quickbb(G)
				root = list(T)[0]
				T = td.make_rooted(T, root)
				T = binarize(T)
				root = list(T)[0]
				root, children = T
				td.new_visit(T, G, prod_rules)

		print
		print "--------------------"
		print "- Production Rules -"
		print "--------------------"

		# Normalize rule counts into probabilities per LHS.
		for k in prod_rules.iterkeys():
				#print k
				s = 0
				for d in prod_rules[k]:
						s += prod_rules[k][d]
				for d in prod_rules[k]:
						prod_rules[k][d] = float(prod_rules[k][d]) / float(s)	# normalization step to create probs not counts.
						#print '\t -> ', d, prod_rules[k][d]

		# Flatten the nested dict into (rule-id, lhs, rhs, prob) tuples.
		rules = []
		id = 0
		for k, v in prod_rules.iteritems():
				sid = 0
				for x in prod_rules[k]:
						rhs = re.findall("[^()]+", x)
						rules.append(("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x]))
						#print ("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x])
						sid += 1
				id += 1

		g = pcfg.Grammar('S')
		for (id, lhs, rhs, prob) in rules:
				g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

		print "Starting max size"

		g.set_max_size(num_nodes)

		print "Done with max size"

		Hstars = []

		for i in range(0, num_samples):
				rule_list = g.sample(num_nodes)
				# print rule_list
				hstar = grow(rule_list, g)[0]
				# print "H* nodes: " + str(hstar.number_of_nodes())
				# print "H* edges: " + str(hstar.number_of_edges())
				Hstars.append(hstar)

		return (Hstars)
Ejemplo n.º 13
0
Archivo: PHRG.py Proyecto: nddsg/PHRG
    # Build a small fixed 6-node test graph by hand.
    G.add_edge(1, 2)
    G.add_edge(2, 3)
    G.add_edge(2, 4)
    G.add_edge(3, 4)
    G.add_edge(3, 5)
    G.add_edge(4, 6)
    G.add_edge(5, 6)
    G.add_edge(1, 5)

    num_nodes = G.number_of_nodes()

    # Learn HRG production rules from the test graph.
    prod_rules = {}
    p_rules = probabilistic_hrg(G)

    g = pcfg.Grammar('S')
    for (id, lhs, rhs, prob) in p_rules:
        g.add_rule(pcfg.Rule(id, lhs, rhs, prob, True))
    print '> prod rules added to Grammar g'  #
    # Allow the sampled derivation to reach 4x the original order.
    g.set_max_size(num_nodes * 4)
    print '> max-size set.'

    rids = g.sample(num_nodes * 4)
    print rids

    # Generate a new graph from the sampled rule-id list.
    new_graph = pg.gen(rids, g)

    print "nodes: ", new_graph.number_of_nodes()
    print "edges: ", new_graph.number_of_edges()
    print
    print