コード例 #1
0
def hrg_clique_tree(G):
  """Return the rooted, binarized tree decomposition computed for ``G``.

  When ``G`` has fewer than 500 nodes the whole graph is decomposed with
  ``td.quickbb``.  Otherwise one decomposition is built for every subgraph
  yielded by ``gs.rwr_sample(G, 2, 300)`` and the result of the last one is
  returned.  Each decomposition is rooted at its first element, binarized
  with ``phrg.binarize`` and handed to ``td.new_visit`` together with the
  full graph ``G``.

  Parameters
  ----------
  G : graph object exposing ``number_of_nodes()``, or None

  Returns
  -------
  ``(root, children)`` unpacked from the last binarized tree, or None when
  ``G`` is None.
  """
  if G is None:
    return None

  # Rules collected by td.new_visit; they are not part of the return value.
  scratch_rules = {}

  # Small graphs are decomposed directly, large ones sample by sample.
  if G.number_of_nodes() >= 500:
    graphs = gs.rwr_sample(G, 2, 300)
  else:
    graphs = [G]

  # NOTE(review): if the sampler yields nothing, root/children are never
  # bound and the return below raises, exactly as the original code did.
  for graph in graphs:
    tree = td.quickbb(graph)
    tree = td.make_rooted(tree, list(tree)[0])
    tree = phrg.binarize(tree)
    root, children = tree
    # The full graph G (not the sample) is what new_visit receives.
    td.new_visit(tree, G, scratch_rules)

  return root, children
コード例 #2
0
def tree_decomposition(g):
    """
      Build tree decompositions for the largest connected component of ``g``.

      Self-loop edges are removed from the graph that is passed in, the
      largest connected component is extracted and ``graph_checks`` is run
      on it.  Returns a list of rooted decompositions: a single entry (not
      binarized) when the component has fewer than 500 nodes, otherwise one
      binarized entry per subgraph yielded by ``gs.rwr_sample(g, 2, 300)``.
      Returns an empty list when ``g`` is None.
    """
    if g is None:
        return []

    g.remove_edges_from(g.selfloop_edges())
    largest = max(nx.connected_component_subgraphs(g), key=len)
    g = nx.subgraph(g, largest)

    graph_checks(g)

    if g.number_of_nodes() < 500:
        # NOTE(review): unlike the sampled path, this tree is left
        # un-binarized (the binarize call was commented out upstream).
        tree = td.quickbb(g)
        return [td.make_rooted(tree, list(tree)[0])]

    decompositions = []
    for sample in gs.rwr_sample(g, 2, 300):
        tree = td.quickbb(sample)
        tree = td.make_rooted(tree, list(tree)[0])
        decompositions.append(binarize(tree))
    return decompositions
コード例 #3
0
def probabilistic_hrg_deriving_prod_rules (G, K=1, n=None, gname=""):
  '''
	Rule extraction procedure

		'''
  if G is None: return

  G.remove_edges_from(G.selfloop_edges())
  giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
  G = nx.subgraph(G, giant_nodes)

  if n is None:
    num_nodes = G.number_of_nodes()
  else:
    num_nodes = n

  graph_checks(G)

  if DEBUG: print
  if DEBUG: print "--------------------"
  if DEBUG: print "-Tree Decomposition-"
  if DEBUG: print "--------------------"

  if num_nodes >= 500:
    for j,Gprime in enumerate(gs.rwr_sample(G, K, num_nodes)):
      if gname is "":
        nx.write_edgelist(Gprime, '/tmp/sampled_subgraph_200_{}.tsv'.format(j), delimiter="\t", data=False)
      else:
        nx.write_edgelist(Gprime, '/tmp/{}{}.tsv'.format(gname, j), delimiter="\t", data=False)
        print "...  files written: /tmp/{}{}.tsv".format(gname, j)

  return
コード例 #4
0
def derive_production_rules(G):
    """

  Parameters
  ----------
  G : input graph
  """
    from PHRG import graph_checks, binarize
    prod_rules = {}

    G.remove_edges_from(G.selfloop_edges())
    giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
    G = nx.subgraph(G, giant_nodes)

    num_nodes = G.number_of_nodes()

    graph_checks(G)

    print
    print "--------------------"
    print "-Tree Decomposition-"
    print "--------------------"

    if num_nodes >= 500:
        for Gprime in gs.rwr_sample(G, 2, 100):
            T = td.quickbb(Gprime)
            root = list(T)[0]
            T = td.make_rooted(T, root)
            T = binarize(T)
            root = list(T)[0]
            root, children = T
            td.new_visit(T, G, prod_rules)
    else:
        T = td.quickbb(G)
        root = list(T)[0]
        T = td.make_rooted(T, root)
        T = binarize(T)
        root = list(T)[0]
        root, children = T
        td.new_visit(T, G, prod_rules)

    print
    print "--------------------"
    print "- Production Rules -"
    print "--------------------"

    for k in prod_rules.iterkeys():
        print k
        s = 0
        for d in prod_rules[k]:
            s += prod_rules[k][d]
        for d in prod_rules[k]:
            prod_rules[k][d] = float(prod_rules[k][d]) / float(
                s)  # normailization step to create probs not counts.
            #print '\t -> ', d, prod_rules[k][d]

    return prod_rules
コード例 #5
0
def get_hrg_production_rules(fname):
    """Load the edge list ``fname`` and run ``td`` on samples of large graphs.

    Self-loop edges are dropped and only the largest connected component is
    kept; its node count is reported through ``Info``.  Nothing further
    happens when that component has fewer than 500 nodes.  Otherwise ``td``
    is called with a one-element list for every subgraph yielded by
    ``gs.rwr_sample(G, 2, 300)``.  The function returns None.
    """
    import graph_sampler as gs

    graph = load_edgelist(fname)
    graph.remove_edges_from(graph.selfloop_edges())
    largest = max(nx.connected_component_subgraphs(graph), key=len)
    graph = nx.subgraph(graph, largest)

    node_count = graph.number_of_nodes()
    Info(str(node_count))
    if node_count < 500:
        return

    Info('Grande')
    for sample in gs.rwr_sample(graph, 2, 300):
        td([sample])
コード例 #6
0
def nx_edges_to_nddgo_graph_sampling(graph):
    G = graph
    nbr_nodes = 200
    K = int(math.ceil(.25*G.number_of_nodes()/nbr_nodes))
    for j,Gprime in enumerate(gs.rwr_sample(G, K, nbr_nodes)):
        if gname is "":
            # nx.write_edgelist(Gprime, '/tmp/sampled_subgraph_200_{}.tsv'.format(j), delimiter="\t", data=False)
            gprime_lst.append(Gprime)
        else:
            # nx.write_edgelist(Gprime, '/tmp/{}{}.tsv'.format(gname, j), delimiter="\t", data=False)
            gprime_lst.append(Gprime)
        print "...  files written: /tmp/{}{}.tsv".format(gname, j)

    return gprime_lst
コード例 #7
0
def stochastic_hrg(G, num_graphs=1):
    print "++" * 4, num_graphs
    # Graph much be connected
    if not nx.is_connected(G):
        print "Graph must be connected"
        G = list(nx.connected_component_subgraphs(G))[0]

    # Graph must be simple
    G.remove_edges_from(G.selfloop_edges())
    if G.number_of_selfloops() > 0:
        print "Graph must be not contain self-loops"
        exit()

    num_nodes = G.number_of_nodes()
    #print "Number of Nodes:\t" + str(num_nodes)

    num_edges = G.number_of_edges()
    #print "Number of Edges:\t" + str(num_edges)

    # To parse a large graph we use 10 samples of size 500 each. It is
    # possible to parse the whole graph, but the approximate
    # decomposition method we use is still quite slow.
    if num_nodes > 500:
        for Gprime in gs.rwr_sample(G, 2, 300):
            pr.prod_rules = {}
            T = td.quickbb(Gprime)
            prod_rules = pr.learn_production_rules(Gprime, T)
    else:
        T = td.quickbb(G)
        prod_rules = pr.learn_production_rules(G, T)

    print "  -- stochastic hrg -> Rule Induction Complete"

    Gstar = []
    Dstar = []

    for run in range(0, num_graphs):
        nG, nD = sg.grow(prod_rules, num_nodes, 1)
        Gstar.append(nG)
        Dstar.append(nD)

    return Gstar, Dstar
コード例 #8
0
def learn_grammars_production_rules(input_graph):
    G = input_graph
    # print G.number_of_nodes()
    # print G.number_of_edges()
    num_nodes = G.number_of_nodes()

    G.remove_edges_from(G.selfloop_edges())
    giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
    G = nx.subgraph(G, giant_nodes)

    graph_checks(G)

    if dbg:
        print
        print "--------------------"
        print "-Tree Decomposition-"
        print "--------------------"

    if num_nodes >= 500:
        for Gprime in gs.rwr_sample(G, 2, 100):
            T = tw.quickbb(Gprime)
            root = list(T)[0]
            T = tw.make_rooted(T, root)
            T = binarize(T)
            root = list(T)[0]
            root, children = T
            tw.new_visit(T, G, prod_rules)
    else:
        T = tw.quickbb(G)
        root = list(T)[0]
        T = tw.make_rooted(T, root)
        T = binarize(T)
        root = list(T)[0]
        root, children = T
        tw.new_visit(T, G, prod_rules)

    # return
    return prod_rules
コード例 #9
0
def nx_edges_to_nddgo_graph_sampling(graph, n, m, peo_h):
    G = graph
    if n is None and m is None: return
    # n = G.number_of_nodes()
    # m = G.number_of_edges()
    nbr_nodes = 256
    basefname = 'datasets/{}_{}'.format(G.name, peo_h)

    K = int(math.ceil(.25 * G.number_of_nodes() / nbr_nodes))
    print "--", nbr_nodes, K, '--'

    for j, Gprime in enumerate(gs.rwr_sample(G, K, nbr_nodes)):
        # if gname is "":
        #	 # nx.write_edgelist(Gprime, '/tmp/sampled_subgraph_200_{}.tsv'.format(j), delimiter="\t", data=False)
        #	 gprime_lst.append(Gprime)
        # else:
        #	 # nx.write_edgelist(Gprime, '/tmp/{}{}.tsv'.format(gname, j), delimiter="\t", data=False)
        #	 gprime_lst.append(Gprime)
        # # print "...	files written: /tmp/{}{}.tsv".format(gname, j)

        edges = Gprime.edges()
        edges = [(int(e[0]), int(e[1])) for e in edges]
        df = pd.DataFrame(edges)
        df.sort_values(by=[0], inplace=True)

        ofname = basefname + "_{}.dimacs".format(j)

        with open(ofname, 'w') as f:
            f.write('c {}\n'.format(G.name))
            f.write('p edge\t{}\t{}\n'.format(n, m))
            # for e in df.iterrows():
            output_edges = lambda x: f.write("e\t{}\t{}\n".format(x[0], x[1]))
            df.apply(output_edges, axis=1)
        # f.write("e\t{}\t{}\n".format(e[0]+1,e[1]+1))
        if os.path.exists(ofname): print 'Wrote: {}'.format(ofname)

    return basefname
コード例 #10
0
ファイル: PHRG.py プロジェクト: nddsg/PHRG
def probabilistic_hrg_learning(G, num_samples=1, n=None, prod_rules=None):
    """Learn production rules from ``G`` and return them as a flat list.

    Self-loop edges are removed from ``G`` in place and only the largest
    connected component is used.  When the node count is below 500 the
    component is decomposed whole; otherwise each subgraph yielded by
    ``gs.rwr_sample(G, 2, 300)`` is decomposed.  Every rooted, binarized
    decomposition is passed to ``td.new_visit``, which fills the rule table.

    Parameters
    ----------
    G : input graph
    num_samples : unused; kept for interface compatibility
    n : node count to compare against the 500-node threshold instead of the
        size of the largest component
    prod_rules : dict to collect rule counts in; a fresh dict is used when
        omitted.  A dict that is passed in is modified in place, and all of
        its counts are normalized.

    Returns
    -------
    list of ``(rule_id, lhs, rhs, probability)`` tuples, where ``rule_id``
    is ``"r<i>.<j>"`` and ``lhs``/``rhs`` are the rule strings with their
    parentheses stripped by ``re.findall("[^()]+", ...)``.
    """
    # The default used to stay None and crash once the table was iterated.
    if prod_rules is None:
        prod_rules = {}

    G.remove_edges_from(G.selfloop_edges())
    giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
    G = nx.subgraph(G, giant_nodes)

    # The method must be called: the bound method itself used to be stored
    # here and then compared with 500.
    num_nodes = G.number_of_nodes() if n is None else n

    graph_checks(G)

    # Small graphs are decomposed directly, large ones sample by sample.
    if num_nodes >= 500:
        graphs = gs.rwr_sample(G, 2, 300)
    else:
        graphs = [G]

    for graph in graphs:
        T = td.quickbb(graph)
        T = td.make_rooted(T, list(T)[0])
        T = binarize(T)
        # The component G (not the sample) is what new_visit receives.
        td.new_visit(T, G, prod_rules)

    # Normalization step: turn counts into probabilities.
    for lhs in prod_rules:
        rhs_counts = prod_rules[lhs]
        total = float(sum(rhs_counts.values()))
        for rhs in rhs_counts:
            rhs_counts[rhs] = float(rhs_counts[rhs]) / total

    rules = []
    for rule_id, lhs in enumerate(prod_rules):
        for sub_id, rhs in enumerate(prod_rules[lhs]):
            rules.append(
                ("r%d.%d" % (rule_id, sub_id),
                 "%s" % re.findall("[^()]+", lhs)[0],
                 re.findall("[^()]+", rhs),
                 prod_rules[lhs][rhs]))

    return rules
コード例 #11
0
ファイル: PHRG.py プロジェクト: nddsg/PHRG
def probabilistic_hrg(G, n=None):
    '''
			Rule extraction procedure

					'''
    if G is None: return

    G.remove_edges_from(G.selfloop_edges())
    giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
    G = nx.subgraph(G, giant_nodes)

    if n is None:
        num_nodes = G.number_of_nodes()
    else:
        num_nodes = n

    graph_checks(G)

    if DEBUG: print
    if DEBUG: print "--------------------"
    if DEBUG: print "-Tree Decomposition-"
    if DEBUG: print "--------------------"
    prod_rules = {}
    if num_nodes >= 500:
        for Gprime in gs.rwr_sample(G, 2, 300):
            T = td.quickbb(Gprime)
            root = list(T)[0]
            T = td.make_rooted(T, root)
            T = binarize(T)
            root = list(T)[0]
            root, children = T
            td.new_visit(T, G, prod_rules)
    else:
        T = td.quickbb(G)
        root = list(T)[0]
        T = td.make_rooted(T, root)
        T = binarize(T)
        root = list(T)[0]
        root, children = T
        td.new_visit(T, G, prod_rules)

    if DEBUG: print
    if DEBUG: print "--------------------"
    if DEBUG: print "- Production Rules -"
    if DEBUG: print "--------------------"

    for k in prod_rules.iterkeys():
        if DEBUG: print k
        s = 0
        for d in prod_rules[k]:
            s += prod_rules[k][d]
        for d in prod_rules[k]:
            prod_rules[k][d] = float(prod_rules[k][d]) / float(
                s)  # normailization step to create probs not counts.
            if DEBUG: print '\t -> ', d, prod_rules[k][d]

    # pp.pprint(prod_rules)

    rules = []
    id = 0
    for k, v in prod_rules.iteritems():
        sid = 0
        for x in prod_rules[k]:
            rhs = re.findall("[^()]+", x)
            rules.append(
                ("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs,
                 prod_rules[k][x]))
            if DEBUG:
                print("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0],
                      rhs, prod_rules[k][x])
            sid += 1
        id += 1

    return rules
コード例 #12
0
# Report the size of the input graph before it is cleaned up.
print G.number_of_nodes()
print G.number_of_edges()

# Drop self-loop edges, then keep only the largest connected component
# (the one selected by max(..., key=len)).
G.remove_edges_from(G.selfloop_edges())
giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
G = nx.subgraph(G, giant_nodes)

graph_checks(G)

print
print "--------------------"
print "-Tree Decomposition-"
print "--------------------"

# NOTE(review): num_nodes and prod_rules are not assigned in this fragment;
# they are presumably defined earlier in the script -- confirm.
if num_nodes >= 500:
    # Large graph: decompose every subgraph yielded by the sampler.
    for Gprime in gs.rwr_sample(G, 2, 100):
        T = tw.quickbb(Gprime)
        root = list(T)[0]  # the first element of the decomposition becomes the root
        T = tw.make_rooted(T, root)
        T = binarize(T)
        root = list(T)[0]  # overwritten by the unpacking on the next line
        root, children = T
        # The component G (not the sample Gprime) is what new_visit receives.
        tw.new_visit(T, G, prod_rules)
else:
    # Small graph: decompose the whole component in one go.
    T = tw.quickbb(G)
    root = list(T)[0]  # the first element of the decomposition becomes the root
    T = tw.make_rooted(T, root)
    T = binarize(T)
    root = list(T)[0]  # overwritten by the unpacking on the next line
    root, children = T
    tw.new_visit(T, G, prod_rules)
コード例 #13
0
def probabilistic_hrg (G, num_samples=1, n=None):
  '''
  Args:
  ------------
	G: input graph (nx obj)
	num_samples:   (int) in the 'grow' process, this is number of
	               synthetic graphs to generate
	n: (int) num_nodes; number of nodes in the resulting graphs
	Returns: List of synthetic graphs (H^stars)
  '''
  graphletG = []

  if DEBUG: print G.number_of_nodes()
  if DEBUG: print G.number_of_edges()

  G.remove_edges_from(G.selfloop_edges())
  giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
  G = nx.subgraph(G, giant_nodes)

  if n is None:
    num_nodes = G.number_of_nodes()
  else:
    num_nodes = n

  if DEBUG: print G.number_of_nodes()
  if DEBUG: print G.number_of_edges()

  graph_checks(G)

  if DEBUG: print
  if DEBUG: print "--------------------"
  if DEBUG: print "-Tree Decomposition-"
  if DEBUG: print "--------------------"

  prod_rules = {}
  if num_nodes >= 500:
    for Gprime in gs.rwr_sample(G, 2, 300):
      T = td.quickbb(Gprime)
      root = list(T)[0]
      T = td.make_rooted(T, root)
      T = binarize(T)
      root = list(T)[0]
      root, children = T
      td.new_visit(T, G, prod_rules, TD)
  else:
    T = td.quickbb(G)
    root = list(T)[0]
    T = td.make_rooted(T, root)
    T = binarize(T)
    root = list(T)[0]
    root, children = T

    td.new_visit(T, G, prod_rules, TD)

  if DEBUG: print
  if DEBUG: print "--------------------"
  if DEBUG: print "- Production Rules -"
  if DEBUG: print "--------------------"

  for k in prod_rules.iterkeys():
    if DEBUG: print k
    s = 0
    for d in prod_rules[k]:
      s += prod_rules[k][d]
    for d in prod_rules[k]:
      prod_rules[k][d] = float(prod_rules[k][d]) / float(s)  # normailization step to create probs not counts.
      if DEBUG: print '\t -> ', d, prod_rules[k][d]

  rules = []
  id = 0
  for k, v in prod_rules.iteritems():
    sid = 0
    for x in prod_rules[k]:
      rhs = re.findall("[^()]+", x)
      rules.append(("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x]))
      if DEBUG: print ("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x])
      sid += 1
    id += 1
  # print rules
  exit()

  g = pcfg.Grammar('S')
  for (id, lhs, rhs, prob) in rules:
    # print type(id), type(lhs), type(rhs), type(prob)
    g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

  if DEBUG: print "Starting max size"
  num_nodes = num_nodes
  num_samples = num_samples

  g.set_max_size(num_nodes)

  if DEBUG: print "Done with max size"

  Hstars = []

  for i in range(0, num_samples):
    rule_list = g.sample(num_nodes)
    # print rule_list
    hstar = grow(rule_list, g)[0]
    # print "H* nodes: " + str(hstar.number_of_nodes())
    # print "H* edges: " + str(hstar.number_of_edges())
    Hstars.append(hstar)

  return Hstars
コード例 #14
0
# The graph must be simple: drop self-loop edges, then verify none remain.
G.remove_edges_from(G.selfloop_edges())
if G.number_of_selfloops() > 0:
    print "Graph must be not contain self-loops"
    exit()

num_nodes = G.number_of_nodes()
print "Number of Nodes:\t" + str(num_nodes)

num_edges = G.number_of_edges()
print "Number of Edges:\t" + str(num_edges)

# To parse a large graph we use 10 samples of size 500 each. It is
# possible to parse the whole graph, but the approximate
# decomposition method we use is still quite slow.
if num_nodes >= 500:
    for Gprime in gs.rwr_sample(G, 10, 500):
        # pr.prod_rules is reset before every sample and prod_rules is
        # reassigned each time, so the value from the last sample is the
        # one that remains after the loop.
        pr.prod_rules = {}
        T = td.quickbb(Gprime)
        prod_rules = pr.learn_production_rules(Gprime, T)
else:
    # Small graph: learn the rules from the whole graph.
    T = td.quickbb(G)
    prod_rules = pr.learn_production_rules(G, T)

print "Rule Induction Complete"

# Result accumulators filled by the generation loop that follows.
Gstar = []
Dstar = []
Gstargl = []
Ggl = []
for run in range(0, 20):
    if num_nodes < 100:
コード例 #15
0
def get_hrg_production_rules(edgelist_data_frame,
                             graph_name,
                             tw=False,
                             n_subg=2,
                             n_nodes=300,
                             nstats=False):
    from growing import derive_prules_from

    t_start = time.time()
    df = edgelist_data_frame
    if df.shape[1] == 4:
        G = nx.from_pandas_dataframe(df, 'src', 'trg',
                                     edge_attr=True)  # whole graph
    elif df.shape[1] == 3:
        G = nx.from_pandas_dataframe(df, 'src', 'trg', ['ts'])  # whole graph
    else:
        G = nx.from_pandas_dataframe(df, 'src', 'trg')
    G.name = graph_name
    print "==> read in graph took: {} seconds".format(time.time() - t_start)

    G.remove_edges_from(G.selfloop_edges())
    giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
    G = nx.subgraph(G, giant_nodes)

    num_nodes = G.number_of_nodes()

    phrg.graph_checks(G)

    if DBG: print
    if DBG: print "--------------------"
    if not DBG: print "-Tree Decomposition-"
    if DBG: print "--------------------"

    prod_rules = {}
    K = n_subg
    n = n_nodes
    if num_nodes >= 500:
        print 'Grande'
        t_start = time.time()
        for Gprime in gs.rwr_sample(G, K, n):
            T = td.quickbb(Gprime)
            root = list(T)[0]
            T = td.make_rooted(T, root)
            T = phrg.binarize(T)
            root = list(T)[0]
            root, children = T
            # td.new_visit(T, G, prod_rules, TD)
            td.new_visit(T, G, prod_rules)
            Process(target=td.new_visit, args=(
                T,
                G,
                prod_rules,
            )).start()
    else:
        T = td.quickbb(G)
        root = list(T)[0]
        T = td.make_rooted(T, root)
        T = phrg.binarize(T)
        root = list(T)[0]
        root, children = T
        # td.new_visit(T, G, prod_rules, TD)
        td.new_visit(T, G, prod_rules)

        print_treewidth(T)
        exit()

    if DBG: print
    if DBG: print "--------------------"
    if DBG: print "- Production Rules -"
    if DBG: print "--------------------"

    for k in prod_rules.iterkeys():
        if DBG: print k
        s = 0
        for d in prod_rules[k]:
            s += prod_rules[k][d]
        for d in prod_rules[k]:
            prod_rules[k][d] = float(prod_rules[k][d]) / float(
                s)  # normailization step to create probs not counts.
            if DBG: print '\t -> ', d, prod_rules[k][d]

    rules = []
    id = 0
    for k, v in prod_rules.iteritems():
        sid = 0
        for x in prod_rules[k]:
            rhs = re.findall("[^()]+", x)
            rules.append(
                ("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs,
                 prod_rules[k][x]))
            if DBG:
                print("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0],
                      rhs, prod_rules[k][x])
            sid += 1
        id += 1

    df = pd.DataFrame(rules)
    '''print "++++++++++"
    df.to_csv('ProdRules/{}_prs.tsv'.format(G.name), header=False, index=False, sep="\t")
    if os.path.exists('ProdRules/{}_prs.tsv'.format(G.name)): 
        print 'Saved', 'ProdRules/{}_prs.tsv'.format(G.name)
    else:
        print "Trouble saving"
    print "-----------"
    print [type(x) for x in rules[0]] '''
    '''
    Graph Generation of Synthetic Graphs
    Grow graphs usigng the union of rules from sampled sugbgraphs to predict the target order of the 
    original graph
    '''
    hStars = grow_exact_size_hrg_graphs_from_prod_rules(
        rules, graph_name, G.number_of_nodes(), 10)
    print '... hStart graphs:', len(hStars)
    d = {graph_name + "_hstars": hStars}
    with open(r"Results/{}_hstars.pickle".format(graph_name),
              "wb") as output_file:
        cPickle.dump(d, output_file)
    if os.path.exists(r"Results/{}_hstars.pickle".format(graph_name)):
        print "File saved"
    '''if nstats:
コード例 #16
0
ファイル: salPHRG.py プロジェクト: abitofalchemy/hrg_nets
def probabilistic_hrg(G, num_samples=1):

		graphletG = []

		#print G.number_of_nodes()
		#print G.number_of_edges()

		G.remove_edges_from(G.selfloop_edges())
		giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
		G = nx.subgraph(G, giant_nodes)

		num_nodes = G.number_of_nodes()

		# print G.number_of_nodes()
		# print G.number_of_edges()

		graph_checks(G)

		print
		print "--------------------"
		print "-Tree Decomposition-"
		print "--------------------"

		if num_nodes >= 500:
				for Gprime in gs.rwr_sample(G, 2, 100):
						T = td.quickbb(Gprime)
						root = list(T)[0]
						T = td.make_rooted(T, root)
						T = binarize(T)
						root = list(T)[0]
						root, children = T
						td.new_visit(T, G, prod_rules)
		else:
				T = td.quickbb(G)
				root = list(T)[0]
				T = td.make_rooted(T, root)
				T = binarize(T)
				root = list(T)[0]
				root, children = T
				td.new_visit(T, G, prod_rules)

		print
		print "--------------------"
		print "- Production Rules -"
		print "--------------------"

		for k in prod_rules.iterkeys():
				#print k
				s = 0
				for d in prod_rules[k]:
						s += prod_rules[k][d]
				for d in prod_rules[k]:
						prod_rules[k][d] = float(prod_rules[k][d]) / float(s)	# normailization step to create probs not counts.
						#print '\t -> ', d, prod_rules[k][d]

		rules = []
		id = 0
		for k, v in prod_rules.iteritems():
				sid = 0
				for x in prod_rules[k]:
						rhs = re.findall("[^()]+", x)
						rules.append(("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x]))
						#print ("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x])
						sid += 1
				id += 1

		g = pcfg.Grammar('S')
		for (id, lhs, rhs, prob) in rules:
				g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

		print "Starting max size"

		g.set_max_size(num_nodes)

		print "Done with max size"

		Hstars = []

		for i in range(0, num_samples):
				rule_list = g.sample(num_nodes)
				# print rule_list
				hstar = grow(rule_list, g)[0]
				# print "H* nodes: " + str(hstar.number_of_nodes())
				# print "H* edges: " + str(hstar.number_of_edges())
				Hstars.append(hstar)

		return (Hstars)
コード例 #17
0
ファイル: HRG.py プロジェクト: nddsg/HRG
    print "Graph must be not contain self-loops";
    exit()

# Report the size of the graph that is about to be parsed.
num_nodes = G.number_of_nodes()
print "Number of Nodes:\t" + str(num_nodes)

num_edges = G.number_of_edges()
print "Number of Edges:\t" + str(num_edges)



# To parse a large graph we use 10 samples of size 500 each. It is
# possible to parse the whole graph, but the approximate
# decomposition method we use is still quite slow.
if num_nodes >= 500:
    for Gprime in gs.rwr_sample(G, 10, 500):
        # pr.prod_rules is reset before every sample and prod_rules is
        # reassigned each time, so the value from the last sample is the
        # one that remains after the loop.
        pr.prod_rules = {}
        T = td.quickbb(Gprime)
        prod_rules = pr.learn_production_rules(Gprime, T)
else:
    # Small graph: learn the rules from the whole graph.
    T = td.quickbb(G)
    prod_rules = pr.learn_production_rules(G, T)

print "Rule Induction Complete"

# Result accumulators filled by the generation loop that follows.
Gstar = []
Dstar = []
Gstargl = []
Ggl = []
for run in range(0, 20):
    if num_nodes < 100: