Example #1
def ba_control_hrg(v_lst):
	grow_graphs = False
	v_lst = [int(n) for n in v_lst] # set of nodes to generate BA graphs
	data = []
	prules_lst = []
	for n_v in v_lst:
		# nxgobj = nx.barabasi_albert_graph(n_v, np.random.choice(range(1,n_v)))
		nxgobj = nx.barabasi_albert_graph(n_v,3)
		nxgobj.name = "ba_%d_%d" %(nxgobj.number_of_nodes(), nxgobj.number_of_edges())

		print "ba", nxgobj.number_of_nodes(), nxgobj.number_of_edges()
		data.append(nxgobj)
		prod_rules = phrg.probabilistic_hrg_deriving_prod_rules(nxgobj)
		df = pd.DataFrame(list(prod_rules))
		out_base_fname = "ba_cntrl_%d"%(n_v)
		ofname = "Results/" + out_base_fname + ".tsv" #_________________
		df.to_csv(ofname, sep="\t", header=False, index=False)


		prules_lst.append(prod_rules)
		g = pcfg.Grammar('S')
		for (id, lhs, rhs, prob) in df.values:
			g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

		num_nodes = nxgobj.number_of_nodes()

		print "	","Starting max size", 'n=', num_nodes
		g.set_max_size(num_nodes)
		print "	","Done with max size"

		Hstars = []
		num_samples = 10
		for i in range(0, num_samples):
			try:
				rule_list = g.sample(num_nodes)
			except Exception, e:
				print str(e)
				traceback.print_exc()
				continue #sys.exit(1)

			hstar = phrg.grow(rule_list, g)[0]
			Hstars.append(hstar)
		print "	", 'Save BA production rules'



		if os.path.exists(ofname):
			print '\tSaved to disk:', ofname
		if 0:
			metricx = ['degree','clust', 'hop', 'gcd']
			metrics.network_properties([nxgobj], metricx, Hstars, name=nxgobj.name, out_tsv=False)
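A hedged sketch of invoking ba_control_hrg, assuming the module-level imports this snippet relies on (networkx as nx, pandas as pd, plus the project's phrg, pcfg, metrics, os, and traceback modules) are in place and a Results/ directory exists; the node counts are illustrative:

# Hypothetical driver; node counts are illustrative only.
ba_control_hrg(['100', '500', '1000'])  # one BA graph (m=3) per node count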
Example #2
def grow_exact_size_hrg_graphs_from_prod_rules(prod_rules, gname, n, runs=1):
    """
  Args:
    rules: production rules (model)
    gname: graph name
    n:     target graph order (number of nodes)
    runs:  how many graphs to generate

  Returns: list of synthetic graphs

  """
    if n <= 0: sys.exit(1)

    g = pcfg.Grammar('S')
    for (id, lhs, rhs, prob) in prod_rules:
        g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

    #print "n", n
    num_nodes = n
    if DEBUG: print "Starting max size"
    g.set_max_size(num_nodes)
    if DEBUG: print "Done with max size"

    hstars_lst = []
    for i in range(0, runs):
        rule_list = g.sample(num_nodes)
        hstar = phrg.grow(rule_list, g)[0]
        hstars_lst.append(hstar)

    return hstars_lst
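A hedged usage sketch, assuming prod_rules is an iterable of (id, lhs, rhs, prob) tuples such as those returned by phrg.probabilistic_hrg_deriving_prod_rules; the graph, name, and sizes are illustrative:

# Hypothetical driver.
G = nx.barabasi_albert_graph(34, 3)
prod_rules = phrg.probabilistic_hrg_deriving_prod_rules(G)
synths = grow_exact_size_hrg_graphs_from_prod_rules(prod_rules, 'ba_34', 34, runs=5)
for h in synths:
    print h.number_of_nodes(), h.number_of_edges()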
Example #3
def main():
    gname = graph_name(sys.argv[1])
    print gname
    concat_prs = "ProdRules/{}_concat.prs".format(gname)

    if not os.path.exists(concat_prs):
        G = load_edgelist(sys.argv[1])
        print "[<>]", "red the graph"
        lcc = max(nx.connected_component_subgraphs(G),
                  key=len)  # find largest conn component
        Glst = sample_rand_subgraphs_in(lcc)  #
        print "[<>]", "got the Glst LCCs"

        concat_phrg_prod_rules([x for x in Glst],
                               G.name)  # subgraphs base prod rules

        dimacs_files = glob("datasets/{}*.dimacs".format(gname))
        var_el_lst = ['mcs', 'mind', 'minf', 'mmd', 'lexm', 'mcsm']
        for gfname in dimacs_files:
            for ve in var_el_lst:
                multiprocessing.Process(target=dimacs_inddgo_tree_decomps,
                                        args=(
                                            ve,
                                            gfname,
                                        )).start()
        print "[<>]", "checks on the edgelist vs the orig graph"

        ## --
        convert_dimacs_trees_to_cliquetrees(gname)
        print "[<>]", "convert_dimacs_trees_to_cliquetrees"

        ## --
        elfiles = glob(".tmp_edgelists/{}*tsv".format(gname))
        subgraphs = [load_edgelist(f) for f in elfiles]
        prod_rules = [
            phrg.probabilistic_hrg_deriving_prod_rules(G) for G in subgraphs
        ]
        import itertools
        prod_rules = list(itertools.chain.from_iterable(prod_rules))
        pd.DataFrame(prod_rules).to_csv(concat_prs,
                                        sep="\t",
                                        header=False,
                                        index=False)

        ## --
        dimacs_files = glob("datasets/{}*.dimacs".format(gname))
        var_el_lst = ['mcs', 'mind', 'minf', 'mmd', 'lexm', 'mcsm']
        for gfname in dimacs_files:
            for ve in var_el_lst:
                multiprocessing.Process(target=dimacs_inddgo_tree_decomps,
                                        args=(
                                            ve,
                                            gfname,
                                        )).start()

        print "[<>]", "checks on the edgelist vs the orig graph"

    print "[<>]", "concat hrg prod_rules:", concat_prs
Example #4
def Hstar_Graphs_Control(G, graph_name, axs=None):

    # Derive the production rules in a naive way
    prod_rules = phrg.probabilistic_hrg_learning(G)
    pp.pprint(prod_rules)
    # exit()  # debug leftover; disabled so the sampling below can run
    g = pcfg.Grammar('S')
    for (id, lhs, rhs, prob) in prod_rules:
        g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

    num_nodes = G.number_of_nodes()

    print "Starting max size", 'n=', num_nodes
    g.set_max_size(num_nodes)

    print "Done with max size"

    Hstars = []

    num_samples = 20
    print '*' * 40
    for i in range(0, num_samples):
        rule_list = g.sample(num_nodes)
        hstar = phrg.grow(rule_list, g)[0]
        Hstars.append(hstar)

    # if 0:
    #   g = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr=['ts'])
    #   draw_degree_whole_graph(g,axs)
    #   draw_degree(Hstars, axs=axs, col='r')
    #   #axs.set_title('Rules derived by ignoring time')
    #   axs.set_ylabel('Frequency')
    #   axs.set_xlabel('degree')

    if 0:
        # metricx = [ 'degree','hops', 'clust', 'assort', 'kcore','eigen','gcd']
        metricx = ['gcd']
        # g = nx.from_pandas_dataframe(df, 'src', 'trg',edge_attr=['ts'])
        # graph_name = os.path.basename(f_path).rstrip('.tel')
        if DBG: print ">", graph_name
        metrics.network_properties([G],
                                   metricx,
                                   Hstars,
                                   name=graph_name,
                                   out_tsv=True)
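A minimal hedged call, assuming G is a connected networkx graph that passes phrg's graph checks; the graph and name below are illustrative:

# Hypothetical usage.
G = nx.karate_club_graph()
Hstar_Graphs_Control(G, 'karate')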
Example #5
def exec_call(arg_a):
    """
	call_out = ""
	args = ["python exact_phrg.py --orig {} --prs".format(arg_a)]
	print args
	while not call_out:
		popen = subprocess.Popen(args, stdout=subprocess.PIPE, shell=True)
		popen.wait()
		out, err = popen.communicate()
		call_out = out.split('\n')
	print call_out, out, err
	"""
    G = load_edgelist(arg_a)
    prod_rules = phrg.probabilistic_hrg_deriving_prod_rules(G)
    pp.pprint(prod_rules)
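The docstring above preserves an older subprocess-based variant as dead code; the live body only derives and pretty-prints the rules. A hedged call sketch (the path is hypothetical):

exec_call('datasets/karate.edgelist')  # prints the derived production rules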
Example #6
def grow_exact_size_hrg_graphs_from_prod_rules(prod_rules, gname, n, runs=1):
    """
	Args:
		prod_rules: production rules (model)
		gname: graph name
		n:     target graph order (number of nodes)
		runs:  how many graphs to generate

	Returns: list of synthetic graphs

	"""
    nslog("grow_exact_size_hrg_graphs_from_prod_rules")
    DBG = True
    if n <= 0: sys.exit(1)

    g = pcfg.Grammar('S')
    for (id, lhs, rhs, prob) in prod_rules:
        g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

    print
    print "Added rules HRG (pr", len(prod_rules), ", n,", n, ")"
    exit()  # temp pls remove me

    num_nodes = n
    if DBG: print "Starting max size"
    g.set_max_size(num_nodes)
    if DBG: print "Done with max size"

    hstars_lst = []
    print "  ",
    for i in range(0, runs):
        print '>',
        rule_list = g.sample(num_nodes)
        hstar = phrg.grow(rule_list, g)[0]
        hstars_lst.append(hstar)

    return hstars_lst
Example #7
# ['r8.1', 'A,B,C,D,E,F',  ['0,B:T', '0,C:T', '0,D:T', '0,E:T', '0,F:T', '0,A:T', 'A,0,D,E,F:N'] ,0.5]
# ]
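This fragment references rules, num_nodes, G, and graph_name without defining them; a hedged sketch of the missing setup, with the rule tuple following the format shown in the comment above (all values are illustrative):

# Hypothetical setup for the fragment below.
import networkx as nx

G = nx.karate_club_graph()            # stand-in original graph
graph_name = 'karate'
num_nodes = G.number_of_nodes()       # target order for sampling
rules = [                             # (id, lhs, rhs, probability)
    ('r0.0', 'S', ['0,1:T', '0,1:N'], 1.0),
]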

g = pcfg.Grammar('S')
for (id, lhs, rhs, prob) in rules:
    g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

print 'Grammar g loaded.'
# Synthetic Graphs
#num_nodes = int(sys.argv[-1])
g.set_max_size(num_nodes)

hStars = []
for i in range(20):
    rule_list = g.sample(num_nodes)
    hstar = phrg.grow(rule_list, g)[0]
    hStars.append(hstar)
    print i, hstar.number_of_nodes(), hstar.number_of_edges()

metricx = ['degree', 'hops', 'clust', 'gcd']
metrics.network_properties([G], metricx, hStars, name=graph_name, out_tsv=True)

# parser = get_parser()
# args = vars(parser.parse_args())
# try:
#     main(args)
# except Exception, e:
#     print str(e)
#     traceback.print_exc()
#     sys.exit(1)
# sys.exit(0)
Example #8
def dimacs_td_ct(tdfname, synthg=False):
    """ tree decomp to clique-tree """
    if isinstance(tdfname, list): return [dimacs_td_ct(f) for f in tdfname]
    #	print '... input file:', tdfname

    fname = tdfname
    graph_name = os.path.basename(fname)
    gname = graph_name.split('.')[0]
    if synthg:
        gfname = 'datasets/' + gname + ".dimacs"
    else:
        gfname = "datasets/out." + gname
    if DEBUG: print os.path.basename(fname).split('.')[-2]
    tdh = os.path.basename(fname).split('.')[-2]  # tree decomp heuristic
    tfname = gname + "." + tdh

    if synthg:
        G = load_edgelist(tdfname.split('.')[0] + ".dimacs")
    else:
        G = load_edgelist(gfname)

    if DEBUG: print nx.info(G)
    if not os.path.exists(fname):
        print fname, 'this file does not exist (possible failure in the TD step)'
        return ''

    with open(fname, 'r') as f:  # read tree decomp from inddgo
        lines = f.readlines()
        lines = [x.rstrip('\r\n') for x in lines]

    cbags = {}
    bags = [x.split() for x in lines if x.startswith('B')]

    for b in bags:
        cbags[int(b[1])] = [int(x) for x in b[3:]]  # what to do with bag size?

    edges = [x.split()[1:] for x in lines if x.startswith('e')]
    edges = [[int(k) for k in x] for x in edges]
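    # For reference, a hedged sketch of the INDDGO-style tree decomposition
    # format parsed above (layout inferred from this parser; exact headers vary):
    #   B 1 3 1 2 3    <- bag 1, size 3, holding vertices {1, 2, 3}
    #   B 2 2 2 3      <- bag 2, size 2, holding vertices {2, 3}
    #   e 1 2          <- tree edge connecting bags 1 and 2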

    tree = defaultdict(set)
    for s, t in edges:
        tree[frozenset(cbags[s])].add(frozenset(cbags[t]))
        if DEBUG: print '.. # of keys in `tree`:', len(tree.keys())
    if DEBUG: print tree.keys()
    root = list(tree)[0]
    if DEBUG: print '.. Root:', root
    root = frozenset(cbags[1])
    if DEBUG: print '.. Root:', root
    T = td.make_rooted(tree, root)
    if DEBUG: print '.. T rooted:', len(T)
    # nfld.unfold_2wide_tuple(T) # lets me display the tree's frozen sets

    T = phrg.binarize(T)

    prod_rules = {}
    td.new_visit(T, G, prod_rules)

    if DEBUG: print "--------------------"
    if DEBUG: print "- Production Rules -"
    if DEBUG: print "--------------------"

    for k in prod_rules.iterkeys():
        if DEBUG: print k
        s = 0
        for d in prod_rules[k]:
            s += prod_rules[k][d]
        for d in prod_rules[k]:
            prod_rules[k][d] = float(prod_rules[k][d]) / float(
                s)  # normalization step to create probs not counts.
            if DEBUG: print '\t -> ', d, prod_rules[k][d]

    rules = []
    id = 0
    for k, v in prod_rules.iteritems():
        sid = 0
        for x in prod_rules[k]:
            rhs = re.findall("[^()]+", x)
            rules.append(
                ("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs,
                 prod_rules[k][x]))
            if DEBUG:
                print("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0],
                      rhs, prod_rules[k][x])

            sid += 1
        id += 1

    df = pd.DataFrame(rules)

    outdf_fname = "ProdRules/" + tfname + "_iprules.tsv"

    if not os.path.isfile(outdf_fname):
        #		print '...',outdf_fname, "written"
        df.to_csv(outdf_fname, header=False, index=False, sep="\t")
    else:
        print '\t', outdf_fname, "file exists"

    return outdf_fname
Example #9
def isomorphic_test_from_dimacs_tree(orig, tdfname, gname="", iargs=""):
	# if whole tree path
	# else, assume a path fragment
	print '... path fragment:', tdfname
	print '... input graph	:', orig


	G = load_edgelist(orig) # load edgelist into a graph obj
	N = G.number_of_nodes()
	M = G.number_of_edges()
	# +++ Graph Checks
	if G is None: sys.exit(1)
	G.remove_edges_from(G.selfloop_edges())
	giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
	G = nx.subgraph(G, giant_nodes)
	graph_checks(G)
	# --- graph checks

	G.name = gname

	files = glob(tdfname+"*.dimacs.tree")
	prod_rules = {}
	stacked_df = pd.DataFrame()

	mat_dict = {}
	for i,x in enumerate(sorted(files)):
		mat_dict[os.path.basename(x).split(".")[0].split("_")[-1]]=i
		if DBG: print os.path.basename(x).split(".")[0].split("_")[-1]

	for tfname in files:
		tname = os.path.basename(tfname).split(".")
		tname = "_".join(tname[:2])

		with open(tfname, 'r') as f:	# read tree decomp from inddgo
			lines = f.readlines()
			lines = [x.rstrip('\r\n') for x in lines]

		cbags = {}
		bags = [x.split() for x in lines if x.startswith('B')]

		for b in bags:
			cbags[int(b[1])] = [int(x) for x in b[3:]]	# what to do with bag size?

		edges = [x.split()[1:] for x in lines if x.startswith('e')]
		edges = [[int(k) for k in x] for x in edges]

		tree = defaultdict(set)
		for s, t in edges:
			tree[frozenset(cbags[s])].add(frozenset(cbags[t]))
			if DBG: print '.. # of keys in `tree`:', len(tree.keys())

		root = frozenset(cbags[1])	# root the tree at the first bag
		T = td.make_rooted(tree, root)
		# nfld.unfold_2wide_tuple(T) # lets me display the tree's frozen sets

		T = phrg.binarize(T)
		# root = list(T)[0]
		# root, children = T
		# td.new_visit(T, G, prod_rules, TD)
		# print ">>",len(T)

		td.new_visit(T, G, prod_rules)


		for k in prod_rules.iterkeys():
			if DBG: print k
			s = 0
			for d in prod_rules[k]:
				s += prod_rules[k][d]
			for d in prod_rules[k]:
				prod_rules[k][d] = float(prod_rules[k][d]) / float(s)	# normalization step to create probs not counts.
				if DBG: print '\t -> ', d, prod_rules[k][d]

		if DBG: print "--------------------"
		if DBG: print '- Prod. Rules'
		if DBG: print "--------------------"
		rules = []
		id = 0
		for k, v in prod_rules.iteritems():
			sid = 0
			for x in prod_rules[k]:
				rhs = re.findall("[^()]+", x)
				rules.append(("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x]))
				if DBG: print "r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x]
				sid += 1
			id += 1

		df = pd.DataFrame(rules)
		print df.shape
		df['cate'] = tname
		stacked_df = pd.concat([df, stacked_df])

	if iargs and iargs['cnts']:	# guard: the default iargs="" has no 'cnts' key
		return stacked_df,mat_dict
	else:
		np_sqr_mtrx = jaccard_coeff_isomorphic_rules_check(stacked_df, mat_dict)
		print gname
		df = pd.DataFrame(np_sqr_mtrx, columns=[x for x in sorted(mat_dict.keys())])
		df.index = sorted(mat_dict.keys())
		df.to_csv("Results/{}_isom_jaccardsim.tsv".format(gname), sep=",")
		
		return stacked_df,mat_dict #ToDo: not sure if I want to return this
Example #10
def get_hrg_production_rules(edgelist_data_frame,
                             graph_name,
                             tw=False,
                             n_subg=2,
                             n_nodes=300):
    from tdec.growing import derive_prules_from
    nslog("get_hrg_production_rules")

    df = edgelist_data_frame
    if df.shape[1] == 4:
        G = nx.from_pandas_dataframe(df, 'src', 'trg',
                                     edge_attr=True)  # whole graph
    elif df.shape[1] == 3:
        G = nx.from_pandas_dataframe(df, 'src', 'trg', ['ts'])  # whole graph
    else:
        G = nx.from_pandas_dataframe(df, 'src', 'trg')
    G.name = graph_name

    G.remove_edges_from(G.selfloop_edges())
    giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
    G = nx.subgraph(G, giant_nodes)

    num_nodes = G.number_of_nodes()

    phrg.graph_checks(G)

    if DBG: print
    if DBG: print "--------------------"
    if not DBG: print "-Tree Decomposition-"
    if DBG: print "--------------------"

    prod_rules = {}
    K = n_subg
    n = n_nodes
    if num_nodes >= 500:
        print 'Grande'  # large graph: derive rules from sampled subgraphs
        for Gprime in gs.rwr_sample(G, K, n):
            T = td.quickbb(Gprime)
            root = list(T)[0]
            T = td.make_rooted(T, root)
            T = phrg.binarize(T)
            root = list(T)[0]
            root, children = T
            #td.new_visit(T, G, prod_rules, TD)
            td.new_visit(T, G, prod_rules)
    else:
        T = td.quickbb(G)
        root = list(T)[0]
        T = td.make_rooted(T, root)
        T = phrg.binarize(T)
        root = list(T)[0]
        root, children = T
        # td.new_visit(T, G, prod_rules, TD)
        td.new_visit(T, G, prod_rules)

    if tw:
        print_treewidth(T)
        exit()
    ## --
    print("prod_rules:", len(prod_rules), type(prod_rules))

    if DBG: print
    if DBG: print "--------------------"
    if DBG: print "- Production Rules -"
    if DBG: print "--------------------"

    for k in prod_rules.iterkeys():
        if DBG: print k
        s = 0
        for d in prod_rules[k]:
            s += prod_rules[k][d]
        for d in prod_rules[k]:
            prod_rules[k][d] = float(prod_rules[k][d]) / float(
                s)  # normalization step to create probs not counts.
            if DBG: print '\t -> ', d, prod_rules[k][d]

    rules = []
    id = 0
    for k, v in prod_rules.iteritems():
        sid = 0
        for x in prod_rules[k]:
            rhs = re.findall("[^()]+", x)
            rules.append(
                ("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs,
                 prod_rules[k][x]))
            if DBG:
                print("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0],
                      rhs, prod_rules[k][x])
            sid += 1
        id += 1

    df = pd.DataFrame(rules)
    print "++++++++++"
    df.to_csv('ProdRules/{}_prs.tsv'.format(G.name),
              header=False,
              index=False,
              sep="\t")
    if os.path.exists('ProdRules/{}_prs.tsv'.format(G.name)):
        print 'Saved', 'ProdRules/{}_prs.tsv'.format(G.name)
    else:
        print "Trouble saving"

    print "-----------"

    print [type(x) for x in rules[0]]
    '''
	Graph Generation of Synthetic Graphs
	Grow graphs using the union of rules from sampled subgraphs to predict the target order of the
	original graph
	'''
    hStars = grow_exact_size_hrg_graphs_from_prod_rules(
        rules, graph_name, G.number_of_nodes(), 10)
    print '... hStar graphs:', len(hStars)

    if 0:
        metricx = [
            'degree', 'hops', 'clust', 'assort', 'kcore', 'eigen', 'gcd'
        ]
        metricx = ['gcd']
        metrics.network_properties([G],
                                   metricx,
                                   hStars,
                                   name=graph_name,
                                   out_tsv=False)
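A hedged sketch of driving get_hrg_production_rules from an edgelist file; the path is hypothetical, and a two-column file falls through to the plain src/trg branch above:

# Hypothetical driver.
df = pd.read_csv('datasets/karate.edgelist', delim_whitespace=True,
                 header=None, names=['src', 'trg'])
get_hrg_production_rules(df, 'karate')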
def dimacs_td_ct_fast(oriG, tdfname):
    """ tree decomp to clique-tree 
	parameters:
		orig:			filepath to orig (input) graph in edgelist
		tdfname:	filepath to tree decomposition from INDDGO
		synthg:		when the input graph is a syth (orig) graph
	Todo: 
		currently not handling sythg in this version of dimacs_td_ct
  """
    G = oriG
    if G is None: return (1)
    graph_checks(G)  # --- graph checks
    prod_rules = {}

    t_basename = os.path.basename(tdfname)
    out_tdfname = os.path.basename(t_basename) + ".prs"
    if os.path.exists("ProdRules/" + out_tdfname):
        print "==> exists:", out_tdfname
        return out_tdfname
    if 0: print "ProdRules/" + out_tdfname, tdfname

    with open(tdfname, 'r') as f:  # read tree decomp from inddgo
        lines = f.readlines()
        lines = [x.rstrip('\r\n') for x in lines]

    cbags = {}
    bags = [x.split() for x in lines if x.startswith('B')]

    for b in bags:
        cbags[int(b[1])] = [int(x) for x in b[3:]]  # what to do with bag size?

    edges = [x.split()[1:] for x in lines if x.startswith('e')]
    edges = [[int(k) for k in x] for x in edges]

    tree = defaultdict(set)
    for s, t in edges:
        tree[frozenset(cbags[s])].add(frozenset(cbags[t]))
        if DEBUG: print '.. # of keys in `tree`:', len(tree.keys())

    root = frozenset(cbags[1])  # root the tree at the first bag
    T = td.make_rooted(tree, root)
    # nfld.unfold_2wide_tuple(T) # lets me display the tree's frozen sets

    T = phrg.binarize(T)
    root = list(T)[0]
    root, children = T
    # td.new_visit(T, G, prod_rules, TD)
    # print ">>",len(T)

    td.new_visit(T, G, prod_rules)

    if 0: print "--------------------"
    if 0: print "- Production Rules -"
    if 0: print "--------------------"

    for k in prod_rules.iterkeys():
        if DEBUG: print k
        s = 0
        for d in prod_rules[k]:
            s += prod_rules[k][d]
        for d in prod_rules[k]:
            prod_rules[k][d] = float(prod_rules[k][d]) / float(
                s)  # normalization step to create probs not counts.
            if DEBUG: print '\t -> ', d, prod_rules[k][d]

    rules = []
    id = 0
    for k, v in prod_rules.iteritems():
        sid = 0
        for x in prod_rules[k]:
            rhs = re.findall("[^()]+", x)
            rules.append(
                ("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs,
                 prod_rules[k][x]))
            if 0:
                print("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0],
                      rhs, prod_rules[k][x])
            sid += 1
        id += 1
    # print rules
    if 0: print "--------------------"
    if 0: print '- P. Rules', len(rules)
    if 0: print "--------------------"
    '''
  # ToDo.
  # Let's save these rules to file or print proper
  df = DataFrame(rules)
  print "out_tdfname:", out_tdfname
  df.to_csv("ProdRules/" + out_tdfname, sep="\t", header=False, index=False)
  '''

    # g = pcfg.Grammar('S')
    # for (id, lhs, rhs, prob) in rules:
    #	g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

    # Synthetic Graphs
    #	hStars = grow_exact_size_hrg_graphs_from_prod_rules(rules, graph_name, G.number_of_nodes(), 20)
    #	# metricx = ['degree', 'hops', 'clust', 'assort', 'kcore', 'gcd'] # 'eigen'
    #	metricx = ['gcd','avgdeg']
    #	metrics.network_properties([G], metricx, hStars, name=graph_name, out_tsv=True)

    return ""