예제 #1
0
def base_graph_edgelist_to_prod_rules(pickle_fname):
    """
    Write temporary edgelist file(s) for a pickled graph.

    The graph is loaded from ``pickle_fname`` and its largest connected
    component (LCC) is extracted.  When the LCC has more than 500 nodes it is
    sampled with ``gs.rwr_sample(lcc, 2, 300)`` (2 x 300 per the original
    notes) and every sample is written with ``write_tmp_edgelist(sample, k)``.
    Otherwise a single file is written with ``write_tmp_edgelist(G)``.

    :param pickle_fname: path of a gpickle file readable by ``nx.read_gpickle``
    :return: list with the value(s) returned by ``write_tmp_edgelist``
    """
    Info("base_graph_edgelist_to_prod_rules")
    G = nx.read_gpickle(pickle_fname)
    subgraph = max(nx.connected_component_subgraphs(G), key=len)

    if subgraph.number_of_nodes() <= 500:
        # NOTE(review): the graph as loaded (not only its LCC) is written
        # here, exactly as before -- confirm that this is intended.
        return [write_tmp_edgelist(G)]

    # Drop only a literal ".p" suffix.  str.rstrip('.p') removes *every*
    # trailing '.' or 'p' character, which mangles names such as "pop.p".
    gname = os.path.basename(pickle_fname)
    if gname.endswith('.p'):
        gname = gname[:-len('.p')]

    results = []
    for k, Gprime in enumerate(gs.rwr_sample(subgraph, 2, 300)):
        print(k)
        Gprime.name = gname
        # sampled subgraph -> temporary edgelist
        results.append(write_tmp_edgelist(Gprime, k))
    return results
예제 #2
0
def ref_graph_largest_conn_componet(fname):
    """
    Sample a reference graph and write the samples as hidden edgelist files.

    The edgelist in ``fname`` is loaded into a graph (columns ``src`` and
    ``trg``).  When the largest connected component has at least 500 nodes,
    samples drawn with ``gs.rwr_sample(G, 2, 300)`` are written to
    ``.<graph name>_lcc_<cnt>.edl`` in the current directory.

    NOTE(review): the visible body ends inside the sampling loop and never
    returns the collected file names; the definition looks truncated in this
    view, so no further behaviour is assumed here.

    :param fname: path to the edgelist file
    :return: ``""`` when writing a sample fails; nothing is returned on the
        other visible paths
    """
    df = Pandas_DataFrame_From_Edgelist([fname])[0]
    G = nx.from_pandas_dataframe(df, source='src', target='trg')
    Gc = max(nx.connected_component_subgraphs(G), key=len)
    gname = graph_name(fname)
    num_nodes = Gc.number_of_nodes()
    subg_fnm_lst = []

    ## sample of the graph larger than 500 nodes
    if num_nodes >= 500:
        cnt = 0
        # NOTE(review): the size test uses the LCC but the sample is drawn
        # from the whole graph G -- confirm which one is intended.
        for Gprime in gs.rwr_sample(G, 2, 300):
            # Build the file name once so the recorded name and the written
            # file cannot drift apart.
            out_fname = '.{}_lcc_{}.edl'.format(gname, cnt)
            subg_fnm_lst.append(out_fname)
            try:
                nx.write_edgelist(Gprime, out_fname, data=False)
            except Exception as e:  # "as" form is valid on Python 2.6+ and 3
                print(str(e), '\n!!Error writing to disk')
                return ""
            else:
                cnt += 1
예제 #3
0
def nx_edges_to_nddgo_graph_sampling(graph, n, m, peo_h):
    """
    Write sampled subgraphs of ``graph`` as DIMACS edge files.

    ``K = ceil(0.25 * number_of_nodes / 256)`` is passed with a size of 256
    to ``gs.rwr_sample``.  Sample ``j`` is written to
    ``datasets/<graph.name>_<peo_h>_<j>.dimacs`` with its edges sorted by
    source node.  Sampling stops at the first output file that already
    exists.

    :param graph: graph object exposing ``name``, ``number_of_nodes()`` and
        accepted by ``gs.rwr_sample``; node labels must be convertible to int
    :param n: value written as the node count in the ``p edge`` header line
    :param m: value written as the edge count in the ``p edge`` header line
    :param peo_h: tag embedded in the output file names
    :return: the common file-name prefix, or ``None`` when both ``n`` and
        ``m`` are ``None``
    """
    G = graph
    if n is None and m is None:
        return
    nbr_nodes = 256
    basefname = 'datasets/{}_{}'.format(G.name, peo_h)

    K = int(math.ceil(.25 * G.number_of_nodes() / nbr_nodes))

    for j, Gprime in enumerate(gs.rwr_sample(G, K, nbr_nodes)):
        ofname = basefname + "_{}.dimacs".format(j)
        # Check first: no point in building the edge table for a file that
        # will not be written.
        if os.path.exists(ofname):
            break

        edges = [(int(e[0]), int(e[1])) for e in Gprime.edges()]
        # Explicit column labels keep sort_values() working when a sample
        # has no edges at all.
        df = pd.DataFrame(edges, columns=[0, 1])
        df.sort_values(by=[0], inplace=True)

        with open(ofname, 'w') as f:
            f.write('c {}\n'.format(G.name))
            # NOTE(review): the header carries the caller-supplied n and m,
            # not the sample's own counts -- confirm this is intended.
            f.write('p edge\t{}\t{}\n'.format(n, m))
            # Plain iteration instead of DataFrame.apply: apply is meant for
            # computing values, and older pandas releases may invoke the
            # callback twice for the first row, duplicating an edge line.
            f.writelines("e\t{}\t{}\n".format(src, trg)
                         for src, trg in df.itertuples(index=False))
        if os.path.exists(ofname):
            print('Wrote: {}'.format(ofname))

    return basefname
def get_sampled_gpickled_graphs(G):
	"""
	Pickle the (sampled) largest connected component of ``G`` and collect
	production rules from its tree decomposition(s).

	Self-loops are removed from ``G`` in place, the component sizes are
	printed and only the largest component is kept.  A component with more
	than 500 nodes is sampled with ``rwr_sample(G, 2, 300)``; each sample is
	pickled to ``../datasets/<gn>_<j>.p`` and decomposed.  A smaller component
	is pickled to ``../datasets/<gn>.p`` and decomposed whole.

	``gn`` is not defined in this function; it is read from the enclosing
	scope.

	:param G: graph whose self-loop edges are removed in place
	:return: dict filled by ``new_visit``
	"""
	def _visit_decomposition(target):
		# decompose -> root at the first element -> binarize -> visit
		tree = quickbb(target)
		tree = make_rooted(tree, list(tree)[0])
		tree = binarize(tree)
		root = list(tree)[0]
		root, children = tree
		# rules are always extracted against the giant component G
		new_visit(tree, G, prod_rules)

	G.remove_edges_from(G.selfloop_edges())
	component_sizes = [
		comp.number_of_nodes()
		for comp in sorted(nx.connected_component_subgraphs(G), key=len)
	]
	print (component_sizes)
	giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
	G = nx.subgraph(G, giant_nodes)
	graph_checks(G)

	prod_rules = {}

	if not G.number_of_nodes() > 500:
		# small enough: handle the whole component at once
		nx.write_gpickle(G, "../datasets/{}.p".format(gn))
		_visit_decomposition(G)
		return prod_rules

	for j, Gprime in enumerate(rwr_sample(G, 2, 300)):
		nx.write_gpickle(Gprime, "../datasets/{}_{}.p".format(gn, str(j)))
		_visit_decomposition(Gprime)
	return prod_rules
예제 #5
0
def get_hrg_production_rules(edgelist_data_frame,
                             graph_name,
                             tw=False,
                             trials=10,
                             n_subg=2,
                             n_nodes=300,
                             nstats=False):
    """
    Derive HRG production rules from an edgelist and grow synthetic graphs.

    The edgelist becomes a graph (columns ``src``/``trg``; a three-column
    frame also reads ``ts``, a four-column frame keeps all edge attributes).
    Self-loops are removed and only the largest connected component is kept.
    Graphs with at least 500 nodes are sampled with
    ``gs.rwr_sample(G, n_subg, n_nodes)`` and every sample is
    tree-decomposed; smaller graphs are decomposed whole.  The counts
    gathered by ``td.new_visit`` are normalised per left-hand side, flattened
    into ``(rule id, lhs, rhs, probability)`` tuples and passed to
    ``grow_exact_size_hrg_graphs_from_prod_rules``.  The grown graphs are
    pickled to ``Results/<graph_name>_hstars.pickle``.

    :param edgelist_data_frame: pandas DataFrame with ``src`` and ``trg``
        columns (optionally more, see above)
    :param graph_name: name given to the graph and used in the output path
    :param tw: not used in the visible part of this function
    :param trials: forwarded to the graph-growing routine
    :param n_subg: second argument of ``gs.rwr_sample``
    :param n_nodes: third argument of ``gs.rwr_sample``
    :param nstats: not used in the visible part of this function
    """
    from core.growing import derive_prules_from

    def _collect_rules(decomp_graph):
        # Tree-decompose ``decomp_graph``, root and binarize the tree, then
        # accumulate rule counts (always against the giant component G).
        T = td.quickbb(decomp_graph)
        T = td.make_rooted(T, list(T)[0])
        T = phrg.binarize(T)
        td.new_visit(T, G, prod_rules)

    t_start = time.time()
    df = edgelist_data_frame
    if df.shape[1] == 4:
        G = nx.from_pandas_dataframe(df, 'src', 'trg',
                                     edge_attr=True)  # whole graph
    elif df.shape[1] == 3:
        G = nx.from_pandas_dataframe(df, 'src', 'trg', ['ts'])  # whole graph
    else:
        G = nx.from_pandas_dataframe(df, 'src', 'trg')
    G.name = graph_name
    print("==> read in graph took: {} seconds".format(time.time() - t_start))

    G.remove_edges_from(G.selfloop_edges())
    giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
    G = nx.subgraph(G, giant_nodes)

    num_nodes = G.number_of_nodes()

    phrg.graph_checks(G)

    if DBG:
        print('')
        print("--------------------")
        print("--------------------")
    else:
        print("-Tree Decomposition-")

    prod_rules = {}
    K = n_subg
    n = n_nodes
    if num_nodes >= 500:
        print('Grande')
        t_start = time.time()
        for Gprime in gs.rwr_sample(G, K, n):
            # The rules are collected synchronously.  The extra
            # Process(target=td.new_visit, ...) that used to be started here
            # repeated the same visit in a child that was never joined
            # (presumably multiprocessing.Process, whose child works on its
            # own copy of prod_rules), so it has been removed.
            _collect_rules(Gprime)
    else:
        _collect_rules(G)

    if DBG:
        print('')
        print("--------------------")
        print("- Production Rules -")
        print("--------------------")

    # Normalisation step: turn counts into probabilities per left-hand side.
    for lhs in prod_rules:
        if DBG: print(lhs)
        rhs_counts = prod_rules[lhs]
        total = float(sum(rhs_counts.values()))
        for rhs_key in rhs_counts:
            rhs_counts[rhs_key] = float(rhs_counts[rhs_key]) / total
            if DBG: print('\t ->  {} {}'.format(rhs_key, rhs_counts[rhs_key]))

    # Flatten to (rule id, lhs symbol, rhs symbol list, probability).
    rules = []
    for rule_id, lhs in enumerate(prod_rules):
        lhs_symbol = "%s" % re.findall("[^()]+", lhs)[0]
        for sid, rhs_key in enumerate(prod_rules[lhs]):
            rule = ("r%d.%d" % (rule_id, sid), lhs_symbol,
                    re.findall("[^()]+", rhs_key), prod_rules[lhs][rhs_key])
            rules.append(rule)
            if DBG: print(rule)

    df = pd.DataFrame(rules)

    # Graph generation of synthetic graphs: grow graphs using the union of
    # rules from the sampled subgraphs, targeting the order of the original
    # graph.
    hStars = grow_exact_size_hrg_graphs_from_prod_rules(
        rules, graph_name, G.number_of_nodes(), trials)
    print('... hStart graphs: {}'.format(len(hStars)))

    if not os.path.exists(r"Results/"): os.makedirs(r"Results/")

    out_path = r"Results/{}_hstars.pickle".format(graph_name)
    with open(out_path, "wb") as output_file:
        cPickle.dump(hStars, output_file)
    if os.path.exists(out_path):
        print("File saved")
    '''if nstats:
예제 #6
0
def new_main(args):
    """
    Run the pipeline step selected by the first non-``None`` entry of ``args``.

    The keys are checked in this order:

    * ``base`` -- glob ``../datasets/<name>*.p``, convert the files to
      dimacs, explode them to trees, pretty-print the results and exit.
    * ``orig`` -- load an edgelist, take what ``largest_conn_comp`` returns
      (one graph or a list of graphs) and pickle it under ``../datasets/``,
      then exit.
    * ``edgelist2dimacs`` -- load an edgelist and, when its largest connected
      component has more than 500 nodes, convert ``gs.rwr_sample(lcc, 2,
      300)`` samples with ``convert_graph_obj_2dimacs``.
    * ``prules`` -- call ``dimacs_td_ct_fast`` on every pairing of the
      ``*.p`` and ``*.tree`` files of the graph, then exit.
    * ``td`` -- for every ``*.dimacs`` file of the graph, schedule
      ``dimacs_nddgo_tree_simple`` once per variable-elimination heuristic
      on a two-process pool.

    When none of the keys is set the interpreter exits with status 0.

    :param args: mapping holding all five keys above; a selected entry is
        indexed with ``[0]`` to obtain a path
    """
    if args['base'] is not None:
        Info("<- converts to dimacs")
        gn = graph_name(args['base'][0])
        files = glob("../datasets/" + gn + "*.p")
        dimacs_lst = transform_edgelist_to_dimacs(files)
        results = []
        trees = explode_to_trees(dimacs_lst, results)

        pp.pprint(files)
        pp.pprint(dimacs_lst)
        pp.pprint(trees)
        print('')
        pp.pprint(results)
        exit(0)
    elif args['orig'] is not None:
        Info("<- converts edgelist gpickle")
        f = args['orig'][0]
        g = load_edgelist(f)  # full graph
        Info("# of conn comp: %d" %
             len(list(nx.connected_component_subgraphs(g))))
        g = largest_conn_comp(f)  # largest conn comp
        # The longest dot-separated piece of the graph name is the file stem.
        out_base = "../datasets/" + max(graph_name(f).split("."), key=len)
        if isinstance(g, list):
            for k, Gprime in enumerate(g):
                subg_out_fname = out_base + "_{}.p".format(k)
                nx.write_gpickle(Gprime, subg_out_fname)
                if os.path.exists(subg_out_fname):
                    Info("Wrote %s" % subg_out_fname)
        else:
            subg_out_fname = out_base + ".p"
            nx.write_gpickle(g, subg_out_fname)
            if os.path.exists(subg_out_fname):
                Info("Wrote %s" % subg_out_fname)
        print("done")
        exit()
    elif args['edgelist2dimacs'] is not None:
        f = args['edgelist2dimacs'][0]
        pfname = "../datasets/{}.p".format(graph_name(f))
        if not os.path.exists(pfname):
            # Informational only: processing continues from the edgelist.
            Info("File not found, please run:")
            Info("  python explodingTree.py --orig path/to/edgelist")
        G = load_edgelist(f)
        subgraph = max(nx.connected_component_subgraphs(G), key=len)
        gprime_lst = []
        if subgraph.number_of_nodes() > 500:
            for j, Gprime in enumerate(gs.rwr_sample(subgraph, 2, 300)):
                Gprime.name = G.name + "_%d" % j
                gprime_lst.append(convert_graph_obj_2dimacs([Gprime]))
            print(gprime_lst)

    elif args['prules'] is not None:
        gn = graph_name(args['prules'][0])
        print(gn)
        files = glob("../datasets/" + gn + "*.tree")
        graphs = glob("../datasets/" + gn + "*.p")
        for g in graphs:
            for f in files:
                dimacs_td_ct_fast(g, f)  # dimacs to tree (decomposition)
        exit(0)
    elif args['td'] is not None:
        dimacs_f = glob("../datasets/" + graph_name(args['td'][0]) +
                        "*.dimacs")
        # "Explode to trees"  # ToDo
        var_els = ['mcs', 'mind', 'minf', 'mmd', 'lexm', 'mcsm']
        # One pool for all files.  Creating a pool per file left every pool
        # but the last one unclosed, and raised NameError on close() when no
        # dimacs file matched.
        pool = mp.Pool(processes=2)
        try:
            for f in dimacs_f:
                print(f)
                dimacs_file = "../datasets/{}.dimacs".format(graph_name(f))
                for vael in var_els:
                    pool.apply_async(dimacs_nddgo_tree_simple,
                                     args=(
                                         dimacs_file,
                                         vael,
                                     ),
                                     callback=collect_results_trees)
        finally:
            pool.close()
            pool.join()
    else:
        # Nothing selected.  The statements that used to follow this call
        # could never run and have been dropped.
        sys.exit(0)
예제 #7
0
def get_phrg_production_rules(argmnts):
	"""
	Derive PHRG production rules from an edgelist and save them as TSV.

	The edgelist named by ``argmnts['orig']`` becomes a graph (columns
	``src``/``trg``; a three-column frame also reads ``ts``, a four-column
	frame keeps all edge attributes).  Self-loops are removed and only the
	largest connected component is kept.  Graphs with at least 500 nodes are
	sampled with ``gs.rwr_sample(G, 2, 300)`` and every sample is
	tree-decomposed; smaller graphs are decomposed whole.  The counts
	gathered by ``td.new_visit`` are normalised per left-hand side and
	written as ``(rule id, lhs, rhs, probability)`` rows to
	``../ProdRules/<graph name>.tsv.phrg.prs``.

	:param argmnts: mapping whose ``'orig'`` entry is a list of edgelist
		paths; only the first one is used
	"""
	def _collect_rules(decomp_graph):
		# Tree-decompose ``decomp_graph``, root and binarize the tree, then
		# accumulate rule counts (always against the giant component G).
		T = td.quickbb(decomp_graph)
		T = td.make_rooted(T, list(T)[0])
		T = phrg.binarize(T)
		td.new_visit(T, G, prod_rules)

	args = argmnts

	t_start = time.time()
	df = tdf.Pandas_DataFrame_From_Edgelist(args['orig'])[0]
	if df.shape[1] == 4:
		G = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr=True)	# whole graph
	elif df.shape[1] == 3:
		G = nx.from_pandas_dataframe(df, 'src', 'trg', ['ts'])	# whole graph
	else:
		G = nx.from_pandas_dataframe(df, 'src', 'trg')
	G.name = graph_name(args['orig'][0])
	print("==> read in graph took: {} seconds".format(time.time() - t_start))
	G.remove_edges_from(G.selfloop_edges())
	giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
	G = nx.subgraph(G, giant_nodes)

	num_nodes = G.number_of_nodes()

	phrg.graph_checks(G)

	if DBG:
		print('')
		print("--------------------")
		print("--------------------")
	else:
		print("-Tree Decomposition-")

	prod_rules = {}
	K = 2
	n = 300
	if num_nodes >= 500:
		print('Grande')
		t_start = time.time()
		for Gprime in gs.rwr_sample(G, K, n):
			# The rules are collected synchronously.  The extra
			# Process(target=td.new_visit, ...) that used to be started here
			# repeated the same visit in a child that was never joined
			# (presumably multiprocessing.Process, whose child works on its
			# own copy of prod_rules), so it has been removed.
			_collect_rules(Gprime)
	else:
		_collect_rules(G)
		# print_treewidth(T) # TODO: needs to be fixed

	if DBG:
		print('')
		print("--------------------")
		print("- Production Rules -")
		print("--------------------")

	# Normalisation step: turn counts into probabilities per left-hand side.
	for lhs in prod_rules:
		if DBG: print(lhs)
		rhs_counts = prod_rules[lhs]
		total = float(sum(rhs_counts.values()))
		for rhs_key in rhs_counts:
			rhs_counts[rhs_key] = float(rhs_counts[rhs_key]) / total
			if DBG: print('\t ->  {} {}'.format(rhs_key, rhs_counts[rhs_key]))

	# Flatten to (rule id, lhs symbol, rhs symbol list, probability).
	rules = []
	for rule_id, lhs in enumerate(prod_rules):
		lhs_symbol = "%s" % re.findall("[^()]+", lhs)[0]
		for sid, rhs_key in enumerate(prod_rules[lhs]):
			rule = ("r%d.%d" % (rule_id, sid), lhs_symbol,
					re.findall("[^()]+", rhs_key), prod_rules[lhs][rhs_key])
			rules.append(rule)
			if DBG: print(rule)

	df = pd.DataFrame(rules)

	out_fname = '../ProdRules/{}.tsv.phrg.prs'.format(G.name)
	df.to_csv(out_fname, header=False, index=False, sep="\t")
	if os.path.exists(out_fname):
		print('Saved {}'.format(out_fname))
	else:
		print("Trouble saving")
	print("-----------")
	# rules may be empty; indexing rules[0] unconditionally raised IndexError
	if rules:
		print([type(x) for x in rules[0]])

	'''
예제 #8
0
def get_hrg_production_rules_given(G,
                                   tw=False,
                                   n_subg=2,
                                   n_nodes=300,
                                   nstats=False):
    """
    Derive HRG production rules from ``G`` and grow synthetic graphs.

    Self-loops are removed from ``G`` in place and only the largest connected
    component is kept.  Graphs with at least 500 nodes are sampled with
    ``gs.rwr_sample(G, n_subg, n_nodes)`` and every sample is
    tree-decomposed; smaller graphs are decomposed whole.  The counts
    gathered by ``td.new_visit`` are normalised per left-hand side, flattened
    into ``(rule id, lhs, rhs, probability)`` tuples and passed to
    ``grow_hrg_graphs_with_infinity``.  The result is pickled, wrapped in a
    one-entry dict, to ``Results/<graph_name>_hstars.pickle``.

    NOTE(review): ``graph_name`` is neither a parameter nor a local of this
    function; it is resolved from the enclosing scope and is used here as a
    string.  Verify what it is bound to in this module.

    :param G: graph whose self-loop edges are removed in place
    :param tw: not used in the visible part of this function
    :param n_subg: second argument of ``gs.rwr_sample``
    :param n_nodes: third argument of ``gs.rwr_sample``
    :param nstats: not used in the visible part of this function
    """
    def _collect_rules(decomp_graph):
        # Tree-decompose ``decomp_graph``, root and binarize the tree, then
        # accumulate rule counts (always against the giant component G).
        T = td.quickbb(decomp_graph)
        T = td.make_rooted(T, list(T)[0])
        T = phrg.binarize(T)
        td.new_visit(T, G, prod_rules)

    G.remove_edges_from(G.selfloop_edges())
    giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
    G = nx.subgraph(G, giant_nodes)

    num_nodes = G.number_of_nodes()

    phrg.graph_checks(G)

    if DBG:
        print('')
        print("--------------------")
        print("--------------------")
    else:
        print("-Tree Decomposition-")

    prod_rules = {}
    K = n_subg
    n = n_nodes
    if num_nodes >= 500:
        print('Grande')
        t_start = time.time()
        for Gprime in gs.rwr_sample(G, K, n):
            # The rules are collected synchronously.  The extra
            # Process(target=td.new_visit, ...) that used to be started here
            # repeated the same visit in a child that was never joined
            # (presumably multiprocessing.Process, whose child works on its
            # own copy of prod_rules), so it has been removed.
            _collect_rules(Gprime)
    else:
        # A stray exit() used to follow this branch and ended the program
        # before any rule was normalised or any graph was grown for small
        # inputs; it has been removed.
        _collect_rules(G)
        # print_treewidth(T)

    if DBG:
        print('')
        print("--------------------")
        print("- Production Rules -")
        print("--------------------")

    # Normalisation step: turn counts into probabilities per left-hand side.
    for lhs in prod_rules:
        if DBG: print(lhs)
        rhs_counts = prod_rules[lhs]
        total = float(sum(rhs_counts.values()))
        for rhs_key in rhs_counts:
            rhs_counts[rhs_key] = float(rhs_counts[rhs_key]) / total
            if DBG: print('\t ->  {} {}'.format(rhs_key, rhs_counts[rhs_key]))

    # Flatten to (rule id, lhs symbol, rhs symbol list, probability).
    rules = []
    for rule_id, lhs in enumerate(prod_rules):
        lhs_symbol = "%s" % re.findall("[^()]+", lhs)[0]
        for sid, rhs_key in enumerate(prod_rules[lhs]):
            rule = ("r%d.%d" % (rule_id, sid), lhs_symbol,
                    re.findall("[^()]+", rhs_key), prod_rules[lhs][rhs_key])
            rules.append(rule)
            if DBG: print(rule)

    df = pd.DataFrame(rules)

    # Graph generation of synthetic graphs: grow graphs using the union of
    # rules from the sampled subgraphs, targeting the order of the original
    # graph ("exact" growing was replaced by the variant below).
    # hStars = grow_exact_size_hrg_graphs_from_prod_rules(rules, graph_name, G.number_of_nodes(), 10)
    hStars = grow_hrg_graphs_with_infinity(rules,
                                           graph_name,
                                           G.number_of_nodes(),
                                           10,
                                           rnbr=1)
    print('... hStart graphs: {}'.format(len(hStars)))
    d = {graph_name + "_hstars": hStars}

    # Make sure the output directory exists before writing into it.
    if not os.path.exists(r"Results/"): os.makedirs(r"Results/")

    out_path = r"Results/{}_hstars.pickle".format(graph_name)
    with open(out_path, "wb") as output_file:
        cPickle.dump(d, output_file)
    if os.path.exists(out_path):
        print("File saved")
    '''if nstats: