Code example #1
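These snippets are functions extracted from a larger module and omit their module-level imports; the args dict and DBG flag referenced in later examples are likewise module-level globals in the original code. A reconstruction of the standard-library and third-party imports the snippets rely on (project-local modules such as tdec, PHRG/phrg, td, and gs come from the original repository and are assumed):

import os
import re
import sys
import traceback
from collections import defaultdict
from glob import glob

import networkx as nx
import pandas as pd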
def edgelist_dimacs_graph(orig_graph, peo_h, prn_tw=False):
    fname = orig_graph
    gname = os.path.basename(fname).split(".")
    gname = sorted(gname, reverse=True, key=len)[0]

    if ".tar.bz2" in fname:
        from tdec.read_tarbz2 import read_tarbz2_file
        edglst = read_tarbz2_file(fname)
        df = pd.DataFrame(edglst, dtype=int)
        G = nx.from_pandas_dataframe(df, source=0, target=1)
    else:
        G = nx.read_edgelist(fname, comments="%", data=False, nodetype=int)
    # print "...",	G.number_of_nodes(), G.number_of_edges()
    # from numpy import max
    # print "...",	max(G.nodes()) ## to handle larger 300K+ nodes with much larger labels

    N = max(G.nodes())
    M = G.number_of_edges()
    # +++ Graph Checks
    if G is None: sys.exit(1)
    G.remove_edges_from(G.selfloop_edges())
    giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
    G = nx.subgraph(G, giant_nodes)
    graph_checks(G)
    # --- graph checks

    G.name = gname

    # print "...",	G.number_of_nodes(), G.number_of_edges()
    if G.number_of_nodes() > 500 and not prn_tw:
        return (nx_edges_to_nddgo_graph_sampling(G, n=N, m=M,
                                                 peo_h=peo_h), gname)
    else:
        return (nx_edges_to_nddgo_graph(G, n=N, m=M, varel=peo_h), gname)
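The NetworkX calls used throughout these examples (G.selfloop_edges(), nx.connected_component_subgraphs(), nx.from_pandas_dataframe()) belong to the 1.x API and were removed in NetworkX 2.x. A minimal sketch of the same self-loop removal and giant-component extraction, assuming NetworkX 2.x (there, nx.from_pandas_dataframe maps to nx.from_pandas_edgelist):

import networkx as nx

def giant_component(fname):
    # Same read as above; '%' still marks comment lines in the edge list.
    G = nx.read_edgelist(fname, comments="%", data=False, nodetype=int)
    # 2.x: self-loops are enumerated by a module-level helper, not a Graph method.
    G.remove_edges_from(list(nx.selfloop_edges(G)))
    # 2.x: connected_component_subgraphs() is gone; take the largest node set
    # and materialize the induced subgraph explicitly.
    giant_nodes = max(nx.connected_components(G), key=len)
    return G.subgraph(giant_nodes).copy()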
Code example #2
def edgelist_dimacs_graph(orig_graph, peo_h):
    fname = orig_graph
    gname = os.path.basename(fname).split(".")
    gname = sorted(gname, reverse=True, key=len)[0]

    G = nx.read_edgelist(fname, comments="%", data=False, nodetype=int)
    # print "...",  G.number_of_nodes(), G.number_of_edges()
    # from numpy import max
    # print "...",  max(G.nodes()) ## to handle larger 300K+ nodes with much larger labels

    N = max(G.nodes())
    M = G.number_of_edges()
    # +++ Graph Checks
    if G is None: sys.exit(1)
    G.remove_edges_from(G.selfloop_edges())
    giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
    G = nx.subgraph(G, giant_nodes)
    graph_checks(G)
    # --- graph checks

    G.name = gname

    # print "...",  G.number_of_nodes(), G.number_of_edges()
    #if G.number_of_nodes() > 500:
    #    return (nx_edges_to_nddgo_graph_sampling(G, n=N, m=M, peo_h=peo_h), gname)
    #else:
    return (nx_edges_to_nddgo_graph(G, n=N, m=M, peoh=peo_h), gname)
Code example #3
def derive_production_rules(G):
    """

  Parameters
  ----------
  G : input graph
  """
    from PHRG import graph_checks, binarize
    prod_rules = {}

    G.remove_edges_from(G.selfloop_edges())
    giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
    G = nx.subgraph(G, giant_nodes)

    num_nodes = G.number_of_nodes()

    graph_checks(G)

    print
    print "--------------------"
    print "-Tree Decomposition-"
    print "--------------------"

    if num_nodes >= 500:
        for Gprime in gs.rwr_sample(G, 2, 100):
            T = td.quickbb(Gprime)
            root = list(T)[0]
            T = td.make_rooted(T, root)
            T = binarize(T)
            root = list(T)[0]
            root, children = T
            td.new_visit(T, G, prod_rules)
    else:
        T = td.quickbb(G)
        root = list(T)[0]
        T = td.make_rooted(T, root)
        T = binarize(T)
        root = list(T)[0]
        root, children = T
        td.new_visit(T, G, prod_rules)

    print
    print "--------------------"
    print "- Production Rules -"
    print "--------------------"

    for k in prod_rules.iterkeys():
        print k
        s = 0
        for d in prod_rules[k]:
            s += prod_rules[k][d]
        for d in prod_rules[k]:
            prod_rules[k][d] = float(prod_rules[k][d]) / float(
                s)  # normalization step to create probabilities, not counts.
            #print '\t -> ', d, prod_rules[k][d]

    return prod_rules
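The loop at the end of derive_production_rules turns raw rule counts into probabilities. The same normalization step as a standalone sketch (the helper name is illustrative):

def normalize_rule_counts(prod_rules):
    # prod_rules maps each LHS key to a dict of {rhs: count};
    # rescale every inner dict so its values sum to 1.0.
    for lhs in prod_rules:
        total = float(sum(prod_rules[lhs].values()))
        for rhs in prod_rules[lhs]:
            prod_rules[lhs][rhs] = prod_rules[lhs][rhs] / total
    return prod_rules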
Code example #4
def convert_nx_gObjs_to_dimacs_gObjs(nx_gObjs):
    '''
    Take a list of NetworkX graph objects and convert each one to DIMACS format.
    '''
    dimacs_glst = []
    for G in nx_gObjs:
        N = max(G.nodes())
        M = G.number_of_edges()
        # +++ Graph Checks
        if G is None: sys.exit(1)

        G.remove_edges_from(G.selfloop_edges())
        giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
        G = nx.subgraph(G, giant_nodes)
        graph_checks(G)
        # --- graph checks
        G.name = "synthG_{}_{}".format(N, M)

        from tdec.arbolera import nx_edges_to_nddgo_graph
        dimacs_glst.append(nx_edges_to_nddgo_graph(G, n=N, m=M, save_g=True))

    return dimacs_glst
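nx_edges_to_nddgo_graph itself is not shown in these snippets. For orientation, a minimal sketch of writing a graph in the DIMACS edge format that INDDGO consumes (a "p edge N M" header followed by "e u v" lines); the helper name and the 1-based relabeling are assumptions:

import networkx as nx

def write_dimacs_edge_file(G, path):
    # Relabel nodes to consecutive 1..N integers, as DIMACS expects.
    mapping = {v: i + 1 for i, v in enumerate(sorted(G.nodes()))}
    H = nx.relabel_nodes(G, mapping)
    with open(path, "w") as fh:
        fh.write("p edge {} {}\n".format(H.number_of_nodes(), H.number_of_edges()))
        for u, v in H.edges():
            fh.write("e {} {}\n".format(u, v))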
Code example #5
def isomorphic_test_on_prod_rules(orig, tdfname, gname=""):
	""""
	orig: path to original/refernce input graph
	tdfname: path fragment for a set of td pro rules
	gname: graph name (str)
	returns:
    """

	# if whole tree path
	# else, assume a path fragment
	print '... input graph  :', os.path.basename(orig)
	print '... prod rules path frag :', tdfname

	G = load_edgelist(orig)  # load edgelist into a graph obj
	N = G.number_of_nodes()
	M = G.number_of_edges()
	# +++ Graph Checks
	if G is None: sys.exit(1)
	G.remove_edges_from(G.selfloop_edges())
	giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
	G = nx.subgraph(G, giant_nodes)
	graph_checks(G)
	# --- graph checks
	G.name = gname
	print "\t", nx.info(G)

	files = glob(tdfname + "*.prs")
	stacked_df = pd.DataFrame()
	# mat_dict = {}
	# for i, x in enumerate(sorted(files)):
	# 	mat_dict[os.path.basename(x).split(".")[0].split("_")[-1]] = i
	# 	if DBG: print os.path.basename(x).split(".")[0].split("_")[-1]
	for prs in sorted(files):
		tname = os.path.basename(prs).split(".")
		tname = "_".join(tname[:2])
		# print prs
		# with open(prs, 'r') as f:  # read tree decomp from inddgo
		# 	lines = f.readlines()
		# 	lines = [x.rstrip('\r\n') for x in lines]
		df = pd.read_csv(prs, sep="\t", header=None)
		print tname
		df['cate'] = tname
		stacked_df = pd.concat([df, stacked_df])
	# print df.shape
	print "\nStacked prod rules\n", "~" * 20
	print "  ", stacked_df.shape
	if args['verb']: print stacked_df.to_string()
	stacked_df.to_csv("../Results/{}_stacked_df.tsv".format(gname), sep="\t")
	if os.path.exists("../Results/{}_stacked_df.tsv".format(gname)):
		print 'Wrote:', "../Results/{}_stacked_df.tsv".format(gname)

	print "\nisomorphic union of the rules (_mod probs)\n", "~" * 20
	stacked_df.columns = ['rnbr', 'lhs', 'rhs', 'pr', df['cate'].name]
	iso_union, iso_interx = isomorph_intersection_2dfstacked(stacked_df)
	print "  ", iso_union.shape
	if args['verb']: print iso_union.to_string()

	print "\nIsomorphic intersection of the prod rules\n", "~" * 20
	iso_interx = iso_interx[[1,2,3,4]]
	#  print iso_interx.head(); exit()
	iso_interx.to_csv('../Results/{}_isom_interxn.tsv'.format(gname), header=False, index=False, sep="\t")
	if os.path.exists('../Results/{}_isom_interxn.tsv'.format(gname)):
		print 'Wrote:', '../Results/{}_isom_interxn.tsv'.format(gname)
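A hypothetical call, with illustrative paths; tdfname is treated as a shared prefix of the *.prs rule files:

# Both paths are illustrative; gname only names the output TSVs under ../Results/.
isomorphic_test_on_prod_rules("datasets/contact.edgelist",
                              "ProdRules/contact_", gname="contact")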
Code example #6
def isomorphic_test_from_dimacs_tree(orig, tdfname, gname=""):
	""""
	orig: path to original/refernce input graph
	tdfname: path fragment for a set of td pro rules
	gname: graph name (str)
	returns:
    """

	# if whole tree path
	# else, assume a path fragment
	print '... input graph  :', os.path.basename(orig)
	print '... td path frag :', tdfname

	G = load_edgelist(orig)  # load edgelist into a graph obj
	N = G.number_of_nodes()
	M = G.number_of_edges()
	# +++ Graph Checks
	if G is None: sys.exit(1)
	G.remove_edges_from(G.selfloop_edges())
	giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
	G = nx.subgraph(G, giant_nodes)
	graph_checks(G)
	# --- graph checks

	G.name = gname

	files = glob(tdfname + "*.dimacs.tree")
	prod_rules = {}
	stacked_df = pd.DataFrame()

	mat_dict = {}
	for i, x in enumerate(sorted(files)):
		mat_dict[os.path.basename(x).split(".")[0].split("_")[-1]] = i
		if DBG: print os.path.basename(x).split(".")[0].split("_")[-1]

	for tfname in sorted(files):
		tname = os.path.basename(tfname).split(".")
		tname = "_".join(tname[:2])

		with open(tfname, 'r') as f:  # read tree decomp from inddgo
			lines = f.readlines()
			lines = [x.rstrip('\r\n') for x in lines]

		cbags = {}
		bags = [x.split() for x in lines if x.startswith('B')]

		for b in bags:
			cbags[int(b[1])] = [int(x) for x in b[3:]]  # what to do with bag size?

		edges = [x.split()[1:] for x in lines if x.startswith('e')]
		edges = [[int(k) for k in x] for x in edges]

		tree = defaultdict(set)
		for s, t in edges:
			tree[frozenset(cbags[s])].add(frozenset(cbags[t]))
			if DBG: print '.. # of keys in `tree`:', len(tree.keys())

		root = list(tree)[0]
		root = frozenset(cbags[1])
		T = td.make_rooted(tree, root)
		# nfld.unfold_2wide_tuple(T) # lets me display the tree's frozen sets

		T = phrg.binarize(T)
		# root = list(T)[0]
		# root, children = T
		# td.new_visit(T, G, prod_rules, TD)
		# print ">>",len(T)

		td.new_visit(T, G, prod_rules)
		from json import dumps
		# print dumps(prod_rules, indent=4, sort_keys=True)

		for k in prod_rules.iterkeys():
			if DBG: print k
			s = 0
			for d in prod_rules[k]:
				s += prod_rules[k][d]
			for d in prod_rules[k]:
				prod_rules[k][d] = float(prod_rules[k][d]) / float(s)  # normalization step to create probabilities, not counts.
				if DBG: print '\t -> ', d, prod_rules[k][d]

		if DBG: print "--------------------"
		if DBG: print '- Prod. Rules'
		if DBG: print "--------------------"
		rules = []
		# print dumps(prod_rules, indent=4, sort_keys=True)

		id = 0
		for k, v in prod_rules.iteritems():
			sid = 0
			for x in prod_rules[k]:
				rhs = re.findall("[^()]+", x)
				rules.append(("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x]))
				if DBG: print "r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x]
				sid += 1
			id += 1

		df = pd.DataFrame(rules)
		df['cate'] = tname
		stacked_df = pd.concat([df, stacked_df])
		# print df.shape
	print "\nStacked prod rules\n", "~" * 20
	print "  ", stacked_df.shape
	if args['verb']: print stacked_df.to_string()
	stacked_df.to_csv("../Results/{}_stacked_df.tsv".format(gname), sep="\t")
	if os.path.exists("../Results/{}_stacked_df.tsv".format(gname)):
		print 'Wrote:', "../Results/{}_stacked_df.tsv".format(gname)

	print "\nisomorphic union of the rules (_mod probs)\n", "~" * 20
	stacked_df.columns = ['rnbr', 'lhs', 'rhs', 'pr', df['cate'].name]
	iso_union, iso_interx = isomorph_intersection_2dfstacked(stacked_df)
	print "  ", iso_union.shape
	if args['verb']: print iso_union.to_string()

	print "\nIsomorphic intersection of the prod rules\n", "~" * 20
	print "  ", iso_interx.shape
	iso_interx.to_csv('../Results/{}_isom_interxn.tsv'.format(gname))
	if os.path.exists('../Results/{}_isom_interxn.tsv'.format(gname)):
		print 'Wrote:', '../Results/{}_isom_interxn.tsv'.format(gname)
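The parsing in the middle of this function reads INDDGO's tree-decomposition output: "B <id> <size> <v1> ..." lines define bags and "e <s> <t>" lines connect them. The same step as a self-contained sketch (the function name is illustrative):

from collections import defaultdict

def read_inddgo_tree(tree_path):
    with open(tree_path) as fh:
        lines = [ln.rstrip("\r\n") for ln in fh]
    cbags = {}
    for b in (ln.split() for ln in lines if ln.startswith("B")):
        cbags[int(b[1])] = [int(x) for x in b[3:]]  # b[2] is the bag size
    tree = defaultdict(set)
    for ln in lines:
        if ln.startswith("e"):
            s, t = (int(x) for x in ln.split()[1:])
            tree[frozenset(cbags[s])].add(frozenset(cbags[t]))
    return cbags, tree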
Code example #7
        gname = os.path.basename(fname).split('.')[0]
    print "... ", gname
    if args['sampling']:
        mapping_d = map_original_node_ids(fname)
        G1 = nx.read_edgelist(fname, comments="%", data=False, nodetype=int)
        G = nx.relabel_nodes(G1, mapping_d)
    else:
        G = nx.read_edgelist(fname, comments="%", data=False, nodetype=int)

    num_nodes = G.number_of_nodes()
    num_edges = G.number_of_edges()

    # +++ Graph Checks
    if G is None: sys.exit(1)
    G.remove_edges_from(G.selfloop_edges())
    giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
    G = nx.subgraph(G, giant_nodes)
    graph_checks(G)
    # --- graph checks

    G.name = gname
    print "... info", nx.info(G)
    try:
        nx_edges_to_nddgo_graph(G, num_nodes, num_edges)
    except Exception, e:
        print 'ERROR, UNEXPECTED EXCEPTION'
        print str(e)
        traceback.print_exc()
        sys.exit(1)
    sys.exit(0)
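map_original_node_ids is not shown either. A plausible minimal sketch, assuming it maps the original (possibly sparse) node labels to consecutive 1..N integers before DIMACS conversion (only nx.relabel_nodes is the real API here; the helper body is an assumption):

import networkx as nx

def map_original_node_ids_sketch(fname):
    # Assumption: assign consecutive 1..N ids to the sorted original labels.
    G = nx.read_edgelist(fname, comments="%", data=False, nodetype=int)
    return {v: i + 1 for i, v in enumerate(sorted(G.nodes()))}

# mapping_d = map_original_node_ids_sketch(fname)
# G = nx.relabel_nodes(G1, mapping_d)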