Example 1
def edgelist_dimacs_graph(orig_graph, peo_h, prn_tw=False):
    fname = orig_graph
    gname = os.path.basename(fname).split(".")
    gname = sorted(gname, reverse=True, key=len)[0]

    if ".tar.bz2" in fname:
        from tdec.read_tarbz2 import read_tarbz2_file
        edglst = read_tarbz2_file(fname)
        df = pd.DataFrame(edglst, dtype=int)
        G = nx.from_pandas_dataframe(df, source=0, target=1)
    else:
        G = nx.read_edgelist(fname, comments="%", data=False, nodetype=int)
    # print "...",	G.number_of_nodes(), G.number_of_edges()
    # from numpy import max
    # print "...",	max(G.nodes()) ## to handle larger 300K+ nodes with much larger labels

    # +++ Graph Checks
    if G is None: sys.exit(1)  # guard before the graph is queried below
    N = max(G.nodes())  # largest node label (ids need not be contiguous)
    M = G.number_of_edges()
    G.remove_edges_from(G.selfloop_edges())
    giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
    G = nx.subgraph(G, giant_nodes)
    graph_checks(G)
    # --- graph checks

    G.name = gname

    # print "...",	G.number_of_nodes(), G.number_of_edges()
    if G.number_of_nodes() > 500 and not prn_tw:
        # larger graphs: use the sampling-based DIMACS conversion
        return (nx_edges_to_nddgo_graph_sampling(G, n=N, m=M,
                                                 peo_h=peo_h), gname)
    else:
        return (nx_edges_to_nddgo_graph(G, n=N, m=M, varel=peo_h), gname)
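A minimal usage sketch for the function above. The edgelist path and the `peo_h` values are placeholders (not taken from the original code); `peo_h` is assumed to be whatever list of elimination-order heuristic names the nddgo conversion helpers expect.

# Placeholder inputs: any edgelist readable by nx.read_edgelist will do, and
# the heuristic names below are illustrative only.
dimacs_files, gname = edgelist_dimacs_graph("datasets/karate.edgelist",
                                            peo_h=["mcs", "lexm"])
print(gname)
print(dimacs_files)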
Example 2
def convert_nx_gObjs_to_dimacs_gObjs(nx_gObjs):
    '''
	Take a list of NetworkX graph objects and convert each to DIMACS format.
	'''
    dimacs_glst = []
    for G in nx_gObjs:
        # +++ Graph Checks
        if G is None: sys.exit(1)  # guard before the graph is queried below
        N = max(G.nodes())  # largest node label
        M = G.number_of_edges()

        G.remove_edges_from(G.selfloop_edges())
        giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
        G = nx.subgraph(G, giant_nodes)
        graph_checks(G)
        # --- graph checks
        if not G.name:  # NetworkX graphs default to an empty-string name, not None
            G.name = "synthG_{}_{}".format(N, M)

        from tdec.arbolera import nx_edges_to_nddgo_graph
        dimacs_glst.append(nx_edges_to_nddgo_graph(G, n=N, m=M, save_g=True))

    return dimacs_glst
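# A hedged usage sketch for the converter above. The graph models and sizes are
# placeholders chosen for illustration only; `nx` is the same pre-2.x NetworkX
# import this module already relies on.
def _demo_convert_nx_gObjs_to_dimacs_gObjs():
    gObjs = [nx.barabasi_albert_graph(50, 2), nx.erdos_renyi_graph(50, 0.05)]
    return convert_nx_gObjs_to_dimacs_gObjs(gObjs)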
def isomorphic_test_from_dimacs_tree(orig, tdfname, gname="", iargs=""):
	# if whole tree path
	# else, assume a path fragment
	print '... path fragment:', tdfname
	print '... input graph	:', orig


	G = load_edgelist(orig) # load edgelist into a graph obj
	# +++ Graph Checks
	if G is None: sys.exit(1)	# guard before the graph is queried below
	N = G.number_of_nodes()
	M = G.number_of_edges()
	G.remove_edges_from(G.selfloop_edges())
	giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
	G = nx.subgraph(G, giant_nodes)
	graph_checks(G)
	# --- graph checks

	G.name = gname

	files = glob(tdfname+"*.dimacs.tree")
	prod_rules = {}
	stacked_df = pd.DataFrame()

	mat_dict = {}
	for i,x in enumerate(sorted(files)):
		mat_dict[os.path.basename(x).split(".")[0].split("_")[-1]]=i
		if DBG: print os.path.basename(x).split(".")[0].split("_")[-1]

	for tfname in files:
		tname = os.path.basename(tfname).split(".")
		tname = "_".join(tname[:2])

		with open(tfname, 'r') as f:	# read tree decomp from inddgo
			lines = f.readlines()
			lines = [x.rstrip('\r\n') for x in lines]

		cbags = {}
		# "B <bag-id> <bag-size> <v1> <v2> ..." lines describe the bags of the decomposition
		bags = [x.split() for x in lines if x.startswith('B')]

		for b in bags:
			cbags[int(b[1])] = [int(x) for x in b[3:]]	# b[2] is the bag size; unused here

		edges = [x.split()[1:] for x in lines if x.startswith('e')]
		edges = [[int(k) for k in x] for x in edges]

		tree = defaultdict(set)
		for s, t in edges:
			tree[frozenset(cbags[s])].add(frozenset(cbags[t]))
			if DBG: print '.. # of keys in `tree`:', len(tree.keys())

		root = frozenset(cbags[1])	# root the tree at bag 1
		T = td.make_rooted(tree, root)
		# nfld.unfold_2wide_tuple(T) # lets me display the tree's frozen sets

		T = phrg.binarize(T)
		# root = list(T)[0]
		# root, children = T
		# td.new_visit(T, G, prod_rules, TD)
		# print ">>",len(T)

		td.new_visit(T, G, prod_rules)


		for k in prod_rules.iterkeys():
			if DBG: print k
			s = 0
			for d in prod_rules[k]:
				s += prod_rules[k][d]
			for d in prod_rules[k]:
				prod_rules[k][d] = float(prod_rules[k][d]) / float(s)	# normalization step: turn counts into probabilities
				if DBG: print '\t -> ', d, prod_rules[k][d]

		if DBG: print "--------------------"
		if DBG: print '- Prod. Rules'
		if DBG: print "--------------------"
		rules = []
		id = 0
		for k, v in prod_rules.iteritems():
			sid = 0
			for x in prod_rules[k]:
				rhs = re.findall("[^()]+", x)
				rules.append(("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x]))
				if DBG: print "r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x]
				sid += 1
			id += 1

		df = pd.DataFrame(rules)
		print df.shape
		df['cate'] = tname
		stacked_df = pd.concat([df, stacked_df])

	if iargs and iargs['cnts']:	# caller asked for raw rule counts
		return stacked_df,mat_dict
	else:
		np_sqr_mtrx = jaccard_coeff_isomorphic_rules_check(stacked_df, mat_dict)
		print gname
		df = pd.DataFrame(np_sqr_mtrx, columns=[x for x in sorted(mat_dict.keys())])
		df.index = sorted(mat_dict.keys())
		df.to_csv("Results/{}_isom_jaccardsim.tsv".format(gname), sep="\t")  # tab-separated to match the .tsv extension
		
		return stacked_df,mat_dict #ToDo: not sure if I want to return this
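# A hedged usage sketch for isomorphic_test_from_dimacs_tree. Every path below
# is a placeholder: `orig` should point at an edgelist file and `tdfname` at a
# path fragment that globs to "*.dimacs.tree" files produced by INDDGO; `iargs`
# is assumed to be a dict carrying the 'cnts' flag used above.
def _demo_isomorphic_test_from_dimacs_tree():
    return isomorphic_test_from_dimacs_tree(orig="datasets/karate.edgelist",
                                            tdfname="ProdRules/karate",
                                            gname="karate",
                                            iargs={"cnts": True})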
def dimacs_td_ct_fast(oriG, tdfname):
    """ tree decomp to clique-tree 
	parameters:
		orig:			filepath to orig (input) graph in edgelist
		tdfname:	filepath to tree decomposition from INDDGO
		synthg:		when the input graph is a syth (orig) graph
	Todo: 
		currently not handling sythg in this version of dimacs_td_ct
  """
    G = oriG
    if G is None: return (1)
    graph_checks(G)  # --- graph checks
    prod_rules = {}

    t_basename = os.path.basename(tdfname)
    out_tdfname = t_basename + ".prs"
    if os.path.exists("ProdRules/" + out_tdfname):
        print "==> exists:", out_tdfname
        return out_tdfname
    if 0: print "ProdRules/" + out_tdfname, tdfname

    with open(tdfname, 'r') as f:  # read tree decomp from inddgo
        lines = f.readlines()
        lines = [x.rstrip('\r\n') for x in lines]

    cbags = {}
    # "B <bag-id> <bag-size> <v1> <v2> ..." lines describe the bags of the decomposition
    bags = [x.split() for x in lines if x.startswith('B')]

    for b in bags:
        cbags[int(b[1])] = [int(x) for x in b[3:]]  # b[2] is the bag size; unused here

    edges = [x.split()[1:] for x in lines if x.startswith('e')]
    edges = [[int(k) for k in x] for x in edges]

    tree = defaultdict(set)
    for s, t in edges:
        tree[frozenset(cbags[s])].add(frozenset(cbags[t]))
        if DEBUG: print '.. # of keys in `tree`:', len(tree.keys())

    root = frozenset(cbags[1])  # root the tree at bag 1
    T = td.make_rooted(tree, root)
    # nfld.unfold_2wide_tuple(T) # lets me display the tree's frozen sets

    T = phrg.binarize(T)
    # root = list(T)[0]
    # root, children = T
    # td.new_visit(T, G, prod_rules, TD)
    # print ">>",len(T)

    td.new_visit(T, G, prod_rules)

    if 0: print "--------------------"
    if 0: print "- Production Rules -"
    if 0: print "--------------------"

    for k in prod_rules.iterkeys():
        if DEBUG: print k
        s = 0
        for d in prod_rules[k]:
            s += prod_rules[k][d]
        for d in prod_rules[k]:
            prod_rules[k][d] = float(prod_rules[k][d]) / float(
                s)  # normalization step: turn counts into probabilities
            if DEBUG: print '\t -> ', d, prod_rules[k][d]

    rules = []
    id = 0
    for k, v in prod_rules.iteritems():
        sid = 0
        for x in prod_rules[k]:
            rhs = re.findall("[^()]+", x)
            rules.append(
                ("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs,
                 prod_rules[k][x]))
            if 0:
                print("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0],
                      rhs, prod_rules[k][x])
            sid += 1
        id += 1
    # print rules
    if 0: print "--------------------"
    if 0: print '- P. Rules', len(rules)
    if 0: print "--------------------"
    '''
  # ToDo.
  # Let's save these rules to file or print proper
  df = DataFrame(rules)
  print "out_tdfname:", out_tdfname
  df.to_csv("ProdRules/" + out_tdfname, sep="\t", header=False, index=False)
  '''

    # g = pcfg.Grammar('S')
    # for (id, lhs, rhs, prob) in rules:
    #	g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

    # Synthetic Graphs
    #	hStars = grow_exact_size_hrg_graphs_from_prod_rules(rules, graph_name, G.number_of_nodes(), 20)
    #	# metricx = ['degree', 'hops', 'clust', 'assort', 'kcore', 'gcd'] # 'eigen'
    #	metricx = ['gcd','avgdeg']
    #	metrics.network_properties([G], metricx, hStars, name=graph_name, out_tsv=True)

    return ""