def edgelist_dimacs_graph(orig_graph, peo_h, prn_tw=False):
    """Load an edge list and pass its giant component to a DIMACS converter.

    Parameters
    ----------
    orig_graph : str
        Path to the edge-list file. Paths containing ".tar.bz2" are read
        through ``read_tarbz2_file``; anything else through
        ``nx.read_edgelist`` (with "%" as the comment marker).
    peo_h :
        Forwarded unchanged to the conversion helper.
    prn_tw : bool
        When true, the sampling converter is never selected.

    Returns
    -------
    tuple
        ``(value returned by the conversion helper, graph name)``.
    """
    # Graph name: the longest dot-separated piece of the file name
    # (the first such piece when several share the maximum length).
    name_parts = os.path.basename(orig_graph).split(".")
    gname = max(name_parts, key=len)

    if ".tar.bz2" in orig_graph:
        from tdec.read_tarbz2 import read_tarbz2_file

        edge_frame = pd.DataFrame(read_tarbz2_file(orig_graph), dtype=int)
        G = nx.from_pandas_dataframe(edge_frame, source=0, target=1)
    else:
        G = nx.read_edgelist(orig_graph, comments="%", data=False,
                             nodetype=int)

    # Both figures are taken from the graph as loaded, i.e. before the
    # clean-up below. N is the largest node label rather than the node count
    # (original note: to handle larger 300K+ nodes with much larger labels).
    N = max(G.nodes())
    M = G.number_of_edges()

    # +++ Graph Checks
    # NOTE(review): G has already been dereferenced above, so this guard
    # cannot catch a missing graph at this point.
    if G is None:
        sys.exit(1)
    G.remove_edges_from(G.selfloop_edges())
    largest_cc = max(nx.connected_component_subgraphs(G), key=len)
    G = nx.subgraph(G, largest_cc)
    graph_checks(G)
    # --- graph checks

    G.name = gname

    # Graphs with more than 500 nodes go through the sampling converter
    # unless prn_tw is set.
    use_sampling = G.number_of_nodes() > 500 and not prn_tw
    if use_sampling:
        converted = nx_edges_to_nddgo_graph_sampling(G, n=N, m=M, peo_h=peo_h)
    else:
        converted = nx_edges_to_nddgo_graph(G, n=N, m=M, varel=peo_h)
    return (converted, gname)
def edgelist_dimacs_graph(orig_graph, peo_h):
    """Read an edge list and convert its giant component to a DIMACS graph.

    Parameters
    ----------
    orig_graph : str
        Path to the edge-list file, read with ``nx.read_edgelist`` using "%"
        as the comment marker and integer node labels.
    peo_h :
        Forwarded to ``nx_edges_to_nddgo_graph`` as ``peoh``.

    Returns
    -------
    tuple
        ``(value returned by nx_edges_to_nddgo_graph, graph name)``.
    """
    # The longest dot-separated piece of the base name becomes the graph
    # name; ties go to the piece that appears first.
    basename_parts = os.path.basename(orig_graph).split(".")
    gname = max(basename_parts, key=len)

    G = nx.read_edgelist(orig_graph, comments="%", data=False, nodetype=int)

    # Recorded before self-loops and small components are dropped.
    # N is the largest node label (original note: to handle larger 300K+
    # nodes with much larger labels), M the edge count as loaded.
    N = max(G.nodes())
    M = G.number_of_edges()

    # +++ Graph Checks
    # NOTE(review): the graph was already used above, so this guard cannot
    # fire for a missing graph here.
    if G is None:
        sys.exit(1)
    G.remove_edges_from(G.selfloop_edges())
    biggest_component = max(nx.connected_component_subgraphs(G), key=len)
    G = nx.subgraph(G, biggest_component)
    graph_checks(G)
    # --- graph checks

    G.name = gname

    # The sampling path for graphs above 500 nodes is disabled in this
    # variant; every graph goes through the plain converter.
    dimacs_result = nx_edges_to_nddgo_graph(G, n=N, m=M, peoh=peo_h)
    return (dimacs_result, gname)
def derive_production_rules(G): """ Parameters ---------- G : input graph """ from PHRG import graph_checks, binarize prod_rules = {} G.remove_edges_from(G.selfloop_edges()) giant_nodes = max(nx.connected_component_subgraphs(G), key=len) G = nx.subgraph(G, giant_nodes) num_nodes = G.number_of_nodes() graph_checks(G) print print "--------------------" print "-Tree Decomposition-" print "--------------------" if num_nodes >= 500: for Gprime in gs.rwr_sample(G, 2, 100): T = td.quickbb(Gprime) root = list(T)[0] T = td.make_rooted(T, root) T = binarize(T) root = list(T)[0] root, children = T td.new_visit(T, G, prod_rules) else: T = td.quickbb(G) root = list(T)[0] T = td.make_rooted(T, root) T = binarize(T) root = list(T)[0] root, children = T td.new_visit(T, G, prod_rules) print print "--------------------" print "- Production Rules -" print "--------------------" for k in prod_rules.iterkeys(): print k s = 0 for d in prod_rules[k]: s += prod_rules[k][d] for d in prod_rules[k]: prod_rules[k][d] = float(prod_rules[k][d]) / float( s) # normailization step to create probs not counts. #print '\t -> ', d, prod_rules[k][d] return prod_rules
def convert_nx_gObjs_to_dimacs_gObjs(nx_gObjs):
    """Take a list of graphs and convert each one to DIMACS.

    Parameters
    ----------
    nx_gObjs : iterable of graphs
        Each graph has its self-loops removed in place. A ``None`` entry
        terminates the program with exit status 1.

    Returns
    -------
    list
        One ``nx_edges_to_nddgo_graph`` result per input graph, in order.
    """
    # Imported once per call instead of once per graph.
    from tdec.arbolera import nx_edges_to_nddgo_graph

    dimacs_glst = []
    for G in nx_gObjs:
        # +++ Graph Checks
        # The guard must come before G is touched; checking afterwards let a
        # None entry fail with AttributeError instead of exiting cleanly.
        if G is None:
            sys.exit(1)

        # Largest node label and edge count of the graph as supplied,
        # i.e. before the clean-up below.
        N = max(G.nodes())
        M = G.number_of_edges()

        G.remove_edges_from(G.selfloop_edges())
        giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
        G = nx.subgraph(G, giant_nodes)
        graph_checks(G)
        # --- graph checks

        G.name = "synthG_{}_{}".format(N, M)

        dimacs_glst.append(nx_edges_to_nddgo_graph(G, n=N, m=M, save_g=True))

    return dimacs_glst
def isomorphic_test_on_prod_rules(orig, tdfname, gname=""): """" orig: path to original/refernce input graph tdfname: path fragment for a set of td pro rules gname: graph name (str) returns: """ # if whole tree path # else, assume a path fragment print '... input graph :', os.path.basename(orig) print '... prod rules path frag :', tdfname G = load_edgelist(orig) # load edgelist into a graph obj N = G.number_of_nodes() M = G.number_of_edges() # +++ Graph Checks if G is None: sys.exit(1) G.remove_edges_from(G.selfloop_edges()) giant_nodes = max(nx.connected_component_subgraphs(G), key=len) G = nx.subgraph(G, giant_nodes) graph_checks(G) # --- graph checks G.name = gname print "\t", nx.info(G) files = glob(tdfname + "*.prs") stacked_df = pd.DataFrame() # mat_dict = {} # for i, x in enumerate(sorted(files)): # mat_dict[os.path.basename(x).split(".")[0].split("_")[-1]] = i # if DBG: print os.path.basename(x).split(".")[0].split("_")[-1] for prs in sorted(files): tname = os.path.basename(prs).split(".") tname = "_".join(tname[:2]) # print prs # with open(prs, 'r') as f: # read tree decomp from inddgo # lines = f.readlines() # lines = [x.rstrip('\r\n') for x in lines] df = pd.read_csv(prs, sep="\t", header=None) print tname df['cate'] = tname stacked_df = pd.concat([df, stacked_df]) # print df.shape print "\nStacked prod rules\n", "~" * 20 print " ", stacked_df.shape if args['verb']: print stacked_df.to_string() stacked_df.to_csv("../Results/{}_stacked_df.tsv".format(gname), sep="\t") if os.path.exists( "../Results/{}_stacked_df.tsv".format(gname)): print 'Wrote:', "../Results/{}_stacked_df.tsv".format(gname) print "\nisomorphic union of the rules (_mod probs)\n", "~" * 20 stacked_df.columns = ['rnbr', 'lhs', 'rhs', 'pr', df['cate'].name] iso_union, iso_interx = isomorph_intersection_2dfstacked(stacked_df) print " ", iso_union.shape if args['verb']: print iso_union.to_string() print "\nIsomorphic intersection of the prod rules\n", "~" * 20 iso_interx = 
iso_interx[[1,2,3,4]] # print iso_interx.head(); exit() iso_interx.to_csv('../Results/{}_isom_interxn.tsv'.format(gname), header=False, index=False, sep="\t") if os.path.exists('../Results/{}_isom_interxn.tsv'.format(gname)): print 'Wrote:', '../Results/{}_isom_interxn.tsv'.format(gname)
def isomorphic_test_from_dimacs_tree(orig, tdfname, gname=""): """" orig: path to original/refernce input graph tdfname: path fragment for a set of td pro rules gname: graph name (str) returns: """ # if whole tree path # else, assume a path fragment print '... input graph :', os.path.basename(orig) print '... td path frag :', tdfname G = load_edgelist(orig) # load edgelist into a graph obj N = G.number_of_nodes() M = G.number_of_edges() # +++ Graph Checks if G is None: sys.exit(1) G.remove_edges_from(G.selfloop_edges()) giant_nodes = max(nx.connected_component_subgraphs(G), key=len) G = nx.subgraph(G, giant_nodes) graph_checks(G) # --- graph checks G.name = gname files = glob(tdfname + "*.dimacs.tree") prod_rules = {} stacked_df = pd.DataFrame() mat_dict = {} for i, x in enumerate(sorted(files)): mat_dict[os.path.basename(x).split(".")[0].split("_")[-1]] = i if DBG: print os.path.basename(x).split(".")[0].split("_")[-1] for tfname in sorted(files): tname = os.path.basename(tfname).split(".") tname = "_".join(tname[:2]) with open(tfname, 'r') as f: # read tree decomp from inddgo lines = f.readlines() lines = [x.rstrip('\r\n') for x in lines] cbags = {} bags = [x.split() for x in lines if x.startswith('B')] for b in bags: cbags[int(b[1])] = [int(x) for x in b[3:]] # what to do with bag size? edges = [x.split()[1:] for x in lines if x.startswith('e')] edges = [[int(k) for k in x] for x in edges] tree = defaultdict(set) for s, t in edges: tree[frozenset(cbags[s])].add(frozenset(cbags[t])) if DBG: print '.. 
# of keys in `tree`:', len(tree.keys()) root = list(tree)[0] root = frozenset(cbags[1]) T = td.make_rooted(tree, root) # nfld.unfold_2wide_tuple(T) # lets me display the tree's frozen sets T = phrg.binarize(T) # root = list(T)[0] # root, children = T # td.new_visit(T, G, prod_rules, TD) # print ">>",len(T) td.new_visit(T, G, prod_rules) from json import dumps # print dumps(prod_rules, indent=4, sort_keys=True) for k in prod_rules.iterkeys(): if DBG: print k s = 0 for d in prod_rules[k]: s += prod_rules[k][d] for d in prod_rules[k]: prod_rules[k][d] = float(prod_rules[k][d]) / float(s) # normailization step to create probs not counts. if DBG: print '\t -> ', d, prod_rules[k][d] if DBG: print "--------------------" if DBG: print '- Prod. Rules' if DBG: print "--------------------" rules = [] # print dumps(prod_rules, indent=4, sort_keys=True) id = 0 for k, v in prod_rules.iteritems(): sid = 0 for x in prod_rules[k]: rhs = re.findall("[^()]+", x) rules.append(("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x])) if DBG: print "r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x] sid += 1 id += 1 df = pd.DataFrame(rules) df['cate'] = tname stacked_df = pd.concat([df, stacked_df]) # print df.shape print "\nStacked prod rules\n", "~" * 20 print " ", stacked_df.shape if args['verb']: print stacked_df.to_string() stacked_df.to_csv("../Results/{}_stacked_df.tsv".format(gname), sep="\t") if os.path.exists( "../Results/{}_stacked_df.tsv".format(gname)): print 'Wrote:', "../Results/{}_stacked_df.tsv".format(gname) print "\nisomorphic union of the rules (_mod probs)\n", "~" * 20 stacked_df.columns = ['rnbr', 'lhs', 'rhs', 'pr', df['cate'].name] iso_union, iso_interx = isomorph_intersection_2dfstacked(stacked_df) print " ", iso_union.shape if args['verb']: print iso_union.to_string() print "\nIsomorphic intersection of the prod rules\n", "~" * 20 print " ", iso_interx.shape 
iso_interx.to_csv('../Results/{}_isom_interxn.tsv'.format(gname)) if os.path.exists( '../Results/{}_isom_interxn.tsv'.format(gname)): print 'Wrote:', '../Results/{}_isom_interxn.tsv'.format(gname)
gname = os.path.basename(fname).split('.')[0] print "... ", gname if args['sampling']: mapping_d = map_original_node_ids(fname) G1 = nx.read_edgelist(fname, comments="%", data=False, nodetype=int) G = nx.relabel_nodes(G1, mapping_d) else: G = nx.read_edgelist(fname, comments="%", data=False, nodetype=int) num_nodes = G.number_of_nodes() num_edges = G.number_of_edges() # +++ Graph Checks if G is None: sys.exit(1) G.remove_edges_from(G.selfloop_edges()) giant_nodes = max(nx.connected_component_subgraphs(G), key=len) G = nx.subgraph(G, giant_nodes) graph_checks(G) # --- graph checks G.name = gname print "... info", nx.info(G) try: nx_edges_to_nddgo_graph(G, num_nodes, num_edges) except Exception, e: print 'ERROR, UNEXPECTED EXCEPTION' print str(e) traceback.print_exc() sys.exit(1) sys.exit(0)