def dimacs_nddgo_tree_simple(fname, heuristic):
    """Run the serial_wis binary to tree-decompose a DIMACS graph file.

    :param fname: path to the input DIMACS graph file
    :param heuristic: variable-elimination heuristic flag (e.g. 'mcs', 'mind')
    :return: (stdout, stderr) of the solver, or (None, None) if the output
             .tree file already exists (work is skipped, not redone)
    """
    Info("dimacs nddgo tree simple")
    outfname = "{}_{}.tree".format(fname, heuristic)
    if os.path.exists(outfname):
        Info("{} already exists".format(outfname))
        return (None, None)

    # Pick the platform-specific binary; the command line is otherwise identical.
    if platform.system() == "Linux":
        bin_path = "bin/linux/serial_wis"
    else:
        bin_path = "bin/mac/serial_wis"
    args = [
        "{} -f {} -nice -{} -w {} -decompose_only".format(
            bin_path, fname, heuristic, outfname)
    ]

    # NOTE(review): shell=True on a command string built from fname is
    # shell-injection prone if fname is ever untrusted input.
    popen = subprocess.Popen(args,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             shell=True)
    # BUG FIX: the original called popen.wait() before communicate(); with
    # PIPE stdout/stderr that can deadlock once a pipe buffer fills.
    # communicate() itself waits for the process to finish.
    out, err = popen.communicate()
    print(out, err)
    return (out, err)
def base_graph_edgelist_to_prod_rules(pickle_fname):
    """
    if lcc has more than 500 nodes
    sample the lcc 2 x 300
    lcc1,lcc2 <- sample_graph(g, 2, 300)
    edgelist <- lcc1,lcc2
    1prs_out <- tree1, tree2
    :param pickle_fname: path to a gpickled networkx graph
    :return: list of temp edgelist file paths written by write_tmp_edgelist
    """
    Info("base_graph_edgelist_to_prod_rules")
    G = nx.read_gpickle(pickle_fname)
    # work on the largest connected component only
    subgraph = max(nx.connected_component_subgraphs(G), key=len)
    results = []
    if subgraph.number_of_nodes() > 500:
        # rwr_sample returns a generator of sampled subgraphs
        for k, Gprime in enumerate(gs.rwr_sample(subgraph, 2, 300)):
            print(k)
            # BUG FIX: rstrip('.p') strips any trailing '.'/'p' characters
            # (e.g. "snap.p" -> "sna"); splitext removes only the extension.
            gname = os.path.splitext(os.path.basename(pickle_fname))[0]
            Gprime.name = gname
            cc_fname = write_tmp_edgelist(Gprime, k)  # subgraph to temp edgelist
            results.append(cc_fname)
    else:
        # BUG FIX: write_tmp_edgelist takes (sg, k); the original one-argument
        # call raised TypeError on this path. Pass None for the sample index.
        cc_fname = write_tmp_edgelist(G, None)
        results.append(cc_fname)
    return results
def explode_to_trees(files, results_trees):
    """
    explode into tree decompositions
    :param files: iterable of DIMACS file records; dimacs_file[0] is the path
    :param results_trees: accumulator list — NOTE(review): never used in this
                          body; verify whether collect_results_trees was meant
                          to append into it
    :return:
    """
    Info("Explode to trees")
    # the six variable-elimination heuristics to decompose with
    var_els = ['mcs', 'mind', 'minf', 'mmd', 'lexm', 'mcsm']
    for j, dimacs_file in enumerate(files):
        results = []
        # two worker processes per input file; one async task per heuristic
        p = mp.Pool(processes=2)
        for vael in var_els:
            print "\t", vael
            # dimacs_nddgo_tree_simple(dimacs_file[0], vael)
            p.apply_async(dimacs_nddgo_tree_simple,
                          args=(
                              dimacs_file[0],
                              vael,
                          ),
                          callback=collect_results_trees)
        p.close()
        p.join()
        # What is below?
        # NOTE(review): `results` is re-created empty every iteration and
        # nothing here writes to it — collect_results_trees presumably
        # appends to some other (module-level) list, so asp_arr below is
        # always built from an empty list. Confirm against the definition
        # of collect_results_trees.
        if j == 0:
            asp_arr = np.array(results)
            continue
        prs_np = np.array(results)
        asp_arr = np.append(asp_arr, prs_np)
    print "pp.pprint(asp_arr)"
    pp.pprint(asp_arr)
def main(args):
    """Driver for the --orig path: ensure a gpickle of the edgelist exists,
    convert to DIMACS, and explode into tree decompositions.

    :param args: parsed CLI argument dict; args['orig'][0] is the edgelist path
    """
    orig_fname = args['orig'][0]
    gname = graph_name(orig_fname)
    Info(os.getcwd())
    dir = "../datasets"
    # every *.p (gpickle) file anywhere under ../datasets
    p_files = [
        x[0] + "/" + f for x in os.walk(dir) for f in x[2] if f.endswith(".p")
    ]
    orig_p = [x for x in p_files if gname in x]
    print("")
    if not len(orig_p):
        # no pickle yet for this graph: load the edgelist and pickle it
        print("converting to gpickle", "\n", "-" * 40)
        g = load_edgelist(orig_fname)
        nx.write_gpickle(g, dir + "/{}.p".format(gname))
        orig_p = dir + "/{}.p".format(gname)
    # NOTE(review): transform_edgelist_to_dimacs reads gpickle files, yet it
    # is handed the raw edgelist path; orig_p looks like the intended
    # argument — kept as in the original pending confirmation.
    transform_edgelist_to_dimacs([orig_fname])
    results_trees = []
    # BUG FIX: the original line was `explode_to_tree(orig_fname, results_lst)`
    # — neither `explode_to_tree` nor `results_lst` is defined anywhere in
    # this file (NameError at runtime). Call the defined explode_to_trees
    # with a file list and the accumulator declared above.
    explode_to_trees([orig_fname], results_trees)
    # pp.pprint([x[0] + "/" + f for x in os.walk(dir) for f in x[2] if f.endswith(".tree")])
    # results_prs = []
    # print star_dot_trees_to_prod_rules([orig_fname], results_lst)
def convert_nx_gObjs_to_dimacs_gObjs(nx_gObjs):
    """Convert a list of networkx graph objects to DIMACS graph objects.

    :param nx_gObjs: list of networkx graphs; node labels are taken to be
                     integers (N = max(G.nodes())) — TODO confirm callers
                     only pass integer-labeled graphs
    :return: list with one nx_edges_to_nddgo_graph result per input graph
             (each written to disk via save_g=True)
    """
    Info(
        "Take list of graphs and convert to dimacs:convert_nx_gObjs_to_dimacs")
    # hoisted: the import is loop-invariant; the original re-imported it on
    # every iteration
    from core.arbolera import nx_edges_to_nddgo_graph
    dimacs_glst = []
    for G in nx_gObjs:
        N = max(G.nodes())
        M = G.number_of_edges()
        dimacs_glst.append(nx_edges_to_nddgo_graph(G, n=N, m=M, save_g=True))
    return dimacs_glst
def write_tmp_edgelist(sg, k=None):
    """Write graph `sg` to ../datasets/<sg.name>[_<k>].tsv as an edgelist.

    :param sg: networkx graph; sg.name supplies the output filename
    :param k: optional sample index appended to the filename.
              BUG FIX: default of None added — base_graph_edgelist_to_prod_rules
              calls this with a single argument on its small-graph path.
    :return: path of the written edgelist file.
             BUG FIX: the original returned None even though callers append
             the return value to a results list.
    """
    Info("write_tmp_edgelist")
    # kept although currently only used by the commented-out call below;
    # removing it could drop import-time side effects — TODO confirm
    from core.graph_format_converter import edgelist_in_dimacs_out
    if k is None:
        tmp_f = "../datasets/{}.tsv".format(sg.name)
    else:
        tmp_f = "../datasets/{}_{}.tsv".format(sg.name, k)
    try:
        nx.write_edgelist(sg, tmp_f, data=False)
        # edgelist_in_dimacs_out(tmp_f)
        # print "::> edgelist to dimacs %s" % tmp_f
    except Exception as e:  # py2.6+/py3-compatible form of `except Exception, e`
        print(str(e))
        exit()
    return tmp_f
def transform_edgelist_to_dimacs(files, f_str=None):
    """
    edgelist to dimacs graph format
    :param files: list of gpickle file path(s)
    :param f_str: unused; kept for interface compatibility
    :return: list with one conversion result per input file
    """
    Info("edgelist to dimacs graph format\n" + "-" * 40)

    def _load_named(path):
        # read the pickled graph and tag it with its canonical graph name
        graph = nx.read_gpickle(path)
        graph.name = graph_name(path)
        return graph

    return [convert_nx_gObjs_to_dimacs_gObjs([_load_named(path)])
            for path in files]
def write_prod_rules_to_tsv(prules, out_name):
    """Write production rules to ../ProdRules/<out_name> as a TSV file.

    :param prules: production rules (anything DataFrame() accepts)
    :param out_name: bare output filename, placed under ../ProdRules
    """
    Info("write_prod_rules_to_tsv")
    from pandas import DataFrame
    df = DataFrame(prules)
    out_dir = "../ProdRules"
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    out_path = os.path.join(out_dir, out_name)
    df.to_csv(out_path, sep="\t", header=False, index=False)
    # BUG FIX: the original printed "\tWrote ..." from a `finally` block —
    # i.e. even when to_csv raised — and then printed "Wrote ..." a second
    # time on success. Report success once, only if the file really exists.
    if os.path.exists(out_path):
        print("Wrote " + out_path)
def new_main(args):
    """Multi-mode CLI driver: dispatches on whichever flag in `args` is set
    ('base', 'orig', 'edgelist2dimacs', 'prules', 'td'); most branches
    terminate the process with exit()."""
    if not (args['base'] is None):
        # --base: gpickles -> dimacs -> tree decompositions
        Info("<- converts to dimacs")
        gn = graph_name(args['base'][0])
        f = "../datasets/" + gn + "*.p"
        files = glob(f)
        dimacs_lst = transform_edgelist_to_dimacs(files)
        results = []
        trees = explode_to_trees(dimacs_lst, results)
        pp.pprint(files)
        pp.pprint(dimacs_lst)
        pp.pprint(trees)
        print
        pp.pprint(results)
        exit(0)
    elif not (args['orig'] is None):
        # --orig: edgelist -> gpickle(s) of the largest connected component(s)
        Info("<- converts edgelist gpickle")
        f = args['orig'][0]
        g = load_edgelist(f)  # full graph
        Info("# of conn comp: %d" %
             len(list(nx.connected_component_subgraphs(g))))
        g = largest_conn_comp(f)  # largerst conn comp
        if isinstance(g, list):
            # sampled into multiple subgraphs: one pickle per sample
            for k, Gprime in enumerate(g):
                # longest dotted segment of the name, e.g. "out.foo" -> "foo"
                subg_out_fname = max(graph_name(f).split("."), key=len)
                subg_out_fname = "../datasets/" + subg_out_fname
                subg_out_fname += "_{}.p".format(k)
                # NOTE(review): nx.write_gpickle returns None; cc_fname is
                # never meaningful here
                cc_fname = nx.write_gpickle(
                    Gprime, subg_out_fname)  # subgraph to temp edgelist
                if os.path.exists(subg_out_fname):
                    Info("Wrote %s" % subg_out_fname)
        else:
            subg_out_fname = max(graph_name(f).split("."), key=len)
            subg_out_fname = "../datasets/" + subg_out_fname
            subg_out_fname += ".p"
            cc_fname = nx.write_gpickle(g, subg_out_fname)
            if os.path.exists(subg_out_fname):
                Info("Wrote %s" % subg_out_fname)
        print("done")
        exit()
    elif not (args['edgelist2dimacs'] is None):
        # --edgelist2dimacs: sample large LCC and convert samples to dimacs
        f = args['edgelist2dimacs'][0]
        pfname = graph_name(f)
        pfname = "../datasets/{}.p".format(pfname)
        if not os.path.exists(pfname):
            Info("File not found, please run:")
            Info(" python explodingTree.py --orig path/to/edgelist")
        G = load_edgelist(f)
        subgraph = max(nx.connected_component_subgraphs(G), key=len)
        gprime_lst = []
        # NOTE(review): graphs with <= 500 nodes produce an empty gprime_lst;
        # confirm whether a no-sampling branch is missing here
        if subgraph.number_of_nodes() > 500:
            for j, Gprime in enumerate(gs.rwr_sample(subgraph, 2, 300)):
                Gprime.name = G.name + "_%d" % j
                gprime_lst.append(convert_graph_obj_2dimacs([Gprime]))
        print[x for x in gprime_lst]
    elif not (args['prules'] is None):
        # --prules: cross every pickle with every .tree file
        gn = graph_name(args['prules'][0])
        print gn
        f = "../datasets/" + gn + "*.tree"
        files = glob(f)
        f = "../datasets/" + gn + "*.p"
        graphs = glob(f)
        for g in graphs:
            for f in files:
                dimacs_td_ct_fast(g, f)  # dimacs to tree (decomposition)
        exit(0)
    elif not (args['td'] is None):
        # --td: decompose each matching dimacs file with every heuristic
        origG = args['td'][0]
        dimacs_f = glob("../datasets/" + graph_name(args['td'][0]) +
                        "*.dimacs")
        ''' "Explode to trees" '''
        # ToDo
        var_els = ['mcs', 'mind', 'minf', 'mmd', 'lexm', 'mcsm']
        for j, f in enumerate(dimacs_f):
            print f
            gn = graph_name(f)
            dimacs_file = "../datasets/{}.dimacs".format(gn)
            p = mp.Pool(processes=2)
            for vael in var_els:
                p.apply_async(dimacs_nddgo_tree_simple,
                              args=(
                                  dimacs_file,
                                  vael,
                              ),
                              callback=collect_results_trees)
                # xt.dimacs_nddgo_tree_simple(f, vael)
            p.close()
            p.join()
        # dimacs_td_ct_fast(oriG, tdfname) # dimacs to tree (decomposition)
    else:
        sys.exit(0)
    # dimacs_convert_orig_graph(args['orig'])
    # NOTE(review): the statements below were structurally ambiguous in the
    # original source; placed at function level they are only reached by
    # branches that fall through without exit() ('edgelist2dimacs', 'td'),
    # where `f` has been rebound by the branch — confirm intent.
    pickle_fname = "../datasets/" + f + ".p"
    g = nx.read_gpickle(pickle_fname)
    subgraph = max(nx.connected_component_subgraphs(g), key=len)
    if subgraph.number_of_nodes() > 500:
        for Gprime in gs.rwr_sample(subgraph, 2, 300):
            edgelist_in_dimacs_out(Gprime)
nx.write_gpickle (G, "../datasets/{}.p".format (gn)) T = quickbb (G) root = list (T)[0] T = make_rooted (T, root) T = binarize (T) root = list (T)[0] root, children = T # td.new_visit(T, G, prod_rules, TD) new_visit (T, G, prod_rules) ## return prod_rules if __name__ == '__main__': if len (sys.argv) < 2: Info ("Usage:") Info ("python xplotree_subgraphs_prs.py path/to/orig_net_edgelist") sys.exit (1) elif sys.argv[1] == "-ut": fname = "/Users/sal.aguinaga/KynKon/datasets/out.as20000102" else: fname = sys.argv[1] if not os.path.exists (fname): Info ("Path to edgeslits does not exists.") sys.exit (1) gn = graph_name (fname) prsfname = '../ProdRules/{}.tsv.phrg.prs'.format(gn) if os.path.exists(prsfname): Info('{} already exists'.format(prsfname)) sys.exit(0)
# prs_files = glob("ProdRules/{}*prs".format(gn)) # staked_prs_df = stack_prod_rules_bygroup_into_list(prs_files) # from core.stacked_prod_rules # print "*************" # recompute the probabilities for the group of prs # df = recompute_probabilities(staked_prs_df) # from core.baseball # # test if stacked prs can fire # stck_fired = probe_stacked_prs_likelihood_tofire(df, graph_name(f), el_base_info_d[graph_name(f)]) # print (stck_fired) # # # break if __name__ == '__main__': import sys from core.utils import graph_name if len(sys.argv) < 2: Info("add an out.* dataset with its full path") exit() f = sys.argv[1] f = "../datasets/" + graph_name(f) + "*.tree" ftrees = glob(f) orig = sys.argv[ 1] #"/Users/sal.aguinaga/KynKon/datasets/out.karate_club_graph" from core.utils import graph_name import networkx as nx gn = graph_name(orig) f = "../datasets/" + gn + "*.p" results = [] for p in glob(f): pp.pprint(p)