def can_the_intersection_fire(args):
    """Probe whether the stacked production rules can fire and pickle the result.

    Loads the reference graph from ``args['orig']``, stacks the production
    rules found in ``args['fire']``, recomputes their probabilities and passes
    them to ``probe_stacked_prs_likelihood_tofire``.  Element ``[1]`` of that
    call's result is pickled together with the reference graph to
    ``../Results/<graph name>_hstr_from_prs_intxn.p``.

    :param args: mapping with the keys ``'fire'`` (production rules file) and
        ``'orig'`` (handed unchanged to ``load_edgelist`` and ``graph_name``)
    :return: None
    """
    print("")
    print("Test intersectin (isomorphic) production rules subset")
    print("-" * 40)

    from core.baseball import recompute_probabilities
    from core.will_prod_rules_fire import probe_stacked_prs_likelihood_tofire
    from explodingTree import graph_name

    prs_file = args['fire']
    gname = graph_name(args['orig'])
    origG = load_edgelist(args['orig'])
    origG.name = gname
    nbrnodes = origG.number_of_nodes()

    stacked_prs_fsf = stack_prod_rules_bygroup_into_list([prs_file])
    df = recompute_probabilities(stacked_prs_fsf)
    stck_fired = probe_stacked_prs_likelihood_tofire(df, gname,
                                                     nbr_nodes=nbrnodes)

    out_fname = '../Results/{}_hstr_from_prs_intxn.p'.format(origG.name)
    # Close the handle before checking for the file: the original passed a
    # bare open() to pickle.dump, so the data could still be unflushed.
    with open(out_fname, "wb") as out_file:
        pickle.dump({'origG': origG, 'Hstars': stck_fired[1]}, out_file)
    if os.path.exists(out_fname):
        print("Pickle written")
def explode_to_trees(files, results_trees):
    """Submit ``xt.dimacs_nddgo_tree_simple`` jobs for every input file.

    For each file the graph name is derived with ``xt.graph_name``, the
    matching ``../datasets/<name>.dimacs`` path is built, and one job per
    entry of ``var_els`` is submitted to a 2-process pool with
    ``collect_results`` as callback.  After the pool is joined the
    module-level ``results_lst`` is printed and ``results_trees`` is folded
    into a numpy array.

    :param files: sequence of input file names
    :param results_trees: sequence converted with ``np.array`` after each file
    :return: None -- ``asp_arr`` is built but never returned

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source; confirm which statements belong to the ``len(files) == 1`` branch.
    """
    print("\nExplode to trees")
    print("-" * 40)
    var_els = ['mcs', 'mind', 'minf', 'mmd', 'lexm', 'mcsm']
    if len(files) == 1:
        # NOTE(review): graph_name receives the whole list here, not files[0].
        gn = xt.graph_name(files)
        dimacs_file = "../datasets/{}.dimacs".format(gn)
        print(
            " ",
            gn,
        )
        # exit() terminates the interpreter, so the rest of this branch is
        # unreachable as reconstructed.
        exit()
        p = mp.Pool(processes=2)
        for vael in var_els:
            p.apply_async(xt.dimacs_nddgo_tree_simple,
                          args=(
                              dimacs_file,
                              vael,
                          ),
                          callback=collect_results)
            # xt.dimacs_nddgo_tree_simple(f, vael)
        p.close()
        p.join()
        print(results_lst)
    for j, f in enumerate(files):
        gn = xt.graph_name(f)
        dimacs_file = "../datasets/{}.dimacs".format(gn)
        print(
            " ",
            gn,
        )
        # A fresh pool per file; close()/join() wait for all submitted jobs.
        p = mp.Pool(processes=2)
        for vael in var_els:
            p.apply_async(xt.dimacs_nddgo_tree_simple,
                          args=(
                              dimacs_file,
                              vael,
                          ),
                          callback=collect_results)
            # xt.dimacs_nddgo_tree_simple(f, vael)
        p.close()
        p.join()
        print(results_lst)
        # The first file seeds the accumulator; later files are appended.
        if j == 0:
            asp_arr = np.array(results_trees)
            continue
        prs_np = np.array(results_trees)
        asp_arr = np.append(asp_arr, prs_np)
def main():
    """Parse the command line, run ``runEvalUnion`` for the graph and exit.

    The graph name comes from the first entry of the ``orig`` argument.
    Everything after ``exit()`` (concatenating the per-component production
    rule files into ``ProdRules/<name>_concat.prs``) is currently unreachable.
    """
    cli_args = vars(get_parser().parse_args())
    print(cli_args)

    gname = graph_name(cli_args['orig'][0])

    print("%% EvalUnion %%")
    runEvalUnion(gname)
    exit()

    # --- not reached: exit() above ends the program ---
    print("%%")
    # The list is formatted as "[0, 1]", which glob reads as a character class.
    prs_pattern = 'ProdRules/{}_lcc_{}.prs'.format(gname, [0, 1])
    merged = pd.DataFrame()
    for prs_path in glob(prs_pattern):
        part = pd.read_csv(prs_path, sep="\t", header=None)
        # Each newly read frame is placed in front of what was read so far.
        merged = pd.concat([part, merged])
    merged.to_csv('ProdRules/{}_concat.prs'.format(gname),
                  sep="\t",
                  header=None,
                  index=None)
    return
def main(args):
    """Run the edgelist -> dimacs -> trees -> production rules steps for one graph.

    If no ``.p`` file whose path contains the graph name exists below
    ``../datasets``, the edgelist is loaded and written there as a gpickle
    first.  Then ``transform_edgelist_to_dimacs``, ``explode_to_tree`` and
    ``star_dot_trees_to_prod_rules`` are called in that order.

    :param args: mapping whose ``'orig'`` entry is a sequence; its first item
        is the edgelist file name
    :return: None
    """
    orig_fname = args['orig'][0]
    gname = graph_name(orig_fname)
    Info(os.getcwd())

    datasets_dir = "../datasets"  # was named `dir`, shadowing the builtin
    pickle_found = any(
        fname.endswith(".p") and gname in dirpath + "/" + fname
        for dirpath, _dirnames, fnames in os.walk(datasets_dir)
        for fname in fnames)

    # Bare `print` kept from the original: a blank line under the Python 2
    # print statement, a no-op expression when print is a function.
    print
    if not pickle_found:
        print("converting to gpickle", "\n", "-" * 40)
        g = load_edgelist(orig_fname)
        nx.write_gpickle(g, datasets_dir + "/{}.p".format(gname))

    transform_edgelist_to_dimacs([orig_fname])
    # results_lst is not defined in this function; it is resolved from the
    # enclosing module scope.
    explode_to_tree(orig_fname, results_lst)
    star_dot_trees_to_prod_rules([orig_fname], results_lst)
def main():
    """Build ``ProdRules/<graph name>_concat.prs`` for the edgelist in ``sys.argv[1]``.

    When the concatenated production rules file is missing, the graph is
    loaded, subgraphs are sampled from its largest connected component,
    ``dimacs_inddgo_tree_decomps`` processes are started for every dimacs
    file / heuristic pair, the resulting edgelists are loaded and their
    production rules are written to the concat file.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source; confirm how far the ``if not os.path.exists`` branch extends.
    """
    gname = graph_name(sys.argv[1])
    print gname
    concat_prs = "ProdRules/{}_concat.prs".format(gname)
    if not os.path.exists(concat_prs):
        G = load_edgelist(sys.argv[1])
        print "[<>]", "red the graph"
        lcc = max(nx.connected_component_subgraphs(G), key=len)  # find largest conn component
        Glst = sample_rand_subgraphs_in(lcc)
        # print "[<>]", "got the Glst LCCs"
        concat_phrg_prod_rules([x for x in Glst], G.name)  # subgraphs base prod rules
        dimacs_files = glob("datasets/{}*.dimacs".format(gname))
        var_el_lst = ['mcs', 'mind', 'minf', 'mmd', 'lexm', 'mcsm']
        # One process per (heuristic, dimacs file) pair.  The processes are
        # started but never joined, so the code below does not wait for them.
        for gfname in dimacs_files:
            for ve in var_el_lst:
                multiprocessing.Process(target=dimacs_inddgo_tree_decomps,
                                        args=(
                                            ve,
                                            gfname,
                                        )).start()
        print "[<>]", "checks on the edgelist vs the orig graph"
        ## --
        convert_dimacs_trees_to_cliquetrees(gname)
        print "[<>]", "convert_dimacs_trees_to_cliquetrees"
        ## --
        elfiles = glob(".tmp_edgelists/{}*tsv".format(gname))
        subgraphs = [load_edgelist(f) for f in elfiles]
        prod_rules = []
        prod_rules = [
            phrg.probabilistic_hrg_deriving_prod_rules(G) for G in subgraphs
        ]
        import itertools
        # Flatten the per-subgraph rule lists into one list of rules.
        prod_rules = list(itertools.chain.from_iterable(prod_rules))
        pd.DataFrame(prod_rules).to_csv(concat_prs,
                                        sep="\t",
                                        header=False,
                                        index=False)
        ## --
        # Same fan-out as above, started a second time.
        dimacs_files = glob("datasets/{}*.dimacs".format(gname))
        var_el_lst = ['mcs', 'mind', 'minf', 'mmd', 'lexm', 'mcsm']
        for gfname in dimacs_files:
            for ve in var_el_lst:
                multiprocessing.Process(target=dimacs_inddgo_tree_decomps,
                                        args=(
                                            ve,
                                            gfname,
                                        )).start()
        print "[<>]", "checks on the edgelist vs the orig graph"
    print "[<>]", "concat hrg prod_rules:", concat_prs
def get_phrg_production_rules_onsubgraphs(argmnts): args = argmnts gn = graph_name(args['orig'][0]) f = "../datasets/" + gn + "*.p" files = glob(f) prod_rules = {} rules = [] id = 0 for f in files: Gprime = nx.read_gpickle(f) Gprime = reset_graph_nodes(Gprime) pp.pprint(Gprime.nodes()) T = td.quickbb(Gprime) root = list(T)[0] T = td.make_rooted(T, root) T = phrg.binarize(T) root = list(T)[0] root, children = T # td.new_visit(T, G, prod_rules, TD) td.new_visit(T, Gprime, prod_rules) # Process(target=td.new_visit, args=(T, Gprime, prod_rules,)).start() if DBG: print if DBG: print "--------------------" if DBG: print "- Production Rules -" if DBG: print "--------------------" for k in prod_rules.iterkeys(): if DBG: print k s = 0 for d in prod_rules[k]: s += prod_rules[k][d] for d in prod_rules[k]: prod_rules[k][d] = float(prod_rules[k][d]) / float(s) # normailization step to create probs not counts. if DBG: print '\t -> ', d, prod_rules[k][d] for k, v in prod_rules.iteritems(): sid = 0 for x in prod_rules[k]: rhs = re.findall("[^()]+", x) rules.append(("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x])) if DBG: print ("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x]) sid += 1 id += 1 df = pd.DataFrame(rules) # pp.pprint(df.values.tolist()); exit() df.to_csv('../ProdRules/{}.tsv.phrg.prs'.format(gn), header=False, index=False, sep="\t") if os.path.exists('../ProdRules/{}.tsv.phrg.prs'.format(gn)): print 'Saved', '../ProdRules/{}.tsv.phrg.prs'.format(gn) else: print "Trouble saving" '''
def transform_edgelist_to_dimacs(files):
    """Convert the pickled graph of every input file to dimacs format.

    Files whose ``../datasets/<name>.dimacs`` already exists are skipped.
    For the rest, ``../datasets/<name>.p`` is read and
    ``xt.convert_nx_gObjs_to_dimacs_gObjs`` is submitted to a 2-process pool
    with ``collect_results`` as callback.  The call returns after the pool
    has been joined.

    :param files: iterable of input file names
    :return: None
    """
    print("Transform to dimacs")
    print("-" * 40)

    pool = mp.Pool(processes=2)
    for edgelist in files:
        print(" {}".format(edgelist))
        gname = xt.graph_name(edgelist)
        if not os.path.exists('../datasets/{}.dimacs'.format(gname)):
            graph = nx.read_gpickle("../datasets/{}.p".format(gname))
            graph.name = gname
            pool.apply_async(xt.convert_nx_gObjs_to_dimacs_gObjs,
                             args=([graph], ),
                             callback=collect_results)
    pool.close()
    pool.join()
def explode_to_tree(fname, results_trees):
    """Submit one ``xt.dimacs_nddgo_tree_simple`` job per heuristic for a graph.

    The dimacs path is ``../datasets/<graph name>.dimacs``.  Jobs run in a
    2-process pool with ``collect_results`` as callback; once the pool has
    been joined the dimacs path is printed if that file exists.

    :param fname: input file name (converted with ``str`` before use)
    :param results_trees: accepted but not used by this function
    :return: None
    """
    print("\nExplode to tree")
    print("-" * 40)

    dimacs_file = "../datasets/{}.dimacs".format(xt.graph_name(str(fname)))

    pool = mp.Pool(processes=2)
    for heuristic in ('mcs', 'mind', 'minf', 'mmd', 'lexm', 'mcsm'):
        pool.apply_async(xt.dimacs_nddgo_tree_simple,
                         args=(dimacs_file, heuristic),
                         callback=collect_results)
    pool.close()
    pool.join()

    if os.path.exists(dimacs_file):
        print("\n {}".format(dimacs_file))
def star_dot_trees_to_prod_rules(files, results_prs):
    """Derive production rules from the ``.tree`` files of every input graph.

    For each file, all ``../datasets/<graph name>*.tree`` files are collected.
    Trees whose ``../ProdRules/<tree basename>.prs`` already exists are
    reported and skipped; for the others ``dimacs_td_ct_fast(graph, tree)``
    is submitted to a 2-process pool with ``collect_results`` as callback.
    After the pool is joined the module-level ``results_lst`` is printed.

    :param files: iterable of edgelist file names
    :param results_prs: accepted but not used by this function
    :return: None
    """
    print("Star dot trees to Production Rules")
    print("-" * 40)
    for f in files:
        gn = xt.graph_name(f)
        trees = glob("../datasets/{}*.tree".format(gn))
        pool = mp.Pool(processes=2)
        # The edgelist is the same for every tree of this file, so it is
        # loaded once, and only if some tree still needs processing.
        oriG = None
        for t in trees:
            prs_fname = "../ProdRules/{}.prs".format(os.path.basename(t))
            if os.path.exists(prs_fname):
                print(" {} file exits".format(prs_fname))
                continue
            if oriG is None:
                oriG = xt.load_edgelist(f)
            pool.apply_async(dimacs_td_ct_fast,
                             args=(oriG, t),
                             callback=collect_results)
        pool.close()
        pool.join()
        # NOTE(review): nesting reconstructed from a whitespace-collapsed
        # source; confirm this print runs once per file rather than once.
        print(results_lst)
def get_phrg_production_rules (argmnts): args = argmnts t_start = time.time() df = tdf.Pandas_DataFrame_From_Edgelist(args['orig'])[0] if df.shape[1] == 4: G = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr=True) # whole graph elif df.shape[1] == 3: G = nx.from_pandas_dataframe(df, 'src', 'trg', ['ts']) # whole graph else: G = nx.from_pandas_dataframe(df, 'src', 'trg') G.name = graph_name(args['orig'][0]) print "==> read in graph took: {} seconds".format(time.time() - t_start) G.remove_edges_from(G.selfloop_edges()) giant_nodes = max(nx.connected_component_subgraphs(G), key=len) G = nx.subgraph(G, giant_nodes) num_nodes = G.number_of_nodes() phrg.graph_checks(G) if DBG: print if DBG: print "--------------------" if not DBG: print "-Tree Decomposition-" if DBG: print "--------------------" prod_rules = {} K = 2 n = 300 if num_nodes >= 500: print 'Grande' t_start = time.time() for Gprime in gs.rwr_sample(G, K, n): T = td.quickbb(Gprime) root = list(T)[0] T = td.make_rooted(T, root) T = phrg.binarize(T) root = list(T)[0] root, children = T # td.new_visit(T, G, prod_rules, TD) td.new_visit(T, G, prod_rules) Process(target=td.new_visit, args=(T, G, prod_rules,)).start() else: T = td.quickbb(G) root = list(T)[0] T = td.make_rooted(T, root) T = phrg.binarize(T) root = list(T)[0] root, children = T # td.new_visit(T, G, prod_rules, TD) td.new_visit(T, G, prod_rules) # print_treewidth(T) # TODO: needs to be fixed # exit() if DBG: print if DBG: print "--------------------" if DBG: print "- Production Rules -" if DBG: print "--------------------" for k in prod_rules.iterkeys(): if DBG: print k s = 0 for d in prod_rules[k]: s += prod_rules[k][d] for d in prod_rules[k]: prod_rules[k][d] = float(prod_rules[k][d]) / float( s) # normailization step to create probs not counts. 
if DBG: print '\t -> ', d, prod_rules[k][d] rules = [] id = 0 for k, v in prod_rules.iteritems(): sid = 0 for x in prod_rules[k]: rhs = re.findall("[^()]+", x) rules.append(("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x])) if DBG: print ("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x]) sid += 1 id += 1 df = pd.DataFrame(rules) # pp.pprint(df.values.tolist()); exit() df.to_csv('../ProdRules/{}.tsv.phrg.prs'.format(G.name), header=False, index=False, sep="\t") if os.path.exists('../ProdRules/{}.tsv.phrg.prs'.format(G.name)): print 'Saved', '../ProdRules/{}.tsv.phrg.prs'.format(G.name) else: print "Trouble saving" print "-----------" print [type(x) for x in rules[0]] '''
df = pd.concat([df,x]) if 0: print j, "shape", df.shape gb =df.groupby(['k']) print (gb['cc'].mean().to_string()) print "-"*10 orig__clust_coef = metrics.clustering_coefficients_single(G) gb = orig__clust_coef.groupby(['k']) print (gb['cc'].mean().to_string()) #synth_clust_coef = results ''' if __name__ == '__main__': parser = get_parser() args = vars(parser.parse_args()) gname = graph_name(args['orig'][0]) if args['nstats']: # main_network_stats(args) exit() elif args['chunglu']: print 'Generate chunglu graphs given an edgelist' sys.exit(0) elif args['kron']: print 'Generate chunglu graphs given an edgelist' sys.exit(0) elif args['prs']: print ('get_phrg_production_rules_onsubgraphs') get_phrg_production_rules_onsubgraphs(args) #elif args['samp']: # print 'Sample K subgraphs of n nodes'
def tst_prod_rules_isom_intrxn(fname, origfname):
    """
    Test the isomorphic subset of rules

    Builds a grammar from the rules in ``fname``, samples 20 graphs sized
    like the reference network and stores them under the key ``'hstars'`` in
    the shelf ``FakeGraphs/<graph name>_isom_ntrxn.shl``.

    :param fname: isom intersection rules file
    :param origfname: reference input network (dataset) edgelist file
    :return: None (also when ``willFire_check`` rejects the rules)
    """
    from contextlib import closing

    # Get the original file
    fdf = Pandas_DataFrame_From_Edgelist([origfname])
    origG = nx.from_pandas_dataframe(fdf[0], 'src', 'trg')
    origG.name = graph_name(origfname)
    print("{} {}".format(origG.name, "+" * 80))

    # Read the subset of prod rules
    df = pd.read_csv(fname,
                     header=None,
                     sep="\t",
                     dtype={
                         0: str,
                         1: list,
                         2: list,
                         3: float
                     })
    g = pcfg.Grammar('S')

    if not willFire_check(df):
        print("{} {} {}".format("-" * 10, fname,
                                "contains production rules that WillNotFire"))
        return None
    print("+" * 40)

    # Process dataframe
    from td_isom_jaccard_sim import listify_rhs
    for (rule_id, lhs, rhs, prob) in df.values:
        g.add_rule(pcfg.Rule(rule_id, lhs, listify_rhs(rhs), float(prob)))
    print("\n" + "." * 40)

    num_nodes = origG.number_of_nodes()
    g.set_max_size(num_nodes)

    Hstars = []
    ofname = "FakeGraphs/" + origG.name + "_isom_ntrxn.shl"
    num_samples = 20
    # The shelf is still opened before sampling, as in the original, but
    # closing() now releases it even when sampling or growing raises.
    with closing(shelve.open(ofname)) as database:
        for _ in range(num_samples):
            rule_list = g.sample(num_nodes)
            hstar = phrg.grow(rule_list, g)[0]
            Hstars.append(hstar)
            print("{} {}".format(hstar.number_of_nodes(),
                                 hstar.number_of_edges()))
        print('-' * 40)
        database['hstars'] = Hstars