def edgelist_in_dimacs_out(ifname):
    """Convert an edge-list file into a DIMACS-style graph file.

    The input is read with ``nx.read_edgelist`` (edge data ignored); node
    labels are converted with ``int``, so they must be integer strings.
    Edges are written sorted by their first endpoint, with both endpoint
    ids shifted by +1.

    Args:
        ifname: path of the input edge-list file.

    Returns:
        The path of the DIMACS file, ``../datasets/<graph name>.dimacs``,
        or ``None`` when that file already exists (it is left untouched).
    """
    g = nx.read_edgelist(ifname, data=False)
    g.name = graph_name(ifname)
    ofname = '../datasets/{}.dimacs'.format(g.name)
    if path.exists(ofname):
        return None

    n = g.number_of_nodes()
    m = g.number_of_edges()
    # A stable sort on the first endpoint also copes with an edge-less
    # graph, where the former DataFrame.sort_values(by=[0]) raised KeyError.
    edges = sorted(((int(e[0]), int(e[1])) for e in g.edges()),
                   key=lambda e: e[0])

    with open(ofname, 'w') as f:
        f.write('c {}\n'.format(g.name))
        # NOTE(review): the problem line declares n + 1 nodes; presumably
        # this leaves room for 0-based ids after the +1 shift -- confirm.
        f.write('p edge\t{}\t{}\n'.format(n + 1, m))
        f.writelines("e\t{}\t{}\n".format(u + 1, v + 1) for u, v in edges)

    if path.exists(ofname):
        Info("Wrote: %s" % ofname)
    # ToDo: a few extra checks could be added
    return ofname
def proc_prod_rules_orig(fname):
    """Load a tab-separated rules file and print per-rule size summaries.

    Column 2 is parsed with ``listify_rhs`` into a new ``rhs`` column.
    ``rhs_n`` counts the ``rhs`` items containing the letter ``'N'``
    (presumably the nonterminal symbols -- confirm), and ``lhs_n`` counts
    the comma-separated items of column 1.  Nothing is returned; the
    function only prints the head of the intermediate results.

    Args:
        fname: path of the header-less, tab-separated rules file.
    """
    graph_name(fname)  # result was never used by this function

    rules = pd.read_csv(fname, header=None, sep="\t")
    rules['rhs'] = rules[2].apply(listify_rhs)

    n_items = rules['rhs'].apply(lambda items: [k for k in items if 'N' in k])
    print(n_items.head())

    rules['rhs_n'] = rules['rhs'].apply(
        lambda items: len([k for k in items if 'N' in k]))
    rules['lhs_n'] = rules[1].apply(lambda lhs: len(lhs.split(",")))
    print(rules.head())
def proc_prod_rules_single(fname):
    """Plot LHS/RHS size histograms for every rules file of one graph.

    All files matching ``../ProdRules/*<graph name>*prs*`` are read as
    header-less, tab-separated tables.  For each row, ``lhs_n`` is the
    number of comma-separated items in column 1 and ``rhs_n`` is the number
    of ``listify_rhs(column 2)`` items containing ``'N'``.  Rows are tagged
    with ``varel``, the third dot-separated token of the file's base name,
    and one histogram panel per ``varel`` value is saved to ``tmpfig``.

    Args:
        fname: path of the dataset whose graph name selects the rule files.

    Returns:
        None.  Returns early, without plotting, when no rule file matches.
    """
    gn = graph_name(fname)
    PRS_dir = "../ProdRules/"
    files = glob(PRS_dir + "*{}*prs*".format(gn))
    pp.pprint(files)
    if not files:
        # Previously this fell through to a NameError on `df`.
        print("No production-rule files found for {}".format(gn))
        return

    frames = []
    for f in files:
        df = pd.read_csv(f, header=None, sep="\t")
        df['varel'] = os.path.basename(f).split(".")[2]
        df['rhs'] = df[2].apply(listify_rhs)
        df['lhs_n'] = df[1].apply(lambda x: len(x.split(",")))
        df['rhs_n'] = df['rhs'].apply(
            lambda x: len([k for k in x if 'N' in k]))
        frames.append(df)
    print(df.head())

    # The original prepended each frame; reverse to keep the same row order
    # while concatenating only once.
    mdf = pd.concat(frames[::-1])
    gb = mdf.groupby('varel').groups

    # squeeze=False always yields a 2-D array of axes, so indexing also
    # works when there is a single group (a bare Axes is not subscriptable).
    fig, xa = plt.subplots(1, len(gb.keys()), squeeze=False)
    axes = xa[0]
    numBins = 4
    for j, ve in enumerate(gb.keys()):
        print(ve)
        axes[j].hist(mdf[mdf['varel'] == ve][['lhs_n', 'rhs_n']],
                     numBins, alpha=0.8)
        if j == 0:
            axes[j].legend(('lhs_n', 'rhs_n'))
        axes[j].set_title(ve)
    plt.savefig('tmpfig', bbox_inches='tight')
def hstar_fixed_graph_gen(args):
    """Sample graphs from each production-rule file of a dataset.

    The reference graph is loaded from ``../datasets/<graph name>.p`` when
    that pickle exists, otherwise from the edge list itself.  For every
    ``../ProdRules/<graph name>*prs`` file a ``pcfg.Grammar`` is built from
    ``get_prod_rules``, 20 rule sequences are sampled and grown with
    ``PHRG.grow``, and the result is pickled to
    ``../Results/<graph name>_hstars.p`` as
    ``{'origG': ..., 'hstars': [...]}``.

    NOTE(review): the source's indentation was lost; this assumes the
    sampling and pickling run once per rules file.  Every file writes to
    the same output path, so the last one processed wins -- confirm.

    Args:
        args: mapping whose ``'grow'`` entry is a sequence; its first item
            is the path of the original edge-list file.
    """
    import pickle
    import networkx as nx

    orig_fname = args['grow'][0]
    gn = graph_name(orig_fname)
    pickled_graph = "../datasets/{}.p".format(gn)
    if os.path.exists(pickled_graph):
        origG = nx.read_gpickle(pickled_graph)
    else:
        print("we load edgelist into an nx.obj")
        # The original only printed the message and left origG undefined,
        # failing later with a NameError.  The read options mirror
        # edgelist_to_dimacs ('%' comment lines, integer node ids).
        origG = nx.read_edgelist(orig_fname, comments="%", data=False,
                                 nodetype=int)

    num_nodes = origG.number_of_nodes()
    num_samples = 20
    out_fname = '../Results/{}_hstars.p'.format(gn)

    prs_files = glob("../ProdRules/{}*prs".format(gn))
    for f in prs_files:
        prod_rules = get_prod_rules(f)
        g = pcfg.Grammar('S')
        for (rule_id, lhs, rhs, prob) in prod_rules:
            g.add_rule(pcfg.Rule(rule_id, lhs, rhs, prob))
        # ToDo: We need to get these rules in the right format

        print("Starting max size")
        g.set_max_size(num_nodes)
        print("Done with max size")

        Hstars = []
        print('*' * 40)
        for _ in range(num_samples):
            rule_list = g.sample(num_nodes)
            hstar = PHRG.grow(rule_list, g)[0]
            Hstars.append(hstar)

        # Close the output file deterministically instead of leaking it.
        with open(out_fname, "wb") as out:
            pickle.dump({'origG': origG, 'hstars': Hstars}, out)
        if os.path.exists(out_fname):
            print("Pickle written")
def edgelist_to_dimacs(fname):
    """Read an edge list and hand it to the DIMACS converter.

    The file is parsed with integer node ids, ignoring edge data and lines
    starting with ``%``; the graph is named via ``graph_name(fname)``.

    Args:
        fname: path of the edge-list file.

    Returns:
        Whatever ``convert_nx_gObjs_to_dimacs_gObjs`` returns for the
        one-element list holding the graph.
    """
    graph = nx.read_edgelist(fname, comments="%", data=False, nodetype=int)
    graph.name = graph_name(fname)
    return convert_nx_gObjs_to_dimacs_gObjs([graph])
from utils import Info, graph_name
import sys
import os
import pprint as pp
from glob import glob
from isomorph_overlap_hl import stack_prod_rules_bygroup_into_list
from prs import proc_prod_rules_orig

results = []


def prs_count_per(prs_lst):
    """Pretty-print and return the line count of each given file.

    Args:
        prs_lst: iterable of file paths.

    Returns:
        A list of ``[base name, number of lines]`` pairs, one per file and
        in input order.  Each pair is also pretty-printed as it is computed.
    """
    counts = []
    for f in prs_lst:
        # Close each file promptly instead of leaking one handle per file.
        with open(f) as fh:
            entry = [os.path.basename(f), sum(1 for _ in fh)]
        pp.pprint(entry)
        counts.append(entry)
    return counts


if __name__ == '__main__':
    if len(sys.argv) < 2:
        Info("add an out.* dataset with its full path")
        # sys.exit() rather than the site-provided exit(); same exit status.
        sys.exit()

    f = sys.argv[1]
    gn = graph_name(f)
    # Reuse `f` as the glob pattern for this graph's production-rule files.
    f = "../ProdRules/" + gn + "*.prs"
    files = glob(f)

    prs_cnt_per = prs_count_per(files)
    # prs_stack = stack_prod_rules_bygroup_into_list(files)
    sys.exit(0)