def mk2set(ind, randseed):
    """Build one train/test split and persist it under `foldername`.

    Test graphs (pos/neg) are pickled; training graphs (pos/neg) are
    written as moses-style CSVs. `ind` tags the output filenames.
    The psamp/nsamp return values of the split are unused here.
    """
    pos_test, neg_test, _psamp, _nsamp, pos_train, neg_train = \
        main.alt_lc_get_graphs(randseed)
    ba.dumpfile(pos_test, f"{foldername}/ptest_{ind}.pick")
    ba.dumpfile(neg_test, f"{foldername}/ntest_{ind}.pick")
    rdk.nx_to_moses(pos_train, f"{foldername}/ptrain_{ind}.csv")
    rdk.nx_to_moses(neg_train, f"{foldername}/ntrain_{ind}.csv")
def getnx(fname):
    """Load networkx graphs from a gzipped smiles file, with a pickle cache.

    Cache hit: return the previously shuffled list as-is.
    Cache miss: parse the second whitespace field of every line as a
    smiles string, convert to graphs, shuffle with a fixed seed (123),
    write the cache, and return.

    NOTE(review): a later definition of `getnx` in this file shadows
    this one at import time — confirm which is intended.
    """
    cache_path = fname + ".cache"
    if os.path.isfile(cache_path):
        return ba.loadfile(cache_path)
    with gzip.open(fname, 'rb') as handle:
        raw = handle.read()
    smiles_strings = [row.split()[1] for row in raw.split(b'\n')[:-1]]
    graphs = list(rut.smiles_strings_to_nx(smiles_strings))
    random.seed(123)
    random.shuffle(graphs)
    ba.dumpfile(graphs, cache_path)
    return graphs
def loadsmi(fname, randseed=123):
    """Load preprocessed graphs from a smiles file, caching the result.

    On a cache miss the smiles are parsed, run through lu.pre_process,
    and the processed list is pickled next to the input file. The
    returned list is always shuffled with `randseed`.
    """
    cache_path = fname + ".cache"
    if os.path.isfile(cache_path):
        graphs = ba.loadfile(cache_path)
    else:
        graphs = lu.pre_process(list(rut.smi_to_nx(fname)))
        ba.dumpfile(graphs, cache_path)
    random.seed(randseed)
    random.shuffle(graphs)
    return graphs
def getnx(fname, randseed=123):
    """Load preprocessed graphs from a gzipped smiles dump, with a cache.

    Cache hit: unpickle the processed graphs. Cache miss: read the
    gzip, take the second whitespace field of every line as a smiles
    string, convert to graphs, run lu.pre_process, and write the cache.
    Either way the list is shuffled with `randseed` before returning.
    """
    cache_path = fname + ".cache"
    if os.path.isfile(cache_path):
        graphs = ba.loadfile(cache_path)
    else:
        # no cache yet: parse, preprocess, then write one
        with gzip.open(fname, 'rb') as handle:
            raw = handle.read()
        smiles_strings = [row.split()[1] for row in raw.split(b'\n')[:-1]]
        graphs = lu.pre_process(list(rut.smiles_strings_to_nx(smiles_strings)))
        ba.dumpfile(graphs, cache_path)
    # shuffle and return
    random.seed(randseed)
    random.shuffle(graphs)
    return graphs
def format_abc(a, b, c, sav='res.pickle'):
    """Log column-wise mean/std of three score matrices and pickle everything.

    a, b, c: row-lists of scores (combined / originals-only / generated-only);
    zip(*rows) transposes them so statistics run per column.
    Writes [(ts + gen, ts, gen), means, stds, raw data] to `sav`, where
    ts are the configured train sizes and gen the generated-sample counts.
    """
    # fix: the std-dev locals were named `cs, os, gs` — `os` shadowed the
    # imported os module; renamed for safety and clarity. Also dropped the
    # redundant list() around zip(*x) in the comprehensions.
    comb_mean = [np.mean(col) for col in zip(*a)]
    orig_mean = [np.mean(col) for col in zip(*b)]
    gen_mean = [np.mean(col) for col in zip(*c)]
    comb_std = [np.std(col) for col in zip(*a)]
    orig_std = [np.std(col) for col in zip(*b)]
    gen_std = [np.std(col) for col in zip(*c)]
    logger.log(51, f'combined {comb_mean}')
    logger.log(51, f'originals {orig_mean}')
    logger.log(51, f'generated {gen_mean}')
    logger.log(41, f'combined{comb_std}')
    logger.log(41, f'originals only{orig_std}')
    logger.log(41, f'generated only{gen_std}')
    ts = np.array(args.trainsizes)
    # samples emitted per run: one every `emit` steps after burnin, plus one
    gen = np.array([
        e * ((args.n_steps - args.burnin) // args.emit + 1)
        for e in args.trainsizes
    ])
    ba.dumpfile([(ts + gen, ts, gen),
                 (comb_mean, orig_mean, gen_mean),
                 (comb_std, orig_std, gen_std),
                 (a, b, c)], sav)
def get_params_punk():
    """Return (dataset names, loader, true-label extractor) for the punk data."""
    def trve(pp):
        # flatten the 'true' observation labels of both halves of a pair
        return [i for d in [pp.a, pp.b] for i in d.obs['true'].values]

    def loader(x, seed):
        return load.loadgruen_single(
            f"../data/punk/{x}", subsample=sampnum, seed=seed)

    dnames = "human1 human2 human3 human4 smartseq2 celseq2 celseq".split()
    return dnames, loader, trve


if __name__ == "__main__":
    # argv[1] is "task t2 rep": score dataset `task` against dataset `t2`
    # for repetition `rep`, then cache the result under res/.
    task, t2, rep = map(int, sys.argv[1].strip().split(' '))
    dnames, loader, trve = get_params_100()
    other_name = dnames[t2]
    own_name = dnames[task]
    result = get_score(own_name, other_name, loader, trve, seed=rep)
    print("res: ", result)
    ba.dumpfile(result, "res/" + sys.argv[1].replace(" ", '_'))
    print("all good")

# use median instead of mean! TODO
# add error baro to plot -> quantiles! plot points! fit forrcoeff linear
# check the seed value for subsampling
# look at 100


def res(indices, reps):
    """Print, per index, the mean over `reps` cached repeat-results."""
    print(dnames)
    for i in range(indices):
        repeats = np.array(
            [ba.loadfile(f"res/{i}_{r}") for r in range(reps)])
        print(repeats.mean(axis=0).tolist())
def save(self):
    """Persist both payloads: `data` as a pickle, `data_int` as JSON."""
    for writer, payload, path in (
            (ba.dumpfile, self.data, self.fname),
            (ba.jdumpfile, self.data_int, self.fname_int)):
        writer(payload, path)