def lc(model='aae', rep=0):
    """Learning-curve experiment: score generated molecules against a fixed
    test split for increasing training sizes.

    model -- subfolder name of the generative model whose output is scored
    rep   -- repetition index selecting the train/test split files
    Returns (combined, base, newonly); only `newonly` is filled — the other
    two stay empty because their scoring lines are currently disabled.
    """
    # build the scorer from the pickled positive/negative test sets
    pos_test = ba.loadfile(f"{path}/ptest_{rep}.pick")
    neg_test = ba.loadfile(f"{path}/ntest_{rep}.pick")
    scorer = main.make_scorer(pos_test, neg_test)

    # original training graphs (only used by the disabled lines below)
    p = rdk.moses_to_nx(f"{path}/ptrain_{rep}.csv")
    n = rdk.moses_to_nx(f"{path}/ntrain_{rep}.csv")

    combined, base, newonly = [], [], []

    # all generated graphs for the positive / negative training sets
    opALL = list(rdk.smi_to_nx(f"{path}/ptrain_{rep}/{model}/gen"))
    onALL = list(rdk.smi_to_nx(f"{path}/ntrain_{rep}/{model}/gen"))

    def featurize(a, b):
        # vectorize concatenated graphs and attach 1/0 class labels
        return main.vectorize(a + b), [1] * len(a) + [0] * len(b)

    for size in main.args.trainsizes:
        op = opALL[:size]
        on = onALL[:size]
        newonly.append(scorer(featurize(op, on)))
        #base.append(scorer(f(p,n)))
        #combined.append(scorer(f(p+op,n+on)))

    return combined, base, newonly
def getnx(fname):
    """Load graphs from a gzipped smiles file, shuffled deterministically.

    A pickle cache ("<fname>.cache") is used when present; otherwise the
    file is parsed, shuffled with a fixed seed, cached, and returned.
    """
    cachename = fname + ".cache"
    if os.path.isfile(cachename):
        # cache hit: skip parsing entirely
        return ba.loadfile(cachename)
    with gzip.open(fname, 'rb') as fi:
        smiles = fi.read()
    # take the second whitespace-separated field of each line, dropping
    # the empty tail produced by the trailing newline
    atomz = list(rut.smiles_strings_to_nx(
        [line.split()[1] for line in smiles.split(b'\n')[:-1]]))
    random.seed(123)  # fixed seed: same shuffle order on every run
    random.shuffle(atomz)
    ba.dumpfile(atomz, cachename)
    return atomz
def __init__(self, fname, fname_int): self.fname = fname if os.path.isfile(fname): self.data = ba.loadfile(fname) else: self.data = {} self.fname_int = fname_int if os.path.isfile(fname_int): self.data_int = ba.jloadfile(fname_int) else: self.data_int = {} # q_t:interval '''
def loadsmi(fname, randseed=123):
    """Load preprocessed graphs from a smiles file, cached and shuffled.

    Results are cached at "<fname>.cache"; the shuffle is seeded so the
    order is reproducible for a given randseed.
    """
    cachename = fname + ".cache"
    if os.path.isfile(cachename):
        graphs = ba.loadfile(cachename)
    else:
        # parse, preprocess, then write the cache for next time
        graphs = lu.pre_process(list(rut.smi_to_nx(fname)))
        ba.dumpfile(graphs, cachename)
    random.seed(randseed)
    random.shuffle(graphs)
    return graphs
def getnx(fname, randseed=123):
    """Load preprocessed graphs from a gzipped smiles file (cached, shuffled)."""
    cachename = fname + ".cache"
    if os.path.isfile(cachename):
        graphs = ba.loadfile(cachename)
    else:
        # no cache: parse the gzip, preprocess, and write a cache
        with gzip.open(fname, 'rb') as fi:
            smiles = fi.read()
        # second whitespace-separated field per line; [:-1] drops the
        # empty entry after the trailing newline
        graphs = list(rut.smiles_strings_to_nx(
            [line.split()[1] for line in smiles.split(b'\n')[:-1]]))
        graphs = lu.pre_process(graphs)
        ba.dumpfile(graphs, cachename)
    # seeded shuffle so repeated runs see the same order
    random.seed(randseed)
    random.shuffle(graphs)
    return graphs
also i should write an optimizer and add it to the ubergauss BAYES DOCS ARE: the idea is that we want to optimize a graph while using just few tries - load some graphs and pca - define an oracle - ask a few graphs - use scores and pca projection to do bayesian opt -> pca point -> inquire -> repeat ''' def poptimizer(optimizer): print([list(row) for row in optimizer._space._params]) print(list(optimizer._space._target)) graphs = ba.loadfile('../graken/chemtasks/119') random.shuffle(graphs) graphs = graphs[:100] #pca = KernelPCA(2, fit_inverse_transform = True)# might want to do normal PCA to 100 and then kernel.. #pca = PCA(2)# might want to do normal PCA to 100 and then kernel.. pca = TruncatedSVD(3)# might want to do normal PCA to 100 and then kernel.. vectorizer= eg.Vectorizer(r=2, d=1) graphsV = vectorizer.transform(graphs) zz = pca.fit_transform(graphsV) from sklearn.neighbors import NearestNeighbors as NN nn = NN(n_neighbors = 50 , metric = 'cosine').fit(graphsV) def oracle(graph): return -abs( sum([label=='C' for n,label in graph.nodes(data='label')]) - 10)
def res(indices, reps):
    """For each index, print the element-wise mean over `reps` repeated
    runs stored as pickles at res/<index>_<rep>."""
    print(dnames)
    for i in range(indices):
        runs = np.array([ba.loadfile(f"res/{i}_{r}") for r in range(reps)])
        print(runs.mean(axis=0).tolist())
0.6686666666666667, 0.6666666666666666 ], [ 0.33099999999999996, 0.3333333333333333, 0.33899999999999997, 0.3383333333333333, 0.3506666666666667, 0.3626666666666667 ]] b = [[ 0.014055445761538672, 0.010208928554075711, 0.01574801574802361, 0.018018509002319456, 0.03397057550292604, 0.01517307556898807 ], [ 0.015151090903151363, 0.017987650084309404, 0.020607442021431662, 0.011440668201153687, 0.008576453553512412, 0.01222929088522944 ], [ 0.012247448713915879, 0.013021349989749726, 0.015895492023421807, 0.025315783394730028, 0.015584892970081268, 0.032714251057027466 ]] x = [200, 400, 600, 800, 1000, 1200] #x,a,b,c = ba.loadfile("char_rnn.pickle") #x = [200,400,600,800,1200] #print (x,a,b) x, means, b, c = ba.loadfile(sys.argv[1]) print(" ", x[0]) for a, b in zip(means, ['combined ', 'original ', 'generated']): print(b, ' '.join(["%.3f" % aa for aa in a])) #learncurve(x,means,b,c )