Example #1
def lc(model='aae', rep=0):
    # Test sets (pickled graphs) define the scorer.
    p = ba.loadfile(f"{path}/ptest_{rep}.pick")
    n = ba.loadfile(f"{path}/ntest_{rep}.pick")
    scorer = main.make_scorer(p, n)
    # Reload the train sets as networkx graphs.
    p = rdk.moses_to_nx(f"{path}/ptrain_{rep}.csv")
    n = rdk.moses_to_nx(f"{path}/ntrain_{rep}.csv")

    combined = []
    base = []
    newonly = []
    # Generated molecules for this model, one file per class.
    opALL = list(rdk.smi_to_nx(f"{path}/ptrain_{rep}/{model}/gen"))
    onALL = list(rdk.smi_to_nx(f"{path}/ntrain_{rep}/{model}/gen"))
    for size in main.args.trainsizes:
        op = opALL[:size]
        on = onALL[:size]

        def f(a, b):
            # Feature matrix plus 1/0 labels for positives/negatives.
            return main.vectorize(a + b), [1] * len(a) + [0] * len(b)

        newonly.append(scorer(f(op, on)))
        # base.append(scorer(f(p, n)))
        # combined.append(scorer(f(p + op, n + on)))
    return combined, base, newonly
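A hypothetical call, for illustration only; note that `combined` and `base` come back empty as long as the two scorer lines above stay commented out:

# Sketch of a call; 'aae' and rep=0 mirror the defaults above.
combined, base, newonly = lc(model='aae', rep=0)
print(newonly)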
Example #2
import gzip
import os
import random


def getnx(fname):
    # Reuse the cache if a previous run already parsed this file.
    cachename = fname + ".cache"
    if os.path.isfile(cachename):
        return ba.loadfile(cachename)
    with gzip.open(fname, 'rb') as fi:
        smiles = fi.read()
    # Column 1 of each line holds the SMILES string; the empty
    # split after the final newline is dropped.
    atomz = list(
        rut.smiles_strings_to_nx(
            [line.split()[1] for line in smiles.split(b'\n')[:-1]]))
    random.seed(123)  # fixed seed so the shuffle is reproducible
    random.shuffle(atomz)
    ba.dumpfile(atomz, cachename)
    return atomz
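The cache-or-compute idiom above can be factored into a small helper. A minimal sketch using plain pickle; `cached` is a hypothetical name, and it assumes `ba.loadfile`/`ba.dumpfile` are thin pickle wrappers:

import os
import pickle


def cached(fname, compute):
    # Hypothetical helper, not part of ba: return the pickled result
    # if a cache file exists, otherwise compute, cache, and return it.
    cachename = fname + ".cache"
    if os.path.isfile(cachename):
        with open(cachename, "rb") as fi:
            return pickle.load(fi)
    result = compute()
    with open(cachename, "wb") as fo:
        pickle.dump(result, fo)
    return result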
Example #3
    def __init__(self, fname, fname_int):
        # Primary cache, restored from pickle if the file exists.
        self.fname = fname
        if os.path.isfile(fname):
            self.data = ba.loadfile(fname)
        else:
            self.data = {}

        # Secondary cache, restored from JSON; maps q_t -> interval.
        self.fname_int = fname_int
        if os.path.isfile(fname_int):
            self.data_int = ba.jloadfile(fname_int)
        else:
            self.data_int = {}
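Hypothetical usage, assuming this `__init__` belongs to a class named, say, `Cache` (the real class name is not shown in the excerpt):

# Both files are optional; missing ones start as empty dicts.
cache = Cache('scores.pickle', 'intervals.json')
cache.data_int['q_42'] = (0.1, 0.9)  # q_t -> interval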
Example #4
import os
import random


def loadsmi(fname, randseed=123):
    '''Load a SMILES file as preprocessed graphs, using a cache when possible.'''
    cachename = fname + ".cache"
    if os.path.isfile(cachename):
        graphs = ba.loadfile(cachename)
    else:
        g = list(rut.smi_to_nx(fname))
        graphs = lu.pre_process(g)
        ba.dumpfile(graphs, cachename)

    random.seed(randseed)  # deterministic shuffle
    random.shuffle(graphs)
    return graphs
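One side effect of `random.seed(randseed)` here is that it reseeds the process-global RNG. A sketch of an alternative using a local `random.Random` instance (`shuffled` is a hypothetical helper, not part of the original code):

import random


def shuffled(items, randseed=123):
    # Shuffle a copy with a private RNG; the global random state is untouched.
    rng = random.Random(randseed)
    items = list(items)
    rng.shuffle(items)
    return items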
Example #5
import gzip
import os
import random


def getnx(fname, randseed=123):
    '''Load gzipped SMILES as preprocessed graphs, using a cache when possible.'''
    cachename = fname + ".cache"
    if os.path.isfile(cachename):
        graphs = ba.loadfile(cachename)
    else:
        # No cache yet: load normally and write one.
        with gzip.open(fname, 'rb') as fi:
            smiles = fi.read()
        graphs = list(
            rut.smiles_strings_to_nx(
                [line.split()[1] for line in smiles.split(b'\n')[:-1]]))
        graphs = lu.pre_process(graphs)
        ba.dumpfile(graphs, cachename)
    # Shuffle deterministically and return.
    random.seed(randseed)
    random.shuffle(graphs)
    return graphs
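The bytes-level parsing above can be simplified by opening the gzip file in text mode. A sketch, assuming the same whitespace-separated, one-molecule-per-line format (`read_smiles_column` is a hypothetical helper):

import gzip


def read_smiles_column(fname, column=1):
    # 'rt' yields str lines directly, avoiding the bytes split above.
    with gzip.open(fname, 'rt') as fi:
        return [line.split()[column] for line in fi if line.strip()]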
Example #6
Also, I should write an optimizer and add it to ubergauss.

BAYES DOCS:
The idea is to optimize a graph while using just a few tries:
- load some graphs and fit a PCA
- define an oracle
- ask for a few graphs
- use the scores and the PCA projection to do Bayesian optimization
  -> PCA point -> inquire -> repeat
'''


def poptimizer(optimizer):
    # Dump the sampled parameter vectors and their target values
    # (reaches into the optimizer's private _space attributes).
    print([list(row) for row in optimizer._space._params])
    print(list(optimizer._space._target))

import random
from sklearn.decomposition import TruncatedSVD
from sklearn.neighbors import NearestNeighbors as NN

graphs = ba.loadfile('../graken/chemtasks/119')
random.shuffle(graphs)
graphs = graphs[:100]
# Might want to do a normal PCA to 100 dimensions first and then kernel PCA:
# pca = KernelPCA(2, fit_inverse_transform=True)
# pca = PCA(2)
pca = TruncatedSVD(3)
vectorizer = eg.Vectorizer(r=2, d=1)  # eg and ba come from the surrounding module
graphsV = vectorizer.transform(graphs)
zz = pca.fit_transform(graphsV)
nn = NN(n_neighbors=50, metric='cosine').fit(graphsV)



def oracle(graph):
    # Reward graphs with exactly 10 carbon atoms; penalize by the distance.
    return -abs(sum(label == 'C' for n, label in graph.nodes(data='label')) - 10)
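A rough sketch of the ask/score/repeat loop described in the docstring, assuming the `optimizer` passed to `poptimizer` is a `bayes_opt.BayesianOptimization` instance (which matches the `_space._params`/`_space._target` attributes used above); the `target` function and the bounds are illustrative assumptions, not the original code:

from bayes_opt import BayesianOptimization


def target(x0, x1, x2):
    # Map the suggested PCA point back to vector space, then to the
    # nearest known graph, and score that graph with the oracle.
    point = pca.inverse_transform([[x0, x1, x2]])
    _, idx = nn.kneighbors(point, n_neighbors=1)
    return oracle(graphs[idx[0][0]])


bounds = {f"x{i}": (zz[:, i].min(), zz[:, i].max()) for i in range(3)}
optimizer = BayesianOptimization(f=target, pbounds=bounds, random_state=1)
optimizer.maximize(init_points=5, n_iter=20)
poptimizer(optimizer)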
Example #7
import numpy as np


def res(indices, reps):
    '''Print the mean over `reps` repeats for each of the first `indices` results.'''
    print(dnames)
    for i in range(indices):
        indexrepeats = np.array(
            [ba.loadfile(f"res/{i}_{r}") for r in range(reps)])
        print(indexrepeats.mean(axis=0).tolist())
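A hypothetical call, assuming result files named `res/{index}_{repeat}` were written by an earlier run and `dnames` lists the datasets:

# Average 5 repeats for each of the first len(dnames) result indices.
res(indices=len(dnames), reps=5)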
Example #8
         0.6686666666666667, 0.6666666666666666
     ],
     [
         0.33099999999999996, 0.3333333333333333, 0.33899999999999997,
         0.3383333333333333, 0.3506666666666667, 0.3626666666666667
     ]]

b = [[
    0.014055445761538672, 0.010208928554075711, 0.01574801574802361,
    0.018018509002319456, 0.03397057550292604, 0.01517307556898807
],
     [
         0.015151090903151363, 0.017987650084309404, 0.020607442021431662,
         0.011440668201153687, 0.008576453553512412, 0.01222929088522944
     ],
     [
         0.012247448713915879, 0.013021349989749726, 0.015895492023421807,
         0.025315783394730028, 0.015584892970081268, 0.032714251057027466
     ]]

x = [200, 400, 600, 800, 1000, 1200]

#x,a,b,c = ba.loadfile("char_rnn.pickle")
#x = [200,400,600,800,1200]
#print (x,a,b)
import sys

x, means, b, c = ba.loadfile(sys.argv[1])
print("         ", x[0])
# Don't reuse `b` as the loop variable here: it still holds the error bars.
for row, label in zip(means, ['combined ', 'original ', 'generated']):
    print(label, ' '.join(["%.3f" % v for v in row]))
# learncurve(x, means, b, c)
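`learncurve` itself is not shown; a minimal matplotlib stand-in with the same data, where the series labels are taken from the print loop above:

import matplotlib.pyplot as plt

labels = ['combined', 'original', 'generated']
for mean_row, err_row, label in zip(means, b, labels):
    # One learning curve per condition, with error bars from b.
    plt.errorbar(x, mean_row, yerr=err_row, label=label)
plt.xlabel('training set size')
plt.ylabel('score')
plt.legend()
plt.show()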