Example #1
def main(iteration, quantile, uncertainty, prior_name, name, oracle):
    # Aggregate docking results using previous gamma
    score_dict = gather_scores(iteration, name)

    # Memoization of the sampled compounds, if the oracle returns docking scores
    # if oracle == 'docking':
    #     print('doing memoization')
    #     whole_path = os.path.join(script_dir, '..', 'data', 'drd3_scores.pickle')
    #     docking_whole_results = pickle.load(open(whole_path, 'rb'))
    #     # Only update memoization for successful dockings
    #     new_results = {key: value for key, value in score_dict.items() if value < 0}
    #     docking_whole_results.update(new_results)
    #     pickle.dump(docking_whole_results, open(whole_path, 'wb'))

    # Reweight and discard wrong samples
    dump_path = os.path.join(script_dir, 'results', name, 'samples.p')
    samples, weights = pickle.load(open(dump_path, 'rb'))

    dumper = Dumper()
    json_path = os.path.join(script_dir, 'results', name, 'params_gentrain.json')
    params = dumper.load(json_path)
    gamma = params['gamma']

    samples, weights, gamma = process_samples(score_dict, samples, weights, uncertainty=uncertainty, quantile=quantile,
                                              oracle=oracle, prev_gamma=gamma)
    params['gamma'] = gamma
    dumper.dump(dict_to_dump=params, dumping_path=json_path)
    params.pop('gamma')

    # Load an instance of the previous model
    search_model = model_from_json(prior_name)

    # Retrieve the gentrain object and feed it the updated model
    savepath = os.path.join(params['savepath'], 'weights.pth')
    search_model.load(savepath)
    search_trainer = GenTrain(search_model, **params)

    # send to device
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    search_model.to(device)
    search_trainer.device = device
    search_trainer.load_optim()

    # Update search model
    search_trainer.step('smiles', samples, weights)

    # Dump the model weights at each iteration
    weights_path = os.path.join(search_trainer.savepath, f"weights_{iteration}.pth")
    torch.save(search_trainer.model.state_dict(), weights_path)
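
A quick hedged sketch of the matching reload step: the per-iteration checkpoints written above with torch.save(...state_dict()...) can be restored by rebuilding the architecture and loading the state dict. The variable names (prior_name, name, iteration) come from the example above; the results/<name>/ path layout is an assumption.

import os
import torch

# Sketch only: restore a per-iteration checkpoint saved with state_dict().
device = 'cuda' if torch.cuda.is_available() else 'cpu'
restored = model_from_json(prior_name)                        # rebuild the architecture
state = torch.load(os.path.join('results', name, f"weights_{iteration}.pth"),  # assumed layout
                   map_location=device)
restored.load_state_dict(state)
restored.to(device)
restored.eval()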
Example #2
        logprob = logprob * one_hot
        logprob_x = torch.sum(logprob.reshape(z.shape[0], -1), dim=1)

    return logprob_x.cpu()


if __name__ == '__main__':
    from utils import *
    from dgl_utils import send_graph_to_device
    from model import model_from_json
    import numpy as np

    print('Testing for a random batch of 12 molecules')

    model = model_from_json('kekule')

    x = np.random.randint(0, 33, size=(12, 54))
    x = torch.tensor(x, dtype=torch.long)

    z = model.sample_z_prior(n_mols=12)

    true_dec = model.decode(z)
    _, true_dec = torch.max(true_dec, dim=1)

    l_true = GenProb(true_dec, z, model)
    l = GenProb(x, z, model)

    print('logprob of the true decoded x|z:', l_true.cpu().detach())
    print('logprob of a randomly sampled x|z:', l.cpu().detach())
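
The truncated top of GenProb masks per-position log-probabilities with a one-hot encoding of the target indices and sums them per molecule. A self-contained sketch of that reduction pattern, with illustrative shapes matching the test above (12 molecules, alphabet of 33, length 54); this is not the project's GenProb, just the masking idiom:

import torch
import torch.nn.functional as F

batch, vocab, seq_len = 12, 33, 54
logits = torch.randn(batch, vocab, seq_len)               # decoder scores per position
x = torch.randint(0, vocab, (batch, seq_len))             # target indices

logprob = F.log_softmax(logits, dim=1)                    # per-position log-probabilities
one_hot = F.one_hot(x, num_classes=vocab).permute(0, 2, 1).float()
logprob_x = (logprob * one_hot).reshape(batch, -1).sum(dim=1)   # one scalar per molecule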
Example #3
# Loading the model:

# Loader for initial sample
loader = Loader(props=[],
                targets=[],
                csv_path=None,
                maps_path='../map_files',
                alphabet_name=alphabet,
                vocab='selfies',
                num_workers=0,
                test_only=True)

# Load model (on gpu if available)
device = 'cuda' if torch.cuda.is_available() else 'cpu'  # the model device
gp_device = 'cpu'  # 'cuda' if torch.cuda.is_available() else 'cpu'  # Gaussian process device
model = model_from_json(model_name)
model.to(device)
model.eval()

iteration = 0

# ============ Iter loop ===============
while iteration < args.n_iters:

    # We fit the GP

    np.random.seed(iteration * random_seed)
    M = 500
    sgp = SparseGP(X_train, 0 * X_train, y_train, M)
    sgp.train_via_ADAM(X_train, 0 * X_train, y_train, X_test, X_test * 0, y_test,
                       minibatch_size=10 * M, max_iterations=args.epochs,
                       learning_rate=0.0005)
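
Inside the loop the VAE is only used for encoding and decoding, so those calls are usually wrapped in torch.no_grad() to avoid building autograd graphs. A small illustrative pattern, reusing the model loaded above together with sample_z_prior and decode from Example #2 (their use here is an assumption, not part of this snippet):

    with torch.no_grad():                        # pure inference, no gradient tracking
        z = model.sample_z_prior(n_mols=12)      # latent samples from the prior
        out = model.decode(z)                    # decoded logits for those samples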
Example #4
                        "--cutoff",
                        help="Number of molecules to embed. -1 for all",
                        type=int,
                        default=-1)
    parser.add_argument('-n', '--name', type=str, default='inference_default')
    parser.add_argument('-v', '--vocab', type=str, default='selfies')
    parser.add_argument('-d', '--decode', action='store_true')
    parser.add_argument('--pca', action='store_false')  # PCA space plot

    # =====================

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    args, _ = parser.parse_known_args()

    # Load model (on gpu if available)
    model = model_from_json(args.name)
    model.to(device)
    model.eval()

    # Load dataframe with mols to embed
    if args.cutoff > 0:
        smiles_df = pd.read_csv(args.input, index_col=0,
                                nrows=args.cutoff)  # cutoff csv at nrows
    else:
        smiles_df = pd.read_csv(args.input, index_col=0)

    # Initialize dataloader with empty dataset
    dataloader = Loader(maps_path='map_files/',
                        vocab=args.vocab,
                        build_alphabet=False,
                        n_mols=args.cutoff,
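
One detail worth noting: '--decode' uses action='store_true' (default False), while '--pca' uses action='store_false' (default True, and passing --pca turns it off). A tiny standalone illustration of those flag semantics:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('-d', '--decode', action='store_true')    # default False; flag sets it True
parser.add_argument('--pca', action='store_false')            # default True; flag sets it False

args = parser.parse_args([])                 # no flags passed on the command line
assert args.decode is False and args.pca is True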
Example #5
import model 
import data
import sys

# VARIABLES
test_dir = "../data/test"
generated_dir = "../data/test_generated"
if len(sys.argv) > 1:
    num_of_test = int(sys.argv[1])  # get the number of test samples from the user
else:
    num_of_test = 4

# LOAD THE MODEL
# load json and create model
json_file = open('../model/modelStructure.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
myModel = model.model_from_json(loaded_model_json)
# load weights into new model
myModel.load_weights("../model/modelWeights.h5")
print("Model loaded from disk") 

# evaluate loaded model on test data
myModel.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])


# RUN ON THE TEST SAMPLES
testGene = data.testGenerator(test_dir)  # alternatively: generated_dir
results = myModel.predict_generator(testGene,num_of_test,verbose=1)
data.saveResult(test_dir,results)
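
The structure and weights files read above are typically produced with the standard Keras serialization calls. A minimal sketch of the matching save side (trained_model stands for an already-fitted Keras model; the paths mirror the snippet):

# Sketch only: produce the files that the loading script above expects.
with open('../model/modelStructure.json', 'w') as json_file:
    json_file.write(trained_model.to_json())             # architecture as JSON
trained_model.save_weights('../model/modelWeights.h5')    # weights as HDF5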
Example #6
        'lr': args.learning_rate,
        'clip_grad': args.clip_grad_norm,
        'beta': args.beta,
        'processes': args.procs,
        'optimizer': args.opti,
        'scheduler': args.sched,
        'alphabet_name': args.alphabet_name,
        'gamma': -1000,
        'DEBUG': True
    }
    dumper = Dumper(dumping_path=os.path.join(savepath,
                                              'params_gentrain.json'),
                    dic=params_gentrain)
    dumper.dump()

    prior_model_init = model_from_json(args.prior_name)
    print(prior_model_init)
    torch.save(prior_model_init.state_dict(),
               os.path.join(savepath, "weights.pth"))
    id_train = None

    for iteration in range(1, args.iters + 1):
        # SAMPLING
        slurm_sampler_path = os.path.join(script_dir, 'slurm_sampler.sh')
        if id_train is None:
            cmd = f'sbatch {slurm_sampler_path}'
        else:
            cmd = f'sbatch --depend=afterany:{id_train} {slurm_sampler_path}'
        extra_args = f' {args.prior_name} {args.name} {args.max_samples} {args.oracle} {args.cap_weights}'
        cmd = cmd + extra_args
        a = subprocess.run(cmd.split(),
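
The truncated subprocess.run call presumably captures sbatch's output so the job id can feed the --depend=afterany: chain used above. A self-contained sketch of that pattern; parsing the id from sbatch's "Submitted batch job <id>" line is an assumption about the scheduler's output format:

import subprocess

cmd = 'sbatch slurm_sampler.sh'                               # illustrative script path
a = subprocess.run(cmd.split(), capture_output=True, text=True)
id_train = a.stdout.strip().split()[-1]                       # assumed "Submitted batch job <id>" format
next_cmd = f'sbatch --depend=afterany:{id_train} slurm_sampler.sh'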
Example #7
def main(prior_name, name, max_samples, diversity_picker, oracle, w_min):
    prior_model = model_from_json(prior_name)

    # We start by creating another prior instance, then replace it with the actual weights
    # name = search_vae
    search_model = model_from_json(prior_name)
    model_weights_path = os.path.join(script_dir, 'results', name,
                                      'weights.pth')
    search_model.load(model_weights_path)

    samples, weights = get_samples(prior_model,
                                   search_model,
                                   max=max_samples,
                                   w_min=w_min)

    # If 0 < diversity_picker < max_samples, subsample with the RDKit MaxMin diversity picker:
    if 0 < diversity_picker < max_samples:
        mols = [Chem.MolFromSmiles(s) for s in samples]
        fps = [GetMorganFingerprint(x, 3) for x in mols]
        picker = MaxMinPicker()

        def distij(i, j, fps=fps):
            return 1 - DataStructs.DiceSimilarity(fps[i], fps[j])

        pickIndices = picker.LazyPick(distij, max_samples, diversity_picker)
        idces = list(pickIndices)
        samples = [samples[i] for i in idces]
        weights = [weights[i] for i in idces]

    # Since we don't maintain a score dict for qed, we just give everything to the docker.
    # Note: the `or True` below short-circuits the condition, so this branch always runs
    # and the memoization path further down is currently disabled.
    if oracle != 'docking' or True:
        dump_path = os.path.join(script_dir, 'results', name,
                                 'docker_samples.p')
        pickle.dump(samples, open(dump_path, 'wb'))

        # Dump for the trainer
        dump_path = os.path.join(script_dir, 'results', name, 'samples.p')
        pickle.dump((samples, weights), open(dump_path, 'wb'))

    else:
        # Memoization: split the samples into already-docked molecules and new ones, and dump a simili-docking csv for the former
        whole_path = os.path.join(script_dir, '..', 'data',
                                  'drd3_scores.pickle')
        docking_whole_results = pickle.load(open(whole_path, 'rb'))
        filtered_smiles = list()
        already_smiles = list()
        already_scores = list()
        for smile in samples:
            if smile in docking_whole_results:
                already_smiles.append(smile)
                already_scores.append(docking_whole_results[smile])
            else:
                filtered_smiles.append(smile)

        # Dump simili-docking
        dump_path = os.path.join(script_dir, 'results', name,
                                 'docking_small_results', 'simili.csv')
        df = pd.DataFrame.from_dict({
            'smile': already_smiles,
            'score': already_scores
        })
        df.to_csv(dump_path)

        # Dump for the docker
        dump_path = os.path.join(script_dir, 'results', name,
                                 'docker_samples.p')
        pickle.dump(filtered_smiles, open(dump_path, 'wb'))

        # Dump for the trainer
        dump_path = os.path.join(script_dir, 'results', name, 'samples.p')
        pickle.dump((samples, weights), open(dump_path, 'wb'))
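
The memoization branch assumes a pickled dict mapping SMILES to docking scores at data/drd3_scores.pickle, the same cache updated in Example #1's commented-out block. A minimal sketch of creating or updating such a cache (the example molecule and score are hypothetical):

import os
import pickle

cache_path = os.path.join('data', 'drd3_scores.pickle')        # path mirrors the examples
scores = pickle.load(open(cache_path, 'rb')) if os.path.exists(cache_path) else {}

new_results = {'CCO': -7.2}                                     # hypothetical SMILES -> docking score
scores.update({s: v for s, v in new_results.items() if v < 0})  # keep only successful dockings
pickle.dump(scores, open(cache_path, 'wb'))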