def main(iteration, quantile, uncertainty, prior_name, name, oracle):
    # Aggregate docking results using previous gamma
    score_dict = gather_scores(iteration, name)

    # Memoization of the sampled compounds, if they are docking scores
    # if oracle == 'docking':
    #     print('doing memoization')
    #     whole_path = os.path.join(script_dir, '..', 'data', 'drd3_scores.pickle')
    #     docking_whole_results = pickle.load(open(whole_path, 'rb'))
    #     # Only update memoization for successful dockings
    #     new_results = {key: value for key, value in score_dict.items() if value < 0}
    #     docking_whole_results.update(new_results)
    #     pickle.dump(docking_whole_results, open(whole_path, 'wb'))

    # Reweight and discard wrong samples
    dump_path = os.path.join(script_dir, 'results', name, 'samples.p')
    samples, weights = pickle.load(open(dump_path, 'rb'))

    dumper = Dumper()
    json_path = os.path.join(script_dir, 'results', name, 'params_gentrain.json')
    params = dumper.load(json_path)
    gamma = params['gamma']

    samples, weights, gamma = process_samples(score_dict, samples, weights, uncertainty=uncertainty,
                                              quantile=quantile, oracle=oracle, prev_gamma=gamma)
    params['gamma'] = gamma
    dumper.dump(dict_to_dump=params, dumping_path=json_path)
    params.pop('gamma')

    # Load an instance of the previous model
    search_model = model_from_json(prior_name)

    # Retrieve the gentrain object and feed it with the updated model
    savepath = os.path.join(params['savepath'], 'weights.pth')
    search_model.load(savepath)
    search_trainer = GenTrain(search_model, **params)

    # Send to device
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    search_model.to(device)
    search_trainer.device = device
    search_trainer.load_optim()

    # Update the search model
    search_trainer.step('smiles', samples, weights)

    # Dump the model weights at each iteration
    weights_path = os.path.join(search_trainer.savepath, f"weights_{iteration}.pth")
    torch.save(search_trainer.model.state_dict(), weights_path)
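# ---------------------------------------------------------------------------
# Rough sketch of a CLI wrapper for main() above. This fragment does not show the
# script's real entry point; the flag names and defaults below are illustrative
# assumptions only ('kekule', 'search_vae' and 'docking' reuse names seen elsewhere here).
# ---------------------------------------------------------------------------
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--iteration', type=int, default=1)           # current optimization iteration
    parser.add_argument('--quantile', type=float, default=0.5)        # quantile used to update gamma
    parser.add_argument('--uncertainty', type=str, default='binary')  # assumed name of the uncertainty model
    parser.add_argument('--prior_name', type=str, default='kekule')   # prior VAE to instantiate
    parser.add_argument('--name', type=str, default='search_vae')     # run directory under results/
    parser.add_argument('--oracle', type=str, default='docking')      # e.g. 'docking' or 'qed'
    args, _ = parser.parse_known_args()

    main(args.iteration, args.quantile, args.uncertainty, args.prior_name, args.name, args.oracle)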
    logprob = logprob * one_hot
    logprob_x = torch.sum(logprob.reshape(z.shape[0], -1), dim=1)
    return logprob_x.cpu()


if __name__ == '__main__':
    from utils import *
    from dgl_utils import send_graph_to_device
    from model import model_from_json
    import numpy as np

    print('Testing for a random batch of 12 molecules')
    model = model_from_json('kekule')

    x = np.random.randint(0, 33, size=(12, 54))
    x = torch.tensor(x, dtype=torch.long)
    z = model.sample_z_prior(n_mols=12)
    true_dec = model.decode(z)
    _, true_dec = torch.max(true_dec, dim=1)

    l_true = GenProb(true_dec, z, model)
    l = GenProb(x, z, model)
    print('logprob of the true decoded x |z ', l_true.cpu().detach())
    print('logprob of a randomly sampled x |z ', l.cpu().detach())
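# ---------------------------------------------------------------------------
# For context: one plausible way the GenProb log-likelihoods could be turned into
# prior/search importance weights (cf. get_samples and the w_min argument elsewhere
# in this repo). This is an assumption about the weighting scheme, not the repo's
# actual implementation; importance_weights is a hypothetical helper.
# ---------------------------------------------------------------------------
def importance_weights(x, z, prior_model, search_model, w_min=0.0):
    log_p = GenProb(x, z, prior_model)    # log p(x | z) under the prior decoder
    log_q = GenProb(x, z, search_model)   # log q(x | z) under the current search decoder
    w = torch.exp(log_p - log_q)          # likelihood ratio as importance weight
    return torch.clamp(w, min=w_min)      # floor very small weights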
# Loading the model:
# Loader for initial sample
loader = Loader(props=[],
                targets=[],
                csv_path=None,
                maps_path='../map_files',
                alphabet_name=alphabet,
                vocab='selfies',
                num_workers=0,
                test_only=True)

# Load model (on gpu if available)
device = 'cuda' if torch.cuda.is_available() else 'cpu'  # the model device
gp_device = 'cpu'  # 'cuda' if torch.cuda.is_available() else 'cpu'  # the gaussian process device
model = model_from_json(model_name)
model.to(device)
model.eval()

iteration = 0
# ============ Iter loop ===============
while iteration < args.n_iters:

    # We fit the GP
    np.random.seed(iteration * random_seed)
    M = 500
    sgp = SparseGP(X_train, 0 * X_train, y_train, M)
    sgp.train_via_ADAM(X_train, 0 * X_train, y_train, X_test, X_test * 0,
                       y_test, minibatch_size=10 * M, max_iterations=args.epochs,
                       learning_rate=0.0005)
"--cutoff", help="Number of molecules to embed. -1 for all", type=int, default=-1) parser.add_argument('-n', '--name', type=str, default='inference_default') parser.add_argument('-v', '--vocab', type=str, default='selfies') parser.add_argument('-d', '--decode', action='store_true') parser.add_argument('--pca', action='store_false') # PCA space plot # ===================== device = 'cuda' if torch.cuda.is_available() else 'cpu' args, _ = parser.parse_known_args() # Load model (on gpu if available) model = model_from_json(args.name) model.to(device) model.eval() # Load dataframe with mols to embed if args.cutoff > 0: smiles_df = pd.read_csv(args.input, index_col=0, nrows=args.cutoff) # cutoff csv at nrows else: smiles_df = pd.read_csv(args.input, index_col=0) # Initialize dataloader with empty dataset dataloader = Loader(maps_path='map_files/', vocab=args.vocab, build_alphabet=False, n_mols=args.cutoff,
import model
import data
import sys

# VARIABLES
test_dir = "../data/test"
generated_dir = "../data/test_generated"

if len(sys.argv) > 1:
    num_of_test = int(sys.argv[1])  # get the number of test samples from the user
else:
    num_of_test = 4

# LOAD THE MODEL
# load json and create model
json_file = open('../model/modelStructure.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
myModel = model.model_from_json(loaded_model_json)

# load weights into new model
myModel.load_weights("../model/modelWeights.h5")
print("Model loaded from disk")

# evaluate loaded model on test data
myModel.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

# RUN ON THE TEST SAMPLES
testGene = data.testGenerator(test_dir)  # or generated_dir
results = myModel.predict_generator(testGene, num_of_test, verbose=1)
data.saveResult(test_dir, results)
    'lr': args.learning_rate,
    'clip_grad': args.clip_grad_norm,
    'beta': args.beta,
    'processes': args.procs,
    'optimizer': args.opti,
    'scheduler': args.sched,
    'alphabet_name': args.alphabet_name,
    'gamma': -1000,
    'DEBUG': True
}
dumper = Dumper(dumping_path=os.path.join(savepath, 'params_gentrain.json'), dic=params_gentrain)
dumper.dump()

prior_model_init = model_from_json(args.prior_name)
print(prior_model_init)
torch.save(prior_model_init.state_dict(), os.path.join(savepath, "weights.pth"))

id_train = None
for iteration in range(1, args.iters + 1):

    # SAMPLING
    slurm_sampler_path = os.path.join(script_dir, 'slurm_sampler.sh')
    if id_train is None:
        cmd = f'sbatch {slurm_sampler_path}'
    else:
        cmd = f'sbatch --depend=afterany:{id_train} {slurm_sampler_path}'
    extra_args = f' {args.prior_name} {args.name} {args.max_samples} {args.oracle} {args.cap_weights}'
    cmd = cmd + extra_args
    a = subprocess.run(cmd.split(),
def main(prior_name, name, max_samples, diversity_picker, oracle, w_min):
    prior_model = model_from_json(prior_name)

    # We start by creating another prior instance, then replace it with the actual weights
    # name = search_vae
    search_model = model_from_json(prior_name)
    model_weights_path = os.path.join(script_dir, 'results', name, 'weights.pth')
    search_model.load(model_weights_path)

    samples, weights = get_samples(prior_model, search_model, max=max_samples, w_min=w_min)

    # If diversity_picker < max_samples, we subsample with the rdkit MaxMin picker:
    if 0 < diversity_picker < max_samples:
        mols = [Chem.MolFromSmiles(s) for s in samples]
        fps = [GetMorganFingerprint(x, 3) for x in mols]
        picker = MaxMinPicker()

        def distij(i, j, fps=fps):
            return 1 - DataStructs.DiceSimilarity(fps[i], fps[j])

        pickIndices = picker.LazyPick(distij, max_samples, diversity_picker)
        idces = list(pickIndices)
        samples = [samples[i] for i in idces]
        weights = [weights[i] for i in idces]

    # Since we don't maintain a dict for qed, we just give everything to the docker
    # NOTE: the 'or True' makes this branch unconditional, bypassing the memoization path below
    if oracle != 'docking' or True:
        dump_path = os.path.join(script_dir, 'results', name, 'docker_samples.p')
        pickle.dump(samples, open(dump_path, 'wb'))

        # Dump for the trainer
        dump_path = os.path.join(script_dir, 'results', name, 'samples.p')
        pickle.dump((samples, weights), open(dump_path, 'wb'))
    else:
        # Memoization: split the list into already-docked compounds and dump a simili-docking csv
        whole_path = os.path.join(script_dir, '..', 'data', 'drd3_scores.pickle')
        docking_whole_results = pickle.load(open(whole_path, 'rb'))

        filtered_smiles = list()
        already_smiles = list()
        already_scores = list()
        for i, smile in enumerate(samples):
            if smile in docking_whole_results:
                already_smiles.append(smile)
                already_scores.append(docking_whole_results[smile])
            else:
                filtered_smiles.append(smile)

        # Dump simili-docking results
        dump_path = os.path.join(script_dir, 'results', name, 'docking_small_results', 'simili.csv')
        df = pd.DataFrame.from_dict({'smile': already_smiles, 'score': already_scores})
        df.to_csv(dump_path)

        # Dump for the docker
        dump_path = os.path.join(script_dir, 'results', name, 'docker_samples.p')
        pickle.dump(filtered_smiles, open(dump_path, 'wb'))

        # Dump for the trainer
        dump_path = os.path.join(script_dir, 'results', name, 'samples.p')
        pickle.dump((samples, weights), open(dump_path, 'wb'))
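# ---------------------------------------------------------------------------
# Self-contained toy illustration of the MaxMin diversity picking used in main()
# above. The SMILES and subset size are made up for the example; only the RDKit
# calls (GetMorganFingerprint, DiceSimilarity, MaxMinPicker.LazyPick) mirror the code.
# ---------------------------------------------------------------------------
from rdkit import Chem, DataStructs
from rdkit.Chem.AllChem import GetMorganFingerprint
from rdkit.SimDivFilters.rdSimDivPickers import MaxMinPicker

smiles = ['CCO', 'c1ccccc1', 'CC(=O)O', 'CCN', 'c1ccncc1', 'CCCC']
mols = [Chem.MolFromSmiles(s) for s in smiles]
fps = [GetMorganFingerprint(m, 3) for m in mols]

def dist(i, j, fps=fps):
    # Dice distance between Morgan fingerprints, as in distij above
    return 1 - DataStructs.DiceSimilarity(fps[i], fps[j])

picker = MaxMinPicker()
# LazyPick(distance_function, pool_size, pick_size): select 3 mutually distant molecules
picked = list(picker.LazyPick(dist, len(fps), 3))
print(picked)  # indices into `smiles` of the diverse subset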