def Net(args):
    """Assemble a ``gentrl.DIS_GENTRL`` model from an RNN encoder and a DilConv decoder.

    Args:
        args: Forwarded unchanged as the second positional argument of
            ``gentrl.DilConvDecoder``.

    Returns:
        The ``gentrl.DIS_GENTRL`` instance built with ``beta=0.001``.
    """
    latent_size = 50
    latent_input_size = 50

    # The same ('c', 20) descriptor is used for each of the 50 latent entries
    # and for the single feature entry.
    # NOTE(review): presumably 'c' marks a continuous variable and 20 is a
    # per-dimension component count -- confirm against the gentrl package.
    descriptor = ('c', 20)
    latent_descr = [descriptor] * latent_size
    feature_descr = [descriptor]

    encoder = gentrl.RNNEncoder(latent_size)
    decoder = gentrl.DilConvDecoder(latent_input_size, args)
    return gentrl.DIS_GENTRL(
        encoder,
        decoder,
        latent_descr,
        feature_descr,
        beta=0.001,
    )
import os

from moses.metrics.utils import get_n_rings, get_mol
from torch.utils.data import DataLoader

# NOTE(review): `logP`, `SA`, `mol_passes_filters`, `df` and `gentrl` are used
# below but are not defined in this snippet; presumably they are imported /
# loaded earlier in the file -- confirm before running this section on its own.


def get_num_rings_6(mol):
    """Return the number of rings in *mol* that contain more than six atoms."""
    ring_info = mol.GetRingInfo()
    return sum(1 for ring in ring_info.AtomRings() if len(ring) > 6)


def penalized_logP(mol_or_smiles, masked=False, default=-5):
    """Compute ``logP(mol) - SA(mol) - get_num_rings_6(mol)`` for a molecule.

    Args:
        mol_or_smiles: Input handed to ``get_mol`` to obtain a molecule object.
        masked: When true, molecules for which ``mol_passes_filters`` is false
            receive ``default`` instead of the computed reward.
        default: Value returned when ``get_mol`` yields ``None`` or the
            molecule is masked out.

    Returns:
        The penalized score, or ``default`` in the cases described above.
    """
    mol = get_mol(mol_or_smiles)
    if mol is None:
        return default

    # The reward is computed before the filter check, matching the original
    # evaluation order.
    reward = logP(mol) - SA(mol) - get_num_rings_6(mol)
    if masked and not mol_passes_filters(mol):
        return default
    return reward


# Annotate the dataset with the reward and write it to disk for MolecularDataset.
df['plogP'] = df['SMILES'].apply(penalized_logP)
df.to_csv('train_plogp_plogpm.csv', index=None)

enc = gentrl.RNNEncoder(latent_size=50)
dec = gentrl.DilConvDecoder(latent_input_size=50)
model = gentrl.GENTRL(enc, dec, 50 * [('c', 20)], [('c', 20)], beta=0.001)
# model.cuda()  # left disabled as in the original; enable to train on GPU

md = gentrl.MolecularDataset(
    sources=[
        {
            'path': 'train_plogp_plogpm.csv',
            'smiles': 'SMILES',
            'prob': 1,
            'plogP': 'plogP',
        },
    ],
    props=['plogP'],
)

train_loader = DataLoader(
    md,
    batch_size=50,
    shuffle=True,
    num_workers=1,
    drop_last=True,
)

model.train_as_vaelp(train_loader, lr=1e-4)

# Replaces the notebook-only `! mkdir -p saved_gentrl` shell magic, which is
# not valid Python outside IPython.
os.makedirs('saved_gentrl', exist_ok=True)
model.save('./saved_gentrl/')

# train-RL: https://github.com/insilicomedicine/GENTRL/blob/master/examples/train_rl.ipynb
import pickle
import random

import torch
from moses.metrics.utils import get_mol
import pandas as pd
import moses
from moses.utils import CharVocab
from rdkit import RDLogger

# Turn off RDKit's 'rdApp.*' log channels (the original issued this call twice).
RDLogger.DisableLog('rdApp.*')

# Select the CUDA device *before* anything is moved to the GPU, so the
# `.cuda()` calls below target the chosen device.
torch.cuda.set_device(0)

# Load vocab
dataset_path = "../data/moses_qed_props.csv.gz"
df = pd.read_csv(dataset_path, compression="gzip")
vocab = CharVocab.from_data(df['SMILES'])

# NOTE(review): `gentrl` is not imported in this snippet; presumably it is
# imported earlier in the file -- confirm.
enc = gentrl.RNNEncoder(vocab, latent_size=50)
dec = gentrl.DilConvDecoder(vocab, latent_input_size=50, split_len=100)
model = gentrl.GENTRL(enc, dec, 50 * [('c', 20)], [('c', 20)], beta=0.001)
model.cuda()

moses_qed_props_model_path = "../models/moses/"
model.load(moses_qed_props_model_path)
# Kept from the original: the model is moved to the GPU again after loading.
# NOTE(review): presumably `load` can leave some sub-modules on the CPU --
# confirm against gentrl before removing either call.
model.cuda()

generated = []
verbose_lim = 10000