def penalized_logp(molecule):
    """Compute the penalized logP score of a molecule.

    Adapted from
    https://github.com/wengong-jin/icml18-jtnn/blob/master/bo/run_bo.py
    See "Junction Tree Variational Autoencoder for Molecular Graph
    Generation", https://arxiv.org/pdf/1802.04364.pdf, Section 3.2.

    The score is defined as

        y(m) = logP(m) - SA(m) - cycle(m)

    where logP(m) is the logP of the molecule, SA(m) is its synthetic
    accessibility score, and cycle(m) is the size of the largest ring
    minus six (clamped at zero, so rings of six atoms or fewer incur no
    penalty).

    Args:
        molecule: Chem.Mol. The molecule to score.

    Returns:
        Float. The penalized logP value.
    """
    partition_coefficient = Descriptors.MolLogP(molecule)
    accessibility = sascorer.calculateScore(molecule)

    # Only rings larger than six atoms are penalized.
    ring_excess = get_largest_ring_size(molecule) - 6
    if ring_excess > 0:
        ring_penalty = ring_excess
    else:
        ring_penalty = 0

    return partition_coefficient - accessibility - ring_penalty
def _reward(self):
    """Return the reward for the current state based on its SA score.

    The SMILES string in ``self._state`` is parsed and its synthetic
    accessibility score is compared with ``self.target_sas``. The reward
    is the negated ``self.loss_fn`` of that difference, multiplied by
    ``self.discount_factor`` raised to the number of steps still
    remaining (``self.max_steps - self.num_steps_taken``).

    If the state cannot be parsed into a molecule, the negated loss of
    ``self.target_sas`` itself is returned, with no discount applied.

    Returns:
        The reward value produced by the expressions above.
    """
    mol = Chem.MolFromSmiles(self._state)
    if mol is None:
        # Unparseable state: no SA score is available, so the loss is
        # taken on the target value alone.
        return -self.loss_fn(self.target_sas)

    deviation = sascorer.calculateScore(mol) - self.target_sas
    penalty = self.loss_fn(deviation)
    steps_remaining = self.max_steps - self.num_steps_taken
    discount = self.discount_factor**steps_remaining
    return -penalty * discount
# Score every molecule in `s_to_human_score` with both the SA score
# (`calculateScore`) and the model likelihood, without tracking gradients.
#
# NOTE(review): this fragment was recovered from whitespace-collapsed text;
# the nesting below (in particular `a = max(likelihood)` sitting inside the
# outer loop) is reconstructed and should be confirmed against the original.

# Accumulators. Only `synthesis` and `sascores` are filled in the visible
# part of the script; the others are presumably populated further down.
log_likelihoods = []
synthesis = []
sascores = []
ok_sascore = []
no_sascore = []
ok_ourscore = []
no_ourscore = []
with torch.no_grad():
    # Keys of `s_to_human_score` are SMILES strings (they are passed to
    # Chem.MolFromSmiles); values are converted with int() below.
    for s in s_to_human_score.keys():
        # Debugging override: run on a single hard-coded molecule instead.
        #for s in ['COc1ccc2cc1-c1c(O)cc(O)c3c(=O)cc(oc13)-c1ccc(O)c(c1)[C@H](C)c1c(O)cc3c(c1O)C(=O)C[C@@H]2O3']:
        m = Chem.MolFromSmiles(s)
        # Number of stereo centers that are explicitly specified.
        # NOTE(review): `num_sc` is not used in the visible code.
        num_sc = CalcNumAtomStereoCenters(
            m) - CalcNumUnspecifiedAtomStereoCenters(m)
        synthesis.append(int(s_to_human_score[s]))
        sascores.append(calculateScore(m))
        # With --stereo, evaluate every enumerated stereoisomer; otherwise
        # evaluate the molecule as parsed.
        if args.stereo:
            isomers = list(EnumerateStereoisomers(m))
        else:
            isomers = [m]
        likelihood = []
        # NOTE(review): the loop variable below rebinds the outer `s`; after
        # this loop `s` holds the last isomer's SMILES + 'Q', not the key.
        for s in isomers:
            s = Chem.MolToSmiles(s, isomericSmiles=args.stereo)
            # Append 'Q' before encoding -- presumably an end-of-sequence
            # token in the `c_to_i` vocabulary; confirm.
            s = s + 'Q'
            # Encode characters as indices and add a leading batch axis.
            x = torch.tensor([c_to_i[i] for i in list(s)]).unsqueeze(0).to(device)
            output = model(x)
            likelihood.append(utils.output_to_likelihood(output, x))
        # Keep the best likelihood over all evaluated isomers.
        a = max(likelihood)
from rdkit import Chem
from rdkit.Contrib.SA_Score.sascorer import calculateScore

# Read whitespace-separated records of the form "<id> <smiles1> <smiles2>"
# from ../id_smiles.txt and write "<id>\t<SA score 1>\t<SA score 2>" to
# data.txt. Records where either SMILES fails to parse are dropped.
with open('../id_smiles.txt') as f, open('data.txt', 'w') as w:
    for line in f:
        fields = line.split()
        if not fields:
            # A blank line splits to an empty list, which would make the
            # three-way unpack below raise ValueError; skip it instead.
            continue
        # Any other field count is a malformed record and still raises.
        m_id, s1, s2 = fields
        m1, m2 = Chem.MolFromSmiles(s1), Chem.MolFromSmiles(s2)
        if m1 is None or m2 is None:
            # MolFromSmiles returns None for SMILES it cannot parse.
            continue
        c1, c2 = calculateScore(m1), calculateScore(m2)
        w.write(m_id + '\t' + str(c1) + '\t' + str(c2) + '\n')
# Compute per-molecule descriptors for every SMILES line in
# ./ZINC/smiles.txt and save them as NumPy arrays.
#
# The input file is read inside a `with` block so the handle is closed
# as soon as the lines are loaded (the bare open() previously left it
# open for the lifetime of the script).
with open('./ZINC/smiles.txt') as smiles_f:
    smiles_list = smiles_f.readlines()

logPList = []
molWtList = []
TPSAList = []
QEDList = []
SASList = []

# One entry is appended per input line, in order, so index i of every
# list corresponds to line i of smiles.txt. Unparseable lines are not
# skipped, which keeps that alignment intact.
for smi in smiles_list:
    smi = smi.strip()
    m = Chem.MolFromSmiles(smi)
    molWt = ExactMolWt(m)
    logP = MolLogP(m)
    TPSA = CalcTPSA(m)
    _qed = qed(m)
    sas = calculateScore(m)
    logPList.append(logP)
    molWtList.append(molWt)
    TPSAList.append(TPSA)
    QEDList.append(_qed)
    SASList.append(sas)

# NOTE(review): molWtList is left as a plain list, and neither molWtList
# nor SASList is saved in the visible part of the script -- confirm
# whether that happens further down.
logPList = np.asarray(logPList)
TPSAList = np.asarray(TPSAList)
QEDList = np.asarray(QEDList)
SASList = np.asarray(SASList)

np.save('./ZINC/logP.npy', logPList)
np.save('./ZINC/TPSA.npy', TPSAList)
np.save('./ZINC/QED.npy', QEDList)