Example #1
0
def penalized_logp(molecule):
    """Computes the penalized logP score of a molecule.

    Refactored from
    https://github.com/wengong-jin/icml18-jtnn/blob/master/bo/run_bo.py
    See "Junction Tree Variational Autoencoder for Molecular Graph
    Generation", https://arxiv.org/pdf/1802.04364.pdf, Section 3.2.

    The score is defined as

        y(m) = logP(m) - SA(m) - cycle(m)

    where
        logP(m)  is the value returned by ``Descriptors.MolLogP``,
        SA(m)    is the synthetic accessibility score returned by
                 ``sascorer.calculateScore``,
        cycle(m) is the largest ring size minus six, clamped below at
                 zero (so it never increases the score).

    Args:
      molecule: Chem.Mol. A molecule.

    Returns:
      Float. The penalized logP value.
    """
    score = Descriptors.MolLogP(molecule)
    score -= sascorer.calculateScore(molecule)
    # Only the part of the largest ring size above six counts as a penalty.
    ring_penalty = max(get_largest_ring_size(molecule) - 6, 0)
    score -= ring_penalty
    return score
 def _reward(self):
     """Returns the reward for the current state from its SA score.

     The state (``self._state``) is parsed as a SMILES string. If it does
     not parse, the reward is ``-loss_fn(target_sas)`` with no discount
     applied. Otherwise the reward is the negated loss of the difference
     between the molecule's synthetic accessibility score and
     ``self.target_sas``, multiplied by
     ``discount_factor ** (max_steps - num_steps_taken)``.
     """
     parsed = Chem.MolFromSmiles(self._state)
     if parsed is not None:
         deviation = sascorer.calculateScore(parsed) - self.target_sas
         penalty = -self.loss_fn(deviation)
         steps_remaining = self.max_steps - self.num_steps_taken
         return penalty * (self.discount_factor**steps_remaining)
     # Unparsable state: fixed penalty, not discounted.
     return -self.loss_fn(self.target_sas)
# Result accumulators. In the loop below, `synthesis` and `sascores` each get
# one entry per key of `s_to_human_score`; the remaining lists are not filled
# in this part of the script.
log_likelihoods = []
synthesis = []
sascores = []
ok_sascore = []
no_sascore = []
ok_ourscore = []
no_ourscore = []

# Gradient tracking is disabled: the model is only evaluated here.
with torch.no_grad():
    for s, human_score in s_to_human_score.items():
        # Debug override for a single molecule:
        #for s in ['COc1ccc2cc1-c1c(O)cc(O)c3c(=O)cc(oc13)-c1ccc(O)c(c1)[C@H](C)c1c(O)cc3c(c1O)C(=O)C[C@@H]2O3']:
        m = Chem.MolFromSmiles(s)
        # Number of stereo centers that are explicitly specified
        # (all stereo centers minus the unspecified ones).
        num_sc = CalcNumAtomStereoCenters(
            m) - CalcNumUnspecifiedAtomStereoCenters(m)
        synthesis.append(int(human_score))
        sascores.append(calculateScore(m))

        # With --stereo every stereoisomer is scored; otherwise only the
        # molecule as parsed.
        if args.stereo:
            isomers = list(EnumerateStereoisomers(m))
        else:
            isomers = [m]

        likelihood = []
        # The inner loop uses its own names so the outer key `s` is not
        # overwritten by isomer SMILES strings.
        for isomer in isomers:
            isomer_smiles = Chem.MolToSmiles(isomer, isomericSmiles=args.stereo)
            # 'Q' is appended before encoding; presumably the end-of-sequence
            # token of the model vocabulary -- TODO confirm against c_to_i.
            isomer_smiles = isomer_smiles + 'Q'
            x = torch.tensor([c_to_i[ch]
                              for ch in isomer_smiles]).unsqueeze(0).to(device)
            output = model(x)
            likelihood.append(utils.output_to_likelihood(output, x))
        # Keep the largest likelihood value over the scored isomers.
        a = max(likelihood)
Example #4
0
from rdkit import Chem
from rdkit.Contrib.SA_Score.sascorer import calculateScore

# For every input row "<id> <smiles1> <smiles2>" whose two SMILES both parse,
# write "<id>\t<SA score 1>\t<SA score 2>" to data.txt. Rows with an
# unparsable SMILES are skipped.
with open('../id_smiles.txt') as src, open('data.txt', 'w') as dst:
    for row in src:
        mol_id, smiles_a, smiles_b = row.split()
        mol_a = Chem.MolFromSmiles(smiles_a)
        mol_b = Chem.MolFromSmiles(smiles_b)
        if mol_a is not None and mol_b is not None:
            score_a = calculateScore(mol_a)
            score_b = calculateScore(mol_b)
            fields = (mol_id, str(score_a), str(score_b))
            dst.write('\t'.join(fields) + '\n')
Example #5
0
# Read all SMILES lines up front. The context manager closes the file handle,
# which the original bare open() never did.
with open('./ZINC/smiles.txt') as smiles_f:
    smiles_list = smiles_f.readlines()

# One entry per input line, in file order.
logPList = []
molWtList = []
TPSAList = []
QEDList = []
SASList = []
for smi in smiles_list:
    smi = smi.strip()
    m = Chem.MolFromSmiles(smi)
    molWt = ExactMolWt(m)
    logP = MolLogP(m)
    TPSA = CalcTPSA(m)
    _qed = qed(m)
    sas = calculateScore(m)

    logPList.append(logP)
    molWtList.append(molWt)
    TPSAList.append(TPSA)
    QEDList.append(_qed)
    SASList.append(sas)

# NOTE(review): molWtList is not converted to an array here, and neither
# molWtList nor SASList is saved in this part of the script -- confirm
# whether that is handled further down or is an omission.
logPList = np.asarray(logPList)
TPSAList = np.asarray(TPSAList)
QEDList = np.asarray(QEDList)
SASList = np.asarray(SASList)

np.save('./ZINC/logP.npy', logPList)
np.save('./ZINC/TPSA.npy', TPSAList)
np.save('./ZINC/QED.npy', QEDList)