Ejemplo n.º 1
0
def CalculateQED(mol, wtype="mean"):
    """
    #################################################################
    Calculate the QED descriptor under different weighting schemes.

    QED (Quantitative Estimate of Drug-likeness) is a measure of
    drug-likeness based on the concept of desirability.
    -Ref.: Bickerton, G. Richard, et al.
           Nat Chem, 4.2 (2012): 90.

    ---->qed

    Usage:

        result = CalculateQED(mol, wtype='mean')

        Input: mol is a molecular object
               wtype selects the weighting function: "mean", "max" or "none"

        Output: result is the QED value rounded to two decimals, or None
                when wtype is not one of the supported schemes
    #################################################################
    """
    if wtype == "mean":
        qed = QED.weights_mean(mol)
    elif wtype == "max":
        qed = QED.weights_max(mol)
    elif wtype == "none":
        qed = QED.weights_none(mol)
    else:
        # Invalid wtype: there is nothing to round, so report "no result"
        # instead of letting round(None, 2) raise a TypeError.
        return None

    return round(qed, 2)
Ejemplo n.º 2
0
 def testNCI200(self):
   """Compare QED with the expected value for every record of the NCI200 test data.

   Each record is checked twice: as read, and again after explicit hydrogens
   have been added with Chem.AddHs.
   """
   for record in readTestData(dataNCI200):
     failure_msg = 'QED not equal to expected in line {}'.format(record.lineNo)
     self.assertAlmostEqual(QED.qed(record.mol), record.expected, msg=failure_msg)
     # The same expected value is asserted for the hydrogen-added molecule.
     # NOTE(review): the earlier comment here claimed hydrogens change the
     # rotatable-bond and alert counts, which contradicts this assertion —
     # confirm which statement is current.
     hydrogenated = Chem.AddHs(record.mol)
     self.assertAlmostEqual(QED.qed(hydrogenated), record.expected, msg=failure_msg)
Ejemplo n.º 3
0
 def test_examples(self):
   """Check QED for four example molecules to three decimal places.

   The trailing comments carry the compound name and the value noted next to
   it in the original test; the asserted values are the second tuple element.
   """
   cases = (
     ('c1cc2OCOc2cc1OCC1CNCCC1c1ccc(F)cc1', 0.934),  # Paroxetine 0.935
     ('C1=NOC(C)=C1C(=O)Nc1ccc(cc1)C(F)(F)F', 0.911),  # Leflunomide 0.929
     ('CN(C)CCCN1c2ccccc2CCc2ccc(Cl)cc21', 0.818),  # Clomipramine 0.779
     ('CCCCCNC(=N)NN=CC1=CNc2ccc(CO)cc21', 0.235),  # Tegaserod 0.213
   )
   for smiles, expected in cases:
     self.assertAlmostEqual(QED.qed(Chem.MolFromSmiles(smiles)), expected, places=3)
Ejemplo n.º 4
0
 def testNCI200(self):
     """Compare QED with the expected value for every record of the NCI200 test data.

     Each record is checked twice: as read, and again after explicit hydrogens
     have been added with Chem.AddHs.
     """
     for record in readTestData(dataNCI200):
         failure_msg = 'QED not equal to expected in line {}'.format(record.lineNo)
         self.assertAlmostEqual(QED.qed(record.mol), record.expected, msg=failure_msg)
         # The same expected value is asserted for the hydrogen-added molecule.
         # NOTE(review): the earlier comment here claimed hydrogens change the
         # rotatable-bond and alert counts, which contradicts this assertion —
         # confirm which statement is current.
         hydrogenated = Chem.AddHs(record.mol)
         self.assertAlmostEqual(QED.qed(hydrogenated), record.expected, msg=failure_msg)
Ejemplo n.º 5
0
def one_slurm_qed(list_smiles, unique_id, name):
    """
    Score a list of SMILES with QED and dump the results to a CSV file.

    The file is ``<script_dir>/results/<name>/docking_small_results/<unique_id>.csv``.
    It is (re)created with a ``smile,score`` header and then re-opened in
    append mode for every row, so each row is written out as soon as it is
    computed.

    :param list_smiles: iterable of SMILES strings to score
    :param unique_id: used as the CSV file stem
    :param name: sub-directory of ``results`` that holds the output directory
    :return: None
    """
    out_dir = os.path.join(script_dir, 'results', name,
                           'docking_small_results')
    dump_path = os.path.join(out_dir, f"{unique_id}.csv")

    with open(dump_path, 'w', newline='') as out:
        csv.writer(out).writerow(['smile', 'score'])

    for smile in list_smiles:
        mol = Chem.MolFromSmiles(smile)
        # Unparseable SMILES get a score of 0 rather than being skipped.
        score = 0 if mol is None else QED.qed(mol)
        with open(dump_path, 'a', newline='') as out:
            csv.writer(out).writerow([smile, score])
0
def calc(smi, name):
    """Compute a tuple of RDKit descriptors for one SMILES string.

    :param smi: SMILES string to parse
    :param name: identifier returned as the first element and used in the
        error messages
    :return: ``(name, hba, hbd, hba + hbd, nrings, rtb, psa, logp, mr, mw,
        csp3, fmf, qed, hac, nrings_fused)`` with the float values rounded,
        or None when the SMILES cannot be parsed or any descriptor
        calculation fails (a message is written to stderr in both cases)
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        # Newline added so consecutive messages do not run together on stderr.
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    try:
        hba = rdMolDescriptors.CalcNumHBA(m)
        hbd = rdMolDescriptors.CalcNumHBD(m)
        nrings = rdMolDescriptors.CalcNumRings(m)
        rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
        psa = rdMolDescriptors.CalcTPSA(m)
        logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
        mw = rdMolDescriptors._CalcMolWt(m)
        csp3 = rdMolDescriptors.CalcFractionCSP3(m)
        hac = m.GetNumHeavyAtoms()
        # fmf: heavy atoms of the scaffold divided by all heavy atoms;
        # guarded so a molecule without heavy atoms does not divide by zero.
        if hac == 0:
            fmf = 0
        else:
            fmf = GetScaffoldForMol(m).GetNumHeavyAtoms() / hac
        qed = QED.qed(m)
        nrings_fused = fused_ring_count(m)
        return name, hba, hbd, hba + hbd, nrings, rtb, round(psa, 2), round(logp, 2), round(mr, 2), round(mw, 2), \
               round(csp3, 3), round(fmf, 3), round(qed, 3), hac, nrings_fused
    except Exception:
        # Best-effort: skip the molecule, but let KeyboardInterrupt and
        # SystemExit propagate (a bare except would swallow them).
        sys.stderr.write(
            f'molecule {name} was omitted due to an error in calculation of some descriptors\n'
        )
        return None
Ejemplo n.º 7
0
def calc(smi, name):
    """Compute an extended tuple of RDKit descriptors for one SMILES string.

    :param smi: SMILES string to parse
    :param name: identifier returned as the first element and used in the
        error messages
    :return: ``(name, hba, hbd, hba + hbd, nrings, rtb, psa, logp, mr, mw,
        csp3, fmf, qed, hac, nrings_fused, n_unique_hba_hbd_atoms,
        max_ring_size, n_chiral_centers, fcsp3_bm)`` with the float values
        rounded, or None when the SMILES cannot be parsed or any descriptor
        calculation fails (a message is written to stderr in both cases)
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        # Newline added so consecutive messages do not run together on stderr.
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    try:
        hba = rdMolDescriptors.CalcNumHBA(m)
        hbd = rdMolDescriptors.CalcNumHBD(m)
        nrings = rdMolDescriptors.CalcNumRings(m)
        rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
        psa = rdMolDescriptors.CalcTPSA(m)
        logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
        mw = rdMolDescriptors._CalcMolWt(m)
        csp3 = rdMolDescriptors.CalcFractionCSP3(m)
        hac = m.GetNumHeavyAtoms()
        # fmf: heavy atoms of the scaffold divided by all heavy atoms;
        # guarded so a molecule without heavy atoms does not divide by zero.
        if hac == 0:
            fmf = 0
        else:
            fmf = GetScaffoldForMol(m).GetNumHeavyAtoms() / hac
        qed = QED.qed(m)
        nrings_fused = fused_ring_count(m)
        n_unique_hba_hbd_atoms = count_hbd_hba_atoms(m)
        # default=() makes the size 0 for a molecule without rings.
        max_ring_size = len(max(m.GetRingInfo().AtomRings(), key=len, default=()))
        n_chiral_centers = len(FindMolChiralCenters(m, includeUnassigned=True))
        # Fraction of sp3 carbons computed on the scaffold only.
        fcsp3_bm = rdMolDescriptors.CalcFractionCSP3(GetScaffoldForMol(m))
        return name, hba, hbd, hba + hbd, nrings, rtb, round(psa, 2), round(logp, 2), round(mr, 2), round(mw, 2), \
               round(csp3, 3), round(fmf, 3), round(qed, 3), hac, nrings_fused, n_unique_hba_hbd_atoms, \
               max_ring_size, n_chiral_centers, round(fcsp3_bm, 3)
    except Exception:
        # Best-effort: skip the molecule, but let KeyboardInterrupt and
        # SystemExit propagate (a bare except would swallow them).
        sys.stderr.write(f'molecule {name} was omitted due to an error in calculation of some descriptors\n')
        return None
Ejemplo n.º 8
0
def score_molecule(smiles):
    """Count how many property thresholds a molecule satisfies and compute its QED.

    Five checks are made against the limits on LipinskiRuleOfFiveDecorator
    (logP, molecular weight, H-bond donors, H-bond acceptors, rotatable
    bonds); each one that passes adds 1 to the score.

    Returns:
        (lipinski_score, qed). If anything raises, the exception is logged
        and the result is (0, MAX_QED + 1).
    """
    limits = LipinskiRuleOfFiveDecorator
    lipinski_score = 0
    qed = limits.MAX_QED + 1

    try:
        m = Chem.MolFromSmiles(smiles)
        # NOTE(review): the acceptor count is compared against MAX_H_DONORS,
        # exactly as in the original — confirm whether an acceptor-specific
        # limit was intended.
        checks = (
            Descriptors.MolLogP(m) < limits.MAX_LOGP,
            Descriptors.MolWt(m) < limits.MAX_MOL_WT,
            Lipinski.NumHDonors(m) < limits.MAX_H_DONORS,
            Lipinski.NumHAcceptors(m) < limits.MAX_H_DONORS,
            Lipinski.NumRotatableBonds(m) < limits.MAX_ROTATABLE_BONDS,
        )
        lipinski_score = sum(1 if passed else 0 for passed in checks)
        qed = QED.qed(m)
    except Exception as ex:
        lipinski_score = 0
        logger.exception(ex)

    return lipinski_score, qed
Ejemplo n.º 9
0
def sample_qed(data_smiles, max_num):
    """Draw ``max_num`` SMILES at random and compute QED for each.

    Every sampled entry is printed as ``index smiles qed``.

    Returns:
        (sampled_smiles, qed_values) as two lists in matching order.
    """
    chosen = random.sample(data_smiles, max_num)
    scores = []
    for position, smiles in enumerate(chosen):
        score = QED.qed(Chem.MolFromSmiles(smiles))
        scores.append(score)
        print(position, smiles, score)
    return chosen, scores
Ejemplo n.º 10
0
    def policy_evaluate(self):
        """
        Ask an MCTS player driven by the current policy-value net for its moves.

        A fresh Molecule environment is built from ``self.mol``, its
        ``init_qed`` is set to the QED of that starting molecule, and the
        player is queried once with ``rand=False``.

        Returns the moves together with the two extra values the player
        reports (named ``_S_P`` and ``_Qs`` by the caller of this method).
        """
        mcts_player = MCTSPlayer(self.policy_value_net.policy_value,
                                 c_puct=self.c_puct,
                                 n_playout=30)

        env_options = dict(
            init_mol=self.mol,
            allow_removal=True,
            allow_no_modification=False,
            allow_bonds_between_rings=False,
            allowed_ring_sizes=[5, 6],
            max_steps=10,
            target_fn=None,
            record_path=False,
        )
        environment = Molecule(["C", "O", "N"], **env_options)
        environment.initialize()
        environment.init_qed = QED.qed(Chem.MolFromSmiles(self.mol))

        # The second element of the result is not used here.
        moves, _unused_fp, s_p, q_values = mcts_player.get_action(
            environment, temp=self.temp, return_prob=1, rand=False)

        return moves, s_p, q_values
Ejemplo n.º 11
0
    def _reward(self):
        """Calculates the reward of the current state.

        The reward is a single number: the sum of two soft-constraint terms,
        ``soft_cst`` of the SA score over FLAGS.target_sas +/- 0.2 and
        ``soft_cst`` of the QED value over FLAGS.target_qed +/- 0.1,
        multiplied by ``FLAGS.gamma ** (max_steps - counter)``.

        Returns:
          A scalar reward. 0.0 when there is no state or the state cannot be
          parsed as SMILES. These early exits used to return a tuple, which
          did not match the scalar produced on the normal path.
        """
        if self._state is None:
            return 0.0
        mol = Chem.MolFromSmiles(self._state)
        if mol is None:
            return 0.0

        qed_value = QED.qed(mol)
        sas = SA_Score.sascorer.calculateScore(mol)

        sas_term = soft_cst(sas, FLAGS.target_sas - 0.2, FLAGS.target_sas + 0.2)
        qed_term = soft_cst(qed_value, FLAGS.target_qed - 0.1,
                            FLAGS.target_qed + 0.1)
        return (sas_term + qed_term) * FLAGS.gamma**(self.max_steps -
                                                     self._counter)
Ejemplo n.º 12
0
    def __getitem__(self, idx):
        """Build the tensors for sample ``idx``.

        The masked token sequence is wrapped in start/end tokens, the label
        and adjacency-mask sequences are wrapped to the same length, and all
        three are truncated and zero-padded to ``self.seq_len``. The QED value
        and the zero-padded adjacency matrix come from the molecule parsed
        from ``self.adj_dataset[idx]``.

        Returns a dict of ``torch.tensor`` values keyed by
        ``smiles_bert_input``, ``smiles_bert_label``, ``smiles_bert_adj_mask``,
        ``smiles_bert_adjmat`` and ``smiles_bert_value``.
        """
        sample = self.smiles_dataset[idx]
        masked_tokens, labels, adj_mask = self.random_masking(sample)

        tokens = [self.vocab.start_index] + masked_tokens + [self.vocab.end_index]
        labels = [self.vocab.pad_index] + labels + [self.vocab.pad_index]
        adj_mask = [0] + adj_mask + [0]
        # give info to start token
        if self.mat_pos == 'start':
            adj_mask = [1] + [0] * (len(adj_mask) - 1)

        limit = self.seq_len
        smiles_bert_input = tokens[:limit]
        smiles_bert_label = labels[:limit]
        smiles_bert_adj_mask = adj_mask[:limit]

        # All three sequences get the same amount of zero padding, sized
        # from the (truncated) token sequence.
        pad = [0] * (limit - len(smiles_bert_input))
        for sequence in (smiles_bert_input, smiles_bert_label, smiles_bert_adj_mask):
            sequence.extend(pad)

        mol = Chem.MolFromSmiles(self.adj_dataset[idx])
        smiles_bert_value = QED.qed(mol)
        smiles_bert_adjmat = self.zero_padding(GetAdjacencyMatrix(mol),
                                               (limit, limit))

        output = {
            "smiles_bert_input": smiles_bert_input,
            "smiles_bert_label": smiles_bert_label,
            "smiles_bert_adj_mask": smiles_bert_adj_mask,
            "smiles_bert_adjmat": smiles_bert_adjmat,
            "smiles_bert_value": smiles_bert_value,
        }
        return {key: torch.tensor(value) for key, value in output.items()}
Ejemplo n.º 13
0
def get_ro3_from_mol(mol):
    """
    Evaluate the rule-of-three criteria for a fragment.

    The criteria are read from ``QED.properties``: molecular weight < 300,
    and logP, H-bond donors, H-bond acceptors and rotatable bonds each <= 3,
    plus PSA <= 60.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        Fragment.

    Returns
    -------
    pd.Series
        One 0/1 flag per criterion (1 = fulfilled), indexed by
        ``mw, logp, hbd, hba, nrot, psa``.

    Notes
    -----
    Taken from: https://europepmc.org/article/med/14554012
    """
    props = QED.properties(mol)

    fulfilled = {
        "mw": props.MW < 300,
        "logp": props.ALOGP <= 3,
        "hbd": props.HBD <= 3,
        "hba": props.HBA <= 3,
        "nrot": props.ROTB <= 3,
        "psa": props.PSA <= 60,
    }

    return pd.Series([1 if ok else 0 for ok in fulfilled.values()],
                     index=list(fulfilled))
Ejemplo n.º 14
0
def get_score(objective, mol, smiles2score=None):
    """Return the score of ``mol`` for the requested objective.

    Args:
        objective: one of 'qed', 'docking', 'sa', 'mw', 'logp',
            'penalized_logp'.
        mol: molecule handed to the scoring function.
        smiles2score: optional dict used as a cache for docking scores,
            keyed by SMILES. It is read and updated in place when given;
            with the default None the docking score is simply recomputed
            (previously None caused a TypeError on the membership test).

    Returns:
        The score, or 0. when the scoring function raises ValueError.

    Raises:
        NotImplementedError: for objectives containing 'rand' and for any
            unknown objective.
    """
    try:
        if objective == 'qed':
            return QED.qed(mol)
        elif objective == 'docking':
            smiles = Chem.MolToSmiles(mol)
            if smiles2score is not None and smiles in smiles2score:
                return smiles2score[smiles]
            # The oracle value is negated before being returned/cached.
            value = -oracle2(smiles)
            if smiles2score is not None:
                smiles2score[smiles] = value
            return value
        elif objective == 'sa':
            x = sa_scorer.calculateScore(mol)
            return (10. - x) / 9.  # normalized to [0, 1]
        elif objective == 'mw':  # molecular weight
            return mw(mol)
        elif objective == 'logp':  # real number
            print('logp call')
            return Descriptors.MolLogP(mol)
        elif objective == 'penalized_logp':
            print('plogp call')
            return penalized_logp(mol)
        elif 'rand' in objective:
            # The random scorer is not wired in.
            raise NotImplementedError
        else:
            raise NotImplementedError
    except ValueError:
        return 0.
Ejemplo n.º 15
0
    def _reward(self):
        """Calculates the reward of the current state.

        Two non-positive terms are formed: minus the absolute distance of the
        SA score from FLAGS.target_sas and minus the absolute distance of QED
        from FLAGS.target_qed. They are multiplied (with the sign flipped when
        both are negative) if FLAGS.use_multiply is set, and summed otherwise.
        The result is scaled by ``FLAGS.gamma ** (max_steps - counter)``.

        Returns:
          A scalar reward; 0.0 when there is no state or the state cannot be
          parsed as SMILES.
        """
        if self._state is None:
            return 0.0
        mol = Chem.MolFromSmiles(self._state)
        if mol is None:
            return 0.0

        qed_value = QED.qed(mol)
        sas = SA_Score.sascorer.calculateScore(mol)

        sas_term = -abs(sas - FLAGS.target_sas)
        qed_term = -abs(qed_value - FLAGS.target_qed)
        if not FLAGS.use_multiply:
            reward = (sas_term + qed_term)
        elif sas_term < 0 and qed_term < 0:
            # Keep the product negative when both terms are penalties.
            reward = -sas_term * qed_term
        else:
            reward = sas_term * qed_term

        discount = FLAGS.gamma**(self.max_steps - self._counter)
        return reward * discount
Ejemplo n.º 16
0
    def _playout(self, state, n):
        """Run one playout from the root until ``state`` reaches ``max_steps``.

        Leaf nodes are expanded with (action, QED of the action's SMILES)
        pairs; the next node is then chosen with ``node.select`` and the
        action is applied to ``state``. ``state`` is stepped in place, so the
        caller must pass a copy.
        """
        node = self._root

        while state._counter < state.max_steps:
            if node.is_leaf():
                priors = []
                # The number of candidates is taken from _valid_actions_fp,
                # the SMILES themselves from _valid_actions.
                for idx in range(len(state._valid_actions_fp)):
                    candidate = state._valid_actions[idx]
                    priors.append(
                        (candidate, QED.qed(Chem.MolFromSmiles(candidate))))
                node.expand(priors)

            # Select the next move and apply it to the state.
            action, node = node.select(self._c_puct, n)
            state.step(action)
            if state._counter == state.max_steps:
                # On the last step the node's value is set to its prior.
                node._Q = node._P
                print("###")
            node.update_recursive(node._Q)
Ejemplo n.º 17
0
def check_qed(dataset):
    """Compute QED for generated SMILES and compare with values in real.txt.

    Reads the pickled SMILES from ``generated_smiles_<dataset>``, skips the
    first line of ``real.txt`` and then consumes one line of it per molecule
    (at most 5001 molecules). Each successfully computed QED is written to
    ``pred.txt``.

    Args:
        dataset: suffix of the pickle file name.

    Returns:
        (mean_qed, qed_score_per_molecule, diff, real, pred). ``mean_qed`` is
        0.0 when no molecule could be scored (previously a
        ZeroDivisionError). ``real`` gets an entry for every molecule read,
        including those whose QED computation failed, so it can be longer
        than ``pred``.
    """
    # NOTE: pickle.load can execute arbitrary code; only use it on files
    # produced by a trusted source.
    with open('generated_smiles_%s' % dataset, 'rb') as f:
        all_smiles = set(pickle.load(f))

    qed_sum = 0
    total = 0
    qed_score_per_molecule = []
    diff = []
    real = []
    pred = []
    # Context managers close both files even if a line fails to parse.
    with open('real.txt', 'rt') as real_file, open('pred.txt', 'w') as pred_file:
        real_file.readline()  # the first line is skipped
        # NOTE(review): iterating a set gives no defined order, so the pairing
        # with real.txt lines looks arbitrary — confirm this is intended.
        for idx, smiles in enumerate(all_smiles):
            print(idx)
            if idx > 5000:
                break
            real_qed = float(real_file.readline())
            real.append(real_qed)
            new_mol = Chem.MolFromSmiles(smiles)
            try:
                val = QED.qed(new_mol)
            except Exception:
                # Molecules QED cannot handle are skipped.
                continue
            pred.append(val)
            pred_file.write(str(val) + "\n")
            qed_sum += val
            diff.append(abs(real_qed - val))
            qed_score_per_molecule.append(val)
            total += 1

    mean_qed = qed_sum / total if total else 0.0
    return mean_qed, qed_score_per_molecule, diff, real, pred
Ejemplo n.º 18
0
    def run(self):
        """Run the training pipeline for ``game_batch_num`` self-play batches.

        Every ``check_freq`` batches the policy is evaluated, the resulting
        moves are appended to ``output_smi`` and their QED values to
        ``output_qed``. Ctrl-C ends the loop quietly.
        """
        try:
            for batch_idx in range(self.game_batch_num):
                batch_no = batch_idx + 1
                self.collect_selfplay_data(self.play_batch_size)
                print("batch i: {}, episode_len: {}".format(
                    batch_no, self.episode_len))
                if len(self.data_buffer) >= self.batch_size:
                    loss, entropy = self.policy_update()
                    print("loss is {}  entropy is {}".format(loss, entropy))

                # Only every check_freq-th batch is evaluated and recorded.
                if batch_no % self.check_freq != 0:
                    continue

                print("current self-play batch: {}".format(batch_no))
                move_list, s_p, q_values = self.policy_evaluate()
                print(move_list)
                print(q_values)
                print(s_p)

                self.output_smi.extend(move_list)
                o_qed = [QED.qed(Chem.MolFromSmiles(smi)) for smi in move_list]
                print(o_qed)
                print("#" * 30)
                self.output_qed.extend(o_qed)
        except KeyboardInterrupt:
            print('\n\rquit')
Ejemplo n.º 19
0
    def _reward(self):
        """Calculates the reward of the current state.

        The reward is a pair: the similarity score (zero when the molecule
        does not contain the target scaffold) and the QED value, each scaled
        by ``FLAGS.gamma ** (max_steps - counter)``.

        Returns:
          A tuple (similarity, qed); (0.0, 0.0) when there is no state or the
          state cannot be parsed as SMILES.
        """
        if self._state is None:
            return 0.0, 0.0
        mol = Chem.MolFromSmiles(self._state)
        if mol is None:
            return 0.0, 0.0

        # Similarity only counts when the target scaffold is present.
        similarity_score = 0.0
        if molecules.contains_scaffold(mol, self._target_mol_scaffold):
            similarity_score = self.get_similarity(self._state)

        qed_value = QED.qed(mol)
        discount = FLAGS.gamma**(self.max_steps - self._counter)
        return similarity_score * discount, qed_value * discount
Ejemplo n.º 20
0
def _rdkit_eval(entry: dict) -> dict:
    """Add RDKit-derived properties to ``entry`` and return it.

    The molecule is parsed from ``entry['smiles']``; the keys ``logP``,
    ``QED`` and ``SA_score`` are then set on the same dictionary, in that
    order.
    """
    molecule = Chem.MolFromSmiles(entry['smiles'])
    # Assigned one at a time so keys computed before a failure stay set.
    entry['logP'] = Crippen.MolLogP(molecule)
    entry['QED'] = QED.qed(molecule)
    entry['SA_score'] = calculateScore(molecule)
    return entry
Ejemplo n.º 21
0
 def quantitative_estimation_druglikeness_scores(mols, norm=False):
     """Return an array with the QED of every molecule in ``mols``.

     A None molecule, or a None result from the sanitization-safe wrapper,
     is reported as 0. ``norm`` is accepted but not used.
     """
     scores = []
     for mol in mols:
         if mol is None:
             scores.append(0)
             continue
         value = MolecularMetrics._avoid_sanitization_error(
             lambda: QED.qed(mol))
         scores.append(0 if value is None else value)
     return np.array(scores)
def qed(s):
    """Return the QED of the SMILES string ``s``, or 0.0 when unavailable.

    0.0 is returned when ``s`` is None, when the SMILES cannot be parsed,
    and when the QED computation raises. All three cases now return a float;
    the failure case used to return the int 0.
    """
    if s is None:
        return 0.0
    mol = Chem.MolFromSmiles(s)
    if mol is None:
        return 0.0
    try:
        return QED.qed(mol)
    except Exception:
        # Best-effort scoring; KeyboardInterrupt/SystemExit still propagate.
        return 0.0
Ejemplo n.º 23
0
def compute_druglikeness(mol: Chem.rdchem.Mol):
    """Return the QED properties of ``mol`` listed in KEYS_DRUGS_LIKENESS.

    The values are in the order of KEYS_DRUGS_LIKENESS. If a RuntimeError is
    raised, a list of None of the same length is returned instead.
    """
    try:
        props = QED.properties(mol)
        return [getattr(props, name) for name in KEYS_DRUGS_LIKENESS]
    except RuntimeError:
        return [None] * len(KEYS_DRUGS_LIKENESS)
Ejemplo n.º 24
0
    def score(self, smiles):
        """Return ``5 * QED - SA score`` for the molecule parsed from ``smiles``."""
        molecule = Chem.MolFromSmiles(smiles)
        drug_likeness = QED.qed(molecule)
        synthesis_penalty = sascorer.calculateScore(molecule)
        return 5 * drug_likeness - synthesis_penalty
Ejemplo n.º 25
0
 def transform_text_to_qed(text_line):
     """Return the highest QED among the '.'-separated molecules in ``text_line``."""
     molecules = []
     for mol_str in text_line.split('.'):
         molecules.append(rdkit_general_ops.get_molecule(mol_str, kekulize=False))
     qed_scores = list(map(QED.qed, molecules))
     # There may be several products, so the maximum is taken (this is what
     # the optimisation part optimises for). Expected to matter little in
     # practice, as USPTO mostly details single-product reactions. Other ways
     # of combining products (e.g. the mean), or scoring Molecular Transformer
     # predictions instead of the ground truth, could be explored.
     return np.max(qed_scores)
Ejemplo n.º 26
0
 def testRegression(self):
     """Compare QED with the expected value for every regression record (long test)."""
     if not doLong:
         raise unittest.SkipTest('long test')
     for record in readTestData(dataRegression):
         computed = QED.qed(record.mol)
         failure_msg = 'QED not equal to expected in line {}'.format(record.lineNo)
         self.assertAlmostEqual(computed, record.expected, msg=failure_msg)
Ejemplo n.º 27
0
def properties_violin(filepaths, labels, pred_type):
    """Collect per-molecule properties from CSV files into a long-format DataFrame.

    Every row of every file contributes up to three entries:

    * 'IC50 for KOR' from column 1, only when ``pred_type == 'pIC50'``;
    * 'SA score', read from column 2 for all files except the first, and
      computed with ``SAscore`` for the first file;
    * 'QED', computed from the SMILES in column 0.

    Rows whose SMILES cannot be processed are reported on stdout and
    contribute no QED entry.

    Args:
        filepaths: CSV file paths; the first one is treated specially (above).
        labels: one label per file, stored in the 'Sets' column.
        pred_type: selects whether column 1 is recorded.

    Returns:
        pd.DataFrame with columns ['Sets', 'Property', 'Value'].
    """
    properties = []

    for i, fname in enumerate(filepaths):
        with open(fname, 'r') as f:
            for row in csv.reader(f):
                label = labels[i]
                if pred_type == 'pIC50':
                    properties.append([label, 'IC50 for KOR', float(row[1])])

                if i != 0:
                    properties.append([label, 'SA score', float(row[2])])
                    try:
                        mol = Chem.MolFromSmiles(row[0])
                        q = QED.qed(mol)
                        properties.append([label, 'QED', q])
                    except Exception:
                        # Best-effort: report and move on, but do not swallow
                        # KeyboardInterrupt/SystemExit as a bare except would.
                        print("Non-Canonical SMILES: " + row[0])
                else:
                    # First file: the SA score is computed, not read.
                    try:
                        mole = smiles2mol(row[0])
                        prediction_sas = SAscore(mole)
                        properties.append(
                            [label, 'SA score', float(prediction_sas[0])])
                        mol = Chem.MolFromSmiles(row[0])
                        q = QED.qed(mol)
                        properties.append([label, 'QED', q])
                    except Exception:
                        print("Non-Canonical SMILES: " + row[0])

    df = pd.DataFrame(properties, columns=['Sets', 'Property', 'Value'])
    return df
Ejemplo n.º 28
0
def QED_oracle(smiles):
    """Return a 1-D tensor with the QED of each SMILES in ``smiles``.

    Entries whose SMILES cannot be parsed stay at 0.
    """
    scores = torch.zeros(len(smiles))
    for position, smi in enumerate(smiles):
        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            continue
        scores[position] = QED.qed(mol)
    return scores
 def _reward(self):
     """Return the QED of the current state scaled by ``FLAGS.gamma ** (max_steps - counter)``.

     The reward is 0.0 when the state cannot be parsed as SMILES, and the
     QED is taken as 0 when its computation raises ValueError.
     """
     molecule = Chem.MolFromSmiles(self._state)
     if molecule is None:
         return 0.0
     try:
         score = QED.qed(molecule)
     except ValueError:
         score = 0
     discount = FLAGS.gamma**(self.max_steps - self._counter)
     return score * discount
Ejemplo n.º 30
0
def updateTestData():
  """ Rewrite the test data files with freshly computed QED values.

  This should only be done if the method changes! Each file is read
  completely before it is overwritten.
  """
  for filename in (dataNCI200, dataRegression,):
    records = list(readTestData(filename))
    with open(filename, 'w') as out:
      out.write('# Test data for QED descriptor\n')
      for record in records:
        expected = QED.qed(record.mol)
        out.write(f'{record.smiles},{expected}\n')
Ejemplo n.º 31
0
    def _reward(self):
        """Reward of a state.

        Intermediate states (``_counter < max_steps``) are rewarded with the
        normalised SA score plus the QED score, discounted by
        ``discount_factor ** (max_steps - num_steps_taken)``.

        Terminal states are docked: the SMILES is written to ``ligand.smi``,
        converted with ``obabel``, docked with ``qvina02``, and the score is
        parsed from ``log_docking.txt``. The result is appended to
        ``./optimized_result_total.txt``.

        Returns:
          intermediate reward: SA score + QED score (discounted, rounded)
          final reward: the negative of the docking score
          0.0 when the state cannot be parsed or the docking log cannot be
          read.
        """
        molecule = Chem.MolFromSmiles(self._state)
        if molecule is None:
            return 0.0

        # calculate SA and QED score
        sa = calculateScore(molecule)
        sa_norm = round((10 - sa) / 9, 2)  # normalize the SA score
        qed = round(QED.qed(molecule), 2)
        print("SA score and QED: {}, {} : {}".format(sa_norm, qed,
                                                     self._state))

        if self._counter < self.max_steps:  # intermediate state
            # NOTE(review): the exponent uses num_steps_taken while the branch
            # condition uses _counter — confirm they are the same quantity.
            return round(
                (sa_norm + qed) *
                self.discount_factor**(self.max_steps - self.num_steps_taken),
                2)

        if self._counter >= self.max_steps:  # terminal state
            # create SMILES file
            with open('ligand.smi', 'w') as f:
                f.write(self._state)

            # convert SMILES > PDBQT
            # --gen3d: the option for generating 3D coordinate
            #  -h: protonation
            cvt_cmd = "obabel ligand.smi -O ligand.pdbqt --gen3D -h > cvt_log.txt"
            os.system(cvt_cmd)

            # docking
            docking_cmd = "qvina02 --config config.txt --num_modes=1 > log_docking.txt"
            os.system(docking_cmd)

            # parsing docking score from log file
            try:
                data = pd.read_csv('log_docking.txt', sep="\t", header=None)
            except Exception:
                # An unreadable or empty log counts as "no reward"; unlike a
                # bare except this lets KeyboardInterrupt/SystemExit through.
                return 0.0
            # The score is the second whitespace-separated field of the
            # second-to-last line of the log.
            docking_score = round(float(data.values[-2][0].split()[1]), 2)
            print("binding energy value: " + str(round(docking_score, 2)) +
                  '\t' + self._state)

        # record a optimized result with the SMILES, docking score, SA score,
        # and QED score.
        with open('./optimized_result_total.txt', 'a') as f2:
            f2.write(self._state + '\t' + str(docking_score) + '\t' +
                     str(sa_norm) + '\t' + str(qed) + '\n')

        # we use the negative of the docking score because the lower docking score
        # the better.
        return round(-docking_score, 2)
Ejemplo n.º 32
0
 def get_mol_props(smiles):
     """Return (molecular weight, logP, QED) for a single SMILES string.

     Asserts that the SMILES parses and that none of the three values is None.
     """
     mol = Chem.MolFromSmiles(smiles)
     assert mol is not None
     props = (Descriptors.MolWt(mol), Descriptors.MolLogP(mol), QED.qed(mol))
     assert all(value is not None for value in props)
     return props
Ejemplo n.º 33
0
 def __call__(self, smiles_list):
     """Return the QED of every SMILES in ``smiles_list`` as float32 values.

     SMILES that cannot be parsed score 0.
     """
     parsed = map(Chem.MolFromSmiles, smiles_list)
     scores = [0 if mol is None else QED.qed(mol) for mol in parsed]
     return np.float32(scores)
Ejemplo n.º 34
0
  def test_properties(self):
    """Check the eight QED properties of one molecule, with and without explicit Hs."""
    expected = (
      ('MW', 337.456),
      ('ALOGP', -0.55833),
      ('HBA', 6),
      ('HBD', 5),
      ('PSA', 173.33),
      ('ROTB', 7),
      ('AROM', 1),
      ('ALERTS', 3),
    )

    def check(mol):
      props = QED.properties(mol)
      for field, value in expected:
        self.assertAlmostEqual(getattr(props, field), value)

    m = Chem.MolFromSmiles('N=C(CCSCc1csc(N=C(N)N)n1)NS(N)(=O)=O')
    check(m)
    # The same values are expected once explicit hydrogens are added.
    check(Chem.AddHs(m))
Ejemplo n.º 35
0
 def testRegression(self):
   """Compare QED with the expected value for every regression record (long test)."""
   if not doLong:
     raise unittest.SkipTest('long test')
   for record in readTestData(dataRegression):
     computed = QED.qed(record.mol)
     failure_msg = 'QED not equal to expected in line {}'.format(record.lineNo)
     self.assertAlmostEqual(computed, record.expected, msg=failure_msg)