def canonicalize(self, mol):
    """Select the canonical tautomer of a molecule.

    Every tautomer returned by ``self._enumerate_tautomers`` is scored and the highest-scoring one is
    returned. When two tautomers have the same score, the one whose isomeric SMILES compares lower wins.
    If the enumeration yields exactly one tautomer, it is returned without being scored.

    Score contributions:

    * +100 for each SSSR ring in which every bond visited through ``pairwise(ring)`` is aromatic, and a
      further +150 if all atoms of that ring are carbon;
    * ``tscore.score`` for each substructure match of every entry in ``self.scores``;
    * minus the total hydrogen count of each P, S, Se or Te atom.

    :param mol: The input molecule.
    :type mol: :rdkit:`Mol <Chem.rdchem.Mol-class.html>`
    :return: The canonical tautomer.
    :rtype: :rdkit:`Mol <Chem.rdchem.Mol-class.html>`
    """
    # TODO: Overload the mol parameter to pass a list of pre-enumerated tautomers
    candidates = self._enumerate_tautomers(mol)
    if len(candidates) == 1:
        return candidates[0]

    # (score, smiles, tautomer) of the leading candidate seen so far
    best = None
    for candidate in candidates:
        smiles = Chem.MolToSmiles(candidate, isomericSmiles=True)
        logger.debug('Tautomer: %s', smiles)
        total = 0

        # Aromatic ring bonuses
        rings = Chem.GetSymmSSSR(candidate)
        for ring in rings:
            bond_types = set()
            for pair in pairwise(ring):
                bond_types.add(candidate.GetBondBetweenAtoms(*pair).GetBondType())
            atomic_numbers = set()
            for idx in ring:
                atomic_numbers.add(candidate.GetAtomWithIdx(idx).GetAtomicNum())
            if bond_types != {BondType.AROMATIC}:
                continue
            logger.debug('Score +100 (aromatic ring)')
            total += 100
            if atomic_numbers == {6}:
                logger.debug('Score +150 (carbocyclic aromatic ring)')
                total += 150

        # SMARTS pattern scores: one contribution per match
        for rule in self.scores:
            for _ in candidate.GetSubstructMatches(rule.smarts):
                logger.debug('Score %+d (%s)', rule.score, rule.name)
                total += rule.score

        # (P,S,Se,Te)-H penalties
        for atom in candidate.GetAtoms():
            if atom.GetAtomicNum() not in {15, 16, 34, 52}:
                continue
            h_count = atom.GetTotalNumHs()
            if not h_count:
                continue
            logger.debug('Score %+d (%s-H bonds)', -h_count, atom.GetSymbol())
            total -= h_count

        # Take the lead on a strictly higher score, or on an equal score with a lower SMILES
        if best is None:
            takes_lead = True
        else:
            best_score, best_smiles, _ = best
            takes_lead = best_score < total or (best_score == total and smiles < best_smiles)
        if takes_lead:
            logger.debug('New highest tautomer: %s (%s)', smiles, total)
            best = (total, smiles, candidate)

    return best[2]
def canonicalize(self, mol):
    """Return the highest-scoring tautomer of ``mol``.

    The tautomers come from ``self._enumerate_tautomers``. A lone tautomer is returned as-is; otherwise each
    one is scored and the best is returned, with ties broken in favour of the tautomer whose isomeric SMILES
    compares lower.

    A tautomer's score is built from:

    * +100 per SSSR ring whose bonds (as visited by ``pairwise(ring)``) are all aromatic, plus +150 more when
      the ring contains only carbon atoms;
    * ``tscore.score`` once for every substructure match of each item in ``self.scores``;
    * a deduction equal to the total hydrogen count on each P, S, Se or Te atom.

    :param mol: The input molecule.
    :type mol: :rdkit:`Mol <Chem.rdchem.Mol-class.html>`
    :return: The canonical tautomer.
    :rtype: :rdkit:`Mol <Chem.rdchem.Mol-class.html>`
    """
    # TODO: Overload the mol parameter to pass a list of pre-enumerated tautomers
    all_tautomers = self._enumerate_tautomers(mol)
    if len(all_tautomers) == 1:
        return all_tautomers[0]

    # Current winner as (score, smiles, tautomer); None until the first tautomer is scored
    leader = None
    for taut in all_tautomers:
        taut_smiles = Chem.MolToSmiles(taut, isomericSmiles=True)
        logger.debug('Tautomer: %s', taut_smiles)
        points = 0

        # Rings: reward aromatic rings, and carbocyclic aromatic rings even more
        ring_list = Chem.GetSymmSSSR(taut)
        for ring in ring_list:
            ring_bond_types = set(taut.GetBondBetweenAtoms(*pair).GetBondType() for pair in pairwise(ring))
            ring_elements = set(taut.GetAtomWithIdx(idx).GetAtomicNum() for idx in ring)
            if ring_bond_types == {BondType.AROMATIC}:
                logger.debug('Score +100 (aromatic ring)')
                points += 100
                if ring_elements == {6}:
                    logger.debug('Score +150 (carbocyclic aromatic ring)')
                    points += 150

        # Substructure patterns: each match contributes the pattern's score
        for pattern in self.scores:
            hits = pattern_hits = taut.GetSubstructMatches(pattern.smarts)
            for _ in hits:
                logger.debug('Score %+d (%s)', pattern.score, pattern.name)
                points += pattern.score

        # Hydrogens on P, S, Se or Te are penalised one point each
        for atom in taut.GetAtoms():
            if atom.GetAtomicNum() in {15, 16, 34, 52}:
                n_h = atom.GetTotalNumHs()
                if n_h:
                    logger.debug('Score %+d (%s-H bonds)', -n_h, atom.GetSymbol())
                    points -= n_h

        # Replace the leader on a higher score, or on an equal score with a lower SMILES
        replaces_leader = (
            leader is None
            or leader[0] < points
            or (leader[0] == points and taut_smiles < leader[1])
        )
        if replaces_leader:
            logger.debug('New highest tautomer: %s (%s)', taut_smiles, points)
            leader = (points, taut_smiles, taut)

    return leader[2]
def compute_score(self, mol, returndetails=False, log=False):
    """Enumerate the tautomers of a molecule and compute a score for each one.

    The score of a tautomer is the sum of:

    * +100 for each SSSR ring whose bonds (as visited by ``pairwise(ring)``) are all aromatic, plus a further
      +150 when every atom of that ring is carbon;
    * ``tscore.score`` for each substructure match of every entry in ``self.scores``;
    * minus the total hydrogen count of each P, S, Se or Te atom;
    * twice the first value returned by ``self.get_conjugate`` for the tautomer.

    :param mol: The input molecule. A ``SmallMol`` is unwrapped to its ``_mol`` attribute before enumeration.
    :type mol: :rdkit:`Mol <Chem.rdchem.Mol-class.html>` or SmallMol
    :param bool returndetails: If True, wrap each tautomer in a ``SmallMol`` and additionally return the
        depiction atoms and the per-tautomer score breakdown.
    :param bool log: If True, print each score contribution to standard output.
    :return: ``(tautomers, t_scores)`` by default. With ``returndetails`` the result is
        ``(tautomers, t_scores, t_depict, scores_detail)``, where ``t_depict`` holds the second value returned
        by ``self.get_conjugate`` for each tautomer and ``scores_detail`` holds one dict per tautomer with the
        keys ``'ArRing'`` and ``'CarbArRing'`` (ring counts), ``'MatchFeature'`` (``[match count, names]``),
        ``'Penalty'`` (``[penalised atom count, symbols]``) and ``'Conjugate'``.
    :rtype: tuple
    """
    t_scores = []
    scores_detail = []
    t_depict = []

    if isinstance(mol, SmallMol):
        mol = mol._mol

    # TODO: Overload the mol parameter to pass a list of pre-enumerated tautomers
    tautomers = self._enumerate_tautomers(mol)

    for t in tautomers:
        tmp_score_details = {
            'ArRing': 0,
            'CarbArRing': 0,
            'MatchFeature': [0, []],
            'Penalty': [0, []],
            'Conjugate': 0,
        }
        if log:
            # The SMILES is only needed for the log line, so it is built only when logging.
            # It is interpolated with % because print() does not apply printf-style arguments.
            print('Tautomer: %s' % Chem.MolToSmiles(t, isomericSmiles=True))
        score = 0

        # Add aromatic ring scores
        ssr = Chem.GetSymmSSSR(t)
        for ring in ssr:
            btypes = {t.GetBondBetweenAtoms(*pair).GetBondType() for pair in pairwise(ring)}
            elements = {t.GetAtomWithIdx(idx).GetAtomicNum() for idx in ring}
            if btypes == {BondType.AROMATIC}:
                if log:
                    print('Score +100 (aromatic ring)')
                score += 100
                tmp_score_details['ArRing'] += 1
                if elements == {6}:
                    if log:
                        print('Score +150 (carbocyclic aromatic ring)')
                    score += 150
                    tmp_score_details['CarbArRing'] += 1

        # Add SMARTS scores: every match of a pattern contributes that pattern's score
        for tscore in self.scores:
            for _ in t.GetSubstructMatches(tscore.smarts):
                if log:
                    print('Score %+d (%s)' % (tscore.score, tscore.name))
                score += tscore.score
                tmp_score_details['MatchFeature'][0] += 1
                tmp_score_details['MatchFeature'][1].append(tscore.name)

        # Add (P,S,Se,Te)-H scores
        for atom in t.GetAtoms():
            if atom.GetAtomicNum() in {15, 16, 34, 52}:
                hs = atom.GetTotalNumHs()
                if hs:
                    if log:
                        print('Score %+d (%s-H bonds)' % (-hs, atom.GetSymbol()))
                    score -= hs
                    # The detail entry counts penalised atoms, not hydrogens
                    tmp_score_details['Penalty'][0] += 1
                    tmp_score_details['Penalty'][1].append(atom.GetSymbol())

        # Compute the conjugated system
        n_conjugate, depictionatoms = self.get_conjugate(t)
        t_depict.append(depictionatoms)
        tmp_score_details['Conjugate'] = n_conjugate
        score += n_conjugate * 2

        scores_detail.append(tmp_score_details)
        t_scores.append(score)

    if returndetails:
        return [SmallMol(tautomer) for tautomer in tautomers], t_scores, t_depict, scores_detail
    return tautomers, t_scores
def compute_score(self, mol, returndetails=False, log=False):
    """Enumerate the tautomers of a molecule and compute a score for each one.

    The score of a tautomer is the sum of:

    * +100 for each SSSR ring whose bonds (as visited by ``pairwise(ring)``) are all aromatic, plus a further
      +150 when every atom of that ring is carbon;
    * ``tscore.score`` for each substructure match of every entry in ``self.scores``;
    * minus the total hydrogen count of each P, S, Se or Te atom;
    * twice the first value returned by ``self.get_conjugate`` for the tautomer.

    :param mol: The input molecule. A ``SmallMol`` is unwrapped to its ``_mol`` attribute before enumeration.
    :type mol: :rdkit:`Mol <Chem.rdchem.Mol-class.html>` or SmallMol
    :param bool returndetails: If True, wrap each tautomer in a ``SmallMol`` and additionally return the
        depiction atoms and the per-tautomer score breakdown.
    :param bool log: If True, print each score contribution to standard output.
    :return: ``(tautomers, t_scores)`` by default. With ``returndetails`` the result is
        ``(tautomers, t_scores, t_depict, scores_detail)``, where ``t_depict`` holds the second value returned
        by ``self.get_conjugate`` for each tautomer and ``scores_detail`` holds one dict per tautomer with the
        keys ``'ArRing'`` and ``'CarbArRing'`` (ring counts), ``'MatchFeature'`` (``[match count, names]``),
        ``'Penalty'`` (``[penalised atom count, symbols]``) and ``'Conjugate'``.
    :rtype: tuple
    """
    t_scores = []
    scores_detail = []
    t_depict = []

    if isinstance(mol, SmallMol):
        mol = mol._mol

    # TODO: Overload the mol parameter to pass a list of pre-enumerated tautomers
    tautomers = self._enumerate_tautomers(mol)

    for t in tautomers:
        tmp_score_details = {
            'ArRing': 0,
            'CarbArRing': 0,
            'MatchFeature': [0, []],
            'Penalty': [0, []],
            'Conjugate': 0,
        }
        if log:
            # The SMILES is only needed for the log line, so it is built only when logging.
            # It is interpolated with % because print() does not apply printf-style arguments.
            print('Tautomer: %s' % Chem.MolToSmiles(t, isomericSmiles=True))
        score = 0

        # Add aromatic ring scores
        ssr = Chem.GetSymmSSSR(t)
        for ring in ssr:
            btypes = {t.GetBondBetweenAtoms(*pair).GetBondType() for pair in pairwise(ring)}
            elements = {t.GetAtomWithIdx(idx).GetAtomicNum() for idx in ring}
            if btypes == {BondType.AROMATIC}:
                if log:
                    print('Score +100 (aromatic ring)')
                score += 100
                tmp_score_details['ArRing'] += 1
                if elements == {6}:
                    if log:
                        print('Score +150 (carbocyclic aromatic ring)')
                    score += 150
                    tmp_score_details['CarbArRing'] += 1

        # Add SMARTS scores: every match of a pattern contributes that pattern's score
        for tscore in self.scores:
            for _ in t.GetSubstructMatches(tscore.smarts):
                if log:
                    print('Score %+d (%s)' % (tscore.score, tscore.name))
                score += tscore.score
                tmp_score_details['MatchFeature'][0] += 1
                tmp_score_details['MatchFeature'][1].append(tscore.name)

        # Add (P,S,Se,Te)-H scores
        for atom in t.GetAtoms():
            if atom.GetAtomicNum() in {15, 16, 34, 52}:
                hs = atom.GetTotalNumHs()
                if hs:
                    if log:
                        print('Score %+d (%s-H bonds)' % (-hs, atom.GetSymbol()))
                    score -= hs
                    # The detail entry counts penalised atoms, not hydrogens
                    tmp_score_details['Penalty'][0] += 1
                    tmp_score_details['Penalty'][1].append(atom.GetSymbol())

        # Compute the conjugated system
        n_conjugate, depictionatoms = self.get_conjugate(t)
        t_depict.append(depictionatoms)
        tmp_score_details['Conjugate'] = n_conjugate
        score += n_conjugate * 2

        scores_detail.append(tmp_score_details)
        t_scores.append(score)

    if returndetails:
        return [SmallMol(tautomer) for tautomer in tautomers], t_scores, t_depict, scores_detail
    return tautomers, t_scores