Ejemplo n.º 1
0
  def testCxSmiles(self):
    """Check HetAtomTautomer hashes for a molecule parsed from CXSMILES.

    The hash is requested twice: once with only the hash function, and
    once with ``True`` passed as the third positional argument, where the
    expected string carries an additional trailing ``|...|`` block.
    """
    cxsmiles = ('C[C@@H](O)[C@@H](C)[C@@H](C)C[C@H](C1=CN=CN1)C1=CNC=N1 '
                '|o1:8,5,&1:1,3,r,c:11,18,t:9,15|')
    mol = Chem.MolFromSmiles(cxsmiles)
    hash_kind = rdMolHash.HashFunction.HetAtomTautomer

    # Expected hash shared by both calls; the second call appends an
    # extension block to it.
    plain_hash = 'C[C@@H](CC([C]1[CH][N][CH][N]1)[C]1[CH][N][CH][N]1)[C@H](C)[C@@H](C)[O]_3_0'
    extended_hash = plain_hash + ' |o1:1,&1:14,16|'

    self.assertEqual(rdMolHash.MolHash(mol, hash_kind), plain_hash)

    self.assertEqual(rdMolHash.MolHash(mol, hash_kind, True), extended_hash)
Ejemplo n.º 2
0
def init_molecule_name(mol):
    """Give a molecule a ``_Name`` property when it lacks a usable one.

    A molecule whose ``_Name`` property is missing or is the empty string
    gets a generated name; a molecule that already carries a non-empty
    ``_Name`` is left untouched.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        Molecule to inspect; it is modified in place via ``SetProp``.

    Notes
    -----
    The generated name depends on ``rdversion``, which is compared as a
    plain string against ``'2020.09.01'``:

    * below that value the name is the result of
      ``rdMolHash.GenerateMoleculeHashString``;
    * otherwise the name is ``'MolNode-'`` followed by the
      ``CanonicalSmiles`` hash from ``rdMolHash.MolHash``. The prefix
      prevents collisions with scaffolds, which are hashed using their
      canonical smiles.

    """
    already_named = mol.HasProp('_Name') and mol.GetProp('_Name') != ''
    if already_named:
        return

    if rdversion < '2020.09.01':
        generated = rdMolHash.GenerateMoleculeHashString(mol)
    else:
        # Newer releases: build the name from the canonical-smiles hash.
        smiles_hash = rdMolHash.HashFunction.CanonicalSmiles
        generated = 'MolNode-' + rdMolHash.MolHash(mol, smiles_hash)
    mol.SetProp('_Name', generated)
Ejemplo n.º 3
0
 def test1(self):
   """Check the string produced by each hash function for one molecule.

   Every (hash function name, expected string) pair below is evaluated in
   order against the same parsed molecule.
   """
   mol = Chem.MolFromSmiles('C1CCCC(O)C1c1ccnc(OC)c1')
   expectations = (
     ('AnonymousGraph', '***1****(*2*****2*)*1'),
     ('ElementGraph', 'COC1CC(C2CCCCC2O)CCN1'),
     ('CanonicalSmiles', 'COc1cc(C2CCCCC2O)ccn1'),
     ('MurckoScaffold', 'c1cc(C2CCCCC2)ccn1'),
     ('ExtendedMurcko', '*c1cc(C2CCCCC2*)ccn1'),
     ('MolFormula', 'C12H17NO2'),
     ('AtomBondCounts', '15,16'),
     ('DegreeVector', '0,4,9,2'),
     ('Mesomer', 'CO[C]1[CH][C](C2CCCCC2O)[CH][CH][N]1_0'),
     ('Regioisomer', '*O.*O*.C.C1CCCCC1.c1ccncc1'),
     ('NetCharge', '0'),
     ('SmallWorldIndexBR', 'B16R2'),
     ('SmallWorldIndexBRL', 'B16R2L9'),
     ('ArthorSubstructureOrder', '000f001001000c000300005f000000'),
   )
   for func_name, wanted in expectations:
     # Look the enum member up lazily so members are resolved one at a
     # time, in the same order as the checks themselves.
     hash_kind = getattr(rdMolHash.HashFunction, func_name)
     self.assertEqual(rdMolHash.MolHash(mol, hash_kind), wanted)
Ejemplo n.º 4
0
def canonicalHash(m):
    """Return the ``rdMolHash`` hash of ``m`` using ``CanonicalSmiles``."""
    hash_kind = rdMolHash.HashFunction.CanonicalSmiles
    return rdMolHash.MolHash(m, hash_kind)
Ejemplo n.º 5
0
def tautomerHash(m):
    """Return the ``rdMolHash`` hash of ``m`` using ``HetAtomTautomer``."""
    hash_kind = rdMolHash.HashFunction.HetAtomTautomer
    return rdMolHash.MolHash(m, hash_kind)
Ejemplo n.º 6
0
handles data registration settings.
"""
import rdkit
from bson import Binary
from rdkit import Chem
from rdkit.Chem import rdMolHash
import pickle

# Module-level default values.
DEFAULT_SCHEME_NAME = 'default'
DEFAULT_AUTHOR = 'package-native'
DEFAULT_PREPROCESS = False
DEFAULT_INDEX = 'inchikey_standard'

# Registry mapping a hash name to a callable that takes an RDKit molecule.
HASH_FUNCTIONS = {}

# One entry per member listed in rdMolHash.HashFunction.names. The member is
# bound through a default argument so each lambda keeps its own hash function
# instead of sharing the loop variable's final value.
for k, v in rdMolHash.HashFunction.names.items():
    HASH_FUNCTIONS[k] = lambda rdmol, f=v: rdMolHash.MolHash(rdmol, f)

# Additional InChI- and SMILES-based entries.
HASH_FUNCTIONS.update({
    'inchi_standard': Chem.MolToInchi,
    'inchikey_standard': Chem.MolToInchiKey,
    'inchi_KET_15T': lambda rdmol: Chem.MolToInchi(
        rdmol, options='-KET -15T'),
    'inchikey_KET_15T': lambda rdmol: Chem.MolToInchiKey(
        rdmol, options='-KET -15T'),
    'noiso_smiles': lambda rdmol: Chem.MolToSmiles(
        rdmol, isomericSmiles=False),
    'cx_smiles': Chem.MolToCXSmiles,
})


class MolDocScheme():
    def __init__(self):
        self.scheme_name = DEFAULT_SCHEME_NAME
        self.author = DEFAULT_AUTHOR