def testCxSmiles(self):
    """Check the HetAtomTautomer hash of a molecule parsed from a CXSMILES string.

    The hash is computed twice: once with the default arguments and once with
    ``True`` passed as the third positional argument. In the second case the
    expected string additionally ends with a ``|...|`` extension block.
    """
    cx_input = 'C[C@@H](O)[C@@H](C)[C@@H](C)C[C@H](C1=CN=CN1)C1=CNC=N1 |o1:8,5,&1:1,3,r,c:11,18,t:9,15|'
    expected_plain = 'C[C@@H](CC([C]1[CH][N][CH][N]1)[C]1[CH][N][CH][N]1)[C@H](C)[C@@H](C)[O]_3_0'
    expected_extended = 'C[C@@H](CC([C]1[CH][N][CH][N]1)[C]1[CH][N][CH][N]1)[C@H](C)[C@@H](C)[O]_3_0 |o1:1,&1:14,16|'

    mol = Chem.MolFromSmiles(cx_input)
    tautomer_fn = rdMolHash.HashFunction.HetAtomTautomer

    self.assertEqual(rdMolHash.MolHash(mol, tautomer_fn), expected_plain)
    # NOTE(review): the third positional argument is presumably the
    # "emit CX extensions" flag of MolHash -- confirm against the RDKit API.
    self.assertEqual(rdMolHash.MolHash(mol, tautomer_fn, True), expected_extended)
def init_molecule_name(mol):
    """Give a molecule a ``_Name`` property when it lacks a non-empty one.

    If ``mol`` has no ``_Name`` property, or the property is the empty
    string, a name derived from ``rdkit.Chem.rdMolHash`` is stored in
    ``_Name``. A molecule that already carries a non-empty name is left
    untouched.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        Molecule to name; modified in place.

    Notes
    -----
    For rdkit versions older than 2020.09.01 the name is the result of
    ``rdMolHash.GenerateMoleculeHashString``, which has been deprecated since
    that release. For newer versions the name is ``'MolNode-'`` followed by
    the ``CanonicalSmiles`` hash. The prefix prevents collisions with
    scaffolds, which are hashed using their canonical smiles.
    """
    # Guard clause: nothing to do when a non-empty name is already present.
    if mol.HasProp('_Name') and mol.GetProp('_Name') != '':
        return

    # NOTE: this is a plain string (lexicographic) comparison of the version.
    if rdversion < '2020.09.01':
        name = rdMolHash.GenerateMoleculeHashString(mol)
    else:
        # Newer releases deprecated GenerateMoleculeHashString.
        smiles_hash = rdMolHash.HashFunction.CanonicalSmiles
        name = 'MolNode-' + rdMolHash.MolHash(mol, smiles_hash)
    mol.SetProp('_Name', name)
def test1(self):
    """Check each listed rdMolHash hash function against a fixed expected string.

    A single molecule is parsed from SMILES and hashed with every function in
    the table below; each result must equal the recorded value.
    """
    mol = Chem.MolFromSmiles('C1CCCC(O)C1c1ccnc(OC)c1')

    # (HashFunction member name, expected hash), in the order they are checked.
    expected_hashes = (
        ('AnonymousGraph', '***1****(*2*****2*)*1'),
        ('ElementGraph', 'COC1CC(C2CCCCC2O)CCN1'),
        ('CanonicalSmiles', 'COc1cc(C2CCCCC2O)ccn1'),
        ('MurckoScaffold', 'c1cc(C2CCCCC2)ccn1'),
        ('ExtendedMurcko', '*c1cc(C2CCCCC2*)ccn1'),
        ('MolFormula', 'C12H17NO2'),
        ('AtomBondCounts', '15,16'),
        ('DegreeVector', '0,4,9,2'),
        ('Mesomer', 'CO[C]1[CH][C](C2CCCCC2O)[CH][CH][N]1_0'),
        ('Regioisomer', '*O.*O*.C.C1CCCCC1.c1ccncc1'),
        ('NetCharge', '0'),
        ('SmallWorldIndexBR', 'B16R2'),
        ('SmallWorldIndexBRL', 'B16R2L9'),
        ('ArthorSubstructureOrder', '000f001001000c000300005f000000'),
    )

    for member_name, expected in expected_hashes:
        # Look the member up lazily so a missing enum value surfaces only
        # when its turn comes, as with the individual assertions.
        hash_fn = getattr(rdMolHash.HashFunction, member_name)
        self.assertEqual(rdMolHash.MolHash(mol, hash_fn), expected)
def canonicalHash(m):
    """Return the ``rdMolHash`` hash of ``m`` using ``HashFunction.CanonicalSmiles``."""
    hash_function = rdMolHash.HashFunction.CanonicalSmiles
    return rdMolHash.MolHash(m, hash_function)
def tautomerHash(m):
    """Return the ``rdMolHash`` hash of ``m`` using ``HashFunction.HetAtomTautomer``."""
    hash_function = rdMolHash.HashFunction.HetAtomTautomer
    return rdMolHash.MolHash(m, hash_function)
handles data registration settings. """ import rdkit from bson import Binary from rdkit import Chem from rdkit.Chem import rdMolHash import pickle DEFAULT_SCHEME_NAME = 'default' DEFAULT_AUTHOR = 'package-native' DEFAULT_PREPROCESS = False DEFAULT_INDEX = 'inchikey_standard' HASH_FUNCTIONS = {} for k, v in rdMolHash.HashFunction.names.items(): HASH_FUNCTIONS[k] = lambda rdmol, f=v: rdMolHash.MolHash(rdmol, f) HASH_FUNCTIONS['inchi_standard'] = Chem.MolToInchi HASH_FUNCTIONS['inchikey_standard'] = Chem.MolToInchiKey HASH_FUNCTIONS['inchi_KET_15T'] = lambda rdmol: Chem.MolToInchi( rdmol, options='-KET -15T') HASH_FUNCTIONS['inchikey_KET_15T'] = lambda rdmol: Chem.MolToInchiKey( rdmol, options='-KET -15T') HASH_FUNCTIONS['noiso_smiles'] = lambda rdmol: Chem.MolToSmiles( rdmol, isomericSmiles=False) HASH_FUNCTIONS['cx_smiles'] = Chem.MolToCXSmiles class MolDocScheme(): def __init__(self): self.scheme_name = DEFAULT_SCHEME_NAME self.author = DEFAULT_AUTHOR