def getMaximumCommonSubstructure(smallmol_list, removeHs=True, returnAtomIdxs=False):
    """
    Compute the maximum common substructure (MCS) shared by a list of molecules.

    Optionally, two lists of lists are returned as well: for each molecule, the
    atom indexes that belong to the MCS and the atom indexes that do not.

    Parameters
    ----------
    smallmol_list: list
        The list of SmallMol objects
    removeHs: bool
        If True, the hydrogens are removed from each molecule before the MCS search
        Default: True
    returnAtomIdxs: bool
        If True, the lists of the atom indexes are returned together with the MCS molecule
        Default: False

    Returns
    -------
    mcs_mol: rdkit.Chem.rdchem.Mol
        The MCS molecule, built from the SMARTS string of the MCS search result
    atom_mcs_list: list
        A list of lists containing the atom indexes that are part of the MCS
        (only returned when returnAtomIdxs is True)
    atom_no_mcs_list: list
        A list of lists containing the atom indexes that are not part of the MCS
        (only returned when returnAtomIdxs is True)
    """
    from rdkit.Chem import rdFMCS

    # Everything below operates on the objects returned by copy(), not on the
    # molecules that were passed in.
    copies = [mol.copy() for mol in smallmol_list]
    for mol in copies:
        AllChem.EmbedMolecule(mol._mol, AllChem.ETKDG())
        if removeHs:
            mol._mol = Chem.RemoveHs(mol._mol)
    rdkit_mols = [mol._mol for mol in copies]

    search_result = rdFMCS.FindMCS(rdkit_mols)
    message = 'MCS found a substructure of {} atoms and {} bonds'.format(
        search_result.numAtoms, search_result.numBonds)
    logger.info(message)

    mcs_mol = Chem.MolFromSmarts(search_result.smartsString)

    if returnAtomIdxs:
        inside_mcs = []
        outside_mcs = []
        for mol, rdkit_mol in zip(copies, rdkit_mols):
            # Atom indexes of this molecule that match the MCS pattern,
            # turned into an 'idx ...' selection understood by SmallMol.get.
            matched = rdkit_mol.GetSubstructMatch(mcs_mol)
            selection = 'idx {}'.format(convertToString(matched))

            in_idx = mol.get('idx', selection)
            out_idx = mol.get('idx', selection, invert=True)

            inside_mcs.append(in_idx.tolist())
            outside_mcs.append(out_idx.tolist())
        return mcs_mol, inside_mcs, outside_mcs

    return mcs_mol
def get(self, returnField, sel='all', convertType=True, invert=False):
    """
    Returns the property for the atoms specified with the selection. The selection is another atom property

    Parameters
    ----------
    returnField: str
        The field of the atom to return. 'atomobject' returns the entries of self.getAtoms(),
        'bondtype' returns the bond types of the neighbors of the selected atoms
    sel: str
        The selection string: an atom field name followed by space-separated values for that field,
        or 'all' to select every atom
        Default: 'all'
    convertType: bool
        If True, the 'bondtype' and 'hybridization' return fields are mapped to the corresponding
        type objects instead of being returned as stored
        Default: True
    invert: bool
        If True, the selection is inverted
        Default: False

    Returns
    -------
    values: np.array
        The array of values for the property, ordered by increasing atom index. It is empty when no
        atom is selected

    Raises
    ------
    KeyError
        If the field named in sel is not one of the atom fields
    ValueError
        If sel does not contain a field name followed by at least one value

    Example
    -------
    >>> sm.get('element', 'idx 0 1 7')  # doctest: +SKIP
    array(['C', 'C', 'H'],
          dtype='<U1')
    >>> sm.get('hybridization', 'element N')  # doctest: +SKIP
    array([rdkit.Chem.rdchem.HybridizationType.SP2,
           rdkit.Chem.rdchem.HybridizationType.SP2], dtype=object)
    >>> sm.get('hybridization', 'element N', convertType=False)
    array([3, 3])
    >>> sm.get('element', 'hybridization sp2')  # doctest: +SKIP
    array(['C', 'C', 'C', 'C', 'C', 'C', 'C', 'N', 'N'],
          dtype='<U1')
    >>> sm.get('element', 'hybridization S')  # doctest: +SKIP
    array(['H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H'],
          dtype='<U1')
    >>> sm.get('element', 'hybridization 1')  # doctest: +SKIP
    array(['H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H'],
          dtype='<U1')
    >>> sm.get('atomobject', 'element N')  # doctest: +SKIP
    array([<rdkit.Chem.rdchem.Atom object at 0x7faf616dd120>,
           <rdkit.Chem.rdchem.Atom object at 0x7faf616dd170>], dtype=object)
    """
    if sel == 'all':
        sel = 'idx {}'.format(convertToString(self._idx.tolist()))

    # First token is the field to filter on, the remaining tokens are the values to match.
    tokens = sel.split()
    if not tokens:
        # An empty/whitespace-only selection is reported like a selection without values,
        # instead of failing with an unrelated IndexError.
        raise ValueError('No selection was provided')
    key, selector = tokens[0], tokens[1:]

    if key not in self._atom_fields:
        raise KeyError('The property passed {} does not exist'.format(key))
    if not selector:
        raise ValueError('No selection was provided')

    # The values of a field live in the '_<field>' attribute when it exists,
    # otherwise in the attribute named exactly like the field.
    private_name = '_' + key
    _arrayFrom = getattr(self, private_name if hasattr(self, private_name) else key)

    # Special selector for hybridization: the values can be given either as stored
    # indexes or as hybridization names. When any value is not a known name the
    # whole selector is left untouched and handled by the dtype conversion below.
    if key == 'hybridization':
        try:
            selector = [_hybridizations_StringToType[s.upper()] for s in selector]
        except KeyError:
            pass

    # Cast the textual values to the dtype of the field so that they compare equal
    # to the stored values.
    _dtype = self._dtypes[key]
    if _dtype is not object:
        selector = [_dtype(s) for s in selector]

    idxs = np.concatenate([np.where(_arrayFrom == s)[0] for s in selector])
    if invert:
        # Mask self._idx instead of rebuilding an array from a Python list: the result
        # keeps the dtype of self._idx even when empty, so it remains a valid index array.
        all_idxs = np.asarray(self._idx)
        idxs = all_idxs[np.isin(all_idxs, idxs, invert=True)]
    idxs = np.sort(idxs)

    if returnField == 'atomobject':
        return self.getAtoms()[idxs]

    if returnField == 'bondtype':
        if convertType:
            return np.array([[_bondtypes_IdxToType[bt] for bt in neighbt]
                             for neighbt in self._neighborsbondtype[idxs]], dtype=object)
        return self._neighborsbondtype[idxs]

    if returnField == 'hybridization' and convertType:
        _arrayTo = np.array([_hybridizations_IdxToType[v] for v in self._hybridization], dtype=object)
    else:
        _arrayTo = getattr(self, '_' + returnField)

    return _arrayTo[idxs]