Beispiel #1
0
def getMaximumCommonSubstructure(smallmol_list,
                                 removeHs=True,
                                 returnAtomIdxs=False):
    """
    Returns the maximum common substructure (MCS) of a list of molecules and, optionally, two lists of lists. The
    first one contains, for each molecule, the atom indexes that are part of the MCS; the second one contains the
    atom indexes that are not part of the MCS.

    Parameters
    ----------
    smallmol_list: list
        The list of SmallMol objects
    removeHs: bool
        If True, the hydrogens are removed from the molecules before searching for the MCS
        Default: True
    returnAtomIdxs: bool
        If True, the lists of the atom indexes are returned together with the MCS molecule
        Default: False

    Returns
    -------
    mcs_mol: rdkit.Chem.rdchem.Mol
        The MCS molecule
    atom_mcs_list: list
        A list of lists containing the atom indexes that are part of the MCS. Only returned if returnAtomIdxs is True
    atom_no_mcs_list: list
        A list of lists containing the atom indexes that are not part of the MCS. Only returned if returnAtomIdxs
        is True
    """
    from rdkit.Chem import rdFMCS

    # The embedding and the hydrogen removal below are applied to copies of the SmallMol objects passed in
    smallmol_list = [sm.copy() for sm in smallmol_list]
    for sm in smallmol_list:
        AllChem.EmbedMolecule(sm._mol, AllChem.ETKDG())
        if removeHs:
            sm._mol = Chem.RemoveHs(sm._mol)
    rdkitMols_list = [sm._mol for sm in smallmol_list]

    mcs = rdFMCS.FindMCS(rdkitMols_list)

    logger.info('MCS found a substructure of {} atoms and {} bonds'.format(
        mcs.numAtoms, mcs.numBonds))

    mcs_mol = Chem.MolFromSmarts(mcs.smartsString)

    if not returnAtomIdxs:
        return mcs_mol

    atoms_mcs_list = []
    atoms_no_mcs_list = []
    for sm, m in zip(smallmol_list, rdkitMols_list):
        match = m.GetSubstructMatch(mcs_mol)

        if not match:
            # Nothing matched: an empty 'idx' selection would make sm.get raise
            # "No selection was provided", so report no MCS atoms and every atom as non-MCS.
            atoms_mcs_list.append([])
            atoms_no_mcs_list.append(sm.get('idx', 'all').tolist())
            continue

        sel_str = convertToString(match)

        atoms_mcs = sm.get('idx', 'idx {}'.format(sel_str))
        atoms_no_mcs = sm.get('idx', 'idx {}'.format(sel_str), invert=True)

        atoms_mcs_list.append(atoms_mcs.tolist())
        atoms_no_mcs_list.append(atoms_no_mcs.tolist())

    return mcs_mol, atoms_mcs_list, atoms_no_mcs_list
Beispiel #2
0
    def get(self, returnField, sel='all', convertType=True, invert=False):
        """
        Returns the property for the atoms specified with the selection. The selection is expressed through another
        atom property.

        Parameters
        ----------
        returnField: str
            The field of the atom to return. 'atomobject' returns the atom objects, 'bondtype' returns the bond types
            of the neighbors of each selected atom
        sel: str
            The selection string: an atom field name followed by space separated values for that field. 'all' selects
            every atom
        convertType: bool
            If True, and where possible, the returnField is converted to rdkit objects
            Default: True
        invert: bool
            If True, the selection is inverted
            Default: False

        Returns
        -------
        values: np.array
            The array of values for the property, ordered by increasing atom index

        Raises
        ------
        KeyError
            If the field used in the selection is not one of the atom fields
        ValueError
            If the selection string does not contain any value after the field name

        Example
        -------
        >>> sm.get('element', 'idx 0 1 7')  # doctest: +SKIP
        array(['C', 'C', 'H'],
              dtype='<U1')
        >>> sm.get('hybridization', 'element N')  # doctest: +SKIP
        array([rdkit.Chem.rdchem.HybridizationType.SP2,
               rdkit.Chem.rdchem.HybridizationType.SP2], dtype=object)
        >>> sm.get('hybridization', 'element N', convertType=False)  # doctest: +SKIP
        array([3, 3])
        >>> sm.get('element', 'hybridization sp2')  # doctest: +SKIP
        array(['C', 'C', 'C', 'C', 'C', 'C', 'C', 'N', 'N'],
              dtype='<U1')
        >>> sm.get('element', 'hybridization S')  # doctest: +SKIP
        array(['H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H'],
              dtype='<U1')
        >>> sm.get('element', 'hybridization 1')  # doctest: +SKIP
        array(['H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H'],
              dtype='<U1')
        >>> sm.get('atomobject', 'element N')  # doctest: +SKIP
        array([<rdkit.Chem.rdchem.Atom object at 0x7faf616dd120>,
               <rdkit.Chem.rdchem.Atom object at 0x7faf616dd170>], dtype=object)
        """
        if sel == 'all':
            sel = 'idx {}'.format(convertToString(self._idx.tolist()))

        # the first token is the field to match on, the remaining tokens are the accepted values
        tokens = sel.split()
        key = tokens[0]
        selector = tokens[1:]

        if key not in self._atom_fields:
            raise KeyError('The property passed {} does not exist'.format(key))
        if len(selector) == 0:
            raise ValueError('No selection was provided')

        # the array to match against: the private '_<key>' attribute when present, otherwise '<key>' itself
        if hasattr(self, '_' + key):
            _arrayFrom = getattr(self, '_' + key)
        else:
            _arrayFrom = getattr(self, key)

        # special selector for hybridization: can be idx, or rdkit.Chem.rdchem.HybridizationType
        if key == 'hybridization':
            try:
                selector = [_hybridizations_StringToType[s.upper()] for s in selector]
            except KeyError:
                # not hybridization names: keep the raw tokens, they are converted with the field dtype below
                pass

        _dtype = self._dtypes[key]
        if _dtype is not object:
            selector = [_dtype(s) for s in selector]
        idxs = np.concatenate([np.where(_arrayFrom == s)[0] for s in selector])
        if invert:
            # A boolean mask keeps the integer dtype of self._idx even when nothing is left;
            # an empty float array could not be used as an index below.
            idxs = self._idx[~np.isin(self._idx, idxs)]
        idxs = np.sort(idxs)

        if returnField == 'atomobject':
            return self.getAtoms()[idxs]
        elif returnField == 'bondtype':
            if convertType:
                return np.array([[_bondtypes_IdxToType[bt] for bt in neighbt] for neighbt in self._neighborsbondtype[idxs]], dtype=object)
            else:
                return self._neighborsbondtype[idxs]
        elif returnField == 'hybridization' and convertType:
            _arrayTo = np.array([_hybridizations_IdxToType[v] for v in self._hybridization], dtype=object)
        else:
            _arrayTo = getattr(self, '_' + returnField)

        return _arrayTo[idxs]