def set_position(mol: rdkit.Mol, position, conformer=-1): """ Sets the centroid of the molecule to `position`. Parameters ---------- position : :class:`numpy.array` This array holds the position on which the centroid of the molecule should be placed. conformer : :class:`int`, optional The id of the conformer to be used. Returns ------- :class:`rdkit.Chem.rdchem.Mol` The ``rdkit`` molecule with the centroid placed at `position`. This is the same instance as that in :attr:`Molecule.mol`. """ conf_id = mol.GetConformer(conformer).GetId() # Get the original centroid. centroid = get_centroid(mol, conf_id) # Find out how much it needs to shift to reach `position`. shift = position - centroid # Apply the shift and get the resulting rdkit conformer object. new_conf = apply_shift(mol, shift, conf_id).GetConformer() new_conf.SetId(conf_id) # Replace the old rkdit conformer with one where the centroid # is at `position`. mol.RemoveConformer(conf_id) mol.AddConformer(new_conf) return mol
def all_atom_coords(mol: rdkit.Mol, conformer=-1): """ Yields the coordinates of atoms in :attr:`mol`. Parameters ---------- conformer : :class:`int`, optional The id of the conformer to be used. Yields ------ :class:`tuple` The yielded :class:`tuple` has the form .. code-block:: python (32, numpy.array([12, 34, 3])) Where the first element is the atom id and the second element is an array holding the coordinates of the atom. """ # Get the conformer from the rdkit instance. conf = mol.GetConformer(conformer) # Go through all the atoms and ask the conformer to return # the position of each atom. This is done by supplying the # conformers `GetAtomPosition` method with the atom's id. for atom in mol.GetAtoms(): atom_id = atom.GetIdx() atom_position = conf.GetAtomPosition(atom_id) yield atom_id, np.array([*atom_position])
def prune_last_conformer( mol: Chem.Mol, tfd_thresh: float, energies: List[float]) -> Tuple[Chem.Mol, List[float]]: """Prunes the last conformer of the molecule. If no conformers in `mol` have a TFD (Torsional Fingerprint Deviation) with the last conformer of less than `tfd_thresh`, the last conformer is kept. Otherwise, the lowest energy conformer with TFD less than `tfd_thresh` is kept and all other conformers are discarded. Parameters ---------- mol : RDKit Mol The molecule to be pruned. The conformers in the molecule should be ordered by ascending energy. tfd_thresh : float The minimum threshold for TFD between conformers. energies : list of float A list of all the energies of the conformers in `mol`. Returns ------- mol : RDKit Mol The updated molecule after pruning, with conformers sorted by ascending energy. energies : list of float A list of all the energies of the conformers in `mol` after pruning and sorting by ascending energy. """ if tfd_thresh < 0 or mol.GetNumConformers() <= 1: return mol, energies idx = bisect.bisect(energies[:-1], energies[-1]) tfd = TorsionFingerprints.GetTFDBetweenConformers( mol, range(0, mol.GetNumConformers() - 1), [mol.GetNumConformers() - 1], useWeights=False) tfd = np.array(tfd) # if lower energy conformer is within threshold, drop new conf if not np.all(tfd[:idx] >= tfd_thresh): energies = energies[:-1] mol.RemoveConformer(mol.GetNumConformers() - 1) return mol, energies else: keep = list(range(0, idx)) keep.append(mol.GetNumConformers() - 1) keep += [ x for x in range(idx, mol.GetNumConformers() - 1) if tfd[x] >= tfd_thresh ] new = Chem.Mol(mol) new.RemoveAllConformers() for i in keep: conf = mol.GetConformer(i) new.AddConformer(conf, assignId=True) return new, [energies[i] for i in keep]
def prune_conformers(mol: Chem.Mol, tfd_thresh: float) -> Chem.Mol: """Prunes all the conformers in the molecule. Removes conformers that have a TFD (torsional fingerprint deviation) lower than `tfd_thresh` with other conformers. Lowest energy conformers are kept. Parameters ---------- mol : RDKit Mol The molecule to be pruned. tfd_thresh : float The minimum threshold for TFD between conformers. Returns ------- mol : RDKit Mol The updated molecule after pruning. """ if tfd_thresh < 0 or mol.GetNumConformers() <= 1: return mol energies = get_conformer_energies(mol) tfd = tfd_matrix(mol) sort = np.argsort(energies) # sort by increasing energy keep = [] # always keep lowest-energy conformer discard = [] for i in sort: this_tfd = tfd[i][np.asarray(keep, dtype=int)] # discard conformers within the tfd threshold if np.all(this_tfd >= tfd_thresh): keep.append(i) else: discard.append(i) # create a new molecule to hold the chosen conformers # this ensures proper conformer IDs and energy-based ordering new = Chem.Mol(mol) new.RemoveAllConformers() for i in keep: conf = mol.GetConformer(int(i)) new.AddConformer(conf, assignId=True) return new
def get_cavity_size(mol: rdkit.Mol, origin, conformer): """Calculates diameter of the conformer from `origin`. The cavity is measured by finding the atom nearest to `origin`, correcting for van der Waals diameter and multiplying by -2. Args: mol: Molecule to calculate diameter of. origin: Coordinates of the position from which the cavity is measured. conformer: ID of the conformer to use. Returns: (float): Cavity size of the molecule. """ conf = mol.GetConformer(conformer) atom_vdw = np.array( [atom_vdw_radii[x.GetSymbol()] for x in mol.GetAtoms()]) distances = euclidean_distances(conf.GetPositions(), np.matrix(origin)) distances = distances.flatten() - atom_vdw return -2 * min(distances)
def atom_distance(mol: rdkit.Mol, atom1_id: int, atom2_id: int, conf=-1) -> float: """Gets distance between two atoms. Args: mol: Molecule containing the atoms. atom1_id: ID of first atom. atom2_id: ID of second atom. conf: Confomrer of `mol` to use. Returns: (float): Euclidean distance between two atoms. """ # Get the atomic positions of each atom and use the scipy # function to calculate their distance in Euclidean space. conf = mol.GetConformer(conf) atom1_coords = conf.GetAtomPosition(atom1_id) atom2_coords = conf.GetAtomPosition(atom2_id) return euclidean(atom1_coords, atom2_coords)
def apply_shift(mol: rdkit.Mol, shift, conformer=-1): """ Shifts the coordinates of all atoms. This does not modify the molecule. A modified copy is returned. Parameters ---------- shift : :class:`numpy.array` A numpy array holding the value of the shift along each axis. conformer : :class:`int`, optional The id of the conformer to use. Returns ------- :class:`rdkit.Chem.rdchem.Mol` A copy of the molecule where the coordinates have been shifted by `shift`. """ # The function does not modify the existing conformer, as a # result a new instance is created and used for modification. conf = rdkit.Conformer(mol.GetConformer(conformer)) # For each atom, get the atomic positions from the conformer # and shift them. Create a new geometry instance from these new # coordinate values. The geometry instance is used by rdkit to # store the coordinates of atoms. Finally, set the conformers # atomic position to the values stored in this newly generated # geometry instance. for atom in mol.GetAtoms(): # Remember the id of the atom you are currently using. It # is used to change the position of the correct atom at the # end of the loop. atom_id = atom.GetIdx() # `atom_position` in an instance holding in the x, y and z # coordinates of an atom in its 'x', 'y' and 'z' # attributes. atom_position = np.array(conf.GetAtomPosition(atom_id)) # Inducing the shift. new_atom_position = atom_position + shift # Creating a new geometry instance. new_coords = Point3D(*new_atom_position) # Changes the position of the atom in the conformer to the # values stored in the new geometry instance. conf.SetAtomPosition(atom_id, new_coords) # Create a new copy of the rdkit molecule instance representing # the molecule - the original instance is not to be modified. new_mol = rdkit.Mol(mol) # The new rdkit molecule was copied from the one held in the # `mol` attribute, as result it has a copy of its conformer. To # prevent the rdkit molecule from holding multiple conformers # the `RemoveAllConformers` method is run first. The shifted # conformer is then given to the rdkit molecule, which is # returned. new_mol.RemoveAllConformers() new_mol.AddConformer(conf) return new_mol
def _init_from_rdkit_mol( self, molecule: rdkit.Mol, functional_groups: typing.Iterable[typing.Union[ FunctionalGroup, FunctionalGroupFactory]], placer_ids: typing.Optional[tuple[int, ...]], ) -> None: """ Initialize from an :mod:`rdkit` molecule. Parameters: molecule: The molecule. functional_groups: An :class:`iterable` of :class:`.FunctionalGroup` or :class:`.FunctionalGroupFactory` or both. :class:`.FunctionalGroup` instances are added to the building block and :class:`.FunctionalGroupFactory` instances are used to create :class:`.FunctionalGroup` instances the building block should hold. :class:`.FunctionalGroup` instances are used to identify which atoms are modified during :class:`.ConstructedMolecule` construction. placer_ids: The ids of *placer* atoms. These are the atoms which should be used for calculating the position of the building block. Depending on the values passed to `placer_ids`, and the functional groups in the building block, different *placer* ids will be used by the building block. #. `placer_ids` is passed to the initializer: the passed *placer* ids will be used by the building block. #. `placer_ids` is ``None`` and the building block has functional groups: The *placer* ids of the functional groups will be used as the *placer* ids of the building block. #. `placer_ids` is ``None`` and `functional_groups` is empty. All atoms of the molecule will be used for *placer* ids. """ atoms = tuple( Atom(a.GetIdx(), a.GetAtomicNum(), a.GetFormalCharge()) for a in molecule.GetAtoms()) bonds = tuple( Bond(atom1=atoms[b.GetBeginAtomIdx()], atom2=atoms[b.GetEndAtomIdx()], order=(9 if b.GetBondType() == rdkit.BondType.DATIVE else b.GetBondTypeAsDouble())) for b in molecule.GetBonds()) position_matrix = molecule.GetConformer().GetPositions() super().__init__(atoms, bonds, position_matrix) self._with_functional_groups( self._extract_functional_groups( functional_groups=functional_groups, )) self._placer_ids = self._normalize_placer_ids( placer_ids=placer_ids, functional_groups=self._functional_groups, ) self._core_ids = frozenset( self._get_core_ids(functional_groups=self._functional_groups, ))
def _init_from_rdkit_mol( self, molecule: rdkit.Mol, functional_groups: _FunctionalGroups, placer_ids: typing.Optional[abc.Iterable[int]], ) -> None: """ Initialize from an :mod:`rdkit` molecule. Parameters: molecule: The molecule. functional_groups: The :class:`.FunctionalGroup` instances the building block should have, and / or :class:`.FunctionalGroupFactory` instances used for creating them. placer_ids: The ids of *placer* atoms. These are the atoms which should be used for calculating the position of the building block. Depending on the values passed to `placer_ids`, and the functional groups in the building block, different *placer* ids will be used by the building block. #. `placer_ids` is passed to the initializer: the passed *placer* ids will be used by the building block. #. `placer_ids` is ``None`` and the building block has functional groups: The *placer* ids of the functional groups will be used as the *placer* ids of the building block. #. `placer_ids` is ``None`` and `functional_groups` is empty. All atoms of the molecule will be used for *placer* ids. """ atoms = tuple( Atom( id=a.GetIdx(), atomic_number=a.GetAtomicNum(), charge=a.GetFormalCharge(), ) for a in molecule.GetAtoms()) bonds = tuple( Bond(atom1=atoms[b.GetBeginAtomIdx()], atom2=atoms[b.GetEndAtomIdx()], order=(9 if b.GetBondType() == rdkit.BondType.DATIVE else b.GetBondTypeAsDouble())) for b in molecule.GetBonds()) position_matrix = molecule.GetConformer().GetPositions() Molecule.__init__( self=self, atoms=atoms, bonds=bonds, position_matrix=position_matrix, ) self._with_functional_groups( self._extract_functional_groups( functional_groups=functional_groups, )) self._placer_ids = self._normalize_placer_ids( placer_ids=placer_ids, functional_groups=self._functional_groups, ) self._core_ids = frozenset( self._get_core_ids(functional_groups=self._functional_groups, ))