def all_atom_coords(mol: rdkit.Mol, conformer=-1):
    """
    Iterate over the atoms of `mol`, yielding their coordinates.

    Parameters
    ----------
    mol : ``rdkit`` molecule
        The molecule whose atoms are walked.

    conformer : :class:`int`, optional
        The id of the conformer the coordinates are read from.

    Yields
    ------
    :class:`tuple`
        A pair of the form

        .. code-block:: python

            (32, numpy.array([12, 34, 3]))

        The first element is the atom id, the second is an array
        holding the coordinates of that atom.

    """

    # The conformer is the object that knows where each atom sits.
    positions = mol.GetConformer(conformer)

    # Atom ids are produced lazily, so each position is looked up
    # right before it is yielded.
    for index in (atom.GetIdx() for atom in mol.GetAtoms()):
        point = positions.GetAtomPosition(index)
        yield index, np.array(list(point))
def mol_to_atom_feats_and_adjacency_list(mol: AllChem.Mol, atom_map_to_index_map=None,
                                         params: AtomFeatParams = None):
    """
    Convert an atom-mapped molecule into node features and per-bond-type adjacency lists.

    Every atom of `mol` must carry the ``molAtomMapNumber`` property; a ``KeyError`` is raised for
    atoms without it.

    :param mol: the molecule to convert.
    :param atom_map_to_index_map: if you pass this in it will use the defined indices for each atom
     (a mapping from atom-map number to row index). Otherwise will use rdkit default indexing.
     A supplied mapping is only read, never modified.
    :param params: featurization parameters; a default ``AtomFeatParams()`` is created when None.
    :return: a ``graph_as_adj_list.GraphAsAdjList`` built from the node feature matrix, a dict mapping
     each name in ``params.bond_names`` to the transposed array of (begin, end) index pairs, and a
     zero-filled integer vector with one entry per node.
    """
    params = AtomFeatParams() if params is None else params
    atoms = mol.GetAtoms()
    num_atoms = len(atoms)
    node_feats = np.zeros((num_atoms, params.atom_feature_length), dtype=np.float32)

    use_supplied_idx_flg = atom_map_to_index_map is not None
    if use_supplied_idx_flg:
        # The supplied indices must be exactly 0..A-1 so every row of node_feats gets filled.
        assert set(atom_map_to_index_map.values()) == set(range(len(atoms))), \
            "if give pre supplied ordering it must be the same size as the molecules trying to order"
    else:
        # No ordering given: build the map ourselves from rdkit's own atom indices.
        atom_map_to_index_map = {}

    # First we will create the atom features and the mappings
    for atom in atoms:
        am = atom.GetPropsAsDict()['molAtomMapNumber']  # the atom mapping in the file
        if use_supplied_idx_flg:
            idx = atom_map_to_index_map[am]
        else:
            idx = atom.GetIdx()  # goes from 0 to A-1
            atom_map_to_index_map[am] = idx
        node_feats[idx, :] = get_atom_features(atom, params)

    # Now we will go through and create the adjacency lists, one list of index pairs per bond name.
    adjacency_lists = {k: [] for k in params.bond_names}
    for bond in mol.GetBonds():
        am_b = bond.GetBeginAtom().GetPropsAsDict()['molAtomMapNumber']
        am_e = bond.GetEndAtom().GetPropsAsDict()['molAtomMapNumber']
        ix_b = atom_map_to_index_map[am_b]
        ix_e = atom_map_to_index_map[am_e]
        adjacency_lists[params.get_bond_name(bond)].append((ix_b, ix_e))

    # Finally we pack all the results together
    res = graph_as_adj_list.GraphAsAdjList(
        node_feats,
        {k: np.array(v).T for k, v in adjacency_lists.items()},
        np.zeros(node_feats.shape[0], dtype=data_types.INT))
    return res
def set_position(mol: rdkit.Mol, position, conformer=-1):
    """
    Move the centroid of `mol` onto `position`.

    Parameters
    ----------
    mol : ``rdkit`` molecule
        The molecule to move. It is modified in place: the chosen
        conformer is replaced by a shifted one with the same id.

    position : :class:`numpy.array`
        The point on which the centroid of the molecule should be
        placed.

    conformer : :class:`int`, optional
        The id of the conformer to be used.

    Returns
    -------
    ``rdkit`` molecule
        The same instance that was passed in as `mol`, with the
        centroid of the conformer placed at `position`.

    """

    # Resolve the requested conformer to its concrete id, so the
    # replacement conformer can be stored under the same id.
    target_id = mol.GetConformer(conformer).GetId()

    # Displacement that carries the current centroid onto `position`.
    displacement = position - get_centroid(mol, target_id)

    # `apply_shift` returns a copy holding a single, shifted
    # conformer. Take that conformer and give it the original id.
    shifted_mol = apply_shift(mol, displacement, target_id)
    replacement = shifted_mol.GetConformer()
    replacement.SetId(target_id)

    # Swap the old conformer for the shifted one.
    mol.RemoveConformer(target_id)
    mol.AddConformer(replacement)
    return mol
def calc_features_mol(mol: Chem.Mol, features_generator: FeaturesGenerator):
    """Compute molecule-level features for `mol` as a numpy array.

    * `mol` with at least one heavy atom: the output of
      ``features_generator(mol)``.
    * `mol` with no heavy atoms (e.g. H2): a zero vector with the same
      length as the features generated for methane.
    * `mol` is ``None``: ``numpy.asarray(None)``.
    """
    if mol is None:
        features = None
    else:
        heavy_atoms = mol.GetNumHeavyAtoms()
        if heavy_atoms > 0:
            features = features_generator(mol)
        elif heavy_atoms == 0:
            # Feature length depends on the generator, so methane is
            # used as a dummy molecule to find out how long the zero
            # vector has to be.
            reference = features_generator(Chem.MolFromSmiles('C'))
            features = np.zeros(len(reference))
        else:
            features = None
    return np.asarray(features)
def make_entry(
    mol: rdkit.Mol,
    sa_scorer: SyntheticAccesibilityScorer,
):
    """Build one result row for `mol`.

    Hydrogens are added to `mol`, it is scored with every function in
    ``sa_scorer.sa_funcs`` (exactly three are expected, in the order
    sascore, scscore, rfmodel), and the first functional group in the
    module-level ``fg_names`` mapping (name -> SMARTS) that matches the
    molecule is recorded.

    Returns:
        A tuple ``(smiles, fg_name, sascore, scscore, rfmodel)``.
        ``fg_name`` is ``""`` when no functional group matches or when
        matching fails.
    """
    # Ensure hydrogens are added to molecule.
    mol = rdkit.AddHs(mol)
    sascore, scscore, rfmodel = [
        sa_scorer.calculate_sa(mol, func) for func in sa_scorer.sa_funcs
    ]

    try:
        # `next` with a default stops at the first match and covers the
        # "nothing matched" case without relying on an IndexError.
        fg_name = str(
            next(
                (
                    name
                    for name in fg_names
                    if mol.GetSubstructMatch(
                        rdkit.MolFromSmarts(fg_names[name])
                    )
                ),
                "",
            )
        )
    except Exception:
        # Functional-group detection is best effort: a pattern that
        # cannot be parsed or matched must not abort the whole entry.
        # Unlike a bare ``except``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        fg_name = ""

    return (
        rdkit.MolToSmiles(mol),
        fg_name,
        float(sascore),
        float(scscore),
        float(rfmodel),
    )
def calculate_normalizers(mol: Chem.Mol, num_confs: int = 200,
                          pruning_thresh: float = 0.05) -> Tuple[float, float]:
    """Calculates the :math:`E_0` and :math:`Z_0` normalizing constants for a molecule
    used in the TorsionNet [1]_ paper.

    Conformers are embedded into `mol`, optimized with MMFF and pruned by TFD.
    :math:`E_0` is the lowest conformer energy found and :math:`Z_0` is the sum of
    ``exp(-(E - E_0))`` over the remaining conformers.

    Parameters
    ----------
    mol : RDKit Mol
        The molecule of interest. It is modified: conformers are embedded into it.
    num_confs : int
        The number of conformers to generate when calculating the constants.
        Should equal the number of steps for each episode of the environment
        containing this molecule.
    pruning_thresh : float
        TFD threshold for pruning the conformers of `mol`.

    Returns
    -------
    E0 : float
        The minimum conformer energy.
    Z0 : float
        The sum of ``exp(-(E - E0))`` over the pruned conformers.

    Raises
    ------
    Exception
        If no conformer could be embedded.

    References
    ----------
    .. [1] `TorsionNet paper <https://arxiv.org/abs/2006.07078>`_
    """
    Chem.MMFFSanitizeMolecule(mol)
    conformer_ids = Chem.EmbedMultipleConfs(mol, numConfs=num_confs)
    if len(conformer_ids) == 0:
        raise Exception('Unable to embed molecule with conformer using rdkit')

    Chem.MMFFOptimizeMoleculeConfs(mol)
    pruned = prune_conformers(mol, pruning_thresh)

    conformer_energies = get_conformer_energies(pruned)
    lowest_energy = conformer_energies.min()
    partition_sum = np.sum(np.exp(-(conformer_energies - lowest_energy)))

    # NOTE(review): `prune_conformers` may hand back a copy, in which case this
    # clears the copy and the caller's molecule keeps its embedded conformers.
    pruned.RemoveAllConformers()
    return lowest_energy, partition_sum
def prune_last_conformer(
        mol: Chem.Mol,
        tfd_thresh: float,
        energies: List[float]) -> Tuple[Chem.Mol, List[float]]:
    """Prunes the last conformer of the molecule against the earlier ones.

    The TFD (Torsional Fingerprint Deviation) between the last conformer and
    every other conformer is computed. If any conformer whose energy does not
    exceed that of the last conformer lies within `tfd_thresh` of it, the last
    conformer is removed. Otherwise the last conformer is kept, moved to its
    energy-sorted position, and every higher-energy conformer within
    `tfd_thresh` of it is dropped.

    Parameters
    ----------
    mol : RDKit Mol
        The molecule to be pruned. All conformers except the last one should
        be ordered by ascending energy, with conformer ids ``0..n-1``.
    tfd_thresh : float
        The minimum threshold for TFD between conformers. A negative value
        disables pruning.
    energies : list of float
        The energies of the conformers in `mol`, in conformer order.

    Returns
    -------
    mol : RDKit Mol
        The molecule after pruning. This is `mol` itself when nothing is done
        or when the last conformer is removed, and a new molecule otherwise.
    energies : list of float
        The energies of the conformers that remain, in the order they appear
        in the returned molecule.
    """
    if tfd_thresh < 0 or mol.GetNumConformers() <= 1:
        return mol, energies

    last = mol.GetNumConformers() - 1

    # Number of earlier conformers whose energy is <= that of the last one;
    # this is also the slot the last conformer would take in sorted order.
    insert_at = bisect.bisect(energies[:-1], energies[-1])

    deviations = np.array(
        TorsionFingerprints.GetTFDBetweenConformers(
            mol, range(0, last), [last], useWeights=False))

    # A conformer of lower (or equal) energy is too similar: drop the new one.
    if not np.all(deviations[:insert_at] >= tfd_thresh):
        mol.RemoveConformer(last)
        return mol, energies[:-1]

    # Keep everything below the new conformer, then the new conformer, then
    # only those higher-energy conformers that are far enough away from it.
    survivors = [*range(insert_at), last]
    survivors.extend(
        j for j in range(insert_at, last) if deviations[j] >= tfd_thresh)

    pruned = Chem.Mol(mol)
    pruned.RemoveAllConformers()
    for conf_id in survivors:
        pruned.AddConformer(mol.GetConformer(conf_id), assignId=True)
    return pruned, [energies[j] for j in survivors]
def get_max_diameter(mol: rdkit.Mol, conf=-1) -> float:
    """Gets the largest distance between two atoms in a molecule.

    The distance between the two most separated atoms is extended by the
    van der Waals radius of each of them.

    Args:
        mol: Molecule to calculate diameter of.
        conf: Conformer to use to calculate diameter.

    Returns:
        maxd: Maximum diameter.
    """

    def separation(pair):
        first, second = pair
        return atom_distance(mol, first, second, conf)

    # Every unordered pair of atom ids; the farthest-apart pair wins.
    atom_pairs = it.combinations(range(mol.GetNumAtoms()), 2)
    far_a, far_b = max(atom_pairs, key=separation)

    radius_a = atom_vdw_radii[mol.GetAtomWithIdx(far_a).GetSymbol()]
    radius_b = atom_vdw_radii[mol.GetAtomWithIdx(far_b).GetSymbol()]
    return atom_distance(mol, far_a, far_b, conf) + (radius_a + radius_b)
def prune_conformers(mol: Chem.Mol, tfd_thresh: float) -> Chem.Mol:
    """Prunes all the conformers in the molecule.

    Removes conformers that have a TFD (torsional fingerprint deviation) lower than
    `tfd_thresh` with other conformers. Lowest energy conformers are kept.

    Parameters
    ----------
    mol : RDKit Mol
        The molecule to be pruned.
    tfd_thresh : float
        The minimum threshold for TFD between conformers. A negative value
        disables pruning.

    Returns
    -------
    mol : RDKit Mol
        The updated molecule after pruning. When pruning is disabled or `mol` has at
        most one conformer, `mol` itself is returned; otherwise a new molecule holding
        the kept conformers in order of increasing energy.
    """
    if tfd_thresh < 0 or mol.GetNumConformers() <= 1:
        return mol

    energies = get_conformer_energies(mol)
    tfd = tfd_matrix(mol)

    # Walk the conformers from lowest to highest energy. On the first pass `keep` is
    # empty, so the lowest-energy conformer is always accepted.
    keep = []
    for i in np.argsort(energies):
        this_tfd = tfd[i][np.asarray(keep, dtype=int)]
        # accept only conformers outside the tfd threshold of everything kept so far
        if np.all(this_tfd >= tfd_thresh):
            keep.append(i)

    # create a new molecule to hold the chosen conformers
    # this ensures proper conformer IDs and energy-based ordering
    new = Chem.Mol(mol)
    new.RemoveAllConformers()
    for i in keep:
        conf = mol.GetConformer(int(i))
        new.AddConformer(conf, assignId=True)
    return new
def get_conformer_energy(mol: Chem.Mol, confId: int = None) -> float:
    """Returns the MMFF energy of the conformer with `confId` in `mol`.

    When `confId` is ``None`` the id ``mol.GetNumConformers() - 1`` is used.
    """
    conf_id = mol.GetNumConformers() - 1 if confId is None else confId

    Chem.MMFFSanitizeMolecule(mol)
    force_field = Chem.MMFFGetMoleculeForceField(
        mol, Chem.MMFFGetMoleculeProperties(mol), confId=conf_id)
    return force_field.CalcEnergy()
def get_cavity_size(mol: rdkit.Mol, origin, conformer):
    """Calculates diameter of the conformer from `origin`.

    The cavity is measured by finding the atom nearest to `origin`,
    correcting for the van der Waals radius of each atom and multiplying
    by -2.

    Args:
        mol: Molecule to calculate diameter of.
        origin: Coordinates of the position from which
            the cavity is measured.
        conformer: ID of the conformer to use.

    Returns:
        (float): Cavity size of the molecule.
    """
    conf = mol.GetConformer(conformer)
    atom_vdw = np.array(
        [atom_vdw_radii[x.GetSymbol()] for x in mol.GetAtoms()])

    # `euclidean_distances` wants a 2-D array with one row per point, so
    # `origin` is turned into a single-row ndarray (not `np.matrix`).
    origin_row = np.asarray(origin, dtype=float).reshape(1, -1)
    distances = euclidean_distances(conf.GetPositions(), origin_row)

    # Distance from `origin` to the surface of each atom.
    distances = distances.flatten() - atom_vdw
    return -2 * distances.min()
def get_conformer_energies(mol: Chem.Mol) -> np.ndarray:
    """Returns the MMFF energy of each conformer in `mol`.

    The result is a 1-D float array (not a list) with one entry per
    conformer, in the order given by ``mol.GetConformers()``.
    """
    Chem.MMFFSanitizeMolecule(mol)
    # The MMFF properties are shared by every conformer, so build them once.
    mmff_props = Chem.MMFFGetMoleculeProperties(mol)
    energies = [
        Chem.MMFFGetMoleculeForceField(
            mol, mmff_props, confId=conf.GetId()).CalcEnergy()
        for conf in mol.GetConformers()
    ]
    return np.asarray(energies, dtype=float)
def change_mol_bond(mol: AllChem.Mol, diff_mode: chem_details.ElectronMode,
                    bond_to_change_indcs: typing.Tuple[int, int]):
    """
    Change a molecule by adding or removing a pair of electrons from a bond.

    `mol` is left untouched; the edit is made on a copy, which is returned.

    :param mol: the molecule to start from.
    :param diff_mode: ``ElectronMode.REMOVE`` lowers the bond order between the two atoms by one
     (deleting the bond if it was single); ``ElectronMode.ADD`` raises it by one, creates a single
     bond if none existed, and leaves it unchanged if the order is already the highest one known to
     ``NUM_TO_BOND``. Aromatic bonds are not dealt with.
    :param bond_to_change_indcs: indices of the two atoms whose bond is changed.
    :return: the edited molecule.
    :raises RuntimeError: if `diff_mode` is neither REMOVE nor ADD.
    """
    editable = Chem.RWMol(mol)
    first_idx, second_idx = bond_to_change_indcs[0], bond_to_change_indcs[1]
    current_bond = mol.GetBondBetweenAtoms(first_idx, second_idx)

    removing = diff_mode is chem_details.ElectronMode.REMOVE
    if not removing and diff_mode is not chem_details.ElectronMode.ADD:
        raise RuntimeError("Invalid mode: {}".format(diff_mode))

    if removing:
        # Take the bond out, then put it back one order lower unless it was a single bond.
        editable.RemoveBond(first_idx, second_idx)
        if current_bond:
            lowered = current_bond.GetBondTypeAsDouble() - 1
            if lowered != 0:
                editable.AddBond(first_idx, second_idx, order=NUM_TO_BOND[lowered])
    elif current_bond:
        # Existing bond: swap it for one that is an order higher, capped at the maximum known order.
        order_now = current_bond.GetBondTypeAsDouble()
        raised = order_now + 1
        replacement = NUM_TO_BOND[raised if raised in NUM_TO_BOND else order_now]
        editable.RemoveBond(first_idx, second_idx)
        editable.AddBond(first_idx, second_idx, order=replacement)
    else:
        # No bond yet: create a single bond.
        editable.AddBond(*bond_to_change_indcs, order=NUM_TO_BOND[1])

    return editable.GetMol()
def atom_distance(mol: rdkit.Mol, atom1_id: int, atom2_id: int, conf=-1) -> float:
    """Gets distance between two atoms.

    Args:
        mol: Molecule containing the atoms.
        atom1_id: ID of first atom.
        atom2_id: ID of second atom.
        conf: Conformer of `mol` to use.

    Returns:
        (float): Euclidean distance between two atoms.
    """
    conformer = mol.GetConformer(conf)
    # Look up both positions in the chosen conformer, then measure
    # the straight-line distance between them.
    first_point, second_point = (
        conformer.GetAtomPosition(atom_id) for atom_id in (atom1_id, atom2_id)
    )
    return euclidean(first_point, second_point)
def apply_shift(mol: rdkit.Mol, shift, conformer=-1):
    """
    Translate every atom of a conformer by `shift`.

    `mol` itself is not modified. A copy is returned.

    Parameters
    ----------
    mol : ``rdkit`` molecule
        The molecule whose atoms are shifted.

    shift : :class:`numpy.array`
        The displacement to apply along each axis.

    conformer : :class:`int`, optional
        The id of the conformer to use.

    Returns
    -------
    ``rdkit`` molecule
        A copy of `mol` holding a single conformer, in which every
        atom has been moved by `shift`.

    """

    # Work on a copy of the conformer so the one owned by `mol` is
    # left untouched.
    shifted_conf = rdkit.Conformer(mol.GetConformer(conformer))

    for atom in mol.GetAtoms():
        idx = atom.GetIdx()
        # Read the current position as an array, displace it and
        # write it back as an rdkit point.
        moved = np.array(shifted_conf.GetAtomPosition(idx)) + shift
        shifted_conf.SetAtomPosition(idx, Point3D(*moved))

    # The copied molecule arrives with copies of the original
    # conformers. They are dropped so that the shifted conformer is
    # the only one it holds.
    result = rdkit.Mol(mol)
    result.RemoveAllConformers()
    result.AddConformer(shifted_conf)
    return result
def _init_from_rdkit_mol(
    self,
    molecule: rdkit.Mol,
    functional_groups: _FunctionalGroups,
    placer_ids: typing.Optional[abc.Iterable[int]],
) -> None:
    """
    Initialize from an :mod:`rdkit` molecule.

    Atoms, bonds and the position matrix are read from `molecule`
    (using its default conformer). Dative bonds are stored with an
    order of 9, all other bonds with their order as a double.

    Parameters:

        molecule:
            The molecule.

        functional_groups:
            The :class:`.FunctionalGroup` instances the building block
            should have, and / or :class:`.FunctionalGroupFactory`
            instances used for creating them.

        placer_ids:
            The ids of *placer* atoms. These are the atoms which
            should be used for calculating the position of the
            building block. Depending on the values passed to
            `placer_ids`, and the functional groups in the building
            block, different *placer* ids will be used by the
            building block.

            #. `placer_ids` is passed to the initializer: the passed
               *placer* ids will be used by the building block.

            #. `placer_ids` is ``None`` and the building block has
               functional groups: The *placer* ids of the functional
               groups will be used as the *placer* ids of the
               building block.

            #. `placer_ids` is ``None`` and `functional_groups` is
               empty. All atoms of the molecule will be used for
               *placer* ids.

    """

    def bond_order(rdkit_bond):
        # Dative bonds get the sentinel order 9.
        if rdkit_bond.GetBondType() == rdkit.BondType.DATIVE:
            return 9
        return rdkit_bond.GetBondTypeAsDouble()

    atoms = tuple(
        Atom(
            id=rdkit_atom.GetIdx(),
            atomic_number=rdkit_atom.GetAtomicNum(),
            charge=rdkit_atom.GetFormalCharge(),
        )
        for rdkit_atom in molecule.GetAtoms()
    )
    bonds = tuple(
        Bond(
            atom1=atoms[rdkit_bond.GetBeginAtomIdx()],
            atom2=atoms[rdkit_bond.GetEndAtomIdx()],
            order=bond_order(rdkit_bond),
        )
        for rdkit_bond in molecule.GetBonds()
    )

    Molecule.__init__(
        self,
        atoms=atoms,
        bonds=bonds,
        position_matrix=molecule.GetConformer().GetPositions(),
    )

    extracted_groups = self._extract_functional_groups(
        functional_groups=functional_groups,
    )
    self._with_functional_groups(extracted_groups)

    self._placer_ids = self._normalize_placer_ids(
        placer_ids=placer_ids,
        functional_groups=self._functional_groups,
    )

    core_ids = self._get_core_ids(
        functional_groups=self._functional_groups,
    )
    self._core_ids = frozenset(core_ids)
def standardize(compound: AllChem.Mol, add_hs=True, remove_stereo=True, thorough=False) -> AllChem.Mol: """ Standardizes an RDKit molecule by running various cleanup and sanitization operations. Parameters ---------- compound : rdkit.Chem.rdchem.Mol A chemical compound. add_hs : bool If True, adds hydrogens to the compound. remove_stereo : bool If True, removes stereochemistry info from the compound. thorough : bool If True, removes charge, isotopes, and small fragments from the compound. Returns ------- rdkit.Chem.rdchem.Mol The standardized compound. """ # basic cleanup Chem.Cleanup(compound) Chem.SanitizeMol(compound, sanitizeOps=Chem.SanitizeFlags.SANITIZE_ALL, catchErrors=False) AllChem.AssignStereochemistry(compound, cleanIt=True, force=True, flagPossibleStereoCenters=True) # remove isotopes, neutralize charge if thorough: for atom in compound.GetAtoms(): atom.SetIsotope(0) compound = _neutralize_charge(compound) Chem.SanitizeMol(compound, sanitizeOps=Chem.SanitizeFlags.SANITIZE_ALL, catchErrors=False) # remove stereochemistry if remove_stereo: Chem.RemoveStereochemistry(compound) # commute inchi compound = _commute_inchi(compound) # keep biggest fragment if thorough: compound = _strip_small_fragments(compound) Chem.SanitizeMol(compound, sanitizeOps=Chem.SanitizeFlags.SANITIZE_ALL, catchErrors=False) # neutralize charge compound = _neutralize_charge(compound) Chem.SanitizeMol(compound, sanitizeOps=Chem.SanitizeFlags.SANITIZE_ALL, catchErrors=False) # add protons if add_hs: return Chem.AddHs(compound, explicitOnly=False, addCoords=True) return compound
def _init_from_rdkit_mol(
    self,
    molecule: rdkit.Mol,
    functional_groups: typing.Iterable[typing.Union[
        FunctionalGroup, FunctionalGroupFactory]],
    placer_ids: typing.Optional[tuple[int, ...]],
) -> None:
    """
    Initialize from an :mod:`rdkit` molecule.

    Atoms, bonds and the position matrix are read from `molecule`
    (using its default conformer). Dative bonds are stored with an
    order of 9, all other bonds with their order as a double.

    Parameters:

        molecule:
            The molecule.

        functional_groups:
            An :class:`iterable` of :class:`.FunctionalGroup` or
            :class:`.FunctionalGroupFactory` or both.
            :class:`.FunctionalGroup` instances are added to the
            building block and :class:`.FunctionalGroupFactory`
            instances are used to create :class:`.FunctionalGroup`
            instances the building block should hold.
            :class:`.FunctionalGroup` instances are used to identify
            which atoms are modified during
            :class:`.ConstructedMolecule` construction.

        placer_ids:
            The ids of *placer* atoms. These are the atoms which
            should be used for calculating the position of the
            building block. Depending on the values passed to
            `placer_ids`, and the functional groups in the building
            block, different *placer* ids will be used by the
            building block.

            #. `placer_ids` is passed to the initializer: the passed
               *placer* ids will be used by the building block.

            #. `placer_ids` is ``None`` and the building block has
               functional groups: The *placer* ids of the functional
               groups will be used as the *placer* ids of the
               building block.

            #. `placer_ids` is ``None`` and `functional_groups` is
               empty. All atoms of the molecule will be used for
               *placer* ids.

    """

    atoms = tuple(
        Atom(
            rdkit_atom.GetIdx(),
            rdkit_atom.GetAtomicNum(),
            rdkit_atom.GetFormalCharge(),
        )
        for rdkit_atom in molecule.GetAtoms()
    )

    def to_bond(rdkit_bond):
        # Dative bonds get the sentinel order 9.
        is_dative = rdkit_bond.GetBondType() == rdkit.BondType.DATIVE
        return Bond(
            atom1=atoms[rdkit_bond.GetBeginAtomIdx()],
            atom2=atoms[rdkit_bond.GetEndAtomIdx()],
            order=9 if is_dative else rdkit_bond.GetBondTypeAsDouble(),
        )

    bonds = tuple(to_bond(rdkit_bond) for rdkit_bond in molecule.GetBonds())
    position_matrix = molecule.GetConformer().GetPositions()
    super().__init__(atoms, bonds, position_matrix)

    extracted_groups = self._extract_functional_groups(
        functional_groups=functional_groups,
    )
    self._with_functional_groups(extracted_groups)

    self._placer_ids = self._normalize_placer_ids(
        placer_ids=placer_ids,
        functional_groups=self._functional_groups,
    )

    core_ids = self._get_core_ids(
        functional_groups=self._functional_groups,
    )
    self._core_ids = frozenset(core_ids)