Beispiel #1
0
def _asa_uff(mol_complete: Molecule, ligands: Iterable[Molecule],
             core: Molecule, read_template: bool, job: Type[Job],
             settings: Settings) -> Tuple[float, float, float, float, int]:
    r"""Perform an activation strain analysis using RDKit UFF.

    Parameters
    ----------
    mol_complete : |plams.Molecule|
        A Molecule representing the (unfragmented) relaxed structure of the system of interest.

    ligands : :class:`Iterable<collections.abc.Iterable>` [|plams.Molecule|]
        An iterable of Molecules representing the individual ligand fragments
        within **mol_complete**.

    core : |plams.Molecule|
        A Molecule representing the core fragment within **mol_complete**.

    read_template : :class:`bool`
        Not used by this function.

    job : :class:`type` [|plams.Job|]
        Not used by this function.

    settings : |plams.Settings|
        Not used by this function.
        The unused parameters are presumably retained for compatibility with
        the signature of :func:`._asa_plams`.

    Returns
    -------
    :class:`float`, :class:`float`, :class:`float`, :class:`float` and :class:`int`
        The energy of **mol_complete**,
        the summed energy of all fragments in **ligands**,
        the energy of **core**,
        the energy of the lowest-energy fragment in **ligands** after a UFF optimization and
        the total number of fragments within **ligands**.
        All four energies are ``nan`` if **ligands** is empty or
        if no valid fragment energy could be calculated.

    """
    # Create RDKit molecules
    rd_complete = molkit.to_rdmol(mol_complete)
    rd_ligands = (molkit.to_rdmol(mol) for mol in ligands)

    # Calculate the energy of the total system
    E_complete = UFF(rd_complete,
                     ignoreInterfragInteractions=False).CalcEnergy()

    # Calculate the (summed) energy of each individual fragment in the total system
    E_ligands = 0.0
    E_min = np.inf
    mol_min = None
    ligand_count = 0  # Stays 0 if **ligands** is empty
    for ligand_count, rdmol in enumerate(rd_ligands, 1):
        E = UFF(rdmol, ignoreInterfragInteractions=False).CalcEnergy()
        E_ligands += E
        if E < E_min:
            E_min, mol_min = E, rdmol

    # No usable fragment was found or one of the calculations failed; better stop now
    if mol_min is None or np.isnan(E_ligands):
        return np.nan, np.nan, np.nan, np.nan, ligand_count

    # Calculate the energy of an optimized fragment (the one with the lowest initial energy)
    UFF(mol_min, ignoreInterfragInteractions=False).Minimize()
    E_ligand_opt = UFF(mol_min, ignoreInterfragInteractions=False).CalcEnergy()

    E_core = UFF(molkit.to_rdmol(core),
                 ignoreInterfragInteractions=False).CalcEnergy()
    return E_complete, E_ligands, E_core, E_ligand_opt, ligand_count
Beispiel #2
0
def set_qd(qd: Molecule, mol_dict: Settings) -> Molecule:
    """Update quantum dots imported by :func:`.read_mol`.

    The atoms of **qd** are reordered (core atoms first, followed by the ligands) and
    a number of molecular and atomic properties are set.
    Performs an inplace update of **qd**.

    Parameters
    ----------
    qd : |plams.Molecule|
        The quantum dot.

    mol_dict : |plams.Settings|
        A Settings instance; the following keys are read:
        ``"ligand_smiles"``, ``"ligand_anchor"``, ``"guess_bonds"``, ``"name"`` and ``"path"``.

    Returns
    -------
    |plams.Molecule|
        The updated **qd** (the same object that was passed).

    """
    # Create ligand (and anchor) molecules
    ligand = molkit.from_smiles(mol_dict.ligand_smiles)
    ligand_rdmol = molkit.to_rdmol(ligand)
    anchor = molkit.from_smiles(mol_dict.ligand_anchor)
    anchor_rdmol = molkit.to_rdmol(anchor)
    qd_rdmol = molkit.to_rdmol(qd)

    # Create lists of (1-based) atomic indices of the core and ligands;
    # RDKit matches are 0-based, hence the `1 +`
    matches = qd_rdmol.GetSubstructMatches(ligand_rdmol)
    lig_idx_array = 1 + np.array(matches, dtype=int)
    lig_idx = lig_idx_array.ravel().tolist()

    # The core consists of all atoms which are not part of any ligand
    all_idx = np.arange(1, len(qd) + 1)
    core_idx = all_idx[~np.isin(all_idx, lig_idx_array)].tolist()

    # Guess bonds
    if mol_dict.guess_bonds:
        qd.guess_bonds(atom_subset=[qd[i] for i in lig_idx])

    # Reorder all atoms: core atoms first followed by ligands
    qd.atoms = [qd[i] for i in core_idx] + [qd[i] for i in lig_idx]

    # Construct a list with the indices of all ligand anchor atoms.
    # `core_idx_max` is the (1-based) index of the first ligand atom and
    # `_anchor_idx` the (0-based) offset of the anchor within a single ligand
    core_idx_max = 1 + len(core_idx)
    _anchor_idx = ligand_rdmol.GetSubstructMatch(anchor_rdmol)[0]
    start = core_idx_max + _anchor_idx
    stop = start + len(lig_idx)
    step = len(ligand)
    anchor_idx = list(range(start, stop, step))

    # Update the properties of **qd**
    for i in anchor_idx:
        qd[i].properties.anchor = True
    qd.properties.indices = list(range(1, core_idx_max)) + anchor_idx
    qd.properties.job_path = []
    qd.properties.name = mol_dict.name
    qd.properties.path = mol_dict.path
    qd.properties.ligand_smiles = Chem.CanonSmiles(mol_dict.ligand_smiles)
    # PLAMS molecules are indexed from 1 while `_anchor_idx` counts from 0
    # NOTE(review): the number in the label is kept 0-based, as in the original code;
    # confirm whether consumers of `ligand_anchor` expect a 1-based index instead
    qd.properties.ligand_anchor = f'{ligand[_anchor_idx + 1].symbol}{_anchor_idx}'

    # Update the pdb_info of all atoms
    for i, at in enumerate(qd, 1):
        at.properties.pdb_info.SerialNumber = i
        if i < core_idx_max:  # A core atom
            at.properties.pdb_info.ResidueNumber = 1
        else:  # A ligand atom; every ligand gets its own residue number (starting from 2)
            at.properties.pdb_info.ResidueNumber = 2 + (i - core_idx_max) // len(ligand)

    return qd
Beispiel #3
0
def modified_minimum_scan_rdkit(ligand: Molecule, bond_tuple: Tuple[int, int],
                                anchor: Atom) -> None:
    """A modified version of the :func:`.global_minimum_scan_rdkit` function.

    * Uses the ligand vector as criteria rather than the energy.
    * Geometry optimizations are constrained during the conformation search.
    * Finish with a final unconstrained geometry optimization.

    Performs an inplace update of the geometry of **ligand**.

    Parameters
    ----------
    ligand : |plams.Molecule|
        The ligand molecule.

    bond_tuple : :class:`tuple` [:class:`int`, :class:`int`]
        A 2-tuple with the atomic indices defining the bond that is rotated.

    anchor : |plams.Atom|
        The ligand anchor atom.
        If **anchor** is not part of **ligand** then the origin is used as anchor instead.

    See Also
    --------
    :func:`global_minimum_scan_rdkit<scm.plams.recipes.global_minimum.minimum_scan_rdkit>`:
        Optimize the molecule (RDKit UFF) with 3 different values for the given dihedral angle and
        find the lowest energy conformer.

    """
    # Define a number of variables and create 3 copies of the ligand,
    # each with the bond rotated by a different angle
    angles = (-120, 0, 120)
    mol_list = [ligand.copy() for _ in range(3)]
    for angle, mol in zip(angles, mol_list):
        bond = mol[bond_tuple]
        atom = mol[bond_tuple[0]]
        mol.rotate_bond(bond, atom, angle, unit='degree')
    rdmol_list = [molkit.to_rdmol(mol, properties=False) for mol in mol_list]

    # Optimize the (constrained) geometry for all dihedral angles in angles.
    # `mol` and `bond` refer to the last copy created in the loop above;
    # the resulting fixed points are applied to every conformer
    fixed = _find_idx(mol, bond)
    for rdmol in rdmol_list:
        ff = UFF(rdmol)
        for f in fixed:
            ff.AddFixedPoint(f)
        ff.Minimize()

    # Find the conformation with the optimal ligand vector
    cost_list = []
    try:
        i = ligand.atoms.index(anchor)
    except ValueError:
        i = -1  # Default to the origin as anchor

    for rdmol in rdmol_list:
        xyz = rdmol_as_array(rdmol)
        if i == -1:  # Default to the origin as anchor (appended as the last row)
            xyz = np.vstack([xyz, [0, 0, 0]])
        rotmat = optimize_rotmat(xyz, i)
        xyz[:] = xyz @ rotmat.T
        xyz -= xyz[i]
        # The cost grows exponentially with the second and third Cartesian components
        cost = np.exp(xyz[:, 1:]).sum()
        cost_list.append(cost)

    # Perform an unconstrained optimization on the best geometry and update the geometry of ligand
    j = np.argmin(cost_list)
    rdmol_best = rdmol_list[j]
    UFF(rdmol_best).Minimize()
    ligand.from_rdmol(rdmol_best)
Beispiel #4
0
    def get_current_value(self, mol: MolType) -> float:
        """Evaluate this coordinate for the passed molecule.

        PLAMS molecules are first converted into RDKit molecules;
        any other object is used as-is.
        The value is computed by calling ``self.fun`` with the molecule's conformer
        and the indices stored in ``self.atoms``, each lowered by one.
        """
        rdmol = molkit.to_rdmol(mol) if isinstance(mol, Molecule) else mol
        conformer = rdmol.GetConformer()

        # Shift the stored indices down by one
        # (presumably from 1-based PLAMS to 0-based RDKit numbering)
        shifted = [index - 1 for index in self.atoms]
        return self.fun(conformer, *shifted)
Beispiel #5
0
def substructure_split(ligand: Molecule,
                       idx: Tuple[int, int],
                       split: bool = True) -> Molecule:
    """Remove the atom (and whatever fragment hangs on it) bound to a functional group's anchor.

    The first index in **idx** identifies the anchor atom, the last index the atom
    that is deleted if ``split=True``.
    After the deletion only the fragment containing the anchor atom is kept and
    the anchor is assigned a charge of -1 unless it already has a (non-zero) charge.

    Parameters
    ----------
    ligand: |plams.Molecule|_
        The ligand molecule; it is copied and not modified.

    idx : |tuple|_ [|int|_]
        A tuple with 2 (0-based) atomic indices associated with a functional group.

    split : bool
        If a functional group should be split from **ligand** (``True``) or not (``False``).

    Returns
    -------
    |plams.Molecule|_
        A copy of **ligand**, with part of its functional group removed (see **split**).
        Its ``dummies``, ``anchor``, ``charge``, ``smiles``, ``name`` and ``path``
        properties are updated.

    """
    ret = ligand.copy()
    # PLAMS molecules are indexed from 1, hence the `+ 1`
    anchor_atom = ret[idx[0] + 1]
    leaving_atom = ret[idx[-1] + 1]

    if split:
        ret.delete_atom(leaving_atom)

        # Continue with the fragment holding the anchor atom;
        # stick to the complete molecule if no such fragment is found
        ret = next((frag for frag in ret.separate_mod() if anchor_atom in frag), ret)

        # Assign a charge to the anchor atom if it does not have one yet
        if not anchor_atom.properties.charge:
            anchor_atom.properties.charge = -1

    # Update ligand properties
    props = ret.properties
    props.dummies = anchor_atom
    props.anchor = anchor_atom.symbol + str(ret.atoms.index(anchor_atom) + 1)
    props.charge = sum(at.properties.get('charge', 0) for at in ret)

    # Update the ligand smiles string
    raw_smiles = Chem.MolToSmiles(molkit.to_rdmol(ret))
    props.smiles = Chem.CanonSmiles(raw_smiles)
    props.name = '@'.join((santize_smiles(props.smiles), props.anchor))
    props.path = ligand.properties.path

    return ret
Beispiel #6
0
    def get_current_value(self, mol):
        """
        Return the value of the coordinate for **mol**.

        A PLAMS molecule is converted into an RDKit molecule first.
        """
        if isinstance(mol, Molecule):
            rdmol = molkit.to_rdmol(mol)
        else:
            rdmol = mol
        conformer = rdmol.GetConformer()

        # Lower every stored index by one before handing it to `self.fun`
        # (presumably converting 1-based PLAMS indices to 0-based RDKit indices)
        indices = [atom_index - 1 for atom_index in self.atoms]
        return self.fun(conformer, *indices)
Beispiel #7
0
def sa_scores(mols: Iterable[Molecule],
              filename: Optional[PathType] = None) -> np.ndarray:
    """Calculate the synthetic accessibility score for all molecules in **mols**.

    Parameters
    ----------
    mols : :class:`Iterable<collections.abc.Iterable>` [|plams.Molecule|]
        The molecules to score.

    filename : path-like, optional
        If given, the SA model is loaded from this file;
        an empty dictionary is used as model otherwise.

    Returns
    -------
    :class:`numpy.ndarray` [:class:`float`]
        A 1D array with one score per molecule.

    """
    if filename is None:
        sa_model = {}
    else:
        sa_model = _load_sa_model(filename)

    # Molecules are converted lazily, one at a time
    rdmol_iter = (to_rdmol(m) for m in mols)

    # Pre-allocate the output if the number of molecules is known in advance;
    # -1 makes numpy consume the iterator until it is exhausted
    try:
        n_mols = len(mols)  # type: ignore
    except TypeError:
        n_mols = -1

    score_iter = (_compute_sas(rdmol, sa_model) for rdmol in rdmol_iter)
    return np.fromiter(score_iter, dtype=float, count=n_mols)
Beispiel #8
0
def _parse_name_type(mol_dict: Settings) -> None:
    """Set the ``"name"`` and ``"type"`` keys in **mol_dict**.

    The new values of ``"name"`` and ``"type"`` depend on the value of ``mol_dict["mol"]``:

    * A string pointing to a file: the file extension and the extension-less base name.
    * A string pointing to a directory: ``"folder"`` and the directory's base name.
    * Any other string: treated as a SMILES string (``"smiles"``); if the ``"path"`` key
      is present, the first ``len(path) + 1`` characters are stripped from ``mol_dict["mol"]``.
    * A PLAMS Molecule: ``"plams_mol"`` and either the molecule's name or, if it has none,
      its hydrogen-free canonical SMILES string.
    * An RDKit Mol: ``"rdmol"`` and its hydrogen-free canonical SMILES string.

    Parameters
    ----------
    mol_dict : |plams.Settings|_
        A Settings instance containing the ``"mol"`` key.

    Raises
    ------
    TypeError
        Raised if ``mol_dict["mol"]`` is an instance of neither :class:`str`,
        :class:`Molecule` nor :class:`Mol`.

    """
    mol = mol_dict.mol

    if isinstance(mol, str):
        if isfile(mol):  # A file: split the extension from the rest of the path
            pieces = mol.rsplit('.', 1)
            mol_dict.type = pieces[-1]
            mol_dict.name = basename(pieces[0])
            return

        if isdir(mol):  # A directory
            mol_dict.type = 'folder'
            mol_dict.name = basename(mol)
            return

        # Neither a file nor a directory; (probably; hopefully?) a SMILES string
        offset = 1 + len(mol_dict.path) if 'path' in mol_dict else 0
        mol_dict.type = 'smiles'
        mol_dict.mol = mol[offset:]
        mol_dict.name = santize_smiles(mol_dict.mol)
        return

    if isinstance(mol, Molecule):  # An instance of plams.Molecule
        mol_dict.type = 'plams_mol'
        if mol.properties.name:
            mol_dict.name = mol.properties.name
        else:
            rdmol = Chem.RemoveHs(molkit.to_rdmol(mol))
            mol_dict.name = Chem.MolToSmiles(rdmol, canonical=True)
        return

    if isinstance(mol, Chem.rdchem.Mol):  # An instance of rdkit.Chem.Mol
        mol_dict.type = 'rdmol'
        mol_dict.name = Chem.MolToSmiles(Chem.RemoveHs(mol), canonical=True)
        return

    raise TypeError(f"mol_dict['mol'] expects an instance of 'str', 'Molecule' or 'Mol'; "
                    f"observed type: '{mol.__class__.__name__}'")
Beispiel #9
0
def fix_h(mol: Molecule) -> None:
    """If a C=C-H angle is smaller than :math:`20` degrees, set it back to :math:`120` degrees.

    Only hydrogens whose (first) neighbor takes part in a double bond are considered.
    Performs an inplace update of **mol**.

    Parameters
    ----------
    mol : |plams.Molecule|_
        A PLAMS molecule.

    """
    # Gather all hydrogens attached to an atom with a double bond;
    # unbound hydrogens have no angle to fix and are skipped
    h_list = []
    for atom in mol:
        if atom.atnum != 1:
            continue
        neighbors = mol.neighbors(atom)
        if neighbors and 2.0 in [bond.order for bond in neighbors[0].bonds]:
            h_list.append(atom)

    rdmol = molkit.to_rdmol(mol)
    conf = rdmol.GetConformer()
    get_idx = mol.atoms.index  # 0-based indices, as used by RDKit
    set_angle = rdMolTransforms.SetAngleDeg
    get_angle = rdMolTransforms.GetAngleDeg

    update = False
    for at1 in h_list:  # The hydrogen
        at2 = mol.neighbors(at1)[0]  # The atom carrying the hydrogen
        at3_list = [at for at in mol.neighbors(at2)
                    if at != at1]  # The other neighbors of at2

        idx1, idx2 = get_idx(at1), get_idx(at2)

        # Check (at most) the first two at3-at2-at1 angles and fix the first collapsed one;
        # at2 may have fewer than two other neighbors (e.g. the nitrogen in C=N-H)
        for at3 in at3_list[:2]:
            idx_tup = get_idx(at3), idx2, idx1
            if get_angle(conf, *idx_tup) <= 20.0:
                set_angle(conf, *idx_tup, 120.0)
                update = True
                break

    # Only copy the coordinates back if something has actually changed
    if update:
        mol.from_rdmol(rdmol)
Beispiel #10
0
def adf_connectivity(mol: Molecule) -> List[str]:
    """Create an AMS-compatible connectivity list.

    Each bond is represented by a string consisting of the ids of its two atoms and
    its bond order (one decimal); aromatic bonds, as identified by RDKit,
    get a bond order of 1.5.
    If the conversion to RDKit fails with a :exc:`ValueError` or an ``ArgumentError``
    then aromaticity is ignored and the PLAMS bond orders are used as-is.

    Parameters
    ----------
    mol : |plams.Molecule|_
        A PLAMS molecule with :math:`n` bonds.

    Returns
    -------
    :math:`n` |list|_ [|str|_]
        An ADF-compatible connectivity list of :math:`n` bonds.

    """
    mol.set_atoms_id()

    # Ensure that the atom ids are always removed again, even if an exception is raised
    try:
        try:
            rdmol = molkit.to_rdmol(mol)
        except Exception as ex:
            # ArgumentError is matched by name as it cannot be referenced directly here
            if type(ex) is ValueError or ex.__class__.__name__ == 'ArgumentError':
                # Plan B: ignore aromatic bonds
                bond_orders = [bond.order for bond in mol.bonds]
            else:
                raise
        else:
            # Create a list of bond orders; aromatic bonds get a bond order of 1.5
            # NOTE(review): assumes the RDKit bonds are in the same order as mol.bonds
            aromatic = [bond.GetIsAromatic() for bond in rdmol.GetBonds()]
            bond_orders = [(1.5 if ar else bond.order)
                           for ar, bond in zip(aromatic, mol.bonds)]

        return [
            f'{bond.atom1.id} {bond.atom2.id} {order:.1f}'
            for bond, order in zip(mol.bonds, bond_orders)
        ]
    finally:
        mol.unset_atoms_id()
Beispiel #11
0
def fix_carboxyl(mol: Molecule) -> None:
    """Reset the OCO angle of carboxylates if it is smaller than :math:`60` degrees.

    Performs an inplace update of **mol**; nothing happens if **mol**
    does not contain any carboxylate groups.

    Parameters
    ----------
    mol : |plams.Molecule|_
        A PLAMS molecule.

    """
    rdmol = molkit.to_rdmol(mol)
    conformer = rdmol.GetConformer()
    carboxylates = rdmol.GetSubstructMatches(_CARBOXYLATE)

    # Nothing to fix
    if not carboxylates:
        return

    for match in carboxylates:
        # The second atom of each match serves as the vertex of all angles
        angle = rdMolTransforms.GetAngleDeg(conformer, match[3], match[1], match[0])
        if angle >= 60:
            continue
        rdMolTransforms.SetAngleDeg(conformer, match[2], match[1], match[3], 180.0)
        rdMolTransforms.SetAngleDeg(conformer, match[0], match[1], match[3], 120.0)

    # Copy the (potentially updated) coordinates back to the PLAMS molecule
    mol.from_rdmol(rdmol)
Beispiel #12
0
def set_mol_prop(mol: Molecule, mol_dict: Settings) -> None:
    """Set molecular and atomic properties.

    Cores are named after their formula and get the residue name ``"COR"``;
    all other molecules take their name from **mol_dict** and get the residue name ``"LIG"``.
    A SMILES string is generated if the molecule does not have one yet.
    """
    props = mol.properties

    if mol_dict.is_core:
        residue_name, props.name = 'COR', mol.get_formula()
    else:
        residue_name, props.name = 'LIG', mol_dict.name

    props.dummies = mol_dict.indices
    props.path = mol_dict.path
    props.job_path = []

    # An endlessly repeating supply of 2-letter combinations for pdb_info.Name
    letter_pairs = itertools.cycle(itertools.combinations(ascii_letters, 2))

    # Set the atomic properties
    for atom, pair in zip(mol, letter_pairs):
        set_atom_prop(atom, pair, residue_name)

    # Fall back to the hydrogen-free canonical SMILES string if none is available
    if not props.smiles:
        rdmol = Chem.RemoveHs(molkit.to_rdmol(mol))
        props.smiles = Chem.MolToSmiles(rdmol, canonical=True)
Beispiel #13
0
 def _get_value(mol: Molecule) -> Tuple[List[str], int]:
     """Convert **mol** into a .pdb block and return its lines along with the number of lines."""
     pdb_block = Chem.MolToPDBBlock(molkit.to_rdmol(mol))
     lines = pdb_block.splitlines()
     return lines, len(lines)
Beispiel #14
0
def canonicalize_mol(mol: Molecule,
                     inplace: bool = True) -> Optional[Molecule]:
    """Sort the atoms of a PLAMS molecule by their canonical rank, highest rank first.

    The ranks are obtained from rdkit.Chem.CanonicalRankAtoms_.

    .. _rdkit.Chem.CanonicalRankAtoms: https://www.rdkit.org/docs/source/rdkit.Chem.rdmolfiles.html#rdkit.Chem.rdmolfiles.CanonicalRankAtoms

    Examples
    --------
    .. code:: python

        >>> from scm.plams import Molecule, from_smiles

        # Methane
        >>> mol: Molecule = from_smiles('C')
        >>> print(mol)  # doctest: +SKIP
        Atoms:
            1         H      0.640510      0.640510     -0.640510
            2         H      0.640510     -0.640510      0.640510
            3         C      0.000000      0.000000      0.000000
            4         H     -0.640510      0.640510      0.640510
            5         H     -0.640510     -0.640510     -0.640510

        >>> canonicalize_mol(mol)
        >>> print(mol)  # doctest: +SKIP
        Atoms:
            1         C      0.000000      0.000000      0.000000
            2         H     -0.640510     -0.640510     -0.640510
            3         H     -0.640510      0.640510      0.640510
            4         H      0.640510     -0.640510      0.640510
            5         H      0.640510      0.640510     -0.640510

    Parameters
    ----------
    mol : |plams.Molecule|_
        A PLAMS molecule.

    inplace : bool
        If ``True``, reorder the atoms of **mol** itself and return ``None``;
        otherwise leave **mol** untouched and return a reordered copy.

    Returns
    -------
    |plams.Molecule|_
        Optional: if ``inplace=False``, return a copy of **mol** with its atoms sorted by their
        canonical rank.

    See Also
    --------
    * rdkit.Chem.CanonicalRankAtoms_: Returns the canonical atom ranking for each atom of a
      molecule fragment.

    """  # noqa
    ranks = Chem.CanonicalRankAtoms(molkit.to_rdmol(mol))

    # The molecule whose atoms are going to be reordered
    target = mol if inplace else mol.copy()

    # Pair every atom with its rank and sort the pairs in descending order
    ranked_atoms = sorted(zip(ranks, target.atoms), reverse=True)
    target.atoms = [atom for _, atom in ranked_atoms]

    return None if inplace else target
Beispiel #15
0
def set_dihed(self,
              angle: float,
              anchor: Atom,
              cap: Sequence[Atom],
              opt: bool = True,
              unit: str = 'degree') -> None:
    """Change all valid dihedral angles into a specific value.

    Valid dihedrals are those around single bonds which are not part of a ring and
    which do not involve any hydrogen atoms.
    Performs an inplace update of this instance.

    Parameters
    ----------
    angle : :class:`float`
        The desired dihedral angle.

    anchor : |plams.Atom|
        The ligand anchor atom.

    cap : :class:`Sequence<collections.abc.Sequence>` [|plams.Atom|]
        Capping atoms. Their atomic number is temporarily set to 0 while the dihedrals
        are adjusted, which ensures that they, like **anchor**, are always retained
        as dihedral-defining neighbors. The original atomic numbers are restored afterwards.

    opt : :class:`bool`
        Whether or not the dihedral adjustment should be followed up by an RDKit UFF optimization.

    unit : :class:`str`
        The unit of **angle**.

    """
    cap_atoms = list(cap)
    cap_atnum = []

    # Ensure that the atomic numbers of the capping atoms are always restored,
    # even if one of the steps below raises
    try:
        for at in cap_atoms:
            cap_atnum.append(at.atnum)
            at.atnum = 0

        angle = Units.convert(angle, unit, 'degree')
        bond_iter = (bond for bond in self.bonds
                     if bond.atom1.atnum != 1 and bond.atom2.atnum != 1
                     and bond.order == 1 and not self.in_ring(bond))

        # Correction factor for, most importantly, tri-valent anchors (e.g. P(R)(R)R)
        dihed_cor = angle / 2
        neighbors = anchor.neighbors()
        if len(neighbors) > 2:
            atom_list = [anchor] + sorted(neighbors, key=lambda at: -at.atnum)[:3]
            improper = get_dihed(atom_list)
            dihed_cor *= np.sign(improper)

        for bond in bond_iter:
            # Gather lists of all non-hydrogen neighbors
            n1, n2 = self.neighbors_mod(bond.atom1), self.neighbors_mod(bond.atom2)

            # Remove all atoms in `bond`
            n1 = [atom for atom in n1 if atom is not bond.atom2]
            n2 = [atom for atom in n2 if atom is not bond.atom1]

            # Remove all non-substituted atoms
            # A special case consists of anchor and capping atoms; they can stay
            if len(n1) > 1:
                n1 = [
                    atom for atom in n1 if (len(self.neighbors_mod(atom)) > 1
                                            or atom is anchor or atom.atnum == 0)
                ]
            if len(n2) > 1:
                n2 = [
                    atom for atom in n2 if (len(self.neighbors_mod(atom)) > 1
                                            or atom is anchor or atom.atnum == 0)
                ]

            # Set `bond` in an anti-periplanar conformation
            if n1 and n2:
                dihed = get_dihed((n1[0], bond.atom1, bond.atom2, n2[0]))
                if anchor not in bond:
                    self.rotate_bond(bond,
                                     bond.atom1,
                                     angle - dihed,
                                     unit='degree')
                else:
                    # Bonds involving the anchor get the correction factor;
                    # its sign alternates between successive anchor bonds
                    dihed -= dihed_cor
                    self.rotate_bond(bond, bond.atom1, -dihed, unit='degree')
                    dihed_cor *= -1
    finally:
        for at, atnum in zip(cap_atoms, cap_atnum):
            at.atnum = atnum

    if opt:
        rdmol = molkit.to_rdmol(self)
        UFF(rdmol).Minimize()
        self.from_rdmol(rdmol)
Beispiel #16
0
def find_substructure(
        ligand: Molecule,
        func_groups: Iterable[Chem.Mol],
        split: bool = True,
        condition: Optional[Callable[[int], bool]] = None) -> List[Molecule]:
    """Identify interesting functional groups within the ligand.

    Parameters
    ----------
    ligand : |plams.Molecule|_
        The ligand molecule.

    func_groups : |tuple|_ [|Chem.Mol|_]
        A collection of RDKit molecules representing functional groups.

    split : bool
        If a functional group should be split from **ligand** (``True``) or not (``False``).

    condition : |Callable|_, optional
        A callable which takes the number of (unique) functional group matches and
        returns a boolean.
        If it returns ``False``, an error is logged and an empty list is returned.

    Returns
    -------
    |list|_ [|plams.Molecule|_]
        A list of ligands.
        A single copy of **ligand** is created for each identified functional group,
        removing parts of the functional group if required (see **split**).
        An empty list is returned if no valid functional groups are found.

    """
    rdmol = molkit.to_rdmol(ligand)

    # Search for all functional groups in func_groups within the ligand
    all_matches = chain.from_iterable(
        rdmol.GetSubstructMatches(group, useChirality=True) for group in func_groups)

    # Keep only the first match per heteroatom (the first index of a match);
    # the dictionary preserves the order in which the matches were found
    unique_matches = {}
    for match in all_matches:
        heteroatom, *_ = match
        unique_matches.setdefault(heteroatom, match)
    ligand_indices = list(unique_matches.values())

    # The user-specified condition is not met
    if condition is not None and not condition(len(ligand_indices)):
        logger.error(
            f"Failed to satisfy the passed condition ({condition!r}) for "
            f"ligand: {ligand.properties.name!r}")
        return []

    # Not a single functional group was found
    if not ligand_indices:
        logger.error(
            f"No functional groups were found (optional.ligand.split = {split!r}) for "
            f"ligand: {ligand.properties.name!r}")
        return []

    return [substructure_split(ligand, match, split) for match in ligand_indices]