Esempio n. 1
0
def modified_minimum_scan_rdkit(ligand: Molecule, bond_tuple: Tuple[int, int],
                                anchor: Atom) -> None:
    """A modified version of the :func:`.global_minimum_scan_rdkit` function.

    * Uses the ligand vector as criteria rather than the energy.
    * Geometry optimizations are constrained during the conformation search.
    * Finish with a final unconstrained geometry optimization.

    See Also
    --------
    :func:`global_minimum_scan_rdkit<scm.plams.recipes.global_minimum.minimum_scan_rdkit>`:
        Optimize the molecule (RDKit UFF) with 3 different values for the given dihedral angle and
        find the lowest energy conformer.

        :param |Molecule| mol: The input molecule
        :param tuple bond_tuple: A 2-tuples containing the atomic indices of valid bonds
        :return |Molecule|: A copy of *mol* with a newly optimized geometry

    """
    # Define a number of variables and create 3 copies of the ligand
    angles = (-120, 0, 120)
    mol_list = [ligand.copy() for _ in range(3)]
    for angle, mol in zip(angles, mol_list):
        bond = mol[bond_tuple]
        atom = mol[bond_tuple[0]]
        mol.rotate_bond(bond, atom, angle, unit='degree')
    rdmol_list = [molkit.to_rdmol(mol, properties=False) for mol in mol_list]

    # Optimize the (constrained) geometry for all dihedral angles in angle_list
    # The geometry that yields the minimum energy is returned
    fixed = _find_idx(mol, bond)
    for rdmol in rdmol_list:
        ff = UFF(rdmol)
        for f in fixed:
            ff.AddFixedPoint(f)
        ff.Minimize()

    # Find the conformation with the optimal ligand vector
    cost_list = []
    try:
        i = ligand.atoms.index(anchor)
    except ValueError:
        i = -1  # Default to the origin as anchor

    for rdmol in rdmol_list:
        xyz = rdmol_as_array(rdmol)
        if i == -1:  # Default to the origin as anchor
            xyz = np.vstack([xyz, [0, 0, 0]])
        rotmat = optimize_rotmat(xyz, i)
        xyz[:] = xyz @ rotmat.T
        xyz -= xyz[i]
        cost = np.exp(xyz[:, 1:]).sum()
        cost_list.append(cost)

    # Perform an unconstrained optimization on the best geometry and update the geometry of ligand
    j = np.argmin(cost_list)
    rdmol_best = rdmol_list[j]
    UFF(rdmol).Minimize()
    ligand.from_rdmol(rdmol_best)
Esempio n. 2
0
def substructure_split(ligand: Molecule,
                       idx: Tuple[int, int],
                       split: bool = True) -> Molecule:
    """Delete the hydrogen or mono-/polyatomic counterion attached to the functional group.

    Sets the charge of the remaining heteroatom to -1 if ``split=True``.

    Parameters
    ----------
    ligand: |plams.Molecule|_
        The ligand molecule.

    idx : |tuple|_ [|int|_]
        A tuple with 2 atomic indices associated with a functional group.

    split : bool
        If a functional group should be split from **ligand** (``True``) or not (``False``).

    Returns
    -------
    |plams.Molecule|_
        A copy of **ligand**, with part of its functional group removed (see **split**).

    """
    lig = ligand.copy()
    at1 = lig[idx[0] + 1]
    at2 = lig[idx[-1] + 1]

    if split:
        lig.delete_atom(at2)
        mol_list = lig.separate_mod()
        for mol in mol_list:
            if at1 not in mol:
                continue

            lig = mol
            break

        # Check if the ligand heteroatom has a charge assigned, assigns a charge if not
        if not at1.properties.charge:
            at1.properties.charge = -1

    # Update ligand properties
    lig.properties.dummies = at1
    lig.properties.anchor = at1.symbol + str(lig.atoms.index(at1) + 1)
    lig.properties.charge = sum(
        atom.properties.get('charge', 0) for atom in lig)

    # Update the ligand smiles string
    rdmol = molkit.to_rdmol(lig)
    smiles = Chem.MolToSmiles(rdmol)
    lig.properties.smiles = Chem.CanonSmiles(smiles)
    lig.properties.name = santize_smiles(
        lig.properties.smiles) + '@' + lig.properties.anchor
    lig.properties.path = ligand.properties.path

    return lig
Esempio n. 3
0
def _protonate_mol(mol: Molecule) -> Molecule:
    mol_cp = mol.copy()

    i = mol.index(mol.properties.dummies)
    anchor = mol_cp[i]
    if anchor.properties.charge != -1:
        raise NotImplementedError("Non-anionic anchors are not supported")

    anchor.properties.charge = 0
    return add_Hs(mol_cp, forcefield="uff")
Esempio n. 4
0
def _get_ligand(mol: Molecule) -> Molecule:
    """Extract a single ligand from **mol** as a copy."""
    at_list = []
    res = mol.atoms[-1].properties.pdb_info.ResidueNumber
    for at in reversed(mol.atoms):
        if at.properties.pdb_info.ResidueNumber == res:
            at_list.append(at)
        else:
            ret = Molecule()
            ret.atoms = at_list
            ret.bonds = list(
                set(chain.from_iterable(at.bonds for at in at_list)))
            return ret.copy()
Esempio n. 5
0
def get_asa_fragments(qd: Molecule) -> Tuple[List[Molecule], Molecule]:
    """Construct the fragments for an activation strain analyses.

    Parameters
    ----------
    qd : |plams.Molecule|
        A Molecule whose atoms' properties should be marked with `pdb_info.ResidueName`.
        Atoms in the core should herein be marked with ``"COR"``.

    Returns
    -------
    :class:`list` [|plams.Molecule|] and |plams.Molecule|
        A list of ligands and the core.
        Fragments are defined based on connectivity patterns (or lack thereof).

    """
    # Delete all atoms within the core
    mol_complete = qd.copy()
    core = Molecule()
    core.properties = mol_complete.properties.copy()

    core_atoms = [at for at in mol_complete if at.properties.pdb_info.ResidueName == 'COR']
    for atom in core_atoms:
        mol_complete.delete_atom(atom)
        atom.mol = core

    core.atoms = core_atoms
    mol_complete.properties.name += '_frags'
    core.properties.name += '_core'

    # Fragment the molecule
    ligand_list = mol_complete.separate()

    # Set atomic properties
    for at1, at2 in zip(chain(*ligand_list), mol_complete):
        at1.properties.symbol = at2.properties.symbol
        at1.properties.charge_float = at2.properties.charge_float
    for at1, at2 in zip(core, qd):
        at1.properties.symbol = at2.properties.symbol
        at1.properties.charge_float = at2.properties.charge_float

    # Set the prm parameter which points to the created .prm file
    name = mol_complete.properties.name[:-1]
    path = mol_complete.properties.path
    prm = mol_complete.properties.prm
    for mol in ligand_list:
        mol.properties.name = name
        mol.properties.path = path
        mol.properties.prm = prm

    return ligand_list, core
Esempio n. 6
0
def _get_neutral_frag(frag: Molecule) -> Molecule:
    """Return a neutral fragment for :func:`md_generator`."""
    frag_neutral = frag.copy()
    for anchor in frag_neutral:
        if anchor.properties.anchor:
            charge = anchor.properties.charge

            if charge > 0:
                run_ff_cationic(frag_neutral, anchor, MATCH_SETTINGS)
            elif charge < 0:
                run_ff_anionic(frag_neutral, anchor, MATCH_SETTINGS)
            break
    else:
        raise MoleculeError("Failed to identify the anchor atom within 'frag'")

    return frag_neutral
Esempio n. 7
0
def _lig_from_psf(mol: Molecule,
                  psf: PSFContainer) -> Dict[FrozenSet[int], Molecule]:
    residue_set = set(psf.segment_name[psf.residue_name == 'LIG'])

    ret: Dict[FrozenSet[int], Molecule] = {}
    for res in residue_set:
        _key = psf.residue_id[psf.segment_name == res]
        key = frozenset(_key)

        res_id = _key.iat[0]
        idx = psf.atoms[psf.residue_id == res_id].index
        i = idx[0] - 1
        j = idx[-1]

        ret[key] = mol.copy(atoms=mol.atoms[i:j])
        ret[key].round_coords(decimals=3)
    return ret
Esempio n. 8
0
def array_to_qd(mol: Molecule, xyz_array: np.ndarray,
                mol_out: Optional[Molecule] = None) -> Optional[List[Molecule]]:
    """Create :math:`n` copies of **mol** and update their Cartesian coordinates with **xyz_array**.

    Parameters
    ----------
    mol : |plams.Molecule|_
        A template PLAMS molecule consisting of :math:`n` atoms.

    xyz_array : :math:`m*n*3` |np.ndarray|_:
        A 3D array-like object representing the cartesian coordinates of
        :math:`m` molecules each with :math:`n` atoms.

    mol_out : |plams.Molecule|_
        Optional: If not ``None`` merge the to-be returned molecules with **mol_out**.

    Returns
    -------
    |list|_ [|Molecule|_]
        Optional: if **mol_out** = ``None`` return a list **mol** copies, each with its
        Cartesian coordinates replaced with a set of coordinates from **xyz_array**.

    """
    mol_list = []
    xyz_array = sanitize_dim_3(xyz_array)
    if mol_out is None:
        start = 2
    else:
        start = 1 + mol_out[-1].properties.get('pdb_info', {}).get('ResidueNumber', 1)

    for i, xyz in enumerate(xyz_array, start=start):
        mol_cp = mol.copy()
        mol_cp.from_array(xyz)
        for at in mol_cp:
            at.properties.pdb_info.ResidueNumber = i
        mol_list.append(mol_cp)

    # return a list of molecules or merge the list with an existing molecule
    if mol_out is None:
        return mol_list
    else:
        for m in mol_list:
            mol_out.add_molecule(m)
        return None
Esempio n. 9
0
def ligand_to_qd(core: Molecule, ligand: Molecule, path: str,
                 allignment: str = 'sphere',
                 idx_subset: Optional[Iterable[int]] = None) -> Molecule:
    """Function that handles quantum dot (qd, *i.e.* core + all ligands) operations.

    Combine the core and ligands and assign properties to the quantom dot.

    Parameters
    ----------
    core : |plams.Molecule|_
        A core molecule.

    ligand : |plams.Molecule|_
        A ligand molecule.

    allignment : :class:`str`
        How the core vector(s) should be defined.
        Accepted values are ``"sphere"`` and ``"surface"``:

        * ``"sphere"``: Vectors from the core anchor atoms to the center of the core.
        * ``"surface"``: Vectors perpendicular to the surface of the core.

        Note that for a perfect sphere both approaches are equivalent.

    idx_subset : :class:`Iterable<collections.anc.Iterable>` [:class:`int`], optional
        An iterable with the (0-based) indices defining a subset of atoms in **core**.
        Only relevant in the construction of the convex hull when ``allignment=surface``.

    Returns
    -------
    |plams.Molecule|_
        A quantum dot consisting of a core molecule and *n* ligands

    """
    def get_name() -> str:
        core_name = core.properties.name
        anchor = str(qd[-1].properties.pdb_info.ResidueNumber - 1)
        lig_name = ligand.properties.name
        return f'{core_name}__{anchor}_{lig_name}'

    idx_subset_ = idx_subset if idx_subset is not None else ...

    # Define vectors and indices used for rotation and translation the ligands
    vec1 = np.array([-1, 0, 0], dtype=float)  # All ligands are already alligned along the X-axis
    idx = ligand.get_index(ligand.properties.dummies) - 1
    ligand.properties.dummies.properties.anchor = True

    # Attach the rotated ligands to the core, returning the resulting strucutre (PLAMS Molecule).
    if allignment == 'sphere':
        vec2 = np.array(core.get_center_of_mass()) - sanitize_dim_2(core.properties.dummies)
        vec2 /= np.linalg.norm(vec2, axis=1)[..., None]
    elif allignment == 'surface':
        if isinstance(core.properties.dummies, np.ndarray):
            anchor = core.properties.dummies
        else:
            anchor = core.as_array(core.properties.dummies)
        vec2 = -get_surface_vec(np.array(core)[idx_subset_], anchor)
    else:
        raise ValueError(repr(allignment))

    lig_array = rot_mol(ligand, vec1, vec2, atoms_other=core.properties.dummies, core=core, idx=idx)
    qd = core.copy()
    array_to_qd(ligand, lig_array, mol_out=qd)
    qd.round_coords()

    # Set properties
    qd.properties = Settings({
        'indices': [i for i, at in enumerate(qd, 1) if
                    at.properties.pdb_info.ResidueName == 'COR' or at.properties.anchor],
        'path': path,
        'name': get_name(),
        'job_path': [],
        'prm': ligand.properties.get('prm')
    })

    # Print and return
    _evaluate_distance(qd, qd.properties.name)
    return qd
Esempio n. 10
0
def _construct_xyn(
    ion: str | int | Molecule,
    lig_count: int,
    lig: Molecule,
    lig_at: Atom,
    lig_idx: int,
) -> Tuple[Molecule, Atom]:
    """Construct the :math:`XYn` molecule for :func:`get_xyn`.

    Parameters
    ----------
    ion : |str|_, |int|_ or |plams.Molecule|_
        An ion (:math:`X`), be it mono- (*e.g.* atomic number or symbol) or poly-atomic.

    lig_count : int
        The number of to-be attached ligands per ion.

    lig : |plams.Molecule|_
        A single ligand molecule.

    lig_at : |plams.Atom|_
        The ligand anchor atom.

    lig_idx : int
        The (1-based) index of **lig_at**.

    Returns
    -------
    |plams.Molecule|_ and |plams.Atom|_
        A :math:`XY_{n}` molecule and the the charged atom from :math:`X`.

    """
    # Create a list of n ligands, n anchor atoms, n desired ion-anchor distances and n angles
    lig_gen = (lig.copy() for _ in range(lig_count))
    angle_ar = np.arange(0, 2 * np.pi, 2 * np.pi / lig_count)

    # Prepare vectors for translations and rotations
    vec1 = lig_at.vector_to(np.zeros(3))
    _vec = lig_at.vector_to(lig.get_center_of_mass())
    vec2 = get_perpendicular_vec(_vec)

    # Update the XYn molecule with ligands
    XYn, X = _parse_ion(ion)
    iterator = enumerate(zip(angle_ar, lig_gen), 2)
    for i, (angle, mol) in iterator:
        # Prepare for translations and rotations
        anchor = mol[lig_idx]
        rotmat = axis_rotation_matrix(vec2, angle)
        dist = anchor.radius + X.radius

        # Translate and rotate the ligand
        mol.translate(vec1)
        mol.rotate(rotmat)
        vec3 = anchor.vector_to(mol.get_center_of_mass())
        vec3 /= np.linalg.norm(vec3) / dist
        mol.translate(vec3)

        # Set pdb attributes
        for at in mol:
            at.properties.pdb_info.ResidueNumber = i
            at.properties.pdb_info.ResidueName = 'LIG'

        # Combine the translated and rotated ligand with XYn
        XYn.add_molecule(mol)
        XYn.add_bond(X, anchor)

    return XYn, X
Esempio n. 11
0
def dissociate_surface(mol: Molecule,
                       idx: npt.ArrayLike,
                       symbol: str = 'Cl',
                       lig_count: int = 1,
                       k: int = 4,
                       displacement_factor: float = 0.5,
                       **kwargs: Any) -> Generator[Molecule, None, None]:
    r"""A workflow for dissociating :math:`(XY_{n})_{\le m}` compounds from the surface of **mol**.

    The workflow consists of four distinct steps:

    1. Identify which atoms :math:`Y`, as specified by **symbol**,
       are located on the surface of **mol**.
    2. Identify which surface atoms are neighbors of :math:`X`, the latter being defined by **idx**.
    3. Identify which pairs of :math:`n*m` neighboring surface atoms are furthest removed from
       each other.
       :math:`n` is defined by **lig_count** and :math:`m`, if applicable, by the index along axis 1
       of **idx**.
    4. Yield :math:`(XY_{n})_{\le m}` molecules constructed from **mol**.

    Note
    ----
    The indices supplied in **idx** will, when applicable, be sorted along its last axis.

    Examples
    --------
    .. code:: python

        >>> from pathlib import Path

        >>> import numpy as np

        >>> from scm.plams import Molecule
        >>> from CAT.recipes import dissociate_surface, row_accumulator

        >>> base_path = Path(...)
        >>> mol = Molecule(base_path / 'mol.xyz')

        # The indices of, e.g., Cs-pairs
        >>> idx = np.array([
        ...     [1, 3],
        ...     [4, 5],
        ...     [6, 10],
        ...     [15, 12],
        ...     [99, 105],
        ...     [20, 4]
        ... ])

        # Convert 1- to 0-based indices by substracting 1 from idx
        >>> mol_generator = dissociate_surface(mol, idx-1, symbol='Cl', lig_count=1)

        # Note: The indices in idx are (always) be sorted along axis 1
        >>> iterator = zip(row_accumulator(np.sort(idx, axis=1)), mol_generator)
        >>> for i, mol in iterator:
        ...     mol.write(base_path / f'output{i}.xyz')


    Parameters
    ----------
    mol : :class:`Molecule<scm.plams.mol.molecule.Molecule>`
        The input molecule.

    idx : array-like, dimensions: :math:`\le 2`
        An array of indices denoting to-be dissociated atoms (*i.e.* :math:`X`);
        its elements will, if applicable, be sorted along the last axis.
        If a 2D array is provided then all elements along axis 1 will be dissociated
        in a cumulative manner.
        :math:`m` is herein defined as the index along axis 1.

    symbol : :class:`str` or :class:`int`
        An atomic symbol or number defining the super-set of the atoms to-be dissociated in
        combination with **idx** (*i.e.* :math:`Y`).

    lig_count : :class:`int`
        The number of atoms specified in **symbol** to-be dissociated in combination
        with a single atom from **idx** (*i.e.* :math:`n`).

    k : :class:`int`
        The number of atoms specified in **symbol** which are surrounding a single atom in **idx**.
        Must obey the following condition: :math:`k \ge 1`.

    displacement_factor : :class:`float`
        The smoothing factor :math:`n` for constructing a convex hull;
        should obey :math:`0 <= n <= 1`.
        Represents the degree of displacement of all atoms with respect to a spherical surface;
        :math:`n = 1` is a complete projection while :math:`n = 0` means no displacement at all.

        A non-zero value is generally recomended here,
        as the herein utilized :class:`ConvexHull<scipy.spatial.ConvexHull>` class
        requires an adequate degree of surface-convexness,
        lest it fails to properly identify all valid surface points.

    \**kwargs : :data:`Any<typing.Any>`
        Further keyword arguments for
        :func:`brute_uniform_idx()<CAT.attachment.distribution_brute.brute_uniform_idx>`.

    Yields
    ------
    :class:`Molecule<scm.plams.mol.molecule.Molecule>`
        Yields new :math:`(XY_{n})_{m}`-dissociated molecules.

    See Also
    --------
    :func:`brute_uniform_idx()<CAT.attachment.distribution_brute.brute_uniform_idx>`
        Brute force approach to creating uniform or clustered distributions.

    :func:`identify_surface()<nanoCAT.bde.identify_surface.identify_surface>`
        Take a molecule and identify which atoms are located on the surface,
        rather than in the bulk.

    :func:`identify_surface_ch()<nanoCAT.bde.identify_surface.identify_surface_ch>`
        Identify the surface of a molecule using a convex hull-based approach.

    :func:`dissociate_ligand()<nanoCAT.bde.dissociate_xyn.dissociate_ligand>`
        Remove :math:`XY_{n}` from **mol** with the help of the
        :class:`MolDissociater<nanoCAT.bde.dissociate_xyn.MolDissociater>` class.

    """
    idx_ar = _parse_idx(idx, ndim=2, copy=False)
    if idx_ar.ndim > 2:
        raise ValueError("'idx' expected a 2D array-like object; "
                         f"observed number of dimensions: {idx_ar.ndim}")
    idx_ar.sort(axis=1)
    idx_ar = idx_ar[:, ::-1]

    # Identify all atoms in **idx** located on the surface
    idx_surface_superset = _get_surface(
        mol, symbol=symbol, displacement_factor=displacement_factor
    )

    # Construct an array with the indices of opposing surface-atoms
    n = lig_count * idx_ar.shape[1]
    idx_surface = _get_opposite_neighbor(
        np.asarray(mol), idx_ar, idx_surface_superset, n=n, k=k, **kwargs
    )

    # Dissociate and yield new molecules
    idx_ar += 1
    idx_surface += 1
    for idx_pair, idx_pair_surface in zip(idx_ar, idx_surface):
        mol_tmp = mol.copy()
        _mark_atoms(mol_tmp, idx_pair_surface)

        for i in idx_pair:
            mol_tmp = next(dissociate_ligand(mol_tmp, lig_count=lig_count,
                                             core_index=i, lig_core_pairs=1,
                                             **kwargs))
            yield mol_tmp
Esempio n. 12
0
def dissociate_bulk(
    mol: Molecule,
    symbol_x: str,
    symbol_y: None | str = None,
    count_x: int = 1,
    count_y: int = 1,
    n_pairs: int = 1,
    k: None | int = 4,
    r_max: None | float = None,
    mode: _ModeKind = 'uniform',
    **kwargs: Any,
) -> Molecule:
    r"""A workflow for removing :math:`XY`-based compounds from the bulk of **mol**.

    Examples
    --------
    .. code-block:: python

        >>> from scm.plams import Molecule
        >>> from CAT.recipes import dissociate_bulk

        >>> mol: Molecule = ...

        # Remove two PbBr2 pairs in a system where
        # each lead atom is surrounded by 6 bromides
        >>> mol_out1 = dissociate_bulk(
        ...     mol, symbol_x="Pb", symbol_y="Br", count_y=2, n_pairs=2, k=6
        ... )

        # The same as before, expect all potential bromides are
        # identified based on a radius, rather than a fixed number
        >>> mol_out2 = dissociate_bulk(
        ...     mol, symbol_x="Pb", symbol_y="Br", count_y=2, n_pairs=2, r_max=5.0
        ... )

        # Convert a fraction to a number of pairs
        >>> f = 0.5
        >>> count_x = 2
        >>> symbol_x = "Pb"
        >>> n_pairs = int(f * sum(at.symbol == symbol_x for at in mol) / count_x)

        >>> mol_out3 = dissociate_bulk(
        ...     mol, symbol_x="Pb", symbol_y="Br", count_y=2, n_pairs=n_pairs, k=6
        ... )

    Parameters
    ----------
    mol : :class:`~scm.plams.mol.molecule.Molecule`
        The input molecule.
    symbol_x : :class:`str` or :class:`int`
        The atomic symbol or number of the central (to-be dissociated) atom(s) :math:`X`.
    symbol_y : :class:`str` or :class:`int`, optional
        The atomic symbol or number of the surrounding (to-be dissociated) atom(s) :math:`Y`.
        If :data:`None`, do not dissociate any atoms :math:`Y`.
    count_x : :class:`int`
        The number of central atoms :math:`X` per individual to-be dissociated cluster.
    count_y : :class:`int`
        The number of surrounding atoms :math:`Y` per individual to-be dissociated cluster.
    n_pairs : :class:`int`
        The number of to-be removed :math:`XY` fragments.
    k : :class:`int`, optional
        The total number of :math:`Y` candidates surrounding each atom :math:`X`.
        This value should be smaller than or equal to **count_y**.
        See the **r_max** parameter for a radius-based approach;
        note that both parameters are not mutually exclusive.
    r_max : :class:`int`, optional
        The radius used for searching for :math:`Y` candidates surrounding each atom :math:`X`.
        See **k** parameter to use a fixed number of nearest neighbors;
        note that both parameters are not mutually exclusive.
    mode : :class:`str`
        How the subset of to-be removed atoms :math:`X` should be generated.
        Accepts one of the following values:

        * ``"random"``: A random distribution.
        * ``"uniform"``: A uniform distribution; the distance between each successive atom and
          all previous points is maximized.
        * ``"cluster"``: A clustered distribution; the distance between each successive atom and
          all previous points is minmized.

    Keyword Arguments
    -----------------
    \**kwargs : :data:`~typing.Any`
        Further keyword arguments for :func:`CAT.distribution.distribute_idx`.

    Returns
    -------
    :class:`~scm.plams.mol.molecule.Molecule`
        The molecule with :math:`n_{\text{pair}} * XY` fragments removed.

    """
    if n_pairs < 0:
        raise ValueError("`n_pairs` must be larger than or equal to 0; "
                         f"observed value: {n_pairs!r}")
    elif n_pairs == 0:
        return mol.copy()

    # Validate the input args
    if count_x <= 0:
        raise ValueError("`count_x` expected a an integer larger than 0; "
                         f"observed value: {count_x!r}")
    elif count_y < 0:
        raise ValueError("`count_y` expected a an integer larger than 0; "
                         f"observed value: {count_y!r}")
    elif count_y == 0:
        symbol_y = None

    # Parse `symbol_x`
    atnum_x = to_atnum(symbol_x)
    idx_x = np.fromiter([i for i, at in enumerate(mol) if at.atnum == atnum_x], dtype=np.intp)
    if len(idx_x) == 0:
        raise MoleculeError(f"No atoms {to_symbol(atnum_x)!r} in {mol.get_formula()}")

    # Parse `symbol_y`
    if symbol_y is not None:
        atnum_y = to_atnum(symbol_y)
        idx_y = np.fromiter([i for i, at in enumerate(mol) if at.atnum == atnum_y], dtype=np.intp)
        if len(idx_y) == 0:
            raise MoleculeError(f"No atoms {to_symbol(atnum_y)!r} in {mol.get_formula()}")

    # Parse `k` and `r_max`
    if k is None and r_max is None:
        raise TypeError("`k` and `r_max` cannot be both set to None")
    if k is None:
        k = cast(int, len(idx_y))
    if r_max is None:
        r_max = cast(float, np.inf)

    # Validate `n_pairs`
    if (n_pairs * count_x) > len(idx_x):
        x = n_pairs * count_x
        raise ValueError(
            f"Insufficient atoms {to_symbol(atnum_x)!r} in {mol.get_formula()}; "
            f"atleast {x} required with `n_pairs={n_pairs!r}` and `count_x={count_x!r}"
        )
    elif symbol_y is not None and (n_pairs * count_y) > len(idx_y):
        y = n_pairs * count_y
        raise ValueError(
            f"Insufficient atoms {to_symbol(atnum_y)!r} in {mol.get_formula()}; "
            f"atleast {y} required with `n_pairs={n_pairs!r}` and `count_y={count_y!r}"
        )

    # Identify a subset of atoms `x`
    if count_x != 1:
        if "cluster_size" in kwargs:
            raise TypeError("Specifying both `cluster_size` and `count_x` is not supported")
        kwargs["cluster_size"] = count_x
    idx_x_sorted = distribute_idx(mol, idx_x, f=1, **kwargs)
    idx_x_sorted.shape = -1, count_x

    if symbol_y is not None:
        # Identify a subset of matching atoms `y`
        idx_x_subset, idx_y_subset, = _get_opposite_neighbor2(
            np.asarray(mol), idx_x_sorted, idx_y, n_pairs, n=count_y, k=k, r_max=r_max
        )

        # Concatenate the subsets
        idx_xy_subset = np.concatenate([idx_x_subset.ravel(), idx_y_subset.ravel()])
    else:
        idx_xy_subset = idx_x[:(n_pairs * count_x)]
    idx_xy_subset.sort()
    idx_xy_subset += 1

    # Create and return the new molecule
    ret = mol.copy()
    atom_set = {ret[i] for i in idx_xy_subset}
    for at in atom_set:
        ret.delete_atom(at)
    return ret
Esempio n. 13
0
def replace_surface(mol: Molecule,
                    symbol: Union[str, int],
                    symbol_new: Union[str, int] = 'Cl',
                    nth_shell: Union[int, Iterable[int]] = 0,
                    f: float = 0.5,
                    mode: str = 'uniform',
                    displacement_factor: float = 0.5,
                    **kwargs: Any) -> Molecule:
    r"""A workflow for identifying all surface atoms in **mol** and replacing a subset of them.

    Consists of three distinct steps:

    1. Identifying which atoms, with a user-specified atomic **symbol**,
       are located on the surface of **mol** rather than in the bulk.
    2. Define a subset of the newly identified surface atoms using one of CAT's
       distribution algorithms.
    3. Create and return a molecule where the atom subset defined in step 2
       has its atomic symbols replaced with **symbol_new**.

    Examples
    --------
    Replace 75% of all surface ``"Cl"`` atoms with ``"I"``.

    .. code:: python

        >>> from scm.plams import Molecule
        >>> from CAT.recipes import replace_surface

        >>> mol = Molecule(...)  # Read an .xyz file
        >>> mol_new = replace_surface(mol, symbol='Cl', symbol_new='I', f=0.75)
        >>> mol_new.write(...)  # Write an .xyz file

    The same as above, except this time the new ``"I"`` atoms are all deleted.

    .. code:: python

        >>> from scm.plams import Molecule
        >>> from CAT.recipes import replace_surface

        >>> mol = Molecule(...)  # Read an .xyz file
        >>> mol_new = replace_surface(mol, symbol='Cl', symbol_new='I', f=0.75)

        >>> del_atom = [at for at in mol_new if at.symbol == 'I']
        >>> for at in del_atom:
        ...     mol_new.delete_atom(at)
        >>> mol_new.write(...)  # Write an .xyz file

    Parameters
    ----------
    mol : :class:`~scm.plams.mol.molecule.Molecule`
        The input molecule.

    symbol : :class:`str` or :class:`int`
        An atomic symbol or number defining the super-set of the surface atoms.

    symbol_new : :class:`str` or :class:`int`
        An atomic symbol or number which will be assigned to the new surface-atom subset.

    nth_shell : :class:`int` or :class:`~collections.abc.Iterable` [:class:`int`]
        One or more integers denoting along which shell-surface(s) to search.
        For example, if ``symbol = "Cd"`` then ``nth_shell = 0`` represents the surface,
        ``nth_shell = 1`` is the first sub-surface ``"Cd"`` shell and
        ``nth_shell = 2`` is the second sub-surface ``"Cd"`` shell.
        Using ``nth_shell = [1, 2]`` will search along both the first and second
        ``"Cd"`` sub-surface shells.
        Note that a :exc:`Zscm.plams.core.errors.MoleculeError` will be raised if
        the specified **nth_shell** is larger than the actual number of available
        sub-surface shells.

    f : :class:`float`
        The fraction of surface atoms whose atom types will be replaced with **symbol_new**.
        Must obey the following condition: :math:`0 < f \le 1`.

    mode : :class:`str`
        How the subset of surface atoms will be generated.
        Accepts one of the following values:

        * ``"random"``: A random distribution.
        * ``"uniform"``: A uniform distribution; maximizes the nearest-neighbor distance.
        * ``"cluster"``: A clustered distribution; minimizes the nearest-neighbor distance.

    displacement_factor : :class:`float`
        The smoothing factor :math:`n` for constructing a convex hull;
        should obey :math:`0 <= n <= 1`.
        Represents the degree of displacement of all atoms with respect to a spherical surface;
        :math:`n = 1` is a complete projection while :math:`n = 0` means no displacement at all.

        A non-zero value is generally recomended here,
        as the herein utilized :class:`~scipy.spatial.ConvexHull` class
        requires an adequate degree of surface-convexness,
        lest it fails to properly identify all valid surface points.

    \**kwargs : :data:`~typing.Any`
        Further keyword arguments for
        :func:`~CAT.attachment.distribution.distribute_idx`.

    Returns
    -------
    :class:`~scm.plams.mol.molecule.Molecule`
        A new Molecule with a subset of its surface atoms replaced with **symbol_new**.

    See Also
    --------
    :func:`~CAT.attachment.distribution.distribute_idx`
        Create a new distribution of atomic indices from **idx** of length :code:`f * len(idx)`.

    :func:`~nanoCAT.bde.identify_surface.identify_surface`
        Take a molecule and identify which atoms are located on the surface,
        rather than in the bulk.

    :func:`~nanoCAT.bde.identify_surface.identify_surface_ch`
        Identify the surface of a molecule using a convex hull-based approach.

    """
    # Parse input arguments
    xyz = np.array(mol, dtype=float, ndmin=2, copy=False)
    atnum = to_atnum(symbol)
    atnum_new = to_atnum(symbol_new)

    # Define the surface-atom subset
    idx = np.fromiter((i for i, at in enumerate(mol) if at.atnum == atnum),
                      dtype=int)
    try:
        idx_surface = idx[_collect_surface(xyz[idx], displacement_factor,
                                           nth_shell)]
    except MoleculeError as exc:
        nth = exc.args[0]
        if nth == 0:
            exc = MoleculeError(
                f"No atoms with atomic symbol {to_symbol(symbol)!r} available in "
                f"{mol.get_formula()!r}")
        else:
            exc = MoleculeError(
                f"No subsurface shell >= {nth!r} of atom {to_symbol(symbol)!r} "
                f"available in {mol.get_formula()!r}")
        exc.__cause__ = None
        raise exc

    try:
        idx_surface_subset = distribute_idx(xyz,
                                            idx_surface,
                                            f=f,
                                            mode=mode,
                                            **kwargs)
    except ValueError as ex:
        raise MoleculeError(
            "Failed to identify any surface atoms with atomic symbol "
            f"{to_symbol(symbol)!r} in {mol.get_formula()!r}") from ex
    else:
        idx_surface_subset += 1

    # Update atomic symbols and return
    ret = mol.copy()
    for i in idx_surface_subset:
        ret[i].atnum = atnum_new
    return ret
Esempio n. 14
0
def canonicalize_mol(mol: Molecule,
                     inplace: bool = True) -> Optional[Molecule]:
    """Take a PLAMS molecule and sort its atoms based on their canonical rank.

    .. _rdkit.Chem.CanonicalRankAtoms: https://www.rdkit.org/docs/source/rdkit.Chem.rdmolfiles.html#rdkit.Chem.rdmolfiles.CanonicalRankAtoms

    Examples
    --------
    .. code:: python

        >>> from scm.plams import Molecule, from_smiles

        # Methane
        >>> mol: Molecule = from_smiles('C')
        >>> print(mol)  # doctest: +SKIP
        Atoms:
            1         H      0.640510      0.640510     -0.640510
            2         H      0.640510     -0.640510      0.640510
            3         C      0.000000      0.000000      0.000000
            4         H     -0.640510      0.640510      0.640510
            5         H     -0.640510     -0.640510     -0.640510

        >>> canonicalize_mol(mol)
        >>> print(mol)  # doctest: +SKIP
        Atoms:
            1         C      0.000000      0.000000      0.000000
            2         H     -0.640510     -0.640510     -0.640510
            3         H     -0.640510      0.640510      0.640510
            4         H      0.640510     -0.640510      0.640510
            5         H      0.640510      0.640510     -0.640510

    Parameters
    ----------
    mol : |plams.Molecule|_
        A PLAMS molecule.

    inplace : bool
        If ``True``, perform an inplace update of **mol** rather than returning
        a new :class:`Molecule` instance.

    Returns
    -------
    |plams.Molecule|_
        Optional: if ``inplace=False``, return a copy of **mol** with its atoms sorted by their
        canonical rank.

    See Also
    --------
    * rdkit.Chem.CanonicalRankAtoms_: Returns the canonical atom ranking for each atom of a
      molecule fragment.

    """  # noqa
    rdmol = molkit.to_rdmol(mol)
    idx_collection = Chem.CanonicalRankAtoms(rdmol)

    # Reverse sort Molecule.atoms by the atomic indices in idx_collection
    if inplace:
        mol.atoms = [
            at
            for _, at in sorted(zip(idx_collection, mol.atoms), reverse=True)
        ]
        return
    else:
        ret = mol.copy()
        ret.atoms = [
            at
            for _, at in sorted(zip(idx_collection, ret.atoms), reverse=True)
        ]
        return ret
Esempio n. 15
0
def get_lig_charge(ligand: Molecule,
                   desired_charge: float,
                   ligand_idx: Union[None, int, Iterable[int], slice] = None,
                   invert_idx: bool = False,
                   settings: Optional[Settings] = None,
                   path: Union[None, str, PathLike] = None,
                   folder: Union[None, str, PathLike] = None) -> pd.Series:
    """Calculate and rescale the **ligand** charges using MATCH_.

    The atomic charges in **ligand_idx** wil be altered such that the molecular
    charge of **ligand** is equal to **desired_charge**.

    .. _MATCH: http://brooks.chem.lsa.umich.edu/index.php?page=match&subdir=articles/resources/software

    Examples
    --------
    .. code:: python

        >>> import pandas as pd
        >>> from scm.plams import Molecule

        >>> from CAT.recipes import get_lig_charge

        >>> ligand = Molecule(...)
        >>> desired_charge = 0.66
        >>> ligand_idx = 0, 1, 2, 3, 4

        >>> charge_series: pd.Series = get_lig_charge(
        ...     ligand, desired_charge, ligand_idx
        ... )

        >>> charge_series.sum() == desired_charge
        True


    Parameters
    ----------
    ligand : :class:`~scm.plams.core.mol.molecule.Molecule`
        The input ligand.

    desired_charge : :class:`float`
        The desired molecular charge of the ligand.

    ligand_idx : :class:`int` or :class:`~collections.abc.Iterable` [:class:`int`], optional
        An integer or iterable of integers representing atomic indices.
        The charges of these atoms will be rescaled;
        all others will be frozen with respect to the MATCH output.
        Setting this value to ``None`` means that *all* atomic charges are considered variable.
        Indices should be 0-based.

    invert_idx : :class:`bool`
        If ``True`` invert **ligand_idx**, *i.e.* all atoms specified therein are
        now threated as constants and the rest as variables,
        rather than the other way around.

    settings : :class:`~scm.plams.core.settings.Settings`, optional
        The input settings for :class:`~nanoCAT.ff.match_job.MatchJob`.
        Will default to the ``"top_all36_cgenff_new"`` forcefield if not specified.

    path : :class:`str` or :class:`~os.PathLike`, optional
        The path to the PLAMS workdir as passed to :func:`~scm.plams.core.functions.init`.
        Will default to the current working directory if ``None``.

    folder : :class:`str` or :class:`~os.PathLike`, optional
        The name of the to-be created to the PLAMS working directory
        as passed to :func:`~scm.plams.core.functions.init`.
        Will default to ``"plams_workdir"`` if ``None``.

    Returns
    -------
    :class:`pd.Series` [:class:`str`, :class:`float`]
        A Series with the atom types of **ligand** as keys and atomic charges as values.

    See Also
    --------
    :class:`MatchJob`
        A :class:`~scm.plams.core.basejob.Job` subclass for interfacing with MATCH_:
        Multipurpose Atom-Typer for CHARMM.

    """  # noqa
    if settings is None:
        settings = Settings()
        settings.input.forcefield = 'top_all36_cgenff_new'

    # Run the MATCH Job
    init(path, folder)
    ligand = ligand.copy()
    run_match_job(ligand, settings, action='raise')
    finish()

    # Extract the charges and new atom types
    count = len(ligand)
    charge = np.fromiter((at.properties.charge_float for at in ligand), count=count, dtype=float)
    symbol = np.fromiter((at.properties.symbol for at in ligand), count=count, dtype='<U4')

    # Identify the atom subset
    idx = _parse_ligand_idx(ligand_idx)
    if invert_idx:
        idx = _invert_idx(idx, count)
    try:
        idx_len = len(idx)  # type: ignore
    except TypeError:  # idx is a slice object
        idx_len = len(charge[idx])

    # Correct the charges and return
    charge[idx] -= (charge.sum() - desired_charge) / idx_len
    return pd.Series(charge, index=symbol, name='charge')