def get_frag_size(self, bond: Bond, anchor: Atom) -> int: """Return the size of the fragment containing **atom** if this instance was split into two by the breaking of **bond**. Parameters ---------- bond : |plams.Bond| A PLAMS bond. anchor : |plams.Atom| A PLAMS atom. The size of the fragment containg this atom will be returned. Returns ------- :class:`int` The number of atoms in the fragment containing **atom**. """ # noqa if bond not in self.bonds: raise MoleculeError( 'get_frag_size: The argument bond should be of type plams.Bond and ' 'be part of the Molecule') elif anchor not in self.atoms: raise MoleculeError( 'get_frag_size: The argument atom should be of type plams.Atom and ' 'be part of the Molecule') for at in self: at._visited = False frag1 = set() frag2 = set() def dfs(at1: Atom, atom_set: Set[Atom]): at1._visited = True atom_set.add(at1) for at2 in at1.neighbors(): if at2._visited: continue dfs(at2, atom_set) bond.atom1._visited = bond.atom2._visited = True dfs(bond.atom1, frag1) dfs(bond.atom2, frag2) for at in self.atoms: del at._visited # fragment #1 contains **atom** if anchor in frag1: if bond.atom1 not in frag1: bond.atom1, bond.atom2 = bond.atom2, bond.atom1 return len(frag1) if bond.atom1 not in frag2: bond.atom1, bond.atom2 = bond.atom2, bond.atom1 return len(frag2)
def _multi_lig_anchor(qd_series, ligands, path, anchor, allignment) -> np.ndarray: """Gogogo.""" ret = np.empty((len(ligands), len(qd_series)), dtype=object) for i, qd in enumerate(qd_series): qd = qd.copy() for j, (ligand, atnum) in enumerate(zip(ligands, anchor)): qd.set_atoms_id() try: atoms = [at for at in qd if at.atnum == atnum] assert atoms except AssertionError as ex: raise MoleculeError( f'Failed to identify {to_symbol(atnum)!r} in ' f'{qd.get_formula()!r}') from ex coords = Molecule.as_array(None, atom_subset=atoms) qd.properties.dummies = np.array(coords, ndmin=2, dtype=float) qd = ligand_to_qd(qd, ligand, path=path, allignment=allignment, idx_subset=qd.properties.indices) ret[j, i] = qd for at in reversed(atoms): qd.delete_atom(qd[at.id]) qd.unset_atoms_id() return ret
def neighbors_mod(self, atom: Atom, exclude: Union[int, str] = 1) -> List[Atom]: """A modified PLAMS function: Allows the exlucison of specific elements from the return list. Return a list of neighbors of **atom** within the molecule. Atoms with **atom** has to belong to the molecule. Returned list follows the same order as the **atom.bond** attribute. Parameters ---------- atom : |plams.Atom|_ The plams atom whose neighbours will be returned. exclude : |str|_ or |int|_ Exclude all neighbours with a specific atomic number or symbol. Returns ------- |list|_ [|plams.Atom|_] A list of all neighbours of **atom**. """ exclude = to_atnum(exclude) if atom.mol != self: raise MoleculeError( 'neighbors: passed atom should belong to the molecule') return [ b.other_end(atom) for b in atom.bonds if b.other_end(atom).atnum != exclude ]
def validate_core_atom(atom): # noqa: E302 """Parse and validate the ``["optional"]["qd"]["dissociate"]["core_atom"]`` argument.""" # Potential atomic number or symbol if isinstance(atom, int) or atom in PeriodicTable.symtonum: return to_atnum(atom) # Potential SMILES string try: mol = from_smiles(atom) except Exception as ex: raise ValueError(f'Failed to recognize {atom!r} as a valid atomic number, ' 'atomic symbol or SMILES string') from ex # Double check the SMILES string: charge_dict = {} for at in mol: charge = at.properties.charge try: charge_dict[charge] += 1 except KeyError: charge_dict[charge] = 1 if 0 in charge_dict: del charge_dict[0] # Only a single charged atom is allowed if len(charge_dict) > 1: charge_count = sum([v for v in charge_dict.values()]) raise MoleculeError(f'The SMILES string {repr(atom)} contains more than one charged atom: ' f'charged atom count: {charge_count}') return mol
def _get_anchor(mol: Molecule) -> Tuple[int, Atom]: """Find the first atom in **mol** marked with :attr:`Atom.properties` ``["anchor"]``. Parameters ---------- |plams.Molecule|_ A PLAMS molecule containing (at least) one atom marked whose :attr:`Atom.properties` attribute contains the ``["anchor"]`` key. Returns ------- |int|_ and |plams.Atom|_ The (0-based) index and the matching Atom Raises ------ MoleculeError Raised if no atom with the :attr:`Atom.properties` ``["anchor"]`` key is found. """ for i, at in enumerate(mol.atoms, 1): if at.properties.anchor: return i, at raise MoleculeError("No atom with the Atom.properties.anchor found")
def _evaluate_distance(mol: np.ndarray, name: str, threshold: float = 1.0, action: str = 'warn') -> Union[None, NoReturn]: """Eavluate all the distance matrix of **xyz3D** and perform **action** when distances are below **threshold**.""" # noqa try: action_func = WARN_MAP[action] except KeyError as ex: raise ValueError("'action' expected either 'warn', 'raise' or 'ignore'; " f"observed value: {action!r}") from ex else: if action == 'ignore': return None xyz = np.asarray(mol) tree = cKDTree(xyz) dist, idx = tree.query(xyz, k=[2]) dist = dist.T[0] idx = idx.T[0] bool_ar = dist < threshold if bool_ar.any(): _idx2 = np.stack([np.arange(len(idx)), idx]).T _idx2 += 1 _idx2.sort(axis=1) idx2 = np.unique(_idx2[bool_ar], axis=0) n = len(idx2) exc = MoleculeError( f"\nEncountered >= {n} unique atom-pairs at a distance shorter than " f"{threshold} Angstrom in {name!r}:\n{aNDRepr.repr(idx2.T)}" ) action_func(exc)
def _collect_surface(xyz: np.ndarray, displacement_factor: float, nth_shell: Union[int, Iterable[int]] = 0) -> np.ndarray: """Collect all user-specified surface and/or sub-surface shells of **xyz**.""" n = -1 xyz_df = pd.DataFrame(xyz) n_set = {nth_shell } if not isinstance(nth_shell, abc.Iterable) else set(nth_shell) ret = [] while True: n += 1 # Check if the DataFrame is empty if not xyz_df.size: raise MoleculeError(n) idx = identify_surface_ch(xyz_df, n=displacement_factor) bool_ar = np.ones(len(xyz_df), dtype=bool) bool_ar[idx] = False if n not in n_set: xyz_df = xyz_df.loc[bool_ar, :] continue else: index = xyz_df.index[~bool_ar] xyz_df = xyz_df.loc[bool_ar, :] n_set.remove(n) ret += index.tolist() if not n_set: return np.fromiter(ret, count=len(ret), dtype=int)
def degree_of_separation(mol: Molecule, limit: float = np.inf, bond_mat: Optional[np.ndarray] = None) -> np.ndarray: r"""Construct a matrix with the degree of separation of all atom-pairs in **mol**. Each element :math:`D_{i, j}^{\text{sep}}` in the to-be returned matrix :math:`\boldsymbol{D}^{\text{sep}} \in \mathbb{R}_{+}^{n,n}` represents the (minimum) number of bonds seperating atoms :math:`i` and :math:`j`. The degree of seperation is set to ``inf`` if two atoms are disjointed, *i.e.* there is no combination of bonds connecting aformentioned atom-pair. Parameters ---------- mol : :class:`Molecule` or :class:`MultiMolecule` A PLAMS Molecule with bonds. limit : :class:`float` The maximum degree of separation to calculate, must be >= 0. Using a smaller limit will decrease computation time by aborting calculations between pairs that are separated by a degree of separation > limit. For such pairs, the degree of separation will be equal to ``inf`` (*i.e.* not connected). bond_mat : array_like, optional An optional bond matrix or other object compatible with SciPy's :class:`csr_matrix<scipy.sparse.csr_matrix>`. If ``None``, calculate the sparse bond matrix with :func:`get_sparse_bond_matrix`. Returns ------- (:math:`n`, :math:`n`) :class:`numpy.ndarray` [:class:`float`] A symmetric 2D array containing the degrees of separation between all atom-pairs in **mol**. Values are set to ``inf`` if the responsible atoms are disjointed. Raises ------ :exc:`MoleculeError<scm.plams.core.errors.MoleculeError>` Raised if the passed Molecule has no bonds and **bond_mat** is ``None``. """ len_mol = len(mol) if bond_mat is None: if not mol.bonds: raise MoleculeError("The passed Molecule has no bonds") sparse_bond_mat = sparse_bond_matrix(mol, dtype=bool) else: sparse_bond_mat = csr_matrix(bond_mat, dtype=bool, copy=False, shape=(len_mol, len_mol)) return dijkstra(sparse_bond_mat, directed=False, limit=limit, indices=np.arange(len_mol), return_predecessors=False)
def _get_xyz(mol: Molecule, atom: AtomSymbol) -> np.ndarray: """Return the Cartesian coordinates of **mol** belonging to the atom subset of *atom*.""" atnum = to_atnum(atom) xyz = mol.as_array(atom_subset=(at for at in mol if at.atnum == atnum)) if not xyz.any(): raise MoleculeError(f"No atoms with atomic symbol {to_symbol(atom)!r} " f"in {mol.get_formula()!r}") return xyz
def get_index(self, value: Union[Atom, Bond]) -> Union[int, Tuple[int, int]]: """Return the first index of **value** within this instance. **value** expects an instance of either :class:`Atom` or :class:`Bond`. Note ---- Following the convention addopted by PLAMS, the returned index/indices are 1-based rather than 0-based. Parameters ---------- value : |plams.Atom|_ or |plams.Bond|_ A PLAMS atom or bonds. Returns ------- |int|_ or |tuple|_ [|int|_] An atomic index or (**value**: |plams.Atom|_) or a tuple of two atomic indices (**item**: |plams.Bond|_). Raises ------ TypeError Raised if **value** is an instance of neither :class:`Atom` nor :class:`Bond`. MoleculeError Raised if the passed :class:`Atom` or :class:`Bond` is not in this instance. """ if isinstance(value, Atom): if value not in self.atoms: raise MoleculeError( "Passed atom, {repr(value)}, is not in this instance") return 1 + self.atoms.index(value) elif isinstance(value, Bond): if value not in self.bonds: raise MoleculeError( f"Passed bond, {repr(value)}, is not in this instance") at1, at2 = value return 1 + self.atoms.index(at1), 1 + self.atoms.index(at2) err = "item excepts an instance of 'Atom' or 'Bond'; observed type: '{}'" raise TypeError(err.format(value.__class__.__name__))
def prep_core(core_df: SettingsDataFrame) -> SettingsDataFrame: """Function that handles the identification and marking of all core anchor atoms. Parameters ---------- core_df : |CAT.SettingsDataFrame|_ A dataframe of core molecules. Molecules are stored in the *mol* column. Returns ------- |CAT.SettingsDataFrame|_ A dataframe of cores with all anchor atoms removed. """ # Unpack arguments anchor = core_df.settings.optional.core.anchor subset = core_df.settings.optional.core.subset idx_tuples = [] for core in core_df[MOL]: # Checks the if the anchor is a string (atomic symbol) or integer (atomic number) formula = core.get_formula() # Returns the indices of all anchor atom ligand placeholders in the core if not core.properties.dummies: at_idx = np.array( [i for i, atom in enumerate(core) if atom.atnum == anchor]) else: dummies = core.properties.dummies at_idx = np.fromiter(dummies, count=len(dummies), dtype=int) at_idx -= 1 if subset: at_idx = distribute_idx(core, at_idx, **subset) # Convert atomic indices into Atoms at_idx += 1 at_idx.sort() core.properties.dummies = dummies = [core[i] for i in at_idx] # Returns an error if no anchor atoms were found if not dummies: raise MoleculeError( f"{repr(to_symbol(anchor))} was specified as core anchor atom, yet " f"no matching atoms were found in {core.properties.name} " f"(formula: {formula})") # Delete all core anchor atoms for at in dummies: core.delete_atom(at) idx_tuples.append((formula, ' '.join(at_idx.astype(str)))) # Create and return a new dataframe idx = pd.MultiIndex.from_tuples(idx_tuples, names=['formula', 'anchor']) ret = core_df.reindex(idx) ret[MOL] = core_df[MOL].values return ret
def allign_axis(mol: Molecule, anchor: Atom): """Allign a molecule with the Cartesian X-axis; setting **anchor** as the origin.""" try: idx = mol.atoms.index(anchor) except ValueError as ex: raise MoleculeError("The passed anchor is not in mol") from ex xyz = mol.as_array() # Allign the molecule with the X-axis rotmat = optimize_rotmat(xyz, idx) xyz[:] = xyz @ rotmat.T xyz -= xyz[idx] xyz[:] = xyz.round(decimals=3) mol.from_array(xyz)
def _get_neutral_frag(frag: Molecule) -> Molecule: """Return a neutral fragment for :func:`md_generator`.""" frag_neutral = frag.copy() for anchor in frag_neutral: if anchor.properties.anchor: charge = anchor.properties.charge if charge > 0: run_ff_cationic(frag_neutral, anchor, MATCH_SETTINGS) elif charge < 0: run_ff_anionic(frag_neutral, anchor, MATCH_SETTINGS) break else: raise MoleculeError("Failed to identify the anchor atom within 'frag'") return frag_neutral
def _get_surface( mol: Molecule, symbol: str, displacement_factor: float = 0.5, ) -> _NDArray[np.int64]: """Return the indices of all atoms, whose atomic symbol is equal to **atom_symbol**, located on the surface.""" # noqa # Identify all atom with atomic symbol **atom_symbol** atnum = to_atnum(symbol) idx = np.array([i for i, atom in enumerate(mol) if atom.atnum == atnum], dtype=np.int64) xyz = np.asarray(mol) # Identify all atoms on the surface try: return idx[identify_surface_ch(xyz[idx], n=displacement_factor)] except ValueError as ex: raise MoleculeError(f"No atoms with atomic symbol/number {repr(symbol)} available in " f"{mol.get_formula()!r}") from ex
def residue_argsort(mol, concatenate=True): # noqa: E302 """Return the indices that would sort this instance by residue number. Residues are defined based on moleculair fragments based on **self.bonds**. Parameters ---------- mol : :class:`~scm.plams.mol.molecule.Molecule` A PLAMS Molecule with bonds. concatenate : bool If ``False``, returned a nested list with atomic indices. Each sublist contains the indices of a single residue. Returns ------- :class:`numpy.ndarray` [:class:`int`], shape :math:`(n,)` or :class:`list` [:class:`list` [:class:`int`]] A 1D array of indices that would sort :math:`n` atoms this instance or a nested list of indices. """ # noqa: E501 if not mol.bonds: raise MoleculeError("The passed Molecule has no bonds") # Define residues frags = separate_mod(mol) symbol = np.fromiter((at.symbol for at in mol), count=len(mol), dtype='U2') # Sort the residues core = [] ligands = [] for frag in frags: if len(frag) == 1: core += frag else: i = np.array(frag) argsort = np.argsort(symbol[i]) ligands.append(i[argsort].tolist()) core.sort() ligands.sort() ret = [core] + ligands if concatenate: return np.concatenate(ret) return ret
def _parse_ion(ion: Molecule | str | int) -> Tuple[Molecule, Atom]: """Interpret and parse the **ion** argument in :func:`.get_xyn`. Construct and return a new :math:`XY_{n=0}` molecule and the atom :math:`X` itself. If **ion** is a polyatomic ion then :math:`XY_{n=0}` is a copy of **ion** and :math:`X` is the first atom with a non-zero charge. Parameters ---------- ion : |str|_, |int|_ or |plams.Molecule|_ An ion (:math:`X`), be it mono- (*e.g.* atomic number or symbol) or poly-atomic. Returns ------- |plams.Molecule|_ and |plams.Atom|_ A :math:`XY_{n=0}` molecule and the the charged atom from :math:`X`. Raises ------ MoleculeError Raised if ion is an instance of :math:`Molecule` but does not contain any charged atoms. """ if isinstance(ion, Molecule): XYn = ion.copy() for i, at in enumerate(XYn, 1): if not at.properties.charge: continue # Found an atom with non-zero charge; return a copy ret = XYn.copy() return ret, ret[i] raise MoleculeError( "No atoms were found in 'ion' with a non-zero charge") else: # Ion is an atomic number or symbol X = Atom(atnum=to_atnum(ion)) XYn = Molecule() XYn.add_atom(X) return XYn, X
def prep_input( arg: Settings ) -> Tuple[SettingsDataFrame, SettingsDataFrame, SettingsDataFrame]: """Interpret and extract the input settings. Returns a list of ligands and a list of cores. Parameters ---------- |plams.Settings|_ A settings object containing all (optional) arguments. Returns ------- |tuple|_ [|CAT.SettingsDataFrame|_, |CAT.SettingsDataFrame|_, |CAT.SettingsDataFrame|_] A tuple containing the ligand, core and qd dataframes. """ # Interpret arguments validate_input(arg, validate_only=False) # Read the input ligands and cores lig_list = read_mol(arg.get('input_ligands')) core_list = read_mol(arg.get('input_cores')) qd_list = read_mol(arg.get('input_qd')) is_qd = True if qd_list is not None else False # Raises an error if lig_list or core_list is empty if is_qd: if not qd_list: raise MoleculeError( 'No valid input quantum dots were found, aborting run') else: if not lig_list: raise MoleculeError( 'No valid input ligands were found, aborting run') elif not core_list: raise MoleculeError( 'No valid input cores were found, aborting run') # Store the molecules in dataframes columns = pd.MultiIndex.from_tuples([MOL], names=['index', 'sub index']) if is_qd: ligand_df = core_df = None qd_df = SettingsDataFrame(index=pd.RangeIndex(len(qd_list)), columns=columns, settings=arg) qd_df[MOL] = qd_list else: qd_df = None ligand_df = SettingsDataFrame(index=pd.RangeIndex(len(lig_list)), columns=columns, settings=arg) core_df = SettingsDataFrame(index=pd.RangeIndex(len(core_list)), columns=columns.copy(), settings=arg) ligand_df[MOL] = lig_list core_df[MOL] = core_list return ligand_df, core_df, qd_df
def run_ff_cationic(mol: Molecule, anchor: Atom, s: Settings) -> None: r"""Assign neutral parameters to a cationic species (*e.g.* ammonium). Consists of 3 distinct steps: * **mol** is converted into two neutral fragments, *e.g.* ammonium is converted into two amines: :math:`N^+(R)_4 \rightarrow N(R)_3 + RN(H)_2`. * Parameters are guessed for both fragments (using MATCH_) and then recombined into **mol**. * The atomic charge of **anchor** is adjusted such that the total moleculair charge becomes zero. Performs an inplace update of **mol**. .. _MATCH: http://brooks.chem.lsa.umich.edu/index.php?page=match&subdir=articles/resources/software Parameters ---------- mol : :class:`Molecule<scm.plams.mol.molecule.Molecule>` A cationic molecule. anchor : :class:`Atom<scm.plams.mol.atom.Atom>` The atom in **mol** with the formal positive charge. s : :class:`Settings<scm.plams.core.settings.Settings>` The job Settings to-be passed to :class:`MATCHJob<nanoCAT.ff.match_job.MATCHJob>`. See Also -------- :func:`run_match_job()<nanoCAT.ff.ff_assignment.run_match_job>` Assign atom types and charges to **mol** based on the results of MATCH_. :func:`run_ff_anionic()<nanoCAT.ff.ff_anionic.run_ff_anionic>` Assign neutral parameters to an anionic species (*e.g.* carboxylate). """ # noqa if anchor not in mol: raise MoleculeError("Passed 'anchor' is not part of 'mol'") anchor.properties.charge = 0 # Find the first bond attached to the anchor atom which is not part of a ring for bond in anchor.bonds: if not mol.in_ring(bond): break else: raise MoleculeError( "All bonds attached to 'anchor' are part of a ring system") with SplitMol(mol, bond) as (frag1, frag2): # Identify the amine and the alkylic fragment if anchor in frag1: amine = frag1 alkyl = frag2 else: amine = frag2 alkyl = frag1 amine.delete_atom(anchor.bonds[-1].other_end(anchor)) # Change the capping hydrogen into X # X is the same atom type as **anchor** alkyl_cap = alkyl[-1] alkyl_cap.atnum = anchor.atnum cap_bond = alkyl_cap.bonds[0] bond_length = alkyl_cap.radius + cap_bond.other_end(alkyl_cap).radius cap_bond.resize(alkyl_cap, bond_length) # Change X into XH_n alkyl_with_h = add_Hs(alkyl) properties = mol[1].properties for at in alkyl_with_h.atoms[len(alkyl):]: cap_h = Atom(atnum=at.atnum, coords=at.coords, mol=alkyl, settings=properties.copy()) cap_h.properties.pdb_info.IsHeteroAtom = False cap_h.properties.pdb_info.Name = 'Hxx' alkyl.add_atom(cap_h) alkyl.add_bond(Bond(alkyl_cap, cap_h, mol=alkyl)) # Get the match parameters run_match_job(amine, s) run_match_job(alkyl, s) # Set the total charge of the system to 0 anchor.properties.charge_float -= sum(at.properties.charge_float for at in mol) return None
def prep_ligand(ligand_df: SettingsDataFrame) -> SettingsDataFrame: """Function that handles all ligand operations. * Ligand function group identification * Ligand geometry optimization * Ligand bulkiness calculations * Ligand COSMO-RS calculations * Ligand conceptual DFT calculations .. _Nano-CAT: https://github.com/nlesc-nano/nano-CAT Parameters ---------- ligand_df : |CAT.SettingsDataFrame|_ A dataframe of ligand molecules. Molecules are stored in the *mol* column. Returns ------- |CAT.SettingsDataFrame|_ A new dataframe containing only valid ligands. Raises ------ ImportError Raised if a COSMO-RS calculation is attempted without installing the Nano-CAT_ package. """ # Unpack arguments forcefield = ligand_df.settings.optional.forcefield optimize = ligand_df.settings.optional.ligand.optimize crs = ligand_df.settings.optional.ligand.crs cdft = ligand_df.settings.optional.ligand.cdft # Identify functional groups within the ligand. ligand_df = init_ligand_anchoring(ligand_df) # Check if any valid functional groups were found if not ligand_df[MOL].any(): raise MoleculeError( 'No valid functional groups found in any of the ligands, aborting run' ) # Optimize the ligands if optimize: init_ligand_opt(ligand_df) # Remove failed optimizations from the ligand list _is_opt = (lig.properties.get('is_opt', False) for lig in ligand_df[MOL]) is_opt = np.fromiter(_is_opt, count=len(ligand_df), dtype=bool) ligand_df = ligand_df.loc[is_opt] else: for lig in ligand_df[MOL]: allign_axis(lig, lig.properties.dummies) # Perform a COSMO-RS calculation on the ligands if crs: val_nano_cat( "Ligand COSMO-RS calculations require the nano-CAT package") init_solv(ligand_df) # Assign CHARMM CGenFF atom types to all ligands if forcefield: val_nano_cat( "Automatic ligand forcefield assignment requires MATCH " "(Multipurpose Atom-Typer for CHARMM) and the nano-CAT package") init_ff_assignment(ligand_df) # Run conceptual DFT calculations if cdft: val_nano_cat( "Ligand conceptual DFT calculations require the nano-CAT package") init_cdft(ligand_df) return ligand_df
def prep_qd(ligand_df: Optional[SettingsDataFrame], core_df: Optional[SettingsDataFrame], qd_df: Optional[SettingsDataFrame]) -> SettingsDataFrame: """Function that handles all quantum dot (qd, i.e. core + all ligands) operations. * Constructing the quantum dots * Optimizing the quantum dots * Peforming activation strain analyses * Dissociating ligands on the quantum dot surface .. _Nano-CAT: https://github.com/nlesc-nano/nano-CAT Has two accepted signatures: * ``ligand_df = core_df = None``: Update an existing quantum dot dataframe (**qd_df**). * ``qd_df = None``: Construct a new quantum dot dataframe from **ligand_df** and **core_df**. Parameters ---------- ligand_df : |CAT.SettingsDataFrame|_ ``None`` or a dataframe of ligand molecules. Molecules are stored in the *mol* column. core_df : |CAT.SettingsDataFrame|_ ``None`` or a dataframe of core molecules. Molecules are stored in the *mol* column. qd_df : |CAT.SettingsDataFrame|_ ``None`` or a dataframe of quantum dots. Molecules are stored in the *mol* column. Returns ------- |CAT.SettingsDataFrame|_ A dataframe of quantum dots molecules. Molecules are stored in the *mol* column. Raises ------ ImportError Raised if an activation-strain or ligand dissociation calculation is attempted without installing the Nano-CAT_ package. """ # Unpack arguments bulk = ligand_df.settings.optional.qd.bulkiness optimize = ligand_df.settings.optional.qd.optimize forcefield = ligand_df.settings.optional.forcefield dissociate = ligand_df.settings.optional.qd.dissociate activation_strain = ligand_df.settings.optional.qd.activation_strain construct_qd = ligand_df.settings.optional.qd.construct_qd multi_ligand = ligand_df.settings.optional.qd.multi_ligand # Construct the quantum dot DataFrame # If construct_qd is False, construct the dataframe without filling it with quantum dots if qd_df is None: # Construct new quantum dots qd_df = init_qd_construction(ligand_df, core_df, construct_qd=construct_qd) elif ligand_df is core_df is None: # Update existing quantum dots update_qd_df(qd_df) else: raise TypeError("Either qd_df must be 'None' or ligand_df " " and core_df must both be 'None'") # Start the ligand bulkiness workflow if bulk: val_nano_cat( "Ligand bulkiness calculations require the nano-CAT package") init_lig_bulkiness(qd_df, ligand_df, core_df) # Skip the actual quantum dot construction if not construct_qd: return qd_df if not qd_df[MOL].any(): raise MoleculeError('No valid quantum dots found, aborting') if forcefield: val_nano_cat( "Automatic ligand forcefield assignment requires MATCH " "(Multipurpose Atom-Typer for CHARMM) and the nano-CAT package") # Optimize the qd with the core frozen if optimize: init_qd_opt(qd_df) if multi_ligand: init_multi_ligand(qd_df) # Calculate the interaction between ligands on the quantum dot surface if activation_strain: val_nano_cat( "Quantum dot activation-strain calculations require the nano-CAT package" ) init_asa(qd_df) # Calculate the interaction between ligands on the quantum dot surface upon removal of CdX2 if dissociate: val_nano_cat( "Quantum dot ligand dissociation calculations require the nano-CAT package" ) # Start the BDE calculation logger.info('Calculating ligand dissociation energy') init_bde(qd_df) return qd_df
def _find_start(mol_list: Iterable[Molecule], atom: Atom) -> Molecule: """Find the molecule in **mol_list** containing **atom**.""" for mol in mol_list: if atom in mol: return mol raise MoleculeError(f"{atom!r} is not in any of the passed molecules")
def dissociate_bulk( mol: Molecule, symbol_x: str, symbol_y: None | str = None, count_x: int = 1, count_y: int = 1, n_pairs: int = 1, k: None | int = 4, r_max: None | float = None, mode: _ModeKind = 'uniform', **kwargs: Any, ) -> Molecule: r"""A workflow for removing :math:`XY`-based compounds from the bulk of **mol**. Examples -------- .. code-block:: python >>> from scm.plams import Molecule >>> from CAT.recipes import dissociate_bulk >>> mol: Molecule = ... # Remove two PbBr2 pairs in a system where # each lead atom is surrounded by 6 bromides >>> mol_out1 = dissociate_bulk( ... mol, symbol_x="Pb", symbol_y="Br", count_y=2, n_pairs=2, k=6 ... ) # The same as before, expect all potential bromides are # identified based on a radius, rather than a fixed number >>> mol_out2 = dissociate_bulk( ... mol, symbol_x="Pb", symbol_y="Br", count_y=2, n_pairs=2, r_max=5.0 ... ) # Convert a fraction to a number of pairs >>> f = 0.5 >>> count_x = 2 >>> symbol_x = "Pb" >>> n_pairs = int(f * sum(at.symbol == symbol_x for at in mol) / count_x) >>> mol_out3 = dissociate_bulk( ... mol, symbol_x="Pb", symbol_y="Br", count_y=2, n_pairs=n_pairs, k=6 ... ) Parameters ---------- mol : :class:`~scm.plams.mol.molecule.Molecule` The input molecule. symbol_x : :class:`str` or :class:`int` The atomic symbol or number of the central (to-be dissociated) atom(s) :math:`X`. symbol_y : :class:`str` or :class:`int`, optional The atomic symbol or number of the surrounding (to-be dissociated) atom(s) :math:`Y`. If :data:`None`, do not dissociate any atoms :math:`Y`. count_x : :class:`int` The number of central atoms :math:`X` per individual to-be dissociated cluster. count_y : :class:`int` The number of surrounding atoms :math:`Y` per individual to-be dissociated cluster. n_pairs : :class:`int` The number of to-be removed :math:`XY` fragments. k : :class:`int`, optional The total number of :math:`Y` candidates surrounding each atom :math:`X`. This value should be smaller than or equal to **count_y**. See the **r_max** parameter for a radius-based approach; note that both parameters are not mutually exclusive. r_max : :class:`int`, optional The radius used for searching for :math:`Y` candidates surrounding each atom :math:`X`. See **k** parameter to use a fixed number of nearest neighbors; note that both parameters are not mutually exclusive. mode : :class:`str` How the subset of to-be removed atoms :math:`X` should be generated. Accepts one of the following values: * ``"random"``: A random distribution. * ``"uniform"``: A uniform distribution; the distance between each successive atom and all previous points is maximized. * ``"cluster"``: A clustered distribution; the distance between each successive atom and all previous points is minmized. Keyword Arguments ----------------- \**kwargs : :data:`~typing.Any` Further keyword arguments for :func:`CAT.distribution.distribute_idx`. Returns ------- :class:`~scm.plams.mol.molecule.Molecule` The molecule with :math:`n_{\text{pair}} * XY` fragments removed. """ if n_pairs < 0: raise ValueError("`n_pairs` must be larger than or equal to 0; " f"observed value: {n_pairs!r}") elif n_pairs == 0: return mol.copy() # Validate the input args if count_x <= 0: raise ValueError("`count_x` expected a an integer larger than 0; " f"observed value: {count_x!r}") elif count_y < 0: raise ValueError("`count_y` expected a an integer larger than 0; " f"observed value: {count_y!r}") elif count_y == 0: symbol_y = None # Parse `symbol_x` atnum_x = to_atnum(symbol_x) idx_x = np.fromiter([i for i, at in enumerate(mol) if at.atnum == atnum_x], dtype=np.intp) if len(idx_x) == 0: raise MoleculeError(f"No atoms {to_symbol(atnum_x)!r} in {mol.get_formula()}") # Parse `symbol_y` if symbol_y is not None: atnum_y = to_atnum(symbol_y) idx_y = np.fromiter([i for i, at in enumerate(mol) if at.atnum == atnum_y], dtype=np.intp) if len(idx_y) == 0: raise MoleculeError(f"No atoms {to_symbol(atnum_y)!r} in {mol.get_formula()}") # Parse `k` and `r_max` if k is None and r_max is None: raise TypeError("`k` and `r_max` cannot be both set to None") if k is None: k = cast(int, len(idx_y)) if r_max is None: r_max = cast(float, np.inf) # Validate `n_pairs` if (n_pairs * count_x) > len(idx_x): x = n_pairs * count_x raise ValueError( f"Insufficient atoms {to_symbol(atnum_x)!r} in {mol.get_formula()}; " f"atleast {x} required with `n_pairs={n_pairs!r}` and `count_x={count_x!r}" ) elif symbol_y is not None and (n_pairs * count_y) > len(idx_y): y = n_pairs * count_y raise ValueError( f"Insufficient atoms {to_symbol(atnum_y)!r} in {mol.get_formula()}; " f"atleast {y} required with `n_pairs={n_pairs!r}` and `count_y={count_y!r}" ) # Identify a subset of atoms `x` if count_x != 1: if "cluster_size" in kwargs: raise TypeError("Specifying both `cluster_size` and `count_x` is not supported") kwargs["cluster_size"] = count_x idx_x_sorted = distribute_idx(mol, idx_x, f=1, **kwargs) idx_x_sorted.shape = -1, count_x if symbol_y is not None: # Identify a subset of matching atoms `y` idx_x_subset, idx_y_subset, = _get_opposite_neighbor2( np.asarray(mol), idx_x_sorted, idx_y, n_pairs, n=count_y, k=k, r_max=r_max ) # Concatenate the subsets idx_xy_subset = np.concatenate([idx_x_subset.ravel(), idx_y_subset.ravel()]) else: idx_xy_subset = idx_x[:(n_pairs * count_x)] idx_xy_subset.sort() idx_xy_subset += 1 # Create and return the new molecule ret = mol.copy() atom_set = {ret[i] for i in idx_xy_subset} for at in atom_set: ret.delete_atom(at) return ret
def run_ff_anionic(mol: Molecule, anchor: Atom, s: Settings) -> None: r"""Assign neutral parameters to an anionic species (*e.g.* carboxylate). Consists of 4 distinct steps: * **mol** is capped with a proton: *e.g.* :math:`RCO_2^- \rightarrow RCO_2H`. * Parameters are guessed for both fragments (using MATCH_) and then recombined into **mol**. * The capping proton is removed again. * The atomic charge of **anchor** is adjusted such that the total moleculair charge becomes zero. Performs an inplace update of **mol**. .. _MATCH: http://brooks.chem.lsa.umich.edu/index.php?page=match&subdir=articles/resources/software Parameters ---------- mol : :class:`Molecule<scm.plams.mol.molecule.Molecule>` A cationic molecule. anchor : :class:`Atom<scm.plams.mol.atom.Atom>` The atom in **mol** with the formal negative charge. s : :class:`Settings<scm.plams.core.settings.Settings>` The job Settings to-be passed to :class:`MATCHJob<nanoCAT.ff.match_job.MATCHJob>`. See Also -------- :func:`run_match_job()<nanoCAT.ff.ff_assignment.run_match_job>` Assign atom types and charges to **mol** based on the results of MATCH_. :func:`run_ff_cationic()<nanoCAT.ff.ff_cationic.run_ff_cationic>` Assign neutral parameters to a cationic species (*e.g.* ammonium). """ # noqa if anchor not in mol: raise MoleculeError("Passed 'anchor' is not part of 'mol'") anchor.properties.charge = 0 # Cap the anion with a proton mol_with_h = add_Hs(mol) _cap_h = mol_with_h[-1] cap_h = Atom(atnum=_cap_h.atnum, coords=_cap_h.coords, mol=mol, settings=mol[1].properties.copy()) cap_h.properties.pdb_info.IsHeteroAtom = False cap_h.properties.pdb_info.Name = 'Hxx' mol.add_atom(cap_h) mol.add_bond(Bond(anchor, cap_h, mol=mol)) # Guess parameters and remove the capping proton run_match_job(mol, s) mol.delete_atom(cap_h) # Set the total charge of the system to 0 anchor.properties.charge_float -= sum(at.properties.charge_float for at in mol) return None
def replace_surface(mol: Molecule, symbol: Union[str, int], symbol_new: Union[str, int] = 'Cl', nth_shell: Union[int, Iterable[int]] = 0, f: float = 0.5, mode: str = 'uniform', displacement_factor: float = 0.5, **kwargs: Any) -> Molecule: r"""A workflow for identifying all surface atoms in **mol** and replacing a subset of them. Consists of three distinct steps: 1. Identifying which atoms, with a user-specified atomic **symbol**, are located on the surface of **mol** rather than in the bulk. 2. Define a subset of the newly identified surface atoms using one of CAT's distribution algorithms. 3. Create and return a molecule where the atom subset defined in step 2 has its atomic symbols replaced with **symbol_new**. Examples -------- Replace 75% of all surface ``"Cl"`` atoms with ``"I"``. .. code:: python >>> from scm.plams import Molecule >>> from CAT.recipes import replace_surface >>> mol = Molecule(...) # Read an .xyz file >>> mol_new = replace_surface(mol, symbol='Cl', symbol_new='I', f=0.75) >>> mol_new.write(...) # Write an .xyz file The same as above, except this time the new ``"I"`` atoms are all deleted. .. code:: python >>> from scm.plams import Molecule >>> from CAT.recipes import replace_surface >>> mol = Molecule(...) # Read an .xyz file >>> mol_new = replace_surface(mol, symbol='Cl', symbol_new='I', f=0.75) >>> del_atom = [at for at in mol_new if at.symbol == 'I'] >>> for at in del_atom: ... mol_new.delete_atom(at) >>> mol_new.write(...) # Write an .xyz file Parameters ---------- mol : :class:`~scm.plams.mol.molecule.Molecule` The input molecule. symbol : :class:`str` or :class:`int` An atomic symbol or number defining the super-set of the surface atoms. symbol_new : :class:`str` or :class:`int` An atomic symbol or number which will be assigned to the new surface-atom subset. nth_shell : :class:`int` or :class:`~collections.abc.Iterable` [:class:`int`] One or more integers denoting along which shell-surface(s) to search. For example, if ``symbol = "Cd"`` then ``nth_shell = 0`` represents the surface, ``nth_shell = 1`` is the first sub-surface ``"Cd"`` shell and ``nth_shell = 2`` is the second sub-surface ``"Cd"`` shell. Using ``nth_shell = [1, 2]`` will search along both the first and second ``"Cd"`` sub-surface shells. Note that a :exc:`Zscm.plams.core.errors.MoleculeError` will be raised if the specified **nth_shell** is larger than the actual number of available sub-surface shells. f : :class:`float` The fraction of surface atoms whose atom types will be replaced with **symbol_new**. Must obey the following condition: :math:`0 < f \le 1`. mode : :class:`str` How the subset of surface atoms will be generated. Accepts one of the following values: * ``"random"``: A random distribution. * ``"uniform"``: A uniform distribution; maximizes the nearest-neighbor distance. * ``"cluster"``: A clustered distribution; minimizes the nearest-neighbor distance. displacement_factor : :class:`float` The smoothing factor :math:`n` for constructing a convex hull; should obey :math:`0 <= n <= 1`. Represents the degree of displacement of all atoms with respect to a spherical surface; :math:`n = 1` is a complete projection while :math:`n = 0` means no displacement at all. A non-zero value is generally recomended here, as the herein utilized :class:`~scipy.spatial.ConvexHull` class requires an adequate degree of surface-convexness, lest it fails to properly identify all valid surface points. \**kwargs : :data:`~typing.Any` Further keyword arguments for :func:`~CAT.attachment.distribution.distribute_idx`. Returns ------- :class:`~scm.plams.mol.molecule.Molecule` A new Molecule with a subset of its surface atoms replaced with **symbol_new**. See Also -------- :func:`~CAT.attachment.distribution.distribute_idx` Create a new distribution of atomic indices from **idx** of length :code:`f * len(idx)`. :func:`~nanoCAT.bde.identify_surface.identify_surface` Take a molecule and identify which atoms are located on the surface, rather than in the bulk. :func:`~nanoCAT.bde.identify_surface.identify_surface_ch` Identify the surface of a molecule using a convex hull-based approach. """ # Parse input arguments xyz = np.array(mol, dtype=float, ndmin=2, copy=False) atnum = to_atnum(symbol) atnum_new = to_atnum(symbol_new) # Define the surface-atom subset idx = np.fromiter((i for i, at in enumerate(mol) if at.atnum == atnum), dtype=int) try: idx_surface = idx[_collect_surface(xyz[idx], displacement_factor, nth_shell)] except MoleculeError as exc: nth = exc.args[0] if nth == 0: exc = MoleculeError( f"No atoms with atomic symbol {to_symbol(symbol)!r} available in " f"{mol.get_formula()!r}") else: exc = MoleculeError( f"No subsurface shell >= {nth!r} of atom {to_symbol(symbol)!r} " f"available in {mol.get_formula()!r}") exc.__cause__ = None raise exc try: idx_surface_subset = distribute_idx(xyz, idx_surface, f=f, mode=mode, **kwargs) except ValueError as ex: raise MoleculeError( "Failed to identify any surface atoms with atomic symbol " f"{to_symbol(symbol)!r} in {mol.get_formula()!r}") from ex else: idx_surface_subset += 1 # Update atomic symbols and return ret = mol.copy() for i in idx_surface_subset: ret[i].atnum = atnum_new return ret