def _tag_improper_torsions(
        self,
        molecule: Molecule,
        symmetry_classes: List[int],
        toolkit_registry: ToolkitRegistry,
    ) -> None:
        """
        Tag the improper torsions requested in ``self.improper_scans`` on the molecule.

        For every entry of ``self.improper_scans`` the ``smarts`` pattern is matched
        against the molecule, the matches are reduced with
        ``self._get_unique_torsions`` and every remaining match is registered on the
        indexer stored under ``molecule.properties["dihedrals"]``. The central atom
        passed to the indexer is the first atom of the first ``central_smarts`` match
        that is part of the torsion; a torsion without such a match is not tagged.

        Parameters:
            molecule: The molecule to query; its ``"dihedrals"`` indexer is updated
                in place.
            symmetry_classes: Forwarded unchanged to ``_get_unique_torsions`` and
                ``get_symmetry_group``.
            toolkit_registry: The registry used for every SMARTS query made here.
        """

        indexer: TorsionIndexer = molecule.properties["dihedrals"]

        for improper in self.improper_scans:

            matches = molecule.chemical_environment_matches(
                query=improper.smarts, toolkit_registry=toolkit_registry)
            unique_torsions = self._get_unique_torsions(
                matches=matches, symmetry_classes=symmetry_classes)
            # Use the caller supplied registry for the central atom query as well,
            # so both patterns are perceived by the same toolkit(s) instead of
            # silently falling back to the default registry for this one.
            central_matches = molecule.chemical_environment_matches(
                query=improper.central_smarts,
                toolkit_registry=toolkit_registry)

            for tagged_torsion in unique_torsions:
                symmetry_group = get_symmetry_group(
                    atom_group=tagged_torsion,
                    symmetry_classes=symmetry_classes)
                for central_match in central_matches:
                    central_atom = central_match[0]
                    if central_atom in tagged_torsion:
                        indexer.add_improper(
                            central_atom=central_atom,
                            improper=tagged_torsion,
                            symmetry_group=symmetry_group,
                            scan_range=improper.scan_range,
                            scan_increment=improper.scan_increment,
                        )
                        # Only the first central atom found in the torsion is used.
                        break
    def _tag_torsions(
        self,
        molecule: Molecule,
        symmetry_classes: List[int],
        toolkit_registry: ToolkitRegistry,
    ) -> None:
        """
        Tag the torsions requested in ``self.torsion_scans`` on the molecule.

        Each scan's ``smarts1`` pattern is matched against the molecule, the matches
        are reduced with ``self._get_unique_torsions`` and every remaining torsion is
        added to the indexer stored under ``molecule.properties["dihedrals"]``.

        Parameters:
            molecule: The molecule to query; its ``"dihedrals"`` indexer is updated
                in place.
            symmetry_classes: Forwarded unchanged to ``_get_unique_torsions`` and
                ``get_symmetry_group``.
            toolkit_registry: The registry used for the SMARTS queries.
        """

        dihedral_indexer: TorsionIndexer = molecule.properties["dihedrals"]

        for scan in self.torsion_scans:
            smarts_hits = molecule.chemical_environment_matches(
                query=scan.smarts1, toolkit_registry=toolkit_registry)

            for dihedral in self._get_unique_torsions(
                    matches=smarts_hits, symmetry_classes=symmetry_classes):
                # The symmetry group is computed from the two central atoms only.
                central_bond_symmetry = get_symmetry_group(
                    atom_group=dihedral[1:3],
                    symmetry_classes=symmetry_classes,
                )
                dihedral_indexer.add_torsion(
                    torsion=dihedral,
                    scan_range=scan.scan_range1,
                    scan_increment=scan.scan_increment,
                    symmetry_group=central_bond_symmetry,
                )
def check_linear_torsions(torsion: Tuple[int, int, int, int],
                          molecule: off.Molecule) -> Tuple[int, int, int, int]:
    """
    Validate that the central bond of the supplied torsion is not a linear bond.

    Parameters:
        torsion: The indices of the atoms in the selected torsion.
        molecule: The molecule which should be checked.

    Returns:
        The supplied torsion, unchanged, when its central bond (in either
        direction) is not among the linear bond matches.

    Raises:
        LinearTorsionError: If the given torsion involves driving a linear bond.
    """

    # Pattern based on past QCArchive submissions which failed; it tags the
    # two atoms of the central bond of a linear torsion.
    linear_smarts = "[*!D1:1]~[$(*#*)&D2,$(C=*)&D2:2]"
    linear_bonds = molecule.chemical_environment_matches(linear_smarts)

    # The central bond may be matched in either atom order.
    central_bond = torsion[1:3]
    flipped_bond = torsion[2:0:-1]

    if central_bond not in linear_bonds and flipped_bond not in linear_bonds:
        return torsion

    raise LinearTorsionError(
        f"The dihedral {torsion} in molecule {molecule} highlights a linear bond."
    )
Beispiel #4
0
def improper_torsion_indices(offmol: Molecule,
                             improper_def='espaloma') -> np.ndarray:
    """Return the atom indices of improper torsions centred on trivalent atoms.

    The molecule is queried for "[*:1]~[X3:2](~[*:3])~[*:4]" style matches, i.e.
    only atoms bound to exactly three other atoms are considered as central
    atoms. (The original note states that ``_all_improper_torsion_indices``
    returns the unrestricted "[*:1]~[*:2](~[*:3])~[*:4]" matches instead.)

    ``improper_def`` selects which position holds the central atom:

    * ``'espaloma'``: central atom is listed second; every match returned by
      the query is kept.
    * ``'smirnoff'``: central atom is listed first; the query is made with
      ``unique=True`` and each match is expanded into the three cyclic
      rotations of its outer atoms (described by the original author as the
      ccw traversal of the connected atoms).

    Parameters
    ----------
    offmol
        The molecule to query; it must provide ``chemical_environment_matches``.
    improper_def
        Either ``'espaloma'`` (default) or ``'smirnoff'``.

    Returns
    -------
        An integer array of shape ``(n_impropers, 4)``. When nothing matches the
        array has shape ``(0, 4)`` so that it can still be indexed column-wise.

    Raises
    ------
    ValueError
        If ``improper_def`` is not one of the supported values.

    Notes
    -----
    Motivation: offmol.impropers returns a large number of impropers, and we may wish to restrict this number.
    May update this filter definition based on discussion in https://github.com/openff.toolkit/openff.toolkit/issues/746
    """

    if improper_def == 'espaloma':
        # All orderings are wanted for the espaloma case, so keep every match.
        improper_smarts = '[*:1]~[X3:2](~[*:3])~[*:4]'
        index_rows = list(offmol.chemical_environment_matches(improper_smarts))
    elif improper_def == 'smirnoff':
        improper_smarts = '[*:2]~[X3:1](~[*:3])~[*:4]'
        # Only the unique atom combinations are requested here, they are then
        # permuted explicitly below.
        mol_idxs = offmol.chemical_environment_matches(improper_smarts,
                                                       unique=True)
        # Central atom first, followed by each cyclic rotation of the other three.
        index_rows = [
            (center, *(neighbours[(shift + k) % 3] for k in range(3)))
            for center, *neighbours in mol_idxs
            for shift in range(3)
        ]
    else:
        raise ValueError(f'Unknown value for improper_def: {improper_def}')

    # np.array([]) alone would be a float64 array of shape (0,); force an integer
    # (n, 4) layout so the no-match case has the same shape and dtype kind as the
    # populated one.
    return np.array(index_rows, dtype=int).reshape(-1, 4)
    def _tag_double_torsions(
        self,
        molecule: Molecule,
        symmetry_classes: List[int],
        toolkit_registry: ToolkitRegistry,
    ) -> None:
        """
        Tag the double torsions requested in ``self.double_torsion_scans``.

        For each scan the ``smarts1`` and ``smarts2`` patterns are matched
        separately, both match lists are reduced with
        ``self._get_unique_torsions`` and every pairing of a first torsion with a
        second torsion is added to the indexer stored under
        ``molecule.properties["dihedrals"]``.

        Parameters:
            molecule: The molecule to query; its ``"dihedrals"`` indexer is updated
                in place.
            symmetry_classes: Forwarded unchanged to ``_get_unique_torsions`` and
                ``get_symmetry_group``.
            toolkit_registry: The registry used for the SMARTS queries.
        """

        dihedral_indexer: TorsionIndexer = molecule.properties["dihedrals"]

        for scan in self.double_torsion_scans:
            first_hits = molecule.chemical_environment_matches(
                query=scan.smarts1,
                toolkit_registry=toolkit_registry)
            second_hits = molecule.chemical_environment_matches(
                query=scan.smarts2,
                toolkit_registry=toolkit_registry)
            first_torsions = self._get_unique_torsions(
                matches=first_hits, symmetry_classes=symmetry_classes)
            second_torsions = self._get_unique_torsions(
                matches=second_hits, symmetry_classes=symmetry_classes)

            for first in first_torsions:
                # Symmetry groups are derived from the central two atoms only.
                first_symmetry = get_symmetry_group(
                    atom_group=first[1:3],
                    symmetry_classes=symmetry_classes)

                for second in second_torsions:
                    dihedral_indexer.add_double_torsion(
                        torsion1=first,
                        torsion2=second,
                        symmetry_group1=first_symmetry,
                        symmetry_group2=get_symmetry_group(
                            atom_group=second[1:3],
                            symmetry_classes=symmetry_classes,
                        ),
                        scan_range1=scan.scan_range1,
                        scan_range2=scan.scan_range2,
                        scan_increment=scan.scan_increment,
                    )
def find_ring_systems(molecule: Molecule) -> Dict[int, int]:
    """Assign every ring atom of a molecule to a ring system (see [1]).

    Ring bonds are located by matching the `[*:1]@[*:2]` SMIRKS pattern against
    the molecule. The matched bonds become the edges of a ``networkx`` graph and
    each connected component of that graph (``connected_components``) is taken
    to be one ring system.

    Parameters
    ----------
    molecule:
        The molecule to search for ring systems.

    Returns
    -------
        A dictionary mapping the index of each ring atom to the (1-based) index
        of the ring system it belongs to. Atoms which are not in a ring are not
        included.

    Notes
    -----
    * Two molecular rings with only one common atom (i.e. spiro compounds) are
      considered to be part of the same ring system.

    References
    ----------
    [1] `Ring Perception <https://docs.eyesopen.com/toolkits/python/oechemtk/ring.html>`_
    """

    # Sort each matched pair so both directions of a bond collapse to one entry.
    ring_bonds = {
        tuple(sorted(bond))
        for bond in molecule.chemical_environment_matches("[*:1]@[*:2]")
    }

    # Build a graph whose edges are exactly the ring bonds.
    ring_graph = networkx.Graph()
    ring_graph.add_edges_from(ring_bonds)

    system_of_atom = {}
    components = networkx.connected_components(ring_graph)

    # Ring systems are numbered from 1 in the order the components are yielded.
    for system_index, members in enumerate(components, start=1):

        for atom_index in members:
            assert atom_index not in system_of_atom
            system_of_atom[atom_index] = system_index

    return system_of_atom
    def _find_functional_groups(
        cls, molecule: Molecule, functional_groups: Dict[str, str]
    ) -> FunctionalGroups:
        """Locate the atoms and bonds of each functional group listed in
        ``functional_groups``.

        Parameters
        ----------
        molecule
            The molecule to search for function groups.
        functional_groups
            A dictionary of SMARTS of functional groups that should not be fragmented
            indexed by a friendly string label, e.g. 'alcohol: [#6:1]-[#8H1X2:2]'

        Returns
        -------
            A dictionary with one ``(atoms, bonds)`` entry per distinct match, keyed
            by ``"<label>_<n>"`` where ``n`` counts the matches of that label. Atoms
            and bonds are expressed through ``get_map_index``.
        """

        groups_by_key = {}

        for label, pattern in functional_groups.items():

            # Matches which differ only in atom order describe the same group.
            distinct_hits = {
                tuple(sorted(hit))
                for hit in molecule.chemical_environment_matches(pattern)
            }

            for counter, hit in enumerate(distinct_hits):

                group_atoms = {
                    get_map_index(molecule, atom_index) for atom_index in hit
                }

                # Keep only the bonds whose two atoms both belong to the match.
                group_bonds = set()

                for bond in molecule.bonds:
                    if bond.atom1_index not in hit or bond.atom2_index not in hit:
                        continue

                    group_bonds.add(
                        (
                            get_map_index(molecule, bond.atom1_index),
                            get_map_index(molecule, bond.atom2_index),
                        )
                    )

                groups_by_key[f"{label}_{counter}"] = (group_atoms, group_bonds)

        return groups_by_key
    def _detect_linear_torsions(self, molecule: off.Molecule) -> List:
        """
        Try and find any linear bonds in the molecule with torsions that should not be driven.

        Parameters:
            molecule: An openforcefield molecule instance

        Returns:
            A list of the central bond tuples in the molecule which should not be driven, this can then be compared
            against the torsions which have been selected.
        """

        # this is based on the past submissions to QCarchive which have failed
        # highlight the central bond of a linear torsion
        linear_smarts = "[*!D1:1]~[$(*#*)&D2,$(C=*)&D2:2]"

        matches = molecule.chemical_environment_matches(linear_smarts)

        return matches
    def _find_ortho_substituents(
        cls, parent: Molecule, bonds: Set[BondTuple]
    ) -> AtomAndBondSet:
        """Collect the ring substituents which are ortho to any of the supplied
        rotatable bonds.

        Parameters
        ----------
        parent
            The parent molecule being fragmented.
        bonds
            The map indices of the rotatable bonds.

        Returns
        -------
            The set of map indices of atoms in ortho group and of bond tuples in ortho
            group. Each bond tuple is stored in sorted order.
        """

        ortho_pattern = "[!#1:1]~&!@[*:2]@[*:3]~&!@[!#1*:4]"

        ortho_atoms = set()
        ortho_bonds = set()

        for hit in parent.chemical_environment_matches(ortho_pattern):

            mapped = tuple(get_map_index(parent, atom_index) for atom_index in hit)

            # Only matches whose first two atoms form one of the requested bonds
            # (in either direction) are of interest.
            leading_bond = mapped[:2]

            if leading_bond in bonds or leading_bond[::-1] in bonds:
                # First and last tagged atoms of the match.
                ortho_atoms.update(mapped[::3])

                # Store the 1-2 and 3-4 bonds sorted so duplicates collapse.
                for start in (0, 2):
                    ortho_bonds.add(
                        tuple(sorted((mapped[start], mapped[start + 1])))
                    )

        return ortho_atoms, ortho_bonds
    def find_rotatable_bonds(
        cls, molecule: Molecule, target_bond_smarts: Optional[List[str]]
    ) -> List[BondTuple]:
        """Return the rotatable bonds of a molecule, *including* rotatable double
        bonds.

        Parameters
        ----------
        molecule
            The molecule to search for rotatable bonds.
        target_bond_smarts
            An optional list of SMARTS patterns that should be used to identify the bonds
            within the parent molecule to grow fragments around. Each SMARTS pattern
            should include **two** indexed atoms that correspond to the two atoms
            involved in the central bond.

            If no pattern is provided fragments will be constructed around all 'rotatable
            bonds'. A 'rotatable bond' here means any bond matched by a
            `[!$(*#*)&!D1:1]-,=;!@[!$(*#*)&!D1:2]` SMARTS pattern with the added
            constraint that the **heavy** degree (i.e. the degree excluding hydrogen) of
            both atoms in the bond must be >= 2.

        Returns
        -------
            A list of the **map** indices of the atoms that form the rotatable
            bonds, ``[(m1, m2),...]``.

        Raises
        ------
        ValueError
            If any match of a user supplied pattern does not contain exactly two
            atoms.
        """

        def heavy_degree(atom_index: int) -> int:
            # Number of bonded atoms which are not hydrogen.
            neighbours = molecule.atoms[atom_index].bonded_atoms
            return sum(
                1 for neighbour in neighbours if neighbour.atomic_number != 1
            )

        if target_bond_smarts is not None:

            hits = []

            for smarts in target_bond_smarts:
                hits.extend(molecule.chemical_environment_matches(smarts))

            if any(len(hit) != 2 for hit in hits):

                raise ValueError(
                    f"The `target_bond_smarts` pattern ({target_bond_smarts}) "
                    f"must define exactly two indexed atoms to match."
                )

            # Sorting collapses the two directions of a bond into one entry.
            candidate_bonds = {tuple(sorted(hit)) for hit in hits}

        else:

            hits = molecule.chemical_environment_matches(
                "[!$(*#*)&!D1:1]-,=;!@[!$(*#*)&!D1:2]"
            )
            candidate_bonds = {tuple(sorted(hit)) for hit in hits}

            # Drop bonds without a heavy degree of at least 2 on each end to avoid
            # finding terminal bonds
            candidate_bonds = {
                bond
                for bond in candidate_bonds
                if all(heavy_degree(atom_index) >= 2 for atom_index in bond)
            }

        return [
            (get_map_index(molecule, first), get_map_index(molecule, second))
            for first, second in candidate_bonds
        ]