def _tag_improper_torsions(
    self,
    molecule: Molecule,
    symmetry_classes: List[int],
    toolkit_registry: ToolkitRegistry,
) -> None:
    """
    For each improper torsion in the list try and tag the combination in the molecule.

    Parameters:
        molecule: The molecule to search. Tagged impropers are added to the
            indexer stored under ``molecule.properties["dihedrals"]``.
        symmetry_classes: Per-atom symmetry classes, forwarded to
            ``_get_unique_torsions`` and ``get_symmetry_group``.
        toolkit_registry: The registry used for every SMARTS match made here,
            including the central atom match.
    """
    indexer: TorsionIndexer = molecule.properties["dihedrals"]

    for improper in self.improper_scans:
        matches = molecule.chemical_environment_matches(
            query=improper.smarts, toolkit_registry=toolkit_registry
        )
        unique_torsions = self._get_unique_torsions(
            matches=matches, symmetry_classes=symmetry_classes
        )
        # Match the central atom with the same registry as the improper pattern
        # so both queries are answered by the toolkit the caller selected.
        central_atoms = molecule.chemical_environment_matches(
            query=improper.central_smarts, toolkit_registry=toolkit_registry
        )

        for tagged_torsion in unique_torsions:
            symmetry_group = get_symmetry_group(
                atom_group=tagged_torsion, symmetry_classes=symmetry_classes
            )
            for atom in central_atoms:
                # Only the first central atom match that is part of this
                # improper is used; the remaining matches are skipped.
                if atom[0] in tagged_torsion:
                    indexer.add_improper(
                        central_atom=atom[0],
                        improper=tagged_torsion,
                        symmetry_group=symmetry_group,
                        scan_range=improper.scan_range,
                        scan_increment=improper.scan_increment,
                    )
                    break
def _tag_torsions(
    self,
    molecule: Molecule,
    symmetry_classes: List[int],
    toolkit_registry: ToolkitRegistry,
) -> None:
    """
    Tag the unique matches of every torsion scan pattern in the molecule.

    Each pattern in ``self.torsion_scans`` is matched against the molecule, the
    matches are reduced with ``_get_unique_torsions`` and every remaining
    torsion is added to the indexer stored under
    ``molecule.properties["dihedrals"]``.

    Parameters:
        molecule: The molecule whose torsions should be tagged.
        symmetry_classes: Per-atom symmetry classes, forwarded to
            ``_get_unique_torsions`` and ``get_symmetry_group``.
        toolkit_registry: The registry used for the SMARTS matching.
    """
    dihedral_indexer: TorsionIndexer = molecule.properties["dihedrals"]

    for scan in self.torsion_scans:
        smarts_hits = molecule.chemical_environment_matches(
            query=scan.smarts1, toolkit_registry=toolkit_registry
        )
        for dihedral in self._get_unique_torsions(
            matches=smarts_hits, symmetry_classes=symmetry_classes
        ):
            # The symmetry group is built from the two central atoms only.
            central_bond_group = get_symmetry_group(
                atom_group=dihedral[1:3],
                symmetry_classes=symmetry_classes,
            )
            dihedral_indexer.add_torsion(
                torsion=dihedral,
                scan_range=scan.scan_range1,
                scan_increment=scan.scan_increment,
                symmetry_group=central_bond_group,
            )
def check_linear_torsions(torsion: Tuple[int, int, int, int], molecule: off.Molecule) -> Tuple[int, int, int, int]:
    """
    Validate that the central bond of the supplied torsion is not linear.

    Parameters:
        torsion: The indices of the atoms in the selected torsion.
        molecule: The molecule which should be checked.

    Returns:
        The torsion that was passed in, unchanged, when the check passes.

    Raises:
        LinearTorsionError: If the given torsion involves driving a linear bond.
    """
    # this is based on the past submissions to QCarchive which have failed
    # highlight the central bond of a linear torsion
    linear_smarts = "[*!D1:1]~[$(*#*)&D2,$(C=*)&D2:2]"
    linear_bonds = molecule.chemical_environment_matches(linear_smarts)

    # The central bond may be reported in either direction by the match.
    forward_bond = torsion[1:3]
    backward_bond = torsion[2:0:-1]
    drives_linear_bond = forward_bond in linear_bonds or backward_bond in linear_bonds

    if not drives_linear_bond:
        return torsion

    raise LinearTorsionError(
        f"The dihedral {torsion} in molecule {molecule} highlights a linear bond."
    )
def improper_torsion_indices(offmol: Molecule, improper_def='espaloma') -> np.ndarray:
    """Return the atom indices of the improper torsions around trivalent atoms.

    "[*:1]~[X3:2](~[*:3])~[*:4]" matches (_all_improper_torsion_indices returns
    "[*:1]~[*:2](~[*:3])~[*:4]" matches)

    improper_def allows for choosing which atom will be the central atom in the
    permutations:
    smirnoff: central atom is listed first
    espaloma: central atom is listed second

    Additionally, for smirnoff, only take the subset of atoms that corresponds
    to the ccw traversal of connected atoms.

    Parameters
    ----------
    offmol
        The molecule to search for improper torsions.
    improper_def
        Either ``'espaloma'`` or ``'smirnoff'``; selects the atom ordering
        described above.

    Returns
    -------
    An integer array with one row of four atom indices per improper. The array
    always has shape ``(n_impropers, 4)``, including ``(0, 4)`` when nothing
    matches, so callers can index its columns unconditionally.

    Raises
    ------
    ValueError
        If ``improper_def`` is not one of the supported values.

    Notes
    -----
    Motivation: offmol.impropers returns a large number of impropers, and we
    may wish to restrict this number. May update this filter definition based
    on discussion in https://github.com/openff.toolkit/openff.toolkit/issues/746
    """
    ## Find all atoms bound to exactly 3 other atoms
    if improper_def == 'espaloma':
        ## This finds all orderings, which is what we want for the espaloma case
        ## but not for smirnoff
        improper_smarts = '[*:1]~[X3:2](~[*:3])~[*:4]'
        idx_permuts = offmol.chemical_environment_matches(improper_smarts)
    elif improper_def == 'smirnoff':
        improper_smarts = '[*:2]~[X3:1](~[*:3])~[*:4]'
        ## For smirnoff ordering, we only want to find the unique combinations
        ## of atoms forming impropers so we can permute them the way we want
        mol_idxs = offmol.chemical_environment_matches(improper_smarts, unique=True)

        ## Get all ccw orderings: keep the central atom first and emit the
        ## three cyclic rotations of the remaining atoms.
        idx_permuts = [
            (c, *other_atoms[shift:], *other_atoms[:shift])
            for c, *other_atoms in mol_idxs
            for shift in range(3)
        ]
    else:
        raise ValueError(f'Unknown value for improper_def: {improper_def}')

    # Without the explicit dtype and reshape an empty match list would become a
    # 1-D float array of shape (0,) rather than an integer (0, 4) array.
    return np.array(idx_permuts, dtype=int).reshape(-1, 4)
def _tag_double_torsions(
    self,
    molecule: Molecule,
    symmetry_classes: List[int],
    toolkit_registry: ToolkitRegistry,
) -> None:
    """
    Tag every pairing of unique matches for each double torsion scan.

    For each entry in ``self.double_torsion_scans`` both SMARTS patterns are
    matched, each set of matches is reduced with ``_get_unique_torsions`` and
    every (first torsion, second torsion) pairing is added to the indexer stored
    under ``molecule.properties["dihedrals"]``.

    Parameters:
        molecule: The molecule whose torsions should be tagged.
        symmetry_classes: Per-atom symmetry classes, forwarded to
            ``_get_unique_torsions`` and ``get_symmetry_group``.
        toolkit_registry: The registry used for the SMARTS matching.
    """
    dihedral_indexer: TorsionIndexer = molecule.properties["dihedrals"]

    for scan in self.double_torsion_scans:
        first_hits = molecule.chemical_environment_matches(
            query=scan.smarts1, toolkit_registry=toolkit_registry
        )
        second_hits = molecule.chemical_environment_matches(
            query=scan.smarts2, toolkit_registry=toolkit_registry
        )
        first_unique = self._get_unique_torsions(
            matches=first_hits, symmetry_classes=symmetry_classes
        )
        second_unique = self._get_unique_torsions(
            matches=second_hits, symmetry_classes=symmetry_classes
        )

        for first_torsion in first_unique:
            # Symmetry groups are built from the two central atoms only.
            first_group = get_symmetry_group(
                atom_group=first_torsion[1:3], symmetry_classes=symmetry_classes
            )
            for second_torsion in second_unique:
                second_group = get_symmetry_group(
                    atom_group=second_torsion[1:3],
                    symmetry_classes=symmetry_classes,
                )
                dihedral_indexer.add_double_torsion(
                    torsion1=first_torsion,
                    torsion2=second_torsion,
                    symmetry_group1=first_group,
                    symmetry_group2=second_group,
                    scan_range1=scan.scan_range1,
                    scan_range2=scan.scan_range2,
                    scan_increment=scan.scan_increment,
                )
def find_ring_systems(molecule: Molecule) -> Dict[int, int]:
    """Assign every ring atom of a molecule to a ring system (see [1] for more
    details).

    Ring bonds are located by matching the `[*:1]@[*:2]` SMIRKS pattern against
    the molecule. The matched bonds become the edges of a ``networkx`` graph and
    each connected component of that graph (found with
    ``connected_components``) is treated as one ring system.

    Parameters
    ----------
    molecule:
        The molecule to search for ring systems.

    Returns
    -------
        The index of which ring system each atom belongs to, numbered from 1.
        Only ring atoms are included in the returned dictionary.

    Notes
    -----
    * Two molecular rings with only one common atom (i.e. spiro compounds) are
      considered to be part of the same ring system.

    References
    ----------
    [1] `Ring Perception <https://docs.eyesopen.com/toolkits/python/oechemtk/ring.html>`_
    """
    # Sort each matched pair so both directions of a bond collapse to one edge.
    ring_bonds = set()
    for bond_match in molecule.chemical_environment_matches("[*:1]@[*:2]"):
        ring_bonds.add(tuple(sorted(bond_match)))

    # Build a graph that contains only the ring bonds.
    ring_graph = networkx.Graph()
    ring_graph.add_edges_from(ring_bonds)

    system_by_atom = {}

    components = networkx.connected_components(ring_graph)
    for system_index, component in enumerate(components, start=1):
        for atom_index in component:
            assert atom_index not in system_by_atom
            system_by_atom[atom_index] = system_index

    return system_by_atom
def _find_functional_groups(
    cls, molecule: Molecule, functional_groups: Dict[str, str]
) -> FunctionalGroups:
    """Locate the atoms and bonds of each functional group listed in
    ``functional_groups``.

    Parameters
    ----------
    molecule
        The molecule to search for functional groups.
    functional_groups
        A dictionary of SMARTS of functional groups that should not be
        fragmented indexed by a friendly string label, e.g.
        'alcohol: [#6:1]-[#8H1X2:2]'

    Returns
    -------
        A dictionary keyed by ``"<label>_<n>"``, one entry per distinct match,
        whose values are ``(atoms, bonds)`` tuples holding the map indices of
        the matched atoms and of the bonds between them.
    """
    groups_by_key = {}

    for label, pattern in functional_groups.items():
        # Matches that cover the same atoms in a different order are one group.
        distinct_hits = set()
        for hit in molecule.chemical_environment_matches(pattern):
            distinct_hits.add(tuple(sorted(hit)))

        for counter, hit in enumerate(distinct_hits):
            group_atoms = {get_map_index(molecule, atom_index) for atom_index in hit}

            group_bonds = set()
            for bond in molecule.bonds:
                # Keep only bonds with both ends inside the matched group.
                if bond.atom1_index not in hit or bond.atom2_index not in hit:
                    continue
                group_bonds.add(
                    (
                        get_map_index(molecule, bond.atom1_index),
                        get_map_index(molecule, bond.atom2_index),
                    )
                )

            groups_by_key[f"{label}_{counter}"] = (group_atoms, group_bonds)

    return groups_by_key
def _detect_linear_torsions(self, molecule: off.Molecule) -> List:
    """
    Find the linear bonds in the molecule whose torsions should not be driven.

    Parameters:
        molecule: An openforcefield molecule instance

    Returns:
        The matches of the linear bond pattern, i.e. the central bond tuples
        which should not be driven. These can be compared against the torsions
        which have been selected.
    """
    # this is based on the past submissions to QCarchive which have failed
    # highlight the central bond of a linear torsion
    return molecule.chemical_environment_matches("[*!D1:1]~[$(*#*)&D2,$(C=*)&D2:2]")
def _find_ortho_substituents(
    cls, parent: Molecule, bonds: Set[BondTuple]
) -> AtomAndBondSet:
    """Collect the ring substituents that sit ortho to any of the supplied
    rotatable bonds.

    Parameters
    ----------
    parent
        The parent molecule being fragmented.
    bonds
        The map indices of the rotatable bonds.

    Returns
    -------
        The set of map indices of atoms in ortho group and of bond tuples in
        ortho group.
    """
    ortho_atoms = set()
    ortho_bonds = set()

    ortho_smarts = "[!#1:1]~&!@[*:2]@[*:3]~&!@[!#1*:4]"

    for hit in parent.chemical_environment_matches(ortho_smarts):
        mapped = tuple(get_map_index(parent, index) for index in hit)

        # Only matches whose first two atoms form one of the requested bonds
        # (in either direction) are of interest.
        leading_bond = mapped[:2]

        if leading_bond in bonds or leading_bond[::-1] in bonds:
            ortho_atoms.update(mapped[::3])
            ortho_bonds.add((mapped[0], mapped[1]))
            ortho_bonds.add((mapped[2], mapped[3]))

    # Ensure the matched bonds doesn't include duplicates.
    ortho_bonds = {tuple(sorted(bond)) for bond in ortho_bonds}

    return ortho_atoms, ortho_bonds
def find_rotatable_bonds(
    cls, molecule: Molecule, target_bond_smarts: Optional[List[str]]
) -> List[BondTuple]:
    """Locate the rotatable bonds of a molecule, *including* rotatable double
    bonds.

    Parameters
    ----------
    molecule
        The molecule to search for rotatable bonds.
    target_bond_smarts
        An optional list of SMARTS patterns that should be used to identify the
        bonds within the parent molecule to grow fragments around. Each SMARTS
        pattern should include **two** indexed atoms that correspond to the two
        atoms involved in the central bond.

        If no pattern is provided fragments will be constructed around all
        'rotatable bonds'. A 'rotatable bond' here means any bond matched by a
        `[!$(*#*)&!D1:1]-,=;!@[!$(*#*)&!D1:2]` SMARTS pattern with the added
        constraint that the **heavy** degree (i.e. the degree excluding
        hydrogen) of both atoms in the bond must be >= 2.

    Returns
    -------
        A list of the **map** indices of the atoms that form the rotatable
        bonds, ``[(m1, m2),...]``.

    Raises
    ------
    ValueError
        If any match does not contain exactly two atom indices.
    """
    use_default_pattern = target_bond_smarts is None

    if use_default_pattern:
        matches = molecule.chemical_environment_matches(
            "[!$(*#*)&!D1:1]-,=;!@[!$(*#*)&!D1:2]"
        )
    else:
        matches = []
        for smarts in target_bond_smarts:
            matches.extend(molecule.chemical_environment_matches(smarts))

    for match in matches:
        if len(match) != 2:
            raise ValueError(
                f"The `target_bond_smarts` pattern ({target_bond_smarts}) "
                f"must define exactly two indexed atoms to match."
            )

    # Sorting each pair collapses the two directions of a bond into one entry.
    unique_matches = {tuple(sorted(match)) for match in matches}

    if use_default_pattern:

        # Drop bonds without a heavy degree of at least 2 on each end to avoid
        # finding terminal bonds
        def heavy_degree(atom_index: int) -> int:
            neighbours = molecule.atoms[atom_index].bonded_atoms
            return sum(1 for neighbour in neighbours if neighbour.atomic_number != 1)

        unique_matches = {
            match
            for match in unique_matches
            if all(heavy_degree(index) >= 2 for index in match)
        }

    return [
        (get_map_index(molecule, first), get_map_index(molecule, second))
        for first, second in unique_matches
    ]