def test_get_ring_and_fgroup_ortho(input_smiles, bond_smarts, expected_pattern):
    """Check that functional groups and rings attached to ortho groups are found.

    The heavy atoms collected around the bond matched by ``bond_smarts`` must be
    exactly the atoms matched by ``expected_pattern``.

    The expected values were generated using fragmenter=0.0.7
    """
    parent = smiles_to_molecule(input_smiles, True)
    molecule, _, functional_groups, ring_systems = Fragmenter._prepare_molecule(
        parent, default_functional_groups(), False
    )

    first_match = molecule.chemical_environment_matches(bond_smarts)[0]
    bond = tuple(get_map_index(molecule, i) for i in first_match)

    # noinspection PyTypeChecker
    atoms, bonds = Fragmenter._get_torsion_quartet(molecule, bond)
    atoms, bonds = Fragmenter._get_ring_and_fgroups(
        molecule, functional_groups, ring_systems, atoms, bonds
    )

    # Hydrogens are ignored when comparing against the expected pattern.
    actual_atoms = set()
    for map_index in atoms:
        atom = molecule.atoms[get_atom_index(molecule, map_index)]
        if atom.atomic_number != 1:
            actual_atoms.add(map_index)

    expected_atoms = set()
    for match in molecule.chemical_environment_matches(expected_pattern):
        expected_atoms.update(get_map_index(molecule, i) for i in match)

    assert actual_atoms == expected_atoms
def test_cap_open_valance():
    """Check that capping adds back the methyl carbon bonded to the nitrogen."""
    molecule, _, functional_groups, ring_systems = Fragmenter._prepare_molecule(
        smiles_to_molecule("CNCCc1ccccc1", True), default_functional_groups(), False
    )

    methyl_match = molecule.chemical_environment_matches("[#7]-[#6H3:1]")[0]
    expected_atom = get_map_index(molecule, methyl_match[0])

    bond_match = molecule.chemical_environment_matches("[#6a:1]-[#6H2:2]")[0]
    target_bond = tuple(get_map_index(molecule, i) for i in bond_match)

    # noinspection PyTypeChecker
    atoms, bonds = Fragmenter._get_torsion_quartet(molecule, target_bond)
    atoms, bonds = Fragmenter._get_ring_and_fgroups(
        molecule, functional_groups, ring_systems, atoms, bonds
    )

    # Take the cap atom out of the current set (in place) so that the assertion
    # below can only pass if the capping step puts it back.
    atoms.discard(expected_atom)

    atoms, _ = Fragmenter._cap_open_valence(molecule, functional_groups, atoms, bonds)

    # The carbon bonded to N must have been added.
    assert expected_atom in atoms
def test_add_substituent():
    """Check that adding the next substituent grows the pentane fragment to hexane."""
    result = WBOFragmenter().fragment(Molecule.from_smiles("CCCCCC"))

    fragment = result.fragments_by_bond[(3, 5)].molecule
    assert fragment.to_smiles(mapped=False, explicit_hydrogens=False) == "CCCCC"

    # Only heavy atoms, and bonds between heavy atoms, describe the fragment.
    heavy_atoms = {
        get_map_index(fragment, index)
        for index, atom in enumerate(fragment.atoms)
        if atom.atomic_number != 1
    }
    heavy_bonds = {
        (
            get_map_index(fragment, bond.atom1_index),
            get_map_index(fragment, bond.atom2_index),
        )
        for bond in fragment.bonds
        if 1 not in (bond.atom1.atomic_number, bond.atom2.atomic_number)
    }

    fragment, _ = WBOFragmenter._add_next_substituent(
        result.parent_molecule,
        {},
        {},
        {},
        heavy_atoms,
        heavy_bonds,
        target_bond=(3, 5),
    )

    assert fragment.to_smiles(mapped=False, explicit_hydrogens=False) == "CCCCCC"
def _find_stereo(cls, molecule: Molecule) -> Stereochemistries:
    """Record the stereochemistry of every stereo-defined atom and bond.

    Notes
    -----
    * This is needed to check if a fragment has flipped chirality. Currently this
      can happen and it is a bug.

    Parameters
    ----------
    molecule
        The molecule to search for stereochemistry.

    Returns
    -------
        A dictionary holding the stereochemistry of each atom (keyed by its map
        index) and of each bond (keyed by the tuple of the map indices of its two
        atoms) whose ``stereochemistry`` attribute is not ``None``.
    """
    stereo = {}

    for atom in molecule.atoms:
        if atom.stereochemistry is None:
            continue

        stereo[get_map_index(molecule, atom.molecule_atom_index)] = atom.stereochemistry

    for bond in molecule.bonds:
        if bond.stereochemistry is None:
            continue

        map_tuple = (
            get_map_index(molecule, bond.atom1_index),
            get_map_index(molecule, bond.atom2_index),
        )
        stereo[map_tuple] = bond.stereochemistry

    return stereo
def _find_non_rotor_ring_substituents(
    cls, molecule: Molecule, ring_system_atoms: Set[int]
) -> AtomAndBondSet:
    """Find the non-rotor substituents attached to a particular ring system.

    A bond is reported when it is not a rotor bond, joins two heavy atoms, and has
    exactly one of its atoms inside the ring system. A rotor bond here is one
    returned by ``molecule.find_rotatable_bonds()`` whose two atoms each have a
    heavy degree of at least two.

    Parameters
    ----------
    molecule
        The molecule to search for non-rotor ring substituents.
    ring_system_atoms
        The map indices of the atoms in the ring system of interest.

    Returns
    -------
        The map indices of the atoms and of the bonds that form the non-rotor
        links between the ring system and its surroundings.
    """

    def count_heavy_neighbours(atom: Atom) -> int:
        return len([n for n in atom.bonded_atoms if n.atomic_number != 1])

    rotor_bonds = [
        bond
        for bond in molecule.find_rotatable_bonds()
        if count_heavy_neighbours(bond.atom1) >= 2
        and count_heavy_neighbours(bond.atom2) >= 2
    ]

    substituent_atoms, substituent_bonds = set(), set()

    for bond in molecule.bonds:

        # Rotor bonds are never treated as ring substituents.
        if bond in rotor_bonds:
            continue

        # Bonds to hydrogen are not of interest.
        if 1 in (bond.atom1.atomic_number, bond.atom2.atomic_number):
            continue

        first = get_map_index(molecule, bond.atom1_index)
        second = get_map_index(molecule, bond.atom2_index)

        # Keep only bonds that cross the ring system boundary, i.e. those with
        # exactly one end inside the ring system.
        if (first in ring_system_atoms) == (second in ring_system_atoms):
            continue

        substituent_atoms.update((first, second))
        substituent_bonds.add((first, second))

    return substituent_atoms, substituent_bonds
def _get_torsion_quartet(
    cls, molecule: Molecule, bond: BondTuple
) -> AtomAndBondSet:
    """Collect the atoms and bonds within two bonds of either atom of ``bond``.

    Parameters
    ----------
    molecule
        The molecule containing the rotatable bond.
    bond
        The map indices of the two atoms in the bond.

    Returns
    -------
        The map indices of the atoms in the quartet and the bonds in the quartet.
        Bond tuples are stored in the direction they were walked, so the same bond
        may appear in both orientations.
    """

    def map_index_of(atom) -> int:
        return get_map_index(molecule, atom.molecule_atom_index)

    quartet_atoms = set(bond)
    quartet_bonds = {bond}

    # The atoms of the central bond, looked up through the molecule's atom map.
    central_atoms = [
        molecule.atoms[atom_index]
        for atom_index, map_index in molecule.properties["atom_map"].items()
        if map_index in bond
    ]

    for central_atom in central_atoms:

        central_map_index = map_index_of(central_atom)

        for first_shell in central_atom.bonded_atoms:

            first_map_index = map_index_of(first_shell)

            quartet_atoms.add(first_map_index)
            quartet_bonds.add((central_map_index, first_map_index))

            for second_shell in first_shell.bonded_atoms:

                second_map_index = map_index_of(second_shell)

                quartet_atoms.add(second_map_index)
                quartet_bonds.add((first_map_index, second_map_index))

    return quartet_atoms, quartet_bonds
def test_get_map_index_error(raise_error, expected_raises):
    """Check how ``get_map_index`` behaves for a molecule built from unmapped SMILES.

    ``expected_raises`` is the context manager describing the expected outcome for
    the given ``error_on_missing`` flag: either the lookup raises inside the
    ``with`` block, or it returns and the returned map index must be ``0``.
    """
    molecule = Molecule.from_smiles("C")

    with expected_raises:
        # Compare the returned index itself. The previous `... == 5` suffix stored
        # a boolean, so the assertion only passed because ``False == 0``.
        map_index = get_map_index(molecule, 0, error_on_missing=raise_error)
        assert map_index == 0
def test_get_torsion_quartet(input_smiles, expected_n_atoms, expected_n_bonds):
    """Check the number of atoms and bonds found in the torsion quartet."""
    molecule = smiles_to_molecule(input_smiles, True)

    bond_match = molecule.chemical_environment_matches("C[C:1][C:2]CCCC")[0]
    central_bond = (
        get_map_index(molecule, bond_match[0]),
        get_map_index(molecule, bond_match[1]),
    )

    atoms, bonds = Fragmenter._get_torsion_quartet(molecule, central_bond)

    # The counts also include explicit hydrogens.
    n_atoms, n_bonds = len(atoms), len(bonds)

    assert n_atoms == expected_n_atoms
    assert n_bonds == expected_n_bonds
def test_atom_bond_set_to_mol(abemaciclib):
    """Check that a fragment built from an atom / bond set only has the requested
    heavy atom bonds."""
    molecule = smiles_to_molecule(abemaciclib.to_smiles(mapped=False), True)

    pattern = "[C:1][C:2][N:3]1[C:4][C:5][N:6][C:7][C:8]1"

    atoms = set()
    for match in molecule.chemical_environment_matches(pattern):
        atoms.update(get_map_index(molecule, atom_index) for atom_index in match)

    # Keep every parent bond whose two ends are both in the selected atom set.
    bonds = set()
    for bond in molecule.bonds:
        if get_map_index(molecule, bond.atom1_index) not in atoms:
            continue
        if get_map_index(molecule, bond.atom2_index) not in atoms:
            continue

        bonds.add(
            (
                get_map_index(molecule, bond.atom1_index),
                get_map_index(molecule, bond.atom2_index),
            )
        )

    fragment, _ = Fragmenter._atom_bond_set_to_mol(
        molecule, {}, atoms=atoms, bonds=bonds
    )

    for bond in fragment.bonds:

        # Bonds to hydrogen are not part of the requested bond set.
        if 1 in (bond.atom1.atomic_number, bond.atom2.atomic_number):
            continue

        map_pair = sorted(
            get_map_index(fragment, index)
            for index in (bond.atom1_index, bond.atom2_index)
        )

        assert tuple(map_pair) in bonds
def _find_functional_groups(
    cls, molecule: Molecule, functional_groups: Dict[str, str]
) -> FunctionalGroups:
    """Locate the atoms and bonds of each functional group described by
    ``functional_groups``.

    Parameters
    ----------
    molecule
        The molecule to search for functional groups.
    functional_groups
        A dictionary of SMARTS of functional groups that should not be fragmented
        indexed by a friendly string label, e.g. 'alcohol: [#6:1]-[#8H1X2:2]'

    Returns
    -------
        A dictionary with one entry per unique match, keyed by ``"{label}_{i}"``
        where ``i`` counts the unique matches of that label. Each value is a tuple
        of the map indices of the matched atoms and of the bonds between them.
    """
    found_groups = {}

    for label, smarts in functional_groups.items():

        # Matches that cover the same atoms in a different order count once.
        unique_matches = set()

        for match in molecule.chemical_environment_matches(smarts):
            unique_matches.add(tuple(sorted(match)))

        for counter, matched_indices in enumerate(unique_matches):

            group_atoms = {
                get_map_index(molecule, index) for index in matched_indices
            }
            group_bonds = {
                (
                    get_map_index(molecule, bond.atom1_index),
                    get_map_index(molecule, bond.atom2_index),
                )
                for bond in molecule.bonds
                if bond.atom1_index in matched_indices
                and bond.atom2_index in matched_indices
            }

            found_groups[f"{label}_{counter}"] = (group_atoms, group_bonds)

    return found_groups
def _select_neighbour_by_wbo(
    cls, molecule: Molecule, atoms: Set[int]
) -> Optional[Tuple[int, BondTuple]]:
    """Pick the atom neighbouring the current fragment that is joined to it by the
    bond with the largest WBO.

    Only bonds between two heavy atoms are considered, and only those with exactly
    one end already in ``atoms``.

    Parameters
    ----------
    molecule
        The original molecule being fragmented.
    atoms
        The map indices of the atoms currently in the fragment.

    Returns
    -------
        A tuple of the map index of the atom to add and the map index tuple of the
        bond that joins it to the fragment, or ``None`` if the fragment has no
        heavy atom neighbours.
    """
    heavy_bond_orders = {}

    for bond in molecule.bonds:

        if bond.atom1.atomic_number == 1 or bond.atom2.atomic_number == 1:
            continue

        map_tuple = (
            get_map_index(molecule, bond.atom1_index),
            get_map_index(molecule, bond.atom2_index),
        )
        heavy_bond_orders[map_tuple] = bond.fractional_bond_order

    # Each candidate is ``(bond order, (outside atom, bond))`` where ``inside`` is
    # the position within the bond tuple of the atom already in the fragment.
    candidates = set()

    for inside in range(2):
        outside = 1 - inside

        for map_tuple, bond_order in heavy_bond_orders.items():

            if map_tuple[inside] not in atoms or map_tuple[outside] in atoms:
                continue

            candidates.add((bond_order, (map_tuple[outside], map_tuple)))

    if len(candidates) == 0:
        return None

    ranked = sorted(candidates, key=lambda candidate: candidate[0], reverse=True)

    return ranked[0][1]
def test_get_rotor_wbo():
    """Check that the single rotor of butane is returned with its assigned WBO."""
    molecule = smiles_to_molecule("CCCC", True)

    for bond in molecule.bonds:
        bond.fractional_bond_order = 0.986

    expected_bonds = set()
    for match in molecule.chemical_environment_matches("[#6:1]-[#6:2]"):
        expected_bonds.add(
            (get_map_index(molecule, match[0]), get_map_index(molecule, match[1]))
        )

    rotatable_bonds = WBOFragmenter.find_rotatable_bonds(molecule, None)
    rotors_wbo = WBOFragmenter._get_rotor_wbo(molecule, rotatable_bonds)

    assert len(rotors_wbo) == 1

    rotor_index = next(iter(rotors_wbo))
    rotor_wbo = rotors_wbo[rotor_index]

    assert rotor_index in expected_bonds
    assert numpy.isclose(rotor_wbo, 0.986, atol=0.001)
def _find_ortho_substituents(
    cls, parent: Molecule, bonds: Set[BondTuple]
) -> AtomAndBondSet:
    """Find ring substituents that are ortho to one of the rotatable bonds in
    ``bonds``.

    Parameters
    ----------
    parent
        The parent molecule being fragmented.
    bonds
        The map indices of the rotatable bonds.

    Returns
    -------
        The set of map indices of atoms in ortho group and of bond tuples in ortho
        group. Each bond tuple is sorted.
    """
    pattern = "[!#1:1]~&!@[*:2]@[*:3]~&!@[!#1*:4]"

    ortho_atoms = set()
    ortho_bonds = set()

    for match in parent.chemical_environment_matches(pattern):

        mapped = tuple(get_map_index(parent, i) for i in match)
        leading_bond = mapped[:2]

        # Only matches whose first two atoms form one of the bonds of interest,
        # in either direction, are relevant.
        if not (leading_bond in bonds or leading_bond[::-1] in bonds):
            continue

        # Record the two substituent atoms (first and last tagged atoms) and the
        # two non-ring bonds that attach them to the ring.
        ortho_atoms.update(mapped[::3])
        ortho_bonds.update((mapped[start], mapped[start + 1]) for start in [0, 2])

    # Sort each bond tuple so the same bond is not stored in both directions.
    ortho_bonds = {tuple(sorted(bond)) for bond in ortho_bonds}

    return ortho_atoms, ortho_bonds
def _oe_render_parent(
    parent: Molecule,
    rotor_bonds: Optional[Collection[BondTuple]] = None,
    image_width: int = 572,
    image_height: int = 198,
) -> str:
    """Render the parent molecule to an SVG string using the OpenEye toolkit.

    Parameters
    ----------
    parent
        The molecule to draw.
    rotor_bonds
        The bonds forwarded to ``_oe_wbo_label_display`` to build the bond property
        functor of the depiction. ``None`` is treated as an empty list.
    image_width
        The width of the image.
    image_height
        The height of the image.

    Returns
    -------
        The decoded SVG contents of the rendered image.
    """
    from openeye import oedepict

    if rotor_bonds is None:
        rotor_bonds = []

    # Map the OpenFF molecules into OE ones, making sure to explicitly set the atom
    # map on the OE object as this is not handled by the OpenFF toolkit.
    oe_parent = parent.to_openeye()

    for oe_atom in oe_parent.GetAtoms():
        oe_atom.SetMapIdx(get_map_index(parent, oe_atom.GetIdx(), False))

    oedepict.OEPrepareDepiction(oe_parent)

    # Configure how the molecule should be drawn.
    options = oedepict.OE2DMolDisplayOptions(
        image_width, image_height, oedepict.OEScale_AutoScale
    )
    options.SetTitleLocation(oedepict.OETitleLocation_Hidden)
    options.SetAtomColorStyle(oedepict.OEAtomColorStyle_WhiteMonochrome)
    options.SetAtomLabelFontScale(1.2)
    options.SetBondPropertyFunctor(_oe_wbo_label_display(rotor_bonds))

    display = oedepict.OE2DMolDisplay(oe_parent, options)

    image = oedepict.OEImage(image_width, image_height)
    oedepict.OERenderMolecule(image, display)

    return oedepict.OEWriteImageToString("svg", image).decode()
def test_get_ring_and_fgroup(input_smiles, bond_smarts, expected):
    """Check whether adding rings and functional groups changes the size of the
    torsion quartet; ``expected`` is ``True`` when the size should be unchanged."""
    molecule, _, functional_groups, ring_systems = Fragmenter._prepare_molecule(
        smiles_to_molecule(input_smiles, True), default_functional_groups(), False
    )

    bond_match = molecule.chemical_environment_matches(bond_smarts)[0]
    target_bond = tuple(get_map_index(molecule, i) for i in bond_match)

    # noinspection PyTypeChecker
    atoms, bonds = Fragmenter._get_torsion_quartet(molecule, target_bond)

    # Sort each bond tuple so a bond stored in both directions is counted once.
    bonds = {tuple(sorted(bond)) for bond in bonds}

    n_atoms_before, n_bonds_before = len(atoms), len(bonds)

    grown_atoms, grown_bonds = Fragmenter._get_ring_and_fgroups(
        molecule, functional_groups, ring_systems, atoms, bonds
    )

    assert (n_atoms_before == len(grown_atoms)) == expected
    assert (n_bonds_before == len(grown_bonds)) == expected
def _check_stereo(
    cls, fragment: Molecule, parent_stereo: Stereochemistries
) -> bool:
    """Compare the stereochemistry of a fragment against that of its parent.

    Parameters
    ----------
    fragment
        The fragment whose stereo should be compared to the parent.
    parent_stereo
        The stereochemistry of the parent molecule.

    Returns
    -------
        ``False`` as soon as a stereocenter is found on the fragment that is either
        missing from ``parent_stereo`` or has a different stereochemistry (a
        warning is logged in both cases), otherwise ``True``.
    """
    atom_stereocenters, bond_stereocenters = find_stereocenters(fragment)

    # Look for new / flipped chiral atoms.
    for atom_index in atom_stereocenters:

        map_index = get_map_index(fragment, atom_index)

        if map_index not in parent_stereo:
            logger.warning(f"A new stereocenter formed at atom {map_index}")
            return False

        fragment_stereo = fragment.atoms[atom_index].stereochemistry

        if fragment_stereo == parent_stereo[map_index]:
            continue

        logger.warning(
            f"Stereochemistry for atom {map_index} flipped from "
            f"{parent_stereo[map_index]} to {fragment_stereo}"
        )
        return False

    # Look for new / flipped stereo bonds.
    for index_tuple in bond_stereocenters:

        map_tuple = tuple(get_map_index(fragment, i) for i in index_tuple)

        # The parent may have stored the bond in the opposite direction.
        if map_tuple not in parent_stereo:
            map_tuple = map_tuple[::-1]

        if map_tuple not in parent_stereo:
            logger.warning(f"A new chiral bond formed at bond {map_tuple}")
            return False

        fragment_stereo = fragment.get_bond_between(*index_tuple).stereochemistry

        if fragment_stereo == parent_stereo[map_tuple]:
            continue

        logger.warning(
            f"Stereochemistry for bond {map_tuple} flipped from "
            f"{parent_stereo[map_tuple]} to {fragment_stereo}"
        )
        return False

    return True
def find_rotatable_bonds(
    cls, molecule: Molecule, target_bond_smarts: Optional[List[str]]
) -> List[BondTuple]:
    """Finds the rotatable bonds in a molecule *including* rotatable double bonds.

    Parameters
    ----------
    molecule
        The molecule to search for rotatable bonds.
    target_bond_smarts
        An optional list of SMARTS patterns that should be used to identify the bonds
        within the parent molecule to grow fragments around. Each SMARTS pattern
        should include **two** indexed atoms that correspond to the two atoms
        involved in the central bond.

        If no pattern is provided fragments will be constructed around all 'rotatable
        bonds'. A 'rotatable bond' here means any bond matched by a
        `[!$(*#*)&!D1:1]-,=;!@[!$(*#*)&!D1:2]` SMARTS pattern with the added
        constraint that the **heavy** degree (i.e. the degree excluding hydrogen) of
        both atoms in the bond must be >= 2.

    Returns
    -------
        A list of the **map** indices of the atoms that form the rotatable bonds,
        ``[(m1, m2),...]``.

    Raises
    ------
    ValueError
        If any match does not contain exactly two atoms.
    """
    use_default_pattern = target_bond_smarts is None

    if use_default_pattern:
        matches = molecule.chemical_environment_matches(
            "[!$(*#*)&!D1:1]-,=;!@[!$(*#*)&!D1:2]"
        )
    else:
        matches = []

        for smarts in target_bond_smarts:
            matches.extend(molecule.chemical_environment_matches(smarts))

    if any(len(match) != 2 for match in matches):

        raise ValueError(
            f"The `target_bond_smarts` pattern ({target_bond_smarts}) "
            f"must define exactly two indexed atoms to match."
        )

    # The same bond matched in both directions should only be reported once.
    unique_matches = {tuple(sorted(match)) for match in matches}

    if use_default_pattern:

        # Terminal bonds are excluded by requiring that both atoms have at least
        # two heavy neighbours.
        def heavy_degree(atom_index: int) -> int:
            neighbours = molecule.atoms[atom_index].bonded_atoms
            return sum(1 for neighbour in neighbours if neighbour.atomic_number != 1)

        unique_matches = {
            match
            for match in unique_matches
            if all(heavy_degree(atom_index) > 1 for atom_index in match)
        }

    return [
        (get_map_index(molecule, first), get_map_index(molecule, second))
        for first, second in unique_matches
    ]
def _rd_render_fragment(
    parent: Molecule,
    fragment: Molecule,
    bond_indices: BondTuple,
    image_width: int = 283,
    image_height: int = 169,
) -> str:
    """Draw the parent molecule with RDKit, highlighting the fragment and the bond
    it was built around.

    Parameters
    ----------
    parent
        The parent molecule, which is drawn with its hydrogens removed.
    fragment
        The fragment whose atoms, identified by their non-zero map indices, are
        highlighted on the parent.
    bond_indices
        The map indices of the two atoms of the bond that is highlighted in a
        different colour to the rest of the fragment.
    image_width
        The width of the image.
    image_height
        The height of the image.

    Returns
    -------
        The SVG text of the drawing.
    """
    from rdkit import Chem
    from rdkit.Chem import Draw
    from rdkit.Chem.rdDepictor import Compute2DCoords

    fragment_colour = (52.0 / 255.0, 143.0 / 255.0, 235.0 / 255.0)
    rotor_colour = (239.0 / 255.0, 134.0 / 255.0, 131.0 / 255.0)

    rd_parent: Chem.Mol = parent.to_rdkit()

    # Temporarily store the map indices on the RDKit atoms so that atoms can still
    # be identified after the hydrogens are removed.
    for rd_atom in rd_parent.GetAtoms():
        rd_atom.SetAtomMapNum(get_map_index(parent, rd_atom.GetIdx(), False))

    rd_parent = Chem.RemoveHs(rd_parent)
    Compute2DCoords(rd_parent)

    map_indices = set(fragment.properties["atom_map"].values())
    map_indices.discard(0)

    fragment_atom_indices = [
        rd_atom.GetIdx()
        for rd_atom in rd_parent.GetAtoms()
        if rd_atom.GetAtomMapNum() in map_indices
    ]

    fragment_bond_indices = []
    rotatable_bond_index = []

    for rd_bond in rd_parent.GetBonds():

        begin_map = rd_bond.GetBeginAtom().GetAtomMapNum()
        end_map = rd_bond.GetEndAtom().GetAtomMapNum()

        if begin_map in map_indices and end_map in map_indices:
            fragment_bond_indices.append(rd_bond.GetIdx())
        if begin_map in bond_indices and end_map in bond_indices:
            rotatable_bond_index.append(rd_bond.GetIdx())

    # Clear the map numbers again so they are not part of the drawing.
    for rd_atom in rd_parent.GetAtoms():
        rd_atom.SetAtomMapNum(0)

    highlighted_bonds = fragment_bond_indices + rotatable_bond_index

    drawer = Draw.MolDraw2DSVG(image_width, image_height)
    drawer.drawOptions().useBWAtomPalette()

    drawer.DrawMolecule(
        rd_parent,
        highlightAtoms=fragment_atom_indices,
        highlightAtomColors={index: fragment_colour for index in fragment_atom_indices},
        highlightBonds=highlighted_bonds,
        highlightBondColors={
            index: rotor_colour if index in rotatable_bond_index else fragment_colour
            for index in highlighted_bonds
        },
    )
    drawer.FinishDrawing()

    return drawer.GetDrawingText()
def _oe_render_fragment(
    parent: Molecule,
    fragment: Molecule,
    bond_indices: BondTuple,
    image_width: int = 283,
    image_height: int = 169,
) -> str:
    """Draw the parent molecule with OpenEye, greying out everything that is not
    part of the fragment and highlighting the bond the fragment was built around.

    Parameters
    ----------
    parent
        The parent molecule to draw.
    fragment
        The fragment whose atoms, identified by map index, stay un-greyed.
    bond_indices
        The map indices of the two atoms of the bond to highlight.
    image_width
        The width of the image.
    image_height
        The height of the image.

    Returns
    -------
        The decoded SVG contents of the rendered image.
    """
    from openeye import oechem, oedepict

    # Map the OpenFF molecules into OE ones, making sure to explicitly set the atom
    # map on the OE object as this is not handled by the OpenFF toolkit.
    oe_parent = parent.to_openeye()

    for oe_atom in oe_parent.GetAtoms():
        oe_atom.SetMapIdx(get_map_index(parent, oe_atom.GetIdx(), False))

    oedepict.OEPrepareDepiction(oe_parent)

    oe_fragment = fragment.to_openeye()

    for oe_atom in oe_fragment.GetAtoms():
        oe_atom.SetMapIdx(get_map_index(fragment, oe_atom.GetIdx(), False))

    # Find the bond of interest on the parent through the map indices of its atoms.
    bond_begin = oe_parent.GetAtom(oechem.OEHasMapIdx(bond_indices[0]))
    bond_end = oe_parent.GetAtom(oechem.OEHasMapIdx(bond_indices[1]))
    oe_parent_bond = oe_parent.GetBond(bond_begin, bond_end)

    # Configure how the molecule should be drawn.
    image = oedepict.OEImage(image_width, image_height)

    options = oedepict.OE2DMolDisplayOptions(
        image_width, image_height, oedepict.OEScale_AutoScale
    )
    options.SetTitleLocation(oedepict.OETitleLocation_Hidden)
    options.SetAtomColorStyle(oedepict.OEAtomColorStyle_WhiteMonochrome)
    options.SetAtomLabelFontScale(1.2)
    # NOTE: no bond property functor is set here; the call below is disabled.
    # options.SetBondPropertyFunctor(_oe_wbo_label_display({bond_indices}))

    display = oedepict.OE2DMolDisplay(oe_parent, options)

    # Grey out every atom and bond that the fragment predicates do not select.
    fragment_map_indices = {oe_atom.GetMapIdx() for oe_atom in oe_fragment.GetAtoms()}
    in_fragment_atom, in_fragment_bond = _oe_fragment_predicates(fragment_map_indices)

    outside_atoms = oechem.OENotAtom(in_fragment_atom)
    outside_bonds = oechem.OENotBond(in_fragment_bond)

    oedepict.OEAddHighlighting(
        display,
        oedepict.OEHighlightByColor(oechem.OEGrey, 0.75),
        outside_atoms,
        outside_bonds,
    )

    # Highlight the bond of interest together with its two atoms.
    highlighted = oechem.OEAtomBondSet()
    highlighted.AddBond(oe_parent_bond)
    highlighted.AddAtom(oe_parent_bond.GetBgn())
    highlighted.AddAtom(oe_parent_bond.GetEnd())

    oedepict.OEAddHighlighting(
        display,
        oechem.OEColor(oechem.OELimeGreen),
        oedepict.OEHighlightStyle_BallAndStick,
        highlighted,
    )

    oedepict.OERenderMolecule(image, display)

    return oedepict.OEWriteImageToString("svg", image).decode()
def _select_neighbour_by_path_length(
    cls, molecule: Molecule, atoms: Set[int], target_bond: BondTuple
) -> Optional[Tuple[int, BondTuple]]:
    """Pick the atom neighbouring the current fragment that lies closest to the
    target bond.

    The candidates are the heavy atoms bonded to an atom in ``atoms`` that are not
    themselves in ``atoms``. For every candidate the shortest path length to each
    of the two atoms of ``target_bond`` is computed. If one target atom has a
    strictly smaller minimum path length than the other, the candidates are ranked
    by their distance to that atom (closest first). Otherwise only the candidates
    that sit at the minimum path length from either target atom are kept, and they
    are ranked by the fractional bond order of the bond joining them to the
    fragment (largest first).

    Parameters
    ----------
    molecule
        The original molecule being fragmented.
    atoms
        The map indices of the atoms currently in the fragment.
    target_bond
        The map indices of the two atoms of the bond the fragment is built around.

    Returns
    -------
        A tuple of the map index of the atom to add and the map index tuple
        ``(fragment atom, atom to add)`` of the bond that joins it to the fragment,
        or ``None`` if the fragment has no heavy atom neighbours left to add.
    """
    atom_indices = {get_atom_index(molecule, atom) for atom in atoms}

    atoms_to_add = [
        (atom_index, neighbour.molecule_atom_index)
        for atom_index in atom_indices
        for neighbour in molecule.atoms[atom_index].bonded_atoms
        if neighbour.atomic_number != 1
        and neighbour.molecule_atom_index not in atom_indices
    ]

    # Nothing left to add. This must be checked *before* the path lengths are
    # computed: unpacking ``zip(*...)`` over an empty sequence raises a
    # ``ValueError`` rather than yielding two empty tuples. Returning here also
    # avoids building the graph when it is not needed.
    if not atoms_to_add:
        return None

    map_atoms_to_add = [
        (
            get_map_index(molecule, j),
            (get_map_index(molecule, i), get_map_index(molecule, j)),
        )
        for i, j in atoms_to_add
    ]

    # Compute the distance from each neighbouring atom to each of the atoms in the
    # target bond.
    nx_molecule = molecule.to_networkx()

    target_indices = [get_atom_index(molecule, atom) for atom in target_bond]

    path_lengths_1, path_lengths_2 = zip(
        *(
            tuple(
                networkx.shortest_path_length(
                    nx_molecule, target_index, neighbour_index
                )
                for target_index in target_indices
            )
            for _, neighbour_index in atoms_to_add
        )
    )

    reverse = False

    min_path_length_1 = min(path_lengths_1)
    min_path_length_2 = min(path_lengths_2)

    if min_path_length_1 < min_path_length_2:
        sort_by = path_lengths_1
    elif min_path_length_2 < min_path_length_1:
        sort_by = path_lengths_2
    else:

        # If there are multiple neighbouring atoms the same path length away
        # from the target bond fall back to sorting by the WBO.
        map_atoms_to_add = [
            map_tuple
            for map_tuple, *path_length_tuple in zip(
                map_atoms_to_add, path_lengths_1, path_lengths_2
            )
            if min_path_length_1 in path_length_tuple
        ]

        sort_by = [
            molecule.get_bond_between(
                get_atom_index(molecule, neighbour_bond[0]),
                get_atom_index(molecule, neighbour_bond[1]),
            ).fractional_bond_order
            for _, neighbour_bond in map_atoms_to_add
        ]

        # Larger bond orders should be preferred.
        reverse = True

    sorted_atoms = [
        a for _, a in sorted(zip(sort_by, map_atoms_to_add), reverse=reverse)
    ]

    return sorted_atoms[0] if sorted_atoms else None
def test_get_map_index():
    """Check that the map index given in a mapped SMILES is returned for an atom."""
    mapped_smiles = "[C:5]([H:1])([H:2])([H:3])([H:4])"
    molecule = Molecule.from_smiles(mapped_smiles)

    carbon_map_index = get_map_index(molecule, 0)

    assert carbon_map_index == 5
def _cap_open_valence(
    cls,
    parent: Molecule,
    parent_groups: FunctionalGroups,
    atoms: Set[int],
    bonds: Set[BondTuple],
) -> AtomAndBondSet:
    """Add the carbon neighbours of any N, O, S or functional group atom in the
    fragment that is bonded to a heavy atom outside the fragment.

    Parameters
    ----------
    parent
        The molecule being fragmented.
    parent_groups
        A dictionary of the functional groups on the molecule which should not
        be fragmented.
    atoms
        The map indices of the atoms in the fragment being constructed. This set
        is updated in place.
    bonds
        The map indices of the bonds in the fragment being constructed. This set
        is updated in place.

    Returns
    -------
        The updated ``atoms`` and ``bonds`` sets.
    """
    # Every map index that belongs to at least one functional group.
    grouped_map_indices = {
        map_index
        for functional_group in parent_groups
        for map_index in parent_groups[functional_group][0]
    }

    cap_atoms, cap_bonds = set(), set()

    for map_index in atoms:

        atom = parent.atoms[get_atom_index(parent, map_index)]

        # Only N, O, S and functional group atoms are candidates for capping.
        is_n_o_or_s = atom.atomic_number in (7, 8, 16)

        if not (is_n_o_or_s or map_index in grouped_map_indices):
            continue

        # The atom only needs capping if a heavy neighbour was left out of the
        # fragment.
        has_open_valence = any(
            get_map_index(parent, neighbour.molecule_atom_index) not in atoms
            and neighbour.atomic_number != 1
            for neighbour in atom.bonded_atoms
        )

        if not has_open_valence:
            continue

        for neighbour in atom.bonded_atoms:

            if neighbour.atomic_number != 6:
                continue

            carbon_map_index = get_map_index(parent, neighbour.molecule_atom_index)

            cap_atoms.add(carbon_map_index)
            cap_bonds.add((map_index, carbon_map_index))

    # The caps are only merged in after the loop as ``atoms`` cannot be modified
    # while it is being iterated over.
    atoms.update(cap_atoms)
    bonds.update(cap_bonds)

    return atoms, bonds
def _extract_rd_fragment(
    molecule: Molecule, atom_indices: Set[int], bond_indices: Set[Tuple[int, int]]
) -> Molecule:
    """Extract a fragment from a molecule using RDKit.

    The selected atoms and bonds, plus the hydrogens needed to keep each retained
    atom's total valence unchanged, are written out as a fragment SMILES which is
    then loaded back in as a new molecule.

    Parameters
    ----------
    molecule
        The molecule to extract the fragment from.
    atom_indices
        The **map** indices of the atoms to include in the fragment.
    bond_indices
        The **map** index pairs of the bonds to include in the fragment.

    Returns
    -------
        The fragment, created from the fragment SMILES with undefined
        stereochemistry allowed.
    """
    from rdkit import Chem

    # An editable copy is needed as hydrogens may be added further down.
    rd_molecule = Chem.RWMol(molecule.to_rdkit())
    rd_atoms_by_map: Dict[int, Chem.Atom] = {}

    # Restore the map indices as to_rdkit does not automatically add them.
    for atom in rd_molecule.GetAtoms():
        atom.SetAtomMapNum(get_map_index(molecule, atom.GetIdx()))
        rd_atoms_by_map[atom.GetAtomMapNum()] = atom

    # Convert the requested map indices into RDKit atom / bond indices.
    atoms_to_use = [get_atom_index(molecule, i) for i in atom_indices]
    bonds_to_use = [
        rd_molecule.GetBondBetweenAtoms(
            get_atom_index(molecule, pair[0]), get_atom_index(molecule, pair[1])
        ).GetIdx()
        for pair in bond_indices
    ]

    # Make sure to include any Hs bonded to the included atom set otherwise radicals
    # will form. Only hydrogens that carry a map index (>= 1) and are not already
    # part of the requested atom set are added here.
    for map_index in atom_indices:
        for neighbour in rd_atoms_by_map[map_index].GetNeighbors():
            if (
                neighbour.GetAtomicNum() != 1
                or neighbour.GetAtomMapNum() < 1
                or neighbour.GetAtomMapNum() in atom_indices
            ):
                continue

            atoms_to_use.append(neighbour.GetIdx())
            bonds_to_use.append(
                rd_molecule.GetBondBetweenAtoms(
                    rd_atoms_by_map[map_index].GetIdx(), neighbour.GetIdx()
                ).GetIdx()
            )

    # Add additional hydrogens to atoms where the total valence will change likewise to
    # ensure the valence does not change.
    rd_atoms_by_index = {atom.GetIdx(): atom for atom in rd_molecule.GetAtoms()}

    # Iterate over a snapshot of the list: new hydrogens are appended to
    # ``atoms_to_use`` inside the loop, and the membership checks below run
    # against the live list.
    for atom_index in [*atoms_to_use]:
        atom = rd_atoms_by_index[atom_index]

        old_valence = atom.GetTotalValence()
        new_valence = atom.GetTotalValence()

        # Subtract the contribution of every bond which has an end outside of the
        # set of atoms being kept.
        for neighbour_bond in rd_atoms_by_index[atom_index].GetBonds():
            if (
                neighbour_bond.GetBeginAtomIdx() in atoms_to_use
                and neighbour_bond.GetEndAtomIdx() in atoms_to_use
            ):
                continue

            new_valence -= neighbour_bond.GetValenceContrib(atom)

        if numpy.isclose(old_valence, new_valence):
            # Skip the cases where the valence won't change
            continue

        if (
            atom.GetAtomicNum() == 6
            and atom.GetIsAromatic()
            and sum(
                1 for bond_tuple in bond_indices if atom.GetAtomMapNum() in bond_tuple
            )
            == 1
        ):
            # This is likely a cap carbon which was retained from an existing ring. Its
            # aromaticity needs to be cleared before calling ``MolFragmentToSmiles``,
            # which will otherwise (understandably) be confused and throw an exception.
            atom.SetIsAromatic(False)

        # Add one hydrogen, via a non-aromatic single bond, for each unit of valence
        # (rounded to the nearest integer) that would otherwise be lost.
        for _ in range(int(numpy.rint(old_valence - new_valence))):
            new_atom = Chem.Atom(1)
            new_atom_index = rd_molecule.AddAtom(new_atom)

            rd_molecule.AddBond(atom_index, new_atom_index)

            new_bond = rd_molecule.GetBondBetweenAtoms(atom_index, new_atom_index)
            new_bond.SetBondType(Chem.BondType.SINGLE)
            new_bond.SetIsAromatic(False)

            atoms_to_use.append(new_atom_index)
            bonds_to_use.append(new_bond.GetIdx())

    fragment_smiles = Chem.MolFragmentToSmiles(rd_molecule, atoms_to_use, bonds_to_use)
    fragment = Molecule.from_smiles(fragment_smiles, allow_undefined_stereo=True)

    return fragment
def _find_ring_systems(
    cls,
    molecule: Molecule,
    functional_groups: FunctionalGroups,
    keep_non_rotor_ring_substituents: bool = False,
) -> RingSystems:
    """Find every ring system in a molecule, extended by any functional group that
    shares an atom with it.

    Parameters
    ----------
    molecule
        The molecule to search for ring systems.
    functional_groups
        A dictionary of the functional groups on the molecule which should not
        be fragmented.
    keep_non_rotor_ring_substituents
        If True, keep all non rotatable ring substituents. According to the
        benchmark, it is not necessary.

    Returns
    -------
        A dictionary, keyed by ring system index, of tuples of the map indices of
        the atoms and of the bonds in each ring system.
    """
    atom_to_ring_indices = find_ring_systems(molecule)

    # Group the map indices of the ring atoms by the ring system they belong to.
    ring_system_atoms = {
        ring_index: set() for ring_index in {*atom_to_ring_indices.values()}
    }

    for atom_index, ring_index in atom_to_ring_indices.items():
        ring_system_atoms[ring_index].add(get_map_index(molecule, atom_index))

    # Group the bonds whose two atoms are in the same ring system. The two
    # different fallback values ensure that a bond between two atoms that are not
    # in any ring system is never treated as a ring system bond.
    ring_system_bonds = defaultdict(set)

    for bond in molecule.bonds:

        first_ring_index = atom_to_ring_indices.get(bond.atom1_index, -1)
        second_ring_index = atom_to_ring_indices.get(bond.atom2_index, -2)

        if first_ring_index == second_ring_index:

            ring_system_bonds[first_ring_index].add(
                (
                    get_map_index(molecule, bond.atom1_index),
                    get_map_index(molecule, bond.atom2_index),
                )
            )

    # Extend each ring system with the functional groups, and optionally the
    # non-rotor substituents, that are attached to it.
    for ring_index, system_atoms in ring_system_atoms.items():

        system_bonds = ring_system_bonds[ring_index]

        # Any functional group that shares an atom with the ring system is
        # included in the ring system in full.
        attached_groups = {
            functional_group
            for map_index in system_atoms
            for functional_group in functional_groups
            if map_index in functional_groups[functional_group][0]
        }

        for functional_group in attached_groups:
            system_atoms.update(functional_groups[functional_group][0])

        for functional_group in attached_groups:
            system_bonds.update(functional_groups[functional_group][1])

        if keep_non_rotor_ring_substituents:

            non_rotor_atoms, non_rotor_bonds = cls._find_non_rotor_ring_substituents(
                molecule, system_atoms
            )

            system_atoms.update(non_rotor_atoms)
            system_bonds.update(non_rotor_bonds)

    return {
        ring_index: (ring_system_atoms[ring_index], ring_system_bonds[ring_index])
        for ring_index in ring_system_atoms
    }