def test_phenylene(self):
    """Split one tricyclic molecule into two overlapping parts and check Monster re-merges them.

    Makes carboxy and amide benzenes that overlap so that the end result is a
    phenylene where one ring is oxazine. The canonical SMILES of the merged
    molecule must equal the canonical SMILES of the starting molecule.
    """
    # Raw string: the SMILES contains a backslash (bond direction marker) that
    # must not be read as a Python escape sequence.
    conjoined = Chem.MolFromSmiles(r'c3c1cccc2\C(=O)O/C(-N)c(c12)cc3')
    before = Chem.MolToSmiles(conjoined)  # structure from wiki is not canonical
    AllChem.EmbedMolecule(conjoined)

    def split_off(atom_pairs, keep):
        """Break the bonds between the given atom pairs and return fragment number `keep`, sanitized."""
        bond_ids = [conjoined.GetBondBetweenAtoms(a, b).GetIdx() for a, b in atom_pairs]
        fragged = Chem.FragmentOnBonds(conjoined, bond_ids, addDummies=False)
        part = Chem.GetMolFrags(fragged, asMols=True, sanitizeFrags=False)[keep]
        Chem.SanitizeMol(part)
        return part

    fore = split_off([(0, 1), (12, 11), (8, 9)], keep=1)
    aft = split_off([(2, 1), (12, 5), (8, 6)], keep=0)

    # merge them
    mol = Monster([fore, aft]).combine().positioned_mol
    after = Chem.MolToSmiles(mol)
    self.assertEqual(before, after)
def get_ring_ring_splits(input_mol, labels=False, do_comb_index=False):
    """
    Break, one at a time, the non-ring bonds whose two atoms lie in different rings.

    :param input_mol: molecule to inspect (ring info and bonds are read from it)
    :param labels: if true, the dummy atoms are labelled with the begin/end
        atom indices of the broken bond
    :param do_comb_index: if true (and labels is false), both dummy atoms are
        labelled with get_comb_index(begin, end)
    :return: one list of fragment SMILES strings per collected bond; in each
        string "*" (or "1*" in the default mode) is replaced by "Xe"
    """
    # TODO Fix for fused e.g.s
    ring_info = input_mol.GetRingInfo()
    atom_rings = ring_info.AtomRings()
    ring_bond_ids = [bond_id for bond_ring in ring_info.BondRings() for bond_id in bond_ring]

    linker_bonds = []
    for bond in input_mol.GetBonds():
        if bond.GetIdx() in ring_bond_ids:
            continue
        begin_idx = bond.GetBeginAtomIdx()
        end_idx = bond.GetEndAtomIdx()
        # The bond is collected once for every (ring holding the begin atom,
        # different ring holding the end atom) combination.
        for first_ring in atom_rings:
            if begin_idx not in first_ring:
                continue
            for second_ring in atom_rings:
                if first_ring == second_ring:
                    continue
                if end_idx in second_ring:
                    linker_bonds.append(bond)

    out_mols = []
    for bond in linker_bonds:
        bond_id = bond.GetIdx()
        if labels:
            dummy_labels = [(bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())]
            placeholder = "*"
        elif do_comb_index:
            print(bond)
            comb_index = get_comb_index(bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())
            dummy_labels = [(comb_index, comb_index)]
            placeholder = "*"
        else:
            dummy_labels = [(1, 1)]
            # Only takes first
            placeholder = "1*"
        fragmented = Chem.FragmentOnBonds(input_mol, [bond_id], dummyLabels=dummy_labels)
        fragment_smiles = Chem.MolToSmiles(fragmented, isomericSmiles=True).split(".")
        out_mols.append([part.replace(placeholder, "Xe") for part in fragment_smiles])
    return out_mols
def detriangulate_tets():
    """Embed cyclopropane and fragment it two ways; nothing is returned."""
    # test triangle
    triangle = Chem.MolFromSmiles('C1CC1')
    AllChem.EmbedMolecule(triangle)
    # Break bonds 0 and 1 and keep the first resulting fragment.
    two_bond_cut = Chem.FragmentOnBonds(triangle, [0, 1], addDummies=False)
    partA = Chem.GetMolFrags(two_bond_cut, asMols=True)[0]
    # Break bond 2 only; the fragmented molecule is kept as a whole.
    partB = Chem.FragmentOnBonds(triangle, [2], addDummies=False)
def desquarify_test():
    """Embed cyclobutane, fragment it two ways and name the pieces; nothing is returned."""
    square = Chem.MolFromSmiles('C1CCC1')
    AllChem.EmbedMolecule(square)
    square.SetProp('_Name', 'cyclobutane')
    # Break bonds 0 and 2 and keep the first resulting fragment.
    first_cut = Chem.FragmentOnBonds(square, [0, 2], addDummies=False)
    partA = Chem.GetMolFrags(first_cut, asMols=True)[0]
    partA.SetProp('_Name', 'small')
    # Break bonds 1 and 3; the fragmented molecule is kept as a whole.
    partB = Chem.FragmentOnBonds(square, [1, 3], addDummies=False)
    partB.SetProp('_Name', 'big')
def make_pair_by_split(self, conjoined: Chem.Mol, atom_idx: int) -> Tuple[Chem.Mol, Chem.Mol]:
    """Make two overlapping molecules by splitting a single molecule around one atom.

    Splitting one molecule gives more control than Chem.rdMolAlign.AlignMol,
    as that may overlap other atoms (negative weights do not work).

    :param conjoined: the molecule to split
    :param atom_idx: index of the atom kept in both halves; it must be bonded
        to atoms ``atom_idx - 1`` and ``atom_idx + 1``
    :return: ``(fore, aft)``: fragment 0 after cutting ``atom_idx``/``atom_idx + 1``
        and fragment 1 after cutting ``atom_idx - 1``/``atom_idx``
    """
    def cut_between(first, second, keep):
        # Break the single bond first-second and hand back fragment number `keep`.
        bond_id = conjoined.GetBondBetweenAtoms(first, second).GetIdx()
        pieces = Chem.GetMolFrags(
            Chem.FragmentOnBonds(conjoined, [bond_id], addDummies=False),
            asMols=True)
        return pieces[keep]

    fore = cut_between(atom_idx, atom_idx + 1, 0)
    aft = cut_between(atom_idx - 1, atom_idx, 1)
    return fore, aft
def map_atoms(reactant, product):
    """Return the atom ordering in product that matches the reactant.

    Both molecules are deep-copied, kekulized (aromatic flags cleared) and
    passed through ``change_mol`` before matching, so the inputs are untouched.
    The result is whatever ``GetSubstructMatch`` returns on the product.
    """
    reac, prod = copy.deepcopy(reactant), copy.deepcopy(product)
    for working_copy in (reac, prod):
        Chem.Kekulize(working_copy, clearAromaticFlags=True)
    reac, prod = change_mol(reac), change_mol(prod)

    # Break Bond in reactant, in order to compare to product.
    ring_bond_pattern = Chem.MolFromSmarts('[CX4;H0;R]-[CX4;H1;R]')
    match = list(reac.GetSubstructMatch(ring_bond_pattern))
    if len(match) == 0:
        # Pattern absent: match the unmodified reactant against the product.
        return prod.GetSubstructMatch(reac)

    bond = reac.GetBondBetweenAtoms(match[0], match[1])
    opened_reactant = Chem.FragmentOnBonds(reac, [bond.GetIdx()], addDummies=False)
    # find new atom order for product
    return prod.GetSubstructMatch(opened_reactant)
def fragment_recursive(mol, frags):
    """Peel BRICS fragments off `mol` one bond at a time, appending them to `frags`.

    At each step the BRICS bond with the lowest bond index is broken, the first
    resulting fragment is appended to `frags`, and the second one is processed
    recursively. A molecule without BRICS bonds is appended as-is.

    :param mol: molecule to fragment
    :param frags: list that receives the fragments (mutated in place)
    :return: `frags` once the recursion bottoms out, or None if any step
        raised (fragments appended before the failure stay in `frags`)
    """
    try:
        bonds = list(BRICS.FindBRICSBonds(mol))
        if not bonds:
            frags.append(mol)
            return frags

        # Each BRICS hit is ((atom1, atom2), (label1, label2)); only the atoms matter here.
        first_bond_idx = min(
            mol.GetBondBetweenAtoms(a1, a2).GetIdx() for (a1, a2), _labels in bonds)

        broken = Chem.FragmentOnBonds(mol, bondIndices=[first_bond_idx], dummyLabels=[(0, 0)])
        # Unpacking fails (and is swallowed below) unless exactly two fragments result.
        head, tail = Chem.GetMolFrags(broken, asMols=True)
        print(mol_to_smiles(head), mol_to_smiles(tail))
        frags.append(head)
        # Propagate the result so every call path returns the accumulated list.
        return fragment_recursive(tail, frags)
    except Exception:
        # Best effort: a failed step ends the recursion and is reported as None.
        return None
def obtain_rings(smi):
    '''Collect the ring-containing fragments of a molecule given as SMILES.

    The molecule is cut at the bonds reported by ``get_rot_bonds_posn`` /
    ``get_bond_indeces`` and every resulting SMILES piece that contains the
    character '1' is kept.

    Examples:
    >>> obtain_rings('CCC1=CC=CC=C1')
    ['c1ccccc1']
    >>> obtain_rings('C1=CC=C(C=C1)C1=CC=CC=C1')
    ['c1ccccc1', 'c1ccccc1']
    >>> obtain_rings('C1=CC2=C(C=C1)C=CC=C2')
    (None, None)

    Parameters:
    smi (string) : SMILES string of a molecule

    Returns
    (list) : SMILES of the ring fragments, or the tuple (None, None) when no
             rotatable bond position is found
    '''
    mol = Chem.MolFromSmiles(smi)
    rotatable = get_rot_bonds_posn(mol)  # Get rotatable bond positions
    if len(rotatable) == 0:
        return None, None

    fragmented = Chem.FragmentOnBonds(mol, get_bond_indeces(mol, rotatable), addDummies=False)
    pieces = Chem.MolToSmiles(fragmented).split(".")
    # A ring-closure digit in the SMILES text marks a ring-containing piece.
    return [piece for piece in pieces if '1' in piece]
def c2apbp(info_pds_smiles):
    """Split a molecule at its reaction site (atoms mapped 1 and 4) and return the fragments.

    Every bond that joins two atoms whose atom-map number is 1 or 4 is broken.
    Isotope-labelled dummy atoms (``[<n>*]``) in the fragment SMILES are
    rewritten to a plain ``*`` and the result is re-canonicalized.

    :param info_pds_smiles: atom-mapped SMILES string
    :return: canonical SMILES of the fragments, or the input string unchanged
        when exactly one mapped atom is found or no bond joins the mapped atoms
    """
    mol = Chem.MolFromSmiles(info_pds_smiles)
    mapped_atom_idxs = [
        atom.GetIdx()
        for atom in mol.GetAtoms()
        if atom.HasProp('molAtomMapNumber') and atom.GetProp('molAtomMapNumber') in ('1', '4')
    ]
    if len(mapped_atom_idxs) == 1:
        return info_pds_smiles

    bond_ids = set()
    for x in mapped_atom_idxs:
        for y in mapped_atom_idxs:
            bond = mol.GetBondBetweenAtoms(x, y)
            if bond is not None:
                bond_ids.add(bond.GetIdx())
    if not bond_ids:
        return info_pds_smiles

    frags = Chem.FragmentOnBonds(mol, list(bond_ids))
    frags_smiles = Chem.MolToSmiles(frags, canonical=True)
    # Raw string so the regex backslashes are not treated as Python escapes.
    frags_smiles_sub = re.sub(r'\[([0-9]+)\*\]', '*', frags_smiles)

    stripped = Chem.MolFromSmiles(frags_smiles_sub)
    if stripped is None:
        # Not parseable as SMILES: fall back to reading the text as SMARTS.
        stripped = Chem.MolFromSmarts(frags_smiles_sub)
    return Chem.MolToSmiles(stripped, canonical=True)
def cut_ring(mol):
    """Try up to ten times to cut a ring system of `mol` into exactly two fragments.

    Each attempt picks, with equal probability, either a path of four ring
    atoms (cutting its first and last bond) or a path of three ring atoms whose
    middle atom does not have degree two (cutting both of its bonds), chooses
    one match at random and breaks the two bonds with dummy label 1.

    :param mol: molecule to cut
    :return: the two fragments as returned by ``Chem.GetMolFrags``, or None
        when the chosen pattern has no match, fragment extraction fails, or no
        attempt produced exactly two fragments
    """
    four_atom_path = Chem.MolFromSmarts("[R]@[R]@[R]@[R]")
    three_atom_path = Chem.MolFromSmarts("[R]@[R;!D2]@[R]")

    for _ in range(10):
        if random.random() < 0.5:
            matches = mol.GetSubstructMatches(four_atom_path)
            if not matches:
                return None
            hit = random.choice(matches)
            atom_pairs = ((hit[0], hit[1]), (hit[2], hit[3]))
        else:
            matches = mol.GetSubstructMatches(three_atom_path)
            if not matches:
                return None
            hit = random.choice(matches)
            atom_pairs = ((hit[0], hit[1]), (hit[1], hit[2]))

        bond_ids = [mol.GetBondBetweenAtoms(x, y).GetIdx() for x, y in atom_pairs]
        fragments_mol = Chem.FragmentOnBonds(
            mol, bond_ids, addDummies=True, dummyLabels=[(1, 1), (1, 1)]
        )
        try:
            fragments = Chem.GetMolFrags(fragments_mol, asMols=True)
        except Exception:
            # Fragment extraction failed; give up.
            return None

        if len(fragments) == 2:
            return fragments
    return None
def frag_on_bonds(m, atom_ids):
    """Fragment a copy of `m` on every bond that crosses the boundary of `atom_ids`.

    :param m: molecule to fragment (deep-copied, so it is not modified)
    :param atom_ids: container of atom indices defining the selection
    :return: result of ``Chem.FragmentOnBonds`` on the copy, broken at each
        bond with exactly one end inside `atom_ids`
    """
    boundary_bond_ids = []
    for bond in m.GetBonds():
        begin_inside = bond.GetBeginAtom().GetIdx() in atom_ids
        end_inside = bond.GetEndAtom().GetIdx() in atom_ids
        # Exactly one end in the selection means the bond crosses the boundary.
        if begin_inside != end_inside:
            boundary_bond_ids.append(bond.GetIdx())
    return Chem.FragmentOnBonds(copy.deepcopy(m), boundary_bond_ids)
def break_on_bond(mol, bond, min_length=3):
    """Break `mol` at bond index `bond` unless too few atoms would be left over.

    :param mol: molecule to break
    :param bond: index of the bond to break
    :param min_length: the molecule is left whole when
        ``mol.GetNumAtoms() - bond`` does not exceed this value
    :return: ``[mol]`` when the molecule is left whole, otherwise the
        unsanitized fragments from ``Chem.GetMolFrags``
    """
    margin = mol.GetNumAtoms() - bond
    if margin <= min_length:
        return [mol]
    return Chem.GetMolFrags(
        Chem.FragmentOnBonds(mol, bondIndices=[bond], dummyLabels=[(0, 0)]),
        asMols=True,
        sanitizeFrags=False,
    )
def remove_bonds(mol, list_of_atomiso_bondsets_to_remove):
    """
    This function removes bonds from an rdkit mol based on a provided list.
    Each entry of the list holds the isotope labels of the two atoms whose
    connecting bond is to be broken. Isotope labels are used because atom Idx
    values can change between operations.

    Inputs:
    :param rdkit.Chem.rdchem.Mol mol: any rdkit mol
    :param list list_of_atomiso_bondsets_to_remove: a list of indexable pairs
        of isotope labels; empty entries are skipped

    Returns:
    :returns: rdkit.Chem.rdchem.Mol new_mol: a copy of the input mol with the
        listed bonds broken, or None if mol is None, the list is empty, a
        listed pair is not bonded, fragmentation fails, or sanitization fails

    Raises:
    :raises TypeError: if mol is not an rdkit Mol/RWMol, or an entry does not
        hold exactly two isotope labels
    """
    # None's often end up in a pipeline use of RDKit so we handle this data type as return None
    # instead of raise TypeError
    if mol is None:
        return None

    # If mol is wrong data type (excluding None) raise TypeError
    if not isinstance(mol, (rdkit.Chem.rdchem.Mol, rdkit.Chem.rdchem.RWMol)):
        printout = "mol is the wrong data type. \n"
        printout = printout + "Input should be a rdkit.Chem.rdchem.Mol\n"
        printout = printout + "Input mol was {} type.".format(type(mol))
        raise TypeError(printout)

    if len(list_of_atomiso_bondsets_to_remove) == 0:
        return None

    new_mol = copy.deepcopy(mol)
    for atomiso_bondsets in list_of_atomiso_bondsets_to_remove:
        if len(atomiso_bondsets) == 0:
            continue
        if len(atomiso_bondsets) != 2:
            printout = "list_of_atomiso_bondsets_to_remove needs to be 2 isolabels for the atoms"
            raise TypeError(printout)

        atom_1_idx = int(get_atom_w_iso_num(new_mol, atomiso_bondsets[0]))
        atom_2_idx = int(get_atom_w_iso_num(new_mol, atomiso_bondsets[1]))

        # FragmentOnBonds expects BOND indices, so look up the bond that joins
        # the two atoms instead of passing the atom indices themselves.
        bond = new_mol.GetBondBetweenAtoms(atom_1_idx, atom_2_idx)
        if bond is None:
            return None

        try:
            new_mol = Chem.FragmentOnBonds(new_mol, [bond.GetIdx()], addDummies=False)
        except Exception:
            return None

        new_mol = MOH.check_sanitization(new_mol)
        if new_mol is None:
            return None

    # Final check also covers the case where every entry was empty and skipped.
    new_mol = MOH.check_sanitization(new_mol)
    if new_mol is None:
        return None
    return new_mol
def GetFragments(
    smiles: str,
    mol: Chem.rdchem.Mol,
    neighbor_ids: list,
    atomic_nums: list,
    bond_id_matrix: list,
    bond_type_matrix: list,
) -> Tuple[list, list, list]:
    """Fragment the molecule with isolated carbons method, see Lian and
    Yalkowsky, JOURNAL OF PHARMACEUTICAL SCIENCES 103:2710-2723.

    :param smiles: SMILES of the molecule; only used in the log message
    :param mol: molecule to fragment
    :param neighbor_ids: per-atom neighbor information passed to ``NeighborIDs``
    :param atomic_nums: atomic number of every atom, indexed by atom id
    :param bond_id_matrix: ``bond_id_matrix[i][j]`` is the id of the bond i-j
    :param bond_type_matrix: ``bond_type_matrix[i][j]`` is the type of the bond i-j
    :return: ``(faids, fsmiles, fsmarts)``: atom ids, SMILES and SMARTS of each
        fragment. If fragmentation raises, the unfragmented molecule is used.
    """
    # carbons
    carbon_ids = [i for (i, x) in enumerate(atomic_nums) if x == 6]
    # carbon neighbor ids
    carbon_neighbor_ids = NeighborIDs(neighbor_ids, atomic_nums, carbon_ids)
    # bond ids
    bond_ids = [[bond_id_matrix[cid][nid] for nid in nids]
                for (cid, nids) in zip(carbon_ids, carbon_neighbor_ids)]
    # bond types
    bond_types = [[bond_type_matrix[cid][nid] for nid in nids]
                  for (cid, nids) in zip(carbon_ids, carbon_neighbor_ids)]
    # broken bond ids
    breaking_bond_ids = FindBreakingBonds(carbon_neighbor_ids, bond_ids, bond_types, atomic_nums)

    # break bonds, get fragments
    try:
        fmol = Chem.FragmentOnBonds(
            mol, UniqueElements(list(itertools.chain.from_iterable(breaking_bond_ids))))
    except Exception:
        # Best effort: fall back to the whole molecule and record which input failed.
        fmol = mol
        logging.info("fragmentation exception: %s", smiles)

    # fragment atom ids
    faids = [list(x) for x in Chem.rdmolops.GetMolFrags(fmol)]
    # fragment smiles
    fsmiles = [Chem.rdmolfiles.MolFragmentToSmiles(fmol, frag) for frag in faids]
    # fragment smarts
    fsmarts = [Chem.rdmolfiles.MolFragmentToSmarts(fmol, frag) for frag in faids]
    return faids, fsmiles, fsmarts
def all_fragment_on_bond(mol, asMols=False, max_num_action=float("Inf"), break_aromatic=True):
    """Fragment all possible bonds in a molecule and return the set of resulting fragments.

    This is similar to `random_bond_cut`, but is not stochastic as it does not return a random
    fragment but all the fragments resulting from all potential bond breaks in the molecule.

    .. note::
        This will always be a subset of all_bond_remove, the main difference being that
        all_bond_remove allows decreasing bond count, while this one will always break
        a molecule into two.

    .. note::
        When `break_aromatic` is true the input `mol` itself is kekulized with its
        aromatic flags cleared, i.e. the argument is modified in place.

    Args:
        mol: <Chem.Mol>
            input molecule
        asMols: bool, optional
            Whether to return results as mols or smiles
        max_num_action: float, optional
            Maximum number of actions to reduce complexity. Collection stops as soon as
            the set holds more than this many fragments.
        break_aromatic: bool, optional
            Whether to attempt to break even aromatic bonds
            (Default: True)

    Returns:
        set of fragments (mols if `asMols`, otherwise whatever `dm.to_smiles` returns)
    """
    # Return value is discarded. NOTE(review): presumably forces ring perception - confirm it is needed.
    mol.GetRingInfo().AtomRings()
    fragment_set = set([])
    bonds = list(mol.GetBonds())
    # Set by the inner loop to make the outer loop stop as well.
    stop = False
    if bonds:
        if break_aromatic:
            Chem.Kekulize(mol, clearAromaticFlags=True)
        for bond in bonds:
            if stop:
                break
            if break_aromatic or not bond.GetIsAromatic():
                truncate = Chem.FragmentOnBonds(mol, [bond.GetIdx()], addDummies=False)
                truncate = dm.sanitize_mol(truncate)
                # A None result from sanitize_mol means this cut is skipped.
                if truncate is not None:
                    for frag in rdmolops.GetMolFrags(truncate, asMols=True):
                        frag = dm.sanitize_mol(frag)
                        if frag:
                            if not asMols:
                                frag = dm.to_smiles(frag)
                            fragment_set.add(frag)
                        if len(fragment_set) > max_num_action:
                            stop = True
                            break
    return fragment_set
def break_bonds(mol_dict, num_bonds):
    """Break each of the first `num_bonds` bonds of every molecule, one bond at a time.

    :param mol_dict: mapping whose values are the molecules to fragment
    :param num_bonds: bond indices ``0 .. num_bonds - 1`` are broken in turn
        on every molecule
    :return: ``(frag_smiles, mol_frags)``: the distinct non-isomeric SMILES of
        the fragmented molecules in first-seen order, and a dict mapping each
        of those SMILES to the first fragmented molecule that produced it
    """
    frag_smiles = []
    mol_frags = {}
    for mol in mol_dict.values():
        for bond_idx in range(num_bonds):
            mol_frag = Chem.FragmentOnBonds(mol, [bond_idx], addDummies=False)
            smiles = Chem.MolToSmiles(mol_frag, isomericSmiles=False)
            # The dict has the same keys as the list; testing it avoids a linear scan.
            if smiles not in mol_frags:
                frag_smiles.append(smiles)
                mol_frags[smiles] = mol_frag
    return frag_smiles, mol_frags
def cut(mol):
    """Break one randomly chosen single, non-ring bond of `mol`.

    :param mol: molecule to cut
    :return: the fragments from ``Chem.GetMolFrags`` (dummy atoms labelled 1
        mark the cut), or None when `mol` has no single non-ring bond or
        fragment extraction fails
    """
    acyclic_single_bond = Chem.MolFromSmarts('[*]-;!@[*]')
    matches = mol.GetSubstructMatches(acyclic_single_bond)
    if not matches:
        return None

    bis = random.choice(matches)  # single bond not in ring
    bs = [mol.GetBondBetweenAtoms(bis[0], bis[1]).GetIdx()]
    fragments_mol = Chem.FragmentOnBonds(mol, bs, addDummies=True, dummyLabels=[(1, 1)])

    try:
        return Chem.GetMolFrags(fragments_mol, asMols=True)
    except Exception:
        # Fragment extraction failed; report it as "no cut".
        return None
def get_fragments(input_mol, iso_labels=True, get_index_iso_map=False):
    """
    Find the fragments for a given molecule by breaking every bond matched by SMARTS_PATTERN.

    :param input_mol: molecule to fragment
    :param iso_labels: if true, the dummy atoms of the n-th broken bond are
        labelled 100 + n; otherwise they are labelled with
        get_comb_index(atom1, atom2)
    :param get_index_iso_map: if true (and iso_labels is true or nothing
        matched), also return the label -> [bond index] map
    :return: the list from get_frag_list, optionally paired with the map
    """
    index_isotope_map = {}
    matches = input_mol.GetSubstructMatches(Chem.MolFromSmarts(SMARTS_PATTERN))

    if matches and not iso_labels:
        bond_ids = []
        dummy_labels = []
        for pair in matches:
            bond_ids.append(input_mol.GetBondBetweenAtoms(pair[0], pair[1]).GetIdx())
            comb_index = get_comb_index(pair[0], pair[1])
            dummy_labels.append((comb_index, comb_index))
        input_mol = Chem.FragmentOnBonds(input_mol, bond_ids, dummyLabels=dummy_labels)
        # This path returns the bare list regardless of get_index_iso_map.
        return get_frag_list(str_find="*", input_mol=input_mol)

    if matches:
        bond_ids = []
        dummy_labels = []
        for isotope, pair in enumerate(matches, start=100):
            bond_id = input_mol.GetBondBetweenAtoms(pair[0], pair[1]).GetIdx()
            index_isotope_map.setdefault(isotope, []).append(bond_id)
            dummy_labels.append((isotope, isotope))
            bond_ids.append(bond_id)
        input_mol = Chem.FragmentOnBonds(input_mol, bond_ids, dummyLabels=dummy_labels)

    frag_list = get_frag_list(str_find="*", input_mol=input_mol)
    if get_index_iso_map:
        return frag_list, index_isotope_map
    return frag_list
def enumerate(self, mol, cuts):
    """
    Enumerates all possible combination of slicings of a molecule given a number of cuts.
    :param mol: A mol object with the molecule to slice.
    :param cuts: The number of cuts to perform.
    :return : A list with all the possible (scaffold, decorations) pairs as SlicedMol objects,
              restricted to those accepted by self._filter.
    """
    def count_attachment_points(fragment):
        # Number of attachment-point atoms in one fragment.
        return sum(1 for atom in fragment.GetAtoms()
                   if atom.GetSymbol() == ATTACHMENT_POINT_TOKEN)

    sliced_mols = set()
    for atom_pairs_to_cut in itertools.combinations(self._get_matches(mol), cuts):
        bond_ids = sorted(
            mol.GetBondBetweenAtoms(first, second).GetIdx()
            for first, second in atom_pairs_to_cut)
        labels = [(label, label) for label in range(len(bond_ids))]
        cut_mol = rkc.FragmentOnBonds(mol, bondIndices=bond_ids, dummyLabels=labels)

        # Move the label of every attachment point from its isotope to its atom-map number.
        for atom in cut_mol.GetAtoms():
            if atom.GetSymbol() != ATTACHMENT_POINT_TOKEN:
                continue
            label = atom.GetIsotope()
            atom.SetIsotope(0)
            atom.SetProp("molAtomMapNumber", str(label))
        cut_mol.UpdatePropertyCache()

        fragments = rkc.GetMolFrags(cut_mol, asMols=True, sanitizeFrags=True)

        if cuts == 1:
            # With a single cut either fragment can act as the scaffold.
            first, second = fragments[0], fragments[1]
            sliced_mols.add(SlicedMol(first, [second]))
            sliced_mols.add(SlicedMol(second, [first]))
            continue

        # detect whether there is one fragment with as many attachment points as cuts (scaffold)
        # the rest are decorations
        scaffold = None
        decorations = []
        for frag in fragments:
            if count_attachment_points(frag) == cuts and not scaffold:
                scaffold = frag
            else:
                decorations.append(frag)
        if scaffold:
            sliced_mols.add(SlicedMol(scaffold, decorations))

    return list(filter(self._filter, sliced_mols))
def delete_bonds(mol, bonds, ftype, hac):
    """ Fragment molecule on bonds and reduce to fraggle fragmentation SMILES.
    If none exists, returns None

    :param mol: molecule to fragment
    :param bonds: iterable of atom-index pairs, each identifying one bond
    :param ftype: passed through to select_fragments
    :param hac: passed through to select_fragments
    :return: whatever select_fragments returns for the resulting fragments
    """
    # Replace the given bonds with attachment points (B1-B2 -> B1-[*].[*]-B2)
    bond_ids = []
    for atom_pair in bonds:
        bond_ids.append(mol.GetBondBetweenAtoms(*atom_pair).GetIdx())
    unlabelled = [(0, 0) for _ in bond_ids]
    cut_mol = Chem.FragmentOnBonds(mol, bond_ids, dummyLabels=unlabelled)

    # should be able to get away without sanitising mol as the valencies should be okay
    # do not do a full sanitization, but do find rings and calculate valences:
    partial_ops = (Chem.SanitizeFlags.SANITIZE_PROPERTIES
                   | Chem.SanitizeFlags.SANITIZE_SYMMRINGS)
    Chem.SanitizeMol(cut_mol, partial_ops)

    pieces = Chem.GetMolFrags(cut_mol, asMols=True, sanitizeFrags=False)
    return select_fragments(pieces, ftype, hac)
def spf(mol, split_id):
    """Split `mol` at the first bond accepted by okToBreak.

    The two dummy atoms created by the split get their atomic number set to
    `split_id`.

    :param mol: molecule to split
    :param split_id: atomic number written onto the two dummy atoms
    :return: the fragments from ``GetMolFrags``, or ``[mol]`` if no bond can
        be broken
    """
    for bond_idx, bond in enumerate(mol.GetBonds()):
        if not okToBreak(bond):
            continue
        cut_mol = Chem.FragmentOnBonds(mol, [bond_idx], addDummies=True, dummyLabels=[(0, 0)])
        # Dummy atoms are always added last
        atom_count = cut_mol.GetNumAtoms()
        for dummy_idx in (atom_count - 1, atom_count - 2):
            cut_mol.GetAtomWithIdx(dummy_idx).SetAtomicNum(split_id)
        return Chem.rdmolops.GetMolFrags(cut_mol, asMols=True)

    # If the molecule could not been split, return original molecule
    return [mol]
def merge(self, scaffold: Chem.Mol, fragmentanda: Chem.Mol, anchor_index: int,
          attachment_details: List[Dict]) -> Chem.Mol:
    """Graft the anchor-side piece of `fragmentanda` onto `scaffold`, once per attachment detail.

    For every entry the bond between `anchor_index` and the entry's ``'idx_F'``
    atom is broken in `fragmentanda`, the fragment containing the anchor atom
    is combined with the current scaffold, and a bond of the entry's ``'type'``
    is added between the anchor and the scaffold atom ``'idx_S'``. The combined
    molecule becomes the scaffold for the next entry.

    :param scaffold: molecule that receives the fragment(s)
    :param fragmentanda: molecule the fragment is cut from
    :param anchor_index: index (in `fragmentanda`) of the atom that gets bonded to the scaffold
    :param attachment_details: dicts with keys ``'idx_F'``, ``'idx_S'`` and ``'type'``
    :return: the scaffold after all attachments (the input scaffold if the list is empty)
    :raises Exception: if no fragment contains `anchor_index`
    """
    for detail in attachment_details:
        attachment_index = detail['idx_F']  # fragmentanda attachment_index
        scaffold_attachment_index = detail['idx_S']
        bond_type = detail['type']
        f = Chem.FragmentOnBonds(fragmentanda,
                                 [fragmentanda.GetBondBetweenAtoms(anchor_index, attachment_index).GetIdx()],
                                 addDummies=False)
        # Filled by GetMolFrags with, per fragment, the original atom indices it contains.
        frag_split = []
        fragmols = Chem.GetMolFrags(f, asMols=True, fragsMolAtomMapping=frag_split, sanitizeFrags=False)
        if self._debug_draw:
            print(frag_split)
        # Get the fragment of interest.
        # NOTE(review): `ii` is accumulated but never read afterwards.
        ii = 0
        for mol_N, indices in enumerate(frag_split):
            if anchor_index in indices:
                break
            ii += len(indices)
        else:
            # No fragment holds the anchor atom.
            raise Exception
        frag = fragmols[mol_N]
        # Position of the anchor atom within the chosen fragment.
        frag_anchor_index = indices.index(anchor_index)
        if self._debug_draw:
            self.draw_nicely(frag)
        combo = Chem.RWMol(rdmolops.CombineMols(scaffold, frag))
        # The anchor's index in the combined molecule is offset by the scaffold's atom count.
        scaffold_anchor_index = frag_anchor_index + scaffold.GetNumAtoms()
        if self._debug_draw:
            print(scaffold_anchor_index, scaffold_attachment_index, anchor_index, scaffold.GetNumAtoms())
            self.draw_nicely(combo)
        combo.AddBond(scaffold_anchor_index, scaffold_attachment_index, bond_type)
        # Partial sanitization only; errors are caught rather than raised (catchErrors=True).
        Chem.SanitizeMol(combo,
                         sanitizeOps=Chem.rdmolops.SanitizeFlags.SANITIZE_ADJUSTHS +
                         Chem.rdmolops.SanitizeFlags.SANITIZE_SETAROMATICITY,
                         catchErrors=True)
        if self._debug_draw:
            self.draw_nicely(combo)
        scaffold = combo
    return scaffold
def fragment_on_bonds_and_label(mol, bonds):
    """Break the given bonds and label each dummy atom with an index for an atom type.

    :param mol: molecule to fragment
    :param bonds: indices of the bonds to break
    :return: ``(smi, name, index_to_atom_type)``: SMILES of the fragmented
        molecule, the result of ``get_name(mol)``, and the reversed
        atom-type/index dictionary
    """
    atom_type_to_index = {}

    def label_pair(bond_idx):
        # Look up (or create, via index_for_atom_type) the index of both end-atom types.
        bond = mol.GetBondWithIdx(bond_idx)
        begin_atom = mol.GetAtomWithIdx(bond.GetBeginAtomIdx())
        end_atom = mol.GetAtomWithIdx(bond.GetEndAtomIdx())
        begin_type = common.type_atom(begin_atom)
        end_type = common.type_atom(end_atom)
        begin_label = index_for_atom_type(atom_type_to_index, begin_type)
        end_label = index_for_atom_type(atom_type_to_index, end_type)
        return (begin_label, end_label)

    labels = [label_pair(bond_idx) for bond_idx in bonds]
    fragmented = Chem.FragmentOnBonds(mol, bonds, dummyLabels=labels)
    smi = Chem.MolToSmiles(fragmented)
    name = get_name(mol)
    return (smi, name, dict_reverse_binding(atom_type_to_index))
def fragment_mol(mol, frag_label):
    """Split `mol` at its breakable bonds and assign residue labels to the pieces.

    A bond is broken unless it is aromatic, involves a hydrogen atom, or has
    both ends inside a match of one of the excluded substructures.

    :param mol: molecule to fragment
    :param frag_label: residue name stored for every atom
    :return: ``(frag_dict, frags_as_idx)``: a dict mapping each atom index to
        ``{'resname': frag_label, 'resid': <1-based fragment number>}`` and the
        atom-index tuples of the fragments
    """
    excl_smarts = [
        '[CX3](=[OX1])[OX1]',
        '[SX4](=[OX1])=[OX1]',
        '[NX3](=[OX1])[OX1]',
        '[OX1]=C[NX3][*]',
        '[#6r5]~[#7r6]~[#6r6]~[#7r6]~[#6r6]~[#6r5]',  # Special for aspire
        '[SX3](=[OX1])',
    ]
    excl_substructs = [Chem.MolFromSmarts(smart) for smart in excl_smarts]
    # Atom indices covered by any match of any excluded substructure.
    excl_idx = [
        atom_idx
        for pattern in excl_substructs
        for match in mol.GetSubstructMatches(pattern)
        for atom_idx in match
    ]

    def is_breakable(bond):
        if bond.GetIsAromatic():
            return False
        if bond.GetBeginAtom().GetAtomicNum() == 1:
            return False
        if bond.GetEndAtom().GetAtomicNum() == 1:
            return False
        both_excluded = (bond.GetEndAtomIdx() in excl_idx
                         and bond.GetBeginAtomIdx() in excl_idx)
        return not both_excluded

    sigma_bond_idx = [bond.GetIdx() for bond in mol.GetBonds() if is_breakable(bond)]
    fragmentation = Chem.FragmentOnBonds(mol, sigma_bond_idx, addDummies=False)
    frags_as_idx = Chem.GetMolFrags(fragmentation)

    frag_dict = {}
    for resid, indices in enumerate(frags_as_idx, start=1):
        for idx in indices:
            frag_dict[idx] = {'resname': frag_label, 'resid': resid}
    return frag_dict, frags_as_idx
def fragment_molecule_on_explicit_hydrogens(smiles):
    """Create one single-cut Fragmentation per explicit hydrogen matched by _hydrogen_cut_pat.

    :param smiles: SMILES string with explicit hydrogens
    :return: list of Fragmentation records whose constant part is "[*][H]"
    :raises AssertionError: if a cut does not yield exactly "[*][H]" plus one
        other fragment
    """
    num_heavies = get_num_heavies_from_smiles(smiles)
    # The canonical SMILES itself is not used below; the call is kept as in the original.
    Chem.CanonSmiles(smiles)
    # use sanitize=False to preserve explicit hydrogens
    input_mol = Chem.MolFromSmiles(smiles, sanitize=False)
    Chem.SanitizeMol(input_mol, Chem.SANITIZE_ALL)

    hydrogen_fragment = "[*][H]"  # Hard-coded
    fragmentations = []
    for cut_pair in input_mol.GetSubstructMatches(_hydrogen_cut_pat):
        bond_idx = input_mol.GetBondBetweenAtoms(*cut_pair).GetIdx()
        fragmented_mol = Chem.FragmentOnBonds(input_mol, [bond_idx], dummyLabels=[(0, 0)])
        new_smiles = Chem.MolToSmiles(fragmented_mol, isomericSmiles=True)

        left, mid, right = new_smiles.partition(".")
        assert mid == ".", new_smiles

        if left == hydrogen_fragment:
            cut_smiles = right
        elif right == hydrogen_fragment:
            cut_smiles = left
        else:
            raise AssertionError("did not split hydrogen correctly: %r %r" % (smiles, new_smiles))

        if "[H]" in cut_smiles:
            # If there were multiple [H] atoms, then we cut on one but others remain.
            # Recanonicalize to remove them.
            cut_smiles = Chem.CanonSmiles(cut_smiles)

        fragmentations.append(
            Fragmentation(1, EnumerationLabel.NO_ENUMERATION, 0, "1", "[*][H]", "0",
                          num_heavies, "1", cut_smiles, None))
    return fragmentations
def encode(self, smiles, subs):
    """Encode (molecule, sub-structure) SMILES pairs into one integer array.

    For every pair, rows of five integers ``(token, j, ix2, bond type, f)`` are
    collected in three lists: `m` (rows for bonds/atoms of the sub-structure),
    `n` (other rows) and `c` (rows for bonds flagged as connections). `m + n`
    is written into `output` and `c` into `connect`.

    :param smiles: sequence of molecule SMILES
    :param subs: sequence of sub-structure SMILES, aligned with `smiles`
    :return: array of shape ``[len(smiles), max_len, 5]`` made by concatenating
        `output` (``max_len - n_frags - 1`` rows) and `connect`
        (``n_frags + 1`` rows) along axis 1
    """
    # NOTE(review): `np.long` is not available in every NumPy release - confirm against the pinned version.
    output = np.zeros([len(smiles), self.max_len - self.n_frags - 1, 5], dtype=np.long)
    connect = np.zeros([len(smiles), self.n_frags + 1, 5], dtype=np.long)
    for i, s in enumerate(smiles):
        mol = Chem.MolFromSmiles(s)
        sub = Chem.MolFromSmiles(subs[i])
        # Chem.Kekulize(sub)
        sub_idxs = mol.GetSubstructMatches(sub)
        # Pick a match of `sub` in `mol`: the first one whose boundary bonds all have
        # bond-type value 1, otherwise the last match tried.
        # NOTE(review): `sub_idx` / `split_bond` are undefined below if there is no match.
        for sub_idx in sub_idxs:
            # Bonds of `mol` that correspond to the bonds of `sub` under this match.
            sub_bond = [
                mol.GetBondBetweenAtoms(
                    sub_idx[b.GetBeginAtomIdx()],
                    sub_idx[b.GetEndAtomIdx()]).GetIdx() for b in sub.GetBonds()
            ]
            sub_atom = [mol.GetAtomWithIdx(ix) for ix in sub_idx]
            # Bonds touching a matched atom that are not part of the match itself.
            split_bond = {
                b.GetIdx()
                for a in sub_atom for b in a.GetBonds()
                if b.GetIdx() not in sub_bond
            }
            single = sum([
                int(mol.GetBondWithIdx(b).GetBondType()) for b in split_bond
            ])
            if single == len(split_bond):
                break
        frags = Chem.FragmentOnBonds(mol, list(split_bond))
        # Return value discarded; the call is made for the '_smilesAtomOutputOrder'
        # property read on the next line.
        Chem.MolToSmiles(frags)
        # NOTE(review): eval() on a property string; ast.literal_eval may be safer - confirm the value is a plain list.
        rank = eval(frags.GetProp('_smilesAtomOutputOrder'))
        # Matched atoms first, then the remaining original atoms in SMILES output order
        # (indices >= mol.GetNumAtoms() are dropped).
        mol_idx = list(sub_idx) + [
            idx for idx in rank
            if idx not in sub_idx and idx < mol.GetNumAtoms()
        ]
        # 1-based number of the disconnected piece of `sub` for every atom of `sub`.
        frg_idx = [
            i + 1 for i, f in enumerate(Chem.GetMolFrags(sub)) for _ in f
        ]
        Chem.Kekulize(mol)
        m, n, c = [(self.tk2ix['GO'], 0, 0, 0, 1)], [], [(self.tk2ix['GO'], 0, 0, 0, 0)]
        # Position of every molecule atom index within `mol_idx`.
        mol2sub = {ix: i for i, ix in enumerate(mol_idx)}
        for j, idx in enumerate(mol_idx):
            atom = mol.GetAtomWithIdx(idx)
            bonds = sorted(atom.GetBonds(), key=lambda x: mol2sub[x.GetOtherAtomIdx(idx)])
            # Keep only bonds to atoms that come earlier in `mol_idx`.
            bonds = [
                b for b in bonds if j > mol2sub[b.GetOtherAtomIdx(idx)]
            ]
            n_split = sum(
                [1 if b.GetIdx() in split_bond else 0 for b in bonds])
            tk = self.get_atom_tk(atom)
            for k, bond in enumerate(bonds):
                ix2 = mol2sub[bond.GetOtherAtomIdx(idx)]
                is_split = bond.GetIdx() in split_bond
                if idx in sub_idx:
                    is_connect = is_split
                elif len(bonds) == 1:
                    is_connect = False
                elif n_split == len(bonds):
                    is_connect = is_split and k != 0
                else:
                    is_connect = False
                # Choose the target list and the value of the last column.
                if bond.GetIdx() in sub_bond:
                    bin, f = m, frg_idx[j]
                elif is_connect:
                    bin, f = c, 0
                else:
                    bin, f = n, 0
                # The atom token is emitted with the first such bond; later rows use '*'.
                if bond.GetIdx() in sub_bond or not is_connect:
                    tk2 = tk
                    tk = self.tk2ix['*']
                else:
                    tk2 = self.tk2ix['*']
                bin.append((tk2, j, ix2, int(bond.GetBondType()), f))
            # Atom token not yet emitted: add a row for the atom itself.
            # NOTE(review): in the `else` arm `f` is whatever an earlier iteration left behind.
            if tk != self.tk2ix['*']:
                bin, f = (m, frg_idx[j]) if idx in sub_idx else (n, f)
                bin.append((tk, j, j, 0, f))
        output[i, :len(m + n), :] = m + n
        if len(c) > 0:
            connect[i, :len(c)] = c
    return np.concatenate([output, connect], axis=1)
def maximum_curvature(smi):
    """Collect, per fragment occurrence, the largest angle between two of its atom planes.

    The molecule is embedded in 3D with hydrogens (fixed random seed) and cut
    at every single bond whose end atom is not hydrogen. Each fragment is
    matched back onto the molecule; for every match the planes returned by
    ``atom_plane_segmentation`` are compared pairwise with
    ``angle_of_two_planes`` and the largest angle is recorded.

    :param smi: SMILES string of the molecule
    :return: list with one maximum plane angle per fragment match that has at
        least two planes (possibly empty), or None when the molecule has no
        qualifying single bond
    """
    m = Chem.MolFromSmiles(smi)
    m3d = Chem.AddHs(m)
    AllChem.EmbedMolecule(m3d, randomSeed=1)

    # find all single bonds without 'H' as end atom
    single_bonds_id = [
        bond.GetIdx()
        for bond in m3d.GetBonds()
        if str(bond.GetBondType()) == 'SINGLE' and bond.GetEndAtom().GetSymbol() != 'H'
    ]
    if not single_bonds_id:
        print('no single bond, return')
        return

    frags = Chem.FragmentOnBonds(m, single_bonds_id)

    # For every distinct fragment: the atom-index tuples of its matches in `m`.
    frags_ids = []
    for frag_smi in Chem.MolToSmiles(frags).split('.'):
        patt = Chem.MolFromSmarts(frag_smi)
        if m.HasSubstructMatch(patt):
            frag_ids = list(m.GetSubstructMatches(patt))
            if frag_ids not in frags_ids:
                frags_ids.append(frag_ids)
        else:
            print("molecular m do not contain group: ", frag_smi)

    frag_angle = []
    for frag_ids in frags_ids:
        for atomids in frag_ids:
            atom_planes = atom_plane_segmentation(atomids, m3d)
            # Fewer than two planes leaves no pair to measure; max() would fail on it.
            if atom_planes is None or len(atom_planes) < 2:
                continue
            plane_angles = [
                angle_of_two_planes(atom_planes[i], atom_planes[j])
                for i in range(len(atom_planes))
                for j in range(i + 1, len(atom_planes))
            ]
            frag_angle.append(max(plane_angles))

    # Only report a maximum when at least one angle was collected.
    if frag_angle:
        print('max angle:', max(frag_angle))
    return frag_angle
def __decompose_mol(self, Mol):
    """Cut ``Mol`` at its connector/anchor bonds and store the capped fragments.

    For every connector in ``self.connectors`` the substructure matches in
    ``Mol`` are collected (chirality-aware).  A match is excluded when it is
    ambiguous on a mapped anchor atom, when a terminal anchor atom has a
    neighbour outside the match, when it overlaps an anchor or bond that was
    already accepted, or when ``get_frag_bonds`` returns no bonds for it.
    Bonds of accepted non-terminal connectors are broken with
    ``Chem.FragmentOnBonds``; bonds of terminal connectors are only recorded.

    Each resulting fragment gets one cap molecule (``get_rcap()`` /
    ``get_lcap()`` of the connector) attached per broken bond, using the
    original bond type.  Where attaching the cap changes the parity reported
    by ``get_parity``, the tetrahedral chiral tag of the affected atom is
    inverted.  Results are appended to ``self.frag_list``,
    ``self.frag_list_map``, ``self.rcap_list_map``, ``self.lcap_list_map``,
    ``self.rcap_conn_idx``, ``self.lcap_conn_idx``, ``self.ranc_list`` and
    ``self.lanc_list``; fragment/fragment rank couplings go to
    ``self.frag2frag_frgs`` and ``self.frag2frag_atms``.

    :param Mol: Molecule that will be decomposed. Must be an instance of
        an RDKit molecule object.
    :return: always ``True``.
    """

    ### Holds bond indices of broken bonds at r/l anchor
    ranc_bond_idcs = list()
    lanc_bond_idcs = list()
    ### Holds atom indices of broken bonds at r/l anchor
    ranc_atom_idcs = list()
    lanc_atom_idcs = list()
    ### Holds atom indices of bonds at terminal r/l anchor.
    ### They won't be broken.
    ranc_atom_idcs_t = list()
    lanc_atom_idcs_t = list()
    ### Holds atom indices of broken bonds at connectors
    rconn_atom_idcs = list()
    lconn_atom_idcs = list()
    ### Holds atom indices of bonds between fragment and
    ### terminal connectors. They won't be broken.
    rconn_atom_idcs_t = list()
    lconn_atom_idcs_t = list()
    ### Stores the connector index of the bond
    rconn_idcs = list()
    lconn_idcs = list()
    ### Stores the connector index of the bond with terminal anchor
    rconn_idcs_t = list()
    lconn_idcs_t = list()
    ### Stores atom indices of all anchor atoms in molecule
    ranc_mol = list()
    lanc_mol = list()
    ### Stores atom indices of all terminal anchor atoms in molecule
    ranc_mol_t = list()
    lanc_mol_t = list()

    if self.verbose:
        print("Start analyzing bonds...")

    ### Begin loop over all connectors
    for connector_idx in range(self.__connector_count):
        conn_obj = self.connectors[connector_idx]
        connector = conn_obj.get_connector()
        lanc = conn_obj.get_lanc()
        ranc = conn_obj.get_ranc()
        ring = conn_obj.get_ring()
        ranc_map = conn_obj.get_ranc_map()
        lanc_map = conn_obj.get_lanc_map()

        connector_mol_matches = Mol.GetSubstructMatches(connector,
                                                        useChirality=True)
        connector_mol_match_count = len(connector_mol_matches)
        connector_mol_match_exclude = list()

        if self.verbose:
            print("Decomposing with connector ID %d (%s)..."
                  % (connector_idx,
                     self.connectors[connector_idx].get_name()))
            print("Found %d matches." % connector_mol_match_count)

        if connector_mol_match_count == 0:
            if self.verbose:
                print("No matches found for current connector.")
                print("No decomposition.")
            continue

        ### Find the corresponding atom indices of the anchor atoms
        lanc_mol_matches = list()
        ranc_mol_matches = list()
        for idx1 in range(connector_mol_match_count):
            ranc_mol_matches.append(
                [connector_mol_matches[idx1][r_i] for r_i in ranc])
            lanc_mol_matches.append(
                [connector_mol_matches[idx1][l_i] for l_i in lanc])

        ### Filter substructure matches that do not unambiguously
        ### identify connectors, i.e. they have the same connector
        ### atoms except the anchor atoms. Also, identify all
        ### connectors that overlap with other, previously matched,
        ### connectors.
        ### Add those connector matches to an exclude list.
        for idx1 in range(connector_mol_match_count):
            for idx2 in range(idx1 + 1, connector_mol_match_count):
                for idx1_match in connector_mol_matches[idx1]:
                    ### Same atom at the same lanc position in both matches?
                    idx1_match_1 = -1
                    idx1_match_2 = -1
                    if idx1_match in lanc_mol_matches[idx1]:
                        idx1_match_1 = lanc_mol_matches[idx1].index(
                            idx1_match)
                    if idx1_match in lanc_mol_matches[idx2]:
                        idx1_match_2 = lanc_mol_matches[idx2].index(
                            idx1_match)
                    if idx1_match_1 > -1 and idx1_match_1 == idx1_match_2:
                        ### Positions mapped to -1 are skipped entirely.
                        if lanc_map[idx1_match_1] == -1:
                            continue
                        if idx1 not in connector_mol_match_exclude:
                            connector_mol_match_exclude.append(idx1)
                        if idx2 not in connector_mol_match_exclude:
                            connector_mol_match_exclude.append(idx2)
                        if self.verbose:
                            print("Excluded connector match due to")
                            print("ambigious matching on lanc atoms")
                            print("%s (match id %d,%d)."
                                  % (idx1_match, idx1, idx2))
                        continue

                    ### Same atom at the same ranc position in both matches?
                    idx1_match_1 = -1
                    idx1_match_2 = -1
                    if idx1_match in ranc_mol_matches[idx1]:
                        idx1_match_1 = ranc_mol_matches[idx1].index(
                            idx1_match)
                    if idx1_match in ranc_mol_matches[idx2]:
                        idx1_match_2 = ranc_mol_matches[idx2].index(
                            idx1_match)
                    if idx1_match_1 > -1 and idx1_match_1 == idx1_match_2:
                        if ranc_map[idx1_match_1] == -1:
                            continue
                        if idx1 not in connector_mol_match_exclude:
                            connector_mol_match_exclude.append(idx1)
                        if idx2 not in connector_mol_match_exclude:
                            connector_mol_match_exclude.append(idx2)
                        if self.verbose:
                            print("Excluded connector match due to")
                            print("ambigious matching on ranc atoms")
                            print("%s (match id %d,%d)."
                                  % (idx1_match, idx1, idx2))
                        continue

        if self.verbose:
            print("Excluded connector matches: ", connector_mol_match_exclude)

        ### Begin loop over connector_mol_matches
        for idx1 in range(connector_mol_match_count):
            connector_mol_match = connector_mol_matches[idx1]
            ranc_mol_match = ranc_mol_matches[idx1]
            lanc_mol_match = lanc_mol_matches[idx1]

            ### A terminal anchor atom must not be bonded to anything
            ### outside of the connector match.
            if conn_obj.get_terminal():
                for term_anc_idx in conn_obj.get_terminal_anc():
                    term_anc_atm = Mol.GetAtomWithIdx(
                        connector_mol_match[term_anc_idx])
                    for term_neighbor in term_anc_atm.GetNeighbors():
                        term_neighbor_idx = term_neighbor.GetIdx()
                        if term_neighbor_idx not in connector_mol_match:
                            connector_mol_match_exclude.append(idx1)
                            if self.verbose:
                                print("Excluded current connector match")
                                print("due to terminal anchor atom %d at non-terminal"
                                      % connector_mol_match[term_anc_idx])
                                print("position connected to atom %d."
                                      % term_neighbor_idx)

            if self.verbose:
                print("Current connector_mol_match ID", idx1)
                print("Current connector:", connector_mol_match)

            ranc_bond_idcs_tmp, ratom_idcs_tmp = get_frag_bonds(
                Mol, ranc_mol_match, connector_mol_match, ring, self.verbose)
            lanc_bond_idcs_tmp, latom_idcs_tmp = get_frag_bonds(
                Mol, lanc_mol_match, connector_mol_match, ring, self.verbose)

            ### Everything accepted so far, terminal entries appended
            ### after the non-terminal ones.
            _check_ranc_atom_idcs = ranc_atom_idcs + ranc_atom_idcs_t
            _check_lanc_atom_idcs = lanc_atom_idcs + lanc_atom_idcs_t
            _check_rconn_atom_idcs = rconn_atom_idcs + rconn_atom_idcs_t
            _check_lconn_atom_idcs = lconn_atom_idcs + lconn_atom_idcs_t
            _check_rconn_idcs = rconn_idcs + rconn_idcs_t
            _check_lconn_idcs = lconn_idcs + lconn_idcs_t
            _check_ranc_mol = ranc_mol + ranc_mol_t
            _check_lanc_mol = lanc_mol + lanc_mol_t

            ### Sanity checking for ranc
            for _anc_atom, _anc_bond in zip(ratom_idcs_tmp,
                                            ranc_bond_idcs_tmp):
                for list_idx in range(len(_check_ranc_mol)):
                    _ranc_mol_match = _check_ranc_mol[list_idx]
                    _lanc_mol_match = _check_lanc_mol[list_idx]
                    rconnector_idx = _check_rconn_idcs[list_idx]
                    lconnector_idx = _check_lconn_idcs[list_idx]

                    ### (side, connector index) of the earlier entry this
                    ### bond collides with; the branch order decides which
                    ### side is reported.
                    overlap = None
                    if (_check_ranc_atom_idcs[list_idx] in ranc_mol_match
                            and _check_rconn_atom_idcs[list_idx] in ranc_mol_match):
                        overlap = ("ranc", rconnector_idx)
                    elif (_check_lanc_atom_idcs[list_idx] in ranc_mol_match
                            and _check_lconn_atom_idcs[list_idx] in ranc_mol_match):
                        overlap = ("lanc", lconnector_idx)
                    elif ((_check_ranc_atom_idcs[list_idx] == _anc_atom[0]
                            and _check_rconn_atom_idcs[list_idx] == _anc_atom[1])
                            or (_check_ranc_atom_idcs[list_idx] == _anc_atom[1]
                                and _check_rconn_atom_idcs[list_idx] == _anc_atom[0])):
                        overlap = ("ranc", rconnector_idx)
                    elif ((_check_lanc_atom_idcs[list_idx] == _anc_atom[0]
                            and _check_lconn_atom_idcs[list_idx] == _anc_atom[1])
                            or (_check_lanc_atom_idcs[list_idx] == _anc_atom[1]
                                and _check_lconn_atom_idcs[list_idx] == _anc_atom[0])):
                        overlap = ("lanc", lconnector_idx)
                    elif (_anc_atom[0] in _ranc_mol_match
                            and _anc_atom[1] in _ranc_mol_match):
                        overlap = ("ranc", rconnector_idx)
                    elif (_anc_atom[0] in _lanc_mol_match
                            and _anc_atom[1] in _lanc_mol_match):
                        overlap = ("lanc", lconnector_idx)

                    if overlap is not None:
                        other_side, other_conn_idx = overlap
                        if self.verbose:
                            print("Excluded current connector match")
                            print("due to ranc overlap with %s of connector"
                                  % other_side, other_conn_idx)
                            print(self.connectors[other_conn_idx].get_name())
                        if idx1 not in connector_mol_match_exclude:
                            connector_mol_match_exclude.append(idx1)

                if self.verbose and idx1 not in connector_mol_match_exclude:
                    print("Ranc decompostion of bond %d of anc atom %d - conn atom %d."
                          % (_anc_bond, _anc_atom[0], _anc_atom[1]))

            ### Sanity checking for lanc
            ### NOTE(review): unlike the ranc check this loop is bounded by
            ### len(lanc_mol) (terminal entries excluded) and its first two
            ### tests still use ranc_mol_match -- looks like a copy/paste
            ### leftover, kept unchanged; confirm the intended behaviour.
            for _anc_atom, _anc_bond in zip(latom_idcs_tmp,
                                            lanc_bond_idcs_tmp):
                for list_idx in range(len(lanc_mol)):
                    _ranc_mol_match = _check_ranc_mol[list_idx]
                    _lanc_mol_match = _check_lanc_mol[list_idx]
                    rconnector_idx = _check_rconn_idcs[list_idx]
                    lconnector_idx = _check_lconn_idcs[list_idx]

                    overlap = None
                    if (_check_ranc_atom_idcs[list_idx] in ranc_mol_match
                            and _check_rconn_atom_idcs[list_idx] in ranc_mol_match):
                        overlap = ("ranc", rconnector_idx)
                    elif (_check_lanc_atom_idcs[list_idx] in ranc_mol_match
                            and _check_lconn_atom_idcs[list_idx] in ranc_mol_match):
                        overlap = ("lanc", lconnector_idx)
                    elif ((_check_ranc_atom_idcs[list_idx] == _anc_atom[0]
                            and _check_rconn_atom_idcs[list_idx] == _anc_atom[1])
                            or (_check_ranc_atom_idcs[list_idx] == _anc_atom[1]
                                and _check_rconn_atom_idcs[list_idx] == _anc_atom[0])):
                        overlap = ("ranc", rconnector_idx)
                    elif ((_check_lanc_atom_idcs[list_idx] == _anc_atom[0]
                            and _check_lconn_atom_idcs[list_idx] == _anc_atom[1])
                            or (_check_lanc_atom_idcs[list_idx] == _anc_atom[1]
                                and _check_lconn_atom_idcs[list_idx] == _anc_atom[0])):
                        overlap = ("lanc", lconnector_idx)
                    elif (_anc_atom[0] in _ranc_mol_match
                            and _anc_atom[1] in _ranc_mol_match):
                        overlap = ("ranc", rconnector_idx)
                    elif (_anc_atom[0] in _lanc_mol_match
                            and _anc_atom[1] in _lanc_mol_match):
                        overlap = ("lanc", lconnector_idx)

                    if overlap is not None:
                        other_side, other_conn_idx = overlap
                        if self.verbose:
                            print("Excluded current connector match")
                            print("due to lanc overlap with %s of connector"
                                  % other_side, other_conn_idx)
                            print(self.connectors[other_conn_idx].get_name())
                        if idx1 not in connector_mol_match_exclude:
                            connector_mol_match_exclude.append(idx1)

                if self.verbose and idx1 not in connector_mol_match_exclude:
                    print("Lanc decompostion of bond %d of anc atom %d - conn atom %d."
                          % (_anc_bond, _anc_atom[0], _anc_atom[1]))

            ### Nothing to break on either side: drop the match.
            if len(ranc_bond_idcs_tmp) == 0 and len(lanc_bond_idcs_tmp) == 0:
                connector_mol_match_exclude.append(idx1)

            if idx1 in connector_mol_match_exclude:
                if self.verbose:
                    print("Current connector_mol_match excluded.")
                continue

            if self.verbose:
                print("R anchor atom indices in molecule:", ranc_mol_match)
                print("L anchor atom indices in molecule:", lanc_mol_match)

            ### Keep track of bonds that will be broken (r site)
            for bond_idx, atom_idx in zip(ranc_bond_idcs_tmp, ratom_idcs_tmp):
                anc_idx, conn_idx = atom_idx
                if conn_obj.get_terminal():
                    ranc_atom_idcs_t.append(anc_idx)
                    ranc_mol_t.append(ranc_mol_match)
                    rconn_atom_idcs_t.append(conn_idx)
                    rconn_idcs_t.append(connector_idx)
                else:
                    ranc_bond_idcs.append(bond_idx)
                    ranc_atom_idcs.append(anc_idx)
                    ranc_mol.append(ranc_mol_match)
                    rconn_atom_idcs.append(conn_idx)
                    rconn_idcs.append(connector_idx)

            ### Keep track of bonds that will be broken (l site)
            for bond_idx, atom_idx in zip(lanc_bond_idcs_tmp, latom_idcs_tmp):
                anc_idx, conn_idx = atom_idx
                if conn_obj.get_terminal():
                    lanc_atom_idcs_t.append(anc_idx)
                    lanc_mol_t.append(lanc_mol_match)
                    lconn_atom_idcs_t.append(conn_idx)
                    lconn_idcs_t.append(connector_idx)
                else:
                    lanc_bond_idcs.append(bond_idx)
                    lanc_atom_idcs.append(anc_idx)
                    lanc_mol.append(lanc_mol_match)
                    lconn_atom_idcs.append(conn_idx)
                    lconn_idcs.append(connector_idx)
        ### End loop over connector_mol_matches
    ### End loop over all connectors

    if len(ranc_bond_idcs) > 0 or len(lanc_bond_idcs) > 0:
        if self.verbose:
            print("Start decomposing molecule....")

        frags = Chem.FragmentOnBonds(
            Mol,
            list(set(ranc_bond_idcs + lanc_bond_idcs)),  # Must be uniq
            addDummies=False)
        frags_mol = Chem.GetMolFrags(frags, asMols=True, sanitizeFrags=False)
        mol_frags_atm_idxs = Chem.GetMolFrags(frags,
                                              asMols=False,
                                              sanitizeFrags=False)
        if self.verbose:
            self.process_list = frags_mol

        ### Make fragment to fragment cross couplings
        frag_canonical_ranks = list()
        for mol_atm_idxs in mol_frags_atm_idxs:
            frag_canonical_ranks.append(
                [self.canonical_rank[mol_atm_idx]
                 for mol_atm_idx in mol_atm_idxs])
        N_frags = len(frag_canonical_ranks)
        for i1 in range(N_frags):
            canonical_ranks1 = frag_canonical_ranks[i1]
            ### Only pairs with i2 > i1 are considered.
            for i2 in range(i1 + 1, N_frags):
                canonical_ranks2 = frag_canonical_ranks[i2]
                for atm_idx2, rank2 in enumerate(canonical_ranks2):
                    if rank2 in canonical_ranks1:
                        atm_idx1 = canonical_ranks1.index(rank2)
                        self.frag2frag_frgs.append([i1, i2])
                        self.frag2frag_atms.append([atm_idx1, atm_idx2])

        ###
        ### Loop over all fragments generated by FragmentOnBonds.
        ### For each fragment that is not a connector find the
        ### corresponding connector fragment.
        ###
        ### frag_mol    : rdkit Mol instance
        ### mol_atm_idxs: atom indices of the original molecule that
        ###               generated the Mol instance of the original molecule
        ###
        for frag_mol, mol_atm_idxs in zip(frags_mol, mol_frags_atm_idxs):
            atm_idx_list = list()
            parity_list = list()
            chirality_list = list()
            bondtype_list = list()
            combo = copy.copy(frag_mol)
            rcap_list_map = list()
            lcap_list_map = list()
            rcap_conn_idx = list()
            lcap_conn_idx = list()
            ranc_list = list()
            lanc_list = list()

            ###
            ### Loop over all atoms in the fragment
            ###
            ### atm_idx    : atom index in the fragment indexing scheme
            ### mol_atm_idx: atom index in the original molecule indexing scheme
            ###
            ### Find fragments and the corresponding connector entry
            ###
            for atm_idx, mol_atm_idx in enumerate(mol_atm_idxs):
                ###
                ### A connector atom is stored in connector_mol_idcs
                ### but will not be found in the anchor lists ranc_atom_idcs
                ### and lanc_atom_idcs.
                ###

                ### Work through the terminal groups.
                ### They have not been modified, since no
                ### bonds are broken here.
                for list_idx, _ranc_idx in enumerate(ranc_atom_idcs_t):
                    if mol_atm_idx != _ranc_idx:
                        continue
                    connector_idx = rconn_idcs_t[list_idx]
                    ranc_list.append(
                        [mol_atm_idxs.index(_anc_idx)
                         for _anc_idx in ranc_mol_t[list_idx]
                         if _anc_idx in mol_atm_idxs])
                    rcap_conn_idx.append(connector_idx)
                    rcap_list_map.append(list())

                for list_idx, _lanc_idx in enumerate(lanc_atom_idcs_t):
                    if mol_atm_idx != _lanc_idx:
                        continue
                    connector_idx = lconn_idcs_t[list_idx]
                    lanc_list.append(
                        [mol_atm_idxs.index(_anc_idx)
                         for _anc_idx in lanc_mol_t[list_idx]
                         if _anc_idx in mol_atm_idxs])
                    lcap_conn_idx.append(connector_idx)
                    lcap_list_map.append(list())

                ### Now, work through the non-terminal groups.
                ### All these groups have open bond endings, since we have
                ### broken them during the fragmentation process. Connect
                ### all these open bond endings to capping groups.
                for list_idx, _ranc_idx in enumerate(ranc_atom_idcs):
                    if mol_atm_idx != _ranc_idx:
                        continue
                    ranc_list.append(
                        [mol_atm_idxs.index(_anc_idx)
                         for _anc_idx in ranc_mol[list_idx]
                         if _anc_idx in mol_atm_idxs])

                    ### combo_length is also the offset of the idx for
                    ### the newly added cap.
                    connector_idx = rconn_idcs[list_idx]
                    conn = self.connectors[connector_idx]
                    cap = conn.get_rcap()
                    combo_length = combo.GetNumAtoms()
                    cap_length = cap.GetNumAtoms()

                    ### Connect fragment and connector cap
                    combo = Chem.CombineMols(combo, cap)
                    atm_idx_list.append([atm_idx, combo_length])
                    parity_list.append(
                        [get_parity(Mol, mol_atm_idx),
                         get_parity(cap, 0)])
                    ### Mol is indexed with the original-molecule index;
                    ### atm_idx is only valid inside the fragment.
                    chirality_list.append([
                        Mol.GetAtomWithIdx(mol_atm_idx).GetChiralTag(),
                        cap.GetAtomWithIdx(0).GetChiralTag()
                    ])
                    bondtype_list.append(
                        Mol.GetBondWithIdx(
                            ranc_bond_idcs[list_idx]).GetBondType())
                    rcap_list_map.append(
                        range(combo_length, combo_length + cap_length))
                    rcap_conn_idx.append(connector_idx)

                for list_idx, _lanc_idx in enumerate(lanc_atom_idcs):
                    if mol_atm_idx != _lanc_idx:
                        continue
                    lanc_list.append(
                        [mol_atm_idxs.index(_anc_idx)
                         for _anc_idx in lanc_mol[list_idx]
                         if _anc_idx in mol_atm_idxs])

                    ### combo_length is also the offset of the idx for
                    ### the newly added cap.
                    connector_idx = lconn_idcs[list_idx]
                    conn = self.connectors[connector_idx]
                    cap = conn.get_lcap()
                    combo_length = combo.GetNumAtoms()
                    cap_length = cap.GetNumAtoms()

                    ### Connect fragment and connector cap
                    combo = Chem.CombineMols(combo, cap)
                    atm_idx_list.append([atm_idx, combo_length])
                    parity_list.append(
                        [get_parity(Mol, mol_atm_idx),
                         get_parity(cap, 0)])
                    chirality_list.append([
                        Mol.GetAtomWithIdx(mol_atm_idx).GetChiralTag(),
                        cap.GetAtomWithIdx(0).GetChiralTag()
                    ])
                    bondtype_list.append(
                        Mol.GetBondWithIdx(
                            lanc_bond_idcs[list_idx]).GetBondType())
                    lcap_list_map.append(
                        range(combo_length, combo_length + cap_length))
                    lcap_conn_idx.append(connector_idx)

            ### Put molecule together
            e_combo = Chem.RWMol(combo)
            for atm_pair, bondtype in zip(atm_idx_list, bondtype_list):
                e_combo.AddBond(atm_pair[0], atm_pair[1], bondtype)
            combo = e_combo.GetMol()

            ### Invert the chiral tag wherever adding the cap bond changed
            ### the parity. The tag is set on combo (the molecule that is
            ### stored below); e_combo is no longer used after GetMol().
            for atm_pair, parity, chirality in zip(atm_idx_list,
                                                   parity_list,
                                                   chirality_list):
                for i in range(2):
                    if get_parity(combo, atm_pair[i]) != parity[i]:
                        new_chiral = None
                        if chirality[i] == CHI_TETRAHEDRAL_CCW:
                            new_chiral = CHI_TETRAHEDRAL_CW
                        elif chirality[i] == CHI_TETRAHEDRAL_CW:
                            new_chiral = CHI_TETRAHEDRAL_CCW
                        if new_chiral is not None:
                            atm = combo.GetAtomWithIdx(atm_pair[i])
                            atm.SetChiralTag(new_chiral)

            self.frag_list.append(combo)
            self.frag_list_map.append(mol_atm_idxs)
            self.rcap_list_map.append(rcap_list_map)
            self.lcap_list_map.append(lcap_list_map)
            self.rcap_conn_idx.append(rcap_conn_idx)
            self.lcap_conn_idx.append(lcap_conn_idx)
            self.ranc_list.append(ranc_list)
            self.lanc_list.append(lanc_list)
            self.__frag_count += 1
        ### end loop over all fragments

    elif self.verbose:
        print("Nothing to decompose.")

    return True
def __make_surrogate_cap(self):
    """Build the capped surrogate fragment and store it on the instance.

    The surrogate molecule (``self.get_surrogate()``) is cut at every bond
    that leads from an r/l anchor atom (``get_s_ranc()`` / ``get_s_lanc()``)
    to an atom outside the respective anchor list.  The first resulting
    fragment that contains at least one such outside atom is combined with
    the r cap and the l cap: every outside atom is bonded to the first atom
    of the matching cap using the original bond type, and tetrahedral chiral
    tags are inverted on atoms whose ``get_parity`` value changed.

    On success the capped molecule and its index maps are written to
    ``self.__surr_cap``, ``self.__rcap_list_map``, ``self.__lcap_list_map``
    and ``self.__surr2sur_cap_map``.

    :return: ``True`` when a capped fragment was stored, ``False`` when no
        fragment contains an atom bonded to an anchor.
    """

    def _flipped(tag):
        ### Opposite tetrahedral tag, or None when tag is not tetrahedral.
        if tag == CHI_TETRAHEDRAL_CCW:
            return CHI_TETRAHEDRAL_CW
        if tag == CHI_TETRAHEDRAL_CW:
            return CHI_TETRAHEDRAL_CCW
        return None

    ranc_list = self.get_s_ranc()
    lanc_list = self.get_s_lanc()
    rcap_mol = self.get_rcap()
    lcap_mol = self.get_lcap()
    conn_mol = self.get_surrogate()

    ### Per broken bond: outside atom index, bond index, bond type, and
    ### the parity / chiral tag of the outside atom before fragmentation.
    ranc_conn = list()
    lanc_conn = list()
    ranc_bond = list()
    lanc_bond = list()
    ranc_bondtype = list()
    lanc_bondtype = list()
    ranc_parity = list()
    lanc_parity = list()
    ranc_chirality = list()
    lanc_chirality = list()

    for atm_idx in ranc_list:
        atm = conn_mol.GetAtomWithIdx(atm_idx)
        for neighbor in atm.GetNeighbors():
            neighbor_idx = neighbor.GetIdx()
            if neighbor_idx not in ranc_list:
                bond = conn_mol.GetBondBetweenAtoms(atm_idx, neighbor_idx)
                ranc_conn.append(neighbor_idx)
                ranc_bondtype.append(bond.GetBondType())
                ranc_parity.append(get_parity(conn_mol, neighbor_idx))
                ranc_chirality.append(neighbor.GetChiralTag())
                ranc_bond.append(bond.GetIdx())

    for atm_idx in lanc_list:
        atm = conn_mol.GetAtomWithIdx(atm_idx)
        for neighbor in atm.GetNeighbors():
            neighbor_idx = neighbor.GetIdx()
            if neighbor_idx not in lanc_list:
                bond = conn_mol.GetBondBetweenAtoms(atm_idx, neighbor_idx)
                lanc_conn.append(neighbor_idx)
                lanc_bondtype.append(bond.GetBondType())
                lanc_parity.append(get_parity(conn_mol, neighbor_idx))
                lanc_chirality.append(neighbor.GetChiralTag())
                lanc_bond.append(bond.GetIdx())

    frags = Chem.FragmentOnBonds(conn_mol,
                                 ranc_bond + lanc_bond,
                                 addDummies=False)
    frags_mol = Chem.GetMolFrags(frags, asMols=True)
    mol_frags_atm_idxs = Chem.GetMolFrags(frags, asMols=False)

    rcap_length = rcap_mol.GetNumAtoms()
    lcap_length = lcap_mol.GetNumAtoms()

    ###
    ### Loop over all fragments generated by FragmentOnBonds
    ###
    ### frag_mol    : rdkit Mol instance
    ### mol_atm_idxs: atom indices of the original molecules that
    ###               generated the molecule
    ###
    for frag_mol, mol_atm_idxs in zip(frags_mol, mol_frags_atm_idxs):
        frag_length = frag_mol.GetNumAtoms()
        ### Atom layout of the combined molecule: fragment, r cap, l cap.
        rcap_start = frag_length
        lcap_start = frag_length + rcap_length
        combo_mol = Chem.CombineMols(frag_mol, rcap_mol)
        combo_mol = Chem.CombineMols(combo_mol, lcap_mol)
        e_combo_mol = Chem.RWMol(combo_mol)
        is_frag = False
        ### Records of (fragment atom index, original parity, original tag)
        ranc_check = list()
        lanc_check = list()
        rcap_list_map = range(frag_length, frag_length + rcap_length)
        lcap_list_map = range(frag_length + rcap_length,
                              frag_length + rcap_length + lcap_length)

        ###
        ### Loop over all atoms in the fragment
        ###
        ### atm_idx    : atom index in the fragment indexing scheme
        ### mol_atm_idx: atom index in the original molecule indexing scheme
        ###
        for atm_idx, mol_atm_idx in enumerate(mol_atm_idxs):
            if mol_atm_idx in ranc_conn:
                idx = ranc_conn.index(mol_atm_idx)
                e_combo_mol.AddBond(atm_idx, rcap_start, ranc_bondtype[idx])
                ranc_check.append(
                    [atm_idx, ranc_parity[idx], ranc_chirality[idx]])
                is_frag = True
            if mol_atm_idx in lanc_conn:
                idx = lanc_conn.index(mol_atm_idx)
                e_combo_mol.AddBond(atm_idx, lcap_start, lanc_bondtype[idx])
                lanc_check.append(
                    [atm_idx, lanc_parity[idx], lanc_chirality[idx]])
                is_frag = True

        combo_mol = e_combo_mol.GetMol()

        if not is_frag:
            continue

        ### Fragment side: compare against the tag recorded for this very
        ### atom. (Comparing the whole chirality list with a tag, as was
        ### done before, is never true, so no correction was ever applied.)
        for atm_idx, parity, chirality in ranc_check + lanc_check:
            if get_parity(combo_mol, atm_idx) != parity:
                new_chiral = _flipped(chirality)
                if new_chiral is not None:
                    combo_mol.GetAtomWithIdx(atm_idx).SetChiralTag(new_chiral)

        ### Cap side: first atom of each cap against the stand-alone cap.
        for cap_start, cap_mol in ((rcap_start, rcap_mol),
                                   (lcap_start, lcap_mol)):
            if get_parity(combo_mol, cap_start) != get_parity(cap_mol, 0):
                atm = combo_mol.GetAtomWithIdx(cap_start)
                new_chiral = _flipped(atm.GetChiralTag())
                if new_chiral is not None:
                    atm.SetChiralTag(new_chiral)

        self.__surr_cap = copy.copy(combo_mol)
        self.__rcap_list_map = rcap_list_map
        self.__lcap_list_map = lcap_list_map
        self.__surr2sur_cap_map = mol_atm_idxs
        return True

    return False
def _merge_part(self, scaffold: Chem.Mol, fragmentanda: Chem.Mol, anchor_index: int,
                attachment_details: List[Dict], other_attachments: List[int],
                other_attachment_details: List[List[Dict]]) -> Chem.Mol:
    """
    Graft the anchor-containing piece of ``fragmentanda`` onto ``scaffold`` (worker for merge_pair).

    ``fragmentanda`` is cut at the bonds between the anchor and each attachment
    atom (plus one bond per entry of ``other_attachments``); the piece holding
    ``anchor_index`` is combined with ``scaffold`` and bonded to it as described
    by the ``idx_S`` / ``type`` entries of the detail dictionaries.

    :param scaffold: molecule that receives the fragment
    :param fragmentanda: molecule the fragment is cut out of
    :param anchor_index: fragmentanda index of the atom kept in the fragment
    :param attachment_details: dicts with keys ``idx_F`` (fragmentanda attachment index),
        ``idx_S`` (scaffold attachment index) and ``type`` (bond type)
    :param other_attachments: further fragmentanda anchor indices
    :param other_attachment_details: per further anchor, a list of detail dicts (only the first is used)
    :return: the merged molecule
    :raises Exception: when no fragment contains ``anchor_index``
    """
    # ---- cut out the bit to add ----
    primary_cuts = [fragmentanda.GetBondBetweenAtoms(anchor_index, detail['idx_F']).GetIdx()
                    for detail in attachment_details]
    secondary_cuts = [fragmentanda.GetBondBetweenAtoms(oi, oad[0]['idx_F']).GetIdx()
                      for oi, oad in zip(other_attachments, other_attachment_details)]
    if self._debug_draw and other_attachments:
        print('ring!', other_attachments)
        print('ring!', other_attachment_details)
    severed = Chem.FragmentOnBonds(fragmentanda, primary_cuts + secondary_cuts, addDummies=False)
    frag_split = []
    fragmols = Chem.GetMolFrags(severed,
                                asMols=True,
                                fragsMolAtomMapping=frag_split,
                                sanitizeFrags=False)
    if self._debug_draw:
        print('Fragment splits')
        print(frag_split)
    # ---- pick the piece that holds the anchor ----
    holders = [(n, members) for n, members in enumerate(frag_split) if anchor_index in members]
    if not holders:
        raise Exception
    mol_N, indices = holders[0]
    frag = fragmols[mol_N]
    frag_anchor_index = indices.index(anchor_index)
    # pre-emptively fix atom ori_i
    # offset collapsed to avoid clashes.
    self._offset_collapsed_ring(frag)
    self._offset_origins(frag)
    # Experimental code.
    # TODO: finish!
    # frag_atom = frag.GetAtomWithIdx(frag_anchor_index)
    # old2future = {atom.GetIntProp('_ori_i'): atom.GetIdx() + scaffold.GetNumAtoms() for atom in frag.GetAtoms()}
    # del old2future[-1]  # does nothing but nice to double tap
    # if frag_atom.GetIntProp('_ori_i') == -1: #damn.
    #     for absent in self._get_mystery_ori_i(frag):
    #         old2future[absent] = scaffold_attachment_index
    # self._renumber_original_indices(frag, old2future)
    if self._debug_draw:
        print('Fragment to add')
        self.draw_nicely(frag)
    # ---- combine and stitch ----
    combo = Chem.RWMol(rdmolops.CombineMols(scaffold, frag))
    # fragment atoms are appended after the scaffold atoms
    offset = scaffold.GetNumAtoms()
    scaffold_anchor_index = frag_anchor_index + offset
    if self._debug_draw:
        print('Pre-merger')
        print(scaffold_anchor_index, attachment_details, anchor_index, scaffold.GetNumAtoms())
        self.draw_nicely(combo)
    for detail in attachment_details:
        combo.AddBond(scaffold_anchor_index, detail['idx_S'], detail['type'])
    for oi, oad in zip(other_attachments, other_attachment_details):
        first = oad[0]
        bond_type = first['type']
        scaffold_attachment_index = first['idx_S']
        scaffold_anchor_index = indices.index(oi) + offset
        combo.AddBond(scaffold_anchor_index, scaffold_attachment_index, bond_type)
        if self._debug_draw:
            print(
                f"Added additional {bond_type.name} bond between {scaffold_attachment_index} and {scaffold_anchor_index} " + \
                f"(formerly {indices.index(oi)})")
    flags = Chem.rdmolops.SanitizeFlags
    Chem.SanitizeMol(combo,
                     sanitizeOps=flags.SANITIZE_ADJUSTHS + flags.SANITIZE_SETAROMATICITY,
                     catchErrors=True)
    if self._debug_draw:
        print('Merged')
        self.draw_nicely(combo)
    self._prevent_two_bonds_on_dummy(combo)
    return combo.GetMol()