def __fragment_mol(mol, radius=3, return_ids=True, keep_stereo=False, protected_ids=None):
    """
    Split a molecule into (context, core) pairs using rdMMPA cuts.

    INPUT:
        mol - Mol
        radius - integer, number of bonds to cut context
        return_ids - bool, collect ids of core atoms; forced to True when
                     protected_ids is given, otherwise an empty tuple is reported
        keep_stereo - bool, keep or discard information about stereoconfiguration
        protected_ids - set/list/tuple of atom ids which cannot be present in core fragments
    OUTPUT:
        list of tuples (env_smi, core_smi, tuple of core atom ids)
        ('C[*:1].C[*:2]', 'CC(C(=O)O)c1ccc(CC([*:1])[*:2])c(Br)c1', (1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))
        ('Cc(c)c(cc)[*:1]', '[H][*:1]', (25,))

    If input mol has explicit hydrogens the output will contain also fragments where core = [H][*:1].
    Smiles of fragments with heavy atoms will contain only heavy atoms
    """

    def collect_ids(fragment, prop="Index"):
        # attachment points have atomic number 0 and are left out
        return tuple(sorted(atom.GetIntProp(prop)
                            for atom in fragment.GetAtoms()
                            if atom.GetAtomicNum()))

    def ids_of(fragment):
        return collect_ids(fragment) if return_ids else tuple()

    # filtering by protected atoms is impossible without atom ids
    if protected_ids:
        return_ids = True

    # remember original atom indices so they survive fragmentation
    if return_ids:
        for atom in mol.GetAtoms():
            atom.SetIntProp("Index", atom.GetIdx())

    heavy_cuts = rdMMPA.FragmentMol(mol, pattern="[!#1]!@!=!#[!#1]", maxCuts=4,
                                    resultsAsMols=True, maxCutBonds=30)
    hydrogen_cuts = rdMMPA.FragmentMol(mol, pattern="[#1]!@!=!#[!#1]", maxCuts=1,
                                       resultsAsMols=True, maxCutBonds=100)

    results = []
    for core, chains in heavy_cuts + hydrogen_cuts:
        if core is not None:
            # multiple cuts: no checks for H are needed because H can be
            # present only in single cuts
            env, frag = get_canon_context_core(chains, core, radius, keep_stereo)
            results.append((env, frag, ids_of(core)))
            continue

        # single cut: each half serves once as context and once as core
        parts = list(Chem.GetMolFrags(chains, asMols=True))
        left, right = parts[0], parts[1]
        left_ids = ids_of(left)
        right_ids = ids_of(right)
        for context, replaced, replaced_ids in ((left, right, right_ids),
                                                (right, left, left_ids)):
            if Chem.MolToSmiles(context) != '[H][*:1]':  # context cannot be H
                env, frag = get_canon_context_core(context, replaced, radius, keep_stereo)
                results.append((env, frag, replaced_ids))

    if protected_ids:
        protected_ids = set(protected_ids)
        results = [entry for entry in results if protected_ids.isdisjoint(entry[2])]

    return results  # list of tuples (env smiles, core smiles, tuple of atom ids)
def test5(self):
    """Alpha-conotoxin SI: no fragments with default limits, 231 with maxCuts=2 and maxCutBonds=21."""
    # ALPHA-CONOTOXIN SI
    conotoxin = Chem.MolFromSmiles(
        "CC[C@H](C)[C@@H](C(=O)N[C@H]1CSSC[C@H]2C(=O)NCC(=O)N3CCC[C@H]3C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@@H](CSSC[C@@H](C(=O)N[C@H](C(=O)N4CCC[C@H]4C(=O)N[C@H](C(=O)N2)C)CC(=O)N)NC1=O)C(=O)N)CO)Cc5ccc(cc5)O)CCCC[NH3+])N")

    default_result = rdMMPA.FragmentMol(conotoxin, resultsAsMols=False)
    self.assertFalse(len(default_result))

    relaxed_result = rdMMPA.FragmentMol(conotoxin, maxCuts=2, maxCutBonds=21,
                                        resultsAsMols=False)
    self.assertEqual(len(relaxed_result), 231)
def mmpa_frag(
    mol,
    pattern: str = None,
    max_cut: int = 1,
    max_bond_cut: int = 20,
    h_split: bool = False,
) -> Optional[Set[Chem.Mol]]:
    """Fragment molecule on specific bonds suitable for a MMPA analysis.

    Args:
        mol: Molecule to fragment.
        pattern: Bond pattern to split on. Will use default rdkit pattern
            '[#6+0;!$(*=,#[!#6])]!@!=!#[*]' if not provided. An empty string
            skips this fragmentation step entirely.
        max_cut: Number of cuts.
        max_bond_cut: Maximum number of bond to cut. Default to 20.
        h_split: Whether to split at hydrogen position too. This is equivalent
            to enabling the addition of new fragments.

    Returns:
        Set of the fragmentation results collected from `rdMMPA.FragmentMol`
        (called with `resultsAsMols=False`).
    """
    # Options shared by the pattern-based fragmentation calls.
    base_options = dict(
        maxCuts=max_cut,
        resultsAsMols=False,
        maxCutBonds=max_bond_cut,
    )

    if pattern is None:
        # Let rdkit fall back to its own default pattern.
        collected = rdMMPA.FragmentMol(mol, **base_options)
    elif pattern:
        collected = rdMMPA.FragmentMol(mol, pattern=pattern, **base_options)
    else:
        collected = []

    if h_split:
        # Hydrogens must be explicit to be cut; always a single cut here.
        with_hs = Chem.AddHs(mol)
        collected += rdMMPA.FragmentMol(
            with_hs,
            pattern="[#1]!@!=!#[!#1]",
            maxCuts=1,
            resultsAsMols=False,
            maxCutBonds=max_bond_cut,
        )

    return set(collected)
def test8(self):
    """Passing explicit bond indices (list or tuple) must match the default-pattern result."""
    mol = Chem.MolFromSmiles('Cc1ccccc1NC(=O)C(C)[NH+]1CCCC1')  # ZINC00000051
    cut_query = Chem.MolFromSmarts("[#6+0;!$(*=,#[!#6])]!@!=!#[*]")

    # translate every matched atom pair into the index of the bond joining it
    bond_ids = [mol.GetBondBetweenAtoms(begin, end).GetIdx()
                for begin, end in mol.GetSubstructMatches(cut_query)]

    from_pattern = rdMMPA.FragmentMol(mol, resultsAsMols=False)
    from_list = rdMMPA.FragmentMol(mol, bond_ids, resultsAsMols=False)
    from_tuple = rdMMPA.FragmentMol(mol, tuple(bond_ids), resultsAsMols=False)

    self.assertEqual(from_pattern, from_list)
    self.assertEqual(from_list, from_tuple)
def test1(self):
    """Anisole fragmented as Mol objects: two single cuts and one double cut."""
    mol = Chem.MolFromSmiles('c1ccccc1OC')
    results = rdMMPA.FragmentMol(mol)

    def check_pieces(fragmented, expected_smiles):
        # split into connected components and compare their SMILES in order
        pieces = Chem.GetMolFrags(fragmented, asMols=True)
        self.assertEqual(len(pieces), len(expected_smiles))
        for piece, smiles in zip(pieces, expected_smiles):
            self.assertEqual(Chem.MolToSmiles(piece, True), smiles)

    self.assertEqual(len(results), 3)
    for entry in results:
        self.assertEqual(len(entry), 2)

    ordered = sorted(results, key=natoms)
    single_a, single_b, double = ordered[0], ordered[1], ordered[2]

    # single cuts carry no core, the double cut does
    self.assertEqual(single_a[0], None)
    self.assertEqual(single_b[0], None)
    self.assertNotEqual(double[0], None)
    for entry in (single_a, single_b, double):
        self.assertNotEqual(entry[1], None)

    # each single cut adds two attachment-point atoms
    for entry in (single_a, single_b):
        self.assertEqual(entry[1].GetNumAtoms(), mol.GetNumAtoms() + 2)

    check_pieces(single_a[1], ('c1ccc([*:1])cc1', 'CO[*:1]'))
    check_pieces(single_b[1], ('c1ccc(O[*:1])cc1', 'C[*:1]'))
    check_pieces(double[0], ('O([*:1])[*:2]',))
    check_pieces(double[1], ('c1ccc([*:1])cc1', 'C[*:2]'))
def fragment_mol(smi, cid, pattern="[#6+0;!$(*=,#[!#6])]!@!=!#[*]"):
    """
    Fragment a SMILES with exactly two cuts and return CSV-like records.

    INPUT:
        smi - SMILES string of the molecule
        cid - identifier written to the second field of every record
        pattern - SMARTS of the bonds which can be cut
    OUTPUT:
        set of strings 'smi,cid,core,chains'; a single 'smi,cid,,' record if
        the molecule was parsed but produced no fragments; an empty set (plus
        a message on stderr) if the SMILES cannot be parsed
    """
    mol = Chem.MolFromSmiles(smi)

    # different cuts can give the same fragments, a set removes the duplicates
    outlines = set()

    if mol is None:
        sys.stderr.write("Can't generate mol for: %s\n" % (smi))
        return outlines

    frags = rdMMPA.FragmentMol(mol, minCuts=2, maxCuts=2, maxCutBonds=100,
                               pattern=pattern, resultsAsMols=False)
    for core, chains in frags:
        outlines.add('%s,%s,%s,%s' % (smi, cid, core, chains))

    if not outlines:
        # for molecules with no cuts, output the parent molecule itself
        outlines.add('%s,%s,,' % (smi, cid))

    return outlines
def fragment_mol(mol, query, max_cuts, keep_stereo, radius):
    """
    Fragment a molecule and name every fragment for each requested radius.

    INPUT:
        mol - Mol; NO2 groups are converted by replace_no2 and the molecule is
              re-sanitized before fragmentation
        query - SMARTS of the bonds which can be cut
        max_cuts - integer, maximum number of simultaneous cuts
        keep_stereo - bool, passed to get_canon_context_core
        radius - iterable of integers, context radii to encode
    OUTPUT:
        list of tuples (frag_name, tuple of heavy atom ids of the core), e.g.
        ('F', (0,)), ('C#N', (3, 4)); empty list if sanitization failed.
        frag_name joins, with '||', one item per radius: core_smi for
        radius 0, otherwise 'core_smi|env_smi'
    """

    def get_atom_prop(molecule, prop="Index", only_heavy=True):
        # atoms without the property are silently skipped
        res = []
        for a in molecule.GetAtoms():
            # the original test `only_heavy and ...` returned nothing at all
            # when only_heavy was False
            if only_heavy and a.GetAtomicNum() <= 1:
                continue
            try:
                res.append(a.GetIntProp(prop))
            except KeyError:
                continue
        return tuple(sorted(res))

    def get_frag_name(context, core, radius, keep_stereo):
        line = []
        for r in radius:
            env_smi, core_smi = get_canon_context_core(context, core, r, keep_stereo)
            if r == 0:  # for radius = 0 there is no env (empty string)
                line.append(core_smi)
            elif env_smi and core_smi:
                line.append('%s|%s' % (core_smi, env_smi))
        return '||'.join(line) if line else None

    # modify representation of NO2 groups to charged version
    mol = replace_no2(mol)
    err = Chem.SanitizeMol(mol, catchErrors=True)
    if err != 0:
        # an unnamed molecule must not turn the report into a KeyError
        mol_name = mol.GetProp("_Name") if mol.HasProp("_Name") else ''
        print('Molecule %s failed to sanitize due to: ' % mol_name + str(err))
        return []

    # remember original atom indices so they survive fragmentation
    for atom in mol.GetAtoms():
        atom.SetIntProp("Index", atom.GetIdx())

    output = []
    frags = rdMMPA.FragmentMol(mol, pattern=query, maxCuts=max_cuts,
                               resultsAsMols=True, maxCutBonds=30)

    for core, chains in frags:
        if core is None:  # single cut
            components = list(Chem.GetMolFrags(chains, asMols=True))
            ids_0 = get_atom_prop(components[0])
            ids_1 = get_atom_prop(components[1])
            # each half serves once as context and once as core
            for context, replaced, replaced_ids in ((components[0], components[1], ids_1),
                                                    (components[1], components[0], ids_0)):
                if Chem.MolToSmiles(context) != '[H][*:1]':  # context cannot be H
                    frag_name = get_frag_name(context, replaced, radius, keep_stereo)
                    if frag_name:
                        output.append((frag_name, replaced_ids))
        else:  # multiple cuts
            # there are no checks for H needed because H can be present only in single cuts
            frag_name = get_frag_name(chains, core, radius, keep_stereo)
            if frag_name:
                output.append((frag_name, get_atom_prop(core)))

    return output
def test3(self):
    """A custom 'cO' pattern yields exactly one single-cut result for anisole."""
    mol = Chem.MolFromSmiles('c1ccccc1OC')
    results = rdMMPA.FragmentMol(mol, resultsAsMols=False, pattern='cO')

    self.assertEqual(len(results), 1)
    for entry in results:
        self.assertEqual(len(entry), 2)

    only = sorted(results)[0]
    # single cut: empty core, both halves reported in the chains string
    self.assertEqual(only[0], '')
    self.assertNotEqual(only[1], '')
    self.assertEqual(only[1], 'CO[*:1].c1ccc(cc1)[*:1]')
def mmps_cutting(mol, pattern="[#6+0;!$(*=,#[!#6])]!@!=!#[*]", dummy=True, filtering=True):
    """
    Cut a molecule twice and describe every accepted linker/fragments triple.

    A double cut is kept when the linker has at least 2 heavy atoms and at
    least 1 atom on the shortest path between its attachment points, both
    side fragments have at least 5 heavy atoms and together are not smaller
    than the linker.

    INPUT:
        mol - Mol
        pattern - SMARTS of the bonds which can be cut
        dummy - bool, keep attachment points in the fragments part; if False
                the chains are passed through remove_dummys
        filtering - bool, additionally require the linker and both fragments
                    to pass filter(..., type="frags")
    OUTPUT:
        list of strings 'L_<linker_length>.<linker>.<chains>><smi>';
        empty list if anything failed while processing the molecule
    """
    FMQs = []
    try:
        smi = Chem.MolToSmiles(mol)
        bricks = rdMMPA.FragmentMol(mol, minCuts=2, maxCuts=2, maxCutBonds=100,
                                    pattern=pattern, resultsAsMols=False)
        for linker, chains in bricks:
            linker_mol = Chem.MolFromSmiles(linker)
            linker_size = linker_mol.GetNumHeavyAtoms()
            linker_site_idxs = [atom.GetIdx() for atom in linker_mol.GetAtoms()
                                if atom.GetAtomicNum() == 0]
            # the path includes both attachment points, hence "- 2"
            linker_length = len(Chem.rdmolops.GetShortestPath(
                linker_mol, linker_site_idxs[0], linker_site_idxs[1])) - 2
            if linker_size < 2 or linker_length < 1:
                continue

            chain_parts = chains.split(".")
            frag1_mol = Chem.MolFromSmiles(chain_parts[0])
            frag2_mol = Chem.MolFromSmiles(chain_parts[1])
            frag1_size = frag1_mol.GetNumHeavyAtoms()
            frag2_size = frag2_mol.GetNumHeavyAtoms()
            # the original summed frag1_size with itself, which ignored the
            # second fragment in the comparison with the linker
            if frag1_size < 5 or frag2_size < 5 or (frag1_size + frag2_size) < linker_size:
                continue

            if filtering:
                # `filter` is called with a `type` keyword, so it is presumably a
                # project helper shadowing the builtin; `&` keeps all three calls evaluated
                action = filter(linker_mol, type="frags") & filter(frag1_mol, type="frags") \
                    & filter(frag2_mol, type="frags")
                if not action:
                    # rejected cuts are skipped, so no stale or None record is appended
                    continue

            shown_chains = chains if dummy else remove_dummys(chains)
            FMQs.append("L_%s.%s.%s>%s" % (linker_length, linker, shown_chains, smi))
    except Exception:
        # best-effort: any failure discards the molecule, but KeyboardInterrupt
        # and SystemExit are no longer swallowed
        print("error")
        FMQs = []
    return FMQs
def test7(self):
    """Results for maxCuts=2 equal the union of exactly-one-cut and exactly-two-cut results."""
    mol = Chem.MolFromSmiles("Oc1ccccc1N")

    one_cut = rdMMPA.FragmentMol(mol, minCuts=1, maxCuts=1, maxCutBonds=21,
                                 resultsAsMols=False)
    two_cuts = rdMMPA.FragmentMol(mol, minCuts=2, maxCuts=2, maxCutBonds=21,
                                  resultsAsMols=False)
    up_to_two = rdMMPA.FragmentMol(mol, maxCuts=2, maxCutBonds=21,
                                   resultsAsMols=False)

    combined = set(one_cut + two_cuts)
    self.assertEqual(combined, set(up_to_two))
def test4(self):
    """ZINC00000051 gives 18 (core, chains) results including a triple-cut carbon core."""
    mol = Chem.MolFromSmiles('Cc1ccccc1NC(=O)C(C)[NH+]1CCCC1')  # ZINC00000051
    results = rdMMPA.FragmentMol(mol, resultsAsMols=False)

    core_smiles = {entry[0] for entry in results}
    self.assertIn('C([*:1])([*:2])[*:3]', core_smiles)
    # FIX: this needs to be investigated, it's not currently passing
    #self.assertTrue('O=C(N[*:3])C([*:1])[*:2]' in cores)

    self.assertEqual(len(results), 18)
    for entry in results:
        self.assertEqual(len(entry), 2)
def fragment_mol(smi, smi_id='', mode=0):
    """
    Fragment a SMILES and return newline-terminated CSV-like records.

    INPUT:
        smi - SMILES string of the molecule
        smi_id - identifier written to the second field of every record
        mode - 0: cut bonds between heavy atoms only,
               1: cut heavy-heavy bonds and bonds to hydrogens,
               2: cut bonds to hydrogens only
    OUTPUT:
        set of strings 'smi,smi_id,core,chains\\n'; empty (with a message on
        stderr) if the SMILES cannot be parsed
    """
    outlines = set()
    mol = Chem.MolFromSmiles(smi)

    if mol is None:
        sys.stderr.write("Can't generate mol for: %s\n" % smi)
        return outlines

    def as_record(core, chains):
        return '%s,%s,%s,%s\n' % (smi, smi_id, core, chains)

    # heavy atoms
    if mode in (0, 1):
        four_cuts = rdMMPA.FragmentMol(mol, pattern="[!#1]!@!=!#[!#1]", maxCuts=4,
                                       resultsAsMols=False, maxCutBonds=30)
        three_cuts = rdMMPA.FragmentMol(mol, pattern="[!#1]!@!=!#[!#1]", maxCuts=3,
                                        resultsAsMols=False, maxCutBonds=30)
        # both runs are merged and de-duplicated before formatting
        unique_frags = set(four_cuts + three_cuts)
        outlines.update(as_record(core, chains) for core, chains in unique_frags)

    # hydrogen splitting
    if mode in (1, 2):
        mol = Chem.AddHs(mol)
        hydrogen_count = mol.GetNumAtoms() - mol.GetNumHeavyAtoms()
        # molecules with 60 or more hydrogens are not split at hydrogens
        if hydrogen_count < 60:
            h_frags = rdMMPA.FragmentMol(mol, pattern="[#1]!@!=!#[!#1]", maxCuts=1,
                                         resultsAsMols=False, maxCutBonds=100)
            outlines.update(as_record(core, chains) for core, chains in h_frags)

    return outlines
def test9(self):
    """Invalid minCuts/maxCuts combinations must raise ValueError with a specific message."""
    m = Chem.MolFromSmiles("Oc1ccccc1N")

    # maxCuts below minCuts
    with self.assertRaises(ValueError) as raised:
        rdMMPA.FragmentMol(m, minCuts=1, maxCuts=0, maxCutBonds=21, resultsAsMols=False)
    self.assertEqual(str(raised.exception), "supplied maxCuts is less than minCuts")

    # minCuts of zero
    with self.assertRaises(ValueError) as raised:
        rdMMPA.FragmentMol(m, minCuts=0, maxCuts=0, maxCutBonds=21, resultsAsMols=False)
    self.assertEqual(str(raised.exception), "minCuts must be greater than 0")
def test6(self):
    """Alpha-conotoxin SI: one-cut and two-cut results are disjoint and together equal the maxCuts=2 result."""
    # ALPHA-CONOTOXIN SI
    conotoxin = Chem.MolFromSmiles(
        "CC[C@H](C)[C@@H](C(=O)N[C@H]1CSSC[C@H]2C(=O)NCC(=O)N3CCC[C@H]3C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@@H](CSSC[C@@H](C(=O)N[C@H](C(=O)N4CCC[C@H]4C(=O)N[C@H](C(=O)N2)C)CC(=O)N)NC1=O)C(=O)N)CO)Cc5ccc(cc5)O)CCCC[NH3+])N"
    )

    default_result = rdMMPA.FragmentMol(conotoxin, resultsAsMols=False)
    self.assertFalse(len(default_result))

    one_cut = rdMMPA.FragmentMol(conotoxin, minCuts=1, maxCuts=1, maxCutBonds=21,
                                 resultsAsMols=False)
    two_cuts = rdMMPA.FragmentMol(conotoxin, minCuts=2, maxCuts=2, maxCutBonds=21,
                                  resultsAsMols=False)
    up_to_two = rdMMPA.FragmentMol(conotoxin, maxCuts=2, maxCutBonds=21,
                                   resultsAsMols=False)

    self.assertEqual(set(one_cut + two_cuts), set(up_to_two))
    # no result may be reported for both cut counts
    self.assertEqual(set(one_cut) & set(two_cuts), set())
def fragment(self, molecule_pair):
    """
    Single-cut every molecule of the pair and collect both halves of each cut.

    Each item of molecule_pair must expose a `smiles` attribute. For every
    cut, the chains string is split on '.'; the first part goes to the cores
    list and the second to the sidechains list, with the '[*:1]' label
    rewritten as '[1*]' before parsing.

    Returns a tuple (list of core Mols, list of sidechain Mols).
    """
    cores = []
    sidechains = []

    def relabel(smiles):
        # atom-map label -> isotope label on the attachment point
        return Chem.MolFromSmiles(smiles.replace("[*:1]", "[1*]"))

    for molecule in molecule_pair:
        parent = Chem.MolFromSmiles(molecule.smiles)
        cuts = rdMMPA.FragmentMol(parent, maxCuts=1, resultsAsMols=False)
        if not cuts:
            continue
        # only the second member of every result is used
        for _, chains in cuts:
            first_part, second_part = chains.split(".")
            cores.append(relabel(first_part))
            sidechains.append(relabel(second_part))

    return cores, sidechains
def fragmentate(
    self, molecule_pair: Tuple[Chem.Mol, Chem.Mol]
) -> Tuple[List[Chem.Mol], List[Chem.Mol]]:
    """Single-cut both molecules and collect the two halves of every cut.

    For every cut the chains string is split on '.'; the first part is
    appended to the cores list and the second to the sidechains list, with
    the '[*:1]' label rewritten as '[1*]' before parsing.

    Args:
        molecule_pair: The two molecules to fragment.

    Returns:
        Tuple of (cores, sidechains) as lists of molecules. A molecule
        without any cuttable bond simply contributes nothing.
    """
    molecule_cores = []
    molecule_sidechains = []

    for molecule in molecule_pair:
        molecule_frags = rdMMPA.FragmentMol(molecule, maxCuts=1, resultsAsMols=False)
        # Iterating the pairs directly (instead of unpacking zip(*...))
        # avoids a ValueError when a molecule yields no fragments, and no
        # longer reuses the name of the `molecule_pair` parameter.
        for _, chains in molecule_frags:
            core, sidechain = chains.split(".")
            molecule_cores.append(
                Chem.MolFromSmiles(core.replace("[*:1]", "[1*]")))
            molecule_sidechains.append(
                Chem.MolFromSmiles(sidechain.replace("[*:1]", "[1*]")))

    return molecule_cores, molecule_sidechains
def test2(self):
    """Anisole fragmented as SMILES: two single cuts and one double cut."""
    mol = Chem.MolFromSmiles('c1ccccc1OC')
    results = rdMMPA.FragmentMol(mol, resultsAsMols=False)

    self.assertEqual(len(results), 3)
    for entry in results:
        self.assertEqual(len(entry), 2)

    ordered = sorted(results)

    # single cuts have an empty core, the double cut does not
    for entry in ordered[:2]:
        self.assertEqual(entry[0], '')
    self.assertNotEqual(ordered[2][0], '')
    for entry in ordered:
        self.assertNotEqual(entry[1], '')

    expected_chains = (
        'CO[*:1].c1ccc(cc1)[*:1]',
        'C[*:1].c1ccc(cc1)O[*:1]',
    )
    for entry, chains in zip(ordered, expected_chains):
        self.assertEqual(entry[1], chains)

    self.assertEqual(ordered[2][0], 'O([*:1])[*:2]')
    self.assertEqual(ordered[2][1], 'C[*:2].c1ccc([*:1])cc1')
def fragment_mol(smi, id):
    """
    Fragment a SMILES with the default rdMMPA limits and return CSV-like records.

    INPUT:
        smi - SMILES string of the molecule
        id - identifier written to the second field of every record
    OUTPUT:
        set of strings 'smi,id,core,chains'; empty (with a message on stderr)
        if the SMILES cannot be parsed
    """
    mol = Chem.MolFromSmiles(smi)

    # different cuts can give the same fragments, a set removes the duplicates
    outlines = set()

    if mol is None:
        sys.stderr.write("Can't generate mol for: %s\n" % (smi))
        return outlines

    frags = rdMMPA.FragmentMol(mol, pattern="[#6+0;!$(*=,#[!#6])]!@!=!#[*]",
                               resultsAsMols=False)
    for core, chains in frags:
        outlines.add('%s,%s,%s,%s' % (smi, id, core, chains))

    return outlines
def __fragment_mol_link(mol1, mol2, radius=3, keep_stereo=False, protected_ids_1=None, protected_ids_2=None,
                        return_ids=True):
    """
    Build contexts for linking two molecules through their hydrogen positions.

    Both molecules are cut once at every bond between a hydrogen and a
    non-hydrogen atom. Every remaining part of mol1 (relabelled to [*:2]) is
    combined with every remaining part of mol2 and the environment is
    computed around a fake one-carbon core '[*:1]C[*:2]'.

    INPUT:
        mol1, mol2 - Mol
        radius - integer, number of bonds to cut context
        keep_stereo - bool, keep or discard information about stereoconfiguration
        protected_ids_1, protected_ids_2 - set/list/tuple of atom ids; cuts whose
                     attachment point sits on one of these atoms are dropped
        return_ids - bool, label atoms with their index so the id of the cut
                     hydrogen can be reported; forced to True when any
                     protected ids are given
    OUTPUT:
        list of tuples (env_smi, '[H][*:1].[H][*:2]', ids_1, ids_2), where
        ids_1/ids_2 are one-item lists with the id of the cut hydrogen of
        mol1/mol2, or empty lists when atoms were not labelled
    """

    def filter_frags(frags, protected_ids):
        output = []
        protected_ids = set(protected_ids)
        for _, chains in frags:
            for atom in chains.GetAtoms():
                if atom.GetAtomicNum() == 0:
                    for d in atom.GetNeighbors():
                        # NOTE(review): this compares the index inside the fragmented
                        # mol with ids given for the input mol - presumably
                        # FragmentMol keeps original atom order; confirm
                        if d.GetAtomicNum() != 1 and d.GetIdx() not in protected_ids:
                            output.append((None, chains))
        return output

    def prep_frags(frags, keep_stereo=False):
        # frags is a list of tuples [(None, frag_mol_1), (None, frag_mol_2), ...]
        ls = []
        for _, chains in frags:
            ids = []
            for atom in chains.GetAtoms():
                if atom.GetAtomicNum() == 0:
                    for d in atom.GetNeighbors():
                        # atoms carry "Index" only when they were labelled; without
                        # this guard return_ids=False ended in a KeyError
                        if d.GetAtomicNum() == 1 and d.HasProp('Index'):
                            ids = [d.GetIntProp('Index')]
                    if ids:
                        break  # only one such occurrence can be
            a, b = Chem.MolToSmiles(chains, isomericSmiles=keep_stereo).split('.')
            # keep the part which is not the cut hydrogen
            ls.append([b if a == '[H][*:1]' else a, ids])
        return ls

    if protected_ids_1 or protected_ids_2:
        return_ids = True

    # remember original atom indices so they survive fragmentation
    if return_ids:
        for atom in mol1.GetAtoms():
            atom.SetIntProp("Index", atom.GetIdx())
        for atom in mol2.GetAtoms():
            atom.SetIntProp("Index", atom.GetIdx())

    frags_1 = rdMMPA.FragmentMol(mol1, pattern="[#1]!@!=!#[!#1]", maxCuts=1, resultsAsMols=True, maxCutBonds=100)
    frags_2 = rdMMPA.FragmentMol(mol2, pattern="[#1]!@!=!#[!#1]", maxCuts=1, resultsAsMols=True, maxCutBonds=100)

    if protected_ids_1:
        frags_1 = filter_frags(frags_1, protected_ids_1)

    if protected_ids_2:
        frags_2 = filter_frags(frags_2, protected_ids_2)

    frags_1 = prep_frags(frags_1, keep_stereo)
    frags_2 = prep_frags(frags_2, keep_stereo)

    # parts of the first molecule get the second attachment label
    for item in frags_1:
        item[0] = item[0].replace('*:1', '*:2')

    fake_core = '[*:1]C[*:2]'
    output = []
    for (fr1, ids_1), (fr2, ids_2) in product(frags_1, frags_2):
        chains = '%s.%s' % (fr1, fr2)
        # only the environment is used, the core is always two hydrogens
        env, _ = get_canon_context_core(chains, fake_core, radius=radius, keep_stereo=keep_stereo)
        output.append((env, '[H][*:1].[H][*:2]', ids_1, ids_2))

    return output  # list of tuples (env smiles, core smiles, ids of mol1, ids of mol2)
def __fragment_mol(mol, radius=3, return_ids=True, keep_stereo=False, protected_ids=None, symmetry_fixes=False):
    """
    Split a molecule into unique (context, core) pairs using rdMMPA cuts.

    INPUT:
        mol - Mol
        radius - integer, number of bonds to cut context
        return_ids - bool, collect ids of core atoms; forced to True when
                     protected_ids is given, otherwise an empty tuple is reported
        keep_stereo - bool, keep or discard information about stereoconfiguration
        protected_ids - set/list/tuple of atom ids which cannot be present in core fragments
        symmetry_fixes - if set, then duplicated fragments having different ids will be returned (useful when one
                         wants to alter only small part of a molecule and it is important atoms with which ids
                         will be replaced)
    OUTPUT:
        list of tuples (env_smi, core_smi, tuple of core atom ids)
        ('C[*:1].C[*:2]', 'CC(C(=O)O)c1ccc(CC([*:1])[*:2])c(Br)c1', (1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))
        ('Cc(c)c(cc)[*:1]', '[H][*:1]', (25,))

    If input mol has explicit hydrogens the output will contain also fragments where core = [H][*:1].
    Smiles of fragments with heavy atoms will contain only heavy atoms
    """

    def collect_ids(fragment, prop="Index"):
        # attachment points have atomic number 0 and are left out
        return tuple(sorted(atom.GetIntProp(prop)
                            for atom in fragment.GetAtoms()
                            if atom.GetAtomicNum()))

    def ids_of(fragment):
        return collect_ids(fragment) if return_ids else tuple()

    # filtering by protected atoms is impossible without atom ids
    if protected_ids:
        return_ids = True

    # remember original atom indices so they survive fragmentation
    if return_ids:
        for atom in mol.GetAtoms():
            atom.SetIntProp("Index", atom.GetIdx())

    # heavy atoms
    all_cuts = rdMMPA.FragmentMol(mol, pattern="[!#1]!@!=!#[!#1]", maxCuts=4,
                                  resultsAsMols=True, maxCutBonds=30)
    all_cuts += rdMMPA.FragmentMol(mol, pattern="[!#1]!@!=!#[!#1]", maxCuts=3,
                                   resultsAsMols=True, maxCutBonds=30)
    # hydrogen atoms
    all_cuts += rdMMPA.FragmentMol(mol, pattern="[#1]!@!=!#[!#1]", maxCuts=1,
                                   resultsAsMols=True, maxCutBonds=100)

    # due to the bug https://github.com/rdkit/rdkit/issues/3040
    # outputs of rdMMPA.FragmentMol calls will contain duplicated fragments,
    # they are removed by collecting results in a set
    unique = set()

    for core, chains in all_cuts:
        if core is not None:
            # multiple cuts: no checks for H are needed because H can be
            # present only in single cuts
            env, frag = get_canon_context_core(chains, core, radius, keep_stereo)
            unique.add((env, frag, ids_of(core)))
            continue

        # single cut: each half serves once as context and once as core
        parts = list(Chem.GetMolFrags(chains, asMols=True))
        left, right = parts[0], parts[1]
        left_ids = ids_of(left)
        right_ids = ids_of(right)
        for context, replaced, replaced_ids in ((left, right, right_ids),
                                                (right, left, left_ids)):
            if Chem.MolToSmiles(context) != '[H][*:1]':  # context cannot be H
                env, frag = get_canon_context_core(context, replaced, radius, keep_stereo)
                unique.add((env, frag, replaced_ids))

    if symmetry_fixes:
        additional = __extend_output_by_equivalent_atoms(mol, unique)
        if additional:
            unique.update(additional)

    if protected_ids:
        protected_ids = set(protected_ids)
        unique = [entry for entry in unique if protected_ids.isdisjoint(entry[2])]

    return list(unique)  # list of tuples (env smiles, core smiles, tuple of atom ids)