def get_filter_values(mol):
    """
    Compute the per-molecule values that the filters operate on.

    Args:
        mol: an RDKit ``Chem.Mol``; anything else trips the assertion.

    Returns:
        dict mapping descriptor names ("MW", "logP", "HBA", ...) to their
        values, plus the bookkeeping entries "is_good" (True) and
        "rejections" (empty list).
    """
    assert isinstance(mol, Chem.Mol)

    rotatable = lip.NumRotatableBonds(mol)
    values = {
        "MW": desc.CalcExactMolWt(mol),
        "logP": crip.MolLogP(mol),
        "HBA": lip.NumHAcceptors(mol),
        "HBD": lip.NumHDonors(mol),
        "tPSA": desc.CalcTPSA(mol),
        "rot_bonds": rotatable,
        # every bond that is not rotatable is counted as rigid
        # (assumes the two classes are mutually exclusive)
        "rigid_bonds": mol.GetNumBonds() - rotatable,
        "num_rings": lip.RingCount(mol),
        "num_hetero_atoms": lip.NumHeteroatoms(mol),
        # trusting RDKit's formal-charge calculation
        "charge": rdmolops.GetFormalCharge(mol),
    }

    carbons, charges, largest_ring = get_atom_props(mol)
    values["num_carbons"] = carbons
    values["num_charges"] = charges
    values["max_ring_size"] = largest_ring

    try:
        ratio = float(values["num_hetero_atoms"]) / float(values["num_carbons"])
    except ZeroDivisionError:
        ratio = 100000000  # sentinel used when the molecule has no carbons
    values["hc_ratio"] = ratio

    # number of BRICS bonds, used as a complexity indicator
    values["fc"] = len(list(Brics.FindBRICSBonds(mol)))
    values["is_good"] = True  # default; no filter has looked at it yet

    # unique element symbols present in the molecule
    symbols = set(atom.GetSymbol() for atom in mol.GetAtoms())
    values["atoms"] = list(symbols)

    chiral_centers = Chem.FindMolChiralCenters(mol, includeUnassigned=True)
    values["num_chiral_centers"] = len(chiral_centers)
    values["rejections"] = []  # filled later with the reasons for rejection
    return values
def _generateFPs(mol, fragmentMethod='Morgan'):
    """
    Fragment *mol* with the requested method and return its fingerprint.

    Args:
        mol: RDKit molecule.
        fragmentMethod: 'Morgan', 'RDK' or 'Brics'.

    Returns:
        ``(fp, aBits)``. ``fp`` holds the fragment counts (``None`` for an
        unknown method); ``aBits`` holds the bit/environment information
        ('Morgan' and 'RDK' only, otherwise an empty dict).
    """
    # circular Morgan fragments, using a simpler atom invariant than usual
    if fragmentMethod == 'Morgan':
        bit_info = {}
        morgan = AllChem.GetMorganFingerprint(
            mol,
            radius=2,
            invariants=utilsFP.generateAtomInvariant(mol),
            bitInfo=bit_info)
        environments = utilsFP.getMorganEnvironment(
            mol, bit_info, fp=morgan, minRad=2)
        return morgan.GetNonzeroElements(), environments

    # path-based RDKit fingerprint fragments
    if fragmentMethod == 'RDK':
        path_bits = {}
        path_fp = AllChem.UnfoldedRDKFingerprintCountBased(
            mol, maxPath=5, minPath=3, bitInfo=path_bits)
        return path_fp.GetNonzeroElements(), path_bits

    # final BRICS fragmentation (= smallest possible BRICS fragments)
    if fragmentMethod == 'Brics':
        fragMol = BRICS.BreakBRICSBonds(mol)
        propSmi = _prepBRICSSmiles(fragMol)
        return Counter(propSmi.split('.')), {}

    print("Unknown fragment method")
    return None, {}
def get_ddi_mask(atc42SMLES, med_voc):
    """
    Build a binary medication x BRICS-fragment membership matrix.

    Args:
        atc42SMLES: mapping from a medication word to an iterable of SMILES.
        med_voc: vocabulary object exposing ``idx2word`` (index -> word).

    Returns:
        ``np.ndarray`` of shape ``(len(med_voc.idx2word), n_fragments)``.
        Entry ``[i, j]`` is 1 when fragment ``j`` occurs in at least one
        SMILES of the ``i``-th medication (in ``idx2word`` iteration order).
    """
    fraction = []
    for word in med_voc.idx2word.values():
        tempF = set()
        for SMILES in atc42SMLES[word]:
            try:
                tempF.update(BRICS.BRICSDecompose(Chem.MolFromSmiles(SMILES)))
            except Exception:
                # Best effort: a SMILES that cannot be parsed or decomposed
                # contributes no fragments. Unlike a bare ``except`` this
                # still lets KeyboardInterrupt / SystemExit through.
                continue
        fraction.append(tempF)

    # unique fragments over all medications
    fracSet = list({frac for fracs in fraction for frac in fracs})

    # dict lookup instead of list.index() keeps the fill step linear
    column_of = {frac: col for col, frac in enumerate(fracSet)}

    ddi_matrix = np.zeros((len(med_voc.idx2word), len(fracSet)))
    for row, fracs in enumerate(fraction):
        for frac in fracs:
            ddi_matrix[row, column_of[frac]] = 1
    return ddi_matrix
def pair_frags(fname, out, method='Recap', is_mf=True):
    """
    Write (fragment-combination, molecule) SMILES pairs to a TSV file.

    Args:
        fname: tab-separated input table with a ``Smiles`` column.
        out: path of the tab-separated output (columns ``Frags``, ``Smiles``).
        method: ``'Recap'`` (any letter case) selects Recap leaves; every
            other value selects BRICS decomposition.
        is_mf: when True, combinations of 1..k fragments are emitted,
            otherwise only single fragments.

    Returns:
        None. The result is written to *out*.
    """
    # The selector is compared case-insensitively. Previously the default
    # 'Recap' never matched the lowercase literal, so Recap was unreachable
    # unless the caller happened to pass exactly 'recap'.
    use_recap = str(method).lower() == 'recap'

    smiles = pd.read_table(fname).Smiles.dropna()
    pairs = []
    for smile in tqdm(smiles):
        smile = utils.clean_mol(smile)
        mol = Chem.MolFromSmiles(smile)
        if mol is None:
            # unparsable SMILES cannot be decomposed; skip the row
            continue

        if use_recap:
            frags = np.array(
                sorted(Recap.RecapDecompose(mol).GetLeaves().keys()))
        else:
            frags = BRICS.BRICSDecompose(mol)
            # collapse the numbered BRICS dummies ([n*]) to a plain '*'
            frags = np.array(
                sorted({re.sub(r'\[\d+\*\]', '*', f) for f in frags}))
        if len(frags) < 2:
            # nothing was cut (or, for Recap, no leaves at all)
            continue

        du, hy = Chem.MolFromSmiles('*'), Chem.MolFromSmiles('[H]')
        subs = np.array([Chem.MolFromSmiles(f) for f in frags])
        # cap every attachment point with H, then strip the hydrogens again
        subs = np.array([
            Chem.RemoveHs(Chem.ReplaceSubstructs(f, du, hy, replaceAll=True)[0])
            for f in subs
        ])
        subs = np.array([m for m in subs if m.GetNumAtoms() > 1])
        # keep fragments that are a substructure of exactly one entry
        # (i.e. only of themselves)
        match = np.array([[m.HasSubstructMatch(f) for f in subs] for m in subs])
        frags = subs[match.sum(axis=0) == 1]
        # largest fragments first, capped at the vocabulary's fragment limit
        frags = sorted(frags, key=lambda x: -x.GetNumAtoms())[:voc.n_frags]
        frags = [Chem.MolToSmiles(Chem.RemoveHs(f)) for f in frags]

        max_comb = len(frags) if is_mf else 1
        for ix in range(1, max_comb + 1):
            for comb in combinations(frags, ix):
                frag_smiles = '.'.join(comb)
                if len(frag_smiles) > len(smile):
                    continue
                if mol.HasSubstructMatch(Chem.MolFromSmarts(frag_smiles)):
                    pairs.append([frag_smiles, smile])

    df = pd.DataFrame(pairs, columns=['Frags', 'Smiles'])
    df.to_csv(out, sep='\t', index=False)
def get_sgs(record_dict, n_min, n_max, method="exhaustive"):
    """
    Enumerate subgraphs of ``record_dict["mol"]``.

    Args:
        record_dict: mapping holding the RDKit molecule under ``"mol"``.
        n_min, n_max: size bounds forwarded to the enumeration / size filter.
        method: "exhaustive", "RECAP" or "BRICS".

    Returns:
        For "exhaustive", the result of ``FindAllSubgraphsOfLengthMToN``.
        For "RECAP"/"BRICS", the result of ``subset_sgs_sizes`` applied to
        the bond-index lists of the fragments. ``None`` for any other method.
    """
    if method == "exhaustive":
        return Chem.rdmolops.FindAllSubgraphsOfLengthMToN(
            record_dict["mol"], n_min, n_max)

    # Both decomposition methods differ only in how the fragment molecules
    # are produced; they are generated lazily, one per loop iteration.
    if method == "RECAP":
        nodes = Recap.RecapDecompose(record_dict["mol"]).GetAllChildren()
        fragment_mols = (node.mol for node in nodes.values())
    elif method == "BRICS":
        fragment_mols = (
            Chem.MolFromSmiles(smi)
            for smi in BRICS.BRICSDecompose(record_dict["mol"]))
    else:
        return None

    sgs = []
    for fragment in fragment_mols:
        # drop the dummy (atomic number 0) attachment atoms
        stripped = Chem.DeleteSubstructs(fragment, Chem.MolFromSmarts('[#0]'))
        edge_idxs = get_substructure_bond_idx(stripped, record_dict["mol"])
        if edge_idxs is not None:
            sgs.append(edge_idxs)
    return subset_sgs_sizes([sgs], n_min, n_max)
def brics(
    mol: Chem.Mol,
    singlepass: bool = True,
    remove_parent: bool = False,
    sanitize: bool = True,
    fix: bool = True,
):
    """Run BRICS on the molecule and optionally repair the fragments.

    Args:
        mol: a molecule.
        singlepass: forwarded as ``singlePass`` to `BRICSDecompose`.
        remove_parent: drop the first returned fragment (the parent).
        sanitize: whether to pass the fragments through `dm.sanitize_mol`.
        fix: whether to pass the fragments through `dm.fix_mol`.

    Returns:
        The list of fragment molecules, with ``None`` entries removed.
    """
    fragments = list(
        BRICS.BRICSDecompose(mol, returnMols=True, singlePass=singlepass))

    # fixing comes before sanitization, matching the original ordering
    for enabled, repair in ((fix, dm.fix_mol), (sanitize, dm.sanitize_mol)):
        if enabled:
            fragments = [repair(fragment) for fragment in fragments]

    if remove_parent:
        fragments.pop(0)

    return [fragment for fragment in fragments if fragment is not None]
def generate_chemicals_from_fragments(smiles_list, n=10):
    """
    Reconstruct chemicals from fragments with BRICS.

    Parameters
    -----------------
    smiles_list: list of string
        list of smiles of fragments
    n: int
        maximum number of chemicals to generate

    Returns
    ---------------
    smiles_list: list of string
        list of newly generated smiles; shorter than ``n`` when the
        BRICS builder runs out of combinations
    """
    from itertools import islice

    # convert smiles to mol objects
    all_components = [Chem.MolFromSmiles(f) for f in smiles_list]
    builder = BRICS.BRICSBuild(all_components)

    generated_smiles = []
    # islice stops quietly when the builder is exhausted, whereas calling
    # next() n times raised StopIteration if fewer than n products existed.
    for m in islice(builder, max(n, 0)):
        m.UpdatePropertyCache(strict=True)
        generated_smiles.append(Chem.MolToSmiles(m))
    return generated_smiles
def fragmentate_chemicals(SMILES_list, return_only_fragments=True):
    """
    Fragment chemicals with the BRICS algorithm.

    Parameters
    ---------------------
    SMILES_list: list of string
        list of smiles
    return_only_fragments: bool
        if true, keep only entries containing an attachment point ("*"),
        i.e. drop molecules that were returned uncut

    Returns
    ----------------------
    fragmentated_smiles: list of string
        unique fragment smiles over all input chemicals
    """
    mols = [Chem.MolFromSmiles(SMILES) for SMILES in SMILES_list]

    unique_fragments = set()
    for mol in tqdm(mols):
        unique_fragments.update(BRICS.BRICSDecompose(mol))
    fragmentated_smiles = list(unique_fragments)

    if return_only_fragments:
        # plain containment test: ``find("*") > 0`` would have rejected a
        # string whose very first character is "*"
        fragmentated_smiles = [
            smi for smi in fragmentated_smiles if "*" in smi
        ]
    return fragmentated_smiles
def fragment_iterative(mol, min_length=3):
    """
    Peel fragments off *mol* one BRICS bond at a time.

    Bonds are visited in ascending bond-index order. After each accepted
    cut the head is stored and the tail becomes the molecule to cut next.

    Args:
        mol: RDKit molecule.
        min_length: minimum size (per ``get_size``) both pieces must have
            for a cut to be accepted.

    Returns:
        List of fragments; empty when the molecule has no BRICS bonds.
    """
    bond_data = list(BRICS.FindBRICSBonds(mol))

    try:
        atom_pairs, _labels = zip(*bond_data)
    except Exception:
        # unpacking fails when there are no BRICS bonds at all
        return []

    bonds = sorted(
        mol.GetBondBetweenAtoms(a1, a2).GetIdx() for a1, a2 in atom_pairs)

    frags = []
    remainder = deepcopy(mol)
    for bond in bonds:
        pieces = break_on_bond(remainder, bond)

        if len(pieces) == 1:
            # the cut did not split the molecule: keep what is left and stop
            frags.append(remainder)
            break

        head, tail = pieces
        too_small = (get_size(head) < min_length
                     or get_size(tail) < min_length)
        if not too_small:
            frags.append(head)
            remainder = deepcopy(tail)

    # NOTE(review): when the loop ends without hitting the single-piece
    # case, the final remainder is not appended - confirm this is intended.
    return frags
def fragment_recursive(mol, frags):
    """
    Recursively split *mol* on its lowest-indexed BRICS bond.

    Each call cuts one bond, appends the head piece to *frags* and recurses
    on the tail. A molecule without BRICS bonds is appended as-is.

    Args:
        mol: RDKit molecule to fragment.
        frags: list that collects the fragments; it is mutated in place.

    Returns:
        *frags* on success, or ``None`` if any step raised. Fragments
        collected before the failure remain in *frags*.
    """
    try:
        bonds = list(BRICS.FindBRICSBonds(mol))

        if not bonds:
            frags.append(mol)
            return frags

        # only the smallest bond index is needed, so no full sort
        first_bond = min(
            mol.GetBondBetweenAtoms(a1, a2).GetIdx()
            for (a1, a2), _labels in bonds)

        broken = Chem.FragmentOnBonds(
            mol, bondIndices=[first_bond], dummyLabels=[(0, 0)])
        head, tail = Chem.GetMolFrags(broken, asMols=True)
        print(mol_to_smiles(head), mol_to_smiles(tail))
        frags.append(head)
        # Propagate the result. Previously the recursive call's return value
        # was dropped, so every non-leaf call returned None.
        return fragment_recursive(tail, frags)
    except Exception:
        # Best effort, as before: a failure (e.g. the cut not producing
        # exactly two pieces) ends the recursion and is reported as None.
        return None
def fragmenter(thefile):
    """
    Decompose every SMILES in *thefile* with BRICS and rebuild molecules.

    For each line of the input, the molecule is decomposed into BRICS
    fragments and every product of ``BRICSBuild`` over those fragments is
    collected as SMILES.

    Args:
        thefile: path of a text file with one SMILES per line.

    Returns:
        List of generated SMILES, which is also written (one per line) to
        ``output.txt`` in the current working directory.
    """
    import contextlib

    # Remove a stale result first; a missing file is not an error
    # (the unconditional os.remove crashed on the very first run).
    with contextlib.suppress(FileNotFoundError):
        os.remove('output.txt')

    with open(thefile) as handle:
        smiles_lines = [line.strip() for line in handle]

    mylist = []
    for smi in smiles_lines:
        base = Chem.MolFromSmiles(smi)
        catalog = BRICS.BRICSDecompose(base)
        mcat = [Chem.MolFromSmiles(x) for x in catalog]
        mylist.extend(Chem.MolToSmiles(m) for m in BRICS.BRICSBuild(mcat))

    with open('output.txt', 'w+') as out_file:
        for generated in mylist:
            print(generated, file=out_file)
    return mylist
def break_on_rotatable_bonds_to_mol(inmol):
    """Break a mol on the bonds reported by ``BRICS.FindBRICSBonds``.

    These bonds serve as the "rotatable" bonds here (well, most of them).
    Returns a single fragmented mol without dummy atoms, or *inmol* itself,
    unchanged, when no such bond is found."""
    # Atom index pairs flanking each bond to be broken
    atom_pairs = []
    for entry in BRICS.FindBRICSBonds(inmol):
        if entry:
            atom_pairs.append(entry[0])

    # Nothing to cut: hand the molecule back as it was given
    if not atom_pairs:
        return inmol

    # Translate the atom pairs into bond indices
    bonds = []
    for at1, at2 in atom_pairs:
        bonds.append(inmol.GetBondBetweenAtoms(at1, at2).GetIdx())

    # Fragment the molecule
    return Chem.rdmolops.FragmentOnBonds(inmol, bonds, addDummies=False)
def identify_rotatable_bond_atom_pairs(mol):
    """Find atom quadruplets around the BRICS ("rotatable") bonds of a molecule.

    Returns a list of ``(n1, a1, a2, n2)`` tuples, where ``a1``-``a2`` is the
    bond and ``n1``/``n2`` are the first other neighbours of ``a1``/``a2``.
    The four atoms are enough to define a dihedral angle.
    """

    def other_neighbours(center, excluded):
        # neighbour indices of `center`, without the bond partner
        indices = [nbr.GetIdx() for nbr in mol.GetAtomWithIdx(center).GetNeighbors()]
        indices.remove(excluded)
        return indices

    atom_sets = []
    for entry in BRICS.FindBRICSBonds(mol):
        if not entry:
            continue
        a1, a2 = entry[0]
        a1_neighbours = other_neighbours(a1, a2)
        a2_neighbours = other_neighbours(a2, a1)
        # NOTE(review): raises IndexError if either end atom has no
        # neighbour besides its bond partner.
        atom_sets.append((a1_neighbours[0], a1, a2, a2_neighbours[0]))
    return atom_sets
def splitMol(self, mol, bondsToKeep):
    '''
    Fragment a molecule on the BRICS bonds whose label is in bondsToKeep.

    Returns the result of ``Chem.FragmentOnSomeBonds`` (partially
    sanitized), or an empty list when no BRICS bond carries a kept label.
    '''
    bond_indices = []
    dummy_labels = []
    for aids, lbl in BRICS.FindBRICSBonds(mol):
        if lbl not in bondsToKeep:
            continue
        bond = mol.GetBondBetweenAtoms(aids[0], aids[1])
        bond_indices.append(bond.GetIdx())
        dummy_labels.append([int(x) for x in lbl])

    if not bond_indices:
        return []

    res = Chem.FragmentOnSomeBonds(mol, bond_indices, dummyLabels=dummy_labels)
    # At least a partial sanitization is needed for the downstream steps:
    # refresh the property cache (non-strict) and perceive rings.
    for entry in res:
        entry.UpdatePropertyCache(False)
        Chem.FastFindRings(entry)
    return res
def combine_frag(self):
    """
    Build one compound per fragment template and record the new ones.

    For every entry of ``self.potential_cpd_templates`` the fragments are
    combined with ``BRICS.BRICSBuild`` and the first product is taken. A
    product whose SMILES is not yet in ``self.templates`` is printed,
    appended to ``self.templates`` and written to ``self.chembank``.
    """
    self.generate_frag_templates()
    print('Merging fragments together to generate compounds...')
    for current_template in self.potential_cpd_templates:
        fragms = [Chem.MolFromSmiles(x) for x in sorted(current_template)]
        # Only the first product is used. A template whose fragments cannot
        # be combined yields nothing; it is skipped instead of letting
        # StopIteration abort the whole run.
        product = next(BRICS.BRICSBuild(fragms), None)
        if product is None:
            continue

        sampler = Chem.MolToSmiles(product, True)
        if sampler not in self.templates:
            print(sampler)
            self.templates.append(sampler)
            self.chembank.write(sampler + '\n')
def fragment_database(self):
    """
    Build the fragment catalog and the BRICS fragment database.

    Sets ``self.fcat`` to a ``FragCatalog`` built from the functional-group
    file, and ``self.synthetic_frag_database`` to the unique BRICS fragments
    of all molecules listed in ``data/smiles_database.csv`` (second
    comma-separated column, header row skipped) under ``self.directory``.
    """
    # NOTE(review): hard-coded, machine-specific RDKit data path.
    fName = 'C:/RDKit_2017_03_2/Data/FunctionalGroups.txt'
    fparams = FragmentCatalog.FragCatParams(1, 6, fName)
    self.fcat = FragmentCatalog.FragCatalog(fparams)

    zinc_file = 'data/smiles_database.csv'
    # read inside a context manager so the file handle is always closed
    with open(self.directory + zinc_file, 'r') as handle:
        rows = handle.read().splitlines()[1:]
    zinc_suppl = [row.split(',')[1] for row in rows]

    zinc_ms = [Chem.MolFromSmiles(i) for i in zinc_suppl]
    pre_synthetic_frag_database = [
        BRICS.BRICSDecompose(i) for i in zinc_ms
    ]
    self.synthetic_frag_database = list(
        set(chain.from_iterable(pre_synthetic_frag_database)))
def getBits(mol):
    '''
    Break a molecule into fragments at its rotatable bonds.

    Parameters
    ----------
    mol : rdkit mol object to be broken up

    Returns
    -------
    mols : A list of rdkit mol objects, one per resulting fragment
    '''
    # every match of the rotatable-bond pattern is an atom-index pair
    rotatable = mol.GetSubstructMatches(RotatableBondSmarts)

    # BreakBRICSBonds expects ((begin, end), (label, label)) entries;
    # (0, 0) is used as the dummy label pair for every cut
    labelled = []
    for begin, end in rotatable:
        labelled.append(((begin, end), (0, 0)))

    broken = BRICS.BreakBRICSBonds(mol, bonds=labelled)
    return list(Chem.GetMolFrags(broken, asMols=True))
def __init__(self, radius, fpSize, IC50function, molFile):
    """
    Load molecules and rank the BRICS bond types found in them.

    Args:
        radius: Morgan fingerprint radius.
        fpSize: Morgan fingerprint size.
        IC50function: callable stored as ``self.getIC50``.
        molFile: path of a text file with one SMILES per line; only the
            first 1000 lines are used.
    """
    self.fpgen = rdFingerprintGenerator.GetMorganGenerator(
        radius=radius, fpSize=fpSize)
    self.getIC50 = IC50function
    self.molFile = molFile

    # Read the SMILES file and convert each line to an RDKit molecule
    with open(self.molFile) as f:
        raw_data = f.read().split("\n")
    parsed = [Chem.MolFromSmiles(line) for line in raw_data[:1000]]
    self.ms = [rdMolStandardize.FragmentParent(mol) for mol in parsed]

    # Count how often each BRICS bond label pair occurs
    cntr = Counter()
    for m in self.ms:
        for _aids, lbls in BRICS.FindBRICSBonds(m):
            cntr[lbls] += 1

    # Most frequent first; ties are ordered by label, descending
    freqs = sorted(((count, lbls) for lbls, count in cntr.items()),
                   reverse=True)

    # NOTE(review): the original comment said "Keep the top 10 bonds", but
    # no truncation is applied - every label is kept. Confirm the intent.
    self.bondsToKeep = [lbls for _count, lbls in freqs]
def get_scaffolds(self, scaffolding_method=ScaffoldingMethod.MurckoScaffold):
    """Compute deemed scaffolds for a given compound.

    The scaffolds found are also registered in ``self._scaffolds``: Murcko
    variants under the method name, BRICS fragments under
    ``'<method name>_<smiles>'``.

    Args:
        scaffolding_method (ScaffoldingMethod, optional):
            Defaults to MurckoScaffold. Scaffolding method to use

    Returns:
        list[rdkit.Chem.rdchem.Mol]: Scaffolds found in the component.

    Raises:
        CCDUtilsError: If the computation raises RuntimeError or ValueError.
    """
    try:
        scaffolds = []

        if scaffolding_method == ScaffoldingMethod.MurckoScaffold:
            scaffolds = [(MurckoScaffold.GetScaffoldForMol(self.mol_no_h))]

        elif scaffolding_method == ScaffoldingMethod.MurckoGeneric:
            scaffolds = [
                (MurckoScaffold.MakeScaffoldGeneric(self.mol_no_h))
            ]

        elif scaffolding_method == ScaffoldingMethod.Brics:
            scaffolds = BRICS.BRICSDecompose(self.mol_no_h)
            # replace dummy atoms with H's to get matches
            # https://sourceforge.net/p/rdkit/mailman/message/35261974/
            brics_smiles = [
                re.sub(r"(\[[0-9]*\*\])", "[H]", i) for i in scaffolds
            ]
            brics_mols = [
                rdkit.Chem.MolFromSmiles(x) for x in brics_smiles
            ]

            # RemoveHs returns a new molecule; keep it. The previous loop
            # discarded the result and therefore did nothing.
            brics_mols = [rdkit.Chem.RemoveHs(mol) for mol in brics_mols]

            brics_hits = [
                self.mol_no_h.GetSubstructMatches(i) for i in brics_mols
            ]

            for index, brics_hit in enumerate(brics_hits):
                smiles = rdkit.Chem.MolToSmiles(brics_mols[index])
                name = scaffolding_method.name
                source = 'RDKit scaffolds'
                key = f'{name}_{smiles}'
                brics_hit = conversions.listit(brics_hit)

                if not smiles:
                    continue

                if key not in self._scaffolds:
                    self._scaffolds[key] = SubstructureMapping(
                        name, smiles, source, brics_hit)

            return brics_mols

        for s in scaffolds:
            scaffold_atom_names = [
                atom.GetProp('name') for atom in s.GetAtoms()
            ]
            # map each scaffold atom back to the index of the first atom
            # with the same name in the full molecule
            mapping = []
            for at_name in scaffold_atom_names:
                idx = [
                    atom.GetIdx() for atom in self.mol.GetAtoms()
                    if atom.GetProp('name') == at_name
                ][0]
                mapping.append(idx)

            smiles = rdkit.Chem.MolToSmiles(s)
            name = scaffolding_method.name
            source = 'RDKit scaffolds'

            if not smiles:
                continue

            if name in self._scaffolds:
                self._scaffolds[name].mappings.append(mapping)
            else:
                self._scaffolds[name] = SubstructureMapping(
                    name, smiles, source, [mapping])

        return scaffolds

    except (RuntimeError, ValueError) as err:
        # chain explicitly so the underlying RDKit error stays visible
        raise CCDUtilsError(
            f'Computing scaffolds using method {scaffolding_method.name} failed.'
        ) from err
# Decompose every molecule of an SD file into BRICS fragments and write the
# unique fragments, with 2D coordinates, to another SD file.
#
# Usage: fragmenter.py infile outfile
#
# Output uses single-argument print() / sys.stderr.write() so the script
# runs unchanged under both Python 2 and Python 3.
print('Argument List: %s' % str(sys.argv))

if len(sys.argv) == 3:
    inp_sdf_file = sys.argv[1]
    out_sdf_file = sys.argv[2]
else:
    sys.exit("Usage: fragmenter.py infile outfile")

try:
    suppl = Chem.SDMolSupplier(inp_sdf_file)

    catalog = set()
    for mol in suppl:
        if mol is None:
            # unreadable record in the SD file
            continue
        print(mol.GetNumAtoms())
        pieces = BRICS.BRICSDecompose(mol)
        catalog.update(pieces)

    # formatted as one string; the old multi-argument form printed a tuple
    # under Python 2
    print('Generated: %d fragments.' % len(catalog))

    ofile = Chem.SDWriter(out_sdf_file)
    try:
        for frg in catalog:
            cmol = Chem.MolFromSmiles(frg)
            AllChem.Compute2DCoords(cmol)
            ofile.write(cmol)
    finally:
        # always close the writer so buffered records reach the file
        ofile.close()

except IOError:
    sys.stderr.write("Input file could not be opened\n")
    sys.exit(1)
# Functional-group table: label <TAB> SMARTS <TAB> description, one per row.
# Tabs are spelled as \t escapes so that an editor cannot silently turn them
# into spaces (the rows are split on runs of tabs below).
fgData = """AcidChloride\tC(=O)Cl\tAcid Chloride
CarboxylicAcid\tC(=O)[O;H,-]\tCarboxylic acid
SulfonylChloride\t[$(S-!@[#6])](=O)(=O)(Cl)\tSulfonyl Chloride
Amine\t[N;!H0;$(N-[#6]);!$(N-[!#6]);!$(N-C=[O,N,S])]\tAmine
BoronicAcid\t[$(B-!@[#6])](O)(O)\tBoronic Acid
Isocyanate\t[$(N-!@[#6])](=!@C=!@O)\tIsocyanate
Alcohol\t[O;H1;$(O-!@[#6;!$(C=!@[O,N,S])])]\tAlcohol
Aldehyde\t[CH;D2;!$(C-[!#6])]=O\tAldehyde
Halogen\t[$([Cl,Br,I]-!@[#6]);!$([Cl,Br,I]-!@C-!@[F,Cl,Br,I]);!$([Cl,Br,I]-[C,S](=[O,S,N]))]\tHalogen"""

fglines = [re.split(r'\t+', x.strip()) for x in fgData.split('\n')]
hLabels = [x[0] for x in fglines]
patts = [Chem.MolFromSmarts(x[1]) for x in fglines]

# header: the input's own columns, one column per functional group, and a
# final flag column
labels = inLs[0].strip().split(delim) + hLabels + ['HasBRICSBond?']
print(delim.join(labels))

for line in inLs[1:]:
    splitL = line.strip().split(delim)
    # the SMILES is expected in the second column of each input row
    mol = Chem.MolFromSmiles(splitL[1])

    # one 'True'/'False' cell per functional-group pattern
    for fg in patts:
        splitL.append(str(mol.HasSubstructMatch(fg)))

    # more than one BRICS piece means at least one BRICS bond was cut
    bricsRes = BRICS.BRICSDecompose(mol)
    splitL.append(str(len(bricsRes) > 1))

    print(delim.join(splitL))
# Generate new structures by recombining the BRICS fragments of a data set.
random.seed(1)

max_number_of_generated_structures = 100

# Load the molecules, skipping records RDKit could not parse.
# (A SMILES file can be read instead with
#  Chem.SmilesMolSupplier('logSdataset1290_2d.smi', delimiter='\t', titleLine=False).)
molecules = []
for molecule in Chem.SDMolSupplier('logSdataset1290_2d.sdf'):
    if molecule is not None:
        molecules.append(molecule)
print(len(molecules))

# Collect the unique BRICS fragments of all molecules.
fragments = set()
for molecule in molecules:
    fragment = BRICS.BRICSDecompose(molecule, minFragmentSize=2)
    fragments.update(fragment)
print(len(fragments))

# Recombine the fragments into new structures.
generated_structures = BRICS.BRICSBuild(
    [Chem.MolFromSmiles(smiles) for smiles in fragments])

# (Chem.SmilesWriter('generated_structures.smi') can be used instead.)
writer = Chem.SDWriter('generated_structures.sdf')
number_of_generated_structures = 0
for generated_structure in generated_structures:
    generated_structure.UpdatePropertyCache(True)
    AllChem.Compute2DCoords(generated_structure)
    writer.write(generated_structure)
def get_fc0_frags(self):
    """
    Break every BRICS bond and enumerate all the FC0 fragments.

    This is a needed first step, since these are used to build up all the
    more complex fragments.

    Side effects (everything is stored on ``self``; nothing is returned):
      - ``self.brics_legend``: pseudo-atom id -> BRICS type (int)
      - ``self.edges``: list of (pseudo_i, pseudo_j) pairs, one per broken bond
      - ``self.frag_cache``: every FC0 fragment is appended
      - ``self.fc0_index``: pseudo-atom key -> index of its FC0 fragment
      - ``self.fragments[0]``: the list of FC0 fragments

    :return: None
    """
    fc0_frags = []
    brics = BRICS.FindBRICSBonds(self.init_mol)
    brics_counter = Counter(
        counter=100
    )  # counter starts at 100 since it's not going to be in use as a p.atom
    edges = []
    # Each BRICS bond is replaced by a pair of pseudo atoms with fresh ids.
    for bond, brics_type in brics:
        i, j = bond
        x, y = brics_type
        pseudo_i = brics_counter.increment()
        pseudo_j = brics_counter.increment(
        )  # this is always done in pairs so that eg. 100 and 101 go together
        self.brics_legend[pseudo_i] = int(
            x)  # remember to store the BRICS typing, we need it later
        self.brics_legend[pseudo_j] = int(y)
        edges.append((pseudo_i, pseudo_j))  # keep a record of this bond
        self.breakAndReplace(i, j, pseudo_i, pseudo_j)
    self.edges = edges
    # this while loop finds parts of a graph that are no longer connected, meaning FC0 fragments in this case
    untouched = set(self.atoms.keys()
                    )  # keep a list of atoms we haven't investigated yet
    while untouched:  # if there are still atoms we haven't processed
        # seed a new connected component with an arbitrary unvisited atom
        currSet = set()
        currSet.add(untouched.pop())
        newMol = RecomposerMol()
        newAtoms = {}
        newPseudoAtoms = {}
        # every fragment gets its own copy of the complete BRICS legend
        new_brics_legend = {}
        new_brics_legend.update(self.brics_legend)
        while currSet:  # perform graph search
            currAtom = self.atoms[currSet.pop()]
            for bond in currAtom.bonds:
                # find the atom at the other end of this bond
                idx = None
                if currAtom.index == bond.endAtomIdx:
                    idx = bond.beginAtomIdx
                elif currAtom.index == bond.beginAtomIdx:
                    idx = bond.endAtomIdx
                # add connected atoms to curr mol atom set
                if idx in untouched:
                    currSet.add(idx)
                    untouched.remove(idx)
                else:
                    continue
            newAtoms[currAtom.index] = self._copyAtom(
                currAtom)  # watch out for reference errors!
            # atoms with atomic number 0 are the pseudo atoms; they are
            # indexed by their isotope value
            # NOTE(review): presumably the isotope carries the pseudo id
            # assigned by breakAndReplace - confirm.
            if currAtom.atom_num == 0:
                newPseudoAtoms[currAtom.isotope] = currAtom.index
        newMol.atoms = newAtoms
        newMol.pseudoIndex = newPseudoAtoms
        newMol.brics_legend = new_brics_legend
        fc0_frags.append(newMol)
        self.frag_cache.append(newMol)
    # now need to index the fragments to make it easier to bond them
    fc0_index = {}
    for i, frag in enumerate(fc0_frags):
        for pseudo_atom in frag.pseudoIndex.keys():
            fc0_index[
                pseudo_atom] = i  # each FC0 fragment should be accessible by pseudoatom
    self.fc0_index = fc0_index
    self.fragments[0] = fc0_frags
def fragment_target(self):
    """Store the BRICS fragments of ``self.target`` (a SMILES string) as a
    list in ``self.target_fragments``."""
    target_mol = Chem.MolFromSmiles(self.target)
    pieces = BRICS.BRICSDecompose(target_mol)
    self.target_fragments = list(pieces)
if (len(Chem.MolToSmiles(rm3).split(".")) > 1): count1 = count1 + 1 [functional_list.append(i) for i in Chem.MolToSmiles(rm3).split(".")] else: if(len(Chem.MolToSmiles(rm2).split(".")) > 1): count2 = count2 + 1 [functional_list.append(i) for i in Chem.MolToSmiles(rm2).split(".")] else: if(len(Chem.MolToSmiles(rm).split(".")) > 1): count3 = count3 + 1 [functional_list.append(i) for i in Chem.MolToSmiles(rm).split(".")] else: pieces_smi = Chem.BRICS.BRICSDecompose(temp) pieces = [Chem.MolFromSmiles(x) for x in BRICS.BRICSDecompose(temp)] count_fail_no_match += 1 print(can_smi) except: count_fail += 1 print(list(set(functional_list))) #retrieve only the found functional groups print(len(list(set(functional_list)))) print(count1, count2, count3) print("total processed: "+ str(count1+count2+count3)) print("no substructured: "+ str(count_fail_no_match)) print("fail processed: "+ str(count_fail))
def update_atom_position(mol1, mol2):
    """
    Copy the coordinates of the common substructure from mol1 onto mol2.

    The maximum common substructure (MCS) of the two molecules is located,
    the matching atoms of a copy of *mol2* are moved onto the positions of
    their counterparts in *mol1*, and, if anything actually moved, the
    remaining atoms are relaxed with an MMFF minimization.

    Args:
        mol1: template molecule with a conformer.
        mol2: molecule to update; it is not modified, a copy is returned.

    Returns:
        A copy of *mol2* with updated coordinates.
    """
    mol_copy = Chem.Mol(mol2)

    # This is a work-around to get a seedSmarts for the FMCS algorithm
    # and prevent the occasional hanging of FMCS
    # Might be unnecessary with future versions of rdkit
    core_frags = BRICS.BRICSDecompose(Chem.RemoveHs(mol1))
    frag_smarts = []
    for frag_smiles in core_frags:
        # Turn the numbered BRICS dummies ([n*]) into plain [*]. The pattern
        # is a raw string with escaped brackets; the previous non-raw
        # '[[1-9]...\*]' relied on an invalid escape and an accidental
        # nested character set.
        smi_str = re.sub(r'\[[1-9][0-9]{0,2}\*\]', '[*]', frag_smiles)
        # generalize the query: any bond type, dummy atoms match anything
        frag_smarts.append(
            Chem.MolToSmarts(Chem.MolFromSmiles(smi_str)).replace(
                ':', '~').replace('-', '~').replace('=', '~').replace('#0', '*'))

    # First fragment query that also occurs in mol2; the empty string means
    # "no seed" (FindMCS expects a string, so None must not be passed).
    seed = ''
    for query in frag_smarts:
        if mol_copy.HasSubstructMatch(Chem.MolFromSmarts(query)):
            seed = query
            break

    # Now get MCSS
    res = rdFMCS.FindMCS([mol1, mol_copy], seedSmarts=seed)
    mcs_q = Chem.MolFromSmarts(res.smartsString)

    # Get atom IDs (first match in each molecule)
    template = mol1.GetSubstructMatches(mcs_q)[0]
    hit_atom = mol_copy.GetSubstructMatches(mcs_q)[0]

    # Update XYZ coords of MCSS, summing how far the atoms had to move
    running_distance = 0
    for template_idx, hit_idx in zip(template, hit_atom):
        origin = mol1.GetConformer().GetAtomPosition(template_idx)
        pos = mol_copy.GetConformer().GetAtomPosition(hit_idx)
        p1 = np.array([origin.x, origin.y, origin.z])
        p2 = np.array([pos.x, pos.y, pos.z])
        running_distance += np.sqrt(np.sum((p1 - p2)**2, axis=0))
        mol_copy.GetConformer().SetAtomPosition(
            hit_idx, (origin.x, origin.y, origin.z))

    if running_distance > 0.1:
        # relax atoms outside MCSS
        matched = set(hit_atom)
        res_atom = [
            atom.GetIdx() for atom in mol_copy.GetAtoms()
            if atom.GetIdx() not in matched
        ]

        # do minimization: MCS atoms are fixed, the others are restrained
        mp = ChemicalForceFields.MMFFGetMoleculeProperties(mol_copy)
        ff = ChemicalForceFields.MMFFGetMoleculeForceField(mol_copy, mp)
        for val in hit_atom:
            ff.AddFixedPoint(val)
        for val in res_atom:
            # NOTE(review): presumably (max displacement, force constant) =
            # (1, 5) - confirm against the RDKit force-field docs.
            ff.MMFFAddPositionConstraint(val, 1, 5)
        ff.Minimize()

    return mol_copy
# Number of free bonds a fragment must have to be selected.
# If 0, every fragment with at least one free bond is selected.
number_of_free_bonds = 0

# load molecules, skipping lines RDKit could not parse
# (an SD file can be read instead with Chem.SDMolSupplier('logSdataset1290_2d.sdf'))
molecules = []
for molecule in Chem.SmilesMolSupplier(
        'logS_molecules_1290.smi', delimiter='\t', titleLine=False):
    if molecule is not None:
        molecules.append(molecule)
print('number of molecules :', len(molecules))

# generate fragments
fragments = set()
for molecule in molecules:
    fragment = BRICS.BRICSDecompose(molecule, minFragmentSize=2)
    fragments.update(fragment)

# select and arrange fragments
new_fragments = []
number_of_generated_structures = 0
for fragment in fragments:
    # character positions of the '*' attachment points in the SMILES string
    free_bond = [index for index, atom in enumerate(fragment) if atom == '*']
    if number_of_free_bonds == 0:
        flag = len(free_bond) > 0
    else:
        flag = len(free_bond) == number_of_free_bonds
# In[ ]: # In[8]: from rdkit.Chem import AllChem from rdkit.Chem import BRICS from rdkit.Chem import rdMolDescriptors # In[21]: smiles = Chem.MolToSmiles(mol) frag = BRICS.BRICSDecompose(mol) print(smiles) # In[20]: print(len(frag)) # In[11]: print(frag) # In[12]:
# Each enabled test times one operation over all `mols`, logs the elapsed
# wall-clock time and appends it to `ts`.
# NOTE(review): the "ResultsN" labels are one higher than the tests[] index
# (tests[9] logs 'Results10'); the labels look 1-based - confirm.
if tests[9]:
    logger.info('Writing: Mol blocks')
    t1 = time.time()
    for mol in mols:
        # the Mol block is only generated for timing; it is not used
        mb = Chem.MolToMolBlock(mol)
    t2 = time.time()
    logger.info('Results10: %.2f seconds' % (t2 - t1))
    ts.append(t2 - t1)

if tests[10]:
    # imported here so it is only needed when this test is enabled
    from rdkit.Chem import BRICS
    logger.info('BRICS decomposition')
    t1 = time.time()
    for mol in mols:
        # result discarded; only the run time matters
        d = BRICS.BreakBRICSBonds(mol)
    t2 = time.time()
    logger.info('Results11: %.2f seconds' % (t2 - t1))
    ts.append(t2 - t1)

if tests[11]:
    logger.info('Generate 2D coords')
    t1 = time.time()
    for mol in mols:
        AllChem.Compute2DCoords(mol)
    t2 = time.time()
    logger.info('Results12: %.2f seconds' % (t2 - t1))
    ts.append(t2 - t1)

if tests[12]:
    logger.info('Generate topological fingerprints')
# ['*C(=O)CC', '*CCOC(=O)CC', '*CCOc1ccccc1', '*OCCOc1ccccc1', '*c1ccccc1']
#
# 3.2 The BRICS method
# RDKit offers a second way of cutting a molecule into fragments: BRICS.
# BRICS cuts the molecule at synthetically accessible bonds, so what it
# returns are the distinct fragments of that molecule; the dummy atoms (*)
# show where the cuts were made.

# Apply the BRICS decomposition to the molecule drawn below.
smi = 'C=CC(=O)N1CCC(CC1)C2CCNC3=C(C(=NN23)C4=CC=C(C=C4)OC5=CC=CC=C5)C(=O)N'
m = Chem.MolFromSmiles(smi)
Draw.MolToImageFile(
    m,
    "/drug_development/studyRdkit/st_rdcit/img/mol34.jpg",
    size=(600, 400),
    legend='zanubrutinib(C=CC(=O)N1CCC(CC1)C2CCNC3=C(C(=NN23)C4=CC=C(C=C4)OC5=CC=CC=C5)C(=O)N)',
)

frags = BRICS.BRICSDecompose(m)
print(frags)

# Draw every fragment in a grid, without legends.
mols = [Chem.MolFromSmiles(fsmi) for fsmi in frags]
img = Draw.MolsToGridImage(
    mols,
    molsPerRow=3,
    subImgSize=(200, 200),
    legends=[''] * len(mols),
)
img.save('/drug_development/studyRdkit/st_rdcit/img/mol35.jpg')

# 4. Combining molecular fragments -- the BRICS method
# Build new molecules from the fragments obtained above.
newms = list(BRICS.BRICSBuild(mols))