def get_filter_values(mol):
    """Collect the per-molecule descriptors that the filters look at.

    Args:
        mol: an RDKit ``Chem.Mol``; anything else trips the assertion.

    Returns:
        dict mapping descriptor names ("MW", "logP", "HBA", ...) to their
        values, plus the bookkeeping entries "is_good" (initially True) and
        "rejections" (initially an empty list).
    """
    assert isinstance(mol, Chem.Mol)

    props = {
        "MW": desc.CalcExactMolWt(mol),
        "logP": crip.MolLogP(mol),
        "HBA": lip.NumHAcceptors(mol),
        "HBD": lip.NumHDonors(mol),
        "tPSA": desc.CalcTPSA(mol),
        "rot_bonds": lip.NumRotatableBonds(mol),
    }

    # Every bond that is not counted as rotatable is treated as rigid
    # (i.e. the two categories are assumed to be mutually exclusive).
    props["rigid_bonds"] = mol.GetNumBonds() - props["rot_bonds"]

    props.update(
        num_rings=lip.RingCount(mol),
        num_hetero_atoms=lip.NumHeteroatoms(mol),
        # The formal charge reported by RDKit is taken at face value.
        charge=rdmolops.GetFormalCharge(mol),
    )

    carbon_count, charge_count, largest_ring = get_atom_props(mol)
    props["num_carbons"] = carbon_count
    props["num_charges"] = charge_count
    props["max_ring_size"] = largest_ring

    try:
        ratio = float(props["num_hetero_atoms"]) / float(props["num_carbons"])
    except ZeroDivisionError:
        # No carbons at all: use a huge sentinel instead of failing.
        ratio = 100000000
    props["hc_ratio"] = ratio

    # Number of BRICS bonds, used as a measure related to complexity.
    props["fc"] = len(list(Brics.FindBRICSBonds(mol)))

    # Starts out True; nothing has been checked against the filters yet.
    props["is_good"] = True

    # Distinct element symbols present in the molecule (order not defined).
    props["atoms"] = list(set(atom.GetSymbol() for atom in mol.GetAtoms()))

    props["num_chiral_centers"] = len(
        Chem.FindMolChiralCenters(mol, includeUnassigned=True)
    )

    # Filled in later with the reasons a molecule was rejected.
    props["rejections"] = []
    return props
def fragment_recursive(mol, frags):
    """Recursively peel fragments off ``mol``, one BRICS bond per step.

    At each step the BRICS bond with the lowest bond index is broken (the
    cut is capped with dummy atoms labelled 0). The first piece returned by
    ``Chem.GetMolFrags`` is appended to ``frags`` and the second piece is
    fragmented further. A molecule without BRICS bonds is appended as-is,
    which ends the recursion.

    Args:
        mol: RDKit molecule to fragment.
        frags: list receiving the fragments; it is mutated in place.

    Returns:
        ``frags`` when the whole recursion succeeds, or ``None`` if any step
        raised. Fragments collected before a failure remain in ``frags``.
    """
    try:
        bonds = list(BRICS.FindBRICSBonds(mol))
        if not bonds:
            frags.append(mol)
            return frags

        bond_idxs = [
            mol.GetBondBetweenAtoms(a1, a2).GetIdx()
            for (a1, a2), _labels in bonds
        ]

        # Only the lowest-indexed BRICS bond is cut in each step; the rest
        # are rediscovered on the remaining piece by the recursive call.
        broken = Chem.FragmentOnBonds(
            mol, bondIndices=[min(bond_idxs)], dummyLabels=[(0, 0)]
        )
        head, tail = Chem.GetMolFrags(broken, asMols=True)
        # NOTE(review): looks like leftover debug output - consider logging.
        print(mol_to_smiles(head), mol_to_smiles(tail))
        frags.append(head)

        # Propagate the result: without this `return` every molecule that
        # has at least one BRICS bond yielded None even on success.
        return fragment_recursive(tail, frags)
    except Exception:
        # Deliberate best-effort: a failure in any step aborts the
        # fragmentation and is reported to the caller as None.
        return None
def fragment_iterative(mol, min_length=3):
    """Split ``mol`` along its BRICS bonds, walking them in index order.

    Bond indices are looked up once on the input molecule, sorted ascending
    and then applied one after another (via ``break_on_bond``) to the piece
    that is still left over from the previous cut.

    Args:
        mol: RDKit molecule to fragment.
        min_length: a cut is skipped when ``get_size`` of either resulting
            piece is below this value.

    Returns:
        List of fragments. Empty when the BRICS bond data cannot be unpacked
        (e.g. the molecule has no BRICS bonds). The left-over piece is only
        appended when ``break_on_bond`` hands back a single piece.
    """
    brics_hits = list(BRICS.FindBRICSBonds(mol))

    try:
        atom_pairs, _labels = zip(*brics_hits)
    except Exception:
        # Nothing to unpack: there is no bond to cut on.
        return []

    cut_order = sorted(
        mol.GetBondBetweenAtoms(begin, end).GetIdx()
        for begin, end in atom_pairs
    )

    pieces = []
    remainder = deepcopy(mol)
    for bond_idx in cut_order:
        parts = break_on_bond(remainder, bond_idx)

        if len(parts) == 1:
            # No split happened: keep what is left and stop.
            pieces.append(remainder)
            break

        head, tail = parts
        too_small = get_size(head) < min_length or get_size(tail) < min_length
        if not too_small:
            pieces.append(head)
            remainder = deepcopy(tail)

    return pieces
def break_on_rotatable_bonds_to_mol(inmol):
    """Cut a molecule at every bond reported by ``BRICS.FindBRICSBonds``.

    The BRICS bonds stand in for "rotatable bonds" here, so most - not
    necessarily all - rotatable bonds are covered.

    Args:
        inmol: RDKit molecule to fragment.

    Returns:
        A single molecule object in which the selected bonds have been
        removed (no dummy atoms are added at the cut points). When no bond
        is found, ``inmol`` itself is returned unchanged.
    """
    bond_ids = []
    for hit in BRICS.FindBRICSBonds(inmol):
        if not hit:
            continue
        # First element of each hit is the pair of atom indices of the bond.
        begin, end = hit[0]
        bond_ids.append(inmol.GetBondBetweenAtoms(begin, end).GetIdx())

    if not bond_ids:
        # Nothing to break: hand the molecule back as it was given.
        return inmol

    return Chem.rdmolops.FragmentOnBonds(inmol, bond_ids, addDummies=False)
def identify_rotatable_bond_atom_pairs(mol):
    """Return one atom quadruplet per BRICS bond of ``mol``.

    Each quadruplet is ``(n1, a1, a2, n2)`` where ``a1``-``a2`` is the bond
    reported by ``BRICS.FindBRICSBonds``, ``n1`` is the first other
    neighbour of ``a1`` and ``n2`` the first other neighbour of ``a2``.
    The four indices can be used to measure a dihedral angle.

    Args:
        mol: RDKit molecule to inspect.

    Returns:
        List of 4-tuples of atom indices.
    """

    def _neighbours_without(centre, excluded):
        # Neighbour indices of `centre`, minus its bond partner `excluded`.
        indices = [
            nbr.GetIdx() for nbr in mol.GetAtomWithIdx(centre).GetNeighbors()
        ]
        indices.remove(excluded)
        return indices

    quadruplets = []
    for hit in BRICS.FindBRICSBonds(mol):
        if not hit:
            continue
        a1, a2 = hit[0]
        left = _neighbours_without(a1, a2)
        right = _neighbours_without(a2, a1)
        # One neighbour from either side of the bond completes the dihedral.
        quadruplets.append((left[0], a1, a2, right[0]))

    return quadruplets
def splitMol(self, mol, bondsToKeep):
    """Fragment ``mol`` on the BRICS bonds whose label is in ``bondsToKeep``.

    Args:
        mol: RDKit molecule to fragment.
        bondsToKeep: collection of BRICS label pairs; only bonds carrying
            one of these labels are candidates for breaking.

    Returns:
        The molecules produced by ``Chem.FragmentOnSomeBonds`` (partially
        sanitized), or an empty list when no bond of ``mol`` qualifies.
    """
    bond_indices = []
    dummy_labels = []
    for (begin, end), label in BRICS.FindBRICSBonds(mol):
        if label not in bondsToKeep:
            continue
        bond_indices.append(mol.GetBondBetweenAtoms(begin, end).GetIdx())
        dummy_labels.append(list(map(int, label)))

    if not bond_indices:
        return []

    fragmented = Chem.FragmentOnSomeBonds(
        mol, bond_indices, dummyLabels=dummy_labels
    )

    # A partial sanitization is the minimum needed for later processing:
    # refresh the property cache (non-strict) and perceive rings.
    for product in fragmented:
        product.UpdatePropertyCache(False)
        Chem.FastFindRings(product)
    return fragmented
def __init__(self, radius, fpSize, IC50function, molFile):
    """Load molecules from a SMILES file and tally their BRICS bond types.

    Args:
        radius: radius passed to the Morgan fingerprint generator.
        fpSize: fingerprint size passed to the Morgan generator.
        IC50function: callable stored as ``self.getIC50``.
        molFile: path to a text file with one SMILES string per line.

    Sets ``self.fpgen``, ``self.getIC50``, ``self.molFile``, ``self.ms``
    (fragment parents of the first 1000 lines) and ``self.bondsToKeep``
    (BRICS label pairs, most frequent first).
    """
    self.fpgen = rdFingerprintGenerator.GetMorganGenerator(
        radius=radius, fpSize=fpSize
    )
    self.getIC50 = IC50function
    self.molFile = molFile

    # Read the SMILES file; only the first 1000 lines are converted.
    with open(self.molFile) as handle:
        smiles_lines = handle.read().split("\n")
    parsed = list(map(Chem.MolFromSmiles, smiles_lines[:1000]))
    self.ms = list(map(rdMolStandardize.FragmentParent, parsed))

    # Count how often each BRICS label pair occurs across the molecules.
    label_counts = Counter()
    for parent in self.ms:
        for _atom_ids, labels in BRICS.FindBRICSBonds(parent):
            label_counts[labels] += 1

    # Rank by (count, label), highest first.
    ranked = sorted(
        label_counts.items(),
        key=lambda item: (item[1], item[0]),
        reverse=True,
    )
    # NOTE(review): no truncation is applied - every observed label pair is
    # kept. Confirm whether a top-N cut-off was intended here.
    self.bondsToKeep = [labels for labels, _count in ranked]
def get_fc0_frags(self):
    """Break every BRICS bond and enumerate the resulting FC0 fragments.

    This is a needed first step, since the FC0 fragments are used to build
    up all the more complex fragments.

    Works in three stages:
      1. every bond reported by ``BRICS.FindBRICSBonds`` on ``self.init_mol``
         is passed to ``self.breakAndReplace`` together with a fresh pair of
         pseudo-atom ids, and the BRICS types are recorded in
         ``self.brics_legend``;
      2. a graph search over ``self.atoms`` groups the atoms into connected
         components, each of which becomes one ``RecomposerMol``;
      3. the fragments are indexed by the pseudo-atom ids they contain.

    Side effects: sets ``self.edges``, ``self.fc0_index`` and
    ``self.fragments[0]``; appends each fragment to ``self.frag_cache``;
    adds entries to ``self.brics_legend``.

    :return: None
    """
    fc0_frags = []
    brics = BRICS.FindBRICSBonds(self.init_mol)
    # counter starts at 100 since it's not going to be in use as a p.atom
    # NOTE(review): this Counter is a project helper (takes `counter=` and
    # exposes `increment()`), not collections.Counter - confirm.
    brics_counter = Counter(counter=100)
    edges = []
    for bond, brics_type in brics:
        i, j = bond  # atom indices at the two ends of the BRICS bond
        x, y = brics_type  # BRICS type of each end
        pseudo_i = brics_counter.increment()
        # this is always done in pairs so that eg. 100 and 101 go together
        pseudo_j = brics_counter.increment()
        # remember to store the BRICS typing, we need it later
        self.brics_legend[pseudo_i] = int(x)
        self.brics_legend[pseudo_j] = int(y)
        edges.append((pseudo_i, pseudo_j))  # keep a record of this bond
        self.breakAndReplace(i, j, pseudo_i, pseudo_j)
    self.edges = edges

    # this while loop finds parts of a graph that are no longer connected,
    # meaning FC0 fragments in this case
    # keep a list of atoms we haven't investigated yet
    untouched = set(self.atoms.keys())
    while untouched:  # if there are still atoms we haven't processed
        # Seed a new component with an arbitrary unvisited atom.
        currSet = set()
        currSet.add(untouched.pop())
        newMol = RecomposerMol()
        newAtoms = {}
        newPseudoAtoms = {}
        # Each fragment gets its own copy of the full BRICS legend.
        new_brics_legend = {}
        new_brics_legend.update(self.brics_legend)
        while currSet:  # perform graph search
            currAtom = self.atoms[currSet.pop()]
            for bond in currAtom.bonds:
                # Find the atom at the other end of this bond.
                idx = None
                if currAtom.index == bond.endAtomIdx:
                    idx = bond.beginAtomIdx
                elif currAtom.index == bond.beginAtomIdx:
                    idx = bond.endAtomIdx
                # add connected atoms to curr mol atom set
                if idx in untouched:
                    currSet.add(idx)
                    untouched.remove(idx)
                else:
                    continue
            # watch out for reference errors! (hence the copy)
            newAtoms[currAtom.index] = self._copyAtom(currAtom)
            # atom_num == 0 is treated as a pseudo atom and indexed by its
            # isotope - presumably the pseudo id assigned above; confirm
            # against breakAndReplace.
            if currAtom.atom_num == 0:
                newPseudoAtoms[currAtom.isotope] = currAtom.index
        newMol.atoms = newAtoms
        newMol.pseudoIndex = newPseudoAtoms
        newMol.brics_legend = new_brics_legend
        fc0_frags.append(newMol)
        self.frag_cache.append(newMol)

    # now need to index the fragments to make it easier to bond them
    fc0_index = {}
    for i, frag in enumerate(fc0_frags):
        for pseudo_atom in frag.pseudoIndex.keys():
            # each FC0 fragment should be accessible by pseudoatom
            fc0_index[pseudo_atom] = i
    self.fc0_index = fc0_index
    self.fragments[0] = fc0_frags