def __init__(
    self,
    donor="[#7,#8,#16][H]",
    acceptor="[N,O,F,-{1-};!+{1-}]",
    distance=3.5,
    angles=(130, 180),
):
    """Store compiled donor/acceptor queries plus the geometric cutoffs.

    Parameters
    ----------
    donor : str
        SMARTS compiled with ``MolFromSmarts`` and kept as ``self.donor``.
    acceptor : str
        SMARTS compiled with ``MolFromSmarts`` and kept as ``self.acceptor``.
    distance : float
        Distance threshold, stored unchanged.
    angles : iterable of float
        Angle bounds; every value is passed through ``radians`` and the
        results are stored as a tuple.
    """
    self.donor = MolFromSmarts(donor)
    self.acceptor = MolFromSmarts(acceptor)
    self.distance = distance
    self.angles = tuple(map(radians, angles))
def __init__(
    self,
    cation="[+{1-}]",
    pi_ring=("a1:a:a:a:a:a:1", "a1:a:a:a:a:1"),
    distance=4.5,
    angles=(0, 30),
):
    """Store the compiled cation and ring queries plus the geometric cutoffs.

    Parameters
    ----------
    cation : str
        SMARTS compiled with ``MolFromSmarts`` and kept as ``self.cation``.
    pi_ring : iterable of str
        SMARTS patterns; each one is compiled and the results are kept, in
        order, as the list ``self.pi_ring``.
    distance : float
        Distance threshold, stored unchanged.
    angles : iterable of float
        Angle bounds; every value is passed through ``radians`` and the
        results are stored as a tuple.
    """
    self.cation = MolFromSmarts(cation)
    self.pi_ring = list(map(MolFromSmarts, pi_ring))
    self.distance = distance
    self.angles = tuple(map(radians, angles))
def __init__(
    self,
    donor="[#6,#7,Si,F,Cl,Br,I]-[Cl,Br,I,At]",
    acceptor="[#7,#8,P,S,Se,Te,a;!+{1-}][*]",
    distance=3.5,
    axd_angles=(130, 180),
    xar_angles=(80, 140),
):
    """Store compiled donor/acceptor queries plus the geometric cutoffs.

    Parameters
    ----------
    donor : str
        SMARTS compiled with ``MolFromSmarts`` and kept as ``self.donor``.
    acceptor : str
        SMARTS compiled with ``MolFromSmarts`` and kept as ``self.acceptor``.
    distance : float
        Distance threshold, stored unchanged.
    axd_angles : iterable of float
        First set of angle bounds; each value goes through ``radians``.
    xar_angles : iterable of float
        Second set of angle bounds; each value goes through ``radians``.
    """
    self.donor = MolFromSmarts(donor)
    self.acceptor = MolFromSmarts(acceptor)
    self.distance = distance
    # Both angle windows are stored as tuples of converted values.
    self.axd_angles = tuple(map(radians, axd_angles))
    self.xar_angles = tuple(map(radians, xar_angles))
def _rules_molvs():
    """Rules to neutralize compounds. Inspired by molvs.

    Returns
    -------
    dict
        Maps the keys ``"pos_h"`` and ``"neg"`` to compiled SMARTS queries.
    """
    # Stricter variants that were left disabled in the original code:
    #   pos_h    : '[+!H0!$(*~[-])]'   (neutralizable positive charge, with
    #              hydrogens attached)
    #   pos_quat : '[+H0!$(*~[-])]'    (non-neutralizable positive charge, no
    #              hydrogens attached)
    #   neg      : '[-!$(*~[+H0])]'    (negative charge not bonded to a
    #              positive charge with no hydrogens)
    #   neg_acid : '[$([O-][C,P,S]=O),$([n-]1nnnc1),$(n1[n-]nnc1)]'
    #              (negative oxygen bonded to [C,P,S]=O, negative aromatic
    #              nitrogen?)
    return {
        "pos_h": MolFromSmarts('[+!H0]'),
        "neg": MolFromSmarts('[-]'),
    }
def _atom_matches_smarts(atom, smarts):
    """Return True if `atom` takes part in any match of `smarts`.

    The SMARTS is compiled and matched against the molecule that owns
    `atom`; the atom's index is then looked up in every match tuple.
    """
    query = MolFromSmarts(smarts)
    atom_index = atom.GetIdx()
    owner = atom.GetOwningMol()
    return any(atom_index in hit for hit in owner.GetSubstructMatches(query))
def get_MCSs(test_mols, known_mols, nns_indices=None, murcko_scaff=False):
    """Find, for each query molecule, the known molecule with the largest MCS.

    Parameters
    ----------
    test_mols : sequence
        Query molecules.
    known_mols : sequence
        Reference molecules; converted to a NumPy array so that it can be
        indexed with each entry of `nns_indices`.
    nns_indices : sequence, optional
        One index collection per query molecule selecting the references to
        compare against. When ``None``, every query is compared against all
        of `known_mols`.
    murcko_scaff : bool
        When true, the transform handed to ``get_mcs`` is
        ``GetScaffoldForMol``; otherwise it is the identity.

    Returns
    -------
    tuple of four lists, each with one entry per query molecule:
        the winning MCS result, the query's match of that MCS, the winning
        reference molecule, and the reference's match of that MCS.
    """
    if nns_indices is None:
        nns_indices = [np.arange(len(known_mols))] * len(test_mols)

    if murcko_scaff:
        def f(x):
            return GetScaffoldForMol(x)
    else:
        def f(x):
            return x

    known_mols = np.array(known_mols)
    MCSs, MCS_matches, NN_mols, NN_MCS_matches = [], [], [], []

    for query_mol, nn_i in zip(test_mols, nns_indices):
        candidates = known_mols[nn_i]
        candidate_mcs = [get_mcs(query_mol, ref, f) for ref in tqdm(candidates)]

        # The candidate whose MCS has the most atoms wins; np.argmax keeps
        # the first one on ties.
        best = np.argmax([result.numAtoms for result in candidate_mcs])
        NN_mol = candidates[best]
        mcs = candidate_mcs[best]

        mcs_mol = MolFromSmarts(mcs.smartsString)
        NN_mol_match = NN_mol.GetSubstructMatch(mcs_mol)
        query_mol_match = query_mol.GetSubstructMatch(mcs_mol)

        MCSs.append(mcs)
        MCS_matches.append(query_mol_match)
        NN_mols.append(NN_mol)
        NN_MCS_matches.append(NN_mol_match)

    return MCSs, MCS_matches, NN_mols, NN_MCS_matches
def __init__(
    self,
    centroid_distance=6.0,
    shortest_distance=3.8,
    plane_angles=(0, 90),
    pi_ring=("a1:a:a:a:a:a:1", "a1:a:a:a:a:1"),
):
    """Store compiled ring queries and the geometric cutoffs.

    Parameters
    ----------
    centroid_distance : float
        Stored unchanged as ``self.centroid_distance``.
    shortest_distance : float
        Stored **squared** as ``self.shortest_distance``.
    plane_angles : iterable of float
        Angle bounds; every value is passed through ``radians`` and the
        results are stored as a tuple.
    pi_ring : iterable of str
        SMARTS patterns; each is compiled and the list is kept as
        ``self.pi_ring``.
    """
    self.pi_ring = list(map(MolFromSmarts, pi_ring))
    self.centroid_distance = centroid_distance
    # NOTE(review): only this cutoff is squared -- presumably it is compared
    # against squared distances elsewhere; confirm against the caller.
    self.shortest_distance = shortest_distance ** 2
    self.plane_angles = tuple(map(radians, plane_angles))
def _parse_smarts_keys(smarts_keys: list) -> list: olist = [] for key in smarts_keys: olist.append(MolFromSmarts(key)) if not olist[-1]: print("Could not parse SMARTS '%s'." % key) return olist
def check_similarity_between_generic_and_complete_representation(
        generic_smiles, complete_smiles):
    """Tell whether the generic representation is found in the complete one.

    Both strings are first passed through ``NeutraliseCharges`` (only the
    first element of its result is used). The complete string is then parsed
    as SMILES, the generic string is parsed as a SMARTS query, and the query
    is matched against the complete molecule.

    Returns
    -------
    bool
        True when a non-empty substructure match is found.
    """
    neutral_complete, _ = NeutraliseCharges(complete_smiles)
    neutral_generic, _ = NeutraliseCharges(generic_smiles)

    target = MolFromSmiles(neutral_complete)
    query = MolFromSmarts(neutral_generic)

    return bool(target.GetSubstructMatch(query))
def matches(self, smarts):
    """
    Check the wrapped RDKit molecule against a SMARTS query.

    Uses a client-side RDKit installation.

    Parameters
    ----------
    smarts : str
        SMARTS pattern compiled with ``MolFromSmarts``.

    Returns
    -------
    matches : boolean
        True if the rdmol molecule attribute matches the given SMARTS query.
    """
    query = MolFromSmarts(smarts)
    return self.rdmol.HasSubstructMatch(query)
def klekota_roth(mols):
    """Compute count fingerprints over the ``KRFP_SMARTS`` patterns.

    For every molecule, each position holds the number of substructure
    matches of the corresponding entry of ``KRFP_SMARTS``.

    Parameters
    ----------
    mols
        Molecules forwarded to ``_rdkit_dense_fingerprinter``.

    Returns
    -------
    tuple
        ``(fingerprints, header)`` where ``fingerprints`` is whatever
        ``_rdkit_dense_fingerprinter`` returns and ``header`` is the result
        of ``_zfill_dense_header`` applied to the pattern indices rendered
        as strings.
    """
    from ._krfp_smarts import KRFP_SMARTS

    # Compile each SMARTS once up front; the per-molecule callable below
    # would otherwise re-parse the whole pattern list for every molecule.
    patterns = [MolFromSmarts(smarts) for smarts in KRFP_SMARTS]

    def count_matches(mol):
        return [len(mol.GetSubstructMatches(patt)) for patt in patterns]

    fingerprints = _rdkit_dense_fingerprinter(mols=mols, which=count_matches)
    # ``np.str`` was only an alias of the builtin ``str`` and has been removed
    # from NumPy, so the builtin is used directly (same resulting dtype).
    header = _zfill_dense_header(
        np.array(range(len(KRFP_SMARTS)), dtype=str)
    )
    return fingerprints, header
class OriginalRule03(ScaffoldMinFilterRule):
    """Choose the parent scaffold with the smallest number of acyclic linker bonds"""

    # Two atoms joined by a bond that is not in a ring, not double and not
    # triple; compiled once when the class body is executed.
    acyc_linker_smarts = MolFromSmarts('*!@!=!#*')

    @property
    def name(self):
        """Name of this rule."""
        return 'original rule 03'

    def get_property(self, child, parent):
        """Return how many matches of the linker-bond query the parent has."""
        linker_hits = parent.mol.GetSubstructMatches(self.acyc_linker_smarts)
        return len(linker_hits)
def _amide_bond(bond):
    """Return True if `bond` joins the C and N of a ``C(=O)-N`` match.

    The bond must connect one carbon and one nitrogen atom (in either
    order); both atom indices must then appear together in at least one
    match of the pattern on the owning molecule.
    """
    begin, end = bond.GetBeginAtom(), bond.GetEndAtom()

    # Anything other than a carbon/nitrogen pair is rejected straight away.
    if {begin.GetAtomicNum(), end.GetAtomicNum()} != {6, 7}:
        return False

    # https://github.com/rdkit/rdkit/blob/master/Data/FragmentDescriptors.csv
    amide = MolFromSmarts('C(=O)-N')
    bond_atoms = {begin.GetIdx(), end.GetIdx()}
    return any(
        bond_atoms.issubset(hit)
        for hit in bond.GetOwningMol().GetSubstructMatches(amide)
    )
def _rules_rdkit():
    """Build a list of (query, replacement) molecule pairs.

    Each pair holds a SMARTS string compiled with ``MolFromSmarts`` and a
    SMILES string parsed with ``MolFromSmiles(smiles, False)``.
    NOTE(review): presumably these are charge-neutralisation rules (charged
    group -> neutral form); confirm against the caller.
    """
    smarts_and_smiles = (
        # Imidazoles
        ('[n+;H]', 'n'),
        # Amines
        ('[N+;!H0]', 'N'),
        # Carboxylic acids and alcohols
        ('[$([O-]);!$([O-][#7])]', 'O'),
        # Thiols
        ('[S-;X1]', 'S'),
        # Sulfonamides
        ('[$([N-;X2]S(=O)=O)]', 'N'),
        # Enamines
        ('[$([N-;X2][C,N]=C)]', 'N'),
        # Tetrazoles
        ('[n-]', '[nH]'),
        # Sulfoxides
        ('[$([S-]=O)]', 'S'),
        # Amides
        ('[$([N-]C=O)]', 'N'),
    )

    rules = []
    for smarts, smiles in smarts_and_smiles:
        rules.append((MolFromSmarts(smarts), MolFromSmiles(smiles, False)))
    return rules
def OH_count(smile):
    '''
    Count the OH groups of a chemical given as a SMILES string.

    The molecule is matched against the SMARTS pattern '[OX2H]' and the
    number of matches is returned.

    Inputs:
        - smile (str): SMILES of the chemical

    Outputs:
        - Count of OH groups in the molecule (int), or the string 'NaN'
          when the count cannot be computed (e.g. the SMILES does not
          parse).
    '''
    try:
        m = MolFromSmiles(smile)
        patt = MolFromSmarts('[OX2H]')
        return len(m.GetSubstructMatches(patt))
    except Exception:
        # Best-effort contract: any ordinary failure yields 'NaN'. Catching
        # Exception rather than using a bare except keeps KeyboardInterrupt
        # and SystemExit from being swallowed.
        return 'NaN'
def parse_sma(self, sma):
    """Return the compiled pattern for `sma`, caching it in ``self.pat``.

    A SMARTS already present in ``self.pat`` is returned straight from the
    dict. Otherwise it is compiled; a falsy result gives ``None`` and
    nothing is cached. Before a new pattern is stored, one existing entry is
    dropped with ``popitem()`` if the dict already holds ``self.maxsma`` or
    more entries.
    """
    if sma in self.pat:
        return self.pat[sma]

    compiled = MolFromSmarts(sma)
    if not compiled:
        return None

    # Cache is full: make room by discarding one stored pattern.
    if len(self.pat) >= self.maxsma:
        self.pat.popitem()

    self.pat[sma] = compiled
    return compiled
class OriginalRule12(ScaffoldFilterRule):
    """Remove rings first where the linker is attached to a ring hetero atom
    at either end of the linker"""

    # A ring atom followed by a neighbour across a bond that is neither a
    # ring bond nor a double bond; compiled once at class creation.
    connection_patt = MolFromSmarts('[R]!@!=*')

    @property
    def name(self):
        """Name of this rule."""
        return 'original rule 12'

    def condition(self, child, parent):
        """Return True if the removed ring connects through a hetero atom.

        The first atom of every ``connection_patt`` match in the child is
        collected, restricted to the atoms of the ring the parent removed,
        and the rule holds when any of those atoms has an atomic number
        other than 1 or 6.
        """
        removed_ring = child.rings[parent.removed_ring_idx]
        first_atoms = set()
        for hit in child.mol.GetSubstructMatches(self.connection_patt):
            first_atoms.add(hit[0])
        on_removed_ring = first_atoms.intersection(removed_ring.aix)
        return any(
            child.atoms[index].GetAtomicNum() not in (1, 6)
            for index in on_removed_ring
        )
# _mols2imageStream: draw `mols` as a grid (at most 4 molecules per row) with
# Draw.MolsToGridImage and save the resulting image to `f` using `format`.
#   - highlightMatch: optional SMARTS; when truthy it is compiled and each
#     molecule's GetSubstructMatch result is passed as highlightAtomLists.
#   - Kekulization is requested unless the first molecule carries the
#     "_drawingBondsWedged" property.
#   - size > 150 doubles the sub-image size and applies larger drawing
#     options; fitImage is only ever enabled on that path.
# NOTE(review): the docstring says "Return an input stream", but no return
# statement is visible -- the image is written to `f` instead.
# NOTE(review): `legend` is accepted but never used in the body.
# NOTE(review): mols[0] is read unconditionally, so an empty `mols` would
# presumably raise IndexError -- confirm callers never pass one.
def _mols2imageStream(mols, f, format, size, legend, highlightMatch=None): """Return an input stream for the molecule as drawn""" highlights = None if highlightMatch: pattern = MolFromSmarts(highlightMatch) highlights = [mol.GetSubstructMatch(pattern) for mol in mols] kek = True if mols[0].HasProp("_drawingBondsWedged"): kek = False fit = False options = DrawingOptions() subim = (size, size) if size > 150: subim = (size * 2, size * 2) options.coordScale = 3 options.bondLineWidth = 3.6 options.dblBondOffset = 0.435 options.atomLabelFontSize = 60 if kek: options.bondLineWidth = 4.5 options.dblBondOffset = 0.6 options.atomLabelFontSize = 150 fit = True elif kek: options.dblBondOffset = 0.4 image = Draw.MolsToGridImage(mols, molsPerRow=min(len(mols), 4), subImgSize=subim, kekulize=kek, highlightAtomLists=highlights, fitImage=fit, options=options) image.save(f, format)
class SCPNumLinkerBonds(_MinMaxScaffoldFilterRule):
    """Filter by number of linker bonds in the parent scaffold.

    Specify 'min' to prioritize scaffolds with the smallest number of
    acyclic linker bonds. Specify 'max' to prioritize scaffolds with the
    largest number of acyclic linker bonds.

    Parameters
    ----------
    min_max : {'min', 'max'}
        Specify 'min' or 'max' to define the function used to prioritize
        scaffolds based on the returned property.

    """

    # Two atoms joined by a bond that is not in a ring, not double and not
    # triple; compiled once when the class body is executed.
    acyc_linker_smarts = MolFromSmarts('*!@!=!#*')

    def __init__(self, min_max):
        super().__init__(min_max)

    def get_property(self, child, parent):
        """Return how many matches of the linker-bond query the parent has."""
        linker_hits = parent.mol.GetSubstructMatches(self.acyc_linker_smarts)
        return len(linker_hits)
if mol is not None: strings.append(s) values.append(v) return strings, values


def duplicate_analysis(df, key_col, val_col):
    """Summarise duplicated keys in `df`.

    Parameters
    ----------
    df
        Table exposing ``duplicated``, boolean-mask indexing and ``groupby``
        (presumably a pandas DataFrame -- confirm against the caller).
    key_col
        Column(s) passed as ``subset`` to ``duplicated`` and used as the
        ``groupby`` key.
    val_col
        Column whose spread is measured within each duplicated key.

    Returns
    -------
    per_dup
        Number of rows flagged with ``keep="first"`` divided by ``len(df)``;
        this is a fraction, not a percentage.
    stds
        Standard deviation of `val_col` per key, computed only over rows
        whose key occurs more than once.
    """
    # Repeats only: the first occurrence of each key is not flagged.
    idx_dup_f = df.duplicated(subset=key_col, keep="first")
    # Every row whose key occurs more than once, first occurrence included.
    idx_dup = df.duplicated(subset=key_col, keep=False)
    per_dup = np.sum(idx_dup_f) / len(df)
    df_dup = df[idx_dup]
    stds = df_dup.groupby(key_col)[val_col].std()
    return per_dup, stds


# Query for atoms that are either +1 with a non-zero implicit-hydrogen count
# and no negatively charged neighbour, or -1 with no positively charged
# neighbour.
PATTERN = MolFromSmarts("[+1!h0!$([*]~[-1,-2,-3,-4]),-1!$([*]~[+1,+2,+3,+4])]")


def neutralize_atoms(mol, pattern):
    """Set the formal charge of every atom matched by `pattern` to zero.

    For each match, the first atom index is taken; that atom's formal charge
    is reset to 0 and its explicit hydrogen count is set to the previous
    total hydrogen count minus the previous charge. The atoms are modified
    through `mol` itself, and the same `mol` object is returned.
    """
    at_matches = mol.GetSubstructMatches(pattern)
    # Only the first atom of each match is adjusted.
    at_matches_list = [y[0] for y in at_matches]
    if len(at_matches_list) > 0:
        for at_idx in at_matches_list:
            atom = mol.GetAtomWithIdx(at_idx)
            chg = atom.GetFormalCharge()
            hcount = atom.GetTotalNumHs()
            atom.SetFormalCharge(0)
            # A positive charge removes that many hydrogens; a negative
            # charge adds them.
            atom.SetNumExplicitHs(hcount - chg)
            atom.UpdatePropertyCache()
    return mol
def count_rings(mol):
    """Count the substructure matches of the pattern 'c1ccccc1' in `mol`.

    Only that six-membered aromatic carbon ring pattern is counted; other
    ring types are not considered here.
    """
    benzene_query = MolFromSmarts('c1ccccc1')
    return len(mol.GetSubstructMatches(benzene_query))
def _molFromSmarts(smarts):
    """Compile `smarts` with ``MolFromSmarts`` after coercing it to ``str``."""
    smarts_text = str(smarts)
    return MolFromSmarts(smarts_text)
def __init__(self, lig_pattern, prot_pattern, distance):
    """Compile both SMARTS patterns and remember the distance threshold.

    Parameters
    ----------
    lig_pattern : str
        SMARTS compiled and stored as ``self.lig_pattern``.
    prot_pattern : str
        SMARTS compiled and stored as ``self.prot_pattern``.
    distance : float
        Stored unchanged as ``self.distance``.
    """
    compiled_lig = MolFromSmarts(lig_pattern)
    compiled_prot = MolFromSmarts(prot_pattern)
    self.lig_pattern, self.prot_pattern = compiled_lig, compiled_prot
    self.distance = distance
def smarts_reader(smarts, **kwargs):
    """Compile `smarts` with ``MolFromSmarts``, defaulting ``mergeHs`` to True.

    Any keyword arguments are forwarded unchanged; an explicit ``mergeHs``
    supplied by the caller takes precedence over the default.
    """
    options = {'mergeHs': True}
    options.update(kwargs)
    return MolFromSmarts(smarts, **options)