def _moltoSVG(mol, sz, highlights, legend, kekulize, drawOptions=None, **kwargs):
    """Draw ``mol`` with the SVG drawer and return the drawing text.

    :param mol: molecule to draw
    :param sz: sequence whose first two items are passed to the drawer as its
        dimensions
    :param highlights: atoms to highlight; a falsy value means no highlights
    :param legend: legend text; a falsy value is drawn as an empty legend
    :param kekulize: requested kekulization flag; it is filtered through
        ``_okToKekulizeMol`` before use
    :param drawOptions: optional draw options installed on the drawer
    :param kwargs: only ``highlightBonds`` is read here
    :return: the result of the drawer's ``GetDrawingText()``
    """
    # One log guard covers the whole preparation phase.  Per RDKit's logging
    # implementation a BlockLogs instance re-enables, on destruction, the logs
    # it switched off on creation; creating a second instance while the first
    # is alive and rebinding the same name therefore turns the logs back on
    # right before the call that was supposed to be silenced.
    blocker = rdBase.BlockLogs()
    try:
        # Probe the first atom; on failure refresh the property cache
        # non-strictly.
        mol.GetAtomWithIdx(0).GetExplicitValence()
    except RuntimeError:
        mol.UpdatePropertyCache(False)

    kekulize = _okToKekulizeMol(mol, kekulize)

    try:
        mc = rdMolDraw2D.PrepareMolForDrawing(mol, kekulize=kekulize)
    except ValueError:  # <- can happen on a kekulization failure
        mc = rdMolDraw2D.PrepareMolForDrawing(mol, kekulize=False)

    # Release the guard explicitly so logging is restored before drawing
    # (relies on reference counting to run the destructor immediately).
    del blocker

    d2d = rdMolDraw2D.MolDraw2DSVG(sz[0], sz[1])
    if drawOptions is not None:
        d2d.SetDrawOptions(drawOptions)

    bondHighlights = kwargs.get('highlightBonds', None)
    d2d.DrawMolecule(mc, legend=legend or "", highlightAtoms=highlights or [],
                     highlightBonds=bondHighlights or [])
    d2d.FinishDrawing()
    return d2d.GetDrawingText()
def _moltoimg(mol, sz, highlights, legend, returnPNG=False, drawOptions=None, **kwargs):
    """Draw ``mol`` as a raster image.

    When ``rdMolDraw2D`` has no ``MolDraw2DCairo`` attribute the drawing is
    delegated to ``MolToImage``; otherwise the Cairo drawer is used.

    :param mol: molecule to draw
    :param sz: sequence whose first two items are passed to the drawer as its
        dimensions (the whole value is forwarded to ``MolToImage`` in the
        fallback path)
    :param highlights: atoms to highlight
    :param legend: legend text
    :param returnPNG: if true, return the PNG data (``GetDrawingText()`` of the
        Cairo drawer, or the bytes of the image saved as PNG in the fallback
        path); otherwise return the image object
    :param drawOptions: optional draw options installed on the Cairo drawer
        (not used in the fallback path)
    :param kwargs: ``kekulize``, ``wedgeBonds``, ``highlightColor`` and
        ``highlightBonds`` are read here; in the fallback path all of
        ``kwargs`` is forwarded to ``MolToImage``
    :return: PNG data or an image object, depending on ``returnPNG``
    """
    # One log guard covers the whole preparation phase.  Per RDKit's logging
    # implementation a BlockLogs instance re-enables, on destruction, the logs
    # it switched off on creation; creating a second instance while the first
    # is alive and rebinding the same name therefore turns the logs back on
    # right before the call that was supposed to be silenced.
    blocker = rdBase.BlockLogs()
    try:
        # Probe the first atom; on failure refresh the property cache
        # non-strictly.
        mol.GetAtomWithIdx(0).GetExplicitValence()
    except RuntimeError:
        mol.UpdatePropertyCache(False)

    kekulize = _okToKekulizeMol(mol, kwargs.get('kekulize', True))
    wedge = kwargs.get('wedgeBonds', True)

    try:
        mc = rdMolDraw2D.PrepareMolForDrawing(mol, kekulize=kekulize, wedgeBonds=wedge)
    except ValueError:  # <- can happen on a kekulization failure
        mc = rdMolDraw2D.PrepareMolForDrawing(mol, kekulize=False, wedgeBonds=wedge)

    # Release the guard explicitly so logging is restored before drawing
    # (relies on reference counting to run the destructor immediately).
    del blocker

    if not hasattr(rdMolDraw2D, 'MolDraw2DCairo'):
        img = MolToImage(mc, sz, legend=legend, highlightAtoms=highlights, **kwargs)
        if returnPNG:
            bio = BytesIO()
            img.save(bio, format='PNG')
            img = bio.getvalue()
        return img

    d2d = rdMolDraw2D.MolDraw2DCairo(sz[0], sz[1])
    if drawOptions is not None:
        d2d.SetDrawOptions(drawOptions)
    if 'highlightColor' in kwargs:
        d2d.drawOptions().setHighlightColor(kwargs['highlightColor'])
    # we already prepared the molecule:
    d2d.drawOptions().prepareMolsBeforeDrawing = False

    # Only pass highlightBonds when the caller supplied it, so the drawer's
    # own default applies otherwise.
    drawKwargs = {}
    bondHighlights = kwargs.get('highlightBonds', None)
    if bondHighlights is not None:
        drawKwargs['highlightBonds'] = bondHighlights
    d2d.DrawMolecule(mc, legend=legend or "", highlightAtoms=highlights or [], **drawKwargs)
    d2d.FinishDrawing()

    if returnPNG:
        return d2d.GetDrawingText()
    return _drawerToImage(d2d)
def test_chembl():
    """Run a Free-Wilson decomposition on the CHEMBL2321810 data set.

    Reads the SMILES, scaffold and activity files found under ``PATH``,
    decomposes the molecules, and checks the fit quality, the enumeration,
    the prediction filter and the column order of the CSV output.
    """
    logging.getLogger().setLevel(logging.INFO)
    smilesfile = os.path.join(PATH, "CHEMBL2321810.smi")
    scaffoldfile = os.path.join(PATH, "CHEMBL2321810_scaffold.mol")
    csvfile = os.path.join(PATH, "CHEMBL2321810_act.csv")
    assert os.path.exists(smilesfile)

    # Every input file is read inside a ``with`` block so the handle is closed
    # deterministically instead of being left to the garbage collector.
    mols = []
    with open(smilesfile) as handle:
        for line in handle:
            smiles, name = line.strip().split()
            m = Chem.MolFromSmiles(smiles)
            m.SetProp("_Name", name)
            mols.append(m)

    with open(scaffoldfile) as handle:
        scaffold = Chem.MolFromMolBlock(handle.read())

    # newline='' is what the csv module asks for when reading from a file.
    with open(csvfile, newline='') as handle:
        rows = list(csv.reader(handle))
    # The first row is the header; the rest map a molecule name to its activity.
    data = {k: float(v) for k, v in rows[1:]}

    scores = [data[m.GetProp("_Name")] for m in mols]
    assert mols and len(mols) == len(scores)

    # Held until the test returns to keep RDKit log output blocked.
    blocker = rdBase.BlockLogs()
    free = fw.FWDecompose(scaffold, mols, scores)
    # let's make sure the r squared is decent
    assert free.r2 > 0.8

    # assert we get something
    preds = list(fw.FWBuild(free))
    assert len(preds)

    # check to see that the prediction filters work
    preds2 = list(fw.FWBuild(free, pred_filter=lambda x: x > 8))
    assert len(preds2)
    assert sum(1 for p in preds if p.prediction > 8) == len(preds2)

    # check to see that the R groups are output in order, i.e. R10 after R3
    s = io.StringIO()
    fw.predictions_to_csv(s, free, preds2)
    assert s.getvalue()

    out_rows = list(csv.reader(io.StringIO(s.getvalue())))
    assert out_rows[0] == [
        'smiles', 'prediction', 'Core_smiles', 'R1_smiles', 'R3_smiles', 'R10_smiles'
    ]
    # there must be at least one data row after the header
    assert len(out_rows) > 1
def FWBuild(fw: FreeWilsonDecomposition, pred_filter=None, mw_filter=None, hvy_filter=None,
            mol_filter=None) -> Generator[FreeWilsonPrediction, None, None]:
    """Enumerate the Free-Wilson decomposition and yield its predictions.

    R-groups with a single attachment point are enumerated first.  Afterwards,
    each distinct multi-attachment ("broken cycle") dummy set is enumerated in
    turn, combined with the single-attachment groups of the remaining
    positions.

    :param fw: FreeWilsonDecomposition generated from FWDecompose
    :param pred_filter: return True if the prediction is in a desirable range
                        e.g. lambda pic50: pic50 >= 8
    :param mw_filter: return True if the enumerated molecular weight is in a
                      desirable range e.g. lambda mw: 150 < mw < 550
    :param hvy_filter: return True if the enumerated heavy atom count is in a
                       desirable range e.g. lambda hvy: 10 < hvy < 50
    :param mol_filter: return True if the molecule is ok to be enumerated
                       e.g. lambda mol: -3 < Descriptors.MolLogP(mol) < 5
    """
    # Never read again: the object only has to stay referenced while the
    # generator is alive.
    log_guard = rdBase.BlockLogs()

    # The same filter set is handed to every _enumerate call below.
    filters = {
        'pred_filter': pred_filter,
        'mw_filter': mw_filter,
        'hvy_filter': hvy_filter,
        'mol_filter': mol_filter,
    }

    # Split every R-group list into single-attachment entries and entries that
    # carry more than one dummy (grouped by their dummy set).
    cycles = set()
    acyclic = defaultdict(list)
    cyclic = defaultdict(list)
    for name, groups in fw.rgroups.items():
        if name != 'Core':
            # Touching the defaultdict registers the position even when all of
            # its groups turn out to be cyclic.
            bucket = acyclic[name]
            for group in groups:
                if len(group.dummies) <= 1:
                    bucket.append(group)
                else:
                    cycles.add(group.dummies)
                    cyclic[group.dummies].append(group)
        else:
            acyclic[name] = groups

    logging.info("Enumerating rgroups with no broken cycles...")
    for name, groups in acyclic.items():
        logging.info(f"\t{name}\t{len(groups)}")

    # Single-attachment groups go first; sorting by key puts 'Core' in front.
    ordered = [acyclic[name] for name in sorted(acyclic)]
    yield from _enumerate(ordered, fw, **filters)

    # Now the groups with broken cycles: each dummy set is used exactly once.
    indices = {int(name[1:]) for name in fw.rgroups if name[0] == "R"}

    if cycles:
        logging.info("Enumerating rgroups with broken cycles...")

    for rgroup_indices in cycles:
        missing = indices - set(rgroup_indices)

        selection = {'Core': fw.rgroups['Core']}
        selection["R%s" % ".".join(map(str, rgroup_indices))] = cyclic[rgroup_indices]
        # Fill the positions not covered by this dummy set with their
        # single-attachment groups.
        for idx in missing:
            label = "R%s" % idx
            selection[label] = acyclic[label]

        for name, groups in selection.items():
            logging.info(f"\t{name}\t{len(groups)}")

        ordered = [selection[name] for name in sorted(selection)]
        yield from _enumerate(ordered, fw, **filters)