def predict_outcome(self, list_of_mols, k=1):
    """
    Using a predictor, produce top-k most likely reactions.

    The SMILES of all inputs are joined with "." into a single string,
    which is passed to ``self.directcorefinder.predict`` and then, with
    the returned bond predictions and scores, to
    ``self.directcandranker.predict``. The first ``k`` entries of the
    ranker's output are turned into Molecule objects.

    Params:
    :list_of_mols: list of reactants and reagents
        (both of class Molecule)
        (former contribute atoms, latter don't)
    :k: number of leading ranker outcomes to keep

    Returns:
        list of Molecule, each with ``set_synthesis(list_of_mols)`` applied
    """
    joined_smiles = ".".join(mol.smiles for mol in list_of_mols)

    core_prediction = self.directcorefinder.predict(joined_smiles)
    # The attention score (4th item) is returned by the core finder
    # but is not needed for ranking.
    joined_smiles, bond_preds, bond_scores, _att_score = core_prediction

    # ---> TODO: add input check here: some molecules seem to be 'unparseable' <---
    # this might be a problem of Rexgen, though
    ranked = self.directcandranker.predict(
        joined_smiles, bond_preds, bond_scores
    )

    products = []
    for candidate in ranked[:k]:
        # Only the first SMILES of each candidate is used.
        product = Molecule(candidate["smiles"][0])
        product.set_synthesis(list_of_mols)
        products.append(product)
    return products
def get_chembl(n_mols=None, as_mols=True):
    """
    Load entries from the ChEMBL SMILES file.

    Reads ``./datasets/ChEMBL.txt`` (path relative to the current working
    directory); each line is stripped and treated as one SMILES string.

    Params:
    :n_mols: maximum number of lines to read from the top of the file;
        None (default) reads the whole file. If the file has fewer lines,
        only the available ones are returned.
    :as_mols: if True (default), wrap every SMILES in a Molecule;
        if False, return the raw SMILES strings.

    Returns:
        list of Molecule (``as_mols=True``) or list of str
        (``as_mols=False``)
    """
    from itertools import islice

    path = "./datasets/ChEMBL.txt"
    with open(path, "r") as f:
        # islice stops at end-of-file, so asking for more lines than the
        # file holds no longer pads the result with empty strings.
        # max(..., 0) keeps a negative n_mols yielding an empty list.
        lines = f if n_mols is None else islice(f, max(n_mols, 0))
        smiles = [line.strip() for line in lines]

    if not as_mols:
        return smiles
    return [Molecule(smile) for smile in smiles]
def test_sas(self):
    """Run RandomExplorer with the SA score as objective and print results.

    Prints the score of "CC", evolves a small fixed pool for 10 steps,
    then prints the elapsed time, the final pool and the synthesis path
    of the best molecule.
    """

    def sa_score(mol):
        # The score is computed on the result of Chem.MolFromSmiles,
        # not on the Molecule wrapper itself.
        return calculateSAScore(Chem.MolFromSmiles(mol.smiles))

    print(sa_score(Molecule("CC")))

    seed_smiles = [
        "CC",
        "O=C=O",
        "C#N",
        "CCN(CC)CC",
        "CC(=O)O",
        "C1CCCCC1",
        "c1ccccc1",
    ]
    seed_pool = [Molecule(smiles) for smiles in seed_smiles]
    explorer = RandomExplorer(sa_score, initial_pool=seed_pool)

    print("Starting SA score optimization")
    started_at = time()
    explorer.evolve(10)  # check
    elapsed = time() - started_at
    print("Completed SA score optimization, time elapsed: %.3fs" % elapsed)

    print(explorer.pool)
    best = explorer.get_best(1)[0]
    print(best.get_synthesis_path())
def test_len(self):
    """Run RandomExplorer with SMILES length as a dummy objective.

    Evolves a small fixed pool for 2 steps and prints the resulting pool.
    """

    def smiles_length(mol):
        return len(mol.smiles)

    seed_smiles = [
        "CC",
        "O=C=O",
        "C#N",
        "CCN(CC)CC",
        "CC(=O)O",
        "C1CCCCC1",
        "c1ccccc1",
    ]
    seed_pool = [Molecule(smiles) for smiles in seed_smiles]
    explorer = RandomExplorer(smiles_length, initial_pool=seed_pool)

    print("Starting len of SMILES optimization")
    explorer.evolve(2)  # check
    print(explorer.pool)
def ga_optimise_from_args(func, max_capital):
    """
    Run a RandomExplorer on ``func`` and return the best result found.

    Params:
    :func: objective; may accept either a single Molecule or an iterable
        of Molecules. Which one is detected by a probe call.
    :max_capital: budget passed to ``RandomExplorer.evolve``

    Returns:
        (top, val) where ``top`` is whatever ``explorer.get_best(k=1)``
        returns (a one-element sequence of Molecule) and ``val`` is the
        objective evaluated on that best molecule.
    """
    # the func may accept iterable or a single Molecule
    probe = Molecule(smiles="c1cc(OCCCN2CCCCC2)ccc1CN1CCC2(CC1)OCCO2")
    try:
        func(probe)
    except Exception:
        # Deliberately broad: func is arbitrary user code, and any failure
        # on a single Molecule is taken to mean it expects an iterable.
        def func_(m):
            return func([m])
    else:
        func_ = func

    explorer = RandomExplorer(func_)
    explorer.evolve(max_capital)
    top = explorer.get_best(k=1)
    # Score the best molecule through the adapter. Calling the raw func on
    # the sequence returned by get_best fails for single-Molecule
    # objectives; for iterable objectives the result is unchanged.
    val = func_(top[0])
    return top, val
def test_chembl(self):
    """
    Run RandomExplorer on a tabulated ChEMBL property and print the outcome.

    Problem with fixed-prop testing:
    Almost all of the results (<10% for init_pool of 50)
    seem to be outside of the database, and even less for smaller pool.
    Hence cannot get its score for testing;
    setting them to zero leads to slow exploration.
    """
    all_smiles, prop_table = chembl_prop_exp()

    # Conversions stay disabled: per the original note, loading with
    # conv_enabled=True takes about 8 minutes.
    pool_all = [
        Molecule(smiles, conv_enabled=False) for smiles in all_smiles
    ]
    start_pool = list(np.random.choice(pool_all, size=100, replace=False))

    def lookup(mol):
        # Objective: property value stored for this exact SMILES.
        return prop_table[mol.smiles]

    def print_props(pool):
        values = [prop_table[mol.smiles] for mol in pool]
        print(
            "Props of pool",
            len(pool),
            np.min(values),
            np.mean(values),
            np.max(values),
        )

    print_props(pool_all)
    print_props(start_pool)

    explorer = RandomExplorer(lookup, initial_pool=start_pool)

    print("Starting ChEMBL score 1 optimization")
    started_at = time()
    explorer.evolve(30)
    elapsed = time() - started_at
    print("Completed ChEMBL score 1 optimization, time elapsed: %.3fs" % elapsed)

    # print(explorer.pool)
    best = explorer.get_best(1)[0]
    print(best.get_synthesis_path())
    print("Best achieved score: %.3f" % lookup(best))

    # Upper bound for comparison: the best value anywhere in the full pool.
    all_values = [prop_table[mol.smiles] for mol in pool_all]
    print("Best possible score: %.3f" % np.max(all_values))
def test_wl_kernel(self):
    """Print ``mol_kernels.compute_wl_kernel`` for molecules S1, S2 and S3."""
    samples = [Molecule(smiles) for smiles in (S1, S2, S3)]
    kernel = mol_kernels.compute_wl_kernel(samples)
    print(kernel)
def test_edgehist_kernel(self):
    """Print ``mol_kernels.compute_edgehist_kernel`` for molecules S1 and S2."""
    samples = [Molecule(smiles) for smiles in (S1, S2)]
    kernel = mol_kernels.compute_edgehist_kernel(samples)
    print(kernel)
def test_conversions(self):
    """Print the result of ``mol_kernels.mol2graph_igraph`` for molecule S1."""
    sample = Molecule(S1)
    print(mol_kernels.mol2graph_igraph(sample))