コード例 #1
0
    def test_chembl(self):
        """
        Run the random explorer with scores looked up from the ChEMBL property table.

        Known problem with fixed-property testing: almost all of the explored
        results seem to fall outside of the database (<10% for an initial pool
        of 50, and even less for smaller pools), so their score cannot be
        looked up; setting those scores to zero leads to slow exploration.
        """
        smiles_list, prop_by_smiles = get_chembl_prop()

        # Conversions stay disabled here: loading with mol conversions
        # takes about 8 minutes.
        pool_all = [Molecule(smi, conv_enabled=False) for smi in smiles_list]
        start_pool = list(np.random.choice(pool_all, size=100, replace=False))

        def lookup(mol):
            # Score of a molecule is its tabulated property, keyed by SMILES.
            return prop_by_smiles[mol.smiles]

        def print_props(pool):
            props = [lookup(mol) for mol in pool]
            print("Props of pool", len(pool), np.min(props), np.mean(props), np.max(props))

        for pool in (pool_all, start_pool):
            print_props(pool)

        def first_mol_score(mol_list):
            # The explorer hands over a list; only its first entry is scored.
            return lookup(mol_list[0])

        exp = RandomExplorer(first_mol_score, initial_pool=start_pool)

        print("Starting ChEMBL score 1 optimization")
        started = time()
        exp.run(30)
        elapsed = time() - started
        print("Completed ChEMBL score 1 optimization, time elapsed: %.3fs" % elapsed)

        best = exp.get_best(1)[0]
        print(best.get_synthesis_path())

        # Compare what the run reached against the best value in the full pool.
        print("Best achieved score: %.3f" % lookup(best))
        best_possible = np.max([lookup(mol) for mol in pool_all])
        print("Best possible score: %.3f" % best_possible)
コード例 #2
0
    def _test_len(self):
        """Run the random explorer briefly with SMILES string length as the objective."""
        def smiles_length(mol):
            return len(mol.smiles)

        smiles_strings = ("CC", "O=C=O", "C#N", "CCN(CC)CC", "CC(=O)O", "C1CCCCC1", "c1ccccc1")
        initial_mols = [Molecule(smi) for smi in smiles_strings]
        exp = RandomExplorer(smiles_length, initial_pool=initial_mols)
        print("Starting len of SMILES optimization")
        exp.run(2)

        # No assertions: the resulting pool is printed for manual inspection.
        print(exp.pool)
コード例 #3
0
    def _test_sas(self):
        """Run the random explorer with the SA score as the objective and print the outcome."""
        def sas_score(mol):
            # The score is computed on the RDKit-style molecule parsed from SMILES.
            return calculateSAScore(Chem.MolFromSmiles(mol.smiles))

        # Quick sanity print of the objective on a single small molecule.
        print(sas_score(Molecule("CC")))

        smiles_strings = ("CC", "O=C=O", "C#N", "CCN(CC)CC", "CC(=O)O", "C1CCCCC1", "c1ccccc1")
        initial_mols = [Molecule(smi) for smi in smiles_strings]
        exp = RandomExplorer(sas_score, initial_pool=initial_mols)

        print("Starting SA score optimization")
        started = time()
        exp.run(10)
        elapsed = time() - started

        # No assertions: timing, pool and synthesis path are printed for inspection.
        print("Completed SA score optimization, time elapsed: %.3fs" % elapsed)
        print(exp.pool)
        best = exp.get_best(1)[0]
        print(best.get_synthesis_path())
コード例 #4
0
def explore_and_validate_synth(init_pool_size, seed, budget, objective,
                               dataset, max_pool_size, reporter):
    """
    Run a random-explorer optimization and report on the result.

    This experiment is equivalent to unlimited-evaluation optimization.
    It compares the optimum found against the optimum over the sampled pool,
    and checks whether synthesizability is improved.

    Args:
        init_pool_size: number of molecules sampled for the initial pool; it
            is also subtracted from ``budget`` before running the explorer.
        seed: passed to ``MolSampler`` as ``sampling_seed``.
        budget: total budget; the explorer runs with
            ``budget - init_pool_size``.
        objective: objective name, resolved via ``get_objective_by_name`` and
            used in report lines and the plot title.
        dataset: passed through to ``MolSampler``.
        max_pool_size: passed through to ``RandomExplorer``.
        reporter: object exposing ``writeln(str)``; receives all report lines.

    Side effects:
        Pickles the synthesis path of the best point to ``SYN_PATH_FILE``,
        saves the objective-value curve to ``PLOT_FILE`` (EPS) and writes the
        space-separated objective values to ``OPT_VALS_FILE``.
    """
    obj_func = get_objective_by_name(objective)
    sampler = MolSampler(dataset, sampling_seed=seed)
    pool = sampler(init_pool_size)
    exp = RandomExplorer(obj_func,
                         initial_pool=pool,
                         max_pool_size=max_pool_size)
    # The initial pool counts against the overall budget.
    real_budget = budget - init_pool_size

    props = [obj_func(mol) for mol in pool]
    reporter.writeln(
        f"Properties of pool: quantity {len(pool)}, min {np.min(props)}, avg {np.mean(props)}, max {np.max(props)}"
    )
    reporter.writeln(f"Starting {objective} optimization")

    t0 = time.time()
    # The first returned element is not used; the score is recomputed from
    # the returned point below, as before.
    _, top_point, history = exp.run(real_budget)

    reporter.writeln("Finished run in {:.3f} minutes".format(
        (time.time() - t0) / 60))
    reporter.writeln(f"Is a valid molecule: {check_validity(top_point)}")
    reporter.writeln(f"Resulting molecule: {top_point}")
    reporter.writeln(f"Top score: {obj_func(top_point)}")
    reporter.writeln(
        f"Minimum synthesis score over the path: {compute_min_sa_score(top_point)}"
    )
    with open(SYN_PATH_FILE, 'wb') as f:
        pkl.dump(top_point.get_synthesis_path(), f)

    # Report the synthesis score of the five best-scoring pool members.
    sorted_by_prop = sorted(pool, key=obj_func)[-5:]
    for opt_mol in sorted_by_prop:
        min_sa_score = compute_min_sa_score(opt_mol)
        reporter.writeln(
            f"Minimum synthesis score of optimal molecules: {min_sa_score}")

    vals = history['objective_vals']
    # Draw on a dedicated figure and always close it: plotting on pyplot's
    # implicit current figure would overlay curves from earlier calls in the
    # same process and keep the figure alive after saving.
    fig, ax = plt.subplots()
    try:
        ax.set_title(f'Optimizing {objective} with random explorer')
        ax.plot(range(len(vals)), vals)
        fig.savefig(PLOT_FILE, format='eps', dpi=1000)
    finally:
        plt.close(fig)
    with open(OPT_VALS_FILE, 'w') as f:
        f.write(' '.join(str(v) for v in vals))
コード例 #5
0
ファイル: gp_bandit.py プロジェクト: kamikaze0923/ChemBo
 def _opt_method_optimise_initalise(self):
     """ Important setup: creating the optimization object """
     initial_pool = [mol_lst[0] for mol_lst in self.history.query_points]
     logging.info(
         f'Length of initial pool {len(initial_pool)}, should be equal to init_capital.'
     )
     if self.acq_opt_method == 'rand_explorer':
         self.acq_optimizer = RandomExplorer(
             initial_pool=initial_pool,
             max_pool_size=self.options.max_pool_size)
     else:
         raise NotImplementedError(
             "Acq opt method {} not implemented.".format(
                 self.acq_opt_method))