def run_model(parameters_dict):
    """Assemble a population algorithm from a raw parameter dict and run it.

    GuacaMol objectives are executed through the GuacaMol benchmark harness
    (assess_goal_directed_generation) instead of a direct pop_alg.run().
    """
    # Search-related parameters
    search_params = _extract_explicit_search_parameters(parameters_dict)

    # IO-related parameters
    io_params = _extract_explicit_IO_parameters(parameters_dict)

    # Objective function
    evaluation_strategy = _parse_objective_function_strategy(
        parameters_dict, explicit_IO_parameters_dict=io_params)

    # Action space
    action_spaces, action_spaces_parameters = _parse_action_space(
        parameters_dict)

    # Mutation strategy
    mutation_strategy = _parse_mutation_parameters(
        explicit_search_parameters=search_params,
        evaluation_strategy=evaluation_strategy,
        action_spaces=action_spaces,
        action_spaces_parameters=action_spaces_parameters)

    # Stop-criterion strategy
    stop_criterion_strategy = _parse_stop_criterion_strategy(
        explicit_search_parameters_dict=search_params,
        explicit_IO_parameters_dict=io_params)

    # Algorithm instance
    pop_alg = _build_instance(
        evaluation_strategy=evaluation_strategy,
        mutation_strategy=mutation_strategy,
        stop_criterion_strategy=stop_criterion_strategy,
        explicit_search_parameters_dict=search_params,
        explicit_IO_parameters_dict=io_params)

    # GuacaMol special case: hand control to the GuacaMol harness.
    if evaluation_strategy == "guacamol":
        model_generator = ChemPopAlgGoalDirectedGenerator(
            pop_alg=pop_alg,
            guacamol_init_top_100=search_params["guacamol_init_top_100"],
            init_pop_path=io_params["smiles_list_init_path"],
            output_save_path=io_params["model_path"])
        assess_goal_directed_generation(
            model_generator,
            json_output_file=join(io_params["model_path"],
                                  "output_GuacaMol.json"),
            benchmark_version='v2')
    else:
        pop_alg.run()
def main():
    """Run the graph-based MCTS generator on the GuacaMol goal-directed
    suite, budgeting the generation count to the oracle-call limit."""
    # One oracle call per molecule in the population, per generation;
    # `max_oracle_num` is a module-level budget defined elsewhere in the file.
    pop_size = 100
    children_per_node = 10
    n_generations = int(max_oracle_num / pop_size / children_per_node)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--pickle_directory',
        help='Directory containing pickle files with the distribution statistics',
        default=None)
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--n_jobs', type=int, default=-1)
    parser.add_argument('--generations', type=int, default=n_generations)
    parser.add_argument('--population_size', type=int, default=pop_size)
    parser.add_argument('--num_sims', type=int, default=40)
    parser.add_argument('--max_children', type=int, default=children_per_node)
    parser.add_argument('--max_atoms', type=int, default=60)
    parser.add_argument('--init_smiles', type=str, default='CC')
    parser.add_argument('--output_dir', type=str, default=None)
    parser.add_argument('--patience', type=int, default=5)
    parser.add_argument('--suite', default='v3')
    args = parser.parse_args()

    # Default outputs and pickle inputs to this script's directory.
    here = os.path.dirname(os.path.realpath(__file__))
    if args.output_dir is None:
        args.output_dir = here
    if args.pickle_directory is None:
        args.pickle_directory = here

    np.random.seed(args.seed)
    setup_default_logger()

    # Snapshot the effective CLI arguments next to the results.
    with open(os.path.join(args.output_dir, 'goal_directed_params.json'),
              'w') as jf:
        json.dump(vars(args), jf, sort_keys=True, indent=4)

    optimiser = GB_MCTS_Generator(pickle_directory=args.pickle_directory,
                                  n_jobs=args.n_jobs,
                                  num_sims=args.num_sims,
                                  max_children=args.max_children,
                                  init_smiles=args.init_smiles,
                                  max_atoms=args.max_atoms,
                                  patience=args.patience,
                                  generations=args.generations,
                                  population_size=args.population_size)

    json_file_path = os.path.join(args.output_dir,
                                  'goal_directed_results.json')
    assess_goal_directed_generation(optimiser,
                                    json_output_file=json_file_path,
                                    benchmark_version=args.suite)
def entry_point():
    """CLI entry point: run the CReM-based generator on a GuacaMol suite."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--smiles_file', type=str)
    parser.add_argument('--db_fname', type=str)
    parser.add_argument('--selection_size', type=int, default=10)
    parser.add_argument('--radius', type=int, default=3)
    parser.add_argument('--replacements', type=int, default=1000)
    parser.add_argument('--min_size', type=int, default=0)
    parser.add_argument('--max_size', type=int, default=10)
    parser.add_argument('--min_inc', type=int, default=-7)
    parser.add_argument('--max_inc', type=int, default=7)
    parser.add_argument('--generations', type=int, default=1000)
    parser.add_argument('--ncpu', type=int, default=1)
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--output_dir', type=str, default=None)
    parser.add_argument('--suite', default='v2')
    args = parser.parse_args()

    np.random.seed(args.seed)
    setup_default_logger()

    if args.output_dir is None:
        args.output_dir = os.path.dirname(os.path.realpath(__file__))

    # Record the effective parameters alongside the results.
    params_path = os.path.join(args.output_dir, 'goal_directed_params.json')
    with open(params_path, 'w') as jf:
        json.dump(vars(args), jf, sort_keys=True, indent=4)

    generator_config = dict(smi_file=args.smiles_file,
                            selection_size=args.selection_size,
                            db_fname=args.db_fname,
                            radius=args.radius,
                            min_size=args.min_size,
                            max_size=args.max_size,
                            min_inc=args.min_inc,
                            max_inc=args.max_inc,
                            replacements=args.replacements,
                            generations=args.generations,
                            ncpu=args.ncpu,
                            random_start=True,
                            output_dir=args.output_dir)
    optimiser = CREM_Generator(**generator_config)

    results_path = os.path.join(args.output_dir, 'goal_directed_results.json')
    assess_goal_directed_generation(optimiser,
                                    json_output_file=results_path,
                                    benchmark_version=args.suite)
def main():
    """Run the PPO-directed generator on the GuacaMol goal-directed suite.

    Fix: ``--clip_param`` was declared ``type=int`` with a default of 0.2,
    so any value passed on the command line either raised
    (``int('0.2')`` is a ValueError) or truncated the fractional PPO
    clipping range to 0. It is parsed as a float now; the default is
    unchanged.
    """
    parser = argparse.ArgumentParser()
    # NOTE(review): --n_jobs is parsed but never forwarded to the
    # generator below — confirm whether it should be.
    parser.add_argument('--n_jobs', type=int, default=-1)
    parser.add_argument('--episode_size', type=int, default=8192)
    parser.add_argument('--batch_size', type=int, default=1024)
    # NOTE(review): the loss weights are declared type=int; if fractional
    # weights are ever needed these should become float as well.
    parser.add_argument('--entropy_weight', type=int, default=1)
    parser.add_argument('--kl_div_weight', type=int, default=10)
    parser.add_argument('--output_dir', default=None)
    # PPO clipping epsilon is fractional: must be parsed as float.
    parser.add_argument('--clip_param', type=float, default=0.2)
    parser.add_argument('--num_epochs', type=int, default=20)
    parser.add_argument('--model_path', default=None)
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--suite', default='v3')
    args = parser.parse_args()

    np.random.seed(args.seed)
    setup_default_logger()

    # Default the output directory and pretrained-model path relative to
    # this file's location.
    if args.output_dir is None:
        args.output_dir = os.path.dirname(os.path.realpath(__file__))
    if args.model_path is None:
        dir_path = os.path.dirname(os.path.realpath(__file__))
        args.model_path = os.path.join(dir_path, 'pretrained_model',
                                       'model_final_0.473.pt')

    # save command line args
    with open(os.path.join(args.output_dir, 'goal_directed_params.json'),
              'w') as jf:
        json.dump(vars(args), jf, sort_keys=True, indent=4)

    optimiser = PPODirectedGenerator(pretrained_model_path=args.model_path,
                                     num_epochs=args.num_epochs,
                                     episode_size=args.episode_size,
                                     batch_size=args.batch_size,
                                     entropy_weight=args.entropy_weight,
                                     kl_div_weight=args.kl_div_weight,
                                     clip_param=args.clip_param)

    json_file_path = os.path.join(args.output_dir,
                                  'goal_directed_results.json')
    assess_goal_directed_generation(optimiser,
                                    json_output_file=json_file_path,
                                    benchmark_version=args.suite)
def main():
    """Run the graph-based MCTS generator (random-start variant) on the
    GuacaMol goal-directed suite.

    Fix: ``--random_start`` used ``type=bool``, which is a classic argparse
    trap — ``bool("False")`` (or any non-empty string) is ``True``, so every
    explicit command-line value enabled random start. It is now a proper
    ``store_true`` flag (default stays ``False``), consistent with the other
    drivers in this file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--pickle_directory',
        help='Directory containing pickle files with the distribution statistics',
        default=None)
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--n_jobs', type=int, default=-1)
    parser.add_argument('--generations', type=int, default=1000)
    parser.add_argument('--population_size', type=int, default=100)
    parser.add_argument('--num_sims', type=int, default=40)
    parser.add_argument('--max_children', type=int, default=25)
    parser.add_argument('--max_atoms', type=int, default=60)
    parser.add_argument('--init_smiles', type=str, default='CC')
    # was: type=bool (any given value parsed as True); now a real flag.
    parser.add_argument('--random_start', action='store_true', default=False)
    parser.add_argument('--output_dir', type=str, default=None)
    parser.add_argument('--patience', type=int, default=5)
    args = parser.parse_args()

    # Default outputs and pickle inputs to this script's directory.
    if args.output_dir is None:
        args.output_dir = os.path.dirname(os.path.realpath(__file__))
    if args.pickle_directory is None:
        args.pickle_directory = os.path.dirname(os.path.realpath(__file__))

    np.random.seed(args.seed)
    setup_default_logger()

    # save command line args
    with open(os.path.join(args.output_dir, 'goal_directed_params.json'),
              'w') as jf:
        json.dump(vars(args), jf, sort_keys=True, indent=4)

    optimiser = GB_MCTS_Generator(pickle_directory=args.pickle_directory,
                                  n_jobs=args.n_jobs,
                                  random_start=args.random_start,
                                  num_sims=args.num_sims,
                                  max_children=args.max_children,
                                  init_smiles=args.init_smiles,
                                  max_atoms=args.max_atoms,
                                  patience=args.patience,
                                  generations=args.generations,
                                  population_size=args.population_size)

    json_file_path = os.path.join(args.output_dir,
                                  'goal_directed_results.json')
    # No --suite option here: uses the harness's default benchmark version.
    assess_goal_directed_generation(optimiser,
                                    json_output_file=json_file_path)
def main():
    """Run the grammar-evolution (ChemGE) generator on a GuacaMol suite."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--smiles_file', default='data/guacamol_v1_all.smiles')
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--population_size', type=int, default=100)
    parser.add_argument('--n_mutations', type=int, default=200)
    parser.add_argument('--gene_size', type=int, default=300)
    parser.add_argument('--generations', type=int, default=1000)
    parser.add_argument('--n_jobs', type=int, default=-1)
    parser.add_argument('--random_start', action='store_true')
    parser.add_argument('--output_dir', type=str, default=None)
    parser.add_argument('--patience', type=int, default=5)
    parser.add_argument('--suite', default='v1')
    args = parser.parse_args()

    np.random.seed(args.seed)
    setup_default_logger()

    if args.output_dir is None:
        args.output_dir = os.path.dirname(os.path.realpath(__file__))

    # Snapshot the CLI arguments for reproducibility.
    params_file = os.path.join(args.output_dir, 'goal_directed_params.json')
    with open(params_file, 'w') as jf:
        json.dump(vars(args), jf, sort_keys=True, indent=4)

    ge_config = dict(smi_file=args.smiles_file,
                     population_size=args.population_size,
                     n_mutations=args.n_mutations,
                     gene_size=args.gene_size,
                     generations=args.generations,
                     n_jobs=args.n_jobs,
                     random_start=args.random_start,
                     patience=args.patience)
    optimiser = ChemGEGenerator(**ge_config)

    results_file = os.path.join(args.output_dir, 'goal_directed_results.json')
    assess_goal_directed_generation(optimiser,
                                    json_output_file=results_file,
                                    benchmark_version=args.suite)
"""Run the GuacaMol 'trivial' goal-directed benchmark suite on the
generative_playground trivial generator."""
from guacamol.assess_goal_directed_generation import assess_goal_directed_generation

from generative_playground.molecules.guacamol_utils import MyGoalDirectedGenerator

generator = MyGoalDirectedGenerator('trivial')
assess_goal_directed_generation(goal_directed_molecule_generator=generator,
                                benchmark_version='trivial')
# NOTE(review): this chunk starts mid-function — `parser` and the earlier
# add_argument calls (--output_dir, --model_path, --n_epochs, ...) are
# defined above this excerpt.
parser.add_argument('--n_jobs', type=int, default=-1)
parser.add_argument('--suite', default='v3')
args = parser.parse_args()

# Default the output directory and the pretrained model path relative to
# this file's location.
if args.output_dir is None:
    args.output_dir = os.path.dirname(os.path.realpath(__file__))
if args.model_path is None:
    dir_path = os.path.dirname(os.path.realpath(__file__))
    args.model_path = os.path.join(dir_path, 'pretrained_model',
                                   'model_final_0.473.pt')

# SMILES-RNN directed generator (project class); all knobs come from argparse.
optimizer = SmilesRnnDirectedGenerator(
    pretrained_model_path=args.model_path,
    n_epochs=args.n_epochs,
    mols_to_sample=args.mols_to_sample,
    keep_top=args.keep_top,
    optimize_n_epochs=args.optimize_n_epochs,
    max_len=args.max_len,
    optimize_batch_size=args.optimize_batch_size,
    number_final_samples=args.benchmark_num_samples,
    random_start=args.random_start,
    smi_file=args.smiles_file,
    n_jobs=args.n_jobs)

# Run the selected GuacaMol benchmark version and write JSON results.
json_file_path = os.path.join(args.output_dir, 'goal_directed_results.json')
assess_goal_directed_generation(optimizer,
                                json_output_file=json_file_path,
                                benchmark_version=args.suite)
"""Driver that evaluates a BestFromChemblOptimizer over a SMILES file with
the GuacaMol goal-directed benchmark suite and writes JSON results."""
import argparse
import os

from guacamol.assess_goal_directed_generation import assess_goal_directed_generation
from guacamol.utils.helpers import setup_default_logger

from .chembl_file_reader import ChemblFileReader
from .optimizer import BestFromChemblOptimizer

if __name__ == '__main__':
    setup_default_logger()

    parser = argparse.ArgumentParser(
        description='Goal-directed benchmark for best molecules from SMILES file',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--smiles_file', default='data/guacamol_v1_all.smiles')
    parser.add_argument('--output_dir', default=None, help='Output directory')
    args = parser.parse_args()

    # Default outputs to this script's directory.
    if args.output_dir is None:
        args.output_dir = os.path.dirname(os.path.realpath(__file__))

    # Reader over the SMILES file, wrapped by the optimizer (project classes).
    smiles_reader = ChemblFileReader(args.smiles_file)
    optimizer = BestFromChemblOptimizer(smiles_reader=smiles_reader)

    json_file_path = os.path.join(args.output_dir, 'goal_directed_results.json')
    # Uses the harness's default benchmark version (no --suite option here).
    assess_goal_directed_generation(optimizer, json_output_file=json_file_path)
def run_model(parameters_dict):
    """Build a population algorithm from a raw parameter dict, run it, and
    return the ``pop_alg`` instance.

    GuacaMol-style objectives are executed through the GuacaMol benchmark
    harness (``assess_goal_directed_generation``) rather than by calling
    ``pop_alg.run()`` directly.
    """
    # Extracting search parameters
    explicit_search_parameters_dict = _extract_explicit_search_parameters(
        parameters_dict)
    # Extracting IO parameters
    explicit_IO_parameters_dict = _extract_explicit_IO_parameters(
        parameters_dict)
    # Building objective function
    evaluation_strategy = _parse_objective_function_strategy(
        parameters_dict,
        explicit_IO_parameters_dict=explicit_IO_parameters_dict,
        explicit_search_parameters_dict=explicit_search_parameters_dict)
    # Building action space
    action_spaces, action_spaces_parameters, explicit_action_space_parameters = _parse_action_space(
        parameters_dict)
    # Building mutation strategy
    mutation_strategy = _parse_mutation_parameters(
        explicit_search_parameters=explicit_search_parameters_dict,
        evaluation_strategy=evaluation_strategy,
        action_spaces=action_spaces,
        action_spaces_parameters=action_spaces_parameters,
        search_space_parameters=explicit_action_space_parameters)
    # Building stop criterion strategy
    stop_criterion_strategy = _parse_stop_criterion_strategy(
        explicit_search_parameters_dict=explicit_search_parameters_dict,
        explicit_IO_parameters_dict=explicit_IO_parameters_dict)
    # Building instance
    pop_alg = _build_instance(
        evaluation_strategy=evaluation_strategy,
        mutation_strategy=mutation_strategy,
        stop_criterion_strategy=stop_criterion_strategy,
        explicit_search_parameters_dict=explicit_search_parameters_dict,
        explicit_IO_parameters_dict=explicit_IO_parameters_dict)
    # GuacaMol special case
    if is_or_contains_undefined_GuacaMol_evaluation_strategy(
            evaluation_strategy):
        model_generator = ChemPopAlgGoalDirectedGenerator(
            pop_alg=pop_alg,
            guacamol_init_top_100=explicit_search_parameters_dict[
                "guacamol_init_top_100"],
            init_pop_path=explicit_IO_parameters_dict["smiles_list_init_path"],
            output_save_path=explicit_IO_parameters_dict["model_path"])
        # Extracting proper set of benchmarks
        benchmark_parameter = get_GuacaMol_benchmark_parameter(
            evaluation_strategy)
        # NOTE(review): assumes the benchmark parameter looks like
        # "<prefix>_<version>" (e.g. "guacamol_v2") and takes the token after
        # the first underscore as the benchmark version — confirm against
        # get_GuacaMol_benchmark_parameter.
        benchmark_key = benchmark_parameter.split("_")[1]
        assess_goal_directed_generation(
            model_generator,
            json_output_file=join(explicit_IO_parameters_dict["model_path"],
                                  "output_GuacaMol.json"),
            benchmark_version=benchmark_key)
    else:
        pop_alg.run()
    return pop_alg
def main():
    """Run the Deriver generator on a GuacaMol goal-directed suite.

    Fixes:
      1. ``random.seed = args.seed`` assigned over the stdlib function
         instead of calling it, so the ``random`` module was never seeded
         (and its API was clobbered). It is now ``random.seed(args.seed)``.
      2. The stale-output cleanup wrapped six ``os.remove`` calls in one
         ``try``; the first missing file aborted removal of the rest. Each
         file is now removed best-effort individually.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--smiles_file', default='data/guacamol_v1_all.smi')
    parser.add_argument('--seed', type=int, default=24)
    parser.add_argument('--output_dir', type=str, default=None)
    parser.add_argument('--suite', default='v2')
    parser.add_argument('--generations', type=int, default=1000)
    parser.add_argument('--population', type=int, default=100)
    parser.add_argument('--selection_size', type=int, default=200)
    parser.add_argument('--selection_method', type=str, default='linear')
    parser.add_argument('--derive_size', type=int, default=100)
    parser.add_argument('--brics_fragment_db', type=str,
                        default='deriver_goal/chembl.db')
    parser.add_argument('--selfies_proportion', type=float, default=0)
    parser.add_argument('--brics_proportion', type=float, default=0)
    parser.add_argument('--selfies_gb_proportion', type=float, default=0)
    parser.add_argument('--smiles_gb_proportion', type=float, default=0)
    parser.add_argument('--mutation_rate', type=float, default=0.5)
    parser.add_argument('--enable_scanner', action='store_true', default=False)
    parser.add_argument('--enable_filter', action='store_true', default=False)
    parser.add_argument('--delayed_filtering', type=float, default=0)
    parser.add_argument('--patience', type=int, default=5)
    parser.add_argument('--temperature', type=float, default=1)
    parser.add_argument('--temp_decay', type=float, default=0.95)
    parser.add_argument('--start_task', type=int, default=0)
    parser.add_argument('--random_start', action='store_true', default=False)
    parser.add_argument('--derive_population', action='store_true',
                        default=False)
    parser.add_argument('--counterscreen', action='store_true', default=False)
    args = parser.parse_args()

    #assert os.path.exists('data/guacamol_v1_all.smiles')
    # Best-effort removal of output files left over from a previous run.
    for stale_path in ('deriver_goal/all_output_smiles.smi',
                       'deriver_goal/mean_scores_by_generation.txt',
                       'deriver_goal/best_scores_by_generation.txt',
                       'deriver_goal/worst_scores_by_generation.txt',
                       'deriver_goal/selected_output_smiles.smi',
                       'deriver_goal/top_100_output_smiles.smi'):
        try:
            os.remove(stale_path)
        except FileNotFoundError:
            pass

    np.random.seed(args.seed)
    # was: `random.seed = args.seed`, which replaced the function itself.
    random.seed(args.seed)

    # turn off the millions of lines of useless text
    logging.basicConfig(format='%(levelname)s : %(message)s',
                        level=logging.CRITICAL)

    if args.output_dir is None:
        args.output_dir = os.path.dirname(os.path.realpath(__file__))

    # save command line args
    with open(os.path.join(args.output_dir, 'goal_directed_params.json'),
              'w') as jf:
        json.dump(vars(args), jf, sort_keys=True, indent=4)

    optimiser = DeriverGenerator(
        smi_file=args.smiles_file,
        generations=args.generations,
        population=args.population,
        selection_size=args.selection_size,
        selection_method=args.selection_method,
        derive_size=args.derive_size,
        brics_fragment_db=args.brics_fragment_db,
        selfies_proportion=args.selfies_proportion,
        brics_proportion=args.brics_proportion,
        selfies_gb_proportion=args.selfies_gb_proportion,
        smiles_gb_proportion=args.smiles_gb_proportion,
        mutation_rate=args.mutation_rate,
        enable_scanner=args.enable_scanner,
        enable_filter=args.enable_filter,
        delayed_filtering=args.delayed_filtering,
        patience=args.patience,
        temperature=args.temperature,
        temp_decay=args.temp_decay,
        start_task=args.start_task,
        random_start=args.random_start,
        derive_population=args.derive_population,
        counterscreen=args.counterscreen)

    json_file_path = os.path.join(args.output_dir,
                                  'goal_directed_results.json')
    assess_goal_directed_generation(optimiser,
                                    json_output_file=json_file_path,
                                    benchmark_version=args.suite)
# NOTE(review): this chunk starts mid-script — `mutation_rate`, `n_walkers`
# and `n_cpu` are defined above this excerpt.
iterations = 100
balance = 1.0
version = "v2"
accumulate_rewards = False
json_file = "guacamol/json_results/first_nn_trial.json"

# neural net
hidden_size = 256
num_layers = 2
biderectional = False  # [sic] local-name typo kept; value feeds `bidirectional`
file = "deep_likeliness/saved_parameters/deep_scorer_network.pth"
neural_net = RNNEncoder(hidden_size=hidden_size,
                        num_layers=num_layers,
                        bidirectional=biderectional)
# Load pre-trained scorer weights from disk into the encoder.
neural_net.load_state_dict(torch.load(file))

model = SynergeticMolGDG(
    neural_network=neural_net,
    mutation_rate=mutation_rate,
    n_walkers=n_walkers,
    balance=balance,
    iterations=iterations,
    accumulate_rewards=accumulate_rewards,
    n_cpu=n_cpu,
)
# Run GuacaMol's v2 goal-directed benchmarks and write the JSON report.
assess_goal_directed_generation(model,
                                json_output_file=json_file,
                                benchmark_version=version)
# NOTE(review): this chunk starts mid-method, inside an evaluation closure —
# `all_smiles`, `smiles`, `score`, `self`, `width`, `max_depth`,
# `eval_function`, `c` and `num_simulations` are all defined above this
# excerpt; indentation here is reconstructed.
            all_smiles.add(smiles)
            # Track the best-scoring SMILES seen so far on the instance.
            if self.best_score < score:
                self.best_score = score
                self.best_smiles = smiles
            return score

        # PUCT-guided MCTS over the language model's token sequences,
        # terminating on the end-of-sequence symbol.
        mcts = LanguageModelMCTSWithPUCTTerminating(self.lm, width, max_depth,
                                                    eval_function, cpuct=c,
                                                    terminating_symbol='</s>')
        mcts.search(["<s>"], num_simulations)
        # Return the single best molecule found during the search.
        return [self.best_smiles]


if __name__ == '__main__':
    setup_default_logger()
    generator = ChemgramsMCTSGoalDirectedGenerator()
    json_file_path = os.path.join('../models',
                                  'goal_directed_learning_results.json')
    assess_goal_directed_generation(generator,
                                    json_output_file=json_file_path,
                                    benchmark_version='v2')