def test_something(self):
    """Build a small DataNode tree and verify it serializes to JSON."""
    root = DataNode(label='node_label', metadata='test the metadata')
    children = []
    root.data = children
    seed_node = DataNode(label="seed_0")
    children.append(seed_node)

    # sub-nodes of sim data resource
    train_loss_node = DataNode(label="training_loss", data=[])
    metrics_node = DataNode(label="validation_metrics", data={})
    scores_node = DataNode(label="validation_score", data=[])

    # add sim data nodes to parent node
    sim_data_node = seed_node
    if sim_data_node:
        sim_data_node.data = [train_loss_node, metrics_node, scores_node]

    root.to_json(path="./")
    self.assertIsNotNone(root.to_json_str())
def main(flags):
    """Run IReLeaSE REINFORCE training (or hyperparameter search) for every seed.

    A ``DataNode`` is created per seed under one simulation-data tree, and the
    whole tree is serialized to ``./analysis/`` when all seeds are done.

    :param flags: parsed experiment options (exp_name, model_dir, etc.).
    """
    irl_lbl = 'no_irl' if flags.use_true_reward else 'with_irl'
    sim_label = flags.exp_name + '_min_IReLeaSE-REINFORCE_' + irl_lbl + (
        '_no_vflag' if flags.no_smiles_validity_flag else '')
    sim_data = DataNode(label=sim_label,
                        metadata={'exp': flags.exp_name, 'date': date_label})
    nodes_list = []
    sim_data.data = nodes_list
    for seed in seeds:
        # Bind the current seed as a default argument: a plain closure would
        # late-bind `seed` (flake8-bugbear B023) and could log to the wrong
        # directory if the creator is invoked after the loop variable advances.
        summary_writer_creator = lambda seed=seed: SummaryWriter(
            log_dir="irelease_tb"
                    "/{}_{}_{}/".format(sim_label, seed,
                                        dt.now().strftime("%Y_%m_%d__%H_%M_%S")))

        # for data collection of this round of simulation.
        data_node = DataNode(label="seed_%d" % seed)
        nodes_list.append(data_node)

        # Seed every RNG source for reproducibility of this round.
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

        print('--------------------------------------------------------------------------------')
        print(f'{device}\n{sim_label}\tDemonstrations file: {flags.demo_file}')
        print('--------------------------------------------------------------------------------')

        irelease = IReLeaSE()
        k = 1
        if flags.hparam_search:
            print(f'Hyperparameter search enabled: {flags.hparam_search_alg}')
            # arguments to callables
            extra_init_args = {}
            extra_data_args = {'flags': flags}
            extra_train_args = {'agent_net_path': flags.model_dir,
                                'agent_net_name': flags.pretrained_model,
                                'learn_irl': not flags.use_true_reward,
                                'seed': seed,
                                'n_episodes': 600,
                                'is_hsearch': True,
                                'tb_writer': summary_writer_creator}
            hparams_conf = get_hparam_config(flags)
            # Fall back to Bayesian optimization for unrecognized algorithm names.
            search_alg = {'random_search': RandomSearch,
                          'bayopt_search': BayesianOptSearch}.get(flags.hparam_search_alg,
                                                                  BayesianOptSearch)
            search_args = GPMinArgs(n_calls=20, random_state=seed)
            hparam_search = search_alg(hparam_config=hparams_conf,
                                       num_folds=1,
                                       initializer=irelease.initialize,
                                       data_provider=irelease.data_provider,
                                       train_fn=irelease.train,
                                       save_model_fn=irelease.save_model,
                                       alg_args=search_args,
                                       init_args=extra_init_args,
                                       data_args=extra_data_args,
                                       train_args=extra_train_args,
                                       data_node=data_node,
                                       split_label='reinforce-rl',
                                       sim_label=sim_label,
                                       dataset_label=None,
                                       results_file=f'{flags.hparam_search_alg}_{sim_label}'
                                                    f'_{date_label}_seed_{seed}')
            start = time.time()
            stats = hparam_search.fit()
            print(f'Duration = {time_since(start)}')
            print(stats)
            print("\nBest params = {}, duration={}".format(stats.best(), time_since(start)))
        else:
            hyper_params = default_hparams(flags)
            data_gens = irelease.data_provider(k, flags)
            init_args = irelease.initialize(hyper_params, data_gens['demo_data'],
                                            data_gens['unbiased_data'],
                                            data_gens['prior_data'])
            results = irelease.train(init_args, flags.model_dir, flags.pretrained_model,
                                     seed, sim_data_node=data_node, n_episodes=600,
                                     bias_mode=flags.bias_mode,
                                     learn_irl=not flags.use_true_reward,
                                     tb_writer=summary_writer_creator)
            # results['model'] is indexed 0/1 here; judging by the save names,
            # 0 is the agent and 1 the reward net — confirm against train().
            irelease.save_model(results['model'][0], path=flags.model_dir,
                                name=f'{flags.exp_name}_{irl_lbl}_irelease_stack-rnn_'
                                     f'{hyper_params["agent_params"]["unit_type"]}'
                                     f'_reinforce_agent_{date_label}_{results["score"]}_{results["epoch"]}')
            irelease.save_model(results['model'][1], path=flags.model_dir,
                                name=f'{flags.exp_name}_{irl_lbl}_irelease_stack-rnn_'
                                     f'{hyper_params["agent_params"]["unit_type"]}'
                                     f'_reinforce_reward_net_{date_label}_{results["score"]}_{results["epoch"]}')

    # save simulation data resource tree to file.
    sim_data.to_json(path="./analysis/")
def main(flags):
    """Pretrain (or evaluate, or hparam-search) the Stack-RNN generator per seed.

    Collects per-seed results in a ``DataNode`` tree and writes it to
    ``./analysis/`` at the end.

    :param flags: parsed experiment options; accessed by attribute throughout.
    """
    sim_label = flags.exp_name if flags.exp_name else 'Irelease-pretraining-Stack-RNN'
    if flags.eval:
        sim_label += '_eval'
    sim_data = DataNode(label=sim_label,
                        metadata={'exp': flags.exp_name, 'date': date_label})
    nodes_list = []
    sim_data.data = nodes_list

    # For searching over multiple seeds
    hparam_search = None

    for seed in seeds:
        # Bind `seed` as a default argument to avoid the late-binding closure
        # pitfall (flake8-bugbear B023) — the creator is stashed in train args
        # and may be called after the loop variable has moved on.
        summary_writer_creator = lambda seed=seed: SummaryWriter(
            log_dir="tb_gpmt"
                    "/{}_{}_{}/".format(sim_label, seed,
                                        dt.now().strftime("%Y_%m_%d__%H_%M_%S")))

        # for data collection of this round of simulation.
        data_node = DataNode(label="seed_%d" % seed)
        nodes_list.append(data_node)

        # Seed every RNG source for reproducibility of this round.
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

        print('-------------------------------------------------------------------------------------------------')
        print(f'Running on dataset: {flags.data_file}, experiment = {flags.exp_name}')
        print('-------------------------------------------------------------------------------------------------')

        trainer = IreleasePretrain()
        k = 1
        # NOTE: normalized to attribute access (the original subscripted
        # flags["hparam_search"]); the rest of this function already uses
        # flags.eval / flags.model_dir, and subscripting would fail on an
        # argparse.Namespace.
        if flags.hparam_search:
            print("Hyperparameter search enabled: {}".format(flags.hparam_search_alg))
            # arguments to callables
            extra_init_args = {}
            extra_data_args = {"flags": flags}
            extra_train_args = {"is_hsearch": True,
                                "n_iters": 50000,
                                "tb_writer": summary_writer_creator}
            hparams_conf = get_hparam_config(flags)
            # The search object is created once and reused across seeds.
            if hparam_search is None:
                search_alg = {"random_search": RandomSearch,
                              "bayopt_search": BayesianOptSearch}.get(flags.hparam_search_alg,
                                                                      BayesianOptSearch)
                search_args = GPMinArgs(n_calls=20, random_state=seed)
                hparam_search = search_alg(hparam_config=hparams_conf,
                                           num_folds=1,
                                           initializer=trainer.initialize,
                                           data_provider=trainer.data_provider,
                                           train_fn=trainer.train,
                                           save_model_fn=trainer.save_model,
                                           alg_args=search_args,
                                           init_args=extra_init_args,
                                           data_args=extra_data_args,
                                           train_args=extra_train_args,
                                           data_node=data_node,
                                           split_label='',
                                           sim_label=sim_label,
                                           dataset_label='ChEMBL_SMILES',
                                           results_file="{}_{}_gpmt_{}.csv".format(
                                               flags.hparam_search_alg, sim_label, date_label))
            stats = hparam_search.fit(model_dir="models", model_name='irelease')
            print(stats)
            print("Best params = {}".format(stats.best()))
        else:
            hyper_params = default_hparams(flags)
            model, optimizer, gen_data, rnn_args = trainer.initialize(
                hyper_params, gen_data=trainer.data_provider(k, flags)['train'])
            if flags.eval:
                # Evaluation-only path: load a trained model and sample SMILES.
                load_model = trainer.load_model(flags.model_dir, flags.eval_model_name)
                model.load_state_dict(load_model)
                trainer.evaluate_model(model, gen_data, rnn_args, data_node,
                                       num_smiles=flags.num_smiles)
            else:
                if flags.init_model:
                    # Warm-start training from a previously saved checkpoint.
                    load_model = trainer.load_model(flags.model_dir, flags.init_model)
                    model.load_state_dict(load_model)
                    print(f'Model weights {flags.init_model} loaded successfully!')
                results = trainer.train(model=model, optimizer=optimizer,
                                        gen_data=gen_data, rnn_args=rnn_args,
                                        n_iters=1500000, sim_data_node=data_node,
                                        tb_writer=summary_writer_creator)
                trainer.save_model(results['model'], flags.model_dir,
                                   name=f'irelease-pretrained_stack-rnn_{hyper_params["unit_type"]}_'
                                        f'{date_label}_{results["score"]}_{results["epoch"]}')

    # save simulation data resource tree to file.
    sim_data.to_json(path="./analysis/")
def main(flags):
    """Train or evaluate the expert RNN regression model across all seeds."""
    mode = 'eval' if flags.eval else 'train'
    sim_label = f'expert_rnn_reg_model_{mode}'
    print('--------------------------------------------------------------------------------')
    print(f'{device}\n{sim_label}\tData file: {flags.data_file}')
    print('--------------------------------------------------------------------------------')

    hparam_search = None
    sim_data = DataNode(label=sim_label,
                        metadata=json.dumps({'date': date_label,
                                             'seeds': seeds,
                                             'mode': mode,
                                             'sim_label': sim_label,
                                             'num_folds': flags.folds}))
    seed_nodes = []
    sim_data.data = seed_nodes

    # Load the data
    data_dict, transformer = load_smiles_data(flags.data_file, flags.cv,
                                              normalize_y=True, k=flags.folds,
                                              shuffle=5, create_val=False,
                                              train_size=.8)

    for seed in seeds:
        data_node = DataNode(label="seed_%d" % seed)
        seed_nodes.append(data_node)

        # ensure reproducibility
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

        trainer = ExpertTrainer()
        folds = flags.folds if flags.cv else 1
        if flags.hparam_search:
            print(f'Hyperparameter search enabled: {flags.hparam_search_alg}')
            # arguments to callables
            extra_init_args = {}
            extra_data_args = {'cv': flags.cv, 'data': data_dict}
            extra_train_args = {'n_iterations': 5000,
                                'transformer': transformer,
                                'is_hsearch': True,
                                'tb_writer': None}
            hparams_conf = hparams_config()
            # Only construct the search object on the first seed; later seeds
            # reuse it.
            if hparam_search is None:
                search_alg = {'random_search': RandomSearch,
                              'bayopt_search': BayesianOptSearch}.get(flags.hparam_search_alg,
                                                                      BayesianOptSearch)
                search_args = GPMinArgs(n_calls=10, random_state=seed)
                hparam_search = search_alg(hparam_config=hparams_conf,
                                           num_folds=folds,
                                           initializer=trainer.initialize,
                                           data_provider=trainer.data_provider,
                                           train_fn=trainer.train,
                                           save_model_fn=trainer.save_model,
                                           alg_args=search_args,
                                           init_args=extra_init_args,
                                           data_args=extra_data_args,
                                           train_args=extra_train_args,
                                           data_node=data_node,
                                           split_label='random',
                                           sim_label=sim_label,
                                           dataset_label=os.path.split(flags.data_file)[1],
                                           results_file=f'{flags.hparam_search_alg}_{sim_label}_{date_label}')
            t0 = time.time()
            stats = hparam_search.fit()
            print(f'Duration = {time_since(t0)}')
            print(stats)
            print("Best params = {}, duration={}".format(stats.best(), time_since(t0)))
        else:
            hyper_params = default_params(flags)
            # Initialize the model and other related entities for training.
            if flags.cv:
                fold_nodes = []
                data_node.data = fold_nodes
                data_node.label = data_node.label + 'cv'
                for k in range(folds):
                    fold_node = DataNode(label="fold-%d" % k)
                    fold_nodes.append(fold_node)
                    start_fold(fold_node, data_dict, transformer, flags,
                               hyper_params, trainer, k, None)
            else:
                start_fold(data_node, data_dict, transformer, flags,
                           hyper_params, trainer, folds, None)

    # save simulation data resource tree to file.
    sim_data.to_json(path="./analysis/")
def main(flags):
    """Train/evaluate the RNN cross-entropy generator baseline for every seed.

    Per-seed results are collected in a ``DataNode`` tree that is serialized
    to ``./analysis/`` after the loop.

    :param flags: parsed experiment options; accessed by attribute throughout.
    """
    sim_label = f'RNN_XEnt_Generator_Baseline_{flags.exp_type}'
    if flags.eval:
        sim_label += '_eval'
    sim_data = DataNode(label=sim_label,
                        metadata={'exp': flags.exp_type, 'date': date_label})
    nodes_list = []
    sim_data.data = nodes_list

    # For searching over multiple seeds
    hparam_search = None

    pretraining = flags.exp_type == 'pretraining'
    for seed in seeds:
        # Bind `seed` as a default argument to avoid the late-binding closure
        # pitfall (flake8-bugbear B023) — the creator is stored in train args
        # and may be invoked after the loop variable has advanced.
        summary_writer_creator = lambda seed=seed: SummaryWriter(
            log_dir="irelease_tb_rnn_xent"
                    "/{}_{}_{}/".format(sim_label, seed,
                                        dt.now().strftime("%Y_%m_%d__%H_%M_%S")))

        # for data collection of this round of simulation.
        data_node = DataNode(label="seed_%d" % seed)
        nodes_list.append(data_node)

        # Seed every RNG source for reproducibility of this round.
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

        print('--------------------------------------------------------------------------------')
        print(f'{device}\n{sim_label}\tDemonstrations file: {flags.prior_data if pretraining else flags.demo_file}')
        print('--------------------------------------------------------------------------------')

        trainer = RNNBaseline()
        k = 1
        # NOTE: normalized to attribute access (the original subscripted
        # flags["hparam_search"]); the rest of this function already uses
        # flags.eval / flags.model_dir, and subscripting would fail on an
        # argparse.Namespace.
        if flags.hparam_search:
            print("Hyperparameter search enabled: {}".format(flags.hparam_search_alg))
            # arguments to callables
            extra_init_args = {}
            extra_data_args = {"flags": flags}
            extra_train_args = {"is_hsearch": True,
                                "n_iters": 50000,
                                "tb_writer": summary_writer_creator}
            hparams_conf = get_hparam_config(flags)
            # The search object is created once and reused across seeds.
            if hparam_search is None:
                search_alg = {"random_search": RandomSearch,
                              "bayopt_search": BayesianOptSearch}.get(flags.hparam_search_alg,
                                                                      BayesianOptSearch)
                search_args = GPMinArgs(n_calls=20, random_state=seed)
                hparam_search = search_alg(hparam_config=hparams_conf,
                                           num_folds=1,
                                           initializer=trainer.initialize,
                                           data_provider=trainer.data_provider,
                                           train_fn=trainer.train,
                                           save_model_fn=trainer.save_model,
                                           alg_args=search_args,
                                           init_args=extra_init_args,
                                           data_args=extra_data_args,
                                           train_args=extra_train_args,
                                           data_node=data_node,
                                           split_label='',
                                           sim_label=sim_label,
                                           dataset_label='ChEMBL_SMILES',
                                           results_file="{}_{}_gpmt_{}.csv".format(
                                               flags.hparam_search_alg, sim_label, date_label))
            stats = hparam_search.fit(model_dir="models", model_name='irelease')
            print(stats)
            print("Best params = {}".format(stats.best()))
        else:
            hyper_params = default_hparams(flags)
            data_gens = trainer.data_provider(k, flags)
            model, optimizer, rnn_args = trainer.initialize(hyper_params,
                                                            data_gens['demo_data'],
                                                            data_gens['unbiased_data'],
                                                            data_gens['prior_data'])
            if flags.eval:
                # Evaluation-only path: load a trained model and sample SMILES.
                load_model = trainer.load_model(flags.model_dir, flags.eval_model_name)
                model.load_state_dict(load_model)
                trainer.evaluate_model(model, data_gens['demo_data'], rnn_args,
                                       data_node, num_smiles=200)
            else:
                results = trainer.train(generator=model,
                                        optimizer=optimizer,
                                        rnn_args=rnn_args,
                                        n_iters=40000,
                                        sim_data_node=data_node,
                                        tb_writer=summary_writer_creator,
                                        is_pretraining=pretraining,
                                        pretrained_net_path=flags.model_dir,
                                        pretrained_net_name=flags.pretrained_model)
                trainer.save_model(results['model'], flags.model_dir,
                                   name=f'rnn_xent_gen_baseline_{flags.exp_type}_{hyper_params["unit_type"]}_'
                                        f'{date_label}_{results["score"]}_{results["epoch"]}_seed_{seed}')

    # save simulation data resource tree to file.
    sim_data.to_json(path="./analysis/")