def main(flags):
    irl_lbl = 'no_irl' if flags.use_true_reward else 'with_irl'
    sim_label = flags.exp_name + '_min_IReLeaSE-REINFORCE_' + irl_lbl + (
        '_no_vflag' if flags.no_smiles_validity_flag else '')
    sim_data = DataNode(label=sim_label,
                        metadata={'exp': flags.exp_name, 'date': date_label})
    nodes_list = []
    sim_data.data = nodes_list

    for seed in seeds:
        summary_writer_creator = lambda: SummaryWriter(
            log_dir="irelease_tb/{}_{}_{}/".format(
                sim_label, seed, dt.now().strftime("%Y_%m_%d__%H_%M_%S")))

        # for data collection of this round of simulation.
        data_node = DataNode(label="seed_%d" % seed)
        nodes_list.append(data_node)

        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

        print('--------------------------------------------------------------------------------')
        print(f'{device}\n{sim_label}\tDemonstrations file: {flags.demo_file}')
        print('--------------------------------------------------------------------------------')

        irelease = IReLeaSE()
        k = 1
        if flags.hparam_search:
            print(f'Hyperparameter search enabled: {flags.hparam_search_alg}')

            # arguments to callables
            extra_init_args = {}
            extra_data_args = {'flags': flags}
            extra_train_args = {'agent_net_path': flags.model_dir,
                                'agent_net_name': flags.pretrained_model,
                                'learn_irl': not flags.use_true_reward,
                                'seed': seed,
                                'n_episodes': 600,
                                'is_hsearch': True,
                                'tb_writer': summary_writer_creator}

            hparams_conf = get_hparam_config(flags)
            search_alg = {'random_search': RandomSearch,
                          'bayopt_search': BayesianOptSearch}.get(flags.hparam_search_alg,
                                                                  BayesianOptSearch)
            search_args = GPMinArgs(n_calls=20, random_state=seed)
            hparam_search = search_alg(hparam_config=hparams_conf,
                                       num_folds=1,
                                       initializer=irelease.initialize,
                                       data_provider=irelease.data_provider,
                                       train_fn=irelease.train,
                                       save_model_fn=irelease.save_model,
                                       alg_args=search_args,
                                       init_args=extra_init_args,
                                       data_args=extra_data_args,
                                       train_args=extra_train_args,
                                       data_node=data_node,
                                       split_label='reinforce-rl',
                                       sim_label=sim_label,
                                       dataset_label=None,
                                       results_file=f'{flags.hparam_search_alg}_{sim_label}'
                                                    f'_{date_label}_seed_{seed}')
            start = time.time()
            stats = hparam_search.fit()
            print(f'Duration = {time_since(start)}')
            print(stats)
            print("\nBest params = {}, duration={}".format(stats.best(), time_since(start)))
        else:
            hyper_params = default_hparams(flags)
            data_gens = irelease.data_provider(k, flags)
            init_args = irelease.initialize(hyper_params, data_gens['demo_data'],
                                            data_gens['unbiased_data'], data_gens['prior_data'])
            results = irelease.train(init_args, flags.model_dir, flags.pretrained_model, seed,
                                     sim_data_node=data_node, n_episodes=600,
                                     bias_mode=flags.bias_mode,
                                     learn_irl=not flags.use_true_reward,
                                     tb_writer=summary_writer_creator)
            irelease.save_model(results['model'][0], path=flags.model_dir,
                                name=f'{flags.exp_name}_{irl_lbl}_irelease_stack-rnn_'
                                     f'{hyper_params["agent_params"]["unit_type"]}'
                                     f'_reinforce_agent_{date_label}_{results["score"]}_{results["epoch"]}')
            irelease.save_model(results['model'][1], path=flags.model_dir,
                                name=f'{flags.exp_name}_{irl_lbl}_irelease_stack-rnn_'
                                     f'{hyper_params["agent_params"]["unit_type"]}'
                                     f'_reinforce_reward_net_{date_label}_{results["score"]}_{results["epoch"]}')

    # save simulation data resource tree to file.
    sim_data.to_json(path="./analysis/")
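
# Note on `summary_writer_creator` above: it is deliberately a zero-argument factory
# rather than a live SummaryWriter, so the trainer (or each hyperparameter-search
# trial) can call it to open a fresh, timestamped TensorBoard log directory.
# A minimal standalone sketch of the pattern (`make_writer_factory` is an
# illustrative name, not project code):
from datetime import datetime
from torch.utils.tensorboard import SummaryWriter


def make_writer_factory(label, seed):
    # Each call of the returned lambda creates a new run directory.
    return lambda: SummaryWriter(
        log_dir="irelease_tb/{}_{}_{}/".format(
            label, seed, datetime.now().strftime("%Y_%m_%d__%H_%M_%S")))

# Usage (commented out to avoid side effects on import):
# writer = make_writer_factory('demo', 0)()
# writer.add_scalar('reward/mean', 0.5, 0)
# writer.close()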
def main(flags):
    mode = 'eval' if flags.eval else 'train'
    sim_label = f'expert_rnn_reg_model_{mode}'

    print('--------------------------------------------------------------------------------')
    print(f'{device}\n{sim_label}\tData file: {flags.data_file}')
    print('--------------------------------------------------------------------------------')

    hparam_search = None
    sim_data = DataNode(label=sim_label,
                        metadata=json.dumps({'date': date_label,
                                             'seeds': seeds,
                                             'mode': mode,
                                             'sim_label': sim_label,
                                             'num_folds': flags.folds}))
    nodes_list = []
    sim_data.data = nodes_list

    # Load the data
    data_dict, transformer = load_smiles_data(flags.data_file, flags.cv, normalize_y=True,
                                              k=flags.folds, shuffle=5, create_val=False,
                                              train_size=.8)

    for seed in seeds:
        data_node = DataNode(label="seed_%d" % seed)
        nodes_list.append(data_node)

        # ensure reproducibility
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

        trainer = ExpertTrainer()
        folds = flags.folds if flags.cv else 1
        if flags.hparam_search:
            print(f'Hyperparameter search enabled: {flags.hparam_search_alg}')

            # arguments to callables
            extra_init_args = {}
            extra_data_args = {'cv': flags.cv, 'data': data_dict}
            extra_train_args = {'n_iterations': 5000,
                                'transformer': transformer,
                                'is_hsearch': True,
                                'tb_writer': None}

            hparams_conf = hparams_config()
            if hparam_search is None:
                search_alg = {'random_search': RandomSearch,
                              'bayopt_search': BayesianOptSearch}.get(flags.hparam_search_alg,
                                                                      BayesianOptSearch)
                search_args = GPMinArgs(n_calls=10, random_state=seed)
                hparam_search = search_alg(hparam_config=hparams_conf,
                                           num_folds=folds,
                                           initializer=trainer.initialize,
                                           data_provider=trainer.data_provider,
                                           train_fn=trainer.train,
                                           save_model_fn=trainer.save_model,
                                           alg_args=search_args,
                                           init_args=extra_init_args,
                                           data_args=extra_data_args,
                                           train_args=extra_train_args,
                                           data_node=data_node,
                                           split_label='random',
                                           sim_label=sim_label,
                                           dataset_label=os.path.split(flags.data_file)[1],
                                           results_file=f'{flags.hparam_search_alg}_{sim_label}_{date_label}')
            start = time.time()
            stats = hparam_search.fit()
            print(f'Duration = {time_since(start)}')
            print(stats)
            print("Best params = {}, duration={}".format(stats.best(), time_since(start)))
        else:
            hyper_params = default_params(flags)
            # Initialize the model and other related entities for training.
            if flags.cv:
                folds_data = []
                data_node.data = folds_data
                data_node.label = data_node.label + 'cv'
                for k in range(folds):
                    k_node = DataNode(label="fold-%d" % k)
                    folds_data.append(k_node)
                    start_fold(k_node, data_dict, transformer, flags, hyper_params,
                               trainer, k, None)
            else:
                start_fold(data_node, data_dict, transformer, flags, hyper_params,
                           trainer, folds, None)

    # save simulation data resource tree to file.
    sim_data.to_json(path="./analysis/")
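
# `load_smiles_data` is project code; judging from the arguments used above
# (normalize_y=True, train_size=.8, optional k-fold CV) it returns the split data
# plus the fitted label transformer whose `inverse_transform` is applied at
# evaluation time. A rough sketch of that contract, built on scikit-learn only
# (all names and the return layout here are illustrative assumptions):
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


def load_data_sketch(X, y, train_size=0.8, seed=0):
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, train_size=train_size,
                                              random_state=seed)
    # normalize_y=True: fit the scaler on training labels only.
    transformer = StandardScaler().fit(y_tr.reshape(-1, 1))
    y_tr = transformer.transform(y_tr.reshape(-1, 1)).ravel()
    y_te = transformer.transform(y_te.reshape(-1, 1)).ravel()
    data_dict = {'train': (X_tr, y_tr), 'test': (X_te, y_te)}
    # transformer.inverse_transform undoes the scaling when reporting metrics.
    return data_dict, transformer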
def train(model, optimizer, gen_data, init_args, n_iters=5000, sim_data_node=None,
          epoch_ckpt=(2, 4.0), tb_writer=None, is_hsearch=False):
    tb_writer = None  # tb_writer()
    start = time.time()
    best_model_wts = model.state_dict()
    best_score = -10000
    best_epoch = -1
    terminate_training = False
    e_avg = ExpAverage(.01)
    num_batches = math.ceil(gen_data.file_len / gen_data.batch_size)
    n_epochs = math.ceil(n_iters / num_batches)
    grad_stats = GradStats(model, beta=0.)

    # learning rate decay schedulers
    # scheduler = sch.StepLR(optimizer, step_size=500, gamma=0.01)

    # pred_loss functions
    criterion = nn.CrossEntropyLoss(ignore_index=gen_data.char2idx[gen_data.pad_symbol])
    # criterion = LabelSmoothing(gen_data.n_characters, gen_data.char2idx[gen_data.pad_symbol], 0.1)

    # sub-nodes of sim data resource
    loss_lst = []
    train_loss_node = DataNode(label="train_loss", data=loss_lst)
    metrics_dict = {}
    metrics_node = DataNode(label="validation_metrics", data=metrics_dict)
    train_scores_lst = []
    train_scores_node = DataNode(label="train_score", data=train_scores_lst)
    scores_lst = []
    scores_node = DataNode(label="validation_score", data=scores_lst)

    # add sim data nodes to parent node
    if sim_data_node:
        sim_data_node.data = [train_loss_node, train_scores_node, metrics_node, scores_node]

    try:
        # Main training loop
        tb_idx = {'train': Count(), 'val': Count(), 'test': Count()}
        for epoch in range(n_epochs):
            if terminate_training:
                print("Terminating training...")
                break
            for phase in ["train"]:  # , "val" if is_hsearch else "test"]:
                if phase == "train":
                    print("Training....")
                    # Training mode
                    model.train()
                else:
                    print("Validation...")
                    # Evaluation mode
                    model.eval()

                epoch_losses = []
                epoch_scores = []

                # Iterate through mini-batches
                # with TBMeanTracker(tb_writer, 10) as tracker:
                with grad_stats:
                    for b in trange(0, num_batches, desc=f'{phase} in progress...'):
                        inputs, labels = gen_data.random_training_set()
                        optimizer.zero_grad()

                        # track history only if in train
                        with torch.set_grad_enabled(phase == "train"):
                            # forward propagation
                            stack = init_stack_2d(inputs.shape[0], inputs.shape[1],
                                                  init_args['stack_depth'],
                                                  init_args['stack_width'],
                                                  dvc=init_args['device'])
                            predictions = model([inputs, stack])
                            predictions = predictions.permute(1, 0, -1)
                            predictions = predictions.contiguous().view(-1, predictions.shape[-1])
                            labels = labels.contiguous().view(-1)

                            # calculate loss
                            loss = criterion(predictions, labels)

                        # fail fast on a diverged loss
                        if str(loss.item()) == "nan":
                            terminate_training = True
                            break

                        # metrics
                        eval_dict = {}
                        score = GpmtPretrain.evaluate(eval_dict, predictions, labels)

                        # TBoard info
                        # tracker.track("%s/loss" % phase, loss.item(), tb_idx[phase].IncAndGet())
                        # tracker.track("%s/score" % phase, score, tb_idx[phase].i)
                        # for k in eval_dict:
                        #     tracker.track('{}/{}'.format(phase, k), eval_dict[k], tb_idx[phase].i)

                        if phase == "train":
                            # backward pass
                            loss.backward()
                            optimizer.step()

                            # for epoch stats
                            epoch_losses.append(loss.item())

                            # for sim data resource
                            train_scores_lst.append(score)
                            loss_lst.append(loss.item())

                            print("\t{}: Epoch={}/{}, batch={}/{}, "
                                  "pred_loss={:.4f}, accuracy: {:.2f}, sample: {}".format(
                                      time_since(start), epoch + 1, n_epochs, b + 1,
                                      num_batches, loss.item(), eval_dict['accuracy'],
                                      generate_smiles(generator=model, gen_data=gen_data,
                                                      init_args=init_args, num_samples=1,
                                                      gen_type='trans')))
                        else:
                            # for epoch stats
                            epoch_scores.append(score)

                            # for sim data resource
                            scores_lst.append(score)
                            for m in eval_dict:
                                if m in metrics_dict:
                                    metrics_dict[m].append(eval_dict[m])
                                else:
                                    metrics_dict[m] = [eval_dict[m]]
                            print("\nEpoch={}/{}, batch={}/{}, "
                                  "evaluation results={}, accuracy={}".format(
                                      epoch + 1, n_epochs, b + 1, num_batches,
                                      eval_dict, score))
                # End of mini-batch iterations.

                if phase == "train":
                    ep_loss = np.nanmean(epoch_losses)
                    e_avg.update(ep_loss)
                    if epoch % (epoch_ckpt[0] - 1) == 0 and epoch > 0:
                        if e_avg.value > epoch_ckpt[1]:
                            terminate_training = True
                    print("\nPhase: {}, avg task pred_loss={:.4f}, ".format(
                        phase, np.nanmean(epoch_losses)))
                    # scheduler.step()
                else:
                    mean_score = np.mean(epoch_scores)
                    if best_score < mean_score:
                        best_score = mean_score
                        best_model_wts = copy.deepcopy(model.state_dict())
                        best_epoch = epoch
    except RuntimeError as e:
        print(str(e))

    duration = time.time() - start
    print('\nModel training duration: {:.0f}m {:.0f}s'.format(duration // 60, duration % 60))
    try:
        model.load_state_dict(best_model_wts)
    except RuntimeError as e:
        print(str(e))
    return {'model': model, 'score': best_score, 'epoch': best_epoch}
def train(model, optimizer, gen_data, rnn_args, n_iters=5000, sim_data_node=None,
          epoch_ckpt=(1, 2.0), tb_writer=None, is_hsearch=False):
    tb_writer = None  # tb_writer()
    start = time.time()
    best_model_wts = model.state_dict()
    best_score = -10000
    best_epoch = -1
    terminate_training = False
    e_avg = ExpAverage(.01)
    num_batches = math.ceil(gen_data.file_len / gen_data.batch_size)
    n_epochs = math.ceil(n_iters / num_batches)
    grad_stats = GradStats(model, beta=0.)

    # learning rate decay schedulers
    # scheduler = sch.StepLR(optimizer, step_size=500, gamma=0.01)

    # pred_loss functions
    criterion = nn.CrossEntropyLoss(ignore_index=gen_data.char2idx[gen_data.pad_symbol])

    # sub-nodes of sim data resource
    loss_lst = []
    train_loss_node = DataNode(label="train_loss", data=loss_lst)
    metrics_dict = {}
    metrics_node = DataNode(label="validation_metrics", data=metrics_dict)
    train_scores_lst = []
    train_scores_node = DataNode(label="train_score", data=train_scores_lst)
    scores_lst = []
    scores_node = DataNode(label="validation_score", data=scores_lst)

    # add sim data nodes to parent node
    if sim_data_node:
        sim_data_node.data = [train_loss_node, train_scores_node, metrics_node, scores_node]

    try:
        # Main training loop
        tb_idx = {'train': Count(), 'val': Count(), 'test': Count()}
        epoch_losses = []
        epoch_scores = []
        for epoch in range(6):
            phase = 'train'

            # Iterate through mini-batches
            # with TBMeanTracker(tb_writer, 10) as tracker:
            with grad_stats:
                for b in trange(0, num_batches, desc=f'{phase} in progress...'):
                    inputs, labels = gen_data.random_training_set()
                    batch_size, seq_len = inputs.shape[:2]
                    optimizer.zero_grad()

                    # track history only if in train
                    with torch.set_grad_enabled(phase == "train"):
                        # Create hidden states for each layer
                        hidden_states = []
                        for _ in range(rnn_args['num_layers']):
                            hidden = init_hidden(num_layers=1, batch_size=batch_size,
                                                 hidden_size=rnn_args['hidden_size'],
                                                 num_dir=rnn_args['num_dir'],
                                                 dvc=rnn_args['device'])
                            if rnn_args['has_cell']:
                                cell = init_cell(num_layers=1, batch_size=batch_size,
                                                 hidden_size=rnn_args['hidden_size'],
                                                 num_dir=rnn_args['num_dir'],
                                                 dvc=rnn_args['device'])
                            else:
                                cell = None
                            if rnn_args['has_stack']:
                                stack = init_stack(batch_size, rnn_args['stack_width'],
                                                   rnn_args['stack_depth'],
                                                   dvc=rnn_args['device'])
                            else:
                                stack = None
                            hidden_states.append((hidden, cell, stack))

                        # forward propagation
                        outputs = model([inputs] + hidden_states)
                        predictions = outputs[0]
                        predictions = predictions.permute(1, 0, -1)
                        predictions = predictions.contiguous().view(-1, predictions.shape[-1])
                        labels = labels.contiguous().view(-1)

                        # calculate loss
                        loss = criterion(predictions, labels)

                    # metrics
                    eval_dict = {}
                    score = IreleasePretrain.evaluate(eval_dict, predictions, labels)

                    # TBoard info
                    # tracker.track("%s/loss" % phase, loss.item(), tb_idx[phase].IncAndGet())
                    # tracker.track("%s/score" % phase, score, tb_idx[phase].i)
                    # for k in eval_dict:
                    #     tracker.track('{}/{}'.format(phase, k), eval_dict[k], tb_idx[phase].i)

                    # backward pass
                    loss.backward()
                    optimizer.step()

                    # for epoch stats
                    epoch_losses.append(loss.item())

                    # for sim data resource
                    train_scores_lst.append(score)
                    loss_lst.append(loss.item())

                    # for epoch stats
                    epoch_scores.append(score)

                    print("\t{}: Epoch={}/{}, batch={}/{}, "
                          "pred_loss={:.4f}, accuracy: {:.2f}, sample: {}".format(
                              time_since(start), epoch + 1, n_epochs, b + 1, num_batches,
                              loss.item(), eval_dict['accuracy'],
                              generate_smiles(generator=model, gen_data=gen_data,
                                              init_args=rnn_args, num_samples=1)))
            # End of mini-batch iterations.
            IreleasePretrain.save_model(model, './model_dir/',
                                        name=f'irelease-pretrained_stack-rnn_gru_'
                                             f'{date_label}_epoch_{epoch}')
    except RuntimeError as e:
        print(str(e))

    duration = time.time() - start
    print('\nModel training duration: {:.0f}m {:.0f}s'.format(duration // 60, duration % 60))
    return {'model': model,
            'score': round(np.mean(epoch_scores), 3),
            'epoch': n_epochs}
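
# The init_* helpers called above are project code. Their likely shapes can be read
# off PyTorch's RNN conventions (hidden/cell: (num_layers * num_dirs, batch,
# hidden_size)) and the stack-RNN call sites (stack: (batch, depth, width)).
# Hypothetical zero-initialized sketches, for illustration only:
import torch


def init_hidden_sketch(num_layers, batch_size, hidden_size, num_dir=1, dvc='cpu'):
    return torch.zeros(num_layers * num_dir, batch_size, hidden_size, device=dvc)


def init_cell_sketch(num_layers, batch_size, hidden_size, num_dir=1, dvc='cpu'):
    # Same shape as the hidden state; only used when the unit type is LSTM.
    return torch.zeros(num_layers * num_dir, batch_size, hidden_size, device=dvc)


def init_stack_sketch(batch_size, stack_width, stack_depth, dvc='cpu'):
    return torch.zeros(batch_size, stack_depth, stack_width, device=dvc)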
def evaluate_model(data, metrics, expert_model_dir, sim_data_node=None, k=-1):
    start = time.time()
    assert os.path.isdir(expert_model_dir), 'Expert predictor(s) should be in a dedicated folder'

    # sub-nodes of sim data resource
    loss_lst = []
    loss_node = DataNode(label="loss", data=loss_lst)
    metrics_dict = {}
    metrics_node = DataNode(label="metrics", data=metrics_dict)
    scores_lst = []
    scores_node = DataNode(label="score", data=scores_lst)
    predicted_vals = []
    true_vals = []
    model_preds_node = DataNode(label="predictions",
                                data={"y_true": true_vals, "y_pred": predicted_vals})

    # add sim data nodes to parent node
    if sim_data_node:
        sim_data_node.data = [loss_node, metrics_node, scores_node, model_preds_node]

    model_files = os.listdir(expert_model_dir)
    loaded = False
    predictor = transformer = None
    for i, m in enumerate(model_files):
        m_path = os.path.join(expert_model_dir, m)
        if 'transformer' in m:
            with open(m_path, 'rb') as f:
                transformer = joblib.load(f)
        # fold label should be a suffix, e.g. xxx_k0.mod, xxx_k1.mod, etc.
        suffix = m.split('_')[-1].split('.')[0]
        if suffix == f'k{k}' and not loaded:
            with open(os.path.join(expert_model_dir, m), 'rb') as f:
                predictor = joblib.load(f)
                loaded = True
    if not loaded:
        return None

    dmatrix_eval = xgb.DMatrix(data=data['test'].X, label=data['test'].y.reshape(-1, ))

    # evaluation
    y_hat = predictor.predict(dmatrix_eval).reshape(-1, )
    y_true = dmatrix_eval.get_label().reshape(-1, )
    eval_dict = {}
    score = XGBExpert.evaluate(eval_dict, transformer.inverse_transform(y_true),
                               transformer.inverse_transform(y_hat), metrics)

    # for sim data resource
    scores_lst.append(score)
    for m in eval_dict:
        if m in metrics_dict:
            metrics_dict[m].append(float(eval_dict[m]))
        else:
            metrics_dict[m] = [float(eval_dict[m])]

    print('Evaluation completed: score={}, metrics={}, time={}'.format(
        score, eval_dict, time_since(start)))
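
# The evaluation above wraps the project's XGBExpert.evaluate; the underlying
# DMatrix round trip can be reproduced with plain xgboost/numpy. A minimal,
# self-contained sketch on synthetic data (`rmse` stands in for the project's
# metric set):
import numpy as np
import xgboost as xgb

X = np.random.rand(100, 8)
y = X.sum(axis=1)
dtrain = xgb.DMatrix(data=X, label=y)
booster = xgb.train({'objective': 'reg:squarederror'}, dtrain, num_boost_round=20)

# Predict on an evaluation DMatrix and score against its stored labels.
deval = xgb.DMatrix(data=X, label=y)
y_hat = booster.predict(deval).reshape(-1, )
y_true = deval.get_label().reshape(-1, )
rmse = float(np.sqrt(np.mean((y_hat - y_true) ** 2)))
print(f'RMSE = {rmse:.4f}')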