def main(flags):
    irl_lbl = 'no_irl' if flags.use_true_reward else 'with_irl'
    sim_label = flags.exp_name + '_min_IReLeaSE-REINFORCE_' + irl_lbl + (
        '_no_vflag' if flags.no_smiles_validity_flag else '')
    sim_data = DataNode(label=sim_label,
                        metadata={
                            'exp': flags.exp_name,
                            'date': date_label
                        })
    nodes_list = []
    sim_data.data = nodes_list

    for seed in seeds:
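        # Deferred writer construction: each call opens a fresh, timestamped
        # TensorBoard log directory, so every training run logs separately.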
        summary_writer_creator = lambda: SummaryWriter(
            log_dir="irelease_tb/{}_{}_{}/".format(
                sim_label, seed, dt.now().strftime("%Y_%m_%d__%H_%M_%S")))

        # For data collection during this round of the simulation.
        data_node = DataNode(label="seed_%d" % seed)
        nodes_list.append(data_node)

        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
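
        # Note: full GPU determinism may additionally require
        # torch.backends.cudnn.deterministic = True and
        # torch.backends.cudnn.benchmark = False.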

        print(
            '--------------------------------------------------------------------------------'
        )
        print(f'{device}\n{sim_label}\tDemonstrations file: {flags.demo_file}')
        print(
            '--------------------------------------------------------------------------------'
        )

        irelease = IReLeaSE()
        k = 1
        if flags.hparam_search:
            print(f'Hyperparameter search enabled: {flags.hparam_search_alg}')
            # arguments to callables
            extra_init_args = {}
            extra_data_args = {'flags': flags}
            extra_train_args = {
                'agent_net_path': flags.model_dir,
                'agent_net_name': flags.pretrained_model,
                'learn_irl': not flags.use_true_reward,
                'seed': seed,
                'n_episodes': 600,
                'is_hsearch': True,
                'tb_writer': summary_writer_creator
            }
            hparams_conf = get_hparam_config(flags)
            search_alg = {
                'random_search': RandomSearch,
                'bayopt_search': BayesianOptSearch
            }.get(flags.hparam_search_alg, BayesianOptSearch)
            search_args = GPMinArgs(n_calls=20, random_state=seed)
            hparam_search = search_alg(
                hparam_config=hparams_conf,
                num_folds=1,
                initializer=irelease.initialize,
                data_provider=irelease.data_provider,
                train_fn=irelease.train,
                save_model_fn=irelease.save_model,
                alg_args=search_args,
                init_args=extra_init_args,
                data_args=extra_data_args,
                train_args=extra_train_args,
                data_node=data_node,
                split_label='reinforce-rl',
                sim_label=sim_label,
                dataset_label=None,
                results_file=f'{flags.hparam_search_alg}_{sim_label}'
                f'_{date_label}_seed_{seed}')
            start = time.time()
            stats = hparam_search.fit()
            print(f'Duration = {time_since(start)}')
            print(stats)
            print("\nBest params = {}, duration={}".format(
                stats.best(), time_since(start)))
        else:
            hyper_params = default_hparams(flags)
            data_gens = irelease.data_provider(k, flags)
            init_args = irelease.initialize(hyper_params,
                                            data_gens['demo_data'],
                                            data_gens['unbiased_data'],
                                            data_gens['prior_data'])
            results = irelease.train(init_args,
                                     flags.model_dir,
                                     flags.pretrained_model,
                                     seed,
                                     sim_data_node=data_node,
                                     n_episodes=600,
                                     bias_mode=flags.bias_mode,
                                     learn_irl=not flags.use_true_reward,
                                     tb_writer=summary_writer_creator)
            irelease.save_model(
                results['model'][0],
                path=flags.model_dir,
                name=f'{flags.exp_name}_{irl_lbl}_irelease_stack-rnn_'
                     f'{hyper_params["agent_params"]["unit_type"]}'
                     f'_reinforce_agent_{date_label}_{results["score"]}_{results["epoch"]}')
            irelease.save_model(
                results['model'][1],
                path=flags.model_dir,
                name=f'{flags.exp_name}_{irl_lbl}_irelease_stack-rnn_'
                     f'{hyper_params["agent_params"]["unit_type"]}'
                     f'_reinforce_reward_net_{date_label}_{results["score"]}_{results["epoch"]}')

    # save simulation data resource tree to file.
    sim_data.to_json(path="./analysis/")
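
# A minimal driver sketch (not part of the original listing): `flags` is read as an
# argparse Namespace by `main` above. The flag names below are exactly the attributes
# `main` accesses; types and defaults are assumptions.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='IReLeaSE REINFORCE simulation')
    parser.add_argument('--exp_name', type=str, default='irelease')
    parser.add_argument('--demo_file', type=str, required=True)
    parser.add_argument('--model_dir', type=str, default='./model_dir')
    parser.add_argument('--pretrained_model', type=str, required=True)
    parser.add_argument('--bias_mode', type=str, default='max')  # assumed default
    parser.add_argument('--use_true_reward', action='store_true')
    parser.add_argument('--no_smiles_validity_flag', action='store_true')
    parser.add_argument('--hparam_search', action='store_true')
    parser.add_argument('--hparam_search_alg', type=str, default='bayopt_search',
                        choices=['random_search', 'bayopt_search'])
    main(parser.parse_args())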
Example #2
def main(flags):
    mode = 'eval' if flags.eval else 'train'
    sim_label = f'expert_rnn_reg_model_{mode}'

    print(
        '--------------------------------------------------------------------------------'
    )
    print(f'{device}\n{sim_label}\tData file: {flags.data_file}')
    print(
        '--------------------------------------------------------------------------------'
    )

    hparam_search = None

    sim_data = DataNode(label=sim_label,
                        metadata=json.dumps({
                            'date': date_label,
                            'seeds': seeds,
                            'mode': mode,
                            'sim_label': sim_label,
                            'num_folds': flags.folds
                        }))
    nodes_list = []
    sim_data.data = nodes_list

    # Load the data
    data_dict, transformer = load_smiles_data(flags.data_file,
                                              flags.cv,
                                              normalize_y=True,
                                              k=flags.folds,
                                              shuffle=5,
                                              create_val=False,
                                              train_size=.8)
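
    # `data_dict` presumably holds the train/test (or CV fold) splits and
    # `transformer` the y-normalizer fitted under normalize_y=True; the same
    # transformer is needed later to map predictions back to the original scale.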

    for seed in seeds:
        data_node = DataNode(label="seed_%d" % seed)
        nodes_list.append(data_node)

        # ensure reproducibility
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

        trainer = ExpertTrainer()
        folds = flags.folds if flags.cv else 1
        if flags.hparam_search:
            print(f'Hyperparameter search enabled: {flags.hparam_search_alg}')
            # arguments to callables
            extra_init_args = {}
            extra_data_args = {'cv': flags.cv, 'data': data_dict}
            extra_train_args = {
                'n_iterations': 5000,
                'transformer': transformer,
                'is_hsearch': True,
                'tb_writer': None
            }
            hparams_conf = hparams_config()
            if hparam_search is None:
                search_alg = {
                    'random_search': RandomSearch,
                    'bayopt_search': BayesianOptSearch
                }.get(flags.hparam_search_alg, BayesianOptSearch)
                search_args = GPMinArgs(n_calls=10, random_state=seed)
                hparam_search = search_alg(
                    hparam_config=hparams_conf,
                    num_folds=folds,
                    initializer=trainer.initialize,
                    data_provider=trainer.data_provider,
                    train_fn=trainer.train,
                    save_model_fn=trainer.save_model,
                    alg_args=search_args,
                    init_args=extra_init_args,
                    data_args=extra_data_args,
                    train_args=extra_train_args,
                    data_node=data_node,
                    split_label='random',
                    sim_label=sim_label,
                    dataset_label=os.path.split(flags.data_file)[1],
                    results_file=f'{flags.hparam_search_alg}_{sim_label}_{date_label}')
            start = time.time()
            stats = hparam_search.fit()
            print(f'Duration = {time_since(start)}')
            print(stats)
            print("Best params = {}, duration={}".format(
                stats.best(), time_since(start)))
        else:
            hyper_params = default_params(flags)
            # Initialize the model and other related entities for training.
            if flags.cv:
                folds_data = []
                data_node.data = folds_data
                data_node.label = data_node.label + 'cv'
                for k in range(folds):
                    k_node = DataNode(label="fold-%d" % k)
                    folds_data.append(k_node)
                    start_fold(k_node, data_dict, transformer, flags,
                               hyper_params, trainer, k, None)
            else:
                start_fold(data_node, data_dict, transformer, flags,
                           hyper_params, trainer, folds, None)

    # save simulation data resource tree to file.
    sim_data.to_json(path="./analysis/")
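
# The persisted resource tree mirrors the nesting built above; roughly (a sketch,
# assuming DataNode serializes label/metadata/data verbatim):
#   {"label": "expert_rnn_reg_model_train", "metadata": "{...}",
#    "data": [{"label": "seed_1", "data": [{"label": "fold-0", ...}, ...]}, ...]}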
Example #3
    def train(model,
              optimizer,
              gen_data,
              init_args,
              n_iters=5000,
              sim_data_node=None,
              epoch_ckpt=(2, 4.0),
              tb_writer=None,
              is_hsearch=False):
        tb_writer = None  # TensorBoard logging disabled here; call tb_writer() to re-enable
        start = time.time()
        best_model_wts = model.state_dict()
        best_score = -10000
        best_epoch = -1
        terminate_training = False
        e_avg = ExpAverage(.01)
        num_batches = math.ceil(gen_data.file_len / gen_data.batch_size)
        n_epochs = math.ceil(n_iters / num_batches)
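        # n_iters is a budget of mini-batch updates; dividing by the number of
        # batches per pass over the file converts it into whole epochs.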
        grad_stats = GradStats(model, beta=0.)

        # learning rate decay schedulers
        # scheduler = sch.StepLR(optimizer, step_size=500, gamma=0.01)

        # pred_loss functions
        criterion = nn.CrossEntropyLoss(
            ignore_index=gen_data.char2idx[gen_data.pad_symbol])
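        # ignore_index makes padding positions contribute neither loss nor gradient.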
        # criterion = LabelSmoothing(gen_data.n_characters, gen_data.char2idx[gen_data.pad_symbol], 0.1)

        # sub-nodes of sim data resource
        loss_lst = []
        train_loss_node = DataNode(label="train_loss", data=loss_lst)
        metrics_dict = {}
        metrics_node = DataNode(label="validation_metrics", data=metrics_dict)
        train_scores_lst = []
        train_scores_node = DataNode(label="train_score",
                                     data=train_scores_lst)
        scores_lst = []
        scores_node = DataNode(label="validation_score", data=scores_lst)

        # add sim data nodes to parent node
        if sim_data_node:
            sim_data_node.data = [
                train_loss_node, train_scores_node, metrics_node, scores_node
            ]

        try:
            # Main training loop
            tb_idx = {'train': Count(), 'val': Count(), 'test': Count()}
            for epoch in range(n_epochs):
                if terminate_training:
                    print("Terminating training...")
                    break
                for phase in ["train"]:  # , "val" if is_hsearch else "test"]:
                    if phase == "train":
                        print("Training....")
                        # Training mode
                        model.train()
                    else:
                        print("Validation...")
                        # Evaluation mode
                        model.eval()

                    epoch_losses = []
                    epoch_scores = []

                    # Iterate through mini-batches
                    # with TBMeanTracker(tb_writer, 10) as tracker:
                    with grad_stats:
                        for b in trange(0,
                                        num_batches,
                                        desc=f'{phase} in progress...'):
                            inputs, labels = gen_data.random_training_set()

                            optimizer.zero_grad()

                            # track history if only in train
                            with torch.set_grad_enabled(phase == "train"):
                                # forward propagation
                                stack = init_stack_2d(inputs.shape[0],
                                                      inputs.shape[1],
                                                      init_args['stack_depth'],
                                                      init_args['stack_width'],
                                                      dvc=init_args['device'])
                                predictions = model([inputs, stack])
                                predictions = predictions.permute(1, 0, -1)
                                predictions = predictions.contiguous().view(
                                    -1, predictions.shape[-1])
                                labels = labels.contiguous().view(-1)
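
                                # nn.CrossEntropyLoss expects (N, C) logits and
                                # (N,) targets, so the sequence outputs and the
                                # labels are flattened accordingly.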

                                # calculate loss
                                loss = criterion(predictions, labels)

                            # fail fast
                            if math.isnan(loss.item()):
                                terminate_training = True
                                break

                            # metrics
                            eval_dict = {}
                            score = GpmtPretrain.evaluate(
                                eval_dict, predictions, labels)

                            # TBoard info
                            # tracker.track("%s/loss" % phase, loss.item(), tb_idx[phase].IncAndGet())
                            # tracker.track("%s/score" % phase, score, tb_idx[phase].i)
                            # for k in eval_dict:
                            #     tracker.track('{}/{}'.format(phase, k), eval_dict[k], tb_idx[phase].i)

                            if phase == "train":
                                # backward pass
                                loss.backward()
                                optimizer.step()

                                # for epoch stats
                                epoch_losses.append(loss.item())

                                # for sim data resource
                                train_scores_lst.append(score)
                                loss_lst.append(loss.item())

                                print(
                                    "\t{}: Epoch={}/{}, batch={}/{}, "
                                    "pred_loss={:.4f}, accuracy: {:.2f}, sample: {}"
                                    .format(
                                        time_since(start), epoch + 1,
                                        n_epochs, b + 1, num_batches,
                                        loss.item(), eval_dict['accuracy'],
                                        generate_smiles(generator=model,
                                                        gen_data=gen_data,
                                                        init_args=init_args,
                                                        num_samples=1,
                                                        gen_type='trans')))
                            else:
                                # for epoch stats
                                epoch_scores.append(score)

                                # for sim data resource
                                scores_lst.append(score)
                                for m in eval_dict:
                                    if m in metrics_dict:
                                        metrics_dict[m].append(eval_dict[m])
                                    else:
                                        metrics_dict[m] = [eval_dict[m]]

                                print("\nEpoch={}/{}, batch={}/{}, "
                                      "evaluation results= {}, accuracy={}".
                                      format(epoch + 1, n_epochs, b + 1,
                                             num_batches, eval_dict, score))
                    # End of mini-batch iterations.

                    if phase == "train":
                        ep_loss = np.nanmean(epoch_losses)
                        e_avg.update(ep_loss)
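                        # Early-stop check: if the exponentially smoothed epoch
                        # loss is still above epoch_ckpt[1] at the checkpoint
                        # epoch, training is terminated.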
                        if epoch % (epoch_ckpt[0] - 1) == 0 and epoch > 0:
                            if e_avg.value > epoch_ckpt[1]:
                                terminate_training = True
                        print(
                            "\nPhase: {}, avg task pred_loss={:.4f}, ".format(
                                phase, np.nanmean(epoch_losses)))
                        # scheduler.step()
                    else:
                        mean_score = np.mean(epoch_scores)
                        if best_score < mean_score:
                            best_score = mean_score
                            best_model_wts = copy.deepcopy(model.state_dict())
                            best_epoch = epoch
        except RuntimeError as e:
            print(str(e))

        duration = time.time() - start
        print('\nModel training duration: {:.0f}m {:.0f}s'.format(
            duration // 60, duration % 60))
        try:
            model.load_state_dict(best_model_wts)
        except RuntimeError as e:
            print(str(e))
        return {'model': model, 'score': best_score, 'epoch': best_epoch}
Example #4
    def train(model,
              optimizer,
              gen_data,
              rnn_args,
              n_iters=5000,
              sim_data_node=None,
              epoch_ckpt=(1, 2.0),
              tb_writer=None,
              is_hsearch=False):
        tb_writer = None  # TensorBoard logging disabled here; call tb_writer() to re-enable
        start = time.time()
        best_model_wts = model.state_dict()
        best_score = -10000
        best_epoch = -1
        terminate_training = False
        e_avg = ExpAverage(.01)
        num_batches = math.ceil(gen_data.file_len / gen_data.batch_size)
        n_epochs = math.ceil(n_iters / num_batches)
        grad_stats = GradStats(model, beta=0.)

        # learning rate decay schedulers
        # scheduler = sch.StepLR(optimizer, step_size=500, gamma=0.01)

        # pred_loss functions
        criterion = nn.CrossEntropyLoss(
            ignore_index=gen_data.char2idx[gen_data.pad_symbol])

        # sub-nodes of sim data resource
        loss_lst = []
        train_loss_node = DataNode(label="train_loss", data=loss_lst)
        metrics_dict = {}
        metrics_node = DataNode(label="validation_metrics", data=metrics_dict)
        train_scores_lst = []
        train_scores_node = DataNode(label="train_score",
                                     data=train_scores_lst)
        scores_lst = []
        scores_node = DataNode(label="validation_score", data=scores_lst)

        # add sim data nodes to parent node
        if sim_data_node:
            sim_data_node.data = [
                train_loss_node, train_scores_node, metrics_node, scores_node
            ]

        try:
            # Main training loop
            tb_idx = {'train': Count(), 'val': Count(), 'test': Count()}
            epoch_losses = []
            epoch_scores = []
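            # NOTE: the epoch count below is hardcoded to 6; n_epochs computed
            # above is only used for progress display in the log line.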
            for epoch in range(6):
                phase = 'train'

                # Iterate through mini-batches
                # with TBMeanTracker(tb_writer, 10) as tracker:
                with grad_stats:
                    for b in trange(0,
                                    num_batches,
                                    desc=f'{phase} in progress...'):
                        inputs, labels = gen_data.random_training_set()
                        batch_size, seq_len = inputs.shape[:2]
                        optimizer.zero_grad()

                        # track history if only in train
                        with torch.set_grad_enabled(phase == "train"):
                            # Create hidden states for each layer
                            hidden_states = []
                            for _ in range(rnn_args['num_layers']):
                                hidden = init_hidden(
                                    num_layers=1,
                                    batch_size=batch_size,
                                    hidden_size=rnn_args['hidden_size'],
                                    num_dir=rnn_args['num_dir'],
                                    dvc=rnn_args['device'])
                                if rnn_args['has_cell']:
                                    cell = init_cell(
                                        num_layers=1,
                                        batch_size=batch_size,
                                        hidden_size=rnn_args['hidden_size'],
                                        num_dir=rnn_args['num_dir'],
                                        dvc=rnn_args['device'])
                                else:
                                    cell = None
                                if rnn_args['has_stack']:
                                    stack = init_stack(batch_size,
                                                       rnn_args['stack_width'],
                                                       rnn_args['stack_depth'],
                                                       dvc=rnn_args['device'])
                                else:
                                    stack = None
                                hidden_states.append((hidden, cell, stack))
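                            # Each layer carries its own (hidden, cell, stack) triple;
                            # `cell` is None for GRU-style units and `stack` is None
                            # when the layer has no stack memory.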
                            # forward propagation
                            outputs = model([inputs] + hidden_states)
                            predictions = outputs[0]
                            predictions = predictions.permute(1, 0, -1)
                            predictions = predictions.contiguous().view(
                                -1, predictions.shape[-1])
                            labels = labels.contiguous().view(-1)

                            # calculate loss
                            loss = criterion(predictions, labels)

                        # metrics
                        eval_dict = {}
                        score = IreleasePretrain.evaluate(
                            eval_dict, predictions, labels)

                        # TBoard info
                        # tracker.track("%s/loss" % phase, loss.item(), tb_idx[phase].IncAndGet())
                        # tracker.track("%s/score" % phase, score, tb_idx[phase].i)
                        # for k in eval_dict:
                        #     tracker.track('{}/{}'.format(phase, k), eval_dict[k], tb_idx[phase].i)

                        # backward pass
                        loss.backward()
                        optimizer.step()

                        # for epoch stats
                        epoch_losses.append(loss.item())

                        # for sim data resource
                        train_scores_lst.append(score)
                        loss_lst.append(loss.item())

                        # for epoch stats
                        epoch_scores.append(score)

                        print("\t{}: Epoch={}/{}, batch={}/{}, "
                              "pred_loss={:.4f}, accuracy: {:.2f}, sample: {}".
                              format(
                                  time_since(start),
                                  epoch + 1, n_epochs, b + 1, num_batches,
                                  loss.item(), eval_dict['accuracy'],
                                  generate_smiles(generator=model,
                                                  gen_data=gen_data,
                                                  init_args=rnn_args,
                                                  num_samples=1)))
                    IreleasePretrain.save_model(
                        model,
                        './model_dir/',
                        name=f'irelease-pretrained_stack-rnn_gru_'
                        f'{date_label}_epoch_{epoch}')
                # End of mini-batch iterations.
        except RuntimeError as e:
            print(str(e))

        duration = time.time() - start
        print('\nModel training duration: {:.0f}m {:.0f}s'.format(
            duration // 60, duration % 60))
        return {
            'model': model,
            'score': round(np.mean(epoch_scores), 3),
            'epoch': n_epochs
        }
Example #5
    def evaluate_model(data,
                       metrics,
                       expert_model_dir,
                       sim_data_node=None,
                       k=-1):
        start = time.time()

        assert (os.path.isdir(expert_model_dir)
                ), 'Expert predictor(s) should be in a dedicated folder'

        # sub-nodes of sim data resource
        loss_lst = []
        loss_node = DataNode(label="loss", data=loss_lst)
        metrics_dict = {}
        metrics_node = DataNode(label="metrics", data=metrics_dict)
        scores_lst = []
        scores_node = DataNode(label="score", data=scores_lst)
        predicted_vals = []
        true_vals = []
        model_preds_node = DataNode(label="predictions",
                                    data={
                                        "y_true": true_vals,
                                        "y_pred": predicted_vals
                                    })

        # add sim data nodes to parent node
        if sim_data_node:
            sim_data_node.data = [
                loss_node, metrics_node, scores_node, model_preds_node
            ]

        model_files = os.listdir(expert_model_dir)
        loaded = False
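        # Assumes the folder holds a serialized y-transformer (filename containing
        # 'transformer') and one model file per fold; otherwise `transformer` or
        # `predictor` would remain unbound below.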
        for i, m in enumerate(model_files):
            m_path = os.path.join(expert_model_dir, m)
            if 'transformer' in m:
                with open(m_path, 'rb') as f:
                    transformer = joblib.load(f)
            # The fold label should be a filename suffix, e.g. xxx_k0.mod, xxx_k1.mod, etc.
            suffix = m.split('_')[-1].split('.')[0]
            if suffix == f'k{k}' and not loaded:
                with open(os.path.join(expert_model_dir, m), 'rb') as f:
                    predictor = joblib.load(f)
                    loaded = True

        if not loaded:
            return None

        dmatrix_eval = xgb.DMatrix(data=data['test'].X,
                                   label=data['test'].y.reshape(-1, ))
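        # xgb.DMatrix is XGBoost's optimized data container; labels must be 1-D
        # vectors, hence the reshape.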

        # evaluation
        y_hat = predictor.predict(dmatrix_eval).reshape(-1, )
        y_true = dmatrix_eval.get_label().reshape(-1, )
        eval_dict = {}
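        # Scoring happens on the original label scale: the transformer loaded
        # above inverts the y-normalization that was applied during training.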
        score = XGBExpert.evaluate(eval_dict,
                                   transformer.inverse_transform(y_true),
                                   transformer.inverse_transform(y_hat),
                                   metrics)
        scores_lst.append(score)
        for m in eval_dict:
            if m in metrics_dict:
                metrics_dict[m].append(float(eval_dict[m]))
            else:
                metrics_dict[m] = [float(eval_dict[m])]

        print('Evaluation completed: score={}, metrics={}, time={}'.format(
            score, eval_dict, time_since(start)))
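
        # Caller-side sketch (hypothetical names): evaluating fold 0 of a CV run, e.g.
        #   node = DataNode(label='xgb_eval_seed_0')
        #   XGBExpert.evaluate_model(data=cv_data[0], metrics=metric_fns,
        #                            expert_model_dir='./expert_xgb/',
        #                            sim_data_node=node, k=0)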