Esempio n. 1
0
    def evaluate_model(model,
                       gen_data,
                       rnn_args,
                       sim_data_node=None,
                       num_smiles=1000):
        """Sample SMILES strings from a trained generator and record validity stats.

        Args:
            model: trained generative model; switched to eval mode here.
            gen_data: data object forwarded to ``generate_smiles``.
            rnn_args: RNN initialization arguments used for sampling.
            sim_data_node: optional DataNode that receives the valid/invalid
                SMILES sub-nodes.
            num_smiles: total number of SMILES samples to draw.
        """
        start = time.time()
        model.eval()

        # Sample SMILES in batches of `step` to bound per-call sample size,
        # with one final batch for any remainder.
        samples = []
        step = 100
        n_batches, remainder = divmod(num_smiles, step)
        for _ in range(n_batches):
            samples.extend(
                generate_smiles(generator=model,
                                gen_data=gen_data,
                                init_args=rnn_args,
                                num_samples=step,
                                is_train=False,
                                verbose=True,
                                max_len=smiles_max_len))
        if remainder > 0:
            samples.extend(
                generate_smiles(generator=model,
                                gen_data=gen_data,
                                init_args=rnn_args,
                                num_samples=remainder,
                                is_train=False,
                                verbose=True,
                                max_len=smiles_max_len))
        # `canonical_smiles` maps invalid entries to empty strings; the second
        # return value (validity vector) is unused here.
        smiles, _ = canonical_smiles(samples)
        valid_smiles = []
        invalid_smiles = []
        for idx, sm in enumerate(smiles):
            if sm:  # non-empty canonical form => valid molecule
                valid_smiles.append(sm)
            else:
                invalid_smiles.append(samples[idx])
        v = len(valid_smiles)  # count before dedup, for duplicate stats below
        valid_smiles = list(set(valid_smiles))
        print(
            f'Percentage of valid SMILES = {float(len(valid_smiles)) / float(len(samples)):.2f}, '
            f'Num. samples = {len(samples)}, Num. valid = {len(valid_smiles)}, '
            f'Num. requested = {num_smiles}, Num. dups = {v - len(valid_smiles)}'
        )

        # sub-nodes of sim data resource
        smiles_node = DataNode(label="valid_smiles", data=valid_smiles)
        invalid_smiles_node = DataNode(label='invalid_smiles',
                                       data=invalid_smiles)

        # add sim data nodes to parent node
        if sim_data_node:
            sim_data_node.data = [smiles_node, invalid_smiles_node]

        duration = time.time() - start
        print('\nModel evaluation duration: {:.0f}m {:.0f}s'.format(
            duration // 60, duration % 60))
Esempio n. 2
0
    def test_something(self):
        """Build a small DataNode tree and verify it serializes to JSON."""
        root = DataNode(label='node_label', metadata='test the metadata')
        children = []
        root.data = children
        seed_node = DataNode(label="seed_0")
        children.append(seed_node)

        # sub-nodes of sim data resource: empty containers that a simulation
        # run would normally populate.
        train_loss_node = DataNode(label="training_loss", data=[])
        metrics_node = DataNode(label="validation_metrics", data={})
        scores_node = DataNode(label="validation_score", data=[])

        # add sim data nodes to parent node
        if seed_node:
            seed_node.data = [train_loss_node, metrics_node, scores_node]
        root.to_json(path="./")
        self.assertIsNotNone(root.to_json_str())
Esempio n. 3
0
    def fit(self, model_dir=None, model_name=None, seed=None, verbose=True):
        """Run random hyperparameter search for ``self.alg_args.n_calls`` iterations.

        Each iteration samples one hyperparameter set and evaluates it across
        ``self.num_folds`` folds; per-fold results are recorded in a DataNode
        tree rooted at ``self.data_node`` (when provided).

        Args:
            model_dir: directory for saving each fold's best model; saving only
                happens when both ``model_dir`` and ``model_name`` are given.
            model_name: filename component used when saving models.
            seed: seed applied via ``NumpyRandomSeed`` around fold evaluation.
            verbose: if True, print per-fold and per-iteration progress.

        Returns:
            ``self.stats``, the accumulated search statistics object.
        """
        iter_data_list = []
        if self.data_node is not None:
            self.data_node.data = iter_data_list

        # Random hyperparameter search.
        for i in range(self.alg_args.n_calls):
            folds_data = []
            iter_data_node = DataNode(label="iteration-%d" % i, data=folds_data)
            iter_data_list.append(iter_data_node)

            # Get hyperparameters.
            hparams = self._sample_params()
            self.stats.current_param = ParamInstance(hparams)

            with NumpyRandomSeed(seed):
                for fold in range(self.num_folds):
                    if verbose:
                        print("\nFold {}, param search iteration {}, hparams={}".format(fold, i, hparams))

                    k_node = DataNode(label="Random_search_fold-%d" % fold)
                    folds_data.append(k_node)

                    # Fetch fold data from the provider if configured;
                    # otherwise fall back to an empty mapping.
                    if self.data_provider_fn is not None:
                        data = self.data_provider_fn(fold, **self.data_args)
                        # NOTE(review): a dict_values view is splatted below;
                        # the BayOpt objective wraps the same call in list() —
                        # confirm both paths are intended to behave alike.
                        if isinstance(data, dict):
                            data = data.values()
                    else:
                        data = {}

                    # initialize model, dataloaders, and other elements.
                    init_objs = self.initializer_fn(hparams, *data, **self.init_args)

                    # model training
                    self.train_args["sim_data_node"] = k_node
                    if isinstance(init_objs, dict):
                        results = self.train_fn(init_objs, **self.train_args)
                    else:
                        results = self.train_fn(*init_objs, **self.train_args)
                    best_model, score, epoch = results['model'], results['score'], results['epoch']
                    self.stats.current_param.add_score(score)

                    # avoid nan scores in search. TODO(bbrighttaer): replace this hack with a natural approach.
                    if str(score) == "nan":
                        score = -1e5

                    # save model
                    if model_dir is not None and model_name is not None:
                        self.save_model_fn(best_model, model_dir,
                                           "{}_{}-{}-fold{}-{}-{}-{}-{}-{:.4f}".format(self.dataset_label, self.sim,
                                                                                       self.stats.current_param.id,
                                                                                       fold, i, model_name,
                                                                                       self.split_label, epoch,
                                                                                       score))
            if verbose:
                print("Random search iter = {}: params = {}".format(i, self.stats.current_param))

            # move current hparams to records
            self.stats.update_records()
            self.stats.to_csv(self.results_file)
        return self.stats
Esempio n. 4
0
def main(flags):
    """Drive training or hyperparameter search of the expert RNN regressor.

    Builds a DataNode tree labelled by mode ('train'/'eval'), loads the SMILES
    dataset, then for each seed either runs a hyperparameter search (random or
    Bayesian, per ``flags.hparam_search_alg``) or trains with default
    hyperparameters (optionally k-fold CV). Results are saved as JSON under
    ./analysis/.

    Args:
        flags: parsed command-line flags (eval, data_file, cv, folds,
            hparam_search, hparam_search_alg, ...).
    """
    # NOTE(review): `device`, `seeds`, and `date_label` are read from module
    # scope (not visible here) — confirm they are defined before main() runs.
    mode = 'eval' if flags.eval else 'train'
    sim_label = f'expert_rnn_reg_model_{mode}'

    print(
        '--------------------------------------------------------------------------------'
    )
    print(f'{device}\n{sim_label}\tData file: {flags.data_file}')
    print(
        '--------------------------------------------------------------------------------'
    )

    # The search object is created once (first seed) and reused for
    # subsequent seeds.
    hparam_search = None

    sim_data = DataNode(label=sim_label,
                        metadata=json.dumps({
                            'date': date_label,
                            'seeds': seeds,
                            'mode': mode,
                            'sim_label': sim_label,
                            'num_folds': flags.folds
                        }))
    nodes_list = []
    sim_data.data = nodes_list

    # Load the data
    data_dict, transformer = load_smiles_data(flags.data_file,
                                              flags.cv,
                                              normalize_y=True,
                                              k=flags.folds,
                                              shuffle=5,
                                              create_val=False,
                                              train_size=.8)

    for seed in seeds:
        data_node = DataNode(label="seed_%d" % seed)
        nodes_list.append(data_node)

        # ensure reproducibility
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

        trainer = ExpertTrainer()
        folds = flags.folds if flags.cv else 1
        if flags.hparam_search:
            print(f'Hyperparameter search enabled: {flags.hparam_search_alg}')
            # arguments to callables
            extra_init_args = {}
            extra_data_args = {'cv': flags.cv, 'data': data_dict}
            extra_train_args = {
                'n_iterations': 5000,
                'transformer': transformer,
                'is_hsearch': True,
                'tb_writer': None
            }
            hparams_conf = hparams_config()
            if hparam_search is None:
                # Unknown algorithm names fall back to Bayesian optimisation.
                search_alg = {
                    'random_search': RandomSearch,
                    'bayopt_search': BayesianOptSearch
                }.get(flags.hparam_search_alg, BayesianOptSearch)
                search_args = GPMinArgs(n_calls=10, random_state=seed)
                hparam_search = search_alg(
                    hparam_config=hparams_conf,
                    num_folds=folds,
                    initializer=trainer.initialize,
                    data_provider=trainer.data_provider,
                    train_fn=trainer.train,
                    save_model_fn=trainer.save_model,
                    alg_args=search_args,
                    init_args=extra_init_args,
                    data_args=extra_data_args,
                    train_args=extra_train_args,
                    data_node=data_node,
                    split_label='random',
                    sim_label=sim_label,
                    dataset_label=os.path.split(flags.data_file)[1],
                    results_file=
                    f'{flags.hparam_search_alg}_{sim_label}_{date_label}')
            start = time.time()
            stats = hparam_search.fit()
            print(f'Duration = {time_since(start)}')
            print(stats)
            print("Best params = {}, duration={}".format(
                stats.best(), time_since(start)))
        else:
            hyper_params = default_params(flags)
            # Initialize the model and other related entities for training.
            if flags.cv:
                folds_data = []
                data_node.data = folds_data
                data_node.label = data_node.label + 'cv'
                for k in range(folds):
                    k_node = DataNode(label="fold-%d" % k)
                    folds_data.append(k_node)
                    start_fold(k_node, data_dict, transformer, flags,
                               hyper_params, trainer, k, None)
            else:
                start_fold(data_node, data_dict, transformer, flags,
                           hyper_params, trainer, folds, None)

    # save simulation data resource tree to file.
    sim_data.to_json(path="./analysis/")
Esempio n. 5
0
    def evaluate_model(init_dict, expert_model_dir, sim_data_node=None, k=-1):
        """Evaluate a saved expert predictor on the test set.

        Loads the data transformer and the fold-``k`` model weights from
        ``expert_model_dir``, runs the 'test' data loader, and records loss,
        metrics, scores, and predictions into ``sim_data_node`` sub-nodes.

        Args:
            init_dict: dict providing 'model', 'data_loaders', and 'metrics'.
            expert_model_dir: directory of saved model files; the fold label
                must be the filename suffix, e.g. xxx_k0.mod, xxx_k1.mod.
            sim_data_node: optional DataNode that receives the result
                sub-nodes.
            k: fold index selecting which saved model to load.

        Returns:
            None; returns early if no model file matches fold ``k``.
        """
        start = time.time()
        predictor = init_dict['model']
        data_loaders = init_dict['data_loaders']
        metrics = init_dict['metrics']
        criterion = torch.nn.MSELoss()

        assert (os.path.isdir(expert_model_dir)
                ), 'Expert predictor(s) should be in a dedicated folder'

        # sub-nodes of sim data resource
        loss_lst = []
        loss_node = DataNode(label="loss", data=loss_lst)
        metrics_dict = {}
        metrics_node = DataNode(label="metrics", data=metrics_dict)
        scores_lst = []
        scores_node = DataNode(label="score", data=scores_lst)
        predicted_vals = []
        true_vals = []
        model_preds_node = DataNode(label="predictions",
                                    data={
                                        "y_true": true_vals,
                                        "y_pred": predicted_vals
                                    })

        # add sim data nodes to parent node
        if sim_data_node:
            sim_data_node.data = [
                loss_node, metrics_node, scores_node, model_preds_node
            ]

        # NOTE(review): if the directory contains no 'transformer' file,
        # `transformer` stays unbound and the loop below raises NameError —
        # confirm a transformer is always saved alongside the models.
        model_files = os.listdir(expert_model_dir)
        loaded = False
        for i, m in enumerate(model_files):
            m_path = os.path.join(expert_model_dir, m)
            if 'transformer' in m:
                with open(m_path, 'rb') as f:
                    transformer = joblib.load(f)
            # fold label should be a suffix e.g. xxx_k0.mod, xxx_k1.mod, etc.
            suffix = m.split('_')[-1].split('.')[0]
            if suffix == f'k{k}' and not loaded:
                predictor.load_state_dict(
                    torch.load(m_path, torch.device(device)))
                print(f'Loaded model: {m_path}')
                loaded = True
        predictor = predictor.eval()

        if not loaded:
            return None

        for batch in tqdm(data_loaders['test'], desc='Evaluating...'):
            batch = np.array(batch)
            x = batch[:, 0]
            y_true = batch[:, 1]
            with torch.set_grad_enabled(False):
                # np.float was removed in NumPy 1.24; it aliased the builtin
                # float, so float64 preserves the original dtype.
                y_true = torch.from_numpy(
                    y_true.reshape(-1, 1).astype(np.float64)).float().to(device)
                y_pred = predictor(x)
                loss = criterion(y_pred, y_true)
                loss_lst.append(loss.item())

            # Perform evaluation using the given metrics
            eval_dict = {}
            score = ExpertTrainer.evaluate(
                eval_dict,
                transformer.inverse_transform(y_true.cpu().detach().numpy()),
                transformer.inverse_transform(y_pred.cpu().detach().numpy()),
                metrics)
            scores_lst.append(score)

            # Store de-normalized predictions/targets for plotting.
            predicted_vals.extend(
                np_to_plot_data(
                    transformer.inverse_transform(
                        y_pred.cpu().detach().numpy())))
            true_vals.extend(
                np_to_plot_data(
                    transformer.inverse_transform(
                        y_true.cpu().detach().numpy())))
            # Accumulate per-metric histories across batches.
            for m in eval_dict:
                if m in metrics_dict:
                    metrics_dict[m].append(float(eval_dict[m]))
                else:
                    metrics_dict[m] = [float(eval_dict[m])]

        print(f'Evaluation completed. Elapsed time: {time_since(start)}')
Esempio n. 6
0
    def objective(**bopt_params):
        """Objective function handed to the Bayesian optimiser.

        Evaluates one sampled hyperparameter set across all folds and returns
        the negated aggregate score (skopt minimises, we maximise).

        Closure note: ``alg``, ``count``, ``iter_data_list``, ``verbose``,
        ``model_dir``, and ``model_name`` come from the enclosing scope.

        Args:
            **bopt_params: raw parameter values proposed by the optimiser;
                converted to a hyperparameter dict via ``_to_hparams_dict``.

        Returns:
            float: negative of the score for this hyperparameter set.
        """
        count.inc()
        folds_data = []
        iter_data_node = DataNode(label="iteration-%d" % count.i,
                                  data=folds_data)
        iter_data_list.append(iter_data_node)

        # Get hyperparameters.
        hparams = _to_hparams_dict(bopt_params=bopt_params,
                                   params_config=alg.config)
        alg.stats.current_param = ParamInstance(hparams)

        with NumpyRandomSeed(alg.alg_args.random_state):
            for fold in range(alg.num_folds):
                k_node = DataNode(label="BayOpt_search_fold-%d" % fold)
                folds_data.append(k_node)

                # Get data
                if alg.data_provider_fn is not None:
                    data = alg.data_provider_fn(fold, **alg.data_args)
                    if isinstance(data, dict):
                        data = list(data.values())
                else:
                    data = {}

                if verbose:
                    print("\nFold {}, param search iteration {}, hparams={}".
                          format(fold, count.i, hparams))

                # initialize model, dataloaders, and other elements.
                init_objs = alg.initializer_fn(hparams, *data, **alg.init_args)

                # start of training with selected parameters
                alg.train_args["sim_data_node"] = k_node
                if isinstance(init_objs, dict):
                    results = alg.train_fn(init_objs, **alg.train_args)
                else:
                    results = alg.train_fn(*init_objs, **alg.train_args)
                best_model, score, epoch = results['model'], results[
                    'score'], results['epoch']
                alg.stats.current_param.add_score(score)
                # end of training

                # save model
                if model_dir is not None and model_name is not None:
                    alg.save_model_fn(
                        best_model, model_dir,
                        "{}_{}-{}-fold{}-{}-{}-{}-{}-{:.4f}".format(
                            alg.dataset_label, alg.sim,
                            alg.stats.current_param.id, fold, count.i,
                            model_name, alg.split_label, epoch, score))

        if verbose:
            print("BayOpt hparams search iter = {}: params = {}".format(
                count.i, alg.stats.current_param))

        # get the score of this hyperparameter set
        score = alg.stats.current_param.score

        # avoid nan scores in search.
        if str(score) == "nan":
            score = -1e5

        # move current hparams to records
        alg.stats.update_records()
        alg.stats.to_csv(alg.results_file + '_' + alg.alg_args.type())

        # we want to maximize the score so negate it to invert minimization by skopt
        return -score
Esempio n. 7
0
                        help="If true, a saved model is loaded and evaluated")
    parser.add_argument("--eval_model_name",
                        default=None,
                        type=str,
                        help="The filename of the model to be loaded from the directory specified in --model_dir")
    args = parser.parse_args()
    flags = Flags()
    args_dict = args.__dict__
    for arg in args_dict:
        setattr(flags, arg, args_dict[arg])
    device = torch.device("cuda:0" if flags.cuda and torch.cuda.is_available() else "cpu")
    flags['device'] = device

    # Simulation data resource tree
    sim_label = "Soek_DQN_demo"
    sim_data = DataNode(label=sim_label)

    trainer = DQNTraining()
    k = 1
    if flags.hparam_search:
        print("Hyperparameter search enabled: {}".format(flags.hparam_search_alg))

        # arguments to callables
        extra_init_args = {}
        extra_data_args = {}
        extra_train_args = {}

        hparams_conf = get_hparam_config(flags)

        search_alg = {"random_search": RandomSearch,
                      "bayopt_search": BayesianOptSearch}.get(flags.hparam_search_alg, BayesianOptSearch)
Esempio n. 8
0
    def train(model,
              optimizer,
              gen_data,
              rnn_args,
              n_iters=5000,
              sim_data_node=None,
              epoch_ckpt=(1, 2.0),
              tb_writer=None,
              is_hsearch=False):
        """Pretrain the stack-RNN SMILES generator with teacher forcing.

        Iterates random training batches, optimises a cross-entropy loss that
        ignores padding, logs per-batch loss/accuracy plus a generated sample,
        records histories into ``sim_data_node`` sub-nodes, and saves a
        checkpoint at the end of each epoch.

        Args:
            model: the generator model to train.
            optimizer: optimiser updating ``model``'s parameters.
            gen_data: batch provider (``random_training_set``, vocab, etc.).
            rnn_args: dict of RNN construction args (num_layers, hidden_size,
                num_dir, device, has_cell, has_stack, stack_width/depth).
            n_iters: target number of iterations used to derive ``n_epochs``.
            sim_data_node: optional DataNode receiving loss/score sub-nodes.
            epoch_ckpt: unused here — TODO confirm intended checkpoint policy.
            tb_writer: TensorBoard writer factory; disabled (overwritten with
                None) below.
            is_hsearch: unused in this body — presumably a shared signature
                with sibling trainers.

        Returns:
            dict with 'model', mean epoch 'score' (rounded), and 'epoch'.
        """
        tb_writer = None  # tb_writer()
        start = time.time()
        best_model_wts = model.state_dict()
        best_score = -10000
        best_epoch = -1
        terminate_training = False
        e_avg = ExpAverage(.01)
        num_batches = math.ceil(gen_data.file_len / gen_data.batch_size)
        n_epochs = math.ceil(n_iters / num_batches)
        grad_stats = GradStats(model, beta=0.)

        # learning rate decay schedulers
        # scheduler = sch.StepLR(optimizer, step_size=500, gamma=0.01)

        # pred_loss functions
        criterion = nn.CrossEntropyLoss(
            ignore_index=gen_data.char2idx[gen_data.pad_symbol])

        # sub-nodes of sim data resource
        loss_lst = []
        train_loss_node = DataNode(label="train_loss", data=loss_lst)
        metrics_dict = {}
        metrics_node = DataNode(label="validation_metrics", data=metrics_dict)
        train_scores_lst = []
        train_scores_node = DataNode(label="train_score",
                                     data=train_scores_lst)
        scores_lst = []
        scores_node = DataNode(label="validation_score", data=scores_lst)

        # add sim data nodes to parent node
        if sim_data_node:
            sim_data_node.data = [
                train_loss_node, train_scores_node, metrics_node, scores_node
            ]

        try:
            # Main training loop
            tb_idx = {'train': Count(), 'val': Count(), 'test': Count()}
            epoch_losses = []
            epoch_scores = []
            # NOTE(review): epochs hard-coded to 6 here while the progress log
            # and return value use the derived n_epochs — confirm intended.
            for epoch in range(6):
                phase = 'train'

                # Iterate through mini-batches
                # with TBMeanTracker(tb_writer, 10) as tracker:
                with grad_stats:
                    for b in trange(0,
                                    num_batches,
                                    desc=f'{phase} in progress...'):
                        inputs, labels = gen_data.random_training_set()
                        batch_size, seq_len = inputs.shape[:2]
                        optimizer.zero_grad()

                        # track history if only in train
                        with torch.set_grad_enabled(phase == "train"):
                            # Create hidden states for each layer
                            hidden_states = []
                            for _ in range(rnn_args['num_layers']):
                                hidden = init_hidden(
                                    num_layers=1,
                                    batch_size=batch_size,
                                    hidden_size=rnn_args['hidden_size'],
                                    num_dir=rnn_args['num_dir'],
                                    dvc=rnn_args['device'])
                                if rnn_args['has_cell']:
                                    cell = init_cell(
                                        num_layers=1,
                                        batch_size=batch_size,
                                        hidden_size=rnn_args['hidden_size'],
                                        num_dir=rnn_args['num_dir'],
                                        dvc=rnn_args['device'])
                                else:
                                    cell = None
                                if rnn_args['has_stack']:
                                    stack = init_stack(batch_size,
                                                       rnn_args['stack_width'],
                                                       rnn_args['stack_depth'],
                                                       dvc=rnn_args['device'])
                                else:
                                    stack = None
                                hidden_states.append((hidden, cell, stack))
                            # forward propagation
                            outputs = model([inputs] + hidden_states)
                            predictions = outputs[0]
                            # Flatten (seq, batch, vocab) logits to 2-D for
                            # CrossEntropyLoss against flattened labels.
                            predictions = predictions.permute(1, 0, -1)
                            predictions = predictions.contiguous().view(
                                -1, predictions.shape[-1])
                            labels = labels.contiguous().view(-1)

                            # calculate loss
                            loss = criterion(predictions, labels)

                        # metrics
                        eval_dict = {}
                        score = IreleasePretrain.evaluate(
                            eval_dict, predictions, labels)

                        # TBoard info
                        # tracker.track("%s/loss" % phase, loss.item(), tb_idx[phase].IncAndGet())
                        # tracker.track("%s/score" % phase, score, tb_idx[phase].i)
                        # for k in eval_dict:
                        #     tracker.track('{}/{}'.format(phase, k), eval_dict[k], tb_idx[phase].i)

                        # backward pass
                        loss.backward()
                        optimizer.step()

                        # for epoch stats
                        epoch_losses.append(loss.item())

                        # for sim data resource
                        train_scores_lst.append(score)
                        loss_lst.append(loss.item())

                        # for epoch stats
                        epoch_scores.append(score)

                        print("\t{}: Epoch={}/{}, batch={}/{}, "
                              "pred_loss={:.4f}, accuracy: {:.2f}, sample: {}".
                              format(
                                  time_since(start),
                                  epoch + 1, n_epochs, b + 1, num_batches,
                                  loss.item(), eval_dict['accuracy'],
                                  generate_smiles(generator=model,
                                                  gen_data=gen_data,
                                                  init_args=rnn_args,
                                                  num_samples=1)))
                    # Checkpoint the model at the end of every epoch.
                    IreleasePretrain.save_model(
                        model,
                        './model_dir/',
                        name=f'irelease-pretrained_stack-rnn_gru_'
                        f'{date_label}_epoch_{epoch}')
                # End of mini=batch iterations.
        except RuntimeError as e:
            # Typically CUDA OOM or shape errors; training ends gracefully.
            print(str(e))

        duration = time.time() - start
        print('\nModel training duration: {:.0f}m {:.0f}s'.format(
            duration // 60, duration % 60))
        return {
            'model': model,
            'score': round(np.mean(epoch_scores), 3),
            'epoch': n_epochs
        }
Esempio n. 9
0
    def train(model,
              optimizer,
              data_loaders,
              metrics,
              n_iters=5000,
              sim_data_node=None):
        """Train a classifier with a train/val loop and keep the best weights.

        Runs ``n_iters // len(train loader)`` epochs. In the 'train' phase it
        optimises a cross-entropy loss with a StepLR schedule; in the 'val'
        phase it scores the model and tracks the epoch with the best mean
        score, whose weights are restored before returning.

        Args:
            model: the classifier to train.
            optimizer: optimiser updating ``model``'s parameters.
            data_loaders: dict with 'train' and 'val' iterables of (X, y).
            metrics: metric callables forwarded to ``Demo.evaluate``.
            n_iters: total iteration budget used to derive the epoch count.
            sim_data_node: optional DataNode receiving loss/metric/score
                sub-nodes.

        Returns:
            dict with best-weight 'model', best mean 'score', and its 'epoch'.
        """
        start = time.time()
        best_model_wts = model.state_dict()
        best_score = -10000
        best_epoch = -1
        n_epochs = n_iters // len(data_loaders["train"])
        scheduler = sch.StepLR(optimizer, step_size=30, gamma=0.01)
        criterion = nn.CrossEntropyLoss()
        first_epoch_loss = None

        # sub-nodes of sim data resource
        loss_lst = []
        train_loss_node = DataNode(label="training_loss", data=loss_lst)
        metrics_dict = {}
        metrics_node = DataNode(label="validation_metrics", data=metrics_dict)
        scores_lst = []
        scores_node = DataNode(label="validation_score", data=scores_lst)

        # add sim data nodes to parent node
        if sim_data_node:
            sim_data_node.data = [train_loss_node, metrics_node, scores_node]

        # Main training loop
        for epoch in range(n_epochs):
            for phase in ["train", "val"]:
                if phase == "train":
                    print("Training....")
                    # Training mode
                    model.train()
                else:
                    print("Validation...")
                    # Evaluation mode
                    model.eval()

                epoch_losses = []
                epoch_scores = []

                # Iterate through mini-batches
                i = 0
                for X, y in tqdm(data_loaders[phase]):
                    # Flatten each sample to a feature vector.
                    X = X.view(X.shape[0], -1)
                    if CUDA:
                        X = X.cuda()
                        y = y.cuda()

                    optimizer.zero_grad()

                    # forward propagation
                    # track history if only in train
                    with torch.set_grad_enabled(phase == "train"):
                        y_pred = model(X)
                        loss = criterion(y_pred, y.squeeze())

                    if phase == "train":
                        print("\tEpoch={}/{}, batch={}/{}, loss={:.4f}".format(
                            epoch + 1, n_epochs, i + 1,
                            len(data_loaders[phase]), loss.item()))
                        # for epoch stats
                        epoch_losses.append(loss.item())

                        # for sim data resource
                        loss_lst.append(loss.item())

                        # optimization ops
                        loss.backward()
                        optimizer.step()
                    else:
                        if str(loss.item()
                               ) != "nan":  # useful in hyperparameter search
                            eval_dict = {}
                            score = Demo.evaluate(eval_dict, y, y_pred,
                                                  metrics)
                            # for epoch stats
                            epoch_scores.append(score)

                            # for sim data resource
                            scores_lst.append(score)
                            for m in eval_dict:
                                if m in metrics_dict:
                                    metrics_dict[m].append(eval_dict[m])
                                else:
                                    metrics_dict[m] = [eval_dict[m]]

                            print("\nEpoch={}/{}, batch={}/{}, "
                                  "evaluation results= {}, score={}".format(
                                      epoch + 1, n_epochs, i + 1,
                                      len(data_loaders[phase]), eval_dict,
                                      score))

                    i += 1
                # End of mini=batch iterations.

                if phase == "train":
                    # Adjust the learning rate.
                    scheduler.step()

                    ep_loss = np.nanmean(epoch_losses)
                    # first_epoch_loss is recorded but not read again in this
                    # body — presumably used for early-stop logic elsewhere.
                    if first_epoch_loss is None:
                        first_epoch_loss = ep_loss
                    print("\nPhase: {}, avg task loss={:.4f}, ".format(
                        phase, ep_loss))
                else:
                    # Track the epoch with the best mean validation score.
                    mean_score = np.mean(epoch_scores)
                    if best_score < mean_score:
                        best_score = mean_score
                        best_model_wts = copy.deepcopy(model.state_dict())
                        best_epoch = epoch

        duration = time.time() - start
        print('\nModel training duration: {:.0f}m {:.0f}s'.format(
            duration // 60, duration % 60))
        # Restore the best-performing weights before returning.
        model.load_state_dict(best_model_wts)
        return {'model': model, 'score': best_score, 'epoch': best_epoch}
Esempio n. 10
0
    def train(model,
              optimizer,
              gen_data,
              init_args,
              n_iters=5000,
              sim_data_node=None,
              epoch_ckpt=(2, 4.0),
              tb_writer=None,
              is_hsearch=False):
        tb_writer = None  # tb_writer()
        start = time.time()
        best_model_wts = model.state_dict()
        best_score = -10000
        best_epoch = -1
        terminate_training = False
        e_avg = ExpAverage(.01)
        num_batches = math.ceil(gen_data.file_len / gen_data.batch_size)
        n_epochs = math.ceil(n_iters / num_batches)
        grad_stats = GradStats(model, beta=0.)

        # learning rate decay schedulers
        # scheduler = sch.StepLR(optimizer, step_size=500, gamma=0.01)

        # pred_loss functions
        criterion = nn.CrossEntropyLoss(
            ignore_index=gen_data.char2idx[gen_data.pad_symbol])
        # criterion = LabelSmoothing(gen_data.n_characters, gen_data.char2idx[gen_data.pad_symbol], 0.1)

        # sub-nodes of sim data resource
        loss_lst = []
        train_loss_node = DataNode(label="train_loss", data=loss_lst)
        metrics_dict = {}
        metrics_node = DataNode(label="validation_metrics", data=metrics_dict)
        train_scores_lst = []
        train_scores_node = DataNode(label="train_score",
                                     data=train_scores_lst)
        scores_lst = []
        scores_node = DataNode(label="validation_score", data=scores_lst)

        # add sim data nodes to parent node
        if sim_data_node:
            sim_data_node.data = [
                train_loss_node, train_scores_node, metrics_node, scores_node
            ]

        try:
            # Main training loop
            tb_idx = {'train': Count(), 'val': Count(), 'test': Count()}
            for epoch in range(n_epochs):
                if terminate_training:
                    print("Terminating training...")
                    break
                for phase in ["train"]:  # , "val" if is_hsearch else "test"]:
                    if phase == "train":
                        print("Training....")
                        # Training mode
                        model.train()
                    else:
                        print("Validation...")
                        # Evaluation mode
                        model.eval()

                    epoch_losses = []
                    epoch_scores = []

                    # Iterate through mini-batches
                    # with TBMeanTracker(tb_writer, 10) as tracker:
                    with grad_stats:
                        for b in trange(0,
                                        num_batches,
                                        desc=f'{phase} in progress...'):
                            inputs, labels = gen_data.random_training_set()

                            optimizer.zero_grad()

                            # track history if only in train
                            with torch.set_grad_enabled(phase == "train"):
                                # forward propagation
                                stack = init_stack_2d(inputs.shape[0],
                                                      inputs.shape[1],
                                                      init_args['stack_depth'],
                                                      init_args['stack_width'],
                                                      dvc=init_args['device'])
                                predictions = model([inputs, stack])
                                predictions = predictions.permute(1, 0, -1)
                                predictions = predictions.contiguous().view(
                                    -1, predictions.shape[-1])
                                labels = labels.contiguous().view(-1)

                                # calculate loss
                                loss = criterion(predictions, labels)

                            # fail fast
                            if str(loss.item()) == "nan":
                                terminate_training = True
                                break

                            # metrics
                            eval_dict = {}
                            score = GpmtPretrain.evaluate(
                                eval_dict, predictions, labels)

                            # TBoard info
                            # tracker.track("%s/loss" % phase, loss.item(), tb_idx[phase].IncAndGet())
                            # tracker.track("%s/score" % phase, score, tb_idx[phase].i)
                            # for k in eval_dict:
                            #     tracker.track('{}/{}'.format(phase, k), eval_dict[k], tb_idx[phase].i)

                            if phase == "train":
                                # backward pass
                                loss.backward()
                                optimizer.step()

                                # for epoch stats
                                epoch_losses.append(loss.item())

                                # for sim data resource
                                train_scores_lst.append(score)
                                loss_lst.append(loss.item())

                                print(
                                    "\t{}: Epoch={}/{}, batch={}/{}, "
                                    "pred_loss={:.4f}, accuracy: {:.2f}, sample: {}"
                                    .format(
                                        time_since(start), epoch + 1,
                                        n_epochs, b + 1, num_batches,
                                        loss.item(), eval_dict['accuracy'],
                                        generate_smiles(generator=model,
                                                        gen_data=gen_data,
                                                        init_args=init_args,
                                                        num_samples=1,
                                                        gen_type='trans')))
                            else:
                                # for epoch stats
                                epoch_scores.append(score)

                                # for sim data resource
                                scores_lst.append(score)
                                for m in eval_dict:
                                    if m in metrics_dict:
                                        metrics_dict[m].append(eval_dict[m])
                                    else:
                                        metrics_dict[m] = [eval_dict[m]]

                                print("\nEpoch={}/{}, batch={}/{}, "
                                      "evaluation results= {}, accuracy={}".
                                      format(epoch + 1, n_epochs, b + 1,
                                             num_batches, eval_dict, score))
                    # End of mini=batch iterations.

                    if phase == "train":
                        ep_loss = np.nanmean(epoch_losses)
                        e_avg.update(ep_loss)
                        if epoch % (epoch_ckpt[0] - 1) == 0 and epoch > 0:
                            if e_avg.value > epoch_ckpt[1]:
                                terminate_training = True
                        print(
                            "\nPhase: {}, avg task pred_loss={:.4f}, ".format(
                                phase, np.nanmean(epoch_losses)))
                        # scheduler.step()
                    else:
                        mean_score = np.mean(epoch_scores)
                        if best_score < mean_score:
                            best_score = mean_score
                            best_model_wts = copy.deepcopy(model.state_dict())
                            best_epoch = epoch
        except RuntimeError as e:
            print(str(e))

        duration = time.time() - start
        print('\nModel training duration: {:.0f}m {:.0f}s'.format(
            duration // 60, duration % 60))
        try:
            model.load_state_dict(best_model_wts)
        except RuntimeError as e:
            print(str(e))
        return {'model': model, 'score': best_score, 'epoch': best_epoch}
Esempio n. 11
0
    parser.add_argument(
        "--eval_model_name",
        default=None,
        type=str,
        help=
        "The filename of the model to be loaded from the directory specified in --model_dir"
    )
    args = parser.parse_args()
    flags = Flags()
    args_dict = args.__dict__
    for arg in args_dict:
        setattr(flags, arg, args_dict[arg])

    # Simulation data resource tree
    sim_label = "Soek_pytorch_mnist_demo"
    sim_data = DataNode(label=sim_label,
                        metadata='put any extra information here')

    trainer = Demo()
    k = 1
    if flags.hparam_search:
        print("Hyperparameter search enabled: {}".format(
            flags.hparam_search_alg))

        # arguments to callables
        extra_init_args = {}
        extra_data_args = {}
        extra_train_args = {"n_iters": 1000}

        hparams_conf = get_hparam_config(flags)

        search_alg = {
Esempio n. 12
0
    def train(init_args, model_path=None, agent_net_name=None, reward_net_name=None, n_episodes=500,
              sim_data_node=None, tb_writer=None, is_hsearch=False, n_to_generate=200, learn_irl=True):
        """Run the actor-critic RL training loop for the SMILES generator agent.

        Args:
            init_args: dict of pre-built components: 'agent', 'probs_reg',
                'drl_alg', 'irl_alg', 'reward_func', 'gamma',
                'episodes_to_train', 'expert_model', 'demo_data_gen',
                'unbiased_data_gen' and 'gen_args'.
            model_path: directory holding pretrained weights; loading happens
                only when model_path, agent_net_name and reward_net_name are
                all provided.
            agent_net_name: filename of the pretrained agent weights.
            reward_net_name: filename of the pretrained reward-function weights.
            n_episodes: maximum number of completed episodes before stopping.
            sim_data_node: optional DataNode that receives the tracked
                mean-prediction lists.
            tb_writer: zero-arg factory returning a TensorBoard SummaryWriter.
            is_hsearch: hyperparameter-search mode flag.
            n_to_generate: number of SMILES sampled per evaluation round.
            learn_irl: accepted for interface compatibility; the IRL fit call
                below is currently commented out, so this flag has no effect.

        Returns:
            dict with 'model' ([actor, critic, IRL model]), 'score' (rounded
            best score) and 'epoch' (number of completed episodes).
        """
        tb_writer = tb_writer()  # the argument is a factory; create the writer here
        agent = init_args['agent']
        probs_reg = init_args['probs_reg']
        drl_algorithm = init_args['drl_alg']
        irl_algorithm = init_args['irl_alg']
        reward_func = init_args['reward_func']
        gamma = init_args['gamma']
        episodes_to_train = init_args['episodes_to_train']
        expert_model = init_args['expert_model']
        demo_data_gen = init_args['demo_data_gen']
        unbiased_data_gen = init_args['unbiased_data_gen']
        best_model_wts = None
        exp_avg = ExpAverage(beta=0.6)
        best_score = -1.

        # Load pretrained agent/reward weights when all three locations are given.
        if model_path and agent_net_name and reward_net_name:
            try:
                print('Loading pretrained model...')
                weights = IReLeaSE.load_model(model_path, agent_net_name)
                agent.model.load_state_dict(weights)
                print('Pretrained model loaded successfully!')
                reward_func.model.load_state_dict(IReLeaSE.load_model(model_path, reward_net_name))
                print('Reward model loaded successfully!')
            except Exception:  # was a bare except; don't swallow SystemExit/KeyboardInterrupt
                print('Pretrained model could not be loaded. Terminating prematurely.')
                return {'model': [drl_algorithm.actor,
                                  drl_algorithm.critic,
                                  irl_algorithm.model],
                        'score': round(best_score, 3),
                        'epoch': -1}

        start = time.time()

        # Begin simulation and training
        total_rewards = []
        trajectories = []
        done_episodes = 0
        batch_episodes = 0
        exp_trajectories = []
        step_idx = 0

        # collect mean predictions
        unbiased_smiles_mean_pred, biased_smiles_mean_pred, gen_smiles_mean_pred = [], [], []
        unbiased_smiles_mean_pred_data_node = DataNode('baseline_mean_vals', unbiased_smiles_mean_pred)
        biased_smiles_mean_pred_data_node = DataNode('biased_mean_vals', biased_smiles_mean_pred)
        gen_smiles_mean_pred_data_node = DataNode('gen_mean_vals', gen_smiles_mean_pred)
        if sim_data_node:
            sim_data_node.data = [unbiased_smiles_mean_pred_data_node,
                                  biased_smiles_mean_pred_data_node,
                                  gen_smiles_mean_pred_data_node]

        env = MoleculeEnv(actions=get_default_tokens(), reward_func=reward_func)
        exp_source = ExperienceSourceFirstLast(env, agent, gamma, steps_count=1, steps_delta=1)
        traj_prob = 1.
        exp_traj = []

        # Reference scores: expert-model predictions on demo and unbiased SMILES.
        demo_score = np.mean(expert_model(demo_data_gen.random_training_set_smiles(1000))[1])
        baseline_score = np.mean(expert_model(unbiased_data_gen.random_training_set_smiles(1000))[1])
        # with contextlib.suppress(RuntimeError if is_hsearch else DummyException):
        try:
            with TBMeanTracker(tb_writer, 1) as tracker:
                for step_idx, exp in tqdm(enumerate(exp_source)):
                    exp_traj.append(exp)
                    traj_prob *= probs_reg.get(list(exp.state), exp.action)

                    # exp.last_state is None marks the end of an episode.
                    if exp.last_state is None:
                        trajectories.append(Trajectory(terminal_state=EpisodeStep(exp.state, exp.action),
                                                       traj_prob=traj_prob))
                        exp_trajectories.append(exp_traj)  # for ExperienceFirstLast objects
                        exp_traj = []
                        traj_prob = 1.
                        probs_reg.clear()
                        batch_episodes += 1

                    new_rewards = exp_source.pop_total_rewards()
                    if new_rewards:
                        reward = new_rewards[0]
                        done_episodes += 1
                        total_rewards.append(reward)
                        mean_rewards = float(np.mean(total_rewards[-100:]))
                        tracker.track('mean_total_reward', mean_rewards, step_idx)
                        tracker.track('total_reward', reward, step_idx)
                        print(f'Time = {time_since(start)}, step = {step_idx}, reward = {reward:6.2f}, '
                              f'mean_100 = {mean_rewards:6.2f}, episodes = {done_episodes}')
                        # Sample molecules for evaluation without tracking gradients.
                        with torch.set_grad_enabled(False):
                            samples = generate_smiles(drl_algorithm.model, demo_data_gen, init_args['gen_args'],
                                                      num_samples=n_to_generate)
                        predictions = expert_model(samples)[1]
                        mean_preds = np.nanmean(predictions)
                        if math.isnan(mean_preds) or math.isinf(mean_preds):
                            print(f'mean preds is {mean_preds}, terminating')
                            # best_score = -1.
                            break
                        try:
                            percentage_in_threshold = np.sum((predictions <= demo_score)) / len(predictions)
                        except Exception:  # was a bare except; guards against an empty predictions array
                            percentage_in_threshold = 0.
                        per_valid = len(predictions) / n_to_generate
                        if per_valid < 0.2:
                            print(f'Percentage of valid SMILES is = {per_valid}. Terminating...')
                            # best_score = -1.
                            break
                        print(f'Mean value of predictions = {mean_preds}, % of valid SMILES = {per_valid}')
                        unbiased_smiles_mean_pred.append(float(baseline_score))
                        biased_smiles_mean_pred.append(float(demo_score))
                        gen_smiles_mean_pred.append(float(mean_preds))
                        tb_writer.add_scalars('qsar_score', {'sampled': mean_preds,
                                                             'baseline': baseline_score,
                                                             'demo_data': demo_score}, step_idx)
                        tb_writer.add_scalars('SMILES stats', {'per. of valid': per_valid,
                                                               'per. in drug-like region': percentage_in_threshold},
                                              step_idx)
                        eval_dict = {}
                        eval_score = IReLeaSE.evaluate(eval_dict, samples,
                                                       demo_data_gen.random_training_set_smiles(1000))
                        for k in eval_dict:
                            tracker.track(k, eval_dict[k], step_idx)
                        avg_len = np.nanmean([len(s) for s in samples])
                        tracker.track('Average SMILES length', np.nanmean([len(s) for s in samples]), step_idx)
                        d_penalty = eval_score < .5
                        s_penalty = avg_len < 20
                        diff = demo_score - mean_preds
                        # score = 3 * np.exp(diff) + np.log(per_valid + 1e-5) - s_penalty * np.exp(
                        #     diff) - d_penalty * np.exp(diff)
                        score = np.exp(diff)
                        # score = np.exp(diff) + np.mean([np.exp(per_valid), np.exp(percentage_in_threshold)])
                        if math.isnan(score) or math.isinf(score):
                            # best_score = -1.
                            print(f'Score is {score}, terminating.')
                            break
                        tracker.track('score', score, step_idx)
                        exp_avg.update(score)
                        # NOTE(review): in hsearch mode best_score is synced to the
                        # running average *before* the comparison below, so
                        # `exp_avg.value > best_score` can never fire and
                        # best_model_wts stays None during hyperparameter search —
                        # confirm this is intended.
                        if is_hsearch:
                            best_score = exp_avg.value
                        if exp_avg.value > best_score:
                            best_model_wts = [copy.deepcopy(drl_algorithm.actor.state_dict()),
                                              copy.deepcopy(drl_algorithm.critic.state_dict()),
                                              copy.deepcopy(irl_algorithm.model.state_dict())]
                            best_score = exp_avg.value
                        # np.exp(0.) == 1: stop once the generator matches the demo score.
                        if best_score >= np.exp(0.):
                            print(f'threshold reached, best score={mean_preds}, '
                                  f'threshold={demo_score}, training completed')
                            break
                        if done_episodes == n_episodes:
                            print('Training completed!')
                            break

                    if batch_episodes < episodes_to_train:
                        continue

                    # Train models
                    print('Fitting models...')
                    irl_loss = 0.
                    # irl_loss = irl_algorithm.fit(trajectories) if learn_irl else 0.
                    rl_loss = drl_algorithm.fit(exp_trajectories)
                    samples = generate_smiles(drl_algorithm.model, demo_data_gen, init_args['gen_args'],
                                              num_samples=3)
                    print(f'IRL loss = {irl_loss}, RL loss = {rl_loss}, samples = {samples}')
                    tracker.track('irl_loss', irl_loss, step_idx)
                    tracker.track('critic_loss', rl_loss[0], step_idx)
                    tracker.track('agent_loss', rl_loss[1], step_idx)

                    # Reset
                    batch_episodes = 0
                    trajectories.clear()
                    exp_trajectories.clear()
        except Exception as e:
            # Best-effort: report the error but still restore and return the best model.
            print(str(e))
        if best_model_wts:
            drl_algorithm.actor.load_state_dict(best_model_wts[0])
            drl_algorithm.critic.load_state_dict(best_model_wts[1])
            irl_algorithm.model.load_state_dict(best_model_wts[2])
        duration = time.time() - start
        print('\nTraining duration: {:.0f}m {:.0f}s'.format(duration // 60, duration % 60))
        # if math.isinf(best_score) or math.isnan(best_score):
        #     best_score = -1.
        return {'model': [drl_algorithm.actor,
                          drl_algorithm.critic,
                          irl_algorithm.model],
                'score': round(best_score, 3),
                'epoch': done_episodes}
Esempio n. 13
0
def main(flags):
    """Run the RNN cross-entropy generator baseline for every configured seed.

    For each seed in the module-level ``seeds``: seeds all RNG sources, then
    either (a) runs a hyperparameter search, (b) evaluates a previously saved
    model when ``flags.eval`` is set, or (c) trains a new generator and saves
    its weights. Per-seed results are collected under a DataNode tree that is
    written to ``./analysis/`` at the end.

    Args:
        flags: parsed command-line options, read via attribute access
            (e.g. flags.exp_type, flags.eval, flags.hparam_search).
    """
    sim_label = f'RNN_XEnt_Generator_Baseline_{flags.exp_type}'
    if flags.eval:
        sim_label += '_eval'
    sim_data = DataNode(label=sim_label,
                        metadata={
                            'exp': flags.exp_type,
                            'date': date_label
                        })
    nodes_list = []
    sim_data.data = nodes_list

    # For searching over multiple seeds
    hparam_search = None

    pretraining = flags.exp_type == 'pretraining'

    for seed in seeds:
        summary_writer_creator = lambda: SummaryWriter(
            log_dir="irelease_tb_rnn_xent"
            "/{}_{}_{}/".format(sim_label, seed,
                                dt.now().strftime("%Y_%m_%d__%H_%M_%S")))

        # for data collection of this round of simulation.
        data_node = DataNode(label="seed_%d" % seed)
        nodes_list.append(data_node)

        # Seed every RNG source for reproducibility.
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

        print(
            '--------------------------------------------------------------------------------'
        )
        print(
            f'{device}\n{sim_label}\tDemonstrations file: {flags.prior_data if pretraining else flags.demo_file}'
        )
        print(
            '--------------------------------------------------------------------------------'
        )

        trainer = RNNBaseline()
        k = 1  # single data split; forwarded to trainer.data_provider below
        # Consistency fix: the rest of this function reads flags via attribute
        # access (flags.eval, flags.model_dir, ...), so use the same style here
        # instead of subscripting.
        if flags.hparam_search:
            print("Hyperparameter search enabled: {}".format(
                flags.hparam_search_alg))

            # arguments to callables
            extra_init_args = {}
            extra_data_args = {"flags": flags}
            extra_train_args = {
                "is_hsearch": True,
                "n_iters": 50000,
                "tb_writer": summary_writer_creator
            }

            hparams_conf = get_hparam_config(flags)
            # Build the search object once and reuse it across seeds.
            if hparam_search is None:
                search_alg = {
                    "random_search": RandomSearch,
                    "bayopt_search": BayesianOptSearch
                }.get(flags.hparam_search_alg, BayesianOptSearch)
                search_args = GPMinArgs(n_calls=20, random_state=seed)
                hparam_search = search_alg(
                    hparam_config=hparams_conf,
                    num_folds=1,
                    initializer=trainer.initialize,
                    data_provider=trainer.data_provider,
                    train_fn=trainer.train,
                    save_model_fn=trainer.save_model,
                    alg_args=search_args,
                    init_args=extra_init_args,
                    data_args=extra_data_args,
                    train_args=extra_train_args,
                    data_node=data_node,
                    split_label='',
                    sim_label=sim_label,
                    dataset_label='ChEMBL_SMILES',
                    results_file="{}_{}_gpmt_{}.csv".format(
                        flags.hparam_search_alg, sim_label, date_label))

            stats = hparam_search.fit(model_dir="models",
                                      model_name='irelease')
            print(stats)
            print("Best params = {}".format(stats.best()))
        else:
            hyper_params = default_hparams(flags)
            data_gens = trainer.data_provider(k, flags)
            model, optimizer, rnn_args = trainer.initialize(
                hyper_params, data_gens['demo_data'],
                data_gens['unbiased_data'], data_gens['prior_data'])
            if flags.eval:
                # Evaluation-only path: load saved weights and score samples.
                load_model = trainer.load_model(flags.model_dir,
                                                flags.eval_model_name)
                model.load_state_dict(load_model)
                trainer.evaluate_model(model,
                                       data_gens['demo_data'],
                                       rnn_args,
                                       data_node,
                                       num_smiles=200)
            else:
                results = trainer.train(
                    generator=model,
                    optimizer=optimizer,
                    rnn_args=rnn_args,
                    n_iters=40000,
                    sim_data_node=data_node,
                    tb_writer=summary_writer_creator,
                    is_pretraining=pretraining,
                    pretrained_net_path=flags.model_dir,
                    pretrained_net_name=flags.pretrained_model)
                trainer.save_model(
                    results['model'],
                    flags.model_dir,
                    name=
                    f'rnn_xent_gen_baseline_{flags.exp_type}_{hyper_params["unit_type"]}_'
                    f'{date_label}_{results["score"]}_{results["epoch"]}_seed_{seed}'
                )

    # save simulation data resource tree to file.
    sim_data.to_json(path="./analysis/")
    def train(init_args,
              agent_net_path=None,
              agent_net_name=None,
              seed=0,
              n_episodes=500,
              sim_data_node=None,
              tb_writer=None,
              is_hsearch=False,
              n_to_generate=200,
              learn_irl=True,
              bias_mode='max'):
        """Run the RL (and optionally IRL) training loop for the SMILES generator.

        Args:
            init_args: dict of pre-built components: 'agent', 'probs_reg',
                'drl_alg', 'irl_alg', 'reward_func', 'gamma',
                'episodes_to_train', 'expert_model', 'demo_data_gen',
                'unbiased_data_gen' and 'gen_args'.
            agent_net_path: directory containing pretrained agent weights;
                loading happens only when both path and name are given.
            agent_net_name: filename of the pretrained agent weights.
            seed: accepted for interface compatibility (not referenced in
                this body).
            n_episodes: maximum number of completed episodes before stopping.
            sim_data_node: optional DataNode receiving the tracked
                mean-prediction lists.
            tb_writer: zero-arg factory returning a TensorBoard SummaryWriter.
            is_hsearch: when True, exceptions raised inside the training loop
                are suppressed (see the contextlib.suppress below).
            n_to_generate: number of SMILES sampled per evaluation round.
            learn_irl: when True, the IRL model is fitted alongside the agent.
            bias_mode: 'max' rewards mean predictions above the demo score;
                any other value rewards predictions below it.

        Returns:
            dict with 'model' ([agent model, IRL model]), 'score' (rounded
            best score) and 'epoch' (number of completed episodes).
        """
        tb_writer = tb_writer()  # the argument is a factory; create the writer here
        agent = init_args['agent']
        probs_reg = init_args['probs_reg']
        drl_algorithm = init_args['drl_alg']
        irl_algorithm = init_args['irl_alg']
        reward_func = init_args['reward_func']
        gamma = init_args['gamma']
        episodes_to_train = init_args['episodes_to_train']
        expert_model = init_args['expert_model']
        demo_data_gen = init_args['demo_data_gen']
        unbiased_data_gen = init_args['unbiased_data_gen']
        best_model_wts = None
        best_score = 0.
        exp_avg = ExpAverage(beta=0.6)

        # Load pretrained agent weights when both path and name are given.
        if agent_net_path and agent_net_name:
            print('Loading pretrained model...')
            agent.model.load_state_dict(
                IReLeaSE.load_model(agent_net_path, agent_net_name))
            print('Pretrained model loaded successfully!')

        # collect mean predictions
        unbiased_smiles_mean_pred, biased_smiles_mean_pred, gen_smiles_mean_pred = [], [], []
        unbiased_smiles_mean_pred_data_node = DataNode(
            'baseline_mean_vals', unbiased_smiles_mean_pred)
        biased_smiles_mean_pred_data_node = DataNode('biased_mean_vals',
                                                     biased_smiles_mean_pred)
        gen_smiles_mean_pred_data_node = DataNode('gen_mean_vals',
                                                  gen_smiles_mean_pred)
        if sim_data_node:
            sim_data_node.data = [
                unbiased_smiles_mean_pred_data_node,
                biased_smiles_mean_pred_data_node,
                gen_smiles_mean_pred_data_node
            ]

        start = time.time()

        # Begin simulation and training
        total_rewards = []
        irl_trajectories = []
        done_episodes = 0
        batch_episodes = 0
        exp_trajectories = []

        env = MoleculeEnv(actions=get_default_tokens(),
                          reward_func=reward_func)
        exp_source = ExperienceSourceFirstLast(env,
                                               agent,
                                               gamma,
                                               steps_count=1,
                                               steps_delta=1)
        traj_prob = 1.
        exp_traj = []

        # Reference scores: expert-model predictions on demo and unbiased SMILES.
        demo_score = np.mean(
            expert_model(demo_data_gen.random_training_set_smiles(1000))[1])
        baseline_score = np.mean(
            expert_model(
                unbiased_data_gen.random_training_set_smiles(1000))[1])
        # In hsearch mode any exception quietly aborts the trial; otherwise
        # DummyException keeps the suppression a no-op.
        with contextlib.suppress(Exception if is_hsearch else DummyException):
            with TBMeanTracker(tb_writer, 1) as tracker:
                for step_idx, exp in tqdm(enumerate(exp_source)):
                    exp_traj.append(exp)
                    traj_prob *= probs_reg.get(list(exp.state), exp.action)

                    # exp.last_state is None marks the end of an episode.
                    if exp.last_state is None:
                        irl_trajectories.append(
                            Trajectory(terminal_state=EpisodeStep(
                                exp.state, exp.action),
                                       traj_prob=traj_prob))
                        exp_trajectories.append(
                            exp_traj)  # for ExperienceFirstLast objects
                        exp_traj = []
                        traj_prob = 1.
                        probs_reg.clear()
                        batch_episodes += 1

                    new_rewards = exp_source.pop_total_rewards()
                    if new_rewards:
                        reward = new_rewards[0]
                        done_episodes += 1
                        total_rewards.append(reward)
                        mean_rewards = float(np.mean(total_rewards[-100:]))
                        tracker.track('mean_total_reward', mean_rewards,
                                      step_idx)
                        tracker.track('total_reward', reward, step_idx)
                        print(
                            f'Time = {time_since(start)}, step = {step_idx}, reward = {reward:6.2f}, '
                            f'mean_100 = {mean_rewards:6.2f}, episodes = {done_episodes}'
                        )
                        # Sample molecules for evaluation without tracking gradients.
                        with torch.set_grad_enabled(False):
                            samples = generate_smiles(
                                drl_algorithm.model,
                                demo_data_gen,
                                init_args['gen_args'],
                                num_samples=n_to_generate)
                        predictions = expert_model(samples)[1]
                        mean_preds = np.mean(predictions)
                        try:
                            percentage_in_threshold = np.sum(
                                (predictions >= 7.0)) / len(predictions)
                        except Exception:  # was a bare except; guards against an empty predictions array
                            percentage_in_threshold = 0.
                        per_valid = len(predictions) / n_to_generate
                        print(
                            f'Mean value of predictions = {mean_preds}, '
                            f'% of valid SMILES = {per_valid}, '
                            f'% in drug-like region={percentage_in_threshold}')
                        unbiased_smiles_mean_pred.append(float(baseline_score))
                        biased_smiles_mean_pred.append(float(demo_score))
                        gen_smiles_mean_pred.append(float(mean_preds))
                        tb_writer.add_scalars(
                            'qsar_score', {
                                'sampled': mean_preds,
                                'baseline': baseline_score,
                                'demo_data': demo_score
                            }, step_idx)
                        tb_writer.add_scalars(
                            'SMILES stats', {
                                'per. of valid': per_valid,
                                'per. above threshold': percentage_in_threshold
                            }, step_idx)
                        eval_dict = {}
                        eval_score = IReLeaSE.evaluate(
                            eval_dict, samples,
                            demo_data_gen.random_training_set_smiles(1000))

                        for k in eval_dict:
                            tracker.track(k, eval_dict[k], step_idx)
                        tracker.track('Average SMILES length',
                                      np.nanmean([len(s) for s in samples]),
                                      step_idx)
                        # Reward direction depends on the biasing objective.
                        if bias_mode == 'max':
                            diff = mean_preds - demo_score
                        else:
                            diff = demo_score - mean_preds
                        score = np.exp(diff)
                        exp_avg.update(score)
                        tracker.track('score', score, step_idx)
                        if exp_avg.value > best_score:
                            best_model_wts = [
                                copy.deepcopy(
                                    drl_algorithm.model.state_dict()),
                                copy.deepcopy(irl_algorithm.model.state_dict())
                            ]
                            best_score = exp_avg.value
                        # np.exp(0.) == 1: stop once the generator matches the demo score.
                        if best_score >= np.exp(0.):
                            print(
                                f'threshold reached, best score={mean_preds}, '
                                f'threshold={demo_score}, training completed')
                            break
                        if done_episodes == n_episodes:
                            print('Training completed!')
                            break

                    if batch_episodes < episodes_to_train:
                        continue

                    # Train models
                    print('Fitting models...')
                    irl_stmt = ''
                    if learn_irl:
                        irl_loss = irl_algorithm.fit(irl_trajectories)
                        tracker.track('irl_loss', irl_loss, step_idx)
                        irl_stmt = f'IRL loss = {irl_loss}, '
                    rl_loss = drl_algorithm.fit(exp_trajectories)
                    samples = generate_smiles(drl_algorithm.model,
                                              demo_data_gen,
                                              init_args['gen_args'],
                                              num_samples=3)
                    print(
                        f'{irl_stmt}RL loss = {rl_loss}, samples = {samples}')
                    tracker.track('agent_loss', rl_loss, step_idx)

                    # Reset
                    batch_episodes = 0
                    irl_trajectories.clear()
                    exp_trajectories.clear()

        # Restore the best-performing weights, if any round improved the score.
        if best_model_wts:
            drl_algorithm.model.load_state_dict(best_model_wts[0])
            irl_algorithm.model.load_state_dict(best_model_wts[1])
        duration = time.time() - start
        print('\nTraining duration: {:.0f}m {:.0f}s'.format(
            duration // 60, duration % 60))
        return {
            'model': [drl_algorithm.model, irl_algorithm.model],
            'score': round(best_score, 3),
            'epoch': done_episodes
        }
Esempio n. 15
0
def main(flags):
    """Pretrain (or evaluate) the Stack-RNN SMILES generator for each seed.

    Depending on ``flags`` this either runs a hyperparameter search,
    evaluates a previously saved model (``flags.eval``), or trains a model
    (optionally warm-started from ``flags.init_model``) and saves the
    resulting weights.  Per-seed results are gathered under a DataNode tree
    which is finally serialized to ``./analysis/``.
    """
    sim_label = flags.exp_name if flags.exp_name else 'Irelease-pretraining-Stack-RNN'
    if flags.eval:
        sim_label += '_eval'
    sim_data = DataNode(label=sim_label,
                        metadata={
                            'exp': flags.exp_name,
                            'date': date_label
                        })
    nodes_list = []
    sim_data.data = nodes_list

    # For searching over multiple seeds
    hparam_search = None

    for seed in seeds:
        # Each call creates a fresh, timestamped TensorBoard writer.
        summary_writer_creator = lambda: SummaryWriter(
            log_dir="tb_gpmt"
            "/{}_{}_{}/".format(sim_label, seed,
                                dt.now().strftime("%Y_%m_%d__%H_%M_%S")))

        # for data collection of this round of simulation.
        data_node = DataNode(label="seed_%d" % seed)
        nodes_list.append(data_node)

        # Seed every RNG for reproducibility of this round.
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

        print(
            '-------------------------------------------------------------------------------------------------'
        )
        print(
            f'Running on dataset: {flags.data_file}, experiment = {flags.exp_name}'
        )
        print(
            '-------------------------------------------------------------------------------------------------'
        )

        trainer = IreleasePretrain()
        k = 1
        # Consistency fix: `flags` is read with attribute access everywhere
        # else in this function (flags.eval, flags.model_dir, ...); the
        # original mixed in dict-style subscripting (flags["hparam_search"]),
        # which raises TypeError for an argparse.Namespace.
        if flags.hparam_search:
            print("Hyperparameter search enabled: {}".format(
                flags.hparam_search_alg))

            # arguments to callables
            extra_init_args = {}
            extra_data_args = {"flags": flags}
            extra_train_args = {
                "is_hsearch": True,
                "n_iters": 50000,
                "tb_writer": summary_writer_creator
            }

            hparams_conf = get_hparam_config(flags)
            # The search object is created once and then reused across seeds.
            if hparam_search is None:
                search_alg = {
                    "random_search": RandomSearch,
                    "bayopt_search": BayesianOptSearch
                }.get(flags.hparam_search_alg, BayesianOptSearch)
                search_args = GPMinArgs(n_calls=20, random_state=seed)
                hparam_search = search_alg(
                    hparam_config=hparams_conf,
                    num_folds=1,
                    initializer=trainer.initialize,
                    data_provider=trainer.data_provider,
                    train_fn=trainer.train,
                    save_model_fn=trainer.save_model,
                    alg_args=search_args,
                    init_args=extra_init_args,
                    data_args=extra_data_args,
                    train_args=extra_train_args,
                    data_node=data_node,
                    split_label='',
                    sim_label=sim_label,
                    dataset_label='ChEMBL_SMILES',
                    results_file="{}_{}_gpmt_{}.csv".format(
                        flags.hparam_search_alg, sim_label, date_label))

            stats = hparam_search.fit(model_dir="models",
                                      model_name='irelease')
            print(stats)
            print("Best params = {}".format(stats.best()))
        else:
            hyper_params = default_hparams(flags)
            model, optimizer, gen_data, rnn_args = trainer.initialize(
                hyper_params,
                gen_data=trainer.data_provider(k, flags)['train'])
            if flags.eval:
                # Evaluation-only path: load saved weights and sample SMILES.
                load_model = trainer.load_model(flags.model_dir,
                                                flags.eval_model_name)
                model.load_state_dict(load_model)
                trainer.evaluate_model(model,
                                       gen_data,
                                       rnn_args,
                                       data_node,
                                       num_smiles=flags.num_smiles)
            else:
                if flags.init_model:
                    # Optionally warm-start training from saved weights.
                    load_model = trainer.load_model(flags.model_dir,
                                                    flags.init_model)
                    model.load_state_dict(load_model)
                    print(
                        f'Model weights {flags.init_model} loaded successfully!'
                    )
                results = trainer.train(model=model,
                                        optimizer=optimizer,
                                        gen_data=gen_data,
                                        rnn_args=rnn_args,
                                        n_iters=1500000,
                                        sim_data_node=data_node,
                                        tb_writer=summary_writer_creator)
                trainer.save_model(
                    results['model'],
                    flags.model_dir,
                    name=
                    f'irelease-pretrained_stack-rnn_{hyper_params["unit_type"]}_'
                    f'{date_label}_{results["score"]}_{results["epoch"]}')

    # save simulation data resource tree to file.
    sim_data.to_json(path="./analysis/")
def main(flags):
    """Run the REINFORCE-based IReLeaSE training pipeline for every seed.

    Either performs a hyperparameter search or trains the agent and reward
    networks, saving both models and a DataNode result tree to ``./analysis/``.
    """
    irl_lbl = 'no_irl' if flags.use_true_reward else 'with_irl'
    vflag_suffix = '_no_vflag' if flags.no_smiles_validity_flag else ''
    sim_label = flags.exp_name + '_min_IReLeaSE-REINFORCE_' + irl_lbl + vflag_suffix
    sim_data = DataNode(label=sim_label,
                        metadata={
                            'exp': flags.exp_name,
                            'date': date_label
                        })
    per_seed_nodes = []
    sim_data.data = per_seed_nodes

    for seed in seeds:

        def create_tb_writer():
            # Fresh, timestamped TensorBoard log directory per invocation.
            stamp = dt.now().strftime("%Y_%m_%d__%H_%M_%S")
            return SummaryWriter(
                log_dir="irelease_tb/{}_{}_{}/".format(sim_label, seed, stamp))

        # Node collecting every result produced with this seed.
        seed_node = DataNode(label="seed_%d" % seed)
        per_seed_nodes.append(seed_node)

        # Make all RNGs deterministic for this round of simulation.
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

        separator = '--------------------------------------------------------------------------------'
        print(separator)
        print(f'{device}\n{sim_label}\tDemonstrations file: {flags.demo_file}')
        print(separator)

        pipeline = IReLeaSE()
        fold = 1
        if flags.hparam_search:
            print(f'Hyperparameter search enabled: {flags.hparam_search_alg}')
            # arguments to callables
            init_extras = {}
            data_extras = {'flags': flags}
            train_extras = {
                'agent_net_path': flags.model_dir,
                'agent_net_name': flags.pretrained_model,
                'learn_irl': not flags.use_true_reward,
                'seed': seed,
                'n_episodes': 600,
                'is_hsearch': True,
                'tb_writer': create_tb_writer
            }
            hp_config = get_hparam_config(flags)
            alg_registry = {
                'random_search': RandomSearch,
                'bayopt_search': BayesianOptSearch
            }
            alg_cls = alg_registry.get(flags.hparam_search_alg,
                                       BayesianOptSearch)
            searcher = alg_cls(
                hparam_config=hp_config,
                num_folds=1,
                initializer=pipeline.initialize,
                data_provider=pipeline.data_provider,
                train_fn=pipeline.train,
                save_model_fn=pipeline.save_model,
                alg_args=GPMinArgs(n_calls=20, random_state=seed),
                init_args=init_extras,
                data_args=data_extras,
                train_args=train_extras,
                data_node=seed_node,
                split_label='reinforce-rl',
                sim_label=sim_label,
                dataset_label=None,
                results_file=f'{flags.hparam_search_alg}_{sim_label}'
                f'_{date_label}_seed_{seed}')
            t0 = time.time()
            search_stats = searcher.fit()
            print(f'Duration = {time_since(t0)}')
            print(search_stats)
            print("\nBest params = {}, duration={}".format(
                search_stats.best(), time_since(t0)))
        else:
            hps = default_hparams(flags)
            generators = pipeline.data_provider(fold, flags)
            init_args = pipeline.initialize(hps,
                                            generators['demo_data'],
                                            generators['unbiased_data'],
                                            generators['prior_data'])
            outcome = pipeline.train(init_args,
                                     flags.model_dir,
                                     flags.pretrained_model,
                                     seed,
                                     sim_data_node=seed_node,
                                     n_episodes=600,
                                     bias_mode=flags.bias_mode,
                                     learn_irl=not flags.use_true_reward,
                                     tb_writer=create_tb_writer)
            # Common pieces of both model file names.
            name_prefix = (f'{flags.exp_name}_{irl_lbl}_irelease_stack-rnn_'
                           f'{hps["agent_params"]["unit_type"]}')
            name_suffix = f'{date_label}_{outcome["score"]}_{outcome["epoch"]}'
            # Persist agent network, then the learned reward network.
            pipeline.save_model(outcome['model'][0],
                                path=flags.model_dir,
                                name=f'{name_prefix}_reinforce_agent_{name_suffix}')
            pipeline.save_model(outcome['model'][1],
                                path=flags.model_dir,
                                name=f'{name_prefix}_reinforce_reward_net_{name_suffix}')

    # save simulation data resource tree to file.
    sim_data.to_json(path="./analysis/")
# Esempio n. 17 (Example no. 17) — separator left over from web scraping
# 0 — vote/score artifact from the scraped source page
    def train(generator,
              optimizer,
              rnn_args,
              pretrained_net_path=None,
              pretrained_net_name=None,
              n_iters=5000,
              sim_data_node=None,
              tb_writer=None,
              is_hsearch=False,
              is_pretraining=True,
              grad_clipping=5):
        """Pretrain or fine-tune the SMILES generator with teacher forcing.

        The generator is trained with a next-token cross-entropy objective on
        batches drawn from ``prior_data_gen`` (pretraining) or
        ``demo_data_gen`` (fine tuning).  After every optimizer step a batch
        of SMILES is sampled and scored with ``expert_model``; an
        exponentially smoothed score drives model selection.

        Args:
            generator: the RNN language model being trained.
            optimizer: optimizer over ``generator``'s parameters.
            rnn_args: dict holding 'expert_model', the three data generators
                ('demo_data_gen', 'unbiased_data_gen', 'prior_data_gen') and
                the experiment type ('exp_type').
            pretrained_net_path: directory of saved weights (optional).
            pretrained_net_name: file name of saved weights; weights are
                loaded only when both path and name are given.
            n_iters: requested number of mini-batch iterations, used to derive
                an epoch count (NOTE: overridden below — see `n_epochs = 30`).
            sim_data_node: optional DataNode receiving loss / score traces.
            tb_writer: zero-argument callable returning a SummaryWriter.
            is_hsearch: hyperparameter-search flag (not referenced in this
                body).
            is_pretraining: True selects prior data, False demonstrations.
            grad_clipping: max gradient norm; a falsy value disables clipping.

        Returns:
            dict with the best model (its weights restored in-place), the best
            smoothed score rounded to 3 d.p., and the epoch it was reached in.
        """
        expert_model = rnn_args['expert_model']
        # Materialize the SummaryWriter from the factory callable.
        tb_writer = tb_writer()
        best_model_wts = generator.state_dict()
        best_score = -1000
        best_epoch = -1
        demo_data_gen = rnn_args['demo_data_gen']
        unbiased_data_gen = rnn_args['unbiased_data_gen']
        prior_data_gen = rnn_args['prior_data_gen']
        # Exponential moving average used to smooth the selection score.
        score_exp_avg = ExpAverage(beta=0.6)
        exp_type = rnn_args['exp_type']

        # Mini-batches per pass over the dataset chosen for this phase.
        if is_pretraining:
            num_batches = math.ceil(prior_data_gen.file_len /
                                    prior_data_gen.batch_size)
        else:
            num_batches = math.ceil(demo_data_gen.file_len /
                                    demo_data_gen.batch_size)
        n_epochs = math.ceil(n_iters / num_batches)
        grad_stats = GradStats(generator, beta=0.)

        # learning rate decay schedulers
        # NOTE(review): the scheduler is created but never stepped (the
        # scheduler.step() call below is commented out), so it has no effect.
        scheduler = sch.StepLR(optimizer, step_size=100, gamma=0.02)

        # pred_loss functions
        # Padding positions are excluded from the cross-entropy loss.
        criterion = nn.CrossEntropyLoss(
            ignore_index=prior_data_gen.char2idx[prior_data_gen.pad_symbol])

        # sub-nodes of sim data resource
        loss_lst = []
        train_loss_node = DataNode(label="train_loss", data=loss_lst)

        # collect mean predictions
        unbiased_smiles_mean_pred, biased_smiles_mean_pred, gen_smiles_mean_pred = [], [], []
        unbiased_smiles_mean_pred_data_node = DataNode(
            'baseline_mean_vals', unbiased_smiles_mean_pred)
        biased_smiles_mean_pred_data_node = DataNode('biased_mean_vals',
                                                     biased_smiles_mean_pred)
        gen_smiles_mean_pred_data_node = DataNode('gen_mean_vals',
                                                  gen_smiles_mean_pred)
        if sim_data_node:
            sim_data_node.data = [
                train_loss_node, unbiased_smiles_mean_pred_data_node,
                biased_smiles_mean_pred_data_node,
                gen_smiles_mean_pred_data_node
            ]

        # load pretrained model
        if pretrained_net_path and pretrained_net_name:
            print('Loading pretrained model...')
            weights = RNNBaseline.load_model(pretrained_net_path,
                                             pretrained_net_name)
            generator.load_state_dict(weights)
            print('Pretrained model loaded successfully!')

        start = time.time()
        try:
            # Reference scores: mean expert prediction over 1000 demonstration
            # SMILES and 1000 unbiased SMILES.  Assumes expert_model returns a
            # pair with predictions at index 1 — TODO confirm against the
            # expert model's implementation.
            demo_score = np.mean(
                expert_model(
                    demo_data_gen.random_training_set_smiles(1000))[1])
            baseline_score = np.mean(
                expert_model(
                    unbiased_data_gen.random_training_set_smiles(1000))[1])
            step_idx = Count()
            gen_data = prior_data_gen if is_pretraining else demo_data_gen
            with TBMeanTracker(tb_writer, 1) as tracker:
                mode = 'Pretraining' if is_pretraining else 'Fine tuning'
                # NOTE(review): hard-coded override discards the n_epochs
                # value computed from n_iters above.
                n_epochs = 30
                for epoch in range(n_epochs):
                    epoch_losses = []
                    epoch_mean_preds = []
                    epoch_per_valid = []
                    with grad_stats:
                        for b in trange(
                                0,
                                num_batches,
                                desc=
                                f'Epoch {epoch + 1}/{n_epochs}, {mode} in progress...'
                        ):
                            inputs, labels = gen_data.random_training_set()
                            optimizer.zero_grad()

                            # Forward pass; flatten (time, batch) so every
                            # token position becomes one CE sample.
                            # Assumes the model's output is sequence-major and
                            # permute moves it to (batch, seq, vocab) — TODO
                            # confirm against the generator's forward().
                            predictions = generator(inputs)[0]
                            predictions = predictions.permute(1, 0, -1)
                            predictions = predictions.contiguous().view(
                                -1, predictions.shape[-1])
                            labels = labels.contiguous().view(-1)

                            # calculate loss
                            loss = criterion(predictions, labels)
                            epoch_losses.append(loss.item())

                            # backward pass
                            loss.backward()
                            if grad_clipping:
                                torch.nn.utils.clip_grad_norm_(
                                    generator.parameters(), grad_clipping)
                            optimizer.step()
                            # scheduler.step()

                            # for sim data resource
                            # Sample SMILES (no gradients) and score them with
                            # the expert model.
                            n_to_generate = 200
                            with torch.set_grad_enabled(False):
                                samples = generate_smiles(
                                    generator,
                                    demo_data_gen,
                                    rnn_args,
                                    num_samples=n_to_generate,
                                    max_len=smiles_max_len)
                            samples_pred = expert_model(samples)[1]

                            # metrics
                            eval_dict = {}
                            eval_score = RNNBaseline.evaluate(
                                eval_dict, samples,
                                demo_data_gen.random_training_set_smiles(1000))
                            # TBoard info
                            tracker.track('loss', loss.item(),
                                          step_idx.IncAndGet())
                            for k in eval_dict:
                                tracker.track(f'{k}', eval_dict[k], step_idx.i)
                            mean_preds = np.mean(samples_pred)
                            epoch_mean_preds.append(mean_preds)
                            # Fraction of requested samples that were scored —
                            # presumably expert_model drops invalid SMILES, so
                            # this approximates validity; TODO confirm.
                            per_valid = len(samples_pred) / n_to_generate
                            epoch_per_valid.append(per_valid)
                            # Experiment-specific score / qualification rules.
                            if exp_type == 'drd2':
                                per_qualified = float(
                                    len([v for v in samples_pred if v >= 0.8
                                         ])) / len(samples_pred)
                                score = mean_preds
                            elif exp_type == 'logp':
                                per_qualified = np.sum(
                                    (samples_pred >= 1.0)
                                    & (samples_pred < 5.0)) / len(samples_pred)
                                score = mean_preds
                            elif exp_type == 'jak2_max':
                                # Maximize: reward exceeding the demo mean.
                                per_qualified = np.sum(
                                    (samples_pred >=
                                     demo_score)) / len(samples_pred)
                                diff = mean_preds - demo_score
                                score = np.exp(diff)
                            elif exp_type == 'jak2_min':
                                # Minimize: reward falling below the demo mean.
                                per_qualified = np.sum(
                                    (samples_pred <=
                                     demo_score)) / len(samples_pred)
                                diff = demo_score - mean_preds
                                score = np.exp(diff)
                            else:  # pretraining
                                score = per_valid  # -loss.item()
                                per_qualified = 0.
                            unbiased_smiles_mean_pred.append(
                                float(baseline_score))
                            biased_smiles_mean_pred.append(float(demo_score))
                            gen_smiles_mean_pred.append(float(mean_preds))
                            tb_writer.add_scalars(
                                'qsar_score', {
                                    'sampled': mean_preds,
                                    'baseline': baseline_score,
                                    'demo_data': demo_score
                                }, step_idx.i)
                            tb_writer.add_scalars(
                                'SMILES stats', {
                                    'per. of valid': per_valid,
                                    'per. of qualified': per_qualified
                                }, step_idx.i)
                            avg_len = np.nanmean([len(s) for s in samples])
                            tracker.track('Average SMILES length', avg_len,
                                          step_idx.i)

                            # Keep the weights with the best smoothed score.
                            score_exp_avg.update(score)
                            if score_exp_avg.value > best_score:
                                best_model_wts = copy.deepcopy(
                                    generator.state_dict())
                                best_score = score_exp_avg.value
                                best_epoch = epoch

                            # Periodically print a few sample SMILES.
                            if step_idx.i > 0 and step_idx.i % 1000 == 0:
                                smiles = generate_smiles(
                                    generator=generator,
                                    gen_data=gen_data,
                                    init_args=rnn_args,
                                    num_samples=3,
                                    max_len=smiles_max_len)
                                print(f'Sample SMILES = {smiles}')
                        # End of mini-batch iterations.
                        print(
                            f'{time_since(start)}: Epoch {epoch + 1}/{n_epochs}, loss={np.mean(epoch_losses)},'
                            f'Mean value of predictions = {np.mean(epoch_mean_preds)}, '
                            f'% of valid SMILES = {np.mean(epoch_per_valid)}')

        except RuntimeError as e:
            # Swallow runtime errors (e.g. CUDA OOM) so the best weights found
            # so far can still be restored and returned below.
            print(str(e))

        duration = time.time() - start
        print('Model training duration: {:.0f}m {:.0f}s'.format(
            duration // 60, duration % 60))
        # Restore the best-scoring weights before returning.
        generator.load_state_dict(best_model_wts)
        return {
            'model': generator,
            'score': round(best_score, 3),
            'epoch': best_epoch
        }