Exemple #1
0
def train(config, save_path):
    """Build the model described by ``config`` and run the training loop.

    Args:
        config: Mapping read for the keys ``dataset``, ``batch_size``,
            ``augmented``, ``seed``, ``model``, ``lr``, ``lr_schedule``,
            ``n_epochs`` and ``reload``; ``model_kwargs`` is optional and
            is expanded into the model builder call.
        save_path: Forwarded unchanged to ``training_loop``.
    """
    train_stream, test_stream, meta_data = datasets(
        dataset=config['dataset'],
        batch_size=config['batch_size'],
        augmented=config['augmented'],
        preprocessing='center',
        seed=config['seed'],
    )

    # Resolve the architecture by name inside the ``models`` namespace.
    build_network = vars(models)[config['model']]
    network = build_network(**config.get('model_kwargs', {}))
    summary(network)

    criterion = torch.nn.MSELoss()  # Because logsoftmax. Be careful!
    sgd = torch.optim.SGD(network.parameters(), lr=config['lr'])
    model = Model(network, sgd, criterion, [acc])

    callbacks = [LRSchedule(lr_schedule=config['lr_schedule'])]

    # Warning: the test split is used as validation data below.
    # Please don't do this.
    n_train_examples = len(meta_data['x_train'])
    steps_per_epoch = int(n_train_examples / config['batch_size'])

    training_loop(
        model=model,
        train=train_stream,
        valid=test_stream,
        save_path=save_path,
        n_epochs=config['n_epochs'],
        save_freq=1,
        reload=config['reload'],
        use_tb=True,
        steps_per_epoch=steps_per_epoch,
        custom_callbacks=callbacks,
    )
Exemple #2
0
def evaluate(save_path, checkpoint_name="weights.ckpt"):
    """Evaluate a saved checkpoint on the test split and dump the results.

    Args:
        save_path: Run directory; must contain ``config.gin`` and the
            checkpoint file. The results JSON is written here as well.
        checkpoint_name: File name of the checkpoint inside ``save_path``.
    """
    # The same gin file is parsed into a dict and registered with gin.
    config_file = os.path.join(save_path, "config.gin")
    config = parse_gin_config(config_file)
    gin.parse_config_files_and_bindings([config_file], bindings=[""])

    # Only the test generator is used; the call still has to yield 4 items.
    _, _, test_loader, _ = get_dataset(batch_size=config['train.batch_size'],
                                       seed=config['train.seed'])

    # Manual restore (a bit hacky, but necessary because
    # load_from_checkpoint seems to fail, probably due to a bug in
    # Pytorch Lightning): rebuild the network, wrap it and load the
    # state dict by hand.
    ckpt_path = os.path.join(save_path, checkpoint_name)
    checkpoint = torch.load(ckpt_path)
    network = models.__dict__[config['train.model']]()
    summary(network)
    pl_module = SupervisedLearning(network, lr=0.0)
    pl_module.load_state_dict(checkpoint['state_dict'])

    trainer = pl.Trainer()
    # ``trainer.test`` is expected to return exactly one result entry.
    [results] = trainer.test(model=pl_module,
                             test_dataloaders=test_loader,
                             ckpt_path=ckpt_path)
    logger.info(results)

    results_file = os.path.join(
        save_path, "eval_results_{}.json".format(checkpoint_name))
    with open(results_file, "w") as out:
        json.dump(results, out)
Exemple #3
0
def train(save_path, model, lr=0.1, batch_size=128, callbacks=[]):
    """Train a named model with SGD and cross-entropy loss.

    Args:
        save_path: Forwarded to ``training_loop``.
        model: Name of a builder in the ``models`` namespace; called
            without arguments.
        lr: Learning rate for ``torch.optim.SGD``.
        batch_size: Batch size for ``get_dataset``; also used to derive
            the number of steps per epoch.
        callbacks: Iterable of callback names resolved via
            ``get_callback``; names that resolve to ``None`` are dropped.
    """
    # Dataset generators (the validation generator is not used below).
    train_gen, _valid_gen, test_gen, meta_data = get_dataset(
        batch_size=batch_size)

    # Instantiate the network by name.
    network = vars(models)[model]()
    summary(network)
    criterion = torch.nn.CrossEntropyLoss()
    sgd = torch.optim.SGD(network.parameters(), lr=lr)

    # Resolve callback names, keeping only those that exist.
    resolved = (get_callback(name, verbose=0) for name in callbacks)
    callbacks_constructed = [cb for cb in resolved if cb is not None]

    # Ceiling division written with integer arithmetic only.
    n_examples = len(meta_data['x_train'])
    steps_per_epoch = (n_examples - 1) // batch_size + 1

    training_loop(
        model=network,
        optimizer=sgd,
        loss_function=criterion,
        metrics=[acc],
        train=train_gen,
        valid=test_gen,
        meta_data=meta_data,
        steps_per_epoch=steps_per_epoch,
        save_path=save_path,
        config=_CONFIG,
        use_tb=True,
        custom_callbacks=callbacks_constructed,
    )
Exemple #4
0
def train(save_path,
          model,
          batch_size=128,
          seed=777,
          callbacks=[],
          resume=True,
          evaluate=True):
    """Train a named model through the Lightning-style training loop.

    Args:
        save_path: Run directory for checkpoints and evaluation results.
        model: Name of a builder in the ``models`` namespace; called
            without arguments.
        batch_size: Forwarded to ``get_dataset``.
        seed: Forwarded to ``get_dataset``.
        callbacks: Iterable of callback names resolved via
            ``get_callback``; names that resolve to ``None`` are dropped.
        resume: When false, an existing ``last.ckpt`` in ``save_path`` is
            treated as an error.
        evaluate: When true, run the test set after training and write
            ``eval_results.json``.

    Raises:
        IOError: If ``resume`` is false and ``last.ckpt`` already exists.
    """
    # Dataset generators.
    train_loader, valid_loader, test_loader, meta_data = get_dataset(
        batch_size=batch_size, seed=seed)

    # Instantiate the network by name.
    network = vars(models)[model]()
    summary(network)

    # Resolve callback names, keeping only those that exist.
    resolved = (get_callback(name, verbose=0) for name in callbacks)
    callbacks_constructed = [cb for cb in resolved if cb is not None]

    last_ckpt = os.path.join(save_path, "last.ckpt")
    if not resume and os.path.exists(last_ckpt):
        raise IOError(
            "Please clear folder before running or pass train.resume=True")

    # Build the module and hand everything to the training loop.
    checkpoint_callback = ModelCheckpoint(
        filepath=os.path.join(save_path, "weights"),
        verbose=True,
        save_last=True,  # For resumability
        monitor='valid_acc',
        mode='max',
    )
    pl_module = supervised_training.SupervisedLearning(network,
                                                       meta_data=meta_data)
    trainer = training_loop(
        train_loader,
        valid_loader,
        pl_module=pl_module,
        checkpoint_callback=checkpoint_callback,
        callbacks=callbacks_constructed,
        save_path=save_path,
    )

    if not evaluate:
        return

    # ``trainer.test`` is expected to return exactly one result entry.
    [results] = trainer.test(test_dataloaders=test_loader)
    logger.info(results)
    with open(os.path.join(save_path, "eval_results.json"), "w") as out:
        json.dump(results, out)
Exemple #5
0
def train(save_path,
          model,
          lr_splitting_by=None,
          lrs=None,
          wd=0,
          lr=0.1,
          batch_size=128,
          n_epochs=100,
          weights=None,
          fb_method=False,
          callbacks=[],
          optimizer='sgd',
          scheduler=None,
          freeze_all_but_this_layer=None,
          mode='train'):
    """Train or evaluate a named model on the CheXNet/COVID data.

    Args:
        save_path: Forwarded to the training/evaluation loop.
        model: Name of a builder in the ``models`` namespace; called
            without arguments.
        lr_splitting_by: When not ``None``, the optimizer is replaced by
            the first item returned from ``create_optimizer``.
        lrs: Forwarded to ``create_optimizer``.
        wd: Weight decay for the optimizer.
        lr: Learning rate for the optimizer.
        batch_size: Forwarded to ``get_chexnet_covid``.
        n_epochs: Epoch count; also the period of the cosine scheduler.
        weights: Used only to derive ``target_indice``.
        fb_method: Forwarded to ``training_loop``; when truthy,
            ``target_indice`` is the position of ``1`` in ``weights``
            (``0`` if absent).
        callbacks: Iterable of callback names resolved via
            ``get_callback``; names that resolve to ``None`` are dropped.
        optimizer: ``'sgd'`` or ``'adam'``; any other value is passed on
            unchanged.
        scheduler: ``'cosine'`` builds ``CosineAnnealingLR``; any other
            value is passed on unchanged.
        freeze_all_but_this_layer: Name prefix; when given, only
            parameters whose name starts with it stay trainable.
        mode: ``'train'`` runs ``training_loop``; anything else runs
            ``evaluation_loop``.
    """
    # Dataset generators.
    train_data, valid_data, test_data, meta_data = get_chexnet_covid(
        batch_size=batch_size)

    # Instantiate the network by name.
    network = vars(models)[model]()
    summary(network)

    criterion = torch.nn.BCELoss()

    if freeze_all_but_this_layer is not None:
        # Freeze everything first ...
        logger.info("Freezing all layers")
        for param in network.parameters():
            param.requires_grad = False

        # ... then re-enable the parameters under the requested prefix.
        for param_name, param in network.named_parameters():
            if not param_name.startswith(freeze_all_but_this_layer):
                continue
            param.requires_grad = True
            logger.info("Unfreezing {}: {}".format(param_name, param.shape))

    if optimizer == 'sgd':
        optimizer = torch.optim.SGD(network.parameters(),
                                    lr=lr,
                                    weight_decay=wd)
    elif optimizer == 'adam':
        optimizer = torch.optim.Adam(network.parameters(),
                                     lr=lr,
                                     weight_decay=wd)

    if scheduler == 'cosine':
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, n_epochs)

    if lr_splitting_by is not None:
        optimizer, _ = create_optimizer(optimizer, network, lr_splitting_by,
                                        lrs)

    # Resolve callback names one at a time so the printed names stay
    # interleaved with the lookups.
    callbacks_constructed = []
    for callback_name in callbacks:
        callback = get_callback(callback_name, verbose=0)
        if callback is None:
            continue
        print(callback_name)
        callbacks_constructed.append(callback)

    steps_per_epoch = len(train_data) if train_data is not None else None

    if fb_method:
        target_indice = weights.index(1) if 1 in weights else 0
    elif weights is None:
        target_indice = None
    else:
        target_indice = 0

    if mode == 'train':
        assert train_data is not None, "please provide train data"
        assert valid_data is not None, "please provide validation data"
        training_loop(
            model=network,
            optimizer=optimizer,
            scheduler=scheduler,
            loss_function=criterion,
            metrics=[acc_chexnet_covid],
            train=train_data,
            valid=valid_data,
            test=test_data,
            meta_data=meta_data,
            steps_per_epoch=steps_per_epoch,
            n_epochs=n_epochs,
            save_path=save_path,
            config=_CONFIG,
            use_tb=True,
            custom_callbacks=callbacks_constructed,
            fb_method=fb_method,
            target_indice=target_indice,
        )
        return

    assert test_data is not None, "please provide test data for evaluation"
    evaluation_loop(
        model=network,
        optimizer=optimizer,
        loss_function=criterion,
        metrics=[acc_chexnet_covid],
        test=test_data,
        meta_data=meta_data,
        save_path=save_path,
        config=_CONFIG,
        custom_callbacks=callbacks_constructed,
        target_indice=target_indice,
    )
Exemple #6
0
def train_megan(save_path: str,
                featurizer_key: str,
                learning_rate: float = 0.0001,
                train_samples_per_epoch: int = -1,
                valid_samples_per_epoch: int = -1,
                batch_size: int = 4,
                gen_lr_factor: float = 0.1,
                gen_lr_patience: int = 4,
                big_lr_epochs: int = -1,
                early_stopping: int = 16,
                start_epoch: int = 0,
                megan_warmup_epochs: int = 1,
                save_each_epoch: bool = False,
                max_n_epochs: int = 1000):
    """Train a MEGAN model, resuming from ``save_path`` when possible.

    The function writes the current config, the latest weights
    (``model.pt``), the best weights by validation accuracy
    (``model_best.pt``), summaries (``summary/``) and, optionally,
    per-epoch checkpoints (``checkpoints/``) under ``save_path``.

    Args:
        save_path: Output directory; created if it does not exist.
        featurizer_key: Passed to ``get_featurizer``; the result must be a
            ``MeganTrainingSamplesFeaturizer`` (asserted).
        learning_rate: Adam learning rate; replaced by the checkpoint's
            ``lr`` entry when resuming.
        train_samples_per_epoch: Samples per training epoch; ``-1`` means
            the number of selected training indices.
        valid_samples_per_epoch: Samples per validation epoch; ``-1``
            means the number of selected validation indices.
        batch_size: Number of indices drawn for each batch.
        gen_lr_factor: Multiplier applied to the learning rate when
            patience runs out or when ``big_lr_epochs`` is reached.
        gen_lr_patience: Epochs without validation-accuracy improvement
            tolerated before the learning rate is multiplied.
        big_lr_epochs: ``-1`` disables it; otherwise the epoch index at
            which the learning rate is multiplied by ``gen_lr_factor``,
            and before which LR decay and early stopping are not applied.
        early_stopping: Training stops once the number of epochs without
            improvement exceeds this value.
        start_epoch: First epoch index; replaced by the checkpoint's
            ``epoch + 1`` when resuming.
        megan_warmup_epochs: Number of epochs during which the learning
            rate is ramped up from 0 in equal per-batch steps.
        save_each_epoch: Also save a separate checkpoint after every
            training epoch.
        max_n_epochs: Upper bound (exclusive) of the epoch loop.

    Raises:
        ValueError: From a batch whose loss is infinite or NaN.
    """
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    checkpoints_path = os.path.join(save_path, 'checkpoints')
    if save_each_epoch and not os.path.exists(checkpoints_path):
        os.makedirs(checkpoints_path)

    log_current_config()
    conf_path = os.path.join(save_path, 'config.gin')
    save_current_config(conf_path)

    model_path = os.path.join(save_path, 'model.pt')
    best_model_path = os.path.join(save_path, 'model_best.pt')

    summary_dir = 'summary'
    summary_dir = os.path.join(save_path, summary_dir)
    # NOTE(review): train_samples_per_epoch may still be -1 here; it is only
    # resolved to len(train_ind) further down, after the callback is built.
    tf_callback = DumpTensorflowSummaries(
        save_path=summary_dir, step_multiplier=train_samples_per_epoch)

    dataset = get_dataset()
    featurizer = get_featurizer(featurizer_key)
    assert isinstance(featurizer, MeganTrainingSamplesFeaturizer)
    action_vocab = featurizer.get_actions_vocabulary(dataset.feat_dir)

    # copy featurizer dictionary files needed for using the model
    feat_dir = featurizer.dir(dataset.feat_dir)
    model_feat_dir = featurizer.dir(save_path)
    if not os.path.exists(model_feat_dir):
        os.makedirs(model_feat_dir)
    copyfile(get_actions_vocab_path(feat_dir),
             get_actions_vocab_path(model_feat_dir))
    copyfile(get_prop2oh_vocab_path(feat_dir),
             get_prop2oh_vocab_path(model_feat_dir))

    logger.info("Creating model...")
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    model = Megan(n_atom_actions=action_vocab['n_atom_actions'],
                  n_bond_actions=action_vocab['n_bond_actions'],
                  prop2oh=action_vocab['prop2oh']).to(device)
    summary(model)

    logger.info("Loading data...")
    # Filled in later (after the optional checkpoint restore); the nested
    # functions below read it through the closure.
    data_dict = {}

    logger.info(f"Training for maximum of {max_n_epochs} epochs...")

    # Target learning rate restored once warmup ends.
    start_learning_rate = learning_rate
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    def set_lr(lr: float):
        """Set the learning rate of every optimizer parameter group."""
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

    def run_batch(ind: np.ndarray, train: bool) -> dict:
        """Run one randomly drawn batch and return its metrics.

        Draws ``batch_size`` distinct entries from ``ind``, computes the
        loss and accuracy metrics, and performs an optimizer step when
        ``train`` is true.
        """
        if train:
            optimizer.zero_grad()

        # NOTE(review): sampling without replacement presumably fails when
        # len(ind) < batch_size -- confirm small datasets are not expected.
        batch_ind = np.random.choice(ind, size=batch_size, replace=False)
        batch_metrics = {}
        batch = generate_batch(batch_ind,
                               data_dict['metadata'],
                               featurizer,
                               data_dict['data'],
                               action_vocab=action_vocab)

        batch_result = model(batch)
        actions = batch_result['output']

        target, n_steps = batch['target'], batch['n_steps']
        n_total_steps = torch.sum(n_steps)

        y_max_pred_prob, y_pred = torch.max(actions, dim=-1)
        y_val, y_true = torch.max(target, dim=-1)
        # True where the target row has an entry equal to 1.
        y_val_one = y_val == 1
        is_hard = batch['is_hard']

        weight = torch.ones_like(is_hard)
        avg_weight = torch.mean(weight.float(), axis=-1)

        weight = weight * y_val_one
        weight = weight.unsqueeze(-1).expand(*actions.shape)
        target_one = target == 1
        eps = 1e-09

        # Only positions with target == 1 contribute (masked by target_one);
        # adding ~target_one keeps the log argument >= 1 everywhere else.
        loss = -torch.log2(actions + ~target_one + eps) * target_one * weight
        loss = torch.sum(loss, dim=-1)
        # NOTE(review): the meaning of the constant 16 is not visible here.
        path_losses = torch.sum(loss, dim=-1) / (avg_weight * 16)

        min_losses = []
        # for each reaction, use the minimum loss for each possible path as the loss to optimize
        path_i = 0
        for n_paths in batch['n_paths']:
            path_loss = torch.min(path_losses[path_i:path_i + n_paths])
            min_losses.append(path_loss.unsqueeze(-1))
            path_i += n_paths
        min_losses = torch.cat(min_losses)

        loss = torch.mean(min_losses)

        if torch.isinf(loss):
            raise ValueError(
                'Infinite loss (correct action has predicted probability=0.0)')

        if loss != loss:  # this is only true for NaN in pytorch
            raise ValueError('NaN loss')

        # skip accuracy metrics if there are no positive samples in batch
        # NOTE(review): nothing is actually skipped here; only the hard/easy
        # metrics below are guarded by a positive count.
        correct = ((y_pred == y_true) & y_val_one).float()

        step_correct = torch.sum(correct) / n_total_steps
        batch_metrics['step_acc'] = step_correct.cpu().detach().numpy()

        total_hard = torch.sum(is_hard)
        if total_hard > 0:
            hard_correct = torch.sum(correct * is_hard) / total_hard
            batch_metrics['step_acc_hard'] = hard_correct.cpu().detach().numpy(
            )

        is_easy = (1.0 - is_hard) * y_val_one

        total_easy = torch.sum(is_easy)
        if total_easy > 0:
            easy_correct = torch.sum(correct * is_easy) / total_easy
            batch_metrics['step_acc_easy'] = easy_correct.cpu().detach().numpy(
            )

        # A path counts as correct when all of its n_steps are correct; a
        # reaction counts as correct when any of its paths is.
        all_correct = torch.sum(correct, dim=-1)
        all_correct = all_correct == n_steps
        acc = []
        path_i = 0
        for n_paths in batch['n_paths']:
            corr = any(all_correct[i] == 1
                       for i in range(path_i, path_i + n_paths))
            acc.append(corr)
            path_i += n_paths
        if len(acc) > 0:
            batch_metrics['acc'] = np.mean(acc)

        if train:
            loss.backward()
            optimizer.step()

        batch_metrics['loss'] = loss.cpu().detach().numpy()
        return batch_metrics

    def get_lr():
        """Return the learning rate of the first optimizer parameter group."""
        for param_group in optimizer.param_groups:
            return param_group['lr']

    def run_epoch(set_key: str,
                  i_ep: int,
                  all_ind: np.ndarray,
                  train: bool,
                  batches_per_epoch: int,
                  lr_step: float = 0.0):
        """Run ``batches_per_epoch`` batches and return the averaged metrics.

        When ``lr_step`` is positive, the learning rate is increased by that
        amount before every batch. After a training epoch the latest weights
        are saved (plus a per-epoch checkpoint when ``save_each_epoch``).
        """
        torch.cuda.empty_cache()
        if train:
            model.train()
        else:
            model.eval()

        metrics = {}
        counts = Counter()

        for batch_i in tqdm(range(batches_per_epoch),
                            desc=f'{save_path} {set_key} epoch {i_ep + 1}'):
            if lr_step > 0:
                set_lr(get_lr() + lr_step)
            try:
                batch_metrics = run_batch(all_ind, train)
                for k, v in batch_metrics.items():
                    if k not in metrics:
                        metrics[k] = 0
                    metrics[k] += v
                    counts[k] += 1
            except AssertionError as e:
                # batch skipped because of zero loss
                logger.debug(f"Exception while running batch: {str(e)}")
            except Exception as e:
                logger.warning(f"Exception while running batch: {str(e)}")
                raise e

        # Each metric is averaged over the batches that reported it.
        metrics = dict((k, v / counts[k]) for k, v in metrics.items())
        str_metrics = ', '.join("{:s}={:.4f}".format(k, v)
                                for k, v in metrics.items())
        logger.info(f'{set_key} epoch {i_ep + 1}: {str_metrics}')

        if train:
            # ``no_progress`` is assigned in the enclosing function after this
            # definition; it is looked up when run_epoch is called.
            save_weights(model_path,
                         model,
                         optimizer,
                         epoch=i_ep,
                         lr=get_lr(),
                         no_progress=no_progress)

            if save_each_epoch:
                model_epoch_path = os.path.join(
                    checkpoints_path,
                    f'model_{(i_ep + 1) * train_samples_per_epoch}.pt')
                save_weights(model_epoch_path,
                             model,
                             optimizer,
                             epoch=i_ep,
                             lr=get_lr())
        return metrics

    best_acc = 0
    no_progress = 0

    # Resume from the latest checkpoint when one exists.
    if os.path.exists(model_path):
        checkpoint = load_state_dict(model_path)
        if 'epoch' in checkpoint:
            start_epoch = checkpoint['epoch'] + 1
        logger.info("Resuming training after {} epochs".format(start_epoch))
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        if 'lr' in checkpoint:
            learning_rate = checkpoint['lr']
            start_learning_rate = learning_rate
            logger.info(
                "Resuming training with LR={:f} epochs".format(learning_rate))
            set_lr(learning_rate)
        if 'valid_acc' in checkpoint:
            best_acc = checkpoint['valid_acc']
            logger.info(f"Best acc so far: {best_acc}")

    # Remaining warmup epochs after accounting for epochs already trained.
    # NOTE(review): this remaining count is later compared against the
    # absolute epoch index (epoch_i); confirm this is intended when
    # start_epoch > 0.
    megan_warmup_epochs = max(megan_warmup_epochs - start_epoch, 0)
    if megan_warmup_epochs > 0:
        learning_rate = 0.0
        set_lr(learning_rate)

    # Counters of consecutive epochs without improvement: one for early
    # stopping, one for learning-rate decay.
    no_progress = 0
    no_progress_lr = 0

    logger.info('Loading data')
    loaded_data = featurizer.load(dataset.feat_dir)
    chunk_metadata = loaded_data['reaction_metadata']
    data_dict['data'] = loaded_data
    data_dict['metadata'] = chunk_metadata
    data_dict['mean_n_steps'] = np.mean(data_dict['metadata']['n_samples'])

    # Build boolean masks for the train/valid split, optionally restricted to
    # remapped reactions and to the first path of each reaction.
    metadata = data_dict['metadata']
    if 'remapped' in metadata:
        train_ind = (metadata['is_train'] == 1) & (metadata['remapped'])
        valid_ind = (metadata['is_train'] == 0) & (metadata['remapped'])
    else:
        train_ind = metadata['is_train'] == 1
        valid_ind = metadata['is_train'] == 0

    if 'path_i' in metadata:
        train_ind = train_ind & (metadata['path_i'] == 0)
        valid_ind = valid_ind & (metadata['path_i'] == 0)

    # Convert the masks into flat arrays of row positions.
    train_ind = np.argwhere(train_ind).flatten()
    valid_ind = np.argwhere(valid_ind).flatten()

    logger.info(
        f"Training on chunk of {len(train_ind)} training samples and {len(valid_ind)} valid samples"
    )
    if train_samples_per_epoch == -1:
        train_samples_per_epoch = len(train_ind)
    if valid_samples_per_epoch == -1:
        valid_samples_per_epoch = len(valid_ind)
    train_batches_per_epoch = int(np.ceil(train_samples_per_epoch /
                                          batch_size))
    valid_batches_per_epoch = int(np.ceil(valid_samples_per_epoch /
                                          batch_size))

    logger.info(
        f'Starting training on epoch {start_epoch + 1} with Learning Rate={learning_rate} '
        f'({megan_warmup_epochs} warmup epochs)')

    for epoch_i in range(start_epoch, max_n_epochs):
        if epoch_i == megan_warmup_epochs:
            set_lr(start_learning_rate)
            logger.info(
                f'Learning rate set to {start_learning_rate} after {megan_warmup_epochs} warmup epochs'
            )

        if big_lr_epochs != -1 and epoch_i == big_lr_epochs:
            learning_rate *= gen_lr_factor
            no_progress = 0
            no_progress_lr = 0
            set_lr(learning_rate)
            logger.info(f'Changing Learning Rate to {learning_rate}')

        # Per-batch increment that reaches start_learning_rate after all
        # warmup batches.
        if megan_warmup_epochs > 0:
            warmup_lr_step = start_learning_rate / (train_batches_per_epoch *
                                                    megan_warmup_epochs)
        else:
            warmup_lr_step = 0

        learning_rate = get_lr()
        train_metrics = run_epoch(
            'train',
            epoch_i,
            train_ind,
            True,
            train_batches_per_epoch,
            lr_step=warmup_lr_step if epoch_i < megan_warmup_epochs else 0.0)
        with torch.no_grad():
            valid_metrics = run_epoch('valid', epoch_i, valid_ind, False,
                                      valid_batches_per_epoch)

        all_metrics = {}
        for key, val in train_metrics.items():
            all_metrics[f'train_{key}'] = val
        for key, val in valid_metrics.items():
            all_metrics[f'valid_{key}'] = val

        all_metrics['lr'] = learning_rate
        tf_callback.on_epoch_end(epoch_i + 1, all_metrics)

        # NOTE(review): 'acc' is absent when no validation batch reported it
        # (e.g. zero validation batches); that would raise KeyError here.
        valid_acc = valid_metrics['acc']
        if valid_acc > best_acc:
            logger.info(
                f'Saving best model from epoch {epoch_i + 1} to {best_model_path} (acc={valid_acc})'
            )
            save_weights(best_model_path,
                         model,
                         optimizer,
                         epoch=epoch_i,
                         lr=learning_rate,
                         valid_acc=valid_acc)

            best_acc = valid_acc
            no_progress = 0
            no_progress_lr = 0
        else:
            no_progress += 1
            no_progress_lr += 1

        # LR decay and early stopping only apply once the big-LR phase (if
        # any) is over.
        if big_lr_epochs == -1 or epoch_i >= big_lr_epochs:
            if no_progress_lr > gen_lr_patience:
                learning_rate *= gen_lr_factor
                logger.info(f'Changing Learning Rate to {learning_rate}')
                set_lr(learning_rate)
                no_progress_lr = 0

            if no_progress > early_stopping:
                logger.info(f'Early stopping after {epoch_i + 1} epochs')
                break

    logger.info("Experiment finished!")
def _plot_predictions_logistic(data,
                               predictors,
                               properties=dict(),
                               obs_site_name='obs'):
    """Plot mean predictions of pyro predictors for a binary outcome.

    One row of two panels is drawn per predictor: the left panel shows the
    rows whose category column equals 0, the right panel those equal to 1.
    Points are coloured by their true label.

    Args:
        data: Mapping with ``'x'`` (2-D features) and ``'y'`` (labels).
        predictors: Mapping from a display name to a callable that takes
            ``x`` and returns posterior samples accepted by ``summary``.
        properties: Optional plot settings, read only: ``'x'`` (feature
            column for the x axis), ``'category'`` (column holding the 0/1
            category), ``'x_label'``, ``'y_label'``, ``'y_labels'`` and
            ``'category_labels'`` (both keyed by 0/1).
        obs_site_name: Site whose summary statistics are plotted.
    """
    fig, axs = plt.subplots(
        nrows=len(predictors),
        ncols=2,
        figsize=(15, 6 * len(predictors)),
        sharey=True,
        sharex=True,
        squeeze=False,
    )

    x_col = properties.get('x', None)
    x_label = properties.get('x_label', 'x')
    y_label = properties.get('y_label', 'y')

    y_labels = properties.get('y_labels', dict())
    positive_y = y_labels.get(1, 'Positive class')
    negative_y = y_labels.get(0, 'Negative class')

    cat_col = properties.get('category', None)
    cat_name = properties.get('category_labels', dict())
    positive_cat = cat_name.get(1, 'Positive')
    negative_cat = cat_name.get(0, 'Negative')

    x = data['x']
    y = data['y']

    for ax, (predictor_name, predictor) in zip(axs, predictors.items()):
        # Draw the posterior once per predictor so that both panels show
        # the same set of samples.
        samples = predictor(x)
        y_mean = summary(samples)[obs_site_name]["mean"]
        # Drop a leading singleton dimension, if any, so the statistic has
        # one entry per row of ``x``.
        if len(y_mean.shape) != 1:
            y_mean = y_mean.squeeze(0)

        for category in (0, 1):
            ax[category].set(
                xlabel=x_label,
                ylabel=y_label,
                title=positive_cat if category == 1 else negative_cat,
            )

            category_idx = x[:, cat_col] == category
            x_data = x[category_idx, x_col]
            if len(x_data) == 0:
                # Nothing to draw for this category; leave the panel empty.
                continue
            y_data = y[category_idx]
            # The predictions cover every row of ``x``; restrict them to
            # this category so each point is paired with its own value.
            y_mean_cat = y_mean[category_idx]

            xplot, ym, y_true = list(
                zip(*sorted(zip(x_data, y_mean_cat, y_data),
                            key=lambda r: r[0])))

            y_positive_idx = (
                torch.Tensor(y_true) == 1.).numpy().astype('bool')
            ax[category].plot(np.array(xplot)[y_positive_idx],
                              np.array(ym)[y_positive_idx],
                              marker='o',
                              ms=10,
                              linestyle='',
                              alpha=1,
                              color='green',
                              label=positive_y)
            ax[category].plot(np.array(xplot)[~y_positive_idx],
                              np.array(ym)[~y_positive_idx],
                              marker='o',
                              ms=10,
                              linestyle='',
                              alpha=1,
                              color='red',
                              label=negative_y)

        ax[0].set_ylabel(f"{predictor_name}\n{y_label}")

    # The legend is taken from the left panel of the last row.
    handles, labels = axs[-1][0].get_legend_handles_labels()
    fig.legend(handles, labels, loc='upper right')
    plt.show()
def _plot_predictions_linear(data,
                             predictors,
                             properties=dict(),
                             obs_site_name='obs'):
    """Plot regression predictions of pyro predictors with 90% bands.

    One row of two panels is drawn per predictor: the left panel shows the
    rows whose category column equals 0, the right panel those equal to 1.
    Each panel shows the posterior predictive band, the band of the
    ``"_RETURN"`` site, its mean, and the true values.

    Args:
        data: Mapping with ``'x'`` (2-D features) and ``'y'`` (targets).
        predictors: Mapping from a display name to a callable that takes
            ``x`` and returns posterior samples accepted by ``summary``.
        properties: Optional plot settings, read only: ``'x'`` (feature
            column for the x axis), ``'category'`` (column holding the 0/1
            category), ``'x_label'``, ``'y_label'`` and
            ``'category_labels'`` (keyed by 0/1).
        obs_site_name: Site used for the posterior predictive band.
    """
    fig, axs = plt.subplots(
        nrows=len(predictors),
        ncols=2,
        figsize=(15, 6 * len(predictors)),
        sharey=True,
        sharex=True,
        squeeze=False,
    )

    x_col = properties.get('x', None)
    x_label = properties.get('x_label', 'x')
    y_label = properties.get('y_label', 'y')

    cat_col = properties.get('category', None)
    cat_name = properties.get('category_labels', dict())
    positive_cat = cat_name.get(1, 'Positive')
    negative_cat = cat_name.get(0, 'Negative')

    x = data['x']
    y = data['y']

    def _per_row(stats):
        # Drop a leading singleton dimension so every statistic has one
        # entry per row of ``x``.
        return {
            k: v if len(v.shape) == 1 else v.squeeze(0)
            for k, v in stats.items()
        }

    for ax, (predictor_name, predictor) in zip(axs, predictors.items()):
        # Draw the posterior once per predictor so that both panels show
        # the same set of samples.
        samples = predictor(x)
        pred_summary = summary(samples)
        y_pred = _per_row(pred_summary[obs_site_name])
        mu = _per_row(pred_summary["_RETURN"])

        for category in (0, 1):
            ax[category].set(
                xlabel=x_label,
                ylabel=y_label,
                title=positive_cat if category == 1 else negative_cat,
            )

            category_idx = x[:, cat_col] == category
            x_data = x[category_idx, x_col]
            if len(x_data) == 0:
                # Nothing to draw for this category; leave the panel empty.
                continue
            y_data = y[category_idx]

            # The statistics cover every row of ``x``; restrict them to
            # this category so each point is paired with its own values.
            xplot, mum, mu5, mu95, ym, y5, y95, y_true = list(
                zip(*sorted(
                    zip(x_data,
                        mu["mean"][category_idx],
                        mu["5%"][category_idx],
                        mu["95%"][category_idx],
                        y_pred["mean"][category_idx],
                        y_pred["5%"][category_idx],
                        y_pred["95%"][category_idx],
                        y_data),
                    key=lambda r: r[0])))
            ax[category].fill_between(
                xplot,
                y5,
                y95,
                color='orange',
                alpha=0.5,
                label="Posterior predictive distribution with 90% CI")
            ax[category].fill_between(xplot,
                                      mu5,
                                      mu95,
                                      color='cornflowerblue',
                                      alpha=0.8,
                                      label="Regression line 90% CI")
            ax[category].plot(xplot, mum, color="red", label="Mean output")

            ax[category].plot(xplot,
                              y_true,
                              marker='o',
                              ms=4,
                              linestyle='',
                              alpha=1,
                              color='green',
                              label="True values")

        ax[0].set_ylabel(f"{predictor_name}\n{y_label}")

    # The legend is taken from the left panel of the last row.
    handles, labels = axs[-1][0].get_legend_handles_labels()
    fig.legend(handles, labels, loc='upper right')
    plt.show()