Code Example #1
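A unit test verifying that compute() raises NotComputableError when called before any update(); the message names MeanSquaredError because RootMeanSquaredError builds on that class (see the sketch after this example).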
def test_zero_sample():
    rmse = RootMeanSquaredError()
    with pytest.raises(
        NotComputableError,
        match=r"MeanSquaredError must have at least one example before it can be computed",
    ):
        rmse.compute()
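The message above mentions MeanSquaredError rather than RootMeanSquaredError. As a rough, illustrative sketch (not the library source), ignite's RootMeanSquaredError can be thought of as a thin subclass of MeanSquaredError that only overrides compute(); the class name RootMeanSquaredErrorSketch below is made up for illustration:

import math

from ignite.metrics import MeanSquaredError


class RootMeanSquaredErrorSketch(MeanSquaredError):
    """Illustrative only: square-root the accumulated mean squared error."""

    def compute(self) -> float:
        # MeanSquaredError.compute() raises NotComputableError when no examples
        # have been accumulated, which is exactly what the test above exercises.
        mse = super().compute()
        return math.sqrt(mse)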
Code Example #2
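The constructor of the LocalMetrics class shown in full in Code Example #9; it wires a RootMeanSquaredError together with PearsonR, Mean, and ScoreHistogram metrics behind a single ignite Metric.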
def __init__(self, column, title=None, metrics=None, figures=None, output_transform=lambda x: x):
    self.column = column
    self.title = title if title is not None else ''
    self.metrics = set(metrics if metrics is not None else LocalMetrics.METRICS)
    self.figures = set(figures if figures is not None else LocalMetrics.FIGURES)
    self._rmse = RootMeanSquaredError()
    self._pearson = PearsonR()
    self._per_model_pearson = Mean()
    self._hist = ScoreHistogram(title=title)
    super(LocalMetrics, self).__init__(output_transform=output_transform)
Code Example #3
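A basic compute test: for predictions [2, -2] against zero targets, RMSE = sqrt((2^2 + (-2)^2) / 2) = 2, and after reset() the second pair gives sqrt((3^2 + (-3)^2) / 2) = 3.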
def test_compute():
    rmse = RootMeanSquaredError()

    y_pred = torch.Tensor([[2.0], [-2.0]])
    y = torch.zeros(2)
    rmse.update((y_pred, y))
    assert rmse.compute() == 2.0

    rmse.reset()
    y_pred = torch.Tensor([[3.0], [-3.0]])
    y = torch.zeros(2)
    rmse.update((y_pred, y))
    assert rmse.compute() == 3.0
Code Example #4
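A broader compute test: random 1-D and 2-D inputs are fed either in a single update or in batches of 16, and the streamed result is compared with a NumPy reference RMSE.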
def test_compute():

    rmse = RootMeanSquaredError()

    def _test(y_pred, y, batch_size):
        rmse.reset()
        if batch_size > 1:
            n_iters = y.shape[0] // batch_size + 1
            for i in range(n_iters):
                idx = i * batch_size
                rmse.update(
                    (y_pred[idx:idx + batch_size], y[idx:idx + batch_size]))
        else:
            rmse.update((y_pred, y))

        np_y = y.numpy().ravel()
        np_y_pred = y_pred.numpy().ravel()

        np_res = np.sqrt(
            np.power((np_y - np_y_pred), 2.0).sum() / np_y.shape[0])
        res = rmse.compute()

        assert isinstance(res, float)
        assert pytest.approx(res) == np_res

    def get_test_cases():

        test_cases = [
            (torch.empty(10).uniform_(0, 10), torch.empty(10).uniform_(0, 10), 1),
            (torch.empty(10, 1).uniform_(-10, 10), torch.empty(10, 1).uniform_(-10, 10), 1),
            # updated batches
            (torch.empty(50).uniform_(0, 10), torch.empty(50).uniform_(0, 10), 16),
            (torch.empty(50, 1).uniform_(-10, 10), torch.empty(50, 1).uniform_(-10, 10), 16),
        ]

        return test_cases

    for _ in range(5):
        # check multiple random inputs as random exact occurrences are rare
        test_cases = get_test_cases()
        for y_pred, y, batch_size in test_cases:
            _test(y_pred, y, batch_size)
Code Example #5
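An inner helper from a distributed test: it attaches RootMeanSquaredError to an Engine, runs one epoch, and checks the aggregated 'rmse' metric against a NumPy reference built from the per-rank predictions.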
    # Inner helper from a distributed test: `update`, `n_iters`, `offset`, `y_true`,
    # `device`, and `tol` are defined in the enclosing test, and `idist` is the usual
    # alias for `ignite.distributed`.
    def _test(metric_device):
        engine = Engine(update)

        m = RootMeanSquaredError(device=metric_device)
        m.attach(engine, "rmse")

        data = list(range(n_iters))
        engine.run(data=data, max_epochs=1)

        assert "rmse" in engine.state.metrics
        res = engine.state.metrics["rmse"]

        y_preds_full = []
        for i in range(idist.get_world_size()):
            y_preds_full.append((i + 1) * torch.ones(offset))
        y_preds_full = torch.stack(y_preds_full).to(device).flatten()

        true_res = np.sqrt(np.mean(np.square((y_true - y_preds_full).cpu().numpy())))

        assert pytest.approx(res, rel=tol) == true_res
Code Example #6
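A distributed integration test: each rank predicts the constant value (rank + 1) over its shard of y_true, and the globally reduced RMSE is compared with a NumPy reference over all ranks.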
# Note: the original test module also imports `torch`, `pytest`, and
# `RootMeanSquaredError` (from `ignite.metrics`) at module level.
def _test_distrib_itegration(device):
    import numpy as np
    import torch.distributed as dist

    from ignite.engine import Engine

    rank = dist.get_rank()
    n_iters = 100
    s = 50
    offset = n_iters * s

    y_true = torch.arange(0, offset * dist.get_world_size(), dtype=torch.float).to(
        device
    )
    y_preds = (rank + 1) * torch.ones(offset, dtype=torch.float).to(device)

    def update(engine, i):
        return (
            y_preds[i * s : (i + 1) * s],
            y_true[i * s + offset * rank : (i + 1) * s + offset * rank],
        )

    engine = Engine(update)

    m = RootMeanSquaredError(device=device)
    m.attach(engine, "rmse")

    data = list(range(n_iters))
    engine.run(data=data, max_epochs=1)

    assert "rmse" in engine.state.metrics
    res = engine.state.metrics["rmse"]

    y_preds_full = []
    for i in range(dist.get_world_size()):
        y_preds_full.append((i + 1) * torch.ones(offset))
    y_preds_full = torch.stack(y_preds_full).to(device).flatten()

    true_res = np.sqrt(np.mean(np.square((y_true - y_preds_full).cpu().numpy())))

    assert pytest.approx(res) == true_res
Code Example #7
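A parametrized version of the compute test: test_data supplies (y_pred, y, batch_size) tuples, and the streamed result is checked against a NumPy reference RMSE.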
def test_compute(n_times, test_data):

    rmse = RootMeanSquaredError()

    y_pred, y, batch_size = test_data
    rmse.reset()
    if batch_size > 1:
        n_iters = y.shape[0] // batch_size + 1
        for i in range(n_iters):
            idx = i * batch_size
            rmse.update((y_pred[idx : idx + batch_size], y[idx : idx + batch_size]))
    else:
        rmse.update((y_pred, y))

    np_y = y.numpy().ravel()
    np_y_pred = y_pred.numpy().ravel()

    np_res = np.sqrt(np.power((np_y - np_y_pred), 2.0).sum() / np_y.shape[0])
    res = rmse.compute()

    assert isinstance(res, float)
    assert pytest.approx(res) == np_res
Code Example #8
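A small factory that wraps RootMeanSquaredError in a project-specific DictMetric, presumably so the metric is fed from a given key of a dictionary-shaped engine output.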
def RMSEMetric(key):
    """Create RMSE metric on key."""
    return DictMetric(key, RootMeanSquaredError())
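For comparison, ignite metrics can achieve a similar effect without a wrapper class via their output_transform argument; the dictionary keys "y_pred" and "y" below are an assumed output layout, not something defined in the example above:

from ignite.metrics import RootMeanSquaredError

# Assumes the engine's process_function returns a dict with hypothetical keys
# "y_pred" and "y"; output_transform unpacks it into the (y_pred, y) pair that
# the metric expects.
rmse = RootMeanSquaredError(
    output_transform=lambda output: (output["y_pred"], output["y"])
)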
Code Example #9
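A custom ignite Metric that tracks several local-score metrics at once (RMSE, Pearson, per-model Pearson, and a score histogram), filtering out native structures and residues without a ground-truth lddt score before each update.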
class LocalMetrics(ignite.metrics.Metric):
    METRICS = (
        'rmse',
        'pearson',
        'per_model_pearson',
    )
    FIGURES = (
        'hist',
    )

    def __init__(self, column, title=None, metrics=None, figures=None, output_transform=lambda x: x):
        self.column = column
        self.title = title if title is not None else ''
        self.metrics = set(metrics if metrics is not None else LocalMetrics.METRICS)
        self.figures = set(figures if figures is not None else LocalMetrics.FIGURES)
        self._rmse = RootMeanSquaredError()
        self._pearson = PearsonR()
        self._per_model_pearson = Mean()
        self._hist = ScoreHistogram(title=title)
        super(LocalMetrics, self).__init__(output_transform=output_transform)

    def reset(self):
        self._rmse.reset()
        self._pearson.reset()
        self._per_model_pearson.reset()
        self._hist.reset()

    def update(self, batch: DecoyBatch):
        # Skip native structures and ignore residues that don't have a ground-truth score
        non_native = np.repeat(np.char.not_equal(batch.decoy_name, 'native'),
                               repeats=batch.num_nodes_by_graph.cpu().numpy())
        has_score = torch.isfinite(batch.lddt).cpu().numpy()
        valid_scores = np.logical_and(non_native, has_score)

        # Used to uniquely identify a (protein, model) pair without using their str names
        target_model_id = batch.node_index_by_graph[valid_scores].cpu().numpy()
        node_preds = batch.node_features[valid_scores, self.column].detach().cpu().numpy()
        node_targets = batch.lddt[valid_scores].detach().cpu().numpy()

        # Streaming metrics on local scores (they expect torch tensors, not numpy arrays)
        self._rmse.update((torch.from_numpy(node_preds), torch.from_numpy(node_targets)))
        self._pearson.update((torch.from_numpy(node_preds), torch.from_numpy(node_targets)))

        # Per model metrics: pandas is the easiest way to get a groupby.
        grouped = pd.DataFrame({
            'target_model': target_model_id,
            'preds': node_preds,
            'true': node_targets
        }).groupby('target_model')

        per_model_pearsons = grouped.apply(lambda df: pearson(df['preds'], df['true']))
        self._per_model_pearson.update(torch.from_numpy(per_model_pearsons.values))

        self._hist.update(node_preds, node_targets)

    def compute(self):
        metrics = {}
        figures = {}

        if 'rmse' in self.metrics:
            metrics['rmse'] = self._rmse.compute()
        if 'pearson' in self.metrics:
            metrics['pearson'] = self._pearson.compute()
        if 'per_model_pearson' in self.metrics:
            metrics['per_model_pearson'] = self._per_model_pearson.compute()

        if 'hist' in self.figures:
            extra_title = []
            if 'pearson' in self.metrics:
                extra_title.append(f'$R$        {metrics["pearson"]:.3f}')
            if 'per_model_pearson' in self.metrics:
                extra_title.append(f'$R_\\mathrm{{model}}$ {metrics["per_model_pearson"]:.3f}')
            figures['hist'] = self._hist.compute('\n'.join(extra_title))

        return {'metrics': metrics, 'figures': figures}

    def completed(self, engine, prefix):
        result = self.compute()
        for name, metric in result['metrics'].items():
            engine.state.metrics[prefix + '/' + name] = metric
        for name, fig in result['figures'].items():
            engine.state.figures[prefix + '/' + name] = fig
Code Example #10
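The same zero-sample check as Code Example #1, without asserting on the error message.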
def test_zero_div():
    rmse = RootMeanSquaredError()
    with pytest.raises(NotComputableError):
        rmse.compute()
Code Example #11
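A training script that attaches RootMeanSquaredError alongside Loss and MeanAbsoluteError to supervised evaluators, logs results to TensorBoard, and checkpoints the model with ModelCheckpoint.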
def run(args, seed):
    config.make_paths()

    torch.random.manual_seed(seed)
    train_loader, val_loader, shape = get_data_loaders(
        config.Training.batch_size,
        proportion=config.Training.proportion,
        test_batch_size=config.Training.batch_size * 2,
    )
    n, d, t = shape
    model = models.ConvNet(d, seq_len=t)

    writer = tb.SummaryWriter(log_dir=config.TENSORBOARD)

    model.to(config.device)  # Move model before creating optimizer
    optimizer = torch.optim.Adam(model.parameters())
    criterion = nn.MSELoss()

    trainer = create_supervised_trainer(model,
                                        optimizer,
                                        criterion,
                                        device=config.device)
    trainer.logger = setup_logger("trainer")

    checkpointer = ModelCheckpoint(
        config.MODEL,
        model.__class__.__name__,
        n_saved=2,
        create_dir=True,
        save_as_state_dict=True,
    )
    trainer.add_event_handler(
        Events.EPOCH_COMPLETED(every=config.Training.save_every),
        checkpointer,
        {"model": model},
    )

    val_metrics = {
        "mse": Loss(criterion),
        "mae": MeanAbsoluteError(),
        "rmse": RootMeanSquaredError(),
    }

    evaluator = create_supervised_evaluator(model,
                                            metrics=val_metrics,
                                            device=config.device)
    evaluator.logger = setup_logger("evaluator")

    ar_evaluator = create_ar_evaluator(model,
                                       metrics=val_metrics,
                                       device=config.device)
    ar_evaluator.logger = setup_logger("ar")

    @trainer.on(Events.EPOCH_COMPLETED(every=config.Training.save_every))
    def log_ar(engine):
        ar_evaluator.run(val_loader)
        y_pred, y = ar_evaluator.state.output
        fig = plot_output(y, y_pred)
        writer.add_figure("eval/ar", fig, engine.state.epoch)
        plt.close()

    # desc = "ITERATION - loss: {:.2f}"
    # pbar = tqdm(initial=0, leave=False, total=len(train_loader), desc=desc.format(0))

    @trainer.on(Events.ITERATION_COMPLETED(every=config.Training.log_every))
    def log_training_loss(engine):
        # pbar.desc = desc.format(engine.state.output)
        # pbar.update(log_interval)
        if args.verbose:
            grad_norm = torch.stack(
                [p.grad.norm() for p in model.parameters()]).sum()
            writer.add_scalar("train/grad_norm", grad_norm,
                              engine.state.iteration)
        writer.add_scalar("train/loss", engine.state.output,
                          engine.state.iteration)

    @trainer.on(Events.EPOCH_COMPLETED(every=config.Training.eval_every))
    def log_training_results(engine):
        # pbar.refresh()
        evaluator.run(train_loader)
        metrics = evaluator.state.metrics
        for k, v in metrics.items():
            writer.add_scalar(f"train/{k}", v, engine.state.epoch)
        # tqdm.write(
        #    f"Training Results - Epoch: {engine.state.epoch}  Avg mse: {evaluator.state.metrics['mse']:.2f}"
        # )

    @trainer.on(Events.EPOCH_COMPLETED(every=config.Training.eval_every))
    def log_validation_results(engine):
        evaluator.run(val_loader)
        metrics = evaluator.state.metrics

        for k, v in metrics.items():
            writer.add_scalar(f"eval/{k}", v, engine.state.epoch)
        # tqdm.write(
        #    f"Validation Results - Epoch: {engine.state.epoch}  Avg mse: {evaluator.state.metrics['mse']:.2f}"
        # )

        # pbar.n = pbar.last_print_n = 0

        y_pred, y = evaluator.state.output

        fig = plot_output(y, y_pred)
        writer.add_figure("eval/preds", fig, engine.state.epoch)
        plt.close()

    # @trainer.on(Events.EPOCH_COMPLETED | Events.COMPLETED)
    # def log_time(engine):
    #    #tqdm.write(
    #    #    f"{trainer.last_event_name.name} took {trainer.state.times[trainer.last_event_name.name]} seconds"
    #    #)
    if args.ckpt is not None:
        ckpt = torch.load(args.ckpt)
        ModelCheckpoint.load_objects({"model": model}, ckpt)

    try:
        trainer.run(train_loader, max_epochs=config.Training.max_epochs)
    except Exception as e:
        import traceback

        print(traceback.format_exc())

    # pbar.close()
    writer.close()
Code Example #12
File: run.py  Project: weilin2018/convmos
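A full training entry point from the convmos project: RootMeanSquaredError is one of the evaluator metrics, combined with checkpointing, early stopping, TensorBoard logging, and NetCDF prediction dumps.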
def run(
    train_batch_size: int,
    val_batch_size: int,
    epochs: int,
    lr: float,
    model_name: str,
    architecture: str,
    momentum: float,
    log_interval: int,
    log_dir: str,
    save_dir: str,
    save_step: int,
    val_step: int,
    num_workers: int,
    patience: int,
    eval_only: bool = False,
    overfit_on_few_samples: bool = False,
):
    train_loader, val_loader, test_loader = get_data_loaders(
        train_batch_size,
        val_batch_size,
        num_workers=num_workers,
        overfit_on_few_samples=overfit_on_few_samples,
    )

    models_available = {'convmos': ConvMOS}

    model = models_available[model_name](architecture=architecture)
    writer = create_summary_writer(model, train_loader, log_dir)
    device = 'cpu'

    if torch.cuda.is_available():
        device = 'cuda'

    model = model.to(device=device)

    # E-OBS only provides observational data for land so we need to use a mask to avoid fitting on the sea
    land_mask_np = np.load('remo_eobs_land_mask.npy')
    # Convert booleans to 1 and 0, and convert numpy array to torch Tensor
    land_mask = torch.from_numpy(1 * land_mask_np).to(device)
    print('Land mask:')
    print(land_mask)
    loss_fn = partial(masked_mse_loss, mask=land_mask)

    optimizer = Adam(model.parameters(), lr=lr)
    trainer = create_supervised_trainer(model,
                                        optimizer,
                                        loss_fn,
                                        device=device)

    metrics = {
        'rmse': RootMeanSquaredError(),
        'mae': MeanAbsoluteError(),
        'mse': Loss(loss_fn),
    }
    train_evaluator = create_supervised_evaluator(model,
                                                  metrics=metrics,
                                                  device=device)
    val_evaluator = create_supervised_evaluator(model,
                                                metrics=metrics,
                                                device=device)

    to_save = {'model': model, 'optimizer': optimizer, 'trainer': trainer}
    checkpoint_handler = Checkpoint(
        to_save,
        DiskSaver(save_dir, create_dir=True, require_empty=False),
        n_saved=2,
        global_step_transform=global_step_from_engine(trainer),
    )
    trainer.add_event_handler(Events.EPOCH_COMPLETED(every=save_step),
                              checkpoint_handler)
    trainer.add_event_handler(Events.ITERATION_COMPLETED, TerminateOnNan())

    def score_function(engine):
        val_loss = engine.state.metrics['mse']
        return -val_loss

    best_checkpoint_handler = Checkpoint(
        to_save,
        DiskSaver(save_dir, create_dir=True, require_empty=False),
        n_saved=2,
        filename_prefix='best',
        score_function=score_function,
        score_name='val_loss',
        global_step_transform=global_step_from_engine(trainer),
    )
    val_evaluator.add_event_handler(Events.COMPLETED, best_checkpoint_handler)

    earlystop_handler = EarlyStopping(patience=patience,
                                      score_function=score_function,
                                      trainer=trainer)
    val_evaluator.add_event_handler(Events.COMPLETED, earlystop_handler)

    # Maybe load model
    checkpoint_files = glob(join(save_dir, 'checkpoint_*.pt'))
    if len(checkpoint_files) > 0:
        # latest_checkpoint_file = sorted(checkpoint_files)[-1]
        epoch_list = [
            int(c.split('.')[0].split('_')[-1]) for c in checkpoint_files
        ]
        last_epoch = sorted(epoch_list)[-1]
        latest_checkpoint_file = join(save_dir, f'checkpoint_{last_epoch}.pt')
        print('Loading last checkpoint', latest_checkpoint_file)
        last_epoch = int(latest_checkpoint_file.split('.')[0].split('_')[-1])
        if last_epoch >= epochs:
            print('Training was already completed')
            eval_only = True
            # return

        checkpoint = torch.load(latest_checkpoint_file, map_location=device)
        Checkpoint.load_objects(to_load=to_save, checkpoint=checkpoint)

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        iter = (engine.state.iteration - 1) % len(train_loader) + 1
        if iter % log_interval == 0:
            print("Epoch[{}] Iteration[{}/{}] Loss: {:.2f}"
                  "".format(engine.state.epoch, iter, len(train_loader),
                            engine.state.output))
            writer.add_scalar("training/loss", engine.state.output,
                              engine.state.iteration)

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_training_results(engine):
        train_evaluator.run(train_loader)
        metrics = train_evaluator.state.metrics
        avg_rmse = metrics['rmse']
        avg_mae = metrics['mae']
        avg_mse = metrics['mse']
        print(
            "Training Results - Epoch: {}  Avg RMSE: {:.2f} Avg loss: {:.2f} Avg MAE: {:.2f}"
            .format(engine.state.epoch, avg_rmse, avg_mse, avg_mae))
        writer.add_scalar("training/avg_loss", avg_mse, engine.state.epoch)
        writer.add_scalar("training/avg_rmse", avg_rmse, engine.state.epoch)
        writer.add_scalar("training/avg_mae", avg_mae, engine.state.epoch)

    @trainer.on(Events.EPOCH_COMPLETED(every=val_step))
    def log_validation_results(engine):
        val_evaluator.run(val_loader)
        metrics = val_evaluator.state.metrics
        avg_rmse = metrics['rmse']
        avg_mae = metrics['mae']
        avg_mse = metrics['mse']
        print(
            "Validation Results - Epoch: {}  Avg RMSE: {:.2f} Avg loss: {:.2f} Avg MAE: {:.2f}"
            .format(engine.state.epoch, avg_rmse, avg_mse, avg_mae))
        writer.add_scalar("validation/avg_loss", avg_mse, engine.state.epoch)
        writer.add_scalar("validation/avg_rmse", avg_rmse, engine.state.epoch)
        writer.add_scalar("validation/avg_mae", avg_mae, engine.state.epoch)

    @trainer.on(Events.EPOCH_COMPLETED(every=save_step))
    def log_model_weights(engine):
        for name, param in model.named_parameters():
            writer.add_histogram(f"model/weights_{name}", param,
                                 engine.state.epoch)

    @trainer.on(Events.EPOCH_COMPLETED(every=save_step))
    def regularly_predict_val_data(engine):
        predict_data(engine.state.epoch, val_loader)

    def predict_data(epoch: int, data_loader) -> xr.Dataset:
        # Predict all test data points and write the predictions
        print(f'Predicting {data_loader.dataset.mode} data...')
        data_loader_iter = iter(data_loader)
        pred_np = None
        for i in range(len(data_loader)):
            x, y = next(data_loader_iter)
            # print(x)
            pred = (model.forward(x.to(device=device)).to(
                device='cpu').detach().numpy()[:, 0, :, :])
            # print('=======================================')
            # print(pred)
            if pred_np is None:
                pred_np = pred
            else:
                pred_np = np.concatenate((pred_np, pred), axis=0)

        preds = xr.Dataset(
            {
                'pred': (['time', 'lat', 'lon'], pred_np),
                'input': (['time', 'lat', 'lon'], data_loader.dataset.X),
                'target': (['time', 'lat', 'lon'], data_loader.dataset.Y[:, :, :, 0]),
            },
            coords={
                'time': data_loader.dataset.times,  # list(range(len(val_loader.dataset)))
                'lon_var': (('lat', 'lon'), data_loader.dataset.lons[0]),  # list(range(x.shape[-2]))
                'lat_var': (('lat', 'lon'), data_loader.dataset.lats[0]),  # list(range(x.shape[-1]))
            },
        )

        preds.to_netcdf(
            join(save_dir,
                 f'predictions_{data_loader.dataset.mode}_{epoch}.nc'))
        return preds

    # kick everything off
    if not eval_only:
        trainer.run(train_loader, max_epochs=epochs)

    # Load best model
    best_checkpoint = best_checkpoint_handler.last_checkpoint
    print('Loading best checkpoint from', best_checkpoint)
    checkpoint = torch.load(join(save_dir,
                                 best_checkpoint_handler.last_checkpoint),
                            map_location=device)
    Checkpoint.load_objects(to_load=to_save, checkpoint=checkpoint)

    writer.close()

    val_preds = predict_data(trainer.state.epoch, val_loader)
    test_preds = predict_data(trainer.state.epoch, test_loader)
    val_res = mean_metrics(calculate_metrics(val_preds.pred, val_preds.target))
    test_res = mean_metrics(
        calculate_metrics(test_preds.pred, test_preds.target))

    # val_evaluator.run(val_loader)
    results = {}
    # Store the config, ...
    results.update({
        section_name: dict(config[section_name])
        for section_name in config.sections()
    })
    # ... the last training metrics,
    results.update(
        {f'train_{k}': v
         for k, v in train_evaluator.state.metrics.items()})
    # ... the last validation metrics from torch,
    results.update(
        {f'val_torch_{k}': v
         for k, v in val_evaluator.state.metrics.items()})
    # ... the validation metrics that I calculate,
    results.update({f'val_{k}': v for k, v in val_res.items()})
    # ... and the test metrics that I calculate
    results.update({f'test_{k}': v for k, v in test_res.items()})
    write_results_file(join('results', 'results.json'),
                       pd.json_normalize(results))