Code example #1
0
def get_mean_and_std_cmax(list_IDs: [str],
                          dim: (int, int),
                          sequence_length: int,
                          future_sequence_length: int = 0,
                          prediction_offset: int = 0):
    """Compute mean and std over every CMAX frame reachable from list_IDs.

    list_IDs holds the date keys of the FIRST frame of each sequence; the
    full frame set is expanded via per-sequence offsets, and duplicates
    across overlapping sequences are collapsed with a set.

    Returns a tuple of (all loaded CMAX images, mean, std).
    """
    log.info("Calculating std and mean for the CMAX dataset")
    frames_per_sequence = (sequence_length + future_sequence_length +
                           prediction_offset)
    all_ids = {
        get_cmax_datekey_from_offset(id, offset)
        for id in list_IDs
        for offset in range(frames_per_sequence)
    }
    # NOTE(review): the /4 implies each frame contributes dim[0]*dim[1]/4
    # pixels (quarter resolution?) — confirm against CMAXLoader.
    denom = len(all_ids) * dim[0] * dim[1] / 4
    mean, sqr_mean = 0, 0
    cmax_loader = CMAXLoader()
    for id in tqdm(all_ids):
        values = cmax_loader.get_cmax_image(id)
        mean += np.sum(values) / denom
        sqr_mean += np.sum(np.power(values, 2)) / denom

    # std via E[X^2] - E[X]^2
    std = math.sqrt(sqr_mean - pow(mean, 2))

    return cmax_loader.get_all_loaded_cmax_images(), mean, std
Code example #2
0
File: gfs_util.py  Project: adambelniak/WindForecast
def initialize_mean_and_std_for_sequence(date_keys: dict,
                                         train_parameters,
                                         dim: (int, int),
                                         sequence_length: int,
                                         prediction_offset: int,
                                         subregion_coords: Coords = None):
    """Compute per-parameter mean and std over GFS value sequences.

    Args:
        date_keys: dict mapping a prediction offset to the list of sequence
            ids available at that offset.
        train_parameters: iterable of GFS parameters to compute stats for.
        dim: (height, width) of a single GFS slice.
        sequence_length: number of frames per sequence.
        prediction_offset: offset selecting which id list in date_keys to use.
        subregion_coords: optional sub-region to crop each slice to.

    Returns:
        (means, stds) lists, one entry per parameter.
    """
    log.info("Calculating std and mean for a dataset")
    means = []
    stds = []
    # The loop below iterates date_keys[prediction_offset], so the sample
    # count must be the length of that list — NOT len(date_keys), which is
    # the number of offsets in the dict (previous bug).
    ids = date_keys[prediction_offset]
    denom = len(ids) * sequence_length * dim[0] * dim[1]
    for param in tqdm(train_parameters):
        total, sqr_total = 0, 0  # avoid shadowing builtin `sum`
        for id in tqdm(ids):
            values = np.squeeze(
                get_GFS_values_for_sequence(id, param, sequence_length,
                                            prediction_offset,
                                            subregion_coords))
            total += np.sum(values)
            sqr_total += np.sum(np.power(values, 2))

        mean = total / denom
        means.append(mean)
        # std via E[X^2] - E[X]^2
        stds.append(math.sqrt(sqr_total / denom - pow(mean, 2)))

    return means, stds
Code example #3
0
File: gfs_util.py  Project: adambelniak/WindForecast
def initialize_mean_and_std(date_keys,
                            train_parameters,
                            dim: (int, int),
                            prediction_offset: int,
                            subregion_coords=None):
    """Compute per-parameter mean and std over single GFS slices.

    Args:
        date_keys: iterable of date keys identifying the slices to scan.
        train_parameters: iterable of GFS parameters to compute stats for.
        dim: (height, width) of a single GFS slice.
        prediction_offset: forecast offset passed to the loader.
        subregion_coords: optional sub-region to crop each slice to.

    Returns:
        (means, stds) lists, one entry per parameter.
    """
    log.info("Calculating std and mean for a dataset")
    means = []
    stds = []
    gfs_loader = GFSLoader()
    # Pixel count is loop-invariant; hoist it out of both loops.
    denom = len(date_keys) * dim[0] * dim[1]
    for param in tqdm(train_parameters):
        total, sqr_total = 0, 0  # avoid shadowing builtin `sum`
        for date_key in tqdm(date_keys):
            values = gfs_loader.get_gfs_image(date_key, param,
                                              prediction_offset)
            if subregion_coords is not None:
                values = get_subregion_from_GFS_slice_for_coords(
                    values, subregion_coords)
            total += np.sum(values)
            sqr_total += np.sum(np.power(values, 2))

        mean = total / denom
        means.append(mean)
        # std via E[X^2] - E[X]^2
        stds.append(math.sqrt(sqr_total / denom - pow(mean, 2)))

    return means, stds
Code example #4
0
def get_min_max_cmax(list_IDs: [str],
                     sequence_length: int,
                     future_sequence_length: int = 0,
                     prediction_offset: int = 0):
    """Preload all CMAX frames reachable from list_IDs and return min/max.

    list_IDs holds the date keys of the FIRST frame of each sequence; the
    full frame set is expanded via per-sequence offsets, deduplicated with
    a set.

    Returns a tuple of (all loaded CMAX images, CMAX_MIN, CMAX_MAX).
    """
    frames_per_sequence = (sequence_length + future_sequence_length +
                           prediction_offset)
    all_ids = {
        get_cmax_datekey_from_offset(id, offset)
        for id in list_IDs
        for offset in range(frames_per_sequence)
    }
    cmax_loader = CMAXLoader()
    log.info("Loading CMAX files into the runtime.")
    for id in tqdm(all_ids):
        # Called for its side effect only: populates the loader's cache.
        # (Previously bound to an unused local `values`.)
        cmax_loader.get_cmax_image(id)

    # CMAX bounds are known constants, so no per-pixel scan is needed.
    return cmax_loader.get_all_loaded_cmax_images(), CMAX_MIN, CMAX_MAX
Code example #5
0
File: gfs_util.py  Project: adambelniak/WindForecast
def initialize_min_max_for_sequence(list_IDs: [str],
                                    train_parameters,
                                    sequence_length: int,
                                    prediction_offset: int,
                                    subregion_coords=None):
    """Compute per-parameter min and max over GFS value sequences.

    Args:
        list_IDs: sequence ids to scan.
        train_parameters: iterable of GFS parameters.
        sequence_length: number of frames per sequence.
        prediction_offset: forecast offset passed to the value getter.
        subregion_coords: optional sub-region to crop each slice to.

    Returns:
        (mins, maxes) lists, one entry per parameter.
    """
    log.info("Calculating min and max for the GFS dataset")
    mins = []
    maxes = []
    for param in tqdm(train_parameters):
        # Previous code shadowed the builtins `min`/`max` with floats and
        # then tried to call them, which raised at runtime. It also used
        # sys.float_info.min (smallest POSITIVE float) as the initial max
        # bound. Use +/- infinity and keep the builtins intact.
        param_min, param_max = float('inf'), float('-inf')
        for id in list_IDs:
            values = np.squeeze(
                get_GFS_values_for_sequence(id, param, sequence_length,
                                            prediction_offset,
                                            subregion_coords))
            # Reduce the array first, then compare scalars.
            param_min = min(np.min(values), param_min)
            param_max = max(np.max(values), param_max)

        mins.append(param_min)
        maxes.append(param_max)

    return mins, maxes
Code example #6
0
File: gfs_util.py  Project: adambelniak/WindForecast
def initialize_min_max(date_keys: [str],
                       train_parameters,
                       prediction_offset: int,
                       subregion_coords=None):
    """Compute per-parameter min and max over single GFS slices.

    Args:
        date_keys: date keys identifying the slices to scan.
        train_parameters: iterable of GFS parameters.
        prediction_offset: forecast offset passed to the loader.
        subregion_coords: optional sub-region to crop each slice to.

    Returns:
        (mins, maxes) lists, one entry per parameter.
    """
    log.info("Calculating min and max for a dataset")
    mins = []
    maxes = []
    gfs_loader = GFSLoader()
    for param in tqdm(train_parameters):
        # Previous code rebound the builtins `min`/`max` to numbers and then
        # called them on the next iteration ("'numpy.float...' object is not
        # callable"). It also used sys.float_info.min (smallest POSITIVE
        # float) as the initial max bound. Use +/- infinity and distinct
        # local names so the builtins stay callable.
        param_min, param_max = float('inf'), float('-inf')
        for date_key in date_keys:
            values = gfs_loader.get_gfs_image(date_key, param,
                                              prediction_offset)
            if subregion_coords is not None:
                values = get_subregion_from_GFS_slice_for_coords(
                    values, subregion_coords)
            param_min = min(np.min(values), param_min)
            param_max = max(np.max(values), param_max)

        mins.append(param_min)
        maxes.append(param_max)

    return mins, maxes
Code example #7
0
def main(cfg: Config):

    cfg.experiment.train_parameters_config_file = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'config',
        'train_parameters', cfg.experiment.train_parameters_config_file)

    log.info(
        f'\\[init] Loaded config:\n{OmegaConf.to_yaml(cfg, resolve=True)}')

    pl.seed_everything(cfg.experiment.seed)

    RUN_NAME = os.getenv('RUN_NAME')
    log.info(f'[bold yellow]\\[init] Run name --> {RUN_NAME}')

    run: Run = wandb_logger.experiment  # type: ignore

    # Setup logging & checkpointing
    tags = get_tags(cast(DictConfig, cfg))
    run.tags = tags
    run.notes = str(cfg.notes)
    wandb_logger.log_hyperparams(OmegaConf.to_container(
        cfg, resolve=True))  # type: ignore
    log.info(
        f'[bold yellow][{RUN_NAME} / {run.id}]: [bold white]{",".join(tags)}')

    setproctitle.setproctitle(
        f'{RUN_NAME} ({os.getenv("WANDB_PROJECT")})')  # type: ignore

    log.info(
        f'[bold white]Overriding cfg.lightning settings with derived values:')
    log.info(
        f' >>> num_sanity_val_steps = {-1 if cfg.experiment.validate_before_training else 0}\n'
    )

    # Create main system (system = models + training regime)
    system: LightningModule = instantiate(cfg.experiment.system, cfg)
    log.info(f'[bold yellow]\\[init] System architecture:')
    log.info(system)
    # Prepare data using datamodules
    datamodule: LightningDataModule = instantiate(cfg.experiment.datamodule,
                                                  cfg)

    resume_path = get_resume_checkpoint(cfg, wandb_logger)
    if resume_path is not None:
        log.info(f'[bold yellow]\\[checkpoint] [bold white]{resume_path}')

    checkpointer = CustomCheckpointer(
        period=1,
        dirpath='checkpoints',
        filename='{epoch}',
    )

    trainer: pl.Trainer = instantiate(
        cfg.lightning,
        logger=wandb_logger,
        max_epochs=cfg.experiment.epochs,
        callbacks=[checkpointer],
        resume_from_checkpoint=resume_path,
        checkpoint_callback=True if cfg.experiment.save_checkpoints else False,
        num_sanity_val_steps=-1
        if cfg.experiment.validate_before_training else 0)

    trainer.fit(system, datamodule=datamodule)
    trainer.test(system, datamodule=datamodule)

    wandb_logger.log_metrics(
        {
            'target_mean': datamodule.dataset_test.mean,
            'target_std': datamodule.dataset_test.std
        },
        step=system.current_epoch)

    mean = datamodule.dataset_test.mean
    std = datamodule.dataset_test.std

    plot_results(system, cfg, mean, std)

    if trainer.interrupted:  # type: ignore
        log.info(f'[bold red]>>> Training interrupted.')
        run.finish(exit_code=255)