예제 #1
0
def _construct_norm_arrays(file_path: str, metadata_path: str, fold: int = None, filesystem: S3FS = None) -> \
        Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """ Return arrays with normalisation factors to be used """
    chunk_name = os.path.basename(file_path)

    df = pd.read_csv(
        filesystem.open(metadata_path)
    ) if filesystem is not None else pd.read_csv(metadata_path)

    df = df[df.chunk == chunk_name]

    if fold is not None:
        df = df[df.fold == fold]

    perc99 = df[[
        'norm_perc99_b0', 'norm_perc99_b1', 'norm_perc99_b2', 'norm_perc99_b3'
    ]].values
    meanstd_mean = df[[
        'norm_meanstd_mean_b0', 'norm_meanstd_mean_b1', 'norm_meanstd_mean_b2',
        'norm_meanstd_mean_b3'
    ]].values
    meanstd_median = df[[
        'norm_meanstd_median_b0', 'norm_meanstd_median_b1',
        'norm_meanstd_median_b2', 'norm_meanstd_median_b3'
    ]].values
    meanstd_std = df[[
        'norm_meanstd_std_b0', 'norm_meanstd_std_b1', 'norm_meanstd_std_b2',
        'norm_meanstd_std_b3'
    ]].values

    return perc99, meanstd_mean, meanstd_median, meanstd_std
예제 #2
0
def load_dates(filesystem: S3FS, tile_name: str) -> List[datetime]:
    """ Read the dates recorded in a tile's ``userdata.json`` and parse them.

    The file is opened at ``/<tile_name>/userdata.json`` on ``filesystem``.
    Its ``dates`` entry is itself a JSON-encoded string, so it is decoded a
    second time before every element is handed to ``parse``.

    :param filesystem: Filesystem holding the tile folders.
    :param tile_name: Name of the tile folder to read from.
    :return: The parsed dates, in the order they are stored in the file.
    """
    with filesystem.open(f'/{tile_name}/userdata.json', 'r') as userdata_file:
        encoded_dates = json.load(userdata_file)['dates']

    # Second decode: the value under 'dates' is a JSON document stored as a string.
    return list(map(parse, json.loads(encoded_dates)))
예제 #3
0
def load_metadata(filesystem: S3FS, config: PredictionConfig) -> pd.DataFrame:
    """ Load DataFrame with info about normalisation factors, aggregated per month.

    The directory containing ``config.metadata_path`` is created on
    ``filesystem`` if it does not exist yet. The metadata CSV is then read and
    its rows are grouped by the year-month period of their ``timestamp``
    column; the column-wise maximum of every group is kept.

    :param filesystem: Filesystem the metadata CSV is read from.
    :param config: Configuration providing ``metadata_path``.
    :return: DataFrame indexed by monthly period, holding the per-group maxima
        plus a ``month`` column with the month number taken from each group's
        maximum ``timestamp``.
    """
    metadata_dir = os.path.dirname(config.metadata_path)
    if not filesystem.exists(metadata_dir):
        filesystem.makedirs(metadata_dir)

    # pandas does not close handles it did not open itself, so close the
    # filesystem handle explicitly once the CSV has been parsed.
    with filesystem.open(f'{config.metadata_path}') as metadata_file:
        df = pd.read_csv(metadata_file)

    month_period = pd.to_datetime(df.timestamp).dt.to_period("M")
    normalisation_factors = df.groupby(month_period).max()

    normalisation_factors['month'] = pd.to_datetime(
        normalisation_factors.timestamp).dt.month

    return normalisation_factors