def _construct_norm_arrays(file_path: str, metadata_path: str, fold: int = None, filesystem: S3FS = None) -> \ Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: """ Return arrays with normalisation factors to be used """ chunk_name = os.path.basename(file_path) df = pd.read_csv( filesystem.open(metadata_path) ) if filesystem is not None else pd.read_csv(metadata_path) df = df[df.chunk == chunk_name] if fold is not None: df = df[df.fold == fold] perc99 = df[[ 'norm_perc99_b0', 'norm_perc99_b1', 'norm_perc99_b2', 'norm_perc99_b3' ]].values meanstd_mean = df[[ 'norm_meanstd_mean_b0', 'norm_meanstd_mean_b1', 'norm_meanstd_mean_b2', 'norm_meanstd_mean_b3' ]].values meanstd_median = df[[ 'norm_meanstd_median_b0', 'norm_meanstd_median_b1', 'norm_meanstd_median_b2', 'norm_meanstd_median_b3' ]].values meanstd_std = df[[ 'norm_meanstd_std_b0', 'norm_meanstd_std_b1', 'norm_meanstd_std_b2', 'norm_meanstd_std_b3' ]].values return perc99, meanstd_mean, meanstd_median, meanstd_std
def load_dates(filesystem: S3FS, tile_name: str) -> List[datetime]:
    """
    Read `/<tile_name>/userdata.json` through the filesystem and return the
    dates it lists, each passed through `parse`
    """
    with filesystem.open(f'/{tile_name}/userdata.json', 'r') as userdata_file:
        contents = json.load(userdata_file)

    # The `dates` entry is a JSON-encoded string, so it is decoded a second time.
    encoded_dates = contents['dates']
    return list(map(parse, json.loads(encoded_dates)))
def load_metadata(filesystem: S3FS, config: PredictionConfig) -> pd.DataFrame:
    """
    Load DataFrame with info about normalisation factors

    Reads the CSV at `config.metadata_path` through `filesystem`, groups its
    rows by the calendar month of the `timestamp` column and keeps the
    per-column maximum of every group. A `month` column holding the month
    number of each group's (maximum) timestamp is added to the result.

    The directory containing `config.metadata_path` is created on the
    filesystem when it does not exist yet.

    :param filesystem: filesystem the metadata file is read from
    :param config: configuration providing `metadata_path`
    :return: DataFrame with one row per monthly period, indexed by that period
    """
    metadata_dir = os.path.dirname(config.metadata_path)
    if not filesystem.exists(metadata_dir):
        filesystem.makedirs(metadata_dir)

    # pandas leaves caller-supplied handles open, so close it explicitly.
    with filesystem.open(f'{config.metadata_path}') as metadata_file:
        df = pd.read_csv(metadata_file)

    month_periods = pd.to_datetime(df.timestamp).dt.to_period("M")
    normalisation_factors = df.groupby(month_periods).max()
    normalisation_factors['month'] = pd.to_datetime(
        normalisation_factors.timestamp).dt.month

    return normalisation_factors