def __init__(self, config: Config, train_IDs, labels, normalize=True):
    # Read the experiment configuration and keep the pieces needed for sampling.
    self.train_parameters = process_config(
        config.experiment.train_parameters_config_file)
    self.target_param = config.experiment.target_parameter
    self.synop_file = config.experiment.synop_file
    self.labels = labels
    # Geographic bounding box of the subregion used for GFS slices.
    self.subregion_coords = Coords(config.experiment.subregion_nlat,
                                   config.experiment.subregion_slat,
                                   config.experiment.subregion_wlon,
                                   config.experiment.subregion_elon)
    self.prediction_offset = config.experiment.prediction_offset
    self.dim = get_dim_of_GFS_slice_for_coords(self.subregion_coords)
    self.channels = len(self.train_parameters)
    self.normalization_type = config.experiment.normalization_type
    self.sequence_length = config.experiment.sequence_length
    self.list_IDs = train_IDs
    # Sample IDs are grouped by prediction offset; pick the group for this run.
    self.data = self.list_IDs[str(self.prediction_offset)]
    self.mean, self.std = [], []
    self.normalize = normalize
    if normalize:
        self.normalize_data(config.experiment.normalization_type)
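# Usage sketch (hypothetical; assumes this __init__ belongs to a Dataset-style
# class, here called "GFSDataset", and that train_IDs maps str(offset) -> IDs):
#
#   dataset = GFSDataset(config, train_IDs, labels, normalize=True)
#   first_id = dataset.data[0]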
def get_indices_of_GFS_slice_for_coords(coords: Coords):
    """Map a geographic bounding box to array indices on the 0.25-degree GFS grid."""
    nearest_coords_NW = get_nearest_coords(
        Coords(coords.nlat, coords.nlat, coords.wlon, coords.wlon))
    nearest_coords_SE = get_nearest_coords(
        Coords(coords.slat, coords.slat, coords.elon, coords.elon))
    lat_NW, lon_NW = get_nearest_lat_lon_from_coords(
        nearest_coords_NW, Coords(coords.nlat, coords.nlat, coords.wlon, coords.wlon))
    lat_SE, lon_SE = get_nearest_lat_lon_from_coords(
        nearest_coords_SE, Coords(coords.slat, coords.slat, coords.elon, coords.elon))

    # The factor of 4 converts degrees to grid cells (0.25-degree resolution).
    # Latitude indices count from the northern edge of GFS_SPACE, longitude
    # indices from its eastern edge.
    lat_index_start = int((GFS_SPACE.nlat - lat_NW) * 4)
    lat_index_end = int((GFS_SPACE.nlat - lat_SE) * 4)
    lon_index_start = int((GFS_SPACE.elon - lon_SE) * 4)
    lon_index_end = int((GFS_SPACE.elon - lon_NW) * 4)

    return lat_index_start, lat_index_end, lon_index_start, lon_index_end
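# Example sketch (hypothetical coordinates): the returned indices address the
# 0.25-degree grid, so a loaded GFS image can be sliced directly. Whether the
# end indices are treated as inclusive is up to the caller; shown exclusive:
#
#   lat_s, lat_e, lon_s, lon_e = get_indices_of_GFS_slice_for_coords(
#       Coords(54.0, 52.0, 15.0, 17.0))
#   subregion = gfs_image[lat_s:lat_e, lon_s:lon_e]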
def get_next_gfs_values(dates, prediction_offset, lat: float, lon: float,
                        gfs_params: list, future_dates):
    """Collect GFS point forecasts for a sequence of dates.

    When the requested offset falls between two 3-hourly forecast steps, the
    value is linearly interpolated between them. Returns None if any required
    GFS image is missing.
    """
    next_gfs_values = []
    gfs_loader = GFSLoader()
    first_date = dates.values[0]
    coords = Coords(lat, lat, lon, lon)
    for date in dates:
        if future_dates:
            # Shift the offset by the number of hours elapsed since the first date.
            offset = prediction_offset + int(
                divmod((date - first_date).total_seconds(), 3600)[0])
        else:
            offset = prediction_offset
        gfs_dates, gfs_offsets, mod_offset = get_forecast_dates_and_offsets_for_prediction_date(
            date, offset)
        val = []
        for param in gfs_params:
            if mod_offset == 0:
                # The offset lands exactly on a forecast step.
                gfs_date_key = gfs_loader.get_date_key(gfs_dates[0])
                value = gfs_loader.get_gfs_image(gfs_date_key, param, gfs_offsets[0])
                if value is None:
                    return None
                value = get_point_from_GFS_slice_for_coords(value, coords)
                val.append(value)
            else:
                # Interpolate linearly between the two surrounding 3-hourly forecasts.
                gfs_date_key = gfs_loader.get_date_key(gfs_dates[0])
                val1 = gfs_loader.get_gfs_image(gfs_date_key, param, gfs_offsets[0])
                gfs_date_key = gfs_loader.get_date_key(gfs_dates[1])
                val2 = gfs_loader.get_gfs_image(gfs_date_key, param, gfs_offsets[1])
                if val1 is None or val2 is None:
                    return None
                val1 = get_point_from_GFS_slice_for_coords(val1, coords)
                val2 = get_point_from_GFS_slice_for_coords(val2, coords)
                val.append(val1 * (3 - mod_offset) / 3 + val2 * mod_offset / 3)
        next_gfs_values.append(val)
    return next_gfs_values
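# Usage sketch (hypothetical inputs; `dates` is assumed to be a pandas Series
# of datetimes and GFS_PARAMETERS the parameter list defined in this package):
#
#   values = get_next_gfs_values(dates, prediction_offset=3, lat=52.2, lon=21.0,
#                                gfs_params=GFS_PARAMETERS[:2], future_dates=True)
#   if values is not None:
#       print(values[0])  # one point value per parameter for the first date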
def get_gfs_values_and_targets_for_gfs_ids(gfs_date_keys, labels, target_param,
                                           lat: float, lon: float, offset: int):
    """Extract GFS point forecasts and the matching observed targets for the given date keys.

    Note: assumes a GFS image exists for every date key that has a label;
    missing images are not guarded against here.
    """
    targets = []
    gfs_values = []
    coords = Coords(lat, lat, lon, lon)
    gfs_loader = GFSLoader()
    param = target_param_to_gfs_name_level(target_param)[0]
    for date_key in tqdm(gfs_date_keys):
        date = date_from_gfs_date_key(date_key)
        label = labels[labels["date"] == date]
        if len(label) > 0:
            targets.append(label[target_param].to_numpy())
            gfs_values.append(
                get_point_from_GFS_slice_for_coords(
                    gfs_loader.get_gfs_image(date_key, param, offset), coords))
    return np.array(gfs_values), np.array(targets).squeeze()
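# Usage sketch (hypothetical target parameter and location):
#
#   gfs_vals, targets = get_gfs_values_and_targets_for_gfs_ids(
#       gfs_date_keys, labels, "temperature", lat=52.2, lon=21.0, offset=3)
#   # gfs_vals and targets are aligned 1:1 for dates present in `labels`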
def match_gfs_with_synop_sequence(features: Union[list, np.ndarray], targets: list,
                                  lat: float, lon: float, prediction_offset: int,
                                  gfs_params: list, return_GFS=True):
    """Keep only samples whose prediction date has GFS forecasts for all requested parameters.

    Returns the filtered features and targets (plus the matching GFS point
    values when return_GFS is True) and the indices of dropped samples.
    """
    gfs_values = []
    new_targets = []
    new_features = []
    gfs_loader = GFSLoader()
    removed_indices = []
    print("Matching GFS with synop data")
    for index, value in tqdm(enumerate(targets), total=len(targets)):
        date = value[0]
        gfs_date, gfs_offset = get_forecast_date_and_offset_for_prediction_date(
            date, prediction_offset)
        gfs_date_key = gfs_loader.get_date_key(gfs_date)

        # Check that forecasts are available for every requested parameter.
        if all(gfs_loader.get_gfs_image(gfs_date_key, param, gfs_offset) is not None
               for param in gfs_params):
            if return_GFS:
                val = []
                for param in gfs_params:
                    val.append(
                        get_point_from_GFS_slice_for_coords(
                            gfs_loader.get_gfs_image(gfs_date_key, param, gfs_offset),
                            Coords(lat, lat, lon, lon)))
                gfs_values.append(val)
            new_targets.append(value[1])
            new_features.append(features[index])
        else:
            removed_indices.append(index)

    if return_GFS:
        return np.array(new_features), np.array(gfs_values), np.array(new_targets), removed_indices
    return np.array(new_features), np.array(new_targets), removed_indices
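# Usage sketch (hypothetical inputs): drop samples without complete GFS
# coverage and keep the returned indices to filter any parallel arrays:
#
#   feats, gfs_vals, targs, removed = match_gfs_with_synop_sequence(
#       features, targets, lat=52.2, lon=21.0, prediction_offset=3,
#       gfs_params=GFS_PARAMETERS, return_GFS=True)
#   other_array = np.delete(other_array, removed, axis=0)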
def process_netCDF_files_to_npy(output_dir: str):
    for param in GFS_PARAMETERS:
        logger.info(f"Converting parameter {param['name']} {param['level']}")
        process_to_numpy_array(param, Coords(56, 48, 13, 26), output_dir)
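# Usage sketch (hypothetical output directory):
#
#   process_netCDF_files_to_npy("data/gfs_npy")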
import matplotlib.pyplot as plt
import numpy as np

from gfs_archive_0_25.gfs_processor.Coords import Coords

# Full geographic extent covered by the archived GFS data (N, S, W, E).
GFS_SPACE = Coords(56, 48, 13, 26)

GFS_PARAMETERS = [
    {"name": "V GRD", "level": "ISBL_1000"},
    {"name": "V GRD", "level": "ISBL_975"},
    {"name": "V GRD", "level": "ISBL_950"},
    {"name": "V GRD", "level": "ISBL_900"},
    {"name": "V GRD", "level": "ISBL_850"},
    {"name": "V GRD", "level": "ISBL_800"