def get_result(response: requests.Response) -> dict:
    """Extract the ``result`` payload from an RDA API response.

    :param response: HTTP response returned by the RDA web service.
    :returns: The value stored under the ``result`` key of the JSON body.
    :raises requests.HTTPError: If the response carries an HTTP error status.
    :raises UserError: If the body is not valid JSON, does not have the
        expected structure, or reports ``status == 'error'``.
    """
    response.raise_for_status()
    try:
        obj = response.json()
    except ValueError as e:
        # JSON decoding failures are reported as ValueError (sub)classes.
        raise UserError('RDA error: ' + response.text) from e
    try:
        if obj['status'] == 'error':
            raise UserError('RDA error: ' + ' '.join(obj['messages']))
        return obj['result']
    except (KeyError, TypeError) as e:
        # Missing keys or a body that is not a JSON object: show the raw
        # response text since there is no structured message to report.
        raise UserError('RDA error: ' + response.text) from e
def load(path: str):
    """Load the project stored in the folder ``path``.

    The project file (``PROJECT_FILENAME`` inside ``path``) is parsed as JSON
    using ``ProjectJSONDecoder`` as object hook. Files written with an older
    format version are upgraded via ``Project.upgrade``.

    :param path: Folder containing the project file.
    :returns: A ``Project`` built from the loaded data and ``path``.
    :raises UserError: If the project file does not exist, carries a
        non-positive version, or was written by a newer format version.
    """
    project_json_path = os.path.join(path, PROJECT_FILENAME)
    if not os.path.exists(project_json_path):
        raise UserError(f'{project_json_path} not found')
    with open(project_json_path) as fp:
        data = json.load(fp, object_hook=ProjectJSONDecoder)

    version = data['version']
    # Validate explicitly instead of asserting: asserts vanish under -O.
    if not version > 0:
        raise UserError(f'Invalid project file version {version} in {project_json_path}')
    if version < PROJECT_JSON_VERSION:
        Project.upgrade(data)
    elif version > PROJECT_JSON_VERSION:
        raise UserError('Plugin too old to read project file of version {}'.format(version))
    return Project(data, path)
def find_mpiexec() -> str:
    """Return the first ``mpiexec`` candidate that can be launched.

    Candidates depend on the platform: on Windows the executable inside
    ``%MSMPI_BIN%`` (if that variable is set), on macOS/Linux ``mpiexec``
    from PATH followed by ``/usr/local/bin/mpiexec``.

    :raises UnsupportedError: If the platform is not Windows, Darwin or Linux.
    :raises UserError: If none of the candidates could be started.
    """
    system_name = platform.system()
    candidates = []
    if system_name == 'Windows':
        probe_args = []
        msmpi_bin = os.environ.get('MSMPI_BIN')
        if msmpi_bin is not None:
            candidates.append(os.path.join(msmpi_bin, 'mpiexec.exe'))
    elif system_name in ('Darwin', 'Linux'):
        probe_args = ['-h']
        # Sometimes /usr/local/bin is not in PATH, so probe it explicitly too.
        candidates.extend(['mpiexec', '/usr/local/bin/mpiexec'])
    else:
        raise UnsupportedError(f'Platform "{system_name}" is not supported')

    for candidate in candidates:
        try:
            subprocess.check_output([candidate] + probe_args, startupinfo=STARTUPINFO)
        except FileNotFoundError:
            continue
        return candidate
    raise UserError('MPI not found')
def convert_wps_nml_to_project(nml: dict, existing_project: Project) -> Project:
    """Create a project whose domains are taken from a WPS namelist.

    A shallow copy of the existing project's data is used and its ``domains``
    entry replaced; the path of the existing project is reused.

    :param nml: Parsed WPS namelist.
    :param existing_project: Project providing the remaining data and the path.
    :raises UserError: If a required namelist section/variable is missing.
    """
    try:
        converted_domains = convert_nml_to_project_domains(nml)
    except KeyError as e:
        raise UserError(f'Invalid namelist, section/variable {e} not found')
    project_data = existing_project.data.copy()
    project_data['domains'] = converted_domains
    return Project(project_data, existing_project.path)
def read_namelist(path: Union[str, StringIO], schema_name: Optional[str] = None) -> dict:
    """Parse a Fortran namelist and optionally normalise it against a schema.

    :param path: Path of the namelist file, or a ``StringIO`` handed straight
        to ``f90nml.read``.
    :param schema_name: Name passed to ``get_namelist_schema``. If given,
        variables whose schema type maps to ``list`` but which were parsed as
        a single value are wrapped into one-element lists.
    :returns: The parsed (and possibly normalised) namelist.
    :raises UserError: If the file does not exist or cannot be parsed.
    """
    if isinstance(path, str) and not os.path.exists(path):
        raise UserError(f'Namelist file {path} does not exist')
    try:
        nml = f90nml.read(path)
    except Exception as e:
        # f90nml does not raise useful exceptions, so we can't include details here.
        # Catch Exception (not everything) so Ctrl-C / SystemExit still propagate.
        raise UserError(f'Namelist file {path} could not be parsed') from e

    # If a schema is specified, use it to fix single-element lists which are parsed as
    # primitive values, since nothing distinguishes the two in the namelist format.
    if schema_name:
        schema = get_namelist_schema(schema_name)
        for group_name, group in nml.items():
            schema_group = schema[group_name]
            for var_name, var_val in group.items():
                schema_var = schema_group[var_name]
                schema_type = SCHEMA_VAR_TYPES[schema_var['type']]
                if schema_type is list and not isinstance(var_val, list):
                    # Replacing the value of an existing key is safe while iterating.
                    group[var_name] = [var_val]
    return nml
def rda_submit_request(request_data: dict, auth: tuple) -> str:
    """Submit a data request to the RDA API and return its request ID.

    :param request_data: Request description, sent as the JSON body.
    :param auth: Credentials passed to ``requests.post`` as ``auth``.
    :returns: The ``request_id`` reported by the service.
    :raises UserError: If the service reports an error or the result does
        not contain a ``request_id``.
    """
    headers = {'Content-type': 'application/json'}
    # Note that requests_retry_session() is not used here since any error may be due
    # to invalid input and the user should be alerted immediately.
    response = requests.post(f'{API_BASE_URL}/request', auth=auth, headers=headers, json=request_data)
    result = get_result(response)
    try:
        return result['request_id']
    except (KeyError, TypeError) as e:
        # Key missing, or the result is not a mapping at all.
        raise UserError('RDA error: ' + json.dumps(result)) from e
def rda_submit_request(request_data: dict, auth: tuple) -> str:
    """Submit a data request to the RDA web app and return its request index.

    The response body is scanned line by line for a ``Index: <value>`` entry.

    :param request_data: Request description, sent as the JSON body.
    :param auth: Credentials passed to ``requests.post`` as ``auth``.
    :returns: The stripped value of the first ``Index`` line.
    :raises requests.HTTPError: If the response carries an HTTP error status.
    :raises UserError: If the response contains no usable ``Index`` line.
    """
    headers = {'Content-type': 'application/json'}
    # Note that requests_retry_session() is not used here since any error may be due
    # to invalid input and the user should be alerted immediately.
    response = requests.post('https://rda.ucar.edu/apps/request', auth=auth, headers=headers, json=request_data)
    response.raise_for_status()
    try:
        response_fmt = [x.split(':') for x in response.text.splitlines()]
        request_id = [x[1].strip() for x in response_fmt if x[0].strip() == 'Index'][0]
    except IndexError as e:
        # Either no 'Index' line exists or it has no ':'-separated value.
        raise UserError('RDA error: ' + response.text.strip()) from e
    return request_id
def read_wps_binary_index_file(folder: str) -> WPSBinaryIndexMetadata:
    """Read the ``index`` file of a WPS Binary dataset folder.

    The file is parsed as ``key=value`` pairs by feeding it to a
    ``ConfigParser`` under an artificial ``[root]`` section.

    :param folder: Folder that contains the ``index`` file.
    :returns: Populated metadata object, after calling its ``validate()``.
    :raises UserError: If the ``index`` file does not exist.
    :raises KeyError: If a mandatory entry (e.g. ``wordsize``, ``tile_x``,
        ``projection``, ``dx``) is missing.
    """
    index_path = os.path.join(folder, 'index')
    if not os.path.exists(index_path):
        raise UserError(f'{index_path} is missing, this is not a valid WPS Binary dataset')
    with open(index_path) as f:
        index = '\n'.join(line.strip() for line in f)

    # Interpolation is disabled so that a literal '%' in values such as units
    # or description is returned verbatim instead of raising an error.
    parser = ConfigParser(interpolation=None)
    parser.read_string('[root]\n' + index)
    meta = parser['root']

    def clean_str(s: Optional[str]) -> Optional[str]:
        # Strip the surrounding double quotes of string values.
        if s is None:
            return None
        return s.strip('"')

    m = WPSBinaryIndexMetadata()

    # encoding
    m.little_endian = meta.get('endian') == 'little'
    m.signed = meta.get('signed') == 'yes'
    m.top_bottom = meta.get('row_order') == 'top_bottom'
    m.word_size = int(meta['wordsize'])
    m.scale_factor = float(meta.get('scale_factor', '1'))
    m.missing_value = float(meta['missing_value']) if 'missing_value' in meta else None

    # tile dimensions
    m.tile_x = int(meta['tile_x'])
    m.tile_y = int(meta['tile_y'])
    if 'tile_z_start' in meta:
        m.tile_z_start = int(meta['tile_z_start'])
        m.tile_z_end = int(meta['tile_z_end'])
    else:
        m.tile_z_start = 1
        m.tile_z_end = int(meta['tile_z'])
    m.tile_bdr = int(meta.get('tile_bdr', '0'))

    # projection / geographic coordinate system
    m.proj_id = meta['projection']
    m.stdlon = float(meta['stdlon']) if 'stdlon' in meta else None
    m.truelat1 = float(meta['truelat1']) if 'truelat1' in meta else None
    m.truelat2 = float(meta['truelat2']) if 'truelat2' in meta else None

    # grid georeferencing
    m.dx = float(meta['dx'])
    m.dy = float(meta['dy'])
    m.known_lonlat = LonLat(lon=float(meta['known_lon']), lat=float(meta['known_lat']))
    known_x_idx = float(meta.get('known_x', '1'))
    known_y_idx = float(meta.get('known_y', '1'))
    m.known_idx = Coordinate2D(known_x_idx, known_y_idx)

    # categories
    m.categorical = meta['type'] == 'categorical'
    m.category_min = int(meta['category_min']) if 'category_min' in meta else None
    m.category_max = int(meta['category_max']) if 'category_max' in meta else None

    # landuse categories
    m.landuse_scheme = clean_str(meta.get('mminlu'))
    for field in LANDUSE_FIELDS:
        setattr(m, field, int(meta[field]) if field in meta else None)

    # other
    m.filename_digits = int(meta.get('filename_digits', '5'))
    m.units = clean_str(meta.get('units'))
    m.description = clean_str(meta.get('description'))

    m.validate()
    return m
def convert_wps_binary_to_vrt_dataset(
        folder: str,
        use_vsi: bool = False) -> Tuple[str, str, str, Callable[[], None]]:
    """Converts a WPS Binary format dataset into a mosaic VRT dataset referencing per-tile VRT datasets.

    :param folder: Folder with the WPS Binary ``index`` file and the tile files.
    :param use_vsi: If True, VRT files are created below the path returned by
        ``get_temp_vsi_path``; otherwise in a directory from ``get_temp_dir``.
    :returns: Tuple of (mosaic VRT path, title, short name, dispose callable).
        The short name is the basename of ``folder``; calling the dispose
        callable removes the generated VRT files / directory.
    :raises UnsupportedError: For a ``regular_ll`` dataset that defines
        ``stdlon``, an unsupported word size/signedness combination, or an
        unsupported projection.
    :raises UserError: If no tile files are found in ``folder``.
    """
    m = read_wps_binary_index_file(folder)

    if m.proj_id == 'regular_ll' and m.stdlon is not None:
        raise UnsupportedError('Rotated pole system is not supported')

    # scan folder for available tiles
    # (raw strings: '\.' and '\d' are regex escapes, not string escapes)
    tile_filename_re = re.compile(r'^({d})-({d})\.({d})-({d})$'.format(
        d=r'\d{' + str(m.filename_digits) + '}'))
    tiles = []
    for filename in os.listdir(folder):
        match = tile_filename_re.match(filename)
        if match:
            tiles.append({
                'filename': filename,
                'path': os.path.join(folder, filename),
                'start_x': int(match.group(1)),
                'end_x': int(match.group(2)),
                'start_y': int(match.group(3)),
                'end_y': int(match.group(4))
            })
    if not tiles:
        raise UserError(f'No tiles found in {folder}')

    # determine raster dimensions
    xsize = max(tile['end_x'] for tile in tiles)  # type: int
    ysize = max(tile['end_y'] for tile in tiles)  # type: int
    zsize = m.tile_z_end - m.tile_z_start + 1

    # convert to GDAL metadata; keys are (word size in bytes, signed)
    dtype_mapping = {
        (1, False): gdal.GDT_Byte,  # GDAL only supports unsigned byte
        (2, False): gdal.GDT_UInt16,
        (2, True): gdal.GDT_Int16,
        # NOTE(review): 3-byte words are mapped to 32-bit types as before;
        # confirm that GDAL reads such data correctly.
        (3, False): gdal.GDT_UInt32,
        (3, True): gdal.GDT_Int32,
        # 4-byte words are exactly 32 bits wide.
        (4, False): gdal.GDT_UInt32,
        (4, True): gdal.GDT_Int32
    }
    try:
        dtype = dtype_mapping[(m.word_size, m.signed)]
    except KeyError:
        raise UnsupportedError(
            'word_size={} signed={} is not supported'.format(
                m.word_size, m.signed))

    if m.proj_id == 'regular_ll':
        crs = CRS.create_lonlat()
    elif m.proj_id == 'lambert':
        # The map distortion of a Lambert Conformal projection is fully
        # defined by the two true latitudes.
        #
        # However, the longitude of origin is important for WRF as well,
        # since we only deal with upright rectangles (the domains) on the map.
        # For that reason, WRF allows the user to define the "standard longitude"
        # which is the longitude of origin.
        #
        # The latitude of origin on the other hand does not have any significance
        # here and cannot be specified by the user. The geo transform for a given
        # grid is computed based on any arbitrary latitude of origin (see below).
        # In QGIS, the only difference are the displayed projected y coordinates,
        # but the actual grid georeferencing is unaffected.
        # This is possible as WRF's georeferencing metadata is based on geographical
        # reference coordinates for a grid cell, not projected coordinates.
        arbitrary_latitude_origin = (m.truelat1 + m.truelat2) / 2
        origin = LonLat(lon=m.stdlon, lat=arbitrary_latitude_origin)
        crs = CRS.create_lambert(m.truelat1, m.truelat2, origin)
    elif m.proj_id == 'mercator':
        # The map distortion of a Mercator projection is fully
        # defined by the true latitude.
        # The longitude of origin does not have any significance and
        # any arbitrary value is handled when computing the geo transform
        # for a given grid (see below). See also the comment above for Lambert.
        arbitrary_longitude_origin = 0
        crs = CRS.create_mercator(m.truelat1, arbitrary_longitude_origin)
    elif m.proj_id == 'albers_nad83':
        # See the comment above for Lambert. The same applies here.
        arbitrary_latitude_origin = (m.truelat1 + m.truelat2) / 2
        origin = LonLat(lon=m.stdlon, lat=arbitrary_latitude_origin)
        crs = CRS.create_albers_nad83(m.truelat1, m.truelat2, origin)
    # FIXME handle polar vs polar_wgs84 differently
    elif m.proj_id == 'polar':
        # See the comment above for Lambert. The same applies here.
        crs = CRS.create_polar(m.truelat1, m.stdlon)
    elif m.proj_id == 'polar_wgs84':
        # See the comment above for Lambert. The same applies here.
        crs = CRS.create_polar(m.truelat1, m.stdlon)
    else:
        raise UnsupportedError(f'Projection {m.proj_id} is not supported')

    # Shift the known index by half a cell to get GDAL-style index coordinates.
    known_x_idx_gdal = m.known_idx.x - 0.5
    if m.top_bottom:
        known_y_idx_gdal = ysize - m.known_idx.y - 0.5
        dy_gdal = -m.dy
    else:
        known_y_idx_gdal = m.known_idx.y - 0.5
        dy_gdal = m.dy

    known_xy = crs.to_xy(m.known_lonlat)
    upper_left_x = known_xy.x - known_x_idx_gdal * m.dx
    upper_left_y = known_xy.y + known_y_idx_gdal * m.dy
    geo_transform = (upper_left_x, m.dx, 0, upper_left_y, 0, dy_gdal)

    # VRTRawRasterBand metadata
    line_width = m.word_size * (m.tile_x + m.tile_bdr * 2)  # x size incl. border
    tile_size = line_width * (m.tile_y + m.tile_bdr * 2)  # tile size incl. border
    line_offset = line_width
    # Skip the border rows plus the left border of the first data row.
    image_offset = m.tile_bdr * line_width + m.tile_bdr * m.word_size
    pixel_offset = m.word_size
    byte_order = 'LSB' if m.little_endian else 'MSB'

    # create tile VRTs
    if use_vsi:
        out_dir = get_temp_vsi_path(ext='')
    else:
        out_dir = get_temp_dir()

    driver = gdal.GetDriverByName('VRT')  # type: gdal.Driver
    tile_vrt_paths = {}
    for tile in tiles:
        vsi_path = '{}/{}.vrt'.format(out_dir, tile['filename'])
        vrt = driver.Create(vsi_path, m.tile_x, m.tile_y, 0)  # type: gdal.Dataset
        for z in range(m.tile_z_start - 1, m.tile_z_end):
            options = [
                'subClass=VRTRawRasterBand',
                'SourceFilename={}'.format(tile['path']),
                'relativeToVRT=0',
                'ImageOffset={}'.format(z * tile_size + image_offset),
                'PixelOffset={}'.format(pixel_offset),
                'LineOffset={}'.format(line_offset),
                'ByteOrder=' + byte_order
            ]
            vrt.AddBand(dtype, options)
        vrt.FlushCache()
        tile_vrt_paths[tile['filename']] = vsi_path

    # create mosaic VRT
    mosaic_vrt_path = '{}/mosaic.vrt'.format(out_dir)
    vrt = driver.Create(mosaic_vrt_path, xsize, ysize, zsize, dtype)  # type: gdal.Dataset
    vrt.SetProjection(crs.proj4)
    vrt.SetGeoTransform(geo_transform)

    if m.categorical:
        color_table, cat_names = get_gdal_categories(m.categories, m.category_min, m.category_max)

    for band_idx in range(1, zsize + 1):
        band = vrt.GetRasterBand(band_idx)  # type: gdal.Band
        if m.missing_value is not None:
            band.SetNoDataValue(m.missing_value)
        band.SetScale(m.scale_factor)
        if m.categorical:
            band.SetRasterColorInterpretation(gdal.GCI_PaletteIndex)
            band.SetRasterColorTable(color_table)
            band.SetRasterCategoryNames(cat_names)

        sources = {}
        for idx, tile in enumerate(tiles):
            tile_vrt_path = tile_vrt_paths[tile['filename']]

            if m.top_bottom:
                end_y = ysize - tile['start_y'] - 1
                start_y = end_y - m.tile_y + 1
            else:
                start_y = tile['start_y'] - 1

            sources['source_{}'.format(idx)] = ('''
                <SimpleSource>
                    <SourceFilename relativeToVRT="0">{path}</SourceFilename>
                    <SourceBand>{band}</SourceBand>
                    <SrcRect xOff="0" yOff="0" xSize="{tile_x}" ySize="{tile_y}" />
                    <DstRect xOff="{offset_x}" yOff="{offset_y}" xSize="{tile_x}" ySize="{tile_y}" />
                </SimpleSource>''').format(path=tile_vrt_path, band=band_idx,
                                           tile_x=m.tile_x, tile_y=m.tile_y,
                                           offset_x=tile['start_x'] - 1, offset_y=start_y)
        band.SetMetadata(sources, 'vrt_sources')

    vrt.FlushCache()

    vrt_paths = [mosaic_vrt_path] + list(tile_vrt_paths.values())
    if use_vsi:
        dispose = partial(remove_vsis, vrt_paths)
    else:
        dispose = partial(remove_dir, out_dir)

    short_name = os.path.basename(folder)
    title = short_name
    if m.units and m.units != 'category':
        title += ' in ' + m.units
    if m.description:
        title += ' (' + m.description + ')'

    # The title is returned as VRT does not support dataset descriptions.
    return mosaic_vrt_path, title, short_name, dispose
def fill_domains(self):
    ''' Update computed fields in each domain object like cell size.

    ``self.data['domains']`` is ordered from the innermost domain (index 0)
    to the outermost domain (last index). The method first adjusts domain
    sizes so that every padded child domain spans a whole number of parent
    cells, then derives ``domain_size_padded``, ``bbox``, ``cell_size``,
    ``center_lonlat`` and ``parent_start`` for the domains.

    :raises UserError: If no domains are configured.
    '''
    domains = self.data.get('domains')
    # An empty list is treated like a missing entry; it would otherwise fail
    # with an IndexError on the lookups just below.
    if not domains:
        raise UserError('Domains are not configured yet')

    innermost_domain = domains[0]
    outermost_domain = domains[-1]
    innermost_domain['padding_left'] = 0
    innermost_domain['padding_right'] = 0
    innermost_domain['padding_bottom'] = 0
    innermost_domain['padding_top'] = 0
    outermost_domain['parent_start'] = [1, 1]

    def round_up_to_multiple(size, ratio):
        # Smallest multiple of ``ratio`` that is not below ``size``.
        if size % ratio != 0:
            return int(ceil(size / ratio)) * ratio
        return size

    # compute and adjust domain sizes
    for idx in range(1, len(domains)):
        domain = domains[idx]
        child_domain = domains[idx - 1]
        ratio = domain['parent_cell_size_ratio']

        # The padded size of the child domain must be an integer multiple of
        # the parent-to-child cell size ratio so that it covers whole parent
        # cells. Domains are processed from the innermost outwards, so where
        # this is not the case the child is enlarged to the next multiple.
        child_padded_x = (child_domain['domain_size'][0]
                          + child_domain['padding_left'] + child_domain['padding_right'])
        child_padded_y = (child_domain['domain_size'][1]
                          + child_domain['padding_bottom'] + child_domain['padding_top'])

        new_child_padded_x = round_up_to_multiple(child_padded_x, ratio)
        new_child_padded_y = round_up_to_multiple(child_padded_y, ratio)

        if idx == 1:
            # The innermost domain has no padding: grow the domain itself.
            child_domain['domain_size'] = [new_child_padded_x, new_child_padded_y]
        else:
            # Other domains absorb the difference in their right/top padding.
            child_domain['padding_right'] += new_child_padded_x - child_padded_x
            child_domain['padding_top'] += new_child_padded_y - child_padded_y

        assert new_child_padded_x % ratio == 0
        assert new_child_padded_y % ratio == 0

        domain['domain_size'] = [new_child_padded_x // ratio,
                                 new_child_padded_y // ratio]

    # compute bounding boxes, cell sizes, center lonlat, parent start
    for idx, domain in enumerate(domains):
        size_x, size_y = domain['domain_size']
        padded_size_x = size_x + domain['padding_left'] + domain['padding_right']
        padded_size_y = size_y + domain['padding_bottom'] + domain['padding_top']
        domain['domain_size_padded'] = [padded_size_x, padded_size_y]

        if idx == 0:
            center_lon, center_lat = domain['center_lonlat']
            center_xy = self.projection.to_xy(
                LonLat(lon=center_lon, lat=center_lat))
            domain['bbox'] = get_bbox_from_grid_spec(
                center_xy.x, center_xy.y, domain['cell_size'], size_x, size_y)
        else:
            child_domain = domains[idx - 1]
            ratio = domain['parent_cell_size_ratio']

            domain['cell_size'] = [child_domain['cell_size'][0] * ratio,
                                   child_domain['cell_size'][1] * ratio]

            child_center_x, child_center_y = get_bbox_center(child_domain['bbox'])

            domain['bbox'] = get_parent_bbox_from_child_grid_spec(
                child_center_x=child_center_x,
                child_center_y=child_center_y,
                child_cell_size=child_domain['cell_size'],
                child_cols=child_domain['domain_size'][0]
                + child_domain['padding_left'] + child_domain['padding_right'],
                child_rows=child_domain['domain_size'][1]
                + child_domain['padding_top'] + child_domain['padding_bottom'],
                child_parent_res_ratio=ratio,
                parent_left_padding=domain['padding_left'],
                parent_right_padding=domain['padding_right'],
                parent_bottom_padding=domain['padding_bottom'],
                parent_top_padding=domain['padding_top'])

            center_x, center_y = get_bbox_center(domain['bbox'])
            center_lonlat = self.projection.to_lonlat(
                Coordinate2D(x=center_x, y=center_y))
            domain['center_lonlat'] = [center_lonlat.lon, center_lonlat.lat]

        if idx < len(domains) - 1:
            # 1-based start index of this domain within its parent domain.
            parent_domain = domains[idx + 1]
            domain['parent_start'] = [parent_domain['padding_left'] + 1,
                                      parent_domain['padding_bottom'] + 1]
def convert_nml_to_project_domains(nml: dict) -> List[dict]:
    """Convert the domain definition of a WPS namelist into project domains.

    :param nml: Parsed WPS namelist with ``share`` and ``geogrid`` groups.
    :returns: List of domain dictionaries ordered from the innermost to the
        outermost domain. The first entry carries projection, cell size,
        center and size; the remaining entries carry the parent cell size
        ratio and the paddings around the respective child domain.
    :raises KeyError: If a required section/variable is missing.
    :raises UserError: If a parent domain has more than one nested domain.
    :raises UnsupportedError: If ``ref_x``/``ref_y`` is given, or the map
        projection is not supported.
    """
    max_dom = nml['share']['max_dom']  # type: int
    geogrid = nml['geogrid']

    def per_domain(name: str) -> list:
        # Per-domain variables may be parsed as a scalar (single value) and
        # may carry more columns than max_dom; only the first max_dom
        # entries describe domains that are in use.
        values = geogrid[name]
        if not isinstance(values, list):
            values = [values]
        return values[:max_dom]

    map_proj = geogrid['map_proj']  # type: str
    parent_id = per_domain('parent_id')  # type: List[int]
    parent_grid_ratio = per_domain('parent_grid_ratio')  # type: List[int]
    i_parent_start = per_domain('i_parent_start')  # type: List[int]
    j_parent_start = per_domain('j_parent_start')  # type: List[int]
    e_we = per_domain('e_we')  # type: List[int]
    e_sn = per_domain('e_sn')  # type: List[int]
    dx = [geogrid['dx']]  # type: List[float]
    dy = [geogrid['dy']]  # type: List[float]
    ref_lon = geogrid['ref_lon']  # type: float
    ref_lat = geogrid['ref_lat']  # type: float
    truelat1 = geogrid.get('truelat1')  # type: float
    truelat2 = geogrid.get('truelat2')  # type: float
    stand_lon = geogrid.get('stand_lon', 0.0)  # type: float

    # Check that there are no domains with 2 nests on the same level
    if parent_id != [1] + list(range(1, max_dom)):
        raise UserError('Due to the way domains are represented in GIS4WRF '
                        'each parent domain can have only one nested domain')

    # Check whether ref_x/ref_y is omitted, so that we can assume ref == center.
    if 'ref_x' in geogrid or 'ref_y' in geogrid:
        raise UnsupportedError('ref_x/ref_y is not supported in namelist')

    # Create CRS object from projection metadata.
    # See wps_binary_to_gdal.py for further explanations regarding latitude
    # and longitude of origin.
    if map_proj == 'lat-lon':
        if stand_lon != 0.0:
            raise UnsupportedError('Rotated lat-lon projection is not supported')
        crs = CRS.create_lonlat()
    elif map_proj == 'lambert':
        arbitrary_latitude_origin = (truelat1 + truelat2) / 2
        origin = LonLat(lon=stand_lon, lat=arbitrary_latitude_origin)
        crs = CRS.create_lambert(truelat1, truelat2, origin)
    elif map_proj == 'mercator':
        arbitrary_longitude_origin = 0
        crs = CRS.create_mercator(truelat1, arbitrary_longitude_origin)
    elif map_proj == 'polar':
        crs = CRS.create_polar(truelat1, stand_lon)
    else:
        raise UnsupportedError(f'Map projection "{map_proj}" is not supported')

    ref_xy = crs.to_xy(LonLat(lon=ref_lon, lat=ref_lat))
    ref_x = [ref_xy.x]  # type: List[float]
    ref_y = [ref_xy.y]  # type: List[float]
    min_x = []  # type: List[float]
    min_y = []  # type: List[float]
    padding_left = []  # type: List[int]
    padding_bottom = []  # type: List[int]
    padding_right = []  # type: List[int]
    padding_top = []  # type: List[int]

    # e_we/e_sn are one larger than the number of cells per dimension.
    cols = [i - 1 for i in e_we]
    rows = [i - 1 for i in e_sn]

    # Walk from the outermost domain (idx 0) inwards; idx + 1 is the nest.
    for idx in range(max_dom - 1):
        # Calculate horizontal grid spacing for inner domain
        dx.append(dx[idx] / parent_grid_ratio[idx + 1])
        dy.append(dy[idx] / parent_grid_ratio[idx + 1])

        if idx == 0:
            # Calculate min coordinates for outermost domain
            min_x.append(ref_x[idx] - (dx[idx] * (cols[idx] / 2)))
            min_y.append(ref_y[idx] - (dy[idx] * (rows[idx] / 2)))

        # Calculate min coordinates for inner domain
        min_x.append(min_x[idx] + (dx[idx] * (i_parent_start[idx + 1] - 1)))
        min_y.append(min_y[idx] + (dy[idx] * (j_parent_start[idx + 1] - 1)))

        # Calculate center coordinates for inner domain
        ref_x.append(min_x[idx + 1] + (dx[idx + 1] * (cols[idx + 1] / 2)))
        ref_y.append(min_y[idx + 1] + (dy[idx + 1] * (rows[idx + 1] / 2)))

        # Paddings are counted in cells of the parent domain.
        padding_left.append(i_parent_start[idx + 1] - 1)
        padding_bottom.append(j_parent_start[idx + 1] - 1)
        padding_right.append(cols[idx] - padding_left[idx]
                             - cols[idx + 1] // parent_grid_ratio[idx + 1])
        padding_top.append(rows[idx] - padding_bottom[idx]
                           - rows[idx + 1] // parent_grid_ratio[idx + 1])

    ref_lonlat = crs.to_lonlat(Coordinate2D(x=ref_x[-1], y=ref_y[-1]))

    # The last list entries belong to the innermost domain.
    first_domain = {
        'map_proj': map_proj,
        'cell_size': [dx[-1], dy[-1]],
        'center_lonlat': [ref_lonlat.lon, ref_lonlat.lat],
        'domain_size': [cols[-1], rows[-1]],
        'stand_lon': stand_lon,
    }
    if truelat1 is not None:
        first_domain['truelat1'] = truelat1
    if truelat2 is not None:
        first_domain['truelat2'] = truelat2

    # Reverse to innermost-first order; the ratio of the outermost domain
    # (it has no parent) is dropped.
    ratios = parent_grid_ratio[::-1][:-1]
    lefts = padding_left[::-1]
    rights = padding_right[::-1]
    bottoms = padding_bottom[::-1]
    tops = padding_top[::-1]

    domains = [first_domain]
    for i in range(max_dom - 1):
        domains.append({
            'parent_cell_size_ratio': ratios[i],
            "padding_left": lefts[i],
            "padding_right": rights[i],
            "padding_bottom": bottoms[i],
            "padding_top": tops[i]
        })

    return domains
def convert_to_wps_binary(input_path: str, output_folder: str, is_categorical: bool,
                          units: Optional[str]=None, description: Optional[str]=None,
                          strict_datum: bool=True) -> GeogridBinaryDataset:
    '''
    Losslessly convert common geo formats to WPS binary format.
    If the given input file has a CRS or data type unsupported by WRF then an error is raised.

    :param input_path: Path to GDAL-supported raster file.
    :param output_folder: Path to output folder, will be created if not existing
    :param is_categorical: Whether the data is categorical, otherwise continuous
    :param units: units for continuous data
    :param description: single-line dataset description
    :param strict_datum: if True, fail if the input datum is not supported by WRF, otherwise ignore mismatch
    :returns: Dataset object built from the index file path and the datum mismatch (if any)
    :raises ValueError: if the output folder is not empty
    :raises UserError: if the input cannot be opened, is too large, or lacks a required no-data value
    :raises UnsupportedError: if the dataset has more than one layer
    '''
    os.makedirs(output_folder, exist_ok=True)
    if os.listdir(output_folder):
        raise ValueError('Output folder must be empty')

    # FIXME if there is no nodata value, ask the user if it really has no nodata or ask for the value

    src_ds = gdal.Open(input_path) # type: gdal.Dataset
    if src_ds is None:
        # gdal.Open signals failure by returning None unless GDAL exceptions are enabled.
        raise UserError(f'{input_path} could not be opened as raster dataset')
    xsize, ysize = src_ds.RasterXSize, src_ds.RasterYSize
    if xsize > MAX_SIZE or ysize > MAX_SIZE:
        raise UserError(f'Dataset has more than {MAX_SIZE} rows or columns: {ysize} x {xsize}, consider downsampling')
    filename_digits = 6 if xsize > 99999 or ysize > 99999 else 5
    # NOTE(review): this checks the layer count; if raster bands were meant,
    # RasterCount would be the attribute to check -- confirm the intent.
    if src_ds.GetLayerCount() > 1:
        raise UnsupportedError('Dataset has more than one layer which is unsupported')
    band = src_ds.GetRasterBand(1) # type: gdal.Band
    src_no_data_value = band.GetNoDataValue()
    has_no_data_value = src_no_data_value is not None

    tilesize_x = find_tile_size(xsize, try_hard=not has_no_data_value)
    tilesize_y = find_tile_size(ysize, try_hard=not has_no_data_value)
    is_perfect_tiling = xsize % tilesize_x == 0 and ysize % tilesize_y == 0

    if is_categorical or (tilesize_x == xsize and tilesize_y == ysize):
        tile_bdr = 0
    else:
        # TODO write unit test that checks whether halo areas have correct values
        tile_bdr = 3

    if tile_bdr > 0 and not has_no_data_value:
        raise UserError('No-data value required as dataset is continuous and halo is non-zero')

    if not is_perfect_tiling and not has_no_data_value:
        raise UserError('No-data value required as no perfect tile size could be found')

    tilesize_bdr_x = tilesize_x + 2*tile_bdr
    tilesize_bdr_y = tilesize_y + 2*tile_bdr

    tiles_x = list(range(0, xsize, tilesize_x))
    tiles_y = list(range(0, ysize, tilesize_y))
    ysize_pad = tilesize_y * len(tiles_y) # ysize including padding caused by imperfect tiling

    # write 'index' file with metadata
    index_path = os.path.join(output_folder, 'index')
    index_dict, datum_mismatch, inv_scale_factor, dst_dtype, dst_no_data_value = create_index_dict(
        src_ds, tilesize_x, tilesize_y, ysize_pad, tile_bdr, filename_digits,
        is_categorical, units, description, strict_datum)
    write_index_file(index_path, index_dict)

    np_dst_dtype = gdal_array.GDALTypeCodeToNumericTypeCode(dst_dtype)
    needs_scaling = inv_scale_factor is not None

    # WPS-specific filename convention: <start_x>-<end_x>.<start_y>-<end_y>,
    # each number zero-padded to filename_digits.
    fmt_int = '{:0' + str(filename_digits) + 'd}'
    fmt_filename = '{fmt}-{fmt}.{fmt}-{fmt}'.format(fmt=fmt_int)

    # As we have no control over the auxiliary files that are created as well during conversion
    # we do everything in a temporary folder and move the binary file out after the conversion.
    # This keeps everything clean and tidy.
    tmp_dir = tempfile.mkdtemp()
    tmp_bin_path = os.path.join(tmp_dir, 'data.bin')

    driver = gdal.GetDriverByName('ENVI') # type: gdal.Driver

    dy = src_ds.GetGeoTransform()[5]

    try:
        for start_x in tiles_x:
            for start_y in tiles_y:
                end_x = start_x + tilesize_x - 1
                end_y = start_y + tilesize_y - 1
                start_bdr_x = start_x - tile_bdr
                start_bdr_y = start_y - tile_bdr
                end_bdr_x = end_x + tile_bdr
                end_bdr_y = end_y + tile_bdr

                # read source data (clipped to the raster extent)
                offset_x = max(0, start_bdr_x)
                offset_y = max(0, start_bdr_y)

                if end_bdr_x >= xsize:
                    datasize_x = xsize - offset_x
                else:
                    datasize_x = end_bdr_x - offset_x + 1

                if end_bdr_y >= ysize:
                    datasize_y = ysize - offset_y
                else:
                    datasize_y = end_bdr_y - offset_y + 1

                src_data = band.ReadAsArray(offset_x, offset_y, datasize_x, datasize_y)
                if dy > 0:
                    src_data = src_data[::-1]

                # scale if necessary (float data only)
                if needs_scaling:
                    # TODO test if scaling with no-data works
                    if has_no_data_value:
                        src_data = ma.masked_equal(src_data, src_no_data_value)
                    src_data *= inv_scale_factor
                    np.round(src_data, out=src_data)
                    if has_no_data_value:
                        src_data = ma.filled(src_data, dst_no_data_value)

                # pad incomplete tile with nodata value
                if datasize_x == tilesize_bdr_x and datasize_y == tilesize_bdr_y:
                    dst_data = src_data
                else:
                    assert has_no_data_value
                    dst_data = np.empty((tilesize_bdr_y, tilesize_bdr_x), np_dst_dtype)
                    data_start_x = offset_x - start_bdr_x
                    data_start_y = offset_y - start_bdr_y
                    dst_data[data_start_y:data_start_y+datasize_y,data_start_x:data_start_x+datasize_x] = src_data

                    if start_bdr_x < 0:
                        dst_data[:,:data_start_x] = dst_no_data_value
                    if start_bdr_y < 0:
                        dst_data[:data_start_y,:] = dst_no_data_value
                    if end_bdr_x >= xsize:
                        dst_data[:,data_start_x+datasize_x:] = dst_no_data_value
                    if end_bdr_y >= ysize:
                        dst_data[data_start_y+datasize_y:,:] = dst_no_data_value

                # create tile file
                dst_ds = driver.Create(tmp_bin_path, tilesize_bdr_x, tilesize_bdr_y, 1, dst_dtype) # type: gdal.Dataset
                dst_band = dst_ds.GetRasterBand(1) # type: gdal.Band
                dst_band.WriteArray(dst_data)

                # write to disk
                dst_ds.FlushCache()
                del dst_ds

                # move to final location with WPS-specific filename convention
                if dy < 0:
                    # Flip the y range so that it is counted from the other end
                    # of the (padded) raster.
                    file_end_y = ysize_pad - start_y - 1
                    file_start_y = file_end_y - tilesize_y + 1
                else:
                    file_start_y, file_end_y = start_y, end_y

                final_path = os.path.join(output_folder, fmt_filename.format(
                    start_x + 1, end_x + 1, file_start_y + 1, file_end_y + 1))
                shutil.move(tmp_bin_path, final_path)

        return GeogridBinaryDataset(index_path, datum_mismatch)
    finally:
        shutil.rmtree(tmp_dir)
def create_index_dict(dataset: gdal.Dataset, tilesize_x: int, tilesize_y: int, ysize_pad: int,
                      tile_bdr: int, filename_digits: int,
                      is_categorical: bool, units: Optional[str]=None, description: Optional[str]=None,
                      strict_datum: bool=True) -> Tuple[Dict[str, Any], DatumMismatch, Optional[float], int, Optional[float]]:
    '''
    Returns a dictionary that can be used for writing a WPS Binary format index file.
    If the given dataset has a CRS or data type unsupported by WRF then an error is raised.
    See also :func:`write_index_file`.

    :param dataset: Source dataset; only its first band is inspected.
    :param tilesize_x: tile width written as ``tile_x``
    :param tilesize_y: tile height written as ``tile_y``
    :param ysize_pad: raster height including padding, used to locate the known point when dy < 0
    :param tile_bdr: border width written as ``tile_bdr``
    :param filename_digits: written as ``filename_digits`` if greater than 5
    :param is_categorical: whether the data is categorical, otherwise continuous
    :param units: optional units; defaults to ``'category'`` for categorical data
    :param description: optional dataset description
    :param strict_datum: if True, a datum mismatch raises an error instead of being reported
    :returns: tuple of (index metadata, datum mismatch or None, inverse scale factor
        or None, GDAL data type of the output, output no-data value or None)
    :raises UserError: if categorical data is of floating-point type
    :raises UnsupportedError: for unsupported data types, scale/offset settings,
        axis order, projection or datum
    '''
    band = dataset.GetRasterBand(1) # type: gdal.Band
    dtype = band.DataType

    # An unset scale/offset may be reported as None; treat that like the
    # neutral values 1 and 0.
    band_offset = band.GetOffset()
    if band_offset is None:
        band_offset = 0
    band_scale = band.GetScale()
    if band_scale is None:
        band_scale = 1

    if dtype in DTYPE_INT:
        no_data_value = band.GetNoDataValue() # type: Optional[float]
        scale_factor = band_scale
        inv_scale_factor = None
        if band_offset != 0:
            raise UnsupportedError('Integer data with offset not supported')
    elif dtype in DTYPE_FLOAT:
        if is_categorical:
            raise UserError('Categorical data must have integer-type data but is float')
        if band_offset != 0 or band_scale != 1:
            raise UnsupportedError('Float data with offset or scale not supported')
        # WPS binary doesn't support floating point data.
        # Floating point data must be converted to integers by scaling and rounding.
        inv_scale_factor, min_max = compute_inv_scale_factor(read_blocks(band))
        scale_factor = 1/inv_scale_factor
        min_, max_ = min_max
        min_scaled = round(min_ * inv_scale_factor)
        max_scaled = round(max_ * inv_scale_factor)
        dtype = get_optimal_dtype(min_scaled, max_scaled)
        if band.GetNoDataValue() is None:
            no_data_value = None
        else:
            # TODO may fail if value range equals dtype range
            #      adjusting the scaling factor slightly to make room for a no-data value may help
            no_data_value = get_no_data_value(dtype, min_scaled, max_scaled)
    else:
        # Raise instead of asserting: asserts vanish under -O and the code
        # below would then fail with an unrelated NameError.
        raise UnsupportedError("Unsupported data type: {}".format(gdal.GetDataTypeName(dtype)))

    signed = gdal_dtype_is_signed(dtype)
    wordsize = gdal.GetDataTypeSize(dtype) // 8

    wkt = dataset.GetProjection()
    srs = osr.SpatialReference(wkt)

    truelat1 = truelat2 = stand_lon = None

    geotransform = dataset.GetGeoTransform()
    dx = geotransform[1]
    dy = geotransform[5]
    assert dx > 0
    # dy can be negative, see below

    projection = None
    datum_mismatch = None

    if srs.IsGeographic():
        if srs.EPSGTreatsAsLatLong():
            raise UnsupportedError("Unsupported axis order: Lat/Lon, must be Lon/Lat")

        if not CRS.is_wrf_sphere_datum(srs):
            datum_mismatch = DatumMismatch(
                expected='WRF Sphere (6370km)',
                actual='a={}m b={}m'.format(srs.GetSemiMajor(), srs.GetSemiMinor()))

        if datum_mismatch and strict_datum:
            raise UnsupportedError("Unsupported datum, must be based on a sphere with " +
                "radius {}m, but is an ellipsoid with a={}m b={}m".format(
                    WRF_EARTH_RADIUS, srs.GetSemiMajor(), srs.GetSemiMinor()))

        projection = 'regular_ll'

    elif srs.IsProjected():
        proj = srs.GetAttrValue('projection')
        datum = srs.GetAttrValue('datum')

        # Collect the projection parameters that apply to the projection type.
        if proj in ['Albers_Conic_Equal_Area', 'Lambert_Conformal_Conic_2SP', 'Mercator_2SP']:
            truelat1 = srs.GetNormProjParm('standard_parallel_1')

        if proj == 'Polar_Stereographic':
            truelat1 = srs.GetNormProjParm('latitude_of_origin')

        if proj in ['Albers_Conic_Equal_Area', 'Lambert_Conformal_Conic_2SP']:
            truelat2 = srs.GetNormProjParm('standard_parallel_2')

        if proj == 'Albers_Conic_Equal_Area':
            stand_lon = srs.GetNormProjParm('longitude_of_center')

        if proj in ['Lambert_Conformal_Conic_2SP', 'Mercator_2SP', 'Polar_Stereographic']:
            stand_lon = srs.GetNormProjParm('central_meridian')

        if proj == "Albers_Conic_Equal_Area":
            if datum != "North_American_Datum_1983":
                datum_mismatch = DatumMismatch(expected='NAD83', actual=datum)
            projection = 'albers_nad83'

        elif proj == "Lambert_Conformal_Conic_2SP":
            if not CRS.is_wrf_sphere_datum(srs):
                datum_mismatch = DatumMismatch(expected='WRF Sphere (6370km)', actual=datum)
            projection = 'lambert'

        elif proj == "Mercator_2SP":
            if not CRS.is_wrf_sphere_datum(srs):
                datum_mismatch = DatumMismatch(expected='WRF Sphere (6370km)', actual=datum)
            projection = 'mercator'

        # For polar stereographic we don't allow datum mismatch in non-strict mode
        # as it would be ambiguous which WPS projection ID to choose.
        elif proj == "Polar_Stereographic" and datum == 'WGS_1984':
            projection = 'polar_wgs84'

        elif proj == "Polar_Stereographic" and CRS.is_wrf_sphere_datum(srs):
            projection = 'polar'

        if projection is None or (datum_mismatch and strict_datum):
            raise UnsupportedError("Unsupported projection/datum: {}; {}".format(proj, datum))
    else:
        raise UnsupportedError("Unsupported SRS type, must be geographic or projected")

    if units is None and is_categorical:
        units = 'category'

    # gdal always uses system byte order when creating ENVI files
    is_little_endian = sys.byteorder == 'little'

    # WPS does not support the concept of negative dy and requires that
    # the highest y coordinate corresponds to the highest y index.
    # If row_order=top_bottom (which we use), then the highest y index corresponds to
    # the row that is stored first in the file.
    # If row_order=bottom_top, then the highest y index corresponds to
    # the row that is stored last in the file.
    # Index coordinates in WPS do not start from 0 but from 1 where (1,1)
    # corresponds to the center of the cell. GDAL (0,0) corresponds to the corner of the cell.
    # See also http://www2.mmm.ucar.edu/wrf/users/FAQ_files/FAQ_wps_intermediate_format.html.
    half_cell = 0.5
    # WPS index coordinates
    known_x = known_y = 1.0
    # GDAL index coordinates
    x_idx = known_x - half_cell
    if dy < 0:
        y_idx = ysize_pad - known_y + half_cell
    else:
        y_idx = known_y - half_cell
    known_lonlat = CRS(srs=srs).to_lonlat(get_crs_coordinates(dataset, x_idx, y_idx))

    metadata = {
        'type': 'categorical' if is_categorical else 'continuous',
        'endian': 'little' if is_little_endian else 'big',
        'signed': 'yes' if signed else 'no',
        'wordsize': wordsize,
        'row_order': 'top_bottom',
        'projection': projection,
        'dx': dx,
        'dy': abs(dy),
        'known_x': known_x,
        'known_y': known_y,
        'known_lat': known_lonlat.lat,
        'known_lon': known_lonlat.lon,
        'tile_x': tilesize_x,
        'tile_y': tilesize_y,
        'tile_z': 1,
        'tile_bdr': tile_bdr
    }

    # Optional entries are only written when they deviate from the defaults
    # or carry a value.
    if filename_digits > 5:
        metadata['filename_digits'] = filename_digits

    if scale_factor != 1:
        metadata['scale_factor'] = scale_factor

    if no_data_value is not None:
        metadata['missing_value'] = float(no_data_value)

    if is_categorical:
        # Note that ComputeRasterMinMax ignores pixels with no-data value.
        band_min, band_max = band.ComputeRasterMinMax()
        assert band_min == int(band_min)
        assert band_max == int(band_max)
        metadata['category_min'] = int(band_min)
        metadata['category_max'] = int(band_max)

    if truelat1 is not None:
        metadata['truelat1'] = truelat1

    if truelat2 is not None:
        metadata['truelat2'] = truelat2

    if stand_lon is not None:
        metadata['stdlon'] = stand_lon

    if units is not None:
        metadata['units'] = units

    if description is not None:
        metadata['description'] = description

    return metadata, datum_mismatch, inv_scale_factor, dtype, no_data_value
def convert_project_to_wrf_namelist(project: Project) -> dict:
    """Build the WRF namelist for a project as nested ordered mappings.

    The simulation time range and input interval are taken from the
    project's meteorological dataset selection. Grid spacing and the number
    of land categories are read from the geogrid output files, and the
    metgrid level counts from the first domain-1 metgrid output file found in
    ``project.run_wps_folder``. The nesting layout is taken from
    ``project.data['domains']``.

    :param project: project to convert.
    :return: ordered mapping with the namelist groups ``time_control``,
        ``domains`` and ``physics``.
    :raises UserError: if no meteorological dataset is selected, if geogrid
        or metgrid output files are missing, or if the project has no domains.
    """
    wrf = OrderedDict() # type: dict

    try:
        met_spec = project.met_dataset_spec
    except KeyError as err:
        # Chain explicitly so the traceback shows the KeyError as the cause
        # rather than as an unrelated error raised during handling.
        raise UserError('Meteorological data not selected') from err

    # One geogrid output file is expected per domain, numbered from 1.
    geogrid_nc = [os.path.join(project.run_wps_folder, f'geo_em.d{i:02d}.nc')
                  for i in range(1, project.domain_count + 1)]
    if not all(map(os.path.exists, geogrid_nc)):
        raise UserError('Geogrid output files not found, run geogrid first')

    dx = [] # type: List[float]
    dy = [] # type: List[float]
    for path in geogrid_nc:
        ds = nc.Dataset(path)
        try:
            dx.append(ds.getncattr('DX'))
            dy.append(ds.getncattr('DY'))
            # Overwritten on every iteration: the value of the last file
            # (highest domain number) is the one written to the namelist.
            num_land_cat = ds.getncattr('NUM_LAND_CAT')
        finally:
            ds.close()
        logger.debug(f'read metadata from {path}: DX={dx[-1]}, DY={dy[-1]}, NUM_LAND_CAT={num_land_cat}')

    # Only the first matching domain-1 metgrid file is inspected.
    metgrid_nc = glob.glob(os.path.join(project.run_wps_folder, 'met_em.d01.*.nc'))
    if not metgrid_nc:
        raise UserError('Metgrid output files not found, run metgrid first')
    ds = nc.Dataset(metgrid_nc[0])
    try:
        num_metgrid_levels = ds.dimensions['num_metgrid_levels'].size
        num_metgrid_soil_levels = ds.getncattr('NUM_METGRID_SOIL_LEVELS')
    finally:
        ds.close()
    logger.debug(f'read metadata from {metgrid_nc[0]}: num_metgrid_levels={num_metgrid_levels}, ' +
                 f'NUM_METGRID_SOIL_LEVELS={num_metgrid_soil_levels}')

    domains = project.data['domains']
    num_domains = len(domains)
    if num_domains < 1:
        # Explicit check instead of an assert: asserts are stripped under -O,
        # which would let an empty project fail later with an unrelated error.
        raise UserError('Project has no domains')

    start, end = met_spec['time_range']

    wrf['time_control'] = OrderedDict(
        start_year=[start.year] * num_domains,
        start_month=[start.month] * num_domains,
        start_day=[start.day] * num_domains,
        start_hour=[start.hour] * num_domains,
        start_minute=[start.minute] * num_domains,
        start_second=[start.second] * num_domains,
        end_year=[end.year] * num_domains,
        end_month=[end.month] * num_domains,
        end_day=[end.day] * num_domains,
        end_hour=[end.hour] * num_domains,
        end_minute=[end.minute] * num_domains,
        end_second=[end.second] * num_domains,
        interval_seconds=met_spec['interval_seconds'],
        history_interval=[60] * num_domains,
        frames_per_outfile=[100] * num_domains,
        input_from_file=[True] * num_domains,
        nocolons=True,
    )

    # The namelist lists domains in the reverse of the project's storage order
    # (presumably the project stores the innermost domain first -- confirm
    # against Project).
    namelist_domains = domains[::-1]

    # The first namelist domain gets ratio 1; each following domain uses the
    # 'parent_cell_size_ratio' stored on the entry preceding it in namelist order.
    parent_grid_ratio = [1] + [domain['parent_cell_size_ratio']
                               for domain in namelist_domains[:-1]]

    wrf['domains'] = OrderedDict(
        max_dom=num_domains,
        grid_id=list(range(1, num_domains + 1)),
        parent_id=[1] + list(range(1, num_domains)),
        parent_grid_ratio=parent_grid_ratio,
        # Independent copy so that editing one entry in place cannot silently
        # change the other.
        parent_time_step_ratio=list(parent_grid_ratio),
        i_parent_start=[domain['parent_start'][0] for domain in namelist_domains],
        j_parent_start=[domain['parent_start'][1] for domain in namelist_domains],
        # e_we and e_sn represent the number of velocity points (i.e., u-staggered or v-staggered points)
        # which is one more than the number of cells in each dimension.
        e_we=[domain['domain_size'][0] + domain['padding_left'] + domain['padding_right'] + 1
              for domain in namelist_domains],
        e_sn=[domain['domain_size'][1] + domain['padding_bottom'] + domain['padding_top'] + 1
              for domain in namelist_domains],
        e_vert=[30] * num_domains,
        # dx/dy is not the same as in the WPS namelist, instead it is always meters
        # and is written to the geogrid output files (see above).
        dx=dx,
        dy=dy,
        num_metgrid_levels=num_metgrid_levels,
        num_metgrid_soil_levels=num_metgrid_soil_levels,
    )

    wrf['physics'] = OrderedDict(
        num_land_cat=num_land_cat,
    )

    return wrf