def test_set_crs_epsg(tmpdir):
    """ Tests for EPSG codes specifically """
    ds = Dataset(str(tmpdir.join('test.nc')), 'w')

    # (variable name, EPSG init code, expected proj4 attribute)
    cases = (
        ('data', 'EPSG:4326', '+proj=latlong +datum=WGS84 +no_defs'),
        ('data2', 'EPSG:4269', '+proj=latlong +datum=NAD83 +no_defs'),
    )

    for varname, epsg_code, expected_proj4 in cases:
        variable = ds.createVariable(varname, 'S1')
        set_crs(ds, varname, Proj(init=epsg_code), set_proj4_att=True)

        data_atts = get_ncattrs(variable)
        crs_atts = get_ncattrs(ds.variables[data_atts['grid_mapping']])

        assert data_atts['proj4'] == expected_proj4
        assert crs_atts['grid_mapping_name'] == 'latitude_longitude'
        # Same ellipsoid constants are asserted for both test cases
        assert crs_atts['semi_major_axis'] == 6378137.0
        assert crs_atts['inverse_flattening'] == 298.257223563
def test_set_crs_epsg(tmpdir):
    """ Tests for EPSG codes specifically """
    # NOTE(review): appears to be an exact duplicate of the other
    # test_set_crs_epsg in this file — candidate for removal.
    ds = Dataset(str(tmpdir.join('test.nc')), 'w')

    # WGS84 geographic (EPSG:4326)
    data_var = ds.createVariable('data', 'S1')
    set_crs(ds, 'data', Proj(init='EPSG:4326'), set_proj4_att=True)
    data_atts = get_ncattrs(data_var)
    crs_var = ds.variables[data_atts['grid_mapping']]
    ncatts = get_ncattrs(crs_var)
    assert data_atts['proj4'] == '+proj=latlong +datum=WGS84 +no_defs'
    assert ncatts['grid_mapping_name'] == 'latitude_longitude'
    assert ncatts['semi_major_axis'] == 6378137.0
    assert ncatts['inverse_flattening'] == 298.257223563

    # NAD83 geographic (EPSG:4269)
    data_var = ds.createVariable('data2', 'S1')
    set_crs(ds, 'data2', Proj(init='EPSG:4269'), set_proj4_att=True)
    data_atts = get_ncattrs(data_var)
    crs_var = ds.variables[data_atts['grid_mapping']]
    ncatts = get_ncattrs(crs_var)
    assert data_atts['proj4'] == '+proj=latlong +datum=NAD83 +no_defs'
    assert ncatts['grid_mapping_name'] == 'latitude_longitude'
    assert ncatts['semi_major_axis'] == 6378137.0
    assert ncatts['inverse_flattening'] == 298.257223563
def raster_to_netcdf(filename_or_raster, outfilename=None, variable_name='data', format='NETCDF4', **kwargs):
    """
    Convert a single-band raster into a NetCDF dataset with spatial coordinate variables.

    Parameters
    ----------
    filename_or_raster: name of file to open with rasterio, or opened rasterio raster dataset
    outfilename: name of output file. If blank, will be same name as input with *.nc extension added
    variable_name: name of the data variable created in the output file
    format: output format for netCDF file: NETCDF3_CLASSIC, NETCDF3_64BIT, NETCDF4_CLASSIC, NETCDF4
    kwargs: arguments passed to variable creation: zlib

    Note: only rasters with descending y coordinates are currently supported
    """
    start = time.time()

    if isinstance(filename_or_raster, string_types):
        if not os.path.exists(filename_or_raster):
            raise ValueError('File does not exist: {0}'.format(filename_or_raster))
        src = rasterio.open(filename_or_raster)
        managed_raster = True  # we opened it, so we are responsible for closing it
    else:
        src = filename_or_raster
        managed_raster = False

    if not src.count == 1:
        raise NotImplementedError('ERROR: multi-band rasters not yet supported for this operation')

    prj = pyproj.Proj(**src.crs)

    outfilename = outfilename or src.name + '.nc'
    with Dataset(outfilename, 'w', format=format) as target:
        # Geographic projections get longitude/latitude coordinate names, projected ones get x/y.
        # NOTE(review): Proj.is_latlong() was removed in pyproj 2 — confirm the pinned pyproj version.
        if prj.is_latlong():
            x_varname = 'longitude'
            y_varname = 'latitude'
        else:
            x_varname = 'x'
            y_varname = 'y'

        # TODO: may need to do this in blocks if source is big
        data = src.read(1, masked=True)

        coords = SpatialCoordinateVariables.from_bbox(BBox(src.bounds, prj), src.width, src.height)
        coords.add_to_dataset(target, x_varname, y_varname, **kwargs)

        out_var = target.createVariable(variable_name, data.dtype, dimensions=(y_varname, x_varname), **kwargs)
        out_var[:] = data
        set_crs(target, variable_name, prj, set_proj4_att=False)

    if managed_raster:
        src.close()

    print('Elapsed {0:.3f} seconds'.format(time.time() - start))
def _create_elevation_service(self, zone, band, data, nodata_value, coords):
    """
    Publish elevation-band data for a seed zone as a map service, or reuse an
    existing one.

    zone: seed zone model instance (provides zone_uid / name)
    band: sequence whose first two items are the (low, high) elevation limits
    data: 2D array of values to write; cells equal to nodata_value are invalid
    nodata_value: fill value used in the NetCDF variable
    coords: SpatialCoordinateVariables describing the data grid

    Returns the Service model instance.
    """
    low, high = band[:2]
    elevation_service_name = "zones/elevation/{}_{}_{}".format(zone.zone_uid, low, high)
    bbox = coords.bbox

    # Delete and recreate service as needed
    service = Service.objects.filter(name=elevation_service_name)
    if service.exists():
        # Already published — reuse rather than rewriting the data file
        return service.first()

    rel_path = elevation_service_name + ".nc"
    abs_path = os.path.join(SERVICE_DATA_ROOT, rel_path)
    if not os.path.exists(os.path.dirname(abs_path)):
        os.makedirs(os.path.dirname(abs_path))

    with Dataset(abs_path, "w", format="NETCDF4") as ds:
        coords.add_to_dataset(ds, "longitude", "latitude")
        data_var = ds.createVariable(
            "data",
            data.dtype,
            dimensions=("latitude", "longitude"),
            fill_value=nodata_value,
        )
        data_var[:] = data
        set_crs(ds, "data", Proj("epsg:4326"))

    # extract out unmasked data
    masked_data = data[data != nodata_value]

    # Both stops use the same color (flat green); the stretch just spans the valid data range
    renderer = StretchedRenderer(
        [(masked_data.min().item(), Color(46, 173, 60),), (masked_data.max().item(), Color(46, 173, 60),), ]
    )

    service = Service.objects.create(
        name=elevation_service_name,
        description="Elevation for zone {}, {} - {}".format(zone.name, low, high),
        data_path=rel_path,
        projection="epsg:4326",
        full_extent=bbox,
        initial_extent=bbox,
    )
    Variable.objects.create(
        service=service,
        index=0,
        variable="data",
        projection="epsg:4326",
        x_dimension="longitude",
        y_dimension="latitude",
        name="data",
        renderer=renderer,
        full_extent=bbox,
    )

    return service
def test_symmetric_proj4(tmpdir):
    """ Test writing and reading proj4 string as attribute of variable """
    source_proj4 = '+proj=stere +units=m +datum=WGS84 +lat_ts=60 +lat_0=90 +lon_0=263 +lat_1=60 +x_0=3475000 +y_0=7475000'

    ds = Dataset(str(tmpdir.join('test.nc')), 'w')
    ds.createVariable('data', 'S1')
    set_crs(ds, 'data', Proj(source_proj4), set_proj4_att=True)

    # Read the proj4 string back out and parse it for comparison
    roundtripped = CRS.from_string(get_crs(ds, 'data')).to_dict()

    assert len(roundtripped) == 9  # There should be 9 parameters
    assert CRS.from_string(source_proj4).to_dict() == roundtripped
def test_symmetric_proj4(tmpdir):
    """ Test writing and reading proj4 string as attribute of variable """
    # NOTE(review): appears to be an exact duplicate of the other
    # test_symmetric_proj4 in this file — candidate for removal.
    ds = Dataset(str(tmpdir.join('test.nc')), 'w')
    proj4 = '+proj=stere +units=m +datum=WGS84 +lat_ts=60 +lat_0=90 +lon_0=263 +lat_1=60 +x_0=3475000 +y_0=7475000'
    ds.createVariable('data', 'S1')
    set_crs(ds, 'data', Proj(proj4), set_proj4_att=True)

    # Read the attribute back and compare the parsed parameter dicts
    out_proj4 = get_crs(ds, 'data')
    out_data = CRS.from_string(out_proj4).to_dict()

    assert len(out_data) == 9  # There should be 9 parameters
    assert CRS.from_string(proj4).to_dict() == out_data
def set_crs(filename, proj4, variables):
    """
    Attach CRS information (parsed from a proj4 string) to variables of an
    existing NetCDF file.  When no variables are named, every non-dimension
    variable is updated.
    """
    # Parse the projection up front so we fail before touching the dataset
    try:
        projection = Proj(proj4)
    except RuntimeError:
        raise click.BadArgumentUsage('Invalid projection: ' + proj4)

    with Dataset(filename, 'a') as ds:
        if variables:
            targets = [name.strip() for name in variables.split(',')]
        else:
            # Default: every variable that is not itself a dimension
            targets = [name for name in ds.variables if name not in ds.dimensions]

        missing = set(targets).difference(ds.variables.keys())
        if missing:
            raise click.BadArgumentUsage(
                'The following variables do not exist in this dataset: ' + ', '.join(missing))

        for name in targets:
            crs.set_crs(ds, name, projection)
def test_utm(tmpdir):
    """ Round-trip a UTM EPSG init string through set_crs / get_crs. """
    ds = Dataset(str(tmpdir.join('test.nc')), 'w')
    ds.createVariable('data', 'S1')
    set_crs(ds, 'data', Proj('+init=epsg:3157'), set_proj4_att=True)  # UTM Zone 10

    # ESPG will have been converted to long form
    result = CRS.from_string(get_crs(ds, 'data')).to_dict()

    assert len(result) == 6
    assert result == {
        u'zone': 10,
        u'ellps': u'GRS80',
        u'no_defs': True,
        u'proj': u'utm',
        u'units': u'm',
        u'towgs84': u'0,0,0,0,0,0,0',
    }
def test_utm(tmpdir):
    """ Round-trip a UTM EPSG init string and verify the expanded proj4 parameters. """
    # NOTE(review): appears to be an exact duplicate of the other test_utm
    # in this file — candidate for removal.
    ds = Dataset(str(tmpdir.join('test.nc')), 'w')
    proj4 = '+init=epsg:3157'  # UTM Zone 10
    ds.createVariable('data', 'S1')
    set_crs(ds, 'data', Proj(proj4), set_proj4_att=True)

    out_proj4 = get_crs(ds, 'data')
    out_data = CRS.from_string(out_proj4).to_dict()

    # ESPG will have been converted to long form
    assert len(out_data) == 6
    expected = {
        u'zone': 10,
        u'ellps': u'GRS80',
        u'no_defs': True,
        u'proj': u'utm',
        u'units': u'm',
        u'towgs84': u'0,0,0,0,0,0,0'
    }
    assert expected == out_data
def test_set_crs(tmpdir):
    """ Test proper encoding of projection into CF Convention parameters """
    ds = Dataset(str(tmpdir.join('test.nc')), 'w')

    # Test polar stereographic
    proj4 = '+proj=stere +datum=WGS84 +lat_ts=60 +lat_0=90 +lon_0=263 +lat_1=60 +x_0=3475000 +y_0=7475000'
    data_var = ds.createVariable('data', 'S1')
    set_crs(ds, 'data', Proj(proj4))
    # The grid_mapping attribute names the CRS container variable
    crs_var = ds.variables[get_ncattrs(data_var)['grid_mapping']]
    ncatts = get_ncattrs(crs_var)
    assert ncatts['grid_mapping_name'] == 'polar_stereographic'
    assert ncatts['inverse_flattening'] == 298.257223563
    assert ncatts['latitude_of_projection_origin'] == 90
    assert ncatts['straight_vertical_longitude_from_pole'] == 263
    assert ncatts['standard_parallel'] == 60
    assert ncatts['false_northing'] == 7475000
    assert ncatts['false_easting'] == 3475000

    # Test Lambert conformal conic
    proj4 = '+proj=lcc +lat_1=30 +lat_2=60 +lat_0=47.5 +lon_0=-97 +x_0=3825000 +y_0=3200000'
    data_var = ds.createVariable('data2', 'S1')
    set_crs(ds, 'data2', Proj(proj4))
    crs_var = ds.variables[get_ncattrs(data_var)['grid_mapping']]
    ncatts = get_ncattrs(crs_var)
    assert ncatts['grid_mapping_name'] == 'lambert_conformal_conic'
    assert ncatts['latitude_of_projection_origin'] == 47.5
    assert ncatts['longitude_of_central_meridian'] == -97
    # Two standard parallels are stored as a list
    assert ncatts['standard_parallel'] == [30, 60]
    assert ncatts['false_northing'] == 3200000
    assert ncatts['false_easting'] == 3825000

    # Unsupported projection should fail
    proj4 = '+proj=merc +lat_1=30 +lat_2=60 +lat_0=47.5 +lon_0=-97 +x_0=3825000 +y_0=3200000'
    ds.createVariable('data3', 'S1')
    with pytest.raises(ValueError):
        set_crs(ds, 'data3', Proj(proj4))
def test_set_crs(tmpdir):
    """ Test proper encoding of projection into CF Convention parameters """
    ds = Dataset(str(tmpdir.join('test.nc')), 'w')

    def crs_atts_for(varname, proj4):
        # Create the variable, attach the CRS, and return the attributes
        # of the generated grid_mapping variable.
        variable = ds.createVariable(varname, 'S1')
        set_crs(ds, varname, Proj(proj4))
        return get_ncattrs(ds.variables[get_ncattrs(variable)['grid_mapping']])

    # Polar stereographic
    ncatts = crs_atts_for(
        'data',
        '+proj=stere +datum=WGS84 +lat_ts=60 +lat_0=90 +lon_0=263 +lat_1=60 +x_0=3475000 +y_0=7475000')
    assert ncatts['grid_mapping_name'] == 'polar_stereographic'
    assert ncatts['inverse_flattening'] == 298.257223563
    assert ncatts['latitude_of_projection_origin'] == 90
    assert ncatts['straight_vertical_longitude_from_pole'] == 263
    assert ncatts['standard_parallel'] == 60
    assert ncatts['false_northing'] == 7475000
    assert ncatts['false_easting'] == 3475000

    # Lambert conformal conic
    ncatts = crs_atts_for(
        'data2',
        '+proj=lcc +lat_1=30 +lat_2=60 +lat_0=47.5 +lon_0=-97 +x_0=3825000 +y_0=3200000')
    assert ncatts['grid_mapping_name'] == 'lambert_conformal_conic'
    assert ncatts['latitude_of_projection_origin'] == 47.5
    assert ncatts['longitude_of_central_meridian'] == -97
    assert ncatts['standard_parallel'] == [30, 60]
    assert ncatts['false_northing'] == 3200000
    assert ncatts['false_easting'] == 3825000

    # Unsupported projection should fail
    ds.createVariable('data3', 'S1')
    with pytest.raises(ValueError):
        set_crs(ds, 'data3',
                Proj('+proj=merc +lat_1=30 +lat_2=60 +lat_0=47.5 +lon_0=-97 +x_0=3825000 +y_0=3200000'))
def main(original_file, climatena_file, out_dir, valid_variables):
    """
    Rasterize ClimateNA CSV output onto the grid of `original_file`, writing
    one NetCDF dataset per climate variable into `out_dir`.

    original_file: raster whose bounds/transform/shape define the output grid
    climatena_file: ClimateNA CSV (ID1, ID2, Latitude, Longitude, Elevation, then variables)
    out_dir: output directory for the generated *.nc files
    valid_variables: optional comma-separated subset of variables to process
    """
    with rasterio.open(original_file) as ds:
        bounds = ds.bounds
        affine = ds.transform
        shape = ds.shape

    with open(climatena_file, 'r') as f_in:
        headers = csv.DictReader(f_in).fieldnames
        # Everything that isn't an ID/coordinate column is a climate variable
        variables = [
            x for x in headers
            if x not in ('ID1', 'ID2', 'Latitude', 'Longitude', 'Elevation')
        ]

    if valid_variables:
        valid = [x.strip().lower() for x in valid_variables.split(',')]
    else:
        valid = variables

    print('Creating datasets...')
    # Start from an all-masked int32 grid; values are filled in from the CSV below
    grid = numpy.zeros(shape, dtype='int32')
    grid = numpy.ma.masked_where(grid == 0, grid)

    for var in (x for x in variables if x.lower() in valid):
        out_path = os.path.join(
            out_dir, '{}_{}.nc'.format(
                os.path.splitext(os.path.basename(climatena_file))[0], var))
        if os.path.exists(out_path):
            # Assume an existing file was already created by a previous run
            continue

        with Dataset(out_path, 'w', format='NETCDF4') as ds:
            projection = Proj('EPSG:4326')
            coord_vars = SpatialCoordinateVariables.from_bbox(
                BBox(bounds, projection=projection), *reversed(grid.shape))
            coord_vars.add_to_dataset(ds, 'longitude', 'latitude')
            data_var = ds.createVariable(var, grid.dtype,
                                         dimensions=('latitude', 'longitude'),
                                         fill_value=grid.fill_value)
            data_var[:] = grid
            set_crs(ds, var, projection)

    print('Copying from ClimateNA data... (0%)', end='\r')

    with open(climatena_file, 'r') as f_in:
        # Determine total file size so progress can be reported
        f_in.seek(0, os.SEEK_END)
        end = f_in.tell()
        f_in.seek(0)
        f_in.readline()  # Skip headers

        while f_in.tell() < end:
            # Parse the CSV in large chunks to bound memory use
            lines = ''.join(f_in.readline() for _ in range(1000000))
            arr = numpy.loadtxt(StringIO(lines), delimiter=',', usecols=[
                headers.index(x) for x in ['Latitude', 'Longitude'] + variables
            ])
            # Transpose to rows-per-column layout: arr[0]=lat, arr[1]=lon, arr[2:]=variables
            arr = numpy.moveaxis(arr, 1, 0)
            latitudes = arr[0]
            longitudes = arr[1]

            for i, var in enumerate(variables):
                if var.lower() in valid:
                    out_path = os.path.join(
                        out_dir, '{}_{}.nc'.format(
                            os.path.splitext(
                                os.path.basename(climatena_file))[0], var))
                    variable = arr[i + 2]

                    with Dataset(out_path, 'a') as ds:
                        grid = ds.variables[var][:]
                        fill_value = grid.fill_value
                        grid = grid.data

                        for j, value in enumerate(variable):
                            if value == -9999:
                                # ClimateNA nodata marker — leave the cell masked
                                continue

                            # World -> pixel coordinates via the inverse affine transform
                            col, row = [
                                int(round(x))
                                for x in ~affine * (longitudes[j], latitudes[j])
                            ]

                            if var in MULTIPLIERS:
                                value *= MULTIPLIERS[var]

                            grid[row][col] = value

                        ds.variables[var][:] = numpy.ma.masked_where(
                            grid == fill_value, grid)

            print('Copying from ClimateNA data... ({}%)'.format(
                round(f_in.tell() / end * 100)), end='\r')

    print('Copying from ClimateNA data... (100%)')
    print('Done.')
def to_netcdf(
        files,
        output,
        variable,
        dtype,
        src_crs,
        x_name,
        y_name,
        z_name,
        datetime_pattern,
        netcdf3,
        compress,
        packed,
        xy_dtype,
        # z_dtype,
        calendar,
        autocrop):
    """
    Convert rasters to NetCDF and stack them according to a dimension.
    X and Y dimension names will be named according to the source projection (lon, lat if geographic projection, x, y
    otherwise) unless specified.
    Will overwrite an existing NetCDF file.
    Only the first band of the input will be turned into a NetCDF file.
    """

    # TODO: add format string template to this to parse out components

    filenames = list(glob.glob(files))
    if not filenames:
        raise click.BadParameter('No files found matching that pattern', param='files', param_hint='FILES')

    z_values = []

    if datetime_pattern is not None:
        datetimes = (datetime.strptime(x, datetime_pattern) for x in filenames)

        # Sort both datimes and filenames by datetimes
        z_values, filenames = [list(x) for x in zip(*sorted(zip(datetimes, filenames), key=itemgetter(0)))]

    items = tuple(enumerate(filenames))

    # More than one input file implies a stacking (z) dimension
    has_z = len(filenames) > 1

    if has_z and not z_name:
        raise click.BadParameter('Required when > 1 input file', param='--z', param_hint='--z')

    if src_crs:
        src_crs = CRS.from_string(src_crs)

    # First file acts as the template for grid, dtype and CRS
    template_ds = rasterio.open(filenames[0])
    src_crs = template_ds.crs or src_crs

    if not src_crs:
        raise click.BadParameter('Required when no CRS information available in source files',
                                 param='--src-crs', param_hint='--src-crs')

    prj = Proj(**src_crs.to_dict())
    bounds = template_ds.bounds
    width = template_ds.width
    height = template_ds.height
    window = None

    src_dtype = numpy.dtype(template_ds.dtypes[0])
    dtype = numpy.dtype(dtype) if dtype else src_dtype

    if dtype == src_dtype:
        fill_value = template_ds.nodata
        if src_dtype.kind in ('u', 'i'):
            # nodata always comes from rasterio as floating point
            fill_value = int(fill_value)
    else:
        fill_value = get_fill_value(dtype)

    x_name = x_name or ('lon' if src_crs.is_geographic else 'x')
    y_name = y_name or ('lat' if src_crs.is_geographic else 'y')

    var_kwargs = {
        'fill_value': fill_value
    }

    format = 'NETCDF3_CLASSIC' if netcdf3 else 'NETCDF4'

    with Dataset(output, 'w', format=format) as out:
        # First pass over inputs: gather global min/max (for packing) and
        # data windows (for autocrop) before any data is written.
        if packed or autocrop:
            mins = []
            maxs = []
            windows = []

            click.echo('Inspecting input datasets...')
            with click.progressbar(items) as iter:
                for index, filename in iter:
                    with rasterio.open(filename) as src:
                        data = src.read(1, masked=True)
                        if packed:
                            mins.append(data.min())
                            maxs.append(data.max())
                        if autocrop:
                            data_window = get_data_window(data)
                            if data_window != ((0, height), (0, width)):
                                windows.append(data_window)

            if packed:
                min_value = min(mins)
                max_value = max(maxs)
                scale, offset = get_pack_atts(dtype, min_value, max_value)
            if autocrop and windows:
                # Union of all non-full windows defines the cropped grid
                window = union(windows)
                bounds = template_ds.window_bounds(window)
                height = window[0][1] - window[0][0]
                width = window[1][1] - window[1][0]

        coords = SpatialCoordinateVariables.from_bbox(BBox(bounds, prj), width, height, xy_dtype)
        coords.add_to_dataset(out, x_name, y_name, zlib=compress)

        var_dimensions = [y_name, x_name]
        shape = list(coords.shape)
        if has_z:
            shape.insert(0, len(filenames))
            out.createDimension(z_name, shape[0])
            var_dimensions.insert(0, z_name)
            if z_values:
                dates = DateVariable(numpy.array(z_values),
                                     units_start_date=z_values[0], calendar=calendar)
                dates.add_to_dataset(out, z_name)

        click.echo('Creating {0}:{1} with shape {2}'.format(output, variable, shape))

        out_var = out.createVariable(variable, dtype, dimensions=var_dimensions,
                                     zlib=compress, **var_kwargs)
        set_crs(out, variable, prj, set_proj4_att=True)

        if packed:
            out_var.setncattr('scale_factor', scale)
            out_var.setncattr('add_offset', offset)

        click.echo('Copying data from input files...')
        with click.progressbar(items) as iter:
            for index, filename in iter:
                with rasterio.open(filename) as src:
                    data = src.read(1, masked=True, window=window)
                    if has_z:
                        out_var[index, :] = data
                    else:
                        out_var[:] = data

                out.sync()
def process_web_outputs(results, job, publish_raster_results=False, renderer_or_fn=None):
    """
    Convert a geoprocessing job's outputs to JSON-serializable values, optionally
    publishing raster outputs as map services.

    results: job results object providing format_args()
    job: job model instance (its uuid namespaces the published services)
    publish_raster_results: when True, raster outputs are written to NetCDF and
        registered as Service/Variable records; the output value becomes the
        service name
    renderer_or_fn: a renderer, a callable producing one from the raster, or
        None for a default black-to-white stretch over the data range

    Returns the (mutated) outputs mapping.
    """
    outputs = results.format_args()

    for k, v in outputs.items():
        if is_raster(v) and publish_raster_results:
            service_name = '{0}/{1}'.format(job.uuid, k)
            rel_path = '{}.nc'.format(service_name)
            abs_path = os.path.join(SERVICE_DATA_ROOT, rel_path)
            # exist_ok: multiple raster outputs of the same job share the
            # job-uuid directory; the unguarded call raised FileExistsError
            # for every output after the first.
            os.makedirs(os.path.dirname(abs_path), exist_ok=True)

            with Dataset(abs_path, 'w', format='NETCDF4') as ds:
                # Geographic projections get longitude/latitude dimension names
                if is_latlong(v.extent.projection):
                    x_var = 'longitude'
                    y_var = 'latitude'
                else:
                    x_var = 'x'
                    y_var = 'y'

                coord_vars = SpatialCoordinateVariables.from_bbox(v.extent, *reversed(v.shape))
                coord_vars.add_to_dataset(ds, x_var, y_var)

                fill_value = v.fill_value if numpy.ma.core.is_masked(v) else None
                data_var = ds.createVariable('data', v.dtype,
                                             dimensions=(y_var, x_var),
                                             fill_value=fill_value)
                data_var[:] = v
                set_crs(ds, 'data', v.extent.projection)

            if callable(renderer_or_fn):
                renderer = renderer_or_fn(v)
            elif renderer_or_fn is None:
                # Default: grayscale stretch across the full data range
                renderer = StretchedRenderer([
                    (numpy.min(v).item(), Color(0, 0, 0)),
                    (numpy.max(v).item(), Color(255, 255, 255))
                ])
            else:
                renderer = renderer_or_fn

            # Create all service records atomically so a failure leaves no orphans
            with transaction.atomic():
                service = Service.objects.create(
                    name=service_name,
                    description=(
                        'This service has been automatically generated from the result of a geoprocessing job.'
                    ),
                    data_path=rel_path,
                    projection=v.extent.projection.srs,
                    full_extent=v.extent,
                    initial_extent=v.extent,
                )
                Variable.objects.create(
                    service=service,
                    index=0,
                    variable='data',
                    projection=v.extent.projection.srs,
                    x_dimension=x_var,
                    y_dimension=y_var,
                    name='data',
                    renderer=renderer,
                    full_extent=v.extent,
                )
                ProcessingResultService.objects.create(job=job, service=service)

            outputs[k] = service_name

        elif is_ndarray(v):
            # Small arrays are inlined as lists; large ones fall back to repr
            if v.size < numpy.get_printoptions()['threshold']:
                outputs[k] = v.tolist()
            else:
                outputs[k] = str(v)

    return outputs
def raster_to_netcdf(filename_or_raster, outfilename=None, variable_name='data', format='NETCDF4', **kwargs):
    """
    Convert a single-band raster into a NetCDF dataset with spatial coordinate variables.

    Parameters
    ----------
    filename_or_raster: name of file to open with rasterio, or opened rasterio raster dataset
    outfilename: name of output file. If blank, will be same name as input with *.nc extension added
    variable_name: name of the data variable created in the output file
    format: output format for netCDF file: NETCDF3_CLASSIC, NETCDF3_64BIT, NETCDF4_CLASSIC, NETCDF4
    kwargs: arguments passed to variable creation: zlib

    Note: only rasters with descending y coordinates are currently supported
    """
    start = time.time()

    if isinstance(filename_or_raster, string_types):
        if not os.path.exists(filename_or_raster):
            raise ValueError(
                'File does not exist: {0}'.format(filename_or_raster))
        src = rasterio.open(filename_or_raster)
        managed_raster = True  # we opened it, so we are responsible for closing it
    else:
        src = filename_or_raster
        managed_raster = False

    if not src.count == 1:
        raise NotImplementedError(
            'ERROR: multi-band rasters not yet supported for this operation')

    prj = pyproj.Proj(**src.crs)

    outfilename = outfilename or src.name + '.nc'
    with Dataset(outfilename, 'w', format=format) as target:
        # Geographic projections get longitude/latitude coordinate names, projected ones get x/y
        if is_latlong(prj):
            x_varname = 'longitude'
            y_varname = 'latitude'
        else:
            x_varname = 'x'
            y_varname = 'y'

        # TODO: may need to do this in blocks if source is big
        data = src.read(1, masked=True)

        coords = SpatialCoordinateVariables.from_bbox(BBox(src.bounds, prj),
                                                      src.width, src.height)
        coords.add_to_dataset(target, x_varname, y_varname, **kwargs)

        out_var = target.createVariable(variable_name, data.dtype,
                                        dimensions=(y_varname, x_varname),
                                        **kwargs)
        out_var[:] = data
        set_crs(target, variable_name, prj, set_proj4_att=False)

    if managed_raster:
        src.close()

    print('Elapsed {0:.3f} seconds'.format(time.time() - start))
def main(in_pattern, out_pattern, boundary, single, varname):
    """
    Clips and masks large NetCDF datasets to regional datasets based on the boundary. The in_pattern and out_pattern
    arguments should be filename patterns (can include path) with the pattern: /path/to/in_netcdf_{variable}.nc.
    Example usage: python cut_to_region.py NorthAmerica/NA_{variable}.nc USWest/west_{variable}.nc west.shp
    """
    if single and not varname:
        print('--varname is required when --single is used')
        sys.exit(-1)

    if single:
        if not os.path.exists(in_pattern):
            print('Input file {} does not exist.'.format(in_pattern))
            sys.exit(-1)

        input_paths = [(in_pattern, varname)]
    else:
        input_paths = [(in_pattern.format(variable=x), x) for x in VARIABLES]

        for path, _ in input_paths:
            if not os.path.exists(path):
                print('Input file {} does not exist.'.format(path))
                sys.exit(-1)

    with fiona.open(boundary, 'r') as shp:
        features = []
        wgs84 = Proj('+init=EPSG:4326')
        shp_projection = Proj(shp.crs)
        bounds = shp.bounds

        # Project the shapefile's bounding box corners to WGS84
        ll = transform(shp_projection, wgs84, bounds[0], bounds[1])
        ur = transform(shp_projection, wgs84, bounds[2], bounds[3])

        bbox = BBox([*ll, *ur], projection=wgs84)

        for feature in shp.items():
            # NOTE(review): 'EPSG: 4326' contains a space — confirm fiona/GDAL
            # tolerates this form of the init string.
            geometry = transform_geom(shp.crs, {'init': 'EPSG: 4326'},
                                      feature[1]['geometry'])
            features.append(geometry)

    for in_path, variable in input_paths:
        if single:
            out_path = out_pattern
        else:
            out_path = out_pattern.format(variable=variable)

        if os.path.exists(out_path):
            # NOTE(review): prompt text has typos ("exists?" / "with") — kept as-is
            confirm = input(
                "The output file '{}' already exists? Do you with to replace it? [y/n] "
                .format(out_path))
            if confirm.lower().strip() not in ['y', 'yes']:
                print('Exiting...')
                sys.exit()

        with Dataset(in_path, 'r') as ds:
            coords = SpatialCoordinateVariables.from_dataset(
                ds, x_name='longitude', y_name='latitude')

            # Index ranges covering the boundary's bounding box
            x_start, x_stop = coords.x.indices_for_range(bbox.xmin, bbox.xmax)
            y_start, y_stop = coords.y.indices_for_range(bbox.ymin, bbox.ymax)
            x_slice = slice(x_start, x_stop)
            y_slice = slice(y_start, y_stop)

            clipped_coords = coords.slice_by_bbox(bbox)
            grid = ds.variables[variable][y_slice, x_slice]

        if is_masked(grid):
            mask = grid.mask.astype('uint8')
        else:
            mask = numpy.zeros(grid.shape, dtype='uint8')

        # Burn features with 0 and fill with 1 so that cells OUTSIDE the
        # boundary are the ones that end up masked.
        mask |= rasterize(((x, 0) for x in features),
                          out_shape=mask.shape,
                          transform=clipped_coords.affine,
                          fill=1,
                          default_value=0)
        grid = numpy.ma.masked_where(mask == 1, grid.data)

        print('Writing {}...'.format(out_path))

        with Dataset(out_path, 'w', format='NETCDF4') as ds:
            clipped_coords.add_to_dataset(ds, 'longitude', 'latitude')
            data_var = ds.createVariable(variable, grid.dtype,
                                         dimensions=('latitude', 'longitude'),
                                         fill_value=grid.fill_value)

            if data_var.shape != grid.shape:
                # presumably a rounding mismatch between coordinate slices —
                # trim the grid to fit; TODO confirm this is intended
                grid = grid[:data_var.shape[0], :data_var.shape[1]]

            data_var[:] = grid
            set_crs(ds, variable, Proj('+init=EPSG:4326'))
def handle(self, output_directory, region_name, zoneset, *args, **kwargs):
    """
    Build, for each seed zone source / species combination, a zone-ID raster
    (NetCDF) where each pixel stores the index of its zone + elevation band,
    plus a CSV lookup table mapping indices to zone identifiers.
    """
    output_directory = output_directory[0]
    region_name = region_name[0]

    if zoneset is None or zoneset.strip() == "":
        sources = ZoneSource.objects.all().order_by("name")
        if len(sources) == 0:
            raise CommandError("No zonesets available")
    else:
        sources = ZoneSource.objects.filter(
            name__in=zoneset.split(",")).order_by("name")
        if len(sources) == 0:
            raise CommandError(
                "No zonesets available to analyze that match --zones values"
            )

    region = Region.objects.filter(name=region_name)
    if not region.exists():
        raise CommandError(
            "Region {} is not available".format(region_name))

    region = region.first()

    ### Create output directories
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    with ElevationDataset() as elevation_ds:
        elevation_ds.load_region(region.name)

        for source in sources:
            all_species = [
                e["species"]
                for e in source.seedzone_set.values("species").distinct()
            ]

            for species in all_species:
                zones = source.seedzone_set.filter(
                    species=species).order_by("zone_id")

                out_index = 0
                zone_ids = []
                zone_input_ids = []

                # Output raster: each valid pixel holds the index of its
                # zone/band combination; NODATA elsewhere
                out = numpy.empty(shape=elevation_ds.data.shape, dtype="uint16")
                out.fill(NODATA)

                with ZoneConfig(source.name) as config:
                    for zone in Bar(
                        "Processing {} - {} zones".format(source.name, species),
                        max=source.seedzone_set.count(),
                    ).iter(zones):
                        source_name = zone.source

                        window, coords = elevation_ds.get_read_window(
                            zone.polygon.extent)
                        transform = coords.affine

                        elevation = elevation_ds.data[window]

                        zone_mask = rasterize(
                            (json.loads(zone.polygon.geojson), ),
                            out_shape=elevation.shape,
                            transform=transform,
                            fill=1,  # mask is True OUTSIDE the zone
                            default_value=0,
                            dtype=numpy.dtype("uint8"),
                        ).astype("bool")

                        nodata_mask = elevation == elevation_ds.nodata_value
                        mask = nodata_mask | zone_mask

                        # Create a 2D array for extracting to new dataset, in integer feet
                        # (source elevation is presumably meters — 1 ft = 0.3048 m)
                        elevation = (numpy.where(
                            ~mask, elevation / 0.3048,
                            elevation_ds.nodata_value).round().astype("int"))

                        # if there are no pixels in the mask, skip this zone
                        if elevation.size == 0:
                            continue

                        elevation_data = elevation[
                            elevation != elevation_ds.nodata_value]
                        min_elevation = math.floor(
                            numpy.nanmin(elevation_data))
                        max_elevation = math.ceil(
                            numpy.nanmax(elevation_data))

                        bands = list(
                            config.get_elevation_bands(
                                zone, min_elevation, max_elevation))
                        bands = generate_missing_bands(
                            bands, min_elevation, max_elevation)

                        if not bands:
                            # min / max elevation outside defined bands
                            warnings.warn(
                                "\nElevation range {} - {} ft outside defined bands\n"
                                .format(min_elevation, max_elevation))
                            continue

                        for band in bands:
                            low, high = band[:2]
                            band_mask = (elevation >= low) & (elevation <= high)

                            if not numpy.any(band_mask):
                                continue

                            # extract actual elevation range within the mask as integer feet
                            band_elevation = elevation.flat[
                                band_mask.flatten()]
                            band_range = [
                                math.floor(numpy.nanmin(band_elevation)),
                                math.ceil(numpy.nanmax(band_elevation)),
                            ]

                            # extract 2D version of elevation within the band
                            value = numpy.where(
                                (~mask) & band_mask,
                                out_index,
                                out[window],
                            )

                            if not numpy.any(value == out_index):
                                continue

                            out[window] = value

                            # zone ids are based on actual elevation range
                            zone_ids.append("{}_{}_{}".format(
                                zone.zone_uid, *band_range))
                            # zone_input is based on input elevation range
                            zone_input_ids.append("{}_{}_{}".format(
                                zone.zone_uid, low, high))

                            out_index += 1

                            if out_index > NODATA - 1:
                                raise ValueError(
                                    "Too many zone / band combinations for uint16")

                # Find the data window of the zones
                data_window = (windows.get_data_window(
                    out, NODATA).round_offsets(op="floor").round_lengths(
                        op="ceil"))
                out = out[data_window.toslices()]
                data_coords = elevation_ds.coords.slice_by_window(
                    Window(*data_window.toslices()))

                filename = os.path.join(
                    output_directory,
                    "{}_{}_zones.nc".format(source_name, species))

                with Dataset(filename, "w", format="NETCDF4") as ds:
                    # create ID variable
                    ds.createDimension("zone", len(zone_ids))
                    id_var = ds.createVariable("zone",
                                               str,
                                               dimensions=("zone", ))
                    id_var[:] = numpy.array(zone_ids)

                    data_coords.add_to_dataset(ds, "longitude", "latitude")
                    data_var = ds.createVariable(
                        "zones",
                        "uint16",
                        dimensions=("latitude", "longitude"),
                        fill_value=NODATA,
                    )
                    data_var[:] = out

                    set_crs(ds, "zones", Proj({"init": "EPSG:4326"}))

                # Companion CSV mapping raster values to zone identifiers
                with open(filename.replace(".nc", ".csv"), "w") as fp:
                    writer = csv.writer(fp)
                    writer.writerow(["value", "zone", "zone_input"])
                    writer.writerows([[i, zone_ids[i], zone_input_ids[i]]
                                      for i in range(len(zone_ids))])
def handle(self, *args, **options):
    """
    Management command: rewrite each single-variable service's NetCDF data
    using the smallest integer dtype that can represent its value range
    (plus one extra value below the minimum, reserved for the fill value).
    """
    message = (
        "WARNING: This will update all service data, casting each to it's smallest possible data type. Do you want "
        "to continue? [y/n]")
    if input(message).lower() not in {'y', 'yes'}:
        return

    for service in Service.objects.all():
        if service.variable_set.all().count() > 1:
            print("Skipping service '{}' with more than one variable...".
                  format(service.name))
            continue

        variable = service.variable_set.all().get()
        path = os.path.join(SERVICE_DATA_ROOT, service.data_path)
        tmp_dir = mkdtemp()
        tmp_path = os.path.join(tmp_dir, os.path.basename(service.data_path))

        try:
            with Dataset(path, 'r') as ds:
                data = ds.variables[variable.variable][:]
                coords = SpatialCoordinateVariables.from_bbox(
                    service.full_extent, *reversed(data.shape))

                if data.dtype.kind != 'i':
                    print("Ignoring service '{}' with non-int type".format(
                        service.name))
                    continue

                # The fill value will be the minimum value of the chosen type, so we want to make sure it's not
                # included in the actual data range
                min_value = data.min() - 1
                max_value = data.max()

                # Determine the most suitable data type by finding the minimum type for the min/max values and then
                # using the type that will accurately represent both
                min_type = str(numpy.min_scalar_type(min_value))
                max_type = str(numpy.min_scalar_type(max_value))
                # Split e.g. 'uint16' -> ('u', '16') and 'int8' -> ('', '8');
                # result is unsigned only when BOTH bounds fit an unsigned type
                min_unsigned, min_size = min_type.split('int')
                max_unsigned, max_size = max_type.split('int')
                dtype = '{}int{}'.format(min_unsigned and max_unsigned,
                                         max(int(min_size), int(max_size)))

                if data.dtype == dtype:
                    print(
                        "Service '{}' already has the smallest possible type: {}"
                        .format(service.name, dtype))
                    continue

                print("Converting service '{}' to type: {}".format(
                    service.name, dtype))

                # Write the converted data to a temp file first, then swap it in
                with Dataset(tmp_path, 'w', format='NETCDF4') as ds:
                    coords.add_to_dataset(ds, variable.x_dimension,
                                          variable.y_dimension)

                    data = data.astype(dtype)
                    fill_value = numpy.ma.maximum_fill_value(
                        numpy.dtype(dtype))
                    numpy.ma.set_fill_value(data, fill_value)

                    data_var = ds.createVariable(
                        variable.variable,
                        dtype,
                        dimensions=(variable.y_dimension,
                                    variable.x_dimension),
                        fill_value=fill_value)
                    data_var[:] = data
                    set_crs(ds, variable.variable,
                            service.full_extent.projection)

                os.unlink(path)
                shutil.copy2(tmp_path, path)
        finally:
            # Best-effort cleanup of the temp directory
            try:
                shutil.rmtree(tmp_dir)
            except OSError:
                pass
def to_netcdf(
        files,
        output,
        variable,
        dtype,
        src_crs,
        x_name,
        y_name,
        z_name,
        datetime_pattern,
        netcdf3,
        compress,
        packed,
        xy_dtype,
        # z_dtype,
        calendar,
        autocrop):
    """
    Convert rasters to NetCDF and stack them according to a dimension.
    X and Y dimension names will be named according to the source projection (lon, lat if geographic projection, x, y
    otherwise) unless specified.
    Will overwrite an existing NetCDF file.
    Only the first band of the input will be turned into a NetCDF file.
    """

    # TODO: add format string template to this to parse out components

    filenames = list(glob.glob(files))
    if not filenames:
        raise click.BadParameter('No files found matching that pattern',
                                 param='files',
                                 param_hint='FILES')

    z_values = []

    if datetime_pattern is not None:
        datetimes = (datetime.strptime(x, datetime_pattern)
                     for x in filenames)

        # Sort both datimes and filenames by datetimes
        z_values, filenames = [
            list(x)
            for x in zip(*sorted(zip(datetimes, filenames), key=itemgetter(0)))
        ]

    items = tuple(enumerate(filenames))

    # More than one input file implies a stacking (z) dimension
    has_z = len(filenames) > 1

    if has_z and not z_name:
        raise click.BadParameter('Required when > 1 input file',
                                 param='--z',
                                 param_hint='--z')

    if src_crs:
        src_crs = CRS.from_string(src_crs)

    # First file acts as the template for grid, dtype and CRS
    template_ds = rasterio.open(filenames[0])
    src_crs = template_ds.crs or src_crs

    if not src_crs:
        raise click.BadParameter(
            'Required when no CRS information available in source files',
            param='--src-crs',
            param_hint='--src-crs')

    prj = Proj(**src_crs.to_dict())
    bounds = template_ds.bounds
    width = template_ds.width
    height = template_ds.height
    window = None

    src_dtype = numpy.dtype(template_ds.dtypes[0])
    dtype = numpy.dtype(dtype) if dtype else src_dtype

    if dtype == src_dtype:
        fill_value = template_ds.nodata
        if src_dtype.kind in ('u', 'i'):
            # nodata always comes from rasterio as floating point
            fill_value = int(fill_value)
    else:
        fill_value = get_fill_value(dtype)

    x_name = x_name or ('lon' if src_crs.is_geographic else 'x')
    y_name = y_name or ('lat' if src_crs.is_geographic else 'y')

    var_kwargs = {'fill_value': fill_value}

    format = 'NETCDF3_CLASSIC' if netcdf3 else 'NETCDF4'

    with Dataset(output, 'w', format=format) as out:
        # First pass over inputs: gather global min/max (for packing) and
        # data windows (for autocrop) before any data is written.
        if packed or autocrop:
            mins = []
            maxs = []
            windows = []

            click.echo('Inspecting input datasets...')
            with click.progressbar(items) as iter:
                for index, filename in iter:
                    with rasterio.open(filename) as src:
                        data = src.read(1, masked=True)
                        if packed:
                            mins.append(data.min())
                            maxs.append(data.max())
                        if autocrop:
                            data_window = get_data_window(data)
                            if data_window != ((0, height), (0, width)):
                                windows.append(data_window)

            if packed:
                min_value = min(mins)
                max_value = max(maxs)
                scale, offset = get_pack_atts(dtype, min_value, max_value)
            if autocrop and windows:
                # Union of all non-full windows defines the cropped grid
                window = union(windows)
                bounds = template_ds.window_bounds(window)
                height = window[0][1] - window[0][0]
                width = window[1][1] - window[1][0]

        coords = SpatialCoordinateVariables.from_bbox(BBox(bounds, prj), width,
                                                      height, xy_dtype)
        coords.add_to_dataset(out, x_name, y_name, zlib=compress)

        var_dimensions = [y_name, x_name]
        shape = list(coords.shape)
        if has_z:
            shape.insert(0, len(filenames))
            out.createDimension(z_name, shape[0])
            var_dimensions.insert(0, z_name)
            if z_values:
                dates = DateVariable(numpy.array(z_values),
                                     units_start_date=z_values[0],
                                     calendar=calendar)
                dates.add_to_dataset(out, z_name)

        click.echo('Creating {0}:{1} with shape {2}'.format(
            output, variable, shape))

        out_var = out.createVariable(variable,
                                     dtype,
                                     dimensions=var_dimensions,
                                     zlib=compress,
                                     **var_kwargs)
        set_crs(out, variable, prj, set_proj4_att=True)

        if packed:
            out_var.setncattr('scale_factor', scale)
            out_var.setncattr('add_offset', offset)

        click.echo('Copying data from input files...')
        with click.progressbar(items) as iter:
            for index, filename in iter:
                with rasterio.open(filename) as src:
                    data = src.read(1, masked=True, window=window)
                    if has_z:
                        out_var[index, :] = data
                    else:
                        out_var[:] = data

                out.sync()