def test_project_point(input, crs):
    """Round-trip a point through project() in each supported container type.

    ``input`` supplies (x, y, source CRS, destination CRS); when ``crs`` is
    truthy the two CRS definitions are first converted with
    get_authority_crs.
    """
    x1, y1, src_crs, dst_crs = input
    if crs:
        src_crs = get_authority_crs(src_crs)
        dst_crs = get_authority_crs(dst_crs)
    origin = (x1, y1)

    # a single (x, y) tuple
    forward = project(origin, src_crs, dst_crs)
    back = project(forward, dst_crs, src_crs)
    assert isinstance(forward, tuple)
    assert np.allclose(origin, back)

    # a list of (x, y) tuples
    origin_list = [origin, origin, origin]
    forward = project(origin_list, src_crs, dst_crs)
    x, y = forward
    x2, y2 = project((x, y), dst_crs, src_crs)
    assert len(x) == len(x2)
    expected_xy = np.array(origin_list).transpose()
    assert np.allclose(expected_xy, np.array([x2, y2]))

    # a single shapely Point
    forward = project(Point(origin), src_crs, dst_crs)
    back = project(Point(forward), dst_crs, src_crs)
    assert isinstance(forward, Point)
    assert np.allclose(origin, back)

    # a list of shapely Points
    point_pair = [Point(origin), Point(origin)]
    forward = project(point_pair, src_crs, dst_crs)
    back = project(forward, dst_crs, src_crs)
    assert isinstance(forward, list)
    for returned in back:
        first_coord = list(returned.coords)[0]
        assert np.allclose(first_coord, origin)
def test_write_raster_crs(crs, test_output_path):
    """Write a small raster with write_raster and check the CRS read back."""
    values = np.array([[0, 1],
                       [2, 3]])
    outfile = os.path.join(test_output_path, 'test_raster.tif')
    write_raster(outfile, values,
                 xll=0., yll=0.,
                 dx=5., dy=None,
                 rotation=0, crs=crs, nodata=-9999)

    # nothing to compare when no CRS was requested
    if crs is None:
        return

    with rasterio.open(outfile) as src:
        written_crs = get_authority_crs(src.crs)
        assert written_crs == get_authority_crs(crs)
def get_raster_crs(raster):
    """Get the coordinate reference system for a raster.

    Parameters
    ----------
    raster : str (filepath)
        Path to a raster that can be opened with rasterio.open.

    Returns
    -------
    crs : pyproj.CRS instance or None
        The raster CRS as returned by get_authority_crs, or None if
        the raster has no CRS assigned.

    Raises
    ------
    ModuleNotFoundError
        If rasterio is not available.
    """
    if not rasterio:
        raise ModuleNotFoundError(
            'This function requires rasterio. Please conda install rasterio.')
    with rasterio.open(raster) as src:
        # be explicit about the no-CRS case so that the result is
        # always defined
        if src.crs is None:
            return None
        return get_authority_crs(src.crs)
def test_shp2df_df2shp_crs(dest_crs, test_output_path,
                           eel_river_polygon, eel_river_polygon_shapefile,
                           request):
    """Read a shapefile into ``dest_crs`` with shp2df, then write it back
    with df2shp and check the geometry and the written CRS.
    """
    # read the polygon, reprojecting to dest_crs when one is given
    df = shp2df(eel_river_polygon_shapefile, dest_crs=dest_crs)

    # bring the geometries back to EPSG 5070 for comparison
    geoms = df['geometry']
    if dest_crs is not None:
        geoms = project(geoms, dest_crs, 5070)

    # the polygon should match the original in 5070
    assert geoms[0].almost_equals(eel_river_polygon)

    # writing the polygon back out should produce a valid projection file
    outshp = os.path.join(test_output_path, 'results.shp')
    df2shp(df, outshp, crs=dest_crs)
    written_crs = get_shapefile_crs(outshp)
    if dest_crs is not None:
        assert written_crs == get_authority_crs(dest_crs)
def test_get_raster_crs(geotiff_3070):
    """The CRS read from the ``geotiff_3070`` fixture should equal EPSG 3070."""
    assert get_raster_crs(geotiff_3070) == get_authority_crs(3070)
def get_values_at_points(rasterfile, x=None, y=None, band=1,
                         points=None, points_crs=None,
                         xarray_variable=None,
                         out_of_bounds_errors='coerce',
                         method='nearest', size_thresh=1e9):
    """Get raster values at a single point or list of points.
    Points in a different coordinate reference system (CRS) specified
    with a points_crs will be reprojected to the raster CRS prior to sampling.

    Parameters
    ----------
    rasterfile : str
        Filename of raster or NetCDF file. NetCDF files are assumed
        to have x and y coordinates in the same CRS as the points.
    x : 1D array
        X coordinate locations
    y : 1D array
        Y coordinate locations
    points : list of tuples or 2D numpy array of shape (npoints, 2)
        (x, y) locations at which to sample the raster. A pair of
        sequences ``(x values, y values)`` is also accepted, as long as it
        cannot be mistaken for an (npoints, 2) array.
    points_crs : obj, optional
        Coordinate reference system for points or x, y. Only needed if
        different than the CRS for the raster, in which case the points will be
        reprojected to the raster CRS prior to getting the values.
        A Python int, dict, str, or pyproj.crs.CRS instance
        passed to the pyproj.crs.from_user_input
        See http://pyproj4.github.io/pyproj/stable/api/crs/crs.html#pyproj.crs.CRS.from_user_input.
        Can be any of:

          - PROJ string
          - Dictionary of PROJ parameters
          - PROJ keyword arguments for parameters
          - JSON string with PROJ parameters
          - CRS WKT string
          - An authority string [i.e. 'epsg:4326']
          - An EPSG integer code [i.e. 4326]
          - A tuple of ("auth_name": "auth_code") [i.e ('epsg', '4326')]
          - An object with a `to_wkt` method.
          - A :class:`pyproj.crs.CRS` class
    xarray_variable : str
        If rasterfile is a NetCDF file, xarray_variable is the name of
        the variable in raster file to sample. Only required if
        rasterfile is a NetCDF file. by default, None.
    out_of_bounds_errors : {'raise', 'coerce'}, default 'coerce'
        * If 'raise', then x, y locations outside of the raster will
          raise an exception.
        * If 'coerce', then x, y locations outside of the raster will
          be set to NaN.
    method : str 'nearest' or 'linear'
        If 'nearest', the rasterio.DatasetReader.index() method is used to
        return the raster values at the nearest cell centers. If 'linear',
        scipy.interpolate.interpn is used for bilinear interpolation of values
        between raster cell centers.
    size_thresh : float
        Prior to reading any data, the raster size (height * width) is
        evaluated. If the size is larger than size_thresh, point values are
        read using :meth:`rasterio.io.DatasetReader.sample` (regardless of
        the specified method), which gets nearest pixel values without
        reading the whole dataset into memory. A 32-bit raster of size=1e9
        would require approximately 4 GB of memory (at 4 bytes per pixel).
        By default, 1e9.

    Returns
    -------
    results : numpy array of floats
        Sampled values, reshaped to the shape of x (or y) if those were
        supplied as arrays with more than one dimension.

    Raises
    ------
    ModuleNotFoundError
        If rasterio is not available.
    ValueError
        If none of x, y or points is supplied; if a NetCDF file is given
        without an xarray_variable; or if method='nearest',
        out_of_bounds_errors='raise' and any sampled value is NaN.

    Notes
    -----
    requires rasterio
    """
    if not rasterio:
        raise ModuleNotFoundError(
            'This function requires rasterio. Please conda install rasterio.')

    # read in sample points
    array_shape = None
    if x is not None and isinstance(x[0], tuple):
        # legacy call style: list of (x, y) tuples passed as the x argument
        x, y = np.array(x).transpose()
        warnings.warn(
            "new argument input for get_values_at_points is x, y, or points")
    elif x is not None:
        if not isinstance(x, np.ndarray):
            x = np.array(x)
        if not isinstance(y, np.ndarray):
            y = np.array(y)
        # flatten n-d inputs, remembering the shape so that
        # the results can be returned in the same shape
        if len(x.shape) > 1:
            array_shape = x.shape
            x = x.ravel()
        if len(y.shape) > 1:
            array_shape = y.shape
            y = y.ravel()
    elif points is not None:
        if isinstance(points, np.ndarray):
            x, y = points[:, 0], points[:, 1]
        else:
            point_array = np.array(points)
            if point_array.ndim == 2 and point_array.shape[1] == 2:
                # list of (x, y) tuples: one point per row
                x, y = point_array[:, 0], point_array[:, 1]
            else:
                # single (x, y) tuple, or a pair of x and y sequences
                x, y = point_array
    else:
        raise ValueError('Must supply x, y or list/array of points.')

    assert os.path.exists(rasterfile), "raster {} not found".format(rasterfile)
    t0 = time.time()

    print("reading data from {}...".format(rasterfile))
    data = None
    # getting points from a netcdf file
    if str(rasterfile).endswith('.nc'):
        if xarray_variable is None:
            raise ValueError('Input of NetCDF file for the raster '
                             'requires specification of an xarray_variable.')
        # use a context manager so the file handle is released;
        # the values must be pulled out before the dataset is closed
        with xr.open_dataset(rasterfile) as ds:
            x = xr.DataArray(x, dims="z")
            y = xr.DataArray(y, dims="z")
            results = ds[xarray_variable].interp(x=x, y=y, method=method)
            results = results.values
    # getting points from any raster openable by rasterio
    else:
        with rasterio.open(rasterfile) as src:
            meta = src.meta
            nodata = meta['nodata']
            size = src.shape[0] * src.shape[1]
            # only read the whole band into memory if it is small enough
            if size < size_thresh:
                data = src.read(band)

            # reproject coordinates if needed
            if points_crs is not None:
                points_crs = get_authority_crs(points_crs)
            raster_crs = get_authority_crs(src.crs)
            if raster_crs is None:
                warnings.warn(
                    f'Input raster {rasterfile} does not have a projection (CRS) assigned!'
                )
            elif points_crs is not None and points_crs != raster_crs:
                x, y = project((x, y), points_crs, raster_crs)

            if data is None:
                # large raster: sample the requested band directly from disk
                results = src.sample(list(zip(x, y)), indexes=band)
                results = np.atleast_1d(np.squeeze(list(results)))
                results = results.astype(float)
            elif method == 'nearest':
                i, j = src.index(x, y)
                i = np.atleast_1d(np.array(i, dtype=int))
                j = np.atleast_1d(np.array(j, dtype=int))
                nrow, ncol = data.shape
                # mask row, col locations outside the raster
                within = (i >= 0) & (i < nrow) & (j >= 0) & (j < ncol)
                # get values at valid point locations
                results = np.ones(len(i), dtype=float) * np.nan
                results[within] = data[i[within], j[within]]
                if out_of_bounds_errors == 'raise' and np.any(np.isnan(results)):
                    n_invalid = np.sum(np.isnan(results))
                    raise ValueError("{} points outside of {} extent.".format(
                        n_invalid, rasterfile))
            else:
                # map the points to interpolate to onto the raster coordinate system
                # (in case the raster is rotated)
                x_rx, y_ry = ~src.transform * (x, y)
                # coordinates of raster pixel centers in raster coordinate system
                # (e.g. i,j = 0, 0 = 0.5, 0.5)
                pad = 0.5  # extra padding, in pixels, so that points within the outer pixels are still counted
                padding = np.arange(0.5 - pad, 0.5)
                rx = (padding.tolist()
                      + list(np.arange(src.width) + 0.5)
                      + list(src.width - padding))
                ry = (padding.tolist()
                      + list(np.arange(src.height) + 0.5)
                      + list(src.height - padding))
                # pad the data to match the padded coordinates
                pad_width = int(np.ceil(pad))
                padded = np.pad(data.astype(float), pad_width=pad_width,
                                mode='edge')
                # exclude nodata points prior to interpolating
                if nodata is not None:
                    padded[padded == nodata] = np.nan
                bounds_error = out_of_bounds_errors == 'raise'
                # fill out-of-bounds locations with NaN directly;
                # interpn extrapolates when fill_value is None,
                # which is what a raster without a nodata value would pass
                results = interpolate.interpn((ry, rx), padded, (y_ry, x_rx),
                                              method=method,
                                              bounds_error=bounds_error,
                                              fill_value=np.nan)
                # convert nodata values to np.nans
                if nodata is not None:
                    results[results == nodata] = np.nan

    # reshape to input shape
    if array_shape is not None:
        results = np.reshape(results, array_shape)
    print("finished in {:.2f}s".format(time.time() - t0))
    return results
def clip_raster(inraster, clip_features, outraster,
                clip_features_crs=None,
                clip_kwargs=None, project_kwargs=None, **kwargs):
    """Clip raster to feature extent(s), write the output to a new raster file.
    If the feature extent(s) are in a different coordinate reference system,
    the raster will first be reprojected to that CRS and then clipped.
    The output raster will be in the CRS of the clip features.

    Parameters
    ----------
    inraster : str
        Path to a raster file readable by rasterio.open
    clip_features : str or list-like
        Shapefile or sequence of features. Features can be in any format
        accepted by gisutils.raster.get_feature_geojson()
    outraster : str
        Filename for output raster.
    clip_features_crs : obj
        Coordinate reference system of the clip features. Ignored if
        clip_features is an existing shapefile that has a CRS; if None,
        the raster CRS is assumed.
        A Python int, dict, str, or pyproj.crs.CRS instance
        passed to the pyproj.crs.from_user_input
        See http://pyproj4.github.io/pyproj/stable/api/crs/crs.html#pyproj.crs.CRS.from_user_input.
        Can be any of:

          - PROJ string
          - Dictionary of PROJ parameters
          - PROJ keyword arguments for parameters
          - JSON string with PROJ parameters
          - CRS WKT string
          - An authority string [i.e. 'epsg:4326']
          - An EPSG integer code [i.e. 4326]
          - A tuple of ("auth_name": "auth_code") [i.e ('epsg', '4326')]
          - An object with a `to_wkt` method.
          - A :class:`pyproj.crs.CRS` class
    clip_kwargs: dict
        Keyword arguments to rasterio.mask
    project_kwargs : dict
        Key word arguments to gisutils.projection.project_raster()
        These are only used if the clip features are in a different
        coordinate system, in which case the raster will be reprojected
        into that coordinate system. Supplying any project_kwargs also
        forces the reprojection step.
    kwargs : keyword arguments
        Keyword arguments to rasterio.open for writing the output raster.

    Raises
    ------
    ModuleNotFoundError
        If rasterio is not available.

    Notes
    -----
    When reprojecting, intermediate rasters named 'tmp.tif' and 'tmp2.tif'
    are written to (and afterwards removed from) the current working
    directory.
    """
    if not rasterio:
        raise ModuleNotFoundError(
            'This function requires rasterio. Please conda install rasterio.')
    if clip_kwargs is None:
        clip_kwargs = {}
    if project_kwargs is None:
        project_kwargs = {}

    with rasterio.open(inraster) as src:
        raster_crs = get_authority_crs(src.crs)

    # start with assumption of same coordinates
    if clip_features_crs is None:
        clip_features_crs = raster_crs

    # get the clip feature crs from the shapefile, if there is one
    shapefile_crs = None
    if isinstance(clip_features, (str, Path)) and Path(clip_features).exists():
        shapefile_crs = get_shapefile_crs(clip_features)
    if shapefile_crs is not None:
        clip_features_crs = shapefile_crs
    # otherwise use the specified (or assumed) crs
    else:
        clip_features_crs = get_authority_crs(clip_features_crs)

    # convert the clip_features to geojson
    geoms = get_feature_geojson(clip_features)

    print('input raster crs:\n{}\n\n'.format(raster_crs),
          'clip feature crs:\n{}\n'.format(clip_features_crs))

    # if the coordinate systems are not the same
    # reproject the raster first before clipping
    reproject = raster_crs != clip_features_crs or len(project_kwargs) > 0
    if not reproject:
        _clip_raster(inraster, geoms, outraster,
                     clip_kwargs=clip_kwargs, **kwargs)
    else:
        tmpraster = 'tmp.tif'
        tmpraster2 = 'tmp2.tif'
        try:
            print(
                'Input raster and clip feature(s) are in different coordinate systems.\n'
                'Reprojecting input raster from\n{}\nto\n{}\n'.format(
                    raster_crs, clip_features_crs))
            # make prelim clip of raster to speed up reprojection
            xmin, xmax, ymin, ymax = get_geojson_collection_bounds(geoms)
            longest_side = np.max([xmax - xmin, ymax - ymin])
            bounds = box(xmin, ymin, xmax, ymax).buffer(longest_side * 0.1)
            bounds = project(bounds, clip_features_crs, raster_crs)
            _clip_raster(inraster, [bounds], tmpraster,
                         clip_kwargs=clip_kwargs)
            project_raster(tmpraster, tmpraster2, clip_features_crs,
                           **project_kwargs, **kwargs)
            _clip_raster(tmpraster2, geoms, outraster,
                         clip_kwargs=clip_kwargs, **kwargs)
        finally:
            # always clean up the intermediate rasters, whatever triggered
            # the reprojection and whether or not it succeeded
            for tmp in (tmpraster, tmpraster2):
                if os.path.exists(tmp):
                    print('removing {}...'.format(tmp))
                    os.remove(tmp)
    print('Done.')
def shp2df(shplist, index=None, index_dtype=None, clipto=[], filter=None,
           true_values=None, false_values=None, layer=None,
           dest_crs=None,
           skip_empty_geom=True):
    """Read shapefile/DBF, list of shapefiles/DBFs, or File geodatabase (GDB)
    into pandas DataFrame.

    Parameters
    ----------
    shplist : string or list
        of shapefile/DBF name(s) or FileGDB
    index : string
        Column to use as index for dataframe. Matched against the
        field names without regard to case.
    index_dtype : dtype
        Enforces a datatype for the index column (for example, if the index field is supposed to be integer
        but pandas reads it as strings, converts to integer)
    clipto : list
        limit what is brought in to items in index of clipto (requires index)
    filter : tuple (xmin, ymin, xmax, ymax)
        bounding box to filter which records are read from the shapefile.
    true_values : list
        same as argument for pandas read_csv
    false_values : list
        same as argument for pandas read_csv
    layer : str
        Layer name to read (if opening FileGDB)
    dest_crs : obj
        A Python int, dict, str, or pyproj.crs.CRS instance
        passed to the pyproj.crs.from_user_input
        See http://pyproj4.github.io/pyproj/stable/api/crs/crs.html#pyproj.crs.CRS.from_user_input.
        Can be any of:

          - PROJ string
          - Dictionary of PROJ parameters
          - PROJ keyword arguments for parameters
          - JSON string with PROJ parameters
          - CRS WKT string
          - An authority string [i.e. 'epsg:4326']
          - An EPSG integer code [i.e. 4326]
          - A tuple of ("auth_name": "auth_code") [i.e ('epsg', '4326')]
          - An object with a `to_wkt` method.
          - A :class:`pyproj.crs.CRS` class
    skip_empty_geom : True/False, default True
        Drops shapefile entries with null geometries.
        DBF files (which specify null geometries in their schema) will still be read.

    Returns
    -------
    df : DataFrame
        with attribute fields as columns; feature geometries are stored as
        shapely geometry objects in the 'geometry' column.

    Raises
    ------
    IOError
        If any of the input files does not exist.
    """
    if isinstance(shplist, (str, Path)):
        shplist = [shplist]
    if not isinstance(true_values, list) and true_values is not None:
        true_values = [true_values]
    if not isinstance(false_values, list) and false_values is not None:
        false_values = [false_values]
    # clipping requires both an index column and a list of values
    # (clipto is only read, never modified)
    clip = bool(index) and clipto is not None and len(clipto) > 0

    # destination crs for geometries read from shapefile(s)
    if dest_crs is not None:
        dest_crs = get_authority_crs(dest_crs)

    # DataFrame.append was removed in pandas 2.0;
    # collect the per-file dataframes and concatenate them once at the end
    dataframes = []
    for shp in shplist:
        print("\nreading {}...".format(shp))
        if not os.path.exists(shp):
            raise IOError("{} doesn't exist".format(shp))

        # crs of current shapefile
        shp_crs = get_shapefile_crs(shp)

        # set the destination CRS if none was specified
        # so that heterogeneous shapefiles will be output to
        # the same CRS
        if dest_crs is None and shp_crs is not None:
            dest_crs = shp_crs

        with fiona.open(shp, 'r', layer=layer) as shp_obj:

            if index is not None:
                # handle capitalization issues with index field name
                fields = list(shp_obj.schema['properties'].keys())
                index = [f for f in fields if index.lower() == f.lower()][0]

            attributes = []
            meta = shp_obj.meta
            has_geometry = meta['schema']['geometry'] != 'None'

            # for reading in shapefiles
            if has_geometry:
                if filter is not None:
                    print('filtering on bounding box {}, {}, {}, {}...'.format(
                        *filter))
                for line in shp_obj.filter(bbox=filter):
                    # copy, so the record's own properties are not modified
                    props = dict(line['properties'])
                    # limit what is brought in to items in index of clipto
                    if clip and props[index] not in clipto:
                        continue
                    props['geometry'] = line.get('geometry', None)
                    attributes.append(props)
            # for reading in DBF files (just like shps, but without geometry)
            else:
                for line in shp_obj:
                    props = line['properties']
                    if clip and props[index] not in clipto:
                        continue
                    attributes.append(props)

            print(
                '--> building dataframe... (may take a while for large shapefiles)'
            )
            shp_df = pd.DataFrame(attributes)
            # reorder fields in the DataFrame to match the input shapefile
            if len(attributes) > 0:
                shp_df = shp_df[list(attributes[0].keys())]

            if has_geometry:
                if len(shp_df) == 0:
                    print('Empty dataframe! No clip_features were read.')
                    if filter is not None:
                        print('Check filter {} for consistency '
                              'with shapefile coordinate system'.format(filter))
                # shp_df will only have a geometry column if it isn't empty
                else:
                    # handle null geometries
                    geoms = shp_df.geometry.tolist()
                    if geoms.count(None) == 0:
                        shp_df['geometry'] = [shape(g) for g in geoms]
                    elif skip_empty_geom:
                        # the index is still the default integer index here,
                        # so positions can be used as labels
                        null_geoms = [
                            i for i, g in enumerate(geoms) if g is None
                        ]
                        shp_df.drop(null_geoms, axis=0, inplace=True)
                        shp_df['geometry'] = [
                            shape(g) for g in shp_df.geometry.tolist()
                        ]
                    else:
                        shp_df['geometry'] = [
                            shape(g) if g is not None else None for g in geoms
                        ]

        if len(shp_df) == 0:
            continue

        # set the dataframe index from the index column
        if index is not None:
            if index_dtype is not None:
                shp_df[index] = shp_df[index].astype(index_dtype)
            shp_df.index = shp_df[index].values

        # reproject geometries to dest_crs if needed
        if shp_crs is not None and dest_crs is not None and shp_crs != dest_crs:
            shp_df['geometry'] = project(shp_df['geometry'], shp_crs, dest_crs)

        dataframes.append(shp_df)

    df = pd.concat(dataframes) if dataframes else pd.DataFrame()

    # convert any t/f columns to numpy boolean data
    if true_values is not None or false_values is not None:
        # either list may have been left unspecified
        replace_boolean = {t: True for t in (true_values or [])}
        replace_boolean.update({f: False for f in (false_values or [])})
        boolean_keys = set(replace_boolean)

        # only remap columns that have values to be replaced
        for c in [c for c in df.columns if c != 'geometry']:
            if len(boolean_keys.intersection(set(df[c]))) > 0:
                df[c] = df[c].map(replace_boolean)

    return df
def df2shp(dataframe, shpname, geo_column='geometry', index=False,
           retain_order=False,
           prj=None, epsg=None, proj_str=None, crs=None):
    """Write a DataFrame with a column of shapely geometries to a shapefile.

    Parameters
    ----------
    dataframe : pandas.DataFrame
    shpname : str, filepath
        Output shapefile
    geo_column : str
        Name of column in dataframe with feature geometries (default 'geometry')
    index : bool
        If True, include the DataFrame index in the written shapefile
    retain_order : bool
        Retain column order in dataframe, using an OrderedDict. Shapefile will
        take about twice as long to write, since OrderedDict output is not
        supported by the pandas DataFrame object.
    prj : str
        Path to ESRI projection file describing the coordinate reference system of the feature geometries
        in the 'geometry' column. (specify one of prj, epsg, proj_str)
    epsg : int
        EPSG code describing the coordinate reference system of the feature geometries
        in the 'geometry' column. Deprecated; use crs instead.
    proj_str : str
        PROJ string describing the coordinate reference system of the feature geometries
        in the 'geometry' column. Deprecated; use crs instead.
    crs : obj
        A Python int, dict, str, or pyproj.crs.CRS instance
        passed to the pyproj.crs.from_user_input
        See http://pyproj4.github.io/pyproj/stable/api/crs/crs.html#pyproj.crs.CRS.from_user_input.
        Can be any of:

          - PROJ string
          - Dictionary of PROJ parameters
          - PROJ keyword arguments for parameters
          - JSON string with PROJ parameters
          - CRS WKT string
          - An authority string [i.e. 'epsg:4326']
          - An EPSG integer code [i.e. 4326]
          - A tuple of ("auth_name": "auth_code") [i.e ('epsg', '4326')]
          - An object with a `to_wkt` method.
          - A :class:`pyproj.crs.CRS` class

    Returns
    -------
    writes a shapefile to shpname

    Raises
    ------
    IOError
        If the output folder doesn't exist.
    IndexError
        If the dataframe is empty.
    """
    # first check if output path exists
    output_folder = os.path.abspath(os.path.split(shpname)[0])
    if os.path.split(shpname)[0] != '' and not os.path.isdir(output_folder):
        raise IOError("Output folder doesn't exist:\n{}".format(output_folder))

    # check for empty dataframe
    if len(dataframe) == 0:
        raise IndexError("DataFrame is empty!")

    df = dataframe.copy()  # make a copy so the supplied dataframe isn't edited

    # reassign geometry column if geo_column is special (e.g. something other than "geometry")
    if geo_column != 'geometry':
        df['geometry'] = df[geo_column]
        df.drop(geo_column, axis=1, inplace=True)

    # assign none for geometry, to write a dbf file from dataframe
    geom_type = None
    if 'geometry' not in df.columns:
        df['geometry'] = None
        geom_type = 'None'
        mapped = [None] * len(df)

    # reset the index to integer index to enforce ordering
    # retain index as attribute field if index=True
    df.reset_index(inplace=True, drop=not index)

    # enforce 10 character limit
    df.columns = rename_fields_to_10_characters(df.columns)

    properties = shp_properties(df)
    del properties['geometry']

    # set projection (or use a prj file, which must be copied after shp is written)
    # alternatively, provide a crs in dictionary form as read using fiona
    # from a shapefile like fiona.open(inshpfile).crs
    crs_wkt = None
    if epsg is not None:
        warnings.warn(
            'gisutils.df2shp: the epsg argument is deprecated; use crs instead',
            DeprecationWarning)
        from fiona.crs import from_epsg
        crs = from_epsg(int(epsg))
    elif proj_str is not None:
        warnings.warn(
            'gisutils.df2shp: the proj_str argument is deprecated; use crs instead',
            DeprecationWarning)
        from fiona.crs import from_string
        crs = from_string(proj_str)
    elif crs is not None:
        proj_crs = get_authority_crs(crs)
        # https://pyproj4.github.io/pyproj/stable/crs_compatibility.html#converting-from-pyproj-crs-crs-for-fiona
        if LooseVersion(fiona.__gdal_version__) < LooseVersion("3.0.0"):
            crs_wkt = proj_crs.to_wkt(WktVersion.WKT1_GDAL)
        else:
            # GDAL 3+ can use WKT2
            crs_wkt = proj_crs.to_wkt()
        # the CRS is passed to fiona as WKT only
        crs = None

    if geom_type != 'None':
        # use the geometry type of the last feature that has one;
        # entries without a geometry type (e.g. None) are skipped
        for g in df['geometry']:
            this_type = getattr(g, 'geom_type', None)
            if this_type is not None:
                geom_type = this_type
        # null geometries are written as null shapes
        mapped = [mapping(g) if g is not None else None
                  for g in df['geometry']]

    schema = {'geometry': geom_type, 'properties': properties}
    attribute_data = df.drop('geometry', axis=1).astype(object)

    if not retain_order:
        props = attribute_data.to_dict(orient='records')
    else:
        props = [collections.OrderedDict(r)
                 for _, r in attribute_data.iterrows()]
    print('writing {}...'.format(shpname), end='')
    with fiona.open(shpname, "w", driver="ESRI Shapefile",
                    crs=crs, crs_wkt=crs_wkt, schema=schema) as output:
        for record_properties, record_geometry in zip(props, mapped):
            output.write({'properties': record_properties,
                          'geometry': record_geometry})

    if prj is not None:
        # derive the .prj name from the shapefile name without assuming
        # a string path or a 3-letter extension
        prj_name = "{}.prj".format(os.path.splitext(shpname)[0])
        try:
            print('copying {} --> {}...'.format(prj, prj_name))
            shutil.copyfile(prj, prj_name)
        except IOError:
            print(
                'Warning: could not find specified prj file. shp will not be projected.'
            )
    print(' Done')
def test_get_authority_crs(input, expected_srs):
    """The ``srs`` of the CRS returned by get_authority_crs should match
    ``expected_srs``, or the input itself when no expectation is given.
    """
    target_srs = input if expected_srs is None else expected_srs
    result = get_authority_crs(input)
    assert result.srs == target_srs