def __init__(self, netcdf_in_path, crs_string=None, settings_path=None):
    '''
    Constructor for class NetCDF2ASEGGDFConverter

    @param netcdf_in_path: Path to a local netCDF file, or an OPeNDAP endpoint URL ending in '.nc'
    @param crs_string: Optional WKT (or other parseable CRS string) overriding any CRS found in the dataset
    @param settings_path: Optional path to YAML settings file. Defaults to aseg_gdf_settings.yml beside this module
    '''
    assert (os.path.isfile(netcdf_in_path)
            or (netcdf_in_path.startswith('http')
                and netcdf_in_path.lower().endswith('.nc'))
            ), '{} is not a valid file or OPeNDAP endpoint'.format(netcdf_in_path)

    self.netcdf_in_path = netcdf_in_path
    self.defn = 0  # DEFN number for last line written to .dfn file

    self.settings_path = settings_path or os.path.join(
        os.path.dirname(__file__), 'aseg_gdf_settings.yml')
    try:
        # Use a context manager so the settings file handle is always closed
        # (original leaked the file object returned by open())
        with open(self.settings_path) as settings_file:
            self.settings = yaml.safe_load(settings_file)
    except Exception:  # Missing or unparseable settings file - fall back to empty settings
        self.settings = {}
    logger.debug('self.settings: {}'.format(pformat(self.settings)))

    self.nc_dataset = netCDF4.Dataset(self.netcdf_in_path, 'r')
    assert 'point' in self.nc_dataset.dimensions.keys(), \
        'No point dimension defined in netCDF dataset'

    self.spatial_ref = None
    if crs_string:  # Explicit CRS supplied - takes precedence over dataset CRS
        self.spatial_ref = get_spatial_ref_from_wkt(crs_string)
        logger.debug('self.spatial_ref set from WKT {}'.format(crs_string))
    else:
        # Try to read WKT from a grid-mapping variable in the dataset
        wkt = None
        try:
            for crs_variable_name in ['crs', 'transverse_mercator']:
                if crs_variable_name in self.nc_dataset.variables.keys():
                    wkt = self.nc_dataset.variables[crs_variable_name].spatial_ref
                    break
            self.spatial_ref = get_spatial_ref_from_wkt(wkt)
        except Exception:  # No usable CRS variable found - leave spatial_ref as None
            pass

    assert self.spatial_ref, 'No Coordinate Reference System defined'

    self.total_points = self.nc_dataset.dimensions['point'].size
def __init__(
        self,
        dataset_keywords,
        grid_variable_name,  # Name of data variable to grid
        grid_bounds,  # Grid bounds as [xmin, ymin, xmax, ymax]
        grid_crs_wkt=None,  # Defaults to GDA94
        start_datetime=None,
        end_datetime=None,
        filter_variable_name=None,  # e.g. 'gridflag'
        filter_value_list=None,  # e.g. ['Station used in the production of GA grids.']
        tile_extra=None,  # Absolute extra per side. Defaults to 5% extra on each side
):
    '''
    TileGridder Constructor
    '''
    # Lazily-populated caches backing the dataset_list / dataset_values properties
    self._dataset_list = None
    self._dataset_values = None

    self.dataset_keywords = dataset_keywords
    self.grid_variable_name = grid_variable_name
    self.grid_bounds = grid_bounds
    self.grid_crs_wkt = get_wkt_from_spatial_ref(
        get_spatial_ref_from_wkt(grid_crs_wkt or TileGridder.GDA94_CRS_WKT))
    self.filter_variable_name = (filter_variable_name
                                 or TileGridder.DEFAULT_FILTER_VARIABLE_NAME)
    self.filter_value_list = (filter_value_list
                              or TileGridder.DEFAULT_FILTER_VALUE_LIST)
    self.start_datetime = start_datetime
    self.end_datetime = end_datetime

    # Work out the padding per axis: a fixed absolute amount when tile_extra is
    # given, otherwise DEFAULT_TILE_EXPANSION_PERCENT of each axis extent
    if tile_extra is None:
        x_padding = ((grid_bounds[2] - grid_bounds[0])
                     * TileGridder.DEFAULT_TILE_EXPANSION_PERCENT)
        y_padding = ((grid_bounds[3] - grid_bounds[1])
                     * TileGridder.DEFAULT_TILE_EXPANSION_PERCENT)
    else:
        x_padding = y_padding = tile_extra

    self.expanded_grid_bounds = [
        grid_bounds[0] - x_padding,
        grid_bounds[1] - y_padding,
        grid_bounds[2] + x_padding,
        grid_bounds[3] + y_padding,
    ]

    # Reproject the padded bounds to GDA94 for use in CSW spatial searches
    self.expanded_gda94_grid_bounds = self.reproject_bounds(
        self.expanded_grid_bounds, self.grid_crs_wkt, TileGridder.GDA94_CRS_WKT)

    print('expanded_gda94_grid_bounds = {}'.format(
        self.expanded_gda94_grid_bounds))
# Setup proxy as required GA_STAFF_WIFI = False if GA_STAFF_WIFI: os.environ['http_proxy'] = 'http://proxy.inno.lan:3128' os.environ['https_proxy'] = 'http://proxy.inno.lan:3128' # N.B: GA internal CSW addresses will need port forwarding to work from the NCI # Also, dev.public.ecat.ga.gov.au requires a hack to csw_utils and owslib to overcome certificate problem DEFAULT_CSW_URL = 'https://dev.public.ecat.ga.gov.au/geonetwork/srv/eng/csw' # GA's internally-facing development eCat #csw_url = 'https://ecat.ga.gov.au/geonetwork/srv/eng/csw' # GA's externally-facing eCat #csw_url = 'https://internal.ecat.ga.gov.au/geonetwork/srv/eng/csw' # GA's internally-facing eCat #csw_url = 'http://geonetworkrr2.nci.org.au/geonetwork/srv/eng/csw' # NCI GeoNetwork WGS84_WKT = get_spatial_ref_from_wkt('EPSG:4326').ExportToWkt() # Set up search criteria #bounds = (120.0, -29.0, 121, -28) # Spatial subset of dataset in WGS84 coordinates #keywords = 'geophysics,airborne digital data,geophysical survey,magnetics,line,AWAGS' # Comma-separated list of keywords # Set spatial information about bounds #centre_coords = [(bounds[dim_index] + bounds[dim_index+2]) / 2.0 for dim_index in range(2)] #utm_wkt = get_utm_wkt(centre_coords, wgs84_wkt) #reprojected_bounding_box = np.array(transform_coords(((bounds[0], bounds[1]), (bounds[2], bounds[1]), (bounds[2], bounds[3]), (bounds[0], bounds[3])), wgs84_wkt, utm_wkt)) #utm_bounds = [min(reprojected_bounding_box[:,0]), # min(reprojected_bounding_box[:,1]), # max(reprojected_bounding_box[:,0]), # max(reprojected_bounding_box[:,1])]
def plot_point_dataset(netcdf_point_utils,
                       variable_to_map,
                       utm_bbox=None,
                       plot_title=None,
                       colour_scheme='binary',
                       point_size=10,
                       point_step=1):
    '''
    Function to plot data points on a map

    @param netcdf_point_utils: NetCDFPointUtils object wrapping a netCDF dataset
    @param variable_to_map: String specifying variable name for colour map
    @param utm_bbox: UTM Bounding box of form [xmin, ymin, xmax, ymax] or None for all points. Default=None
    @param plot_title: String to prefix before dataset title. Default=None for dataset title or dataset basename
    @param colour_scheme: String specifying colour scheme for data points. Default='binary'
    @param point_size: Point size for data points. Default=10
    @param point_step: Point step between plotted points - used to skip points in dense datasets. Default=1
    '''
    def rescale_array(input_np_array, new_range_min=0, new_range_max=1):
        # Linearly rescale array values into [new_range_min, new_range_max]
        old_min = input_np_array.min()
        old_range = input_np_array.max() - old_min
        new_range = new_range_max - new_range_min
        scaled_np_array = ((input_np_array - old_min) / old_range * new_range) + new_range_min
        return scaled_np_array

    if plot_title is None:
        if hasattr(netcdf_point_utils.netcdf_dataset, 'title'):
            plot_title = netcdf_point_utils.netcdf_dataset.title
        else:
            plot_title = netcdf_point_utils.netcdf_dataset.filepath()

    utm_wkt, utm_coords = netcdf_point_utils.utm_coords(netcdf_point_utils.xycoords)

    utm_zone = get_spatial_ref_from_wkt(utm_wkt).GetUTMZone()  # -ve for Southern Hemisphere
    southern_hemisphere = (utm_zone < 0)
    utm_zone = abs(utm_zone)
    projection = ccrs.UTM(zone=utm_zone, southern_hemisphere=southern_hemisphere)
    print('utm_zone = {}'.format(utm_zone))
    #print(utm_coords)

    variable = netcdf_point_utils.netcdf_dataset.variables[variable_to_map]

    # Set geographic range of plot
    if utm_bbox is None:
        utm_bbox = [
            np.min(utm_coords[:, 0]),
            np.min(utm_coords[:, 1]),
            np.max(utm_coords[:, 0]),
            np.max(utm_coords[:, 1])
        ]
        # BUG FIX: dtype='Bool' is not a valid numpy dtype string (raises TypeError);
        # use the builtin bool instead
        spatial_mask = np.ones(shape=variable.shape, dtype=bool)
    else:
        spatial_mask = np.logical_and(
            np.logical_and((utm_bbox[0] <= utm_coords[:, 0]),
                           (utm_coords[:, 0] <= utm_bbox[2])),
            np.logical_and((utm_bbox[1] <= utm_coords[:, 1]),
                           (utm_coords[:, 1] <= utm_bbox[3])))
        utm_coords = utm_coords[spatial_mask]

    print('{} points in UTM bounding box: {}'.format(
        np.count_nonzero(spatial_mask), utm_bbox))
    #print(utm_coords)

    colour_array = rescale_array(variable[spatial_mask], 0, 1)

    fig = plt.figure(figsize=(30, 30))
    ax = fig.add_subplot(1, 1, 1, projection=projection)
    ax.set_title(plot_title)

    #map_image = cimgt.OSM() # https://www.openstreetmap.org/about
    #map_image = cimgt.StamenTerrain() # http://maps.stamen.com/
    map_image = cimgt.QuadtreeTiles()
    ax.add_image(map_image, 10)

    # Compute and set regular tick spacing
    range_x = utm_bbox[2] - utm_bbox[0]
    range_y = utm_bbox[3] - utm_bbox[1]
    x_increment = pow(10.0, floor(log10(range_x))) / 2
    y_increment = pow(10.0, floor(log10(range_y))) / 2
    x_ticks = np.arange((utm_bbox[0] // x_increment + 1) * x_increment,
                        utm_bbox[2], x_increment)
    y_ticks = np.arange((utm_bbox[1] // y_increment + 1) * y_increment,
                        utm_bbox[3], y_increment)
    plt.xticks(x_ticks, rotation=45)
    plt.yticks(y_ticks)

    # set the x and y axis labels
    plt.xlabel("Eastings (m)", rotation=0, labelpad=20)
    plt.ylabel("Northings (m)", rotation=90, labelpad=20)

    # See link for possible colourmap schemes: https://matplotlib.org/examples/color/colormaps_reference.html
    cm = plt.cm.get_cmap(colour_scheme)

    # build a scatter plot of the specified data, define marker, spatial reference system, and the chosen colour map type
    sc = ax.scatter(utm_coords[::point_step, 0],
                    utm_coords[::point_step, 1],
                    marker='o',
                    c=colour_array[::point_step],
                    s=point_size,
                    alpha=0.9,
                    transform=projection,
                    cmap=cm)

    # set the colour bar ticks and labels
    try:
        # not all variables have units. These will fail on the try and produce the map without tick labels.
        cb = plt.colorbar(sc, ticks=[0, 1])
        cb.ax.set_yticklabels([
            str(np.min(variable[spatial_mask])),
            str(np.max(variable[spatial_mask]))
        ])  # vertically oriented colorbar
        cb.set_label("{} {}".format(variable.long_name, variable.units))
    except Exception:  # Best-effort labelling only - narrowed from a bare except
        pass

    plt.show()
def plot_survey_points(
        netcdf_point_utils,  # NetCDFPointUtils object wrapping a netCDF dataset
        variable_to_map,  # String specifying variable name for colour map
        utm_bbox=None,  # utm_bbox is of form [xmin, ymin, xmax, ymax]
        colour_scheme='binary',  # Colour map
        point_size=10  # Size of point in plot
):
    '''
    Plot survey data points over a tiled map image, coloured by the specified
    variable, and save the figure as an image named after the dataset title.
    '''
    utm_wkt, utm_coords = netcdf_point_utils.utm_coords(
        netcdf_point_utils.xycoords[:])

    utm_zone = get_spatial_ref_from_wkt(utm_wkt).GetUTMZone()  # -ve for Southern Hemisphere
    southern_hemisphere = (utm_zone < 0)
    utm_zone = abs(utm_zone)
    projection = ccrs.UTM(zone=utm_zone, southern_hemisphere=southern_hemisphere)
    print('utm_zone = {}'.format(utm_zone))
    # print(utm_coords)

    variable = netcdf_point_utils.netcdf_dataset.variables[variable_to_map]

    # Set geographic range of plot
    if utm_bbox is None:
        utm_bbox = [
            np.min(utm_coords[:, 0]),
            np.min(utm_coords[:, 1]),
            np.max(utm_coords[:, 0]),
            np.max(utm_coords[:, 1])
        ]
        # BUG FIX: dtype='Bool' is not a valid numpy dtype string (raises TypeError);
        # use the builtin bool instead
        spatial_mask = np.ones(shape=variable.shape, dtype=bool)
    else:
        spatial_mask = np.logical_and(
            np.logical_and((utm_bbox[0] <= utm_coords[:, 0]),
                           (utm_coords[:, 0] <= utm_bbox[2])),
            np.logical_and((utm_bbox[1] <= utm_coords[:, 1]),
                           (utm_coords[:, 1] <= utm_bbox[3])))
        utm_coords = utm_coords[spatial_mask]

    print('UTM bounding box: {}'.format(utm_bbox))
    # print(utm_coords)
    print(netcdf_point_utils.dimensions['point'])

    # NOTE(review): rescale_array is not defined in this function - assumed to be
    # a module-level helper (cf. the local one in plot_point_dataset); confirm
    colour_array = rescale_array(variable[spatial_mask], 0, 1)

    # map_image = cimgt.OSM() # https://www.openstreetmap.org/about
    # map_image = cimgt.StamenTerrain() # http://maps.stamen.com/
    map_image = cimgt.QuadtreeTiles()

    fig = plt.figure(figsize=(30, 30))
    ax = fig.add_subplot(1, 1, 1, projection=projection)
    ax.set_title("Point Gravity Survey - " +
                 str(netcdf_point_utils.netcdf_dataset.getncattr('title')))
    ax.add_image(map_image, 10)
    print(utm_bbox)

    # Compute and set regular tick spacing
    range_x = utm_bbox[2] - utm_bbox[0]
    range_y = utm_bbox[3] - utm_bbox[1]
    # BUG FIX: format string was missing its {} placeholder, so range_x was never printed
    print("range_x: {}".format(range_x))
    x_increment = pow(10.0, floor(log10(range_x))) / 2
    y_increment = pow(10.0, floor(log10(range_y))) / 2
    x_ticks = np.arange((utm_bbox[0] // x_increment + 1) * x_increment,
                        utm_bbox[2], x_increment)
    y_ticks = np.arange((utm_bbox[1] // y_increment + 1) * y_increment,
                        utm_bbox[3], y_increment)
    ax.set_xticks(x_ticks)
    ax.set_yticks(y_ticks)

    # set the x and y axis labels
    ax.set_xlabel("Eastings (m)", rotation=0, labelpad=20)
    ax.set_ylabel("Northings (m)", rotation=90, labelpad=20)

    # See link for possible colourmap schemes: https://matplotlib.org/examples/color/colormaps_reference.html
    cm = plt.cm.get_cmap(colour_scheme)

    # build a scatter plot of the specified data, define marker, spatial reference system, and the chosen colour map type
    sc = ax.scatter(utm_coords[:, 0],
                    utm_coords[:, 1],
                    marker='o',
                    c=colour_array,
                    s=point_size,
                    alpha=0.9,
                    transform=projection,
                    cmap=cm)

    # set the colour bar ticks and labels
    cb = plt.colorbar(sc, ticks=[0, 1])
    cb.ax.set_yticklabels([str(np.min(variable)),
                           str(np.max(variable))])  # vertically oriented colorbar
    cb.set_label("{} {}".format(variable.long_name, variable.units))

    #plt.show()
    # NOTE(review): hardcoded user-specific Windows output path - should be parameterized
    plt.savefig("C:\\Users\\u62231\\Desktop\\GravityWork\\maps\\{}".format(
        netcdf_point_utils.netcdf_dataset.getncattr('title')))
def main():
    '''
    Command line entry point: search CSW for point datasets matching keywords,
    then write per-tile dataset lists, point CSVs and (optionally) gridded GeoTIFFs
    covering the requested bounds.
    '''
    def quote_delimitedtext(text, delimiter, quote_char='"'):
        '''
        Helper function to quote text containing delimiters or whitespace
        '''
        # NOTE: raw string for the regex to avoid an invalid-escape warning
        if delimiter in text or quote_char in text or re.search(r'\s', text):
            if delimiter == ',':  # Use double quote to escape quote character for CSV
                return quote_char + text.replace(
                    quote_char, quote_char + quote_char) + quote_char
            else:  # Use backslash to escape quote character for tab or space delimited text
                return quote_char + text.replace(
                    quote_char, '\\' + quote_char) + quote_char
        else:
            return text

    # Define command line arguments
    parser = argparse.ArgumentParser()

    # Required arguments
    parser.add_argument("-k", "--keywords",
                        help="comma-separated list of required keywords for search",
                        type=str, required=True)
    parser.add_argument("-b", "--bounds",
                        help='comma-separated <minx>,<miny>,<maxx>,<maxy> ordinates of bounding box for gridding. N.B: A leading "-" sign on this list should NOT be preceded by a space',
                        type=str, required=True)
    parser.add_argument("-t", "--tilesize",
                        help="single value or comma-separated pair of x,y values for tile size",
                        type=str, required=True)
    parser.add_argument("-d", "--data_variable",
                        help="data variable name", type=str, required=True)
    parser.add_argument("-r", "--grid_resolution",
                        help='grid resolution', type=float, required=True)

    # Optional arguments
    parser.add_argument("-c", "--crs",
                        help='coordinate reference system for bounding box coordinates for search. Defaults to "EPSG:4283".',
                        type=str, default='EPSG:4283')
    parser.add_argument("-f", "--filter_variable",
                        help='name of filter variable. Defaults to "gridflag"',
                        type=str, default='gridflag')
    parser.add_argument("-v", "--filter_values",
                        help='comma separated list of allowed filter values. Defaults to "Station used in the production of GA grids."',
                        type=str, default='Station used in the production of GA grids.')
    parser.add_argument("-m", "--resampling_method",
                        help='resampling method. Defaults to "cubic"', type=str)
    # BUG FIX: argparse %-formats help strings, so a bare '%' in the help text
    # raised ValueError when --help was printed; escaped as '%%'
    parser.add_argument("-x", "--tile_extra",
                        help='absolute extra per side for tiling. Defaults to 5%% extra on each side',
                        type=float, required=False)
    #parser.add_argument("-s", "--start_date", help="start date for search", type=str)
    #parser.add_argument("-e", "--end_date", help="end date for search", type=str)
    parser.add_argument('-p', '--process',
                        action='store_const', const=True, default=False,
                        help='Process tiles. Default is not to process, but just to create point CSV files')
    parser.add_argument('--debug',
                        action='store_const', const=True, default=False,
                        help='output debug information. Default is no debug info')
    parser.add_argument("-o", "--output_dir",
                        help='output directory. Defaults to ".".',
                        type=str, default='.')

    args = parser.parse_args()

    dataset_keywords = args.keywords
    grid_variable_name = args.data_variable
    grid_resolution = args.grid_resolution
    # Snap bounds and tile sizes to the nearest grid_resolution multiple
    grid_bounds = [
        round(float(ordinate.strip()) / grid_resolution) * grid_resolution
        for ordinate in args.bounds.split(',')
    ]
    tile_size = [
        round(float(size.strip()) / grid_resolution) * grid_resolution
        for size in args.tilesize.split(',')
    ]
    if len(tile_size) == 1:  # Only one size given
        tile_size.append(tile_size[0])  # Use same size for X and Y

    grid_crs_wkt = get_wkt_from_spatial_ref(
        get_spatial_ref_from_wkt(args.crs))  # Defaults to GDA94
    start_datetime = None  # args.start_date
    end_datetime = None  # args.end_date
    filter_variable_name = args.filter_variable
    filter_value_list = [
        value.strip() for value in args.filter_values.split(',')
    ]
    tile_extra = args.tile_extra  # Absolute extra per side. Defaults to 5% extra on each side
    # BUG FIX: was os.environ.get('filter_variable_name'), which silently ignored
    # the -m/--resampling_method command line argument
    resampling_method = args.resampling_method or 'cubic'

    assert os.path.isdir(args.output_dir), 'Invalid output directory'
    output_dir = os.path.abspath(args.output_dir)

    # Make subdirectories if required (exist_ok replaces the old try/except: pass)
    for subdir_name in ('tiles', 'point_lists', 'dataset_lists'):
        os.makedirs(os.path.join(output_dir, subdir_name), exist_ok=True)

    # Iterate over the lower-left corner of every tile covering the grid bounds
    for ll_point in itertools.product(*[
            np.arange(grid_bounds[0 + dim_index], grid_bounds[2 + dim_index],
                      tile_size[dim_index]) for dim_index in range(2)
    ]):
        tile_bounds = [
            ll_point[0], ll_point[1], ll_point[0] + tile_size[0],
            ll_point[1] + tile_size[1]
        ]
        print(tile_bounds)
        tile_path = os.path.join(
            output_dir, 'tiles',
            '_'.join([grid_variable_name] +
                     [str(ordinate) for ordinate in tile_bounds]) + '.tif')
        point_list_path = os.path.join(
            output_dir, 'point_lists',
            '_'.join([grid_variable_name] +
                     [str(ordinate) for ordinate in tile_bounds]) + '.csv')
        dataset_list_path = os.path.join(
            output_dir, 'dataset_lists',
            '_'.join([grid_variable_name] +
                     [str(ordinate) for ordinate in tile_bounds]) + '.txt')

        tg = TileGridder(
            dataset_keywords,
            grid_variable_name,  # Name of data variable to grid
            tile_bounds,  # Grid bounds as [xmin, ymin, xmax, ymax]
            grid_crs_wkt,  # Defaults to GDA94
            start_datetime,
            end_datetime,
            filter_variable_name,  # e.g. 'gridflag'
            filter_value_list,  # e.g. ['Station used in the production of GA grids.']
            tile_extra,  # Absolute extra per side. Defaults to 5% extra on each side
        )

        if not os.path.isfile(dataset_list_path):  # No dataset list file exists - try to make one
            try:
                tg.output_dataset_list(dataset_list_path)
                print('Finished writing dataset list file {}'.format(
                    dataset_list_path))
            except Exception as e:
                print('Unable to create dataset list file {}: {}'.format(
                    dataset_list_path, e))

        # Process tiles if required
        if args.process and os.path.isfile(dataset_list_path):
            # Skip processing tile if already done
            if os.path.isfile(tile_path):
                print('Already created tile file {}'.format(tile_path))
                continue

            if os.path.isfile(dataset_list_path):  # Dataset list exists - try to read it
                try:
                    tg.read_dataset_list(dataset_list_path)
                    print('Finished read dataset list file {}'.format(
                        dataset_list_path))
                except Exception as e:
                    print('Unable to read dataset list file {}: {}'.format(
                        dataset_list_path, e))
                    continue

            if not len(tg.dataset_list):
                print('No datasets to process')
                continue

            if not os.path.isfile(point_list_path):  # No point file exists - try to make one
                try:
                    tg.output_points(point_list_path)
                    print('Finished writing point file {}'.format(
                        point_list_path))
                except Exception as e:
                    print('Unable to create point file {}: {}'.format(
                        point_list_path, e))
                    continue

            # Process tile: read back the point CSV written by output_points
            point_coordinates = []
            point_values = []
            with open(point_list_path, 'r') as csv_file:
                _header = csv_file.readline()  # Read header
                for line in csv_file.readlines():
                    line_values = [value.strip() for value in line.split(',')]
                    point_coordinates.append(
                        tuple(float(ordinate) for ordinate in line_values[1:3]))
                    point_values.append(float(line_values[3]))
            point_coordinates = np.array(point_coordinates)
            point_values = np.array(point_values)
            print('Finished reading point file {}'.format(point_list_path))
            print(point_coordinates)

            if not len(point_values):
                print('No points to grid')
                continue

            grid_array, grid_wkt, geotransform = tg.grid_tile(
                grid_resolution=grid_resolution,
                coordinates=point_coordinates,
                values=point_values,
                resampling_method=resampling_method,
                point_step=1)
            print(grid_array.shape, grid_wkt, geotransform)

            array2file(data_arrays=[grid_array],
                       projection=grid_crs_wkt,
                       geotransform=geotransform,
                       file_path=tile_path,
                       file_format='GTiff')
class TileGridder(object):
    '''
    TileGridder: search a CSW catalogue for point datasets intersecting a tile's
    bounds, read and filter their point values, and interpolate the chosen
    variable onto a regular grid.
    '''
    DEFAULT_TILE_EXPANSION_PERCENT = 0.05  # Proportion of each axis to pad tile bounds by
    DEFAULT_CSW_URL = 'https://ecat.ga.gov.au/geonetwork/srv/eng/csw'
    GDA94_CRS_WKT = get_wkt_from_spatial_ref(
        get_spatial_ref_from_wkt('EPSG:4283'))  # Defaults to GDA94
    DEFAULT_FILTER_VARIABLE_NAME = 'gridflag'
    DEFAULT_FILTER_VALUE_LIST = ['Station used in the production of GA grids.']

    def __init__(
            self,
            dataset_keywords,
            grid_variable_name,  # Name of data variable to grid
            grid_bounds,  # Grid bounds as [xmin, ymin, xmax, ymax]
            grid_crs_wkt=None,  # Defaults to GDA94
            start_datetime=None,
            end_datetime=None,
            filter_variable_name=None,  # e.g. 'gridflag'
            filter_value_list=None,  # e.g. ['Station used in the production of GA grids.']
            tile_extra=None,  # Absolute extra per side. Defaults to 5% extra on each side
    ):
        '''
        TileGridder Constructor
        '''
        # Lazily-populated caches for the dataset_list / dataset_values properties
        self._dataset_list = None
        self._dataset_values = None

        self.dataset_keywords = dataset_keywords
        self.grid_variable_name = grid_variable_name
        self.grid_bounds = grid_bounds
        self.grid_crs_wkt = get_wkt_from_spatial_ref(
            get_spatial_ref_from_wkt(grid_crs_wkt or TileGridder.GDA94_CRS_WKT))
        self.filter_variable_name = filter_variable_name or TileGridder.DEFAULT_FILTER_VARIABLE_NAME
        self.filter_value_list = filter_value_list or TileGridder.DEFAULT_FILTER_VALUE_LIST
        self.start_datetime = start_datetime
        self.end_datetime = end_datetime

        if tile_extra is None:
            # Expand bounds by TileGridder.DEFAULT_TILE_EXPANSION_PERCENT percent each side
            self.expanded_grid_bounds = [
                grid_bounds[0] - (grid_bounds[2] - grid_bounds[0]) *
                TileGridder.DEFAULT_TILE_EXPANSION_PERCENT,
                grid_bounds[1] - (grid_bounds[3] - grid_bounds[1]) *
                TileGridder.DEFAULT_TILE_EXPANSION_PERCENT,
                grid_bounds[2] + (grid_bounds[2] - grid_bounds[0]) *
                TileGridder.DEFAULT_TILE_EXPANSION_PERCENT,
                grid_bounds[3] + (grid_bounds[3] - grid_bounds[1]) *
                TileGridder.DEFAULT_TILE_EXPANSION_PERCENT,
            ]
        else:  # Expand bounds by absolute amount
            self.expanded_grid_bounds = [
                grid_bounds[0] - tile_extra,
                grid_bounds[1] - tile_extra,
                grid_bounds[2] + tile_extra,
                grid_bounds[3] + tile_extra,
            ]

        # Reproject padded bounds to GDA94 for use in CSW spatial searches
        self.expanded_gda94_grid_bounds = self.reproject_bounds(
            self.expanded_grid_bounds, self.grid_crs_wkt,
            TileGridder.GDA94_CRS_WKT)

        print('expanded_gda94_grid_bounds = {}'.format(
            self.expanded_gda94_grid_bounds))

    @property
    def dataset_list(self):
        '''
        list of individual datasets which intersect bounding box
        '''
        if self._dataset_list is None:
            self._dataset_list = self.get_netcdf_datasets(
                self.dataset_keywords,
                bounding_box=self.expanded_gda94_grid_bounds,
                start_datetime=self.start_datetime,
                end_datetime=self.end_datetime,
                csw_url=None,
            )
        return self._dataset_list

    @property
    def dataset_values(self):
        '''
        Read and filter points from individual datasets
        '''
        if self._dataset_values is None:
            self._dataset_values = {
                dataset: dataset_value_dict
                for dataset, dataset_value_dict in self.dataset_value_generator(
                    [self.grid_variable_name, self.filter_variable_name],
                    self.dataset_list,
                    self.expanded_gda94_grid_bounds,
                )
            }
            self.filter_points()
        return self._dataset_values

    def filter_points(self):
        '''
        Filter points from individual datasets,
        e.g. only use points where gridflag == 'Station used in the production of GA grids.'
        '''
        # Only filter if we have a filter variable and allowed values
        if not (self.filter_variable_name and self.filter_value_list):
            return

        for dataset in sorted(self.dataset_values.keys()):
            # BUG FIX: np.bool alias was removed in numpy>=1.24 - use builtin bool
            filter_mask = np.zeros(
                shape=(self.dataset_values[dataset]['coordinates'].shape[0], ),
                dtype=bool)
            for filter_value in self.filter_value_list:
                filter_mask = np.logical_or(
                    filter_mask,
                    (self.dataset_values[dataset][self.filter_variable_name] ==
                     filter_value))

            coordinates = self.dataset_values[dataset]['coordinates'][filter_mask]
            if len(coordinates):
                self.dataset_values[dataset]['coordinates'] = coordinates
                self.dataset_values[dataset][
                    self.grid_variable_name] = self.dataset_values[dataset][
                        self.grid_variable_name][filter_mask]
                del self.dataset_values[dataset][
                    self.filter_variable_name]  # We don't need this any more
            else:
                del self.dataset_values[dataset]  # No usable points in dataset

    def reproject_bounds(self, bounds, from_crs_wkt, to_crs_wkt):
        '''
        Function to return orthogonal bounds reprojected to new CRS
        '''
        if from_crs_wkt == to_crs_wkt:  # No change
            return bounds

        # Reproject all four corners, then take the axis-aligned envelope
        bounding_box = ((bounds[0], bounds[1]), (bounds[2], bounds[1]),
                        (bounds[2], bounds[3]), (bounds[0], bounds[3]))
        reprojected_bounding_box = np.array(
            transform_coords(bounding_box, from_crs_wkt, to_crs_wkt))

        reprojected_bounds = (min(reprojected_bounding_box[:, 0]),
                              min(reprojected_bounding_box[:, 1]),
                              max(reprojected_bounding_box[:, 0]),
                              max(reprojected_bounding_box[:, 1]))
        return reprojected_bounds

    def get_netcdf_datasets(
            self,
            keywords,
            bounding_box=None,
            start_datetime=None,
            end_datetime=None,
            csw_url=None,
    ):
        '''
        Find all datasets of interest and return a list of NetCDF file paths or
        OPeNDAP web service endpoints
        '''
        csw_url = csw_url or TileGridder.DEFAULT_CSW_URL

        # create a csw_utils object and populate the parameters with search parameters
        try:
            cswu = CSWUtils(csw_url)
        except Exception:  # Retry without certificate verification - narrowed from bare except
            cswu = CSWUtils(csw_url, verify=False)

        print('Querying CSW')
        record_list = [
            record for record in cswu.query_csw(
                keyword_list=keywords,
                #anytext_list=allwords,
                #titleword_list=titlewords,
                bounding_box=bounding_box,
                start_datetime=start_datetime,
                stop_datetime=end_datetime,
                #max_total_records=2000,
                get_layers=False,
            )
        ]
        print('{} matching dataset records found from CSW'.format(
            len(record_list)))

        netcdf_list = [
            str(distribution['url'])
            for distribution in cswu.get_netcdf_urls(record_list)
        ]
        print('{} NetCDF distributions found'.format(len(netcdf_list)))

        return netcdf_list

    def dataset_value_generator(self,
                                variable_name_list,
                                dataset_list,
                                bounding_box,
                                min_points=None,
                                max_points=None):
        '''
        Generator yielding coordinates and values of the specified variable for
        all points from the supplied dataset list which fall within bounds
        '''
        for dataset in dataset_list:
            try:
                try:
                    nc_dataset = Dataset(dataset)
                except Exception:
                    # Work-around for bad _FillValue: https://github.com/Unidata/netcdf-c/issues/1299
                    nc_dataset = Dataset(dataset + '#fillmismatch')

                netcdf_point_utils = NetCDFPointUtils(nc_dataset)

                spatial_mask = netcdf_point_utils.get_spatial_mask(
                    bounding_box, self.grid_crs_wkt)
                point_count = np.count_nonzero(spatial_mask)

                print('{}/{} points found in expanded bounding box for {}'.
                      format(point_count, netcdf_point_utils.point_count,
                             dataset))
                if not point_count:
                    continue

                # Enforce min/max point counts
                if min_points and point_count < min_points:
                    print('Skipping dataset with < {} points'.format(min_points))
                    continue
                if max_points and point_count > max_points:
                    print('Skipping dataset with > {} points'.format(max_points))
                    continue

                dataset_value_dict = {
                    'coordinates':
                    transform_coords(
                        netcdf_point_utils.xycoords[spatial_mask],
                        get_wkt_from_spatial_ref(
                            get_spatial_ref_from_wkt(netcdf_point_utils.wkt)),
                        self.grid_crs_wkt)
                }

                # Read all variable attributes and values
                for variable_name in variable_name_list:
                    variable = nc_dataset.variables[variable_name]
                    if variable.dimensions[0] != 'point':
                        # Variable is NOT of point dimension - must be lookup
                        dataset_value_dict[
                            variable_name] = netcdf_point_utils.expand_lookup_variable(
                                lookup_variable_name=variable_name,
                                mask=spatial_mask)
                    else:  # 'point' is in variable.dimensions - "normal" variable
                        dataset_value_dict[variable_name] = variable[spatial_mask]

                yield dataset, dataset_value_dict

            except Exception as e:
                # Best-effort: report and skip unreadable datasets
                print('Unable to read point dataset {}: {}'.format(dataset, e))

    def grid_points(self,
                    coordinates,
                    coordinate_wkt,
                    values,
                    grid_wkt,
                    grid_bounds,
                    grid_resolution,
                    resampling_method='linear',
                    point_step=1):
        '''
        Return interpolated grid, CRS WKT and geotransform from supplied
        coordinates and points
        '''
        # Determine spatial grid bounds rounded out to nearest GRID_RESOLUTION multiple
        pixel_centre_bounds = (
            round((math.floor(grid_bounds[0] / grid_resolution) + 0.5) *
                  grid_resolution, 6),
            round((math.floor(grid_bounds[1] / grid_resolution) + 0.5) *
                  grid_resolution, 6),
            round((math.floor(grid_bounds[2] / grid_resolution) - 0.5) *
                  grid_resolution, 6),
            round((math.floor(grid_bounds[3] / grid_resolution) - 0.5) *
                  grid_resolution, 6))

        print("Reprojecting coordinates")
        grid_coordinates = np.array(
            transform_coords(coordinates, coordinate_wkt, grid_wkt))

        # Create grids of Y and X values. Note YX ordering and inverted Y for image
        # Note GRID_RESOLUTION/2.0 fudge to avoid truncation due to rounding error
        print("Generating grid coordinates")
        grid_y, grid_x = np.mgrid[
            pixel_centre_bounds[3]:pixel_centre_bounds[1] -
            grid_resolution / 2.0:-grid_resolution,
            pixel_centre_bounds[0]:pixel_centre_bounds[2] +
            grid_resolution / 2.0:grid_resolution]

        # Skip points to reduce memory requirements
        print("Generating point subset mask")
        point_subset_mask = np.zeros(shape=values.shape, dtype=bool)
        # BUG FIX: was [0:-1:point_step], which always excluded the final point
        # even with point_step=1
        point_subset_mask[::point_step] = True
        grid_coordinates = grid_coordinates[point_subset_mask]
        values = values[point_subset_mask]

        # Interpolate required values to the grid - Note yx ordering and inverted y for image
        print("Interpolating {} points".format(grid_coordinates.shape[0]))
        grid_array = griddata(grid_coordinates[:, ::-1],
                              values, (grid_y, grid_x),
                              method=resampling_method)
        print("Interpolation complete")

        # crs:GeoTransform = "109.1002342895272 0.00833333 0 -9.354948067227777 0 -0.00833333 "
        geotransform = [
            pixel_centre_bounds[0] - grid_resolution / 2.0, grid_resolution, 0,
            pixel_centre_bounds[3] + grid_resolution / 2.0, 0, -grid_resolution
        ]

        return grid_array, grid_wkt, geotransform

    def grid_tile(self,
                  grid_resolution,
                  coordinates=None,
                  values=None,
                  resampling_method='linear',
                  point_step=1):
        '''
        Grid this tile's variable values, defaulting to all filtered points from
        every matching dataset when coordinates/values are not supplied
        '''
        if coordinates is None:
            coordinates = np.concatenate([
                self.dataset_values[dataset]['coordinates']
                for dataset in sorted(self.dataset_values.keys())
            ])
        if values is None:
            values = np.concatenate([
                self.dataset_values[dataset][self.grid_variable_name]
                for dataset in sorted(self.dataset_values.keys())
            ])

        return self.grid_points(coordinates=coordinates,
                                coordinate_wkt=self.grid_crs_wkt,
                                values=values,
                                grid_wkt=self.grid_crs_wkt,
                                grid_bounds=self.grid_bounds,
                                grid_resolution=grid_resolution,
                                resampling_method=resampling_method,
                                point_step=point_step)

    def output_points(self, point_list_path):
        '''
        Write CSV containing all points to point_list_path
        '''
        with open(point_list_path, 'w') as output_file:
            output_file.write(
                ', '.join(['Dataset', 'X', 'Y', self.grid_variable_name]) + '\n')
            for dataset in sorted(self.dataset_values.keys()):
                arrays = self.dataset_values[dataset]
                for point_index in range(len(arrays['coordinates'])):
                    output_file.write(','.join([
                        os.path.basename(dataset),
                        ','.join([
                            str(ordinate)
                            for ordinate in arrays['coordinates'][point_index]
                        ]),
                        str(arrays[self.grid_variable_name][point_index])
                    ]) + '\n')

    def output_dataset_list(self, dataset_list_path):
        '''
        Write a text file containing all dataset paths or URLs to dataset_list_path
        '''
        with open(dataset_list_path, 'w') as output_file:
            for dataset in sorted(self.dataset_list):
                output_file.write(dataset + '\n')

    def read_dataset_list(self, dataset_list_path):
        '''
        Read a text file containing all dataset paths or URLs from dataset_list_path
        (docstring corrected - this method reads, it does not write)
        '''
        with open(dataset_list_path, 'r') as input_file:
            self._dataset_list = list(
                [dataset.strip() for dataset in input_file.readlines()])
def dataset_value_generator(self,
                            variable_name_list,
                            dataset_list,
                            bounding_box,
                            min_points=None,
                            max_points=None):
    '''
    Generator yielding coordinates and values of the specified variables for
    all points from the supplied dataset list which fall within bounds

    @param variable_name_list: list of netCDF variable names to read per point
    @param dataset_list: list of netCDF file paths or OPeNDAP endpoints
    @param bounding_box: bounds (in self.grid_crs_wkt CRS) used to mask points
    @param min_points: optional minimum point count - datasets with fewer
        in-bounds points are skipped
    @param max_points: optional maximum point count - datasets with more
        in-bounds points are skipped
    @yield: (dataset, dataset_value_dict) where dataset_value_dict maps
        'coordinates' and each requested variable name to a masked array
    '''
    for dataset in dataset_list:
        try:
            try:
                nc_dataset = Dataset(dataset)
            except Exception:
                # Was a bare "except:" - narrowed so SystemExit/KeyboardInterrupt
                # are not swallowed.
                # Work-around for bad _FillValue: https://github.com/Unidata/netcdf-c/issues/1299
                nc_dataset = Dataset(dataset + '#fillmismatch')

            netcdf_point_utils = NetCDFPointUtils(nc_dataset)

            # Mask points that fall outside the (expanded) bounding box
            spatial_mask = netcdf_point_utils.get_spatial_mask(
                bounding_box, self.grid_crs_wkt)
            point_count = np.count_nonzero(spatial_mask)

            print('{}/{} points found in expanded bounding box for {}'.format(
                point_count, netcdf_point_utils.point_count, dataset))

            if not point_count:
                continue

            # Enforce min/max point counts
            if min_points and point_count < min_points:
                print('Skipping dataset with < {} points'.format(min_points))
                continue
            if max_points and point_count > max_points:
                print('Skipping dataset with > {} points'.format(max_points))
                continue

            # Reproject masked coordinates from the dataset CRS to the grid CRS
            dataset_value_dict = {
                'coordinates': transform_coords(
                    netcdf_point_utils.xycoords[spatial_mask],
                    get_wkt_from_spatial_ref(
                        get_spatial_ref_from_wkt(netcdf_point_utils.wkt)),
                    self.grid_crs_wkt)
            }

            # Read all variable attributes and values
            for variable_name in variable_name_list:
                variable = nc_dataset.variables[variable_name]
                if variable.dimensions[0] != 'point':
                    # Variable is NOT of point dimension - must be lookup
                    dataset_value_dict[variable_name] = \
                        netcdf_point_utils.expand_lookup_variable(
                            lookup_variable_name=variable_name,
                            mask=spatial_mask)
                else:  # 'point' is in variable.dimensions - "normal" variable
                    dataset_value_dict[variable_name] = variable[spatial_mask]

            yield dataset, dataset_value_dict

        except Exception as e:
            # Best-effort: report and move on to the next dataset
            print('Unable to read point dataset {}: {}'.format(dataset, e))
def build_crs_variable(self, crs, grid_dimensions=None):
    '''
    Concrete method to build "crs" or "transverse_mercator" NetCDFVariable
    instance from well known text
    N.B: Need to create dimensions and dimension variables first if
    grid_dimensions is specified

    @param crs: Either osgeo.osr.SpatialReference or WKT string defining
        Coordinate Reference System
    @param grid_dimensions: list of two dimension names for spatial grid
        (yx ordering), or None for ungridded data
    @return: scalar NetCDFVariable carrying the CRS attributes
    @raise TypeError: if crs is neither a SpatialReference nor a WKT string
    '''
    def get_geotransform(grid_dimensions):
        '''
        Helper function to return geotransform for gridded data. Assumes yx
        array ordering for grid
        N.B: This will fail if dimensions and dimension variables don't exist
        '''
        assert len(
            grid_dimensions) == 2, 'grid_dimensions must be of length 2'
        assert set(grid_dimensions) <= set(
            self.nc_output_dataset.dimensions.keys(
            )), 'Invalid grid_dimensions'
        assert set(grid_dimensions) <= set(
            self.nc_output_dataset.variables.keys(
            )), 'Dimension index variables not created'

        # Signed cell size per (y, x) dimension from first/last centre values;
        # y is negative for a descending (image-order) y axis
        cell_sizes = [
            (self.nc_output_dataset.variables[grid_dimensions[dim_index]]
             [-1] - self.nc_output_dataset.variables[
                 grid_dimensions[dim_index]][0]) /
            self.nc_output_dataset.dimensions[
                grid_dimensions[dim_index]].size for dim_index in range(2)
        ]

        # GDAL geotransform: [x_origin, x_size, 0, y_origin, 0, y_size]
        # where origins are pixel EDGES (half a cell out from the first centre)
        return [
            self.nc_output_dataset.variables[grid_dimensions[1]][0] -
            cell_sizes[1] / 2,  # x_min
            cell_sizes[1],  # x_size
            0.0,
            # BUG FIX: y origin was offset by cell_sizes[1] (the X cell size);
            # it must be offset by half the Y cell size, cell_sizes[0]
            self.nc_output_dataset.variables[grid_dimensions[0]][0] -
            cell_sizes[0] / 2,  # y_min
            0.0,
            cell_sizes[0],  # y_size
        ]

    # Determine wkt and spatial_ref from crs as required.
    # isinstance (not type ==) so SpatialReference subclasses are accepted
    if isinstance(crs, osgeo.osr.SpatialReference):
        spatial_ref = crs
    elif isinstance(crs, str):
        spatial_ref = get_spatial_ref_from_wkt(crs)
    else:
        # Previously fell through with spatial_ref unbound, raising an opaque
        # UnboundLocalError - fail fast with a clear message instead
        raise TypeError(
            'crs must be an osgeo.osr.SpatialReference or a WKT string')

    wkt = spatial_ref.ExportToWkt(
    )  # Export WKT from spatial_ref for consistency even if supplied

    crs_attributes = {'spatial_ref': wkt}
    crs_attributes['inverse_flattening'] = spatial_ref.GetInvFlattening()
    crs_attributes['semi_major_axis'] = spatial_ref.GetSemiMajor()
    crs_attributes[
        'longitude_of_prime_meridian'] = spatial_ref.GetAttrValue(
            'PRIMEM', 1)

    if spatial_ref.GetUTMZone():  # CRS is UTM
        crs_variable_name = 'transverse_mercator'
        crs_attributes['grid_mapping_name'] = 'transverse_mercator'
        crs_attributes[
            'latitude_of_projection_origin'] = spatial_ref.GetProjParm(
                'latitude_of_origin')
        crs_attributes[
            'scale_factor_at_central_meridian'] = spatial_ref.GetProjParm(
                'scale_factor')
        crs_attributes[
            'longitude_of_central_meridian'] = spatial_ref.GetProjParm(
                'central_meridian')
        crs_attributes['false_northing'] = spatial_ref.GetProjParm(
            'false_northing')
        crs_attributes['false_easting'] = spatial_ref.GetProjParm(
            'false_easting')
    else:
        #===================================================================
        # # Example crs attributes created by GDAL:
        #
        # crs:inverse_flattening = 298.257222101 ;
        # crs:spatial_ref = "GEOGCS[\"GEOCENTRIC DATUM of AUSTRALIA\",DATUM[\"GDA94\",SPHEROID[\"GRS80\",6378137,298.257222101]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433]]" ;
        # crs:semi_major_axis = 6378137. ;
        # crs:GeoTransform = "121.1202390605822 0.0037 0 -20.56227098919639 0 -0.0037 " ;
        # crs:grid_mapping_name = "latitude_longitude" ;
        # crs:longitude_of_prime_meridian = 0. ;
        #===================================================================
        crs_variable_name = 'crs'
        crs_attributes['grid_mapping_name'] = 'latitude_longitude'

    # Set GeoTransform only for regular gridded data
    if grid_dimensions:
        crs_attributes['GeoTransform'] = ' '.join(
            [str(value) for value in get_geotransform(grid_dimensions)])

    logger.debug('crs_attributes: {}'.format(pformat(crs_attributes)))

    return NetCDFVariable(
        short_name=crs_variable_name,
        data=0,
        dimensions=[],  # Scalar
        fill_value=None,
        chunk_size=0,
        attributes=crs_attributes,
        dtype='int8'  # Byte datatype
    )