def _bbox2poly(bbox):
    xmin = bbox['boundingBox']['minX']
    xmax = bbox['boundingBox']['maxX']
    ymin = bbox['boundingBox']['minY']
    ymax = bbox['boundingBox']['maxY']

    return util.bbox2poly(xmin, ymin, xmax, ymax, as_shapely=True)
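# Example usage (hypothetical values): the ``bbox`` argument is a nested dict
# matching the keys read above.
#
#   poly = _bbox2poly({'boundingBox': {'minX': -91.0, 'minY': 32.0,
#                                      'maxX': -90.0, 'maxY': 33.0}})
#   # poly is a shapely Polygon covering the box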
def _run_tool(self):
    dataset = self.dataset

    # get metadata, path, etc. from the first dataset, i.e. assume all
    # datasets are in the same folder. This will break if you try to
    # combine datasets from different providers.
    orig_metadata = get_metadata(dataset)[dataset]
    src_path = orig_metadata['file_path']

    if self.new_crs is None:
        raise ValueError("A new coordinate reference system MUST be provided")

    dst_crs = self.new_crs

    new_metadata = {
        'parameter': orig_metadata['parameter'],
        'datatype': orig_metadata['datatype'],
        'file_format': orig_metadata['file_format'],
    }

    new_dset, file_path, catalog_entry = self._create_new_dataset(
        old_dataset=dataset,
        ext='.tif',
        dataset_metadata=new_metadata,
    )

    # run filter
    with rasterio.open(src_path) as src:
        # reproject with the gdalwarp CLI and write out the tif file
        subprocess.check_output([
            'gdalwarp', src_path, file_path,
            '-s_srs', src.crs.to_string(),
            '-t_srs', dst_crs,
        ])

    with rasterio.open(file_path) as f:
        geometry = util.bbox2poly(f.bounds.left, f.bounds.bottom,
                                  f.bounds.right, f.bounds.top,
                                  as_shapely=True)

    update_metadata(catalog_entry, quest_metadata={'geometry': geometry.to_wkt()})

    return {'datasets': new_dset, 'catalog_entries': catalog_entry}
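# For reference, a minimal in-process sketch of the same reprojection using
# rasterio.warp instead of shelling out to gdalwarp. This is NOT what the tool
# above does; the helper name is hypothetical, and resampling/nodata handling
# may differ from the gdalwarp defaults.
def _reproject_with_rasterio(src_path, file_path, dst_crs):
    from rasterio.warp import calculate_default_transform, reproject, Resampling

    with rasterio.open(src_path) as src:
        # compute the output grid for the target CRS
        transform, width, height = calculate_default_transform(
            src.crs, dst_crs, src.width, src.height, *src.bounds)
        profile = src.profile.copy()
        profile.update(crs=dst_crs, transform=transform,
                       width=width, height=height)

        # warp each band into the new grid
        with rasterio.open(file_path, 'w', **profile) as dst:
            for band in range(1, src.count + 1):
                reproject(
                    source=rasterio.band(src, band),
                    destination=rasterio.band(dst, band),
                    src_transform=src.transform,
                    src_crs=src.crs,
                    dst_transform=transform,
                    dst_crs=dst_crs,
                    resampling=Resampling.nearest,
                )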
def _run_tool(self):
    datasets = self.datasets
    # if len(datasets) < 2:
    #     raise ValueError('There must be at LEAST two datasets for this filter')

    orig_metadata = get_metadata(datasets[0])[datasets[0]]
    raster_files = [get_metadata(dataset)[dataset]['file_path'] for dataset in datasets]

    for dataset in datasets:
        if get_metadata(dataset)[dataset]['parameter'] != orig_metadata['parameter']:
            raise ValueError('Parameters must match for all datasets')
        if get_metadata(dataset)[dataset]['unit'] != orig_metadata['unit']:
            raise ValueError('Units must match for all datasets')

    new_metadata = {
        'parameter': orig_metadata['parameter'],
        'datatype': orig_metadata['datatype'],
        'file_format': orig_metadata['file_format'],
        'unit': orig_metadata['unit'],
    }

    new_dset, file_path, catalog_entry = self._create_new_dataset(
        old_dataset=datasets[0],
        ext='.tif',
        dataset_metadata=new_metadata,
    )

    open_datasets = [rasterio.open(d) for d in raster_files]
    profile = open_datasets[0].profile

    # hack to avoid a nodata-out-of-range-of-dtype error for NED datasets
    profile['nodata'] = -32768.0 if profile['nodata'] == -3.4028234663853e+38 else profile['nodata']

    new_data, transform = rasterio.merge.merge(open_datasets, nodata=profile['nodata'])
    for d in open_datasets:
        d.close()

    profile.pop('tiled', None)
    profile.update(
        height=new_data.shape[1],
        width=new_data.shape[2],
        transform=transform,
        driver='GTiff',
    )

    with rasterio.open(file_path, 'w', **profile) as output:
        output.write(new_data.astype(profile['dtype']))

    bbox = self.bbox
    if bbox is not None:
        bbox = box(*bbox)
        geo = gpd.GeoDataFrame({'geometry': bbox}, index=[0], crs=from_epsg(4326))
        geo = geo.to_crs(crs=profile['crs'])
        bbox = geo.geometry

        with rasterio.open(file_path, 'r') as merged:
            new_data, transform = rasterio.mask.mask(dataset=merged, shapes=bbox,
                                                     all_touched=True, crop=True)

        # profile.pop('tiled', None)
        profile.update(
            height=new_data.shape[1],
            width=new_data.shape[2],
            transform=transform,
        )

        with rasterio.open(file_path, 'w', **profile) as clipped:
            clipped.write(new_data)

    with rasterio.open(file_path) as f:
        geometry = util.bbox2poly(f.bounds.left, f.bounds.bottom,
                                  f.bounds.right, f.bounds.top,
                                  as_shapely=True)

    update_metadata(catalog_entry, quest_metadata={'geometry': geometry.to_wkt()})

    return {'datasets': new_dset, 'catalog_entries': catalog_entry}
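# Note on the clipping step above: ``shapely.geometry.box`` takes
# (minx, miny, maxx, maxy), and the GeoDataFrame is built in EPSG:4326 before
# being reprojected to the raster's CRS, so a hypothetical ``self.bbox`` for a
# lon/lat clip would look like:
#
#   self.bbox = (-91.0, 32.0, -90.0, 33.0)  # (xmin, ymin, xmax, ymax)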
def search_catalog(self, **kwargs):
    fmt = self.catalog_file_format
    paths = self._get_paths(self.catalog_file)

    all_catalog_entries = []
    for p in util.listify(paths):
        with uri_open(p, self.is_remote) as f:
            if fmt.lower() == 'geojson':
                catalog_entries = geojson.load(f)
                catalog_entries = util.to_geodataframe(catalog_entries)

            if fmt.lower() == 'mbr':
                # TODO: creating a FeatureCollection is not needed anymore;
                # this can be rewritten to create a pandas dataframe directly
                polys = []
                # skip the first line, which is a bounding polygon
                f.readline()
                for line in f:
                    catalog_id, x1, y1, x2, y2 = line.split()
                    properties = {}
                    polys.append(
                        Feature(geometry=util.bbox2poly(x1, y1, x2, y2,
                                                        as_geojson=True),
                                properties=properties,
                                id=catalog_id))
                catalog_entries = FeatureCollection(polys)
                catalog_entries = util.to_geodataframe(catalog_entries)

            if fmt.lower() == 'mbr-csv':
                # TODO: merge this with the above; the mbr format from
                # datalibrary is not exactly the same as the mbr format
                # in quest-demo-data
                polys = []
                for line in f:
                    catalog_id, y1, x1, y2, x2 = line.split(',')
                    catalog_id = catalog_id.split('.')[0]
                    properties = {}
                    polys.append(
                        Feature(geometry=util.bbox2poly(x1, y1, x2, y2,
                                                        as_geojson=True),
                                properties=properties,
                                id=catalog_id))
                catalog_entries = FeatureCollection(polys)
                catalog_entries = util.to_geodataframe(catalog_entries)

            if fmt.lower() == 'isep-json':
                # uses an exported json file from the ISEP DataBase,
                # assuming ISEP is a geotypical service for now
                catalog_entries = pd.read_json(p)
                catalog_entries.rename(columns={'_id': 'service_id'}, inplace=True)
                catalog_entries['download_url'] = catalog_entries['files'].apply(
                    lambda x: os.path.join(x[0].get('file location'),
                                           x[0].get('file name')))

                # remove the leading slash from the file path
                catalog_entries['download_url'] = catalog_entries['download_url'].str.lstrip('/')
                catalog_entries['parameters'] = 'met'

            all_catalog_entries.append(catalog_entries)

    # drop_duplicates fails when some columns have nested lists/tuples
    # (e.g. _geom_coords), so drop based on service_id instead
    catalog_entries = pd.concat(all_catalog_entries)
    catalog_entries = catalog_entries.drop_duplicates(subset='service_id')
    catalog_entries.index = catalog_entries['service_id']
    catalog_entries.sort_index(inplace=True)

    return catalog_entries
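# Illustrative catalog lines (hypothetical ids and coordinates) for the two
# MBR formats parsed above:
#
#   mbr (whitespace-separated; the first line is a bounding polygon and is
#   skipped):
#       site001 -91.0 32.0 -90.0 33.0        # id x1 y1 x2 y2
#
#   mbr-csv (comma-separated; latitude comes before longitude, and any file
#   extension is stripped from the id):
#       site001.tif,32.0,-91.0,33.0,-90.0    # id,y1,x1,y2,x2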