Example #1
def _bbox2poly(bbox):
    # pull the corner coordinates out of the nested 'boundingBox' record
    xmin = bbox['boundingBox']['minX']
    xmax = bbox['boundingBox']['maxX']
    ymin = bbox['boundingBox']['minY']
    ymax = bbox['boundingBox']['maxY']

    return util.bbox2poly(xmin, ymin, xmax, ymax, as_shapely=True)
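A minimal usage sketch (the nested dict layout mirrors the accessor above; the coordinate values are invented for illustration):

# hypothetical record shaped like the input the accessor above expects
bbox = {'boundingBox': {'minX': -91.5, 'maxX': -90.5,
                        'minY': 32.25, 'maxY': 33.75}}
poly = _bbox2poly(bbox)  # shapely Polygon covering the box
print(poly.bounds)       # (-91.5, 32.25, -90.5, 33.75)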
Example #2
    def _run_tool(self):

        dataset = self.dataset

        # get metadata, path, etc. from the first dataset, i.e. assume all
        # datasets are in the same folder. This will break if you try to
        # combine datasets from different providers

        orig_metadata = get_metadata(dataset)[dataset]
        src_path = orig_metadata['file_path']

        if self.new_crs is None:
            raise ValueError(
                "A new coordinated reference system MUST be provided")

        dst_crs = self.new_crs

        new_metadata = {
            'parameter': orig_metadata['parameter'],
            'datatype': orig_metadata['datatype'],
            'file_format': orig_metadata['file_format'],
        }

        new_dset, file_path, catalog_entry = self._create_new_dataset(
            old_dataset=dataset,
            ext='.tif',
            dataset_metadata=new_metadata,
        )

        # reproject: read the source CRS with rasterio, then shell out to gdalwarp
        with rasterio.open(src_path) as src:
            # write the reprojected tif to the new dataset's file path
            subprocess.check_output([
                'gdalwarp', src_path, file_path, '-s_srs',
                src.crs.to_string(), '-t_srs', dst_crs
            ])

        # record the output's bounding box on the catalog entry
        with rasterio.open(file_path) as f:
            geometry = util.bbox2poly(f.bounds.left,
                                      f.bounds.bottom,
                                      f.bounds.right,
                                      f.bounds.top,
                                      as_shapely=True)
        update_metadata(catalog_entry,
                        quest_metadata={'geometry': geometry.to_wkt()})

        return {'datasets': new_dset, 'catalog_entries': catalog_entry}
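The tool shells out to gdalwarp; the same reprojection step could also be done with rasterio's own warp module. A rough sketch under that assumption (the function name is mine, and this is not the tool's actual code path):

import rasterio
from rasterio.crs import CRS
from rasterio.warp import calculate_default_transform, reproject, Resampling

def reproject_raster(src_path, dst_path, dst_crs):
    # sketch only: a pure-rasterio stand-in for the gdalwarp call above
    dst_crs = CRS.from_user_input(dst_crs)
    with rasterio.open(src_path) as src:
        # compute the output grid for the target CRS
        transform, width, height = calculate_default_transform(
            src.crs, dst_crs, src.width, src.height, *src.bounds)
        profile = src.profile.copy()
        profile.update(crs=dst_crs, transform=transform,
                       width=width, height=height)
        with rasterio.open(dst_path, 'w', **profile) as dst:
            # warp each band onto the new grid
            for i in range(1, src.count + 1):
                reproject(source=rasterio.band(src, i),
                          destination=rasterio.band(dst, i),
                          src_transform=src.transform, src_crs=src.crs,
                          dst_transform=transform, dst_crs=dst_crs,
                          resampling=Resampling.nearest)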
Example #3
    def _run_tool(self):

        # if len(datasets) < 2:
        #     raise ValueError('There must be at LEAST two datasets for this filter')

        datasets = self.datasets

        orig_metadata = get_metadata(datasets[0])[datasets[0]]
        raster_files = [get_metadata(dataset)[dataset]['file_path'] for dataset in datasets]

        for dataset in datasets:
            if get_metadata(dataset)[dataset]['parameter'] != orig_metadata['parameter']:
                raise ValueError('Parameters must match for all datasets')
            if get_metadata(dataset)[dataset]['unit'] != orig_metadata['unit']:
                raise ValueError('Units must match for all datasets')

        new_metadata = {
            'parameter': orig_metadata['parameter'],
            'datatype': orig_metadata['datatype'],
            'file_format': orig_metadata['file_format'],
            'unit': orig_metadata['unit'],
        }

        new_dset, file_path, catalog_entry = self._create_new_dataset(
            old_dataset=datasets[0],
            ext='.tif',
            dataset_metadata=new_metadata,
        )

        open_datasets = [rasterio.open(d) for d in raster_files]
        profile = open_datasets[0].profile
        # hack to avoid a "nodata out of range of dtype" error for NED datasets
        profile['nodata'] = -32768.0 if profile['nodata'] == -3.4028234663853e+38 else profile['nodata']
        new_data, transform = rasterio.merge.merge(open_datasets, nodata=profile['nodata'])
        for d in open_datasets:
            d.close()
        profile.pop('tiled', None)
        profile.update(
            height=new_data.shape[1],
            width=new_data.shape[2],
            transform=transform,
            driver='GTiff'
        )
        with rasterio.open(file_path, 'w', **profile) as output:
            output.write(new_data.astype(profile['dtype']))

        bbox = self.bbox

        if bbox is not None:
            bbox = box(*bbox)
            geo = gpd.GeoDataFrame({'geometry': bbox}, index=[0], crs=from_epsg(4326))
            geo = geo.to_crs(crs=profile['crs'])
            bbox = geo.geometry

            with rasterio.open(file_path, 'r') as merged:
                new_data, transform = rasterio.mask.mask(dataset=merged, shapes=bbox, all_touched=True, crop=True)

            # profile.pop('tiled', None)
            profile.update(
                height=new_data.shape[1],
                width=new_data.shape[2],
                transform=transform,
            )
            with rasterio.open(file_path, 'w', **profile) as clipped:
                clipped.write(new_data)

        with rasterio.open(file_path) as f:
            geometry = util.bbox2poly(f.bounds.left, f.bounds.bottom, f.bounds.right, f.bounds.top, as_shapely=True)
        update_metadata(catalog_entry, quest_metadata={'geometry': geometry.to_wkt()})

        return {'datasets': new_dset, 'catalog_entries': catalog_entry}
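The merge step above, reduced to a self-contained sketch (the function name is mine; the tool's own version also applies the nodata hack, an optional bbox clip, and the catalog-metadata update):

import rasterio
import rasterio.merge

def merge_rasters(paths, out_path):
    # sketch only: mosaic several rasters onto a single grid
    sources = [rasterio.open(p) for p in paths]
    try:
        data, transform = rasterio.merge.merge(sources)
        profile = sources[0].profile.copy()
    finally:
        for src in sources:
            src.close()
    profile.update(driver='GTiff', height=data.shape[1],
                   width=data.shape[2], transform=transform)
    with rasterio.open(out_path, 'w', **profile) as dst:
        dst.write(data.astype(profile['dtype']))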
Example #4
        def search_catalog(self, **kwargs):
            fmt = self.catalog_file_format
            paths = self._get_paths(self.catalog_file)

            all_catalog_entries = []

            for p in util.listify(paths):
                with uri_open(p, self.is_remote) as f:
                    if fmt.lower() == 'geojson':
                        catalog_entries = geojson.load(f)
                        catalog_entries = util.to_geodataframe(catalog_entries)

                    if fmt.lower() == 'mbr':
                        # TODO: creating a FeatureCollection is not needed anymore;
                        # this can be rewritten to create a pandas dataframe directly
                        polys = []
                        # skip the first line, which is a bounding polygon
                        f.readline()
                        for line in f:
                            catalog_id, x1, y1, x2, y2 = line.split()
                            properties = {}
                            polys.append(
                                Feature(geometry=util.bbox2poly(
                                    x1, y1, x2, y2, as_geojson=True),
                                        properties=properties,
                                        id=catalog_id))
                        catalog_entries = FeatureCollection(polys)
                        catalog_entries = util.to_geodataframe(catalog_entries)

                    if fmt.lower() == 'mbr-csv':
                        # TODO: merge this with the above; the mbr format from
                        # datalibrary is not exactly the same as the mbr format
                        # in quest-demo-data
                        polys = []
                        for line in f:
                            catalog_id, y1, x1, y2, x2 = line.split(',')
                            catalog_id = catalog_id.split('.')[0]
                            properties = {}
                            polys.append(
                                Feature(geometry=util.bbox2poly(
                                    x1, y1, x2, y2, as_geojson=True),
                                        properties=properties,
                                        id=catalog_id))
                        catalog_entries = FeatureCollection(polys)
                        catalog_entries = util.to_geodataframe(catalog_entries)

                    if fmt.lower() == 'isep-json':
                        # uses an exported json file from the ISEP database
                        # assuming ISEP is a geotypical service for now
                        catalog_entries = pd.read_json(p)
                        catalog_entries.rename(columns={'_id': 'service_id'},
                                               inplace=True)
                        catalog_entries['download_url'] = catalog_entries['files'].apply(
                            lambda x: os.path.join(x[0].get('file location'),
                                                   x[0].get('file name')))
                        # remove leading slash from file path
                        catalog_entries['download_url'] = catalog_entries[
                            'download_url'].str.lstrip('/')
                        catalog_entries['parameters'] = 'met'

                all_catalog_entries.append(catalog_entries)

            # drop_duplicates fails when some columns have nested lists/tuples
            # like _geom_coords, so drop based on service_id instead
            catalog_entries = pd.concat(all_catalog_entries)
            catalog_entries = catalog_entries.drop_duplicates(
                subset='service_id')
            catalog_entries.index = catalog_entries['service_id']
            catalog_entries.sort_index(inplace=True)
            return catalog_entries
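For reference, here is how a single mbr-csv record would flow through the parser above. The record itself is invented, but the field order (id first, then latitudes before longitudes) follows the unpacking in the code:

line = 'n33w091.img,32.25,-91.5,33.75,-90.5'  # hypothetical record
catalog_id, y1, x1, y2, x2 = line.split(',')
catalog_id = catalog_id.split('.')[0]         # 'n33w091'
# reorder into (xmin, ymin, xmax, ymax) for the polygon helper
geom = util.bbox2poly(x1, y1, x2, y2, as_geojson=True)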