def instance(cls, service_name, service_data, provider, uri, is_remote):
    parameters = util.listify(service_data['metadata'].pop('parameters'))
    if len(parameters) > 1:
        cls.params()['parameter'].objects = sorted(parameters)
    else:
        cls.params()['parameter'] = param.String(default=parameters[0],
                                                 doc="""parameter""",
                                                 constant=True)

    self = UserServiceBase(name=service_name, provider=provider)
    self.service_name = service_name
    self.uri = uri
    self.is_remote = is_remote
    self._parameter_map = {p: p for p in parameters}
    for k, v in service_data['metadata'].items():
        setattr(self, k, v)

    self.service_folder = util.listify(service_data['service_folder'])
    if len(self.service_folder) > 1:
        # only one service folder is currently supported
        raise ValueError('Only a single service folder is supported.')
    else:
        self.service_folder = self.service_folder[0]

    self.catalog_file = service_data['features']['file']
    self.catalog_file_format = service_data['features']['format']
    self.datasets_mapping = service_data['datasets']['mapping']
    self.datasets_save_folder = service_data['datasets']['save_folder']
    self.datasets_metadata = service_data['datasets'].get('metadata', None)

    return self
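# A hypothetical `service_data` dict illustrating the keys that `instance`
# reads above; the schema of real provider files may differ, every value shown
# here is made up, and the call assumes `instance` is registered as a
# classmethod:
#
#     service_data = {
#         'metadata': {'parameters': ['streamflow'], 'display_name': 'Example Service'},
#         'service_folder': ['example_folder'],
#         'features': {'file': 'catalog.geojson', 'format': 'geojson'},
#         'datasets': {
#             'mapping': '<feature>.csv',
#             'save_folder': 'data',
#         },
#     }
#
#     svc = UserServiceBase.instance('example-service', service_data, provider,
#                                    uri='https://example.com/data', is_remote=True)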
def download(self, catalog_id, file_path, dataset, **kwargs):
    if self.datasets_mapping is not None:
        fnames = self.datasets_mapping
        if isinstance(self.datasets_mapping, dict):
            fnames = self.datasets_mapping[self.parameter]
        fnames = [f.replace('<feature>', catalog_id)
                  for f in util.listify(fnames)]
    else:
        fnames = self.catalog_entries.loc[catalog_id]['_download_url']
        # TODO where does self.catalog_entries get initialized?

    final_path = []
    for src, file_name in zip(self._get_paths(fnames), fnames):
        dst = file_path
        if self.datasets_save_folder is not None:
            dst = os.path.join(dst, self.datasets_save_folder,
                               self.service_folder)

        dst = os.path.join(dst, file_name)
        base, _ = os.path.split(dst)
        os.makedirs(base, exist_ok=True)
        final_path.append(dst)
        if self.is_remote:
            with warnings.catch_warnings():
                warnings.filterwarnings(
                    "ignore",
                    category=requests.packages.urllib3.exceptions.InsecureRequestWarning)
                r = requests.get(src, verify=False)
                if r.status_code == 200:  # only download if the file exists
                    chunk_size = 64 * 1024
                    with open(dst, 'wb') as f:
                        for content in r.iter_content(chunk_size):
                            f.write(content)
        else:
            if os.path.exists(src):
                shutil.copyfile(src, dst)  # only copy if the file exists

    # TODO need to deal with parameters if multiple params exist
    if len(final_path) == 1:
        final_path = final_path[0]
    else:
        final_path = ','.join(final_path)

    metadata = {
        'file_path': final_path,
        'file_format': self.file_format,
        'datatype': self.datatype,
        'parameter': self.parameters['parameters'][0],
    }

    if self.datasets_metadata is not None:
        metadata.update(self.datasets_metadata)

    return metadata
def _get_paths(self, filenames):
    folder = self.service_folder
    paths = list()
    for filename in util.listify(filenames):
        if self.uri.startswith('http'):
            paths.append(self.uri.rstrip('/') +
                         '/{}/{}'.format(folder, filename))
        else:
            paths.append(os.path.join(self.uri, folder, filename))

    return paths
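# Path construction sketch (hypothetical values): a remote URI has the folder
# and filename appended with '/', while a local URI goes through os.path.join,
# so a catalog file 'file.csv' in folder 'svc' resolves to:
#
#     uri = 'https://example.com/data'  ->  'https://example.com/data/svc/file.csv'
#     uri = '/data/services'            ->  '/data/services/svc/file.csv'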
def _combine_dicts(self, this, other):
    """Helper function for `get_tags` to combine dictionaries
    by aggregating values rather than overwriting them.
    """
    for k, other_v in other.items():
        other_v = util.listify(other_v)
        if k in this:
            this_v = this[k]
            if isinstance(this_v, list):
                other_v.extend(this_v)
            else:
                other_v.append(this_v)
        this[k] = other_v
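# Sketch of the intended merge behavior, assuming util.listify wraps a scalar
# in a single-element list (keys and values are illustrative only):
#
#     this = {'tags': ['a'], 'source': 'x'}
#     self._combine_dicts(this, {'tags': 'b', 'source': 'y', 'new': 'z'})
#     # this -> {'tags': ['b', 'a'], 'source': ['y', 'x'], 'new': ['z']}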
def points_to_shp(points, shp_file=None):
    """Take a list of coordinates or Shapely Point objects
    and write them to a shapefile.
    """
    points = listify(points)
    test_point = points[0]
    if isinstance(test_point, Point):
        pts = points
    elif isinstance(test_point, (list, tuple)):
        pts = [Point(*xy) for xy in points]
    elif isinstance(test_point, (float, int)):
        pts = [Point(points)]

    shp_file = shp_file or os.path.join(
        whitebox_temp_dir, '{}_{}.{}'.format('point', time.time(), 'shp'))

    gdf = gpd.GeoDataFrame(geometry=pts)
    gdf.to_file(shp_file)

    return shp_file
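# Example usage (hypothetical paths; geopandas and shapely are already required
# by the function, and listify is assumed to wrap a lone Point in a list):
#
#     shp = points_to_shp([(-95.2, 29.8), (-95.4, 29.9)], shp_file='/tmp/points.shp')
#     shp = points_to_shp(Point(-95.2, 29.8))  # a single Point object also works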
def download(self, catalog_id, file_path, dataset, **kwargs):
    p = param.ParamOverrides(self, kwargs)
    bbox = listify(p.bbox)
    tile_indices = self._get_indices_from_bbox(*bbox, zoom_level=p.zoom_level)
    pixel_indices = self._get_indices_from_bbox(*bbox,
                                                zoom_level=p.zoom_level,
                                                as_pixels=True)
    tile_bbox = self._get_bbox_from_indices(*tile_indices,
                                            zoom_level=p.zoom_level)
    pixel_bbox = self._get_bbox_from_indices(*pixel_indices,
                                             zoom_level=p.zoom_level,
                                             from_pixels=True)

    if p.crop_to_bbox:
        upper_left_corner = tile_bbox[0], tile_bbox[3]
        crop_bbox = self._get_crop_bbox(pixel_indices, *upper_left_corner,
                                        zoom_level=p.zoom_level)
        adjusted_bbox = pixel_bbox
    else:
        crop_bbox = None
        adjusted_bbox = tile_bbox

    image_array = self._download_and_stitch_tiles(p.url, tile_indices,
                                                  crop_bbox, p.zoom_level,
                                                  p.max_tiles)

    file_path = os.path.join(file_path, dataset + '.tiff')
    self._write_image_to_tif(image_array, adjusted_bbox, file_path)

    metadata = {
        'metadata': {'bbox': adjusted_bbox},
        'file_path': file_path,
        'file_format': 'raster-gdal',
        'datatype': 'image',
    }

    return metadata
def publish(self, **kwargs):
    p = param.ParamOverrides(self, kwargs)
    valid_file_paths = []
    valid_extensions = []

    if p.resource_type == "":
        raise ValueError("There was no resource type selected.")
    else:
        resource_type = self._resource_type_map[p.resource_type]

    datasets = listify(p.datasets)

    extension_dict = {
        'GeographicFeatureResource': [
            '.zip', '.shp', '.shx', '.dbf', '.prj', '.sbx', '.sbn', '.cpg',
            '.xml', '.fbn', '.fbx', '.ain', '.alh', '.atx', '.ixs', '.mxs'
        ],
        'RasterResource': ['.zip', '.tif'],
        'NetcdfResource': ['.nc'],
        'ScriptResource': ['.r', '.py', '.m'],
        'TimeSeriesResource': ['.sqlite', '.csv'],
    }

    if resource_type in [
            'GeographicFeatureResource', 'RasterResource', 'NetcdfResource',
            'ScriptResource', 'TimeSeriesResource'
    ]:
        valid_extensions = extension_dict[resource_type]

    if len(datasets) > 1 and resource_type in [
            'TimeSeriesResource', 'RasterResource'
    ]:
        raise ValueError(
            "The selected resource cannot have more than one dataset.")

    if len(datasets) == 0:
        raise ValueError("There was no dataset selected.")

    for dataset in datasets:
        dataset_metadata = get_metadata(dataset)[dataset]
        fpath = dataset_metadata['file_path']
        filename, file_extension = os.path.splitext(fpath)
        if len(valid_extensions) != 0:
            if file_extension in valid_extensions:
                valid_file_paths.append(fpath)
            else:
                raise ValueError(
                    "There was a problem with one of the dataset file "
                    "extensions for your resource.")
        else:
            valid_file_paths.append(fpath)

    resource_id = self.create_resource(resource_type=resource_type,
                                       title=p.title,
                                       file_path=valid_file_paths[0],
                                       keywords=p.keywords,
                                       abstract=p.abstract)

    for path in valid_file_paths[1:]:
        self.add_file_to_resource(resource_id, path)

    return resource_id
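# Hypothetical call: the keyword names come from the ParamOverrides used above,
# but the valid `resource_type` labels depend on self._resource_type_map, which
# is not shown here:
#
#     resource_id = provider.publish(resource_type='Geographic Feature',
#                                    title='Watershed boundaries',
#                                    abstract='Example abstract',
#                                    keywords=['demo'],
#                                    datasets='<dataset-id>')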
def search_catalog(self, **kwargs):
    fmt = self.catalog_file_format
    paths = self._get_paths(self.catalog_file)

    all_catalog_entries = []

    for p in util.listify(paths):
        with uri_open(p, self.is_remote) as f:
            if fmt.lower() == 'geojson':
                catalog_entries = geojson.load(f)
                catalog_entries = util.to_geodataframe(catalog_entries)

            if fmt.lower() == 'mbr':
                # TODO creating a FeatureCollection is not needed anymore;
                # this can be rewritten to build a pandas dataframe directly
                polys = []
                # skip the first line, which is a bounding polygon
                f.readline()
                for line in f:
                    catalog_id, x1, y1, x2, y2 = line.split()
                    properties = {}
                    polys.append(
                        Feature(geometry=util.bbox2poly(x1, y1, x2, y2,
                                                        as_geojson=True),
                                properties=properties,
                                id=catalog_id))
                catalog_entries = FeatureCollection(polys)
                catalog_entries = util.to_geodataframe(catalog_entries)

            if fmt.lower() == 'mbr-csv':
                # TODO merge this with the 'mbr' branch above; the mbr format
                # from datalibrary is not exactly the same as the mbr format
                # in quest-demo-data
                polys = []
                for line in f:
                    catalog_id, y1, x1, y2, x2 = line.split(',')
                    catalog_id = catalog_id.split('.')[0]
                    properties = {}
                    polys.append(
                        Feature(geometry=util.bbox2poly(x1, y1, x2, y2,
                                                        as_geojson=True),
                                properties=properties,
                                id=catalog_id))
                catalog_entries = FeatureCollection(polys)
                catalog_entries = util.to_geodataframe(catalog_entries)

            if fmt.lower() == 'isep-json':
                # uses an exported json file from the ISEP database,
                # assuming ISEP is a geotypical service for now
                catalog_entries = pd.read_json(p)
                catalog_entries.rename(columns={'_id': 'service_id'},
                                       inplace=True)
                catalog_entries['download_url'] = catalog_entries['files'].apply(
                    lambda x: os.path.join(x[0].get('file location'),
                                           x[0].get('file name')))
                # remove the leading slash from the file path
                catalog_entries['download_url'] = catalog_entries[
                    'download_url'].str.lstrip('/')
                catalog_entries['parameters'] = 'met'

            all_catalog_entries.append(catalog_entries)

    # drop_duplicates fails when some columns contain nested lists/tuples
    # (e.g. _geom_coords), so drop duplicates based on service_id instead
    catalog_entries = pd.concat(all_catalog_entries)
    catalog_entries = catalog_entries.drop_duplicates(subset='service_id')
    catalog_entries.index = catalog_entries['service_id']
    catalog_entries.sort_index(inplace=True)

    return catalog_entries
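# Catalog line formats implied by the parsers above (values are illustrative):
#
#     mbr (whitespace-delimited; the first line is a bounding polygon and is skipped):
#         site001 -95.40 29.50 -95.10 29.90        # id x1 y1 x2 y2
#
#     mbr-csv (comma-delimited; note the y1,x1,y2,x2 ordering and that the id is
#     truncated at the first '.'):
#         site001.ext,29.50,-95.40,29.90,-95.10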