def oisst_avhrr_sst() -> SampleStructuredXY:
    """
    Load NOAA/NCEI OISST AVHRR rectilinear mesh.

    The ``oisst-avhrr.nc`` resource is fetched through the :data:`CACHE`
    and decompressed before being read.

    Returns
    -------
    SampleStructuredXY
        The rectilinear spatial coordinates and data payload.

    Notes
    -----
    .. versionadded:: 0.1.0

    """
    fname = "oisst-avhrr.nc"
    processor = pooch.Decompress(method="auto", name=fname)
    resource = CACHE.fetch(f"pantry/{fname}.bz2", processor=processor)

    # everything is read eagerly inside the context manager so that the
    # underlying file handle is released before the sample is returned
    with nc.Dataset(resource) as ds:
        # load the lon/lat grid
        lons = ds.variables["lon_bnds"][:]
        lats = ds.variables["lat_bnds"][:]

        # load the mesh payload, taking the first index of the two
        # leading dimensions
        variable = ds.variables["sst"]
        name = capitalise(variable.long_name)
        units = variable.units
        data = variable[0, 0]

    return SampleStructuredXY(lons, lats, data=data, name=name, units=units)
def fetch_coastlines(resolution: Optional[str] = None) -> pv.PolyData:
    """
    Get the Natural Earth coastlines for the required resolution.

    The resource is served from the GeoVista :data:`CACHE`; when it is not
    yet available there it is downloaded from the :data:`BASE_URL`.

    Parameters
    ----------
    resolution : str, optional
        The resolution of the Natural Earth coastlines, which may be either
        ``110m``, ``50m`` or ``10m``. Default is
        :data:`DEFAULT_RESOLUTION_COASTLINES`.

    Returns
    -------
    PolyData
        The coastlines mesh.

    Notes
    -----
    .. versionadded:: 0.1.0

    """
    chosen = DEFAULT_RESOLUTION_COASTLINES if resolution is None else resolution
    vtk_name = f"ne_coastlines_{chosen}.vtk"

    # the cached asset is bz2 compressed; decompress it to the plain VTK name
    decompressor = pooch.Decompress(method="auto", name=vtk_name)
    local_path = CACHE.fetch(
        f"natural_earth/physical/{vtk_name}.bz2", processor=decompressor
    )

    return pv.read(local_path)
def fetch_topography_earth():
    """
    Fetch a global grid of Earth relief (topography and bathymetry).

    The grid is based on the ETOPO1 model [AmanteEakins2009]_. The original
    model has 1 arc-minute grid spacing but here we downsampled to 0.5 degree
    grid spacing to save space and download times. The downsampled grid was
    generated from a spherical harmonic model using the `ICGEM Calculation
    Service <http://icgem.gfz-potsdam.de/>`__. See the ``attrs`` attribute of
    the returned :class:`xarray.Dataset` for information regarding the grid
    generation.

    ETOPO1 heights are referenced to "sea level".

    If the file isn't already in your data directory, it will be downloaded
    automatically.

    Returns
    -------
    grid : :class:`xarray.Dataset`
        The topography grid (in meters) relative to sea level. Coordinates
        are geodetic latitude and longitude.

    """
    path = REGISTRY.fetch("etopo1-0.5deg.nc.xz", processor=pooch.Decompress())
    raw = xr.open_dataset(path, engine="scipy")

    # The dtype conversion below drops the dataset-level attributes, so keep
    # a copy of them to restore afterwards.
    saved_attrs = raw.attrs.copy()

    # The values are stored as int16 to save disk space; promote them to
    # float64 to avoid integer division problems when processing.
    grid = raw.astype("float64")
    grid.attrs = saved_attrs
    return grid
def fetch_gravity_earth():
    """
    Fetch a global grid of Earth gravity.

    Gravity is the magnitude of the gravity vector of the Earth
    (gravitational + centrifugal). The gravity observations are at 10 km
    (geometric) height and on a regular grid with 0.5 degree spacing. The
    grid was generated from the spherical harmonic model EIGEN-6C4
    [Forste_etal2014]_ using the `ICGEM Calculation Service
    <http://icgem.gfz-potsdam.de/>`__. See the ``attrs`` attribute of the
    :class:`xarray.Dataset` for information regarding the grid generation.

    If the file isn't already in your data directory, it will be downloaded
    automatically.

    Returns
    -------
    grid : :class:`xarray.Dataset`
        The gravity grid (in mGal). Includes a computation (geometric) height
        grid (``height_over_ell``). Coordinates are geodetic latitude and
        longitude.

    """
    path = REGISTRY.fetch(
        "gravity-earth-0.5deg.nc.xz", processor=pooch.Decompress()
    )
    raw = xr.open_dataset(path, engine="scipy")

    # The dtype conversion below drops the dataset-level attributes, so keep
    # a copy of them to restore afterwards.
    saved_attrs = raw.attrs.copy()

    # The file uses compact integer/float32 storage to save space; promote
    # everything to float64 to avoid integer division errors.
    grid = raw.astype("float64")
    grid.attrs = saved_attrs
    return grid
def fetch_geoid_earth():
    """
    Fetch a global grid of the geoid height.

    The geoid height is the height of the geoid above (positive) or below
    (negative) the ellipsoid (WGS84). The data are on a regular grid with 0.5
    degree spacing, which was generated from the spherical harmonic model
    EIGEN-6C4 [Forste_etal2014]_ using the `ICGEM Calculation Service
    <http://icgem.gfz-potsdam.de/>`__. See the ``attrs`` attribute of the
    :class:`xarray.Dataset` for information regarding the grid generation.

    If the file isn't already in your data directory, it will be downloaded
    automatically.

    Returns
    -------
    grid : :class:`xarray.Dataset`
        The geoid grid (in meters). Coordinates are geodetic latitude and
        longitude.

    """
    path = REGISTRY.fetch(
        "geoid-earth-0.5deg.nc.xz", processor=pooch.Decompress()
    )
    raw = xr.open_dataset(path, engine="scipy")

    # The dtype conversion below drops the dataset-level attributes, so keep
    # a copy of them to restore afterwards.
    saved_attrs = raw.attrs.copy()

    # The file uses compact integer/float32 storage to save space; promote
    # everything to float64 to avoid integer division errors.
    grid = raw.astype("float64")
    grid.attrs = saved_attrs
    return grid
def ww3_global_smc(step: Optional[int] = None) -> SampleUnstructuredXY:
    """
    Load the WAVEWATCH III (WW3) unstructured Spherical Multi-Cell (SMC) mesh.

    Parameters
    ----------
    step : int, optional
        Timeseries index offset. Defaults to the first step. The offset
        wraps around, modulo the number of steps in the ``time`` dimension.

    Returns
    -------
    SampleUnstructuredXY
        The unstructured spatial coordinates and data payload.

    Notes
    -----
    .. versionadded:: 0.1.0

    """
    fname = "ww3_gbl_smc_hs.nc"
    processor = pooch.Decompress(method="auto", name=fname)
    resource = CACHE.fetch(f"pantry/ww3/{fname}.bz2", processor=processor)

    # everything is read eagerly inside the context manager so that the
    # underlying file handle is released before the sample is returned
    with nc.Dataset(resource) as ds:
        # load the lon/lat grid cell centres
        cc_lons = ds.variables["longitude"][:]
        cc_lats = ds.variables["latitude"][:]

        # load integer scaling factor for the grid cells
        cx = ds.variables["cx"][:]
        cy = ds.variables["cy"][:]
        base_lon_size = ds.getncattr("base_lon_size")
        base_lat_size = ds.getncattr("base_lat_size")

        # deal with the timeseries step
        steps = ds.dimensions["time"].size
        idx = 0 if step is None else (step % steps)

        # load mesh payload
        variable = ds.variables["hs"]
        name = capitalise(variable.standard_name)
        units = variable.units
        data = variable[idx]

    # construct the grid cells: each cell spans half its extent either side
    # of the cell centre
    dlon = cx * base_lon_size
    dlat = cy * base_lat_size
    fac = 0.5
    x1 = (cc_lons - fac * dlon).reshape(-1, 1)
    x2 = (cc_lons + fac * dlon).reshape(-1, 1)
    y1 = (cc_lats - fac * dlat).reshape(-1, 1)
    y2 = (cc_lats + fac * dlat).reshape(-1, 1)

    # four corner nodes per cell
    lons = np.hstack([x1, x2, x2, x1])
    lats = np.hstack([y1, y1, y2, y2])

    return SampleUnstructuredXY(
        lons, lats, lons.shape, data=data, name=name, units=units, steps=steps
    )
def fetch_mosaic_of_antarctica():
    """
    Fetch the MODIS optical image mosaic of Antarctica.

    The ``moa750_2009_hp1_v02.0.tif.gz`` resource is requested from
    ``nsidc_data`` using the Earthdata downloader and is passed through
    :class:`pooch.Decompress`.

    Returns
    -------
    The value returned by ``nsidc_data.fetch`` (presumably the path to the
    decompressed file -- confirm against the registry's API).
    """
    archive = "moa750_2009_hp1_v02.0.tif.gz"
    return nsidc_data.fetch(
        archive,
        downloader=_earthdata_downloader,
        processor=pooch.Decompress(),
    )
def lfric(resolution: Optional[str] = None) -> pv.PolyData:
    """
    Get the LFRic Model unstructured cubed-sphere at the specified resolution.

    The resource is served from the GeoVista :data:`CACHE`; when it is not
    yet available there it is downloaded from the :data:`BASE_URL`.

    Parameters
    ----------
    resolution : str, optional
        The resolution of the LFRic Model mesh. Defaults to
        :data:`DEFAULT_RESOLUTION_LFRIC`.

    Returns
    -------
    PolyData
        The LFRic mesh.

    Notes
    -----
    .. versionadded:: 0.1.0

    """
    chosen = DEFAULT_RESOLUTION_LFRIC if resolution is None else resolution
    vtk_name = f"lfric_{chosen}.vtk"

    # the cached asset is bz2 compressed; decompress it to the plain VTK name
    decompressor = pooch.Decompress(method="auto", name=vtk_name)
    local_path = CACHE.fetch(f"mesh/{vtk_name}.bz2", processor=decompressor)

    return pv.read(local_path)
def hexahedron() -> SampleUnstructuredXY:
    """
    Load DYNAMICO hexahedron unstructured mesh.

    The ``hexahedron.nc`` resource is fetched through the :data:`CACHE`
    and decompressed before being read.

    Returns
    -------
    SampleUnstructuredXY
        The hexagonal unstructured spatial coordinates and data payload.

    Notes
    -----
    .. versionadded:: 0.1.0

    """
    fname = "hexahedron.nc"
    processor = pooch.Decompress(method="auto", name=fname)
    resource = CACHE.fetch(f"pantry/{fname}.bz2", processor=processor)

    # everything is read eagerly inside the context manager so that the
    # underlying file handle is released before the sample is returned
    with nc.Dataset(resource) as ds:
        # load the lon/lat hex cell grid
        lons = ds.variables["bounds_lon_i"][:]
        lats = ds.variables["bounds_lat_i"][:]

        # load the mesh payload
        data = ds.variables["phis"][:]

    # the name and units are fixed here rather than read from the file
    name = capitalise("synthetic")
    units = 1

    return SampleUnstructuredXY(
        lons, lats, lons.shape, data=data, name=name, units=units
    )
def fetch_south_africa_topography():
    """
    Fetch downsampled ETOPO1 topography grid for South Africa.

    The grid is based on the ETOPO1 model [AmanteEakins2009]_. The original
    model has 1 arc-minute grid spacing but here we downsampled to 0.1 degree
    grid spacing to save space and download times and cut it to the South
    Africa region.

    ETOPO1 heights are referenced to "sea level".

    If the file isn't already in your data directory, it will be downloaded
    automatically.

    Returns
    -------
    grid : :class:`xarray.Dataset`
        The topography grid (in meters) relative to sea level. Coordinates
        are geodetic latitude and longitude.

    """
    path = REGISTRY.fetch(
        "south-africa-topography.nc.xz", processor=pooch.Decompress()
    )
    # unlike the global grids, this dataset is returned without a dtype cast
    return xr.open_dataset(path, engine="scipy")
def um_orca2() -> SampleStructuredXY:
    """
    Load Met Office Unified Model ORCA2 curvilinear mesh.

    The ``votemper.nc`` resource is fetched through the :data:`CACHE` and
    decompressed before being read.

    Returns
    -------
    SampleStructuredXY
        The curvilinear spatial coordinates and data payload.

    Notes
    -----
    .. versionadded:: 0.1.0

    """
    fname = "votemper.nc"
    processor = pooch.Decompress(method="auto", name=fname)
    resource = CACHE.fetch(f"pantry/{fname}.bz2", processor=processor)

    # everything is read eagerly inside the context manager so that the
    # underlying file handle is released before the sample is returned
    with nc.Dataset(resource) as ds:
        # load the lon/lat grid
        lons = ds.variables["lont_bounds"][:]
        lats = ds.variables["latt_bounds"][:]

        # load the mesh payload, taking the first index of the two
        # leading dimensions
        variable = ds.variables["votemper"]
        name = capitalise(variable.standard_name)
        units = variable.units
        data = variable[0, 0]

    return SampleStructuredXY(lons, lats, data=data, name=name, units=units)
def choose_processor(url):
    """
    Choose the pooch processor needed to unpack the resource at ``url``.

    The decision is based on the file extension at the end of the URL *path*
    (case-insensitive). The query string, fragment, host name and
    intermediate directories are ignored so that, for example, a host called
    ``files.gz.example.com`` does not trigger decompression.

    Parameters
    ----------
    url : str
        The URL (or path) of the resource.

    Returns
    -------
    pooch.Untar, pooch.Unzip, pooch.Decompress or None
        ``pooch.Untar`` for ``.tar``, ``.tgz`` and ``.tar.gz``;
        ``pooch.Unzip`` for ``.zip``; ``pooch.Decompress`` for ``.gz`` (the
        historical ``.gz2`` spelling is still accepted); ``None`` when no
        processing is required.

    """
    from urllib.parse import urlsplit

    try:
        path = urlsplit(url).path
    except ValueError:
        # not parseable as a URL; fall back to treating it as a plain path
        path = url
    path = path.lower()

    # archives are tested first so that ".tar.gz" is untarred rather than
    # merely decompressed
    if path.endswith((".tar", ".tgz", ".tar.gz")):
        return pooch.Untar()
    if path.endswith(".zip"):
        return pooch.Unzip()
    if path.endswith((".gz2", ".gz")):
        return pooch.Decompress()

    return None
def lfric_sst() -> SampleUnstructuredXY:
    """
    Load CF UGRID global unstructured mesh.

    The ``qrclim.sst.ugrid.nc`` resource is fetched through the
    :data:`CACHE` and decompressed before being read.

    Returns
    -------
    SampleUnstructuredXY
        The unstructured spatial coordinates and data payload.

    Notes
    -----
    .. versionadded:: 0.1.0

    """
    fname = "qrclim.sst.ugrid.nc"
    processor = pooch.Decompress(method="auto", name=fname)
    resource = CACHE.fetch(f"pantry/{fname}.bz2", processor=processor)

    # everything is read eagerly inside the context manager so that the
    # underlying file handle is released before the sample is returned
    with nc.Dataset(resource) as ds:
        # load the lon/lat cell grid
        lons = ds.variables["dynamics_node_x"][:]
        lats = ds.variables["dynamics_node_y"][:]

        # load the face/node connectivity, along with the index base that
        # the file declares for it
        face_nodes = ds.variables["dynamics_face_nodes"]
        start_index = face_nodes.start_index
        connectivity = face_nodes[:]

        # load the mesh payload
        variable = ds.variables["surface_temperature"]
        name = capitalise(variable.standard_name)
        units = variable.units
        data = variable[:]

    return SampleUnstructuredXY(
        lons,
        lats,
        connectivity,
        data=data,
        start_index=start_index,
        name=name,
        units=units,
    )
def _read_edge_list(path):
    """Parse a whitespace-delimited edge list file into ``(int, int)`` pairs."""
    edges = []
    with open(path) as handle:
        for line in handle:
            # split on any run of spaces/tabs; only the first two columns
            # (source, target) are used
            fields = line.split()
            if not fields:
                # tolerate blank lines, e.g. a trailing newline at EOF
                continue
            edges.append((int(fields[0]), int(fields[1])))
    return edges


def fetch_network_data(net_name: str = "karate_club", net_type: str = "igraph") -> object:
    """
    Load the required network from the remote repository.

    The ``<net_name>.csv.gz`` edge list is fetched (with a progress bar),
    decompressed and parsed into a graph.

    :param net_name: network name
    :param net_type: desired graph object among "networkx" and "igraph".
        Default, igraph. Any value other than "networkx" selects igraph.
    :return: a graph object
    :raises ModuleNotFoundError: if an igraph graph is requested but
        python-igraph is not installed

    :Example:

    >>> from cdlib import datasets
    >>> G = datasets.fetch_network_data(net_name="karate_club", net_type="igraph")
    """
    download = pooch.HTTPDownloader(progressbar=True)
    fname = __networks.fetch(
        f"{net_name}.csv.gz", processor=pooch.Decompress(), downloader=download
    )

    if net_type == "networkx":
        g = nx.Graph()
        g.add_edges_from(_read_edge_list(fname))
        return g

    if ig is None:
        raise ModuleNotFoundError(
            "Optional dependency not satisfied: install python-igraph to use the selected "
            "feature.")

    return ig.Graph.TupleList(_read_edge_list(fname))
def ww3_global_tri() -> SampleUnstructuredXY:
    """
    Load the WAVEWATCH III (WW3) unstructured triangular mesh.

    The ``ww3_gbl_tri_hs.nc`` resource is fetched through the :data:`CACHE`
    and decompressed before being read.

    Returns
    -------
    SampleUnstructuredXY
        The unstructured spatial coordinates and data payload.

    Notes
    -----
    .. versionadded:: 0.1.0

    """
    fname = "ww3_gbl_tri_hs.nc"
    processor = pooch.Decompress(method="auto", name=fname)
    resource = CACHE.fetch(f"pantry/ww3/{fname}.bz2", processor=processor)

    # everything is read eagerly inside the context manager so that the
    # underlying file handle is released before the sample is returned
    with nc.Dataset(resource) as ds:
        # load the lon/lat points
        lons = ds.variables["longitude"][:]
        lats = ds.variables["latitude"][:]

        # load the face/node connectivity
        offset = 1  # minimum connectivity index offset
        connectivity = ds.variables["tri"][:] - offset

        # we know this is a single step timeseries, a priori
        idx = 0

        # load mesh payload
        variable = ds.variables["hs"]
        name = capitalise(variable.standard_name)
        units = variable.units
        data = variable[idx]

    return SampleUnstructuredXY(
        lons, lats, connectivity, data=data, name=name, units=units
    )
def fvcom_tamar() -> SampleUnstructuredXY:
    """
    Load PML FVCOM unstructured mesh.

    The ``fvcom_tamar.nc`` resource is fetched through the :data:`CACHE`
    and decompressed before being read.

    Returns
    -------
    SampleUnstructuredXY
        The unstructured spatial coordinates and data payload.

    Notes
    -----
    .. versionadded:: 0.1.0

    """
    fname = "fvcom_tamar.nc"
    processor = pooch.Decompress(method="auto", name=fname)
    resource = CACHE.fetch(f"pantry/{fname}.bz2", processor=processor)

    # everything is read eagerly inside the context manager so that the
    # underlying file handle is released before the sample is returned
    with nc.Dataset(resource) as ds:
        # load the lon/lat cell grid
        lons = ds.variables["lon"][:]
        lats = ds.variables["lat"][:]

        # load the face/node connectivity
        offset = 1  # minimum connectivity index offset
        connectivity = ds.variables["nv"][:] - offset

        # load the mesh payload: "h_center" is passed as the face data and
        # "h" as the node data
        face_variable = ds.variables["h_center"]
        name = capitalise(face_variable.standard_name)
        units = face_variable.units
        face = face_variable[:]
        node = ds.variables["h"][:]

    # the connectivity is transposed before being handed to the sample
    return SampleUnstructuredXY(
        lons, lats, connectivity.T, face=face, node=node, name=name, units=units
    )
def fetch_ground_truth_data(net_name="karate_club", graph=None):
    """
    Load the required ground truth clustering from the remote repository.

    The ``<net_name>.json.gz`` resource is fetched (with a progress bar),
    decompressed and read with ``read_community_json``.

    :param net_name: network name
    :param graph: the graph object associated to the ground truth (optional);
        when given it is assigned to the ``graph`` attribute of the result
    :return: a NodeClustering object

    :Example:

    >>> from cdlib import datasets
    >>> gt_coms = datasets.fetch_ground_truth_data(net_name="karate_club")
    """
    progress_downloader = pooch.HTTPDownloader(progressbar=True)
    local_json = __ground_truths.fetch(
        f"{net_name}.json.gz",
        processor=pooch.Decompress(),
        downloader=progress_downloader,
    )

    clustering = read_community_json(local_json)
    if graph is None:
        return clustering

    # attach the caller-supplied graph to the clustering
    clustering.graph = graph
    return clustering
def fetch_mosaic_of_antarctica():
    """
    Fetch the ``moa750_2009_hp1_v01.1.tif.gz`` resource from ``moa``.

    The request uses the Earthdata downloader and the result is passed
    through :class:`pooch.Decompress`.

    Returns
    -------
    The value returned by ``moa.fetch`` (presumably the path to the
    decompressed file -- confirm against the registry's API).
    """
    archive = 'moa750_2009_hp1_v01.1.tif.gz'
    return moa.fetch(
        archive,
        downloader=_earthdata_downloader,
        processor=pooch.Decompress(),
    )
def example_filepath(filename: str) -> Path:
    """
    Fetch ``<filename>.gz`` through ``goodboy`` and return its local path.

    The resource is passed through :class:`pooch.Decompress` with default
    settings.

    Parameters
    ----------
    filename : str
        Name of the resource without the ``.gz`` suffix.

    Returns
    -------
    Path
        The value returned by ``goodboy.fetch``, wrapped in a ``Path``.
    """
    archive = filename + ".gz"
    fetched = goodboy.fetch(archive, processor=pooch.Decompress())
    return Path(fetched)
def get(filename: str) -> Path:
    """
    Fetch ``<filename>.gz`` through ``goodboy`` and return its local path.

    The resource is passed through :class:`pooch.Decompress`, which is given
    ``filename`` as the ``name`` of its output.

    Parameters
    ----------
    filename : str
        Name of the resource without the ``.gz`` suffix.

    Returns
    -------
    Path
        The value returned by ``goodboy.fetch``, wrapped in a ``Path``.
    """
    archive = filename + ".gz"
    decompressor = pooch.Decompress(name=filename)
    fetched = goodboy.fetch(archive, processor=decompressor)
    return Path(fetched)