def init(self):
    # TODO check prefix and the options at the next level
    self._prefix = "%s/%s/%s/%s/" % (self.parameter, self.level, self.date, self.hour)
    self.forecasts = [obj.key.replace(self._prefix, "") for obj in bucket.objects.filter(Prefix=self._prefix)]

    if not self.forecasts:
        raise ValueError("Not found: '%s/*'" % self._prefix)

    params = {
        "parameter": self.parameter,
        "level": self.level,
        "date": self.date,
        "hour": self.hour,
        "cache_ctrl": self.cache_ctrl,
    }
    self._sources = np.array([GFSSource(forecast=h, **params) for h in self.forecasts])  # can we load this lazily?

    nc = self._sources[0].native_coordinates
    base_time = datetime.datetime.strptime("%s %s" % (self.date, self.hour), "%Y%m%d %H%M")
    forecast_times = [base_time + datetime.timedelta(hours=int(h)) for h in self.forecasts]
    tc = Coordinates([[dt.strftime("%Y-%m-%d %H:%M") for dt in forecast_times]], dims=["time"])
    self.native_coordinates = merge_dims([nc, tc])
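# NOTE: `init` above relies on a module-level `bucket` that is not shown in this snippet.
# A minimal sketch of how that handle could be set up, assuming anonymous access to a
# public GFS bucket (the bucket name "noaa-gfs-pds" is an assumption, not confirmed here):
import boto3
from botocore import UNSIGNED
from botocore.client import Config

s3 = boto3.resource("s3", config=Config(signature_version=UNSIGNED))  # unsigned/anonymous requests
bucket = s3.Bucket("noaa-gfs-pds")  # hypothetical bucket name; adjust to the real source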
def source_coordinates(self):
    """{source_coordinates}"""
    available_times = [np.datetime64(date.replace(".", "-")) for date in self.available_dates]
    return Coordinates([available_times], dims=["time"])
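# Worked example of the date conversion above, assuming folder names like "2018.06.11"
# (the exact format of `available_dates` is inferred from the `.replace(".", "-")` call):
import numpy as np
print(np.datetime64("2018.06.11".replace(".", "-")))  # -> 2018-06-11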
def get_coordinates(self):
    """{get_coordinates}"""
    lons = np.array(self.dataset[self.lon_key][:, :])
    lats = np.array(self.dataset[self.lat_key][:, :])
    lons[lons == self.nan_vals[0]] = np.nan
    lats[lats == self.nan_vals[0]] = np.nan

    # collapse the 2D coordinate grids to 1D axes by averaging over the other dimension
    lons = np.nanmean(lons, axis=0)
    lats = np.nanmean(lats, axis=1)
    coords = Coordinates([lats, lons], dims=["lat", "lon"])
    return coords
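# Toy illustration of the nanmean collapse above, assuming a regular grid where every
# row of the 2D longitude array repeats the same 1D axis values (assumed data, not GFS):
import numpy as np
lons2d = np.array([[10.0, 20.0, 30.0], [10.0, 20.0, 30.0]])
print(np.nanmean(lons2d, axis=0))  # -> [10. 20. 30.]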
def source_coordinates(self):
    base_time = datetime.datetime.strptime("%s %s" % (self.date, self.hour), "%Y%m%d %H%M")
    forecast_times = [base_time + datetime.timedelta(hours=int(h)) for h in self.forecasts]
    return Coordinates(
        [[dt.strftime("%Y-%m-%d %H:%M") for dt in forecast_times]], dims=["time"], validate_crs=False
    )
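# Quick check of the forecast-hour arithmetic above with assumed values
# (date "20180611", hour "1200", forecast string "006" are illustrative only):
import datetime
base_time = datetime.datetime.strptime("20180611 1200", "%Y%m%d %H%M")
print(base_time + datetime.timedelta(hours=int("006")))  # -> 2018-06-11 18:00:00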
def source_coordinates(self):
    """{source_coordinates}"""
    try:
        times, latlon, _ = self.available_coords_sources
    except Exception:  # connection or authentication failure; fall back to the disk cache
        try:
            return self.get_cache("source.coordinates")
        except NodeException as e:
            raise NodeException(
                "Connection or authentication error, and no disk cache to fall back on for determining sources."
            ) from e
    else:
        if latlon is not None and latlon.size > 0:
            crds = Coordinates([[times, latlon[:, 0], latlon[:, 1]]], dims=["time_lat_lon"])
        else:
            crds = Coordinates([times], dims=["time"])
        self.put_cache(crds, "source.coordinates", overwrite=True)
        return crds
def get_coordinates(self):
    nc = self.sources[0].coordinates
    base_time = datetime.datetime.strptime("%s %s" % (self.date, self.hour), "%Y%m%d %H%M")
    forecast_times = [base_time + datetime.timedelta(hours=int(h)) for h in self.forecasts]
    tc = Coordinates(
        [[dt.strftime("%Y-%m-%d %H:%M") for dt in forecast_times]],
        dims=["time"],
        crs=nc.crs,
        validate_crs=False,
    )
    return merge_dims([nc, tc])
def select_sources(self, coordinates):
    """Select sources based on requested coordinates, including setting coordinates, if possible.

    Parameters
    ----------
    coordinates : :class:`podpac.Coordinates`
        Coordinates to evaluate at compositor sources

    Returns
    -------
    sources : :class:`np.ndarray`
        Array of sources

    Notes
    -----
     * If :attr:`source_coordinates` is defined, only sources that intersect the requested coordinates are selected.
     * Sets sources :attr:`interpolation`.
     * If source coordinates are complete, sets sources :attr:`coordinates` as an optimization.
    """
    src_subset = super(SMAPCompositor, self).select_sources(coordinates)

    if self.is_source_coordinates_complete:
        # optimization: set each source's coordinates directly so they need not be fetched
        coords_subset = list(self.source_coordinates.intersect(coordinates, outer=True).coords.values())[0]
        coords_dim = list(self.source_coordinates.dims)[0]
        crs = self.source_coordinates.crs

        for s, c in zip(src_subset, coords_subset):
            nc = merge_dims(
                [
                    Coordinates(np.atleast_1d(c), dims=[coords_dim], crs=crs, validate_crs=False),
                    self.shared_coordinates,
                ]
            )
            s.set_coordinates(nc)

    return src_subset
def find_coordinates(self):
    """
    {coordinates}

    Notes
    -----
    These coordinates are computed, assuming the dataset is regular.
    """
    if self.product in SMAP_IRREGULAR_COORDINATES:
        raise Exception("Native coordinates too large. Try using get_filename_coordinates_sources().")

    # reconstruct the full time grid by broadcasting the per-folder timestamps against
    # the within-folder time offsets of the first source
    partial_sources = self.source_coordinates["time"].coordinates
    complete_source_0 = self.sources[0].source_coordinates["time"].coordinates
    offset = complete_source_0 - partial_sources[0]
    full_times = (partial_sources[:, None] + offset[None, :]).ravel()
    return [podpac.coordinates.merge_dims([Coordinates([full_times], ["time"]), self.shared_coordinates])]
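# Sketch of the broadcasting trick above with toy hour offsets (assumed values,
# not real SMAP timestamps):
import numpy as np
partial = np.array([0, 24, 48])  # first timestamp in each date folder
offset = np.array([0, 3, 6])     # offsets within a folder, relative to the first
print((partial[:, None] + offset[None, :]).ravel())  # -> [ 0  3  6 24 27 30 48 51 54]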
print(o)

# GFS (specify source date/time, select forecast at evaluation)
print("GFS node (parameter, level, date, hour)")
gfs_soim = GFS(
    parameter=parameter,
    level=level,
    date=yesterday.strftime("%Y%m%d"),
    hour="1200",
    cache_ctrl=cache_ctrl,
    anon=True,
)

# whole world forecast at this time tomorrow
c = Coordinates(
    [gfs_soim.coordinates["lat"], gfs_soim.coordinates["lon"], tomorrow],
    dims=["lat", "lon", "time"],
)
o = gfs_soim.eval(c)
print(o)

# time series: get the forecast at lat=42, lon=282 every hour for the next 6 hours
start = now
stop = now + datetime.timedelta(hours=6)
c = Coordinates([42, 282, podpac.crange(start, stop, "1,h")], dims=["lat", "lon", "time"])
o = gfs_soim.eval(c)
print(o)

# latest (get latest source, select forecast at evaluation)
print("GFSLatest node (parameter, level)")
gfs_soim = GFSLatest(parameter=parameter,
from podpac.datalib import terraintiles
from podpac.coordinates import Coordinates, clinspace
from podpac.datalib.terraintiles import TerrainTilesSource
from pyspark import SparkContext
import numpy as np

# sc = SparkContext(master="local[4]")

ZOOM_LEVEL = 9

# create coordinates for region
c = Coordinates([clinspace(75, -60, 1000), clinspace(-155, -35, 1000)], dims=['lat', 'lon'])

# get all tile urls for the 'geotiff' tile format at a certain zoom level (9) within the coordinates
tiles_urls = terraintiles.get_tile_urls('geotiff', ZOOM_LEVEL, coordinates=c)
tiles_urls = [tile.replace(f'geotiff/{ZOOM_LEVEL}/', '').replace('.tif', '') for tile in tiles_urls]
tiles_xy = [(int(tile.partition('/')[0]), int(tile.partition('/')[2])) for tile in tiles_urls]

# make query to get those resources
# https://s3.amazonaws.com/elevation-tiles-prod/geotiff/{z}/{x}/{y}.tif
# e.g. https://s3.amazonaws.com/elevation-tiles-prod/geotiff/9/44/90.tif
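# str.partition splits on the first separator and returns a (head, sep, tail) 3-tuple,
# so indices 0 and 2 above recover the x/y tile indices from an 'x/y' string:
print('44/90'.partition('/'))  # -> ('44', '/', '90')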
def cmr_query(kwargs=None, bounds=None):
    """Helper function for making and parsing CMR queries. This is used for building
    the initial index and for updating the cached index with new data.
    """
    if not kwargs:
        kwargs = {}

    # Set up regular expressions and maps to convert filenames to coordinates
    date_re = self.sources[0].date_url_re
    date_time_re = self.sources[0].date_time_url_re
    latlon_re = self.sources[0].latlon_url_re

    def datemap(x):
        m = date_time_re.search(x)
        if not m:
            m = date_re.search(x)
        return smap2np_date(m.group())

    def latlonmap(x):
        m = latlon_re.search(x)
        if not m:
            return ()
        lonlat = m.group()
        return (
            float(lonlat[4:6]) * (1 - 2 * (lonlat[6] == "S")),
            float(lonlat[:3]) * (1 - 2 * (lonlat[3] == "W")),
        )

    # Restrict the query to any specified bounds
    if bounds:
        kwargs["temporal"] = ",".join([str(b.astype("datetime64[s]")) for b in bounds["time"].bounds])

    # Get CMR data
    filenames = nasaCMR.search_granule_json(
        session=self.session, entry_map=lambda x: x["producer_granule_id"], short_name=self.product, **kwargs
    )
    if not filenames:
        return Coordinates([]), [], []

    # Extract coordinate information from filenames
    # filenames.sort()  # Assume it comes sorted...
    dims = ["time"]
    dates = [d for d in np.array(list(map(datemap, filenames))).squeeze()]
    coords = [dates]
    if latlonmap(filenames[0]):
        latlons = list(map(latlonmap, filenames))
        lats = np.array([l[0] for l in latlons])
        lons = np.array([l[1] for l in latlons])
        dims = ["time_lat_lon"]
        coords = [[dates, lats, lons]]

    # Create PODPAC Coordinates object, and return relevant data structures
    crds = Coordinates(coords, dims)
    return crds, filenames, dates
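# Worked example of latlonmap's slicing on an assumed filename token "090W45N"
# (3-digit longitude + E/W, then 2-digit latitude + N/S; format inferred from the code):
lonlat = "090W45N"
lat = float(lonlat[4:6]) * (1 - 2 * (lonlat[6] == "S"))  # -> 45.0
lon = float(lonlat[:3]) * (1 - 2 * (lonlat[3] == "W"))   # -> -90.0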
"... (%d)" % len(sm_datefolder.sources)) # sample SMAPSource info sm_source = sm_datefolder.sources[0] print("Sample DAP Source:", sm_source) print("Sample DAP Source Definition:", sm_source.json_pretty) print("Sample DAP Native Coordinates:", sm_source.coordinates) print("Another Sample DAP Native Coordinates:", sm_datefolder.sources[1].coordinates) # eval whole world c_world = Coordinates( [ podpac.crange(90, -90, -2.0), podpac.crange(-180, 180, 2.0), "2018-05-19T12:00:00" ], dims=["lat", "lon", "time"], ) o = sm.eval(c_world) o.plot(cmap="gist_earth_r") pyplot.axis("scaled") # eval points over time lat = [45.0, 45.0, 0.0, 45.0] lon = [-100.0, 20.0, 20.0, 100.0] c_pts = Coordinates( [[lat, lon], podpac.crange("2018-05-15T00", "2018-05-19T00", "3,h")], dims=["lat_lon", "time"])