Example #1
    def init(self):
        # TODO check prefix and the options at the next level

        self._prefix = '%s/%s/%s/%s/' % (self.parameter, self.level, self.date,
                                         self.hour)
        # `bucket` is the S3 bucket handle defined elsewhere in this module;
        # list every available forecast key under the prefix
        self.forecasts = [
            obj.key.replace(self._prefix, '')
            for obj in bucket.objects.filter(Prefix=self._prefix)
        ]

        if not self.forecasts:
            raise ValueError("Not found: '%s/*'" % self._prefix)

        params = {
            'parameter': self.parameter,
            'level': self.level,
            'date': self.date,
            'hour': self.hour,
            'cache_ctrl': self.cache_ctrl
        }
        self._sources = np.array([
            GFSSource(forecast=h, **params) for h in self.forecasts
        ])  # can we load this lazily?

        nc = self._sources[0].native_coordinates
        base_time = datetime.datetime.strptime(
            '%s %s' % (self.date, self.hour), '%Y%m%d %H%M')
        forecast_times = [
            base_time + datetime.timedelta(hours=int(h))
            for h in self.forecasts
        ]
        tc = Coordinates(
            [[dt.strftime('%Y-%m-%d %H:%M') for dt in forecast_times]],
            dims=['time'])
        self.native_coordinates = merge_dims([nc, tc])
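
The `bucket.objects.filter(Prefix=...)` call above is the standard boto3 way to list keys under an S3 prefix. A minimal standalone sketch, assuming anonymous access; the bucket name and prefix values are hypothetical stand-ins for what the class configures:

import boto3
from botocore import UNSIGNED
from botocore.config import Config

# unsigned requests, matching the anon=True option used in Example #9
s3 = boto3.resource("s3", config=Config(signature_version=UNSIGNED))
bucket = s3.Bucket("noaa-gfs-pds")  # hypothetical bucket name

prefix = "SOIM/0-10 m DPTH/20240101/1200/"  # sample parameter/level/date/hour
forecasts = [
    obj.key.replace(prefix, "")
    for obj in bucket.objects.filter(Prefix=prefix)
]
print(forecasts[:5])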
Example #2
 def source_coordinates(self):
     """{source_coordinates}"""
     available_times = [
         np.datetime64(date.replace(".", "-"))
         for date in self.available_dates
     ]
     return Coordinates([available_times], dims=["time"])
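
The `date.replace(".", "-")` call converts dot-separated date folder names into the ISO form that `np.datetime64` parses. A standalone sketch with made-up folder names:

import numpy as np

available_dates = ["2018.01.01", "2018.01.02"]  # hypothetical folder names
available_times = [np.datetime64(d.replace(".", "-")) for d in available_dates]
print(available_times)  # [numpy.datetime64('2018-01-01'), numpy.datetime64('2018-01-02')]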
Example #3
 def get_coordinates(self):
     """{get_coordinates}"""
     lons = np.array(self.dataset[self.lon_key][:, :])
     lats = np.array(self.dataset[self.lat_key][:, :])
     # mask fill values before averaging
     lons[lons == self.nan_vals[0]] = np.nan
     lats[lats == self.nan_vals[0]] = np.nan
     # collapse the 2-D coordinate arrays to 1-D: lon varies across columns
     # (average over rows), lat varies across rows (average over columns)
     lons = np.nanmean(lons, axis=0)
     lats = np.nanmean(lats, axis=1)
     coords = Coordinates([lats, lons], dims=["lat", "lon"])
     return coords
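
To see why the axis choice matters, here is a standalone sketch with a fabricated regular 2x3 grid:

import numpy as np

# lon varies across columns, lat varies across rows
lons = np.array([[10.0, 20.0, 30.0],
                 [10.0, 20.0, 30.0]])
lats = np.array([[50.0, 50.0, 50.0],
                 [40.0, 40.0, 40.0]])

print(np.nanmean(lons, axis=0))  # [10. 20. 30.] -> one lon per column
print(np.nanmean(lats, axis=1))  # [50. 40.]     -> one lat per row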
Example #4
 def source_coordinates(self):
     base_time = datetime.datetime.strptime(
         "%s %s" % (self.date, self.hour), "%Y%m%d %H%M")
     forecast_times = [
         base_time + datetime.timedelta(hours=int(h))
         for h in self.forecasts
     ]
     return Coordinates(
         [[dt.strftime("%Y-%m-%d %H:%M") for dt in forecast_times]],
         dims=["time"],
         validate_crs=False)
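
The forecast-hour arithmetic is plain `datetime` math. A standalone sketch with sample values:

import datetime

date, hour = "20240101", "1200"  # sample values
base_time = datetime.datetime.strptime("%s %s" % (date, hour), "%Y%m%d %H%M")
forecasts = ["003", "006"]  # forecast offsets in hours, zero-padded strings
times = [base_time + datetime.timedelta(hours=int(h)) for h in forecasts]
print([t.strftime("%Y-%m-%d %H:%M") for t in times])
# ['2024-01-01 15:00', '2024-01-01 18:00']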
Example #5
    def source_coordinates(self):
        """{source_coordinates}"""

        try:
            times, latlon, _ = self.available_coords_sources
        except Exception:  # broad on purpose: any failure falls back to the cache
            try:
                return self.get_cache("source.coordinates")
            except NodeException as e:
                raise NodeException(
                    "Connection or authentication error, and no disk cache to fall back on for determining sources."
                ) from e
        else:
            if latlon is not None and latlon.size > 0:
                crds = Coordinates([[times, latlon[:, 0], latlon[:, 1]]],
                                   dims=["time_lat_lon"])
            else:
                crds = Coordinates([times], dims=["time"])
            self.put_cache(crds, "source.coordinates", overwrite=True)
            return crds
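
The control flow above is a cache-fallback pattern: prefer live results, fall back to the disk cache on failure, and refresh the cache on success. A generic sketch of the same pattern (the `fetch` callable and cache key are illustrative):

def fetch_with_cache_fallback(node, fetch, key="source.coordinates"):
    try:
        value = fetch()
    except Exception:
        # no live data; this raises in turn if there is no cached copy either
        return node.get_cache(key)
    node.put_cache(value, key, overwrite=True)
    return value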
Example #6
 def get_coordinates(self):
     nc = self.sources[0].coordinates
     base_time = datetime.datetime.strptime(
         "%s %s" % (self.date, self.hour), "%Y%m%d %H%M")
     forecast_times = [
         base_time + datetime.timedelta(hours=int(h))
         for h in self.forecasts
     ]
     tc = Coordinates(
         [[dt.strftime("%Y-%m-%d %H:%M") for dt in forecast_times]],
         dims=["time"],
         crs=nc.crs,
         validate_crs=False)
     return merge_dims([nc, tc])
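
`merge_dims` combines Coordinates objects with disjoint dimensions into one. A minimal sketch of the idea, assuming a standard podpac install:

from podpac import Coordinates
from podpac.coordinates import merge_dims

latlon = Coordinates([[0, 1], [10, 20]], dims=["lat", "lon"])
time = Coordinates([["2018-01-01", "2018-01-02"]], dims=["time"])
print(merge_dims([latlon, time]).dims)  # ('lat', 'lon', 'time')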
Example #7
    def select_sources(self, coordinates):
        """Select sources based on requested coordinates, including setting coordinates, if possible.

        Parameters
        ----------
        coordinates : :class:`podpac.Coordinates`
            Coordinates to evaluate at compositor sources

        Returns
        -------
        sources : :class:`np.ndarray`
            Array of sources

        Notes
        -----
         * If :attr:`source_coordinates` is defined, only sources that intersect the requested coordinates are selected.
         * Sets each source's :attr:`interpolation`.
         * If the source coordinates are complete, sets each source's :attr:`coordinates` as an optimization.
        """

        src_subset = super(SMAPCompositor, self).select_sources(coordinates)

        if self.is_source_coordinates_complete:
            coords_subset = list(
                self.source_coordinates.intersect(
                    coordinates, outer=True).coords.values())[0]
            coords_dim = list(self.source_coordinates.dims)[0]
            crs = self.source_coordinates.crs
            # give each selected source its own coordinate merged with the
            # shared coordinates, so it does not need to fetch them itself
            for s, c in zip(src_subset, coords_subset):
                nc = merge_dims([
                    Coordinates(np.atleast_1d(c),
                                dims=[coords_dim],
                                crs=crs,
                                validate_crs=False),
                    self.shared_coordinates,
                ])
                s.set_coordinates(nc)

        return src_subset
Example #8
    def find_coordinates(self):
        """
        {coordinates}

        Notes
        -----
        These coordinates are computed assuming the dataset is regular.
        """
        if self.product in SMAP_IRREGULAR_COORDINATES:
            raise Exception(
                "Native coordinates too large. Try using get_filename_coordinates_sources()."
            )

        partial_sources = self.source_coordinates["time"].coordinates
        complete_source_0 = self.sources[0].source_coordinates[
            "time"].coordinates
        # offsets of the first source's times relative to its base date;
        # broadcasting then adds every offset to every base date at once
        offset = complete_source_0 - partial_sources[0]
        full_times = (partial_sources[:, None] + offset[None, :]).ravel()
        return [
            podpac.coordinates.merge_dims(
                [Coordinates([full_times], ["time"]), self.shared_coordinates])
        ]
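
The broadcasting trick composes every base date with every intra-day offset without a Python loop. A numpy-only sketch with fabricated times:

import numpy as np

partial = np.array(["2018-05-01", "2018-05-02"], dtype="datetime64[h]")          # one time per source
complete0 = np.array(["2018-05-01T00", "2018-05-01T03"], dtype="datetime64[h]")  # first source's times

offset = complete0 - partial[0]                      # [0, 3] hours
full = (partial[:, None] + offset[None, :]).ravel()  # all dates x all offsets
print(full)
# ['2018-05-01T00' '2018-05-01T03' '2018-05-02T00' '2018-05-02T03']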
Example #9
    print(o)

    # GFS (specify source date/time, select forecast at evaluation)
    print("GFS node (parameter, level, date, hour)")
    gfs_soim = GFS(
        parameter=parameter,
        level=level,
        date=yesterday.strftime("%Y%m%d"),
        hour="1200",
        cache_ctrl=cache_ctrl,
        anon=True,
    )

    # whole world forecast at this time tomorrow
    c = Coordinates(
        [gfs_soim.coordinates["lat"], gfs_soim.coordinates["lon"], tomorrow],
        dims=["lat", "lon", "time"])
    o = gfs_soim.eval(c)
    print(o)

    # time series: get the forecast at lat=42, lon=282 every hour for the next 6 hours
    start = now
    stop = now + datetime.timedelta(hours=6)
    c = Coordinates([42, 282, podpac.crange(start, stop, "1,h")],
                    dims=["lat", "lon", "time"])
    o = gfs_soim.eval(c)
    print(o)

    # latest (get latest source, select forecast at evaluation)
    print("GFSLatest node (parameter, level)")
    gfs_soim = GFSLatest(
        parameter=parameter,
        level=level,
        cache_ctrl=cache_ctrl,
        anon=True,
    )
    # evaluate at the same coordinates as above
    o = gfs_soim.eval(c)
    print(o)
Example #10
from podpac.datalib import terraintiles
from podpac.coordinates import Coordinates, clinspace
from podpac.datalib.terraintiles import TerrainTilesSource

from pyspark import SparkContext
import numpy as np

# sc = SparkContext(master="local[4]")

ZOOM_LEVEL = 9

# create coordinates for region
c = Coordinates([clinspace(75, -60, 1000),
                 clinspace(-155, -35, 1000)],
                dims=['lat', 'lon'])

# get all tile URLs in the 'geotiff' tile format at the chosen zoom level (9) within the coordinates
tiles_urls = terraintiles.get_tile_urls('geotiff', ZOOM_LEVEL, coordinates=c)
tiles_urls = [
    tile.replace(f'geotiff/{ZOOM_LEVEL}/', '').replace('.tif', '')
    for tile in tiles_urls
]

tiles_xy = [(int(tile.partition('/')[0]), int(tile.partition('/')[2]))
            for tile in tiles_urls]

# make a query to get those resources

# URL template: https://s3.amazonaws.com/elevation-tiles-prod/geotiff/{z}/{x}/{y}.tif
# e.g. https://s3.amazonaws.com/elevation-tiles-prod/geotiff/9/44/90.tif
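
With the (x, y) pairs in hand, the object URLs can be rebuilt from the template above; a short sketch:

tile_urls_full = [
    f"https://s3.amazonaws.com/elevation-tiles-prod/geotiff/{ZOOM_LEVEL}/{x}/{y}.tif"
    for x, y in tiles_xy
]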
Example #11
        def cmr_query(kwargs=None, bounds=None):
            """Helper function for making and parsing cmr queries. This is used for building the initial index
            and for updating the cached index with new data.
            """
            if not kwargs:
                kwargs = {}

            # Set up regular expressions and maps to convert filenames to coordinates
            date_re = self.sources[0].date_url_re
            date_time_re = self.sources[0].date_time_url_re
            latlon_re = self.sources[0].latlon_url_re

            def datemap(x):
                # prefer the full date-time pattern, fall back to date-only
                m = date_time_re.search(x)
                if not m:
                    m = date_re.search(x)
                return smap2np_date(m.group())

            def latlonmap(x):
                m = latlon_re.search(x)
                if not m:
                    return ()
                # fixed-width token like '085W35N': chars 4-5 are latitude
                # (negated for 'S'), chars 0-2 are longitude (negated for 'W')
                lonlat = m.group()
                return (
                    float(lonlat[4:6]) * (1 - 2 * (lonlat[6] == "S")),
                    float(lonlat[:3]) * (1 - 2 * (lonlat[3] == "W")),
                )

            # Restrict the query to any specified bounds
            if bounds:
                kwargs["temporal"] = ",".join([
                    str(b.astype("datetime64[s]"))
                    for b in bounds["time"].bounds
                ])

            # Get CMR data
            filenames = nasaCMR.search_granule_json(
                session=self.session,
                entry_map=lambda x: x["producer_granule_id"],
                short_name=self.product,
                **kwargs)
            if not filenames:
                return Coordinates([]), [], []

            # Extract coordinate information from filenames
            # filenames.sort()  # Assume it comes sorted...
            dims = ["time"]
            dates = list(np.array(list(map(datemap, filenames))).squeeze())
            coords = [dates]
            if latlonmap(filenames[0]):
                latlons = list(map(latlonmap, filenames))
                lats = np.array([l[0] for l in latlons])
                lons = np.array([l[1] for l in latlons])
                dims = ["time_lat_lon"]
                coords = [[dates, lats, lons]]

            # Create PODPAC Coordinates object, and return relevant data structures
            crds = Coordinates(coords, dims)
            return crds, filenames, dates
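
The slicing in `latlonmap` assumes a fixed-width token such as `085W35N` (three longitude digits plus an E/W letter, then two latitude digits plus an N/S letter). A standalone sketch with an assumed regex and a made-up filename:

import re

# assumed pattern, matching the fixed-width slicing above
latlon_re = re.compile(r"\d{3}[EW]\d{2}[NS]")

def latlonmap(x):
    m = latlon_re.search(x)
    if not m:
        return ()
    t = m.group()
    return (
        float(t[4:6]) * (1 - 2 * (t[6] == "S")),  # latitude, negated for 'S'
        float(t[:3]) * (1 - 2 * (t[3] == "W")),   # longitude, negated for 'W'
    )

print(latlonmap("GRANULE_085W35N_EXAMPLE.h5"))  # (35.0, -85.0)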
Example #12
          "... (%d)" % len(sm_datefolder.sources))

    # sample SMAPSource info
    sm_source = sm_datefolder.sources[0]
    print("Sample DAP Source:", sm_source)
    print("Sample DAP Source Definition:", sm_source.json_pretty)
    print("Sample DAP Native Coordinates:", sm_source.coordinates)

    print("Another Sample DAP Native Coordinates:",
          sm_datefolder.sources[1].coordinates)

    # eval whole world
    c_world = Coordinates(
        [
            podpac.crange(90, -90, -2.0),
            podpac.crange(-180, 180, 2.0), "2018-05-19T12:00:00"
        ],
        dims=["lat", "lon", "time"],
    )
    o = sm.eval(c_world)
    o.plot(cmap="gist_earth_r")
    pyplot.axis("scaled")

    # eval points over time
    lat = [45.0, 45.0, 0.0, 45.0]
    lon = [-100.0, 20.0, 20.0, 100.0]
    c_pts = Coordinates(
        [[lat, lon],
         podpac.crange("2018-05-15T00", "2018-05-19T00", "3,h")],
        dims=["lat_lon", "time"])