def uri(self):
    """ List of URLs to load for a request

    If the requested time range exceeds the maximum request length
    allowed by argovis (90 days), the request is split into several
    URLs, one per time chunk.

    Returns
    -------
    list(str)
    """
    # Length of the requested time range, in days:
    Lt = np.timedelta64(
        pd.to_datetime(self.BOX[7]) - pd.to_datetime(self.BOX[6]), "D")
    MaxLenTime = 90  # Maximum request length allowed by argovis, in days
    MaxLen = np.timedelta64(MaxLenTime, "D")
    if not self.parallel:
        # Check if the time range is not larger than allowed (90 days):
        if Lt > MaxLen:
            # Too long for a single request: chunk along time only
            # (a single chunk on lon/lat/dpt):
            self.Chunker = Chunker(
                {"box": self.BOX},
                chunks={
                    "lon": 1,
                    "lat": 1,
                    "dpt": 1,
                    "time": "auto"
                },
                chunksize={"time": MaxLenTime},
            )
            boxes = self.Chunker.fit_transform()
            urls = []
            for box in boxes:
                urls.append(
                    Fetch_box(box=box, ds=self.dataset_id).get_url())
            return urls
        else:
            # Short enough: a single request covers the whole box.
            return [self.get_url()]
    else:
        # Parallel mode: honour user chunking options, but cap the
        # time chunk size to what argovis allows:
        if 'time' not in self.chunks_maxsize:
            self.chunks_maxsize['time'] = MaxLenTime
        elif self.chunks_maxsize['time'] > MaxLenTime:
            warnings.warn(
                ("argovis only allows requests of %i days interval, "
                 "modify chunks_maxsize['time'] to %i" %
                 (MaxLenTime, MaxLenTime)))
            self.chunks_maxsize['time'] = MaxLenTime
        # Ensure time chunks will never exceed what's allowed by argovis:
        if Lt > MaxLen and 'time' in self.chunks and self.chunks[
                'time'] not in ['auto']:
            self.chunks['time'] = 'auto'
        self.Chunker = Chunker({"box": self.BOX},
                               chunks=self.chunks,
                               chunksize=self.chunks_maxsize)
        boxes = self.Chunker.fit_transform()
        urls = []
        for box in boxes:
            urls.append(Fetch_box(box=box, ds=self.dataset_id).get_url())
        return urls
def uri(self):
    """ List of files to load for a request

    Returns
    -------
    list(str)
    """
    # Single request covering the whole domain:
    if not self.parallel:
        return [self.get_url()]
    # Parallel mode: split the domain into sub-boxes, one URL per sub-box.
    self.Chunker = Chunker({"box": self.BOX},
                           chunks=self.chunks,
                           chunksize=self.chunks_maxsize)
    return [
        Fetch_box(box=sub_box, ds=self.dataset_id).get_url()
        for sub_box in self.Chunker.fit_transform()
    ]
class Fetch_box(ErddapArgoDataFetcher):
    """ Manage access to Argo data through Ifremer ERDDAP for: an ocean rectangle """

    def init(self, box: list, **kw):
        """ Create Argo data loader

        Parameters
        ----------
        box : list(float, float, float, float, float, float, str, str)
            The box domain to load all Argo data for:
            box = [lon_min, lon_max, lat_min, lat_max, pres_min, pres_max]
            or:
            box = [lon_min, lon_max, lat_min, lat_max, pres_min, pres_max, datim_min, datim_max]
        """
        self.BOX = box
        # Human-readable definition per known dataset; unknown dataset
        # ids leave ``self.definition`` untouched:
        known_definitions = {
            "phy": "Ifremer erddap Argo data fetcher for a space/time region",
            "ref": "Ifremer erddap Argo REFERENCE data fetcher for a space/time region",
        }
        if self.dataset_id in known_definitions:
            self.definition = known_definitions[self.dataset_id]
        return self

    def define_constraints(self):
        """ Define request constraints """
        # Horizontal and vertical bounds of the box:
        constraints = {
            "longitude>=": self.BOX[0],
            "longitude<=": self.BOX[1],
            "latitude>=": self.BOX[2],
            "latitude<=": self.BOX[3],
            "pres>=": self.BOX[4],
            "pres<=": self.BOX[5],
        }
        # Optional time bounds (8-element box):
        if len(self.BOX) == 8:
            constraints["time>="] = self.BOX[6]
            constraints["time<="] = self.BOX[7]
        self.erddap.constraints = constraints
        return None

    @property
    def uri(self):
        """ List of files to load for a request

        Returns
        -------
        list(str)
        """
        # Single request covering the whole domain:
        if not self.parallel:
            return [self.get_url()]
        # Parallel mode: split the domain into sub-boxes, one URL per sub-box.
        self.Chunker = Chunker({"box": self.BOX},
                               chunks=self.chunks,
                               chunksize=self.chunks_maxsize)
        return [
            Fetch_box(box=sub_box, ds=self.dataset_id).get_url()
            for sub_box in self.Chunker.fit_transform()
        ]
def uri(self):
    """ List of URLs to load for a request

    Returns
    -------
    list(str)
    """
    max_wmo_per_request = 5  # todo: This max WMO number should be parameterized somewhere else
    if not self.parallel:
        if len(self.WMO) <= max_wmo_per_request:
            # Few floats: retrieve all WMOs in a single request.
            return [self.get_url()]
        # Many floats: retrieve one WMO by URL sequentially
        # (same behaviour as localftp and argovis).
        return [
            Fetch_wmo(WMO=one_wmo, CYC=self.CYC, ds=self.dataset_id,
                      parallel=False).get_url()
            for one_wmo in self.WMO
        ]
    # Parallel mode: group WMOs into chunks, one URL per group.
    self.Chunker = Chunker({"wmo": self.WMO},
                           chunks=self.chunks,
                           chunksize=self.chunks_maxsize)
    return [
        Fetch_wmo(WMO=grp, CYC=self.CYC, ds=self.dataset_id,
                  parallel=False).get_url()
        for grp in self.Chunker.fit_transform()
    ]
class Fetch_wmo(ErddapArgoDataFetcher):
    """ Manage access to Argo data through Ifremer ERDDAP for: a list of WMOs

    This class is instantiated when a call is made to these facade access points:
    - `ArgoDataFetcher(src='erddap').float(**)`
    - `ArgoDataFetcher(src='erddap').profile(**)`
    """

    def init(self, WMO=None, CYC=None, **kw):
        """ Create Argo data loader for WMOs

        Parameters
        ----------
        WMO : list(int)
            The list of WMOs to load all Argo data for.
        CYC : int, np.array(int), list(int)
            The cycle numbers to load.
        """
        # Avoid the mutable-default-argument pitfall: None stands for
        # an empty WMO list (backward compatible with the previous
        # ``WMO=[]`` default).
        if WMO is None:
            WMO = []
        # Make sure we deal with a list of integers for WMOs:
        if not isinstance(WMO, list):
            WMO = [WMO]
        if not all(isinstance(x, (int, np.int64)) for x in WMO):
            raise ValueError("WMO must be a list of integers")
        # Make sure we deal with an array of integers for cycle numbers:
        if isinstance(CYC, int):
            CYC = np.array((CYC,), dtype="int")
        if isinstance(CYC, list):
            CYC = np.array(CYC, dtype="int")
        self.WMO = WMO
        self.CYC = CYC

        self.definition = "?"
        if self.dataset_id == "phy":
            self.definition = "Ifremer erddap Argo data fetcher for floats"
        elif self.dataset_id == "ref":
            self.definition = "Ifremer erddap Argo REFERENCE data fetcher for floats"
        return self

    def define_constraints(self):
        """ Define erddap constraints """
        # Regular expression matching any of the requested platform numbers:
        self.erddap.constraints = {
            "platform_number=~": "|".join(["%i" % i for i in self.WMO])
        }
        # Cycle numbers were normalised to a numpy array by ``init``:
        if isinstance(self.CYC, np.ndarray):
            self.erddap.constraints.update(
                {"cycle_number=~": "|".join(["%i" % i for i in self.CYC])})
        return self

    @property
    def uri(self):
        """ List of URLs to load for a request

        Returns
        -------
        list(str)
        """
        if not self.parallel:
            if len(
                    self.WMO
            ) <= 5:  # todo: This max WMO number should be parameterized somewhere else
                # Retrieve all WMOs in a single request
                return [self.get_url()]
            else:
                # Retrieve one WMO by URL sequentially (same behaviour as localftp and argovis)
                urls = []
                for wmo in self.WMO:
                    urls.append(
                        Fetch_wmo(WMO=wmo,
                                  CYC=self.CYC,
                                  ds=self.dataset_id,
                                  parallel=False).get_url())
                return urls
        else:
            # Parallel mode: group WMOs into chunks, one URL per group.
            self.Chunker = Chunker({"wmo": self.WMO},
                                   chunks=self.chunks,
                                   chunksize=self.chunks_maxsize)
            wmo_grps = self.Chunker.fit_transform()
            # self.chunks = C.chunks
            urls = []
            for wmos in wmo_grps:
                urls.append(
                    Fetch_wmo(WMO=wmos,
                              CYC=self.CYC,
                              ds=self.dataset_id,
                              parallel=False).get_url())
            return urls

    def dashboard(self, **kw):
        """ Open the Argo dashboard for the requested float (single-WMO requests only) """
        if len(self.WMO) == 1:
            return open_dashboard(wmo=self.WMO[0], **kw)
        else:
            warnings.warn(
                "Plot dashboard only available for a single float request")
class Fetch_box(ArgovisDataFetcher):
    """ Manage access to Argo data through argovis for: an ocean rectangle """

    def init(self, box: list):
        """ Create Argo data loader

        Parameters
        ----------
        box : list(float, float, float, float, float, float, str, str)
            The box domain to load all Argo data for:
            box = [lon_min, lon_max, lat_min, lat_max, pres_min, pres_max]
            or:
            box = [lon_min, lon_max, lat_min, lat_max, pres_min, pres_max, datim_min, datim_max]
        """
        if len(box) == 6:
            # No time bounds given: select the last months of data:
            end = pd.to_datetime("now")
            start = end - pd.DateOffset(months=1)
            box.append(start.strftime("%Y-%m-%d"))
            box.append(end.strftime("%Y-%m-%d"))
        self.BOX = box

        self.definition = "?"
        if self.dataset_id == "phy":
            self.definition = "Argovis Argo data fetcher for a space/time region"
        return self

    def get_url_shape(self):
        """ Return the URL used to download data (polygon 'shape' endpoint) """
        # Closed polygon tracing the box corners (first point repeated):
        shape = [[
            [self.BOX[0], self.BOX[2]],  # ll
            [self.BOX[0], self.BOX[3]],  # ul
            [self.BOX[1], self.BOX[3]],  # ur
            [self.BOX[1], self.BOX[2]],  # lr
            [self.BOX[0], self.BOX[2]],  # ll
        ]]
        # Compact string form expected by the 'shape' query parameter:
        strShape = str(shape).replace(" ", "")
        url = self.server + "/selection/profiles"
        url += "?startDate={}".format(
            pd.to_datetime(self.BOX[6]).strftime("%Y-%m-%dT%H:%M:%SZ"))
        url += "&endDate={}".format(
            pd.to_datetime(self.BOX[7]).strftime("%Y-%m-%dT%H:%M:%SZ"))
        url += "&shape={}".format(strShape)
        url += "&presRange=[{},{}]".format(self.BOX[4], self.BOX[5])
        return url

    def get_url_rect(self):
        """ Return the URL used to download data (rectangle 'box' endpoint) """

        def strCorner(b, i):
            # Format a [lon, lat] corner of box ``b`` without spaces:
            return str([b[i[0]], b[i[1]]]).replace(" ", "")

        def strDate(b, i):
            # Format a time bound of box ``b`` as an ISO-8601 timestamp:
            return pd.to_datetime(b[i]).strftime("%Y-%m-%dT%H:%M:%SZ")

        url = self.server + "/selection/box/profiles"
        url += "?startDate={}".format(strDate(self.BOX, 6))
        url += "&endDate={}".format(strDate(self.BOX, 7))
        url += "&presRange=[{},{}]".format(self.BOX[4], self.BOX[5])
        url += "&llCorner={}".format(strCorner(self.BOX, [0, 2]))
        url += "&urCorner={}".format(strCorner(self.BOX, [1, 3]))
        return url

    def get_url(self):
        # return self.get_url_shape()
        return self.get_url_rect()

    @property
    def uri(self):
        """ List of URLs to load for a request

        If the requested time range exceeds the maximum request length
        allowed by argovis (90 days), the request is split into several
        URLs, one per time chunk.

        Returns
        -------
        list(str)
        """
        # Length of the requested time range, in days:
        Lt = np.timedelta64(
            pd.to_datetime(self.BOX[7]) - pd.to_datetime(self.BOX[6]), "D")
        MaxLenTime = 90  # Maximum request length allowed by argovis, in days
        MaxLen = np.timedelta64(MaxLenTime, "D")
        if not self.parallel:
            # Check if the time range is not larger than allowed (90 days):
            if Lt > MaxLen:
                # Too long for a single request: chunk along time only
                # (a single chunk on lon/lat/dpt):
                self.Chunker = Chunker(
                    {"box": self.BOX},
                    chunks={
                        "lon": 1,
                        "lat": 1,
                        "dpt": 1,
                        "time": "auto"
                    },
                    chunksize={"time": MaxLenTime},
                )
                boxes = self.Chunker.fit_transform()
                urls = []
                for box in boxes:
                    urls.append(
                        Fetch_box(box=box, ds=self.dataset_id).get_url())
                return urls
            else:
                # Short enough: a single request covers the whole box.
                return [self.get_url()]
        else:
            # Parallel mode: honour user chunking options, but cap the
            # time chunk size to what argovis allows:
            if 'time' not in self.chunks_maxsize:
                self.chunks_maxsize['time'] = MaxLenTime
            elif self.chunks_maxsize['time'] > MaxLenTime:
                warnings.warn(
                    ("argovis only allows requests of %i days interval, "
                     "modify chunks_maxsize['time'] to %i" %
                     (MaxLenTime, MaxLenTime)))
                self.chunks_maxsize['time'] = MaxLenTime
            # Ensure time chunks will never exceed what's allowed by argovis:
            if Lt > MaxLen and 'time' in self.chunks and self.chunks[
                    'time'] not in ['auto']:
                self.chunks['time'] = 'auto'
            self.Chunker = Chunker({"box": self.BOX},
                                   chunks=self.chunks,
                                   chunksize=self.chunks_maxsize)
            boxes = self.Chunker.fit_transform()
            urls = []
            for box in boxes:
                urls.append(Fetch_box(box=box, ds=self.dataset_id).get_url())
            return urls

    @property
    def url(self):
        """ Single URL used to download data for this request """
        # return self.get_url_shape()
        return self.get_url_rect()
def test_chunk_box4d(self):
    """ Chunking of a 4D box (lon, lat, dpt, time) along each dimension

    Each Chunker result is stored once instead of calling
    ``fit_transform`` twice per configuration.
    """
    # Default and 'auto' chunking must return valid boxes:
    C = Chunker({"box": self.BOX4d})
    assert all([is_box(chunk) for chunk in C.fit_transform()])
    C = Chunker({"box": self.BOX4d}, chunks="auto")
    assert all([is_box(chunk) for chunk in C.fit_transform()])

    # Longitude: fixed number of chunks, then fixed chunk size:
    C = Chunker({"box": self.BOX4d},
                chunks={"lon": 2, "lat": 1, "dpt": 1, "time": 1})
    chunks = C.fit_transform()
    assert all([is_box(chunk) for chunk in chunks])
    assert len(chunks) == 2
    C = Chunker(
        {"box": self.BOX4d},
        chunks={"lat": 1, "dpt": 1, "time": 1},
        chunksize={"lon": 10},
    )
    chunks = C.fit_transform()
    assert all([is_box(chunk) for chunk in chunks])
    assert chunks[0][1] - chunks[0][0] == 10

    # Latitude:
    C = Chunker({"box": self.BOX4d},
                chunks={"lon": 1, "lat": 2, "dpt": 1, "time": 1})
    chunks = C.fit_transform()
    assert all([is_box(chunk) for chunk in chunks])
    assert len(chunks) == 2
    C = Chunker(
        {"box": self.BOX4d},
        chunks={"lon": 1, "dpt": 1, "time": 1},
        chunksize={"lat": 10},
    )
    chunks = C.fit_transform()
    assert all([is_box(chunk) for chunk in chunks])
    assert chunks[0][3] - chunks[0][2] == 10

    # Depth:
    C = Chunker({"box": self.BOX4d},
                chunks={"lon": 1, "lat": 1, "dpt": 2, "time": 1})
    chunks = C.fit_transform()
    assert all([is_box(chunk) for chunk in chunks])
    assert len(chunks) == 2
    C = Chunker(
        {"box": self.BOX4d},
        chunks={"lon": 1, "lat": 1, "time": 1},
        chunksize={"dpt": 10},
    )
    chunks = C.fit_transform()
    assert all([is_box(chunk) for chunk in chunks])
    assert chunks[0][5] - chunks[0][4] == 10

    # Time:
    C = Chunker({"box": self.BOX4d},
                chunks={"lon": 1, "lat": 1, "dpt": 1, "time": 2})
    chunks = C.fit_transform()
    assert all([is_box(chunk) for chunk in chunks])
    assert len(chunks) == 2
    C = Chunker(
        {"box": self.BOX4d},
        chunks={"lon": 1, "lat": 1, "dpt": 1},
        chunksize={"time": 5},
    )
    chunks = C.fit_transform()
    assert all([is_box(chunk) for chunk in chunks])
    # Time chunks must not be longer than the requested 5 days:
    assert np.timedelta64(
        pd.to_datetime(chunks[0][7]) - pd.to_datetime(chunks[0][6]),
        "D") <= np.timedelta64(5, "D")

    # Invalid chunks specification:
    with pytest.raises(ValueError):
        Chunker({"box": self.BOX4d}, chunks=["lon", 1])

    # The chunker must expose a callable chunking method:
    C = Chunker({"box": self.BOX4d})
    assert isinstance(C.this_chunker, types.FunctionType) or isinstance(
        C.this_chunker, types.MethodType)
def test_chunk_box3d(self):
    """ Chunking of a 3D box (lon, lat, dpt) along each dimension

    Each Chunker result is stored once instead of calling
    ``fit_transform`` twice per configuration.
    """
    # Default and 'auto' chunking must return valid boxes:
    C = Chunker({"box": self.BOX3d})
    assert all([is_box(chunk) for chunk in C.fit_transform()])
    C = Chunker({"box": self.BOX3d}, chunks="auto")
    assert all([is_box(chunk) for chunk in C.fit_transform()])

    # Longitude: fixed number of chunks, then fixed chunk size:
    C = Chunker({"box": self.BOX3d},
                chunks={"lon": 12, "lat": 1, "dpt": 1})
    chunks = C.fit_transform()
    assert all([is_box(chunk) for chunk in chunks])
    assert len(chunks) == 12
    C = Chunker({"box": self.BOX3d},
                chunks={"lat": 1, "dpt": 1},
                chunksize={"lon": 10})
    chunks = C.fit_transform()
    assert all([is_box(chunk) for chunk in chunks])
    assert chunks[0][1] - chunks[0][0] == 10

    # Latitude:
    C = Chunker({"box": self.BOX3d},
                chunks={"lon": 1, "lat": 12, "dpt": 1})
    chunks = C.fit_transform()
    assert all([is_box(chunk) for chunk in chunks])
    assert len(chunks) == 12
    C = Chunker({"box": self.BOX3d},
                chunks={"lon": 1, "dpt": 1},
                chunksize={"lat": 10})
    chunks = C.fit_transform()
    assert all([is_box(chunk) for chunk in chunks])
    assert chunks[0][3] - chunks[0][2] == 10

    # Depth:
    C = Chunker({"box": self.BOX3d},
                chunks={"lon": 1, "lat": 1, "dpt": 12})
    chunks = C.fit_transform()
    assert all([is_box(chunk) for chunk in chunks])
    assert len(chunks) == 12
    C = Chunker({"box": self.BOX3d},
                chunks={"lon": 1, "lat": 1},
                chunksize={"dpt": 10})
    chunks = C.fit_transform()
    assert all([is_box(chunk) for chunk in chunks])
    assert chunks[0][5] - chunks[0][4] == 10

    # Combined chunking: total count is the product of per-dim counts:
    C = Chunker({"box": self.BOX3d}, chunks={"lon": 4, "lat": 2, "dpt": 1})
    chunks = C.fit_transform()
    assert all([is_box(chunk) for chunk in chunks])
    assert len(chunks) == 2 * 4
    C = Chunker({"box": self.BOX3d}, chunks={"lon": 2, "lat": 3, "dpt": 4})
    chunks = C.fit_transform()
    assert all([is_box(chunk) for chunk in chunks])
    assert len(chunks) == 2 * 3 * 4

    # Invalid chunks specification:
    with pytest.raises(ValueError):
        Chunker({"box": self.BOX3d}, chunks=["lon", 1])

    # The chunker must expose a callable chunking method:
    C = Chunker({"box": self.BOX3d})
    assert isinstance(C.this_chunker, types.FunctionType) or isinstance(
        C.this_chunker, types.MethodType)
def test_chunk_wmo(self):
    """ Chunking of a list of WMOs

    Each Chunker result is stored once instead of calling
    ``fit_transform`` twice per configuration.
    """
    # Default and 'auto' chunking must return lists of integers:
    C = Chunker({"wmo": self.WMO})
    assert all([
        all(isinstance(x, int) for x in chunk)
        for chunk in C.fit_transform()
    ])
    C = Chunker({"wmo": self.WMO}, chunks="auto")
    assert all([
        all(isinstance(x, int) for x in chunk)
        for chunk in C.fit_transform()
    ])

    # A single chunk must hold all the WMOs:
    C = Chunker({"wmo": self.WMO}, chunks={"wmo": 1})
    grps = C.fit_transform()
    assert all([all(isinstance(x, int) for x in chunk) for chunk in grps])
    assert len(grps) == 1

    # Invalid chunks specification:
    with pytest.raises(ValueError):
        Chunker({"wmo": self.WMO}, chunks=["wmo", 1])

    # The chunker must expose a callable chunking method:
    C = Chunker({"wmo": self.WMO})
    assert isinstance(C.this_chunker, types.FunctionType) or isinstance(
        C.this_chunker, types.MethodType)
def test_invalid_chunksize(self):
    """ A non-dict ``chunksize`` argument must raise a ValueError """
    with pytest.raises(ValueError):
        Chunker({"box": self.BOX3d}, chunksize='toto')
def test_InvalidFetcherAccessPoint(self):
    """ An unknown request keyword must raise InvalidFetcherAccessPoint """
    with pytest.raises(InvalidFetcherAccessPoint):
        Chunker({"invalid": self.WMO})