Ejemplo n.º 1
0
    def uri(self):
        """ List of URLs to load for a request

        The time span of the request is ``BOX[7] - BOX[6]``. Whenever the box has to
        be split, the Chunker in charge is stored in ``self.Chunker`` and one URL
        is produced for each box it returns.

        Returns
        -------
        list(str)
        """
        max_days = 90  # Longest time interval accepted for one request (days)
        max_span = np.timedelta64(max_days, "D")
        span = np.timedelta64(
            pd.to_datetime(self.BOX[7]) - pd.to_datetime(self.BOX[6]), "D")
        too_long = span > max_span

        if not self.parallel:
            if not too_long:
                # The whole request fits in a single URL
                return [self.get_url()]
            # Sequential request over too long a period: only split along time
            self.Chunker = Chunker(
                {"box": self.BOX},
                chunks=dict(lon=1, lat=1, dpt=1, time="auto"),
                chunksize=dict(time=max_days),
            )
        else:
            # Cap the maximum size of time chunks to the allowed interval,
            # warning the user when an explicit larger value is overridden:
            oversized = ('time' in self.chunks_maxsize
                         and self.chunks_maxsize['time'] > max_days)
            if oversized:
                warnings.warn(
                    ("argovis only allows requests of %i days interval, "
                     "modify chunks_maxsize['time'] to %i" %
                     (max_days, max_days)))
            if oversized or 'time' not in self.chunks_maxsize:
                self.chunks_maxsize['time'] = max_days

            # Ensure time chunks will never exceed what's allowed by argovis:
            if (too_long and 'time' in self.chunks
                    and self.chunks['time'] not in ('auto', )):
                self.chunks['time'] = 'auto'

            self.Chunker = Chunker({"box": self.BOX},
                                   chunks=self.chunks,
                                   chunksize=self.chunks_maxsize)

        return [
            Fetch_box(box=sub_box, ds=self.dataset_id).get_url()
            for sub_box in self.Chunker.fit_transform()
        ]
Ejemplo n.º 2
0
    def uri(self):
        """ List of files to load for a request

        A sequential request gives the single URL from ``get_url()``. A parallel
        request gives one URL for each box returned by the Chunker, which is
        stored in ``self.Chunker``.

        Returns
        -------
        list(str)
        """
        if self.parallel:
            self.Chunker = Chunker({"box": self.BOX},
                                   chunks=self.chunks,
                                   chunksize=self.chunks_maxsize)
            return [
                Fetch_box(box=sub_box, ds=self.dataset_id).get_url()
                for sub_box in self.Chunker.fit_transform()
            ]
        return [self.get_url()]
Ejemplo n.º 3
0
class Fetch_box(ErddapArgoDataFetcher):
    """ Manage access to Argo data through Ifremer ERDDAP for: an ocean rectangle
    """
    def init(self, box: list, **kw):
        """ Create Argo data loader

            Parameters
            ----------
            box : list(float, float, float, float, float, float, str, str)
                The box domain to load all Argo data for:
                    box = [lon_min, lon_max, lat_min, lat_max, pres_min, pres_max]
                    or:
                    box = [lon_min, lon_max, lat_min, lat_max, pres_min, pres_max, datim_min, datim_max]

            Returns
            -------
            The instance itself. ``self.definition`` is only set here for the
            "phy" and "ref" datasets.
        """
        self.BOX = box

        known_definitions = (
            ("phy",
             "Ifremer erddap Argo data fetcher for a space/time region"),
            ("ref",
             "Ifremer erddap Argo REFERENCE data fetcher for a space/time region"),
        )
        for dataset, text in known_definitions:
            if self.dataset_id == dataset:
                self.definition = text
                break

        return self

    def define_constraints(self):
        """ Define request constraints

        Longitude, latitude and pressure bounds are always set from the first six
        box values; time bounds are added only when the box has 8 values.
        """
        self.erddap.constraints = {
            "longitude>=": self.BOX[0],
            "longitude<=": self.BOX[1],
            "latitude>=": self.BOX[2],
            "latitude<=": self.BOX[3],
            "pres>=": self.BOX[4],
            "pres<=": self.BOX[5],
        }
        if len(self.BOX) == 8:
            self.erddap.constraints.update({
                "time>=": self.BOX[6],
                "time<=": self.BOX[7],
            })
        return None

    @property
    def uri(self):
        """ List of files to load for a request

        A sequential request gives the single URL from ``get_url()``. A parallel
        request gives one URL for each box returned by the Chunker, which is
        stored in ``self.Chunker``.

        Returns
        -------
        list(str)
        """
        if self.parallel:
            self.Chunker = Chunker({"box": self.BOX},
                                   chunks=self.chunks,
                                   chunksize=self.chunks_maxsize)
            return [
                Fetch_box(box=sub_box, ds=self.dataset_id).get_url()
                for sub_box in self.Chunker.fit_transform()
            ]
        return [self.get_url()]
Ejemplo n.º 4
0
    def uri(self):
        """ List of URLs to load for a request

        Sequential requests use a single URL for up to 5 WMOs, and one URL per WMO
        beyond that. Parallel requests use one URL for each group of WMOs returned
        by the Chunker, which is stored in ``self.Chunker``.

        Returns
        -------
        list(str)
        """
        def url_of(selection):
            # URL of a sequential fetcher restricted to ``selection``
            return Fetch_wmo(WMO=selection,
                             CYC=self.CYC,
                             ds=self.dataset_id,
                             parallel=False).get_url()

        if self.parallel:
            self.Chunker = Chunker({"wmo": self.WMO},
                                   chunks=self.chunks,
                                   chunksize=self.chunks_maxsize)
            return [url_of(group) for group in self.Chunker.fit_transform()]

        # todo: This max WMO number should be parameterized somewhere else
        if len(self.WMO) <= 5:
            # Retrieve all WMOs in a single request
            return [self.get_url()]

        # Retrieve one WMO by URL sequentially (same behaviour as localftp and argovis)
        return [url_of(wmo) for wmo in self.WMO]
Ejemplo n.º 5
0
class Fetch_wmo(ErddapArgoDataFetcher):
    """ Manage access to Argo data through Ifremer ERDDAP for: a list of WMOs

    This class is instantiated when a call is made to these facade access points:
        - `ArgoDataFetcher(src='erddap').float(**)`
        - `ArgoDataFetcher(src='erddap').profile(**)`

    """
    def init(self, WMO=None, CYC=None, **kw):
        """ Create Argo data loader for WMOs

            Parameters
            ----------
            WMO : list(int)
                The list of WMOs to load all Argo data for. A single integer is
                wrapped into a list; when omitted, an empty list is used.
            CYC : int, np.array(int), list(int)
                The cycle numbers to load.

            Returns
            -------
            The instance itself.

            Raises
            ------
            ValueError
                If any WMO is not an integer (Python or NumPy).
        """
        # Make sure we deal with a list of integers for WMOs.
        # A new list is created when WMO is omitted, so that instances never
        # share one default list object through ``self.WMO``:
        if WMO is None:
            WMO = []
        elif not isinstance(WMO, list):
            WMO = [WMO]
        # Any NumPy integer width is accepted, not only np.int64:
        if not all(isinstance(x, (int, np.integer)) for x in WMO):
            raise ValueError("WMO must be a list of integers")

        # Make sure we deal with an array of integers for cycle numbers.
        # NumPy integer scalars are converted as well, otherwise
        # define_constraints() would ignore them (it only reads arrays):
        if isinstance(CYC, (int, np.integer)):
            CYC = np.array((CYC, ), dtype="int")
        elif isinstance(CYC, list):
            CYC = np.array(CYC, dtype="int")

        self.WMO = WMO
        self.CYC = CYC

        self.definition = "?"
        if self.dataset_id == "phy":
            self.definition = "Ifremer erddap Argo data fetcher for floats"
        elif self.dataset_id == "ref":
            self.definition = "Ifremer erddap Argo REFERENCE data fetcher for floats"
        return self

    def define_constraints(self):
        """ Define erddap constraints

        WMOs (and cycle numbers, when ``self.CYC`` is an array) are joined with
        "|" into the values of the "=~" constraints.

        Returns
        -------
        The instance itself.
        """
        self.erddap.constraints = {
            "platform_number=~": "|".join(["%i" % i for i in self.WMO])
        }
        if isinstance(self.CYC, np.ndarray):
            self.erddap.constraints.update(
                {"cycle_number=~": "|".join(["%i" % i for i in self.CYC])})
        return self

    @property
    def uri(self):
        """ List of URLs to load for a request

        Sequential requests use a single URL for up to 5 WMOs, and one URL per WMO
        beyond that. Parallel requests use one URL for each group of WMOs returned
        by the Chunker, which is stored in ``self.Chunker``.

        Returns
        -------
        list(str)
        """
        def url_of(selection):
            # URL of a sequential fetcher restricted to ``selection``
            return Fetch_wmo(WMO=selection,
                             CYC=self.CYC,
                             ds=self.dataset_id,
                             parallel=False).get_url()

        if self.parallel:
            self.Chunker = Chunker({"wmo": self.WMO},
                                   chunks=self.chunks,
                                   chunksize=self.chunks_maxsize)
            return [url_of(group) for group in self.Chunker.fit_transform()]

        # todo: This max WMO number should be parameterized somewhere else
        if len(self.WMO) <= 5:
            # Retrieve all WMOs in a single request
            return [self.get_url()]

        # Retrieve one WMO by URL sequentially (same behaviour as localftp and argovis)
        return [url_of(wmo) for wmo in self.WMO]

    def dashboard(self, **kw):
        """ Open the dashboard of the requested float

        Only available when exactly one WMO is requested; otherwise a warning is
        issued and nothing is returned.
        """
        if len(self.WMO) == 1:
            return open_dashboard(wmo=self.WMO[0], **kw)
        else:
            warnings.warn(
                "Plot dashboard only available for a single float request")
Ejemplo n.º 6
0
class Fetch_box(ArgovisDataFetcher):
    """ Manage access to Argo data through Argovis for: an ocean rectangle """
    def init(self, box: list):
        """ Create Argo data loader

            Parameters
            ----------
            box : list(float, float, float, float, float, float, str, str)
                The box domain to load all Argo data for:
                box = [lon_min, lon_max, lat_min, lat_max, pres_min, pres_max]
                or:
                box = [lon_min, lon_max, lat_min, lat_max, pres_min, pres_max, datim_min, datim_max]

                When no dates are given, the period from one month ago up to now
                is used. The list given by the caller is never modified.

            Returns
            -------
            The instance itself.
        """
        if len(box) == 6:
            # Select the last month of data.
            # A new list is built so that the caller's list is left untouched:
            end = pd.to_datetime("now")
            start = end - pd.DateOffset(months=1)
            box = list(box) + [
                start.strftime("%Y-%m-%d"),
                end.strftime("%Y-%m-%d"),
            ]
        self.BOX = box

        self.definition = "?"
        if self.dataset_id == "phy":
            self.definition = "Argovis Argo data fetcher for a space/time region"
        return self

    def get_url_shape(self):
        """ Return the URL used to download data, with the box given as a closed polygon """
        shape = [[
            [self.BOX[0], self.BOX[2]],  # ll
            [self.BOX[0], self.BOX[3]],  # ul
            [self.BOX[1], self.BOX[3]],  # ur
            [self.BOX[1], self.BOX[2]],  # lr
            [self.BOX[0], self.BOX[2]],  # ll
        ]]
        strShape = str(shape).replace(" ", "")
        url = self.server + "/selection/profiles"
        url += "?startDate={}".format(
            pd.to_datetime(self.BOX[6]).strftime("%Y-%m-%dT%H:%M:%SZ"))
        url += "&endDate={}".format(
            pd.to_datetime(self.BOX[7]).strftime("%Y-%m-%dT%H:%M:%SZ"))
        url += "&shape={}".format(strShape)
        url += "&presRange=[{},{}]".format(self.BOX[4], self.BOX[5])
        return url

    def get_url_rect(self):
        """ Return the URL used to download data, with the box given by two corners """
        def strCorner(b, i):
            # "[x,y]" string of a corner, without any white space
            return str([b[i[0]], b[i[1]]]).replace(" ", "")

        def strDate(b, i):
            return pd.to_datetime(b[i]).strftime("%Y-%m-%dT%H:%M:%SZ")

        url = self.server + "/selection/box/profiles"
        url += "?startDate={}".format(strDate(self.BOX, 6))
        url += "&endDate={}".format(strDate(self.BOX, 7))
        url += "&presRange=[{},{}]".format(self.BOX[4], self.BOX[5])
        url += "&llCorner={}".format(strCorner(self.BOX, [0, 2]))
        url += "&urCorner={}".format(strCorner(self.BOX, [1, 3]))
        return url

    def get_url(self):
        """ Return the URL used to download data (rectangle form; get_url_shape() is the alternative) """
        return self.get_url_rect()

    @property
    def uri(self):
        """ List of URLs to load for a request

        The time span of the request is ``BOX[7] - BOX[6]``. Whenever the box has to
        be split, the Chunker in charge is stored in ``self.Chunker`` and one URL
        is produced for each box it returns.

        Returns
        -------
        list(str)
        """
        max_days = 90  # Longest time interval accepted for one request (days)
        max_span = np.timedelta64(max_days, "D")
        span = np.timedelta64(
            pd.to_datetime(self.BOX[7]) - pd.to_datetime(self.BOX[6]), "D")
        too_long = span > max_span

        if not self.parallel:
            if not too_long:
                # The whole request fits in a single URL
                return [self.get_url()]
            # Sequential request over too long a period: only split along time
            self.Chunker = Chunker(
                {"box": self.BOX},
                chunks={
                    "lon": 1,
                    "lat": 1,
                    "dpt": 1,
                    "time": "auto"
                },
                chunksize={"time": max_days},
            )
        else:
            # Cap the maximum size of time chunks to the allowed interval,
            # warning the user when an explicit larger value is overridden:
            if 'time' not in self.chunks_maxsize:
                self.chunks_maxsize['time'] = max_days
            elif self.chunks_maxsize['time'] > max_days:
                warnings.warn(
                    ("argovis only allows requests of %i days interval, "
                     "modify chunks_maxsize['time'] to %i" %
                     (max_days, max_days)))
                self.chunks_maxsize['time'] = max_days

            # Ensure time chunks will never exceed what's allowed by argovis:
            if (too_long and 'time' in self.chunks
                    and self.chunks['time'] not in ['auto']):
                self.chunks['time'] = 'auto'

            self.Chunker = Chunker({"box": self.BOX},
                                   chunks=self.chunks,
                                   chunksize=self.chunks_maxsize)

        return [
            Fetch_box(box=sub_box, ds=self.dataset_id).get_url()
            for sub_box in self.Chunker.fit_transform()
        ]

    @property
    def url(self):
        """ URL used to download data (rectangle form; get_url_shape() is the alternative) """
        return self.get_url_rect()
Ejemplo n.º 7
0
    def test_chunk_box4d(self):
        """ Check the Chunker on a "box" request made of ``self.BOX4d``

        For each of the 4 axes: a request for 2 chunks along that axis must give
        2 boxes, and a chunk size imposed along that axis must be respected by the
        first box returned.
        """
        axes = ("lon", "lat", "dpt", "time")

        def request():
            # A new request dictionary for each Chunker
            return {"box": self.BOX4d}

        def only_boxes(candidates):
            return all([is_box(item) for item in candidates])

        # Default and automatic chunking:
        C = Chunker(request())
        assert only_boxes(C.fit_transform())

        C = Chunker(request(), chunks="auto")
        assert only_boxes(C.fit_transform())

        for rank, axis in enumerate(axes):
            # Number of chunks imposed along this axis:
            C = Chunker(request(),
                        chunks={a: 2 if a == axis else 1
                                for a in axes})
            assert only_boxes(C.fit_transform())
            assert len(C.fit_transform()) == 2

            # Size of chunks imposed along this axis:
            C = Chunker(
                request(),
                chunks={a: 1
                        for a in axes if a != axis},
                chunksize={axis: 5 if axis == "time" else 10},
            )
            chunks = C.fit_transform()
            assert only_boxes(chunks)
            # Bounds of an axis are stored next to each other in a box:
            lower, upper = 2 * rank, 2 * rank + 1
            if axis == "time":
                assert np.timedelta64(
                    pd.to_datetime(chunks[0][upper]) -
                    pd.to_datetime(chunks[0][lower]),
                    "D") <= np.timedelta64(5, "D")
            else:
                assert chunks[0][upper] - chunks[0][lower] == 10

        # Chunks given as a list are rejected:
        with pytest.raises(ValueError):
            Chunker(request(), chunks=["lon", 1])

        C = Chunker(request())
        assert isinstance(C.this_chunker,
                          (types.FunctionType, types.MethodType))
Ejemplo n.º 8
0
    def test_chunk_box3d(self):
        """ Check the Chunker on a "box" request made of ``self.BOX3d``

        For each of the 3 axes: a request for 12 chunks along that axis must give
        12 boxes, and a chunk size of 10 imposed along that axis must be the
        extent of the first box returned. Chunking along several axes at once
        must give the product of the chunk numbers.
        """
        axes = ("lon", "lat", "dpt")

        def request():
            # A new request dictionary for each Chunker
            return {"box": self.BOX3d}

        def only_boxes(candidates):
            return all([is_box(item) for item in candidates])

        # Default and automatic chunking:
        C = Chunker(request())
        assert only_boxes(C.fit_transform())

        C = Chunker(request(), chunks="auto")
        assert only_boxes(C.fit_transform())

        for rank, axis in enumerate(axes):
            # Number of chunks imposed along this axis:
            C = Chunker(request(),
                        chunks={a: 12 if a == axis else 1
                                for a in axes})
            assert only_boxes(C.fit_transform())
            assert len(C.fit_transform()) == 12

            # Size of chunks imposed along this axis:
            C = Chunker(request(),
                        chunks={a: 1
                                for a in axes if a != axis},
                        chunksize={axis: 10})
            chunks = C.fit_transform()
            assert only_boxes(chunks)
            # Bounds of an axis are stored next to each other in a box:
            assert chunks[0][2 * rank + 1] - chunks[0][2 * rank] == 10

        # Chunking along several axes at once:
        combined = (
            ({"lon": 4, "lat": 2, "dpt": 1}, 2 * 4),
            ({"lon": 2, "lat": 3, "dpt": 4}, 2 * 3 * 4),
        )
        for layout, expected in combined:
            C = Chunker(request(), chunks=layout)
            assert only_boxes(C.fit_transform())
            assert len(C.fit_transform()) == expected

        # Chunks given as a list are rejected:
        with pytest.raises(ValueError):
            Chunker(request(), chunks=["lon", 1])

        C = Chunker(request())
        assert isinstance(C.this_chunker,
                          (types.FunctionType, types.MethodType))
Ejemplo n.º 9
0
    def test_chunk_wmo(self):
        """ Check the Chunker on a "wmo" request made of ``self.WMO``

        Every chunk must only hold integers, and a request for 1 chunk must give
        1 chunk.
        """
        def request():
            # A new request dictionary for each Chunker
            return {"wmo": self.WMO}

        def only_integers(groups):
            return all([
                all(isinstance(item, int) for item in group)
                for group in groups
            ])

        # Default and automatic chunking:
        C = Chunker(request())
        assert only_integers(C.fit_transform())

        C = Chunker(request(), chunks="auto")
        assert only_integers(C.fit_transform())

        # Number of chunks imposed:
        C = Chunker(request(), chunks={"wmo": 1})
        assert only_integers(C.fit_transform())
        assert len(C.fit_transform()) == 1

        # Chunks given as a list are rejected:
        with pytest.raises(ValueError):
            Chunker(request(), chunks=["wmo", 1])

        C = Chunker(request())
        assert isinstance(C.this_chunker,
                          (types.FunctionType, types.MethodType))
Ejemplo n.º 10
0
 def test_invalid_chunksize(self):
     """ The string 'toto' given as ``chunksize`` must raise a ValueError """
     request = {"box": self.BOX3d}
     with pytest.raises(ValueError):
         Chunker(request, chunksize='toto')
Ejemplo n.º 11
0
 def test_InvalidFetcherAccessPoint(self):
     """ A request with the "invalid" key must raise InvalidFetcherAccessPoint """
     request = {"invalid": self.WMO}
     with pytest.raises(InvalidFetcherAccessPoint):
         Chunker(request)