Esempio n. 1
0
def download_all_nc(turl, folder):
    """Download every NetCDF file listed for ``turl`` concurrently.

    The file URLs are obtained from ``get_nc_urls(turl, download=True)`` and
    one gevent job running ``download_nc(url, folder)`` is spawned per URL.

    Args:
        turl: Location passed to ``get_nc_urls`` to list the NetCDF files.
        folder: Destination handed to ``download_nc`` for every file.

    Returns:
        list: The ``value`` of each job, in the same order as the URLs.
    """
    greenlets = []
    for nc_url in get_nc_urls(turl, download=True):
        greenlets.append(gevent.spawn(download_nc, nc_url, folder))

    # Wait at most 300 seconds for the whole batch.
    # NOTE(review): jobs that have not finished (or that failed) by then
    # presumably contribute ``None`` to the result -- confirm against
    # gevent's ``Greenlet.value`` semantics.
    gevent.joinall(greenlets, timeout=300)
    return [greenlet.value for greenlet in greenlets]
Esempio n. 2
0
def fetch_xr(params, **kwargs):
    """Open the NetCDF datasets of one location as a single xarray dataset.

    Args:
        params: Two-item sequence ``(turl, ref_degs)``. ``turl`` is passed to
            ``get_nc_urls``; ``ref_degs`` is an iterable of reference
            designator strings used to filter the returned URLs.
        **kwargs: Extra keyword arguments forwarded to ``xr.open_mfdataset``.

    Returns:
        The result of ``xr.open_mfdataset`` over the filtered URLs, opened
        with ``preprocess=preprocess_ds`` and ``decode_times=False``.
    """
    turl, ref_degs = params

    def _mentions_ref_twice(url):
        # Keep only instruments whose reference designator occurs more than
        # once in the URL (i.e. it was part of the original filter).
        return any(url.count(ref) > 1 for ref in ref_degs)

    selected = [url for url in get_nc_urls(turl) if _mentions_ref_twice(url)]
    return xr.open_mfdataset(
        selected,
        preprocess=preprocess_ds,
        decode_times=False,
        **kwargs)
Esempio n. 3
0
def fetch_xr(params, **kwargs):
    """Open the NetCDF datasets of one location as a single xarray dataset.

    Args:
        params: Two-item sequence ``(turl, ref_degs)``. ``turl`` is passed to
            ``get_nc_urls``; ``ref_degs`` is an iterable of reference
            designator strings used to filter URLs in the non-cloud case.
        **kwargs: Keyword arguments forwarded to ``xr.open_mfdataset``, except
            for three options consumed here:

            * ``cloud_source``: when truthy, URLs are requested from
              ``get_nc_urls`` with ``cloud_source=True`` and are not filtered
              by ``ref_degs``.
            * ``begin_date`` / ``end_date``: passed to ``get_nc_urls`` in the
              cloud case; ``None`` when omitted.

    Returns:
        The result of ``xr.open_mfdataset`` over the selected URLs, opened
        with ``engine="netcdf4"``.
    """
    turl, ref_degs = params

    # Consume the fetch-only options unconditionally and with defaults:
    # - a missing ``begin_date``/``end_date`` no longer raises KeyError, and
    # - none of the three (e.g. ``cloud_source=False``) is forwarded to
    #   ``xr.open_mfdataset``, which is not meant to receive them.
    cloud_source = kwargs.pop("cloud_source", None)
    begin_date = kwargs.pop("begin_date", None)
    end_date = kwargs.pop("end_date", None)

    if cloud_source:
        filt_ds = get_nc_urls(
            turl,
            cloud_source=True,
            begin_date=begin_date,
            end_date=end_date,
        )
    else:
        # only include instruments where ref_deg appears twice
        # (i.e. was in original filter)
        filt_ds = [
            url for url in get_nc_urls(turl)
            if any(url.count(ref) > 1 for ref in ref_degs)
        ]

    # TODO: Place some chunking here
    return xr.open_mfdataset(filt_ds, engine="netcdf4", **kwargs)
Esempio n. 4
0
    def to_xarray(self, **kwargs):
        """
        Load the requested OOI streams data into in-memory xarray Datasets.

        Only the "netcdf" data type with a non-cloud source is loaded here;
        any other data type just logs a warning. Datasets are collected from
        the processed raw files (when ``self._raw_file_dict`` is set) and
        from the URLs returned by ``self._perform_check()``.

        Args:
            **kwargs: Keyword arguments for xarray open_mfdataset. They are
                used directly for the raw-file datasets and passed through
                ``fetch_xr`` for the remote ones.

        Returns:
            list: ``self._dataset_list``, which is replaced by the newly
                collected datasets whenever at least one was collected.
        """
        ref_degs = self._filtered_data_catalog["reference_designator"].values
        opened = []

        if self._data_type != "netcdf":
            self._logger.warning(
                f"{self._data_type} cannot be converted to xarray dataset"
            )  # noqa
        elif not self._cloud_source:
            if self._raw_file_dict:
                processed = perform_ek60_processing(self._raw_file_dict)
                for ds_id, sources in processed.items():
                    merged = xr.open_mfdataset(
                        sources,
                        concat_dim=["ping_time"],
                        combine="nested",
                        **kwargs,
                    )
                    # Tag each dataset with the key it was produced under.
                    merged.attrs["id"] = ds_id
                    opened.append(merged)

            turls = self._perform_check()
            if len(turls) > 0:
                # TODO: Cache netcdf urls so that no need to re-request data
                self._netcdf_urls = [get_nc_urls(turl) for turl in turls]
                logger.info("Acquiring data from opendap urls ...")
                jobs = [
                    gevent.spawn(fetch_xr, (turl, ref_degs), **kwargs)
                    for turl in turls
                ]
                # Wait at most 300 seconds for all fetches.
                # NOTE(review): a job that has not produced a result by then
                # presumably contributes ``None`` -- confirm against gevent.
                gevent.joinall(jobs, timeout=300)
                opened.extend(job.value for job in jobs)

        if opened:
            self._dataset_list = opened

        return self._dataset_list
Esempio n. 5
0
def test_get_nc_urls():
    """Check that ``parser.get_nc_urls`` returns the expected URL list.

    The call is made with a THREDDS ``catalog.html`` address and the result
    must be a list equal to the single expected ``dodsC`` ``.nc`` URL.
    """
    # NOTE(review): the "[email protected]" segment in both URLs looks like an
    # e-mail obfuscation placeholder rather than a real catalog path --
    # confirm against the actual THREDDS address before relying on this test.
    expected_urls = [
        "https://opendap.oceanobservatories.org/thredds"
        "/dodsC/ooi/[email protected]/20180606T232135"
        "-RS03AXPS-PC03A-4A-CTDPFA303-streamed-ctdpf_"
        "optode_sample/deployment0004_RS03AXPS-PC03A-4A"
        "-CTDPFA303-streamed-ctdpf_optode_sample_20180101T000000."
        "596438-20180131T235959.815406.nc"
    ]
    catalog_page = ("https://opendap.oceanobservatories.org/thredds"
                    "/catalog/ooi/[email protected]/20180606T232135"
                    "-RS03AXPS-PC03A-4A-CTDPFA303-streamed-ctdpf_"
                    "optode_sample/catalog.html")

    found_urls = parser.get_nc_urls(thredds_url=catalog_page)

    assert isinstance(found_urls, list)
    assert found_urls == expected_urls
Esempio n. 6
0
def fetch_xr(turl, **kwargs):
    """Open every NetCDF URL listed for ``turl`` as a single xarray dataset.

    Args:
        turl: Location passed to ``get_nc_urls`` to list the NetCDF files.
        **kwargs: Extra keyword arguments forwarded to ``xr.open_mfdataset``.

    Returns:
        The result of ``xr.open_mfdataset`` over all listed URLs, opened with
        ``preprocess=preprocess_ds`` and ``decode_times=False``.
    """
    nc_urls = get_nc_urls(turl)
    return xr.open_mfdataset(
        nc_urls,
        preprocess=preprocess_ds,
        decode_times=False,
        **kwargs,
    )