Code Example #1
    def __init__(
        self,
        filename,
        product,
        parameter=["swvl1", "swvl2"],
        subgrid=None,
        mask_seapoints=False,
        array_1D=False,
        mode="r",
    ):

        super(ERANcImg, self).__init__(filename, mode=mode)

        if isinstance(parameter, str):
            parameter = [parameter]

        # look up short names
        self.parameter = lookup(product, parameter)["short_name"].values

        self.mask_seapoints = mask_seapoints
        self.array_1D = array_1D
        self.subgrid = subgrid

        if self.subgrid and not self.array_1D:
            warnings.warn(
                "Reading spatial subsets as 2D arrays ony works if there "
                "is an equal number of points in each line")
Code Example #2
File: interface.py  Project: sciencewiki/ecmwf_models
    def __init__(self,
                 filename,
                 product,
                 parameter=['swvl1', 'swvl2'],
                 mode='r',
                 subgrid=None,
                 mask_seapoints=False,
                 array_1D=False):

        super(ERANcImg, self).__init__(filename, mode=mode)

        if isinstance(parameter, str):
            parameter = [parameter]

        # look up short names
        self.parameter = lookup(product, parameter)['short_name'].values

        self.mask_seapoints = mask_seapoints
        self.array_1D = array_1D
        self.subgrid = subgrid
Code Example #3
    def __init__(
        self,
        filename,
        product,
        parameter=("swvl1", "swvl2"),
        subgrid=None,
        mask_seapoints=False,
        array_1D=True,
        mode="r",
    ):

        super(ERAGrbImg, self).__init__(filename, mode=mode)

        if isinstance(parameter, str):
            parameter = [parameter]

        # look up short names
        self.parameter = lookup(product, parameter)["short_name"].values
        self.product = product

        self.mask_seapoints = mask_seapoints
        self.array_1D = array_1D
        self.subgrid = subgrid
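Note that this variant declares the parameter default as an immutable tuple, whereas Code Examples #1 and #2 use a list. A short standalone sketch of the pitfall the tuple avoids (default values are created once, at definition time, so a mutable default is shared across calls):

def risky(params=["swvl1"]):
    params.append("swvl2")  # mutates the shared default list
    return params

print(risky())  # ['swvl1', 'swvl2']
print(risky())  # ['swvl1', 'swvl2', 'swvl2'] -- state leaks between calls

def safe(params=("swvl1",)):
    return list(params)  # tuples cannot be mutated; copy per call instead

The readers shown here never mutate parameter, so the list defaults are harmless in practice, but the tuple form is the safer convention.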
Code Example #4
    def read(self, timestamp=None):
        """
        Read data from the loaded image file.

        Parameters
        ----------
        timestamp : datetime, optional (default: None)
            Specific date (time) to read the data for.
        """
        grbs = pygrib.open(self.filename)

        grid = self.subgrid

        return_img = {}
        return_metadata = {}

        var_msg_lut = {p: None for p in self.parameter}
        sea_mask = None
        for n in range(1, grbs.messages + 1):  # grib messages are 1-indexed
            message = grbs.message(n)
            param_name = str(message.cfVarNameECMF)

            if param_name == "lsm":
                if self.mask_seapoints and sea_mask is None:
                    sea_mask = message.values.flatten()

            if param_name not in self.parameter:
                continue
            else:
                var_msg_lut[param_name] = n

        # available variables
        shape = None
        for param_name, n in var_msg_lut.items():
            if n is None:
                continue

            return_metadata[param_name] = {}

            message = grbs.message(n)

            param_data = message.values.flatten()
            if shape is None:
                shape = param_data.shape
            return_img[param_name] = param_data

            if grid is None:
                lats, lons = message.latlons()
                try:
                    res_lat, res_lon = get_grid_resolution(lats, lons)
                    grid = ERA_RegularImgGrid(res_lat, res_lon)
                except ValueError:  # when grid not regular
                    lons_gt_180 = np.where(lons > 180.0)
                    lons[lons_gt_180] = lons[lons_gt_180] - 360
                    grid = ERA_IrregularImgGrid(lons, lats)

            return_metadata[param_name]["units"] = message["units"]
            return_metadata[param_name]["long_name"] = \
                message["parameterName"]

            if "levels" in message.keys():
                return_metadata[param_name]["depth"] = "{:} cm".format(
                    message["levels"])

        if self.mask_seapoints:
            if sea_mask is None:
                raise IOError(
                    "No land sea mask parameter (lsm) in passed image"
                    " for masking.")
            else:
                # mask the loaded data
                for name in return_img.keys():
                    param_data = return_img[name]
                    param_data = np.ma.array(
                        param_data,
                        mask=np.logical_not(sea_mask),
                        fill_value=np.nan,
                    )
                    param_data = param_data.filled()
                    return_img[name] = param_data

        grbs.close()

        # missing variables
        for param_name, n in var_msg_lut.items():
            if n is not None:
                continue
            param_data = np.full(shape, np.nan)
            warnings.warn("Cannot load variable {var} from file {thefile}. "
                          "Filling image with NaNs.".format(
                              var=param_name, thefile=self.filename))
            return_img[param_name] = param_data
            return_metadata[param_name] = {}
            return_metadata[param_name]["long_name"] = lookup(
                self.product, [param_name]).iloc[0]["long_name"]

        if self.array_1D:
            return Image(
                grid.activearrlon,
                grid.activearrlat,
                return_img,
                return_metadata,
                timestamp,
            )
        else:
            nlat = np.unique(grid.activearrlat).size
            nlon = np.unique(grid.activearrlon).size

            for key in return_img:
                return_img[key] = return_img[key].reshape((nlat, nlon))

            return Image(
                grid.activearrlon.reshape(nlat, nlon),
                grid.activearrlat.reshape(nlat, nlon),
                return_img,
                return_metadata,
                timestamp,
            )
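The sea-point masking step in read() can be illustrated in isolation. A minimal sketch with synthetic data, assuming the usual ECMWF land-sea mask convention (lsm near 1 over land, near 0 over sea):

import numpy as np

# Synthetic flattened field and land-sea mask (1 = land, 0 = sea)
param_data = np.array([0.25, 0.30, 0.27, 0.31])
sea_mask = np.array([1.0, 0.0, 1.0, 0.0])

# Same pattern as above: mask where lsm is 0 (sea), fill masked points with NaN
masked = np.ma.array(
    param_data,
    mask=np.logical_not(sea_mask),
    fill_value=np.nan,
).filled()

print(masked)  # [0.25  nan 0.27  nan]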
Code Example #5
File: download.py  Project: wpreimes/ecmwf_models
def download_and_move(
    target_path,
    startdate,
    enddate,
    product="era5",
    variables=None,
    keep_original=False,
    h_steps=[0, 6, 12, 18],
    grb=False,
    dry_run=False,
    grid=None,
    remap_method="bil",
    cds_kwds={},
    stepsize="month",
) -> int:
    """
    Downloads the data from the ECMWF servers and moves them to the target
    path. This is done in monthly or daily increments (depending on
    `stepsize`) between start and end date.

    The files are then extracted into separate grib files per parameter and
    stored in yearly folders under the target_path.

    Parameters
    ----------
    target_path : str
        Path where the files are stored to
    startdate: datetime
        first date to download
    enddate: datetime
        last date to download
    product : str, optional (default: ERA5)
        Either ERA5 or ERA5Land
    variables : list, optional (default: None)
        Name of variables to download
    keep_original: bool
        keep the original downloaded data
    h_steps: list
        List of full hours to download data at the selected dates, e.g. [0, 12]
    grb: bool, optional (default: False)
        Download data as grib files
    dry_run: bool
        Do not download anything; this is only used for testing.
    grid : dict, optional
        A grid on which to remap the data using CDO. This must be a dictionary
        using CDO's grid description format, e.g.::

            grid = {
                "gridtype": "lonlat",
                "xsize": 720,
                "ysize": 360,
                "xfirst": -179.75,
                "yfirst": 89.75,
                "xinc": 0.5,
                "yinc": -0.5,
            }

        Default is to use no regridding.
    remap_method : str, optional
        Method to be used for regridding. Available methods are:
        - "bil": bilinear (default)
        - "bic": bicubic
        - "nn": nearest neighbour
        - "dis": distance weighted
        - "con": 1st order conservative remapping
        - "con2": 2nd order conservative remapping
        - "laf": largest area fraction remapping
    cds_kwds: dict, optional
        Additional arguments to be passed to the CDS API retrieve request.
    stepsize : str, optional
        Size of steps for requests, can be "month" or "day".

    Returns
    -------
    status_code: int
        0 : Downloaded data ok
        -1 : Error
        -10 : No data available for requested time period
    """
    product = product.lower()

    if variables is None:
        variables = default_variables(product=product)
    else:
        # find the dl_names
        variables = lookup(name=product, variables=variables)
        variables = variables["dl_name"].values.tolist()

    curr_start = startdate

    if dry_run:
        warnings.warn("Dry run does not create connection to CDS")
        c = None
        cds_status_tracker = None
    else:
        cds_status_tracker = CDSStatusTracker()
        c = cdsapi.Client(
            error_callback=cds_status_tracker.handle_error_function)

    pool = multiprocessing.Pool(1)
    while curr_start <= enddate:
        status_code = -1
        sy, sm, sd = curr_start.year, curr_start.month, curr_start.day
        y, m = sy, sm
        if stepsize == "month":
            # days in the current month
            sm_days = calendar.monthrange(sy, sm)[1]
            if (enddate.year == y) and (enddate.month == m):
                d = enddate.day
            else:
                d = sm_days
        elif stepsize == "day":
            d = sd
        else:
            raise ValueError(f"Invalid stepsize: {stepsize}")

        curr_end = datetime(y, m, d)

        fname = "{start}_{end}.{ext}".format(
            start=curr_start.strftime("%Y%m%d"),
            end=curr_end.strftime("%Y%m%d"),
            ext="grb" if grb else "nc",
        )

        downloaded_data_path = os.path.join(target_path, "temp_downloaded")
        if not os.path.exists(downloaded_data_path):
            os.mkdir(downloaded_data_path)
        dl_file = os.path.join(downloaded_data_path, fname)

        finished, i = False, 0

        while (not finished) and (i < 5):  # try max 5 times
            try:
                finished = download_era5(
                    c,
                    years=[sy],
                    months=[sm],
                    days=range(sd, d + 1),
                    h_steps=h_steps,
                    variables=variables,
                    grb=grb,
                    product=product,
                    target=dl_file,
                    dry_run=dry_run,
                    cds_kwds=cds_kwds,
                )
                status_code = 0
                break

            except Exception:
                # If no data is available we don't need to retry
                if (cds_status_tracker.download_statuscode ==
                        CDSStatusTracker.statuscode_unavailable):
                    status_code = -10
                    break

                # delete the partly downloaded data and retry
                if os.path.isfile(dl_file):
                    os.remove(dl_file)
                finished = False
                i += 1
                continue

        if status_code == 0:
            if grb:
                pool.apply_async(
                    save_gribs_from_grib,
                    args=(dl_file, target_path),
                    kwds=dict(
                        product_name=product.upper(),
                        keep_original=keep_original,
                    ),
                )
            else:
                pool.apply_async(
                    save_ncs_from_nc,
                    args=(
                        dl_file,
                        target_path,
                    ),
                    kwds=dict(
                        product_name=product.upper(),
                        grid=grid,
                        remap_method=remap_method,
                        keep_original=keep_original,
                    ),
                )

        curr_start = curr_end + timedelta(days=1)
    pool.close()
    pool.join()

    # remove temporary files
    if not keep_original:
        shutil.rmtree(downloaded_data_path)
    if grid is not None:
        gridpath = os.path.join(target_path, "grid.txt")
        if os.path.exists(gridpath):
            os.unlink(gridpath)
        weightspath = os.path.join(target_path, "remap_weights.nc")
        if os.path.exists(weightspath):
            os.unlink(weightspath)

    return status_code
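A minimal invocation sketch for the function above. The target path is hypothetical, and a real (non-dry) run requires valid CDS API credentials:

from datetime import datetime

status = download_and_move(
    target_path="/data/era5",  # hypothetical output directory
    startdate=datetime(2000, 1, 1),
    enddate=datetime(2000, 1, 31),
    product="era5",
    variables=["swvl1", "swvl2"],
    grb=False,
    dry_run=True,  # no connection to CDS is made in a dry run
)
print(status)  # 0 on success (see the Returns section above)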
Code Example #6
def download_and_move(
        target_path,
        startdate,
        enddate,
        variables=None,
        keep_original=False,
        grid_size=None,
        type="an",
        h_steps=(0, 6, 12, 18),
        steps=(0, ),
        grb=False,
        dry_run=False,
):
    """
    Downloads the data from the ECMWF servers and moves them to the target
    path. This is done in 30-day increments between start and end date to
    be efficient with the MARS system.
    See the recommendation for doing it this way in
    https://software.ecmwf.int/wiki/display/WEBAPI/ERA-Interim+daily+retrieval+efficiency

    The files are then extracted into separate grib/nc files and stored in
    yearly folders under the target_path.

    Parameters
    ----------
    target_path: str
        Path to which to copy the extracted parameter files
    startdate: datetime
        First date to download
    enddate: datetime
        Last date to download
    variables : list, optional (default: None)
        List of variable ids to pass to the client, if None are passed,
        the default variable ids will be downloaded.
    keep_original: bool, optional (default: False)
        Keep the original downloaded data
    grid_size: list, optional (default: None)
        [lon, lat] extent of the grid (regular for netcdf, at lat=0 for grib)
        If None is passed, the default grid size for the data product is used.
    type : str, optional (default: 'an')
        Data stream, model to download data for (fc=forecast)
    h_steps: tuple, optional (default: (0, 6, 12, 18))
        List of full hours to download data at the selected dates
    steps: tuple, optional (default: (0,))
        Forecast steps (in hours) to download for each time
    grb: bool, optional (default: False)
        Download data as grib files instead of netcdf files
    dry_run: bool
        Do not download anything; this is only used for testing.
    """
    product = "eraint"
    if variables is None:
        variables = default_variables()
    else:
        # find the dl_names
        variables = lookup(name=product, variables=variables)
        variables = variables["dl_name"].values.tolist()

    td = timedelta(days=30)
    current_start = startdate

    while current_start <= enddate:
        current_end = current_start + td
        if current_end >= enddate:
            current_end = enddate

        fname = "{start}_{end}.{ext}".format(
            start=current_start.strftime("%Y%m%d"),
            end=current_end.strftime("%Y%m%d"),
            ext="grb" if grb else "nc",
        )

        downloaded_data_path = os.path.join(target_path, "temp_downloaded")
        if not os.path.exists(downloaded_data_path):
            os.mkdir(downloaded_data_path)

        dl_file = os.path.join(downloaded_data_path, fname)

        download_eraint(
            dl_file,
            current_start,
            current_end,
            variables,
            grid_size=grid_size,
            h_steps=h_steps,
            type=type,
            steps=steps,
            grb=grb,
            dry_run=dry_run,
        )

        if grb:
            save_gribs_from_grib(dl_file, target_path, product.upper())
        else:
            save_ncs_from_nc(dl_file, target_path, product.upper())

        if not keep_original:
            shutil.rmtree(downloaded_data_path)
        current_start = current_end + timedelta(days=1)
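The 30-day windowing used by the loop above can be shown on its own; each request covers at most 30 days, and the final window is clamped to enddate:

from datetime import datetime, timedelta

startdate, enddate = datetime(2000, 1, 1), datetime(2000, 3, 15)
current_start = startdate
while current_start <= enddate:
    current_end = min(current_start + timedelta(days=30), enddate)
    print(current_start.date(), "->", current_end.date())
    current_start = current_end + timedelta(days=1)

# 2000-01-01 -> 2000-01-31
# 2000-02-01 -> 2000-03-02
# 2000-03-03 -> 2000-03-15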