예제 #1
0
    def test_time_start_only(self):
        """Subset with only ``start_date`` supplied, at month and day precision.

        Each case compares the first selected time step with the requested
        start and the last selected time step with the end of the input.
        """
        import warnings

        da = open_dataset(self.nc_poslons).tas
        yr_st = "2050"

        # start date only
        # ``pytest.warns(None)`` is deprecated since pytest 7 and is an error in
        # pytest 8, so warnings are captured with the standard library instead.
        # They are tolerated here, not asserted on.
        with warnings.catch_warnings(record=True):
            warnings.simplefilter("always")
            out = subset.subset_time(da, start_date=f"{yr_st}-01")
        np.testing.assert_array_equal(out.time.dt.year.min(), int(yr_st))
        np.testing.assert_array_equal(out.time.dt.year.max(),
                                      da.time.dt.year.max())

        # month precision, starting mid-year
        with warnings.catch_warnings(record=True):
            warnings.simplefilter("always")
            out = subset.subset_time(da, start_date=f"{yr_st}-07")
        np.testing.assert_array_equal(out.time.dt.year.min(), int(yr_st))
        np.testing.assert_array_equal(out.time.min().dt.month, 7)
        np.testing.assert_array_equal(out.time.dt.year.max(),
                                      da.time.dt.year.max())
        np.testing.assert_array_equal(out.time.max(), da.time.max())

        # day precision, starting mid-month
        with warnings.catch_warnings(record=True):
            warnings.simplefilter("always")
            out = subset.subset_time(da, start_date=f"{yr_st}-07-15")
        np.testing.assert_array_equal(out.time.dt.year.min(), int(yr_st))
        np.testing.assert_array_equal(out.time.min().dt.month, 7)
        np.testing.assert_array_equal(out.time.min().dt.day, 15)
        np.testing.assert_array_equal(out.time.dt.year.max(),
                                      da.time.dt.year.max())
        np.testing.assert_array_equal(out.time.max(), da.time.max())
예제 #2
0
    def test_warnings(self):
        """Check the errors and warnings ``subset_time`` emits for bad input.

        Covers a reversed date range (``ValueError``), unknown keyword
        arguments (``TypeError``), non-string dates (format warning) and
        timestamps that do not fall on a time step (nudge warnings).
        """
        import warnings

        da = xr.open_dataset(self.nc_poslons).tas

        # end date earlier than start date
        with pytest.raises(ValueError):
            subset.subset_time(da, start_date="2059", end_date="2050")

        # keyword arguments the function does not accept
        with pytest.raises(TypeError):
            subset.subset_time(da, start_yr=2050, end_yr=2059)

        # ``pytest.warns(None)`` is deprecated since pytest 7 and is an error in
        # pytest 8; ``catch_warnings(record=True)`` yields the same kind of
        # records, each exposing ``.message``.
        with warnings.catch_warnings(record=True) as record:
            warnings.simplefilter("always")
            subset.subset_time(
                da,
                start_date=2050,
                end_date=2055,
            )
        assert (
            'start_date and end_date require dates in (type: str) using formats of "%Y", "%Y-%m" or "%Y-%m-%d".'
            in [str(q.message) for q in record])

        with warnings.catch_warnings(record=True) as record:
            warnings.simplefilter("always")
            subset.subset_time(da,
                               start_date="2064-01-01T00:00:00",
                               end_date="2065-02-01T03:12:01")
        assert [str(q.message) for q in record] == [
            '"start_date" has been nudged to nearest valid time step in xarray object.',
            '"end_date" has been nudged to nearest valid time step in xarray object.',
        ]
예제 #3
0
    def test_simple(self):
        """Year, year-month and full-date bounds must all select the same data."""
        first_year, last_year = "2050", "2059"
        tas = open_dataset(self.nc_poslons).tas

        # Reference selection using bare years as bounds.
        by_year = subset.subset_time(tas,
                                     start_date=first_year,
                                     end_date=last_year)

        # The same period spelled at month and at day precision.
        finer_bounds = (
            (f"{first_year}-01", f"{last_year}-12"),
            (f"{first_year}-01-01", f"{last_year}-12-31"),
        )
        finer_selections = [
            subset.subset_time(tas, start_date=begin, end_date=finish)
            for begin, finish in finer_bounds
        ]
        for selection in finer_selections:
            np.testing.assert_array_equal(by_year, selection)

        # The selection spans exactly the ten requested calendar years.
        selected_years = by_year.time.dt.year
        np.testing.assert_array_equal(len(np.unique(selected_years)), 10)
        np.testing.assert_array_equal(selected_years.max(), int(last_year))
        np.testing.assert_array_equal(selected_years.min(), int(first_year))
예제 #4
0
    def test_time_incomplete_years(self):
        """Mid-year bounds: day-precision and month-precision requests must agree."""
        first_year, last_year = "2050", "2059"
        tas = open_dataset(self.nc_poslons).tas

        by_day = subset.subset_time(
            tas,
            start_date=f"{first_year}-07-01",
            end_date=f"{last_year}-06-30",
        )
        by_month = subset.subset_time(
            tas,
            start_date=f"{first_year}-07",
            end_date=f"{last_year}-06",
        )
        np.testing.assert_array_equal(by_day, by_month)

        # The selection must open on 1 July of the first year and close on
        # 30 June of the last year.
        earliest = by_day.time.min()
        latest = by_day.time.max()
        expectations = (
            (by_day.time.dt.year.min(), int(first_year)),
            (earliest.dt.month, 7),
            (earliest.dt.day, 1),
            (by_day.time.dt.year.max(), int(last_year)),
            (latest.dt.month, 6),
            (latest.dt.day, 30),
        )
        for actual, expected in expectations:
            np.testing.assert_array_equal(actual, expected)
예제 #5
0
    def test_time_end_only(self):
        """Subset with only ``end_date`` supplied, at month and day precision.

        Each case compares the last selected time step with the requested end
        and the first selected time step with the start of the input.
        """
        import warnings

        da = open_dataset(self.nc_poslons).tas
        yr_ed = "2059"

        # end date only
        # ``pytest.warns(None)`` is deprecated since pytest 7 and is an error in
        # pytest 8, so warnings are captured with the standard library instead.
        # They are tolerated here, not asserted on.
        with warnings.catch_warnings(record=True):
            warnings.simplefilter("always")
            out = subset.subset_time(da, end_date=f"{yr_ed}-01")
        np.testing.assert_array_equal(out.time.dt.year.max(), int(yr_ed))
        np.testing.assert_array_equal(out.time.max().dt.month, 1)
        np.testing.assert_array_equal(out.time.max().dt.day, 31)
        np.testing.assert_array_equal(out.time.min(), da.time.min())

        # day precision, ending mid-month
        with warnings.catch_warnings(record=True):
            warnings.simplefilter("always")
            out = subset.subset_time(da, end_date=f"{yr_ed}-06-15")
        np.testing.assert_array_equal(out.time.dt.year.max(), int(yr_ed))
        np.testing.assert_array_equal(out.time.max().dt.month, 6)
        np.testing.assert_array_equal(out.time.max().dt.day, 15)
        np.testing.assert_array_equal(out.time.min(), da.time.min())
예제 #6
0
    def test_time_dates_outofbounds(self):
        """Bounds outside the data's time range fall back to the data's own limits.

        Both an out-of-range ``start_date`` and an out-of-range ``end_date``
        must emit their respective "not found" warning, and the result must
        span the same years as the input.
        """
        import warnings

        da = open_dataset(self.nc_poslons).tas
        yr_st = "1776"
        yr_ed = "2077"

        # ``pytest.warns(None)`` is deprecated since pytest 7 and is an error in
        # pytest 8; ``catch_warnings(record=True)`` yields the same kind of
        # records, each exposing ``.message``.
        with warnings.catch_warnings(record=True) as record:
            warnings.simplefilter("always")
            out = subset.subset_time(da,
                                     start_date=f"{yr_st}-01",
                                     end_date=f"{yr_ed}-01")
        np.testing.assert_array_equal(out.time.dt.year.min(),
                                      da.time.dt.year.min())
        np.testing.assert_array_equal(out.time.dt.year.max(),
                                      da.time.dt.year.max())

        # Render the captured warnings once and check both messages against it.
        messages = [str(q.message) for q in record]
        assert (
            '"start_date" not found within input date time range. Defaulting to minimum time step in xarray object.'
            in messages)
        assert (
            '"end_date" not found within input date time range. Defaulting to maximum time step in xarray object.'
            in messages)
예제 #7
0
    def test_warnings(self):
        """Check the errors and warnings ``subset_time`` emits for bad input.

        Covers a reversed date range (``ValueError``), unknown keyword
        arguments (``TypeError``) and non-string dates (format warning).
        """
        import warnings

        da = open_dataset(self.nc_poslons).tas

        # end date earlier than start date
        with pytest.raises(ValueError):
            subset.subset_time(da, start_date="2059", end_date="2050")

        # keyword arguments the function does not accept
        with pytest.raises(TypeError):
            subset.subset_time(da, start_yr=2050, end_yr=2059)

        # ``pytest.warns(None)`` is deprecated since pytest 7 and is an error in
        # pytest 8; ``catch_warnings(record=True)`` yields the same kind of
        # records, each exposing ``.message``.
        with warnings.catch_warnings(record=True) as record:
            warnings.simplefilter("always")
            subset.subset_time(
                da,
                start_date=2050,
                end_date=2055,
            )
        assert (
            'start_date and end_date require dates in (type: str) using formats of "%Y", "%Y-%m" or "%Y-%m-%d".'
            in [str(q.message) for q in record])
예제 #8
0
파일: subset.py 프로젝트: bird-house/finch
def finch_average_shape(
    process: Process,
    netcdf_inputs: List[ComplexInput],
    request_inputs: RequestInputs,
) -> List[Path]:
    """Parse wps `request_inputs` based on their name and average `netcdf_inputs`.

    The expected names of the request_inputs are as followed (taken from `wpsio.py`):
     - shape: Polygon contour to average the data over.
     - start_date: Initial date for temporal subsetting.
     - end_date: Final date for temporal subsetting.
     - tolerance: When given and greater than 0, the shape geometry is
       simplified with this tolerance before averaging.
     - variable: Optional variable names; when given, only these are kept
       from each dataset.

    Parameters
    ----------
    process
        The running process; used for progress logging and for its `workdir`,
        where the output files are written.
    netcdf_inputs
        The resources to open and average, one output file per resource.
    request_inputs
        The request inputs, looked up by the names listed above.

    Returns
    -------
    List[Path]
        Paths of the averaged files, in input order. A resource whose average
        has an empty dimension is logged with a warning and skipped.
    """
    shp = Path(request_inputs[wpsio.shape.identifier][0].file)
    if shp.suffix == ".zip":
        shp = extract_shp(shp)

    start_date = single_input_or_none(request_inputs,
                                      wpsio.start_date.identifier)
    end_date = single_input_or_none(request_inputs, wpsio.end_date.identifier)
    tolerance = single_input_or_none(request_inputs,
                                     wpsio.tolerance.identifier)
    variables = [r.data for r in request_inputs.get("variable", [])]

    shape = gpd.read_file(shp)
    # Guard against a missing tolerance: `None > 0` raises TypeError.
    if tolerance is not None and tolerance > 0:
        shape['geometry'] = shape.simplify(tolerance)

    n_files = len(netcdf_inputs)

    # if not subsetting by time, it's not necessary to decode times
    # (same answer for every resource, so it is computed once)
    time_subset = start_date is not None or end_date is not None

    output_files = []

    for count, resource in enumerate(netcdf_inputs, start=1):
        dataset = try_opendap(resource,
                              decode_times=time_subset,
                              chunk_dims=['time', 'realization'])

        write_log(
            process,
            f"Averaging file {count} of {n_files} ({getattr(resource, resource.prop)})",
            subtask_percentage=(count - 1) * 100 // n_files,
        )

        dataset = dataset[variables] if variables else dataset

        if time_subset:
            dataset = subset_time(dataset,
                                  start_date=start_date,
                                  end_date=end_date)
        averaged = average_shape(dataset, shape)

        if not all(averaged.dims.values()):
            LOGGER.warning(f"Average is empty for dataset: {resource.url}")
            # Skip only this resource. A bare `return` here would hand back
            # None and drop the files already produced.
            continue

        p = make_subset_file_name(resource, kind="avg")
        output_filename = Path(process.workdir) / p

        dataset_to_netcdf(averaged, output_filename)

        output_files.append(output_filename)

    return output_files