Code Example #1
File: compute.py  Project: shivam14300/hydro-drought
def scale_values(
    values: np.ndarray,
    scale: int,
    periodicity: Periodicity,
):
    """
    Apply a sliding sum over `scale` time steps to the input values (negatives
    are clipped to zero first), then reshape the result to 2-D: (years, 12)
    for monthly periodicity or (years, 366) for daily periodicity.
    """

    # we expect to operate upon a 1-D array, so if we've been passed a 2-D array
    # then we flatten it, otherwise raise an error
    shape = values.shape
    if len(shape) == 2:
        values = values.flatten()
    elif len(shape) != 1:
        message = "Invalid shape of input array: {shape}".format(shape=shape) + \
                  " -- only 1-D and 2-D arrays are supported"
        _logger.error(message)
        raise ValueError(message)

    # if we're passed all missing values then we can't compute
    # anything, so we return the same array of missing values
    if (np.ma.is_masked(values) and values.mask.all()) or np.all(
            np.isnan(values)):
        return values

    # clip any negative values to zero
    if np.amin(values) < 0.0:
        _logger.warning(
            "Input contains negative values -- all negatives clipped to zero")
        values = np.clip(values, a_min=0.0, a_max=None)

    # get a sliding sums array, with each time step's value scaled
    # by the specified number of time steps
    scaled_values = sum_to_scale(values, scale)

    # reshape precipitation values to (years, 12) for monthly,
    # or to (years, 366) for daily
    if periodicity is Periodicity.monthly:

        scaled_values = utils.reshape_to_2d(scaled_values, 12)

    elif periodicity is Periodicity.daily:

        scaled_values = utils.reshape_to_2d(scaled_values, 366)

    else:

        raise ValueError("Invalid periodicity argument: %s" % periodicity)

    return scaled_values
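The sliding-sum helper sum_to_scale() referenced above is not included in this listing. A minimal sketch of an equivalent helper (the name _sliding_sum is hypothetical, written here only to illustrate the scaling step; the project's actual implementation may differ) could look like this:

import numpy as np

def _sliding_sum(values: np.ndarray, scale: int) -> np.ndarray:
    # Sliding sum over `scale` time steps: position i holds the sum of the
    # current value and the preceding (scale - 1) values. The first
    # (scale - 1) positions have no complete window and are left as NaN.
    sums = np.convolve(values, np.ones(scale), mode="valid")
    return np.concatenate([np.full(scale - 1, np.nan), sums])

# example: 3-month sliding sums of a short monthly series
monthly = np.array([10.0, 0.0, 25.0, 5.0, 40.0])
print(_sliding_sum(monthly, 3))  # [nan nan 35. 30. 70.]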
Code Example #2
def _validate_array(
    values: np.ndarray,
    periodicity: Periodicity,
) -> np.ndarray:
    """

    :param values:
    :param periodicity:
    :return:
    """

    # validate (and possibly reshape) the input array
    if len(values.shape) == 1:

        if periodicity is None:
            message = "1-D input array requires a corresponding periodicity "\
                      "argument, none provided"
            _logger.error(message)
            raise ValueError(message)

        elif periodicity is Periodicity.monthly:
            # we've been passed a 1-D array with shape (months),
            # reshape it to 2-D with shape (years, 12)
            values = utils.reshape_to_2d(values, 12)

        elif periodicity is Periodicity.daily:
            # we've been passed a 1-D array with shape (days),
            # reshape it to 2-D with shape (years, 366)
            values = utils.reshape_to_2d(values, 366)

        else:
            message = "Unsupported periodicity argument: '{0}'".format(
                periodicity)
            _logger.error(message)
            raise ValueError(message)

    elif (len(values.shape) != 2) or \
            ((values.shape[1] != 12) and (values.shape[1] != 366)):

        # neither a 1-D nor a 2-D array with valid shape was passed in
        message = "Invalid input array with shape: {0}".format(values.shape)
        _logger.error(message)
        raise ValueError(message)

    return values
Code Example #3
File: eto.py  Project: yusuke61/climate_indices
def eto_hargreaves(daily_tmin_celsius: np.ndarray,
                   daily_tmax_celsius: np.ndarray,
                   daily_tmean_celsius: np.ndarray,
                   latitude_degrees: float):
    """
    Compute daily potential evapotranspiration (PET) using the Hargreaves
    (1985) method. Based on equation 52 in Allen et al (1998).

    Input arrays are assumed to be 1-D (an arbitrary number of days) or 2-D
    (years x 366 days per year).

    :param daily_tmin_celsius: array of daily minimum temperature values,
        in degrees Celsius
    :param daily_tmax_celsius: array of daily maximum temperature values,
        in degrees Celsius
    :param daily_tmean_celsius: array of daily mean temperature values,
        in degrees Celsius
    :param latitude_degrees: latitude of location, in degrees north
    :return: potential evapotranspiration over grass (ETo), in millimeters per day
    """

    # validate the input data arrays
    if (daily_tmin_celsius.size != daily_tmax_celsius.size) or \
            (daily_tmax_celsius.size != daily_tmean_celsius.size):
        message = "Incompatible array sizes"
        _logger.error(message)
        raise ValueError(message)

    # keep the original length for conversion back to original size
    original_length = daily_tmean_celsius.size

    # reshape all three arrays to 2-D with 366 days per year,
    # if not already in this shape
    daily_tmin_celsius = utils.reshape_to_2d(daily_tmin_celsius, 366)
    daily_tmax_celsius = utils.reshape_to_2d(daily_tmax_celsius, 366)
    daily_tmean_celsius = utils.reshape_to_2d(daily_tmean_celsius, 366)

    # at this point we assume that our dataset array has shape (years, 366)
    # where each row is a year with 366 columns of daily values

    # convert the latitude from degrees to radians
    latitude = math.radians(latitude_degrees)

    # allocate the PET array we'll fill
    pet = np.full(daily_tmean_celsius.shape, np.NaN)
    for day_of_year in range(1, daily_tmean_celsius.shape[1] + 1):

        # calculate the angle of solar declination and sunset hour angle
        solar_declination = _solar_declination(day_of_year)
        sunset_hour_angle = _sunset_hour_angle(latitude, solar_declination)

        # calculate the inverse relative distance between earth and sun
        # from the day of the year, based on FAO equation 23 in
        # Allen et al (1998).
        inv_rel_distance = 1 + (0.033 * math.cos((2.0 * math.pi / 365.0) * day_of_year))

        # extraterrestrial radiation
        tmp1 = (24.0 * 60.0) / math.pi
        tmp2 = sunset_hour_angle * math.sin(latitude) * math.sin(solar_declination)
        tmp3 = (
            math.cos(latitude)
            * math.cos(solar_declination)
            * math.sin(sunset_hour_angle)
        )
        et_radiation = tmp1 * _SOLAR_CONSTANT * inv_rel_distance * (tmp2 + tmp3)

        for year in range(daily_tmean_celsius.shape[0]):

            # calculate the Hargreaves equation
            tmin = daily_tmin_celsius[year, day_of_year - 1]
            tmax = daily_tmax_celsius[year, day_of_year - 1]
            tmean = daily_tmean_celsius[year, day_of_year - 1]
            pet[year, day_of_year - 1] = (
                0.0023 * (tmean + 17.8) * (tmax - tmin) ** 0.5 * 0.408 * et_radiation
            )

    # reshape the dataset from (years, 366) into (total days),
    # i.e. convert from 2-D to 1-D, and truncate to the original length
    return pet.reshape(-1)[0:original_length]
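The per-day arithmetic inside the nested loop above is the Hargreaves equation itself. The sketch below isolates that single step with illustrative inputs (the extraterrestrial radiation value is made up for the example); the 0.408 factor converts radiation in MJ m-2 day-1 into equivalent millimeters of evaporation.

import math

def hargreaves_eto_single_day(tmin_c: float, tmax_c: float, tmean_c: float,
                              et_radiation_mj: float) -> float:
    # Hargreaves (1985) reference evapotranspiration for a single day.
    # et_radiation_mj is extraterrestrial radiation in MJ m-2 day-1; the 0.408
    # factor converts it to mm day-1 of equivalent evaporation.
    return 0.0023 * (tmean_c + 17.8) * math.sqrt(tmax_c - tmin_c) * 0.408 * et_radiation_mj

# illustrative values only: a warm day with Ra of roughly 40 MJ m-2 day-1
print(hargreaves_eto_single_day(tmin_c=15.0, tmax_c=30.0, tmean_c=22.5, et_radiation_mj=40.0))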
Code Example #4
File: eto.py  Project: yusuke61/climate_indices
def eto_thornthwaite(monthly_temps_celsius: np.ndarray,
                     latitude_degrees: float,
                     data_start_year: int):
    """
    Compute monthly potential evapotranspiration (PET) using the
    Thornthwaite (1948) method.

    Thornthwaite's equation:

        PET = 1.6 * (L/12) * (N/30) * (10*Ta / I)**a

    which gives PET in centimeters per month (the code below multiplies by 16
    rather than 1.6 so that the result is in millimeters per month), where:

    * Ta is the mean daily air temperature, in degrees Celsius (if negative
        then use 0.0), of the month being calculated
    * N is the number of days in the month being calculated
    * L is the mean day length, in hours, of the month being calculated
    * a = (6.75e-07 * I**3) - (7.71e-05 * I**2) + (1.792e-02 * I) + 0.49239
    * I is a heat index which depends on the 12 monthly mean temperatures and
        is calculated as the sum of (Tai / 5)**1.514 for each month, where
        Tai is the air temperature for each month in the year

    Reference:
    Thornthwaite, C.W. (1948) An approach toward a rational classification
    of climate. Geographical Review, Vol. 38, 55-94.
    https://www.jstor.org/stable/210739

    :param monthly_temps_celsius: array containing a time series (monthly time
        steps) of mean daily air temperatures in degrees Celsius. This input
        dataset is assumed to start at January of the initial year, and can have
        any length. Both 1-D (months) and 2-D (years, 12) input datasets
        are supported.
    :param latitude_degrees: latitude of the location, in degrees north (-90..90)
    :param data_start_year: year corresponding to the start of the dataset
    :return: estimated potential evapotranspiration, in millimeters/month
    :rtype: 1-D numpy.ndarray of floats with shape: (total # of months)

    """

    original_length = monthly_temps_celsius.size

    # validate the input data array
    monthly_temps_celsius = utils.reshape_to_2d(monthly_temps_celsius, 12)

    # at this point we assume that our dataset array has shape (years, 12) where
    # each row is a year with 12 columns of monthly values (Jan, Feb, ..., Dec)

    # convert the latitude from degrees to radians
    latitude_radians = math.radians(latitude_degrees)

    # adjust negative temperature values to zero, since negative
    # values aren't allowed (no evaporation below freezing)
    # TODO this sometimes throws a RuntimeWarning for invalid value,
    #  perhaps as a result of a NaN, somehow use masking and/or NaN
    #  pre-check to eliminate the cause of this warning
    monthly_temps_celsius[monthly_temps_celsius < 0] = 0.0

    # mean the monthly temperature values over the month axis,
    # giving us 12 monthly means for the period of record
    mean_monthly_temps = np.nanmean(monthly_temps_celsius, axis=0)

    # calculate the heat index (I)
    heat_index = np.sum(np.power(mean_monthly_temps / 5.0, 1.514))

    # calculate the a coefficient
    a = ((6.75e-07 * heat_index ** 3)
         - (7.71e-05 * heat_index ** 2)
         + (1.792e-02 * heat_index)
         + 0.49239)

    # get mean daylight hours for both normal and leap years
    mean_daylight_hours_nonleap = \
        np.array(_monthly_mean_daylight_hours(latitude_radians, False))
    mean_daylight_hours_leap = \
        np.array(_monthly_mean_daylight_hours(latitude_radians, True))

    # allocate the PET array we'll fill
    pet = np.full(monthly_temps_celsius.shape, np.NaN)
    for year in range(monthly_temps_celsius.shape[0]):

        if calendar.isleap(data_start_year + year):
            month_days = _MONTH_DAYS_LEAP
            mean_daylight_hours = mean_daylight_hours_leap
        else:
            month_days = _MONTH_DAYS_NONLEAP
            mean_daylight_hours = mean_daylight_hours_nonleap

        # calculate the Thornthwaite equation
        pet[year, :] = (
            16
            * (mean_daylight_hours / 12.0)
            * (month_days / 30.0)
            * ((10.0 * monthly_temps_celsius[year, :] / heat_index) ** a)
        )

    # reshape the dataset from (years, 12) into (months),
    # i.e. convert from 2-D to 1-D, and truncate to the original length
    return pet.reshape(-1)[0:original_length]
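The Thornthwaite step inside the year loop can be checked by hand for a single month. The snippet below repeats that arithmetic with illustrative numbers (the heat index, mean temperature, and daylight hours here are invented for the example):

# worked Thornthwaite step for a single month (illustrative numbers only)
heat_index = 50.0          # annual heat index I
a = (6.75e-07 * heat_index ** 3
     - 7.71e-05 * heat_index ** 2
     + 1.792e-02 * heat_index
     + 0.49239)            # exponent a derived from I

mean_temp_c = 20.0         # mean daily air temperature for the month
daylight_hours = 13.0      # mean day length for the month, in hours
days_in_month = 31

# 16 mm/month corresponds to the 1.6 cm/month of Thornthwaite's original equation
pet_mm = (
    16
    * (daylight_hours / 12.0)
    * (days_in_month / 30.0)
    * (10.0 * mean_temp_c / heat_index) ** a
)
print(round(pet_mm, 1))    # about 105.6 mm for these illustrative inputs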
Code Example #5
File: indices.py  Project: yusuke61/climate_indices
def spi(values: np.ndarray,
        scale: int,
        distribution,
        data_start_year: int,
        calibration_year_initial: int,
        calibration_year_final: int,
        periodicity):
    """
    Computes SPI (Standardized Precipitation Index).

    :param values: 1-D numpy array of precipitation values, in any units,
        first value assumed to correspond to January of the initial year if
        the periodicity is monthly, or January 1st of the initial year if daily
    :param scale: number of time steps over which the values should be scaled
        before the index is computed
    :param distribution: distribution type to be used for the internal
        fitting/transform computation
    :param data_start_year: the initial year of the input precipitation dataset
    :param calibration_year_initial: initial year of the calibration period
    :param calibration_year_final: final year of the calibration period
    :param periodicity: the periodicity of the time series represented by the
        input data, valid/supported values are 'monthly' and 'daily'
        'monthly' indicates an array of monthly values, assumed to span full
         years, i.e. the first value corresponds to January of the initial year
         and any missing final months of the final year filled with NaN values,
         with size == # of years * 12
         'daily' indicates an array of full years of daily values with 366 days
         per year, as if each year were a leap year and any missing final months
         of the final year filled with NaN values, with array size == (# years * 366)
    :return: SPI values fitted to the specified distribution at the specified
        time step scale, unitless
    :rtype: 1-D numpy.ndarray of floats of the same length as the input array
        of precipitation values
    """

    # we expect to operate upon a 1-D array, so if we've been passed a 2-D array
    # then we flatten it, otherwise raise an error
    shape = values.shape
    if len(shape) == 2:
        values = values.flatten()
    elif len(shape) != 1:
        message = f"Invalid shape of input array: {shape} -- " + \
                  "only 1-D and 2-D arrays are supported"
        _logger.error(message)
        raise ValueError(message)

    # if we're passed all missing values then we can't compute
    # anything, so we return the same array of missing values
    if (np.ma.is_masked(values) and values.mask.all()) or np.all(np.isnan(values)):
        return values

    # remember the original length of the array, in order to facilitate
    # returning an array of the same size
    original_length = values.size

    # get a sliding sums array, with each time step's value scaled
    # by the specified number of time steps
    values = compute.sum_to_scale(values, scale)

    # reshape precipitation values to (years, 12) for monthly,
    # or to (years, 366) for daily
    if periodicity is compute.Periodicity.monthly:

        values = utils.reshape_to_2d(values, 12)

    elif periodicity is compute.Periodicity.daily:

        values = utils.reshape_to_2d(values, 366)

    else:

        raise ValueError("Invalid periodicity argument: %s" % periodicity)

    if distribution is Distribution.gamma:

        # fit the scaled values to a gamma distribution
        # and transform to corresponding normalized sigmas
        values = compute.transform_fitted_gamma(values,
                                                data_start_year,
                                                calibration_year_initial,
                                                calibration_year_final,
                                                periodicity)
    elif distribution is Distribution.pearson:

        # fit the scaled values to a Pearson Type III distribution
        # and transform to corresponding normalized sigmas
        values = compute.transform_fitted_pearson(values,
                                                  data_start_year,
                                                  calibration_year_initial,
                                                  calibration_year_final,
                                                  periodicity)

    else:

        message = f"Unsupported distribution argument: {distribution}"
        _logger.error(message)
        raise ValueError(message)

    # clip values to within the valid range, reshape the array back to 1-D
    values = np.clip(values, _FITTED_INDEX_VALID_MIN, _FITTED_INDEX_VALID_MAX).flatten()

    # return the original size array
    return values[0:original_length]
Code Example #6
File: compute.py  Project: nsashi/climate_indices
def transform_fitted_gamma(values, data_start_year, calibration_start_year,
                           calibration_end_year, periodicity):
    """
    Fit values to a gamma distribution and transform the values to corresponding normalized sigmas.

    :param values: 2-D array of values, with each row typically representing a year containing
                   twelve columns representing the respective calendar months, or 366 days per column
                   as if all years were leap years
    :param data_start_year: the initial year of the input values array
    :param calibration_start_year: the initial year to use for the calibration period
    :param calibration_end_year: the final year to use for the calibration period
    :param periodicity: the type of time series represented by the input data, valid values are 'monthly' or 'daily'
                             'monthly': array of monthly values, assumed to span full years, i.e. the first value
                             corresponds to January of the initial year and any missing final months of the final
                             year filled with NaN values, with size == # of years * 12
                             'daily': array of full years of daily values with 366 days per year, as if each year were
                             a leap year and any missing final months of the final year filled with NaN values,
                             with array size == (# years * 366)
    :return: 2-D array of transformed/fitted values, corresponding in size and shape of the input array
    :rtype: numpy.ndarray of floats
    """

    # if we're passed all missing values then we can't compute anything, return the same array of missing values
    if (np.ma.is_masked(values) and values.mask.all()) or np.all(
            np.isnan(values)):
        return values

    # validate (and possibly reshape) the input array
    if len(values.shape) == 1:

        if periodicity is None:
            message = '1-D input array requires a corresponding periodicity argument, none provided'
            _logger.error(message)
            raise ValueError(message)

        elif periodicity is Periodicity.monthly:
            # we've been passed a 1-D array with shape (months), reshape it to 2-D with shape (years, 12)
            values = utils.reshape_to_2d(values, 12)

        elif periodicity is Periodicity.daily:
            # we've been passed a 1-D array with shape (days), reshape it to 2-D with shape (years, 366)
            values = utils.reshape_to_2d(values, 366)

        else:
            message = 'Unsupported periodicity argument: \'{0}\''.format(
                periodicity)
            _logger.error(message)
            raise ValueError(message)

    elif (len(values.shape) != 2) or (values.shape[1] != 12
                                      and values.shape[1] != 366):

        # neither a 1-D nor a 2-D array with valid shape was passed in
        message = 'Invalid input array with shape: {0}'.format(values.shape)
        _logger.error(message)
        raise ValueError(message)

    # find the percentage of zero values for each time step
    zeros = (values == 0).sum(axis=0)
    probabilities_of_zero = zeros / values.shape[0]

    # replace zeros with NaNs
    values[values == 0] = np.NaN

    # determine the end year of the values array
    data_end_year = data_start_year + values.shape[0]

    # make sure that we have data within the full calibration period, otherwise use the full period of record
    if (calibration_start_year < data_start_year) or (calibration_end_year >
                                                      data_end_year):
        _logger.info(
            'Insufficient data for the specified calibration period ({0}-{1}),'
            .format(calibration_start_year, calibration_end_year) +
            ' instead using the full period of record ({0}-{1})'.format(
                data_start_year, data_end_year))
        calibration_start_year = data_start_year
        calibration_end_year = data_end_year

    # get the year axis indices corresponding to the calibration start and end years
    calibration_begin_index = (calibration_start_year - data_start_year)
    calibration_end_index = (calibration_end_year - data_start_year) + 1

    # get the values for the current calendar time step that fall within the calibration years period
    calibration_values = values[
        calibration_begin_index:calibration_end_index, :]

    # estimate the gamma distribution's shape (alpha) and scale (beta)
    # parameters using the Thom (1958) maximum-likelihood approximation
    means = np.nanmean(calibration_values, axis=0)
    log_means = np.log(means)
    logs = np.log(calibration_values)
    mean_logs = np.nanmean(logs, axis=0)
    a = log_means - mean_logs
    alphas = (1 + np.sqrt(1 + 4 * a / 3)) / (4 * a)
    betas = means / alphas

    # find the gamma probability values using the gamma CDF
    gamma_probabilities = scipy.stats.gamma.cdf(values, a=alphas, scale=betas)

    # combine the probability of zero with the gamma CDF to get cumulative
    # probabilities under a mixed distribution: a point mass at zero with
    # probability q plus a gamma distribution for the non-zero values,
    # i.e. P(x) = q + (1 - q) * G(x)
    probabilities = probabilities_of_zero + (
        (1 - probabilities_of_zero) * gamma_probabilities)

    # the values we'll return are the values at which the probabilities of a normal distribution
    # are less than or equal to the computed probabilities, as determined by the normal distribution's
    # quantile (or inverse cumulative distribution) function
    return scipy.stats.norm.ppf(probabilities)
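The alpha/beta calculation above appears to be the Thom (1958) maximum-likelihood approximation for the gamma distribution. The same estimate can be exercised on its own for a single 1-D sample; this sketch only mirrors the arithmetic shown above.

import numpy as np

def thom_gamma_parameters(sample: np.ndarray) -> tuple:
    # Approximate gamma shape (alpha) and scale (beta) parameters of a
    # positive-valued sample, mirroring the calculation in the function above.
    mean = np.nanmean(sample)
    a = np.log(mean) - np.nanmean(np.log(sample))
    alpha = (1 + np.sqrt(1 + 4 * a / 3)) / (4 * a)
    beta = mean / alpha
    return alpha, beta

# synthetic "precipitation" sample drawn from a known gamma distribution
rng = np.random.default_rng(0)
sample = rng.gamma(shape=2.0, scale=30.0, size=500)
print(thom_gamma_parameters(sample))  # estimates should land near (2.0, 30.0)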
Code Example #7
File: compute.py  Project: nsashi/climate_indices
def transform_fitted_pearson(values, data_start_year, calibration_start_year,
                             calibration_end_year, periodicity):
    """
    Fit values to a Pearson Type III distribution and transform the values to corresponding normalized sigmas.

    :param values: 2-D array of values, with each row representing a year containing
                   twelve columns representing the respective calendar months, or 366 columns representing days
                   as if all years were leap years
    :param data_start_year: the initial year of the input values array
    :param calibration_start_year: the initial year to use for the calibration period
    :param calibration_end_year: the final year to use for the calibration period
    :param periodicity: the periodicity of the time series represented by the input data, valid/supported values are
                        'monthly' and 'daily'
                        'monthly' indicates an array of monthly values, assumed to span full years, i.e. the first
                        value corresponds to January of the initial year and any missing final months of the final
                        year filled with NaN values, with size == # of years * 12
                        'daily' indicates an array of full years of daily values with 366 days per year, as if each
                        year were a leap year and any missing final months of the final year filled with NaN values,
                        with array size == (# years * 366)
    :return: 2-D array of transformed/fitted values, corresponding in size and shape of the input array
    :rtype: numpy.ndarray of floats
    """

    # if we're passed all missing values then we can't compute anything, return the same array of missing values
    if (np.ma.is_masked(values) and values.mask.all()) or np.all(
            np.isnan(values)):
        return values

    # validate (and possibly reshape) the input array
    if len(values.shape) == 1:

        if periodicity is None:
            message = '1-D input array requires a corresponding periodicity argument, none provided'
            _logger.error(message)
            raise ValueError(message)

        elif periodicity is Periodicity.monthly:
            # we've been passed a 1-D array with shape (months), reshape it to 2-D with shape (years, 12)
            values = utils.reshape_to_2d(values, 12)

        elif periodicity is Periodicity.daily:
            # we've been passed a 1-D array with shape (days), reshape it to 2-D with shape (years, 366)
            values = utils.reshape_to_2d(values, 366)

        else:
            message = 'Unsupported periodicity argument: \'{0}\''.format(
                periodicity)
            _logger.error(message)
            raise ValueError(message)

    elif (len(values.shape) != 2) or ((values.shape[1] != 12) and
                                      (values.shape[1] != 366)):

        # neither a 1-D nor a 2-D array with valid shape was passed in
        message = 'Invalid input array with shape: {0}'.format(values.shape)
        _logger.error(message)
        raise ValueError(message)

    # determine the end year of the values array
    data_end_year = data_start_year + values.shape[0]

    # make sure that we have data within the full calibration period, otherwise use the full period of record
    if (calibration_start_year < data_start_year) or (calibration_end_year >
                                                      data_end_year):
        _logger.info(
            'Insufficient data for the specified calibration period ({0}-{1}),'
            .format(calibration_start_year, calibration_end_year) +
            ' instead using the full period of record ({0}-{1})'.format(
                data_start_year, data_end_year))
        calibration_start_year = data_start_year
        calibration_end_year = data_end_year

    # get the year axis indices corresponding to the calibration start and end years
    calibration_begin_index = (calibration_start_year - data_start_year)
    calibration_end_index = (calibration_end_year - data_start_year) + 1

    # get the values for the current calendar time step that fall within the calibration years period
    calibration_values = values[
        calibration_begin_index:calibration_end_index, :]

    # compute the values we'll use to fit to the Pearson Type III distribution
    pearson_values = _pearson3_fitting_values(calibration_values)

    pearson_param_1 = pearson_values[1]  # first Pearson Type III parameter
    pearson_param_2 = pearson_values[2]  # second Pearson Type III parameter
    pearson_param_3 = pearson_values[3]  # third Pearson Type III parameter
    probability_of_zero = pearson_values[0]

    # fit each value using the Pearson Type III fitting universal function in a broadcast fashion
    fitted_values = _pearson_fit_ufunc(values, pearson_param_1,
                                       pearson_param_2, pearson_param_3,
                                       probability_of_zero)

    return fitted_values
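The helpers _pearson3_fitting_values() and _pearson_fit_ufunc() are not shown in this listing. A plausible sketch of the per-value transform step, using scipy's Pearson Type III CDF and the same probability-of-zero blending used in the gamma case above, is given below; the project's actual helper may differ in details such as probability clipping.

import scipy.stats

def pearson_fit_sketch(value, prob_zero, loc, scale, skew):
    # Blend the probability of zero with the Pearson Type III CDF of the
    # value, then map the combined probability to a standard-normal quantile.
    p3_cdf = scipy.stats.pearson3.cdf(value, skew, loc=loc, scale=scale)
    probability = prob_zero + (1.0 - prob_zero) * p3_cdf
    return scipy.stats.norm.ppf(probability)

# illustrative parameter values only
print(pearson_fit_sketch(value=120.0, prob_zero=0.05, loc=100.0, scale=40.0, skew=0.5))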
Code Example #8
    def test_reshape_to_2d(self):
        '''
        Test for the utils.reshape_to_2d() function
        '''

        # an array of monthly values
        values_1d = np.array([
            3, 4, 6, 2, 1, 3, 5, 8, 5, 6, 3, 4, 6, 2, 1, 3, 5, 8, 5, 6, 3, 4,
            6, 2, 1, 3, 5, 8, 5, 6
        ])

        # the expected rearrangement of the above values from 1-D to 2-D if using 12 as the second axis size
        values_2d_by_12_expected = np.array([
            [3, 4, 6, 2, 1, 3, 5, 8, 5, 6, 3, 4],
            [6, 2, 1, 3, 5, 8, 5, 6, 3, 4, 6, 2],
            [1, 3, 5, 8, 5, 6, np.NaN, np.NaN, np.NaN, np.NaN, np.NaN, np.NaN]
        ])

        # exercise the function
        values_2d_reshaped = utils.reshape_to_2d(values_1d, 12)

        # verify that the function performed as expected
        np.testing.assert_equal(
            values_2d_by_12_expected, values_2d_reshaped,
            'Not rearranging the 1-D array into 2-D year increments of 12 as expected'
        )

        # the expected rearrangement of the above values from 1-D to 2-D if using 8 as the second axis size
        values_2d_by_8_expected = np.array([[3, 4, 6, 2, 1, 3, 5, 8],
                                            [5, 6, 3, 4, 6, 2, 1, 3],
                                            [5, 8, 5, 6, 3, 4, 6, 2],
                                            [1, 3, 5, 8, 5, 6, np.NaN,
                                             np.NaN]])

        # exercise the function
        values_2d_reshaped = utils.reshape_to_2d(values_1d, 8)

        # verify that the function performed as expected
        np.testing.assert_equal(
            values_2d_by_8_expected, values_2d_reshaped,
            'Not rearranging the 1-D array into 2-D increments of 8 as expected'
        )

        # a 2-D array that's already in a valid shape and should be returned as-is if using 12 as the second axis size
        values_2d = np.array([[3, 4, 6, 2, 1, 3, 5, 8, 5, 6, 3, 4],
                              [6, 2, 1, 3, 5, 8, 5, 6, 3, 4, 6, 2],
                              [1, 3, 5, 8, 5, 6, 3, 5, 1, 2, 8, 4]])

        # exercise the function
        values_2d_reshaped = utils.reshape_to_2d(values_2d, 12)

        # verify that the function performed as expected
        np.testing.assert_equal(
            values_2d, values_2d_reshaped,
            'Not returning an already valid 2-D array as expected')

        # a 2-D array that's in an invalid shape for the function
        values_2d = np.array([[3, 4, 6, 2, 1, 3, 5, 3, 4],
                              [6, 2, 1, 3, 5, 8, 5, 6, 2],
                              [1, 3, 5, 8, 5, 6, 3, 8, 4]])

        # make sure that the function croaks with a ValueError when expected
        np.testing.assert_raises(ValueError, utils.reshape_to_2d, values_2d,
                                 12)
        np.testing.assert_raises(ValueError, utils.reshape_to_2d,
                                 values_2d.reshape((3, 3, 3)), 6)
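utils.reshape_to_2d() itself is not shown in this listing. Based on the expected outputs in the test above, an equivalent helper would pad the 1-D input with NaN up to a multiple of the second-axis length and then reshape; the sketch below is that reading, not the project's actual implementation.

import numpy as np

def reshape_to_2d_sketch(values: np.ndarray, second_axis_length: int) -> np.ndarray:
    # Already 2-D with the expected width: return unchanged.
    if values.ndim == 2:
        if values.shape[1] == second_axis_length:
            return values
        raise ValueError(f"Incompatible 2-D shape: {values.shape}")
    if values.ndim != 1:
        raise ValueError(f"Unsupported number of dimensions: {values.ndim}")
    # Pad the tail with NaN so the total length is a multiple of the axis size,
    # then fold the 1-D series into rows of that length.
    padded_length = int(np.ceil(values.size / second_axis_length)) * second_axis_length
    padded = np.full(padded_length, np.nan)
    padded[:values.size] = values
    return padded.reshape(-1, second_axis_length)

print(reshape_to_2d_sketch(np.arange(30, dtype=float), 12).shape)  # (3, 12)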
Code Example #9
def spi(
    values: np.ndarray,
    scale: int,
    distribution: Distribution,
    data_start_year: int,
    calibration_year_initial: int,
    calibration_year_final: int,
    periodicity: compute.Periodicity,
    fitting_params: Dict = None,
) -> np.ndarray:
    """
    Computes SPI (Standardized Precipitation Index).

    :param values: 1-D numpy array of precipitation values, in any units,
        first value assumed to correspond to January of the initial year if
        the periodicity is monthly, or January 1st of the initial year if daily
    :param scale: number of time steps over which the values should be scaled
        before the index is computed
    :param distribution: distribution type to be used for the internal
        fitting/transform computation
    :param data_start_year: the initial year of the input precipitation dataset
    :param calibration_year_initial: initial year of the calibration period
    :param calibration_year_final: final year of the calibration period
    :param periodicity: the periodicity of the time series represented by the
        input data, valid/supported values are 'monthly' and 'daily'
        'monthly' indicates an array of monthly values, assumed to span full
         years, i.e. the first value corresponds to January of the initial year
         and any missing final months of the final year filled with NaN values,
         with size == # of years * 12
         'daily' indicates an array of full years of daily values with 366 days
         per year, as if each year were a leap year and any missing final months
         of the final year filled with NaN values, with array size == (# years * 366)
    :param fitting_params: optional dictionary of pre-computed distribution
        fitting parameters; if the distribution is gamma then this dict should
        contain two arrays, keyed as "alpha" and "beta", and if the
        distribution is Pearson then this dict should contain four arrays
        keyed as "prob_zero", "loc", "scale", and "skew"
    :return: SPI values fitted to the specified distribution at the specified
        time step scale, unitless
    :rtype: 1-D numpy.ndarray of floats of the same length as the input array
        of precipitation values
    """

    # we expect to operate upon a 1-D array, so if we've been passed a 2-D array
    # then we flatten it, otherwise raise an error
    shape = values.shape
    if len(shape) == 2:
        values = values.flatten()
    elif len(shape) != 1:
        message = "Invalid shape of input array: {shape}".format(shape=shape) + \
                  " -- only 1-D and 2-D arrays are supported"
        _logger.error(message)
        raise ValueError(message)

    # if we're passed all missing values then we can't compute
    # anything, so we return the same array of missing values
    if (np.ma.is_masked(values) and values.mask.all()) or np.all(
            np.isnan(values)):
        return values

    # clip any negative values to zero
    if np.amin(values) < 0.0:
        _logger.warning(
            "Input contains negative values -- all negatives clipped to zero")
        values = np.clip(values, a_min=0.0, a_max=None)

    # remember the original length of the array, in order to facilitate
    # returning an array of the same size
    original_length = values.size

    # get a sliding sums array, with each time step's value scaled
    # by the specified number of time steps
    values = compute.sum_to_scale(values, scale)

    # reshape precipitation values to (years, 12) for monthly,
    # or to (years, 366) for daily
    if periodicity is compute.Periodicity.monthly:

        values = utils.reshape_to_2d(values, 12)

    elif periodicity is compute.Periodicity.daily:

        values = utils.reshape_to_2d(values, 366)

    else:

        raise ValueError("Invalid periodicity argument: %s" % periodicity)

    if distribution is Distribution.gamma:

        # get (optional) fitting parameters if provided
        if fitting_params is not None:
            alphas = fitting_params["alpha"]
            betas = fitting_params["beta"]
        else:
            alphas = None
            betas = None

        # fit the scaled values to a gamma distribution
        # and transform to corresponding normalized sigmas
        values = compute.transform_fitted_gamma(
            values,
            data_start_year,
            calibration_year_initial,
            calibration_year_final,
            periodicity,
            alphas,
            betas,
        )
    elif distribution is Distribution.pearson:

        # get (optional) fitting parameters if provided
        if fitting_params is not None:
            probabilities_of_zero = fitting_params["prob_zero"]
            locs = fitting_params["loc"]
            scales = fitting_params["scale"]
            skews = fitting_params["skew"]
        else:
            probabilities_of_zero = None
            locs = None
            scales = None
            skews = None

        # fit the scaled values to a Pearson Type III distribution
        # and transform to corresponding normalized sigmas
        values = compute.transform_fitted_pearson(
            values,
            data_start_year,
            calibration_year_initial,
            calibration_year_final,
            periodicity,
            probabilities_of_zero,
            locs,
            scales,
            skews,
        )

    else:

        message = "Unsupported distribution argument: " + \
                  "{dist}".format(dist=distribution)
        _logger.error(message)
        raise ValueError(message)

    # clip values to within the valid range, reshape the array back to 1-D
    values = np.clip(values, _FITTED_INDEX_VALID_MIN,
                     _FITTED_INDEX_VALID_MAX).flatten()

    # return the original size array
    return values[0:original_length]
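A minimal usage sketch for this spi() variant, assuming it is importable as climate_indices.indices.spi alongside the Distribution and compute.Periodicity enums (import paths and signatures vary between versions of the library, so treat this as illustrative):

import numpy as np

# assumed imports; adjust to match the installed version of the package
from climate_indices import compute, indices

# 40 years of synthetic monthly precipitation, in millimeters
rng = np.random.default_rng(42)
monthly_precip = rng.gamma(shape=2.0, scale=30.0, size=40 * 12)

spi_3 = indices.spi(
    monthly_precip,
    scale=3,                                  # 3-month SPI
    distribution=indices.Distribution.gamma,
    data_start_year=1981,
    calibration_year_initial=1981,
    calibration_year_final=2010,
    periodicity=compute.Periodicity.monthly,
)
print(spi_3.shape)  # same length as the input series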
Code Example #10
File: compute.py  Project: shivam14300/hydro-drought
def pearson_parameters(
    values: np.ndarray,
    data_start_year: int,
    calibration_start_year: int,
    calibration_end_year: int,
    periodicity: Periodicity,
) -> (np.ndarray, np.ndarray, np.ndarray, np.ndarray):
    """
    This function computes the probability of zero and Pearson Type III
    distribution parameters corresponding to an array of values.

    :param values: 2-D array of values, with each row representing a year
        containing either 12 values corresponding to the calendar months of
        that year, or 366 values corresponding to the days of the year
        (with Feb. 29th being an average of the Feb. 28th and Mar. 1st values
        for non-leap years), and assuming that the first value of the array is
        January of the initial year for an input array of monthly values, or
        Jan. 1st of the initial year for an input array of daily values
    :param data_start_year: the initial year of the input values array
    :param calibration_start_year: the initial year to use for the calibration period
    :param calibration_end_year: the final year to use for the calibration period
    :param periodicity: monthly or daily
    :return: four 1-D arrays of fitting values for the Pearson Type III
        distribution, with shape (12,) for monthly or (366,) for daily

        returned array 1: probability of zero
        returned array 2: first Pearson Type III distribution parameter (loc)
        returned array 3: second Pearson Type III distribution parameter (scale)
        returned array 4: third Pearson Type III distribution parameter (skew)
    """

    # reshape precipitation values to (years, 12) for monthly,
    # or to (years, 366) for daily
    if periodicity is Periodicity.monthly:

        values = utils.reshape_to_2d(values, 12)

    elif periodicity is Periodicity.daily:

        values = utils.reshape_to_2d(values, 366)

    else:

        raise ValueError("Invalid periodicity argument: %s" % periodicity)

    # validate that the values array has shape: (years, 12) for monthly or (years, 366) for daily
    if len(values.shape) != 2:
        message = "Invalid shape of input data array: {shape}".format(
            shape=values.shape)
        _logger.error(message)
        raise ValueError(message)

    else:

        time_steps_per_year = values.shape[1]
        if (time_steps_per_year != 12) and (time_steps_per_year != 366):
            message = "Invalid shape of input data array: {shape}".format(
                shape=values.shape)
            _logger.error(message)
            raise ValueError(message)

    # determine the end year of the values array
    data_end_year = data_start_year + values.shape[0]

    # make sure that we have data within the full calibration period,
    # otherwise use the full period of record
    if (calibration_start_year < data_start_year) or \
            (calibration_end_year > data_end_year):
        calibration_start_year = data_start_year
        calibration_end_year = data_end_year

    # get the year axis indices corresponding to
    # the calibration start and end years
    calibration_begin_index = calibration_start_year - data_start_year
    calibration_end_index = (calibration_end_year - data_start_year) + 1

    # get the values for the current calendar time step
    # that fall within the calibration years period
    calibration_values = values[
        calibration_begin_index:calibration_end_index, :]

    # the values we'll compute and return
    probabilities_of_zero = np.zeros((time_steps_per_year, ))
    locs = np.zeros((time_steps_per_year, ))
    scales = np.zeros((time_steps_per_year, ))
    skews = np.zeros((time_steps_per_year, ))

    # compute the probability of zero and Pearson
    # parameters for each calendar time step
    # TODO vectorize the below loop? create a @numba.vectorize() ufunc
    #  for application over the second axis
    for time_step_index in range(time_steps_per_year):

        # get the values for the current calendar time step
        time_step_values = calibration_values[:, time_step_index]

        # count the number of zeros and valid (non-missing/non-NaN) values
        number_of_zeros, number_of_non_missing = \
            utils.count_zeros_and_non_missings(time_step_values)

        # make sure we have at least four values that are both non-missing (i.e. non-NaN)
        # and non-zero, otherwise leave the fitting values for this time step as zeros
        if (number_of_non_missing - number_of_zeros) < 4:

            # we can't proceed, bail out using zeros
            continue

        # calculate the probability of zero for the calendar time step
        probability_of_zero = 0.0
        if number_of_zeros > 0:

            probability_of_zero = number_of_zeros / number_of_non_missing

        # get the estimated L-moments, if we have
        # more than three non-missing/non-zero values
        if (number_of_non_missing - number_of_zeros) > 3:

            # get the Pearson Type III parameters for this time
            # step's values within the calibration period
            params = lmoments.fit(time_step_values)
            probabilities_of_zero[time_step_index] = probability_of_zero
            locs[time_step_index] = params["loc"]
            scales[time_step_index] = params["scale"]
            skews[time_step_index] = params["skew"]

    return probabilities_of_zero, locs, scales, skews
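A minimal usage sketch for pearson_parameters(), assuming the compute module shown above (with its utils and lmoments helpers) is importable; the synthetic input is illustrative only:

import numpy as np

# assumed imports: Periodicity and pearson_parameters both live in the
# compute.py module shown above
from compute import Periodicity, pearson_parameters

# 30 years of synthetic monthly precipitation (mm), flattened to 1-D
rng = np.random.default_rng(1)
precip = rng.gamma(shape=2.0, scale=30.0, size=30 * 12)

prob_zero, locs, scales, skews = pearson_parameters(
    precip,
    data_start_year=1991,
    calibration_start_year=1991,
    calibration_end_year=2020,
    periodicity=Periodicity.monthly,
)
print(prob_zero.shape, locs.shape, scales.shape, skews.shape)  # four (12,) arrays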
Code Example #11
File: indices.py  Project: NESII/climate_indices
def spi_pearson(precips, 
                scale,
                data_start_year,
                calibration_year_initial,
                calibration_year_final,
                time_series_type):
    '''
    Computes SPI using a fitting to the Pearson Type III distribution.
    
    :param precips: 1-D numpy array of precipitation values, in any units, first value assumed to correspond to January
                    of the initial year if the time series type is monthly, or January 1st of the initial year if daily
    :param scale: number of time steps over which the values should be scaled before the index is computed
    :param data_start_year: the initial year of the input precipitation dataset
    :param calibration_year_initial: initial year of the calibration period
    :param calibration_year_final: final year of the calibration period
    :param time_series_type: the type of time series represented by the input data, valid values are 'monthly' or 'daily'
                             'monthly': array of monthly values, assumed to span full years, i.e. the first value 
                             corresponds to January of the initial year and any missing final months of the final 
                             year filled with NaN values, with size == # of years * 12
                             'daily': array of full years of daily values with 366 days per year, as if each year were 
                             a leap year and any missing final months of the final year filled with NaN values, 
                             with array size == (# years * 366)
    :return: SPI values fitted to the Pearson Type III distribution at the specified time scale, unitless
    :rtype: 1-D numpy.ndarray of floats of the same length as the input array of precipitation values
    '''

    # remember the original length of the array, in order to facilitate returning an array of the same size
    original_length = precips.size
    
    # get a sliding sums array, with each time step's value scaled by the specified number of time steps
    scaled_precips = compute.sum_to_scale(precips, scale)

    # reshape precipitation values to (years, 12) for monthly, or to (years, 366) for daily (representing all years as leap)
    if time_series_type == 'monthly':
        
        scaled_precips = utils.reshape_to_2d(scaled_precips, 12)

    elif time_series_type == 'daily':
        
        scaled_precips = utils.reshape_to_2d(scaled_precips, 366)
        
    else:
        
        raise ValueError('Invalid time series type argument: %s' % time_series_type)
    
    # fit the scaled values to a Pearson Type III distribution and transform the values to corresponding normalized sigmas
    transformed_fitted_values = compute.transform_fitted_pearson(scaled_precips, 
                                                                 data_start_year,
                                                                 calibration_year_initial,
                                                                 calibration_year_final,
                                                                 time_series_type)
        
    # clip values to within the valid range, reshape the array back to 1-D
    spi = np.clip(transformed_fitted_values, _FITTED_INDEX_VALID_MIN, _FITTED_INDEX_VALID_MAX).flatten()
    
    # return the original size array 
    return spi[0:original_length]