예제 #1
0
def test_sum_to_scale():
    """
    Test for the compute.sum_to_scale() function.

    Checks the sliding sums produced for several window sizes against
    hand-computed expectations, using inputs with no missing values, with
    NaNs appended to the end, and with a NaN inside the array.
    """

    # NOTE: np.nan is used throughout instead of the np.NaN alias,
    # which was removed in NumPy 2.0

    # test an input array with no missing values
    values = np.array([3.0, 4, 6, 2, 1, 3, 5, 8, 5])
    computed_values = compute.sum_to_scale(values, 3)
    expected_values = np.array([np.nan, np.nan, 13, 12, 9, 6, 9, 16, 18])
    np.testing.assert_allclose(computed_values,
                               expected_values,
                               err_msg="Sliding sums not computed as expected")
    computed_values = compute.sum_to_scale(values, 4)
    expected_values = np.array([np.nan, np.nan, np.nan, 15, 13, 12, 11, 17, 21])
    np.testing.assert_allclose(computed_values,
                               expected_values,
                               err_msg="Sliding sums not computed as expected")

    # test an input array with missing values on the end
    values = np.array([3, 4, 6, 2, 1, 3, 5, 8, 5, np.nan, np.nan, np.nan])
    computed_values = compute.sum_to_scale(values, 3)
    expected_values = np.array(
        [np.nan, np.nan, 13, 12, 9, 6, 9, 16, 18, np.nan, np.nan, np.nan]
    )
    np.testing.assert_allclose(computed_values,
                               expected_values,
                               err_msg="Sliding sums not computed as expected when "
                                       "missing values appended to end of input array")

    # test an input array with missing values within the array; every
    # window that touches the NaN is expected to be NaN as well
    values = np.array([3, 4, 6, 2, 1, 3, 5, np.nan, 8, 5, 6])
    computed_values = compute.sum_to_scale(values, 3)
    expected_values = np.array(
        [np.nan, np.nan, 13, 12, 9, 6, 9, np.nan, np.nan, np.nan, 19]
    )
    np.testing.assert_allclose(computed_values,
                               expected_values,
                               err_msg="Sliding sums not computed as expected when "
                                       "missing values are within the input array")

    # test several window sizes over the same input, requiring exact equality
    test_values = np.array([1.0, 5, 7, 2, 3, 4, 9, 6, 3, 8])
    sum_by2 = np.array([np.nan, 6, 12, 9, 5, 7, 13, 15, 9, 11])
    sum_by4 = np.array([np.nan, np.nan, np.nan, 15, 17, 16, 18, 22, 22, 26])
    sum_by6 = np.array([np.nan, np.nan, np.nan, np.nan, np.nan, 22, 30, 31, 27, 33])
    for scale, expected_sums in ((2, sum_by2), (4, sum_by4), (6, sum_by6)):
        np.testing.assert_equal(compute.sum_to_scale(test_values, scale),
                                expected_sums,
                                err_msg="Sliding sums not computed as expected")
예제 #2
0
    def test_sum_to_scale(self):
        '''
        Test for the compute.sum_to_scale() function.

        Checks the 3-step sliding sums against hand-computed expectations for
        an input with no missing values, with NaNs appended to the end, and
        with a NaN inside the array.
        '''

        # NOTE: np.nan is used throughout instead of the np.NaN alias,
        # which was removed in NumPy 2.0

        # test an input array with no missing values
        values = np.array([3, 4, 6, 2, 1, 3, 5, 8, 5])
        computed_values = compute.sum_to_scale(values, 3)
        expected_values = np.array([np.nan, np.nan, 13, 12, 9, 6, 9, 16, 18])
        np.testing.assert_allclose(
            computed_values,
            expected_values,
            err_msg='Sliding sums not computed as expected')

        # test an input array with missing values on the end
        values = np.array([3, 4, 6, 2, 1, 3, 5, 8, 5, np.nan, np.nan, np.nan])
        computed_values = compute.sum_to_scale(values, 3)
        expected_values = np.array(
            [np.nan, np.nan, 13, 12, 9, 6, 9, 16, 18, np.nan, np.nan, np.nan])
        np.testing.assert_allclose(
            computed_values,
            expected_values,
            err_msg=
            'Sliding sums not computed as expected when missing values appended to end of input array'
        )

        # test an input array with missing values within the array; every
        # window that touches the NaN is expected to be NaN as well
        values = np.array([3, 4, 6, 2, 1, 3, 5, np.nan, 8, 5, 6])
        computed_values = compute.sum_to_scale(values, 3)
        expected_values = np.array(
            [np.nan, np.nan, 13, 12, 9, 6, 9, np.nan, np.nan, np.nan, 19])
        np.testing.assert_allclose(
            computed_values,
            expected_values,
            err_msg=
            'Sliding sums not computed as expected when missing values are within the input array'
        )
예제 #3
0
def spi(values: np.ndarray,
        scale: int,
        distribution,
        data_start_year: int,
        calibration_year_initial: int,
        calibration_year_final: int,
        periodicity):
    """
    Computes SPI (Standardized Precipitation Index).

    The input series is summed over a sliding window of ``scale`` time steps,
    reshaped to one row per year, fitted/transformed with the requested
    distribution, clipped to the valid index range and returned as a 1-D array.

    :param values: 1-D numpy array of precipitation values, in any units,
        first value assumed to correspond to January of the initial year if
        the periodicity is monthly, or January 1st of the initial year if daily
        (a 2-D array is also accepted and is flattened before use)
    :param scale: number of time steps over which the values should be scaled
        before the index is computed
    :param distribution: distribution type to be used for the internal
        fitting/transform computation (Distribution.gamma or Distribution.pearson)
    :param data_start_year: the initial year of the input precipitation dataset
    :param calibration_year_initial: initial year of the calibration period
    :param calibration_year_final: final year of the calibration period
    :param periodicity: the periodicity of the time series represented by the
        input data, either compute.Periodicity.monthly (rows of 12 values) or
        compute.Periodicity.daily (rows of 366 values, as if each year were
        a leap year), with any missing final time steps of the final year
        filled with NaN values
    :return: SPI values at the specified time step scale, unitless; if the
        input is entirely masked or entirely NaN it is returned as-is
        (after flattening, when a 2-D array was passed)
    :rtype: 1-D numpy.ndarray of floats of the same length as the input array
        of precipitation values
    :raises ValueError: if the input array is neither 1-D nor 2-D, or if the
        periodicity or distribution argument is not one of the supported values
    """

    # only 1-D and 2-D inputs are supported, and 2-D inputs are flattened
    shape = values.shape
    rank = len(shape)
    if rank not in (1, 2):
        message = f"Invalid shape of input array: {shape} -- " \
                  "only 1-D and 2-D arrays are supported"
        _logger.error(message)
        raise ValueError(message)
    if rank == 2:
        values = values.flatten()

    # nothing can be computed from an all-missing input, hand it straight back
    entirely_masked = np.ma.is_masked(values) and values.mask.all()
    if entirely_masked or np.all(np.isnan(values)):
        return values

    # the result is trimmed back to this length before it is returned
    original_length = values.size

    # sliding sums over the requested number of time steps
    scaled = compute.sum_to_scale(values, scale)

    # number of columns per year: 12 for monthly, 366 for daily
    if periodicity is compute.Periodicity.monthly:
        steps_per_year = 12
    elif periodicity is compute.Periodicity.daily:
        steps_per_year = 366
    else:
        raise ValueError("Invalid periodicity argument: %s" % periodicity)
    yearly = utils.reshape_to_2d(scaled, steps_per_year)

    # pick the fitting/transform routine matching the requested distribution
    if distribution is Distribution.gamma:
        transform = compute.transform_fitted_gamma
    elif distribution is Distribution.pearson:
        transform = compute.transform_fitted_pearson
    else:
        message = f"Unsupported distribution argument: {distribution}"
        _logger.error(message)
        raise ValueError(message)

    # fit the scaled values and transform to corresponding normalized sigmas
    fitted = transform(yearly,
                       data_start_year,
                       calibration_year_initial,
                       calibration_year_final,
                       periodicity)

    # clip to the valid range and collapse back to 1-D
    clipped = np.clip(fitted, _FITTED_INDEX_VALID_MIN, _FITTED_INDEX_VALID_MAX)
    flattened = clipped.flatten()

    # drop any padding so the result matches the input length
    return flattened[:original_length]
예제 #4
0
def percentage_of_normal(values: np.ndarray,
                         scale: int,
                         data_start_year: int,
                         calibration_start_year: int,
                         calibration_end_year: int,
                         periodicity):
    """
    This function finds the percent of normal values (average of each calendar
    month or day over a specified calibration period of years) for a specified
    time steps scale. The normal precipitation for each calendar time step is
    computed for the specified time steps scale, and then each time step's
    scaled value is compared against the corresponding calendar time step's
    average to determine the percentage of normal. The period that defines the
    normal is described by the calibration start and end years arguments.
    The calibration period typically used for US climate monitoring is 1981-2010.

    Note that the returned values are the scaled sums divided by the
    corresponding calendar averages, i.e. ratios where 1.0 means "equal to
    normal"; they are not multiplied by 100.

    :param values: 1-D numpy array of precipitation values, any length, initial
        value assumed to be January of the data start year (January 1st of the
        start year if daily periodicity), see the description of the
        *periodicity* argument below for further clarification
    :param scale: integer number of time steps over which the normal value is
        computed (eg 3-months, 6-months, etc.), must be at least 1
    :param data_start_year: the initial year of the input values array
    :param calibration_start_year: the initial year of the calibration period
        over which the normal average for each calendar time step is computed
    :param calibration_end_year: the final year of the calibration period over
        which the normal average for each calendar time step is computed
    :param periodicity: the periodicity of the time series represented by the
        input data, either compute.Periodicity.monthly (12 time steps per
        year) or compute.Periodicity.daily (366 time steps per year, as if
        each year were a leap year), with any missing final time steps of the
        final year filled with NaN values
    :return: ratio-of-normal precipitation values corresponding to the
        scaled precipitation values array, NaN wherever the calendar average
        is not greater than zero; an entirely masked input is returned as-is
    :rtype: numpy.ndarray of type float
    :raises ValueError: if the scale or periodicity argument is invalid, if
        the calibration period starts before the data, or if the calibration
        period covers more time steps than are present in the input
    """

    # validate the scale argument
    if (scale is None) or (scale < 1):
        message = "Invalid scale argument: '{0}'".format(scale)
        _logger.error(message)
        raise ValueError(message)

    # if doing monthly then we'll use 12 periods, corresponding to calendar
    # months, if daily assume years w/366 days
    if periodicity is compute.Periodicity.monthly:
        periods_per_year = 12
    elif periodicity is compute.Periodicity.daily:
        periods_per_year = 366
    else:
        message = "Invalid periodicity argument: '{0}'".format(periodicity)
        _logger.error(message)
        raise ValueError(message)

    # bypass processing if all values are masked
    if np.ma.is_masked(values) and values.mask.all():
        return values

    # make sure we've been provided with sane calibration limits; the length
    # check uses the actual number of time steps per year (12 or 366) so that
    # it is valid for daily as well as monthly inputs
    calibration_years = calibration_end_year - calibration_start_year + 1
    if data_start_year > calibration_start_year:
        raise ValueError("Invalid start year arguments (data and/or "
                         "calibration): calibration start year is before "
                         "the data start year"
        )
    if (calibration_years * periods_per_year) > values.size:
        raise ValueError("Invalid calibration period specified: total "
                         "calibration years exceeds the actual "
                         "number of years of data"
        )

    # get an array containing a sliding sum on the specified time step
    # scale -- i.e. if the scale is 3 then the first two elements will be
    # np.nan, since we need 3 elements to get a sum, and then from the third
    # element to the end the values will equal the sum of the corresponding
    # time step plus the values of the two previous time steps
    scale_sums = compute.sum_to_scale(values, scale)

    # extract the time steps over which we'll compute the normal
    # average for each time step of the year
    calibration_start_index = \
        (calibration_start_year - data_start_year) * periods_per_year
    calibration_end_index = \
        calibration_start_index + (calibration_years * periods_per_year)
    calibration_period_sums = \
        scale_sums[calibration_start_index:calibration_end_index]

    # for each time step in the calibration period, get the average of
    # the scale sum for that calendar time step (i.e. average all January sums,
    # then all February sums, etc.)
    averages = np.full((periods_per_year,), np.nan)
    for i in range(periods_per_year):
        averages[i] = np.nanmean(calibration_period_sums[i::periods_per_year])

    # line up each time step with the average of its calendar time step
    divisors = averages[np.arange(scale_sums.size) % periods_per_year]

    # divide only where the divisor is positive; zero, negative and NaN
    # averages leave the corresponding result as NaN
    percentages_of_normal = np.full(scale_sums.shape, np.nan)
    usable = divisors > 0.0
    percentages_of_normal[usable] = scale_sums[usable] / divisors[usable]

    return percentages_of_normal
예제 #5
0
def spei(precips_mm: np.ndarray,
         pet_mm: np.ndarray,
         scale: int,
         distribution,
         periodicity,
         data_start_year: int,
         calibration_year_initial: int,
         calibration_year_final: int):
    """
    Compute SPEI (Standardized Precipitation Evapotranspiration Index).

    PET values are subtracted from the precipitation values (with a fixed
    offset of 1000.0 added so that the differences are positive), the result
    is summed over a sliding window of ``scale`` time steps, and the sums are
    then fitted/transformed with the requested distribution and clipped to
    the valid index range.

    :param precips_mm: an array of monthly total precipitation values,
        in millimeters, should be of the same size as the input PET array
    :param pet_mm: an array of monthly PET values, in millimeters,
        should be of the same size as the input precipitation array
    :param scale: the number of time steps over which the values should be
        scaled before computing the indicator
    :param distribution: distribution type to be used for the internal
        fitting/transform computation (Distribution.gamma or Distribution.pearson)
    :param periodicity: the periodicity of the time series represented by the
        input data; it is passed through unchanged to the fitting routine
    :param data_start_year: the initial year of the input datasets (assumes that
        the two inputs cover the same period)
    :param calibration_year_initial: initial year of the calibration period
    :param calibration_year_final: final year of the calibration period
    :return: a 1-D array of SPEI values with as many elements as the input
        precipitation array; if the precipitation input is entirely masked
        or entirely NaN it is returned unchanged
    :rtype: numpy.ndarray of type float
    :raises ValueError: if the precipitation and PET arrays differ in size,
        or if the distribution argument is not one of the supported values
    """

    # nothing can be computed from an all-missing input, hand it straight back
    entirely_masked = np.ma.is_masked(precips_mm) and precips_mm.mask.all()
    if entirely_masked or np.all(np.isnan(precips_mm)):
        return precips_mm

    # the two inputs must hold the same number of elements
    if precips_mm.size != pet_mm.size:
        message = "Incompatible precipitation and PET arrays"
        _logger.error(message)
        raise ValueError(message)

    # P - PET, shifted by a constant offset to keep all values positive
    flat_precips = precips_mm.flatten()
    flat_pet = pet_mm.flatten()
    p_minus_pet = (flat_precips - flat_pet) + 1000.0

    # the result is trimmed back to this length before it is returned
    original_length = precips_mm.size

    # sliding sums over the requested number of time steps
    scaled_values = compute.sum_to_scale(p_minus_pet, scale)

    # pick the fitting/transform routine matching the requested distribution
    if distribution is Distribution.gamma:
        transform = compute.transform_fitted_gamma
    elif distribution is Distribution.pearson:
        transform = compute.transform_fitted_pearson
    else:
        message = f"Unsupported distribution argument: {distribution}"
        _logger.error(message)
        raise ValueError(message)

    # fit the scaled values and transform to corresponding normalized sigmas
    fitted = transform(scaled_values,
                       data_start_year,
                       calibration_year_initial,
                       calibration_year_final,
                       periodicity)

    # clip to the valid range and collapse to 1-D
    clipped = np.clip(fitted, _FITTED_INDEX_VALID_MIN, _FITTED_INDEX_VALID_MAX)
    flattened = clipped.flatten()

    # drop any padding so the result matches the input length
    return flattened[:original_length]
예제 #6
0
def spei(scale,
         distribution,
         periodicity,
         data_start_year,
         calibration_year_initial,
         calibration_year_final,
         precips_mm,
         pet_mm=None,
         temps_celsius=None,
         latitude_degrees=None):
    '''
    Compute SPEI (Standardized Precipitation Evapotranspiration Index).

    PET values are subtracted from the precipitation values to come up with an array of (P - PET) values, which is
    then scaled to the specified months scale and finally fitted/transformed to SPEI values corresponding to the
    input precipitation time series.

    If an input array of temperature values is provided then PET values are computed internally using the input
    temperature array, data start year, and latitude value (all three of which are required in combination).
    In this case an input array of PET values should not be specified and if so will result in an error being
    raised indicating invalid arguments.

    If an input array of PET values is provided then neither an input array of temperature values nor a latitude
    should be specified, and if so will result in an error being raised indicating invalid arguments.

    :param scale: the number of months over which the values should be scaled before computing the indicator
    :param distribution: distribution type to be used for the internal fitting/transform computation, either
                         Distribution.gamma or Distribution.pearson_type3
    :param periodicity: the periodicity of the time series represented by the input data, valid/supported values are
                        'monthly' and 'daily'
                        'monthly' indicates an array of monthly values, assumed to span full years, i.e. the first
                        value corresponds to January of the initial year and any missing final months of the final
                        year filled with NaN values, with size == # of years * 12
                        'daily' indicates an array of full years of daily values with 366 days per year, as if each
                        year were a leap year and any missing final months of the final year filled with NaN values,
                        with array size == (# years * 366)
    :param data_start_year: the initial year of the input datasets (assumes that the two inputs cover the same period)
    :param calibration_year_initial: initial year of the calibration period
    :param calibration_year_final: final year of the calibration period
    :param precips_mm: an array of monthly total precipitation values, in millimeters, should be of the same size
                       as the input temperature or PET array
    :param pet_mm: an array of monthly PET values, in millimeters, should be of the same size as the input
                   precipitation array, must be unspecified or None if using an array of temperature values as input
    :param temps_celsius: an array of monthly average temperature values, in degrees Celsius, should be of the same size
                          as the input precipitation array, must be unspecified or None if using an array
                          of PET values as input
    :param latitude_degrees: the latitude of the location, in degrees north, must be unspecified or None if using
                             an array of PET values as an input, and must be specified if using an array of temperatures
                             as input, valid range is -90.0 to 90.0 (inclusive)
    :return: a 1-D array of SPEI values with as many elements as the input precipitation array; if the
             precipitation input is entirely masked or entirely NaN it is returned unchanged
    :rtype: numpy.ndarray of type float
    :raises ValueError: if the combination of temperature/PET/latitude arguments is invalid, if the input arrays
                        differ in size, if a non-monthly periodicity is used together with temperature input,
                        or if the distribution argument is not one of the supported values
    '''

    # if we're passed all missing values then we can't compute anything, return the same array of missing values
    if (np.ma.is_masked(precips_mm) and precips_mm.mask.all()) or np.all(np.isnan(precips_mm)):
        return precips_mm

    # validate the function's argument combinations
    if temps_celsius is not None:

        # since we have temperature then it's expected that we'll compute PET internally, so we shouldn't have PET as an input
        if pet_mm is not None:
            message = 'Incompatible arguments: either temperature or PET arrays can be specified as arguments, but not both'
            _logger.error(message)
            raise ValueError(message)

        # we'll need both the latitude and data start year in order to compute PET
        elif (latitude_degrees is None) or (data_start_year is None):
            message = 'Missing arguments: since temperature is provided as an input then both latitude ' + \
                      'and the data start year must also be specified, and one or both is not'
            _logger.error(message)
            raise ValueError(message)

        # validate that the two input arrays are compatible
        elif precips_mm.size != temps_celsius.size:
            message = 'Incompatible precipitation and temperature arrays'
            _logger.error(message)
            raise ValueError(message)

        # NOTE(review): this compares against the string 'monthly'; if callers pass an enum member
        # instead of a string this branch would always raise -- confirm against the callers
        elif periodicity != 'monthly':
            # our PET currently uses a monthly version of Thornthwaite's equation and therefore's only valid for monthly
            message = 'Unsupported periodicity: \'{0}\' '.format(periodicity) + \
                      '-- only monthly time series is supported when providing temperature and latitude inputs'
            _logger.error(message)
            raise ValueError(message)

        # compute PET
        pet_mm = pet(temps_celsius, latitude_degrees, data_start_year)

    elif pet_mm is not None:

        # make sure there's no confusion by not allowing a user to specify unnecessary parameters
        if latitude_degrees is not None:
            message = 'Invalid argument: since PET is provided as an input then latitude must be absent'
            _logger.error(message)
            raise ValueError(message)

        # validate that the two input arrays are compatible
        elif precips_mm.size != pet_mm.size:
            message = 'Incompatible precipitation and PET arrays'
            _logger.error(message)
            raise ValueError(message)

    else:

        message = 'Neither temperature nor PET array was specified, one or the other is required for SPEI'
        _logger.error(message)
        raise ValueError(message)

    # subtract the PET from precipitation, adding an offset to ensure that all values are positive
    p_minus_pet = (precips_mm.flatten() - pet_mm.flatten()) + 1000.0

    # remember the original length of the input array, in order to facilitate returning an array of the same size
    original_length = precips_mm.size

    # get a sliding sums array, with each element's value scaled by the specified number of time steps
    scaled_values = compute.sum_to_scale(p_minus_pet, scale)

    if distribution is Distribution.gamma:

        # fit the scaled values to a gamma distribution and transform to corresponding normalized sigmas
        transformed_fitted_values = compute.transform_fitted_gamma(scaled_values,
                                                                   data_start_year,
                                                                   calibration_year_initial,
                                                                   calibration_year_final,
                                                                   periodicity)

    elif distribution is Distribution.pearson_type3:

        # fit the scaled values to a Pearson Type III distribution and transform to corresponding normalized sigmas
        transformed_fitted_values = compute.transform_fitted_pearson(scaled_values,
                                                                     data_start_year,
                                                                     calibration_year_initial,
                                                                     calibration_year_final,
                                                                     periodicity)

    else:

        # fail with a clear error rather than an unbound-variable error further down
        message = 'Unsupported distribution argument: {0}'.format(distribution)
        _logger.error(message)
        raise ValueError(message)

    # clip values to within the valid range, reshape the array back to 1-D
    # (named so as not to shadow this function's own name)
    spei_values = np.clip(transformed_fitted_values, _FITTED_INDEX_VALID_MIN, _FITTED_INDEX_VALID_MAX).flatten()

    # return the original size array
    return spei_values[0:original_length]
예제 #7
0
def spi(
    values: np.ndarray,
    scale: int,
    distribution: Distribution,
    data_start_year: int,
    calibration_year_initial: int,
    calibration_year_final: int,
    periodicity: compute.Periodicity,
    fitting_params: Dict = None,
) -> np.ndarray:
    """
    Computes SPI (Standardized Precipitation Index).

    Negative input values are clipped to zero (with a logged warning), the
    series is summed over a sliding window of ``scale`` time steps, reshaped
    to one row per year, fitted/transformed with the requested distribution,
    clipped to the valid index range and returned as a 1-D array.

    :param values: 1-D numpy array of precipitation values, in any units,
        first value assumed to correspond to January of the initial year if
        the periodicity is monthly, or January 1st of the initial year if daily
        (a 2-D array is also accepted and is flattened before use)
    :param scale: number of time steps over which the values should be scaled
        before the index is computed
    :param distribution: distribution type to be used for the internal
        fitting/transform computation
    :param data_start_year: the initial year of the input precipitation dataset
    :param calibration_year_initial: initial year of the calibration period
    :param calibration_year_final: final year of the calibration period
    :param periodicity: the periodicity of the time series represented by the
        input data, valid/supported values are 'monthly' and 'daily'
        'monthly' indicates an array of monthly values, assumed to span full
         years, i.e. the first value corresponds to January of the initial year
         and any missing final months of the final year filled with NaN values,
         with size == # of years * 12
         'daily' indicates an array of full years of daily values with 366 days
         per year, as if each year were a leap year and any missing final months
         of the final year filled with NaN values, with array size == (# years * 366)
    :param fitting_params: optional dictionary of pre-computed distribution
        fitting parameters, if the distribution is gamma then this dict should
        contain two entries, keyed as "alpha" and "beta", and if the
        distribution is Pearson then this dict should contain four entries keyed
        as "prob_zero", "loc", "scale", and "skew"
    :return: SPI values fitted to the requested distribution at the specified
        time step scale, unitless; if the input is entirely masked or entirely
        NaN it is returned as-is (after flattening, when a 2-D array was passed)
    :rtype: 1-D numpy.ndarray of floats of the same length as the input array
        of precipitation values
    :raises ValueError: if the input array is neither 1-D nor 2-D, or if the
        periodicity or distribution argument is not one of the supported values
    :raises KeyError: if fitting_params is provided but lacks one of the keys
        required for the requested distribution
    """

    # we expect to operate upon a 1-D array, so if we've been passed a 2-D array
    # then we flatten it, otherwise raise an error
    shape = values.shape
    if len(shape) == 2:
        values = values.flatten()
    elif len(shape) != 1:
        message = "Invalid shape of input array: {shape}".format(shape=shape) + \
                  " -- only 1-D and 2-D arrays are supported"
        _logger.error(message)
        raise ValueError(message)

    # if we're passed all missing values then we can't compute
    # anything, so we return the same array of missing values
    if (np.ma.is_masked(values) and values.mask.all()) or np.all(
            np.isnan(values)):
        return values

    # clip any negative values to zero; an element-wise comparison is used
    # because a minimum taken over the whole array is NaN as soon as a single
    # NaN is present, which would hide the negative values
    if np.any(values < 0.0):
        _logger.warning(
            "Input contains negative values -- all negatives clipped to zero")
        values = np.clip(values, a_min=0.0, a_max=None)

    # remember the original length of the array, in order to facilitate
    # returning an array of the same size
    original_length = values.size

    # get a sliding sums array, with each time step's value scaled
    # by the specified number of time steps
    values = compute.sum_to_scale(values, scale)

    # reshape precipitation values to (years, 12) for monthly,
    # or to (years, 366) for daily
    if periodicity is compute.Periodicity.monthly:

        values = utils.reshape_to_2d(values, 12)

    elif periodicity is compute.Periodicity.daily:

        values = utils.reshape_to_2d(values, 366)

    else:

        raise ValueError("Invalid periodicity argument: %s" % periodicity)

    if distribution is Distribution.gamma:

        # get (optional) fitting parameters if provided
        if fitting_params is not None:
            alphas = fitting_params["alpha"]
            betas = fitting_params["beta"]
        else:
            alphas = None
            betas = None

        # fit the scaled values to a gamma distribution
        # and transform to corresponding normalized sigmas
        values = compute.transform_fitted_gamma(
            values,
            data_start_year,
            calibration_year_initial,
            calibration_year_final,
            periodicity,
            alphas,
            betas,
        )
    elif distribution is Distribution.pearson:

        # get (optional) fitting parameters if provided
        if fitting_params is not None:
            probabilities_of_zero = fitting_params["prob_zero"]
            locs = fitting_params["loc"]
            scales = fitting_params["scale"]
            skews = fitting_params["skew"]
        else:
            probabilities_of_zero = None
            locs = None
            scales = None
            skews = None

        # fit the scaled values to a Pearson Type III distribution
        # and transform to corresponding normalized sigmas
        values = compute.transform_fitted_pearson(
            values,
            data_start_year,
            calibration_year_initial,
            calibration_year_final,
            periodicity,
            probabilities_of_zero,
            locs,
            scales,
            skews,
        )

    else:

        message = "Unsupported distribution argument: " + \
                  "{dist}".format(dist=distribution)
        _logger.error(message)
        raise ValueError(message)

    # clip values to within the valid range, reshape the array back to 1-D
    values = np.clip(values, _FITTED_INDEX_VALID_MIN,
                     _FITTED_INDEX_VALID_MAX).flatten()

    # return the original size array
    return values[0:original_length]
예제 #8
0
def spei(
    precips_mm: np.ndarray,
    pet_mm: np.ndarray,
    scale: int,
    distribution: Distribution,
    periodicity: compute.Periodicity,
    data_start_year: int,
    calibration_year_initial: int,
    calibration_year_final: int,
    fitting_params: dict = None,
) -> np.ndarray:
    """
    Compute SPEI fitted to either the gamma or the Pearson Type III
    distribution, as selected by the ``distribution`` argument.

    PET values are subtracted from the precipitation values to come up with an
    array of (P - PET) values, which is then summed over the specified scale
    and finally fitted/transformed to SPEI values corresponding to the input
    precipitation time series.

    :param precips_mm: an array of total precipitation values, in millimeters,
        must have the same number of elements as the input PET array
    :param pet_mm: an array of PET values, in millimeters, must have the same
        number of elements as the input precipitation array
    :param scale: the number of time steps over which the values should be
        scaled (summed) before computing the indicator
    :param distribution: distribution type to be used for the internal
        fitting/transform computation (``Distribution.gamma`` or
        ``Distribution.pearson``)
    :param periodicity: the periodicity of the time series represented by the
        input data, valid/supported values are 'monthly' and 'daily'
        'monthly' indicates an array of monthly values, assumed to span full
         years, i.e. the first value corresponds to January of the initial year
         and any missing final months of the final year filled with NaN values,
         with size == # of years * 12
         'daily' indicates an array of full years of daily values with 366 days
         per year, as if each year were a leap year and any missing final months
         of the final year filled with NaN values, with array size == (# years * 366)
    :param data_start_year: the initial year of the input datasets (assumes that
        the two inputs cover the same period)
    :param calibration_year_initial: initial year of the calibration period
    :param calibration_year_final: final year of the calibration period
    :param fitting_params: optional dictionary of pre-computed distribution
        fitting parameters, if the distribution is gamma then this dict should
        contain two arrays, keyed as "alphas" and "betas", and if the
        distribution is Pearson then this dict should contain four arrays keyed
        as "probabilities_of_zero", "locs", "scales", and "skews"
    :return: an array of SPEI values, clipped to the valid fitted index range;
        if the precipitation input is entirely missing (all masked or all NaN)
        then the precipitation input itself is returned unchanged
    :rtype: numpy.ndarray of type float, 1-D, with as many elements as the
        input precipitation array
    :raises ValueError: if the precipitation and PET arrays differ in size, or
        if the distribution argument is neither gamma nor Pearson
    """

    # if we're passed all missing values then we can't compute anything,
    # so we return the same array of missing values
    if (np.ma.is_masked(precips_mm) and precips_mm.mask.all()) \
            or np.all(np.isnan(precips_mm)):
        return precips_mm

    # validate that the two input arrays are compatible
    if precips_mm.size != pet_mm.size:
        message = "Incompatible precipitation and PET arrays"
        _logger.error(message)
        raise ValueError(message)

    # clip any negative values to zero; an element-wise comparison is used
    # (rather than comparing the array minimum) because the minimum of an
    # array containing NaN is NaN, which would hide any negative values
    if np.any(precips_mm < 0.0):
        _logger.warning(
            "Input contains negative values -- all negatives clipped to zero")
        precips_mm = np.clip(precips_mm, a_min=0.0, a_max=None)

    # subtract the PET from precipitation, adding an offset
    # intended to keep all values positive
    p_minus_pet = (precips_mm.flatten() - pet_mm.flatten()) + 1000.0

    # remember the original length of the input array, in order to facilitate
    # returning an array of the same size
    original_length = precips_mm.size

    # get a sliding sums array, with each element's value
    # scaled by the specified number of time steps
    scaled_values = compute.sum_to_scale(p_minus_pet, scale)

    if distribution is Distribution.gamma:

        # get (optional) fitting parameters if provided
        if fitting_params is not None:
            alphas = fitting_params["alphas"]
            betas = fitting_params["betas"]
        else:
            alphas = None
            betas = None

        # fit the scaled values to a gamma distribution and
        # transform to corresponding normalized sigmas
        transformed_fitted_values = \
            compute.transform_fitted_gamma(
                scaled_values,
                data_start_year,
                calibration_year_initial,
                calibration_year_final,
                periodicity,
                alphas,
                betas,
            )

    elif distribution is Distribution.pearson:

        # get (optional) fitting parameters if provided
        if fitting_params is not None:
            probabilities_of_zero = fitting_params["probabilities_of_zero"]
            locs = fitting_params["locs"]
            scales = fitting_params["scales"]
            skews = fitting_params["skews"]
        else:
            probabilities_of_zero = None
            locs = None
            scales = None
            skews = None

        # fit the scaled values to a Pearson Type III distribution
        # and transform to corresponding normalized sigmas
        transformed_fitted_values = \
            compute.transform_fitted_pearson(
                scaled_values,
                data_start_year,
                calibration_year_initial,
                calibration_year_final,
                periodicity,
                probabilities_of_zero,
                locs,
                scales,
                skews,
            )

    else:
        message = "Unsupported distribution argument: " + \
                  "{dist}".format(dist=distribution)
        _logger.error(message)
        raise ValueError(message)

    # clip values to within the valid range, reshape the array back to 1-D
    values = \
        np.clip(transformed_fitted_values,
                _FITTED_INDEX_VALID_MIN,
                _FITTED_INDEX_VALID_MAX).flatten()

    # return the original size array
    return values[0:original_length]
예제 #9
0
def spi_pearson(precips,
                scale,
                data_start_year,
                calibration_year_initial,
                calibration_year_final,
                time_series_type):
    '''
    Compute SPI by fitting scaled precipitation to the Pearson Type III distribution.

    The input is summed over a sliding window of ``scale`` time steps, laid out
    as a 2-D array with one row per year, fitted/transformed, clipped to the
    valid index range and finally flattened and trimmed to the input length.

    :param precips: 1-D numpy array of precipitation values, in any units, first
        value assumed to correspond to January of the initial year if the time
        series type is monthly, or January 1st of the initial year if daily
    :param scale: number of time steps over which the values should be scaled
        before the index is computed
    :param data_start_year: the initial year of the input precipitation dataset
    :param calibration_year_initial: initial year of the calibration period
    :param calibration_year_final: final year of the calibration period
    :param time_series_type: the type of time series represented by the input
        data, either 'monthly' (reshaped to 12 values per row) or 'daily'
        (reshaped to 366 values per row, i.e. every year treated as a leap year)
    :return: SPI values fitted to the Pearson Type III distribution at the
        specified time scale, unitless
    :rtype: 1-D numpy.ndarray of floats of the same length as the input array
        of precipitation values
    :raises ValueError: if time_series_type is neither 'monthly' nor 'daily'
    '''

    # the result is trimmed back to this many elements before returning
    input_size = precips.size

    # sliding sums over the requested number of time steps
    summed = compute.sum_to_scale(precips, scale)

    # number of time steps that make up one row (year) of the 2-D layout
    if time_series_type == 'monthly':
        steps_per_year = 12
    elif time_series_type == 'daily':
        steps_per_year = 366
    else:
        raise ValueError('Invalid time series type argument: %s' % time_series_type)

    yearly_rows = utils.reshape_to_2d(summed, steps_per_year)

    # fit to Pearson Type III and transform to normalized sigmas
    fitted = compute.transform_fitted_pearson(
        yearly_rows,
        data_start_year,
        calibration_year_initial,
        calibration_year_final,
        time_series_type,
    )

    # constrain to the valid index range, then go back to a 1-D array
    clipped = np.clip(fitted, _FITTED_INDEX_VALID_MIN, _FITTED_INDEX_VALID_MAX)
    flattened = clipped.flatten()

    # drop any padding introduced by the reshape
    return flattened[0:input_size]