def scale_values(
        values: np.ndarray,
        scale: int,
        periodicity: Periodicity,
):
    """
    Compute sliding sums of a time series over the specified number of
    time steps and arrange the result as a 2-D array with one row per year.

    :param values: 1-D or 2-D array of values; a 2-D array is flattened
        before it is scaled
    :param scale: number of time steps handed to ``sum_to_scale``
    :param periodicity: ``Periodicity.monthly`` (rows of 12 values) or
        ``Periodicity.daily`` (rows of 366 values)
    :return: the scaled values reshaped to 2-D, or the (flattened) input
        array itself if every value is missing
    :raises ValueError: if the input array is neither 1-D nor 2-D, or if
        the periodicity is neither monthly nor daily
    """

    # we expect to operate upon a 1-D array, so if we've been passed a 2-D
    # array then we flatten it, otherwise raise an error
    shape = values.shape
    if len(shape) == 2:
        values = values.flatten()
    elif len(shape) != 1:
        message = "Invalid shape of input array: {shape}".format(shape=shape) + \
                  " -- only 1-D and 2-D arrays are supported"
        _logger.error(message)
        raise ValueError(message)

    # if we're passed all missing values then we can't compute
    # anything, so we return the same array of missing values
    if (np.ma.is_masked(values) and values.mask.all()) \
            or np.all(np.isnan(values)):
        return values

    # clip any negative values to zero; np.nanmin is used because np.amin
    # returns NaN as soon as a single NaN is present, and "NaN < 0.0" is
    # False, which would leave negatives untouched in series with gaps
    if np.nanmin(values) < 0.0:
        _logger.warning(
            "Input contains negative values -- all negatives clipped to zero")
        values = np.clip(values, a_min=0.0, a_max=None)

    # get a sliding sums array, with each time step's value scaled
    # by the specified number of time steps
    scaled_values = sum_to_scale(values, scale)

    # reshape precipitation values to (years, 12) for monthly,
    # or to (years, 366) for daily
    if periodicity is Periodicity.monthly:
        scaled_values = utils.reshape_to_2d(scaled_values, 12)
    elif periodicity is Periodicity.daily:
        scaled_values = utils.reshape_to_2d(scaled_values, 366)
    else:
        raise ValueError("Invalid periodicity argument: %s" % periodicity)

    return scaled_values
def _validate_array(
        values: np.ndarray,
        periodicity: Periodicity,
) -> np.ndarray:
    """
    Check that an array is usable as a (years, time steps) dataset,
    reshaping a 1-D series into 2-D when a periodicity is supplied.

    :param values: 1-D array, or 2-D array whose second axis has
        length 12 or 366
    :param periodicity: ``Periodicity.monthly`` or ``Periodicity.daily``;
        only consulted when the input array is 1-D
    :return: 2-D array, either the input itself (if already valid) or the
        result of ``utils.reshape_to_2d``
    :raises ValueError: for a 1-D array without a supported periodicity, or
        for any other array that is not 2-D with 12 or 366 columns
    """

    dimensions = len(values.shape)

    if dimensions == 1:

        # a 1-D series becomes (years, 12) or (years, 366)
        if periodicity is Periodicity.monthly:
            return utils.reshape_to_2d(values, 12)
        if periodicity is Periodicity.daily:
            return utils.reshape_to_2d(values, 366)

        # no usable periodicity, report which kind of problem it is
        if periodicity is None:
            message = "1-D input array requires a corresponding " \
                      "periodicity argument, none provided"
        else:
            message = "Unsupported periodicity argument: '{0}'".format(
                periodicity)
        _logger.error(message)
        raise ValueError(message)

    # anything else has to already be 2-D with a valid second axis length
    if dimensions == 2 and values.shape[1] in (12, 366):
        return values

    message = "Invalid input array with shape: {0}".format(values.shape)
    _logger.error(message)
    raise ValueError(message)
def eto_hargreaves(daily_tmin_celsius: np.ndarray,
                   daily_tmax_celsius: np.ndarray,
                   daily_tmean_celsius: np.ndarray,
                   latitude_degrees: float):
    """
    Compute daily potential evapotranspiration (PET) using the Hargreaves
    (1985) method. Based on equation 52 in Allen et al (1998).

    Input arrays are assumed to be 1-D (an arbitrary number of days) or 2-D
    (years x 366 days per year).

    :param daily_tmin_celsius: array of daily minimum temperature values,
        in degrees Celsius
    :param daily_tmax_celsius: array of daily maximum temperature values,
        in degrees Celsius
    :param daily_tmean_celsius: array of daily mean temperature values,
        in degrees Celsius
    :param latitude_degrees: latitude of location, in degrees north
    :return: 1-D array of potential evapotranspiration over grass (ETo), in
        millimeters per day, with as many elements as the input arrays
    :raises ValueError: if the three input arrays do not all contain the
        same number of elements
    """

    # validate the input data arrays: all three must hold the same number
    # of elements (sizes are compared pairwise so that no mismatch between
    # any two of the arrays can slip through)
    if not (daily_tmin_celsius.size
            == daily_tmax_celsius.size
            == daily_tmean_celsius.size):
        message = "Incompatible array sizes"
        _logger.error(message)
        raise ValueError(message)

    # keep the original length for conversion back to original size
    original_length = daily_tmean_celsius.size

    # reshape all three inputs to 2-D with 366 days per year (if not already
    # in this shape), since each of them is indexed as [year, day] below
    daily_tmin_celsius = utils.reshape_to_2d(daily_tmin_celsius, 366)
    daily_tmax_celsius = utils.reshape_to_2d(daily_tmax_celsius, 366)
    daily_tmean_celsius = utils.reshape_to_2d(daily_tmean_celsius, 366)

    # at this point we assume that our dataset arrays have shape (years, 366)
    # where each row is a year with 366 columns of daily values

    # convert the latitude from degrees to radians
    latitude = math.radians(latitude_degrees)

    # allocate the PET array we'll fill
    pet = np.full(daily_tmean_celsius.shape, np.nan)

    for day_of_year in range(1, daily_tmean_celsius.shape[1] + 1):

        # calculate the angle of solar declination and sunset hour angle
        solar_declination = _solar_declination(day_of_year)
        sunset_hour_angle = _sunset_hour_angle(latitude, solar_declination)

        # calculate the inverse relative distance between earth and sun
        # from the day of the year, based on FAO equation 23 in
        # Allen et al (1998).
        inv_rel_distance = \
            1 + (0.033 * math.cos((2.0 * math.pi / 365.0) * day_of_year))

        # extraterrestrial radiation
        tmp1 = (24.0 * 60.0) / math.pi
        tmp2 = sunset_hour_angle * math.sin(latitude) \
            * math.sin(solar_declination)
        tmp3 = (
            math.cos(latitude)
            * math.cos(solar_declination)
            * math.sin(sunset_hour_angle)
        )
        et_radiation = \
            tmp1 * _SOLAR_CONSTANT * inv_rel_distance * (tmp2 + tmp3)

        # the radiation term depends only on the day of the year, so the
        # Hargreaves equation is applied to every year's value for this
        # day in a single array operation
        day_index = day_of_year - 1
        tmin = daily_tmin_celsius[:, day_index]
        tmax = daily_tmax_celsius[:, day_index]
        tmean = daily_tmean_celsius[:, day_index]
        pet[:, day_index] = (
            0.0023
            * (tmean + 17.8)
            * (tmax - tmin) ** 0.5
            * 0.408
            * et_radiation
        )

    # reshape the dataset from (years, 366) into (total days),
    # i.e. convert from 2-D to 1-D, and truncate to the original length
    return pet.reshape(-1)[0:original_length]
def eto_thornthwaite(monthly_temps_celsius: np.ndarray,
                     latitude_degrees: float,
                     data_start_year: int):
    """
    Compute monthly potential evapotranspiration (PET) using the
    Thornthwaite (1948) method.

    Thornthwaite's equation, as implemented here (result in millimeters
    per month, hence the leading coefficient of 16):

        *PET* = 16 (*L*/12) (*N*/30) (10*Ta* / *I*)***a*

    where:

    * *Ta* is the mean daily air temperature, in degrees Celsius (if
      negative then use 0.0), of the month being calculated
    * *N* is the number of days in the month being calculated
    * *L* is the mean day length, in hours, of the month being calculated
    * *a* = (6.75 x 10-7)*I***3 - (7.71 x 10-5)*I***2
      + (1.792 x 10-2)*I* + 0.49239
    * *I* is a heat index which depends on the 12 monthly mean temperatures
      and is calculated as the sum of (*Tai* / 5)**1.514 for each month,
      where *Tai* is the air temperature for each month in the year

    Reference:
    Thornthwaite, C.W. (1948) An approach toward a rational classification
    of climate. Geographical Review, Vol. 38, 55-94.
    https://www.jstor.org/stable/210739

    :param monthly_temps_celsius: array containing a time series (monthly
        time steps) of mean daily air temperatures in degrees Celsius. This
        input dataset is assumed to start at January of the initial year,
        and can have any length. Both 1-D (months) and 2-D (years, 12)
        input datasets are supported. The array is not modified.
    :param latitude_degrees: latitude of the location, in degrees north
        (-90..90)
    :param data_start_year: year corresponding to the start of the dataset,
        used to decide which rows are leap years
    :return: estimated potential evapotranspiration, in millimeters/month
    :rtype: 1-D numpy.ndarray of floats with shape: (total # of months)
    """

    original_length = monthly_temps_celsius.size

    # validate the input data array
    monthly_temps_celsius = utils.reshape_to_2d(monthly_temps_celsius, 12)

    # at this point we assume that our dataset array has shape (years, 12)
    # where each row is a year with 12 columns of monthly values
    # (Jan, Feb, ..., Dec)

    # convert the latitude from degrees to radians
    latitude_radians = math.radians(latitude_degrees)

    # adjust negative temperature values to zero, since negative values
    # aren't allowed (no evaporation below freezing); np.clip returns a new
    # array, so an input that was passed in already shaped as (years, 12)
    # is not altered for the caller, and NaNs pass through unchanged
    monthly_temps_celsius = np.clip(monthly_temps_celsius,
                                    a_min=0.0,
                                    a_max=None)

    # mean the monthly temperature values over the month axis,
    # giving us 12 monthly means for the period of record
    mean_monthly_temps = np.nanmean(monthly_temps_celsius, axis=0)

    # calculate the heat index (I)
    heat_index = np.sum(np.power(mean_monthly_temps / 5.0, 1.514))

    # calculate the a coefficient
    a = ((6.75e-07 * heat_index ** 3)
         - (7.71e-05 * heat_index ** 2)
         + (1.792e-02 * heat_index)
         + 0.49239)

    # get mean daylight hours for both normal and leap years
    mean_daylight_hours_nonleap = \
        np.array(_monthly_mean_daylight_hours(latitude_radians, False))
    mean_daylight_hours_leap = \
        np.array(_monthly_mean_daylight_hours(latitude_radians, True))

    # allocate the PET array we'll fill
    pet = np.full(monthly_temps_celsius.shape, np.nan)
    for year in range(monthly_temps_celsius.shape[0]):

        # days per month and day lengths differ between leap/non-leap years
        if calendar.isleap(data_start_year + year):
            month_days = _MONTH_DAYS_LEAP
            mean_daylight_hours = mean_daylight_hours_leap
        else:
            month_days = _MONTH_DAYS_NONLEAP
            mean_daylight_hours = mean_daylight_hours_nonleap

        # calculate the Thornthwaite equation
        pet[year, :] = (
            16
            * (mean_daylight_hours / 12.0)
            * (month_days / 30.0)
            * ((10.0 * monthly_temps_celsius[year, :] / heat_index) ** a)
        )

    # reshape the dataset from (years, 12) into (months),
    # i.e. convert from 2-D to 1-D, and truncate to the original length
    return pet.reshape(-1)[0:original_length]
def spi(values: np.ndarray,
        scale: int,
        distribution,
        data_start_year: int,
        calibration_year_initial: int,
        calibration_year_final: int,
        periodicity):
    """
    Computes SPI (Standardized Precipitation Index).

    :param values: 1-D numpy array of precipitation values, in any units,
        first value assumed to correspond to January of the initial year if
        the periodicity is monthly, or January 1st of the initial year if
        daily (a 2-D array is also accepted and is flattened)
    :param scale: number of time steps over which the values should be
        scaled before the index is computed
    :param distribution: distribution type used for the fitting/transform
        step, either ``Distribution.gamma`` or ``Distribution.pearson``
    :param data_start_year: the initial year of the input precipitation
        dataset
    :param calibration_year_initial: initial year of the calibration period
    :param calibration_year_final: final year of the calibration period
    :param periodicity: ``compute.Periodicity.monthly`` for monthly values
        (rows of 12) or ``compute.Periodicity.daily`` for daily values
        (rows of 366, as if each year were a leap year)
    :return: SPI values at the specified time step scale, unitless, clipped
        to the valid fitted index range
    :rtype: 1-D numpy.ndarray of floats of the same length as the
        (flattened) input array of precipitation values
    :raises ValueError: for an input array that is neither 1-D nor 2-D, an
        unsupported periodicity, or an unsupported distribution
    """

    # only 1-D input, or 2-D input which we flatten, is acceptable
    dimensions = len(values.shape)
    if dimensions not in (1, 2):
        message = f"Invalid shape of input array: {values.shape} -- " + \
                  "only 1-D and 2-D arrays are supported"
        _logger.error(message)
        raise ValueError(message)
    if dimensions == 2:
        values = values.flatten()

    # nothing can be computed from an entirely missing series,
    # in which case the series is handed straight back
    all_masked = np.ma.is_masked(values) and values.mask.all()
    if all_masked or np.all(np.isnan(values)):
        return values

    # the result is trimmed back to this many elements at the end
    original_length = values.size

    # sliding sums over the requested number of time steps
    sliding_sums = compute.sum_to_scale(values, scale)

    # one row per year: 12 columns for monthly, 366 columns for daily
    if periodicity is compute.Periodicity.monthly:
        steps_per_year = 12
    elif periodicity is compute.Periodicity.daily:
        steps_per_year = 366
    else:
        raise ValueError("Invalid periodicity argument: %s" % periodicity)
    yearly_sums = utils.reshape_to_2d(sliding_sums, steps_per_year)

    # pick the function that fits the scaled values to the requested
    # distribution and transforms them to normalized sigmas
    if distribution is Distribution.gamma:
        transform = compute.transform_fitted_gamma
    elif distribution is Distribution.pearson:
        transform = compute.transform_fitted_pearson
    else:
        message = f"Unsupported distribution argument: {distribution}"
        _logger.error(message)
        raise ValueError(message)

    fitted = transform(yearly_sums,
                       data_start_year,
                       calibration_year_initial,
                       calibration_year_final,
                       periodicity)

    # limit to the valid index range, go back to 1-D,
    # and drop whatever padding the reshape added
    clipped = np.clip(fitted,
                      _FITTED_INDEX_VALID_MIN,
                      _FITTED_INDEX_VALID_MAX)
    return clipped.flatten()[0:original_length]
def transform_fitted_gamma(values,
                           data_start_year,
                           calibration_start_year,
                           calibration_end_year,
                           periodicity):
    """
    Fit values to a gamma distribution and transform the values to
    corresponding normalized sigmas.

    The input array is not modified; zeros are replaced by NaN on an
    internal copy only.

    :param values: 2-D array of values, with each row typically representing
        a year containing twelve columns representing the respective
        calendar months, or 366 columns representing days as if all years
        were leap years; a 1-D array is reshaped according to the
        periodicity argument
    :param data_start_year: the initial year of the input values array
    :param calibration_start_year: the initial year to use for the
        calibration period
    :param calibration_end_year: the final year to use for the calibration
        period
    :param periodicity: ``Periodicity.monthly`` or ``Periodicity.daily``,
        only consulted when the input array is 1-D
    :return: 2-D array of transformed/fitted values with the same shape as
        the (reshaped) input; elements whose input value is zero or NaN
        are NaN. If every input value is missing the input array itself is
        returned.
    :rtype: numpy.ndarray of floats
    :raises ValueError: for a 1-D array without a supported periodicity, or
        for any other array that is not 2-D with 12 or 366 columns
    """

    # if we're passed all missing values then we can't compute anything,
    # return the same array of missing values
    if (np.ma.is_masked(values) and values.mask.all()) \
            or np.all(np.isnan(values)):
        return values

    # validate (and possibly reshape) the input array
    if len(values.shape) == 1:

        if periodicity is None:
            message = '1-D input array requires a corresponding ' \
                      'periodicity argument, none provided'
            _logger.error(message)
            raise ValueError(message)

        elif periodicity is Periodicity.monthly:
            # we've been passed a 1-D array with shape (months),
            # reshape it to 2-D with shape (years, 12)
            values = utils.reshape_to_2d(values, 12)

        elif periodicity is Periodicity.daily:
            # we've been passed a 1-D array with shape (days),
            # reshape it to 2-D with shape (years, 366)
            values = utils.reshape_to_2d(values, 366)

        else:
            message = 'Unsupported periodicity argument: \'{0}\''.format(
                periodicity)
            _logger.error(message)
            raise ValueError(message)

    elif (len(values.shape) != 2) or \
            (values.shape[1] != 12 and values.shape[1] != 366):
        # neither a 1-D nor a 2-D array with valid shape was passed in
        message = 'Invalid input array with shape: {0}'.format(values.shape)
        _logger.error(message)
        raise ValueError(message)

    # find the fraction of zero values for each time step
    zeros = (values == 0).sum(axis=0)
    probabilities_of_zero = zeros / values.shape[0]

    # replace zeros with NaNs so that they're excluded from the fitting;
    # this is done on a copy so the caller's array is left untouched
    values = values.copy()
    values[values == 0] = np.nan

    # determine the end year of the values array
    # NOTE(review): this is the start year plus the number of rows, i.e. one
    # greater than the final year of data -- confirm whether that's intended
    data_end_year = data_start_year + values.shape[0]

    # make sure that we have data within the full calibration period,
    # otherwise use the full period of record
    if (calibration_start_year < data_start_year) or \
            (calibration_end_year > data_end_year):
        _logger.info(
            'Insufficient data for the specified calibration period ({0}-{1}),'
            .format(calibration_start_year, calibration_end_year) +
            ' instead using the full period of record ({0}-{1})'.format(
                data_start_year, data_end_year))
        calibration_start_year = data_start_year
        calibration_end_year = data_end_year

    # get the year axis indices corresponding to
    # the calibration start and end years
    calibration_begin_index = (calibration_start_year - data_start_year)
    calibration_end_index = (calibration_end_year - data_start_year) + 1

    # get the rows that fall within the calibration years period
    calibration_values = values[
        calibration_begin_index:calibration_end_index, :]

    # compute the gamma distribution's shape and scale parameters (alpha
    # and beta) for each time step from the calibration values, using the
    # difference between the log of the mean and the mean of the logs
    means = np.nanmean(calibration_values, axis=0)
    log_means = np.log(means)
    logs = np.log(calibration_values)
    mean_logs = np.nanmean(logs, axis=0)
    a = log_means - mean_logs
    alphas = (1 + np.sqrt(1 + 4 * a / 3)) / (4 * a)
    betas = means / alphas

    # find the gamma probability values using the gamma CDF
    gamma_probabilities = scipy.stats.gamma.cdf(values, a=alphas, scale=betas)

    # combine with the probability of zero: the gamma CDF only describes
    # the non-zero values, so it's weighted by the non-zero fraction and
    # offset by the fraction of zeros
    probabilities = probabilities_of_zero + (
        (1 - probabilities_of_zero) * gamma_probabilities)

    # the values we'll return are the values at which the probabilities of
    # a normal distribution are less than or equal to the computed
    # probabilities, as determined by the normal distribution's quantile
    # (or inverse cumulative distribution) function
    return scipy.stats.norm.ppf(probabilities)
def transform_fitted_pearson(values,
                             data_start_year,
                             calibration_start_year,
                             calibration_end_year,
                             periodicity):
    """
    Fit values to a Pearson Type III distribution and transform the values
    to corresponding normalized sigmas.

    :param values: 2-D array of values, with each row representing a year
        containing twelve columns representing the respective calendar
        months, or 366 columns representing days as if all years were leap
        years; a 1-D array is reshaped according to the periodicity argument
    :param data_start_year: the initial year of the input values array
    :param calibration_start_year: the initial year to use for the
        calibration period
    :param calibration_end_year: the final year to use for the calibration
        period
    :param periodicity: ``Periodicity.monthly`` or ``Periodicity.daily``,
        only consulted when the input array is 1-D
    :return: array of transformed/fitted values as produced by the Pearson
        fitting function for the (reshaped) input, or the input array
        itself if every value is missing
    :rtype: numpy.ndarray of floats
    :raises ValueError: for a 1-D array without a supported periodicity, or
        for any other array that is not 2-D with 12 or 366 columns
    """

    # an entirely missing dataset is handed straight back
    all_masked = np.ma.is_masked(values) and values.mask.all()
    if all_masked or np.all(np.isnan(values)):
        return values

    # make sure we end up with a (years, 12) or (years, 366) array
    dimensions = len(values.shape)
    if dimensions == 1:

        if periodicity is None:
            message = "1-D input array requires a corresponding " \
                      "periodicity argument, none provided"
            _logger.error(message)
            raise ValueError(message)

        if periodicity is Periodicity.monthly:
            steps_per_year = 12
        elif periodicity is Periodicity.daily:
            steps_per_year = 366
        else:
            message = "Unsupported periodicity argument: '{0}'".format(
                periodicity)
            _logger.error(message)
            raise ValueError(message)

        # 1-D (months or days) becomes 2-D (years, steps per year)
        values = utils.reshape_to_2d(values, steps_per_year)

    elif dimensions != 2 or values.shape[1] not in (12, 366):

        message = "Invalid input array with shape: {0}".format(values.shape)
        _logger.error(message)
        raise ValueError(message)

    # the end year is taken as the start year plus the number of rows
    data_end_year = data_start_year + values.shape[0]

    # a calibration period reaching outside of the data
    # is replaced by the full period of record
    starts_too_early = calibration_start_year < data_start_year
    ends_too_late = calibration_end_year > data_end_year
    if starts_too_early or ends_too_late:
        requested = \
            'Insufficient data for the specified calibration period ({0}-{1}),' \
            .format(calibration_start_year, calibration_end_year)
        fallback = \
            ' instead using the full period of record ({0}-{1})'.format(
                data_start_year, data_end_year)
        _logger.info(requested + fallback)
        calibration_start_year = data_start_year
        calibration_end_year = data_end_year

    # rows (years) of the array that make up the calibration period
    first_row = calibration_start_year - data_start_year
    stop_row = (calibration_end_year - data_start_year) + 1
    calibration_values = values[first_row:stop_row, :]

    # element 0 of the fitting values is the probability of zero,
    # elements 1..3 are the three Pearson Type III parameters
    fitting = _pearson3_fitting_values(calibration_values)

    # apply the Pearson Type III fitting universal function
    # over all of the values in a broadcast fashion
    return _pearson_fit_ufunc(values,
                              fitting[1],
                              fitting[2],
                              fitting[3],
                              fitting[0])
def test_reshape_to_2d(self):
    '''
    Test for the utils.reshape_to_2d() function: 1-D input is rearranged
    into rows of the requested length (padded with NaN), valid 2-D input
    comes back unchanged, and badly shaped input raises a ValueError.
    '''

    # an array of monthly values
    values_1d = np.array([
        3, 4, 6, 2, 1, 3, 5, 8, 5, 6, 3, 4, 6, 2, 1, 3, 5, 8, 5, 6, 3, 4,
        6, 2, 1, 3, 5, 8, 5, 6
    ])

    # the expected rearrangement of the above values
    # from 1-D to 2-D if using 12 as the second axis size
    values_2d_by_12_expected = np.array([
        [3, 4, 6, 2, 1, 3, 5, 8, 5, 6, 3, 4],
        [6, 2, 1, 3, 5, 8, 5, 6, 3, 4, 6, 2],
        [1, 3, 5, 8, 5, 6, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]
    ])

    # exercise the function
    values_2d_reshaped = utils.reshape_to_2d(values_1d, 12)

    # verify that the function performed as expected
    np.testing.assert_equal(
        values_2d_by_12_expected, values_2d_reshaped,
        'Not rearranging the 1-D array into 2-D year increments of 12 as expected'
    )

    # the expected rearrangement of the above values
    # from 1-D to 2-D if using 8 as the second axis size
    values_2d_by_8_expected = np.array([[3, 4, 6, 2, 1, 3, 5, 8],
                                        [5, 6, 3, 4, 6, 2, 1, 3],
                                        [5, 8, 5, 6, 3, 4, 6, 2],
                                        [1, 3, 5, 8, 5, 6, np.nan, np.nan]])

    # exercise the function
    values_2d_reshaped = utils.reshape_to_2d(values_1d, 8)

    # verify that the function performed as expected
    np.testing.assert_equal(
        values_2d_by_8_expected, values_2d_reshaped,
        'Not rearranging the 1-D array into 2-D increments of 8 as expected'
    )

    # a 2-D array that should be returned as-is
    # if using 12 as the second axis size
    values_2d = np.array([[3, 4, 6, 2, 1, 3, 5, 8, 5, 6, 3, 4],
                          [6, 2, 1, 3, 5, 8, 5, 6, 3, 4, 6, 2],
                          [1, 3, 5, 8, 5, 6, 3, 5, 1, 2, 8, 4]])

    # exercise the function
    values_2d_reshaped = utils.reshape_to_2d(values_2d, 12)

    # verify that the function performed as expected
    np.testing.assert_equal(
        values_2d, values_2d_reshaped,
        'Not returning an already valid 2-D array as expected')

    # a 2-D array that's in an invalid shape for the function
    # (nine columns where twelve are asked for)
    values_2d = np.array([[3, 4, 6, 2, 1, 3, 5, 3, 4],
                          [6, 2, 1, 3, 5, 8, 5, 6, 2],
                          [1, 3, 5, 8, 5, 6, 3, 8, 4]])

    # make sure that the function croaks with a ValueError when expected,
    # both for the badly shaped 2-D array and for a 3-D array
    np.testing.assert_raises(ValueError, utils.reshape_to_2d, values_2d, 12)
    np.testing.assert_raises(ValueError, utils.reshape_to_2d,
                             values_2d.reshape((3, 3, 3)), 6)
def spi(
        values: np.ndarray,
        scale: int,
        distribution: Distribution,
        data_start_year: int,
        calibration_year_initial: int,
        calibration_year_final: int,
        periodicity: compute.Periodicity,
        fitting_params: Dict = None,
) -> np.ndarray:
    """
    Computes SPI (Standardized Precipitation Index).

    :param values: 1-D numpy array of precipitation values, in any units,
        first value assumed to correspond to January of the initial year if
        the periodicity is monthly, or January 1st of the initial year if
        daily (a 2-D array is also accepted and is flattened); negative
        values are clipped to zero
    :param scale: number of time steps over which the values should be
        scaled before the index is computed
    :param distribution: distribution type to be used for the internal
        fitting/transform computation, either ``Distribution.gamma`` or
        ``Distribution.pearson``
    :param data_start_year: the initial year of the input precipitation
        dataset
    :param calibration_year_initial: initial year of the calibration period
    :param calibration_year_final: final year of the calibration period
    :param periodicity: the periodicity of the time series represented by
        the input data, valid/supported values are 'monthly' and 'daily'
        'monthly' indicates an array of monthly values, assumed to span full
        years, i.e. the first value corresponds to January of the initial
        year and any missing final months of the final year filled with NaN
        values, with size == # of years * 12
        'daily' indicates an array of full years of daily values with 366
        days per year, as if each year were a leap year and any missing
        final months of the final year filled with NaN values, with array
        size == (# years * 366)
    :param fitting_params: optional dictionary of pre-computed distribution
        fitting parameters; if the distribution is gamma then this dict is
        read using the keys "alpha" and "beta", and if the distribution is
        Pearson then it is read using the keys "prob_zero", "loc", "scale",
        and "skew"
    :return: SPI values fitted to the selected distribution at the
        specified time step scale, unitless
    :rtype: 1-D numpy.ndarray of floats of the same length as the
        (flattened) input array of precipitation values
    :raises ValueError: for an input array that is neither 1-D nor 2-D, an
        unsupported periodicity, or an unsupported distribution
    :raises KeyError: if fitting_params is provided but lacks one of the
        keys read for the selected distribution
    """

    # we expect to operate upon a 1-D array, so if we've been passed a 2-D
    # array then we flatten it, otherwise raise an error
    shape = values.shape
    if len(shape) == 2:
        values = values.flatten()
    elif len(shape) != 1:
        message = "Invalid shape of input array: {shape}".format(shape=shape) + \
                  " -- only 1-D and 2-D arrays are supported"
        _logger.error(message)
        raise ValueError(message)

    # if we're passed all missing values then we can't compute
    # anything, so we return the same array of missing values
    if (np.ma.is_masked(values) and values.mask.all()) \
            or np.all(np.isnan(values)):
        return values

    # clip any negative values to zero; np.nanmin is used because np.amin
    # returns NaN as soon as a single NaN is present, and "NaN < 0.0" is
    # False, which would leave negatives untouched in series with gaps
    if np.nanmin(values) < 0.0:
        _logger.warning(
            "Input contains negative values -- all negatives clipped to zero")
        values = np.clip(values, a_min=0.0, a_max=None)

    # remember the original length of the array, in order to facilitate
    # returning an array of the same size
    original_length = values.size

    # get a sliding sums array, with each time step's value scaled
    # by the specified number of time steps
    values = compute.sum_to_scale(values, scale)

    # reshape precipitation values to (years, 12) for monthly,
    # or to (years, 366) for daily
    if periodicity is compute.Periodicity.monthly:
        values = utils.reshape_to_2d(values, 12)
    elif periodicity is compute.Periodicity.daily:
        values = utils.reshape_to_2d(values, 366)
    else:
        raise ValueError("Invalid periodicity argument: %s" % periodicity)

    if distribution is Distribution.gamma:

        # get (optional) fitting parameters if provided
        if fitting_params is not None:
            alphas = fitting_params["alpha"]
            betas = fitting_params["beta"]
        else:
            alphas = None
            betas = None

        # fit the scaled values to a gamma distribution
        # and transform to corresponding normalized sigmas
        values = compute.transform_fitted_gamma(
            values,
            data_start_year,
            calibration_year_initial,
            calibration_year_final,
            periodicity,
            alphas,
            betas,
        )

    elif distribution is Distribution.pearson:

        # get (optional) fitting parameters if provided
        if fitting_params is not None:
            probabilities_of_zero = fitting_params["prob_zero"]
            locs = fitting_params["loc"]
            scales = fitting_params["scale"]
            skews = fitting_params["skew"]
        else:
            probabilities_of_zero = None
            locs = None
            scales = None
            skews = None

        # fit the scaled values to a Pearson Type III distribution
        # and transform to corresponding normalized sigmas
        values = compute.transform_fitted_pearson(
            values,
            data_start_year,
            calibration_year_initial,
            calibration_year_final,
            periodicity,
            probabilities_of_zero,
            locs,
            scales,
            skews,
        )

    else:

        message = "Unsupported distribution argument: " + \
                  "{dist}".format(dist=distribution)
        _logger.error(message)
        raise ValueError(message)

    # clip values to within the valid range, reshape the array back to 1-D
    values = np.clip(values,
                     _FITTED_INDEX_VALID_MIN,
                     _FITTED_INDEX_VALID_MAX).flatten()

    # return the original size array
    return values[0:original_length]
def pearson_parameters(
        values: np.ndarray,
        data_start_year: int,
        calibration_start_year: int,
        calibration_end_year: int,
        periodicity: Periodicity,
) -> (np.ndarray, np.ndarray, np.ndarray, np.ndarray):
    """
    Compute the probability of zero and the Pearson Type III distribution
    parameters for each calendar time step of an array of values, using
    only the rows that fall within the calibration period.

    :param values: array of values reshaped here to 2-D, with each row
        representing a year containing either 12 values (calendar months)
        or 366 values (days of the year); the first value is expected to
        correspond to January (or Jan. 1st) of the initial year
    :param data_start_year: the initial year of the input values array
    :param calibration_start_year: the initial year of the calibration
        period
    :param calibration_end_year: the final year of the calibration period
    :param periodicity: ``Periodicity.monthly`` or ``Periodicity.daily``
    :return: four 1-D arrays of fitting values for the Pearson Type III
        distribution, with shape (12,) for monthly or (366,) for daily:
        probabilities of zero, first parameter (loc), second parameter
        (scale), and third parameter (skew); entries stay at zero for time
        steps with fewer than four non-missing, non-zero calibration values
    :raises ValueError: for an unsupported periodicity, or if the reshaped
        array is not 2-D with 12 or 366 columns
    """

    # arrange the values as (years, 12) for monthly or (years, 366) for daily
    if periodicity is Periodicity.monthly:
        values = utils.reshape_to_2d(values, 12)
    elif periodicity is Periodicity.daily:
        values = utils.reshape_to_2d(values, 366)
    else:
        raise ValueError("Invalid periodicity argument: %s" % periodicity)

    # double check the shape we've ended up with
    shape_is_valid = \
        (len(values.shape) == 2) and (values.shape[1] in (12, 366))
    if not shape_is_valid:
        message = "Invalid shape of input data array: {shape}".format(
            shape=values.shape)
        _logger.error(message)
        raise ValueError(message)
    time_steps_per_year = values.shape[1]

    # the end year is taken as the start year plus the number of rows
    data_end_year = data_start_year + values.shape[0]

    # a calibration period reaching outside of the data
    # is replaced by the full period of record
    starts_too_early = calibration_start_year < data_start_year
    ends_too_late = calibration_end_year > data_end_year
    if starts_too_early or ends_too_late:
        calibration_start_year = data_start_year
        calibration_end_year = data_end_year

    # rows (years) of the array that make up the calibration period
    first_row = calibration_start_year - data_start_year
    stop_row = (calibration_end_year - data_start_year) + 1
    calibration_values = values[first_row:stop_row, :]

    # the arrays we'll fill and return, one element per calendar time step
    probabilities_of_zero = np.zeros((time_steps_per_year, ))
    locs = np.zeros((time_steps_per_year, ))
    scales = np.zeros((time_steps_per_year, ))
    skews = np.zeros((time_steps_per_year, ))

    for step in range(time_steps_per_year):

        # all calibration years' values for this calendar time step
        step_values = calibration_values[:, step]

        # how many are zero, and how many are valid (non-missing/non-NaN)
        zero_count, valid_count = \
            utils.count_zeros_and_non_missings(step_values)

        # at least four values that are both non-missing and non-zero are
        # required, otherwise this time step's results are left as zeros
        if (valid_count - zero_count) < 4:
            continue

        # fraction of the valid values that are zero
        if zero_count > 0:
            probability_of_zero = zero_count / valid_count
        else:
            probability_of_zero = 0.0

        # Pearson Type III parameters fitted to this time
        # step's values within the calibration period
        params = lmoments.fit(step_values)

        probabilities_of_zero[step] = probability_of_zero
        locs[step] = params["loc"]
        scales[step] = params["scale"]
        skews[step] = params["skew"]

    return probabilities_of_zero, locs, scales, skews
def spi_pearson(precips,
                scale,
                data_start_year,
                calibration_year_initial,
                calibration_year_final,
                time_series_type):
    '''
    Computes SPI using a fitting to the Pearson Type III distribution.

    :param precips: 1-D numpy array of precipitation values, in any units,
        first value assumed to correspond to January of the initial year if
        the time series type is monthly, or January 1st of the initial year
        if daily
    :param scale: number of time steps over which the values should be
        scaled before the index is computed
    :param data_start_year: the initial year of the input precipitation
        dataset
    :param calibration_year_initial: initial year of the calibration period
    :param calibration_year_final: final year of the calibration period
    :param time_series_type: either 'monthly' (values arranged as 12 per
        year) or 'daily' (values arranged as 366 per year, as if each year
        were a leap year)
    :return: SPI values fitted to the Pearson Type III distribution at the
        specified time scale, unitless, clipped to the valid index range
    :rtype: 1-D numpy.ndarray of floats of the same length as the input
        array of precipitation values
    :raises ValueError: if the time series type is neither 'monthly' nor
        'daily'
    '''

    # the result is trimmed back to this many elements at the end
    original_length = precips.size

    # sliding sums over the requested number of time steps
    sliding_sums = compute.sum_to_scale(precips, scale)

    # one row per year: 12 columns for monthly,
    # 366 columns for daily (all years represented as leap)
    if time_series_type == 'monthly':
        steps_per_year = 12
    elif time_series_type == 'daily':
        steps_per_year = 366
    else:
        raise ValueError('Invalid time series type argument: %s' %
                         time_series_type)
    yearly_sums = utils.reshape_to_2d(sliding_sums, steps_per_year)

    # fit to a Pearson Type III distribution and
    # transform to corresponding normalized sigmas
    fitted = compute.transform_fitted_pearson(yearly_sums,
                                              data_start_year,
                                              calibration_year_initial,
                                              calibration_year_final,
                                              time_series_type)

    # limit to the valid index range, go back to 1-D,
    # and drop whatever padding the reshape added
    clipped = np.clip(fitted,
                      _FITTED_INDEX_VALID_MIN,
                      _FITTED_INDEX_VALID_MAX)
    return clipped.flatten()[0:original_length]