def _pearson3_fitting_values(
    values: np.ndarray,
) -> np.ndarray:
    """
    Compute the probability of zero and the Pearson Type III distribution
    parameters for each calendar time step of an array of values.

    :param values: 2-D array of values, with each row representing a year
        containing either 12 values corresponding to the calendar months of
        that year, or 366 values corresponding to the days of the year
        (with Feb. 29th being an average of the Feb. 28th and Mar. 1st values
        for non-leap years) and assuming that the first value of the array
        is January of the initial year for an input array of monthly values
        or Jan. 1st of initial year for an input array daily values
    :return: a 2-D array of fitting values for the Pearson Type III
        distribution, with shape (4, 12) for monthly or (4, 366) for daily
        returned_array[0] == probability of zero for each calendar time step
        returned_array[1] == first Pearson Type III parameter (loc)
        returned_array[2] == second Pearson Type III parameter (scale)
        returned_array[3] == third Pearson Type III parameter (skew)
        Calendar time steps with fewer than four values that are both
        non-missing and non-zero are left as zeros in all four rows.
    :raises ValueError: if the input array is not 2-D or if its second
        dimension is neither 12 nor 366
    """

    # validate that the values array has shape: (years, 12) for monthly
    # or (years, 366) for daily
    if (len(values.shape) != 2) or (values.shape[1] not in (12, 366)):
        message = "Invalid shape of input data array: {shape}".format(
            shape=values.shape)
        _logger.error(message)
        raise ValueError(message)

    time_steps_per_year = values.shape[1]

    # the values we'll compute and return
    fitting_values = np.zeros((4, time_steps_per_year))

    # compute the probability of zero and Pearson
    # parameters for each calendar time step
    # TODO vectorize the below loop? create a @numba.vectorize() ufunc
    # for application over the second axis
    for time_step_index in range(time_steps_per_year):

        # get the values for the current calendar time step
        time_step_values = values[:, time_step_index]

        # count the number of zeros and valid (non-missing/non-NaN) values
        number_of_zeros, number_of_non_missing = \
            utils.count_zeros_and_non_missings(time_step_values)

        # we need at least four values that are both non-missing
        # (i.e. non-NaN) and non-zero in order to fit the distribution;
        # otherwise leave this time step's fitting values as zeros and move
        # on to the next one (returning here would wrongly leave every
        # later calendar time step unfitted as well)
        if (number_of_non_missing - number_of_zeros) < 4:
            continue

        # calculate the probability of zero for the calendar time step
        # (the divisor is at least four at this point, and a zero count
        # simply yields a probability of 0.0)
        probability_of_zero = number_of_zeros / number_of_non_missing

        # get the Pearson Type III parameters for this time step's values
        # NOTE(review): NaN values are passed to the fit unfiltered, as in
        # the original code -- confirm that lmoments.fit tolerates them
        params = lmoments.fit(time_step_values)

        fitting_values[0, time_step_index] = probability_of_zero
        fitting_values[1, time_step_index] = params["loc"]
        fitting_values[2, time_step_index] = params["scale"]
        fitting_values[3, time_step_index] = params["skew"]

    return fitting_values
def pearson_parameters(
    values: np.ndarray,
    data_start_year: int,
    calibration_start_year: int,
    calibration_end_year: int,
    periodicity: Periodicity,
) -> (np.ndarray, np.ndarray, np.ndarray, np.ndarray):
    """
    Compute the probability of zero and the Pearson Type III distribution
    parameters for each calendar time step, fitted over the values that
    fall within a calibration period.

    :param values: array of values which is reshaped to 2-D, with each row
        representing a year containing either 12 values corresponding to the
        calendar months of that year, or 366 values corresponding to the days
        of the year (with Feb. 29th being an average of the Feb. 28th and
        Mar. 1st values for non-leap years) and assuming that the first
        value of the array is January of the initial year for an input array
        of monthly values or Jan. 1st of initial year for an input array
        daily values
    :param data_start_year: year of the first row of the (reshaped) values
    :param calibration_start_year: first year of the calibration period
    :param calibration_end_year: last year (inclusive) of the calibration
        period; if the calibration period is not fully covered by the data
        then the full period of record is used instead
    :param periodicity: monthly or daily
    :return: four 1-D array of fitting values for the Pearson Type III
        distribution, with shape (12,) for monthly or (366,) for daily
        returned array 1: probability of zero
        returned array 2: first Pearson Type III distribution parameter (loc)
        returned array 3: second Pearson Type III distribution parameter (scale)
        returned array 4: third Pearson Type III distribution parameter (skew)
        Calendar time steps with fewer than four values that are both
        non-missing and non-zero are left as zeros in all four arrays.
    :raises ValueError: if the periodicity is neither monthly nor daily,
        or if the reshaped array does not have shape (years, 12) or
        (years, 366)
    """

    # reshape precipitation values to (years, 12) for monthly,
    # or to (years, 366) for daily
    if periodicity is Periodicity.monthly:
        values = utils.reshape_to_2d(values, 12)
    elif periodicity is Periodicity.daily:
        values = utils.reshape_to_2d(values, 366)
    else:
        raise ValueError("Invalid periodicity argument: %s" % periodicity)

    # validate that the values array has shape: (years, 12) for monthly
    # or (years, 366) for daily
    if (len(values.shape) != 2) or (values.shape[1] not in (12, 366)):
        message = "Invalid shape of input data array: {shape}".format(
            shape=values.shape)
        _logger.error(message)
        raise ValueError(message)

    time_steps_per_year = values.shape[1]

    # determine the final year present in the values array; each row is
    # one year, so the last row corresponds to start + (rows - 1)
    data_end_year = data_start_year + values.shape[0] - 1

    # make sure that we have data within the full calibration period,
    # otherwise use the full period of record
    if (calibration_start_year < data_start_year) or \
            (calibration_end_year > data_end_year):
        calibration_start_year = data_start_year
        calibration_end_year = data_end_year

    # get the year axis indices corresponding to the calibration start
    # and end years (the end index is exclusive, hence the + 1)
    calibration_begin_index = calibration_start_year - data_start_year
    calibration_end_index = (calibration_end_year - data_start_year) + 1

    # get the values that fall within the calibration years period
    calibration_values = values[
        calibration_begin_index:calibration_end_index, :]

    # the values we'll compute and return
    probabilities_of_zero = np.zeros((time_steps_per_year, ))
    locs = np.zeros((time_steps_per_year, ))
    scales = np.zeros((time_steps_per_year, ))
    skews = np.zeros((time_steps_per_year, ))

    # compute the probability of zero and Pearson
    # parameters for each calendar time step
    # TODO vectorize the below loop? create a @numba.vectorize() ufunc
    # for application over the second axis
    for time_step_index in range(time_steps_per_year):

        # get the values for the current calendar time step
        time_step_values = calibration_values[:, time_step_index]

        # count the number of zeros and valid (non-missing/non-NaN) values
        number_of_zeros, number_of_non_missing = \
            utils.count_zeros_and_non_missings(time_step_values)

        # we need at least four values that are both non-missing
        # (i.e. non-NaN) and non-zero in order to fit the distribution,
        # otherwise leave this time step's results as zeros
        if (number_of_non_missing - number_of_zeros) < 4:
            continue

        # calculate the probability of zero for the calendar time step
        # (the divisor is at least four at this point, and a zero count
        # simply yields a probability of 0.0)
        probability_of_zero = number_of_zeros / number_of_non_missing

        # get the Pearson Type III parameters for this time
        # step's values within the calibration period
        params = lmoments.fit(time_step_values)

        probabilities_of_zero[time_step_index] = probability_of_zero
        locs[time_step_index] = params["loc"]
        scales[time_step_index] = params["scale"]
        skews[time_step_index] = params["skew"]

    return probabilities_of_zero, locs, scales, skews