def test_count_zeros_and_non_missings(self):
    """
    Test for the utils.count_zeros_and_non_missings() function
    """

    # Every input below holds the same fourteen values, two of which are
    # zeros and two of which are NaN, so the function is expected to report
    # two zeros and twelve non-missing values for each of them.
    # np.nan is used rather than the np.NaN alias, which NumPy 2.0 removed.
    values_list = [3, 4, 0, 2, 3.1, 5, np.nan, 8, 5, 6, 0.0, np.nan, 5.6, 2]
    inputs = (
        # vanilla use case: a 1-D array
        np.array(values_list),
        # test with lists: a flat list
        values_list,
        # test with lists: a nested (two row) list
        [
            [3, 4, 0, 2, 3.1, 5, np.nan],
            [8, 5, 6, 0.0, np.nan, 5.6, 2],
        ],
    )
    for values in inputs:
        zeros, non_missings = utils.count_zeros_and_non_missings(values)
        self.assertEqual(zeros, 2, 'Failed to correctly count zero values')
        self.assertEqual(
            non_missings,
            12,
            'Failed to correctly count non-missing values',
        )

    # using a list that can't be converted into an array should result in a TypeError
    values = [1, 2, 3, 0, 'abcxyz']
    np.testing.assert_raises(TypeError, utils.count_zeros_and_non_missings, values)
def test_count_zeros_and_non_missings():
    """
    Test for the utils.count_zeros_and_non_missings() function
    """

    # Every input below holds the same fourteen values, two of which are
    # zeros and two of which are NaN, so the function is expected to report
    # two zeros and twelve non-missing values for each of them.
    # np.nan is used rather than the np.NaN alias, which NumPy 2.0 removed.
    values_list = [3, 4, 0, 2, 3.1, 5, np.nan, 8, 5, 6, 0.0, np.nan, 5.6, 2]
    inputs = (
        # vanilla use case: a 1-D array
        np.array(values_list),
        # test with lists: a flat list
        values_list,
        # test with lists: a nested (two row) list
        [
            [3, 4, 0, 2, 3.1, 5, np.nan],
            [8, 5, 6, 0.0, np.nan, 5.6, 2],
        ],
    )
    for values in inputs:
        zeros, non_missings = utils.count_zeros_and_non_missings(values)
        if zeros != 2:
            raise AssertionError("Failed to correctly count zero values")
        if non_missings != 12:
            raise AssertionError("Failed to correctly count non-missing values")

    # using a list that can't be converted into an array should result in a TypeError
    values = [1, 2, 3, 0, "abcxyz"]
    np.testing.assert_raises(TypeError, utils.count_zeros_and_non_missings, values)
def _probability_of_zero(
        values: np.ndarray,
) -> np.ndarray:
    """
    This function computes the probability of zero for each calendar
    time step of an array of values.

    :param values: 2-D array of values, with each row representing a year
        containing either 12 values corresponding to the calendar months of
        that year, or 366 values corresponding to the days of the year
        (with Feb. 29th being an average of the Feb. 28th and Mar. 1st values
        for non-leap years) and assuming that the first value of the array is
        January of the initial year for an input array of monthly values or
        Jan. 1st of initial year for an input array daily values
    :return: a 1-D array of probability of zero values, with shape (12,)
        for monthly or (366,) for daily; an element is NaN when its time step
        has no zero values or no non-missing values
    :raises ValueError: if the input array is not 2-D or its second
        dimension is neither 12 nor 366
    """

    # validate that the values array has shape: (years, 12) for monthly or (years, 366) for daily
    if len(values.shape) != 2:
        message = "Invalid shape of input data array: {shape}".format(
            shape=values.shape)
        _logger.error(message)
        raise ValueError(message)

    # determine the number of time steps per year
    # (we expect 12 for monthly, 366 for daily)
    time_steps_per_year = values.shape[1]
    if time_steps_per_year not in (12, 366):
        message = "Invalid shape of input data array: {shape}".format(
            shape=values.shape)
        _logger.error(message)
        raise ValueError(message)

    # the values we'll compute and return
    probabilities_of_zero = np.zeros((time_steps_per_year, ))

    # compute the probability of zero for each calendar time step
    # TODO vectorize the below loop? create a @numba.vectorize() ufunc
    # for application over the second axis
    for time_step_index in range(time_steps_per_year):

        # get the values for the current calendar time step
        time_step_values = values[:, time_step_index]

        # count the number of zeros and valid (non-missing/non-NaN) values
        number_of_zeros, number_of_non_missing = \
            utils.count_zeros_and_non_missings(time_step_values)

        # calculate the probability of zero for the calendar time step
        if (number_of_zeros > 0) and (number_of_non_missing > 0):
            probabilities_of_zero[time_step_index] = \
                number_of_zeros / number_of_non_missing
        else:
            # fill with NaN (np.nan rather than the np.NaN alias,
            # which was removed in NumPy 2.0)
            # NOTE(review): a time step with valid data but no zeros also
            # lands here and yields NaN rather than 0.0 -- confirm that
            # callers expect this
            probabilities_of_zero[time_step_index] = np.nan

    return probabilities_of_zero
def _pearson3_fitting_values(
        values: np.ndarray,
) -> np.ndarray:
    """
    This function computes the probability of zero and Pearson Type III
    distribution parameters corresponding to an array of values.

    :param values: 2-D array of values, with each row representing a year
        containing either 12 values corresponding to the calendar months of
        that year, or 366 values corresponding to the days of the year
        (with Feb. 29th being an average of the Feb. 28th and Mar. 1st values
        for non-leap years) and assuming that the first value of the array is
        January of the initial year for an input array of monthly values or
        Jan. 1st of initial year for an input array daily values
    :return: a 2-D array of fitting values for the Pearson Type III
        distribution, with shape (4, 12) for monthly or (4, 366) for daily
        returned_array[0] == probability of zero for each of the calendar time steps
        returned_array[1] == the first Pearson Type III distribution parameter
            ("loc") for each of the calendar time steps
        returned_array[2] == the second Pearson Type III distribution parameter
            ("scale") for each of the calendar time steps
        returned_array[3] == the third Pearson Type III distribution parameter
            ("skew") for each of the calendar time steps
        All four entries of a calendar time step are left at zero when that
        time step has fewer than four non-missing, non-zero values.
    :raises ValueError: if the input array is not 2-D or its second
        dimension is neither 12 nor 366
    """

    # validate that the values array has shape: (years, 12) for monthly or (years, 366) for daily
    if (len(values.shape) != 2) or (values.shape[1] not in (12, 366)):
        message = "Invalid shape of input data array: {shape}".format(
            shape=values.shape)
        _logger.error(message)
        raise ValueError(message)

    time_steps_per_year = values.shape[1]

    # the values we'll compute and return
    fitting_values = np.zeros((4, time_steps_per_year))

    # compute the probability of zero and Pearson
    # parameters for each calendar time step
    # TODO vectorize the below loop? create a @numba.vectorize() ufunc
    # for application over the second axis
    for time_step_index in range(time_steps_per_year):

        # get the values for the current calendar time step
        time_step_values = values[:, time_step_index]

        # count the number of zeros and valid (non-missing/non-NaN) values
        number_of_zeros, number_of_non_missing = \
            utils.count_zeros_and_non_missings(time_step_values)

        # we need at least four values that are both non-missing
        # (i.e. non-NaN) and non-zero in order to fit this time step
        if (number_of_non_missing - number_of_zeros) < 4:
            # Skip only this time step, leaving its fitting values at zero.
            # Returning here instead would silently abandon every later
            # calendar time step, making the result depend on where in the
            # year the first data-poor time step happens to fall.
            continue

        # calculate the probability of zero for the calendar time step
        probability_of_zero = 0.0
        if number_of_zeros > 0:
            probability_of_zero = number_of_zeros / number_of_non_missing

        # get the Pearson Type III parameters for this time step's values
        # NOTE(review): NaNs are passed through unfiltered; presumably
        # lmoments.fit() copes with them -- verify
        params = lmoments.fit(time_step_values)
        fitting_values[0, time_step_index] = probability_of_zero
        fitting_values[1, time_step_index] = params["loc"]
        fitting_values[2, time_step_index] = params["scale"]
        fitting_values[3, time_step_index] = params["skew"]

    return fitting_values
def _pearson3_fitting_values(values):
    """
    This function computes the probability of zero and Pearson Type III
    distribution parameters corresponding to an array of values.

    :param values: 2-D array of values, with each row representing a year
        containing either 12 values corresponding to the calendar months of
        that year, or 366 values corresponding to the days of the year
        (with Feb. 29th being an average of the Feb. 28th and Mar. 1st values
        for non-leap years) and assuming that the first value of the array is
        January of the initial year for an input array of monthly values or
        Jan. 1st of initial year for an input array daily values
    :return: a 2-D array of fitting values for the Pearson Type III
        distribution, with shape (4, 12) for monthly or (4, 366) for daily
        returned_array[0] == probability of zero for each of the calendar time steps
        returned_array[1] == the first Pearson Type III distribution parameter
            for each of the calendar time steps
        returned_array[2] == the second Pearson Type III distribution parameter
            for each of the calendar time steps
        returned_array[3] == the third Pearson Type III distribution parameter
            for each of the calendar time steps
        All four entries of a calendar time step are left at zero when that
        time step has fewer than four non-missing, non-zero values or when
        its estimated L-moments fail the validity check below.
    :raises ValueError: if the input array is not 2-D or its second
        dimension is neither 12 nor 366
    """

    # validate that the values array has shape: (years, 12) for monthly or (years, 366) for daily
    if (len(values.shape) != 2) or (values.shape[1] not in (12, 366)):
        message = 'Invalid shape of input data array: {0}'.format(values.shape)
        _logger.error(message)
        raise ValueError(message)

    time_steps_per_year = values.shape[1]

    # the values we'll compute and return
    fitting_values = np.zeros((4, time_steps_per_year))

    # compute the probability of zero and Pearson parameters for each calendar time step
    # TODO vectorize the below loop? create a @numba.vectorize() ufunc
    # for application over the second axis of the values
    for time_step_index in range(time_steps_per_year):

        # get the values for the current calendar time step
        time_step_values = values[:, time_step_index]

        # count the number of zeros and valid (non-missing/non-NaN) values
        number_of_zeros, number_of_non_missing = \
            utils.count_zeros_and_non_missings(time_step_values)

        # we need at least four values that are both non-missing
        # (i.e. non-NaN) and non-zero in order to fit this time step
        if (number_of_non_missing - number_of_zeros) < 4:
            # Skip only this time step, leaving its fitting values at zero.
            # Returning here instead would silently abandon every later
            # calendar time step, making the result depend on where in the
            # year the first data-poor time step happens to fall.
            continue

        # calculate the probability of zero for the calendar time step
        probability_of_zero = 0.0
        if number_of_zeros > 0:
            probability_of_zero = number_of_zeros / number_of_non_missing

        # estimate the L-moments of the time step's values (the local names
        # deliberately avoid shadowing an `lmoments` module or a
        # `pearson_parameters` function of the same name)
        estimated_lmoments = _estimate_lmoments(time_step_values)

        # if we have valid L-moments then we can proceed, otherwise
        # the fitting values for the time step will be all zeros
        if (estimated_lmoments[1] > 0.0) and (abs(estimated_lmoments[2]) < 1.0):

            # get the Pearson Type III parameters for the time step, based on the L-moments
            pearson3_params = _estimate_pearson3_parameters(estimated_lmoments)
            fitting_values[0, time_step_index] = probability_of_zero
            fitting_values[1, time_step_index] = pearson3_params[0]
            fitting_values[2, time_step_index] = pearson3_params[1]
            fitting_values[3, time_step_index] = pearson3_params[2]

        # FIXME/TODO invalid L-moments are silently swallowed here, leaving
        # zeros for the time step. There must be a better way to handle this;
        # at minimum consider logging a warning. Do we get similar results
        # using the lmoments3 module? How does the comparable NCSU SPI code
        # (Cumbie et al?) handle this?

    return fitting_values
def pearson_parameters(
        values: np.ndarray,
        data_start_year: int,
        calibration_start_year: int,
        calibration_end_year: int,
        periodicity: Periodicity,
) -> (np.ndarray, np.ndarray, np.ndarray, np.ndarray):
    """
    Computes, for every calendar time step, the probability of zero and the
    Pearson Type III distribution parameters fitted over a calibration period.

    :param values: array of values, reshaped internally with
        utils.reshape_to_2d() so that each row represents a year containing
        either 12 monthly values or 366 daily values (with Feb. 29th being an
        average of the Feb. 28th and Mar. 1st values for non-leap years); the
        first value is assumed to be January (monthly) or Jan. 1st (daily)
        of the initial year
    :param data_start_year: year of the first row of the reshaped values
    :param calibration_start_year: first year of the calibration period
    :param calibration_end_year: last year of the calibration period
    :param periodicity: monthly or daily
    :return: four 1-D arrays of fitting values for the Pearson Type III
        distribution, each with shape (12,) for monthly or (366,) for daily
        returned array 1: probability of zero
        returned array 2: first Pearson Type III distribution parameter (loc)
        returned array 3: second Pearson Type III distribution parameter (scale)
        returned array 4: third Pearson Type III distribution parameter (skew)
        Entries stay at zero for any calendar time step with fewer than four
        non-missing, non-zero values inside the calibration period.
    :raises ValueError: for an unsupported periodicity, or if the reshaped
        array is not 2-D with a second dimension of 12 or 366
    """

    # bring the values into (years, 12) for monthly or (years, 366) for daily
    if periodicity is Periodicity.monthly:
        values = utils.reshape_to_2d(values, 12)
    elif periodicity is Periodicity.daily:
        values = utils.reshape_to_2d(values, 366)
    else:
        raise ValueError("Invalid periodicity argument: %s" % periodicity)

    # sanity check the outcome of the reshape
    if (len(values.shape) != 2) or (values.shape[1] not in (12, 366)):
        message = "Invalid shape of input data array: {shape}".format(
            shape=values.shape)
        _logger.error(message)
        raise ValueError(message)

    year_count, steps_per_year = values.shape

    # end year of the data, computed as the start year plus the number of rows
    # NOTE(review): this is one past the last year actually present; the row
    # slice below tolerates the overshoot -- confirm this is intended
    data_end_year = data_start_year + year_count

    # fall back to the full period of record whenever the requested
    # calibration period is not contained within the data's years
    calibration_within_data = (
        (calibration_start_year >= data_start_year)
        and (calibration_end_year <= data_end_year)
    )
    if not calibration_within_data:
        calibration_start_year = data_start_year
        calibration_end_year = data_end_year

    # rows (years) making up the calibration period, end year inclusive
    first_row = calibration_start_year - data_start_year
    stop_row = (calibration_end_year - data_start_year) + 1
    calibration_values = values[first_row:stop_row, :]

    # result arrays, one element per calendar time step, initially all zeros
    probabilities_of_zero, locs, scales, skews = (
        np.zeros((steps_per_year, )) for _ in range(4)
    )

    # walk the calendar time steps (the columns of the calibration values)
    # TODO vectorize the below loop? create a @numba.vectorize() ufunc
    # for application over the second axis
    for step, step_values in enumerate(calibration_values.T):

        # tally the zeros and the valid (non-missing/non-NaN) values
        zero_count, valid_count = \
            utils.count_zeros_and_non_missings(step_values)

        # a fit needs at least four values that are both non-missing and
        # non-zero, otherwise the time step keeps its zero-filled results
        if (valid_count - zero_count) < 4:
            continue

        # fit the Pearson Type III parameters to the calibration values
        fitted = lmoments.fit(step_values)

        probabilities_of_zero[step] = \
            (zero_count / valid_count) if zero_count > 0 else 0.0
        locs[step] = fitted["loc"]
        scales[step] = fitted["scale"]
        skews[step] = fitted["skew"]

    return probabilities_of_zero, locs, scales, skews