def spatial_test(gridded_forecast, observed_catalog, num_simulations=1000, seed=None, random_numbers=None,
                 verbose=False):
    """
    Performs the Spatial Test on the Forecast using the Observed Catalogs.

    Note: The forecast and the observations should be scaled to the same time period before calling this function.
    This increases transparency as no assumptions are being made about the length of the forecasts. This is
    particularly important for gridded forecasts that supply their forecasts as rates.

    Args:
        gridded_forecast: csep.core.forecasts.GriddedForecast
        observed_catalog: csep.core.catalogs.Catalog
        num_simulations (int): number of simulations used to compute the quantile score
        seed (int): used for reproducibility and testing
        random_numbers (numpy.ndarray): random numbers used to override the random number generation.
                                        injection point for testing.

    Returns:
        evaluation_result: csep.core.evaluations.EvaluationResult
    """
    gridded_catalog_data = observed_catalog.spatial_counts()

    # simply call likelihood test on catalog data and forecast
    qs, obs_ll, simulated_ll = _poisson_likelihood_test(
        gridded_forecast.spatial_counts(), gridded_catalog_data,
        num_simulations=num_simulations,
        seed=seed,
        random_numbers=random_numbers,
        use_observed_counts=True,
        verbose=verbose,
        normalize_likelihood=True)

    # populate result data structure
    result = EvaluationResult()
    result.test_distribution = simulated_ll
    result.name = 'Poisson S-Test'
    result.observed_statistic = obs_ll
    result.quantile = qs
    result.sim_name = gridded_forecast.name
    result.obs_name = observed_catalog.name
    result.status = 'normal'
    try:
        result.min_mw = numpy.min(gridded_forecast.magnitudes)
    except AttributeError:
        result.min_mw = -1
    return result
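
# Usage sketch for spatial_test (illustrative only; assumes `forecast` is a csep.core.forecasts.GriddedForecast and
# `catalog` a catalog already filtered to the forecast's time window, magnitude range, and region):
#
#     s_test_result = spatial_test(forecast, catalog, num_simulations=1000, seed=1)
#     print(s_test_result.quantile)  # quantile score of the observed spatial log-likelihood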
def w_test(gridded_forecast1, gridded_forecast2, observed_catalog, scale=False):
    """ Calculates the single-sample Wilcoxon signed-rank test between two gridded forecasts.

    This test evaluates the null hypothesis that the median of the sample of differences X1(i) - X2(i) is equal to
    (N1 - N2) / N_obs, where N1 and N2 are the sums of the expected values of Forecast_1 and Forecast_2, respectively,
    and N_obs is the number of observed events. The Wilcoxon signed-rank test tests the null hypothesis that the
    differences between Xi and Yi come from the same distribution; in particular, it tests whether the distribution
    of the differences is symmetric around the given median.

    Args:
        gridded_forecast1: Forecast of model_1 (Gridded) (Numpy Array)
                    A forecast has to be in terms of Average Number of Events in Each Bin
                    It can be anything greater than zero
        gridded_forecast2: Forecast of model_2 (Gridded) (Numpy Array)
                    A forecast has to be in terms of Average Number of Events in Each Bin
                    It can be anything greater than zero
        observed_catalog: Observed (Gridded) seismicity (Numpy Array)
                    An observation has to be observed seismicity in each bin
                    It has to be either zero or a positive integer only (no floating point)
        scale (bool): if true, scale forecasted rates down to a single day

    Returns:
        out: csep.core.evaluations.EvaluationResult
    """
    # needs some pre-processing to put the forecasts in the context that is required for the w-test. this is different
    # for cumulative forecasts (eg, multiple time-horizons) and static file-based forecasts.
    target_event_rate_forecast1, _ = gridded_forecast1.target_event_rates(observed_catalog, scale=scale)
    target_event_rate_forecast2, _ = gridded_forecast2.target_event_rates(observed_catalog, scale=scale)

    N = observed_catalog.event_count  # Sum of all the observed earthquakes
    N1 = gridded_forecast1.event_count  # Total number of forecasted earthquakes by Model 1
    N2 = gridded_forecast2.event_count  # Total number of forecasted earthquakes by Model 2
    X1 = numpy.log(target_event_rate_forecast1)  # Log of every element of Forecast 1
    X2 = numpy.log(target_event_rate_forecast2)  # Log of every element of Forecast 2

    # this ratio is the same as long as we scale all the forecasts and catalog rates by the same value
    median_value = (N1 - N2) / N

    diff = X1 - X2

    # w_test is a one-sample Wilcoxon signed-rank test. It accepts the data only as a 1D array.
    x = diff.ravel()  # Converting 2D difference to 1D

    w_test_dic = _w_test_ndarray(x, median_value)

    # configure test result
    result = EvaluationResult()
    result.name = 'W-Test'
    result.test_distribution = 'normal'
    result.observed_statistic = w_test_dic['z_statistic']
    result.quantile = w_test_dic['probability']
    result.sim_name = (gridded_forecast1.name, gridded_forecast2.name)
    result.obs_name = observed_catalog.name
    result.status = 'normal'
    result.min_mw = numpy.min(gridded_forecast1.magnitudes)
    return result
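
# Usage sketch for w_test (illustrative only; assumes `forecast_a` and `forecast_b` are GriddedForecast objects
# defined on the same spatial-magnitude binning and `catalog` is the observed catalog):
#
#     w_result = w_test(forecast_a, forecast_b, catalog)
#     print(w_result.observed_statistic)  # z-statistic of the signed-rank test
#     print(w_result.quantile)            # probability associated with the z-statistic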
def conditional_likelihood_test(gridded_forecast, observed_catalog, num_simulations=1000, seed=None,
                                random_numbers=None, verbose=False):
    """Performs the conditional likelihood test on a Gridded Forecast using an Observed Catalog.

    This test normalizes the forecast so the forecasted rates are consistent with the observations. This modification
    eliminates the strong impact that differences in the number distribution have on the forecasted rates.

    Note: The forecast and the observations should be scaled to the same time period before calling this function.
    This increases transparency as no assumptions are being made about the length of the forecasts. This is
    particularly important for gridded forecasts that supply their forecasts as rates.

    Args:
        gridded_forecast: csep.core.forecasts.GriddedForecast
        observed_catalog: csep.core.catalogs.Catalog
        num_simulations (int): number of simulations used to compute the quantile score
        seed (int): used for reproducibility and testing
        random_numbers (numpy.ndarray): random numbers used to override the random number generation.
                                        injection point for testing.

    Returns:
        evaluation_result: csep.core.evaluations.EvaluationResult
    """
    # grid catalog onto spatial grid
    try:
        _ = observed_catalog.region.magnitudes
    except CSEPCatalogException:
        observed_catalog.region = gridded_forecast.region
    gridded_catalog_data = observed_catalog.spatial_magnitude_counts()

    # simply call likelihood test on catalog data and forecast
    qs, obs_ll, simulated_ll = _poisson_likelihood_test(gridded_forecast.data, gridded_catalog_data,
                                                        num_simulations=num_simulations,
                                                        seed=seed,
                                                        random_numbers=random_numbers,
                                                        use_observed_counts=True,
                                                        verbose=verbose)

    # populate result data structure
    result = EvaluationResult()
    result.test_distribution = simulated_ll
    result.name = 'Poisson CL-Test'
    result.observed_statistic = obs_ll
    result.quantile = qs
    result.sim_name = gridded_forecast.name
    result.obs_name = observed_catalog.name
    result.status = 'normal'
    result.min_mw = numpy.min(gridded_forecast.magnitudes)
    return result
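
# Usage sketch for conditional_likelihood_test (illustrative only; assumes `forecast` is a GriddedForecast and
# `catalog` an observed catalog scaled to the same time period as the forecast):
#
#     cl_result = conditional_likelihood_test(forecast, catalog, num_simulations=1000, seed=1)
#     print(cl_result.observed_statistic)  # observed joint log-likelihood
#     print(cl_result.quantile)            # quantile score relative to the simulated distribution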
def paired_t_test(forecast, benchmark_forecast, observed_catalog, alpha=0.05, scale=False):
    """ Computes the paired t-test for gridded earthquake forecasts.

    This score is positively oriented, meaning that positive values of the information gain indicate that the
    forecast is performing better than the benchmark forecast.

    Args:
        forecast (csep.core.forecasts.GriddedForecast): nd-array storing gridded rates, axis=-1 should be the
                                                        magnitude column
        benchmark_forecast (csep.core.forecasts.GriddedForecast): nd-array storing gridded rates, axis=-1 should be
                                                                  the magnitude column
        observed_catalog (csep.core.catalogs.AbstractBaseCatalog): catalog containing the observed earthquakes
        alpha (float): tolerance level for the type-i error rate of the statistical test
        scale (bool): if true, scale forecasted rates down to a single day

    Returns:
        evaluation_result: csep.core.evaluations.EvaluationResult
    """
    # needs some pre-processing to put the forecasts in the context that is required for the t-test. this is different
    # for cumulative forecasts (eg, multiple time-horizons) and static file-based forecasts.
    target_event_rate_forecast1, n_fore1 = forecast.target_event_rates(observed_catalog, scale=scale)
    target_event_rate_forecast2, n_fore2 = benchmark_forecast.target_event_rates(observed_catalog, scale=scale)

    # call the primitive version operating on ndarray
    out = _t_test_ndarray(target_event_rate_forecast1, target_event_rate_forecast2, observed_catalog.event_count,
                          n_fore1, n_fore2, alpha=alpha)

    # storing this for later
    result = EvaluationResult()
    result.name = 'Paired T-Test'
    result.test_distribution = (out['ig_lower'], out['ig_upper'])
    result.observed_statistic = out['information_gain']
    result.quantile = (out['t_statistic'], out['t_critical'])
    result.sim_name = (forecast.name, benchmark_forecast.name)
    result.obs_name = observed_catalog.name
    result.status = 'normal'
    result.min_mw = numpy.min(forecast.magnitudes)
    return result
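
# Usage sketch for paired_t_test (illustrative only; assumes `forecast` and `benchmark` are GriddedForecast objects
# covering the same space-magnitude bins and `catalog` is the observed catalog):
#
#     t_result = paired_t_test(forecast, benchmark, catalog, alpha=0.05)
#     ig_lower, ig_upper = t_result.test_distribution  # confidence bounds on the information gain
#     print(t_result.observed_statistic)               # information gain of `forecast` over `benchmark`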
def number_test(gridded_forecast, observed_catalog):
    """Computes the "N-Test" on a gridded forecast.
    author: @asim

    Computes the Number (N) test for observed and forecasted catalogs. Both data sets are expected to be in terms of
    event counts. We find the total number of events in the observed catalog and in the forecast, which are then used
    to compute the probabilities of
    (i) at least the observed number of events (delta_1)
    (ii) at most the observed number of events (delta_2)
    assuming a Poisson distribution.

    Args:
        gridded_forecast: Forecast of a Model (Gridded) (Numpy Array)
                    A forecast has to be in terms of Average Number of Events in Each Bin
                    It can be anything greater than zero
        observed_catalog: Observed (Gridded) seismicity (Numpy Array)
                    An observation has to be the number of events in each bin
                    It has to be either zero or a positive integer only (no floating point)

    Returns:
        evaluation_result: csep.core.evaluations.EvaluationResult, with quantile set to (delta_1, delta_2)
    """
    result = EvaluationResult()

    # observed count
    obs_cnt = observed_catalog.event_count

    # forecasts provide the expected number of events during the time horizon of the forecast
    fore_cnt = gridded_forecast.event_count

    epsilon = 1e-6

    # stores the actual result of the number test
    delta1, delta2 = _number_test_ndarray(fore_cnt, obs_cnt, epsilon=epsilon)

    # store results
    result.test_distribution = ('poisson', fore_cnt)
    result.name = 'Poisson N-Test'
    result.observed_statistic = obs_cnt
    result.quantile = (delta1, delta2)
    result.sim_name = gridded_forecast.name
    result.obs_name = observed_catalog.name
    result.status = 'normal'
    result.min_mw = numpy.min(gridded_forecast.magnitudes)

    return result
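
# Usage sketch for number_test (illustrative only; assumes `forecast` is a GriddedForecast whose rates are scaled to
# the evaluation period and `catalog` is the corresponding observed catalog):
#
#     n_result = number_test(forecast, catalog)
#     delta_1, delta_2 = n_result.quantile  # probabilities of at least / at most the observed count (per docstring)
#     print(n_result.observed_statistic)    # observed number of events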