def test_GIVEN_divide_by_zeros_WHEN_rel_stddev_THEN_rel_stddev_correct(self):
    # A zero in the first array makes some of the relative calculations undefined;
    # the relative standard deviation must still match the reference value.
    # NOTE(review): the zero is written straight into the self.data1 fixture (no copy
    # is taken) - confirm the fixture is rebuilt for every test.
    zeroed = self.data1
    zeroed.data[0][1] = 0
    first_result = StatsAnalyzer(zeroed, self.missing2).rel_stddev()[0]
    assert_that(first_result.rel_stddev, close_to(5.8725242578, 1e-5))
def test_GIVEN_missing_values_WHEN_analyze_THEN_original_data_unchanged(self):
    # The analyzer manipulates the data masks, but the datasets handed to it must not
    # be changed as a side effect: both are expected to still hold 7 unmasked values.
    # analyze() is run only to trigger that manipulation, so its return value is
    # deliberately discarded.
    stats = StatsAnalyzer(self.missing1, self.missing2)
    stats.analyze()
    assert_that(len(self.missing1.data.compressed()), is_(7))
    assert_that(len(self.missing2.data.compressed()), is_(7))
def test_GIVEN_divide_by_zeros_WHEN_rel_mean_THEN_rel_mean_correct(self):
    # A zero in the first array makes some of the relative calculations undefined;
    # the relative mean must still match the reference value.
    # NOTE(review): the zero is written straight into the self.data1 fixture (no copy
    # is taken) - confirm the fixture is rebuilt for every test.
    zeroed = self.data1
    zeroed.data[0][1] = 0
    first_result = StatsAnalyzer(zeroed, self.missing2).rel_mean()[0]
    assert_that(first_result.rel_mean, close_to(-2.3518849206, 1e-5))
def test_GIVEN_divide_by_zeros_WHEN_rel_mean_THEN_rel_mean_correct(self):
    # Zeros in the first array lead to undefined values in the relative calculations;
    # check the relative mean against its reference value regardless.
    # NOTE(review): self.data1 is modified in place here (the local is only an alias)
    # - confirm the fixture is recreated per test.
    first_dataset = self.data1
    first_dataset.data[0][1] = 0
    analyzer = StatsAnalyzer(first_dataset, self.missing2)
    rel_mean = analyzer.rel_mean()[0].rel_mean
    assert_that(rel_mean, close_to(-2.3518849206, 1e-5))
def stats_cmd(main_arguments):
    """
    Entry point for the statistics command.

    Reads the requested datagroups, runs a ``StatsAnalyzer`` over them and prints every
    result beneath a banner. When an output path was supplied, the results are also
    converted to cubes, stamped with a history entry and saved to that path.

    :param main_arguments: The command line arguments (minus the stats command); the
        ``datagroups`` and ``output`` attributes are the ones read here.
    """
    from cis.stats import StatsAnalyzer
    from cis.data_io.gridded_data import GriddedDataList

    datasets = DataReader().read_datagroups(main_arguments.datagroups)
    results = StatsAnalyzer(*datasets).analyze()

    title = "RESULTS OF STATISTICAL COMPARISON:"
    caveat = "Compared all points which have non-missing values in both variables"
    # The horizontal rules are as wide as the longer of the two banner lines.
    width = max(len(title), len(caveat))
    heavy_rule = width * '='
    for banner_line in (heavy_rule, title, width * '-', caveat, heavy_rule):
        print(banner_line)
    for result in results:
        print(result.pprint())

    # Nothing more to do unless an output file was requested.
    if not main_arguments.output:
        return

    cubes = GriddedDataList([result.as_cube() for result in results])
    variables = []
    filenames = []
    for datagroup in main_arguments.datagroups:
        variables += datagroup['variables']
        filenames += datagroup['filenames']

    # set() de-duplicates the filenames; note its printed ordering is not guaranteed.
    history = "Statistical comparison performed using CIS version " + __version__
    history += "\n variables: " + str(variables)
    history += "\n from files: " + str(set(filenames))
    cubes.add_history(history)
    cubes.save_data(main_arguments.output)
def test_GIVEN_missing_values_WHEN_analyze_THEN_original_data_unchanged(self):
    # We perform some manipulation on the data masks, but we don't want the
    # original data to be changed: after analysis each input dataset should still
    # compress to 7 unmasked values.
    # The value returned by analyze() is irrelevant to this check, so it is not kept.
    stats = StatsAnalyzer(self.missing1, self.missing2)
    stats.analyze()
    assert_that(len(self.missing1.data.compressed()), is_(7))
    assert_that(len(self.missing2.data.compressed()), is_(7))
def test_GIVEN_divide_by_zeros_WHEN_rel_stddev_THEN_rel_stddev_correct(self):
    # Zeros in the first array lead to undefined values in the relative calculations;
    # check the relative standard deviation against its reference value regardless.
    # NOTE(review): self.data1 is modified in place here (the local is only an alias)
    # - confirm the fixture is recreated per test.
    first_dataset = self.data1
    first_dataset.data[0][1] = 0
    analyzer = StatsAnalyzer(first_dataset, self.missing2)
    rel_stddev = analyzer.rel_stddev()[0].rel_stddev
    assert_that(rel_stddev, close_to(5.8725242578, 1e-5))
def test_GIVEN_flattened_and_unflattened_datasets_WHEN_analyze_THEN_StatisticsResults_returned(self):
    # Analyse one dataset exactly as the mock factory builds it against a second one
    # whose data and coordinate arrays have been swapped for their flattened forms;
    # the full set of 14 results is still expected.
    unflattened = mock.make_regular_2d_ungridded_data()
    flattened = mock.make_regular_2d_ungridded_data()
    flattened._data = flattened.data_flattened
    for coord in flattened.coords():
        coord._data = coord.data_flattened
    results = StatsAnalyzer(unflattened, flattened).analyze()
    assert_that(len(results), is_(14))
def test_GIVEN_missing_vals_WHEN_lin_regression_THEN_regression_correct(self):
    # Linear regression over the missing1/missing2 fixtures: gradient, intercept,
    # r and standard error are read from the first four results, in that order, and
    # compared with the reference values using numpy's default allclose tolerances.
    regression = StatsAnalyzer(self.missing1, self.missing2).linear_regression()
    observed = (regression[0].grad, regression[1].intercept,
                regression[2].r, regression[3].stderr)
    reference = [1.1920369653, -0.6908343017, 0.999845219, 0.0104877890357]
    assert_that(np.allclose(observed, reference))
def test_GIVEN_no_missing_vals_WHEN_lin_regression_THEN_regression_correct(self):
    # Linear regression over the data1/data2 fixtures: gradient, intercept, r and
    # standard error are read from the first four results, in that order, and
    # compared with the reference values using numpy's default allclose tolerances.
    regression = StatsAnalyzer(self.data1, self.data2).linear_regression()
    observed = (regression[0].grad, regression[1].intercept,
                regression[2].r, regression[3].stderr)
    reference = [0.9912730184, 0.1345076061, 0.997485722, 0.0248994694107]
    assert_that(np.allclose(observed, reference))
def test_GIVEN_one_masked_one_nparray_WHEN_lin_regression_THEN_regression_correct(self):
    # Linear regression over data1 and missing2 (per the test name, one plain array
    # and one masked array): gradient, intercept, r and standard error are read from
    # the first four results and compared with the reference values.
    regression = StatsAnalyzer(self.data1, self.missing2).linear_regression()
    observed = (regression[0].grad, regression[1].intercept,
                regression[2].r, regression[3].stderr)
    reference = [-5.1404761905, 12.3595238095, -0.4079085869, 5.14561290806]
    assert_that(np.allclose(observed, reference))
def test_GIVEN_flattened_and_unflattened_datasets_WHEN_analyze_THEN_StatisticsResults_returned(self):
    # One mock dataset is left as built; the other has its data and each of its
    # coordinates replaced by the corresponding flattened arrays. Analysing the pair
    # should still yield 14 results.
    as_built = mock.make_regular_2d_ungridded_data()
    flat = mock.make_regular_2d_ungridded_data()
    flat._data = flat.data_flattened
    for flat_coord in flat.coords():
        flat_coord._data = flat_coord.data_flattened
    analyzer = StatsAnalyzer(as_built, flat)
    assert_that(len(analyzer.analyze()), is_(14))
def test_GIVEN_missing_vals_WHEN_spearman_THEN_spearman_correct(self):
    # Spearman's rank for the missing1/missing2 fixtures must match the reference
    # value to within 1e-5.
    first_result = StatsAnalyzer(self.missing1, self.missing2).spearmans_rank()[0]
    assert_that(first_result.spearman, close_to(0.9428571429, 1e-5))
def test_GIVEN_no_missing_vals_WHEN_rel_stddev_THEN_rel_stddev_correct(self):
    # Relative standard deviation for the data1/data2 fixtures must match the
    # reference value to within 1e-5.
    first_result = StatsAnalyzer(self.data1, self.data2).rel_stddev()[0]
    assert_that(first_result.rel_stddev, close_to(0.1097392069, 1e-5))
def test_GIVEN_missing_vals_WHEN_rel_stddev_THEN_rel_stddev_correct(self):
    # Relative standard deviation for the missing1/missing2 fixtures must match the
    # reference value to within 1e-5.
    first_result = StatsAnalyzer(self.missing1, self.missing2).rel_stddev()[0]
    assert_that(first_result.rel_stddev, close_to(0.1930820326, 1e-5))
def test_GIVEN_one_masked_one_nparray_WHEN_rel_stddev_THEN_rel_stddev_correct(self):
    # Relative standard deviation for data1 against missing2 (per the test name, a
    # plain array and a masked array) must match the reference value to within 1e-5.
    first_result = StatsAnalyzer(self.data1, self.missing2).rel_stddev()[0]
    assert_that(first_result.rel_stddev, close_to(5.4371807462, 1e-5))
def test_GIVEN_no_missing_vals_WHEN_abs_stddev_THEN_abs_stddev_correct(self):
    # Absolute standard deviation for the data1/data2 fixtures must match the
    # reference value to within 1e-5.
    first_result = StatsAnalyzer(self.data1, self.data2).abs_stddev()[0]
    assert_that(first_result.abs_stddev, close_to(0.2643650675, 1e-5))
def test_GIVEN_missing_vals_WHEN_abs_mean_THEN_abs_mean_correct(self):
    # Absolute mean for the missing1/missing2 fixtures must match the reference
    # value to within 1e-5.
    first_result = StatsAnalyzer(self.missing1, self.missing2).abs_mean()[0]
    assert_that(first_result.abs_mean, close_to(-3.2833333333, 1e-5))
def test_GIVEN_one_masked_one_nparray_WHEN_lin_regression_THEN_regression_correct(self):
    # Regress data1 against missing2 (per the test name, a plain array and a masked
    # array) and compare gradient, intercept, r and standard error - taken from
    # results 0 to 3 respectively - with the reference values.
    reference = [-5.1404761905, 12.3595238095, -0.4079085869, 5.14561290806]
    analyzer = StatsAnalyzer(self.data1, self.missing2)
    fit = analyzer.linear_regression()
    observed = (fit[0].grad, fit[1].intercept, fit[2].r, fit[3].stderr)
    assert_that(np.allclose(observed, reference))
def test_GIVEN_missing_vals_WHEN_lin_regression_THEN_regression_correct(self):
    # Regress the missing1/missing2 fixtures and compare gradient, intercept, r and
    # standard error - taken from results 0 to 3 respectively - with the reference
    # values.
    reference = [1.1920369653, -0.6908343017, 0.999845219, 0.0104877890357]
    analyzer = StatsAnalyzer(self.missing1, self.missing2)
    fit = analyzer.linear_regression()
    observed = (fit[0].grad, fit[1].intercept, fit[2].r, fit[3].stderr)
    assert_that(np.allclose(observed, reference))
def test_GIVEN_no_missing_vals_WHEN_lin_regression_THEN_regression_correct(self):
    # Regress the data1/data2 fixtures and compare gradient, intercept, r and
    # standard error - taken from results 0 to 3 respectively - with the reference
    # values.
    reference = [0.9912730184, 0.1345076061, 0.997485722, 0.0248994694107]
    analyzer = StatsAnalyzer(self.data1, self.data2)
    fit = analyzer.linear_regression()
    observed = (fit[0].grad, fit[1].intercept, fit[2].r, fit[3].stderr)
    assert_that(np.allclose(observed, reference))
def test_GIVEN_one_masked_one_nparray_WHEN_spearman_THEN_spearman_correct(self):
    # Spearman's rank for data1 against missing2 (per the test name, a plain array
    # and a masked array) must match the reference value to within 1e-5.
    first_result = StatsAnalyzer(self.data1, self.missing2).spearmans_rank()[0]
    assert_that(first_result.spearman, close_to(0.2142857143, 1e-5))
def test_GIVEN_missing_vals_WHEN_spearman_THEN_spearman_correct(self):
    # The first Spearman's rank result for the missing1/missing2 fixtures is
    # compared with its reference value (tolerance 1e-5).
    analyzer = StatsAnalyzer(self.missing1, self.missing2)
    spearman = analyzer.spearmans_rank()[0].spearman
    assert_that(spearman, close_to(0.9428571429, 1e-5))
def test_GIVEN_no_missing_vals_WHEN_spearman_THEN_spearman_correct(self):
    # Spearman's rank for the data1/data2 fixtures is expected to be 1.0
    # (tolerance 1e-5).
    first_result = StatsAnalyzer(self.data1, self.data2).spearmans_rank()[0]
    assert_that(first_result.spearman, close_to(1.0, 1e-5))
def test_GIVEN_no_missing_vals_WHEN_rel_mean_THEN_rel_mean_correct(self):
    # Relative mean for the data1/data2 fixtures must match the reference value to
    # within 1e-5.
    first_result = StatsAnalyzer(self.data1, self.data2).rel_mean()[0]
    assert_that(first_result.rel_mean, close_to(-0.0153531746, 1e-5))
def test_GIVEN_datasets_WHEN_analyze_THEN_StatisticsResults_returned(self):
    # A full analysis of the data1/data2 fixtures is expected to yield 14 results.
    analysis = StatsAnalyzer(self.data1, self.data2).analyze()
    assert_that(len(analysis), is_(14))
def test_GIVEN_one_masked_one_nparray_WHEN_abs_stddev_THEN_abs_stddev_correct(self):
    # Absolute standard deviation for data1 against missing2 (per the test name, a
    # plain array and a masked array) must match the reference value to within 1e-5.
    first_result = StatsAnalyzer(self.data1, self.missing2).abs_stddev()[0]
    assert_that(first_result.abs_stddev, close_to(48.7984582114, 1e-5))
def test_GIVEN_missing_vals_WHEN_rel_mean_THEN_rel_mean_correct(self):
    # Relative mean for the missing1/missing2 fixtures must match the reference
    # value to within 1e-5.
    first_result = StatsAnalyzer(self.missing1, self.missing2).rel_mean()[0]
    assert_that(first_result.rel_mean, close_to(0.0715277778, 1e-5))
def test_GIVEN_missing_vals_WHEN_rel_mean_THEN_rel_mean_correct(self):
    # The first relative-mean result for the missing1/missing2 fixtures is compared
    # with its reference value (tolerance 1e-5).
    analyzer = StatsAnalyzer(self.missing1, self.missing2)
    rel_mean = analyzer.rel_mean()[0].rel_mean
    assert_that(rel_mean, close_to(0.0715277778, 1e-5))
def test_GIVEN_one_masked_one_nparray_WHEN_abs_mean_THEN_abs_mean_correct(self):
    # Absolute mean for data1 against missing2 (per the test name, a plain array and
    # a masked array) must match the reference value to within 1e-5.
    first_result = StatsAnalyzer(self.data1, self.missing2).abs_mean()[0]
    assert_that(first_result.abs_mean, close_to(-18.3428571429, 1e-5))
def test_GIVEN_missing_vals_WHEN_abs_stddev_THEN_abs_stddev_correct(self):
    # Absolute standard deviation for the missing1/missing2 fixtures must match the
    # reference value to within 1e-5.
    first_result = StatsAnalyzer(self.missing1, self.missing2).abs_stddev()[0]
    assert_that(first_result.abs_stddev, close_to(8.2120440005, 1e-5))
def test_GIVEN_missing_vals_WHEN_abs_stddev_THEN_abs_stddev_correct(self):
    # The first absolute-standard-deviation result for the missing1/missing2
    # fixtures is compared with its reference value (tolerance 1e-5).
    analyzer = StatsAnalyzer(self.missing1, self.missing2)
    abs_stddev = analyzer.abs_stddev()[0].abs_stddev
    assert_that(abs_stddev, close_to(8.2120440005, 1e-5))
def test_GIVEN_missing_vals_WHEN_rel_stddev_THEN_rel_stddev_correct(self):
    # The first relative-standard-deviation result for the missing1/missing2
    # fixtures is compared with its reference value (tolerance 1e-5).
    analyzer = StatsAnalyzer(self.missing1, self.missing2)
    rel_stddev = analyzer.rel_stddev()[0].rel_stddev
    assert_that(rel_stddev, close_to(0.1930820326, 1e-5))
def test_GIVEN_no_missing_vals_WHEN_rel_mean_THEN_rel_mean_correct(self):
    # The first relative-mean result for the data1/data2 fixtures is compared with
    # its reference value (tolerance 1e-5).
    analyzer = StatsAnalyzer(self.data1, self.data2)
    rel_mean = analyzer.rel_mean()[0].rel_mean
    assert_that(rel_mean, close_to(-0.0153531746, 1e-5))
def test_GIVEN_one_masked_one_nparray_WHEN_abs_stddev_THEN_abs_stddev_correct(self):
    # The first absolute-standard-deviation result for data1 against missing2 (per
    # the test name, a plain array and a masked array) is compared with its
    # reference value (tolerance 1e-5).
    analyzer = StatsAnalyzer(self.data1, self.missing2)
    abs_stddev = analyzer.abs_stddev()[0].abs_stddev
    assert_that(abs_stddev, close_to(48.7984582114, 1e-5))
def test_GIVEN_one_masked_one_nparray_WHEN_rel_mean_THEN_rel_mean_correct(self):
    # Relative mean for data1 against missing2 (per the test name, a plain array and
    # a masked array) must match the reference value to within 1e-5.
    first_result = StatsAnalyzer(self.data1, self.missing2).rel_mean()[0]
    assert_that(first_result.rel_mean, close_to(-2.0087585034, 1e-5))
def test_GIVEN_missing_vals_WHEN_count_THEN_points_count_correct(self):
    # The points count reported for the missing1/missing2 fixtures is expected to
    # be 6.
    first_result = StatsAnalyzer(self.missing1, self.missing2).points_count()[0]
    assert_that(first_result.num_points, is_(6))
def test_GIVEN_no_missing_vals_WHEN_rel_stddev_THEN_rel_stddev_correct(self):
    # The first relative-standard-deviation result for the data1/data2 fixtures is
    # compared with its reference value (tolerance 1e-5).
    analyzer = StatsAnalyzer(self.data1, self.data2)
    rel_stddev = analyzer.rel_stddev()[0].rel_stddev
    assert_that(rel_stddev, close_to(0.1097392069, 1e-5))
def test_GIVEN_one_masked_one_nparray_WHEN_points_count_THEN_count_correct(self):
    # The points count reported for data1 against missing2 (per the test name, a
    # plain array and a masked array) is expected to be 7.
    first_result = StatsAnalyzer(self.data1, self.missing2).points_count()[0]
    assert_that(first_result.num_points, is_(7))
def test_GIVEN_one_masked_one_nparray_WHEN_rel_stddev_THEN_rel_stddev_correct(self):
    # The first relative-standard-deviation result for data1 against missing2 (per
    # the test name, a plain array and a masked array) is compared with its
    # reference value (tolerance 1e-5).
    analyzer = StatsAnalyzer(self.data1, self.missing2)
    rel_stddev = analyzer.rel_stddev()[0].rel_stddev
    assert_that(rel_stddev, close_to(5.4371807462, 1e-5))
def test_GIVEN_no_missing_vals_WHEN_mean_THEN_mean_correct(self):
    # The first two entries returned by means() for the data1/data2 fixtures are
    # checked, in order, against their reference values (tolerance 1e-5).
    means = StatsAnalyzer(self.data1, self.data2).means()
    for position, expected in enumerate((5.1, 5.19)):
        assert_that(means[position].mean, close_to(expected, 1e-5))
def test_GIVEN_no_missing_vals_WHEN_spearman_THEN_spearman_correct(self):
    # The first Spearman's rank result for the data1/data2 fixtures is expected to
    # be 1.0 (tolerance 1e-5).
    analyzer = StatsAnalyzer(self.data1, self.data2)
    spearman = analyzer.spearmans_rank()[0].spearman
    assert_that(spearman, close_to(1.0, 1e-5))
def test_GIVEN_missing_vals_WHEN_count_THEN_mean_correct(self):
    # The first two entries returned by means() for the missing1/missing2 fixtures
    # are checked, in order, against their reference values (tolerance 1e-5).
    # NOTE(review): the test name says "WHEN_count" but the method exercised is
    # means(); the name is left unchanged here.
    means = StatsAnalyzer(self.missing1, self.missing2).means()
    for position, expected in enumerate((-13.5, -16.783333333)):
        assert_that(means[position].mean, close_to(expected, 1e-5))
def test_GIVEN_one_masked_one_nparray_WHEN_spearman_THEN_spearman_correct(self):
    # The first Spearman's rank result for data1 against missing2 (per the test
    # name, a plain array and a masked array) is compared with its reference value
    # (tolerance 1e-5).
    analyzer = StatsAnalyzer(self.data1, self.missing2)
    spearman = analyzer.spearmans_rank()[0].spearman
    assert_that(spearman, close_to(0.2142857143, 1e-5))
def test_GIVEN_missing_vals_WHEN_count_THEN_points_count_correct(self):
    # For the missing1/missing2 fixtures the first points_count() result should
    # report 6 points.
    analyzer = StatsAnalyzer(self.missing1, self.missing2)
    num_points = analyzer.points_count()[0].num_points
    assert_that(num_points, is_(6))
def test_GIVEN_no_missing_vals_WHEN_mean_THEN_mean_correct(self):
    # means() for the data1/data2 fixtures: entries 0 and 1 are compared, in that
    # order, with their reference values (tolerance 1e-5).
    analyzer = StatsAnalyzer(self.data1, self.data2)
    computed = analyzer.means()
    for index, reference in ((0, 5.1), (1, 5.19)):
        assert_that(computed[index].mean, close_to(reference, 1e-5))
def test_GIVEN_no_missing_vals_WHEN_abs_stddev_THEN_abs_stddev_correct(self):
    # The first absolute-standard-deviation result for the data1/data2 fixtures is
    # compared with its reference value (tolerance 1e-5).
    analyzer = StatsAnalyzer(self.data1, self.data2)
    abs_stddev = analyzer.abs_stddev()[0].abs_stddev
    assert_that(abs_stddev, close_to(0.2643650675, 1e-5))
def test_GIVEN_datasets_WHEN_analyze_THEN_StatisticsResults_returned(self):
    # analyze() on the data1/data2 fixtures should return a collection of length 14.
    analyzer = StatsAnalyzer(self.data1, self.data2)
    result_count = len(analyzer.analyze())
    assert_that(result_count, is_(14))
def test_GIVEN_missing_vals_WHEN_abs_mean_THEN_abs_mean_correct(self):
    # The first absolute-mean result for the missing1/missing2 fixtures is compared
    # with its reference value (tolerance 1e-5).
    analyzer = StatsAnalyzer(self.missing1, self.missing2)
    abs_mean = analyzer.abs_mean()[0].abs_mean
    assert_that(abs_mean, close_to(-3.2833333333, 1e-5))
def test_GIVEN_no_missing_vals_WHEN_stddev_THEN_stddev_correct(self):
    # The first two entries returned by stddevs() for the data1/data2 fixtures are
    # checked, in order, against their reference values (tolerance 1e-5).
    stddevs = StatsAnalyzer(self.data1, self.data2).stddevs()
    for position, expected in enumerate((3.7252889523, 3.7020864988)):
        assert_that(stddevs[position].stddev, close_to(expected, 1e-5))
def test_GIVEN_one_masked_one_nparray_WHEN_rel_mean_THEN_rel_mean_correct(self):
    # The first relative-mean result for data1 against missing2 (per the test name,
    # a plain array and a masked array) is compared with its reference value
    # (tolerance 1e-5).
    analyzer = StatsAnalyzer(self.data1, self.missing2)
    rel_mean = analyzer.rel_mean()[0].rel_mean
    assert_that(rel_mean, close_to(-2.0087585034, 1e-5))
def test_GIVEN_one_masked_one_nparray_WHEN_points_count_THEN_count_correct(self):
    # For data1 against missing2 (per the test name, a plain array and a masked
    # array) the first points_count() result should report 7 points.
    analyzer = StatsAnalyzer(self.data1, self.missing2)
    num_points = analyzer.points_count()[0].num_points
    assert_that(num_points, is_(7))
def test_GIVEN_missing_vals_WHEN_stddev_THEN_stddev_correct(self):
    # The first two entries returned by stddevs() for the missing1/missing2 fixtures
    # are checked, in order, against their reference values (tolerance 1e-5).
    stddevs = StatsAnalyzer(self.missing1, self.missing2).stddevs()
    for position, expected in enumerate((42.5099988238, 50.6813344997)):
        assert_that(stddevs[position].stddev, close_to(expected, 1e-5))
def test_GIVEN_no_missing_vals_WHEN_abs_mean_THEN_abs_mean_correct(self):
    # Absolute mean for the data1/data2 fixtures must match the reference value to
    # within 1e-5.
    first_result = StatsAnalyzer(self.data1, self.data2).abs_mean()[0]
    assert_that(first_result.abs_mean, close_to(0.09, 1e-5))
def test_GIVEN_one_masked_one_nparray_WHEN_abs_mean_THEN_abs_mean_correct(self):
    # The first absolute-mean result for data1 against missing2 (per the test name,
    # a plain array and a masked array) is compared with its reference value
    # (tolerance 1e-5).
    analyzer = StatsAnalyzer(self.data1, self.missing2)
    abs_mean = analyzer.abs_mean()[0].abs_mean
    assert_that(abs_mean, close_to(-18.3428571429, 1e-5))