Exemple #1
0
 def test_error_metrics(self, mock_comparison_dataframe):
     """Check the error metrics of an ``Accuracy`` fed a mocked dataframe.

     ``mock_comparison_dataframe`` is not referenced in the body; it is
     presumably injected by a ``patch`` decorator outside this view
     (TODO confirm).  The comparison dataframe actually used is the one
     assigned from ``self._mock_comparison_dataframe()``.
     """
     accuracy = Accuracy(Mock(), Mock(), Mock(), Mock(), Mock(), Mock(),
                         Mock())
     accuracy.comparison_dataframe = self._mock_comparison_dataframe()

     # (actual, expected) pairs.  The values are computed floating-point
     # means, so they are compared approximately rather than bit-for-bit.
     checks = [
         (accuracy.root_mean_squared_error(), (1.0, 1.0)),
         (accuracy.root_squared_error().mean().tolist(), [1.0, 1.0]),
         (accuracy.absolute_pct_error().mean().tolist(), [2.0, 2 / 3.0]),
     ]
     for actual, expected in checks:
         # zip() would silently truncate, so pin the length first.
         self.assertEqual(len(actual), len(expected))
         for got, want in zip(actual, expected):
             self.assertAlmostEqual(got, want)
Exemple #2
0
    def test_error_report(self, mock_comparison_datframe, mock_from_data_dir):
        """Verify ``error_report`` output and its rejection of bad statistics.

        Neither mock argument is referenced directly in the body; they are
        presumably injected by ``patch`` decorators outside this view
        (TODO confirm).  ``from_data_dir`` is configured to return the
        locally built ``accuracy`` object, whose comparison dataframe comes
        from ``self._mock_comparison_dataframe()``.
        """
        accuracy = Accuracy(Mock(), Mock(), Mock(), Mock(), Mock(), Mock(),
                            Mock())
        accuracy.comparison_dataframe = self._mock_comparison_dataframe()
        accuracy.from_data_dir.return_value = accuracy

        state_puma = {
            '20': ['00500', '00602', '00604'],
            '29': ['00901', '00902'],
        }

        expected_columns = ['marginal-pums', 'marginal-doppelganger']

        df_puma, df_variable, df_total = accuracy.error_report(
            state_puma, 'fake_dir',
            marginal_variables=['num_people', 'num_vehicles', 'age'],
            statistic=ErrorStat.ABSOLUTE_PCT_ERROR)

        # Test df_total
        # Compare the absolute difference against a tight tolerance; a
        # signed "< 1" check would accept any value that is too small.
        # NaN (from a misaligned index) compares False and fails the test.
        df_total_expected = pd.Series([2.0, 2 / 3.0],
                                      index=expected_columns)
        total_error = (df_total - df_total_expected).abs()
        self.assertTrue(bool((total_error < 1e-6).values.all()))

        # Test df_puma
        expected_puma_data = np.reshape([2.0, 2 / 3.0] * 5, (5, 2))
        df_expected_puma = pd.DataFrame(data=expected_puma_data,
                                        index=self._mock_state_puma(),
                                        columns=expected_columns)
        self.assertTrue((df_expected_puma == df_puma).all().all())

        # Test df_variable
        expected_variable_data = np.reshape([2.0, 2 / 3.0] * 12, (12, 2))
        df_expected_variable = pd.DataFrame(data=expected_variable_data,
                                            index=self._mock_variable_bins(),
                                            columns=expected_columns)
        self.assertTrue((df_expected_variable == df_variable).all().all())

        # Test unimplemented statistic name
        # The call must happen inside the assertRaises context so that a
        # missing exception fails the test instead of being swallowed.
        # NOTE(review): the concrete exception type raised is not visible
        # here; narrow ``Exception`` once it is confirmed.
        with self.assertRaises(Exception):
            Accuracy.error_report(
                state_puma,
                'fake_dir',
                marginal_variables=['num_people', 'num_vehicles', 'age'],
                statistic='wrong-statistic-name')