Beispiel #1
0
    def test_by_month(self):
        """
        Tests the find_period_ranges function when the data is resampled by month.

        The pivoted frame is built with resample_by='M' and agg_by='injuries',
        then passed to find_period_ranges with the transition date "2016-10-02".
        The test expects, as '%Y-%m-%d' strings:
            * out[0][0] to be the earliest index date of the pivoted frame,
            * out[1][0] to be "2016-10-31" (the rounded transition date),
            * out[1][1] to be the latest index date of the pivoted frame.
        """
        date_format = '%Y-%m-%d'

        test_treatment_in = [
            'Atlantic', 'Pike-Market', 'Belltown', 'International District',
            'Central Business District', 'First Hill', 'Yesler Terrace',
            'Pioneer Square', 'Interbay', 'Mann', 'Minor'
        ]
        transition_date = "2016-10-02"
        out_df = render_stats.pivot_by_treatment(
            DF_NEIGHBORHOODS,
            treatment_list=test_treatment_in,
            resample_by='M',
            agg_by='injuries')
        out = render_stats.find_period_ranges(out_df,
                                              transition_date=transition_date)

        # Test min date: the first period starts at the earliest index entry.
        # assertEqual is used so a failure reports both compared values.
        expected_min = out_df.index.min().strftime(date_format)
        self.assertEqual(out[0][0], expected_min)

        # Test transition date: the second period starts at the rounded date.
        rounded_transition_date = "2016-10-31"
        self.assertEqual(out[1][0], rounded_transition_date)

        # Test max date: the second period ends at the latest index entry.
        expected_max = out_df.index.max().strftime(date_format)
        self.assertEqual(out[1][1], expected_max)
Beispiel #2
0
    def test_treatment_control(self):
        """
        Tests that the correct number of treatment and control values are returned from
            pivot_by_treatment given a list of treatment neighborhoods and a list of
            control neighborhoods.

        The column total of 'SpeedLimitChange' is expected to equal the number of
        treatment neighborhoods, and the column total of 'SpeedLimitSame' is
        expected to equal the number of control neighborhoods.
        """

        test_treatment_in = [
            'Atlantic', 'Pike-Market', 'Belltown', 'International District',
            'Central Business District', 'First Hill', 'Yesler Terrace'
        ]
        test_control_in = ['Pioneer Square', 'Interbay', 'Mann', 'Minor']

        out = render_stats.pivot_by_treatment(DF_NEIGHBORHOODS,
                                              treatment_list=test_treatment_in,
                                              control_list=test_control_in,
                                              resample_by='D')

        # Sum each column once and reuse the result for both checks.
        column_totals = out.sum()

        # assertEqual reports both values when the counts disagree.
        self.assertEqual(int(column_totals['SpeedLimitChange']),
                         len(test_treatment_in))
        self.assertEqual(int(column_totals['SpeedLimitSame']),
                         len(test_control_in))
Beispiel #3
0
    def test_value_errors(self):
        """
        Tests the ValueError scenarios of pivot_by_treatment.

        Every call below is expected to raise ValueError:
            1) pivoting DF_NO_NEIGHBORHOODS (presumably a dataframe lacking
               neighborhood information -- fixture is defined elsewhere)
            2) passing 'bad_path' as neighborhood_path
            3) requesting resample_by='Y'
            4) passing an agg_by value that is not a string (12)
            5) passing an agg_by column name which doesn't exist ('fake_column')
        """
        pivot = render_stats.pivot_by_treatment

        # 1) Dataframe without neighborhood
        self.assertRaises(ValueError,
                          pivot,
                          DF_NO_NEIGHBORHOODS,
                          treatment_list=['Genesee'])

        # 2) Bad neighborhood path
        self.assertRaises(ValueError,
                          pivot,
                          DF_NEIGHBORHOODS,
                          treatment_list=['Genesee'],
                          neighborhood_path='bad_path')

        # 3) Invalid resample_by
        self.assertRaises(ValueError,
                          pivot,
                          DF_NEIGHBORHOODS,
                          treatment_list=['Genesee'],
                          resample_by='Y')

        # 4) agg_by is not a string
        self.assertRaises(ValueError,
                          pivot,
                          DF_NEIGHBORHOODS,
                          treatment_list=['Genesee'],
                          agg_by=12)

        # 5) agg_by is not a column
        self.assertRaises(ValueError,
                          pivot,
                          DF_NEIGHBORHOODS,
                          treatment_list=['Genesee'],
                          agg_by='fake_column')
Beispiel #4
0
    def test_agg_by(self):
        """
        Tests the pivot_by_treatment functionality to aggregate by a named column.

        The data is pivoted with daily resampling and agg_by='injuries'; the
        total of the 'SpeedLimitSame' column is expected to be 21.
        """
        test_treatment_in = [
            'Atlantic', 'Pike-Market', 'Belltown', 'International District',
            'Central Business District', 'First Hill', 'Yesler Terrace',
            'Pioneer Square', 'Interbay', 'Mann', 'Minor'
        ]

        out = render_stats.pivot_by_treatment(DF_NEIGHBORHOODS,
                                              treatment_list=test_treatment_in,
                                              resample_by='D',
                                              agg_by='injuries')

        injury_count = int(out.sum()['SpeedLimitSame'])
        # assertEqual shows the actual count if it differs from the expectation.
        self.assertEqual(injury_count, 21)
Beispiel #5
0
    def test_resample_month(self):
        """
        Tests the pivot_by_treatment functionality to resample the data by month, instead of day.

        With resample_by='M' the earliest index entry of the result is expected
        to fall on March 31.
        """

        test_treatment_in = [
            'Atlantic', 'Pike-Market', 'Belltown', 'International District',
            'Central Business District', 'First Hill', 'Yesler Terrace',
            'Pioneer Square', 'Interbay', 'Mann', 'Minor'
        ]

        out = render_stats.pivot_by_treatment(DF_NEIGHBORHOODS,
                                              treatment_list=test_treatment_in,
                                              resample_by='M')

        # Look up the earliest index entry once and check both of its parts.
        first_period = out.index.min()
        self.assertEqual(first_period.month, 3)
        self.assertEqual(first_period.day, 31)
Beispiel #6
0
 def test_transition_date_before_start(self):
     """
     Tests that find_period_ranges raises a ValueError when transition_date
         falls before any date in the data.
     """
     neighborhoods = [
         'Atlantic', 'Pike-Market', 'Belltown', 'International District',
         'Central Business District', 'First Hill', 'Yesler Terrace',
         'Pioneer Square', 'Interbay', 'Mann', 'Minor'
     ]
     pivoted = render_stats.pivot_by_treatment(DF_NEIGHBORHOODS,
                                               treatment_list=neighborhoods,
                                               resample_by='D',
                                               agg_by='injuries')

     # A transition date in 1900 is used as the too-early value.
     too_early = "1900-01-01"
     self.assertRaises(ValueError,
                       render_stats.find_period_ranges,
                       pivoted,
                       transition_date=too_early)
Beispiel #7
0
    def test_causal_impact_load(self):
        """
        Performs a smoke test where we load causal impact.

        The data is pivoted with daily resampling and agg_by=None, split into
        two period ranges around the transition date "2016-12-22", and handed
        to CausalImpact. The test only checks that a CausalImpact instance is
        produced.
        """
        # Setup
        test_treatment_in = [
            'Atlantic', 'Pike-Market', 'Belltown', 'International District',
            'Central Business District', 'First Hill', 'Yesler Terrace',
            'Pioneer Square', 'Interbay', 'Mann', 'Minor'
        ]
        transition_date = "2016-12-22"
        out_df = render_stats.pivot_by_treatment(
            DF_NEIGHBORHOODS,
            treatment_list=test_treatment_in,
            resample_by='D',
            agg_by=None)
        out_date = render_stats.find_period_ranges(
            out_df, transition_date=transition_date)

        # Call causal impact package with the two period ranges.
        causal_impact_out = CausalImpact(out_df, out_date[0], out_date[1])

        # assertIsInstance reports the actual type on failure.
        self.assertIsInstance(causal_impact_out, CausalImpact)