def test_by_month(self):
    """
    Tests the find_period_ranges function when the data is resampled
    by month.

    The pivoted frame is built with resample_by='M' and
    agg_by='injuries', then find_period_ranges is called with a
    mid-month transition date ("2016-10-02"). Three entries of the
    result are checked:

    * out[0][0] equals the earliest index date of the pivoted frame
    * out[1][0] equals the expected transition date, "2016-10-31"
    * out[1][1] equals the latest index date of the pivoted frame

    All dates are compared as 'YYYY-MM-DD' strings.
    """
    test_treatment_in = [
        'Atlantic', 'Pike-Market', 'Belltown', 'International District',
        'Central Business District', 'First Hill', 'Yesler Terrace',
        'Pioneer Square', 'Interbay', 'Mann', 'Minor'
    ]
    transition_date = "2016-10-02"
    out_df = render_stats.pivot_by_treatment(
        DF_NEIGHBORHOODS,
        treatment_list=test_treatment_in,
        resample_by='M',
        agg_by='injuries')
    out = render_stats.find_period_ranges(out_df,
                                          transition_date=transition_date)

    # Test min date: rebuild a plain date from the index value so that
    # only year/month/day take part in the string comparison.
    first = out_df.index.min()
    expected_min = datetime.date(first.year, first.month, first.day)
    self.assertEqual(out[0][0], expected_min.strftime('%Y-%m-%d'))

    # Test transition date: with monthly resampling the test expects the
    # month-end date rather than the date that was passed in.
    rounded_transition_date = "2016-10-31"
    self.assertEqual(out[1][0], rounded_transition_date)

    # Test max date
    last = out_df.index.max()
    expected_max = datetime.date(last.year, last.month, last.day)
    self.assertEqual(out[1][1], expected_max.strftime('%Y-%m-%d'))
def test_treatment_control(self):
    """
    Tests that the correct number of treatment and control values are
    returned from pivot_by_treatment given a list of treatment
    neighborhoods and a list of control neighborhoods.

    The frame is pivoted with resample_by='D'. The column totals of
    'SpeedLimitChange' and 'SpeedLimitSame' are then compared with the
    lengths of the treatment and control lists respectively.
    """
    test_treatment_in = [
        'Atlantic', 'Pike-Market', 'Belltown', 'International District',
        'Central Business District', 'First Hill', 'Yesler Terrace'
    ]
    test_control_in = ['Pioneer Square', 'Interbay', 'Mann', 'Minor']
    out = render_stats.pivot_by_treatment(DF_NEIGHBORHOODS,
                                          treatment_list=test_treatment_in,
                                          control_list=test_control_in,
                                          resample_by='D')

    # NOTE(review): the expected totals equal the list lengths, which
    # presumably relies on the fixture holding one record per listed
    # neighborhood -- confirm against DF_NEIGHBORHOODS.
    column_totals = out.sum()
    self.assertEqual(int(column_totals['SpeedLimitChange']),
                     len(test_treatment_in))
    self.assertEqual(int(column_totals['SpeedLimitSame']),
                     len(test_control_in))
def test_value_errors(self):
    """
    Tests the ValueError scenarios of pivot_by_treatment.

    A ValueError is expected when:
        1) the dataframe passed in is the no-neighborhood fixture
        2) neighborhood_path is given a bad path
        3) resample_by is 'Y'
        4) agg_by is not a string
        5) agg_by names a column which doesn't exist
    """
    pivot = render_stats.pivot_by_treatment

    # 1) dataframe without neighborhood
    self.assertRaises(ValueError, pivot, DF_NO_NEIGHBORHOODS,
                      treatment_list=['Genesee'])

    # 2) bad path
    self.assertRaises(ValueError, pivot, DF_NEIGHBORHOODS,
                      treatment_list=['Genesee'],
                      neighborhood_path='bad_path')

    # 3) invalid resample_by
    self.assertRaises(ValueError, pivot, DF_NEIGHBORHOODS,
                      treatment_list=['Genesee'],
                      resample_by='Y')

    # 4) agg_by not a string
    self.assertRaises(ValueError, pivot, DF_NEIGHBORHOODS,
                      treatment_list=['Genesee'],
                      agg_by=12)

    # 5) agg_by not a column
    self.assertRaises(ValueError, pivot, DF_NEIGHBORHOODS,
                      treatment_list=['Genesee'],
                      agg_by='fake_column')
def test_agg_by(self):
    """
    Tests the pivot_by_treatment functionality to aggregate by a named
    column (agg_by='injuries') with daily resampling.

    The total of the 'SpeedLimitSame' column of the pivoted frame is
    expected to be 21 for the DF_NEIGHBORHOODS fixture.
    """
    test_treatment_in = [
        'Atlantic', 'Pike-Market', 'Belltown', 'International District',
        'Central Business District', 'First Hill', 'Yesler Terrace',
        'Pioneer Square', 'Interbay', 'Mann', 'Minor'
    ]
    out = render_stats.pivot_by_treatment(DF_NEIGHBORHOODS,
                                          treatment_list=test_treatment_in,
                                          resample_by='D',
                                          agg_by='injuries')

    # NOTE(review): every neighborhood above is passed as treatment, yet
    # the column checked is 'SpeedLimitSame' -- confirm this is the
    # intended column for the expected total.
    injury_count = int(out.sum()['SpeedLimitSame'])
    self.assertEqual(injury_count, 21)
def test_resample_month(self):
    """
    Tests the pivot_by_treatment functionality to resample the data by
    month, instead of day.

    With resample_by='M' the earliest index entry of the pivoted frame
    is expected to fall on March 31 for the DF_NEIGHBORHOODS fixture.
    """
    test_treatment_in = [
        'Atlantic', 'Pike-Market', 'Belltown', 'International District',
        'Central Business District', 'First Hill', 'Yesler Terrace',
        'Pioneer Square', 'Interbay', 'Mann', 'Minor'
    ]
    out = render_stats.pivot_by_treatment(DF_NEIGHBORHOODS,
                                          treatment_list=test_treatment_in,
                                          resample_by='M')

    # assertEqual reports both values on failure, unlike assertTrue(a == b).
    earliest = out.index.min()
    self.assertEqual(earliest.month, 3)
    self.assertEqual(earliest.day, 31)
def test_transition_date_before_start(self):
    """
    Tests that a ValueError is raised by find_period_ranges when the
    transition_date ("1900-01-01") falls before any date in the data.
    """
    neighborhoods = [
        'Atlantic', 'Pike-Market', 'Belltown', 'International District',
        'Central Business District', 'First Hill', 'Yesler Terrace',
        'Pioneer Square', 'Interbay', 'Mann', 'Minor'
    ]
    too_early = "1900-01-01"

    # Pivot first; only the find_period_ranges call is expected to raise.
    pivoted = render_stats.pivot_by_treatment(DF_NEIGHBORHOODS,
                                              treatment_list=neighborhoods,
                                              resample_by='D',
                                              agg_by='injuries')

    self.assertRaises(ValueError,
                      render_stats.find_period_ranges,
                      pivoted,
                      transition_date=too_early)
def test_causal_impact_load(self):
    """
    Performs a smoke test where we load causal impact.

    The pivoted frame (resample_by='D', agg_by=None) and the first two
    entries returned by find_period_ranges are handed to CausalImpact.
    The test passes when construction succeeds and yields a
    CausalImpact instance.
    """
    # Setup
    test_treatment_in = [
        'Atlantic', 'Pike-Market', 'Belltown', 'International District',
        'Central Business District', 'First Hill', 'Yesler Terrace',
        'Pioneer Square', 'Interbay', 'Mann', 'Minor'
    ]
    transition_date = "2016-12-22"
    out_df = render_stats.pivot_by_treatment(
        DF_NEIGHBORHOODS,
        treatment_list=test_treatment_in,
        resample_by='D',
        agg_by=None)
    out_date = render_stats.find_period_ranges(
        out_df, transition_date=transition_date)

    # Call causal impact package
    causal_impact_out = CausalImpact(out_df, out_date[0], out_date[1])

    # assertIsInstance reports the actual type on failure.
    self.assertIsInstance(causal_impact_out, CausalImpact)