def test_store_key_used(self):
    """Check that the link's output is stored under the configured store_key."""
    link_kwargs = {
        'read_key': 'test_input',
        'store_key': 'test_output',
        'feature_cols': ['a'],
        'datetime_col': 'dt',
    }
    link = DailySummary(**link_kwargs)
    link.initialize()
    link.execute()

    # The link is expected to have registered its result under 'test_output'.
    datastore = process_manager.service(DataStore)
    self.assertIn('test_output', datastore)
def test_partitionby_cols_kept(self):
    """Check that the partition-by column 'b' is present in the output columns."""
    link = DailySummary(
        read_key='test_input',
        store_key='test_output',
        feature_cols=['a'],
        datetime_col='dt',
        partitionby_cols=['b'],
    )
    link.initialize()
    link.execute()

    datastore = process_manager.service(DataStore)
    output_columns = datastore['test_output'].columns
    self.assertIn('b', output_columns)
def test_function_execution(self):
    """Check the values of the sum and count summary columns for feature 'a'.

    The link is configured with a dict mapping feature 'a' to the aggregation
    functions ``f.sum`` and ``f.count``; the resulting 'a_sum_0d' and
    'a_count_0d' columns are compared against fixed expected values.
    """
    aggregations = {'a': [f.sum, f.count]}
    link = DailySummary(
        read_key='test_input',
        store_key='test_output',
        feature_cols=aggregations,
        datetime_col='dt',
    )
    link.initialize()
    link.execute()

    datastore = process_manager.service(DataStore)
    result = datastore['test_output'].toPandas()

    # NOTE(review): expected values depend on the 'test_input' fixture and on
    # the row order of the output, neither of which is visible here.
    expected_by_column = (
        ('a_sum_0d', [3, 5]),
        ('a_count_0d', [2, 1]),
    )
    for column, expected in expected_by_column:
        self.assertEqual(list(result[column]), expected)
def test_specific_summary_column_dict(self):
    """Check that only the requested aggregation column is produced.

    With ``feature_cols={'a': [f.sum]}`` the output must contain 'a_sum_0d'
    and none of the min/mean/max/stddev/count columns for 'a'.
    """
    link = DailySummary(
        read_key='test_input',
        store_key='test_output',
        feature_cols={'a': [f.sum]},
        datetime_col='dt',
    )
    link.initialize()
    link.execute()

    datastore = process_manager.service(DataStore)

    # Aggregations that were not requested must not appear in the output.
    unrequested = ('a_min_0d', 'a_mean_0d', 'a_max_0d', 'a_stddev_0d', 'a_count_0d')
    for column in unrequested:
        self.assertNotIn(column, datastore['test_output'].columns)

    self.assertIn('a_sum_0d', datastore['test_output'].columns)
def test_partitionby_partitions(self):
    """Check that the summary is computed separately per value of column 'b'.

    The output is expected to contain both partition values 1 and 2, with
    two rows for ``b == 1`` and one row for ``b == 2``.
    """
    summary_link = DailySummary(
        read_key='test_input',
        store_key='test_output',
        feature_cols=['a'],
        datetime_col='dt',
        partitionby_cols=['b'],
    )
    summary_link.initialize()
    summary_link.execute()

    ds = process_manager.service(DataStore)
    pdf = ds['test_output'].toPandas()

    # `x in series` tests the Series *index labels*, not its values, so the
    # membership checks must run against the column's values instead.
    partition_values = pdf['b'].tolist()
    self.assertIn(1, partition_values)
    self.assertIn(2, partition_values)

    self.assertEqual(len(pdf[pdf['b'] == 1]), 2)
    self.assertEqual(len(pdf[pdf['b'] == 2]), 1)