Exemple #1
0
    def test_store_key_used(self):
        """Run the link and check the datastore holds the ``store_key`` entry."""
        link = DailySummary(
            read_key='test_input',
            store_key='test_output',
            feature_cols=['a'],
            datetime_col='dt',
        )

        # Same life cycle as in the other tests: initialize first, then execute.
        link.initialize()
        link.execute()

        # The datastore is looked up only after the link has run.
        datastore = process_manager.service(DataStore)
        self.assertIn('test_output', datastore)
Exemple #2
0
    def test_partitionby_cols_kept(self):
        """Check that the ``partitionby_cols`` column is present in the output."""
        link = DailySummary(
            read_key='test_input',
            store_key='test_output',
            feature_cols=['a'],
            datetime_col='dt',
            partitionby_cols=['b'],
        )

        link.initialize()
        link.execute()

        # Column 'b' was passed as a partition column and must appear in the
        # columns of the stored result.
        output_columns = process_manager.service(DataStore)['test_output'].columns
        self.assertIn('b', output_columns)
Exemple #3
0
    def test_function_execution(self):
        """Check the values produced by explicitly requested summary functions.

        ``feature_cols`` is given as a dict mapping column 'a' to ``f.sum`` and
        ``f.count``; the output columns 'a_sum_0d' and 'a_count_0d' are compared
        against fixed expected values.
        """
        link = DailySummary(
            read_key='test_input',
            store_key='test_output',
            feature_cols={'a': [f.sum, f.count]},
            datetime_col='dt',
        )

        link.initialize()
        link.execute()

        result = process_manager.service(DataStore)['test_output'].toPandas()

        # NOTE(review): the expected lists depend on the row order of the
        # output -- confirm that the link (or the fixture) guarantees it.
        expected_by_column = (
            ('a_sum_0d', [3, 5]),
            ('a_count_0d', [2, 1]),
        )
        for column, expected in expected_by_column:
            self.assertEqual(list(result[column]), expected)
Exemple #4
0
    def test_specific_summary_column_dict(self):
        """Check that only the requested summary function yields a column.

        Only ``f.sum`` is requested for column 'a', so 'a_sum_0d' must be in
        the output while the min/mean/max/stddev/count columns must not.
        """
        link = DailySummary(
            read_key='test_input',
            store_key='test_output',
            feature_cols={'a': [f.sum]},
            datetime_col='dt',
        )

        link.initialize()
        link.execute()

        output_columns = process_manager.service(DataStore)['test_output'].columns

        # Summary columns that were not requested and must therefore be absent.
        unexpected = (
            'a_min_0d',
            'a_mean_0d',
            'a_max_0d',
            'a_stddev_0d',
            'a_count_0d',
        )
        for column in unexpected:
            self.assertNotIn(column, output_columns)

        self.assertIn('a_sum_0d', output_columns)
Exemple #5
0
    def test_partitionby_partitions(self):
        """Check that the output is split per value of the partition column.

        The link is run with ``partitionby_cols=['b']``; the result must
        contain both partition values 1 and 2, with two rows for ``b == 1``
        and one row for ``b == 2``.
        """
        summary_link = DailySummary(read_key='test_input',
                                    store_key='test_output',
                                    feature_cols=['a'],
                                    datetime_col='dt',
                                    partitionby_cols=['b'])

        summary_link.initialize()
        summary_link.execute()

        ds = process_manager.service(DataStore)
        pdf = ds['test_output'].toPandas()

        # ``x in series`` tests membership in the Series *index*, not in its
        # values, so the check has to run against the actual column values.
        partition_values = pdf['b'].tolist()
        self.assertIn(1, partition_values)
        self.assertIn(2, partition_values)
        self.assertEqual(len(pdf[pdf['b'] == 1]), 2)
        self.assertEqual(len(pdf[pdf['b'] == 2]), 1)