def test_subset(self):
        '''
        Subset use case - keep only the requested feature columns.

        Builds a BorderData from the cleaned subset frame, checks that
        data.X has as many columns as the subset and contains each requested
        name, then spot-checks data.y against the original label column.
        '''
        wanted = ['dayofweek', 'minofday']
        data = BorderData(clean_df_subset(self.df, wanted))

        # Feature columns: same count, and every requested name is present
        columns = data.X.columns.values
        self.assertEqual(len(wanted), len(columns))
        for name in wanted:
            self.assertIn(name, columns)

        # Label values: compare 20 randomly chosen row positions
        last = len(self.df) - 1
        for _ in range(20):
            pos = randint(0, last)
            self.assertEqual(self.df[self.label].values[pos], data.y.values[pos])
Esempio n. 2
0
    def test_subset(self):
        '''
        Subset use case - keep only the requested feature columns.

        Builds a BorderData from the cleaned subset frame, checks that
        data.X has as many columns as the subset and contains each requested
        name, then spot-checks data.y against the original label column.
        '''
        wanted = ['dayofweek', 'minofday']
        data = BorderData(clean_df_subset(self.df, wanted))

        # Feature columns: same count, and every requested name is present
        columns = data.X.columns.values
        self.assertEqual(len(wanted), len(columns))
        for name in wanted:
            self.assertIn(name, columns)

        # Label values: compare 20 randomly chosen row positions
        last = len(self.df) - 1
        for _ in range(20):
            pos = randint(0, last)
            self.assertEqual(self.df[self.label].values[pos], data.y.values[pos])
    def test_subset_new_label(self):
        '''
        Subset use case with a custom label column.

        Adds a derived 'wt2' column (waittime * 2) to a copy of self.df,
        cleans the copy down to the requested features with 'wt2' as the
        label, and checks that BorderData exposes those features in X and
        the 'wt2' values in y.
        '''
        dfnew = self.df.copy()
        dfnew['wt2'] = self.df.waittime * 2
        subset = ['dayofweek', 'minofday']
        dfnew = clean_df_subset(dfnew, subset, label='wt2')
        data = BorderData(dfnew, label='wt2')

        # Test features: same count, and every requested name is present
        columns = data.X.columns.values
        self.assertEqual(len(subset), len(columns))
        for f in subset:
            self.assertIn(f, columns)

        # Test y label on 20 randomly chosen row positions.
        # The index is bounded by the cleaned frame, not self.df: the values
        # being indexed come from dfnew, so sizing the range from self.df
        # could run past the end if cleaning returned fewer rows.
        expected = dfnew['wt2'].values
        actual = data.y.values
        last = len(dfnew) - 1
        for _ in range(20):
            i = randint(0, last)
            self.assertEqual(expected[i], actual[i])
Esempio n. 4
0
    def test_subset_new_label(self):
        '''
        Subset use case with a custom label column.

        Adds a derived 'wt2' column (waittime * 2) to a copy of self.df,
        cleans the copy down to the requested features with 'wt2' as the
        label, and checks that BorderData exposes those features in X and
        the 'wt2' values in y.
        '''
        dfnew = self.df.copy()
        dfnew['wt2'] = self.df.waittime * 2
        subset = ['dayofweek', 'minofday']
        dfnew = clean_df_subset(dfnew, subset, label='wt2')
        data = BorderData(dfnew, label='wt2')

        # Test features: same count, and every requested name is present
        columns = data.X.columns.values
        self.assertEqual(len(subset), len(columns))
        for f in subset:
            self.assertIn(f, columns)

        # Test y label on 20 randomly chosen row positions.
        # The index is bounded by the cleaned frame, not self.df: the values
        # being indexed come from dfnew, so sizing the range from self.df
        # could run past the end if cleaning returned fewer rows.
        expected = dfnew['wt2'].values
        actual = data.y.values
        last = len(dfnew) - 1
        for _ in range(20):
            i = randint(0, last)
            self.assertEqual(expected[i], actual[i])