Example #1
0
    def test_from_csv(self):
        """Load the adult CSV and verify its shape and one NaN entry.

        NOTE(review): the second argument to ``read.from_csv`` is presumably
        the list of markers to treat as missing values -- confirm against
        ``read.from_csv``.
        """
        frame = read.from_csv('../../../data/adult.csv', ['#NAME?'])

        # First check: overall dimensions of the loaded frame.
        expected_shape = (5000, 15)
        self.assertEqual(frame.shape, expected_shape, 'assert the data shape')

        # Second check: the 'age' entry at positional row 16 must be NaN.
        age_at_row_16 = frame.iloc[16]['age']
        self.assertTrue(np.isnan(age_at_row_16), 'assert the null value')
Example #2
0
    def test_split_train_val_test(self):
        """Verify partition shapes returned by ``split.split_train_val_test``.

        Exercises both call forms used in this test: a single frame (three
        parts come back) and a separated X/y pair (six parts come back).
        """
        frame = read.from_csv('../../../data/adult.csv', ['#NAME?'])

        # Single-frame form: 20% validation, 20% test.
        parts = split.split_train_val_test(frame,
                                           val_percentage=0.2,
                                           test_percentage=0.2)
        train_part, val_part, test_part = parts
        self.assertEqual(train_part.shape, (3000, 15))
        self.assertEqual(val_part.shape, (1000, 15))
        self.assertEqual(test_part.shape, (1000, 15))

        # X/y form: 10% validation, 10% test, with 'income' as the y column.
        features, target = split.separate_x_y(frame, 'income')
        pieces = split.split_train_val_test(
            features, target, val_percentage=0.1, test_percentage=0.1)
        X_tr, y_tr, X_va, y_va, X_te, y_te = pieces

        # Checked in the same order as the values are returned.
        expectations = (
            (X_tr, (4000, 14)),
            (y_tr, (4000, )),
            (X_va, (500, 14)),
            (y_va, (500, )),
            (X_te, (500, 14)),
            (y_te, (500, )),
        )
        for part, wanted_shape in expectations:
            self.assertEqual(part.shape, wanted_shape)
Example #3
0
    def test_to_npz(self):
        """Round-trip a train/val/test split through an ``.npz`` file.

        Reads the mnist CSV, separates the 'label' column, splits the data
        with 10% validation and 10% test, writes the six parts with
        ``save.train_val_test_to_npz`` and reads them back with
        ``read.from_npz_train_val_test_x_y``. Every reloaded part must equal
        the part that was saved, not only ``X_train``.

        Side effect: writes ``../../../data/mnist.npz``.
        """
        data_name = 'mnist'
        data_path = '../../../data/' + data_name + '.csv'
        npz_path = '../../../data/' + data_name + '.npz'

        df = read.from_csv(data_path)

        X, y = split.separate_x_y(df, 'label')
        X_train, y_train, X_val, y_val, X_test, y_test = split.split_train_val_test(
            X, y, val_percentage=0.1, test_percentage=0.1)

        save.train_val_test_to_npz(npz_path, X_train, y_train, X_val, y_val,
                                   X_test, y_test)
        X_train_2, y_train_2, X_val_2, y_val_2, X_test_2, y_test_2 = read.from_npz_train_val_test_x_y(
            npz_path)

        # Compare every saved part with its reloaded counterpart so that a
        # corrupted or swapped array in any position fails the test.
        round_trip_pairs = (
            ('X_train', X_train, X_train_2),
            ('y_train', y_train, y_train_2),
            ('X_val', X_val, X_val_2),
            ('y_val', y_val, y_val_2),
            ('X_test', X_test, X_test_2),
            ('y_test', y_test, y_test_2),
        )
        for part_name, saved_part, loaded_part in round_trip_pairs:
            np.testing.assert_array_equal(
                saved_part, loaded_part,
                err_msg=part_name + ' differs after the npz round trip')
Example #4
0
    def test_split_data(self):
        """Verify the two partition shapes from ``split.split_data``.

        With ``percentage=0.2`` on the adult frame, the first returned part
        is expected to have 4000 rows and the second 1000 rows.
        """
        frame = read.from_csv('../../../data/adult.csv', ['#NAME?'])

        parts = split.split_data(frame, percentage=0.2)
        first_part, second_part = parts

        self.assertEqual(first_part.shape, (4000, 15))
        self.assertEqual(second_part.shape, (1000, 15))
Example #5
0
 def test_separate_x_y(self):
     """Verify that ``split.separate_x_y`` peels 'income' off the adult frame.

     The first returned value is expected to keep 14 columns; the second
     is expected to be one-dimensional with one entry per row.
     """
     frame = read.from_csv('../../../data/adult.csv', ['#NAME?'])
     features, target = split.separate_x_y(frame, 'income')

     expected_x_shape = (5000, 14)
     expected_y_shape = (5000, )
     self.assertEqual(features.shape, expected_x_shape, 'assert x shape')
     self.assertEqual(target.shape, expected_y_shape, 'assert y shape')