def test_from_csv(self):
    """Load the adult CSV and check its shape and one known missing cell.

    ``['#NAME?']`` is handed to ``read.from_csv`` as its second argument;
    presumably it lists the tokens to treat as missing values -- confirm
    against ``read.from_csv``.
    """
    frame = read.from_csv('../../../data/adult.csv', ['#NAME?'])

    # 1. The loaded table is expected to be 5000 rows by 15 columns.
    expected_shape = (5000, 15)
    self.assertEqual(frame.shape, expected_shape, 'assert the data shape')

    # 2. The 'age' cell of the row at position 16 is expected to be NaN.
    age_value = frame.iloc[16]['age']
    self.assertTrue(np.isnan(age_value), 'assert the null value')
def test_split_train_val_test(self):
    """Check the part sizes produced by ``split.split_train_val_test``.

    Both call forms used by this test are covered: a single frame (three
    parts come back) and a separate X / y pair (six parts come back).
    """
    data = read.from_csv('../../../data/adult.csv', ['#NAME?'])

    # Whole-frame form with val_percentage=0.2 and test_percentage=0.2.
    train, val, test = split.split_train_val_test(
        data, val_percentage=0.2, test_percentage=0.2)
    frame_checks = (
        (train, (3000, 15)),
        (val, (1000, 15)),
        (test, (1000, 15)),
    )
    for part, wanted in frame_checks:
        self.assertEqual(part.shape, wanted)

    # X / y form with val_percentage=0.1 and test_percentage=0.1, after
    # separating the 'income' column from the rest.
    X, y = split.separate_x_y(data, 'income')
    X_train, y_train, X_val, y_val, X_test, y_test = (
        split.split_train_val_test(
            X, y, val_percentage=0.1, test_percentage=0.1))
    xy_checks = (
        (X_train, (4000, 14)),
        (y_train, (4000, )),
        (X_val, (500, 14)),
        (y_val, (500, )),
        (X_test, (500, 14)),
        (y_test, (500, )),
    )
    for part, wanted in xy_checks:
        self.assertEqual(part.shape, wanted)
def test_to_npz(self):
    """Round-trip a train/val/test split through an ``.npz`` file.

    Reads ``mnist.csv``, separates the ``label`` column, splits the data
    with ``val_percentage=0.1`` and ``test_percentage=0.1``, writes the six
    resulting parts with ``save.train_val_test_to_npz`` and reads them back
    with ``read.from_npz_train_val_test_x_y``.

    Every reloaded part is compared with the part that was saved. The
    previous version compared only ``X_train``, so a broken round trip of
    any of the other five parts went unnoticed.

    The ``.npz`` file is written next to the CSV and is left on disk.
    """
    data_name = 'mnist'
    data_path = '../../../data/' + data_name + '.csv'
    npz_path = '../../../data/' + data_name + '.npz'

    df = read.from_csv(data_path)
    X, y = split.separate_x_y(df, 'label')
    X_train, y_train, X_val, y_val, X_test, y_test = (
        split.split_train_val_test(
            X, y, val_percentage=0.1, test_percentage=0.1))

    save.train_val_test_to_npz(
        npz_path, X_train, y_train, X_val, y_val, X_test, y_test)
    X_train_2, y_train_2, X_val_2, y_val_2, X_test_2, y_test_2 = (
        read.from_npz_train_val_test_x_y(npz_path))

    # Compare each saved part with its reloaded counterpart; the name is
    # put in the failure message so a mismatch points at the right part.
    round_trip = (
        ('X_train', X_train, X_train_2),
        ('y_train', y_train, y_train_2),
        ('X_val', X_val, X_val_2),
        ('y_val', y_val, y_val_2),
        ('X_test', X_test, X_test_2),
        ('y_test', y_test, y_test_2),
    )
    for part_name, saved, loaded in round_trip:
        np.testing.assert_array_equal(
            saved, loaded,
            err_msg=part_name + ' changed in the npz round trip')
def test_split_data(self):
    """Check the sizes of the two parts returned by ``split.split_data``.

    Called with ``percentage=0.2``, the first part is expected to hold
    4000 rows and the second 1000 rows, each with 15 columns.
    """
    data = read.from_csv('../../../data/adult.csv', ['#NAME?'])
    first_part, second_part = split.split_data(data, percentage=0.2)

    expected_first = (4000, 15)
    expected_second = (1000, 15)
    self.assertEqual(first_part.shape, expected_first)
    self.assertEqual(second_part.shape, expected_second)
def test_separate_x_y(self):
    """Check the shapes returned by ``split.separate_x_y`` for 'income'.

    The first return value is expected to be 5000 x 14 and the second to
    be one-dimensional with 5000 entries.
    """
    data = read.from_csv('../../../data/adult.csv', ['#NAME?'])
    separated = split.separate_x_y(data, 'income')
    features, target = separated

    self.assertEqual(features.shape, (5000, 14), 'assert x shape')
    self.assertEqual(target.shape, (5000, ), 'assert y shape')