def load_UCI_Credit_Card_data(infile=None, balanced=True, seed=5): X = [] y = [] sids = [] with open(infile, "r") as fi: fi.readline() reader = csv.reader(fi) for row in reader: sids.append(row[0]) X.append(row[1:-1]) y0 = int(row[-1]) if y0 == 0: y0 = -1 y.append(y0) y = np.array(y) if balanced: X, y = balance_X_y(X, y, seed) X = np.array(X, dtype=np.float32) y = np.array(y, dtype=np.float32) encoder = OneHotEncoder(categorical_features=[1, 2, 3]) encoder.fit(X) X = encoder.transform(X).toarray() X, y = shuffle_X_y(X, y, seed) scale_model = StandardScaler() X = scale_model.fit_transform(X) return X, np.expand_dims(y, axis=1)
def test_one_hot_encoder_sparse(): """Test OneHotEncoder's fit and transform.""" X = [[3, 2, 1], [0, 1, 1]] enc = OneHotEncoder() # discover max values automatically X_trans = enc.fit_transform(X).toarray() assert_equal(X_trans.shape, (2, 5)) assert_array_equal(enc.active_features_, np.where([1, 0, 0, 1, 0, 1, 1, 0, 1])[0]) assert_array_equal(enc.feature_indices_, [0, 4, 7, 9]) # check outcome assert_array_equal(X_trans, [[0., 1., 0., 1., 1.], [1., 0., 1., 0., 1.]]) # max value given as 3 enc = OneHotEncoder(n_values=4) X_trans = enc.fit_transform(X) assert_equal(X_trans.shape, (2, 4 * 3)) assert_array_equal(enc.feature_indices_, [0, 4, 8, 12]) # max value given per feature enc = OneHotEncoder(n_values=[3, 2, 2]) X = [[1, 0, 1], [0, 1, 1]] X_trans = enc.fit_transform(X) assert_equal(X_trans.shape, (2, 3 + 2 + 2)) assert_array_equal(enc.n_values_, [3, 2, 2]) # check that testing with larger feature works: X = np.array([[2, 0, 1], [0, 1, 1]]) enc.transform(X) # test that an error is raised when out of bounds: X_too_large = [[0, 2, 1], [0, 1, 1]] assert_raises(ValueError, enc.transform, X_too_large) assert_raises(ValueError, OneHotEncoder(n_values=2).fit_transform, X) # test that error is raised when wrong number of features assert_raises(ValueError, enc.transform, X[:, :-1]) # test that error is raised when wrong number of features in fit # with prespecified n_values assert_raises(ValueError, enc.fit, X[:, :-1]) # test exception on wrong init param assert_raises(TypeError, OneHotEncoder(n_values=np.int).fit, X) enc = OneHotEncoder() # test negative input to fit assert_raises(ValueError, enc.fit, [[0], [-1]]) # test negative input to transform enc.fit([[0], [1]]) assert_raises(ValueError, enc.transform, [[0], [-1]])
def train(self, X, Y, class_number=-1): class_count = max(np.unique(Y).size, class_number) feature_count = X.shape[1] self.__hpelm = ELM(feature_count, class_count, 'wc') self.__hpelm.add_neurons(feature_count, "sigm") Y_arr = Y.reshape(-1, 1) enc = OneHotEncoder() enc.fit(Y_arr) Y_OHE = enc.transform(Y_arr).toarray() out_fd = sys.stdout sys.stdout = open(os.devnull, 'w') self.__hpelm.train(X, Y_OHE) sys.stdout = out_fd
def test_one_hot_encoder_unknown_transform(): X = np.array([[0, 2, 1], [1, 0, 3], [1, 0, 2]]) y = np.array([[4, 1, 1]]) # Test that one hot encoder raises error for unknown features # present during transform. oh = OneHotEncoder(handle_unknown='error') oh.fit(X) assert_raises(ValueError, oh.transform, y) # Test the ignore option, ignores unknown features. oh = OneHotEncoder(handle_unknown='ignore') oh.fit(X) assert_array_equal( oh.transform(y).toarray(), np.array([[0., 0., 0., 0., 1., 0., 0.]])) # Raise error if handle_unknown is neither ignore or error. oh = OneHotEncoder(handle_unknown='42') oh.fit(X) assert_raises(ValueError, oh.transform, y)
def test_one_hot_encoder_unknown_transform(): X = np.array([[0, 2, 1], [1, 0, 3], [1, 0, 2]]) y = np.array([[4, 1, 1]]) # Test that one hot encoder raises error for unknown features # present during transform. oh = OneHotEncoder(handle_unknown='error') oh.fit(X) assert_raises(ValueError, oh.transform, y) # Test the ignore option, ignores unknown features. oh = OneHotEncoder(handle_unknown='ignore') oh.fit(X) assert_array_equal( oh.transform(y).toarray(), np.array([[ 0., 0., 0., 0., 1., 0., 0.]]) ) # Raise error if handle_unknown is neither ignore or error. oh = OneHotEncoder(handle_unknown='42') oh.fit(X) assert_raises(ValueError, oh.transform, y)