def test_binarizer(): X_ = np.array([[1, 0, 5], [2, 3, -1]]) for init in (np.array, list, sparse.csr_matrix, sparse.csc_matrix): X = init(X_.copy()) binarizer = Binarizer(threshold=2.0, copy=True) X_bin = toarray(binarizer.transform(X)) assert_equal(np.sum(X_bin == 0), 4) assert_equal(np.sum(X_bin == 1), 2) X_bin = binarizer.transform(X) assert_equal(sparse.issparse(X), sparse.issparse(X_bin)) binarizer = Binarizer(copy=True).fit(X) X_bin = toarray(binarizer.transform(X)) assert_true(X_bin is not X) assert_equal(np.sum(X_bin == 0), 2) assert_equal(np.sum(X_bin == 1), 4) binarizer = Binarizer(copy=True) X_bin = binarizer.transform(X) assert_true(X_bin is not X) X_bin = toarray(X_bin) assert_equal(np.sum(X_bin == 0), 2) assert_equal(np.sum(X_bin == 1), 4) binarizer = Binarizer(copy=False) X_bin = binarizer.transform(X) if init is not list: assert_true(X_bin is X) X_bin = toarray(X_bin) assert_equal(np.sum(X_bin == 0), 2) assert_equal(np.sum(X_bin == 1), 4) binarizer = Binarizer(threshold=-0.5, copy=True) for init in (np.array, list): X = init(X_.copy()) X_bin = toarray(binarizer.transform(X)) assert_equal(np.sum(X_bin == 0), 1) assert_equal(np.sum(X_bin == 1), 5) X_bin = binarizer.transform(X) # Cannot use threshold < 0 for sparse assert_raises(ValueError, binarizer.transform, sparse.csc_matrix(X))
def test_binarizer(): X_ = np.array([[1, 0, 5], [2, 3, -1]]) for init in (np.array, list, sparse.csr_matrix, sparse.csc_matrix): X = init(X_.copy()) binarizer = Binarizer(threshold=2.0, copy=True) X_bin = toarray(binarizer.transform(X)) assert_equal(np.sum(X_bin == 0), 4) assert_equal(np.sum(X_bin == 1), 2) X_bin = binarizer.transform(X) assert_equal(sparse.issparse(X), sparse.issparse(X_bin)) binarizer = Binarizer(copy=True).fit(X) X_bin = toarray(binarizer.transform(X)) assert_true(X_bin is not X) assert_equal(np.sum(X_bin == 0), 2) assert_equal(np.sum(X_bin == 1), 4) binarizer = Binarizer(copy=True) X_bin = binarizer.transform(X) assert_true(X_bin is not X) X_bin = toarray(X_bin) assert_equal(np.sum(X_bin == 0), 2) assert_equal(np.sum(X_bin == 1), 4) binarizer = Binarizer(copy=False) X_bin = binarizer.transform(X) if init is not list: assert_true(X_bin is X) X_bin = toarray(X_bin) assert_equal(np.sum(X_bin == 0), 2) assert_equal(np.sum(X_bin == 1), 4) binarizer = Binarizer(threshold=-0.5, copy=True) for init in (np.array, list): X = init(X_.copy()) X_bin = toarray(binarizer.transform(X)) assert_equal(np.sum(X_bin == 0), 1) assert_equal(np.sum(X_bin == 1), 5) X_bin = binarizer.transform(X) # Cannot use threshold < 0 for sparse assert_raises(ValueError, binarizer.transform, sparse.csc_matrix(X))
class CreateBinarizer(CreateModel): def fit(self, data, args): self.model = Binarizer() with Timer() as t: self.model.fit(data.X_train, data.y_train) return t.interval def test(self, data): assert self.model is not None return self.model.transform(data.X_test) def predict(self, data): with Timer() as t: self.predictions = self.test(data) data.learning_task = LearningTask.REGRESSION return t.interval
data.loc[data['Region'] == i, 'expensive than average region'] = data.loc[data['Region'] == i, 'Price'] - \ data.loc[data['Region'] == i, 'Price'].mean() for i in range(1, 8): data.loc[data['Weekday'] == i, 'expensive than average weekday'] = data.loc[data['Weekday'] == i, 'Price'] - \ data.loc[data['Weekday'] == i, 'Price'].mean() for i in range(1, 366): data.loc[data['Date'] == i, 'expensive than average date'] = data.loc[data['Date'] == i, 'Price'] - \ data.loc[data['Date'] == i, 'Price'].mean() for i in range(2): data.loc[data['Apartment'] == i, 'expensive than average apartment'] = data.loc[data['Apartment'] == i, 'Price'] - \ data.loc[data['Apartment'] == i, 'Price'].mean() for i in range(1, 5): data.loc[data['Beds'] == i, 'expensive than average bed'] = data.loc[data['Beds'] == i, 'Price'] - \ data.loc[data['Beds'] == i, 'Price'].mean() threshold1 = Binarizer(threshold=3.0) res1 = pd.DataFrame(threshold1.transform(data['Review'].values.reshape(-1, 1))) threshold2 = Binarizer(threshold=80) res2 = pd.DataFrame(threshold2.transform(data['Price'].values.reshape(-1, 1))) pf = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False) res3 = pd.DataFrame( pf.fit_transform( data[['Apartment', 'Beds', 'Review', 'Pic Quality', 'Price']])) encoder = OneHotEncoder() data_region1hot = encoder.fit_transform(data['Region'].values.reshape(-1, 1)) data_region = pd.DataFrame(data_region1hot.toarray()) data_weekday1hot = encoder.fit_transform(data['Weekday'].values.reshape(-1, 1)) data_weekday = pd.DataFrame(data_weekday1hot.toarray()) data_reformed = pd.concat( [data.drop(columns=['ID']), data_region, data_weekday, res1, res2, res3],