def test_normalizer_l1():
    """Check L1 row normalization on dense and CSR inputs.

    Row 3 of the data is zeroed so that the all-zero-row case is covered,
    both as a pruned CSR matrix and as a CSR matrix that still stores
    explicit zeros. Two properties are verified:

    * inputs supporting the no-copy optimisation are returned as the same
      object when ``copy=False`` and as a different object when
      ``copy=True``;
    * other sparse formats always come back as a new CSR matrix, even with
      ``copy=False``.

    In every case the absolute row sums must be 1 for rows 0-2 and 0 for
    the zeroed row.
    """
    rng = np.random.RandomState(0)
    X_dense = rng.randn(4, 5)
    X_sparse_unpruned = sparse.csr_matrix(X_dense)

    # set the row number 3 to zero
    X_dense[3, :] = 0.0

    # set the row number 3 to zero without pruning (can happen in real life)
    indptr_3 = X_sparse_unpruned.indptr[3]
    indptr_4 = X_sparse_unpruned.indptr[4]
    X_sparse_unpruned.data[indptr_3:indptr_4] = 0.0

    # build the pruned variant using the regular constructor
    X_sparse_pruned = sparse.csr_matrix(X_dense)

    # check inputs that support the no-copy optim
    for X in (X_dense, X_sparse_pruned, X_sparse_unpruned):

        normalizer = Normalizer(norm='l1', copy=True)
        X_norm = normalizer.transform(X)
        assert_true(X_norm is not X)
        X_norm1 = toarray(X_norm)

        normalizer = Normalizer(norm='l1', copy=False)
        X_norm = normalizer.transform(X)
        assert_true(X_norm is X)
        X_norm2 = toarray(X_norm)

        for X_norm in (X_norm1, X_norm2):
            row_sums = np.abs(X_norm).sum(axis=1)
            for i in range(3):
                assert_almost_equal(row_sums[i], 1.0)
            assert_almost_equal(row_sums[3], 0.0)

    # check input for which copy=False won't prevent a copy
    for init in (sparse.coo_matrix, sparse.csc_matrix, sparse.lil_matrix):
        X = init(X_dense)
        X_norm = Normalizer(norm='l1', copy=False).transform(X)

        assert_true(X_norm is not X)
        assert_true(isinstance(X_norm, sparse.csr_matrix))

        X_norm = toarray(X_norm)
        # Recompute the sums from *this* output; reusing the values left
        # over from the loop above would not test the matrix just produced.
        row_sums = np.abs(X_norm).sum(axis=1)
        for i in range(3):
            assert_almost_equal(row_sums[i], 1.0)
        assert_almost_equal(row_sums[3], 0.0)
class NormalizerImpl:
    """Thin adapter that forwards ``fit``/``transform`` to an ``SKLModel``.

    The constructor arguments are stored in ``_hyperparams`` and passed as
    keyword arguments when the wrapped model is created.
    """

    def __init__(self, norm='l2', copy=True):
        """Record the hyperparameters and build the wrapped model."""
        self._hyperparams = dict(norm=norm, copy=copy)
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model and return ``self``.

        ``y`` is forwarded only when the caller supplied one; otherwise the
        wrapped model's ``fit`` is called with ``X`` alone.
        """
        if y is None:
            self._wrapped_model.fit(X)
        else:
            self._wrapped_model.fit(X, y)
        return self

    def transform(self, X):
        """Return whatever the wrapped model's ``transform`` yields for ``X``."""
        wrapped = self._wrapped_model
        return wrapped.transform(X)
class DFNormalizer(TransformerMixin):
    """Apply ``Normalizer`` to a DataFrame and return a DataFrame.

    The transformed values are wrapped in a new ``pd.DataFrame`` that reuses
    the index and columns of the input.

    Parameters
    ----------
    norm : str, default 'l2'
        Forwarded to ``Normalizer`` as its ``norm`` argument.
    copy : bool, default True
        Forwarded to ``Normalizer`` as its ``copy`` argument.

    Attributes
    ----------
    ss_ : Normalizer or None
        The ``Normalizer`` built by the most recent ``transform`` call;
        ``None`` until ``transform`` has been called.
    """
    # Row wise transformer? - Can be removed if so

    def __init__(self, norm='l2', copy=True):
        self.norm = norm
        self.copy = copy
        self.ss_ = None

    def fit(self, X, y=None):
        """Do nothing and return ``self``; the normalizer is built in ``transform``."""
        return self

    def transform(self, X):
        """Normalize ``X`` and return the result as a DataFrame.

        Assumes ``X`` is a DataFrame: its ``index`` and ``columns`` are read
        to label the output.
        """
        # Forward the configured hyperparameters; a bare ``Normalizer()``
        # would silently ignore the ``norm``/``copy`` given to __init__.
        self.ss_ = Normalizer(norm=self.norm, copy=self.copy)
        Xss = self.ss_.transform(X)
        Xscaled = pd.DataFrame(Xss, index=X.index, columns=X.columns)
        return Xscaled
class CreateNormalizer(CreateModel):
    """Benchmark wrapper that times an L2 ``Normalizer``."""

    def fit(self, data, args):
        """Create the normalizer, fit it on the training split, return the timing.

        ``args`` is accepted but not used here. The return value is the
        ``interval`` attribute of the ``Timer`` wrapped around the ``fit`` call.
        """
        self.model = Normalizer(norm="l2")
        with Timer() as stopwatch:
            self.model.fit(data.X_train, data.y_train)
        return stopwatch.interval

    def test(self, data):
        """Transform ``data.X_test`` with the fitted model and return the result."""
        fitted = self.model
        assert fitted is not None
        return fitted.transform(data.X_test)

    def predict(self, data):
        """Store the transformed test split in ``self.predictions``; return the timing.

        Also sets ``data.learning_task`` to ``LearningTask.REGRESSION``; the
        assignment happens inside the timed section, as in the original flow.
        """
        with Timer() as stopwatch:
            self.predictions = self.test(data)
            data.learning_task = LearningTask.REGRESSION
        return stopwatch.interval