def test_normalize_sparse():
    """Pure sparse normalization agrees with sklearn's dense `normalize`.

    Checks every supported norm ("l1", "l2", "max") row-wise (axis=1).
    """
    X_sparse = tosparse(X)
    for norm in ("l1", "l2", "max"):
        expected = normalize(X, norm=norm, axis=1)
        actual = normalize_pure(X_sparse, norm=norm, axis=1).todense()
        assert np.allclose(actual, expected)
def test_max_abs_scaler_sparse():
    """Converted MaxAbsScaler transforms sparse input identically to the original.

    Fits a sklearn MaxAbsScaler on dense X, converts it with
    `convert_estimator`, and compares dense vs. sparse transform outputs.
    """
    X_sparse = tosparse(X)
    tform = MaxAbsScaler()
    tform.fit(X)
    tform_ = convert_estimator(tform)
    X_t = tform.transform(X)
    X_t_ = tform_.transform(X_sparse)
    # BUG FIX: the np.allclose result was previously discarded, so the
    # test could never fail; the assert makes the comparison effective.
    assert np.allclose(X_t, X_t_.todense())
def test_standard_scaler_sparse():
    """Converted StandardScaler (no centering) matches on sparse input.

    `with_mean=False` is required because centering densifies sparse data;
    both with and without std scaling are exercised.
    """
    X_sparse = tosparse(X)
    for with_std in [True, False]:
        tform = StandardScaler(with_mean=False, with_std=with_std)
        tform.fit(X)
        tform_ = convert_estimator(tform)
        X_t = tform.transform(X)
        X_t_ = tform_.transform(X_sparse)
        # BUG FIX: the np.allclose result was previously discarded, so the
        # test could never fail; the assert makes the comparison effective.
        assert np.allclose(X_t, X_t_.todense())
def test_normalizer_sparse():
    """Converted Normalizer matches sklearn's output for every norm on sparse input."""
    X_sparse = tosparse(X)
    for norm in ["l1", "l2", "max"]:
        tform = Normalizer(norm=norm)
        tform.fit(X)
        tform_ = convert_estimator(tform)
        X_t = tform.transform(X)
        X_t_ = tform_.transform(X_sparse)
        # BUG FIX: the np.allclose result was previously discarded, so the
        # test could never fail; the assert makes the comparison effective.
        assert np.allclose(X_t, X_t_.todense())
def test_feature_union_sparse():
    """A converted FeatureUnion transforms sparse input like the sklearn original.

    Union of a mean-free StandardScaler and a MaxAbsScaler, fit on iris.
    """
    X, y = load_iris(return_X_y=True)
    X_ = tosparse(X.tolist())
    steps = [
        ("ss", StandardScaler(with_mean=False)),
        ("mms", MaxAbsScaler()),
    ]
    union = FeatureUnion(steps)
    union.fit(X, y)
    union_ = convert_estimator(union)
    dense_out = union.transform(X)
    sparse_out = union_.transform(X_).todense()
    assert np.allclose(dense_out, sparse_out)
def test_decision_tree_clf():
    """Converted DecisionTreeClassifier agrees with sklearn on list and sparse input.

    Covers multiclass and two binary relabelings of iris, several depths,
    and every prediction method in METHODS. Warnings are silenced because
    some methods may warn on degenerate probability columns.
    """
    X, y = load_iris(return_X_y=True)
    X_ = X.tolist()
    X_sparse = tosparse(X_)
    targets = [y, (y == 0).astype(int), (y == 2).astype(int)]
    for y_ in targets:
        for max_depth in [5, 10, None]:
            clf = DecisionTreeClassifier(max_depth=max_depth, random_state=5)
            clf.fit(X, y_)
            clf_ = convert_estimator(clf)
            for method in METHODS:
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    expected = getattr(clf, method)(X)
                    got_list = getattr(clf_, method)(X_)
                    got_sparse = getattr(clf_, method)(X_sparse)
                assert np.allclose(expected, got_list, equal_nan=True)
                assert np.allclose(expected, got_sparse, equal_nan=True)
def test_logistic():
    """Converted LogisticRegression matches sklearn across configurations.

    Sweeps multiclass vs. binary targets, "ovr"/"multinomial" schemes,
    and intercept on/off; checks every method in METHODS on list and
    sparse representations of the data.
    """
    X, y = load_iris(return_X_y=True)
    X_ = X.tolist()
    X_sparse = tosparse(X_)
    targets = [y, (y == 0).astype(int), (y == 2).astype(int)]
    for y_ in targets:
        for multi_class in ["ovr", "multinomial"]:
            for fit_intercept in [True, False]:
                clf = LogisticRegression(
                    solver=SOLVER,
                    multi_class=multi_class,
                    fit_intercept=fit_intercept,
                    max_iter=MAX_ITER,
                )
                clf.fit(X, y_)
                clf_ = convert_estimator(clf)
                for method in METHODS:
                    expected = getattr(clf, method)(X)
                    got_list = getattr(clf_, method)(X_)
                    got_sparse = getattr(clf_, method)(X_sparse)
                    assert np.allclose(expected, got_list)
                    assert np.allclose(expected, got_sparse, equal_nan=True)
def test_sfmax_sparse():
    """Sparse softmax returns a dict whose entries match the dense list output."""
    A_sparse = tosparse([A])[0]
    dense = sfmax(A)
    expected = {0: dense[0], 1: dense[1]}
    assert sfmax(A_sparse) == expected
def test_matmult_sparse():
    """Element-wise product of a sparse matrix with itself squares each entry."""
    sparse_B = tosparse(B)
    product = matmult_same_dim(sparse_B, sparse_B)
    expected = [[1, 4], [9, 16]]
    assert np.allclose(product.todense(), expected)
def test_dot_2d_sparse():
    """2-D dot of a one-row sparse matrix with dense B gives a one-row result."""
    A_sparse = tosparse([A])
    result = dot_2d(A_sparse, B)
    assert result == [[5, 11]]
def test_dot_sparse():
    """1-D dot of a single sparse row with dense B returns a flat list."""
    A_sparse = tosparse([A])[0]
    result = dot(A_sparse, B)
    assert result == [5, 11]
def test_slice_array_sparse():
    """Slicing all columns of a sparse matrix reproduces the original data.

    NOTE(review): the comparison relies on the truthiness of the dense
    matrix equality result, as in the original test.
    """
    sliced = slice_column(tosparse(B), [0, 1])
    assert sliced.todense() == B
def test_slice_sparse():
    """Slicing a single column index returns that column as a flat list."""
    column = slice_column(tosparse(B), 0)
    assert column == [1, 3]
def test_apply_axis_2d_sparse():
    """Row-wise reduction of a sparse matrix with `sum` yields per-row totals."""
    sparse_mat = tosparse(B)
    row_sums = apply_axis_2d(sparse_mat, sum)
    assert row_sums == [3, 7]
def test_apply_2d_sparse():
    """Element-wise doubling via `apply_2d_sparse` doubles every entry.

    NOTE(review): the comparison relies on the truthiness of the dense
    matrix equality result, as in the original test.
    """
    sparse_mat = tosparse(B)
    doubled = apply_2d_sparse(sparse_mat, lambda v: 2 * v).todense()
    assert doubled == [[2, 4], [6, 8]]