Example #1
0
def test_check_numeric_inplace():
    """Exercise ``scprep.sanitize.check_numeric`` with ``copy=False``.

    Three things are checked:

    * for scipy, numpy, dense pandas and ``matrix.SparseDataFrame`` inputs the
      transform passes ``utils.assert_transform_unchanged``;
    * when ``matrix._pandas_0`` is set, the deprecated ``SparseDataFrame``
      raises a ``TypeError`` asking the caller to use ``copy=True``;
    * an object whose ``astype`` accepts no ``copy`` keyword raises a
      ``TypeError`` carrying the "unexpected keyword argument" message.
    """
    X = data.load_10X()
    inplace_types = (
        matrix._scipy_matrix_types
        + matrix._numpy_matrix_types
        + matrix._pandas_dense_matrix_types
        + [matrix.SparseDataFrame]
    )
    matrix.test_matrix_types(
        X,
        utils.assert_transform_unchanged,
        inplace_types,
        transform=scprep.sanitize.check_numeric,
        copy=False,
    )
    if matrix._pandas_0:
        matrix._ignore_pandas_sparse_warning()
        try:
            utils.assert_raises_message(
                TypeError,
                "pd.SparseDataFrame does not support copy=False. Please use copy=True.",
                scprep.sanitize.check_numeric,
                data=matrix.SparseDataFrame_deprecated(X),
                copy=False,
            )
        finally:
            # Always undo the warning suppression, even if the assertion
            # above fails, so it cannot leak into later tests.
            # NOTE(review): assumes _reset_warnings() is the counterpart of
            # _ignore_pandas_sparse_warning() - confirm in the matrix helper.
            matrix._reset_warnings()

    class TypeErrorClass(object):
        """Stand-in whose ``astype`` takes no ``copy`` keyword."""

        def astype(self, dtype):
            return

    X = TypeErrorClass()
    utils.assert_raises_message(
        TypeError,
        "astype() got an unexpected keyword argument 'copy'",
        scprep.sanitize.check_numeric,
        data=X,
        copy=None,
    )
Example #2
0
def test_toarray_vector():
    """``scprep.utils.toarray`` yields an ``np.ndarray`` for pandas vector types."""

    def assert_converts_to_ndarray(X):
        converted = scprep.utils.toarray(X)
        assert isinstance(converted, np.ndarray)

    # Input generated with shape (50,); each type in
    # matrix._pandas_vector_types is run through the check above.
    vector = data.generate_positive_sparse_matrix(shape=(50,))
    matrix.test_matrix_types(
        vector,
        assert_converts_to_ndarray,
        matrix._pandas_vector_types,
    )
Example #3
0
def test_combine_batches():
    """Check ``scprep.utils.combine_batches`` against a plain ``pd.concat``.

    The 10X data is combined with its own first half of rows under batch
    labels ``[0, 1]``. The combined matrix, its index and the returned sample
    labels are compared with the expected values; with
    ``append_to_cell_names=True`` the index is expected to be the original
    name followed by a two-character ``_<label>`` suffix. Finally the matrix
    comparison is repeated for every type in
    ``matrix._indexable_matrix_types``.
    """

    def first_half(mat):
        # Rows 0 .. n // 2 - 1 of `mat`; used as the second batch.
        return scprep.utils.select_rows(mat, np.arange(mat.shape[0] // 2))

    def transform(X):
        # Only the combined matrix (element 0 of the result) is compared to Y.
        return scprep.utils.combine_batches(
            [X, first_half(X)], batch_labels=[0, 1]
        )[0]

    X = data.load_10X()
    Y = pd.concat([X, first_half(X)])
    Y2, sample_labels = scprep.utils.combine_batches(
        [X, first_half(X)], batch_labels=[0, 1]
    )
    # NOTE(review): this asserts the helper's return value, so it relies on
    # assert_matrix_class_equivalent returning something truthy - confirm.
    assert utils.assert_matrix_class_equivalent(Y, Y2)
    utils.assert_all_equal(Y, Y2)
    assert np.all(Y.index == Y2.index)
    expected_labels = np.concatenate(
        [np.repeat(0, X.shape[0]), np.repeat(1, X.shape[0] // 2)]
    )
    assert np.all(sample_labels == expected_labels)

    Y2, sample_labels = scprep.utils.combine_batches(
        [X, first_half(X)],
        batch_labels=[0, 1],
        append_to_cell_names=True,
    )
    # Dropping the last two characters must recover the original cell names.
    assert np.all(Y.index == np.array([i[:-2] for i in Y2.index]))
    # np.char is the public alias of the private, deprecated
    # np.core.defchararray namespace.
    expected_suffix = np.char.add("_", sample_labels.astype(str))
    assert np.all(expected_suffix == np.array([i[-2:] for i in Y2.index], dtype=str))

    matrix.test_matrix_types(
        X,
        utils.assert_transform_equals,
        matrix._indexable_matrix_types,
        Y=Y,
        transform=transform,
        check=utils.assert_all_equal,
    )
Example #4
0
    def test_sparse_dataframe_fill_value(self):
        """Row and column selection must keep every column's dtype.

        Runs on ``self.X`` cast to float, for each type in
        ``matrix._pandas_sparse_matrix_types``.
        """

        def assert_same_column_dtypes(original, selected):
            # Every column kept by the selection is compared with the same
            # column in the unselected input.
            for col in selected.columns:
                before = original[col].dtype
                after = selected[col].dtype
                assert before == after, (before, after)

        def test_fun(X):
            half_rows = np.arange(X.shape[0] // 2)
            assert_same_column_dtypes(
                X, scprep.select.select_rows(X, idx=half_rows)
            )
            half_cols = np.arange(X.shape[1] // 2)
            assert_same_column_dtypes(
                X, scprep.select.select_cols(X, idx=half_cols)
            )

        float_data = self.X.astype(float)
        matrix.test_matrix_types(
            float_data,
            test_fun,
            matrix._pandas_sparse_matrix_types,
        )
Example #5
0
def test_is_sparse_series():
    """``scprep.utils.is_sparse_series`` is true only for the sparse 10X column.

    The first column of the sparse-loaded 10X data must be detected as a
    sparse series; the first column of the scipy, numpy and dense pandas types
    (plus the deprecated ``SparseDataFrame`` when ``matrix._pandas_0`` is set)
    must not.
    """
    X = data.load_10X(sparse=True)
    first_column = X[X.columns[0]]
    assert scprep.utils.is_sparse_series(first_column)

    def assert_first_column_not_sparse(X):
        # The deprecated SparseDataFrame is indexed by column label directly;
        # every other type goes through select_cols.
        x = (
            X[X.columns[0]]
            if scprep.utils.is_SparseDataFrame(X)
            else scprep.select.select_cols(X, idx=0)
        )
        assert not scprep.utils.is_sparse_series(x)

    # `+` builds a new list, so the append below does not touch the
    # module-level type lists.
    types = (
        matrix._scipy_matrix_types
        + matrix._numpy_matrix_types
        + matrix._pandas_dense_matrix_types
    )
    if matrix._pandas_0:
        types.append(matrix.SparseDataFrame_deprecated)
    dense_values = X.to_numpy()
    matrix.test_matrix_types(dense_values, assert_first_column_not_sparse, types)
Example #6
0
def test_is_sparse_dataframe():
    """``scprep.utils.is_sparse_dataframe`` accepts only the SparseDtype frame.

    A frame cast to ``pd.SparseDtype(float, fill_value=0.0)`` must be
    detected; the scipy, numpy and dense pandas types (plus the deprecated
    ``SparseDataFrame`` when ``matrix._pandas_0`` is set) must not.
    """
    X = data.load_10X(sparse=False)
    sparse_dtype = pd.SparseDtype(float, fill_value=0.0)
    Y = X.astype(sparse_dtype)
    assert scprep.utils.is_sparse_dataframe(Y)

    def assert_not_sparse_dataframe(X):
        assert not scprep.utils.is_sparse_dataframe(X)

    # `+` builds a new list, so the append below does not touch the
    # module-level type lists.
    types = (
        matrix._scipy_matrix_types
        + matrix._numpy_matrix_types
        + matrix._pandas_dense_matrix_types
    )
    if matrix._pandas_0:
        types.append(matrix.SparseDataFrame_deprecated)
    matrix.test_matrix_types(X, assert_not_sparse_dataframe, types)
Example #7
0
def test_combine_batches():
    """Check ``scprep.utils.combine_batches`` against ``pd.concat``.

    The 10X data is combined with its own first half of rows under batch
    labels ``[0, 1]``. The test verifies:

    * the combined matrix and index match ``pd.concat(..., sort=True)``;
    * ``sample_labels`` holds 0 for the first batch and 1 for the second,
      shares the combined index and is named ``"sample_labels"``;
    * with ``append_to_cell_names=True`` each index entry is the original
      name followed by a two-character ``_<label>`` suffix;
    * the matrix comparison holds across pandas types (sorted expectation)
      and scipy/numpy types (unsorted expectation).
    """

    def make_batches(mat):
        # Built fresh on every call: the full matrix plus its first half.
        half = np.arange(mat.shape[0] // 2)
        return [mat, scprep.select.select_rows(mat, idx=half)]

    def transform(X):
        # Only the combined matrix (element 0 of the result) is compared to Y.
        return scprep.utils.combine_batches(make_batches(X), batch_labels=[0, 1])[0]

    def test_fun(X):
        Y, sample_labels = scprep.utils.combine_batches(
            make_batches(X),
            batch_labels=[0, 1],
        )
        assert np.all(sample_labels.index == Y.index)
        assert sample_labels.name == "sample_labels"

    X = data.load_10X()
    Y = pd.concat(make_batches(X), axis=0, sort=True)
    Y2, sample_labels = scprep.utils.combine_batches(
        make_batches(X),
        batch_labels=[0, 1],
        append_to_cell_names=False,
    )
    # NOTE(review): this asserts the helper's return value, so it relies on
    # assert_matrix_class_equivalent returning something truthy - confirm.
    assert utils.assert_matrix_class_equivalent(Y, Y2)
    utils.assert_all_equal(Y, Y2)
    assert np.all(Y.index == Y2.index)
    expected_labels = np.concatenate(
        [np.repeat(0, X.shape[0]), np.repeat(1, X.shape[0] // 2)]
    )
    assert np.all(sample_labels == expected_labels)
    assert np.all(sample_labels.index == Y2.index)
    assert sample_labels.name == "sample_labels"

    Y2, sample_labels = scprep.utils.combine_batches(
        make_batches(X),
        batch_labels=[0, 1],
        append_to_cell_names=True,
    )
    # Dropping the last two characters must recover the original cell names.
    assert np.all(Y.index == np.array([i[:-2] for i in Y2.index]))
    # np.char is the public alias of the private, deprecated
    # np.core.defchararray namespace.
    expected_suffix = np.char.add("_", sample_labels.astype(str))
    assert np.all(expected_suffix == np.array([i[-2:] for i in Y2.index], dtype=str))
    assert np.all(sample_labels.index == Y2.index)
    assert sample_labels.name == "sample_labels"

    matrix.test_matrix_types(
        X,
        utils.assert_transform_equals,
        matrix._pandas_matrix_types,
        Y=Y,
        transform=transform,
        check=utils.assert_all_equal,
    )
    # don't sort for non pandas
    Y = pd.concat(make_batches(X), axis=0, sort=False)
    matrix.test_matrix_types(
        X,
        utils.assert_transform_equals,
        matrix._scipy_indexable_matrix_types + matrix._numpy_matrix_types,
        Y=Y,
        transform=transform,
        check=utils.assert_all_equal,
    )

    matrix.test_pandas_matrix_types(X, test_fun)