Exemple #1
0
def test_multicol_getitem():
    """Set two sample columns in one assignment and read them back.

    The columns are read individually and then together via a list key.
    """
    counts = np.array([[1, 2, 3], [4, 5, 6]])
    adata = AnnData(counts)

    # a single assignment fills both the 'a' and the 'b' column
    adata.smp[['a', 'b']] = [[0, 1], [2, 3]]

    col_a = adata.smp['a']
    assert 0 in col_a

    col_b = adata.smp['b']
    assert col_b.tolist() == [1, 3]

    both = adata.smp[['a', 'b']]
    assert both.tolist() == [[0, 1], [2, 3]]
Exemple #2
0
def test_profile_memory():
    """Log memory usage while allocating, wrapping and slicing a matrix.

    Memory is reported through ``logg.print_memory_usage`` after every step.
    The sliced ``AnnData`` object is returned.
    """
    import gc

    dim = 10  # increase this when profiling
    print()
    logg.print_memory_usage('start profiling')

    # step 1: slice a plain numpy array
    X = np.random.rand(dim, dim).astype('float32')
    logg.print_memory_usage('allocated X')
    var_filter = np.array([0, 1])
    X = X[:, var_filter]
    logg.print_memory_usage('sliced X')

    # step 2: allocate a fresh array and wrap it in AnnData
    X = np.random.rand(dim, dim).astype('float32')
    logg.print_memory_usage('allocated X')
    adata = AnnData(X)
    logg.print_memory_usage('init adata with reference to X')
    adata.var['multi'] = np.random.rand(dim, 3)
    logg.print_memory_usage('added some annotation')

    # step 3: compare adata.__getitem__ with adata.filter_var
    #
    # Here it doesn't make a difference; in other scenarios
    # (e.g. sc.preprocess.weinreb16), filter_var seems to invoke earlier
    # garbage collection than slicing.
    # In-place alternative: adata.filter_var(var_filter)
    adata = adata[:, var_filter]  # with copy
    logg.print_memory_usage('sliced adata')

    gc.collect()
    logg.print_memory_usage('after calling gc.collect()')
    return adata
Exemple #3
0
def test_multicol_single_key_setitem():
    """Store a two-column array under one key and check how it is exposed.

    The dtype field names, the key listing and the read-back value are all
    checked.
    """
    adata = AnnData(np.array([[1, 2, 3], [4, 5, 6]]))

    # the single key 'c' receives both columns
    two_columns = np.array([[0, 1], [2, 3]])
    adata.smp['c'] = two_columns

    field_names = adata.smp.dtype.names
    assert field_names == (SMP_INDEX, 'c001of002', 'c002of002')

    keys = adata.smp.keys()
    assert keys == ['c']

    stored = adata.smp['c']
    assert stored.tolist() == [[0, 1], [2, 3]]
Exemple #4
0
def test_structdict_index():
    """Check ``var.index`` after creation and after two ways of renaming."""
    adata = AnnData(np.array([[1, 2], [3, 4], [5, 6]]))

    def current_index():
        # look up ``adata.var`` afresh on every call
        return adata.var.index.tolist()

    # index right after construction
    assert current_index() == ['0', '1']

    # rename through the annotation's own index attribute
    adata.var.index = ['1', '2']
    assert current_index() == ['1', '2']

    # rename through ``var_names`` on the AnnData object
    adata.var_names = ['3', '4']
    assert current_index() == ['3', '4']
Exemple #5
0
def test_append_add_col():
    """Add sample columns one and two at a time; a 3-item column must fail."""
    adata = AnnData(np.array([[1, 2, 3], [4, 5, 6]]))

    # one new column, then two new columns in a single assignment
    adata.smp['new'] = [1, 2]
    adata.smp[['new2', 'new3']] = [['A', 'B'], ['c', 'd']]

    from pytest import raises

    # three entries for a two-row matrix
    three_items = 'far too long'.split()
    with raises(ValueError):
        adata.smp['new4'] = three_items
Exemple #6
0
def test_names():
    """Pass sample/variable names at construction and read them back."""
    # names given positionally as the second and third argument
    matrix = np.array([[1, 2, 3], [4, 5, 6]])
    smp = dict(smp_names=['A', 'B'])
    var = dict(var_names=['a', 'b', 'c'])
    adata = AnnData(matrix, smp, var)

    expected_smp_names = 'A B'.split()
    expected_var_names = 'a b c'.split()
    assert adata.smp_names.tolist() == expected_smp_names
    assert adata.var_names.tolist() == expected_var_names

    # variable names given through the ``var`` keyword only
    tall_matrix = np.array([[1, 2], [3, 4], [5, 6]])
    adata = AnnData(tall_matrix, var={'var_names': ['a', 'b']})
    assert adata.var_names.tolist() == ['a', 'b']
Exemple #7
0
def test_creation():
    """Construct AnnData from several kinds of input.

    Inputs are a dense array, a masked array, a sparse matrix, an annotated
    array and a 1-d vector. An annotation with four entries for a 2x2 matrix
    must raise ``ValueError``.
    """
    from pytest import raises

    square = np.array([[1, 2], [3, 4]])
    AnnData(square)

    masked = ma.array([[1, 2], [3, 4]])
    AnnData(masked, add={'mask': [0, 1, 1, 0]})

    AnnData(sp.eye(2))

    AnnData(
        np.array([[1, 2, 3], [4, 5, 6]]),
        dict(Smp=['A', 'B']),
        dict(Feat=['a', 'b', 'c']),
    )

    vector_adata = AnnData(np.array([1, 2]))
    assert vector_adata.X.shape == (2, )

    # build the arguments first so only the constructor runs under ``raises``
    bad_matrix = np.array([[1, 2], [3, 4]])
    bad_annotation = dict(TooLong=[1, 2, 3, 4])
    with raises(ValueError):
        AnnData(bad_matrix, bad_annotation)
Exemple #8
0
def test_structdict_keys():
    """Check key listing, membership and missing-key lookup on ``smp``."""
    from pytest import raises

    adata = AnnData(np.array([[1, 2, 3], [4, 5, 6]]))
    pair = np.array([[0, 1], [2, 3]])

    # one key is listed even though it holds two columns
    adata.smp['foo'] = pair
    assert adata.smp_keys() == ['foo']
    assert adata.smp.keys() == ['foo']
    expected_fields = (SMP_INDEX, 'foo001of002', 'foo002of002')
    assert adata.smp.dtype.names == expected_fields

    # a second key is appended to the listing
    adata.smp['d'] = np.array([[0, 1], [2, 3]])
    assert adata.smp.keys() == ['foo', 'd']
    assert 'd' in adata.smp

    # a key that was never set
    with raises(KeyError):
        adata.smp['e']
Exemple #9
0
def test_indices_dtypes():
    """Check which values may be assigned to ``smp_names``.

    Names of the same length as the existing ones are accepted. A longer
    name and a non-ASCII name are each expected to raise ``ValueError``.
    """
    from pytest import raises

    adata = AnnData(np.array([[1, 2, 3], [4, 5,
                                          6]]), dict(smp_names=['A', 'B']),
                    dict(var_names=['a', 'b', 'c']))

    # this assignment is nice
    adata.smp_names = ['d', 'b']

    # Each failing assignment gets its own ``raises`` context: the context
    # exits at the first exception, so a second statement placed in the same
    # block would never execute and would go untested.
    with raises(ValueError):
        # this is not possible currently as we store
        # datatypes of fixed length
        adata.smp_names = ['hello', 'b']
    with raises(ValueError):
        # unicode not allowed for annotation
        adata.smp_names = ['ö', 'a']
Exemple #10
0
def test_set_add():
    """Assign dicts to ``adata.smp`` and check the resulting annotation.

    Each assignment must yield a ``BoundStructArray``. A column with three
    entries for a two-row matrix must raise ``ValueError``.
    """
    from pytest import raises

    adata = AnnData(np.array([[1, 2, 3], [4, 5, 6]]))

    # only the index column
    adata.smp = {SMP_INDEX: ['1', '2']}
    smp = adata.smp
    assert isinstance(smp, BoundStructArray)
    n_fields = len(adata.smp.dtype)
    assert n_fields == 1

    # one data column, no explicit index
    adata.smp = dict(a=[3, 4])
    smp = adata.smp
    assert isinstance(smp, BoundStructArray)
    n_fields = len(adata.smp.dtype)
    assert n_fields == 2
    assert adata.smp_names.tolist() == ['1', '2']  # still the same smp_names

    # wrong number of entries
    with raises(ValueError):
        adata.smp = dict(a=[1, 2, 3])
Exemple #11
0
def test_compute_distribution():
    """Compare ``rank_genes_groups`` output with pickled reference results.

    Seeded random data is built and the ranking is run with
    ``compute_distribution=True``, once per test type. Scores and names must
    equal the contents of ``objs_t_test.pkl`` and ``objs_wilcoxon.pkl``, which
    are opened relative to the current working directory.
    """
    # fixed seed so that the generated matrix is the same on every run
    seed(1234)

    # create test object
    background_mask = binomial(1, 0.15, (100, 20))
    background_counts = negative_binomial(2, 0.25, (100, 20))
    adata = AnnData(np.multiply(background_mask, background_counts))

    # adapt marker genes for the cluster, so as to have some form of
    # reasonable input
    marker_mask = binomial(1, 0.9, (10, 5))
    marker_counts = negative_binomial(1, 0.5, (10, 5))
    adata.X[0:10, 0:5] = np.multiply(marker_mask, marker_counts)

    # create cluster according to groups: rows 0-9 versus rows 10-99
    smp = 'true_groups'
    true_groups = np.zeros((2, 100), dtype=bool)
    true_groups[0, 0:10] = 1
    true_groups[1, 10:100] = 1
    adata.add[smp + '_masks'] = true_groups
    adata.add[smp + '_order'] = np.asarray(['0', '1'])

    # load the stored reference results for both test types
    with open('objs_t_test.pkl', 'rb') as f:
        true_scores_t_test, true_names_t_test = pickle.load(f)
    with open('objs_wilcoxon.pkl', 'rb') as f:
        true_scores_wilcoxon, true_names_wilcoxon = pickle.load(f)

    # now run rank_genes_groups for each test type and compare
    references = (
        ('t_test', true_scores_t_test, true_names_t_test),
        ('wilcoxon', true_scores_wilcoxon, true_names_wilcoxon),
    )
    for test_type, true_scores, true_names in references:
        rank_genes_groups(adata,
                          'true_groups',
                          n_genes=20,
                          compute_distribution=True,
                          test_type=test_type)
        assert np.array_equal(true_scores,
                              adata.add['rank_genes_groups_gene_scores'])
        assert np.array_equal(true_names,
                              adata.add['rank_genes_groups_gene_names'])
Exemple #12
0
def test_results_sparse():
    """Run ``rank_genes_groups`` on sparse input against pickled references.

    The t-test scores are compared with an absolute tolerance. The wilcoxon
    scores and names must match exactly. Reference files ``objs_t_test.pkl``
    and ``objs_wilcoxon.pkl`` are opened relative to the current working
    directory.
    """
    # fixed seed so that the generated matrix is the same on every run
    seed(1234)

    # The dense construction is inefficient, but makes sure that the same
    # data is used in the sparse case.
    background_mask = binomial(1, 0.15, (100, 20))
    background_counts = negative_binomial(2, 0.25, (100, 20))
    adata = AnnData(np.multiply(background_mask, background_counts))

    # adapt marker genes for the cluster, so as to have some form of
    # reasonable input
    marker_mask = binomial(1, 0.9, (10, 5))
    marker_counts = negative_binomial(1, 0.5, (10, 5))
    adata.X[0:10, 0:5] = np.multiply(marker_mask, marker_counts)

    adata_sparse = AnnData(sp.csr_matrix(adata.X))

    # create cluster according to groups: rows 0-9 versus rows 10-99
    smp = 'true_groups'
    true_groups = np.zeros((2, 100), dtype=bool)
    true_groups[0, 0:10] = 1
    true_groups[1, 10:100] = 1
    adata_sparse.add[smp + '_masks'] = true_groups
    adata_sparse.add[smp + '_order'] = np.asarray(['0', '1'])

    # load the stored reference results for both test types
    with open('objs_t_test.pkl', 'rb') as f:
        true_scores_t_test, true_names_t_test = pickle.load(f)
    with open('objs_wilcoxon.pkl', 'rb') as f:
        true_scores_wilcoxon, true_names_wilcoxon = pickle.load(f)

    # --- t-test: compare within a tolerance ---
    rank_genes_groups(adata_sparse,
                      'true_groups',
                      n_genes=20,
                      test_type='t_test')
    # A minor error tolerance is allowed due to different multiplication for
    # sparse/non-sparse objects.
    ERROR_TOLERANCE = 5e-7
    sparse_scores = adata_sparse.add['rank_genes_groups_gene_scores']
    max_error = 0
    for i, _ in enumerate(sparse_scores):
        # entries 0 and 1 of each record are compared
        for entry in (0, 1):
            deviation = abs(sparse_scores[i][entry] -
                            true_scores_t_test[i][entry])
            max_error = max(max_error, deviation)
    assert max_error < ERROR_TOLERANCE

    # --- wilcoxon: compare exactly ---
    rank_genes_groups(adata_sparse,
                      'true_groups',
                      n_genes=20,
                      test_type='wilcoxon')
    assert np.array_equal(true_scores_wilcoxon,
                          adata_sparse.add['rank_genes_groups_gene_scores'])
    assert np.array_equal(true_names_wilcoxon,
                          adata_sparse.add['rank_genes_groups_gene_names'])
Exemple #13
0
def test_slicing():
    """Index an AnnData with ints, lists, arrays, masks and slices.

    For each index, ``.X`` of the result is compared with the expected values.
    """
    adata = AnnData(np.array([[1, 2, 3], [4, 5, 6]]))

    first_column = adata[:, 0]
    assert np.all(first_column.X == adata.X[:, 0])

    # ``np.s_[...]`` builds the same index object that ``adata[...]`` receives
    cases = [
        (np.s_[0, 0], 1),
        (np.s_[0, :], [1, 2, 3]),
        (np.s_[:, 0], [1, 4]),
        (np.s_[:, [0, 1]], [[1, 2], [4, 5]]),
        (np.s_[:, np.array([0, 2])], [[1, 3], [4, 6]]),
        (np.s_[:, np.array([False, True, True])], [[2, 3], [5, 6]]),
        (np.s_[:, 1:3], [[2, 3], [5, 6]]),
    ]
    for index, expected in cases:
        assert adata[index].X.tolist() == expected
Exemple #14
0
def test_transpose():
    """Transpose via ``.T`` and ``.transpose()`` and compare the results.

    The original object must keep its names. The transposed object must have
    sample and variable names swapped.
    """
    matrix = np.array([[1, 2, 3], [4, 5, 6]])
    adata = AnnData(matrix,
                    dict(smp_names=['A', 'B']),
                    dict(var_names=['a', 'b', 'c']))

    adata1 = adata.T

    # make sure the original was not modified
    assert adata.smp_names.tolist() == ['A', 'B']
    assert adata.var_names.tolist() == ['a', 'b', 'c']

    # names are swapped and the matrix shape is transposed
    assert SMP_INDEX in adata1.smp.dtype.names
    assert adata1.smp_names.tolist() == ['a', 'b', 'c']
    assert adata1.var_names.tolist() == ['A', 'B']
    assert adata1.X.shape == adata.X.T.shape

    # the method form must give the same result as the property form
    adata2 = adata.transpose()
    assert np.array_equal(adata1.X, adata2.X)
    assert np.array_equal(adata1.smp, adata2.smp)
    assert np.array_equal(adata1.var, adata2.var)

    # the second entry of ``_is_attr_of`` must be the attribute's own name
    smp_owner_1 = adata1.smp._is_attr_of[1]
    smp_owner_2 = adata2.smp._is_attr_of[1]
    assert smp_owner_1 == 'smp' == smp_owner_2
    var_owner_1 = adata1.var._is_attr_of[1]
    var_owner_2 = adata2.var._is_attr_of[1]
    assert var_owner_1 == 'var' == var_owner_2
Exemple #15
0
def test_slicing_strings():
    """Index an AnnData by sample and variable names.

    Unknown names, alone or as a slice bound, must raise ``IndexError``.
    """
    from pytest import raises

    matrix = np.array([[1, 2, 3], [4, 5, 6]])
    adata = AnnData(matrix,
                    dict(smp_names=['A', 'B']),
                    dict(var_names=['a', 'b', 'c']))

    # single names
    single_entry = adata['A', 'a']
    assert single_entry.X.tolist() == 1
    row_a = adata['A', :]
    assert row_a.X.tolist() == [1, 2, 3]
    column_a = adata[:, 'a']
    assert column_a.X.tolist() == [1, 4]

    # several names, as a list and as an array
    columns_ab = adata[:, ['a', 'b']]
    assert columns_ab.X.tolist() == [[1, 2], [4, 5]]
    columns_ac = adata[:, np.array(['a', 'c'])]
    assert columns_ac.X.tolist() == [[1, 3], [4, 6]]

    # slice bounded by names
    columns_b_to_c = adata[:, 'b':'c']
    assert columns_b_to_c.X.tolist() == [[2, 3], [5, 6]]

    # 'X' is neither a sample name nor a variable name;
    # ``np.s_[...]`` builds the same index object that ``adata[...]`` receives
    bad_indices = (
        np.s_[:, 'X'],
        np.s_['X', :],
        np.s_['A':'X', :],
        np.s_[:, 'a':'X'],
    )
    for bad_index in bad_indices:
        with raises(IndexError):
            _ = adata[bad_index]
Exemple #16
0
def test_creation_from_vector():
    """Construct AnnData from a 1-d array and from a single-column 2-d array."""
    flat = np.array([1, 2, 3])
    AnnData(flat)

    column = np.array([[1], [2], [3]])
    AnnData(column)
Exemple #17
0
def test_print():
    """Print an annotated AnnData object and its sample annotation."""
    matrix = np.array([[1, 2, 3], [4, 5, 6]])
    smp = dict(foo=['A', 'B'])
    var = dict(bar=['a', 'b', 'c'])
    adata = AnnData(matrix, smp, var)

    # the whole object
    print(adata)

    # the sample annotation, preceded by a prompt-like header line
    print('>>> print(adata.smp)')
    print(adata.smp)
Exemple #18
0
def test_ddata():
    """Construct AnnData from a dict with a matrix, row names and column names."""
    matrix = np.array([[1, 2, 3], [4, 5, 6]])
    ddata = dict(
        X=matrix,
        row_names=['A', 'B'],
        col_names=['a', 'b', 'c'],
    )
    AnnData(ddata)
Exemple #19
0
def test_n_smps():
    """Check ``n_smps`` on a 3-row object and on its first two rows."""
    three_rows = np.array([[1, 2], [3, 4], [5, 6]])
    adata = AnnData(three_rows)
    assert adata.n_smps == 3

    # the trailing comma makes the index a 1-tuple holding a slice
    first_two = adata[:2, ]
    assert first_two.n_smps == 2
Exemple #20
0
def test_struct_dict_copy():
    """A copy of ``adata.smp`` must have the same instance attribute names."""
    adata = AnnData(np.array([[1, 2, 3], [4, 5, 6]]))
    scp = adata.smp.copy()

    # ``vars(obj)`` returns ``obj.__dict__``
    original_attrs = vars(adata.smp).keys()
    copied_attrs = vars(scp).keys()
    assert original_attrs == copied_attrs
Exemple #21
0
def test_get_subset_add():
    """A single-element subset must carry the matching annotation entries."""
    matrix = np.array([[1, 2, 3], [4, 5, 6]])
    smp = dict(Smp=['A', 'B'])
    var = dict(Feat=['a', 'b', 'c'])
    adata = AnnData(matrix, smp, var)

    # the subset is taken afresh for each check
    smp_of_subset = adata[0, 0].smp['Smp']
    assert smp_of_subset.tolist() == ['A']

    var_of_subset = adata[0, 0].var['Feat']
    assert var_of_subset.tolist() == ['a']