Exemple #1
0
def test__mirror_ontology_real(testfiles):
    """Run ``_mirror_ontology`` on every annotation/ontology pair.

    The annotation and ontology entries of `testfiles` are paired in order
    and the length of each result is compared to a hard-coded expected
    value (363 for the first pair, 470 for the second).
    """
    annotations = flatten_dict(testfiles, 'annotation').values()
    ontologies = flatten_dict(testfiles, 'ontology').values()
    expected_lengths = [363, 470]
    triples = zip(annotations, ontologies, expected_lengths)
    for annot_entry, onto_entry, n_expected in triples:
        mirrored = samples_._mirror_ontology(annot_entry, onto_entry)
        assert len(mirrored) == n_expected
Exemple #2
0
def test_mirror_samples_real(testfiles):
    """Check the size of ``mirror_samples`` output for each input pair.

    For every annotation/ontology pair in `testfiles`, the output must be
    strictly longer than the original sample count and at most twice as
    long.
    """
    annotations = flatten_dict(testfiles, 'annotation').values()
    ontologies = flatten_dict(testfiles, 'ontology').values()
    original_counts = [363, 470]
    triples = zip(annotations, ontologies, original_counts)
    for annot_entry, onto_entry, n_orig in triples:
        mirrored = samples_.mirror_samples(annot_entry, onto_entry)
        n_out = len(mirrored)
        # mirroring must add samples, but it cannot more than duplicate
        # the original set, hence the upper bound of 2x
        assert n_orig < n_out <= n_orig * 2
Exemple #3
0
def test_collapse_probes_donors(testfiles, donor_probes):
    """Check ``collapse_probes`` output shape for each `donor_probes` value.

    Uses ``method='max_intensity'`` and verifies the number of returned
    entries, the length of each entry and its number of columns.
    """
    microarray = flatten_dict(testfiles, 'microarray')
    annotation = flatten_dict(testfiles, 'annotation')
    probes = first_entry(testfiles, 'probes')

    collapsed = probes_.collapse_probes(microarray, annotation, probes,
                                        method='max_intensity',
                                        donor_probes=donor_probes)
    frames = list(collapsed.values())

    # one entry per donor
    assert len(frames) == 2
    # each entry has the expected length ...
    expected_lengths = [363, 470]
    assert all(len(frame) == n_samp
               for frame, n_samp in zip(frames, expected_lengths))
    # ... and the expected number of columns
    assert all(len(frame.columns) == 29131 for frame in frames)
Exemple #4
0
def test_collapse_probes(testfiles, method):
    """Smoke-test ``collapse_probes`` for the supplied `method`.

    The underlying collapsing methods are tested elsewhere; this only
    confirms that the returned structure has the expected dimensions
    whichever method is requested.
    """
    microarray = flatten_dict(testfiles, 'microarray')
    annotation = flatten_dict(testfiles, 'annotation')
    probes = first_entry(testfiles, 'probes')

    collapsed = probes_.collapse_probes(microarray, annotation, probes,
                                        method=method)
    frames = list(collapsed.values())

    # one entry per donor
    assert len(frames) == 2
    for frame, n_samp in zip(frames, [363, 470]):
        assert len(frame) == n_samp
    for frame in frames:
        assert len(frame.columns) == 29131
Exemple #5
0
def test_readrnaseq(rnafiles, key, columns):
    """Exercise the ``io.read_<key>`` loader on every `key` file.

    For each file this checks loading from a path, pass-through of an
    already-loaded dataframe, the ``copy`` parameter, the column labels
    (when `columns` is not None) and the errors raised on bad input.
    """
    for path in flatten_dict(rnafiles, key).values():
        reader = getattr(io, 'read_{}'.format(key))

        # the file has to be present on disk
        assert op.exists(path)

        # loading from a filepath gives a dataframe
        loaded = reader(path)
        assert isinstance(loaded, pd.DataFrame)

        # handing in a dataframe should give back the very same object
        passthrough = reader(loaded)
        assert passthrough is loaded

        # with copy=True a distinct dataframe must come back
        duplicate = reader(loaded, copy=True)
        assert isinstance(duplicate, pd.DataFrame)
        assert duplicate is not loaded

        # column labels match the expectation, if one was given
        if columns is not None:
            assert np.all(columns == loaded.columns)

        # inputs of the wrong type are rejected
        for bad_input in (1, [1, 2, 3]):
            with pytest.raises(TypeError):
                reader(bad_input)

        # a path that does not exist is rejected
        with pytest.raises(FileNotFoundError):
            reader('notafile')
0
def test_readfiles(testfiles, key, has_parq, columns):
    """Exercise the ``io.read_<key>`` loader on every `key` file.

    For each file this checks that the CSV (and, when applicable, the
    parquet sibling) exists, then checks loading from a path, pass-through
    of an already-loaded dataframe, the ``copy`` parameter, the column
    labels (when `columns` is not None) and the errors raised on bad input.
    """
    for path in flatten_dict(testfiles, key).values():
        reader = getattr(io, 'read_{}'.format(key))

        # the CSV must exist; so must the parquet file when one is expected
        assert op.exists(path)
        if has_parq and io.use_parq:
            parq_path = path.rpartition('.csv')[0] + '.parq'
            assert op.exists(parq_path)

        # loading from a filepath gives a dataframe
        if has_parq:
            loaded = reader(path, parquet=True)
        else:
            loaded = reader(path)
        assert isinstance(loaded, pd.DataFrame)

        # handing in a dataframe should give back the very same object
        passthrough = reader(loaded)
        assert passthrough is loaded

        # with copy=True a distinct dataframe must come back
        duplicate = reader(loaded, copy=True)
        assert isinstance(duplicate, pd.DataFrame)
        assert duplicate is not loaded

        # column labels match the expectation, if one was given
        if columns is not None:
            assert np.all(columns == loaded.columns)

        # inputs of the wrong type are rejected
        for bad_input in (1, [1, 2, 3]):
            with pytest.raises(TypeError):
                reader(bad_input)

        # a path that does not exist is rejected
        with pytest.raises(FileNotFoundError):
            reader('notafile')
Exemple #7
0
def test_filter_probes(testfiles, threshold, expected_length):
    """Check ``filter_probes`` for the given `threshold`.

    The probe information may be supplied as a filename or as a dataframe;
    both must give equal results. The filtered frame must also have the
    expected columns, index name and length.
    """
    # inputs shared by both calls below
    pacall = flatten_dict(testfiles, 'pacall')
    probe_file = first_entry(testfiles, 'probes')
    samples = flatten_dict(testfiles, 'annotation')
    probe_df = abagen.io.read_probes(probe_file)

    # a filename and a pre-loaded dataframe must yield the same frame
    filtered = probes_.filter_probes(pacall, samples, probe_file,
                                     threshold=threshold)
    from_dataframe = probes_.filter_probes(pacall, samples, probe_df,
                                           threshold=threshold)
    pd.testing.assert_frame_equal(filtered, from_dataframe)

    # the provided threshold gives the expected output
    expected_columns = [
        'probe_name',
        'gene_id',
        'gene_symbol',
        'gene_name',
        'entrez_id',
        'chromosome',
    ]
    assert np.all(filtered.columns == expected_columns)
    assert filtered.index.name == 'probe_id'
    assert len(filtered) == expected_length
Exemple #8
0
def test_normalize_expression_real(testfiles, method):
    """Check ``normalize_expression`` on loaded microarray data.

    A few rows of each (transposed) microarray frame are set to NaN before
    normalizing with ``norm=method``. Those rows must still be NaN
    afterwards, and the remaining values must satisfy the range / moment
    checks that apply to `method`. An unknown `norm` must raise ValueError.
    """
    # rows to blank out in the first and second frame, respectively
    nan_rows = [[5, 15, 25], [0, 10, 20]]

    # load in the data and add some NaN values for "realness"
    micro = []
    for fname in flatten_dict(testfiles, 'microarray').values():
        micro.append(io.read_microarray(fname).T)
    for pos, rows in enumerate(nan_rows):
        micro[pos].iloc[rows] = np.nan

    # methods whose output is checked to span exactly [0, 1] per column
    unit_range_methods = (
        'minmax',
        'scaled_sigmoid',
        'scaled_sigmoid_quantiles',
        'scaled_robust_sigmoid',
        'mixed_sigmoid',
    )

    normalized = correct.normalize_expression(micro, norm=method)
    for normed, rows in zip(normalized, nan_rows):
        # the inserted NaNs must still be there
        assert np.all(np.isnan(normed.iloc[rows]))
        normed = normed.dropna(axis=1, how='all')
        if method in unit_range_methods:
            assert np.allclose(normed.max(axis=0), 1)
            assert np.allclose(normed.min(axis=0), 0)
        elif method == 'robust_sigmoid':
            assert np.all(normed.max(axis=0) <= 1)
            assert np.all(normed.min(axis=0) >= 0)
        elif method == 'center':
            assert np.allclose(normed.mean(axis=0), 0)
        elif method == 'zscore':
            assert np.allclose(normed.mean(axis=0), 0)
            assert np.allclose(normed.std(axis=0, ddof=1), 1)

    # NOTE: the batch-correction check below is currently disabled
    # # batch correct: force means identical
    # out = correct.normalize_expression(micro, norm='batch')
    # assert np.allclose(*[e.mean(axis=0, skipna=True) for e in out])
    # # the NaN values should still be there, though
    # for exp, idx in zip(out, inds):
    #     assert np.all(np.isnan(exp.iloc[idx]))

    # an unrecognized norm parameter is rejected
    with pytest.raises(ValueError):
        correct.normalize_expression(micro, norm='notanorm')
Exemple #9
0
def test_flatten_dict(indict, subkey, outdict):
    """``utils.flatten_dict(indict, subkey)`` must equal `outdict`."""
    flattened = utils.flatten_dict(indict, subkey)
    assert flattened == outdict
Exemple #10
0
def test_collapse_probes_errors(testfiles):
    """``collapse_probes`` must raise ValueError for an unknown method."""
    with pytest.raises(ValueError):
        microarray = flatten_dict(testfiles, 'microarray')
        annotation = flatten_dict(testfiles, 'annotation')
        probes = first_entry(testfiles, 'probes')
        probes_.collapse_probes(microarray, annotation, probes,
                                method='notamethod')
Exemple #11
0
def test_drop_mismatch_samples_real(testfiles):
    """Smoke test: call ``drop_mismatch_samples`` on each input pair.

    Nothing is asserted about the return value; the test only fails if a
    call raises.
    """
    annotations = flatten_dict(testfiles, 'annotation').values()
    ontologies = flatten_dict(testfiles, 'ontology').values()
    pairs = zip(annotations, ontologies)
    for annot_entry, onto_entry in pairs:
        samples_.drop_mismatch_samples(annot_entry, onto_entry)
Exemple #12
0
def test_update_mni_coords_real(testfiles):
    """Smoke test: call ``update_mni_coords`` on each annotation entry.

    Nothing is asserted about the return value; the test only fails if a
    call raises.
    """
    annotations = flatten_dict(testfiles, 'annotation')
    for annot_entry in annotations.values():
        samples_.update_mni_coords(annot_entry)
Exemple #13
0
def test_update_mni_coords_real(testfiles, rawmri, mni, ns):
    """Smoke test: call ``update_coords`` on each donor's annotation entry.

    Parameters
    ----------
    testfiles, rawmri
        Mappings passed in by the test setup; `rawmri` is indexed by donor
        and then by ``'t1w'``.
    mni
        Forwarded unchanged as ``corrected_mni``.
    ns
        Flag selecting whether ``native_space`` receives the donor's
        ``'t1w'`` entry (truthy) or None (falsy).

    Nothing is asserted about the return value; the test only fails if a
    call raises.
    """
    for donor, annotation in flatten_dict(testfiles, 'annotation').items():
        # Use a separate local rather than rebinding `ns`: otherwise, from
        # the second donor on, the branch would depend on the truthiness
        # of the previous donor's value instead of the original flag.
        native_space = rawmri[donor]['t1w'] if ns else None
        samples_.update_coords(annotation, corrected_mni=mni,
                               native_space=native_space)