def test__mirror_ontology_real(testfiles):
    """Smoke test `_mirror_ontology` against real donor files."""
    annotations = flatten_dict(testfiles, 'annotation').values()
    ontologies = flatten_dict(testfiles, 'ontology').values()
    expected = [363, 470]
    for annot_fn, ont_fn, n_samp in zip(annotations, ontologies, expected):
        mirrored = samples_._mirror_ontology(annot_fn, ont_fn)
        # mirroring should leave the per-donor sample count unchanged
        assert len(mirrored) == n_samp
def test_mirror_samples_real(testfiles):
    """Check `mirror_samples` adds samples without more than doubling them."""
    annotations = flatten_dict(testfiles, 'annotation').values()
    ontologies = flatten_dict(testfiles, 'ontology').values()
    n_orig = [363, 470]
    for annot, ont, n in zip(annotations, ontologies, n_orig):
        mirrored = samples_.mirror_samples(annot, ont)
        # mirroring can at most duplicate every sample, so the output size
        # must fall in the half-open interval (n, 2 * n]
        assert n < len(mirrored) <= n * 2
def test_collapse_probes_donors(testfiles, donor_probes):
    """Smoke test `collapse_probes` with the `donor_probes` parameter."""
    collapsed = probes_.collapse_probes(flatten_dict(testfiles, 'microarray'),
                                        flatten_dict(testfiles, 'annotation'),
                                        first_entry(testfiles, 'probes'),
                                        method='max_intensity',
                                        donor_probes=donor_probes)
    expression = list(collapsed.values())
    # one expression dataframe per donor
    assert len(expression) == 2
    # rows = samples per donor, columns = genes
    for exp, n_samp in zip(expression, [363, 470]):
        assert len(exp) == n_samp
        assert len(exp.columns) == 29131
def test_collapse_probes(testfiles, method):
    """Smoke test `collapse_probes` across all selection methods.

    The underlying methods are already tested individually; here we only
    confirm that the function returns the expected shape regardless of the
    provided `method`.
    """
    collapsed = probes_.collapse_probes(flatten_dict(testfiles, 'microarray'),
                                        flatten_dict(testfiles, 'annotation'),
                                        first_entry(testfiles, 'probes'),
                                        method=method)
    expression = list(collapsed.values())
    # one expression dataframe per donor
    assert len(expression) == 2
    # rows = samples per donor, columns = genes
    for exp, n_samp in zip(expression, [363, 470]):
        assert len(exp) == n_samp
        assert len(exp.columns) == 29131
def test_readrnaseq(rnafiles, key, columns):
    """Test `io.read_<key>` loaders on RNA-seq files.

    Checks loading from a filepath and from an already-loaded dataframe
    (identity vs `copy=True` semantics), expected columns, and error
    handling for invalid inputs.
    """
    # the loader depends only on `key`, so resolve it once outside the loop
    func = getattr(io, 'read_{}'.format(key))
    # only the filenames are needed; the donor keys were previously unpacked
    # into an unused variable
    for fn in flatten_dict(rnafiles, key).values():
        # check file exists
        assert op.exists(fn)
        # check loading from filepath
        data = func(fn)
        assert isinstance(data, pd.DataFrame)
        # check loading from dataframe (should return same object)
        data2 = func(data)
        assert id(data) == id(data2)
        # check that copy parameter works as expected
        data3 = func(data, copy=True)
        assert isinstance(data3, pd.DataFrame)
        assert id(data) != id(data3)
        # confirm columns are as expected
        if columns is not None:
            assert np.all(columns == data.columns)
        # confirm errors
        with pytest.raises(TypeError):
            func(1)
        with pytest.raises(TypeError):
            func([1, 2, 3])
        with pytest.raises(FileNotFoundError):
            func('notafile')
def test_readfiles(testfiles, key, has_parq, columns):
    """Test `io.read_<key>` loaders on AHBA microarray-style files.

    Checks existence of the CSV (and parquet, when supported), loading from
    a filepath and from an already-loaded dataframe (identity vs `copy=True`
    semantics), expected columns, and error handling for invalid inputs.
    """
    # the loader depends only on `key`, so resolve it once outside the loop
    func = getattr(io, 'read_{}'.format(key))
    # only the filenames are needed; the donor keys were previously unpacked
    # into an unused variable
    for fn in flatten_dict(testfiles, key).values():
        # check file (CSV + parquet) exist
        assert op.exists(fn)
        if has_parq and io.use_parq:
            assert op.exists(fn.rpartition('.csv')[0] + '.parq')
        # check loading from filepath
        data = func(fn, parquet=True) if has_parq else func(fn)
        assert isinstance(data, pd.DataFrame)
        # check loading from dataframe (should return same object)
        data2 = func(data)
        assert id(data) == id(data2)
        # check that copy parameter works as expected
        data3 = func(data, copy=True)
        assert isinstance(data3, pd.DataFrame)
        assert id(data) != id(data3)
        # confirm columns are as expected
        if columns is not None:
            assert np.all(columns == data.columns)
        # confirm errors
        with pytest.raises(TypeError):
            func(1)
        with pytest.raises(TypeError):
            func([1, 2, 3])
        with pytest.raises(FileNotFoundError):
            func('notafile')
def test_filter_probes(testfiles, threshold, expected_length):
    """Check `filter_probes` output shape and filename/dataframe parity."""
    # set up a few useful variables
    pacall = flatten_dict(testfiles, 'pacall')
    samples = flatten_dict(testfiles, 'annotation')
    probe_file = first_entry(testfiles, 'probes')
    probe_df = abagen.io.read_probes(probe_file)

    # should work with either a filename _or_ a dataframe
    from_file = probes_.filter_probes(pacall, samples, probe_file,
                                      threshold=threshold)
    from_df = probes_.filter_probes(pacall, samples, probe_df,
                                    threshold=threshold)
    pd.testing.assert_frame_equal(from_file, from_df)

    # provided threshold returns expected output
    expected_cols = [
        'probe_name', 'gene_id', 'gene_symbol', 'gene_name', 'entrez_id',
        'chromosome'
    ]
    assert np.all(from_file.columns == expected_cols)
    assert from_file.index.name == 'probe_id'
    assert len(from_file) == expected_length
def test_normalize_expression_real(testfiles, method):
    """Check `normalize_expression` output ranges for each `norm` method.

    NaNs are injected into the microarray data beforehand to confirm they
    survive normalization untouched.
    """
    # load in data and add some NaN values for "realness"
    micro = [
        io.read_microarray(f).T
        for f in flatten_dict(testfiles, 'microarray').values()
    ]
    inds = [[5, 15, 25], [0, 10, 20]]
    for n, idx in enumerate(inds):
        micro[n].iloc[idx] = np.nan

    # methods expected to rescale expression into [0, 1] exactly
    minmax = [
        'minmax', 'scaled_sigmoid', 'scaled_sigmoid_quantiles',
        'scaled_robust_sigmoid', 'mixed_sigmoid'
    ]

    out = correct.normalize_expression(micro, norm=method)
    for exp, idx in zip(out, inds):
        # injected NaNs must persist through normalization
        assert np.all(np.isnan(exp.iloc[idx]))

        exp = exp.dropna(axis=1, how='all')
        if method in minmax:
            assert np.allclose(exp.max(axis=0), 1)
            assert np.allclose(exp.min(axis=0), 0)
        elif method == 'robust_sigmoid':
            # bounded by [0, 1] but not guaranteed to attain the extremes
            assert np.all(exp.max(axis=0) <= 1)
            assert np.all(exp.min(axis=0) >= 0)
        elif method in ['center', 'zscore']:
            assert np.allclose(exp.mean(axis=0), 0)
            if method == 'zscore':
                assert np.allclose(exp.std(axis=0, ddof=1), 1)

    # NOTE: a 'batch' correction test (forcing identical means across donors
    # while preserving the injected NaNs) was previously sketched here but is
    # disabled pending support in `normalize_expression`

    # invalid norm parameter
    with pytest.raises(ValueError):
        correct.normalize_expression(micro, norm='notanorm')
def test_flatten_dict(indict, subkey, outdict):
    """Check `flatten_dict` produces the expected flattened mapping."""
    flattened = utils.flatten_dict(indict, subkey)
    assert flattened == outdict
def test_collapse_probes_errors(testfiles):
    """An unrecognized `method` should raise a ValueError."""
    microarray = flatten_dict(testfiles, 'microarray')
    annotation = flatten_dict(testfiles, 'annotation')
    probes = first_entry(testfiles, 'probes')
    with pytest.raises(ValueError):
        probes_.collapse_probes(microarray, annotation, probes,
                                method='notamethod')
def test_drop_mismatch_samples_real(testfiles):
    """Smoke test `drop_mismatch_samples` on each donor's real files."""
    annotations = flatten_dict(testfiles, 'annotation').values()
    ontologies = flatten_dict(testfiles, 'ontology').values()
    for annot_fn, ont_fn in zip(annotations, ontologies):
        samples_.drop_mismatch_samples(annot_fn, ont_fn)
def test_update_mni_coords_real(testfiles):
    """Smoke test `update_mni_coords` on each donor's annotation file."""
    annotations = flatten_dict(testfiles, 'annotation').values()
    for annot_fn in annotations:
        samples_.update_mni_coords(annot_fn)
def test_update_coords_real(testfiles, rawmri, mni, ns):
    """Smoke test `update_coords` across corrected-MNI / native-space options.

    Renamed from ``test_update_mni_coords_real``: the original name collided
    with the existing ``test_update_mni_coords_real`` test, so pytest would
    only collect one of the two.
    """
    for donor, annotation in flatten_dict(testfiles, 'annotation').items():
        # use a distinct local rather than rebinding the `ns` parameter:
        # clobbering `ns` meant every iteration after the first evaluated
        # `if ns` against the previous donor's value (possibly None)
        native = rawmri[donor]['t1w'] if ns else None
        samples_.update_coords(annotation, corrected_mni=mni,
                               native_space=native)