def test_similarity_threshold_real(testfiles):
    """Compare default and infinite-threshold runs of `similarity_threshold`.

    The call made with ``threshold=np.inf`` is expected to return strictly
    more entries along its first axis than the call using the default.
    """
    # inputs are looked up in this order: annotation, probes, microarray
    annot, probe_info, expression = (
        first_entry(testfiles, key)
        for key in ('annotation', 'probes', 'microarray')
    )

    default = samples_.similarity_threshold(expression, annot, probe_info)
    unbounded = samples_.similarity_threshold(expression, annot, probe_info,
                                              threshold=np.inf)

    n_default, n_unbounded = default.shape[0], unbounded.shape[0]
    assert n_default < n_unbounded
def test_collapse_probes_donors(testfiles, donor_probes):
    """Run `collapse_probes` with 'max_intensity' and a `donor_probes` value.

    Checks how many entries come back and the dimensions of each of them.
    """
    microarray = flatten_dict(testfiles, 'microarray')
    annotation = flatten_dict(testfiles, 'annotation')
    probes = first_entry(testfiles, 'probes')

    collapsed = probes_.collapse_probes(microarray, annotation, probes,
                                        method='max_intensity',
                                        donor_probes=donor_probes)
    frames = list(collapsed.values())

    # one entry per donor
    assert len(frames) == 2
    # per-entry lengths, in the order the entries were returned
    assert [len(frame) for frame in frames] == [363, 470]
    # every entry has the same number of columns
    assert all(len(frame.columns) == 29131 for frame in frames)
def test_collapse_probes(testfiles, method):
    """Smoke-test `collapse_probes` for the supplied `method`.

    The individual collapsing methods are tested elsewhere; this only checks
    that the output has the expected dimensions whichever method is used.
    """
    expression = flatten_dict(testfiles, 'microarray')
    annotation = flatten_dict(testfiles, 'annotation')
    probe_info = first_entry(testfiles, 'probes')

    result = probes_.collapse_probes(expression, annotation, probe_info,
                                     method=method)
    per_donor = list(result.values())

    # number of donors
    assert len(per_donor) == 2

    # length of each donor's output
    for donor_exp, n_samp in zip(per_donor, (363, 470)):
        assert len(donor_exp) == n_samp

    # column count of each donor's output
    for donor_exp in per_donor:
        assert len(donor_exp.columns) == 29131
def test_reannotate_probes(testfiles):
    """Check `reannotate_probes` on a probe file and on its loaded dataframe."""
    probe_path = first_entry(testfiles, 'probes')

    # a filename and the dataframe read from it must give identical output
    from_path = probes_.reannotate_probes(probe_path)
    loaded = abagen.io.read_probes(probe_path)
    from_frame = probes_.reannotate_probes(loaded)
    pd.testing.assert_frame_equal(from_path, from_frame)

    # layout of the reannotated table
    expected_columns = ['probe_name', 'gene_symbol', 'entrez_id']
    assert np.all(from_path.columns == expected_columns)
    assert from_path.index.name == 'probe_id'
    assert from_path.shape == (45821, 3)
def test_filter_probes(testfiles, threshold, expected_length):
    """Check `filter_probes` output for the supplied `threshold`.

    The function is run once with the probe filename and once with the
    dataframe read from it; both results must match, and the result must
    have the expected columns, index name and `expected_length` rows.
    """
    pacall = flatten_dict(testfiles, 'pacall')
    probe_path = first_entry(testfiles, 'probes')
    samples = flatten_dict(testfiles, 'annotation')
    loaded = abagen.io.read_probes(probe_path)

    def run_filter(probe_info):
        # same call for either form of probe input
        return probes_.filter_probes(pacall, samples, probe_info,
                                     threshold=threshold)

    # filename and dataframe inputs must be interchangeable
    filtered = run_filter(probe_path)
    pd.testing.assert_frame_equal(filtered, run_filter(loaded))

    # layout and size of the filtered table
    expected_columns = [
        'probe_name',
        'gene_id',
        'gene_symbol',
        'gene_name',
        'entrez_id',
        'chromosome',
    ]
    assert np.all(filtered.columns == expected_columns)
    assert filtered.index.name == 'probe_id'
    assert len(filtered) == expected_length
def test_first_entry(indict, subkey, out):
    """Check that `utils.first_entry(indict, subkey)` equals `out`."""
    result = utils.first_entry(indict, subkey)
    assert result == out
def test_collapse_probes_errors(testfiles):
    """Check that `collapse_probes` raises ValueError for an unknown method."""
    # prepare inputs outside the `raises` block so that a ValueError raised
    # while loading them cannot be mistaken for the error under test
    microarray = flatten_dict(testfiles, 'microarray')
    annotation = flatten_dict(testfiles, 'annotation')
    probes = first_entry(testfiles, 'probes')

    with pytest.raises(ValueError):
        probes_.collapse_probes(microarray, annotation, probes,
                                method='notamethod')
def test_label_samples_real(testfiles, atlas):
    """Check the type, index name and columns of `label_samples` output."""
    annotation = first_entry(testfiles, 'annotation')
    out = samples_.label_samples(annotation, atlas['image'])

    assert isinstance(out, pd.DataFrame)
    assert out.index.name == 'sample_id'
    # compare as a plain list: `out.columns == [...]` is an elementwise
    # comparison, which only works inside `assert` when there is exactly one
    # column and otherwise errors instead of failing the assertion cleanly
    assert list(out.columns) == ['label']