예제 #1
0
def test_similarity_threshold_real(testfiles):
    """Check that an infinite threshold retains more rows than the default.

    ``samples_.similarity_threshold`` is run twice on the first microarray,
    annotation and probes entries of ``testfiles``: once with its default
    threshold and once with ``threshold=np.inf``. The default call must
    return strictly fewer rows than the infinite-threshold call.
    """
    annotation, probes, microarray = (
        first_entry(testfiles, key)
        for key in ('annotation', 'probes', 'microarray')
    )

    default_out = samples_.similarity_threshold(microarray, annotation, probes)
    unbounded_out = samples_.similarity_threshold(
        microarray, annotation, probes, threshold=np.inf
    )

    n_default, n_unbounded = default_out.shape[0], unbounded_out.shape[0]
    assert n_default < n_unbounded
예제 #2
0
def test_collapse_probes_donors(testfiles, donor_probes):
    """Smoke-test ``collapse_probes`` with an explicit ``donor_probes`` value.

    Uses ``method='max_intensity'`` and checks the shape of the returned
    mapping: two entries (one per donor), with 363 and 470 rows respectively,
    and 29131 columns each.
    """
    collapsed = probes_.collapse_probes(
        flatten_dict(testfiles, 'microarray'),
        flatten_dict(testfiles, 'annotation'),
        first_entry(testfiles, 'probes'),
        method='max_intensity',
        donor_probes=donor_probes,
    )
    per_donor = list(collapsed.values())

    # one entry per donor
    assert len(per_donor) == 2
    # rows per donor, in the order the entries were returned
    assert [len(frame) for frame in per_donor] == [363, 470]
    # every donor has the same number of columns
    assert [len(frame.columns) for frame in per_donor] == [29131, 29131]
예제 #3
0
def test_collapse_probes(testfiles, method):
    """Smoke-test ``collapse_probes`` for the supplied ``method``.

    The underlying methods have already been tested on their own, so this
    only verifies that the function returns output of the expected shape
    regardless of which method is requested: two entries (one per donor),
    with 363 and 470 rows respectively, and 29131 columns each.
    """
    collapsed = probes_.collapse_probes(
        flatten_dict(testfiles, 'microarray'),
        flatten_dict(testfiles, 'annotation'),
        first_entry(testfiles, 'probes'),
        method=method,
    )
    per_donor = list(collapsed.values())

    # one entry per donor
    assert len(per_donor) == 2
    # rows per donor, in the order the entries were returned
    assert [len(frame) for frame in per_donor] == [363, 470]
    # every donor has the same number of columns
    assert [len(frame.columns) for frame in per_donor] == [29131, 29131]
예제 #4
0
def test_reannotate_probes(testfiles):
    """Check ``reannotate_probes`` input handling and output layout.

    The function must give identical frames whether it is handed the probes
    filename or the DataFrame loaded from that file, and the result must have
    the expected columns, index name and shape.
    """
    probe_path = first_entry(testfiles, 'probes')

    # a filename and an already-loaded dataframe must be interchangeable
    from_path = probes_.reannotate_probes(probe_path)
    loaded = abagen.io.read_probes(probe_path)
    from_frame = probes_.reannotate_probes(loaded)
    pd.testing.assert_frame_equal(from_path, from_frame)

    # layout of the reannotated frame
    expected_columns = ['probe_name', 'gene_symbol', 'entrez_id']
    assert np.all(from_path.columns == expected_columns)
    assert from_path.index.name == 'probe_id'
    assert from_path.shape == (45821, len(expected_columns))
예제 #5
0
def test_filter_probes(testfiles, threshold, expected_length):
    """Check ``filter_probes`` input handling and output for ``threshold``.

    The function must give identical frames whether the probes are supplied
    as a filename or as the DataFrame loaded from it, and the filtered frame
    must have the expected columns, index name and ``expected_length`` rows.
    """
    pacall = flatten_dict(testfiles, 'pacall')
    probe_path = first_entry(testfiles, 'probes')
    annotation = flatten_dict(testfiles, 'annotation')
    loaded_probes = abagen.io.read_probes(probe_path)

    # a filename and an already-loaded dataframe must be interchangeable
    from_path = probes_.filter_probes(
        pacall, annotation, probe_path, threshold=threshold
    )
    from_frame = probes_.filter_probes(
        pacall, annotation, loaded_probes, threshold=threshold
    )
    pd.testing.assert_frame_equal(from_path, from_frame)

    # layout and size of the filtered frame for this threshold
    expected_columns = [
        'probe_name',
        'gene_id',
        'gene_symbol',
        'gene_name',
        'entrez_id',
        'chromosome',
    ]
    assert np.all(from_path.columns == expected_columns)
    assert from_path.index.name == 'probe_id'
    assert len(from_path) == expected_length
예제 #6
0
def test_first_entry(indict, subkey, out):
    """Check that ``utils.first_entry(indict, subkey)`` equals ``out``."""
    result = utils.first_entry(indict, subkey)
    assert result == out
예제 #7
0
def test_collapse_probes_errors(testfiles):
    """Check that an unrecognised ``method`` raises ``ValueError``."""
    with pytest.raises(ValueError):
        # inputs are built inside the context, exactly as the call
        # arguments were evaluated before
        microarray = flatten_dict(testfiles, 'microarray')
        annotation = flatten_dict(testfiles, 'annotation')
        probe_path = first_entry(testfiles, 'probes')
        probes_.collapse_probes(microarray, annotation, probe_path,
                                method='notamethod')
예제 #8
0
def test_label_samples_real(testfiles, atlas):
    """Smoke-test ``label_samples`` on the real annotation file and atlas.

    The first 'annotation' entry of ``testfiles`` and ``atlas['image']`` are
    passed to ``samples_.label_samples``. The result must be a DataFrame
    whose index is named 'sample_id' and whose only column is 'label'.
    """
    out = samples_.label_samples(first_entry(testfiles, 'annotation'),
                                 atlas['image'])
    assert isinstance(out, pd.DataFrame)
    assert out.index.name == 'sample_id'
    # Compare as a plain list: `Index == list` is elementwise, so asserting
    # it directly only works through single-element array truthiness and
    # raises ValueError (rather than failing the assertion) when the number
    # of columns differs.
    assert list(out.columns) == ['label']