Example #1
0
def test_normalize_expression_structures():
    """Check ``normalize_expression(norm='center')`` with a ``structures`` frame.

    Two groupings of the same five-row input are exercised:

    * every row carries the same structure label, where the expected output
      is each column minus the mean of its non-NaN entries (2.5);
    * rows are split into two labels (``'a'`` for the first three rows,
      ``'b'`` for the last two), where the expected output is each value
      minus the mean of the non-NaN entries sharing its label (1.5 and 3.5).

    In both cases the leading NaN row is expected to remain NaN.
    """
    data = pd.DataFrame(dict(a=[np.nan, 1, 2, 3, 4], b=[np.nan, 1, 2, 3, 4]))

    # single label broadcast across every row of `data`
    same = pd.DataFrame(dict(structure='a'), index=data.index)
    same_expect = pd.DataFrame(
        dict(a=[np.nan, -1.5, -0.5, 0.5, 1.5],
             b=[np.nan, -1.5, -0.5, 0.5, 1.5]))
    nsame = correct.normalize_expression(data, norm='center', structures=same)
    pd.testing.assert_frame_equal(same_expect, nsame)

    # two labels: rows 0-2 belong to 'a', rows 3-4 belong to 'b'
    diff = pd.DataFrame(dict(structure=['a', 'a', 'a', 'b', 'b']),
                        index=data.index)
    diff_expect = pd.DataFrame(
        dict(a=[np.nan, -0.5, 0.5, -0.5, 0.5],
             b=[np.nan, -0.5, 0.5, -0.5, 0.5]))
    ndiff = correct.normalize_expression(data, norm='center', structures=diff)
    pd.testing.assert_frame_equal(diff_expect, ndiff)


# The function was originally spelled ``text_...``, which pytest's default
# ``test_*`` collection rule skips; the old name is kept as an alias so any
# existing reference to it continues to resolve.
text_normalize_expression_structures = test_normalize_expression_structures
Example #2
0
def test_normalize_expression_real(testfiles, method):
    """Run ``normalize_expression`` with ``norm=method`` on NaN-seeded data.

    Parameters
    ----------
    testfiles
        Object handed to ``flatten_dict(testfiles, 'microarray')``; each
        value of the result is read with ``io.read_microarray``.
    method : str
        Normalization name forwarded as ``norm``. Presumably injected by a
        parametrization that is not visible in this block -- TODO confirm.
    """
    # rows that are overwritten with NaN in the first / second loaded frame
    nan_rows = [[5, 15, 25], [0, 10, 20]]

    micro = []
    for fname in flatten_dict(testfiles, 'microarray').values():
        micro.append(io.read_microarray(fname).T)
    for pos, rows in enumerate(nan_rows):
        micro[pos].iloc[rows] = np.nan

    # norms whose per-column output is asserted to span exactly [0, 1]
    unit_range_norms = (
        'minmax', 'scaled_sigmoid', 'scaled_sigmoid_quantiles',
        'scaled_robust_sigmoid', 'mixed_sigmoid',
    )

    out = correct.normalize_expression(micro, norm=method)
    for normed, rows in zip(out, nan_rows):
        # the seeded rows must still be entirely NaN after normalization
        assert np.all(np.isnan(normed.iloc[rows]))
        kept = normed.dropna(axis=1, how='all')
        if method in unit_range_norms:
            assert np.allclose(kept.max(axis=0), 1)
            assert np.allclose(kept.min(axis=0), 0)
        elif method == 'robust_sigmoid':
            # only bounded by [0, 1]; the extremes need not be reached
            assert np.all(kept.max(axis=0) <= 1)
            assert np.all(kept.min(axis=0) >= 0)
        elif method == 'zscore':
            assert np.allclose(kept.mean(axis=0), 0)
            assert np.allclose(kept.std(axis=0, ddof=1), 1)
        elif method == 'center':
            assert np.allclose(kept.mean(axis=0), 0)

    # Disabled batch-correction check, kept for reference:
    # # batch correct: force means identical
    # out = correct.normalize_expression(micro, norm='batch')
    # assert np.allclose(*[e.mean(axis=0, skipna=True) for e in out])
    # # the NaN values should still be there, though
    # for exp, idx in zip(out, inds):
    #     assert np.all(np.isnan(exp.iloc[idx]))

    # an unrecognized norm name is expected to raise ValueError
    with pytest.raises(ValueError):
        correct.normalize_expression(micro, norm='notanorm')
Example #3
0
def test_normalize_expression_real(testfiles):
    """Exercise the 'srs', 'zscore' and 'batch' norms on NaN-seeded data.

    Parameters
    ----------
    testfiles
        Mapping whose ``'microarray'`` entry is iterated and each item read
        with ``io.read_microarray``.
    """
    # rows that are overwritten with NaN in the first / second loaded frame
    nan_rows = [[5, 15, 25], [0, 10, 20]]
    micro = [io.read_microarray(fname).T for fname in testfiles['microarray']]
    for pos, rows in enumerate(nan_rows):
        micro[pos].iloc[rows] = np.nan

    def _assert_nans_kept(frame, rows):
        """Assert that every value in the seeded ``rows`` is still NaN."""
        assert np.all(np.isnan(frame.iloc[rows]))

    # 'srs': each retained column should reach exactly 0 and 1
    srs = correct.normalize_expression(micro, norm='srs')
    for frame, rows in zip(srs, nan_rows):
        _assert_nans_kept(frame, rows)
        kept = frame.dropna(axis=1, how='all')
        assert np.allclose(kept.max(axis=0), 1)
        assert np.allclose(kept.min(axis=0), 0)

    # 'zscore': each retained column should have mean 0 and sample std 1
    zscore = correct.normalize_expression(micro, norm='zscore')
    for frame, rows in zip(zscore, nan_rows):
        _assert_nans_kept(frame, rows)
        kept = frame.dropna(axis=1, how='all')
        assert np.allclose(kept.mean(axis=0), 0)
        assert np.allclose(kept.std(axis=0, ddof=1), 1)

    # 'batch': per-column means should agree across the returned frames,
    # while the seeded NaN rows remain in place
    batch = correct.normalize_expression(micro, norm='batch')
    donor_means = [frame.mean(axis=0, skipna=True) for frame in batch]
    assert np.allclose(*donor_means)
    for frame, rows in zip(batch, nan_rows):
        _assert_nans_kept(frame, rows)

    # an unrecognized norm name is expected to raise ValueError
    with pytest.raises(ValueError):
        correct.normalize_expression(micro, norm='notanorm')

    # batch correction on a single frame is expected to raise ValueError
    with pytest.raises(ValueError):
        correct.normalize_expression(micro[0], norm='batch')