def test_calculate_cumulative_relative_abundance():
    '''Check that a 'Cum Rel Abund' running-total column is added.

    Two single-column samples are passed to
    calculate_cumulative_relative_abundance. Each returned frame must
    contain a 'Cum Rel Abund' column whose value in a row equals the sum
    of the sample's abundances down to and including that row.
    '''
    data1 = np.array([['', 'Step I'], ['219', 0.239709], ['218', 0.190986]])

    data2 = np.array([['', 'Step II'], ['219', 0.434289], ['218', 0.193835]])

    # Row 0 carries the column labels and column 0 the row labels. Mixing
    # strings and floats makes numpy store every cell as text, hence the
    # explicit cast back to float64.
    df1 = pandas.DataFrame(data=data1[1:, 1:],
                           index=data1[1:, 0],
                           columns=data1[0, 1:]).astype(np.dtype(np.float64))

    df2 = pandas.DataFrame(data=data2[1:, 1:],
                           index=data2[1:, 0],
                           columns=data2[0, 1:]).astype(np.dtype(np.float64))

    samples = [df1, df2]

    result = calculate_cumulative_relative_abundance(samples)

    df1_res = result[0]
    df2_res = result[1]

    assert 'Cum Rel Abund' in df1_res.columns
    assert 'Cum Rel Abund' in df2_res.columns

    # The expected totals are sums of binary floats, so an exact == can
    # fail on rounding alone. Compare with a tight absolute tolerance that
    # is still far below the six-decimal precision of the inputs.
    expected_totals = (
        (df1_res, '219', 0.239709),
        (df1_res, '218', 0.430695),
        (df2_res, '219', 0.434289),
        (df2_res, '218', 0.628124),
    )
    for frame, row_label, expected in expected_totals:
        actual = frame.loc[row_label, 'Cum Rel Abund']
        assert np.isclose(actual, expected, rtol=0.0, atol=1e-9)
# Example no. 2
def test_calculate_cumulative_prop_trf():
    '''Check that a 'Cum Prop TRFs' column is added with the expected values.

    Two two-row samples are run through
    calculate_cumulative_relative_abundance,
    remove_cumulative_abundance_over_one and calculate_cumulative_prop_trf.
    Each resulting frame must hold 0.5 for row '219' and 1.0 for row '218'
    in the 'Cum Prop TRFs' column.
    '''
    raw_tables = (
        np.array([['', 'Step I'], ['219', 0.239709], ['218', 0.190986]]),
        np.array([['', 'Step II'], ['219', 0.434289], ['218', 0.193835]]),
    )

    # Row 0 of each table supplies the column labels, column 0 the row
    # labels; the remaining cells are cast to float64.
    frames = []
    for table in raw_tables:
        frame = pandas.DataFrame(data=table[1:, 1:],
                                 index=table[1:, 0],
                                 columns=table[0, 1:])
        frames.append(frame.astype(np.dtype(np.float64)))

    with_totals = calculate_cumulative_relative_abundance(frames)
    trimmed = remove_cumulative_abundance_over_one(with_totals)
    result = calculate_cumulative_prop_trf(trimmed)

    checked = (result[0], result[1])

    # Column presence is verified for both frames before any value check.
    for frame in checked:
        assert 'Cum Prop TRFs' in frame.columns

    for frame in checked:
        assert frame.loc['219', 'Cum Prop TRFs'] == 0.5
        assert frame.loc['218', 'Cum Prop TRFs'] == 1.0
def setup_data():
    '''Build three sample frames and pass them through the pipeline.

    The frames are given to calculate_cumulative_relative_abundance and
    the outcome is handed to remove_cumulative_abundance_over_one, whose
    return value is returned unchanged.
    '''
    raw_tables = [
        # Values total 1.1; expected to have its last row removed.
        np.array([['', 'Step I'],
                  ['219', 0.7],
                  ['218', 0.3],
                  ['217', 0.1]]),
        # Expected to keep every row.
        # NOTE(review): this fixture was originally described as adding to
        # exactly 1, but the values total 0.8 - confirm which is intended.
        np.array([['', 'Step II'],
                  ['219', 0.4],
                  ['218', 0.2],
                  ['217', 0.2]]),
        # Values total 0.5, never reaching 1; expected to keep every row.
        np.array([['', 'Step III'],
                  ['219', 0.1],
                  ['218', 0.2],
                  ['217', 0.2]]),
    ]

    # Row 0 of each table supplies the column labels, column 0 the row
    # labels; the remaining cells are cast to float64.
    frames = [
        pandas.DataFrame(data=table[1:, 1:], index=table[1:, 0],
                         columns=table[0, 1:]).astype(np.dtype(np.float64))
        for table in raw_tables
    ]

    with_totals = calculate_cumulative_relative_abundance(frames)
    return remove_cumulative_abundance_over_one(with_totals)