def test_taxon_as_sample_name(self, get_testing_data):
    """Feature table contains a sample that is literally named "Taxon".

    A float column called "Taxon" is added to the feature table before it
    is passed to filter_and_join_taxonomy together with the fixture's
    taxonomy. The rows of the returned numerator/denominator frames are
    then compared against the same rows of the (modified) feature table.
    """
    feature_table = get_testing_data.table

    # Values are arbitrary; only their round trip through the join matters.
    taxon_counts = [np.random.randint(1, 10) for _ in range(6)]
    feature_table["Taxon"] = taxon_counts
    feature_table["Taxon"] = feature_table["Taxon"].astype(np.float64)

    numerator, denominator = filter_and_join_taxonomy(
        feature_table, get_testing_data.taxonomy, "Char", "saur"
    )

    numerator_ids = ["F3", "F4", "F5"]
    denominator_ids = ["F0", "F1", "F2"]

    expected_numerator = feature_table.loc[numerator_ids]
    expected_denominator = feature_table.loc[denominator_ids]
    observed_numerator = numerator.loc[numerator_ids]
    observed_denominator = denominator.loc[denominator_ids]

    # num/denom frames must reproduce the table values, "Taxon" included
    _check_dataframe_equality(observed_numerator, expected_numerator)
    _check_dataframe_equality(observed_denominator, expected_denominator)
def _check_missing_features_dfs(self, table, taxonomy):
    """Auxiliary function shared by the missing-feature tests.

    test_taxonomy_missing_features and test_feat_table_missing_features
    run essentially the same checks; they differ only in whether the
    feature was removed from the table or from the taxonomy. Both are
    expected to end up without "F5" in the numerator frame.
    """
    num_df, denom_df = filter_and_join_taxonomy(
        table, taxonomy, "Char", "saur"
    )
    assert "F5" not in num_df.index

    numerator_ids = ["F3", "F4"]
    denominator_ids = ["F0", "F1", "F2"]

    # Set operations in Qarcoal change the order of samples, so every
    # frame is re-indexed to one fixed column order before comparing.
    ordered_samples = ["S{}".format(idx) for idx in range(5)]

    expected_num = pd.DataFrame(table.loc[numerator_ids])[ordered_samples]
    expected_denom = pd.DataFrame(table.loc[denominator_ids])[
        ordered_samples
    ]
    observed_num = num_df.loc[numerator_ids][ordered_samples]
    observed_denom = denom_df.loc[denominator_ids][ordered_samples]

    # num/denom frames must carry exactly the values found in the table
    assert expected_num.equals(observed_num)
    assert expected_denom.equals(observed_denom)
def test_no_common_samples(self):
    """No samples with both numerator and denominator features.

    filter_and_join_taxonomy must raise a ValueError with a fixed
    message when given the table below.

    -----------------------------------------------
    |                 S0   S1   S2   S3   S4      |
    | F0/Charmander    0    1   10    0    0      |
    | F1/Charmeleon    0    2   15    0    0      |
    | F2/Charizard     0    4    7    0    0      |
    | F3/Bulbasaur     5    0    0    0    6      |
    | F4/Ivysaur       7    0    0    0    4      |
    | F5/Venusaur      9    0    0    0    2      |
    -----------------------------------------------
    """
    samps = ["S{}".format(i) for i in range(5)]
    feats = ["F{}".format(i) for i in range(6)]

    # One list per sample, ordered F0..F5.
    s0 = [0, 0, 0, 5, 7, 9]
    s1 = [1, 2, 4, 0, 0, 0]
    s2 = [10, 15, 7, 0, 0, 0]
    s3 = [0, 0, 0, 0, 0, 0]
    s4 = [0, 0, 0, 6, 4, 2]

    # Plain ndarray instead of np.matrix (no longer recommended by NumPy);
    # the transpose yields the features x samples layout.
    mat = np.array([s0, s1, s2, s3, s4]).T
    table = biom.table.Table(mat, feats, samps).to_dataframe()

    tax_labels = [
        "Charmander",
        "Charmeleon",
        "Charizard",
        "Bulbasaur",
        "Ivysaur",
        "Venusaur",
    ]
    confidence = ["0.99"] * 6
    taxonomy = pd.DataFrame([feats, tax_labels, confidence]).T
    taxonomy.columns = ["feature-id", "Taxon", "Confidence"]
    taxonomy = taxonomy.set_index("feature-id", drop=True)

    with pytest.raises(ValueError) as excinfo:
        filter_and_join_taxonomy(table, taxonomy, "Char", "saur")

    # Checked after the with-block: inside it, the statement would be
    # skipped once the call above raises.
    assert (
        "No samples contain both numerator and denominator features!"
        == str(excinfo.value)
    )
def test_overlapping_columns(self, get_testing_data):
    """Feature table and taxonomy share one or more column names.

    The intent is to make sure Qarcoal drops every taxonomy column
    except Taxon. When a column name exists in both the feature table
    and the taxonomy (i.e. a sample named Confidence), the feature
    table's column should be the one retained, and the join should not
    trigger a Pandas error about overlapping columns.
    """
    feature_table = get_testing_data.table
    taxonomy = get_testing_data.taxonomy

    # Give both frames the same two extra columns, filled independently.
    for shared_name in ("Overlap1", "Overlap2"):
        taxonomy[shared_name] = [
            np.random.randint(1, 10) for _ in range(6)
        ]
        feature_table[shared_name] = [
            np.random.randint(1, 10) for _ in range(6)
        ]
    feature_table = feature_table.apply(
        lambda column: column.astype(np.float64)
    )

    numerator, denominator = filter_and_join_taxonomy(
        feature_table, taxonomy, "Char", "saur"
    )

    numerator_ids = ["F3", "F4", "F5"]
    denominator_ids = ["F0", "F1", "F2"]

    expected_numerator = feature_table.loc[numerator_ids]
    expected_denominator = feature_table.loc[denominator_ids]
    observed_numerator = numerator.loc[numerator_ids]
    observed_denominator = denominator.loc[denominator_ids]

    # num/denom frames must carry the feature table's values for
    # Overlap1 and Overlap2
    _check_dataframe_equality(expected_numerator, observed_numerator)
    _check_dataframe_equality(expected_denominator, observed_denominator)