Example #1
0
    def test_taxon_as_sample_name(self, get_testing_data):
        """A sample column literally named "Taxon" is carried through.

        Adds a float-valued "Taxon" column to the feature table, calls
        filter_and_join_taxonomy with the strings "Char" and "saur", and
        checks that the selected rows of both returned frames match the
        same rows of the feature table.
        """
        feature_table = get_testing_data.table
        # Six random counts drawn from [1, 10), stored directly as float64.
        taxon_counts = [np.random.randint(1, 10) for _ in range(6)]
        feature_table["Taxon"] = np.asarray(taxon_counts, dtype=np.float64)

        numerator, denominator = filter_and_join_taxonomy(
            feature_table,
            get_testing_data.taxonomy,
            "Char",
            "saur",
        )

        num_ids = ["F3", "F4", "F5"]
        denom_ids = ["F0", "F1", "F2"]

        # Slice everything up front so a missing feature ID surfaces as a
        # KeyError before any equality check runs.
        expected_num = feature_table.loc[num_ids]
        expected_denom = feature_table.loc[denom_ids]
        observed_num = numerator.loc[num_ids]
        observed_denom = denominator.loc[denom_ids]

        # The returned frames must reproduce the table values, including
        # those of the "Taxon" sample.
        comparisons = (
            (observed_num, expected_num),
            (observed_denom, expected_denom),
        )
        for observed, expected in comparisons:
            _check_dataframe_equality(observed, expected)
Example #2
0
    def _check_missing_features_dfs(self, table, taxonomy):
        """Shared assertions for the missing-feature tests.

        test_taxonomy_missing_features and test_feat_table_missing_features
        run essentially the same checks; they differ only in whether the
        feature was dropped from the table or from the taxonomy, so both
        delegate to this helper.
        """
        numerator, denominator = filter_and_join_taxonomy(
            table,
            taxonomy,
            "Char",
            "saur",
        )
        assert "F5" not in numerator.index

        num_ids = ["F3", "F4"]
        denom_ids = ["F0", "F1", "F2"]
        samples = ["S" + str(i) for i in range(5)]

        expected = [pd.DataFrame(table.loc[ids]) for ids in (num_ids, denom_ids)]
        observed = [numerator.loc[num_ids], denominator.loc[denom_ids]]

        # Set operations in Qarcoal change the order of samples, so every
        # frame is put into the same column order before comparing.
        expected_num, expected_denom, observed_num, observed_denom = [
            frame[samples] for frame in expected + observed
        ]

        # The returned frames must hold exactly the values found in table.
        assert expected_num.equals(observed_num)
        assert expected_denom.equals(observed_denom)
Example #3
0
    def test_no_common_samples(self):
        """No samples with both numerator and denominator features.

        Every sample below has nonzero counts for at most one of the two
        feature groups (F0-F2 or F3-F5), and S3 is empty, so
        filter_and_join_taxonomy is expected to raise a ValueError.

           -----------------------------------------------
          |                    S0    S1    S2    S3    S4 |
          | F0/Charmander       0     1    10     0     0 |
          | F1/Charmeleon       0     2    15     0     0 |
          | F2/Charizard        0     4     7     0     0 |
          | F3/Bulbasaur        5     0     0     0     6 |
          | F4/Ivysaur          7     0     0     0     4 |
          | F5/Venusaur         9     0     0     0     2 |
           -----------------------------------------------
        """
        samps = ["S{}".format(i) for i in range(5)]
        feats = ["F{}".format(i) for i in range(6)]
        s0 = [0, 0, 0, 5, 7, 9]
        s1 = [1, 2, 4, 0, 0, 0]
        s2 = [10, 15, 7, 0, 0, 0]
        s3 = [0, 0, 0, 0, 0, 0]
        s4 = [0, 0, 0, 6, 4, 2]
        # Each list above is one sample; transpose so rows are features and
        # columns are samples. A plain ndarray is used because np.matrix is
        # no longer recommended by NumPy and warns on construction.
        mat = np.array([s0, s1, s2, s3, s4]).T
        table = biom.table.Table(mat, feats, samps).to_dataframe()

        tax_labels = [
            "Charmander",
            "Charmeleon",
            "Charizard",
            "Bulbasaur",
            "Ivysaur",
            "Venusaur",
        ]
        confidence = ["0.99"] * 6
        # Build a taxonomy frame indexed by feature ID with "Taxon" and
        # "Confidence" columns.
        taxonomy = pd.DataFrame([feats, tax_labels, confidence]).T
        taxonomy.columns = ["feature-id", "Taxon", "Confidence"]
        taxonomy.set_index("feature-id", inplace=True, drop=True)

        with pytest.raises(ValueError) as excinfo:
            filter_and_join_taxonomy(table, taxonomy, "Char", "saur")
        # The message is compared verbatim, not just the exception type.
        assert ("No samples contain both numerator and denominator features!"
                == str(excinfo.value))
Example #4
0
    def test_overlapping_columns(self, get_testing_data):
        """Feature table and taxonomy share one or more column names.

        Qarcoal is expected to drop every taxonomy column except Taxon.
        When a feature-table column collides with a taxonomy column (for
        example a sample named Confidence), the feature-table column must
        be kept and the Pandas join must not fail. The collision is
        simulated here with columns Overlap1 and Overlap2 added to both
        frames.
        """
        def random_counts():
            # Six random integers drawn from [1, 10).
            return [np.random.randint(1, 10) for _ in range(6)]

        feature_table = get_testing_data.table
        taxonomy = get_testing_data.taxonomy

        for shared_name in ("Overlap1", "Overlap2"):
            taxonomy[shared_name] = random_counts()
            feature_table[shared_name] = random_counts()
        feature_table = feature_table.apply(
            lambda column: column.astype(np.float64)
        )

        numerator, denominator = filter_and_join_taxonomy(
            feature_table,
            taxonomy,
            "Char",
            "saur",
        )

        num_ids = ["F3", "F4", "F5"]
        denom_ids = ["F0", "F1", "F2"]

        expected_num = feature_table.loc[num_ids]
        expected_denom = feature_table.loc[denom_ids]
        observed_num = numerator.loc[num_ids]
        observed_denom = denominator.loc[denom_ids]

        # The returned frames must carry the feature table's own values
        # for Overlap1 and Overlap2.
        comparisons = (
            (expected_num, observed_num),
            (expected_denom, observed_denom),
        )
        for expected, observed in comparisons:
            _check_dataframe_equality(expected, observed)