Python filter_and_join_taxonomy Examples

Programming Language: Python

Namespace/Package Name: qurro.qarcoal

Method/Function: filter_and_join_taxonomy

Examples at hotexamples.com: 4

Python filter_and_join_taxonomy - 4 examples found. These are the top-rated real-world Python examples of qurro.qarcoal.filter_and_join_taxonomy extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

    def test_taxon_as_sample_name(self, get_testing_data):
        """Feature table contains a sample column literally named "Taxon".

        Six random values are written to a float64 "Taxon" column of the
        feature table before filter_and_join_taxonomy is called. The rows
        selected from the returned frames must equal the same rows of the
        feature table.
        """
        feature_table = get_testing_data.table
        random_counts = [np.random.randint(1, 10) for _ in range(6)]
        feature_table["Taxon"] = random_counts
        feature_table["Taxon"] = feature_table["Taxon"].astype(np.float64)

        num_df, denom_df = filter_and_join_taxonomy(
            feature_table, get_testing_data.taxonomy, "Char", "saur"
        )

        # feature IDs looked up in the numerator / denominator results
        num_ids = ["F3", "F4", "F5"]
        denom_ids = ["F0", "F1", "F2"]

        expected_num = feature_table.loc[num_ids]
        expected_denom = feature_table.loc[denom_ids]
        observed_num = num_df.loc[num_ids]
        observed_denom = denom_df.loc[denom_ids]

        # the returned frames must reproduce the table values, including
        # the extra "Taxon" sample column
        _check_dataframe_equality(observed_num, expected_num)
        _check_dataframe_equality(observed_denom, expected_denom)

Example #2

Show file

    def _check_missing_features_dfs(self, table, taxonomy):
        """Auxiliary check shared by the missing-feature tests.

        test_taxonomy_missing_features and test_feat_table_missing_features
        run essentially the same code; the only difference is whether the
        feature was dropped from the table or from the taxonomy. Both pass
        their inputs here.
        """
        num_df, denom_df = filter_and_join_taxonomy(
            table, taxonomy, "Char", "saur"
        )
        assert "F5" not in num_df.index

        num_ids = ["F3", "F4"]
        denom_ids = ["F0", "F1", "F2"]
        ordered_samples = list(map("S{}".format, range(5)))

        expected_num = pd.DataFrame(table.loc[num_ids])
        expected_denom = pd.DataFrame(table.loc[denom_ids])
        observed_num = num_df.loc[num_ids]
        observed_denom = denom_df.loc[denom_ids]

        # Qarcoal's set operations may shuffle the sample columns, so every
        # frame is put into one fixed column order before comparison
        expected_num = expected_num[ordered_samples]
        expected_denom = expected_denom[ordered_samples]
        observed_num = observed_num[ordered_samples]
        observed_denom = observed_denom[ordered_samples]

        # returned frames must carry exactly the values found in the table
        assert expected_num.equals(observed_num)
        assert expected_denom.equals(observed_denom)

Example #3

Show file

    def test_no_common_samples(self):
        """No samples with both numerator and denominator features.

        The Char* rows (F0-F2) are nonzero only in S1 and S2, the *saur rows
        (F3-F5) are nonzero only in S0 and S4, and S3 is all zeros, so no
        sample has counts for both groups. filter_and_join_taxonomy is
        expected to raise a ValueError with a fixed message.

           -----------------------------------------------
          |                    S0    S1    S2    S3    S4 |
          | F0/Charmander       0     1    10     0     0 |
          | F1/Charmeleon       0     2    15     0     0 |
          | F2/Charizard        0     4     7     0     0 |
          | F3/Bulbasaur        5     0     0     0     6 |
          | F4/Ivysaur          7     0     0     0     4 |
          | F5/Venusaur         9     0     0     0     2 |
           -----------------------------------------------
        """
        samps = ["S{}".format(i) for i in range(5)]
        feats = ["F{}".format(i) for i in range(6)]
        # one list per sample (column of the table above), ordered F0..F5
        s0 = [0, 0, 0, 5, 7, 9]
        s1 = [1, 2, 4, 0, 0, 0]
        s2 = [10, 15, 7, 0, 0, 0]
        s3 = [0, 0, 0, 0, 0, 0]
        s4 = [0, 0, 0, 6, 4, 2]
        # np.matrix is no longer recommended by NumPy; a plain 2-D array,
        # transposed to features x samples, holds the same data
        mat = np.array([s0, s1, s2, s3, s4]).T
        table = biom.table.Table(mat, feats, samps).to_dataframe()

        tax_labels = [
            "Charmander",
            "Charmeleon",
            "Charizard",
            "Bulbasaur",
            "Ivysaur",
            "Venusaur",
        ]
        confidence = ["0.99"] * 6
        # rows are built as [ids, labels, confidence] and transposed so that
        # each feature becomes one row; the feature ID is then made the index
        taxonomy = pd.DataFrame([feats, tax_labels, confidence]).T
        taxonomy.columns = ["feature-id", "Taxon", "Confidence"]
        taxonomy.set_index("feature-id", inplace=True, drop=True)

        with pytest.raises(ValueError) as excinfo:
            filter_and_join_taxonomy(table, taxonomy, "Char", "saur")
        # exact comparison: the whole message must match, not a substring
        assert ("No samples contain both numerator and denominator features!"
                == str(excinfo.value))

Example #4

Show file

    def test_overlapping_columns(self, get_testing_data):
        """Feature table and taxonomy share one or more column names.

        Qarcoal is expected to discard every taxonomy column other than
        Taxon. When a column name exists in both the feature table and the
        taxonomy (for example a sample that happens to be called
        Confidence), the feature table's columns should survive and Pandas
        should not raise a join error.
        """
        feature_table = get_testing_data.table
        taxonomy = get_testing_data.taxonomy

        # add two identically named columns to both inputs, filled with
        # independent random values
        for shared_name in ("Overlap1", "Overlap2"):
            taxonomy[shared_name] = [
                np.random.randint(1, 10) for _ in range(6)
            ]
            feature_table[shared_name] = [
                np.random.randint(1, 10) for _ in range(6)
            ]
        feature_table = feature_table.apply(
            lambda column: column.astype(np.float64)
        )

        num_df, denom_df = filter_and_join_taxonomy(
            feature_table, taxonomy, "Char", "saur"
        )

        num_ids = ["F3", "F4", "F5"]
        denom_ids = ["F0", "F1", "F2"]

        expected_num = feature_table.loc[num_ids]
        expected_denom = feature_table.loc[denom_ids]
        observed_num = num_df.loc[num_ids]
        observed_denom = denom_df.loc[denom_ids]

        # the returned frames must hold the feature table's values for
        # Overlap1 and Overlap2
        _check_dataframe_equality(expected_num, observed_num)
        _check_dataframe_equality(expected_denom, observed_denom)