예제 #1
def test_vibe_check_successes():
    """vibe_check() should run without raising on the stock test data."""
    biom_df, _sample_md, feature_ranks = get_test_data()

    # No explicit range given: everything in the stock test data is expected
    # to fall inside vibe_check()'s default safe range.
    vibe_check(feature_ranks, biom_df)

    # Explicit range given: the values in the table and ranks span 0 through
    # 8, so a safe range of [0, 8] should be accepted too.
    vibe_check(feature_ranks, biom_df, safe_range=[0, 8])
예제 #2
def test_vibe_check_safe_range_invalid_safe_ranges():
    """Checks cases where the safe_range passed to vibe_check() is malformed.

    Two kinds of bad input are exercised: sequences whose length is not
    exactly 2, and a length-2 range whose second element is smaller than its
    first. Each is expected to raise a ValueError carrying a descriptive
    message.
    """
    table, metadata, ranks = get_test_data()

    # Too-long, empty, and single-element ranges (as both lists and tuples).
    ranges = [[1, 2, 3, 4, 5], [], [1], (), (2, )]
    for r in ranges:
        with pytest.raises(ValueError) as exception_info:
            vibe_check(ranks, table, safe_range=r)
        assert "safe_range must have a length of 2." in str(
            exception_info.value)

    # Correct length, but the bounds are given in descending order.
    with pytest.raises(ValueError) as exception_info:
        vibe_check(ranks, table, safe_range=[10, 1])
    assert "safe_range[1] must be GREATER THAN safe_range[0]." in str(
        exception_info.value)
예제 #3
def test_vibe_check_failures():
    """Checks that vibe_check() raises OverflowError on out-of-range numbers.

    Covers both inputs: the feature table (using narrowed custom safe ranges)
    and the feature rankings (using the default safe range, with a single
    ranking value overwritten by numbers just outside and far outside that
    range).
    """
    table, metadata, ranks = get_test_data()

    # A lower limit of 1 sits above at least one entry in the table, so this
    # should fail.
    with pytest.raises(OverflowError) as exception_info:
        vibe_check(ranks, table, safe_range=[1, 8])
    assert (
        'The input feature table contains entries lower than the "safe" lower '
        "limit for numbers of 1.") in str(exception_info.value)

    # Likewise, an upper limit of 7 sits below at least one table entry.
    with pytest.raises(OverflowError) as exception_info:
        vibe_check(ranks, table, safe_range=[0, 7])
    assert ('The input feature table contains entries larger than the "safe" '
            "upper limit for numbers of 7.") in str(exception_info.value)

    # Limits of the default safe range, as quoted in the error messages
    # asserted below: +/- (2**53 - 1), i.e. +/- 9007199254740991. Deriving the
    # lower limit from the upper one keeps the two symmetric, so that
    # "lower_lim - 1" really is the first value outside of the range.
    upper_lim = (2**53) - 1
    lower_lim = -upper_lim

    # Test failure, with the default safe range, on a few small cases.
    weird_small_values = [lower_lim - 1, lower_lim * 2, lower_lim * 3]
    for w in weird_small_values:
        # Use a single label-based assignment instead of chained indexing
        # (ranks["Rank 0"]["F3"] = w), which may write to a temporary object
        # and leave ranks unchanged.
        # NOTE(review): assumes ranks is a DataFrame with "F3" in its index
        # and "Rank 0" among its columns -- confirm against get_test_data().
        ranks.loc["F3", "Rank 0"] = w

        with pytest.raises(OverflowError) as exception_info:
            vibe_check(ranks, table)
        assert (
            "The input feature rankings data contains entries lower than the "
            '"safe" lower limit for numbers of -9007199254740991.') in str(
                exception_info.value)

    # Test failure, with the default safe range, on a few large cases.
    weird_large_values = [upper_lim + 1, upper_lim * 2, upper_lim * 3]
    for w in weird_large_values:
        ranks.loc["F3", "Rank 0"] = w

        with pytest.raises(OverflowError) as exception_info:
            vibe_check(ranks, table)
        assert (
            "The input feature rankings data contains entries larger than the "
            '"safe" upper limit for numbers of 9007199254740991.') in str(
                exception_info.value)
예제 #4
def process_input(
    """Validates/processes the input files and parameter(s) to Qurro.

    In particular, this function

    1. Calls validate_df() and then check_column_names() on all of the
       input DataFrames passed (feature ranks, sample metadata, feature
       metadata if passed).

    2. Calls replace_nan() on the metadata DataFrame(s), so that all
       missing values are represented consistently with a None (which
       will be represented as a null in JSON/JavaScript).

    3. Converts the BIOM table to a SparseDataFrame by calling

    4. Runs vibe_check() on the feature ranks and BIOM table to ensure
       that numbers are within the range of safe IEEE 754 numbers for

    5. Matches up the table with the feature ranks and sample metadata by
       calling match_table_and_data().

    6. Calls filter_unextreme_features() using the provided
       extreme_feature_count. (If it's None, then nothing will be done.)

    7. Calls remove_empty_samples_and_features() to filter empty samples
       (and features). This is purposefully done *after*
       filter_unextreme_features() is called.

    8. Calls merge_feature_metadata() on the feature ranks and feature
       metadata. (If feature metadata is None, nothing will be done.)

    output_metadata: pd.DataFrame
         Sample metadata, but matched with the table and with empty samples

    output_ranks: pd.DataFrame
         Feature ranks, post-filtering and with feature metadata columns
         added in.

         The ranking columns' names in output_ranks.

    feature_metadata_cols: list
         The feature metadata columns' names in output_ranks.

    output_table: pd.SparseDataFrame
         The BIOM table, post matching with the feature ranks and sample
         metadata and with empty samples removed.

    logging.debug("Starting processing input.")

    validate_df(feature_ranks, "feature ranks", 2, 1)
    validate_df(sample_metadata, "sample metadata", 1, 1)
    if feature_metadata is not None:
        # It's cool if there aren't any features actually described in the
        # feature metadata (hence why we pass in 0 as the minimum # of rows in
        # the feature metadata DataFrame), but we still pass it to
        # validate_df() in order to ensure that:
        #   1) there's at least one feature metadata column (because
        #      otherwise the feature metadata is useless)
        #   2) column names are unique
        validate_df(feature_metadata, "feature metadata", 0, 1)

    check_column_names(sample_metadata, feature_ranks, feature_metadata)

    # Replace NaN values (which both _metadata_utils.read_metadata_file() and
    # qiime2.Metadata use to represent missing values, i.e. ""s) with None --
    # this is generally easier for us to handle in the JS side of things (since
    # it'll just be consistently converted to null by json.dumps()).
    sample_metadata = replace_nan(sample_metadata)
    if feature_metadata is not None:
        feature_metadata = replace_nan(feature_metadata)

    table = biom_table_to_sparse_df(biom_table)

    # Check that the solely-numeric data only contains "safe" numbers
    vibe_check(feature_ranks, table)

    # Match up the table with the feature ranks and sample metadata.
    m_table, m_sample_metadata = match_table_and_data(table, feature_ranks,

    # Note that although we always call filter_unextreme_features(), filtering
    # isn't necessarily always done (whether or not depends on the value of
    # extreme_feature_count and the contents of the table/ranks).
    filtered_table, filtered_ranks = filter_unextreme_features(
        m_table, feature_ranks, extreme_feature_count)

    # Filter now-empty samples (and empty features) from the BIOM table.
    output_table, output_metadata, u_ranks = remove_empty_samples_and_features(
        filtered_table, m_sample_metadata, filtered_ranks)

    # Save a list of ranking IDs (before we add in feature metadata)
    # TODO: just have merge_feature_metadata() give us this?
    ranking_ids = u_ranks.columns

    output_ranks, feature_metadata_cols = merge_feature_metadata(
        u_ranks, feature_metadata)

    logging.debug("Finished input processing.")
    return (