コード例 #1
0
def assert_melt(df: pd.DataFrame,
                eval_metric: str = "replicate_reproducibility") -> None:
    r"""Sanity-check that a pairwise correlation matrix was melted as expected

    How the pairwise matrix is melted depends on the evaluation metric, and
    downstream functions rely on the matching strategy having been used. The
    check compares the column sums of the two pair-index columns.

    Parameters
    ----------
    df : pandas.DataFrame
        A melted pairwise correlation matrix
    eval_metric : str
        The user input eval metric

    Returns
    -------
    None
        An AssertionError is raised if the matrix was melted incorrectly
    """
    check_eval_metric(eval_metric=eval_metric)

    # Keep only the index column of every pair member, then total each column
    id_lookup = set_pair_ids()
    index_columns = [id_lookup[key]["index"] for key in id_lookup]
    column_totals = df.loc[:, index_columns].sum().tolist()

    failure_prefix = "Stop! The eval_metric provided in 'metric_melt()' is incorrect!"
    failure_message = "{err} This is a fatal error providing incorrect results".format(
        err=failure_prefix)

    if eval_metric == "replicate_reproducibility":
        # The two index columns are expected to sum to different totals here
        # (presumably because only one triangle of the matrix is kept)
        assert column_totals[0] != column_totals[1], failure_message
    elif eval_metric in ("precision_recall", "grit"):
        # Both index columns are expected to sum to the same total
        assert column_totals[0] == column_totals[1], failure_message
コード例 #2
0
def check_replicate_groups(
        eval_metric: str,
        replicate_groups: Union[List[str], dict, str]) -> None:
    r"""Helper function checking that the user correctly constructed the input replicate
    groups argument

    The package will not calculate evaluation metrics with incorrectly constructed
    replicate_groups. See :py:func:`cytominer_eval.evaluate.evaluate`.

    The expected structure depends on the metric:

    * ``"grit"`` - a dict containing the keys ``"profile_col"`` and
      ``"replicate_group_col"``
    * ``"mp_value"`` - a single string
    * any other metric - a list

    Parameters
    ----------
    eval_metric : str
        Which evaluation metric to calculate. See
        :py:func:`cytominer_eval.transform.util.get_available_eval_metrics`.
    replicate_groups : {list, dict, str}
        The tentative data structure listing replicate groups

    Returns
    -------
    None

    Raises
    ------
    AssertionError
        If replicate_groups is improperly constructed for the given eval_metric
    """
    check_eval_metric(eval_metric=eval_metric)

    # The checks below validate user input, so they raise AssertionError
    # explicitly instead of using ``assert`` statements, which are removed when
    # Python runs with ``-O``. Exception type and messages are unchanged.
    if eval_metric == "grit":
        if not isinstance(replicate_groups, dict):
            raise AssertionError("For grit, replicate_groups must be a dict")

        replicate_key_ids = ["profile_col", "replicate_group_col"]

        if not all(key in replicate_groups for key in replicate_key_ids):
            raise AssertionError(
                "replicate_groups for grit not formed properly. Must contain {id}".format(
                    id=replicate_key_ids))
    elif eval_metric == "mp_value":
        if not isinstance(replicate_groups, str):
            raise AssertionError(
                "For mp_value, replicate_groups must be a single string.")
    elif not isinstance(replicate_groups, list):
        raise AssertionError(
            "Replicate groups must be a list for the {op} operation".format(
                op=eval_metric))
コード例 #3
0
def test_check_eval_metric():
    """An unsupported eval metric is rejected with a descriptive AssertionError"""
    with pytest.raises(AssertionError) as ae:
        # The call is expected to raise, so its return value is never bound
        check_eval_metric(eval_metric="MISSING")
    # The message must name the rejected metric and introduce the valid choices
    assert "MISSING not supported. Select one of" in str(ae.value)
コード例 #4
0
def process_melt(
    df: pd.DataFrame,
    meta_df: pd.DataFrame,
    eval_metric: str = "replicate_reproducibility",
) -> pd.DataFrame:
    """Helper function to annotate and process an input similarity matrix

    The square similarity matrix is masked according to the evaluation metric,
    melted into long format (one row per retained pairwise comparison), and
    annotated with the metadata of both members of each pair. The input ``df``
    is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        A similarity matrix output from
        :py:func:`cytominer_eval.transform.transform.get_pairwise_metric`
    meta_df : pandas.DataFrame
        A wide matrix of metadata information where the index aligns to the similarity
        matrix index
    eval_metric : str, optional
        Which metric to ultimately calculate. Determines whether or not to keep the full
        similarity matrix or only one diagonal. Defaults to "replicate_reproducibility".

    Returns
    -------
    pandas.DataFrame
        A melted (long) pairwise similarity table holding a "similarity_metric"
        column plus the metadata columns of both profiles in each pair,
        disambiguated by the pair suffixes

    Raises
    ------
    AssertionError
        If ``df`` is not square
    """
    # Confirm that the user formed the input arguments properly. Raised
    # explicitly (rather than with ``assert``) so the check survives ``python -O``.
    # NOTE: only squareness is verified here, not actual symmetry of the values.
    if df.shape[0] != df.shape[1]:
        raise AssertionError("Matrix must be symmetrical")
    check_eval_metric(eval_metric)

    # Get identifiers for pairing metadata
    pair_ids = set_pair_ids()

    # Subset the pairwise similarity metric depending on the eval metric given:
    #   "replicate_reproducibility" - requires only the upper triangle of a symmetric matrix
    #   "precision_recall" - requires the full symmetric matrix (no diagonal)
    # Remove pairwise matrix diagonal and redundant pairwise comparisons
    if eval_metric == "replicate_reproducibility":
        upper_tri = get_upper_matrix(df)
        df = df.where(upper_tri)
    else:
        # Mask the diagonal on a new frame rather than writing NaN into
        # ``df.values``: an in-place write alters the caller's matrix, has no
        # effect when ``.values`` is a copy, and fails on read-only or integer
        # arrays.
        off_diagonal = ~np.eye(df.shape[0], dtype=bool)
        df = df.where(off_diagonal)

    # Convert pairwise matrix to melted (long) version based on index value.
    # Masked entries are NaN at this point, so dropna() discards them.
    metric_unlabeled_df = (
        pd.melt(
            df.reset_index(),
            id_vars="index",
            value_vars=df.columns,
            var_name=pair_ids["pair_b"]["index"],
            value_name="similarity_metric",
        )
        .dropna()
        .reset_index(drop=True)
        .rename({"index": pair_ids["pair_a"]["index"]}, axis="columns")
    )

    # Merge metadata on index for both comparison pairs: the inner merge
    # attaches metadata for pair_b, the outer merge attaches it for pair_a
    output_df = meta_df.merge(
        meta_df.merge(
            metric_unlabeled_df,
            left_index=True,
            right_on=pair_ids["pair_b"]["index"],
        ),
        left_index=True,
        right_on=pair_ids["pair_a"]["index"],
        suffixes=[pair_ids["pair_a"]["suffix"], pair_ids["pair_b"]["suffix"]],
    ).reset_index(drop=True)

    return output_df