Ejemplo n.º 1
0
def test_modz_multiple_columns():
    """Check modz when replicates are defined by two metadata columns.

    The module-level replicate data is extended with a second grouping
    column, then modz is run twice: once with ``min_weight=0`` and compared
    against hand-written expected values, and once with ``min_weight=1`` and
    compared against the per-group mean rounded to 4 decimals.
    """
    # Local list of grouping columns (shadows any module-level name on purpose)
    replicate_columns = ["Metadata_g", "Metadata_h"]
    data_replicate_multi_df = data_replicate_df.assign(
        Metadata_h=["c", "c", "c", "d", "d", "d"])
    # The expected result is to completely remove influence of anticorrelated sample
    consensus_df = modz(data_replicate_multi_df,
                        replicate_columns,
                        min_weight=0,
                        precision=precision)
    expected_result = pd.DataFrame({
        "Metadata_g": ["a", "b"],
        "Metadata_h": ["c", "d"],
        "Cells_x": [1.0, 4.0],
        "Cytoplasm_y": [5.0, 2.0],
        "Nuclei_z": [2.0, -0.5],
    })
    # Both frames get the same reset_index() treatment, so they stay comparable
    pd.testing.assert_frame_equal(expected_result.reset_index(),
                                  consensus_df.reset_index())

    # With the min_weight = 1, then modz is mean
    consensus_df = modz(data_replicate_multi_df,
                        replicate_columns,
                        min_weight=1,
                        precision=precision)
    expected_result = data_replicate_multi_df.groupby(
        replicate_columns).mean().round(4)

    # `check_less_precise=True` (3-digit comparison) was deprecated in
    # pandas 1.1 and later removed; rtol/atol express the same loose tolerance.
    pd.testing.assert_frame_equal(expected_result.reset_index(),
                                  consensus_df,
                                  rtol=0.5e-3,
                                  atol=0.5e-3)
Ejemplo n.º 2
0
def test_modz():
    """Check modz on the module-level replicate data.

    modz is run twice with the module-level ``replicate_columns``: once with
    ``min_weight=0`` and compared against hand-written expected values, and
    once with ``min_weight=1`` and compared against the per-group mean
    rounded to 4 decimals.
    """
    # The expected result is to completely remove influence of anticorrelated sample
    consensus_df = modz(data_replicate_df,
                        replicate_columns,
                        min_weight=0,
                        precision=precision)
    expected_result = pd.DataFrame(
        {
            "Cells_x": [1.0, 4.0],
            "Cytoplasm_y": [5.0, 2.0],
            "Nuclei_z": [2.0, -0.5]
        },
        index=["a", "b"],
    )
    # Name the index so reset_index() yields the replicate column first
    expected_result.index.name = replicate_columns
    pd.testing.assert_frame_equal(expected_result.reset_index(), consensus_df)

    # With the min_weight = 1, then modz is mean
    consensus_df = modz(data_replicate_df,
                        replicate_columns,
                        min_weight=1,
                        precision=precision)
    expected_result = data_replicate_df.groupby(
        replicate_columns).mean().round(4)
    expected_result.index.name = replicate_columns
    # `check_less_precise=True` (3-digit comparison) was deprecated in
    # pandas 1.1 and later removed; rtol/atol express the same loose tolerance.
    pd.testing.assert_frame_equal(expected_result.reset_index(),
                                  consensus_df,
                                  rtol=0.5e-3,
                                  atol=0.5e-3)
Ejemplo n.º 3
0
def test_modz_extraneous_column():
    """Check modz when the input carries an extra constant ``Metadata_h`` column.

    The expected frame contains only the replicate column and the three
    feature columns, so the added column must not appear in the output.
    """
    # Same replicate data plus one constant metadata column
    frame_with_extra = data_replicate_df.assign(Metadata_h="c")

    # The expected result is to completely remove influence of anticorrelated sample
    observed = modz(
        frame_with_extra,
        replicate_columns,
        min_weight=0,
        precision=precision,
    )

    feature_values = {
        "Cells_x": [1.0, 4.0],
        "Cytoplasm_y": [5.0, 2.0],
        "Nuclei_z": [2.0, -0.5],
    }
    # Index is named after the replicate column so reset_index() emits it first
    group_index = pd.Index(["a", "b"], name=replicate_columns)
    expected = pd.DataFrame(feature_values, index=group_index).reset_index()

    pd.testing.assert_frame_equal(expected, observed)
Ejemplo n.º 4
0
def test_modz_unbalanced_sample_numbers():
    """Check modz when one ``Metadata_h`` group holds a single sample.

    Five rows are labelled "c" and one row "d"; the output is compared with
    hand-written expected values for both groups.
    """
    # The expected result is to not freak out when only one sample exists for a piece of metadata
    group_labels = ["c"] * 5 + ["d"]
    unbalanced_df = data_replicate_df.assign(Metadata_h=group_labels)

    observed = modz(
        unbalanced_df,
        replicate_columns="Metadata_h",
        min_weight=0,
        precision=precision,
    )

    expected_columns = {
        "Metadata_h": ["c", "d"],
        "Cells_x": [0.9999, 5.0],
        "Cytoplasm_y": [5.9994, 1.0],
        "Nuclei_z": [2.9997, 1],
    }
    expected = pd.DataFrame(expected_columns)

    pd.testing.assert_frame_equal(expected, observed)
Ejemplo n.º 5
0
def test_modz_multiple_columns_feature_specify():
    """Check modz when the feature columns are passed explicitly.

    Builds a small frame with a grouping column ``g`` and features ``x``,
    ``y`` and ``z``, labels the rows ``sample_0`` .. ``sample_5``, and
    compares the modz output with hand-written expected values.
    """
    # Include replicate information
    group_a = pd.DataFrame({
        "g": "a",
        "x": [1, 1, -1],
        "y": [5, 5, -5],
        "z": [2, 2, -2],
    })
    group_b = pd.DataFrame({
        "g": "b",
        "x": [1, 3, 5],
        "y": [8, 3, 1],
        "z": [5, -2, 1],
    })
    samples_df = pd.concat([group_a, group_b]).reset_index(drop=True)
    # Replace the positional index with string sample labels
    sample_labels = ["sample_{}".format(position) for position in samples_df.index]
    samples_df.index = sample_labels

    # The expected result is to completely remove influence of anticorrelated sample
    observed = modz(
        samples_df,
        replicate_columns="g",
        features=["x", "y", "z"],
        min_weight=0,
        precision=precision,
    )

    expected_values = {
        "x": [1.0, 4.0],
        "y": [5.0, 2.0],
        "z": [2.0, -0.5],
    }
    expected = pd.DataFrame(
        expected_values,
        index=pd.Index(["a", "b"], name="g"),
    ).reset_index()

    pd.testing.assert_frame_equal(expected, observed)
Ejemplo n.º 6
0
def consensus(
    profiles,
    replicate_columns=None,
    operation="median",
    features="infer",
    output_file="none",
    compression_options=None,
    float_format=None,
    modz_args=None,
):
    """Form level 5 consensus profile data.

    The operation is validated, the profiles are loaded, and replicates are
    collapsed either with ``modz`` (when ``operation == "modz"``) or with
    ``aggregate`` (any other supported operation).

    :param profiles: A file or pandas DataFrame of profile data
    :type profiles: str, pandas.DataFrame
    :param replicate_columns: Metadata columns indicating which replicates to collapse, defaults to ["Metadata_Plate", "Metadata_Well"] (used when None is given)
    :type replicate_columns: list
    :param operation: The method used to form consensus profiles, defaults to "median"
    :type operation: str
    :param features: The features to collapse, defaults to "infer"
    :type features: str, list
    :param output_file: If specified, the location to write the file, defaults to "none"
    :type output_file: str
    :param compression_options: the method to compress output data, defaults to None. See pycytominer.cyto_utils.output.py for options
    :type compression_options: str
    :param float_format: decimal precision to use in writing output file, defaults to None. For example, use "%.3g" for 3 decimal precision.
    :type float_format: str
    :param modz_args: Additional custom arguments passed as kwargs if operation="modz", defaults to {"method": "spearman"} (used when None is given). See pycytominer.cyto_utils.modz for more details.
    :type modz_args: dict
    :return: The consensus profiles when output_file is "none". Otherwise the profiles are handed to ``output`` and nothing is returned.

    :Example:

    import pandas as pd
    from pycytominer import consensus

    data_df = pd.concat(
        [
            pd.DataFrame(
                {
                    "Metadata_Plate": "X",
                    "Metadata_Well": "a",
                    "Cells_x": [0.1, 0.3, 0.8],
                    "Nuclei_y": [0.5, 0.3, 0.1],
                }
            ),
            pd.DataFrame(
                {
                    "Metadata_Plate": "X",
                    "Metadata_Well": "b",
                    "Cells_x": [0.4, 0.2, -0.5],
                    "Nuclei_y": [-0.8, 1.2, -0.5],
                }
            ),
        ]
    ).reset_index(drop=True)

    consensus_df = consensus(
        profiles=data_df,
        replicate_columns=["Metadata_Plate", "Metadata_Well"],
        operation="median",
        features="infer",
        output_file="none",
    )
    """
    # Build the defaults per call: a list/dict literal in the signature would
    # be a single object shared (and potentially mutated) across every call.
    if replicate_columns is None:
        replicate_columns = ["Metadata_Plate", "Metadata_Well"]
    if modz_args is None:
        modz_args = {"method": "spearman"}

    # Confirm that the operation is supported
    check_consensus_operation(operation)

    # Load Data
    profiles = load_profiles(profiles)

    if operation == "modz":
        consensus_df = modz(population_df=profiles,
                            replicate_columns=replicate_columns,
                            features=features,
                            **modz_args)
    else:
        consensus_df = aggregate(
            population_df=profiles,
            strata=replicate_columns,
            features=features,
            operation=operation,
            subset_data_df="none",
        )

    # "none" is the sentinel for "do not write; hand the result back"
    if output_file != "none":
        output(
            df=consensus_df,
            output_filename=output_file,
            compression_options=compression_options,
            float_format=float_format,
        )
    else:
        return consensus_df