def test_feature_infer_compartments():
    """Check feature inference when explicit compartments are requested.

    The test expects a compartment given with unusual casing ("CElls") to
    select the "Cells_" feature column, and a custom compartment ("nothing")
    to select the "Nothing_" column of the shared ``data_df`` fixture.
    """
    cells_features = infer_cp_features(
        population_df=data_df, compartments=["CElls"]
    )
    custom_features = infer_cp_features(
        population_df=data_df, compartments=["nothing"]
    )

    assert cells_features == ["Cells_Something_Something"]
    assert custom_features == ["Nothing_somethingwrong"]
def get_na_columns(population_df, features="infer", samples="all", cutoff=0.05):
    """
    Get features that have more NA values than cutoff defined

    Arguments:
    population_df - pandas DataFrame storing profiles
    features - list of features present in the population dataframe
               [default: "infer"] - if "infer", the feature columns are
               determined by calling infer_cp_features on population_df
    samples - if provided, a list of samples (row labels) to perform the
              operation on [default: "all"] - if "all", use all samples
    cutoff - float to exclude features that have a higher proportion of
             missingness [default: 0.05]

    Output:
    A list of the features to exclude, in the column order of the features
    """
    # Compare against the sentinel strings only when the argument really is a
    # string: an Index/ndarray compared with a str yields an elementwise
    # result whose truth value is ambiguous and would raise.
    use_all_samples = isinstance(samples, str) and samples == "all"
    if not use_all_samples:
        population_df = population_df.loc[samples, :]

    if isinstance(features, str) and features == "infer":
        features = infer_cp_features(population_df)

    # Restrict to the feature columns in both cases, so that non-feature
    # columns are never reported as features to exclude.
    population_df = population_df.loc[:, features]

    # Mean of the boolean NA mask is the per-column proportion of missing rows.
    na_proportion = population_df.isna().mean()
    above_cutoff = na_proportion[na_proportion > cutoff]

    # De-duplicate while keeping a deterministic (column) order.
    return list(dict.fromkeys(above_cutoff.index.tolist()))
def test_feature_infer():
    """Check the default feature inference against the ``data_df`` fixture.

    With no extra arguments, the test expects exactly the Cells, Cytoplasm
    and Nuclei columns listed below, in this order.
    """
    inferred = infer_cp_features(population_df=data_df)

    assert inferred == [
        "Cells_Something_Something",
        "Cytoplasm_Something_Something",
        "Nuclei_Correlation_Manders_AGP_DNA",
        "Nuclei_Correlation_RWC_ER_RNA",
    ]
def test_metadata_feature_infer():
    """Check that ``metadata=True`` yields only the metadata column.

    The test expects the single "Metadata_" column of the ``data_df`` fixture.
    """
    metadata_columns = infer_cp_features(population_df=data_df, metadata=True)

    assert metadata_columns == ["Metadata_Something_Something"]
def test_feature_infer_nocp():
    """Check that inference fails loudly when no CP features are present.

    Calling infer_cp_features on ``non_cp_data_df`` is expected to raise an
    AssertionError whose message contains "No CP features found.".
    """
    with pytest.raises(AssertionError) as nocp:
        # The return value is irrelevant: the call is expected to raise.
        infer_cp_features(population_df=non_cp_data_df)

    # Checked after the context manager exits; inside the block this line
    # would never be reached once the call raises.
    assert "No CP features found." in str(nocp.value)