def test_problem_slicing():
    """Check that a Problem can be sliced along the sample axis.

    Two routes are exercised: indexing with a boolean mask and positional
    selection through ``iloc``. Each slice must keep the parent's metadata
    and expose only the selected samples via ``n_samples``, ``y`` and ``X``,
    while the feature count stays untouched.
    """
    _, _, frame = mock_problem()
    full = Problem(frame, ['gene1', 'gene2'], 'disease', 'yes')

    # Boolean-mask slicing. The assertions below expect the mask to pick
    # exactly the first two samples of the mock data.
    is_male = full.dataframe['gender'] == 'male'
    males = full[is_male]
    assert_metadata_eq(full, males)
    nose.tools.eq_(males.n_samples, 2)
    nose.tools.eq_(males.n_features, 2)
    np.testing.assert_array_equal(males.y, [1, 0])
    np.testing.assert_array_equal(males.X, full.X[:2])

    # Positional slicing with an explicit, non-contiguous list of rows.
    picked = full.iloc([0, 2, 3])
    assert_metadata_eq(full, picked)
    nose.tools.eq_(picked.n_samples, 3)
    nose.tools.eq_(picked.n_features, 2)
    np.testing.assert_array_equal(picked.y, [1, 0, 1])
    np.testing.assert_array_equal(picked.X, full.X[[0, 2, 3]])
def test_y_for_multiclass_slicing():
    """Check ``y`` for a multiclass Problem, before and after slicing.

    A slice of a Problem must keep the label codes of its parent, whereas a
    Problem built from scratch on the reduced dataframe derives its own
    codes. Passing the parent's ``label_list`` to the constructor must make
    the fresh Problem agree with the slice again.

    NOTE(review): the expected codes are consistent with labels being
    numbered in alphabetical order (five=0, four=1, one=2, three=3, two=4);
    confirm against the Problem implementation.
    """
    df = pd.DataFrame(
        columns=['gene', 'number'],
        data=[
            ['gene1', 'one'],
            ['gene2', 'two'],
            ['gene3', 'three'],
            ['gene4', 'four'],
            ['gene5', 'five'],
        ],
    )
    prob = Problem(df, ['gene'], 'number', None)
    nose.tools.assert_list_equal(list(prob.y), [2, 4, 3, 1, 0])

    # --- Boolean-mask slicing: drop the 'three' sample. ---
    # Slicing the Problem keeps the parent's codes, so 'two' is still 4.
    subset_prob = prob[prob.dataframe['gene'] != 'gene3']
    y_subset = subset_prob.y
    nose.tools.assert_list_equal(list(y_subset), [2, 4, 1, 0])

    # A fresh Problem on the reduced dataframe only sees four labels, so
    # the codes are renumbered.
    subset_df = df[df['gene'] != 'gene3']
    prob_subset_df = Problem(subset_df, ['gene'], 'number', None)
    nose.tools.assert_list_equal(list(prob_subset_df.y), [2, 3, 1, 0])

    # Supplying the parent's label list restores the parent's codes.
    prob_subset_df_with_list = Problem(
        subset_df, ['gene'], 'number', None, prob.label_list)
    nose.tools.assert_list_equal(
        list(prob_subset_df_with_list.y), list(y_subset))

    # --- Positional slicing: keep rows 0, 2 and 3. ---
    custom_prob = prob.iloc([0, 2, 3])
    y_custom = custom_prob.y
    nose.tools.assert_list_equal(list(y_custom), [2, 3, 1])

    # Same pair of checks as above; the "_with_list" suffix marks the
    # Problem that actually receives the parent's label list.
    custom_df = df.iloc[[0, 2, 3]]
    prob_custom_df = Problem(custom_df, ['gene'], 'number', None)
    nose.tools.assert_list_equal(list(prob_custom_df.y), [1, 2, 0])

    prob_custom_df_with_list = Problem(
        custom_df, ['gene'], 'number', None, prob.label_list)
    nose.tools.assert_list_equal(
        list(prob_custom_df_with_list.y), list(y_custom))