コード例 #1
0
def test_problem_slicing():
    """Check that a Problem can be sliced along its sample axis.

    Two slicing paths are exercised: indexing the problem with a boolean
    mask derived from its dataframe, and positional selection through
    ``iloc``. Each sub-problem is passed through ``assert_metadata_eq``
    together with its parent, and then checked for sample count, feature
    count, labels and feature rows.
    """
    _, _, frame = mock_problem()
    full = Problem(frame, ['gene1', 'gene2'], 'disease', 'yes')

    check_eq = nose.tools.eq_
    check_array = np.testing.assert_array_equal

    # Path 1: boolean mask over the samples.
    is_male = full.dataframe['gender'] == 'male'
    males = full[is_male]
    assert_metadata_eq(full, males)
    check_eq(males.n_samples, 2)
    check_eq(males.n_features, 2)
    check_array(males.y, [1, 0])
    # The masked rows are expected to equal the parent's first two rows.
    check_array(males.X, full.X[:2])

    # Path 2: explicit integer positions.
    positions = (0, 2, 3)
    picked = full.iloc(list(positions))
    assert_metadata_eq(full, picked)
    check_eq(picked.n_samples, len(positions))
    check_eq(picked.n_features, 2)
    check_array(picked.y, [1, 0, 1])
    check_array(picked.X, full.X[list(positions)])
コード例 #2
0
def test_y_for_multiclass_slicing():
    """Check ``Problem.y`` for a multiclass target under slicing.

    The test compares three ways of obtaining a reduced problem:

    * slicing an existing ``Problem`` (boolean mask or ``iloc``),
    * building a new ``Problem`` from the reduced dataframe alone,
    * building a new ``Problem`` from the reduced dataframe while passing
      the parent's ``label_list`` as the fifth argument.

    Slicing and the explicit ``label_list`` are expected to yield the same
    label codes as the parent; building from the reduced dataframe alone is
    expected to renumber them.

    NOTE(review): the expected values below are consistent with labels being
    numbered by the alphabetical order of their names (five=0, four=1,
    one=2, three=3, two=4). This is inferred from the asserted numbers, not
    from the ``Problem`` implementation -- confirm there.
    """
    df = pd.DataFrame(columns=['gene', 'number'],
                      data=[['gene1', 'one'], ['gene2', 'two'],
                            ['gene3', 'three'], ['gene4', 'four'],
                            ['gene5', 'five']])
    prob = Problem(df, ['gene'], 'number', None)
    y = prob.y
    nose.tools.assert_list_equal(list(y), [2, 4, 3, 1, 0])

    # --- Boolean-mask slicing: drop the 'three' sample. ---
    # Slicing the problem keeps the parent's codes; code 3 simply goes unused.
    subset_prob = prob[prob.dataframe['gene'] != 'gene3']
    y_subset = subset_prob.y
    nose.tools.assert_list_equal(list(y_subset), [2, 4, 1, 0])

    # A fresh Problem on the reduced dataframe only sees four labels, so
    # 'two' is expected to move from code 4 to code 3.
    subset_df = df[df['gene'] != 'gene3']
    prob_subset_df = Problem(subset_df, ['gene'], 'number', None)
    y_subset_df = prob_subset_df.y
    nose.tools.assert_list_equal(list(y_subset_df), [2, 3, 1, 0])

    # Supplying the parent's label list must reproduce the sliced codes.
    prob_subset_df_with_list = Problem(subset_df, ['gene'], 'number', None,
                                       prob.label_list)
    y_subset_df_with_list = prob_subset_df_with_list.y
    nose.tools.assert_list_equal(list(y_subset_df_with_list), list(y_subset))

    # --- Positional slicing: keep samples 0, 2 and 3. ---
    custom_prob = prob.iloc([0, 2, 3])
    y_custom = custom_prob.y
    nose.tools.assert_list_equal(list(y_custom), [2, 3, 1])

    # Without the parent's label list the three remaining labels are
    # expected to be renumbered from zero.
    custom_df = df.iloc[[0, 2, 3]]
    prob_custom_df = Problem(custom_df, ['gene'], 'number', None)
    y_custom_df = prob_custom_df.y
    nose.tools.assert_list_equal(list(y_custom_df), [1, 2, 0])

    # With the parent's label list the codes must match the sliced problem.
    prob_custom_df_with_list = Problem(custom_df, ['gene'], 'number', None,
                                       prob.label_list)
    y_custom_df_with_list = prob_custom_df_with_list.y
    nose.tools.assert_list_equal(list(y_custom_df_with_list), list(y_custom))