Exemple #1
0
def test_set_get_bitset(values_to_insert, expected_bitset):
    """Check that bits set in a bitset can be read back.

    Each value of ``values_to_insert`` is set in a zero-initialised bitset
    made of three 32-bit words. The resulting words must be exactly
    ``expected_bitset``, and membership must be reported for the inserted
    values and for no other value in ``[0, 96)``.
    """
    n_32bits_ints = 3
    bitset = np.zeros(n_32bits_ints, dtype=np.uint32)
    for value in values_to_insert:
        set_bitset_memoryview(bitset, value)

    # Bitset words are integers, so they must match exactly. The default
    # rtol=1e-7 of assert_allclose scales with the magnitude of the word and
    # would accept wrong low-order bits once a high bit is set
    # (e.g. 2**31 vs 2**31 + 1).
    assert_allclose(bitset, expected_bitset, rtol=0, atol=0)

    for value in range(32 * n_32bits_ints):
        should_be_set = value in values_to_insert
        assert bool(in_bitset_memoryview(bitset, value)) == should_be_set
Exemple #2
0
def test_raw_bitset_from_binned_bitset(raw_categories, binned_cat_to_insert,
                                       expected_raw_bitset):
    """Check the conversion of a binned-category bitset into a raw one.

    The values of ``binned_cat_to_insert`` are set in a binned bitset, which
    is then converted with ``set_raw_bitset_from_binned_bitset`` using
    ``raw_categories``. The resulting raw bitset must be exactly
    ``expected_raw_bitset``, and a raw category must be reported as present
    only when its index in ``raw_categories`` was inserted.
    """
    binned_bitset = np.zeros(2, dtype=np.uint32)
    raw_bitset = np.zeros(2, dtype=np.uint32)
    raw_categories = np.asarray(raw_categories, dtype=X_DTYPE)

    for val in binned_cat_to_insert:
        set_bitset_memoryview(binned_bitset, val)

    set_raw_bitset_from_binned_bitset(raw_bitset, binned_bitset,
                                      raw_categories)

    # Bitset words are integers, so they must match exactly. The default
    # rtol=1e-7 of assert_allclose scales with the magnitude of the word and
    # would accept wrong low-order bits once a high bit is set.
    assert_allclose(raw_bitset, expected_raw_bitset, rtol=0, atol=0)

    # This test treats the index of each raw category as its binned value.
    for binned_cat_val, raw_cat_val in enumerate(raw_categories):
        should_be_set = binned_cat_val in binned_cat_to_insert
        assert bool(in_bitset_memoryview(raw_bitset, raw_cat_val)) == \
            should_be_set
Exemple #3
0
def test_categorical_predictor(bins_go_left, expected_predictions):
    """Check TreePredictor predictions on a single categorical split.

    A three-node tree is built by hand: node 0 is a categorical root on
    feature 0 with ``missing_go_to_left`` set, node 1 is the left leaf
    (value 1) and node 2 the right leaf (value 0). The bins listed in
    ``bins_go_left`` are set in the root's binned-category bitset. The
    predictions on binned data and on the matching raw categories must both
    equal ``expected_predictions``; missing and unknown inputs must yield 1.
    """
    raw_cats = np.array([2, 5, 6, 8, 10, 15], dtype=X_DTYPE)
    binned_X = np.array([[0, 1, 2, 3, 4, 5]], dtype=X_BINNED_DTYPE).T
    missing_bin = 6

    # Minimal tree: node 0 is the categorical root, nodes 1 and 2 its leaves.
    tree = np.zeros(3, dtype=PREDICTOR_RECORD_DTYPE)
    root_fields = {
        'left': 1,
        'right': 2,
        'feature_idx': 0,
        'is_categorical': True,
        'missing_go_to_left': True,
    }
    for field, setting in root_fields.items():
        tree[0][field] = setting
    for leaf_idx, leaf_value in ((1, 1), (2, 0)):
        tree[leaf_idx]['is_leaf'] = True
        tree[leaf_idx]['value'] = leaf_value

    # Bitset over bins first, then the derived bitset over raw categories.
    binned_bitsets = np.zeros((1, 8), dtype=X_BITSET_INNER_DTYPE)
    raw_bitsets = np.zeros((1, 8), dtype=X_BITSET_INNER_DTYPE)
    for left_bin in np.array(bins_go_left, dtype=X_BINNED_DTYPE):
        set_bitset_memoryview(binned_bitsets[0], left_bin)
    set_raw_bitset_from_binned_bitset(raw_bitsets[0], binned_bitsets[0],
                                      raw_cats)

    predictor = TreePredictor(tree, binned_bitsets, raw_bitsets)

    # Predictions from binned input.
    binned_out = predictor.predict_binned(
        binned_X, missing_values_bin_idx=missing_bin)
    assert_allclose(binned_out, expected_predictions)

    # Hand-built bitset of known categories: one bit per raw category value,
    # all of which fit in the first 32-bit word.
    known_bitsets = np.zeros((1, 8), dtype=np.uint32)
    known_bitsets[0, 0] = np.sum(2**raw_cats, dtype=np.uint32)
    feature_map = np.array([0], dtype=np.uint32)

    # Predictions from un-binned input.
    raw_out = predictor.predict(raw_cats.reshape(-1, 1), known_bitsets,
                                feature_map)
    assert_allclose(raw_out, expected_predictions)

    # A sample in the missing-values bin is expected in the left leaf.
    only_missing = np.array([[missing_bin]], dtype=X_BINNED_DTYPE).T
    missing_out = predictor.predict_binned(
        only_missing, missing_values_bin_idx=missing_bin)
    assert_allclose(missing_out, [1])

    # NaN (missing) and 17 (not among the known categories) both go left.
    missing_and_unknown = np.array([[np.nan, 17]], dtype=X_DTYPE).T
    unknown_out = predictor.predict(missing_and_unknown, known_bitsets,
                                    feature_map)
    assert_allclose(unknown_out, [1, 1])