Example #1
0
def test__predict_by_tree():
    """
    Check argument validation and the prediction of `_predict_by_tree`.

    A single tree is grown on a fixed toy dataset with a fixed random seed,
    then one query point is predicted and compared with reference values.
    """

    np.random.seed(42)

    X = np.reshape(np.arange(0, 40), (10, 4))
    Y = np.random.randn(10, 1)
    depth_max = 4
    size_min_leaf = 2
    num_features = 2
    split_random_location = True

    node = package_target._split(X, Y, num_features, split_random_location)
    package_target.split(node, depth_max, size_min_leaf, num_features,
                         split_random_location, 1)

    x_query = np.array([4.0, 2.0, 3.0, 1.0])

    # Invalid tree argument.
    with pytest.raises(AssertionError):
        package_target._predict_by_tree(x_query, 'abc')
    # A 2-D batch is expected to be rejected; a single 1-D point is accepted
    # below.
    with pytest.raises(AssertionError):
        package_target._predict_by_tree(X, node)
    # Invalid query point.
    with pytest.raises(AssertionError):
        package_target._predict_by_tree('abc', node)

    mean, std = package_target._predict_by_tree(x_query, node)
    # Kept so that the actual values show up in pytest's captured output
    # when one of the comparisons below fails.
    print(mean)
    print(std)

    # Compare with an absolute tolerance rather than `==`: the reference
    # numbers come from floating-point reductions whose last bits may differ
    # between NumPy builds and platforms.
    tolerance = 1e-10
    assert np.isclose(mean, 0.179224925920024, rtol=0.0, atol=tolerance)
    assert np.isclose(std, 0.31748922709120864, rtol=0.0, atol=tolerance)
Example #2
0
def get_random_forest(
    X: np.ndarray,
    Y: np.ndarray,
    num_trees: int,
    depth_max: int,
    size_min_leaf: int,
    num_features: int,
) -> list:
    """
    It builds a random forest, returned as a list of tree roots.

    Every tree is grown from its own subsample of (X, Y), drawn through
    `trees_common.subsample` with a sampling ratio of 1.0 and the
    replacement flag enabled. Random split locations are disabled.

    :param X: inputs. Shape: (N, d).
    :type X: np.ndarray
    :param Y: outputs. Shape: (N, 1).
    :type Y: np.ndarray
    :param num_trees: the number of trees.
    :type num_trees: int.
    :param depth_max: maximum depth of tree.
    :type depth_max: int.
    :param size_min_leaf: minimum size of leaf.
    :type size_min_leaf: int.
    :param num_features: the number of split features.
    :type num_features: int.

    :returns: list of trees
    :rtype: list

    :raises: AssertionError

    """

    # Type checks come first so that the shape checks below can rely on
    # X and Y being arrays.
    assert isinstance(X, np.ndarray)
    assert isinstance(Y, np.ndarray)
    for value in (num_trees, depth_max, size_min_leaf, num_features):
        assert isinstance(value, int)

    assert X.ndim == 2
    assert Y.ndim == 2
    assert Y.shape == (X.shape[0], 1)

    # Fixed settings of this forest variant.
    ratio_sample = 1.0
    replace_samples = True
    split_random_location = False

    def _grow_tree():
        # Draw a fresh subsample, create the root split, then expand it in
        # place starting from depth 1.
        X_sub, Y_sub = trees_common.subsample(
            X, Y, ratio_sample, replace_samples)
        root = trees_common._split(
            X_sub, Y_sub, num_features, split_random_location)
        trees_common.split(root, depth_max, size_min_leaf, num_features,
                           split_random_location, 1)
        return root

    return [_grow_tree() for _ in range(num_trees)]
Example #3
0
def test__predict_by_trees():
    """
    Check argument validation and the prediction of `_predict_by_trees`.

    Three trees are grown one after another on the same toy dataset with a
    fixed random seed, then one query point is predicted with all of them
    and compared with reference values.
    """

    np.random.seed(42)

    X = np.reshape(np.arange(0, 40), (10, 4))
    Y = np.random.randn(10, 1)
    depth_max = 4
    size_min_leaf = 2
    num_features = 2
    split_random_location = True

    # The trees are built sequentially; each one consumes random numbers, so
    # the construction order determines the reference values below.
    list_trees = []
    for _ in range(3):
        root = package_target._split(X, Y, num_features,
                                     split_random_location)
        package_target.split(root, depth_max, size_min_leaf, num_features,
                             split_random_location, 1)
        list_trees.append(root)

    x_query = np.array([4.0, 2.0, 3.0, 1.0])

    # A single tree instead of a list of trees.
    with pytest.raises(AssertionError):
        package_target._predict_by_trees(x_query, list_trees[0])
    with pytest.raises(AssertionError):
        package_target._predict_by_trees(x_query, 'abc')
    # A 2-D batch is expected to be rejected; a single 1-D point is accepted
    # below.
    with pytest.raises(AssertionError):
        package_target._predict_by_trees(X, list_trees)
    with pytest.raises(AssertionError):
        package_target._predict_by_trees('abc', list_trees)

    mean, std = package_target._predict_by_trees(x_query, list_trees)
    # Kept so that the actual values show up in pytest's captured output
    # when one of the comparisons below fails.
    print(mean)
    print(std)

    # Compare with an absolute tolerance rather than `==`: the reference
    # numbers come from floating-point reductions whose last bits may differ
    # between NumPy builds and platforms.
    tolerance = 1e-10
    assert np.isclose(mean, 0.12544669602080652, rtol=0.0, atol=tolerance)
    assert np.isclose(std, 0.3333040901154691, rtol=0.0, atol=tolerance)
Example #4
0
def test_split():
    """
    Check that `split` rejects invalid arguments and accepts valid ones.

    Starting from a valid argument tuple, one argument at a time is replaced
    by an invalid value and the call is expected to raise AssertionError.
    """

    np.random.seed(42)

    X = np.reshape(np.arange(0, 40), (10, 4))
    Y = np.random.randn(10, 1)
    depth_max = 4
    size_min_leaf = 2
    num_features = 2
    split_random_location = False

    node = package_target._split(X, Y, num_features, split_random_location)

    # Positional arguments of a valid call:
    # (node, depth_max, size_min_leaf, num_features,
    #  split_random_location, current depth).
    valid_args = (node, depth_max, size_min_leaf, num_features,
                  split_random_location, 1)

    # (position to overwrite, invalid value), in the order they are tried.
    invalid_cases = [
        (5, 'abc'),
        (5, 1.0),
        (4, 'abc'),
        (3, 'abc'),
        (2, 'abc'),
        (1, 'abc'),
        (0, X),
        (0, 'abc'),
    ]

    for position, bad_value in invalid_cases:
        args = list(valid_args)
        args[position] = bad_value
        with pytest.raises(AssertionError):
            package_target.split(*args)

    package_target.split(*valid_args)
    assert isinstance(node, dict)
def get_generic_trees(
    X: np.ndarray,
    Y: np.ndarray,
    num_trees: int,
    depth_max: int,
    size_min_leaf: int,
    ratio_sampling: float,
    replace_samples: bool,
    num_features: int,
    split_random_location: bool,
) -> list:
    """
    It builds a list of generic trees, returned as a list of tree roots.

    Every tree is grown from its own subsample of (X, Y), drawn through
    `trees_common.subsample` with the given sampling ratio and replacement
    flag.

    :param X: inputs. Shape: (N, d).
    :type X: np.ndarray
    :param Y: outputs. Shape: (N, 1).
    :type Y: np.ndarray
    :param num_trees: the number of trees.
    :type num_trees: int.
    :param depth_max: maximum depth of tree.
    :type depth_max: int.
    :param size_min_leaf: minimum size of leaf.
    :type size_min_leaf: int.
    :param ratio_sampling: ratio of dataset subsampling. It must be
        positive, and at most 1.0 when sampling without replacement.
    :type ratio_sampling: float
    :param replace_samples: flag for replacement.
    :type replace_samples: bool.
    :param num_features: the number of split features.
    :type num_features: int.
    :param split_random_location: flag for random split location.
    :type split_random_location: bool.

    :returns: list of trees
    :rtype: list

    :raises: AssertionError

    """

    # Type checks come first so that the shape checks below can rely on
    # X and Y being arrays.
    assert isinstance(X, np.ndarray)
    assert isinstance(Y, np.ndarray)
    for value in (num_trees, depth_max, size_min_leaf, num_features):
        assert isinstance(value, int)
    assert isinstance(ratio_sampling, float)
    for flag in (replace_samples, split_random_location):
        assert isinstance(flag, bool)

    assert X.ndim == 2
    assert Y.ndim == 2
    assert Y.shape == (X.shape[0], 1)

    # The ratio is always positive; it may exceed 1.0 only when samples are
    # drawn with replacement.
    assert ratio_sampling > 0.0
    assert replace_samples or ratio_sampling <= 1.0

    def _grow_tree():
        # Draw a fresh subsample, create the root split, then expand it in
        # place starting from depth 1.
        X_sub, Y_sub = trees_common.subsample(
            X, Y, ratio_sampling, replace_samples)
        root = trees_common._split(
            X_sub, Y_sub, num_features, split_random_location)
        trees_common.split(root, depth_max, size_min_leaf, num_features,
                           split_random_location, 1)
        return root

    return [_grow_tree() for _ in range(num_trees)]
Example #6
0
def test_predict_by_trees():
    """
    Check argument validation and the batch predictions of
    `predict_by_trees`.

    Three trees are grown one after another on a toy dataset with a fixed
    random seed. The training inputs are predicted and compared with
    reference values, then a larger random batch is predicted and only the
    output types and shapes are checked.
    """

    def _check_outputs(means, stds, inputs):
        # Both outputs must be 2-D arrays with one row per input point and
        # a single column.
        assert isinstance(means, np.ndarray)
        assert isinstance(stds, np.ndarray)
        assert len(means.shape) == 2
        assert len(stds.shape) == 2
        assert means.shape[0] == stds.shape[0] == inputs.shape[0]
        assert means.shape[1] == stds.shape[1] == 1

    np.random.seed(42)

    X = np.reshape(np.arange(0, 40), (10, 4))
    Y = np.random.randn(10, 1)
    depth_max = 4
    size_min_leaf = 2
    num_features = 2
    split_random_location = True

    # The trees are built sequentially; each one consumes random numbers, so
    # the construction order determines the reference values below.
    list_trees = []
    for _ in range(3):
        root = package_target._split(X, Y, num_features,
                                     split_random_location)
        package_target.split(root, depth_max, size_min_leaf, num_features,
                             split_random_location, 1)
        list_trees.append(root)

    x_single = np.array([4.0, 2.0, 3.0, 1.0])

    # A single tree instead of a list of trees.
    with pytest.raises(AssertionError):
        package_target.predict_by_trees(x_single, list_trees[0])
    with pytest.raises(AssertionError):
        package_target.predict_by_trees(x_single, 'abc')
    # A single 1-D point is expected to be rejected; a 2-D batch is accepted
    # below.
    with pytest.raises(AssertionError):
        package_target.predict_by_trees(x_single, list_trees)
    with pytest.raises(AssertionError):
        package_target.predict_by_trees('abc', list_trees)

    means, stds = package_target.predict_by_trees(X, list_trees)
    print(means)
    print(stds)

    means_truth = np.array([
        0.33710618,
        0.1254467,
        0.68947573,
        0.9866563,
        0.16257799,
        0.16258346,
        0.85148454,
        0.55084716,
        0.30721653,
        0.30721653,
    ])[:, np.newaxis]

    stds_truth = np.array([
        0.29842457,
        0.33330409,
        0.44398864,
        0.72536523,
        0.74232577,
        0.74232284,
        0.83388663,
        0.5615399,
        0.64331582,
        0.64331582,
    ])[:, np.newaxis]

    _check_outputs(means, stds, X)

    assert np.all(np.abs(means - means_truth) < TEST_EPSILON)
    assert np.all(np.abs(stds - stds_truth) < TEST_EPSILON)

    # Larger random batch: no reference values, only output types and shapes.
    X = np.random.randn(1000, 4)

    means, stds = package_target.predict_by_trees(X, list_trees)

    _check_outputs(means, stds, X)
Example #7
0
def test__split():
    """
    Check argument validation and the result of `_split`.

    Three scenarios are covered on a toy dataset with a fixed random seed:
    a split without a random location, a split with a random location, and
    a split on constant inputs.
    """

    np.random.seed(42)

    X = np.reshape(np.arange(0, 40), (10, 4))
    Y = np.random.randn(10, 1)
    num_features = 2
    split_random_location = False

    with pytest.raises(AssertionError):
        package_target._split(X, Y, num_features, 'abc')
    with pytest.raises(AssertionError):
        package_target._split(X, Y, 'abc', split_random_location)
    with pytest.raises(AssertionError):
        package_target._split(X, Y, 2.0, split_random_location)
    with pytest.raises(AssertionError):
        package_target._split(X, 'abc', num_features, split_random_location)
    with pytest.raises(AssertionError):
        package_target._split('abc', Y, num_features, split_random_location)

    # Scenario 1: split without a random location.
    dict_split = package_target._split(X, Y, num_features,
                                       split_random_location)
    print(dict_split)
    print(dict_split['index'])
    print(dict_split['value'])
    print(dict_split['left_right'])

    assert isinstance(dict_split, dict)
    assert dict_split['index'] == 1
    # 35.0 is compared exactly, as in the original test.
    assert dict_split['value'] == 35.0

    # Expected outputs of the ten rows for seed 42, in row order.
    outputs_truth = [
        0.49671415,
        -0.1382643,
        0.64768854,
        1.52302986,
        -0.23415337,
        -0.23413696,
        1.57921282,
        0.76743473,
        -0.46947439,
        0.54256004,
    ]

    # The first nine rows are expected on the left side, in their original
    # order; row i of X is [4 * i, 4 * i + 1, 4 * i + 2, 4 * i + 3].
    left = dict_split['left_right'][0]
    for ind_row in range(0, 9):
        assert np.all(
            left[ind_row][0] == np.arange(4 * ind_row, 4 * ind_row + 4))
        assert np.abs(left[ind_row][1] -
                      np.array([outputs_truth[ind_row]])) < TEST_EPSILON

    # The last row is expected to be the first element of the right side.
    right = dict_split['left_right'][1]
    assert np.all(right[0][0] == np.array([36, 37, 38, 39]))
    assert np.abs(right[0][1] - np.array([outputs_truth[9]])) < TEST_EPSILON

    # Scenario 2: split with a random location.
    dict_split = package_target._split(X, Y, num_features, True)
    print(dict_split)
    print(dict_split['index'])
    print(dict_split['value'])
    print(dict_split['left_right'])

    assert isinstance(dict_split, dict)
    assert dict_split['index'] == 0
    # The split value is a computed float here, so it is compared with a
    # tolerance instead of `==`, whose outcome depends on the last bits.
    assert np.abs(dict_split['value'] - 0.2543869879098266) < TEST_EPSILON

    # Scenario 3: constant inputs, so every feature holds the value 1.0.
    X = np.ones(X.shape)

    dict_split = package_target._split(X, Y, num_features, True)
    print(dict_split)
    print(dict_split['index'])
    print(dict_split['value'])
    print(dict_split['left_right'])

    assert isinstance(dict_split, dict)
    assert dict_split['index'] == 1
    assert dict_split['value'] == 1.0