Example #1
0
def _split_mask(points, valid, dim, cut):
    """Split the valid rows of *points* on ``points[:, dim] <= cut``.

    Returns a ``(left, right)`` pair of boolean masks. A row is in ``left``
    when it is valid and its coordinate ``dim`` is ``<= cut``; every other
    valid row is in ``right``. Invalid rows are in neither mask.
    """
    # Coerce so that 0/1 integer masks behave like a truthiness test.
    valid = np.asarray(valid, dtype=bool)
    goes_left = points[:, dim] <= cut
    # Negate the comparison (rather than testing `> cut`) so that rows whose
    # coordinate is NaN land on the right, exactly like an if/else split.
    return valid & goes_left, valid & ~goes_left


def build_tree(data, centers, distances, valid_centers, valid_data):
    """Recursively build a binary tree of axis-aligned cuts over the centers.

    Args:
        data: 2-D array of points, indexed as ``data[i, dim]``.
        centers: 2-D array of centers, indexed as ``centers[i, dim]``.
        distances: Forwarded unchanged to ``best_cut``; its layout is not
            visible from this function.
        valid_centers: Mask over the rows of ``centers`` selecting the
            centers that belong to the current node.
        valid_data: Mask over the rows of ``data`` selecting the points that
            belong to the current node.

    Returns:
        A ``Node``. When exactly one center is valid the node is a leaf whose
        ``value`` is the index of that center. Otherwise ``feature`` and
        ``value`` hold the cut dimension and threshold chosen by ``best_cut``
        and ``left`` / ``right`` hold the subtrees for the ``<= cut`` and
        ``> cut`` sides.

    NOTE(review): if a cut leaves one side with no valid centers, the
    recursive call falls through to ``best_cut`` with an empty center mask;
    what happens then depends on ``best_cut`` -- confirm it never returns
    such a cut.
    """
    node = Node()

    # Leaf: a single remaining center; argmax of the mask is its index.
    if valid_centers.sum() == 1:
        node.value = np.argmax(valid_centers)
        return node

    # The third element returned by best_cut (the cost) is not needed here.
    dim, cut, _ = best_cut(data, valid_data, centers,
                           valid_centers, distances)
    node.feature = dim
    node.value = cut

    # Partition both the points and the centers with the same threshold.
    left_valid_data, right_valid_data = _split_mask(
        data, valid_data, dim, cut)
    left_valid_centers, right_valid_centers = _split_mask(
        centers, valid_centers, dim, cut)

    # The full arrays are passed down unchanged; only the masks narrow.
    node.left = build_tree(data, centers, distances,
                           left_valid_centers, left_valid_data)
    node.right = build_tree(data, centers, distances,
                            right_valid_centers, right_valid_data)

    return node