def build_tree(data, centers, distances, valid_centers, valid_data):
    """Recursively build a binary tree of axis-aligned cuts over the centers.

    At each call a cut ``(dim, cut)`` is obtained from ``best_cut``; the
    currently valid data points and centers are then partitioned by the
    test ``coordinate[dim] <= cut`` (left) versus everything else (right),
    and the function recurses on both sides.

    Args:
        data: 2-D array indexed as ``data[i, dim]``; one row per data point.
        centers: 2-D array indexed as ``centers[i, dim]``; one row per center.
        distances: Forwarded unchanged to ``best_cut``; not read here.
        valid_centers: Per-center mask (must support ``.sum()``) marking the
            centers that belong to the current subtree.
        valid_data: Per-point mask marking the data points that belong to
            the current subtree.

    Returns:
        A ``Node``. When exactly one center is valid, the node is a leaf
        whose ``value`` is ``np.argmax(valid_centers)`` (the position of
        that center). Otherwise ``feature`` holds the cut dimension,
        ``value`` the cut threshold, and ``left`` / ``right`` the subtrees.

    NOTE(review): termination relies on ``best_cut`` returning a cut that
    leaves at least one valid center on each side -- confirm against
    ``best_cut``; nothing in this function guards against an empty side.
    """
    node = Node()

    # Base case: a single remaining center becomes a leaf.
    if valid_centers.sum() == 1:
        node.value = np.argmax(valid_centers)
        return node

    # best_cut also returns a cost, which is not needed for construction.
    dim, cut, _ = best_cut(data, valid_data, centers, valid_centers, distances)
    node.feature = dim
    node.value = cut

    # Partition the valid data points by the cut. The right side is written
    # as "valid and not left" (rather than "> cut") so that NaN coordinates
    # fall to the right, exactly as in an if/else on "<= cut".
    data_is_valid = np.asarray(valid_data, dtype=bool)
    data_goes_left = data[:, dim] <= cut
    left_valid_data = data_is_valid & data_goes_left
    right_valid_data = data_is_valid & ~data_goes_left

    # Partition the valid centers by the same cut.
    center_is_valid = np.asarray(valid_centers, dtype=bool)
    center_goes_left = centers[:, dim] <= cut
    left_valid_centers = center_is_valid & center_goes_left
    right_valid_centers = center_is_valid & ~center_goes_left

    node.left = build_tree(
        data, centers, distances, left_valid_centers, left_valid_data
    )
    node.right = build_tree(
        data, centers, distances, right_valid_centers, right_valid_data
    )
    return node