コード例 #1
0
def forecasting_models_example():
    """
    Build 3 models and evaluate the performance of model trees,
    regression trees and standard linear regression.

    Every model is built from ``data/bikeSpeedVsIq_train.txt`` and scored
    against ``data/bikeSpeedVsIq_test.txt``.  Column 0 of the test matrix is
    fed to the model and the forecast is compared with column 1.  The value
    logged as "accuracy" is the correlation coefficient returned by
    ``np.corrcoef`` for forecast vs. column 1.
    """
    train_file = 'data/bikeSpeedVsIq_train.txt'
    test_file = 'data/bikeSpeedVsIq_test.txt'
    training_matrix = np.mat(utils.load_tsv_into_array(train_file))
    test_matrix = np.mat(utils.load_tsv_into_array(test_file))

    # Slice the test matrix once; all three evaluations share these columns.
    test_inputs = test_matrix[:, 0]
    test_targets = test_matrix[:, 1]

    # training tree (create_tree with its default leaf/error functions)
    tree = regression_trees.create_tree(training_matrix, ops=(1, 20))
    y_hat = regression_trees.create_forecast(tree, test_inputs)
    accuracy = np.corrcoef(y_hat, test_targets, rowvar=0)[0, 1]
    logging.info("training accuracy = {0}".format(accuracy))

    # model tree
    tree = regression_trees.create_tree(training_matrix,
                                        regression_trees.model_leaf,
                                        regression_trees.model_error, (1, 20))
    y_hat = regression_trees.create_forecast(
        tree, test_inputs, regression_trees.model_tree_evaluation)

    accuracy = np.corrcoef(y_hat, test_targets, rowvar=0)[0, 1]
    logging.info("model tree accuracy = {0}".format(accuracy))

    # standard linear regression: weights[0, 0] is used as the intercept and
    # weights[1, 0] as the slope.  The forecast is computed as a fresh vector
    # instead of being written element by element into the model tree's
    # leftover y_hat, so this step no longer depends on the shape or dtype of
    # the previous forecast.
    weights, _, _ = regression_trees.linearly_solve(training_matrix)
    y_hat = test_inputs * weights[1, 0] + weights[0, 0]
    accuracy = np.corrcoef(y_hat, test_targets, rowvar=0)[0, 1]
    logging.info("regression accuracy = {0}".format(accuracy))
コード例 #2
0
def pruning_example():
    """
    Log the tree from more_complex_tree(), prune it against the data in
    'data/ex2test.txt', and log the pruned result.
    """
    unpruned = more_complex_tree()
    log_formatted_tree(unpruned)

    # The pruning data is loaded only after the unpruned tree is logged.
    pruning_matrix = np.mat(utils.load_tsv_into_array('data/ex2test.txt'))
    log_formatted_tree(
        regression_trees.prune(unpruned, pruning_matrix), 'pruned tree')
コード例 #3
0
def pruning_example():
    """
    Show a tree before and after pruning.

    The tree comes from more_complex_tree(); it is pruned with the matrix
    loaded from 'data/ex2test.txt'.  Both versions are logged.
    """
    full_tree = more_complex_tree()
    log_formatted_tree(full_tree)

    test_rows = utils.load_tsv_into_array('data/ex2test.txt')
    trimmed_tree = regression_trees.prune(full_tree, np.mat(test_rows))

    log_formatted_tree(trimmed_tree, 'pruned tree')
コード例 #4
0
def main():
    """
    Run the k-means helpers on 'data/k_means_test_set.txt' and log what
    they return, then call bisecting_k_means().
    """
    points = np.mat(utils.load_tsv_into_array('data/k_means_test_set.txt'))

    # Random centroids, requested with k=2
    logging.info("random centroid = {rand_cent}".format(
        rand_cent=k_means.random_centroid(points, 2)))

    # Distance between the first two rows of the data set
    logging.info("Euclidean distance = {euc}".format(
        euc=k_means.euclidean_distance(points[0], points[1])))

    # Full clustering with k=4; only the centroids are logged
    found_centroids, _assignments = k_means.k_means(points, 4)
    logging.info("centroids = {cent}".format(cent=found_centroids))

    # Return value is intentionally discarded, as in the original flow
    bisecting_k_means()
コード例 #5
0
    def test_k_means(self):
        """k_means - k means should build clusters

        Runs k_means.k_means with k=4 on 'data/test_set_3.txt' and checks the
        returned cluster assignment against a reference matrix.
        """
        samples = np.mat(utils.load_tsv_into_array('data/test_set_3.txt'))
        _centroids, assignments = k_means.k_means(samples, 4)

        # Reference rows; presumably (cluster index, error) pairs -- confirm
        # against k_means.k_means.
        reference_rows = [
            [1., 0.45675494], [0., 0.3032197], [3., 1.74481454],
            [1., 0.80407696], [0., 1.02508049], [3., 2.59648559],
            [1., 0.42859499], [0., 0.0305198], [3., 2.37924609],
            [2., 0.], [0., 5.38984416], [3., 0.04519236],
            [1., 1.23757291], [0., 0.01298907], [3., 3.28350116],
            [1., 2.33205513], [0., 3.72839989], [3., 0.1398885],
            [1., 0.03288099], [0., 0.4038706], [3., 1.00363352],
            [1., 1.16346981], [0., 0.93928783], [3., 0.02261741],
            [1., 3.42458409], [0., 5.92927609], [3., 0.98873759],
            [1., 1.83018987], [0., 0.91125974], [3., 1.28677032],
        ]
        expected = np.mat(np.array(reference_rows))

        # NOTE(review): .any() collapses each matrix to a single boolean, so
        # this only checks that both matrices contain a non-zero entry; the
        # individual values are never compared.
        actual_flag = assignments.any()
        expected_flag = expected.any()
        self.assertEqual(actual_flag, expected_flag)
コード例 #6
0
ファイル: test_k_means.py プロジェクト: mjamesruggiero/lily
    def test_k_means(self):
        """k_means - k means should build clusters

        Clusters the matrix loaded from 'data/test_set_3.txt' into 4 groups
        and compares the cluster assignment with a hard-coded reference.
        """
        raw_rows = utils.load_tsv_into_array('data/test_set_3.txt')
        _centroids, clustered = k_means.k_means(np.mat(raw_rows), 4)

        # Hard-coded reference matrix (30 rows, 2 columns).
        reference = np.array([
            [1., 0.45675494], [0., 0.3032197], [3., 1.74481454],
            [1., 0.80407696], [0., 1.02508049], [3., 2.59648559],
            [1., 0.42859499], [0., 0.0305198], [3., 2.37924609],
            [2., 0.], [0., 5.38984416], [3., 0.04519236],
            [1., 1.23757291], [0., 0.01298907], [3., 3.28350116],
            [1., 2.33205513], [0., 3.72839989], [3., 0.1398885],
            [1., 0.03288099], [0., 0.4038706], [3., 1.00363352],
            [1., 1.16346981], [0., 0.93928783], [3., 0.02261741],
            [1., 3.42458409], [0., 5.92927609], [3., 0.98873759],
            [1., 1.83018987], [0., 0.91125974], [3., 1.28677032],
        ])
        expected = np.mat(reference)

        # NOTE(review): both sides are reduced to one boolean by .any(), so
        # the element values are not actually compared by this assertion.
        self.assertEqual(clustered.any(), expected.any())
コード例 #7
0
def piecewise_linear_solve_example():
    """
    Build a tree from 'data/exp2.txt' using the model_leaf / model_error
    functions of regression_trees and log it as 'model tree'.
    """
    rows = utils.load_tsv_into_array('data/exp2.txt')
    leaf_builder = regression_trees.model_leaf
    error_measure = regression_trees.model_error
    fitted = regression_trees.create_tree(np.mat(rows), leaf_builder,
                                          error_measure)
    log_formatted_tree(fitted, 'model tree')
コード例 #8
0
def more_complex_tree():
    """
    Return the tree that regression_trees.create_tree builds from
    'data/ex0.txt' with ops=(0, 1).
    """
    source_matrix = np.mat(utils.load_tsv_into_array('data/ex0.txt'))
    return regression_trees.create_tree(source_matrix, ops=(0, 1))
コード例 #9
0
def very_simple_tree():
    """
    Build a tree from 'data/ex00.txt' with create_tree's default settings
    and log it under the label "the tree".
    """
    source_matrix = np.mat(utils.load_tsv_into_array('data/ex00.txt'))
    log_formatted_tree(regression_trees.create_tree(source_matrix),
                       "the tree")
コード例 #10
0
def bisecting_k_means():
    """
    Run k_means.bisect_k_means with k=3 on 'data/test_set_2.txt'.

    Returns the two values produced by bisect_k_means as a
    (centroid_list, assessments) tuple.
    """
    raw_rows = utils.load_tsv_into_array('data/test_set_2.txt')
    centroids, point_assessments = k_means.bisect_k_means(np.mat(raw_rows), 3)
    return centroids, point_assessments
コード例 #11
0
def piecewise_linear_solve_example():
    """
    Log the tree created from 'data/exp2.txt' when create_tree is given
    regression_trees.model_leaf and regression_trees.model_error.
    """
    exp2_matrix = np.mat(utils.load_tsv_into_array('data/exp2.txt'))
    tree_args = (exp2_matrix,
                 regression_trees.model_leaf,
                 regression_trees.model_error)
    log_formatted_tree(regression_trees.create_tree(*tree_args), 'model tree')
コード例 #12
0
def more_complex_tree():
    """
    Load 'data/ex0.txt' and return the tree created from it with
    ops=(0, 1).
    """
    return regression_trees.create_tree(
        np.mat(utils.load_tsv_into_array('data/ex0.txt')),
        ops=(0, 1),
    )
コード例 #13
0
def very_simple_tree():
    """
    Load 'data/ex00.txt', create a tree from it with the default
    create_tree arguments, and log the tree as "the tree".
    """
    rows = utils.load_tsv_into_array('data/ex00.txt')
    simple_tree = regression_trees.create_tree(np.mat(rows))
    log_formatted_tree(simple_tree, "the tree")