def run_example():
    """
    Q-10 of the Application

    Builds one singleton cluster per row of DATA_TABLE, then computes the
    distortion of the k-means clustering and of the hierarchical clustering
    for every cluster count from 6 to 20 inclusive.  Both distortion lists
    are printed in order of increasing cluster count and then passed to
    compute_plot together with the list of cluster counts.
    """
    # Single definition of the cluster counts; every loop and the plot
    # call below share it instead of repeating the 6..20 bounds.
    min_clusters, max_clusters = 6, 20
    cluster_counts = list(range(min_clusters, max_clusters + 1))

    # Each row supplies the five Cluster constructor arguments; the first
    # field is wrapped in a one-element set.
    singleton_list = [
        alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])
        for line in DATA_TABLE
    ]

    # k-means is restarted from the full singleton list for every count.
    # The third argument (5) is presumably the number of iterations --
    # TODO confirm against proj3_solution.kmeans_clustering.
    k_means_distortion = [
        compute_distortion(
            proj3_solution.kmeans_clustering(singleton_list, num_clusters, 5))
        for num_clusters in cluster_counts
    ]
    # Parenthesised single-argument form prints identically on Python 2 and
    # is also valid Python 3 syntax.
    print(k_means_distortion)

    # Hierarchical clustering walks from the largest count down to the
    # smallest, feeding each result into the next call.
    # NOTE(review): cluster_list aliases singleton_list; if
    # hierarchical_clustering mutates its input, singleton_list is no longer
    # pristine afterwards.  Nothing below reads it again.
    hierarchical_distortion = []
    cluster_list = singleton_list
    for num_clusters in reversed(cluster_counts):
        cluster_list = proj3_solution.hierarchical_clustering(cluster_list, num_clusters)
        hierarchical_distortion.append(compute_distortion(cluster_list))
    # Results were gathered for 20 down to 6; flip them to match cluster_counts.
    hierarchical_distortion.reverse()
    print(hierarchical_distortion)

    compute_plot(cluster_counts, hierarchical_distortion, k_means_distortion)
# Example #2
# 0
def run_example():
    """
    Q-10 of the Application

    Builds one singleton cluster per row of DATA_TABLE, then computes the
    distortion of the k-means clustering and of the hierarchical clustering
    for every cluster count from 6 to 20 inclusive.  Both distortion lists
    are printed in order of increasing cluster count and then passed to
    compute_plot together with the list of cluster counts.
    """
    # Single definition of the cluster counts; every loop and the plot
    # call below share it instead of repeating the 6..20 bounds.
    min_clusters, max_clusters = 6, 20
    cluster_counts = list(range(min_clusters, max_clusters + 1))

    # Each row supplies the five Cluster constructor arguments; the first
    # field is wrapped in a one-element set.
    singleton_list = [
        alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3],
                            line[4]) for line in DATA_TABLE
    ]

    # k-means is restarted from the full singleton list for every count.
    # The third argument (5) is presumably the number of iterations --
    # TODO confirm against proj3_solution.kmeans_clustering.
    k_means_distortion = [
        compute_distortion(
            proj3_solution.kmeans_clustering(singleton_list, num_clusters, 5))
        for num_clusters in cluster_counts
    ]
    # Parenthesised single-argument form prints identically on Python 2 and
    # is also valid Python 3 syntax.
    print(k_means_distortion)

    # Hierarchical clustering walks from the largest count down to the
    # smallest, feeding each result into the next call.
    # NOTE(review): cluster_list aliases singleton_list; if
    # hierarchical_clustering mutates its input, singleton_list is no longer
    # pristine afterwards.  Nothing below reads it again.
    hierarchical_distortion = []
    cluster_list = singleton_list
    for num_clusters in reversed(cluster_counts):
        cluster_list = proj3_solution.hierarchical_clustering(
            cluster_list, num_clusters)
        hierarchical_distortion.append(compute_distortion(cluster_list))
    # Results were gathered for 20 down to 6; flip them to match cluster_counts.
    hierarchical_distortion.reverse()
    print(hierarchical_distortion)

    compute_plot(cluster_counts, hierarchical_distortion, k_means_distortion)
def test_hierarchical24():
    """
    Test for hierarchical clustering
    Note that hierarchical_clustering mutates cluster_list

    Loads the 24 county table from DATA_24_URL and, for every entry of the
    expected-results table, runs student.hierarchical_clustering on a
    freshly built list of singleton clusters.  The resulting set of county
    tuples is compared with the expected set through a
    poc_simpletest.TestSuite, whose results are reported at the end.
    """

    # load small data table
    # print("") emits the same blank line as a bare Python 2 print statement
    # while remaining valid Python 3 syntax.
    print("")
    print("Testing hierarchical_clustering on 24 county set")
    data_24_table = load_data_table(DATA_24_URL)


    # test data of the form [size of output cluster, sets of county tuples]
    hierdata_24 = [[23, set([('11001', '51013'), ('01073',), ('06059',), ('06037',), ('06029',), ('06071',), ('06075',), ('08031',), ('24510',), ('34013',), ('34039',), ('34017',), ('36061',), ('36005',), ('36047',), ('36059',), ('36081',), ('41051',), ('41067',), ('51840',), ('51760',), ('55079',), ('54009',)])],
                   [22, set([('11001', '51013'), ('36047', '36081'), ('01073',), ('06059',), ('06037',), ('06029',), ('06071',), ('06075',), ('08031',), ('24510',), ('34013',), ('34039',), ('34017',), ('36061',), ('36005',), ('36059',), ('41051',), ('41067',), ('51840',), ('51760',), ('55079',), ('54009',)])],
                   [21, set([('11001', '51013'), ('36005', '36061'), ('36047', '36081'), ('01073',), ('06059',), ('06037',), ('06029',), ('06071',), ('06075',), ('08031',), ('24510',), ('34013',), ('34039',), ('34017',), ('36059',), ('41051',), ('41067',), ('51840',), ('51760',), ('55079',), ('54009',)])],
                   [20, set([('11001', '51013'), ('36005', '36061'), ('36047', '36081'), ('01073',), ('06059',), ('06037',), ('06029',), ('06071',), ('06075',), ('08031',), ('24510',), ('34039',), ('34013', '34017'), ('36059',), ('41051',), ('41067',), ('51840',), ('51760',), ('55079',), ('54009',)])],
                   [19, set([('34013', '34017', '34039'), ('11001', '51013'), ('36005', '36061'), ('36047', '36081'), ('01073',), ('06059',), ('06037',), ('06029',), ('06071',), ('06075',), ('08031',), ('24510',), ('36059',), ('41051',), ('41067',), ('51840',), ('51760',), ('55079',), ('54009',)])],
                   [18, set([('34013', '34017', '34039'), ('11001', '51013'), ('01073',), ('06059',), ('06037',), ('06029',), ('06071',), ('06075',), ('08031',), ('24510',), ('36059',), ('36005', '36047', '36061', '36081'), ('41051',), ('41067',), ('51840',), ('51760',), ('55079',), ('54009',)])],
                   [17, set([('11001', '51013'), ('01073',), ('06059',), ('06037',), ('06029',), ('06071',), ('06075',), ('08031',), ('24510',), ('36059',), ('34013', '34017', '34039', '36005', '36047', '36061', '36081'), ('41051',), ('41067',), ('51840',), ('51760',), ('55079',), ('54009',)])],
                   [16, set([('11001', '51013'), ('01073',), ('06059',), ('06037',), ('06029',), ('06071',), ('06075',), ('08031',), ('24510',), ('34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081'), ('41051',), ('41067',), ('51840',), ('51760',), ('55079',), ('54009',)])],
                   [15, set([('11001', '51013'), ('01073',), ('06059',), ('06037',), ('06029',), ('06071',), ('06075',), ('08031',), ('24510',), ('34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081'), ('41051', '41067'), ('51840',), ('51760',), ('55079',), ('54009',)])],
                   [14, set([('01073',), ('06059',), ('06037',), ('06029',), ('06071',), ('06075',), ('08031',), ('34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081'), ('41051', '41067'), ('51840',), ('51760',), ('55079',), ('54009',), ('11001', '24510', '51013')])],
                   [13, set([('06037', '06059'), ('01073',), ('06029',), ('06071',), ('06075',), ('08031',), ('34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081'), ('41051', '41067'), ('51840',), ('51760',), ('55079',), ('54009',), ('11001', '24510', '51013')])],
                   [12, set([('06037', '06059'), ('01073',), ('06029',), ('06071',), ('06075',), ('08031',), ('34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081'), ('41051', '41067'), ('51760',), ('55079',), ('54009',), ('11001', '24510', '51013', '51840')])],
                   [11, set([('06029', '06037', '06059'), ('01073',), ('06071',), ('06075',), ('08031',), ('34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081'), ('41051', '41067'), ('51760',), ('55079',), ('54009',), ('11001', '24510', '51013', '51840')])],
                   [10, set([('06029', '06037', '06059'), ('01073',), ('06071',), ('06075',), ('08031',), ('34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081'), ('41051', '41067'), ('55079',), ('54009',), ('11001', '24510', '51013', '51760', '51840')])],
                   [9, set([('01073',), ('06029', '06037', '06059', '06071'), ('06075',), ('08031',), ('34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081'), ('41051', '41067'), ('55079',), ('54009',), ('11001', '24510', '51013', '51760', '51840')])],
                   [8, set([('01073',), ('06029', '06037', '06059', '06071'), ('06075',), ('08031',), ('41051', '41067'), ('55079',), ('54009',), ('11001', '24510', '34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081', '51013', '51760', '51840')])],
                   [7, set([('01073',), ('06029', '06037', '06059', '06071'), ('06075',), ('08031',), ('41051', '41067'), ('55079',), ('11001', '24510', '34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081', '51013', '51760', '51840', '54009')])],
                   [6, set([('06029', '06037', '06059', '06071', '06075'), ('01073',), ('08031',), ('41051', '41067'), ('55079',), ('11001', '24510', '34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081', '51013', '51760', '51840', '54009')])],
                   [5, set([('06029', '06037', '06059', '06071', '06075'), ('08031',), ('41051', '41067'), ('01073', '55079'), ('11001', '24510', '34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081', '51013', '51760', '51840', '54009')])],
                   [4, set([('06029', '06037', '06059', '06071', '06075'), ('01073', '11001', '24510', '34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081', '51013', '51760', '51840', '54009', '55079'), ('08031',), ('41051', '41067')])],
                   [3, set([('06029', '06037', '06059', '06071', '06075', '41051', '41067'), ('01073', '11001', '24510', '34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081', '51013', '51760', '51840', '54009', '55079'), ('08031',)])],
                   [2, set([('01073', '11001', '24510', '34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081', '51013', '51760', '51840', '54009', '55079'), ('06029', '06037', '06059', '06071', '06075', '08031', '41051', '41067')])],
                   ]


    suite = poc_simpletest.TestSuite()

    for num_clusters, expected_county_tuple in hierdata_24:

        # build initial list of clusters for each test since mutation is allowed
        # (iterating the rows directly replaces the former index-based loop)
        cluster_list = [
            alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])
            for line in data_24_table
        ]

        # compute student answer
        student_clustering = student.hierarchical_clustering(cluster_list, num_clusters)
        student_county_tuple = set_of_county_tuples(student_clustering)

        # Prepare test
        error_message = "Testing hierarchical_clustering on 24 county table, num_clusters = " + str(num_clusters)
        error_message += "\nStudent county tuples: " + str(student_county_tuple)
        error_message += "\nExpected county tuples: " + str(expected_county_tuple)
        # The comparison result is checked against True so the suite records
        # a failure, with error_message, whenever the sets differ.
        suite.run_test(student_county_tuple == expected_county_tuple, True, error_message)

    suite.report_results()
# Example #4
# 0
def test_hierarchical24():
    """
    Test for hierarchical clustering
    Note that hierarchical_clustering mutates cluster_list

    Loads the 24 county table from DATA_24_URL and, for every entry of the
    expected-results table, runs student.hierarchical_clustering on a
    freshly built list of singleton clusters.  The resulting set of county
    tuples is compared with the expected set through a
    poc_simpletest.TestSuite, whose results are reported at the end.
    """

    # load small data table
    # print("") emits the same blank line as a bare Python 2 print statement
    # while remaining valid Python 3 syntax.
    print("")
    print("Testing hierarchical_clustering on 24 county set")
    data_24_table = load_data_table(DATA_24_URL)

    # test data of the form [size of output cluster, sets of county tuples]
    hierdata_24 = [
        [
            23,
            set([('11001', '51013'), ('01073', ), ('06059', ), ('06037', ),
                 ('06029', ), ('06071', ), ('06075', ), ('08031', ),
                 ('24510', ), ('34013', ), ('34039', ), ('34017', ),
                 ('36061', ), ('36005', ), ('36047', ), ('36059', ),
                 ('36081', ), ('41051', ), ('41067', ), ('51840', ),
                 ('51760', ), ('55079', ), ('54009', )])
        ],
        [
            22,
            set([('11001', '51013'), ('36047', '36081'), ('01073', ),
                 ('06059', ), ('06037', ), ('06029', ), ('06071', ),
                 ('06075', ), ('08031', ), ('24510', ), ('34013', ),
                 ('34039', ), ('34017', ), ('36061', ), ('36005', ),
                 ('36059', ), ('41051', ), ('41067', ), ('51840', ),
                 ('51760', ), ('55079', ), ('54009', )])
        ],
        [
            21,
            set([('11001', '51013'), ('36005', '36061'), ('36047', '36081'),
                 ('01073', ), ('06059', ), ('06037', ), ('06029', ),
                 ('06071', ), ('06075', ), ('08031', ), ('24510', ),
                 ('34013', ), ('34039', ), ('34017', ), ('36059', ),
                 ('41051', ), ('41067', ), ('51840', ), ('51760', ),
                 ('55079', ), ('54009', )])
        ],
        [
            20,
            set([('11001', '51013'), ('36005', '36061'), ('36047', '36081'),
                 ('01073', ), ('06059', ), ('06037', ), ('06029', ),
                 ('06071', ),
                 ('06075', ), ('08031', ), ('24510', ), ('34039', ),
                 ('34013', '34017'), ('36059', ), ('41051', ), ('41067', ),
                 ('51840', ), ('51760', ), ('55079', ), ('54009', )])
        ],
        [
            19,
            set([('34013', '34017', '34039'), ('11001', '51013'),
                 ('36005', '36061'), ('36047', '36081'), ('01073', ),
                 ('06059', ), ('06037', ), ('06029', ), ('06071', ),
                 ('06075', ), ('08031', ), ('24510', ), ('36059', ),
                 ('41051', ), ('41067', ), ('51840', ), ('51760', ),
                 ('55079', ), ('54009', )])
        ],
        [
            18,
            set([('34013', '34017', '34039'), ('11001', '51013'), ('01073', ),
                 ('06059', ), ('06037', ), ('06029', ), ('06071', ),
                 ('06075', ), ('08031', ), ('24510', ), ('36059', ),
                 ('36005', '36047', '36061',
                  '36081'), ('41051', ), ('41067', ), ('51840', ), ('51760', ),
                 ('55079', ), ('54009', )])
        ],
        [
            17,
            set([('11001', '51013'), ('01073', ), ('06059', ), ('06037', ),
                 ('06029', ), ('06071', ), ('06075', ), ('08031', ),
                 ('24510', ), ('36059', ),
                 ('34013', '34017', '34039', '36005', '36047', '36061',
                  '36081'), ('41051', ), ('41067', ), ('51840', ), ('51760', ),
                 ('55079', ), ('54009', )])
        ],
        [
            16,
            set([('11001', '51013'), ('01073', ), ('06059', ), ('06037', ),
                 ('06029', ), ('06071', ), ('06075', ), ('08031', ),
                 ('24510', ),
                 ('34013', '34017', '34039', '36005', '36047', '36059',
                  '36061', '36081'), ('41051', ), ('41067', ), ('51840', ),
                 ('51760', ), ('55079', ), ('54009', )])
        ],
        [
            15,
            set([('11001', '51013'), ('01073', ), ('06059', ), ('06037', ),
                 ('06029', ), ('06071', ), ('06075', ), ('08031', ),
                 ('24510', ),
                 ('34013', '34017', '34039', '36005', '36047', '36059',
                  '36061', '36081'), ('41051', '41067'), ('51840', ),
                 ('51760', ), ('55079', ), ('54009', )])
        ],
        [
            14,
            set([('01073', ), ('06059', ), ('06037', ), ('06029', ),
                 ('06071', ), ('06075', ), ('08031', ),
                 ('34013', '34017', '34039', '36005', '36047', '36059',
                  '36061', '36081'), ('41051', '41067'), ('51840', ),
                 ('51760', ), ('55079', ), ('54009', ),
                 ('11001', '24510', '51013')])
        ],
        [
            13,
            set([('06037', '06059'), ('01073', ), ('06029', ), ('06071', ),
                 ('06075', ), ('08031', ),
                 ('34013', '34017', '34039', '36005', '36047', '36059',
                  '36061', '36081'), ('41051', '41067'), ('51840', ),
                 ('51760', ), ('55079', ), ('54009', ),
                 ('11001', '24510', '51013')])
        ],
        [
            12,
            set([('06037', '06059'), ('01073', ), ('06029', ), ('06071', ),
                 ('06075', ), ('08031', ),
                 ('34013', '34017', '34039', '36005',
                  '36047', '36059', '36061', '36081'), ('41051', '41067'),
                 ('51760', ), ('55079', ), ('54009', ),
                 ('11001', '24510', '51013', '51840')])
        ],
        [
            11,
            set([('06029', '06037', '06059'), ('01073', ), ('06071', ),
                 ('06075', ), ('08031', ),
                 ('34013', '34017', '34039', '36005',
                  '36047', '36059', '36061', '36081'), ('41051', '41067'),
                 ('51760', ), ('55079', ), ('54009', ),
                 ('11001', '24510', '51013', '51840')])
        ],
        [
            10,
            set([('06029', '06037', '06059'), ('01073', ), ('06071', ),
                 ('06075', ), ('08031', ),
                 ('34013', '34017', '34039', '36005', '36047', '36059',
                  '36061', '36081'), ('41051', '41067'), ('55079', ),
                 ('54009', ), ('11001', '24510', '51013', '51760', '51840')])
        ],
        [
            9,
            set([('01073', ), ('06029', '06037', '06059', '06071'),
                 ('06075', ), ('08031', ),
                 ('34013', '34017', '34039', '36005', '36047', '36059',
                  '36061', '36081'), ('41051', '41067'), ('55079', ),
                 ('54009', ), ('11001', '24510', '51013', '51760', '51840')])
        ],
        [
            8,
            set([
                ('01073', ), ('06029', '06037', '06059', '06071'), ('06075', ),
                ('08031', ), ('41051', '41067'), ('55079', ), ('54009', ),
                ('11001', '24510', '34013', '34017', '34039', '36005', '36047',
                 '36059', '36061', '36081', '51013', '51760', '51840')
            ])
        ],
        [
            7,
            set([
                ('01073', ), ('06029', '06037', '06059', '06071'), ('06075', ),
                ('08031', ), ('41051', '41067'), ('55079', ),
                ('11001', '24510', '34013', '34017', '34039', '36005', '36047',
                 '36059', '36061', '36081', '51013', '51760', '51840', '54009')
            ])
        ],
        [
            6,
            set([
                ('06029', '06037', '06059', '06071', '06075'), ('01073', ),
                ('08031', ), ('41051', '41067'), ('55079', ),
                ('11001', '24510', '34013', '34017', '34039', '36005', '36047',
                 '36059', '36061', '36081', '51013', '51760', '51840', '54009')
            ])
        ],
        [
            5,
            set([
                ('06029', '06037', '06059', '06071', '06075'), ('08031', ),
                ('41051', '41067'), ('01073', '55079'),
                ('11001', '24510', '34013', '34017', '34039', '36005', '36047',
                 '36059', '36061', '36081', '51013', '51760', '51840', '54009')
            ])
        ],
        [
            4,
            set([('06029', '06037', '06059', '06071', '06075'),
                 ('01073', '11001', '24510', '34013', '34017', '34039',
                  '36005', '36047', '36059', '36061', '36081', '51013',
                  '51760', '51840', '54009', '55079'), ('08031', ),
                 ('41051', '41067')])
        ],
        [
            3,
            set([('06029', '06037', '06059', '06071', '06075', '41051',
                  '41067'),
                 ('01073', '11001', '24510', '34013', '34017', '34039',
                  '36005', '36047', '36059', '36061', '36081', '51013',
                  '51760', '51840', '54009', '55079'), ('08031', )])
        ],
        [
            2,
            set([('01073', '11001', '24510', '34013', '34017', '34039',
                  '36005', '36047', '36059', '36061', '36081', '51013',
                  '51760', '51840', '54009', '55079'),
                 ('06029', '06037', '06059', '06071', '06075', '08031',
                  '41051', '41067')])
        ],
    ]

    suite = poc_simpletest.TestSuite()

    for num_clusters, expected_county_tuple in hierdata_24:

        # build initial list of clusters for each test since mutation is allowed
        # (iterating the rows directly replaces the former index-based loop)
        cluster_list = [
            alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3],
                                line[4]) for line in data_24_table
        ]

        # compute student answer
        student_clustering = student.hierarchical_clustering(
            cluster_list, num_clusters)
        student_county_tuple = set_of_county_tuples(student_clustering)

        # Prepare test
        error_message = "Testing hierarchical_clustering on 24 county table, num_clusters = " + str(
            num_clusters)
        error_message += "\nStudent county tuples: " + str(
            student_county_tuple)
        error_message += "\nExpected county tuples: " + str(
            expected_county_tuple)
        # The comparison result is checked against True so the suite records
        # a failure, with error_message, whenever the sets differ.
        suite.run_test(student_county_tuple == expected_county_tuple, True,
                       error_message)

    suite.report_results()