コード例 #1
0
ファイル: alg_project3_viz.py プロジェクト: hrpatel/coursera
def assignment_q2():
    """
    Load a data table, hierarchically cluster it and plot the clusters.

    The table at DATA_3108_URL is loaded, every row is wrapped in its own
    single-member alg_cluster.Cluster, and the resulting list is reduced to
    15 clusters with project.hierarchical_clustering.  The number of clusters
    obtained is printed and the clusters are drawn with
    alg_clusters_matplotlib.plot_clusters.

    Takes no arguments and returns None.
    """
    data_table = load_data_table(DATA_3108_URL)

    # One singleton cluster per row: the first column becomes the cluster's
    # one-element member set, the remaining four columns are passed through
    # positionally (their meaning is defined by alg_cluster.Cluster).
    singleton_list = [
        alg_cluster.Cluster({line[0]}, line[1], line[2], line[3], line[4])
        for line in data_table
    ]

    cluster_list = project.hierarchical_clustering(singleton_list, 15)

    # A single-argument print(...) produces the same text under the Python 2
    # print statement and the Python 3 print function, so the line stays
    # valid on both interpreters.
    print(" ".join(["Displaying", str(len(cluster_list)), "hierarchical clusters"]))

    # NOTE(review): the trailing True is forwarded unchanged; presumably it
    # toggles an extra drawing option in plot_clusters -- confirm against
    # alg_clusters_matplotlib.
    alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
コード例 #2
0
ファイル: project_testsuite.py プロジェクト: hrpatel/coursera
def test_hierarchical24():
    """
    Test student.hierarchical_clustering on the 24 county data table.

    For each expected entry (cluster counts 23 down to 2) a fresh list of
    singleton clusters is built from the table, clustered down to the
    requested count, converted with set_of_county_tuples and compared to the
    expected set of county tuples.  Each comparison is recorded through
    simpletest.TestSuite.run_test and a summary is reported at the end.

    Note that hierarchical_clustering mutates cluster_list, which is why the
    input list is rebuilt for every case.

    Takes no arguments and returns None.
    """

    # load small data table
    # Single-argument print(...) calls emit identical output under the
    # Python 2 print statement and the Python 3 print function; print("")
    # reproduces the blank line the bare Python 2 `print` wrote.
    print("")
    print("Testing hierarchical_clustering on 24 county set")
    data_24_table = load_data_table(DATA_24_URL)


    # test data of the form [size of output cluster, sets of county tuples]
    hierdata_24 = [[23, set(
        [('11001', '51013'), ('01073',), ('06059',), ('06037',), ('06029',), ('06071',), ('06075',), ('08031',),
         ('24510',), ('34013',), ('34039',), ('34017',), ('36061',), ('36005',), ('36047',), ('36059',), ('36081',),
         ('41051',), ('41067',), ('51840',), ('51760',), ('55079',), ('54009',)])],
                   [22, set([('11001', '51013'), ('36047', '36081'), ('01073',), ('06059',), ('06037',), ('06029',),
                             ('06071',), ('06075',), ('08031',), ('24510',), ('34013',), ('34039',), ('34017',),
                             ('36061',), ('36005',), ('36059',), ('41051',), ('41067',), ('51840',), ('51760',),
                             ('55079',), ('54009',)])],
                   [21, set(
                       [('11001', '51013'), ('36005', '36061'), ('36047', '36081'), ('01073',), ('06059',), ('06037',),
                        ('06029',), ('06071',), ('06075',), ('08031',), ('24510',), ('34013',), ('34039',), ('34017',),
                        ('36059',), ('41051',), ('41067',), ('51840',), ('51760',), ('55079',), ('54009',)])],
                   [20, set(
                       [('11001', '51013'), ('36005', '36061'), ('36047', '36081'), ('01073',), ('06059',), ('06037',),
                        ('06029',), ('06071',), ('06075',), ('08031',), ('24510',), ('34039',), ('34013', '34017'),
                        ('36059',), ('41051',), ('41067',), ('51840',), ('51760',), ('55079',), ('54009',)])],
                   [19, set([('34013', '34017', '34039'), ('11001', '51013'), ('36005', '36061'), ('36047', '36081'),
                             ('01073',), ('06059',), ('06037',), ('06029',), ('06071',), ('06075',), ('08031',),
                             ('24510',), ('36059',), ('41051',), ('41067',), ('51840',), ('51760',), ('55079',),
                             ('54009',)])],
                   [18, set(
                       [('34013', '34017', '34039'), ('11001', '51013'), ('01073',), ('06059',), ('06037',), ('06029',),
                        ('06071',), ('06075',), ('08031',), ('24510',), ('36059',),
                        ('36005', '36047', '36061', '36081'), ('41051',), ('41067',), ('51840',), ('51760',),
                        ('55079',), ('54009',)])],
                   [17, set([('11001', '51013'), ('01073',), ('06059',), ('06037',), ('06029',), ('06071',), ('06075',),
                             ('08031',), ('24510',), ('36059',),
                             ('34013', '34017', '34039', '36005', '36047', '36061', '36081'), ('41051',), ('41067',),
                             ('51840',), ('51760',), ('55079',), ('54009',)])],
                   [16, set([('11001', '51013'), ('01073',), ('06059',), ('06037',), ('06029',), ('06071',), ('06075',),
                             ('08031',), ('24510',),
                             ('34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081'), ('41051',),
                             ('41067',), ('51840',), ('51760',), ('55079',), ('54009',)])],
                   [15, set([('11001', '51013'), ('01073',), ('06059',), ('06037',), ('06029',), ('06071',), ('06075',),
                             ('08031',), ('24510',),
                             ('34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081'),
                             ('41051', '41067'), ('51840',), ('51760',), ('55079',), ('54009',)])],
                   [14, set([('01073',), ('06059',), ('06037',), ('06029',), ('06071',), ('06075',), ('08031',),
                             ('34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081'),
                             ('41051', '41067'), ('51840',), ('51760',), ('55079',), ('54009',),
                             ('11001', '24510', '51013')])],
                   [13, set([('06037', '06059'), ('01073',), ('06029',), ('06071',), ('06075',), ('08031',),
                             ('34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081'),
                             ('41051', '41067'), ('51840',), ('51760',), ('55079',), ('54009',),
                             ('11001', '24510', '51013')])],
                   [12, set([('06037', '06059'), ('01073',), ('06029',), ('06071',), ('06075',), ('08031',),
                             ('34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081'),
                             ('41051', '41067'), ('51760',), ('55079',), ('54009',),
                             ('11001', '24510', '51013', '51840')])],
                   [11, set([('06029', '06037', '06059'), ('01073',), ('06071',), ('06075',), ('08031',),
                             ('34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081'),
                             ('41051', '41067'), ('51760',), ('55079',), ('54009',),
                             ('11001', '24510', '51013', '51840')])],
                   [10, set([('06029', '06037', '06059'), ('01073',), ('06071',), ('06075',), ('08031',),
                             ('34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081'),
                             ('41051', '41067'), ('55079',), ('54009',),
                             ('11001', '24510', '51013', '51760', '51840')])],
                   [9, set([('01073',), ('06029', '06037', '06059', '06071'), ('06075',), ('08031',),
                            ('34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081'),
                            ('41051', '41067'), ('55079',), ('54009',),
                            ('11001', '24510', '51013', '51760', '51840')])],
                   [8, set(
                       [('01073',), ('06029', '06037', '06059', '06071'), ('06075',), ('08031',), ('41051', '41067'),
                        ('55079',), ('54009',), (
                       '11001', '24510', '34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081',
                       '51013', '51760', '51840')])],
                   [7, set(
                       [('01073',), ('06029', '06037', '06059', '06071'), ('06075',), ('08031',), ('41051', '41067'),
                        ('55079',), (
                       '11001', '24510', '34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081',
                       '51013', '51760', '51840', '54009')])],
                   [6, set([('06029', '06037', '06059', '06071', '06075'), ('01073',), ('08031',), ('41051', '41067'),
                            ('55079',), (
                       '11001', '24510', '34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081',
                       '51013', '51760', '51840', '54009')])],
                   [5, set([('06029', '06037', '06059', '06071', '06075'), ('08031',), ('41051', '41067'),
                            ('01073', '55079'), (
                       '11001', '24510', '34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081',
                       '51013', '51760', '51840', '54009')])],
                   [4, set([('06029', '06037', '06059', '06071', '06075'), (
                   '01073', '11001', '24510', '34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081',
                   '51013', '51760', '51840', '54009', '55079'), ('08031',), ('41051', '41067')])],
                   [3, set([('06029', '06037', '06059', '06071', '06075', '41051', '41067'), (
                   '01073', '11001', '24510', '34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081',
                   '51013', '51760', '51840', '54009', '55079'), ('08031',)])],
                   [2, set([('01073', '11001', '24510', '34013', '34017', '34039', '36005', '36047', '36059', '36061',
                             '36081', '51013', '51760', '51840', '54009', '55079'),
                            ('06029', '06037', '06059', '06071', '06075', '08031', '41051', '41067')])],
    ]

    suite = simpletest.TestSuite()

    for num_clusters, expected_county_tuple in hierdata_24:

        # build initial list of clusters for each test since mutation is allowed
        # (iterate the table rows directly instead of indexing by position)
        cluster_list = [
            alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])
            for line in data_24_table
        ]

        # compute student answer
        student_clustering = student.hierarchical_clustering(cluster_list, num_clusters)
        student_county_tuple = set_of_county_tuples(student_clustering)

        # Prepare test: the message shows both sides so a failure can be
        # diagnosed from the report alone
        error_message = (
            "Testing hierarchical_clustering on 24 county table, num_clusters = " + str(num_clusters)
            + "\nStudent county tuples: " + str(student_county_tuple)
            + "\nExpected county tuples: " + str(expected_county_tuple)
        )
        suite.run_test(student_county_tuple == expected_county_tuple, True, error_message)

    suite.report_results()