Ejemplo n.º 1
0
def test_dataset():
    """
    Tests the Dataset class.

    Covers construction (empty and from a list of points), the getters
    getDimension, getSize, getContents and getPoint, and the mutator addPoint.
    Several checks verify that the dataset hands out and stores COPIES of
    points rather than the caller's own list objects.
    """
    print('  Testing class Dataset')

    # --- An empty 3-dimensional dataset -----------------------------------
    empty = a6dataset.Dataset(3)
    introcs.assert_equals(3, empty.getDimension())
    introcs.assert_equals(0, empty.getSize())

    # Lists of floats are compared with the float-aware assert
    introcs.assert_float_lists_equal([], empty.getContents())

    print('    Default initialization looks okay')

    # --- A dataset initialized from four points ----------------------------
    items = [[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0],
             [0.0, 0.0, 1.0]]
    filled = a6dataset.Dataset(3, items)
    introcs.assert_equals(3, filled.getDimension())
    introcs.assert_equals(4, filled.getSize())

    # Same points, but neither the outer list nor an inner point may be the
    # very object that was passed in.
    assert_point_sets_equal(items, filled.getContents())
    same_table = filled.getContents() is items
    introcs.assert_false(same_table)
    same_row = filled.getContents()[0] is items[0]
    introcs.assert_false(same_row)

    print('    User-provided initialization looks okay')

    # --- getPoint returns the right value and a fresh copy ------------------
    introcs.assert_float_lists_equal([0.0, 1.0, 0.0], filled.getPoint(2))
    stored = filled.getContents()[2]
    introcs.assert_false(stored is filled.getPoint(2))

    print('    Method Dataset.getPoint looks okay')

    # --- addPoint on the (initially empty) dataset --------------------------
    empty.addPoint([0.0, 0.5, 4.2])
    assert_point_sets_equal([[0.0, 0.5, 4.2]], empty.getContents())
    introcs.assert_float_lists_equal([0.0, 0.5, 4.2], empty.getPoint(0))
    # getPoint must not expose the stored list
    fetched = empty.getPoint(0)
    introcs.assert_false(fetched is empty.getContents()[0])

    # --- addPoint on the non-empty dataset ----------------------------------
    extra = [0.0, 0.5, 4.2]
    filled.addPoint(extra)
    items.append(extra)
    assert_point_sets_equal(items, filled.getContents())
    # The caller's list object must not appear anywhere in the contents
    aliased = any(point is extra for point in filled.getContents())
    introcs.assert_false(aliased)

    print('    Method Dataset.addPoint looks okay')
    print('  class Dataset appears correct')
    print()
Ejemplo n.º 2
0
def test_algorithm_a():
    """
    Tests Part A of the Algorithm class.

    Part A is cluster initialization: creating an Algorithm with randomly
    chosen seeds, and with an explicit list of seed indices (on a small
    in-memory dataset and on the dataset loaded from TEST_FILE).
    """
    print('  Testing Part A of class Algorithm')

    # A dataset with four points almost in a square
    items = [[0., 0.], [10., 1.], [10., 10.], [0., 9.]]
    dset = a6dataset.Dataset(2, items)

    # Test creating a clustering with random seeds
    km = a6algorithm.Algorithm(dset, 3)
    # Should have 3 clusters (expected value first, like every other check)
    introcs.assert_equals(3, len(km.getClusters()))

    for clust in km.getClusters():
        # cluster centroids should have been chosen from items
        introcs.assert_true(clust.getCentroid() in items)
        # cluster centroids should be distinct (since items are)
        for clust2 in km.getClusters():
            if clust2 is not clust:
                introcs.assert_float_lists_not_equal(clust.getCentroid(),
                                                     clust2.getCentroid())

    print('    Random Algorithm initialization looks okay')

    # Clusterings of that dataset, with two and three deterministic clusters
    km = a6algorithm.Algorithm(dset, 2, [0, 2])
    introcs.assert_equals(items[0], km.getClusters()[0].getCentroid())
    introcs.assert_equals(items[2], km.getClusters()[1].getCentroid())
    km = a6algorithm.Algorithm(dset, 3, [0, 2, 3])
    introcs.assert_equals(items[0], km.getClusters()[0].getCentroid())
    introcs.assert_equals(items[2], km.getClusters()[1].getCentroid())
    introcs.assert_equals(items[3], km.getClusters()[2].getCentroid())

    # Try it on a file (located next to this module)
    path = os.path.join(os.path.split(__file__)[0], TEST_FILE)
    data = a6dataset.Dataset(4, tools.data_for_file(path))
    km2 = a6algorithm.Algorithm(data, 3, [23, 54, 36])
    introcs.assert_equals([0.38, 0.94, 0.53, 0.07],
                          km2.getClusters()[0].getCentroid())
    introcs.assert_equals([0.84, 0.88, 0.04, 0.86],
                          km2.getClusters()[1].getCentroid())
    introcs.assert_equals([0.8, 0.4, 0.23, 0.33],
                          km2.getClusters()[2].getCentroid())

    print('    Seeded Algorithm initialization looks okay')
    print('  Part A of class Algorithm appears correct')
    print()
Ejemplo n.º 3
0
def test_cluster_a():
    """
    Tests Part A of the Cluster class assignment.

    Exercises the constructor, getCentroid, getIndices, addIndex,
    getContents and clear on a single cluster backed by a 3-dimensional
    dataset.
    """
    print('  Testing Part A of class Cluster')

    # A new cluster starts with the given centroid and no indices
    dset = a6dataset.Dataset(3)
    point = [0.0, 1.0, 0.0]
    cluster = a6cluster.Cluster(dset, point)

    introcs.assert_float_lists_equal(point, cluster.getCentroid())
    introcs.assert_equals([], cluster.getIndices())
    # The centroid handed back must be a different object than the argument
    introcs.assert_not_equals(id(point), id(cluster.getCentroid()))

    print('    Basic cluster methods look okay')

    # Put two points in the dataset so indices 0 and 1 are meaningful
    extra = [[0.0, 0.5, 4.2], [0.0, 1.0, 0.0]]
    for new_point in extra:
        dset.addPoint(new_point)

    # Each step adds one index and states the index list expected afterwards;
    # the last step re-adds index 1, which must NOT create a duplicate.
    steps = (
        (1, [1]),
        (0, [1, 0]),
        (1, [1, 0]),
    )
    for index, expected in steps:
        cluster.addIndex(index)
        introcs.assert_equals(expected, cluster.getIndices())

    print('    Method Cluster.addIndex look okay')

    # getContents lists the points in the order their indices were added
    contents = cluster.getContents()
    introcs.assert_equals(2, len(contents))
    introcs.assert_float_lists_equal(extra[1], contents[0])
    introcs.assert_float_lists_equal(extra[0], contents[1])

    print('    Method Cluster.getContents look okay')

    # Clearing removes every index
    cluster.clear()
    introcs.assert_equals([], cluster.getIndices())

    print('    Method Cluster.clear look okay')
    print('  Part A of class Cluster appears correct')
    print()
Ejemplo n.º 4
0
def compute(filename, k, limit=500):
    """
    Computes the result of a k-means algorithm and returns it as a table [with header]

    The first row of the table is the header: 'CID', then the data column
    names, then the same names prefixed with 'C_' (the centroid columns).
    Every following row describes one data point: the 1-based number of the
    cluster it ended up in, the point's coordinates, and the coordinates of
    that cluster's centroid.  Rows are grouped by cluster.

    If data_for_file produces no data for filename, the empty list is
    returned (and the file is not read a second time).

    Parameter filename: The name of the initial dataset
    Precondition: filename is a valid file path OR None.

    Parameter k: The initial number of clusters
    Precondition: k is an int

    Parameter limit: The limit on the number of iterations to run
    Precondition: limit is an int >= 0
    """
    data = data_for_file(filename)
    if len(data) == 0:
        return []

    # Read the file again, this time only for its header row.  A column
    # called COMMENTS (any capitalization) is left out of the header.
    table = introcs.read_csv(filename)
    header = [name for name in table[0] if name.lower() != 'comments']

    # Imported here rather than at module level, as in the original code.
    import a6dataset
    import a6algorithm
    dset = a6dataset.Dataset(len(data[0]), data)
    km = a6algorithm.Algorithm(dset, k)
    km.run(limit)

    newhead = ['CID'] + header
    newhead.extend('C_' + name for name in header)
    result = [newhead]

    for cid, cluster in enumerate(km.getClusters(), start=1):
        # The centroid is the same for every row of this cluster
        centroid = cluster.getCentroid()
        for index in cluster.getIndices():
            # Look the point up through the dataset we created above instead
            # of reaching into the cluster's private _dataset attribute.
            row = [cid] + dset.getPoint(index)
            row.extend(centroid)
            result.append(row)

    return result
Ejemplo n.º 5
0
    def _load_data(self, contents):
        """
        Loads the given table contents into a Dataset object and redraws.

        A 2-dimensional Dataset is built from contents and stored in _dset.
        If that dataset reports no contents, a warning asking the user to
        complete Dataset is shown.  Otherwise the current k-means state is
        discarded (_kmean is set to None) and _reset and _plot are called.

        Any other failure is printed as a traceback and reported to the user
        with a generic warning dialog; this method does not propagate
        ordinary exceptions.

        Parameter contents: The contents of the dataset
        Precondition: contents is a 2d rectangular table
        """
        try:
            self._dset = a6dataset.Dataset(2, contents)
            if not self._dset.getContents():
                # Signals "Dataset not implemented yet" to the handler below
                raise RuntimeError()
            self._kmean = None
            self._reset()
            self._plot()
        except RuntimeError:
            # NOTE(review): a RuntimeError raised inside _reset or _plot would
            # also land here and be reported with this message.
            messagebox.showwarning('Load',
                                   'ERROR: You must complete Dataset first.')
        except Exception:
            # Catch ordinary errors only; KeyboardInterrupt and SystemExit
            # are allowed to propagate instead of being turned into a dialog.
            traceback.print_exc()
            messagebox.showwarning('Load', 'ERROR: An unknown error occurred.')
Ejemplo n.º 6
0
def test_algorithm_d():
    """
    Tests Part D of the Algorithm class.

    Part D is the method run: the whole k-means process is executed with an
    iteration limit, first on a small six-point dataset and then on the
    dataset loaded from TEST_FILE, and the resulting centroids and cluster
    memberships are compared against known answers.
    """
    print('  Testing Part D of class Algorithm')
    items = [[0.5, 0.5, 0.5], [0.5, 0.6, 0.6], [0.6, 0.5, 0.6],
             [0.5, 0.6, 0.5], [0.5, 0.4, 0.5], [0.5, 0.4, 0.4]]
    dset = a6dataset.Dataset(3, items)

    # Run the whole process from seeded clusters with the method run
    km1 = a6algorithm.Algorithm(dset, 2, [1, 3])
    km1.run(10)

    # Check first cluster
    cluster1 = km1.getClusters()[0]
    introcs.assert_float_lists_equal([8. / 15, 17. / 30, 17. / 30],
                                     cluster1.getCentroid())
    introcs.assert_equals(set([1, 2, 3]), set(cluster1.getIndices()))

    # Check second cluster
    cluster2 = km1.getClusters()[1]
    introcs.assert_float_lists_equal([0.5, 13. / 30, 14. / 30],
                                     cluster2.getCentroid())
    introcs.assert_equals(set([0, 4, 5]), set(cluster2.getIndices()))
    print('    Method run looks okay')

    # Test on a real world data set (the file sits next to this module)
    path = os.path.join(os.path.split(__file__)[0], TEST_FILE)
    data = a6dataset.Dataset(4, tools.data_for_file(path))
    km2 = a6algorithm.Algorithm(data, 3, [23, 54, 36])
    km2.run(20)

    # The actual results
    cluster0 = km2.getClusters()[0]
    cluster1 = km2.getClusters()[1]
    cluster2 = km2.getClusters()[2]

    # The "correct" answers
    contents0 = [[0.88, 0.84, 0.8, 0.3], [0.02, 0.67, 0.75, 0.61],
                 [0.81, 0.69, 0.65, 0.65], [0.62, 0.75, 0.65, 0.43],
                 [0.35, 0.63, 0.65, 0.12], [0.61, 0.85, 0.81, 0.44],
                 [0.95, 0.94, 0.98, 0.69], [0.04, 0.69, 0.38, 0.39],
                 [0.28, 0.91, 0.63, 0.08], [0.38, 0.94, 0.53, 0.07],
                 [0.08, 0.62, 0.32, 0.27], [0.69, 0.82, 0.75, 0.65],
                 [0.84, 0.89, 0.91, 0.38], [0.22, 0.88, 0.39, 0.33],
                 [0.71, 0.78, 0.64, 0.57], [0.15, 0.87, 0.62, 0.22],
                 [0.65, 0.81, 0.69, 0.55], [0.27, 0.63, 0.69, 0.39],
                 [0.35, 0.7, 0.41, 0.15], [0.91, 0.98, 0.61, 0.58],
                 [0.9, 0.63, 0.83, 0.6], [0.95, 0.83, 0.64, 0.5],
                 [0.76, 0.86, 0.74, 0.61], [0.27, 0.65, 0.52, 0.28],
                 [0.86, 0.91, 0.88, 0.62], [0.1, 0.79, 0.5, 0.12],
                 [0.99, 0.68, 0.8, 0.42], [0.09, 0.85, 0.55, 0.21],
                 [0.79, 0.94, 0.83, 0.48], [0.73, 0.92, 0.74, 0.39],
                 [0.89, 0.72, 0.78, 0.38], [0.39, 0.9, 0.52, 0.26],
                 [0.46, 0.35, 0.96, 0.05], [0.21, 0.62, 0.33, 0.09],
                 [0.58, 0.37, 0.9, 0.08], [0.54, 0.92, 0.36, 0.35],
                 [0.67, 0.32, 0.66, 0.2], [0.36, 0.64, 0.57, 0.26],
                 [0.9, 0.7, 0.74, 0.63], [0.4, 0.69, 0.74, 0.7]]
    contents1 = [[0.32, 0.87, 0.14, 0.68], [0.73, 0.31, 0.15, 0.08],
                 [0.87, 0.99, 0.2, 0.8], [0.77, 0.45, 0.31, 0.31],
                 [0.96, 0.09, 0.49, 0.3], [0.86, 0.03, 0.3, 0.39],
                 [0.86, 0.86, 0.32, 0.88], [0.8, 0.4, 0.23, 0.33],
                 [0.81, 0.66, 0.26, 0.82], [0.95, 0.62, 0.28, 0.01],
                 [0.35, 0.71, 0.01, 0.32], [0.73, 0.65, 0.23, 0.02],
                 [0.84, 0.88, 0.04, 0.86], [0.8, 0.62, 0.09, 0.65],
                 [0.72, 0.55, 0.1, 0.17], [0.61, 0.42, 0.24, 0.33],
                 [0.72, 0.88, 0.02, 0.95], [0.88, 0.96, 0.09, 0.88],
                 [0.9, 0.05, 0.34, 0.41], [0.9, 0.41, 0.27, 0.36]]
    contents2 = [[0.4, 0.21, 0.78, 0.68], [0.54, 0.06, 0.81, 0.98],
                 [0.2, 0.54, 0.73, 0.85], [0.14, 0.31, 0.86, 0.74],
                 [0.39, 0.14, 0.99, 0.24], [0.23, 0.32, 0.7, 0.75],
                 [0.65, 0.05, 0.39, 0.49], [0.04, 0.52, 0.99, 0.75],
                 [0.14, 0.55, 0.67, 0.63], [0.5, 0.2, 0.69, 0.95],
                 [0.79, 0.09, 0.41, 0.69], [0.4, 0.3, 0.78, 0.74],
                 [0.65, 0.24, 0.63, 0.27], [0.35, 0.3, 0.94, 0.92],
                 [0.39, 0.38, 0.85, 0.32], [0.38, 0.07, 0.82, 0.01],
                 [0.66, 0.09, 0.69, 0.46], [0.26, 0.39, 0.95, 0.63],
                 [0.54, 0.06, 0.74, 0.86], [0.2, 0.48, 0.98, 0.84],
                 [0.62, 0.24, 0.77, 0.17], [0.27, 0.38, 0.76, 0.63],
                 [0.7, 0.04, 0.7, 0.82], [0.41, 0.11, 0.61, 0.78],
                 [0.22, 0.44, 0.67, 0.99], [0.51, 0.05, 0.95, 0.66],
                 [0.44, 0.1, 0.61, 0.98], [0.31, 0.16, 0.95, 0.9],
                 [0.31, 0.5, 0.87, 0.85], [0.5, 0.09, 0.84, 0.78],
                 [0.62, 0.01, 0.88, 0.1], [0.44, 0.28, 0.88, 0.99],
                 [0.57, 0.23, 0.6, 0.85], [0.72, 0.14, 0.63, 0.37],
                 [0.39, 0.08, 0.77, 0.96], [0.09, 0.47, 0.63, 0.8],
                 [0.63, 0.05, 0.52, 0.63], [0.62, 0.27, 0.67, 0.77],
                 [0.35, 0.04, 0.85, 0.86], [0.36, 0.34, 0.75, 0.37]]
    centroid0 = [0.54125, 0.7545, 0.66125, 0.3775]
    centroid1 = [0.76900, 0.5705, 0.20550, 0.4775]
    centroid2 = [0.42325, 0.2330, 0.75775, 0.6765]

    introcs.assert_float_lists_equal(centroid0, cluster0.getCentroid())
    introcs.assert_float_lists_equal(centroid1, cluster1.getCentroid())
    introcs.assert_float_lists_equal(centroid2, cluster2.getCentroid())
    assert_point_sets_equal(contents0, cluster0.getContents())
    assert_point_sets_equal(contents1, cluster1.getContents())
    assert_point_sets_equal(contents2, cluster2.getContents())
    print('    File analysis test looks okay')
    # The class under test is Algorithm, matching the banner printed above
    print('  Part D of class Algorithm appears correct')
    print()
Ejemplo n.º 7
0
def test_algorithm_c():
    """
    Tests Part C of the Algorithm class.

    Part C covers the hidden method _update and the method step.  The test
    also exercises Cluster.update directly on a hand-built cluster, and
    finishes with a single step on the dataset loaded from TEST_FILE.
    """
    print('  Testing Part C of class Algorithm')
    square = [[0., 0.], [10., 1.], [10., 10.], [0., 9.]]
    square_set = a6dataset.Dataset(2, square)
    square_alg = a6algorithm.Algorithm(square_set, 2, [0, 2])
    square_alg._partition()

    # The first _update moves the centroids (so it is not stable); a second
    # _update leaves them where they are and must report stable.
    for check_stable in (introcs.assert_false, introcs.assert_true):
        stable = square_alg._update()
        introcs.assert_float_lists_equal(
            [0, 4.5], square_alg.getClusters()[0].getCentroid())
        introcs.assert_float_lists_equal(
            [10.0, 5.5], square_alg.getClusters()[1].getCentroid())
        check_stable(stable)

    print('    Method Algorithm._update() looks okay')

    # Now test the k-means process itself.

    # Shared by all remaining in-memory cases: a non-empty dataset
    items = [[0.5, 0.5, 0.5], [0.5, 0.6, 0.6], [0.6, 0.5, 0.6],
             [0.5, 0.6, 0.5], [0.5, 0.4, 0.5], [0.5, 0.4, 0.4]]
    dset = a6dataset.Dataset(3, items)

    # Seed indices are given explicitly so the test is deterministic
    stepper = a6algorithm.Algorithm(dset, 2, [1, 3])

    # PRE-TEST: right after seeded initialization each cluster sits on its
    # seed point and holds no indices yet.
    seeds = ([0.5, 0.6, 0.6], [0.5, 0.6, 0.5])
    for position, seed in enumerate(seeds):
        current = stepper.getClusters()[position]
        introcs.assert_float_lists_equal(seed, current.getCentroid())
        introcs.assert_equals(set(), set(current.getIndices()))

    # A stand-alone cluster used to test Cluster.update
    probe = a6cluster.Cluster(dset, [0.5, 0.6, 0.6])
    probe.addIndex(1)
    probe.addIndex(2)

    # First update moves the centroid (not stable), second one is stable
    for check_stable in (introcs.assert_false, introcs.assert_true):
        stable = probe.update()
        introcs.assert_float_lists_equal([0.55, 0.55, 0.6],
                                         probe.getCentroid())
        check_stable(stable)

    # Two consecutive calls to step.  For each call: the expected centroid
    # and the expected index set of cluster 0 and of cluster 1 (both change
    # on every one of these steps).
    after_each_step = (
        (([0.55, 0.55, 0.6], {1, 2}),
         ([0.5, 0.475, 0.475], {0, 3, 4, 5})),
        (([8. / 15, 17. / 30, 17. / 30], {1, 2, 3}),
         ([0.5, 13. / 30, 14. / 30], {0, 4, 5})),
    )
    for expected_clusters in after_each_step:
        stepper.step()
        for position, (centroid, members) in enumerate(expected_clusters):
            current = stepper.getClusters()[position]
            introcs.assert_float_lists_equal(centroid, current.getCentroid())
            introcs.assert_equals(members, set(current.getIndices()))

    # Try it on a file (located in the same directory as this module)
    path = os.path.join(os.path.dirname(__file__), TEST_FILE)
    data = a6dataset.Dataset(4, tools.data_for_file(path))
    file_alg = a6algorithm.Algorithm(data, 3, [23, 54, 36])
    file_alg.step()

    # The actual results
    actual = [file_alg.getClusters()[position] for position in range(3)]

    # The "correct" answers
    points0 = [[0.88, 0.84, 0.8, 0.3], [0.02, 0.67, 0.75, 0.61],
               [0.2, 0.54, 0.73, 0.85], [0.62, 0.75, 0.65, 0.43],
               [0.35, 0.63, 0.65, 0.12], [0.61, 0.85, 0.81, 0.44],
               [0.95, 0.94, 0.98, 0.69], [0.04, 0.69, 0.38, 0.39],
               [0.04, 0.52, 0.99, 0.75], [0.28, 0.91, 0.63, 0.08],
               [0.14, 0.55, 0.67, 0.63], [0.38, 0.94, 0.53, 0.07],
               [0.08, 0.62, 0.32, 0.27], [0.69, 0.82, 0.75, 0.65],
               [0.84, 0.89, 0.91, 0.38], [0.22, 0.88, 0.39, 0.33],
               [0.39, 0.38, 0.85, 0.32], [0.26, 0.39, 0.95, 0.63],
               [0.15, 0.87, 0.62, 0.22], [0.65, 0.81, 0.69, 0.55],
               [0.27, 0.63, 0.69, 0.39], [0.35, 0.7, 0.41, 0.15],
               [0.2, 0.48, 0.98, 0.84], [0.76, 0.86, 0.74, 0.61],
               [0.27, 0.65, 0.52, 0.28], [0.86, 0.91, 0.88, 0.62],
               [0.1, 0.79, 0.5, 0.12], [0.09, 0.85, 0.55, 0.21],
               [0.79, 0.94, 0.83, 0.48], [0.73, 0.92, 0.74, 0.39],
               [0.31, 0.5, 0.87, 0.85], [0.39, 0.9, 0.52, 0.26],
               [0.46, 0.35, 0.96, 0.05], [0.21, 0.62, 0.33, 0.09],
               [0.58, 0.37, 0.9, 0.08], [0.54, 0.92, 0.36, 0.35],
               [0.36, 0.64, 0.57, 0.26], [0.09, 0.47, 0.63, 0.8],
               [0.4, 0.69, 0.74, 0.7]]
    points1 = [[0.32, 0.87, 0.14, 0.68], [0.87, 0.99, 0.2, 0.8],
               [0.86, 0.86, 0.32, 0.88], [0.81, 0.66, 0.26, 0.82],
               [0.91, 0.98, 0.61, 0.58], [0.84, 0.88, 0.04, 0.86],
               [0.8, 0.62, 0.09, 0.65], [0.72, 0.88, 0.02, 0.95],
               [0.88, 0.96, 0.09, 0.88]]
    points2 = [[0.4, 0.21, 0.78, 0.68], [0.54, 0.06, 0.81, 0.98],
               [0.73, 0.31, 0.15, 0.08], [0.81, 0.69, 0.65, 0.65],
               [0.14, 0.31, 0.86, 0.74], [0.77, 0.45, 0.31, 0.31],
               [0.39, 0.14, 0.99, 0.24], [0.23, 0.32, 0.7, 0.75],
               [0.65, 0.05, 0.39, 0.49], [0.96, 0.09, 0.49, 0.3],
               [0.86, 0.03, 0.3, 0.39], [0.5, 0.2, 0.69, 0.95],
               [0.79, 0.09, 0.41, 0.69], [0.4, 0.3, 0.78, 0.74],
               [0.65, 0.24, 0.63, 0.27], [0.35, 0.3, 0.94, 0.92],
               [0.71, 0.78, 0.64, 0.57], [0.8, 0.4, 0.23, 0.33],
               [0.38, 0.07, 0.82, 0.01], [0.66, 0.09, 0.69, 0.46],
               [0.54, 0.06, 0.74, 0.86], [0.95, 0.62, 0.28, 0.01],
               [0.35, 0.71, 0.01, 0.32], [0.62, 0.24, 0.77, 0.17],
               [0.73, 0.65, 0.23, 0.02], [0.27, 0.38, 0.76, 0.63],
               [0.9, 0.63, 0.83, 0.6], [0.7, 0.04, 0.7, 0.82],
               [0.95, 0.83, 0.64, 0.5], [0.41, 0.11, 0.61, 0.78],
               [0.22, 0.44, 0.67, 0.99], [0.51, 0.05, 0.95, 0.66],
               [0.99, 0.68, 0.8, 0.42], [0.72, 0.55, 0.1, 0.17],
               [0.44, 0.1, 0.61, 0.98], [0.31, 0.16, 0.95, 0.9],
               [0.61, 0.42, 0.24, 0.33], [0.89, 0.72, 0.78, 0.38],
               [0.5, 0.09, 0.84, 0.78], [0.62, 0.01, 0.88, 0.1],
               [0.44, 0.28, 0.88, 0.99], [0.57, 0.23, 0.6, 0.85],
               [0.9, 0.05, 0.34, 0.41], [0.9, 0.41, 0.27, 0.36],
               [0.67, 0.32, 0.66, 0.2], [0.72, 0.14, 0.63, 0.37],
               [0.39, 0.08, 0.77, 0.96], [0.9, 0.7, 0.74, 0.63],
               [0.63, 0.05, 0.52, 0.63], [0.62, 0.27, 0.67, 0.77],
               [0.35, 0.04, 0.85, 0.86], [0.36, 0.34, 0.75, 0.37]]
    center0 = [
        0.3987179487179487, 0.7097435897435899, 0.6864102564102561,
        0.4164102564102565
    ]
    center1 = [
        0.7788888888888889, 0.8555555555555555, 0.19666666666666668,
        0.788888888888889
    ]
    center2 = [
        0.6038461538461538, 0.29865384615384616, 0.6217307692307692,
        0.5455769230769231
    ]

    # All centroids are checked first, then all cluster contents
    for expected, found in zip((center0, center1, center2), actual):
        introcs.assert_float_lists_equal(expected, found.getCentroid())
    for expected, found in zip((points0, points1, points2), actual):
        assert_point_sets_equal(expected, found.getContents())

    print('    Method Algorithm.step looks okay')
    print('  Part C of class Algorithm appears correct')
    print()
Ejemplo n.º 8
0
def test_algorithm_b():
    """
    Tests Part B of the Algorithm class.

    Part B consists of the hidden methods _nearest and _partition.  Calling
    hidden methods from outside their class is normally poor form; testing
    code is the exception, because it often has to be more tightly coupled
    to the implementation than ordinary client code.
    """
    print('  Testing Part B of class Algorithm')
    # Reinitialize data set
    items = [[0., 0.], [10., 1.], [10., 10.], [0., 9.]]
    dset = a6dataset.Dataset(2, items)
    two_way = a6algorithm.Algorithm(dset, 2, [0, 2])
    three_way = a6algorithm.Algorithm(dset, 3, [0, 2, 3])

    # (algorithm, query point, position of the cluster that must be nearest)
    nearest_cases = (
        (two_way, [1., 1.], 0),
        (two_way, [1., 10.], 1),
        (three_way, [1., 1.], 0),
        (three_way, [1., 10.], 2),
    )
    for algorithm, query, position in nearest_cases:
        nearest = algorithm._nearest(query)
        introcs.assert_true(nearest is algorithm.getClusters()[position])
    print('    Method Algorithm._nearest() looks okay')

    # Testing partition()
    # Points 0 and 3 are closer to the first seed, 1 and 2 to the second.
    # Partitioning a second time must give exactly the same clusters.
    for _ in range(2):
        two_way._partition()
        introcs.assert_equals({0, 3},
                              set(two_way.getClusters()[0].getIndices()))
        introcs.assert_equals({1, 2},
                              set(two_way.getClusters()[1].getIndices()))

    # Overwrite the centroids directly; now the partition changes
    moved = ([5.0, 10.0], [0.0, 2.0])
    for clust, centroid in zip(two_way.getClusters(), moved):
        clust._centroid = centroid
    two_way._partition()
    introcs.assert_equals({2, 3}, set(two_way.getClusters()[0].getIndices()))
    introcs.assert_equals({0, 1}, set(two_way.getClusters()[1].getIndices()))

    # Try it on a file: expected indices for each of the three clusters
    expected_indices = (
        [
            2, 3, 5, 9, 11, 15, 16, 18, 19, 20, 22, 23, 29, 30, 32, 33, 37,
            40, 41, 42, 44, 45, 50, 60, 61, 62, 64, 69, 71, 73, 75, 76, 78,
            80, 85, 88, 90, 94, 97
        ],
        [0, 34, 8, 43, 66, 46, 77, 84, 54],
        [
            1, 4, 6, 7, 10, 12, 13, 14, 17, 21, 24, 25, 26, 27, 28, 31, 35,
            36, 38, 39, 47, 48, 49, 51, 52, 53, 55, 56, 57, 58, 59, 63, 65,
            67, 68, 70, 72, 74, 79, 81, 82, 83, 86, 87, 89, 91, 92, 93, 95,
            96, 98, 99
        ],
    )

    path = os.path.join(os.path.dirname(__file__), TEST_FILE)
    data = a6dataset.Dataset(4, tools.data_for_file(path))
    file_alg = a6algorithm.Algorithm(data, 3, [23, 54, 36])
    file_alg._partition()
    for position, indices in enumerate(expected_indices):
        introcs.assert_equals(
            set(indices), set(file_alg.getClusters()[position].getIndices()))

    print('    Method Algorithm._partition() looks okay')
    print('  Part B of class Algorithm appears correct')
    print()
Ejemplo n.º 9
0
def test_cluster_b():
    """
    Tests Part B of the Cluster class assignment.

    Part B covers the methods distance, getRadius and update, checked on
    three clusters that share one 3-dimensional dataset of four points.
    """
    print('  Testing Part B of class Cluster')

    # A dataset with four points
    items = [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0],
             [0.0, 0.0, 1.0]]
    dset = a6dataset.Dataset(3, items)

    # Three clusters over that dataset, each with its own centroid
    near_origin = a6cluster.Cluster(dset, [0.0, 0.0, 0.2])
    midway = a6cluster.Cluster(dset, [0.5, 0.5, 0.0])
    raised = a6cluster.Cluster(dset, [0.0, 0.0, 0.5])

    # distance: (cluster, point, expected distance from the centroid)
    distance_cases = (
        (midway, [1.0, 0.0, -1.0], 1.22474487139),
        (midway, [0.5, 0.5, 0.0], 0.0),
        (raised, [0.5, 0.0, 0.5], 0.5),
    )
    for cluster, point, expected in distance_cases:
        dist = cluster.distance(point)
        introcs.assert_floats_equal(expected, dist)
    print('    Method Cluster.distance() looks okay')

    # Add some indices: (cluster, indices to add, in order)
    memberships = (
        (near_origin, (0, 1)),
        (midway, (0, 1)),
        (raised, (0, 1, 2)),
    )
    for cluster, indices in memberships:
        for index in indices:
            cluster.addIndex(index)

    # getRadius: (cluster, expected radius)
    radius_cases = (
        (near_origin, 1.0198039),
        (midway, 0.7071068),
        (raised, 1.1180340),
    )
    for cluster, expected in radius_cases:
        rads = cluster.getRadius()
        introcs.assert_floats_equal(expected, rads)
    print('    Method Cluster.getRadius() looks okay')

    # update, case 1: the centroid already is the average of points 0 and 1,
    # so nothing moves and the cluster is stable
    stable = midway.update()
    introcs.assert_float_lists_equal([0.5, 0.5, 0.0], midway.getCentroid())
    introcs.assert_true(stable)

    # update, case 2: with points 2 and 3 added the centroid moves on the
    # first update (not stable) and stays put on the second (stable)
    midway.addIndex(2)
    midway.addIndex(3)
    for check_stable in (introcs.assert_false, introcs.assert_true):
        stable = midway.update()
        introcs.assert_float_lists_equal([0.25, 0.25, 0.25],
                                         midway.getCentroid())
        check_stable(stable)

    print('    Method Cluster.update() looks okay')
    print('  Part B of class Cluster appears correct')
    print()