예제 #1
0
def test_cluster_map_centroid_getitem():
    nb_clusters = 11
    indices = list(range(len(data)))
    np.random.shuffle(indices)  # None trivial ordering
    advanced_indices = indices + [0, 1, 2, -1, -2, -3]

    cluster_map = ClusterMapCentroid()
    clusters = []
    for i in range(nb_clusters):
        centroid = np.zeros_like(features)
        cluster = ClusterCentroid(centroid)
        cluster.id = cluster_map.add_cluster(cluster)
        clusters.append(cluster)

    # Test indexing
    for i in advanced_indices:
        assert_equal(cluster_map[i], clusters[i])

    # Test advanced indexing
    assert_arrays_equal(cluster_map[advanced_indices],
                        [clusters[i] for i in advanced_indices])

    # Test index out of bounds
    assert_raises(IndexError, cluster_map.__getitem__, len(clusters))
    assert_raises(IndexError, cluster_map.__getitem__, -len(clusters) - 1)

    # Test slicing and negative indexing
    assert_equal(cluster_map[-1], clusters[-1])
    assert_array_equal(cluster_map[::2], clusters[::2])
    assert_arrays_equal(cluster_map[::-1], clusters[::-1])
    assert_arrays_equal(cluster_map[:-1], clusters[:-1])
    assert_arrays_equal(cluster_map[1:], clusters[1:])
예제 #2
0
def test_cluster_map_centroid_remove_cluster():
    clusters = ClusterMapCentroid()

    centroid1 = np.random.rand(*features_shape).astype(dtype)
    cluster1 = ClusterCentroid(centroid1, indices=[1])
    clusters.add_cluster(cluster1)

    centroid2 = np.random.rand(*features_shape).astype(dtype)
    cluster2 = ClusterCentroid(centroid2, indices=[1, 2])
    clusters.add_cluster(cluster2)

    centroid3 = np.random.rand(*features_shape).astype(dtype)
    cluster3 = ClusterCentroid(centroid3, indices=[1, 2, 3])
    clusters.add_cluster(cluster3)

    assert_equal(len(clusters), 3)

    clusters.remove_cluster(cluster2)
    assert_equal(len(clusters), 2)
    assert_array_equal(list(itertools.chain(*clusters)),
                       list(itertools.chain(*[cluster1, cluster3])))
    assert_array_equal(clusters.centroids, np.array([centroid1, centroid3]))
    assert_equal(clusters[0], cluster1)
    assert_equal(clusters[1], cluster3)

    clusters.remove_cluster(cluster3)
    assert_equal(len(clusters), 1)
    assert_array_equal(list(itertools.chain(*clusters)), list(cluster1))
    assert_array_equal(clusters.centroids, np.array([centroid1]))
    assert_equal(clusters[0], cluster1)

    clusters.remove_cluster(cluster1)
    assert_equal(len(clusters), 0)
    assert_array_equal(list(itertools.chain(*clusters)), [])
    assert_array_equal(clusters.centroids, [])
예제 #3
0
def test_cluster_map_centroid_getitem():
    nb_clusters = 11
    indices = list(range(len(data)))
    np.random.shuffle(indices)  # None trivial ordering
    advanced_indices = indices + [0, 1, 2, -1, -2, -3]

    cluster_map = ClusterMapCentroid()
    clusters = []
    for i in range(nb_clusters):
        centroid = np.zeros_like(features)
        cluster = ClusterCentroid(centroid)
        cluster.id = cluster_map.add_cluster(cluster)
        clusters.append(cluster)

    # Test indexing
    for i in advanced_indices:
        assert_equal(cluster_map[i], clusters[i])

    # Test advanced indexing
    assert_array_equal(cluster_map[advanced_indices], [clusters[i] for i in advanced_indices])

    # Test index out of bounds
    assert_raises(IndexError, cluster_map.__getitem__, len(clusters))
    assert_raises(IndexError, cluster_map.__getitem__, -len(clusters)-1)

    # Test slicing and negative indexing
    assert_equal(cluster_map[-1], clusters[-1])
    assert_array_equal(cluster_map[::2], clusters[::2])
    assert_arrays_equal(cluster_map[::-1], clusters[::-1])
    assert_arrays_equal(cluster_map[:-1], clusters[:-1])
    assert_arrays_equal(cluster_map[1:], clusters[1:])
예제 #4
0
def test_cluster_map_centroid_add_cluster():
    clusters = ClusterMapCentroid()

    centroids = []
    for i in range(3):
        cluster = ClusterCentroid(centroid=np.zeros_like(features))

        centroids.append(np.zeros_like(features))
        for id_data in range(2*i):
            centroids[-1] = (centroids[-1]*id_data + (id_data+1)*features) / (id_data+1)
            cluster.assign(id_data, (id_data+1)*features)
            cluster.update()

        clusters.add_cluster(cluster)
        assert_array_equal(cluster.centroid, centroids[-1])
        assert_equal(type(cluster), ClusterCentroid)
        assert_equal(cluster, clusters[-1])

    assert_equal(type(clusters.centroids), list)
    assert_array_equal(list(itertools.chain(*clusters.centroids)), list(itertools.chain(*centroids)))

    # Check adding features of different sizes (shorter and longer)
    features_shape_short = (1, features_shape[1]-3)
    features_too_short = np.ones(features_shape_short, dtype=dtype)
    assert_raises(ValueError, cluster.assign, 123, features_too_short)

    features_shape_long = (1, features_shape[1]+3)
    features_too_long = np.ones(features_shape_long, dtype=dtype)
    assert_raises(ValueError, cluster.assign, 123, features_too_long)
예제 #5
0
def test_cluster_centroid_getitem():
    indices = list(range(len(data)))
    np.random.shuffle(indices)  # None trivial ordering
    advanced_indices = indices + [0, 1, 2, -1, -2, -3]

    # Test without specifying refdata in ClusterCentroid
    centroid = np.zeros(features_shape)
    cluster = ClusterCentroid(centroid)
    for idx in indices:
        cluster.assign(idx, (idx + 1) * features)

    # Test indexing
    for i in advanced_indices:
        assert_equal(cluster[i], indices[i])

    # Test advanced indexing
    assert_array_equal(cluster[advanced_indices],
                       [indices[i] for i in advanced_indices])

    # Test index out of bounds
    assert_raises(IndexError, cluster.__getitem__, len(cluster))
    assert_raises(IndexError, cluster.__getitem__, -len(cluster) - 1)

    # Test slicing and negative indexing
    assert_equal(cluster[-1], indices[-1])
    assert_array_equal(cluster[::2], indices[::2])
    assert_arrays_equal(cluster[::-1], indices[::-1])
    assert_arrays_equal(cluster[:-1], indices[:-1])
    assert_arrays_equal(cluster[1:], indices[1:])

    # Test with specifying refdata in ClusterCentroid
    cluster.refdata = data

    # Test indexing
    for i in advanced_indices:
        assert_array_equal(cluster[i], data[indices[i]])

    # Test advanced indexing
    assert_arrays_equal(cluster[advanced_indices],
                        [data[indices[i]] for i in advanced_indices])

    # Test index out of bounds
    assert_raises(IndexError, cluster.__getitem__, len(cluster))
    assert_raises(IndexError, cluster.__getitem__, -len(cluster) - 1)

    # Test slicing and negative indexing
    assert_array_equal(cluster[-1], data[indices[-1]])
    assert_arrays_equal(cluster[::2], [data[i] for i in indices[::2]])
    assert_arrays_equal(cluster[::-1], [data[i] for i in indices[::-1]])
    assert_arrays_equal(cluster[:-1], [data[i] for i in indices[:-1]])
    assert_arrays_equal(cluster[1:], [data[i] for i in indices[1:]])
예제 #6
0
def test_cluster_centroid_getitem():
    indices = list(range(len(data)))
    np.random.shuffle(indices)  # None trivial ordering
    advanced_indices = indices + [0, 1, 2, -1, -2, -3]

    # Test without specifying refdata in ClusterCentroid
    centroid = np.zeros(features_shape)
    cluster = ClusterCentroid(centroid)
    for idx in indices:
        cluster.assign(idx, (idx+1)*features)

    # Test indexing
    for i in advanced_indices:
        assert_equal(cluster[i], indices[i])

    # Test advanced indexing
    assert_array_equal(cluster[advanced_indices],
                       [indices[i] for i in advanced_indices])

    # Test index out of bounds
    assert_raises(IndexError, cluster.__getitem__, len(cluster))
    assert_raises(IndexError, cluster.__getitem__, -len(cluster)-1)

    # Test slicing and negative indexing
    assert_equal(cluster[-1], indices[-1])
    assert_array_equal(cluster[::2], indices[::2])
    assert_arrays_equal(cluster[::-1], indices[::-1])
    assert_arrays_equal(cluster[:-1], indices[:-1])
    assert_arrays_equal(cluster[1:], indices[1:])

    # Test with specifying refdata in ClusterCentroid
    cluster.refdata = data

    # Test indexing
    for i in advanced_indices:
        assert_array_equal(cluster[i], data[indices[i]])

    # Test advanced indexing
    assert_arrays_equal(cluster[advanced_indices],
                        [data[indices[i]] for i in advanced_indices])

    # Test index out of bounds
    assert_raises(IndexError, cluster.__getitem__, len(cluster))
    assert_raises(IndexError, cluster.__getitem__, -len(cluster)-1)

    # Test slicing and negative indexing
    assert_array_equal(cluster[-1], data[indices[-1]])
    assert_arrays_equal(cluster[::2], [data[i] for i in indices[::2]])
    assert_arrays_equal(cluster[::-1], [data[i] for i in indices[::-1]])
    assert_arrays_equal(cluster[:-1], [data[i] for i in indices[:-1]])
    assert_arrays_equal(cluster[1:], [data[i] for i in indices[1:]])
예제 #7
0
def test_cluster_map_centroid_iter():
    rng = np.random.RandomState(42)
    nb_clusters = 11

    cluster_map = ClusterMapCentroid()
    clusters = []
    for i in range(nb_clusters):
        new_centroid = np.zeros_like(features)
        new_cluster = ClusterCentroid(new_centroid,
                                      indices=rng.randint(0,
                                                          len(data),
                                                          size=10))
        cluster_map.add_cluster(new_cluster)
        clusters.append(new_cluster)

    assert_true(
        all([c1 is c2 for c1, c2 in zip(cluster_map.clusters, clusters)]))
    assert_array_equal(cluster_map, clusters)
    assert_array_equal(cluster_map.clusters, clusters)
    assert_array_equal(cluster_map, [cluster.indices for cluster in clusters])

    # Set refdata
    cluster_map.refdata = data
    for c1, c2 in zip(cluster_map, clusters):
        assert_arrays_equal(c1, [data[i] for i in c2.indices])
예제 #8
0
def test_cluster_centroid_iter():
    indices = list(range(len(data)))
    np.random.shuffle(indices)  # None trivial ordering

    # Test without specifying refdata in ClusterCentroid
    centroid = np.zeros(features_shape)
    cluster = ClusterCentroid(centroid)
    for idx in indices:
        cluster.assign(idx, (idx + 1) * features)

    assert_array_equal(cluster.indices, indices)
    assert_array_equal(list(cluster), indices)

    # Test with specifying refdata in ClusterCentroid
    cluster.refdata = data
    assert_arrays_equal(list(cluster), [data[i] for i in indices])
예제 #9
0
def test_cluster_centroid_iter():
    indices = list(range(len(data)))
    np.random.shuffle(indices)  # None trivial ordering

    # Test without specifying refdata in ClusterCentroid
    centroid = np.zeros(features_shape)
    cluster = ClusterCentroid(centroid)
    for idx in indices:
        cluster.assign(idx, (idx+1)*features)

    assert_array_equal(cluster.indices, indices)
    assert_array_equal(list(cluster), indices)

    # Test with specifying refdata in ClusterCentroid
    cluster.refdata = data
    assert_arrays_equal(list(cluster), [data[i] for i in indices])
예제 #10
0
def test_cluster_centroid_assign():
    centroid = np.zeros(features_shape)
    cluster = ClusterCentroid(centroid)

    indices = []
    centroid = np.zeros(features_shape, dtype=dtype)
    for idx in range(1, 10):
        cluster.assign(idx, (idx+1) * features)
        cluster.update()
        indices.append(idx)
        centroid = (centroid * (idx-1) + (idx+1) * features) / idx
        assert_equal(len(cluster), idx)
        assert_equal(type(cluster.indices), list)
        assert_array_equal(cluster.indices, indices)
        assert_equal(type(cluster.centroid), np.ndarray)
        assert_array_equal(cluster.centroid, centroid)
예제 #11
0
def test_cluster_centroid_attributes_and_constructor():
    centroid = np.zeros(features_shape)
    cluster = ClusterCentroid(centroid)
    assert_equal(type(cluster), ClusterCentroid)

    assert_equal(cluster.id, 0)
    assert_array_equal(cluster.indices, [])
    assert_array_equal(cluster.centroid, np.zeros(features_shape))
    assert_equal(len(cluster), 0)

    # Duplicate
    assert_equal(cluster, ClusterCentroid(centroid))
    assert_false(cluster != ClusterCentroid(centroid))
    assert_false(cluster == ClusterCentroid(centroid + 1))

    # Invalid comparison
    assert_raises(TypeError, cluster.__cmp__, cluster)
예제 #12
0
def test_cluster_map_centroid_add_cluster():
    clusters = ClusterMapCentroid()

    centroids = []
    for i in range(3):
        cluster = ClusterCentroid(centroid=np.zeros_like(features))

        centroids.append(np.zeros_like(features))
        for id_data in range(2 * i):
            centroids[-1] = ((centroids[-1] * id_data +
                              (id_data + 1) * features) / (id_data + 1))
            cluster.assign(id_data, (id_data + 1) * features)
            cluster.update()

        clusters.add_cluster(cluster)
        assert_array_equal(cluster.centroid, centroids[-1])
        assert_equal(type(cluster), ClusterCentroid)
        assert_equal(cluster, clusters[-1])

    assert_equal(type(clusters.centroids), list)
    assert_array_equal(list(itertools.chain(*clusters.centroids)),
                       list(itertools.chain(*centroids)))

    # Check adding features of different sizes (shorter and longer)
    features_shape_short = (1, features_shape[1] - 3)
    features_too_short = np.ones(features_shape_short, dtype=dtype)
    assert_raises(ValueError, cluster.assign, 123, features_too_short)

    features_shape_long = (1, features_shape[1] + 3)
    features_too_long = np.ones(features_shape_long, dtype=dtype)
    assert_raises(ValueError, cluster.assign, 123, features_too_long)
예제 #13
0
def test_cluster_centroid_assign():
    centroid = np.zeros(features_shape)
    cluster = ClusterCentroid(centroid)

    indices = []
    centroid = np.zeros(features_shape, dtype=dtype)
    for idx in range(1, 10):
        cluster.assign(idx, (idx + 1) * features)
        cluster.update()
        indices.append(idx)
        centroid = (centroid * (idx - 1) + (idx + 1) * features) / idx
        assert_equal(len(cluster), idx)
        assert_equal(type(cluster.indices), list)
        assert_array_equal(cluster.indices, indices)
        assert_equal(type(cluster.centroid), np.ndarray)
        assert_array_equal(cluster.centroid, centroid)
예제 #14
0
def test_cluster_map_centroid_comparison_with_int():
    clusters1_indices = range(10)
    clusters2_indices = range(10, 15)
    clusters3_indices = [15]

    # Build a test ClusterMapCentroid
    centroid = np.zeros_like(features)
    cluster1 = ClusterCentroid(centroid.copy())
    for i in clusters1_indices:
        cluster1.assign(i, features)

    cluster2 = ClusterCentroid(centroid.copy())
    for i in clusters2_indices:
        cluster2.assign(i, features)

    cluster3 = ClusterCentroid(centroid.copy())
    for i in clusters3_indices:
        cluster3.assign(i, features)

    # Update centroids
    cluster1.update()
    cluster2.update()
    cluster3.update()

    clusters = ClusterMapCentroid()
    clusters.add_cluster(cluster1)
    clusters.add_cluster(cluster2)
    clusters.add_cluster(cluster3)

    subset = clusters < 5
    assert_equal(subset.sum(), 1)
    assert_array_equal(list(clusters[subset][0]), clusters3_indices)

    subset = clusters <= 5
    assert_equal(subset.sum(), 2)
    assert_array_equal(list(clusters[subset][0]), clusters2_indices)
    assert_array_equal(list(clusters[subset][1]), clusters3_indices)

    subset = clusters == 5
    assert_equal(subset.sum(), 1)
    assert_array_equal(list(clusters[subset][0]), clusters2_indices)

    subset = clusters != 5
    assert_equal(subset.sum(), 2)
    assert_array_equal(list(clusters[subset][0]), clusters1_indices)
    assert_array_equal(list(clusters[subset][1]), clusters3_indices)

    subset = clusters > 5
    assert_equal(subset.sum(), 1)
    assert_array_equal(list(clusters[subset][0]), clusters1_indices)

    subset = clusters >= 5
    assert_equal(subset.sum(), 2)
    assert_array_equal(list(clusters[subset][0]), clusters1_indices)
    assert_array_equal(list(clusters[subset][1]), clusters2_indices)
예제 #15
0
def test_cluster_map_centroid_comparison_with_int():
    clusters1_indices = range(10)
    clusters2_indices = range(10, 15)
    clusters3_indices = [15]

    # Build a test ClusterMapCentroid
    centroid = np.zeros_like(features)
    cluster1 = ClusterCentroid(centroid.copy())
    for i in clusters1_indices:
        cluster1.assign(i, features)

    cluster2 = ClusterCentroid(centroid.copy())
    for i in clusters2_indices:
        cluster2.assign(i, features)

    cluster3 = ClusterCentroid(centroid.copy())
    for i in clusters3_indices:
        cluster3.assign(i, features)

    # Update centroids
    cluster1.update()
    cluster2.update()
    cluster3.update()

    clusters = ClusterMapCentroid()
    clusters.add_cluster(cluster1)
    clusters.add_cluster(cluster2)
    clusters.add_cluster(cluster3)

    subset = clusters < 5
    assert_equal(subset.sum(), 1)
    assert_array_equal(list(clusters[subset][0]), clusters3_indices)

    subset = clusters <= 5
    assert_equal(subset.sum(), 2)
    assert_array_equal(list(clusters[subset][0]), clusters2_indices)
    assert_array_equal(list(clusters[subset][1]), clusters3_indices)

    subset = clusters == 5
    assert_equal(subset.sum(), 1)
    assert_array_equal(list(clusters[subset][0]), clusters2_indices)

    subset = clusters != 5
    assert_equal(subset.sum(), 2)
    assert_array_equal(list(clusters[subset][0]), clusters1_indices)
    assert_array_equal(list(clusters[subset][1]), clusters3_indices)

    subset = clusters > 5
    assert_equal(subset.sum(), 1)
    assert_array_equal(list(clusters[subset][0]), clusters1_indices)

    subset = clusters >= 5
    assert_equal(subset.sum(), 2)
    assert_array_equal(list(clusters[subset][0]), clusters1_indices)
    assert_array_equal(list(clusters[subset][1]), clusters2_indices)