def test_cluster_map_centroid_getitem():
    """Check ``ClusterMapCentroid.__getitem__`` for ints, lists and slices.

    Every lookup on the map is compared against the same lookup on a plain
    list that holds the clusters in insertion order.
    """
    nb_clusters = 11
    # Seeded local generator: the shuffled order is reproducible and the
    # global NumPy random state is left untouched.
    rng = np.random.RandomState(42)
    # Index over the clusters actually stored in the map so that the test
    # does not depend on the length of the unrelated ``data`` fixture.
    indices = list(range(nb_clusters))
    rng.shuffle(indices)  # Non-trivial ordering
    advanced_indices = indices + [0, 1, 2, -1, -2, -3]

    cluster_map = ClusterMapCentroid()
    clusters = []
    for _ in range(nb_clusters):
        centroid = np.zeros_like(features)
        cluster = ClusterCentroid(centroid)
        cluster.id = cluster_map.add_cluster(cluster)
        clusters.append(cluster)

    # Test indexing
    for i in advanced_indices:
        assert_equal(cluster_map[i], clusters[i])

    # Test advanced indexing
    assert_arrays_equal(cluster_map[advanced_indices],
                        [clusters[i] for i in advanced_indices])

    # Test index out of bounds
    assert_raises(IndexError, cluster_map.__getitem__, len(clusters))
    assert_raises(IndexError, cluster_map.__getitem__, -len(clusters) - 1)

    # Test slicing and negative indexing
    assert_equal(cluster_map[-1], clusters[-1])
    assert_array_equal(cluster_map[::2], clusters[::2])
    assert_arrays_equal(cluster_map[::-1], clusters[::-1])
    assert_arrays_equal(cluster_map[:-1], clusters[:-1])
    assert_arrays_equal(cluster_map[1:], clusters[1:])
def test_cluster_map_centroid_remove_cluster():
    """Check that ``remove_cluster`` drops clusters one at a time.

    After each removal the length, the flattened content, the ``centroids``
    attribute and positional lookups of the map are compared against the
    clusters expected to remain.
    """
    # Seeded local generator so the random centroids are reproducible and
    # the global NumPy random state is not modified.
    rng = np.random.RandomState(42)
    clusters = ClusterMapCentroid()

    centroid1 = rng.rand(*features_shape).astype(dtype)
    cluster1 = ClusterCentroid(centroid1, indices=[1])
    clusters.add_cluster(cluster1)

    centroid2 = rng.rand(*features_shape).astype(dtype)
    cluster2 = ClusterCentroid(centroid2, indices=[1, 2])
    clusters.add_cluster(cluster2)

    centroid3 = rng.rand(*features_shape).astype(dtype)
    cluster3 = ClusterCentroid(centroid3, indices=[1, 2, 3])
    clusters.add_cluster(cluster3)

    assert_equal(len(clusters), 3)

    # Remove the middle cluster: the first and third must remain, in order.
    clusters.remove_cluster(cluster2)
    assert_equal(len(clusters), 2)
    assert_array_equal(list(itertools.chain(*clusters)),
                       list(itertools.chain(*[cluster1, cluster3])))
    assert_array_equal(clusters.centroids, np.array([centroid1, centroid3]))
    assert_equal(clusters[0], cluster1)
    assert_equal(clusters[1], cluster3)

    # Remove the last cluster: only the first one is left.
    clusters.remove_cluster(cluster3)
    assert_equal(len(clusters), 1)
    assert_array_equal(list(itertools.chain(*clusters)), list(cluster1))
    assert_array_equal(clusters.centroids, np.array([centroid1]))
    assert_equal(clusters[0], cluster1)

    # Remove the remaining cluster: the map must be empty.
    clusters.remove_cluster(cluster1)
    assert_equal(len(clusters), 0)
    assert_array_equal(list(itertools.chain(*clusters)), [])
    assert_array_equal(clusters.centroids, [])
def test_cluster_map_centroid_getitem():
    """Exercise integer, list and slice lookups on a ``ClusterMapCentroid``.

    The map is filled with ``nb_clusters`` clusters and each lookup result is
    compared with the equivalent lookup on a reference list of those
    clusters.
    """
    nb_clusters = 11
    # Use a seeded, local random generator so that a failure can be
    # reproduced and the global NumPy random state stays unchanged.
    rng = np.random.RandomState(42)
    # Build the index list from the number of clusters in the map rather
    # than from the unrelated ``data`` fixture, so every index is valid and
    # every cluster is looked up at least once.
    indices = list(range(nb_clusters))
    rng.shuffle(indices)  # Non-trivial ordering
    advanced_indices = indices + [0, 1, 2, -1, -2, -3]

    cluster_map = ClusterMapCentroid()
    clusters = []
    for _ in range(nb_clusters):
        centroid = np.zeros_like(features)
        cluster = ClusterCentroid(centroid)
        cluster.id = cluster_map.add_cluster(cluster)
        clusters.append(cluster)

    # Test indexing
    for i in advanced_indices:
        assert_equal(cluster_map[i], clusters[i])

    # Test advanced indexing
    assert_array_equal(cluster_map[advanced_indices],
                       [clusters[i] for i in advanced_indices])

    # Test index out of bounds
    assert_raises(IndexError, cluster_map.__getitem__, len(clusters))
    assert_raises(IndexError, cluster_map.__getitem__, -len(clusters) - 1)

    # Test slicing and negative indexing
    assert_equal(cluster_map[-1], clusters[-1])
    assert_array_equal(cluster_map[::2], clusters[::2])
    assert_arrays_equal(cluster_map[::-1], clusters[::-1])
    assert_arrays_equal(cluster_map[:-1], clusters[:-1])
    assert_arrays_equal(cluster_map[1:], clusters[1:])
def test_cluster_map_centroid_add_cluster():
    """Check ``ClusterMapCentroid.add_cluster`` and the ``centroids`` attribute.

    Three clusters are built with 0, 2 and 4 assigned elements. For each one
    the expected centroid is tracked by hand as a running mean and compared
    with the centroid reported by the cluster once it is in the map.
    """
    cluster_map = ClusterMapCentroid()
    expected_centroids = []

    for step in range(3):
        new_cluster = ClusterCentroid(centroid=np.zeros_like(features))
        running_mean = np.zeros_like(features)

        # Assign 2 * step scaled copies of ``features`` and keep the
        # hand-computed running mean in sync with every assignment.
        for k in range(2 * step):
            weight = k + 1
            running_mean = (running_mean * k + weight * features) / weight
            new_cluster.assign(k, weight * features)
            new_cluster.update()

        expected_centroids.append(running_mean)
        cluster_map.add_cluster(new_cluster)

        assert_array_equal(new_cluster.centroid, expected_centroids[-1])
        assert_equal(type(new_cluster), ClusterCentroid)
        assert_equal(new_cluster, cluster_map[-1])

    assert_equal(type(cluster_map.centroids), list)
    flat_actual = list(itertools.chain(*cluster_map.centroids))
    flat_expected = list(itertools.chain(*expected_centroids))
    assert_array_equal(flat_actual, flat_expected)

    # Check adding features of different sizes (shorter and longer);
    # the last cluster built above is reused for both checks.
    too_short = np.ones((1, features_shape[1] - 3), dtype=dtype)
    assert_raises(ValueError, new_cluster.assign, 123, too_short)

    too_long = np.ones((1, features_shape[1] + 3), dtype=dtype)
    assert_raises(ValueError, new_cluster.assign, 123, too_long)
def test_cluster_centroid_getitem():
    """Check ``ClusterCentroid.__getitem__`` with and without ``refdata``.

    Without ``refdata`` the lookups are compared with the assigned indices;
    once ``refdata`` is set to ``data`` they are compared with the matching
    entries of ``data``.
    """
    # Seeded local generator: reproducible shuffle, global NumPy random
    # state left untouched.
    rng = np.random.RandomState(42)
    indices = list(range(len(data)))
    rng.shuffle(indices)  # Non-trivial ordering
    advanced_indices = indices + [0, 1, 2, -1, -2, -3]

    # Test without specifying refdata in ClusterCentroid
    centroid = np.zeros(features_shape)
    cluster = ClusterCentroid(centroid)
    for idx in indices:
        cluster.assign(idx, (idx + 1) * features)

    # Test indexing
    for i in advanced_indices:
        assert_equal(cluster[i], indices[i])

    # Test advanced indexing
    assert_array_equal(cluster[advanced_indices],
                       [indices[i] for i in advanced_indices])

    # Test index out of bounds
    assert_raises(IndexError, cluster.__getitem__, len(cluster))
    assert_raises(IndexError, cluster.__getitem__, -len(cluster) - 1)

    # Test slicing and negative indexing
    assert_equal(cluster[-1], indices[-1])
    assert_array_equal(cluster[::2], indices[::2])
    assert_arrays_equal(cluster[::-1], indices[::-1])
    assert_arrays_equal(cluster[:-1], indices[:-1])
    assert_arrays_equal(cluster[1:], indices[1:])

    # Test with specifying refdata in ClusterCentroid
    cluster.refdata = data

    # Test indexing
    for i in advanced_indices:
        assert_array_equal(cluster[i], data[indices[i]])

    # Test advanced indexing
    assert_arrays_equal(cluster[advanced_indices],
                        [data[indices[i]] for i in advanced_indices])

    # Test index out of bounds
    assert_raises(IndexError, cluster.__getitem__, len(cluster))
    assert_raises(IndexError, cluster.__getitem__, -len(cluster) - 1)

    # Test slicing and negative indexing
    assert_array_equal(cluster[-1], data[indices[-1]])
    assert_arrays_equal(cluster[::2], [data[i] for i in indices[::2]])
    assert_arrays_equal(cluster[::-1], [data[i] for i in indices[::-1]])
    assert_arrays_equal(cluster[:-1], [data[i] for i in indices[:-1]])
    assert_arrays_equal(cluster[1:], [data[i] for i in indices[1:]])
def test_cluster_centroid_getitem():
    """Exercise integer, list and slice lookups on a ``ClusterCentroid``.

    The first half runs without ``refdata`` and expects the stored indices
    back; the second half sets ``refdata`` to ``data`` and expects the
    corresponding ``data`` entries instead.
    """
    # Use a seeded, local random generator so a failing ordering can be
    # reproduced and the global NumPy random state is not modified.
    rng = np.random.RandomState(42)
    indices = list(range(len(data)))
    rng.shuffle(indices)  # Non-trivial ordering
    advanced_indices = indices + [0, 1, 2, -1, -2, -3]

    # Test without specifying refdata in ClusterCentroid
    centroid = np.zeros(features_shape)
    cluster = ClusterCentroid(centroid)
    for idx in indices:
        cluster.assign(idx, (idx + 1) * features)

    # Test indexing
    for i in advanced_indices:
        assert_equal(cluster[i], indices[i])

    # Test advanced indexing
    assert_array_equal(cluster[advanced_indices],
                       [indices[i] for i in advanced_indices])

    # Test index out of bounds
    assert_raises(IndexError, cluster.__getitem__, len(cluster))
    assert_raises(IndexError, cluster.__getitem__, -len(cluster) - 1)

    # Test slicing and negative indexing
    assert_equal(cluster[-1], indices[-1])
    assert_array_equal(cluster[::2], indices[::2])
    assert_arrays_equal(cluster[::-1], indices[::-1])
    assert_arrays_equal(cluster[:-1], indices[:-1])
    assert_arrays_equal(cluster[1:], indices[1:])

    # Test with specifying refdata in ClusterCentroid
    cluster.refdata = data

    # Test indexing
    for i in advanced_indices:
        assert_array_equal(cluster[i], data[indices[i]])

    # Test advanced indexing
    assert_arrays_equal(cluster[advanced_indices],
                        [data[indices[i]] for i in advanced_indices])

    # Test index out of bounds
    assert_raises(IndexError, cluster.__getitem__, len(cluster))
    assert_raises(IndexError, cluster.__getitem__, -len(cluster) - 1)

    # Test slicing and negative indexing
    assert_array_equal(cluster[-1], data[indices[-1]])
    assert_arrays_equal(cluster[::2], [data[i] for i in indices[::2]])
    assert_arrays_equal(cluster[::-1], [data[i] for i in indices[::-1]])
    assert_arrays_equal(cluster[:-1], [data[i] for i in indices[:-1]])
    assert_arrays_equal(cluster[1:], [data[i] for i in indices[1:]])
def test_cluster_map_centroid_iter():
    """Check iteration over a ``ClusterMapCentroid``.

    The map is filled with clusters carrying random indices. Iterating the
    map is compared with the reference list of clusters and, once
    ``refdata`` is set, with the ``data`` entries those indices point to.
    """
    rng = np.random.RandomState(42)
    nb_clusters = 11

    cmap = ClusterMapCentroid()
    expected = []
    for _ in range(nb_clusters):
        zero_centroid = np.zeros_like(features)
        member_ids = rng.randint(0, len(data), size=10)
        member = ClusterCentroid(zero_centroid, indices=member_ids)
        cmap.add_cluster(member)
        expected.append(member)

    # The map must hold the very same objects that were added, in order.
    same_objects = all(stored is ref
                       for stored, ref in zip(cmap.clusters, expected))
    assert_true(same_objects)

    assert_array_equal(cmap, expected)
    assert_array_equal(cmap.clusters, expected)
    assert_array_equal(cmap, [member.indices for member in expected])

    # Set refdata
    cmap.refdata = data
    for yielded, ref in zip(cmap, expected):
        assert_arrays_equal(yielded, [data[i] for i in ref.indices])
def test_cluster_centroid_iter():
    """Check iteration over a ``ClusterCentroid`` with and without ``refdata``.

    Without ``refdata`` iterating the cluster is expected to give back the
    assigned indices in assignment order; with ``refdata`` set to ``data``
    it is expected to give the matching ``data`` entries.
    """
    # Seeded local generator: reproducible shuffle, global NumPy random
    # state left untouched.
    rng = np.random.RandomState(42)
    indices = list(range(len(data)))
    rng.shuffle(indices)  # Non-trivial ordering

    # Test without specifying refdata in ClusterCentroid
    centroid = np.zeros(features_shape)
    cluster = ClusterCentroid(centroid)
    for idx in indices:
        cluster.assign(idx, (idx + 1) * features)

    assert_array_equal(cluster.indices, indices)
    assert_array_equal(list(cluster), indices)

    # Test with specifying refdata in ClusterCentroid
    cluster.refdata = data
    assert_arrays_equal(list(cluster), [data[i] for i in indices])
def test_cluster_centroid_iter():
    """Exercise ``iter()`` on a ``ClusterCentroid``.

    Elements are assigned in a shuffled order; iteration must return them in
    that order, either as indices (no ``refdata``) or as the corresponding
    ``data`` entries (``refdata`` set to ``data``).
    """
    # Use a seeded, local random generator so a failing ordering can be
    # reproduced and the global NumPy random state is not modified.
    rng = np.random.RandomState(42)
    indices = list(range(len(data)))
    rng.shuffle(indices)  # Non-trivial ordering

    # Test without specifying refdata in ClusterCentroid
    centroid = np.zeros(features_shape)
    cluster = ClusterCentroid(centroid)
    for idx in indices:
        cluster.assign(idx, (idx + 1) * features)

    assert_array_equal(cluster.indices, indices)
    assert_array_equal(list(cluster), indices)

    # Test with specifying refdata in ClusterCentroid
    cluster.refdata = data
    assert_arrays_equal(list(cluster), [data[i] for i in indices])
def test_cluster_centroid_assign():
    """Check ``ClusterCentroid.assign`` followed by ``update``.

    Nine scaled copies of ``features`` are assigned one by one. After each
    step the cluster size, its indices and its centroid are compared with
    values tracked by hand (the centroid as a running mean).
    """
    cluster = ClusterCentroid(np.zeros(features_shape))

    assigned = []
    expected_centroid = np.zeros(features_shape, dtype=dtype)
    for idx in range(1, 10):
        weight = idx + 1
        cluster.assign(idx, weight * features)
        cluster.update()

        assigned.append(idx)
        # Running mean over the ``idx`` elements assigned so far.
        expected_centroid = (
            expected_centroid * (idx - 1) + weight * features
        ) / idx

        assert_equal(len(cluster), idx)
        assert_equal(type(cluster.indices), list)
        assert_array_equal(cluster.indices, assigned)
        assert_equal(type(cluster.centroid), np.ndarray)
        assert_array_equal(cluster.centroid, expected_centroid)
def test_cluster_centroid_attributes_and_constructor():
    """Check a freshly constructed ``ClusterCentroid``.

    Verifies the initial attribute values, equality and inequality against
    other freshly built clusters, and that calling ``__cmp__`` raises
    ``TypeError``.
    """
    zero_centroid = np.zeros(features_shape)
    fresh = ClusterCentroid(zero_centroid)

    # Initial state of a new, empty cluster.
    assert_equal(type(fresh), ClusterCentroid)
    assert_equal(fresh.id, 0)
    assert_array_equal(fresh.indices, [])
    assert_array_equal(fresh.centroid, np.zeros(features_shape))
    assert_equal(len(fresh), 0)

    # Duplicate
    same_as_fresh = ClusterCentroid(zero_centroid)
    assert_equal(fresh, same_as_fresh)

    another_copy = ClusterCentroid(zero_centroid)
    assert_false(fresh != another_copy)

    shifted = ClusterCentroid(zero_centroid + 1)
    assert_false(fresh == shifted)

    # Invalid comparison
    assert_raises(TypeError, fresh.__cmp__, fresh)
def test_cluster_map_centroid_add_cluster():
    """Exercise ``add_cluster`` on a ``ClusterMapCentroid``.

    Clusters with 0, 2 and 4 assigned elements are added in turn. Each
    cluster's centroid is compared with a hand-maintained running mean, and
    the map's ``centroids`` list with the collected expected centroids.
    Assigning features with a mismatching second dimension must raise
    ``ValueError``.
    """
    cmap = ClusterMapCentroid()
    reference_centroids = []

    for round_no in range(3):
        current = ClusterCentroid(centroid=np.zeros_like(features))
        mean_so_far = np.zeros_like(features)

        n_assignments = 2 * round_no
        for data_id in range(n_assignments):
            count = data_id + 1
            # Incremental mean: previous mean weighted by the number of
            # elements already seen, plus the new scaled features.
            mean_so_far = (mean_so_far * data_id + count * features) / count
            current.assign(data_id, count * features)
            current.update()

        reference_centroids.append(mean_so_far)
        cmap.add_cluster(current)

        assert_array_equal(current.centroid, reference_centroids[-1])
        assert_equal(type(current), ClusterCentroid)
        assert_equal(current, cmap[-1])

    assert_equal(type(cmap.centroids), list)
    assert_array_equal(list(itertools.chain(*cmap.centroids)),
                       list(itertools.chain(*reference_centroids)))

    # Check adding features of different sizes (shorter and longer)
    short_shape = (1, features_shape[1] - 3)
    assert_raises(ValueError, current.assign, 123,
                  np.ones(short_shape, dtype=dtype))

    long_shape = (1, features_shape[1] + 3)
    assert_raises(ValueError, current.assign, 123,
                  np.ones(long_shape, dtype=dtype))
def test_cluster_centroid_assign():
    """Exercise ``assign``/``update`` on a ``ClusterCentroid``.

    Elements 1 through 9 are assigned with features scaled by ``idx + 1``.
    After every update the length, the type and content of ``indices`` and
    the type and value of ``centroid`` are checked against a hand-computed
    running mean.
    """
    initial = np.zeros(features_shape)
    cluster = ClusterCentroid(initial)

    seen = []
    mean = np.zeros(features_shape, dtype=dtype)
    for idx in range(1, 10):
        scale = idx + 1
        cluster.assign(idx, scale * features)
        cluster.update()
        seen.append(idx)

        # ``idx`` doubles as the number of elements assigned so far.
        previous_total = mean * (idx - 1)
        mean = (previous_total + scale * features) / idx

        assert_equal(len(cluster), idx)
        assert_equal(type(cluster.indices), list)
        assert_array_equal(cluster.indices, seen)
        assert_equal(type(cluster.centroid), np.ndarray)
        assert_array_equal(cluster.centroid, mean)
def test_cluster_map_centroid_comparison_with_int():
    """Check comparing a ``ClusterMapCentroid`` with an integer.

    The map holds three clusters with 10, 5 and 1 elements. Each comparison
    operator against 5 yields a mask; the number of selected clusters and
    the content of the clusters picked by the mask are verified.
    """
    large_ids = range(10)        # 10 elements
    medium_ids = range(10, 15)   # 5 elements
    small_ids = [15]             # 1 element

    # Build a test ClusterMapCentroid
    base_centroid = np.zeros_like(features)
    members = []
    for ids in (large_ids, medium_ids, small_ids):
        member = ClusterCentroid(base_centroid.copy())
        for idx in ids:
            member.assign(idx, features)
        members.append(member)

    # Update centroids
    for member in members:
        member.update()

    cluster_map = ClusterMapCentroid()
    for member in members:
        cluster_map.add_cluster(member)

    def check(mask, expected_groups):
        # The mask must select as many clusters as expected, and the
        # selected clusters must hold the expected indices, in order.
        assert_equal(mask.sum(), len(expected_groups))
        for position, ids in enumerate(expected_groups):
            assert_array_equal(list(cluster_map[mask][position]), ids)

    check(cluster_map < 5, [small_ids])
    check(cluster_map <= 5, [medium_ids, small_ids])
    check(cluster_map == 5, [medium_ids])
    check(cluster_map != 5, [large_ids, small_ids])
    check(cluster_map > 5, [large_ids])
    check(cluster_map >= 5, [large_ids, medium_ids])