def test_KMeansMachine():
    """Exercise the basic KMeansMachine API.

    Covers construction, the mean setters/getters, distance queries, an HDF5
    save/load round trip, resizing, and the copy constructor together with
    the comparison operators.
    """
    means = numpy.array([[3, 70, 0], [4, 72, 0]], 'float64')
    mean = numpy.array([3, 70, 1], 'float64')

    # Initializes a KMeansMachine
    km = KMeansMachine(2, 3)
    km.means = means
    assert km.shape == (2, 3)

    # Sets and gets
    assert (km.means == means).all()
    assert (km.get_mean(0) == means[0, :]).all()
    assert (km.get_mean(1) == means[1, :]).all()
    km.set_mean(0, mean)
    assert (km.get_mean(0) == mean).all()

    # Distance and closest mean. Mean 0 was just replaced by `mean`, hence
    # the zero distance; against mean 1 = (4, 72, 0) the expected value is
    # 6 = 1**2 + 2**2 + 1**2.
    eps = 1e-10
    assert equals(km.get_distance_from_mean(mean, 0), 0, eps)
    assert equals(km.get_distance_from_mean(mean, 1), 6, eps)

    (index, dist) = km.get_closest_mean(mean)
    assert index == 0
    assert equals(dist, 0, eps)
    assert equals(km.get_min_distance(mean), 0, eps)

    # Loads and saves.
    # mkstemp() hands back an already-open OS-level descriptor along with
    # the path; only the path is needed, so close the descriptor right away
    # instead of leaking it.
    fd, filename = tempfile.mkstemp(".hdf5")
    os.close(fd)
    try:
        km.save(bob.io.base.HDF5File(filename, 'w'))
        km_loaded = KMeansMachine(bob.io.base.HDF5File(filename))
        assert km == km_loaded
    finally:
        # Clean-up: the file is not referenced again below, and removing it
        # here guarantees it goes away even when an assertion fails.
        os.unlink(filename)

    # Resize
    km.resize(4, 5)
    assert km.shape == (4, 5)

    # Copy constructor and comparison operators
    km.resize(2, 3)
    km2 = KMeansMachine(km)
    assert km2 == km
    assert (km2 != km) is False
    assert km2.is_similar_to(km)
    means2 = numpy.array([[3, 70, 0], [4, 72, 2]], 'float64')
    km2.means = means2
    assert (km2 == km) is False
    assert km2 != km
    assert (km2.is_similar_to(km)) is False
def test_KMeansMachine():
    """Exercise the basic KMeansMachine API.

    Covers construction, the mean setters/getters, distance queries, an HDF5
    save/load round trip, resizing, and the copy constructor together with
    the comparison operators.
    """
    means = numpy.array([[3, 70, 0], [4, 72, 0]], 'float64')
    mean = numpy.array([3, 70, 1], 'float64')

    # Initializes a KMeansMachine
    km = KMeansMachine(2, 3)
    km.means = means
    assert km.shape == (2, 3)

    # Sets and gets
    assert (km.means == means).all()
    assert (km.get_mean(0) == means[0, :]).all()
    assert (km.get_mean(1) == means[1, :]).all()
    km.set_mean(0, mean)
    assert (km.get_mean(0) == mean).all()

    # Distance and closest mean. Mean 0 was just replaced by `mean`, hence
    # the zero distance; against mean 1 = (4, 72, 0) the expected value is
    # 6 = 1**2 + 2**2 + 1**2.
    eps = 1e-10
    assert equals(km.get_distance_from_mean(mean, 0), 0, eps)
    assert equals(km.get_distance_from_mean(mean, 1), 6, eps)

    (index, dist) = km.get_closest_mean(mean)
    assert index == 0
    assert equals(dist, 0, eps)
    assert equals(km.get_min_distance(mean), 0, eps)

    # Loads and saves.
    # mkstemp() hands back an already-open OS-level descriptor along with
    # the path; only the path is needed, so close the descriptor right away
    # instead of leaking it.
    fd, filename = tempfile.mkstemp(".hdf5")
    os.close(fd)
    try:
        km.save(bob.io.base.HDF5File(filename, 'w'))
        km_loaded = KMeansMachine(bob.io.base.HDF5File(filename))
        assert km == km_loaded
    finally:
        # Clean-up: the file is not referenced again below, and removing it
        # here guarantees it goes away even when an assertion fails.
        os.unlink(filename)

    # Resize
    km.resize(4, 5)
    assert km.shape == (4, 5)

    # Copy constructor and comparison operators
    km.resize(2, 3)
    km2 = KMeansMachine(km)
    assert km2 == km
    assert (km2 != km) is False
    assert km2.is_similar_to(km)
    means2 = numpy.array([[3, 70, 0], [4, 72, 2]], 'float64')
    km2.means = means2
    assert (km2 == km) is False
    assert km2 != km
    assert (km2.is_similar_to(km)) is False
def test_kmeans_noduplicate():
    """Check that 'RANDOM_NO_DUPLICATE' initialization yields distinct means.

    Three of the four samples are identical, so two initial means picked
    from the data could coincide; the test asserts that the two means set
    by ``trainer.initialize`` differ.
    """
    # Data/dimensions
    dim_c = 2
    dim_d = 3
    seed = 0
    data = numpy.array([[1, 2, 3], [1, 2, 3], [1, 2, 3], [4, 5, 6.]])

    # Defines machine and trainer
    machine = KMeansMachine(dim_c, dim_d)
    trainer = KMeansTrainer()
    rng = bob.core.random.mt19937(seed)
    trainer.initialization_method = 'RANDOM_NO_DUPLICATE'
    trainer.initialize(machine, data, rng)

    # Makes sure that the two initial mean vectors selected are different.
    # `not` is used rather than `== False`; it also works when the helper
    # returns a NumPy boolean (presumably the case here -- `equals` is
    # defined elsewhere in the file).
    assert not equals(machine.get_mean(0), machine.get_mean(1), 1e-8)
def test_kmeans_noduplicate():
    """Check that 'RANDOM_NO_DUPLICATE' initialization yields distinct means.

    Three of the four samples are identical, so two initial means picked
    from the data could coincide; the test asserts that the two means set
    by ``trainer.initialize`` differ.
    """
    # Data/dimensions
    dim_c = 2
    dim_d = 3
    seed = 0
    data = numpy.array([[1, 2, 3], [1, 2, 3], [1, 2, 3], [4, 5, 6.]])

    # Defines machine and trainer
    machine = KMeansMachine(dim_c, dim_d)
    trainer = KMeansTrainer()
    rng = bob.core.random.mt19937(seed)
    trainer.initialization_method = 'RANDOM_NO_DUPLICATE'
    trainer.initialize(machine, data, rng)

    # Makes sure that the two initial mean vectors selected are different.
    # `not` is used rather than `== False`; it also works when the helper
    # returns a NumPy boolean (presumably the case here -- `equals` is
    # defined elsewhere in the file).
    assert not equals(machine.get_mean(0), machine.get_mean(1), 1e-8)
def test_kmeans_a():
    """Train a 2-cluster, 1-D KMeansMachine and check the resulting statistics.

    The data file contains draws from two 1D Gaussian distributions:
      * 100 samples from N(-10,1)
      * 100 samples from N(10,1)

    Expected after training: means [-10, 10], variances [1, 1] and
    weights [0.5, 0.5], within the tolerances asserted below.
    """
    samples = bob.io.base.load(
        datafile("samplesFrom2G_f64.hdf5", __name__, path="../data/"))

    kmeans = KMeansMachine(2, 1)
    # Training goes through the generic bob.learn.em.train() driver.
    bob.learn.em.train(KMeansTrainer(), kmeans, samples)

    # Per-cluster statistics obtained with a single call ...
    variances, weights = kmeans.get_variances_and_weights_for_each_cluster(
        samples)

    # ... and the same statistics via the explicit init / acc / fin steps,
    # written into buffers allocated by the caller.
    stepwise_variances = numpy.ndarray(shape=(2, 1), dtype=numpy.float64)
    stepwise_weights = numpy.ndarray(shape=(2,), dtype=numpy.float64)
    kmeans.__get_variances_and_weights_for_each_cluster_init__(
        stepwise_variances, stepwise_weights)
    kmeans.__get_variances_and_weights_for_each_cluster_acc__(
        samples, stepwise_variances, stepwise_weights)
    kmeans.__get_variances_and_weights_for_each_cluster_fin__(
        stepwise_variances, stepwise_weights)

    first = kmeans.get_mean(0)
    second = kmeans.get_mean(1)

    # The test does not assume which cluster comes first: order the two
    # means ascending before comparing them with [-10, 10].
    lower, upper = (first, second) if first < second else (second, first)
    means = numpy.array([lower[0], upper[0]], 'float64')

    assert equals(means, numpy.array([-10., 10.]), 2e-1)
    assert equals(variances, numpy.array([1., 1.]), 2e-1)
    assert equals(weights, numpy.array([0.5, 0.5]), 1e-3)

    # Both computation paths must agree.
    assert equals(variances, stepwise_variances, 1e-8)
    assert equals(weights, stepwise_weights, 1e-8)
def test_kmeans_a():
    """Train a 2-cluster, 1-D KMeansMachine and check the resulting statistics.

    The data file contains draws from two 1D Gaussian distributions:
      * 100 samples from N(-10,1)
      * 100 samples from N(10,1)

    Expected after training: means [-10, 10], variances [1, 1] and
    weights [0.5, 0.5], within the tolerances asserted below.
    """
    samples = bob.io.base.load(
        datafile("samplesFrom2G_f64.hdf5", __name__, path="../data/"))

    kmeans = KMeansMachine(2, 1)
    # Training goes through the generic bob.learn.em.train() driver.
    bob.learn.em.train(KMeansTrainer(), kmeans, samples)

    # Per-cluster statistics obtained with a single call ...
    variances, weights = kmeans.get_variances_and_weights_for_each_cluster(
        samples)

    # ... and the same statistics via the explicit init / acc / fin steps,
    # written into buffers allocated by the caller.
    stepwise_variances = numpy.ndarray(shape=(2, 1), dtype=numpy.float64)
    stepwise_weights = numpy.ndarray(shape=(2,), dtype=numpy.float64)
    kmeans.__get_variances_and_weights_for_each_cluster_init__(
        stepwise_variances, stepwise_weights)
    kmeans.__get_variances_and_weights_for_each_cluster_acc__(
        samples, stepwise_variances, stepwise_weights)
    kmeans.__get_variances_and_weights_for_each_cluster_fin__(
        stepwise_variances, stepwise_weights)

    first = kmeans.get_mean(0)
    second = kmeans.get_mean(1)

    # The test does not assume which cluster comes first: order the two
    # means ascending before comparing them with [-10, 10].
    lower, upper = (first, second) if first < second else (second, first)
    means = numpy.array([lower[0], upper[0]], 'float64')

    assert equals(means, numpy.array([-10., 10.]), 2e-1)
    assert equals(variances, numpy.array([1., 1.]), 2e-1)
    assert equals(weights, numpy.array([0.5, 0.5]), 1e-3)

    # Both computation paths must agree.
    assert equals(variances, stepwise_variances, 1e-8)
    assert equals(weights, stepwise_weights, 1e-8)