예제 #1
0
def test_KMeansMachine():
  """Exercise the basic KMeansMachine API.

  Covers construction, the mean accessors, distance queries, an HDF5
  save/load round trip, resizing, and the copy constructor together with
  the comparison operators.
  """

  means = numpy.array([[3, 70, 0], [4, 72, 0]], 'float64')
  mean  = numpy.array([3,70,1], 'float64')

  # Initializes a KMeansMachine
  km = KMeansMachine(2,3)
  km.means = means
  assert km.shape == (2,3)

  # Sets and gets
  assert (km.means == means).all()
  assert (km.get_mean(0) == means[0,:]).all()
  assert (km.get_mean(1) == means[1,:]).all()
  km.set_mean(0, mean)
  assert (km.get_mean(0) == mean).all()

  # Distance and closest mean
  eps = 1e-10

  # Mean 0 was just replaced by `mean`, hence the zero distance. For mean 1
  # the expected 6 equals 1**2 + 2**2 + 1**2, which is consistent with a
  # squared Euclidean distance.
  assert equals( km.get_distance_from_mean(mean, 0), 0, eps)
  assert equals( km.get_distance_from_mean(mean, 1), 6, eps)

  (index, dist) = km.get_closest_mean(mean)

  assert index == 0
  assert equals( dist, 0, eps)
  assert equals( km.get_min_distance(mean), 0, eps)

  # Loads and saves
  # mkstemp() hands back an already-open descriptor: close it right away so
  # it is not leaked, and remove the file even when an assertion fails.
  fd, filename = tempfile.mkstemp(".hdf5")
  os.close(fd)
  try:
    km.save(bob.io.base.HDF5File(filename, 'w'))
    km_loaded = KMeansMachine(bob.io.base.HDF5File(filename))
    assert km == km_loaded
  finally:
    # Clean-up
    os.unlink(filename)

  # Resize
  km.resize(4,5)
  assert km.shape == (4,5)

  # Copy constructor and comparison operators
  km.resize(2,3)
  km2 = KMeansMachine(km)
  assert km2 == km
  assert (km2 != km) is False
  assert km2.is_similar_to(km)
  # Changing a single coordinate of the second mean must break equality.
  means2 = numpy.array([[3, 70, 0], [4, 72, 2]], 'float64')
  km2.means = means2
  assert (km2 == km) is False
  assert km2 != km
  assert (km2.is_similar_to(km)) is False
예제 #2
0
def test_KMeansMachine():
    """Exercise the basic KMeansMachine API.

    Covers construction, the mean accessors, distance queries, an HDF5
    save/load round trip, resizing, and the copy constructor together with
    the comparison operators.
    """

    means = numpy.array([[3, 70, 0], [4, 72, 0]], 'float64')
    mean = numpy.array([3, 70, 1], 'float64')

    # Initializes a KMeansMachine
    km = KMeansMachine(2, 3)
    km.means = means
    assert km.shape == (2, 3)

    # Sets and gets
    assert (km.means == means).all()
    assert (km.get_mean(0) == means[0, :]).all()
    assert (km.get_mean(1) == means[1, :]).all()
    km.set_mean(0, mean)
    assert (km.get_mean(0) == mean).all()

    # Distance and closest mean
    eps = 1e-10

    # Mean 0 was just replaced by `mean`, hence the zero distance. For mean 1
    # the expected 6 equals 1**2 + 2**2 + 1**2, which is consistent with a
    # squared Euclidean distance.
    assert equals(km.get_distance_from_mean(mean, 0), 0, eps)
    assert equals(km.get_distance_from_mean(mean, 1), 6, eps)

    (index, dist) = km.get_closest_mean(mean)

    assert index == 0
    assert equals(dist, 0, eps)
    assert equals(km.get_min_distance(mean), 0, eps)

    # Loads and saves
    # mkstemp() hands back an already-open descriptor: close it right away so
    # it is not leaked, and remove the file even when an assertion fails.
    fd, filename = tempfile.mkstemp(".hdf5")
    os.close(fd)
    try:
        km.save(bob.io.base.HDF5File(filename, 'w'))
        km_loaded = KMeansMachine(bob.io.base.HDF5File(filename))
        assert km == km_loaded
    finally:
        # Clean-up
        os.unlink(filename)

    # Resize
    km.resize(4, 5)
    assert km.shape == (4, 5)

    # Copy constructor and comparison operators
    km.resize(2, 3)
    km2 = KMeansMachine(km)
    assert km2 == km
    assert (km2 != km) is False
    assert km2.is_similar_to(km)
    # Changing a single coordinate of the second mean must break equality.
    means2 = numpy.array([[3, 70, 0], [4, 72, 2]], 'float64')
    km2.means = means2
    assert (km2 == km) is False
    assert km2 != km
    assert (km2.is_similar_to(km)) is False
def test_kmeans_noduplicate():
  """Check that 'RANDOM_NO_DUPLICATE' initialization yields distinct means.

  The data set contains three identical rows and a single different one,
  so two initial means drawn without care could coincide.
  """
  # Data/dimensions
  dim_c = 2
  dim_d = 3
  seed = 0
  data = numpy.array([[1,2,3],[1,2,3],[1,2,3],[4,5,6.]])
  # Defines machine and trainer
  machine = KMeansMachine(dim_c, dim_d)
  trainer = KMeansTrainer()
  # Fixed seed so that the initialization is reproducible
  rng = bob.core.random.mt19937(seed)
  trainer.initialization_method = 'RANDOM_NO_DUPLICATE'
  trainer.initialize(machine, data, rng)
  # Makes sure that the two initial mean vectors selected are different
  assert not equals(machine.get_mean(0), machine.get_mean(1), 1e-8)
예제 #4
0
def test_kmeans_noduplicate():
    """Check that 'RANDOM_NO_DUPLICATE' initialization yields distinct means.

    The data set contains three identical rows and a single different one,
    so two initial means drawn without care could coincide.
    """
    # Data/dimensions
    dim_c = 2
    dim_d = 3
    seed = 0
    data = numpy.array([[1, 2, 3], [1, 2, 3], [1, 2, 3], [4, 5, 6.]])
    # Defines machine and trainer
    machine = KMeansMachine(dim_c, dim_d)
    trainer = KMeansTrainer()
    # Fixed seed so that the initialization is reproducible
    rng = bob.core.random.mt19937(seed)
    trainer.initialization_method = 'RANDOM_NO_DUPLICATE'
    trainer.initialize(machine, data, rng)
    # Makes sure that the two initial mean vectors selected are different
    assert not equals(machine.get_mean(0), machine.get_mean(1), 1e-8)
def test_kmeans_a():
  """Train a two-cluster, 1D KMeansMachine and check its cluster statistics.

  Per the fixture description, the data file contains draws from two 1D
  Gaussian distributions: 100 samples from N(-10,1) and 100 samples from
  N(10,1).
  """
  samples_path = datafile("samplesFrom2G_f64.hdf5", __name__, path="../data/")
  data = bob.io.base.load(samples_path)

  machine = KMeansMachine(2, 1)
  trainer = KMeansTrainer()
  # Training is driven through the generic bob.learn.em.train helper
  bob.learn.em.train(trainer, machine, data)

  # Per-cluster statistics obtained with a single call ...
  variances, weights = machine.get_variances_and_weights_for_each_cluster(data)

  # ... and the same statistics through the init / acc / fin calls, written
  # into caller-provided buffers
  variances_b = numpy.empty((2, 1), dtype=numpy.float64)
  weights_b = numpy.empty((2,), dtype=numpy.float64)
  machine.__get_variances_and_weights_for_each_cluster_init__(variances_b, weights_b)
  machine.__get_variances_and_weights_for_each_cluster_acc__(data, variances_b, weights_b)
  machine.__get_variances_and_weights_for_each_cluster_fin__(variances_b, weights_b)

  # Cluster order is not fixed by training, so put the smaller mean first
  first = machine.get_mean(0)
  second = machine.get_mean(1)
  ordered = [first[0], second[0]] if first < second else [second[0], first[0]]
  means = numpy.array(ordered, 'float64')

  # Expected: means [-10,10] / variances [1,1] / weights [0.5,0.5]
  expected_means = numpy.array([-10.,10.])
  expected_variances = numpy.array([1.,1.])
  expected_weights = numpy.array([0.5,0.5])
  assert equals(means, expected_means, 2e-1)
  assert equals(variances, expected_variances, 2e-1)
  assert equals(weights, expected_weights, 1e-3)

  # Both ways of computing the statistics must agree
  assert equals(variances, variances_b, 1e-8)
  assert equals(weights, weights_b, 1e-8)
예제 #6
0
def test_kmeans_a():
    """Train a two-cluster, 1D KMeansMachine and check its cluster statistics.

    Per the fixture description, the data file contains draws from two 1D
    Gaussian distributions: 100 samples from N(-10,1) and 100 samples from
    N(10,1).
    """
    samples_path = datafile("samplesFrom2G_f64.hdf5", __name__, path="../data/")
    data = bob.io.base.load(samples_path)

    machine = KMeansMachine(2, 1)
    trainer = KMeansTrainer()
    # Training is driven through the generic bob.learn.em.train helper
    bob.learn.em.train(trainer, machine, data)

    # Per-cluster statistics obtained with a single call ...
    variances, weights = machine.get_variances_and_weights_for_each_cluster(data)

    # ... and the same statistics through the init / acc / fin calls, written
    # into caller-provided buffers
    variances_b = numpy.empty((2, 1), dtype=numpy.float64)
    weights_b = numpy.empty((2,), dtype=numpy.float64)
    machine.__get_variances_and_weights_for_each_cluster_init__(variances_b, weights_b)
    machine.__get_variances_and_weights_for_each_cluster_acc__(data, variances_b, weights_b)
    machine.__get_variances_and_weights_for_each_cluster_fin__(variances_b, weights_b)

    # Cluster order is not fixed by training, so put the smaller mean first
    first = machine.get_mean(0)
    second = machine.get_mean(1)
    ordered = [first[0], second[0]] if first < second else [second[0], first[0]]
    means = numpy.array(ordered, 'float64')

    # Expected: means [-10,10] / variances [1,1] / weights [0.5,0.5]
    expected_means = numpy.array([-10., 10.])
    expected_variances = numpy.array([1., 1.])
    expected_weights = numpy.array([0.5, 0.5])
    assert equals(means, expected_means, 2e-1)
    assert equals(variances, expected_variances, 2e-1)
    assert equals(weights, expected_weights, 1e-3)

    # Both ways of computing the statistics must agree
    assert equals(variances, variances_b, 1e-8)
    assert equals(weights, weights_b, 1e-8)