def test_kmeans_a():
  """Train a 2-cluster, 1D KMeansMachine and validate the learned statistics.

  The data file holds draws from two 1D Gaussians:
    * 100 samples from N(-10, 1)
    * 100 samples from N(10, 1)
  """
  samples = bob.io.base.load(datafile("samplesFrom2G_f64.hdf5", __name__, path="../data/"))

  km = KMeansMachine(2, 1)
  bob.learn.em.train(KMeansTrainer(), km, samples)

  [variances, weights] = km.get_variances_and_weights_for_each_cluster(samples)

  # Recompute the same per-cluster statistics through the low-level
  # init/acc/fin API so the two code paths can be cross-checked below.
  variances_b = numpy.ndarray(shape=(2, 1), dtype=numpy.float64)
  weights_b = numpy.ndarray(shape=(2,), dtype=numpy.float64)
  km.__get_variances_and_weights_for_each_cluster_init__(variances_b, weights_b)
  km.__get_variances_and_weights_for_each_cluster_acc__(samples, variances_b, weights_b)
  km.__get_variances_and_weights_for_each_cluster_fin__(variances_b, weights_b)

  mean0 = km.get_mean(0)
  mean1 = km.get_mean(1)

  # Order the means ascending, then check against the generating
  # distributions: means [-10, 10], variances [1, 1], weights [0.5, 0.5].
  if mean0 < mean1:
    means = numpy.array([mean0[0], mean1[0]], 'float64')
  else:
    means = numpy.array([mean1[0], mean0[0]], 'float64')
  assert equals(means, numpy.array([-10., 10.]), 2e-1)
  assert equals(variances, numpy.array([1., 1.]), 2e-1)
  assert equals(weights, numpy.array([0.5, 0.5]), 1e-3)

  # The high-level and low-level APIs must agree exactly.
  assert equals(variances, variances_b, 1e-8)
  assert equals(weights, weights_b, 1e-8)
# Exemple #2
# 0
def test_kmeans_a():
    """Train a 2-cluster, 1D KMeansMachine and check the recovered statistics.

    The input file contains draws from two 1D Gaussian distributions:
      * 100 samples from N(-10, 1)
      * 100 samples from N(10, 1)
    """
    data = bob.io.base.load(datafile("samplesFrom2G_f64.hdf5", __name__, path="../data/"))

    machine = KMeansMachine(2, 1)
    trainer = KMeansTrainer()
    bob.learn.em.train(trainer, machine, data)

    variances, weights = machine.get_variances_and_weights_for_each_cluster(data)

    # Re-derive the same statistics via the incremental init/acc/fin API
    # so it can be cross-checked against the one-shot call above.
    variances_b = numpy.ndarray(shape=(2, 1), dtype=numpy.float64)
    weights_b = numpy.ndarray(shape=(2,), dtype=numpy.float64)
    machine.__get_variances_and_weights_for_each_cluster_init__(variances_b, weights_b)
    machine.__get_variances_and_weights_for_each_cluster_acc__(data, variances_b, weights_b)
    machine.__get_variances_and_weights_for_each_cluster_fin__(variances_b, weights_b)

    first = machine.get_mean(0)
    second = machine.get_mean(1)

    # Sort the two means ascending before comparing with the ground truth:
    # means [-10, 10] / variances [1, 1] / weights [0.5, 0.5].
    ordered = [first[0], second[0]] if first < second else [second[0], first[0]]
    means = numpy.array(ordered, 'float64')

    assert equals(means, numpy.array([-10., 10.]), 2e-1)
    assert equals(variances, numpy.array([1., 1.]), 2e-1)
    assert equals(weights, numpy.array([0.5, 0.5]), 1e-3)

    # Both computation paths must produce identical results.
    assert equals(variances, variances_b, 1e-8)
    assert equals(weights, weights_b, 1e-8)
def test_kmeans_b():
  """Train KMeans on the standardized 'faithful' data set and compare the
  de-normalized statistics against stored reference values; then verify that
  RANDOM_NO_DUPLICATE initialization produces finite (non-NaN) means."""
  (arStd, std) = NormalizeStdArray(datafile("faithful.torch3.hdf5", __name__, path="../data/"))

  km = KMeansMachine(2, 2)
  bob.learn.em.train(KMeansTrainer(), km, arStd, convergence_threshold=0.001)

  [variances, weights] = km.get_variances_and_weights_for_each_cluster(arStd)

  means = numpy.array(km.means)
  variances = numpy.array(variances)

  # Undo the standardization so values are comparable to the raw references.
  multiplyVectorsByFactors(means, std)
  multiplyVectorsByFactors(variances, std ** 2)

  gmmWeights = bob.io.base.load(datafile('gmm.init_weights.hdf5', __name__, path="../data/"))
  gmmMeans = bob.io.base.load(datafile('gmm.init_means.hdf5', __name__, path="../data/"))
  gmmVariances = bob.io.base.load(datafile('gmm.init_variances.hdf5', __name__, path="../data/"))

  # Cluster order is arbitrary; flip rows so it matches the references.
  if means[0, 0] < means[1, 0]:
    means = flipRows(means)
    variances = flipRows(variances)
    weights = flipRows(weights)

  assert equals(means, gmmMeans, 1e-3)
  assert equals(weights, gmmWeights, 1e-3)
  assert equals(variances, gmmVariances, 1e-3)

  # Check that there is no duplicate means during initialization:
  # with RANDOM_NO_DUPLICATE the trainer must not leave NaNs in the means.
  km = KMeansMachine(2, 1)
  dup_trainer = KMeansTrainer()
  dup_trainer.initialization_method = 'RANDOM_NO_DUPLICATE'
  samples = numpy.array([[1.], [1.], [1.], [1.], [1.], [1.], [2.], [3.]])
  bob.learn.em.train(dup_trainer, km, samples)
  assert not numpy.isnan(km.means).any()
# Exemple #4
# 0
def test_kmeans_b():
    """Train a KMeansMachine on the standardized 'faithful' data and check the
    de-normalized means/variances/weights against reference files; also ensure
    RANDOM_NO_DUPLICATE initialization never yields NaN means."""
    arStd, std = NormalizeStdArray(datafile("faithful.torch3.hdf5", __name__, path="../data/"))

    machine = KMeansMachine(2, 2)
    trainer = KMeansTrainer()
    bob.learn.em.train(trainer, machine, arStd, convergence_threshold=0.001)

    variances, weights = machine.get_variances_and_weights_for_each_cluster(arStd)

    means = numpy.array(machine.means)
    variances = numpy.array(variances)

    # Map the statistics back to the original (un-standardized) scale.
    multiplyVectorsByFactors(means, std)
    multiplyVectorsByFactors(variances, std ** 2)

    gmmWeights = bob.io.base.load(datafile('gmm.init_weights.hdf5', __name__, path="../data/"))
    gmmMeans = bob.io.base.load(datafile('gmm.init_means.hdf5', __name__, path="../data/"))
    gmmVariances = bob.io.base.load(datafile('gmm.init_variances.hdf5', __name__, path="../data/"))

    # Cluster labels are arbitrary: reorder rows to match the references.
    if means[0, 0] < means[1, 0]:
        means = flipRows(means)
        variances = flipRows(variances)
        weights = flipRows(weights)

    assert equals(means, gmmMeans, 1e-3)
    assert equals(weights, gmmWeights, 1e-3)
    assert equals(variances, gmmVariances, 1e-3)

    # Check that there is no duplicate means during initialization:
    # duplicates would surface as NaN entries in the trained means.
    machine = KMeansMachine(2, 1)
    trainer = KMeansTrainer()
    trainer.initialization_method = 'RANDOM_NO_DUPLICATE'
    samples = numpy.array([[1.], [1.], [1.], [1.], [1.], [1.], [2.], [3.]])
    bob.learn.em.train(trainer, machine, samples)
    assert not numpy.isnan(machine.means).any()