예제 #1
0
    def acc(self, task_id):
        """Accumulate GMM statistics for one task and save them to HDF5.

        Loads the mixture set referenced by ``self.ubm``, then walks every
        segment of the task's feature cache.  Frames whose aligned allophone
        is listed in ``self.allophones_to_ignore`` are dropped; the remaining
        frames of each segment are accumulated into a single ``GMMStats``
        object, which is finally written to the path given by
        ``self.single_accu_caches[task_id]``.

        :param task_id: index selecting the entries of ``self.alignment``,
            ``self.features`` and ``self.single_accu_caches`` to process.
        """
        mix_file = util.cache_path(self.ubm)
        align_file = util.cache_path(self.alignment[task_id])
        feat_file = util.cache_path(self.features[task_id])
        allo_file = util.cache_path(self.allophones)

        logging.info("Reading mixture file from '%s'..." % mix_file)
        gmm = sc.MixtureSet(mix_file)
        logging.info("Read %d means and %d covariances of dimension %d" %
                     (gmm.nMeans, gmm.nCovs, gmm.dim))

        ubm = convert_gmm(gmm)

        ivm = IVectorMachine(ubm, self.dim)
        ivm.variance_threshold = 1e-5

        # One statistics object collects the contributions of all segments.
        gs = GMMStats(gmm.nMeans, gmm.dim)

        logging.info(
            "Opening alignment cache '%s' with allophones from '%s'; ignoring '%s'"
            % (align_file, allo_file, ",".join(self.allophones_to_ignore)))
        aligncache = sc.FileArchive(align_file)
        aligncache.setAllophones(allo_file)

        cache = sc.FileArchive(feat_file)

        for segment in cache.ft.keys():
            # Entries ending in ".attribs" are skipped; only the remaining
            # keys are read as feature segments.
            if segment.endswith(".attribs"):
                continue
            logging.info("Reading '%s'..." % segment)

            # The first element returned with the features is not needed here.
            _, data = cache.read(segment, "feat")

            align = aligncache.read(segment, "align")
            if len(align) < 1:
                logging.warning(
                    "No data for segment: '%s' in alignment." % segment)
                continue

            # One flag per alignment entry: True when that frame's allophone
            # is not in the ignore list.  Computed once and reused below.
            keep = [
                aligncache.allophones[allo] not in self.allophones_to_ignore
                for (_, allo, _, _) in align
            ]

            # Select frames by walking the *features*, so the matrix holds
            # exactly the rows that get filled.  Sizing it from the alignment
            # instead would leave uninitialised rows whenever the feature
            # stream is shorter than the alignment.  A feature stream longer
            # than the alignment still raises IndexError, as before.
            kept_frames = [t for t in range(len(data)) if keep[t]]

            feat = np.empty((len(kept_frames), len(data[0])))
            for row, t in enumerate(kept_frames):
                feat[row, :] = data[t]

            ivm.ubm.acc_statistics(feat, gs)

        out_path = self.single_accu_caches[task_id].get_path()
        logging.info("Writing Gaussian statistics to '%s'" % out_path)
        gs.save(HDF5File(out_path, "w"))
예제 #2
0
def test_GMMStats():
    """Exercise the GMMStats container.

    Covers attribute assignment, HDF5 round-trips (``from_hdf5`` and
    ``load``), equality and similarity checks, accumulation with ``+=``,
    re-initialisation and resizing.
    """
    # Initializes a GMMStats
    n_gaussians = 2
    n_features = 3
    gs = GMMStats(n_gaussians, n_features)
    log_likelihood = -3.0
    T = 57
    n = np.array([4.37, 5.31], "float64")
    sumpx = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], "float64")
    sumpxx = np.array([[10.0, 20.0, 30.0], [40.0, 50.0, 60.0]], "float64")
    gs.log_likelihood = log_likelihood
    gs.t = T
    gs.n = n
    gs.sum_px = sumpx
    gs.sum_pxx = sumpxx
    np.testing.assert_equal(gs.log_likelihood, log_likelihood)
    np.testing.assert_equal(gs.t, T)
    np.testing.assert_equal(gs.n, n)
    np.testing.assert_equal(gs.sum_px, sumpx)
    np.testing.assert_equal(gs.sum_pxx, sumpxx)
    np.testing.assert_equal(gs.shape, (n_gaussians, n_features))

    tmp_paths = []

    def new_tmp_path():
        # mkstemp hands back an already-open OS-level descriptor together
        # with the path; close it immediately so it is not leaked, and
        # remember the path so every temporary file gets removed.
        fd, path = tempfile.mkstemp(".hdf5")
        os.close(fd)
        tmp_paths.append(path)
        return path

    try:
        # Saves and reads from file using `from_hdf5`
        filename = new_tmp_path()
        gs.save(HDF5File(filename, "w"))
        gs_loaded = GMMStats.from_hdf5(HDF5File(filename, "r"))
        assert gs == gs_loaded
        assert (gs != gs_loaded) is False
        assert gs.is_similar_to(gs_loaded)

        assert type(gs_loaded.n_gaussians) is np.int64
        assert type(gs_loaded.n_features) is np.int64
        assert type(gs_loaded.log_likelihood) is np.float64

        # Saves and load from file using `load`
        filename = new_tmp_path()
        gs.save(hdf5=HDF5File(filename, "w"))
        gs_loaded = GMMStats(n_gaussians, n_features)
        gs_loaded.load(HDF5File(filename, "r"))
        assert gs == gs_loaded
        assert (gs != gs_loaded) is False
        assert gs.is_similar_to(gs_loaded)

        # Makes them different
        gs_loaded.t = 58
        assert (gs == gs_loaded) is False
        assert gs != gs_loaded
        assert not (gs.is_similar_to(gs_loaded))

        # Accumulates from another GMMStats (copies keep gs2 independent of
        # the arrays already assigned to gs)
        gs2 = GMMStats(n_gaussians, n_features)
        gs2.log_likelihood = log_likelihood
        gs2.t = T
        gs2.n = n.copy()
        gs2.sum_px = sumpx.copy()
        gs2.sum_pxx = sumpxx.copy()
        gs2 += gs
        np.testing.assert_equal(gs2.log_likelihood, 2 * log_likelihood)
        np.testing.assert_equal(gs2.t, 2 * T)
        np.testing.assert_almost_equal(gs2.n, 2 * n, decimal=8)
        np.testing.assert_almost_equal(gs2.sum_px, 2 * sumpx, decimal=8)
        np.testing.assert_almost_equal(gs2.sum_pxx, 2 * sumpxx, decimal=8)

        # Re-init and checks for zeros
        gs_loaded.init_fields()
        np.testing.assert_equal(gs_loaded.log_likelihood, 0)
        np.testing.assert_equal(gs_loaded.t, 0)
        np.testing.assert_equal(gs_loaded.n, np.zeros((n_gaussians, )))
        np.testing.assert_equal(gs_loaded.sum_px,
                                np.zeros((n_gaussians, n_features)))
        np.testing.assert_equal(gs_loaded.sum_pxx,
                                np.zeros((n_gaussians, n_features)))
        # Resize and checks size
        assert gs_loaded.shape == (n_gaussians, n_features)
        gs_loaded.resize(4, 5)
        assert gs_loaded.shape == (4, 5)
        assert gs_loaded.sum_px.shape[0] == 4
        assert gs_loaded.sum_px.shape[1] == 5
    finally:
        # Clean-up: remove every temporary file, even if an assertion failed
        for path in tmp_paths:
            os.unlink(path)
예제 #3
0
def test_GMMStats():
    """Exercise the GMMStats container.

    Covers attribute assignment, HDF5 round-trips through the constructor
    and through ``load`` (positional and keyword forms), equality and
    similarity checks, accumulation with ``+=``, re-initialisation and
    resizing.
    """
    # Initializes a GMMStats
    gs = GMMStats(2, 3)
    log_likelihood = -3.
    T = 57
    n = numpy.array([4.37, 5.31], 'float64')
    sumpx = numpy.array([[1., 2., 3.], [4., 5., 6.]], 'float64')
    sumpxx = numpy.array([[10., 20., 30.], [40., 50., 60.]], 'float64')
    gs.log_likelihood = log_likelihood
    gs.t = T
    gs.n = n
    gs.sum_px = sumpx
    gs.sum_pxx = sumpxx
    assert gs.log_likelihood == log_likelihood
    assert gs.t == T
    assert (gs.n == n).all()
    assert (gs.sum_px == sumpx).all()
    assert (gs.sum_pxx == sumpxx).all()
    assert gs.shape == (2, 3)

    tmp_paths = []

    def new_tmp_path():
        # mkstemp hands back an already-open OS-level descriptor together
        # with the path; close it immediately so it is not leaked, and
        # remember the path so every temporary file gets removed.
        fd, path = tempfile.mkstemp(".hdf5")
        os.close(fd)
        tmp_paths.append(path)
        return path

    try:
        # Positional save, read back through the constructor
        filename = new_tmp_path()
        gs.save(bob.io.base.HDF5File(filename, 'w'))
        gs_loaded = GMMStats(bob.io.base.HDF5File(filename))
        assert gs == gs_loaded
        assert (gs != gs_loaded) is False
        assert gs.is_similar_to(gs_loaded)

        # Keyword save, read back through the constructor
        filename = new_tmp_path()
        gs.save(hdf5=bob.io.base.HDF5File(filename, 'w'))
        gs_loaded = GMMStats(bob.io.base.HDF5File(filename))
        assert gs == gs_loaded
        assert (gs != gs_loaded) is False
        assert gs.is_similar_to(gs_loaded)

        # Keyword save, positional load into an empty GMMStats
        filename = new_tmp_path()
        gs.save(hdf5=bob.io.base.HDF5File(filename, 'w'))
        gs_loaded = GMMStats()
        gs_loaded.load(bob.io.base.HDF5File(filename))
        assert gs == gs_loaded
        assert (gs != gs_loaded) is False
        assert gs.is_similar_to(gs_loaded)

        # Keyword save, keyword load into an empty GMMStats
        filename = new_tmp_path()
        gs.save(hdf5=bob.io.base.HDF5File(filename, 'w'))
        gs_loaded = GMMStats()
        gs_loaded.load(hdf5=bob.io.base.HDF5File(filename))
        assert gs == gs_loaded
        assert (gs != gs_loaded) is False
        assert gs.is_similar_to(gs_loaded)

        # Makes them different
        gs_loaded.t = 58
        assert (gs == gs_loaded) is False
        assert gs != gs_loaded
        assert (gs.is_similar_to(gs_loaded)) is False

        # Accumulates from another GMMStats.  Copies are assigned so that
        # gs2 never shares array objects with gs, whatever the setters do.
        gs2 = GMMStats(2, 3)
        gs2.log_likelihood = log_likelihood
        gs2.t = T
        gs2.n = n.copy()
        gs2.sum_px = sumpx.copy()
        gs2.sum_pxx = sumpxx.copy()
        gs2 += gs
        eps = 1e-8
        assert gs2.log_likelihood == 2 * log_likelihood
        assert gs2.t == 2 * T
        # eps is the relative tolerance (third positional argument of allclose)
        assert numpy.allclose(gs2.n, 2 * n, rtol=eps)
        assert numpy.allclose(gs2.sum_px, 2 * sumpx, rtol=eps)
        assert numpy.allclose(gs2.sum_pxx, 2 * sumpxx, rtol=eps)

        # Reinit and checks for zeros
        gs_loaded.init()
        assert gs_loaded.log_likelihood == 0
        assert gs_loaded.t == 0
        assert (gs_loaded.n == 0).all()
        assert (gs_loaded.sum_px == 0).all()
        assert (gs_loaded.sum_pxx == 0).all()
        # Resize and checks size
        assert gs_loaded.shape == (2, 3)
        gs_loaded.resize(4, 5)
        assert gs_loaded.shape == (4, 5)
        assert gs_loaded.sum_px.shape[0] == 4
        assert gs_loaded.sum_px.shape[1] == 5
    finally:
        # Clean-up: remove every temporary file, even if an assertion failed
        for path in tmp_paths:
            os.unlink(path)
예제 #4
0
def test_GMMStats():
  """Exercise the GMMStats container.

  Covers attribute assignment, HDF5 round-trips through the constructor
  and through ``load`` (positional and keyword forms), equality and
  similarity checks, accumulation with ``+=``, re-initialisation and
  resizing.
  """
  # Initializes a GMMStats
  gs = GMMStats(2, 3)
  log_likelihood = -3.
  T = 57
  n = numpy.array([4.37, 5.31], 'float64')
  sumpx = numpy.array([[1., 2., 3.], [4., 5., 6.]], 'float64')
  sumpxx = numpy.array([[10., 20., 30.], [40., 50., 60.]], 'float64')
  gs.log_likelihood = log_likelihood
  gs.t = T
  gs.n = n
  gs.sum_px = sumpx
  gs.sum_pxx = sumpxx
  assert gs.log_likelihood == log_likelihood
  assert gs.t == T
  assert (gs.n == n).all()
  assert (gs.sum_px == sumpx).all()
  assert (gs.sum_pxx == sumpxx).all()
  assert gs.shape == (2, 3)

  tmp_paths = []

  def new_tmp_path():
    # mkstemp hands back an already-open OS-level descriptor together with
    # the path; close it immediately so it is not leaked, and remember the
    # path so every temporary file gets removed.
    fd, path = tempfile.mkstemp(".hdf5")
    os.close(fd)
    tmp_paths.append(path)
    return path

  try:
    # Positional save, read back through the constructor
    filename = new_tmp_path()
    gs.save(bob.io.base.HDF5File(filename, 'w'))
    gs_loaded = GMMStats(bob.io.base.HDF5File(filename))
    assert gs == gs_loaded
    assert (gs != gs_loaded) is False
    assert gs.is_similar_to(gs_loaded)

    # Keyword save, read back through the constructor
    filename = new_tmp_path()
    gs.save(hdf5=bob.io.base.HDF5File(filename, 'w'))
    gs_loaded = GMMStats(bob.io.base.HDF5File(filename))
    assert gs == gs_loaded
    assert (gs != gs_loaded) is False
    assert gs.is_similar_to(gs_loaded)

    # Keyword save, positional load into an empty GMMStats
    filename = new_tmp_path()
    gs.save(hdf5=bob.io.base.HDF5File(filename, 'w'))
    gs_loaded = GMMStats()
    gs_loaded.load(bob.io.base.HDF5File(filename))
    assert gs == gs_loaded
    assert (gs != gs_loaded) is False
    assert gs.is_similar_to(gs_loaded)

    # Keyword save, keyword load into an empty GMMStats
    filename = new_tmp_path()
    gs.save(hdf5=bob.io.base.HDF5File(filename, 'w'))
    gs_loaded = GMMStats()
    gs_loaded.load(hdf5=bob.io.base.HDF5File(filename))
    assert gs == gs_loaded
    assert (gs != gs_loaded) is False
    assert gs.is_similar_to(gs_loaded)

    # Makes them different
    gs_loaded.t = 58
    assert (gs == gs_loaded) is False
    assert gs != gs_loaded
    assert (gs.is_similar_to(gs_loaded)) is False

    # Accumulates from another GMMStats.  Copies are assigned so that gs2
    # never shares array objects with gs, whatever the setters do.
    gs2 = GMMStats(2, 3)
    gs2.log_likelihood = log_likelihood
    gs2.t = T
    gs2.n = n.copy()
    gs2.sum_px = sumpx.copy()
    gs2.sum_pxx = sumpxx.copy()
    gs2 += gs
    eps = 1e-8
    assert gs2.log_likelihood == 2 * log_likelihood
    assert gs2.t == 2 * T
    # eps is the relative tolerance (third positional argument of allclose)
    assert numpy.allclose(gs2.n, 2 * n, rtol=eps)
    assert numpy.allclose(gs2.sum_px, 2 * sumpx, rtol=eps)
    assert numpy.allclose(gs2.sum_pxx, 2 * sumpxx, rtol=eps)

    # Reinit and checks for zeros
    gs_loaded.init()
    assert gs_loaded.log_likelihood == 0
    assert gs_loaded.t == 0
    assert (gs_loaded.n == 0).all()
    assert (gs_loaded.sum_px == 0).all()
    assert (gs_loaded.sum_pxx == 0).all()
    # Resize and checks size
    assert gs_loaded.shape == (2, 3)
    gs_loaded.resize(4, 5)
    assert gs_loaded.shape == (4, 5)
    assert gs_loaded.sum_px.shape[0] == 4
    assert gs_loaded.sum_px.shape[1] == 5
  finally:
    # Clean-up: remove every temporary file, even if an assertion failed
    for path in tmp_paths:
      os.unlink(path)