def read_gmm(stream, *_):
    v, covariance_type = stream.read_singleton("covariance_type")
    v, dimension = stream.read_scalar("dimension", int)
    v, num_components = stream.read_scalar("num_components", int)
    v, relevances = stream.read_list("relevances", float)
    v, gmm_weights = stream.read_array("weights", rtype=float, dim=1,
                                       shape=(num_components,))
    v, smms = stream.read_indexed_collection(read_smm,
                                             (covariance_type, num_components, dimension),
                                             name="Gaussians")

    gmm_means = numpy.zeros((num_components, dimension), dtype=float)
    if covariance_type is GaussianMixtureModel.FULL_COVARIANCE:
        var_shape = (num_components, dimension, dimension)
    else:
        assert covariance_type is GaussianMixtureModel.DIAGONAL_COVARIANCE
        var_shape = (num_components, dimension)
    gmm_vars = numpy.zeros(var_shape, dtype=float)

    assert len(smms) == num_components
    for i in xrange(num_components):
        gmm_means[i] = smms[i].means
        gmm_vars[i] = smms[i].vars

    # Construct and return Gmm object
    ret = GaussianMixtureModel(dimension, covariance_type, num_components)
    ret.set_weights(gmm_weights)
    ret.set_means(gmm_means)
    ret.set_vars(gmm_vars)
    ret.set_relevances(relevances)
    return ret
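# Stream layout consumed by read_gmm, as inferred from the read_* calls above
# (a summary for reference, not an independent spec):
#   covariance_type   singleton
#   dimension         int scalar
#   num_components    int scalar
#   relevances        list of float
#   weights           1-D float array, shape (num_components,)
#   Gaussians         indexed collection of per-component models read by read_smm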
def test8_helper(num_obs, num_passes):
    """
    This test mimics a run ChrisW did with HTK.  The models are 2-D single-mode
    Gaussians embedded in a 3-state Hmm.  Each observation is a sequence of
    length 11, taken by sampling 2, 3, and 6 times, respectively, from three
    target distributions.  This is identical to test5 except that here I have
    built the Hmm with only one Gmm, which is shared by all three states.
    """
    num_states = 3
    dimension = 2

    # Data generator setup
    target_means = ((1, 1), (2, 2), (3, 3))
    target_vars = ((0.1, 0.1), (0.2, 0.2), (0.3, 0.3))
    target_durations = (2, 3, 6)
    num_steps = sum(target_durations)
    generators = [SimpleGaussianModel(dimension, SimpleGaussianModel.DIAGONAL_COVARIANCE)
                  for i in xrange(num_states)]
    [m.set_model(tm, tv) for (m, tm, tv) in izip(generators, target_means, target_vars)]
    SimpleGaussianModel.seed(0)

    # Gmm setup
    gmm = GaussianMixtureModel(dimension, GaussianMixtureModel.DIAGONAL_COVARIANCE, 1)
    gmm.set_weights(array((1.0,)))
    mu = array(((0.0, 0.0),))
    v = array(((1.0, 1.0),))
    gmm.set_model(mu, v)
    models = (gmm,)
    mm = GmmMgr(models)
    # Here's where we're using the same Gmm in all three states of this Hmm.
    models = (0, 0, 0)

    # Hmm setup
    trans = array(((0.0, 1.0, 0.0, 0.0, 0.0),
                   (0.0, 0.5, 0.5, 0.0, 0.0),
                   (0.0, 0.0, 0.5, 0.5, 0.0),
                   (0.0, 0.0, 0.0, 0.5, 0.5),
                   (0.0, 0.0, 0.0, 0.0, 0.0)))
    hmm0 = Hmm(num_states, log_domain=True)
    hmm0.build_model(mm, models, 1, 1, trans)
    print hmm0.to_string(True)

    for p in xrange(num_passes):
        # Reseeding here ensures we are repeating the same observations in each pass
        SimpleGaussianModel.seed(0)
        mm.set_adaptation_state("INITIALIZING")
        mm.clear_all_accumulators()
        hmm0.begin_adapt("STANDALONE")
        mm.set_adaptation_state("ACCUMULATING")
        obs_gen = obs_generator(generators, target_durations)
        for i in xrange(num_obs):
            obs = obs_gen.next()
            hmm0.adapt_one_sequence(obs)
        mm.set_adaptation_state("APPLYING")
        hmm0.end_adapt()
        mm.apply_all_accumulators()
        mm.set_adaptation_state("NOT_ADAPTING")
    print hmm0.to_string(True)
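# The INITIALIZING -> ACCUMULATING -> APPLYING -> NOT_ADAPTING sequence above
# recurs verbatim in several of the tests below.  A minimal sketch of a shared
# helper capturing one adaptation pass (the name and signature are
# hypothetical, not part of the toolkit; only methods used in this module
# appear):
def run_standalone_adaptation(mm, hmm, sequences):
    # One standalone adaptation pass over the given observation sequences.
    mm.set_adaptation_state("INITIALIZING")
    mm.clear_all_accumulators()
    hmm.begin_adapt("STANDALONE")
    mm.set_adaptation_state("ACCUMULATING")
    for obs in sequences:
        hmm.adapt_one_sequence(obs)
    mm.set_adaptation_state("APPLYING")
    hmm.end_adapt()
    mm.apply_all_accumulators()
    mm.set_adaptation_state("NOT_ADAPTING")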
def make_gmm_diag(dimension, num_mixtures):
    gmm = GaussianMixtureModel(dimension, GaussianMixtureModel.DIAGONAL_COVARIANCE, num_mixtures)
    w = [1.0 / num_mixtures for n in xrange(num_mixtures)]
    gmm.set_weights(array(w))
    # Note: these hard-coded means and variances assume dimension == 2 and
    # num_mixtures == 2, even though the signature is more general.
    mu = array(((1.5, 1.5), (3, 3)))
    v = array(((1.0, 1.0), (1.0, 1.0)))
    gmm.set_model(mu, v)
    return gmm
def test1():
    print '============== test1 ============'
    dimension = 3
    target0 = make_target(dimension, 2, (0.75, 0.25), ((1, 1, 1), (2, 3, 4)),
                          ((1, 1, 1), (0.5, 0.5, 1)))
    target1 = make_target(dimension, 2, (0.5, 0.5), ((-1, -1, -1), (-2, -3, -4)),
                          ((1, 1, 1), (0.5, 0.5, 1)))
    target2 = make_target(dimension, 2, (0.1, 0.9), ((1, 1, -2), (3, 3, 5)),
                          ((1, 1, 1), (0.5, 0.5, 1)))
    print target0
    print target1
    print target2

    GaussianModelBase.seed(0)
    labels = ('A', 'B', 'C')
    ncomps = (1, 2, 2)
    sources = dict((('A', target0), ('B', target1), ('C', target2)))

    GaussianMixtureModel.seed(0)
    gmm_mgr = GmmMgr(ncomps, dimension, GaussianModelBase.DIAGONAL_COVARIANCE)
    c0 = AdaptingGmmClassifier(gmm_mgr, izip(labels, count()))
    print
    print c0
    result = list()
    proc0 = AdaptingGmmClassProcessor(c0, result.append)

    # Prime things a little bit to try to get a good start
    c0.set_relevance(0.001)
    c0.set_num_em_iterations(2)
    for i in xrange(1):
        for label in labels:
            target = sources[label]
            data = (target.sample() for i in xrange(100))
            proc0.process((label, data))

    # Now adapt on more data
    c0.set_relevance(10)
    c0.set_num_em_iterations(2)
    for i in xrange(10):
        for label in labels:
            target = sources[label]
            data = (target.sample() for i in xrange(100))
            proc0.process((label, data))

    print
    print c0
    print
    print len(result)
    # XXX Win32 gets values off in the last 2-3 hex digits.  I'm not sure how to
    # account for this in a logref test, so I'm disabling this printing for now.
    # for training_label, scores in result[-10:]:
    #     print training_label, tuple(((label, float_to_readable_string(score))
    #                                  for score, label in scores))
    correct = tuple(label for label, scores in result)
    guessed = tuple(scores[0][1] for l, scores in result)
    print len(correct), len(guessed)
    ind = [c == g for (c, g) in izip(correct, guessed)]
    print ind.count(True)
    # Use float division so the accuracy isn't truncated to 0 under Python 2.
    print float(ind.count(True)) / len(correct)
def make_standard_gmms(dimension, num_models):
    models = []
    for i in xrange(num_models):
        gmm = GaussianMixtureModel(dimension, GaussianMixtureModel.DIAGONAL_COVARIANCE, 1)
        gmm.set_weights(array((1.0,)))
        # Note: the hard-coded mean and variance assume dimension == 2.
        mu = array(((0.0, 0.0),))
        v = array(((1.0, 1.0),))
        gmm.set_model(mu, v)
        models.append(gmm)
    return models
def test1(num_obs, num_passes):
    dimension = 2

    # Data generator setup
    target_means = (1, 1)
    target_vars = (0.1, 0.1)
    generator = SimpleGaussianModel(dimension, SimpleGaussianModel.DIAGONAL_COVARIANCE)
    generator.set_model(target_means, target_vars)
    SimpleGaussianModel.seed(0)
    GaussianMixtureModel.seed(0)

    # Gmm setup
    num_mixtures = 2
    gmm0 = make_gmm(dimension, num_mixtures)
    gmm1 = make_gmm(dimension, num_mixtures)
    mm = GmmMgr((gmm1,))

    # Hmm setup
    hmm0 = Hmm(1, log_domain=True)
    # A transition probability matrix with a p=1/2 exit for the real state.
    # The entry state feeds into the real state with p=1.
    trans = array(((0.0, 1.0, 0.0),
                   (0.0, 0.5, 0.5),
                   (0.0, 0.0, 0.0)))
    hmm0.build_model(mm, (0,), 1, 1, trans)
    print hmm0.to_string(True)
    print gmm0

    # Try some adaptation.  Note that we are feeding the entire data set as one
    # stream to the Hmm adaptation call.
    data = [generator.sample() for i in xrange(num_obs)]
    for p in xrange(num_passes):
        mm.set_adaptation_state("INITIALIZING")
        mm.clear_all_accumulators()
        hmm0.begin_adapt("STANDALONE")
        mm.set_adaptation_state("ACCUMULATING")
        hmm0.adapt_one_sequence(data)
        mm.set_adaptation_state("APPLYING")
        hmm0.end_adapt()
        mm.apply_all_accumulators()
        mm.set_adaptation_state("NOT_ADAPTING")
    # Adapt the standalone Gmm on the same data for comparison.
    gmm0.adapt(data, max_iters=num_passes)
    print hmm0.to_string(True)
    print gmm0
def make_target(dimension, num_comps, weights, means, vars):
    ret = GaussianMixtureModel(dimension, GaussianModelBase.DIAGONAL_COVARIANCE, num_comps)
    ret.set_weights(numpy.array(weights))
    ret.set_means(numpy.array(means))
    ret.set_vars(numpy.array(vars))
    return ret
def test0():
    print '============== test0 ============'
    dimension = 3
    target0 = make_target(dimension, 2, (0.75, 0.25), ((1, 1, 1), (2, 3, 4)),
                          ((1, 1, 1), (0.5, 0.5, 1)))
    target1 = make_target(dimension, 2, (0.5, 0.5), ((-1, -1, -1), (-2, -3, -4)),
                          ((1, 1, 1), (0.5, 0.5, 1)))
    target2 = make_target(dimension, 2, (0.1, 0.9), ((1, 1, -2), (3, 3, 5)),
                          ((1, 1, 1), (0.5, 0.5, 1)))
    print target0
    print target1
    print target2

    GaussianModelBase.seed(0)
    labels = ('A', 'B', 'C')
    ncomps = (1, 2, 2)
    sources = dict((('A', target0), ('B', target1), ('C', target2)))

    GaussianMixtureModel.seed(0)
    gmm_mgr = GmmMgr(ncomps, dimension, GaussianModelBase.DIAGONAL_COVARIANCE)
    c0 = AdaptingGmmClassifier(gmm_mgr, izip(labels, count()))
    print
    print c0

    # Prime things a little bit to try to get a good start
    c0.set_relevance(0.001)
    for i in xrange(1):
        for label in labels:
            target = sources[label]
            data = (target.sample() for i in xrange(100))
            c0.adapt_one_class(label, data)

    # Now adapt on more data
    c0.set_relevance(10)
    for i in xrange(10):
        for label in labels:
            target = sources[label]
            data = (target.sample() for i in xrange(100))
            c0.adapt_one_class(label, data)

    print
    print c0
    print
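# The relevance values above control how strongly new data moves the models.
# A minimal sketch of relevance-style MAP smoothing of a mean (the standard
# formulation; whether AdaptingGmmClassifier uses exactly this update is an
# assumption, and map_smooth_mean is not a toolkit function):
def map_smooth_mean(prior_mean, sample_mean, sample_count, relevance):
    # With relevance near 0 the sample mean dominates (the priming phase);
    # with a large relevance the prior changes slowly (the adaptation phase).
    w = float(sample_count) / (sample_count + relevance)
    return w * sample_mean + (1.0 - w) * prior_mean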
def test9(num_obs):
    """ Test sequence scoring interface. """
    num_states = 3
    dimension = 2

    # Data generator setup
    target_means = ((1, 1), (2, 2), (3, 3))
    target_vars = ((0.1, 0.1), (0.2, 0.2), (0.3, 0.3))
    target_durations = (2, 3, 6)
    num_steps = sum(target_durations)
    generators = [SimpleGaussianModel(dimension, SimpleGaussianModel.DIAGONAL_COVARIANCE)
                  for i in xrange(num_states)]
    [m.set_model(tm, tv) for (m, tm, tv) in izip(generators, target_means, target_vars)]
    SimpleGaussianModel.seed(0)
    obs_gen = obs_generator(generators, target_durations)

    # Gmm setup
    models = []
    for i in xrange(num_states):
        gmm = GaussianMixtureModel(dimension, GaussianMixtureModel.DIAGONAL_COVARIANCE, 1)
        gmm.set_weights(array((1.0,)))
        mu = array(((0.0, 0.0),))
        v = array(((1.0, 1.0),))
        gmm.set_model(mu, v)
        models.append(gmm)
    mm = GmmMgr(models)
    models = range(num_states)

    # Hmm setup
    trans = array(((0.0, 1.0, 0.0, 0.0, 0.0),
                   (0.0, 0.5, 0.5, 0.0, 0.0),
                   (0.0, 0.0, 0.5, 0.5, 0.0),
                   (0.0, 0.0, 0.0, 0.5, 0.5),
                   (0.0, 0.0, 0.0, 0.0, 0.0)))
    hmm0 = Hmm(num_states)
    hmm0.build_model(mm, models, 1, 1, trans)
    print hmm0.to_string(full=True)

    for i in xrange(num_obs):
        obs = obs_gen.next()
        scores = hmm0.forward_score_sequence(obs)
        print scores
def test3_helper(dataset_idx, num_passes):
    """
    This test mimics a run ChrisW did with HTK.  The models are 2-D single-mode
    Gaussians embedded in a 1-state Hmm.  Each data point is taken as a
    sequence of length 1.
    """
    dimension = 2

    # Gmm setup
    gmm = GaussianMixtureModel(dimension, GaussianMixtureModel.DIAGONAL_COVARIANCE, 1)
    gmm.set_weights(array((1.0,)))
    mu = array(((0.0, 0.0),))
    v = array(((1.0, 1.0),))
    gmm.set_model(mu, v)
    mm = GmmMgr((gmm,))

    # Hmm setup
    # A transition probability matrix with a p=1 exit from the real state.
    # The entry state feeds into the real state with p=1.
    trans = array(((0.0, 1.0, 0.0),
                   (0.0, 0.0, 1.0),
                   (0.0, 0.0, 0.0)))
    hmm0 = Hmm(1, log_domain=True)
    hmm0.build_model(mm, (0,), 1, 1, trans)
    print hmm0.to_string(True)

    # Adaptation
    data = datasets[dataset_idx]
    for p in xrange(num_passes):
        mm.set_adaptation_state("INITIALIZING")
        mm.clear_all_accumulators()
        hmm0.begin_adapt("STANDALONE")
        mm.set_adaptation_state("ACCUMULATING")
        for point in data:
            s = array(point)
            # We treat each point as an entire sequence
            hmm0.adapt_one_sequence((s,))
        mm.set_adaptation_state("APPLYING")
        hmm0.end_adapt()
        mm.apply_all_accumulators()
        mm.set_adaptation_state("NOT_ADAPTING")
    print hmm0.to_string(True)
def test0(num_obs, num_passes):
    dimension = 2

    # Data generator setup
    target_means = (1, 1)
    target_vars = (0.1, 0.1)
    generator = SimpleGaussianModel(dimension, SimpleGaussianModel.DIAGONAL_COVARIANCE)
    generator.set_model(target_means, target_vars)
    SimpleGaussianModel.seed(0)
    GaussianMixtureModel.seed(0)
    mm = GmmMgr(dimension)

    # Hmm setup
    hmm0 = Hmm(0, log_domain=True)
    # A transition probability matrix with no real state.
    # The entry state feeds into the exit state with p=1.
    trans = array(((0.0, 1.0),
                   (0.0, 0.0)))
    hmm0.build_model(mm, (), 1, 1, trans)
    print hmm0.to_string(True)

    # Try some adaptation.  Note that we are feeding the entire data set as one
    # stream to the Hmm adaptation call.
    data = [generator.sample() for i in xrange(num_obs)]
    for p in xrange(num_passes):
        mm.set_adaptation_state("INITIALIZING")
        mm.clear_all_accumulators()
        hmm0.begin_adapt("STANDALONE")
        mm.set_adaptation_state("ACCUMULATING")
        with DebugPrint("hmm_gxfs", "hmm_aos") if False else DebugPrint():
            hmm0.adapt_one_sequence(data)
        mm.set_adaptation_state("APPLYING")
        hmm0.end_adapt()
        mm.apply_all_accumulators()
        mm.set_adaptation_state("NOT_ADAPTING")
    print hmm0.to_string(True)
def test5_helper(num_obs, num_passes):
    """
    This test mimics a run ChrisW did with HTK.  The models are 2-D single-mode
    Gaussians embedded in a 3-state Hmm.  Each observation is a sequence of
    length 11, taken by sampling 2, 3, and 6 times, respectively, from three
    target distributions.
    """
    import pprint
    num_states = 3
    dimension = 2

    # Data generator setup
    target_means = ((1, 1), (2, 2), (3, 3))
    target_vars = ((0.1, 0.1), (0.2, 0.2), (0.3, 0.3))
    target_durations = (2, 3, 6)
    num_steps = sum(target_durations)
    generators = [SimpleGaussianModel(dimension, SimpleGaussianModel.DIAGONAL_COVARIANCE)
                  for i in xrange(num_states)]
    [m.set_model(tm, tv) for (m, tm, tv) in izip(generators, target_means, target_vars)]
    SimpleGaussianModel.seed(0)

    # Gmm setup
    models = []
    for i in xrange(num_states):
        gmm = GaussianMixtureModel(dimension, GaussianMixtureModel.DIAGONAL_COVARIANCE, 1)
        gmm.set_weights(array((1.0,)))
        mu = array(((0.0, 0.0),))
        v = array(((1.0, 1.0),))
        gmm.set_model(mu, v)
        models.append(gmm)
    mm = GmmMgr(models)
    models = range(num_states)

    # Hmm setup
    trans = array(((0.0, 1.0, 0.0, 0.0, 0.0),
                   (0.0, 0.5, 0.5, 0.0, 0.0),
                   (0.0, 0.0, 0.5, 0.5, 0.0),
                   (0.0, 0.0, 0.0, 0.5, 0.5),
                   (0.0, 0.0, 0.0, 0.0, 0.0)))
    hmm0 = Hmm(num_states, log_domain=True)
    hmm0.build_model(mm, models, 1, 1, trans)
    print hmm0.to_string(True)

    for p in xrange(num_passes):
        # Reseeding here ensures we are repeating the same observations in each pass
        SimpleGaussianModel.seed(0)
        mm.set_adaptation_state("INITIALIZING")
        mm.clear_all_accumulators()
        hmm0.begin_adapt("STANDALONE")
        mm.set_adaptation_state("ACCUMULATING")
        obs_gen = obs_generator(generators, target_durations)
        for i in xrange(num_obs):
            obs = obs_gen.next()
            hmm0.adapt_one_sequence(obs)
            obs2 = [tuple(a) for a in obs]
            # Uncomment these lines to show observations as nicely formatted
            # sequences; this is what I gave ChrisW to use with his HTK runs.
            # pprint.pprint(obs2)
            # print
        mm.set_adaptation_state("APPLYING")
        hmm0.end_adapt()
        mm.apply_all_accumulators()
        mm.set_adaptation_state("NOT_ADAPTING")
    print hmm0.to_string(True)
false_means_prime = N.array((-9.4634, -5.3991, -4.2773, 1.7494, -0.0822, -228.6211), dtype=N.float32)
false_vars_prime = N.array((3.0097, 6.0277, 8.3711, 10.7198, 13.4285, 456.7074), dtype=N.float32)

# mfcc, no c0: 20,000 frames of Hugh talking
true_means_prime = N.array((-4.8087, 3.9863, -0.5217, 1.3076, 0.7514, -4.6497), dtype=N.float32)
true_vars_prime = N.array((26.8496, 32.6631, 32.3662, 24.2963, 36.2244, 34.1555), dtype=N.float32)
false_means_prime = N.array((-6.8806, -1.3424, -3.8147, 0.4520, 0.7129, -3.1560), dtype=N.float32)
false_vars_prime = N.array((2.7468, 6.2286, 7.4355, 10.1530, 13.3865, 15.9309), dtype=N.float32)

true_prime = SimpleGaussianModel(nfeatures, GaussianModelBase.DIAGONAL_COVARIANCE)
true_prime.set_model(true_means_prime, true_vars_prime)
false_prime = SimpleGaussianModel(nfeatures, GaussianModelBase.DIAGONAL_COVARIANCE)
false_prime.set_model(false_means_prime, false_vars_prime)
primer = (true_prime, false_prime)

GaussianMixtureModel.seed(0)
gmm_mgr0 = GmmMgr(ncomps, nfeatures, GaussianModelBase.DIAGONAL_COVARIANCE, primer)
gmm_mgr1 = GmmMgr(ncomps, nfeatures, GaussianModelBase.DIAGONAL_COVARIANCE, primer)
classify0 = AdaptingGmmClassifier(gmm_mgr0, izip(labels, count()))
classify1 = AdaptingGmmClassifier(gmm_mgr1, izip(labels, count()))
classify0.set_relevance(333)
classify1.set_relevance(333)
classify0.set_num_em_iterations(2)
classify1.set_num_em_iterations(2)
classifier0 = AdaptingGmmClassProcessor(classify0)
classifier1 = AdaptingGmmClassProcessor(classify1)

gaussian_trainer = SimpleGaussianTrainer(labels, nfeatures)
trainer = FunctionProcessor(gaussian_trainer)

# audio.mic, fftmag, endpointer, mfcc0, square, mfcc1, classifier0, classifier1, trainer
        dc and dc("m = \n%s" % (pformat(m),))
        dc and dc("m.keys() = \n%s" % (m.keys(),))
        if hasattr(m, 'decl'):
            name = m.decl
        else:
            name = "UnnamedModel%d" % unnamed_index
            unnamed_index += 1
        n = m.numstates - 2  # HTK numstates counts virtual entry and exit states
        hmm = Hmm(n, log_domain)
        gmms = []
        for s_label, state in m.states:
            assert s_label == 'state'
            dc and dc("state.keys() = \n%s" % (state.keys(),))
            num_mixtures = 1
            weights = array((1.0,), dtype=float)
            gmm = GaussianMixtureModel(dim, covar_type, num_mixtures)
            gmm.set_weights(weights)
            gmm.set_model(state.mean, state.var)
            dc and dc("gmm = %s" % (gmm,))
            gmms.append(gmm)
        model_indices = gmm_mgr.add_models(gmms)
        hmm.build_model(gmm_mgr, model_indices, 1, 1, m.transp)
        hmms.append(hmm)
        names.append(name)
    indices = hmm_mgr.add_models(hmms)
    return dict(izip(names, indices)), hmm_mgr, gmm_mgr

def logreftest():
    module_dir, module_name = os.path.split(__file__)
    files = tuple(os.path.join(module_dir, mmf_file)
                  for mmf_file in ("start.mmf", "mmf1.mmf", "mmf4.mmf"))
def __init__(self, *args):
    """
    Initialization can take four forms.  First, one int argument constructs an
    empty GmmMgr for models with the given dimension (number of features).
    Second, another GmmMgr can be passed in, in which case the new GmmMgr is a
    deep copy of the argument.  Third, an iterable of models can be passed in,
    in which case the new GmmMgr will have those models, in the order iterated.
    In this case, the iterable should return instances of either
    GaussianMixtureModel or DummyModel, and the models must all have the same
    covariance type.  Finally, arguments may be provided in the form
    (num_comps, dimension, covar_type, priming=None), that is, with 3 or 4
    arguments, where num_comps is an iterable of the number of components for
    each model, dimension is a positive integer, and covar_type is either
    SimpleGaussianModel.DIAGONAL_COVARIANCE or
    SimpleGaussianModel.FULL_COVARIANCE.  New GaussianMixtureModels will be
    created for each element returned by num_comps.  Priming, if it is
    provided, is an iterable of SimpleGaussianModels which will be used to
    initialize all the components of each model, so the priming argument must
    be as long as the num_comps argument, and the priming models should have
    the same covariance type as covar_type.
    """
    if len(args) not in (1, 3, 4):
        raise ValueError("expected 1, 3, or 4 arguments, but got %d" % (len(args),))
    self._models = list()
    self._covariance_type = None
    self._accums = dict()
    if len(args) == 1:
        if isinstance(args[0], GmmMgr):
            # Deep-copy construction from another GmmMgr.
            other = args[0]
            super(GmmMgr, self).__init__(other.dimension)
            for model in other:
                self._models.append(model.copy())
            self._covariance_type = other._covariance_type
        elif isinstance(args[0], int):
            # Empty manager with the given dimension.
            super(GmmMgr, self).__init__(args[0])
        else:
            # Construction from an iterable of models.
            models = tuple(args[0])
            if len(models) == 0:
                raise ValueError("can't construct from an empty iterable")
            super(GmmMgr, self).__init__(models[0].dimension)
            self.add_models(models)
    else:
        assert 3 <= len(args) <= 4
        num_comps, dimension, covar_type = args[0], args[1], args[2]
        super(GmmMgr, self).__init__(dimension)
        num_comps = tuple(num_comps)
        priming = tuple(args[3]) if len(args) == 4 else None
        if priming is not None:
            if len(priming) < len(num_comps):
                raise ValueError("not enough priming models were provided - "
                                 "expected %d but got %d" % (len(num_comps), len(priming)))
        else:
            priming = repeat(None)
        for nc, primer in izip(num_comps, priming):
            self._models.append(GaussianMixtureModel(dimension, covar_type, nc, primer))
        self._covariance_type = covar_type
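# A minimal usage sketch of the four construction forms described in the
# docstring above (values are illustrative only; GmmMgr, GaussianMixtureModel,
# and the covariance constants are the toolkit's own names):
mgr_empty = GmmMgr(12)                # form 1: empty manager, dimension 12
mgr_copy = GmmMgr(mgr_empty)          # form 2: deep copy of another GmmMgr
gmm = GaussianMixtureModel(2, GaussianMixtureModel.DIAGONAL_COVARIANCE, 1)
mgr_models = GmmMgr((gmm,))           # form 3: from an iterable of models
mgr_built = GmmMgr((1, 2, 2), 3,      # form 4: num_comps, dimension, covar_type
                   GaussianModelBase.DIAGONAL_COVARIANCE)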