def test2(num_obs):
    """
    Validate a TrainingGraph whose two nodes are connected in a single chain.

    Each node gets its own Hmm built by make_forward_hmm() with 4 states and
    order 2 (self loop, forward 1).  num_obs observations are pulled from the
    data generator and handed, together with the graph and both GmmMgrs, to
    validate_training_graph().

    Returns the second item of the (valid, ret) pair produced by
    validate_training_graph().
    """
    dimension = 2
    num_models = 20
    num_states = 4

    # Observation data
    source = make_data_generator(dimension)
    obs_list = [source.next() for _ in xrange(num_obs)]

    # GmmMgr setup: the first ten models back the Hmms, the remaining ten go
    # into a second manager that is only passed to the validator
    gmms = make_standard_gmms(dimension, num_models)
    gmm_mgr1 = GmmMgr(gmms[0:10])
    gmm_mgr2 = GmmMgr(gmms[10:20])

    # Hmm setup: two Hmms with 4 states and order 2
    seed(0)
    first_hmm = make_forward_hmm(gmm_mgr1, num_states, 2, exact=True)
    second_hmm = make_forward_hmm(gmm_mgr1, num_states, 2, exact=True)
    hmm_mgr = HmmMgr((first_hmm, second_hmm))

    # TrainingGraph setup: node 0 -> node 1
    builder = GraphBuilder()
    head = builder.new_node((0, 0))
    tail = builder.new_node((1, 1))
    builder.new_arc(head, tail)
    graph = FrozenGraph(builder)
    training_graph = TrainingGraph(graph, hmm_mgr, {})

    _, report = validate_training_graph(training_graph, gmm_mgr1, hmm_mgr,
                                        obs_list, 1, gmm_mgr2)
    return report
def test1(num_obs):
    """
    Validate a TrainingGraph consisting of a single node.

    The node's Hmm is built by make_forward_hmm() with 4 states and order 2
    (self loop, forward 1).  num_obs observations are pulled from the data
    generator and handed, together with the graph and both GmmMgrs, to
    validate_training_graph().

    Returns the second item of the (valid, ret) pair produced by
    validate_training_graph().
    """
    dimension = 2
    num_models = 20
    num_states = 4

    # Observation data
    source = make_data_generator(dimension)
    obs_list = [source.next() for _ in xrange(num_obs)]

    # GmmMgr setup: the first ten models back the Hmm, the remaining ten go
    # into a second manager that is only passed to the validator
    gmms = make_standard_gmms(dimension, num_models)
    gmm_mgr1 = GmmMgr(gmms[0:10])
    gmm_mgr2 = GmmMgr(gmms[10:20])

    # Hmm setup: one Hmm with 4 states and order 2
    seed(0)
    only_hmm = make_forward_hmm(gmm_mgr1, num_states, 2, exact=True)
    hmm_mgr = HmmMgr((only_hmm, ))

    # TrainingGraph setup: a lone node, no arcs
    builder = GraphBuilder()
    builder.new_node((0, 0))
    graph = FrozenGraph(builder)
    training_graph = TrainingGraph(graph, hmm_mgr, {})

    _, report = validate_training_graph(training_graph, gmm_mgr1, hmm_mgr,
                                        obs_list, 1, gmm_mgr2)
    return report
def _test11(do_display=False):
    """
    A reduced version of test10: build a five-node TrainingGraph over dummy
    models and convert it to a standalone Hmm.

    do_display: when true, call dot_display() on the TrainingGraph twice, the
    second time with expand_hmms=True.  The original body read do_display
    without ever defining it; it is now a keyword parameter defaulting to
    False, which matches the signature of test5.

    Returns a banner line followed by result_hmm.to_string(full=True).
    """
    banner = "\n\n========= TG CONVERTED TO Hmm =========\n\n"
    num_states = 2
    dimension = 2

    # GmmMgr setup
    gmm_mgr = GmmMgr([DummyModel(dimension, 1.0) for _ in xrange(num_states)])

    # Graph setup.  The topology here is slightly more complex than a plain
    # chain: node 0 fans out to nodes 1 and 2, and node 2 fans out to nodes 3
    # and 4.  Node and arc creation order is kept exactly as before.
    gb = GraphBuilder()
    node_labels = ((0, 0), (1, 1), (2, 1), (3, 1), (4, 2))
    node_ids = [gb.new_node(label) for label in node_labels]
    for start, end in ((0, 1), (1, 4), (0, 2), (2, 3), (3, 4), (2, 4)):
        gb.new_arc(node_ids[start], node_ids[end])
    gr0 = FrozenGraph(gb)

    # Make three Hmms: two forward Hmms with 2 states and order 2 (self loop,
    # forward 1), and a special 1-state model for the middle nodes whose
    # input row has mass going straight to the output, so it can be skipped.
    seed(0)
    hmm0 = make_forward_hmm(gmm_mgr, num_states, order=2, exact=False)
    hmm1 = Hmm(1)
    trans = array(((0.0, 0.5, 0.5),
                   (0.0, 0.5, 0.5),
                   (0.0, 0.0, 0.0)))
    hmm1.build_model(gmm_mgr, (0, ), 1, 1, trans)
    hmm2 = make_forward_hmm(gmm_mgr, num_states, order=2, exact=True)
    hmm_mgr = HmmMgr((hmm0, hmm1, hmm2))

    # Split probabilities for the two fan-outs (from node 0 and from node 2)
    spd = {
        (0, 1): (0.4, ),
        (0, 2): (0.6, ),
        (2, 3): (0.4, ),
        (2, 4): (0.6, ),
    }
    tg0 = TrainingGraph(gr0, hmm_mgr, split_prob_dict=spd)

    if do_display:
        tg0.dot_display()
        tg0.dot_display(expand_hmms=True)

    # The original toggle was hard-wired to True, so "bwt_ctsh" debug output
    # is always requested around the conversion.
    with DebugPrint("bwt_ctsh"):
        result_hmm = tg0.convert_to_standalone_hmm()

    return banner + result_hmm.to_string(full=True)
def test5(num_obs, do_display=False):
    """
    Validate a TrainingGraph in which one Hmm can behave as an epsilon.

    One of the Hmms has a transition from its input directly to its output.
    Three Hmms are built and registered with the HmmMgr, but only the first
    two nodes are wired into the graph; the third node and its arc remain
    disabled, as in the original source.

    num_obs: number of observations pulled from the data generator.
    do_display: when true, call dot_display() on the TrainingGraph twice, the
    second time with expand_hmms=True.

    Returns the second item of the (valid, ret) pair produced by
    validate_training_graph().
    """
    dimension = 2
    num_models = 20
    num_states = 2

    # Data generator setup and data generation
    source = make_data_generator(dimension)
    obs_list = [source.next() for _ in xrange(num_obs)]

    # GmmMgr setup
    gmms = make_standard_gmms(dimension, num_models)
    gmm_mgr1 = GmmMgr(gmms[0:10])
    gmm_mgr2 = GmmMgr(gmms[10:20])

    # Hmm setup: two forward Hmms with 2 states and order 2 (self loop,
    # forward 1) around a special 1-state model whose input row has mass
    # going directly to the output state.
    seed(0)
    leading_hmm = make_forward_hmm(gmm_mgr1, num_states, 2, exact=False)
    skip_hmm = Hmm(1)
    skip_trans = array(((0.0, 0.5, 0.5),
                        (0.0, 0.5, 0.5),
                        (0.0, 0.0, 0.0)))
    skip_hmm.build_model(gmm_mgr1, (0, ), 1, 1, skip_trans)
    trailing_hmm = make_forward_hmm(gmm_mgr1, num_states, 2, exact=False)
    hmm_mgr = HmmMgr((leading_hmm, skip_hmm, trailing_hmm))

    # TrainingGraph setup: node 0 -> node 1 only (node (2, 2) stays disabled)
    builder = GraphBuilder()
    head = builder.new_node((0, 0))
    tail = builder.new_node((1, 1))
    builder.new_arc(head, tail)
    graph = FrozenGraph(builder)
    training_graph = TrainingGraph(graph, hmm_mgr, split_prob_dict={})

    if do_display:
        training_graph.dot_display()
        training_graph.dot_display(expand_hmms=True)

    _, report = validate_training_graph(training_graph, gmm_mgr1, hmm_mgr,
                                        obs_list, 1, gmm_mgr2)
    return report
def test3(num_obs):
    """
    Validate a TrainingGraph whose four nodes form a diamond.

    Each node holds an order-3 Hmm (self loop, forward 1, forward 2) with 4
    states, except node 1 whose Hmm has 6.  num_obs observations are pulled
    from the data generator and handed to validate_training_graph().

    Returns the second item of the (valid, ret) pair produced by
    validate_training_graph().
    """
    dimension = 2
    num_states = 4
    num_models = 20
    order = 3

    # Observation data
    source = make_data_generator(dimension)
    obs_list = [source.next() for _ in xrange(num_obs)]

    # GmmMgr setup
    gmms = make_standard_gmms(dimension, num_models)
    gmm_mgr1 = GmmMgr(gmms[0:10])
    gmm_mgr2 = GmmMgr(gmms[10:20])

    # Hmm setup.  NB: the asymmetry between the two successors of node 0
    # (6 states versus 4) is a key part of this test; otherwise there are no
    # differences between the transition probs going to these successors,
    # which is the tricky case.
    seed(0)
    state_counts = (num_states, num_states + 2, num_states, num_states)
    hmms = tuple([make_forward_hmm(gmm_mgr1, n, order, exact=True)
                  for n in state_counts])
    hmm_mgr = HmmMgr(hmms)

    # TrainingGraph setup: 0 -> {1, 2} -> 3
    builder = GraphBuilder()
    ids = [builder.new_node((k, k)) for k in (0, 1, 2, 3)]
    for start, end in ((0, 1), (0, 2), (1, 3), (2, 3)):
        builder.new_arc(ids[start], ids[end])
    graph = FrozenGraph(builder)

    # Split probabilities for the fan-out from node 0
    split_probs = {
        (0, 1): (0.4, 0.3, 0.8),
        (0, 2): (0.6, 0.7, 0.2),
    }
    training_graph = TrainingGraph(graph, hmm_mgr, split_probs)

    _, report = validate_training_graph(training_graph, gmm_mgr1, hmm_mgr,
                                        obs_list, 1, gmm_mgr2)
    return report
def _test9():
    """
    Like test8, but now HMMs have multiple inputs and outputs.

    Six nodes are chained linearly.  The end nodes use 3-state order-3
    forward Hmms; the four middle nodes share one 1-state Hmm built with 3
    inputs and 3 outputs.  The TrainingGraph is converted to a standalone Hmm.

    Returns a banner line followed by result_hmm.to_string(full=True).
    """
    banner = "\n\n========= TG CONVERTED TO Hmm =========\n\n"
    num_states = 3
    dimension = 2

    # GmmMgr setup
    gmm_mgr = GmmMgr([DummyModel(dimension, 1.0) for _ in xrange(num_states)])

    # Graph setup: a straight chain 0 -> 1 -> 2 -> 3 -> 4 -> 5
    builder = GraphBuilder()
    labels = ((0, 0), (1, 1), (2, 1), (3, 1), (4, 1), (5, 2))
    ids = [builder.new_node(label) for label in labels]
    for start, end in zip(ids[:-1], ids[1:]):
        builder.new_arc(start, end)
    graph = FrozenGraph(builder)

    # Hmm setup: two order-3 forward Hmms (self loop, forward 1, forward 2)
    # plus the special middle model, which can skip directly.
    seed(0)
    entry_hmm = make_forward_hmm(gmm_mgr, num_states, order=3, exact=True)
    middle_hmm = Hmm(1)
    no_exit = (0.0, ) * 7
    trans = array(((0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0),
                   (0.0, 0.0, 0.0, 0.5, 0.0, 0.5, 0.0),
                   (0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.5),
                   (0.0, 0.0, 0.0, 0.5, 0.35, 0.1, 0.05),
                   no_exit,
                   no_exit,
                   no_exit))
    middle_hmm.build_model(gmm_mgr, (0, ), 3, 3, trans)
    exit_hmm = make_forward_hmm(gmm_mgr, num_states, order=3, exact=True)
    hmm_mgr = HmmMgr((entry_hmm, middle_hmm, exit_hmm))

    # Flip this flag to request "bwt_vrfy" debug output during construction
    debug_verify = False
    with DebugPrint("bwt_vrfy") if debug_verify else DebugPrint():
        training_graph = TrainingGraph(graph, hmm_mgr, split_prob_dict={})

    standalone = training_graph.convert_to_standalone_hmm()
    return banner + standalone.to_string(full=True)
def test0():
    """
    Adapt a three-class AdaptingGmmClassifier on data sampled from three
    two-component targets, printing the targets and the classifier before
    and after adaptation.

    Adaptation runs in two phases: one priming round at relevance 0.001,
    then ten rounds at relevance 10.  Every round feeds 100 fresh samples
    per class to adapt_one_class().
    """
    print('============== test0 ============')
    dimension = 3
    variances = ((1, 1, 1), (0.5, 0.5, 1))
    target0 = make_target(dimension, 2, (0.75, 0.25), ((1, 1, 1), (2, 3, 4)), variances)
    target1 = make_target(dimension, 2, (0.5, 0.5), ((-1, -1, -1), (-2, -3, -4)), variances)
    target2 = make_target(dimension, 2, (0.1, 0.9), ((1, 1, -2), (3, 3, 5)), variances)
    for target in (target0, target1, target2):
        print(target)

    GaussianModelBase.seed(0)
    labels = ('A', 'B', 'C')
    ncomps = (1, 2, 2)
    sources = {'A': target0, 'B': target1, 'C': target2}

    GaussianMixtureModel.seed(0)
    gmm_mgr = GmmMgr(ncomps, dimension, GaussianModelBase.DIAGONAL_COVARIANCE)
    c0 = AdaptingGmmClassifier(gmm_mgr, izip(labels, count()))
    # print('') emits the same single blank line as a bare print statement
    print('')
    print(c0)

    # (relevance, number of rounds): first prime things a little bit to try
    # to get a good start, then adapt on more data
    schedule = ((0.001, 1), (10, 10))
    for relevance, num_rounds in schedule:
        c0.set_relevance(relevance)
        for _ in xrange(num_rounds):
            for label in labels:
                target = sources[label]
                samples = (target.sample() for _ in xrange(100))
                c0.adapt_one_class(label, samples)

    print('')
    print(c0)
    print('')
def test6():
    """
    Build an Hmm over dummy models (which always score 1) with an unusual
    topology: 6 real states chained together, 2 virtual inputs and 3 virtual
    outputs.  The point is to check that this asymmetric case is handled.

    The Hmm is printed, adapted for one pass in "STANDALONE" mode on a dummy
    sequence of length 11, and printed again.
    """
    import pprint

    num_states = 6
    dimension = 2

    # GmmMgr setup
    mm = GmmMgr([DummyModel(dimension, 1.0) for _ in xrange(num_states)])
    model_indices = range(num_states)

    # Hmm setup.  Rows are the FROM state, columns the TO state:
    #             i1   i2   1    2    3    4    5    6    o1   o2   o3
    trans = array(((0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),    # i1
                   (0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),    # i2
                   (0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),    # 1
                   (0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),    # 2
                   (0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0),    # 3
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.4, 0.0, 0.1, 0.0, 0.0),    # 4
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.4, 0.0, 0.1, 0.0),    # 5
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.5),    # 6
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),    # o1
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),    # o2
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)))   # o3
    hmm0 = Hmm(num_states, log_domain=True)
    hmm0.build_model(mm, model_indices, 2, 3, trans)
    print(hmm0.to_string(True))

    num_passes = 1
    for _ in xrange(num_passes):
        # Reseeding here ensures we are repeating the same observations in each pass
        SimpleGaussianModel.seed(0)

        mm.set_adaptation_state("INITIALIZING")
        mm.clear_all_accumulators()
        hmm0.begin_adapt("STANDALONE")

        mm.set_adaptation_state("ACCUMULATING")
        # Dummy sequence of length 11 (the same array object repeated)
        dummy_obs = [array((0,0))] * 11
        hmm0.adapt_one_sequence(dummy_obs)

        mm.set_adaptation_state("APPLYING")
        hmm0.end_adapt()
        mm.apply_all_accumulators()
        mm.set_adaptation_state("NOT_ADAPTING")

    print(hmm0.to_string(True))
def test3_helper(dataset_idx, num_passes):
    """
    Mimic a run ChrisW did with HTK: a 2-D single-mode Gaussian embedded in
    a 1-state Hmm, where each data point is taken as a sequence of length 1.

    dataset_idx: index into the module-level datasets collection.
    num_passes: number of "STANDALONE" adaptation passes over that dataset.

    The Hmm is printed before the first pass and again after the last.
    """
    dimension = 2

    # Gmm setup: one component, zero mean, unit variance
    gmm = GaussianMixtureModel(dimension, GaussianMixtureModel.DIAGONAL_COVARIANCE, 1)
    gmm.set_weights(array((1.0,)))
    means = array(((0.0,0.0),))
    variances = array(((1.0,1.0),))
    gmm.set_model(means, variances)
    mm = GmmMgr((gmm,))

    # Hmm setup.  As written, the matrix sends the entry state into the real
    # state with p=1 and the real state to the exit with p=1 (no self loop).
    trans = array(((0.0, 1.0, 0.0),
                   (0.0, 0.0, 1.0),
                   (0.0, 0.0, 0.0)))
    hmm0 = Hmm(1, log_domain=True)
    hmm0.build_model(mm, (0,), 1, 1, trans)
    print(hmm0.to_string(True))

    # Adaptation
    points = datasets[dataset_idx]
    for _ in xrange(num_passes):
        mm.set_adaptation_state("INITIALIZING")
        mm.clear_all_accumulators()
        hmm0.begin_adapt("STANDALONE")

        mm.set_adaptation_state("ACCUMULATING")
        for point in points:
            # We treat each point as an entire sequence
            hmm0.adapt_one_sequence((array(point),))

        mm.set_adaptation_state("APPLYING")
        hmm0.end_adapt()
        mm.apply_all_accumulators()
        mm.set_adaptation_state("NOT_ADAPTING")

    print(hmm0.to_string(True))
def test5_helper(num_obs, num_passes):
    """
    Mimic a run ChrisW did with HTK: 2-D single-mode Gaussians embedded in a
    3-state Hmm.  Each observation is a sequence of length 11, taken by
    sampling 2, 3, and 6 times, respectively, from three target
    distributions.

    num_obs: number of observation sequences drawn per pass.
    num_passes: number of "STANDALONE" adaptation passes.

    The Hmm is printed before the first pass and again after the last.
    """
    num_states = 3
    dimension = 2

    # Data generator setup
    target_means = ((1,1), (2,2), (3,3))
    target_vars = ((0.1,0.1), (0.2,0.2), (0.3,0.3))
    target_durations = (2, 3, 6)
    generators = [SimpleGaussianModel(dimension, SimpleGaussianModel.DIAGONAL_COVARIANCE)
                  for _ in xrange(num_states)]
    # A plain loop rather than a list comprehension: set_model() is called
    # only for its side effect.
    for model, mean, var in izip(generators, target_means, target_vars):
        model.set_model(mean, var)
    SimpleGaussianModel.seed(0)

    # Gmm setup: one single-component Gmm per state, all starting at zero
    # mean and unit variance
    models = []
    for _ in xrange(num_states):
        gmm = GaussianMixtureModel(dimension, GaussianMixtureModel.DIAGONAL_COVARIANCE, 1)
        gmm.set_weights(array((1.0,)))
        gmm.set_model(array(((0.0,0.0),)), array(((1.0,1.0),)))
        models.append(gmm)
    mm = GmmMgr(models)
    models = range(num_states)

    # Hmm setup
    trans = array(((0.0, 1.0, 0.0, 0.0, 0.0),
                   (0.0, 0.5, 0.5, 0.0, 0.0),
                   (0.0, 0.0, 0.5, 0.5, 0.0),
                   (0.0, 0.0, 0.0, 0.5, 0.5),
                   (0.0, 0.0, 0.0, 0.0, 0.0)))
    hmm0 = Hmm(num_states, log_domain=True)
    hmm0.build_model(mm, models, 1, 1, trans)
    print(hmm0.to_string(True))

    for _ in xrange(num_passes):
        # Reseeding here ensures we are repeating the same observations in each pass
        SimpleGaussianModel.seed(0)
        mm.set_adaptation_state("INITIALIZING")
        mm.clear_all_accumulators()
        hmm0.begin_adapt("STANDALONE")

        mm.set_adaptation_state("ACCUMULATING")
        obs_gen = obs_generator(generators, target_durations)
        for _ in xrange(num_obs):
            obs = obs_gen.next()
            hmm0.adapt_one_sequence(obs)
            # NOTE: to show observations as nicely formatted sequences (this
            # is what was given to ChrisW for his HTK runs), import pprint
            # and call pprint.pprint([tuple(a) for a in obs]) here.

        mm.set_adaptation_state("APPLYING")
        hmm0.end_adapt()
        mm.apply_all_accumulators()
        mm.set_adaptation_state("NOT_ADAPTING")

    print(hmm0.to_string(True))
def test2a(num_obs, num_passes):
    """
    Adapt a 1-state Hmm and a free-standing Gmm on the same data and print
    both, before and after.

    num_obs samples are drawn from a 2-D diagonal Gaussian and fed to the Hmm
    as one sequence in each of num_passes "STANDALONE" passes.  The Hmm's
    state uses gmm1; gmm0 is adapted directly with max_iters=num_passes, and
    gmm0 is the model that gets printed.
    """
    dimension = 2

    # Data generator setup
    target_means = (1,1)
    target_vars = (0.1,0.1)
    generator = SimpleGaussianModel(dimension, SimpleGaussianModel.DIAGONAL_COVARIANCE)
    generator.set_model(target_means, target_vars)
    SimpleGaussianModel.seed(0)
    data = [generator.sample() for _ in xrange(num_obs)]

    # Gmm setup
    num_mixtures = 2
    gmm0 = make_gmm_diag(dimension, num_mixtures)
    gmm1 = make_gmm_diag(dimension, num_mixtures)
    mm = GmmMgr((gmm1,))

    # Hmm setup.  The entry state feeds into the real state with p=1.  The
    # real state has a p ~= 1 self loop; it cannot be exactly 1 since we need
    # *some* probability of finishing.
    trans = array(((0.0, 1.0, 0.0),
                   (0.0, 0.999999999999, 0.000000000001),
                   (0.0, 0.0, 0.0)))
    hmm0 = Hmm(1, log_domain=True)
    hmm0.build_model(mm, (0,), 1, 1, trans)
    print(hmm0.to_string(True) + '\n')
    print(gmm0)
    print('\n\n')

    # Try some adaptation
    for _ in xrange(num_passes):
        mm.set_adaptation_state("INITIALIZING")
        mm.clear_all_accumulators()
        hmm0.begin_adapt("STANDALONE")

        mm.set_adaptation_state("ACCUMULATING")
        hmm0.adapt_one_sequence(data)

        mm.set_adaptation_state("APPLYING")
        hmm0.end_adapt()
        mm.apply_all_accumulators()
        mm.set_adaptation_state("NOT_ADAPTING")

    # Flip this flag to request Gaussian debug output during the Gmm adaptation
    really_print = False
    if really_print:
        debug_context = DebugPrint("gaussian", "gaussian_pt", "gaussian_gmm_score")
    else:
        debug_context = DebugPrint()
    with debug_context:
        gmm0.adapt(data, max_iters = num_passes)

    print(hmm0.to_string(True) + '\n')
    print(gmm0)
def test0(num_obs, num_passes):
    """
    Adapt an Hmm that has no real states at all.

    The Hmm is built with zero states and an empty model tuple; its entry
    state feeds straight into the exit state with p=1.  num_obs samples are
    drawn from a 2-D diagonal Gaussian and fed to the Hmm as one stream in
    each of num_passes "STANDALONE" passes.  The Hmm is printed before the
    first pass and again after the last.
    """
    dimension = 2

    # Data generator setup
    target_means = (1,1)
    target_vars = (0.1,0.1)
    generator = SimpleGaussianModel(dimension, SimpleGaussianModel.DIAGONAL_COVARIANCE)
    generator.set_model(target_means, target_vars)

    SimpleGaussianModel.seed(0)
    GaussianMixtureModel.seed(0)

    mm = GmmMgr(dimension)

    # Hmm setup: a transition probability matrix with no real state
    hmm0 = Hmm(0, log_domain=True)
    trans = array(((0.0, 1.0),
                   (0.0, 0.0)))
    hmm0.build_model(mm, (), 1, 1, trans)
    print(hmm0.to_string(True))

    # Try some adaptation.  Note that we are feeding the entire data set as
    # one stream to the Hmm adaptation call.
    data = [generator.sample() for _ in xrange(num_obs)]

    # Flip this flag to request Hmm debug output during accumulation
    debug_adapt = False
    for _ in xrange(num_passes):
        mm.set_adaptation_state("INITIALIZING")
        mm.clear_all_accumulators()
        hmm0.begin_adapt("STANDALONE")

        mm.set_adaptation_state("ACCUMULATING")
        with DebugPrint("hmm_gxfs", "hmm_aos") if debug_adapt else DebugPrint():
            hmm0.adapt_one_sequence(data)

        mm.set_adaptation_state("APPLYING")
        hmm0.end_adapt()
        mm.apply_all_accumulators()
        mm.set_adaptation_state("NOT_ADAPTING")

    print(hmm0.to_string(True))
opts = result['options'] if 'models' not in result: raise IOError("No models found in %s!" % (filename,)) models = result['models'] if 'vecsize' not in opts: raise IOError("No vecsize option found in %s" % (filename,)) dim = opts['vecsize'] if 'covar' not in opts: covar_type = GaussianMixtureModel.DIAGONAL_COVARIANCE else: if opts['covar'] not in covar_map: raise IOError("Unknown covar option %s found in %s" % (opts['covar'], filename,)) covar_type = covar_map[opts['covar']] dim = opts['vecsize'] hmm_mgr = HmmMgr(dim) gmm_mgr = GmmMgr(dim) hmms = [] names = [] unnamed_index = 0 for label, m in models: assert label == 'HMM' dc and dc("m = \n%s" % (pformat(m),)) dc and dc("m.keys() = \n%s" % (m.keys(),)) if m.hasattr.decl: name = m.decl else: name = ("UnnamedModel%d" % unnamed_index) unnamed_index += 1 n = m.numstates - 2 # HTK numstates counts virtual entry and exit states hmm = Hmm(n, log_domain) gmms = []
def test2(num_obs, num_passes):
    """
    Adapt a 1-state Hmm and a free-standing Gmm on the same data and print
    both, before and after.

    num_obs samples are drawn from a 2-D diagonal Gaussian; each sample is
    fed to the Hmm as its own length-1 sequence in each of num_passes
    "STANDALONE" passes.  The Hmm's state uses gmm1; gmm0 is adapted directly
    with max_iters=num_passes, and gmm0 is the model that gets printed.
    """
    dimension = 2

    # Data generator setup
    target_means = (1,1)
    target_vars = (0.1,0.1)
    generator = SimpleGaussianModel(dimension, SimpleGaussianModel.DIAGONAL_COVARIANCE)
    generator.set_model(target_means, target_vars)

    SimpleGaussianModel.seed(0)
    GaussianMixtureModel.seed(0)

    # Gmm setup
    num_mixtures = 2
    gmm0 = make_gmm(dimension, num_mixtures)
    gmm1 = make_gmm(dimension, num_mixtures)
    mm = GmmMgr((gmm1,))

    # Hmm setup.  The entry state feeds into the real state with p=1, and
    # the real state exits with p=1.
    trans = array(((0.0, 1.0, 0.0),
                   (0.0, 0.0, 1.0),
                   (0.0, 0.0, 0.0)))
    hmm0 = Hmm(1, log_domain=True)
    hmm0.build_model(mm, (0,), 1, 1, trans)
    print(hmm0.to_string(True) + '\n')
    print(gmm0)
    print('\n\n')

    # Try some adaptation
    data = [generator.sample() for _ in xrange(num_obs)]
    for _ in xrange(num_passes):
        mm.set_adaptation_state("INITIALIZING")
        mm.clear_all_accumulators()
        hmm0.begin_adapt("STANDALONE")

        mm.set_adaptation_state("ACCUMULATING")
        for sample in data:
            # We treat each point as an entire sequence
            hmm0.adapt_one_sequence((sample,))

        mm.set_adaptation_state("APPLYING")
        hmm0.end_adapt()
        mm.apply_all_accumulators()
        mm.set_adaptation_state("NOT_ADAPTING")

    gmm0.adapt(data, max_iters = num_passes)

    print(hmm0.to_string(True) + '\n')
    print(gmm0)
def test4(num_passes, num_obs):
    """
    Train a diamond-shaped TrainingGraph and return its string form.

    The graph has four nodes but only three Hmms, all of order 3 (self loop,
    forward 1, forward 2), with 4, 6 and 4 states.  The first and last nodes
    use the same Hmm.

    num_passes: number of training passes over the observation list.
    num_obs: number of observations pulled from the data generator.

    Returns tg0.to_string(full=True) taken after the last pass.
    """
    dimension = 2
    num_models = 10
    num_states = 4

    # Data generator setup and data generation
    source = make_data_generator(dimension)
    obs_list = [source.next() for _ in xrange(num_obs)]

    # GmmMgr setup
    gmm_mgr = GmmMgr(make_standard_gmms(dimension, num_models))

    # Hmm setup
    seed(0)
    state_counts = (num_states, num_states + 2, num_states)
    hmm_mgr = HmmMgr(tuple([make_forward_hmm(gmm_mgr, n, 3, exact=True)
                            for n in state_counts]))

    # TrainingGraph setup: 0 -> {1, 2} -> 3.
    # Note that here we are using the same HMM in two different TG nodes.
    builder = GraphBuilder()
    ids = [builder.new_node(label) for label in ((0, 0), (1, 1), (2, 2), (3, 0))]
    for start, end in ((0, 1), (0, 2), (1, 3), (2, 3)):
        builder.new_arc(ids[start], ids[end])
    graph = FrozenGraph(builder)

    split_probs = {
        (0, 1): (0.4, 0.3, 0.8),
        (0, 2): (0.6, 0.7, 0.2),
    }
    tg0 = TrainingGraph(graph, hmm_mgr, split_probs)

    # Now adapt original TrainingGraph
    for _ in xrange(num_passes):
        gmm_mgr.set_adaptation_state("INITIALIZING")
        gmm_mgr.clear_all_accumulators()
        tg0.begin_training()

        gmm_mgr.set_adaptation_state("ACCUMULATING")
        for sequence in obs_list:
            tg0.train_one_sequence(sequence)
        tg0.end_training()

        gmm_mgr.set_adaptation_state("APPLYING")
        gmm_mgr.apply_all_accumulators()
        gmm_mgr.set_adaptation_state("NOT_ADAPTING")

    return tg0.to_string(full=True)
def test7():
    """
    Build an Hmm over dummy models (which always score 1) with an unusual
    topology: 6 real states chained together, 2 virtual inputs and 3 virtual
    outputs.  The point is to check that this asymmetric case is handled.

    This is the same as test6 except that it drives the network adaptation
    interface ("NETWORK" mode) by hand: a forward pass, a backward pass, then
    gamma-sum collection and accumulation.  The output alphas and betas of
    every frame are printed, as is the Hmm before and after adaptation.
    """
    import pprint

    num_states = 6
    dimension = 2

    # GmmMgr setup
    mm = GmmMgr([DummyModel(dimension, 1.0) for _ in xrange(num_states)])

    # Hmm setup.  Rows are the FROM state, columns the TO state:
    #             i1   i2   1    2    3    4    5    6    o1   o2   o3
    trans = array(((0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),    # i1
                   (0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),    # i2
                   (0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),    # 1
                   (0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),    # 2
                   (0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0),    # 3
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.4, 0.0, 0.1, 0.0, 0.0),    # 4
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.4, 0.0, 0.1, 0.0),    # 5
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.5),    # 6
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),    # o1
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),    # o2
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)))   # o3
    hmm0 = Hmm(num_states, log_domain=True)
    model_indices = range(num_states)
    hmm0.build_model(mm, model_indices, 2, 3, trans)
    print(hmm0.to_string(True))

    num_passes = 1
    for _ in xrange(num_passes):
        # Reseeding here ensures we are repeating the same observations in each pass
        SimpleGaussianModel.seed(0)

        mm.set_adaptation_state("INITIALIZING")
        hmm0.begin_adapt("NETWORK")
        mm.set_adaptation_state("ACCUMULATING")

        num_obs = 11
        dummy_obs = [array((0,0))] * num_obs  # Dummy sequence (same array repeated)
        context = hmm0.init_for_forward_pass(dummy_obs, terminal = True)

        # Add some mass into the system for the forward pass.  To match the
        # behavior of standalone adaptation, we divide an initial mass of 1
        # evenly across the inputs.
        input_alphas = array([1.0/hmm0.num_inputs] * hmm0.num_inputs)
        hmm0.accum_input_alphas(context, input_alphas)

        # Actually do the forward pass.  Note that we must process one more
        # frame than the number of observations - this is because an extra
        # frame is automatically added which scores 1 on the exit states of
        # the Hmm (and 0 on all real states).  XXX we might want clients to
        # do this for themselves at some point rather than this automatic
        # behavior.
        num_frames = num_obs + 1
        for _frame in xrange(num_frames):
            print(hmm0.process_one_frame_forward(context))

        # Likewise, we initialize and then make the backward pass
        hmm0.init_for_backward_pass(context)
        hmm0.accum_input_betas(context, array([1.0] * hmm0.num_outputs))
        for _frame in xrange(num_frames):
            print(hmm0.process_one_frame_backward(context))

        # Now collect all the gamma sums; here there's only one
        norm = hmm0.get_initial_gamma_sum()
        hmm0.add_to_gamma_sum(norm, context)

        # Here's where the actual accumulation happens
        hmm0.do_accumulation(context, norm)

        mm.set_adaptation_state("APPLYING")
        hmm0.end_adapt()
        mm.apply_all_accumulators()
        mm.set_adaptation_state("NOT_ADAPTING")

    print(hmm0.to_string(True))
false_vars_prime = N.array((3.0097, 6.0277, 8.3711, 10.7198, 13.4285, 456.7074), dtype=N.float32) # mfcc, no c0: 20,000 frames of Hugh talking true_means_prime = N.array((-4.8087, 3.9863, -0.5217, 1.3076, 0.7514, -4.6497), dtype=N.float32) true_vars_prime = N.array((26.8496, 32.6631, 32.3662, 24.2963, 36.2244, 34.1555), dtype=N.float32) false_means_prime = N.array((-6.8806, -1.3424, -3.8147, 0.4520, 0.7129, -3.1560), dtype=N.float32) false_vars_prime = N.array((2.7468, 6.2286, 7.4355, 10.1530, 13.3865, 15.9309), dtype=N.float32) true_prime = SimpleGaussianModel(nfeatures, GaussianModelBase.DIAGONAL_COVARIANCE) true_prime.set_model(true_means_prime, true_vars_prime) false_prime = SimpleGaussianModel(nfeatures, GaussianModelBase.DIAGONAL_COVARIANCE) false_prime.set_model(false_means_prime, false_vars_prime) primer = (true_prime, false_prime) GaussianMixtureModel.seed(0) gmm_mgr0 = GmmMgr(ncomps, nfeatures, GaussianModelBase.DIAGONAL_COVARIANCE, primer) gmm_mgr1 = GmmMgr(ncomps, nfeatures, GaussianModelBase.DIAGONAL_COVARIANCE, primer) classify0 = AdaptingGmmClassifier(gmm_mgr0, izip(labels, count())) classify1 = AdaptingGmmClassifier(gmm_mgr1, izip(labels, count())) classify0.set_relevance(333) classify1.set_relevance(333) classify0.set_num_em_iterations(2) classify1.set_num_em_iterations(2) classifier0 = AdaptingGmmClassProcessor(classify0) classifier1 = AdaptingGmmClassProcessor(classify1) gaussian_trainer = SimpleGaussianTrainer(labels, nfeatures) trainer = FunctionProcessor(gaussian_trainer) # audio.mic, fftmag, endpointer, mfcc0, square, mfcc1, classifier0, classifier1, trainer
def _test10():
    """
    Like test9, but now HMMs are arranged in a diamond pattern so inter-HMM
    probabilities come into play.

    Six nodes are built over dummy models.  The end nodes use 3-state order-3
    forward Hmms; the four middle nodes share one 1-state Hmm built with 3
    inputs and 3 outputs.  Split probabilities are supplied for the two
    fan-outs, and the TrainingGraph is converted to a standalone Hmm.

    Returns a banner line followed by result_hmm.to_string(full=True).
    """
    banner = "\n\n========= TG CONVERTED TO Hmm =========\n\n"
    num_states = 3
    dimension = 2

    # GmmMgr setup
    gmm_mgr = GmmMgr([DummyModel(dimension, 1.0) for _ in xrange(num_states)])

    # Graph setup.  The topology here is more complex than a plain chain:
    # node 0 fans out to nodes 1 and 2, and node 3 fans out to nodes 4 and 5.
    builder = GraphBuilder()
    labels = ((0, 0), (1, 1), (2, 1), (3, 1), (4, 1), (5, 2))
    ids = [builder.new_node(label) for label in labels]
    arcs = ((0, 1), (1, 5), (0, 2), (2, 3), (3, 4), (3, 5), (4, 5))
    for start, end in arcs:
        builder.new_arc(ids[start], ids[end])
    graph = FrozenGraph(builder)

    # Hmm setup: two order-3 forward Hmms (self loop, forward 1, forward 2)
    # plus the special middle model, which can skip.
    seed(0)
    entry_hmm = make_forward_hmm(gmm_mgr, num_states, order=3, exact=True)
    middle_hmm = Hmm(1)
    no_exit = (0.0, ) * 7
    trans = array(((0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0),
                   (0.0, 0.0, 0.0, 0.5, 0.0, 0.5, 0.0),
                   (0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.5),
                   (0.0, 0.0, 0.0, 0.5, 0.35, 0.1, 0.05),
                   no_exit,
                   no_exit,
                   no_exit))
    middle_hmm.build_model(gmm_mgr, (0, ), 3, 3, trans)
    exit_hmm = make_forward_hmm(gmm_mgr, num_states, order=3, exact=True)
    hmm_mgr = HmmMgr((entry_hmm, middle_hmm, exit_hmm))

    # Split probabilities for the fan-outs from node 0 and node 3
    split_probs = {
        (0, 1): (0.4, 0.3, 0.8),
        (0, 2): (0.6, 0.7, 0.2),
        (3, 4): (0.4, 0.3, 0.8),
        (3, 5): (0.6, 0.7, 0.2),
    }
    training_graph = TrainingGraph(graph, hmm_mgr, split_prob_dict=split_probs)

    # "bwt_ctsh" debug output is requested around the conversion; clear this
    # flag to turn it off
    debug_convert = True
    with DebugPrint("bwt_ctsh") if debug_convert else DebugPrint():
        standalone = training_graph.convert_to_standalone_hmm()

    return banner + standalone.to_string(full=True)
def test8_helper(num_obs, num_passes):
    """
    Mimic a run ChrisW did with HTK: 2-D single-mode Gaussians embedded in a
    3-state Hmm.  Each observation is a sequence of length 11, taken by
    sampling 2, 3, and 6 times, respectively, from three target
    distributions.

    This is identical to test5 except that here the Hmm is built with only
    one Gmm, which is shared by all three states.

    num_obs: number of observation sequences drawn per pass.
    num_passes: number of "STANDALONE" adaptation passes.

    The Hmm is printed before the first pass and again after the last.
    """
    num_states = 3
    dimension = 2

    # Data generator setup
    target_means = ((1,1), (2,2), (3,3))
    target_vars = ((0.1,0.1), (0.2,0.2), (0.3,0.3))
    target_durations = (2, 3, 6)
    generators = [SimpleGaussianModel(dimension, SimpleGaussianModel.DIAGONAL_COVARIANCE)
                  for _ in xrange(num_states)]
    # A plain loop rather than a list comprehension: set_model() is called
    # only for its side effect.
    for model, mean, var in izip(generators, target_means, target_vars):
        model.set_model(mean, var)
    SimpleGaussianModel.seed(0)

    # Gmm setup: a single one-component Gmm at zero mean and unit variance
    gmm = GaussianMixtureModel(dimension, GaussianMixtureModel.DIAGONAL_COVARIANCE, 1)
    gmm.set_weights(array((1.0,)))
    gmm.set_model(array(((0.0,0.0),)), array(((1.0,1.0),)))
    mm = GmmMgr((gmm,))

    # Here's where we're using the same Gmm in all three states of this Hmm.
    models = (0, 0, 0)

    # Hmm setup
    trans = array(((0.0, 1.0, 0.0, 0.0, 0.0),
                   (0.0, 0.5, 0.5, 0.0, 0.0),
                   (0.0, 0.0, 0.5, 0.5, 0.0),
                   (0.0, 0.0, 0.0, 0.5, 0.5),
                   (0.0, 0.0, 0.0, 0.0, 0.0)))
    hmm0 = Hmm(num_states, log_domain=True)
    hmm0.build_model(mm, models, 1, 1, trans)
    print(hmm0.to_string(True))

    for _ in xrange(num_passes):
        # Reseeding here ensures we are repeating the same observations in each pass
        SimpleGaussianModel.seed(0)
        mm.set_adaptation_state("INITIALIZING")
        mm.clear_all_accumulators()
        hmm0.begin_adapt("STANDALONE")

        mm.set_adaptation_state("ACCUMULATING")
        obs_gen = obs_generator(generators, target_durations)
        for _ in xrange(num_obs):
            hmm0.adapt_one_sequence(obs_gen.next())

        mm.set_adaptation_state("APPLYING")
        hmm0.end_adapt()
        mm.apply_all_accumulators()
        mm.set_adaptation_state("NOT_ADAPTING")

    print(hmm0.to_string(True))
def test1():
    """
    Adapt a three-class AdaptingGmmClassifier through an
    AdaptingGmmClassProcessor and report how often its first-listed score
    carries the training label.

    Adaptation runs in two phases: one priming round at relevance 0.001,
    then ten rounds at relevance 10, both with 2 EM iterations.  Every round
    sends a (label, 100 samples) pair per class to the processor, whose
    outputs are collected in a list.

    Prints the targets, the classifier before and after adaptation, the
    number of results, the number of matches and the fraction of matches.
    The fraction is computed with float division; the original int / int
    expression truncates to 0 or 1 under Python 2 classic division.
    """
    print('============== test1 ============')
    dimension = 3
    variances = ((1, 1, 1), (0.5, 0.5, 1))
    target0 = make_target(dimension, 2, (0.75, 0.25), ((1, 1, 1), (2, 3, 4)), variances)
    target1 = make_target(dimension, 2, (0.5, 0.5), ((-1, -1, -1), (-2, -3, -4)), variances)
    target2 = make_target(dimension, 2, (0.1, 0.9), ((1, 1, -2), (3, 3, 5)), variances)
    for target in (target0, target1, target2):
        print(target)

    GaussianModelBase.seed(0)
    labels = ('A', 'B', 'C')
    ncomps = (1, 2, 2)
    sources = {'A': target0, 'B': target1, 'C': target2}

    GaussianMixtureModel.seed(0)
    gmm_mgr = GmmMgr(ncomps, dimension, GaussianModelBase.DIAGONAL_COVARIANCE)
    c0 = AdaptingGmmClassifier(gmm_mgr, izip(labels, count()))
    # print('') emits the same single blank line as a bare print statement
    print('')
    print(c0)

    result = list()
    proc0 = AdaptingGmmClassProcessor(c0, result.append)

    # (relevance, number of rounds): first prime things a little bit to try
    # to get a good start, then adapt on more data
    schedule = ((0.001, 1), (10, 10))
    for relevance, num_rounds in schedule:
        c0.set_relevance(relevance)
        c0.set_num_em_iterations(2)
        for _ in xrange(num_rounds):
            for label in labels:
                target = sources[label]
                data = (target.sample() for _ in xrange(100))
                proc0.process((label, data))

    print('')
    print(c0)
    print('')
    print(len(result))

    # XXX Win32 gets values off in the last 2-3 hex digits, and it is not
    # clear how to account for that in a logref test, so the per-result
    # score listing (training label plus readable scores for result[-10:])
    # stays disabled for now.

    # Each result is a (training label, scores) pair; the label of the first
    # score entry is taken as the guess.
    correct = tuple(label for label, scores in result)
    guessed = tuple(scores[0][1] for _label, scores in result)
    print('%s %s' % (len(correct), len(guessed)))

    num_matches = sum(1 for (c, g) in izip(correct, guessed) if c == g)
    print(num_matches)
    # float() forces true division regardless of the module's division mode
    print(float(num_matches) / len(correct))