def test2(num_obs):
    # Two TG nodes, each wrapping a 4-state order-2 Hmm, joined in a single chain.
    dimension = 2
    generator = make_data_generator(dimension)
    observations = [generator.next() for _ in xrange(num_obs)]

    # GmmMgr setup: split 20 standard models across two managers.
    models = make_standard_gmms(dimension, 20)
    mgr_a = GmmMgr(models[0:10])
    mgr_b = GmmMgr(models[10:20])

    # Hmm setup: two 4-state Hmms of order 2 (self loop, forward 1).
    seed(0)
    first = make_forward_hmm(mgr_a, 4, 2, exact=True)
    second = make_forward_hmm(mgr_a, 4, 2, exact=True)
    hmm_mgr = HmmMgr((first, second))

    # TrainingGraph setup: node 0 -> node 1.
    builder = GraphBuilder()
    start = builder.new_node((0, 0))
    finish = builder.new_node((1, 1))
    builder.new_arc(start, finish)
    tg0 = TrainingGraph(FrozenGraph(builder), hmm_mgr, dict())

    _, ret = validate_training_graph(tg0, mgr_a, hmm_mgr, observations, 1, mgr_b)
    return ret
def test1(num_obs):
    # A single TG node wrapping one 4-state order-2 Hmm.
    dimension = 2
    generator = make_data_generator(dimension)
    observations = [generator.next() for _ in xrange(num_obs)]

    # GmmMgr setup: split 20 standard models across two managers.
    models = make_standard_gmms(dimension, 20)
    mgr_a = GmmMgr(models[0:10])
    mgr_b = GmmMgr(models[10:20])

    # Hmm setup: one 4-state Hmm of order 2 (self loop, forward 1).
    seed(0)
    only_hmm = make_forward_hmm(mgr_a, 4, 2, exact=True)
    hmm_mgr = HmmMgr((only_hmm, ))

    # TrainingGraph setup: one node, no arcs.
    builder = GraphBuilder()
    builder.new_node((0, 0))
    tg0 = TrainingGraph(FrozenGraph(builder), hmm_mgr, dict())

    _, ret = validate_training_graph(tg0, mgr_a, hmm_mgr, observations, 1, mgr_b)
    return ret
def _test11():
    # A reduced version of test10.
    ret = ""

    # GmmMgr setup: dummy models that always score 1.0.
    num_states = 2
    dimension = 2
    models = [DummyModel(dimension, 1.0) for _ in xrange(num_states)]
    gmm_mgr = GmmMgr(models)

    # Topology: slightly more complex than the previous example -- node 0
    # fans out to 1 and 2; node 2 reaches 4 directly or via 3.
    builder = GraphBuilder()
    nodes = [builder.new_node(pair)
             for pair in ((0, 0), (1, 1), (2, 1), (3, 1), (4, 2))]
    for src, dst in ((0, 1), (1, 4), (0, 2), (2, 3), (3, 4), (2, 4)):
        builder.new_arc(nodes[src], nodes[dst])
    graph = FrozenGraph(builder)

    # Two forward Hmms of order 2 (self loop, forward 1) around a special
    # middle model that can skip straight from its input to its output.
    seed(0)
    hmm0 = make_forward_hmm(gmm_mgr, num_states, order=2, exact=False)
    hmm1 = Hmm(1)
    trans = array(((0.0, 0.5, 0.5),
                   (0.0, 0.5, 0.5),
                   (0.0, 0.0, 0.0)))
    hmm1.build_model(gmm_mgr, (0, ), 1, 1, trans)
    hmm2 = make_forward_hmm(gmm_mgr, num_states, order=2, exact=True)
    hmm_mgr = HmmMgr((hmm0, hmm1, hmm2))

    # Splitting probabilities for the two fan-out points.
    spd = {(0, 1): (0.4, ),
           (0, 2): (0.6, ),
           (2, 3): (0.4, ),
           (2, 4): (0.6, )}

    tg0 = TrainingGraph(graph, hmm_mgr, split_prob_dict=spd)
    if do_display:
        tg0.dot_display()
        tg0.dot_display(expand_hmms=True)
    with DebugPrint("bwt_ctsh") if True else DebugPrint():
        result_hmm = tg0.convert_to_standalone_hmm()
    ret += "\n\n========= TG CONVERTED TO Hmm =========\n\n" + result_hmm.to_string(full=True)
    return ret
def test4(num_passes, num_obs):
    # Four TG nodes in a diamond; each wraps a 4- (or 6-) state order-3 Hmm.
    ret = ""
    dimension = 2

    # Data generator setup and data generation.
    generator = make_data_generator(dimension)
    observations = [generator.next() for _ in xrange(num_obs)]

    # GmmMgr setup.
    models = make_standard_gmms(dimension, 10)
    gmm_mgr = GmmMgr(models)

    # Three order-3 Hmms (self loop, forward 1, forward 2); the middle one
    # is longer (6 states) so the two diamond branches differ.
    num_states = 4
    seed(0)
    hmm0 = make_forward_hmm(gmm_mgr, num_states, 3, exact=True)
    hmm1 = make_forward_hmm(gmm_mgr, num_states + 2, 3, exact=True)
    hmm2 = make_forward_hmm(gmm_mgr, num_states, 3, exact=True)
    hmm_mgr = HmmMgr((hmm0, hmm1, hmm2))

    # TrainingGraph setup: diamond 0 -> {1, 2} -> 3.  Note that the first
    # and last nodes deliberately share the same underlying Hmm (index 0).
    builder = GraphBuilder()
    nodes = [builder.new_node(pair)
             for pair in ((0, 0), (1, 1), (2, 2), (3, 0))]
    for src, dst in ((0, 1), (0, 2), (1, 3), (2, 3)):
        builder.new_arc(nodes[src], nodes[dst])
    spd = {(0, 1): (0.4, 0.3, 0.8),
           (0, 2): (0.6, 0.7, 0.2)}
    tg0 = TrainingGraph(FrozenGraph(builder), hmm_mgr, spd)

    # Now run the adaptation passes over the observation set.
    for _ in xrange(num_passes):
        gmm_mgr.set_adaptation_state("INITIALIZING")
        gmm_mgr.clear_all_accumulators()
        tg0.begin_training()
        gmm_mgr.set_adaptation_state("ACCUMULATING")
        for obs in observations:
            tg0.train_one_sequence(obs)
        tg0.end_training()
        gmm_mgr.set_adaptation_state("APPLYING")
        gmm_mgr.apply_all_accumulators()
        gmm_mgr.set_adaptation_state("NOT_ADAPTING")

    ret = tg0.to_string(full=True)
    return ret
def test5(num_obs, do_display=False):
    # A test in which one of the Hmms has a transition from an input directly
    # to an output, so it can behave as an epsilon.  This node sits between
    # two other nodes in a linear arrangement.

    # Data generator setup and data generation.
    dimension = 2
    generator = make_data_generator(dimension)
    observations = [generator.next() for _ in xrange(num_obs)]

    # GmmMgr setup: split 20 standard models across two managers.
    models = make_standard_gmms(dimension, 20)
    mgr_a = GmmMgr(models[0:10])
    mgr_b = GmmMgr(models[10:20])

    # Hmm setup: 2-state order-2 models (self loop, forward 1) flanking a
    # special middle model that can skip from its input state straight to
    # its output state.
    seed(0)
    num_states = 2
    hmm0 = make_forward_hmm(mgr_a, num_states, 2, exact=False)
    hmm1 = Hmm(1)
    trans = array(((0.0, 0.5, 0.5),
                   (0.0, 0.5, 0.5),
                   (0.0, 0.0, 0.0)))
    hmm1.build_model(mgr_a, (0, ), 1, 1, trans)
    hmm2 = make_forward_hmm(mgr_a, num_states, 2, exact=False)
    hmm_mgr = HmmMgr((hmm0, hmm1, hmm2))

    # TrainingGraph setup (third node currently disabled).
    builder = GraphBuilder()
    n0 = builder.new_node((0, 0))
    n1 = builder.new_node((1, 1))
    # n2 = builder.new_node((2, 2))
    builder.new_arc(n0, n1)
    # builder.new_arc(n1, n2)
    tg0 = TrainingGraph(FrozenGraph(builder), hmm_mgr, split_prob_dict=dict())
    if do_display:
        tg0.dot_display()
        tg0.dot_display(expand_hmms=True)

    _, ret = validate_training_graph(tg0, mgr_a, hmm_mgr, observations, 1, mgr_b)
    return ret
def test3(num_obs):
    # Four TG nodes in a diamond; each wraps a 4- (or 6-) state order-3 Hmm.
    dimension = 2
    generator = make_data_generator(dimension)
    observations = [generator.next() for _ in xrange(num_obs)]

    # GmmMgr setup: split 20 standard models across two managers.
    num_states = 4
    models = make_standard_gmms(dimension, 20)
    mgr_a = GmmMgr(models[0:10])
    mgr_b = GmmMgr(models[10:20])

    # Four order-3 Hmms (self loop, forward 1, forward 2).  NB: the asymmetry
    # between the two successors (hmm1 is longer than hmm2) is a key part of
    # this test; otherwise there are no differences between the transition
    # probs going to these successors, which is the tricky case.
    seed(0)
    hmm0 = make_forward_hmm(mgr_a, num_states, 3, exact=True)
    hmm1 = make_forward_hmm(mgr_a, num_states + 2, 3, exact=True)
    hmm2 = make_forward_hmm(mgr_a, num_states, 3, exact=True)
    hmm3 = make_forward_hmm(mgr_a, num_states, 3, exact=True)
    hmm_mgr = HmmMgr((hmm0, hmm1, hmm2, hmm3))

    # TrainingGraph setup: diamond 0 -> {1, 2} -> 3.
    builder = GraphBuilder()
    nodes = [builder.new_node(pair)
             for pair in ((0, 0), (1, 1), (2, 2), (3, 3))]
    for src, dst in ((0, 1), (0, 2), (1, 3), (2, 3)):
        builder.new_arc(nodes[src], nodes[dst])
    spd = {(0, 1): (0.4, 0.3, 0.8),
           (0, 2): (0.6, 0.7, 0.2)}
    tg0 = TrainingGraph(FrozenGraph(builder), hmm_mgr, spd)

    _, ret = validate_training_graph(tg0, mgr_a, hmm_mgr, observations, 1, mgr_b)
    return ret
def _test9():
    # Like test8, but now the Hmms have multiple inputs and outputs.
    ret = ""

    # GmmMgr setup: dummy models that always score 1.0.
    num_states = 3
    dimension = 2
    models = [DummyModel(dimension, 1.0) for _ in xrange(num_states)]
    gmm_mgr = GmmMgr(models)

    # Linear chain of six TG nodes.
    builder = GraphBuilder()
    nodes = [builder.new_node(pair)
             for pair in ((0, 0), (1, 1), (2, 1), (3, 1), (4, 1), (5, 2))]
    for src, dst in ((0, 1), (1, 2), (2, 3), (3, 4), (4, 5)):
        builder.new_arc(nodes[src], nodes[dst])
    graph = FrozenGraph(builder)

    # Two forward Hmms of order 3 (self loop, forward 1, forward 2) around a
    # special middle model with 3 inputs/outputs that can skip directly.
    seed(0)
    hmm0 = make_forward_hmm(gmm_mgr, num_states, order=3, exact=True)
    hmm1 = Hmm(1)
    trans = array(((0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0),
                   (0.0, 0.0, 0.0, 0.5, 0.0, 0.5, 0.0),
                   (0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.5),
                   (0.0, 0.0, 0.0, 0.5, 0.35, 0.1, 0.05),
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)))
    hmm1.build_model(gmm_mgr, (0, ), 3, 3, trans)
    hmm2 = make_forward_hmm(gmm_mgr, num_states, order=3, exact=True)
    hmm_mgr = HmmMgr((hmm0, hmm1, hmm2))

    # Flip the condition to True to turn on verification debug output;
    # DebugPrint() with no tags is a no-op context.
    with DebugPrint("bwt_vrfy") if False else DebugPrint():
        tg0 = TrainingGraph(graph, hmm_mgr, split_prob_dict=dict())
    result_hmm = tg0.convert_to_standalone_hmm()
    ret += "\n\n========= TG CONVERTED TO Hmm =========\n\n" + result_hmm.to_string(full=True)
    return ret
def test0(): print '============== test0 ============' dimension = 3 target0 = make_target(dimension, 2, (0.75, 0.25), ((1, 1, 1), (2, 3, 4)), ((1, 1, 1), (0.5, 0.5, 1))) target1 = make_target(dimension, 2, (0.5, 0.5), ((-1, -1, -1), (-2, -3, -4)), ((1, 1, 1), (0.5, 0.5, 1))) target2 = make_target(dimension, 2, (0.1, 0.9), ((1, 1, -2), (3, 3, 5)), ((1, 1, 1), (0.5, 0.5, 1))) print target0 print target1 print target2 GaussianModelBase.seed(0) labels = ('A', 'B', 'C') ncomps = (1, 2, 2) sources = dict((('A', target0), ('B', target1), ('C', target2))) GaussianMixtureModel.seed(0) gmm_mgr = GmmMgr(ncomps, dimension, GaussianModelBase.DIAGONAL_COVARIANCE) c0 = AdaptingGmmClassifier(gmm_mgr, izip(labels, count())) print print c0 # Prime things a little bit to try to get a good start c0.set_relevance(0.001) for i in xrange(1): for label in labels: target = sources[label] data = (target.sample() for i in xrange(100)) c0.adapt_one_class(label, data) # Now adapt on more data c0.set_relevance(10) for i in xrange(10): for label in labels: target = sources[label] data = (target.sample() for i in xrange(100)) c0.adapt_one_class(label, data) print print c0 print
def test1(): print '============== test1 ============' dimension = 3 target0 = make_target(dimension, 2, (0.75, 0.25), ((1, 1, 1), (2, 3, 4)), ((1, 1, 1), (0.5, 0.5, 1))) target1 = make_target(dimension, 2, (0.5, 0.5), ((-1, -1, -1), (-2, -3, -4)), ((1, 1, 1), (0.5, 0.5, 1))) target2 = make_target(dimension, 2, (0.1, 0.9), ((1, 1, -2), (3, 3, 5)), ((1, 1, 1), (0.5, 0.5, 1))) print target0 print target1 print target2 GaussianModelBase.seed(0) labels = ('A', 'B', 'C') ncomps = (1, 2, 2) sources = dict((('A', target0), ('B', target1), ('C', target2))) GaussianMixtureModel.seed(0) gmm_mgr = GmmMgr(ncomps, dimension, GaussianModelBase.DIAGONAL_COVARIANCE) c0 = AdaptingGmmClassifier(gmm_mgr, izip(labels, count())) print print c0 result = list() proc0 = AdaptingGmmClassProcessor(c0, result.append) # Prime things a little bit to try to get a good start c0.set_relevance(0.001) c0.set_num_em_iterations(2) for i in xrange(1): for label in labels: target = sources[label] data = (target.sample() for i in xrange(100)) proc0.process((label, data)) # Now adapt on more data c0.set_relevance(10) c0.set_num_em_iterations(2) for i in xrange(10): for label in labels: target = sources[label] data = (target.sample() for i in xrange(100)) proc0.process((label, data)) print print c0 print print len(result) # XXX Win32 gets values off in the last 2-3 hex digits. I'm not sure how to account for this in a # logref test, so I'm disabling this printing for now. # for training_label, scores in result[-10:]: # print training_label, tuple(((label, float_to_readable_string(score)) for score, label in scores)) correct = tuple(label for label, scores in result) guessed = tuple(scores[0][1] for l, scores in result) print len(correct), len(guessed) ind = [c == g for (c, g) in izip(correct, guessed)] print ind.count(True) print ind.count(True) / len(correct)
false_vars_prime = N.array((3.0097, 6.0277, 8.3711, 10.7198, 13.4285, 456.7074), dtype=N.float32) # mfcc, no c0: 20,000 frames of Hugh talking true_means_prime = N.array((-4.8087, 3.9863, -0.5217, 1.3076, 0.7514, -4.6497), dtype=N.float32) true_vars_prime = N.array((26.8496, 32.6631, 32.3662, 24.2963, 36.2244, 34.1555), dtype=N.float32) false_means_prime = N.array((-6.8806, -1.3424, -3.8147, 0.4520, 0.7129, -3.1560), dtype=N.float32) false_vars_prime = N.array((2.7468, 6.2286, 7.4355, 10.1530, 13.3865, 15.9309), dtype=N.float32) true_prime = SimpleGaussianModel(nfeatures, GaussianModelBase.DIAGONAL_COVARIANCE) true_prime.set_model(true_means_prime, true_vars_prime) false_prime = SimpleGaussianModel(nfeatures, GaussianModelBase.DIAGONAL_COVARIANCE) false_prime.set_model(false_means_prime, false_vars_prime) primer = (true_prime, false_prime) GaussianMixtureModel.seed(0) gmm_mgr0 = GmmMgr(ncomps, nfeatures, GaussianModelBase.DIAGONAL_COVARIANCE, primer) gmm_mgr1 = GmmMgr(ncomps, nfeatures, GaussianModelBase.DIAGONAL_COVARIANCE, primer) classify0 = AdaptingGmmClassifier(gmm_mgr0, izip(labels, count())) classify1 = AdaptingGmmClassifier(gmm_mgr1, izip(labels, count())) classify0.set_relevance(333) classify1.set_relevance(333) classify0.set_num_em_iterations(2) classify1.set_num_em_iterations(2) classifier0 = AdaptingGmmClassProcessor(classify0) classifier1 = AdaptingGmmClassProcessor(classify1) gaussian_trainer = SimpleGaussianTrainer(labels, nfeatures) trainer = FunctionProcessor(gaussian_trainer) # audio.mic, fftmag, endpointer, mfcc0, square, mfcc1, classifier0, classifier1, trainer
def _test10():
    # Like test9, but now the Hmms are arranged in a diamond pattern so
    # inter-Hmm probabilities come into play.
    ret = ""

    # GmmMgr setup: dummy models that always score 1.0.
    num_states = 3
    dimension = 2
    models = [DummyModel(dimension, 1.0) for _ in xrange(num_states)]
    gmm_mgr = GmmMgr(models)

    # Topology: more complex than previous examples -- a diamond whose lower
    # branch has an extra hop (node 3 reaches 5 directly or via 4).
    builder = GraphBuilder()
    nodes = [builder.new_node(pair)
             for pair in ((0, 0), (1, 1), (2, 1), (3, 1), (4, 1), (5, 2))]
    for src, dst in ((0, 1), (1, 5), (0, 2), (2, 3), (3, 4), (3, 5), (4, 5)):
        builder.new_arc(nodes[src], nodes[dst])
    graph = FrozenGraph(builder)

    # Two forward Hmms of order 3 (self loop, forward 1, forward 2) around a
    # special middle model that can skip.
    seed(0)
    hmm0 = make_forward_hmm(gmm_mgr, num_states, order=3, exact=True)
    hmm1 = Hmm(1)
    trans = array(((0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0),
                   (0.0, 0.0, 0.0, 0.5, 0.0, 0.5, 0.0),
                   (0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.5),
                   (0.0, 0.0, 0.0, 0.5, 0.35, 0.1, 0.05),
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)))
    hmm1.build_model(gmm_mgr, (0, ), 3, 3, trans)
    hmm2 = make_forward_hmm(gmm_mgr, num_states, order=3, exact=True)
    hmm_mgr = HmmMgr((hmm0, hmm1, hmm2))

    # Splitting probabilities at the two fan-out points.
    spd = {(0, 1): (0.4, 0.3, 0.8),
           (0, 2): (0.6, 0.7, 0.2),
           (3, 4): (0.4, 0.3, 0.8),
           (3, 5): (0.6, 0.7, 0.2)}

    tg0 = TrainingGraph(graph, hmm_mgr, split_prob_dict=spd)
    with DebugPrint("bwt_ctsh") if True else DebugPrint():
        result_hmm = tg0.convert_to_standalone_hmm()
    ret += "\n\n========= TG CONVERTED TO Hmm =========\n\n" + result_hmm.to_string(full=True)
    return ret