Example #1
def test2(num_obs):
    # Each of the 2 nodes contains a 4-state order-2 Hmm; the nodes are connected in a single chain
    dimension = 2

    obs_gen = make_data_generator(dimension)
    obs_list = [obs_gen.next() for i in xrange(num_obs)]

    # GmmMgr setup
    num_models = 20
    models = make_standard_gmms(dimension, num_models)
    gmm_mgr1 = GmmMgr(models[0:10])
    gmm_mgr2 = GmmMgr(models[10:20])

    # Hmm setup
    # Make two Hmms with 4 states and order 2 (self loop, forward 1)
    num_states = 4
    seed(0)
    hmm0 = make_forward_hmm(gmm_mgr1, num_states, 2, exact=True)
    hmm1 = make_forward_hmm(gmm_mgr1, num_states, 2, exact=True)
    hmm_mgr = HmmMgr((hmm0, hmm1))

    # TrainingGraph setup
    gb = GraphBuilder()
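    # Node labels are (label, hmm_index) pairs; the second element selects the
    # Hmm in hmm_mgr that the node will contain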
    node_id0 = gb.new_node((0, 0))
    node_id1 = gb.new_node((1, 1))
    arc_id = gb.new_arc(node_id0, node_id1)
    gr0 = FrozenGraph(gb)
    tg0 = TrainingGraph(gr0, hmm_mgr, dict())

    valid, ret = validate_training_graph(tg0, gmm_mgr1, hmm_mgr, obs_list, 1,
                                         gmm_mgr2)
    return ret
Example #2
def test1(num_obs):
    # 1 node contains a 4-state order-2 Hmm
    dimension = 2
    obs_gen = make_data_generator(dimension)
    obs_list = [obs_gen.next() for i in xrange(num_obs)]

    # GmmMgr setup
    num_models = 20
    models = make_standard_gmms(dimension, num_models)
    gmm_mgr1 = GmmMgr(models[0:10])
    gmm_mgr2 = GmmMgr(models[10:20])

    # Hmm setup
    # Make one Hmm with 4 states and order 2 (self loop, forward 1)
    num_states = 4
    seed(0)
    hmm0 = make_forward_hmm(gmm_mgr1, num_states, 2, exact=True)
    hmm_mgr = HmmMgr((hmm0, ))

    # TrainingGraph setup
    gb = GraphBuilder()
    node_id0 = gb.new_node((0, 0))
    gr0 = FrozenGraph(gb)
    tg0 = TrainingGraph(gr0, hmm_mgr, dict())

    valid, ret = validate_training_graph(tg0, gmm_mgr1, hmm_mgr, obs_list, 1,
                                         gmm_mgr2)
    return ret
Example #3
def _test11(do_display=False):
    # A reduced version of test10
    ret = ""
    # GmmMgr setup

    num_states = 2
    dimension = 2
    models = []
    for i in xrange(num_states):
        dm = DummyModel(dimension, 1.0)
        models.append(dm)

    gmm_mgr = GmmMgr(models)

    gb = GraphBuilder()
    node_id0 = gb.new_node((0, 0))
    node_id1 = gb.new_node((1, 1))
    node_id2 = gb.new_node((2, 1))
    node_id3 = gb.new_node((3, 1))
    node_id4 = gb.new_node((4, 2))

    # The topology here is slightly more complex than in the previous example
    arc_id = gb.new_arc(node_id0, node_id1)
    arc_id = gb.new_arc(node_id1, node_id4)
    arc_id = gb.new_arc(node_id0, node_id2)
    arc_id = gb.new_arc(node_id2, node_id3)
    arc_id = gb.new_arc(node_id3, node_id4)
    arc_id = gb.new_arc(node_id2, node_id4)
    gr0 = FrozenGraph(gb)

    # Make two forward Hmms with 2 states and order 2 (self loop, forward 1).
    # The model in the middle is special and can skip from its input straight to its output.
    seed(0)
    hmm0 = make_forward_hmm(gmm_mgr, num_states, order=2, exact=False)
    hmm1 = Hmm(1)
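    # State order in trans: virtual input (0), real state (1), virtual output (2).
    # Row 0 sends half its mass straight to the output, so this Hmm can act as an
    # epsilon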
    trans = array(((0.0, 0.5, 0.5), (0.0, 0.5, 0.5), (0.0, 0.0, 0.0)))
    hmm1.build_model(gmm_mgr, (0, ), 1, 1, trans)
    hmm2 = make_forward_hmm(gmm_mgr, num_states, order=2, exact=True)
    hmm_mgr = HmmMgr((hmm0, hmm1, hmm2))

    spd = {}
    spd[(0, 1)] = (0.4, )
    spd[(0, 2)] = (0.6, )

    spd[(2, 3)] = (0.4, )
    spd[(2, 4)] = (0.6, )

    tg0 = TrainingGraph(gr0, hmm_mgr, split_prob_dict=spd)

    if do_display:
        tg0.dot_display()
        tg0.dot_display(expand_hmms=True)

    with DebugPrint("bwt_ctsh") if True else DebugPrint():
        result_hmm = tg0.convert_to_standalone_hmm()
    ret += "\n\n========= TG CONVERTED TO Hmm =========\n\n" + result_hmm.to_string(
        full=True)

    return ret
Example #4
def test5(num_obs, do_display=False):
    # A test in which one of the HMMs has a transition from an input directly to
    # an output, so it can behave as an epsilon.  Here that node follows another
    # node in a linear arrangement; a third node is present below but commented out.

    # Data generator setup and data generation
    dimension = 2
    obs_gen = make_data_generator(dimension)
    obs_list = [obs_gen.next() for i in xrange(num_obs)]

    # GmmMgr setup
    num_models = 20
    models = make_standard_gmms(dimension, num_models)
    gmm_mgr1 = GmmMgr(models[0:10])
    gmm_mgr2 = GmmMgr(models[10:20])

    # Hmm setup
    # Make two forward Hmms with 2 states and order 2 (self loop, forward 1).  The model
    # in the middle is special in that it can skip directly from the input state
    # to the output state.
    seed(0)
    num_states = 2
    hmm0 = make_forward_hmm(gmm_mgr1, num_states, 2, exact=False)
    hmm1 = Hmm(1)
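    # State order in trans: virtual input (0), real state (1), virtual output (2);
    # row 0's direct input-to-output mass (0.5) is the epsilon path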
    trans = array(((0.0, 0.5, 0.5), (0.0, 0.5, 0.5), (0.0, 0.0, 0.0)))
    hmm1.build_model(gmm_mgr1, (0, ), 1, 1, trans)
    hmm2 = make_forward_hmm(gmm_mgr1, num_states, 2, exact=False)
    hmm_mgr = HmmMgr((hmm0, hmm1, hmm2))

    # TrainingGraph setup
    gb = GraphBuilder()
    node_id0 = gb.new_node((0, 0))
    node_id1 = gb.new_node((1, 1))
    # node_id2 = gb.new_node((2,2))
    arc_id = gb.new_arc(node_id0, node_id1)
    # arc_id = gb.new_arc(node_id1, node_id2)
    gr0 = FrozenGraph(gb)
    tg0 = TrainingGraph(gr0, hmm_mgr, split_prob_dict=dict())

    if do_display:
        tg0.dot_display()
        tg0.dot_display(expand_hmms=True)

    valid, ret = validate_training_graph(tg0, gmm_mgr1, hmm_mgr, obs_list, 1,
                                         gmm_mgr2)
    return ret
Example #5
def test3(num_obs):
    # Each of the 4 nodes contains a 4- (or 6-)state order-3 Hmm; the nodes are connected in a
    # diamond pattern
    dimension = 2

    obs_gen = make_data_generator(dimension)
    obs_list = [obs_gen.next() for i in xrange(num_obs)]

    # GmmMgr setup
    num_states = 4
    num_models = 20
    models = make_standard_gmms(dimension, num_models)
    gmm_mgr1 = GmmMgr(models[0:10])
    gmm_mgr2 = GmmMgr(models[10:20])

    # Hmm setup
    # Make four Hmms with 4 (or 6) states and order 3 (self loop, forward 1, forward 2)
    seed(0)
    hmm0 = make_forward_hmm(gmm_mgr1, num_states, 3, exact=True)
    # NB: the asymmetry between the two successors is a key part of this test; otherwise,
    # there would be no differences between the transition probs going to these successors,
    # which is the tricky case
    hmm1 = make_forward_hmm(gmm_mgr1, num_states + 2, 3, exact=True)
    hmm2 = make_forward_hmm(gmm_mgr1, num_states, 3, exact=True)
    hmm3 = make_forward_hmm(gmm_mgr1, num_states, 3, exact=True)
    hmm_mgr = HmmMgr((hmm0, hmm1, hmm2, hmm3))

    # TrainingGraph setup
    gb = GraphBuilder()
    node_id0 = gb.new_node((0, 0))
    node_id1 = gb.new_node((1, 1))
    node_id2 = gb.new_node((2, 2))
    node_id3 = gb.new_node((3, 3))
    arc_id = gb.new_arc(node_id0, node_id1)
    arc_id = gb.new_arc(node_id0, node_id2)
    arc_id = gb.new_arc(node_id1, node_id3)
    arc_id = gb.new_arc(node_id2, node_id3)
    gr0 = FrozenGraph(gb)
    spd = {}
    spd[(0, 1)] = (0.4, 0.3, 0.8)
    spd[(0, 2)] = (0.6, 0.7, 0.2)
    tg0 = TrainingGraph(gr0, hmm_mgr, spd)

    valid, ret = validate_training_graph(tg0, gmm_mgr1, hmm_mgr, obs_list, 1,
                                         gmm_mgr2)
    return ret
Example #6
def _test9():
    # Like test8, but now HMMs have multiple inputs and outputs.
    ret = ""
    # GmmMgr setup

    num_states = 3
    dimension = 2
    models = []
    for i in xrange(num_states):
        dm = DummyModel(dimension, 1.0)
        models.append(dm)

    gmm_mgr = GmmMgr(models)

    gb = GraphBuilder()
    node_id0 = gb.new_node((0, 0))
    node_id1 = gb.new_node((1, 1))
    node_id2 = gb.new_node((2, 1))
    node_id3 = gb.new_node((3, 1))
    node_id4 = gb.new_node((4, 1))
    node_id5 = gb.new_node((5, 2))
    arc_id = gb.new_arc(node_id0, node_id1)
    arc_id = gb.new_arc(node_id1, node_id2)
    arc_id = gb.new_arc(node_id2, node_id3)
    arc_id = gb.new_arc(node_id3, node_id4)
    arc_id = gb.new_arc(node_id4, node_id5)
    gr0 = FrozenGraph(gb)

    # Make two forward Hmms with 3 states and order 3 (self loop, forward 1, forward 2).
    # The model in the middle is special and can skip directly from its inputs to its outputs.
    seed(0)
    hmm0 = make_forward_hmm(gmm_mgr, num_states, order=3, exact=True)
    hmm1 = Hmm(1)
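    # State order in trans: virtual inputs (0-2), real state (3), virtual outputs
    # (4-6); cf. the labeled matrix in test6.  Each input either enters the real
    # state (p=0.5) or skips straight to its matching output (p=0.5)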
    trans = array(((0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0),
                   (0.0, 0.0, 0.0, 0.5, 0.0, 0.5, 0.0),
                   (0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.5),
                   (0.0, 0.0, 0.0, 0.5, 0.35, 0.1, 0.05),
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)))
    hmm1.build_model(gmm_mgr, (0, ), 3, 3, trans)
    hmm2 = make_forward_hmm(gmm_mgr, num_states, order=3, exact=True)
    hmm_mgr = HmmMgr((hmm0, hmm1, hmm2))

    with DebugPrint("bwt_vrfy") if False else DebugPrint():
        tg0 = TrainingGraph(gr0, hmm_mgr, split_prob_dict=dict())

    result_hmm = tg0.convert_to_standalone_hmm()
    ret += "\n\n========= TG CONVERTED TO Hmm =========\n\n" + result_hmm.to_string(
        full=True)

    return ret
Example #7
def test0():
    print '============== test0 ============'
    dimension = 3
    target0 = make_target(dimension, 2, (0.75, 0.25), ((1, 1, 1), (2, 3, 4)),
                          ((1, 1, 1), (0.5, 0.5, 1)))
    target1 = make_target(dimension, 2, (0.5, 0.5),
                          ((-1, -1, -1), (-2, -3, -4)),
                          ((1, 1, 1), (0.5, 0.5, 1)))
    target2 = make_target(dimension, 2, (0.1, 0.9), ((1, 1, -2), (3, 3, 5)),
                          ((1, 1, 1), (0.5, 0.5, 1)))
    print target0
    print target1
    print target2
    GaussianModelBase.seed(0)

    labels = ('A', 'B', 'C')
    ncomps = (1, 2, 2)

    sources = dict((('A', target0), ('B', target1), ('C', target2)))

    GaussianMixtureModel.seed(0)
    gmm_mgr = GmmMgr(ncomps, dimension, GaussianModelBase.DIAGONAL_COVARIANCE)
    c0 = AdaptingGmmClassifier(gmm_mgr, izip(labels, count()))
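    # izip(labels, count()) pairs each class label with its model index in gmm_mgr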
    print
    print c0

    # Prime things a little bit to try to get a good start
    c0.set_relevance(0.001)
    for i in xrange(1):
        for label in labels:
            target = sources[label]
            data = (target.sample() for i in xrange(100))
            c0.adapt_one_class(label, data)

    # Now adapt on more data
    c0.set_relevance(10)
    for i in xrange(10):
        for label in labels:
            target = sources[label]
            data = (target.sample() for i in xrange(100))
            c0.adapt_one_class(label, data)

    print
    print c0
    print
Example #8
def test6():
    """
    This test builds an Hmm with dummy models which always give a score of 1, but
    with a somewhat unusual topology in which there are 6 actual states chained together
    with 2 virtual inputs and 3 virtual outputs.  The point is to make sure we can handle
    this asymmetric case correctly.
    """
    import pprint
    num_states = 6
    dimension = 2
    
    # GmmMgr setup
    models = []
    for i in xrange(num_states):
        dm = DummyModel(dimension, 1.0)
        models.append(dm)
    
    mm = GmmMgr(models)
    models = range(num_states)
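    # From here on, models are referred to by their indices in the GmmMgr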

    # Hmm setup T0:  i1   i2   1    2    3    4    5    6    o1   o2   o3    FROM: 
    trans = array(((0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),  # i1
                   (0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),  # i2

                   (0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),  # 1
                   (0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),  # 2
                   (0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0),  # 3
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.4, 0.0, 0.1, 0.0, 0.0),  # 4
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.4, 0.0, 0.1, 0.0),  # 5
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.5),  # 6

                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),  # o1
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),  # o2
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0))) # o3
    
    hmm0 = Hmm(num_states, log_domain=True)
    hmm0.build_model(mm, models, 2, 3, trans)
    print hmm0.to_string(True)

    num_passes = 1
    for p in xrange(num_passes):
        # Reseeding here ensures we are repeating the same observations in each pass
        SimpleGaussianModel.seed(0)
        mm.set_adaptation_state("INITIALIZING")
        mm.clear_all_accumulators()
        hmm0.begin_adapt("STANDALONE")
        mm.set_adaptation_state("ACCUMULATING")

        obs = [array((0,0))] * 11  # Dummy sequence of length 11
        hmm0.adapt_one_sequence(obs)
        
        mm.set_adaptation_state("APPLYING")
        hmm0.end_adapt()
        mm.apply_all_accumulators()
        mm.set_adaptation_state("NOT_ADAPTING")
        print hmm0.to_string(True)
Example #9
def test3_helper(dataset_idx, num_passes):
    """
    This test mimics a run ChrisW did with HTK.  The models are 2-D single-mode Gaussians
    embedded in a 1-state Hmm.  Each data point is taken as a sequence of length 1.    
    """
    dimension = 2

    # Gmm setup
    gmm = GaussianMixtureModel(dimension, GaussianMixtureModel.DIAGONAL_COVARIANCE, 1)
    gmm.set_weights(array((1.0,)))
    mu = array(((0.0,0.0),))
    v = array(((1.0,1.0),))
    gmm.set_model(mu, v)
    mm = GmmMgr((gmm,))

    # Hmm setup
    # A transition probability matrix with a p=1 exit from the real state.
    # The entry state feeds into the real state with p=1.
    trans = array(((0.0, 1.0, 0.0),
                   (0.0, 0.0, 1.0),
                   (0.0, 0.0, 0.0)))
    hmm0 = Hmm(1, log_domain=True)
    hmm0.build_model(mm, (0,), 1, 1, trans)
    print hmm0.to_string(True)

    # adaptation
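    # The adaptation protocol used throughout these tests: INITIALIZING clears
    # state for a new pass, ACCUMULATING collects statistics one sequence at a
    # time, APPLYING folds the accumulators into the model parameters, and
    # NOT_ADAPTING returns the manager to ordinary scoring use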
    data = datasets[dataset_idx]
    for p in xrange(num_passes):
        mm.set_adaptation_state("INITIALIZING")
        mm.clear_all_accumulators()
        hmm0.begin_adapt("STANDALONE")
        mm.set_adaptation_state("ACCUMULATING")
        for point in data:
            s = array(point)
            # We treat each point as an entire sequence
            hmm0.adapt_one_sequence((s,))
        mm.set_adaptation_state("APPLYING")
        hmm0.end_adapt()
        mm.apply_all_accumulators()
        mm.set_adaptation_state("NOT_ADAPTING")

    print hmm0.to_string(True)
Example #10
def test5_helper(num_obs, num_passes):
    """
    This test mimics a run ChrisW did with HTK.  The models are 2-D single-mode Gaussians
    embedded in a 3-state Hmm.  Each observation is a sequence of length 11, taken by sampling
    2, 3, and 6 times, respectively, from three target distributions.    
    """
    import pprint
    num_states = 3
    dimension = 2
    
    # Data generator setup
    target_means = ((1,1), (2,2), (3,3))
    target_vars = ((0.1,0.1), (0.2,0.2), (0.3,0.3))
    target_durations = (2, 3, 6)
    num_steps = sum(target_durations)
    generators = [SimpleGaussianModel(dimension, SimpleGaussianModel.DIAGONAL_COVARIANCE) for i in xrange(num_states)]
    [m.set_model(tm, tv) for (m, tm, tv) in izip(generators, target_means, target_vars)]
    SimpleGaussianModel.seed(0)

    # Gmm setup
    num_states = 3
    models = []
    for i in xrange(num_states):
        gmm = GaussianMixtureModel(dimension, GaussianMixtureModel.DIAGONAL_COVARIANCE, 1)
        gmm.set_weights(array((1.0,)))
        mu = array(((0.0,0.0),))
        v = array(((1.0,1.0),))
        gmm.set_model(mu, v)
        models.append(gmm)
    
    mm = GmmMgr(models)
    models = range(num_states)

    # Hmm setup
    trans = array(((0.0, 1.0, 0.0, 0.0, 0.0),
                   (0.0, 0.5, 0.5, 0.0, 0.0),
                   (0.0, 0.0, 0.5, 0.5, 0.0),
                   (0.0, 0.0, 0.0, 0.5, 0.5),
                   (0.0, 0.0, 0.0, 0.0, 0.0)))
    hmm0 = Hmm(num_states, log_domain=True)
    hmm0.build_model(mm, models, 1, 1, trans)
    print hmm0.to_string(True)

    for p in xrange(num_passes):
        # Reseeding here ensures we are repeating the same observations in each pass
        SimpleGaussianModel.seed(0)
        mm.set_adaptation_state("INITIALIZING")
        mm.clear_all_accumulators()
        hmm0.begin_adapt("STANDALONE")
        mm.set_adaptation_state("ACCUMULATING")
        obs_gen = obs_generator(generators, target_durations)
        for i in xrange(num_obs):
            obs = obs_gen.next()
            hmm0.adapt_one_sequence(obs)
        
            obs2 = [tuple(a) for a in obs]
            # Uncomment these lines to show observations as nicely formatted sequences; this
            # is what I gave ChrisW to use with his HTK runs.
            # pprint.pprint(obs2)
            # print
        mm.set_adaptation_state("APPLYING")
        hmm0.end_adapt()
        mm.apply_all_accumulators()
        mm.set_adaptation_state("NOT_ADAPTING")
        print hmm0.to_string(True)
Example #11
def test2a(num_obs, num_passes):
    dimension = 2
    
    # Data generator setup
    target_means = (1,1)
    target_vars = (0.1,0.1)
    generator = SimpleGaussianModel(dimension, SimpleGaussianModel.DIAGONAL_COVARIANCE)
    generator.set_model(target_means, target_vars)
    SimpleGaussianModel.seed(0)
    data = [generator.sample() for i in xrange(num_obs)]


    # Gmm setup
    num_mixtures = 2
    gmm0 = make_gmm_diag(dimension, num_mixtures)
    gmm1 = make_gmm_diag(dimension, num_mixtures)
    mm = GmmMgr((gmm1,))

    # Hmm setup
    # A transition probability matrix with a p ~= 1 self-loop for the real state.
    # The entry state feeds into the real state with p=1.  We use p ~= 1 rather
    # than p = 1 for the self-loop since we need *some* probability of finishing.
    trans = array(((0.0, 1.0, 0.0),
                   (0.0, 0.999999999999, 0.000000000001),
                   (0.0, 0.0, 0.0)))
    hmm0 = Hmm(1, log_domain=True)
    hmm0.build_model(mm, (0,), 1, 1, trans)
    print hmm0.to_string(True) + '\n'
    print gmm0
    print '\n\n'

    # Try some adaptation
    for p in xrange(num_passes):
        mm.set_adaptation_state("INITIALIZING")
        mm.clear_all_accumulators()
        hmm0.begin_adapt("STANDALONE")
        mm.set_adaptation_state("ACCUMULATING")
        hmm0.adapt_one_sequence(data)
        mm.set_adaptation_state("APPLYING")
        hmm0.end_adapt()
        mm.apply_all_accumulators()
        mm.set_adaptation_state("NOT_ADAPTING")

    really_print = False
    with DebugPrint("gaussian", "gaussian_pt", "gaussian_gmm_score") if really_print else DebugPrint():
        gmm0.adapt(data, max_iters = num_passes)

    print hmm0.to_string(True) + '\n'
    print gmm0
Example #12
def test0(num_obs, num_passes):
    dimension = 2
    
    # Data generator setup
    target_means = (1,1)
    target_vars = (0.1,0.1)
    generator = SimpleGaussianModel(dimension, SimpleGaussianModel.DIAGONAL_COVARIANCE)
    generator.set_model(target_means, target_vars)

    SimpleGaussianModel.seed(0)
    GaussianMixtureModel.seed(0)

    mm = GmmMgr(dimension)

    # Hmm setup
    hmm0 = Hmm(0, log_domain=True)

    # A transition probability matrix with no real state.
    # The entry state feeds into the exit state with p=1.
    trans = array(((0.0, 1.0),
                   (0.0, 0.0)))
    
    hmm0.build_model(mm, (), 1, 1, trans)
    print hmm0.to_string(True)

    # Try some adaptation.  Note that we are feeding the entire data set as one stream
    # to the Hmm adaptation call.
    data = [generator.sample() for i in xrange(num_obs)]
    for p in xrange(num_passes):
        mm.set_adaptation_state("INITIALIZING")
        mm.clear_all_accumulators()
        hmm0.begin_adapt("STANDALONE")
        mm.set_adaptation_state("ACCUMULATING")
        with DebugPrint("hmm_gxfs", "hmm_aos") if False else DebugPrint():
            hmm0.adapt_one_sequence(data)
        mm.set_adaptation_state("APPLYING")
        hmm0.end_adapt()
        mm.apply_all_accumulators()
        mm.set_adaptation_state("NOT_ADAPTING")

    print hmm0.to_string(True)
Example #13
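 # Fragment of an HTK MMF (model file) reader: 'result' holds the parsed file,
 # with global options in result['options'] and HMM definitions in
 # result['models'].  Assumes covar_map, dc, and log_domain are defined in the
 # enclosing scope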
 opts = result['options']
 if 'models' not in result:
     raise IOError("No models found in %s!" % (filename,))
 models = result['models']
 if 'vecsize' not in opts:
     raise IOError("No vecsize option found in %s" % (filename,))
 dim = opts['vecsize']
 if 'covar' not in opts:
     covar_type = GaussianMixtureModel.DIAGONAL_COVARIANCE
 else:
     if opts['covar'] not in covar_map:
         raise IOError("Unknown covar option %s found in %s" % (opts['covar'], filename,))
     covar_type = covar_map[opts['covar']]
 hmm_mgr = HmmMgr(dim)
 gmm_mgr = GmmMgr(dim)
 hmms = []
 names = []
 unnamed_index = 0
 for label, m in models:
     assert label == 'HMM'
     dc and dc("m = \n%s" % (pformat(m),))
     dc and dc("m.keys() = \n%s" % (m.keys(),))
     if hasattr(m, 'decl'):
         name = m.decl
     else:
         name = ("UnnamedModel%d" % unnamed_index)
         unnamed_index += 1
     n = m.numstates - 2   # HTK numstates counts virtual entry and exit states
     hmm = Hmm(n, log_domain)
     gmms = []
Example #14
def test2(num_obs, num_passes):
    dimension = 2
    
    # Data generator setup
    target_means = (1,1)
    target_vars = (0.1,0.1)
    generator = SimpleGaussianModel(dimension, SimpleGaussianModel.DIAGONAL_COVARIANCE)
    generator.set_model(target_means, target_vars)

    SimpleGaussianModel.seed(0)
    GaussianMixtureModel.seed(0)

    # Gmm setup
    num_mixtures = 2
    gmm0 = make_gmm(dimension, num_mixtures)
    gmm1 = make_gmm(dimension, num_mixtures)
    mm = GmmMgr((gmm1,))

    # Hmm setup
    # A transition probability matrix with a p=1 exit for the real state.
    # The entry state feeds into the real state with p=1.
    trans = array(((0.0, 1.0, 0.0),
                   (0.0, 0.0, 1.0),
                   (0.0, 0.0, 0.0)))
    hmm0 = Hmm(1, log_domain=True)
    hmm0.build_model(mm, (0,), 1, 1, trans)
    print hmm0.to_string(True) + '\n'
    print gmm0
    print '\n\n'

    # Try some adaptation
    data = [generator.sample() for i in xrange(num_obs)]
    for p in xrange(num_passes):
        mm.set_adaptation_state("INITIALIZING")
        mm.clear_all_accumulators()
        hmm0.begin_adapt("STANDALONE")
        mm.set_adaptation_state("ACCUMULATING")
        for point in data:
            # We treat each point as an entire sequence
            hmm0.adapt_one_sequence((point,))
        mm.set_adaptation_state("APPLYING")
        hmm0.end_adapt()
        mm.apply_all_accumulators()
        mm.set_adaptation_state("NOT_ADAPTING")
    gmm0.adapt(data, max_iters = num_passes)

    print hmm0.to_string(True) + '\n'
    print gmm0
Example #15
def test4(num_passes, num_obs):
    # Each of the 4 nodes contains a 4- (or 6-)state order-3 Hmm; the nodes are connected in a
    # diamond pattern
    ret = ""

    dimension = 2

    # Data generator setup and data generation
    obs_gen = make_data_generator(dimension)
    obs_list = [obs_gen.next() for i in xrange(num_obs)]

    # GmmMgr setup
    num_models = 10
    models = make_standard_gmms(dimension, num_models)
    gmm_mgr = GmmMgr(models)

    # Hmm setup
    # Make three Hmms with 4 (or 6) states and order 3 (self loop, forward 1, forward 2)
    num_states = 4
    seed(0)
    hmm0 = make_forward_hmm(gmm_mgr, num_states, 3, exact=True)
    hmm1 = make_forward_hmm(gmm_mgr, num_states + 2, 3, exact=True)
    hmm2 = make_forward_hmm(gmm_mgr, num_states, 3, exact=True)
    hmm_mgr = HmmMgr((hmm0, hmm1, hmm2))

    # TrainingGraph setup
    gb = GraphBuilder()
    # Note that here we are using the same HMM in two different TG nodes
    node_id0 = gb.new_node((0, 0))
    node_id1 = gb.new_node((1, 1))
    node_id2 = gb.new_node((2, 2))
    node_id3 = gb.new_node((3, 0))
    arc_id = gb.new_arc(node_id0, node_id1)
    arc_id = gb.new_arc(node_id0, node_id2)
    arc_id = gb.new_arc(node_id1, node_id3)
    arc_id = gb.new_arc(node_id2, node_id3)
    gr0 = FrozenGraph(gb)

    spd = {}
    spd[(0, 1)] = (0.4, 0.3, 0.8)
    spd[(0, 2)] = (0.6, 0.7, 0.2)

    tg0 = TrainingGraph(gr0, hmm_mgr, spd)

    # Now adapt original TrainingGraph
    for i in xrange(num_passes):
        gmm_mgr.set_adaptation_state("INITIALIZING")
        gmm_mgr.clear_all_accumulators()
        tg0.begin_training()
        gmm_mgr.set_adaptation_state("ACCUMULATING")
        for obs in obs_list:
            tg0.train_one_sequence(obs)
        tg0.end_training()
        gmm_mgr.set_adaptation_state("APPLYING")
        gmm_mgr.apply_all_accumulators()
        gmm_mgr.set_adaptation_state("NOT_ADAPTING")

    ret = tg0.to_string(full=True)
    return ret
Example #16
def test7():
    """
    This test builds an Hmm with dummy models which always give a score of 1, but
    with a somewhat unusual topology in which there are 6 actual states chained together
    with 2 virtual inputs and 3 virtual outputs.  The point is to make sure we can handle
    this asymmetric case correctly.  This is the same as test6 except that now we'll use
    the network adaptation interface instead.
    """
    import pprint
    num_states = 6
    dimension = 2
    
    # GmmMgr setup
    models = []
    for i in xrange(num_states):
        dm = DummyModel(dimension, 1.0)
        models.append(dm)
    
    mm = GmmMgr(models)

    # Hmm setup T0:  i1   i2   1    2    3    4    5    6    o1   o2   o3    FROM: 
    trans = array(((0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),  # i1
                   (0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),  # i2

                   (0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),  # 1
                   (0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),  # 2
                   (0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0),  # 3
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.4, 0.0, 0.1, 0.0, 0.0),  # 4
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.4, 0.0, 0.1, 0.0),  # 5
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.5),  # 6

                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),  # o1
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),  # o2
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0))) # o3
    
    hmm0 = Hmm(num_states, log_domain=True)
    models = range(num_states)
    hmm0.build_model(mm, models, 2, 3, trans)
    print hmm0.to_string(True)

    num_passes = 1
    for p in xrange(num_passes):
        # Reseeding here ensures we are repeating the same observations in each pass
        SimpleGaussianModel.seed(0)
        mm.set_adaptation_state("INITIALIZING")
        hmm0.begin_adapt("NETWORK")
        mm.set_adaptation_state("ACCUMULATING")

        num_obs = 11
        obs = [array((0,0))] * num_obs  # Dummy sequence

        context = hmm0.init_for_forward_pass(obs, terminal = True)

        # Add some mass into the system for the forward pass.  To match the behavior of
        # standalone adaptation, we divide an initial mass of 1 evenly across the inputs
        hmm0.accum_input_alphas(context, array([1.0/hmm0.num_inputs] * hmm0.num_inputs))

        # Actually do the forward pass.  Note that we must process one more frame than the number of
        # observations - this is because an extra frame is automatically added which scores 1 on the exit
        # states of the Hmm (and 0 on all real states).  XXX we might want clients to do this for
        # themselves at some point rather than this automatic behavior:

        for frame in xrange(num_obs + 1):
            output_alphas = hmm0.process_one_frame_forward(context)
            print output_alphas

        # Likewise, we initialize and then make the backward pass:
        hmm0.init_for_backward_pass(context)
        hmm0.accum_input_betas(context, array([1.0] * hmm0.num_outputs))
        for frame in xrange(num_obs + 1):
            output_betas = hmm0.process_one_frame_backward(context)
            print output_betas

        # Now collect all the gamma sums; here there's only one:
        norm = hmm0.get_initial_gamma_sum()
        hmm0.add_to_gamma_sum(norm, context)

        # Here's where the actual accumulation happens:
        hmm0.do_accumulation(context, norm)
        
        mm.set_adaptation_state("APPLYING")
        hmm0.end_adapt()
        mm.apply_all_accumulators()
        mm.set_adaptation_state("NOT_ADAPTING")
        print hmm0.to_string(True)
Example #17
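# Fragment of a GMM-classifier pipeline setup: two GmmMgr/classifier chains are
# primed with hand-measured Gaussian statistics.  Assumes nfeatures, ncomps, and
# labels are defined earlier, along with itertools' izip and count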
false_vars_prime = N.array((3.0097, 6.0277, 8.3711, 10.7198, 13.4285, 456.7074), dtype=N.float32)

# mfcc, no c0: 20,000 frames of Hugh talking
true_means_prime = N.array((-4.8087, 3.9863, -0.5217, 1.3076, 0.7514, -4.6497), dtype=N.float32)
true_vars_prime = N.array((26.8496, 32.6631, 32.3662, 24.2963, 36.2244, 34.1555), dtype=N.float32)
false_means_prime = N.array((-6.8806, -1.3424, -3.8147, 0.4520, 0.7129, -3.1560), dtype=N.float32)
false_vars_prime = N.array((2.7468, 6.2286, 7.4355, 10.1530, 13.3865, 15.9309), dtype=N.float32)
true_prime = SimpleGaussianModel(nfeatures, GaussianModelBase.DIAGONAL_COVARIANCE)
true_prime.set_model(true_means_prime, true_vars_prime)
false_prime = SimpleGaussianModel(nfeatures, GaussianModelBase.DIAGONAL_COVARIANCE)
false_prime.set_model(false_means_prime, false_vars_prime)

primer = (true_prime, false_prime)

GaussianMixtureModel.seed(0)
gmm_mgr0 = GmmMgr(ncomps, nfeatures, GaussianModelBase.DIAGONAL_COVARIANCE, primer)
gmm_mgr1 = GmmMgr(ncomps, nfeatures, GaussianModelBase.DIAGONAL_COVARIANCE, primer)
classify0 = AdaptingGmmClassifier(gmm_mgr0, izip(labels, count()))
classify1 = AdaptingGmmClassifier(gmm_mgr1, izip(labels, count()))
classify0.set_relevance(333)
classify1.set_relevance(333)
classify0.set_num_em_iterations(2)
classify1.set_num_em_iterations(2)
classifier0 = AdaptingGmmClassProcessor(classify0)
classifier1 = AdaptingGmmClassProcessor(classify1)

gaussian_trainer = SimpleGaussianTrainer(labels, nfeatures)
trainer = FunctionProcessor(gaussian_trainer)

# audio.mic, fftmag, endpointer, mfcc0, square, mfcc1, classifier0, classifier1, trainer
Example #18
def _test10():
    # Like test9, but now HMMs are arranged in a diamond pattern so inter-HMM
    # probabilities come into play
    ret = ""
    # GmmMgr setup

    num_states = 3
    dimension = 2
    models = []
    for i in xrange(num_states):
        dm = DummyModel(dimension, 1.0)
        models.append(dm)

    gmm_mgr = GmmMgr(models)

    gb = GraphBuilder()
    node_id0 = gb.new_node((0, 0))
    node_id1 = gb.new_node((1, 1))
    node_id2 = gb.new_node((2, 1))
    node_id3 = gb.new_node((3, 1))
    node_id4 = gb.new_node((4, 1))
    node_id5 = gb.new_node((5, 2))

    # The topology here is more complex than in the previous examples
    arc_id = gb.new_arc(node_id0, node_id1)
    arc_id = gb.new_arc(node_id1, node_id5)
    arc_id = gb.new_arc(node_id0, node_id2)
    arc_id = gb.new_arc(node_id2, node_id3)
    arc_id = gb.new_arc(node_id3, node_id4)
    arc_id = gb.new_arc(node_id3, node_id5)
    arc_id = gb.new_arc(node_id4, node_id5)
    gr0 = FrozenGraph(gb)

    # Make two forward Hmms with 3 states and order 3 (self loop, forward 1, forward 2).
    # The model in the middle is special and can skip directly from its inputs to its outputs.
    seed(0)
    hmm0 = make_forward_hmm(gmm_mgr, num_states, order=3, exact=True)
    hmm1 = Hmm(1)
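    # Same epsilon-capable skip model as in _test9: inputs (0-2), real state (3),
    # outputs (4-6); each input can bypass the real state to its matching output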
    trans = array(((0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0),
                   (0.0, 0.0, 0.0, 0.5, 0.0, 0.5, 0.0),
                   (0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.5),
                   (0.0, 0.0, 0.0, 0.5, 0.35, 0.1, 0.05),
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)))
    hmm1.build_model(gmm_mgr, (0, ), 3, 3, trans)
    hmm2 = make_forward_hmm(gmm_mgr, num_states, order=3, exact=True)
    hmm_mgr = HmmMgr((hmm0, hmm1, hmm2))

    spd = {}
    spd[(0, 1)] = (0.4, 0.3, 0.8)
    spd[(0, 2)] = (0.6, 0.7, 0.2)

    spd[(3, 4)] = (0.4, 0.3, 0.8)
    spd[(3, 5)] = (0.6, 0.7, 0.2)

    tg0 = TrainingGraph(gr0, hmm_mgr, split_prob_dict=spd)

    with DebugPrint("bwt_ctsh") if True else DebugPrint():
        result_hmm = tg0.convert_to_standalone_hmm()
    ret += "\n\n========= TG CONVERTED TO Hmm =========\n\n" + result_hmm.to_string(
        full=True)

    return ret
Example #19
def test8_helper(num_obs, num_passes):
    """
    This test mimics a run ChrisW did with HTK.  The models are 2-D single-mode Gaussians embedded
    in a 3-state Hmm.  Each observation is a sequence of length 11, taken by sampling 2, 3, and 6
    times, respectively, from three target distributions.  This is identical to test5 except that
    here I have built the Hmm with only one Gmm, which is shared by all three states.
    """
    import pprint
    num_states = 3
    dimension = 2
    
    # Data generator setup
    target_means = ((1,1), (2,2), (3,3))
    target_vars = ((0.1,0.1), (0.2,0.2), (0.3,0.3))
    target_durations = (2, 3, 6)
    num_steps = sum(target_durations)
    generators = [SimpleGaussianModel(dimension, SimpleGaussianModel.DIAGONAL_COVARIANCE) for i in xrange(num_states)]
    [m.set_model(tm, tv) for (m, tm, tv) in izip(generators, target_means, target_vars)]
    SimpleGaussianModel.seed(0)

    # Gmm setup
    num_states = 3

    gmm = GaussianMixtureModel(dimension, GaussianMixtureModel.DIAGONAL_COVARIANCE, 1)
    gmm.set_weights(array((1.0,)))
    mu = array(((0.0,0.0),))
    v = array(((1.0,1.0),))
    gmm.set_model(mu, v)
    models = (gmm,)
    
    mm = GmmMgr(models)
    # Here's where we're using the same Gmm in all three states of this Hmm.
    models = (0, 0, 0)

    # Hmm setup
    trans = array(((0.0, 1.0, 0.0, 0.0, 0.0),
                   (0.0, 0.5, 0.5, 0.0, 0.0),
                   (0.0, 0.0, 0.5, 0.5, 0.0),
                   (0.0, 0.0, 0.0, 0.5, 0.5),
                   (0.0, 0.0, 0.0, 0.0, 0.0)))
    hmm0 = Hmm(num_states, log_domain=True)
    hmm0.build_model(mm, models, 1, 1, trans)
    print hmm0.to_string(True)

    for p in xrange(num_passes):
        # Reseeding here ensures we are repeating the same observations in each pass
        SimpleGaussianModel.seed(0)
        mm.set_adaptation_state("INITIALIZING")
        mm.clear_all_accumulators()
        hmm0.begin_adapt("STANDALONE")
        mm.set_adaptation_state("ACCUMULATING")
        obs_gen = obs_generator(generators, target_durations)
        for i in xrange(num_obs):
            obs = obs_gen.next()
            hmm0.adapt_one_sequence(obs)
        mm.set_adaptation_state("APPLYING")
        hmm0.end_adapt()
        mm.apply_all_accumulators()
        mm.set_adaptation_state("NOT_ADAPTING")
        print hmm0.to_string(True)
Example #20
def test1():
    print '============== test1 ============'
    dimension = 3
    target0 = make_target(dimension, 2, (0.75, 0.25), ((1, 1, 1), (2, 3, 4)),
                          ((1, 1, 1), (0.5, 0.5, 1)))
    target1 = make_target(dimension, 2, (0.5, 0.5),
                          ((-1, -1, -1), (-2, -3, -4)),
                          ((1, 1, 1), (0.5, 0.5, 1)))
    target2 = make_target(dimension, 2, (0.1, 0.9), ((1, 1, -2), (3, 3, 5)),
                          ((1, 1, 1), (0.5, 0.5, 1)))
    print target0
    print target1
    print target2
    GaussianModelBase.seed(0)

    labels = ('A', 'B', 'C')
    ncomps = (1, 2, 2)

    sources = dict((('A', target0), ('B', target1), ('C', target2)))

    GaussianMixtureModel.seed(0)
    gmm_mgr = GmmMgr(ncomps, dimension, GaussianModelBase.DIAGONAL_COVARIANCE)
    c0 = AdaptingGmmClassifier(gmm_mgr, izip(labels, count()))
    print
    print c0

    result = list()
    proc0 = AdaptingGmmClassProcessor(c0, result.append)
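    # Each classification lands in result as a (label, scores) pair via the
    # result.append callback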

    # Prime things a little bit to try to get a good start
    c0.set_relevance(0.001)
    c0.set_num_em_iterations(2)
    for i in xrange(1):
        for label in labels:
            target = sources[label]
            data = (target.sample() for i in xrange(100))
            proc0.process((label, data))

    # Now adapt on more data
    c0.set_relevance(10)
    c0.set_num_em_iterations(2)
    for i in xrange(10):
        for label in labels:
            target = sources[label]
            data = (target.sample() for i in xrange(100))
            proc0.process((label, data))

    print
    print c0
    print
    print len(result)
    # XXX Win32 gets values off in the last 2-3 hex digits.  I'm not sure how to account for this in a
    # logref test, so I'm disabling this printing for now.

    # for training_label, scores in result[-10:]:
    #     print training_label, tuple(((label, float_to_readable_string(score)) for score, label in scores))
    correct = tuple(label for label, scores in result)
    guessed = tuple(scores[0][1] for l, scores in result)
    print len(correct), len(guessed)
    ind = [c == g for (c, g) in izip(correct, guessed)]
    print ind.count(True)
    print float(ind.count(True)) / len(correct)