Example #1
    def __init__(self, processors, sendee=None, sending=True):
        super(ChainProcessor, self).__init__(sendee, sending=sending)
        processors = tuple(processors)
        if not processors:
            raise ValueError("expected at least one element in processors chain, got zero")

        # front of the chain, where we push stuff
        self.head = processors[0]

        # create chain of processors, linking each processor's send function to
        # its successor's process() function.
        gb = GraphBuilder()
        nodes, starts, ends = gb.add_graph(processors[0].graph)
        assert len(starts) == 1 and len(ends) == 1
        if len(processors) > 1:
            succ_iter = iter(processors)
            succ_iter.next()
            for pred, succ in izip(processors, succ_iter):
                pred.set_sendee(succ.process)
                nodes, new_starts, new_ends = gb.add_graph(succ.graph)
                gb.new_arc(ends[0], new_starts[0])
                starts, ends = new_starts, new_ends
                assert len(starts) == 1 and len(ends) == 1
            assert succ == processors[-1]

        # set up the list that will collect what the final element pushes
        self.collector = list()
        processors[-1].set_sendee(self.collector.append)

        self._graph = FrozenGraph(gb)
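The chaining above is plain callback plumbing: each processor's sendee is its successor's process method, and the final sendee appends into a list. A minimal standalone sketch of the same pattern follows; MiniProcessor and Scaler are hypothetical stand-ins for the processor interface, not library classes.

class MiniProcessor(object):
    # Stand-in for the processor interface assumed above: process()
    # consumes one item, send() forwards a result to the sendee.
    def __init__(self):
        self._sendee = None
    def set_sendee(self, sendee):
        self._sendee = sendee
    def send(self, item):
        self._sendee(item)

class Scaler(MiniProcessor):
    def __init__(self, factor):
        MiniProcessor.__init__(self)
        self.factor = factor
    def process(self, item):
        self.send(item * self.factor)

# Link each processor's send to its successor's process, as ChainProcessor does.
procs = [Scaler(2), Scaler(3), Scaler(5)]
for pred, succ in zip(procs[:-1], procs[1:]):
    pred.set_sendee(succ.process)
collector = list()
procs[-1].set_sendee(collector.append)

procs[0].process(1)
assert collector == [30]  # 1 * 2 * 3 * 5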
Example #2
def test4(num_passes, num_obs):
    # Each of the 4 nodes contains a 4 (or 6)-node order-3 Hmm; the nodes are connected in a
    # diamond pattern
    ret = ""

    dimension = 2

    # Data generator setup and data generation
    obs_gen = make_data_generator(dimension)
    obs_list = [obs_gen.next() for i in xrange(num_obs)]

    # GmmMgr setup
    num_models = 10
    models = make_standard_gmms(dimension, num_models)
    gmm_mgr = GmmMgr(models)

    # Hmm setup
    # Make three Hmms with 4 (or 6) states and order 3 (self loop, forward 1, forward 2)
    num_states = 4
    seed(0)
    hmm0 = make_forward_hmm(gmm_mgr, num_states, 3, exact=True)
    hmm1 = make_forward_hmm(gmm_mgr, num_states + 2, 3, exact=True)
    hmm2 = make_forward_hmm(gmm_mgr, num_states, 3, exact=True)
    hmm_mgr = HmmMgr((hmm0, hmm1, hmm2))

    # TrainingGraph setup
    gb = GraphBuilder()
    # Note that here we are using the same HMM in two different TG nodes
    node_id0 = gb.new_node((0, 0))
    node_id1 = gb.new_node((1, 1))
    node_id2 = gb.new_node((2, 2))
    node_id3 = gb.new_node((3, 0))
    arc_id = gb.new_arc(node_id0, node_id1)
    arc_id = gb.new_arc(node_id0, node_id2)
    arc_id = gb.new_arc(node_id1, node_id3)
    arc_id = gb.new_arc(node_id2, node_id3)
    gr0 = FrozenGraph(gb)

    spd = {}
    spd[(0, 1)] = (0.4, 0.3, 0.8)
    spd[(0, 2)] = (0.6, 0.7, 0.2)

    tg0 = TrainingGraph(gr0, hmm_mgr, spd)

    # Now adapt original TrainingGraph
    for i in xrange(num_passes):
        gmm_mgr.set_adaptation_state("INITIALIZING")
        gmm_mgr.clear_all_accumulators()
        tg0.begin_training()
        gmm_mgr.set_adaptation_state("ACCUMULATING")
        for obs in obs_list:
            tg0.train_one_sequence(obs)
        tg0.end_training()
        gmm_mgr.set_adaptation_state("APPLYING")
        gmm_mgr.apply_all_accumulators()
        gmm_mgr.set_adaptation_state("NOT_ADAPTING")

    ret = tg0.to_string(full=True)
    return ret
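The adaptation pass follows a fixed protocol: INITIALIZING (clear accumulators, begin training), ACCUMULATING (feed each observation sequence), APPLYING (fold accumulators into the models), then back to NOT_ADAPTING. A small helper wrapping that protocol; a sketch using only the GmmMgr/TrainingGraph calls already shown above:

def run_training_passes(tg, gmm_mgr, obs_list, num_passes):
    # One training pass per iteration, stepping the GmmMgr through its
    # adaptation states exactly as the loop above does.
    for _ in xrange(num_passes):
        gmm_mgr.set_adaptation_state("INITIALIZING")
        gmm_mgr.clear_all_accumulators()
        tg.begin_training()
        gmm_mgr.set_adaptation_state("ACCUMULATING")
        for obs in obs_list:
            tg.train_one_sequence(obs)
        tg.end_training()
        gmm_mgr.set_adaptation_state("APPLYING")
        gmm_mgr.apply_all_accumulators()
        gmm_mgr.set_adaptation_state("NOT_ADAPTING")
    return tg.to_string(full=True)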
Example #3
def _test11():
    # A reduced version of test10
    ret = ""
    # GmmMgr setup

    num_states = 2
    dimension = 2
    models = []
    for i in xrange(num_states):
        dm = DummyModel(dimension, 1.0)
        models.append(dm)

    gmm_mgr = GmmMgr(models)

    gb = GraphBuilder()
    node_id0 = gb.new_node((0, 0))
    node_id1 = gb.new_node((1, 1))
    node_id2 = gb.new_node((2, 1))
    node_id3 = gb.new_node((3, 1))
    node_id4 = gb.new_node((4, 2))

    # The topology here is slightly more complex than in the previous example
    arc_id = gb.new_arc(node_id0, node_id1)
    arc_id = gb.new_arc(node_id1, node_id4)
    arc_id = gb.new_arc(node_id0, node_id2)
    arc_id = gb.new_arc(node_id2, node_id3)
    arc_id = gb.new_arc(node_id3, node_id4)
    arc_id = gb.new_arc(node_id2, node_id4)
    gr0 = FrozenGraph(gb)

    # Make two forward Hmms with 2 states and order 2 (self loop, forward 1).
    # The models in the middle are special and can skip.
    seed(0)
    hmm0 = make_forward_hmm(gmm_mgr, num_states, order=2, exact=False)
    hmm1 = Hmm(1)
    trans = array(((0.0, 0.5, 0.5), (0.0, 0.5, 0.5), (0.0, 0.0, 0.0)))
    hmm1.build_model(gmm_mgr, (0, ), 1, 1, trans)
    hmm2 = make_forward_hmm(gmm_mgr, num_states, order=2, exact=True)
    hmm_mgr = HmmMgr((hmm0, hmm1, hmm2))

    spd = {}
    spd[(0, 1)] = (0.4, )
    spd[(0, 2)] = (0.6, )

    spd[(2, 3)] = (0.4, )
    spd[(2, 4)] = (0.6, )

    tg0 = TrainingGraph(gr0, hmm_mgr, split_prob_dict=spd)

    if do_display:
        tg0.dot_display()
        tg0.dot_display(expand_hmms=True)

    with DebugPrint("bwt_ctsh") if True else DebugPrint():
        result_hmm = tg0.convert_to_standalone_hmm()
    ret += "\n\n========= TG CONVERTED TO Hmm =========\n\n" + result_hmm.to_string(
        full=True)

    return ret
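The 'with DebugPrint("bwt_ctsh") if True else DebugPrint():' line is a one-character debug toggle: change True to False and the tagged debug output disappears while the block's indentation stays put. The same idiom works with any context manager that is a no-op when unconfigured; a self-contained sketch (verbose_logging is a hypothetical stand-in for DebugPrint):

from contextlib import contextmanager

@contextmanager
def verbose_logging(tag=None):
    # Hypothetical stand-in for DebugPrint: with no tag it is a no-op;
    # with a tag it brackets the block with markers.
    if tag is not None:
        print "<<%s on>>" % tag
    yield
    if tag is not None:
        print "<<%s off>>" % tag

DEBUG = True
with verbose_logging("bwt_ctsh") if DEBUG else verbose_logging():
    pass  # work goes here; the markers appear only when DEBUG is True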
Example #5
def _sequence_to_linear_graph(s):
    gb = GraphBuilder()
    start = gb.new_node()
    for item in s:
        end = gb.new_node()
        gb.new_arc(start, end, item)
        start = end
    return FrozenGraph(gb)
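Usage: a sequence of n items becomes a graph with n+1 nodes and n arcs, each arc labeled with one item. A quick check, assuming the definition above is in scope, that FrozenGraph exposes the get_num_arcs/get_arc accessors used elsewhere in this section, and that arcs keep their creation order:

g = _sequence_to_linear_graph("abc")
assert g.get_num_arcs() == 3
# get_arc(i) returns (start_node, end_node, label); the labels spell
# out the original sequence.
labels = tuple(g.get_arc(i)[2] for i in xrange(g.get_num_arcs()))
assert labels == ('a', 'b', 'c')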
Example #8
def _test9():
    # Like test8, but now HMMs have multiple inputs and outputs.
    ret = ""
    # GmmMgr setup

    num_states = 3
    dimension = 2
    models = []
    for i in xrange(num_states):
        dm = DummyModel(dimension, 1.0)
        models.append(dm)

    gmm_mgr = GmmMgr(models)

    gb = GraphBuilder()
    node_id0 = gb.new_node((0, 0))
    node_id1 = gb.new_node((1, 1))
    node_id2 = gb.new_node((2, 1))
    node_id3 = gb.new_node((3, 1))
    node_id4 = gb.new_node((4, 1))
    node_id5 = gb.new_node((5, 2))
    arc_id = gb.new_arc(node_id0, node_id1)
    arc_id = gb.new_arc(node_id1, node_id2)
    arc_id = gb.new_arc(node_id2, node_id3)
    arc_id = gb.new_arc(node_id3, node_id4)
    arc_id = gb.new_arc(node_id4, node_id5)
    gr0 = FrozenGraph(gb)

    # Make two forward Hmms with 3 states and order 3 (self loop, forward 1, forward 2).
    # The models in the middle are special and can skip directly from inputs to outputs.
    seed(0)
    hmm0 = make_forward_hmm(gmm_mgr, num_states, order=3, exact=True)
    hmm1 = Hmm(1)
    trans = array(
        (
            (0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0),
            (0.0, 0.0, 0.0, 0.5, 0.0, 0.5, 0.0),
            (0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.5),
            (0.0, 0.0, 0.0, 0.5, 0.35, 0.1, 0.05),
            (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
            (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
            (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
        )
    )
    hmm1.build_model(gmm_mgr, (0,), 3, 3, trans)
    hmm2 = make_forward_hmm(gmm_mgr, num_states, order=3, exact=True)
    hmm_mgr = HmmMgr((hmm0, hmm1, hmm2))

    with DebugPrint("bwt_vrfy") if False else DebugPrint():
        tg0 = TrainingGraph(gr0, hmm_mgr, split_prob_dict=dict())

    result_hmm = tg0.convert_to_standalone_hmm()
    ret += "\n\n========= TG CONVERTED TO Hmm =========\n\n" + result_hmm.to_string(full=True)

    return ret
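The 7x7 trans matrix lines up with the build_model(gmm_mgr, (0,), 3, 3, trans) call: apparently 3 input states, then the single emitting state, then 3 output states, with rows as sources and columns as targets. On that reading each live row should be normalized and the output rows empty; a quick numpy check of the matrix above:

from numpy import allclose, array

trans = array(
    (
        (0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0),
        (0.0, 0.0, 0.0, 0.5, 0.0, 0.5, 0.0),
        (0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.5),
        (0.0, 0.0, 0.0, 0.5, 0.35, 0.1, 0.05),
        (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
        (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
        (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
    )
)
# Rows 0-3 (inputs plus the emitting state) each sum to 1; rows 4-6
# (outputs) carry no outgoing probability mass.
assert allclose(trans[:4].sum(axis=1), 1.0)
assert (trans[4:] == 0.0).all()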
Example #10
def test2(num_obs):
    # Each of the 2 nodes contains a 4-node order-2 Hmm; the nodes are connected in a single chain
    dimension = 2

    obs_gen = make_data_generator(dimension)
    obs_list = [obs_gen.next() for i in xrange(num_obs)]

    # GmmMgr setup
    num_models = 20
    models = make_standard_gmms(dimension, num_models)
    gmm_mgr1 = GmmMgr(models[0:10])
    gmm_mgr2 = GmmMgr(models[10:20])

    # Hmm setup
    # Make two Hmms with 4 states and order 2 (self loop, forward 1)
    num_states = 4
    seed(0)
    hmm0 = make_forward_hmm(gmm_mgr1, num_states, 2, exact=True)
    hmm1 = make_forward_hmm(gmm_mgr1, num_states, 2, exact=True)
    hmm_mgr = HmmMgr((hmm0, hmm1))

    # TrainingGraph setup
    gb = GraphBuilder()
    node_id0 = gb.new_node((0, 0))
    node_id1 = gb.new_node((1, 1))
    arc_id = gb.new_arc(node_id0, node_id1)
    gr0 = FrozenGraph(gb)
    tg0 = TrainingGraph(gr0, hmm_mgr, dict())

    valid, ret = validate_training_graph(tg0, gmm_mgr1, hmm_mgr, obs_list, 1, gmm_mgr2)
    return ret
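make_data_generator comes from the test support code and is not shown here; anything that yields observation sequences of the right dimension fits the obs_gen.next() usage above. A hypothetical stand-in (the name, sequence length, and distribution are all illustrative, and the exact sequence type train_one_sequence expects is an assumption):

from random import gauss

def make_fake_data_generator(dimension, seq_len=10):
    # Hypothetical substitute for make_data_generator: each .next()
    # call yields one observation sequence, a tuple of seq_len
    # feature vectors of the given dimension.
    while True:
        yield tuple(tuple(gauss(0.0, 1.0) for _ in xrange(dimension))
                    for _ in xrange(seq_len))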
Example #12
def test3(num_obs):
    # Each of the 4 nodes contains a 4 (or 6)-node order-3 Hmm; the nodes are connected in a
    # diamond pattern
    dimension = 2

    obs_gen = make_data_generator(dimension)
    obs_list = [obs_gen.next() for i in xrange(num_obs)]

    # GmmMgr setup
    num_states = 4
    num_models = 20
    models = make_standard_gmms(dimension, num_models)
    gmm_mgr1 = GmmMgr(models[0:10])
    gmm_mgr2 = GmmMgr(models[10:20])

    # Hmm setup
    # Make four Hmms with 4 (or 6) states and order 3 (self loop, forward 1, forward 2)
    seed(0)
    hmm0 = make_forward_hmm(gmm_mgr1, num_states, 3, exact=True)
    # NB: the asymmetry between the two successors is a key part of this test; otherwise,
    # there would be no differences between the transition probs going to these successors,
    # which is the tricky case
    hmm1 = make_forward_hmm(gmm_mgr1, num_states + 2, 3, exact=True)
    hmm2 = make_forward_hmm(gmm_mgr1, num_states, 3, exact=True)
    hmm3 = make_forward_hmm(gmm_mgr1, num_states, 3, exact=True)
    hmm_mgr = HmmMgr((hmm0, hmm1, hmm2, hmm3))

    # TrainingGraph setup
    gb = GraphBuilder()
    node_id0 = gb.new_node((0, 0))
    node_id1 = gb.new_node((1, 1))
    node_id2 = gb.new_node((2, 2))
    node_id3 = gb.new_node((3, 3))
    arc_id = gb.new_arc(node_id0, node_id1)
    arc_id = gb.new_arc(node_id0, node_id2)
    arc_id = gb.new_arc(node_id1, node_id3)
    arc_id = gb.new_arc(node_id2, node_id3)
    gr0 = FrozenGraph(gb)
    spd = {}
    spd[(0, 1)] = (0.4, 0.3, 0.8)
    spd[(0, 2)] = (0.6, 0.7, 0.2)
    tg0 = TrainingGraph(gr0, hmm_mgr, spd)

    valid, ret = validate_training_graph(tg0, gmm_mgr1, hmm_mgr, obs_list, 1,
                                         gmm_mgr2)
    return ret
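The split_prob_dict keys are (node, successor) pairs and the values line up componentwise: for each index k, the k-th entries across the arcs leaving a node sum to 1, so each component reads as one probability distribution over the successors (presumably one component per out-transition type of the predecessor HMM). A quick check of the dict above:

spd = {(0, 1): (0.4, 0.3, 0.8), (0, 2): (0.6, 0.7, 0.2)}
# Componentwise, the probabilities on the two arcs out of node 0
# form distributions: 0.4+0.6, 0.3+0.7, 0.8+0.2 all equal 1.
for a, b in zip(spd[(0, 1)], spd[(0, 2)]):
    assert abs((a + b) - 1.0) < 1e-12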
Example #14
def test5(num_obs, do_display=False):
    # A test in which one of the HMMs has a transition from an input directly to
    # an output, so it can behave as an epsilon.  This node is between two other
    # nodes in a linear arrangement.

    # Data generator setup and data generation
    dimension = 2
    obs_gen = make_data_generator(dimension)
    obs_list = [obs_gen.next() for i in xrange(num_obs)]

    # GmmMgr setup
    num_models = 20
    models = make_standard_gmms(dimension, num_models)
    gmm_mgr1 = GmmMgr(models[0:10])
    gmm_mgr2 = GmmMgr(models[10:20])

    # Hmm setup
    # Make two Hmms with 2 states and order 2 (self loop, forward 1).  The model
    # in the middle is special in that it can skip directly from the input state
    # to the output state.
    seed(0)
    num_states = 2
    hmm0 = make_forward_hmm(gmm_mgr1, num_states, 2, exact=False)
    hmm1 = Hmm(1)
    trans = array(((0.0, 0.5, 0.5), (0.0, 0.5, 0.5), (0.0, 0.0, 0.0)))
    hmm1.build_model(gmm_mgr1, (0, ), 1, 1, trans)
    hmm2 = make_forward_hmm(gmm_mgr1, num_states, 2, exact=False)
    hmm_mgr = HmmMgr((hmm0, hmm1, hmm2))

    # TrainingGraph setup
    gb = GraphBuilder()
    node_id0 = gb.new_node((0, 0))
    node_id1 = gb.new_node((1, 1))
    # node_id2 = gb.new_node((2,2))
    arc_id = gb.new_arc(node_id0, node_id1)
    # arc_id = gb.new_arc(node_id1, node_id2)
    gr0 = FrozenGraph(gb)
    tg0 = TrainingGraph(gr0, hmm_mgr, split_prob_dict=dict())

    if do_display:
        tg0.dot_display()
        tg0.dot_display(expand_hmms=True)

    valid, ret = validate_training_graph(tg0, gmm_mgr1, hmm_mgr, obs_list, 1,
                                         gmm_mgr2)
    return ret
Example #16
    def __init__(self, stream):

        docgen = YamldataGenerator(stream)
        reader = YamldataReader(docgen, stream_type=self.STREAM_TYPE,
                                stream_version=self.STREAM_VERSION, header_only=True)

        self.runtime_objects = IndexedObjectSet(docgen)
        self.dataflow_graph = FrozenGraph(SerializedGraphTables(docgen))

        # until we've sorted out the yaml issues
        # make a linear graph with arc labels indexing into runtime_objects
        builder = GraphBuilder()
        startid = builder.new_node_label_is_id()
        for arc_label, obj in enumerate(self.runtime_objects):
            endid = builder.new_node_label_is_id()
            arcid = builder.new_arc(startid, endid, arc_label)
            startid = endid
        self.graph = FrozenGraph(builder)

        for i in self.runtime_objects:
            self.last = i[-1]
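Since the fallback graph is a straight line whose arc labels are indices into runtime_objects, walking the arcs in order recovers the objects in sequence. A small sketch using the get_num_arcs/get_arc accessors seen elsewhere in this section, assuming arcs keep creation order and the object collection is indexable:

def objects_in_graph_order(graph, indexed_objects):
    # Arc label i points back at indexed_objects[i], so a linear graph
    # enumerates the objects in the order they were added.
    return [indexed_objects[graph.get_arc(arc_id)[2]]
            for arc_id in xrange(graph.get_num_arcs())]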
Example #17
def _lattice_nbest_align(l1, l2, cost_fn=None, substring_cost=False):
    if not (isinstance(l1, FrozenGraph) and isinstance(l2, FrozenGraph)):
        raise ValueError("lattice_nbest_align needs two FrozenGraph arguments")

    if cost_fn is None:
        cost_fn = _std_cost
    else:
        cost_fn = _make_safe_cost_fn(cost_fn)

    l1_canon = l1.get_canonical_DAG()
    l2_canon = l2.get_canonical_DAG()
    len1 = l1_canon.get_num_arcs()
    len2 = l2_canon.get_num_arcs()

    assert l1_canon.is_lattice()
    assert l2_canon.is_lattice()

    terms = l1_canon.get_terminals()
    lat_start1 = terms[0][0]
    lat_end1 = terms[1][0]
    terms = l2_canon.get_terminals()
    lat_start2 = terms[0][0]
    lat_end2 = terms[1][0]

    # The lattices to be aligned, l1 and l2, have labels on the arcs;
    # any labels on their nodes will be ignored.  We begin by finding
    # a topological ordering of the lattice arcs, so that each arc is
    # assigned a non-negative index.  We construct a 2-D lattice, g,
    # with nodes representing pairs of arcs in l1 and l2.  An
    # additional row and column on the top and left of g represent an
    # initial position prior to consuming any tokens from l1 and l2,
    # respectively.  The arcs in g are labeled with triples giving the
    # original labels from l1 and l2 (or None if the arc's start node
    # is on the left side or top row) and the local cost of the edit
    # represented by the arc.  There are two slightly tricky parts.
    # First, because the alignment is done on lattices, the cost
    # lattice may have links which cross several rows or columns,
    # depending on the adjacencies of l1 and l2.  Second, there's an
    # offset of 1 between the arc numbering for l1 and l2 and the node
    # numbering in g, because of the initial row and column.  Thus,
    # the node in g corresponding to the arc pair <a1, a2> is at
    # indices <a1+1, a2+1>.  One egregious (but very handy) abuse of
    # this arrangement is the occasional use of -1 as an ersatz arcID,
    # which will be converted to a 0 index into g.  Finally, because
    # the graph iterpaths function requires a single start and end
    # node, we tie all the potential end nodes in g to a special end
    # node with "ground" arcs.  A node in g is a potential end node if
    # the pair of arcs it represents are each incident on the terminal
    # node of their respective lattices.

    gb = GraphBuilder()
    # Initialize cost lattice nodes
    node_array = [[gb.new_node() for j in range(len2 + 1)]
                  for i in range(len1 + 1)]
    # We use this single node to tie all end nodes together
    end_node = gb.new_node()

    # Add first row of arcs
    for i in xrange(len1):
        start, end, x = l1_canon.get_arc(i)
        insert_cost = cost_fn(x, None)
        if end == lat_end1 and len2 == 0:
            gb.new_arc(node_array[i + 1][0], end_node, (None, None, 0))
            # print "Added ground arc for l1 arc from %d to %d with label %s" % (start, end, x)
        pred_arcs = l1_canon.get_node_in_arcs(start)
        if start == lat_start1:
            pred_arcs.append(-1)
        for arc in pred_arcs:
            gb.new_arc(node_array[arc + 1][0], node_array[i + 1][0],
                       (x, None, insert_cost))
            # print ("Processed l1 arc from %d to %d with label %s - added %d arcs"
            #         % (start, end, x, len(pred_arcs)))

    # Add first column of arcs
    for j in xrange(len2):
        start, end, y = l2_canon.get_arc(j)
        delete_cost = 0 if substring_cost else cost_fn(None, y)
        if end == lat_end2 and len1 == 0:
            gb.new_arc(node_array[0][j + 1], end_node, (None, None, 0))
            # print "Added ground arc for l1 arc from %d to %d with label %s" % (start, end, y)
        pred_arcs = l2_canon.get_node_in_arcs(start)
        if start == lat_start2:
            pred_arcs.append(-1)
        for arc in pred_arcs:
            gb.new_arc(node_array[0][arc + 1], node_array[0][j + 1],
                       (None, y, delete_cost))
            # print ("Processed l1 arc from %d to %d with label %s - added %d arcs"
            #         % (start, end, y, len(pred_arcs)))

    # Construct remainder of cost lattice
    for i in xrange(len1):
        for j in xrange(len2):
            start1, end1, x = l1_canon.get_arc(i)
            start2, end2, y = l2_canon.get_arc(j)
            pred_arcs1 = l1_canon.get_node_in_arcs(start1)
            pred_arcs2 = l2_canon.get_node_in_arcs(start2)
            if start1 == lat_start1:
                pred_arcs1.append(-1)
            if start2 == lat_start2:
                pred_arcs2.append(-1)
            insert_cost = cost_fn(x, None)
            delete_cost = 0 if (substring_cost and i == len1 - 1) else cost_fn(
                None, y)
            subst_cost = cost_fn(x, y)

            num_added = 0
            if end1 == lat_end1 and end2 == lat_end2:
                gb.new_arc(node_array[i + 1][j + 1], end_node, (None, None, 0))
                # print "Added ground arc for l1,l1 arcs with labels %s, %s" % (x,y)

            for arc in pred_arcs1:
                gb.new_arc(node_array[arc + 1][j + 1],
                           node_array[i + 1][j + 1], (x, None, insert_cost))
                num_added += 1

            for arc in pred_arcs2:
                gb.new_arc(node_array[i + 1][arc + 1],
                           node_array[i + 1][j + 1], (None, y, delete_cost))
                num_added += 1

            for arc1 in pred_arcs1:
                for arc2 in pred_arcs2:
                    gb.new_arc(node_array[arc1 + 1][arc2 + 1],
                               node_array[i + 1][j + 1], (x, y, subst_cost))
                    num_added += 1
                    # print ("Processed l1 arc from %d to %d with label %s "
                    #        "and l1 arc from %d to %d with label %s  - added %d arcs"
                    #          % (start1, end1, x, start2, end2, y, num_added))

    g = FrozenGraph(gb)
    assert g.is_lattice()

    # print "g.get_terminals() = %s, %s" % g.get_terminals()

    def graph_cost_fn(label):
        return label[2]

    def iter_helper(path):
        arc_labels = [path[2].get_arc_label(arc) for arc in path[1][:-1]]
        return (path[0], tuple(arc_labels))

    return imap(iter_helper,
                g.iterpaths(graph_cost_fn, node_array[0][0], end_node))
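The returned iterator yields alignments cheapest-first as (cost, steps) pairs, where each step is an (x, y, local_cost) triple: (x, None, c) consumes a token only from l1 (the code's insert_cost), (None, y, c) only from l2 (delete_cost), and (x, y, c) consumes from both (subst_cost). A usage sketch, assuming _sequence_to_linear_graph from earlier in this section (or any builder of single-entry, single-exit FrozenGraph lattices) is in scope:

from itertools import islice

l1 = _sequence_to_linear_graph("kitten")
l2 = _sequence_to_linear_graph("sitting")
# Take the three cheapest alignments of the two linear lattices.
for cost, steps in islice(_lattice_nbest_align(l1, l2), 3):
    print cost, steps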
Example #19
def _test10():
    # Like test9, but now HMMs are arranged in a diamond pattern so inter-HMM
    # probabilities come into play
    ret = ""
    # GmmMgr setup

    num_states = 3
    dimension = 2
    models = []
    for i in xrange(num_states):
        dm = DummyModel(dimension, 1.0)
        models.append(dm)

    gmm_mgr = GmmMgr(models)

    gb = GraphBuilder()
    node_id0 = gb.new_node((0, 0))
    node_id1 = gb.new_node((1, 1))
    node_id2 = gb.new_node((2, 1))
    node_id3 = gb.new_node((3, 1))
    node_id4 = gb.new_node((4, 1))
    node_id5 = gb.new_node((5, 2))

    # The topology here is more complex than previous examples
    arc_id = gb.new_arc(node_id0, node_id1)
    arc_id = gb.new_arc(node_id1, node_id5)
    arc_id = gb.new_arc(node_id0, node_id2)
    arc_id = gb.new_arc(node_id2, node_id3)
    arc_id = gb.new_arc(node_id3, node_id4)
    arc_id = gb.new_arc(node_id3, node_id5)
    arc_id = gb.new_arc(node_id4, node_id5)
    gr0 = FrozenGraph(gb)

    # Make two forward Hmms with 3 states and order 3 (self loop, forward 1, forward 2).
    # The models in the middle are special and can skip.
    seed(0)
    hmm0 = make_forward_hmm(gmm_mgr, num_states, order=3, exact=True)
    hmm1 = Hmm(1)
    trans = array(
        (
            (0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0),
            (0.0, 0.0, 0.0, 0.5, 0.0, 0.5, 0.0),
            (0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.5),
            (0.0, 0.0, 0.0, 0.5, 0.35, 0.1, 0.05),
            (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
            (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
            (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
        )
    )
    hmm1.build_model(gmm_mgr, (0,), 3, 3, trans)
    hmm2 = make_forward_hmm(gmm_mgr, num_states, order=3, exact=True)
    hmm_mgr = HmmMgr((hmm0, hmm1, hmm2))

    spd = {}
    spd[(0, 1)] = (0.4, 0.3, 0.8)
    spd[(0, 2)] = (0.6, 0.7, 0.2)

    spd[(3, 4)] = (0.4, 0.3, 0.8)
    spd[(3, 5)] = (0.6, 0.7, 0.2)

    tg0 = TrainingGraph(gr0, hmm_mgr, split_prob_dict=spd)

    with DebugPrint("bwt_ctsh") if True else DebugPrint():
        result_hmm = tg0.convert_to_standalone_hmm()
    ret += "\n\n========= TG CONVERTED TO Hmm =========\n\n" + result_hmm.to_string(
        full=True)

    return ret