Example #1
import numpy as np
from pomegranate import (GeneralMixtureModel, HiddenMarkovModel,
                         NormalDistribution, State)


def load_segmentation_model(modeldata):
    model = HiddenMarkovModel('model')

    states = {}
    for s in modeldata:
        if len(s['emission']) == 1:
            emission = NormalDistribution(*s['emission'][0][:2])
        else:
            weights = np.array([w for _, _, w in s['emission']])
            dists = [NormalDistribution(mu, sigma)
                     for mu, sigma, _ in s['emission']]
            emission = GeneralMixtureModel(dists, weights=weights)
        state = State(emission, name=s['name'])

        states[s['name']] = state
        model.add_state(state)
        if 'start_prob' in s:
            model.add_transition(model.start, state, s['start_prob'])

    for s in modeldata:
        current = states[s['name']]
        for nextstate, prob in s['transition']:
            model.add_transition(current, states[nextstate], prob)

    model.bake()

    return model
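
A hedged sketch of the modeldata structure this loader expects, inferred from the indexing in the code above (the state names and numbers are invented):

modeldata = [
    {'name': 'A',
     'emission': [(0.0, 1.0, 1.0)],  # one (mu, sigma, weight): plain NormalDistribution
     'start_prob': 0.6,
     'transition': [('A', 0.9), ('B', 0.1)]},
    {'name': 'B',
     'emission': [(2.0, 0.5, 0.3), (5.0, 1.0, 0.7)],  # several components: mixture
     'start_prob': 0.4,
     'transition': [('B', 0.8), ('A', 0.2)]},
]
model = load_segmentation_model(modeldata)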
Example #2
def train_hmm_tagger(data):
    # HMM
    # Use the tag unigrams and bigrams calculated above to construct a hidden Markov tagger.
    #
    # - Add one state per tag
    #     - The emission distribution at each state should be estimated with the formula: $P(w|t) = \frac{C(t, w)}{C(t)}$
    # - Add an edge from the starting state `basic_model.start` to each tag
    #     - The transition probability should be estimated with the formula: $P(t|start) = \frac{C(start, t)}{C(start)}$
    # - Add an edge from each tag to the end state `basic_model.end`
    #     - The transition probability should be estimated with the formula: $P(end|t) = \frac{C(t, end)}{C(t)}$
    # - Add an edge between _every_ pair of tags
    #     - The transition probability should be estimated with the formula: $P(t_2|t_1) = \frac{C(t_1, t_2)}{C(t_1)}$
    basic_model = HiddenMarkovModel(name="base-hmm-tagger")

    state_dict = {}
    # stream() yields (word, tag) pairs; zip(*...) splits them into parallel
    # tuples and [::-1] reorders them to (tags, words) for pair_counts.
    emission_counts = pair_counts(*list(zip(
        *data.training_set.stream()))[::-1])
    for tag in emission_counts:
        tag_count = tag_unigrams[tag]
        probs = {}
        for w in emission_counts[tag]:
            probs[w] = emission_counts[tag][w] / tag_count
        emission_p = DiscreteDistribution(probs)
        state = State(emission_p, name=tag)
        basic_model.add_state(state)
        state_dict[tag] = state

    for tag in tag_starts:
        basic_model.add_transition(basic_model.start, state_dict[tag],
                                   tag_starts[tag] / len(data.training_set.Y))
        basic_model.add_transition(state_dict[tag], basic_model.end,
                                   tag_ends[tag] / tag_unigrams[tag])

    for (tag1, tag2) in tag_bigrams:
        basic_model.add_transition(
            state_dict[tag1], state_dict[tag2],
            tag_bigrams[(tag1, tag2)] / tag_unigrams[tag1])

    # finalize the model
    basic_model.bake()

    assert all(
        tag in set(s.name for s in basic_model.states)
        for tag in data.training_set.tagset
    ), "Every state in your network should use the name of the associated tag, which must be one of the training set tags."
    assert basic_model.edge_count() == 168, (
        "Your network should have an edge from the start node to each state, one edge between every "
        +
        "pair of tags (states), and an edge from each state to the end node.")
    HTML(
        '<div class="alert alert-block alert-success">Your HMM network topology looks good!</div>'
    )
    return basic_model
Example #3
class HMMWrapper:
    def __init__(self):
        self.model = HiddenMarkovModel()
        self.start = self.model.start
        self.end = self.model.end
        self.states_before_bake = []
        self.states = None

    def add_state(self, state, start_prob=0):
        self.states_before_bake.append((state, start_prob))
        self.model.add_state(state)

    def add_transition(self, start_state, end_state, prob):
        # print('adding from', start_state.name, 'to', end_state.name, prob)
        self.model.add_transition(start_state, end_state, prob)

    def bake(self):
        starter_states_no_prob = []
        free_start_prob = 1.0
        for state, start_prob in self.states_before_bake:
            if 'none' not in state.name:
                if not start_prob:
                    starter_states_no_prob.append(state)
                else:
                    free_start_prob -= start_prob
                    print('assigned ' + str(start_prob) + ' to ' + state.name)
                    self.add_transition(self.start, state, start_prob)

        len_no_prob = len(starter_states_no_prob)
        if len_no_prob:
            # Split the leftover start probability evenly among the states
            # that were added without an explicit start_prob.
            starter_prob = free_start_prob / len_no_prob
            print(len_no_prob, starter_prob)
            for state in starter_states_no_prob:
                self.add_transition(self.start, state, starter_prob)

        self.model.bake()
        self.states = self.model.states

    def make_states_from_alignment(self, first_state, last_state, seq_matrix,
                                   name):
        columns = column_clasify(seq_matrix)
        zones = create_zones(columns)
        grouped_states = group_states(zones, name)
        add_states(self, grouped_states)
        trans = calculate_transitions(first_state, last_state, grouped_states)
        apply_transitions(self, trans)

    def predict(self, *args, **kwargs):
        return self.model.predict(*args, **kwargs)
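
A minimal usage sketch of the wrapper (the two toy distributions are invented), showing how bake() hands the leftover start probability to states added without an explicit start_prob:

from pomegranate import DiscreteDistribution, State

w = HMMWrapper()
s1 = State(DiscreteDistribution({'a': 0.5, 'c': 0.5}), name='s1')
s2 = State(DiscreteDistribution({'g': 0.5, 't': 0.5}), name='s2')
w.add_state(s1, start_prob=0.7)  # explicit start probability
w.add_state(s2)                  # none given: shares the leftover mass
w.add_transition(s1, s2, 1.0)
w.add_transition(s2, s2, 1.0)
w.bake()  # prints 'assigned 0.7 to s1'; s2 receives the remaining 0.3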
Example #4
def _initialize_new_hmm(hmm, new_states, new_transitions):

    new_hmm = HiddenMarkovModel()
    for state in new_states:
        if state not in (hmm.start, hmm.end):
            new_hmm.add_state(state)
    for source_state, target_state, probability in new_transitions:
        # Remap the old model's start/end states onto the new model's;
        # this also handles a direct start-to-end transition.
        source = new_hmm.start if source_state == hmm.start else source_state
        target = new_hmm.end if target_state == hmm.end else target_state
        new_hmm.add_transition(source, target, probability)

    new_hmm.bake()
    return new_hmm
Example #5
def insert_delete_main_hmm(data_matrix):
    v_columns = column_clasify(data_matrix)
    v_zones = create_zones(v_columns)
    v_grouped_states = group_states(v_zones, 'test')
    v_model = HiddenMarkovModel()
    v_first_state = State(None, name='ali_start')
    v_last_state = State(None, name='ali_end')
    v_model.add_state(v_first_state)
    v_model.add_transition(v_model.start, v_first_state, 1)
    v_model.add_state(v_last_state)
    add_states(v_model, v_grouped_states)
    v_trans = calculate_transitions(v_first_state, v_last_state,
                                    v_grouped_states)
    apply_transitions(v_model, v_trans)
    v_model.bake()
    return v_model
Example #6
class ModelWrapper:
    def __init__(self):
        self.model = HiddenMarkovModel()

    def add_state(self, distribution, name):
        state = State(distribution, name=name)
        self.model.add_state(state)
        return state

    def bake(self):
        self.model.bake()

    def viterbi(self, seq):
        return self.model.viterbi(seq)

    def add_transition(self, states, next_state_data):
        # Fan out: connect every state in `states` to each
        # (next_state, probability) pair in `next_state_data`.
        for state in states:
            for next_state, prob in next_state_data:
                self.model.add_transition(state, next_state, prob)
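
A hypothetical usage sketch (the weather-style distributions are invented) showing the fan-out behaviour of add_transition:

from pomegranate import DiscreteDistribution

wrapper = ModelWrapper()
rain = wrapper.add_state(DiscreteDistribution({'walk': 0.1, 'shop': 0.4, 'clean': 0.5}), 'rain')
sun = wrapper.add_state(DiscreteDistribution({'walk': 0.6, 'shop': 0.3, 'clean': 0.1}), 'sun')
wrapper.model.add_transition(wrapper.model.start, rain, 0.5)
wrapper.model.add_transition(wrapper.model.start, sun, 0.5)
# Every state in the first list gets an edge to each (state, prob) pair.
wrapper.add_transition([rain, sun], [(rain, 0.5), (sun, 0.5)])
wrapper.bake()
print(wrapper.viterbi(['walk', 'clean', 'shop'])[0])  # log-probability of best path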
Example #7
def create_hidden_MarkovModel(e_df, q_df, start_p_dict):
    """
    Creates a Hidden Markov Model from DataFrames.
    @args:
        - e_df (pd.DataFrame): contains the emission probabilities
        - q_df (pd.DataFrame): contains the transition probabilities
        - start_p_dict (dict): start probability for each state
    """
    model = HiddenMarkovModel(name="Example Model")

    # 1: Create a dict entry for each key in the transition DataFrame
    model_dict = {}
    for key in q_df.keys().values:
        model_dict[key] = {}

    # 2: Create the states
    for key in model_dict:
        # 2.1: Add the emission probabilities P(observation | state) to each state
        emission_p = DiscreteDistribution(e_df[key].to_dict())
        model_dict[key] = State(emission_p, name=key)
        model.add_state(model_dict[key])
        # 2.2: Add the start probability for each state
        model.add_transition(model.start, model_dict[key], start_p_dict[key])

    # 3: Add the transition probabilities between states
    for key, row in q_df.to_dict("index").items():
        for item_name, value in row.items():
            # print(key, ',', item_name, ':', value)
            model.add_transition(model_dict[key], model_dict[item_name], value)

    # finally, call the .bake() method to finalize the model
    model.bake()

    return model
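
A minimal sketch of the DataFrame layout the function appears to expect, inferred from the indexing above (names and values invented): e_df has one column per hidden state and one row per observation symbol, while q_df is square with states as both index and columns.

import pandas as pd

states = ['Sunny', 'Rainy']
e_df = pd.DataFrame({'Sunny': {'walk': 0.6, 'shop': 0.3, 'clean': 0.1},
                     'Rainy': {'walk': 0.1, 'shop': 0.4, 'clean': 0.5}})
q_df = pd.DataFrame([[0.7, 0.3], [0.4, 0.6]], index=states, columns=states)
start_p = {'Sunny': 0.5, 'Rainy': 0.5}
model = create_hidden_MarkovModel(e_df, q_df, start_p)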
Example #8
from pomegranate import DiscreteDistribution, HiddenMarkovModel, State

hmmodel = HiddenMarkovModel()

back_state = State(DiscreteDistribution({
    'a': 0.25,
    'c': 0.25,
    'g': 0.25,
    't': 0.25
}),
                   name='back')

fixed_state = State(DiscreteDistribution({
    'a': 0.45,
    'c': 0.45,
    'g': 0.05,
    't': 0.05
}),
                    name='fixed')

hmmodel.add_state(back_state)
hmmodel.add_state(fixed_state)

hmmodel.add_transition(hmmodel.start, back_state, 1)
hmmodel.add_transition(back_state, back_state, 0.9)
hmmodel.add_transition(back_state, fixed_state, 0.1)
hmmodel.add_transition(fixed_state, fixed_state, 0.9)
hmmodel.add_transition(fixed_state, back_state, 0.1)

hmmodel.bake()

seq = list('acgtacgtaaaaccccaaa')

logp, path = hmmodel.viterbi(seq)

print([x[1].name for x in path])
Example #9
"""
A simple example highlighting how to build a model using states, add
transitions, and then run the algorithms, including showing how training
on a sequence improves the probability of the sequence.
"""

import random
from pomegranate import *
from pomegranate import HiddenMarkovModel as Model

random.seed(0)
model = Model(name="ExampleModel")
distribution = UniformDistribution(0.0, 1.0)
state = State(distribution, name="uniform")
state2 = State(NormalDistribution(0, 2), name="normal")
silent = State(None, name="silent")
model.add_state(state)
model.add_state(state2)

model.add_transition(state, state, 0.4)
model.add_transition(state, state2, 0.4)
model.add_transition(state2, state2, 0.4)
model.add_transition(state2, state, 0.4)

model.add_transition(model.start, state, 0.5)
model.add_transition(model.start, state2, 0.5)
model.add_transition(state, model.end, 0.2)
model.add_transition(state2, model.end, 0.2)

model.bake()
sequence = model.sample()
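
The snippet stops after sampling; a hedged continuation matching the docstring's claim that training raises the probability of a sequence (standard pomegranate calls):

print(model.log_probability(sequence))  # log P(sequence) before training
model.fit([sequence])                   # Baum-Welch on the single sequence
print(model.log_probability(sequence))  # typically higher after fitting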
Example #10
fake_back = State(DiscreteDistribution(intron_distribution.p), name='back2')

in0 = State(DiscreteDistribution(intron_distribution.p), name='in0')
in1 = State(DiscreteDistribution(intron_distribution.p), name='in1')
in2 = State(DiscreteDistribution(intron_distribution.p), name='in2')

in0_spacers = spacer_states_maker(64, intron_distribution.p, 'in0 spacer')
in1_spacers = spacer_states_maker(64, intron_distribution.p, 'in1 spacer')
in2_spacers = spacer_states_maker(64, intron_distribution.p, 'in2 spacer')

coding_state0 = State(DiscreteDistribution(c0.p), 'coding state 0')
coding_state1 = State(DiscreteDistribution(c1.p), 'coding state 1')
coding_state2 = State(DiscreteDistribution(c2.p), 'coding state 2')

coding_model.add_state(back)
coding_model.add_state(fake_back)
coding_model.add_state(coding_state0)
coding_model.add_state(coding_state1)
coding_model.add_state(coding_state2)

coding_model.add_state(in0)
coding_model.add_state(in1)
coding_model.add_state(in2)

coding_model.add_state(exon3_state)
add_sequence(coding_model, poly_a_states)
add_sequence(coding_model, post_poly_spacer)

add_sequence(coding_model, in0_spacers)
add_sequence(coding_model, in1_spacers)
Example #11
cat_states = sequence_state_factory(cat_data, 'CAT')
post_cat_var_spacers_tss = spacer_states_maker(151, no_coding.p, 'post cat var spacer tss')
post_cat_spacers_tss = spacer_states_maker(42, no_coding.p, 'post cat spacer tss')

post_cat_var_spacers_tata = spacer_states_maker(151, no_coding.p, 'post cat var spacer tata')
post_cat_spacers_tata = spacer_states_maker(22, no_coding.p, 'post cat spacer tata')

tata_states = sequence_state_factory(tata_data, 'tata')
post_tata_var_spacers = spacer_states_maker(16, no_coding.p, 'post_tata_var_spacer')
post_tata_spacers = spacer_states_maker(4, no_coding.p, 'post_tata_spacer')

inr_states = sequence_state_factory(inr_data, 'inr')

no_inr_states = sequence_state_factory(no_inr_data, 'no inr')
# Add states
promoter_utr_model.add_state(back)

# Add sequences

# GC
add_sequence(promoter_utr_model, gc_states)

add_sequence(promoter_utr_model, post_gc_spacers_tata)
add_variable_length_sequence(promoter_utr_model, post_gc_var_spacers_tata, post_gc_spacers_tata[0])

add_sequence(promoter_utr_model, post_gc_spacers_tss)
add_variable_length_sequence(promoter_utr_model, post_gc_var_spacers_tss, post_gc_spacers_tss[0])

add_sequence(promoter_utr_model, inr_states)
Example #13
mdd_states_sequences = []

for index, l_em in enumerate(leaf_emissions):
    sequence = state_sequence_from(l_em, 'donor_' + str(index))
    add_sequence(hm_model, sequence[0])
    set_transition_probabilities(hm_model, sequence[0], sequence[1])
    mdd_states_sequences.append(sequence[0])

background = State(DiscreteDistribution({
    'a': 0.25,
    'c': 0.25,
    'g': 0.25,
    't': 0.25
}),
                   name='background')
hm_model.add_state(background)

hm_model.add_transition(hm_model.start, background, 0.9)
fork_sequence(hm_model, [hm_model.start], mdd_states_sequences,
              [0.025, 0.025, 0.025, 0.025])

hm_model.add_transition(background, background, 0.9)

fork_sequence(hm_model, [background], mdd_states_sequences,
              [0.025, 0.025, 0.025, 0.025])
reunify_sequences(hm_model, mdd_states_sequences, background, [1, 1, 1, 1])

hm_model.bake()
a = 'a'
c = 'c'
g = 'g'
Example #14
import numpy
from pomegranate import DiscreteDistribution, HiddenMarkovModel, State

from converter_to import converter_to
from matrix_from_aln import matrix_from_exa
from model_maker_utils import add_sequence, equal_distribution
# (calculator, classify and sequence_state_factory are project-local helpers
# whose import paths are not shown in these snippets.)

c0, c1, c2 = calculator.calculate_proba2('../data extractors/new_cuts.txt')
matrixStop = numpy.array(matrix_from_exa('../data extractors/new_stops.exa'))
coding_state0 = State(DiscreteDistribution(c0.p), 'coding state 0')
coding_state1 = State(DiscreteDistribution(c1.p), 'coding state 1')
coding_state2 = State(DiscreteDistribution(c2.p), 'coding state 2')

post = State(DiscreteDistribution(equal_distribution), name='post')

model = HiddenMarkovModel('coding_to_stop')

stop_data = classify(matrixStop, 2)
stop_states = sequence_state_factory(stop_data, 'stop')

model.add_state(coding_state0)
model.add_state(coding_state1)
model.add_state(coding_state2)

add_sequence(model, stop_states)

model.add_state(post)

model.add_transition(model.start, coding_state1, 1)
model.add_transition(coding_state0, coding_state1, 1)
model.add_transition(coding_state1, coding_state2, 1)
model.add_transition(coding_state2, coding_state0, 0.6)
model.add_transition(coding_state2, stop_states[0], 0.4)
model.add_transition(stop_states[-1], post, 1)
model.add_transition(post, post, 0.9)
model.add_transition(post, model.end, 0.1)
Example #15
def crop_type_hmm_model(nn_pobability_matrix, timeseries_steps,
                        n_observed_classes):
    # Hidden classes, in index order:
    # 0 unknown_plant, 1 large_grass, 2 small_grass, 3 other, 4 fallow, 5 no_crop

    # One emission distribution per hidden class; each wrapper reads column i
    # of the neural-network probability matrix.
    d0, d1, d2, d3, d4, d5 = (NeuralNetworkWrapperCustom(
        predicted_probabilities=nn_pobability_matrix,
        i=i,
        n_samples=timeseries_steps,
        n_classes=n_observed_classes) for i in range(6))

    s0_unk = State(d0, name='unknown_plant')
    s1_large = State(d1, name='large_grass')
    s2_small = State(d2, name='small_grass')
    s3_other = State(d3, name='other')
    s4_fallow = State(d4, name='fallow')
    s5_none = State(d5, name='no_crop')

    model = HiddenMarkovModel()

    # Initialize each hidden state.
    # All states have an equal chance of being the starting state.
    for s in [s0_unk, s1_large, s2_small, s3_other, s4_fallow, s5_none]:
        model.add_state(s)
        model.add_transition(model.start, s, 1)

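    # The row weights below are relative; bake() renormalizes each state's
    # outgoing edges to sum to 1 (e.g. 95/5 becomes 0.95/0.05).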
    model.add_transitions(
        s0_unk, [s0_unk, s1_large, s2_small, s3_other, s4_fallow, s5_none],
        [95., 0., 0., 0., 0., 5.])
    model.add_transitions(
        s1_large, [s0_unk, s1_large, s2_small, s3_other, s4_fallow, s5_none],
        [0., 95., 0., 0., 0., 5.])
    model.add_transitions(
        s2_small, [s0_unk, s1_large, s2_small, s3_other, s4_fallow, s5_none],
        [0., 0., 95., 0., 0., 5.])
    model.add_transitions(
        s3_other, [s0_unk, s1_large, s2_small, s3_other, s4_fallow, s5_none],
        [0., 0., 0., 95., 0., 5.])
    model.add_transitions(
        s4_fallow, [s0_unk, s1_large, s2_small, s3_other, s4_fallow, s5_none],
        [0., 0., 0., 0., 95., 5.])
    model.add_transitions(
        s5_none, [s0_unk, s1_large, s2_small, s3_other, s4_fallow, s5_none],
        [2., 2., 2., 2., 2., 90.])

    model.bake(verbose=False)

    return model
Example #16
def train_and_test():
    with open('../data extractors/exons_start_1.txt') as in_file:
        total = []
        for line in in_file:
            no_p_line = line.replace('P', '').lower().replace('\n', '')
            total.append(no_p_line)

    converted_total = [converter_to(x, 2) for x in total]

    matrixDonor0 = numpy.array(
        matrix_from_exa('../data extractors/new_donor1.exa'))

    c0, c1, c2 = calculator.calculate_proba2('../data extractors/new_cuts.txt')
    print(c0.p, c1.p, c2.p)
    coding_state0 = State(DiscreteDistribution(c0.p), 'coding state 0')
    coding_state1 = State(DiscreteDistribution(c1.p), 'coding state 1')
    coding_state2 = State(DiscreteDistribution(c2.p), 'coding state 2')

    donor0_data = classify(matrixDonor0, 2)
    donor0_states = sequence_state_factory(donor0_data, 'donor0')

    post = State(DiscreteDistribution(equal_distribution), name='post')

    model = HiddenMarkovModel('coding to donor')

    model.add_state(coding_state0)
    model.add_state(coding_state1)
    model.add_state(coding_state2)

    add_sequence(model, donor0_states)

    model.add_state(post)

    model.add_transition(model.start, coding_state0, 1)

    model.add_transition(coding_state0, coding_state1, 0.6)
    model.add_transition(coding_state0, donor0_states[0], 0.4)

    model.add_transition(coding_state1, coding_state2, 0.6)
    model.add_transition(coding_state1, donor0_states[0], 0.4)

    model.add_transition(coding_state2, coding_state0, 0.6)
    model.add_transition(coding_state2, donor0_states[0], 0.4)

    model.add_transition(donor0_states[-1], post, 1)

    model.add_transition(post, post, 0.9)
    model.add_transition(post, model.end, 0.1)

    model.bake()
    test_model(model)

    model.fit(converted_total,
              transition_pseudocount=1,
              emission_pseudocount=1,
              verbose=True)

    test_model(model)

    with open('partial_model_coding_to_donor_model0.json', 'w') as out:
        out.write(model.to_json())
Example #17
def crop_status_hmm_model(nn_pobability_matrix, timeseries_steps,
                          n_observed_classes):
    # Hidden states, in index order:
    # 0 emergence, 1 growth, 2 flowers, 3 senescing, 4 senesced, 5 no_crop

    # One emission distribution per hidden state; each wrapper reads column i
    # of the neural-network probability matrix.
    d0, d1, d2, d3, d4, d5 = (NeuralNetworkWrapperCustom(
        predicted_probabilities=nn_pobability_matrix,
        i=i,
        n_samples=timeseries_steps,
        n_classes=n_observed_classes) for i in range(6))

    s0_emerge = State(d0, name='emergence')
    s1_growth = State(d1, name='growth')
    s2_fls = State(d2, name='flowers')
    s3_sencing = State(d3, name='senescing')
    s4_senced = State(d4, name='senesced')
    s5_none = State(d5, name='no_crop')

    model = HiddenMarkovModel()

    # Initialize each hidden state.
    # All states have an equal chance of being the starting state.
    for s in [s0_emerge, s1_growth, s2_fls, s3_sencing, s4_senced, s5_none]:
        model.add_state(s)
        model.add_transition(model.start, s, 1)

    model.add_transitions(
        s0_emerge,
        [s0_emerge, s1_growth, s2_fls, s3_sencing, s4_senced, s5_none],
        [90., 5., 0., 0., 0., 5.])
    model.add_transitions(
        s1_growth,
        [s0_emerge, s1_growth, s2_fls, s3_sencing, s4_senced, s5_none],
        [0., 90., 2.5, 2.5, 0., 5.])
    model.add_transitions(
        s2_fls, [s0_emerge, s1_growth, s2_fls, s3_sencing, s4_senced, s5_none],
        [0., 0., 90., 5., 0., 5.])
    model.add_transitions(
        s3_sencing,
        [s0_emerge, s1_growth, s2_fls, s3_sencing, s4_senced, s5_none],
        [0., 0., 0., 90., 5., 5.])
    model.add_transitions(
        s4_senced,
        [s0_emerge, s1_growth, s2_fls, s3_sencing, s4_senced, s5_none],
        [0., 0., 0., 0., 90., 10.])
    model.add_transitions(
        s5_none,
        [s0_emerge, s1_growth, s2_fls, s3_sencing, s4_senced, s5_none],
        [10., 0, 0., 0., 0., 90.])

    model.bake(verbose=False)

    return model
Example #18
basic_model = HiddenMarkovModel(name="base-hmm-tagger")

states = {}

for tag in data.tagset:

    emission_prob = {}

    for word, number in emission_counts[tag].items():
        emission_prob[word] = number / tag_unigrams[tag]

    tag_distribution = DiscreteDistribution(emission_prob)
    state = State(tag_distribution, name=tag)
    states[tag] = state
    basic_model.add_state(state)

for tag in data.tagset:

    state = states[tag]
    start_probability = tag_starts[tag] / sum(tag_starts.values())
    basic_model.add_transition(basic_model.start, state, start_probability)
    end_probability = tag_ends[tag] / sum(tag_ends.values())
    basic_model.add_transition(state, basic_model.end, end_probability)

for tag1 in data.tagset:

    state_1 = states[tag1]

    for tag2 in data.tagset:
        state_2 = states[tag2]
        transition_probability = tag_bigrams[(tag1, tag2)] / tag_unigrams[tag1]
        basic_model.add_transition(state_1, state_2, transition_probability)

basic_model.bake()
Example #19
def dominant_cover_hmm_model(nn_pobability_matrix, timeseries_steps,
                             n_observed_classes):
    # One emission distribution per cover class; each wrapper reads column i
    # of the neural-network probability matrix.
    d0, d1, d2, d3, d4 = (NeuralNetworkWrapperCustom(
        predicted_probabilities=nn_pobability_matrix,
        i=i,
        n_samples=timeseries_steps,
        n_classes=n_observed_classes) for i in range(5))

    s0_veg = State(d0, name='vegetation')
    s1_residue = State(d1, name='residue')
    s2_soil = State(d2, name='soil')
    s3_snow = State(d3, name='snow')
    s4_water = State(d4, name='water')

    model = HiddenMarkovModel()

    # Initialize each hidden state.
    # All states have an equal chance of being the starting state.
    for s in [s0_veg, s1_residue, s2_soil, s3_snow, s4_water]:
        model.add_state(s)
        model.add_transition(model.start, s, 1)

    model.add_transitions(s0_veg,
                          [s0_veg, s1_residue, s2_soil, s3_snow, s4_water],
                          [95., 1.0, 1.0, 1.0, 1.0])
    model.add_transitions(s1_residue,
                          [s0_veg, s1_residue, s2_soil, s3_snow, s4_water],
                          [1.0, 95., 1.0, 1.0, 1.0])
    model.add_transitions(s2_soil,
                          [s0_veg, s1_residue, s2_soil, s3_snow, s4_water],
                          [1.0, 1.0, 95., 1.0, 1.0])
    model.add_transitions(s3_snow,
                          [s0_veg, s1_residue, s2_soil, s3_snow, s4_water],
                          [1.0, 1.0, 1.0, 95., 1.0])
    model.add_transitions(s4_water,
                          [s0_veg, s1_residue, s2_soil, s3_snow, s4_water],
                          [1.0, 1.0, 1.0, 1.0, 95.])

    model.bake(verbose=False)

    return model
Example #20
import numpy
from pomegranate import DiscreteDistribution, HiddenMarkovModel, State

from converter_to import converter_to
from model_maker_utils import add_sequence
from model_maker_utils import equal_distribution
from matrix_from_aln import matrix_from_exa
# (calculator, classify and sequence_state_factory are project-local helpers
# whose import paths are not shown in these snippets.)

matrixAcceptor0 = numpy.array(matrix_from_exa('new_acceptor1.exa'))
acceptor0_data = classify(matrixAcceptor0, 2)

model = HiddenMarkovModel('intron_acceptor')

intron = State(DiscreteDistribution(
    calculator.intron_calculator('cuts_intron.txt').p),
               name='in')
acceptor0_states = sequence_state_factory(acceptor0_data, 'acceptor0')
post = State(DiscreteDistribution(equal_distribution), name='post')

model.add_state(intron)
add_sequence(model, acceptor0_states)
model.add_state(post)

model.add_transition(model.start, intron, 1)
model.add_transition(intron, intron, 0.9)
model.add_transition(intron, acceptor0_states[0], 0.1)
model.add_transition(acceptor0_states[-1], post, 1)
model.add_transition(post, post, 0.5)
model.add_transition(post, model.end, 0.5)

model.bake()
test_l = 'GTAACACTGAATACTCAGGAACAATTAATGGATGGTAACATATGAGGAATATCTAGGAGGCACACCCTCTCTGGCATCTATGATGGGCCAAAAACCCGCATTCGCTTGGCCACAGTATGTGAAATATAACCCAGCTTAGACACAGGGTGCGGCAGCTGTCATGTTTCTCTGTGTGTGCCGAGTGTCATGTCTGCACCGTACAGGGATAGCTGAGTCTTCATCCTCCTCAGCTCCTATCTGTCCAGTGCAATGAACAGCAGCTGCTCTCTTCCTCTCTGGTTCCCATGGCAGCCATGCTCTGTTGCAGAGAGAACAGGATTGCATGTTCCCTCTTAATGGGAACGTCCATTTTGCTTTCTGGGACCACTCTCTTAATGCCGCCTGTCAAAACCAGCTAGGACTCCCTGGGGTCCAATCCCTCTGTGTTTAATCTTCTGTCATCTCTGTCCCACCTGGCTCATCAGGGAGATGCAGAAGGCTGAAGAAAAGGAAGTCCCTGAGGACTCACTGGAGGAATGTGCCATCACTTGTTCAAATAGCCATGGCCCTTATGACTCCAACCATGACTCCAACC'
converted = converter_to(test_l.lower().replace(' ', '').replace('p', ''))

#logp, path = model.viterbi(converted)
Example #21
                oks += 1
            else:
                not_ok += 1
        print(oks / (oks + not_ok))


back = State(DiscreteDistribution(equal_distribution), name='back')
back2 = State(DiscreteDistribution(equal_distribution), name='back2')

matrixZE = numpy.array(matrix_from_exa('../data extractors/starts.exa'))
start_states_data = classify(matrixZE, 2)
start_states = sequence_state_factory(start_states_data, 'start zone')

model = HiddenMarkovModel()

model.add_state(back)
model.add_state(back2)
add_sequence(model, start_states)

model.add_transition(model.start, back, 1)
model.add_transition(back, back, 0.55)
model.add_transition(back, start_states[0], 0.45)
model.add_transition(start_states[-1], back2, 1)
model.add_transition(back2, back2, 0.5)

model.bake()


def train_and_test():
    test(model)
Example #22
tag_starts = starting_counts(data.training_set.Y)
# Calculate the count of each tag ending a sequence
tag_ends = ending_counts(data.training_set.Y)

basic_model = HiddenMarkovModel(name="base-hmm-tagger")

# Create states with emission probability distributions P(word | tag) and add to the model
tag_states = {}

for tag in data.training_set.tagset:
    tag_emissions = DiscreteDistribution({
        word: emission_counts[tag][word] / tag_unigrams[tag]
        for word in emission_counts[tag]
    })
    tag_states[tag] = State(tag_emissions, name=tag)
    basic_model.add_state(tag_states[tag])

# Add edges between states for the observed transition frequencies P(tag_i | tag_i-1)
for tag in data.training_set.tagset:
    # P(tag | start) = C(start, tag) / C(start): normalize by the total
    # number of training sequences, not by the tag count.
    basic_model.add_transition(basic_model.start, tag_states[tag],
                               tag_starts[tag] / sum(tag_starts.values()))
    for tag1 in data.training_set.tagset:
        basic_model.add_transition(
            tag_states[tag], tag_states[tag1],
            tag_bigrams[(tag, tag1)] / tag_unigrams[tag])
    basic_model.add_transition(tag_states[tag], basic_model.end,
                               tag_ends[tag] / tag_unigrams[tag])

# finalize the model
basic_model.bake()
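
A hedged decoding sketch (the sentence is invented; every word must already appear in the training vocabulary, or its emission probability is zero):

logp, path = basic_model.viterbi(['the', 'dog', 'barks'])
tags = [state.name for _, state in path[1:-1]]  # strip the start/end states
print(tags)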
Example #23
acceptor0_data = classify(matrixAcceptor0, 2)
no_coding_dist = calculator.intron_calculator('cuts_intron.txt').p

donor_states = sequence_state_factory(donor0_data, 'donor0')
acceptor_states = sequence_state_factory(acceptor0_data, 'acceptor0')
intron_spacer_states = spacer_states_maker(10, no_coding_dist, 'intron spacer')

utr_model = HiddenMarkovModel('utr_model')

# States
exon_state = State(DiscreteDistribution(calculator.utr_exon_5('mcutsa.txt').p),
                   name='utr exon')
intron_state = State(DiscreteDistribution(no_coding_dist), name='utr intron')

utr_model.add_model(promoter_model)
utr_model.add_state(exon_state)
utr_model.add_state(intron_state)

add_sequence(utr_model, donor_states)
add_sequence(utr_model, acceptor_states)
add_sequence(utr_model, intron_spacer_states)

utr_model.add_transition(utr_model.start, get_state(promoter_model, 'back'), 1)
utr_model.add_transition(get_state(promoter_model, 'inr7'), exon_state, 1)
utr_model.add_transition(get_state(promoter_model, 'no inr7'), exon_state, 1)

utr_model.add_transition(exon_state, exon_state, 0.7)
utr_model.add_transition(exon_state, donor_states[0], 0.2)
utr_model.add_transition(exon_state, utr_model.end, 0.1)

utr_model.add_transition(donor_states[-1], intron_state, 1)