Beispiel #1
0
def init_cycle_hmm(sequences, steps, states_per_step, model_id):
    """
    Instantiate a cyclic HMM with random parameters.

    Start and transition probabilities are drawn uniformly at random and
    normalised to sum to 1; each state's Normal emission distribution is
    fit from a chronological slice of the observation columns.

    Parameters
    ----------
    sequences : np.ndarray
        2-D observations; only shape[1] is read here, and samples are
        taken column-wise (rows presumably are independent sequences --
        confirm against callers).
    steps : int
        Number of steps in the cycle.
    states_per_step : int
        Number of parallel states at each step.
    model_id : str
        Name passed to HiddenMarkovModel.

    Returns
    -------
    HiddenMarkovModel
        Baked model with steps * states_per_step emitting states.
    """
    model = HiddenMarkovModel(model_id)
    n_states = steps * states_per_step

    # Width of the chronological column slice feeding each step's
    # emission fit (ceil so the slices span all columns).
    step_size = int(math.ceil(sequences.shape[1] / float(n_states+1)))

    # generate states: a steps x states_per_step grid
    states = np.empty((steps, states_per_step), dtype=object)
    for i in range(steps):
        for j in range(states_per_step):
            # same chronological slice for every state within step i
            temp_assignment = np.arange(step_size * i, step_size * (i+1))
            dist = \
                NormalDistribution.from_samples(sequences[:, temp_assignment])
            state_name = str(i) + '-' + str(j)
            states[i, j] = State(dist, name=str(state_name))

    # add states to model
    model.add_states(states.flatten().tolist())

    # random start probabilities into every state (not just step 0)
    trans = np.random.ranf(n_states)
    trans = trans / trans.sum()
    for i, state in enumerate(states.flatten().tolist()):
        model.add_transition(model.start, state, trans[i])

    # make random transition from step(i) -> step(i+1)
    for i in range(steps-1):
        for j in range(states_per_step):
            trans = np.random.ranf(states_per_step + 1)
            trans = trans / trans.sum()
            # self transition
            model.add_transition(states[i, j], states[i, j], trans[0])
            # out transition
            for x in range(states_per_step):
                model.add_transition(states[i, j], states[i + 1, x],
                                     trans[x + 1])

    # close the cycle: the last step feeds back into step 0
    for j in range(states_per_step):
        trans = np.random.ranf(states_per_step + 1)
        trans = trans / trans.sum()
        # self transition
        model.add_transition(states[(steps - 1), j], states[(steps - 1), j],
                             trans[0])
        # out transition
        for x in range(states_per_step):
            model.add_transition(states[(steps - 1), j], states[0, x],
                                 trans[x + 1])
    model.bake()
    print 'Initialized Cyclic State HMM:', '[', \
        steps, states_per_step, ']'
    return model
def gaussian_hmm(n_states, lower, upper, variance, model_id):
    """
    insantiate a model with random parameters
    randomly generates start and transition matrices
    generates nomal distrobutions for each state from partition on sequences
    """
    np.random.seed(int(time.time()))

    model = HiddenMarkovModel(model_id)

    # make states with distrobutions from random subsets of timepoints
    x = np.linspace(lower, upper, n_states)
    states = []
    for i in range(n_states):
        dist = \
            NormalDistribution(x[i], variance)
        states.append(State(dist, name=str(i)))

    model.add_states(states)

    # add uniform start probabilities
    start_prob = 1.0 / n_states
    start_probs = []
    for i in range(n_states):
        start_probs.append(start_prob + np.random.ranf())
    start_probs = np.array(start_probs)
    start_probs = start_probs / start_probs.sum()
    for i, state in enumerate(states):
        model.add_transition(model.start, state, start_probs[i])

    # add transition probabilities proportional to probability of generating
    # one state mean from another
    for state1 in states:
        transitions = []
        for other_state in states:
            transitions.append(np.exp(state1.distribution.log_probability(
                other_state.distribution.parameters[0])) + np.random.ranf())
        transitions = np.array(transitions)
        transitions = transitions / transitions.sum()
        for i, state2 in enumerate(states):
            model.add_transition(state1, state2, transitions[i])

    model.bake()
    print 'Initialized HMM: ', model.name
    return model
Beispiel #3
0
def bake_model(tags_sequence, words_sequence):
    """
    Build and bake an HMM demand tagger.

    'tags' are the time-demand labels that generate the emitted demand
    level; demand levels are represented by 'words'.
    """
    # rdemand: flatten the word sequences and count uni-/bi-grams.
    flat_words = list(chain(*words_sequence))
    word_unigrams = unigram_counts(flat_words)
    word_bigrams = bigram_counts(flat_words)

    # Uniform distribution for starting and ending labels.
    label_set = list(set(flat_words))
    start_counts = starting_counts(label_set)
    end_counts = ending_counts(label_set)

    model = HiddenMarkovModel(name="base-hmm-tagger")

    # Emission counts.
    pair_count = pair_counts(words_sequence, tags_sequence)

    # One state per observed demand level, carrying its emission
    # distribution P(word | tag).
    states = []
    for demand, counts_by_label in pair_count.items():
        emission = DiscreteDistribution({
            lbl: n / word_unigrams[demand]
            for lbl, n in counts_by_label.items()
        })
        states.append(State(emission, name=demand))

    model.add_states(states)
    index_of = {s.name: pos for pos, s in enumerate(states)}

    # Start transitions.
    start_total = sum(start_counts.values())
    for lbl, n in start_counts.items():
        model.add_transition(model.start, states[index_of[lbl]],
                             n / start_total)

    # End transitions.
    end_total = sum(end_counts.values())
    for lbl, n in end_counts.items():
        model.add_transition(states[index_of[lbl]], model.end,
                             n / end_total)

    # Edges for the observed transition frequencies P(tag_i | tag_i-1).
    for (prev, curr), n in word_bigrams.items():
        model.add_transition(states[index_of[prev]],
                             states[index_of[curr]],
                             n / word_unigrams[prev])

    # Finalize the model.
    model.bake()

    return model
Beispiel #4
0
def create_hidden_MarkovModel(e_df, q_df, start_p_dict):
    """
    Create a Hidden Markov Model from probability DataFrames.

    Args:
        e_df (pd.DataFrame): emission probabilities; one column per state,
            each mapping observation -> P(observation | state).
        q_df (pd.DataFrame): transition probabilities, indexed and keyed
            by state name, so q_df.loc[src, dst] = P(dst | src).
            (Original docstring wrongly called this "emission".)
        start_p_dict (dict): state name -> start probability.

    Returns:
        HiddenMarkovModel: the baked model.
    """
    model = HiddenMarkovModel(name="Example Model")

    # One state per column of the transition matrix: attach its emission
    # distribution P(observation | state) and wire the start probability.
    # (Fixes the original's duplicate State construction: an identical,
    # unused `sunny_state` was built and discarded for every key.)
    state_by_name = {}
    for name in q_df.keys().values:
        emission_p = DiscreteDistribution(e_df[name].to_dict())
        state_by_name[name] = State(emission_p, name=name)
        model.add_state(state_by_name[name])
        model.add_transition(model.start, state_by_name[name],
                             start_p_dict[name])

    # Add the transition probability between every pair of states.
    # (Leftover per-edge debug print removed.)
    for src_name, row in q_df.to_dict("index").items():
        for dst_name in row:
            model.add_transition(state_by_name[src_name],
                                 state_by_name[dst_name],
                                 q_df.loc[src_name, dst_name])

    # finally, call the .bake() method to finalize the model
    model.bake()

    return model
Beispiel #5
0
def train_hmm_tagger(data):
    """
    Build and bake a basic HMM part-of-speech tagger from `data`.

    Relies on counts computed at module level elsewhere in the file
    (`tag_unigrams`, `tag_starts`, `tag_ends`, `tag_bigrams`) plus the
    `pair_counts` helper and `HTML` -- none of which are defined here.

    Returns the baked HiddenMarkovModel. Raises AssertionError if any
    training tag lacks a state or if the edge count differs from 168
    (a constant presumably specific to this dataset -- confirm).
    """
    # HMM
    # Use the tag unigrams and bigrams calculated above to construct a hidden Markov tagger.
    #
    # - Add one state per tag
    #     - The emission distribution at each state should be estimated with the formula: $P(w|t) = \frac{C(t, w)}{C(t)}$
    # - Add an edge from the starting state `basic_model.start` to each tag
    #     - The transition probability should be estimated with the formula: $P(t|start) = \frac{C(start, t)}{C(start)}$
    # - Add an edge from each tag to the end state `basic_model.end`
    #     - The transition probability should be estimated with the formula: $P(end|t) = \frac{C(t, end)}{C(t)}$
    # - Add an edge between _every_ pair of tags
    #     - The transition probability should be estimated with the formula: $P(t_2|t_1) = \frac{C(t_1, t_2)}{C(t_1)}$
    basic_model = HiddenMarkovModel(name="base-hmm-tagger")

    state_dict = {}
    states = []
    # NOTE(review): the (word, tag) stream is reversed via [::-1] before
    # pair_counts -- presumably to pass (tags, words); confirm signature.
    emission_counts = pair_counts(*list(zip(
        *data.training_set.stream()))[::-1])
    for tag in emission_counts.keys():
        tag_count = tag_unigrams[tag]
        probs = {}
        for w in emission_counts[tag]:
            probs[w] = emission_counts[tag][w] / tag_count
        emission_p = DiscreteDistribution(probs)
        state = State(emission_p, name="" + tag)
        basic_model.add_state(state)
        state_dict[tag] = state

    # Start and end edges for every tag seen at a sentence start.
    for tag in tag_starts:
        basic_model.add_transition(basic_model.start, state_dict[tag],
                                   tag_starts[tag] / len(data.training_set.Y))
        basic_model.add_transition(state_dict[tag], basic_model.end,
                                   tag_ends[tag] / tag_unigrams[tag])

    # Tag-to-tag edges weighted by bigram frequency.
    for (tag1, tag2) in tag_bigrams:
        basic_model.add_transition(
            state_dict[tag1], state_dict[tag2],
            tag_bigrams[(tag1, tag2)] / tag_unigrams[tag1])

    # finalize the model
    basic_model.bake()

    assert all(
        tag in set(s.name for s in basic_model.states)
        for tag in data.training_set.tagset
    ), "Every state in your network should use the name of the associated tag, which must be one of the training set tags."
    assert basic_model.edge_count() == 168, (
        "Your network should have an edge from the start node to each state, one edge between every "
        +
        "pair of tags (states), and an edge from each state to the end node.")
    HTML(
        '<div class="alert alert-block alert-success">Your HMM network topology looks good!</div>'
    )
    return basic_model
Beispiel #6
0
def _initialize_new_hmm(hmm, new_states, new_transitions):
    """Assemble and bake a fresh HMM from the given states and edges.

    States equal to the old model's start/end markers are skipped
    (the new model supplies its own); transitions out of the old start
    or into the old end are remapped onto the new model's markers.
    """
    rebuilt = HiddenMarkovModel()

    # Copy every ordinary state.
    for candidate in new_states:
        if candidate not in (hmm.start, hmm.end):
            rebuilt.add_state(candidate)

    # Re-wire each edge, substituting the new start/end where needed.
    # (If a transition leaves the old start, it is remapped even when it
    # also enters the old end -- same precedence as the original code.)
    for src, dst, prob in new_transitions:
        if src == hmm.start:
            rebuilt.add_transition(rebuilt.start, dst, prob)
        elif dst == hmm.end:
            rebuilt.add_transition(src, rebuilt.end, prob)
        else:
            rebuilt.add_transition(src, dst, prob)

    rebuilt.bake()
    return rebuilt
Beispiel #7
0
def ghmm_model(states_labels: tuple,
               transitions: tuple,
               init_prob: tuple,
               end_prob: tuple,
               means: list,
               vars: list) -> HiddenMarkovModel:
    """
    Build a Gaussian-mixture HMM.

    :param states_labels: one label per hidden state (count only is used).
    :param transitions: square matrix of state-to-state probabilities.
    :param init_prob: start probability per state.
    :param end_prob: end probability per state.
    :param means: per-state list of mixture-component means.
    :param vars: per-state list of mixture-component variances.
    :return: the baked HiddenMarkovModel.
    """
    model = HiddenMarkovModel()

    # Each state emits from a mixture with len(vars[0]) Normal components.
    n_components = len(vars[0])
    states = []
    for idx in range(len(states_labels)):
        components = [NormalDistribution(means[idx][k], vars[idx][k])
                      for k in range(n_components)]
        states.append(State(GeneralMixtureModel(components), name=str(idx)))
    model.add_states(*states)

    # Only wire edges that carry non-zero probability.
    for src in range(len(states_labels)):
        for dst in range(len(states_labels)):
            weight = transitions[src][dst]
            if weight != 0.:
                model.add_transition(states[src], states[dst], weight)
    for idx, weight in enumerate(init_prob):
        if weight != 0.:
            model.add_transition(model.start, states[idx], weight)
    for idx, weight in enumerate(end_prob):
        if weight != 0.:
            model.add_transition(states[idx], model.end, weight)

    model.bake()

    return model
Beispiel #8
0
def get_suffix_matcher_hmm(pattern):
    """
    Build a profile-HMM that matches any suffix of `pattern`.

    Standard match/insert/delete topology: match state i emits
    pattern[i-1] with p=0.97 (0.01 for each other base), insert states
    emit uniformly over A/C/G/T, delete states are silent. The unit
    start fans out to every match state, so alignment may begin at any
    pattern position -- hence "suffix" matcher. Baked with merge=None so
    the silent bookkeeping states survive.
    """
    model = Model(name="Suffix Matcher HMM Model")
    insert_distribution = DiscreteDistribution({
        'A': 0.25,
        'C': 0.25,
        'G': 0.25,
        'T': 0.25
    })
    insert_states = []
    match_states = []
    delete_states = []
    hmm_name = 'suffix'
    # Insert states I0..In: one per gap, including before/after pattern.
    for i in range(len(pattern) + 1):
        insert_states.append(
            State(insert_distribution, name='I%s_%s' % (i, hmm_name)))

    # Match states M1..Mn, strongly biased toward the pattern base.
    for i in range(len(pattern)):
        distribution_map = dict({'A': 0.01, 'C': 0.01, 'G': 0.01, 'T': 0.01})
        distribution_map[pattern[i]] = 0.97
        match_states.append(
            State(DiscreteDistribution(distribution_map),
                  name='M%s_%s' % (str(i + 1), hmm_name)))

    # Silent delete states D1..Dn.
    for i in range(len(pattern)):
        delete_states.append(
            State(None, name='D%s_%s' % (str(i + 1), hmm_name)))

    unit_start = State(None, name='suffix_start_%s' % hmm_name)
    unit_end = State(None, name='suffix_end_%s' % hmm_name)
    model.add_states(insert_states + match_states + delete_states +
                     [unit_start, unit_end])
    last = len(delete_states) - 1

    model.add_transition(model.start, unit_start, 1)

    model.add_transition(unit_end, model.end, 1)

    # Entry: 0.98 split evenly across all match states (any suffix can
    # start the alignment), 0.01 into the first delete/insert state.
    model.add_transition(unit_start, delete_states[0], 0.01)
    model.add_transition(unit_start, insert_states[0], 0.01)
    for i in range(len(pattern)):
        model.add_transition(unit_start, match_states[i], 0.98 / len(pattern))

    model.add_transition(insert_states[0], insert_states[0], 0.01)
    model.add_transition(insert_states[0], delete_states[0], 0.01)
    model.add_transition(insert_states[0], match_states[0], 0.98)

    # Exits from the final column into the unit end.
    model.add_transition(delete_states[last], unit_end, 0.99)
    model.add_transition(delete_states[last], insert_states[last + 1], 0.01)

    model.add_transition(match_states[last], unit_end, 0.99)
    model.add_transition(match_states[last], insert_states[last + 1], 0.01)

    model.add_transition(insert_states[last + 1], insert_states[last + 1],
                         0.01)
    model.add_transition(insert_states[last + 1], unit_end, 0.99)

    # Column-to-column wiring: M/D/I at position i feed position i+1.
    for i in range(0, len(pattern)):
        model.add_transition(match_states[i], insert_states[i + 1], 0.01)
        model.add_transition(delete_states[i], insert_states[i + 1], 0.01)
        model.add_transition(insert_states[i + 1], insert_states[i + 1], 0.01)
        if i < len(pattern) - 1:
            model.add_transition(insert_states[i + 1], match_states[i + 1],
                                 0.98)
            model.add_transition(insert_states[i + 1], delete_states[i + 1],
                                 0.01)

            model.add_transition(match_states[i], match_states[i + 1], 0.98)
            model.add_transition(match_states[i], delete_states[i + 1], 0.01)

            model.add_transition(delete_states[i], delete_states[i + 1], 0.01)
            model.add_transition(delete_states[i], match_states[i + 1], 0.98)

    model.bake(merge=None)

    return model
Beispiel #9
0
def build_reference_repeat_finder_hmm(patterns, copies=1):
    """
    Build a profile-HMM that models `copies` tandem repeats of patterns[0].

    One match/insert/delete unit is chained per repeat copy (match states
    emit the pattern base with p=0.97, inserts are uniform over A/C/G/T,
    deletes are silent). Flanking `start_random_matches` /
    `end_random_matches` states absorb arbitrary sequence before and
    after the repeat tract. If more than one pattern is supplied, the
    baked model is additionally fit with Viterbi training (pseudocounts)
    on each pattern repeated `copies` times.
    """
    pattern = patterns[0]
    model = Model(name="HMM Model")
    insert_distribution = DiscreteDistribution({
        'A': 0.25,
        'C': 0.25,
        'G': 0.25,
        'T': 0.25
    })

    last_end = None
    start_random_matches = State(insert_distribution,
                                 name='start_random_matches')
    end_random_matches = State(insert_distribution, name='end_random_matches')
    model.add_states([start_random_matches, end_random_matches])
    for repeat in range(copies):
        # Build this copy's I0..In, M1..Mn and silent D1..Dn states.
        insert_states = []
        match_states = []
        delete_states = []
        for i in range(len(pattern) + 1):
            insert_states.append(
                State(insert_distribution, name='I%s_%s' % (i, repeat)))

        for i in range(len(pattern)):
            distribution_map = dict({
                'A': 0.01,
                'C': 0.01,
                'G': 0.01,
                'T': 0.01
            })
            distribution_map[pattern[i]] = 0.97
            match_states.append(
                State(DiscreteDistribution(distribution_map),
                      name='M%s_%s' % (str(i + 1), repeat)))

        for i in range(len(pattern)):
            delete_states.append(
                State(None, name='D%s_%s' % (str(i + 1), repeat)))

        unit_start = State(None, name='unit_start_%s' % repeat)
        unit_end = State(None, name='unit_end_%s' % repeat)
        model.add_states(insert_states + match_states + delete_states +
                         [unit_start, unit_end])
        last = len(delete_states) - 1

        # Chain this unit after the previous copy, or hook the first copy
        # to the model start (optionally via the random-prefix state).
        if repeat > 0:
            model.add_transition(last_end, unit_start, 0.5)
        else:
            model.add_transition(model.start, unit_start, 0.5)
            model.add_transition(model.start, start_random_matches, 0.5)
            model.add_transition(start_random_matches, unit_start, 0.5)
            model.add_transition(start_random_matches, start_random_matches,
                                 0.5)

        # Every unit may exit into the random-suffix state; the final
        # copy also connects to the model end.
        model.add_transition(unit_end, end_random_matches, 0.5)
        if repeat == copies - 1:
            model.add_transition(unit_end, model.end, 0.5)
            model.add_transition(end_random_matches, end_random_matches, 0.5)
            model.add_transition(end_random_matches, model.end, 0.5)

        model.add_transition(unit_start, match_states[0], 0.98)
        model.add_transition(unit_start, delete_states[0], 0.01)
        model.add_transition(unit_start, insert_states[0], 0.01)

        model.add_transition(insert_states[0], insert_states[0], 0.01)
        model.add_transition(insert_states[0], delete_states[0], 0.01)
        model.add_transition(insert_states[0], match_states[0], 0.98)

        # Exits from the final column into the unit end.
        model.add_transition(delete_states[last], unit_end, 0.99)
        model.add_transition(delete_states[last], insert_states[last + 1],
                             0.01)

        model.add_transition(match_states[last], unit_end, 0.99)
        model.add_transition(match_states[last], insert_states[last + 1], 0.01)

        model.add_transition(insert_states[last + 1], insert_states[last + 1],
                             0.01)
        model.add_transition(insert_states[last + 1], unit_end, 0.99)

        # Column-to-column wiring within the unit.
        for i in range(0, len(pattern)):
            model.add_transition(match_states[i], insert_states[i + 1], 0.01)
            model.add_transition(delete_states[i], insert_states[i + 1], 0.01)
            model.add_transition(insert_states[i + 1], insert_states[i + 1],
                                 0.01)
            if i < len(pattern) - 1:
                model.add_transition(insert_states[i + 1], match_states[i + 1],
                                     0.98)
                model.add_transition(insert_states[i + 1],
                                     delete_states[i + 1], 0.01)

                model.add_transition(match_states[i], match_states[i + 1],
                                     0.98)
                model.add_transition(match_states[i], delete_states[i + 1],
                                     0.01)

                model.add_transition(delete_states[i], delete_states[i + 1],
                                     0.01)
                model.add_transition(delete_states[i], match_states[i + 1],
                                     0.98)

        last_end = unit_end

    model.bake()
    if len(patterns) > 1:
        # model.fit(patterns, algorithm='baum-welch', transition_pseudocount=1, use_pseudocount=True)
        fit_patterns = [pattern * copies for pattern in patterns]
        model.fit(fit_patterns,
                  algorithm='viterbi',
                  transition_pseudocount=1,
                  use_pseudocount=True)

    return model
Beispiel #10
0
skf = StratifiedKFold(full_labels, n_folds=folds)

for train_index, test_index in skf:

    model = HMM(name="Gait")
    hmm_states = []

    for i in range(0, 2):
        # dis = MGD(np.array(class_means[i]).flatten(), np.array(class_cov[i]))
        dis = MGD.from_samples(class_data[i])
        st = State(dis, name=state_names[i])
        distros.append(dis)
        hmm_states.append(st)

    model.add_states(hmm_states)
    model.add_transition(model.start, hmm_states[0], 0.5)
    model.add_transition(model.start, hmm_states[1], 0.5)

    for i in range(0, 2):
        for j in range(0, 2):
            model.add_transition(hmm_states[i], hmm_states[j], t[i][j])

    model.bake()
    rospy.logwarn("Baked model")
    print("TRAIN:", train_index, "TEST:", test_index)
    train_data = full_data[train_index]
    # print(len(train_data))
    train_class = full_labels[train_index]
    # print(len(train_class))
    test_data = full_data[test_index]
    # print(len(test_data))
"""

import random
from pomegranate import *
from pomegranate import HiddenMarkovModel as Model

# Toy example model: two emitting states (uniform and normal); a silent
# state is created but never added to the model.
random.seed(0)
model = Model(name="ExampleModel")
distribution = UniformDistribution(0.0, 1.0)
state = State(distribution, name="uniform")
state2 = State(NormalDistribution(0, 2), name="normal")
silent = State(None, name="silent")
model.add_state(state)
model.add_state(state2)

# Self- and cross-transitions between the two emitting states.
model.add_transition(state, state, 0.4)
model.add_transition(state, state2, 0.4)
model.add_transition(state2, state2, 0.4)
model.add_transition(state2, state, 0.4)

# Uniform start; each state may also exit the model with p=0.2.
model.add_transition(model.start, state, 0.5)
model.add_transition(model.start, state2, 0.5)
model.add_transition(state, model.end, 0.2)
model.add_transition(state2, model.end, 0.2)

model.bake()
sequence = model.sample()

# Python 2 prints: the sampled sequence, then its forward probability of
# ending in the end state after consuming the whole sequence.
print sequence
print
print model.forward(sequence)[ len(sequence), model.end_index ]
def train_and_test():
    """
    Build, evaluate, fit, and save a coding-region -> donor-site HMM.

    Reads exon sequences from '../data extractors/exons_start_1.txt',
    builds a model with three cyclic coding states (codon positions
    0 -> 1 -> 2 -> 0) that can exit into a donor-site state chain, fits
    it on the converted sequences with pseudocounts, and writes the
    model JSON to 'partial_model_coding_to_donor_model0.json'.

    Depends on helpers defined elsewhere (converter_to, matrix_from_exa,
    calculator, classify, sequence_state_factory, add_sequence,
    test_model, equal_distribution) -- none are defined in this file.
    """
    # Load sequences: drop 'P' markers, lowercase, strip newlines.
    with open('../data extractors/exons_start_1.txt') as in_file:
        total = []
        for line in in_file:
            no_p_line = line.replace('P', '').lower().replace('\n', '')
            total.append(no_p_line)

    converted_total = [converter_to(x, 2) for x in total]

    matrixDonor0 = numpy.array(
        matrix_from_exa('../data extractors/new_donor1.exa'))

    # Per-codon-position emission probabilities for the coding states.
    c0, c1, c2 = calculator.calculate_proba2('../data extractors/new_cuts.txt')
    print(c0.p, c1.p, c2.p)
    coding_state0 = State(DiscreteDistribution(c0.p), 'coding state 0')
    coding_state1 = State(DiscreteDistribution(c1.p), 'coding state 1')
    coding_state2 = State(DiscreteDistribution(c2.p), 'coding state 2')

    # Donor-site state chain built from the position matrix.
    donor0_data = classify(matrixDonor0, 2)
    donor0_states = sequence_state_factory(donor0_data, 'donor0')

    # Uniform catch-all state after the donor site.
    post = State(DiscreteDistribution(equal_distribution), name='post')

    model = HiddenMarkovModel('coding to donor')

    model.add_state(coding_state0)
    model.add_state(coding_state1)
    model.add_state(coding_state2)

    add_sequence(model, donor0_states)

    model.add_state(post)

    model.add_transition(model.start, coding_state0, 1)

    # Cycle through codon positions, each with a 0.4 chance of leaving
    # into the donor chain.
    model.add_transition(coding_state0, coding_state1, 0.6)
    model.add_transition(coding_state0, donor0_states[0], 0.4)

    model.add_transition(coding_state1, coding_state2, 0.6)
    model.add_transition(coding_state1, donor0_states[0], 0.4)

    model.add_transition(coding_state2, coding_state0, 0.6)
    model.add_transition(coding_state2, donor0_states[0], 0.4)

    model.add_transition(donor0_states[-1], post, 1)

    model.add_transition(post, post, 0.9)
    model.add_transition(post, model.end, 0.1)

    model.bake()
    test_model(model)

    model.fit(converted_total,
              transition_pseudocount=1,
              emission_pseudocount=1,
              verbose=True)

    test_model(model)

    with open('partial_model_coding_to_donor_model0.json', 'w') as out:
        out.write(model.to_json())
Beispiel #13
0
    distribution = {word: count/total for word, count in words_dict.items()}
    tag_emissions = DiscreteDistribution(distribution)
    tag_state = State(tag_emissions, name=tag)
    to_pass_states.append(tag_state)


basic_model.add_states()    
    

start_prob={}

for tag in tags:
    start_prob[tag]=starting_tag_count[tag]/tags_count[tag]

for tag_state in to_pass_states :
    basic_model.add_transition(basic_model.start,tag_state,start_prob[tag_state.name])    

end_prob={}

for tag in tags:
    end_prob[tag]=ending_tag_count[tag]/tags_count[tag]
for tag_state in to_pass_states :
    basic_model.add_transition(tag_state,basic_model.end,end_prob[tag_state.name])
    


transition_prob_pair={}

for key in tag_bigrams.keys():
    transition_prob_pair[key]=tag_bigrams.get(key)/tags_count[key[0]]
for tag_state in to_pass_states :
Beispiel #14
0

# Background states emitting the equal distribution: `back` before the
# start-zone chain, `back2` after it.
back = State(DiscreteDistribution(equal_distribution), name='back')
back2 = State(DiscreteDistribution(equal_distribution), name='back2')

# Build the start-zone state chain from the position matrix file.
matrixZE = numpy.array(matrix_from_exa('../data extractors/starts.exa'))
start_states_data = classify(matrixZE, 2)
start_states = sequence_state_factory(start_states_data, 'start zone')

model = HiddenMarkovModel()

model.add_state(back)
model.add_state(back2)
add_sequence(model, start_states)

# Loop in the background until the start zone begins, then pass through
# the chain into back2.
# NOTE(review): back2's outgoing probability mass is only 0.5 here --
# presumably pomegranate renormalises at bake time; confirm intent.
model.add_transition(model.start, back, 1)
model.add_transition(back, back, 0.55)
model.add_transition(back, start_states[0], 0.45)
model.add_transition(start_states[-1], back2, 1)
model.add_transition(back2, back2, 0.5)

model.bake()


def train_and_test():
    """Evaluate the module-level model, then load the training lines.

    NOTE(review): appears truncated by the source extraction -- `lines`
    is built but never used in the visible body.
    """
    test(model)

    # Read the training file, strip newlines, and encode each line.
    lines = []
    with open('../data extractors/train_start2.exa') as fi:
        for line in fi:
            lines.append(converter_to(line.replace('\n', '')))
distros = []
hmm_states = []
state_names = ['ff', 'ho', 'sw', 'hs']
for i in range(0, n_classes):
    dis = MGD\
        (np.array(class_means[i]).flatten(),
         np.array(class_cov[i]))
    st = State(dis, name=state_names[i])
    distros.append(dis)
    hmm_states.append(st)
model = HMM(name="Gait")

model.add_states(hmm_states)
"""Initial transitions"""
for i in range(0, n_classes):
    model.add_transition(model.start, hmm_states[i], startprob[i])
"""Left-right model"""
for i in range(0, n_classes):
    for j in range(0, n_classes):
        model.add_transition(hmm_states[i], hmm_states[j], t[i][j])

model.bake()

# print (model.name)
rospy.logwarn("N. observations: " + str(model.d))
# print (model.edges)
rospy.logwarn("N. hidden states: " + str(model.silent_start))
# print model
"""Training"""
limit = int(len(ff) * (8 / 10.0))  # 80% of data to test, 20% to train
# seq = list([ff[:limit]])
"""

from pomegranate import *
from pomegranate import HiddenMarkovModel as Model
import random
import math

# Classic Rainy/Sunny weather HMM with walk/shop/clean observations.
random.seed(0)

model = Model( name="Rainy-Sunny" )

# Emission probabilities
rainy = State( DiscreteDistribution({ 'walk': 0.1, 'shop': 0.4, 'clean': 0.5 }), name='Rainy' )
sunny = State( DiscreteDistribution({ 'walk': 0.6, 'shop': 0.3, 'clean': 0.1 }), name='Sunny' )

# Start probabilities: P(Rainy) = 0.6, P(Sunny) = 0.4.
model.add_transition( model.start, rainy, 0.6 )
model.add_transition( model.start, sunny, 0.4 )

# Transition matrix, with 0.05 subtracted from each probability to add to
# the probability of exiting the hmm
model.add_transition( rainy, rainy, 0.65 )
model.add_transition( rainy, sunny, 0.25 )
model.add_transition( sunny, rainy, 0.35 )
model.add_transition( sunny, sunny, 0.55 )

# Add transitions to the end of the model
model.add_transition( rainy, model.end, 0.1 )
model.add_transition( sunny, model.end, 0.1 )

# Finalize the model structure
model.bake( verbose=True )
def dominant_cover_hmm_model(nn_pobability_matrix, timeseries_steps,
                             n_observed_classes):
    """
    Build an HMM over five land-cover classes whose emissions come from
    neural-network class probabilities.

    Each hidden state wraps one class column of the network's predicted
    probabilities. All states are equally likely at the start, and the
    transition weights strongly favour staying in the same class
    (95 vs. 1 -- pomegranate normalises the weights at bake time).

    Args:
        nn_pobability_matrix: predicted class probabilities, passed
            through to NeuralNetworkWrapperCustom.
        timeseries_steps: number of samples/steps in the series.
        n_observed_classes: number of classes the network predicts.

    Returns:
        The baked HiddenMarkovModel.
    """
    # One wrapped-network emission distribution and state per cover
    # class; the class index i selects the network's column. (Replaces
    # five copy-pasted d0..d4 / s0..s4 constructions.)
    class_names = ['vegetation', 'residue', 'soil', 'snow', 'water']
    states = [
        State(
            NeuralNetworkWrapperCustom(
                predicted_probabilities=nn_pobability_matrix,
                i=i,
                n_samples=timeseries_steps,
                n_classes=n_observed_classes),
            name=name)
        for i, name in enumerate(class_names)
    ]

    model = HiddenMarkovModel()

    # Initialize each hidden state.
    # All states have an equal chance of being the starting state.
    for s in states:
        model.add_state(s)
        model.add_transition(model.start, s, 1)

    # Heavy self-transition weight, light weight to every other class.
    for i, src in enumerate(states):
        weights = [95. if j == i else 1.0 for j in range(len(states))]
        model.add_transitions(src, states, weights)

    model.bake(verbose=False)

    return model
Beispiel #18
0
bb = MultivariateGaussianDistribution.from_samples(X_22)

c = MultivariateGaussianDistribution.from_samples(X_3)

s1 = State(a, name="M1")
s11 = State(aa, name="M11")

s2 = State(b, name="M2")
s22 = State(bb, name="M22")

s3 = State(c, name="M3")

hmm = HiddenMarkovModel()
hmm.add_states(s1, s11, s2, s22, s3)

hmm.add_transition(hmm.start, s1, 0.2)
hmm.add_transition(hmm.start, s11, 0.2)

hmm.add_transition(hmm.start, s2, 0.2)
hmm.add_transition(hmm.start, s22, 0.2)

hmm.add_transition(hmm.start, s3, 0.2)


hmm.add_transition(s1, s1, 0.92)
hmm.add_transition(s1, s11, 0.02)
hmm.add_transition(s1, s2, 0.02)
hmm.add_transition(s1, s22, 0.02)
hmm.add_transition(s1, s3, 0.02)

hmm.add_transition(s11, s1, 0.02)
Beispiel #19
0
def init_lr_hmm(sequences, steps, states_per_step,
                force_end=False, model_id='Left-Righ HMM', seed=None):
    """
    Instantiate a left-right HMM with random parameters.

    Start and transition probabilities are drawn uniformly at random and
    normalised; each state's Normal emission distribution is fit from a
    chronological slice of the observation columns.

    Parameters
    ----------
    sequences : np.ndarray
        2-D observations; only shape[1] is read here, samples are taken
        column-wise.
    steps : int
        Number of left-to-right steps.
    states_per_step : int
        Number of parallel states per step.
    force_end : bool
        If True, the last step also transitions to the model's end
        state, requiring sequences to finish there.
    model_id : str
        Model name (default keeps the original 'Left-Righ' spelling).
    seed : int or None
        Optional numpy RNG seed for reproducible initialisation.

    Returns
    -------
    HiddenMarkovModel
        Baked model with steps * states_per_step emitting states.
    """

    # seed random number generator
    if seed is not None:
        np.random.seed(seed)

    model = HiddenMarkovModel(model_id)
    n_states = steps * states_per_step

    # Width of the chronological column slice feeding each step's
    # emission fit (ceil so the slices span all columns).
    step_size = int(math.ceil(sequences.shape[1] / float(n_states+1)))

    # generate states: a steps x states_per_step grid
    states = np.empty((steps, states_per_step), dtype=object)
    for i in range(steps):
        for j in range(states_per_step):
            # same chronological slice for every state within step i
            temp_assignment = np.arange(step_size * i, step_size * (i+1))
            dist = \
                NormalDistribution.from_samples(sequences[:, temp_assignment])
            state_name = str(i) + '-' + str(j)
            states[i, j] = State(dist, name=str(state_name))

    # add states to model
    model.add_states(states.flatten().tolist())

    # make random transition from start -> step0 only
    trans = np.random.ranf(states_per_step)
    trans = trans / trans.sum()
    for j in range(states_per_step):
        model.add_transition(model.start, states[0, j], trans[j])

    # make random transition from step(i) -> step(i+1)
    for i in range(steps-1):
        for j in range(states_per_step):
            trans = np.random.ranf(states_per_step + 1)
            trans = trans / trans.sum()
            # self transition
            model.add_transition(states[i, j], states[i, j], trans[0])
            # out transition
            for x in range(states_per_step):
                model.add_transition(states[i, j], states[i + 1, x],
                                     trans[x + 1])

    # make random transition from stepn -> end
    if force_end:
        for j in range(states_per_step):
            trans = np.random.ranf(2)
            trans = trans / trans.sum()
            # self transition
            model.add_transition(states[(steps - 1), j],
                                 states[(steps - 1), j], trans[0])
            # end transition
            model.add_transition(states[(steps - 1), j], model.end, trans[1])

    model.bake()
    print 'Initialized Left-Right HMM:', model.name, '[', \
        steps, states_per_step, ']'
    return model
Beispiel #20
0
def init_gaussian_hmm(sequences, n_states, model_id, seed=None):
    """
    insantiate a model with random parameters
    randomly generates start and transition matrices
    generates nomal distrobutions for each state from partition on sequences
    """
    """
    # make random transition probability matrix
    # scale each row to sum to 1
    trans = np.random.ranf((n_states, n_states))
    for i in range(n_states):
        trans[i, :] = trans[i, :] / trans[i, :].sum()

    # make distrobutions from random subsets of timepoints
    x = int(math.ceil(sequences.shape[1] / float(n_states)))
    # x = math.min(3, x)

    dists = []
    for i in range(n_states):
        temp_assignment = np.random.choice(sequences.shape[1], x)
        dists.append(NormalDistribution.from_samples
                     (sequences[:, temp_assignment]))

    # random start probabilities
    # scale to sum to 1
    starts = np.random.ranf(n_states)
    starts = starts / sum(starts)

    model = HiddenMarkovModel.from_matrix(trans, dists, starts, name=model_id)
    """
    # seed random numer generator
    if seed is not None:
        np.random.seed(seed)

    model = HiddenMarkovModel(model_id)

    # make states with distrobutions from random subsets of timepoints
    x = int(math.ceil(sequences.shape[1] / float(n_states)))
    states = []
    for i in range(n_states):
        temp_assignment = np.random.choice(sequences.shape[1], x)
        dist = \
            NormalDistribution.from_samples(sequences[:, temp_assignment])
        states.append(State(dist, name=str(i)))

    model.add_states(states)

    # add random start probabilities
    start_probs = np.random.ranf(n_states)
    start_probs = start_probs / start_probs.sum()
    for i, state in enumerate(states):
        model.add_transition(model.start, state, start_probs[i])

    # add random transition probabilites out of each state
    for state1 in states:
        transitions = np.random.ranf(n_states)
        transitions = transitions / transitions.sum()
        for i, state2 in enumerate(states):
            model.add_transition(state1, state2, transitions[i])

    model.bake()
    print 'Initialized HMM: ', model.name
    return model
Beispiel #21
0
# Remaining mixture-component states; the emission distributions
# (bbbbb, c, cc, ...) and the earlier s1/s11/... states are defined
# above this chunk.
s22222 = State(bbbbb, name="M22222")
s222222 = State(bbbbbb, name="M222222")

s3 = State(c, name="M3")
s33 = State(cc, name="M33")
s333 = State(ccc, name="M333")
s3333 = State(cccc, name="M3333")
s33333 = State(ccccc, name="M33333")
s333333 = State(cccccc, name="M333333")

# Assemble the HMM from all 18 component states.
hmm = HiddenMarkovModel()
hmm.add_states(s1, s11, s111, s2, s22, s222, s3, s33, s333,
               s1111, s11111, s111111, s2222, s22222, s222222, s3333, s33333,
               s333333)

# Start transitions.  NOTE(review): every start edge uses weight 1.0 —
# presumably relying on edge weights being normalized at bake(); confirm
# this is intended rather than literal probabilities.
hmm.add_transition(hmm.start, s1, 1.)
hmm.add_transition(hmm.start, s11, 1.)
hmm.add_transition(hmm.start, s111, 1.)
hmm.add_transition(hmm.start, s2, 1.)
hmm.add_transition(hmm.start, s22, 1.)
hmm.add_transition(hmm.start, s222, 1.)
hmm.add_transition(hmm.start, s3, 1.)
hmm.add_transition(hmm.start, s33, 1.)
hmm.add_transition(hmm.start, s333, 1.)
hmm.add_transition(hmm.start, s1111, 1.)
hmm.add_transition(hmm.start, s11111, 1.)
hmm.add_transition(hmm.start, s111111, 1.)
hmm.add_transition(hmm.start, s2222, 1.)
hmm.add_transition(hmm.start, s22222, 1.)
hmm.add_transition(hmm.start, s222222, 1.)
hmm.add_transition(hmm.start, s3333, 1.)
Beispiel #22
0
    emission_prob = {}

    for word, number in emission_counts[tag].items():
        emission_prob[word] = number / tag_unigrams[tag]

    tag_distribution = DiscreteDistribution(emission_prob)
    state = State(tag_distribution, name=tag)
    states[tag] = state
    basic_model.add_state(state)

# Start/end edges: P(sentence starts with tag) and P(sentence ends with tag).
for tag in data.tagset:

    state = states[tag]
    start_probability = tag_starts[tag] / sum(tag_starts.values())
    basic_model.add_transition(basic_model.start, state, start_probability)
    end_probability = tag_ends[tag] / sum(tag_ends.values())
    basic_model.add_transition(state, basic_model.end, end_probability)

# Tag-to-tag edges from bigram counts: P(tag2 | tag1) = C(tag1, tag2) / C(tag1).
for tag1 in data.tagset:

    state_1 = states[tag1]

    for tag2 in data.tagset:

        state_2 = states[tag2]
        bigram = (tag1, tag2)
        transition_probability = tag_bigrams[bigram] / tag_unigrams[tag1]
        basic_model.add_transition(state_1, state_2, transition_probability)

# finalize the model topology
basic_model.bake()
Beispiel #23
0
# Uniform-emission state modelling the region after the stop codon.
post = State(DiscreteDistribution(equal_distribution), name='post')

model = HiddenMarkovModel('coding_to_stop')

# Build the stop-codon state chain from the classified stop matrix.
stop_data = classify(matrixStop, 2)
stop_states = sequence_state_factory(stop_data, 'stop')

# Three coding states cycle 0 -> 1 -> 2 to track codon position.
model.add_state(coding_state0)
model.add_state(coding_state1)
model.add_state(coding_state2)

add_sequence(model, stop_states)

model.add_state(post)

model.add_transition(model.start, coding_state1, 1)
model.add_transition(coding_state0, coding_state1, 1)
model.add_transition(coding_state1, coding_state2, 1)
# From the third codon position either keep coding (0.6) or enter the
# stop-codon chain (0.4); after the chain, a sticky post state to the end.
model.add_transition(coding_state2, coding_state0, 0.6)
model.add_transition(coding_state2, stop_states[0], 0.4)
model.add_transition(stop_states[-1], post, 1)
model.add_transition(post, post, 0.9)
model.add_transition(post, model.end, 0.1)

model.bake()

# Load test sequences: strip 'P' markers and newlines, lowercase the rest.
with open('../data extractors/exons_end_start_2.txt') as in_file:
    total = []
    for line in in_file:
        no_p_line = line.replace('P', '').replace('\n', '').lower()
        total.append(no_p_line)
    def build_dis_classifier(self):
        """Train and evaluate one two-state (swing/stance) HMM per data
        column using stratified k-fold cross-validation.

        For each column of ``self.full_data``, a Gaussian-emission HMM is
        fit with Baum-Welch on the training folds and evaluated with
        Viterbi decoding on the test fold.  Per-fold sensitivity,
        specificity and accuracy are logged; trained models and aggregate
        statistics are pickled to disk and exported to a .mat file.
        """
        skf = StratifiedKFold(self.full_labels, n_folds=self.folds)
        classifier_array = []
        stats_array = []
        num_class = len(self.full_data[0])
        print (num_class)
        for cl in range(0, num_class):
            # when lel > 0, that many leading folds are skipped
            lel = -1
            tp_total = 0.0
            tn_total = 0.0
            fp_total = 0.0
            fn_total = 0.0
            tests = 0
            for train_index, test_index in skf:
                if lel > 0:
                    lel -= 1
                    continue
                stats = []
                distros = []
                hmm_states = []
                state_names = ['swing', 'stance']
                swings = 0
                stances = 0
                # one multivariate Gaussian emission per state, fit on
                # the per-class samples
                for i in range(0, 2):
                    dis = MGD.from_samples(self.class_data[i])
                    st = State(dis, name=state_names[i])
                    distros.append(dis)
                    hmm_states.append(st)

                model = HMM()
                print(model.states)
                model.add_states(hmm_states)
                model.add_transition(model.start, hmm_states[0], 0.5)
                model.add_transition(model.start, hmm_states[1], 0.5)
                # negligible end probabilities keep the model effectively ergodic
                model.add_transition(hmm_states[1], model.end, 0.000000000000000001)
                model.add_transition(hmm_states[0], model.end, 0.000000000000000001)

                for i in range(0, 2):
                    for j in range(0, 2):
                        model.add_transition(hmm_states[i], hmm_states[j], self.t[i][j])
                model.bake()

                tp = 0.0
                tn = 0.0
                fp = 0.0
                fn = 0.0

                train_data = self.full_data[train_index, cl]
                train_class = self.full_labels[train_index, cl]
                test_data = self.full_data[test_index]
                test_class = self.full_labels[test_index]

                # sanity diagnostics (fixed: apply isnan/isinf elementwise,
                # then reduce — the original called .any() on the data first)
                print(np.isfinite(train_data).all())
                print(np.isfinite(test_data).all())
                print(np.isnan(train_data).any())
                print(np.isinf(train_data).any())
                print(np.isnan(test_data).any())
                print(np.isinf(test_data).any())

                # abort on any non-finite value (fixed: elementwise isfinite
                # + .all(); the original also checked test_data twice where
                # test_class was clearly intended)
                if (not np.isfinite(train_data).all()) or (not np.isfinite(test_data).all()) \
                        or (not np.isfinite(train_class).all()) or (not np.isfinite(test_class).all()):
                    rospy.logerr("NaN or Inf Detected")
                    exit()

                try:
                    rospy.logwarn("Training model #"+str(cl)+", fold #" + str(tests))
                    seq = np.array(train_data)
                    model.fit(seq, algorithm='baum-welch', verbose='True', n_jobs=8, max_iterations=150)

                except ValueError:
                    rospy.logwarn("Something went wrong, exiting")
                    # fixed: rospy has no shutdown(); signal_shutdown is the API
                    rospy.signal_shutdown("HMM training failed")
                    exit()

                # chunk the test data into sequences of up to 20 samples
                seq = []
                if self.batch_test == 1:
                    s = 0
                    while s < len(test_data):
                        k = 0
                        seq_entry = []
                        while k < 20 and s < len(test_data):
                            seq_entry.append(test_data[s])
                            k += 1
                            s += 1
                        seq.append(seq_entry)
                else:
                    seq = np.array(test_data)

                # fixed: `seq == []` is ambiguous for numpy arrays; test length
                if len(seq) == 0 or len(test_data) == 0:
                    rospy.logerr("Empty testing sequence")
                    continue

                log, path = model.viterbi(test_data)
                # the Viterbi path includes start and end states, hence the -2
                if (len(path) - 2) != len(test_data):
                    rospy.logerr(len(path))
                    rospy.logerr(path[0][1].name)
                    rospy.logerr(path[len(path) - 1][1].name)
                    rospy.logerr(len(test_data))
                    exit()

                tests += 1
                # tally confusion counts: stance (label 1) is the positive class
                for i in range(0, len(path) - 2):
                    if path[i + 1][1].name != 'Gait-start' and path[i + 1][1].name != 'Gait-end':
                        if path[i + 1][1].name == 'swing':  # prediction is 0
                            swings += 1
                            if test_class[i] == 0:  # class is 0
                                tn += 1.0
                            elif test_class[i] == 1:
                                fn += 1.0  # class is 1

                        elif path[i + 1][1].name == 'stance':  # prediction is 1
                            stances += 1
                            if test_class[i] == 1:  # class is 1
                                tp += 1.0
                            elif test_class[i] == 0:  # class is 0
                                fp += 1.0
                print (swings)
                print (stances)
                if (tp + fn) != 0.0:
                    rospy.logwarn("Sensitivity : " + str(tp / (tp + fn)))
                else:
                    rospy.logwarn("Sensitivity : 0.0")
                if (tn + fp) != 0.0:
                    rospy.logwarn("Specificity : " + str(tn / (tn + fp)))
                else:
                    rospy.logwarn("Specificity : 0.0")
                if (tn + tp + fn + fp) != 0.0:
                    rospy.logwarn("Accuracy : " + str((tn + tp) / (tn + tp + fn + fp)))
                else:
                    rospy.logwarn("Accuracy : 0.0")

                tn_total += tn
                tp_total += tp
                fn_total += fn
                fp_total += fp

            # fold-averaged confusion counts
            tp_total /= tests
            tn_total /= tests
            fp_total /= tests
            fn_total /= tests
            rospy.logerr("TP :" + str(tp_total))
            rospy.logerr("TN :" + str(tn_total))
            rospy.logerr("FP :" + str(fp_total))
            rospy.logerr("FN :" + str(fn_total))
            rospy.logerr("Tests :" + str(tests))
            if (tp_total + fn_total) != 0.0:
                sensitivity = tp_total / (tp_total + fn_total)
            else:
                sensitivity = 0.0
            if (tn_total + fp_total) != 0.0:
                specificity = tn_total / (tn_total + fp_total)
            else:
                specificity = 0.0
            if (tn_total + tp_total + fn_total + fp_total) != 0.0:
                accuracy = (tn_total + tp_total) / (tn_total + tp_total + fn_total + fp_total)
            else:
                accuracy = 0.0

            rospy.logwarn("----------------------------------------------------------")
            rospy.logerr("Total accuracy: " + str(accuracy))
            rospy.logerr("Total sensitivity: " + str(sensitivity))
            rospy.logerr("Total specificity: " + str(specificity))
            # fixed: the second entry duplicated fn_total; it is the TP count
            stats = [tn_total * tests, tp_total * tests, fp_total * tests, fn_total * tests, tests,
                     accuracy, sensitivity, specificity]
            rospy.logwarn("-------------------DONE-------------------------")
            classifier_array.append(model)
            stats_array.append(stats)

        pickle.dump(classifier_array, open(datafile + "distributed_classifiers.p", 'wb'))
        pickle.dump(stats_array, open(datafile + "distributed_stats.p", 'wb'))
        scio.savemat(datafile + "distributed_stats.mat", {'stats': stats_array})
algorithms.
'''

from pomegranate import *
from pomegranate import HiddenMarkovModel as Model
import itertools as it
import numpy as np

# Define the states: three Gaussian emitters centered at 5, 15 and 25.
s1 = State( NormalDistribution( 5, 2 ), name="S1" )
s2 = State( NormalDistribution( 15, 2 ), name="S2" )
# NOTE(review): trailing space in the name "S3 " looks accidental —
# confirm before matching state names elsewhere.
s3 = State( NormalDistribution( 25, 2 ), name="S3 ")

# Define the transitions: a fully-connected model with no end state
# (hence "infinite"); S3 is sticky with a 0.8 self-loop.
model = Model( "infinite" )
model.add_transition( model.start, s1, 0.7 )
model.add_transition( model.start, s2, 0.2 )
model.add_transition( model.start, s3, 0.1 )
model.add_transition( s1, s1, 0.6 )
model.add_transition( s1, s2, 0.1 )
model.add_transition( s1, s3, 0.3 )
model.add_transition( s2, s1, 0.4 )
model.add_transition( s2, s2, 0.4 )
model.add_transition( s2, s3, 0.2 )
model.add_transition( s3, s1, 0.05 )
model.add_transition( s3, s2, 0.15 )
model.add_transition( s3, s3, 0.8 )
model.bake()

# Example observation sequence for the model above.
sequence = [ 4.8, 5.6, 24.1, 25.8, 14.3, 26.5, 15.9, 5.5, 5.1 ]
Beispiel #26
0
algorithms.
'''

from pomegranate import *
from pomegranate import HiddenMarkovModel as Model
import itertools as it
import numpy as np

# Define the states: three Gaussian emitters centered at 5, 15 and 25.
s1 = State(NormalDistribution(5, 2), name="S1")
s2 = State(NormalDistribution(15, 2), name="S2")
# NOTE(review): trailing space in the name "S3 " looks accidental —
# confirm before matching state names elsewhere.
s3 = State(NormalDistribution(25, 2), name="S3 ")

# Define the transitions: a fully-connected model with no end state
# (hence "infinite"); S3 is sticky with a 0.8 self-loop.
model = Model("infinite")
model.add_transition(model.start, s1, 0.7)
model.add_transition(model.start, s2, 0.2)
model.add_transition(model.start, s3, 0.1)
model.add_transition(s1, s1, 0.6)
model.add_transition(s1, s2, 0.1)
model.add_transition(s1, s3, 0.3)
model.add_transition(s2, s1, 0.4)
model.add_transition(s2, s2, 0.4)
model.add_transition(s2, s3, 0.2)
model.add_transition(s3, s1, 0.05)
model.add_transition(s3, s2, 0.15)
model.add_transition(s3, s3, 0.8)
model.bake()

# Example observation sequence for the model above.
sequence = [4.8, 5.6, 24.1, 25.8, 14.3, 26.5, 15.9, 5.5, 5.1]
Beispiel #27
0
    p = {}
    for word in emission_counts[tag]:
        p[word] = emission_counts[tag][
            word] / tag_count  # P(word | tag) = C(tag | word)/C(tag), C = count
    emission_p = DiscreteDistribution(p)
    state = State(emission_p, name="" + tag)
    basic_model.add_state(state)
    s[tag] = state

# Add edges between states for the observed transition frequencies
# P(tag_i | tag_i-1).

# Start & End Transitions
# Start - number of sentences starting with tag over total number of sentences
# End - number of sentences ending with tag over count of tag appearances
for tag in tag_starts:
    basic_model.add_transition(basic_model.start, s[tag],
                               tag_starts[tag] / len(data.training_set.Y))
    basic_model.add_transition(s[tag], basic_model.end,
                               tag_ends[tag] / tag_unigrams[tag])

# Tag-to-tag edges: P(tag2 | tag1) = C(tag1, tag2) / C(tag1).
for (tag1, tag2) in tag_bigrams:
    basic_model.add_transition(s[tag1], s[tag2],
                               tag_bigrams[(tag1, tag2)] / tag_unigrams[tag1])

basic_model.bake()

# Sanity checks on the finished network topology.
assert all(tag in set(s.name for s in basic_model.states) for tag in data.training_set.tagset), \
       "Every state in your network should use the name of the associated tag, which must be one of the training set tags."
assert basic_model.edge_count() == 168, \
       ("Your network should have an edge from the start node to each state, one edge between every " +
        "pair of tags (states), and an edge from each state to the end node.")
Beispiel #28
0
            if fl[i] == 1:
                positive_data.append(fd[i])
            else:
                negative_data.append(fd[i])

        posdis = MGD.from_samples(positive_data)
        st = State(posdis, name='swing')
        distros.append(st)
        hmm_states.append(st)
        negdis = MGD.from_samples(negative_data)
        st2 = State(negdis, name='stance')
        distros.append(st2)
        hmm_states.append(st2)

        cl.add_states(hmm_states)
        cl.add_transition(cl.start, hmm_states[0], 0.5)
        cl.add_transition(cl.start, hmm_states[1], 0.5)

        for i in range(0, 2):
            for j in range(0, 2):
                cl.add_transition(hmm_states[i], hmm_states[j], t[i][j])
        cl.bake()

        f += 1
        train_data = fd[train_index]
        train_class = fl[train_index]
        test_data = fd[test_index]
        test_class = fl[test_index]
        seq = []
        if batch_training == 1:
            s = 0
# TODO: create a discrete distribution for the rainy emissions from the probability table
# above & use that distribution to create a state named Rainy
rainy_emissions = DiscreteDistribution({"yes": 0.8, "no": 0.2})
rainy_state = State(rainy_emissions, name="Rainy")

# add the states to the model
model.add_states(sunny_state, rainy_state)

assert rainy_emissions.probability("yes") == 0.8, "The director brings his umbrella with probability 0.8 on rainy days"
# Fixed: the original line was a garbled notebook export
# (`...farget_ipython().getoutput("")`) that did not even parse.
print("Looks good so far!")


# create edges for each possible state transition in the model
# equal probability of a sequence starting on either a rainy or sunny day
model.add_transition(model.start, sunny_state, 0.5)
model.add_transition(model.start, rainy_state, 0.5)

# add sunny day transitions (we already know estimates of these probabilities
# from the problem statement)
model.add_transition(sunny_state, sunny_state, 0.8)  # 80% sunny->sunny
model.add_transition(sunny_state, rainy_state, 0.2)  # 20% sunny->rainy

# add rainy day transitions using the probabilities specified in the transition table
model.add_transition(rainy_state, sunny_state, 0.4)  # 40% rainy->sunny
model.add_transition(rainy_state, rainy_state, 0.6)  # 60% rainy->rainy

# finally, call the .bake() method to finalize the model
model.bake()

assert model.edge_count() == 6, "There should be two edges from model.start, two from Rainy, and two from Sunny"
Beispiel #30
0
    X_3 = X[y == 2]

else:
    X_1 = X[2000:4000]
    X_2 = X[400:800]
    X_3 = X[7000:8000]
# Fit one multivariate Gaussian per data segment and wrap each in a state.
a = MultivariateGaussianDistribution.from_samples(X_1)
b = MultivariateGaussianDistribution.from_samples(X_2)
c = MultivariateGaussianDistribution.from_samples(X_3)
s1 = State(a, name="M1")
s2 = State(b, name="M2")
s3 = State(c, name="M3")

# Near-uniform start probabilities and sticky (0.9 self-loop) transitions.
hmm = HiddenMarkovModel()
hmm.add_states(s1, s2, s3)
hmm.add_transition(hmm.start, s1, 0.34)
hmm.add_transition(hmm.start, s3, 0.33)
hmm.add_transition(hmm.start, s2, 0.33)

hmm.add_transition(s1, s1, 0.9)
hmm.add_transition(s1, s2, 0.05)
hmm.add_transition(s1, s3, 0.05)

hmm.add_transition(s2, s1, 0.05)
hmm.add_transition(s2, s3, 0.05)
hmm.add_transition(s2, s2, 0.9)

hmm.add_transition(s3, s3, 0.9)
hmm.add_transition(s3, s2, 0.05)
hmm.add_transition(s3, s1, 0.05)
hmm.bake()
Beispiel #31
0
# Wire the pre-built sub-chains (zero->exon entry, the three stop-codon
# exit variants, donor and acceptor splice sites) into the coding model.
add_sequence(coding_model, ze_states)

add_sequence(coding_model, ez_states_taa)
add_sequence(coding_model, ez_states_tga)
add_sequence(coding_model, ez_states_tag)

add_sequence(coding_model, donor0_states)
add_sequence(coding_model, donor1_states)
add_sequence(coding_model, donor2_states)

add_sequence(coding_model, acceptor0_states)
add_sequence(coding_model, acceptor1_states)
add_sequence(coding_model, acceptor2_states)

# Background self-loop with a small (0.01) escape into the zero->exon chain.
coding_model.add_transition(coding_model.start, back, 1.0)

coding_model.add_transition(back, back, 0.99)
coding_model.add_transition(back, ze_states[0], 0.01)

# Intron states in0/in1/in2 are extremely sticky; their spacer chains are
# entered only very rarely.
coding_model.add_transition(in0, in0, 0.99999999)
coding_model.add_transition(in0, in0_spacers[0], 0.00000001)

coding_model.add_transition(in1, in1, 0.99999999)
coding_model.add_transition(in1, in1_spacers[0], 0.00000001)

coding_model.add_transition(in2, in2, 0.99999999)
coding_model.add_transition(in2, in2_spacers[0], 0.00000001)

# Coding states advance deterministically 0 -> 1 -> 2 (codon position).
coding_model.add_transition(coding_state0, coding_state1, 1.0)
coding_model.add_transition(coding_state1, coding_state2, 1.0)
Beispiel #32
0
prob_emission = {}
states = {}
# One discrete-emission state per tag: P(word | tag) from emission counts.
for tag, word_counts in emission_counts.items():
    prob_emission = {
        word: word_count / sum(word_counts.values())
        for word, word_count in word_counts.items()
    }
    states[tag] = State(DiscreteDistribution(prob_emission), name=tag)

unique_tags = list(data.training_set.tagset)
for tag in unique_tags:
    basic_model.add_states(states[tag])

# add the starting edges: P(sentence starts with tag)
for tag, tag_count in tag_starts.items():
    basic_model.add_transition(basic_model.start, states[tag],
                               tag_count / len(data.training_set.X))

# add the ending edges
# NOTE(review): the end probability divides by the number of sentences,
# not by the tag's occurrence count — confirm this denominator is intended.
for tag, tag_count in tag_ends.items():
    basic_model.add_transition(states[tag], basic_model.end,
                               tag_count / len(data.training_set.X))

# add the transitions: P(tag1 | tag0) = C(tag0, tag1) / C(tag0)
for bi_tag, tag_count in tag_bigrams.items():
    tag0 = bi_tag[0]
    tag1 = bi_tag[1]
    prob = tag_count / tag_unigrams[tag0]
    basic_model.add_transition(states[tag0], states[tag1], prob)

# finalize the model
basic_model.bake()
# Build an intron -> acceptor-site -> post model from the acceptor matrix.
matrixAcceptor0 = numpy.array(matrix_from_exa('new_acceptor1.exa'))
acceptor0_data = classify(matrixAcceptor0, 2)

model = HiddenMarkovModel('intron_acceptor')

# Intron emissions come from empirical cut-site statistics; the post
# state emits uniformly.
intron = State(DiscreteDistribution(
    calculator.intron_calculator('cuts_intron.txt').p),
               name='in')
acceptor0_states = sequence_state_factory(acceptor0_data, 'acceptor0')
post = State(DiscreteDistribution(equal_distribution), name='post')

model.add_state(intron)
add_sequence(model, acceptor0_states)
model.add_state(post)

# Sticky intron (0.9 self-loop) with a 10% chance of entering the
# acceptor chain, which feeds the post state deterministically.
model.add_transition(model.start, intron, 1)
model.add_transition(intron, intron, 0.9)
model.add_transition(intron, acceptor0_states[0], 0.1)
model.add_transition(acceptor0_states[-1], post, 1)
model.add_transition(post, post, 0.5)
model.add_transition(post, model.end, 0.5)

model.bake()
# Raw test fragment; lowercased and stripped before conversion below.
test_l = 'GTAACACTGAATACTCAGGAACAATTAATGGATGGTAACATATGAGGAATATCTAGGAGGCACACCCTCTCTGGCATCTATGATGGGCCAAAAACCCGCATTCGCTTGGCCACAGTATGTGAAATATAACCCAGCTTAGACACAGGGTGCGGCAGCTGTCATGTTTCTCTGTGTGTGCCGAGTGTCATGTCTGCACCGTACAGGGATAGCTGAGTCTTCATCCTCCTCAGCTCCTATCTGTCCAGTGCAATGAACAGCAGCTGCTCTCTTCCTCTCTGGTTCCCATGGCAGCCATGCTCTGTTGCAGAGAGAACAGGATTGCATGTTCCCTCTTAATGGGAACGTCCATTTTGCTTTCTGGGACCACTCTCTTAATGCCGCCTGTCAAAACCAGCTAGGACTCCCTGGGGTCCAATCCCTCTGTGTTTAATCTTCTGTCATCTCTGTCCCACCTGGCTCATCAGGGAGATGCAGAAGGCTGAAGAAAAGGAAGTCCCTGAGGACTCACTGGAGGAATGTGCCATCACTTGTTCAAATAGCCATGGCCCTTATGACTCCAACCATGACTCCAACC'
converted = converter_to(test_l.lower().replace(' ', '').replace('p', ''))

#logp, path = model.viterbi(converted)
#print(logp, [x[1].name + str(i) for i, x in enumerate(path)])

with open('new_intron_acceptor.txt') as in_file:
    total = []
Beispiel #34
0
    't': 0.25
}),
                   name='back')

# AC-rich "fixed" motif state, contrasted with the background state
# defined just above this chunk.
fixed_state = State(DiscreteDistribution({
    'a': 0.45,
    'c': 0.45,
    'g': 0.05,
    't': 0.05
}),
                    name='fixed')

hmmodel.add_state(back_state)
hmmodel.add_state(fixed_state)

# Background <-> fixed with 0.9 self-loops; sequences start in background.
hmmodel.add_transition(hmmodel.start, back_state, 1)
hmmodel.add_transition(back_state, back_state, 0.9)
hmmodel.add_transition(back_state, fixed_state, 0.1)
hmmodel.add_transition(fixed_state, fixed_state, 0.9)
hmmodel.add_transition(fixed_state, back_state, 0.1)

hmmodel.bake()

seq = list('acgtacgtaaaaccccaaa')

# Viterbi-decode the toy sequence and print the state name per position.
lopg, path = hmmodel.viterbi(seq)

print([x[1].name for x in path])

print(hmmodel.to_json())
def main():
    """Leave-one-out cross-validation of gait-phase HMMs.

    For each study group (healthy subjects, patients) and each left-out
    subject, builds a 4-state (hs/ff/ho/sw) Gaussian-emission HMM,
    trains it with Baum-Welch on the other subjects' trials,
    Viterbi-decodes the left-out trial, and reports mean time and
    coefficient of variation per gait phase and per stride.
    """
    rospy.init_node('hmm_trainer')
    phase_pub = rospy.Publisher('/phase', Int32, queue_size=10)
    rospack = rospkg.RosPack()
    packpath = rospack.get_path('exo_control')
    datapath = packpath + "/log/mat_files/"
    verbose = rospy.get_param('~verbose', False)

    """Print console output into text file"""
    sys.stdout = open(packpath + "/log/results/leave-one-out_cross_validation_cov.txt", "w")

    """Data loading"""
    n_trials = 3
    n_sub = 9
    healthy_subs = ["daniel", "erika", "felipe", "jonathan", "luis", "nathalia", "paula", "pedro", "tatiana"]
    patients = ["andres", "carlos", "carmen", "carolina", "catalina", "claudia", "emmanuel", "fabian", "gustavo"]
    study_subs = [healthy_subs, patients]

    # Per-group, per-subject containers for signals, labels and FSR rate.
    dataset = [{} for x in range(len(study_subs))]
    for i in range(len(study_subs)):
        for sub in study_subs[i]:
            dataset[i][sub] = {"gyro_y": [[] for x in range(n_trials)],
                               "fder_gyro_y": [[] for x in range(n_trials)],
                               "time": [[] for x in range(n_trials)],
                               "labels": [[] for x in range(n_trials)],
                               "Fs_fsr": 0.0}

    # Populate each subject's dict from its per-trial .mat files.
    for group in dataset:
        for sub,data in group.iteritems():
            for trial in range(n_trials):
                mat_file = scio.loadmat(datapath + sub + "_proc_data" + str(trial+1) + ".mat")
                for signal in data:
                    if signal not in ["pathol","fder_gyro_y"]:
                        if signal == "Fs_fsr":
                            data[signal] = mat_file[signal][0][0]
                        else:
                            data[signal][trial] = mat_file[signal][0]
    del mat_file

    """Feature extraction"""
    """First derivative"""
    # Central difference in the interior; endpoints copy the boundary samples.
    for group in dataset:
        for sub,data in group.iteritems():
            for trial in range(n_trials):
                der = []
                gyro_y = data["gyro_y"][trial]
                der.append(gyro_y[0])
                for i in range(1,len(gyro_y)-1):
                    der.append((gyro_y[i+1]-gyro_y[i-1])/2)
                der.append(gyro_y[-1])
                data["fder_gyro_y"][trial] = der
    del der, sub, data

    """Global variables of cHMM"""
    startprob = [0.25, 0.25, 0.25, 0.25]
    state_names = ['hs', 'ff', 'ho', 'sw']
    n_classes = 4
    n_signals = 2
    tol = 6e-2       # Tolerance window of 60 ms

    # pathology = 0
    for pathology in range(len(dataset)):
        if pathology == 0:
            rospy.logwarn("**Leave-one-out cross validation with HEALTHY subjects**")
            print "**Leave-one-out cross validation with HEALTHY subjects**"
        else:
            rospy.logwarn("**Leave-one-out cross validation with PATIENTS**")
            print "**Leave-one-out cross validation with PATIENTS**"
    # if True:
        for lou_sub,lou_data in dataset[pathology].iteritems():       # Iterate through leave-one-out subject's data
            rospy.logwarn("Leave " + lou_sub + " out:")
            print "Leave " + lou_sub + " out:"

            # Empirical label-transition counts, row-normalized to probabilities.
            t = np.zeros((4, 4))        # Transition matrix
            prev = -1
            for trial in range(n_trials):
                for label in lou_data["labels"][trial]:
                    if prev == -1:
                        prev = label
                    t[prev][label] += 1.0
                    prev = label
            t = normalize(t, axis=1, norm='l1')
            if verbose: rospy.logwarn("TRANSITION MATRIX\n" + str(t))

            # Group the left-out subject's samples by gait-phase label.
            class_data = [[] for x in range(n_classes)]
            # full_lou_data = []
            # full_lou_labels = []
            for trial in range(n_trials):
                for sample in range(len(lou_data["gyro_y"][trial])):
                    d = [lou_data["gyro_y"][trial][sample], lou_data["fder_gyro_y"][trial][sample]]
                    l = lou_data["labels"][trial][sample]
                    # full_lou_data.append(d)
                    # full_lou_labels.append(l)
                    class_data[l].append(d)

            """Multivariate Gaussian Distributions for each hidden state"""
            class_means = [[[] for x in range(n_signals)] for i in range(n_classes)]
            class_vars = [[[] for x in range(n_signals)] for i in range(n_classes)]
            class_std = [[[] for x in range(n_signals)] for i in range(n_classes)]
            class_cov = []

            # Per-class covariance and per-signal mean/var/std statistics.
            for state in range(n_classes):
                cov = np.ma.cov(np.array(class_data[state]), rowvar=False)
                class_cov.append(cov)
                for signal in range(n_signals):
                    class_means[state][signal] = np.array(class_data[state][:])[:, [signal]].mean(axis=0)
                    class_vars[state][signal] = np.array(class_data[state][:])[:, [signal]].var(axis=0)
                    class_std[state][signal] = np.array(class_data[state][:])[:, [signal]].std(axis=0)

            # lou_trial = 1
            # if True:
            for lou_trial in range(n_trials):
                rospy.logwarn("Trial {}".format(lou_trial+1))
                print("Trial {}".format(lou_trial+1))

                """Classifier initialization"""
                # distros = []
                hmm_states = []
                for state in range(n_classes):
                    dis = MGD\
                        (np.array(class_means[state]).flatten(),
                         np.array(class_cov[state]))
                    st = State(dis, name=state_names[state])
                    # distros.append(dis)
                    hmm_states.append(st)
                model = HMM(name="Gait")

                model.add_states(hmm_states)
                """Initial transitions"""
                for state in range(n_classes):
                    model.add_transition(model.start, hmm_states[state], startprob[state])
                """Left-right model"""
                for i in range(n_classes):
                    for j in range(n_classes):
                        model.add_transition(hmm_states[i], hmm_states[j], t[i][j])

                model.bake()

                """Create training and test data"""
                x_train = []
                x_test = []
                test_gyro_y = lou_data["gyro_y"][lou_trial]
                test_fder_gyro_y = lou_data["fder_gyro_y"][lou_trial]
                """Create test data with n-th trial of leave-one-out subject"""
                for sample in range(len(test_gyro_y)):
                    x_test.append([test_gyro_y[sample], test_fder_gyro_y[sample]])

                """Create training data with n-1 trials of the rest of subjects (healthy group)"""
                # NOTE(review): training always draws from dataset[0] (the
                # healthy group), even when evaluating patients — confirm
                # this is intended.
                for train_sub,train_data in dataset[0].iteritems():
                    count_trials = 0
                    if lou_sub != train_sub:
                    # if train_sub == "daniel":
                        for trial in range(n_trials):
                            if trial != lou_trial and count_trials < 1:
                                # rospy.logwarn(trial)
                                train_gyro_y = train_data["gyro_y"][trial]
                                train_fder_gyro_y = train_data["fder_gyro_y"][trial]
                                for sample in range(len(train_gyro_y)):
                                    x_train.append([train_gyro_y[sample], train_fder_gyro_y[sample]])
                                count_trials += 1
                rospy.logwarn(len(x_train))
                x_train = list([x_train])

                """Training"""
                rospy.logwarn("Training HMM...")
                model.fit(x_train, algorithm='baum-welch', verbose=True)
                # model.fit(x_train, algorithm='viterbi', verbose='True')

                """Find most-likely sequence"""
                rospy.logwarn("Finding most-likely sequence...")
                logp, path = model.viterbi(x_test)
                # rospy.logwarn(len(path))
                # rospy.logwarn(len(lou_data["labels"][lou_trial]))

                # Map decoded state names back to numeric phase labels.
                class_labels = []
                for i in range(len(lou_data["labels"][lou_trial])):
                    path_phase = path[i][1].name
                    for state in range(n_classes):
                        if path_phase == state_names[state]:
                            class_labels.append(state)
                '''Saving classifier labels into csv file'''
                # np.savetxt(packpath+"/log/inter_labels/"+lou_sub+"_labels.csv", class_labels, delimiter=",", fmt='%s')
                # rospy.logwarn("csv file with classifier labels was saved.")
                # lou_data["labels"][lou_trial] = lou_data["labels"][lou_trial][1:]

                """Calculate mean time (MT) of stride and each gait phase and Coefficient of Variation (CoV)"""
                rospy.logwarn("Mean time (MT) and Coefficient of Variance (CoV)")
                print "Mean time (MT) and Coefficient of Variance (CoV)"

                # Segment the decoded label stream into phases and strides.
                curr_label = -1
                count = 0
                n_phases = 0
                stride_samples = 0
                phases_time = [[] for x in range(n_classes)]
                stride_time = []
                for label in class_labels:
                    if curr_label != label:
                        n_phases += 1
                        stride_samples += count
                        if label == 0:  # Gait start: HS
                            if n_phases == 4:   # If a whole gait cycle has past
                                stride_time.append(stride_samples/lou_data["Fs_fsr"])
                            n_phases = 0
                            stride_samples = 0
                        # NOTE(review): label-1 sends phase 0 to index -1
                        # (the last bin); this only records correctly if the
                        # duration belongs to the phase that just ended —
                        # confirm the intent.
                        phases_time[label-1].append(count/lou_data["Fs_fsr"])
                        curr_label = label
                        count = 1
                    else:
                        count += 1.0
                # Report per-phase and per-stride mean time and CoV (%).
                for phase in range(n_classes):
                    mean_time = np.mean(phases_time[phase])
                    phase_std = np.std(phases_time[phase])
                    rospy.logwarn("(" + state_names[phase] + ")")
                    print "(" + state_names[phase] + ")"
                    rospy.logwarn("Mean time: " + str(mean_time) + " + " + str(phase_std))
                    print "Mean time: " + str(mean_time) + " + " + str(phase_std)
                    rospy.logwarn("CoV: " + str(phase_std/mean_time*100.0))
                    print("CoV: " + str(phase_std/mean_time*100.0))
                mean_time = np.mean(stride_time)
                phase_std = np.std(stride_time)
                rospy.logwarn("(Stride)")
                print "(Stride)"
                rospy.logwarn("Mean time: " + str(mean_time) + " + " + str(phase_std))
                print "Mean time: " + str(mean_time) + " + " + str(phase_std)
                rospy.logwarn("CoV: " + str(phase_std/mean_time*100.0))
                print("CoV: " + str(phase_std/mean_time*100.0))
Beispiel #36
0
def main():
    rospy.init_node('hmm_trainer')
    param_vec = []
    rospack = rospkg.RosPack()
    if (len(sys.argv) < 2):
        print("Missing the prefix argument.")
        exit()
    else:
        prefix = sys.argv[1]
    use_measurements = np.zeros(3)

    # patient = rospy.get_param('~patient', 'None')
    # if prefix == 'None':
    #     rospy.logerr("No filename given ,exiting")
    #     exit()

    phase_pub = rospy.Publisher('/phase', Int32, queue_size=10)
    packpath = rospack.get_path('exo_gait_phase_det')
    datapath = packpath + "/log/mat_files/"
    rospy.logwarn("Patient: {}".format(prefix))
    print("Patient: {}".format(prefix))
    verbose = rospy.get_param('~verbose', False)
    """Print console output into text file"""
    # sys.stdout = open(packpath + "/log/results/intra-sub_" + prefix + ".txt", "w")
    """Data loading"""
    n_trials = 3
    data = [[] for x in range(0, n_trials)]
    for i in range(0, n_trials):
        data[i] = scio.loadmat(datapath + prefix + "_proc_data" + str(i + 1) +
                               ".mat")

    accel_x = [[] for x in range(0, n_trials)]
    accel_y = [[] for x in range(0, n_trials)]
    accel_z = [[] for x in range(0, n_trials)]
    gyro_x = [[] for x in range(0, n_trials)]
    gyro_y = [[] for x in range(0, n_trials)]
    gyro_z = [[] for x in range(0, n_trials)]
    time_array = [[] for x in range(0, n_trials)]
    labels = [[] for x in range(0, n_trials)]
    fs_fsr = []
    for i in range(0, n_trials):
        # accel_x[i] = data[i]["accel_x"][0]
        # accel_y[i] = data[i]["accel_y"][0]
        # accel_z[i] = data[i]["accel_z"][0]
        gyro_x[i] = data[i]["gyro_x"][0]
        gyro_y[i] = data[i]["gyro_y"][0]
        gyro_z[i] = data[i]["gyro_z"][0]
        time_array[i] = data[i]["time"][0]
        labels[i] = data[i]["labels"][0]
        fs_fsr.append(data[i]["Fs_fsr"][0][0])
    """Feature extraction"""
    """First derivative"""
    # fder_gyro_x = []
    # for i in range(n_trials):
    #     der = []
    #     der.append(gyro_x[i][0])
    #     for j in range(1,len(gyro_x[i])-1):
    #         der.append((gyro_x[i][j+1]-gyro_x[i][j-1])/2)
    #     der.append(gyro_x[i][-1])
    #     fder_gyro_x.append(der)

    fder_gyro_y = []
    for i in range(n_trials):
        der = []
        der.append(gyro_y[i][0])
        for j in range(1, len(gyro_y[i]) - 1):
            der.append((gyro_y[i][j + 1] - gyro_y[i][j - 1]) / 2)
        der.append(gyro_y[i][-1])
        fder_gyro_y.append(der)

    # fder_gyro_z = []
    # for i in range(n_trials):
    #     der = []
    #     der.append(gyro_z[i][0])
    #     for j in range(1,len(gyro_z[i])-1):
    #         der.append((gyro_z[i][j+1]-gyro_z[i][j-1])/2)
    #     der.append(gyro_z[i][-1])
    #     fder_gyro_z.append(der)
    """Second derivative"""
    # sder_gyro_x = []
    # for i in range(n_trials):
    #     der = []
    #     der.append(fder_gyro_x[i][0])
    #     for j in range(1,len(fder_gyro_x[i])-1):
    #         der.append((fder_gyro_x[i][j+1]-fder_gyro_x[i][j-1])/2)
    #     der.append(fder_gyro_x[i][-1])
    #     sder_gyro_x.append(der)
    #
    # sder_gyro_y = []
    # for i in range(n_trials):
    #     der = []
    #     der.append(fder_gyro_y[i][0])
    #     for j in range(1,len(fder_gyro_y[i])-1):
    #         der.append((fder_gyro_y[i][j+1]-fder_gyro_y[i][j-1])/2)
    #     der.append(fder_gyro_y[i][-1])
    #     sder_gyro_y.append(der)
    #
    # sder_gyro_z = []
    # for i in range(n_trials):
    #     der = []
    #     der.append(fder_gyro_z[i][0])
    #     for j in range(1,len(fder_gyro_z[i])-1):
    #         der.append((fder_gyro_z[i][j+1]-fder_gyro_z[i][j-1])/2)
    #     der.append(fder_gyro_z[i][-1])
    #     sder_gyro_z.append(der)
    """Peak detector"""
    # window_wid = 15        # Window width should be odd
    # search_ratio = window_wid/2
    # pdet_gyro_x = []
    # for i in range(n_trials):
    #     pdet = []
    #     for j in range(len(gyro_x[i])):
    #         if j <= search_ratio:
    #             win = gyro_x[i][:j+search_ratio+1]
    #         elif j >= len(gyro_x[i])-search_ratio-1:
    #             win = gyro_x[i][j-search_ratio:]
    #         else:
    #             win = gyro_x[i][j-search_ratio:j+search_ratio+1]
    #         pdet.append(gyro_x[i][j]/max(win))
    #     pdet_gyro_x.append(pdet)

    # print len(gyro_x)
    # print len(pdet_gyro_x)
    # for i in range(3):
    #     print len(gyro_x[i])
    #     print len(pdet_gyro_x[i])

    # pdet_gyro_y = []
    # for i in range(n_trials):
    #     pdet = []
    #     for j in range(len(gyro_y[i])):
    #         if j <= search_ratio:
    #             win = gyro_y[i][:j+search_ratio+1]
    #         elif j >= len(gyro_y[i])-search_ratio-1:
    #             win = gyro_y[i][j-search_ratio:]
    #         else:
    #             win = gyro_y[i][j-search_ratio:j+search_ratio+1]
    #         pdet.append(gyro_y[i][j]/max(win))
    #     pdet_gyro_y.append(pdet)
    #
    # pdet_gyro_z = []
    # for i in range(n_trials):
    #     pdet = []
    #     for j in range(len(gyro_z[i])):
    #         if j <= search_ratio:
    #             win = gyro_z[i][:j+search_ratio+1]
    #         elif j >= len(gyro_z[i])-search_ratio-1:
    #             win = gyro_z[i][j-search_ratio:]
    #         else:
    #             win = gyro_z[i][j-search_ratio:j+search_ratio+1]
    #         pdet.append(gyro_z[i][j]/max(win))
    #     pdet_gyro_z.append(pdet)
    """Create training and test data"""
    ff = [[] for x in range(0, n_trials)]
    for j in range(0, n_trials):
        for k in range(0, len(time_array[j])):
            f_ = []
            # f_.append(accel_x[j][k])
            # f_.append(accel_y[j][k])
            # f_.append(accel_z[j][k])
            # f_.append(gyro_x[j][k])
            # f_.append(fder_gyro_x[j][k])
            # f_.append(sder_gyro_x[j][k])
            # f_.append(pdet_gyro_x[j][k])
            f_.append(gyro_y[j][k])
            f_.append(fder_gyro_y[j][k])
            # f_.append(sder_gyro_y[j][k])
            # f_.append(pdet_gyro_y[j][k])
            # f_.append(gyro_z[j][k])
            # f_.append(fder_gyro_z[j][k])
            # f_.append(sder_gyro_z[j][k])
            # f_.append(pdet_gyro_z[j][k])
            ff[j].append(f_)
    n_signals = len(ff[0][0])
    """cHMM"""
    startprob = [0.25, 0.25, 0.25, 0.25]
    state_names = ['hs', 'ff', 'ho', 'sw']
    rospy.logwarn("""Intra-subject training""")
    print("""Intra-subject training""")
    # for leave_one_out in range(0, n_trials):
    for leave_one_out in range(1, 2):
        rospy.logwarn("-------TRIAL {}-------".format(leave_one_out + 1))
        print("-------TRIAL {}-------".format(leave_one_out + 1))
        """Transition matrix"""
        t = np.zeros((4, 4))  # Transition matrix
        prev = -1
        for i in range(0, len(labels[leave_one_out])):
            # data[i]._replace(label = correct_mapping[data[i].label])
            if prev == -1:
                prev = labels[leave_one_out][i]
            t[prev][labels[leave_one_out][i]] += 1.0
            prev = labels[leave_one_out][i]
        t = normalize(t, axis=1, norm='l1')
        if verbose: rospy.logwarn("TRANSITION MATRIX\n" + str(t))

        n_classes = 4
        class_data = [[] for x in range(n_classes)]
        full_data = []
        full_labels = []
        for i in range(len(ff[leave_one_out])):
            full_data.append(ff[leave_one_out][i])
            full_labels.append(labels[leave_one_out][i])
        # print full_data == ff[leave_one_out]
        # print full_labels == labels[leave_one_out]
        # print len(full_data) == len(full_labels)
        # for i in range(0,len(ff[leave_one_out-1])):
        #     full_data.append(ff[leave_one_out-1][i])
        #     full_labels.append(labels[leave_one_out-1][i])
        # for i in range(0,len(ff[(leave_one_out+1) % n_trials])):
        #     full_data.append(ff[(leave_one_out+1) % n_trials][i])
        #     full_labels.append(labels[(leave_one_out+1) % n_trials][i])

        # print len(full_data) == (len(ff[leave_one_out]) + len(ff[leave_one_out-1]) + len(ff[(leave_one_out+1) % n_trials]))
        # print full_data
        # print len(full_data)
        # print full_labels
        # print len(full_labels)

        for i in range(0, len(full_data)):
            class_data[full_labels[i]].append(full_data[i])
        """Multivariate Gaussian Distributions for each hidden state"""
        class_means = [[[] for x in range(n_signals)]
                       for i in range(n_classes)]
        class_vars = [[[] for x in range(n_signals)] for i in range(n_classes)]
        class_std = [[[] for x in range(n_signals)] for i in range(n_classes)]
        class_cov = []
        classifiers = []

        for i in range(0, n_classes):
            # cov = np.ma.cov(np.array(class_data[i]), rowvar=False)
            cov = np.cov(np.array(class_data[i]), rowvar=False)
            class_cov.append(cov)
            for j in range(0, n_signals):
                class_means[i][j] = np.array(
                    class_data[i][:])[:, [j]].mean(axis=0)
                class_vars[i][j] = np.array(class_data[i][:])[:,
                                                              [j]].var(axis=0)
                class_std[i][j] = np.array(class_data[i][:])[:,
                                                             [j]].std(axis=0)
        print "\n" + str(class_cov) + "\n"
        """Classifier initialization"""
        distros = []
        hmm_states = []
        for i in range(n_classes):
            dis = MGD\
                (np.array(class_means[i]).flatten(),
                 np.array(class_cov[i]))
            st = State(dis, name=state_names[i])
            distros.append(dis)
            hmm_states.append(st)
        model = HMM(name="Gait")

        model.add_states(hmm_states)
        """Initial transitions"""
        for i in range(0, n_classes):
            model.add_transition(model.start, hmm_states[i], startprob[i])
        """Left-right model"""
        for i in range(0, n_classes):
            for j in range(0, n_classes):
                model.add_transition(hmm_states[i], hmm_states[j], t[i][j])

        model.bake()

        # print (model.name)
        # rospy.logwarn("N. observations: " + str(model.d))
        # print (model.edges)
        # rospy.logwarn("N. hidden states: " + str(model.silent_start))
        # print model
        """Training"""
        # limit = int(len(ff1)*(8/10.0))    # 80% of data to test, 20% to train
        # x_train = list([ff1[:limit]])
        # x_train = list([ff1,ff2])
        # x_train = list([ff2])
        x_train = []
        for i in range(0, len(ff[leave_one_out - 1])):
            x_train.append(ff[leave_one_out - 1][i])
        for i in range(0, len(ff[(leave_one_out + 1) % n_trials])):
            x_train.append(ff[(leave_one_out + 1) % n_trials][i])
        x_train = list([x_train])
        rospy.logwarn("Training...")
        model.fit(x_train, algorithm='baum-welch', verbose=verbose)
        # model.fit(list([ff[leave_one_out-1]]), algorithm='baum-welch', verbose=verbose)
        # model.fit(list([ff[(leave_one_out+1) % n_trials]]), algorithm='baum-welch', verbose=verbose)
        # model.fit(seq, algorithm='viterbi', verbose='True')
        """Find most-likely sequence"""
        # logp, path = model.viterbi(ff[limit:])
        logp, path = model.viterbi(ff[leave_one_out])
        # print logp
        # print path
        class_labels = []
        for i in range(len(labels[leave_one_out])):
            path_phase = path[i][1].name
            for state in range(n_classes):
                if path_phase == state_names[state]:
                    class_labels.append(state)
        labels[leave_one_out] = list(labels[leave_one_out][1:])
        # Saving classifier labels into csv file
        # np.savetxt(packpath+"/log/intra_labels/"+prefix+"_labels"+str(leave_one_out+1)+".csv", class_labels, delimiter=",", fmt='%s')
        # rospy.logwarn("csv file with classifier labels was saved.")

        sum = 0.0
        true_pos = 0.0
        false_pos = 0.0
        true_neg = 0.0
        false_neg = 0.0
        tol = 6e-2  # Tolerance window of 60 ms
        tol_window = int((tol / 2) / (1 / float(fs_fsr[leave_one_out])))
        print "FSR freq: " + str(fs_fsr[leave_one_out])
        print "Tolerance win: " + str(tol_window)
        # print tol_window
        # # print type(tol_window)
        # for i in range(0, len(labels[leave_one_out])):
        #     """Tolerance window"""
        #     if i > tol_window+1 and i < len(labels[leave_one_out])-tol_window:
        #         # curr_tol = time_array[leave_one_out][i+tol_window]-time_array[leave_one_out][i-tol_window]
        #         # print curr_tol
        #         win = []
        #         for j in range(i-tol_window,i+tol_window+1):
        #             win.append(state_names[labels[leave_one_out][j]])
        #         if path[i][1].name in win:
        #             sum += 1.0
        #     else:
        #         if path[i][1].name == labels[leave_one_out][i]:
        #             sum += 1.0
        """Performance Evaluation"""
        rospy.logwarn("Calculating results...")
        time_error = [[] for x in range(n_classes)]
        for phase in range(n_classes):
            for i in range(len(labels[leave_one_out])):
                """Tolerance window"""
                if i >= tol_window and i < len(
                        labels[leave_one_out]) - tol_window:
                    # curr_tol = time_array[leave_one_out][i+tol_window]-time_array[leave_one_out][i-tol_window]
                    # print curr_tol
                    win = []
                    for j in range(i - tol_window, i + tol_window + 1):
                        win.append(labels[leave_one_out][j])
                    """Calculate time error with true positives"""
                    if class_labels[i] == phase:
                        if class_labels[i] in win:
                            for k in range(len(win)):
                                if win[k] == phase:
                                    time_error[phase].append(
                                        (k - tol_window) /
                                        fs_fsr[leave_one_out])
                                    break
                            true_pos += 1.0
                            if verbose:
                                print phase + ", " + state_names[labels[
                                    leave_one_out][i]] + ", " + class_labels[
                                        i] + ", true_pos"
                        else:
                            false_pos += 1.0
                            if verbose:
                                print phase + ", " + state_names[labels[
                                    leave_one_out][i]] + ", " + class_labels[
                                        i] + ", false_pos"
                    else:
                        if phase != labels[leave_one_out][i]:
                            # if phase not in win:
                            true_neg += 1.0
                            if verbose:
                                print phase + ", " + state_names[labels[
                                    leave_one_out][i]] + ", " + class_labels[
                                        i] + ", true_neg"
                        else:
                            false_neg += 1.0
                            if verbose:
                                print phase + ", " + state_names[labels[
                                    leave_one_out][i]] + ", " + class_labels[
                                        i] + ", false_neg"
                else:
                    if class_labels[i] == phase:
                        if class_labels[i] == labels[leave_one_out][i]:
                            true_pos += 1.0
                        else:
                            false_pos += 1.0
                    else:
                        if phase != labels[leave_one_out][i]:
                            true_neg += 1.0
                        else:
                            false_neg += 1.0

        rospy.logwarn("Timing error")
        print("Timing error")
        for phase in range(n_classes):
            rospy.logwarn("(" + state_names[phase] + ")")
            print "(" + state_names[phase] + ")"
            if len(time_error[phase]) > 0:
                rospy.logwarn(
                    str(np.mean(time_error[phase])) + " + " +
                    str(np.std(time_error[phase])))
                print str(np.mean(time_error[phase])) + " + " + str(
                    np.std(time_error[phase]))
            else:
                rospy.logwarn("0.06 + 0")
                print "0.06 + 0"
        """Calculate mean time (MT) of stride and each gait phase and Coefficient of Variation (CoV)"""
        rospy.logwarn("Mean time (MT) and Coefficient of Variance (CoV)")
        print("Mean time (MT) and Coefficient of Variance (CoV)")
        n_group = 0
        for label_group in [class_labels, labels[leave_one_out]]:
            if n_group == 0:
                rospy.logwarn("Results for HMM:")
                print("Results for HMM:")
            else:
                rospy.logwarn("Results for FSR:")
                print("Results for FSR:")
            curr_label = -1
            count = 0
            n_phases = 0
            stride_samples = 0
            phases_time = [[] for x in range(n_classes)]
            stride_time = []
            for label in label_group:
                # for label in class_labels:
                if curr_label != label:
                    n_phases += 1
                    stride_samples += count
                    if label == 0:  # Gait start: HS
                        if n_phases == 4:  # If a whole gait cycle has past
                            stride_time.append(stride_samples /
                                               fs_fsr[leave_one_out])
                        n_phases = 0
                        stride_samples = 0
                    phases_time[label - 1].append(count /
                                                  fs_fsr[leave_one_out])
                    curr_label = label
                    count = 1
                else:
                    count += 1.0
            for phase in range(n_classes):
                mean_time = np.mean(phases_time[phase])
                phase_std = np.std(phases_time[phase])
                rospy.logwarn("(" + state_names[phase] + ")")
                print "(" + state_names[phase] + ")"
                rospy.logwarn("Mean time: " + str(mean_time) + " + " +
                              str(phase_std))
                print "Mean time: " + str(mean_time) + " + " + str(phase_std)
                rospy.logwarn("CoV: " + str(phase_std / mean_time * 100.0))
                print("CoV: " + str(phase_std / mean_time * 100.0))
            mean_time = np.mean(stride_time)
            phase_std = np.std(stride_time)
            rospy.logwarn("(Stride)")
            print "(Stride)"
            rospy.logwarn("Mean time: " + str(mean_time) + " + " +
                          str(phase_std))
            print "Mean time: " + str(mean_time) + " + " + str(phase_std)
            rospy.logwarn("CoV: " + str(phase_std / mean_time * 100.0))
            print("CoV: " + str(phase_std / mean_time * 100.0))
            n_group += 1
        """Accuracy"""
        # acc = sum/len(labels[leave_one_out])
        if (true_neg + true_pos + false_neg + false_pos) != 0.0:
            acc = (true_neg + true_pos) / (true_neg + true_pos + false_neg +
                                           false_pos)
        else:
            acc = 0.0
        """Sensitivity or True Positive Rate"""
        if true_pos + false_neg != 0:
            tpr = true_pos / (true_pos + false_neg)
        else:
            tpr = 0.0
        """Specificity or True Negative Rate"""
        if false_pos + true_neg != 0:
            tnr = true_neg / (false_pos + true_neg)
        else:
            tnr = 0.0
        # rospy.logwarn("Accuracy: {}%".format(acc*100))
        rospy.logwarn("Accuracy: {}%".format(acc * 100.0))
        # print("Accuracy: {}%".format(acc*100.0))
        rospy.logwarn("Sensitivity: {}%".format(tpr * 100.0))
        # print("Sensitivity: {}%".format(tpr*100.0))
        rospy.logwarn("Specificity: {}%".format(tnr * 100.0))
        # print("Specificity: {}%".format(tnr*100.0))
        """Goodness index"""
        G = np.sqrt((1 - tpr)**2 + (1 - tnr)**2)
        if G <= 0.25:
            rospy.logwarn("Optimum classifier (G = {} <= 0.25)".format(G))
            # print("Optimum classifier (G = {} <= 0.25)".format(G))
        elif G > 0.25 and G <= 0.7:
            rospy.logwarn("Good classifier (0.25 < G = {} <= 0.7)".format(G))
            # print("Good classifier (0.25 < G = {} <= 0.7)".format(G))
        elif G == 0.7:
            rospy.logwarn("Random classifier (G = 0.7)")
            # print("Random classifier (G = 0.7)")
        else:
            rospy.logwarn("Bad classifier (G = {} > 0.7)".format(G))
Beispiel #37
0
# Build a 9-state HMM; the emission distributions a/aa/aaa, b/bb/bbb and
# c/cc/ccc are defined elsewhere in the file (not visible in this snippet).
s1 = State(a, name="M1")
s11 = State(aa, name="M11")
s111 = State(aaa, name="M111")

s2 = State(b, name="M2")
s22 = State(bb, name="M22")
s222 = State(bbb, name="M222")

s3 = State(c, name="M3")
s33 = State(cc, name="M33")
s333 = State(ccc, name="M333")

hmm = HiddenMarkovModel()
hmm.add_states(s1, s11, s111, s2, s22, s222, s3, s33, s333)

# Near-uniform start distribution (0.12 + 8 * 0.11 = 1.0).
hmm.add_transition(hmm.start, s1, 0.12)
hmm.add_transition(hmm.start, s11, 0.11)
hmm.add_transition(hmm.start, s111, 0.11)
hmm.add_transition(hmm.start, s2, 0.11)
hmm.add_transition(hmm.start, s22, 0.11)
hmm.add_transition(hmm.start, s222, 0.11)
hmm.add_transition(hmm.start, s3, 0.11)
hmm.add_transition(hmm.start, s33, 0.11)
hmm.add_transition(hmm.start, s333, 0.11)


# Sticky self-transition for s1 with small leaks to the other states.
# NOTE(review): the visible rows sum to 0.96 and the remaining states have
# no outgoing transitions here — this snippet appears truncated; confirm
# the rest of the transition table against the original source.
hmm.add_transition(s1, s1, 0.92)
hmm.add_transition(s1, s11, 0.01)
hmm.add_transition(s1, s111, 0.01)
hmm.add_transition(s1, s2, 0.01)
hmm.add_transition(s1, s22, 0.01)
Beispiel #38
0
    def _segment(self, arr, components=2):
        """Segment *arr* into states with a GMM-emission HMM.

        The positive entries are outlier-filtered (Hampel, log2 scale),
        a GMM picks candidate state means (log scale preferred unless it
        collapses to one state), and a Baum-Welch-trained HMM assigns a
        state to every retained value.

        Returns (newarr, seg, scale): the filtered array, the called
        intervals, and the scale ('log' or 'linear') that was used.
        """
        positive = arr[arr > 0]
        keep_idx = self.hampel_filter(np.log2(positive))
        kept = positive[keep_idx]

        # Candidate state means on both scales; the linear fit is kept as
        # a fallback to improve the sensitivity.
        log_gmm = self.get_states(np.log2(kept))
        lin_gmm = self.get_states(kept)
        if len(log_gmm.means_.ravel()) == 1:
            means, probs, scale = lin_gmm.means_.ravel(), lin_gmm.weights_, 'linear'
        else:
            means, probs, scale = log_gmm.means_.ravel(), log_gmm.weights_, 'log'

        logger.info('Estimated HMM state number: {0} ({1} scale)'.format(len(means), scale))

        model = HiddenMarkovModel()
        # One mixture state per mean: normals centered at m, m-0.5,
        # m+0.5, ... (sd 0.5), `components` of them.
        dists = [
            State(GeneralMixtureModel(
                [NormalDistribution(m + (-1) ** k * ((k + 1) // 2) * 0.5, 0.5)
                 for k in range(components)]), name=str(m))
            for m in means
        ]
        model.add_states(*tuple(dists))

        # Sticky transition matrix: 0.8 self, remainder split evenly.
        n_states = len(means)
        for i in range(n_states):
            for j in range(n_states):
                weight = 0.8 if i == j else 0.2 / (n_states - 1)
                model.add_transition(dists[i], dists[j], weight)
        # GMM mixture weights double as the start distribution.
        for i in range(n_states):
            model.add_transition(model.start, dists[i], probs[i])

        model.bake()

        # Rebuild the array with only the filtered values in place.
        restored = np.zeros(positive.size)
        restored[keep_idx] = kept
        newarr = np.zeros(arr.size)
        newarr[arr > 0] = restored

        if n_states > 1:
            model.fit(self.pieces(newarr, scale=scale), algorithm='baum-welch', n_jobs=self.n_jobs,
                      max_iterations=5000, stop_threshold=2e-4)

            queue = newarr[newarr > 0]
            observed = np.log2(queue) if scale == 'log' else queue
            # Skip the silent start state; state names carry the means.
            seq = np.r_[[s.name for i, s in model.viterbi(observed)[1][1:]]]
            seg = self.assign_cnv(queue, seq)

            predicted = np.zeros(newarr.size)
            predicted[newarr > 0] = seg
            seg = self.call_intervals(predicted)
        else:
            # A single state means nothing to segment.
            seg = [(0, newarr.size)]

        return newarr, seg, scale
Beispiel #39
0
def get_constant_number_of_repeats_matcher_hmm(patterns, copies):
    """Build a profile HMM that matches exactly *copies* repeats of a pattern.

    One profile-HMM unit (match/insert/delete columns plus silent
    unit_start/unit_end states) is instantiated per repeat copy and the
    units are chained start -> unit_0 -> ... -> unit_{copies-1} -> end.
    Transition/emission tables come from build_profile_hmm_for_repeats.
    """
    model = Model(name="Repeating Pattern Matcher HMM Model")

    transitions, emissions = build_profile_hmm_for_repeats(
        patterns, settings.MAX_ERROR_RATE)
    # Match-column names ('M1', 'M2', ...) determine the profile length.
    matches = [m for m in emissions.keys() if m.startswith('M')]

    last_end = None
    for repeat in range(copies):
        # Per-copy state sets; names are suffixed with the repeat index.
        insert_states = []
        match_states = []
        delete_states = []
        # Insert columns I0..In (one more than the match columns).
        for i in range(len(matches) + 1):
            insert_distribution = DiscreteDistribution(emissions['I%s' % i])
            insert_states.append(
                State(insert_distribution, name='I%s_%s' % (i, repeat)))

        for i in range(1, len(matches) + 1):
            match_distribution = DiscreteDistribution(emissions['M%s' % i])
            match_states.append(
                State(match_distribution, name='M%s_%s' % (str(i), repeat)))

        # Delete states are silent (no emission distribution).
        for i in range(1, len(matches) + 1):
            delete_states.append(State(None, name='D%s_%s' % (str(i), repeat)))

        unit_start = State(None, name='unit_start_%s' % repeat)
        unit_end = State(None, name='unit_end_%s' % repeat)
        model.add_states(insert_states + match_states + delete_states +
                         [unit_start, unit_end])
        # n is the index of the last match/delete column (0-based).
        n = len(delete_states) - 1

        # Chain this unit to the previous one (or to the model start).
        if repeat > 0:
            model.add_transition(last_end, unit_start, 1)
        else:
            model.add_transition(model.start, unit_start, 1)

        if repeat == copies - 1:
            model.add_transition(unit_end, model.end, 1)

        # Entry transitions from unit_start into column 1.
        model.add_transition(unit_start, match_states[0],
                             transitions['unit_start']['M1'])
        model.add_transition(unit_start, delete_states[0],
                             transitions['unit_start']['D1'])
        model.add_transition(unit_start, insert_states[0],
                             transitions['unit_start']['I0'])

        # I0 loops on itself, then feeds column 1.
        model.add_transition(insert_states[0], insert_states[0],
                             transitions['I0']['I0'])
        model.add_transition(insert_states[0], delete_states[0],
                             transitions['I0']['D1'])
        model.add_transition(insert_states[0], match_states[0],
                             transitions['I0']['M1'])

        # Exit transitions from the last column into unit_end.
        model.add_transition(delete_states[n], unit_end,
                             transitions['D%s' % (n + 1)]['unit_end'])
        model.add_transition(delete_states[n], insert_states[n + 1],
                             transitions['D%s' % (n + 1)]['I%s' % (n + 1)])

        model.add_transition(match_states[n], unit_end,
                             transitions['M%s' % (n + 1)]['unit_end'])
        model.add_transition(match_states[n], insert_states[n + 1],
                             transitions['M%s' % (n + 1)]['I%s' % (n + 1)])

        model.add_transition(insert_states[n + 1], insert_states[n + 1],
                             transitions['I%s' % (n + 1)]['I%s' % (n + 1)])
        model.add_transition(insert_states[n + 1], unit_end,
                             transitions['I%s' % (n + 1)]['unit_end'])

        # Interior columns: M/D/I of column i connect to column i+1.
        for i in range(1, len(matches) + 1):
            model.add_transition(match_states[i - 1], insert_states[i],
                                 transitions['M%s' % i]['I%s' % i])
            model.add_transition(delete_states[i - 1], insert_states[i],
                                 transitions['D%s' % i]['I%s' % i])
            model.add_transition(insert_states[i], insert_states[i],
                                 transitions['I%s' % i]['I%s' % i])
            if i < len(matches):
                model.add_transition(insert_states[i], match_states[i],
                                     transitions['I%s' % i]['M%s' % (i + 1)])
                model.add_transition(insert_states[i], delete_states[i],
                                     transitions['I%s' % i]['D%s' % (i + 1)])

                model.add_transition(match_states[i - 1], match_states[i],
                                     transitions['M%s' % i]['M%s' % (i + 1)])
                model.add_transition(match_states[i - 1], delete_states[i],
                                     transitions['M%s' % i]['D%s' % (i + 1)])

                model.add_transition(delete_states[i - 1], match_states[i],
                                     transitions['D%s' % i]['M%s' % (i + 1)])
                model.add_transition(delete_states[i - 1], delete_states[i],
                                     transitions['D%s' % i]['D%s' % (i + 1)])

        last_end = unit_end

    # merge=None keeps the silent unit boundary states distinct.
    model.bake(merge=None)
    return model
Beispiel #40
0
# Build a 4-state Gaussian-emission HMM for gait-phase classification.
# (The duplicate `hmm_states = []` re-initialization was removed.)
distros = []     # per-class emission distributions
hmm_states = []  # corresponding pomegranate states
state_names = ['ff', 'ho', 'sw', 'hs']
for i in range(0, n_classes):
    # Multivariate Gaussian fitted from the per-class statistics
    # (class_means / class_cov are computed earlier in the file).
    dis = MGD\
        (np.array(class_means[i]).flatten(),
         np.array(class_cov[i]))
    st = State(dis, name=state_names[i])
    distros.append(dis)
    hmm_states.append(st)
model = HMM(name="Gait")
print(t)

model.add_states(hmm_states)
# Deterministic start in the first state ('ff'); other states get no mass.
model.add_transition(model.start, hmm_states[0], 1.00)
model.add_transition(model.start, hmm_states[1], 0.0)
model.add_transition(model.start, hmm_states[2], 0.0)
model.add_transition(model.start, hmm_states[3], 0.0)

# Fully connected transitions from the empirical matrix t.
for i in range(0, n_classes):
    for j in range(0, n_classes):
        model.add_transition(hmm_states[i], hmm_states[j], t[i][j])

model.bake()

seq = list([ff[:limit]])
print(model.name)
print(model.d)
print(model.edges)
print(model.silent_start)
Beispiel #41
0
# Base HMM POS tagger: one hidden state per tag, MLE probabilities.
basic_model = HiddenMarkovModel(name="base-hmm-tagger")

# One state per tag, emitting P(word | tag) from relative frequencies.
tag_states = {}
for cur in data.training_set.tagset:
    emission_dist = DiscreteDistribution({
        word: emission_counts[cur][word] / tag_unigrams[cur]
        for word in emission_counts[cur]
    })
    state = State(emission_dist, name=cur)
    tag_states[cur] = state
    basic_model.add_state(state)

# Wire up start, tag-to-tag, and end transitions with the observed
# frequencies P(tag_i | tag_{i-1}).
for prev_tag in data.training_set.tagset:
    unigram = tag_unigrams[prev_tag]
    basic_model.add_transition(basic_model.start, tag_states[prev_tag],
                               tag_starts[prev_tag] / unigram)
    for next_tag in data.training_set.tagset:
        basic_model.add_transition(
            tag_states[prev_tag], tag_states[next_tag],
            tag_bigrams[(prev_tag, next_tag)] / unigram)
    basic_model.add_transition(tag_states[prev_tag], basic_model.end,
                               tag_ends[prev_tag] / unigram)

# finalize the model
basic_model.bake()

# Evaluate the tagger's accuracy on the training corpus.
hmm_training_acc = accuracy(data.training_set.X, data.training_set.Y,
                            basic_model)
print("training accuracy basic hmm model: {:.2f}%".format(100 *
                                                          hmm_training_acc))
Beispiel #42
0
    X_1 = X[2000:4000]
    X_11 = X[2000:3000]
    X_2 = X[400:800]
    X_3 = X[7000:8000]
a = MultivariateGaussianDistribution.from_samples(X_1)
aa = MultivariateGaussianDistribution.from_samples(X_11)
b = MultivariateGaussianDistribution.from_samples(X_2)
c = MultivariateGaussianDistribution.from_samples(X_3)
s1 = State(a, name="M1")
s11 = State(aa, name="M11")
s2 = State(b, name="M2")
s3 = State(c, name="M3")

hmm = HiddenMarkovModel()
hmm.add_states(s1, s11, s2, s3)
hmm.add_transition(hmm.start, s1, 0.25)
hmm.add_transition(hmm.start, s3, 0.25)
hmm.add_transition(hmm.start, s11, 0.25)
hmm.add_transition(hmm.start, s2, 0.25)

hmm.add_transition(s1, s1, 0.91)
hmm.add_transition(s1, s11, 0.03)
hmm.add_transition(s1, s2, 0.03)
hmm.add_transition(s1, s3, 0.03)

hmm.add_transition(s11, s11, 0.91)
hmm.add_transition(s11, s1, 0.03)
hmm.add_transition(s11, s2, 0.03)
hmm.add_transition(s11, s3, 0.03)

hmm.add_transition(s2, s1, 0.03)