def init_cycle_hmm(sequences, steps, states_per_step, model_id):
    """
    Instantiate a cyclic left-right model with random parameters.

    Randomly generates start and transition probabilities, and fits a
    normal distribution for each state from a chronological partition of
    the timepoints in ``sequences``.

    :param sequences: 2-D array of observations (samples x timepoints)
    :param steps: number of steps in the cycle
    :param states_per_step: number of parallel states per step
    :param model_id: name for the resulting model
    :return: a baked HiddenMarkovModel
    """
    model = HiddenMarkovModel(model_id)
    n_states = steps * states_per_step

    # make distributions from chronological subsets of timepoints
    step_size = int(math.ceil(sequences.shape[1] / float(n_states + 1)))

    # generate states
    states = np.empty((steps, states_per_step), dtype=object)
    for i in range(steps):
        for j in range(states_per_step):
            temp_assignment = np.arange(step_size * i, step_size * (i + 1))
            dist = \
                NormalDistribution.from_samples(sequences[:, temp_assignment])
            state_name = str(i) + '-' + str(j)
            states[i, j] = State(dist, name=str(state_name))

    # add states to model
    model.add_states(states.flatten().tolist())

    # random start probabilities into every state (the cycle may be
    # entered at any point, not just step 0)
    trans = np.random.ranf(n_states)
    trans = trans / trans.sum()
    for i, state in enumerate(states.flatten().tolist()):
        model.add_transition(model.start, state, trans[i])

    # make random transition from step(i) -> step(i+1)
    for i in range(steps - 1):
        for j in range(states_per_step):
            trans = np.random.ranf(states_per_step + 1)
            trans = trans / trans.sum()
            # self transition
            model.add_transition(states[i, j], states[i, j], trans[0])
            # out transition
            for x in range(states_per_step):
                model.add_transition(states[i, j], states[i + 1, x],
                                     trans[x + 1])

    # make random transition from step n -> step 0 (closes the cycle)
    for j in range(states_per_step):
        trans = np.random.ranf(states_per_step + 1)
        trans = trans / trans.sum()
        # self transition
        model.add_transition(states[(steps - 1), j],
                             states[(steps - 1), j], trans[0])
        # out transition
        for x in range(states_per_step):
            model.add_transition(states[(steps - 1), j], states[0, x],
                                 trans[x + 1])

    model.bake()
    # Python 3 print (the original used a Python 2 print statement)
    print('Initialized Cyclic State HMM:', '[', steps, states_per_step, ']')
    return model
def gaussian_hmm(n_states, lower, upper, variance, model_id):
    """
    Instantiate a model with random parameters.

    State means are evenly spaced on [lower, upper]; start and transition
    probabilities are randomly jittered and renormalized.

    :param n_states: number of hidden states
    :param lower: lowest state mean
    :param upper: highest state mean
    :param variance: shared variance of every state's emission
    :param model_id: name for the resulting model
    :return: a baked HiddenMarkovModel
    """
    np.random.seed(int(time.time()))
    model = HiddenMarkovModel(model_id)

    # make states with Gaussian emissions, means evenly spaced on the range
    x = np.linspace(lower, upper, n_states)
    states = []
    for i in range(n_states):
        dist = \
            NormalDistribution(x[i], variance)
        states.append(State(dist, name=str(i)))
    model.add_states(states)

    # near-uniform start probabilities with random jitter, renormalized
    start_prob = 1.0 / n_states
    start_probs = []
    for i in range(n_states):
        start_probs.append(start_prob + np.random.ranf())
    start_probs = np.array(start_probs)
    start_probs = start_probs / start_probs.sum()
    for i, state in enumerate(states):
        model.add_transition(model.start, state, start_probs[i])

    # add transition probabilities proportional to probability of generating
    # one state mean from another, plus random jitter
    for state1 in states:
        transitions = []
        for other_state in states:
            transitions.append(np.exp(state1.distribution.log_probability(
                other_state.distribution.parameters[0])) + np.random.ranf())
        transitions = np.array(transitions)
        transitions = transitions / transitions.sum()
        for i, state2 in enumerate(states):
            model.add_transition(state1, state2, transitions[i])

    model.bake()
    # Python 3 print (the original used a Python 2 print statement)
    print('Initialized HMM: ', model.name)
    return model
def bake_model(tags_sequence, words_sequence):
    """
    Build and bake an HMM demand tagger.

    'Tags' are the time-demand labels that generate the emitted demand
    level; demand levels are represented by 'words'.
    """
    # flatten the word sequences and gather count statistics
    words = [w for w in chain(*words_sequence)]
    tag_unigrams = unigram_counts(words)
    tag_bigrams = bigram_counts(words)

    # uniform distribution for starting and ending labels
    all_labels = list(set(words))
    tag_starts = starting_counts(all_labels)
    tag_ends = ending_counts(all_labels)

    basic_model = HiddenMarkovModel(name="base-hmm-tagger")

    # emission counts over (demand, label) pairs
    emission_count = pair_counts(words_sequence, tags_sequence)

    # one state per demand level with emission distribution P(word | tag)
    states = []
    for rdemand, label_dict in emission_count.items():
        dist_tag = DiscreteDistribution({
            label: cn / tag_unigrams[rdemand]
            for label, cn in label_dict.items()
        })
        states.append(State(dist_tag, name=rdemand))
    basic_model.add_states(states)

    # map each state name back to its index
    state_index = {s.name: num for num, s in enumerate(states)}

    # start transitions
    total_start = sum(tag_starts.values())
    for tag, cn in tag_starts.items():
        basic_model.add_transition(basic_model.start,
                                   states[state_index[tag]],
                                   cn / total_start)

    # end transitions
    total_end = sum(tag_ends.values())
    for tag, cn in tag_ends.items():
        basic_model.add_transition(states[state_index[tag]],
                                   basic_model.end,
                                   cn / total_end)

    # edges between states from observed transition frequencies
    # P(tag_i | tag_i-1)
    for (prev_tag, next_tag), cn in tag_bigrams.items():
        basic_model.add_transition(states[state_index[prev_tag]],
                                   states[state_index[next_tag]],
                                   cn / tag_unigrams[prev_tag])

    # finalize the model
    basic_model.bake()
    return basic_model
def create_hidden_MarkovModel(e_df, q_df, start_p_dict):
    """
    Creates a Hidden Markov Model based on DataFrames.

    @args:
        - e_df (pd.DataFrame): contains the emission probabilities
        - q_df (pd.DataFrame): contains the transition probabilities
        - start_p_dict (dict): start probability for each state
    @returns:
        a baked HiddenMarkovModel
    """
    model = HiddenMarkovModel(name="Example Model")

    # 1: create one state per column of the transition DataFrame
    model_dict = {}
    for key in q_df.keys().values:
        # 1.1: emission probabilities for the state, P(observation | state)
        emission_p = DiscreteDistribution(e_df[key].to_dict())
        model_dict[key] = State(emission_p, name=key)
        model.add_state(model_dict[key])
        # 1.2: add the start probability for each state
        model.add_transition(model.start, model_dict[key], start_p_dict[key])

    # 2: add the transition probability between each pair of states
    for key, item in q_df.to_dict("index").items():
        for item_name in item:
            model.add_transition(model_dict[key], model_dict[item_name],
                                 q_df.loc[key, item_name])

    # finally, call the .bake() method to finalize the model
    model.bake()
    return model
def train_hmm_tagger(data):
    """Construct and bake a hidden Markov part-of-speech tagger.

    - One state per tag; emission distribution P(w|t) = C(t, w) / C(t)
    - Edge from `basic_model.start` to each tag:
      P(t|start) = C(start, t) / C(start)
    - Edge from each tag to `basic_model.end`:
      P(end|t) = C(t, end) / C(t)
    - Edge between every observed pair of tags:
      P(t2|t1) = C(t1, t2) / C(t1)

    NOTE(review): ``tag_unigrams``, ``tag_starts``, ``tag_ends`` and
    ``tag_bigrams`` are read from enclosing/module scope — confirm they
    are computed before this function runs.
    """
    basic_model = HiddenMarkovModel(name="base-hmm-tagger")
    state_dict = {}
    states = []  # NOTE(review): never used below
    # stream() yields pairs; the [::-1] reverses the unzipped tuple order
    # before handing it to pair_counts
    emission_counts = pair_counts(*list(zip(
        *data.training_set.stream()))[::-1])
    for tag in emission_counts.keys():
        tag_count = tag_unigrams[tag]
        # emission distribution P(w|t) = C(t, w) / C(t)
        probs = {}
        for w in emission_counts[tag]:
            probs[w] = emission_counts[tag][w] / tag_count
        emission_p = DiscreteDistribution(probs)
        state = State(emission_p, name="" + tag)
        basic_model.add_state(state)
        state_dict[tag] = state
    for tag in tag_starts:
        # start edge P(t|start) and end edge P(end|t)
        basic_model.add_transition(basic_model.start, state_dict[tag],
                                   tag_starts[tag] / len(data.training_set.Y))
        basic_model.add_transition(state_dict[tag], basic_model.end,
                                   tag_ends[tag] / tag_unigrams[tag])
    for (tag1, tag2) in tag_bigrams:
        # tag-to-tag edge P(t2|t1) = C(t1, t2) / C(t1)
        basic_model.add_transition(
            state_dict[tag1], state_dict[tag2],
            tag_bigrams[(tag1, tag2)] / tag_unigrams[tag1])
    # finalize the model
    basic_model.bake()
    assert all(
        tag in set(s.name for s in basic_model.states)
        for tag in data.training_set.tagset
    ), "Every state in your network should use the name of the associated tag, which must be one of the training set tags."
    assert basic_model.edge_count() == 168, (
        "Your network should have an edge from the start node to each state, one edge between every " +
        "pair of tags (states), and an edge from each state to the end node.")
    # NOTE(review): the HTML() banner value is discarded here — it only
    # renders when it is the last expression of a notebook cell.
    HTML(
        '<div class="alert alert-block alert-success">Your HMM network topology looks good!</div>'
    )
    return basic_model
def _initialize_new_hmm(hmm, new_states, new_transitions):
    """Build a fresh baked HMM from the given states and transitions,
    remapping the old model's start/end onto the new model's start/end."""
    rebuilt = HiddenMarkovModel()
    boundary = (hmm.start, hmm.end)

    # copy every state except the old boundary states
    for state in new_states:
        if state not in boundary:
            rebuilt.add_state(state)

    # re-wire transitions, substituting the new start/end where needed
    for src, dst, prob in new_transitions:
        if src == hmm.start:
            rebuilt.add_transition(rebuilt.start, dst, prob)
        elif dst == hmm.end:
            rebuilt.add_transition(src, rebuilt.end, prob)
        else:
            rebuilt.add_transition(src, dst, prob)

    rebuilt.bake()
    return rebuilt
def ghmm_model(states_labels: tuple, transitions: tuple, init_prob: tuple,
               end_prob: tuple, means: list, vars: list) -> HiddenMarkovModel:
    """
    Build a Gaussian-mixture HMM.

    :param states_labels: one label per hidden state
    :param transitions: square matrix of state-to-state probabilities
    :param init_prob: start probability per state
    :param end_prob: end probability per state
    :param means: per-state, per-component Gaussian means
    :param vars: per-state, per-component Gaussian variances
    :return: the baked model
    """
    hmm_model = HiddenMarkovModel()
    n_components = len(vars[0])

    # one mixture-of-Gaussians state per label, named by its index
    states = [
        State(
            GeneralMixtureModel([
                NormalDistribution(means[idx][c], vars[idx][c])
                for c in range(n_components)
            ]),
            name=str(idx))
        for idx in range(len(states_labels))
    ]
    hmm_model.add_states(*states)

    # state-to-state edges; zero-probability edges are omitted
    n = len(states_labels)
    for row in range(n):
        for col in range(n):
            weight = transitions[row][col]
            if weight != 0.:
                hmm_model.add_transition(states[row], states[col], weight)

    # start and end edges, again skipping zero weights
    for idx, weight in enumerate(init_prob):
        if weight != 0.:
            hmm_model.add_transition(hmm_model.start, states[idx], weight)
    for idx, weight in enumerate(end_prob):
        if weight != 0.:
            hmm_model.add_transition(states[idx], hmm_model.end, weight)

    hmm_model.bake()
    return hmm_model
def get_suffix_matcher_hmm(pattern):
    """Build a profile HMM that matches any suffix of ``pattern``.

    Classic match/insert/delete profile topology, except that the unit
    start connects to *every* match state, so matching may begin at any
    position of the pattern (i.e. any suffix can be matched).
    """
    model = Model(name="Suffix Matcher HMM Model")
    # insert states emit uniformly over the four nucleotides
    insert_distribution = DiscreteDistribution({
        'A': 0.25,
        'C': 0.25,
        'G': 0.25,
        'T': 0.25
    })
    insert_states = []
    match_states = []
    delete_states = []
    hmm_name = 'suffix'
    # one insert state per position, plus one trailing insert
    for i in range(len(pattern) + 1):
        insert_states.append(
            State(insert_distribution, name='I%s_%s' % (i, hmm_name)))
    # match states strongly favour the pattern character (0.97 vs 0.01)
    for i in range(len(pattern)):
        distribution_map = dict({'A': 0.01, 'C': 0.01, 'G': 0.01, 'T': 0.01})
        distribution_map[pattern[i]] = 0.97
        match_states.append(
            State(DiscreteDistribution(distribution_map),
                  name='M%s_%s' % (str(i + 1), hmm_name)))
    # silent (non-emitting) delete states
    for i in range(len(pattern)):
        delete_states.append(
            State(None, name='D%s_%s' % (str(i + 1), hmm_name)))
    unit_start = State(None, name='suffix_start_%s' % hmm_name)
    unit_end = State(None, name='suffix_end_%s' % hmm_name)
    model.add_states(insert_states + match_states + delete_states +
                     [unit_start, unit_end])
    last = len(delete_states) - 1

    model.add_transition(model.start, unit_start, 1)
    model.add_transition(unit_end, model.end, 1)

    model.add_transition(unit_start, delete_states[0], 0.01)
    model.add_transition(unit_start, insert_states[0], 0.01)
    # suffix property: the unit start may enter any match state, sharing
    # the 0.98 mass equally across all positions
    for i in range(len(pattern)):
        model.add_transition(unit_start, match_states[i], 0.98 / len(pattern))

    model.add_transition(insert_states[0], insert_states[0], 0.01)
    model.add_transition(insert_states[0], delete_states[0], 0.01)
    model.add_transition(insert_states[0], match_states[0], 0.98)

    # exits from the final column into the unit end
    model.add_transition(delete_states[last], unit_end, 0.99)
    model.add_transition(delete_states[last], insert_states[last + 1], 0.01)
    model.add_transition(match_states[last], unit_end, 0.99)
    model.add_transition(match_states[last], insert_states[last + 1], 0.01)
    model.add_transition(insert_states[last + 1], insert_states[last + 1],
                         0.01)
    model.add_transition(insert_states[last + 1], unit_end, 0.99)

    # interior match/insert/delete transitions of the profile topology
    for i in range(0, len(pattern)):
        model.add_transition(match_states[i], insert_states[i + 1], 0.01)
        model.add_transition(delete_states[i], insert_states[i + 1], 0.01)
        model.add_transition(insert_states[i + 1], insert_states[i + 1],
                             0.01)
        if i < len(pattern) - 1:
            model.add_transition(insert_states[i + 1], match_states[i + 1],
                                 0.98)
            model.add_transition(insert_states[i + 1], delete_states[i + 1],
                                 0.01)
            model.add_transition(match_states[i], match_states[i + 1], 0.98)
            model.add_transition(match_states[i], delete_states[i + 1], 0.01)
            model.add_transition(delete_states[i], delete_states[i + 1],
                                 0.01)
            model.add_transition(delete_states[i], match_states[i + 1], 0.98)

    # merge=None keeps the silent start/end bookkeeping states intact
    model.bake(merge=None)
    return model
def build_reference_repeat_finder_hmm(patterns, copies=1):
    """Build (and optionally fit) an HMM that finds tandem repeats of
    ``patterns[0]``.

    ``copies`` repeat units are chained together; flanking random-match
    states absorb non-repeat sequence on either side. If more than one
    pattern is supplied, the model is refined with Viterbi training on
    the patterns concatenated ``copies`` times.
    """
    pattern = patterns[0]
    model = Model(name="HMM Model")
    # inserts and the flanking random-match states emit uniformly
    insert_distribution = DiscreteDistribution({
        'A': 0.25,
        'C': 0.25,
        'G': 0.25,
        'T': 0.25
    })

    last_end = None
    start_random_matches = State(insert_distribution,
                                 name='start_random_matches')
    end_random_matches = State(insert_distribution,
                               name='end_random_matches')
    model.add_states([start_random_matches, end_random_matches])
    for repeat in range(copies):
        insert_states = []
        match_states = []
        delete_states = []
        # one insert state per position, plus one trailing insert
        for i in range(len(pattern) + 1):
            insert_states.append(
                State(insert_distribution, name='I%s_%s' % (i, repeat)))
        # match states strongly favour the pattern character
        for i in range(len(pattern)):
            distribution_map = dict({
                'A': 0.01,
                'C': 0.01,
                'G': 0.01,
                'T': 0.01
            })
            distribution_map[pattern[i]] = 0.97
            match_states.append(
                State(DiscreteDistribution(distribution_map),
                      name='M%s_%s' % (str(i + 1), repeat)))
        # silent delete states
        for i in range(len(pattern)):
            delete_states.append(
                State(None, name='D%s_%s' % (str(i + 1), repeat)))
        unit_start = State(None, name='unit_start_%s' % repeat)
        unit_end = State(None, name='unit_end_%s' % repeat)
        model.add_states(insert_states + match_states + delete_states +
                         [unit_start, unit_end])
        last = len(delete_states) - 1

        # chain this unit after the previous one; the first unit hangs off
        # the model start / the leading random-match state
        if repeat > 0:
            model.add_transition(last_end, unit_start, 0.5)
        else:
            model.add_transition(model.start, unit_start, 0.5)
            model.add_transition(model.start, start_random_matches, 0.5)
            model.add_transition(start_random_matches, unit_start, 0.5)
            model.add_transition(start_random_matches,
                                 start_random_matches, 0.5)

        model.add_transition(unit_end, end_random_matches, 0.5)
        # only the last unit may exit to the model end
        if repeat == copies - 1:
            model.add_transition(unit_end, model.end, 0.5)
            model.add_transition(end_random_matches, end_random_matches, 0.5)
            model.add_transition(end_random_matches, model.end, 0.5)

        model.add_transition(unit_start, match_states[0], 0.98)
        model.add_transition(unit_start, delete_states[0], 0.01)
        model.add_transition(unit_start, insert_states[0], 0.01)

        model.add_transition(insert_states[0], insert_states[0], 0.01)
        model.add_transition(insert_states[0], delete_states[0], 0.01)
        model.add_transition(insert_states[0], match_states[0], 0.98)

        # exits from the final column into the unit end
        model.add_transition(delete_states[last], unit_end, 0.99)
        model.add_transition(delete_states[last], insert_states[last + 1],
                             0.01)
        model.add_transition(match_states[last], unit_end, 0.99)
        model.add_transition(match_states[last], insert_states[last + 1],
                             0.01)
        model.add_transition(insert_states[last + 1],
                             insert_states[last + 1], 0.01)
        model.add_transition(insert_states[last + 1], unit_end, 0.99)

        # interior match/insert/delete transitions of the profile topology
        for i in range(0, len(pattern)):
            model.add_transition(match_states[i], insert_states[i + 1],
                                 0.01)
            model.add_transition(delete_states[i], insert_states[i + 1],
                                 0.01)
            model.add_transition(insert_states[i + 1],
                                 insert_states[i + 1], 0.01)
            if i < len(pattern) - 1:
                model.add_transition(insert_states[i + 1],
                                     match_states[i + 1], 0.98)
                model.add_transition(insert_states[i + 1],
                                     delete_states[i + 1], 0.01)
                model.add_transition(match_states[i], match_states[i + 1],
                                     0.98)
                model.add_transition(match_states[i], delete_states[i + 1],
                                     0.01)
                model.add_transition(delete_states[i],
                                     delete_states[i + 1], 0.01)
                model.add_transition(delete_states[i], match_states[i + 1],
                                     0.98)

        last_end = unit_end

    model.bake()

    if len(patterns) > 1:
        # model.fit(patterns, algorithm='baum-welch', transition_pseudocount=1, use_pseudocount=True)
        fit_patterns = [pattern * copies for pattern in patterns]
        model.fit(fit_patterns, algorithm='viterbi',
                  transition_pseudocount=1, use_pseudocount=True)

    return model
# NOTE(review): fragment — `full_labels`, `folds`, `full_data`, `class_data`,
# `state_names`, `distros`, `t`, `HMM`, `MGD`, `State`, and `rospy` are
# defined earlier in the file; the fold body continues past this excerpt.
skf = StratifiedKFold(full_labels, n_folds=folds)
for train_index, test_index in skf:
    # two-state gait model, one multivariate Gaussian per state
    model = HMM(name="Gait")
    hmm_states = []
    for i in range(0, 2):
        # dis = MGD(np.array(class_means[i]).flatten(), np.array(class_cov[i]))
        dis = MGD.from_samples(class_data[i])
        st = State(dis, name=state_names[i])
        distros.append(dis)
        hmm_states.append(st)
    model.add_states(hmm_states)
    # equal start probability for both states
    model.add_transition(model.start, hmm_states[0], 0.5)
    model.add_transition(model.start, hmm_states[1], 0.5)
    # full 2x2 transition matrix from `t`
    for i in range(0, 2):
        for j in range(0, 2):
            model.add_transition(hmm_states[i], hmm_states[j], t[i][j])
    model.bake()
    rospy.logwarn("Baked model")
    print("TRAIN:", train_index, "TEST:", test_index)
    # split the dataset for this fold
    train_data = full_data[train_index]
    # print(len(train_data))
    train_class = full_labels[train_index]
    # print(len(train_class))
    test_data = full_data[test_index]
    # print(len(test_data))
""" import random from pomegranate import * from pomegranate import HiddenMarkovModel as Model random.seed(0) model = Model(name="ExampleModel") distribution = UniformDistribution(0.0, 1.0) state = State(distribution, name="uniform") state2 = State(NormalDistribution(0, 2), name="normal") silent = State(None, name="silent") model.add_state(state) model.add_state(state2) model.add_transition(state, state, 0.4) model.add_transition(state, state2, 0.4) model.add_transition(state2, state2, 0.4) model.add_transition(state2, state, 0.4) model.add_transition(model.start, state, 0.5) model.add_transition(model.start, state2, 0.5) model.add_transition(state, model.end, 0.2) model.add_transition(state2, model.end, 0.2) model.bake() sequence = model.sample() print sequence print print model.forward(sequence)[ len(sequence), model.end_index ]
def train_and_test():
    """Build a coding-to-donor HMM, evaluate it, fit it on extracted exon
    sequences, re-evaluate, and serialize the model to JSON.

    NOTE(review): relies on module-level helpers (`converter_to`,
    `matrix_from_exa`, `calculator`, `classify`, `sequence_state_factory`,
    `add_sequence`, `test_model`, `equal_distribution`) defined elsewhere.
    """
    # load exon sequences: strip 'P' markers, lower-case, drop newlines
    with open('../data extractors/exons_start_1.txt') as in_file:
        total = []
        for line in in_file:
            no_p_line = line.replace('P', '').lower().replace('\n', '')
            total.append(no_p_line)
    converted_total = [converter_to(x, 2) for x in total]
    matrixDonor0 = numpy.array(
        matrix_from_exa('../data extractors/new_donor1.exa'))
    # per-frame coding distributions computed from the cut statistics
    c0, c1, c2 = calculator.calculate_proba2('../data extractors/new_cuts.txt')
    print(c0.p, c1.p, c2.p)
    coding_state0 = State(DiscreteDistribution(c0.p), 'coding state 0')
    coding_state1 = State(DiscreteDistribution(c1.p), 'coding state 1')
    coding_state2 = State(DiscreteDistribution(c2.p), 'coding state 2')
    # chain of donor-site states built from the donor matrix
    donor0_data = classify(matrixDonor0, 2)
    donor0_states = sequence_state_factory(donor0_data, 'donor0')
    post = State(DiscreteDistribution(equal_distribution), name='post')
    model = HiddenMarkovModel('coding to donor')
    model.add_state(coding_state0)
    model.add_state(coding_state1)
    model.add_state(coding_state2)
    add_sequence(model, donor0_states)
    model.add_state(post)
    # cycle coding0 -> coding1 -> coding2 -> coding0, with a 0.4 exit to
    # the donor chain from every frame
    model.add_transition(model.start, coding_state0, 1)
    model.add_transition(coding_state0, coding_state1, 0.6)
    model.add_transition(coding_state0, donor0_states[0], 0.4)
    model.add_transition(coding_state1, coding_state2, 0.6)
    model.add_transition(coding_state1, donor0_states[0], 0.4)
    model.add_transition(coding_state2, coding_state0, 0.6)
    model.add_transition(coding_state2, donor0_states[0], 0.4)
    model.add_transition(donor0_states[-1], post, 1)
    model.add_transition(post, post, 0.9)
    model.add_transition(post, model.end, 0.1)
    model.bake()
    # evaluate before and after fitting
    test_model(model)
    model.fit(converted_total, transition_pseudocount=1,
              emission_pseudocount=1, verbose=True)
    test_model(model)
    with open('partial_model_coding_to_donor_model0.json', 'w') as out:
        out.write(model.to_json())
# NOTE(review): fragment — `words_dict`, `total`, `tag`, `basic_model`,
# `to_pass_states`, `tags`, and the count dicts are defined earlier in the
# file, outside this excerpt; it also ends mid-loop.
distribution = {word: count/total for word, count in words_dict.items()}
tag_emissions = DiscreteDistribution(distribution)
tag_state = State(tag_emissions, name=tag)
to_pass_states.append(tag_state)
# NOTE(review): add_states() is called with NO arguments, so the states in
# to_pass_states are likely never added to the model — this probably should
# be basic_model.add_states(to_pass_states); confirm upstream.
basic_model.add_states()
# start probabilities per tag
start_prob = {}
for tag in tags:
    start_prob[tag] = starting_tag_count[tag]/tags_count[tag]
for tag_state in to_pass_states:
    basic_model.add_transition(basic_model.start, tag_state,
                               start_prob[tag_state.name])
# end probabilities per tag
end_prob = {}
for tag in tags:
    end_prob[tag] = ending_tag_count[tag]/tags_count[tag]
for tag_state in to_pass_states:
    basic_model.add_transition(tag_state, basic_model.end,
                               end_prob[tag_state.name])
# tag-to-tag transition probabilities from bigram counts
transition_prob_pair = {}
for key in tag_bigrams.keys():
    transition_prob_pair[key] = tag_bigrams.get(key)/tags_count[key[0]]
# NOTE(review): the loop below is truncated in this excerpt (no body).
for tag_state in to_pass_states:
# NOTE(review): fragment — `equal_distribution`, `matrix_from_exa`,
# `classify`, `sequence_state_factory`, `add_sequence`, `test`, and
# `converter_to` are defined elsewhere; the nested function below is
# truncated at the end of this excerpt.
back = State(DiscreteDistribution(equal_distribution), name='back')
back2 = State(DiscreteDistribution(equal_distribution), name='back2')
matrixZE = numpy.array(matrix_from_exa('../data extractors/starts.exa'))
start_states_data = classify(matrixZE, 2)
start_states = sequence_state_factory(start_states_data, 'start zone')
model = HiddenMarkovModel()
model.add_state(back)
model.add_state(back2)
add_sequence(model, start_states)
# background loop before entering the start-zone chain
model.add_transition(model.start, back, 1)
model.add_transition(back, back, 0.55)
model.add_transition(back, start_states[0], 0.45)
model.add_transition(start_states[-1], back2, 1)
# NOTE(review): back2 only gets a 0.5 self-transition and no edge to
# model.end before bake — confirm the remaining mass is intentional.
model.add_transition(back2, back2, 0.5)
model.bake()


def train_and_test():
    """Evaluate the module-level model, then load training lines."""
    test(model)
    lines = []
    with open('../data extractors/train_start2.exa') as fi:
        for line in fi:
            lines.append(converter_to(line.replace('\n', '')))
# NOTE(review): fragment — `n_classes`, `class_means`, `class_cov`,
# `startprob`, `t`, `ff`, `MGD`, `HMM`, and `rospy` come from earlier in
# the file; the training section is truncated at the end.
distros = []
hmm_states = []
state_names = ['ff', 'ho', 'sw', 'hs']
# one multivariate-Gaussian state per gait phase
for i in range(0, n_classes):
    dis = MGD\
        (np.array(class_means[i]).flatten(), np.array(class_cov[i]))
    st = State(dis, name=state_names[i])
    distros.append(dis)
    hmm_states.append(st)
model = HMM(name="Gait")
model.add_states(hmm_states)
"""Initial transitions"""
for i in range(0, n_classes):
    model.add_transition(model.start, hmm_states[i], startprob[i])
"""Left-right model"""
for i in range(0, n_classes):
    for j in range(0, n_classes):
        model.add_transition(hmm_states[i], hmm_states[j], t[i][j])
model.bake()
# print (model.name)
rospy.logwarn("N. observations: " + str(model.d))
# print (model.edges)
rospy.logwarn("N. hidden states: " + str(model.silent_start))
# print model
"""Training"""
# NOTE(review): 8 / 10.0 == 0.8, so `limit` covers 80% of `ff`; the comment
# says "80% to test, 20% to train" — confirm which split is which.
limit = int(len(ff) * (8 / 10.0))  # 80% of data to test, 20% to train
# seq = list([ff[:limit]])
""" from pomegranate import * from pomegranate import HiddenMarkovModel as Model import random import math random.seed(0) model = Model( name="Rainy-Sunny" ) # Emission probabilities rainy = State( DiscreteDistribution({ 'walk': 0.1, 'shop': 0.4, 'clean': 0.5 }), name='Rainy' ) sunny = State( DiscreteDistribution({ 'walk': 0.6, 'shop': 0.3, 'clean': 0.1 }), name='Sunny' ) model.add_transition( model.start, rainy, 0.6 ) model.add_transition( model.start, sunny, 0.4 ) # Transition matrix, with 0.05 subtracted from each probability to add to # the probability of exiting the hmm model.add_transition( rainy, rainy, 0.65 ) model.add_transition( rainy, sunny, 0.25 ) model.add_transition( sunny, rainy, 0.35 ) model.add_transition( sunny, sunny, 0.55 ) # Add transitions to the end of the model model.add_transition( rainy, model.end, 0.1 ) model.add_transition( sunny, model.end, 0.1 ) # Finalize the model structure model.bake( verbose=True )
def dominant_cover_hmm_model(nn_pobability_matrix, timeseries_steps,
                             n_observed_classes):
    """
    Build a five-state HMM over dominant-cover classes (vegetation,
    residue, soil, snow, water) whose emissions are the neural network's
    per-class predicted probabilities.

    @args:
        - nn_pobability_matrix: predicted class probabilities from the net
        - timeseries_steps: number of samples (time steps) in the series
        - n_observed_classes: number of observed classes in the matrix
    @returns:
        a baked HiddenMarkovModel
    """
    state_names = ['vegetation', 'residue', 'soil', 'snow', 'water']

    # one NN-emission wrapper per hidden state, indexed by class i
    # (replaces five copy-pasted wrapper/state definitions)
    states = [
        State(
            NeuralNetworkWrapperCustom(
                predicted_probabilities=nn_pobability_matrix,
                i=i,
                n_samples=timeseries_steps,
                n_classes=n_observed_classes),
            name=name)
        for i, name in enumerate(state_names)
    ]

    model = HiddenMarkovModel()

    # Initialize each hidden state.
    # All states have an equal chance of being the starting state.
    for s in states:
        model.add_state(s)
        model.add_transition(model.start, s, 1)

    # strongly self-persistent transitions: weight 95 to stay in state,
    # 1 to move to each other state (presumably renormalized at bake —
    # confirm against pomegranate's bake() semantics)
    for i, s in enumerate(states):
        weights = [1.0] * len(states)
        weights[i] = 95.
        model.add_transitions(s, states, weights)

    model.bake(verbose=False)
    return model
# NOTE(review): fragment — the sample arrays (X_22, X_3, ...) and the
# earlier distributions/states (a, aa, b, s1, s11, ...) are created before
# this excerpt, and the transition list continues past it.
bb = MultivariateGaussianDistribution.from_samples(X_22)
c = MultivariateGaussianDistribution.from_samples(X_3)
s1 = State(a, name="M1")
s11 = State(aa, name="M11")
s2 = State(b, name="M2")
s22 = State(bb, name="M22")
s3 = State(c, name="M3")
hmm = HiddenMarkovModel()
hmm.add_states(s1, s11, s2, s22, s3)
# uniform start probabilities over the five states
hmm.add_transition(hmm.start, s1, 0.2)
hmm.add_transition(hmm.start, s11, 0.2)
hmm.add_transition(hmm.start, s2, 0.2)
hmm.add_transition(hmm.start, s22, 0.2)
hmm.add_transition(hmm.start, s3, 0.2)
# s1 is strongly self-persistent (0.92), with 0.02 to every other state
hmm.add_transition(s1, s1, 0.92)
hmm.add_transition(s1, s11, 0.02)
hmm.add_transition(s1, s2, 0.02)
hmm.add_transition(s1, s22, 0.02)
hmm.add_transition(s1, s3, 0.02)
hmm.add_transition(s11, s1, 0.02)
def init_lr_hmm(sequences, steps, states_per_step,
                force_end=False, model_id='Left-Righ HMM', seed=None):
    """
    Instantiate a left-right model with random parameters.

    Randomly generates start and transition probabilities, and fits a
    normal distribution for each state from a chronological partition of
    the timepoints in ``sequences``.

    :param sequences: 2-D array of observations (samples x timepoints)
    :param steps: number of left-right steps
    :param states_per_step: number of parallel states per step
    :param force_end: if True, require the sequence to finish in the end state
    :param model_id: name for the resulting model
    :param seed: optional seed for numpy's random number generator
    :return: a baked HiddenMarkovModel
    """
    # seed random number generator so initialization is reproducible
    if seed is not None:
        np.random.seed(seed)

    model = HiddenMarkovModel(model_id)
    n_states = steps * states_per_step

    # make distributions from chronological subsets of timepoints
    step_size = int(math.ceil(sequences.shape[1] / float(n_states + 1)))

    # generate states
    states = np.empty((steps, states_per_step), dtype=object)
    for i in range(steps):
        for j in range(states_per_step):
            temp_assignment = np.arange(step_size * i, step_size * (i + 1))
            dist = \
                NormalDistribution.from_samples(sequences[:, temp_assignment])
            state_name = str(i) + '-' + str(j)
            states[i, j] = State(dist, name=str(state_name))

    # add states to model
    model.add_states(states.flatten().tolist())

    # make random transition from start -> step0
    trans = np.random.ranf(states_per_step)
    trans = trans / trans.sum()
    for j in range(states_per_step):
        model.add_transition(model.start, states[0, j], trans[j])

    # make random transition from step(i) -> step(i+1)
    for i in range(steps - 1):
        for j in range(states_per_step):
            trans = np.random.ranf(states_per_step + 1)
            trans = trans / trans.sum()
            # self transition
            model.add_transition(states[i, j], states[i, j], trans[0])
            # out transition
            for x in range(states_per_step):
                model.add_transition(states[i, j], states[i + 1, x],
                                     trans[x + 1])

    # make random transition from stepn -> end
    if force_end:
        for j in range(states_per_step):
            trans = np.random.ranf(2)
            trans = trans / trans.sum()
            # self transition
            model.add_transition(states[(steps - 1), j],
                                 states[(steps - 1), j], trans[0])
            # end transition
            model.add_transition(states[(steps - 1), j], model.end, trans[1])

    model.bake()
    # Python 3 print (the original used a Python 2 print statement)
    print('Initialized Left-Right HMM:', model.name,
          '[', steps, states_per_step, ']')
    return model
def init_gaussian_hmm(sequences, n_states, model_id, seed=None):
    """
    Instantiate a model with random parameters.

    Randomly generates start and transition probabilities, and fits a
    normal distribution for each state from a random subset of the
    timepoints in ``sequences``.

    :param sequences: 2-D array of observations (samples x timepoints)
    :param n_states: number of hidden states
    :param model_id: name for the resulting model
    :param seed: optional seed for numpy's random number generator
    :return: a baked HiddenMarkovModel
    """
    # seed random number generator so initialization is reproducible
    if seed is not None:
        np.random.seed(seed)

    model = HiddenMarkovModel(model_id)

    # make states with distributions from random subsets of timepoints
    x = int(math.ceil(sequences.shape[1] / float(n_states)))
    states = []
    for i in range(n_states):
        temp_assignment = np.random.choice(sequences.shape[1], x)
        dist = \
            NormalDistribution.from_samples(sequences[:, temp_assignment])
        states.append(State(dist, name=str(i)))
    model.add_states(states)

    # add random start probabilities, scaled to sum to 1
    start_probs = np.random.ranf(n_states)
    start_probs = start_probs / start_probs.sum()
    for i, state in enumerate(states):
        model.add_transition(model.start, state, start_probs[i])

    # add random transition probabilities out of each state
    for state1 in states:
        transitions = np.random.ranf(n_states)
        transitions = transitions / transitions.sum()
        for i, state2 in enumerate(states):
            model.add_transition(state1, state2, transitions[i])

    model.bake()
    # Python 3 print (the original used a Python 2 print statement)
    print('Initialized HMM: ', model.name)
    return model
s22222 = State(bbbbb, name="M22222") s222222 = State(bbbbbb, name="M222222") s3 = State(c, name="M3") s33 = State(cc, name="M33") s333 = State(ccc, name="M333") s3333 = State(cccc, name="M3333") s33333 = State(ccccc, name="M33333") s333333 = State(cccccc, name="M333333") hmm = HiddenMarkovModel() hmm.add_states(s1, s11, s111, s2, s22, s222, s3, s33, s333, s1111, s11111, s111111, s2222, s22222, s222222, s3333, s33333, s333333) hmm.add_transition(hmm.start, s1, 1.) hmm.add_transition(hmm.start, s11, 1.) hmm.add_transition(hmm.start, s111, 1.) hmm.add_transition(hmm.start, s2, 1.) hmm.add_transition(hmm.start, s22, 1.) hmm.add_transition(hmm.start, s222, 1.) hmm.add_transition(hmm.start, s3, 1.) hmm.add_transition(hmm.start, s33, 1.) hmm.add_transition(hmm.start, s333, 1.) hmm.add_transition(hmm.start, s1111, 1.) hmm.add_transition(hmm.start, s11111, 1.) hmm.add_transition(hmm.start, s111111, 1.) hmm.add_transition(hmm.start, s2222, 1.) hmm.add_transition(hmm.start, s22222, 1.) hmm.add_transition(hmm.start, s222222, 1.) hmm.add_transition(hmm.start, s3333, 1.)
# NOTE(review): fragment — the first statements sit inside a loop begun
# before this excerpt; `basic_model`, `emission_counts`, `tag_unigrams`,
# `tag_starts`, `tag_ends`, `tag_bigrams`, `states`, and `data` are
# defined elsewhere.
emission_prob = {}
# emission distribution P(word | tag) = C(tag, word) / C(tag)
for word, number in emission_counts[tag].items():
    emission_prob[word] = number / tag_unigrams[tag]
tag_distribution = DiscreteDistribution(emission_prob)
state = State(tag_distribution, name=tag)
states[tag] = state
basic_model.add_state(state)
# start and end edges for every tag
for tag in data.tagset:
    state = states[tag]
    # NOTE(review): sum(tag_starts.values()) / sum(tag_ends.values()) are
    # recomputed on every iteration — hoisting them would be cheaper.
    start_probability = tag_starts[tag] / sum(tag_starts.values())
    basic_model.add_transition(basic_model.start, state, start_probability)
    end_probability = tag_ends[tag] / sum(tag_ends.values())
    basic_model.add_transition(state, basic_model.end, end_probability)
# tag-to-tag edges P(t2|t1) = C(t1, t2) / C(t1)
for tag1 in data.tagset:
    state_1 = states[tag1]
    for tag2 in data.tagset:
        state_2 = states[tag2]
        bigram = (tag1, tag2)
        transition_probability = tag_bigrams[bigram] / tag_unigrams[tag1]
        basic_model.add_transition(state_1, state_2, transition_probability)
basic_model.bake()
# NOTE(review): fragment — coding_state0/1/2, equal_distribution, classify,
# sequence_state_factory, add_sequence, and matrixStop are defined earlier
# in the file; the trailing read loop continues past this excerpt.
post = State(DiscreteDistribution(equal_distribution), name='post')
model = HiddenMarkovModel('coding_to_stop')
stop_data = classify(matrixStop, 2)
stop_states = sequence_state_factory(stop_data, 'stop')
model.add_state(coding_state0)
model.add_state(coding_state1)
model.add_state(coding_state2)
add_sequence(model, stop_states)
model.add_state(post)
# start in frame 1; cycle coding0 -> coding1 -> coding2 -> coding0,
# leaving the cycle for the stop-codon chain from frame 2
model.add_transition(model.start, coding_state1, 1)
model.add_transition(coding_state0, coding_state1, 1)
model.add_transition(coding_state1, coding_state2, 1)
model.add_transition(coding_state2, coding_state0, 0.6)
model.add_transition(coding_state2, stop_states[0], 0.4)
model.add_transition(stop_states[-1], post, 1)
model.add_transition(post, post, 0.9)
model.add_transition(post, model.end, 0.1)
model.bake()
# load training sequences: strip 'P' markers, drop newlines, lower-case
with open('../data extractors/exons_end_start_2.txt') as in_file:
    total = []
    for line in in_file:
        no_p_line = line.replace('P', '').replace('\n', '').lower()
        total.append(no_p_line)
def build_dis_classifier(self):
    """Train and evaluate one two-state (swing/stance) HMM classifier per
    output class using stratified k-fold cross-validation.

    For each class column ``cl`` of ``self.full_data``: build a 2-state HMM
    (MGD emissions fitted from ``self.class_data``, transitions from
    ``self.t``), fit it with Baum-Welch on the training folds, decode the
    test fold with Viterbi, and accumulate TP/TN/FP/FN.  The trained models
    and per-class stats are pickled and saved to a .mat file at the end.
    """
    # NOTE(review): this is the old cross_validation-style API --
    # StratifiedKFold(labels, n_folds=...) is iterated directly below.
    skf = StratifiedKFold(self.full_labels, n_folds=self.folds)
    classifier_array = []
    stats_array = []
    num_class = len(self.full_data[0])
    print (num_class)
    for cl in range(0, num_class):
        # lel counts folds to skip; -1 disables skipping entirely.
        lel = -1
        tp_total = 0.0
        tn_total = 0.0
        fp_total = 0.0
        fn_total = 0.0
        tests = 0
        for train_index, test_index in skf:
            if lel > 0:
                lel -= 1
                continue
            stats = []
            distros = []
            hmm_states = []
            state_names = ['swing', 'stance']
            swings = 0
            stances = 0
            # One multivariate-Gaussian emission state per gait phase.
            for i in range(0, 2):
                dis = MGD.from_samples(self.class_data[i])
                st = State(dis, name=state_names[i])
                distros.append(dis)
                hmm_states.append(st)
            model = HMM()
            print(model.states)
            model.add_states(hmm_states)
            model.add_transition(model.start, hmm_states[0], 0.5)
            model.add_transition(model.start, hmm_states[1], 0.5)
            # Near-zero end transitions keep the model ergodic in practice.
            model.add_transition(hmm_states[1], model.end, 0.000000000000000001)
            model.add_transition(hmm_states[0], model.end, 0.000000000000000001)
            for i in range(0, 2):
                for j in range(0, 2):
                    model.add_transition(hmm_states[i], hmm_states[j], self.t[i][j])
            model.bake()
            tp = 0.0
            tn = 0.0
            fp = 0.0
            fn = 0.0
            train_data = self.full_data[train_index, cl]
            train_class = self.full_labels[train_index, cl]
            test_data = self.full_data[test_index]
            test_class = self.full_labels[test_index]
            print(np.isfinite(train_data).all())
            print(np.isfinite(test_data).all())
            # NOTE(review): np.isnan(train_data.any()) tests the SCALAR
            # result of .any(); almost certainly np.isnan(train_data).any()
            # was intended (same for the isinf/isfinite checks below).
            print(np.isnan(train_data.any()))
            print(np.isinf(train_data.any()))
            print(np.isnan(test_data.any()))
            print(np.isinf(test_data.any()))
            # NOTE(review): test_data is checked twice; train_class/test_class
            # asymmetry suggests a copy-paste slip -- confirm intent.
            if (not np.isfinite(train_data.any())) or (not np.isfinite(test_data.any())) \
                    or (not np.isfinite(train_class.any())) or (not np.isfinite(test_data.any())):
                rospy.logerr("NaN or Inf Detected")
                exit()
            try:
                rospy.logwarn("Training model #"+str(cl)+", fold #" + str(tests))
                seq = np.array(train_data)
                # NOTE(review): verbose='True' passes a string, not a bool.
                model.fit(seq, algorithm='baum-welch', verbose='True',
                          n_jobs=8, max_iterations=150)
            except ValueError:
                rospy.logwarn("Something went wrong, exiting")
                rospy.shutdown()
                exit()
            seq = []
            if self.batch_test == 1:
                s = 0
                # for s in range(0, len(test_data)):
                # Chop the test stream into sequences of at most 20 samples.
                while s < len(test_data):
                    k = 0
                    seq_entry = []
                    while k < 20 and s < len(test_data):
                        seq_entry.append(test_data[s])
                        k += 1
                        s += 1
                    seq.append(seq_entry)
            else:
                seq = np.array(test_data)
            # NOTE(review): comparing a numpy array with == [] yields an
            # element-wise array (deprecation/ambiguity hazard) -- confirm.
            if seq == [] or test_data == []:
                rospy.logerr("Empty testing sequence")
                continue
            # Decoding runs on the raw test_data, not on the batched seq.
            log, path = model.viterbi(test_data)
            # Viterbi path includes the silent start and end states, hence -2.
            if (len(path) - 2) != len(test_data):
                rospy.logerr(len(path))
                rospy.logerr(path[0][1].name)
                rospy.logerr(path[len(path) - 1][1].name)
                rospy.logerr(len(test_data))
                exit()
            tests += 1
            # Score each decoded sample against its ground-truth label:
            # swing == class 0, stance == class 1.
            for i in range(0, len(path) - 2):
                if path[i + 1][1].name != 'Gait-start' and path[i + 1][1].name != 'Gait-end':
                    if path[i + 1][1].name == 'swing':
                        # prediction is 0
                        swings += 1
                        if test_class[i] == 0:
                            # class is 0
                            tn += 1.0
                        elif test_class[i] == 1:
                            fn += 1.0
                            # class is 1
                    elif path[i + 1][1].name == 'stance':
                        # prediction is 1
                        stances += 1
                        if test_class[i] == 1:
                            # class is 1
                            tp += 1.0
                        elif test_class[i] == 0:
                            # class is 0
                            fp += 1.0
            print (swings)
            print (stances)
            # Per-fold metrics (log only; totals accumulated below).
            if (tp + fn) != 0.0:
                rospy.logwarn("Sensitivity : " + str(tp / (tp + fn)))
                # sensitivity = tp / (tp + fn)
            else:
                rospy.logwarn("Sensitivity : 0.0")
                # sensitivity = 0.0
            if (tn + fp) != 0.0:
                rospy.logwarn("Specificity : " + str(tn / (tn + fp)))
                # specificity = tn_total / (tn_total + fp_total)
            else:
                rospy.logwarn("Specificity : 0.0")
                # specificity = 0.0
            if (tn + tp + fn + fp) != 0.0:
                rospy.logwarn("Accuracy : " + str((tn + tp) / (tn + tp + fn + fp)))
                # accuracy = (tn + tp) / (tn + tp + fn + fp)
            else:
                rospy.logwarn("Accuracy : 0.0")
                # accuracy = 0.0
            tn_total += tn
            tp_total += tp
            fn_total += fn
            fp_total += fp
        # Average the confusion counts over the folds actually run.
        tp_total /= tests
        tn_total /= tests
        fp_total /= tests
        fn_total /= tests
        rospy.logerr("TP :" + str(tp_total))
        rospy.logerr("TN :" + str(tn_total))
        rospy.logerr("FP :" + str(fp_total))
        rospy.logerr("FN :" + str(fn_total))
        rospy.logerr("Tests :" + str(tests))
        if (tp_total + fn_total) != 0.0:
            sensitivity = tp_total / (tp_total + fn_total)
        else:
            sensitivity = 0.0
        if (tn_total + fp_total) != 0.0:
            specificity = tn_total / (tn_total + fp_total)
        else:
            specificity = 0.0
        if (tn_total + tp_total + fn_total + fp_total) != 0.0:
            accuracy = (tn_total + tp_total) / (tn_total + tp_total + fn_total + fp_total)
        else:
            accuracy = 0.0
        rospy.logwarn("----------------------------------------------------------")
        rospy.logerr("Total accuracy: " + str(accuracy))
        rospy.logerr("Total sensitivity: " + str(sensitivity))
        rospy.logerr("Total specificity: " + str(specificity))
        # NOTE(review): fn_total appears twice and tp_total never appears --
        # the second entry was presumably meant to be tp_total * tests.
        stats = [tn_total * tests, fn_total * tests, fp_total * tests,
                 fn_total * tests, tests, accuracy, sensitivity, specificity]
        rospy.logwarn("-------------------DONE-------------------------")
        # Only the last fold's model is kept per class.
        classifier_array.append(model)
        stats_array.append(stats)
    pickle.dump(classifier_array, open(datafile + "distributed_classifiers.p", 'wb'))
    pickle.dump(stats_array, open(datafile + "distributed_stats.p", 'wb'))
    scio.savemat(datafile + "distributed_stats.mat", {'stats': stats_array})
algorithms. ''' from pomegranate import * from pomegranate import HiddenMarkovModel as Model import itertools as it import numpy as np # Define the states s1 = State( NormalDistribution( 5, 2 ), name="S1" ) s2 = State( NormalDistribution( 15, 2 ), name="S2" ) s3 = State( NormalDistribution( 25, 2 ), name="S3 ") # Define the transitions model = Model( "infinite" ) model.add_transition( model.start, s1, 0.7 ) model.add_transition( model.start, s2, 0.2 ) model.add_transition( model.start, s3, 0.1 ) model.add_transition( s1, s1, 0.6 ) model.add_transition( s1, s2, 0.1 ) model.add_transition( s1, s3, 0.3 ) model.add_transition( s2, s1, 0.4 ) model.add_transition( s2, s2, 0.4 ) model.add_transition( s2, s3, 0.2 ) model.add_transition( s3, s1, 0.05 ) model.add_transition( s3, s2, 0.15 ) model.add_transition( s3, s3, 0.8 ) model.bake() sequence = [ 4.8, 5.6, 24.1, 25.8, 14.3, 26.5, 15.9, 5.5, 5.1 ]
algorithms. ''' from pomegranate import * from pomegranate import HiddenMarkovModel as Model import itertools as it import numpy as np # Define the states s1 = State(NormalDistribution(5, 2), name="S1") s2 = State(NormalDistribution(15, 2), name="S2") s3 = State(NormalDistribution(25, 2), name="S3 ") # Define the transitions model = Model("infinite") model.add_transition(model.start, s1, 0.7) model.add_transition(model.start, s2, 0.2) model.add_transition(model.start, s3, 0.1) model.add_transition(s1, s1, 0.6) model.add_transition(s1, s2, 0.1) model.add_transition(s1, s3, 0.3) model.add_transition(s2, s1, 0.4) model.add_transition(s2, s2, 0.4) model.add_transition(s2, s3, 0.2) model.add_transition(s3, s1, 0.05) model.add_transition(s3, s2, 0.15) model.add_transition(s3, s3, 0.8) model.bake() sequence = [4.8, 5.6, 24.1, 25.8, 14.3, 26.5, 15.9, 5.5, 5.1]
p = {} for word in emission_counts[tag]: p[word] = emission_counts[tag][ word] / tag_count # P(word | tag) = C(tag | word)/C(tag), C = count emission_p = DiscreteDistribution(p) state = State(emission_p, name="" + tag) basic_model.add_state(state) s[tag] = state # TODO: add edges between states for the observed transition frequencies P(tag_i | tag_i-1) # Start & End Transitions # Start - Number of senteces starting with tag over total number of sentences # End - Number of senteces ending with tag over count of tag appereances for tag in tag_starts: basic_model.add_transition(basic_model.start, s[tag], tag_starts[tag] / len(data.training_set.Y)) basic_model.add_transition(s[tag], basic_model.end, tag_ends[tag] / tag_unigrams[tag]) for (tag1, tag2) in tag_bigrams: basic_model.add_transition(s[tag1], s[tag2], tag_bigrams[(tag1, tag2)] / tag_unigrams[tag1]) basic_model.bake() assert all(tag in set(s.name for s in basic_model.states) for tag in data.training_set.tagset), \ "Every state in your network should use the name of the associated tag, which must be one of the training set tags." assert basic_model.edge_count() == 168, \ ("Your network should have an edge from the start node to each state, one edge between every " + "pair of tags (states), and an edge from each state to the end node.")
if fl[i] == 1: positive_data.append(fd[i]) else: negative_data.append(fd[i]) posdis = MGD.from_samples(positive_data) st = State(posdis, name='swing') distros.append(st) hmm_states.append(st) negdis = MGD.from_samples(negative_data) st2 = State(negdis, name='stance') distros.append(st2) hmm_states.append(st2) cl.add_states(hmm_states) cl.add_transition(cl.start, hmm_states[0], 0.5) cl.add_transition(cl.start, hmm_states[1], 0.5) for i in range(0, 2): for j in range(0, 2): cl.add_transition(hmm_states[i], hmm_states[j], t[i][j]) cl.bake() f += 1 train_data = fd[train_index] train_class = fl[train_index] test_data = fd[test_index] test_class = fl[test_index] seq = [] if batch_training == 1: s = 0
# TODO: create a discrete distribution for the rainy emissions from the probability table
# above & use that distribution to create a state named Rainy
rainy_emissions = DiscreteDistribution({"yes": 0.8, "no": 0.2})
rainy_state = State(rainy_emissions, name="Rainy")

# add the states to the model
model.add_states(sunny_state, rainy_state)

assert rainy_emissions.probability("yes") == 0.8, "The director brings his umbrella with probability 0.8 on rainy days"
# Fix: the original line was mangled by notebook conversion into
#   print("Looks good so farget_ipython().getoutput("")")
# which has unbalanced quotes (a syntax error). Restored the intended message.
print("Looks good so far!")

# create edges for each possible state transition in the model
# equal probability of a sequence starting on either a rainy or sunny day
model.add_transition(model.start, sunny_state, 0.5)
model.add_transition(model.start, rainy_state, 0.5)

# add sunny day transitions (we already know estimates of these probabilities
# from the problem statement)
model.add_transition(sunny_state, sunny_state, 0.8)  # 80% sunny->sunny
model.add_transition(sunny_state, rainy_state, 0.2)  # 20% sunny->rainy

# TODO: add rainy day transitions using the probabilities specified in the transition table
model.add_transition(rainy_state, sunny_state, 0.4)  # 40% rainy->sunny
model.add_transition(rainy_state, rainy_state, 0.6)  # 60% rainy->rainy

# finally, call the .bake() method to finalize the model
model.bake()

assert model.edge_count() == 6, "There should be two edges from model.start, two from Rainy, and two from Sunny"
X_3 = X[y == 2] else: X_1 = X[2000:4000] X_2 = X[400:800] X_3 = X[7000:8000] a = MultivariateGaussianDistribution.from_samples(X_1) b = MultivariateGaussianDistribution.from_samples(X_2) c = MultivariateGaussianDistribution.from_samples(X_3) s1 = State(a, name="M1") s2 = State(b, name="M2") s3 = State(c, name="M3") hmm = HiddenMarkovModel() hmm.add_states(s1, s2, s3) hmm.add_transition(hmm.start, s1, 0.34) hmm.add_transition(hmm.start, s3, 0.33) hmm.add_transition(hmm.start, s2, 0.33) hmm.add_transition(s1, s1, 0.9) hmm.add_transition(s1, s2, 0.05) hmm.add_transition(s1, s3, 0.05) hmm.add_transition(s2, s1, 0.05) hmm.add_transition(s2, s3, 0.05) hmm.add_transition(s2, s2, 0.9) hmm.add_transition(s3, s3, 0.9) hmm.add_transition(s3, s2, 0.05) hmm.add_transition(s3, s1, 0.05) hmm.bake()
# Register every pre-built state chain with the coding model, then wire up
# the hand-tuned transition topology.
for _chain in (ze_states, ez_states_taa, ez_states_tga, ez_states_tag,
               donor0_states, donor1_states, donor2_states,
               acceptor0_states, acceptor1_states, acceptor2_states):
    add_sequence(coding_model, _chain)

# Background state: long self-loop with a rare escape into the ze chain.
coding_model.add_transition(coding_model.start, back, 1.0)
coding_model.add_transition(back, back, 0.99)
coding_model.add_transition(back, ze_states[0], 0.01)

# Intron states: overwhelmingly self-looping, with a tiny exit into the
# first state of their respective spacer chains.
for _intron, _spacers in ((in0, in0_spacers), (in1, in1_spacers), (in2, in2_spacers)):
    coding_model.add_transition(_intron, _intron, 0.99999999)
    coding_model.add_transition(_intron, _spacers[0], 0.00000001)

# Codon positions advance deterministically 0 -> 1 -> 2.
coding_model.add_transition(coding_state0, coding_state1, 1.0)
coding_model.add_transition(coding_state1, coding_state2, 1.0)
# Build one discrete-emission state per tag, then wire start/end/bigram
# transitions for the HMM tagger: P(word | tag) = C(tag, word) / C(tag).
prob_emission = {}
states = {}
for tag, word_counts in emission_counts.items():
    # Fix: hoist the normalizer out of the comprehension -- the original
    # recomputed sum(word_counts.values()) once per WORD, making this loop
    # quadratic in vocabulary size for no benefit.
    tag_total = sum(word_counts.values())
    prob_emission = {
        word: word_count / tag_total
        for word, word_count in word_counts.items()
    }
    states[tag] = State(DiscreteDistribution(prob_emission), name=tag)

unique_tags = list(data.training_set.tagset)
for tag in unique_tags:
    basic_model.add_states(states[tag])

# add the starting edges: P(tag | <start>) from sentence-initial counts
for tag, tag_count in tag_starts.items():
    basic_model.add_transition(basic_model.start, states[tag],
                               tag_count / len(data.training_set.X))

# add the ending edges
# NOTE(review): this normalizes end counts by the number of sentences; a
# sibling implementation in this file uses tag_ends[tag] / tag_unigrams[tag]
# instead -- confirm which denominator is intended.
for tag, tag_count in tag_ends.items():
    basic_model.add_transition(states[tag], basic_model.end,
                               tag_count / len(data.training_set.X))

# add the transitions: P(tag1 | tag0) = C(tag0, tag1) / C(tag0)
for bi_tag, tag_count in tag_bigrams.items():
    tag0 = bi_tag[0]
    tag1 = bi_tag[1]
    prob = tag_count / tag_unigrams[tag0]
    basic_model.add_transition(states[tag0], states[tag1], prob)

# finalize the model
basic_model.bake()
# Build a small HMM recognizing an intron followed by an acceptor-site motif:
# a self-looping intron state hands off to a linear acceptor chain, which
# drains into a uniform 'post' state.
matrixAcceptor0 = numpy.array(matrix_from_exa('new_acceptor1.exa'))
acceptor0_data = classify(matrixAcceptor0, 2)
model = HiddenMarkovModel('intron_acceptor')
# Intron emissions come from empirical probabilities computed by the
# project's calculator helper.
intron = State(DiscreteDistribution(
    calculator.intron_calculator('cuts_intron.txt').p), name='in')
acceptor0_states = sequence_state_factory(acceptor0_data, 'acceptor0')
post = State(DiscreteDistribution(equal_distribution), name='post')
model.add_state(intron)
add_sequence(model, acceptor0_states)
model.add_state(post)
model.add_transition(model.start, intron, 1)
# Intron self-loop 90%, 10% chance to enter the acceptor chain.
model.add_transition(intron, intron, 0.9)
model.add_transition(intron, acceptor0_states[0], 0.1)
model.add_transition(acceptor0_states[-1], post, 1)
model.add_transition(post, post, 0.5)
model.add_transition(post, model.end, 0.5)
model.bake()
# Sample genomic sequence used for a quick manual Viterbi sanity check.
test_l = 'GTAACACTGAATACTCAGGAACAATTAATGGATGGTAACATATGAGGAATATCTAGGAGGCACACCCTCTCTGGCATCTATGATGGGCCAAAAACCCGCATTCGCTTGGCCACAGTATGTGAAATATAACCCAGCTTAGACACAGGGTGCGGCAGCTGTCATGTTTCTCTGTGTGTGCCGAGTGTCATGTCTGCACCGTACAGGGATAGCTGAGTCTTCATCCTCCTCAGCTCCTATCTGTCCAGTGCAATGAACAGCAGCTGCTCTCTTCCTCTCTGGTTCCCATGGCAGCCATGCTCTGTTGCAGAGAGAACAGGATTGCATGTTCCCTCTTAATGGGAACGTCCATTTTGCTTTCTGGGACCACTCTCTTAATGCCGCCTGTCAAAACCAGCTAGGACTCCCTGGGGTCCAATCCCTCTGTGTTTAATCTTCTGTCATCTCTGTCCCACCTGGCTCATCAGGGAGATGCAGAAGGCTGAAGAAAAGGAAGTCCCTGAGGACTCACTGGAGGAATGTGCCATCACTTGTTCAAATAGCCATGGCCCTTATGACTCCAACCATGACTCCAACC'
converted = converter_to(test_l.lower().replace(' ', '').replace('p', ''))
#logp, path = model.viterbi(converted)
#print(logp, [x[1].name + str(i) for i, x in enumerate(path)])
with open('new_intron_acceptor.txt') as in_file:
    total = []
't': 0.25 }), name='back') fixed_state = State(DiscreteDistribution({ 'a': 0.45, 'c': 0.45, 'g': 0.05, 't': 0.05 }), name='fixed') hmmodel.add_state(back_state) hmmodel.add_state(fixed_state) hmmodel.add_transition(hmmodel.start, back_state, 1) hmmodel.add_transition(back_state, back_state, 0.9) hmmodel.add_transition(back_state, fixed_state, 0.1) hmmodel.add_transition(fixed_state, fixed_state, 0.9) hmmodel.add_transition(fixed_state, back_state, 0.1) hmmodel.bake() seq = list('acgtacgtaaaaccccaaa') lopg, path = hmmodel.viterbi(seq) print([x[1].name for x in path]) print(hmmodel.to_json())
def main():
    """Leave-one-out cross-validation of a 4-state gait-phase cHMM.

    Loads per-subject .mat trials (healthy group and patients), derives the
    first derivative of gyro_y as a second feature, then for each held-out
    subject builds an MGD-emission HMM (transition matrix estimated from the
    held-out subject's own labels), trains it on the other subjects, decodes
    with Viterbi, and reports per-phase/stride mean times and CoV.

    Python 2 code (print statements, dict.iteritems); runs as a ROS node.
    """
    rospy.init_node('hmm_trainer')
    phase_pub = rospy.Publisher('/phase', Int32, queue_size=10)
    rospack = rospkg.RosPack()
    packpath = rospack.get_path('exo_control')
    datapath = packpath + "/log/mat_files/"
    verbose = rospy.get_param('~verbose', False)
    """Print console output into text file"""
    # All stdout from here on is redirected into the results log file.
    sys.stdout = open(packpath + "/log/results/leave-one-out_cross_validation_cov.txt", "w")
    """Data loading"""
    n_trials = 3
    n_sub = 9
    healthy_subs = ["daniel", "erika", "felipe", "jonathan", "luis",
                    "nathalia", "paula", "pedro", "tatiana"]
    patients = ["andres", "carlos", "carmen", "carolina", "catalina",
                "claudia", "emmanuel", "fabian", "gustavo"]
    study_subs = [healthy_subs, patients]
    # dataset[group][subject] -> dict of per-trial signal lists.
    dataset = [{} for x in range(len(study_subs))]
    for i in range(len(study_subs)):
        for sub in study_subs[i]:
            dataset[i][sub] = {"gyro_y": [[] for x in range(n_trials)],
                               "fder_gyro_y": [[] for x in range(n_trials)],
                               "time": [[] for x in range(n_trials)],
                               "labels": [[] for x in range(n_trials)],
                               "Fs_fsr": 0.0}
    # Fill the dataset from the per-trial .mat files; fder_gyro_y is computed
    # later, so it is excluded from the load loop.
    for group in dataset:
        for sub, data in group.iteritems():
            for trial in range(n_trials):
                mat_file = scio.loadmat(datapath + sub + "_proc_data" + str(trial+1) + ".mat")
                for signal in data:
                    if signal not in ["pathol", "fder_gyro_y"]:
                        if signal == "Fs_fsr":
                            data[signal] = mat_file[signal][0][0]
                        else:
                            data[signal][trial] = mat_file[signal][0]
                del mat_file
    """Feature extraction"""
    """First derivative"""
    # Central difference of gyro_y; endpoints copied verbatim.
    for group in dataset:
        for sub, data in group.iteritems():
            for trial in range(n_trials):
                der = []
                gyro_y = data["gyro_y"][trial]
                der.append(gyro_y[0])
                for i in range(1, len(gyro_y)-1):
                    der.append((gyro_y[i+1]-gyro_y[i-1])/2)
                der.append(gyro_y[-1])
                data["fder_gyro_y"][trial] = der
    del der, sub, data
    """Global variables of cHMM"""
    startprob = [0.25, 0.25, 0.25, 0.25]
    state_names = ['hs', 'ff', 'ho', 'sw']
    n_classes = 4
    n_signals = 2
    tol = 6e-2  # Tolerance window of 60 ms
    # pathology = 0
    for pathology in range(len(dataset)):
        if pathology == 0:
            rospy.logwarn("**Leave-one-out cross validation with HEALTHY subjects**")
            print "**Leave-one-out cross validation with HEALTHY subjects**"
        else:
            rospy.logwarn("**Leave-one-out cross validation with PATIENTS**")
            print "**Leave-one-out cross validation with PATIENTS**"
        # if True:
        for lou_sub, lou_data in dataset[pathology].iteritems():
            # Iterate through leave-one-out subject's data
            rospy.logwarn("Leave " + lou_sub + " out:")
            print "Leave " + lou_sub + " out:"
            # Estimate the transition matrix from the held-out subject's own
            # label sequences (counts, then row-wise L1 normalization).
            t = np.zeros((4, 4))  # Transition matrix
            prev = -1
            for trial in range(n_trials):
                for label in lou_data["labels"][trial]:
                    if prev == -1:
                        prev = label
                    t[prev][label] += 1.0
                    prev = label
            t = normalize(t, axis=1, norm='l1')
            if verbose:
                rospy.logwarn("TRANSITION MATRIX\n" + str(t))
            # Group the held-out subject's feature vectors by gait phase.
            class_data = [[] for x in range(n_classes)]
            # full_lou_data = []
            # full_lou_labels = []
            for trial in range(n_trials):
                for sample in range(len(lou_data["gyro_y"][trial])):
                    d = [lou_data["gyro_y"][trial][sample],
                         lou_data["fder_gyro_y"][trial][sample]]
                    l = lou_data["labels"][trial][sample]
                    # full_lou_data.append(d)
                    # full_lou_labels.append(l)
                    class_data[l].append(d)
            """Multivariate Gaussian Distributions for each hidden state"""
            class_means = [[[] for x in range(n_signals)] for i in range(n_classes)]
            class_vars = [[[] for x in range(n_signals)] for i in range(n_classes)]
            class_std = [[[] for x in range(n_signals)] for i in range(n_classes)]
            class_cov = []
            for state in range(n_classes):
                # Masked covariance tolerates missing values in the samples.
                cov = np.ma.cov(np.array(class_data[state]), rowvar=False)
                class_cov.append(cov)
                for signal in range(n_signals):
                    class_means[state][signal] = np.array(class_data[state][:])[:, [signal]].mean(axis=0)
                    class_vars[state][signal] = np.array(class_data[state][:])[:, [signal]].var(axis=0)
                    class_std[state][signal] = np.array(class_data[state][:])[:, [signal]].std(axis=0)
            # lou_trial = 1
            # if True:
            for lou_trial in range(n_trials):
                rospy.logwarn("Trial {}".format(lou_trial+1))
                print("Trial {}".format(lou_trial+1))
                """Classifier initialization"""
                # distros = []
                hmm_states = []
                for state in range(n_classes):
                    dis = MGD\
                        (np.array(class_means[state]).flatten(),
                         np.array(class_cov[state]))
                    st = State(dis, name=state_names[state])
                    # distros.append(dis)
                    hmm_states.append(st)
                model = HMM(name="Gait")
                model.add_states(hmm_states)
                """Initial transitions"""
                for state in range(n_classes):
                    model.add_transition(model.start, hmm_states[state], startprob[state])
                """Left-right model"""
                for i in range(n_classes):
                    for j in range(n_classes):
                        model.add_transition(hmm_states[i], hmm_states[j], t[i][j])
                model.bake()
                """Create training and test data"""
                x_train = []
                x_test = []
                test_gyro_y = lou_data["gyro_y"][lou_trial]
                test_fder_gyro_y = lou_data["fder_gyro_y"][lou_trial]
                """Create test data with n-th trial of leave-one-out subject"""
                for sample in range(len(test_gyro_y)):
                    x_test.append([test_gyro_y[sample], test_fder_gyro_y[sample]])
                """Create training data with n-1 trials of the rest of subjects (healthy group)"""
                # NOTE(review): training data always comes from dataset[0]
                # (healthy group) regardless of the pathology group being
                # evaluated -- confirm this is intentional.
                for train_sub, train_data in dataset[0].iteritems():
                    count_trials = 0
                    if lou_sub != train_sub:
                        # if train_sub == "daniel":
                        for trial in range(n_trials):
                            # count_trials < 1 limits each subject to ONE trial.
                            if trial != lou_trial and count_trials < 1:
                                # rospy.logwarn(trial)
                                train_gyro_y = train_data["gyro_y"][trial]
                                train_fder_gyro_y = train_data["fder_gyro_y"][trial]
                                for sample in range(len(train_gyro_y)):
                                    x_train.append([train_gyro_y[sample], train_fder_gyro_y[sample]])
                                count_trials += 1
                rospy.logwarn(len(x_train))
                # pomegranate's fit expects a list of sequences.
                x_train = list([x_train])
                """Training"""
                rospy.logwarn("Training HMM...")
                model.fit(x_train, algorithm='baum-welch', verbose=True)
                # model.fit(x_train, algorithm='viterbi', verbose='True')
                """Find most-likely sequence"""
                rospy.logwarn("Finding most-likely sequence...")
                logp, path = model.viterbi(x_test)
                # rospy.logwarn(len(path))
                # rospy.logwarn(len(lou_data["labels"][lou_trial]))
                # Map decoded state names back to numeric phase labels.
                class_labels = []
                for i in range(len(lou_data["labels"][lou_trial])):
                    path_phase = path[i][1].name
                    for state in range(n_classes):
                        if path_phase == state_names[state]:
                            class_labels.append(state)
                '''Saving classifier labels into csv file'''
                # np.savetxt(packpath+"/log/inter_labels/"+lou_sub+"_labels.csv", class_labels, delimiter=",", fmt='%s')
                # rospy.logwarn("csv file with classifier labels was saved.")
                # lou_data["labels"][lou_trial] = lou_data["labels"][lou_trial][1:]
                """Calculate mean time (MT) of stride and each gait phase and Coefficient of Variation (CoV)"""
                rospy.logwarn("Mean time (MT) and Coefficient of Variance (CoV)")
                print "Mean time (MT) and Coefficient of Variance (CoV)"
                # Run-length encode the decoded labels: each label change
                # closes one phase; every 4th phase boundary at label 0 (HS)
                # closes a full stride.
                curr_label = -1
                count = 0
                n_phases = 0
                stride_samples = 0
                phases_time = [[] for x in range(n_classes)]
                stride_time = []
                for label in class_labels:
                    if curr_label != label:
                        n_phases += 1
                        stride_samples += count
                        if label == 0:
                            # Gait start: HS
                            if n_phases == 4:
                                # If a whole gait cycle has past
                                stride_time.append(stride_samples/lou_data["Fs_fsr"])
                                n_phases = 0
                                stride_samples = 0
                        phases_time[label-1].append(count/lou_data["Fs_fsr"])
                        curr_label = label
                        count = 1
                    else:
                        count += 1.0
                for phase in range(n_classes):
                    mean_time = np.mean(phases_time[phase])
                    phase_std = np.std(phases_time[phase])
                    rospy.logwarn("(" + state_names[phase] + ")")
                    print "(" + state_names[phase] + ")"
                    rospy.logwarn("Mean time: " + str(mean_time) + " + " + str(phase_std))
                    print "Mean time: " + str(mean_time) + " + " + str(phase_std)
                    rospy.logwarn("CoV: " + str(phase_std/mean_time*100.0))
                    print("CoV: " + str(phase_std/mean_time*100.0))
                mean_time = np.mean(stride_time)
                phase_std = np.std(stride_time)
                rospy.logwarn("(Stride)")
                print "(Stride)"
                rospy.logwarn("Mean time: " + str(mean_time) + " + " + str(phase_std))
                print "Mean time: " + str(mean_time) + " + " + str(phase_std)
                rospy.logwarn("CoV: " + str(phase_std/mean_time*100.0))
                print("CoV: " + str(phase_std/mean_time*100.0))
def main(): rospy.init_node('hmm_trainer') param_vec = [] rospack = rospkg.RosPack() if (len(sys.argv) < 2): print("Missing the prefix argument.") exit() else: prefix = sys.argv[1] use_measurements = np.zeros(3) # patient = rospy.get_param('~patient', 'None') # if prefix == 'None': # rospy.logerr("No filename given ,exiting") # exit() phase_pub = rospy.Publisher('/phase', Int32, queue_size=10) packpath = rospack.get_path('exo_gait_phase_det') datapath = packpath + "/log/mat_files/" rospy.logwarn("Patient: {}".format(prefix)) print("Patient: {}".format(prefix)) verbose = rospy.get_param('~verbose', False) """Print console output into text file""" # sys.stdout = open(packpath + "/log/results/intra-sub_" + prefix + ".txt", "w") """Data loading""" n_trials = 3 data = [[] for x in range(0, n_trials)] for i in range(0, n_trials): data[i] = scio.loadmat(datapath + prefix + "_proc_data" + str(i + 1) + ".mat") accel_x = [[] for x in range(0, n_trials)] accel_y = [[] for x in range(0, n_trials)] accel_z = [[] for x in range(0, n_trials)] gyro_x = [[] for x in range(0, n_trials)] gyro_y = [[] for x in range(0, n_trials)] gyro_z = [[] for x in range(0, n_trials)] time_array = [[] for x in range(0, n_trials)] labels = [[] for x in range(0, n_trials)] fs_fsr = [] for i in range(0, n_trials): # accel_x[i] = data[i]["accel_x"][0] # accel_y[i] = data[i]["accel_y"][0] # accel_z[i] = data[i]["accel_z"][0] gyro_x[i] = data[i]["gyro_x"][0] gyro_y[i] = data[i]["gyro_y"][0] gyro_z[i] = data[i]["gyro_z"][0] time_array[i] = data[i]["time"][0] labels[i] = data[i]["labels"][0] fs_fsr.append(data[i]["Fs_fsr"][0][0]) """Feature extraction""" """First derivative""" # fder_gyro_x = [] # for i in range(n_trials): # der = [] # der.append(gyro_x[i][0]) # for j in range(1,len(gyro_x[i])-1): # der.append((gyro_x[i][j+1]-gyro_x[i][j-1])/2) # der.append(gyro_x[i][-1]) # fder_gyro_x.append(der) fder_gyro_y = [] for i in range(n_trials): der = [] der.append(gyro_y[i][0]) for j in range(1, len(gyro_y[i]) 
- 1): der.append((gyro_y[i][j + 1] - gyro_y[i][j - 1]) / 2) der.append(gyro_y[i][-1]) fder_gyro_y.append(der) # fder_gyro_z = [] # for i in range(n_trials): # der = [] # der.append(gyro_z[i][0]) # for j in range(1,len(gyro_z[i])-1): # der.append((gyro_z[i][j+1]-gyro_z[i][j-1])/2) # der.append(gyro_z[i][-1]) # fder_gyro_z.append(der) """Second derivative""" # sder_gyro_x = [] # for i in range(n_trials): # der = [] # der.append(fder_gyro_x[i][0]) # for j in range(1,len(fder_gyro_x[i])-1): # der.append((fder_gyro_x[i][j+1]-fder_gyro_x[i][j-1])/2) # der.append(fder_gyro_x[i][-1]) # sder_gyro_x.append(der) # # sder_gyro_y = [] # for i in range(n_trials): # der = [] # der.append(fder_gyro_y[i][0]) # for j in range(1,len(fder_gyro_y[i])-1): # der.append((fder_gyro_y[i][j+1]-fder_gyro_y[i][j-1])/2) # der.append(fder_gyro_y[i][-1]) # sder_gyro_y.append(der) # # sder_gyro_z = [] # for i in range(n_trials): # der = [] # der.append(fder_gyro_z[i][0]) # for j in range(1,len(fder_gyro_z[i])-1): # der.append((fder_gyro_z[i][j+1]-fder_gyro_z[i][j-1])/2) # der.append(fder_gyro_z[i][-1]) # sder_gyro_z.append(der) """Peak detector""" # window_wid = 15 # Window width should be odd # search_ratio = window_wid/2 # pdet_gyro_x = [] # for i in range(n_trials): # pdet = [] # for j in range(len(gyro_x[i])): # if j <= search_ratio: # win = gyro_x[i][:j+search_ratio+1] # elif j >= len(gyro_x[i])-search_ratio-1: # win = gyro_x[i][j-search_ratio:] # else: # win = gyro_x[i][j-search_ratio:j+search_ratio+1] # pdet.append(gyro_x[i][j]/max(win)) # pdet_gyro_x.append(pdet) # print len(gyro_x) # print len(pdet_gyro_x) # for i in range(3): # print len(gyro_x[i]) # print len(pdet_gyro_x[i]) # pdet_gyro_y = [] # for i in range(n_trials): # pdet = [] # for j in range(len(gyro_y[i])): # if j <= search_ratio: # win = gyro_y[i][:j+search_ratio+1] # elif j >= len(gyro_y[i])-search_ratio-1: # win = gyro_y[i][j-search_ratio:] # else: # win = gyro_y[i][j-search_ratio:j+search_ratio+1] # 
pdet.append(gyro_y[i][j]/max(win)) # pdet_gyro_y.append(pdet) # # pdet_gyro_z = [] # for i in range(n_trials): # pdet = [] # for j in range(len(gyro_z[i])): # if j <= search_ratio: # win = gyro_z[i][:j+search_ratio+1] # elif j >= len(gyro_z[i])-search_ratio-1: # win = gyro_z[i][j-search_ratio:] # else: # win = gyro_z[i][j-search_ratio:j+search_ratio+1] # pdet.append(gyro_z[i][j]/max(win)) # pdet_gyro_z.append(pdet) """Create training and test data""" ff = [[] for x in range(0, n_trials)] for j in range(0, n_trials): for k in range(0, len(time_array[j])): f_ = [] # f_.append(accel_x[j][k]) # f_.append(accel_y[j][k]) # f_.append(accel_z[j][k]) # f_.append(gyro_x[j][k]) # f_.append(fder_gyro_x[j][k]) # f_.append(sder_gyro_x[j][k]) # f_.append(pdet_gyro_x[j][k]) f_.append(gyro_y[j][k]) f_.append(fder_gyro_y[j][k]) # f_.append(sder_gyro_y[j][k]) # f_.append(pdet_gyro_y[j][k]) # f_.append(gyro_z[j][k]) # f_.append(fder_gyro_z[j][k]) # f_.append(sder_gyro_z[j][k]) # f_.append(pdet_gyro_z[j][k]) ff[j].append(f_) n_signals = len(ff[0][0]) """cHMM""" startprob = [0.25, 0.25, 0.25, 0.25] state_names = ['hs', 'ff', 'ho', 'sw'] rospy.logwarn("""Intra-subject training""") print("""Intra-subject training""") # for leave_one_out in range(0, n_trials): for leave_one_out in range(1, 2): rospy.logwarn("-------TRIAL {}-------".format(leave_one_out + 1)) print("-------TRIAL {}-------".format(leave_one_out + 1)) """Transition matrix""" t = np.zeros((4, 4)) # Transition matrix prev = -1 for i in range(0, len(labels[leave_one_out])): # data[i]._replace(label = correct_mapping[data[i].label]) if prev == -1: prev = labels[leave_one_out][i] t[prev][labels[leave_one_out][i]] += 1.0 prev = labels[leave_one_out][i] t = normalize(t, axis=1, norm='l1') if verbose: rospy.logwarn("TRANSITION MATRIX\n" + str(t)) n_classes = 4 class_data = [[] for x in range(n_classes)] full_data = [] full_labels = [] for i in range(len(ff[leave_one_out])): full_data.append(ff[leave_one_out][i]) 
full_labels.append(labels[leave_one_out][i]) # print full_data == ff[leave_one_out] # print full_labels == labels[leave_one_out] # print len(full_data) == len(full_labels) # for i in range(0,len(ff[leave_one_out-1])): # full_data.append(ff[leave_one_out-1][i]) # full_labels.append(labels[leave_one_out-1][i]) # for i in range(0,len(ff[(leave_one_out+1) % n_trials])): # full_data.append(ff[(leave_one_out+1) % n_trials][i]) # full_labels.append(labels[(leave_one_out+1) % n_trials][i]) # print len(full_data) == (len(ff[leave_one_out]) + len(ff[leave_one_out-1]) + len(ff[(leave_one_out+1) % n_trials])) # print full_data # print len(full_data) # print full_labels # print len(full_labels) for i in range(0, len(full_data)): class_data[full_labels[i]].append(full_data[i]) """Multivariate Gaussian Distributions for each hidden state""" class_means = [[[] for x in range(n_signals)] for i in range(n_classes)] class_vars = [[[] for x in range(n_signals)] for i in range(n_classes)] class_std = [[[] for x in range(n_signals)] for i in range(n_classes)] class_cov = [] classifiers = [] for i in range(0, n_classes): # cov = np.ma.cov(np.array(class_data[i]), rowvar=False) cov = np.cov(np.array(class_data[i]), rowvar=False) class_cov.append(cov) for j in range(0, n_signals): class_means[i][j] = np.array( class_data[i][:])[:, [j]].mean(axis=0) class_vars[i][j] = np.array(class_data[i][:])[:, [j]].var(axis=0) class_std[i][j] = np.array(class_data[i][:])[:, [j]].std(axis=0) print "\n" + str(class_cov) + "\n" """Classifier initialization""" distros = [] hmm_states = [] for i in range(n_classes): dis = MGD\ (np.array(class_means[i]).flatten(), np.array(class_cov[i])) st = State(dis, name=state_names[i]) distros.append(dis) hmm_states.append(st) model = HMM(name="Gait") model.add_states(hmm_states) """Initial transitions""" for i in range(0, n_classes): model.add_transition(model.start, hmm_states[i], startprob[i]) """Left-right model""" for i in range(0, n_classes): for j in range(0, 
n_classes): model.add_transition(hmm_states[i], hmm_states[j], t[i][j]) model.bake() # print (model.name) # rospy.logwarn("N. observations: " + str(model.d)) # print (model.edges) # rospy.logwarn("N. hidden states: " + str(model.silent_start)) # print model """Training""" # limit = int(len(ff1)*(8/10.0)) # 80% of data to test, 20% to train # x_train = list([ff1[:limit]]) # x_train = list([ff1,ff2]) # x_train = list([ff2]) x_train = [] for i in range(0, len(ff[leave_one_out - 1])): x_train.append(ff[leave_one_out - 1][i]) for i in range(0, len(ff[(leave_one_out + 1) % n_trials])): x_train.append(ff[(leave_one_out + 1) % n_trials][i]) x_train = list([x_train]) rospy.logwarn("Training...") model.fit(x_train, algorithm='baum-welch', verbose=verbose) # model.fit(list([ff[leave_one_out-1]]), algorithm='baum-welch', verbose=verbose) # model.fit(list([ff[(leave_one_out+1) % n_trials]]), algorithm='baum-welch', verbose=verbose) # model.fit(seq, algorithm='viterbi', verbose='True') """Find most-likely sequence""" # logp, path = model.viterbi(ff[limit:]) logp, path = model.viterbi(ff[leave_one_out]) # print logp # print path class_labels = [] for i in range(len(labels[leave_one_out])): path_phase = path[i][1].name for state in range(n_classes): if path_phase == state_names[state]: class_labels.append(state) labels[leave_one_out] = list(labels[leave_one_out][1:]) # Saving classifier labels into csv file # np.savetxt(packpath+"/log/intra_labels/"+prefix+"_labels"+str(leave_one_out+1)+".csv", class_labels, delimiter=",", fmt='%s') # rospy.logwarn("csv file with classifier labels was saved.") sum = 0.0 true_pos = 0.0 false_pos = 0.0 true_neg = 0.0 false_neg = 0.0 tol = 6e-2 # Tolerance window of 60 ms tol_window = int((tol / 2) / (1 / float(fs_fsr[leave_one_out]))) print "FSR freq: " + str(fs_fsr[leave_one_out]) print "Tolerance win: " + str(tol_window) # print tol_window # # print type(tol_window) # for i in range(0, len(labels[leave_one_out])): # """Tolerance window""" # if i > 
tol_window+1 and i < len(labels[leave_one_out])-tol_window: # # curr_tol = time_array[leave_one_out][i+tol_window]-time_array[leave_one_out][i-tol_window] # # print curr_tol # win = [] # for j in range(i-tol_window,i+tol_window+1): # win.append(state_names[labels[leave_one_out][j]]) # if path[i][1].name in win: # sum += 1.0 # else: # if path[i][1].name == labels[leave_one_out][i]: # sum += 1.0 """Performance Evaluation""" rospy.logwarn("Calculating results...") time_error = [[] for x in range(n_classes)] for phase in range(n_classes): for i in range(len(labels[leave_one_out])): """Tolerance window""" if i >= tol_window and i < len( labels[leave_one_out]) - tol_window: # curr_tol = time_array[leave_one_out][i+tol_window]-time_array[leave_one_out][i-tol_window] # print curr_tol win = [] for j in range(i - tol_window, i + tol_window + 1): win.append(labels[leave_one_out][j]) """Calculate time error with true positives""" if class_labels[i] == phase: if class_labels[i] in win: for k in range(len(win)): if win[k] == phase: time_error[phase].append( (k - tol_window) / fs_fsr[leave_one_out]) break true_pos += 1.0 if verbose: print phase + ", " + state_names[labels[ leave_one_out][i]] + ", " + class_labels[ i] + ", true_pos" else: false_pos += 1.0 if verbose: print phase + ", " + state_names[labels[ leave_one_out][i]] + ", " + class_labels[ i] + ", false_pos" else: if phase != labels[leave_one_out][i]: # if phase not in win: true_neg += 1.0 if verbose: print phase + ", " + state_names[labels[ leave_one_out][i]] + ", " + class_labels[ i] + ", true_neg" else: false_neg += 1.0 if verbose: print phase + ", " + state_names[labels[ leave_one_out][i]] + ", " + class_labels[ i] + ", false_neg" else: if class_labels[i] == phase: if class_labels[i] == labels[leave_one_out][i]: true_pos += 1.0 else: false_pos += 1.0 else: if phase != labels[leave_one_out][i]: true_neg += 1.0 else: false_neg += 1.0 rospy.logwarn("Timing error") print("Timing error") for phase in range(n_classes): 
rospy.logwarn("(" + state_names[phase] + ")") print "(" + state_names[phase] + ")" if len(time_error[phase]) > 0: rospy.logwarn( str(np.mean(time_error[phase])) + " + " + str(np.std(time_error[phase]))) print str(np.mean(time_error[phase])) + " + " + str( np.std(time_error[phase])) else: rospy.logwarn("0.06 + 0") print "0.06 + 0" """Calculate mean time (MT) of stride and each gait phase and Coefficient of Variation (CoV)""" rospy.logwarn("Mean time (MT) and Coefficient of Variance (CoV)") print("Mean time (MT) and Coefficient of Variance (CoV)") n_group = 0 for label_group in [class_labels, labels[leave_one_out]]: if n_group == 0: rospy.logwarn("Results for HMM:") print("Results for HMM:") else: rospy.logwarn("Results for FSR:") print("Results for FSR:") curr_label = -1 count = 0 n_phases = 0 stride_samples = 0 phases_time = [[] for x in range(n_classes)] stride_time = [] for label in label_group: # for label in class_labels: if curr_label != label: n_phases += 1 stride_samples += count if label == 0: # Gait start: HS if n_phases == 4: # If a whole gait cycle has past stride_time.append(stride_samples / fs_fsr[leave_one_out]) n_phases = 0 stride_samples = 0 phases_time[label - 1].append(count / fs_fsr[leave_one_out]) curr_label = label count = 1 else: count += 1.0 for phase in range(n_classes): mean_time = np.mean(phases_time[phase]) phase_std = np.std(phases_time[phase]) rospy.logwarn("(" + state_names[phase] + ")") print "(" + state_names[phase] + ")" rospy.logwarn("Mean time: " + str(mean_time) + " + " + str(phase_std)) print "Mean time: " + str(mean_time) + " + " + str(phase_std) rospy.logwarn("CoV: " + str(phase_std / mean_time * 100.0)) print("CoV: " + str(phase_std / mean_time * 100.0)) mean_time = np.mean(stride_time) phase_std = np.std(stride_time) rospy.logwarn("(Stride)") print "(Stride)" rospy.logwarn("Mean time: " + str(mean_time) + " + " + str(phase_std)) print "Mean time: " + str(mean_time) + " + " + str(phase_std) rospy.logwarn("CoV: " + 
str(phase_std / mean_time * 100.0)) print("CoV: " + str(phase_std / mean_time * 100.0)) n_group += 1 """Accuracy""" # acc = sum/len(labels[leave_one_out]) if (true_neg + true_pos + false_neg + false_pos) != 0.0: acc = (true_neg + true_pos) / (true_neg + true_pos + false_neg + false_pos) else: acc = 0.0 """Sensitivity or True Positive Rate""" if true_pos + false_neg != 0: tpr = true_pos / (true_pos + false_neg) else: tpr = 0.0 """Specificity or True Negative Rate""" if false_pos + true_neg != 0: tnr = true_neg / (false_pos + true_neg) else: tnr = 0.0 # rospy.logwarn("Accuracy: {}%".format(acc*100)) rospy.logwarn("Accuracy: {}%".format(acc * 100.0)) # print("Accuracy: {}%".format(acc*100.0)) rospy.logwarn("Sensitivity: {}%".format(tpr * 100.0)) # print("Sensitivity: {}%".format(tpr*100.0)) rospy.logwarn("Specificity: {}%".format(tnr * 100.0)) # print("Specificity: {}%".format(tnr*100.0)) """Goodness index""" G = np.sqrt((1 - tpr)**2 + (1 - tnr)**2) if G <= 0.25: rospy.logwarn("Optimum classifier (G = {} <= 0.25)".format(G)) # print("Optimum classifier (G = {} <= 0.25)".format(G)) elif G > 0.25 and G <= 0.7: rospy.logwarn("Good classifier (0.25 < G = {} <= 0.7)".format(G)) # print("Good classifier (0.25 < G = {} <= 0.7)".format(G)) elif G == 0.7: rospy.logwarn("Random classifier (G = 0.7)") # print("Random classifier (G = 0.7)") else: rospy.logwarn("Bad classifier (G = {} > 0.7)".format(G))
# Build a 9-state HMM with three groups of multivariate-Gaussian emissions
# (M1/M11/M111, M2/M22/M222, M3/M33/M333).  The distributions a..ccc are
# defined earlier in the file.
s1 = State(a, name="M1")
s11 = State(aa, name="M11")
s111 = State(aaa, name="M111")
s2 = State(b, name="M2")
s22 = State(bb, name="M22")
s222 = State(bbb, name="M222")
s3 = State(c, name="M3")
s33 = State(cc, name="M33")
s333 = State(ccc, name="M333")

hmm = HiddenMarkovModel()
hmm.add_states(s1, s11, s111, s2, s22, s222, s3, s33, s333)

# Start probabilities: slight bias toward M1, near-uniform elsewhere
# (0.12 + 8 * 0.11 = 1.0).
for _state, _prob in [(s1, 0.12), (s11, 0.11), (s111, 0.11),
                      (s2, 0.11), (s22, 0.11), (s222, 0.11),
                      (s3, 0.11), (s33, 0.11), (s333, 0.11)]:
    hmm.add_transition(hmm.start, _state, _prob)

# Transitions out of s1: strong self-loop with small leakage to other states.
for _dest, _prob in [(s1, 0.92), (s11, 0.01), (s111, 0.01),
                     (s2, 0.01), (s22, 0.01)]:
    hmm.add_transition(s1, _dest, _prob)
def _segment(self, arr, components=2):
    """Segment a 1-D coverage-like array into intervals via an HMM.

    Pipeline: keep positive entries, Hampel-filter outliers in log2 space,
    estimate candidate state means with a GMM (log2 scale, falling back to
    linear scale when only one log-scale state is found), build an HMM with
    one GaussianMixture emission per state, Baum-Welch fit it on the cleaned
    signal, Viterbi-decode, and convert the decoded state track to intervals.

    Parameters:
        arr: 1-D numpy array; only entries > 0 are modeled.
        components: number of Normal components per mixture emission.

    Returns:
        (newarr, seg, scale) — the outlier-cleaned array, the list of
        segment intervals, and the scale ('log' or 'linear') that was used.

    NOTE(review): relies on self.hampel_filter / get_states / pieces /
    assign_cnv / call_intervals and self.n_jobs, all defined elsewhere.
    """
    nonzero = arr[arr > 0]
    # Indices of inliers after Hampel filtering in log2 space.
    idx = self.hampel_filter(np.log2(nonzero))
    filtered = nonzero[idx]
    # Candidate state means/weights from a GMM, in log2 space...
    log_gmm = self.get_states(np.log2(filtered))
    log_means, log_probs = log_gmm.means_.ravel(), log_gmm.weights_
    # ...and in linear space (to improve the sensitivity when the log-scale
    # fit collapses to a single state).
    ln_gmm = self.get_states(filtered)
    ln_means, ln_probs = ln_gmm.means_.ravel(), ln_gmm.weights_
    if (len(log_means) == 1):
        means, probs = ln_means, ln_probs
        scale = 'linear'
    else:
        means, probs = log_means, log_probs
        scale = 'log'
    logger.info('Estimated HMM state number: {0} ({1} scale)'.format(len(means), scale))
    model = HiddenMarkovModel()
    # GMM emissions: one mixture state per estimated mean; components are
    # Normals spread around the mean at +/- 0.5 steps with sd 0.5.
    dists = []
    for m in means:
        tmp = []
        for i in range(components):
            e = m + (-1)**i * ((i+1)//2) * 0.5
            s = 0.5
            tmp.append(NormalDistribution(e, s))
        mixture = State(GeneralMixtureModel(tmp), name=str(m))
        dists.append(mixture)
    model.add_states(*tuple(dists))
    # Transition matrix: sticky self-transitions (0.8), remaining 0.2 mass
    # shared evenly among the other states.
    for i in range(len(means)):
        for j in range(len(means)):
            if i==j:
                model.add_transition(dists[i], dists[j], 0.8)
            else:
                model.add_transition(dists[i], dists[j], 0.2/(len(means)-1))
    # Start probabilities taken from the GMM mixture weights.
    for i in range(len(means)):
        model.add_transition(model.start, dists[i], probs[i])
    model.bake()
    # Rebuild the full-length array with outliers zeroed out: only the
    # Hampel inliers among the positive entries are kept.
    tmp = np.zeros(nonzero.size)
    tmp[idx] = filtered
    newarr = np.zeros(arr.size)
    newarr[arr > 0] = tmp
    if len(means) > 1:
        # Train and decode only when there is more than one state.
        model.fit(self.pieces(newarr, scale=scale), algorithm='baum-welch',
                  n_jobs=self.n_jobs, max_iterations=5000, stop_threshold=2e-4)
        queue = newarr[newarr > 0]
        # State names encode the state means (set above via name=str(m)).
        if scale=='log':
            seq = np.r_[[s.name for i, s in model.viterbi(np.log2(queue))[1][1:]]]
        else:
            seq = np.r_[[s.name for i, s in model.viterbi(queue)[1][1:]]]
        seg = self.assign_cnv(queue, seq)
        # Scatter the per-positive-entry calls back to full-length positions,
        # then merge runs into intervals.
        predicted = np.zeros(newarr.size)
        predicted[newarr > 0] = seg
        seg = self.call_intervals(predicted)
    else:
        # Single state: the whole array is one segment.
        seg = [(0, newarr.size)]
    return newarr, seg, scale
def get_constant_number_of_repeats_matcher_hmm(patterns, copies):
    """Build a profile HMM that matches exactly `copies` repeats of a pattern.

    The per-unit profile (match/insert/delete states and their transition and
    emission probabilities) comes from build_profile_hmm_for_repeats; this
    function chains `copies` identical units end to end:
    start -> unit_0 -> unit_1 -> ... -> unit_{copies-1} -> end.

    Parameters:
        patterns: repeat-unit sequences passed to the profile builder.
        copies: exact number of repeat units to chain.

    Returns:
        The baked pomegranate-style Model (baked with merge=None so silent
        unit_start/unit_end states are preserved).
    """
    model = Model(name="Repeating Pattern Matcher HMM Model")
    transitions, emissions = build_profile_hmm_for_repeats(
        patterns, settings.MAX_ERROR_RATE)
    # Match-state labels ('M1'..'Mk') determine the profile length.
    matches = [m for m in emissions.keys() if m.startswith('M')]
    last_end = None
    for repeat in range(copies):
        # One unit = I0..Ik insert states, M1..Mk match states, D1..Dk delete
        # states, plus silent unit_start/unit_end bracketing states.  Names
        # are suffixed with the repeat index to keep them unique.
        insert_states = []
        match_states = []
        delete_states = []
        for i in range(len(matches) + 1):
            insert_distribution = DiscreteDistribution(emissions['I%s' % i])
            insert_states.append(
                State(insert_distribution, name='I%s_%s' % (i, repeat)))
        for i in range(1, len(matches) + 1):
            match_distribution = DiscreteDistribution(emissions['M%s' % i])
            match_states.append(
                State(match_distribution, name='M%s_%s' % (str(i), repeat)))
        for i in range(1, len(matches) + 1):
            # Delete states are silent (no emission distribution).
            delete_states.append(State(None, name='D%s_%s' % (str(i), repeat)))
        unit_start = State(None, name='unit_start_%s' % repeat)
        unit_end = State(None, name='unit_end_%s' % repeat)
        model.add_states(insert_states + match_states + delete_states +
                         [unit_start, unit_end])
        # n = index of the last match/delete state (profile length - 1).
        n = len(delete_states) - 1
        # Chain units: previous unit_end feeds this unit_start; the first
        # unit hangs off model.start and the last unit reaches model.end.
        if repeat > 0:
            model.add_transition(last_end, unit_start, 1)
        else:
            model.add_transition(model.start, unit_start, 1)
        if repeat == copies - 1:
            model.add_transition(unit_end, model.end, 1)
        # Entry edges: unit_start -> M1 / D1 / I0.
        model.add_transition(unit_start, match_states[0],
                             transitions['unit_start']['M1'])
        model.add_transition(unit_start, delete_states[0],
                             transitions['unit_start']['D1'])
        model.add_transition(unit_start, insert_states[0],
                             transitions['unit_start']['I0'])
        # I0 self-loop and its edges into column 1.
        model.add_transition(insert_states[0], insert_states[0],
                             transitions['I0']['I0'])
        model.add_transition(insert_states[0], delete_states[0],
                             transitions['I0']['D1'])
        model.add_transition(insert_states[0], match_states[0],
                             transitions['I0']['M1'])
        # Exit edges from the last column into unit_end (via D_n, M_n, I_{n+1}).
        model.add_transition(delete_states[n], unit_end,
                             transitions['D%s' % (n + 1)]['unit_end'])
        model.add_transition(delete_states[n], insert_states[n + 1],
                             transitions['D%s' % (n + 1)]['I%s' % (n + 1)])
        model.add_transition(match_states[n], unit_end,
                             transitions['M%s' % (n + 1)]['unit_end'])
        model.add_transition(match_states[n], insert_states[n + 1],
                             transitions['M%s' % (n + 1)]['I%s' % (n + 1)])
        model.add_transition(insert_states[n + 1], insert_states[n + 1],
                             transitions['I%s' % (n + 1)]['I%s' % (n + 1)])
        model.add_transition(insert_states[n + 1], unit_end,
                             transitions['I%s' % (n + 1)]['unit_end'])
        # Interior profile edges for each column i: M/D -> I (same column),
        # I self-loop, and (for non-final columns) the standard profile-HMM
        # M->M, M->D, D->M, D->D, I->M, I->D edges into column i+1.
        for i in range(1, len(matches) + 1):
            model.add_transition(match_states[i - 1], insert_states[i],
                                 transitions['M%s' % i]['I%s' % i])
            model.add_transition(delete_states[i - 1], insert_states[i],
                                 transitions['D%s' % i]['I%s' % i])
            model.add_transition(insert_states[i], insert_states[i],
                                 transitions['I%s' % i]['I%s' % i])
            if i < len(matches):
                model.add_transition(insert_states[i], match_states[i],
                                     transitions['I%s' % i]['M%s' % (i + 1)])
                model.add_transition(insert_states[i], delete_states[i],
                                     transitions['I%s' % i]['D%s' % (i + 1)])
                model.add_transition(match_states[i - 1], match_states[i],
                                     transitions['M%s' % i]['M%s' % (i + 1)])
                model.add_transition(match_states[i - 1], delete_states[i],
                                     transitions['M%s' % i]['D%s' % (i + 1)])
                model.add_transition(delete_states[i - 1], match_states[i],
                                     transitions['D%s' % i]['M%s' % (i + 1)])
                model.add_transition(delete_states[i - 1], delete_states[i],
                                     transitions['D%s' % i]['D%s' % (i + 1)])
        last_end = unit_end
    # merge=None keeps the silent bracketing states instead of merging them.
    model.bake(merge=None)
    return model
# Build a 4-state gait-phase HMM ("Gait") with one multivariate-Gaussian
# emission per phase.  Fix: the original initialized `hmm_states = []`
# twice; the redundant duplicate assignment is removed.
distros = []
hmm_states = []
# Gait phases: flat-foot, heel-off, swing, heel-strike.
# NOTE(review): assumes n_classes == 4 to match state_names — confirm.
state_names = ['ff', 'ho', 'sw', 'hs']
for i in range(0, n_classes):
    # Emission for phase i from per-class mean vector and covariance matrix.
    dis = MGD(np.array(class_means[i]).flatten(), np.array(class_cov[i]))
    st = State(dis, name=state_names[i])
    distros.append(dis)
    hmm_states.append(st)

model = HMM(name="Gait")
print(t)
model.add_states(hmm_states)

# Deterministic start: sequences always begin in the first phase ('ff').
model.add_transition(model.start, hmm_states[0], 1.00)
model.add_transition(model.start, hmm_states[1], 0.0)
model.add_transition(model.start, hmm_states[2], 0.0)
model.add_transition(model.start, hmm_states[3], 0.0)

# Phase-to-phase transition probabilities from the matrix t.
for i in range(0, n_classes):
    for j in range(0, n_classes):
        model.add_transition(hmm_states[i], hmm_states[j], t[i][j])

model.bake()

seq = list([ff[:limit]])
print(model.name)
print(model.d)
print(model.edges)
print(model.silent_start)
# HMM part-of-speech tagger: one hidden state per tag with a discrete
# word-emission distribution estimated from training counts.
basic_model = HiddenMarkovModel(name="base-hmm-tagger")

# Emission probabilities P(word | tag) = count(tag, word) / count(tag).
tag_states = {}
for cur_tag in data.training_set.tagset:
    word_probs = {}
    for word in emission_counts[cur_tag]:
        word_probs[word] = emission_counts[cur_tag][word] / tag_unigrams[cur_tag]
    state = State(DiscreteDistribution(word_probs), name=cur_tag)
    tag_states[cur_tag] = state
    basic_model.add_state(state)

# Edges: start->tag, tag->tag (bigram frequencies), tag->end.
for cur_tag in data.training_set.tagset:
    basic_model.add_transition(basic_model.start, tag_states[cur_tag],
                               tag_starts[cur_tag] / tag_unigrams[cur_tag])
    for nxt_tag in data.training_set.tagset:
        basic_model.add_transition(tag_states[cur_tag], tag_states[nxt_tag],
                                   tag_bigrams[(cur_tag, nxt_tag)] / tag_unigrams[cur_tag])
    basic_model.add_transition(tag_states[cur_tag], basic_model.end,
                               tag_ends[cur_tag] / tag_unigrams[cur_tag])

# finalize the model
basic_model.bake()

# Evaluate the accuracy of HMM tagger on the training corpus.
hmm_training_acc = accuracy(data.training_set.X, data.training_set.Y, basic_model)
print("training accuracy basic hmm model: {:.2f}%".format(100 * hmm_training_acc))
# Fit four multivariate Gaussians on fixed slices of X and wire them into a
# 4-state HMM with uniform start probabilities and sticky self-transitions.
X_1 = X[2000:4000]
X_11 = X[2000:3000]
X_2 = X[400:800]
X_3 = X[7000:8000]

a = MultivariateGaussianDistribution.from_samples(X_1)
aa = MultivariateGaussianDistribution.from_samples(X_11)
b = MultivariateGaussianDistribution.from_samples(X_2)
c = MultivariateGaussianDistribution.from_samples(X_3)

s1 = State(a, name="M1")
s11 = State(aa, name="M11")
s2 = State(b, name="M2")
s3 = State(c, name="M3")

hmm = HiddenMarkovModel()
hmm.add_states(s1, s11, s2, s3)

# Transition table: uniform 0.25 from start; 0.91 self-loop with 0.03
# leakage elsewhere for s1 and s11; first edge out of s2.
for _src, _dst, _p in [
    (hmm.start, s1, 0.25), (hmm.start, s3, 0.25),
    (hmm.start, s11, 0.25), (hmm.start, s2, 0.25),
    (s1, s1, 0.91), (s1, s11, 0.03), (s1, s2, 0.03), (s1, s3, 0.03),
    (s11, s11, 0.91), (s11, s1, 0.03), (s11, s2, 0.03), (s11, s3, 0.03),
    (s2, s1, 0.03),
]:
    hmm.add_transition(_src, _dst, _p)