def load_segmentation_model(modeldata):
    model = HiddenMarkovModel('model')
    states = {}
    for s in modeldata:
        if len(s['emission']) == 1:
            emission = NormalDistribution(*s['emission'][0][:2])
        else:
            weights = np.array([w for _, _, w in s['emission']])
            dists = [NormalDistribution(mu, sigma) for mu, sigma, _ in s['emission']]
            emission = GeneralMixtureModel(dists, weights=weights)
        state = State(emission, name=s['name'])
        states[s['name']] = state
        model.add_state(state)
        if 'start_prob' in s:
            model.add_transition(model.start, state, s['start_prob'])
    for s in modeldata:
        current = states[s['name']]
        for nextstate, prob in s['transition']:
            model.add_transition(current, states[nextstate], prob)
    model.bake()
    return model
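# Usage sketch: a hypothetical `modeldata` structure this loader assumes (pomegranate < 1.0 API).
# Each entry carries (mu, sigma[, weight]) tuples under 'emission' and
# (next_state_name, probability) pairs under 'transition'.
import numpy as np
from pomegranate import HiddenMarkovModel, State, NormalDistribution, GeneralMixtureModel

example_modeldata = [
    {'name': 'background', 'start_prob': 0.5,
     'emission': [(0.0, 1.0)],
     'transition': [('background', 0.9), ('signal', 0.1)]},
    {'name': 'signal', 'start_prob': 0.5,
     'emission': [(3.0, 1.0, 0.6), (6.0, 1.0, 0.4)],
     'transition': [('signal', 0.8), ('background', 0.2)]},
]
segmentation_model = load_segmentation_model(example_modeldata)
print(segmentation_model.predict([0.1, 0.2, 3.5, 5.9, 0.3]))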
def oriHMMParams(self):
    """
    Set initial parameters for the Hidden Markov Model (HMM).

    Attributes
    ----------
    HMMParams : dict
        Has 3 keys: "A", the state transition matrix; "B", the emission
        probabilities, specifying the parameters (means, variances, weights)
        of the mixture Gaussian distributions for each hidden state; and
        "pi", the hidden state weights. This dict will be updated after the
        learning procedure.
    """
    hmm = HiddenMarkovModel()
    # GMM emissions
    # 5 hidden states:
    # 0--start, 1--downstream, 2--no bias, 3--upstream, 4--end
    numdists = 3  # Three-distribution Gaussian mixtures
    var = 7.5 / (numdists - 1)
    means = [[], [], [], [], []]
    for i in range(numdists):
        means[4].append(i * 7.5 / (numdists - 1) + 2.5)
        means[3].append(i * 7.5 / (numdists - 1))
        means[2].append((i - (numdists - 1) / 2) * 7.5 / (numdists - 1))
        means[1].append(-i * 7.5 / (numdists - 1))
        means[0].append(-i * 7.5 / (numdists - 1) - 2.5)
    states = []
    for i, m in enumerate(means):
        tmp = []
        for j in m:
            tmp.append(NormalDistribution(j, var))
        mixture = GeneralMixtureModel(tmp)
        states.append(State(mixture, name=str(i)))
    hmm.add_states(*tuple(states))
    # Transition matrix
    # A = [[0., 1., 0., 0., 0.],
    #      [0., 0.4, 0.3, 0.3, 0.],
    #      [0.05, 0., 0.5, 0.45, 0.],
    #      [0., 0., 0., 0.5, 0.5],
    #      [0.99, 0., 0.01, 0., 0.]]
    hmm.add_transition(states[0], states[1], 1)
    hmm.add_transition(states[1], states[1], 0.4)
    hmm.add_transition(states[1], states[2], 0.3)
    hmm.add_transition(states[1], states[3], 0.3)
    hmm.add_transition(states[2], states[0], 0.05)
    hmm.add_transition(states[2], states[2], 0.5)
    hmm.add_transition(states[2], states[3], 0.45)
    hmm.add_transition(states[3], states[3], 0.5)
    hmm.add_transition(states[3], states[4], 0.5)
    hmm.add_transition(states[4], states[0], 0.99)
    hmm.add_transition(states[4], states[2], 0.01)
    pi = [0.05, 0.3, 0.3, 0.3, 0.05]
    for i in range(len(states)):
        hmm.add_transition(hmm.start, states[i], pi[i])
    hmm.bake()
    return hmm
def build_an_hmm_example():
    # The characters in each DiscreteDistribution define the emission probabilities
    # for that state, i.e. the probability of seeing each character while the
    # system is in that state.
    d1 = DiscreteDistribution({'A': 0.35, 'C': 0.20, 'G': 0.05, 'T': 0.40})
    d2 = DiscreteDistribution({'A': 0.25, 'C': 0.25, 'G': 0.25, 'T': 0.25})
    d3 = DiscreteDistribution({'A': 0.10, 'C': 0.40, 'G': 0.40, 'T': 0.10})
    s1 = State(d1, name="s1")
    s2 = State(d2, name="s2")
    s3 = State(d3, name="s3")

    model = HiddenMarkovModel('example')
    model.add_states([s1, s2, s3])
    model.add_transition(model.start, s1, 0.90)
    model.add_transition(model.start, s2, 0.10)
    model.add_transition(s1, s1, 0.80)
    model.add_transition(s1, s2, 0.20)
    model.add_transition(s2, s2, 0.90)
    model.add_transition(s2, s3, 0.10)
    model.add_transition(s3, s3, 0.70)
    model.add_transition(s3, model.end, 0.30)
    model.bake()

    for i in range(len(model.states)):
        print(model.states[i].name)
    model.plot()
    # print(model.log_probability(list('ACGACTATTCGAT')))
    # print(", ".join(state.name for i, state in model.viterbi(list('ACGACTATTCGAT'))[1]))
    print("forward:", model.forward(list('ACG')))
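# Usage sketch (assumption: the function above is extended to end with `return model`
# so the baked model can be decoded; model.plot() inside also needs matplotlib installed).
model = build_an_hmm_example()
seq = list('ACGACTATTCGAT')          # hypothetical observation sequence
logp, path = model.viterbi(seq)
print(logp)
print(", ".join(state.name for _, state in path))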
def init_cycle_hmm(sequences, steps, states_per_step, model_id):
    """
    Instantiate a cyclic left-right model with random parameters.

    Randomly generates start and transition matrices, and fits normal
    distributions for each state from a chronological partition of the
    sequences.
    """
    model = HiddenMarkovModel(model_id)
    n_states = steps * states_per_step

    # make distributions from chronological subsets of timepoints
    step_size = int(math.ceil(sequences.shape[1] / float(n_states + 1)))

    # generate states
    states = np.empty((steps, states_per_step), dtype=object)
    for i in range(steps):
        for j in range(states_per_step):
            temp_assignment = np.arange(step_size * i, step_size * (i + 1))
            dist = \
                NormalDistribution.from_samples(sequences[:, temp_assignment])
            state_name = str(i) + '-' + str(j)
            states[i, j] = State(dist, name=str(state_name))

    # add states to model
    model.add_states(states.flatten().tolist())

    # make random transition from start -> step0
    trans = np.random.ranf(n_states)
    trans = trans / trans.sum()
    for i, state in enumerate(states.flatten().tolist()):
        model.add_transition(model.start, state, trans[i])

    # make random transition from step(i) -> step(i+1)
    for i in range(steps - 1):
        for j in range(states_per_step):
            trans = np.random.ranf(states_per_step + 1)
            trans = trans / trans.sum()
            # self transition
            model.add_transition(states[i, j], states[i, j], trans[0])
            # out transition
            for x in range(states_per_step):
                model.add_transition(states[i, j], states[i + 1, x],
                                     trans[x + 1])

    # make random transition from stepn -> step0
    for j in range(states_per_step):
        trans = np.random.ranf(states_per_step + 1)
        trans = trans / trans.sum()
        # self transition
        model.add_transition(states[(steps - 1), j], states[(steps - 1), j],
                             trans[0])
        # out transition
        for x in range(states_per_step):
            model.add_transition(states[(steps - 1), j], states[0, x],
                                 trans[x + 1])

    model.bake()
    print 'Initialized Cyclic State HMM:', '[', \
        steps, states_per_step, ']'
    return model
def bake_model(tags_sequence, words_sequence):
    """
    'tags' are the time-demand labels that generate the emitted demand level.
    Demand levels are represented by 'words'.
    """
    # rdemand
    words = [x for x in chain(*words_sequence)]
    tag_unigrams = unigram_counts(words)
    tag_bigrams = bigram_counts(words)

    # Uniform distribution for starting and ending labels
    all_labels = list(set(words))
    tag_starts = starting_counts(all_labels)
    tag_ends = ending_counts(all_labels)

    basic_model = HiddenMarkovModel(name="base-hmm-tagger")

    # Emission counts
    label_train = tags_sequence
    rdemand_train = words_sequence
    emission_count = pair_counts(rdemand_train, label_train)

    # States with emission probability distributions P(word | tag)
    states = []
    for rdemand, label_dict in emission_count.items():
        dist_tag = DiscreteDistribution({
            label: cn / tag_unigrams[rdemand]
            for label, cn in label_dict.items()
        })
        states.append(State(dist_tag, name=rdemand))

    basic_model.add_states(states)
    state_names = [s.name for s in states]
    state_index = {tag: num for num, tag in enumerate(state_names)}

    # Start transitions
    total_start = sum(tag_starts.values())
    for tag, cn in tag_starts.items():
        # sname = state_index[tag]
        basic_model.add_transition(basic_model.start,
                                   states[state_index[tag]],
                                   cn / total_start)

    # End transitions
    total_end = sum(tag_ends.values())
    for tag, cn in tag_ends.items():
        basic_model.add_transition(states[state_index[tag]],
                                   basic_model.end,
                                   cn / total_end)

    # Edges between states for the observed transition frequencies P(tag_i | tag_i-1)
    for key, value in tag_bigrams.items():
        basic_model.add_transition(states[state_index[key[0]]],
                                   states[state_index[key[1]]],
                                   value / tag_unigrams[key[0]])

    # Finalize the model
    basic_model.bake()
    return basic_model
def hmmer2pom(hmm):
    # set up environment
    from math import exp
    from pomegranate import DiscreteDistribution, HiddenMarkovModel, State
    tags = dict(); header = 0; alphabet = None; hmmlines = list()

    # parse HMMER file
    for line in hmm.splitlines():
        l = line.strip()
        if len(l) == 0 or l[0] == '#':
            continue
        elif header == 0:
            if l.startswith('HMM') and l[3] != 'E':  # beginning of actual HMM
                header = 1; alphabet = l.split()[1:]
            else:
                parts = l.strip().split()
                if parts[0] in tags:
                    if not isinstance(tags[parts[0]], list):
                        tags[parts[0]] = [tags[parts[0]]]
                    tags[parts[0]].append(' '.join(parts[1:]))
                else:
                    tags[parts[0]] = ' '.join(parts[1:])
        elif header == 1:
            header = 2
        else:
            if l.startswith('COMPO'):
                parts = l.strip().split(); tags[parts[0]] = ' '.join(parts[1:])
            else:
                hmmlines.append(l)

    # create all states
    model = HiddenMarkovModel(tags['NAME']); tmpstates = list(); K = 0
    i_emit = hmmlines[0].split()
    tmpstates.append(State(DiscreteDistribution({alphabet[i]: exp(-1 * float(i_emit[i])) for i in range(len(alphabet))}), name="I0"))  # insertion state
    for l in range(2, len(hmmlines), 3):
        m_emit, i_emit, state_trans = [hmmlines[l + i].split() for i in range(0, 3)]; K = int(m_emit[0])
        tmpstates.append(State(DiscreteDistribution({alphabet[i]: exp(-1 * float(m_emit[i + 1])) for i in range(len(alphabet))}), name="M%d" % K))  # match state
        tmpstates.append(State(DiscreteDistribution({alphabet[i]: exp(-1 * float(i_emit[i])) for i in range(len(alphabet))}), name="I%d" % K))  # insertion state
        tmpstates.append(State(None, name="D%d" % K))  # deletion state
    assert K != 0, "No match states in profile HMM"
    model.add_states(tmpstates)
    name2state = {state.name: state for state in tmpstates}
    name2state["M0"] = model.start; name2state["M%d" % (K + 1)] = model.end

    # create all transitions
    for l in range(1, len(hmmlines), 3):
        k = int(l / 3); parts = hmmlines[l].split()
        model.add_transition(name2state["M%d" % k], name2state["M%d" % (k + 1)], exp(-1 * float(parts[0])))  # 0: M_k -> M_k+1
        model.add_transition(name2state["M%d" % k], name2state["I%d" % k], exp(-1 * float(parts[1])))        # 1: M_k -> I_k
        if parts[2] != '*':  # no D_k+1 in last row
            model.add_transition(name2state["M%d" % k], name2state["D%d" % (k + 1)], exp(-1 * float(parts[2])))  # 2: M_k -> D_k+1
        model.add_transition(name2state["I%d" % k], name2state["M%d" % (k + 1)], exp(-1 * float(parts[3])))  # 3: I_k -> M_k+1
        model.add_transition(name2state["I%d" % k], name2state["I%d" % k], exp(-1 * float(parts[4])))        # 4: I_k -> I_k
        if k != 0:  # no D0 state
            model.add_transition(name2state["D%d" % k], name2state["M%d" % (k + 1)], exp(-1 * float(parts[5])))  # 5: D_k -> M_k+1
            if parts[6] != '*':  # no D0 state and no D_k+1 in last row
                model.add_transition(name2state["D%d" % k], name2state["D%d" % (k + 1)], exp(-1 * float(parts[6])))  # 6: D_k -> D_k+1
    model.bake()
    return model.to_json()
def train_hmm_tagger(data):
    # HMM
    # Use the tag unigrams and bigrams calculated above to construct a hidden Markov tagger.
    #
    # - Add one state per tag
    #   - The emission distribution at each state should be estimated with the formula: $P(w|t) = \frac{C(t, w)}{C(t)}$
    # - Add an edge from the starting state `basic_model.start` to each tag
    #   - The transition probability should be estimated with the formula: $P(t|start) = \frac{C(start, t)}{C(start)}$
    # - Add an edge from each tag to the end state `basic_model.end`
    #   - The transition probability should be estimated with the formula: $P(end|t) = \frac{C(t, end)}{C(t)}$
    # - Add an edge between _every_ pair of tags
    #   - The transition probability should be estimated with the formula: $P(t_2|t_1) = \frac{C(t_1, t_2)}{C(t_1)}$
    basic_model = HiddenMarkovModel(name="base-hmm-tagger")
    state_dict = {}
    states = []
    emission_counts = pair_counts(*list(zip(*data.training_set.stream()))[::-1])
    for tag in emission_counts.keys():
        tag_count = tag_unigrams[tag]
        probs = {}
        for w in emission_counts[tag]:
            probs[w] = emission_counts[tag][w] / tag_count
        emission_p = DiscreteDistribution(probs)
        state = State(emission_p, name="" + tag)
        basic_model.add_state(state)
        state_dict[tag] = state

    for tag in tag_starts:
        basic_model.add_transition(basic_model.start, state_dict[tag],
                                   tag_starts[tag] / len(data.training_set.Y))
        basic_model.add_transition(state_dict[tag], basic_model.end,
                                   tag_ends[tag] / tag_unigrams[tag])

    for (tag1, tag2) in tag_bigrams:
        basic_model.add_transition(
            state_dict[tag1], state_dict[tag2],
            tag_bigrams[(tag1, tag2)] / tag_unigrams[tag1])

    # finalize the model
    basic_model.bake()

    assert all(
        tag in set(s.name for s in basic_model.states)
        for tag in data.training_set.tagset
    ), "Every state in your network should use the name of the associated tag, which must be one of the training set tags."
    assert basic_model.edge_count() == 168, (
        "Your network should have an edge from the start node to each state, one edge between every "
        + "pair of tags (states), and an edge from each state to the end node.")
    HTML('<div class="alert alert-block alert-success">Your HMM network topology looks good!</div>')
    return basic_model
class HMMWrapper:
    def __init__(self):
        self.model = HiddenMarkovModel()
        self.start = self.model.start
        self.end = self.model.end
        self.states_before_bake = []
        self.states = None

    def add_state(self, state, start_prob=0):
        self.states_before_bake.append((state, start_prob))
        self.model.add_state(state)

    def add_transition(self, start_state, end_state, prob):
        # print('adding from', start_state.name, 'to', end_state.name, prob)
        self.model.add_transition(start_state, end_state, prob)

    def bake(self):
        starter_states_no_prob = []
        free_start_prob = 1.0
        for state in self.states_before_bake:
            if 'none' not in state[0].name:
                if not state[1]:
                    starter_states_no_prob.append(state)
                else:
                    free_start_prob -= state[1]
                    print('assigned ' + str(state[1]) + ' to ' + state[0].name)
                    self.add_transition(self.start, state[0], state[1])
        len_no_prob = len(starter_states_no_prob)
        starter_prob = free_start_prob / len_no_prob
        print(len_no_prob, starter_prob)
        for state in starter_states_no_prob:
            self.add_transition(self.start, state[0], starter_prob)
        self.model.bake()
        self.states = self.model.states

    def make_states_from_alignment(self, first_state, last_state, seq_matrix, name):
        columns = column_clasify(seq_matrix)
        zones = create_zones(columns)
        grouped_states = group_states(zones, name)
        add_states(self, grouped_states)
        trans = calculate_transitions(first_state, last_state, grouped_states)
        apply_transitions(self, trans)

    def predict(self, *args, **kwargs):
        return self.model.predict(*args, **kwargs)
def _initialize_new_hmm(hmm, new_states, new_transitions):
    new_hmm = HiddenMarkovModel()
    for state in new_states:
        if state not in (hmm.start, hmm.end):
            new_hmm.add_state(state)
    for source_state, target_state, probability in new_transitions:
        if source_state != hmm.start and target_state != hmm.end:
            new_hmm.add_transition(source_state, target_state, probability)
        elif source_state == hmm.start:
            new_hmm.add_transition(new_hmm.start, target_state, probability)
        elif target_state == hmm.end:
            new_hmm.add_transition(source_state, new_hmm.end, probability)
    new_hmm.bake()
    return new_hmm
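# Usage sketch: a minimal, hypothetical example of how this rebuild helper might be
# invoked, copying an existing model's topology while remapping the old silent
# start/end states onto the fresh model (pomegranate < 1.0 API).
from pomegranate import HiddenMarkovModel, State, NormalDistribution

a = State(NormalDistribution(0, 1), name='a')
b = State(NormalDistribution(5, 1), name='b')
hmm = HiddenMarkovModel()
hmm.add_states(a, b)
hmm.add_transition(hmm.start, a, 1.0)
hmm.add_transition(a, a, 0.5)
hmm.add_transition(a, b, 0.5)
hmm.add_transition(b, b, 0.9)
hmm.add_transition(b, hmm.end, 0.1)
hmm.bake()

new_states = [hmm.start, a, b, hmm.end]
new_transitions = [(hmm.start, a, 1.0), (a, a, 0.5), (a, b, 0.5),
                   (b, b, 0.9), (b, hmm.end, 0.1)]
new_hmm = _initialize_new_hmm(hmm, new_states, new_transitions)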
def insert_delete_main_hmm(data_matrix):
    v_columns = column_clasify(data_matrix)
    v_zones = create_zones(v_columns)
    v_grouped_states = group_states(v_zones, 'test')
    v_model = HiddenMarkovModel()
    v_first_state = State(None, name='ali_start')
    v_last_state = State(None, name='ali_end')
    v_model.add_state(v_first_state)
    v_model.add_transition(v_model.start, v_first_state, 1)
    v_model.add_state(v_last_state)
    add_states(v_model, v_grouped_states)
    v_trans = calculate_transitions(v_first_state, v_last_state, v_grouped_states)
    apply_transitions(v_model, v_trans)
    v_model.bake()
    return v_model
def gaussian_hmm(n_states, lower, upper, variance, model_id):
    """
    Instantiate a model with random parameters.

    Randomly generates start and transition matrices, and places a normal
    distribution for each state on an evenly spaced grid over [lower, upper].
    """
    np.random.seed(int(time.time()))
    model = HiddenMarkovModel(model_id)

    # make states with distributions spaced evenly over the value range
    x = np.linspace(lower, upper, n_states)
    states = []
    for i in range(n_states):
        dist = NormalDistribution(x[i], variance)
        states.append(State(dist, name=str(i)))
    model.add_states(states)

    # add (roughly) uniform start probabilities
    start_prob = 1.0 / n_states
    start_probs = []
    for i in range(n_states):
        start_probs.append(start_prob + np.random.ranf())
    start_probs = np.array(start_probs)
    start_probs = start_probs / start_probs.sum()
    for i, state in enumerate(states):
        model.add_transition(model.start, state, start_probs[i])

    # add transition probabilities proportional to the probability of
    # generating one state's mean from another
    for state1 in states:
        transitions = []
        for other_state in states:
            transitions.append(np.exp(state1.distribution.log_probability(
                other_state.distribution.parameters[0])) + np.random.ranf())
        transitions = np.array(transitions)
        transitions = transitions / transitions.sum()
        for i, state2 in enumerate(states):
            model.add_transition(state1, state2, transitions[i])

    model.bake()
    print 'Initialized HMM: ', model.name
    return model
def init_model(start_dip, stay_state, mean_eu, sd_eu, mean_loh):
    ## define distributions
    d_eu = NormalDistribution(mean_eu, sd_eu)                 ## euploid, enriched at 0
    d_loh = NormalDistribution(mean_loh, sd_eu)               ## loss of heterozygosity, enriched at 1
    d_aneu = NormalDistribution(mean_loh / 2.0, sd_eu * 1.4)  ## aneuploid, enriched at 1

    ## define states
    s_eu = State(d_eu, name='EU')        ## enriched at 0
    s_loh = State(d_loh, name='LOH')     ## enriched at 1
    s_aneu = State(d_aneu, name='ANEU')  ## enriched at 1

    ## define model and pass in states
    model = HiddenMarkovModel()
    model.add_states(s_eu, s_loh, s_aneu)

    ## define transition matrix (state a, state b, probability)
    model.add_transition(model.start, s_eu, start_dip)
    model.add_transition(model.start, s_loh, 1.0 - start_dip - 0.1)
    model.add_transition(model.start, s_aneu, 0.1)
    model.add_transition(s_eu, s_eu, stay_state)
    model.add_transition(s_eu, s_loh, 1.0 - 4 * stay_state / 5 - 0.001)
    model.add_transition(s_eu, s_aneu, 1.0 - stay_state / 5 - 0.001)
    model.add_transition(s_eu, model.end, 0.002)
    model.add_transition(s_loh, s_loh, stay_state)
    model.add_transition(s_loh, s_eu, 1.0 - 4 * stay_state / 5 - 0.001)
    model.add_transition(s_loh, s_aneu, 1.0 - stay_state / 5 - 0.001)
    model.add_transition(s_loh, model.end, 0.002)
    model.add_transition(s_aneu, s_aneu, stay_state)
    model.add_transition(s_aneu, s_eu, 1.0 - stay_state / 2 - 0.001)
    model.add_transition(s_aneu, s_loh, 1.0 - stay_state / 2 - 0.001)
    model.add_transition(s_aneu, model.end, 0.002)

    ## finalize internal structure
    model.bake()

    ## only train transitions, not emissions
    model.freeze_distributions()
    return model
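# Usage sketch with hypothetical parameter values: 80% chance of starting diploid,
# 90% self-transition, euploid signal centred near 0 and LOH signal near 1
# (pomegranate < 1.0 API).
model = init_model(start_dip=0.8, stay_state=0.9,
                   mean_eu=0.0, sd_eu=0.2, mean_loh=1.0)
observations = [0.05, -0.1, 0.0, 0.9, 1.1, 0.95, 0.1]
print(model.predict(observations))  # most-likely state index per observation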
def ghmm_model(states_labels: tuple, transitions: tuple, init_prob: tuple,
               end_prob: tuple, means: list, vars: list) -> HiddenMarkovModel:
    """
    Build a Gaussian-mixture HMM from explicit parameters.

    :param states_labels: labels of the hidden states
    :param transitions: state-to-state transition probability matrix
    :param init_prob: initial (start) probability for each state
    :param end_prob: end probability for each state
    :param means: per-state, per-mixture-component means
    :param vars: per-state, per-mixture-component variances
    :return: the baked HiddenMarkovModel
    """
    hmm_model = HiddenMarkovModel()
    mix_num = len(vars[0])
    states = []
    for state_i, state in enumerate(states_labels):
        mixture = []
        for mix_i in range(mix_num):
            init_mean = means[state_i][mix_i]
            init_var = vars[state_i][mix_i]
            mixture.append(NormalDistribution(init_mean, init_var))
        states.append(State(GeneralMixtureModel(mixture), name=str(state_i)))
    hmm_model.add_states(*tuple(states))

    for row in range(len(states_labels)):
        for col in range(len(states_labels)):
            prob = transitions[row][col]
            if prob != 0.:
                hmm_model.add_transition(states[row], states[col], prob)

    for state_i, prob in enumerate(init_prob):
        if prob != 0.:
            hmm_model.add_transition(hmm_model.start, states[state_i], prob)

    for state_i, prob in enumerate(end_prob):
        if prob != 0.:
            hmm_model.add_transition(states[state_i], hmm_model.end, prob)

    hmm_model.bake()
    return hmm_model
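# Usage sketch: two hidden states with two mixture components each (hypothetical numbers).
model = ghmm_model(
    states_labels=('low', 'high'),
    transitions=((0.9, 0.1),
                 (0.2, 0.8)),
    init_prob=(0.5, 0.5),
    end_prob=(0.0, 0.0),               # no explicit end transitions
    means=[[0.0, 1.0], [5.0, 6.0]],
    vars=[[1.0, 1.0], [1.0, 1.0]],
)
print(model.predict([0.2, 0.7, 5.5, 6.1]))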
class ModelWrapper:
    def __init__(self):
        self.model = HiddenMarkovModel()

    def add_state(self, distribution, name):
        state = State(distribution, name=name)
        self.model.add_state(state)
        return state

    def bake(self):
        self.model.bake()

    def viterbi(self, seq):
        return self.model.viterbi(seq)

    def add_transition(self, states, next_state_data):
        for state in states:
            for next_data in next_state_data:
                self.model.add_transition(state, next_data[0], next_data[1])
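# Usage sketch (hypothetical two-state coin model). Note that this wrapper's
# add_transition fans out every (state, probability) pair in next_state_data
# from each source state in `states`.
from pomegranate import DiscreteDistribution

wrapper = ModelWrapper()
fair = wrapper.add_state(DiscreteDistribution({'H': 0.5, 'T': 0.5}), 'fair')
biased = wrapper.add_state(DiscreteDistribution({'H': 0.9, 'T': 0.1}), 'biased')
wrapper.model.add_transition(wrapper.model.start, fair, 1.0)
wrapper.add_transition([fair, biased], [(fair, 0.5), (biased, 0.5)])
wrapper.bake()
logp, path = wrapper.viterbi(list('HHTHHHHHHT'))
print([s.name for _, s in path])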
def build_the_same_model_in_test_sample_from_site_line_by_line():
    # A State holds the emission distribution, but not the transition
    # distribution, because that is stored in the graph edges.
    s1 = State(NormalDistribution(5, 1))
    s2 = State(NormalDistribution(1, 7))
    s3 = State(NormalDistribution(8, 2))
    model = HiddenMarkovModel()
    model.add_states(s1, s2, s3)
    model.add_transition(model.start, s1, 1.0)
    model.add_transition(s1, s1, 0.7)
    model.add_transition(s1, s2, 0.3)
    model.add_transition(s2, s2, 0.8)
    model.add_transition(s2, s3, 0.2)
    model.add_transition(s3, s3, 0.9)
    model.add_transition(s3, model.end, 0.1)
    model.bake()
    model.plot()
def buildHmm(minAmpliconLength, maxGap, windowSize):
    b_bkgd_1 = 0.1
    a_interstate = b_bkgd_1**(2 * minAmpliconLength / windowSize)
    b_amp_0 = (a_interstate)**(0.5 * windowSize / maxGap)
    b_amp_1 = 1 - b_amp_0
    b_bkgd_0 = 1 - b_bkgd_1

    bkgdDist = DiscreteDistribution({0: b_bkgd_0, 1: b_bkgd_1})
    ampDist = DiscreteDistribution({0: b_amp_0, 1: b_amp_1})
    s_bkgd = State(bkgdDist, name='background')
    s_amp = State(ampDist, name='amplicon')

    hmm = HiddenMarkovModel()
    hmm.add_states(s_bkgd, s_amp)
    hmm.add_transition(hmm.start, s_bkgd, 1 - a_interstate)
    hmm.add_transition(hmm.start, s_amp, a_interstate)
    hmm.add_transition(s_bkgd, s_bkgd, 1 - a_interstate)
    hmm.add_transition(s_bkgd, s_amp, a_interstate)
    hmm.add_transition(s_amp, s_bkgd, a_interstate)
    hmm.add_transition(s_amp, s_amp, 1 - a_interstate)
    hmm.bake()
    return hmm
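# Usage sketch with hypothetical window/length parameters, decoding a 0/1
# indicator track into background vs. amplicon windows (pomegranate < 1.0 API).
hmm = buildHmm(minAmpliconLength=5000, maxGap=2000, windowSize=1000)
track = [0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0]
state_idx = hmm.predict(track)                 # most-likely state per window
print([hmm.states[i].name for i in state_idx])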
def create_hidden_MarkovModel(e_df, q_df, start_p_dict):
    """
    Creates a Hidden Markov Model from DataFrames.

    @args:
        - e_df (pd.DataFrame): contains the emission probabilities
        - q_df (pd.DataFrame): contains the transition probabilities
        - start_p_dict (dict): start probability for each state
    """
    model = HiddenMarkovModel(name="Example Model")

    # 1: Create a dict entry for each key in the transition DataFrame
    model_dict = {}
    for key in q_df.keys().values:
        model_dict[key] = {}

    # 2: Create the states
    for key in model_dict:
        # 2.1: Add the emission probabilities to each state, P(observation | state)
        emission_p = DiscreteDistribution(e_df[key].to_dict())
        model_dict[key] = State(emission_p, name=key)
        model.add_state(model_dict[key])
        # 2.2: Add the start probability for each state
        model.add_transition(model.start, model_dict[key], start_p_dict[key])

    # 3: Add the transition probability between each pair of states
    for key, item in q_df.to_dict("index").items():
        for item_name, value in item.items():
            print(key, " , ", item_name, ": ", value)
            tmp_origin = model_dict[key]
            tmp_destination = model_dict[item_name]
            model.add_transition(tmp_origin, tmp_destination,
                                 q_df.loc[key, item_name])

    # finally, call the .bake() method to finalize the model
    model.bake()
    return model
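# Usage sketch: hypothetical weather example. In e_df the columns are states and the
# rows are observations; in q_df each row is an origin state and each column a
# destination state.
import pandas as pd

e_df = pd.DataFrame({'Sunny': {'walk': 0.6, 'shop': 0.3, 'clean': 0.1},
                     'Rainy': {'walk': 0.1, 'shop': 0.4, 'clean': 0.5}})
q_df = pd.DataFrame({'Sunny': {'Sunny': 0.8, 'Rainy': 0.4},
                     'Rainy': {'Sunny': 0.2, 'Rainy': 0.6}})
start_p = {'Sunny': 0.5, 'Rainy': 0.5}
model = create_hidden_MarkovModel(e_df, q_df, start_p)
logp, path = model.viterbi(['walk', 'shop', 'clean'])
print([s.name for _, s in path])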
def build_reference_repeat_finder_hmm(patterns, copies=1): pattern = patterns[0] model = Model(name="HMM Model") insert_distribution = DiscreteDistribution({ 'A': 0.25, 'C': 0.25, 'G': 0.25, 'T': 0.25 }) last_end = None start_random_matches = State(insert_distribution, name='start_random_matches') end_random_matches = State(insert_distribution, name='end_random_matches') model.add_states([start_random_matches, end_random_matches]) for repeat in range(copies): insert_states = [] match_states = [] delete_states = [] for i in range(len(pattern) + 1): insert_states.append( State(insert_distribution, name='I%s_%s' % (i, repeat))) for i in range(len(pattern)): distribution_map = dict({ 'A': 0.01, 'C': 0.01, 'G': 0.01, 'T': 0.01 }) distribution_map[pattern[i]] = 0.97 match_states.append( State(DiscreteDistribution(distribution_map), name='M%s_%s' % (str(i + 1), repeat))) for i in range(len(pattern)): delete_states.append( State(None, name='D%s_%s' % (str(i + 1), repeat))) unit_start = State(None, name='unit_start_%s' % repeat) unit_end = State(None, name='unit_end_%s' % repeat) model.add_states(insert_states + match_states + delete_states + [unit_start, unit_end]) last = len(delete_states) - 1 if repeat > 0: model.add_transition(last_end, unit_start, 0.5) else: model.add_transition(model.start, unit_start, 0.5) model.add_transition(model.start, start_random_matches, 0.5) model.add_transition(start_random_matches, unit_start, 0.5) model.add_transition(start_random_matches, start_random_matches, 0.5) model.add_transition(unit_end, end_random_matches, 0.5) if repeat == copies - 1: model.add_transition(unit_end, model.end, 0.5) model.add_transition(end_random_matches, end_random_matches, 0.5) model.add_transition(end_random_matches, model.end, 0.5) model.add_transition(unit_start, match_states[0], 0.98) model.add_transition(unit_start, delete_states[0], 0.01) model.add_transition(unit_start, insert_states[0], 0.01) model.add_transition(insert_states[0], insert_states[0], 0.01) model.add_transition(insert_states[0], delete_states[0], 0.01) model.add_transition(insert_states[0], match_states[0], 0.98) model.add_transition(delete_states[last], unit_end, 0.99) model.add_transition(delete_states[last], insert_states[last + 1], 0.01) model.add_transition(match_states[last], unit_end, 0.99) model.add_transition(match_states[last], insert_states[last + 1], 0.01) model.add_transition(insert_states[last + 1], insert_states[last + 1], 0.01) model.add_transition(insert_states[last + 1], unit_end, 0.99) for i in range(0, len(pattern)): model.add_transition(match_states[i], insert_states[i + 1], 0.01) model.add_transition(delete_states[i], insert_states[i + 1], 0.01) model.add_transition(insert_states[i + 1], insert_states[i + 1], 0.01) if i < len(pattern) - 1: model.add_transition(insert_states[i + 1], match_states[i + 1], 0.98) model.add_transition(insert_states[i + 1], delete_states[i + 1], 0.01) model.add_transition(match_states[i], match_states[i + 1], 0.98) model.add_transition(match_states[i], delete_states[i + 1], 0.01) model.add_transition(delete_states[i], delete_states[i + 1], 0.01) model.add_transition(delete_states[i], match_states[i + 1], 0.98) last_end = unit_end model.bake() if len(patterns) > 1: # model.fit(patterns, algorithm='baum-welch', transition_pseudocount=1, use_pseudocount=True) fit_patterns = [pattern * copies for pattern in patterns] model.fit(fit_patterns, algorithm='viterbi', transition_pseudocount=1, use_pseudocount=True) return model
model.add_transition( model.start, rainy, 0.6 )
model.add_transition( model.start, sunny, 0.4 )

# Transition matrix, with 0.05 subtracted from each probability to add to
# the probability of exiting the hmm
model.add_transition( rainy, rainy, 0.65 )
model.add_transition( rainy, sunny, 0.25 )
model.add_transition( sunny, rainy, 0.35 )
model.add_transition( sunny, sunny, 0.55 )

# Add transitions to the end of the model
model.add_transition( rainy, model.end, 0.1 )
model.add_transition( sunny, model.end, 0.1 )

# Finalize the model structure
model.bake( verbose=True )

# Let's sample from this model.
print model.sample()

# Let's call Bob every hour and see what he's doing!
# (aka build up a sequence of observations)
sequence = [ 'walk', 'shop', 'clean', 'clean', 'clean', 'walk', 'clean' ]

# What is the probability of seeing this sequence?
print "Probability of Sequence: ", \
    math.e**model.forward( sequence )[ len(sequence), model.end_index ]
print "Probability of Cleaning at Time Step 3 Given This Sequence: ", \
    math.e**model.forward_backward( sequence )[1][ 2, model.states.index( rainy ) ]
print "Probability of the Sequence Given It's Sunny at Time Step 4: ", \
    math.e**model.backward( sequence )[ 3, model.states.index( sunny ) ]
hmm.add_transition(s33, s222, 0.01)
hmm.add_transition(s33, s3, 0.01)
hmm.add_transition(s33, s33, 0.92)
hmm.add_transition(s33, s333, 0.01)
hmm.add_transition(s333, s1, 0.01)
hmm.add_transition(s333, s11, 0.01)
hmm.add_transition(s333, s111, 0.01)
hmm.add_transition(s333, s2, 0.01)
hmm.add_transition(s333, s22, 0.01)
hmm.add_transition(s333, s222, 0.01)
hmm.add_transition(s333, s3, 0.01)
hmm.add_transition(s333, s33, 0.01)
hmm.add_transition(s333, s333, 0.92)
hmm.bake()

hmm.fit(X)  # , weights=w) hmm does not support weights in pomegranate
preds = hmm.predict(X)
probs = hmm.predict_proba(X)

data_thr['preds'] = pd.Series(preds).astype("category")
color_key = ["red", "blue", "yellow", "grey", "black", "purple", "pink",
             "brown", "green", "orange"]  # Spectral9
color_key = color_key[:len(set(preds)) + 2]

covs = np.array([np.array(hmm.states[m].distribution.parameters[1])
                 for m in range(9)])
means = np.array([np.array(hmm.states[m].distribution.parameters[0])
                  for m in range(9)])
st = State(posdis, name='swing')
distros.append(st)
hmm_states.append(st)
negdis = MGD.from_samples(negative_data)
st2 = State(negdis, name='stance')
distros.append(st2)
hmm_states.append(st2)

cl.add_states(hmm_states)
cl.add_transition(cl.start, hmm_states[0], 0.5)
cl.add_transition(cl.start, hmm_states[1], 0.5)
for i in range(0, 2):
    for j in range(0, 2):
        cl.add_transition(hmm_states[i], hmm_states[j], t[i][j])
cl.bake()

f += 1
train_data = fd[train_index]
train_class = fl[train_index]
test_data = fd[test_index]
test_class = fl[test_index]
seq = []
if batch_training == 1:
    s = 0
    while s < len(train_data):
        k = 0
        seq_entry = []
        while k < 20 and s < len(train_data):
            seq_entry.append(train_data[s])
            k += 1
state2 = State(NormalDistribution(0, 2), name="normal") silent = State(None, name="silent") model.add_state(state) model.add_state(state2) model.add_transition(state, state, 0.4) model.add_transition(state, state2, 0.4) model.add_transition(state2, state2, 0.4) model.add_transition(state2, state, 0.4) model.add_transition(model.start, state, 0.5) model.add_transition(model.start, state2, 0.5) model.add_transition(state, model.end, 0.2) model.add_transition(state2, model.end, 0.2) model.bake() sequence = model.sample() print sequence print print model.forward(sequence)[ len(sequence), model.end_index ] print model.backward(sequence)[0,model.start_index] print trans, ems = model.forward_backward(sequence) print trans print ems print model.train( [ sequence ] ) print print model.forward(sequence)[ len(sequence), model.end_index ]
# Define the transitions
model = Model("infinite")
model.add_transition(model.start, s1, 0.7)
model.add_transition(model.start, s2, 0.2)
model.add_transition(model.start, s3, 0.1)
model.add_transition(s1, s1, 0.6)
model.add_transition(s1, s2, 0.1)
model.add_transition(s1, s3, 0.3)
model.add_transition(s2, s1, 0.4)
model.add_transition(s2, s2, 0.4)
model.add_transition(s2, s3, 0.2)
model.add_transition(s3, s1, 0.05)
model.add_transition(s3, s2, 0.15)
model.add_transition(s3, s3, 0.8)
model.bake()

sequence = [4.8, 5.6, 24.1, 25.8, 14.3, 26.5, 15.9, 5.5, 5.1]

print model.is_infinite()

print "Algorithms On Infinite Model"
sequence = [4.8, 5.6, 24.1, 25.8, 14.3, 26.5, 15.9, 5.5, 5.1]

print "Forward"
print model.forward(sequence)
print "\n".join(state.name for state in model.states)

print "Backward"
print model.backward(sequence)

print "Forward-Backward"
def main(): rospy.init_node('hmm_trainer') phase_pub = rospy.Publisher('/phase', Int32, queue_size=10) rospack = rospkg.RosPack() packpath = rospack.get_path('exo_control') datapath = packpath + "/log/mat_files/" verbose = rospy.get_param('~verbose', False) """Print console output into text file""" sys.stdout = open(packpath + "/log/results/leave-one-out_cross_validation_cov.txt", "w") """Data loading""" n_trials = 3 n_sub = 9 healthy_subs = ["daniel", "erika", "felipe", "jonathan", "luis", "nathalia", "paula", "pedro", "tatiana"] patients = ["andres", "carlos", "carmen", "carolina", "catalina", "claudia", "emmanuel", "fabian", "gustavo"] study_subs = [healthy_subs, patients] dataset = [{} for x in range(len(study_subs))] for i in range(len(study_subs)): for sub in study_subs[i]: dataset[i][sub] = {"gyro_y": [[] for x in range(n_trials)], "fder_gyro_y": [[] for x in range(n_trials)], "time": [[] for x in range(n_trials)], "labels": [[] for x in range(n_trials)], "Fs_fsr": 0.0} for group in dataset: for sub,data in group.iteritems(): for trial in range(n_trials): mat_file = scio.loadmat(datapath + sub + "_proc_data" + str(trial+1) + ".mat") for signal in data: if signal not in ["pathol","fder_gyro_y"]: if signal == "Fs_fsr": data[signal] = mat_file[signal][0][0] else: data[signal][trial] = mat_file[signal][0] del mat_file """Feature extraction""" """First derivative""" for group in dataset: for sub,data in group.iteritems(): for trial in range(n_trials): der = [] gyro_y = data["gyro_y"][trial] der.append(gyro_y[0]) for i in range(1,len(gyro_y)-1): der.append((gyro_y[i+1]-gyro_y[i-1])/2) der.append(gyro_y[-1]) data["fder_gyro_y"][trial] = der del der, sub, data """Global variables of cHMM""" startprob = [0.25, 0.25, 0.25, 0.25] state_names = ['hs', 'ff', 'ho', 'sw'] n_classes = 4 n_signals = 2 tol = 6e-2 # Tolerance window of 60 ms # pathology = 0 for pathology in range(len(dataset)): if pathology == 0: rospy.logwarn("**Leave-one-out cross validation with HEALTHY subjects**") print "**Leave-one-out cross validation with HEALTHY subjects**" else: rospy.logwarn("**Leave-one-out cross validation with PATIENTS**") print "**Leave-one-out cross validation with PATIENTS**" # if True: for lou_sub,lou_data in dataset[pathology].iteritems(): # Iterate through leave-one-out subject's data rospy.logwarn("Leave " + lou_sub + " out:") print "Leave " + lou_sub + " out:" t = np.zeros((4, 4)) # Transition matrix prev = -1 for trial in range(n_trials): for label in lou_data["labels"][trial]: if prev == -1: prev = label t[prev][label] += 1.0 prev = label t = normalize(t, axis=1, norm='l1') if verbose: rospy.logwarn("TRANSITION MATRIX\n" + str(t)) class_data = [[] for x in range(n_classes)] # full_lou_data = [] # full_lou_labels = [] for trial in range(n_trials): for sample in range(len(lou_data["gyro_y"][trial])): d = [lou_data["gyro_y"][trial][sample], lou_data["fder_gyro_y"][trial][sample]] l = lou_data["labels"][trial][sample] # full_lou_data.append(d) # full_lou_labels.append(l) class_data[l].append(d) """Multivariate Gaussian Distributions for each hidden state""" class_means = [[[] for x in range(n_signals)] for i in range(n_classes)] class_vars = [[[] for x in range(n_signals)] for i in range(n_classes)] class_std = [[[] for x in range(n_signals)] for i in range(n_classes)] class_cov = [] for state in range(n_classes): cov = np.ma.cov(np.array(class_data[state]), rowvar=False) class_cov.append(cov) for signal in range(n_signals): class_means[state][signal] = np.array(class_data[state][:])[:, 
[signal]].mean(axis=0) class_vars[state][signal] = np.array(class_data[state][:])[:, [signal]].var(axis=0) class_std[state][signal] = np.array(class_data[state][:])[:, [signal]].std(axis=0) # lou_trial = 1 # if True: for lou_trial in range(n_trials): rospy.logwarn("Trial {}".format(lou_trial+1)) print("Trial {}".format(lou_trial+1)) """Classifier initialization""" # distros = [] hmm_states = [] for state in range(n_classes): dis = MGD\ (np.array(class_means[state]).flatten(), np.array(class_cov[state])) st = State(dis, name=state_names[state]) # distros.append(dis) hmm_states.append(st) model = HMM(name="Gait") model.add_states(hmm_states) """Initial transitions""" for state in range(n_classes): model.add_transition(model.start, hmm_states[state], startprob[state]) """Left-right model""" for i in range(n_classes): for j in range(n_classes): model.add_transition(hmm_states[i], hmm_states[j], t[i][j]) model.bake() """Create training and test data""" x_train = [] x_test = [] test_gyro_y = lou_data["gyro_y"][lou_trial] test_fder_gyro_y = lou_data["fder_gyro_y"][lou_trial] """Create test data with n-th trial of leave-one-out subject""" for sample in range(len(test_gyro_y)): x_test.append([test_gyro_y[sample], test_fder_gyro_y[sample]]) """Create training data with n-1 trials of the rest of subjects (healthy group)""" for train_sub,train_data in dataset[0].iteritems(): count_trials = 0 if lou_sub != train_sub: # if train_sub == "daniel": for trial in range(n_trials): if trial != lou_trial and count_trials < 1: # rospy.logwarn(trial) train_gyro_y = train_data["gyro_y"][trial] train_fder_gyro_y = train_data["fder_gyro_y"][trial] for sample in range(len(train_gyro_y)): x_train.append([train_gyro_y[sample], train_fder_gyro_y[sample]]) count_trials += 1 rospy.logwarn(len(x_train)) x_train = list([x_train]) """Training""" rospy.logwarn("Training HMM...") model.fit(x_train, algorithm='baum-welch', verbose=True) # model.fit(x_train, algorithm='viterbi', verbose='True') """Find most-likely sequence""" rospy.logwarn("Finding most-likely sequence...") logp, path = model.viterbi(x_test) # rospy.logwarn(len(path)) # rospy.logwarn(len(lou_data["labels"][lou_trial])) class_labels = [] for i in range(len(lou_data["labels"][lou_trial])): path_phase = path[i][1].name for state in range(n_classes): if path_phase == state_names[state]: class_labels.append(state) '''Saving classifier labels into csv file''' # np.savetxt(packpath+"/log/inter_labels/"+lou_sub+"_labels.csv", class_labels, delimiter=",", fmt='%s') # rospy.logwarn("csv file with classifier labels was saved.") # lou_data["labels"][lou_trial] = lou_data["labels"][lou_trial][1:] """Calculate mean time (MT) of stride and each gait phase and Coefficient of Variation (CoV)""" rospy.logwarn("Mean time (MT) and Coefficient of Variance (CoV)") print "Mean time (MT) and Coefficient of Variance (CoV)" curr_label = -1 count = 0 n_phases = 0 stride_samples = 0 phases_time = [[] for x in range(n_classes)] stride_time = [] for label in class_labels: if curr_label != label: n_phases += 1 stride_samples += count if label == 0: # Gait start: HS if n_phases == 4: # If a whole gait cycle has past stride_time.append(stride_samples/lou_data["Fs_fsr"]) n_phases = 0 stride_samples = 0 phases_time[label-1].append(count/lou_data["Fs_fsr"]) curr_label = label count = 1 else: count += 1.0 for phase in range(n_classes): mean_time = np.mean(phases_time[phase]) phase_std = np.std(phases_time[phase]) rospy.logwarn("(" + state_names[phase] + ")") print "(" + state_names[phase] + ")" 
rospy.logwarn("Mean time: " + str(mean_time) + " + " + str(phase_std)) print "Mean time: " + str(mean_time) + " + " + str(phase_std) rospy.logwarn("CoV: " + str(phase_std/mean_time*100.0)) print("CoV: " + str(phase_std/mean_time*100.0)) mean_time = np.mean(stride_time) phase_std = np.std(stride_time) rospy.logwarn("(Stride)") print "(Stride)" rospy.logwarn("Mean time: " + str(mean_time) + " + " + str(phase_std)) print "Mean time: " + str(mean_time) + " + " + str(phase_std) rospy.logwarn("CoV: " + str(phase_std/mean_time*100.0)) print("CoV: " + str(phase_std/mean_time*100.0))
coding_model.add_transition(coding_state2, ez_states_tag[0], 0.0000000230000)
coding_model.add_transition(donor0_states[-1], in0, 1)
coding_model.add_transition(donor1_states[-1], in1, 1)
coding_model.add_transition(donor2_states[-1], in2, 1)
coding_model.add_transition(in0_spacers[-1], acceptor0_states[0], 1)
coding_model.add_transition(in1_spacers[-1], acceptor1_states[0], 1)
coding_model.add_transition(in2_spacers[-1], acceptor2_states[0], 1)
coding_model.add_transition(acceptor0_states[-1], coding_state0, 1.0)
coding_model.add_transition(acceptor1_states[-1], coding_state0, 1.0)
coding_model.add_transition(acceptor2_states[-1], coding_state0, 1.0)
coding_model.add_transition(ze_states[-1], coding_state0, 1.0)
coding_model.add_transition(ez_states_taa[-1], exon3_state, 1.0)
coding_model.add_transition(ez_states_tga[-1], exon3_state, 1.0)
coding_model.add_transition(ez_states_tag[-1], exon3_state, 1.0)
coding_model.add_transition(exon3_state, exon3_state, 0.9)
coding_model.add_transition(exon3_state, poly_a_states[0], 0.1)
coding_model.add_transition(poly_a_states[-1], post_poly_spacer[0], 1.0)
coding_model.add_transition(post_poly_spacer[-1], back, 1.0)
coding_model.bake()

with open('coding_model_base_poly.json', 'w', encoding='utf-8') as out:
    out.write(coding_model.to_json())
def build_dis_classifier(self): skf = StratifiedKFold(self.full_labels, n_folds=self.folds) classifier_array = [] stats_array = [] num_class = len(self.full_data[0]) print (num_class) for cl in range(0, num_class): lel = -1 tp_total = 0.0 tn_total = 0.0 fp_total = 0.0 fn_total = 0.0 tests = 0 for train_index, test_index in skf: if lel > 0: lel -= 1 continue stats = [] distros = [] hmm_states = [] state_names = ['swing', 'stance'] swings = 0 stances = 0 for i in range(0, 2): dis = MGD.from_samples(self.class_data[i]) st = State(dis, name=state_names[i]) distros.append(dis) hmm_states.append(st) model = HMM() print(model.states) model.add_states(hmm_states) model.add_transition(model.start, hmm_states[0], 0.5) model.add_transition(model.start, hmm_states[1], 0.5) model.add_transition(hmm_states[1], model.end, 0.000000000000000001) model.add_transition(hmm_states[0], model.end, 0.000000000000000001) for i in range(0, 2): for j in range(0, 2): model.add_transition(hmm_states[i], hmm_states[j], self.t[i][j]) model.bake() tp = 0.0 tn = 0.0 fp = 0.0 fn = 0.0 train_data = self.full_data[train_index, cl] train_class = self.full_labels[train_index, cl] test_data = self.full_data[test_index] test_class = self.full_labels[test_index] print(np.isfinite(train_data).all()) print(np.isfinite(test_data).all()) print(np.isnan(train_data.any())) print(np.isinf(train_data.any())) print(np.isnan(test_data.any())) print(np.isinf(test_data.any())) if (not np.isfinite(train_data.any())) or (not np.isfinite(test_data.any())) \ or (not np.isfinite(train_class.any())) or (not np.isfinite(test_data.any())): rospy.logerr("NaN or Inf Detected") exit() try: rospy.logwarn("Training model #"+str(cl)+", fold #" + str(tests)) seq = np.array(train_data) model.fit(seq, algorithm='baum-welch', verbose='True', n_jobs=8, max_iterations=150) except ValueError: rospy.logwarn("Something went wrong, exiting") rospy.shutdown() exit() seq = [] if self.batch_test == 1: s = 0 # for s in range(0, len(test_data)): while s < len(test_data): k = 0 seq_entry = [] while k < 20 and s < len(test_data): seq_entry.append(test_data[s]) k += 1 s += 1 seq.append(seq_entry) else: seq = np.array(test_data) if seq == [] or test_data == []: rospy.logerr("Empty testing sequence") continue log, path = model.viterbi(test_data) if (len(path) - 2) != len(test_data): rospy.logerr(len(path)) rospy.logerr(path[0][1].name) rospy.logerr(path[len(path) - 1][1].name) rospy.logerr(len(test_data)) exit() tests += 1 for i in range(0, len(path) - 2): if path[i + 1][1].name != 'Gait-start' and path[i + 1][1].name != 'Gait-end': if path[i + 1][1].name == 'swing': # prediction is 0 swings += 1 if test_class[i] == 0: # class is 0 tn += 1.0 elif test_class[i] == 1: fn += 1.0 # class is 1 elif path[i + 1][1].name == 'stance': # prediction is 1 stances += 1 if test_class[i] == 1: # class is 1 tp += 1.0 elif test_class[i] == 0: # class is 0 fp += 1.0 print (swings) print (stances) if (tp + fn) != 0.0: rospy.logwarn("Sensitivity : " + str(tp / (tp + fn))) # sensitivity = tp / (tp + fn) else: rospy.logwarn("Sensitivity : 0.0") # sensitivity = 0.0 if (tn + fp) != 0.0: rospy.logwarn("Specificity : " + str(tn / (tn + fp))) # specificity = tn_total / (tn_total + fp_total) else: rospy.logwarn("Specificity : 0.0") # specificity = 0.0 if (tn + tp + fn + fp) != 0.0: rospy.logwarn("Accuracy : " + str((tn + tp) / (tn + tp + fn + fp))) # accuracy = (tn + tp) / (tn + tp + fn + fp) else: rospy.logwarn("Accuracy : 0.0") # accuracy = 0.0 tn_total += tn tp_total += tp fn_total += fn fp_total += 
fp tp_total /= tests tn_total /= tests fp_total /= tests fn_total /= tests rospy.logerr("TP :" + str(tp_total)) rospy.logerr("TN :" + str(tn_total)) rospy.logerr("FP :" + str(fp_total)) rospy.logerr("FN :" + str(fn_total)) rospy.logerr("Tests :" + str(tests)) if (tp_total + fn_total) != 0.0: sensitivity = tp_total / (tp_total + fn_total) else: sensitivity = 0.0 if (tn_total + fp_total) != 0.0: specificity = tn_total / (tn_total + fp_total) else: specificity = 0.0 if (tn_total + tp_total + fn_total + fp_total) != 0.0: accuracy = (tn_total + tp_total) / (tn_total + tp_total + fn_total + fp_total) else: accuracy = 0.0 rospy.logwarn("----------------------------------------------------------") rospy.logerr("Total accuracy: " + str(accuracy)) rospy.logerr("Total sensitivity: " + str(sensitivity)) rospy.logerr("Total specificity: " + str(specificity)) stats = [tn_total * tests, fn_total * tests, fp_total * tests, fn_total * tests, tests, accuracy, sensitivity, specificity] rospy.logwarn("-------------------DONE-------------------------") classifier_array.append(model) stats_array.append(stats) pickle.dump(classifier_array, open(datafile + "distributed_classifiers.p", 'wb')) pickle.dump(stats_array, open(datafile + "distributed_stats.p", 'wb')) scio.savemat(datafile + "distributed_stats.mat", {'stats': stats_array})
def init_lr_hmm(sequences, steps, states_per_step,
                force_end=False, model_id='Left-Right HMM', seed=None):
    """
    Instantiate a left-right model with random parameters.

    Randomly generates start and transition matrices, and fits normal
    distributions for each state from a chronological partition of the
    sequences. If force_end is True, the sequence is required to end in the
    end state.
    """
    # seed random number generator
    if seed is not None:
        np.random.seed(seed)

    model = HiddenMarkovModel(model_id)
    n_states = steps * states_per_step

    # make distributions from chronological subsets of timepoints
    step_size = int(math.ceil(sequences.shape[1] / float(n_states + 1)))

    # generate states
    states = np.empty((steps, states_per_step), dtype=object)
    for i in range(steps):
        for j in range(states_per_step):
            temp_assignment = np.arange(step_size * i, step_size * (i + 1))
            dist = \
                NormalDistribution.from_samples(sequences[:, temp_assignment])
            state_name = str(i) + '-' + str(j)
            states[i, j] = State(dist, name=str(state_name))

    # add states to model
    model.add_states(states.flatten().tolist())

    # make random transition from start -> step0
    trans = np.random.ranf(states_per_step)
    trans = trans / trans.sum()
    for j in range(states_per_step):
        model.add_transition(model.start, states[0, j], trans[j])

    # make random transition from step(i) -> step(i+1)
    for i in range(steps - 1):
        for j in range(states_per_step):
            trans = np.random.ranf(states_per_step + 1)
            trans = trans / trans.sum()
            # self transition
            model.add_transition(states[i, j], states[i, j], trans[0])
            # out transition
            for x in range(states_per_step):
                model.add_transition(states[i, j], states[i + 1, x],
                                     trans[x + 1])

    # make random transition from stepn -> end
    if force_end:
        for j in range(states_per_step):
            trans = np.random.ranf(2)
            trans = trans / trans.sum()
            # self transition
            model.add_transition(states[(steps - 1), j],
                                 states[(steps - 1), j], trans[0])
            # end transition
            model.add_transition(states[(steps - 1), j], model.end, trans[1])

    model.bake()
    print 'Initialized Left-Right HMM:', model.name, '[', \
        steps, states_per_step, ']'
    return model
def init_gaussian_hmm(sequences, n_states, model_id, seed=None):
    """
    Instantiate a model with random parameters.

    Randomly generates start and transition matrices, and fits normal
    distributions for each state from a random partition of the sequences.
    """
    """
    # make random transition probability matrix
    # scale each row to sum to 1
    trans = np.random.ranf((n_states, n_states))
    for i in range(n_states):
        trans[i, :] = trans[i, :] / trans[i, :].sum()

    # make distributions from random subsets of timepoints
    x = int(math.ceil(sequences.shape[1] / float(n_states)))
    # x = math.min(3, x)
    dists = []
    for i in range(n_states):
        temp_assignment = np.random.choice(sequences.shape[1], x)
        dists.append(NormalDistribution.from_samples
                     (sequences[:, temp_assignment]))

    # random start probabilities
    # scale to sum to 1
    starts = np.random.ranf(n_states)
    starts = starts / sum(starts)

    model = HiddenMarkovModel.from_matrix(trans, dists, starts, name=model_id)
    """
    # seed random number generator
    if seed is not None:
        np.random.seed(seed)

    model = HiddenMarkovModel(model_id)

    # make states with distributions from random subsets of timepoints
    x = int(math.ceil(sequences.shape[1] / float(n_states)))
    states = []
    for i in range(n_states):
        temp_assignment = np.random.choice(sequences.shape[1], x)
        dist = \
            NormalDistribution.from_samples(sequences[:, temp_assignment])
        states.append(State(dist, name=str(i)))
    model.add_states(states)

    # add random start probabilities
    start_probs = np.random.ranf(n_states)
    start_probs = start_probs / start_probs.sum()
    for i, state in enumerate(states):
        model.add_transition(model.start, state, start_probs[i])

    # add random transition probabilities out of each state
    for state1 in states:
        transitions = np.random.ranf(n_states)
        transitions = transitions / transitions.sum()
        for i, state2 in enumerate(states):
            model.add_transition(state1, state2, transitions[i])

    model.bake()
    print 'Initialized HMM: ', model.name
    return model
def train_and_test():
    with open('../data extractors/exons_start_1.txt') as in_file:
        total = []
        for line in in_file:
            no_p_line = line.replace('P', '').lower().replace('\n', '')
            total.append(no_p_line)

    converted_total = [converter_to(x, 2) for x in total]

    matrixDonor0 = numpy.array(
        matrix_from_exa('../data extractors/new_donor1.exa'))

    c0, c1, c2 = calculator.calculate_proba2('../data extractors/new_cuts.txt')
    print(c0.p, c1.p, c2.p)

    coding_state0 = State(DiscreteDistribution(c0.p), 'coding state 0')
    coding_state1 = State(DiscreteDistribution(c1.p), 'coding state 1')
    coding_state2 = State(DiscreteDistribution(c2.p), 'coding state 2')

    donor0_data = classify(matrixDonor0, 2)
    donor0_states = sequence_state_factory(donor0_data, 'donor0')

    post = State(DiscreteDistribution(equal_distribution), name='post')

    model = HiddenMarkovModel('coding to donor')
    model.add_state(coding_state0)
    model.add_state(coding_state1)
    model.add_state(coding_state2)
    add_sequence(model, donor0_states)
    model.add_state(post)

    model.add_transition(model.start, coding_state0, 1)
    model.add_transition(coding_state0, coding_state1, 0.6)
    model.add_transition(coding_state0, donor0_states[0], 0.4)
    model.add_transition(coding_state1, coding_state2, 0.6)
    model.add_transition(coding_state1, donor0_states[0], 0.4)
    model.add_transition(coding_state2, coding_state0, 0.6)
    model.add_transition(coding_state2, donor0_states[0], 0.4)
    model.add_transition(donor0_states[-1], post, 1)
    model.add_transition(post, post, 0.9)
    model.add_transition(post, model.end, 0.1)
    model.bake()

    test_model(model)
    model.fit(converted_total, transition_pseudocount=1,
              emission_pseudocount=1, verbose=True)
    test_model(model)

    with open('partial_model_coding_to_donor_model0.json', 'w') as out:
        out.write(model.to_json())
def crop_status_hmm_model(nn_pobability_matrix, timeseries_steps, n_observed_classes):
    # Observed classes (indices 0-5):
    # ['emergence', 'growth', 'flowers', 'senescing', 'senesced', 'no_crop']
    d0 = NeuralNetworkWrapperCustom(
        predicted_probabilities=nn_pobability_matrix, i=0,
        n_samples=timeseries_steps, n_classes=n_observed_classes)
    d1 = NeuralNetworkWrapperCustom(
        predicted_probabilities=nn_pobability_matrix, i=1,
        n_samples=timeseries_steps, n_classes=n_observed_classes)
    d2 = NeuralNetworkWrapperCustom(
        predicted_probabilities=nn_pobability_matrix, i=2,
        n_samples=timeseries_steps, n_classes=n_observed_classes)
    d3 = NeuralNetworkWrapperCustom(
        predicted_probabilities=nn_pobability_matrix, i=3,
        n_samples=timeseries_steps, n_classes=n_observed_classes)
    d4 = NeuralNetworkWrapperCustom(
        predicted_probabilities=nn_pobability_matrix, i=4,
        n_samples=timeseries_steps, n_classes=n_observed_classes)
    d5 = NeuralNetworkWrapperCustom(
        predicted_probabilities=nn_pobability_matrix, i=5,
        n_samples=timeseries_steps, n_classes=n_observed_classes)

    s0_emerge = State(d0, name='emergence')
    s1_growth = State(d1, name='growth')
    s2_fls = State(d2, name='flowers')
    s3_sencing = State(d3, name='senescing')
    s4_senced = State(d4, name='senesced')
    s5_none = State(d5, name='no_crop')

    model = HiddenMarkovModel()

    # Initialize each hidden state.
    # All states have an equal chance of being the starting state.
    for s in [s0_emerge, s1_growth, s2_fls, s3_sencing, s4_senced, s5_none]:
        model.add_state(s)
        model.add_transition(model.start, s, 1)

    model.add_transitions(
        s0_emerge,
        [s0_emerge, s1_growth, s2_fls, s3_sencing, s4_senced, s5_none],
        [90., 5., 0., 0., 0., 5.])
    model.add_transitions(
        s1_growth,
        [s0_emerge, s1_growth, s2_fls, s3_sencing, s4_senced, s5_none],
        [0., 90., 2.5, 2.5, 0., 5.])
    model.add_transitions(
        s2_fls,
        [s0_emerge, s1_growth, s2_fls, s3_sencing, s4_senced, s5_none],
        [0., 0., 90., 5., 0., 5.])
    model.add_transitions(
        s3_sencing,
        [s0_emerge, s1_growth, s2_fls, s3_sencing, s4_senced, s5_none],
        [0., 0., 0., 90., 5., 5.])
    model.add_transitions(
        s4_senced,
        [s0_emerge, s1_growth, s2_fls, s3_sencing, s4_senced, s5_none],
        [0., 0., 0., 0., 90., 10.])
    model.add_transitions(
        s5_none,
        [s0_emerge, s1_growth, s2_fls, s3_sencing, s4_senced, s5_none],
        [10., 0, 0., 0., 0., 90.])

    model.bake(verbose=False)
    return model
def _segment(self, arr, components=2):
    nonzero = arr[arr > 0]
    idx = self.hampel_filter(np.log2(nonzero))
    filtered = nonzero[idx]
    log_gmm = self.get_states(np.log2(filtered))
    log_means, log_probs = log_gmm.means_.ravel(), log_gmm.weights_
    ln_gmm = self.get_states(filtered)  # to improve the sensitivity
    ln_means, ln_probs = ln_gmm.means_.ravel(), ln_gmm.weights_
    if (len(log_means) == 1):
        means, probs = ln_means, ln_probs
        scale = 'linear'
    else:
        means, probs = log_means, log_probs
        scale = 'log'

    logger.info('Estimated HMM state number: {0} ({1} scale)'.format(
        len(means), scale))

    model = HiddenMarkovModel()
    # GMM emissions
    dists = []
    for m in means:
        tmp = []
        for i in range(components):
            e = m + (-1)**i * ((i + 1) // 2) * 0.5
            s = 0.5
            tmp.append(NormalDistribution(e, s))
        mixture = State(GeneralMixtureModel(tmp), name=str(m))
        dists.append(mixture)
    model.add_states(*tuple(dists))

    # transition matrix
    for i in range(len(means)):
        for j in range(len(means)):
            if i == j:
                model.add_transition(dists[i], dists[j], 0.8)
            else:
                model.add_transition(dists[i], dists[j], 0.2 / (len(means) - 1))

    # starts and ends
    for i in range(len(means)):
        model.add_transition(model.start, dists[i], probs[i])
    model.bake()

    # training sequences
    tmp = np.zeros(nonzero.size)
    tmp[idx] = filtered
    newarr = np.zeros(arr.size)
    newarr[arr > 0] = tmp

    if len(means) > 1:
        model.fit(self.pieces(newarr, scale=scale), algorithm='baum-welch',
                  n_jobs=self.n_jobs, max_iterations=5000,
                  stop_threshold=2e-4)
        queue = newarr[newarr > 0]
        if scale == 'log':
            seq = np.r_[[s.name for i, s in model.viterbi(np.log2(queue))[1][1:]]]
        else:
            seq = np.r_[[s.name for i, s in model.viterbi(queue)[1][1:]]]
        seg = self.assign_cnv(queue, seq)
        predicted = np.zeros(newarr.size)
        predicted[newarr > 0] = seg
        seg = self.call_intervals(predicted)
    else:
        seg = [(0, newarr.size)]

    return newarr, seg, scale
def dominant_cover_hmm_model(nn_pobability_matrix, timeseries_steps, n_observed_classes):
    d0 = NeuralNetworkWrapperCustom(
        predicted_probabilities=nn_pobability_matrix, i=0,
        n_samples=timeseries_steps, n_classes=n_observed_classes)
    d1 = NeuralNetworkWrapperCustom(
        predicted_probabilities=nn_pobability_matrix, i=1,
        n_samples=timeseries_steps, n_classes=n_observed_classes)
    d2 = NeuralNetworkWrapperCustom(
        predicted_probabilities=nn_pobability_matrix, i=2,
        n_samples=timeseries_steps, n_classes=n_observed_classes)
    d3 = NeuralNetworkWrapperCustom(
        predicted_probabilities=nn_pobability_matrix, i=3,
        n_samples=timeseries_steps, n_classes=n_observed_classes)
    d4 = NeuralNetworkWrapperCustom(
        predicted_probabilities=nn_pobability_matrix, i=4,
        n_samples=timeseries_steps, n_classes=n_observed_classes)

    s0_veg = State(d0, name='vegetation')
    s1_residue = State(d1, name='residue')
    s2_soil = State(d2, name='soil')
    s3_snow = State(d3, name='snow')
    s4_water = State(d4, name='water')

    model = HiddenMarkovModel()

    # Initialize each hidden state.
    # All states have an equal chance of being the starting state.
    for s in [s0_veg, s1_residue, s2_soil, s3_snow, s4_water]:
        model.add_state(s)
        model.add_transition(model.start, s, 1)

    model.add_transitions(s0_veg,
                          [s0_veg, s1_residue, s2_soil, s3_snow, s4_water],
                          [95., 1.0, 1.0, 1.0, 1.0])
    model.add_transitions(s1_residue,
                          [s0_veg, s1_residue, s2_soil, s3_snow, s4_water],
                          [1.0, 95., 1.0, 1.0, 1.0])
    model.add_transitions(s2_soil,
                          [s0_veg, s1_residue, s2_soil, s3_snow, s4_water],
                          [1.0, 1.0, 95., 1.0, 1.0])
    model.add_transitions(s3_snow,
                          [s0_veg, s1_residue, s2_soil, s3_snow, s4_water],
                          [1.0, 1.0, 1.0, 95., 1.0])
    model.add_transitions(s4_water,
                          [s0_veg, s1_residue, s2_soil, s3_snow, s4_water],
                          [1.0, 1.0, 1.0, 1.0, 95.])

    model.bake(verbose=False)
    return model
def get_suffix_matcher_hmm(pattern):
    model = Model(name="Suffix Matcher HMM Model")
    insert_distribution = DiscreteDistribution({
        'A': 0.25, 'C': 0.25, 'G': 0.25, 'T': 0.25
    })

    insert_states = []
    match_states = []
    delete_states = []
    hmm_name = 'suffix'
    for i in range(len(pattern) + 1):
        insert_states.append(
            State(insert_distribution, name='I%s_%s' % (i, hmm_name)))

    for i in range(len(pattern)):
        distribution_map = dict({'A': 0.01, 'C': 0.01, 'G': 0.01, 'T': 0.01})
        distribution_map[pattern[i]] = 0.97
        match_states.append(
            State(DiscreteDistribution(distribution_map),
                  name='M%s_%s' % (str(i + 1), hmm_name)))

    for i in range(len(pattern)):
        delete_states.append(
            State(None, name='D%s_%s' % (str(i + 1), hmm_name)))

    unit_start = State(None, name='suffix_start_%s' % hmm_name)
    unit_end = State(None, name='suffix_end_%s' % hmm_name)
    model.add_states(insert_states + match_states + delete_states +
                     [unit_start, unit_end])
    last = len(delete_states) - 1

    model.add_transition(model.start, unit_start, 1)
    model.add_transition(unit_end, model.end, 1)

    model.add_transition(unit_start, delete_states[0], 0.01)
    model.add_transition(unit_start, insert_states[0], 0.01)
    for i in range(len(pattern)):
        model.add_transition(unit_start, match_states[i], 0.98 / len(pattern))

    model.add_transition(insert_states[0], insert_states[0], 0.01)
    model.add_transition(insert_states[0], delete_states[0], 0.01)
    model.add_transition(insert_states[0], match_states[0], 0.98)

    model.add_transition(delete_states[last], unit_end, 0.99)
    model.add_transition(delete_states[last], insert_states[last + 1], 0.01)
    model.add_transition(match_states[last], unit_end, 0.99)
    model.add_transition(match_states[last], insert_states[last + 1], 0.01)
    model.add_transition(insert_states[last + 1], insert_states[last + 1], 0.01)
    model.add_transition(insert_states[last + 1], unit_end, 0.99)

    for i in range(0, len(pattern)):
        model.add_transition(match_states[i], insert_states[i + 1], 0.01)
        model.add_transition(delete_states[i], insert_states[i + 1], 0.01)
        model.add_transition(insert_states[i + 1], insert_states[i + 1], 0.01)
        if i < len(pattern) - 1:
            model.add_transition(insert_states[i + 1], match_states[i + 1], 0.98)
            model.add_transition(insert_states[i + 1], delete_states[i + 1], 0.01)
            model.add_transition(match_states[i], match_states[i + 1], 0.98)
            model.add_transition(match_states[i], delete_states[i + 1], 0.01)
            model.add_transition(delete_states[i], delete_states[i + 1], 0.01)
            model.add_transition(delete_states[i], match_states[i + 1], 0.98)

    model.bake(merge=None)
    return model
def get_constant_number_of_repeats_matcher_hmm(patterns, copies): model = Model(name="Repeating Pattern Matcher HMM Model") transitions, emissions = build_profile_hmm_for_repeats( patterns, settings.MAX_ERROR_RATE) matches = [m for m in emissions.keys() if m.startswith('M')] last_end = None for repeat in range(copies): insert_states = [] match_states = [] delete_states = [] for i in range(len(matches) + 1): insert_distribution = DiscreteDistribution(emissions['I%s' % i]) insert_states.append( State(insert_distribution, name='I%s_%s' % (i, repeat))) for i in range(1, len(matches) + 1): match_distribution = DiscreteDistribution(emissions['M%s' % i]) match_states.append( State(match_distribution, name='M%s_%s' % (str(i), repeat))) for i in range(1, len(matches) + 1): delete_states.append(State(None, name='D%s_%s' % (str(i), repeat))) unit_start = State(None, name='unit_start_%s' % repeat) unit_end = State(None, name='unit_end_%s' % repeat) model.add_states(insert_states + match_states + delete_states + [unit_start, unit_end]) n = len(delete_states) - 1 if repeat > 0: model.add_transition(last_end, unit_start, 1) else: model.add_transition(model.start, unit_start, 1) if repeat == copies - 1: model.add_transition(unit_end, model.end, 1) model.add_transition(unit_start, match_states[0], transitions['unit_start']['M1']) model.add_transition(unit_start, delete_states[0], transitions['unit_start']['D1']) model.add_transition(unit_start, insert_states[0], transitions['unit_start']['I0']) model.add_transition(insert_states[0], insert_states[0], transitions['I0']['I0']) model.add_transition(insert_states[0], delete_states[0], transitions['I0']['D1']) model.add_transition(insert_states[0], match_states[0], transitions['I0']['M1']) model.add_transition(delete_states[n], unit_end, transitions['D%s' % (n + 1)]['unit_end']) model.add_transition(delete_states[n], insert_states[n + 1], transitions['D%s' % (n + 1)]['I%s' % (n + 1)]) model.add_transition(match_states[n], unit_end, transitions['M%s' % (n + 1)]['unit_end']) model.add_transition(match_states[n], insert_states[n + 1], transitions['M%s' % (n + 1)]['I%s' % (n + 1)]) model.add_transition(insert_states[n + 1], insert_states[n + 1], transitions['I%s' % (n + 1)]['I%s' % (n + 1)]) model.add_transition(insert_states[n + 1], unit_end, transitions['I%s' % (n + 1)]['unit_end']) for i in range(1, len(matches) + 1): model.add_transition(match_states[i - 1], insert_states[i], transitions['M%s' % i]['I%s' % i]) model.add_transition(delete_states[i - 1], insert_states[i], transitions['D%s' % i]['I%s' % i]) model.add_transition(insert_states[i], insert_states[i], transitions['I%s' % i]['I%s' % i]) if i < len(matches): model.add_transition(insert_states[i], match_states[i], transitions['I%s' % i]['M%s' % (i + 1)]) model.add_transition(insert_states[i], delete_states[i], transitions['I%s' % i]['D%s' % (i + 1)]) model.add_transition(match_states[i - 1], match_states[i], transitions['M%s' % i]['M%s' % (i + 1)]) model.add_transition(match_states[i - 1], delete_states[i], transitions['M%s' % i]['D%s' % (i + 1)]) model.add_transition(delete_states[i - 1], match_states[i], transitions['D%s' % i]['M%s' % (i + 1)]) model.add_transition(delete_states[i - 1], delete_states[i], transitions['D%s' % i]['D%s' % (i + 1)]) last_end = unit_end model.bake(merge=None) return model
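# build_profile_hmm_for_repeats and settings.MAX_ERROR_RATE come from the surrounding
# project and are not shown in this excerpt. Assuming they are importable, a usage
# sketch with an illustrative pattern and read might look like this:
model = get_constant_number_of_repeats_matcher_hmm(['CAG'], copies=3)
logp, path = model.viterbi(list('CAGCAGCAG'))
visited = [state.name for _, state in path if not state.is_silent()]
print(logp, visited)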
# NOTE: the original snippet begins mid-definition; the model, the background state,
# and the start of the 'fixed' emission are reconstructed here. The background
# emission is assumed to be uniform over {a, c, g, t}, and the 'a' probability of the
# fixed state is assumed to be 0.45 so the distribution sums to 1.
hmmodel = HiddenMarkovModel('background_vs_fixed')
back_state = State(DiscreteDistribution({
    'a': 0.25,
    'c': 0.25,
    'g': 0.25,
    't': 0.25
}), name='background')
fixed_state = State(DiscreteDistribution({
    'a': 0.45,
    'c': 0.45,
    'g': 0.05,
    't': 0.05
}), name='fixed')
hmmodel.add_state(back_state)
hmmodel.add_state(fixed_state)
hmmodel.add_transition(hmmodel.start, back_state, 1)
hmmodel.add_transition(back_state, back_state, 0.9)
hmmodel.add_transition(back_state, fixed_state, 0.1)
hmmodel.add_transition(fixed_state, fixed_state, 0.9)
hmmodel.add_transition(fixed_state, back_state, 0.1)
hmmodel.bake()

seq = list('acgtacgtaaaaccccaaa')
logp, path = hmmodel.viterbi(seq)
print([x[1].name for x in path])
print(hmmodel.to_json())

to_fit1 = list('acgtacacacacacacac')
to_fit2 = list('acgtacgtacgtacgtacgtacgtacgtcgt')
to_fit3 = list('aaaaacccccaaacc')
to_fit4 = list('aaaaaccgcccaaaccacgtacgtacgtacgtactacgggggg')
logp, path = hmmodel.viterbi(to_fit4)
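# The to_fit* sequences above are built but never used; presumably they were meant as
# Baum-Welch training data. A hedged sketch of that re-estimation step (assumption,
# not part of the original snippet):
hmmodel.fit([to_fit1, to_fit2, to_fit3, to_fit4], algorithm='baum-welch', verbose=True)
logp, path = hmmodel.viterbi(seq)
print([x[1].name for x in path])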
for tag in data.training_set.tagset:
    tag_emissions = DiscreteDistribution({
        word: emission_counts[tag][word] / tag_unigrams[tag]
        for word in emission_counts[tag]
    })
    tag_states[tag] = State(tag_emissions, name=tag)
    basic_model.add_state(tag_states[tag])

# Add edges between states for the observed transition frequencies P(tag_i | tag_i-1)
for tag in data.training_set.tagset:
    basic_model.add_transition(basic_model.start, tag_states[tag],
                               tag_starts[tag] / tag_unigrams[tag])
    for tag1 in data.training_set.tagset:
        basic_model.add_transition(tag_states[tag], tag_states[tag1],
                                   tag_bigrams[(tag, tag1)] / tag_unigrams[tag])
    basic_model.add_transition(tag_states[tag], basic_model.end,
                               tag_ends[tag] / tag_unigrams[tag])

# finalize the model
basic_model.bake()

# Evaluate the accuracy of the HMM tagger on the training and testing corpora
hmm_training_acc = accuracy(data.training_set.X, data.training_set.Y, basic_model)
print("training accuracy basic hmm model: {:.2f}%".format(100 * hmm_training_acc))

hmm_testing_acc = accuracy(data.testing_set.X, data.testing_set.Y, basic_model)
print("testing accuracy basic hmm model: {:.2f}%".format(100 * hmm_testing_acc))
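# accuracy() is called above but not defined in this excerpt. A minimal sketch that is
# consistent with those call sites; the helper name, error handling, and path slicing
# are assumptions rather than the original implementation:
def accuracy(X, Y, model):
    """Hypothetical helper: fraction of tags the Viterbi decoding gets right,
    skipping sequences the model cannot score (e.g. unseen words)."""
    correct, total = 0, 0
    for observations, true_tags in zip(X, Y):
        total += len(true_tags)
        try:
            _, path = model.viterbi(list(observations))
        except Exception:
            continue
        if path is None:
            continue
        # Drop the silent start and end states from the decoded path.
        pred_tags = [state.name for _, state in path[1:-1]]
        correct += sum(p == t for p, t in zip(pred_tags, true_tags))
    return correct / total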
def main(): rospy.init_node('hmm_trainer') param_vec = [] rospack = rospkg.RosPack() if (len(sys.argv) < 2): print("Missing the prefix argument.") exit() else: prefix = sys.argv[1] use_measurements = np.zeros(3) # patient = rospy.get_param('~patient', 'None') # if prefix == 'None': # rospy.logerr("No filename given ,exiting") # exit() phase_pub = rospy.Publisher('/phase', Int32, queue_size=10) packpath = rospack.get_path('exo_gait_phase_det') datapath = packpath + "/log/mat_files/" rospy.logwarn("Patient: {}".format(prefix)) print("Patient: {}".format(prefix)) verbose = rospy.get_param('~verbose', False) """Print console output into text file""" # sys.stdout = open(packpath + "/log/results/intra-sub_" + prefix + ".txt", "w") """Data loading""" n_trials = 3 data = [[] for x in range(0, n_trials)] for i in range(0, n_trials): data[i] = scio.loadmat(datapath + prefix + "_proc_data" + str(i + 1) + ".mat") accel_x = [[] for x in range(0, n_trials)] accel_y = [[] for x in range(0, n_trials)] accel_z = [[] for x in range(0, n_trials)] gyro_x = [[] for x in range(0, n_trials)] gyro_y = [[] for x in range(0, n_trials)] gyro_z = [[] for x in range(0, n_trials)] time_array = [[] for x in range(0, n_trials)] labels = [[] for x in range(0, n_trials)] fs_fsr = [] for i in range(0, n_trials): # accel_x[i] = data[i]["accel_x"][0] # accel_y[i] = data[i]["accel_y"][0] # accel_z[i] = data[i]["accel_z"][0] gyro_x[i] = data[i]["gyro_x"][0] gyro_y[i] = data[i]["gyro_y"][0] gyro_z[i] = data[i]["gyro_z"][0] time_array[i] = data[i]["time"][0] labels[i] = data[i]["labels"][0] fs_fsr.append(data[i]["Fs_fsr"][0][0]) """Feature extraction""" """First derivative""" # fder_gyro_x = [] # for i in range(n_trials): # der = [] # der.append(gyro_x[i][0]) # for j in range(1,len(gyro_x[i])-1): # der.append((gyro_x[i][j+1]-gyro_x[i][j-1])/2) # der.append(gyro_x[i][-1]) # fder_gyro_x.append(der) fder_gyro_y = [] for i in range(n_trials): der = [] der.append(gyro_y[i][0]) for j in range(1, len(gyro_y[i]) - 1): der.append((gyro_y[i][j + 1] - gyro_y[i][j - 1]) / 2) der.append(gyro_y[i][-1]) fder_gyro_y.append(der) # fder_gyro_z = [] # for i in range(n_trials): # der = [] # der.append(gyro_z[i][0]) # for j in range(1,len(gyro_z[i])-1): # der.append((gyro_z[i][j+1]-gyro_z[i][j-1])/2) # der.append(gyro_z[i][-1]) # fder_gyro_z.append(der) """Second derivative""" # sder_gyro_x = [] # for i in range(n_trials): # der = [] # der.append(fder_gyro_x[i][0]) # for j in range(1,len(fder_gyro_x[i])-1): # der.append((fder_gyro_x[i][j+1]-fder_gyro_x[i][j-1])/2) # der.append(fder_gyro_x[i][-1]) # sder_gyro_x.append(der) # # sder_gyro_y = [] # for i in range(n_trials): # der = [] # der.append(fder_gyro_y[i][0]) # for j in range(1,len(fder_gyro_y[i])-1): # der.append((fder_gyro_y[i][j+1]-fder_gyro_y[i][j-1])/2) # der.append(fder_gyro_y[i][-1]) # sder_gyro_y.append(der) # # sder_gyro_z = [] # for i in range(n_trials): # der = [] # der.append(fder_gyro_z[i][0]) # for j in range(1,len(fder_gyro_z[i])-1): # der.append((fder_gyro_z[i][j+1]-fder_gyro_z[i][j-1])/2) # der.append(fder_gyro_z[i][-1]) # sder_gyro_z.append(der) """Peak detector""" # window_wid = 15 # Window width should be odd # search_ratio = window_wid/2 # pdet_gyro_x = [] # for i in range(n_trials): # pdet = [] # for j in range(len(gyro_x[i])): # if j <= search_ratio: # win = gyro_x[i][:j+search_ratio+1] # elif j >= len(gyro_x[i])-search_ratio-1: # win = gyro_x[i][j-search_ratio:] # else: # win = gyro_x[i][j-search_ratio:j+search_ratio+1] # pdet.append(gyro_x[i][j]/max(win)) # 
pdet_gyro_x.append(pdet) # print len(gyro_x) # print len(pdet_gyro_x) # for i in range(3): # print len(gyro_x[i]) # print len(pdet_gyro_x[i]) # pdet_gyro_y = [] # for i in range(n_trials): # pdet = [] # for j in range(len(gyro_y[i])): # if j <= search_ratio: # win = gyro_y[i][:j+search_ratio+1] # elif j >= len(gyro_y[i])-search_ratio-1: # win = gyro_y[i][j-search_ratio:] # else: # win = gyro_y[i][j-search_ratio:j+search_ratio+1] # pdet.append(gyro_y[i][j]/max(win)) # pdet_gyro_y.append(pdet) # # pdet_gyro_z = [] # for i in range(n_trials): # pdet = [] # for j in range(len(gyro_z[i])): # if j <= search_ratio: # win = gyro_z[i][:j+search_ratio+1] # elif j >= len(gyro_z[i])-search_ratio-1: # win = gyro_z[i][j-search_ratio:] # else: # win = gyro_z[i][j-search_ratio:j+search_ratio+1] # pdet.append(gyro_z[i][j]/max(win)) # pdet_gyro_z.append(pdet) """Create training and test data""" ff = [[] for x in range(0, n_trials)] for j in range(0, n_trials): for k in range(0, len(time_array[j])): f_ = [] # f_.append(accel_x[j][k]) # f_.append(accel_y[j][k]) # f_.append(accel_z[j][k]) # f_.append(gyro_x[j][k]) # f_.append(fder_gyro_x[j][k]) # f_.append(sder_gyro_x[j][k]) # f_.append(pdet_gyro_x[j][k]) f_.append(gyro_y[j][k]) f_.append(fder_gyro_y[j][k]) # f_.append(sder_gyro_y[j][k]) # f_.append(pdet_gyro_y[j][k]) # f_.append(gyro_z[j][k]) # f_.append(fder_gyro_z[j][k]) # f_.append(sder_gyro_z[j][k]) # f_.append(pdet_gyro_z[j][k]) ff[j].append(f_) n_signals = len(ff[0][0]) """cHMM""" startprob = [0.25, 0.25, 0.25, 0.25] state_names = ['hs', 'ff', 'ho', 'sw'] rospy.logwarn("""Intra-subject training""") print("""Intra-subject training""") # for leave_one_out in range(0, n_trials): for leave_one_out in range(1, 2): rospy.logwarn("-------TRIAL {}-------".format(leave_one_out + 1)) print("-------TRIAL {}-------".format(leave_one_out + 1)) """Transition matrix""" t = np.zeros((4, 4)) # Transition matrix prev = -1 for i in range(0, len(labels[leave_one_out])): # data[i]._replace(label = correct_mapping[data[i].label]) if prev == -1: prev = labels[leave_one_out][i] t[prev][labels[leave_one_out][i]] += 1.0 prev = labels[leave_one_out][i] t = normalize(t, axis=1, norm='l1') if verbose: rospy.logwarn("TRANSITION MATRIX\n" + str(t)) n_classes = 4 class_data = [[] for x in range(n_classes)] full_data = [] full_labels = [] for i in range(len(ff[leave_one_out])): full_data.append(ff[leave_one_out][i]) full_labels.append(labels[leave_one_out][i]) # print full_data == ff[leave_one_out] # print full_labels == labels[leave_one_out] # print len(full_data) == len(full_labels) # for i in range(0,len(ff[leave_one_out-1])): # full_data.append(ff[leave_one_out-1][i]) # full_labels.append(labels[leave_one_out-1][i]) # for i in range(0,len(ff[(leave_one_out+1) % n_trials])): # full_data.append(ff[(leave_one_out+1) % n_trials][i]) # full_labels.append(labels[(leave_one_out+1) % n_trials][i]) # print len(full_data) == (len(ff[leave_one_out]) + len(ff[leave_one_out-1]) + len(ff[(leave_one_out+1) % n_trials])) # print full_data # print len(full_data) # print full_labels # print len(full_labels) for i in range(0, len(full_data)): class_data[full_labels[i]].append(full_data[i]) """Multivariate Gaussian Distributions for each hidden state""" class_means = [[[] for x in range(n_signals)] for i in range(n_classes)] class_vars = [[[] for x in range(n_signals)] for i in range(n_classes)] class_std = [[[] for x in range(n_signals)] for i in range(n_classes)] class_cov = [] classifiers = [] for i in range(0, n_classes): # cov = 
np.ma.cov(np.array(class_data[i]), rowvar=False) cov = np.cov(np.array(class_data[i]), rowvar=False) class_cov.append(cov) for j in range(0, n_signals): class_means[i][j] = np.array( class_data[i][:])[:, [j]].mean(axis=0) class_vars[i][j] = np.array(class_data[i][:])[:, [j]].var(axis=0) class_std[i][j] = np.array(class_data[i][:])[:, [j]].std(axis=0) print "\n" + str(class_cov) + "\n" """Classifier initialization""" distros = [] hmm_states = [] for i in range(n_classes): dis = MGD\ (np.array(class_means[i]).flatten(), np.array(class_cov[i])) st = State(dis, name=state_names[i]) distros.append(dis) hmm_states.append(st) model = HMM(name="Gait") model.add_states(hmm_states) """Initial transitions""" for i in range(0, n_classes): model.add_transition(model.start, hmm_states[i], startprob[i]) """Left-right model""" for i in range(0, n_classes): for j in range(0, n_classes): model.add_transition(hmm_states[i], hmm_states[j], t[i][j]) model.bake() # print (model.name) # rospy.logwarn("N. observations: " + str(model.d)) # print (model.edges) # rospy.logwarn("N. hidden states: " + str(model.silent_start)) # print model """Training""" # limit = int(len(ff1)*(8/10.0)) # 80% of data to test, 20% to train # x_train = list([ff1[:limit]]) # x_train = list([ff1,ff2]) # x_train = list([ff2]) x_train = [] for i in range(0, len(ff[leave_one_out - 1])): x_train.append(ff[leave_one_out - 1][i]) for i in range(0, len(ff[(leave_one_out + 1) % n_trials])): x_train.append(ff[(leave_one_out + 1) % n_trials][i]) x_train = list([x_train]) rospy.logwarn("Training...") model.fit(x_train, algorithm='baum-welch', verbose=verbose) # model.fit(list([ff[leave_one_out-1]]), algorithm='baum-welch', verbose=verbose) # model.fit(list([ff[(leave_one_out+1) % n_trials]]), algorithm='baum-welch', verbose=verbose) # model.fit(seq, algorithm='viterbi', verbose='True') """Find most-likely sequence""" # logp, path = model.viterbi(ff[limit:]) logp, path = model.viterbi(ff[leave_one_out]) # print logp # print path class_labels = [] for i in range(len(labels[leave_one_out])): path_phase = path[i][1].name for state in range(n_classes): if path_phase == state_names[state]: class_labels.append(state) labels[leave_one_out] = list(labels[leave_one_out][1:]) # Saving classifier labels into csv file # np.savetxt(packpath+"/log/intra_labels/"+prefix+"_labels"+str(leave_one_out+1)+".csv", class_labels, delimiter=",", fmt='%s') # rospy.logwarn("csv file with classifier labels was saved.") sum = 0.0 true_pos = 0.0 false_pos = 0.0 true_neg = 0.0 false_neg = 0.0 tol = 6e-2 # Tolerance window of 60 ms tol_window = int((tol / 2) / (1 / float(fs_fsr[leave_one_out]))) print "FSR freq: " + str(fs_fsr[leave_one_out]) print "Tolerance win: " + str(tol_window) # print tol_window # # print type(tol_window) # for i in range(0, len(labels[leave_one_out])): # """Tolerance window""" # if i > tol_window+1 and i < len(labels[leave_one_out])-tol_window: # # curr_tol = time_array[leave_one_out][i+tol_window]-time_array[leave_one_out][i-tol_window] # # print curr_tol # win = [] # for j in range(i-tol_window,i+tol_window+1): # win.append(state_names[labels[leave_one_out][j]]) # if path[i][1].name in win: # sum += 1.0 # else: # if path[i][1].name == labels[leave_one_out][i]: # sum += 1.0 """Performance Evaluation""" rospy.logwarn("Calculating results...") time_error = [[] for x in range(n_classes)] for phase in range(n_classes): for i in range(len(labels[leave_one_out])): """Tolerance window""" if i >= tol_window and i < len( labels[leave_one_out]) - tol_window: # 
curr_tol = time_array[leave_one_out][i+tol_window]-time_array[leave_one_out][i-tol_window] # print curr_tol win = [] for j in range(i - tol_window, i + tol_window + 1): win.append(labels[leave_one_out][j]) """Calculate time error with true positives""" if class_labels[i] == phase: if class_labels[i] in win: for k in range(len(win)): if win[k] == phase: time_error[phase].append( (k - tol_window) / fs_fsr[leave_one_out]) break true_pos += 1.0 if verbose: print phase + ", " + state_names[labels[ leave_one_out][i]] + ", " + class_labels[ i] + ", true_pos" else: false_pos += 1.0 if verbose: print phase + ", " + state_names[labels[ leave_one_out][i]] + ", " + class_labels[ i] + ", false_pos" else: if phase != labels[leave_one_out][i]: # if phase not in win: true_neg += 1.0 if verbose: print phase + ", " + state_names[labels[ leave_one_out][i]] + ", " + class_labels[ i] + ", true_neg" else: false_neg += 1.0 if verbose: print phase + ", " + state_names[labels[ leave_one_out][i]] + ", " + class_labels[ i] + ", false_neg" else: if class_labels[i] == phase: if class_labels[i] == labels[leave_one_out][i]: true_pos += 1.0 else: false_pos += 1.0 else: if phase != labels[leave_one_out][i]: true_neg += 1.0 else: false_neg += 1.0 rospy.logwarn("Timing error") print("Timing error") for phase in range(n_classes): rospy.logwarn("(" + state_names[phase] + ")") print "(" + state_names[phase] + ")" if len(time_error[phase]) > 0: rospy.logwarn( str(np.mean(time_error[phase])) + " + " + str(np.std(time_error[phase]))) print str(np.mean(time_error[phase])) + " + " + str( np.std(time_error[phase])) else: rospy.logwarn("0.06 + 0") print "0.06 + 0" """Calculate mean time (MT) of stride and each gait phase and Coefficient of Variation (CoV)""" rospy.logwarn("Mean time (MT) and Coefficient of Variance (CoV)") print("Mean time (MT) and Coefficient of Variance (CoV)") n_group = 0 for label_group in [class_labels, labels[leave_one_out]]: if n_group == 0: rospy.logwarn("Results for HMM:") print("Results for HMM:") else: rospy.logwarn("Results for FSR:") print("Results for FSR:") curr_label = -1 count = 0 n_phases = 0 stride_samples = 0 phases_time = [[] for x in range(n_classes)] stride_time = [] for label in label_group: # for label in class_labels: if curr_label != label: n_phases += 1 stride_samples += count if label == 0: # Gait start: HS if n_phases == 4: # If a whole gait cycle has past stride_time.append(stride_samples / fs_fsr[leave_one_out]) n_phases = 0 stride_samples = 0 phases_time[label - 1].append(count / fs_fsr[leave_one_out]) curr_label = label count = 1 else: count += 1.0 for phase in range(n_classes): mean_time = np.mean(phases_time[phase]) phase_std = np.std(phases_time[phase]) rospy.logwarn("(" + state_names[phase] + ")") print "(" + state_names[phase] + ")" rospy.logwarn("Mean time: " + str(mean_time) + " + " + str(phase_std)) print "Mean time: " + str(mean_time) + " + " + str(phase_std) rospy.logwarn("CoV: " + str(phase_std / mean_time * 100.0)) print("CoV: " + str(phase_std / mean_time * 100.0)) mean_time = np.mean(stride_time) phase_std = np.std(stride_time) rospy.logwarn("(Stride)") print "(Stride)" rospy.logwarn("Mean time: " + str(mean_time) + " + " + str(phase_std)) print "Mean time: " + str(mean_time) + " + " + str(phase_std) rospy.logwarn("CoV: " + str(phase_std / mean_time * 100.0)) print("CoV: " + str(phase_std / mean_time * 100.0)) n_group += 1 """Accuracy""" # acc = sum/len(labels[leave_one_out]) if (true_neg + true_pos + false_neg + false_pos) != 0.0: acc = (true_neg + true_pos) / 
(true_neg + true_pos + false_neg + false_pos) else: acc = 0.0 """Sensitivity or True Positive Rate""" if true_pos + false_neg != 0: tpr = true_pos / (true_pos + false_neg) else: tpr = 0.0 """Specificity or True Negative Rate""" if false_pos + true_neg != 0: tnr = true_neg / (false_pos + true_neg) else: tnr = 0.0 # rospy.logwarn("Accuracy: {}%".format(acc*100)) rospy.logwarn("Accuracy: {}%".format(acc * 100.0)) # print("Accuracy: {}%".format(acc*100.0)) rospy.logwarn("Sensitivity: {}%".format(tpr * 100.0)) # print("Sensitivity: {}%".format(tpr*100.0)) rospy.logwarn("Specificity: {}%".format(tnr * 100.0)) # print("Specificity: {}%".format(tnr*100.0)) """Goodness index""" G = np.sqrt((1 - tpr)**2 + (1 - tnr)**2) if G <= 0.25: rospy.logwarn("Optimum classifier (G = {} <= 0.25)".format(G)) # print("Optimum classifier (G = {} <= 0.25)".format(G)) elif G > 0.25 and G <= 0.7: rospy.logwarn("Good classifier (0.25 < G = {} <= 0.7)".format(G)) # print("Good classifier (0.25 < G = {} <= 0.7)".format(G)) elif G == 0.7: rospy.logwarn("Random classifier (G = 0.7)") # print("Random classifier (G = 0.7)") else: rospy.logwarn("Bad classifier (G = {} > 0.7)".format(G))
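# The gait-phase trainer above relies on several names that are never imported in this
# excerpt (rospy, rospkg, scio, normalize, Int32, State, MGD, HMM). A plausible import
# preamble, assuming MGD and HMM alias pomegranate's multivariate Gaussian and HMM
# classes; these aliases are inferred from usage and not confirmed by the excerpt:
import sys
import numpy as np
import scipy.io as scio
import rospy
import rospkg
from std_msgs.msg import Int32
from sklearn.preprocessing import normalize
from pomegranate import HiddenMarkovModel as HMM
from pomegranate import MultivariateGaussianDistribution as MGD
from pomegranate import State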