class HMMWrapper:
    """Wrapper around a ``HiddenMarkovModel`` that defers start transitions.

    States are registered together with an optional start probability.
    The transitions from the model's start state are only created in
    :meth:`bake`, where any probability mass not explicitly assigned is
    split evenly among the states registered without one.

    NOTE(review): ``HiddenMarkovModel`` is expected to be in scope at module
    level (presumably pomegranate's class) -- confirm against the imports.
    """

    def __init__(self):
        self.model = HiddenMarkovModel()
        self.start = self.model.start
        self.end = self.model.end
        # (state, start_prob) pairs collected by add_state(), used by bake().
        self.states_before_bake = []
        # Filled from ``self.model.states`` once bake() has run.
        self.states = None

    def add_state(self, state, start_prob=0):
        """Register ``state`` in the model and remember its start probability.

        Args:
            state: State object; must expose a ``name`` attribute for bake().
            start_prob: Probability of the start -> ``state`` transition.
                A falsy value (the default ``0``) means "share whatever
                probability is left over" when the model is baked.
        """
        self.states_before_bake.append((state, start_prob))
        self.model.add_state(state)

    def add_transition(self, start_state, end_state, prob):
        """Add a ``start_state`` -> ``end_state`` transition with ``prob``."""
        self.model.add_transition(start_state, end_state, prob)

    def bake(self):
        """Create the start transitions, bake the model, expose its states.

        States whose name contains ``'none'`` never get a start transition.
        States with an explicit start probability get exactly that value;
        the remaining mass (``1.0`` minus the explicit ones) is divided
        evenly among the eligible states registered without a probability.
        """
        unassigned = []
        free_start_prob = 1.0
        for state, start_prob in self.states_before_bake:
            if 'none' in state.name:
                continue
            if not start_prob:
                unassigned.append(state)
                continue
            free_start_prob -= start_prob
            print('asignado ' + str(start_prob) + ' a ' + state.name)
            self.add_transition(self.start, state, start_prob)

        len_no_prob = len(unassigned)
        # Nothing to share the remainder with: avoid dividing by zero.
        starter_prob = free_start_prob / len_no_prob if len_no_prob else 0.0
        print(len_no_prob, starter_prob)
        for state in unassigned:
            # Pass the state object itself, not the (state, prob) pair.
            self.add_transition(self.start, state, starter_prob)

        self.model.bake()
        self.states = self.model.states

    def make_states_from_alignment(self, first_state, last_state, seq_matrix,
                                   name):
        """Build states and transitions from ``seq_matrix`` and add them.

        Runs the module-level helper pipeline ``column_clasify`` ->
        ``create_zones`` -> ``group_states`` -> ``add_states``, then
        ``calculate_transitions`` -> ``apply_transitions``. The helpers are
        defined outside this class; see them for the exact semantics.
        """
        columns = column_clasify(seq_matrix)
        zones = create_zones(columns)
        grouped_states = group_states(zones, name)
        add_states(self, grouped_states)
        trans = calculate_transitions(first_state, last_state, grouped_states)
        apply_transitions(self, trans)

    def predict(self, *args, **kwargs):
        """Forward all arguments to ``self.model.predict`` and return its result."""
        return self.model.predict(*args, **kwargs)
def cluster(self):
    """Label every sample in ``self.preprocessed_data`` with a cluster/state.

    The algorithm is chosen by ``self.alg`` (``"Kmeans"`` or ``"HMM"``) and
    uses ``self.K`` clusters/components. On success ``self.states`` holds one
    array of predicted labels per entry of ``self.preprocessed_data`` and
    ``self.experiment_progress`` is set to ``2``.

    Returns:
        ``-1`` when ``self.preprocessed_data`` is ``None`` or ``self.alg`` is
        not recognised (a message is printed); ``None`` on success.
    """
    if self.preprocessed_data is None:
        print("No preprocessed_data attribute found")
        return -1

    if self.alg == "Kmeans":
        from sklearn.cluster import KMeans

        # ``precompute_distances`` is intentionally not passed: it was
        # deprecated in scikit-learn 0.23 and removed in 1.0, and it never
        # influenced the resulting clustering.
        km = KMeans(n_clusters=self.K)
        # Fit on all entries pooled together (flattens all dates together).
        km.fit(np.concatenate(self.preprocessed_data))
        self.states = [km.predict(d) for d in self.preprocessed_data]
    elif self.alg == "HMM":
        from pomegranate import HiddenMarkovModel, MultivariateGaussianDistribution

        # ``from_samples`` is called on the class; no instance is needed.
        hmm = HiddenMarkovModel.from_samples(
            MultivariateGaussianDistribution,
            n_components=self.K,
            X=self.preprocessed_data.copy(),
        )
        # Copies are passed so the stored data is not handed to pomegranate
        # directly -- NOTE(review): presumably to guard against in-place
        # modification; confirm.
        self.states = [
            np.array(hmm.predict(d.copy())) for d in self.preprocessed_data
        ]
    else:
        print("Unrecognised or undefined clustering algorithm.")
        return -1

    self.experiment_progress = 2
# Tail of the hand-written transition matrix. The s333 row below sends 0.01
# to each of the other eight states and keeps 0.92 on itself (sums to 1.0).
hmm.add_transition(s33, s33, 0.92)
hmm.add_transition(s33, s333, 0.01)
hmm.add_transition(s333, s1, 0.01)
hmm.add_transition(s333, s11, 0.01)
hmm.add_transition(s333, s111, 0.01)
hmm.add_transition(s333, s2, 0.01)
hmm.add_transition(s333, s22, 0.01)
hmm.add_transition(s333, s222, 0.01)
hmm.add_transition(s333, s3, 0.01)
hmm.add_transition(s333, s33, 0.01)
hmm.add_transition(s333, s333, 0.92)
# Finalise the model structure, then fit it on X and decode X with the
# fitted model.
hmm.bake()
hmm.fit(X)  # , weights=w) hmm does not support weights in pomegranate
preds = hmm.predict(X)
probs = hmm.predict_proba(X)
# Store the predicted state per row as a categorical column.
data_thr['preds'] = pd.Series(preds).astype("category")
color_key = ["red", "blue", "yellow", "grey", "black", "purple", "pink", "brown", "green", "orange"]  # Spectral9
# Keep two more colours than there are distinct predicted states.
color_key = color_key[:len(set(preds))+2]
# Collect distribution parameters of the first 9 entries of hmm.states.
# NOTE(review): presumably parameters[0] is the mean vector and parameters[1]
# the covariance matrix of each state's Gaussian, and indices 0-8 are the nine
# emitting states -- confirm against the state definitions and pomegranate.
covs = np.array([np.array(hmm.states[m].distribution.parameters[1]) for m in range(9)])
means = np.array([np.array(hmm.states[m].distribution.parameters[0]) for m in range(9)])
# transform cov for non-standardized data:
# (the statement below continues past the end of this excerpt)
covs = np.array([np.dot(np.diag(np.sqrt(scaler.var_)),
# Attach the MDD state sequences to the main model (see reunify_sequences for
# the meaning of the weight list), then finalise the model.
reunify_sequences(hm_model, mdd_states_sequences, background, [1, 1, 1, 1])
hm_model.bake()

# Single-letter aliases so the test sequence below can be written without
# quotes.
a, c, g, t = 'a', 'c', 'g', 't'

# Fixed nucleotide sequence used to exercise the baked model.
seq = numpy.array([
    c, t, g, t, c, t, c, c, c, g,
    g, c, g, g, c, c, a, g, c, g,
    g, c, g, g, a, a, c, c, t, g,
    t, g, c, g, a, g, t, g, g, a,
    t, g, c, g, g, a, a, g, c, c,
    g, g, c, g, c, a, g, c, a, g,
    t, c, c, c, t, c, g, g, c, a,
    g, c, c, a, a, g, g, t, a, a,
    g, c, a, g, a, g, g, c, t, g,
    c, g, c, c, c, c, t, t, c, g,
    g, a, g, g, g, t, g, c, t, t,
    g, g, g, a, a, g, g, c, g, c,
    g, g, g, t, c, g, a, g, c, c,
    a, g, t, g, g, c, t, g, c, t,
    g, c, g, c, g, t, c, g,
])

hmm_predictions = hm_model.predict(seq)
#print("sequence: {}".format(' '.join(seq)))
#print("hmm pred: {}".format(' '.join(map( str, hmm_predictions))))

# Pair every observed symbol with the name of the state predicted for it.
empar = [
    (symbol, hm_model.states[hmm_predictions[position]].name)
    for position, symbol in enumerate(seq)
]

# NOTE(review): index 19 is hard-coded; it is printed only for inspection.
print(len(hm_model.states), hm_model.states[19])
print(empar)

second_model = insert_delete_main_hmm(og_matrix)
print(second_model.states)
#for idx, val in enumerate(final_percent):
#    final_percent[idx] = {
#        'a': count_a[idx] / examples,
#        'c': count_c[idx] / examples,