Ejemplo n.º 1
0
class HMMWrapper:
    """Convenience layer around a ``HiddenMarkovModel`` instance.

    States are registered through :meth:`add_state` together with an optional
    start probability. The transitions from the model's start state are only
    created when :meth:`bake` is called, so that states without an explicit
    start probability can share whatever probability mass is left over.
    """

    def __init__(self):
        self.model = HiddenMarkovModel()
        self.start = self.model.start
        self.end = self.model.end
        # (state, start_prob) pairs recorded by add_state() until bake() runs.
        self.states_before_bake = []
        # Filled from the underlying model by bake().
        self.states = None

    def add_state(self, state, start_prob=0):
        """Register ``state`` in the model and remember its start probability.

        Args:
            state: State object; it must expose a ``name`` attribute, which
                :meth:`bake` reads.
            start_prob: Probability of starting in ``state``. A falsy value
                (the default ``0``) means "not specified": :meth:`bake` then
                gives the state an equal share of the unassigned probability.
        """
        self.states_before_bake.append((state, start_prob))
        self.model.add_state(state)

    def add_transition(self, start_state, end_state, prob):
        """Add a transition ``start_state -> end_state`` with probability ``prob``."""
        self.model.add_transition(start_state, end_state, prob)

    def bake(self):
        """Create the start transitions, bake the model and expose its states.

        States whose name contains ``'none'`` never receive a start
        transition. Every other state gets either its explicit start
        probability or an equal share of ``1.0`` minus the sum of the
        explicit ones. If no state is left without an explicit probability,
        nothing is shared out.
        """
        starter_states_no_prob = []
        free_start_prob = 1.0
        for state, start_prob in self.states_before_bake:
            if 'none' in state.name:
                continue
            if start_prob:
                free_start_prob -= start_prob
                print('asignado ' + str(start_prob) + ' a ' + state.name)
                self.add_transition(self.start, state, start_prob)
            else:
                # Keep the state object itself (not the recorded pair) so it
                # can be handed straight to add_transition below.
                starter_states_no_prob.append(state)

        # Guard against dividing by zero when every eligible state already
        # carries an explicit start probability.
        if starter_states_no_prob:
            len_no_prob = len(starter_states_no_prob)
            starter_prob = free_start_prob / len_no_prob
            print(len_no_prob, starter_prob)
            for state in starter_states_no_prob:
                self.add_transition(self.start, state, starter_prob)

        self.model.bake()
        self.states = self.model.states

    def make_states_from_alignment(self, first_state, last_state, seq_matrix,
                                   name):
        """Build states and transitions from ``seq_matrix`` and add them here.

        The matrix is run through the module-level helpers
        ``column_clasify`` -> ``create_zones`` -> ``group_states``; the
        resulting states are added with ``add_states`` and the transitions
        computed by ``calculate_transitions`` (between ``first_state`` and
        ``last_state``) are applied with ``apply_transitions``.
        """
        columns = column_clasify(seq_matrix)
        zones = create_zones(columns)
        grouped_states = group_states(zones, name)
        add_states(self, grouped_states)
        trans = calculate_transitions(first_state, last_state, grouped_states)
        apply_transitions(self, trans)

    def predict(self, *args, **kwargs):
        """Forward all arguments to the underlying model's ``predict``."""
        return self.model.predict(*args, **kwargs)
Ejemplo n.º 2
0
    def cluster(self):
        """Assign a discrete state label to every sample of every dataset.

        Reads ``self.preprocessed_data`` (iterated and concatenated below, so
        it is expected to be a sequence of array-likes), ``self.alg``
        (``"Kmeans"`` or ``"HMM"``) and ``self.K`` (number of clusters /
        hidden states). On success it stores one label array per dataset in
        ``self.states`` and sets ``self.experiment_progress`` to ``2``.

        Returns:
            ``-1`` if there is no preprocessed data or ``self.alg`` is not
            recognised; ``None`` otherwise.
        """
        if self.preprocessed_data is None:
            print("No preprocessed_data attribute found")
            return -1

        if self.alg == "Kmeans":
            from sklearn.cluster import KMeans
            # `precompute_distances` is intentionally not passed: it became a
            # deprecated no-op in scikit-learn 0.23 and was removed in 1.0,
            # where passing it raises TypeError.
            km = KMeans(n_clusters=self.K)
            # Fit on all datasets stacked together, then label each one.
            km.fit(np.concatenate(self.preprocessed_data))
            self.states = [km.predict(d) for d in self.preprocessed_data]

        elif self.alg == "HMM":
            from pomegranate import HiddenMarkovModel, MultivariateGaussianDistribution
            # from_samples is a classmethod; no throwaway instance is needed.
            hmm = HiddenMarkovModel.from_samples(
                MultivariateGaussianDistribution,
                n_components=self.K,
                X=self.preprocessed_data.copy(),
            )
            self.states = [
                np.array(hmm.predict(d.copy())) for d in self.preprocessed_data
            ]
        else:
            print("Unrecognised or undefined clustering algorithm.")
            return -1
        self.experiment_progress = 2
Ejemplo n.º 3
0
# Outgoing edges of s33 visible here: a strong self-loop and a rare hop to s333.
hmm.add_transition(s33, s33, 0.92)
hmm.add_transition(s33, s333, 0.01)

# s333 moves to each of the other eight states with probability 0.01 and
# stays where it is with probability 0.92.
for target in (s1, s11, s111, s2, s22, s222, s3, s33):
    hmm.add_transition(s333, target, 0.01)
hmm.add_transition(s333, s333, 0.92)

# Finalise the topology, train on X, then label X with the trained model.
hmm.bake()
hmm.fit(X)  # , weights=w) hmm does not support weights in pomegranate
preds = hmm.predict(X)
probs = hmm.predict_proba(X)

data_thr['preds'] = pd.Series(preds).astype("category")

# Ten-colour palette (Spectral9), trimmed to the number of distinct
# predicted labels plus two.
color_key = [
    "red", "blue", "yellow", "grey", "black",
    "purple", "pink", "brown", "green", "orange",
][:len(set(preds)) + 2]

# parameters[0] is stored as the mean and parameters[1] as the covariance of
# each of the first nine states' distributions.
means, covs = [
    np.array([np.array(hmm.states[idx].distribution.parameters[k])
              for idx in range(9)])
    for k in (0, 1)
]

# transform cov for non-standardized data:
covs = np.array([np.dot(np.diag(np.sqrt(scaler.var_)),
Ejemplo n.º 4
0
reunify_sequences(hm_model, mdd_states_sequences, background, [1, 1, 1, 1])

hm_model.bake()

# Single-letter nucleotide symbols, kept as module-level names.
a, c, g, t = 'a', 'c', 'g', 't'

# Test sequence as an array of one-character strings.
seq = numpy.array(list(
    "ctgtctcccggcggccagcggcgga"
    "acctgtgcgagtggatgcggaagcc"
    "ggcgcagcagtccctcggcagccaa"
    "ggtaagcagaggctgcgccccttcg"
    "gagggtgcttgggaaggcgcgggtc"
    "gagccagtggctgctgcgcgtcg"
))
hmm_predictions = hm_model.predict(seq)
#print("sequence: {}".format(' '.join(seq)))
#print("hmm pred: {}".format(' '.join(map( str, hmm_predictions))))

# Pair every symbol with the name of the state predicted at its position.
empar = []
for i, s in enumerate(seq):
    predicted_state = hm_model.states[hmm_predictions[i]]
    empar.append((s, predicted_state.name))
print(len(hm_model.states), hm_model.states[19])
print(empar)

second_model = insert_delete_main_hmm(og_matrix)
print(second_model.states)

#for idx, val in enumerate(final_percent):
#    final_percent[idx] = {
#        'a': count_a[idx] / examples,
#        'c': count_c[idx] / examples,