def _fit_word_model(X, nstates, **kwargs):
    """Fit a left-to-right HMM for a single word and export its transitions.

    The model has ``nstates`` emitting states named ``"0"`` to
    ``str(nstates - 1)``. Its start and end states are renamed ``"-1"`` and
    ``str(nstates)`` so that every state name can be parsed as an integer.

    Args:
        X: Training sequences, passed unchanged to ``HiddenMarkovModel.fit``.
        nstates: Number of emitting states in the chain.
        **kwargs: Additional keyword arguments forwarded to ``fit``.

    Returns:
        A list of ``(source, target, probability)`` tuples, one per edge of
        the fitted model graph, with states given by their integer names.

    Raises:
        ValueError: If the value returned by ``fit`` is NaN.
    """
    chain = pomegranate.HiddenMarkovModel(None)
    chain.start.name = str(-1)
    chain.end.name = str(nstates)

    emitting = []
    for idx in range(nstates):
        emission = PrecomputedDistribution(idx, nstates)
        emitting.append(pomegranate.State(emission, name=str(idx)))

    # Register every state together with its self-loop.
    for node in emitting:
        chain.add_state(node)
        chain.add_transition(node, node, 0.8)

    chain.add_transition(chain.start, emitting[0], 1)

    # Each state steps to its direct successor; the last one leaves the model.
    for current, successor in zip(emitting, emitting[1:]):
        chain.add_transition(current, successor, 0.15)
    chain.add_transition(emitting[-1], chain.end, 0.15)

    # NOTE(review): this edge runs from the second-to-last state back to
    # state 1 -- confirm that this is the intended topology.
    chain.add_transition(emitting[-2], emitting[1], 0.05)

    # Edges that jump over one intermediate state.
    for target in range(2, nstates - 1):
        chain.add_transition(emitting[target - 2], emitting[target], 0.05)

    chain.bake()
    improvement = chain.fit(X, **kwargs)
    if np.isnan(improvement):
        raise ValueError
    print("HMM improvement: {:2.4f}".format(improvement))

    transitions = []
    for source, target, attrs in chain.graph.edges(data=True):
        transitions.append(
            (int(source.name), int(target.name), np.exp(attrs['probability'])))
    return transitions
def load_params(self, file_contents):
    """Restore a trained Vanilla HMM and its settings from saved text.

    The text is split on the separator line ``START_NEW_SECTION`` and must
    yield exactly seven sections, in this order: model-type tag, model YAML,
    feature names (one per line), YAML for ``gui_state_dict``, YAML for
    ``pg_gui_state_dict``, YAML for ``str2num_state_dict``, and YAML for the
    miscellaneous settings.

    On success this sets ``trained``, ``feature_list``, ``gui_state_dict``,
    ``pg_gui_state_dict``, ``str2num_state_dict``, ``nb_states``,
    ``data.eps``, ``supervision_influence``, ``framerate`` and ``timestamp``
    on ``self``.

    Args:
        file_contents: Full text of a saved parameter file.

    Raises:
        ValueError: If the model-type tag is not ``'VanillaHmm'`` or the file
            does not contain exactly seven sections.
    """
    sections = file_contents.split('\nSTART_NEW_SECTION\n')

    if sections[0] != 'VanillaHmm':
        error_msg = '\nERROR: loaded model parameters are not for a Vanilla HMM!'
        raise ValueError(error_msg)

    # Fail with an explicit message instead of a bare tuple-unpacking error
    # when the file is truncated or has extra sections.
    if len(sections) != 7:
        raise ValueError(
            '\nERROR: expected 7 sections in the model parameter file, '
            'found {}'.format(len(sections)))

    (_, model_txt, feature_txt, gui_state_dict_txt, pg_gui_state_dict_txt,
     str2num_state_dict_txt, misc_txt) = sections

    self.trained = pg.HiddenMarkovModel().from_yaml(model_txt)
    self.feature_list = feature_txt.split('\n')

    # NOTE(review): yaml.FullLoader can construct more than plain data types;
    # if parameter files may come from untrusted sources, check whether these
    # three dictionaries can be read with yaml.SafeLoader instead.
    self.gui_state_dict = yaml.load(gui_state_dict_txt, Loader=yaml.FullLoader)
    self.pg_gui_state_dict = yaml.load(pg_gui_state_dict_txt, Loader=yaml.FullLoader)
    self.str2num_state_dict = yaml.load(str2num_state_dict_txt, Loader=yaml.FullLoader)

    misc_dict = yaml.load(misc_txt, Loader=yaml.SafeLoader)
    # A NaN epsilon is stored as the string 'nan'; convert it back.
    if misc_dict['dbscan_epsilon'] == 'nan':
        misc_dict['dbscan_epsilon'] = np.nan

    self.nb_states = misc_dict['nb_states']
    self.data.eps = misc_dict['dbscan_epsilon']
    self.supervision_influence = misc_dict['supervision_influence']
    self.framerate = misc_dict['framerate']
    self.timestamp = numeric_timestamp()
def hmm(nstates=2, bias=0.1):
    """Build a baked HMM over the symbols 0 and 1 with random transitions.

    State 0 emits symbol 0 with probability ``bias``; every other state emits
    symbol 0 with probability ``1 - bias``. The transition matrix is drawn
    from ``np.random.rand`` and normalised row by row; start probabilities
    are uniform over the states.

    Args:
        nstates: Number of hidden states.
        bias: Emission probability described above.

    Returns:
        The baked ``pmg.HiddenMarkovModel``.
    """
    def emission_prob(state_idx, symbol):
        # Only state 0 uses (bias, 1 - bias); the others use the mirror image.
        pair = (bias, 1 - bias) if state_idx == 0 else (1 - bias, bias)
        return pair[symbol]

    states = []
    for idx in range(nstates):
        emission = pmg.DiscreteDistribution({
            0: emission_prob(idx, 0),
            1: emission_prob(idx, 1),
        })
        states.append(pmg.State(emission, name='S%d' % idx))

    # Random transition matrix; each row is scaled to sum to one.
    trans = np.random.rand(nstates, nstates)
    for row in range(nstates):
        trans[row] = trans[row] / trans[row].sum()

    model = pmg.HiddenMarkovModel()
    model.add_states(states)
    start_prob = 1.0 / nstates
    for src_idx, src in enumerate(states):
        for dst_idx, dst in enumerate(states):
            model.add_transition(src, dst, trans[src_idx, dst_idx])
        model.add_transition(model.start, src, start_prob)
    model.bake()
    return model
def run():
    """Train a layered discrete HMM on a pickled dataset and print its NLL.

    Reads the module-level settings ``datasetload`` (file name inside
    ``datasets/``), ``bond_dimension`` (number of hidden states per sequence
    position) and ``n_iter`` (maximum number of Baum-Welch iterations).

    The model has one layer of ``bond_dimension`` states per column of the
    data. Every state of a layer is connected to every state of the next
    layer, and all emission and transition weights are initialised with
    ``np.random.rand``. After fitting, the mean negative log-likelihood over
    the training sequences and the elapsed time are printed.
    """
    # Load dataset
    path = 'datasets/'
    with open(path + datasetload, 'rb') as f:
        # NOTE(review): pickle.load can execute arbitrary code from the file;
        # only load datasets from trusted sources.
        a = pickle.load(f)
    X = a[0].astype(int)

    # Create HMM
    D = bond_dimension
    N = X.shape[1]
    n_symbols = np.max(X + 1)  # symbols are assumed to be 0 .. max(X)

    list_of_states = []
    for i in range(N):
        list_of_states.append([])
        for _ in range(D):
            emissions = dict()
            for symbol in range(n_symbols):
                emissions[str(symbol)] = np.random.rand()
            list_of_states[i].append(
                pomegranate.State(
                    pomegranate.DiscreteDistribution(emissions)))

    model = pomegranate.HiddenMarkovModel()
    # Fully connect each layer to the next one.
    for i in range(N - 1):
        for src in range(D):
            for dst in range(D):
                model.add_transition(list_of_states[i][src],
                                     list_of_states[i + 1][dst],
                                     np.random.rand())
    for k in range(D):
        model.add_transition(model.start, list_of_states[0][k],
                             np.random.rand())
    for k in range(D):
        model.add_transition(list_of_states[N - 1][k], model.end,
                             np.random.rand())
    model.bake()

    # Train HMM
    begin = time.time()
    sequencetrain = [[str(i) for i in v] for v in X]
    np.random.seed()
    # NOTE(review): min_iterations is fixed at 1000 while max_iterations comes
    # from n_iter -- confirm how the two interact when n_iter < 1000.
    model.fit(sequencetrain, algorithm='baum-welch', stop_threshold=1e-50,
              min_iterations=1000, max_iterations=n_iter)

    total_log_prob = 0
    for sequence in sequencetrain:
        total_log_prob += model.log_probability(sequence)
    neg_log_likelihood = -total_log_prob / len(sequencetrain)
    time_elapsed = time.time() - begin

    print("Negative log likelihood = %.3f" % (neg_log_likelihood))
    print("Time elapsed = %.2fs" % (time_elapsed))
def fit_non_sil_phn(data_init, n_mix, dim_feature, name_phn, covar_type='full'):
    """Build a baked 3-state left-to-right HMM for one phoneme.

    The three states are created by ``create_state`` from ``data_init[0]``,
    ``data_init[1]`` and ``data_init[2]`` with the name suffixes ``-first``,
    ``-mid`` and ``-last``. Each state loops on itself and moves to its direct
    successor; the first two states also have a very small weight of leaving
    the model directly. Despite the name, no training happens here.

    Args:
        data_init: Indexable holding the initialisation data of the 3 states.
        n_mix: Forwarded to ``create_state``.
        dim_feature: Forwarded to ``create_state``.
        name_phn: Phoneme name; also used as the model name.
        covar_type: Forwarded to ``create_state``.

    Returns:
        The baked ``pomegranate.HiddenMarkovModel``.
    """
    first, mid, last = [
        create_state(data_init=data_init[position],
                     n_mix=n_mix,
                     dim_feature=dim_feature,
                     name_phn=name_phn,
                     name_state=suffix,
                     covar_type=covar_type)
        for position, suffix in enumerate(('-first', '-mid', '-last'))
    ]

    model = pomegranate.HiddenMarkovModel(name_phn)
    for state in (first, mid, last):
        model.add_state(state)

    # (source, target, weight), listed in the order they are registered.
    edges = (
        (model.start, first, 1.0),
        (first, first, 0.5),
        (first, mid, 0.5),
        (first, model.end, 0.000001),
        (mid, mid, 0.5),
        (mid, model.end, 0.000001),
        (mid, last, 0.5),
        (last, last, 0.5),
        (last, model.end, 0.5),
    )
    for source, target, weight in edges:
        model.add_transition(source, target, weight)

    model.bake()
    return model
def generate_model(state, transition):
    """Build a baked two-state HMM over the symbols ``'A'`` and ``'B'``.

    State ``A`` emits ``'A'`` with probability ``state`` and state ``B`` emits
    ``'B'`` with probability ``state``. The states switch with weight
    ``transition`` and stay with weight ``1 - transition``; both are entered
    from the start with weight 0.5 and reach the end with weight 0.5.

    Args:
        state: Probability that a state emits its own symbol.
        transition: Weight of switching to the other state.

    Returns:
        The baked ``pomegranate.HiddenMarkovModel``.
    """
    model = pomegranate.HiddenMarkovModel()

    emit_a = pomegranate.DiscreteDistribution({'A': state, 'B': 1-state})
    A = pomegranate.State(emit_a, name='A')
    emit_b = pomegranate.DiscreteDistribution({'A': 1-state, 'B': state})
    B = pomegranate.State(emit_b, name='B')

    # (source, target, weight), listed in the order they are registered.
    edges = [
        (model.start, A, 0.5),
        (model.start, B, 0.5),
        (A, A, 1-transition),
        (A, B, transition),
        (B, A, transition),
        (B, B, 1-transition),
        (A, model.end, 0.5),
        (B, model.end, 0.5),
    ]
    for source, target, weight in edges:
        model.add_transition(source, target, weight)

    model.bake(verbose=False)
    return model
def load_params(self, file_contents):
    """Restore a trained substate HMM and its settings from saved text.

    The text is split on the separator line ``START_NEW_SECTION`` and must
    yield exactly seven sections, in this order: model-type tag, model YAML,
    feature names (one per line), YAML for ``gui_state_dict``, YAML for
    ``pg_gui_state_dict``, YAML for ``str2num_state_dict``, and YAML for the
    miscellaneous settings.

    On success this sets ``trained``, ``feature_list``, ``gui_state_dict``,
    ``pg_gui_state_dict``, ``str2num_state_dict``, ``nb_states``, ``buffer``
    and ``data.eps`` on ``self``.

    Args:
        file_contents: Full text of a saved parameter file.

    Raises:
        ValueError: If the model-type tag is not ``'SubstateHmm'`` or the file
            does not contain exactly seven sections.
    """
    sections = file_contents.split('\nSTART_NEW_SECTION\n')

    if sections[0] != 'SubstateHmm':
        error_msg = '\nERROR: loaded model parameters are not for a substate HMM!'
        raise ValueError(error_msg)

    # Fail with an explicit message instead of a bare tuple-unpacking error
    # when the file is truncated or has extra sections.
    if len(sections) != 7:
        raise ValueError(
            '\nERROR: expected 7 sections in the model parameter file, '
            'found {}'.format(len(sections)))

    (_, model_txt, feature_txt, gui_state_dict_txt, pg_gui_state_dict_txt,
     str2num_state_dict_txt, misc_txt) = sections

    self.trained = pg.HiddenMarkovModel().from_yaml(model_txt)
    self.feature_list = feature_txt.split('\n')

    # NOTE(review): yaml.FullLoader can construct more than plain data types;
    # if parameter files may come from untrusted sources, check whether these
    # three dictionaries can be read with yaml.SafeLoader instead.
    self.gui_state_dict = yaml.load(gui_state_dict_txt, Loader=yaml.FullLoader)
    self.pg_gui_state_dict = yaml.load(pg_gui_state_dict_txt, Loader=yaml.FullLoader)
    self.str2num_state_dict = yaml.load(str2num_state_dict_txt, Loader=yaml.FullLoader)

    misc_dict = yaml.load(misc_txt, Loader=yaml.SafeLoader)
    # A NaN epsilon is stored as the string 'nan'; convert it back.
    if misc_dict['dbscan_epsilon'] == 'nan':
        misc_dict['dbscan_epsilon'] = np.nan

    self.nb_states = misc_dict['nb_states']
    self.buffer = misc_dict['buffer']
    self.data.eps = misc_dict['dbscan_epsilon']
def fit_hmm(self):
    """Build a 4-state Gaussian HMM, fit it and store it on ``self.model``.

    The four states (named ``'0'`` to ``'3'``) have hand-picked 3-dimensional
    Gaussian emissions and a fully connected transition matrix that favours
    staying in the current state. The model is fitted on ``self.accels_filt``.
    """
    print('Fitting Model')

    # (name, mean, covariance) of each state's emission distribution.
    emission_specs = [
        ('0', np.array([1, 1, 1]), .1 * np.eye(3)),
        ('1', np.array([1, 1, 1]), 3 * np.eye(3)),
        ('2', np.array([.5, .5, .5]), .1 * np.eye(3) + .1 * np.ones([3, 3])),
        ('3', np.array([1.5, 1.5, 1.5]), .1 * np.eye(3) + .1 * np.ones([3, 3])),
    ]
    states = [
        pg.State(pg.MultivariateGaussianDistribution(mean, cov), name=label)
        for label, mean, cov in emission_specs
    ]

    model = pg.HiddenMarkovModel()
    model.add_states(states)

    # Start weights, then one row of outgoing weights per state.
    start_weights = (.85, .05, .05, .05)
    transition_rows = (
        (.85, .05, .05, .05),
        (.1, .7, .1, .1),
        (.1, .1, .7, .1),
        (.1, .1, .1, .7),
    )
    for target, weight in zip(states, start_weights):
        model.add_transition(model.start, target, weight)
    for source, row in zip(states, transition_rows):
        for target, weight in zip(states, row):
            model.add_transition(source, target, weight)

    model.bake()
    model.fit(self.accels_filt)
    self.model = model
def make_hmm_model(emission_mat, transition_probs):
    """Build a baked two-state discrete HMM (baseline ``'0'``, ictal ``'1'``).

    Args:
        emission_mat: 2-D array-like indexed as ``[state, symbol]``; row 0
            holds the baseline emissions and row 1 the ictal emissions.
            Symbols are the integer column indices.
        transition_probs: 2-D array-like indexed as ``[from_state, to_state]``
            with 0 = baseline and 1 = ictal.

    Returns:
        The baked ``pomegranate.HiddenMarkovModel`` named ``'ndf'``.
    """
    model = pomegranate.HiddenMarkovModel('ndf')
    n_symbols = emission_mat.shape[1]

    def emission_table(row):
        # Map every symbol index to its emission probability in this row.
        table = {}
        for symbol in range(n_symbols):
            table[symbol] = emission_mat[row, symbol]
        return table

    ictal_emissions = emission_table(1)
    baseline_emissions = emission_table(0)
    ictal = pomegranate.State(
        pomegranate.DiscreteDistribution(ictal_emissions), name='1')
    baseline = pomegranate.State(
        pomegranate.DiscreteDistribution(baseline_emissions), name='0')

    for state in (ictal, baseline):
        model.add_state(state)

    # NOTE(review): the two start weights sum to 100, not 1 -- presumably they
    # are percentages that are normalised when the model is baked; confirm.
    edges = (
        (model.start, ictal, 0.05),
        (model.start, baseline, 99.95),
        (baseline, baseline, transition_probs[0,0]),
        (baseline, ictal, transition_probs[0,1]),
        (ictal, ictal, transition_probs[1,1]),
        (ictal, baseline, transition_probs[1,0]),
    )
    for source, target, weight in edges:
        model.add_transition(source, target, weight)

    model.bake(verbose=False)
    return model
def get_untrained_hmm(self, data_dict):
    """Return a baked but untrained pomegranate HMM with initial parameters.

    Emission distributions come from ``self.get_states`` and the transition,
    start and end probabilities from ``self.get_transitions``. According to
    the original notes: if all data is unlabeled the emission parameters are
    found with k-means and transition/start probabilities are equal; if some
    data is labeled the given classifications provide the initial estimate.

    Main states are not connected to each other directly. Each connection
    goes through a chain of ``self.buffer`` edge states.

    Side effects: sets ``self.pg_gui_state_dict``, ``self.gui_state_dict``
    and ``self.str2num_state_dict``.

    Args:
        data_dict: Data passed on to ``get_states`` and ``get_transitions``.

    Returns:
        The baked ``pg.HiddenMarkovModel``.
    """
    hmm = pg.HiddenMarkovModel()

    # Emission distributions and transition probabilities.
    states, edge_states, pg_gui_state_dict = self.get_states(data_dict)
    tm_dict, pstart_dict, pend_dict = self.get_transitions(data_dict)

    # Main states: start/end transitions and the self-loop.
    for s_name, state in states.items():
        hmm.add_state(state)
        hmm.add_transition(hmm.start, state, pstart_dict[s_name], pseudocount=0)
        hmm.add_transition(state, hmm.end, pend_dict[s_name], pseudocount=0)
        hmm.add_transition(state, state, tm_dict[(s_name, s_name)], pseudocount=0)

    # Connect pairs of main states through their chain of edge states.
    for es_name in edge_states:
        chain, endpoints = edge_states[es_name][0], edge_states[es_name][1]
        source, target = [states[name] for name in endpoints]
        for edge_state in chain:
            hmm.add_state(edge_state)
        hmm.add_transition(source, chain[0], tm_dict[endpoints])
        # Inside the chain every step has weight 1 and a very large
        # pseudocount.
        for pos in range(1, self.buffer):
            hmm.add_transition(chain[pos-1], chain[pos], 1.0, pseudocount=9999999)
        hmm.add_transition(chain[-1], target, 1.0, pseudocount=9999999)

    hmm.bake()

    # Map the model's internal state order to GUI states and back.
    state_names = np.array([state.name for state in hmm.states])
    self.pg_gui_state_dict = pg_gui_state_dict
    gui_state_dict = {}
    for index, name in enumerate(state_names):
        gui_state_dict[index] = pg_gui_state_dict.get(name, None)
    self.gui_state_dict = gui_state_dict
    self.str2num_state_dict = {
        str(name): index
        for name, index in zip(state_names, list(self.gui_state_dict))
    }
    return hmm
def get_substate_object(self, vec, state_name):
    """Build an unbaked HMM whose states are Gaussian-mixture components.

    Columns of ``vec`` that contain a NaN are dropped, a ``GaussianMixture``
    with at most 10 components is fitted on the remaining columns
    (transposed), and every component becomes one state with a multivariate
    Gaussian emission. Each state is entered from the start with the
    component's mixture weight and leaves to the end with weight 1.

    Args:
        vec: 2-D array; columns are filtered for NaN and then used as samples.
        state_name: Name of the model and prefix for all state names.

    Returns:
        Tuple ``(hmm_out, added_state_names)``: the model, which is not baked
        here, and the list of names of the states that were added.
    """
    has_nan = np.isnan(vec).any(axis=0)
    vec_clean = vec[:, ~has_nan]
    nb_clust = min(10, vec_clean.shape[1])

    gm = GaussianMixture(n_components=nb_clust).fit(vec_clean.T)
    # Add a tiny value to the diagonal of every covariance matrix.
    jitter = np.eye(gm.covariances_.shape[1]) * 1E-9
    gm.covariances_ += jitter

    hmm_out = pg.HiddenMarkovModel()
    hmm_out.name = state_name
    hmm_out.start.name = f'{state_name}_start'
    hmm_out.end.name = f'{state_name}_end'

    added_state_names = []
    for component in range(nb_clust):
        substate_name = f'{state_name}_{component}'
        added_state_names.append(substate_name)
        emission = pg.MultivariateGaussianDistribution(
            gm.means_[component, :], gm.covariances_[component, :, :])
        substate = pg.State(emission, name=substate_name)
        hmm_out.add_state(substate)
        hmm_out.add_transition(hmm_out.start, substate,
                               gm.weights_[component], pseudocount=9999999)
        hmm_out.add_transition(substate, hmm_out.end, 1.0,
                               pseudocount=9999999)
    return hmm_out, added_state_names
def concatenative_hmm_alignment(trans):
    """Concatenate pretrained phoneme HMMs following a syllable transcription.

    For every syllable in ``trans`` the phoneme sequences listed in
    ``syl_2_phn[syl]`` are loaded from ``path_pretrained_model`` (one pickle
    per phoneme), renamed with ``change_state_name`` and chained together. A
    syllable with a single pronunciation forms a simple chain. A syllable
    with several pronunciations forms parallel branches that all start from
    the previous syllable (or from the model start) and all lead into the
    next syllable (or the model end).

    Args:
        trans: Iterable of syllable keys of ``syl_2_phn``.

    Returns:
        The baked ``pomegranate.HiddenMarkovModel`` named ``"hmm_conc"``.

    Raises:
        KeyError: If a syllable is not a key of ``syl_2_phn``.
    """
    def load_phn_model(phn):
        # Close the file handle deterministically instead of leaking it.
        # NOTE(review): pickle.load can execute arbitrary code; the model
        # directory must be trusted.
        with open(os.path.join(path_pretrained_model, phn + '.pkl'), 'rb') as fh:
            return pickle.load(fh)

    # Per-syllable occurrence counter; keeps state names unique when the
    # same syllable appears more than once.
    dict_syl_counter = dict.fromkeys(syl_2_phn, 0)

    hmm_conc = pomegranate.HiddenMarkovModel("hmm_conc")
    hmm_precedent = []  # models whose end feeds into the next phoneme
    p_first = True      # True until something is connected to the start

    for syl in trans:
        phns_syl = syl_2_phn[syl]

        if len(phns_syl) == 1:
            # Single pronunciation: a plain chain of phonemes.
            for p in phns_syl[0]:
                hmm_p = load_phn_model(p)
                change_state_name(hmm_p,
                                  syl + '-' + str(dict_syl_counter[syl]), p)
                hmm_conc.add_model(hmm_p)
                if p_first:
                    hmm_conc.add_transition(hmm_conc.start, hmm_p.start, 1.0)
                    p_first = False
                else:
                    for previous in hmm_precedent:
                        hmm_conc.add_transition(previous.end, hmm_p.start, 1.0)
                hmm_precedent = [hmm_p]
        else:
            # Several pronunciations: build parallel branches.
            hmm_branch_precedent = hmm_precedent
            hmm_in_branch_precedent = None
            hmm_precedent = []
            for ii_phns, phns in enumerate(phns_syl):
                for ii_p, p in enumerate(phns):
                    hmm_p = load_phn_model(p)
                    change_state_name(
                        hmm_p,
                        syl + '-' + str(ii_phns) + '-' +
                        str(dict_syl_counter[syl]), p)
                    hmm_conc.add_model(hmm_p)

                    if ii_p > 0:
                        # Inside a branch: follow the previous phoneme. The
                        # old code wired these to the model start when the
                        # branching syllable came first.
                        hmm_conc.add_transition(hmm_in_branch_precedent.end,
                                                hmm_p.start, 1.0)
                    elif p_first:
                        # First phoneme of a branch of the first syllable.
                        hmm_conc.add_transition(hmm_conc.start, hmm_p.start,
                                                1.0)
                    else:
                        # First phoneme of a branch: fan out from whatever
                        # ended the previous syllable.
                        for previous in hmm_branch_precedent:
                            hmm_conc.add_transition(previous.end, hmm_p.start,
                                                    1.0)

                    hmm_in_branch_precedent = hmm_p
                    if ii_p == len(phns) - 1:
                        hmm_precedent.append(hmm_p)

            # All branches of the first syllable hang off the start; only
            # then is the start considered used. The old code compared the
            # branch index with the phoneme count and could leave p_first
            # set for the rest of the transcription.
            if hmm_precedent:
                p_first = False

        dict_syl_counter[syl] += 1

    for previous in hmm_precedent:
        hmm_conc.add_transition(previous.end, hmm_conc.end, 1.0)

    hmm_conc.bake()
    return hmm_conc
def get_model(r, params, window_size, num_skipped, seq_len, p, \
    g, resample_prob, x_chr=False, haploid=False, debug=False, h_t=1, skip_score=float("-Inf")):
    """
    Builds the hidden Markov model for a given chromosome or scaffold, using
    the Pomegranate module.

    The model has one emitting state per ancestry (AA, AB, BB) plus one "skip"
    state per ancestry for windows that were skipped. In haploid mode the AB
    state and its skip state are left out. Reads the module-level ``prob_lim``
    as the cap for the skip and end probabilities.

    Arguments:
        r -- (float) the per site, per generation recombination probability
        params -- a dict where keys are names of states (AA, AB, and BB) and
            values are dicts where values are mu and sd, which are floats
            representing means and standard deviations of emission
            probability distributions
        window_size -- (int) the window size for this run, in bp
        num_skipped -- (int) the number of windows that were skipped due to
            not passing criteria
        seq_len -- (int) the number of windows in the current
            chromosome/scaffold
        p -- (float) the percent ancestry the admixed population derives from
            ancestral population A (estimated beforehand)
        g -- (int) the number of generations since admixture (estimated
            beforehand)
        resample_prob -- (float) probability of resampling the same ancestral
            recombination event twice in an individual after the set number
            of generations since admixture (referred to as z in the paper)
        x_chr -- (boolean) does this chromosome/scaffold belong to a
            hemizygous sex chromosome?
        haploid -- (boolean) is this individual haploid along this
            chromosome/scaffold?
        debug -- (boolean) should debugging messages be printed to the screen?
        h_t -- (float) if the user has specified that expected reduction in
            heterozygosity given the number of generations since admixture
            should be incorporated into the model, this is the expected
            fraction of the initial heterozygosity that remains after g
            generations.
        skip_score -- (float) the number emitted by adlibs_score when
            "skipped" windows are encountered

    Returns:
        a Pomegranate HMM object for the current chromosome/scaffold
    """
    global prob_lim

    model = pomegranate.HiddenMarkovModel(name='ancestry')

    # Compute probabilities of transitioning to a skip state or the end. Cap these
    # both at the specified probability limit.
    # NOTE(review): these divisions assume true division (Python 3 or
    # ``from __future__ import division``) -- confirm for this file.
    skip_prob = num_skipped / seq_len
    if skip_prob > prob_lim:
        skip_prob = prob_lim
    state_end = 1 / seq_len
    if state_end > prob_lim:
        state_end = prob_lim

    if x_chr:
        r *= (2 / 3)

    # Determine probabilities of transitions
    if haploid:
        # Should 2 be 1.5? I don't think so -- we already multiplied r by (2/3)
        # so that's in here already.
        aa_bb = g * r * (1 - p)
        bb_aa = g * r * p
        # Eliminate the heterozygous state.
        aa_ab = 0
        ab_aa = 0
        bb_ab = 0
        ab_bb = 0
    else:
        probs = get_trans_probs(r, g, p, resample_prob)
        aa_ab = probs['aa_ab']
        ab_aa = probs['ab_aa']
        aa_bb = probs['aa_bb']
        bb_ab = probs['bb_ab']
        ab_bb = probs['ab_bb']
        bb_aa = probs['bb_aa']

    # Scale the transition probabilities by the window size.
    aa_ab *= window_size
    ab_aa *= window_size
    aa_bb *= window_size
    bb_ab *= window_size
    ab_bb *= window_size
    bb_aa *= window_size

    # Self-transitions take whatever probability mass is left in each row.
    aa_aa = 1 - (aa_ab + aa_bb + state_end + skip_prob)
    ab_ab = 1 - (ab_aa + ab_bb + state_end + skip_prob)
    bb_bb = 1 - (bb_aa + bb_ab + state_end + skip_prob)

    # Account for reduction in heterozygosity due to genetic drift
    if haploid:
        pass
        #aa_aa += (aa_bb - aa_bb*h_t)
        #aa_bb *= h_t
        #bb_bb += (bb_aa - bb_aa*h_t)
        #bb_aa *= h_t
    else:
        # NOTE(review): each update below reads values already modified by the
        # line before it (e.g. the aa_bb update uses the new aa_aa), so the
        # statement order affects the result -- confirm this is intended.
        # NOTE(review): ab_aa + ab_bb == 0 would raise ZeroDivisionError here.
        aa_aa += (aa_aa / (aa_aa + aa_bb)) * (aa_ab - aa_ab * h_t)
        aa_bb += (aa_bb / (aa_aa + aa_bb)) * (aa_ab - aa_ab * h_t)
        bb_aa += (bb_aa / (bb_aa + bb_bb)) * (bb_ab - bb_ab * h_t)
        bb_bb += (bb_bb / (bb_aa + bb_bb)) * (bb_ab - bb_ab * h_t)
        aa_ab *= h_t
        bb_ab *= h_t

        ab_aa += (ab_aa / (ab_aa + ab_bb)) * (ab_ab - ab_ab * h_t)
        ab_bb += (ab_bb / (ab_aa + ab_bb)) * (ab_ab - ab_ab * h_t)
        ab_ab *= h_t

    if debug:
        print("# AA -> AA {}".format(aa_aa), file=sys.stderr)
        print("# AA -> AB {}".format(aa_ab), file=sys.stderr)
        print("# AA -> BB {}".format(aa_bb), file=sys.stderr)
        print("# AB -> AA {}".format(ab_aa), file=sys.stderr)
        print("# AB -> AB {}".format(ab_ab), file=sys.stderr)
        print("# AB -> BB {}".format(ab_bb), file=sys.stderr)
        print("# BB -> AA {}".format(bb_aa), file=sys.stderr)
        print("# BB -> AB {}".format(bb_ab), file=sys.stderr)
        print("# BB -> BB {}".format(bb_bb), file=sys.stderr)
        print("# SKIP {}".format(skip_prob), file=sys.stderr)

    # Emission distributions and states for the three ancestries.
    aaDist = pomegranate.NormalDistribution(params['AA']['mu'], params['AA']['sd'])
    abDist = pomegranate.NormalDistribution(params['AB']['mu'], params['AB']['sd'])
    bbDist = pomegranate.NormalDistribution(params['BB']['mu'], params['BB']['sd'])

    aaState = pomegranate.State(aaDist, name="AA")
    abState = pomegranate.State(abDist, name="AB")
    bbState = pomegranate.State(bbDist, name="BB")

    model.add_state(aaState)
    if not haploid:
        model.add_state(abState)
    model.add_state(bbState)

    #### ADD skip states
    # All skip states share one narrow uniform distribution ending at
    # skip_score.
    skip_dist = pomegranate.UniformDistribution(skip_score - 0.01, skip_score)
    aa_skip_state = pomegranate.State(skip_dist, name="skip-AA")
    ab_skip_state = pomegranate.State(skip_dist, name="skip-AB")
    bb_skip_state = pomegranate.State(skip_dist, name="skip-BB")

    model.add_state(aa_skip_state)
    if not haploid:
        model.add_state(ab_skip_state)
    model.add_state(bb_skip_state)

    # Start transitions: ancestry frequency split between the regular state
    # and its skip state.
    if haploid:
        model.add_transition(model.start, aaState, p * (1 - skip_prob))
        model.add_transition(model.start, aa_skip_state, p * skip_prob)
        model.add_transition(model.start, bbState, (1 - p) * (1 - skip_prob))
        model.add_transition(model.start, bb_skip_state, (1 - p) * skip_prob)
    else:
        model.add_transition(model.start, aaState, p**2 * (1 - skip_prob))
        model.add_transition(model.start, aa_skip_state, p**2 * skip_prob)
        model.add_transition(model.start, abState, 2 * p * (1 - p) * (1 - skip_prob))
        model.add_transition(model.start, ab_skip_state, 2 * p * (1 - p) * skip_prob)
        model.add_transition(model.start, bbState, (1 - p)**2 * (1 - skip_prob))
        model.add_transition(model.start, bb_skip_state, (1 - p)**2 * skip_prob)

    # NOTE(review): the end transitions use 1 / seq_len directly, while the
    # self-transitions above were computed with the capped state_end --
    # confirm whether state_end was meant here.
    model.add_transition(aaState, model.end, 1 / seq_len)
    if not haploid:
        model.add_transition(abState, model.end, 1 / seq_len)
    model.add_transition(bbState, model.end, 1 / seq_len)

    model.add_transition(aaState, bbState, aa_bb)
    model.add_transition(aaState, aaState, aa_aa)
    model.add_transition(bbState, aaState, bb_aa)
    model.add_transition(bbState, bbState, bb_bb)
    if not haploid:
        model.add_transition(aaState, abState, aa_ab)
        model.add_transition(abState, aaState, ab_aa)
        model.add_transition(abState, bbState, ab_bb)
        model.add_transition(abState, abState, ab_ab)
        model.add_transition(bbState, abState, bb_ab)

    ### Add skip state transitions
    model.add_transition(aaState, aa_skip_state, skip_prob)
    if not haploid:
        model.add_transition(abState, ab_skip_state, skip_prob)
    model.add_transition(bbState, bb_skip_state, skip_prob)

    model.add_transition(aa_skip_state, aa_skip_state, skip_prob)
    if not haploid:
        model.add_transition(ab_skip_state, ab_skip_state, skip_prob)
    model.add_transition(bb_skip_state, bb_skip_state, skip_prob)

    # A skip state can switch ancestry with the same probabilities as the
    # regular state it shadows.
    model.add_transition(aa_skip_state, bbState, aa_bb)
    model.add_transition(bb_skip_state, aaState, bb_aa)
    if not haploid:
        model.add_transition(aa_skip_state, abState, aa_ab)
        model.add_transition(ab_skip_state, aaState, ab_aa)
        model.add_transition(ab_skip_state, bbState, ab_bb)
        model.add_transition(bb_skip_state, abState, bb_ab)

    model.add_transition(aa_skip_state, model.end, 1 / seq_len)
    if not haploid:
        model.add_transition(ab_skip_state, model.end, 1 / seq_len)
    model.add_transition(bb_skip_state, model.end, 1 / seq_len)

    # Returning from a skip state to its regular state takes the remainder.
    model.add_transition(aa_skip_state, aaState, 1 - skip_prob - aa_ab - aa_bb - 1 / seq_len)
    if not haploid:
        model.add_transition(ab_skip_state, abState, 1 - skip_prob - ab_aa - ab_bb - 1 / seq_len)
    model.add_transition(bb_skip_state, bbState, 1 - skip_prob - bb_aa - bb_ab - 1 / seq_len)
    ###

    model.bake()

    return model
if feature in patient_data.columns: patient_data = patient_data.drop(feature, axis=1) df1 = patient_data.pop('hypnogram_User') patient_data['hypnogram_User'] = df1 n_features = patient_data.shape[1] - 2 data_columns, hidden_sequence, observation_sequence, train, test = preprocess_data( data=patient_data) training_class_array.append(hidden_sequence) train_df = train_df.append(train) hmm_dist = dst.Distributions(train_df) feature_names = patient_data.drop(['hypnogram_User', 'hypnogram_Machine'], axis=1).columns.values.tolist() dist, state_names = hmm_dist.gauss_kernel_dist(feature_names) model = pg.HiddenMarkovModel('prediction') create_states(model, training_class_array, state_names) model.bake() #TESTING PART!!! :) list_of_testing_patients = list_of_testing_patients.reset_index() for i in range(list_of_testing_patients.shape[0]): path = "Data/" + str(list_of_testing_patients['file_name'][i]) patient_data = dp.data_import(path) binary_features = [ "Gain", "Bradycardia", "LegMovement", "CentralApnea", "Arousal", "Hypopnea", "RelativeDesaturation", "Snore", "ObstructiveApnea", "MixedApnea", "LongRR", "Tachycardia" ] for feature in binary_features:
def run_hmm_on_files(path, n_features):
    """Build an HMM from one recording and score its state predictions.

    The file is preprocessed with ``dp.preprocess_any_file``. Emission
    distributions come from ``dst.Distributions(...).gauss_kernel_dist`` and
    transition probabilities from a Markov chain fitted on the
    ``hypnogram_User`` label sequence. The score is the fraction of
    predictions that match the labels.

    Args:
        path: Path of the file to process.
        n_features: Passed to ``dp.preprocess_any_file``, which returns the
            value used from then on.

    Returns:
        Tuple ``(poznamka, score)``. ``poznamka`` is a list of notes: ``""``
        when the start state could be connected to ``'Wake'``, otherwise
        ``'nezacina wake'``. ``score`` is the accuracy, or the string
        ``"NaN"`` if a ``ValueError`` occurred.
    """
    poznamka = []
    try:
        print(path)
        data, n_features, feature_names = dp.preprocess_any_file(path, n_features)

        def preprocess_data(data):
            """Split the data and drop rows labelled "NotScored"."""
            train, test = ms.train_test_split(data, test_size=0.3, shuffle=False)
            data_columns = list(data.columns.values)
            hidden_sequence = data['hypnogram_User'].tolist()
            # Walk backwards so that deleting entries keeps indices valid.
            # NOTE(review): hidden_sequence covers the whole data set while
            # only ``train`` (the first 70%) is filtered; dropping an index
            # that lies in the test part, and the resulting length mismatch
            # with the predictions, should be checked against intent.
            for i in reversed(range(len(hidden_sequence))):
                if hidden_sequence[i] == "NotScored":
                    train = train.drop([i])
                    del hidden_sequence[i]
            observation_sequence = train.iloc[:, 0:n_features].values.tolist()
            return data_columns, hidden_sequence, observation_sequence, train, test

        def create_states(model, hidden_sequence, state_names):
            """Add states and label-derived transitions to ``model``."""
            chain_model = pg.MarkovChain.from_samples([hidden_sequence])
            states = {}  # type: Dict[str, pg.State]
            for position, name in enumerate(state_names):
                # ``index`` returns the first match, so duplicated names keep
                # pointing at the first distribution, as before.
                states[name] = pg.State(dist[state_names.index(name)], name=name)
            model.add_states(list(states.values()))

            # sets the starting probability for state 'Wake' to 1.0
            try:
                model.add_transition(model.start, states['Wake'], 1.0)
                poznamka.append("")
            except KeyError:
                # No 'Wake' state in this recording.
                print("nezacina wake")
                poznamka.append('nezacina wake')

            # insert the transition probabilities estimated from the labels
            for prob in chain_model.distributions[1].parameters[0]:
                state1 = states[prob[0]]
                state2 = states[prob[1]]
                probability = prob[2]
                model.add_transition(state1, state2, probability)

        _, hidden_sequence, _, train, _ = preprocess_data(data)
        hmm_dist = dst.Distributions(train)
        dist, state_names = hmm_dist.gauss_kernel_dist(feature_names)

        model = pg.HiddenMarkovModel('prediction')
        create_states(model, hidden_sequence, state_names)
        model.bake()

        # NOTE(review): predictions are made on the training rows.
        test_observation_sequence = train.iloc[:, 0:n_features].values.tolist()
        hmm_pred = model.predict(test_observation_sequence)

        # ``labels`` must be passed by keyword: it is keyword-only in current
        # scikit-learn releases. The matrix itself is not used further, but
        # the call still validates that labels and predictions line up.
        metrics.confusion_matrix(
            hidden_sequence,
            [state_names[state_id] for state_id in hmm_pred],
            labels=state_names)

        state_ids = np.array([state_names.index(val) for val in hidden_sequence])
        score = (np.array(hmm_pred) == state_ids).mean()
        print(score)
    except ValueError:
        print('nejaky valueerror - napr nepozna stlpec hypnogram user')
        score = "NaN"
    return poznamka, score
def fit_transitions(self, X, gloss_seqs, **hmm_fit_args):
    """Fit one HMM per label and merge them into a single model.

    For every label, the segments annotated with that label are cut out of
    the inputs, turned into state log-posteriors with
    ``self.posterior.predict_logproba``, reduced by ``self.p_s`` and used to
    fit a word model with ``self._fit_word_model``. The fitted transitions are
    then copied into one model in which the last state acts as the hub:
    word-model starts are replaced by the last state (with probability
    ``self.p_idle2gesture``) and word-model ends lead back to it.

    Sets ``self.hmm``, ``self.state2idx`` and ``self.state2label``.

    Args:
        X: Iterable of input collections; each one is indexed by sequence
            number and then sliced by ``start:stop`` (presumably one
            collection per input modality -- confirm against the caller).
        gloss_seqs: For every sequence, an iterable of
            ``(label, start, stop)`` triples.
        **hmm_fit_args: Forwarded to ``self._fit_word_model``.
    """
    # Train individual word models
    params = []
    for i in range(len(self.labels)):
        # Range of state indexes for this label
        first = sum(self.chains_lengths[:i])
        last = sum(self.chains_lengths[:i + 1])

        # Compute posteriors for the states of this label
        subsegments = [(seq, start, stop)
                       for seq, gloss_seq in enumerate(gloss_seqs)
                       for label, start, stop in gloss_seq
                       if label == self.labels[i]]
        Xw = [[Xm[seq][start:stop] for seq, start, stop in subsegments]
              for Xm in X]
        Xw = [
            self.posterior.predict_logproba(*x)[:, first:last]
            for x in zip(*Xw)
        ]
        # pseudo log-likelihoods
        Xw = [x - self.p_s[None, first:last] for x in Xw]

        params.append(
            self._fit_word_model(Xw, self.chains_lengths[i], **hmm_fit_args))

    # Create complete model
    print("loading trained parameters into the model")
    self.hmm = pomegranate.HiddenMarkovModel(None)
    states = []
    for i in range(self.nstates):
        s = pomegranate.State(PrecomputedDistribution(i, self.nstates),
                              name=str(i))
        states.append(s)
        self.hmm.add_state(s)

    self.hmm.start.name = str(-1)
    self.hmm.end.name = str(self.nstates)
    self.hmm.add_transition(self.hmm.start, states[-1], 1)
    self.hmm.add_transition(states[-1], states[-1], self.p_idle2idle)

    for i in range(self.nlabels):
        # int(): np.sum of an empty list slice is a float, which cannot be
        # used to index ``states``.
        state_offset = int(np.sum(self.chains_lengths[:i]))
        chain_length = self.chains_lengths[i]
        for s1, s2, p in params[i]:
            # Adjust indexes and parameters to integrate within full model
            s2 = -1 if s2 == chain_length else s2 + state_offset
            if s1 == -1:
                p = self.p_idle2gesture
            else:
                s1 += state_offset
            self.hmm.add_transition(states[s1], states[s2], p)

    self.hmm.bake()

    # Build mapping between internal indexes and ours
    emitting_ids = [
        int(s.name) for s in self.hmm.states
        if int(s.name) not in {-1, self.nstates}
    ]
    self.state2idx = np.array(emitting_ids, dtype=np.int32)
    idx2labels = np.concatenate([
        np.full((self.chains_lengths[i], ), self.labels[i])
        for i in range(self.nlabels)
    ] + [np.array([0.0])]).astype(np.int32)
    self.state2label = np.array([idx2labels[idx] for idx in emitting_ids])
def decode_sequence(probs=None,
                    algorithm='threshold',
                    params=dict(n=5, t=.8),
                    verbose=True):
    '''
    Decode a label sequence from per-sample class probabilities.

    Once a model outputs probabilities for some sequence of data, that data
    shall be passed to this method, which smooths it into the sequence of
    labels that marks where the actual canned laughter was.

    implemented algorithms:
        - 'threshold' window and threshold method: a simple heuristic that
            looks at every window of length n; if the average probability of
            a class within the window is at least t, that class is assigned
            to all samples of the window. Samples not covered by such a
            window keep their argmax label.
            params: dict(n=window length, t=threshold)
        - 'hmm' (alias 'viterbi'): builds a hidden Markov model with one
            state per class and decodes the argmax labels with Viterbi.
            params may map a class index c (0, 1, ...) to a `pomegranate`
            DiscreteDistribution used as the emission of that class; classes
            0 and 1 have built-in defaults. params may also contain
            'transitions', which is passed to `model.add_transitions`.

    The algorithms 'neural' and 'modethreshold' are not implemented; they,
    like any other unknown name, raise NotImplementedError.

    ---
    probs: an nparray of (n_samples, n_classes) probabilities such that for
            each sample the probabilities across classes add up to 1. If the
            last dimension has size 1, i.e. shape (n_samples, 1), it is
            converted to multiclass with `_binary_probs_to_multiclass`.
    algorithm: name of the decoding algorithm (see above)
    params: algorithm parameters (see above); never modified here
    verbose: currently unused

    return: the predicted labels, one per sample. For 'threshold' this is a
            list; for 'hmm' it is the Viterbi path returned by the model
            without its first and last element.
    '''
    color.INFO('INFO', 'shape of input probs is: {}'.format(probs.shape))
    if probs.shape[-1] == 1:
        probs = _binary_probs_to_multiclass(probs)
        color.INFO('INFO',
                   'received probs of shape {}'.format(str(probs.shape)))

    if algorithm == 'threshold':
        n, t = params['n'], params['t']
        labels = [np.argmax(timechunk) for timechunk in probs]
        for i in range(len(probs) - n + 1):
            # The window average does not depend on the class: compute once.
            window_avg = np.average(probs[i:i + n], axis=0)
            for c in range(probs.shape[-1]):
                if window_avg[c] >= t:
                    labels[i:i + n] = [c] * n
        return labels

    elif algorithm == 'hmm' or algorithm == 'viterbi':
        # define default emission probabilities
        default = {
            0: pmgt.DiscreteDistribution({
                '0': 0.7,
                '1': 0.3
            }),
            1: pmgt.DiscreteDistribution({
                '0': 0.2,
                '1': 0.8
            })
        }

        states = []
        for c in range(probs.shape[-1]):
            # Look the default up only when params has no entry for c, so
            # that classes beyond 0/1 work when the caller supplies them.
            emission = params[c] if c in params else default[c]
            states.append(pmgt.State(emission, name=str(c)))

        model = pmgt.HiddenMarkovModel('laugh-decoder')
        model.add_states(states)

        if 'transitions' in params:
            model.add_transitions(params['transitions'])
        else:
            # start must always go to state 0
            model.add_transitions([model.start, states[0]],
                                  [states[0], model.end], [1., .1])
            model.add_transitions([states[0], states[0], states[1], states[1]],
                                  [states[0], states[1], states[0], states[1]],
                                  [.5, .4, .2, .8])
        model.bake()

        labels = [str(np.argmax(entry)) for entry in probs]
        labels = model.predict(sequence=labels, algorithm='viterbi')
        # Drop the first and last element of the decoded path.
        return labels[1:-1]

    else:
        raise NotImplementedError
# Demo: fit a 2-state HMM on a random binary sequence (about 80% ones) and
# decode it with Viterbi.
seq = [np.array(np.random.rand(100) > 0.2, dtype=int)]

# NOTE(review): this model is replaced by the from_matrix model below before
# it is used; kept because hmm() also advances the NumPy random state.
model = hmm(nstates=2)

nstates = 2
states = [pmg.DiscreteDistribution({
    0: 0.5,
    1: 0.5
}) for i in range(nstates)]

# Uniform transitions, immediately replaced by random row-normalised ones.
trans = np.ones((nstates, nstates)) / nstates
trans = np.random.rand(nstates, nstates)
for i in range(nstates):
    trans[i] = trans[i] / trans[i].sum()

model = pmg.HiddenMarkovModel().from_matrix(trans, states,
                                            np.ones(nstates) / nstates,
                                            np.zeros(nstates))
model.plot()

# print() with a single argument behaves the same on Python 2 and Python 3.
print(model.fit(seq))

plt.figure(1)
plt.clf()
model.plot()

logp, path = model.viterbi(seq[0])
print(idx_from_path(path))

### worm data
def _load_pickled_hmm(model_dir, phoneme):
    """Unpickle and return the model stored in ``<model_dir>/<phoneme>.pkl``."""
    # NOTE(security): pickle.load can execute arbitrary code; only point this
    # at model directories you trust.
    with open(os.path.join(model_dir, phoneme + '.pkl'), 'rb') as handle:
        return pickle.load(handle)


def concatenative_hmm_recogntion(path_pretrained_model):
    """Assemble per-phoneme HMMs into one looping syllable-recognition HMM.

    A fresh copy of each pickled phoneme model is loaded for every syllable
    that uses it and renamed via ``change_state_name``. The copies are chained
    phoneme by phoneme inside each syllable, and every syllable (plus the
    silence model) is wired from and back to the start of the combined model.

    Args:
        path_pretrained_model: directory containing the phoneme pickles
            (``d.pkl``, ``ow.pkl``, ..., ``sil.pkl``). The combined model is
            written back to it as ``hmm_conc.pkl``.

    Returns:
        None. Side effects: writes ``hmm_conc.pkl`` (pickle protocol 2) into
        ``path_pretrained_model``, plots the model and saves the figure to
        ``topo.png`` in the current working directory.
    """
    # (syllable, phonemes in order). Order matters: it fixes the order in
    # which sub-models and transitions are added to the combined model.
    syllables = [
        ('do', ['d', 'ow']),
        ('re', ['r', 'ey']),
        ('mi', ['m', 'iy']),
        ('fa', ['f', 'aa']),
        ('sol0', ['s', 'ow', 'l']),
        ('sol1', ['s', 'ao', 'l']),
        ('la', ['l', 'aa']),
        ('si', ['s', 'iy']),
    ]

    # One list of phoneme models per syllable, in pronunciation order.
    chains = []
    for syllable, phonemes in syllables:
        chain = []
        for phoneme in phonemes:
            phone_hmm = _load_pickled_hmm(path_pretrained_model, phoneme)
            change_state_name(phone_hmm, syllable, phoneme)
            chain.append(phone_hmm)
        chains.append(chain)
    # The silence model is used as-is (no state renaming).
    hmm_sil = _load_pickled_hmm(path_pretrained_model, 'sil')

    hmm_conc = pomegranate.HiddenMarkovModel("hmm_conc")
    for chain in chains:
        for phone_hmm in chain:
            hmm_conc.add_model(phone_hmm)
    hmm_conc.add_model(hmm_sil)

    # phrase start to phn start transitions: the 8 syllables and silence each
    # get the same probability (~1/9).
    for entry_hmm in [chain[0] for chain in chains] + [hmm_sil]:
        hmm_conc.add_transition(hmm_conc.start, entry_hmm.start, 0.111111)

    # No phoneme-end -> hmm_conc.end transitions are added; they were
    # disabled (commented out) in the original implementation.

    # consonant to vowel transitions
    for chain in chains:
        hmm_conc.add_transition(chain[0].end, chain[1].start, 1.0)
        if len(chain) == 3:
            # Three-phoneme syllables (sol0/sol1): after the vowel, either
            # enter the final phoneme or jump straight to its end state.
            hmm_conc.add_transition(chain[1].end, chain[2].start, 0.5)
            hmm_conc.add_transition(chain[1].end, chain[2].end, 0.5)

    # syllable end to phrase start
    for chain in chains:
        hmm_conc.add_transition(chain[-1].end, hmm_conc.start, 1.0)
    hmm_conc.add_transition(hmm_sil.end, hmm_conc.start, 1.0)

    hmm_conc.bake(merge=False)

    # Context manager guarantees the pickle is flushed and the handle closed.
    with open(os.path.join(path_pretrained_model, 'hmm_conc.pkl'), 'wb') as handle:
        pickle.dump(hmm_conc, handle, protocol=2)

    hmm_conc.plot()
    plt.savefig('topo.png', dpi=3000)
from duree import duree

Nsamples = 100

# Model parameters
start_probability = np.array([1, 0, 0])
# Temporary transition matrix
T = np.array([
    [0.5, 0.4, 0.1],
    [0.3, 0.4, 0.3],
    [0.1, 0.2, 0.7],
])
# Temporary emission matrix
B = np.array([
    [0.5, 0.5],
    [0.25, 0.75],
    [0.75, 0.25],
])
# Lookup tables turning observation / state labels into integer indices
dicoObs = dict(pile=0, face=1)
dicoState = dict(P1=0, P2=1, P3=2)

## Build the Markov chain
model = pg.HiddenMarkovModel(name="partie 5")  # create the model instance

# Emission matrix: one state per row of B, emitting 'pile' / 'face'
p1, p2, p3 = (
    pg.State(
        pg.DiscreteDistribution({'pile': B[row, 0], 'face': B[row, 1]}),
        name=label,
    )
    for row, label in enumerate(('P1', 'P2', 'P3'))
)

# Transition matrix
all_states = [p1, p2, p3]
model.add_transitions(model.start, all_states, [1, 0, 0])  # initial probabilities
# Transitions out of p1 (first row of T)
model.add_transitions(p1, all_states, [T[0, col] for col in range(3)])
def get_untrained_hmm(self, data_dict):
    """
    Return an untrained pomegranate hmm object with parameters filled in.

    Emission distributions come from ``self.get_states(data_dict)`` and
    transition / start / end probabilities from
    ``self.get_transitions(data_dict)``. According to the original notes
    (implemented in those helpers, not visible here):
    - If all data is unlabeled: finds emission parameters using k-means, transmission and start p are equal
    - If some data is labeled: initial estimate using given classifications

    Side effects: sets ``self.pg_gui_state_dict``, ``self.gui_state_dict``
    and ``self.str2num_state_dict``; reads ``self.buffer``.

    :param data_dict: passed unchanged to ``self.get_states`` and
        ``self.get_transitions``.
    :return: tuple ``(hmm, gm_dict)`` of the baked model and the ``gm_dict``
        returned by ``self.get_states``.
    """
    hmm = pg.HiddenMarkovModel()

    # Get emission distributions & transition probs
    states, edge_states, pg_gui_state_dict, gm_dict = self.get_states(data_dict)
    tm_dict, pstart_dict, pend_dict = self.get_transitions(data_dict)
    # Clamp every probability to at least 1e-6 (dicts are modified in place).
    for k in tm_dict: tm_dict[k] = max(tm_dict[k], 0.000001)  # reset 0-prob transitions to essentially 0, avoids nans on edges
    for k in pstart_dict: pstart_dict[k] = max(pstart_dict[k], 0.000001)
    for k in pend_dict: pend_dict[k] = max(pend_dict[k], 0.000001)

    # Add states, self-transitions, transitions to start/end state
    # se_dict = {}
    tr_dict = {}  # state name -> {substate name: weight}, reused for the edge states below
    for sidx, s_name in enumerate(states):
        s = states[s_name]  # sequence of substates belonging to state s_name
        hmm.add_states(s)
        # start_state = pg.State(None, name=f'{s_name}_start'); end_state = pg.State(None, name=f'{s_name}_end')

        # transitions between substates
        # presumably gm_dict[s_name] is a fitted mixture model and substates are
        # named '<s_name>_<component index>' -- TODO confirm against get_states
        internal_tr_dict = {f'{s_name}_{iidx}': tr for iidx, tr in enumerate(gm_dict[s_name].weights_)}
        p_stay = tm_dict[(s_name, s_name)]
        # Every substate pair (self-pairs included) gets the stay probability
        # scaled by the weight of the destination substate.
        for ss1, ss2 in product(s, repeat=2):
            hmm.add_transition(ss1, ss2, p_stay * internal_tr_dict[ss2.name], pseudocount=0)

        # start and end
        # Only the first substate is entered from start, only the last leaves to end.
        hmm.add_transition(hmm.start, s[0],pstart_dict[s_name])
        hmm.add_transition(s[-1], hmm.end, pend_dict[s_name])
        # for ss in s:
        #     hmm.add_transition(hmm.start, ss, pstart_dict[s_name])
        #     hmm.add_transition(ss, hmm.end, pend_dict[s_name])
        tr_dict[s_name] = internal_tr_dict

    # Make connections between states using edge states
    for es_name in edge_states:
        # es_ids is indexed as (source state name, destination state name) and
        # is also used directly as a key into tm_dict.
        es_list, es_ids = edge_states[es_name]
        hmm.add_states(es_list)
        # transitions into edge states
        for ss in states[es_ids[0]]:
            hmm.add_transition(ss, es_list[0], tm_dict[es_ids], group=f'{es_name}_in', pseudocount=0)
        # transitions out of edge states
        # Only the first substate of the destination state is targeted.
        hmm.add_transition(es_list[-1], states[es_ids[1]][0], tr_dict[es_ids[1]][states[es_ids[1]][0].name], pseudocount=0)
        # for ss in states[es_ids[1]]:
        #     hmm.add_transition(es_list[-1], ss, tr_dict[es_ids[1]][ss.name], pseudocount=0)

        # transitions between edge states
        # Consecutive edge states are chained with probability 1.0; assumes
        # es_list holds at least self.buffer entries -- TODO confirm.
        for i in range(1, self.buffer):
            hmm.add_transition(es_list[i-1], es_list[i], 1.0, pseudocount=9999999)
    hmm.bake()

    state_names = np.array([state.name for state in hmm.states])
    self.pg_gui_state_dict = pg_gui_state_dict
    # index of each state in hmm.states -> its entry in pg_gui_state_dict (None if absent)
    self.gui_state_dict = {si: pg_gui_state_dict.get(s, None) for si, s in enumerate(state_names)}
    # state name (as str) -> index of that state in hmm.states
    self.str2num_state_dict = {str(si): ni for si, ni in zip(state_names, list(self.gui_state_dict))}
    return hmm, gm_dict