Python HiddenMarkovModel Examples, pomegranate.HiddenMarkovModel Python Examples

Example #1

0

Show file

File: hmm.py Project: nlgranger/code_these

    def _fit_word_model(X, nstates, **kwargs):
        """Fit a left-to-right word HMM and return its edges as plain tuples.

        The model has ``nstates`` emitting states named "0" .. "nstates-1";
        the start and end states are renamed "-1" and ``str(nstates)``.

        Args:
            X: training sequences, forwarded unchanged to ``fit``.
            nstates: number of emitting states in the chain.
            **kwargs: extra keyword arguments forwarded to ``fit``.

        Returns:
            A list of ``(source, target, probability)`` tuples, one per edge
            of the fitted model graph, where source/target are the integer
            state names and probability is ``exp`` of the edge's stored
            ``'probability'`` attribute.

        Raises:
            ValueError: if the value returned by ``fit`` is NaN.
        """
        word_hmm = pomegranate.HiddenMarkovModel(None)
        word_hmm.start.name = str(-1)
        word_hmm.end.name = str(nstates)

        chain = []
        for idx in range(nstates):
            emission = PrecomputedDistribution(idx, nstates)
            chain.append(pomegranate.State(emission, name=str(idx)))

        # Register every state together with its self-loop.
        for state in chain:
            word_hmm.add_state(state)
            word_hmm.add_transition(state, state, 0.8)

        # Forward chain: start -> 0 -> 1 -> ... -> last -> end.
        word_hmm.add_transition(word_hmm.start, chain[0], 1)
        for previous, following in zip(chain, chain[1:]):
            word_hmm.add_transition(previous, following, 0.15)
        word_hmm.add_transition(chain[-1], word_hmm.end, 0.15)
        # NOTE(review): this edge goes from the second-to-last state back to
        # state 1, unlike the skip edges below -- confirm it is intended.
        word_hmm.add_transition(chain[-2], chain[1], 0.05)

        # Skip-one-state edges.
        for target in range(2, nstates - 1):
            word_hmm.add_transition(chain[target - 2], chain[target], 0.05)

        word_hmm.bake()

        gain = word_hmm.fit(X, **kwargs)
        if np.isnan(gain):
            raise ValueError
        print("HMM improvement: {:2.4f}".format(gain))

        fitted_edges = []
        for source, target, attrs in word_hmm.graph.edges(data=True):
            fitted_edges.append((int(source.name),
                                 int(target.name),
                                 np.exp(attrs['probability'])))
        return fitted_edges

Example #2

0

Show file

 def load_params(self, file_contents):
     """Restore model state from the text of a saved Vanilla HMM file.

     ``file_contents`` must split into exactly seven sections on the
     ``'\\nSTART_NEW_SECTION\\n'`` marker: a model-type tag, the YAML model,
     the newline-separated feature list, three YAML state dictionaries and
     a YAML dictionary of miscellaneous settings.

     Raises:
         ValueError: if the section count is wrong or the model-type tag is
             not ``'VanillaHmm'``.
     """
     sections = file_contents.split('\nSTART_NEW_SECTION\n')
     (mod_check, model_txt, feature_txt, gui_txt, pg_gui_txt, str2num_txt,
      misc_txt) = sections
     if mod_check != 'VanillaHmm':
         raise ValueError(
             '\nERROR: loaded model parameters are not for a Vanilla HMM!')

     self.trained = pg.HiddenMarkovModel().from_yaml(model_txt)
     self.feature_list = feature_txt.split('\n')

     # NOTE(review): FullLoader can construct more than plain data types;
     # only feed this method files from a trusted source.
     yaml_sections = (
         ('gui_state_dict', gui_txt),
         ('pg_gui_state_dict', pg_gui_txt),
         ('str2num_state_dict', str2num_txt),
     )
     for attr_name, yaml_text in yaml_sections:
         setattr(self, attr_name,
                 yaml.load(yaml_text, Loader=yaml.FullLoader))

     misc = yaml.load(misc_txt, Loader=yaml.SafeLoader)
     epsilon = misc['dbscan_epsilon']
     if epsilon == 'nan':
         # The saved file stores a missing epsilon as the string 'nan'.
         epsilon = np.nan
     self.nb_states = misc['nb_states']
     self.data.eps = epsilon
     self.supervision_influence = misc['supervision_influence']
     self.framerate = misc['framerate']
     self.timestamp = numeric_timestamp()

Example #3

0

Show file

def hmm(nstates=2, bias=0.1):
    """Build a baked HMM over binary symbols with random transitions.

    State 0 emits symbol 0 with probability ``bias``; every other state
    emits symbol 0 with probability ``1 - bias``. Transition rows are drawn
    with ``np.random.rand`` and normalised to sum to one; each state gets a
    start weight of ``1 / nstates``.

    Args:
        nstates: number of hidden states (named "S0", "S1", ...).
        bias: emission bias described above.

    Returns:
        The baked model.
    """
    def emission_prob(state_idx, symbol):
        if state_idx == 0:
            pair = [bias, 1 - bias]
        else:
            pair = [1 - bias, bias]
        return pair[symbol]

    states = []
    for i in range(nstates):
        emission = pmg.DiscreteDistribution({
            0: emission_prob(i, 0),
            1: emission_prob(i, 1),
        })
        states.append(pmg.State(emission, name='S%d' % i))

    # Random row-stochastic transition matrix.
    trans = np.random.rand(nstates, nstates)
    for row in trans:
        row /= row.sum()

    model = pmg.HiddenMarkovModel()
    model.add_states(states)
    start_weight = 1.0 / nstates
    for source, row in zip(states, trans):
        for target, prob in zip(states, row):
            model.add_transition(source, target, prob)
        model.add_transition(model.start, source, start_weight)
    model.bake()
    return model

Example #4

0

Show file

def run():
    """Train a randomly initialised layered HMM on a pickled dataset.

    Reads the module-level settings ``datasetload`` (file name under
    ``datasets/``), ``bond_dimension`` (hidden states per sequence position)
    and ``n_iter`` (maximum Baum-Welch iterations). One layer of
    ``bond_dimension`` states is created per column of the data; every state
    of a layer is connected to every state of the next layer with a random
    weight. After fitting, the mean negative log-likelihood of the training
    sequences and the elapsed time are printed.
    """
    # Load dataset
    path = 'datasets/'
    # NOTE(review): pickle.load can execute arbitrary code; only use dataset
    # files from a trusted source.
    with open(path + datasetload, 'rb') as f:
        a = pickle.load(f)
    X = a[0]
    X = X.astype(int)

    # Create HMM
    D = bond_dimension
    N = X.shape[1]
    n_symbols = np.max(X + 1)  # symbols are 0 .. max(X)
    list_of_states = []
    for _ in range(N):
        layer = []
        for _ in range(D):
            # Random emission weights, keyed by the symbol as a string.
            emission_weights = {
                str(symbol): np.random.rand() for symbol in range(n_symbols)
            }
            layer.append(
                pomegranate.State(
                    pomegranate.DiscreteDistribution(emission_weights)))
        list_of_states.append(layer)

    model = pomegranate.HiddenMarkovModel()
    # Fully connect each layer to the next one.
    for i in range(N - 1):
        for src in range(D):
            for dst in range(D):
                model.add_transition(list_of_states[i][src],
                                     list_of_states[i + 1][dst],
                                     np.random.rand())
    for k in range(D):
        model.add_transition(model.start, list_of_states[0][k],
                             np.random.rand())
    for k in range(D):
        model.add_transition(list_of_states[N - 1][k], model.end,
                             np.random.rand())
    model.bake()

    # Train HMM
    begin = time.time()
    sequencetrain = [[str(i) for i in v] for v in X]
    np.random.seed()
    model.fit(sequencetrain,
              algorithm='baum-welch',
              stop_threshold=1e-50,
              min_iterations=1000,
              max_iterations=n_iter)

    total_log_prob = 0
    for sequence in sequencetrain:
        total_log_prob += model.log_probability(sequence)
    mean_nll = -total_log_prob / len(sequencetrain)

    time_elapsed = time.time() - begin

    print("Negative log likelihood = %.3f" % (mean_nll))
    print("Time elapsed = %.2fs" % (time_elapsed))

Example #5

0

Show file

def fit_non_sil_phn(data_init,
                    n_mix,
                    dim_feature,
                    name_phn,
                    covar_type='full'):
    """Build a baked three-state left-to-right HMM for one phoneme.

    Each state is produced by ``create_state`` from the matching entry of
    ``data_init`` (indices 0, 1, 2) and is suffixed '-first', '-mid' and
    '-last'. Every state has a self-loop and an edge to its successor; the
    first two states also have a 0.000001 edge to the end state.

    Args:
        data_init: indexable holding the initialisation data of the three
            states.
        n_mix, dim_feature, covar_type: forwarded to ``create_state``.
        name_phn: phoneme name, used as the model name and forwarded to
            ``create_state``.

    Returns:
        The baked model.
    """
    first, mid, last = (
        create_state(data_init=data_init[position],
                     n_mix=n_mix,
                     dim_feature=dim_feature,
                     name_phn=name_phn,
                     name_state=suffix,
                     covar_type=covar_type)
        for position, suffix in enumerate(('-first', '-mid', '-last'))
    )

    phn_hmm = pomegranate.HiddenMarkovModel(name_phn)
    for state in (first, mid, last):
        phn_hmm.add_state(state)

    # (source, target, weight), in the order the edges are registered.
    topology = (
        (phn_hmm.start, first, 1.0),
        (first, first, 0.5),
        (first, mid, 0.5),
        (first, phn_hmm.end, 0.000001),
        (mid, mid, 0.5),
        (mid, phn_hmm.end, 0.000001),
        (mid, last, 0.5),
        (last, last, 0.5),
        (last, phn_hmm.end, 0.5),
    )
    for source, target, weight in topology:
        phn_hmm.add_transition(source, target, weight)

    phn_hmm.bake()
    return phn_hmm

Example #6

0

Show file

def generate_model(state, transition):
    """Build a baked two-state HMM over the symbols 'A' and 'B'.

    Args:
        state: weight with which each hidden state emits its own symbol
            (the other symbol gets ``1 - state``).
        transition: weight of switching to the other hidden state (staying
            gets ``1 - transition``).

    Returns:
        The baked model. Both states are entered from the start state with
        weight 0.5 and each has a weight-0.5 edge to the end state.
    """
    two_state_hmm = pomegranate.HiddenMarkovModel()

    emit_a = pomegranate.DiscreteDistribution({'A': state, 'B': 1-state})
    state_a = pomegranate.State(emit_a, name='A')
    emit_b = pomegranate.DiscreteDistribution({'A': 1-state, 'B': state})
    state_b = pomegranate.State(emit_b, name='B')

    # (source, target, weight), registered in this order.
    edges = [
        (two_state_hmm.start, state_a, 0.5),
        (two_state_hmm.start, state_b, 0.5),
        (state_a, state_a, 1-transition),
        (state_a, state_b, transition),
        (state_b, state_a, transition),
        (state_b, state_b, 1-transition),
        (state_a, two_state_hmm.end, 0.5),
        (state_b, two_state_hmm.end, 0.5),
    ]
    for source, target, weight in edges:
        two_state_hmm.add_transition(source, target, weight)

    two_state_hmm.bake(verbose=False)
    return two_state_hmm

Example #7

0

Show file

 def load_params(self, file_contents):
     """Restore model state from the text of a saved substate HMM file.

     ``file_contents`` must split into exactly seven sections on the
     ``'\\nSTART_NEW_SECTION\\n'`` marker: a model-type tag, the YAML model,
     the newline-separated feature list, three YAML state dictionaries and
     a YAML dictionary of miscellaneous settings.

     Raises:
         ValueError: if the section count is wrong or the model-type tag is
             not ``'SubstateHmm'``.
     """
     sections = file_contents.split('\nSTART_NEW_SECTION\n')
     (mod_check, model_txt, feature_txt, gui_txt, pg_gui_txt, str2num_txt,
      misc_txt) = sections
     if mod_check != 'SubstateHmm':
         raise ValueError(
             '\nERROR: loaded model parameters are not for a substate HMM!')

     self.trained = pg.HiddenMarkovModel().from_yaml(model_txt)
     self.feature_list = feature_txt.split('\n')

     # NOTE(review): FullLoader can construct more than plain data types;
     # only feed this method files from a trusted source.
     yaml_sections = (
         ('gui_state_dict', gui_txt),
         ('pg_gui_state_dict', pg_gui_txt),
         ('str2num_state_dict', str2num_txt),
     )
     for attr_name, yaml_text in yaml_sections:
         setattr(self, attr_name,
                 yaml.load(yaml_text, Loader=yaml.FullLoader))

     misc = yaml.load(misc_txt, Loader=yaml.SafeLoader)
     epsilon = misc['dbscan_epsilon']
     if epsilon == 'nan':
         # The saved file stores a missing epsilon as the string 'nan'.
         epsilon = np.nan
     self.nb_states = misc['nb_states']
     self.buffer = misc['buffer']
     self.data.eps = epsilon

Example #8

0

Show file

    def fit_hmm(self):
        """Build a fully connected four-state Gaussian HMM and fit it.

        The four states (named '0'..'3') emit three-dimensional Gaussians
        with hand-picked means and covariances. The model is fitted on
        ``self.accels_filt`` and stored in ``self.model``.
        """
        print('Fitting Model')
        # (name, mean, covariance) of each emitting state.
        emission_specs = [
            ('0', np.array([1, 1, 1]), .1 * np.eye(3)),
            ('1', np.array([1, 1, 1]), 3 * np.eye(3)),
            ('2', np.array([.5, .5, .5]),
             .1 * np.eye(3) + .1 * np.ones([3, 3])),
            ('3', np.array([1.5, 1.5, 1.5]),
             .1 * np.eye(3) + .1 * np.ones([3, 3])),
        ]
        emitters = [
            pg.State(pg.MultivariateGaussianDistribution(mean, cov),
                     name=label)
            for label, mean, cov in emission_specs
        ]

        # Initial weights: entry k is the weight of the edge into state k.
        start_weights = (.85, .05, .05, .05)
        # Row i holds the weights of the edges leaving state i.
        transition_rows = (
            (.85, .05, .05, .05),
            (.1, .7, .1, .1),
            (.1, .1, .7, .1),
            (.1, .1, .1, .7),
        )

        model = pg.HiddenMarkovModel()
        model.add_states(emitters)
        for target, weight in zip(emitters, start_weights):
            model.add_transition(model.start, target, weight)
        for source, row in zip(emitters, transition_rows):
            for target, weight in zip(emitters, row):
                model.add_transition(source, target, weight)
        model.bake()
        model.fit(self.accels_filt)
        self.model = model

Example #9

0

Show file

File: hmm.py Project: as-101/pyecog

def make_hmm_model(emission_mat, transition_probs):
    """Build a baked two-state (baseline / ictal) discrete HMM named 'ndf'.

    Args:
        emission_mat: 2-D indexable; row 0 holds the baseline emission
            weights and row 1 the ictal ones, one column per symbol.
        transition_probs: 2-D indexable of transition weights where index 0
            is the baseline state and index 1 the ictal state.

    Returns:
        The baked model; the baseline state is named '0' and the ictal
        state '1'.
    """
    ndf_hmm = pomegranate.HiddenMarkovModel('ndf')

    ictal_emissions = {}
    baseline_emissions = {}
    for symbol in range(emission_mat.shape[1]):
        ictal_emissions[symbol] = emission_mat[1, symbol]
        baseline_emissions[symbol] = emission_mat[0, symbol]

    ictal = pomegranate.State(
        pomegranate.DiscreteDistribution(ictal_emissions), name='1')
    baseline = pomegranate.State(
        pomegranate.DiscreteDistribution(baseline_emissions), name='0')

    for hidden_state in (ictal, baseline):
        ndf_hmm.add_state(hidden_state)

    # NOTE(review): the start weights are 0.05 and 99.95, which do not sum
    # to 1 -- presumably they are normalised when the model is baked;
    # confirm the intended prior.
    edges = (
        (ndf_hmm.start, ictal, 0.05),
        (ndf_hmm.start, baseline, 99.95),
        (baseline, baseline, transition_probs[0, 0]),
        (baseline, ictal, transition_probs[0, 1]),
        (ictal, ictal, transition_probs[1, 1]),
        (ictal, baseline, transition_probs[1, 0]),
    )
    for source, target, weight in edges:
        ndf_hmm.add_transition(source, target, weight)

    ndf_hmm.bake(verbose=False)
    return ndf_hmm

Example #10

0

Show file

    def get_untrained_hmm(self, data_dict):
        """
        Return an untrained, baked pomegranate HMM with parameters filled in.

        Emission states come from ``self.get_states`` and transition, start
        and end probabilities from ``self.get_transitions``. Two main states
        are never linked directly: each connection runs through a chain of
        "edge states". As a side effect, ``self.pg_gui_state_dict``,
        ``self.gui_state_dict`` and ``self.str2num_state_dict`` are rebuilt
        from the baked model's state order.
        """
        model = pg.HiddenMarkovModel()

        # Emission distributions and transition probabilities.
        states, edge_states, pg_gui_state_dict = self.get_states(data_dict)
        tm_dict, pstart_dict, pend_dict = self.get_transitions(data_dict)

        # Main states: start edge, end edge and self-loop.
        for name in states:
            main_state = states[name]
            model.add_state(main_state)
            model.add_transition(model.start, main_state, pstart_dict[name],
                                 pseudocount=0)
            model.add_transition(main_state, model.end, pend_dict[name],
                                 pseudocount=0)
            model.add_transition(main_state, main_state,
                                 tm_dict[(name, name)], pseudocount=0)

        # Connections between main states, routed through edge-state chains.
        for edge_name in edge_states:
            chain = edge_states[edge_name][0]
            endpoints = edge_states[edge_name][1]
            origin, destination = [states[key] for key in endpoints]
            for edge_state in chain:
                model.add_state(edge_state)
            model.add_transition(origin, chain[0], tm_dict[endpoints])
            for pos in range(1, self.buffer):
                model.add_transition(chain[pos - 1], chain[pos], 1.0,
                                     pseudocount=9999999)
            model.add_transition(chain[-1], destination, 1.0,
                                 pseudocount=9999999)
        model.bake()

        baked_names = np.array([st.name for st in model.states])
        self.pg_gui_state_dict = pg_gui_state_dict
        # Index in the baked model -> GUI state (None when unmapped).
        gui_by_index = {}
        for index, baked_name in enumerate(baked_names):
            gui_by_index[index] = pg_gui_state_dict.get(baked_name, None)
        self.gui_state_dict = gui_by_index
        # State name -> index in the baked model.
        self.str2num_state_dict = {
            str(baked_name): index
            for baked_name, index in zip(baked_names, list(gui_by_index))
        }
        return model

Example #11

0

Show file

 def get_substate_object(self, vec, state_name):
     """Build an (unbaked) HMM whose states are Gaussian-mixture components.

     Columns of ``vec`` containing NaN are dropped, a Gaussian mixture with
     at most 10 components is fitted on the remaining columns (transposed),
     and each component becomes one state reachable from the start state
     with the component's mixture weight.

     Args:
         vec: 2-D array; columns are treated as samples.
         state_name: prefix for the model, start, end and substate names.

     Returns:
         ``(model, names)``: the model and the list of substate names in
         the order they were added.
     """
     has_nan = np.any(np.isnan(vec), axis=0)
     vec_clean = vec[:, np.invert(has_nan)]
     nb_clust = min(10, vec_clean.shape[1])
     gm = GaussianMixture(n_components=nb_clust).fit(vec_clean.T)
     # Small diagonal offset added to every component covariance.
     gm.covariances_ += np.eye(gm.covariances_.shape[1]) * 1E-9

     hmm_out = pg.HiddenMarkovModel()
     hmm_out.name = state_name
     hmm_out.start.name = f'{state_name}_start'
     hmm_out.end.name = f'{state_name}_end'

     added_state_names = []
     for n in range(nb_clust):
         sn = f'{state_name}_{str(n)}'
         added_state_names.append(sn)
         component = pg.MultivariateGaussianDistribution(
             gm.means_[n, :], gm.covariances_[n, :, :])
         substate = pg.State(component, name=sn)
         hmm_out.add_state(substate)
         hmm_out.add_transition(hmm_out.start, substate, gm.weights_[n],
                                pseudocount=9999999)
         hmm_out.add_transition(substate, hmm_out.end, 1.0,
                                pseudocount=9999999)
     return hmm_out, added_state_names

Example #12

0

Show file

File: concatenative_hmm.py Project: ronggong/syllable-recognition-solfege

def concatenative_hmm_alignment(trans):
    """Concatenate pretrained phoneme HMMs following a syllable transcription.

    For every syllable in ``trans`` the phoneme sequence(s) are looked up in
    the module-level ``syl_2_phn``. A syllable with a single phoneme
    sequence is appended linearly; a syllable with several alternatives is
    added as parallel branches that all start from the previous model(s)
    and all feed the next one.

    Args:
        trans: iterable of syllable keys present in ``syl_2_phn``.

    Returns:
        The baked concatenated model.
    """
    def load_phn_model(phn):
        # The file is closed as soon as the model is read.
        # NOTE(review): pickle.load can execute arbitrary code; the model
        # directory must only contain trusted files.
        model_path = os.path.join(path_pretrained_model, phn + '.pkl')
        with open(model_path, 'rb') as handle:
            return pickle.load(handle)

    # Per-syllable occurrence counter, used to keep state names unique.
    dict_syl_counter = dict.fromkeys(syl_2_phn, 0)

    hmm_conc = pomegranate.HiddenMarkovModel("hmm_conc")
    hmm_precedent = []  # model(s) whose end feeds the next phoneme model
    p_first = True
    for syl in trans:
        phns_syl = syl_2_phn[syl]
        if len(phns_syl) == 1:
            # Single pronunciation: plain linear chain.
            for p in phns_syl[0]:
                hmm_p = load_phn_model(p)
                change_state_name(hmm_p,
                                  syl + '-' + str(dict_syl_counter[syl]), p)
                hmm_conc.add_model(hmm_p)
                if p_first:
                    hmm_conc.add_transition(hmm_conc.start, hmm_p.start, 1.0)
                    p_first = False
                else:
                    for hmm_prev in hmm_precedent:
                        hmm_conc.add_transition(hmm_prev.end, hmm_p.start,
                                                1.0)
                hmm_precedent = [hmm_p]
        else:
            # Several pronunciations: one parallel branch per alternative.
            hmm_branch_precedent = hmm_precedent
            hmm_in_branch_precedent = None
            hmm_precedent = []
            for ii_phns, phns in enumerate(phns_syl):
                for ii_p, p in enumerate(phns):
                    hmm_p = load_phn_model(p)
                    change_state_name(
                        hmm_p, syl + '-' + str(ii_phns) + '-' +
                        str(dict_syl_counter[syl]), p)
                    hmm_conc.add_model(hmm_p)
                    if p_first:
                        hmm_conc.add_transition(hmm_conc.start, hmm_p.start,
                                                1.0)
                        # NOTE(review): this compares the branch index with
                        # the length of the current phoneme list; confirm
                        # whether len(phns_syl) was intended.
                        if ii_phns == len(phns) - 1:
                            p_first = False
                    elif ii_p == 0:
                        for hmm_prev in hmm_branch_precedent:
                            hmm_conc.add_transition(hmm_prev.end,
                                                    hmm_p.start, 1.0)
                    else:
                        hmm_conc.add_transition(hmm_in_branch_precedent.end,
                                                hmm_p.start, 1.0)
                    hmm_in_branch_precedent = hmm_p
                    if ii_p == len(phns) - 1:
                        # Last phoneme of a branch feeds the next syllable.
                        hmm_precedent.append(hmm_p)
        dict_syl_counter[syl] += 1

    for hmm_prev in hmm_precedent:
        hmm_conc.add_transition(hmm_prev.end, hmm_conc.end, 1.0)

    hmm_conc.bake()
    return hmm_conc

Example #13

0

Show file

def get_model(r, params, window_size, num_skipped, seq_len, p, \
    g, resample_prob, x_chr=False, haploid=False, debug=False, h_t=1, skip_score=float("-Inf")):
    """
    Builds the hidden Markov model for a given chromosome or scaffold, using the
    Pomegranate module.

    The model has one emitting state per ancestry (AA, AB, BB; AB is left out
    for haploid data) plus one "skip" state per ancestry that emits values in
    a narrow range just below skip_score.

    Arguments:
        r -- (float) the per site, per generation recombination probability
        params -- a dict where keys are names of states (AA, AB, and BB) and values
            are dicts where values are mu and sd, which are floats representing
            means and standard deviations of emission probability distributions
        window_size -- (int) the window size for this run, in bp
        num_skipped -- (int) the number of windows that were skipped due to not passing
            criteria
        seq_len -- (int) the number of windows in the current chromosome/scaffold
        p -- (float) the percent ancestry the admixed population derives from ancestral
            population A (estimated beforehand)
        g -- (int) the number of generations since admixture (estimated beforehand)
        resample_prob -- (float) probability of resampling the same ancestral recombination
            event twice in an individual after the set number of generations since admixture
            (referred to as z in the paper)
        x_chr -- (boolean) does this chromosome/scaffold belong to a hemizygous sex
            chromosome?
        haploid -- (boolean) is this individual haploid along this chromosome/scaffold?
        debug -- (boolean) should debugging messages be printed to the screen?
        h_t -- (float) if the user has specified that expected reduction in heterozygosity
            given the number of generations since admixture should be incorporated into
            the model, this is the expected fraction of the initial heterozygosity that
            remains after g generations.
        skip_score -- (float) the number emitted by adlibs_score when "skipped" windows
            are encountered

    Returns:
        a Pomegranate HMM object for the current chromosome/scaffold
    """
    # prob_lim is a module-level cap defined outside this function; it is
    # only read here.
    global prob_lim

    model = pomegranate.HiddenMarkovModel(name='ancestry')

    # Compute probabilities of transitioning to a skip state or the end. Cap these
    # both at the specified probability limit.
    skip_prob = num_skipped / seq_len
    if skip_prob > prob_lim:
        skip_prob = prob_lim
    state_end = 1 / seq_len
    if state_end > prob_lim:
        state_end = prob_lim

    # Scale the recombination probability for hemizygous sex chromosomes.
    if x_chr:
        r *= (2 / 3)

    # Determine probabilities of transitions
    if haploid:
        # Should 2 be 1.5? I don't think so -- we already multiplied r by (2/3)
        # so that's in here already.
        aa_bb = g * r * (1 - p)
        bb_aa = g * r * p
        # Eliminate the heterozygous state.
        aa_ab = 0
        ab_aa = 0
        bb_ab = 0
        ab_bb = 0
    else:
        probs = get_trans_probs(r, g, p, resample_prob)
        aa_ab = probs['aa_ab']
        ab_aa = probs['ab_aa']
        aa_bb = probs['aa_bb']
        bb_ab = probs['bb_ab']
        ab_bb = probs['ab_bb']
        bb_aa = probs['bb_aa']

    # Scale the transition probabilities by the window size.
    aa_ab *= window_size
    ab_aa *= window_size
    aa_bb *= window_size
    bb_ab *= window_size
    ab_bb *= window_size
    bb_aa *= window_size

    # Self-transitions take whatever is left after leaving for another
    # ancestry, ending, or entering a skip state.
    aa_aa = 1 - (aa_ab + aa_bb + state_end + skip_prob)
    ab_ab = 1 - (ab_aa + ab_bb + state_end + skip_prob)
    bb_bb = 1 - (bb_aa + bb_ab + state_end + skip_prob)

    # Account for reduction in heterozygosity due to genetic drift

    if haploid:
        pass
        #aa_aa += (aa_bb - aa_bb*h_t)
        #aa_bb *= h_t
        #bb_bb += (bb_aa - bb_aa*h_t)
        #bb_aa *= h_t
    else:
        # The fraction (1 - h_t) of every transition into AB (and of AB -> AB)
        # is redistributed to the two homozygous targets.
        # NOTE(review): within each pair of updates the second line uses the
        # already-updated first value in its denominator, so the two shares
        # only add up to the removed amount when h_t == 1 (the default).
        # Confirm that this order dependence is intended.
        aa_aa += (aa_aa / (aa_aa + aa_bb)) * (aa_ab - aa_ab * h_t)
        aa_bb += (aa_bb / (aa_aa + aa_bb)) * (aa_ab - aa_ab * h_t)
        bb_aa += (bb_aa / (bb_aa + bb_bb)) * (bb_ab - bb_ab * h_t)
        bb_bb += (bb_bb / (bb_aa + bb_bb)) * (bb_ab - bb_ab * h_t)
        aa_ab *= h_t
        bb_ab *= h_t
        ab_aa += (ab_aa / (ab_aa + ab_bb)) * (ab_ab - ab_ab * h_t)
        ab_bb += (ab_bb / (ab_aa + ab_bb)) * (ab_ab - ab_ab * h_t)
        ab_ab *= h_t

    if debug:
        print("# AA -> AA {}".format(aa_aa), file=sys.stderr)
        print("# AA -> AB {}".format(aa_ab), file=sys.stderr)
        print("# AA -> BB {}".format(aa_bb), file=sys.stderr)
        print("# AB -> AA {}".format(ab_aa), file=sys.stderr)
        print("# AB -> AB {}".format(ab_ab), file=sys.stderr)
        print("# AB -> BB {}".format(ab_bb), file=sys.stderr)
        print("# BB -> AA {}".format(bb_aa), file=sys.stderr)
        print("# BB -> AB {}".format(bb_ab), file=sys.stderr)
        print("# BB -> BB {}".format(bb_bb), file=sys.stderr)
        print("# SKIP {}".format(skip_prob), file=sys.stderr)

    # Normal emission distribution for each ancestry state.
    aaDist = pomegranate.NormalDistribution(params['AA']['mu'],
                                            params['AA']['sd'])
    abDist = pomegranate.NormalDistribution(params['AB']['mu'],
                                            params['AB']['sd'])
    bbDist = pomegranate.NormalDistribution(params['BB']['mu'],
                                            params['BB']['sd'])

    aaState = pomegranate.State(aaDist, name="AA")
    abState = pomegranate.State(abDist, name="AB")
    bbState = pomegranate.State(bbDist, name="BB")

    # The AB state objects are always created but only added to the model
    # for non-haploid data.
    model.add_state(aaState)
    if not haploid:
        model.add_state(abState)
    model.add_state(bbState)

    #### ADD skip states

    # All three skip states share this one distribution object.
    skip_dist = pomegranate.UniformDistribution(skip_score - 0.01, skip_score)

    aa_skip_state = pomegranate.State(skip_dist, name="skip-AA")
    ab_skip_state = pomegranate.State(skip_dist, name="skip-AB")
    bb_skip_state = pomegranate.State(skip_dist, name="skip-BB")

    model.add_state(aa_skip_state)
    if not haploid:
        model.add_state(ab_skip_state)
    model.add_state(bb_skip_state)

    # Start transitions: the ancestry prior (p / 1-p for haploid data,
    # p**2 / 2p(1-p) / (1-p)**2 otherwise) is split between the regular
    # state and its skip state according to skip_prob.
    if haploid:
        model.add_transition(model.start, aaState, p * (1 - skip_prob))
        model.add_transition(model.start, aa_skip_state, p * skip_prob)
        model.add_transition(model.start, bbState, (1 - p) * (1 - skip_prob))
        model.add_transition(model.start, bb_skip_state, (1 - p) * skip_prob)
    else:
        model.add_transition(model.start, aaState, p**2 * (1 - skip_prob))
        model.add_transition(model.start, aa_skip_state, p**2 * skip_prob)
        model.add_transition(model.start, abState,
                             2 * p * (1 - p) * (1 - skip_prob))
        model.add_transition(model.start, ab_skip_state,
                             2 * p * (1 - p) * skip_prob)
        model.add_transition(model.start, bbState,
                             (1 - p)**2 * (1 - skip_prob))
        model.add_transition(model.start, bb_skip_state,
                             (1 - p)**2 * skip_prob)

    # NOTE(review): the end transitions below use the uncapped 1 / seq_len,
    # whereas the self-transition values above were computed with state_end
    # (capped at prob_lim). Confirm which one is intended.
    model.add_transition(aaState, model.end, 1 / seq_len)
    if not haploid:
        model.add_transition(abState, model.end, 1 / seq_len)
    model.add_transition(bbState, model.end, 1 / seq_len)

    # Transitions among the regular ancestry states.
    model.add_transition(aaState, bbState, aa_bb)
    model.add_transition(aaState, aaState, aa_aa)
    model.add_transition(bbState, aaState, bb_aa)
    model.add_transition(bbState, bbState, bb_bb)

    if not haploid:
        model.add_transition(aaState, abState, aa_ab)
        model.add_transition(abState, aaState, ab_aa)
        model.add_transition(abState, bbState, ab_bb)
        model.add_transition(abState, abState, ab_ab)
        model.add_transition(bbState, abState, bb_ab)

    ### Add skip state transitions
    # Regular state -> its own skip state.
    model.add_transition(aaState, aa_skip_state, skip_prob)
    if not haploid:
        model.add_transition(abState, ab_skip_state, skip_prob)
    model.add_transition(bbState, bb_skip_state, skip_prob)

    # Skip state self-loops.
    model.add_transition(aa_skip_state, aa_skip_state, skip_prob)
    if not haploid:
        model.add_transition(ab_skip_state, ab_skip_state, skip_prob)
    model.add_transition(bb_skip_state, bb_skip_state, skip_prob)

    # Skip state -> regular state of a different ancestry, using the same
    # values as the corresponding regular-state transitions.
    model.add_transition(aa_skip_state, bbState, aa_bb)
    model.add_transition(bb_skip_state, aaState, bb_aa)

    if not haploid:
        model.add_transition(aa_skip_state, abState, aa_ab)
        model.add_transition(ab_skip_state, aaState, ab_aa)
        model.add_transition(ab_skip_state, bbState, ab_bb)
        model.add_transition(bb_skip_state, abState, bb_ab)

    model.add_transition(aa_skip_state, model.end, 1 / seq_len)
    if not haploid:
        model.add_transition(ab_skip_state, model.end, 1 / seq_len)
    model.add_transition(bb_skip_state, model.end, 1 / seq_len)

    # Skip state -> regular state of the same ancestry: the remainder after
    # skipping again, switching ancestry, or ending.
    model.add_transition(aa_skip_state, aaState,
                         1 - skip_prob - aa_ab - aa_bb - 1 / seq_len)
    if not haploid:
        model.add_transition(ab_skip_state, abState,
                             1 - skip_prob - ab_aa - ab_bb - 1 / seq_len)
    model.add_transition(bb_skip_state, bbState,
                         1 - skip_prob - bb_aa - bb_ab - 1 / seq_len)
    ###

    model.bake()

    return model

Example #14

0

Show file

File: hmm_separate_patients2.py Project: kristinablaskova/research_project

            if feature in patient_data.columns:
                patient_data = patient_data.drop(feature, axis=1)
        df1 = patient_data.pop('hypnogram_User')
        patient_data['hypnogram_User'] = df1
        n_features = patient_data.shape[1] - 2
        data_columns, hidden_sequence, observation_sequence, train, test = preprocess_data(
            data=patient_data)
        training_class_array.append(hidden_sequence)
        train_df = train_df.append(train)

    hmm_dist = dst.Distributions(train_df)
    feature_names = patient_data.drop(['hypnogram_User', 'hypnogram_Machine'],
                                      axis=1).columns.values.tolist()
    dist, state_names = hmm_dist.gauss_kernel_dist(feature_names)

    model = pg.HiddenMarkovModel('prediction')
    create_states(model, training_class_array, state_names)
    model.bake()

    #TESTING PART!!! :)
    list_of_testing_patients = list_of_testing_patients.reset_index()

    for i in range(list_of_testing_patients.shape[0]):
        path = "Data/" + str(list_of_testing_patients['file_name'][i])
        patient_data = dp.data_import(path)
        binary_features = [
            "Gain", "Bradycardia", "LegMovement", "CentralApnea", "Arousal",
            "Hypopnea", "RelativeDesaturation", "Snore", "ObstructiveApnea",
            "MixedApnea", "LongRR", "Tachycardia"
        ]
        for feature in binary_features:

Example #15

0

Show file

File: hmm.py Project: kristinablaskova/research_project

def run_hmm_on_files(path, n_features):
    """Build an HMM from one recording and score its state predictions.

    The file at ``path`` is preprocessed with ``dp.preprocess_any_file``,
    emission distributions are taken from
    ``dst.Distributions(...).gauss_kernel_dist`` and transition weights from
    a Markov chain estimated on the annotated 'hypnogram_User' sequence.

    Returns:
        ``(poznamka, score)``: ``poznamka`` is a list of notes ('' or
        'nezacina wake', added when no 'Wake' state exists) and ``score`` is
        the fraction of predictions equal to the annotated state, or the
        string "NaN" when a ValueError was raised.
    """
    # "poznamka" is Czech for "note".
    poznamka = []
    try:
        print(path)

        data, n_features, feature_names = dp.preprocess_any_file(path, n_features)


        def preprocess_data(data):
            # Chronological 70/30 split (no shuffling).
            train, test = ms.train_test_split(data, test_size=0.3, shuffle=False)
            data_columns = list(data.columns.values)
            hidden_sequence = data['hypnogram_User'].tolist()
            l = len(hidden_sequence)
            # Walk backwards so deleting by position does not shift the
            # entries that are still to be visited.
            # NOTE(review): i ranges over all of `data`, but the row is
            # dropped from `train` only -- confirm the behaviour when a
            # "NotScored" row lies outside the training part.
            for i in reversed(range(0, l)):
                if hidden_sequence[i] == "NotScored":
                    train = train.drop([i])
                    del hidden_sequence[i]
            observation_sequence = train.iloc[:, 0:n_features].values.tolist()
            return data_columns, hidden_sequence, observation_sequence, train, test

        def create_states(model, hidden_sequence, state_names):
            # `dist` is read from the enclosing scope; it is assigned below,
            # before this function is called.
            chain_model = pg.MarkovChain.from_samples([hidden_sequence])
            states = {}  # type: Dict[str, pg.State]
            for name in state_names:
                states[name] = pg.State(dist[state_names.index(name)], name=name)
            model.add_states(list(states.values()))
            # sets the starting probability for state 'Wake' to 1.0
            try:
                model.add_transition(model.start, states['Wake'], 1.0)
                poznamka.append("")
            except KeyError:
                # No 'Wake' state: record a note and add no start transition.
                print("nezacina wake")
                poznamka.append('nezacina wake')
                pass
            # add the state-to-state transition weights taken from the
            # fitted Markov chain; each entry is read as
            # (from_state, to_state, probability)
            for prob in chain_model.distributions[1].parameters[0]:
                state1 = states[prob[0]]
                state2 = states[prob[1]]
                probability = prob[2]
                model.add_transition(state1, state2, probability)



        data_columns, hidden_sequence, observation_sequence, train, test = preprocess_data(data)
        hmm_dist = dst.Distributions(train)
        dist, state_names = hmm_dist.gauss_kernel_dist(feature_names)
        model = pg.HiddenMarkovModel('prediction')
        create_states(model, hidden_sequence, state_names)
        model.bake()

        # NOTE(review): despite its name this sequence is built from `train`,
        # not from `test` -- confirm that scoring on the training rows is
        # intended.
        test_observation_sequence = train.iloc[:, 0:n_features].values.tolist()
        #hmm_fit = model.fit([observation_sequence], labels=[hidden_sequence], algorithm='labeled')
        hmm_pred = model.predict(test_observation_sequence)

        # NOTE(review): the confusion matrix is computed but not used or
        # returned; state_names is passed positionally, presumably as the
        # label order -- confirm against the installed metrics library.
        conf_hmm = metrics.confusion_matrix(hidden_sequence, [state_names[id] for id in hmm_pred], state_names)
        #print(conf_hmm)
        #print(state_names)

        # Fraction of positions where the predicted state index equals the
        # index of the annotated state.
        state_ids = np.array([state_names.index(val) for val in hidden_sequence])
        score = (np.array(hmm_pred) == state_ids).mean()
        print(score)
    except ValueError:
        # Any ValueError raised above ends up here; the printed message
        # mentions an unrecognised 'hypnogram user' column as one example.
        print('nejaky valueerror - napr nepozna stlpec hypnogram user')
        score = "NaN"
        feature_names = []

    return poznamka, score

Example #16

0

Show file

File: hmm.py Project: nlgranger/code_these

    def fit_transitions(self, X, gloss_seqs, **hmm_fit_args):
        """Fit one word model per label and assemble them into ``self.hmm``.

        For every label, the matching sub-segments listed in ``gloss_seqs``
        are cut out of ``X``, scored with ``self.posterior``, offset by
        ``self.p_s`` and passed to ``self._fit_word_model``. The fitted edges
        are then copied into one model in which the last state has a
        self-loop weighted ``self.p_idle2idle`` and word-entry edges are
        weighted ``self.p_idle2gesture``. Finally ``self.state2idx`` and
        ``self.state2label`` are rebuilt from the baked state order.

        Args:
            X: sequence of modalities, each indexable by sequence number.
            gloss_seqs: per sequence, an iterable of (label, start, stop).
            **hmm_fit_args: forwarded to ``self._fit_word_model``.
        """
        # Train individual word models
        params = []
        for i, label in enumerate(self.labels):
            # Range of state indexes for this label
            first = sum(self.chains_lengths[:i])
            last = sum(self.chains_lengths[:i + 1])

            # Sub-segments annotated with this label
            segments = []
            for seq, gloss_seq in enumerate(gloss_seqs):
                for l, start, stop in gloss_seq:
                    if l == label:
                        segments.append((seq, start, stop))

            per_modality = [
                [Xm[seq][start:stop] for seq, start, stop in segments]
                for Xm in X
            ]

            # Posteriors restricted to this label's states, offset by p_s
            # (pseudo log-likelihoods)
            Xw = []
            for x in zip(*per_modality):
                logp = self.posterior.predict_logproba(*x)[:, first:last]
                Xw.append(logp - self.p_s[None, first:last])

            word_edges = self._fit_word_model(Xw, self.chains_lengths[i],
                                              **hmm_fit_args)
            params.append(word_edges)

        # Create complete model
        print("loading trained parameters into the model")
        self.hmm = pomegranate.HiddenMarkovModel(None)

        states = []
        for idx in range(self.nstates):
            emitter = pomegranate.State(
                PrecomputedDistribution(idx, self.nstates), name=str(idx))
            states.append(emitter)
            self.hmm.add_state(emitter)

        self.hmm.start.name = str(-1)
        self.hmm.end.name = str(self.nstates)
        # The model starts in the last state, which also has a self-loop.
        self.hmm.add_transition(self.hmm.start, states[-1], 1)
        self.hmm.add_transition(states[-1], states[-1], self.p_idle2idle)

        for i in range(self.nlabels):
            state_offset = np.sum(self.chains_lengths[:i])
            chain_len = self.chains_lengths[i]

            for src, dst, prob in params[i]:
                # Adjust indexes and parameters to integrate within the full
                # model: a word's end maps to index -1 (the last state) and
                # a word's start (-1) already indexes that same state.
                if dst == chain_len:
                    dst = -1
                else:
                    dst = dst + state_offset
                if src == -1:
                    prob = self.p_idle2gesture
                else:
                    src += state_offset

                self.hmm.add_transition(states[src], states[dst], prob)

        self.hmm.bake()

        # Build mapping between internal indexes and ours
        boundary_names = {"-1", str(self.nstates)}
        self.state2idx = np.array(
            [int(s.name) for s in self.hmm.states
             if s.name not in boundary_names],
            dtype=np.int32)

        label_blocks = [
            np.full((self.chains_lengths[i], ), self.labels[i])
            for i in range(self.nlabels)
        ]
        idx2labels = np.concatenate(
            label_blocks + [np.array([0.0])]).astype(np.int32)

        boundary_ids = {-1, self.nstates}
        self.state2label = np.array([
            idx2labels[int(s.name)] for s in self.hmm.states
            if int(s.name) not in boundary_ids
        ])

Example #17

0

Show file

def decode_sequence(probs=None,
                    algorithm='threshold',
                    params=None,
                    verbose=True):
    '''
    Decode a smoothed label sequence from per-sample class probabilities.

    Once a model outputs probabilities for some sequence of data, that
    data shall be passed to this method, which post-processes them to
    determine where the *actual* canned laughter was.

    implemented algorithms:
        - 'threshold'
          window and threshold method: a simple heuristic that slides a
          window of length n over the samples; whenever the average
          probability of a class over the window is at least t, every
          sample in that window is assigned that class. Later windows
          (and, within a window, higher class indices) overwrite earlier
          assignments. Samples never covered by a qualifying window keep
          their argmax label.
          requires params['n'] and params['t'].
        - 'hmm' (alias 'viterbi')
          builds a `pomegranate` hidden Markov model with one state per
          class, feeds it the per-sample argmax labels (as strings) and
          decodes the state sequence with Viterbi.
          params may map a class index c (0, 1, ...) to the emission
          distribution of that class; classes 0 and 1 fall back to
          built-in defaults. params may also carry a 'transitions' entry
          that is forwarded to `model.add_transitions`; otherwise a
          built-in two-state topology is used (which needs >= 2 classes).

    Any other algorithm name (including the once-planned 'neural' and
    'modethreshold') raises NotImplementedError.
    ---
        probs: an nparray of (n_samples, n_classes) probabilities such that
               foreach sample, the sum of probabilities across classes adds up
               to 1. If the last dimension has size 1 the array is first
               converted with this module's _binary_probs_to_multiclass
        algorithm: name of the decoding strategy, see above
        params: dict of algorithm-specific settings; defaults to
                dict(n=5, t=.8) when omitted
        verbose: currently unused; kept for interface compatibility

        return: for 'threshold', a list of len n_samples whose ith entry is
                the predicted label of sample i; for 'hmm', the Viterbi path
                returned by pomegranate with its first and last entries
                removed

        raises: KeyError if a required params entry is missing, or if the
                'hmm' algorithm needs a default emission for a class other
                than 0 or 1; NotImplementedError for unknown algorithms
    '''
    if params is None:
        # Fresh dict per call instead of a shared mutable default argument.
        params = dict(n=5, t=.8)

    color.INFO('INFO', 'shape of input probs is: {}'.format(probs.shape))
    if probs.shape[-1] == 1:
        probs = _binary_probs_to_multiclass(probs)

    color.INFO('INFO', 'received probs of shape {}'.format(str(probs.shape)))
    n_classes = probs.shape[-1]

    if algorithm == 'threshold':
        n, t = params['n'], params['t']
        labels = [np.argmax(timechunk) for timechunk in probs]

        for i in range(len(probs) - n + 1):
            # The window average does not depend on the class, so compute it
            # once per window rather than once per (window, class) pair.
            window_avg = np.average(probs[i:i + n], axis=0)
            for c in range(n_classes):
                if window_avg[c] >= t:
                    labels[i:i + n] = [c] * n

        return labels

    elif algorithm in ('hmm', 'viterbi'):
        # Default emission probabilities; only classes 0 and 1 are covered.
        default = {
            0: pmgt.DiscreteDistribution({
                '0': 0.7,
                '1': 0.3
            }),
            1: pmgt.DiscreteDistribution({
                '0': 0.2,
                '1': 0.8
            })
        }

        states = []
        for c in range(n_classes):
            # Look the default up lazily: `params.get(c, default[c])` would
            # evaluate default[c] first and raise KeyError for c >= 2 even
            # when the caller supplied an emission for that class.
            emission = params[c] if c in params else default[c]
            states.append(pmgt.State(emission, name=str(c)))

        model = pmgt.HiddenMarkovModel('laugh-decoder')
        model.add_states(states)

        if 'transitions' in params:
            model.add_transitions(params['transitions'])
        else:
            # start must always go to state 0; state 0 may also end the
            # sequence with probability .1
            model.add_transitions([model.start, states[0]],
                                  [states[0], model.end], [1., .1])
            # 0->0, 0->1, 1->0, 1->1
            model.add_transitions([states[0], states[0], states[1], states[1]],
                                  [states[0], states[1], states[0], states[1]],
                                  [.5, .4, .2, .8])
        model.bake()

        # Observed symbols are the argmax labels as strings, matching the
        # '0'/'1' keys of the default emission distributions.
        labels = [str(np.argmax(entry)) for entry in probs]
        labels = model.predict(sequence=labels, algorithm='viterbi')
        # NOTE(review): presumably strips the silent start/end states from
        # the predicted path -- confirm against pomegranate's predict output.
        return labels[1:-1]

    else:
        raise NotImplementedError

Example #18

0

Show file

# Toy training data: a single sequence of 100 binary observations, each
# drawn as 1 when a uniform random number exceeds 0.2.
seq = [np.array(np.random.rand(100) > 0.2, dtype=int)]

# NOTE(review): this instance is replaced by the pomegranate model built
# below; the call is kept in case `hmm(...)` has side effects -- confirm.
model = hmm(nstates=2)

nstates = 2
# Every state starts with the same uniform emission distribution over {0, 1}.
states = [pmg.DiscreteDistribution({
    0: 0.5,
    1: 0.5
}) for _ in range(nstates)]

# Random transition matrix with each row normalised to sum to 1.
# (The former uniform initialisation was overwritten immediately, so it
# has been dropped.)
trans = np.random.rand(nstates, nstates)
trans /= trans.sum(axis=1, keepdims=True)

# Arguments after the emission list: uniform start probabilities and
# all-zero end probabilities.
model = pmg.HiddenMarkovModel().from_matrix(trans, states,
                                            np.ones(nstates) / nstates,
                                            np.zeros(nstates))

model.plot()

# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(model.fit(seq))

plt.figure(1)
plt.clf()
model.plot()

logp, path = model.viterbi(seq[0])
print(idx_from_path(path))

### worm data

Example #19

0

Show file

File: concatenative_hmm.py Project: ronggong/syllable-recognition-solfege

def _load_phone_hmm(path_pretrained_model, phone):
    """Load the pickled HMM stored as '<phone>.pkl' in path_pretrained_model."""
    # NOTE(security): pickle.load can execute arbitrary code; only point this
    # at model directories you trust.
    with open(os.path.join(path_pretrained_model, phone + '.pkl'), 'rb') as f:
        return pickle.load(f)


def concatenative_hmm_recogntion(path_pretrained_model):
    """
    Build, save and plot the concatenated solfege-syllable HMM.

    Pre-trained phone HMMs are loaded from ``<phone>.pkl`` files in
    ``path_pretrained_model``, renamed per syllable with
    ``change_state_name`` and chained into syllable models. All syllables
    plus the silence model are entered from the start state of the
    combined model and loop back to it when they finish. No transition
    into the combined model's end state is added.

    Side effects: writes ``hmm_conc.pkl`` (pickle protocol 2) into
    ``path_pretrained_model``, plots the model and saves the figure to
    ``topo.png`` in the current working directory.

    :param path_pretrained_model: directory holding the phone ``.pkl`` files
    :return: None
    """
    # Syllable label -> ordered phone models it is built from. A fresh copy
    # of each phone model is loaded per syllable because the states are
    # renamed with the syllable label.
    syllable_phones = (
        ('do', ('d', 'ow')),
        ('re', ('r', 'ey')),
        ('mi', ('m', 'iy')),
        ('fa', ('f', 'aa')),
        ('sol0', ('s', 'ow', 'l')),
        ('sol1', ('s', 'ao', 'l')),
        ('la', ('l', 'aa')),
        ('si', ('s', 'iy')),
    )

    syllable_models = []
    for syllable, phones in syllable_phones:
        chain = []
        for phone in phones:
            phone_hmm = _load_phone_hmm(path_pretrained_model, phone)
            change_state_name(phone_hmm, syllable, phone)
            chain.append(phone_hmm)
        syllable_models.append(chain)

    # The silence model keeps its original state names.
    hmm_sil = _load_phone_hmm(path_pretrained_model, 'sil')

    hmm_conc = pomegranate.HiddenMarkovModel("hmm_conc")

    for chain in syllable_models:
        for phone_hmm in chain:
            hmm_conc.add_model(phone_hmm)
    hmm_conc.add_model(hmm_sil)

    # phrase start to phn start transitions: nine entry points (eight
    # syllables + silence), each with probability 0.111111
    for chain in syllable_models:
        hmm_conc.add_transition(hmm_conc.start, chain[0].start, 0.111111)
    hmm_conc.add_transition(hmm_conc.start, hmm_sil.start, 0.111111)

    # transitions between the phones of one syllable
    for chain in syllable_models:
        hmm_conc.add_transition(chain[0].end, chain[1].start, 1.0)
        if len(chain) == 3:
            # Third phone is optional: from the end of the second phone,
            # either enter it or jump straight to its end state.
            hmm_conc.add_transition(chain[1].end, chain[2].start, 0.5)
            hmm_conc.add_transition(chain[1].end, chain[2].end, 0.5)

    # syllable end to phrase start
    for chain in syllable_models:
        hmm_conc.add_transition(chain[-1].end, hmm_conc.start, 1.0)
    hmm_conc.add_transition(hmm_sil.end, hmm_conc.start, 1.0)

    hmm_conc.bake(merge=False)

    with open(os.path.join(path_pretrained_model, 'hmm_conc.pkl'),
              'wb') as f:
        pickle.dump(hmm_conc, f, protocol=2)

    hmm_conc.plot()
    plt.savefig('topo.png', dpi=3000)

Example #20

0

Show file

from duree import duree

Nsamples = 100
# Model parameter definitions
# NOTE(review): Nsamples and start_probability are not used in the visible
# excerpt; presumably consumed further down -- confirm.

start_probability = np.array([1, 0, 0])
T = np.array([[0.5 , 0.4 , 0.1],[0.3 , 0.4 , 0.3 ],[0.1 , 0.2 , 0.7 ]])                # Temporary transition matrix (row = source state)

B = np.array([[0.5 , 0.5],[0.25,0.75], [0.75, 0.25]])        # Temporary emission matrix (row = state, columns = 'pile', 'face')


dicoObs={'pile': 0 ,'face':1}        # maps observation strings to integer codes (0 and 1)
dicoState={'P1':0 ,'P2':1, 'P3':2}   # maps state names to integer codes (0, 1 and 2)

## Build the hidden Markov model
model = pg.HiddenMarkovModel( name="partie 5" )      # Create the model instance
# Emission matrix

# Create the three states P1..P3; state Pk takes its 'pile'/'face' emission
# probabilities from row k-1 of B
p1 = pg.State( pg.DiscreteDistribution({ 'pile': B[0,0],'face': B[0,1]}), name='P1' )

p2 = pg.State( pg.DiscreteDistribution({ 'pile': B[1,0],'face': B[1,1]}), name='P2' )

p3 = pg.State( pg.DiscreteDistribution({ 'pile': B[2,0],'face': B[2,1]}), name='P3')




# Transition matrix
model.add_transitions(model.start,[p1,p2,p3],[1, 0, 0])  # Initial probabilities: always start in P1
model.add_transitions(p1,  [p1,p2,p3],[T[0,0],T[0,1],T[0,2]])     # transitions out of P1 (row 0 of T)

Example #21

0

Show file

File: SubstateHmm_sandbox.py Project: cvdelannoy/FRETboard

    def get_untrained_hmm(self, data_dict):
        """
        Assemble an initialised, not yet fitted, pomegranate HMM for data_dict.

        Emission states come from self.get_states and transition / start / end
        probabilities from self.get_transitions; this method only wires them
        into a model. (According to the original documentation those helpers
        use k-means when all data is unlabeled and the given classifications
        otherwise -- not verifiable from this method.)

        Side effects: the probability dicts returned by self.get_transitions
        are clamped in place, and self.pg_gui_state_dict, self.gui_state_dict
        and self.str2num_state_dict are (re)assigned.

        :param data_dict: data passed through to get_states / get_transitions
        :return: tuple (hmm, gm_dict) -- the baked model and the per-state
                 mixture dict returned by self.get_states
        """
        min_prob = 0.000001
        model = pg.HiddenMarkovModel()

        # Emission distributions and transition probabilities
        states, edge_states, pg_gui_state_dict, gm_dict = self.get_states(data_dict)
        tm_dict, pstart_dict, pend_dict = self.get_transitions(data_dict)

        # Lift 0-probability entries to a tiny positive value; avoids nans on edges
        for prob_dict in (tm_dict, pstart_dict, pend_dict):
            for key in prob_dict:
                prob_dict[key] = max(prob_dict[key], min_prob)

        # Per state: register its substates, connect them to each other and
        # hook the state up to the model's start and end
        tr_dict = {}
        for s_name in states:
            substates = states[s_name]
            model.add_states(substates)

            # Weight of each substate, keyed '<state name>_<substate index>'
            internal_tr_dict = {}
            for iidx, weight in enumerate(gm_dict[s_name].weights_):
                internal_tr_dict[f'{s_name}_{iidx}'] = weight

            # Every ordered substate pair (self-loops included) gets the
            # state's stay probability scaled by the target substate's weight
            p_stay = tm_dict[(s_name, s_name)]
            for source in substates:
                for target in substates:
                    model.add_transition(source, target,
                                         p_stay * internal_tr_dict[target.name],
                                         pseudocount=0)

            # Enter through the first substate, leave through the last
            model.add_transition(model.start, substates[0], pstart_dict[s_name])
            model.add_transition(substates[-1], model.end, pend_dict[s_name])

            tr_dict[s_name] = internal_tr_dict

        # Connect different states to each other through chains of edge states
        for es_name in edge_states:
            es_list, es_ids = edge_states[es_name]
            from_name = es_ids[0]
            to_name = es_ids[1]
            model.add_states(es_list)

            # Into the chain: every substate of the source state
            in_group = f'{es_name}_in'
            for source in states[from_name]:
                model.add_transition(source, es_list[0], tm_dict[es_ids],
                                     group=in_group, pseudocount=0)

            # Out of the chain: only into the first substate of the target state
            chain_exit = es_list[-1]
            entry = states[to_name][0]
            model.add_transition(chain_exit, entry,
                                 tr_dict[to_name][entry.name], pseudocount=0)

            # Along the chain: consecutive edge states, probability 1
            for nxt in range(1, self.buffer):
                model.add_transition(es_list[nxt - 1], es_list[nxt], 1.0,
                                     pseudocount=9999999)
        model.bake()

        # Lookup tables between the baked model's state order and GUI states
        state_names = np.array([st.name for st in model.states])
        self.pg_gui_state_dict = pg_gui_state_dict
        gui_lookup = {}
        for position, name in enumerate(state_names):
            gui_lookup[position] = pg_gui_state_dict.get(name, None)
        self.gui_state_dict = gui_lookup
        self.str2num_state_dict = {
            str(name): number
            for name, number in zip(state_names, list(self.gui_state_dict))
        }
        return model, gm_dict