Ejemplo n.º 1
0
def load_segmentation_model(modeldata):
    """Build a segmentation HMM from a list of state descriptions.

    Each entry of ``modeldata`` is expected to provide a 'name', an
    'emission' list of (mu, sigma, weight) tuples, a 'transition' list of
    (next_state_name, probability) pairs, and optionally a 'start_prob'.
    """
    model = HiddenMarkovModel('model')

    # First pass: create one state per entry, with either a single
    # Gaussian or a weighted Gaussian mixture emission.
    states = {}
    for spec in modeldata:
        params = spec['emission']
        if len(params) == 1:
            dist = NormalDistribution(*params[0][:2])
        else:
            mixture_weights = np.array([w for _, _, w in params])
            components = [NormalDistribution(mu, sigma)
                          for mu, sigma, _ in params]
            dist = GeneralMixtureModel(components, weights=mixture_weights)
        st = State(dist, name=spec['name'])

        states[spec['name']] = st
        model.add_state(st)
        if 'start_prob' in spec:
            model.add_transition(model.start, st, spec['start_prob'])

    # Second pass: wire up inter-state transitions by name.
    for spec in modeldata:
        source = states[spec['name']]
        for dest_name, prob in spec['transition']:
            model.add_transition(source, states[dest_name], prob)

    model.bake()

    return model
Ejemplo n.º 2
0
    def oriHMMParams(self):
        """
        Construct the initial Hidden Markov Model (HMM).

        Five hidden states are used (0--start, 1--downstream, 2--no bias,
        3--upstream, 4--end), each emitting from a three-component
        Gaussian mixture.  Transition and start probabilities are
        hard-coded; the model is baked before being returned.

        Returns
        -------
        HiddenMarkovModel
            The baked pomegranate model.
        """
        hmm = HiddenMarkovModel()

        # Three-distribution Gaussian mixtures; components share one variance.
        numdists = 3
        step = 7.5 / (numdists - 1)
        var = step
        # Component means per hidden state.
        means = [[], [], [], [], []]
        for i in range(numdists):
            means[4].append(i * step + 2.5)
            means[3].append(i * step)
            means[2].append((i - (numdists - 1) / 2) * step)
            means[1].append(-i * step)
            means[0].append(-i * step - 2.5)

        states = []
        for idx, state_means in enumerate(means):
            components = [NormalDistribution(m, var) for m in state_means]
            states.append(State(GeneralMixtureModel(components), name=str(idx)))
        hmm.add_states(*tuple(states))

        # Hard-coded transition matrix, expressed as (source, target, prob):
        # A = [[0., 1., 0., 0., 0.],
        #      [0., 0.4, 0.3, 0.3, 0.],
        #      [0.05, 0., 0.5, 0.45, 0.],
        #      [0., 0., 0., 0.5, 0.5],
        #      [0.99, 0., 0.01, 0., 0.]]
        edges = [
            (0, 1, 1),
            (1, 1, 0.4), (1, 2, 0.3), (1, 3, 0.3),
            (2, 0, 0.05), (2, 2, 0.5), (2, 3, 0.45),
            (3, 3, 0.5), (3, 4, 0.5),
            (4, 0, 0.99), (4, 2, 0.01),
        ]
        for src, dst, prob in edges:
            hmm.add_transition(states[src], states[dst], prob)

        # Initial hidden-state distribution.
        pi = [0.05, 0.3, 0.3, 0.3, 0.05]
        for i, prob in enumerate(pi):
            hmm.add_transition(hmm.start, states[i], prob)

        hmm.bake()

        return hmm
Ejemplo n.º 3
0
def build_an_hmm_example():
    """Build, bake, and plot a small three-state DNA HMM, then print its
    state names and the forward matrix for the sequence 'ACG'.

    Each DiscreteDistribution below is the emission table of one state:
    the probability of seeing each character while the system is in that
    state (the transition probabilities live on the graph edges).
    """
    emit_s1 = DiscreteDistribution({'A': 0.35, 'C': 0.20, 'G': 0.05, 'T': 0.40})
    emit_s2 = DiscreteDistribution({'A': 0.25, 'C': 0.25, 'G': 0.25, 'T': 0.25})
    emit_s3 = DiscreteDistribution({'A': 0.10, 'C': 0.40, 'G': 0.40, 'T': 0.10})

    s1 = State(emit_s1, name="s1")
    s2 = State(emit_s2, name="s2")
    s3 = State(emit_s3, name="s3")

    model = HiddenMarkovModel('example')
    model.add_states([s1, s2, s3])
    # (source, target, probability) edges, including entry and exit.
    for src, dst, prob in [
        (model.start, s1, 0.90),
        (model.start, s2, 0.10),
        (s1, s1, 0.80),
        (s1, s2, 0.20),
        (s2, s2, 0.90),
        (s2, s3, 0.10),
        (s3, s3, 0.70),
        (s3, model.end, 0.30),
    ]:
        model.add_transition(src, dst, prob)
    model.bake()

    for state in model.states:
        print(state.name)
    model.plot()
    #print(model.log_probability(list('ACGACTATTCGAT')))

    #print(", ".join(state.name for i, state in model.viterbi(list('ACGACTATTCGAT'))[1]))

    print("forward:", model.forward(list('ACG')))
Ejemplo n.º 4
0
def init_cycle_hmm(sequences, steps, states_per_step, model_id):
    """
    Instantiate a cyclic left-right HMM with random parameters.

    Start and transition matrices are randomly generated; a normal
    distribution is fitted for each state from a chronological partition
    of the input sequences.

    Parameters
    ----------
    sequences : np.ndarray
        2-D array of observations (samples x timepoints).
    steps : int
        Number of steps in the cycle.
    states_per_step : int
        Number of parallel states per step.
    model_id : str
        Name given to the model.
    """
    model = HiddenMarkovModel(model_id)
    n_states = steps * states_per_step

    # Width of each chronological slice of timepoints.
    step_size = int(math.ceil(sequences.shape[1] / float(n_states + 1)))

    # Fit one normal distribution per state from its slice of timepoints.
    states = np.empty((steps, states_per_step), dtype=object)
    for i in range(steps):
        for j in range(states_per_step):
            temp_assignment = np.arange(step_size * i, step_size * (i + 1))
            dist = \
                NormalDistribution.from_samples(sequences[:, temp_assignment])
            state_name = str(i) + '-' + str(j)
            states[i, j] = State(dist, name=str(state_name))

    # add states to model
    model.add_states(states.flatten().tolist())

    # Random start -> step0 transitions, normalized to sum to 1.
    trans = np.random.ranf(n_states)
    trans = trans / trans.sum()
    for i, state in enumerate(states.flatten().tolist()):
        model.add_transition(model.start, state, trans[i])

    # Random step(i) -> step(i+1) transitions (plus self-transitions).
    for i in range(steps - 1):
        for j in range(states_per_step):
            trans = np.random.ranf(states_per_step + 1)
            trans = trans / trans.sum()
            # self transition
            model.add_transition(states[i, j], states[i, j], trans[0])
            # out transitions
            for x in range(states_per_step):
                model.add_transition(states[i, j], states[i + 1, x],
                                     trans[x + 1])

    # Close the cycle: random step(n-1) -> step0 transitions.
    for j in range(states_per_step):
        trans = np.random.ranf(states_per_step + 1)
        trans = trans / trans.sum()
        # self transition
        model.add_transition(states[(steps - 1), j], states[(steps - 1), j],
                             trans[0])
        # out transitions
        for x in range(states_per_step):
            model.add_transition(states[(steps - 1), j], states[0, x],
                                 trans[x + 1])
    model.bake()
    # Fixed: Python 2 print statement -> print() function (the rest of
    # the file targets Python 3, where the old form is a SyntaxError).
    print('Initialized Cyclic State HMM:', '[', steps, states_per_step, ']')
    return model
Ejemplo n.º 5
0
def bake_model(tags_sequence, words_sequence):
    """
    Assemble and bake an HMM demand tagger.

    'tags' are the time-demand labels that generate the emitted demand
    level.  Demand levels are represented by 'words'.
    """
    # Flatten the observed demand levels and count n-grams over them.
    words = list(chain(*words_sequence))
    tag_unigrams = unigram_counts(words)
    tag_bigrams = bigram_counts(words)

    # Uniform distribution for starting and ending labels.
    all_labels = list(set(words))
    tag_starts = starting_counts(all_labels)
    tag_ends = ending_counts(all_labels)

    basic_model = HiddenMarkovModel(name="base-hmm-tagger")

    # Emission counts, pairing the demand stream with its label stream.
    emission_count = pair_counts(words_sequence, tags_sequence)

    # One state per demand level, holding P(word | tag) as its emission
    # distribution.
    states = []
    for rdemand, label_dict in emission_count.items():
        dist_tag = DiscreteDistribution({
            label: cn / tag_unigrams[rdemand]
            for label, cn in label_dict.items()
        })
        states.append(State(dist_tag, name=rdemand))

    basic_model.add_states(states)
    state_index = {s.name: num for num, s in enumerate(states)}

    # Start transitions.
    total_start = sum(tag_starts.values())
    for tag, cn in tag_starts.items():
        basic_model.add_transition(basic_model.start,
                                   states[state_index[tag]],
                                   cn / total_start)

    # End transitions.
    total_end = sum(tag_ends.values())
    for tag, cn in tag_ends.items():
        basic_model.add_transition(states[state_index[tag]],
                                   basic_model.end,
                                   cn / total_end)

    # Edges between states: observed transition frequencies
    # P(tag_i | tag_i-1).
    for (prev, nxt), cnt in tag_bigrams.items():
        basic_model.add_transition(states[state_index[prev]],
                                   states[state_index[nxt]],
                                   cnt / tag_unigrams[prev])

    # Finalize the model.
    basic_model.bake()

    return basic_model
Ejemplo n.º 6
0
def hmmer2pom(hmm):
    """Convert the text of a HMMER3 profile-HMM file into a pomegranate
    HiddenMarkovModel and return its JSON serialization.

    Parameters
    ----------
    hmm : str
        Full contents of a HMMER3 ``.hmm`` save file.

    Returns
    -------
    str
        JSON string produced by ``HiddenMarkovModel.to_json()``.
    """
    # set up environment
    from math import exp
    from pomegranate import DiscreteDistribution,HiddenMarkovModel,State
    # header: 0 = reading tag header, 1 = just saw the 'HMM' line, 2 = body
    tags = dict(); header = 0; alphabet = None; hmmlines = list()

    # parse HMMER file
    for line in hmm.splitlines():
        l = line.strip()
        if len(l) == 0 or l[0] == '#':
            continue
        elif header == 0:
            if l.startswith('HMM') and l[3] != 'E': # beginning of actual HMM
                # 'HMM' line lists the emission alphabet; l[3] != 'E'
                # distinguishes it from the 'HMMER...' format line
                header = 1; alphabet = l.split()[1:]
            else:
                # accumulate header tags; a repeated tag becomes a list
                parts = l.strip().split()
                if parts[0] in tags:
                    if not isinstance(tags[parts[0]], list):
                        tags[parts[0]] = [tags[parts[0]]]
                    tags[parts[0]].append(' '.join(parts[1:]))
                else:
                    tags[parts[0]] = ' '.join(parts[1:])
        elif header == 1:
            # skip the transition-label line that follows the 'HMM' line
            header = 2
        else:
            if l.startswith('COMPO'):
                parts = l.strip().split(); tags[parts[0]] = ' '.join(parts[1:])
            else:
                hmmlines.append(l)

    # create all states
    # HMMER stores values as negative natural logs, hence exp(-1*x) below.
    # Body lines come in groups of 3 per node: match emissions, insert
    # emissions, state transitions.
    model = HiddenMarkovModel(tags['NAME']); tmpstates = list(); K = 0
    i_emit = hmmlines[0].split(); tmpstates.append(State(DiscreteDistribution({alphabet[i] : exp(-1*float(i_emit[i])) for i in range(len(alphabet))}), name="I0")) # insertion state
    for l in range(2,len(hmmlines),3):
        m_emit,i_emit,state_trans = [hmmlines[l+i].split() for i in range(0,3)]; K = int(m_emit[0])
        tmpstates.append(State(DiscreteDistribution({alphabet[i] : exp(-1*float(m_emit[i+1])) for i in range(len(alphabet))}), name="M%d" % K)) # match state
        tmpstates.append(State(DiscreteDistribution({alphabet[i] : exp(-1*float(i_emit[i])) for i in range(len(alphabet))}), name="I%d" % K)) # insertion state
        tmpstates.append(State(None, name="D%d" % K)) # deletion state
    assert K != 0, "No match states in profile HMM"
    # Map state names to State objects; the model's own start/end stand in
    # for the virtual M0 and M_{K+1} states.
    model.add_states(tmpstates); name2state = {state.name:state for state in tmpstates}; name2state["M0"] = model.start; name2state["M%d"%(K+1)] = model.end

    # create all transitions
    # '*' denotes an impossible (zero-probability) transition in HMMER.
    for l in range(1,len(hmmlines),3):
        k = int(l/3); parts = hmmlines[l].split()
        model.add_transition(name2state["M%d"%k], name2state["M%d"%(k+1)], exp(-1*float(parts[0])))     # 0: M_k -> M_k+1
        model.add_transition(name2state["M%d"%k], name2state["I%d"%k],     exp(-1*float(parts[1])))     # 1: M_k -> I_k
        if parts[2] != '*': # no D_k+1 in last row
            model.add_transition(name2state["M%d"%k], name2state["D%d"%(k+1)], exp(-1*float(parts[2]))) # 2: M_k -> D_k+1
        model.add_transition(name2state["I%d"%k], name2state["M%d"%(k+1)], exp(-1*float(parts[3])))     # 3: I_k -> M_k+1
        model.add_transition(name2state["I%d"%k], name2state["I%d"%k],     exp(-1*float(parts[4])))     # 4: I_k -> I_k
        if k != 0: # no D0 state
            model.add_transition(name2state["D%d"%k], name2state["M%d"%(k+1)], exp(-1*float(parts[5]))) # 5: D_k -> M_k+1
        if parts[6] != '*': # no D0 state and no D_k+1 in last row
            model.add_transition(name2state["D%d"%k], name2state["D%d"%(k+1)], exp(-1*float(parts[6]))) # 6: D_k -> D_k+1
    model.bake()
    return model.to_json()
Ejemplo n.º 7
0
def train_hmm_tagger(data):
    """Build and bake a basic HMM part-of-speech tagger from *data*.

    NOTE(review): this function relies on module-level globals computed
    elsewhere (``tag_unigrams``, ``tag_starts``, ``tag_ends``,
    ``tag_bigrams``, ``pair_counts``, ``HTML``) — confirm they are in
    scope at call time.
    """
    # HMM
    # Use the tag unigrams and bigrams calculated above to construct a hidden Markov tagger.
    #
    # - Add one state per tag
    #     - The emission distribution at each state should be estimated with the formula: $P(w|t) = \frac{C(t, w)}{C(t)}$
    # - Add an edge from the starting state `basic_model.start` to each tag
    #     - The transition probability should be estimated with the formula: $P(t|start) = \frac{C(start, t)}{C(start)}$
    # - Add an edge from each tag to the end state `basic_model.end`
    #     - The transition probability should be estimated with the formula: $P(end|t) = \frac{C(t, end)}{C(t)}$
    # - Add an edge between _every_ pair of tags
    #     - The transition probability should be estimated with the formula: $P(t_2|t_1) = \frac{C(t_1, t_2)}{C(t_1)}$
    basic_model = HiddenMarkovModel(name="base-hmm-tagger")

    state_dict = {}
    states = []
    # stream() presumably yields (word, tag) pairs; the zip/[::-1] trick
    # flips them into (tags, words) argument order for pair_counts —
    # TODO confirm against pair_counts' signature.
    emission_counts = pair_counts(*list(zip(
        *data.training_set.stream()))[::-1])
    for tag in emission_counts.keys():
        tag_count = tag_unigrams[tag]
        # Emission distribution: P(w|t) = C(t, w) / C(t).
        probs = {}
        for w in emission_counts[tag]:
            probs[w] = emission_counts[tag][w] / tag_count
        emission_p = DiscreteDistribution(probs)
        state = State(emission_p, name="" + tag)
        basic_model.add_state(state)
        state_dict[tag] = state

    # Start and end transitions for every tag seen at a sentence start.
    for tag in tag_starts:
        basic_model.add_transition(basic_model.start, state_dict[tag],
                                   tag_starts[tag] / len(data.training_set.Y))
        basic_model.add_transition(state_dict[tag], basic_model.end,
                                   tag_ends[tag] / tag_unigrams[tag])

    # Tag-to-tag transitions: P(t2|t1) = C(t1, t2) / C(t1).
    for (tag1, tag2) in tag_bigrams:
        basic_model.add_transition(
            state_dict[tag1], state_dict[tag2],
            tag_bigrams[(tag1, tag2)] / tag_unigrams[tag1])

    # finalize the model
    basic_model.bake()

    assert all(
        tag in set(s.name for s in basic_model.states)
        for tag in data.training_set.tagset
    ), "Every state in your network should use the name of the associated tag, which must be one of the training set tags."
    assert basic_model.edge_count() == 168, (
        "Your network should have an edge from the start node to each state, one edge between every "
        +
        "pair of tags (states), and an edge from each state to the end node.")
    # NOTE(review): the HTML(...) return value is discarded — this only
    # has a visible effect in a notebook context; verify intent.
    HTML(
        '<div class="alert alert-block alert-success">Your HMM network topology looks good!</div>'
    )
    return basic_model
Ejemplo n.º 8
0
class HMMWrapper:
    """Thin wrapper around pomegranate's HiddenMarkovModel that defers
    start-probability assignment until bake() time.

    States registered without an explicit start probability evenly share
    whatever start mass is left after the explicit probabilities are
    subtracted from 1.0.  States whose name contains 'none' get no start
    transition at all.
    """

    def __init__(self):
        self.model = HiddenMarkovModel()
        self.start = self.model.start
        self.end = self.model.end
        # (state, start_prob) pairs collected before bake().
        self.states_before_bake = []
        # Populated from the baked model by bake().
        self.states = None

    def add_state(self, state, start_prob=0):
        """Register *state*; a falsy start_prob means 'share the leftover
        start mass at bake() time'."""
        self.states_before_bake.append((state, start_prob))
        self.model.add_state(state)

    def add_transition(self, start_state, end_state, prob):
        # print('adding from', start_state.name, 'to', end_state.name, prob)
        self.model.add_transition(start_state, end_state, prob)

    def bake(self):
        """Wire start transitions, then finalize the underlying model."""
        starter_states_no_prob = []
        free_start_prob = 1.0
        for state, start_prob in self.states_before_bake:
            if 'none' not in state.name:
                if not start_prob:
                    starter_states_no_prob.append(state)
                else:
                    free_start_prob -= start_prob
                    print('asignado ' + str(start_prob) + ' a ' + state.name)
                    self.add_transition(self.start, state, start_prob)

        len_no_prob = len(starter_states_no_prob)
        # Guard against ZeroDivisionError when every state had an explicit
        # start probability.
        if len_no_prob:
            starter_prob = free_start_prob / len_no_prob
            print(len_no_prob, starter_prob)
            for state in starter_states_no_prob:
                # BUG FIX: the (state, prob) tuple used to be passed here
                # instead of the State object itself.
                self.add_transition(self.start, state, starter_prob)

        self.model.bake()
        self.states = self.model.states

    def make_states_from_alignment(self, first_state, last_state, seq_matrix,
                                   name):
        """Build and wire states derived from an alignment matrix using
        the module-level helper functions."""
        columns = column_clasify(seq_matrix)
        zones = create_zones(columns)
        grouped_states = group_states(zones, name)
        add_states(self, grouped_states)
        trans = calculate_transitions(first_state, last_state, grouped_states)
        apply_transitions(self, trans)

    def predict(self, *args, **kwargs):
        """Delegate prediction to the underlying model."""
        return self.model.predict(*args, **kwargs)
Ejemplo n.º 9
0
def _initialize_new_hmm(hmm, new_states, new_transitions):
    """Rebuild an HMM from explicit state and transition lists, remapping
    the old model's start/end markers onto the fresh model's own."""
    rebuilt = HiddenMarkovModel()

    # Copy every real state; the old start/end are replaced, not copied.
    for state in new_states:
        if state not in (hmm.start, hmm.end):
            rebuilt.add_state(state)

    # Copy transitions, redirecting edges that touched the old start/end.
    for src, dst, prob in new_transitions:
        if src != hmm.start and dst != hmm.end:
            rebuilt.add_transition(src, dst, prob)
        elif src == hmm.start:
            rebuilt.add_transition(rebuilt.start, dst, prob)
        elif dst == hmm.end:
            rebuilt.add_transition(src, rebuilt.end, prob)

    rebuilt.bake()
    return rebuilt
Ejemplo n.º 10
0
def insert_delete_main_hmm(data_matrix):
    """Build and bake a test HMM from an alignment-style data matrix,
    bracketed by silent 'ali_start' / 'ali_end' states."""
    grouped = group_states(create_zones(column_clasify(data_matrix)), 'test')

    model = HiddenMarkovModel()
    ali_start = State(None, name='ali_start')
    ali_end = State(None, name='ali_end')
    model.add_state(ali_start)
    model.add_transition(model.start, ali_start, 1)
    model.add_state(ali_end)

    add_states(model, grouped)
    apply_transitions(model,
                      calculate_transitions(ali_start, ali_end, grouped))
    model.bake()
    return model
Ejemplo n.º 11
0
def gaussian_hmm(n_states, lower, upper, variance, model_id):
    """
    Instantiate a Gaussian HMM with random parameters.

    State means are evenly spaced on [lower, upper] with a shared
    variance.  Start probabilities are near-uniform with random jitter;
    transition probabilities are proportional to the likelihood of one
    state's mean under another state's distribution, plus jitter.

    Parameters
    ----------
    n_states : int
        Number of hidden states.
    lower, upper : float
        Range over which state means are spaced.
    variance : float
        Shared emission variance.
    model_id : str
        Name given to the model.
    """
    np.random.seed(int(time.time()))

    model = HiddenMarkovModel(model_id)

    # States with evenly spaced means and a shared variance.
    x = np.linspace(lower, upper, n_states)
    states = []
    for i in range(n_states):
        dist = \
            NormalDistribution(x[i], variance)
        states.append(State(dist, name=str(i)))

    model.add_states(states)

    # Near-uniform start probabilities with random jitter, renormalized.
    start_prob = 1.0 / n_states
    start_probs = []
    for i in range(n_states):
        start_probs.append(start_prob + np.random.ranf())
    start_probs = np.array(start_probs)
    start_probs = start_probs / start_probs.sum()
    for i, state in enumerate(states):
        model.add_transition(model.start, state, start_probs[i])

    # Transition probabilities proportional to the probability of
    # generating one state's mean from another state's distribution.
    for state1 in states:
        transitions = []
        for other_state in states:
            transitions.append(np.exp(state1.distribution.log_probability(
                other_state.distribution.parameters[0])) + np.random.ranf())
        transitions = np.array(transitions)
        transitions = transitions / transitions.sum()
        for i, state2 in enumerate(states):
            model.add_transition(state1, state2, transitions[i])

    model.bake()
    # Fixed: Python 2 print statement -> print() function (the rest of
    # the file targets Python 3, where the old form is a SyntaxError).
    print('Initialized HMM: ', model.name)
    return model
Ejemplo n.º 12
0
def init_model(start_dip, stay_state, mean_eu, sd_eu, mean_loh):
    """Build a three-state copy-number HMM (EU / LOH / ANEU) with
    Gaussian emissions; only transitions are left trainable."""
    # Emission distributions.
    d_eu = NormalDistribution(mean_eu, sd_eu)  ## euploid enriched at 0
    d_loh = NormalDistribution(mean_loh, sd_eu)  ## loss of heterozygosity enriched at 1
    d_aneu = NormalDistribution(mean_loh / 2.0, sd_eu * 1.4)  ## aneuploid enriched at 1

    # One state per distribution.
    s_eu = State(d_eu, name='EU')
    s_loh = State(d_loh, name='LOH')
    s_aneu = State(d_aneu, name='ANEU')

    model = HiddenMarkovModel()
    model.add_states(s_eu, s_loh, s_aneu)

    # Transition matrix, expressed as (source, target, probability).
    edges = [
        (model.start, s_eu, start_dip),
        (model.start, s_loh, 1.0 - start_dip - 0.1),
        (model.start, s_aneu, 0.1),

        (s_eu, s_eu, stay_state),
        (s_eu, s_loh, 1.0 - 4 * stay_state / 5 - 0.001),
        (s_eu, s_aneu, 1.0 - stay_state / 5 - 0.001),
        (s_eu, model.end, 0.002),

        (s_loh, s_loh, stay_state),
        (s_loh, s_eu, 1.0 - 4 * stay_state / 5 - 0.001),
        (s_loh, s_aneu, 1.0 - stay_state / 5 - 0.001),
        (s_loh, model.end, 0.002),

        (s_aneu, s_aneu, stay_state),
        (s_aneu, s_eu, 1.0 - stay_state / 2 - 0.001),
        (s_aneu, s_loh, 1.0 - stay_state / 2 - 0.001),
        (s_aneu, model.end, 0.002),
    ]
    for src, dst, prob in edges:
        model.add_transition(src, dst, prob)

    # Finalize internal structure.
    model.bake()
    # Only train transitions, not emissions.
    model.freeze_distributions()

    return model
Ejemplo n.º 13
0
def ghmm_model(states_labels: tuple,
               transitions: tuple,
               init_prob: tuple,
               end_prob: tuple,
               means: list,
               vars: list) -> HiddenMarkovModel:
    """Build a Gaussian-mixture HMM from explicit parameter tables.

    :param states_labels: one label per hidden state
    :param transitions: square matrix of state-to-state probabilities
    :param init_prob: start probability per state (0 -> no edge added)
    :param end_prob: end probability per state (0 -> no edge added)
    :param means: per-state list of mixture-component means
    :param vars: per-state list of mixture-component variances
    :return: the baked model
    """
    hmm_model = HiddenMarkovModel()

    # One state per label, each emitting from a Gaussian mixture whose
    # component count is taken from the first variance row.
    mix_num = len(vars[0])
    states = []
    for state_i in range(len(states_labels)):
        components = [NormalDistribution(means[state_i][mix_i],
                                         vars[state_i][mix_i])
                      for mix_i in range(mix_num)]
        states.append(State(GeneralMixtureModel(components),
                            name=str(state_i)))
    hmm_model.add_states(*tuple(states))

    # Only non-zero probabilities become edges.
    n = len(states_labels)
    for row in range(n):
        for col in range(n):
            prob = transitions[row][col]
            if prob != 0.:
                hmm_model.add_transition(states[row], states[col], prob)
    for state_i, prob in enumerate(init_prob):
        if prob != 0.:
            hmm_model.add_transition(hmm_model.start, states[state_i], prob)
    for state_i, prob in enumerate(end_prob):
        if prob != 0.:
            hmm_model.add_transition(states[state_i], hmm_model.end, prob)

    hmm_model.bake()

    return hmm_model
Ejemplo n.º 14
0
class ModelWrapper:
    """Minimal convenience facade over a pomegranate HiddenMarkovModel."""

    def __init__(self):
        self.model = HiddenMarkovModel()

    def add_state(self, distribution, name):
        """Create a State emitting from *distribution*, register it on
        the wrapped model, and return it."""
        new_state = State(distribution, name=name)
        self.model.add_state(new_state)
        return new_state

    def bake(self):
        """Finalize the wrapped model's internal structure."""
        self.model.bake()

    def viterbi(self, seq):
        """Run Viterbi decoding of *seq* on the wrapped model."""
        return self.model.viterbi(seq)

    def add_transition(self, states, next_state_data):
        """Add an edge from every state in *states* to each
        (target_state, probability) entry of *next_state_data*."""
        for source in states:
            for entry in next_state_data:
                self.model.add_transition(source, entry[0], entry[1])
Ejemplo n.º 15
0
def build_the_same_model_in_test_sample_from_site_line_by_line():
    """Recreate the three-state Gaussian HMM from the pomegranate site
    example and plot it.

    A State holds the emission distribution, but not the transition
    distribution — that is stored on the graph edges.
    """
    state_a = State(NormalDistribution(5, 1))
    state_b = State(NormalDistribution(1, 7))
    state_c = State(NormalDistribution(8, 2))

    model = HiddenMarkovModel()
    model.add_states(state_a, state_b, state_c)

    # (source, target, probability) edges, including entry and exit.
    for src, dst, prob in [
        (model.start, state_a, 1.0),
        (state_a, state_a, 0.7),
        (state_a, state_b, 0.3),
        (state_b, state_b, 0.8),
        (state_b, state_c, 0.2),
        (state_c, state_c, 0.9),
        (state_c, model.end, 0.1),
    ]:
        model.add_transition(src, dst, prob)
    model.bake()

    model.plot()
Ejemplo n.º 16
0
def buildHmm(minAmpliconLength, maxGap, windowSize):
    """Construct a two-state (background / amplicon) HMM over binary
    per-window observations.

    The inter-state switch probability is derived from the minimum
    amplicon length, and the amplicon emission table from the maximum
    allowed gap, both measured relative to the window size.
    """
    b_bkgd_1 = 0.1
    a_interstate = b_bkgd_1**(2 * minAmpliconLength / windowSize)
    b_amp_0 = (a_interstate)**(0.5 * windowSize / maxGap)
    stay = 1 - a_interstate

    # Emission tables over the binary observation {0, 1}.
    s_bkgd = State(DiscreteDistribution({0: 1 - b_bkgd_1, 1: b_bkgd_1}),
                   name='background')
    s_amp = State(DiscreteDistribution({0: b_amp_0, 1: 1 - b_amp_0}),
                  name='amplicon')

    hmm = HiddenMarkovModel()
    hmm.add_states(s_bkgd, s_amp)
    hmm.add_transition(hmm.start, s_bkgd, stay)
    hmm.add_transition(hmm.start, s_amp, a_interstate)
    hmm.add_transition(s_bkgd, s_bkgd, stay)
    hmm.add_transition(s_bkgd, s_amp, a_interstate)
    hmm.add_transition(s_amp, s_bkgd, a_interstate)
    hmm.add_transition(s_amp, s_amp, stay)
    hmm.bake()
    return hmm
Ejemplo n.º 17
0
def create_hidden_MarkovModel(e_df, q_df, start_p_dict):
    """
    Creates a Hidden Markov Model based on DataFrames.

    @args:
        - e_df (pd.DataFrame): contains the emission probabilities,
          one column per state (P(observation | state))
        - q_df (pd.DataFrame): contains the transition probabilities
          (rows = origin state, columns = destination state)
        - start_p_dict (dict): start probability for each state
    """
    model = HiddenMarkovModel(name="Example Model")

    # 1: Create a dict entry for each key in the transition DataFrame.
    model_dict = {}
    for key in q_df.keys().values:
        model_dict[key] = {}

    # 2: Create the states.
    for key in model_dict:
        # 2.1: Attach the emission probabilities, P(observation | state).
        # Fixed: an identical, unused State ('sunny_state') was previously
        # created alongside this one.
        emission_p = DiscreteDistribution(e_df[key].to_dict())
        model_dict[key] = State(emission_p, name=key)
        model.add_state(model_dict[key])
        # 2.2: Add the start probability for each state.
        model.add_transition(model.start, model_dict[key], start_p_dict[key])

    # 3: Add the transition probability between every pair of states.
    for key, item in q_df.to_dict("index").items():
        for item_name, value in item.items():
            print(key, " , ", item_name, ": ", value)
            tmp_origin = model_dict[key]
            tmp_destination = model_dict[item_name]
            model.add_transition(tmp_origin, tmp_destination,
                                 q_df.loc[key, item_name])
    # Finally, call the .bake() method to finalize the model.
    model.bake()

    return model
Ejemplo n.º 18
0
def build_reference_repeat_finder_hmm(patterns, copies=1):
    """Build a profile-HMM for locating tandem repeats of a reference pattern.

    The model chains `copies` repeat units. Each unit is a standard
    profile-HMM over the first pattern in `patterns`: one match state per
    base (0.97 on the expected nucleotide, 0.01 elsewhere), one silent
    delete state per base, and len(pattern)+1 uniform insert states,
    delimited by silent unit_start/unit_end states. Two uniformly emitting
    states (start_random_matches / end_random_matches) absorb flanking
    sequence before the first unit and after the last one.

    Parameters:
        patterns: list of repeat-unit strings over {A, C, G, T}; the first
            entry defines the topology. If more than one pattern is given,
            the model parameters are re-fitted (Viterbi training) on
            full-length pattern*copies sequences.
        copies: number of repeat units to chain together.

    Returns:
        The baked (and possibly re-fitted) pomegranate model.
    """
    pattern = patterns[0]
    model = Model(name="HMM Model")
    # Inserted bases are unbiased: uniform over the four nucleotides.
    insert_distribution = DiscreteDistribution({
        'A': 0.25,
        'C': 0.25,
        'G': 0.25,
        'T': 0.25
    })

    last_end = None
    # Flanking emitters for sequence outside the repeat region.
    start_random_matches = State(insert_distribution,
                                 name='start_random_matches')
    end_random_matches = State(insert_distribution, name='end_random_matches')
    model.add_states([start_random_matches, end_random_matches])
    for repeat in range(copies):
        insert_states = []
        match_states = []
        delete_states = []
        # I0..Ilen: uniform insert states, one per inter-base position.
        for i in range(len(pattern) + 1):
            insert_states.append(
                State(insert_distribution, name='I%s_%s' % (i, repeat)))

        # M1..Mlen: match states biased 0.97 toward the expected base.
        for i in range(len(pattern)):
            distribution_map = dict({
                'A': 0.01,
                'C': 0.01,
                'G': 0.01,
                'T': 0.01
            })
            distribution_map[pattern[i]] = 0.97
            match_states.append(
                State(DiscreteDistribution(distribution_map),
                      name='M%s_%s' % (str(i + 1), repeat)))

        # D1..Dlen: silent delete states (no emission).
        for i in range(len(pattern)):
            delete_states.append(
                State(None, name='D%s_%s' % (str(i + 1), repeat)))

        # Silent states delimiting this repeat unit.
        unit_start = State(None, name='unit_start_%s' % repeat)
        unit_end = State(None, name='unit_end_%s' % repeat)
        model.add_states(insert_states + match_states + delete_states +
                         [unit_start, unit_end])
        last = len(delete_states) - 1

        # First unit hangs off model.start (optionally via flanking random
        # matches); later units chain from the previous unit's end state.
        if repeat > 0:
            model.add_transition(last_end, unit_start, 0.5)
        else:
            model.add_transition(model.start, unit_start, 0.5)
            model.add_transition(model.start, start_random_matches, 0.5)
            model.add_transition(start_random_matches, unit_start, 0.5)
            model.add_transition(start_random_matches, start_random_matches,
                                 0.5)

        # Every unit may leak into trailing random matches; only the last
        # unit connects to model.end.
        model.add_transition(unit_end, end_random_matches, 0.5)
        if repeat == copies - 1:
            model.add_transition(unit_end, model.end, 0.5)
            model.add_transition(end_random_matches, end_random_matches, 0.5)
            model.add_transition(end_random_matches, model.end, 0.5)

        # Unit entry: overwhelmingly into the first match state.
        model.add_transition(unit_start, match_states[0], 0.98)
        model.add_transition(unit_start, delete_states[0], 0.01)
        model.add_transition(unit_start, insert_states[0], 0.01)

        model.add_transition(insert_states[0], insert_states[0], 0.01)
        model.add_transition(insert_states[0], delete_states[0], 0.01)
        model.add_transition(insert_states[0], match_states[0], 0.98)

        # Unit exit from the last match/delete column (via trailing insert).
        model.add_transition(delete_states[last], unit_end, 0.99)
        model.add_transition(delete_states[last], insert_states[last + 1],
                             0.01)

        model.add_transition(match_states[last], unit_end, 0.99)
        model.add_transition(match_states[last], insert_states[last + 1], 0.01)

        model.add_transition(insert_states[last + 1], insert_states[last + 1],
                             0.01)
        model.add_transition(insert_states[last + 1], unit_end, 0.99)

        # Interior wiring: standard profile-HMM match/delete/insert moves.
        for i in range(0, len(pattern)):
            model.add_transition(match_states[i], insert_states[i + 1], 0.01)
            model.add_transition(delete_states[i], insert_states[i + 1], 0.01)
            model.add_transition(insert_states[i + 1], insert_states[i + 1],
                                 0.01)
            if i < len(pattern) - 1:
                model.add_transition(insert_states[i + 1], match_states[i + 1],
                                     0.98)
                model.add_transition(insert_states[i + 1],
                                     delete_states[i + 1], 0.01)

                model.add_transition(match_states[i], match_states[i + 1],
                                     0.98)
                model.add_transition(match_states[i], delete_states[i + 1],
                                     0.01)

                model.add_transition(delete_states[i], delete_states[i + 1],
                                     0.01)
                model.add_transition(delete_states[i], match_states[i + 1],
                                     0.98)

        last_end = unit_end

    model.bake()
    # With extra patterns, refine parameters on concatenated repeats.
    if len(patterns) > 1:
        # model.fit(patterns, algorithm='baum-welch', transition_pseudocount=1, use_pseudocount=True)
        fit_patterns = [pattern * copies for pattern in patterns]
        model.fit(fit_patterns,
                  algorithm='viterbi',
                  transition_pseudocount=1,
                  use_pseudocount=True)

    return model
Ejemplo n.º 19
0
model.add_transition( model.start, rainy, 0.6 )
model.add_transition( model.start, sunny, 0.4 )

# Transition matrix, with 0.05 subtracted from each probability to add to
# the probability of exiting the hmm
model.add_transition( rainy, rainy, 0.65 )
model.add_transition( rainy, sunny, 0.25 )
model.add_transition( sunny, rainy, 0.35 )
model.add_transition( sunny, sunny, 0.55 )

# Add transitions to the end of the model
model.add_transition( rainy, model.end, 0.1 )
model.add_transition( sunny, model.end, 0.1 )

# Finalize the model structure
model.bake( verbose=True )

# Lets sample from this model.
print model.sample()

# Lets call Bob every hour and see what he's doing!
# (aka build up a sequence of observations)
sequence = [ 'walk', 'shop', 'clean', 'clean', 'clean', 'walk', 'clean' ]

# What is the probability of seeing this sequence?
print "Probability of Sequence: ", \
	math.e**model.forward( sequence )[ len(sequence), model.end_index ]
print "Probability of Cleaning at Time Step 3 Given This Sequence: ", \
	math.e**model.forward_backward( sequence )[1][ 2, model.states.index( rainy ) ]
print "Probability of the Sequence Given It's Sunny at Time Step 4: ", \
	math.e**model.backward( sequence )[ 3, model.states.index( sunny ) ]
Ejemplo n.º 20
0
hmm.add_transition(s33, s222, 0.01)
hmm.add_transition(s33, s3, 0.01)
hmm.add_transition(s33, s33, 0.92)
hmm.add_transition(s33, s333, 0.01)

hmm.add_transition(s333, s1, 0.01)
hmm.add_transition(s333, s11, 0.01)
hmm.add_transition(s333, s111, 0.01)
hmm.add_transition(s333, s2, 0.01)
hmm.add_transition(s333, s22, 0.01)
hmm.add_transition(s333, s222, 0.01)
hmm.add_transition(s333, s3, 0.01)
hmm.add_transition(s333, s33, 0.01)
hmm.add_transition(s333, s333, 0.92)

hmm.bake()
hmm.fit(X)  # , weights=w) hmm does not support weights in pomegranate
preds = hmm.predict(X)
probs = hmm.predict_proba(X)

data_thr['preds'] = pd.Series(preds).astype("category")

color_key = ["red", "blue", "yellow", "grey", "black", "purple", "pink",
             "brown", "green", "orange"]  # Spectral9
color_key = color_key[:len(set(preds))+2]

covs = np.array([np.array(hmm.states[m].distribution.parameters[1])
                 for m in range(9)])
means = np.array([np.array(hmm.states[m].distribution.parameters[0])
                  for m in range(9)])
Ejemplo n.º 21
0
        st = State(posdis, name='swing')
        distros.append(st)
        hmm_states.append(st)
        negdis = MGD.from_samples(negative_data)
        st2 = State(negdis, name='stance')
        distros.append(st2)
        hmm_states.append(st2)

        cl.add_states(hmm_states)
        cl.add_transition(cl.start, hmm_states[0], 0.5)
        cl.add_transition(cl.start, hmm_states[1], 0.5)

        for i in range(0, 2):
            for j in range(0, 2):
                cl.add_transition(hmm_states[i], hmm_states[j], t[i][j])
        cl.bake()

        f += 1
        train_data = fd[train_index]
        train_class = fl[train_index]
        test_data = fd[test_index]
        test_class = fl[test_index]
        seq = []
        if batch_training == 1:
            s = 0
            while s < len(train_data):
                k = 0
                seq_entry = []
                while k < 20 and s < len(train_data):
                    seq_entry.append(train_data[s])
                    k += 1
Ejemplo n.º 22
0
state2 = State(NormalDistribution(0, 2), name="normal")
silent = State(None, name="silent")
model.add_state(state)
model.add_state(state2)

model.add_transition(state, state, 0.4)
model.add_transition(state, state2, 0.4)
model.add_transition(state2, state2, 0.4)
model.add_transition(state2, state, 0.4)

model.add_transition(model.start, state, 0.5)
model.add_transition(model.start, state2, 0.5)
model.add_transition(state, model.end, 0.2)
model.add_transition(state2, model.end, 0.2)

model.bake()
sequence = model.sample()

print sequence
print
print model.forward(sequence)[ len(sequence), model.end_index ]
print model.backward(sequence)[0,model.start_index]
print
trans, ems =  model.forward_backward(sequence)
print trans
print ems
print
model.train( [ sequence ] )

print
print model.forward(sequence)[ len(sequence), model.end_index ]
Ejemplo n.º 23
0
# Define the transitions
model = Model("infinite")
model.add_transition(model.start, s1, 0.7)
model.add_transition(model.start, s2, 0.2)
model.add_transition(model.start, s3, 0.1)
model.add_transition(s1, s1, 0.6)
model.add_transition(s1, s2, 0.1)
model.add_transition(s1, s3, 0.3)
model.add_transition(s2, s1, 0.4)
model.add_transition(s2, s2, 0.4)
model.add_transition(s2, s3, 0.2)
model.add_transition(s3, s1, 0.05)
model.add_transition(s3, s2, 0.15)
model.add_transition(s3, s3, 0.8)
model.bake()

sequence = [4.8, 5.6, 24.1, 25.8, 14.3, 26.5, 15.9, 5.5, 5.1]

print model.is_infinite()

print "Algorithms On Infinite Model"
sequence = [4.8, 5.6, 24.1, 25.8, 14.3, 26.5, 15.9, 5.5, 5.1]
print "Forward"
print model.forward(sequence)

print "\n".join(state.name for state in model.states)
print "Backward"
print model.backward(sequence)

print "Forward-Backward"
def main():
    """Leave-one-out cross-validation of a 4-state gait-phase HMM.

    Loads preprocessed gyroscope .mat trials for healthy subjects and
    patients, derives a first-derivative feature, estimates a transition
    matrix from label bigrams, fits Multivariate Gaussian emissions per
    gait phase (hs/ff/ho/sw), Baum-Welch trains an HMM per left-out trial,
    Viterbi-decodes the held-out trial, and reports mean time and
    coefficient of variation per phase and per stride.

    NOTE(review): Python 2 code (print statements, dict.iteritems) running
    under ROS; stdout is redirected to a results text file below.
    """
    rospy.init_node('hmm_trainer')
    phase_pub = rospy.Publisher('/phase', Int32, queue_size=10)
    rospack = rospkg.RosPack()
    packpath = rospack.get_path('exo_control')
    datapath = packpath + "/log/mat_files/"
    verbose = rospy.get_param('~verbose', False)

    """Print console output into text file"""
    sys.stdout = open(packpath + "/log/results/leave-one-out_cross_validation_cov.txt", "w")

    """Data loading"""
    n_trials = 3
    n_sub = 9
    healthy_subs = ["daniel", "erika", "felipe", "jonathan", "luis", "nathalia", "paula", "pedro", "tatiana"]
    patients = ["andres", "carlos", "carmen", "carolina", "catalina", "claudia", "emmanuel", "fabian", "gustavo"]
    study_subs = [healthy_subs, patients]

    # dataset[0]: healthy group, dataset[1]: patients; one dict per subject.
    dataset = [{} for x in range(len(study_subs))]
    for i in range(len(study_subs)):
        for sub in study_subs[i]:
            dataset[i][sub] = {"gyro_y": [[] for x in range(n_trials)],
                               "fder_gyro_y": [[] for x in range(n_trials)],
                               "time": [[] for x in range(n_trials)],
                               "labels": [[] for x in range(n_trials)],
                               "Fs_fsr": 0.0}

    # Fill each subject's dict from its per-trial .mat files.
    for group in dataset:
        for sub,data in group.iteritems():
            for trial in range(n_trials):
                mat_file = scio.loadmat(datapath + sub + "_proc_data" + str(trial+1) + ".mat")
                for signal in data:
                    if signal not in ["pathol","fder_gyro_y"]:
                        if signal == "Fs_fsr":
                            data[signal] = mat_file[signal][0][0]
                        else:
                            data[signal][trial] = mat_file[signal][0]
    del mat_file

    """Feature extraction"""
    """First derivative"""
    # Central difference in the interior; raw samples copied at the edges.
    for group in dataset:
        for sub,data in group.iteritems():
            for trial in range(n_trials):
                der = []
                gyro_y = data["gyro_y"][trial]
                der.append(gyro_y[0])
                for i in range(1,len(gyro_y)-1):
                    der.append((gyro_y[i+1]-gyro_y[i-1])/2)
                der.append(gyro_y[-1])
                data["fder_gyro_y"][trial] = der
    del der, sub, data

    """Global variables of cHMM"""
    startprob = [0.25, 0.25, 0.25, 0.25]
    state_names = ['hs', 'ff', 'ho', 'sw']
    n_classes = 4
    n_signals = 2
    tol = 6e-2       # Tolerance window of 60 ms

    # pathology = 0
    for pathology in range(len(dataset)):
        if pathology == 0:
            rospy.logwarn("**Leave-one-out cross validation with HEALTHY subjects**")
            print "**Leave-one-out cross validation with HEALTHY subjects**"
        else:
            rospy.logwarn("**Leave-one-out cross validation with PATIENTS**")
            print "**Leave-one-out cross validation with PATIENTS**"
    # if True:
        for lou_sub,lou_data in dataset[pathology].iteritems():       # Iterate through leave-one-out subject's data
            rospy.logwarn("Leave " + lou_sub + " out:")
            print "Leave " + lou_sub + " out:"

            # Transition matrix from label bigram counts, row-normalized.
            t = np.zeros((4, 4))        # Transition matrix
            prev = -1
            for trial in range(n_trials):
                for label in lou_data["labels"][trial]:
                    if prev == -1:
                        prev = label
                    t[prev][label] += 1.0
                    prev = label
            t = normalize(t, axis=1, norm='l1')
            if verbose: rospy.logwarn("TRANSITION MATRIX\n" + str(t))

            # Group the 2-signal feature vectors by their phase label.
            class_data = [[] for x in range(n_classes)]
            # full_lou_data = []
            # full_lou_labels = []
            for trial in range(n_trials):
                for sample in range(len(lou_data["gyro_y"][trial])):
                    d = [lou_data["gyro_y"][trial][sample], lou_data["fder_gyro_y"][trial][sample]]
                    l = lou_data["labels"][trial][sample]
                    # full_lou_data.append(d)
                    # full_lou_labels.append(l)
                    class_data[l].append(d)

            """Multivariate Gaussian Distributions for each hidden state"""
            class_means = [[[] for x in range(n_signals)] for i in range(n_classes)]
            class_vars = [[[] for x in range(n_signals)] for i in range(n_classes)]
            class_std = [[[] for x in range(n_signals)] for i in range(n_classes)]
            class_cov = []

            # Per-phase covariance plus per-signal mean/var/std.
            for state in range(n_classes):
                cov = np.ma.cov(np.array(class_data[state]), rowvar=False)
                class_cov.append(cov)
                for signal in range(n_signals):
                    class_means[state][signal] = np.array(class_data[state][:])[:, [signal]].mean(axis=0)
                    class_vars[state][signal] = np.array(class_data[state][:])[:, [signal]].var(axis=0)
                    class_std[state][signal] = np.array(class_data[state][:])[:, [signal]].std(axis=0)

            # lou_trial = 1
            # if True:
            for lou_trial in range(n_trials):
                rospy.logwarn("Trial {}".format(lou_trial+1))
                print("Trial {}".format(lou_trial+1))

                """Classifier initialization"""
                # distros = []
                hmm_states = []
                for state in range(n_classes):
                    dis = MGD\
                        (np.array(class_means[state]).flatten(),
                         np.array(class_cov[state]))
                    st = State(dis, name=state_names[state])
                    # distros.append(dis)
                    hmm_states.append(st)
                model = HMM(name="Gait")

                model.add_states(hmm_states)
                """Initial transitions"""
                for state in range(n_classes):
                    model.add_transition(model.start, hmm_states[state], startprob[state])
                """Left-right model"""
                for i in range(n_classes):
                    for j in range(n_classes):
                        model.add_transition(hmm_states[i], hmm_states[j], t[i][j])

                model.bake()

                """Create training and test data"""
                x_train = []
                x_test = []
                test_gyro_y = lou_data["gyro_y"][lou_trial]
                test_fder_gyro_y = lou_data["fder_gyro_y"][lou_trial]
                """Create test data with n-th trial of leave-one-out subject"""
                for sample in range(len(test_gyro_y)):
                    x_test.append([test_gyro_y[sample], test_fder_gyro_y[sample]])

                """Create training data with n-1 trials of the rest of subjects (healthy group)"""
                # NOTE(review): training always draws from dataset[0] (healthy
                # group) and only one trial per subject (count_trials < 1).
                for train_sub,train_data in dataset[0].iteritems():
                    count_trials = 0
                    if lou_sub != train_sub:
                    # if train_sub == "daniel":
                        for trial in range(n_trials):
                            if trial != lou_trial and count_trials < 1:
                                # rospy.logwarn(trial)
                                train_gyro_y = train_data["gyro_y"][trial]
                                train_fder_gyro_y = train_data["fder_gyro_y"][trial]
                                for sample in range(len(train_gyro_y)):
                                    x_train.append([train_gyro_y[sample], train_fder_gyro_y[sample]])
                                count_trials += 1
                rospy.logwarn(len(x_train))
                x_train = list([x_train])

                """Training"""
                rospy.logwarn("Training HMM...")
                model.fit(x_train, algorithm='baum-welch', verbose=True)
                # model.fit(x_train, algorithm='viterbi', verbose='True')

                """Find most-likely sequence"""
                rospy.logwarn("Finding most-likely sequence...")
                logp, path = model.viterbi(x_test)
                # rospy.logwarn(len(path))
                # rospy.logwarn(len(lou_data["labels"][lou_trial]))

                # Map decoded state names back to integer phase labels.
                class_labels = []
                for i in range(len(lou_data["labels"][lou_trial])):
                    path_phase = path[i][1].name
                    for state in range(n_classes):
                        if path_phase == state_names[state]:
                            class_labels.append(state)
                '''Saving classifier labels into csv file'''
                # np.savetxt(packpath+"/log/inter_labels/"+lou_sub+"_labels.csv", class_labels, delimiter=",", fmt='%s')
                # rospy.logwarn("csv file with classifier labels was saved.")
                # lou_data["labels"][lou_trial] = lou_data["labels"][lou_trial][1:]

                """Calculate mean time (MT) of stride and each gait phase and Coefficient of Variation (CoV)"""
                rospy.logwarn("Mean time (MT) and Coefficient of Variance (CoV)")
                print "Mean time (MT) and Coefficient of Variance (CoV)"

                curr_label = -1
                count = 0
                n_phases = 0
                stride_samples = 0
                phases_time = [[] for x in range(n_classes)]
                stride_time = []
                for label in class_labels:
                    if curr_label != label:
                        n_phases += 1
                        stride_samples += count
                        if label == 0:  # Gait start: HS
                            if n_phases == 4:   # If a whole gait cycle has past
                                stride_time.append(stride_samples/lou_data["Fs_fsr"])
                            n_phases = 0
                            stride_samples = 0
                        # 'count' is the duration of the phase that just ended;
                        # assuming the cyclic order hs->ff->ho->sw, that phase
                        # is label-1 (index -1 wraps to sw) -- TODO confirm.
                        phases_time[label-1].append(count/lou_data["Fs_fsr"])
                        curr_label = label
                        count = 1
                    else:
                        count += 1.0
                # Report MT and CoV per phase, then for the whole stride.
                for phase in range(n_classes):
                    mean_time = np.mean(phases_time[phase])
                    phase_std = np.std(phases_time[phase])
                    rospy.logwarn("(" + state_names[phase] + ")")
                    print "(" + state_names[phase] + ")"
                    rospy.logwarn("Mean time: " + str(mean_time) + " + " + str(phase_std))
                    print "Mean time: " + str(mean_time) + " + " + str(phase_std)
                    rospy.logwarn("CoV: " + str(phase_std/mean_time*100.0))
                    print("CoV: " + str(phase_std/mean_time*100.0))
                mean_time = np.mean(stride_time)
                phase_std = np.std(stride_time)
                rospy.logwarn("(Stride)")
                print "(Stride)"
                rospy.logwarn("Mean time: " + str(mean_time) + " + " + str(phase_std))
                print "Mean time: " + str(mean_time) + " + " + str(phase_std)
                rospy.logwarn("CoV: " + str(phase_std/mean_time*100.0))
                print("CoV: " + str(phase_std/mean_time*100.0))
Ejemplo n.º 25
0
coding_model.add_transition(coding_state2, ez_states_tag[0], 0.0000000230000)

coding_model.add_transition(donor0_states[-1], in0, 1)
coding_model.add_transition(donor1_states[-1], in1, 1)
coding_model.add_transition(donor2_states[-1], in2, 1)

coding_model.add_transition(in0_spacers[-1], acceptor0_states[0], 1)
coding_model.add_transition(in1_spacers[-1], acceptor1_states[0], 1)
coding_model.add_transition(in2_spacers[-1], acceptor2_states[0], 1)

coding_model.add_transition(acceptor0_states[-1], coding_state0, 1.0)
coding_model.add_transition(acceptor1_states[-1], coding_state0, 1.0)
coding_model.add_transition(acceptor2_states[-1], coding_state0, 1.0)

coding_model.add_transition(ze_states[-1], coding_state0, 1.0)

coding_model.add_transition(ez_states_taa[-1], exon3_state, 1.0)
coding_model.add_transition(ez_states_tga[-1], exon3_state, 1.0)
coding_model.add_transition(ez_states_tag[-1], exon3_state, 1.0)

coding_model.add_transition(exon3_state, exon3_state, 0.9)
coding_model.add_transition(exon3_state, poly_a_states[0], 0.1)

coding_model.add_transition(poly_a_states[-1], post_poly_spacer[0], 1.0)
coding_model.add_transition(post_poly_spacer[-1], back, 1.0)

coding_model.bake()

with open('coding_model_base_poly.json', 'w', encoding='utf-8') as out:
    out.write(coding_model.to_json())
    def build_dis_classifier(self):
        """Train and evaluate one swing/stance HMM per data channel.

        For each of the channels in self.full_data, runs stratified k-fold
        cross-validation (self.folds): builds a 2-state (swing/stance) HMM
        with MGD emissions fitted from self.class_data and transitions from
        self.t, Baum-Welch trains it on the fold's training rows,
        Viterbi-decodes the test rows, and accumulates TP/TN/FP/FN.
        Per-channel models and stats are pickled and the stats are also
        saved to a .mat file.

        NOTE(review): relies on instance attributes full_data, full_labels,
        folds, class_data, t, batch_test and a module-level `datafile`.
        """
        skf = StratifiedKFold(self.full_labels, n_folds=self.folds)
        classifier_array = []
        stats_array = []
        num_class = len(self.full_data[0])
        print (num_class)
        for cl in range(0, num_class):
            # NOTE(review): lel starts at -1, so the skip branch below never
            # fires; it looks like a leftover fold-skipping debug knob.
            lel = -1
            tp_total = 0.0
            tn_total = 0.0
            fp_total = 0.0
            fn_total = 0.0
            tests = 0
            for train_index, test_index in skf:
                if lel > 0:
                    lel -= 1
                    continue
                stats = []
                distros = []
                hmm_states = []
                state_names = ['swing', 'stance']
                swings = 0
                stances = 0
                # One MGD emission per class, fitted from the labeled samples.
                for i in range(0, 2):
                    dis = MGD.from_samples(self.class_data[i])
                    st = State(dis, name=state_names[i])
                    distros.append(dis)
                    hmm_states.append(st)

                model = HMM()
                print(model.states)
                model.add_states(hmm_states)
                model.add_transition(model.start, hmm_states[0], 0.5)
                model.add_transition(model.start, hmm_states[1], 0.5)
                # Tiny end-transitions keep the model finite without
                # materially changing the state posteriors.
                model.add_transition(hmm_states[1], model.end, 0.000000000000000001)
                model.add_transition(hmm_states[0], model.end, 0.000000000000000001)

                # Inter-state transitions come from the precomputed matrix t.
                for i in range(0, 2):
                    for j in range(0, 2):
                        model.add_transition(hmm_states[i], hmm_states[j], self.t[i][j])
                model.bake()

                tp = 0.0
                tn = 0.0
                fp = 0.0
                fn = 0.0

                train_data = self.full_data[train_index, cl]
                train_class = self.full_labels[train_index, cl]
                test_data = self.full_data[test_index]
                test_class = self.full_labels[test_index]

                print(np.isfinite(train_data).all())
                print(np.isfinite(test_data).all())
                print(np.isnan(train_data.any()))
                print(np.isinf(train_data.any()))
                print(np.isnan(test_data.any()))
                print(np.isinf(test_data.any()))

                # Abort outright on NaN/Inf in either split.
                if (not np.isfinite(train_data.any())) or (not np.isfinite(test_data.any())) \
                        or (not np.isfinite(train_class.any())) or (not np.isfinite(test_data.any())):
                    rospy.logerr("NaN or Inf Detected")
                    exit()

                try:
                    rospy.logwarn("Training model #"+str(cl)+", fold #" + str(tests))
                    seq = np.array(train_data)
                    model.fit(seq, algorithm='baum-welch', verbose='True', n_jobs=8, max_iterations=150)

                except ValueError:
                    rospy.logwarn("Something went wrong, exiting")
                    rospy.shutdown()
                    exit()

                # Optionally re-chunk the test data into length-20 sequences.
                seq = []
                if self.batch_test == 1:
                    s = 0
                    # for s in range(0, len(test_data)):
                    while s < len(test_data):
                        k = 0
                        seq_entry = []
                        while k < 20 and s < len(test_data):
                            seq_entry.append(test_data[s])
                            k += 1
                            s += 1
                        seq.append(seq_entry)
                else:
                    seq = np.array(test_data)

                # NOTE(review): '== []' on a numpy array is a suspect
                # comparison, and the batched 'seq' built above is unused --
                # Viterbi below always runs on test_data.
                if seq == [] or test_data == []:
                    rospy.logerr("Empty testing sequence")
                    continue

                log, path = model.viterbi(test_data)
                # Viterbi path includes the silent start/end states, hence
                # the expected length of len(test_data) + 2.
                if (len(path) - 2) != len(test_data):
                    rospy.logerr(len(path))
                    rospy.logerr(path[0][1].name)
                    rospy.logerr(path[len(path) - 1][1].name)
                    rospy.logerr(len(test_data))
                    exit()

                tests += 1
                # Tally confusion counts: swing = negative, stance = positive.
                for i in range(0, len(path) - 2):
                    if path[i + 1][1].name != 'Gait-start' and path[i + 1][1].name != 'Gait-end':
                        if path[i + 1][1].name == 'swing':  # prediction is 0
                            swings += 1
                            if test_class[i] == 0:  # class is 0
                                tn += 1.0
                            elif test_class[i] == 1:
                                fn += 1.0  # class is 1
                        elif path[i + 1][1].name == 'stance':  # prediction is 1
                            stances += 1
                            if test_class[i] == 1:  # class is 1
                                tp += 1.0
                            elif test_class[i] == 0:  # class is 0
                                fp += 1.0
                print (swings)
                print (stances)
                if (tp + fn) != 0.0:
                    rospy.logwarn("Sensitivity : " + str(tp / (tp + fn)))
                    # sensitivity = tp / (tp + fn)
                else:
                    rospy.logwarn("Sensitivity : 0.0")
                    # sensitivity = 0.0
                if (tn + fp) != 0.0:
                    rospy.logwarn("Specificity : " + str(tn / (tn + fp)))
                    # specificity = tn_total / (tn_total + fp_total)
                else:
                    rospy.logwarn("Specificity : 0.0")
                    # specificity = 0.0
                if (tn + tp + fn + fp) != 0.0:
                    rospy.logwarn("Accuracy : " + str((tn + tp) / (tn + tp + fn + fp)))
                    # accuracy = (tn + tp) / (tn + tp + fn + fp)
                else:
                    rospy.logwarn("Accuracy : 0.0")
                    # accuracy = 0.0

                tn_total += tn
                tp_total += tp
                fn_total += fn
                fp_total += fp

            # Average confusion counts over the folds actually tested.
            tp_total /= tests
            tn_total /= tests
            fp_total /= tests
            fn_total /= tests
            rospy.logerr("TP :" + str(tp_total))
            rospy.logerr("TN :" + str(tn_total))
            rospy.logerr("FP :" + str(fp_total))
            rospy.logerr("FN :" + str(fn_total))
            rospy.logerr("Tests :" + str(tests))
            if (tp_total + fn_total) != 0.0:
                sensitivity = tp_total / (tp_total + fn_total)
            else:
                sensitivity = 0.0
            if (tn_total + fp_total) != 0.0:
                specificity = tn_total / (tn_total + fp_total)
            else:
                specificity = 0.0
            if (tn_total + tp_total + fn_total + fp_total) != 0.0:
                accuracy = (tn_total + tp_total) / (tn_total + tp_total + fn_total + fp_total)
            else:
                accuracy = 0.0

            rospy.logwarn("----------------------------------------------------------")
            rospy.logerr("Total accuracy: " + str(accuracy))
            rospy.logerr("Total sensitivity: " + str(sensitivity))
            rospy.logerr("Total specificity: " + str(specificity))
            # NOTE(review): fn_total appears twice here; the second entry
            # (paired with tn) was probably meant to be tp_total * tests.
            stats = [tn_total * tests, fn_total * tests, fp_total * tests, fn_total * tests, tests,
                     accuracy, sensitivity, specificity]
            rospy.logwarn("-------------------DONE-------------------------")
            classifier_array.append(model)
            stats_array.append(stats)

        pickle.dump(classifier_array, open(datafile + "distributed_classifiers.p", 'wb'))
        pickle.dump(stats_array, open(datafile + "distributed_stats.p", 'wb'))
        scio.savemat(datafile + "distributed_stats.mat", {'stats': stats_array})
Ejemplo n.º 27
0
def init_lr_hmm(sequences, steps, states_per_step,
                force_end=False, model_id='Left-Righ HMM', seed=None):
    """
    Instantiate a left-right HMM with random start/transition parameters.

    The chain has `steps` groups of `states_per_step` states. Each state's
    NormalDistribution is fitted to a chronological slice of the sequence
    columns (group i sees timepoints [i*step_size, (i+1)*step_size)), so
    earlier states are initialized from earlier observations. Start and
    transition probabilities are sampled uniformly and normalized.

    Parameters:
        sequences: 2-D array, rows = sequences, columns = timepoints
            (inferred from the column slicing below -- TODO confirm).
        steps: number of sequential steps in the left-right chain.
        states_per_step: number of parallel states at each step.
        force_end: if True, last-step states also transition to model.end,
            requiring sequences to terminate there.
        model_id: model name. NOTE(review): the default keeps the historical
            'Left-Righ' spelling; callers may depend on the exact name.
        seed: optional numpy RNG seed for reproducible initialization.

    Returns:
        The baked pomegranate HiddenMarkovModel.
    """

    # seed random number generator
    if seed is not None:
        np.random.seed(seed)

    model = HiddenMarkovModel(model_id)
    n_states = steps * states_per_step

    # make distrobutions from chronological subsets of timepoints
    step_size = int(math.ceil(sequences.shape[1] / float(n_states+1)))

    # generate states
    states = np.empty((steps, states_per_step), dtype=object)
    for i in range(steps):
        for j in range(states_per_step):
            # Slice of columns this state's emission is fitted from.
            temp_assignment = np.arange(step_size * i, step_size * (i+1))
            dist = \
                NormalDistribution.from_samples(sequences[:, temp_assignment])
            state_name = str(i) + '-' + str(j)
            states[i, j] = State(dist, name=str(state_name))

    # add states to model
    model.add_states(states.flatten().tolist())

    # make random transition from start -> step0
    trans = np.random.ranf(states_per_step)
    trans = trans / trans.sum()
    for j in range(states_per_step):
        model.add_transition(model.start, states[0, j], trans[j])

    # make random transition from step(i) -> step(i+1)
    for i in range(steps-1):
        for j in range(states_per_step):
            trans = np.random.ranf(states_per_step + 1)
            trans = trans / trans.sum()
            # self transition
            model.add_transition(states[i, j], states[i, j], trans[0])
            # out transition
            for x in range(states_per_step):
                model.add_transition(states[i, j], states[i + 1, x],
                                     trans[x + 1])

    # make random transition from stepn -> end
    if force_end:
        for j in range(states_per_step):
            trans = np.random.ranf(2)
            trans = trans / trans.sum()
            # self transition
            model.add_transition(states[(steps - 1), j],
                                 states[(steps - 1), j], trans[0])
            # end transition
            model.add_transition(states[(steps - 1), j], model.end, trans[1])

    model.bake()
    # NOTE(review): Python 2 print statement.
    print 'Initialized Left-Right HMM:', model.name, '[', \
        steps, states_per_step, ']'
    return model
Ejemplo n.º 28
0
def init_gaussian_hmm(sequences, n_states, model_id, seed=None):
    """
    Instantiate an HMM with random parameters.

    Randomly generates start and transition probabilities, and builds a
    normal distribution for each state from a random subset of timepoints.

    Parameters
    ----------
    sequences : np.ndarray
        Observation matrix; axis 1 indexes the timepoints that are sampled
        to fit each state's emission distribution.
    n_states : int
        Number of hidden states.
    model_id : str
        Name given to the HiddenMarkovModel.
    seed : int, optional
        Seed for numpy's RNG; pass a value for reproducible initialization.

    Returns
    -------
    HiddenMarkovModel
        A baked model ready for fitting.
    """
    # seed random number generator for reproducibility
    if seed is not None:
        np.random.seed(seed)

    model = HiddenMarkovModel(model_id)

    # make states with distributions from random subsets of timepoints;
    # each state sees ceil(T / n_states) randomly chosen columns
    x = int(math.ceil(sequences.shape[1] / float(n_states)))
    states = []
    for i in range(n_states):
        temp_assignment = np.random.choice(sequences.shape[1], x)
        dist = \
            NormalDistribution.from_samples(sequences[:, temp_assignment])
        states.append(State(dist, name=str(i)))

    model.add_states(states)

    # add random start probabilities, scaled to sum to 1
    start_probs = np.random.ranf(n_states)
    start_probs = start_probs / start_probs.sum()
    for i, state in enumerate(states):
        model.add_transition(model.start, state, start_probs[i])

    # add random transition probabilities out of each state,
    # each row scaled to sum to 1
    for state1 in states:
        transitions = np.random.ranf(n_states)
        transitions = transitions / transitions.sum()
        for i, state2 in enumerate(states):
            model.add_transition(state1, state2, transitions[i])

    model.bake()
    print('Initialized HMM: ', model.name)
    return model
Ejemplo n.º 29
0
def train_and_test():
    """
    Build, train, and evaluate a 'coding region -> donor site' HMM.

    Reads exon sequences and a donor-site frequency matrix from disk,
    wires three periodic coding states into a donor-site state chain,
    fits the model with add-one pseudocounts, and saves the result as
    JSON. Relies on module-level helpers (converter_to, matrix_from_exa,
    calculator, classify, sequence_state_factory, equal_distribution,
    add_sequence, test_model) defined elsewhere in this file.
    """
    # Load training sequences: strip 'P' markers and newlines, lowercase.
    with open('../data extractors/exons_start_1.txt') as in_file:
        total = []
        for line in in_file:
            no_p_line = line.replace('P', '').lower().replace('\n', '')
            total.append(no_p_line)

    # Convert raw strings into the model's symbol encoding (order 2).
    converted_total = [converter_to(x, 2) for x in total]

    # Donor-site position frequency matrix.
    matrixDonor0 = numpy.array(
        matrix_from_exa('../data extractors/new_donor1.exa'))

    # Emission distributions for the three codon positions.
    c0, c1, c2 = calculator.calculate_proba2('../data extractors/new_cuts.txt')
    print(c0.p, c1.p, c2.p)
    coding_state0 = State(DiscreteDistribution(c0.p), 'coding state 0')
    coding_state1 = State(DiscreteDistribution(c1.p), 'coding state 1')
    coding_state2 = State(DiscreteDistribution(c2.p), 'coding state 2')

    # Build the donor-site state chain from the frequency matrix.
    donor0_data = classify(matrixDonor0, 2)
    donor0_states = sequence_state_factory(donor0_data, 'donor0')

    # Catch-all state for everything after the donor site.
    post = State(DiscreteDistribution(equal_distribution), name='post')

    model = HiddenMarkovModel('coding to donor')

    model.add_state(coding_state0)
    model.add_state(coding_state1)
    model.add_state(coding_state2)

    # Chain the donor states into the model with their transitions.
    add_sequence(model, donor0_states)

    model.add_state(post)

    model.add_transition(model.start, coding_state0, 1)

    # Codon cycle 0 -> 1 -> 2 -> 0; each position may exit to the donor.
    model.add_transition(coding_state0, coding_state1, 0.6)
    model.add_transition(coding_state0, donor0_states[0], 0.4)

    model.add_transition(coding_state1, coding_state2, 0.6)
    model.add_transition(coding_state1, donor0_states[0], 0.4)

    model.add_transition(coding_state2, coding_state0, 0.6)
    model.add_transition(coding_state2, donor0_states[0], 0.4)

    model.add_transition(donor0_states[-1], post, 1)

    # Sticky post state with a 10% chance of terminating.
    model.add_transition(post, post, 0.9)
    model.add_transition(post, model.end, 0.1)

    model.bake()
    test_model(model)

    # Fit with pseudocounts so unseen transitions/emissions keep mass.
    model.fit(converted_total,
              transition_pseudocount=1,
              emission_pseudocount=1,
              verbose=True)

    test_model(model)

    with open('partial_model_coding_to_donor_model0.json', 'w') as out:
        out.write(model.to_json())
Ejemplo n.º 30
0
def crop_status_hmm_model(nn_pobability_matrix, timeseries_steps,
                          n_observed_classes):
    """
    Build a crop-status HMM whose emissions are neural-network class
    probabilities.

    Hidden states (index -> name):
        0 emergence, 1 growth, 2 flowers, 3 senescing, 4 senesced, 5 no_crop

    Parameters
    ----------
    nn_pobability_matrix : array-like
        Per-timestep class probabilities predicted by the network.
    timeseries_steps : int
        Number of samples (timesteps) in the series.
    n_observed_classes : int
        Number of observed classes in the probability matrix.

    Returns
    -------
    HiddenMarkovModel
        Baked model with hand-tuned transition weights.
    """
    # NOTE: the original code held this legend as a bare (no-op) list
    # literal; it is documentation, so it lives in the docstring now.
    state_names = ['emergence', 'growth', 'flowers', 'senescing',
                   'senesced', 'no_crop']

    # One emission wrapper per hidden state, each reading column i of the
    # network's predicted-probability matrix.
    states = []
    for i, name in enumerate(state_names):
        dist = NeuralNetworkWrapperCustom(
            predicted_probabilities=nn_pobability_matrix,
            i=i,
            n_samples=timeseries_steps,
            n_classes=n_observed_classes)
        states.append(State(dist, name=name))

    model = HiddenMarkovModel()

    # Initialize each hidden state.
    # All states have an equal chance of being the starting state.
    for s in states:
        model.add_state(s)
        model.add_transition(model.start, s, 1)

    # Mostly self-transitions (weight 90) with small chances of advancing
    # to the next phenological stage or dropping to 'no_crop'; bake()
    # normalizes each row into probabilities.
    transition_weights = [
        [90., 5., 0., 0., 0., 5.],    # emergence
        [0., 90., 2.5, 2.5, 0., 5.],  # growth
        [0., 0., 90., 5., 0., 5.],    # flowers
        [0., 0., 0., 90., 5., 5.],    # senescing
        [0., 0., 0., 0., 90., 10.],   # senesced
        [10., 0, 0., 0., 0., 90.],    # no_crop
    ]
    for state, weights in zip(states, transition_weights):
        model.add_transitions(state, states, weights)

    model.bake(verbose=False)

    return model
Ejemplo n.º 31
0
    def _segment(self, arr, components=2):
        """
        Segment a 1-D signal array into intervals with an HMM.

        Parameters
        ----------
        arr : np.ndarray
            Raw 1-D signal; zero entries are treated as missing and
            excluded from filtering and training.
        components : int
            Number of Gaussian components per HMM state's mixture.

        Returns
        -------
        tuple
            (newarr, seg, scale): the outlier-filtered signal, a list of
            (start, end) segment intervals, and whether state means live
            in 'log' or 'linear' space.
        """
        # Drop zeros, then remove outliers with a Hampel filter in log space.
        nonzero = arr[arr > 0]
        idx = self.hampel_filter(np.log2(nonzero))
        filtered = nonzero[idx]

        # Estimate candidate state means via GMMs in both scales.
        log_gmm = self.get_states(np.log2(filtered))
        log_means, log_probs = log_gmm.means_.ravel(), log_gmm.weights_
        ln_gmm = self.get_states(filtered) # to improve the sensitivity
        ln_means, ln_probs = ln_gmm.means_.ravel(), ln_gmm.weights_
        # Prefer log-scale states; fall back to linear scale when the
        # log-space GMM collapses to a single component.
        if (len(log_means) == 1):
            means, probs = ln_means, ln_probs
            scale = 'linear'
        else:
            means, probs = log_means, log_probs
            scale = 'log'

        logger.info('Estimated HMM state number: {0} ({1} scale)'.format(len(means), scale))
        model = HiddenMarkovModel()
        # GMM emissions
        # Each state is a small mixture centered on its GMM mean, with
        # components alternately offset by +/- 0.5-sized steps (sigma 0.5).
        dists = []
        for m in means:
            tmp = []
            for i in range(components):
                e = m + (-1)**i * ((i+1)//2) * 0.5
                s = 0.5
                tmp.append(NormalDistribution(e, s))
            mixture = State(GeneralMixtureModel(tmp), name=str(m))
            dists.append(mixture)
        model.add_states(*tuple(dists))
        # transition matrix
        # Sticky states: 0.8 self-transition, remaining 0.2 spread evenly.
        for i in range(len(means)):
            for j in range(len(means)):
                if i==j:
                    model.add_transition(dists[i], dists[j], 0.8)
                else:
                    model.add_transition(dists[i], dists[j], 0.2/(len(means)-1))

        # starts and ends
        # Start probabilities come from the GMM component weights.
        for i in range(len(means)):
            model.add_transition(model.start, dists[i], probs[i])

        model.bake()

        # training sequences
        # Rebuild a full-length array with the filtered-out entries zeroed.
        tmp = np.zeros(nonzero.size)
        tmp[idx] = filtered
        newarr = np.zeros(arr.size)
        newarr[arr > 0] = tmp

        if len(means) > 1:
            model.fit(self.pieces(newarr, scale=scale), algorithm='baum-welch', n_jobs=self.n_jobs,
                    max_iterations=5000, stop_threshold=2e-4)

            queue = newarr[newarr > 0]

            # Decode the most likely state path (state names encode the
            # means) and map it onto copy-number calls per position.
            if scale=='log':
                seq = np.r_[[s.name for i, s in model.viterbi(np.log2(queue))[1][1:]]]
            else:
                seq = np.r_[[s.name for i, s in model.viterbi(queue)[1][1:]]]
            seg = self.assign_cnv(queue, seq)

            predicted = np.zeros(newarr.size)
            predicted[newarr > 0] = seg
            seg = self.call_intervals(predicted)
        else:
            # A single state means nothing to segment: one whole interval.
            seg = [(0, newarr.size)]

        return newarr, seg, scale
Ejemplo n.º 32
0
def dominant_cover_hmm_model(nn_pobability_matrix, timeseries_steps,
                             n_observed_classes):
    """
    Build a dominant-land-cover HMM whose emissions are neural-network
    class probabilities.

    Hidden states: vegetation, residue, soil, snow, water. Every state is
    equally likely at the start; transitions heavily favor staying in the
    same cover class (weight 95 vs 1; bake() normalizes the rows).

    Parameters
    ----------
    nn_pobability_matrix : array-like
        Per-timestep class probabilities predicted by the network.
    timeseries_steps : int
        Number of samples (timesteps) in the series.
    n_observed_classes : int
        Number of observed classes in the probability matrix.

    Returns
    -------
    HiddenMarkovModel
        Baked model.
    """
    cover_names = ['vegetation', 'residue', 'soil', 'snow', 'water']

    # Emission wrapper per state: each reads its own column of the
    # network's predicted-probability matrix.
    states = []
    for idx, label in enumerate(cover_names):
        emission = NeuralNetworkWrapperCustom(
            predicted_probabilities=nn_pobability_matrix,
            i=idx,
            n_samples=timeseries_steps,
            n_classes=n_observed_classes)
        states.append(State(emission, name=label))

    model = HiddenMarkovModel()

    # All states have an equal chance of being the starting state.
    for s in states:
        model.add_state(s)
        model.add_transition(model.start, s, 1)

    # Sticky transitions: 95 on the diagonal, 1.0 everywhere else.
    for row, origin in enumerate(states):
        weights = [95. if col == row else 1.0
                   for col in range(len(states))]
        model.add_transitions(origin, states, weights)

    model.bake(verbose=False)

    return model
Ejemplo n.º 33
0
def get_suffix_matcher_hmm(pattern):
    """
    Build a profile HMM that matches any suffix of *pattern*.

    The silent unit-start state connects to every match state, so decoding
    may enter the pattern at any position (the suffix property). Insert
    and delete states absorb sequencing errors with small fixed
    probabilities; matches emit the expected base with probability 0.97.
    """
    model = Model(name="Suffix Matcher HMM Model")
    hmm_name = 'suffix'
    pattern_len = len(pattern)

    # Insert states emit uniformly over the four bases.
    uniform_bases = DiscreteDistribution({
        'A': 0.25,
        'C': 0.25,
        'G': 0.25,
        'T': 0.25
    })
    insert_states = [State(uniform_bases, name='I%s_%s' % (i, hmm_name))
                     for i in range(pattern_len + 1)]

    # Match state i strongly favors the pattern base at position i.
    match_states = []
    for i, base in enumerate(pattern):
        base_probs = dict({'A': 0.01, 'C': 0.01, 'G': 0.01, 'T': 0.01})
        base_probs[base] = 0.97
        match_states.append(
            State(DiscreteDistribution(base_probs),
                  name='M%s_%s' % (str(i + 1), hmm_name)))

    # Silent delete states skip one pattern position each.
    delete_states = [State(None, name='D%s_%s' % (str(i + 1), hmm_name))
                     for i in range(pattern_len)]

    unit_start = State(None, name='suffix_start_%s' % hmm_name)
    unit_end = State(None, name='suffix_end_%s' % hmm_name)
    model.add_states(insert_states + match_states + delete_states +
                     [unit_start, unit_end])
    last = len(delete_states) - 1

    model.add_transition(model.start, unit_start, 1)

    model.add_transition(unit_end, model.end, 1)

    # Suffix property: the unit start may enter at any match position.
    model.add_transition(unit_start, delete_states[0], 0.01)
    model.add_transition(unit_start, insert_states[0], 0.01)
    for i in range(pattern_len):
        model.add_transition(unit_start, match_states[i],
                             0.98 / pattern_len)

    model.add_transition(insert_states[0], insert_states[0], 0.01)
    model.add_transition(insert_states[0], delete_states[0], 0.01)
    model.add_transition(insert_states[0], match_states[0], 0.98)

    # Last column flows into the unit end.
    model.add_transition(delete_states[last], unit_end, 0.99)
    model.add_transition(delete_states[last], insert_states[last + 1], 0.01)

    model.add_transition(match_states[last], unit_end, 0.99)
    model.add_transition(match_states[last], insert_states[last + 1], 0.01)

    model.add_transition(insert_states[last + 1], insert_states[last + 1],
                         0.01)
    model.add_transition(insert_states[last + 1], unit_end, 0.99)

    # Interior columns: standard profile-HMM M/I/D wiring.
    for i in range(0, pattern_len):
        model.add_transition(match_states[i], insert_states[i + 1], 0.01)
        model.add_transition(delete_states[i], insert_states[i + 1], 0.01)
        model.add_transition(insert_states[i + 1], insert_states[i + 1], 0.01)
        if i < pattern_len - 1:
            model.add_transition(insert_states[i + 1], match_states[i + 1],
                                 0.98)
            model.add_transition(insert_states[i + 1], delete_states[i + 1],
                                 0.01)

            model.add_transition(match_states[i], match_states[i + 1], 0.98)
            model.add_transition(match_states[i], delete_states[i + 1], 0.01)

            model.add_transition(delete_states[i], delete_states[i + 1], 0.01)
            model.add_transition(delete_states[i], match_states[i + 1], 0.98)

    # merge=None keeps the silent start/end states intact.
    model.bake(merge=None)

    return model
Ejemplo n.º 34
0
def get_constant_number_of_repeats_matcher_hmm(patterns, copies):
    """
    Build a profile HMM that matches exactly *copies* tandem repeat units.

    Parameters
    ----------
    patterns : list
        Repeat-unit sequences used to derive the shared profile
        (transition and emission tables).
    copies : int
        Exact number of repeat units the model must traverse; unit i's
        end feeds unit i+1's start, and only the final unit's end
        connects to model.end.

    Returns
    -------
    Model
        Baked HMM (merge=None preserves the silent unit start/end states).
    """
    model = Model(name="Repeating Pattern Matcher HMM Model")

    # Profile tables learned once from the patterns and reused per copy.
    transitions, emissions = build_profile_hmm_for_repeats(
        patterns, settings.MAX_ERROR_RATE)
    matches = [m for m in emissions.keys() if m.startswith('M')]

    last_end = None
    for repeat in range(copies):
        # One full column set (inserts, matches, deletes) per repeat copy;
        # state names are suffixed with the copy index to stay unique.
        insert_states = []
        match_states = []
        delete_states = []
        for i in range(len(matches) + 1):
            insert_distribution = DiscreteDistribution(emissions['I%s' % i])
            insert_states.append(
                State(insert_distribution, name='I%s_%s' % (i, repeat)))

        for i in range(1, len(matches) + 1):
            match_distribution = DiscreteDistribution(emissions['M%s' % i])
            match_states.append(
                State(match_distribution, name='M%s_%s' % (str(i), repeat)))

        for i in range(1, len(matches) + 1):
            delete_states.append(State(None, name='D%s_%s' % (str(i), repeat)))

        unit_start = State(None, name='unit_start_%s' % repeat)
        unit_end = State(None, name='unit_end_%s' % repeat)
        model.add_states(insert_states + match_states + delete_states +
                         [unit_start, unit_end])
        n = len(delete_states) - 1

        # Chain the copies: previous unit's end feeds this unit's start;
        # the first unit hangs off the model start instead.
        if repeat > 0:
            model.add_transition(last_end, unit_start, 1)
        else:
            model.add_transition(model.start, unit_start, 1)

        # Only the final copy may reach the model end.
        if repeat == copies - 1:
            model.add_transition(unit_end, model.end, 1)

        # Entry transitions out of the unit start.
        model.add_transition(unit_start, match_states[0],
                             transitions['unit_start']['M1'])
        model.add_transition(unit_start, delete_states[0],
                             transitions['unit_start']['D1'])
        model.add_transition(unit_start, insert_states[0],
                             transitions['unit_start']['I0'])

        model.add_transition(insert_states[0], insert_states[0],
                             transitions['I0']['I0'])
        model.add_transition(insert_states[0], delete_states[0],
                             transitions['I0']['D1'])
        model.add_transition(insert_states[0], match_states[0],
                             transitions['I0']['M1'])

        # Last column flows into the unit end.
        model.add_transition(delete_states[n], unit_end,
                             transitions['D%s' % (n + 1)]['unit_end'])
        model.add_transition(delete_states[n], insert_states[n + 1],
                             transitions['D%s' % (n + 1)]['I%s' % (n + 1)])

        model.add_transition(match_states[n], unit_end,
                             transitions['M%s' % (n + 1)]['unit_end'])
        model.add_transition(match_states[n], insert_states[n + 1],
                             transitions['M%s' % (n + 1)]['I%s' % (n + 1)])

        model.add_transition(insert_states[n + 1], insert_states[n + 1],
                             transitions['I%s' % (n + 1)]['I%s' % (n + 1)])
        model.add_transition(insert_states[n + 1], unit_end,
                             transitions['I%s' % (n + 1)]['unit_end'])

        # Interior columns: standard profile-HMM M/I/D wiring driven by
        # the learned transition table.
        for i in range(1, len(matches) + 1):
            model.add_transition(match_states[i - 1], insert_states[i],
                                 transitions['M%s' % i]['I%s' % i])
            model.add_transition(delete_states[i - 1], insert_states[i],
                                 transitions['D%s' % i]['I%s' % i])
            model.add_transition(insert_states[i], insert_states[i],
                                 transitions['I%s' % i]['I%s' % i])
            if i < len(matches):
                model.add_transition(insert_states[i], match_states[i],
                                     transitions['I%s' % i]['M%s' % (i + 1)])
                model.add_transition(insert_states[i], delete_states[i],
                                     transitions['I%s' % i]['D%s' % (i + 1)])

                model.add_transition(match_states[i - 1], match_states[i],
                                     transitions['M%s' % i]['M%s' % (i + 1)])
                model.add_transition(match_states[i - 1], delete_states[i],
                                     transitions['M%s' % i]['D%s' % (i + 1)])

                model.add_transition(delete_states[i - 1], match_states[i],
                                     transitions['D%s' % i]['M%s' % (i + 1)])
                model.add_transition(delete_states[i - 1], delete_states[i],
                                     transitions['D%s' % i]['D%s' % (i + 1)])

        last_end = unit_end

    model.bake(merge=None)
    return model
Ejemplo n.º 35
0
    'c': 0.45,
    'g': 0.05,
    't': 0.05
}),
                    name='fixed')

# Register both states with the model built above.
hmmodel.add_state(back_state)
hmmodel.add_state(fixed_state)

# Always start in the background state; both states are sticky
# (0.9 self-transition, 0.1 chance of switching).
hmmodel.add_transition(hmmodel.start, back_state, 1)
hmmodel.add_transition(back_state, back_state, 0.9)
hmmodel.add_transition(back_state, fixed_state, 0.1)
hmmodel.add_transition(fixed_state, fixed_state, 0.9)
hmmodel.add_transition(fixed_state, back_state, 0.1)

hmmodel.bake()

# Decode a toy sequence and print the most likely state path.
seq = list('acgtacgtaaaaccccaaa')

lopg, path = hmmodel.viterbi(seq)

print([x[1].name for x in path])

print(hmmodel.to_json())

# Additional example sequences for experimentation; only the last one
# is actually decoded below.
to_fit1 = list('acgtacacacacacacac')
to_fit2 = list('acgtacgtacgtacgtacgtacgtacgtcgt')
to_fit3 = list('aaaaacccccaaacc')
to_fit4 = list('aaaaaccgcccaaaccacgtacgtacgtacgtactacgggggg')

lopg, path = hmmodel.viterbi(to_fit4)
Ejemplo n.º 36
0
# One state per POS tag, with emissions P(word | tag) estimated from the
# training-set counts (emission_counts / tag_unigrams defined above).
for tag in data.training_set.tagset:
    tag_emissions = DiscreteDistribution({
        word: emission_counts[tag][word] / tag_unigrams[tag]
        for word in emission_counts[tag]
    })
    tag_states[tag] = State(tag_emissions, name=tag)
    basic_model.add_state(tag_states[tag])

# Add edges between states for the observed transition frequencies
# P(tag_i | tag_i-1), plus start (P(tag | <s>)) and end (P(</s> | tag))
# edges, all normalized by the tag unigram counts.
for tag in data.training_set.tagset:
    basic_model.add_transition(basic_model.start, tag_states[tag],
                               tag_starts[tag] / tag_unigrams[tag])
    for tag1 in data.training_set.tagset:
        basic_model.add_transition(
            tag_states[tag], tag_states[tag1],
            tag_bigrams[(tag, tag1)] / tag_unigrams[tag])
    basic_model.add_transition(tag_states[tag], basic_model.end,
                               tag_ends[tag] / tag_unigrams[tag])

# finalize the model
basic_model.bake()

# Evaluate the accuracy of the HMM tagger on the training and test corpora.
hmm_training_acc = accuracy(data.training_set.X, data.training_set.Y,
                            basic_model)
print("training accuracy basic hmm model: {:.2f}%".format(100 *
                                                          hmm_training_acc))
hmm_testing_acc = accuracy(data.testing_set.X, data.testing_set.Y, basic_model)
print("testing accuracy basic hmm model: {:.2f}%".format(100 *
                                                         hmm_testing_acc))
Ejemplo n.º 37
0
def main():
    rospy.init_node('hmm_trainer')
    param_vec = []
    rospack = rospkg.RosPack()
    if (len(sys.argv) < 2):
        print("Missing the prefix argument.")
        exit()
    else:
        prefix = sys.argv[1]
    use_measurements = np.zeros(3)

    # patient = rospy.get_param('~patient', 'None')
    # if prefix == 'None':
    #     rospy.logerr("No filename given ,exiting")
    #     exit()

    phase_pub = rospy.Publisher('/phase', Int32, queue_size=10)
    packpath = rospack.get_path('exo_gait_phase_det')
    datapath = packpath + "/log/mat_files/"
    rospy.logwarn("Patient: {}".format(prefix))
    print("Patient: {}".format(prefix))
    verbose = rospy.get_param('~verbose', False)
    """Print console output into text file"""
    # sys.stdout = open(packpath + "/log/results/intra-sub_" + prefix + ".txt", "w")
    """Data loading"""
    n_trials = 3
    data = [[] for x in range(0, n_trials)]
    for i in range(0, n_trials):
        data[i] = scio.loadmat(datapath + prefix + "_proc_data" + str(i + 1) +
                               ".mat")

    accel_x = [[] for x in range(0, n_trials)]
    accel_y = [[] for x in range(0, n_trials)]
    accel_z = [[] for x in range(0, n_trials)]
    gyro_x = [[] for x in range(0, n_trials)]
    gyro_y = [[] for x in range(0, n_trials)]
    gyro_z = [[] for x in range(0, n_trials)]
    time_array = [[] for x in range(0, n_trials)]
    labels = [[] for x in range(0, n_trials)]
    fs_fsr = []
    for i in range(0, n_trials):
        # accel_x[i] = data[i]["accel_x"][0]
        # accel_y[i] = data[i]["accel_y"][0]
        # accel_z[i] = data[i]["accel_z"][0]
        gyro_x[i] = data[i]["gyro_x"][0]
        gyro_y[i] = data[i]["gyro_y"][0]
        gyro_z[i] = data[i]["gyro_z"][0]
        time_array[i] = data[i]["time"][0]
        labels[i] = data[i]["labels"][0]
        fs_fsr.append(data[i]["Fs_fsr"][0][0])
    """Feature extraction"""
    """First derivative"""
    # fder_gyro_x = []
    # for i in range(n_trials):
    #     der = []
    #     der.append(gyro_x[i][0])
    #     for j in range(1,len(gyro_x[i])-1):
    #         der.append((gyro_x[i][j+1]-gyro_x[i][j-1])/2)
    #     der.append(gyro_x[i][-1])
    #     fder_gyro_x.append(der)

    fder_gyro_y = []
    for i in range(n_trials):
        der = []
        der.append(gyro_y[i][0])
        for j in range(1, len(gyro_y[i]) - 1):
            der.append((gyro_y[i][j + 1] - gyro_y[i][j - 1]) / 2)
        der.append(gyro_y[i][-1])
        fder_gyro_y.append(der)

    # fder_gyro_z = []
    # for i in range(n_trials):
    #     der = []
    #     der.append(gyro_z[i][0])
    #     for j in range(1,len(gyro_z[i])-1):
    #         der.append((gyro_z[i][j+1]-gyro_z[i][j-1])/2)
    #     der.append(gyro_z[i][-1])
    #     fder_gyro_z.append(der)
    """Second derivative"""
    # sder_gyro_x = []
    # for i in range(n_trials):
    #     der = []
    #     der.append(fder_gyro_x[i][0])
    #     for j in range(1,len(fder_gyro_x[i])-1):
    #         der.append((fder_gyro_x[i][j+1]-fder_gyro_x[i][j-1])/2)
    #     der.append(fder_gyro_x[i][-1])
    #     sder_gyro_x.append(der)
    #
    # sder_gyro_y = []
    # for i in range(n_trials):
    #     der = []
    #     der.append(fder_gyro_y[i][0])
    #     for j in range(1,len(fder_gyro_y[i])-1):
    #         der.append((fder_gyro_y[i][j+1]-fder_gyro_y[i][j-1])/2)
    #     der.append(fder_gyro_y[i][-1])
    #     sder_gyro_y.append(der)
    #
    # sder_gyro_z = []
    # for i in range(n_trials):
    #     der = []
    #     der.append(fder_gyro_z[i][0])
    #     for j in range(1,len(fder_gyro_z[i])-1):
    #         der.append((fder_gyro_z[i][j+1]-fder_gyro_z[i][j-1])/2)
    #     der.append(fder_gyro_z[i][-1])
    #     sder_gyro_z.append(der)
    """Peak detector"""
    # window_wid = 15        # Window width should be odd
    # search_ratio = window_wid/2
    # pdet_gyro_x = []
    # for i in range(n_trials):
    #     pdet = []
    #     for j in range(len(gyro_x[i])):
    #         if j <= search_ratio:
    #             win = gyro_x[i][:j+search_ratio+1]
    #         elif j >= len(gyro_x[i])-search_ratio-1:
    #             win = gyro_x[i][j-search_ratio:]
    #         else:
    #             win = gyro_x[i][j-search_ratio:j+search_ratio+1]
    #         pdet.append(gyro_x[i][j]/max(win))
    #     pdet_gyro_x.append(pdet)

    # print len(gyro_x)
    # print len(pdet_gyro_x)
    # for i in range(3):
    #     print len(gyro_x[i])
    #     print len(pdet_gyro_x[i])

    # pdet_gyro_y = []
    # for i in range(n_trials):
    #     pdet = []
    #     for j in range(len(gyro_y[i])):
    #         if j <= search_ratio:
    #             win = gyro_y[i][:j+search_ratio+1]
    #         elif j >= len(gyro_y[i])-search_ratio-1:
    #             win = gyro_y[i][j-search_ratio:]
    #         else:
    #             win = gyro_y[i][j-search_ratio:j+search_ratio+1]
    #         pdet.append(gyro_y[i][j]/max(win))
    #     pdet_gyro_y.append(pdet)
    #
    # pdet_gyro_z = []
    # for i in range(n_trials):
    #     pdet = []
    #     for j in range(len(gyro_z[i])):
    #         if j <= search_ratio:
    #             win = gyro_z[i][:j+search_ratio+1]
    #         elif j >= len(gyro_z[i])-search_ratio-1:
    #             win = gyro_z[i][j-search_ratio:]
    #         else:
    #             win = gyro_z[i][j-search_ratio:j+search_ratio+1]
    #         pdet.append(gyro_z[i][j]/max(win))
    #     pdet_gyro_z.append(pdet)
    """Create training and test data"""
    ff = [[] for x in range(0, n_trials)]
    for j in range(0, n_trials):
        for k in range(0, len(time_array[j])):
            f_ = []
            # f_.append(accel_x[j][k])
            # f_.append(accel_y[j][k])
            # f_.append(accel_z[j][k])
            # f_.append(gyro_x[j][k])
            # f_.append(fder_gyro_x[j][k])
            # f_.append(sder_gyro_x[j][k])
            # f_.append(pdet_gyro_x[j][k])
            f_.append(gyro_y[j][k])
            f_.append(fder_gyro_y[j][k])
            # f_.append(sder_gyro_y[j][k])
            # f_.append(pdet_gyro_y[j][k])
            # f_.append(gyro_z[j][k])
            # f_.append(fder_gyro_z[j][k])
            # f_.append(sder_gyro_z[j][k])
            # f_.append(pdet_gyro_z[j][k])
            ff[j].append(f_)
    n_signals = len(ff[0][0])
    """cHMM"""
    startprob = [0.25, 0.25, 0.25, 0.25]
    state_names = ['hs', 'ff', 'ho', 'sw']
    rospy.logwarn("""Intra-subject training""")
    print("""Intra-subject training""")
    # for leave_one_out in range(0, n_trials):
    for leave_one_out in range(1, 2):
        rospy.logwarn("-------TRIAL {}-------".format(leave_one_out + 1))
        print("-------TRIAL {}-------".format(leave_one_out + 1))
        """Transition matrix"""
        t = np.zeros((4, 4))  # Transition matrix
        prev = -1
        for i in range(0, len(labels[leave_one_out])):
            # data[i]._replace(label = correct_mapping[data[i].label])
            if prev == -1:
                prev = labels[leave_one_out][i]
            t[prev][labels[leave_one_out][i]] += 1.0
            prev = labels[leave_one_out][i]
        t = normalize(t, axis=1, norm='l1')
        if verbose: rospy.logwarn("TRANSITION MATRIX\n" + str(t))

        n_classes = 4
        class_data = [[] for x in range(n_classes)]
        full_data = []
        full_labels = []
        for i in range(len(ff[leave_one_out])):
            full_data.append(ff[leave_one_out][i])
            full_labels.append(labels[leave_one_out][i])
        # print full_data == ff[leave_one_out]
        # print full_labels == labels[leave_one_out]
        # print len(full_data) == len(full_labels)
        # for i in range(0,len(ff[leave_one_out-1])):
        #     full_data.append(ff[leave_one_out-1][i])
        #     full_labels.append(labels[leave_one_out-1][i])
        # for i in range(0,len(ff[(leave_one_out+1) % n_trials])):
        #     full_data.append(ff[(leave_one_out+1) % n_trials][i])
        #     full_labels.append(labels[(leave_one_out+1) % n_trials][i])

        # NOTE(review): this is the interior of a larger leave-one-out
        # training/evaluation method (its `def` line is above this chunk).
        # Python 2 syntax throughout. Only comments were added below.
        # print len(full_data) == (len(ff[leave_one_out]) + len(ff[leave_one_out-1]) + len(ff[(leave_one_out+1) % n_trials]))
        # print full_data
        # print len(full_data)
        # print full_labels
        # print len(full_labels)

        # Bucket every training sample under its ground-truth class label.
        for i in range(0, len(full_data)):
            class_data[full_labels[i]].append(full_data[i])
        """Multivariate Gaussian Distributions for each hidden state"""
        # Per-class, per-signal statistics (means/vars/std) plus one full
        # covariance matrix per class for the multivariate emissions.
        class_means = [[[] for x in range(n_signals)]
                       for i in range(n_classes)]
        class_vars = [[[] for x in range(n_signals)] for i in range(n_classes)]
        class_std = [[[] for x in range(n_signals)] for i in range(n_classes)]
        class_cov = []
        classifiers = []

        for i in range(0, n_classes):
            # cov = np.ma.cov(np.array(class_data[i]), rowvar=False)
            # rowvar=False: rows are observations, columns are signals.
            cov = np.cov(np.array(class_data[i]), rowvar=False)
            class_cov.append(cov)
            for j in range(0, n_signals):
                class_means[i][j] = np.array(
                    class_data[i][:])[:, [j]].mean(axis=0)
                class_vars[i][j] = np.array(class_data[i][:])[:,
                                                              [j]].var(axis=0)
                class_std[i][j] = np.array(class_data[i][:])[:,
                                                             [j]].std(axis=0)
        print "\n" + str(class_cov) + "\n"
        """Classifier initialization"""
        # One multivariate-Gaussian emission (MGD) and one HMM state per
        # gait phase; state names come from state_names.
        distros = []
        hmm_states = []
        for i in range(n_classes):
            dis = MGD\
                (np.array(class_means[i]).flatten(),
                 np.array(class_cov[i]))
            st = State(dis, name=state_names[i])
            distros.append(dis)
            hmm_states.append(st)
        model = HMM(name="Gait")

        model.add_states(hmm_states)
        """Initial transitions"""
        # startprob[i]: prior probability of starting in state i.
        for i in range(0, n_classes):
            model.add_transition(model.start, hmm_states[i], startprob[i])
        """Left-right model"""
        # t[i][j]: initial transition probability from state i to state j
        # (fully connected here despite the "left-right" label).
        for i in range(0, n_classes):
            for j in range(0, n_classes):
                model.add_transition(hmm_states[i], hmm_states[j], t[i][j])

        model.bake()

        # print (model.name)
        # rospy.logwarn("N. observations: " + str(model.d))
        # print (model.edges)
        # rospy.logwarn("N. hidden states: " + str(model.silent_start))
        # print model
        """Training"""
        # Leave-one-out: train on the two neighbouring trials, hold out
        # trial `leave_one_out` for evaluation.
        # limit = int(len(ff1)*(8/10.0))    # 80% of data to test, 20% to train
        # x_train = list([ff1[:limit]])
        # x_train = list([ff1,ff2])
        # x_train = list([ff2])
        x_train = []
        for i in range(0, len(ff[leave_one_out - 1])):
            x_train.append(ff[leave_one_out - 1][i])
        for i in range(0, len(ff[(leave_one_out + 1) % n_trials])):
            x_train.append(ff[(leave_one_out + 1) % n_trials][i])
        # fit() expects a list of sequences; wrap the single concatenated one.
        x_train = list([x_train])
        rospy.logwarn("Training...")
        model.fit(x_train, algorithm='baum-welch', verbose=verbose)
        # model.fit(list([ff[leave_one_out-1]]), algorithm='baum-welch', verbose=verbose)
        # model.fit(list([ff[(leave_one_out+1) % n_trials]]), algorithm='baum-welch', verbose=verbose)
        # model.fit(seq, algorithm='viterbi', verbose='True')
        """Find most-likely sequence"""
        # logp, path = model.viterbi(ff[limit:])
        logp, path = model.viterbi(ff[leave_one_out])
        # print logp
        # print path
        # Map each Viterbi state name back to its integer class label.
        class_labels = []
        for i in range(len(labels[leave_one_out])):
            path_phase = path[i][1].name
            for state in range(n_classes):
                if path_phase == state_names[state]:
                    class_labels.append(state)
        # NOTE(review): dropping the first ground-truth label presumably
        # compensates for the Viterbi path's extra start state, but it also
        # shortens labels[] by one relative to class_labels — confirm the
        # index alignment in the comparison loops below.
        labels[leave_one_out] = list(labels[leave_one_out][1:])
        # Saving classifier labels into csv file
        # np.savetxt(packpath+"/log/intra_labels/"+prefix+"_labels"+str(leave_one_out+1)+".csv", class_labels, delimiter=",", fmt='%s')
        # rospy.logwarn("csv file with classifier labels was saved.")

        # NOTE(review): `sum` shadows the Python builtin and is only used by
        # the commented-out accuracy code below; candidate for removal.
        sum = 0.0
        true_pos = 0.0
        false_pos = 0.0
        true_neg = 0.0
        false_neg = 0.0
        tol = 6e-2  # Tolerance window of 60 ms
        # Half-window width in samples at the FSR sampling rate of this trial.
        tol_window = int((tol / 2) / (1 / float(fs_fsr[leave_one_out])))
        print "FSR freq: " + str(fs_fsr[leave_one_out])
        print "Tolerance win: " + str(tol_window)
        # print tol_window
        # # print type(tol_window)
        # for i in range(0, len(labels[leave_one_out])):
        #     """Tolerance window"""
        #     if i > tol_window+1 and i < len(labels[leave_one_out])-tol_window:
        #         # curr_tol = time_array[leave_one_out][i+tol_window]-time_array[leave_one_out][i-tol_window]
        #         # print curr_tol
        #         win = []
        #         for j in range(i-tol_window,i+tol_window+1):
        #             win.append(state_names[labels[leave_one_out][j]])
        #         if path[i][1].name in win:
        #             sum += 1.0
        #     else:
        #         if path[i][1].name == labels[leave_one_out][i]:
        #             sum += 1.0
        """Performance Evaluation"""
        # One-vs-rest counting: for each phase, every sample contributes one
        # tp/fp/tn/fn, with a +/- tol_window tolerance around transitions.
        rospy.logwarn("Calculating results...")
        time_error = [[] for x in range(n_classes)]
        for phase in range(n_classes):
            for i in range(len(labels[leave_one_out])):
                """Tolerance window"""
                if i >= tol_window and i < len(
                        labels[leave_one_out]) - tol_window:
                    # curr_tol = time_array[leave_one_out][i+tol_window]-time_array[leave_one_out][i-tol_window]
                    # print curr_tol
                    win = []
                    for j in range(i - tol_window, i + tol_window + 1):
                        win.append(labels[leave_one_out][j])
                    """Calculate time error with true positives"""
                    if class_labels[i] == phase:
                        if class_labels[i] in win:
                            # Signed offset (in seconds) between prediction
                            # and the nearest ground-truth occurrence in win.
                            for k in range(len(win)):
                                if win[k] == phase:
                                    time_error[phase].append(
                                        (k - tol_window) /
                                        fs_fsr[leave_one_out])
                                    break
                            true_pos += 1.0
                            # NOTE(review): in Python 2 these verbose prints
                            # do int + str concatenation (phase and
                            # class_labels[i] are ints) and raise TypeError
                            # when verbose is True — should be str(phase) etc.
                            if verbose:
                                print phase + ", " + state_names[labels[
                                    leave_one_out][i]] + ", " + class_labels[
                                        i] + ", true_pos"
                        else:
                            false_pos += 1.0
                            if verbose:
                                print phase + ", " + state_names[labels[
                                    leave_one_out][i]] + ", " + class_labels[
                                        i] + ", false_pos"
                    else:
                        if phase != labels[leave_one_out][i]:
                            # if phase not in win:
                            true_neg += 1.0
                            if verbose:
                                print phase + ", " + state_names[labels[
                                    leave_one_out][i]] + ", " + class_labels[
                                        i] + ", true_neg"
                        else:
                            false_neg += 1.0
                            if verbose:
                                print phase + ", " + state_names[labels[
                                    leave_one_out][i]] + ", " + class_labels[
                                        i] + ", false_neg"
                else:
                    # Edge samples (no full tolerance window): strict
                    # sample-by-sample comparison instead.
                    if class_labels[i] == phase:
                        if class_labels[i] == labels[leave_one_out][i]:
                            true_pos += 1.0
                        else:
                            false_pos += 1.0
                    else:
                        if phase != labels[leave_one_out][i]:
                            true_neg += 1.0
                        else:
                            false_neg += 1.0

        rospy.logwarn("Timing error")
        print("Timing error")
        for phase in range(n_classes):
            rospy.logwarn("(" + state_names[phase] + ")")
            print "(" + state_names[phase] + ")"
            if len(time_error[phase]) > 0:
                rospy.logwarn(
                    str(np.mean(time_error[phase])) + " + " +
                    str(np.std(time_error[phase])))
                print str(np.mean(time_error[phase])) + " + " + str(
                    np.std(time_error[phase]))
            else:
                # No true positives for this phase: report the full tolerance
                # (60 ms) as a pessimistic placeholder.
                rospy.logwarn("0.06 + 0")
                print "0.06 + 0"
        """Calculate mean time (MT) of stride and each gait phase and Coefficient of Variation (CoV)"""
        rospy.logwarn("Mean time (MT) and Coefficient of Variance (CoV)")
        print("Mean time (MT) and Coefficient of Variance (CoV)")
        # Run the same duration statistics twice: once on the HMM output,
        # once on the FSR ground truth.
        n_group = 0
        for label_group in [class_labels, labels[leave_one_out]]:
            if n_group == 0:
                rospy.logwarn("Results for HMM:")
                print("Results for HMM:")
            else:
                rospy.logwarn("Results for FSR:")
                print("Results for FSR:")
            curr_label = -1
            count = 0
            n_phases = 0
            stride_samples = 0
            phases_time = [[] for x in range(n_classes)]
            stride_time = []
            # Run-length encode the label stream: each label change closes
            # the previous phase segment.
            for label in label_group:
                # for label in class_labels:
                if curr_label != label:
                    n_phases += 1
                    stride_samples += count
                    if label == 0:  # Gait start: HS
                        if n_phases == 4:  # If a whole gait cycle has past
                            stride_time.append(stride_samples /
                                               fs_fsr[leave_one_out])
                        n_phases = 0
                        stride_samples = 0
                    # NOTE(review): for label == 0 this writes to index -1
                    # (the last phase bucket) via negative indexing — likely
                    # intentional (previous phase wraps around), but confirm.
                    phases_time[label - 1].append(count /
                                                  fs_fsr[leave_one_out])
                    curr_label = label
                    count = 1
                else:
                    count += 1.0
            for phase in range(n_classes):
                mean_time = np.mean(phases_time[phase])
                phase_std = np.std(phases_time[phase])
                rospy.logwarn("(" + state_names[phase] + ")")
                print "(" + state_names[phase] + ")"
                rospy.logwarn("Mean time: " + str(mean_time) + " + " +
                              str(phase_std))
                print "Mean time: " + str(mean_time) + " + " + str(phase_std)
                rospy.logwarn("CoV: " + str(phase_std / mean_time * 100.0))
                print("CoV: " + str(phase_std / mean_time * 100.0))
            mean_time = np.mean(stride_time)
            phase_std = np.std(stride_time)
            rospy.logwarn("(Stride)")
            print "(Stride)"
            rospy.logwarn("Mean time: " + str(mean_time) + " + " +
                          str(phase_std))
            print "Mean time: " + str(mean_time) + " + " + str(phase_std)
            rospy.logwarn("CoV: " + str(phase_std / mean_time * 100.0))
            print("CoV: " + str(phase_std / mean_time * 100.0))
            n_group += 1
        """Accuracy"""
        # acc = sum/len(labels[leave_one_out])
        if (true_neg + true_pos + false_neg + false_pos) != 0.0:
            acc = (true_neg + true_pos) / (true_neg + true_pos + false_neg +
                                           false_pos)
        else:
            acc = 0.0
        """Sensitivity or True Positive Rate"""
        if true_pos + false_neg != 0:
            tpr = true_pos / (true_pos + false_neg)
        else:
            tpr = 0.0
        """Specificity or True Negative Rate"""
        if false_pos + true_neg != 0:
            tnr = true_neg / (false_pos + true_neg)
        else:
            tnr = 0.0
        # rospy.logwarn("Accuracy: {}%".format(acc*100))
        rospy.logwarn("Accuracy: {}%".format(acc * 100.0))
        # print("Accuracy: {}%".format(acc*100.0))
        rospy.logwarn("Sensitivity: {}%".format(tpr * 100.0))
        # print("Sensitivity: {}%".format(tpr*100.0))
        rospy.logwarn("Specificity: {}%".format(tnr * 100.0))
        # print("Specificity: {}%".format(tnr*100.0))
        """Goodness index"""
        # G: Euclidean distance from the ideal classifier (tpr=1, tnr=1) in
        # ROC space; smaller is better.
        G = np.sqrt((1 - tpr)**2 + (1 - tnr)**2)
        if G <= 0.25:
            rospy.logwarn("Optimum classifier (G = {} <= 0.25)".format(G))
            # print("Optimum classifier (G = {} <= 0.25)".format(G))
        elif G > 0.25 and G <= 0.7:
            rospy.logwarn("Good classifier (0.25 < G = {} <= 0.7)".format(G))
            # print("Good classifier (0.25 < G = {} <= 0.7)".format(G))
        # NOTE(review): this branch is unreachable — G == 0.7 is already
        # caught by `G <= 0.7` above; the branches should be reordered or the
        # previous comparison made strict (< 0.7).
        elif G == 0.7:
            rospy.logwarn("Random classifier (G = 0.7)")
            # print("Random classifier (G = 0.7)")
        else:
            rospy.logwarn("Bad classifier (G = {} > 0.7)".format(G))