Ejemplo n.º 1
0
def init_cycle_hmm(sequences, steps, states_per_step, model_id):
    """
    Instantiate a cyclic left-right HMM with random transition parameters.

    Start and transition probabilities are drawn uniformly at random and
    normalized; each state's emission is a normal distribution fit to a
    chronological slice of the timepoints in ``sequences``.

    Parameters
    ----------
    sequences : 2-D array, shape (n_sequences, n_timepoints)
        Observation sequences; columns are timepoints.
    steps : int
        Number of steps in the cycle.
    states_per_step : int
        Number of parallel states within each step.
    model_id : str
        Name given to the HiddenMarkovModel.

    Returns
    -------
    HiddenMarkovModel
        The baked cyclic model.
    """
    model = HiddenMarkovModel(model_id)
    n_states = steps * states_per_step

    # partition timepoints chronologically; each step gets one slice
    step_size = int(math.ceil(sequences.shape[1] / float(n_states+1)))

    # generate states -- note all states within a step share the same
    # chronological slice of timepoints
    states = np.empty((steps, states_per_step), dtype=object)
    for i in range(steps):
        for j in range(states_per_step):
            temp_assignment = np.arange(step_size * i, step_size * (i+1))
            dist = \
                NormalDistribution.from_samples(sequences[:, temp_assignment])
            state_name = str(i) + '-' + str(j)
            states[i, j] = State(dist, name=str(state_name))

    # add states to model
    model.add_states(states.flatten().tolist())

    # random (normalized) transition from start -> every state
    trans = np.random.ranf(n_states)
    trans = trans / trans.sum()
    for i, state in enumerate(states.flatten().tolist()):
        model.add_transition(model.start, state, trans[i])

    # random transition from step(i) -> step(i+1), plus a self transition
    for i in range(steps-1):
        for j in range(states_per_step):
            trans = np.random.ranf(states_per_step + 1)
            trans = trans / trans.sum()
            # self transition
            model.add_transition(states[i, j], states[i, j], trans[0])
            # out transitions to every state of the next step
            for x in range(states_per_step):
                model.add_transition(states[i, j], states[i + 1, x],
                                     trans[x + 1])

    # close the cycle: random transition from step(n) -> step 0
    for j in range(states_per_step):
        trans = np.random.ranf(states_per_step + 1)
        trans = trans / trans.sum()
        # self transition
        model.add_transition(states[(steps - 1), j], states[(steps - 1), j],
                             trans[0])
        # out transitions back to step 0
        for x in range(states_per_step):
            model.add_transition(states[(steps - 1), j], states[0, x],
                                 trans[x + 1])
    model.bake()
    # print() function (Python 3) instead of the Python 2 print statement
    print('Initialized Cyclic State HMM:', '[', steps, states_per_step, ']')
    return model
Ejemplo n.º 2
0
    def oriHMMParams(self):
        """
        Set initial parameters for the Hidden Markov Model (HMM).

        Four hidden states (0--start, 1--downstream, 2--upstream, 3--end),
        each emitting from a three-component Gaussian mixture, wired into a
        fixed cyclic transition structure.

        Returns
        -------
        HiddenMarkovModel
            The baked model.
        """
        hmm = HiddenMarkovModel()

        # GMM emissions: three-distribution Gaussian mixture per state
        numdists = 3
        var = 7.5 / (numdists - 1)

        # mixture-component means, one list per hidden state
        # (0--start, 1--downstream, 2--upstream, 3--end)
        means = [[], [], [], []]
        for i in range(numdists):
            base = i * 7.5 / (numdists - 1)
            means[3].append(base + 2.5)
            means[2].append(base)
            means[1].append(-base)
            means[0].append(-base - 2.5)

        states = []
        for idx, state_means in enumerate(means):
            components = [NormalDistribution(m, var) for m in state_means]
            states.append(State(GeneralMixtureModel(components),
                                name=str(idx)))
        hmm.add_states(*states)

        # Transition matrix (rows: from-state, cols: to-state):
        # A = [[0., 1., 0., 0.],
        #      [0., 0.5, 0.5, 0.],
        #      [0., 0., 0.5, 0.5],
        #      [1., 0., 0., 0.]]
        hmm.add_transition(states[0], states[1], 1)
        hmm.add_transition(states[1], states[1], 0.5)
        hmm.add_transition(states[1], states[2], 0.5)
        hmm.add_transition(states[2], states[2], 0.5)
        hmm.add_transition(states[2], states[3], 0.5)
        hmm.add_transition(states[3], states[0], 1)

        # entry/exit wiring (pi = [0.2, 0.3, 0.3, 0.2] in the comments of
        # the original, but start always enters state 0 here)
        hmm.add_transition(hmm.start, states[0], 1)
        hmm.add_transition(states[3], hmm.end, 1)

        hmm.bake()

        return hmm
Ejemplo n.º 3
0
def gaussian_hmm(n_states, lower, upper, variance, model_id):
    """
    Instantiate an HMM with randomly perturbed parameters.

    State emission means are evenly spaced over [lower, upper]; start and
    transition probabilities are random, with transitions biased toward
    states whose means are likely under the source state's distribution.

    Parameters
    ----------
    n_states : int
        Number of hidden states.
    lower, upper : float
        Range over which the state means are evenly spaced.
    variance : float
        Shared emission variance for every state.
    model_id : str
        Name given to the HiddenMarkovModel.

    Returns
    -------
    HiddenMarkovModel
        The baked model.
    """
    np.random.seed(int(time.time()))

    model = HiddenMarkovModel(model_id)

    # states with normal emission distributions, means spread over the range
    x = np.linspace(lower, upper, n_states)
    states = []
    for i in range(n_states):
        dist = \
            NormalDistribution(x[i], variance)
        states.append(State(dist, name=str(i)))

    model.add_states(states)

    # start probabilities: uniform base plus random noise, normalized
    start_prob = 1.0 / n_states
    start_probs = []
    for i in range(n_states):
        start_probs.append(start_prob + np.random.ranf())
    start_probs = np.array(start_probs)
    start_probs = start_probs / start_probs.sum()
    for i, state in enumerate(states):
        model.add_transition(model.start, state, start_probs[i])

    # add transition probabilities proportional to probability of generating
    # one state mean from another (plus random noise), normalized per row
    for state1 in states:
        transitions = []
        for other_state in states:
            transitions.append(np.exp(state1.distribution.log_probability(
                other_state.distribution.parameters[0])) + np.random.ranf())
        transitions = np.array(transitions)
        transitions = transitions / transitions.sum()
        for i, state2 in enumerate(states):
            model.add_transition(state1, state2, transitions[i])

    model.bake()
    # print() function (Python 3) instead of the Python 2 print statement
    print('Initialized HMM: ', model.name)
    return model
Ejemplo n.º 4
0
def init_model(start_dip, stay_state, mean_eu, sd_eu, mean_loh):
    """
    Build a three-state HMM over euploid / LOH / aneuploid signal.

    Parameters
    ----------
    start_dip : float
        Probability of starting in the euploid (diploid) state.
    stay_state : float
        Self-transition (persistence) probability for every state.
    mean_eu, sd_eu : float
        Mean and standard deviation of the euploid emission.
    mean_loh : float
        Mean of the loss-of-heterozygosity emission.

    Returns
    -------
    HiddenMarkovModel
        Baked model with frozen emission distributions.
    """
    ## emission distributions
    dist_eu = NormalDistribution(mean_eu, sd_eu)  ## euploid enriched at 0
    dist_loh = NormalDistribution(mean_loh,
                                  sd_eu)  ## loss of heterozygosity enriched at 1
    dist_aneu = NormalDistribution(mean_loh / 2.0,
                                   sd_eu * 1.4)  ## aneuploid enriched at 1

    ## hidden states wrapping the distributions
    state_eu = State(dist_eu, name='EU')  ## enriched at 0
    state_loh = State(dist_loh, name='LOH')  ## enriched at 1
    state_aneu = State(dist_aneu, name='ANEU')  ## enriched at 1

    ## model and states
    model = HiddenMarkovModel()
    model.add_states(state_eu, state_loh, state_aneu)

    ## start transitions (state a, state b, probability)
    model.add_transition(model.start, state_eu, start_dip)
    model.add_transition(model.start, state_loh, 1.0 - start_dip - 0.1)
    model.add_transition(model.start, state_aneu, 0.1)

    ## EU transitions
    model.add_transition(state_eu, state_eu, stay_state)
    model.add_transition(state_eu, state_loh, 1.0 - 4 * stay_state / 5 - 0.001)
    model.add_transition(state_eu, state_aneu, 1.0 - stay_state / 5 - 0.001)
    model.add_transition(state_eu, model.end, 0.002)

    ## LOH transitions
    model.add_transition(state_loh, state_loh, stay_state)
    model.add_transition(state_loh, state_eu, 1.0 - 4 * stay_state / 5 - 0.001)
    model.add_transition(state_loh, state_aneu, 1.0 - stay_state / 5 - 0.001)
    model.add_transition(state_loh, model.end, 0.002)

    ## ANEU transitions
    model.add_transition(state_aneu, state_aneu, stay_state)
    model.add_transition(state_aneu, state_eu, 1.0 - stay_state / 2 - 0.001)
    model.add_transition(state_aneu, state_loh, 1.0 - stay_state / 2 - 0.001)
    model.add_transition(state_aneu, model.end, 0.002)

    ## finalize internal structure
    model.bake()
    ## only train transitions, not emissions
    model.freeze_distributions()

    return model
Ejemplo n.º 5
0
def ghmm_model(states_labels: tuple,
               transitions: tuple,
               init_prob: tuple,
               end_prob: tuple,
               means: list,
               vars: list) -> HiddenMarkovModel:
    """
    Assemble a Gaussian-mixture HMM from explicit parameter tables.

    :param states_labels: one label per hidden state
    :param transitions: square state-to-state transition matrix
    :param init_prob: start-state probabilities (zero entries are skipped)
    :param end_prob: end-state probabilities (zero entries are skipped)
    :param means: per-state list of mixture-component means
    :param vars: per-state list of mixture-component variances
    :return: the baked model
    """
    hmm_model = HiddenMarkovModel()

    # one Gaussian mixture emission per hidden state; the number of
    # components is taken from the first state's variance list
    mix_num = len(vars[0])
    states = []
    for state_i in range(len(states_labels)):
        components = [
            NormalDistribution(means[state_i][mix_i], vars[state_i][mix_i])
            for mix_i in range(mix_num)
        ]
        states.append(State(GeneralMixtureModel(components),
                            name=str(state_i)))
    hmm_model.add_states(*states)

    # wire up state-to-state, start, and end transitions, skipping zeros
    n_states = len(states_labels)
    for row in range(n_states):
        for col in range(n_states):
            prob = transitions[row][col]
            if prob != 0.:
                hmm_model.add_transition(states[row], states[col], prob)
    for state_i, prob in enumerate(init_prob):
        if prob != 0.:
            hmm_model.add_transition(hmm_model.start, states[state_i], prob)
    for state_i, prob in enumerate(end_prob):
        if prob != 0.:
            hmm_model.add_transition(states[state_i], hmm_model.end, prob)

    hmm_model.bake()

    return hmm_model
Ejemplo n.º 6
0
def build_the_same_model_in_test_sample_from_site_line_by_line():
    """Recreate the documentation example model edge by edge and plot it."""
    # A State holds only the emission distribution; transition
    # probabilities live on the graph edges.
    state_a = State(NormalDistribution(5, 1))
    state_b = State(NormalDistribution(1, 7))
    state_c = State(NormalDistribution(8, 2))

    model = HiddenMarkovModel()
    model.add_states(state_a, state_b, state_c)

    # chain start -> a -> b -> c -> end, with self loops on each state
    for src, dst, prob in ((model.start, state_a, 1.0),
                           (state_a, state_a, 0.7),
                           (state_a, state_b, 0.3),
                           (state_b, state_b, 0.8),
                           (state_b, state_c, 0.2),
                           (state_c, state_c, 0.9),
                           (state_c, model.end, 0.1)):
        model.add_transition(src, dst, prob)
    model.bake()

    model.plot()
Ejemplo n.º 7
0
def buildHmm(minAmpliconLength, maxGap, windowSize):
    """Construct a two-state (background / amplicon) discrete HMM.

    Emission and transition probabilities are derived from the minimum
    amplicon length, the maximum allowed gap, and the window size.
    """
    # derived probabilities
    b_bkgd_1 = 0.1
    a_interstate = b_bkgd_1**(2 * minAmpliconLength / windowSize)
    b_amp_0 = (a_interstate)**(0.5 * windowSize / maxGap)
    b_amp_1 = 1 - b_amp_0
    b_bkgd_0 = 1 - b_bkgd_1

    # one discrete-emission state per class
    s_bkgd = State(DiscreteDistribution({0: b_bkgd_0, 1: b_bkgd_1}),
                   name='background')
    s_amp = State(DiscreteDistribution({0: b_amp_0, 1: b_amp_1}),
                  name='amplicon')

    hmm = HiddenMarkovModel()
    hmm.add_states(s_bkgd, s_amp)

    # symmetric switching probability a_interstate between the two states;
    # start behaves like the background state
    for src in (hmm.start, s_bkgd):
        hmm.add_transition(src, s_bkgd, 1 - a_interstate)
        hmm.add_transition(src, s_amp, a_interstate)
    hmm.add_transition(s_amp, s_bkgd, a_interstate)
    hmm.add_transition(s_amp, s_amp, 1 - a_interstate)
    hmm.bake()
    return hmm

# create the HMM model
model = HiddenMarkovModel(name="Example Model")

# emission probability distributions, P(umbrella | weather)
sunny_emissions = DiscreteDistribution({"yes": 0.1, "no": 0.9})
sunny_state = State(sunny_emissions, name="Sunny")

# discrete distribution for the rainy emissions from the probability table
# above, used to create a state named Rainy
rainy_emissions = DiscreteDistribution({"yes": 0.8, "no": 0.2})
rainy_state = State(rainy_emissions, name="Rainy")

# add the states to the model
model.add_states(sunny_state, rainy_state)

assert rainy_emissions.probability("yes") == 0.8, "The director brings his umbrella with probability 0.8 on rainy days"
# NOTE(review): the original line here was a garbled notebook export
# ('print("Looks good so farget_ipython().getoutput("")")') and did not
# parse; restored the intended message.
print("Looks good so far!")


# create edges for each possible state transition in the model
# equal probability of a sequence starting on either a rainy or sunny day
model.add_transition(model.start, sunny_state, 0.5)
model.add_transition(model.start, rainy_state, 0.5)

# add sunny day transitions (we already know estimates of these probabilities
# from the problem statement)
model.add_transition(sunny_state, sunny_state, 0.8)  # 80% sunny->sunny
model.add_transition(sunny_state, rainy_state, 0.2)  # 20% sunny->rainy
Ejemplo n.º 9
0
# NOTE(review): this fragment depends on names defined elsewhere
# (starting_counts, ending_counts, starting_tag_list, ending_tag_list,
# tag_words_count, basic_model, tags, tags_count) -- not runnable on its own.
starting_tag_count=starting_counts(starting_tag_list)            # the number of times a tag occurred at the start
ending_tag_count=ending_counts(ending_tag_list)                  # the number of times a tag occurred at the end



# one discrete-emission State per tag, with P(word | tag) estimated
# from the word counts for that tag
to_pass_states = []
for tag, words_dict in tag_words_count.items():
    total = float(sum(words_dict.values()))
    distribution = {word: count/total for word, count in words_dict.items()}
    tag_emissions = DiscreteDistribution(distribution)
    tag_state = State(tag_emissions, name=tag)
    to_pass_states.append(tag_state)


# NOTE(review): add_states() is called with no arguments -- presumably it
# should be basic_model.add_states(*to_pass_states); verify against the
# original source.
basic_model.add_states()    
    

# start probabilities: fraction of occurrences of each tag that were
# sentence-initial
start_prob={}

for tag in tags:
    start_prob[tag]=starting_tag_count[tag]/tags_count[tag]

for tag_state in to_pass_states :
    basic_model.add_transition(basic_model.start,tag_state,start_prob[tag_state.name])    

# end probabilities: fraction of occurrences of each tag that were
# sentence-final
end_prob={}

for tag in tags:
    end_prob[tag]=ending_tag_count[tag]/tags_count[tag]
# NOTE(review): the body of the following loop is missing here (the snippet
# is truncated); presumably it adds end transitions analogous to the start
# transitions above.
for tag_state in to_pass_states :
Ejemplo n.º 10
0
# NOTE(review): cross-validation fragment; relies on names defined elsewhere
# (full_labels, folds, class_data, state_names, distros, t, full_data,
# MGD, HMM, rospy) -- not runnable standalone.
skf = StratifiedKFold(full_labels, n_folds=folds)

for train_index, test_index in skf:

    # two-state HMM over gait data, one multivariate Gaussian
    # (fit from that class's samples) per hidden state
    model = HMM(name="Gait")
    hmm_states = []

    for i in range(0, 2):
        # dis = MGD(np.array(class_means[i]).flatten(), np.array(class_cov[i]))
        dis = MGD.from_samples(class_data[i])
        st = State(dis, name=state_names[i])
        distros.append(dis)
        hmm_states.append(st)

    model.add_states(hmm_states)
    # equal start probability for both states
    model.add_transition(model.start, hmm_states[0], 0.5)
    model.add_transition(model.start, hmm_states[1], 0.5)

    # transition matrix t is assumed precomputed elsewhere -- TODO confirm
    for i in range(0, 2):
        for j in range(0, 2):
            model.add_transition(hmm_states[i], hmm_states[j], t[i][j])

    model.bake()
    rospy.logwarn("Baked model")
    print("TRAIN:", train_index, "TEST:", test_index)
    # split the pooled data/labels into this fold's train and test sets
    train_data = full_data[train_index]
    # print(len(train_data))
    train_class = full_labels[train_index]
    # print(len(train_class))
    test_data = full_data[test_index]
Ejemplo n.º 11
0
def main():
    rospy.init_node('hmm_trainer')
    param_vec = []
    rospack = rospkg.RosPack()
    if (len(sys.argv) < 2):
        print("Missing the prefix argument.")
        exit()
    else:
        prefix = sys.argv[1]
    use_measurements = np.zeros(3)

    # patient = rospy.get_param('~patient', 'None')
    # if prefix == 'None':
    #     rospy.logerr("No filename given ,exiting")
    #     exit()

    phase_pub = rospy.Publisher('/phase', Int32, queue_size=10)
    packpath = rospack.get_path('exo_gait_phase_det')
    datapath = packpath + "/log/mat_files/"
    rospy.logwarn("Patient: {}".format(prefix))
    print("Patient: {}".format(prefix))
    verbose = rospy.get_param('~verbose', False)
    """Print console output into text file"""
    # sys.stdout = open(packpath + "/log/results/intra-sub_" + prefix + ".txt", "w")
    """Data loading"""
    n_trials = 3
    data = [[] for x in range(0, n_trials)]
    for i in range(0, n_trials):
        data[i] = scio.loadmat(datapath + prefix + "_proc_data" + str(i + 1) +
                               ".mat")

    accel_x = [[] for x in range(0, n_trials)]
    accel_y = [[] for x in range(0, n_trials)]
    accel_z = [[] for x in range(0, n_trials)]
    gyro_x = [[] for x in range(0, n_trials)]
    gyro_y = [[] for x in range(0, n_trials)]
    gyro_z = [[] for x in range(0, n_trials)]
    time_array = [[] for x in range(0, n_trials)]
    labels = [[] for x in range(0, n_trials)]
    fs_fsr = []
    for i in range(0, n_trials):
        # accel_x[i] = data[i]["accel_x"][0]
        # accel_y[i] = data[i]["accel_y"][0]
        # accel_z[i] = data[i]["accel_z"][0]
        gyro_x[i] = data[i]["gyro_x"][0]
        gyro_y[i] = data[i]["gyro_y"][0]
        gyro_z[i] = data[i]["gyro_z"][0]
        time_array[i] = data[i]["time"][0]
        labels[i] = data[i]["labels"][0]
        fs_fsr.append(data[i]["Fs_fsr"][0][0])
    """Feature extraction"""
    """First derivative"""
    # fder_gyro_x = []
    # for i in range(n_trials):
    #     der = []
    #     der.append(gyro_x[i][0])
    #     for j in range(1,len(gyro_x[i])-1):
    #         der.append((gyro_x[i][j+1]-gyro_x[i][j-1])/2)
    #     der.append(gyro_x[i][-1])
    #     fder_gyro_x.append(der)

    fder_gyro_y = []
    for i in range(n_trials):
        der = []
        der.append(gyro_y[i][0])
        for j in range(1, len(gyro_y[i]) - 1):
            der.append((gyro_y[i][j + 1] - gyro_y[i][j - 1]) / 2)
        der.append(gyro_y[i][-1])
        fder_gyro_y.append(der)

    # fder_gyro_z = []
    # for i in range(n_trials):
    #     der = []
    #     der.append(gyro_z[i][0])
    #     for j in range(1,len(gyro_z[i])-1):
    #         der.append((gyro_z[i][j+1]-gyro_z[i][j-1])/2)
    #     der.append(gyro_z[i][-1])
    #     fder_gyro_z.append(der)
    """Second derivative"""
    # sder_gyro_x = []
    # for i in range(n_trials):
    #     der = []
    #     der.append(fder_gyro_x[i][0])
    #     for j in range(1,len(fder_gyro_x[i])-1):
    #         der.append((fder_gyro_x[i][j+1]-fder_gyro_x[i][j-1])/2)
    #     der.append(fder_gyro_x[i][-1])
    #     sder_gyro_x.append(der)
    #
    # sder_gyro_y = []
    # for i in range(n_trials):
    #     der = []
    #     der.append(fder_gyro_y[i][0])
    #     for j in range(1,len(fder_gyro_y[i])-1):
    #         der.append((fder_gyro_y[i][j+1]-fder_gyro_y[i][j-1])/2)
    #     der.append(fder_gyro_y[i][-1])
    #     sder_gyro_y.append(der)
    #
    # sder_gyro_z = []
    # for i in range(n_trials):
    #     der = []
    #     der.append(fder_gyro_z[i][0])
    #     for j in range(1,len(fder_gyro_z[i])-1):
    #         der.append((fder_gyro_z[i][j+1]-fder_gyro_z[i][j-1])/2)
    #     der.append(fder_gyro_z[i][-1])
    #     sder_gyro_z.append(der)
    """Peak detector"""
    # window_wid = 15        # Window width should be odd
    # search_ratio = window_wid/2
    # pdet_gyro_x = []
    # for i in range(n_trials):
    #     pdet = []
    #     for j in range(len(gyro_x[i])):
    #         if j <= search_ratio:
    #             win = gyro_x[i][:j+search_ratio+1]
    #         elif j >= len(gyro_x[i])-search_ratio-1:
    #             win = gyro_x[i][j-search_ratio:]
    #         else:
    #             win = gyro_x[i][j-search_ratio:j+search_ratio+1]
    #         pdet.append(gyro_x[i][j]/max(win))
    #     pdet_gyro_x.append(pdet)

    # print len(gyro_x)
    # print len(pdet_gyro_x)
    # for i in range(3):
    #     print len(gyro_x[i])
    #     print len(pdet_gyro_x[i])

    # pdet_gyro_y = []
    # for i in range(n_trials):
    #     pdet = []
    #     for j in range(len(gyro_y[i])):
    #         if j <= search_ratio:
    #             win = gyro_y[i][:j+search_ratio+1]
    #         elif j >= len(gyro_y[i])-search_ratio-1:
    #             win = gyro_y[i][j-search_ratio:]
    #         else:
    #             win = gyro_y[i][j-search_ratio:j+search_ratio+1]
    #         pdet.append(gyro_y[i][j]/max(win))
    #     pdet_gyro_y.append(pdet)
    #
    # pdet_gyro_z = []
    # for i in range(n_trials):
    #     pdet = []
    #     for j in range(len(gyro_z[i])):
    #         if j <= search_ratio:
    #             win = gyro_z[i][:j+search_ratio+1]
    #         elif j >= len(gyro_z[i])-search_ratio-1:
    #             win = gyro_z[i][j-search_ratio:]
    #         else:
    #             win = gyro_z[i][j-search_ratio:j+search_ratio+1]
    #         pdet.append(gyro_z[i][j]/max(win))
    #     pdet_gyro_z.append(pdet)
    """Create training and test data"""
    ff = [[] for x in range(0, n_trials)]
    for j in range(0, n_trials):
        for k in range(0, len(time_array[j])):
            f_ = []
            # f_.append(accel_x[j][k])
            # f_.append(accel_y[j][k])
            # f_.append(accel_z[j][k])
            # f_.append(gyro_x[j][k])
            # f_.append(fder_gyro_x[j][k])
            # f_.append(sder_gyro_x[j][k])
            # f_.append(pdet_gyro_x[j][k])
            f_.append(gyro_y[j][k])
            f_.append(fder_gyro_y[j][k])
            # f_.append(sder_gyro_y[j][k])
            # f_.append(pdet_gyro_y[j][k])
            # f_.append(gyro_z[j][k])
            # f_.append(fder_gyro_z[j][k])
            # f_.append(sder_gyro_z[j][k])
            # f_.append(pdet_gyro_z[j][k])
            ff[j].append(f_)
    n_signals = len(ff[0][0])
    """cHMM"""
    startprob = [0.25, 0.25, 0.25, 0.25]
    state_names = ['hs', 'ff', 'ho', 'sw']
    rospy.logwarn("""Intra-subject training""")
    print("""Intra-subject training""")
    # for leave_one_out in range(0, n_trials):
    for leave_one_out in range(1, 2):
        rospy.logwarn("-------TRIAL {}-------".format(leave_one_out + 1))
        print("-------TRIAL {}-------".format(leave_one_out + 1))
        """Transition matrix"""
        t = np.zeros((4, 4))  # Transition matrix
        prev = -1
        for i in range(0, len(labels[leave_one_out])):
            # data[i]._replace(label = correct_mapping[data[i].label])
            if prev == -1:
                prev = labels[leave_one_out][i]
            t[prev][labels[leave_one_out][i]] += 1.0
            prev = labels[leave_one_out][i]
        t = normalize(t, axis=1, norm='l1')
        if verbose: rospy.logwarn("TRANSITION MATRIX\n" + str(t))

        n_classes = 4
        class_data = [[] for x in range(n_classes)]
        full_data = []
        full_labels = []
        for i in range(len(ff[leave_one_out])):
            full_data.append(ff[leave_one_out][i])
            full_labels.append(labels[leave_one_out][i])
        # print full_data == ff[leave_one_out]
        # print full_labels == labels[leave_one_out]
        # print len(full_data) == len(full_labels)
        # for i in range(0,len(ff[leave_one_out-1])):
        #     full_data.append(ff[leave_one_out-1][i])
        #     full_labels.append(labels[leave_one_out-1][i])
        # for i in range(0,len(ff[(leave_one_out+1) % n_trials])):
        #     full_data.append(ff[(leave_one_out+1) % n_trials][i])
        #     full_labels.append(labels[(leave_one_out+1) % n_trials][i])

        # print len(full_data) == (len(ff[leave_one_out]) + len(ff[leave_one_out-1]) + len(ff[(leave_one_out+1) % n_trials]))
        # print full_data
        # print len(full_data)
        # print full_labels
        # print len(full_labels)

        for i in range(0, len(full_data)):
            class_data[full_labels[i]].append(full_data[i])
        """Multivariate Gaussian Distributions for each hidden state"""
        class_means = [[[] for x in range(n_signals)]
                       for i in range(n_classes)]
        class_vars = [[[] for x in range(n_signals)] for i in range(n_classes)]
        class_std = [[[] for x in range(n_signals)] for i in range(n_classes)]
        class_cov = []
        classifiers = []

        for i in range(0, n_classes):
            # cov = np.ma.cov(np.array(class_data[i]), rowvar=False)
            cov = np.cov(np.array(class_data[i]), rowvar=False)
            class_cov.append(cov)
            for j in range(0, n_signals):
                class_means[i][j] = np.array(
                    class_data[i][:])[:, [j]].mean(axis=0)
                class_vars[i][j] = np.array(class_data[i][:])[:,
                                                              [j]].var(axis=0)
                class_std[i][j] = np.array(class_data[i][:])[:,
                                                             [j]].std(axis=0)
        print "\n" + str(class_cov) + "\n"
        """Classifier initialization"""
        distros = []
        hmm_states = []
        for i in range(n_classes):
            dis = MGD\
                (np.array(class_means[i]).flatten(),
                 np.array(class_cov[i]))
            st = State(dis, name=state_names[i])
            distros.append(dis)
            hmm_states.append(st)
        model = HMM(name="Gait")

        model.add_states(hmm_states)
        """Initial transitions"""
        for i in range(0, n_classes):
            model.add_transition(model.start, hmm_states[i], startprob[i])
        """Left-right model"""
        for i in range(0, n_classes):
            for j in range(0, n_classes):
                model.add_transition(hmm_states[i], hmm_states[j], t[i][j])

        model.bake()

        # print (model.name)
        # rospy.logwarn("N. observations: " + str(model.d))
        # print (model.edges)
        # rospy.logwarn("N. hidden states: " + str(model.silent_start))
        # print model
        """Training"""
        # limit = int(len(ff1)*(8/10.0))    # 80% of data to test, 20% to train
        # x_train = list([ff1[:limit]])
        # x_train = list([ff1,ff2])
        # x_train = list([ff2])
        x_train = []
        for i in range(0, len(ff[leave_one_out - 1])):
            x_train.append(ff[leave_one_out - 1][i])
        for i in range(0, len(ff[(leave_one_out + 1) % n_trials])):
            x_train.append(ff[(leave_one_out + 1) % n_trials][i])
        x_train = list([x_train])
        rospy.logwarn("Training...")
        model.fit(x_train, algorithm='baum-welch', verbose=verbose)
        # model.fit(list([ff[leave_one_out-1]]), algorithm='baum-welch', verbose=verbose)
        # model.fit(list([ff[(leave_one_out+1) % n_trials]]), algorithm='baum-welch', verbose=verbose)
        # model.fit(seq, algorithm='viterbi', verbose='True')
        """Find most-likely sequence"""
        # logp, path = model.viterbi(ff[limit:])
        logp, path = model.viterbi(ff[leave_one_out])
        # print logp
        # print path
        class_labels = []
        for i in range(len(labels[leave_one_out])):
            path_phase = path[i][1].name
            for state in range(n_classes):
                if path_phase == state_names[state]:
                    class_labels.append(state)
        labels[leave_one_out] = list(labels[leave_one_out][1:])
        # Saving classifier labels into csv file
        # np.savetxt(packpath+"/log/intra_labels/"+prefix+"_labels"+str(leave_one_out+1)+".csv", class_labels, delimiter=",", fmt='%s')
        # rospy.logwarn("csv file with classifier labels was saved.")

        sum = 0.0
        true_pos = 0.0
        false_pos = 0.0
        true_neg = 0.0
        false_neg = 0.0
        tol = 6e-2  # Tolerance window of 60 ms
        tol_window = int((tol / 2) / (1 / float(fs_fsr[leave_one_out])))
        print "FSR freq: " + str(fs_fsr[leave_one_out])
        print "Tolerance win: " + str(tol_window)
        # print tol_window
        # # print type(tol_window)
        # for i in range(0, len(labels[leave_one_out])):
        #     """Tolerance window"""
        #     if i > tol_window+1 and i < len(labels[leave_one_out])-tol_window:
        #         # curr_tol = time_array[leave_one_out][i+tol_window]-time_array[leave_one_out][i-tol_window]
        #         # print curr_tol
        #         win = []
        #         for j in range(i-tol_window,i+tol_window+1):
        #             win.append(state_names[labels[leave_one_out][j]])
        #         if path[i][1].name in win:
        #             sum += 1.0
        #     else:
        #         if path[i][1].name == labels[leave_one_out][i]:
        #             sum += 1.0
        """Performance Evaluation"""
        rospy.logwarn("Calculating results...")
        time_error = [[] for x in range(n_classes)]
        for phase in range(n_classes):
            for i in range(len(labels[leave_one_out])):
                """Tolerance window"""
                if i >= tol_window and i < len(
                        labels[leave_one_out]) - tol_window:
                    # curr_tol = time_array[leave_one_out][i+tol_window]-time_array[leave_one_out][i-tol_window]
                    # print curr_tol
                    win = []
                    for j in range(i - tol_window, i + tol_window + 1):
                        win.append(labels[leave_one_out][j])
                    """Calculate time error with true positives"""
                    if class_labels[i] == phase:
                        if class_labels[i] in win:
                            for k in range(len(win)):
                                if win[k] == phase:
                                    time_error[phase].append(
                                        (k - tol_window) /
                                        fs_fsr[leave_one_out])
                                    break
                            true_pos += 1.0
                            if verbose:
                                print phase + ", " + state_names[labels[
                                    leave_one_out][i]] + ", " + class_labels[
                                        i] + ", true_pos"
                        else:
                            false_pos += 1.0
                            if verbose:
                                print phase + ", " + state_names[labels[
                                    leave_one_out][i]] + ", " + class_labels[
                                        i] + ", false_pos"
                    else:
                        if phase != labels[leave_one_out][i]:
                            # if phase not in win:
                            true_neg += 1.0
                            if verbose:
                                print phase + ", " + state_names[labels[
                                    leave_one_out][i]] + ", " + class_labels[
                                        i] + ", true_neg"
                        else:
                            false_neg += 1.0
                            if verbose:
                                print phase + ", " + state_names[labels[
                                    leave_one_out][i]] + ", " + class_labels[
                                        i] + ", false_neg"
                else:
                    if class_labels[i] == phase:
                        if class_labels[i] == labels[leave_one_out][i]:
                            true_pos += 1.0
                        else:
                            false_pos += 1.0
                    else:
                        if phase != labels[leave_one_out][i]:
                            true_neg += 1.0
                        else:
                            false_neg += 1.0

        rospy.logwarn("Timing error")
        print("Timing error")
        for phase in range(n_classes):
            rospy.logwarn("(" + state_names[phase] + ")")
            print "(" + state_names[phase] + ")"
            if len(time_error[phase]) > 0:
                rospy.logwarn(
                    str(np.mean(time_error[phase])) + " + " +
                    str(np.std(time_error[phase])))
                print str(np.mean(time_error[phase])) + " + " + str(
                    np.std(time_error[phase]))
            else:
                rospy.logwarn("0.06 + 0")
                print "0.06 + 0"
        """Calculate mean time (MT) of stride and each gait phase and Coefficient of Variation (CoV)"""
        rospy.logwarn("Mean time (MT) and Coefficient of Variance (CoV)")
        print("Mean time (MT) and Coefficient of Variance (CoV)")
        n_group = 0
        for label_group in [class_labels, labels[leave_one_out]]:
            if n_group == 0:
                rospy.logwarn("Results for HMM:")
                print("Results for HMM:")
            else:
                rospy.logwarn("Results for FSR:")
                print("Results for FSR:")
            curr_label = -1
            count = 0
            n_phases = 0
            stride_samples = 0
            phases_time = [[] for x in range(n_classes)]
            stride_time = []
            for label in label_group:
                # for label in class_labels:
                if curr_label != label:
                    n_phases += 1
                    stride_samples += count
                    if label == 0:  # Gait start: HS
                        if n_phases == 4:  # If a whole gait cycle has past
                            stride_time.append(stride_samples /
                                               fs_fsr[leave_one_out])
                        n_phases = 0
                        stride_samples = 0
                    phases_time[label - 1].append(count /
                                                  fs_fsr[leave_one_out])
                    curr_label = label
                    count = 1
                else:
                    count += 1.0
            for phase in range(n_classes):
                mean_time = np.mean(phases_time[phase])
                phase_std = np.std(phases_time[phase])
                rospy.logwarn("(" + state_names[phase] + ")")
                print "(" + state_names[phase] + ")"
                rospy.logwarn("Mean time: " + str(mean_time) + " + " +
                              str(phase_std))
                print "Mean time: " + str(mean_time) + " + " + str(phase_std)
                rospy.logwarn("CoV: " + str(phase_std / mean_time * 100.0))
                print("CoV: " + str(phase_std / mean_time * 100.0))
            mean_time = np.mean(stride_time)
            phase_std = np.std(stride_time)
            rospy.logwarn("(Stride)")
            print "(Stride)"
            rospy.logwarn("Mean time: " + str(mean_time) + " + " +
                          str(phase_std))
            print "Mean time: " + str(mean_time) + " + " + str(phase_std)
            rospy.logwarn("CoV: " + str(phase_std / mean_time * 100.0))
            print("CoV: " + str(phase_std / mean_time * 100.0))
            n_group += 1
        """Accuracy"""
        # acc = sum/len(labels[leave_one_out])
        if (true_neg + true_pos + false_neg + false_pos) != 0.0:
            acc = (true_neg + true_pos) / (true_neg + true_pos + false_neg +
                                           false_pos)
        else:
            acc = 0.0
        """Sensitivity or True Positive Rate"""
        if true_pos + false_neg != 0:
            tpr = true_pos / (true_pos + false_neg)
        else:
            tpr = 0.0
        """Specificity or True Negative Rate"""
        if false_pos + true_neg != 0:
            tnr = true_neg / (false_pos + true_neg)
        else:
            tnr = 0.0
        # rospy.logwarn("Accuracy: {}%".format(acc*100))
        rospy.logwarn("Accuracy: {}%".format(acc * 100.0))
        # print("Accuracy: {}%".format(acc*100.0))
        rospy.logwarn("Sensitivity: {}%".format(tpr * 100.0))
        # print("Sensitivity: {}%".format(tpr*100.0))
        rospy.logwarn("Specificity: {}%".format(tnr * 100.0))
        # print("Specificity: {}%".format(tnr*100.0))
        """Goodness index"""
        G = np.sqrt((1 - tpr)**2 + (1 - tnr)**2)
        if G <= 0.25:
            rospy.logwarn("Optimum classifier (G = {} <= 0.25)".format(G))
            # print("Optimum classifier (G = {} <= 0.25)".format(G))
        elif G > 0.25 and G <= 0.7:
            rospy.logwarn("Good classifier (0.25 < G = {} <= 0.7)".format(G))
            # print("Good classifier (0.25 < G = {} <= 0.7)".format(G))
        elif G == 0.7:
            rospy.logwarn("Random classifier (G = 0.7)")
            # print("Random classifier (G = 0.7)")
        else:
            rospy.logwarn("Bad classifier (G = {} > 0.7)".format(G))
Ejemplo n.º 12
0
    def _segment(self, arr, components=2):
        """Segment a 1-D signal into intervals with a GMM-emission HMM.

        Zeros are excluded, log2 outliers are removed with a Hampel filter,
        and a GMM (``self.get_states``) is fitted in both log2 and linear
        scale; log scale is preferred unless it collapses to a single state.
        One HMM state per GMM mean is built, each emitting a mixture of
        ``components`` Normals spread around the mean in 0.5 steps.

        Parameters: arr is the raw 1-D array; components is the number of
        Normal components per state's mixture.
        Returns (newarr, seg, scale): the filtered array, the list of
        segment intervals, and the scale used ('log' or 'linear').
        """

        # Drop zeros, then Hampel-filter outliers in log2 space.
        nonzero = arr[arr > 0]
        idx = self.hampel_filter(np.log2(nonzero))
        filtered = nonzero[idx]

        # Fit GMMs in both scales; keep log unless it found only one state.
        log_gmm = self.get_states(np.log2(filtered))
        log_means, log_probs = log_gmm.means_.ravel(), log_gmm.weights_
        ln_gmm = self.get_states(filtered) # to improve the sensitivity
        ln_means, ln_probs = ln_gmm.means_.ravel(), ln_gmm.weights_
        if (len(log_means) == 1):
            means, probs = ln_means, ln_probs
            scale = 'linear'
        else:
            means, probs = log_means, log_probs
            scale = 'log'

        logger.info('Estimated HMM state number: {0} ({1} scale)'.format(len(means), scale))
        model = HiddenMarkovModel()
        # GMM emissions
        for m in means:
            tmp = []
            for i in range(components):
                # Offsets m, m-0.5, m+0.5, m-1.0, ... (alternating sides).
                e = m + (-1)**i * ((i+1)//2) * 0.5
                s = 0.5
                tmp.append(NormalDistribution(e, s))
            mixture = State(GeneralMixtureModel(tmp), name=str(m))
            dists.append(mixture)
        model.add_states(*tuple(dists))
        # transition matrix: 0.8 self-transition, rest shared evenly.
        for i in range(len(means)):
            for j in range(len(means)):
                if i==j:
                    model.add_transition(dists[i], dists[j], 0.8)
                else:
                    model.add_transition(dists[i], dists[j], 0.2/(len(means)-1))
        
        # starts and ends: GMM weights act as the start distribution.
        for i in range(len(means)):
            model.add_transition(model.start, dists[i], probs[i])
        
        model.bake()

        # training sequences: re-embed the filtered values at their
        # original positions; filtered-out entries stay zero.
        tmp = np.zeros(nonzero.size)
        tmp[idx] = filtered
        newarr = np.zeros(arr.size)
        newarr[arr > 0] = tmp

        if len(means) > 1:
            model.fit(self.pieces(newarr, scale=scale), algorithm='baum-welch', n_jobs=self.n_jobs,
                    max_iterations=5000, stop_threshold=2e-4)
            
            queue = newarr[newarr > 0]
            
            # Viterbi decode; state names are the (stringified) GMM means.
            if scale=='log':
                seq = np.r_[[s.name for i, s in model.viterbi(np.log2(queue))[1][1:]]]
            else:
                seq = np.r_[[s.name for i, s in model.viterbi(queue)[1][1:]]]
            seg = self.assign_cnv(queue, seq)
            
            predicted = np.zeros(newarr.size)
            predicted[newarr > 0] = seg
            seg = self.call_intervals(predicted)
        else:
            # Single state: one segment spanning the whole array.
            seg = [(0, newarr.size)]
        
        return newarr, seg, scale
class RealTimeHMM():
    def __init__(self, n_trials=3, leave_one_out=1):
        """Set up decoder state and ROS I/O, then load or train the HMM.

        Parameters
        ----------
        n_trials : int
            Number of recorded trials available per patient.
        leave_one_out : int
            Trial index held out from training (leave-one-out validation).
        """
        """Variable initialization"""
        self.patient = rospy.get_param("gait_phase_det/patient")
        self.verbose = rospy.get_param("gait_phase_det/verbose")
        self.n_trials = n_trials
        self.n_features = 2      # Raw data and 1st-derivative
        self.leave_one_out = leave_one_out
        self.rec_data = 0.0       # Number of recorded IMU data
        self.proc_data = 0.0      # Number of extracted features
        self.win_size = 3
        self.raw_win = [None] * self.win_size
        self.ff = [[] for x in range(self.n_trials)]      # Training and test dataset
        self.labels = [[] for x in range(self.n_trials)]  # Reference labels from local data
        self.first_eval = True
        self.model_loaded = False
        algorithm = rospy.get_param("gait_phase_det/algorithm")
        rospy.loginfo('Decoding algorithm: {}'.format(algorithm))
        if algorithm not in DECODER_ALGORITHMS:
            raise ValueError("Unknown decoder {!r}".format(algorithm))
        # Dispatch tables: decoding routine and matching IMU callback.
        self.decode = {
            "fov": self._run_fov,
            "bvsw": self._run_bvsw
        }[algorithm]
        self.imu_callback = {
            "fov": self._fov_callback,
            "bvsw": self._bvsw_callback
        }[algorithm]
        """HMM variables"""
        ''' State list:
            s1: Heel Strike (HS)
            s2: Flat Foot   (FF)
            s3: Heel Off    (HO)
            s4: Swing Phase (SP)'''
        self.model_name = "Gait"
        self.has_model = False
        self.must_train = False
        self.states = ['s1', 's2', 's3', 's4']
        self.n_states = len(self.states)
        self.state2phase = {"s1": "hs", "s2": "ff", "s3": "ho", "s4": "sp"}
        self.train_data = []
        self.mgds = {}
        self.dis_means = [[] for x in range(self.n_states)]
        self.dis_covars = [[] for x in range(self.n_states)]
        # Uniform prior over the four phases.
        self.start_prob = [1.0/self.n_states]*self.n_states
        self.trans_mat = np.array([(0.9, 0.1, 0, 0), (0, 0.9, 0.1, 0), (0, 0, 0.9, 0.1), (0.1, 0, 0, 0.9)])    # Left-right model
        self.log_startprob = []
        self.log_transmat = np.empty((self.n_states, self.n_states))
        # NOTE(review): value is 11 samples while the comment talks about
        # 120 ms — confirm the intended unit.
        self.max_win_len = 11       # ms (120 ms: mean IC duration for healthy subjects walking at comfortable speed)
        self.viterbi_path = np.empty((self.max_win_len+1, self.n_states))
        self.backtrack = [[None for x in range(self.n_states)] for y in range(self.max_win_len+1)]
        self.global_path = []
        self.work_buffer = np.empty(self.n_states)
        self.boundary = 1
        self.buff_len = 0
        self.states_pos = {}
        for i in range(len(self.states)):
            self.states_pos[self.states[i]] = i
        self.last_state = -1
        self.curr_state = -1
        self.conv_point = 0
        self.conv_found = False
        self.smp_freq = 100.0   # Hz
        # NOTE(review): 1/100*4 is 4 samples, not the 8 the comment claims
        # — confirm which is intended.
        self.fp_thresh = 1/self.smp_freq*4    # Threshold corresponds to 8 samples
        self.time_passed = 0.0
        self.obs = [[None for x in range(self.n_features)] for y in range(self.max_win_len)]
        self.model = HMM(name=self.model_name)
        """ROS init"""
        rospy.init_node('real_time_HMM', anonymous=True)
        rospack = rospkg.RosPack()
        self.packpath = rospack.get_path('hmm_gait_phase_classifier')
        self.init_subs()
        self.init_pubs()
        """HMM-training (if no model exists)"""
        try:
            '''HMM-model loading'''
            with open(self.packpath+'/log/HMM_models/'+self.patient+'.txt') as infile:
                json_model = json.load(infile)
                self.model = HMM.from_json(json_model)
                rospy.logwarn(self.patient + "'s HMM model was loaded.")
                self.has_model = True
        except IOError:
            if os.path.isfile(self.packpath + "/log/mat_files/" + self.patient + "_proc_data1.mat"):
                """Training with data collected with FSR-based reference system"""
                self.data_ext = 'mat'
                self.must_train = True
            elif os.path.isfile(self.packpath + "/log/IMU_data/" + self.patient + "_labels.csv"):
                """Training with data collected with offline threshold-based gait phase detection method"""
                self.data_ext = 'csv'
                self.must_train = True
            else:
                rospy.logerr("Please collect data for training ({})!".format(self.patient))
        if self.must_train:
            rospy.logwarn("HMM model not trained yet for {}!".format(self.patient))
            rospy.logwarn("Training HMM with local data...")
            self.load_data()
            self.init_hmm()
            self.train_hmm()
            self.has_model = True
        if self.has_model:
            try:
                '''MGDs loading if model exists'''
                for st in self.states:
                    with open(self.packpath+'/log/HMM_models/'+self.patient+'_'+self.state2phase[st]+'.txt') as infile:
                        yaml_dis = yaml.safe_load(infile)
                        dis = MGD.from_yaml(yaml_dis)
                        self.mgds[st] = dis
                        rospy.logwarn(self.patient +"'s " + self.state2phase[st] + " MGC was loaded.")
                        '''Loading means and covariance matrix'''
                        self.dis_means[self.states_pos[st]] = self.mgds[st].parameters[0]
                        self.dis_covars[self.states_pos[st]] = self.mgds[st].parameters[1]
            except yaml.YAMLError as exc:
                # BUG FIX: exc is an exception object; "str" + exc raised
                # TypeError and masked the real YAML error message.
                rospy.logwarn("Not able to load distributions: " + str(exc))
            """Transition and initial (log) probabilities matrices upon training"""
            trans_mat = self.model.dense_transition_matrix()[:self.n_states,:self.n_states]
            # print() call form works under both Python 2 and 3 (was a
            # py2-only print statement).
            if self.verbose: print('**TRANSITION MATRIX (post-training)**\n'+ str(trans_mat))
            for i in range(self.n_states):
                self.log_startprob.append(ln(self.start_prob[i]))
                for j in range(self.n_states):
                    self.log_transmat[i,j] = ln(trans_mat[i][j])
            self.model_loaded = True

    """Init ROS publishers"""
    def init_pubs(self):
        """Create the /gait_phase publisher (Int8 phase index, queue of 100)."""
        self.phase_pub = rospy.Publisher('/gait_phase', Int8, queue_size=100)

    """Init ROS subcribers"""
    def init_subs(self):
        """Subscribe to /imu_data; the callback was selected per decoding algorithm in __init__."""
        rospy.Subscriber('/imu_data', Imu, self.imu_callback)

    """Callback function upon arrival of IMU data for forward-only decoding"""
    def _fov_callback(self, data):
        self.rec_data += 1.0
        self.raw_win.append(data.angular_velocity.y)
        self.raw_win.pop(0)       # Drop first element

        if self.rec_data >= self.win_size and self.model_loaded:      # At least one previous and one subsequent data should have been received
            """Extract feature and append it to test dataset"""
            fder = (self.raw_win[self.win_size/2 + 1] - self.raw_win[self.win_size/2 - 1])/2
            # peak_detector = self.raw_win[self.win_size/2]/max(self.raw_win)
            # self.fder_win.append(fder)
            # self.fder_win.pop(0)
            # sder = (self.fder_win[2] - self.fder_win[0])/2
            # test_set = [self.raw_win[self.win_size/2], self.raw_win[self.win_size/2 - 2], self.raw_win[self.win_size/2 - 1], self.raw_win[self.win_size/2 + 1], self.raw_win[self.win_size/2 + 2]]         # Temporally proximal features
            test_set = [self.raw_win[self.win_size/2], fder]         # Temporally proximal features
            '''Forward-only decoding approach'''
            state = self.decode(test_set)
            # rospy.loginfo("Decoded phase: {}".format(state))
            self.time_passed += 1/self.smp_freq
            if self.last_state == state:
                if (self.time_passed >= self.fp_thresh) and (self.curr_state == 3 and state == 0) or (state == self.curr_state + 1):
                    self.curr_state = state
                    self.phase_pub.publish(state)
                else:
                    # rospy.loginfo("Current phase: {}".format(self.state2phase[self.states[self.curr_state]]))
                    # self.phase_pub.publish((self.curr_state-1)%4)
                    self.phase_pub.publish(self.curr_state)
            else:
                self.last_state = state
                self.time_passed = 1/self.smp_freq
                # rospy.logwarn("Detected phase: {}".format(self.state2phase[self.states[self.last_state]]))

    """Callback function upon arrival of IMU data for BVSW"""
    def _bvsw_callback(self, data):
        """Handle one IMU sample with bounded-sliding-variable-window decoding.

        Buffers the feature vector, runs the BVSW decoder and publishes every
        state of the decoded local path, logging phase changes.
        """
        self.rec_data += 1.0
        self.raw_win.append(data.angular_velocity.y)
        self.raw_win.pop(0)   # keep the window at a fixed length

        # Need at least one sample on each side of the centre before decoding.
        if self.rec_data < 3 or not self.model_loaded:
            return
        # NOTE(review): (prev + next)/2 is an average, although the original
        # comment called it a first derivative — confirm intended feature.
        feature_vect = [self.raw_win[1], (self.raw_win[0] + self.raw_win[2]) / 2]
        '''Bounded sliding window decoding approach'''
        self.obs.append(feature_vect)
        self.obs.pop(0)   # -1 element corresponds to last received features
        decoded = self.decode(feature_vect)
        if len(decoded) == 0:
            return
        for st in decoded:
            self.phase_pub.publish(st)
            if self.curr_state != st:
                rospy.logwarn("Detected phase: {}".format(self.state2phase[self.states[st]]))
                self.curr_state = st
        self.proc_data += 1.0     # One gyro data has been processed

    """Local data loading from mat file and feature extraction"""
    def load_data(self):
        """Load the patient's recorded trials and build feature vectors.

        Two sources are supported (selected by ``self.data_ext``):
          * 'mat': per-trial .mat files labelled by the FSR reference system;
          * 'csv': a single gyro .csv labelled offline by a threshold-based
            gait phase detection method.

        Populates ``self.ff`` (per-trial feature vectors), ``self.labels``
        and ``self.train_data`` (wrapped in one outer list, as the trainer
        requires).
        """
        '''Load mat file (Data processed offline in Matlab)'''
        if self.data_ext == 'mat':
            rospy.logwarn("Initializing parameters via FSR-based reference system")
            # subs = ['daniel', 'erika', 'felipe', 'jonathan', 'luis', 'nathalia', 'paula', 'pedro', 'tatiana']      # Healthy subjects
            # subs = ['carmen', 'carolina', 'catalina', 'claudia', 'emmanuel', 'fabian', 'gustavo']      # Pathological subjects
            # subs = ['daniel']      # Single subject
            datapath = self.packpath + "/log/mat_files/"
            gyro_y = [[] for x in range(self.n_trials)]
            time_array = [[] for x in range(self.n_trials)]
            # for patient in subs:
            for i in range(self.n_trials):
                data = scio.loadmat(datapath + self.patient + "_proc_data" + str(i+1) + ".mat")
                # data = scio.loadmat(datapath + patient + "_proc_data" + str(i+1) + ".mat")
                gyro_y[i] = data["gyro_y"][0]
                # gyro_y[i] += list(data["gyro_y"][0])
                time_array[i] = data["time"][0]
                # time_array[i] += list(data["time"][0])
                self.labels[i] = data["labels"][0]
                # self.labels[i] += list(data["labels"][0])

            """Feature extraction"""
            '''First derivative'''
            # Central difference; endpoints reuse the raw sample so the
            # derivative sequence has the same length as the input.
            fder_gyro_y = []
            for i in range(self.n_trials):
                der = []
                der.append(gyro_y[i][0])
                for j in range(1,len(gyro_y[i])-1):
                    der.append((gyro_y[i][j+1]-gyro_y[i][j-1])/2)
                der.append(gyro_y[i][-1])
                fder_gyro_y.append(der)

            '''Second derivative'''
            # sder_gyro_y = []
            # for i in range(self.n_trials):
            #     der = []
            #     der.append(fder_gyro_y[i][0])
            #     for j in range(1,len(fder_gyro_y[i])-1):
            #         der.append((fder_gyro_y[i][j+1]-fder_gyro_y[i][j-1])/2)
            #     der.append(fder_gyro_y[i][-1])
            #     sder_gyro_y.append(der)

            '''Peak detector'''
            # peak_detector = []
            # for i in range(self.n_trials):
            #     win = []
            #     for j in range(len(gyro_y[i])):
            #         if (j - self.win_size/2) < 0:
            #             win.append(gyro_y[i][j] / self._max(gyro_y[i][0:j + self.win_size/2]))
            #         elif (j + self.win_size/2) > (len(gyro_y[i])-1):
            #             win.append(gyro_y[i][j] / self._max(gyro_y[i][j - self.win_size/2:len(gyro_y[i])-1]))
            #         else:
            #             win.append(gyro_y[i][j] / self._max(gyro_y[i][j - self.win_size/2:j + self.win_size/2]))
            #     peak_detector.append(win)

            """Create training data"""
            # One feature vector [raw, 1st-derivative] per sample per trial.
            for j in range(self.n_trials):
                # for k in range(self.win_size/2,len(gyro_y[j])-1-self.win_size/2):
                for k in range(len(gyro_y[j])):
                    f_ = []
                    f_.append(gyro_y[j][k])
                    '''Approximate differentials'''
                    f_.append(fder_gyro_y[j][k])
                    # f_.append(sder_gyro_y[j][k])
                    '''Temporally proximal feature'''
                    # f_.append(gyro_y[j][k-1])
                    # f_.append(gyro_y[j][k-2])
                    # f_.append(gyro_y[j][k+1])
                    # f_.append(gyro_y[j][k+2])
                    '''Peak detector'''
                    # f_.append(peak_detector[j][k])
                    self.ff[j].append(f_)
            # NOTE(review): trials of unequal length make this a ragged
            # (object-dtype) array — confirm downstream code tolerates that.
            self.ff = np.array(self.ff)
            self.n_features = len(self.ff[0][0])

            # Training set = the two trials other than the held-out one.
            for i in range(len(self.ff[self.leave_one_out-1])):
                self.train_data.append(self.ff[self.leave_one_out-1][i])
            for i in range(len(self.ff[(self.leave_one_out+1) % self.n_trials])):
                self.train_data.append(self.ff[(self.leave_one_out+1) % self.n_trials][i])
            self.train_data = [self.train_data]       # Keep this line or training won't work

        """Parameter initialization"""
        '''Kmeans clustering'''
        # n_components = 4       # No. components of MGD
        # init = 'kmeans++'      # "kmeans||", "first-k", "random"
        # n_init = 1
        # weights = None
        # max_kmeans_iterations = 1
        # batch_size = None
        # batches_per_epoch = None
        #
        # X_concat = x_train
        #
        # # X_concat = numpy.concatenate(x_train)
        # # if X_concat.ndim == 1:
        # #     X_concat = X_concat.reshape(X_concat.shape[0], 1)
        # # n, d = X_concat.shape
        #
        # rospy.logwarn("K-means clustering...")
        # clf = Kmeans(n_components, init=init, n_init=n_init)
        # clf.fit(X_concat, weights, max_iterations=max_kmeans_iterations,
        #     batch_size=batch_size, batches_per_epoch=batches_per_epoch)
        # y = clf.predict(X_concat)
        #
        # rospy.logwarn("Creating distributions...")
        # class_data = [[] for x in range(self.n_states)]
        # for i in range(len(x_train)):
        #     class_data[y[i]].append(x_train[i])
        # if self.verbose:
        #     sum = 0
        #     print "Kmeans clusters:"
        #     for i in range(self.n_states):
        #         temp = len(class_data[i])
        #         sum += temp
        #         print temp
        #     print sum, len(x_train)

        """Offline threshold-based classification"""
        if self.data_ext == 'csv':
            rospy.logwarn("Initializing parameters via threshold-based gait phase detection method")
            self.leave_one_out = 0
            gyro_y = []
            datapath = self.packpath + '/log/IMU_data/'
            with open(datapath+'{}.csv'.format(self.patient), 'r') as imu_file:
                reader = csv.DictReader(imu_file)
                for row in reader:
                    gyro_y.append(float(dict(row)['gyro_y']))
            '''Feature extraction: 1st derivative'''
            # Interior samples only: each vector is [raw, central difference].
            for i in range(1, len(gyro_y)-1):
                feature_vect = [gyro_y[i], (gyro_y[i+1]-gyro_y[i-1])/2]
                self.train_data.append(feature_vect)
                self.ff[self.leave_one_out].append(feature_vect)
            '''Reference labels'''
            with open(datapath+'{}_labels.csv'.format(self.patient), 'r') as labels_file:
                reader = csv.reader(labels_file)
                for row in reader:
                    self.labels[self.leave_one_out].append(int(row[0]))
            self.train_data = [self.train_data]

    """Init HMM if no previous training"""
    def init_hmm(self):
        """Build the left-right HMM from the labelled training data.

        One multivariate Gaussian distribution (MGD) is estimated per gait
        phase from the feature vectors whose reference label matches that
        phase; transitions use the fixed left-right prior in
        ``self.trans_mat`` and the uniform ``self.start_prob``.
        """
        if self.data_ext == 'mat':
            rospy.logwarn("-------Leaving trial {} out-------".format(self.leave_one_out+1))

        """Transition matrix (A)"""
        if self.verbose: rospy.logwarn("**TRANSITION MATRIX (pre-training)**\n" + str(self.trans_mat) + '\nMatrix type: {}'.format(type(self.trans_mat)))

        # Group feature vectors by reference label: one bucket per state.
        class_data = [[] for x in range(self.n_states)]
        for i in range(len(self.ff[self.leave_one_out])):
            class_data[self.labels[self.leave_one_out][i]].append(self.ff[self.leave_one_out][i])

        """Multivariate Gaussian Distributions for each hidden state"""
        class_means = [[[] for x in range(self.n_features)] for i in range(self.n_states)]
        class_cov = []
        for i in range(self.n_states):
            # Convert once per state (was rebuilt inside the feature loop).
            samples = np.array(class_data[i])
            cov = np.ma.cov(samples, rowvar=False)
            class_cov.append(cov)
            for j in range(self.n_features):
                class_means[i][j] = samples[:, [j]].mean(axis=0)
        """Classifier initialization"""
        distros = []
        hmm_states = []
        for i in range(self.n_states):
            dis = MGD(np.array(class_means[i]).flatten(), np.array(class_cov[i]))
            st = State(dis, name=self.states[i])
            distros.append(dis)
            hmm_states.append(st)
        self.model.add_states(hmm_states)
        '''Initial transitions'''
        for i in range(self.n_states):
            self.model.add_transition(self.model.start, hmm_states[i], self.start_prob[i])
        '''Left-right model'''
        for i in range(self.n_states):
            for j in range(self.n_states):
                self.model.add_transition(hmm_states[i], hmm_states[j], self.trans_mat[i][j])
        '''Finish model setup'''
        self.model.bake()

    """Train initialized model"""
    def train_hmm(self):
        """Run Baum-Welch on the training set, then persist the model.

        Each state's Gaussian is written to its own YAML file and the whole
        model to a JSON-in-txt file so later runs can skip training.
        """
        rospy.logwarn("Training initialized model...")
        self.model.fit(self.train_data, algorithm='baum-welch', verbose=self.verbose)
        self.model.freeze_distributions()     # Freeze all model distributions, preventing update from occurring
        # print() call form works under both Python 2 and 3 (was a py2-only
        # print statement).
        if self.verbose: print("**HMM model:\n{}**".format(self.model))

        """Save Multivariate Gaussian Distributions into yaml file"""
        for st in self.model.states:
            # Skip the implicit start/end states added by the HMM library.
            if st.name != self.model_name+"-start" and st.name != self.model_name+"-end":
                dis = st.distribution
                dis_yaml = dis.to_yaml()
                try:
                    with open(self.packpath+'/log/HMM_models/'+self.patient+'_'+self.state2phase[st.name]+'.txt', 'w') as outfile:
                        yaml.dump(dis_yaml, outfile, default_flow_style=False)
                    rospy.logwarn(self.patient+"'s "+self.state2phase[st.name]+" distribution was saved.")
                except IOError:
                    rospy.logwarn('It was not possible to write GMM distribution.')
        """Save model (json script) into txt file"""
        model_json = self.model.to_json()
        try:
            with open(self.packpath+'/log/HMM_models/'+self.patient+'.txt', 'w') as outfile:
                json.dump(model_json, outfile)
            rospy.logwarn(self.patient+"'s HMM model was saved.")
        except IOError:
            rospy.logwarn('It was not possible to write HMM model.')

    """Compute the log probability under a multivariate Gaussian distribution.
    Parameters
    ----------
        X : array_like, shape (n_samples, n_features)
            List of n_features-dimensional data points. Each row corresponds to a
            single data point.
        means : array_like, shape (n_components, n_features)
            List of n_features-dimensional mean vectors for n_components Gaussians.
            Each row corresponds to a single mean vector.
        covars : array_like
            List of n_components covariance parameters for each Gaussian. The shape
            is (n_components, n_features, n_features) if 'full'
        Returns
    ----------
        lpr : array_like, shape (n_samples, n_components)
            Array containing the log probabilities of each data point in
            X under each of the n_components multivariate Gaussian distributions."""
    def log_multivariate_normal_density(self, X, min_covar=1.e-7):
        """Log probability of X under each state's full-covariance Gaussian.

        Means and covariances come from ``self.dis_means`` and
        ``self.dis_covars`` (one pair per hidden state).  Appears adapted
        from scikit-learn's full-covariance GMM helper — TODO confirm.

        Parameters
        ----------
        X : array_like, shape (n_samples, n_features)
            One data point per row.
        min_covar : float
            Diagonal jitter added when a covariance is not positive-definite.

        Returns
        -------
        log_prob : ndarray, shape (n_samples, n_states)
        """
        n_samples, n_dim = X.shape
        nmix = len(self.dis_means)
        log_prob = np.empty((n_samples, nmix))
        for c, (mu, cv) in enumerate(zip(self.dis_means, self.dis_covars)):
            try:
                # Cholesky factor yields both the log-determinant and a fast
                # triangular solve for the Mahalanobis term below.
                cv_chol = linalg.cholesky(cv, lower=True)
            except linalg.LinAlgError:
                # The model is most probably stuck in a component with too
                # few observations, we need to reinitialize this components
                try:
                    cv_chol = linalg.cholesky(cv + min_covar * np.eye(n_dim),
                                              lower=True)
                except linalg.LinAlgError:
                    raise ValueError("'covars' must be symmetric, "
                                     "positive-definite")

            cv_log_det = 2 * np.sum(np.log(np.diagonal(cv_chol)))
            cv_sol = linalg.solve_triangular(cv_chol, (X - mu).T, lower=True).T
            log_prob[:, c] = - .5 * (np.sum(cv_sol ** 2, axis=1) +
                                     n_dim * np.log(2 * np.pi) + cv_log_det)
        return log_prob

    """Find argument (pos) that has the maximum value in array-like object"""
    def _argmax(self, X):
        X_max = float("-inf")
        pos = 0
        for i in range(X.shape[0]):
            if X[i] > X_max:
                X_max = X[i]
                pos = i
        return pos

    """Find max value in array-like object"""
    def _max(self, X):
        return X[self._argmax(X)]

    """Backtracking process to decode most-likely state sequence"""
    def _optim_backtrack(self, k):
        opt = []
        self.last_state = where_from = self._argmax(self.viterbi_path[k])
        opt.append(where_from)
        for lp in range(k-1, -1, -1):
            opt.insert(0, self.backtrack[lp + 1][where_from])
            where_from = self.backtrack[lp + 1][where_from]
        self.global_path.extend(opt)
        return opt

    '''Forward-only decoding approach'''
    def _run_fov(self, test_set):
        # Probability distribution of state given observation
        framelogprob = self.log_multivariate_normal_density(np.array([test_set]))
        if self.first_eval:
            for i in range(self.n_states):
                self.viterbi_path[0, i] = self.log_startprob[i] + framelogprob[0, i]
            self.first_eval = False
            return self._argmax(self.viterbi_path[0])
        else:       # Recursion
            for i in range(self.n_states):
                for j in range(self.n_states):
                    self.work_buffer[j] = (self.log_transmat[j, i] + self.viterbi_path[0, j])
                self.viterbi_path[1, i] = self._max(self.work_buffer) + framelogprob[0, i]
            self.viterbi_path[0] = self.viterbi_path[1]       # Prepare for next feature vector
            return self._argmax(self.viterbi_path[1])

    '''Bounded sliding variable window approach'''
    def _run_bvsw(self, test_set):
        """
        One online Viterbi step using a bounded variable sliding window (BVSW).

        Processes a single feature vector per call. Local backtracking paths
        from every end-state hypothesis are compared; when they all converge
        on one past state (a "fusion point"), the path up to that point is
        committed and the window shrinks. If the window reaches max_win_len
        without convergence, the whole window is forcibly decoded. Returns
        the newly committed portion of the path ([] when nothing is emitted).
        """
        framelogprob = self.log_multivariate_normal_density(np.array([test_set]))
        if self.first_eval:
            # First observation: initialize delta from the start probabilities.
            for i in range(self.n_states):
                self.viterbi_path[0,i] = self.log_startprob[i] + framelogprob[0,i]
                self.backtrack[0][i] = None
            self.first_eval = False
            return []
        else:
            '''Find likelihood probability and backpointer'''
            # Standard Viterbi recursion for the newest window slot.
            for j in range(self.n_states):
                for i in range(self.n_states):
                    self.work_buffer[i] = self.viterbi_path[self.boundary - 1][i] + self.log_transmat[i, j]
                self.viterbi_path[self.boundary][j] = self._max(self.work_buffer) + framelogprob[0][j]
                self.backtrack[self.boundary][j] = self._argmax(self.work_buffer)
            '''Backtracking local paths'''
            # One backward pass per possible end state of the window.
            local_paths = [[] for x in range(self.n_states)]
            for j in range(self.n_states):
                where_from = j
                for smp in range(self.boundary-1, -1, -1):
                    local_paths[j].insert(0, self.backtrack[smp+1][where_from])
                    where_from = self.backtrack[smp+1][where_from]
            # if self.verbose:
            #     print "\n{}, {}".format(t, b)
            #     for path in local_paths:
            #         print path
            '''Given all local paths, find fusion point'''
            # Scan from the newest sample backwards for a time index where
            # every local path agrees on the same state.
            tmp = [None] * self.n_states
            for k in range(len(local_paths[0])-1, 0, -1):
                for st in range(self.n_states):
                    tmp[st] = local_paths[st][k]
                if tmp.count(tmp[0]) == len(tmp):      # All local paths point to only one state?
                    self.conv_found = True
                    self.conv_point = k
                    # if self.verbose: print "Found, {}".format(k)
                    break
            '''Find local path if fusion point was found'''
            if self.boundary < self.max_win_len and self.conv_found:
                self.buff_len += self.conv_point
                opt = self._optim_backtrack(self.conv_point)
                self.conv_found = False
                # if self.verbose: print "\nOpt1: " + str(opt) + ", {}".format(len(self.global_path))
                '''Reinitialize local variables'''
                # Restart the window pinned at the last emitted state: it
                # gets probability 1, every other state 0 (in log space).
                for i in range(self.n_states):
                    if i == self.last_state:
                        self.log_startprob[i] = ln(1.0)
                    else:
                        self.log_startprob[i] = ln(0.0)
                    self.viterbi_path[0][i] = self.log_startprob[i] + framelogprob[0][i]
                    self.backtrack[0][i] = None
                # Re-run the recursion over the samples left in the shrunken
                # window (re-scoring the buffered observations in self.obs).
                for smp in range(1, self.boundary-self.conv_point+1):
                    for j in range(self.n_states):
                        for i in range(self.n_states):
                            self.work_buffer[i] = self.viterbi_path[smp - 1][i] + self.log_transmat[i, j]
                        self.viterbi_path[smp][j] = self._max(self.work_buffer) + self.log_multivariate_normal_density(np.array([self.obs[self.conv_point-self.boundary+smp-1]]))[0,j]
                        self.backtrack[smp][j] = self._argmax(self.work_buffer)
                self.boundary -= self.conv_point-1
                return opt
            elif self.boundary >= self.max_win_len:
                '''Bounding threshold was exceeded'''
                # No convergence within the bound: force-decode the window.
                self.buff_len += self.max_win_len
                opt = self._optim_backtrack(self.boundary-1)
                # if self.verbose: print "\nOpt2: " + str(opt) + ", {}".format(len(self.global_path))
                '''Reinitialize local variables'''
                self.boundary = 1
                for i in range(self.n_states):
                    if i == self.last_state:
                        self.log_startprob[i] = ln(1.0)
                    else:
                        self.log_startprob[i] = ln(0.0)
                    self.viterbi_path[0][i] = self.log_startprob[i] + framelogprob[0][i]
                    self.backtrack[0][i] = None
                return opt
            else:
                # No fusion point yet: grow the window and wait for more data.
                self.boundary += 1
                return []
Ejemplo n.º 14
0
    return ending_tags

tag_ends = ending_counts(data.training_set.Y)

basic_model = HiddenMarkovModel(name="base-hmm-tagger")
tag_word_counter = pair_counts(tags, words)
states = {}

# One hidden state per tag; emissions are P(word | tag) estimated by
# relative frequency C(tag, word) / C(tag).
for tag, word_counts in tag_word_counter.items():
    emission_probs = {w: tag_word_counter[tag][w] / tag_unigrams[tag]
                      for w in word_counts.keys()}
    states[tag] = State(DiscreteDistribution(emission_probs), name=tag)

basic_model.add_states(list(states.values()))

# Start transitions: P(tag | <s>) = C(tag at sentence start) / #sentences.
for tag in data.training_set.tagset:
    basic_model.add_transition(basic_model.start, states[tag],
                               tag_starts[tag] / len(data.training_set))

# End transitions: P(</s> | tag) = C(tag at sentence end) / C(tag).
for tag in data.training_set.tagset:
    basic_model.add_transition(states[tag], basic_model.end,
                               tag_ends[tag] / tag_unigrams[tag])

# Tag-to-tag transitions from bigram counts: P(t2 | t1) = C(t1, t2) / C(t1).
for tag1 in data.training_set.tagset:
    for tag2 in data.training_set.tagset:
        basic_model.add_transition(states[tag1], states[tag2],
                                   tag_bigrams[(tag1, tag2)] / tag_unigrams[tag1])
Ejemplo n.º 15
0
        for i in range(0, len(fl)):
            if fl[i] == 1:
                positive_data.append(fd[i])
            else:
                negative_data.append(fd[i])

        posdis = MGD.from_samples(positive_data)
        st = State(posdis, name='swing')
        distros.append(st)
        hmm_states.append(st)
        negdis = MGD.from_samples(negative_data)
        st2 = State(negdis, name='stance')
        distros.append(st2)
        hmm_states.append(st2)

        cl.add_states(hmm_states)
        cl.add_transition(cl.start, hmm_states[0], 0.5)
        cl.add_transition(cl.start, hmm_states[1], 0.5)

        for i in range(0, 2):
            for j in range(0, 2):
                cl.add_transition(hmm_states[i], hmm_states[j], t[i][j])
        cl.bake()

        f += 1
        train_data = fd[train_index]
        train_class = fl[train_index]
        test_data = fd[test_index]
        test_class = fl[test_index]
        seq = []
        if batch_training == 1:
Ejemplo n.º 16
0
ccc = MultivariateGaussianDistribution.from_samples(X_333)

# Three states per component: M1/M11/M111, M2/M22/M222, M3/M33/M333.
s1 = State(a, name="M1")
s11 = State(aa, name="M11")
s111 = State(aaa, name="M111")

s2 = State(b, name="M2")
s22 = State(bb, name="M22")
s222 = State(bbb, name="M222")

s3 = State(c, name="M3")
s33 = State(cc, name="M33")
s333 = State(ccc, name="M333")

hmm = HiddenMarkovModel()
hmm.add_states(s1, s11, s111, s2, s22, s222, s3, s33, s333)

# Start probabilities: 0.12 for s1 and 0.11 for each of the remaining
# eight states (0.12 + 8 * 0.11 = 1.0).
for _state, _prob in zip((s1, s11, s111, s2, s22, s222, s3, s33, s333),
                         (0.12,) + (0.11,) * 8):
    hmm.add_transition(hmm.start, _state, _prob)


# s1 mostly self-loops, with small leakage into its sibling states.
hmm.add_transition(s1, s1, 0.92)
hmm.add_transition(s1, s11, 0.01)
hmm.add_transition(s1, s111, 0.01)
Ejemplo n.º 17
0
def init_gaussian_hmm(sequences, n_states, model_id, seed=None):
    """
    insantiate a model with random parameters
    randomly generates start and transition matrices
    generates nomal distrobutions for each state from partition on sequences
    """
    """
    # make random transition probability matrix
    # scale each row to sum to 1
    trans = np.random.ranf((n_states, n_states))
    for i in range(n_states):
        trans[i, :] = trans[i, :] / trans[i, :].sum()

    # make distrobutions from random subsets of timepoints
    x = int(math.ceil(sequences.shape[1] / float(n_states)))
    # x = math.min(3, x)

    dists = []
    for i in range(n_states):
        temp_assignment = np.random.choice(sequences.shape[1], x)
        dists.append(NormalDistribution.from_samples
                     (sequences[:, temp_assignment]))

    # random start probabilities
    # scale to sum to 1
    starts = np.random.ranf(n_states)
    starts = starts / sum(starts)

    model = HiddenMarkovModel.from_matrix(trans, dists, starts, name=model_id)
    """
    # seed random numer generator
    if seed is not None:
        np.random.seed(seed)

    model = HiddenMarkovModel(model_id)

    # make states with distrobutions from random subsets of timepoints
    x = int(math.ceil(sequences.shape[1] / float(n_states)))
    states = []
    for i in range(n_states):
        temp_assignment = np.random.choice(sequences.shape[1], x)
        dist = \
            NormalDistribution.from_samples(sequences[:, temp_assignment])
        states.append(State(dist, name=str(i)))

    model.add_states(states)

    # add random start probabilities
    start_probs = np.random.ranf(n_states)
    start_probs = start_probs / start_probs.sum()
    for i, state in enumerate(states):
        model.add_transition(model.start, state, start_probs[i])

    # add random transition probabilites out of each state
    for state1 in states:
        transitions = np.random.ranf(n_states)
        transitions = transitions / transitions.sum()
        for i, state2 in enumerate(states):
            model.add_transition(state1, state2, transitions[i])

    model.bake()
    print 'Initialized HMM: ', model.name
    return model
Ejemplo n.º 18
0
def init_lr_hmm(sequences, steps, states_per_step,
                force_end=False, model_id='Left-Righ HMM', seed=None):
    """
    insantiate a left-right model with random parameters
    randomly generates start and transition matrices
    generates nomal distrobutions for each state from partition on sequences
    force_end if we require sequence to end in end state
    """

    # seed random number generator
    if seed is not None:
        np.random.seed(seed)

    model = HiddenMarkovModel(model_id)
    n_states = steps * states_per_step

    # make distrobutions from chronological subsets of timepoints
    step_size = int(math.ceil(sequences.shape[1] / float(n_states+1)))

    # generate states
    states = np.empty((steps, states_per_step), dtype=object)
    for i in range(steps):
        for j in range(states_per_step):
            temp_assignment = np.arange(step_size * i, step_size * (i+1))
            dist = \
                NormalDistribution.from_samples(sequences[:, temp_assignment])
            state_name = str(i) + '-' + str(j)
            states[i, j] = State(dist, name=str(state_name))

    # add states to model
    model.add_states(states.flatten().tolist())

    # make random transition from start -> step0
    trans = np.random.ranf(states_per_step)
    trans = trans / trans.sum()
    for j in range(states_per_step):
        model.add_transition(model.start, states[0, j], trans[j])

    # make random transition from step(i) -> step(i+1)
    for i in range(steps-1):
        for j in range(states_per_step):
            trans = np.random.ranf(states_per_step + 1)
            trans = trans / trans.sum()
            # self transition
            model.add_transition(states[i, j], states[i, j], trans[0])
            # out transition
            for x in range(states_per_step):
                model.add_transition(states[i, j], states[i + 1, x],
                                     trans[x + 1])

    # make random transition from stepn -> end
    if force_end:
        for j in range(states_per_step):
            trans = np.random.ranf(2)
            trans = trans / trans.sum()
            # self transition
            model.add_transition(states[(steps - 1), j],
                                 states[(steps - 1), j], trans[0])
            # end transition
            model.add_transition(states[(steps - 1), j], model.end, trans[1])

    model.bake()
    print 'Initialized Left-Right HMM:', model.name, '[', \
        steps, states_per_step, ']'
    return model
############################# 6. BUILD HMM MODEL ##############################
HMM_model = HiddenMarkovModel(name="HMM-Tagger")
tag_states = []  # state for each tag

################# (6.1) ADD STATES w/ EMISSION PROBABILITIES ##################
''' 
tag_emissions: P(word_i|tag_j) 
             = P(word_i, tag_j)/P(tag_j)
             = C((word_i, tag_j) pairs)/C(tag_j)
'''
# One discrete-emission state per tag, estimated over the whole vocabulary.
for tag in data.training_set.tagset:
    tag_emissions = DiscreteDistribution({word:pair_counts[tag][word]/single_tag_counts[tag] \
                                          for word in data.training_set.vocab})
    tag_state = State(tag_emissions, name=tag)
    tag_states.append(tag_state)
    HMM_model.add_states(tag_state)

############## (6.2) ADD TRANSITIONS w/ TRANSITION PROBABILITIES ##############
''' 
P(tag_1|START) = P(START, tag_1)/P(START) = C(tag_1 @ start of sentence)/C(START)
                                          = C(tag_1 @ start of sentence)/# of sentences
P(END|tag_1) = P(tag_1, END)/P(tag_1) = C(tag_1 @ end of sentence)/C(tag_1)
P(tag_2|tag_1) = P(tag_1, tag_2)/P(tag_1) = C((word_1, tag_2) pairs)/C(tag_1)
'''
n_sentences = len(data.training_set.keys)

# NOTE(review): tag_state2/tag2 are never used in this loop body, so the
# start->tag_state1 transition is added len(tagset) times per tag1. The
# tag1->tag2 (and end) transitions presumably follow in the full script —
# confirm against the original source.
for tag_state1 in tag_states:
    for tag_state2 in tag_states:
        tag1, tag2 = tag_state1.name, tag_state2.name
        HMM_model.add_transition(HMM_model.start, tag_state1,
                                 start_tag_counts[tag1] / n_sentences)
Ejemplo n.º 20
0
def main():
    """
    Leave-one-out cross-validation of a 4-state gait-phase HMM (ROS node).

    Loads per-subject, per-trial .mat recordings, derives a first-derivative
    feature, then for each left-out subject trains a Gaussian-emission HMM
    over the phases hs/ff/ho/sw and decodes a held-out trial with Viterbi.
    Reports timing error, mean phase/stride durations with CoV, and
    accuracy/sensitivity/specificity; console output is redirected to a
    results text file.
    """
    rospy.init_node('hmm_trainer')
    phase_pub = rospy.Publisher('/phase', Int32, queue_size=10)
    rospack = rospkg.RosPack()
    packpath = rospack.get_path('exo_control')
    datapath = packpath + "/log/mat_files/"
    verbose = rospy.get_param('~verbose', False)

    """Print console output into text file"""
    sys.stdout = open(packpath + "/log/results/leave-one-out_cross_validation.txt", "w")

    """Data loading"""
    n_trials = 3
    n_sub = 9
    healthy_subs = ["daniel", "erika", "felipe", "jonathan", "luis", "nathalia", "paula", "pedro", "tatiana"]
    patients = ["andres", "carlos", "carmen", "carolina", "catalina", "claudia", "emmanuel", "fabian", "gustavo"]
    study_subs = [healthy_subs, patients]

    # dataset[group][subject] -> per-trial signal lists plus FSR sample rate.
    dataset = [{} for x in range(len(study_subs))]
    for i in range(len(study_subs)):
        for sub in study_subs[i]:
            dataset[i][sub] = {"gyro_y": [[] for x in range(n_trials)],
                               "fder_gyro_y": [[] for x in range(n_trials)],
                               "time": [[] for x in range(n_trials)],
                               "labels": [[] for x in range(n_trials)],
                               "Fs_fsr": 0.0}

    # Fill signals from the .mat files; "fder_gyro_y" is computed below,
    # not loaded.
    for group in dataset:
        for sub,data in group.iteritems():
            for trial in range(n_trials):
                mat_file = scio.loadmat(datapath + sub + "_proc_data" + str(trial+1) + ".mat")
                for signal in data:
                    if signal not in ["pathol","fder_gyro_y"]:
                        if signal == "Fs_fsr":
                            data[signal] = mat_file[signal][0][0]
                        else:
                            data[signal][trial] = mat_file[signal][0]
    del mat_file

    """Feature extraction"""
    """First derivative"""
    for group in dataset:
        for sub,data in group.iteritems():
            for trial in range(n_trials):
                der = []
                gyro_y = data["gyro_y"][trial]
                # Central difference in the interior; both endpoints just
                # copy the raw samples.
                der.append(gyro_y[0])
                for i in range(1,len(gyro_y)-1):
                    der.append((gyro_y[i+1]-gyro_y[i-1])/2)
                der.append(gyro_y[-1])
                data["fder_gyro_y"][trial] = der
    del der, sub, data

    """Global variables of cHMM"""
    startprob = [0.25, 0.25, 0.25, 0.25]
    state_names = ['hs', 'ff', 'ho', 'sw']
    n_classes = 4
    n_signals = 2
    tol = 6e-2       # Tolerance window of 60 ms

    # for pathology in range(len(dataset)):
    #     if pathology == 0:
    #         rospy.logwarn("**Leave-one-out cross validation with HEALTHY subjects**")
    #         print "**Leave-one-out cross validation with HEALTHY subjects**"
    #     else:
    #         rospy.logwarn("**Leave-one-out cross validation with PATIENTS**")
    #         print "**Leave-one-out cross validation with PATIENTS**"
    if True:
        # for lou_sub,lou_data in dataset[pathology].iteritems():       # Iterate through leave-one-out subject's data
        for lou_sub,lou_data in dataset[0].iteritems():       # Iterate through leave-one-out subject's data
            rospy.logwarn("Leave " + lou_sub + " out:")
            print "Leave " + lou_sub + " out:"

            t = np.zeros((4, 4))        # Transition matrix
            prev = -1
            # Count label-to-label transitions over all trials (the very
            # first sample seeds prev with its own label, so it contributes
            # one self-transition).
            for trial in range(n_trials):
                for label in lou_data["labels"][trial]:
                    if prev == -1:
                        prev = label
                    t[prev][label] += 1.0
                    prev = label
            # Row-normalize the counts into transition probabilities.
            t = normalize(t, axis=1, norm='l1')
            if verbose: rospy.logwarn("TRANSITION MATRIX\n" + str(t))

            # Group samples by class and keep the flat streams too.
            class_data = [[] for x in range(n_classes)]
            full_lou_data = []
            full_lou_labels = []
            for trial in range(n_trials):
                for sample in range(len(lou_data["gyro_y"][trial])):
                    d = [lou_data["gyro_y"][trial][sample], lou_data["fder_gyro_y"][trial][sample]]
                    l = lou_data["labels"][trial][sample]
                    full_lou_data.append(d)
                    full_lou_labels.append(l)
                    class_data[l].append(d)

            """Multivariate Gaussian Distributions for each hidden state"""
            class_means = [[[] for x in range(n_signals)] for i in range(n_classes)]
            class_vars = [[[] for x in range(n_signals)] for i in range(n_classes)]
            class_std = [[[] for x in range(n_signals)] for i in range(n_classes)]
            class_cov = []
            classifiers = []

            for state in range(n_classes):
                cov = np.ma.cov(np.array(class_data[state]), rowvar=False)
                class_cov.append(cov)
                for signal in range(n_signals):
                    class_means[state][signal] = np.array(class_data[state][:])[:, [signal]].mean(axis=0)
                    class_vars[state][signal] = np.array(class_data[state][:])[:, [signal]].var(axis=0)
                    class_std[state][signal] = np.array(class_data[state][:])[:, [signal]].std(axis=0)

            """Classifier initialization"""
            distros = []
            hmm_states = []
            for state in range(n_classes):
                dis = MGD\
                    (np.array(class_means[state]).flatten(),
                     np.array(class_cov[state]))
                st = State(dis, name=state_names[state])
                distros.append(dis)
                hmm_states.append(st)
            model = HMM(name="Gait")

            model.add_states(hmm_states)
            """Initial transitions"""
            for state in range(n_classes):
                model.add_transition(model.start, hmm_states[state], startprob[state])
            """Left-right model"""
            for i in range(n_classes):
                for j in range(n_classes):
                    model.add_transition(hmm_states[i], hmm_states[j], t[i][j])

            model.bake()

            """Create training and test data"""
            x_train = []
            x_test = []
            test_gyro_y = lou_data["gyro_y"][-1]
            test_fder_gyro_y = lou_data["fder_gyro_y"][-1]
            """Create test data with n-th trial of leave-one-out subject"""
            for sample in range(len(test_gyro_y)):
                x_test.append([test_gyro_y[sample], test_fder_gyro_y[sample]])
            """Create training data with n-1 trials of all subjects (patients group)"""
            # if pathology == 1:
            #     for trial in range(n_trials-1):
            #         train_gyro_y = lou_data["gyro_y"][trial]
            #         train_fder_gyro_y = lou_data["fder_gyro_y"][trial]
            #         for sample in range(len(train_gyro_y)):
            #             x_train.append([train_gyro_y[sample], train_fder_gyro_y[sample]])

            """Create training data with n-1 trials of the rest of subjects (healthy group)"""
            # for train_sub,train_data in dataset[pathology].iteritems():
            for train_sub,train_data in dataset[0].iteritems():
                # if lou_sub != train_sub:
                if True:
                    for trial in range(n_trials-1):
                        train_gyro_y = train_data["gyro_y"][trial]
                        train_fder_gyro_y = train_data["fder_gyro_y"][trial]
                        for sample in range(len(train_gyro_y)):
                            x_train.append([train_gyro_y[sample], train_fder_gyro_y[sample]])
            # Wrap the whole training stream as a single sequence —
            # presumably because fit() expects a list of sequences; confirm
            # against pomegranate's API.
            x_train = list([x_train])

            """Training"""
            rospy.logwarn("Training HMM...")
            model.fit(x_train, algorithm='baum-welch', verbose=True)
            # model.fit(x_train, algorithm='viterbi', verbose='True')

            """Find most-likely sequence"""
            logp, path = model.viterbi(x_test)
            class_labels = []
            for i in range(len(lou_data["labels"][-1])):
                path_phase = path[i][1].name
                for state in range(n_classes):
                    if path_phase == state_names[state]:
                        class_labels.append(state)
            # Saving classifier labels into csv file
            np.savetxt(packpath+"/log/inter_labels/"+lou_sub+"_labels.csv", class_labels, delimiter=",", fmt='%s')
            rospy.logwarn("csv file with classifier labels was saved.")
            # Drop the first reference label — presumably to realign with the
            # decoded path (whose first entry is the model start state);
            # TODO confirm.
            lou_data["labels"][-1] = lou_data["labels"][-1][1:]

            """Results"""
            # NOTE(review): 'sum' shadows the Python builtin inside main().
            sum = 0.0
            true_pos = 0.0
            false_pos = 0.0
            true_neg = 0.0
            false_neg = 0.0
            # Half tolerance window converted to samples: (tol/2) s * Fs_fsr.
            tol_window = int((tol/2) / (1/float(lou_data["Fs_fsr"])))

            rospy.logwarn("Calculating results...")
            # NOTE(review): the verbose prints below concatenate ints with
            # strings ('phase + ", "') and would raise TypeError when
            # verbose is enabled — confirm.
            time_error = [[] for x in range(n_classes)]
            for phase in range(n_classes):
                for i in range(len(lou_data["labels"][-1])):
                    """Tolerance window"""
                    if i >= tol_window and i < len(lou_data["labels"][-1])-tol_window:
                        win = []
                        for win_label in lou_data["labels"][-1][i-tol_window:i+tol_window+1]:
                            win.append(win_label)
                        if class_labels[i] == phase:
                            if class_labels[i] in win:
                                for k in range(len(win)):
                                    if win[k] == phase:
                                        time_error[phase].append((k-tol_window)/lou_data["Fs_fsr"])
                                        break
                                true_pos += 1.0
                                if verbose: print phase + ", " + lou_data["labels"][-1][i] + ", " + class_labels[i] + ", true_pos"
                            else:
                                false_pos += 1.0
                                if verbose: print phase + ", " + lou_data["labels"][-1][i] + ", " + class_labels[i] + ", false_pos"
                        else:
                            if phase != lou_data["labels"][-1][i]:
                            # if phase not in win:
                                true_neg += 1.0
                                if verbose: print phase + ", " + lou_data["labels"][-1][i] + ", " + class_labels[i] + ", true_neg"
                            else:
                                false_neg += 1.0
                                if verbose: print phase + ", " + lou_data["labels"][-1][i] + ", " + class_labels[i] + ", false_neg"
                    else:
                        # Near the edges there is no full window: fall back
                        # to exact comparison against the reference label.
                        if class_labels[i] == phase:
                            if class_labels[i] == lou_data["labels"][-1][i]:
                                true_pos += 1.0
                            else:
                                false_pos += 1.0
                        else:
                            if phase != lou_data["labels"][-1][i]:
                                true_neg += 1.0
                            else:
                                false_neg += 1.0

            rospy.logwarn("Timing error")
            print "Timing error"
            for phase in range(n_classes):
                rospy.logwarn("(" + state_names[phase] + ")")
                print "(" + state_names[phase] + ")"
                if len(time_error[phase]) > 0:
                    rospy.logwarn(str(np.mean(time_error[phase])) + " + " + str(np.std(time_error[phase])))
                    print str(np.mean(time_error[phase])) + " + " + str(np.std(time_error[phase]))
                else:
                    rospy.logwarn("0.06 + 0")
                    print "0.06 + 0"

            """Calculate mean time (MT) of stride and each gait phase and Coefficient of Variation (CoV)"""
            rospy.logwarn("Mean time (MT) and Coefficient of Variance (CoV)")
            print "Mean time (MT) and Coefficient of Variance (CoV)"
            n_group = 0
            # Run the same statistics for HMM output and FSR ground truth.
            for label_group in [class_labels, lou_data["labels"][-1]]:
                if n_group == 0:
                    rospy.logwarn("Results for HMM:")
                    print "Results for HMM:"
                else:
                    rospy.logwarn("Results for FSR:")
                    print "Results for FSR:"
                curr_label = -1
                count = 0
                n_phases = 0
                stride_samples = 0
                phases_time = [[] for x in range(n_classes)]
                stride_time = []
                for label in label_group:
                # for label in class_labels:
                    if curr_label != label:
                        n_phases += 1
                        stride_samples += count
                        if label == 0:  # Gait start: HS
                            if n_phases == 4:   # If a whole gait cycle has past
                                stride_time.append(stride_samples/lou_data["Fs_fsr"])
                            n_phases = 0
                            stride_samples = 0
                        # NOTE(review): label 0 indexes phases_time[-1] (the
                        # last phase) — confirm this off-by-one is intended.
                        phases_time[label-1].append(count/lou_data["Fs_fsr"])
                        curr_label = label
                        count = 1
                    else:
                        count += 1.0
                for phase in range(n_classes):
                    mean_time = np.mean(phases_time[phase])
                    phase_std = np.std(phases_time[phase])
                    rospy.logwarn("(" + state_names[phase] + ")")
                    print "(" + state_names[phase] + ")"
                    rospy.logwarn("Mean time: " + str(mean_time) + " + " + str(phase_std))
                    print "Mean time: " + str(mean_time) + " + " + str(phase_std)
                    rospy.logwarn("CoV: " + str(phase_std/mean_time*100.0))
                    print("CoV: " + str(phase_std/mean_time*100.0))
                mean_time = np.mean(stride_time)
                phase_std = np.std(stride_time)
                rospy.logwarn("(Stride)")
                print "(Stride)"
                rospy.logwarn("Mean time: " + str(mean_time) + " + " + str(phase_std))
                print "Mean time: " + str(mean_time) + " + " + str(phase_std)
                rospy.logwarn("CoV: " + str(phase_std/mean_time*100.0))
                print("CoV: " + str(phase_std/mean_time*100.0))
                n_group += 1

            """Accuracy"""
            if (true_neg+true_pos+false_neg+false_pos) != 0.0:
                acc = (true_neg + true_pos)/(true_neg + true_pos + false_neg + false_pos)
            else:
                acc = 0.0
            """Sensitivity or True Positive Rate"""
            if true_pos+false_neg != 0:
                tpr = true_pos / (true_pos+false_neg)
            else:
                tpr = 0.0
            """Specificity or True Negative Rate"""
            if false_pos+true_neg != 0:
                tnr = true_neg / (false_pos+true_neg)
            else:
                tnr = 0.0
            rospy.logwarn("Accuracy: {}%".format(acc*100.0))
            print("Accuracy: {}%".format(acc*100.0))
            rospy.logwarn("Sensitivity: {}%".format(tpr*100.0))
            print("Sensitivity: {}%".format(tpr*100.0))
            rospy.logwarn("Specificity: {}%".format(tnr*100.0))
            print("Specificity: {}%".format(tnr*100.0))
            """Goodness index"""
            # Distance from the ideal classifier (tpr=1, tnr=1).
            G = np.sqrt((1-tpr)**2 + (1-tnr)**2)
            if G <= 0.25:
                rospy.logwarn("Optimum classifier (G = {} <= 0.25)".format(G))
                print("Optimum classifier (G = {} <= 0.25)".format(G))
            elif G > 0.25 and G <= 0.7:
                rospy.logwarn("Good classifier (0.25 < G = {} <= 0.7)".format(G))
                print("Good classifier (0.25 < G = {} <= 0.7)".format(G))
            # NOTE(review): this branch is unreachable — G == 0.7 is already
            # captured by the previous (G <= 0.7) branch.
            elif G == 0.7:
                rospy.logwarn("Random classifier (G = 0.7)")
                print("Random classifier (G = 0.7)")
            else:
                rospy.logwarn("Bad classifier (G = {} > 0.7)".format(G))
                print("Bad classifier (G = {} > 0.7)".format(G))

    del test_gyro_y, test_fder_gyro_y, train_gyro_y, train_fder_gyro_y, d, l
Ejemplo n.º 21
0
# Normalize the raw start-of-sentence and end-of-sentence tag counts into
# probability distributions, then wire up the tagger HMM.
start_sum = sum(start_prob.values())
end_sum = sum(end_prob.values())
start_state_dict = {}
end_state_dict = {}

for k, v in start_prob.items():
    start_state_dict[k] = (v / start_sum)

for k, v in end_prob.items():
    # BUG FIX: this previously divided by start_sum, leaving the
    # end-transition distribution unnormalized (end_sum was computed
    # above but never used).
    end_state_dict[k] = (v / end_sum)

example = emission_hash['NOUN']
model = HiddenMarkovModel('example')

# add emmission states P(W|T)
model.add_states([val for val in emission_hash.values()])

# add start states P(T|S)
for k in start_state_dict:
    model.add_transition(model.start, emission_hash[k], start_state_dict[k])

# add end states P(T|E)
for k in end_state_dict:
    model.add_transition(emission_hash[k], model.end, end_state_dict[k])

# finally add P(T|T^-1)
for k in bigram_hash:
    evidence, prior = k.split()[0], k.split()[1]
    model.add_transition(emission_hash[evidence], emission_hash[prior],
                         bigram_hash[k])
    def build_dis_classifier(self):
        skf = StratifiedKFold(self.full_labels, n_folds=self.folds)
        classifier_array = []
        stats_array = []
        num_class = len(self.full_data[0])
        print (num_class)
        for cl in range(0, num_class):
            lel = -1
            tp_total = 0.0
            tn_total = 0.0
            fp_total = 0.0
            fn_total = 0.0
            tests = 0
            for train_index, test_index in skf:
                if lel > 0:
                    lel -= 1
                    continue
                stats = []
                distros = []
                hmm_states = []
                state_names = ['swing', 'stance']
                swings = 0
                stances = 0
                for i in range(0, 2):
                    dis = MGD.from_samples(self.class_data[i])
                    st = State(dis, name=state_names[i])
                    distros.append(dis)
                    hmm_states.append(st)

                model = HMM()
                print(model.states)
                model.add_states(hmm_states)
                model.add_transition(model.start, hmm_states[0], 0.5)
                model.add_transition(model.start, hmm_states[1], 0.5)
                model.add_transition(hmm_states[1], model.end, 0.000000000000000001)
                model.add_transition(hmm_states[0], model.end, 0.000000000000000001)

                for i in range(0, 2):
                    for j in range(0, 2):
                        model.add_transition(hmm_states[i], hmm_states[j], self.t[i][j])
                model.bake()

                tp = 0.0
                tn = 0.0
                fp = 0.0
                fn = 0.0

                train_data = self.full_data[train_index, cl]
                train_class = self.full_labels[train_index, cl]
                test_data = self.full_data[test_index]
                test_class = self.full_labels[test_index]

                print(np.isfinite(train_data).all())
                print(np.isfinite(test_data).all())
                print(np.isnan(train_data.any()))
                print(np.isinf(train_data.any()))
                print(np.isnan(test_data.any()))
                print(np.isinf(test_data.any()))

                if (not np.isfinite(train_data.any())) or (not np.isfinite(test_data.any())) \
                        or (not np.isfinite(train_class.any())) or (not np.isfinite(test_data.any())):
                    rospy.logerr("NaN or Inf Detected")
                    exit()

                try:
                    rospy.logwarn("Training model #"+str(cl)+", fold #" + str(tests))
                    seq = np.array(train_data)
                    model.fit(seq, algorithm='baum-welch', verbose='True', n_jobs=8, max_iterations=150)

                except ValueError:
                    rospy.logwarn("Something went wrong, exiting")
                    rospy.shutdown()
                    exit()

                seq = []
                if self.batch_test == 1:
                    s = 0
                    # for s in range(0, len(test_data)):
                    while s < len(test_data):
                        k = 0
                        seq_entry = []
                        while k < 20 and s < len(test_data):
                            seq_entry.append(test_data[s])
                            k += 1
                            s += 1
                        seq.append(seq_entry)
                else:
                    seq = np.array(test_data)

                if seq == [] or test_data == []:
                    rospy.logerr("Empty testing sequence")
                    continue

                log, path = model.viterbi(test_data)
                if (len(path) - 2) != len(test_data):
                    rospy.logerr(len(path))
                    rospy.logerr(path[0][1].name)
                    rospy.logerr(path[len(path) - 1][1].name)
                    rospy.logerr(len(test_data))
                    exit()

                tests += 1
                for i in range(0, len(path) - 2):
                    if path[i + 1][1].name != 'Gait-start' and path[i + 1][1].name != 'Gait-end':
                        if path[i + 1][1].name == 'swing':  # prediction is 0
                            swings += 1
                            if test_class[i] == 0:  # class is 0
                                tn += 1.0
                            elif test_class[i] == 1:
                                fn += 1.0  # class is 1

                        elif path[i + 1][1].name == 'stance':  # prediction is 1
                            stances += 1
                            if test_class[i] == 1:  # class is 1
                                tp += 1.0
                            elif test_class[i] == 0:  # class is 0
                                fp += 1.0
                print (swings)
                print (stances)
                if (tp + fn) != 0.0:
                    rospy.logwarn("Sensitivity : " + str(tp / (tp + fn)))
                    # sensitivity = tp / (tp + fn)
                else:
                    rospy.logwarn("Sensitivity : 0.0")
                    # sensitivity = 0.0
                if (tn + fp) != 0.0:
                    rospy.logwarn("Specificity : " + str(tn / (tn + fp)))
                    # specificity = tn_total / (tn_total + fp_total)
                else:
                    rospy.logwarn("Specificity : 0.0")
                    # specificity = 0.0
                if (tn + tp + fn + fp) != 0.0:
                    rospy.logwarn("Accuracy : " + str((tn + tp) / (tn + tp + fn + fp)))
                    # accuracy = (tn + tp) / (tn + tp + fn + fp)
                else:
                    rospy.logwarn("Accuracy : 0.0")
                    # accuracy = 0.0

                tn_total += tn
                tp_total += tp
                fn_total += fn
                fp_total += fp

            tp_total /= tests
            tn_total /= tests
            fp_total /= tests
            fn_total /= tests
            rospy.logerr("TP :" + str(tp_total))
            rospy.logerr("TN :" + str(tn_total))
            rospy.logerr("FP :" + str(fp_total))
            rospy.logerr("FN :" + str(fn_total))
            rospy.logerr("Tests :" + str(tests))
            if (tp_total + fn_total) != 0.0:
                sensitivity = tp_total / (tp_total + fn_total)
            else:
                sensitivity = 0.0
            if (tn_total + fp_total) != 0.0:
                specificity = tn_total / (tn_total + fp_total)
            else:
                specificity = 0.0
            if (tn_total + tp_total + fn_total + fp_total) != 0.0:
                accuracy = (tn_total + tp_total) / (tn_total + tp_total + fn_total + fp_total)
            else:
                accuracy = 0.0

            rospy.logwarn("----------------------------------------------------------")
            rospy.logerr("Total accuracy: " + str(accuracy))
            rospy.logerr("Total sensitivity: " + str(sensitivity))
            rospy.logerr("Total specificity: " + str(specificity))
            stats = [tn_total * tests, fn_total * tests, fp_total * tests, fn_total * tests, tests,
                     accuracy, sensitivity, specificity]
            rospy.logwarn("-------------------DONE-------------------------")
            classifier_array.append(model)
            stats_array.append(stats)

        pickle.dump(classifier_array, open(datafile + "distributed_classifiers.p", 'wb'))
        pickle.dump(stats_array, open(datafile + "distributed_stats.p", 'wb'))
        scio.savemat(datafile + "distributed_stats.mat", {'stats': stats_array})
Ejemplo n.º 23
0
def get_constant_number_of_repeats_matcher_hmm(patterns, copies):
    """Build a profile HMM that matches exactly ``copies`` tandem repeats.

    A profile (match/insert/delete) unit is derived from ``patterns`` via
    ``build_profile_hmm_for_repeats`` and then chained ``copies`` times:
    each unit's ``unit_end`` feeds the next unit's ``unit_start`` with
    probability 1, so the model accepts a fixed number of repeat copies.

    Parameters
    ----------
    patterns : sequences used to estimate the profile emissions/transitions.
    copies : int, number of repeat units to chain.

    Returns
    -------
    The baked pomegranate ``Model``.
    """
    model = Model(name="Repeating Pattern Matcher HMM Model")

    # Profile parameters shared by every repeat unit; emission keys are
    # state names such as 'M1', 'I0', 'D1'.
    transitions, emissions = build_profile_hmm_for_repeats(
        patterns, settings.MAX_ERROR_RATE)
    matches = [m for m in emissions.keys() if m.startswith('M')]

    last_end = None
    for repeat in range(copies):
        insert_states = []
        match_states = []
        delete_states = []
        # Insert states I0..In (one more than the match states).
        for i in range(len(matches) + 1):
            insert_distribution = DiscreteDistribution(emissions['I%s' % i])
            insert_states.append(
                State(insert_distribution, name='I%s_%s' % (i, repeat)))

        # Match states M1..Mn with profile-derived emissions.
        for i in range(1, len(matches) + 1):
            match_distribution = DiscreteDistribution(emissions['M%s' % i])
            match_states.append(
                State(match_distribution, name='M%s_%s' % (str(i), repeat)))

        # Delete states D1..Dn are silent (no emission distribution).
        for i in range(1, len(matches) + 1):
            delete_states.append(State(None, name='D%s_%s' % (str(i), repeat)))

        unit_start = State(None, name='unit_start_%s' % repeat)
        unit_end = State(None, name='unit_end_%s' % repeat)
        model.add_states(insert_states + match_states + delete_states +
                         [unit_start, unit_end])
        # Index of the last match/delete column in this unit.
        n = len(delete_states) - 1

        # Chain the units: first unit hangs off model.start, later units
        # off the previous unit's end state.
        if repeat > 0:
            model.add_transition(last_end, unit_start, 1)
        else:
            model.add_transition(model.start, unit_start, 1)

        # Only the final unit may terminate the model.
        if repeat == copies - 1:
            model.add_transition(unit_end, model.end, 1)

        # Entry edges into the first profile column.
        model.add_transition(unit_start, match_states[0],
                             transitions['unit_start']['M1'])
        model.add_transition(unit_start, delete_states[0],
                             transitions['unit_start']['D1'])
        model.add_transition(unit_start, insert_states[0],
                             transitions['unit_start']['I0'])

        model.add_transition(insert_states[0], insert_states[0],
                             transitions['I0']['I0'])
        model.add_transition(insert_states[0], delete_states[0],
                             transitions['I0']['D1'])
        model.add_transition(insert_states[0], match_states[0],
                             transitions['I0']['M1'])

        # Exit edges from the last column into unit_end (or its trailing
        # insert state).
        model.add_transition(delete_states[n], unit_end,
                             transitions['D%s' % (n + 1)]['unit_end'])
        model.add_transition(delete_states[n], insert_states[n + 1],
                             transitions['D%s' % (n + 1)]['I%s' % (n + 1)])

        model.add_transition(match_states[n], unit_end,
                             transitions['M%s' % (n + 1)]['unit_end'])
        model.add_transition(match_states[n], insert_states[n + 1],
                             transitions['M%s' % (n + 1)]['I%s' % (n + 1)])

        model.add_transition(insert_states[n + 1], insert_states[n + 1],
                             transitions['I%s' % (n + 1)]['I%s' % (n + 1)])
        model.add_transition(insert_states[n + 1], unit_end,
                             transitions['I%s' % (n + 1)]['unit_end'])

        # Interior column-to-column edges (M/D/I of column i to column i+1).
        for i in range(1, len(matches) + 1):
            model.add_transition(match_states[i - 1], insert_states[i],
                                 transitions['M%s' % i]['I%s' % i])
            model.add_transition(delete_states[i - 1], insert_states[i],
                                 transitions['D%s' % i]['I%s' % i])
            model.add_transition(insert_states[i], insert_states[i],
                                 transitions['I%s' % i]['I%s' % i])
            if i < len(matches):
                model.add_transition(insert_states[i], match_states[i],
                                     transitions['I%s' % i]['M%s' % (i + 1)])
                model.add_transition(insert_states[i], delete_states[i],
                                     transitions['I%s' % i]['D%s' % (i + 1)])

                model.add_transition(match_states[i - 1], match_states[i],
                                     transitions['M%s' % i]['M%s' % (i + 1)])
                model.add_transition(match_states[i - 1], delete_states[i],
                                     transitions['M%s' % i]['D%s' % (i + 1)])

                model.add_transition(delete_states[i - 1], match_states[i],
                                     transitions['D%s' % i]['M%s' % (i + 1)])
                model.add_transition(delete_states[i - 1], delete_states[i],
                                     transitions['D%s' % i]['D%s' % (i + 1)])

        last_end = unit_end

    # merge=None keeps the silent start/end scaffolding states distinct.
    model.bake(merge=None)
    return model
def main():
    """Leave-one-out cross-validation of a 4-state gait-phase HMM.

    Python 2 / ROS script: loads per-subject .mat gait trials, derives a
    first-derivative feature, builds a 4-state (hs/ff/ho/sw) Gaussian HMM
    per leave-one-out subject, trains it with Baum-Welch on the remaining
    subjects, Viterbi-decodes the held-out trial, and reports mean time and
    coefficient of variation per gait phase and per stride.
    """
    rospy.init_node('hmm_trainer')
    # NOTE(review): phase_pub is created but never published to in this
    # function — possibly leftover from an online version.
    phase_pub = rospy.Publisher('/phase', Int32, queue_size=10)
    rospack = rospkg.RosPack()
    packpath = rospack.get_path('exo_control')
    datapath = packpath + "/log/mat_files/"
    verbose = rospy.get_param('~verbose', False)

    """Print console output into text file"""
    # Redirects all print output for the rest of the process.
    sys.stdout = open(packpath + "/log/results/leave-one-out_cross_validation_cov.txt", "w")

    """Data loading"""
    n_trials = 3
    # NOTE(review): n_sub is unused below — the subject lists drive the loops.
    n_sub = 9
    healthy_subs = ["daniel", "erika", "felipe", "jonathan", "luis", "nathalia", "paula", "pedro", "tatiana"]
    patients = ["andres", "carlos", "carmen", "carolina", "catalina", "claudia", "emmanuel", "fabian", "gustavo"]
    study_subs = [healthy_subs, patients]

    # dataset[group][subject] -> per-signal lists indexed by trial.
    dataset = [{} for x in range(len(study_subs))]
    for i in range(len(study_subs)):
        for sub in study_subs[i]:
            dataset[i][sub] = {"gyro_y": [[] for x in range(n_trials)],
                               "fder_gyro_y": [[] for x in range(n_trials)],
                               "time": [[] for x in range(n_trials)],
                               "labels": [[] for x in range(n_trials)],
                               "Fs_fsr": 0.0}

    # Fill signals from <sub>_proc_data<trial>.mat; "fder_gyro_y" is
    # computed later rather than loaded, "Fs_fsr" is a scalar.
    for group in dataset:
        for sub,data in group.iteritems():
            for trial in range(n_trials):
                mat_file = scio.loadmat(datapath + sub + "_proc_data" + str(trial+1) + ".mat")
                for signal in data:
                    if signal not in ["pathol","fder_gyro_y"]:
                        if signal == "Fs_fsr":
                            data[signal] = mat_file[signal][0][0]
                        else:
                            data[signal][trial] = mat_file[signal][0]
    del mat_file

    """Feature extraction"""
    """First derivative"""
    # Central difference in the interior; endpoints copy the raw sample.
    for group in dataset:
        for sub,data in group.iteritems():
            for trial in range(n_trials):
                der = []
                gyro_y = data["gyro_y"][trial]
                der.append(gyro_y[0])
                for i in range(1,len(gyro_y)-1):
                    der.append((gyro_y[i+1]-gyro_y[i-1])/2)
                der.append(gyro_y[-1])
                data["fder_gyro_y"][trial] = der
    del der, sub, data

    """Global variables of cHMM"""
    startprob = [0.25, 0.25, 0.25, 0.25]
    # hs=heel strike, ff=flat foot, ho=heel off, sw=swing (labels 0..3).
    state_names = ['hs', 'ff', 'ho', 'sw']
    n_classes = 4
    n_signals = 2
    # NOTE(review): tol is unused in this function.
    tol = 6e-2       # Tolerance window of 60 ms

    # pathology = 0
    for pathology in range(len(dataset)):
        if pathology == 0:
            rospy.logwarn("**Leave-one-out cross validation with HEALTHY subjects**")
            print "**Leave-one-out cross validation with HEALTHY subjects**"
        else:
            rospy.logwarn("**Leave-one-out cross validation with PATIENTS**")
            print "**Leave-one-out cross validation with PATIENTS**"
    # if True:
        for lou_sub,lou_data in dataset[pathology].iteritems():       # Iterate through leave-one-out subject's data
            rospy.logwarn("Leave " + lou_sub + " out:")
            print "Leave " + lou_sub + " out:"

            # Empirical label-to-label transition counts, row-normalized
            # into probabilities, from the left-out subject's own labels.
            t = np.zeros((4, 4))        # Transition matrix
            prev = -1
            for trial in range(n_trials):
                for label in lou_data["labels"][trial]:
                    if prev == -1:
                        prev = label
                    t[prev][label] += 1.0
                    prev = label
            t = normalize(t, axis=1, norm='l1')
            if verbose: rospy.logwarn("TRANSITION MATRIX\n" + str(t))

            # Group this subject's 2-signal samples by gait-phase label.
            class_data = [[] for x in range(n_classes)]
            # full_lou_data = []
            # full_lou_labels = []
            for trial in range(n_trials):
                for sample in range(len(lou_data["gyro_y"][trial])):
                    d = [lou_data["gyro_y"][trial][sample], lou_data["fder_gyro_y"][trial][sample]]
                    l = lou_data["labels"][trial][sample]
                    # full_lou_data.append(d)
                    # full_lou_labels.append(l)
                    class_data[l].append(d)

            """Multivariate Gaussian Distributions for each hidden state"""
            class_means = [[[] for x in range(n_signals)] for i in range(n_classes)]
            class_vars = [[[] for x in range(n_signals)] for i in range(n_classes)]
            class_std = [[[] for x in range(n_signals)] for i in range(n_classes)]
            class_cov = []

            # Per-class full covariance plus per-signal moments.
            for state in range(n_classes):
                cov = np.ma.cov(np.array(class_data[state]), rowvar=False)
                class_cov.append(cov)
                for signal in range(n_signals):
                    class_means[state][signal] = np.array(class_data[state][:])[:, [signal]].mean(axis=0)
                    class_vars[state][signal] = np.array(class_data[state][:])[:, [signal]].var(axis=0)
                    class_std[state][signal] = np.array(class_data[state][:])[:, [signal]].std(axis=0)

            # lou_trial = 1
            # if True:
            for lou_trial in range(n_trials):
                rospy.logwarn("Trial {}".format(lou_trial+1))
                print("Trial {}".format(lou_trial+1))

                """Classifier initialization"""
                # One multivariate-Gaussian state per gait phase.
                # distros = []
                hmm_states = []
                for state in range(n_classes):
                    dis = MGD\
                        (np.array(class_means[state]).flatten(),
                         np.array(class_cov[state]))
                    st = State(dis, name=state_names[state])
                    # distros.append(dis)
                    hmm_states.append(st)
                model = HMM(name="Gait")

                model.add_states(hmm_states)
                """Initial transitions"""
                for state in range(n_classes):
                    model.add_transition(model.start, hmm_states[state], startprob[state])
                """Left-right model"""
                for i in range(n_classes):
                    for j in range(n_classes):
                        model.add_transition(hmm_states[i], hmm_states[j], t[i][j])

                model.bake()

                """Create training and test data"""
                x_train = []
                x_test = []
                test_gyro_y = lou_data["gyro_y"][lou_trial]
                test_fder_gyro_y = lou_data["fder_gyro_y"][lou_trial]
                """Create test data with n-th trial of leave-one-out subject"""
                for sample in range(len(test_gyro_y)):
                    x_test.append([test_gyro_y[sample], test_fder_gyro_y[sample]])

                """Create training data with n-1 trials of the rest of subjects (healthy group)"""
                # NOTE(review): training data always comes from dataset[0]
                # (healthy group) even when evaluating patients, and
                # count_trials limits each subject to a single trial.
                for train_sub,train_data in dataset[0].iteritems():
                    count_trials = 0
                    if lou_sub != train_sub:
                    # if train_sub == "daniel":
                        for trial in range(n_trials):
                            if trial != lou_trial and count_trials < 1:
                                # rospy.logwarn(trial)
                                train_gyro_y = train_data["gyro_y"][trial]
                                train_fder_gyro_y = train_data["fder_gyro_y"][trial]
                                for sample in range(len(train_gyro_y)):
                                    x_train.append([train_gyro_y[sample], train_fder_gyro_y[sample]])
                                count_trials += 1
                rospy.logwarn(len(x_train))
                # pomegranate expects a list of sequences; wrap as one sequence.
                x_train = list([x_train])

                """Training"""
                rospy.logwarn("Training HMM...")
                model.fit(x_train, algorithm='baum-welch', verbose=True)
                # model.fit(x_train, algorithm='viterbi', verbose='True')

                """Find most-likely sequence"""
                rospy.logwarn("Finding most-likely sequence...")
                logp, path = model.viterbi(x_test)
                # rospy.logwarn(len(path))
                # rospy.logwarn(len(lou_data["labels"][lou_trial]))

                # Map decoded state names back to numeric labels.
                class_labels = []
                for i in range(len(lou_data["labels"][lou_trial])):
                    path_phase = path[i][1].name
                    for state in range(n_classes):
                        if path_phase == state_names[state]:
                            class_labels.append(state)
                '''Saving classifier labels into csv file'''
                # np.savetxt(packpath+"/log/inter_labels/"+lou_sub+"_labels.csv", class_labels, delimiter=",", fmt='%s')
                # rospy.logwarn("csv file with classifier labels was saved.")
                # lou_data["labels"][lou_trial] = lou_data["labels"][lou_trial][1:]

                """Calculate mean time (MT) of stride and each gait phase and Coefficient of Variation (CoV)"""
                rospy.logwarn("Mean time (MT) and Coefficient of Variance (CoV)")
                print "Mean time (MT) and Coefficient of Variance (CoV)"

                curr_label = -1
                count = 0
                n_phases = 0
                stride_samples = 0
                phases_time = [[] for x in range(n_classes)]
                stride_time = []
                # On each label change, `count` holds the sample length of
                # the phase that just ended; label-1 indexes that previous
                # phase (cyclic: new label 0 -> index -1 == 'sw').
                # NOTE(review): this assumes phases always advance in the
                # order hs->ff->ho->sw — verify against the decoder output.
                for label in class_labels:
                    if curr_label != label:
                        n_phases += 1
                        stride_samples += count
                        if label == 0:  # Gait start: HS
                            if n_phases == 4:   # If a whole gait cycle has past
                                stride_time.append(stride_samples/lou_data["Fs_fsr"])
                            n_phases = 0
                            stride_samples = 0
                        phases_time[label-1].append(count/lou_data["Fs_fsr"])
                        curr_label = label
                        count = 1
                    else:
                        count += 1.0
                # Report MT +/- std and CoV (%) per phase, then per stride.
                for phase in range(n_classes):
                    mean_time = np.mean(phases_time[phase])
                    phase_std = np.std(phases_time[phase])
                    rospy.logwarn("(" + state_names[phase] + ")")
                    print "(" + state_names[phase] + ")"
                    rospy.logwarn("Mean time: " + str(mean_time) + " + " + str(phase_std))
                    print "Mean time: " + str(mean_time) + " + " + str(phase_std)
                    rospy.logwarn("CoV: " + str(phase_std/mean_time*100.0))
                    print("CoV: " + str(phase_std/mean_time*100.0))
                mean_time = np.mean(stride_time)
                phase_std = np.std(stride_time)
                rospy.logwarn("(Stride)")
                print "(Stride)"
                rospy.logwarn("Mean time: " + str(mean_time) + " + " + str(phase_std))
                print "Mean time: " + str(mean_time) + " + " + str(phase_std)
                rospy.logwarn("CoV: " + str(phase_std/mean_time*100.0))
                print("CoV: " + str(phase_std/mean_time*100.0))
Ejemplo n.º 25
0
    X_2 = X[y == 1]
    X_3 = X[y == 2]

else:
    X_1 = X[2000:4000]
    X_2 = X[400:800]
    X_3 = X[7000:8000]
# Fit one multivariate Gaussian per data subset (X_1..X_3 are sliced from
# X above this fragment) and wrap each in an HMM state.
a = MultivariateGaussianDistribution.from_samples(X_1)
b = MultivariateGaussianDistribution.from_samples(X_2)
c = MultivariateGaussianDistribution.from_samples(X_3)
s1 = State(a, name="M1")
s2 = State(b, name="M2")
s3 = State(c, name="M3")

# 3-state HMM: near-uniform start probabilities (0.34/0.33/0.33),
# sticky self-transitions (0.9) with symmetric 0.05 cross-transitions.
hmm = HiddenMarkovModel()
hmm.add_states(s1, s2, s3)
hmm.add_transition(hmm.start, s1, 0.34)
hmm.add_transition(hmm.start, s3, 0.33)
hmm.add_transition(hmm.start, s2, 0.33)

hmm.add_transition(s1, s1, 0.9)
hmm.add_transition(s1, s2, 0.05)
hmm.add_transition(s1, s3, 0.05)

hmm.add_transition(s2, s1, 0.05)
hmm.add_transition(s2, s3, 0.05)
hmm.add_transition(s2, s2, 0.9)

hmm.add_transition(s3, s3, 0.9)
hmm.add_transition(s3, s2, 0.05)
hmm.add_transition(s3, s1, 0.05)
Ejemplo n.º 26
0
# Intron-2 states for a gene-structure HMM. The polypyrimidine-tract
# states share close_py_dist (defined above this fragment).
st_intron_2_py0 = State(close_py_dist, name='i2cpy0')
st_intron_2_py1 = State(close_py_dist, name='i2cpy1')
st_intron_2_py2 = State(close_py_dist, name='i2cpy2')
st_intron_2_py3 = State(close_py_dist, name='i2cpy3')
# NOTE(review): these weights sum to 1.18, not 1.0 — pomegranate may
# normalize, but confirm 'a': 0.2 is not a typo for 0.02.
st_intron_2_fill = State(DiscreteDistribution({'c': 0.49, 't': 0.49, 'a': 0.2}), name='i2fill')

st_intron_2c = State(DiscreteDistribution({'c': 0.5, 't': 0.5}), name='intron2c')
states.append(st_intron_2c)
st_intron_2a = State(DiscreteDistribution({'a': 1}), name='intron2a')
states.append(st_intron_2a)
st_intron_2g = State(DiscreteDistribution({'g': 1}), name='intron2g')
states.append(st_intron_2g)

# NOTE(review): the py0..py3 and fill states above are never appended to
# `states` in this fragment, so they are not registered with the model
# here — verify they are added elsewhere.
model = HiddenMarkovModel()
model.add_states(states)

#transition_maker(model,
#                 [st_back],
#                 [(st_back, 0.9), (st_start_triplet0, 0.1)])

#transition_maker(model,
#                 [st_start_triplet0],
#                 [(st_start_triplet1, 1)])

#transition_maker(model,
#                 [st_start_triplet1],
#                 [(st_start_triplet2, 1)])

#transition_maker(model,
#                 [st_start_triplet2],
Ejemplo n.º 27
0
starting_tag_count=starting_counts(starting_tag_list)  # number of times a tag occurred at the start of a sentence
ending_tag_count=ending_counts(ending_tag_list)      # number of times a tag occurred at the end of a sentence



# Build one HMM state per tag whose emission is the normalized
# word-frequency distribution P(word | tag).
to_pass_states = []
for tag, words_dict in tag_words_count.items():
    total = float(sum(words_dict.values()))
    distribution = {word: count/total for word, count in words_dict.items()}
    tag_emissions = DiscreteDistribution(distribution)
    tag_state = State(tag_emissions, name=tag)
    to_pass_states.append(tag_state)


# NOTE(review): basic_model is defined above this fragment.
basic_model.add_states(to_pass_states)


# Start probability per tag.
# NOTE(review): this divides by tags_count[tag] (total occurrences of the
# tag), not by the total number of sentences — confirm that is intended.
start_prob={}

for tag in tags:
    start_prob[tag]=starting_tag_count[tag]/tags_count[tag]

# Wire model.start -> each tag state with its start probability.
for tag_state in to_pass_states :
    basic_model.add_transition(basic_model.start,tag_state,start_prob[tag_state.name])

end_prob={}

for tag in tags:
    end_prob[tag]=ending_tag_count[tag]/tags_count[tag]
for tag_state in to_pass_states :
Ejemplo n.º 28
0
def get_suffix_matcher_hmm(pattern):
    """Build a profile HMM that matches any suffix of ``pattern``.

    ``unit_start`` fans out to every match state with equal probability
    (0.98 split over the pattern length), so decoding may enter the
    profile at any position and therefore align a suffix of the pattern.
    Match states strongly favor their pattern base (0.97); insert states
    emit uniformly; delete states are silent.
    """
    model = Model(name="Suffix Matcher HMM Model")

    # Uniform background emitted by every insert state.
    background = DiscreteDistribution({
        'A': 0.25,
        'C': 0.25,
        'G': 0.25,
        'T': 0.25
    })
    hmm_name = 'suffix'
    length = len(pattern)

    # Insert states I0..In (one more than the number of pattern columns).
    inserts = [State(background, name='I%s_%s' % (i, hmm_name))
               for i in range(length + 1)]

    # Match states M1..Mn, each peaked on its pattern base.
    matches = []
    for i in range(length):
        emit = dict({'A': 0.01, 'C': 0.01, 'G': 0.01, 'T': 0.01})
        emit[pattern[i]] = 0.97
        matches.append(
            State(DiscreteDistribution(emit),
                  name='M%s_%s' % (str(i + 1), hmm_name)))

    # Silent delete states D1..Dn.
    deletes = [State(None, name='D%s_%s' % (str(i + 1), hmm_name))
               for i in range(length)]

    unit_start = State(None, name='suffix_start_%s' % hmm_name)
    unit_end = State(None, name='suffix_end_%s' % hmm_name)
    model.add_states(inserts + matches + deletes + [unit_start, unit_end])
    tail = length - 1  # index of the final profile column

    model.add_transition(model.start, unit_start, 1)

    model.add_transition(unit_end, model.end, 1)

    # Entry: small mass to D1/I0, the rest spread evenly over all matches.
    model.add_transition(unit_start, deletes[0], 0.01)
    model.add_transition(unit_start, inserts[0], 0.01)
    for m in matches:
        model.add_transition(unit_start, m, 0.98 / length)

    model.add_transition(inserts[0], inserts[0], 0.01)
    model.add_transition(inserts[0], deletes[0], 0.01)
    model.add_transition(inserts[0], matches[0], 0.98)

    # Exit edges from the final column.
    model.add_transition(deletes[tail], unit_end, 0.99)
    model.add_transition(deletes[tail], inserts[tail + 1], 0.01)

    model.add_transition(matches[tail], unit_end, 0.99)
    model.add_transition(matches[tail], inserts[tail + 1], 0.01)

    model.add_transition(inserts[tail + 1], inserts[tail + 1],
                         0.01)
    model.add_transition(inserts[tail + 1], unit_end, 0.99)

    # Interior column-to-column wiring.
    for i in range(length):
        model.add_transition(matches[i], inserts[i + 1], 0.01)
        model.add_transition(deletes[i], inserts[i + 1], 0.01)
        model.add_transition(inserts[i + 1], inserts[i + 1], 0.01)
        if i < length - 1:
            model.add_transition(inserts[i + 1], matches[i + 1],
                                 0.98)
            model.add_transition(inserts[i + 1], deletes[i + 1],
                                 0.01)

            model.add_transition(matches[i], matches[i + 1], 0.98)
            model.add_transition(matches[i], deletes[i + 1], 0.01)

            model.add_transition(deletes[i], deletes[i + 1], 0.01)
            model.add_transition(deletes[i], matches[i + 1], 0.98)

    # Keep silent scaffolding states distinct.
    model.bake(merge=None)

    return model
Ejemplo n.º 29
0
tag_ends = ending_counts(data.training_set.Y)

basic_model = HiddenMarkovModel(name="base-hmm-tagger")

# One state per tag; emission is P(word | tag) from training counts.
prob_emission = {}
states = {}
for tag, word_counts in emission_counts.items():
    prob_emission = {
        word: word_count / sum(word_counts.values())
        for word, word_count in word_counts.items()
    }
    states[tag] = State(DiscreteDistribution(prob_emission), name=tag)

unique_tags = list(data.training_set.tagset)
for tag in unique_tags:
    basic_model.add_states(states[tag])

# add the starting edges
# P(tag | <start>) = #sentences starting with tag / #sentences.
for tag, tag_count in tag_starts.items():
    basic_model.add_transition(basic_model.start, states[tag],
                               tag_count / len(data.training_set.X))

# add the ending edges
# NOTE(review): this also divides by the number of sentences; the other
# tagger snippet in this file divides end counts by the tag's total
# occurrences instead — confirm which normalization is intended.
for tag, tag_count in tag_ends.items():
    basic_model.add_transition(states[tag], basic_model.end,
                               tag_count / len(data.training_set.X))

# add the transitions
for bi_tag, tag_count in tag_bigrams.items():
    tag0 = bi_tag[0]
    tag1 = bi_tag[1]
Ejemplo n.º 30
0
def build_reference_repeat_finder_hmm(patterns, copies=1):
    """Build (and optionally fit) a repeat-finder profile HMM.

    Constructs ``copies`` chained match/insert/delete units for the first
    pattern, flanked by uniform "random match" states that absorb sequence
    before the first unit and after the last. If more than one pattern is
    given, the baked model is refined by Viterbi training on each pattern
    tiled ``copies`` times.

    Parameters
    ----------
    patterns : list of repeat-unit strings; patterns[0] seeds the profile.
    copies : int, number of repeat units to chain (default 1).

    Returns
    -------
    The baked (and possibly fitted) pomegranate ``Model``.
    """
    pattern = patterns[0]
    model = Model(name="HMM Model")
    # Uniform background shared by inserts and the flanking states.
    insert_distribution = DiscreteDistribution({
        'A': 0.25,
        'C': 0.25,
        'G': 0.25,
        'T': 0.25
    })

    last_end = None
    # Flanking states soak up non-repeat sequence around the repeats.
    start_random_matches = State(insert_distribution,
                                 name='start_random_matches')
    end_random_matches = State(insert_distribution, name='end_random_matches')
    model.add_states([start_random_matches, end_random_matches])
    for repeat in range(copies):
        insert_states = []
        match_states = []
        delete_states = []
        # Insert states I0..In (one more than the pattern length).
        for i in range(len(pattern) + 1):
            insert_states.append(
                State(insert_distribution, name='I%s_%s' % (i, repeat)))

        # Match states M1..Mn, each peaked (0.97) on its pattern base.
        for i in range(len(pattern)):
            distribution_map = dict({
                'A': 0.01,
                'C': 0.01,
                'G': 0.01,
                'T': 0.01
            })
            distribution_map[pattern[i]] = 0.97
            match_states.append(
                State(DiscreteDistribution(distribution_map),
                      name='M%s_%s' % (str(i + 1), repeat)))

        # Silent delete states D1..Dn.
        for i in range(len(pattern)):
            delete_states.append(
                State(None, name='D%s_%s' % (str(i + 1), repeat)))

        unit_start = State(None, name='unit_start_%s' % repeat)
        unit_end = State(None, name='unit_end_%s' % repeat)
        model.add_states(insert_states + match_states + delete_states +
                         [unit_start, unit_end])
        # Index of the last profile column in this unit.
        last = len(delete_states) - 1

        # Chain units; the first unit can be entered directly or via the
        # self-looping start_random_matches flank.
        if repeat > 0:
            model.add_transition(last_end, unit_start, 0.5)
        else:
            model.add_transition(model.start, unit_start, 0.5)
            model.add_transition(model.start, start_random_matches, 0.5)
            model.add_transition(start_random_matches, unit_start, 0.5)
            model.add_transition(start_random_matches, start_random_matches,
                                 0.5)

        # Every unit may exit into the trailing flank; only the last unit
        # may terminate the model.
        model.add_transition(unit_end, end_random_matches, 0.5)
        if repeat == copies - 1:
            model.add_transition(unit_end, model.end, 0.5)
            model.add_transition(end_random_matches, end_random_matches, 0.5)
            model.add_transition(end_random_matches, model.end, 0.5)

        model.add_transition(unit_start, match_states[0], 0.98)
        model.add_transition(unit_start, delete_states[0], 0.01)
        model.add_transition(unit_start, insert_states[0], 0.01)

        model.add_transition(insert_states[0], insert_states[0], 0.01)
        model.add_transition(insert_states[0], delete_states[0], 0.01)
        model.add_transition(insert_states[0], match_states[0], 0.98)

        # Exit edges from the final column.
        model.add_transition(delete_states[last], unit_end, 0.99)
        model.add_transition(delete_states[last], insert_states[last + 1],
                             0.01)

        model.add_transition(match_states[last], unit_end, 0.99)
        model.add_transition(match_states[last], insert_states[last + 1], 0.01)

        model.add_transition(insert_states[last + 1], insert_states[last + 1],
                             0.01)
        model.add_transition(insert_states[last + 1], unit_end, 0.99)

        # Interior column-to-column wiring.
        for i in range(0, len(pattern)):
            model.add_transition(match_states[i], insert_states[i + 1], 0.01)
            model.add_transition(delete_states[i], insert_states[i + 1], 0.01)
            model.add_transition(insert_states[i + 1], insert_states[i + 1],
                                 0.01)
            if i < len(pattern) - 1:
                model.add_transition(insert_states[i + 1], match_states[i + 1],
                                     0.98)
                model.add_transition(insert_states[i + 1],
                                     delete_states[i + 1], 0.01)

                model.add_transition(match_states[i], match_states[i + 1],
                                     0.98)
                model.add_transition(match_states[i], delete_states[i + 1],
                                     0.01)

                model.add_transition(delete_states[i], delete_states[i + 1],
                                     0.01)
                model.add_transition(delete_states[i], match_states[i + 1],
                                     0.98)

        last_end = unit_end

    model.bake()
    if len(patterns) > 1:
        # model.fit(patterns, algorithm='baum-welch', transition_pseudocount=1, use_pseudocount=True)
        # Refine against every pattern tiled `copies` times.
        fit_patterns = [pattern * copies for pattern in patterns]
        model.fit(fit_patterns,
                  algorithm='viterbi',
                  transition_pseudocount=1,
                  use_pseudocount=True)

    return model
# P(word | tag): given the tag, the probability of the word
# = count of the word with that tag / count of all words with that tag.

emission_tag_word_counts = pair_counts(data.training_set.Y, data.training_set.X) #{'NOUN':{'word': count}}
tag_unigrams = unigram_counts(data.training_set.Y) # count of occurrences of each tag
tag_bigrams = bigram_counts(data.training_set.Y)
starting_tag_count = starting_counts(data.training_set.Y) # number of times a tag occurred at the start
ending_tag_count = ending_counts(data.training_set.Y)

# Initialize a dictionary to store the states object by tag key
# NOTE(review): basic_model is defined above this fragment.
states = dict()
for tag, words_dict in emission_tag_word_counts.items():
    tag_count = tag_unigrams[tag]
    emission_distribution = {word: word_count/tag_count for word, word_count in words_dict.items()}
    prob_word_given_tag = DiscreteDistribution(emission_distribution)
    word_given_tag_state = State(prob_word_given_tag, name=tag)
    basic_model.add_states(word_given_tag_state)
    states[tag] = word_given_tag_state

#Part2: transition probability of start and end state

# add start state prob = p(tag| start state) = p(start state,tag)/p(start state)
# = num of times a tag appears at the start / number of first tags in the whole dataset
# add end state prob = p(tag| end state) = p(end state,tag)/p(tag) ****** this is different from the start state prob!!!
# = num of times a tag appears at the end / number of that tag in the whole dataset
for tag in data.training_set.tagset:
    
    tag_start_prob = starting_tag_count[tag]/sum(starting_tag_count.values())
    basic_model.add_transition(basic_model.start, states[tag], tag_start_prob)
    tag_end_prob = ending_tag_count[tag]/tag_unigrams[tag]
    basic_model.add_transition(states[tag], basic_model.end, tag_end_prob)