# --- Beispiel #1 (scraped example separator) ---
# Transitions out of state s33: strong self-loop (0.92) plus small escape
# probabilities to neighboring states.
hmm.add_transition(s33, s3, 0.01)
hmm.add_transition(s33, s33, 0.92)
hmm.add_transition(s33, s333, 0.01)

# Transitions out of s333: self-loop (0.92) plus a uniform 0.01 escape to
# every other state.
hmm.add_transition(s333, s1, 0.01)
hmm.add_transition(s333, s11, 0.01)
hmm.add_transition(s333, s111, 0.01)
hmm.add_transition(s333, s2, 0.01)
hmm.add_transition(s333, s22, 0.01)
hmm.add_transition(s333, s222, 0.01)
hmm.add_transition(s333, s3, 0.01)
hmm.add_transition(s333, s33, 0.01)
hmm.add_transition(s333, s333, 0.92)

# Finalize the topology, then train and decode.
hmm.bake()
hmm.fit(X)  # , weights=w) hmm does not support weights in pomegranate
preds = hmm.predict(X)
probs = hmm.predict_proba(X)

# Attach the decoded state sequence to the dataframe as a categorical column.
data_thr['preds'] = pd.Series(preds).astype("category")

color_key = ["red", "blue", "yellow", "grey", "black", "purple", "pink",
             "brown", "green", "orange"]  # Spectral9
# NOTE(review): "+2" keeps two colors beyond the number of distinct predicted
# states -- presumably headroom for states absent from this decoding; confirm.
color_key = color_key[:len(set(preds))+2]

# Per-state Gaussian parameters; parameters[0] appears to hold the means and
# parameters[1] the covariances, over 9 emitting states -- TODO confirm.
covs = np.array([np.array(hmm.states[m].distribution.parameters[1])
                 for m in range(9)])
means = np.array([np.array(hmm.states[m].distribution.parameters[0])
                  for m in range(9)])

# transform cov for non-standardized data:
# --- Beispiel #2 (scraped example separator) ---
def build_reference_repeat_finder_hmm(patterns, copies=1):
    """Build a profile HMM that matches tandem copies of a repeat pattern.

    The model (pomegranate) is built from ``patterns[0]`` only.  Each of the
    *copies* repeat units gets the classic profile-HMM layout: match states
    ``M1_r..Mn_r`` whose emission puts 0.97 on the expected base, insert
    states ``I0_r..In_r`` with uniform ACGT emissions, and silent delete
    states ``D1_r..Dn_r``.  Silent ``unit_start_r`` / ``unit_end_r`` states
    chain consecutive units, while the uniform ``start_random_matches`` /
    ``end_random_matches`` states absorb flanking, non-repeat sequence.

    If several patterns are supplied, the baked model is additionally
    refined by Viterbi training on every pattern repeated *copies* times.

    Returns the baked (and possibly fitted) ``Model``.
    """
    pattern = patterns[0]  # the profile is built from the first pattern only
    model = Model(name="HMM Model")
    # Uniform background emission shared by all insert states.
    insert_distribution = DiscreteDistribution({
        'A': 0.25,
        'C': 0.25,
        'G': 0.25,
        'T': 0.25
    })

    last_end = None  # unit_end of the previous repeat (None for the first)
    start_random_matches = State(insert_distribution,
                                 name='start_random_matches')
    end_random_matches = State(insert_distribution, name='end_random_matches')
    model.add_states([start_random_matches, end_random_matches])
    for repeat in range(copies):
        insert_states = []
        match_states = []
        delete_states = []
        # n+1 insert states: I0 before the first match, Ii after match i.
        for i in range(len(pattern) + 1):
            insert_states.append(
                State(insert_distribution, name='I%s_%s' % (i, repeat)))

        # Match states emit the expected base with 0.97, the rest 0.01 each.
        for i in range(len(pattern)):
            distribution_map = dict({
                'A': 0.01,
                'C': 0.01,
                'G': 0.01,
                'T': 0.01
            })
            distribution_map[pattern[i]] = 0.97
            match_states.append(
                State(DiscreteDistribution(distribution_map),
                      name='M%s_%s' % (str(i + 1), repeat)))

        # Delete states are silent (no emission distribution).
        for i in range(len(pattern)):
            delete_states.append(
                State(None, name='D%s_%s' % (str(i + 1), repeat)))

        unit_start = State(None, name='unit_start_%s' % repeat)
        unit_end = State(None, name='unit_end_%s' % repeat)
        model.add_states(insert_states + match_states + delete_states +
                         [unit_start, unit_end])
        last = len(delete_states) - 1

        # Wire the unit entry: from the previous unit's end, or -- for the
        # first unit -- from model.start / the flanking background state.
        if repeat > 0:
            model.add_transition(last_end, unit_start, 0.5)
        else:
            model.add_transition(model.start, unit_start, 0.5)
            model.add_transition(model.start, start_random_matches, 0.5)
            model.add_transition(start_random_matches, unit_start, 0.5)
            model.add_transition(start_random_matches, start_random_matches,
                                 0.5)

        model.add_transition(unit_end, end_random_matches, 0.5)
        if repeat == copies - 1:
            model.add_transition(unit_end, model.end, 0.5)
            model.add_transition(end_random_matches, end_random_matches, 0.5)
            model.add_transition(end_random_matches, model.end, 0.5)

        # Unit entry fans out to M1 (likely) or D1 / I0 (unlikely).
        model.add_transition(unit_start, match_states[0], 0.98)
        model.add_transition(unit_start, delete_states[0], 0.01)
        model.add_transition(unit_start, insert_states[0], 0.01)

        model.add_transition(insert_states[0], insert_states[0], 0.01)
        model.add_transition(insert_states[0], delete_states[0], 0.01)
        model.add_transition(insert_states[0], match_states[0], 0.98)

        # Last column exits into unit_end (via the trailing insert state).
        model.add_transition(delete_states[last], unit_end, 0.99)
        model.add_transition(delete_states[last], insert_states[last + 1],
                             0.01)

        model.add_transition(match_states[last], unit_end, 0.99)
        model.add_transition(match_states[last], insert_states[last + 1], 0.01)

        model.add_transition(insert_states[last + 1], insert_states[last + 1],
                             0.01)
        model.add_transition(insert_states[last + 1], unit_end, 0.99)

        # NOTE(review): for i == last the loop below re-adds the
        # M[last]->I[last+1], D[last]->I[last+1] and I[last+1] self-loop
        # transitions created just above; pomegranate accumulates duplicate
        # edges at bake time, so the effective weights differ slightly from
        # the literals -- confirm this is intended.
        for i in range(0, len(pattern)):
            model.add_transition(match_states[i], insert_states[i + 1], 0.01)
            model.add_transition(delete_states[i], insert_states[i + 1], 0.01)
            model.add_transition(insert_states[i + 1], insert_states[i + 1],
                                 0.01)
            if i < len(pattern) - 1:
                model.add_transition(insert_states[i + 1], match_states[i + 1],
                                     0.98)
                model.add_transition(insert_states[i + 1],
                                     delete_states[i + 1], 0.01)

                model.add_transition(match_states[i], match_states[i + 1],
                                     0.98)
                model.add_transition(match_states[i], delete_states[i + 1],
                                     0.01)

                model.add_transition(delete_states[i], delete_states[i + 1],
                                     0.01)
                model.add_transition(delete_states[i], match_states[i + 1],
                                     0.98)

        last_end = unit_end

    model.bake()
    if len(patterns) > 1:
        # model.fit(patterns, algorithm='baum-welch', transition_pseudocount=1, use_pseudocount=True)
        # Viterbi-train on every pattern expanded to the full repeat count.
        # NOTE(review): the comprehension variable shadows the outer 'pattern'.
        fit_patterns = [pattern * copies for pattern in patterns]
        model.fit(fit_patterns,
                  algorithm='viterbi',
                  transition_pseudocount=1,
                  use_pseudocount=True)

    return model
def main():
    """Leave-one-out cross-validation of a 4-state gait-phase HMM.

    Loads per-subject .mat recordings (shank gyro y-axis), derives the
    first derivative as a second feature, then -- leaving one subject out
    at a time -- builds a Gaussian-emission HMM per trial, trains it with
    Baum-Welch on the remaining subjects and evaluates the Viterbi
    decoding, reporting mean time and coefficient of variation per gait
    phase.  Python 2 code (print statements, dict.iteritems).

    NOTE(review): the tail of this function (from ``seq2 = test_data`` on)
    references names that are never defined here and appears to be pasted
    from a different script.
    """
    rospy.init_node('hmm_trainer')
    phase_pub = rospy.Publisher('/phase', Int32, queue_size=10)
    rospack = rospkg.RosPack()
    packpath = rospack.get_path('exo_control')
    datapath = packpath + "/log/mat_files/"
    verbose = rospy.get_param('~verbose', False)

    """Print console output into text file"""
    sys.stdout = open(packpath + "/log/results/leave-one-out_cross_validation_cov.txt", "w")

    """Data loading"""
    n_trials = 3
    n_sub = 9
    healthy_subs = ["daniel", "erika", "felipe", "jonathan", "luis", "nathalia", "paula", "pedro", "tatiana"]
    patients = ["andres", "carlos", "carmen", "carolina", "catalina", "claudia", "emmanuel", "fabian", "gustavo"]
    study_subs = [healthy_subs, patients]

    # dataset[group][subject] -> per-trial signal containers.
    dataset = [{} for x in range(len(study_subs))]
    for i in range(len(study_subs)):
        for sub in study_subs[i]:
            dataset[i][sub] = {"gyro_y": [[] for x in range(n_trials)],
                               "fder_gyro_y": [[] for x in range(n_trials)],
                               "time": [[] for x in range(n_trials)],
                               "labels": [[] for x in range(n_trials)],
                               "Fs_fsr": 0.0}

    # Fill the containers from the .mat files (one file per subject/trial).
    for group in dataset:
        for sub,data in group.iteritems():
            for trial in range(n_trials):
                mat_file = scio.loadmat(datapath + sub + "_proc_data" + str(trial+1) + ".mat")
                for signal in data:
                    if signal not in ["pathol","fder_gyro_y"]:
                        if signal == "Fs_fsr":
                            data[signal] = mat_file[signal][0][0]
                        else:
                            data[signal][trial] = mat_file[signal][0]
    del mat_file

    """Feature extraction"""
    """First derivative"""
    # Central difference in the interior, raw endpoint copies at the borders.
    for group in dataset:
        for sub,data in group.iteritems():
            for trial in range(n_trials):
                der = []
                gyro_y = data["gyro_y"][trial]
                der.append(gyro_y[0])
                for i in range(1,len(gyro_y)-1):
                    der.append((gyro_y[i+1]-gyro_y[i-1])/2)
                der.append(gyro_y[-1])
                data["fder_gyro_y"][trial] = der
    del der, sub, data

    """Global variables of cHMM"""
    startprob = [0.25, 0.25, 0.25, 0.25]
    # presumably heel-strike / flat-foot / heel-off / swing -- confirm.
    state_names = ['hs', 'ff', 'ho', 'sw']
    n_classes = 4
    n_signals = 2
    tol = 6e-2       # Tolerance window of 60 ms

    # pathology = 0
    for pathology in range(len(dataset)):
        if pathology == 0:
            rospy.logwarn("**Leave-one-out cross validation with HEALTHY subjects**")
            print "**Leave-one-out cross validation with HEALTHY subjects**"
        else:
            rospy.logwarn("**Leave-one-out cross validation with PATIENTS**")
            print "**Leave-one-out cross validation with PATIENTS**"
    # if True:
        for lou_sub,lou_data in dataset[pathology].iteritems():       # Iterate through leave-one-out subject's data
            rospy.logwarn("Leave " + lou_sub + " out:")
            print "Leave " + lou_sub + " out:"

            # Count label-to-label transitions over all trials of the
            # held-out subject, then row-normalize into a stochastic matrix.
            t = np.zeros((4, 4))        # Transition matrix
            prev = -1
            for trial in range(n_trials):
                for label in lou_data["labels"][trial]:
                    if prev == -1:
                        prev = label
                    t[prev][label] += 1.0
                    prev = label
            t = normalize(t, axis=1, norm='l1')
            if verbose: rospy.logwarn("TRANSITION MATRIX\n" + str(t))

            # Group the held-out subject's feature vectors by gait phase.
            class_data = [[] for x in range(n_classes)]
            # full_lou_data = []
            # full_lou_labels = []
            for trial in range(n_trials):
                for sample in range(len(lou_data["gyro_y"][trial])):
                    d = [lou_data["gyro_y"][trial][sample], lou_data["fder_gyro_y"][trial][sample]]
                    l = lou_data["labels"][trial][sample]
                    # full_lou_data.append(d)
                    # full_lou_labels.append(l)
                    class_data[l].append(d)

            """Multivariate Gaussian Distributions for each hidden state"""
            class_means = [[[] for x in range(n_signals)] for i in range(n_classes)]
            class_vars = [[[] for x in range(n_signals)] for i in range(n_classes)]
            class_std = [[[] for x in range(n_signals)] for i in range(n_classes)]
            class_cov = []

            # Per-phase mean/variance/std per signal, plus the full covariance.
            for state in range(n_classes):
                cov = np.ma.cov(np.array(class_data[state]), rowvar=False)
                class_cov.append(cov)
                for signal in range(n_signals):
                    class_means[state][signal] = np.array(class_data[state][:])[:, [signal]].mean(axis=0)
                    class_vars[state][signal] = np.array(class_data[state][:])[:, [signal]].var(axis=0)
                    class_std[state][signal] = np.array(class_data[state][:])[:, [signal]].std(axis=0)

            # lou_trial = 1
            # if True:
            for lou_trial in range(n_trials):
                rospy.logwarn("Trial {}".format(lou_trial+1))
                print("Trial {}".format(lou_trial+1))

                """Classifier initialization"""
                # One multivariate-Gaussian emission per gait phase.
                # distros = []
                hmm_states = []
                for state in range(n_classes):
                    dis = MGD\
                        (np.array(class_means[state]).flatten(),
                         np.array(class_cov[state]))
                    st = State(dis, name=state_names[state])
                    # distros.append(dis)
                    hmm_states.append(st)
                model = HMM(name="Gait")

                model.add_states(hmm_states)
                """Initial transitions"""
                for state in range(n_classes):
                    model.add_transition(model.start, hmm_states[state], startprob[state])
                """Left-right model"""
                for i in range(n_classes):
                    for j in range(n_classes):
                        model.add_transition(hmm_states[i], hmm_states[j], t[i][j])

                model.bake()

                """Create training and test data"""
                x_train = []
                x_test = []
                test_gyro_y = lou_data["gyro_y"][lou_trial]
                test_fder_gyro_y = lou_data["fder_gyro_y"][lou_trial]
                """Create test data with n-th trial of leave-one-out subject"""
                for sample in range(len(test_gyro_y)):
                    x_test.append([test_gyro_y[sample], test_fder_gyro_y[sample]])

                """Create training data with n-1 trials of the rest of subjects (healthy group)"""
                # NOTE(review): training always draws from dataset[0] (the
                # healthy group), even when evaluating patients -- confirm.
                for train_sub,train_data in dataset[0].iteritems():
                    count_trials = 0
                    if lou_sub != train_sub:
                    # if train_sub == "daniel":
                        for trial in range(n_trials):
                            if trial != lou_trial and count_trials < 1:
                                # rospy.logwarn(trial)
                                train_gyro_y = train_data["gyro_y"][trial]
                                train_fder_gyro_y = train_data["fder_gyro_y"][trial]
                                for sample in range(len(train_gyro_y)):
                                    x_train.append([train_gyro_y[sample], train_fder_gyro_y[sample]])
                                count_trials += 1
                rospy.logwarn(len(x_train))
                # pomegranate expects a list of sequences; wrap as one sequence.
                x_train = list([x_train])

                """Training"""
                rospy.logwarn("Training HMM...")
                model.fit(x_train, algorithm='baum-welch', verbose=True)
                # model.fit(x_train, algorithm='viterbi', verbose='True')

                """Find most-likely sequence"""
                rospy.logwarn("Finding most-likely sequence...")
                logp, path = model.viterbi(x_test)
                # rospy.logwarn(len(path))
                # rospy.logwarn(len(lou_data["labels"][lou_trial]))

                # Map decoded state names back to numeric phase labels.
                class_labels = []
                for i in range(len(lou_data["labels"][lou_trial])):
                    path_phase = path[i][1].name
                    for state in range(n_classes):
                        if path_phase == state_names[state]:
                            class_labels.append(state)
                '''Saving classifier labels into csv file'''
                # np.savetxt(packpath+"/log/inter_labels/"+lou_sub+"_labels.csv", class_labels, delimiter=",", fmt='%s')
                # rospy.logwarn("csv file with classifier labels was saved.")
                # lou_data["labels"][lou_trial] = lou_data["labels"][lou_trial][1:]

                """Calculate mean time (MT) of stride and each gait phase and Coefficient of Variation (CoV)"""
                rospy.logwarn("Mean time (MT) and Coefficient of Variance (CoV)")
                print "Mean time (MT) and Coefficient of Variance (CoV)"

                # Walk the decoded label sequence; each label change closes a
                # phase, and each return to label 0 closes a stride.
                curr_label = -1
                count = 0
                n_phases = 0
                stride_samples = 0
                phases_time = [[] for x in range(n_classes)]
                stride_time = []
                for label in class_labels:
                    if curr_label != label:
                        n_phases += 1
                        stride_samples += count
                        if label == 0:  # Gait start: HS
                            if n_phases == 4:   # If a whole gait cycle has past
                                stride_time.append(stride_samples/lou_data["Fs_fsr"])
                            n_phases = 0
                            stride_samples = 0
                        # NOTE(review): for label == 0 this appends to
                        # phases_time[-1] (the last phase's bucket) -- confirm
                        # the label-1 indexing is intended.
                        phases_time[label-1].append(count/lou_data["Fs_fsr"])
                        curr_label = label
                        count = 1
                    else:
                        count += 1.0
                for phase in range(n_classes):
                    mean_time = np.mean(phases_time[phase])
                    phase_std = np.std(phases_time[phase])
                    rospy.logwarn("(" + state_names[phase] + ")")
                    print "(" + state_names[phase] + ")"
                    rospy.logwarn("Mean time: " + str(mean_time) + " + " + str(phase_std))
                    print "Mean time: " + str(mean_time) + " + " + str(phase_std)
                    rospy.logwarn("CoV: " + str(phase_std/mean_time*100.0))
                    print("CoV: " + str(phase_std/mean_time*100.0))
                mean_time = np.mean(stride_time)
                phase_std = np.std(stride_time)
                rospy.logwarn("(Stride)")
                print "(Stride)"
                rospy.logwarn("Mean time: " + str(mean_time) + " + " + str(phase_std))
                print "Mean time: " + str(mean_time) + " + " + str(phase_std)
                rospy.logwarn("CoV: " + str(phase_std/mean_time*100.0))
                print("CoV: " + str(phase_std/mean_time*100.0))
            # NOTE(review): from here on, 'test_data', 'seq' and 'cl' are
            # never defined in this function; this tail will raise NameError
            # if reached.
            seq2 = test_data

        if seq2 == [] or test_data == []:
            rospy.logerr("Empty testing sequence")
            continue

        print(np.array(seq).shape)
        print(np.array(seq[0]).shape)
        # print np.array(seq)
        print(np.array(seq2).shape)
        print(np.array(seq2[0]).shape)
        # print np.array(seq2)
        # seq = np.array(seq)
        seq2 = np.array(seq2)
        # rospy.logwarn(seq)
        cl.fit(seq, algorithm='baum-welch', verbose='True')
        # print(model)

        rospy.logwarn("Start Viterbi")
        log, path = cl.viterbi(seq)
        rospy.logwarn("Viterbi Done")
        # rospy.logwarn(len(path))
        sum_ = 0.0

        if (len(path) - 1) != len(test_data):
            print(len(path))
            print(path[0][1].name)
            print(path[len(path) - 1][1].name)
            print(len(test_data))
            exit()
    def build_dis_classifier(self):
        """Train and evaluate one 2-state (swing/stance) HMM per class.

        For each of the ``num_class`` classes, runs stratified k-fold
        cross-validation: fits a Gaussian-emission HMM on the training fold
        with Baum-Welch, Viterbi-decodes the test fold, and accumulates
        TP/TN/FP/FN counts.  The trained models and summary statistics are
        pickled and exported to a .mat file.

        Fixes relative to the original:
          * NaN/Inf guards now use ``np.isnan(x).any()`` /
            ``np.isfinite(x).all()`` instead of ``np.isnan(x.any())`` (the
            latter tests a scalar boolean, so it never detected anything),
            and the fourth guard checks ``test_class`` instead of
            re-checking ``test_data``.
          * Empty-sequence check uses ``len(...) == 0``; comparing a numpy
            array to ``[]`` with ``==`` is elementwise/ambiguous.
          * ``rospy.shutdown()`` (no such attribute) replaced with
            ``rospy.signal_shutdown``.
          * ``stats`` stored ``fn_total`` twice and never the true
            positives; the fourth entry is now ``tp_total``.
        """
        skf = StratifiedKFold(self.full_labels, n_folds=self.folds)
        classifier_array = []
        stats_array = []
        num_class = len(self.full_data[0])
        print (num_class)
        for cl in range(0, num_class):
            lel = -1  # NOTE(review): never > 0, so the skip below is dead code
            tp_total = 0.0
            tn_total = 0.0
            fp_total = 0.0
            fn_total = 0.0
            tests = 0
            for train_index, test_index in skf:
                if lel > 0:
                    lel -= 1
                    continue
                stats = []
                distros = []
                hmm_states = []
                state_names = ['swing', 'stance']
                swings = 0
                stances = 0
                # One multivariate-Gaussian emission per state, estimated
                # from the labeled samples of that state.
                for i in range(0, 2):
                    dis = MGD.from_samples(self.class_data[i])
                    st = State(dis, name=state_names[i])
                    distros.append(dis)
                    hmm_states.append(st)

                model = HMM()
                print(model.states)
                model.add_states(hmm_states)
                model.add_transition(model.start, hmm_states[0], 0.5)
                model.add_transition(model.start, hmm_states[1], 0.5)
                # Near-zero exit probabilities keep the chain effectively
                # ergodic while still defining an end state.
                model.add_transition(hmm_states[1], model.end, 0.000000000000000001)
                model.add_transition(hmm_states[0], model.end, 0.000000000000000001)

                for i in range(0, 2):
                    for j in range(0, 2):
                        model.add_transition(hmm_states[i], hmm_states[j], self.t[i][j])
                model.bake()

                tp = 0.0
                tn = 0.0
                fp = 0.0
                fn = 0.0

                train_data = self.full_data[train_index, cl]
                train_class = self.full_labels[train_index, cl]
                test_data = self.full_data[test_index]
                test_class = self.full_labels[test_index]

                # Diagnostics: report whether the fold data is clean.
                # (Fixed: predicates applied elementwise, then reduced.)
                print(np.isfinite(train_data).all())
                print(np.isfinite(test_data).all())
                print(np.isnan(train_data).any())
                print(np.isinf(train_data).any())
                print(np.isnan(test_data).any())
                print(np.isinf(test_data).any())

                if (not np.isfinite(train_data).all()) or (not np.isfinite(test_data).all()) \
                        or (not np.isfinite(train_class).all()) or (not np.isfinite(test_class).all()):
                    rospy.logerr("NaN or Inf Detected")
                    exit()

                try:
                    rospy.logwarn("Training model #"+str(cl)+", fold #" + str(tests))
                    seq = np.array(train_data)
                    model.fit(seq, algorithm='baum-welch', verbose=True, n_jobs=8, max_iterations=150)

                except ValueError:
                    rospy.logwarn("Something went wrong, exiting")
                    # rospy has no shutdown(); signal_shutdown takes a reason.
                    rospy.signal_shutdown("HMM training failed")
                    exit()

                # Optionally chop the test data into batches of 20 samples.
                seq = []
                if self.batch_test == 1:
                    s = 0
                    # for s in range(0, len(test_data)):
                    while s < len(test_data):
                        k = 0
                        seq_entry = []
                        while k < 20 and s < len(test_data):
                            seq_entry.append(test_data[s])
                            k += 1
                            s += 1
                        seq.append(seq_entry)
                else:
                    seq = np.array(test_data)

                if len(seq) == 0 or len(test_data) == 0:
                    rospy.logerr("Empty testing sequence")
                    continue

                log, path = model.viterbi(test_data)
                # Viterbi path includes model.start and model.end, hence -2.
                if (len(path) - 2) != len(test_data):
                    rospy.logerr(len(path))
                    rospy.logerr(path[0][1].name)
                    rospy.logerr(path[len(path) - 1][1].name)
                    rospy.logerr(len(test_data))
                    exit()

                tests += 1
                # Score the decoded path against ground truth
                # (stance = positive class, swing = negative class).
                for i in range(0, len(path) - 2):
                    if path[i + 1][1].name != 'Gait-start' and path[i + 1][1].name != 'Gait-end':
                        if path[i + 1][1].name == 'swing':  # prediction is 0
                            swings += 1
                            if test_class[i] == 0:  # class is 0
                                tn += 1.0
                            elif test_class[i] == 1:
                                fn += 1.0  # class is 1

                        elif path[i + 1][1].name == 'stance':  # prediction is 1
                            stances += 1
                            if test_class[i] == 1:  # class is 1
                                tp += 1.0
                            elif test_class[i] == 0:  # class is 0
                                fp += 1.0
                print (swings)
                print (stances)
                # Per-fold metrics (guarded against empty denominators).
                if (tp + fn) != 0.0:
                    rospy.logwarn("Sensitivity : " + str(tp / (tp + fn)))
                else:
                    rospy.logwarn("Sensitivity : 0.0")
                if (tn + fp) != 0.0:
                    rospy.logwarn("Specificity : " + str(tn / (tn + fp)))
                else:
                    rospy.logwarn("Specificity : 0.0")
                if (tn + tp + fn + fp) != 0.0:
                    rospy.logwarn("Accuracy : " + str((tn + tp) / (tn + tp + fn + fp)))
                else:
                    rospy.logwarn("Accuracy : 0.0")

                tn_total += tn
                tp_total += tp
                fn_total += fn
                fp_total += fp

            # Average the counts over the executed folds.
            # NOTE(review): raises ZeroDivisionError if every fold was skipped.
            tp_total /= tests
            tn_total /= tests
            fp_total /= tests
            fn_total /= tests
            rospy.logerr("TP :" + str(tp_total))
            rospy.logerr("TN :" + str(tn_total))
            rospy.logerr("FP :" + str(fp_total))
            rospy.logerr("FN :" + str(fn_total))
            rospy.logerr("Tests :" + str(tests))
            if (tp_total + fn_total) != 0.0:
                sensitivity = tp_total / (tp_total + fn_total)
            else:
                sensitivity = 0.0
            if (tn_total + fp_total) != 0.0:
                specificity = tn_total / (tn_total + fp_total)
            else:
                specificity = 0.0
            if (tn_total + tp_total + fn_total + fp_total) != 0.0:
                accuracy = (tn_total + tp_total) / (tn_total + tp_total + fn_total + fp_total)
            else:
                accuracy = 0.0

            rospy.logwarn("----------------------------------------------------------")
            rospy.logerr("Total accuracy: " + str(accuracy))
            rospy.logerr("Total sensitivity: " + str(sensitivity))
            rospy.logerr("Total specificity: " + str(specificity))
            # Raw counts (de-averaged), fold count, then summary metrics.
            # Fixed: fourth entry was fn_total again; tp was never stored.
            stats = [tn_total * tests, fn_total * tests, fp_total * tests, tp_total * tests, tests,
                     accuracy, sensitivity, specificity]
            rospy.logwarn("-------------------DONE-------------------------")
            classifier_array.append(model)  # model from the last executed fold
            stats_array.append(stats)

        # NOTE(review): 'datafile' is not defined in this method -- presumably
        # a module/class-level path; confirm before relying on these dumps.
        pickle.dump(classifier_array, open(datafile + "distributed_classifiers.p", 'wb'))
        pickle.dump(stats_array, open(datafile + "distributed_stats.p", 'wb'))
        scio.savemat(datafile + "distributed_stats.mat", {'stats': stats_array})
            total.append(no_p_line)
# Encode every raw sequence with converter_to(..., 2); both 'total' and
# 'converter_to' are defined elsewhere in this project.
converted_total = [converter_to(x, 2) for x in total]

#print(converted_total)


def test(model):
    """Print the fraction of test sequences decoded as acceptor sites.

    Reads 'new_intron_acceptor.txt' line by line, Viterbi-decodes each
    encoded sequence with *model*, and counts a hit when the state 40
    positions from the end of the path is named 'acceptor015' (i.e. the
    acceptor profile was entered at the expected offset -- confirm the
    offset matches the model layout).

    Fix: guards against an empty test file, which previously raised
    ZeroDivisionError.
    """
    with open('new_intron_acceptor.txt') as test_file:
        cont_ok = 0
        cont_not_ok = 0
        for line in test_file:
            test_line = line.lower().replace('\n', '')
            logp, path = model.viterbi(converter_to(test_line, 2))
            if path[-40][1].name == 'acceptor015':
                cont_ok += 1
            else:
                cont_not_ok += 1
        total = cont_ok + cont_not_ok
        print(cont_ok / total if total else 0.0)


# Evaluate the hand-built model, refine it on the training corpus with
# pseudocount-smoothed Baum-Welch, then evaluate again to see the effect.
test(model)

model.fit(converted_total,
          transition_pseudocount=1,
          emission_pseudocount=1,
          verbose=True)

test(model)

# Persist the trained model as JSON.
with open('partial_model_intron_acceptor_model.json', 'w') as out:
    out.write(model.to_json())
# --- Beispiel #7 (scraped example separator) ---
    def _segment(self, arr, components=2):
        """Segment the positive entries of *arr* with a GMM-emission HMM.

        Filters outliers (Hampel on log2 values), estimates the number of
        states with a GMM on log or linear scale, builds an HMM with one
        Gaussian-mixture emission per state, trains it with Baum-Welch,
        and Viterbi-decodes the signal into copy-number intervals.

        Returns (newarr, seg, scale): the outlier-cleaned signal, the list
        of segment intervals, and the scale ('log' or 'linear') used.
        """
        positive = arr[arr > 0]
        keep = self.hampel_filter(np.log2(positive))
        cleaned = positive[keep]

        # Estimate states on both scales; prefer log scale unless it
        # collapses to a single state (linear improves the sensitivity).
        gmm_log = self.get_states(np.log2(cleaned))
        gmm_lin = self.get_states(cleaned)
        if len(gmm_log.means_.ravel()) == 1:
            means = gmm_lin.means_.ravel()
            probs = gmm_lin.weights_
            scale = 'linear'
        else:
            means = gmm_log.means_.ravel()
            probs = gmm_log.weights_
            scale = 'log'

        logger.info('Estimated HMM state number: {0} ({1} scale)'.format(len(means), scale))

        model = HiddenMarkovModel()
        # One mixture emission per state: components centered at
        # m, m - 0.5, m + 0.5, ... with fixed spread 0.5.
        states = []
        for m in means:
            parts = [NormalDistribution(m + (-1) ** k * ((k + 1) // 2) * 0.5, 0.5)
                     for k in range(components)]
            states.append(State(GeneralMixtureModel(parts), name=str(m)))
        model.add_states(*states)

        # Sticky transition matrix: 0.8 self-loop, remainder spread evenly.
        n_states = len(means)
        for i, src in enumerate(states):
            for j, dst in enumerate(states):
                if i == j:
                    model.add_transition(src, dst, 0.8)
                else:
                    model.add_transition(src, dst, 0.2 / (n_states - 1))

        # Start probabilities follow the GMM mixture weights.
        for weight, st in zip(probs, states):
            model.add_transition(model.start, st, weight)

        model.bake()

        # Rebuild a full-length signal keeping only the filtered values.
        restored = np.zeros(positive.size)
        restored[keep] = cleaned
        newarr = np.zeros(arr.size)
        newarr[arr > 0] = restored

        if n_states > 1:
            model.fit(self.pieces(newarr, scale=scale), algorithm='baum-welch',
                      n_jobs=self.n_jobs, max_iterations=5000,
                      stop_threshold=2e-4)

            queue = newarr[newarr > 0]
            observations = np.log2(queue) if scale == 'log' else queue
            # Drop the silent start state from the decoded path.
            decoded = model.viterbi(observations)[1][1:]
            seq = np.r_[[state.name for _, state in decoded]]
            seg = self.assign_cnv(queue, seq)

            predicted = np.zeros(newarr.size)
            predicted[newarr > 0] = seg
            seg = self.call_intervals(predicted)
        else:
            seg = [(0, newarr.size)]

        return newarr, seg, scale
def train_and_test():
    """Build, evaluate, train, and re-evaluate the coding-to-donor HMM.

    Reads exon training sequences, assembles an HMM with three cyclic
    coding-frame states feeding a donor-site profile followed by a
    uniform 'post' state, evaluates it, refines it with pseudocount
    Baum-Welch on the corpus, re-evaluates, and saves the model as JSON.
    """
    # Load training sequences, stripping 'P' markers and newlines.
    with open('../data extractors/exons_start_1.txt') as in_file:
        sequences = [line.replace('P', '').lower().replace('\n', '')
                     for line in in_file]

    encoded = [converter_to(seq, 2) for seq in sequences]

    donor_matrix = numpy.array(
        matrix_from_exa('../data extractors/new_donor1.exa'))

    # Per-frame emission probabilities estimated from the cuts file.
    c0, c1, c2 = calculator.calculate_proba2('../data extractors/new_cuts.txt')
    print(c0.p, c1.p, c2.p)
    coding0 = State(DiscreteDistribution(c0.p), 'coding state 0')
    coding1 = State(DiscreteDistribution(c1.p), 'coding state 1')
    coding2 = State(DiscreteDistribution(c2.p), 'coding state 2')

    # Donor-site profile states built from the position weight matrix.
    donor_states = sequence_state_factory(classify(donor_matrix, 2), 'donor0')

    post_state = State(DiscreteDistribution(equal_distribution), name='post')

    model = HiddenMarkovModel('coding to donor')

    for coding in (coding0, coding1, coding2):
        model.add_state(coding)

    add_sequence(model, donor_states)

    model.add_state(post_state)

    model.add_transition(model.start, coding0, 1)

    # Cyclic codon structure: each frame either advances to the next
    # frame (0.6) or enters the donor-site profile (0.4).
    for current, following in ((coding0, coding1),
                               (coding1, coding2),
                               (coding2, coding0)):
        model.add_transition(current, following, 0.6)
        model.add_transition(current, donor_states[0], 0.4)

    model.add_transition(donor_states[-1], post_state, 1)

    model.add_transition(post_state, post_state, 0.9)
    model.add_transition(post_state, model.end, 0.1)

    model.bake()
    test_model(model)

    model.fit(encoded,
              transition_pseudocount=1,
              emission_pseudocount=1,
              verbose=True)

    test_model(model)

    with open('partial_model_coding_to_donor_model0.json', 'w') as out:
        out.write(model.to_json())
# --- Beispiel #9 (scraped example separator) ---
def main():
    """Train and evaluate a continuous Gaussian HMM for gait-phase detection.

    ROS node ('hmm_trainer') that, for the subject prefix given as the first
    command-line argument:
      1. loads the subject's per-trial ``*_proc_data<N>.mat`` recordings,
      2. uses the y-axis gyroscope signal and its first derivative as the
         2-signal feature vector,
      3. builds a 4-state HMM ('hs', 'ff', 'ho', 'sw') whose transition matrix
         is estimated from FSR label transition counts and whose emissions are
         per-class multivariate Gaussians,
      4. refines the model with Baum-Welch on the two non-held-out trials and
         Viterbi-decodes the held-out trial,
      5. reports per-phase timing error, phase/stride duration statistics
         (mean time and CoV), accuracy, sensitivity, specificity and the
         goodness index G.

    Relies on module-level imports of rospy, rospkg, sys, np (numpy),
    scio (scipy.io), normalize (sklearn), and pomegranate's HMM/State/MGD.
    """
    rospy.init_node('hmm_trainer')
    # NOTE(review): param_vec and use_measurements are assigned here but never
    # used anywhere else in this function.
    param_vec = []
    rospack = rospkg.RosPack()
    # The subject/patient file prefix is mandatory.
    if (len(sys.argv) < 2):
        print("Missing the prefix argument.")
        exit()
    else:
        prefix = sys.argv[1]
    use_measurements = np.zeros(3)

    # patient = rospy.get_param('~patient', 'None')
    # if prefix == 'None':
    #     rospy.logerr("No filename given ,exiting")
    #     exit()

    phase_pub = rospy.Publisher('/phase', Int32, queue_size=10)
    packpath = rospack.get_path('exo_gait_phase_det')
    datapath = packpath + "/log/mat_files/"
    rospy.logwarn("Patient: {}".format(prefix))
    print("Patient: {}".format(prefix))
    verbose = rospy.get_param('~verbose', False)
    """Print console output into text file"""
    # sys.stdout = open(packpath + "/log/results/intra-sub_" + prefix + ".txt", "w")
    """Data loading"""
    # One .mat file per trial; each holds the raw signals and FSR labels.
    n_trials = 3
    data = [[] for x in range(0, n_trials)]
    for i in range(0, n_trials):
        data[i] = scio.loadmat(datapath + prefix + "_proc_data" + str(i + 1) +
                               ".mat")

    accel_x = [[] for x in range(0, n_trials)]
    accel_y = [[] for x in range(0, n_trials)]
    accel_z = [[] for x in range(0, n_trials)]
    gyro_x = [[] for x in range(0, n_trials)]
    gyro_y = [[] for x in range(0, n_trials)]
    gyro_z = [[] for x in range(0, n_trials)]
    time_array = [[] for x in range(0, n_trials)]
    labels = [[] for x in range(0, n_trials)]
    fs_fsr = []  # FSR sampling frequency per trial
    for i in range(0, n_trials):
        # accel_x[i] = data[i]["accel_x"][0]
        # accel_y[i] = data[i]["accel_y"][0]
        # accel_z[i] = data[i]["accel_z"][0]
        gyro_x[i] = data[i]["gyro_x"][0]
        gyro_y[i] = data[i]["gyro_y"][0]
        gyro_z[i] = data[i]["gyro_z"][0]
        time_array[i] = data[i]["time"][0]
        labels[i] = data[i]["labels"][0]
        fs_fsr.append(data[i]["Fs_fsr"][0][0])
    """Feature extraction"""
    """First derivative"""
    # fder_gyro_x = []
    # for i in range(n_trials):
    #     der = []
    #     der.append(gyro_x[i][0])
    #     for j in range(1,len(gyro_x[i])-1):
    #         der.append((gyro_x[i][j+1]-gyro_x[i][j-1])/2)
    #     der.append(gyro_x[i][-1])
    #     fder_gyro_x.append(der)

    # Central difference of gyro_y; endpoints are padded with the raw samples
    # so the derivative series keeps the original length.
    fder_gyro_y = []
    for i in range(n_trials):
        der = []
        der.append(gyro_y[i][0])
        for j in range(1, len(gyro_y[i]) - 1):
            der.append((gyro_y[i][j + 1] - gyro_y[i][j - 1]) / 2)
        der.append(gyro_y[i][-1])
        fder_gyro_y.append(der)

    # fder_gyro_z = []
    # for i in range(n_trials):
    #     der = []
    #     der.append(gyro_z[i][0])
    #     for j in range(1,len(gyro_z[i])-1):
    #         der.append((gyro_z[i][j+1]-gyro_z[i][j-1])/2)
    #     der.append(gyro_z[i][-1])
    #     fder_gyro_z.append(der)
    """Second derivative"""
    # sder_gyro_x = []
    # for i in range(n_trials):
    #     der = []
    #     der.append(fder_gyro_x[i][0])
    #     for j in range(1,len(fder_gyro_x[i])-1):
    #         der.append((fder_gyro_x[i][j+1]-fder_gyro_x[i][j-1])/2)
    #     der.append(fder_gyro_x[i][-1])
    #     sder_gyro_x.append(der)
    #
    # sder_gyro_y = []
    # for i in range(n_trials):
    #     der = []
    #     der.append(fder_gyro_y[i][0])
    #     for j in range(1,len(fder_gyro_y[i])-1):
    #         der.append((fder_gyro_y[i][j+1]-fder_gyro_y[i][j-1])/2)
    #     der.append(fder_gyro_y[i][-1])
    #     sder_gyro_y.append(der)
    #
    # sder_gyro_z = []
    # for i in range(n_trials):
    #     der = []
    #     der.append(fder_gyro_z[i][0])
    #     for j in range(1,len(fder_gyro_z[i])-1):
    #         der.append((fder_gyro_z[i][j+1]-fder_gyro_z[i][j-1])/2)
    #     der.append(fder_gyro_z[i][-1])
    #     sder_gyro_z.append(der)
    """Peak detector"""
    # window_wid = 15        # Window width should be odd
    # search_ratio = window_wid/2
    # pdet_gyro_x = []
    # for i in range(n_trials):
    #     pdet = []
    #     for j in range(len(gyro_x[i])):
    #         if j <= search_ratio:
    #             win = gyro_x[i][:j+search_ratio+1]
    #         elif j >= len(gyro_x[i])-search_ratio-1:
    #             win = gyro_x[i][j-search_ratio:]
    #         else:
    #             win = gyro_x[i][j-search_ratio:j+search_ratio+1]
    #         pdet.append(gyro_x[i][j]/max(win))
    #     pdet_gyro_x.append(pdet)

    # print len(gyro_x)
    # print len(pdet_gyro_x)
    # for i in range(3):
    #     print len(gyro_x[i])
    #     print len(pdet_gyro_x[i])

    # pdet_gyro_y = []
    # for i in range(n_trials):
    #     pdet = []
    #     for j in range(len(gyro_y[i])):
    #         if j <= search_ratio:
    #             win = gyro_y[i][:j+search_ratio+1]
    #         elif j >= len(gyro_y[i])-search_ratio-1:
    #             win = gyro_y[i][j-search_ratio:]
    #         else:
    #             win = gyro_y[i][j-search_ratio:j+search_ratio+1]
    #         pdet.append(gyro_y[i][j]/max(win))
    #     pdet_gyro_y.append(pdet)
    #
    # pdet_gyro_z = []
    # for i in range(n_trials):
    #     pdet = []
    #     for j in range(len(gyro_z[i])):
    #         if j <= search_ratio:
    #             win = gyro_z[i][:j+search_ratio+1]
    #         elif j >= len(gyro_z[i])-search_ratio-1:
    #             win = gyro_z[i][j-search_ratio:]
    #         else:
    #             win = gyro_z[i][j-search_ratio:j+search_ratio+1]
    #         pdet.append(gyro_z[i][j]/max(win))
    #     pdet_gyro_z.append(pdet)
    """Create training and test data"""
    # ff[j] is the per-sample feature matrix of trial j; only gyro_y and its
    # first derivative are active features (the rest are commented out).
    ff = [[] for x in range(0, n_trials)]
    for j in range(0, n_trials):
        for k in range(0, len(time_array[j])):
            f_ = []
            # f_.append(accel_x[j][k])
            # f_.append(accel_y[j][k])
            # f_.append(accel_z[j][k])
            # f_.append(gyro_x[j][k])
            # f_.append(fder_gyro_x[j][k])
            # f_.append(sder_gyro_x[j][k])
            # f_.append(pdet_gyro_x[j][k])
            f_.append(gyro_y[j][k])
            f_.append(fder_gyro_y[j][k])
            # f_.append(sder_gyro_y[j][k])
            # f_.append(pdet_gyro_y[j][k])
            # f_.append(gyro_z[j][k])
            # f_.append(fder_gyro_z[j][k])
            # f_.append(sder_gyro_z[j][k])
            # f_.append(pdet_gyro_z[j][k])
            ff[j].append(f_)
    n_signals = len(ff[0][0])
    """cHMM"""
    # Uniform initial state distribution over the four gait phases:
    # heel strike, flat foot, heel off, swing.
    startprob = [0.25, 0.25, 0.25, 0.25]
    state_names = ['hs', 'ff', 'ho', 'sw']
    rospy.logwarn("""Intra-subject training""")
    print("""Intra-subject training""")
    # for leave_one_out in range(0, n_trials):
    # Only trial index 1 is currently held out (full leave-one-out loop above
    # is disabled).
    for leave_one_out in range(1, 2):
        rospy.logwarn("-------TRIAL {}-------".format(leave_one_out + 1))
        print("-------TRIAL {}-------".format(leave_one_out + 1))
        """Transition matrix"""
        # Estimate transition probabilities by counting consecutive label
        # pairs in the held-out trial, then row-normalizing.
        t = np.zeros((4, 4))  # Transition matrix
        prev = -1
        for i in range(0, len(labels[leave_one_out])):
            # data[i]._replace(label = correct_mapping[data[i].label])
            if prev == -1:
                prev = labels[leave_one_out][i]
            t[prev][labels[leave_one_out][i]] += 1.0
            prev = labels[leave_one_out][i]
        t = normalize(t, axis=1, norm='l1')
        if verbose: rospy.logwarn("TRANSITION MATRIX\n" + str(t))

        n_classes = 4
        # Group the held-out trial's samples by their FSR label so per-class
        # Gaussians can be estimated.
        class_data = [[] for x in range(n_classes)]
        full_data = []
        full_labels = []
        for i in range(len(ff[leave_one_out])):
            full_data.append(ff[leave_one_out][i])
            full_labels.append(labels[leave_one_out][i])
        # print full_data == ff[leave_one_out]
        # print full_labels == labels[leave_one_out]
        # print len(full_data) == len(full_labels)
        # for i in range(0,len(ff[leave_one_out-1])):
        #     full_data.append(ff[leave_one_out-1][i])
        #     full_labels.append(labels[leave_one_out-1][i])
        # for i in range(0,len(ff[(leave_one_out+1) % n_trials])):
        #     full_data.append(ff[(leave_one_out+1) % n_trials][i])
        #     full_labels.append(labels[(leave_one_out+1) % n_trials][i])

        # print len(full_data) == (len(ff[leave_one_out]) + len(ff[leave_one_out-1]) + len(ff[(leave_one_out+1) % n_trials]))
        # print full_data
        # print len(full_data)
        # print full_labels
        # print len(full_labels)

        for i in range(0, len(full_data)):
            class_data[full_labels[i]].append(full_data[i])
        """Multivariate Gaussian Distributions for each hidden state"""
        class_means = [[[] for x in range(n_signals)]
                       for i in range(n_classes)]
        class_vars = [[[] for x in range(n_signals)] for i in range(n_classes)]
        class_std = [[[] for x in range(n_signals)] for i in range(n_classes)]
        class_cov = []
        classifiers = []

        for i in range(0, n_classes):
            # cov = np.ma.cov(np.array(class_data[i]), rowvar=False)
            cov = np.cov(np.array(class_data[i]), rowvar=False)
            class_cov.append(cov)
            for j in range(0, n_signals):
                class_means[i][j] = np.array(
                    class_data[i][:])[:, [j]].mean(axis=0)
                class_vars[i][j] = np.array(class_data[i][:])[:,
                                                              [j]].var(axis=0)
                class_std[i][j] = np.array(class_data[i][:])[:,
                                                             [j]].std(axis=0)
        print "\n" + str(class_cov) + "\n"
        """Classifier initialization"""
        distros = []
        hmm_states = []
        for i in range(n_classes):
            dis = MGD\
                (np.array(class_means[i]).flatten(),
                 np.array(class_cov[i]))
            st = State(dis, name=state_names[i])
            distros.append(dis)
            hmm_states.append(st)
        model = HMM(name="Gait")

        model.add_states(hmm_states)
        """Initial transitions"""
        for i in range(0, n_classes):
            model.add_transition(model.start, hmm_states[i], startprob[i])
        """Left-right model"""
        # Fully-connected transitions weighted by the label-count estimate t.
        for i in range(0, n_classes):
            for j in range(0, n_classes):
                model.add_transition(hmm_states[i], hmm_states[j], t[i][j])

        model.bake()

        # print (model.name)
        # rospy.logwarn("N. observations: " + str(model.d))
        # print (model.edges)
        # rospy.logwarn("N. hidden states: " + str(model.silent_start))
        # print model
        """Training"""
        # limit = int(len(ff1)*(8/10.0))    # 80% of data to test, 20% to train
        # x_train = list([ff1,ff2])
        # x_train = list([ff2])
        # Train on the two trials that are NOT held out (concatenated into a
        # single observation sequence).
        x_train = []
        for i in range(0, len(ff[leave_one_out - 1])):
            x_train.append(ff[leave_one_out - 1][i])
        for i in range(0, len(ff[(leave_one_out + 1) % n_trials])):
            x_train.append(ff[(leave_one_out + 1) % n_trials][i])
        x_train = list([x_train])
        rospy.logwarn("Training...")
        model.fit(x_train, algorithm='baum-welch', verbose=verbose)
        # model.fit(list([ff[leave_one_out-1]]), algorithm='baum-welch', verbose=verbose)
        # model.fit(list([ff[(leave_one_out+1) % n_trials]]), algorithm='baum-welch', verbose=verbose)
        # model.fit(seq, algorithm='viterbi', verbose='True')
        """Find most-likely sequence"""
        # logp, path = model.viterbi(ff[limit:])
        logp, path = model.viterbi(ff[leave_one_out])
        # print logp
        # print path
        # Map decoded state names back to integer phase labels.
        class_labels = []
        for i in range(len(labels[leave_one_out])):
            path_phase = path[i][1].name
            for state in range(n_classes):
                if path_phase == state_names[state]:
                    class_labels.append(state)
        # Drop the first FSR label, presumably to align with the Viterbi path
        # offset introduced by the start state -- TODO confirm alignment.
        labels[leave_one_out] = list(labels[leave_one_out][1:])
        # Saving classifier labels into csv file
        # np.savetxt(packpath+"/log/intra_labels/"+prefix+"_labels"+str(leave_one_out+1)+".csv", class_labels, delimiter=",", fmt='%s')
        # rospy.logwarn("csv file with classifier labels was saved.")

        # NOTE(review): `sum` shadows the builtin and is never updated in the
        # active code path (only in the commented-out block below).
        sum = 0.0
        true_pos = 0.0
        false_pos = 0.0
        true_neg = 0.0
        false_neg = 0.0
        tol = 6e-2  # Tolerance window of 60 ms
        # Half-window length in FSR samples (tol/2 seconds worth of samples).
        tol_window = int((tol / 2) / (1 / float(fs_fsr[leave_one_out])))
        print "FSR freq: " + str(fs_fsr[leave_one_out])
        print "Tolerance win: " + str(tol_window)
        # print tol_window
        # # print type(tol_window)
        # for i in range(0, len(labels[leave_one_out])):
        #     """Tolerance window"""
        #     if i > tol_window+1 and i < len(labels[leave_one_out])-tol_window:
        #         # curr_tol = time_array[leave_one_out][i+tol_window]-time_array[leave_one_out][i-tol_window]
        #         # print curr_tol
        #         win = []
        #         for j in range(i-tol_window,i+tol_window+1):
        #             win.append(state_names[labels[leave_one_out][j]])
        #         if path[i][1].name in win:
        #             sum += 1.0
        #     else:
        #         if path[i][1].name == labels[leave_one_out][i]:
        #             sum += 1.0
        """Performance Evaluation"""
        # One-vs-rest confusion counts per phase; a prediction counts as a
        # true positive if the phase occurs anywhere inside the +/- tol_window
        # neighborhood of ground-truth labels.
        rospy.logwarn("Calculating results...")
        time_error = [[] for x in range(n_classes)]
        for phase in range(n_classes):
            for i in range(len(labels[leave_one_out])):
                """Tolerance window"""
                if i >= tol_window and i < len(
                        labels[leave_one_out]) - tol_window:
                    # curr_tol = time_array[leave_one_out][i+tol_window]-time_array[leave_one_out][i-tol_window]
                    # print curr_tol
                    win = []
                    for j in range(i - tol_window, i + tol_window + 1):
                        win.append(labels[leave_one_out][j])
                    """Calculate time error with true positives"""
                    if class_labels[i] == phase:
                        if class_labels[i] in win:
                            # Signed offset (in seconds) of the first matching
                            # label inside the window.
                            for k in range(len(win)):
                                if win[k] == phase:
                                    time_error[phase].append(
                                        (k - tol_window) /
                                        fs_fsr[leave_one_out])
                                    break
                            true_pos += 1.0
                            # NOTE(review): these verbose prints concatenate
                            # int + str (phase, class_labels[i]) and would
                            # raise TypeError if verbose is True -- likely
                            # str(...) was intended.
                            if verbose:
                                print phase + ", " + state_names[labels[
                                    leave_one_out][i]] + ", " + class_labels[
                                        i] + ", true_pos"
                        else:
                            false_pos += 1.0
                            if verbose:
                                print phase + ", " + state_names[labels[
                                    leave_one_out][i]] + ", " + class_labels[
                                        i] + ", false_pos"
                    else:
                        if phase != labels[leave_one_out][i]:
                            # if phase not in win:
                            true_neg += 1.0
                            if verbose:
                                print phase + ", " + state_names[labels[
                                    leave_one_out][i]] + ", " + class_labels[
                                        i] + ", true_neg"
                        else:
                            false_neg += 1.0
                            if verbose:
                                print phase + ", " + state_names[labels[
                                    leave_one_out][i]] + ", " + class_labels[
                                        i] + ", false_neg"
                else:
                    # Near the sequence edges, compare sample-to-sample with
                    # no tolerance window.
                    if class_labels[i] == phase:
                        if class_labels[i] == labels[leave_one_out][i]:
                            true_pos += 1.0
                        else:
                            false_pos += 1.0
                    else:
                        if phase != labels[leave_one_out][i]:
                            true_neg += 1.0
                        else:
                            false_neg += 1.0

        rospy.logwarn("Timing error")
        print("Timing error")
        for phase in range(n_classes):
            rospy.logwarn("(" + state_names[phase] + ")")
            print "(" + state_names[phase] + ")"
            if len(time_error[phase]) > 0:
                rospy.logwarn(
                    str(np.mean(time_error[phase])) + " + " +
                    str(np.std(time_error[phase])))
                print str(np.mean(time_error[phase])) + " + " + str(
                    np.std(time_error[phase]))
            else:
                # No true positives for this phase: report the worst-case
                # error (the full 60 ms tolerance).
                rospy.logwarn("0.06 + 0")
                print "0.06 + 0"
        """Calculate mean time (MT) of stride and each gait phase and Coefficient of Variation (CoV)"""
        rospy.logwarn("Mean time (MT) and Coefficient of Variance (CoV)")
        print("Mean time (MT) and Coefficient of Variance (CoV)")
        # Compute duration statistics twice: from the HMM output and from the
        # FSR ground truth.
        n_group = 0
        for label_group in [class_labels, labels[leave_one_out]]:
            if n_group == 0:
                rospy.logwarn("Results for HMM:")
                print("Results for HMM:")
            else:
                rospy.logwarn("Results for FSR:")
                print("Results for FSR:")
            curr_label = -1
            count = 0
            n_phases = 0
            stride_samples = 0
            phases_time = [[] for x in range(n_classes)]
            stride_time = []
            for label in label_group:
                # for label in class_labels:
                if curr_label != label:
                    n_phases += 1
                    stride_samples += count
                    if label == 0:  # Gait start: HS
                        if n_phases == 4:  # If a whole gait cycle has past
                            stride_time.append(stride_samples /
                                               fs_fsr[leave_one_out])
                        n_phases = 0
                        stride_samples = 0
                    # `count` is the duration of the phase that just ended;
                    # label-1 (with -1 wrapping to the last phase when
                    # label == 0) indexes that previous phase, assuming
                    # phases cycle in order -- TODO confirm.
                    # NOTE(review): the very first iteration appends a
                    # zero-duration entry (count starts at 0).
                    phases_time[label - 1].append(count /
                                                  fs_fsr[leave_one_out])
                    curr_label = label
                    count = 1
                else:
                    count += 1.0
            for phase in range(n_classes):
                mean_time = np.mean(phases_time[phase])
                phase_std = np.std(phases_time[phase])
                rospy.logwarn("(" + state_names[phase] + ")")
                print "(" + state_names[phase] + ")"
                rospy.logwarn("Mean time: " + str(mean_time) + " + " +
                              str(phase_std))
                print "Mean time: " + str(mean_time) + " + " + str(phase_std)
                rospy.logwarn("CoV: " + str(phase_std / mean_time * 100.0))
                print("CoV: " + str(phase_std / mean_time * 100.0))
            mean_time = np.mean(stride_time)
            phase_std = np.std(stride_time)
            rospy.logwarn("(Stride)")
            print "(Stride)"
            rospy.logwarn("Mean time: " + str(mean_time) + " + " +
                          str(phase_std))
            print "Mean time: " + str(mean_time) + " + " + str(phase_std)
            rospy.logwarn("CoV: " + str(phase_std / mean_time * 100.0))
            print("CoV: " + str(phase_std / mean_time * 100.0))
            n_group += 1
        """Accuracy"""
        # acc = sum/len(labels[leave_one_out])
        if (true_neg + true_pos + false_neg + false_pos) != 0.0:
            acc = (true_neg + true_pos) / (true_neg + true_pos + false_neg +
                                           false_pos)
        else:
            acc = 0.0
        """Sensitivity or True Positive Rate"""
        if true_pos + false_neg != 0:
            tpr = true_pos / (true_pos + false_neg)
        else:
            tpr = 0.0
        """Specificity or True Negative Rate"""
        if false_pos + true_neg != 0:
            tnr = true_neg / (false_pos + true_neg)
        else:
            tnr = 0.0
        # rospy.logwarn("Accuracy: {}%".format(acc*100))
        rospy.logwarn("Accuracy: {}%".format(acc * 100.0))
        # print("Accuracy: {}%".format(acc*100.0))
        rospy.logwarn("Sensitivity: {}%".format(tpr * 100.0))
        # print("Sensitivity: {}%".format(tpr*100.0))
        rospy.logwarn("Specificity: {}%".format(tnr * 100.0))
        # print("Specificity: {}%".format(tnr*100.0))
        """Goodness index"""
        # Euclidean distance from the ideal classifier point (TPR=1, TNR=1).
        G = np.sqrt((1 - tpr)**2 + (1 - tnr)**2)
        if G <= 0.25:
            rospy.logwarn("Optimum classifier (G = {} <= 0.25)".format(G))
            # print("Optimum classifier (G = {} <= 0.25)".format(G))
        elif G > 0.25 and G <= 0.7:
            rospy.logwarn("Good classifier (0.25 < G = {} <= 0.7)".format(G))
            # print("Good classifier (0.25 < G = {} <= 0.7)".format(G))
        # NOTE(review): this branch is unreachable -- G == 0.7 is already
        # captured by the `G <= 0.7` condition above.
        elif G == 0.7:
            rospy.logwarn("Random classifier (G = 0.7)")
            # print("Random classifier (G = 0.7)")
        else:
            rospy.logwarn("Bad classifier (G = {} > 0.7)".format(G))
Beispiel #10
0
        for j in range(0, n_classes):
            model.add_transition(hmm_states[i], hmm_states[j], t[i][j])
            print(hmm_states[i].name + "(" + str(i) + ")-> " +
                  hmm_states[j].name + "(" + str(j) + ") : " + str(t[i][j]))

    model.bake()

    # seq = list([full_features[:limit]])
    # print skf[2]

    for train_index, test_index in skf:
        X_train = full_features[train_index]
        Y_train = full_labels[train_index]
        X_test = full_features[test_index]
        Y_test = full_labels[test_index]
        model.fit(list([X_train]), algorithm='viterbi', verbose='False')
        if batch == 0:
            logp, path = model.viterbi(list(X_test))
            sum_ = 0.0
            path = path[1:]

            for i in range(0, len(path)):
                # print path[i][1].name + " " + phase_labels[Y_test[i]]
                if path[i][1].name == phase_labels[Y_test[i]]:
                    sum_ += 1.0
            print(str(sum_) + "/" + str(len(Y_test)))
            print(sum_ / float(len(Y_test)))
            print('------------------------------------')
        else:
            b = 0
            while b < len(X_test):
Beispiel #11
0
# Sticky self-transitions (p = 0.9) with a small chance (p = 0.1) of
# switching between the background and fixed states.
for _src, _dst, _prob in (
        (back_state, back_state, 0.9),
        (back_state, fixed_state, 0.1),
        (fixed_state, fixed_state, 0.9),
        (fixed_state, back_state, 0.1),
):
    hmmodel.add_transition(_src, _dst, _prob)

hmmodel.bake()

# Probe sequence: decode it before and after fitting to see how training
# changes the most-likely state path.
seq = list('acgtacgtaaaaccccaaa')

lopg, path = hmmodel.viterbi(seq)

print([step[1].name for step in path])

print(hmmodel.to_json())

# Training sequences for Baum-Welch refinement of the emission/transition
# parameters.
to_fit1 = list('acgtacacacacacac')
to_fit2 = list('acgtacgtacgtacgtacgtacgtacgtcgt')
to_fit3 = list('aaaaacccccaaacc')
to_fit4 = list('aaaaaccgcccaaaccacgtacgtacgtacgtactacgggggg')

# Decode the longest training sequence with the untrained model first.
lopg, path = hmmodel.viterbi(to_fit4)

print([step[1].name for step in path])

hmmodel.fit([to_fit1, to_fit2, to_fit3, to_fit4])

# Re-decode the original probe and dump the updated model.
lopg, path = hmmodel.viterbi(seq)

print([step[1].name for step in path])
print(hmmodel.to_json())
# Finalize the model topology, train with Baum-Welch on the last 20% of the
# feature sequence, then Viterbi-decode the first 80% and count how many
# decoded states match the ground-truth labels. (Python 2 script; relies on
# `model`, `ff`, `labels` and `state_names` defined earlier in the file.)
model.bake()

# print (model.name)
rospy.logwarn("N. observations: " + str(model.d))
# print (model.edges)
rospy.logwarn("N. hidden states: " + str(model.silent_start))
# print model
"""Training"""
limit = int(len(ff) * (8 / 10.0))  # 80% of data to test, 20% to train
# seq = list([ff[:limit]])
x_train = list([ff[limit:]])
print len(x_train)
rospy.logwarn("Training...")
# time_start = time.clock()
# NOTE(review): verbose='True' passes a (truthy) string, not the boolean True.
model.fit(x_train, algorithm='baum-welch', verbose='True')
# model.fit(x_train, algorithm='viterbi', verbose='True')
# time_elapsed = time.clock()- time_start
# rospy.logwarn("Training time: " + str(time_elapsed) + "s")
"""Find most-likely sequence"""
# logp, path = model.viterbi(ff[limit:])
x_test = ff[:limit]
logp, path = model.viterbi(x_test)
# Count decoded states that agree with the ground-truth labels, skipping the
# model's silent start state; labels are offset by one (labels[i - 1]),
# presumably to account for that start state -- TODO confirm alignment.
sum_ = 0.0
for i in range(0, len(path)):
    if path[i][1].name != 'Gait-start':
        # if path[i][1].name == state_names[labels[i+limit - 1]]:
        if path[i][1].name == state_names[labels[i - 1]]:
            # print path[i][1].name + " = " + state_names[labels[i+limit - 1]]
            sum_ += 1.0
        # else:
Beispiel #13
0
def main():
    rospy.init_node('hmm_trainer')
    phase_pub = rospy.Publisher('/phase', Int32, queue_size=10)
    rospack = rospkg.RosPack()
    packpath = rospack.get_path('exo_control')
    datapath = packpath + "/log/mat_files/"
    verbose = rospy.get_param('~verbose', False)

    """Print console output into text file"""
    sys.stdout = open(packpath + "/log/results/leave-one-out_cross_validation.txt", "w")

    """Data loading"""
    n_trials = 3
    n_sub = 9
    healthy_subs = ["daniel", "erika", "felipe", "jonathan", "luis", "nathalia", "paula", "pedro", "tatiana"]
    patients = ["andres", "carlos", "carmen", "carolina", "catalina", "claudia", "emmanuel", "fabian", "gustavo"]
    study_subs = [healthy_subs, patients]

    dataset = [{} for x in range(len(study_subs))]
    for i in range(len(study_subs)):
        for sub in study_subs[i]:
            dataset[i][sub] = {"gyro_y": [[] for x in range(n_trials)],
                               "fder_gyro_y": [[] for x in range(n_trials)],
                               "time": [[] for x in range(n_trials)],
                               "labels": [[] for x in range(n_trials)],
                               "Fs_fsr": 0.0}

    for group in dataset:
        for sub,data in group.iteritems():
            for trial in range(n_trials):
                mat_file = scio.loadmat(datapath + sub + "_proc_data" + str(trial+1) + ".mat")
                for signal in data:
                    if signal not in ["pathol","fder_gyro_y"]:
                        if signal == "Fs_fsr":
                            data[signal] = mat_file[signal][0][0]
                        else:
                            data[signal][trial] = mat_file[signal][0]
    del mat_file

    """Feature extraction"""
    """First derivative"""
    for group in dataset:
        for sub,data in group.iteritems():
            for trial in range(n_trials):
                der = []
                gyro_y = data["gyro_y"][trial]
                der.append(gyro_y[0])
                for i in range(1,len(gyro_y)-1):
                    der.append((gyro_y[i+1]-gyro_y[i-1])/2)
                der.append(gyro_y[-1])
                data["fder_gyro_y"][trial] = der
    del der, sub, data

    """Global variables of cHMM"""
    startprob = [0.25, 0.25, 0.25, 0.25]
    state_names = ['hs', 'ff', 'ho', 'sw']
    n_classes = 4
    n_signals = 2
    tol = 6e-2       # Tolerance window of 60 ms

    # for pathology in range(len(dataset)):
    #     if pathology == 0:
    #         rospy.logwarn("**Leave-one-out cross validation with HEALTHY subjects**")
    #         print "**Leave-one-out cross validation with HEALTHY subjects**"
    #     else:
    #         rospy.logwarn("**Leave-one-out cross validation with PATIENTS**")
    #         print "**Leave-one-out cross validation with PATIENTS**"
    if True:
        # for lou_sub,lou_data in dataset[pathology].iteritems():       # Iterate through leave-one-out subject's data
        for lou_sub,lou_data in dataset[0].iteritems():       # Iterate through leave-one-out subject's data
            rospy.logwarn("Leave " + lou_sub + " out:")
            print "Leave " + lou_sub + " out:"

            t = np.zeros((4, 4))        # Transition matrix
            prev = -1
            for trial in range(n_trials):
                for label in lou_data["labels"][trial]:
                    if prev == -1:
                        prev = label
                    t[prev][label] += 1.0
                    prev = label
            t = normalize(t, axis=1, norm='l1')
            if verbose: rospy.logwarn("TRANSITION MATRIX\n" + str(t))

            class_data = [[] for x in range(n_classes)]
            full_lou_data = []
            full_lou_labels = []
            for trial in range(n_trials):
                for sample in range(len(lou_data["gyro_y"][trial])):
                    d = [lou_data["gyro_y"][trial][sample], lou_data["fder_gyro_y"][trial][sample]]
                    l = lou_data["labels"][trial][sample]
                    full_lou_data.append(d)
                    full_lou_labels.append(l)
                    class_data[l].append(d)

            """Multivariate Gaussian Distributions for each hidden state"""
            class_means = [[[] for x in range(n_signals)] for i in range(n_classes)]
            class_vars = [[[] for x in range(n_signals)] for i in range(n_classes)]
            class_std = [[[] for x in range(n_signals)] for i in range(n_classes)]
            class_cov = []
            classifiers = []

            for state in range(n_classes):
                cov = np.ma.cov(np.array(class_data[state]), rowvar=False)
                class_cov.append(cov)
                for signal in range(n_signals):
                    class_means[state][signal] = np.array(class_data[state][:])[:, [signal]].mean(axis=0)
                    class_vars[state][signal] = np.array(class_data[state][:])[:, [signal]].var(axis=0)
                    class_std[state][signal] = np.array(class_data[state][:])[:, [signal]].std(axis=0)

            """Classifier initialization"""
            distros = []
            hmm_states = []
            for state in range(n_classes):
                dis = MGD\
                    (np.array(class_means[state]).flatten(),
                     np.array(class_cov[state]))
                st = State(dis, name=state_names[state])
                distros.append(dis)
                hmm_states.append(st)
            model = HMM(name="Gait")

            model.add_states(hmm_states)
            """Initial transitions"""
            for state in range(n_classes):
                model.add_transition(model.start, hmm_states[state], startprob[state])
            """Left-right model"""
            # Wire state-to-state transitions from the estimated transition matrix t,
            # then finalize the model topology.
            for i in range(n_classes):
                for j in range(n_classes):
                    model.add_transition(hmm_states[i], hmm_states[j], t[i][j])

            model.bake()

            """Create training and test data"""
            x_train = []
            x_test = []
            # Test set: the held-out (last) trial of the leave-one-out subject.
            test_gyro_y = lou_data["gyro_y"][-1]
            test_fder_gyro_y = lou_data["fder_gyro_y"][-1]
            """Create test data with n-th trial of leave-one-out subject"""
            for sample in range(len(test_gyro_y)):
                x_test.append([test_gyro_y[sample], test_fder_gyro_y[sample]])
            """Create training data with n-1 trials of all subjects (patients group)"""
            # if pathology == 1:
            #     for trial in range(n_trials-1):
            #         train_gyro_y = lou_data["gyro_y"][trial]
            #         train_fder_gyro_y = lou_data["fder_gyro_y"][trial]
            #         for sample in range(len(train_gyro_y)):
            #             x_train.append([train_gyro_y[sample], train_fder_gyro_y[sample]])

            """Create training data with n-1 trials of the rest of subjects (healthy group)"""
            # for train_sub,train_data in dataset[pathology].iteritems():
            for train_sub,train_data in dataset[0].iteritems():
                # if lou_sub != train_sub:
                # NOTE(review): the leave-one-out guard is disabled, so the
                # held-out subject's first n-1 trials are also used for training.
                if True:
                    for trial in range(n_trials-1):
                        train_gyro_y = train_data["gyro_y"][trial]
                        train_fder_gyro_y = train_data["fder_gyro_y"][trial]
                        for sample in range(len(train_gyro_y)):
                            x_train.append([train_gyro_y[sample], train_fder_gyro_y[sample]])
            # pomegranate expects a list of sequences; wrap the single long sequence.
            x_train = list([x_train])

            """Training"""
            rospy.logwarn("Training HMM...")
            model.fit(x_train, algorithm='baum-welch', verbose=True)
            # model.fit(x_train, algorithm='viterbi', verbose='True')

            """Find most-likely sequence"""
            logp, path = model.viterbi(x_test)
            class_labels = []
            # Map each Viterbi state name back to its numeric phase index.
            # NOTE(review): path[i][1] indexing presumably skips pomegranate's
            # model-start entry at path[0] — confirm against pomegranate's
            # viterbi() return convention.
            for i in range(len(lou_data["labels"][-1])):
                path_phase = path[i][1].name
                for state in range(n_classes):
                    if path_phase == state_names[state]:
                        class_labels.append(state)
            # Saving classifier labels into csv file
            np.savetxt(packpath+"/log/inter_labels/"+lou_sub+"_labels.csv", class_labels, delimiter=",", fmt='%s')
            rospy.logwarn("csv file with classifier labels was saved.")
            # Drop the first reference label, presumably to re-align the label
            # stream with the decoded path — TODO confirm the intended offset.
            lou_data["labels"][-1] = lou_data["labels"][-1][1:]

            """Results"""
            sum = 0.0
            true_pos = 0.0
            false_pos = 0.0
            true_neg = 0.0
            false_neg = 0.0
            tol_window = int((tol/2) / (1/float(lou_data["Fs_fsr"])))

            rospy.logwarn("Calculating results...")
            time_error = [[] for x in range(n_classes)]
            for phase in range(n_classes):
                for i in range(len(lou_data["labels"][-1])):
                    """Tolerance window"""
                    if i >= tol_window and i < len(lou_data["labels"][-1])-tol_window:
                        win = []
                        for win_label in lou_data["labels"][-1][i-tol_window:i+tol_window+1]:
                            win.append(win_label)
                        if class_labels[i] == phase:
                            if class_labels[i] in win:
                                for k in range(len(win)):
                                    if win[k] == phase:
                                        time_error[phase].append((k-tol_window)/lou_data["Fs_fsr"])
                                        break
                                true_pos += 1.0
                                if verbose: print phase + ", " + lou_data["labels"][-1][i] + ", " + class_labels[i] + ", true_pos"
                            else:
                                false_pos += 1.0
                                if verbose: print phase + ", " + lou_data["labels"][-1][i] + ", " + class_labels[i] + ", false_pos"
                        else:
                            if phase != lou_data["labels"][-1][i]:
                            # if phase not in win:
                                true_neg += 1.0
                                if verbose: print phase + ", " + lou_data["labels"][-1][i] + ", " + class_labels[i] + ", true_neg"
                            else:
                                false_neg += 1.0
                                if verbose: print phase + ", " + lou_data["labels"][-1][i] + ", " + class_labels[i] + ", false_neg"
                    else:
                        if class_labels[i] == phase:
                            if class_labels[i] == lou_data["labels"][-1][i]:
                                true_pos += 1.0
                            else:
                                false_pos += 1.0
                        else:
                            if phase != lou_data["labels"][-1][i]:
                                true_neg += 1.0
                            else:
                                false_neg += 1.0

            rospy.logwarn("Timing error")
            print "Timing error"
            for phase in range(n_classes):
                rospy.logwarn("(" + state_names[phase] + ")")
                print "(" + state_names[phase] + ")"
                if len(time_error[phase]) > 0:
                    rospy.logwarn(str(np.mean(time_error[phase])) + " + " + str(np.std(time_error[phase])))
                    print str(np.mean(time_error[phase])) + " + " + str(np.std(time_error[phase]))
                else:
                    rospy.logwarn("0.06 + 0")
                    print "0.06 + 0"

            """Calculate mean time (MT) of stride and each gait phase and Coefficient of Variation (CoV)"""
            rospy.logwarn("Mean time (MT) and Coefficient of Variance (CoV)")
            print "Mean time (MT) and Coefficient of Variance (CoV)"
            n_group = 0
            for label_group in [class_labels, lou_data["labels"][-1]]:
                if n_group == 0:
                    rospy.logwarn("Results for HMM:")
                    print "Results for HMM:"
                else:
                    rospy.logwarn("Results for FSR:")
                    print "Results for FSR:"
                curr_label = -1
                count = 0
                n_phases = 0
                stride_samples = 0
                phases_time = [[] for x in range(n_classes)]
                stride_time = []
                for label in label_group:
                # for label in class_labels:
                    if curr_label != label:
                        n_phases += 1
                        stride_samples += count
                        if label == 0:  # Gait start: HS
                            if n_phases == 4:   # If a whole gait cycle has past
                                stride_time.append(stride_samples/lou_data["Fs_fsr"])
                            n_phases = 0
                            stride_samples = 0
                        phases_time[label-1].append(count/lou_data["Fs_fsr"])
                        curr_label = label
                        count = 1
                    else:
                        count += 1.0
                for phase in range(n_classes):
                    mean_time = np.mean(phases_time[phase])
                    phase_std = np.std(phases_time[phase])
                    rospy.logwarn("(" + state_names[phase] + ")")
                    print "(" + state_names[phase] + ")"
                    rospy.logwarn("Mean time: " + str(mean_time) + " + " + str(phase_std))
                    print "Mean time: " + str(mean_time) + " + " + str(phase_std)
                    rospy.logwarn("CoV: " + str(phase_std/mean_time*100.0))
                    print("CoV: " + str(phase_std/mean_time*100.0))
                mean_time = np.mean(stride_time)
                phase_std = np.std(stride_time)
                rospy.logwarn("(Stride)")
                print "(Stride)"
                rospy.logwarn("Mean time: " + str(mean_time) + " + " + str(phase_std))
                print "Mean time: " + str(mean_time) + " + " + str(phase_std)
                rospy.logwarn("CoV: " + str(phase_std/mean_time*100.0))
                print("CoV: " + str(phase_std/mean_time*100.0))
                n_group += 1

            """Accuracy"""
            if (true_neg+true_pos+false_neg+false_pos) != 0.0:
                acc = (true_neg + true_pos)/(true_neg + true_pos + false_neg + false_pos)
            else:
                acc = 0.0
            """Sensitivity or True Positive Rate"""
            if true_pos+false_neg != 0:
                tpr = true_pos / (true_pos+false_neg)
            else:
                tpr = 0.0
            """Specificity or True Negative Rate"""
            if false_pos+true_neg != 0:
                tnr = true_neg / (false_pos+true_neg)
            else:
                tnr = 0.0
            rospy.logwarn("Accuracy: {}%".format(acc*100.0))
            print("Accuracy: {}%".format(acc*100.0))
            rospy.logwarn("Sensitivity: {}%".format(tpr*100.0))
            print("Sensitivity: {}%".format(tpr*100.0))
            rospy.logwarn("Specificity: {}%".format(tnr*100.0))
            print("Specificity: {}%".format(tnr*100.0))
            """Goodness index"""
            G = np.sqrt((1-tpr)**2 + (1-tnr)**2)
            if G <= 0.25:
                rospy.logwarn("Optimum classifier (G = {} <= 0.25)".format(G))
                print("Optimum classifier (G = {} <= 0.25)".format(G))
            elif G > 0.25 and G <= 0.7:
                rospy.logwarn("Good classifier (0.25 < G = {} <= 0.7)".format(G))
                print("Good classifier (0.25 < G = {} <= 0.7)".format(G))
            elif G == 0.7:
                rospy.logwarn("Random classifier (G = 0.7)")
                print("Random classifier (G = 0.7)")
            else:
                rospy.logwarn("Bad classifier (G = {} > 0.7)".format(G))
                print("Bad classifier (G = {} > 0.7)".format(G))

    del test_gyro_y, test_fder_gyro_y, train_gyro_y, train_fder_gyro_y, d, l
class RealTimeHMM():
    def __init__(self, n_trials=3, leave_one_out=1):
        """Initialize the ROS node, HMM state, and load or train the gait model.

        n_trials: number of locally-recorded trials per subject.
        leave_one_out: 1-based index of the trial held out from training.
        Side effects: starts a ROS node, subscribes/publishes topics, and may
        read/write model files under the package's log/ directory.
        """
        """Variable initialization"""
        self.patient = rospy.get_param("gait_phase_det/patient")
        self.verbose = rospy.get_param("gait_phase_det/verbose")
        self.n_trials = n_trials
        self.n_features = 2      # Raw data and 1st-derivative
        self.leave_one_out = leave_one_out
        self.rec_data = 0.0       # Number of recorded IMU data
        self.proc_data = 0.0      # Number of extracted features
        self.win_size = 3
        self.raw_win = [None] * self.win_size
        # self.fder_win = [0] * self.win_size
        self.ff = [[] for x in range(self.n_trials)]      # Training and test dataset
        self.labels = [[] for x in range(self.n_trials)]  # Reference labels from local data
        self.first_eval = True
        self.model_loaded = False
        # Decoding strategy and matching IMU callback are selected once from
        # the ROS parameter server ("fov" or "bvsw").
        algorithm = rospy.get_param("gait_phase_det/algorithm")
        rospy.loginfo('Decoding algorithm: {}'.format(algorithm))
        if algorithm not in DECODER_ALGORITHMS:
            raise ValueError("Unknown decoder {!r}".format(algorithm))
        self.decode = {
            "fov": self._run_fov,
            "bvsw": self._run_bvsw
        }[algorithm]
        self.imu_callback = {
            "fov": self._fov_callback,
            "bvsw": self._bvsw_callback
        }[algorithm]
        """HMM variables"""
        ''' State list:
            s1: Heel Strike (HS)
            s2: Flat Foot   (FF)
            s3: Heel Off    (HO)
            s4: Swing Phase (SP)'''
        self.model_name = "Gait"
        self.has_model = False
        self.must_train = False
        self.states = ['s1', 's2', 's3', 's4']
        self.n_states = len(self.states)
        self.state2phase = {"s1": "hs", "s2": "ff", "s3": "ho", "s4": "sp"}
        self.train_data = []
        self.mgds = {}
        self.dis_means = [[] for x in range(self.n_states)]
        self.dis_covars = [[] for x in range(self.n_states)]
        # Uniform initial state probabilities.
        self.start_prob = [1.0/self.n_states]*self.n_states
        self.trans_mat = np.array([(0.9, 0.1, 0, 0), (0, 0.9, 0.1, 0), (0, 0, 0.9, 0.1), (0.1, 0, 0, 0.9)])    # Left-right model
        # self.trans_mat = np.array([0.8, 0.1, 0, 0.1], [0.1, 0.8, 0.1, 0], [0, 0.1, 0.8, 0.1], [0.1, 0, 0.1, 0.8])    # Left-right-left model
        self.log_startprob = []
        self.log_transmat = np.empty((self.n_states, self.n_states))
        # NOTE(review): used everywhere below as a window length in samples,
        # though the comment says ms — confirm intended units.
        self.max_win_len = 11       # ms (120 ms: mean IC duration for healthy subjects walking at comfortable speed)
        self.viterbi_path = np.empty((self.max_win_len+1, self.n_states))
        self.backtrack = [[None for x in range(self.n_states)] for y in range(self.max_win_len+1)]
        self.global_path = []
        self.work_buffer = np.empty(self.n_states)
        self.boundary = 1
        self.buff_len = 0
        # Map state name -> index for O(1) lookups.
        self.states_pos = {}
        for i in range(len(self.states)):
            self.states_pos[self.states[i]] = i
        self.last_state = -1
        self.curr_state = -1
        self.conv_point = 0
        self.conv_found = False
        self.smp_freq = 100.0   # Hz
        # NOTE(review): expression yields 4 samples (0.04 s) at 100 Hz, but the
        # comment claims 8 samples — confirm which is intended.
        self.fp_thresh = 1/self.smp_freq*4    # Threshold corresponds to 8 samples
        self.time_passed = 0.0
        self.obs = [[None for x in range(self.n_features)] for y in range(self.max_win_len)]
        self.model = HMM(name=self.model_name)
        """ROS init"""
        rospy.init_node('real_time_HMM', anonymous=True)
        rospack = rospkg.RosPack()
        self.packpath = rospack.get_path('hmm_gait_phase_classifier')
        self.init_subs()
        self.init_pubs()
        """HMM-training (if no model exists)"""
        # Preference order: (1) load a previously-saved JSON model,
        # (2) train from FSR-labelled .mat data, (3) train from
        # threshold-labelled .csv data, otherwise ask the user to record data.
        try:
            '''HMM-model loading'''
            with open(self.packpath+'/log/HMM_models/'+self.patient+'.txt') as infile:
                json_model = json.load(infile)
                self.model = HMM.from_json(json_model)
                rospy.logwarn(self.patient + "'s HMM model was loaded.")
                self.has_model = True
        except IOError:
            if os.path.isfile(self.packpath + "/log/mat_files/" + self.patient + "_proc_data1.mat"):
                """Training with data collected with FSR-based reference system"""
                self.data_ext = 'mat'
                self.must_train = True
            elif os.path.isfile(self.packpath + "/log/IMU_data/" + self.patient + "_labels.csv"):
                """Training with data collected with offline threshold-based gait phase detection method"""
                self.data_ext = 'csv'
                self.must_train = True
            else:
                rospy.logerr("Please collect data for training ({})!".format(self.patient))
        if self.must_train:
            rospy.logwarn("HMM model not trained yet for {}!".format(self.patient))
            rospy.logwarn("Training HMM with local data...")
            self.load_data()
            self.init_hmm()
            self.train_hmm()
            self.has_model = True
        if self.has_model:
            try:
                '''MGDs loading if model exists'''
                for st in self.states:
                    with open(self.packpath+'/log/HMM_models/'+self.patient+'_'+self.state2phase[st]+'.txt') as infile:
                        yaml_dis = yaml.safe_load(infile)
                        dis = MGD.from_yaml(yaml_dis)
                        self.mgds[st] = dis
                        rospy.logwarn(self.patient +"'s " + self.state2phase[st] + " MGC was loaded.")
                        '''Loading means and covariance matrix'''
                        self.dis_means[self.states_pos[st]] = self.mgds[st].parameters[0]
                        self.dis_covars[self.states_pos[st]] = self.mgds[st].parameters[1]
            except yaml.YAMLError as exc:
                # NOTE(review): "str + exc" concatenates a string with an
                # exception object and would itself raise TypeError.
                rospy.logwarn("Not able to load distributions: " + exc)
            """Transition and initial (log) probabilities matrices upon training"""
            trans_mat = self.model.dense_transition_matrix()[:self.n_states,:self.n_states]
            if self.verbose: print '**TRANSITION MATRIX (post-training)**\n'+ str(trans_mat)
            for i in range(self.n_states):
                self.log_startprob.append(ln(self.start_prob[i]))
                for j in range(self.n_states):
                    self.log_transmat[i,j] = ln(trans_mat[i][j])
            self.model_loaded = True

    """Init ROS publishers"""
    def init_pubs(self):
        """Advertise the /gait_phase topic on which detected phases are published."""
        pub = rospy.Publisher('/gait_phase', Int8, queue_size=100)
        self.phase_pub = pub

    """Init ROS subcribers"""
    def init_subs(self):
        """Subscribe to raw IMU samples, routed to the algorithm-specific callback."""
        handler = self.imu_callback
        rospy.Subscriber('/imu_data', Imu, handler)

    """Callback function upon arrival of IMU data for forward-only decoding"""
    def _fov_callback(self, data):
        """IMU callback for forward-only Viterbi decoding.

        Maintains a 3-sample sliding window of gyro-Y, extracts
        [raw value, central first derivative], decodes one state per sample,
        and publishes the phase with a false-positive debounce.
        """
        self.rec_data += 1.0
        self.raw_win.append(data.angular_velocity.y)
        self.raw_win.pop(0)       # Drop first element

        if self.rec_data >= self.win_size and self.model_loaded:      # At least one previous and one subsequent data should have been received
            """Extract feature and append it to test dataset"""
            # Central difference around the middle window sample.
            # NOTE(review): win_size/2 relies on Python 2 integer division.
            fder = (self.raw_win[self.win_size/2 + 1] - self.raw_win[self.win_size/2 - 1])/2
            # peak_detector = self.raw_win[self.win_size/2]/max(self.raw_win)
            # self.fder_win.append(fder)
            # self.fder_win.pop(0)
            # sder = (self.fder_win[2] - self.fder_win[0])/2
            # test_set = [self.raw_win[self.win_size/2], self.raw_win[self.win_size/2 - 2], self.raw_win[self.win_size/2 - 1], self.raw_win[self.win_size/2 + 1], self.raw_win[self.win_size/2 + 2]]         # Temporally proximal features
            test_set = [self.raw_win[self.win_size/2], fder]         # Temporally proximal features
            '''Forward-only decoding approach'''
            state = self.decode(test_set)
            # rospy.loginfo("Decoded phase: {}".format(state))
            self.time_passed += 1/self.smp_freq
            if self.last_state == state:
                # Accept a phase change only if it persisted past the debounce
                # threshold and follows the cyclic order (SP->HS wrap or +1 step).
                # NOTE(review): `and` binds tighter than `or`, so this reads
                # (A and B) or C — the time threshold is bypassed for the
                # `state == curr_state + 1` case. Confirm whether
                # A and (B or C) was intended.
                if (self.time_passed >= self.fp_thresh) and (self.curr_state == 3 and state == 0) or (state == self.curr_state + 1):
                    self.curr_state = state
                    self.phase_pub.publish(state)
                else:
                    # rospy.loginfo("Current phase: {}".format(self.state2phase[self.states[self.curr_state]]))
                    # self.phase_pub.publish((self.curr_state-1)%4)
                    self.phase_pub.publish(self.curr_state)
            else:
                # New candidate state: restart the persistence timer.
                self.last_state = state
                self.time_passed = 1/self.smp_freq
                # rospy.logwarn("Detected phase: {}".format(self.state2phase[self.states[self.last_state]]))

    """Callback function upon arrival of IMU data for BVSW"""
    def _bvsw_callback(self, data):
        """IMU callback for bounded-variable-sliding-window (BVSW) decoding.

        Buffers features in self.obs and publishes every state of the partial
        path returned once the decoder reaches a convergence/bound point.
        """
        self.rec_data += 1.0
        self.raw_win.append(data.angular_velocity.y)
        self.raw_win.pop(0)       # Drop first element

        if self.rec_data >= 3 and self.model_loaded:      # At least one previous and one subsequent data should have been received
            """Extract feature and append it to test dataset"""
            # NOTE(review): second feature is the *average* of the two
            # neighbours, not the first derivative the comment claims —
            # (raw_win[2]-raw_win[0])/2 would be the central difference. Confirm.
            test_set = [self.raw_win[1], (self.raw_win[0]+self.raw_win[2])/2]    # First-derivate of angular velocity
            '''Bounded sliding window decoding approach'''
            self.obs.append(test_set)
            self.obs.pop(0)                     # This way, -1 element corresponds to last received features
            states = self.decode(test_set)
            if len(states) != 0:
                # Decoder emitted a (partial) state sequence: publish each state.
                for st in states:
                    self.phase_pub.publish(st)
                    if self.curr_state != st:
                        rospy.logwarn("Detected phase: {}".format(self.state2phase[self.states[st]]))
                        self.curr_state = st
                self.proc_data += 1.0     # One gyro data has been processed

    """Local data loading from mat file and feature extraction"""
    def load_data(self):
        """Load local recordings and build self.train_data / self.ff / self.labels.

        Two sources, selected by self.data_ext (set in __init__):
        - 'mat': FSR-labelled trials from log/mat_files/<patient>_proc_dataN.mat
        - 'csv': threshold-labelled gyro data from log/IMU_data/<patient>.csv
        Features per sample are [gyro_y, central first derivative].
        """
        """Data loading"""
        '''Load mat file (Data processed offline in Matlab)'''
        if self.data_ext == 'mat':
            rospy.logwarn("Initializing parameters via FSR-based reference system")
            # subs = ['daniel', 'erika', 'felipe', 'jonathan', 'luis', 'nathalia', 'paula', 'pedro', 'tatiana']      # Healthy subjects
            # subs = ['carmen', 'carolina', 'catalina', 'claudia', 'emmanuel', 'fabian', 'gustavo']      # Pathological subjects
            # subs = ['daniel']      # Single subject
            datapath = self.packpath + "/log/mat_files/"
            gyro_y = [[] for x in range(self.n_trials)]
            time_array = [[] for x in range(self.n_trials)]
            # for patient in subs:
            for i in range(self.n_trials):
                # .mat files are 1-indexed per trial; arrays arrive as 1xN.
                data = scio.loadmat(datapath + self.patient + "_proc_data" + str(i+1) + ".mat")
                # data = scio.loadmat(datapath + patient + "_proc_data" + str(i+1) + ".mat")
                gyro_y[i] = data["gyro_y"][0]
                # gyro_y[i] += list(data["gyro_y"][0])
                time_array[i] = data["time"][0]
                # time_array[i] += list(data["time"][0])
                self.labels[i] = data["labels"][0]
                # self.labels[i] += list(data["labels"][0])

            """Feature extraction"""
            '''First derivative'''
            # Central differences; endpoints are padded with the raw values so
            # the derivative series has the same length as the signal.
            fder_gyro_y = []
            for i in range(self.n_trials):
                der = []
                der.append(gyro_y[i][0])
                for j in range(1,len(gyro_y[i])-1):
                    der.append((gyro_y[i][j+1]-gyro_y[i][j-1])/2)
                der.append(gyro_y[i][-1])
                fder_gyro_y.append(der)

            '''Second derivative'''
            # sder_gyro_y = []
            # for i in range(self.n_trials):
            #     der = []
            #     der.append(fder_gyro_y[i][0])
            #     for j in range(1,len(fder_gyro_y[i])-1):
            #         der.append((fder_gyro_y[i][j+1]-fder_gyro_y[i][j-1])/2)
            #     der.append(fder_gyro_y[i][-1])
            #     sder_gyro_y.append(der)

            '''Peak detector'''
            # peak_detector = []
            # for i in range(self.n_trials):
            #     win = []
            #     for j in range(len(gyro_y[i])):
            #         if (j - self.win_size/2) < 0:
            #             win.append(gyro_y[i][j] / self._max(gyro_y[i][0:j + self.win_size/2]))
            #         elif (j + self.win_size/2) > (len(gyro_y[i])-1):
            #             win.append(gyro_y[i][j] / self._max(gyro_y[i][j - self.win_size/2:len(gyro_y[i])-1]))
            #         else:
            #             win.append(gyro_y[i][j] / self._max(gyro_y[i][j - self.win_size/2:j + self.win_size/2]))
            #     peak_detector.append(win)

            """Create training data"""
            for j in range(self.n_trials):
                # for k in range(self.win_size/2,len(gyro_y[j])-1-self.win_size/2):
                for k in range(len(gyro_y[j])):
                    f_ = []
                    f_.append(gyro_y[j][k])
                    '''Approximate differentials'''
                    f_.append(fder_gyro_y[j][k])
                    # f_.append(sder_gyro_y[j][k])
                    '''Temporally proximal feature'''
                    # f_.append(gyro_y[j][k-1])
                    # f_.append(gyro_y[j][k-2])
                    # f_.append(gyro_y[j][k+1])
                    # f_.append(gyro_y[j][k+2])
                    '''Peak detector'''
                    # f_.append(peak_detector[j][k])
                    self.ff[j].append(f_)
            self.ff = np.array(self.ff)
            self.n_features = len(self.ff[0][0])

            # Train on two of the three trials: the one *before* and the one
            # *after* the held-out index (leave_one_out is 1-based).
            for i in range(len(self.ff[self.leave_one_out-1])):
                self.train_data.append(self.ff[self.leave_one_out-1][i])
            for i in range(len(self.ff[(self.leave_one_out+1) % self.n_trials])):
                self.train_data.append(self.ff[(self.leave_one_out+1) % self.n_trials][i])
            self.train_data = [self.train_data]       # Keep this line or training won't work

        """Parameter initialization"""
        '''Kmeans clustering'''
        # n_components = 4       # No. components of MGD
        # init = 'kmeans++'      # "kmeans||", "first-k", "random"
        # n_init = 1
        # weights = None
        # max_kmeans_iterations = 1
        # batch_size = None
        # batches_per_epoch = None
        #
        # X_concat = x_train
        #
        # # X_concat = numpy.concatenate(x_train)
        # # if X_concat.ndim == 1:
        # #     X_concat = X_concat.reshape(X_concat.shape[0], 1)
        # # n, d = X_concat.shape
        #
        # rospy.logwarn("K-means clustering...")
        # clf = Kmeans(n_components, init=init, n_init=n_init)
        # clf.fit(X_concat, weights, max_iterations=max_kmeans_iterations,
        #     batch_size=batch_size, batches_per_epoch=batches_per_epoch)
        # y = clf.predict(X_concat)
        #
        # rospy.logwarn("Creating distributions...")
        # class_data = [[] for x in range(self.n_states)]
        # for i in range(len(x_train)):
        #     class_data[y[i]].append(x_train[i])
        # if self.verbose:
        #     sum = 0
        #     print "Kmeans clusters:"
        #     for i in range(self.n_states):
        #         temp = len(class_data[i])
        #         sum += temp
        #         print temp
        #     print sum, len(x_train)

        """Offline threshold-based classification"""
        if self.data_ext == 'csv':
            rospy.logwarn("Initializing parameters via threshold-based gait phase detection method")
            # Single recording: everything goes into slot 0 of ff/labels.
            self.leave_one_out = 0
            gyro_y = []
            datapath = self.packpath + '/log/IMU_data/'
            with open(datapath+'{}.csv'.format(self.patient), 'r') as imu_file:
                reader = csv.DictReader(imu_file)
                for row in reader:
                    gyro_y.append(float(dict(row)['gyro_y']))
            '''Feature extraction: 1st derivative'''
            # Endpoints are skipped here (range 1..len-1), unlike the .mat path.
            for i in range(1, len(gyro_y)-1):
                feature_vect = [gyro_y[i], (gyro_y[i+1]-gyro_y[i-1])/2]
                self.train_data.append(feature_vect)
                self.ff[self.leave_one_out].append(feature_vect)
            '''Reference labels'''
            with open(datapath+'{}_labels.csv'.format(self.patient), 'r') as labels_file:
                reader = csv.reader(labels_file)
                for row in reader:
                    self.labels[self.leave_one_out].append(int(row[0]))
            self.train_data = [self.train_data]

    """Init HMM if no previous training"""
    def init_hmm(self):
        """Build the untrained HMM from labelled training data.

        Estimates one full-covariance multivariate Gaussian per state from the
        samples carrying that label, wires uniform initial probabilities and
        the left-right transition matrix, then bakes the model.
        Preconditions: load_data() has populated self.ff and self.labels.
        """
        if self.data_ext == 'mat':
            rospy.logwarn("-------Leaving trial {} out-------".format(self.leave_one_out+1))

        """Transition matrix (A)"""
        '''Transition matrix from reference labels'''
        # prev = -1
        # for i in range(len(self.labels[self.leave_one_out])):
        #     if prev == -1:
        #         prev = self.labels[self.leave_one_out][i]
        #     self.trans_mat[prev][self.labels[self.leave_one_out][i]] += 1.0
        #     prev = self.labels[self.leave_one_out][i]
        # self.trans_mat = normalize(self.trans_mat, axis=1, norm='l1')
        if self.verbose: rospy.logwarn("**TRANSITION MATRIX (pre-training)**\n" + str(self.trans_mat) + '\nMatrix type: {}'.format(type(self.trans_mat)))

        # Group feature vectors by their reference label.
        # Assumes every label is in [0, n_states); an out-of-range label
        # would raise IndexError here.
        class_data = [[] for x in range(self.n_states)]
        for i in range(len(self.ff[self.leave_one_out])):
            class_data[self.labels[self.leave_one_out][i]].append(self.ff[self.leave_one_out][i])

        """Multivariate Gaussian Distributions for each hidden state"""
        class_means = [[[] for x in range(self.n_features)] for i in range(self.n_states)]
        # class_vars = [[[] for x in range(self.n_features)] for i in range(self.n_states)]
        # class_std = [[[] for x in range(self.n_features)] for i in range(self.n_states)]
        class_cov = []
        for i in range(self.n_states):
            # Full covariance across features for state i (rowvar=False:
            # rows are observations, columns are features).
            cov = np.ma.cov(np.array(class_data[i]), rowvar=False)
            class_cov.append(cov)
            for j in range(self.n_features):
                class_means[i][j] = np.array(class_data[i][:])[:, [j]].mean(axis=0)
                # class_vars[i][j] = np.array(class_data[i][:])[:, [j]].var(axis=0)
                # class_std[i][j] = np.array(class_data[i][:])[:, [j]].std(axis=0)
        """Classifier initialization"""
        distros = []
        hmm_states = []
        for i in range(self.n_states):
            dis = MGD(np.array(class_means[i]).flatten(), np.array(class_cov[i]))
            st = State(dis, name=self.states[i])
            distros.append(dis)
            hmm_states.append(st)
        self.model.add_states(hmm_states)
        '''Initial transitions'''
        for i in range(self.n_states):
            self.model.add_transition(self.model.start, hmm_states[i], self.start_prob[i])
        '''Left-right model'''
        for i in range(self.n_states):
            for j in range(self.n_states):
                self.model.add_transition(hmm_states[i], hmm_states[j], self.trans_mat[i][j])
        '''Finish model setup'''
        self.model.bake()

    """Train initialized model"""
    def train_hmm(self):
        """Run Baum-Welch on self.train_data, then persist the fitted model.

        Writes one YAML file per state distribution and one JSON model file
        under log/HMM_models/. Distributions are frozen after fitting so later
        decoding cannot mutate them.
        """
        rospy.logwarn("Training initialized model...")
        self.model.fit(self.train_data, algorithm='baum-welch', verbose=self.verbose)
        self.model.freeze_distributions()     # Freeze all model distributions, preventing update from ocurring
        if self.verbose: print "**HMM model:\n{}**".format(self.model)

        """Save Multivariate Gaussian Distributions into yaml file"""
        # Skip pomegranate's implicit "<name>-start"/"<name>-end" silent states.
        for st in self.model.states:
            if st.name != self.model_name+"-start" and st.name != self.model_name+"-end":
                dis = st.distribution
                dis_yaml = dis.to_yaml()
                try:
                    with open(self.packpath+'/log/HMM_models/'+self.patient+'_'+self.state2phase[st.name]+'.txt', 'w') as outfile:
                        yaml.dump(dis_yaml, outfile, default_flow_style=False)
                    rospy.logwarn(self.patient+"'s "+self.state2phase[st.name]+" distribution was saved.")
                except IOError:
                    rospy.logwarn('It was not possible to write GMM distribution.')
        """Save model (json script) into txt file"""
        model_json = self.model.to_json()
        try:
            with open(self.packpath+'/log/HMM_models/'+self.patient+'.txt', 'w') as outfile:
                json.dump(model_json, outfile)
            rospy.logwarn(self.patient+"'s HMM model was saved.")
        except IOError:
            rospy.logwarn('It was not possible to write HMM model.')

    """Compute the log probability under a multivariate Gaussian distribution.
    Parameters
    ----------
        X : array_like, shape (n_samples, n_features)
            List of n_features-dimensional data points. Each row corresponds to a
            single data point.
        means : array_like, shape (n_components, n_features)
            List of n_features-dimensional mean vectors for n_components Gaussians.
            Each row corresponds to a single mean vector.
        covars : array_like
            List of n_components covariance parameters for each Gaussian. The shape
            is (n_components, n_features, n_features) if 'full'
        Returns
    ----------
        lpr : array_like, shape (n_samples, n_components)
            Array containing the log probabilities of each data point in
            X under each of the n_components multivariate Gaussian distributions."""
    def log_multivariate_normal_density(self, X, min_covar=1.e-7):
        """Log probability for full covariance matrices."""
        n_samples, n_dim = X.shape
        nmix = len(self.dis_means)
        log_prob = np.empty((n_samples, nmix))
        for c, (mu, cv) in enumerate(zip(self.dis_means, self.dis_covars)):
            try:
                cv_chol = linalg.cholesky(cv, lower=True)
            except linalg.LinAlgError:
                # The model is most probably stuck in a component with too
                # few observations, we need to reinitialize this components
                try:
                    cv_chol = linalg.cholesky(cv + min_covar * np.eye(n_dim),
                                              lower=True)
                except linalg.LinAlgError:
                    raise ValueError("'covars' must be symmetric, "
                                     "positive-definite")

            cv_log_det = 2 * np.sum(np.log(np.diagonal(cv_chol)))
            cv_sol = linalg.solve_triangular(cv_chol, (X - mu).T, lower=True).T
            log_prob[:, c] = - .5 * (np.sum(cv_sol ** 2, axis=1) +
                                     n_dim * np.log(2 * np.pi) + cv_log_det)
        return log_prob

    """Find argument (pos) that has the maximum value in array-like object"""
    def _argmax(self, X):
        X_max = float("-inf")
        pos = 0
        for i in range(X.shape[0]):
            if X[i] > X_max:
                X_max = X[i]
                pos = i
        return pos

    """Find max value in array-like object"""
    def _max(self, X):
        return X[self._argmax(X)]

    """Backtracking process to decode most-likely state sequence"""
    def _optim_backtrack(self, k):
        opt = []
        self.last_state = where_from = self._argmax(self.viterbi_path[k])
        opt.append(where_from)
        for lp in range(k-1, -1, -1):
            opt.insert(0, self.backtrack[lp + 1][where_from])
            where_from = self.backtrack[lp + 1][where_from]
        self.global_path.extend(opt)
        return opt

    '''Forward-only decoding approach'''
    def _run_fov(self, test_set):
        # Probability distribution of state given observation
        framelogprob = self.log_multivariate_normal_density(np.array([test_set]))
        if self.first_eval:
            for i in range(self.n_states):
                self.viterbi_path[0, i] = self.log_startprob[i] + framelogprob[0, i]
            self.first_eval = False
            return self._argmax(self.viterbi_path[0])
        else:       # Recursion
            for i in range(self.n_states):
                for j in range(self.n_states):
                    self.work_buffer[j] = (self.log_transmat[j, i] + self.viterbi_path[0, j])
                self.viterbi_path[1, i] = self._max(self.work_buffer) + framelogprob[0, i]
            self.viterbi_path[0] = self.viterbi_path[1]       # Prepare for next feature vector
            return self._argmax(self.viterbi_path[1])

    '''Bounded sliding variable window approach'''
    def _run_bvsw(self, test_set):
        """Decode one observation using a bounded sliding variable window.

        Grows the Viterbi trellis one frame per call.  When all per-state
        local backtrack paths agree on a common state (a "fusion point"),
        the path up to that point is committed through _optim_backtrack();
        if the window reaches max_win_len without converging, the current
        best path is flushed instead.  Returns the list of newly committed
        states (empty while the decision is still pending).
        """
        framelogprob = self.log_multivariate_normal_density(np.array([test_set]))
        if self.first_eval:
            # Initialization: seed the trellis with start probabilities.
            for i in range(self.n_states):
                self.viterbi_path[0,i] = self.log_startprob[i] + framelogprob[0,i]
                self.backtrack[0][i] = None
            self.first_eval = False
            return []
        else:
            '''Find likelihood probability and backpointer'''
            # Standard Viterbi recursion for the newest frame at index
            # self.boundary (the current window length).
            for j in range(self.n_states):
                for i in range(self.n_states):
                    self.work_buffer[i] = self.viterbi_path[self.boundary - 1][i] + self.log_transmat[i, j]
                self.viterbi_path[self.boundary][j] = self._max(self.work_buffer) + framelogprob[0][j]
                self.backtrack[self.boundary][j] = self._argmax(self.work_buffer)
            '''Backtracking local paths'''
            # One local path per possible end state, traced back to frame 0.
            local_paths = [[] for x in range(self.n_states)]
            for j in range(self.n_states):
                where_from = j
                for smp in range(self.boundary-1, -1, -1):
                    local_paths[j].insert(0, self.backtrack[smp+1][where_from])
                    where_from = self.backtrack[smp+1][where_from]
            # if self.verbose:
            #     print "\n{}, {}".format(t, b)
            #     for path in local_paths:
            #         print path
            '''Given all local paths, find fusion point'''
            # Scan from the newest frame backwards for the first frame where
            # every local path visits the same state.
            tmp = [None] * self.n_states
            for k in range(len(local_paths[0])-1, 0, -1):
                for st in range(self.n_states):
                    tmp[st] = local_paths[st][k]
                if tmp.count(tmp[0]) == len(tmp):      # All local paths point to only one state?
                    self.conv_found = True
                    self.conv_point = k
                    # if self.verbose: print "Found, {}".format(k)
                    break
            '''Find local path if fusion point was found'''
            if self.boundary < self.max_win_len and self.conv_found:
                # Commit the agreed-upon prefix of the path.
                self.buff_len += self.conv_point
                opt = self._optim_backtrack(self.conv_point)
                self.conv_found = False
                # if self.verbose: print "\nOpt1: " + str(opt) + ", {}".format(len(self.global_path))
                '''Reinitialize local variables'''
                # Restart the window from the committed state: it gets start
                # prob ln(1.0), all others ln(0.0).
                # NOTE(review): presumably `ln` tolerates log(0) — verify.
                for i in range(self.n_states):
                    if i == self.last_state:
                        self.log_startprob[i] = ln(1.0)
                    else:
                        self.log_startprob[i] = ln(0.0)
                    self.viterbi_path[0][i] = self.log_startprob[i] + framelogprob[0][i]
                    self.backtrack[0][i] = None
                # Replay the frames after the fusion point to rebuild the
                # trellis for the shortened window.
                # NOTE(review): assumes self.obs buffers past observations and
                # that the (possibly negative) index picks the frames after
                # the fusion point — confirm against the caller.
                for smp in range(1, self.boundary-self.conv_point+1):
                    for j in range(self.n_states):
                        for i in range(self.n_states):
                            self.work_buffer[i] = self.viterbi_path[smp - 1][i] + self.log_transmat[i, j]
                        self.viterbi_path[smp][j] = self._max(self.work_buffer) + self.log_multivariate_normal_density(np.array([self.obs[self.conv_point-self.boundary+smp-1]]))[0,j]
                        self.backtrack[smp][j] = self._argmax(self.work_buffer)
                self.boundary -= self.conv_point-1
                return opt
            elif self.boundary >= self.max_win_len:
                '''Bounding threshold was exceeded'''
                # No convergence within the window bound: force-flush the
                # best path for the whole window and start a fresh window.
                self.buff_len += self.max_win_len
                opt = self._optim_backtrack(self.boundary-1)
                # if self.verbose: print "\nOpt2: " + str(opt) + ", {}".format(len(self.global_path))
                '''Reinitialize local variables'''
                self.boundary = 1
                for i in range(self.n_states):
                    if i == self.last_state:
                        self.log_startprob[i] = ln(1.0)
                    else:
                        self.log_startprob[i] = ln(0.0)
                    self.viterbi_path[0][i] = self.log_startprob[i] + framelogprob[0][i]
                    self.backtrack[0][i] = None
                return opt
            else:
                # Still undecided: widen the window and wait for more data.
                self.boundary += 1
                return []
# Assemble the HMM: a background state feeding a fixed start-state chain
# (added by add_sequence), followed by a second background state.
model = HiddenMarkovModel()

model.add_state(back)
model.add_state(back2)
add_sequence(model, start_states)

model.add_transition(model.start, back, 1)
model.add_transition(back, back, 0.55)
model.add_transition(back, start_states[0], 0.45)
model.add_transition(start_states[-1], back2, 1)
model.add_transition(back2, back2, 0.5)

model.bake()

# Evaluate the untrained model as a baseline.
test(model)

# NOTE(review): hard-coded absolute path — consider making it configurable.
with open('/home/zippyttech/Projects/personal/PerKalk/train_start2.exa') as fi:
    # Strip newlines and convert each training line to the model's alphabet.
    lines = [converter_to(line.replace('\n', '')) for line in fi]

# Pseudocounts keep unseen transitions/emissions from collapsing to zero.
model.fit(lines,
          transition_pseudocount=1,
          emission_pseudocount=1,
          verbose=True)

# Re-evaluate after training to compare against the baseline.
test(model)

with open('partial_model_start_model.json', 'w') as out:
    out.write(model.to_json())