예제 #1
0
def prepare_training(list_of_training_patients):
    """Assemble the pooled training data from all training patients.

    For every entry in the ``file_name`` column, the recording located at
    ``data_path + file_name`` is imported, the binary event columns are
    removed, ``hypnogram_User`` is moved to the last column and the result is
    passed through ``preprocess_data``.  Files that do not exist are reported
    with a message and skipped.

    Args:
        list_of_training_patients: table with a ``file_name`` column naming
            one recording per row (presumably a pandas DataFrame; verify
            against the caller).

    Returns:
        A tuple ``(train_df, training_class_array, feature_names)``:
        ``train_df`` is the concatenation of the ``train`` frames returned by
        ``preprocess_data``; ``training_class_array`` is a list holding one
        hidden-state sequence per loaded recording; ``feature_names`` lists
        the columns of the last loaded recording except ``hypnogram_User``
        and ``hypnogram_Machine``.  When no recording could be loaded, an
        empty DataFrame and two empty lists are returned.
    """
    # Event columns that are removed before preprocessing when present.
    binary_features = [
        "Gain", "Bradycardia", "LegMovement", "CentralApnea",
        "Arousal", "Hypopnea", "RelativeDesaturation", "Snore",
        "ObstructiveApnea", "MixedApnea", "LongRR", "Tachycardia"
    ]

    # One hidden-state sequence per successfully loaded patient.
    training_class_array = []
    # Per-patient training frames; concatenated once at the end because
    # DataFrame.append no longer exists in pandas >= 2.0 and copied the
    # whole accumulated frame on every call.
    train_frames = []
    # Last successfully loaded recording; its columns define feature_names.
    last_data = None

    # Iterate over the values directly so the function does not depend on
    # the caller having reset the index to 0..n-1.
    for file_name in list_of_training_patients['file_name']:
        path = data_path + str(file_name)

        if not os.path.isfile(path):
            print('File not found.')
            continue

        data = dp.data_import(path)
        present = [name for name in binary_features if name in data.columns]
        data = data.drop(present, axis=1)
        # pop + re-insert moves the manually scored hypnogram to the end.
        data['hypnogram_User'] = data.pop('hypnogram_User')

        _, hidden_sequence, _, train, _ = preprocess_data(data=data)
        training_class_array.append(hidden_sequence)
        train_frames.append(train)
        last_data = data

    if last_data is None:
        # Nothing could be loaded: return empty results instead of failing
        # on an unbound local variable.
        return pd.DataFrame(), training_class_array, []

    train_df = pd.concat(train_frames)
    feature_names = last_data.drop(
        ['hypnogram_User', 'hypnogram_Machine'],
        axis=1).columns.values.tolist()

    return train_df, training_class_array, feature_names
예제 #2
0
    dist, state_names = hmm_dist.gauss_kernel_dist(feature_names)
    print('Observation probabilities prepared')

    # * initiate HMM *

    model = pg.NaiveBayes(dist)

    # * test the model

    list_of_testing_patients = list_of_testing_patients.reset_index()

    for k in range(list_of_testing_patients.shape[0]):
        path = data_path + str(list_of_testing_patients['file_name'][k])

        if os.path.isfile(path) == True:
            patient_data = dp.data_import(path)
            binary_features = ["Gain", "Bradycardia", "LegMovement", "CentralApnea", "Arousal", "Hypopnea",
                                       "RelativeDesaturation", "Snore", "ObstructiveApnea", "MixedApnea", "LongRR", "Tachycardia"]
            for feature in binary_features:
                if feature in patient_data.columns:
                    patient_data = patient_data.drop(feature, axis=1)
            df1 = patient_data.pop('hypnogram_User')
            patient_data['hypnogram_User'] = df1
            n_features = patient_data.shape[1] - 2
            data_columns, hidden_sequence, observation_sequence, train1, test = preprocess_data(data=patient_data)

            test_observation_sequence = train1.iloc[:, 0:n_features].values.tolist()

        path = model.predict(test_observation_sequence)

        conf_hmm = metrics.confusion_matrix(hidden_sequence, [state_names[id] for id in path], states)
예제 #3
0
    "experimenty/list_of_patients_with_attributes.csv")
# Leave-one-out pass over the patient list: in every iteration row i is the
# single testing patient and all remaining rows form the training set.
score = []
print("zacal")

for i in range(len(list_of_patients['file_name'])):
    list_of_testing_patients = list_of_patients.iloc[[i]].reset_index()
    list_of_training_patients = list_of_patients.drop(
        [i], axis=0).reset_index()
    print(".")

    # Load the recording of the testing patient.
    testing_patient_path = "Data/" + str(
        list_of_testing_patients['file_name'][0])
    testing_patient_data = dp.data_import(testing_patient_path)

    # Remove the binary event columns that exist in this recording.
    binary_features = [
        "Gain", "Bradycardia", "LegMovement", "CentralApnea", "Arousal",
        "Hypopnea", "RelativeDesaturation", "Snore", "ObstructiveApnea",
        "MixedApnea", "LongRR", "Tachycardia"
    ]
    present_features = [
        name for name in binary_features
        if name in testing_patient_data.columns
    ]
    testing_patient_data = testing_patient_data.drop(
        columns=present_features)

    # Move the manually scored hypnogram to the last column, then discard
    # the machine-scored one.
    testing_patient_data['hypnogram_User'] = testing_patient_data.pop(
        'hypnogram_User')
    testing_patient_data = testing_patient_data.drop(
        columns=['hypnogram_Machine'])

    # Everything except the last column is the observation sequence; the
    # last column (hypnogram_User) is the hidden-state sequence.
    test_observation_sequence = testing_patient_data.iloc[:, :-1].values
    test_hidden_sequence = testing_patient_data.iloc[:, -1].values
import os
import pandas as pd
import hmm as myhmm
import matplotlib.pyplot as plt
import data_preprocessing as dp

# Hypnogram of an average person.
average_person = dp.data_import(
    '/Users/kristina/PycharmProjects/vyskumak/Data/12.10.2016-Z-M-39let.csv')
# Time axis: row i is placed at 0.5 / 60 * i (presumably 30-second epochs
# expressed in hours, matching the x-axis label below; confirm against the
# data). The length is taken from the recording itself instead of a
# hard-coded 973 so that a recording of any length can be plotted.
average_person['time'] = [(0.5 / 60 * i) for i in range(len(average_person))]
# Numeric codes used to place the sleep stages on the y axis.
cleanup_nums = {
    "hypnogram_User": {
        "Wake": 5,
        "REM": 4,
        "NonREM1": 3,
        "NonREM2": 2,
        "NonREM3": 1
    }
}
average_person.replace(cleanup_nums, inplace=True)
plt.plot(average_person['time'], average_person['hypnogram_User'])
# Tick positions and their labels mirror the codes in cleanup_nums.
y = [5, 4, 3, 2, 1]
labels = ["Wake", "REM", "NonREM1", "NonREM2", "NonREM3"]
plt.yticks(y, labels)
plt.xlabel('Measurement length [hours]')
plt.ylabel('Sleep stage')
plt.title('Hypnogram of average person from dataset Hradec Kralove')
plt.show()

# Age histogram with 10 bins.
# NOTE(review): `males` is not defined in the visible part of this script;
# it is expected to be created elsewhere.
plt.hist(males['age'], 10, color='grey')
예제 #5
0
# Collect, for every allowed CSV file in `directory`, the predicted labels
# (column '0' of the prediction file) into `preds` and the manually scored
# labels (column 'hypnogram_User' of the matching file in `hs_dir`) into `hs`.
for file in os.listdir(directory):
    filename = os.fsdecode(file)
    if not filename.endswith(".csv"):
        continue

    try:
        # Skip recordings whose name (without ".csv") occurs in the
        # 'Pacient' column of forbidden_ps1. The name is matched literally:
        # file names contain regex metacharacters such as '.', and missing
        # entries in the column must never count as a match.
        forbidden = forbidden_ps1['Pacient'].str.contains(
            filename[:-4], regex=False, na=False)
        if forbidden.any():
            continue

        path = os.fsdecode(directory) + "/" + filename
        y_pred = pd.read_csv(path, sep=r"\s+", header=0)
        predicted = y_pred['0']

        path1 = hs_dir + str(filename)
        y = dp.data_import(path1)
        # Epochs without a manual score are left out of the comparison.
        y = y[y['hypnogram_User'] != "NotScored"]
        scored = y['hypnogram_User']
    except Exception as exc:
        # Best effort: report the problem and continue with the next file
        # instead of silently hiding it.
        print('Skipping ' + str(filename) + ': ' + repr(exc))
        continue

    # Extend both lists only after both files were read successfully, so a
    # failure half-way through cannot leave preds and hs misaligned.
    preds.extend(predicted)
    hs.extend(scored)

    if len(predicted) != len(scored):
        print(
            str(filename) + ' ' + str(len(predicted)) + ' ' +
            str(len(scored)))

# 1 where prediction and manual score agree, 0 otherwise. zip stops at the
# shorter list, so fewer predictions than scores no longer raise IndexError.
difs = [1 if pred == truth else 0 for pred, truth in zip(preds, hs)]
dif = pd.DataFrame({'difs': difs})