Example #1
0
def create_dataset(dataset_path, mode):
    """Read a CSV file and build the list of batches used by the network.

    The function relies on module-level settings: ``VEC_SIZE``, ``CHANNELS``,
    ``INTERVALS_NUM``, ``LIMIT``, ``BATCH_SIZE``, ``DATA_MODE`` and
    ``network_mode``.

    Args:
        dataset_path: Path of the CSV file; it must contain an ``"Open"``
            column, whose length determines the number of rows allocated.
        mode: ``'test'`` selects the start offset ``1500 - VEC_SIZE``; any
            other value selects ``VEC_SIZE``.

    Returns:
        A list built from ``get_data`` (when ``DATA_MODE == 'random'``) or
        from ``prepare_data_vectors`` (otherwise). The list is shuffled in
        place when ``DATA_MODE == 'random_batch'``.
    """
    # print() with a single argument behaves the same on Python 2 and 3.
    print(dataset_path)
    start_data = 1500 - VEC_SIZE if mode == 'test' else VEC_SIZE
    data = pd.read_csv(dataset_path)

    # Number of rows shared by the input and label buffers.
    n_rows = len(data["Open"]) - VEC_SIZE + 1
    # NOTE(original author): a channel dimension may still need to be added.
    vec = np.zeros(shape=(n_rows, 1, VEC_SIZE, CHANNELS))
    # NOTE(original author): the non-'reg' label shape ignores CHANNELS,
    # which may be a problem.
    label_width = CHANNELS if network_mode == 'reg' else INTERVALS_NUM
    label = np.zeros(shape=(n_rows, label_width))

    if DATA_MODE == 'random':
        list_num, input_vectors, input_label = prepare_data_vectors_rand(
            data, start_data, vec, label, VEC_SIZE, LIMIT, network_mode)
        data_set = list(
            get_data(BATCH_SIZE, list_num, input_vectors, input_label))
    else:
        data_set = list(
            prepare_data_vectors(data, start_data, vec, label, VEC_SIZE,
                                 BATCH_SIZE, LIMIT, network_mode))
    if DATA_MODE == 'random_batch':
        shuffle(data_set)

    # %-formatting keeps the output identical to the old
    # `print "data set size:", n` statement on both Python 2 and 3.
    print("data set size: %d" % len(data_set))
    return data_set
Example #2
0
def main():
    """Load the pulse data and run one training session per configured lead.

    Reads its settings from ``cfg``. When ``cfg.logging`` is truthy, the CSV
    column header is appended to the log file before training starts.
    """
    data_x, data_y, ecg_fnames = dgen.get_data(
        location=cfg.pulse_data_location,
        return_fnames=True,
        channels=np.array(range(cfg.n_channels)),
        targets=cfg.targets,
        extension="." + cfg.file_extension,
    )

    if cfg.logging:
        with open(cfg.log_location + "log_" + cfg.t + ".csv", 'a') as csvlog:
            csvlog.write("t,lead,split_train,split_val,split_test,epochs,split_on,train_size,validation_size,test_size,loss,accuracy,precision,recall,ROC-AUC,PR-AUC,F1\n")

    model_save_name = cfg.model_save_name

    # Opening with 'w' creates/truncates the file, exactly as before. The
    # handle stays open for the duration of the sessions, and the `with`
    # block guarantees it is closed even if a training session raises.
    with open('model/af_ratio_predictor.txt', 'w'):
        for lead in cfg.leads:
            run_training_session(data_x, data_y, model_save_name, ecg_fnames, lead)
Example #3
0
def convert_and_process():
    """Convert the XML sources, preprocess the result and export pulse data.

    Steps, in order: ``convert_xmls()``, reload everything from
    ``cfg.converted_data_location``, preprocess the inputs, save them under
    ``cfg.processed_data_location`` and finally call ``save_pulse_data()``.
    """
    # convert_ecgs() is intentionally left disabled, as in the original.
    convert_xmls()

    loaded = dgen.get_data(return_fnames=True,
                           location=cfg.converted_data_location)
    raw_inputs, labels, file_names = loaded

    cleaned_inputs = dprep.preprocess_data(raw_inputs)
    dprep.save_data(
        cleaned_inputs,
        labels,
        cfg.processed_data_location,
        file_names,
    )

    save_pulse_data()
Example #4
0
def save_pulse_data(fmt='%.6f'):
    """Extract windows from the "AF"/"SR" recordings and save each as text.

    Args:
        fmt: Number format forwarded to ``np.savetxt``.

    Each extracted window is written comma-separated to
    ``cfg.pulse_data_location + <file name>``.
    """
    loaded = dgen.get_data(return_fnames=True, targets=["AF", "SR"])
    recordings, labels, names = loaded

    windows = dprep.extract_windows(recordings,
                                    labels,
                                    cfg.nn_input_size,
                                    fnames=names)
    pulses, labels, names = windows

    for out_name, pulse in zip(names, pulses):
        destination = cfg.pulse_data_location + out_name
        np.savetxt(destination, pulse, delimiter=',', fmt=fmt)
Example #5
0
def convert_dwt_images(lead):
    """Save one wavelet image per recording for the given lead.

    Args:
        lead: Channel passed to ``dgen.get_data`` as ``channels=[lead]``.

    For every recording, ``save_wavelet_img`` receives the sample indices
    ``0 .. data_x.shape[1] - 1``, the first (only requested) channel, the
    values ``np.arange(1, 128, 2)`` and a title taken from the file name up
    to its first dot. ``progress_bar`` is updated after each image.
    """
    data_x, _, fnames = dgen.get_data(
        targets=cfg.targets,
        return_fnames=True,
        channels=[lead],
        norm=True)

    n_recordings, n_samples = data_x.shape[0], data_x.shape[1]

    for i, ecg in enumerate(data_x):
        title = fnames[i].split('.')[0]
        # list(range(...)) instead of a comprehension over `i`: the old
        # comprehension reused the loop variable, which on Python 2
        # overwrote the index reported to progress_bar below.
        save_wavelet_img(list(range(n_samples)),
                         ecg[:, 0],
                         np.arange(1, 128, 2),
                         title=title)
        progress_bar("Converting to DWT image", i, n_recordings)
import os, sys
import numpy as np
import random
import matplotlib
import matplotlib.pyplot as plt
import data_generator as ds
import calculate_param as cm

# Fix both random generators so runs are reproducible.
random.seed(400)
np.random.seed(78)

# Sample coordinates plus the reference covariances/centres from the data
# generator.
(x_coord, y_coord, real_cov1, real_cov2,
 real_x1, real_y1, real_x2, real_y2) = ds.get_data(5, 10, 10, 12, 7, 4)

# Materialise the pairs as a list: on Python 3 zip() is a one-shot iterator,
# but data_set is handed to several helpers (get_initial_parameters here and
# the cm calls inside E_Step), so it must survive repeated iteration.
data_set = list(zip(x_coord, y_coord))
x_coord = np.array(x_coord)
y_coord = np.array(y_coord)

# get initial parameters
cos_phi, centers, cov_matrix_1, cov_matrix_2, alpha_1, alpha_2 = (
    cm.get_initial_parameters(data_set, x_coord, y_coord, 2))

def E_Step(centers, cov_matrix_1, cov_matrix_2, data_set, alpha_1, alpha_2):
    """Evaluate both Gaussians and the distribution terms for ``data_set``.

    Uses the module-level ``cos_phi`` in addition to its arguments.

    Args:
        centers: Indexable holding two centres; ``centers[k][0]`` and
            ``centers[k][1]`` are read for ``k`` in ``(0, 1)``.
        cov_matrix_1: Covariance passed along with the first centre.
        cov_matrix_2: Covariance passed along with the second centre.
        data_set: Data handed unchanged to the ``cm`` helpers.
        alpha_1: Forwarded to ``cm.get_distribution``.
        alpha_2: Forwarded to ``cm.get_distribution``.

    Returns:
        ``(G_1, G_2, Q_1, Q_2)``: the two ``cm.get_gaussian`` results followed
        by the pair returned from ``cm.get_distribution``.
    """
    first_center = centers[0]
    first_x, first_y = first_center[0], first_center[1]
    second_center = centers[1]
    second_x, second_y = second_center[0], second_center[1]

    gauss_first = cm.get_gaussian(cov_matrix_1, data_set, first_x, first_y)
    gauss_second = cm.get_gaussian(cov_matrix_2, data_set, second_x, second_y)

    distribution = cm.get_distribution(
        data_set, cos_phi, cov_matrix_1, cov_matrix_2,
        first_x, first_y, second_x, second_y, alpha_1, alpha_2)
    dist_first, dist_second = distribution

    return gauss_first, gauss_second, dist_first, dist_second
Example #7
0
import config
from data_generator import data_generate, get_data
import torch.nn as nn
import torch.optim as optim
import torch
import os
from model import DD_CNN
from evaluate import evaluate_data

# Report CUDA availability and select the matching device.
_cuda_ok = torch.cuda.is_available()
print('torch.cuda.is_available() is ', _cuda_ok)

device = torch.device("cuda:0") if _cuda_ok else torch.device("cpu")

# Build the train/test arrays, then pass them with the configured batch size
# to get_data to obtain train_data and test_data.
x_train, y_train, x_test, y_test = data_generate()
batch_size = config.batch_size
_splits = get_data(x_train, y_train, x_test, y_test, batch_size)
train_data, test_data = _splits

# Model instance.
net = DD_CNN()


def accuracy(predictions, labels):
    """Count the predictions whose arg-max along dim 1 equals the label.

    Args:
        predictions: Tensor of scores; the index of the maximum along
            dimension 1 is taken as the predicted class.
        labels: Tensor of target classes, reshaped to match the predictions.

    Returns:
        A tuple ``(rights, total)`` where ``rights`` is a tensor holding the
        number of matches and ``total`` is ``len(labels)``.
    """
    _, predicted = torch.max(predictions, 1)
    targets = labels.data.view_as(predicted)
    n_correct = predicted.eq(targets).sum()
    return n_correct, len(labels)


# Cross-entropy loss object, constructed with its default arguments.
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(net.parameters(),
                        lr=0.001,
                        betas=(0.9, 0.999),
                        eps=1e-08,
Example #8
0
def generator(location, fnames, lead):
    """Yield the data of one file at a time, restricted to a single lead.

    Args:
        location: Forwarded to ``dgen.get_data`` as ``location``.
        fnames: Iterable of file names; each is loaded on its own.
        lead: Index selected on the last axis of the loaded inputs.

    Yields:
        ``(data_x[:, :, lead], data_y)`` for every file name in ``fnames``.
    """
    for single_name in fnames:
        loaded = dgen.get_data(location=location,
                               open_files=[single_name],
                               verbosity=False)
        inputs, targets = loaded
        yield inputs[:, :, lead], targets
Example #9
0
    and atrial fibrillation labels.
Authors: Florian Schroevers
"""

import data_preprocessing as dprep
import data_generator as dgen
from global_params import cfg

import numpy as np
import matplotlib.pyplot as plt
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

# Load the configured lead (normalised) with targets 1-4 excluded.
# NOTE(review): presumably this leaves only the sinus-rhythm class (target 0),
# judging by the variable name below - confirm against dgen.get_data.
data_x, data_y, fnames = dgen.get_data(return_fnames=True,
                                       channels=np.array([cfg.lead]),
                                       norm=True,
                                       exclude_targets=[1, 2, 3, 4])
data_x_sine, _ = dprep.extract_windows(data_x, data_y)

# Same load, this time excluding targets 0 and 2-4.
# NOTE(review): presumably this leaves only atrial fibrillation (target 1) -
# confirm.
data_x, data_y, fnames = dgen.get_data(return_fnames=True,
                                       channels=np.array([cfg.lead]),
                                       norm=True,
                                       exclude_targets=[0, 2, 3, 4])
data_x_afib, _ = dprep.extract_windows(data_x, data_y)

# Plot the mean over axis 0 of each group; legend labels had typos
# ("rythm", "firbrillation") which are corrected here.
plt.plot(np.mean(data_x_sine, axis=0), c="g", label="Sinus rhythm")
plt.plot(np.mean(data_x_afib, axis=0),
         c="r",
         label="Atrial fibrillation",
         linestyle="--")
plt.legend()
Example #10
0

def hsl(value):
    """Return the 4-tuple ``(value**0.35, 0, 1 - value**2, 1)``.

    NOTE(review): presumably used as a colour for plotting with ``value`` in
    [0, 1] - confirm against the callers.
    """
    first_component = value ** 0.35
    third_component = 1 - value ** 2
    return (first_component, 0, third_component, 1)


# When logging is enabled, append the CSV column header to the log file
# named after cfg.t.
if cfg.logging:
    _log_path = cfg.log_location + "log_" + cfg.t + ".csv"
    _log_header = "t,lead,split_train,split_val,split_test,epochs,unique_patients,train_size,validation_size,test_size,loss,accuracy,precision,recall,AUC,F1\n"
    with open(_log_path, 'a') as csvlog:
        csvlog.write(_log_header)

# Load all configured channels in one call; file names are returned as well.
_loaded = dgen.get_data(
    return_fnames=True,
    channels=np.array(range(cfg.n_channels)),
    norm=cfg.normalize_data,
    targets=cfg.targets,
    extension="." + cfg.file_extension,
)
all_data_x, all_data_y, fnames = _loaded

# Process leads 0..7 one at a time, recording the active lead on cfg.
for lead in range(8):
    cfg.current_lead = lead

    # Selecting channels (0, lead) with a sequence index already produces a
    # new array, so the full data set no longer needs to be copied first.
    data_x = all_data_x[:, :, (0, lead)]
    # Targets are still copied so the originals stay untouched between leads.
    data_y = all_data_y.copy()

    # Channel 0 is passed along but flagged via exclude_first_channel.
    data_x, data_y = dprep.extract_windows(data_x,
                                           data_y,
                                           exclude_first_channel=True,
                                           fnames=fnames)
import os
import sys
import numpy as np
import random
import matplotlib
import matplotlib.pyplot as plt
import data_generator as ds
import initialize as initialize


# Fix both random generators so runs are reproducible.
random.seed(400)
np.random.seed(78)

# Generate the sample coordinates together with the reference covariances
# and the centre coordinates returned by the generator.
x_coord, y_coord, real_cov1, real_cov2, mu_x1, mu_y1, mu_x2, mu_y2 = ds.get_data(
    mu_x1=-10, mu_y1=-10, mu_x2=10, mu_y2=10, sigma_x=5, sigma_y=2)

# print() calls written so the output is identical on Python 2 and 3
# (the old print statements are a syntax error on Python 3).
print(real_cov1)
print(real_cov2)
print("%s %s %s %s" % (mu_x1, mu_y1, mu_x2, mu_y2))

# Materialise the pairs: on Python 3 zip() is a one-shot iterator, which
# would be exhausted after its first consumer.
data_set = list(zip(x_coord, y_coord))
x_coord = np.array(x_coord)
y_coord = np.array(y_coord)
plt.scatter(x_coord, y_coord)
plt.xlim([-20, 20])
plt.ylim([-20, 20])
k = 2

# Initial centres for the k clusters.
centers = initialize.center_seed(k, data_set)
new_centers = []