def create_dataset(dataset_path, mode):
    """Read a CSV file and turn it into a list of batches.

    Besides its arguments the function reads these module-level names:
    ``VEC_SIZE``, ``CHANNELS``, ``INTERVALS_NUM``, ``BATCH_SIZE``, ``LIMIT``,
    ``DATA_MODE`` and ``network_mode``.

    Args:
        dataset_path: Path handed to ``pd.read_csv``.  The file must contain
            an ``"Open"`` column; its length sizes the zero-filled buffers.
        mode: ``'test'`` makes the start offset ``1500 - VEC_SIZE``; any other
            value makes it ``VEC_SIZE``.

    Returns:
        list: The items produced by ``get_data`` when ``DATA_MODE`` is
        ``'random'``, otherwise the items produced by
        ``prepare_data_vectors``.  When ``DATA_MODE`` is ``'random_batch'``
        the list is shuffled in place before being returned.
    """
    # A parenthesised single argument prints identically on Python 2 and 3.
    print(dataset_path)

    # NOTE(review): 1500 is hard-coded here; its meaning is not visible in
    # this function.
    start_data = 1500 - VEC_SIZE if mode == 'test' else VEC_SIZE

    data = pd.read_csv(dataset_path)

    # Size of the first axis shared by the input and label buffers.
    n_rows = len(data["Open"]) - VEC_SIZE + 1

    vec = np.zeros(shape=(n_rows, 1, VEC_SIZE, CHANNELS))  # MAYBE ADD CH
    if network_mode == 'reg':
        label = np.zeros(shape=(n_rows, CHANNELS))
    else:
        # maybe problem without channels
        label = np.zeros(shape=(n_rows, INTERVALS_NUM))

    if DATA_MODE == 'random':
        list_num, input_vectors, input_label = prepare_data_vectors_rand(
            data, start_data, vec, label, VEC_SIZE, LIMIT, network_mode)
        data_set = list(
            get_data(BATCH_SIZE, list_num, input_vectors, input_label))
    else:
        data_set = list(
            prepare_data_vectors(data, start_data, vec, label, VEC_SIZE,
                                 BATCH_SIZE, LIMIT, network_mode))
        if DATA_MODE == 'random_batch':
            shuffle(data_set)

    # %-formatting yields the same text the Python 2 statement
    # `print "data set size:", len(data_set)` produced.
    print("data set size: %d" % len(data_set))
    return data_set
def main():
    """Load the pulse data and run one training session per configured lead.

    Steps, in order:
      1. Load signals, targets and file names with ``dgen.get_data`` from
         ``cfg.pulse_data_location``.
      2. If ``cfg.logging`` is set, append the CSV header line to the log
         file ``cfg.log_location + "log_" + cfg.t + ".csv"``.  The file is
         opened in append mode, so the header is added on every call.
      3. Create/truncate ``model/af_ratio_predictor.txt`` and, while it is
         open, call ``run_training_session`` once for every entry of
         ``cfg.leads``.
    """
    data_x, data_y, ecg_fnames = dgen.get_data(
        # n_files=10,
        location=cfg.pulse_data_location,
        return_fnames=True,
        channels=np.array(range(cfg.n_channels)),
        # norm=cfg.normalize_data,
        targets=cfg.targets,
        extension="." + cfg.file_extension
    )
    # data_x, data_y, fnames = dprep.extract_windows(
    #     all_ecg_data_x,
    #     all_ecg_data_y,
    #     cfg.nn_input_size,
    #     fnames=ecg_fnames,
    #     verbosity=cfg.verbosity
    # )

    if cfg.logging:
        with open(cfg.log_location + "log_" + cfg.t + ".csv", 'a') as csvlog:
            csvlog.write("t,lead,split_train,split_val,split_test,epochs,split_on,train_size,validation_size,test_size,loss,accuracy,precision,recall,ROC-AUC,PR-AUC,F1\n")

    model_save_name = cfg.model_save_name

    # Opening in 'w' mode creates or truncates the file; nothing is written
    # through this handle in this function.  The `with` block guarantees the
    # handle is closed even if a training session raises.
    with open('model/af_ratio_predictor.txt', 'w'):
        for lead in cfg.leads:
            run_training_session(data_x, data_y, model_save_name,
                                 ecg_fnames, lead)
def convert_and_process():
    """Convert the XML sources, preprocess the result and store everything.

    Runs ``convert_xmls``, loads the converted recordings from
    ``cfg.converted_data_location``, passes the signals through
    ``dprep.preprocess_data``, saves them with ``dprep.save_data`` under
    ``cfg.processed_data_location`` and finally calls ``save_pulse_data``.
    """
    # convert_ecgs()
    convert_xmls()

    loaded = dgen.get_data(
        return_fnames=True,
        location=cfg.converted_data_location)
    raw_signals, targets, file_names = loaded

    cleaned_signals = dprep.preprocess_data(raw_signals)
    dprep.save_data(
        cleaned_signals,
        targets,
        cfg.processed_data_location,
        file_names)

    save_pulse_data()
def save_pulse_data(fmt='%.6f'):
    """Extract windows from the "AF"/"SR" recordings and write each to disk.

    Args:
        fmt: Number format string forwarded to ``np.savetxt``.

    Each extracted window is written as comma-delimited text to
    ``cfg.pulse_data_location + <file name>``.
    """
    signals, labels, names = dgen.get_data(
        # n_files=10,
        return_fnames=True,
        targets=["AF", "SR"])

    windows, labels, names = dprep.extract_windows(
        signals, labels, cfg.nn_input_size, fnames=names)

    for name, window in zip(names, windows):
        destination = cfg.pulse_data_location + name
        np.savetxt(destination, window, delimiter=',', fmt=fmt)
def convert_dwt_images(lead):
    """Save one wavelet image per recording for a single lead.

    Args:
        lead: Channel passed to ``dgen.get_data`` as ``channels=[lead]``.

    For every loaded recording, ``save_wavelet_img`` is called with the
    sample indices ``0 .. data_x.shape[1] - 1``, the recording's first (only
    requested) channel and the scales ``np.arange(1, 128, 2)``.  The image
    title is the file name up to its first ``'.'``.  ``progress_bar`` is
    updated after each recording.
    """
    data_x, _labels, fnames = dgen.get_data(
        # n_files=1,
        targets=cfg.targets,
        return_fnames=True,
        channels=[lead],
        norm=True)

    n_recordings = data_x.shape[0]
    # Built with list(range(...)) instead of a comprehension over `i`: on
    # Python 2 a comprehension variable leaks into the enclosing scope and
    # would overwrite the loop index that progress_bar receives below.
    sample_axis = list(range(data_x.shape[1]))

    for i, ecg in enumerate(data_x):
        title = fnames[i].split('.')[0]
        save_wavelet_img(sample_axis, ecg[:, 0], np.arange(1, 128, 2),
                         title=title)
        # used_fnames[fnames[i]] += 1
        progress_bar("Converting to DWT image", i, n_recordings)
import os, sys
import numpy as np
import random
import matplotlib
import matplotlib.pyplot as plt
import data_generator as ds
import calculate_param as cm

# Fixed seeds for both random number generators used by this script.
random.seed(400)
np.random.seed(78)

x_coord, y_coord, real_cov1, real_cov2, real_x1, real_y1, real_x2, real_y2 = ds.get_data(5, 10, 10, 12, 7, 4)

# Materialise the pairs: on Python 3 `zip` returns a one-shot iterator, but
# data_set is consumed several times (initialisation and every E_Step call).
data_set = list(zip(x_coord, y_coord))
x_coord = np.array(x_coord)
y_coord = np.array(y_coord)

# get initial parameters
cos_phi, centers, cov_matrix_1, cov_matrix_2, alpha_1, alpha_2 = cm.get_initial_parameters(data_set, x_coord, y_coord, 2)


def E_Step(centers, cov_matrix_1, cov_matrix_2, data_set, alpha_1, alpha_2):
    """Evaluate both Gaussians and the distributions Q_1/Q_2 for the data.

    Presumably the expectation step of an EM fit with two components --
    confirm against the caller.

    Args:
        centers: Indexable as ``centers[k][0]`` / ``centers[k][1]`` for
            ``k`` in ``{0, 1}``; read as the x and y mean of component k.
        cov_matrix_1: Covariance passed on for the first component.
        cov_matrix_2: Covariance passed on for the second component.
        data_set: Data points, forwarded unchanged to the ``cm`` helpers.
            It is passed to three helper calls, so it must be re-iterable
            (e.g. a list, not a one-shot iterator).
        alpha_1: Forwarded to ``cm.get_distribution``.
        alpha_2: Forwarded to ``cm.get_distribution``.

    Returns:
        tuple: ``(G_1, G_2, Q_1, Q_2)`` -- the two ``cm.get_gaussian``
        results followed by the pair returned by ``cm.get_distribution``.

    Note:
        ``cos_phi`` is not a parameter; the module-level value is used.
    """
    mu_x, mu_y = centers[0][0], centers[0][1]
    mu_x1, mu_y1 = centers[1][0], centers[1][1]

    G_1 = cm.get_gaussian(cov_matrix_1, data_set, mu_x, mu_y)
    G_2 = cm.get_gaussian(cov_matrix_2, data_set, mu_x1, mu_y1)
    Q_1, Q_2 = cm.get_distribution(data_set, cos_phi, cov_matrix_1,
                                   cov_matrix_2, mu_x, mu_y, mu_x1, mu_y1,
                                   alpha_1, alpha_2)
    return G_1, G_2, Q_1, Q_2
import config
from data_generator import data_generate, get_data
import torch.nn as nn
import torch.optim as optim
import torch
import os
from model import DD_CNN
from evaluate import evaluate_data

# Report whether CUDA is usable and pick the first GPU if so, else the CPU.
print('torch.cuda.is_available() is ', torch.cuda.is_available())
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Build the train/test splits and wrap them using the configured batch size.
x_train, y_train, x_test, y_test = data_generate()
batch_size = config.batch_size
train_data, test_data = get_data(x_train, y_train, x_test, y_test, batch_size)

# NOTE(review): the network is not moved to `device` in the visible part of
# this script -- confirm that happens further down.
net = DD_CNN()


# Count correct predictions in one batch.
# `predictions` is reduced with torch.max along dim 1 and the index part of
# the result is taken as the predicted class; `labels` is reshaped to match.
# Returns (number of matches as a tensor, len(labels)).
def accuracy(predictions, labels):
    pred = torch.max(predictions, 1)[1]
    rights = pred.eq(labels.data.view_as(pred)).sum()
    return rights, len(labels)


criterion = nn.CrossEntropyLoss()
# NOTE: the argument list of this call continues beyond the visible excerpt.
optimizer = optim.AdamW(net.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08,
def generator(location, fnames, lead):
    """Lazily load files one at a time and yield the requested lead.

    Args:
        location: Forwarded to ``dgen.get_data`` as ``location``.
        fnames: Iterable of file names; each is loaded on its own via
            ``open_files=[name]``.
        lead: Index applied to the third axis of the loaded signal array.

    Yields:
        tuple: ``(signals[:, :, lead], targets)`` for every file name.
    """
    for name in fnames:
        loaded = dgen.get_data(location=location,
                               open_files=[name],
                               verbosity=False)
        signals, targets = loaded
        yield signals[:, :, lead], targets
and atrial fibrillation labels. Authors: Florian Schroevers """
import data_preprocessing as dprep
import data_generator as dgen
from global_params import cfg
import numpy as np
import matplotlib.pyplot as plt
import sys
import os

# NOTE(review): the parent directory is added to sys.path only after the
# project modules above have been imported, so it cannot influence them.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

# First group: load the configured lead with targets 1-4 excluded, then cut
# the recordings into windows.  Plotted below under the sinus-rhythm label,
# so presumably target 0 is sinus rhythm -- confirm against dgen.
data_x, data_y, fnames = dgen.get_data(return_fnames=True, channels=np.array([cfg.lead]), norm=True, exclude_targets=[1, 2, 3, 4])
data_x_sine, _ = dprep.extract_windows(data_x, data_y)

# Second group: same, but excluding targets 0 and 2-4.  Plotted below under
# the atrial-fibrillation label.
data_x, data_y, fnames = dgen.get_data(return_fnames=True, channels=np.array([cfg.lead]), norm=True, exclude_targets=[0, 2, 3, 4])
data_x_afib, _ = dprep.extract_windows(data_x, data_y)

# Plot the mean over axis 0 (the window axis) of each group.
# NOTE(review): the legend labels contain typos ("rythm", "firbrillation").
plt.plot(np.mean(data_x_sine, axis=0), c="g", label="Sinus rythm")
plt.plot(np.mean(data_x_afib, axis=0), c="r", label="Atrial firbrillation", linestyle="--")
plt.legend()
# Map a scalar to the 4-tuple (value**0.35, 0, 1 - value**2, 1).
# Presumably used as an RGBA colour for values in [0, 1], despite the name --
# confirm against the caller.
def hsl(value):
    return (value**0.35, 0, 1 - value**2, 1)


# When logging is enabled, append the CSV header to this run's log file.
# The file is opened in append mode, so the header is added on every run.
if cfg.logging:
    with open(cfg.log_location + "log_" + cfg.t + ".csv", 'a') as csvlog:
        csvlog.write(
            "t,lead,split_train,split_val,split_test,epochs,unique_patients,train_size,validation_size,test_size,loss,accuracy,precision,recall,AUC,F1\n"
        )

# Load every configured channel once; the per-lead loop below slices it.
all_data_x, all_data_y, fnames = dgen.get_data(
    # n_files=100,
    return_fnames=True,
    channels=np.array(range(cfg.n_channels)),
    norm=cfg.normalize_data,
    targets=cfg.targets,
    extension="." + cfg.file_extension)

# NOTE(review): the lead count is hard-coded to 8 rather than taken from cfg.
for lead in range(8):
    cfg.current_lead = lead
    # Keep channel 0 together with the current lead (for lead == 0 this
    # selects channel 0 twice).
    data_x = all_data_x.copy()[:, :, (0, lead)]
    data_y = all_data_y.copy()
    # Channel 0 is passed along but flagged via exclude_first_channel=True;
    # presumably it only guides the windowing -- confirm in dprep.
    data_x, data_y = dprep.extract_windows(data_x, data_y, exclude_first_channel=True, fnames=fnames)
    # data_x, data_y = dprep.extract_windows(all_data_x[:, :, lead], all_data_y)
import os
import sys
import numpy as np
import random
import matplotlib
import matplotlib.pyplot as plt
import data_generator as ds
import initialize as initialize

# Fixed seeds for both random number generators used by this script.
random.seed(400)
np.random.seed(78)

x_coord, y_coord, real_cov1, real_cov2, mu_x1, mu_y1, mu_x2, mu_y2 = ds.get_data(
    mu_x1=-10, mu_y1=-10, mu_x2=10, mu_y2=10, sigma_x=5, sigma_y=2)

# Portable prints: these emit the same text as the former Python 2 `print`
# statements and also parse on Python 3.
print(real_cov1)
print(real_cov2)
print("%s %s %s %s" % (mu_x1, mu_y1, mu_x2, mu_y2))

# Materialise the pairs: on Python 3 `zip` returns a one-shot iterator, while
# the seeding below (and any later passes over the data) need a real list.
data_set = list(zip(x_coord, y_coord))
x_coord = np.array(x_coord)
y_coord = np.array(y_coord)

# Scatter plot of the raw points on a fixed [-20, 20] x [-20, 20] view.
plt.scatter(x_coord, y_coord)
plt.xlim([-20, 20])
plt.ylim([-20, 20])

# Number of centers requested from the seeding helper.
k = 2
centers = initialize.center_seed(k, data_set)
new_centers = []