ECG_test = ECG_Multilead_Dataset(root_dir=root_dir, transform=tf_ds)  # For KNN demo

######## Example how to access the data (uncomment if necessary) ##############
# ECG_test = ECG_Multilead_Dataset(root_dir=os.getcwd() + '\\Chineese_database\\', transform=None)  # For access demo
# sample_test = ECG_test[2]  # Taking for example record number 2 (starting from zero)
# print(f'Size of data of 12 leads: {np.shape(sample_test[0][0])} + 10 seconds of the long lead is {np.shape(sample_test[0][1])}')
# print(f'Is the example record AFIB: {sample_test[1]}')

# Define how much data to load (only use a subset for speed)
num_train = 35000
num_test = 1000
batch_size = 10000

# Training dataset & loader
ds_train = tf.SubsetDataset(ECG_test, num_train)  # (train=True, transform=tf_ds)
dl_train = torch.utils.data.DataLoader(ds_train, batch_size=batch_size, shuffle=False)

# Test dataset & loader
ds_test = tf.SubsetDataset(ECG_test, num_test, offset=num_train)
dl_test = torch.utils.data.DataLoader(ds_test, batch_size)

# for batch_indx, sample in enumerate(dl_train):
#     print(sample)

# Get all test data to predict in one go
test_iter = iter(dl_test)
x_test, y_test = next(test_iter)
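
# --- Hedged KNN baseline sketch (not part of the original pipeline; uncomment to try). ---
# Assumptions: scikit-learn is installed, and the tf_ds transform above already yields one
# flat feature vector per record. If the loader instead returns a (twelve_leads, long_lead)
# tuple, the pieces would first have to be flattened and concatenated.
# from sklearn.neighbors import KNeighborsClassifier
# x_train, y_train = next(iter(dl_train))
# knn = KNeighborsClassifier(n_neighbors=5)
# knn.fit(x_train.reshape(len(x_train), -1).numpy(), y_train.numpy().ravel())
# knn_accuracy = knn.score(x_test.reshape(len(x_test), -1).numpy(), y_test.numpy().ravel())
# print(f'KNN baseline accuracy: {100 * knn_accuracy:.2f}%')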
def RunNetImageToMultiClassBinary(class_type=0, perspective_transform=False, run_tag=''):
    import torch
    import models
    import transforms as tf
    import matplotlib.pyplot as plt

    torch.multiprocessing.freeze_support()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    torch.cuda.set_device(device=0)
    print('Using device:', device)

    checkpoints_name = r'checkpoints/Ecg12LeadImageNet' + str(run_tag) + \
        'Perspective' + str(perspective_transform) + \
        r'ClassType' + str(class_type)
    apply_perspective_transformation = perspective_transform
    realtime_rendering = True
    ds = ECG_Rendered_Multilead_Dataset(
        root_dir=root_dir,
        realtime_rendering=realtime_rendering,
        apply_perspective_transformation=apply_perspective_transformation)

    # for real training:
    num_train = 35000
    num_val = 1000
    num_test = 5830
    # for small set overfit experiment:
    # num_train = 4
    # num_val = 4
    # num_test = 4
    batch_size = 100

    # Training dataset & loader
    ds_train = tf.SubsetDataset(ds, num_train)  # (train=True, transform=tf_ds)
    dl_train = torch.utils.data.DataLoader(ds_train, batch_size, shuffle=True,
                                           num_workers=2, pin_memory=True)
    x, y = next(iter(dl_train))

    # Validation dataset & loader
    ds_val = tf.SubsetDataset(ds, num_val, offset=num_train)
    dl_val = torch.utils.data.DataLoader(ds_val, batch_size, num_workers=2, pin_memory=True)

    # Test dataset & loader
    ds_test = tf.SubsetDataset(ds, num_test, offset=num_train + num_val)
    dl_test = torch.utils.data.DataLoader(ds_test, batch_size, num_workers=2, pin_memory=True)

    in_h = x.shape[1]
    in_w = x.shape[2]
    in_channels = x.shape[3]
    batch_memory = x.element_size() * x.nelement() // 1024 ** 2
    print('Images of shape: ', x.shape)
    print('Labels of shape: ', y.shape)
    print('Size of a batch in the memory is: ~', batch_memory, 'MB')
    print('\nLet us see the first sample:\n')
    # plt.figure(figsize=(20, 15))
    # plt.imshow(x[0, :, :, :])
    x = x.transpose(1, 2).transpose(1, 3)  # permute NHWC -> NCHW
    # plt.show()

    # %% Architecture definition
    # num of channels and kernel length in each layer; note that list lengths must correspond
    hidden_channels = [8, 16, 32, 64, 128, 256, 512]
    kernel_sizes = [5] * 7
    # which tricks to use: dropout, stride, batch normalization and dilation
    dropout = 0.2
    stride = 2
    dilation = 1
    batch_norm = True
    # FC net structure:
    # num of hidden units in every FC layer
    fc_hidden_dims = [128]
    # num of output classes
    num_of_classes = 2

    model = models.Ecg12ImageNet(in_channels, hidden_channels, kernel_sizes, in_h, in_w,
                                 fc_hidden_dims, dropout=dropout, stride=stride,
                                 dilation=dilation, batch_norm=batch_norm,
                                 num_of_classes=num_of_classes).to(device)
    print(model)

    # %% Test the dimensionality
    x_try = x.to(device, dtype=torch.float)
    y_pred = model(x_try)
    print('Output batch size is:', y_pred.shape[0],
          ', and number of class scores:', y_pred.shape[1], '\n')
    num_correct = torch.sum(
        (y_pred > 0).flatten() == (y.to(device, dtype=torch.long) == 1))
    print(100 * num_correct.item() / len(y),
          '% Accuracy... maybe we should consider training the model')
    del x, y, x_try, y_pred

    # %% Let's start training
    import torch.nn as nn
    import torch.optim as optim
    from training import Ecg12LeadImageNetTrainerBinary

    torch.manual_seed(42)

    lrs = [0.01, 0.001, 0.0001, 0.00001]
    lr = 0.001  # default; overridden by the sweep below
    checkpoint_filename = f'{checkpoints_name}.pt'
    full_path = os.path.realpath(__file__)
    path, filename = os.path.split(full_path)
    if os.path.isfile(os.path.join(path, checkpoint_filename)):
        num_epochs = 0  # a checkpoint already exists; skip training
    else:
        num_epochs = 30
    loss_fn = nn.BCEWithLogitsLoss()

    for lr in lrs:
        optimizer = optim.Adam(model.parameters(), lr=lr)
        trainer = Ecg12LeadImageNetTrainerBinary(model, loss_fn, optimizer, device)
        fitResult2 = trainer.fit(dl_train, dl_test, num_epochs,
                                 checkpoints=checkpoints_name,
                                 early_stopping=100, print_every=1)
        with open(f"Execution_dump_{checkpoints_name}.txt", "a") as myfile:
            myfile.write("Fit result:\n train accuracy: train loss: test accuracy: test loss: \n")
            for i, _ in enumerate(fitResult2.test_acc):
                myfile.write(f'{fitResult2.train_acc[i]} {fitResult2.train_loss[i]} '
                             f'{fitResult2.test_acc[i]} {fitResult2.test_loss[i]}\n')

    # %% Test results
    test_result = trainer.test_epoch(dl_test, verbose=True)
    print('Test accuracy is: ', test_result[1], '%')
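
# Hedged helper (hypothetical, not part of the original training code). It assumes the fit
# result returned by trainer.fit exposes the train_acc/train_loss/test_acc/test_loss lists
# used in the Execution_dump above, and simply plots the learning curves, e.g.
# plot_fit_result(fitResult2, title=checkpoints_name) right after a fit.
def plot_fit_result(fit_result, title=''):
    import matplotlib.pyplot as plt
    fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 4))
    ax_acc.plot(fit_result.train_acc, label='train')
    ax_acc.plot(fit_result.test_acc, label='test')
    ax_acc.set_xlabel('epoch')
    ax_acc.set_ylabel('accuracy [%]')
    ax_acc.legend()
    ax_loss.plot(fit_result.train_loss, label='train')
    ax_loss.plot(fit_result.test_loss, label='test')
    ax_loss.set_xlabel('epoch')
    ax_loss.set_ylabel('loss')
    ax_loss.legend()
    fig.suptitle(title)
    plt.show()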
def RunNoamsECG_ImageClassification(perspective_transform=False, realtime_rendering=False,
                                    Is_classifier=False, Image_to_classify=None,
                                    classification_threshold=None, GPU_num=0):
    import torch
    import models
    import transforms as tf
    import matplotlib.pyplot as plt

    torch.multiprocessing.freeze_support()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    torch.cuda.set_device(device=GPU_num)
    print('Using device:', device)

    checkpoints_name = r'checkpoints/Ecg12LeadImageNetPerspective' + \
        str(perspective_transform) + r'Rendering' + str(realtime_rendering)
    apply_perspective_transformation = perspective_transform
    ds = ECG_Rendered_Multilead_Dataset(
        realtime_rendering=realtime_rendering,
        apply_perspective_transformation=apply_perspective_transformation)
    # ds = ECG_Multilead_Dataset(root_dir=root_dir, transform=None, partial_upload=False)

    # Define how much data to load (only use a subset for speed)
    # for real training:
    num_train = 35000
    num_val = 1000
    num_test = 5830
    # for small set overfit experiment:
    # num_train = 4
    # num_val = 4
    # num_test = 4
    if apply_perspective_transformation:
        batch_size = 30
    else:
        batch_size = 100

    # Training dataset & loader
    ds_train = tf.SubsetDataset(ds, num_train)  # (train=True, transform=tf_ds)
    dl_train = torch.utils.data.DataLoader(ds_train, batch_size, shuffle=True,
                                           num_workers=2, pin_memory=True)
    x, y = next(iter(dl_train))

    # Validation dataset & loader
    ds_val = tf.SubsetDataset(ds, num_val, offset=num_train)
    dl_val = torch.utils.data.DataLoader(ds_val, batch_size, num_workers=2, pin_memory=True)

    # Test dataset & loader
    ds_test = tf.SubsetDataset(ds, num_test, offset=num_train + num_val)
    dl_test = torch.utils.data.DataLoader(ds_test, batch_size, num_workers=2, pin_memory=True)

    # %% Let's see what we uploaded
    x, y = next(iter(dl_train))
    in_h = x.shape[1]
    in_w = x.shape[2]
    in_channels = x.shape[3]
    batch_memory = x.element_size() * x.nelement() // 1024 ** 2
    print('Images of shape: ', x.shape)
    print('Labels of shape: ', y.shape)
    print('Size of a batch in the memory is: ~', batch_memory, 'MB')
    print('\nLet us see the first sample:\n')
    # plt.figure(figsize=(20, 15))
    # plt.imshow(x[0, :, :, :])
    x = x.transpose(1, 2).transpose(1, 3)  # permute NHWC -> NCHW
    # plt.show()

    # %% Architecture definition
    # num of channels and kernel length in each layer; note that list lengths must correspond
    hidden_channels = [8, 16, 32, 64, 128, 256, 512]
    kernel_sizes = [5] * 7
    # which tricks to use: dropout, stride, batch normalization and dilation
    dropout = 0.2
    stride = 2
    dilation = 1
    batch_norm = True
    # FC net structure:
    # num of hidden units in every FC layer
    fc_hidden_dims = [128]
    # num of output classes
    num_of_classes = 2

    model = models.Ecg12ImageNet(in_channels, hidden_channels, kernel_sizes, in_h, in_w,
                                 fc_hidden_dims, dropout=dropout, stride=stride,
                                 dilation=dilation, batch_norm=batch_norm,
                                 num_of_classes=num_of_classes).to(device)
    # print(model)

    # %% Test the dimensionality
    x_try = x.to(device, dtype=torch.float)
    y_pred = model(x_try)
    # print('Output batch size is:', y_pred.shape[0],
    #       ', and number of class scores:', y_pred.shape[1], '\n')
    num_correct = torch.sum(
        (y_pred > 0).flatten() == (y.to(device, dtype=torch.long) == 1))
    # print(100 * num_correct.item() / len(y),
    #       '% Accuracy... maybe we should consider training the model')
    del x, y, x_try, y_pred

    # %% Let's start training
    import torch.nn as nn
    import torch.optim as optim
    from training import Ecg12LeadImageNetTrainerBinary

    torch.manual_seed(42)
    # lr = 0.01
    # num_epochs = 1
    # loss_fn = nn.BCEWithLogitsLoss()
    # optimizer = optim.Adam(model.parameters(), lr=lr)
    # trainer = Ecg12LeadImageNetTrainerBinary(model, loss_fn, optimizer, device)
    # fitResult = trainer.fit(dl_train, dl_val, num_epochs,
    #                         checkpoints=r'checkpoints/Ecg12LeadImageNetDemonstration',
    #                         early_stopping=5, print_every=1)

    lr = 0.001
    checkpoint_filename = f'{checkpoints_name}.pt'
    full_path = os.path.realpath(__file__)
    path, filename = os.path.split(full_path)
    if os.path.isfile(os.path.join(path, checkpoint_filename)):
        num_epochs = 0  # a checkpoint already exists; skip training
    else:
        num_epochs = 30
    loss_fn = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    trainer = Ecg12LeadImageNetTrainerBinary(
        model, loss_fn, optimizer, device,
        classification_threshold=classification_threshold)
    fitResult2 = trainer.fit(
        dl_train, dl_val, num_epochs,
        checkpoints=checkpoints_name,
        early_stopping=5, print_every=1)

    # %% Test results
    # if Is_classifier:
    #     K = Image_to_classify.permute(0, 3, 1, 2).to(device)
    #     out = model(K)
    #     print(f'Out: {out}')
    # else:

    ##################### ROC #################################################
    thresholds = np.arange(0, 1, 0.01, dtype=float)
    for th in thresholds:
        trainer.classification_threshold = th
        test_result = trainer.test_epoch(dl_test, verbose=True)
        with open(f'ROC_Image_Pers_{perspective_transform}.txt', "a") as myfile:
            myfile.write(
                f'{th}\t{test_result.num_TP}\t{test_result.num_TN}\t'
                f'{test_result.num_FP}\t{test_result.num_FN}\t{test_result.accuracy}\n')
    trainer.classification_threshold = None
    ##################### END OF ROC ##########################################

    test_result = trainer.test_epoch(dl_test, verbose=True)
    print('Test accuracy is: ', test_result[1], '%')
    return test_result
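
# Hedged helper (hypothetical, not part of the original evaluation). It assumes the
# tab-separated ROC dump format written above (one row of threshold, TP, TN, FP, FN,
# accuracy per threshold) and estimates the area under the ROC curve with a manual
# trapezoidal rule, e.g. estimate_auc_from_roc_dump('ROC_Image_Pers_False.txt').
def estimate_auc_from_roc_dump(roc_txt_path):
    import numpy as np
    rows = np.loadtxt(roc_txt_path, delimiter='\t', ndmin=2)
    tp, tn, fp, fn = rows[:, 1], rows[:, 2], rows[:, 3], rows[:, 4]
    tpr = tp / np.maximum(tp + fn, 1)  # sensitivity
    fpr = fp / np.maximum(fp + tn, 1)  # 1 - specificity
    order = np.argsort(fpr)            # integrate along increasing FPR
    tpr_s, fpr_s = tpr[order], fpr[order]
    return float(np.sum(np.diff(fpr_s) * (tpr_s[1:] + tpr_s[:-1]) / 2.0))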
def RunVadimsNetDigitizedToMultiClass():
    torch.multiprocessing.freeze_support()

    # Define the transforms that should be applied to each ECG record before returning it
    tf_ds = tvtf.Compose([
        tf.ECG_tuple_transform(-1)  # Reshape to 1D Tensor
    ])

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Using device:', device)

    ds = ECG_Multilead_Dataset(root_dir=root_dir, multiclass=True)

    # %% Prepare the dataloaders
    # Define how much data to load
    # for real training:
    num_train = 35000
    # for small set overfit experiments:
    # num_train = 3500
    num_val = 1000
    num_test = 5000
    batch_size = 256  # 512

    # Training dataset & loader
    ds_train = tf.SubsetDataset(ds, num_train)
    dl_train = torch.utils.data.DataLoader(ds_train, batch_size, shuffle=True)

    # Validation dataset & loader
    ds_val = tf.SubsetDataset(ds, num_val, offset=num_train)
    dl_val = torch.utils.data.DataLoader(ds_val, batch_size)

    # Test dataset & loader
    ds_test = tf.SubsetDataset(ds, num_test, offset=num_train + num_val)
    dl_test = torch.utils.data.DataLoader(ds_test, batch_size)

    # %% Model creation
    # CNNs structure:
    # num of channels and kernel length in each layer of each branch;
    # note that list lengths must correspond
    short_hidden_channels = [16, 32, 64, 128, 256, 512]
    long_hidden_channels = [4, 8, 16, 32, 64, 128, 256, 512]
    short_kernel_lengths = [5] * 6
    long_kernel_lengths = [5] * 8
    # which tricks to use: dropout, stride, batch normalization and dilation
    short_dropout = 0.5
    long_dropout = 0.5
    short_stride = 2
    long_stride = 2
    short_dilation = 1
    long_dilation = 1
    short_batch_norm = True
    long_batch_norm = True
    # enter input length here
    short_input_length = 1250
    long_input_length = 5000
    # FC net structure:
    # num of hidden units in every FC layer
    fc_hidden_dims = [128]
    # num of output classes
    num_of_classes = 9

    model = models.Ecg12LeadMultiClassNet(
        short_hidden_channels, long_hidden_channels,
        short_kernel_lengths, long_kernel_lengths,
        fc_hidden_dims,
        short_dropout, long_dropout,
        short_stride, long_stride,
        short_dilation, long_dilation,
        short_batch_norm, long_batch_norm,
        short_input_length, long_input_length,
        num_of_classes).to(device)
    print(model)

    # %% Dimensions check
    x, y = next(iter(dl_train))
    x1, x2 = x
    print('Long lead data of shape: ', x2.shape)
    print('Short lead data of shape: ', x1.shape)
    print('Labels of shape: ', y.shape)
    x_try = (x1.to(device, dtype=torch.float), x2.to(device, dtype=torch.float))
    y_pred = model(x_try)
    print('Output batch size is:', y_pred.shape[0],
          ', and number of class scores:', y_pred.shape[1], '\n')
    num_correct = torch.mean(1 - abs(torch.sub(y_pred, y.to(device))))
    print(100 * num_correct.item() / len(y),
          '% Accuracy... maybe we should consider training the model')

    # %% Training
    import torch.nn as nn
    import torch.optim as optim
    from training import Ecg12LeadNetTrainerMulticlass

    # for reproducibility
    torch.manual_seed(42)

    lr = 0.001
    num_epochs = 50
    torch.cuda.empty_cache()  # entirely clear all allocated memory
    loss_fn = nn.MSELoss()
    # loss_fn = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    trainer = Ecg12LeadNetTrainerMulticlass(model, loss_fn, optimizer, device)
    fitResult = trainer.fit(
        dl_train, dl_val, num_epochs,
        checkpoints=r'checkpoints/Ecg12LeadNetDigitizedToMultiClass',
        early_stopping=10, print_every=1)
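
    # Hedged sketch (uncomment if useful): the multiclass run above never evaluates on
    # dl_test. Mirroring the binary trainers, and assuming Ecg12LeadNetTrainerMulticlass
    # exposes the same test_epoch(dl, verbose) API, a final evaluation could look like:
    # test_result = trainer.test_epoch(dl_test, verbose=True)
    # print('Test accuracy is: ', test_result[1], '%')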
def RunNetDigitizedToMultiClassBinary(class_type=0, kernel_size=17, train_set_size=35000,
                                      test_only=False, classification_threshold=None):
    torch.multiprocessing.freeze_support()

    # Define the transforms that should be applied to each ECG record before returning it
    tf_ds = tvtf.Compose([
        tf.ECG_tuple_transform(-1)  # Reshape to 1D Tensor
    ])

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Using device:', device)

    # ds = ECG_Multilead_Dataset(root_dir=root_dir)
    ds = ECG_Multilead_Dataset(multiclass=False,
                               multiclass_to_binary=True,
                               multiclass_to_binary_type=class_type)
    checkpoints_str = r'checkpoints/Ecg12LeadNetDigitizedToClass__' + f'{class_type}'

    # %% Prepare the dataloaders
    # Define how much data to load
    # for real training:
    num_train = train_set_size
    # for small set overfit experiments:
    # num_train = 3500
    num_val = 1000
    num_test = 5830
    batch_size = 1024  # 512

    # Training dataset & loader
    ds_train = tf.SubsetDataset(ds, num_train)
    dl_train = torch.utils.data.DataLoader(ds_train, batch_size, shuffle=False)

    # Validation dataset & loader
    ds_val = tf.SubsetDataset(ds, num_val, offset=num_train)
    dl_val = torch.utils.data.DataLoader(ds_val, batch_size)

    # Test dataset & loader
    ds_test = tf.SubsetDataset(ds, num_test, offset=num_train + num_val)
    dl_test = torch.utils.data.DataLoader(ds_test, batch_size)

    # %% Model creation
    # CNNs structure:
    # num of channels and kernel length in each layer of each branch;
    # note that list lengths must correspond
    short_hidden_channels = [16, 32, 64, 128, 256, 512]
    long_hidden_channels = [4, 8, 16, 32, 64, 128, 256, 512]
    short_kernel_lengths = [kernel_size] * 6  # 5
    long_kernel_lengths = [kernel_size] * 8  # 5
    # which tricks to use: dropout, stride, batch normalization and dilation
    short_dropout = 0.5
    long_dropout = 0.5
    short_stride = 2
    long_stride = 2
    short_dilation = 1
    long_dilation = 1
    short_batch_norm = True
    long_batch_norm = True
    # enter input length here
    short_input_length = 1250
    long_input_length = 5000
    # FC net structure:
    # num of hidden units in every FC layer
    fc_hidden_dims = [128]
    # num of output classes
    num_of_classes = 2

    model = models.Ecg12LeadNet(short_hidden_channels, long_hidden_channels,
                                short_kernel_lengths, long_kernel_lengths,
                                fc_hidden_dims,
                                short_dropout, long_dropout,
                                short_stride, long_stride,
                                short_dilation, long_dilation,
                                short_batch_norm, long_batch_norm,
                                short_input_length, long_input_length,
                                num_of_classes).to(device)
    print(model)

    # %% Dimensions check
    # x, y = next(iter(dl_train))
    # x1, x2 = x
    # print('Long lead data of shape: ', x2.shape)
    # print('Short lead data of shape: ', x1.shape)
    # print('Labels of shape: ', y.shape)
    # x_try = (x1.to(device, dtype=torch.float), x2.to(device, dtype=torch.float))
    # y_pred = model(x_try)
    # print('Output batch size is:', y_pred.shape[0],
    #       ', and number of class scores:', y_pred.shape[1], '\n')
    # num_correct = torch.sum((y_pred > 0).flatten() == (y.to(device, dtype=torch.long) == 1))
    # print(100 * num_correct.item() / len(y),
    #       '% Accuracy... maybe we should consider training the model')

    # %% Training
    import torch.nn as nn
    import torch.optim as optim
    from training import Ecg12LeadNetTrainerBinary

    # for reproducibility
    torch.manual_seed(42)

    loss_fn = nn.BCEWithLogitsLoss()
    lrs = [0.01, 0.0001]  # , 0.01, 0.00001
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    torch.cuda.empty_cache()  # entirely clear all allocated memory
    trainer = Ecg12LeadNetTrainerBinary(
        model, loss_fn, optimizer, device,
        classification_threshold=classification_threshold)

    if not test_only:
        for lr in lrs:
            num_epochs = 100
            torch.cuda.empty_cache()  # entirely clear all allocated memory
            optimizer = optim.Adam(model.parameters(), lr=lr)
            trainer = Ecg12LeadNetTrainerBinary(model, loss_fn, optimizer, device)
            fitResult = trainer.fit(
                dl_train, dl_val, num_epochs,
                checkpoints=checkpoints_str,  # dl_val
                early_stopping=20, print_every=1)
    else:
        fitResult = trainer.fit(
            dl_train, dl_val, 0,
            checkpoints=checkpoints_str,  # dl_val
            early_stopping=20, print_every=100)

    ##################### ROC #################################################
    thresholds = np.arange(0, 1, 0.01, dtype=float)
    for th in thresholds:
        trainer.classification_threshold = th
        test_result = trainer.test_epoch(dl_test, verbose=True)
        with open(f'ROC_Digital_{class_type}.txt', "a") as myfile:
            myfile.write(
                f'{th}\t{test_result.num_TP}\t{test_result.num_TN}\t'
                f'{test_result.num_FP}\t{test_result.num_FN}\t{test_result.accuracy}\n')
    trainer.classification_threshold = None
    ##################### END OF ROC ##########################################

    # ################### DATA FOR CONFUSION MATRIX ###########################
    # ds_full = ECG_Multilead_Dataset(root_dir=root_dir, multiclass=True,
    #                                 multiclass_to_binary=False,
    #                                 multiclass_to_binary_type=class_type)
    # ds_test_full = tf.SubsetDataset(ds_full, num_test, offset=num_train + num_val)
    # joined_list = []
    # for row_in_list in test_result.out:
    #     for el in row_in_list:
    #         joined_list.append(el.cpu().item())
    # with open(f'LOG_Digital_{class_type}.txt', "a") as myfile:
    #     for i in range(num_test):
    #         myfile.write(f'{i}\t{int(ds_test[i][1])}\t{joined_list[i]}\t'
    #                      f'{ds_test_full[i][1][0]}\t{ds_test_full[i][1][1]}\t{ds_test_full[i][1][2]}\t'
    #                      f'{ds_test_full[i][1][3]}\t{ds_test_full[i][1][4]}\t{ds_test_full[i][1][5]}\t'
    #                      f'{ds_test_full[i][1][6]}\t{ds_test_full[i][1][7]}\t{ds_test_full[i][1][8]}\n')
    # #################### END OF DATA FOR CONFUSION MATRIX ####################
    #
    # zipped = zip(flat_list_out, flat_list_y)
    # it = iter(dl_test_full)
    # for item in zipped:
    #     with open(f'Classification_Digital_Output{class_type}.txt', "a") as myfile:
    #         x_, y_ = next(it)
    #         y__ = (y_.data).cpu().numpy()
    #         myfile.write(f'{item[0].item()}\t{item[1].item()}\t')
    #         for y___ in y__:
    #             myfile.write(f'{int(y___)}\t')
    #         myfile.write('\n')

    test_result = trainer.test_epoch(dl_test, verbose=True)
    with open('Test_Accuracy_Log.txt', "a") as myfile:
        myfile.write(f'Class type: {class_type}, Accuracy: {test_result[1]}\n')
    print('Test accuracy is: ', test_result[1], '%')
    return test_result
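
# Hedged usage sketch (illustrative only; the argument values below are assumptions, not
# the authors' actual experiment schedule). Each entry point calls
# torch.multiprocessing.freeze_support() itself, so a plain __main__ guard suffices.
# if __name__ == '__main__':
#     # digitized-signal binary classifier for one of the nine diagnostic classes
#     RunNetDigitizedToMultiClassBinary(class_type=0, kernel_size=17)
#     # rendered-image classifier, optionally with perspective augmentation
#     # RunNoamsECG_ImageClassification(perspective_transform=True)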