def testing(testing_flag, batch_size):
    """Smoke-test BYOL pretraining on the Chapman ECG dataset.

    Runs a short BYOL training on top of a pretrained autoencoder's
    encoder, then checks that the BYOL-internal encoder and a fresh
    encoder loaded from the trained state dict produce matching-shaped
    outputs, printing sizes for every validation batch.

    Parameters
    ----------
    testing_flag : bool
        Forwarded to the data/model loaders to select the reduced
        testing datasets.
    batch_size : int
        Batch size for both train and validation DataLoaders.
    """
    # Only the encoder is needed as the BYOL backbone; the full
    # autoencoder and decoder are discarded.
    _autoencoder, encoder, _decoder = chapman_autoencoder.get_trained_autoencoder(
        testing_flag)
    model = deepcopy(encoder)

    # get chapman datasets (working directory not needed here)
    user_datasets, patient_to_rhythm_dict, test_train_split_dict, _working_directory = get_datasets_from_paths(
        testing_flag)

    train_chapman_dataset = byol_chapman_utilities.ChapmanDataset(
        user_datasets, patient_to_rhythm_dict, test_train_split_dict, 'train')
    test_chapman_dataset = byol_chapman_utilities.ChapmanDataset(
        user_datasets, patient_to_rhythm_dict, test_train_split_dict, 'test')
    train_loader = DataLoader(train_chapman_dataset,
                              batch_size=batch_size,
                              shuffle=True)
    val_loader = DataLoader(test_chapman_dataset, batch_size=batch_size)

    # BYOL self-supervised pretraining on a copy of the encoder.
    byol_model = deepcopy(model)
    byol = byol_chapman_utilities.BYOL(byol_model, image_size=(2500, 4))
    byol_trainer = pl.Trainer(
        max_epochs=10,
        accumulate_grad_batches=2048 // batch_size,
        weights_summary=None,
    )
    byol_trainer.fit(byol, train_loader, val_loader)

    # Reload the trained weights into a fresh encoder and compare the
    # output shapes against BYOL's own encoder for every val batch.
    byol_encoder = byol.encoder
    state_dict = byol_model.state_dict()
    new_model = deepcopy(encoder)
    new_model.load_state_dict(state_dict)
    for data, label in val_loader:
        byol_encoded_data = byol_encoder(data.float())
        byol_new_model_data = new_model(data.float())
        print(f'byol encoder data shape: {byol_encoded_data.size()}')
        print(f'byol state dict model shape: {byol_new_model_data.size()}')
        print(f'byol encoded size {byol_encoded_data.size()}')
        print(label)
def multiple_segment_main(testing_flag, batch_size, epoch_number, latent_dim,
                          projection_dim):
    """BYOL multi-segment (MS) pretraining on the Chapman ECG dataset.

    Uses the encoder of a pretrained multi-segment autoencoder as the
    BYOL backbone, trains it with BYOL_MS, and returns an independent
    encoder loaded with the trained weights.

    Parameters
    ----------
    testing_flag : bool
        Forwarded to the data/model loaders to select the reduced
        testing datasets.
    batch_size : int
        Batch size for both DataLoaders; also sets gradient
        accumulation so the effective batch is 2048.
    epoch_number : int
        Number of BYOL training epochs.
    latent_dim : int
        Latent dimension of the pretrained autoencoder.
    projection_dim : int
        Projection size passed to BYOL_MS.

    Returns
    -------
    tuple
        (byol_encoder, test_chapman_dataset, train_chapman_dataset,
        working_directory, path_to_embeddings)
    """
    _autoencoder, encoder, _decoder = chapman_autoencoder.get_trained_autoencoder_ms(
        testing_flag, latent_dim)
    # we will use the encoder as input into byol
    model = deepcopy(encoder)

    # get chapman datasets
    user_datasets, patient_to_rhythm_dict, test_train_split_dict, working_directory, path_to_embeddings = get_datasets_from_paths(
        testing_flag)

    # get train and test datasets and create dataloaders
    train_chapman_dataset = byol_chapman_utilities.ChapmanDataset(
        user_datasets, patient_to_rhythm_dict, test_train_split_dict, 'train')
    test_chapman_dataset = byol_chapman_utilities.ChapmanDataset(
        user_datasets, patient_to_rhythm_dict, test_train_split_dict, 'test')
    train_loader = DataLoader(train_chapman_dataset,
                              batch_size=batch_size,
                              shuffle=True)
    val_loader = DataLoader(test_chapman_dataset, batch_size=batch_size)

    # byol training model; image_size 1250x4 because BYOL_MS works on
    # half-windows of the 2500-sample signal.
    byol_model = deepcopy(model)
    byol = byol_chapman_utilities.BYOL_MS(byol_model,
                                          image_size=(1250, 4),
                                          projection_size=projection_dim)
    byol_trainer = pl.Trainer(max_epochs=epoch_number,
                              accumulate_grad_batches=2048 // batch_size,
                              weights_summary=None,
                              logger=False)
    byol_trainer.fit(byol, train_loader, val_loader)

    # Load trained weights into a fresh encoder so the returned module
    # is independent of BYOL's internal wrappers.
    state_dict = byol_model.state_dict()
    byol_encoder = deepcopy(encoder)
    byol_encoder.load_state_dict(state_dict)

    return byol_encoder, test_chapman_dataset, train_chapman_dataset, working_directory, path_to_embeddings
def resnet_main(testing_flag, batch_size):
    """Compare supervised accuracy before and after BYOL pretraining.

    Trains a resnet18 (adapted to single-channel input) three times on
    the Chapman ECG dataset: (1) fully supervised from scratch,
    (2) BYOL self-supervised, (3) supervised again initialised from the
    BYOL weights. Accuracies from (1) and (3) are printed and pickled
    into the working directory.

    Parameters
    ----------
    testing_flag : bool
        Forwarded to the data loaders to select the reduced testing
        datasets.
    batch_size : int
        Batch size for both DataLoaders; gradient accumulation keeps
        the effective BYOL batch at 2048.
    """
    # get chapman datasets
    user_datasets, patient_to_rhythm_dict, test_train_split_dict, working_directory = get_datasets_from_paths(
        testing_flag)

    # get train and test datasets and create dataloaders
    train_chapman_dataset = byol_chapman_utilities.ChapmanDataset(
        user_datasets, patient_to_rhythm_dict, test_train_split_dict, 'train')
    test_chapman_dataset = byol_chapman_utilities.ChapmanDataset(
        user_datasets, patient_to_rhythm_dict, test_train_split_dict, 'test')
    train_loader = DataLoader(train_chapman_dataset,
                              batch_size=batch_size,
                              shuffle=True)
    val_loader = DataLoader(test_chapman_dataset, batch_size=batch_size)
    print('got here')

    # resnet18 expects 3-channel images; swap the stem for 1-channel input.
    model = resnet18()
    model.conv1 = nn.Conv2d(1,
                            64,
                            kernel_size=7,
                            stride=2,
                            padding=3,
                            bias=False)
    model_name = 'resnet18'

    # supervised learning before byol (baseline)
    supervised_model = deepcopy(model)
    supervised = byol_chapman_utilities.SupervisedLightningModule(
        supervised_model)
    supervised_trainer = pl.Trainer(max_epochs=25, weights_summary=None)
    supervised_trainer.fit(supervised, train_loader, val_loader)
    supervised_accuracy = byol_chapman_utilities.accuracy_from_val_loader_and_model(
        val_loader, supervised_model)

    # byol training model
    byol_model = deepcopy(model)
    byol = byol_chapman_utilities.BYOL(byol_model, image_size=(2500, 4))
    byol_trainer = pl.Trainer(
        max_epochs=10,
        accumulate_grad_batches=2048 // batch_size,
        weights_summary=None,
    )
    byol_trainer.fit(byol, train_loader, val_loader)

    # supervised learning again, initialised from the byol weights
    state_dict = byol_model.state_dict()
    post_byol_model = deepcopy(model)
    post_byol_model.load_state_dict(state_dict)
    post_byol_supervised = byol_chapman_utilities.SupervisedLightningModule(
        post_byol_model)
    post_byol_trainer = pl.Trainer(
        max_epochs=10,
        # Fixed: was hard-coded 2048 // 128, silently ignoring the
        # batch_size parameter (inconsistent with the BYOL trainer above).
        accumulate_grad_batches=2048 // batch_size,
        weights_summary=None,
    )
    post_byol_trainer.fit(post_byol_supervised, train_loader, val_loader)
    post_byol_accuracy = byol_chapman_utilities.accuracy_from_val_loader_and_model(
        val_loader, post_byol_model)

    # final results
    print(f'supervised accuracy - {supervised_accuracy}')
    print(f'post byol supervised accuracy - {post_byol_accuracy}')
    save_dict = {
        'supervised_acc': supervised_accuracy,
        'post_byol_acc': post_byol_accuracy
    }

    # save results with a timestamped filename
    start_time = datetime.datetime.now()
    start_time_str = start_time.strftime("%Y%m%d-%H%M%S")
    save_filename = f'{testing_flag}-{batch_size}-{model_name}-{start_time_str}-byol-chapman.pickle'
    save_path = os.path.join(working_directory, save_filename)
    with open(save_path, 'wb') as f:
        pickle.dump(save_dict, f)
def testing(testing_flag):
    """Train a simple autoencoder on the Chapman dataset and print losses.

    Loads the pickled no-NaN Chapman datasets, trains an autoencoder
    with MSE reconstruction loss for 10 epochs, then reports per-epoch
    training loss and per-batch validation loss.

    Parameters
    ----------
    testing_flag : bool
        When True, load the reduced datasets for quick testing.
    """
    dataset_save_path = os.path.join(os.path.dirname(os.getcwd()),
                                     "PickledData", "chapman")
    path_to_patient_to_rhythm_dict = os.path.join(
        dataset_save_path, 'patient_to_rhythm_dict.pickle')

    # paths to user datasets with no nan values
    if testing_flag:
        path_to_user_datasets = os.path.join(
            dataset_save_path, 'reduced_four_lead_user_datasets_no_nan.pickle')
        path_to_test_train_split_dict = os.path.join(
            dataset_save_path, 'reduced_test_train_split_dict_no_nan.pickle')
    else:
        path_to_user_datasets = os.path.join(
            dataset_save_path, 'four_lead_user_datasets_no_nan.pickle')
        path_to_test_train_split_dict = os.path.join(
            dataset_save_path, "test_train_split_dict_no_nan.pickle")

    with open(path_to_user_datasets, 'rb') as f:
        user_datasets = pickle.load(f)
    with open(path_to_test_train_split_dict, 'rb') as f:
        test_train_split_dict = pickle.load(f)
    with open(path_to_patient_to_rhythm_dict, 'rb') as f:
        patient_to_rhythm_dict = pickle.load(f)

    train_chapman_dataset = byol_chapman_utilities.ChapmanDataset(
        user_datasets, patient_to_rhythm_dict, test_train_split_dict, 'train')
    test_chapman_dataset = byol_chapman_utilities.ChapmanDataset(
        user_datasets, patient_to_rhythm_dict, test_train_split_dict, 'test')

    batch_size = 128
    train_loader = DataLoader(train_chapman_dataset,
                              batch_size=batch_size,
                              shuffle=True)
    val_loader = DataLoader(test_chapman_dataset, batch_size=batch_size)

    model = autoencoder(512)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    num_epochs = 10
    for epoch in range(num_epochs):
        for data, _ in train_loader:
            data = Variable(data)
            output = model(data.float())
            # Reconstruction target is the flattened input window.
            flattened_input = data.view(data.size(0), -1)
            loss = criterion(output, flattened_input.float())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # Last mini-batch loss of the epoch.
        print('epoch [{}/{}], loss:{:.4f}'.format(epoch + 1, num_epochs,
                                                  loss.item()))

    # Validate reconstruction quality batch by batch via the trained
    # encoder/decoder submodules.
    encoder = model.encoder
    decoder = model.decoder
    for data, _ in val_loader:
        input_data = Variable(data).float()
        encoded_data = encoder(input_data)
        print(encoded_data.size())
        decoded_data = decoder(encoded_data.float())
        data = data.view(data.size(0), -1)
        loss = criterion(decoded_data.float(), data.float())
        print(f'val_loss: {loss.item()}')
def get_trained_autoencoder_ms(testing_flag, latent_dim=512):
    """Train a multi-segment autoencoder on the Chapman dataset.

    Each 2500x4 window is split along dim 2 into two 1250-sample
    segments, and the autoencoder is trained to reconstruct each
    segment independently for 10 epochs. Validation reconstruction loss
    is printed for the whole test set.

    Parameters
    ----------
    testing_flag : bool
        When True, load the reduced datasets for quick testing.
    latent_dim : int, optional
        Latent dimension of the autoencoder (default 512).

    Returns
    -------
    tuple
        (model, encoder, decoder): the trained autoencoder instance and
        its encoder/decoder submodules.
    """
    dataset_save_path = os.path.join(os.path.dirname(os.getcwd()),
                                     "PickledData", "chapman")
    path_to_patient_to_rhythm_dict = os.path.join(
        dataset_save_path, 'patient_to_rhythm_dict.pickle')

    # paths to user datasets with no nan values
    if testing_flag:
        path_to_user_datasets = os.path.join(
            dataset_save_path, 'reduced_four_lead_user_datasets_no_nan.pickle')
        path_to_test_train_split_dict = os.path.join(
            dataset_save_path, 'reduced_test_train_split_dict_no_nan.pickle')
    else:
        path_to_user_datasets = os.path.join(
            dataset_save_path, 'four_lead_user_datasets_no_nan.pickle')
        path_to_test_train_split_dict = os.path.join(
            dataset_save_path, "test_train_split_dict_no_nan.pickle")

    with open(path_to_user_datasets, 'rb') as f:
        user_datasets = pickle.load(f)
    with open(path_to_test_train_split_dict, 'rb') as f:
        test_train_split_dict = pickle.load(f)
    with open(path_to_patient_to_rhythm_dict, 'rb') as f:
        patient_to_rhythm_dict = pickle.load(f)

    train_chapman_dataset = byol_chapman_utilities.ChapmanDataset(
        user_datasets, patient_to_rhythm_dict, test_train_split_dict, 'train')
    test_chapman_dataset = byol_chapman_utilities.ChapmanDataset(
        user_datasets, patient_to_rhythm_dict, test_train_split_dict, 'test')

    batch_size = 128
    train_loader = DataLoader(train_chapman_dataset,
                              batch_size=batch_size,
                              shuffle=True)
    # Single validation batch covering the entire test set.
    val_loader = DataLoader(test_chapman_dataset,
                            batch_size=len(test_chapman_dataset))

    # train autoencoder on each 1250-sample half of every window
    model = autoencoder(latent_dim, x_dim=1250, y_dim=4)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    num_epochs = 10
    for epoch in range(num_epochs):
        for data, _ in train_loader:
            data1, data2 = torch.split(data, 1250, dim=2)
            for segment in (data1, data2):
                input_data = Variable(segment)
                loss_data = segment.view(segment.size(0), -1)
                output = model(input_data.float())
                loss = criterion(output, loss_data.float())
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
        # With num_epochs=10 this only fires on the first epoch.
        if epoch % 10 == 0:
            print('epoch [{}/{}], loss:{:.4f}'.format(
                epoch + 1, num_epochs, loss.item()))

    # get validation reconstruction loss per segment
    encoder = model.encoder
    decoder = model.decoder
    for data, _ in val_loader:
        data1, data2 = torch.split(data, 1250, dim=2)
        for segment in (data1, data2):
            input_data = Variable(segment)
            loss_data = segment.view(segment.size(0), -1)
            encoded_data = encoder(input_data.float())
            decoded_data = decoder(encoded_data.float())
            loss = criterion(decoded_data.float(), loss_data.float())
            print(f'val_loss: {loss.item()}')

    # Fixed: the original returned the `autoencoder` CLASS object as the
    # first element instead of the trained instance `model`.
    return model, encoder, decoder