import logging
import os
import pickle
import shutil

import matplotlib.pyplot as plt
import numpy as np
import torch
from torch.utils.data import DataLoader
from torchvision import transforms

# NOTE: the remaining names used in this module -- SummaryWriter, lstm,
# EcgHearBeatsDataset, ToTensor, GeneratorAdditionalDataConfig, ECGTrainConfig,
# train_classifier, checkpoint_paths, base_path and BEAT_TO_INDEX -- are
# assumed to be imported from elsewhere in the ecg_pytorch package.


def test_lstm():
    """Sanity-check the ECGLSTM module on a single batch from the dataset."""
    composed = transforms.Compose([ToTensor()])
    transformed_dataset = EcgHearBeatsDataset(transform=composed)
    dataloader = DataLoader(transformed_dataset, batch_size=4, shuffle=True,
                            num_workers=4)
    lstmN = lstm.ECGLSTM(5, 512, 5, 2)
    for i, data in enumerate(dataloader):
        # Reorder from (batch, seq, features) to (seq, batch, features),
        # the layout a non-batch-first LSTM expects.
        ecg_batch = data['cardiac_cycle'].permute(1, 0, 2).float()
        first_beat = ecg_batch[:, 0, :]
        print("First beat shape: {}".format(first_beat.shape))
        print("First beat label: {}".format(data['beat_type'][0]))
        print("First beat label one hot: {}".format(data['label'][0]))

        # Plot the transformed first beat and its original signal.
        first_beat = first_beat.numpy().flatten()
        plt.plot(first_beat)
        plt.show()
        plt.figure()
        plt.plot(data['orig_beat'][0].numpy())
        plt.show()

        preds = lstmN(ecg_batch)
        print("Module output shape = {}".format(preds.shape))
        print("Preds: {}".format(preds))
        break
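
# A minimal shape sanity check for ECGLSTM that runs without the dataset. It
# assumes only the constructor arguments used in test_lstm() above and that the
# module consumes variable-length sequences; the sequence length below is an
# arbitrary placeholder, since the real value depends on how each cardiac cycle
# is windowed into 5-sample frames.
def sanity_check_lstm_shapes(seq_len=40, batch_size=4):
    net = lstm.ECGLSTM(5, 512, 5, 2)
    # (seq_len, batch, features) layout, matching the permute in test_lstm().
    dummy_batch = torch.randn(seq_len, batch_size, 5)
    preds = net(dummy_batch)
    print("Output shape: {}".format(preds.shape))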
def train_with_noise():
    """Train the LSTM classifier several times with additional noise examples
    and record the best per-class AUC scores in a results file."""
    beat_type = 'N'
    device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
    with open('res_noise_{}.text'.format(beat_type), 'w') as fd:
        # for n in [500, 800, 1000, 1500, 3000, 5000, 7000, 10000, 15000]:
        for n in [0]:
            base_tomer_remote = '/home/nivgiladi/tomer/'
            model_dir = base_tomer_remote + 'ecg_pytorch/ecg_pytorch/classifiers/tensorboard/noise_{}/lstm_add_{}/'.format(
                str(n), beat_type)
            total_runs = 0
            BEST_AUC_N = 0
            BEST_AUC_S = 0
            BEST_AUC_V = 0
            BEST_AUC_F = 0
            BEST_AUC_Q = 0
            # while BEST_AUC_S <= 0.876:
            while total_runs < 10:
                if os.path.isdir(model_dir):
                    logging.info("Removing model dir")
                    shutil.rmtree(model_dir)

                # Initialize the network each run:
                net = lstm.ECGLSTM(5, 512, 5, 2).to(device)
                gen_details = GeneratorAdditionalDataConfig(beat_type=beat_type,
                                                            checkpoint_path='',
                                                            num_examples_to_add=n)
                train_config = ECGTrainConfig(num_epochs=4, batch_size=16, lr=0.002,
                                              weighted_loss=False, weighted_sampling=True,
                                              device=device, add_data_from_gan=False,
                                              generator_details=gen_details,
                                              train_one_vs_all=False)

                # train_classifier returns the best per-class AUC scores
                # (indexed via BEAT_TO_INDEX); keep the maximum per class
                # across runs.
                best_auc_scores = train_classifier(net, model_dir=model_dir,
                                                   train_config=train_config)
                BEST_AUC_N = max(BEST_AUC_N, best_auc_scores[BEAT_TO_INDEX['N']])
                BEST_AUC_S = max(BEST_AUC_S, best_auc_scores[BEAT_TO_INDEX['S']])
                BEST_AUC_V = max(BEST_AUC_V, best_auc_scores[BEAT_TO_INDEX['V']])
                BEST_AUC_F = max(BEST_AUC_F, best_auc_scores[BEAT_TO_INDEX['F']])
                BEST_AUC_Q = max(BEST_AUC_Q, best_auc_scores[BEAT_TO_INDEX['Q']])
                total_runs += 1

            logging.info("Done after {} runs.".format(total_runs))
            logging.info("Best AUC:\n N: {}\tS: {}\tV: {}\tF: {}\tQ: {}".format(
                BEST_AUC_N, BEST_AUC_S, BEST_AUC_V, BEST_AUC_F, BEST_AUC_Q))
            w = "#n: {} .Best AUC:\n N: {}\tS: {}\tV: {}\tF: {}\tQ: {}\n".format(
                n, BEST_AUC_N, BEST_AUC_S, BEST_AUC_V, BEST_AUC_F, BEST_AUC_Q)
            fd.write(w)
def find_optimal_checkpoint(chk_dir, beat_type, gan_type, device, num_samples_to_add):
    model_dir = base_path + 'ecg_pytorch/ecg_pytorch/classifiers/tensorboard/{}/find_optimal_chk_{}_{}_agg/' \
        .format(beat_type, str(num_samples_to_add), gan_type)
    writer = SummaryWriter(model_dir)
    if not os.path.isdir(chk_dir):
        raise ValueError("{} not a directory".format(chk_dir))

    #
    # Define summary values:
    #
    mean_auc_values = []
    best_auc_values = []
    final_dict = {}
    for i, chk_name in enumerate(os.listdir(chk_dir)):
        if chk_name.startswith('checkpoint'):
            chk_path = os.path.join(chk_dir, chk_name)

            #
            # Train configurations:
            #
            model_dir = base_path + 'ecg_pytorch/ecg_pytorch/classifiers/tensorboard/{}/lstm_{}_{}_{}/'.format(
                beat_type, str(num_samples_to_add), gan_type, chk_name)
            gen_details = GeneratorAdditionalDataConfig(beat_type=beat_type,
                                                        checkpoint_path=chk_path,
                                                        num_examples_to_add=num_samples_to_add,
                                                        gan_type=gan_type)
            train_config = ECGTrainConfig(num_epochs=5, batch_size=20, lr=0.0002,
                                          weighted_loss=False, weighted_sampling=True,
                                          device=device, add_data_from_gan=True,
                                          generator_details=gen_details,
                                          train_one_vs_all=False)

            #
            # Run 10 times each configuration:
            #
            total_runs = 0
            best_auc_per_run = []
            while total_runs < 10:
                if os.path.isdir(model_dir):
                    logging.info("Removing model dir")
                    shutil.rmtree(model_dir)

                #
                # Initialize the network each run:
                #
                net = lstm.ECGLSTM(5, 512, 5, 2).to(device)

                #
                # Train the classifier:
                #
                best_auc_scores = train_classifier(net, model_dir=model_dir,
                                                   train_config=train_config)
                best_auc_per_run.append(best_auc_scores[BEAT_TO_INDEX[beat_type]])
                writer.add_scalar('best_auc_{}'.format(chk_name),
                                  best_auc_scores[BEAT_TO_INDEX[beat_type]], total_runs)
                total_runs += 1

            mean_auc = np.mean(best_auc_per_run)
            max_auc = max(best_auc_per_run)
            logging.info("Checkpoint {}: Mean AUC {}. Max AUC: {}".format(
                chk_name, mean_auc, max_auc))
            mean_auc_values.append(mean_auc)
            best_auc_values.append(max_auc)
            final_dict[chk_name] = {}
            final_dict[chk_name]['MEAN'] = mean_auc
            final_dict[chk_name]['MAX'] = max_auc
            writer.add_scalar('mean_auc_per_chk', mean_auc, i)
            writer.add_scalar('max_auc_per_chk', max_auc, i)
    writer.close()

    #
    # Save data in pickle:
    #
    pickle_file_path = base_path + 'ecg_pytorch/ecg_pytorch/classifiers/pickles_results/{}_{}_lstm_different_ckps_500.pkl'.format(
        beat_type, gan_type)
    with open(pickle_file_path, 'wb') as handle:
        pickle.dump(final_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
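
# A small companion sketch for inspecting the results pickle written by
# find_optimal_checkpoint(). The helper name is hypothetical; the path format
# and the 'MEAN'/'MAX' keys mirror what the function above writes.
def load_checkpoint_search_results(beat_type, gan_type):
    pickle_file_path = base_path + 'ecg_pytorch/ecg_pytorch/classifiers/pickles_results/{}_{}_lstm_different_ckps_500.pkl'.format(
        beat_type, gan_type)
    with open(pickle_file_path, 'rb') as handle:
        final_dict = pickle.load(handle)
    # Print checkpoints ranked by their mean AUC across the 10 runs.
    for chk_name in sorted(final_dict, key=lambda c: final_dict[c]['MEAN'], reverse=True):
        print("{}: mean={:.4f} max={:.4f}".format(
            chk_name, final_dict[chk_name]['MEAN'], final_dict[chk_name]['MAX']))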
def train_mult(beat_type, gan_type, device):
    summary_model_dir = base_path + 'ecg_pytorch/ecg_pytorch/classifiers/tensorboard/{}/lstm_{}_summary/'.format(
        beat_type, gan_type)
    writer = SummaryWriter(summary_model_dir)

    #
    # Retrieve Checkpoint path:
    #
    if gan_type in ['DCGAN', 'ODE_GAN']:
        ck_path = checkpoint_paths.BEAT_AND_MODEL_TO_CHECKPOINT_PATH[beat_type][gan_type]
    else:
        ck_path = None

    #
    # Define summary values:
    #
    mean_auc_values = []
    var_auc_values = []
    best_auc_values = []
    best_auc_for_each_n = {}

    #
    # Run with different number of additional data from trained generator:
    #
    for n in [500, 800, 1000, 1500, 3000, 5000, 7000, 10000, 15000]:
        # for n in [5000]:
        #
        # Train configurations:
        #
        model_dir = base_path + 'ecg_pytorch/ecg_pytorch/classifiers/tensorboard/{}/lstm_{}_{}/'.format(
            beat_type, str(n), gan_type)
        gen_details = GeneratorAdditionalDataConfig(beat_type=beat_type,
                                                    checkpoint_path=ck_path,
                                                    num_examples_to_add=n,
                                                    gan_type=gan_type)
        train_config = ECGTrainConfig(num_epochs=5, batch_size=20, lr=0.0002,
                                      weighted_loss=False, weighted_sampling=True,
                                      device=device, add_data_from_gan=True,
                                      generator_details=gen_details,
                                      train_one_vs_all=False)

        #
        # Run 10 times each configuration:
        #
        total_runs = 0
        best_auc_per_run = []
        while total_runs < 10:
            if os.path.isdir(model_dir):
                logging.info("Removing model dir")
                shutil.rmtree(model_dir)

            #
            # Initialize the network each run:
            #
            net = lstm.ECGLSTM(5, 512, 5, 2).to(device)

            #
            # Train the classifier:
            #
            best_auc_scores = train_classifier(net, model_dir=model_dir,
                                               train_config=train_config)
            best_auc_per_run.append(best_auc_scores[BEAT_TO_INDEX[beat_type]])
            writer.add_scalar('auc_with_additional_{}_beats'.format(n),
                              best_auc_scores[BEAT_TO_INDEX[beat_type]], total_runs)
            if best_auc_scores[BEAT_TO_INDEX[beat_type]] >= 0.88:
                logging.info("Found desired AUC: {}".format(best_auc_scores))
                break
            total_runs += 1

        best_auc_for_each_n[n] = best_auc_per_run
        mean_auc_values.append(np.mean(best_auc_per_run))
        var_auc_values.append(np.var(best_auc_per_run))
        best_auc_values.append(max(best_auc_per_run))
        writer.add_scalar('mean_auc', np.mean(best_auc_per_run), n)
        writer.add_scalar('max_auc', max(best_auc_per_run), n)
    writer.close()

    #
    # Save data in pickle:
    #
    all_results = {'best_auc_for_each_n': best_auc_for_each_n,
                   'mean': mean_auc_values,
                   'var': var_auc_values,
                   'best': best_auc_values}
    pickle_file_path = base_path + 'ecg_pytorch/ecg_pytorch/classifiers/pickles_results/{}_{}_lstm.pkl'.format(
        beat_type, gan_type)
    with open(pickle_file_path, 'wb') as handle:
        pickle.dump(all_results, handle, protocol=pickle.HIGHEST_PROTOCOL)
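
# A possible entry point, sketched from how the functions above are
# parameterized; the chosen beat type, GAN type, and CUDA device index are
# assumptions, not values fixed by this module.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
    train_mult('S', 'DCGAN', device)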