def main():
    """Evaluate a trained classifier on a DAS test dataset.

    Loads the model named by --model_name from --model_folder, runs it over
    the HDF5 dataset given by --das_dataset_path via evaluate_dataset, and
    prints the parameter count and total runtime.
    """
    # Measure exec time
    start_time = time.time()

    # Args
    parser = argparse.ArgumentParser()
    # BUGFIX: default was '../models/step4', which doubled the '../models/'
    # prefix added below when loading the state dict
    # ('../models/../models/step4/...').
    parser.add_argument("--model_folder", default='step4',
                        help="Model to eval folder")
    parser.add_argument("--model_name", default='Cnn1_3k_10_1e4_256_40_X',
                        help="Classifier model path")
    parser.add_argument("--classifier", default='Cnn1_3k_10',
                        help="Choose classifier architecture")
    # BUGFIX: help string was a copy-paste of the classifier help.
    parser.add_argument("--das_dataset_path", default='DAS_dataset.hdf5',
                        help="HDF5 DAS test dataset path")
    parser.add_argument("--batch_size", type=int, default=1,
                        help="Size of the training batches")
    args = parser.parse_args()

    # Select training device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Test dataset
    # BUGFIX: was args.das_path, an attribute argparse never defines
    # (the flag is --das_dataset_path), so this line raised AttributeError.
    test_set = HDF5Dataset(args.das_dataset_path)
    test_loader = DataLoader(test_set, batch_size=args.batch_size, shuffle=True)

    # Load specified Classifier
    net = get_classifier(args.classifier)
    net.to(device)

    # Count number of parameters
    params = count_parameters(net)

    # Load parameters from trained model
    net.load_state_dict(
        torch.load('../models/' + args.model_folder + '/' + args.model_name + '.pth'))
    net.eval()

    # Evaluate model on DAS test dataset
    evaluate_dataset(test_loader, device, net, args.model_name, 'Analysis/CSVOutputs')

    eval_end = time.time()
    total_time = eval_end - start_time

    print(f'Number of network parameters: {params}\n'
          f'Total execution time: {format_timespan(total_time)}')
def test_dataset(out_dir_with_shards):
    """HDF5Dataset: input validation, DataLoader iteration, shard accounting."""
    crop_to_tensor = transforms.Compose([ArrayCenterCrop(64), ArrayToTensor()])

    # Invalid inputs must raise a helpful ValueError.
    for bad_input in ([], ['doesnotexists.hdf5']):
        with pytest.raises(ValueError):
            HDF5Dataset(bad_input, transform=crop_to_tensor)

    shard_paths = sorted(glob.glob(os.path.join(out_dir_with_shards, '*.hdf5')))
    hdf5_ds = HDF5Dataset(shard_paths, transform=crop_to_tensor)
    # Works with the standard PyTorch DataLoader.
    loader = DataLoader(hdf5_ds, batch_size=3, shuffle=True, num_workers=0)

    # The final shard is expected to be the smaller, partial one.
    assert dataset.get_num_in_shard(shard_paths[0]) > dataset.get_num_in_shard(
        shard_paths[-1])

    # Total image count over every shard except the last (partial) one.
    expected_num_imgs = sum(
        dataset.get_num_in_shard(shard_p) for shard_p in shard_paths[:-1])
    actual_num_imgs = sum(batch.shape[0] for batch in loader)
    assert actual_num_imgs == expected_num_imgs
def main():
    """Fetch one random batch from the STEAD training set and plot its trace.

    Debug/inspection utility: prints the label, tensor and shape, then shows
    the trace with matplotlib.
    """
    train_dataset = HDF5Dataset(
        '../../PycharmProjects/STEAD_ANN/Train_data.hdf5')
    trainloader = DataLoader(train_dataset, batch_size=1, shuffle=True)

    # BUGFIX: next(iter(trainloader)) was called twice; each call built a
    # fresh iterator and fetched (then discarded) an extra random batch.
    trace, label = next(iter(trainloader))

    print(label)
    print(trace)
    print(trace.shape)
    print(type(trace))
    print(type(trace.numpy()))

    plt.figure()
    plt.plot(trace.squeeze().numpy())
    plt.show()
def main():
    """Evaluate a trained classifier on the STEAD seismic, STEAD noise and
    geophone (Geo) test datasets, writing network outputs for later analysis.
    """
    # Measure exec time
    start_time = time.time()

    # Args
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name", default='1h6k_test_model',
                        help="Name of model to eval")
    parser.add_argument("--model_folder", default='test',
                        help="Model to eval folder")
    parser.add_argument("--classifier", default='1h6k',
                        help="Choose classifier architecture")
    parser.add_argument("--train_path", default='Train_data_v3.hdf5',
                        help="HDF5 train Dataset path")
    parser.add_argument("--stead_seis_test_path", default='Test_stead_seis.hdf5',
                        help="HDF5 test Dataset path")
    parser.add_argument("--stead_nseis_test_path", default='Test_stead_noise.hdf5',
                        help="HDF5 test Dataset path")
    parser.add_argument("--geo_test_path", default='Test_geo.hdf5',
                        help="HDF5 test Dataset path")
    parser.add_argument("--batch_size", type=int, default=256,
                        help="Size of the training batches")
    args = parser.parse_args()

    # Select training device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Train dataset (only used by the disabled evaluation call further below)
    train_set = HDF5Dataset(args.train_path)
    train_loader = DataLoader(train_set, batch_size=args.batch_size,
                              shuffle=True)

    # STEAD seismic test dataset
    test_set = HDF5Dataset(args.stead_seis_test_path)
    stead_seis_test_loader = DataLoader(test_set, batch_size=args.batch_size,
                                        shuffle=True)

    # STEAD non-seismic (noise) test dataset
    test_set = HDF5Dataset(args.stead_nseis_test_path)
    stead_nseis_test_loader = DataLoader(test_set, batch_size=args.batch_size,
                                         shuffle=True)

    # Geophone test dataset
    test_set = HDF5Dataset(args.geo_test_path)
    geo_test_loader = DataLoader(test_set, batch_size=args.batch_size,
                                 shuffle=True)

    # Load specified Classifier
    net = get_classifier(args.classifier)
    net.to(device)

    # Count number of parameters
    params = count_parameters(net)

    # Load weights from trained model
    net.load_state_dict(
        torch.load(f'../models/{args.model_folder}/'
                   f'{args.model_name}.pth'))
    net.eval()

    # Evaluation of the training dataset is currently disabled:
    # evaluate_dataset(train_loader, 'Train', device,
    #                  net, args.model_name, args.model_folder,
    #                  '../Analysis/CSVOutputs')

    # NOTE(review): with the call above disabled, 'train_time' only measures
    # dataset/model setup, yet is still printed as "Training evaluation time".
    train_end = time.time()
    train_time = train_end - start_time

    # Evaluate model on STEAD seismic test set
    evaluate_dataset(stead_seis_test_loader, 'Stead_seismic_test', device,
                     net, args.model_name, args.model_folder,
                     '../Results/Testing/Outputs')

    # Evaluate model on STEAD noise test set
    evaluate_dataset(stead_nseis_test_loader, 'Stead_noise_test', device,
                     net, args.model_name, args.model_folder,
                     '../Results/Testing/Outputs')

    # Evaluate model on geophone test set
    evaluate_dataset(geo_test_loader, 'Geo_test', device,
                     net, args.model_name, args.model_folder,
                     '../Results/Testing/Outputs')

    eval_end = time.time()
    eval_time = eval_end - train_end
    total_time = eval_end - start_time

    print(f'Training evaluation time: {format_timespan(train_time)}\n'
          f'Test evaluation time: {format_timespan(eval_time)}\n'
          f'Total execution time: {format_timespan(total_time)}\n\n'
          f'Number of network parameters: {params}')
def main():
    """Sweep decision thresholds for a trained binary classifier on a test set.

    For each threshold in [0.025, 0.975] it accumulates the confusion counts,
    then plots F-score/FP-FN/PR/ROC curves, the best confusion matrix and
    output-value histograms, and writes the raw outputs to a text file.
    All artifacts go under ../Analysis/<kind>/<model_folder>/.
    """
    # Measure exec time
    start_time = time.time()

    # Args
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name", default='defaultmodel',
                        help="Name of model to eval")
    parser.add_argument("--model_folder", default='default',
                        help="Folder to save model")
    parser.add_argument("--classifier", default='1h6k',
                        help="Choose classifier architecture")
    parser.add_argument("--test_path", default='Test_data.hdf5',
                        help="HDF5 test Dataset path")
    parser.add_argument("--batch_size", type=int, default=256,
                        help="Mini-batch size")
    parser.add_argument("--beta", type=float, default=2,
                        help="Fscore beta parameter")
    args = parser.parse_args()

    # Create one output folder per curve/artifact kind
    Path(f"../Analysis/Confusion_matrices/{args.model_folder}").mkdir(parents=True, exist_ok=True)
    Path(f"../Analysis/PR_curves/{args.model_folder}").mkdir(parents=True, exist_ok=True)
    Path(f"../Analysis/ROC_curves/{args.model_folder}").mkdir(parents=True, exist_ok=True)
    Path(f"../Analysis/Fscore_curves/{args.model_folder}").mkdir(parents=True, exist_ok=True)
    Path(f"../Analysis/FPFN_curves/{args.model_folder}").mkdir(parents=True, exist_ok=True)
    Path(f"../Analysis/Histograms/{args.model_folder}").mkdir(parents=True, exist_ok=True)
    Path(f"../Analysis/Output_values/{args.model_folder}").mkdir(parents=True, exist_ok=True)

    # Select training device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Test dataset
    test_dataset = HDF5Dataset(args.test_path)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size,
                             shuffle=True)

    # Load specified Classifier
    net = get_classifier(args.classifier)
    net.to(device)

    # Count number of parameters
    nparams = count_parameters(net)

    # Load from trained model
    net.load_state_dict(torch.load('../models/' + args.model_folder +
                                   '/' + args.model_name + '.pth'))
    net.eval()

    # Print number of network parameters
    print(f'Number of network parameters: {nparams}\n')

    # Flag: collect per-trace output values only on the first threshold pass
    hist = 1
    # Network outputs for seismic / non-seismic labelled traces
    s_outputs = []
    ns_outputs = []

    # Preallocate precision and recall values (one entry per threshold)
    precision = []
    fp_rate = []
    recall = []
    fscores = []
    fp_plt = []
    fn_plt = []

    # Confusion matrix of the best-fscore threshold
    cm = []

    # Record max fscore value obtained
    max_fscore = 0

    # Record threshold of best fscore
    best_thresh = 0

    # Thresholds to evaluate performance on (0.025 .. 0.975 step 0.025)
    thresholds = np.arange(0.025, 1, 0.025)
    # thresholds = [0, 0.5, 0.9]

    # Round threshold values
    thresholds = np.around(thresholds, decimals=3)

    # Evaluate the full test set once per threshold
    for thresh in thresholds:
        # True/False Positives/Negatives
        correct = 0
        total = 0
        tp, fp, tn, fn = 0, 0, 0, 0

        # Print threshold value
        print(f'Threshold value: {thresh}\n')

        # Evaluate
        with tqdm.tqdm(total=len(test_loader),
                       desc='Test dataset evaluation') as test_bar:
            with torch.no_grad():
                for data in test_loader:
                    traces, labels = data[0].to(device), data[1].to(device)
                    outputs = net(traces)
                    # Binary decision at the current threshold
                    predicted = (outputs > thresh)
                    total += labels.size(0)

                    # Add output values to list (just once, first threshold)
                    if hist:
                        for i, lab in enumerate(labels):
                            if lab:
                                s_outputs.append(outputs[i].item())
                            else:
                                ns_outputs.append(outputs[i].item())

                    # Count true positives, true negatives, etc.
                    for i, pred in enumerate(predicted):
                        if pred:
                            if pred == labels[i]:
                                tp += 1
                            else:
                                fp += 1
                        else:
                            if pred == labels[i]:
                                tn += 1
                            else:
                                fn += 1

                    correct += (predicted == labels).sum().item()
                    test_bar.update()

        # Run just one time
        hist = 0

        # Metrics (print_metrics also prints them; beta weights the F-score)
        pre, rec, fpr, fscore = print_metrics(tp, fp, tn, fn, args.beta)

        recall.append(rec)
        fp_rate.append(fpr)
        precision.append(pre)
        fscores.append(fscore)
        fp_plt.append(fp)
        fn_plt.append(fn)

        # Save best conf matrix
        if fscore > max_fscore:
            max_fscore = fscore
            cm = np.asarray([[tp, fn], [fp, tn]])
            best_thresh = thresh

    eval_1 = time.time()
    ev_1 = eval_1 - start_time
    print(f'Test evaluation time: {format_timespan(ev_1)}\n')

    # Pad the curves with their theoretical endpoints so the AUC integrals
    # span the full axis range.
    # Add point (0, 1) to PR curve
    precision.append(1)
    recall.append(0)

    # Add point (1, 0.5) to PR curve
    precision.insert(0, 0.5)
    recall.insert(0, 1)

    # Add point (0, 0) to ROC curve
    fp_rate.append(0)

    # Add point (1, 1) to ROC curve
    fp_rate.insert(0, 1)

    # Area under curve (lists reversed so x is increasing for np.trapz)
    pr_auc = np.trapz(precision[::-1], x=recall[::-1])
    roc_auc = np.trapz(recall[::-1], x=fp_rate[::-1])

    # Print fscores
    print(f'Best test threshold: {best_thresh}, f-score: {max_fscore:5.3f}\n\n'
          f'Test PR AUC: {pr_auc:5.3f}\n'
          f'Test ROC AUC: {roc_auc:5.3f}')

    # Save output values to file
    with open(f'../Analysis/Output_values/{args.model_folder}/outputs_{args.model_name}.txt', 'w') as f:
        f.write('Seismic outputs\n')
        f.write('\n'.join(list(map(str, s_outputs))))
        f.write('\nNon-Seismic outputs\n')
        f.write('\n'.join(list(map(str, ns_outputs))))

    # Plot histograms of network outputs per class
    plot_histograms(s_outputs, ns_outputs, args.model_folder, args.model_name)

    # Plot best confusion matrices
    target_names = ['Seismic', 'Non Seismic']

    # Confusion matrix
    plot_confusion_matrix(cm, target_names,
                          title=f'Confusion matrix {args.model_name}, threshold = {best_thresh}',
                          filename=f'../Analysis/Confusion_matrices/{args.model_folder}/Confusion_matrix_STEAD_{args.model_name}.png')

    # F-score vs thresholds curve
    plt.figure()
    plt.plot(thresholds, fscores)
    plt.title(f'Fscores por umbral modelo {args.model_name}')
    plt.xlabel('Umbrales')
    plt.ylabel('F-score')
    plt.grid(True)
    plt.savefig(f'../Analysis/Fscore_curves/{args.model_folder}/Fscore_{args.model_name}.png')

    # False positives / False negatives curve
    plt.figure()
    line_fp, = plt.plot(thresholds, fp_plt, label='False positives')
    line_fn, = plt.plot(thresholds, fn_plt, label='False negatives')
    plt.title(f'FP y FN modelo {args.model_name}')
    plt.xlabel('Umbrales')
    plt.ylabel('Total')
    plt.grid(True)
    plt.legend(handles=[line_fp, line_fn], loc='best')
    plt.savefig(f'../Analysis/FPFN_curves/{args.model_folder}/FPFN_{args.model_name}.png')

    # Precision/Recall curve test dataset
    plt.figure()
    plt.plot(recall, precision)

    # Annotate threshold values
    # NOTE(review): precision/recall now hold two extra padding points, so
    # zip with 'thresholds' truncates — annotations are offset by one point.
    for i, j, k in zip(recall, precision, thresholds):
        plt.annotate(str(k), (i, j))

    # Dumb model line
    plt.hlines(0.5, 0, 1, 'b', '--')
    plt.title(f'PR test dataset curve for model {args.model_name}')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.xlim(-0.02, 1.02)
    plt.ylim(0.48, 1.02)
    plt.grid(True)
    plt.savefig(f'../Analysis/PR_curves/{args.model_folder}/PR_test_{args.model_name}.png')

    # Receiver operating characteristic curve test dataset
    plt.figure()
    plt.plot(fp_rate, recall)

    # Annotate
    for i, j, k in zip(fp_rate, recall, thresholds):
        plt.annotate(str(k), (i, j))

    # Dumb model line
    plt.plot([0, 1], [0, 1], 'b--')
    plt.title(f'ROC test dataset curve for model {args.model_name}')
    plt.xlabel('False Positive Rate')
    plt.ylabel('Recall')
    plt.xlim(-0.02, 1.02)
    plt.ylim(-0.02, 1.02)
    plt.grid(True)
    plt.savefig(f'../Analysis/ROC_curves/{args.model_folder}/ROC_test_{args.model_name}.png')
def main():
    """Train a binary seismic classifier, logging losses and outputs to
    TensorBoard, and save the final weights under ../models/.

    The architecture is selected by --classifier; unknown names fall back to
    the base Classifier.
    """
    # Measure exec time
    start_time = time.time()

    # Args
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name", default='Default_model',
                        help="Name of model to save")
    parser.add_argument(
        "--classifier", default='C',
        help="Choose classifier architecture, C, S, XS, XL, XXL, XXXL")
    parser.add_argument("--train_path", default='Train_data.hdf5',
                        help="HDF5 train Dataset path")
    parser.add_argument("--test_path", default='Test_data.hdf5',
                        help="HDF5 test Dataset path")
    parser.add_argument("--n_epochs", type=int, default=50,
                        help="Number of epochs of training")
    parser.add_argument("--batch_size", type=int, default=32,
                        help="Size of the batches")
    parser.add_argument("--lr", type=float, default=0.001,
                        help="SGD learning rate")
    parser.add_argument("--wd", type=float, default=0,
                        help="weight decay parameter")
    parser.add_argument("--b1", type=float, default=0.9,
                        help="adam: decay of first order momentum of gradient")
    parser.add_argument("--b2", type=float, default=0.99,
                        help="adam: decay of first order momentum of gradient")
    args = parser.parse_args()

    print(f'Execution details: \n {args}')

    # Select training device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Start tensorboard SummaryWriter
    tb = SummaryWriter('../runs/Seismic')

    # Train dataset
    train_dataset = HDF5Dataset(args.train_path)
    trainloader = DataLoader(train_dataset, batch_size=args.batch_size,
                             shuffle=True)

    # Test dataset
    test_dataset = HDF5Dataset(args.test_path)
    testloader = DataLoader(test_dataset, batch_size=args.batch_size,
                            shuffle=True)

    # IDIOM: map architecture name -> constructor instead of the original
    # if/elif chain; unknown names fall back to Classifier, same as before.
    architectures = {
        'XS': Classifier_XS,
        'S': Classifier_S,
        'XL': Classifier_XL,
        'XXL': Classifier_XXL,
        'XXXL': Classifier_XXXL,
    }
    net = architectures.get(args.classifier, Classifier)()
    net.to(device)

    # Add model graph to tensorboard (one sample batch)
    traces, labels = next(iter(trainloader))
    traces, labels = traces.to(device), labels.to(device)
    tb.add_graph(net, traces)

    # Loss function and optimizer
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(net.parameters(), lr=args.lr,
                           betas=(args.b1, args.b2), weight_decay=args.wd)

    # Running step index for per-batch tensorboard logs
    loss_id = 0

    # Start training
    with tqdm.tqdm(total=args.n_epochs, desc='Epochs', position=0) as epoch_bar:
        for epoch in range(args.n_epochs):
            total_loss = 0
            with tqdm.tqdm(total=len(trainloader), desc='Batches',
                           position=1) as batch_bar:
                for i, data in enumerate(trainloader, 0):
                    inputs, labels = data[0].to(device), data[1].to(device)
                    optimizer.zero_grad()
                    outputs = net(inputs)
                    # Log the first raw output of each batch
                    tb.add_scalar('Output', outputs[0].item(), loss_id)
                    loss = criterion(outputs, labels.float())
                    loss.backward()
                    optimizer.step()
                    total_loss += loss.item()
                    loss_id += 1
                    tb.add_scalar('Loss', loss.item(), loss_id)
                    batch_bar.update(1)
            # Per-epoch aggregate loss
            tb.add_scalar('Total_Loss', total_loss, epoch)
            epoch_bar.update(1)

    # Close tensorboard
    tb.close()

    # Save model
    torch.save(net.state_dict(), '../models/' + args.model_name + '.pth')

    # Measure training, and execution times
    end_tm = time.time()

    # Training time
    tr_t = end_tm - start_time
    print(f'Training time: {format_timespan(tr_t)}')
def main():
    """Evaluate a trained classifier on its train and test splits.

    Outputs are written by evaluate_dataset under 'Net_outputs'; timing and
    parameter count are printed at the end.
    """
    # Start execution timer
    t0 = time.time()

    # Command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset_name", default='STEAD-STEAD',
                        help="Name of dataset to evaluate on")
    parser.add_argument("--model_name", default='1h6k_test_model',
                        help="Name of model to eval")
    parser.add_argument("--model_folder", default='models',
                        help="Model to eval folder")
    parser.add_argument("--classifier", default='1h6k',
                        help="Choose classifier architecture")
    parser.add_argument("--train_path", default='Train_data.hdf5',
                        help="HDF5 train Dataset path")
    parser.add_argument("--test_path", default='Test_data_v2.hdf5',
                        help="HDF5 test Dataset path")
    parser.add_argument("--batch_size", type=int, default=256,
                        help="Size of the training batches")
    args = parser.parse_args()

    # Pick GPU when available
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Loaders for both splits
    train_loader = DataLoader(HDF5Dataset(args.train_path),
                              batch_size=args.batch_size,
                              shuffle=True, num_workers=8)
    test_loader = DataLoader(HDF5Dataset(args.test_path),
                             batch_size=args.batch_size,
                             shuffle=True, num_workers=8)

    # Instantiate the requested architecture and restore its weights
    net = get_classifier(args.classifier)
    net.to(device)
    params = count_parameters(net)
    net.load_state_dict(torch.load(f'{args.model_folder}/{args.model_name}.pth'))
    net.eval()

    # Evaluate on the training split
    evaluate_dataset(train_loader, args.dataset_name + '/Train', device, net,
                     args.model_name, args.model_folder, 'Net_outputs')
    t1 = time.time()

    # Evaluate on the test split
    evaluate_dataset(test_loader, args.dataset_name + '/Test', device, net,
                     args.model_name, args.model_folder, 'Net_outputs')
    t2 = time.time()

    print(f'Training evaluation time: {format_timespan(t1 - t0)}\n'
          f'Test evaluation time: {format_timespan(t2 - t1)}\n'
          f'Total execution time: {format_timespan(t2 - t0)}\n\n'
          f'Number of network parameters: {params}')
def test_dataset_without_pickle(out_dir_with_shards):
    """HDF5Dataset must still construct when the per-shard pickle is absent."""
    # Remove the cached shard-size bookkeeping file.
    pickle_path = os.path.join(out_dir_with_shards, maker.NUM_PER_SHARD_PKL)
    os.remove(pickle_path)

    to_tensor = transforms.Compose([ArrayCenterCrop(64), ArrayToTensor()])
    shard_paths = sorted(glob.glob(os.path.join(out_dir_with_shards, '*.hdf5')))
    # Construction should succeed without the pickle.
    HDF5Dataset(shard_paths, transform=to_tensor)
def main():
    """Train a classifier with periodic validation and early stopping.

    Every --eval_iter batches the model is evaluated on the validation set;
    training stops early when the current validation accuracy does not exceed
    any of the last --patience recorded accuracies. Learning curves are
    plotted and the model is saved under ../models/<model_folder>/.
    """
    # Measure exec time
    start_time = time.time()

    # Args
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name", default='Default_model',
                        help="Name of model to save")
    parser.add_argument("--model_folder", default='default',
                        help="Folder to save model")
    parser.add_argument("--classifier", default='C',
                        help="Choose classifier architecture, C, CBN")
    parser.add_argument("--train_path", default='Train_data.hdf5',
                        help="HDF5 train Dataset path")
    parser.add_argument("--val_path", default='Validation_data.hdf5',
                        help="HDF5 validation Dataset path")
    parser.add_argument("--n_epochs", type=int, default=1,
                        help="Number of epochs of training")
    parser.add_argument("--batch_size", type=int, default=32,
                        help="Size of the batches")
    parser.add_argument("--eval_iter", type=int, default=1,
                        help="Number of batches between validations")
    parser.add_argument("--earlystop", type=int, default=1,
                        help="Early stopping flag, 0 no early stopping")
    parser.add_argument("--patience", type=int, default=30,
                        help="Early stopping patience")
    parser.add_argument("--lr", type=float, default=0.00001,
                        help="Adam learning rate")
    parser.add_argument("--wd", type=float, default=0,
                        help="weight decay parameter")
    parser.add_argument("--b1", type=float, default=0.9,
                        help="adam: decay of first order momentum of gradient")
    parser.add_argument("--b2", type=float, default=0.99,
                        help="adam: decay of first order momentum of gradient")
    args = parser.parse_args()

    # Create learning curves folder
    Path("../Analysis/Learning_curves/" + args.model_folder +
         "/" + "Accuracy").mkdir(exist_ok=True, parents=True)
    Path("../Analysis/Learning_curves/" + args.model_folder +
         "/" + "Loss").mkdir(exist_ok=True)

    # Select training device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Train dataset
    train_dataset = HDF5Dataset(args.train_path)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                              shuffle=True)

    # Validation dataset
    val_dataset = HDF5Dataset(args.val_path)
    val_loader = DataLoader(val_dataset, batch_size=args.batch_size,
                            shuffle=True)

    # Load specified Classifier
    net = get_classifier(args.classifier)
    net.to(device)

    # Count number of parameters
    nparams = count_parameters(net)

    # Loss function and optimizer
    criterion = nn.BCELoss()
    optimizer = optim.Adam(net.parameters(), lr=args.lr,
                           betas=(args.b1, args.b2), weight_decay=args.wd)

    # Training and validation accuracy histories
    tr_accuracies = []
    val_accuracies = []

    # Training and validation loss histories
    tr_losses = []
    val_losses = []

    # Early stopping: sliding window of the last 'patience' validation
    # accuracies; val_acc starts at 1 so the zero-filled window never
    # triggers a stop before the first real validation.
    val_acc = 1
    earlys = np.zeros(args.patience).tolist()

    # Start training
    with tqdm.tqdm(total=args.n_epochs, desc='Epochs') as epoch_bar:
        for epoch in range(args.n_epochs):
            n_correct, n_total = 0, 0

            # Early stopping: stop when val_acc beats none of the window
            if all(val_acc <= i for i in earlys) and args.earlystop:
                break

            with tqdm.tqdm(total=len(train_loader), desc='Batches',
                           leave=False) as batch_bar:
                for i, data in enumerate(train_loader):
                    # Network to train mode
                    net.train()

                    # Clear gradient accumulators
                    optimizer.zero_grad()

                    # Get batch data and labels
                    inputs, labels = data[0].to(device), data[1].to(device)

                    # Forward pass
                    outputs = net(inputs)

                    # Predicted labels (threshold at 0.5)
                    predicted = torch.round(outputs)

                    # Running training accuracy over the epoch so far
                    n_total += labels.size(0)
                    n_correct += (predicted == labels).sum().item()
                    train_acc = 100 * n_correct / n_total

                    # Calculate loss
                    loss = criterion(outputs, labels.float())

                    # Backpropagation
                    loss.backward()

                    # Optimize
                    optimizer.step()

                    # Check validation accuracy periodically
                    if i % args.eval_iter == 0:
                        # Switch model to eval mode
                        net.eval()

                        # Accuracy / loss over the whole validation set
                        total_val_loss = 0
                        total_val, correct_val = 0, 0
                        with torch.no_grad():
                            for val_data in val_loader:
                                # Retrieve data and labels
                                traces, labels = val_data[0].to(
                                    device), val_data[1].to(device)

                                # Forward pass
                                outputs = net(traces)

                                # Calculate loss
                                val_loss = criterion(outputs, labels.float())

                                # Total loss for epoch
                                total_val_loss += val_loss.item()

                                # Predicted labels
                                predicted = torch.round(outputs)

                                # Sum up correct and total validation examples
                                total_val += labels.size(0)
                                correct_val += (
                                    predicted == labels).sum().item()

                        val_avg_loss = total_val_loss / len(val_loader)

                        # Calculate validation accuracy
                        val_acc = 100 * correct_val / total_val

                        # Save acc for early stopping
                        earlys.pop(0)
                        earlys.append(val_acc)

                        # Save loss to list
                        val_losses.append(val_avg_loss)
                        # BUGFIX: was tr_losses.append(loss), which stored the
                        # live loss *tensor* (retaining each autograd graph,
                        # growing memory, and handing tensors to the plotting
                        # helper). Store the detached float instead.
                        tr_losses.append(loss.item())

                        # Append training and validation accuracies
                        tr_accuracies.append(train_acc)
                        val_accuracies.append(val_acc)

                    # Update batch bar
                    batch_bar.update()

                    # Early stopping
                    if all(val_acc <= i for i in earlys) and args.earlystop:
                        break

            # Update epochs bar
            epoch_bar.update()

    # Save model
    torch.save(
        net.state_dict(),
        '../models/' + args.model_folder + '/' + args.model_name + '.pth')

    # Measure training, and execution times
    end_tm = time.time()

    # Training time
    tr_t = end_tm - start_time

    # Plot train and validation accuracies
    learning_curve_acc(tr_accuracies, val_accuracies,
                       args.model_name, args.model_folder)

    # Plot train and validation losses
    learning_curve_loss(tr_losses, val_losses,
                        args.model_name, args.model_folder)

    print(f'Execution details: \n{args}\n'
          f'Number of parameters: {nparams}\n'
          f'Training time: {format_timespan(tr_t)}')
# GAN training setup: seeding, dataset/dataloader construction and
# hyper-parameter extraction.
# NOTE(review): this fragment relies on a previously parsed `opt` options
# namespace (argparse result) that is defined elsewhere in the file.
pass
opt.manualSeed = 43  # fix seed for reproducible runs
print("Random Seed: ", opt.manualSeed)
random.seed(opt.manualSeed)
torch.manual_seed(opt.manualSeed)

# Let cuDNN benchmark conv algorithms (fastest for fixed-size inputs)
cudnn.benchmark = True

if torch.cuda.is_available() and not opt.cuda:
    print(
        "WARNING: You have a CUDA device, so you should probably run with --cuda"
    )

# Build the dataset; only the '3D' variant is handled here, so `dataset`
# is unbound (NameError at the assert) for any other opt.dataset value.
if opt.dataset in ['3D']:
    dataset = HDF5Dataset(opt.dataroot,
                          input_transform=transforms.Compose(
                              [transforms.ToTensor()]))
assert dataset
dataloader = torch.utils.data.DataLoader(dataset,
                                         batch_size=opt.batchSize,
                                         shuffle=True,
                                         num_workers=int(opt.workers))

# Network hyper-parameters from the options namespace
ngpu = int(opt.ngpu)  # number of GPUs to use
nz = int(opt.nz)      # size of the latent vector
ngf = int(opt.ngf)    # generator feature-map base width
ndf = int(opt.ndf)    # discriminator feature-map base width
nc = 1                # single input channel
def compute_metrics(tp, fp, fn):
    """Return (precision, recall, f1-score) from confusion counts.

    BUGFIX: the original inline computation raised ZeroDivisionError when a
    split produced no positive predictions (tp + fp == 0), contained no
    positive labels (tp + fn == 0), or both precision and recall were zero.
    Those degenerate cases now yield 0.0.
    """
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    if precision + recall > 0:
        fscore = 2 * (precision * recall) / (precision + recall)
    else:
        fscore = 0.0
    return precision, recall, fscore


def main():
    """Evaluate a trained classifier on the train and test sets.

    Decisions use a fixed 0.5 threshold (torch.round); confusion counts,
    precision/recall/F-score and accuracy are printed for both splits.
    """
    # Measure exec time
    start_time = time.time()

    # Args
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name", default='XXL_lr0000001_bs32',
                        help="Name of model to eval")
    parser.add_argument(
        "--classifier", default='XXL',
        help="Choose classifier architecture, C, S, XS, XL, XXL, XXXL")
    parser.add_argument("--train_path", default='Train_data.hdf5',
                        help="HDF5 train Dataset path")
    parser.add_argument("--test_path", default='Test_data.hdf5',
                        help="HDF5 test Dataset path")
    parser.add_argument("--batch_size", type=int, default=32,
                        help="Size of the batches")
    args = parser.parse_args()

    print(f'Evaluation details: \n {args}\n')

    # Select training device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Train dataset
    train_dataset = HDF5Dataset(args.train_path)
    trainloader = DataLoader(train_dataset, batch_size=args.batch_size,
                             shuffle=True)

    # Test dataset
    test_dataset = HDF5Dataset(args.test_path)
    testloader = DataLoader(test_dataset, batch_size=args.batch_size,
                            shuffle=True)

    # Load specified Classifier
    if args.classifier == 'XS':
        net = Classifier_XS()
    elif args.classifier == 'S':
        net = Classifier_S()
    elif args.classifier == 'XL':
        net = Classifier_XL()
    elif args.classifier == 'XXL':
        net = Classifier_XXL()
    elif args.classifier == 'XXXL':
        net = Classifier_XXXL()
    else:
        net = Classifier()
    net.to(device)

    # Load from trained model
    net.load_state_dict(torch.load('../models/' + args.model_name + '.pth'))
    net.eval()

    # Evaluate model on training dataset
    # True/False Positives/Negatives
    correct = 0
    total = 0
    tp, fp, tn, fn = 0, 0, 0, 0

    with tqdm.tqdm(total=len(trainloader), desc='Train dataset evaluation',
                   position=0) as train_bar:
        with torch.no_grad():
            for data in trainloader:
                traces, labels = data[0].to(device), data[1].to(device)
                outputs = net(traces)
                # Binary decision at threshold 0.5
                predicted = torch.round(outputs)
                total += labels.size(0)
                for i, pred in enumerate(predicted):
                    if pred:
                        if pred == labels[i]:
                            tp += 1
                        else:
                            fp += 1
                    else:
                        if pred == labels[i]:
                            tn += 1
                        else:
                            fn += 1
                correct += (predicted == labels).sum().item()
                train_bar.update(1)

    # Evaluation metrics (guarded against empty classes)
    precision, recall, fscore = compute_metrics(tp, fp, fn)

    eval_1 = time.time()
    ev_1 = eval_1 - start_time

    print(f'Training Evaluation results: \n\n\n'
          f'correct: {correct}, total: {total}\n'
          f'True positives: {tp}\n\n'
          f'False positives: {fp}\n'
          f'True negatives: {tn}\n'
          f'False negatives: {fn}\n\n'
          f'Evaluation metrics:\n\n'
          f'Precision: {precision:5.3f}\n'
          f'Recall: {recall:5.3f}\n'
          f'F-score: {fscore:5.3f}\n')
    print('Accuracy of the network on the train set: %d %%\n' %
          (100 * correct / total))

    # Evaluate model on test set
    # True/False Positives/Negatives
    correct = 0
    total = 0
    tp, fp, tn, fn = 0, 0, 0, 0

    with tqdm.tqdm(total=len(testloader), desc='Test dataset evaluation',
                   position=0) as test_bar:
        with torch.no_grad():
            for data in testloader:
                traces, labels = data[0].to(device), data[1].to(device)
                outputs = net(traces)
                predicted = torch.round(outputs)
                total += labels.size(0)
                for i, pred in enumerate(predicted):
                    if pred:
                        if pred == labels[i]:
                            tp += 1
                        else:
                            fp += 1
                    else:
                        if pred == labels[i]:
                            tn += 1
                        else:
                            fn += 1
                correct += (predicted == labels).sum().item()
                test_bar.update(1)

    # Evaluation metrics (guarded against empty classes)
    precision, recall, fscore = compute_metrics(tp, fp, fn)

    eval_2 = time.time()
    ev_2 = eval_2 - eval_1
    ev_t = eval_2 - start_time

    print(f'Test Evaluation results: \n\n\n'
          f'correct: {correct}, total: {total}\n\n'
          f'True positives: {tp}\n'
          f'False positives: {fp}\n'
          f'True negatives: {tn}\n'
          f'False negatives: {fn}\n\n'
          f'Evaluation metrics:\n\n'
          f'Precision: {precision:5.3f}\n'
          f'Recall: {recall:5.3f}\n'
          f'F-score: {fscore:5.3f}\n\n'
          f'Training evaluation time: {format_timespan(ev_1)}\n'
          f'Test evaluation time: {format_timespan(ev_2)}\n'
          f'Total execution time: {format_timespan(ev_t)}\n\n')
    print('Accuracy of the network on the test set: %d %%' %
          (100 * correct / total))
def main():
    """Run a trained classifier over the three DAS datasets (seismic,
    non-seismic, noise) and save the network outputs for later analysis."""
    # Start execution timer
    t_start = time.time()

    # Command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name", default='1h6k_test_model',
                        help="Name of model to eval")
    parser.add_argument("--model_folder", default='test',
                        help="Model to eval folder")
    parser.add_argument("--classifier", default='1h6k',
                        help="Choose classifier architecture")
    parser.add_argument("--das_seis_path", default='DAS_seismic.hdf5',
                        help="HDF5 DAS seimic dataset path")
    parser.add_argument("--das_nseis_path", default='DAS_non_seismic.hdf5',
                        help="HDF5 DAS non seimic dataset path")
    parser.add_argument("--das_noise_path", default='DAS_noise.hdf5',
                        help="HDF5 DAS noise dataset path")
    parser.add_argument("--batch_size", type=int, default=1,
                        help="Size of the training batches")
    args = parser.parse_args()

    # Pick GPU when available
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # One DataLoader per DAS dataset, keyed by its evaluation tag
    loaders = {}
    for tag, hdf5_path in (('DAS_seismic', args.das_seis_path),
                           ('DAS_non_seismic', args.das_nseis_path),
                           ('DAS_noise', args.das_noise_path)):
        loaders[tag] = DataLoader(HDF5Dataset(hdf5_path),
                                  batch_size=args.batch_size, shuffle=True)

    # Build network and restore trained weights
    net = get_classifier(args.classifier)
    net.to(device)
    params = count_parameters(net)
    net.load_state_dict(
        torch.load(f'../models/{args.model_folder}/{args.model_name}.pth'))
    net.eval()

    # Evaluate each DAS dataset in turn (insertion order preserved)
    for tag, loader in loaders.items():
        evaluate_dataset(loader, tag, device, net, args.model_name,
                         args.model_folder, '../Results/Testing/Outputs')

    total_time = time.time() - t_start
    print(f'Total execution time: {format_timespan(total_time)}\n'
          f'Number of network parameters: {params}')
def main():
    """Train a classifier with validation and optional early stopping, then plot learning curves."""
    # Measure exec time
    start_time = time.time()

    # Args
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name", default='1h6k_test_model',
                        help="Name of model to save")
    parser.add_argument("--model_folder", default='test',
                        help="Folder to save model")
    parser.add_argument("--classifier", default='1h6k',
                        help="Choose classifier architecture")
    parser.add_argument("--train_path", default='Train_data.hdf5',
                        help="HDF5 train Dataset path")
    parser.add_argument("--val_path", default='Validation_data.hdf5',
                        help="HDF5 validation Dataset path")
    parser.add_argument("--epochs", type=int, default=1,
                        help="Number of training epochs")
    parser.add_argument("--batch_size", type=int, default=256,
                        help="Size of the batches")
    parser.add_argument("--eval_iter", type=int, default=1,
                        help="Number of batches between validations")
    parser.add_argument("--earlystop", type=int, default=1,
                        help="Early stopping flag, 0 no early stopping")
    parser.add_argument("--patience", type=int, default=30,
                        help="Early stopping patience")
    parser.add_argument("--lr", type=float, default=1e-4,
                        help="Adam learning rate")
    parser.add_argument("--wd", type=float, default=0,
                        help="weight decay parameter")
    parser.add_argument("--b1", type=float, default=0.9,
                        help="adam: decay of first order momentum of gradient")
    # Fixed help text: b2 is Adam's *second*-moment decay, not first.
    parser.add_argument("--b2", type=float, default=0.99,
                        help="adam: decay of second order momentum of gradient")
    args = parser.parse_args()

    # Select training device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Train dataset
    train_set = HDF5Dataset(args.train_path)
    train_loader = DataLoader(train_set, batch_size=args.batch_size, shuffle=True)

    # Validation dataset
    val_set = HDF5Dataset(args.val_path)
    val_loader = DataLoader(val_set, batch_size=args.batch_size, shuffle=True)

    # Load specified Classifier
    net = get_classifier(args.classifier)
    net.to(device)

    # Count number of parameters
    params = count_parameters(net)

    # Loss function and optimizer
    criterion = nn.BCELoss()
    optimizer = optim.Adam(net.parameters(), lr=args.lr,
                           betas=(args.b1, args.b2), weight_decay=args.wd)

    # Training and validation accuracy/loss histories.
    # NOTE(review): these lists are never passed to train_model, so they remain
    # empty and the learning-curve plots below receive no data — confirm whether
    # train_model should return or populate them.
    tr_accuracies = []
    val_accuracies = []
    tr_losses = []
    val_losses = []

    # Removed dead locals from the original (`val_acc = 1` and
    # `early = np.zeros(args.patience).tolist()`): neither was ever read.

    train_model(train_loader, val_loader, net, device, args.epochs,
                optimizer, criterion, args.earlystop, args.patience,
                args.eval_iter, f'../models/{args.model_folder}',
                args.model_name)

    # Measure training, and execution times
    train_end = time.time()

    # Training time
    train_time = train_end - start_time

    # Plot train and validation accuracies
    learning_curve_acc(tr_accuracies, val_accuracies,
                       f'../Analysis/Learning_curves/{args.model_folder}',
                       args.model_name)

    # Plot train and validation losses
    learning_curve_loss(tr_losses, val_losses,
                        f'../Analysis/Learning_curves/{args.model_folder}',
                        args.model_name)

    print(f'Execution details: \n{args}\n'
          f'Number of parameters: {params}\n'
          f'Training time: {format_timespan(train_time)}')
def main(): # Measure exec time start_time = time.time() # Args parser = argparse.ArgumentParser() parser.add_argument("--model_name", default='1h6k_test_model', help="Name of model to train") parser.add_argument("--model_folder", default='test', help="Folder to save model") parser.add_argument("--classifier", default='1h6k', help="Choose classifier architecture") parser.add_argument("--train_path", default='Train_data.hdf5', help="HDF5 train Dataset path") parser.add_argument("--epochs", type=int, default=50, help="Number of training epochs") parser.add_argument("--batch_size", type=int, default=256, help="Size of the batches") parser.add_argument("--lr", type=float, default=1e-3, help="SGD learning rate") parser.add_argument("--wd", type=float, default=0, help="weight decay parameter") parser.add_argument("--b1", type=float, default=0.9, help="adam: decay of first order momentum of gradient") parser.add_argument("--b2", type=float, default=0.99, help="adam: decay of first order momentum of gradient") args = parser.parse_args() # Select training device device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # Train dataset train_set = HDF5Dataset(args.train_path) train_loader = DataLoader(train_set, batch_size=args.batch_size, shuffle=True) # Load specified Classifier net = get_classifier(args.classifier) net.to(device) # Count number of parameters params = count_parameters(net) # Loss function and optimizer criterion = nn.BCELoss() optimizer = optim.Adam(net.parameters(), lr=args.lr, betas=(args.b1, args.b2), weight_decay=args.wd) # Train model train_model(train_loader, args.epochs, optimizer, criterion, net, device, f'../models/{args.model_folder}', args.model_name) # Measure training, and execution times train_end = time.time() # Training time train_time = train_end - start_time print(f'Execution details: \n{args}\n' f'Number of parameters: {params}\n' f'Training time: {format_timespan(train_time)}')
def _collect_model_outputs(loader, desc, device, net):
    """Run `net` over `loader` and return one {'out', 'label'} row per sample.

    Factored out of main(): the train-set and test-set evaluation loops were
    byte-for-byte duplicates apart from the loader and progress-bar label.
    """
    rows = []
    with tqdm.tqdm(total=len(loader), desc=desc, position=0) as bar:
        with torch.no_grad():
            for data in loader:
                traces, labels = data[0].to(device), data[1].to(device)
                outputs = net(traces)
                for out, lab in zip(outputs, labels):
                    rows.append({'out': out.item(), 'label': lab.item()})
                bar.update(1)
    return rows


def main():
    """Evaluate a trained classifier on train and test sets, writing per-sample outputs to CSV."""
    # Measure exec time
    start_time = time.time()

    # Args
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name", default='XXL_lr0000001_bs32',
                        help="Name of model to eval")
    parser.add_argument("--model_folder", default='default',
                        help="Folder to save model")
    parser.add_argument(
        "--classifier", default='XXL',
        help="Choose classifier architecture, C, S, XS, XL, XXL, XXXL")
    parser.add_argument("--train_path", default='Train_data.hdf5',
                        help="HDF5 train Dataset path")
    parser.add_argument("--test_path", default='Test_data.hdf5',
                        help="HDF5 test Dataset path")
    parser.add_argument("--batch_size", type=int, default=256,
                        help="Size of the batches")
    # NOTE(review): --beta is parsed but never used in this function — confirm
    # whether F-score computation was meant to happen here.
    parser.add_argument("--beta", type=float, default=2,
                        help="Fscore beta parameter")
    args = parser.parse_args()

    # Create csv files folder
    Path(f"../Analysis/OutputsCSV/{args.model_folder}/train").mkdir(
        parents=True, exist_ok=True)
    Path(f"../Analysis/OutputsCSV/{args.model_folder}/eval").mkdir(
        parents=True, exist_ok=True)

    # Select training device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Train dataset
    train_dataset = HDF5Dataset(args.train_path)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                              shuffle=True)

    # Test dataset
    test_dataset = HDF5Dataset(args.test_path)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size,
                             shuffle=True)

    # Load specified Classifier
    net = get_classifier(args.classifier)
    net.to(device)

    # Count number of parameters
    nparams = count_parameters(net)

    # Load from trained model (f-string path, matching the sibling scripts)
    net.load_state_dict(
        torch.load(f'../models/{args.model_folder}/{args.model_name}.pth'))
    net.eval()

    # Evaluate model on training dataset
    train_rows_list = _collect_model_outputs(
        train_loader, 'Train dataset evaluation', device, net)
    train_outputs = pd.DataFrame(train_rows_list)
    train_outputs.to_csv(
        f'../Analysis/OutputsCSV/{args.model_folder}/train/{args.model_name}.csv',
        index=False)

    eval_1 = time.time()
    ev_1 = eval_1 - start_time

    # Evaluate model on test set
    test_rows_list = _collect_model_outputs(
        test_loader, 'Test dataset evaluation', device, net)
    test_outputs = pd.DataFrame(test_rows_list)
    test_outputs.to_csv(
        f'../Analysis/OutputsCSV/{args.model_folder}/eval/{args.model_name}.csv',
        index=False)

    eval_2 = time.time()
    ev_2 = eval_2 - eval_1
    ev_t = eval_2 - start_time

    print(f'Training evaluation time: {format_timespan(ev_1)}\n'
          f'Test evaluation time: {format_timespan(ev_2)}\n'
          f'Total execution time: {format_timespan(ev_t)}\n\n'
          f'Number of network parameters: {nparams}')