help="Number of Noise for insert in control") parser.add_argument('--num_noise_patient', type=int, default=0, help="Number of Noise for insert in patient") args = parser.parse_args() c = load_config(args.config_path) ap = AudioProcessor(**c.audio) if not args.no_insert_noise: c.data_aumentation['insert_noise'] = True else: c.data_aumentation['insert_noise'] = False # ste values for noisy insertion in test c.data_aumentation["num_noise_control"] = args.num_noise_control c.data_aumentation["num_noise_patient"] = args.num_noise_patient print("Insert noise ?", c.data_aumentation['insert_noise']) c.dataset['test_csv'] = args.test_csv c.dataset['test_data_root_path'] = args.test_root_dir c.test_config['batch_size'] = args.batch_size c.test_config['num_workers'] = args.num_workers max_seq_len = c.dataset['max_seq_len'] test_dataloader = test_dataloader(c, ap, max_seq_len=max_seq_len) run_test(args, args.checkpoint_path, test_dataloader, c, c.model_name, ap, cuda=True)
all_checkpoints = sorted(glob(os.path.join(args.checkpoints_path, '*.pt')))
# print(all_checkpoints, os.listdir(args.checkpoints_path))

if args.config_path:
    c = load_config(args.config_path)
else:
    # load the config stored inside the checkpoint
    checkpoint = torch.load(all_checkpoints[0], map_location='cpu')
    c = load_config_from_str(checkpoint['config_str'])

ap = AudioProcessor(c.audio)

log_path = os.path.join(c.train_config['logs_path'], c.model_name)
audio_config = c.audio[c.audio['backend']]
tensorboard = TensorboardWriter(log_path, audio_config)

# set test dataset dir
c.dataset['test_dir'] = args.dataset_dir
# set batch size for the evaluation runs
c.test_config['batch_size'] = 5

testdataloader = test_dataloader(c, ap)

best_loss = float('inf')
best_loss_checkpoint = ''
sdrs_checkpoint = []
for checkpoint in tqdm.tqdm(all_checkpoints):
    mean_loss = test(args, log_path, checkpoint, testdataloader, tensorboard,
                     c, c.model_name, ap, cuda=True)
    sdrs_checkpoint.append([mean_loss, checkpoint])
    if mean_loss < best_loss:
        best_loss = mean_loss
        best_loss_checkpoint = checkpoint

print("Best Loss checkpoint is: ", best_loss_checkpoint, "Best Loss:", best_loss)
copyfile(best_loss_checkpoint,
         os.path.join(args.checkpoints_path, 'fast_best_checkpoint.pt'))
# note: the original referenced an undefined `best_sdr` here; `best_loss` is meant
np.save(os.path.join(args.checkpoints_path,
                     "Loss_validation_with_VCTK_best_SI-SNR_is_" + str(best_loss) + ".npy"),
        np.array(sdrs_checkpoint))
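# Hypothetical follow-up (illustrative, not in the original script): the array
# saved above stores [loss, checkpoint_path] rows, coerced to strings by
# np.array, so it can be reloaded and re-sorted to rank all checkpoints.
# The file path below is an example only.
import numpy as np

rows = np.load('checkpoints/Loss_validation_with_VCTK_best_SI-SNR_is_0.123.npy')
ranked = sorted(rows.tolist(), key=lambda r: float(r[0]))  # lowest loss first
for loss, ckpt in ranked[:5]:
    print(loss, ckpt)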
def run_test_all_seeds(args, cuda=True, debug=False, return_potential=False):
    runs_list = os.listdir(args.experiment_dir)
    runs_list.sort()
    num_runs = len(runs_list)

    votes = []
    wav_files = []
    targets = []

    # define loss function
    criterion = nn.BCELoss(reduction='sum')

    for run in runs_list:
        blockPrint()
        run_dir = os.path.join(args.experiment_dir, run)
        if os.path.isfile(run_dir):
            continue
        model_name = os.listdir(run_dir)[0]
        checkpoint_path = os.path.join(run_dir, model_name, 'best_checkpoint.pt')
        config_path = os.path.join(run_dir, model_name, 'config.json')

        c = load_config(config_path)
        ap = AudioProcessor(**c.audio)

        c.dataset['test_csv'] = args.test_csv
        c.dataset['test_data_root_path'] = args.test_root_dir
        c.test_config['batch_size'] = args.batch_size
        c.test_config['num_workers'] = args.num_workers
        max_seq_len = c.dataset['max_seq_len']
        c.train_config['seed'] = 0

        testdataloader = test_dataloader(c, ap, max_seq_len=max_seq_len)

        # load model
        model = return_model(c)
        enablePrint()
        if checkpoint_path is not None:
            print("Loading checkpoint: %s" % checkpoint_path)
            try:
                checkpoint = torch.load(checkpoint_path, map_location='cpu')
                model.load_state_dict(checkpoint['model'])
                print("Model loaded successfully!")
            except Exception as e:
                raise ValueError(
                    "You need to pass a valid checkpoint; you may need to check "
                    "your config.json, because the config of this checkpoint "
                    "caused the error: " + str(e))
        blockPrint()

        # move the model to GPU
        if cuda:
            model = model.cuda()
        model.train(False)

        vote, targets, wav_path = test(criterion, ap, model, c, testdataloader,
                                       cuda=cuda, confusion_matrix=True,
                                       debug=debug,
                                       simples_vote=args.simples_vote)
        # print(vote)
        wav_files.append(wav_path)
        votes.append(vote)
        if len(wav_files):
            if wav_files[-1] != wav_files[0]:
                raise ValueError(
                    "Different files or ordering used for the test in "
                    "different seeds or folds")

    # mean vote; rounding is necessary when the composite vote is used
    preds = np.mean(np.array(votes), axis=0)
    # print(preds)
    if not return_potential:
        preds = preds.round()

    file_names = wav_files[0]

    if debug and not return_potential:
        enablePrint()
        targets = np.array(targets)
        preds = np.array(preds)
        names = np.array(file_names)
        idxs = np.nonzero(targets == c.dataset['control_class'])
        control_target = targets[idxs]
        control_preds = preds[idxs]
        names_control = names[idxs]
        idxs = np.nonzero(targets == c.dataset['patient_class'])
        patient_target = targets[idxs]
        patient_preds = preds[idxs]
        names_patient = names[idxs]
        if debug:
            print('+' * 40)
            print("Control Files Classified incorrectly:")
            incorrect_ids = np.nonzero(
                control_preds != c.dataset['control_class'])
            inc_names = names_control[incorrect_ids]
            print("Num. Files:", len(inc_names))
            print(inc_names)
            print('+' * 40)
            print('-' * 40)
            print("Patient Files Classified incorrectly:")
            incorrect_ids = np.nonzero(
                patient_preds != c.dataset['patient_class'])
            inc_names = names_patient[incorrect_ids]
Files:", len(inc_names)) print(inc_names) print('-' * 40) acc_control = (control_preds == control_target).mean() acc_patient = (patient_preds == patient_target).mean() acc_balanced = (acc_control + acc_patient) / 2 f1 = f1_score(targets.tolist(), preds.tolist()) uar = recall_score(targets.tolist(), preds.tolist(), average='macro') print("======== Confusion Matrix ==========") y_target = pd.Series(targets, name='Target') y_pred = pd.Series(preds, name='Predicted') df_confusion = pd.crosstab(y_target, y_pred, rownames=['Target'], colnames=['Predicted'], margins=True) print(df_confusion) print("Test\n ", "Acurracy Control: ", acc_control, "Acurracy Patient: ", acc_patient, "Acurracy Balanced", acc_balanced) print("F1:", f1, "UAR:", uar) if return_potential: return preds, file_names else: df = pd.DataFrame({ 'filename': file_names, 'prediction': preds.astype(int) }) df['prediction'] = df['prediction'].replace( int(c.dataset['control_class']), 'negative', regex=True).replace(int(c.dataset['patient_class']), 'positive', regex=True) if args.output_csv: out_csv_path = args.output_csv else: out_csv_path = os.path.join( args.experiment_dir, os.path.basename(c.dataset['test_csv'])) df.to_csv(out_csv_path, index=False)