def config():
    exp_dir = ''
    assert len(exp_dir) > 0, 'Set the exp_dir on the command line.'
    storage_dir = str(Path(exp_dir) / 'eval')
    database_json = load_json(Path(exp_dir) / 'config.json')["database_json"]
    test_set = 'test_clean'
    max_examples = None
    device = 0
    ex.observers.append(FileStorageObserver.create(storage_dir))
def config():
    exp_dir = ''
    assert len(exp_dir) > 0, 'Set the exp_dir on the command line.'
    storage_dir = get_new_subdir(
        Path(exp_dir) / 'eval', id_naming='time', consider_mpi=True
    )
    database_json = load_json(Path(exp_dir) / 'config.json')["database_json"]
    test_set = 'test_clean'
    max_examples = None
    device = 0
    ex.observers.append(FileStorageObserver.create(storage_dir))
def compare_configs(storage_dir, trainer_opts, provider_opts):
    opts = flatten(trainer_opts)
    opts.update(flatten(provider_opts))
    init = load_json(Path(storage_dir) / 'init.json')
    added, removed, modified, _, _ = dict_compare(opts, init)
    if len(added):
        warn(f'The following options were added to the model: {added}')
    if len(removed):
        warn(f'The following options were removed from the model: {removed}')
    return init['trainer_opts'], init['provider_opts']
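# Hedged usage sketch (not part of the original code): compare freshly built
# trainer/provider options against the ones stored with the model in
# 'init.json'. `current_trainer_opts` and `current_provider_opts` are assumed
# names for the options of the current invocation; differences are reported
# via `warn`, and the values recorded in 'init.json' are the ones returned.
#
#   trainer_opts, provider_opts = compare_configs(
#       storage_dir, current_trainer_opts, current_provider_opts
#   )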
def config():
    exp_dir = ''
    assert len(exp_dir) > 0, 'Set the model path on the command line.'
    storage_dir = str(
        get_new_subdir(
            Path(exp_dir) / 'eval', id_naming='time', consider_mpi=True
        )
    )
    database_json = load_json(Path(exp_dir) / 'config.json')["database_json"]
    num_workers = 8
    batch_size = 32
    max_padding_rate = .05
    device = 0
    ckpt_name = 'ckpt_best_map.pth'
def create_json(database_path, json_path):
    database_path = Path(database_path)
    rir_root = database_path.joinpath('rirs/')
    setups = load_json(database_path.joinpath('setups.json'))
    simulation_descriptions = \
        load_json(database_path.joinpath('simulation_descriptions.json'))
    for scenario in simulation_descriptions.values():
        for example_id, example in scenario.items():
            for node_id, sro in example['sro'].items():
                if isinstance(sro, str):
                    example['sro'][node_id] = database_path.joinpath(sro)
            example['node_position'] = setups[example_id]['node_position']
            example['node_orientation'] = \
                setups[example_id]['node_orientation']
            example['environment'] = setups[example_id]['environment']
            example['src_diary'] = [
                complete_source_information(
                    source, example_id, setups, rir_root)
                for source in example['src_diary']
            ]
    db = {'datasets': simulation_descriptions}
    dump_json(db, json_path, sort_keys=False)
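# Hedged usage sketch (not part of the original code): the paths below are
# placeholders. `create_json` resolves SRO trajectory paths against the
# database root, attaches the per-example setup information (node positions,
# node orientations, environment) and the completed source diary, and dumps
# everything as a single dataset JSON.
#
#   create_json(
#       database_path='/path/to/database',
#       json_path='/path/to/database/database.json',
#   )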
def main(_run, exp_dir, storage_dir, database_json, ckpt_name, num_workers,
         batch_size, max_padding_rate, device):
    commands.print_config(_run)
    exp_dir = Path(exp_dir)
    storage_dir = Path(storage_dir)
    config = load_json(exp_dir / 'config.json')

    model = Model.from_storage_dir(
        exp_dir, consider_mpi=True, checkpoint_name=ckpt_name
    )
    model.to(device)
    model.eval()

    _, validation_data, test_data = get_datasets(
        database_json=database_json,
        min_signal_length=1.5,
        audio_reader=config['audio_reader'],
        stft=config['stft'],
        num_workers=num_workers,
        batch_size=batch_size,
        max_padding_rate=max_padding_rate,
        storage_dir=exp_dir,
    )

    # Collect time-pooled clip-level scores and targets on the validation set.
    outputs = []
    with torch.no_grad():
        for example in tqdm(validation_data):
            example = model.example_to_device(example, device)
            (y, seq_len), _ = model(example)
            y = Mean(axis=-1)(y, seq_len)
            outputs.append((
                y.cpu().detach().numpy(),
                example['events'].cpu().detach().numpy(),
            ))

    scores, targets = list(zip(*outputs))
    scores = np.concatenate(scores)
    targets = np.concatenate(targets)

    # Tune class-wise decision thresholds for F1 on the validation set.
    thresholds, f1 = instance_based.get_optimal_thresholds(
        targets, scores, metric='f1'
    )
    decisions = scores > thresholds
    f1, p, r = instance_based.fscore(targets, decisions, event_wise=True)
    ap = metrics.average_precision_score(targets, scores, average=None)
    auc = metrics.roc_auc_score(targets, scores, average=None)
    pos_class_indices, precision_at_hits = \
        instance_based.positive_class_precisions(targets, scores)
    lwlrap, per_class_lwlrap, weight_per_class = \
        instance_based.lwlrap_from_precisions(
            precision_at_hits, pos_class_indices, num_classes=targets.shape[1]
        )
    overall_results = {
        'validation': {
            'mF1': np.mean(f1),
            'mP': np.mean(p),
            'mR': np.mean(r),
            'mAP': np.mean(ap),
            'mAUC': np.mean(auc),
            'lwlrap': lwlrap,
        }
    }
    event_validation_results = {}
    labels = load_json(exp_dir / 'events.json')
    for i, label in enumerate(labels):
        event_validation_results[label] = {
            'F1': f1[i],
            'P': p[i],
            'R': r[i],
            'AP': ap[i],
            'AUC': auc[i],
            'lwlrap': per_class_lwlrap[i],
        }

    # Evaluate the test set with the thresholds tuned on the validation set.
    outputs = []
    with torch.no_grad():
        for example in tqdm(test_data):
            example = model.example_to_device(example, device)
            (y, seq_len), _ = model(example)
            y = Mean(axis=-1)(y, seq_len)
            outputs.append((
                example['example_id'],
                y.cpu().detach().numpy(),
                example['events'].cpu().detach().numpy(),
            ))

    example_ids, scores, targets = list(zip(*outputs))
    example_ids = np.concatenate(example_ids).tolist()
    scores = np.concatenate(scores)
    targets = np.concatenate(targets)

    decisions = scores > thresholds
    f1, p, r = instance_based.fscore(targets, decisions, event_wise=True)
    ap = metrics.average_precision_score(targets, scores, average=None)
    auc = metrics.roc_auc_score(targets, scores, average=None)
    pos_class_indices, precision_at_hits = \
        instance_based.positive_class_precisions(targets, scores)
    lwlrap, per_class_lwlrap, weight_per_class = \
        instance_based.lwlrap_from_precisions(
            precision_at_hits, pos_class_indices, num_classes=targets.shape[1]
        )
    overall_results['test'] = {
        'mF1': np.mean(f1),
        'mP': np.mean(p),
        'mR': np.mean(r),
        'mAP': np.mean(ap),
        'mAUC': np.mean(auc),
        'lwlrap': lwlrap,
    }
    dump_json(
        overall_results, storage_dir / 'overall.json',
        indent=4, sort_keys=False
    )

    # Store event-wise results, sorted by test-set average precision.
    event_results = {}
    for i, label in sorted(
            enumerate(labels), key=lambda x: ap[x[0]], reverse=True):
        event_results[label] = {
            'validation': event_validation_results[label],
            'test': {
                'F1': f1[i],
                'P': p[i],
                'R': r[i],
                'AP': ap[i],
                'AUC': auc[i],
                'lwlrap': per_class_lwlrap[i],
            },
        }
    dump_json(
        event_results, storage_dir / 'event_wise.json',
        indent=4, sort_keys=False
    )

    # Dump false positives and false negatives for error analysis.
    fp = np.argwhere(decisions * (1 - targets))
    dump_json(
        sorted([(example_ids[n], labels[i]) for n, i in fp]),
        storage_dir / 'fp.json', indent=4, sort_keys=False
    )
    fn = np.argwhere((1 - decisions) * targets)
    dump_json(
        sorted([(example_ids[n], labels[i]) for n, i in fn]),
        storage_dir / 'fn.json', indent=4, sort_keys=False
    )
    pprint(overall_results)
def main(_run, exp_dir, storage_dir, database_json, test_set, max_examples,
         device):
    if IS_MASTER:
        commands.print_config(_run)

    exp_dir = Path(exp_dir)
    storage_dir = Path(storage_dir)
    audio_dir = storage_dir / 'audio'
    audio_dir.mkdir(parents=True)
    config = load_json(exp_dir / 'config.json')

    model = Model.from_storage_dir(exp_dir, consider_mpi=True)
    model.to(device)
    model.eval()

    db = JsonDatabase(database_json)
    test_data = db.get_dataset(test_set)
    if max_examples is not None:
        test_data = test_data.shuffle(
            rng=np.random.RandomState(0))[:max_examples]
    test_data = prepare_dataset(
        test_data,
        audio_reader=config['audio_reader'],
        stft=config['stft'],
        max_length=None,
        batch_size=1,
        shuffle=True,
    )

    # Accumulate the squared reconstruction error and signal length per
    # example; the test data is distributed over the MPI workers.
    squared_err = list()
    with torch.no_grad():
        for example in split_managed(
                test_data, is_indexable=False, progress_bar=True,
                allow_single_worker=True):
            example = model.example_to_device(example, device)
            target = example['audio_data'].squeeze(1)
            x = model.feature_extraction(example['stft'], example['seq_len'])
            x = model.wavenet.infer(
                x.squeeze(1),
                chunk_length=80_000,
                chunk_overlap=16_000,
            )
            assert target.shape == x.shape, (target.shape, x.shape)
            squared_err.extend([
                (ex_id, mse.cpu().detach().numpy(), x.shape[1])
                for ex_id, mse in zip(
                    example['example_id'], ((x - target)**2).sum(1)
                )
            ])

    squared_err_list = COMM.gather(squared_err, root=MASTER)

    if IS_MASTER:
        # Compute the overall and example-wise RMSE on the master.
        print(f'\nlen(squared_err_list): {len(squared_err_list)}')
        squared_err = []
        for i in range(len(squared_err_list)):
            squared_err.extend(squared_err_list[i])
        _, err, t = list(zip(*squared_err))
        print('rmse:', np.sqrt(np.sum(err) / np.sum(t)))
        rmse = sorted(
            [(ex_id, np.sqrt(err / t)) for ex_id, err, t in squared_err],
            key=lambda x: x[1]
        )
        dump_json(rmse, storage_dir / 'rmse.json', indent=4, sort_keys=False)

        # Re-synthesize the ten best and ten worst examples and store them
        # as wav files.
        ex_ids_ordered = [x[0] for x in rmse]
        test_data = db.get_dataset('test_clean').shuffle(
            rng=np.random.RandomState(0)
        )[:max_examples].filter(
            lambda x: x['example_id'] in (
                ex_ids_ordered[:10] + ex_ids_ordered[-10:]
            ),
            lazy=False,
        )
        test_data = prepare_dataset(
            test_data,
            audio_reader=config['audio_reader'],
            stft=config['stft'],
            max_length=10.,
            batch_size=1,
            shuffle=True,
        )
        with torch.no_grad():
            for example in test_data:
                example = model.example_to_device(example, device)
                x = model.feature_extraction(
                    example['stft'], example['seq_len'])
                x = model.wavenet.infer(
                    x.squeeze(1),
                    chunk_length=80_000,
                    chunk_overlap=16_000,
                )
                for i, audio in enumerate(x.cpu().detach().numpy()):
                    wavfile.write(
                        str(audio_dir / f'{example["example_id"][i]}.wav'),
                        model.sample_rate, audio
                    )
def data(self):
    return load_json(self._json_path)
def main(_run, model_path, load_ckpt, batch_size, device, store_misclassified):
    if IS_MASTER:
        commands.print_config(_run)

    model_path = Path(model_path)
    eval_dir = get_new_subdir(
        model_path / 'eval', id_naming='time', consider_mpi=True
    )
    # perform evaluation on a sub-set (10%) of the dataset used for training
    config = load_json(model_path / 'config.json')
    database_json = config['database_json']
    dataset = config['dataset']

    model = pt.Model.from_storage_dir(
        model_path, checkpoint_name=load_ckpt, consider_mpi=True
    )
    model.to(device)
    # Turn on evaluation mode for, e.g., BatchNorm and Dropout modules
    model.eval()

    _, _, test_set = get_datasets(
        model_path, database_json, dataset, batch_size,
        return_indexable=device == 'cpu'
    )

    with torch.no_grad():
        summary = dict(
            misclassified_examples=dict(),
            correct_classified_examples=dict(),
            hits=list(),
        )
        for batch in split_managed(
                test_set, is_indexable=device == 'cpu',
                progress_bar=True, allow_single_worker=True):
            output = model(pt.data.example_to_device(batch, device))
            prediction = torch.argmax(output, dim=-1).cpu().numpy()
            confidence = torch.softmax(output, dim=-1).max(dim=-1).values \
                .cpu().numpy()
            label = np.array(batch['speaker_id'])
            hits = (label == prediction).astype('bool')
            summary['hits'].extend(hits.tolist())
            summary['misclassified_examples'].update({
                k: {
                    'true_label': v1,
                    'predicted_label': v2,
                    'audio_path': v3,
                    'confidence': f'{v4:.2%}',
                }
                for k, v1, v2, v3, v4 in zip(
                    np.array(batch['example_id'])[~hits], label[~hits],
                    prediction[~hits], np.array(batch['audio_path'])[~hits],
                    confidence[~hits]
                )
            })
            # for each correct predicted label, collect the audio paths
            correct_classified = summary['correct_classified_examples']
            summary['correct_classified_examples'].update({
                k: correct_classified[k] + [v]
                if k in correct_classified.keys() else [v]
                for k, v in zip(
                    prediction[hits], np.array(batch['audio_path'])[hits]
                )
            })

    # Gather the partial summaries from all MPI workers on the master.
    summary_list = COMM.gather(summary, root=MASTER)

    if IS_MASTER:
        print(f'\nlen(summary_list): {len(summary_list)}')
        if len(summary_list) > 1:
            # Merge the partial summaries into a single one.
            summary = dict(
                misclassified_examples=dict(),
                correct_classified_examples=dict(),
                hits=list(),
            )
            for partial_summary in summary_list:
                summary['hits'].extend(partial_summary['hits'])
                summary['misclassified_examples'].update(
                    partial_summary['misclassified_examples']
                )
                for label, audio_path_list in \
                        partial_summary['correct_classified_examples'].items():
                    summary['correct_classified_examples'][label] = (
                        summary['correct_classified_examples'].get(label, [])
                        + audio_path_list
                    )

        hits = summary['hits']
        misclassified_examples = summary['misclassified_examples']
        correct_classified_examples = summary['correct_classified_examples']
        accuracy = np.array(hits).astype('float').mean()

        if store_misclassified:
            # Symlink each misclassified example next to an example of the
            # speaker it was confused with.
            misclassified_dir = eval_dir / 'misclassified_examples'
            for example_id, v in misclassified_examples.items():
                label, prediction_label, audio_path, _ = v.values()
                try:
                    predicted_speaker_audio_path = \
                        correct_classified_examples[prediction_label][0]
                    example_dir = (
                        misclassified_dir
                        / f'{example_id}_{label}_{prediction_label}'
                    )
                    example_dir.mkdir(parents=True)
                    os.symlink(audio_path, example_dir / 'example.wav')
                    os.symlink(
                        predicted_speaker_audio_path,
                        example_dir / 'predicted_speaker_example.wav'
                    )
                except KeyError:
                    warnings.warn(
                        'There were no correctly predicted inputs from '
                        f'speaker with speaker label {prediction_label}'
                    )

        outputs = dict(
            accuracy=f'{accuracy:.2%} ({np.sum(hits)}/{len(hits)})',
            misclassifications=misclassified_examples,
        )
        print(f'Speaker classification accuracy on test set: {accuracy:.2%}')
        print(f'Wrote results to {eval_dir / "results.json"}')
        dump_json(outputs, eval_dir / 'results.json')