def inference_test(args):
    '''Run a trained model on the test features and write a submission file.

    The checkpoint of the requested iteration is loaded, every segment
    yielded by TestDataGenerator is passed through the model, segment
    predictions are reduced to clip predictions by arithmetic averaging and
    the result is handed to write_submission.

    Args:
      args: namespace providing the attributes below
        train_source: 'curated' | 'noisy' | 'curated_and_noisy', used here
            only to locate the checkpoint and submission directories
        segment_seconds: float, duration of audio recordings to be padded
            or split
        hop_seconds: float, hop seconds between segments
        pad_type: 'constant' | 'repeat'
        model_type: string, name of the model class,
            e.g. 'Cnn_9layers_AvgPooling'
        iteration: int, load model of this iteration
        batch_size: int
        resume: bool, when True the checkpoint is read from, and the
            submission written to, an extra 'resume' sub-directory
        cuda: bool, the GPU is used only if torch.cuda.is_available()
        mini_data: bool, when True the 'minidata_' prefixed feature and
            scalar directories are used
        filename: string, first path component below 'checkpoints' and
            'submissions'
        holdout_fold: value embedded in the 'holdout_fold=...' directory name
    '''

    # Arguments & parameters
    workspace = WORKSPACE
    train_source = args.train_source
    segment_seconds = args.segment_seconds
    hop_seconds = args.hop_seconds
    pad_type = args.pad_type
    model_type = args.model_type
    iteration = args.iteration
    batch_size = args.batch_size
    resume = args.resume
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    # NOTE(review): an earlier comment here said the model trained on full
    # data without validation is used; the value actually comes from args.
    holdout_fold = args.holdout_fold

    mel_bins = config.mel_bins
    classes_num = config.classes_num
    frames_per_second = config.frames_per_second

    # Paths
    prefix = 'minidata_' if mini_data else ''
    feature_dir_name = '{}logmel_{}frames_{}melbins'.format(
        prefix, frames_per_second, mel_bins)

    test_feature_hdf5_path = os.path.join(
        workspace, 'features', feature_dir_name, 'test.h5')

    # The scalar file name does not depend on train_source.
    scalar_path = os.path.join(
        workspace, 'scalars', feature_dir_name, 'train_noisy.h5')

    # Checkpoints and submissions share one directory layout. Unlike the
    # feature directories, it never carries the 'minidata_' prefix. Resumed
    # runs only add a trailing 'resume' component.
    experiment_parts = [
        filename,
        'logmel_{}frames_{}melbins'.format(frames_per_second, mel_bins),
        'train_source={}'.format(train_source),
        'segment={}s,hop={}s,pad_type={}'.format(
            segment_seconds, hop_seconds, pad_type),
        'holdout_fold={}'.format(holdout_fold),
        model_type]

    if resume:
        experiment_parts.append('resume')

    checkpoint_path = os.path.join(
        workspace, 'checkpoints', *experiment_parts,
        '{}_iterations.pth'.format(iteration))

    submission_path = os.path.join(
        workspace, 'submissions', *experiment_parts,
        '{}_iterations_submission.csv'.format(iteration))

    create_folder(os.path.dirname(submission_path))

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    # NOTE(review): eval() executes whatever string is passed as model_type.
    # This is only acceptable while the value comes from a trusted operator;
    # consider replacing it with an explicit name -> class mapping.
    Model = eval(model_type)

    # NOTE(review): the network is built with classes_num * 2 outputs;
    # confirm against the training code that produced the checkpoint.
    if model_type == 'cbam_ResNet18':
        model = Model(18, classes_num * 2, 'CBAM')
    else:
        model = Model(classes_num * 2)

    # Deserialize onto the CPU so that a checkpoint saved from a GPU run can
    # still be restored when CUDA is unavailable; the model is moved to the
    # GPU right below when requested.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = TestDataGenerator(
        test_feature_hdf5_path=test_feature_hdf5_path,
        segment_seconds=segment_seconds,
        hop_seconds=hop_seconds,
        pad_type=pad_type,
        scalar=scalar,
        batch_size=batch_size)

    generate_func = data_generator.generate_test()

    # Results of segments
    output_dict = forward_infer(
        model=model,
        generate_func=generate_func,
        cuda=cuda)

    # Results of audio recordings
    result_dict = segment_prediction_to_clip_prediction(
        output_dict, average='arithmetic')

    # Write submission
    write_submission(result_dict, submission_path)
def evaluate(self, data_type, target_source, max_iteration=None, verbose=False):
    '''Score the model on one data split and return per-class statistics.

    Args:
      data_type: 'train' | 'validate'
      target_source: 'curated' | 'noisy'
      max_iteration: None | int, forwarded to the data generator to limit
          the number of iterations and speed up evaluation
      verbose: bool, when True the lwlrap of every class is logged

    Returns:
      dict with keys 'average_precision', 'per_class_lwlrap' and
      'weight_per_class'
    '''
    assert data_type in ('train', 'validate')
    assert target_source in ('curated', 'noisy')

    segment_generator = self.data_generator.generate_validate(
        data_type=data_type,
        target_source=target_source,
        max_iteration=max_iteration)

    # Segment-level predictions together with their targets
    segment_results = forward_infer(
        model=self.model,
        generate_func=segment_generator,
        cuda=self.cuda,
        return_target=True)

    # Clip-level predictions: arithmetic average over segments
    clip_results = segment_prediction_to_clip_prediction(
        segment_results, average='arithmetic')
    clip_output = clip_results['output']
    clip_target = clip_results['target']

    # Mean average precision over classes
    per_class_ap = metrics.average_precision_score(
        clip_target, clip_output, average=None)
    mAP = np.mean(per_class_ap)

    # Label-weighted label-ranking average precision
    lwlrap_pair = calculate_per_class_lwlrap(clip_target, clip_output)
    per_class_lwlrap, weight_per_class = lwlrap_pair
    mean_lwlrap = np.sum(per_class_lwlrap * weight_per_class)

    summary = ' Target source: {}, mAP: {:.3f}, mean_lwlrap: {:.3f}'.format(
        target_source, mAP, mean_lwlrap)
    logging.info(summary)

    if verbose:
        for class_index in range(self.classes_num):
            class_line = ' {:<20}{:.3f}'.format(
                self.labels[class_index], per_class_lwlrap[class_index])
            logging.info(class_line)

        # NOTE(review): the blank separator line is assumed to belong to the
        # verbose listing — confirm against the original layout.
        logging.info('')

    return {
        'average_precision': per_class_ap,
        'per_class_lwlrap': per_class_lwlrap,
        'weight_per_class': weight_per_class}