def train(args):
    '''Train. Model will be saved after several iterations.

    Args:
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      audio_type: 'foa' | 'mic'
      holdout_fold: 1 | 2 | 3 | 4 | -1, where -1 indicates using all data
          without validation for training
      model_name: string, e.g. 'Cnn_9layers'
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    audio_type = args.audio_type
    holdout_fold = args.holdout_fold
    model_name = args.model_name
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    classes_num = config.classes_num

    max_validate_num = 10   # Number of audio recordings to validate
    reduce_lr = True        # Reduce learning rate after several iterations

    # Paths
    prefix = 'minidata_' if mini_data else ''

    metadata_dir = os.path.join(dataset_dir, 'metadata_dev')

    features_dir = os.path.join(workspace, 'features',
        '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev',
        frames_per_second, mel_bins))

    scalar_path = os.path.join(workspace, 'scalars',
        '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev',
        frames_per_second, mel_bins), 'scalar.h5')

    models_dir = os.path.join(workspace, 'models', filename,
        '{}_{}{}_{}_logmel_{}frames_{}melbins'.format(model_name, prefix,
        audio_type, 'dev', frames_per_second, mel_bins),
        'holdout_fold={}'.format(holdout_fold))
    create_folder(models_dir)

    temp_submissions_dir = os.path.join(workspace, '_temp', 'submissions',
        filename, '{}_{}{}_{}_logmel_{}frames_{}melbins'.format(model_name,
        prefix, audio_type, 'dev', frames_per_second, mel_bins))
    create_folder(temp_submissions_dir)

    logs_dir = os.path.join(args.workspace, 'logs', filename, args.mode,
        '{}_{}{}_{}_logmel_{}frames_{}melbins'.format(model_name, prefix,
        audio_type, 'dev', frames_per_second, mel_bins),
        'holdout_fold={}'.format(holdout_fold))
    create_logging(logs_dir, filemode='w')
    logging.info(args)

    # Load scalar used to normalize input features
    scalar = load_scalar(scalar_path)

    # Model
    # NOTE(review): eval() on a command-line string is unsafe on untrusted
    # input; prefer an explicit {name: class} registry lookup.
    Model = eval(model_name)
    model = Model(classes_num)

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999),
        eps=1e-08, weight_decay=0.)

    # Data generator
    data_generator = DataGenerator(
        features_dir=features_dir,
        scalar=scalar,
        batch_size=batch_size,
        holdout_fold=holdout_fold)

    # Evaluator
    evaluator = Evaluator(
        model=model,
        data_generator=data_generator,
        max_validate_num=max_validate_num,
        cuda=cuda)

    train_bgn_time = time.time()
    iteration = 0

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate periodically on train (and validate, unless training on
        # all folds with holdout_fold == -1)
        if iteration % 100 == 0:
            logging.info('------------------------------------')
            logging.info('iteration: {}'.format(iteration))

            train_fin_time = time.time()

            train_list_dict = evaluator.evaluate(data_type='train')
            evaluator.metrics(train_list_dict, temp_submissions_dir,
                metadata_dir)

            if holdout_fold != -1:
                validate_list_dict = evaluator.evaluate(data_type='validate')
                evaluator.metrics(validate_list_dict, temp_submissions_dir,
                    metadata_dir)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('train time: {:.3f} s, validate time: {:.3f} s'
                ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model every 1000 iterations (whole model and optimizer are
        # pickled; inference_validation() loads checkpoint['model'] directly)
        if iteration % 1000 == 0 and iteration > 0:
            checkpoint = {
                'iteration': iteration,
                'model': model,
                'optimizer': optimizer}

            save_path = os.path.join(models_dir,
                'md_{}_iters.pth'.format(iteration))

            torch.save(checkpoint, save_path)
            logging.info('Model saved to {}'.format(save_path))

        # Decay learning rate by 0.9 every 200 iterations
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU
        for key in batch_data_dict.keys():
            batch_data_dict[key] = move_data_to_gpu(batch_data_dict[key], cuda)

        # Forward pass and loss
        model.train()
        batch_output_dict = model(batch_data_dict['feature'])
        loss = event_spatial_loss(batch_output_dict, batch_data_dict)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == 10000:
            break

        iteration += 1
def inference_validation(args):
    '''Inference validation data.

    Args:
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      audio_type: 'foa' | 'mic'
      holdout_fold: 1 | 2 | 3 | 4 | -1, where -1 indicates calculating
          metrics on all 1, 2, 3 and 4 folds.
      model_name: string, e.g. 'Cnn_9layers'
      batch_size: int
      cuda: bool
      visualize: bool
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    audio_type = args.audio_type
    holdout_fold = args.holdout_fold
    model_name = args.model_name
    iteration = args.iteration
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    visualize = args.visualize
    mini_data = args.mini_data
    filename = args.filename

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second

    # Paths
    prefix = 'minidata_' if mini_data else ''

    metadata_dir = os.path.join(dataset_dir, 'metadata_dev')

    submissions_dir = os.path.join(workspace, 'submissions', filename,
        '{}_{}{}_{}_logmel_{}frames_{}melbins'.format(model_name, prefix,
        audio_type, 'dev', frames_per_second, mel_bins),
        'iteration={}'.format(iteration))
    create_folder(submissions_dir)

    logs_dir = os.path.join(args.workspace, 'logs', filename, args.mode,
        '{}_{}{}_{}_logmel_{}frames_{}melbins'.format(model_name, prefix,
        audio_type, 'dev', frames_per_second, mel_bins),
        'holdout_fold={}'.format(holdout_fold))
    create_logging(logs_dir, filemode='w')
    logging.info(args)

    # Inference and calculate metrics for a fold
    if holdout_fold != -1:
        features_dir = os.path.join(workspace, 'features',
            '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type,
            'dev', frames_per_second, mel_bins))

        scalar_path = os.path.join(workspace, 'scalars',
            '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type,
            'dev', frames_per_second, mel_bins), 'scalar.h5')

        # BUGFIX: the prefix slot was hard-coded to '' here, but train()
        # saves checkpoints under a 'minidata_'-prefixed directory when
        # mini_data is set, so those checkpoints could never be found.
        checkpoint_path = os.path.join(workspace, 'models', filename,
            '{}_{}{}_{}_logmel_{}frames_{}melbins'.format(model_name, prefix,
            audio_type, 'dev', frames_per_second, mel_bins),
            'holdout_fold={}'.format(holdout_fold),
            'md_{}_iters.pth'.format(iteration))

        # Load scalar used to normalize input features
        scalar = load_scalar(scalar_path)

        # Load model
        # NOTE(review): torch.load unpickles the whole model object — only
        # load checkpoints from trusted sources.
        checkpoint = torch.load(checkpoint_path)
        model = checkpoint['model']

        if cuda:
            model.cuda()

        # Data generator
        data_generator = DataGenerator(
            features_dir=features_dir,
            scalar=scalar,
            batch_size=batch_size,
            holdout_fold=holdout_fold)

        # Evaluator
        evaluator = Evaluator(
            model=model,
            data_generator=data_generator,
            cuda=cuda)

        # Calculate metrics
        data_type = 'validate'

        list_dict = evaluator.evaluate(data_type=data_type)

        evaluator.metrics(list_dict=list_dict,
            submissions_dir=submissions_dir, metadata_dir=metadata_dir)

        # Visualize reference and predicted events, elevation and azimuth
        if visualize:
            evaluator.visualize(data_type=data_type)

    # Calculate metrics for all folds using previously written submissions
    else:
        prediction_names = os.listdir(submissions_dir)
        prediction_paths = [os.path.join(submissions_dir, name) for \
            name in prediction_names]

        metrics = calculate_metrics(metadata_dir=metadata_dir,
            prediction_paths=prediction_paths)

        logging.info('Metrics of {} files: '.format(len(prediction_names)))
        for key in metrics.keys():
            logging.info('    {:<20} {:.3f}'.format(key + ' :', metrics[key]))