def calculate_loss(self, list_dict):
    """Average the event-spatial loss over a list of data dicts.

    Args:
      list_dict: list of dict; each element is converted to an
        (output_dict, target_dict) pair by self._get_output_target_dict.
        (Exact schema depends on that helper — confirm against caller.)

    Returns:
      float, mean of the per-dict losses.
    """
    event_loss_list = []

    # NOTE: renamed the loop variable — the original shadowed the builtin `dict`.
    for data_dict in list_dict:
        output_dict, target_dict = self._get_output_target_dict(data_dict)

        # NOTE(review): with return_individual_loss=True the sibling
        # implementation of event_spatial_loss returns a 3-tuple
        # (total, event, position); here the whole return value is
        # accumulated as a single loss — confirm the intended signature.
        event_loss = event_spatial_loss(
            output_dict=output_dict,
            target_dict=target_dict,
            return_individual_loss=True)

        event_loss_list.append(event_loss)

    return np.mean(event_loss_list)
def calculate_loss(self, list_dict):
    """Average the total / event / position losses over a list of data dicts.

    Args:
      list_dict: list of dict; each element is converted to an
        (output_dict, target_dict) pair by self._get_output_target_dict.
        (Exact schema depends on that helper — confirm against caller.)

    Returns:
      (mean_total_loss, mean_event_loss, mean_position_loss), each a float.
    """
    total_loss_list = []
    event_loss_list = []
    position_loss_list = []

    # NOTE: renamed the loop variable — the original shadowed the builtin `dict`.
    for data_dict in list_dict:
        output_dict, target_dict = self._get_output_target_dict(data_dict)

        # return_individual_loss=True makes event_spatial_loss return a
        # (total, event, position) triple rather than a single scalar.
        total_loss, event_loss, position_loss = event_spatial_loss(
            output_dict=output_dict,
            target_dict=target_dict,
            return_individual_loss=True)

        total_loss_list.append(total_loss)
        event_loss_list.append(event_loss)
        position_loss_list.append(position_loss)

    return (np.mean(total_loss_list),
            np.mean(event_loss_list),
            np.mean(position_loss_list))
def train(args):
    '''Train. Model will be saved after several iterations.

    Args:
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      audio_type: 'foa' | 'mic'
      holdout_fold: 1 | 2 | 3 | 4 | -1, where -1 indicates using all data
        without validation for training
      model_name: string, e.g. 'Cnn_9layers'
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    audio_type = args.audio_type
    holdout_fold = args.holdout_fold
    model_name = args.model_name
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    classes_num = config.classes_num

    max_validate_num = 10   # Number of audio recordings to validate
    reduce_lr = True        # Reduce learning rate after several iterations

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    metadata_dir = os.path.join(dataset_dir, 'metadata_dev')

    features_dir = os.path.join(workspace, 'features',
        '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev',
        frames_per_second, mel_bins))

    scalar_path = os.path.join(workspace, 'scalars',
        '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev',
        frames_per_second, mel_bins), 'scalar.h5')

    models_dir = os.path.join(workspace, 'models', filename,
        '{}_{}{}_{}_logmel_{}frames_{}melbins'.format(model_name, prefix,
        audio_type, 'dev', frames_per_second, mel_bins),
        'holdout_fold={}'.format(holdout_fold))
    create_folder(models_dir)

    temp_submissions_dir = os.path.join(workspace, '_temp', 'submissions',
        filename, '{}_{}{}_{}_logmel_{}frames_{}melbins'.format(model_name,
        prefix, audio_type, 'dev', frames_per_second, mel_bins))
    create_folder(temp_submissions_dir)

    logs_dir = os.path.join(args.workspace, 'logs', filename, args.mode,
        '{}_{}{}_{}_logmel_{}frames_{}melbins'.format(model_name, prefix,
        audio_type, 'dev', frames_per_second, mel_bins),
        'holdout_fold={}'.format(holdout_fold))
    create_logging(logs_dir, filemode='w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    # SECURITY NOTE(review): eval() on a CLI-supplied string executes
    # arbitrary code; prefer an explicit name -> class mapping.
    Model = eval(model_name)
    model = Model(classes_num)

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999),
        eps=1e-08, weight_decay=0.)

    # Data generator
    data_generator = DataGenerator(
        features_dir=features_dir,
        scalar=scalar,
        batch_size=batch_size,
        holdout_fold=holdout_fold)

    # Evaluator
    evaluator = Evaluator(
        model=model,
        data_generator=data_generator,
        max_validate_num=max_validate_num,
        cuda=cuda)

    train_bgn_time = time.time()
    iteration = 0

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate periodically
        if iteration % 100 == 0:
            logging.info('------------------------------------')
            logging.info('iteration: {}'.format(iteration))

            train_fin_time = time.time()

            train_list_dict = evaluator.evaluate(data_type='train')
            evaluator.metrics(train_list_dict, temp_submissions_dir,
                metadata_dir)

            # holdout_fold == -1 means "train on all data, no validation"
            if holdout_fold != -1:
                validate_list_dict = evaluator.evaluate(data_type='validate')
                evaluator.metrics(validate_list_dict, temp_submissions_dir,
                    metadata_dir)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('train time: {:.3f} s, validate time: {:.3f} s'
                ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model
        if iteration % 1000 == 0 and iteration > 0:
            # NOTE(review): this pickles the whole model and optimizer
            # objects; the sibling train() in this file saves state_dict()
            # instead — consider unifying (state_dict is the portable form).
            checkpoint = {
                'iteration': iteration,
                'model': model,
                'optimizer': optimizer}

            save_path = os.path.join(models_dir,
                'md_{}_iters.pth'.format(iteration))

            torch.save(checkpoint, save_path)
            logging.info('Model saved to {}'.format(save_path))

        # Reduce learning rate by 10% every 200 iterations
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU
        for key in batch_data_dict.keys():
            batch_data_dict[key] = move_data_to_gpu(
                batch_data_dict[key], cuda)

        # Train
        model.train()
        batch_output_dict = model(batch_data_dict['feature'])
        loss = event_spatial_loss(batch_output_dict, batch_data_dict)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == 10000:
            break

        iteration += 1
def train(args):
    '''Train. Model will be saved after several iterations.

    Args:
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      audio_type: 'foa' | 'mic'
      holdout_fold: '1' | '2' | '3' | '4' | 'none', set to none if using all
        data without validation to train
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    audio_type = args.audio_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    classes_num = config.classes_num

    max_validate_num = None  # Number of audio recordings to validate
    reduce_lr = True         # Reduce learning rate after several iterations

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    metadata_dir = os.path.join(dataset_dir, 'metadata_dev')

    features_dir = os.path.join(workspace, 'features',
        '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev',
        frames_per_second, mel_bins))

    scalar_path = os.path.join(workspace, 'scalars',
        '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev',
        frames_per_second, mel_bins), 'scalar.h5')

    checkpoints_dir = os.path.join(workspace, 'checkpoints', filename,
        '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev',
        frames_per_second, mel_bins), model_type,
        'holdout_fold={}'.format(holdout_fold))
    create_folder(checkpoints_dir)

    # All folds result should write to the same directory
    temp_submissions_dir = os.path.join(workspace, '_temp', 'submissions',
        filename, '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix,
        audio_type, 'dev', frames_per_second, mel_bins), model_type)
    create_folder(temp_submissions_dir)

    validate_statistics_path = os.path.join(workspace, 'statistics', filename,
        '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev',
        frames_per_second, mel_bins),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))

    logs_dir = os.path.join(args.workspace, 'logs', filename, args.mode,
        '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev',
        frames_per_second, mel_bins),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, filemode='w')
    logging.info(args)

    if cuda:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    # SECURITY NOTE(review): eval() on a CLI-supplied string executes
    # arbitrary code; prefer an explicit name -> class mapping.
    Model = eval(model_type)
    model = Model(classes_num)

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999),
        eps=1e-08, weight_decay=0., amsgrad=True)

    # Data generator
    data_generator = DataGenerator(
        features_dir=features_dir,
        scalar=scalar,
        batch_size=batch_size,
        holdout_fold=holdout_fold)

    # Evaluator
    evaluator = Evaluator(
        model=model,
        data_generator=data_generator,
        cuda=cuda)

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate periodically
        if iteration % 200 == 0:
            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()

            '''
            # Uncomment for evaluating on training dataset
            train_statistics = evaluator.evaluate(
                data_type='train',
                metadata_dir=metadata_dir,
                submissions_dir=temp_submissions_dir,
                max_validate_num=max_validate_num)
            '''

            # holdout_fold == 'none' means "train on all data, no validation"
            if holdout_fold != 'none':
                validate_statistics = evaluator.evaluate(
                    data_type='validate',
                    metadata_dir=metadata_dir,
                    submissions_dir=temp_submissions_dir,
                    max_validate_num=max_validate_num)

                validate_statistics_container.append_and_dump(
                    iteration, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model
        if iteration % 1000 == 0 and iteration > 0:
            # state_dict() is the portable checkpoint form (weights only,
            # no pickled module objects).
            checkpoint = {
                'iteration': iteration,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()}

            checkpoint_path = os.path.join(checkpoints_dir,
                '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate by 10% every 200 iterations
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU
        for key in batch_data_dict.keys():
            batch_data_dict[key] = move_data_to_gpu(
                batch_data_dict[key], cuda)

        # Train
        model.train()
        batch_output_dict = model(batch_data_dict['feature'])
        loss = event_spatial_loss(batch_output_dict, batch_data_dict)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == 5000:
            break

        iteration += 1