def train(args):
    '''Training. Model will be saved after several iterations.

    Args:
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      train_source: 'curated' | 'noisy' | 'curated_and_noisy'
      segment_seconds: float, duration of audio recordings to be padded or split
      hop_seconds: float, hop seconds between segments
      pad_type: 'constant' | 'repeat'
      holdout_fold: '1' | '2' | '3' | '4' | 'none', set `none` for training on
          all data without validation
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    dataset_dir = DATASET_DIR
    workspace = WORKSPACE
    train_source = args.train_source
    segment_seconds = args.segment_seconds
    hop_seconds = args.hop_seconds
    pad_type = args.pad_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    n_epoch = args.n_epoch
    batch_size = args.batch_size
    valid_source = args.valid_source
    pretrained = args.pretrained
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    mel_bins = config.mel_bins
    classes_num = config.classes_num
    frames_per_second = config.frames_per_second
    max_iteration = 500    # Number of mini-batches to evaluate on training data
    reduce_lr = False

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    curated_feature_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'train_curated.h5')

    noisy_feature_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'train_noisy.h5')

    curated_cross_validation_path = os.path.join(
        workspace, 'cross_validation_metadata',
        'train_curated_cross_validation.csv')

    noisy_cross_validation_path = os.path.join(
        workspace, 'cross_validation_metadata',
        'train_noisy_cross_validation.csv')

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'train_noisy.h5')

    if pretrained == 'none':
        checkpoints_dir = os.path.join(
            workspace, 'checkpoints', filename,
            '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'.format(segment_seconds, hop_seconds, pad_type),
            'holdout_fold={}'.format(holdout_fold),
            model_type)
        create_folder(checkpoints_dir)

        validate_statistics_path = os.path.join(
            workspace, 'statistics', filename,
            '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'.format(segment_seconds, hop_seconds, pad_type),
            'holdout_fold={}'.format(holdout_fold),
            model_type,
            'validate_statistics.pickle')
        create_folder(os.path.dirname(validate_statistics_path))

        logs_dir = os.path.join(
            workspace, 'logs', filename, args.mode,
            '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'.format(segment_seconds, hop_seconds, pad_type),
            'holdout_fold={}'.format(holdout_fold),
            model_type)
        create_logging(logs_dir, 'w')
    else:
        # Resuming from a pretrained checkpoint: keep artifacts in a separate
        # 'resume' sub-directory
        checkpoints_dir = os.path.join(
            workspace, 'checkpoints', filename,
            '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'.format(segment_seconds, hop_seconds, pad_type),
            'holdout_fold={}'.format(holdout_fold),
            model_type,
            'resume')
        create_folder(checkpoints_dir)
        validate_statistics_path = os.path.join(
            workspace, 'statistics', filename,
            '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'.format(segment_seconds, hop_seconds, pad_type),
            'holdout_fold={}'.format(holdout_fold),
            model_type,
            'resume',
            'validate_statistics.pickle')
        create_folder(os.path.dirname(validate_statistics_path))

        logs_dir = os.path.join(
            workspace, 'logs', filename, args.mode,
            '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'.format(segment_seconds, hop_seconds, pad_type),
            'holdout_fold={}'.format(holdout_fold),
            model_type,
            'resume')
        create_logging(logs_dir, 'w')

    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    Model = eval(model_type)
    if model_type == 'cbam_ResNet18':
        model = Model(18, classes_num * 2, 'CBAM')
    else:
        model = Model(classes_num * 2)

    if pretrained != 'none':
        model.load_state_dict(torch.load(pretrained)['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = DataGenerator(
        curated_feature_hdf5_path=curated_feature_hdf5_path,
        noisy_feature_hdf5_path=noisy_feature_hdf5_path,
        curated_cross_validation_path=curated_cross_validation_path,
        noisy_cross_validation_path=noisy_cross_validation_path,
        train_source=train_source,
        holdout_fold=holdout_fold,
        segment_seconds=segment_seconds,
        hop_seconds=hop_seconds,
        pad_type=pad_type,
        scalar=scalar,
        batch_size=batch_size)

    # Calculate the total number of iterations required for n_epoch
    iter_per_epoch = np.ceil(
        len(data_generator.train_segments_indexes) / batch_size).astype(int)
    total_iter = iter_per_epoch * n_epoch

    # Warm-up LR scheduler
    epoch_to_warm = 10
    epoch_to_flat = 200

    def _warmup_lr(optimizer, iteration, iter_per_epoch, epoch_to_warm,
                   min_lr=0, max_lr=0.0035):
        '''Linearly ramp the learning rate from min_lr to max_lr over the
        first epoch_to_warm epochs.'''
        delta = (max_lr - min_lr) / iter_per_epoch / epoch_to_warm
        lr = min_lr + delta * iteration
        for p in optimizer.param_groups:
            p['lr'] = lr
        return lr

    # Losses & optimizer
    criterion = FocalLoss(2)
    # metric_loss = RingLoss(type='auto', loss_weight=1.0)
    metric_loss = ArcFaceLoss()
    if cuda:
        metric_loss.cuda()

    optimizer = Nadam(
        model.parameters(),
        lr=0.0035,
        betas=(0.9, 0.999),
        eps=1e-8,
        weight_decay=0,
        schedule_decay=4e-3)

    scheduler = CosineLRWithRestarts(
        optimizer,
        batch_size,
        len(data_generator.train_segments_indexes),
        restart_period=epoch_to_flat - epoch_to_warm + 1,
        t_mult=1,
        verbose=True)

    # Evaluator
    evaluator = Evaluator(
        model=model,
        data_generator=data_generator,
        cuda=cuda)

    # Validation source(s)
    if valid_source == 'curated':
        target_sources = ['curated']
    elif valid_source == 'noisy':
        target_sources = ['noisy']
    elif valid_source == 'both':
        target_sources = ['curated', 'noisy']
    else:
        raise ValueError('Invalid valid_source: {}'.format(valid_source))

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0
    epoch = 0

    # Train on mini-batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate
        if iteration % 2500 == 0:
            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()

            # Evaluate on part of the training data
            # logging.info('Train statistics:')
            # for target_source in target_sources:
            #     validate_curated_statistics = evaluator.evaluate(
            #         data_type='train',
            #         target_source=target_source,
            #         max_iteration=max_iteration,
            #         verbose=False)

            # Evaluate on holdout validation data
            if holdout_fold != 'none':
                logging.info('Validate statistics:')

                for target_source in target_sources:
                    validate_curated_statistics = evaluator.evaluate(
                        data_type='validate',
                        target_source=target_source,
                        max_iteration=None,
                        verbose=False)

                    validate_statistics_container.append(
                        iteration, target_source, validate_curated_statistics)

                validate_statistics_container.dump()

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model
        if iteration % 2500 == 0 and iteration > 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU
        for key in batch_data_dict.keys():
            if key in ['feature', 'mask', 'target']:
                batch_data_dict[key] = move_data_to_gpu(
                    batch_data_dict[key], cuda)

        # Train
        model.train()
        batch_feature, batch_output = model(
            batch_data_dict['feature'], is_infer=False)

        # Loss
        loss = criterion(batch_output, batch_data_dict['target']) + metric_loss(
            batch_feature, batch_data_dict['target'])

        # Backward
        optimizer.zero_grad()

        # LR warm-up
        if iteration < epoch_to_warm * iter_per_epoch:
            cur_lr = _warmup_lr(
                optimizer, iteration, iter_per_epoch,
                epoch_to_warm=epoch_to_warm, min_lr=0, max_lr=0.0035)

        loss.backward()
        optimizer.step()

        # Cosine LR schedule between the warm-up and the flat phase
        if iteration >= epoch_to_warm * iter_per_epoch and \
                iteration < epoch_to_flat * iter_per_epoch:
            if data_generator.pointer >= len(
                    data_generator.train_segments_indexes):
                scheduler.step()
            scheduler.batch_step()

        # Show LR information
        if iteration % iter_per_epoch == 0 and iteration != 0:
            epoch += 1
            if epoch % 10 == 0:
                for p in optimizer.param_groups:
                    logging.info(
                        'Learning rate at epoch {:3d} / iteration {:5d} is: {:.6f}'
                        .format(epoch, iteration, p['lr']))

        # Stop learning
        if iteration == total_iter:
            break

        iteration += 1

        if iteration == epoch_to_warm * iter_per_epoch:
            scheduler.step()

        if iteration == epoch_to_flat * iter_per_epoch:
            # Pin the learning rate to a small constant for the remaining epochs
            for param_group in optimizer.param_groups:
                param_group['lr'] = 1e-5
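# ---------------------------------------------------------------------------
# A minimal sketch (not part of the original pipeline) of how the learning
# rate evolves under the schedule used above: a linear warm-up over
# `epoch_to_warm` epochs via _warmup_lr, a cosine phase driven by
# CosineLRWithRestarts until `epoch_to_flat`, then a constant 1e-5. The cosine
# segment here is a single-cycle approximation rather than the exact restart
# behaviour, and the helper name `sketch_lr` is hypothetical.
# ---------------------------------------------------------------------------
import math

def sketch_lr(iteration, iter_per_epoch, epoch_to_warm=10, epoch_to_flat=200,
              min_lr=0., max_lr=0.0035, final_lr=1e-5):
    '''Return an approximate learning rate at a given training iteration.'''
    warm_iters = epoch_to_warm * iter_per_epoch
    flat_iters = epoch_to_flat * iter_per_epoch

    if iteration < warm_iters:
        # Linear warm-up: same formula as _warmup_lr above
        return min_lr + (max_lr - min_lr) * iteration / warm_iters

    if iteration < flat_iters:
        # Rough stand-in for the cosine-with-restarts phase
        progress = (iteration - warm_iters) / (flat_iters - warm_iters)
        return final_lr + 0.5 * (max_lr - final_lr) * (1. + math.cos(math.pi * progress))

    # After epoch_to_flat the optimizer learning rate is pinned to 1e-5
    return final_lr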
def train(args):
    '''Training. Model will be saved after several iterations.

    Args:
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      train_source: 'curated' | 'noisy' | 'curated_and_noisy'
      segment_seconds: float, duration of audio recordings to be padded or split
      hop_seconds: float, hop seconds between segments
      pad_type: 'constant' | 'repeat'
      holdout_fold: '1' | '2' | '3' | '4' | 'none', set `none` for training on
          all data without validation
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    train_source = args.train_source
    segment_seconds = args.segment_seconds
    hop_seconds = args.hop_seconds
    pad_type = args.pad_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    mel_bins = config.mel_bins
    classes_num = config.classes_num
    frames_per_second = config.frames_per_second
    max_iteration = 500    # Number of mini-batches to evaluate on training data
    reduce_lr = False

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    curated_feature_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'train_curated.h5')

    noisy_feature_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'train_noisy.h5')

    curated_cross_validation_path = os.path.join(
        workspace, 'cross_validation_metadata',
        'train_curated_cross_validation.csv')

    noisy_cross_validation_path = os.path.join(
        workspace, 'cross_validation_metadata',
        'train_noisy_cross_validation.csv')

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'train_noisy.h5')

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'train_source={}'.format(train_source),
        'segment={}s,hop={}s,pad_type={}'.format(segment_seconds, hop_seconds, pad_type),
        'holdout_fold={}'.format(holdout_fold),
        model_type)
    create_folder(checkpoints_dir)

    validate_statistics_path = os.path.join(
        workspace, 'statistics', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'train_source={}'.format(train_source),
        'segment={}s,hop={}s,pad_type={}'.format(segment_seconds, hop_seconds, pad_type),
        'holdout_fold={}'.format(holdout_fold),
        model_type,
        'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'train_source={}'.format(train_source),
        'segment={}s,hop={}s,pad_type={}'.format(segment_seconds, hop_seconds, pad_type),
        'holdout_fold={}'.format(holdout_fold),
        model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    Model = eval(model_type)
    model = Model(classes_num)

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(
        model.parameters(), lr=1e-3, betas=(0.9, 0.999), eps=1e-08,
        weight_decay=0., amsgrad=True)

    # Data generator
    data_generator = DataGenerator(
        curated_feature_hdf5_path=curated_feature_hdf5_path,
        noisy_feature_hdf5_path=noisy_feature_hdf5_path,
        curated_cross_validation_path=curated_cross_validation_path,
        noisy_cross_validation_path=noisy_cross_validation_path,
        train_source=train_source,
        holdout_fold=holdout_fold,
        segment_seconds=segment_seconds,
        hop_seconds=hop_seconds,
        pad_type=pad_type,
        scalar=scalar,
        batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(
        model=model,
        data_generator=data_generator,
        cuda=cuda)

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0

    # Train on mini-batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate
        if iteration % 500 == 0:
            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()

            # Evaluate on part of the training data
            logging.info('Train statistics:')

            for target_source in ['curated', 'noisy']:
                validate_curated_statistics = evaluator.evaluate(
                    data_type='train',
                    target_source=target_source,
                    max_iteration=max_iteration,
                    verbose=False)

            # Evaluate on holdout validation data
            if holdout_fold != 'none':
                logging.info('Validate statistics:')

                for target_source in ['curated', 'noisy']:
                    validate_curated_statistics = evaluator.evaluate(
                        data_type='validate',
                        target_source=target_source,
                        max_iteration=None,
                        verbose=False)

                    validate_statistics_container.append(
                        iteration, target_source, validate_curated_statistics)

                validate_statistics_container.dump()

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model
        if iteration % 1000 == 0 and iteration > 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU
        for key in batch_data_dict.keys():
            if key in ['feature', 'mask', 'target']:
                batch_data_dict[key] = move_data_to_gpu(
                    batch_data_dict[key], cuda)

        # Train
        model.train()
        batch_output = model(batch_data_dict['feature'])

        # Loss
        loss = binary_cross_entropy(batch_output, batch_data_dict['target'])

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == 20000:
            break

        iteration += 1
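# ---------------------------------------------------------------------------
# Minimal entry-point sketch, assuming an argparse interface that mirrors the
# attributes read from `args` in train() above. The original parser is not
# shown in this excerpt, so the flag names, defaults, and the way `filename`
# and `mode` are filled in are assumptions for illustration only.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Train an audio tagging model.')
    parser.add_argument('--dataset_dir', type=str, required=True)
    parser.add_argument('--workspace', type=str, required=True)
    parser.add_argument('--train_source', type=str, default='curated_and_noisy',
                        choices=['curated', 'noisy', 'curated_and_noisy'])
    parser.add_argument('--segment_seconds', type=float, default=2.0)
    parser.add_argument('--hop_seconds', type=float, default=1.0)
    parser.add_argument('--pad_type', type=str, default='repeat',
                        choices=['constant', 'repeat'])
    parser.add_argument('--holdout_fold', type=str, default='1',
                        choices=['1', '2', '3', '4', 'none'])
    parser.add_argument('--model_type', type=str, default='Cnn_9layers_AvgPooling')
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--cuda', action='store_true', default=False)
    parser.add_argument('--mini_data', action='store_true', default=False)

    args = parser.parse_args()
    args.filename = os.path.splitext(os.path.basename(__file__))[0]  # used in checkpoint/log paths
    args.mode = 'train'    # logs are written under a per-mode sub-directory

    train(args)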