def audio_tagging(args): """Inference audio tagging result of an audio clip. """ # Arugments & parameters sample_rate = args.sample_rate window_size = args.window_size hop_size = args.hop_size mel_bins = args.mel_bins fmin = args.fmin fmax = args.fmax model_type = args.model_type checkpoint_path = args.checkpoint_path audio_path = args.audio_path device = torch.device('cuda') if args.cuda and torch.cuda.is_available() else torch.device('cpu') classes_num = config.classes_num labels = config.labels # Model Model = eval(model_type) model = Model(sample_rate=sample_rate, window_size=window_size, hop_size=hop_size, mel_bins=mel_bins, fmin=fmin, fmax=fmax, classes_num=classes_num) checkpoint = torch.load(checkpoint_path, map_location=device) model.load_state_dict(checkpoint['model']) # Parallel if 'cuda' in str(device): model.to(device) print('GPU number: {}'.format(torch.cuda.device_count())) model = torch.nn.DataParallel(model) else: print('Using CPU.') # Load audio (waveform, _) = librosa.core.load(audio_path, sr=sample_rate, mono=True) waveform = waveform[None, :] # (1, audio_length) waveform = move_data_to_device(waveform, device) # Forward with torch.no_grad(): model.eval() batch_output_dict = model(waveform, None) clipwise_output = batch_output_dict['clipwise_output'].data.cpu().numpy()[0] """(classes_num,)""" sorted_indexes = np.argsort(clipwise_output)[::-1] # Print audio tagging top probabilities for k in range(10): print('{}: {:.3f}'.format(np.array(labels)[sorted_indexes[k]], clipwise_output[sorted_indexes[k]])) # Print embedding if 'embedding' in batch_output_dict.keys(): embedding = batch_output_dict['embedding'].data.cpu().numpy()[0] print('embedding: {}'.format(embedding.shape)) return clipwise_output, labels
def sound_event_detection(args): """Inference sound event detection result of an audio clip. """ # Arugments & parameters sample_rate = args.sample_rate window_size = args.window_size hop_size = args.hop_size mel_bins = args.mel_bins fmin = args.fmin fmax = args.fmax model_type = args.model_type checkpoint_path = args.checkpoint_path audio_path = args.audio_path device = torch.device('cuda') if args.cuda and torch.cuda.is_available() else torch.device('cpu') classes_num = config.classes_num labels = config.labels frames_per_second = sample_rate // hop_size # Paths fig_path = os.path.join('results', '{}.png'.format(get_filename(audio_path))) create_folder(os.path.dirname(fig_path)) # Model Model = eval(model_type) model = Model(sample_rate=sample_rate, window_size=window_size, hop_size=hop_size, mel_bins=mel_bins, fmin=fmin, fmax=fmax, classes_num=classes_num) checkpoint = torch.load(checkpoint_path, map_location=device) model.load_state_dict(checkpoint['model']) # Parallel print('GPU number: {}'.format(torch.cuda.device_count())) model = torch.nn.DataParallel(model) if 'cuda' in str(device): model.to(device) # Load audio (waveform, _) = librosa.core.load(audio_path, sr=sample_rate, mono=True) waveform = waveform[None, :] # (1, audio_length) waveform = move_data_to_device(waveform, device) # Forward with torch.no_grad(): model.eval() batch_output_dict = model(waveform, None) framewise_output = batch_output_dict['framewise_output'].data.cpu().numpy()[0] """(time_steps, classes_num)""" print('Sound event detection result (time_steps x classes_num): {}'.format( framewise_output.shape)) sorted_indexes = np.argsort(np.max(framewise_output, axis=0))[::-1] top_k = 10 # Show top results top_result_mat = framewise_output[:, sorted_indexes[0 : top_k]] """(time_steps, top_k)""" # Plot result stft = librosa.core.stft(y=waveform[0].data.cpu().numpy(), n_fft=window_size, hop_length=hop_size, window='hann', center=True) frames_num = stft.shape[-1] fig, axs = plt.subplots(2, 1, sharex=True, figsize=(10, 4)) axs[0].matshow(np.log(np.abs(stft)), origin='lower', aspect='auto', cmap='jet') axs[0].set_ylabel('Frequency bins') axs[0].set_title('Log spectrogram') axs[1].matshow(top_result_mat.T, origin='upper', aspect='auto', cmap='jet', vmin=0, vmax=1) axs[1].xaxis.set_ticks(np.arange(0, frames_num, frames_per_second)) axs[1].xaxis.set_ticklabels(np.arange(0, frames_num / frames_per_second)) axs[1].yaxis.set_ticks(np.arange(0, top_k)) axs[1].yaxis.set_ticklabels(np.array(labels)[sorted_indexes[0 : top_k]]) axs[1].yaxis.grid(color='k', linestyle='solid', linewidth=0.3, alpha=0.3) axs[1].set_xlabel('Seconds') axs[1].xaxis.set_ticks_position('bottom') plt.tight_layout() plt.savefig(fig_path) print('Save sound event detection visualization to {}'.format(fig_path)) return framewise_output, labels
def train(args): """Train AudioSet tagging model. Args: dataset_dir: str workspace: str data_type: 'balanced_train' | 'full_train' window_size: int hop_size: int mel_bins: int model_type: str loss_type: 'clip_bce' balanced: 'none' | 'balanced' | 'alternate' augmentation: 'none' | 'mixup' batch_size: int learning_rate: float resume_iteration: int early_stop: int accumulation_steps: int cuda: bool """ # Arugments & parameters workspace = args.workspace data_type = args.data_type sample_rate = args.sample_rate window_size = args.window_size hop_size = args.hop_size mel_bins = args.mel_bins fmin = args.fmin fmax = args.fmax model_type = args.model_type loss_type = args.loss_type balanced = args.balanced augmentation = args.augmentation batch_size = args.batch_size learning_rate = args.learning_rate resume_iteration = args.resume_iteration early_stop = args.early_stop device = torch.device('cuda') if args.cuda and torch.cuda.is_available( ) else torch.device('cpu') filename = args.filename num_workers = 128 prefetch_factor = 4 #os.environ["MASTER_ADDR"] = "localhost" #os.environ["MASTER_PORT"] = "12355" #dist.init_process_group("nccl", rank=rank, world_size=args.world_size) clip_samples = config.clip_samples classes_num = config.classes_num loss_func = get_loss_func(loss_type) # Paths black_list_csv = None train_indexes_hdf5_path = os.path.join(workspace, 'hdf5s', 'indexes', '{}.h5'.format(data_type)) eval_bal_indexes_hdf5_path = os.path.join(workspace, 'hdf5s', 'indexes', 'balanced_train.h5') eval_test_indexes_hdf5_path = os.path.join(workspace, 'hdf5s', 'indexes', 'eval.h5') checkpoints_dir = os.path.join( workspace, 'checkpoints', filename, 'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}' .format(sample_rate, window_size, hop_size, mel_bins, fmin, fmax), 'data_type={}'.format(data_type), model_type, 'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced), 'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size), datetime.datetime.now().strftime("%d%m%Y_%H%M%S")) #if rank == 0: create_folder(checkpoints_dir) statistics_path = os.path.join( workspace, 'statistics', filename, 'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}' .format(sample_rate, window_size, hop_size, mel_bins, fmin, fmax), 'data_type={}'.format(data_type), model_type, 'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced), 'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size), 'statistics.pkl') #if rank == 0: create_folder(os.path.dirname(statistics_path)) logs_dir = os.path.join( workspace, 'logs', filename, 'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}' .format(sample_rate, window_size, hop_size, mel_bins, fmin, fmax), 'data_type={}'.format(data_type), model_type, 'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced), 'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size)) create_logging(logs_dir, filemode='w') logging.info(args) if 'cuda' in str(device): logging.info('Using GPU.') device = 'cuda' else: logging.info('Using CPU. Set --cuda flag to use GPU.') device = 'cpu' # Model Model = eval(model_type) model = Model(sample_rate=sample_rate, window_size=window_size, hop_size=hop_size, mel_bins=mel_bins, fmin=fmin, fmax=fmax, classes_num=classes_num) params_num = count_parameters(model) # flops_num = count_flops(model, clip_samples) logging.info('Parameters num: {}'.format(params_num)) # logging.info('Flops num: {:.3f} G'.format(flops_num / 1e9)) # Dataset will be used by DataLoader later. Dataset takes a meta as input # and return a waveform and a target. dataset = AudioSetDataset(sample_rate=sample_rate) # Train sampler if balanced == 'none': Sampler = TrainSampler elif balanced == 'balanced': Sampler = BalancedTrainSampler elif balanced == 'alternate': Sampler = AlternateTrainSampler train_sampler = Sampler(indexes_hdf5_path=train_indexes_hdf5_path, batch_size=batch_size * 2 if 'mixup' in augmentation else batch_size, black_list_csv=black_list_csv) # Evaluate sampler eval_bal_sampler = EvaluateSampler( indexes_hdf5_path=eval_bal_indexes_hdf5_path, batch_size=2 * batch_size) eval_test_sampler = EvaluateSampler( indexes_hdf5_path=eval_test_indexes_hdf5_path, batch_size=2 * batch_size) # Data loader train_loader = torch.utils.data.DataLoader(dataset=dataset, batch_sampler=train_sampler, collate_fn=collate_fn, num_workers=num_workers, pin_memory=True, prefetch_factor=prefetch_factor) eval_bal_loader = torch.utils.data.DataLoader( dataset=dataset, batch_sampler=eval_bal_sampler, collate_fn=collate_fn, num_workers=num_workers, pin_memory=True, prefetch_factor=prefetch_factor) eval_test_loader = torch.utils.data.DataLoader( dataset=dataset, batch_sampler=eval_test_sampler, collate_fn=collate_fn, num_workers=num_workers, pin_memory=True, prefetch_factor=prefetch_factor) if 'mixup' in augmentation: mixup_augmenter = Mixup(mixup_alpha=1.) # Evaluator evaluator = Evaluator(model=model) # Statistics statistics_container = StatisticsContainer(statistics_path) # Optimizer optimizer = optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999), eps=1e-08, weight_decay=0., amsgrad=True) train_bgn_time = time.time() # Resume training if resume_iteration > 0: resume_checkpoint_path = os.path.join( workspace, 'checkpoints', filename, 'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}' .format(sample_rate, window_size, hop_size, mel_bins, fmin, fmax), 'data_type={}'.format(data_type), model_type, 'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced), 'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size), '{}_iterations.pth'.format(resume_iteration)) logging.info('Loading checkpoint {}'.format(resume_checkpoint_path)) checkpoint = torch.load(resume_checkpoint_path) model.load_state_dict(checkpoint['model']) train_sampler.load_state_dict(checkpoint['sampler']) statistics_container.load_state_dict(resume_iteration) iteration = checkpoint['iteration'] else: iteration = 0 # Parallel print('GPU number: {}'.format(torch.cuda.device_count())) model = torch.nn.DataParallel(model) if 'cuda' in str(device): model.to(device) #model = model.cuda(rank) #model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[rank]) #print([(s[0], s[1].is_cuda) for s in model.named_parameters()]) time1 = time.time() prev_bal_map = 0.0 prev_test_map = 0.0 save_bal_model = 0 save_test_model = 0 for batch_data_dict in train_loader: """batch_data_dict: { 'audio_name': (batch_size [*2 if mixup],), 'waveform': (batch_size [*2 if mixup], clip_samples), 'target': (batch_size [*2 if mixup], classes_num), (ifexist) 'mixup_lambda': (batch_size * 2,)} """ #print(batch_data_dict) # Evaluate if (iteration % 2000 == 0 and iteration > resume_iteration) or (iteration == -1): train_fin_time = time.time() bal_statistics = evaluator.evaluate(eval_bal_loader) test_statistics = evaluator.evaluate(eval_test_loader) logging.info('Validate bal mAP: {:.3f}'.format( np.mean(bal_statistics['average_precision']))) logging.info('Validate test mAP: {:.3f}'.format( np.mean(test_statistics['average_precision']))) save_bal_model = 1 if np.mean( bal_statistics['average_precision']) > prev_bal_map else 0 save_test_model = 1 if np.mean( test_statistics['average_precision']) > prev_test_map else 0 statistics_container.append(iteration, bal_statistics, data_type='bal') statistics_container.append(iteration, test_statistics, data_type='test') statistics_container.dump() train_time = train_fin_time - train_bgn_time validate_time = time.time() - train_fin_time logging.info( 'iteration: {}, train time: {:.3f} s, validate time: {:.3f} s' ''.format(iteration, train_time, validate_time)) logging.info('------------------------------------') train_bgn_time = time.time() # Save model if iteration % 100000 == 0: checkpoint = { 'iteration': iteration, 'model': model.module.state_dict(), 'sampler': train_sampler.state_dict() } checkpoint_path = os.path.join( checkpoints_dir, '{}_iterations.pth'.format(iteration)) torch.save(checkpoint, checkpoint_path) logging.info('Model saved to {}'.format(checkpoint_path)) if save_bal_model: checkpoint = { 'iteration': iteration, 'model': model.module.state_dict(), 'sampler': train_sampler.state_dict() } checkpoint_path = os.path.join( checkpoints_dir, '{}_iterations_bal.pth'.format(iteration)) torch.save(checkpoint, checkpoint_path) logging.info('Model saved to {}'.format(checkpoint_path)) save_bal_model = 0 if save_test_model: checkpoint = { 'iteration': iteration, 'model': model.module.state_dict(), 'sampler': train_sampler.state_dict() } checkpoint_path = os.path.join( checkpoints_dir, '{}_iterations_test.pth'.format(iteration)) torch.save(checkpoint, checkpoint_path) logging.info('Model saved to {}'.format(checkpoint_path)) save_test_model = 0 # Mixup lambda if 'mixup' in augmentation: batch_data_dict['mixup_lambda'] = mixup_augmenter.get_lambda( batch_size=len(batch_data_dict['waveform'])) # Move data to device for key in batch_data_dict.keys(): batch_data_dict[key] = move_data_to_device(batch_data_dict[key], device) # Forward model.train() if 'mixup' in augmentation: batch_output_dict = model(batch_data_dict['waveform'], batch_data_dict['mixup_lambda']) """{'clipwise_output': (batch_size, classes_num), ...}""" batch_target_dict = { 'target': do_mixup(batch_data_dict['target'], batch_data_dict['mixup_lambda']) } """{'target': (batch_size, classes_num)}""" else: batch_output_dict = model(batch_data_dict['waveform'], None) """{'clipwise_output': (batch_size, classes_num), ...}""" batch_target_dict = {'target': batch_data_dict['target']} """{'target': (batch_size, classes_num)}""" # Loss loss = loss_func(batch_output_dict, batch_target_dict) # Backward loss.backward() print(loss) optimizer.step() optimizer.zero_grad() if iteration % 10 == 0: print('--- Iteration: {}, train time: {:.3f} s / 10 iterations ---'\ .format(iteration, time.time() - time1)) time1 = time.time() # Stop learning if iteration == early_stop: break iteration += 1
def train(args): # Arugments & parameters dataset_dir = args.dataset_dir workspace = args.workspace holdout_fold = args.holdout_fold model_type = args.model_type pretrained_checkpoint_path = args.pretrained_checkpoint_path freeze_base = args.freeze_base loss_type = args.loss_type augmentation = args.augmentation learning_rate = args.learning_rate batch_size = args.batch_size few_shots = args.few_shots random_seed = args.random_seed resume_iteration = args.resume_iteration stop_iteration = args.stop_iteration device = 'cuda' if (args.cuda and torch.cuda.is_available()) else 'cpu' mini_data = args.mini_data filename = args.filename loss_func = get_loss_func(loss_type) pretrain = True if pretrained_checkpoint_path else False num_workers = 16 # Paths if mini_data: prefix = 'minidata_' else: prefix = '' train_hdf5_path = os.path.join(workspace, 'features', '{}training.waveform.h5'.format(prefix)) test_hdf5_path = os.path.join(workspace, 'features', 'testing.waveform.h5'.format(prefix)) evaluate_hdf5_path = os.path.join(workspace, 'features', 'evaluation.waveform.h5'.format(prefix)) test_reference_csv_path = os.path.join(dataset_dir, 'metadata', 'groundtruth_strong_label_testing_set.csv') evaluate_reference_csv_path = os.path.join(dataset_dir, 'metadata', 'groundtruth_strong_label_evaluation_set.csv') checkpoints_dir = os.path.join(workspace, 'checkpoints', filename, 'holdout_fold={}'.format(holdout_fold), model_type, 'pretrain={}'.format(pretrain), 'loss_type={}'.format(loss_type), 'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size), 'few_shots={}'.format(few_shots), 'random_seed={}'.format(random_seed), 'freeze_base={}'.format(freeze_base)) create_folder(checkpoints_dir) tmp_submission_path = os.path.join(workspace, '_tmp_submission', '{}{}'.format(prefix, filename), 'holdout_fold={}'.format(holdout_fold), model_type, 'pretrain={}'.format(pretrain), 'loss_type={}'.format(loss_type), 'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size), 'few_shots={}'.format(few_shots), 'random_seed={}'.format(random_seed), 'freeze_base={}'.format(freeze_base), '_submission.csv') create_folder(os.path.dirname(tmp_submission_path)) statistics_path = os.path.join(workspace, 'statistics', '{}{}'.format(prefix, filename), 'holdout_fold={}'.format(holdout_fold), model_type, 'pretrain={}'.format(pretrain), 'loss_type={}'.format(loss_type), 'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size), 'few_shots={}'.format(few_shots), 'random_seed={}'.format(random_seed), 'freeze_base={}'.format(freeze_base), 'statistics.pickle') create_folder(os.path.dirname(statistics_path)) predictions_dir = os.path.join(workspace, 'predictions', '{}{}'.format(prefix, filename), 'holdout_fold={}'.format(holdout_fold), model_type, 'pretrain={}'.format(pretrain), 'loss_type={}'.format(loss_type), 'augmentation={}'.format(augmentation), 'few_shots={}'.format(few_shots), 'random_seed={}'.format(random_seed), 'freeze_base={}'.format(freeze_base), 'batch_size={}'.format(batch_size)) create_folder(predictions_dir) logs_dir = os.path.join(workspace, 'logs', '{}{}'.format(prefix, filename), 'holdout_fold={}'.format(holdout_fold), model_type, 'pretrain={}'.format(pretrain), 'loss_type={}'.format(loss_type), 'augmentation={}'.format(augmentation), 'few_shots={}'.format(few_shots), 'random_seed={}'.format(random_seed), 'freeze_base={}'.format(freeze_base), 'batch_size={}'.format(batch_size)) create_logging(logs_dir, 'w') logging.info(args) if 'cuda' in device: logging.info('Using GPU.') else: logging.info('Using CPU. Set --cuda flag to use GPU.') # Model Model = eval(model_type) model = Model(sample_rate, window_size, hop_size, mel_bins, fmin, fmax, classes_num) # Statistics statistics_container = StatisticsContainer(statistics_path) if pretrain: logging.info('Load pretrained model from {}'.format(pretrained_checkpoint_path)) model.load_from_pretrain(pretrained_checkpoint_path) if resume_iteration: resume_checkpoint_path = os.path.join(checkpoints_dir, '{}_iterations.pth'.format(resume_iteration)) logging.info('Load resume model from {}'.format(resume_checkpoint_path)) resume_checkpoint = torch.load(resume_checkpoint_path) model.load_state_dict(resume_checkpoint['model']) statistics_container.load_state_dict(resume_iteration) iteration = resume_checkpoint['iteration'] else: iteration = 0 # Parallel print('GPU number: {}'.format(torch.cuda.device_count())) model = torch.nn.DataParallel(model) if 'cuda' in device: model.to(device) # Optimizer optimizer = optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999), eps=1e-08, weight_decay=0., amsgrad=True) train_dataset = DCASE2017Task4Dataset(hdf5_path=train_hdf5_path) test_dataset = DCASE2017Task4Dataset(hdf5_path=test_hdf5_path) evaluate_dataset = DCASE2017Task4Dataset(hdf5_path=evaluate_hdf5_path) train_sampler = TrainSampler( hdf5_path=train_hdf5_path, batch_size=batch_size * 2 if 'mixup' in augmentation else batch_size, few_shots=few_shots, random_seed=random_seed) test_sampler = EvaluateSampler(dataset_size=len(test_dataset), batch_size=batch_size) evaluate_sampler = EvaluateSampler(dataset_size=len(evaluate_dataset), batch_size=batch_size) collector = Collator() train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_sampler=train_sampler, collate_fn=collector, num_workers=num_workers, pin_memory=True) test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_sampler=test_sampler, collate_fn=collector, num_workers=num_workers, pin_memory=True) evaluate_loader = torch.utils.data.DataLoader(dataset=evaluate_dataset, batch_sampler=evaluate_sampler, collate_fn=collector, num_workers=num_workers, pin_memory=True) if 'mixup' in augmentation: mixup_augmenter = Mixup(mixup_alpha=1.) # Evaluator test_evaluator = Evaluator( model=model, generator=test_loader) evaluate_evaluator = Evaluator( model=model, generator=evaluate_loader) train_bgn_time = time.time() # Train on mini batches for batch_data_dict in train_loader: # Evaluate if iteration % 1000 == 0: if resume_iteration > 0 and iteration == resume_iteration: pass else: logging.info('------------------------------------') logging.info('Iteration: {}'.format(iteration)) train_fin_time = time.time() for (data_type, evaluator, reference_csv_path) in [ ('test', test_evaluator, test_reference_csv_path), ('evaluate', evaluate_evaluator, evaluate_reference_csv_path)]: logging.info('{} statistics:'.format(data_type)) (statistics, predictions) = evaluator.evaluate( reference_csv_path, tmp_submission_path) statistics_container.append(data_type, iteration, statistics) prediction_path = os.path.join(predictions_dir, '{}_iterations.prediction.{}.h5'.format(iteration, data_type)) write_out_prediction(predictions, prediction_path) statistics_container.dump() train_time = train_fin_time - train_bgn_time validate_time = time.time() - train_fin_time logging.info( 'Train time: {:.3f} s, validate time: {:.3f} s' ''.format(train_time, validate_time)) train_bgn_time = time.time() # Save model if iteration % 10000 == 0 and iteration > 49999: checkpoint = { 'iteration': iteration, 'model': model.module.state_dict(), 'optimizer': optimizer.state_dict()} checkpoint_path = os.path.join( checkpoints_dir, '{}_iterations.pth'.format(iteration)) torch.save(checkpoint, checkpoint_path) logging.info('Model saved to {}'.format(checkpoint_path)) if 'mixup' in augmentation: batch_data_dict['mixup_lambda'] = mixup_augmenter.get_lambda(len(batch_data_dict['waveform'])) # Move data to GPU for key in batch_data_dict.keys(): batch_data_dict[key] = move_data_to_device(batch_data_dict[key], device) # Train model.train() if 'mixup' in augmentation: batch_output_dict = model(batch_data_dict['waveform'], batch_data_dict['mixup_lambda']) batch_target_dict = {'target': do_mixup(batch_data_dict['target'], batch_data_dict['mixup_lambda'])} else: batch_output_dict = model(batch_data_dict['waveform'], None) batch_target_dict = {'target': batch_data_dict['target']} # loss loss = loss_func(batch_output_dict, batch_target_dict) print(iteration, loss) # Backward optimizer.zero_grad() loss.backward() optimizer.step() # Stop learning if iteration == stop_iteration: break iteration += 1
def train(args): """Train a piano transcription system. Args: workspace: str, directory of your workspace model_type: str, e.g. 'Regressonset_regressoffset_frame_velocity_CRNN' loss_type: str, e.g. 'regress_onset_offset_frame_velocity_bce' augmentation: str, e.g. 'none' batch_size: int learning_rate: float reduce_iteration: int resume_iteration: int early_stop: int device: 'cuda' | 'cpu' mini_data: bool """ # Arugments & parameters workspace = args.workspace model_type = args.model_type loss_type = args.loss_type augmentation = args.augmentation max_note_shift = args.max_note_shift batch_size = args.batch_size learning_rate = args.learning_rate reduce_iteration = args.reduce_iteration resume_iteration = args.resume_iteration early_stop = args.early_stop device = torch.device('cuda') if args.cuda and torch.cuda.is_available( ) else torch.device('cpu') mini_data = args.mini_data filename = args.filename sample_rate = config.sample_rate segment_seconds = config.segment_seconds hop_seconds = config.hop_seconds segment_samples = int(segment_seconds * sample_rate) frames_per_second = config.frames_per_second classes_num = config.classes_num num_workers = 8 # Loss function loss_func = get_loss_func(loss_type) # Paths hdf5s_dir = os.path.join(workspace, 'hdf5s', 'maestro') checkpoints_dir = os.path.join(workspace, 'checkpoints', filename, model_type, 'loss_type={}'.format(loss_type), 'augmentation={}'.format(augmentation), 'max_note_shift={}'.format(max_note_shift), 'batch_size={}'.format(batch_size)) create_folder(checkpoints_dir) statistics_path = os.path.join(workspace, 'statistics', filename, model_type, 'loss_type={}'.format(loss_type), 'augmentation={}'.format(augmentation), 'max_note_shift={}'.format(max_note_shift), 'batch_size={}'.format(batch_size), 'statistics.pkl') create_folder(os.path.dirname(statistics_path)) logs_dir = os.path.join(workspace, 'logs', filename, model_type, 'loss_type={}'.format(loss_type), 'augmentation={}'.format(augmentation), 'max_note_shift={}'.format(max_note_shift), 'batch_size={}'.format(batch_size)) create_folder(logs_dir) create_logging(logs_dir, filemode='w') logging.info(args) if 'cuda' in str(device): logging.info('Using GPU.') device = 'cuda' else: logging.info('Using CPU.') device = 'cpu' # Model Model = eval(model_type) model = Model(frames_per_second=frames_per_second, classes_num=classes_num) if augmentation == 'none': augmentor = None elif augmentation == 'aug': augmentor = Augmentor() else: raise Exception('Incorrect argumentation!') # Dataset train_dataset = MaestroDataset(hdf5s_dir=hdf5s_dir, segment_seconds=segment_seconds, frames_per_second=frames_per_second, max_note_shift=max_note_shift, augmentor=augmentor) evaluate_dataset = MaestroDataset(hdf5s_dir=hdf5s_dir, segment_seconds=segment_seconds, frames_per_second=frames_per_second, max_note_shift=0) # Sampler for training train_sampler = Sampler(hdf5s_dir=hdf5s_dir, split='train', segment_seconds=segment_seconds, hop_seconds=hop_seconds, batch_size=batch_size, mini_data=mini_data) # Sampler for evaluation evaluate_train_sampler = TestSampler(hdf5s_dir=hdf5s_dir, split='train', segment_seconds=segment_seconds, hop_seconds=hop_seconds, batch_size=batch_size, mini_data=mini_data) evaluate_validate_sampler = TestSampler(hdf5s_dir=hdf5s_dir, split='validation', segment_seconds=segment_seconds, hop_seconds=hop_seconds, batch_size=batch_size, mini_data=mini_data) evaluate_test_sampler = TestSampler(hdf5s_dir=hdf5s_dir, split='test', segment_seconds=segment_seconds, hop_seconds=hop_seconds, batch_size=batch_size, mini_data=mini_data) # Dataloader train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_sampler=train_sampler, collate_fn=collate_fn, num_workers=num_workers, pin_memory=True) evaluate_train_loader = torch.utils.data.DataLoader( dataset=evaluate_dataset, batch_sampler=evaluate_train_sampler, collate_fn=collate_fn, num_workers=num_workers, pin_memory=True) validate_loader = torch.utils.data.DataLoader( dataset=evaluate_dataset, batch_sampler=evaluate_validate_sampler, collate_fn=collate_fn, num_workers=num_workers, pin_memory=True) test_loader = torch.utils.data.DataLoader( dataset=evaluate_dataset, batch_sampler=evaluate_test_sampler, collate_fn=collate_fn, num_workers=num_workers, pin_memory=True) # Evaluator evaluator = SegmentEvaluator(model, batch_size) # Statistics statistics_container = StatisticsContainer(statistics_path) # Optimizer optimizer = optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999), eps=1e-08, weight_decay=0., amsgrad=True) # Resume training if resume_iteration > 0: resume_checkpoint_path = os.path.join( workspace, 'checkpoints', filename, model_type, 'loss_type={}'.format(loss_type), 'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size), '{}_iterations.pth'.format(resume_iteration)) logging.info('Loading checkpoint {}'.format(resume_checkpoint_path)) checkpoint = torch.load(resume_checkpoint_path) model.load_state_dict(checkpoint['model']) train_sampler.load_state_dict(checkpoint['sampler']) statistics_container.load_state_dict(resume_iteration) iteration = checkpoint['iteration'] else: iteration = 0 # Parallel print('GPU number: {}'.format(torch.cuda.device_count())) model = torch.nn.DataParallel(model) if 'cuda' in str(device): model.to(device) train_bgn_time = time.time() for batch_data_dict in train_loader: # Evaluation if iteration % 5000 == 0: # and iteration > 0: logging.info('------------------------------------') logging.info('Iteration: {}'.format(iteration)) train_fin_time = time.time() evaluate_train_statistics = evaluator.evaluate( evaluate_train_loader) validate_statistics = evaluator.evaluate(validate_loader) test_statistics = evaluator.evaluate(test_loader) logging.info( ' Train statistics: {}'.format(evaluate_train_statistics)) logging.info( ' Validation statistics: {}'.format(validate_statistics)) logging.info(' Test statistics: {}'.format(test_statistics)) statistics_container.append(iteration, evaluate_train_statistics, data_type='train') statistics_container.append(iteration, validate_statistics, data_type='validation') statistics_container.append(iteration, test_statistics, data_type='test') statistics_container.dump() train_time = train_fin_time - train_bgn_time validate_time = time.time() - train_fin_time logging.info('Train time: {:.3f} s, validate time: {:.3f} s' ''.format(train_time, validate_time)) train_bgn_time = time.time() # Save model if iteration % 20000 == 0: checkpoint = { 'iteration': iteration, 'model': model.module.state_dict(), 'sampler': train_sampler.state_dict() } checkpoint_path = os.path.join( checkpoints_dir, '{}_iterations.pth'.format(iteration)) torch.save(checkpoint, checkpoint_path) logging.info('Model saved to {}'.format(checkpoint_path)) # Reduce learning rate if iteration % reduce_iteration == 0 and iteration > 0: for param_group in optimizer.param_groups: param_group['lr'] *= 0.9 # Move data to device for key in batch_data_dict.keys(): batch_data_dict[key] = move_data_to_device(batch_data_dict[key], device) model.train() batch_output_dict = model(batch_data_dict['waveform']) loss = loss_func(model, batch_output_dict, batch_data_dict) print(iteration, loss) # Backward loss.backward() optimizer.step() optimizer.zero_grad() # Stop learning if iteration == early_stop: break iteration += 1
def train(args): """Train AudioSet tagging model. Args: dataset_dir: str workspace: str data_type: 'balanced_train' | 'unbalanced_train' frames_per_second: int mel_bins: int model_type: str loss_type: 'bce' balanced: bool augmentation: str batch_size: int learning_rate: float resume_iteration: int early_stop: int accumulation_steps: int cuda: bool """ # Arugments & parameters workspace = args.workspace data_type = args.data_type window_size = args.window_size hop_size = args.hop_size mel_bins = args.mel_bins fmin = args.fmin fmax = args.fmax model_type = args.model_type loss_type = args.loss_type balanced = args.balanced augmentation = args.augmentation batch_size = args.batch_size learning_rate = args.learning_rate resume_iteration = args.resume_iteration early_stop = args.early_stop device = torch.device('cuda') if args.cuda and torch.cuda.is_available( ) else torch.device('cpu') filename = args.filename num_workers = 8 sample_rate = config.sample_rate clip_samples = config.clip_samples classes_num = config.classes_num loss_func = get_loss_func(loss_type) # Paths black_list_csv = os.path.join(workspace, 'black_list', 'dcase2017task4.csv') train_indexes_hdf5_path = os.path.join(workspace, 'hdf5s', 'indexes', '{}.h5'.format(data_type)) eval_bal_indexes_hdf5_path = os.path.join(workspace, 'hdf5s', 'indexes', 'balanced_train.h5') eval_test_indexes_hdf5_path = os.path.join(workspace, 'hdf5s', 'indexes', 'eval.h5') checkpoints_dir = os.path.join( workspace, 'checkpoints', filename, 'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}' .format(sample_rate, window_size, hop_size, mel_bins, fmin, fmax), 'data_type={}'.format(data_type), model_type, 'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced), 'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size)) create_folder(checkpoints_dir) statistics_path = os.path.join( workspace, 'statistics', filename, 'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}' .format(sample_rate, window_size, hop_size, mel_bins, fmin, fmax), 'data_type={}'.format(data_type), model_type, 'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced), 'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size), 'statistics.pkl') create_folder(os.path.dirname(statistics_path)) logs_dir = os.path.join( workspace, 'logs', filename, 'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}' .format(sample_rate, window_size, hop_size, mel_bins, fmin, fmax), 'data_type={}'.format(data_type), model_type, 'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced), 'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size)) create_logging(logs_dir, filemode='w') logging.info(args) if 'cuda' in str(device): logging.info('Using GPU.') device = 'cuda' else: logging.info('Using CPU.') device = 'cpu' # Model Model = eval(model_type) model = Model(sample_rate=sample_rate, window_size=window_size, hop_size=hop_size, mel_bins=mel_bins, fmin=fmin, fmax=fmax, classes_num=classes_num) params_num = count_parameters(model) # flops_num = count_flops(model, clip_samples) logging.info('Parameters num: {}'.format(params_num)) # logging.info('Flops num: {:.3f} G'.format(flops_num / 1e9)) # Dataset will be used by DataLoader later. Dataset takes a meta as input # and return a waveform and a target. dataset = AudioSetDataset(clip_samples=clip_samples, classes_num=classes_num) # Train sampler (train_sampler, train_collector) = get_train_sampler(balanced, augmentation, train_indexes_hdf5_path, black_list_csv, batch_size) # Evaluate sampler eval_bal_sampler = EvaluateSampler( indexes_hdf5_path=eval_bal_indexes_hdf5_path, batch_size=batch_size) eval_test_sampler = EvaluateSampler( indexes_hdf5_path=eval_test_indexes_hdf5_path, batch_size=batch_size) eval_collector = Collator(mixup_alpha=None) # Data loader train_loader = torch.utils.data.DataLoader(dataset=dataset, batch_sampler=train_sampler, collate_fn=train_collector, num_workers=num_workers, pin_memory=True) eval_bal_loader = torch.utils.data.DataLoader( dataset=dataset, batch_sampler=eval_bal_sampler, collate_fn=eval_collector, num_workers=num_workers, pin_memory=True) eval_test_loader = torch.utils.data.DataLoader( dataset=dataset, batch_sampler=eval_test_sampler, collate_fn=eval_collector, num_workers=num_workers, pin_memory=True) # Evaluator bal_evaluator = Evaluator(model=model, generator=eval_bal_loader) test_evaluator = Evaluator(model=model, generator=eval_test_loader) # Statistics statistics_container = StatisticsContainer(statistics_path) # Optimizer optimizer = optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999), eps=1e-08, weight_decay=0., amsgrad=True) train_bgn_time = time.time() # Resume training if resume_iteration > 0: resume_checkpoint_path = os.path.join( workspace, 'checkpoints', filename, 'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}' .format(sample_rate, window_size, hop_size, mel_bins, fmin, fmax), 'data_type={}'.format(data_type), model_type, 'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced), 'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size), '{}_iterations.pth'.format(resume_iteration)) logging.info('Loading checkpoint {}'.format(resume_checkpoint_path)) checkpoint = torch.load(resume_checkpoint_path) model.load_state_dict(checkpoint['model']) train_sampler.load_state_dict(checkpoint['sampler']) statistics_container.load_state_dict(resume_iteration) iteration = checkpoint['iteration'] else: iteration = 0 # Parallel print('GPU number: {}'.format(torch.cuda.device_count())) model = torch.nn.DataParallel(model) if 'cuda' in str(device): model.to(device) time1 = time.time() for batch_data_dict in train_loader: """batch_data_dict: { 'audio_name': (batch_size [*2 if mixup],), 'waveform': (batch_size [*2 if mixup], clip_samples), 'target': (batch_size [*2 if mixup], classes_num), (ifexist) 'mixup_lambda': (batch_size * 2,)} """ # Evaluate if (iteration % 2000 == 0 and iteration > resume_iteration) or (iteration == 0): train_fin_time = time.time() bal_statistics = bal_evaluator.evaluate() test_statistics = test_evaluator.evaluate() logging.info('Validate bal mAP: {:.3f}'.format( np.mean(bal_statistics['average_precision']))) logging.info('Validate test mAP: {:.3f}'.format( np.mean(test_statistics['average_precision']))) statistics_container.append(iteration, bal_statistics, data_type='bal') statistics_container.append(iteration, test_statistics, data_type='test') statistics_container.dump() train_time = train_fin_time - train_bgn_time validate_time = time.time() - train_fin_time logging.info( 'iteration: {}, train time: {:.3f} s, validate time: {:.3f} s' ''.format(iteration, train_time, validate_time)) logging.info('------------------------------------') train_bgn_time = time.time() # Save model if iteration % 20000 == 0: checkpoint = { 'iteration': iteration, 'model': model.module.state_dict(), 'optimizer': optimizer.state_dict(), 'sampler': train_sampler.state_dict() } checkpoint_path = os.path.join( checkpoints_dir, '{}_iterations.pth'.format(iteration)) torch.save(checkpoint, checkpoint_path) logging.info('Model saved to {}'.format(checkpoint_path)) # Move data to device for key in batch_data_dict.keys(): batch_data_dict[key] = move_data_to_device(batch_data_dict[key], device) # Forward model.train() if 'mixup' in augmentation: batch_output_dict = model(batch_data_dict['waveform'], batch_data_dict['mixup_lambda']) """{'clipwise_output': (batch_size, classes_num), ...}""" batch_target_dict = { 'target': do_mixup(batch_data_dict['target'], batch_data_dict['mixup_lambda']) } """{'target': (batch_size, classes_num)}""" else: batch_output_dict = model(batch_data_dict['waveform'], None) """{'clipwise_output': (batch_size, classes_num), ...}""" batch_target_dict = {'target': batch_data_dict['target']} """{'target': (batch_size, classes_num)}""" # Loss loss = loss_func(batch_output_dict, batch_target_dict) # Backward loss.backward() print(loss) optimizer.step() optimizer.zero_grad() if iteration % 10 == 0: print('--- Iteration: {}, train time: {:.3f} s / 10 iterations ---'\ .format(iteration, time.time() - time1)) time1 = time.time() iteration += 1 # Stop learning if iteration == early_stop: break
def audio_tagging_folder(args): """Inference audio tagging result of an audio clip. """ # Arugments & parameters folder = args.folder sample_rate = args.sample_rate window_size = args.window_size hop_size = args.hop_size mel_bins = args.mel_bins fmin = args.fmin fmax = args.fmax model_type = args.model_type #audio_path = args.audio_path device = torch.device('cuda') if args.cuda and torch.cuda.is_available() else torch.device('cpu') classes_num = config.classes_num labels = config.labels aud_paths = glob.glob(os.path.join(folder,'*WAV'))+glob.glob(os.path.join(folder,'*wav'))+glob.glob(os.path.join(folder,'*mp3')) print(aud_paths) # Model Model = eval(model_type) model = Model(sample_rate=sample_rate, window_size=window_size, hop_size=hop_size, mel_bins=mel_bins, fmin=fmin, fmax=fmax, classes_num=classes_num) checkpoint_path = args.checkpoint_path checkpoint = torch.load(checkpoint_path, map_location=device) model.load_state_dict(checkpoint['model']) # Parallel if 'cuda' in str(device): model.to(device) print('GPU number: {}'.format(torch.cuda.device_count())) model = torch.nn.DataParallel(model) else: print('Using CPU.') results ={} for audio_path in aud_paths: print('Infering path:',audio_path) # Load audio try: (waveform, _) = librosa.core.load(audio_path, sr=sample_rate, mono=True) except: print('Problem with path:',audio_path) results[audio_path]='Failed' continue waveform = waveform[None, :] # (1, audio_length) waveform = move_data_to_device(waveform, device) # Forward with torch.no_grad(): model.eval() batch_output_dict = model(waveform, None) clipwise_output = batch_output_dict['clipwise_output'].data.cpu().numpy()[0] """(classes_num,)""" sorted_indexes = np.argsort(clipwise_output)[::-1] # Print audio tagging top probabilities result = {} for k in range(10): result[str(np.array(labels)[sorted_indexes[k]])]= float(clipwise_output[sorted_indexes[k]]) print('{}: {:.3f}'.format(np.array(labels)[sorted_indexes[k]], clipwise_output[sorted_indexes[k]])) results[audio_path]=result # Print embedding if 'embedding' in batch_output_dict.keys(): embedding = batch_output_dict['embedding'].data.cpu().numpy()[0] print('embedding: {}'.format(embedding.shape)) out_name = os.path.join(folder,'results.json') with open(out_name, 'w') as fp: json.dump(results, fp) return clipwise_output, labels
def inference(args): # Arugments & parameters window_size = args.window_size hop_size = args.hop_size mel_bins = args.mel_bins fmin = args.fmin fmax = args.fmax model_type = args.model_type checkpoint_path = args.checkpoint_path device = torch.device('cuda') if args.cuda and torch.cuda.is_available( ) else torch.device('cpu') filename = args.filename sample_rate = config.sample_rate classes_num = config.classes_num # Model Model = eval(model_type) model = Model(sample_rate=sample_rate, window_size=window_size, hop_size=hop_size, mel_bins=mel_bins, fmin=fmin, fmax=fmax, classes_num=classes_num) checkpoint = torch.load(checkpoint_path, map_location=device) model.load_state_dict(checkpoint['model']) # Parallel print('GPU number: {}'.format(torch.cuda.device_count())) model = torch.nn.DataParallel(model) if 'cuda' in str(device): model.to(device) if True: waveform = np.zeros(sample_rate * 10) else: audio_path = "/vol/vssp/msos/qk/test9/YwfSPbhnpOlQ.wav" (waveform, _) = librosa.core.load(audio_path, sr=sample_rate, mono=True) waveform = waveform[None, :] waveform = move_data_to_device(waveform, device) # Forward model.eval() batch_output_dict = model(waveform, None) clipwise_output = batch_output_dict['clipwise_output'].data.cpu().numpy( )[0] sorted_indexes = np.argsort(clipwise_output)[::-1] embedding = batch_output_dict['embedding'].data.cpu().numpy()[0] print(embedding.shape) for k in range(10): print('{}, {}'.format( np.array(config.labels)[sorted_indexes[k]], clipwise_output[sorted_indexes[k]]))
def train(args): """Train and evaluate. Args: dataset_dir: str workspace: str holdout_fold: '1' model_type: str, e.g., 'Cnn_9layers_Gru_FrameAtt' loss_type: str, e.g., 'clip_bce' augmentation: str, e.g., 'mixup' learning_rate, float batch_size: int resume_iteration: int stop_iteration: int device: 'cuda' | 'cpu' mini_data: bool """ # Arugments & parameters dataset_dir = args.dataset_dir workspace = args.workspace holdout_fold = args.holdout_fold model_type = args.model_type loss_type = args.loss_type augmentation = args.augmentation learning_rate = args.learning_rate batch_size = args.batch_size resume_iteration = args.resume_iteration stop_iteration = args.stop_iteration device = 'cuda' if (args.cuda and torch.cuda.is_available()) else 'cpu' mini_data = args.mini_data filename = args.filename loss_func = get_loss_func(loss_type) num_workers = 8 # Paths if mini_data: prefix = 'minidata_' else: prefix = '' train_hdf5_path = os.path.join(workspace, 'hdf5s', '{}training.h5'.format(prefix)) test_hdf5_path = os.path.join(workspace, 'hdf5s', '{}testing.h5'.format(prefix)) evaluate_hdf5_path = os.path.join(workspace, 'hdf5s', 'evaluation.h5'.format(prefix)) test_reference_csv_path = os.path.join( dataset_dir, 'metadata', 'groundtruth_strong_label_testing_set.csv') evaluate_reference_csv_path = os.path.join( dataset_dir, 'metadata', 'groundtruth_strong_label_evaluation_set.csv') checkpoints_dir = os.path.join(workspace, 'checkpoints', '{}{}'.format( prefix, filename), 'holdout_fold={}'.format(holdout_fold), 'model_type={}'.format(model_type), 'loss_type={}'.format(loss_type), 'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size)) create_folder(checkpoints_dir) tmp_submission_path = os.path.join( workspace, '_tmp_submission', '{}{}'.format(prefix, filename), 'holdout_fold={}'.format(holdout_fold), 'model_type={}'.format(model_type), 'loss_type={}'.format(loss_type), 'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size), '_submission.csv') create_folder(os.path.dirname(tmp_submission_path)) statistics_path = os.path.join(workspace, 'statistics', '{}{}'.format( prefix, filename), 'holdout_fold={}'.format(holdout_fold), 'model_type={}'.format(model_type), 'loss_type={}'.format(loss_type), 'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size), 'statistics.pickle') create_folder(os.path.dirname(statistics_path)) logs_dir = os.path.join(workspace, 'logs', '{}{}'.format(prefix, filename), 'holdout_fold={}'.format(holdout_fold), 'model_type={}'.format(model_type), 'loss_type={}'.format(loss_type), 'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size)) create_logging(logs_dir, 'w') logging.info(args) if 'cuda' in device: logging.info('Using GPU.') else: logging.info('Using CPU. Set --cuda flag to use GPU.') # Model assert model_type, 'Please specify model_type!' Model = eval(model_type) model = Model(sample_rate, window_size, hop_size, mel_bins, fmin, fmax, classes_num) if resume_iteration: resume_checkpoint_path = os.path.join( checkpoints_dir, '{}_iterations.pth'.format(resume_iteration)) logging.info( 'Load resume model from {}'.format(resume_checkpoint_path)) resume_checkpoint = torch.load(resume_checkpoint_path) model.load_state_dict(resume_checkpoint['model']) statistics_container.load_state_dict(resume_iteration) iteration = resume_checkpoint['iteration'] else: iteration = 0 # Parallel print('GPU number: {}'.format(torch.cuda.device_count())) model = torch.nn.DataParallel(model) if 'cuda' in device: model.to(device) # Optimizer optimizer = optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999), eps=1e-08, weight_decay=0., amsgrad=True) # Dataset dataset = DCASE2017Task4Dataset() # Sampler train_sampler = TrainSampler(hdf5_path=train_hdf5_path, batch_size=batch_size * 2 if 'mixup' in augmentation else batch_size) test_sampler = TestSampler(hdf5_path=test_hdf5_path, batch_size=batch_size) evaluate_sampler = TestSampler(hdf5_path=evaluate_hdf5_path, batch_size=batch_size) # Data loader train_loader = torch.utils.data.DataLoader(dataset=dataset, batch_sampler=train_sampler, collate_fn=collate_fn, num_workers=num_workers, pin_memory=True) test_loader = torch.utils.data.DataLoader(dataset=dataset, batch_sampler=test_sampler, collate_fn=collate_fn, num_workers=num_workers, pin_memory=True) evaluate_loader = torch.utils.data.DataLoader( dataset=dataset, batch_sampler=evaluate_sampler, collate_fn=collate_fn, num_workers=num_workers, pin_memory=True) if 'mixup' in augmentation: mixup_augmenter = Mixup(mixup_alpha=1.) # Evaluator evaluator = Evaluator(model=model) # Statistics statistics_container = StatisticsContainer(statistics_path) train_bgn_time = time.time() # Train on mini batches for batch_data_dict in train_loader: # Evaluate if (iteration % 1000 == 0 and iteration > resume_iteration): # or (iteration == 0): logging.info('------------------------------------') logging.info('Iteration: {}'.format(iteration)) train_fin_time = time.time() for (data_type, data_loader, reference_csv_path) in [ ('test', test_loader, test_reference_csv_path), ('evaluate', evaluate_loader, evaluate_reference_csv_path) ]: # Calculate tatistics (statistics, _) = evaluator.evaluate(data_loader, reference_csv_path, tmp_submission_path) logging.info('{} statistics:'.format(data_type)) logging.info(' Clipwise mAP: {:.3f}'.format( np.mean(statistics['clipwise_ap']))) logging.info(' Framewise mAP: {:.3f}'.format( np.mean(statistics['framewise_ap']))) logging.info(' {}'.format( statistics['sed_metrics']['overall']['error_rate'])) statistics_container.append(data_type, iteration, statistics) statistics_container.dump() train_time = train_fin_time - train_bgn_time validate_time = time.time() - train_fin_time logging.info('Train time: {:.3f} s, validate time: {:.3f} s' ''.format(train_time, validate_time)) train_bgn_time = time.time() # Save model if iteration % 10000 == 0: checkpoint = { 'iteration': iteration, 'model': model.module.state_dict(), 'optimizer': optimizer.state_dict() } checkpoint_path = os.path.join( checkpoints_dir, '{}_iterations.pth'.format(iteration)) torch.save(checkpoint, checkpoint_path) logging.info('Model saved to {}'.format(checkpoint_path)) if 'mixup' in augmentation: batch_data_dict['mixup_lambda'] = mixup_augmenter.get_lambda( batch_size=len(batch_data_dict['waveform'])) # Move data to GPU for key in batch_data_dict.keys(): batch_data_dict[key] = move_data_to_device(batch_data_dict[key], device) # Train model.train() if 'mixup' in augmentation: batch_output_dict = model(batch_data_dict['waveform'], batch_data_dict['mixup_lambda']) batch_target_dict = { 'target': do_mixup(batch_data_dict['target'], batch_data_dict['mixup_lambda']) } else: batch_output_dict = model(batch_data_dict['waveform'], None) batch_target_dict = {'target': batch_data_dict['target']} # loss loss = loss_func(batch_output_dict, batch_target_dict) print(iteration, loss) # Backward optimizer.zero_grad() loss.backward() optimizer.step() # Stop learning if iteration == stop_iteration: break iteration += 1
def train(args): # Arugments & parameters window_size = args.window_size hop_size = args.hop_size mel_bins = args.mel_bins fmin = args.fmin fmax = args.fmax model_type = args.model_type pretrained_checkpoint_path = args.pretrained_checkpoint_path freeze_base = args.freeze_base freeze_base = True device = 'cuda' if (args.cuda and torch.cuda.is_available()) else 'cpu' sample_rate = config.sample_rate classes_num = config.classes_num pretrain = True if pretrained_checkpoint_path else False # Model Model = eval(model_type) model = Model(sample_rate, window_size, hop_size, mel_bins, fmin, fmax, classes_num, freeze_base) # Load pretrained model if pretrain: logging.info( 'Load pretrained model from {}'.format(pretrained_checkpoint_path)) model.load_from_pretrain(pretrained_checkpoint_path) # Parallel print('GPU number: {}'.format(torch.cuda.device_count())) model = torch.nn.DataParallel(model) if 'cuda' in device: model.to(device) print('Load pretrained model successfully!') ###############Copying main.py#################### workspace_input = args.workspace_input workspace_output = args.workspace_output data_type = 'balanced_train' loss_type = 'clip_bce' balanced = 'balanced' augmentation = 'none' batch_size = 1 learning_rate = 1e-3 resume_iteration = 0 early_stop = 100000 device = torch.device('cuda') if args.cuda and torch.cuda.is_available( ) else torch.device('cpu') filename = args.filename num_workers = 8 clip_samples = config.clip_samples loss_func = get_loss_func(loss_type) black_list_csv = 'metadata/black_list/groundtruth_weak_label_evaluation_set.csv' previous_loss = None train_indexes_hdf5_path = os.path.join(workspace_input, 'hdf5s', 'indexes', '{}.h5'.format(data_type)) eval_bal_indexes_hdf5_path = os.path.join(workspace_input, 'hdf5s', 'indexes', 'balanced_train.h5') eval_test_indexes_hdf5_path = os.path.join(workspace_input, 'hdf5s', 'indexes', 'eval.h5') checkpoints_dir = os.path.join( workspace_output, 'checkpoints', filename, 'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}' .format(sample_rate, window_size, hop_size, mel_bins, fmin, fmax), 'data_type={}'.format(data_type), model_type, 'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced), 'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size)) create_folder(checkpoints_dir) statistics_path = os.path.join( workspace_output, 'statistics', filename, 'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}' .format(sample_rate, window_size, hop_size, mel_bins, fmin, fmax), 'data_type={}'.format(data_type), model_type, 'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced), 'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size), 'statistics.pkl') create_folder(os.path.dirname(statistics_path)) logs_dir = os.path.join( workspace_output, 'logs', filename, 'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}' .format(sample_rate, window_size, hop_size, mel_bins, fmin, fmax), 'data_type={}'.format(data_type), model_type, 'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced), 'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size)) create_logging(logs_dir, filemode='w') logging.info(args) if 'cuda' in str(device): logging.info('Using GPU.') device = 'cuda' else: logging.info('Using CPU.') device = 'cpu' # Model Model = eval(model_type) model = Model(sample_rate=sample_rate, window_size=window_size, hop_size=hop_size, mel_bins=mel_bins, fmin=fmin, fmax=fmax, classes_num=classes_num, freeze_base=freeze_base) params_num = count_parameters(model) # flops_num = count_flops(model, clip_samples) logging.info('Parameters num: {}'.format(params_num)) # logging.info('Flops num: {:.3f} G'.format(flops_num / 1e9)) # Dataset will be used by DataLoader later. Dataset takes a meta as input # and return a waveform and a target. dataset = AudioSetDataset(clip_samples=clip_samples, classes_num=classes_num) # Train sampler (train_sampler, train_collector) = get_train_sampler( balanced, augmentation, workspace_input + 'hdf5s/indexes/balanced_train.h5', black_list_csv, batch_size) # Evaluate sampler eval_bal_sampler = EvaluateSampler(indexes_hdf5_path=workspace_input + 'hdf5s/indexes/balanced_train.h5', batch_size=batch_size) eval_test_sampler = EvaluateSampler(indexes_hdf5_path=workspace_input + 'hdf5s/indexes/eval.h5', batch_size=batch_size) eval_collector = Collator(mixup_alpha=None) # Data loader train_loader = torch.utils.data.DataLoader(dataset=dataset, batch_sampler=train_sampler, collate_fn=train_collector, num_workers=num_workers, pin_memory=True) eval_bal_loader = torch.utils.data.DataLoader( dataset=dataset, batch_sampler=eval_bal_sampler, collate_fn=eval_collector, num_workers=num_workers, pin_memory=True) eval_test_loader = torch.utils.data.DataLoader( dataset=dataset, batch_sampler=eval_test_sampler, collate_fn=eval_collector, num_workers=num_workers, pin_memory=True) # Evaluator bal_evaluator = Evaluator(model=model, generator=eval_bal_loader) test_evaluator = Evaluator(model=model, generator=eval_test_loader) # Statistics statistics_container = StatisticsContainer(statistics_path) # Optimizer optimizer = optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999), eps=1e-08, weight_decay=0., amsgrad=True) train_bgn_time = time.time() if resume_iteration > 0: resume_checkpoint_path = os.path.join( workspace_input, 'checkpoints', filename, 'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}' .format(sample_rate, window_size, hop_size, mel_bins, fmin, fmax), 'data_type={}'.format(data_type), model_type, 'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced), 'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size), '{}_iterations.pth'.format(resume_iteration)) logging.info('Loading checkpoint {}'.format(resume_checkpoint_path)) if torch.cuda.is_available(): checkpoint = torch.load(resume_checkpoint_path) else: checkpoint = torch.load(resume_checkpoint_path, map_location='cpu') model.load_state_dict(checkpoint['model']) train_sampler.load_state_dict(checkpoint['sampler']) statistics_container.load_state_dict(resume_iteration) iteration = checkpoint['iteration'] else: iteration = 0 # Parallel print('GPU number: {}'.format(torch.cuda.device_count())) model = torch.nn.DataParallel(model) if 'cuda' in str(device): model.to(device) time1 = time.time() for iterate_n, batch_data_dict in enumerate(train_loader): """batch_data_dict: { 'audio_name': (batch_size [*2 if mixup],), 'waveform': (batch_size [*2 if mixup], clip_samples), 'target': (batch_size [*2 if mixup], classes_num), (ifexist) 'mixup_lambda': (batch_size * 2,)} """ # Evaluate if (iteration % 2000 == 0 and iteration > resume_iteration) or (iteration == 0): train_fin_time = time.time() bal_statistics = bal_evaluator.evaluate() test_statistics = test_evaluator.evaluate() logging.info('Validate bal mAP: {:.3f}'.format( np.mean(bal_statistics['average_precision']))) logging.info('Validate test mAP: {:.3f}'.format( np.mean(test_statistics['average_precision']))) statistics_container.append(iteration, bal_statistics, data_type='bal') statistics_container.append(iteration, test_statistics, data_type='test') statistics_container.dump() train_time = train_fin_time - train_bgn_time validate_time = time.time() - train_fin_time logging.info( 'iteration: {}, train time: {:.3f} s, validate time: {:.3f} s' ''.format(iteration, train_time, validate_time)) logging.info('------------------------------------') train_bgn_time = time.time() # Save model if iteration % 20000 == 0: checkpoint = { 'iteration': iteration, 'model': model.module.state_dict(), 'optimizer': optimizer.state_dict(), 'sampler': train_sampler.state_dict() } checkpoint_path = os.path.join( checkpoints_dir, '{}_iterations.pth'.format(iteration)) torch.save(checkpoint, checkpoint_path) logging.info('Model saved to {}'.format(checkpoint_path)) # Move data to device for key in batch_data_dict.keys(): batch_data_dict[key] = move_data_to_device(batch_data_dict[key], device) # Forward model.train() if 'mixup' in augmentation: batch_output_dict = model(batch_data_dict['waveform'], batch_data_dict['mixup_lambda']) """{'clipwise_output': (batch_size, classes_num), ...}""" batch_target_dict = { 'target': do_mixup(batch_data_dict['target'], batch_data_dict['mixup_lambda']) } """{'target': (batch_size, classes_num)}""" else: batch_output_dict = model(batch_data_dict['waveform'], None) """{'clipwise_output': (batch_size, classes_num), ...}""" batch_target_dict = {'target': batch_data_dict['target']} """{'target': (batch_size, classes_num)}""" loss = loss_func(batch_output_dict, batch_target_dict) # Loss # try: # loss = loss_func(batch_output_dict, batch_target_dict) # except: # tensor = batch_output_dict['clipwise_output'].detach().numpy() # arr = -1. * np.where(tensor > 0,0.,tensor) # batch_output_dict['clipwise_output'] = torch.tensor(np.where(arr > 1,1.,arr),requires_grad=True) # loss = loss_func(batch_output_dict, batch_target_dict) # Backward loss.backward() optimizer.step() optimizer.zero_grad() if iteration % 10 == 0: print('--- Iteration: {}, train time: {:.3f} s / 10 iterations ---'\ .format(iteration, time.time() - time1)) time1 = time.time() iteration += 1 # Stop learning if iteration == early_stop: break
def train(args): # Arugments & parameters dataset_dir = args.dataset_dir workspace = args.workspace holdout_fold = args.holdout_fold model_type = args.model_type pretrained_checkpoint_path = args.pretrained_checkpoint_path freeze_base = args.freeze_base loss_type = args.loss_type augmentation = args.augmentation learning_rate = args.learning_rate batch_size = args.batch_size resume_iteration = args.resume_iteration stop_iteration = args.stop_iteration device = 'cuda' if (args.cuda and torch.cuda.is_available()) else 'cpu' filename = args.filename num_workers = 8 loss_func = get_loss_func(loss_type) pretrain = True if pretrained_checkpoint_path else False hdf5_path = os.path.join(workspace, 'features', 'waveform.h5') checkpoints_dir = os.path.join(workspace, 'checkpoints', filename, 'holdout_fold={}'.format(holdout_fold), model_type, 'pretrain={}'.format(pretrain), 'loss_type={}'.format(loss_type), 'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size), 'freeze_base={}'.format(freeze_base)) create_folder(checkpoints_dir) statistics_path = os.path.join(workspace, 'statistics', filename, 'holdout_fold={}'.format(holdout_fold), model_type, 'pretrain={}'.format(pretrain), 'loss_type={}'.format(loss_type), 'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size), 'freeze_base={}'.format(freeze_base), 'statistics.pickle') create_folder(os.path.dirname(statistics_path)) logs_dir = os.path.join(workspace, 'logs', filename, 'holdout_fold={}'.format(holdout_fold), model_type, 'pretrain={}'.format(pretrain), 'loss_type={}'.format(loss_type), 'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size), 'freeze_base={}'.format(freeze_base)) create_logging(logs_dir, 'w') logging.info(args) if 'cuda' in device: logging.info('Using GPU.') else: logging.info('Using CPU. Set --cuda flag to use GPU.') # Model Model = eval(model_type) model = Model(sample_rate, window_size, hop_size, mel_bins, fmin, fmax, classes_num, freeze_base) # Statistics statistics_container = StatisticsContainer(statistics_path) if pretrain: logging.info( 'Load pretrained model from {}'.format(pretrained_checkpoint_path)) model.load_from_pretrain(pretrained_checkpoint_path) if resume_iteration: resume_checkpoint_path = os.path.join( checkpoints_dir, '{}_iterations.pth'.format(resume_iteration)) logging.info( 'Load resume model from {}'.format(resume_checkpoint_path)) resume_checkpoint = torch.load(resume_checkpoint_path) model.load_state_dict(resume_checkpoint['model']) statistics_container.load_state_dict(resume_iteration) iteration = resume_checkpoint['iteration'] else: iteration = 0 # Parallel print('GPU number: {}'.format(torch.cuda.device_count())) model = torch.nn.DataParallel(model) dataset = GtzanDataset() # Data generator train_sampler = TrainSampler(hdf5_path=hdf5_path, holdout_fold=holdout_fold, batch_size=batch_size * 2 if 'mixup' in augmentation else batch_size) validate_sampler = EvaluateSampler(hdf5_path=hdf5_path, holdout_fold=holdout_fold, batch_size=batch_size) # Data loader train_loader = torch.utils.data.DataLoader(dataset=dataset, batch_sampler=train_sampler, collate_fn=collate_fn, num_workers=num_workers, pin_memory=True) validate_loader = torch.utils.data.DataLoader( dataset=dataset, batch_sampler=validate_sampler, collate_fn=collate_fn, num_workers=num_workers, pin_memory=True) if 'cuda' in device: model.to(device) # Optimizer optimizer = optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999), eps=1e-08, weight_decay=0., amsgrad=True) if 'mixup' in augmentation: mixup_augmenter = Mixup(mixup_alpha=1.) # Evaluator evaluator = Evaluator(model=model) train_bgn_time = time.time() # Train on mini batches for batch_data_dict in train_loader: # import crash # asdf # Evaluate if iteration % 200 == 0 and iteration > 0: if resume_iteration > 0 and iteration == resume_iteration: pass else: logging.info('------------------------------------') logging.info('Iteration: {}'.format(iteration)) train_fin_time = time.time() statistics = evaluator.evaluate(validate_loader) logging.info('Validate accuracy: {:.3f}'.format( statistics['accuracy'])) statistics_container.append(iteration, statistics, 'validate') statistics_container.dump() train_time = train_fin_time - train_bgn_time validate_time = time.time() - train_fin_time logging.info('Train time: {:.3f} s, validate time: {:.3f} s' ''.format(train_time, validate_time)) train_bgn_time = time.time() # Save model if iteration % 2000 == 0 and iteration > 0: checkpoint = { 'iteration': iteration, 'model': model.module.state_dict() } checkpoint_path = os.path.join( checkpoints_dir, '{}_iterations.pth'.format(iteration)) torch.save(checkpoint, checkpoint_path) logging.info('Model saved to {}'.format(checkpoint_path)) if 'mixup' in augmentation: batch_data_dict['mixup_lambda'] = mixup_augmenter.get_lambda( len(batch_data_dict['waveform'])) # Move data to GPU for key in batch_data_dict.keys(): batch_data_dict[key] = move_data_to_device(batch_data_dict[key], device) # Train model.train() if 'mixup' in augmentation: batch_output_dict = model(batch_data_dict['waveform'], batch_data_dict['mixup_lambda']) """{'clipwise_output': (batch_size, classes_num), ...}""" batch_target_dict = { 'target': do_mixup(batch_data_dict['target'], batch_data_dict['mixup_lambda']) } """{'target': (batch_size, classes_num)}""" else: batch_output_dict = model(batch_data_dict['waveform'], None) """{'clipwise_output': (batch_size, classes_num), ...}""" batch_target_dict = {'target': batch_data_dict['target']} """{'target': (batch_size, classes_num)}""" # loss loss = loss_func(batch_output_dict, batch_target_dict) print(iteration, loss) # Backward optimizer.zero_grad() loss.backward() optimizer.step() # Stop learning if iteration == stop_iteration: break iteration += 1