def inference_validation(args): '''Inference and calculate metrics on validation data. Args: dataset_dir: string, directory of dataset workspace: string, directory of workspace taxonomy_level: 'fine' | 'coarse' model_type: string, e.g. 'Cnn_9layers_MaxPooling' iteration: int holdout_fold: '1', which means using validation data batch_size: int cuda: bool mini_data: bool, set True for debugging on a small part of data visualize: bool ''' # Arugments & parameters dataset_dir = args.dataset_dir workspace = args.workspace taxonomy_level = args.taxonomy_level model_type = args.model_type iteration = args.iteration holdout_fold = args.holdout_fold batch_size = args.batch_size cuda = args.cuda and torch.cuda.is_available() mini_data = args.mini_data visualize = args.visualize filename = args.filename mel_bins = config.mel_bins frames_per_second = config.frames_per_second labels = get_labels(taxonomy_level) classes_num = len(labels) # Paths if mini_data: prefix = 'minidata_' else: prefix = '' train_hdf5_path = os.path.join( workspace, 'features', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'train.h5') validate_hdf5_path = os.path.join( workspace, 'features', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'validate.h5') scalar_path = os.path.join( workspace, 'scalars', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'train.h5') checkpoint_path = os.path.join( workspace, 'checkpoints', filename, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'taxonomy_level={}'.format(taxonomy_level), 'holdout_fold={}'.format(holdout_fold), model_type, '{}_iterations.pth'.format(iteration)) submission_path = os.path.join( workspace, 'submissions', filename, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'taxonomy_level={}'.format(taxonomy_level), 'holdout_fold={}'.format(holdout_fold), model_type, 'submission.csv') create_folder(os.path.dirname(submission_path)) annotation_path = os.path.join(dataset_dir, 'annotations.csv') yaml_path = os.path.join(dataset_dir, 'dcase-ust-taxonomy.yaml') logs_dir = os.path.join( workspace, 'logs', filename, args.mode, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'taxonomy_level={}'.format(taxonomy_level), 'holdout_fold={}'.format(holdout_fold), model_type) create_logging(logs_dir, 'w') logging.info(args) # Load scalar scalar = load_scalar(scalar_path) # Load model Model = eval(model_type) model = Model(classes_num) checkpoint = torch.load(checkpoint_path) model.load_state_dict(checkpoint['model']) if cuda: model.cuda() # Data generator data_generator = DataGenerator(train_hdf5_path=train_hdf5_path, validate_hdf5_path=validate_hdf5_path, holdout_fold=holdout_fold, scalar=scalar, batch_size=batch_size) # Evaluator evaluator = Evaluator(model=model, data_generator=data_generator, taxonomy_level=taxonomy_level, cuda=cuda, verbose=True) # Evaluate on validation data evaluator.evaluate(data_type='validate', submission_path=submission_path, annotation_path=annotation_path, yaml_path=yaml_path, max_iteration=None) # Visualize if visualize: evaluator.visualize(data_type='validate')
def inference_evaluation(args): '''Inference on evaluation data. Args: dataset_dir: string, directory of dataset workspace: string, directory of workspace taxonomy_level: 'fine' | 'coarse' model_type: string, e.g. 'Cnn_9layers_MaxPooling' iteration: int holdout_fold: 'none', which means using model trained on all development data batch_size: int cuda: bool mini_data: bool, set True for debugging on a small part of data ''' # Arugments & parameters dataset_dir = args.dataset_dir workspace = args.workspace taxonomy_level = args.taxonomy_level model_type = args.model_type iteration = args.iteration holdout_fold = args.holdout_fold batch_size = args.batch_size cuda = args.cuda and torch.cuda.is_available() mini_data = args.mini_data filename = args.filename mel_bins = config.mel_bins frames_per_second = config.frames_per_second labels = get_labels(taxonomy_level) classes_num = len(labels) # Paths if mini_data: prefix = 'minidata_' else: prefix = '' evaluate_hdf5_path = os.path.join( workspace, 'features', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'evaluate.h5') scalar_path = os.path.join( workspace, 'scalars', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'train.h5') checkpoint_path = os.path.join( workspace, 'checkpoints', filename, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'taxonomy_level={}'.format(taxonomy_level), 'holdout_fold={}'.format(holdout_fold), model_type, 'best2.pth') submission_path = os.path.join( workspace, 'submissions', filename, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'taxonomy_level={}'.format(taxonomy_level), 'holdout_fold={}'.format(holdout_fold), model_type, 'best2_submission.csv') create_folder(os.path.dirname(submission_path)) logs_dir = os.path.join( workspace, 'logs', filename, args.mode, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'taxonomy_level={}'.format(taxonomy_level), 'holdout_fold={}'.format(holdout_fold), model_type) create_logging(logs_dir, 'w') logging.info(args) # Load scalar scalar = load_scalar(scalar_path) # Load model Model = eval(model_type) model = Model(classes_num) checkpoint = torch.load(checkpoint_path) model.load_state_dict(checkpoint['model']) if cuda: model.cuda() # Data generator data_generator = TestDataGenerator(hdf5_path=evaluate_hdf5_path, scalar=scalar, batch_size=batch_size) # Forward output_dict = forward(model=model, generate_func=data_generator.generate(), cuda=cuda, return_target=False) # Write submission write_submission_csv(audio_names=output_dict['audio_name'], outputs=output_dict['output'], taxonomy_level=taxonomy_level, submission_path=submission_path)
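# The entry points above all read their settings from an `args` namespace that
# carries a `mode` attribute. A minimal dispatch sketch (assumed for
# illustration, not the repository's actual __main__ block; the argument names
# simply mirror the attributes accessed above, and `args.filename` is filled in
# by hand here):
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='DCASE inference')
    subparsers = parser.add_subparsers(dest='mode')

    parser_val = subparsers.add_parser('inference_validation')
    parser_val.add_argument('--dataset_dir', type=str, required=True)
    parser_val.add_argument('--workspace', type=str, required=True)
    parser_val.add_argument('--taxonomy_level', type=str, choices=['fine', 'coarse'], required=True)
    parser_val.add_argument('--model_type', type=str, required=True)
    parser_val.add_argument('--iteration', type=int, required=True)
    parser_val.add_argument('--holdout_fold', type=str, default='1')
    parser_val.add_argument('--batch_size', type=int, default=32)
    parser_val.add_argument('--cuda', action='store_true', default=False)
    parser_val.add_argument('--mini_data', action='store_true', default=False)
    parser_val.add_argument('--visualize', action='store_true', default=False)

    args = parser.parse_args()
    args.filename = 'main'  # assumed: scripts in this family store the module name here

    if args.mode == 'inference_validation':
        inference_validation(args)
    elif args.mode == 'inference_evaluation':
        inference_evaluation(args)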
def inference_validation(args): '''Inference and calculate metrics on validation data. Args: dataset_dir: string, directory of dataset workspace: string, directory of workspace train_sources: 'curated' | 'noisy' | 'curated_and_noisy' segment_seconds: float, duration of audio recordings to be padded or split hop_seconds: float, hop seconds between segments pad_type: 'constant' | 'repeat' holdout_fold: '1', '2', '3', '4' model_type: string, e.g. 'Cnn_9layers_AvgPooling' iteration: int, load model of this iteration batch_size: int cuda: bool mini_data: bool, set True for debugging on a small part of data visualize: bool, visualize the logmel spectrogram of segments ''' # Arugments & parameters dataset_dir = DATASET_DIR workspace = WORKSPACE train_source = args.train_source segment_seconds = args.segment_seconds hop_seconds = args.hop_seconds pad_type = args.pad_type holdout_fold = args.holdout_fold model_type = args.model_type iteration = args.iteration batch_size = args.batch_size resume = args.resume cuda = args.cuda and torch.cuda.is_available() mini_data = args.mini_data visualize = args.visualize filename = args.filename mel_bins = config.mel_bins classes_num = config.classes_num frames_per_second = config.frames_per_second # Paths if mini_data: prefix = 'minidata_' else: prefix = '' curated_feature_hdf5_path = os.path.join( workspace, 'features', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'train_curated.h5') noisy_feature_hdf5_path = os.path.join( workspace, 'features', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'train_noisy.h5') curated_cross_validation_path = os.path.join( workspace, 'cross_validation_metadata', 'train_curated_cross_validation.csv') noisy_cross_validation_path = os.path.join( workspace, 'cross_validation_metadata', 'train_noisy_cross_validation.csv') scalar_path = os.path.join( workspace, 'scalars', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'train_noisy.h5') if not resume: checkpoint_path = os.path.join( workspace, 'checkpoints', filename, 'logmel_{}frames_{}melbins'.format(frames_per_second, mel_bins), 'train_source={}'.format(train_source), 'segment={}s,hop={}s,pad_type={}' ''.format(segment_seconds, hop_seconds, pad_type), 'holdout_fold={}' ''.format(holdout_fold), model_type, '{}_iterations.pth'.format(iteration)) else: checkpoint_path = os.path.join( workspace, 'checkpoints', filename, 'logmel_{}frames_{}melbins'.format(frames_per_second, mel_bins), 'train_source={}'.format(train_source), 'segment={}s,hop={}s,pad_type={}' ''.format(segment_seconds, hop_seconds, pad_type), 'holdout_fold={}' ''.format(holdout_fold), model_type, 'resume', '{}_iterations.pth'.format(iteration)) figs_dir = os.path.join(workspace, 'figures') create_folder(figs_dir) logs_dir = os.path.join( workspace, 'logs', filename, args.mode, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'train_source={}'.format(train_source), 'segment={}s,hop={}s,pad_type={}' ''.format(segment_seconds, hop_seconds, pad_type), 'holdout_fold={}'.format(holdout_fold), model_type) create_logging(logs_dir, 'w') logging.info(args) # Load scalar scalar = load_scalar(scalar_path) # Model Model = eval(model_type) if model_type == 'cbam_ResNet18': model = Model(18, classes_num * 2, 'CBAM') else: model = Model(classes_num * 2) checkpoint = torch.load(checkpoint_path) model.load_state_dict(checkpoint['model']) if cuda: model.cuda() # Data generator data_generator = DataGenerator( 
        curated_feature_hdf5_path=curated_feature_hdf5_path,
        noisy_feature_hdf5_path=noisy_feature_hdf5_path,
        curated_cross_validation_path=curated_cross_validation_path,
        noisy_cross_validation_path=noisy_cross_validation_path,
        train_source=train_source,
        holdout_fold=holdout_fold,
        segment_seconds=segment_seconds,
        hop_seconds=hop_seconds,
        pad_type=pad_type,
        scalar=scalar,
        batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(model=model, data_generator=data_generator, cuda=cuda)

    # Evaluate (and optionally visualize) the curated and noisy subsets
    for target_source in ['curated', 'noisy']:
        validate_statistics = evaluator.evaluate(
            data_type='validate',
            target_source=target_source,
            max_iteration=None,
            verbose=True)

        # Visualize
        if visualize:
            save_fig_path = os.path.join(
                figs_dir, '{}_logmel.png'.format(target_source))
            evaluator.visualize(
                data_type='validate',
                target_source=target_source,
                save_fig_path=save_fig_path,
                max_iteration=None,
                verbose=False)
def inference_test(args):
    '''Inference on test data and write out a submission file.

    Args:
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      train_source: 'curated' | 'noisy' | 'curated_and_noisy'
      segment_seconds: float, duration of audio recordings to be padded or split
      hop_seconds: float, hop seconds between segments
      pad_type: 'constant' | 'repeat'
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      iteration: int, load model of this iteration
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    dataset_dir = DATASET_DIR
    workspace = WORKSPACE
    train_source = args.train_source
    segment_seconds = args.segment_seconds
    hop_seconds = args.hop_seconds
    pad_type = args.pad_type
    model_type = args.model_type
    iteration = args.iteration
    batch_size = args.batch_size
    resume = args.resume
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    holdout_fold = args.holdout_fold  # Use model trained on full data without validation

    mel_bins = config.mel_bins
    classes_num = config.classes_num
    frames_per_second = config.frames_per_second

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    test_feature_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'test.h5')

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'train_noisy.h5')

    if not resume:
        checkpoint_path = os.path.join(
            workspace, 'checkpoints', filename,
            'logmel_{}frames_{}melbins'.format(frames_per_second, mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'.format(segment_seconds, hop_seconds, pad_type),
            'holdout_fold={}'.format(holdout_fold), model_type,
            '{}_iterations.pth'.format(iteration))

        submission_path = os.path.join(
            workspace, 'submissions', filename,
            'logmel_{}frames_{}melbins'.format(frames_per_second, mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'.format(segment_seconds, hop_seconds, pad_type),
            'holdout_fold={}'.format(holdout_fold), model_type,
            '{}_iterations_submission.csv'.format(iteration))
        create_folder(os.path.dirname(submission_path))
    else:
        checkpoint_path = os.path.join(
            workspace, 'checkpoints', filename,
            'logmel_{}frames_{}melbins'.format(frames_per_second, mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'.format(segment_seconds, hop_seconds, pad_type),
            'holdout_fold={}'.format(holdout_fold), model_type, 'resume',
            '{}_iterations.pth'.format(iteration))

        submission_path = os.path.join(
            workspace, 'submissions', filename,
            'logmel_{}frames_{}melbins'.format(frames_per_second, mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'.format(segment_seconds, hop_seconds, pad_type),
            'holdout_fold={}'.format(holdout_fold), model_type, 'resume',
            '{}_iterations_submission.csv'.format(iteration))
        create_folder(os.path.dirname(submission_path))

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    Model = eval(model_type)
    if model_type == 'cbam_ResNet18':
        model = Model(18, classes_num * 2, 'CBAM')
    else:
        model = Model(classes_num * 2)

    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = TestDataGenerator(
        test_feature_hdf5_path=test_feature_hdf5_path,
        segment_seconds=segment_seconds,
        hop_seconds=hop_seconds,
        pad_type=pad_type,
        scalar=scalar,
        batch_size=batch_size)

    generate_func = data_generator.generate_test()

    # Results of segments
    output_dict = forward_infer(
        model=model,
        generate_func=generate_func,
        cuda=cuda)

    # Results of audio recordings
    result_dict = segment_prediction_to_clip_prediction(
        output_dict, average='arithmetic')

    # Write submission
    write_submission(result_dict, submission_path)
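# segment_prediction_to_clip_prediction is defined elsewhere in the repository.
# A minimal sketch of how segment-level outputs could be pooled back to
# clip-level predictions with an arithmetic mean; the keys 'audio_name' and
# 'output' follow how output_dict is used above, everything else is assumed:
import numpy as np

def average_segments_sketch(output_dict, average='arithmetic'):
    audio_names = np.array(output_dict['audio_name'])
    outputs = np.array(output_dict['output'])  # (segments_num, classes_num)

    result_dict = {'audio_name': [], 'output': []}
    for name in np.unique(audio_names):
        segment_outputs = outputs[audio_names == name]
        if average == 'arithmetic':
            clip_output = np.mean(segment_outputs, axis=0)
        else:
            # Geometric mean as an alternative pooling strategy
            clip_output = np.exp(np.mean(np.log(segment_outputs + 1e-8), axis=0))
        result_dict['audio_name'].append(name)
        result_dict['output'].append(clip_output)

    result_dict['output'] = np.array(result_dict['output'])
    return result_dict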
def inference_evaluation(args): '''Inference on evaluation data and write out submission file. Args: subtask: 'a' | 'b' | 'c', corresponds to 3 subtasks in DCASE2019 Task1 data_type: 'leaderboard' | 'evaluation' workspace: string, directory of workspace model_type: string, e.g. 'Cnn_9layers' iteration: int batch_size: int cuda: bool mini_data: bool, set True for debugging on a small part of data visualize: bool ''' # Arugments & parameters subtask = args.subtask data_type = args.data_type workspace = args.workspace model_type = args.model_type iteration = args.iteration batch_size = args.batch_size cuda = args.cuda and torch.cuda.is_available() mini_data = args.mini_data filename = args.filename holdout_fold = 'none' mel_bins = config.mel_bins frames_per_second = config.frames_per_second in_domain_classes_num = len(config.labels) - 1 # Paths if mini_data: prefix = 'minidata_' else: prefix = '' sub_dir = get_subdir(subtask, data_type) trained_sub_dir = get_subdir(subtask, 'development') feature_hdf5_path = os.path.join(workspace, 'features', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), '{}.h5'.format(sub_dir)) scalar_path = os.path.join(workspace, 'scalars', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), '{}.h5'.format(trained_sub_dir)) checkpoint_path = os.path.join(workspace, 'checkpoints', filename, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), '{}'.format(trained_sub_dir), 'holdout_fold={}'.format(holdout_fold), model_type, '{}_iterations.pth'.format(iteration)) submission_path = os.path.join(workspace, 'submissions', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), sub_dir, 'holdout_fold={}'.format(holdout_fold), model_type, '{}_iterations'.format(iteration), 'submission.csv') create_folder(os.path.dirname(submission_path)) logs_dir = os.path.join(workspace, 'logs', filename, args.mode, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), '{}'.format(sub_dir), 'holdout_fold={}'.format(holdout_fold), model_type) create_logging(logs_dir, 'w') logging.info(args) # Load scalar scalar = load_scalar(scalar_path) # Load model Model = eval(model_type) if subtask in ['a', 'b']: model = Model(in_domain_classes_num, activation='logsoftmax') loss_func = nll_loss elif subtask == 'c': model = Model(in_domain_classes_num, activation='sigmoid') loss_func = F.binary_cross_entropy checkpoint = torch.load(checkpoint_path) model.load_state_dict(checkpoint['model']) if cuda: model.cuda() # Data generator data_generator = EvaluationDataGenerator( feature_hdf5_path=feature_hdf5_path, scalar=scalar, batch_size=batch_size) generate_func = data_generator.generate_evaluation(data_type) # Inference output_dict = forward(model, generate_func, cuda, return_input=False, return_target=False) # Write submission write_submission(output_dict, subtask, data_type, submission_path)
def inference_validation(args):
    '''Inference and calculate metrics on validation data.

    Args:
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      data_type: 'train_weak' | 'train_synthetic'
      holdout_fold: '1'
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      loss_type: 'clipwise_binary_crossentropy' | 'framewise_binary_crossentropy'
      iteration: int, load model of this iteration
      batch_size: int
      cuda: bool
      visualize: bool
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    data_type = args.data_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    loss_type = args.loss_type
    iteration = args.iteration
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    visualize = args.visualize
    mini_data = args.mini_data
    filename = args.filename

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    classes_num = config.classes_num

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    if loss_type == 'clipwise_binary_crossentropy':
        strong_target_training = False
    elif loss_type == 'framewise_binary_crossentropy':
        strong_target_training = True
    else:
        raise Exception('Incorrect argument!')

    train_relative_name = get_relative_path_no_extension(data_type)
    validate_relative_name = get_relative_path_no_extension('validation')

    validate_metadata_path = os.path.join(
        dataset_dir, 'metadata', 'validation',
        '{}.csv'.format(validate_relative_name))

    train_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        '{}.h5'.format(train_relative_name))

    validate_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        '{}.h5'.format(validate_relative_name))

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'train/weak.h5')

    checkpoint_path = os.path.join(
        workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        '{}'.format(train_relative_name),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'loss_type={}'.format(loss_type),
        '{}_iterations.pth'.format(iteration))

    submission_path = os.path.join(
        workspace, 'submissions', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        '{}'.format(train_relative_name),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'loss_type={}'.format(loss_type),
        'validation_submission.csv')
    create_folder(os.path.dirname(submission_path))

    logs_dir = os.path.join(
        args.workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        '{}'.format(train_relative_name),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'loss_type={}'.format(loss_type))
    create_logging(logs_dir, filemode='w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Load model
    Model = eval(model_type)
    model = Model(classes_num, strong_target_training)
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = DataGenerator(
        train_hdf5_path=train_hdf5_path,
        validate_hdf5_path=validate_hdf5_path,
        holdout_fold=holdout_fold,
        scalar=scalar,
        batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(
        model=model,
        data_generator=data_generator,
        cuda=cuda,
        verbose=True)

    evaluator.evaluate(
        data_type='validate',
        metadata_path=validate_metadata_path,
        submission_path=submission_path)

    if visualize:
        evaluator.visualize(data_type='validate')
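# load_scalar and the feature scaling it supports are defined elsewhere in the
# repository. A plausible minimal version, assuming the scalar HDF5 file stores
# per-mel-bin 'mean' and 'std' datasets computed on the training features
# (dataset names and shapes are assumptions):
import h5py
import numpy as np

def load_scalar_sketch(scalar_path):
    with h5py.File(scalar_path, 'r') as hf:
        mean = hf['mean'][:]
        std = hf['std'][:]
    return {'mean': mean, 'std': std}

def scale_sketch(x, scalar):
    # Standardize a feature array, e.g. a (frames_num, mel_bins) log-mel spectrogram
    return (x - scalar['mean']) / scalar['std']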
def train(args): '''Train. Model will be saved after several iterations. Args: dataset_dir: string, directory of dataset workspace: string, directory of workspace audio_type: 'foa' | 'mic' holdout_fold: 1 | 2 | 3 | 4 | -1, where -1 indicates using all data without validation for training model_name: string, e.g. 'Cnn_9layers' batch_size: int cuda: bool mini_data: bool, set True for debugging on a small part of data ''' # Arugments & parameters dataset_dir = args.dataset_dir workspace = args.workspace audio_type = args.audio_type holdout_fold = args.holdout_fold model_name = args.model_name batch_size = args.batch_size cuda = args.cuda and torch.cuda.is_available() mini_data = args.mini_data filename = args.filename mel_bins = config.mel_bins frames_per_second = config.frames_per_second classes_num = config.classes_num max_validate_num = 10 # Number of audio recordings to validate reduce_lr = True # Reduce learning rate after several iterations # Paths if mini_data: prefix = 'minidata_' else: prefix = '' metadata_dir = os.path.join(dataset_dir, 'metadata_dev') features_dir = os.path.join( workspace, 'features', '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev', frames_per_second, mel_bins)) scalar_path = os.path.join( workspace, 'scalars', '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev', frames_per_second, mel_bins), 'scalar.h5') models_dir = os.path.join( workspace, 'models', filename, '{}_{}{}_{}_logmel_{}frames_{}melbins'.format(model_name, prefix, audio_type, 'dev', frames_per_second, mel_bins), 'holdout_fold={}'.format(holdout_fold)) create_folder(models_dir) temp_submissions_dir = os.path.join( workspace, '_temp', 'submissions', filename, '{}_{}{}_{}_logmel_{}frames_{}melbins'.format(model_name, prefix, audio_type, 'dev', frames_per_second, mel_bins)) create_folder(temp_submissions_dir) logs_dir = os.path.join( args.workspace, 'logs', filename, args.mode, '{}_{}{}_{}_logmel_{}frames_{}melbins'.format(model_name, prefix, audio_type, 'dev', frames_per_second, mel_bins), 'holdout_fold={}'.format(holdout_fold)) create_logging(logs_dir, filemode='w') logging.info(args) # Load scalar scalar = load_scalar(scalar_path) # Model Model = eval(model_name) model = Model(classes_num) if cuda: model.cuda() # Optimizer optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.) 
# Data generator data_generator = DataGenerator(features_dir=features_dir, scalar=scalar, batch_size=batch_size, holdout_fold=holdout_fold) # Evaluator evaluator = Evaluator(model=model, data_generator=data_generator, max_validate_num=max_validate_num, cuda=cuda) train_bgn_time = time.time() iteration = 0 # Train on mini batches for batch_data_dict in data_generator.generate_train(): # Evaluate if iteration % 100 == 0: logging.info('------------------------------------') logging.info('iteration: {}'.format(iteration)) train_fin_time = time.time() train_list_dict = evaluator.evaluate(data_type='train') evaluator.metrics(train_list_dict, temp_submissions_dir, metadata_dir) if holdout_fold != -1: validate_list_dict = evaluator.evaluate(data_type='validate') evaluator.metrics(validate_list_dict, temp_submissions_dir, metadata_dir) train_time = train_fin_time - train_bgn_time validate_time = time.time() - train_fin_time logging.info('train time: {:.3f} s, validate time: {:.3f} s' ''.format(train_time, validate_time)) train_bgn_time = time.time() # Save model if iteration % 1000 == 0 and iteration > 0: checkpoint = { 'iteration': iteration, 'model': model, 'optimizer': optimizer } save_path = os.path.join(models_dir, 'md_{}_iters.pth'.format(iteration)) torch.save(checkpoint, save_path) logging.info('Model saved to {}'.format(save_path)) # Reduce learning rate if reduce_lr and iteration % 200 == 0 and iteration > 0: for param_group in optimizer.param_groups: param_group['lr'] *= 0.9 # Move data to GPU for key in batch_data_dict.keys(): batch_data_dict[key] = move_data_to_gpu(batch_data_dict[key], cuda) # Train model.train() batch_output_dict = model(batch_data_dict['feature']) loss = event_spatial_loss(batch_output_dict, batch_data_dict) # Backward optimizer.zero_grad() loss.backward() optimizer.step() # Stop learning if iteration == 10000: break iteration += 1
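# move_data_to_gpu is used throughout these scripts but defined elsewhere.
# A minimal sketch, assuming float arrays become float tensors, integer arrays
# become long tensors, and anything else (e.g. audio names) stays on the CPU:
import numpy as np
import torch

def move_data_to_gpu_sketch(x, cuda):
    if isinstance(x, np.ndarray):
        if x.dtype in (np.float32, np.float64):
            x = torch.Tensor(x)
        elif x.dtype in (np.int32, np.int64):
            x = torch.LongTensor(x)
        else:
            return x  # non-numeric data is left untouched
    if cuda:
        x = x.cuda()
    return x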
def train(args): '''Training. Model will be saved after several iterations. Args: dataset_dir: string, directory of dataset workspace: string, directory of workspace subtask: 'a' | 'b' | 'c', corresponds to 3 subtasks in DCASE2019 Task1 data_type: 'development' | 'evaluation' holdout_fold: '1' | 'none', set 1 for development and none for training on all data without validation model_type: string, e.g. 'Cnn_9layers_AvgPooling' batch_size: int cuda: bool mini_data: bool, set True for debugging on a small part of data ''' # Arugments & parameters dataset_dir = args.dataset_dir workspace = args.workspace subtask = args.subtask data_type = args.data_type holdout_fold = args.holdout_fold model_type = args.model_type batch_size = args.batch_size cuda = args.cuda and torch.cuda.is_available() mini_data = args.mini_data filename = args.filename fixed = args.fixed finetune = args.finetune ite_train = args.ite_train ite_eva = args.ite_eva ite_store = args.ite_store mel_bins = config.mel_bins frames_per_second = config.frames_per_second max_iteration = None # Number of mini-batches to evaluate on training data reduce_lr = True sources_to_evaluate = get_sources(subtask) in_domain_classes_num = len(config.labels) - 1 # Paths if mini_data: prefix = 'minidata_' else: prefix = '' sub_dir = get_subdir(subtask, data_type) train_csv = os.path.join(dataset_dir, sub_dir, 'meta.csv') validate_csv = os.path.join(dataset_dir, sub_dir, 'evaluation_setup', 'fold1_evaluate.csv') feature_hdf5_path = os.path.join(workspace, 'features', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), '{}.h5'.format(sub_dir)) scalar_path = os.path.join(workspace, 'scalars', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), '{}.h5'.format(sub_dir)) checkpoints_dir = os.path.join(workspace, 'checkpoints', filename, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), '{}'.format(sub_dir), 'holdout_fold={}'.format(holdout_fold), model_type) create_folder(checkpoints_dir) validate_statistics_path = os.path.join(workspace, 'statistics', filename, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), '{}'.format(sub_dir), 'holdout_fold={}'.format(holdout_fold), model_type, 'validate_statistics.pickle') create_folder(os.path.dirname(validate_statistics_path)) logs_dir = os.path.join(workspace, 'logs', filename, args.mode, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), '{}'.format(sub_dir), 'holdout_fold={}'.format(holdout_fold), model_type) create_logging(logs_dir, 'w') logging.info(args) if cuda: logging.info('Using GPU.') else: logging.info('Using CPU. 
Set --cuda flag to use GPU.') # Load scalar scalar = load_scalar(scalar_path) # Model Model = eval(model_type) if subtask in ['a', 'b']: if fixed=='True': model = Model(in_domain_classes_num, activation='logsoftmax', fixed=True) else : model = Model(in_domain_classes_num, activation='logsoftmax', fixed=False) loss_func = nll_loss elif subtask == 'c': model = Model(in_domain_classes_num, activation='sigmoid') loss_func = F.binary_cross_entropy if cuda: model.cuda() # Optimizer if fixed=='True': optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-3, betas=(0.9, 0.999), eps=1e-08, weight_decay=0., amsgrad=True) else : optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999), eps=1e-08, weight_decay=0., amsgrad=True) if finetune=='True': model_path='/home/cdd/code2/dcase2020_task1/workspace/checkpoints/main/logmel_86frames_40melbins/TAU-urban-acoustic-scenes-2020-mobile-development/holdout_fold=1/'+model_type+'/2000_iterations.pth' #model_path='/home/cdd/code2/dcase2020_task1/workspace/checkpoints/main/logmel_86frames_40melbins/TAU-urban-acoustic-scenes-2020-mobile-development/holdout_fold=1/Logmel_Res38/2000_iterations.pth' #model_path='/home/cdd/code2/dcase2020_task1/workspace/checkpoints/main/logmel_86frames_40melbins/TAU-urban-acoustic-scenes-2020-mobile-development/holdout_fold=1/Logmel_Cnn14/2000_iterations.pth' #model_path='/home/cdd/code2/dcase2020_task1/workspace/checkpoints/main/logmel_86frames_40melbins/TAU-urban-acoustic-scenes-2020-mobile-development/holdout_fold=1/Logmel_Cnn10/2000_iterations.pth' #model_path='/home/cdd/code2/dcase2020_task1/workspace/checkpoints/main/logmel_86frames_40melbins/TAU-urban-acoustic-scenes-2020-mobile-development/holdout_fold=1/Logmel_MobileNetV2/2000_iterations.pth' #model_path='/home/cdd/code2/dcase2020_task1/workspace/checkpoints/main/logmel_86frames_40melbins/TAU-urban-acoustic-scenes-2020-mobile-development/holdout_fold=1/Logmel_MobileNetV1/2000_iterations.pth' #model_path='/home/cdd/code2/dcase2020_task1/workspace/checkpoints/main/logmel_86frames_40melbins/TAU-urban-acoustic-scenes-2020-mobile-development/holdout_fold=1/Logmel_Wavegram_Cnn14/2000_iterations.pth' device = torch.device('cuda') checkpoint = torch.load(model_path, map_location=device) model.load_state_dict(checkpoint['model']) # Data generator data_generator = DataGenerator( feature_hdf5_path=feature_hdf5_path, train_csv=train_csv, validate_csv=validate_csv, holdout_fold=holdout_fold, scalar=scalar, batch_size=batch_size) # Evaluator evaluator = Evaluator( model=model, data_generator=data_generator, subtask=subtask, cuda=cuda) # Statistics validate_statistics_container = StatisticsContainer(validate_statistics_path) train_bgn_time = time.time() iteration = 0 # Train on mini batches for batch_data_dict in data_generator.generate_train(): # Evaluate #1800 if iteration % 200 == 0 and iteration > ite_eva: logging.info('------------------------------------') logging.info('Iteration: {}'.format(iteration)) train_fin_time = time.time() for source in sources_to_evaluate: train_statistics = evaluator.evaluate( data_type='train', source=source, max_iteration=None, verbose=False) if holdout_fold != 'none': for source in sources_to_evaluate: validate_statistics = evaluator.evaluate( data_type='validate', source=source, max_iteration=None, verbose=False) validate_statistics_container.append_and_dump( iteration, source, validate_statistics) train_time = train_fin_time - train_bgn_time validate_time = time.time() - train_fin_time logging.info( 'Train 
time: {:.3f} s, validate time: {:.3f} s' ''.format(train_time, validate_time)) train_bgn_time = time.time() # Save model if iteration % 200 == 0 and iteration > ite_store: checkpoint = { 'iteration': iteration, 'model': model.state_dict(), 'optimizer': optimizer.state_dict()} checkpoint_path = os.path.join( checkpoints_dir, '{}_iterations.pth'.format(iteration)) torch.save(checkpoint, checkpoint_path) logging.info('Model saved to {}'.format(checkpoint_path)) # Reduce learning rate if reduce_lr and iteration % 200 == 0 and iteration > 0: for param_group in optimizer.param_groups: param_group['lr'] *= 0.93 # Move data to GPU for key in batch_data_dict.keys(): if key in ['feature', 'feature_gamm', 'feature_mfcc', 'feature_panns', 'target']: batch_data_dict[key] = move_data_to_gpu(batch_data_dict[key], cuda) # Train # batch_output,batch_loss = model(batch_data_dict['feature'], batch_data_dict['feature_gamm'], batch_data_dict['feature_mfcc'], batch_data_dict['feature_panns']) # loss = loss_func(batch_output, batch_data_dict['target']) # Using Mixup model.train() mixed_x1, mixed_x2, mixed_x3, mixed_x4, y_a, y_b, lam = mixup_data(x1=batch_data_dict['feature'], x2=batch_data_dict['feature_gamm'], x3=batch_data_dict['feature_mfcc'], x4=batch_data_dict['feature_panns'], y=batch_data_dict['target'], alpha=0.2) batch_output,batch_loss = model(mixed_x1, mixed_x2, mixed_x3, mixed_x4) if batch_output.shape[1] == 10: # single scale models loss = mixup_criterion(loss_func, batch_output, y_a, y_b, lam) else: # multi scale models losses = [] for ite in range(batch_output.shape[1]-1): loss = mixup_criterion(loss_func, batch_output[:,ite,:], y_a, y_b, lam) losses.append(loss) loss = sum(losses) # Backward optimizer.zero_grad() loss.backward() optimizer.step() # Stop learning # 12000 for scratch if iteration == ite_train: break iteration += 1
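# mixup_data / mixup_criterion used in the training loop above are imported from
# elsewhere in the repository. A sketch consistent with the call site (four
# parallel feature tensors mixed with the same permutation and the same lambda
# drawn from Beta(alpha, alpha)); the exact implementation may differ:
import numpy as np
import torch

def mixup_data_sketch(x1, x2, x3, x4, y, alpha=0.2):
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1.0
    index = torch.randperm(x1.size(0), device=x1.device)

    mixed_x1 = lam * x1 + (1 - lam) * x1[index]
    mixed_x2 = lam * x2 + (1 - lam) * x2[index]
    mixed_x3 = lam * x3 + (1 - lam) * x3[index]
    mixed_x4 = lam * x4 + (1 - lam) * x4[index]
    y_a, y_b = y, y[index]
    return mixed_x1, mixed_x2, mixed_x3, mixed_x4, y_a, y_b, lam

def mixup_criterion_sketch(criterion, pred, y_a, y_b, lam):
    # Loss on mixed inputs: weighted sum of the losses against both target sets
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)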
def train(args): '''Training. Model will be saved after several iterations. Args: dataset_dir: string, directory of dataset workspace: string, directory of workspace train_sources: 'curated' | 'noisy' | 'curated_and_noisy' segment_seconds: float, duration of audio recordings to be padded or split hop_seconds: float, hop seconds between segments pad_type: 'constant' | 'repeat' holdout_fold: '1', '2', '3', '4' | 'none', set `none` for training on all data without validation model_type: string, e.g. 'Cnn_9layers_AvgPooling' batch_size: int cuda: bool mini_data: bool, set True for debugging on a small part of data ''' # Arugments & parameters dataset_dir = args.dataset_dir workspace = args.workspace train_source = args.train_source segment_seconds = args.segment_seconds hop_seconds = args.hop_seconds pad_type = args.pad_type holdout_fold = args.holdout_fold model_type = args.model_type batch_size = args.batch_size cuda = args.cuda and torch.cuda.is_available() mini_data = args.mini_data filename = args.filename mel_bins = config.mel_bins classes_num = config.classes_num frames_per_second = config.frames_per_second max_iteration = 500 # Number of mini-batches to evaluate on training data reduce_lr = False # Paths if mini_data: prefix = 'minidata_' else: prefix = '' curated_feature_hdf5_path = os.path.join( workspace, 'features', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'train_curated.h5') noisy_feature_hdf5_path = os.path.join( workspace, 'features', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'train_noisy.h5') curated_cross_validation_path = os.path.join( workspace, 'cross_validation_metadata', 'train_curated_cross_validation.csv') noisy_cross_validation_path = os.path.join( workspace, 'cross_validation_metadata', 'train_noisy_cross_validation.csv') scalar_path = os.path.join( workspace, 'scalars', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'train_noisy.h5') checkpoints_dir = os.path.join( workspace, 'checkpoints', filename, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'train_source={}'.format(train_source), 'segment={}s,hop={}s,pad_type={}'.format(segment_seconds, hop_seconds, pad_type), 'holdout_fold={}'.format(holdout_fold), model_type) create_folder(checkpoints_dir) validate_statistics_path = os.path.join( workspace, 'statistics', filename, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'train_source={}'.format(train_source), 'segment={}s,hop={}s,pad_type={}'.format(segment_seconds, hop_seconds, pad_type), 'holdout_fold={}'.format(holdout_fold), model_type, 'validate_statistics.pickle') create_folder(os.path.dirname(validate_statistics_path)) logs_dir = os.path.join( workspace, 'logs', filename, args.mode, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'train_source={}'.format(train_source), 'segment={}s,hop={}s,pad_type={}'.format(segment_seconds, hop_seconds, pad_type), 'holdout_fold={}'.format(holdout_fold), model_type) create_logging(logs_dir, 'w') logging.info(args) # Load scalar scalar = load_scalar(scalar_path) # Model Model = eval(model_type) model = Model(classes_num) if cuda: model.cuda() # Optimizer optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999), eps=1e-08, weight_decay=0., amsgrad=True) # Data generator data_generator = DataGenerator( curated_feature_hdf5_path=curated_feature_hdf5_path, noisy_feature_hdf5_path=noisy_feature_hdf5_path, 
curated_cross_validation_path=curated_cross_validation_path, noisy_cross_validation_path=noisy_cross_validation_path, train_source=train_source, holdout_fold=holdout_fold, segment_seconds=segment_seconds, hop_seconds=hop_seconds, pad_type=pad_type, scalar=scalar, batch_size=batch_size) # Evaluator evaluator = Evaluator(model=model, data_generator=data_generator, cuda=cuda) # Statistics validate_statistics_container = StatisticsContainer( validate_statistics_path) train_bgn_time = time.time() iteration = 0 # Train on mini batches for batch_data_dict in data_generator.generate_train(): # Evaluate if iteration % 500 == 0: logging.info('------------------------------------') logging.info('Iteration: {}'.format(iteration)) train_fin_time = time.time() # Evaluate on partial of train data logging.info('Train statistics:') for target_source in ['curated', 'noisy']: validate_curated_statistics = evaluator.evaluate( data_type='train', target_source=target_source, max_iteration=max_iteration, verbose=False) # Evaluate on holdout validation data if holdout_fold != 'none': logging.info('Validate statistics:') for target_source in ['curated', 'noisy']: validate_curated_statistics = evaluator.evaluate( data_type='validate', target_source=target_source, max_iteration=None, verbose=False) validate_statistics_container.append( iteration, target_source, validate_curated_statistics) validate_statistics_container.dump() train_time = train_fin_time - train_bgn_time validate_time = time.time() - train_fin_time logging.info('Train time: {:.3f} s, validate time: {:.3f} s' ''.format(train_time, validate_time)) train_bgn_time = time.time() # Save model if iteration % 1000 == 0 and iteration > 0: checkpoint = { 'iteration': iteration, 'model': model.state_dict(), 'optimizer': optimizer.state_dict() } checkpoint_path = os.path.join( checkpoints_dir, '{}_iterations.pth'.format(iteration)) torch.save(checkpoint, checkpoint_path) logging.info('Model saved to {}'.format(checkpoint_path)) # Reduce learning rate if reduce_lr and iteration % 200 == 0 and iteration > 0: for param_group in optimizer.param_groups: param_group['lr'] *= 0.9 # Move data to GPU for key in batch_data_dict.keys(): if key in ['feature', 'mask', 'target']: batch_data_dict[key] = move_data_to_gpu( batch_data_dict[key], cuda) # Train model.train() batch_output = model(batch_data_dict['feature']) # loss loss = binary_cross_entropy(batch_output, batch_data_dict['target']) # Backward optimizer.zero_grad() loss.backward() optimizer.step() # Stop learning if iteration == 20000: break iteration += 1
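# The DataGenerator above pads or splits each log-mel feature into fixed-length
# segments controlled by segment_seconds, hop_seconds and pad_type. A rough
# sketch of that segmentation logic (function name and exact behaviour are
# assumptions; the real generator works on frame counts derived from
# frames_per_second):
import numpy as np

def pad_or_split_sketch(feature, segment_frames, hop_frames, pad_type='repeat'):
    '''feature: (frames_num, mel_bins). Returns a list of (segment_frames, mel_bins) segments.'''
    frames_num = feature.shape[0]

    if frames_num < segment_frames:
        if pad_type == 'constant':
            # Zero-pad short recordings up to one full segment
            pad = np.zeros((segment_frames - frames_num, feature.shape[1]))
            return [np.concatenate((feature, pad), axis=0)]
        else:  # 'repeat'
            repeats = int(np.ceil(segment_frames / frames_num))
            return [np.tile(feature, (repeats, 1))[0:segment_frames]]

    # Long recordings are split into overlapping segments with the given hop
    segments = []
    pointer = 0
    while pointer + segment_frames <= frames_num:
        segments.append(feature[pointer: pointer + segment_frames])
        pointer += hop_frames
    return segments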
def inference_validation(args): '''Inference validation data. Args: dataset_dir: string, directory of dataset workspace: string, directory of workspace audio_type: 'foa' | 'mic' holdout_fold: 1 | 2 | 3 | 4 | -1, where -1 indicates calculating metrics on all 1, 2, 3 and 4 folds. model_name: string, e.g. 'Cnn_9layers' batch_size: int cuda: bool visualize: bool mini_data: bool, set True for debugging on a small part of data ''' # Arugments & parameters dataset_dir = args.dataset_dir workspace = args.workspace audio_type = args.audio_type holdout_fold = args.holdout_fold model_name = args.model_name iteration = args.iteration batch_size = args.batch_size cuda = args.cuda and torch.cuda.is_available() visualize = args.visualize mini_data = args.mini_data filename = args.filename mel_bins = config.mel_bins frames_per_second = config.frames_per_second # Paths if mini_data: prefix = 'minidata_' else: prefix = '' metadata_dir = os.path.join(dataset_dir, 'metadata_dev') submissions_dir = os.path.join( workspace, 'submissions', filename, '{}_{}{}_{}_logmel_{}frames_{}melbins'.format(model_name, prefix, audio_type, 'dev', frames_per_second, mel_bins), 'iteration={}'.format(iteration)) create_folder(submissions_dir) logs_dir = os.path.join( args.workspace, 'logs', filename, args.mode, '{}_{}{}_{}_logmel_{}frames_{}melbins'.format(model_name, prefix, audio_type, 'dev', frames_per_second, mel_bins), 'holdout_fold={}'.format(holdout_fold)) create_logging(logs_dir, filemode='w') logging.info(args) # Inference and calculate metrics for a fold if holdout_fold != -1: features_dir = os.path.join( workspace, 'features', '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev', frames_per_second, mel_bins)) scalar_path = os.path.join( workspace, 'scalars', '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev', frames_per_second, mel_bins), 'scalar.h5') checkoutpoint_path = os.path.join( workspace, 'models', filename, '{}_{}{}_{}_logmel_{}frames_{}melbins'.format( model_name, '', audio_type, 'dev', frames_per_second, mel_bins), 'holdout_fold={}'.format(holdout_fold), 'md_{}_iters.pth'.format(iteration)) # Load scalar scalar = load_scalar(scalar_path) # Load model checkpoint = torch.load(checkoutpoint_path) model = checkpoint['model'] if cuda: model.cuda() # Data generator data_generator = DataGenerator(features_dir=features_dir, scalar=scalar, batch_size=batch_size, holdout_fold=holdout_fold) # Evaluator evaluator = Evaluator(model=model, data_generator=data_generator, cuda=cuda) # Calculate metrics data_type = 'validate' list_dict = evaluator.evaluate(data_type=data_type) evaluator.metrics(list_dict=list_dict, submissions_dir=submissions_dir, metadata_dir=metadata_dir) # Visualize reference and predicted events, elevation and azimuth if visualize: evaluator.visualize(data_type=data_type) # Calculate metrics for all folds else: prediction_names = os.listdir(submissions_dir) prediction_paths = [os.path.join(submissions_dir, name) for \ name in prediction_names] metrics = calculate_metrics(metadata_dir=metadata_dir, prediction_paths=prediction_paths) logging.info('Metrics of {} files: '.format(len(prediction_names))) for key in metrics.keys(): logging.info(' {:<20} {:.3f}'.format(key + ' :', metrics[key]))
def train(args):
    '''Training. Model will be saved after several iterations.

    Args:
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      taxonomy_level: 'fine' | 'coarse'
      model_type: string, e.g. 'Cnn_9layers_MaxPooling'
      holdout_fold: '1' | 'None', where '1' indicates using validation and 'None' indicates using full data for training
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    taxonomy_level = args.taxonomy_level
    model_type = args.model_type
    holdout_fold = args.holdout_fold
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    plt_x = []
    plt_y = []
    T_max = 300
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    max_iteration = 10  # Number of mini-batches to evaluate on training data
    reduce_lr = True
    labels = get_labels(taxonomy_level)
    classes_num = len(labels)

    def mixup_data(x1, x2, y, alpha=1.0, use_cuda=True):
        '''Mixup data augmentation. Returns mixed inputs, pairs of targets, and lambda.'''
        if alpha > 0:
            # Draw the mixing coefficient from Beta(alpha, alpha)
            lam = np.random.beta(alpha, alpha)
        else:
            lam = 1

        # batch_size = x1.size()[0]
        if use_cuda:
            # torch.randperm(n) returns a random permutation of 0 .. n-1;
            # `batch_size` is taken from the enclosing train() scope
            index = torch.randperm(batch_size).cuda()
        else:
            index = torch.randperm(batch_size)

        # Mix both feature streams with the same permutation and lambda
        mixed_x1 = lam * x1 + (1 - lam) * x1[index, :]
        mixed_x2 = lam * x2 + (1 - lam) * x2[index, :]
        y_a, y_b = y, y[index]
        return mixed_x1, mixed_x2, y_a, y_b, lam

    def mixup_criterion(criterion, pred, y_a, y_b, lam):
        return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    train_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'train.h5')

    validate_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'validate.h5')

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'train.h5')

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_folder(checkpoints_dir)

    _temp_submission_path = os.path.join(
        workspace, '_temp_submissions', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type, '_submission.csv')
    create_folder(os.path.dirname(_temp_submission_path))

    validate_statistics_path = os.path.join(
        workspace, 'statistics', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))

    loss_path = os.path.join(
        workspace, 'loss',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_folder(loss_path)

    annotation_path = os.path.join(dataset_dir, 'annotations.csv')
    yaml_path = os.path.join(dataset_dir, 'dcase-ust-taxonomy.yaml')

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    if cuda:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    Model = eval(model_type)
    model = Model(classes_num)
    logging.info(
        'Space_Duo_Cnn_9_Avg: one extra layer, 258*258, FC layers not shared; '
        'space-time labels are required; loss monitoring is used together with '
        'zero-removed one-hot targets')

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999),
                           eps=1e-08, weight_decay=0., amsgrad=True)

    # Count the number of model parameters
    logging.info('model params: {}'.format(
        sum(param.numel() for param in model.parameters())))

    # Data generator
    data_generator = DataGenerator(
        train_hdf5_path=train_hdf5_path,
        validate_hdf5_path=validate_hdf5_path,
        holdout_fold=holdout_fold,
        scalar=scalar,
        batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(
        model=model,
        data_generator=data_generator,
        taxonomy_level=taxonomy_level,
        cuda=cuda,
        verbose=False)

    # Statistics
    validate_statistics_container = StatisticsContainer(validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0

    best_inde = {}
    best_inde['micro_auprc'] = np.array([0.0])
    best_inde['micro_f1'] = np.array([0.0])
    best_inde['macro_auprc'] = np.array([0.0])
    best_inde['average_precision'] = np.array([0.0])
    best_inde['sum'] = (best_inde['micro_auprc'] + best_inde['micro_f1']
                        + best_inde['macro_auprc'])

    last_loss1 = []
    last_loss2 = []
    last_loss = []
    best_map = 0

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate
        if iteration % 200 == 0:
            logging.info('------------------------------------')
            logging.info('Iteration: {}, {} level statistics:'.format(
                iteration, taxonomy_level))

            train_fin_time = time.time()

            # Evaluate on training data
            if mini_data:
                raise Exception('`mini_data` flag must be set to False to use '
                                'the official evaluation tool!')

            train_statistics = evaluator.evaluate(data_type='train', max_iteration=None)

            if iteration > 5000:
                if best_map < np.mean(train_statistics['average_precision']):
                    best_map = np.mean(train_statistics['average_precision'])
                    logging.info('best_map= {}'.format(best_map))
                    # logging.info('iter= {}'.format(iteration))
                    checkpoint = {
                        'iteration': iteration,
                        'model': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'indicators': train_statistics}
                    checkpoint_path = os.path.join(checkpoints_dir, 'best7.pth')
                    torch.save(checkpoint, checkpoint_path)
                    logging.info('best_models saved to {}'.format(checkpoint_path))

            # Evaluate on validation data
            if holdout_fold != 'none':
                validate_statistics = evaluator.evaluate(
                    data_type='validate',
                    submission_path=_temp_submission_path,
                    annotation_path=annotation_path,
                    yaml_path=yaml_path,
                    max_iteration=None)

                validate_statistics_container.append_and_dump(
                    iteration, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Reduce learning rate
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Binarize soft targets with a 0.6 threshold and collect the indices of
        # samples whose targets become all-zero so that they can be dropped
        batch_data2_dict = batch_data_dict.copy()
        n = []
        for i, l in enumerate(batch_data2_dict['coarse_target']):
            k = 0
            for j in range(0, 8):
                if l[j] > 0.6:
                    l[j] = 1
                else:
                    l[j] = 0
                    k += 1
            if k == 8:
                if taxonomy_level == 'coarse':
                    n.append(i)

        for i, l in enumerate(batch_data2_dict['fine_target']):
            k = 0
            for j in range(0, 29):
                if l[j] > 0.6:
                    l[j] = 1
                else:
                    l[j] = 0
                    k += 1
            if k == 29:
                if taxonomy_level == 'fine':
                    n.append(i)

        batch_data2_dict['fine_target'] = np.delete(
            batch_data2_dict['fine_target'], n, axis=0)
        batch_data2_dict['coarse_target'] = np.delete(
            batch_data2_dict['coarse_target'], n, axis=0)
        batch_data2_dict['audio_name'] = np.delete(
            batch_data2_dict['audio_name'], n, axis=0)
        batch_data2_dict['feature'] = np.delete(
            batch_data2_dict['feature'], n, axis=0)
        batch_data2_dict['spacetime'] = np.delete(
            batch_data2_dict['spacetime'], n, axis=0)

        if batch_data2_dict['audio_name'].size == 0:
            iteration += 1
            continue

        # To train on the probability (soft) targets, comment out the next line;
        # keep it to train on the zero-removed one-hot targets
        batch_data_dict = batch_data2_dict
        # if iteration < 8655:
        #     batch_data_dict = batch_data2_dict
        # elif iteration >= 8655 and iteration % 2 == 0:
        #     batch_data_dict = batch_data2_dict

        # Move data to GPU ('external_target' and 'external_feature' keys are not moved)
        for key in batch_data_dict.keys():
            if key in ['feature', 'fine_target', 'coarse_target', 'spacetime']:
                batch_data_dict[key] = move_data_to_gpu(batch_data_dict[key], cuda)

        # Train
        model.train()

        # Mixup data augmentation
        feature1, spacetime1, targets1_a, targets1_b, lam1 = mixup_data(
            batch_data_dict['feature'], batch_data_dict['spacetime'],
            batch_data_dict['fine_target'], alpha=1.0, use_cuda=True)
        feature2, spacetime2, targets2_a, targets2_b, lam2 = mixup_data(
            batch_data_dict['feature'], batch_data_dict['spacetime'],
            batch_data_dict['coarse_target'], alpha=1.0, use_cuda=True)

        batch_output1 = model.forward1(feature1, spacetime1)
        batch_output2 = model.forward2(feature2, spacetime2)

        # Note: casting lam to int truncates values in (0, 1) to 0, so the mixed
        # loss below effectively keeps only the permuted-target term
        lam1 = int(lam1)
        lam2 = int(lam2)
        loss1 = (lam1 * binary_cross_entropy(batch_output1, targets1_a)
                 + (1 - lam1) * binary_cross_entropy(batch_output1, targets1_b))
        loss2 = (lam2 * binary_cross_entropy(batch_output2, targets2_a)
                 + (1 - lam2) * binary_cross_entropy(batch_output2, targets2_b))

        # To train without mixup data augmentation, use the following code instead:
        # batch_target1 = batch_data_dict['fine_target']
        # batch_output1 = model.forward1(batch_data_dict['feature'], batch_data_dict['spacetime'])
        # batch_target2 = batch_data_dict['coarse_target']
        # batch_output2 = model.forward2(batch_data_dict['feature'], batch_data_dict['spacetime'])
        # loss1 = binary_cross_entropy(batch_output1, batch_target1)
        # loss2 = binary_cross_entropy(batch_output2, batch_target2)

        loss = loss1 + loss2

        # Loss monitoring: keep the following block to drop or down-weight batches
        # whose loss jumps abnormally; comment it out to disable loss monitoring
        if iteration > 4320:
            new_loss = loss.item()
            if len(last_loss) < 5:
                last_loss.append(new_loss)
            else:
                cha = 0
                for i in range(4):
                    cha += abs(last_loss[i + 1] - last_loss[i])
                if new_loss > last_loss[4] and cha >= (new_loss - last_loss[4]) > cha / 2:
                    for i in range(4):
                        last_loss[i] = last_loss[i + 1]
                    last_loss[4] = new_loss
                    logging.info(' drop iteration:{}'.format(iteration))
                    iteration += 1
                    continue
                elif new_loss > last_loss[4] and (new_loss - last_loss[4]) > cha / 2.75:
                    for i in range(4):
                        last_loss[i] = last_loss[i + 1]
                    last_loss[4] = new_loss
                    logging.info(' low weight iteration:{}'.format(iteration))
                    loss = loss / 2
                else:
                    for i in range(4):
                        last_loss[i] = last_loss[i + 1]
                    last_loss[4] = new_loss

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if iteration % 50 == 0:
            plt_x.append(iteration)
            plt_y.append(loss)

        if iteration % 13000 == 0 and iteration != 0:
            plt.figure(1)
            plt.suptitle('test result ', fontsize='18')
            plt.plot(plt_x, plt_y, 'r-', label='loss')
            plt.legend(loc='best')
            plt.savefig(loss_path + '/'
                        + time.strftime('%m%d_%H%M%S', time.localtime(time.time()))
                        + 'loss.jpg')
            plt.savefig(loss_path + '/loss.jpg')

        # Stop learning
        if iteration == 13000:
            # logging.info("best_micro_auprc:{:.3f}".format(best_inde['micro_auprc']))
            # logging.info("best_micro_f1:{:.3f}".format(best_inde['micro_f1']))
            # logging.info("best_macro_auprc:{:.3f}".format(best_inde['macro_auprc']))
            # labels = get_labels(taxonomy_level)
            # for k, label in enumerate(labels):
            #     logging.info('  {:<40}{:.3f}'.format(label, best_inde['average_precision'][k]))
            break

        iteration += 1
def train(args): '''Train. Model will be saved after several iterations. Args: dataset_dir: string, directory of dataset workspace: string, directory of workspace audio_type: 'foa' | 'mic' holdout_fold: '1' | '2' | '3' | '4' | 'none', set to none if using all data without validation to train model_type: string, e.g. 'Cnn_9layers_AvgPooling' batch_size: int cuda: bool mini_data: bool, set True for debugging on a small part of data ''' # Arugments & parameters dataset_dir = args.dataset_dir workspace = args.workspace audio_type = args.audio_type holdout_fold = args.holdout_fold model_type = args.model_type batch_size = args.batch_size cuda = args.cuda and torch.cuda.is_available() mini_data = args.mini_data filename = args.filename mel_bins = config.mel_bins frames_per_second = config.frames_per_second classes_num = config.classes_num max_validate_num = None # Number of audio recordings to validate reduce_lr = True # Reduce learning rate after several iterations # Paths if mini_data: prefix = 'minidata_' else: prefix = '' metadata_dir = os.path.join(dataset_dir, 'metadata_dev') features_dir = os.path.join( workspace, 'features', '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev', frames_per_second, mel_bins)) scalar_path = os.path.join( workspace, 'scalars', '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev', frames_per_second, mel_bins), 'scalar.h5') checkpoints_dir = os.path.join( workspace, 'checkpoints', filename, '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev', frames_per_second, mel_bins), model_type, 'holdout_fold={}'.format(holdout_fold)) create_folder(checkpoints_dir) # All folds result should write to the same directory temp_submissions_dir = os.path.join( workspace, '_temp', 'submissions', filename, '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev', frames_per_second, mel_bins), model_type) create_folder(temp_submissions_dir) validate_statistics_path = os.path.join( workspace, 'statistics', filename, '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev', frames_per_second, mel_bins), 'holdout_fold={}'.format(holdout_fold), model_type, 'validate_statistics.pickle') create_folder(os.path.dirname(validate_statistics_path)) logs_dir = os.path.join( args.workspace, 'logs', filename, args.mode, '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev', frames_per_second, mel_bins), 'holdout_fold={}'.format(holdout_fold), model_type) create_logging(logs_dir, filemode='w') logging.info(args) if cuda: logging.info('Using GPU.') else: logging.info('Using CPU. 
Set --cuda flag to use GPU.') # Load scalar scalar = load_scalar(scalar_path) # Model Model = eval(model_type) model = Model(classes_num) if cuda: model.cuda() # Optimizer optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999), eps=1e-08, weight_decay=0., amsgrad=True) # Data generator data_generator = DataGenerator(features_dir=features_dir, scalar=scalar, batch_size=batch_size, holdout_fold=holdout_fold) # Evaluator evaluator = Evaluator(model=model, data_generator=data_generator, cuda=cuda) # Statistics validate_statistics_container = StatisticsContainer( validate_statistics_path) train_bgn_time = time.time() iteration = 0 # Train on mini batches for batch_data_dict in data_generator.generate_train(): # Evaluate if iteration % 200 == 0: logging.info('------------------------------------') logging.info('Iteration: {}'.format(iteration)) train_fin_time = time.time() ''' # Uncomment for evaluating on training dataset train_statistics = evaluator.evaluate( data_type='train', metadata_dir=metadata_dir, submissions_dir=temp_submissions_dir, max_validate_num=max_validate_num) ''' if holdout_fold != 'none': validate_statistics = evaluator.evaluate( data_type='validate', metadata_dir=metadata_dir, submissions_dir=temp_submissions_dir, max_validate_num=max_validate_num) validate_statistics_container.append_and_dump( iteration, validate_statistics) train_time = train_fin_time - train_bgn_time validate_time = time.time() - train_fin_time logging.info('Train time: {:.3f} s, validate time: {:.3f} s' ''.format(train_time, validate_time)) train_bgn_time = time.time() # Save model if iteration % 1000 == 0 and iteration > 0: checkpoint = { 'iteration': iteration, 'model': model.state_dict(), 'optimizer': optimizer.state_dict() } checkpoint_path = os.path.join( checkpoints_dir, '{}_iterations.pth'.format(iteration)) torch.save(checkpoint, checkpoint_path) logging.info('Model saved to {}'.format(checkpoint_path)) # Reduce learning rate if reduce_lr and iteration % 200 == 0 and iteration > 0: for param_group in optimizer.param_groups: param_group['lr'] *= 0.9 # Move data to GPU for key in batch_data_dict.keys(): batch_data_dict[key] = move_data_to_gpu(batch_data_dict[key], cuda) # Train model.train() batch_output_dict = model(batch_data_dict['feature']) loss = event_spatial_loss(batch_output_dict, batch_data_dict) # Backward optimizer.zero_grad() loss.backward() optimizer.step() # Stop learning if iteration == 5000: break iteration += 1
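# StatisticsContainer is used above to accumulate validation statistics per
# iteration and persist them as a pickle file. A minimal sketch of such a
# container (the real class may store richer records or keep a backup copy;
# the append_and_dump signature follows its use in the training loop above):
import pickle

class StatisticsContainerSketch(object):
    def __init__(self, statistics_path):
        self.statistics_path = statistics_path
        self.statistics_list = []

    def append_and_dump(self, iteration, statistics):
        # Record the iteration alongside the statistics, then rewrite the pickle
        statistics['iteration'] = iteration
        self.statistics_list.append(statistics)
        with open(self.statistics_path, 'wb') as f:
            pickle.dump(self.statistics_list, f)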
def inference_validation(args):
    '''Inference on validation data and calculate metrics.

    Args:
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      audio_type: 'foa' | 'mic'
      holdout_fold: '1' | '2' | '3' | '4' | 'none', where 'none' summarizes
          and prints the results of all folds 1, 2, 3 and 4
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      iteration: int, load model of this iteration
      batch_size: int
      cuda: bool
      visualize: bool
      mini_data: bool, set True for debugging on a small part of data
    '''
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    audio_type = args.audio_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    iteration = args.iteration
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    visualize = args.visualize
    mini_data = args.mini_data
    filename = args.filename

    # Debug overrides used during development; uncomment to bypass the
    # command line arguments:
    # audio_type = 'foa'
    # holdout_fold = '1'
    # model_type = 'Cnn_9layers_AvgPooling'
    # iteration = 1000
    # batch_size = 32
    # cuda = True
    # visualize = True
    # mini_data = True
    # filename = 'train'

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    classes_num = config.classes_num

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    metadata_dir = os.path.join(dataset_dir, 'metadata_dev')

    submissions_dir = os.path.join(workspace, filename, 'submissions',
        model_type, 'iteration={}'.format(iteration))
    create_folder(submissions_dir)

    logs_dir = os.path.join(workspace, filename, 'logs', model_type)
    create_logging(logs_dir, filemode='w')

    # Inference and calculate metrics for a single fold
    if holdout_fold != 'none':
        features_dir = os.path.join(workspace, 'features')
        scalar_path = os.path.join(workspace, 'scalars', 'scalar.h5')
        checkpoint_path = os.path.join(workspace, filename, 'checkpoints',
            model_type, '{}_iterations.pth'.format(iteration))

        # Load scalar
        scalar = load_scalar(scalar_path)

        # Load model
        Model = eval(model_type)
        model = Model(classes_num)
        checkpoint = torch.load(checkpoint_path)
        model.load_state_dict(checkpoint['model'])

        if cuda:
            model.cuda()

        # Data generator
        data_generator = DataGenerator(
            features_dir=features_dir,
            scalar=scalar,
            batch_size=batch_size,
            holdout_fold=holdout_fold)

        # Evaluator
        evaluator = Evaluator(
            model=model,
            data_generator=data_generator,
            cuda=cuda)

        # Calculate metrics
        data_type = 'validate'

        evaluator.evaluate(
            data_type=data_type,
            metadata_dir=metadata_dir,
            submissions_dir=submissions_dir,
            max_validate_num=None)

        # Visualize reference and predicted events, elevation and azimuth
        if visualize:
            evaluator.visualize(data_type=data_type)

    # Summarize metrics over all 4 folds
    else:
        prediction_names = os.listdir(submissions_dir)
        prediction_paths = [os.path.join(submissions_dir, name)
            for name in prediction_names]

        metrics = calculate_metrics(metadata_dir=metadata_dir,
            prediction_paths=prediction_paths)

        logging.info('Metrics of {} files: '.format(len(prediction_names)))

        for key in metrics.keys():
            logging.info('    {:<20} {:.3f}'.format(key + ' :', metrics[key]))
def train(args): '''Training. Model will be saved after several iterations. Args: dataset_dir: string, directory of dataset workspace: string, directory of workspace taxonomy_level: 'fine' | 'coarse' model_type: string, e.g. 'Cnn_9layers_MaxPooling' holdout_fold: '1' | 'None', where '1' indicates using validation and 'None' indicates using full data for training batch_size: int cuda: bool mini_data: bool, set True for debugging on a small part of data ''' # Arugments & parameters dataset_dir = args.dataset_dir workspace = args.workspace taxonomy_level = args.taxonomy_level model_type = args.model_type holdout_fold = args.holdout_fold batch_size = args.batch_size cuda = args.cuda and torch.cuda.is_available() mini_data = args.mini_data filename = args.filename plt_x = [] plt_y = [] mel_bins = config.mel_bins frames_per_second = config.frames_per_second max_iteration = 10 # Number of mini-batches to evaluate on training data reduce_lr = True labels = get_labels(taxonomy_level) classes_num = len(labels) # Paths if mini_data: prefix = 'minidata_' else: prefix = '' train_hdf5_path = os.path.join( workspace, 'features', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'train.h5') validate_hdf5_path = os.path.join( workspace, 'features', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'validate.h5') scalar_path = os.path.join( workspace, 'scalars', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'train.h5') checkpoints_dir = os.path.join( workspace, 'checkpoints', filename, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'taxonomy_level={}'.format(taxonomy_level), 'holdout_fold={}'.format(holdout_fold), model_type) create_folder(checkpoints_dir) _temp_submission_path = os.path.join( workspace, '_temp_submissions', filename, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'taxonomy_level={}'.format(taxonomy_level), 'holdout_fold={}'.format(holdout_fold), model_type, '_submission.csv') create_folder(os.path.dirname(_temp_submission_path)) validate_statistics_path = os.path.join( workspace, 'statistics', filename, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'taxonomy_level={}'.format(taxonomy_level), 'holdout_fold={}'.format(holdout_fold), model_type, 'validate_statistics.pickle') create_folder(os.path.dirname(validate_statistics_path)) annotation_path = os.path.join(dataset_dir, 'annotations.csv') yaml_path = os.path.join(dataset_dir, 'dcase-ust-taxonomy.yaml') logs_dir = os.path.join( workspace, 'logs', filename, args.mode, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'taxonomy_level={}'.format(taxonomy_level), 'holdout_fold={}'.format(holdout_fold), model_type) create_logging(logs_dir, 'w') logging.info(args) if cuda: logging.info('Using GPU.') else: logging.info('Using CPU. 
Set --cuda flag to use GPU.') # Load scalar scalar = load_scalar(scalar_path) # Model Model = eval(model_type) model = Model(classes_num) if cuda: model.cuda() # Optimizer optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999), eps=1e-08, weight_decay=0., amsgrad=True) # Data generator data_generator = DataGenerator(train_hdf5_path=train_hdf5_path, validate_hdf5_path=validate_hdf5_path, holdout_fold=holdout_fold, scalar=scalar, batch_size=batch_size) # Evaluator evaluator = Evaluator(model=model, data_generator=data_generator, taxonomy_level=taxonomy_level, cuda=cuda, verbose=False) # Statistics validate_statistics_container = StatisticsContainer( validate_statistics_path) train_bgn_time = time.time() iteration = 0 best_inde = {} best_inde['micro_auprc'] = np.array([0.0]) best_inde['micro_f1'] = np.array([0.0]) best_inde['macro_auprc'] = np.array([0.0]) best_inde['average_precision'] = np.array([0.0]) best_inde['sum'] = best_inde['micro_auprc'] + best_inde[ 'micro_f1'] + best_inde['macro_auprc'] best_map = 0 # Train on mini batches for batch_data_dict in data_generator.generate_train(): # Evaluate if iteration % 200 == 0: logging.info('------------------------------------') logging.info('Iteration: {}, {} level statistics:'.format( iteration, taxonomy_level)) train_fin_time = time.time() # Evaluate on training data if mini_data: raise Exception('`mini_data` flag must be set to False to use ' 'the official evaluation tool!') train_statistics = evaluator.evaluate(data_type='train', max_iteration=None) if iteration > 5000: if best_map < np.mean(train_statistics['average_precision']): best_map = np.mean(train_statistics['average_precision']) logging.info('best_map= {}'.format(best_map)) # logging.info('iter= {}'.format(iteration)) checkpoint = { 'iteration': iteration, 'model': model.state_dict(), 'optimizer': optimizer.state_dict(), 'indicators': train_statistics } checkpoint_path = os.path.join(checkpoints_dir, 'best2.pth') torch.save(checkpoint, checkpoint_path) logging.info( 'best_models saved to {}'.format(checkpoint_path)) # Evaluate on validation data if holdout_fold != 'none': validate_statistics = evaluator.evaluate( data_type='validate', submission_path=_temp_submission_path, annotation_path=annotation_path, yaml_path=yaml_path, max_iteration=None) validate_statistics_container.append_and_dump( iteration, validate_statistics) train_time = train_fin_time - train_bgn_time validate_time = time.time() - train_fin_time logging.info('Train time: {:.3f} s, validate time: {:.3f} s' ''.format(train_time, validate_time)) train_bgn_time = time.time() # Save model if iteration % 1000 == 0 and iteration > 0: checkpoint = { 'iteration': iteration, 'model': model.state_dict(), 'optimizer': optimizer.state_dict() } checkpoint_path = os.path.join( checkpoints_dir, '{}_iterations.pth'.format(iteration)) torch.save(checkpoint, checkpoint_path) logging.info('Model saved to {}'.format(checkpoint_path)) # Reduce learning rate if reduce_lr and iteration % 200 == 0 and iteration > 0: for param_group in optimizer.param_groups: param_group['lr'] *= 0.9 # Move data to GPU for key in batch_data_dict.keys(): if key in ['feature', 'fine_target', 'coarse_target', 'spacetime']: batch_data_dict[key] = move_data_to_gpu( batch_data_dict[key], cuda) feature, spacetime, targets_a, targets_b, lam = mixup_data( batch_data_dict['feature'], batch_data_dict['spacetime'], batch_data_dict['{}_target'.format(taxonomy_level)], alpha=1.0, use_cuda=True) # Train model.train() criterion = nn.BCELoss().cuda() 
        batch_output = model(feature, spacetime)

        # Loss (mixup interpolates between the two sets of targets)
        # batch_target = batch_data_dict['{}_target'.format(taxonomy_level)]
        loss = mixup_criterion(criterion, batch_output, targets_a, targets_b, lam)
        # loss = binary_cross_entropy(batch_output, batch_target)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Track the training loss curve
        if iteration % 100 == 0:
            plt_x.append(iteration)
            plt_y.append(loss.item())

        if iteration % 10000 == 0 and iteration != 0:
            plt.figure(1)
            plt.suptitle('test result', fontsize=18)
            plt.plot(plt_x, plt_y, 'r-', label='loss')
            plt.legend(loc='best')
            # Note: the output directory for the loss curve is hard-coded
            plt.savefig(
                '/home/fangjunyan/count/' +
                time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time())) +
                '{}'.format(holdout_fold) + '{}.jpg'.format(taxonomy_level))

        # Stop learning
        if iteration == 10000:
            break

        iteration += 1
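# ---- mixup sketch ------------------------------------------------------------
# The training loop above mixes the feature, spacetime and target batches with
# `mixup_data` / `mixup_criterion`, which are defined elsewhere and not shown in
# this file. The sketch below follows the standard mixup recipe (Zhang et al.,
# 2018) for a single input tensor; it is an assumption about what those helpers
# do, not their actual implementation (the original additionally mixes the
# 'spacetime' input with the same lambda).
import numpy as np
import torch

def _example_mixup_data(x, y, alpha=1.0):
    '''Mix a batch with a shuffled copy of itself; lam ~ Beta(alpha, alpha).'''
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1.0
    index = torch.randperm(x.size(0), device=x.device)
    mixed_x = lam * x + (1. - lam) * x[index]
    return mixed_x, y, y[index], lam

def _example_mixup_criterion(criterion, pred, y_a, y_b, lam):
    '''Interpolate the loss between the two target sets instead of the targets.'''
    return lam * criterion(pred, y_a) + (1. - lam) * criterion(pred, y_b)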
def train(args): '''Training. Model will be saved after several iterations. Args: dataset_dir: string, directory of dataset workspace: string, directory of workspace train_sources: 'curated' | 'noisy' | 'curated_and_noisy' segment_seconds: float, duration of audio recordings to be padded or split hop_seconds: float, hop seconds between segments pad_type: 'constant' | 'repeat' holdout_fold: '1', '2', '3', '4' | 'none', set `none` for training on all data without validation model_type: string, e.g. 'Cnn_9layers_AvgPooling' batch_size: int cuda: bool mini_data: bool, set True for debugging on a small part of data ''' # Arugments & parameters dataset_dir = DATASET_DIR workspace = WORKSPACE train_source = args.train_source segment_seconds = args.segment_seconds hop_seconds = args.hop_seconds pad_type = args.pad_type holdout_fold = args.holdout_fold model_type = args.model_type n_epoch = args.n_epoch batch_size = args.batch_size valid_source = args.valid_source pretrained = args.pretrained cuda = args.cuda and torch.cuda.is_available() mini_data = args.mini_data filename = args.filename mel_bins = config.mel_bins classes_num = config.classes_num frames_per_second = config.frames_per_second max_iteration = 500 # Number of mini-batches to evaluate on training data reduce_lr = False # Paths if mini_data: prefix = 'minidata_' else: prefix = '' curated_feature_hdf5_path = os.path.join( workspace, 'features', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'train_curated.h5') noisy_feature_hdf5_path = os.path.join( workspace, 'features', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'train_noisy.h5') curated_cross_validation_path = os.path.join( workspace, 'cross_validation_metadata', 'train_curated_cross_validation.csv') noisy_cross_validation_path = os.path.join( workspace, 'cross_validation_metadata', 'train_noisy_cross_validation.csv') scalar_path = os.path.join( workspace, 'scalars', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'train_noisy.h5') if pretrained == 'none': checkpoints_dir = os.path.join( workspace, 'checkpoints', filename, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'train_source={}'.format(train_source), 'segment={}s,hop={}s,pad_type={}'.format(segment_seconds, hop_seconds, pad_type), 'holdout_fold={}'.format(holdout_fold), model_type) create_folder(checkpoints_dir) validate_statistics_path = os.path.join( workspace, 'statistics', filename, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'train_source={}'.format(train_source), 'segment={}s,hop={}s,pad_type={}'.format(segment_seconds, hop_seconds, pad_type), 'holdout_fold={}'.format(holdout_fold), model_type, 'validate_statistics.pickle') create_folder(os.path.dirname(validate_statistics_path)) logs_dir = os.path.join( workspace, 'logs', filename, args.mode, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'train_source={}'.format(train_source), 'segment={}s,hop={}s,pad_type={}'.format(segment_seconds, hop_seconds, pad_type), 'holdout_fold={}'.format(holdout_fold), model_type) create_logging(logs_dir, 'w') else: checkpoints_dir = os.path.join( workspace, 'checkpoints', filename, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'train_source={}'.format(train_source), 'segment={}s,hop={}s,pad_type={}'.format(segment_seconds, hop_seconds, pad_type), 'holdout_fold={}'.format(holdout_fold), model_type, 'resume') create_folder(checkpoints_dir) 
validate_statistics_path = os.path.join( workspace, 'statistics', filename, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'train_source={}'.format(train_source), 'segment={}s,hop={}s,pad_type={}'.format(segment_seconds, hop_seconds, pad_type), 'holdout_fold={}'.format(holdout_fold), model_type, 'resume', 'validate_statistics.pickle') create_folder(os.path.dirname(validate_statistics_path)) logs_dir = os.path.join( workspace, 'logs', filename, args.mode, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'train_source={}'.format(train_source), 'segment={}s,hop={}s,pad_type={}'.format(segment_seconds, hop_seconds, pad_type), 'holdout_fold={}'.format(holdout_fold), model_type, 'resume') create_logging(logs_dir, 'w') logging.info(args) # Load scalar scalar = load_scalar(scalar_path) # Model Model = eval(model_type) if model_type == 'cbam_ResNet18': model = Model(18, classes_num * 2, 'CBAM') else: model = Model(classes_num * 2) if pretrained != 'none': model.load_state_dict(torch.load(pretrained)['model']) if cuda: model.cuda() # Data generator data_generator = DataGenerator( curated_feature_hdf5_path=curated_feature_hdf5_path, noisy_feature_hdf5_path=noisy_feature_hdf5_path, curated_cross_validation_path=curated_cross_validation_path, noisy_cross_validation_path=noisy_cross_validation_path, train_source=train_source, holdout_fold=holdout_fold, segment_seconds=segment_seconds, hop_seconds=hop_seconds, pad_type=pad_type, scalar=scalar, batch_size=batch_size) # Calculate total iteration required for n_epoch iter_per_epoch = np.ceil( len(data_generator.train_segments_indexes) / batch_size).astype(int) total_iter = iter_per_epoch * n_epoch # Define Warm-up LR scheduler epoch_to_warm = 10 epoch_to_flat = 200 def _warmup_lr(optimizer, iteration, iter_per_epoch, epoch_to_warm, min_lr=0, max_lr=0.0035): delta = (max_lr - min_lr) / iter_per_epoch / epoch_to_warm lr = min_lr + delta * iteration for p in optimizer.param_groups: p['lr'] = lr return lr # Optimizer criterion = FocalLoss(2) # metric_loss = RingLoss(type='auto', loss_weight=1.0) metric_loss = ArcFaceLoss() if cuda: metric_loss.cuda() optimizer = Nadam(model.parameters(), lr=0.0035, betas=(0.9, 0.999), eps=1e-8, weight_decay=0, schedule_decay=4e-3) scheduler = CosineLRWithRestarts( optimizer, batch_size, len(data_generator.train_segments_indexes), restart_period=epoch_to_flat - epoch_to_warm + 1, t_mult=1, verbose=True) # Evaluator evaluator = Evaluator(model=model, data_generator=data_generator, cuda=cuda) # Valid source if valid_source == 'curated': target_sources = ['curated'] elif valid_source == 'noisy': target_sources = ['noisy'] elif valid_source == 'both': target_sources = ['curated', 'noisy'] # Statistics validate_statistics_container = StatisticsContainer( validate_statistics_path) train_bgn_time = time.time() iteration = 0 epoch = 0 # Train on mini batches for batch_data_dict in data_generator.generate_train(): # Evaluate if iteration % 2500 == 0: logging.info('------------------------------------') logging.info('Iteration: {}'.format(iteration)) train_fin_time = time.time() # Evaluate on partial of train data # logging.info('Train statistics:') # for target_source in target_sources: # validate_curated_statistics = evaluator.evaluate( # data_type='train', # target_source=target_source, # max_iteration=max_iteration, # verbose=False) # Evaluate on holdout validation data if holdout_fold != 'none': logging.info('Validate statistics:') for target_source in target_sources: 
validate_curated_statistics = evaluator.evaluate( data_type='validate', target_source=target_source, max_iteration=None, verbose=False) validate_statistics_container.append( iteration, target_source, validate_curated_statistics) validate_statistics_container.dump() train_time = train_fin_time - train_bgn_time validate_time = time.time() - train_fin_time logging.info('Train time: {:.3f} s, validate time: {:.3f} s' ''.format(train_time, validate_time)) train_bgn_time = time.time() # Save model if iteration % 2500 == 0 and iteration > 0: checkpoint = { 'iteration': iteration, 'model': model.state_dict(), 'optimizer': optimizer.state_dict() } checkpoint_path = os.path.join( checkpoints_dir, '{}_iterations.pth'.format(iteration)) torch.save(checkpoint, checkpoint_path) logging.info('Model saved to {}'.format(checkpoint_path)) # Reduce learning rate if reduce_lr and iteration % 200 == 0 and iteration > 0: for param_group in optimizer.param_groups: param_group['lr'] *= 0.9 # Move data to GPU for key in batch_data_dict.keys(): if key in ['feature', 'mask', 'target']: batch_data_dict[key] = move_data_to_gpu( batch_data_dict[key], cuda) # Train model.train() batch_feature, batch_output = model(batch_data_dict['feature'], is_infer=False) # loss loss = criterion(batch_output, batch_data_dict['target']) + metric_loss( batch_feature, batch_data_dict['target']) # Backward optimizer.zero_grad() # LR Warm up if iteration < epoch_to_warm * iter_per_epoch: cur_lr = _warmup_lr(optimizer, iteration, iter_per_epoch, epoch_to_warm=epoch_to_warm, min_lr=0, max_lr=0.0035) loss.backward() optimizer.step() if iteration >= epoch_to_warm * iter_per_epoch and iteration < epoch_to_flat * iter_per_epoch: if data_generator.pointer >= len( data_generator.train_segments_indexes): scheduler.step() scheduler.batch_step() # Show LR information if iteration % iter_per_epoch == 0 and iteration != 0: epoch += 1 if epoch % 10 == 0: for p in optimizer.param_groups: logging.info( 'Learning rate at epoch {:3d} / iteration {:5d} is: {:.6f}' .format(epoch, iteration, p['lr'])) # Stop learning if iteration == total_iter: break iteration += 1 if iteration == epoch_to_warm * iter_per_epoch: scheduler.step() if iteration == epoch_to_flat * iter_per_epoch: for param_group in optimizer.param_groups: param_group['lr'] = 1e-5
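# ---- learning rate schedule sketch -------------------------------------------
# The schedule above has three phases: a linear warm-up for the first
# `epoch_to_warm` epochs, cosine annealing with restarts until `epoch_to_flat`,
# and a flat 1e-5 learning rate afterwards. The warm-up is linear in the
# iteration index: lr(i) = min_lr + (max_lr - min_lr) * i / (iter_per_epoch * epoch_to_warm).
# A small self-contained check of that formula (the numbers are illustrative):
def _example_warmup_lr(iteration, iter_per_epoch=100, epoch_to_warm=10,
                       min_lr=0.0, max_lr=0.0035):
    delta = (max_lr - min_lr) / iter_per_epoch / epoch_to_warm
    return min_lr + delta * iteration

# lr grows from min_lr to max_lr over epoch_to_warm * iter_per_epoch iterations:
# _example_warmup_lr(0)    -> 0.0
# _example_warmup_lr(500)  -> 0.00175  (half way through the warm-up)
# _example_warmup_lr(1000) -> 0.0035   (end of warm-up, hand-over to the scheduler)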
def train(args): '''Training. Model will be saved after several iterations. Args: dataset_dir: string, directory of dataset workspace: string, directory of workspace subtask: 'a' | 'b' | 'c', corresponds to 3 subtasks in DCASE2019 Task1 data_type: 'development' | 'evaluation' holdout_fold: '1' | 'none', set 1 for development and none for training on all data without validation model_type: string, e.g. 'Cnn_9layers_AvgPooling' batch_size: int cuda: bool mini_data: bool, set True for debugging on a small part of data ''' # Arugments & parameters dataset_dir = args.dataset_dir workspace = args.workspace subtask = args.subtask data_type = args.data_type holdout_fold = args.holdout_fold model_type = args.model_type batch_size = args.batch_size cuda = args.cuda and torch.cuda.is_available() mini_data = args.mini_data filename = args.filename mel_bins = config.mel_bins frames_per_second = config.frames_per_second max_iteration = None # Number of mini-batches to evaluate on training data reduce_lr = True sources_to_evaluate = get_sources(subtask) in_domain_classes_num = len(config.labels) - 1 # Paths if mini_data: prefix = 'minidata_' else: prefix = '' sub_dir = get_subdir(subtask, data_type) train_csv = os.path.join(dataset_dir, sub_dir, 'evaluation_setup', 'fold1_train.csv') validate_csv = os.path.join(dataset_dir, sub_dir, 'evaluation_setup', 'fold1_evaluate.csv') feature_hdf5_path = os.path.join( workspace, 'features', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), '{}.h5'.format(sub_dir)) feature_hdf5_path_left = os.path.join( workspace, 'features_left', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), '{}.h5'.format(sub_dir)) feature_hdf5_path_right = os.path.join( workspace, 'features_right', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), '{}.h5'.format(sub_dir)) feature_hdf5_path_side = os.path.join( workspace, 'features_side', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), '{}.h5'.format(sub_dir)) scalar_path = os.path.join( workspace, 'scalars', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), '{}.h5'.format(sub_dir)) scalar_path_left = os.path.join( workspace, 'scalars_left', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), '{}.h5'.format(sub_dir)) scalar_path_right = os.path.join( workspace, 'scalars_right', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), '{}.h5'.format(sub_dir)) scalar_path_side = os.path.join( workspace, 'scalars_side', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), '{}.h5'.format(sub_dir)) checkpoints_dir = os.path.join( workspace, 'checkpoints', filename, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), '{}'.format(sub_dir), 'holdout_fold={}'.format(holdout_fold), model_type) create_folder(checkpoints_dir) validate_statistics_path = os.path.join( workspace, 'statistics', filename, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), '{}'.format(sub_dir), 'holdout_fold={}'.format(holdout_fold), model_type, 'validate_statistics.pickle') create_folder(os.path.dirname(validate_statistics_path)) logs_dir = os.path.join( workspace, 'logs', filename, args.mode, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), '{}'.format(sub_dir), 'holdout_fold={}'.format(holdout_fold), model_type) create_logging(logs_dir, 'w') logging.info(args) # Load scalar scalar = load_scalar(scalar_path) 
scalar_left = load_scalar(scalar_path_left) scalar_right = load_scalar(scalar_path_right) scalar_side = load_scalar(scalar_path_side) # Model Model = eval(model_type) if subtask in ['a', 'b']: model = Model(in_domain_classes_num, activation='logsoftmax') loss_func = nll_loss elif subtask == 'c': model = Model(in_domain_classes_num, activation='sigmoid') loss_func = F.binary_cross_entropy if cuda: model.cuda() # Optimizer optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999), eps=1e-08, weight_decay=0., amsgrad=True) # Data generator data_generator = DataGenerator( feature_hdf5_path=feature_hdf5_path, feature_hdf5_path_left=feature_hdf5_path_left, feature_hdf5_path_right=feature_hdf5_path_right, feature_hdf5_path_side=feature_hdf5_path_side, train_csv=train_csv, validate_csv=validate_csv, scalar=scalar, scalar_left=scalar_left, scalar_right=scalar_right, scalar_side=scalar_side, batch_size=batch_size) # Evaluator evaluator = Evaluator(model=model, data_generator=data_generator, subtask=subtask, cuda=cuda) # Statistics validate_statistics_container = StatisticsContainer( validate_statistics_path) train_bgn_time = time.time() iteration = 0 # Train on mini batches for batch_data_dict, batch_data_dict_left, batch_data_dict_right, batch_data_dict_side in data_generator.generate_train( ): # Evaluate if iteration % 200 == 0: logging.info('------------------------------------') logging.info('Iteration: {}'.format(iteration)) train_fin_time = time.time() for source in sources_to_evaluate: train_statistics = evaluator.evaluate(data_type='train', source=source, max_iteration=None, verbose=False) for source in sources_to_evaluate: validate_statistics = evaluator.evaluate(data_type='validate', source=source, max_iteration=None, verbose=False) validate_statistics_container.append_and_dump( iteration, source, validate_statistics) train_time = train_fin_time - train_bgn_time validate_time = time.time() - train_fin_time logging.info('Train time: {:.3f} s, validate time: {:.3f} s' ''.format(train_time, validate_time)) train_bgn_time = time.time() # Save model if iteration % 1000 == 0 and iteration > 0: checkpoint = { 'iteration': iteration, 'model': model.state_dict(), 'optimizer': optimizer.state_dict() } checkpoint_path = os.path.join( checkpoints_dir, '{}_iterations.pth'.format(iteration)) torch.save(checkpoint, checkpoint_path) logging.info('Model saved to {}'.format(checkpoint_path)) # Reduce learning rate if reduce_lr and iteration % 200 == 0 and iteration > 0: for param_group in optimizer.param_groups: param_group['lr'] *= 0.9 # Move data to GPU for key in batch_data_dict.keys(): if key in ['feature', 'target']: batch_data_dict[key] = move_data_to_gpu( batch_data_dict[key], cuda) for key in batch_data_dict_left.keys(): if key in ['feature_left', 'target']: batch_data_dict_left[key] = move_data_to_gpu( batch_data_dict_left[key], cuda) for key in batch_data_dict_right.keys(): if key in ['feature_right', 'target']: batch_data_dict_right[key] = move_data_to_gpu( batch_data_dict_right[key], cuda) for key in batch_data_dict_side.keys(): if key in ['feature_side', 'target']: batch_data_dict_side[key] = move_data_to_gpu( batch_data_dict_side[key], cuda) # Train model.train() batch_output = model(data=batch_data_dict['feature'], data_left=batch_data_dict_left['feature_left'], data_right=batch_data_dict_right['feature_right'], data_side=batch_data_dict_side['feature_side']) # loss loss = loss_func(batch_output, batch_data_dict['target']) # Backward optimizer.zero_grad() loss.backward() 
        optimizer.step()

        # Stop learning
        if iteration == 15000:
            break

        iteration += 1
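# ---- activation / loss pairing sketch ----------------------------------------
# Subtasks a and b are single-label scene classification, so the model emits
# log-probabilities (log-softmax) trained with an NLL-style loss; subtask c is
# the open-set variant, so the model emits independent sigmoid probabilities
# trained with binary cross entropy. The `nll_loss` helper used above is
# imported from elsewhere and may differ in detail; the sketch below uses the
# standard PyTorch functional equivalents (shapes and target encoding are
# illustrative) just to show the two pairings.
import torch
import torch.nn.functional as F

def _example_loss_pairings(batch_size=4, classes_num=10):
    logits = torch.randn(batch_size, classes_num)

    # log-softmax + NLL: one exclusive class per clip
    log_prob = F.log_softmax(logits, dim=-1)
    hard_target = torch.randint(0, classes_num, (batch_size,))
    single_label_loss = F.nll_loss(log_prob, hard_target)

    # sigmoid + binary cross entropy: each class decided independently
    prob = torch.sigmoid(logits)
    multi_hot_target = torch.randint(0, 2, (batch_size, classes_num)).float()
    multi_label_loss = F.binary_cross_entropy(prob, multi_hot_target)

    return single_label_loss, multi_label_loss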
def get_train_features(args): '''Get features from training set. Args: dataset_dir: string, directory of dataset workspace: string, directory of workspace train_sources: 'curated' | 'noisy' | 'curated_and_noisy' segment_seconds: float, duration of audio recordings to be padded or split hop_seconds: float, hop seconds between segments pad_type: 'constant' | 'repeat' holdout_fold: '1', '2', '3', '4' model_type: string, e.g. 'Cnn_9layers_AvgPooling' iteration: int, load model of this iteration batch_size: int cuda: bool mini_data: bool, set True for debugging on a small part of data ''' # Arugments & parameters dataset_dir = DATASET_DIR workspace = WORKSPACE train_source = args.train_source segment_seconds = args.segment_seconds hop_seconds = args.hop_seconds pad_type = args.pad_type holdout_fold = args.holdout_fold model_type = args.model_type iteration = args.iteration batch_size = args.batch_size cuda = args.cuda and torch.cuda.is_available() mini_data = args.mini_data filename = args.filename mel_bins = config.mel_bins classes_num = config.classes_num frames_per_second = config.frames_per_second # Paths if mini_data: prefix = 'minidata_' else: prefix = '' curated_feature_hdf5_path = os.path.join( workspace, 'features', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'train_curated.h5') noisy_feature_hdf5_path = os.path.join( workspace, 'features', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'train_noisy.h5') curated_cross_validation_path = os.path.join( workspace, 'cross_validation_metadata', 'train_curated_cross_validation.csv') noisy_cross_validation_path = os.path.join( workspace, 'cross_validation_metadata', 'train_noisy_cross_validation.csv') scalar_path = os.path.join( workspace, 'scalars', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'train_noisy.h5') checkpoint_path = os.path.join( workspace, 'checkpoints', filename, 'logmel_{}frames_{}melbins'.format(frames_per_second, mel_bins), 'train_source={}'.format(train_source), 'segment={}s,hop={}s,pad_type={}' ''.format(segment_seconds, hop_seconds, pad_type), 'holdout_fold={}' ''.format(holdout_fold), model_type, '{}_iterations.pth'.format(iteration)) feature_map_path = os.path.join( workspace, 'feature_map', filename, 'logmel_{}frames_{}melbins'.format(frames_per_second, mel_bins), 'train_source={}'.format(train_source), 'segment={}s,hop={}s,pad_type={}' ''.format(segment_seconds, hop_seconds, pad_type), 'holdout_fold={}' ''.format(holdout_fold), model_type, '{}_iterations_feature_map.h5' ''.format(iteration)) create_folder(os.path.dirname(feature_map_path)) # Load scalar scalar = load_scalar(scalar_path) # Model Model = eval(model_type) if model_type == 'cbam_ResNet18': model = Model(18, classes_num * 2, 'CBAM') else: model = Model(classes_num * 2) checkpoint = torch.load(checkpoint_path) model.load_state_dict(checkpoint['model']) if cuda: model.cuda() # Data generator data_generator = DataGenerator( curated_feature_hdf5_path=curated_feature_hdf5_path, noisy_feature_hdf5_path=noisy_feature_hdf5_path, curated_cross_validation_path=curated_cross_validation_path, noisy_cross_validation_path=noisy_cross_validation_path, train_source=train_source, holdout_fold=holdout_fold, segment_seconds=segment_seconds, hop_seconds=hop_seconds, pad_type=pad_type, scalar=scalar, batch_size=batch_size) generate_func = data_generator.generate_validate('train_feature_map', train_source) # Results of segments output_dict = forward_dist(model=model, 
        generate_func=generate_func, cuda=cuda, return_target=True)

    # Get training set features
    train_features = get_feature_map(output_dict)

    # Write HDF5 file
    hf = h5py.File(feature_map_path, 'w')

    for k, v in train_features.items():
        hf.create_dataset(name=k, data=v, dtype=np.float32)

    hf.close()
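# ---- reading the feature map back --------------------------------------------
# The HDF5 file written above contains one float32 dataset per key returned by
# `get_feature_map`. The exact keys depend on that helper (not shown here); the
# sketch below simply loads every dataset into a dict of numpy arrays.
import h5py
import numpy as np

def _example_load_feature_map(feature_map_path):
    features = {}
    with h5py.File(feature_map_path, 'r') as hf:
        for key in hf.keys():
            features[key] = np.array(hf[key])
    return features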
def inference_validation(args): '''Inference and calculate metrics on validation data. Args: dataset_dir: string, directory of dataset subtask: 'a' | 'b' | 'c', corresponds to 3 subtasks in DCASE2019 Task1 data_type: 'development' workspace: string, directory of workspace model_type: string, e.g. 'Cnn_9layers' iteration: int batch_size: int cuda: bool mini_data: bool, set True for debugging on a small part of data visualize: bool ''' # Arugments & parameters dataset_dir = args.dataset_dir subtask = args.subtask data_type = args.data_type workspace = args.workspace model_type = args.model_type holdout_fold = args.holdout_fold iteration = args.iteration batch_size = args.batch_size cuda = args.cuda and torch.cuda.is_available() mini_data = args.mini_data visualize = args.visualize filename = args.filename mel_bins = config.mel_bins frames_per_second = config.frames_per_second sources = get_sources(subtask) in_domain_classes_num = len(config.labels) - 1 # Paths if mini_data: prefix = 'minidata_' else: prefix = '' sub_dir = get_subdir(subtask, data_type) train_csv = os.path.join(dataset_dir, sub_dir, 'evaluation_setup', 'fold1_train.csv') validate_csv = os.path.join(dataset_dir, sub_dir, 'evaluation_setup', 'fold1_evaluate.csv') feature_hdf5_path = os.path.join(workspace, 'features', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), '{}.h5'.format(sub_dir)) scalar_path = os.path.join(workspace, 'scalars', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), '{}.h5'.format(sub_dir)) checkpoint_path = os.path.join(workspace, 'checkpoints', filename, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), '{}'.format(sub_dir), 'holdout_fold={}'.format(holdout_fold), model_type, '{}_iterations.pth'.format(iteration)) logs_dir = os.path.join(workspace, 'logs', filename, args.mode, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), '{}'.format(sub_dir), 'holdout_fold={}'.format(holdout_fold), model_type) create_logging(logs_dir, 'w') logging.info(args) # Load scalar scalar = load_scalar(scalar_path) # Load model Model = eval(model_type) if subtask in ['a', 'b']: model = Model(in_domain_classes_num, activation='logsoftmax') loss_func = nll_loss elif subtask == 'c': model = Model(in_domain_classes_num, activation='sigmoid') loss_func = F.binary_cross_entropy #checkpoint = torch.load(checkpoint_path) #model.load_state_dict(checkpoint['model']) if cuda: model.cuda() # Data generator data_generator = DataGenerator( feature_hdf5_path=feature_hdf5_path, train_csv=train_csv, validate_csv=validate_csv, holdout_fold=holdout_fold, scalar=scalar, batch_size=batch_size) # Evaluator evaluator = Evaluator( model=model, data_generator=data_generator, subtask=subtask, cuda=cuda) if subtask in ['a', 'c']: evaluator.evaluate(data_type='validate', source='a', verbose=True) elif subtask == 'b': evaluator.evaluate(data_type='validate', source='a', verbose=True) evaluator.evaluate(data_type='validate', source='b', verbose=True) evaluator.evaluate(data_type='validate', source='c', verbose=True) # Visualize log mel spectrogram if visualize: evaluator.visualize(data_type='validate', source='a')
def get_infer_features(args): # Arugments & parameters dataset_dir = DATASET_DIR workspace = WORKSPACE train_source = args.train_source segment_seconds = args.segment_seconds hop_seconds = args.hop_seconds pad_type = args.pad_type holdout_fold = args.holdout_fold model_type = args.model_type iteration = args.iteration batch_size = args.batch_size infer_set = args.infer_set cuda = args.cuda and torch.cuda.is_available() mini_data = args.mini_data filename = args.filename assert infer_set in ['valid', 'test'] mel_bins = config.mel_bins classes_num = config.classes_num frames_per_second = config.frames_per_second # Paths if mini_data: prefix = 'minidata_' else: prefix = '' curated_feature_hdf5_path = os.path.join( workspace, 'features', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'train_curated.h5') noisy_feature_hdf5_path = os.path.join( workspace, 'features', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'train_noisy.h5') curated_cross_validation_path = os.path.join( workspace, 'cross_validation_metadata', 'train_curated_cross_validation.csv') noisy_cross_validation_path = os.path.join( workspace, 'cross_validation_metadata', 'train_noisy_cross_validation.csv') test_feature_hdf5_path = os.path.join( workspace, 'features', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'test.h5') scalar_path = os.path.join( workspace, 'scalars', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'train_noisy.h5') checkpoint_path = os.path.join( workspace, 'checkpoints', filename, 'logmel_{}frames_{}melbins'.format(frames_per_second, mel_bins), 'train_source={}'.format(train_source), 'segment={}s,hop={}s,pad_type={}' ''.format(segment_seconds, hop_seconds, pad_type), 'holdout_fold={}' ''.format(holdout_fold), model_type, '{}_iterations.pth'.format(iteration)) feature_map_path = os.path.join( workspace, 'feature_map', filename, 'logmel_{}frames_{}melbins'.format(frames_per_second, mel_bins), 'train_source={}'.format(train_source), 'segment={}s,hop={}s,pad_type={}' ''.format(segment_seconds, hop_seconds, pad_type), 'holdout_fold={}' ''.format(holdout_fold), model_type, '{}_iterations_{}_feature_map.h5' ''.format(iteration, infer_set)) create_folder(os.path.dirname(feature_map_path)) # Load scalar scalar = load_scalar(scalar_path) # Model Model = eval(model_type) if model_type == 'cbam_ResNet18': model = Model(18, classes_num * 2, 'CBAM') else: model = Model(classes_num * 2) checkpoint = torch.load(checkpoint_path) model.load_state_dict(checkpoint['model']) if cuda: model.cuda() # Data generator if infer_set == 'valid': data_generator = DataGenerator( curated_feature_hdf5_path=curated_feature_hdf5_path, noisy_feature_hdf5_path=noisy_feature_hdf5_path, curated_cross_validation_path=curated_cross_validation_path, noisy_cross_validation_path=noisy_cross_validation_path, train_source=train_source, holdout_fold=holdout_fold, segment_seconds=segment_seconds, hop_seconds=hop_seconds, pad_type=pad_type, scalar=scalar, batch_size=batch_size) generate_func = data_generator.generate_validate( 'validate', train_source) elif infer_set == 'test': data_generator = TestDataGenerator( test_feature_hdf5_path=test_feature_hdf5_path, segment_seconds=segment_seconds, hop_seconds=hop_seconds, pad_type=pad_type, scalar=scalar, batch_size=batch_size) generate_func = data_generator.generate_test() # Results of segments output_dict = forward_dist(model=model, generate_func=generate_func, cuda=cuda, return_target=(infer_set == 
        'valid'))

    infer_features = segment_feature_to_clip_feature(output_dict,
        average='arithmetic')

    # Write HDF5 file
    hf = h5py.File(feature_map_path, 'w')

    for k, v in infer_features.items():
        if k == 'audio_name':
            v = [x.encode('utf8') for x in v]
            hf.create_dataset(name=k, data=v)
        else:
            hf.create_dataset(name=k, data=v, dtype=np.float32)

    hf.close()
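# ---- segment to clip aggregation sketch ---------------------------------------
# `segment_feature_to_clip_feature` (defined elsewhere) collapses per-segment
# outputs into one vector per audio clip; with average='arithmetic' this is
# presumably a plain mean over the segments belonging to each clip. A minimal
# sketch of that aggregation, assuming parallel arrays of segment-level audio
# names and features (the argument names are illustrative):
import numpy as np

def _example_average_segments(audio_names, segment_features):
    audio_names = np.asarray(audio_names)
    segment_features = np.asarray(segment_features)
    clip_names = sorted(set(audio_names.tolist()))
    clip_features = np.stack(
        [segment_features[audio_names == name].mean(axis=0)
         for name in clip_names])
    return clip_names, clip_features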
def train(args): '''Training. Model will be saved after several iterations. Args: dataset_dir: string, directory of dataset workspace: string, directory of workspace taxonomy_level: 'fine' | 'coarse' model_type: string, e.g. 'Cnn_9layers_MaxPooling' holdout_fold: '1' | 'None', where '1' indicates using validation and 'None' indicates using full data for training batch_size: int cuda: bool mini_data: bool, set True for debugging on a small part of data ''' # Arugments & parameters dataset_dir = args.dataset_dir workspace = args.workspace taxonomy_level = args.taxonomy_level model_type = args.model_type holdout_fold = args.holdout_fold batch_size = args.batch_size cuda = args.cuda and torch.cuda.is_available() mini_data = args.mini_data filename = args.filename seq_len = 640 mel_bins = config.mel_bins frames_per_second = config.frames_per_second max_iteration = 10 # Number of mini-batches to evaluate on training data reduce_lr = True labels = get_labels(taxonomy_level) classes_num = len(labels) # Paths if mini_data: prefix = 'minidata_' else: prefix = '' train_hdf5_path = os.path.join( workspace, 'features', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'train.h5') validate_hdf5_path = os.path.join( workspace, 'features', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'validate.h5') scalar_path = os.path.join( workspace, 'scalars', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'train.h5') checkpoints_dir = os.path.join( workspace, 'checkpoints', filename, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'taxonomy_level={}'.format(taxonomy_level), 'holdout_fold={}'.format(holdout_fold), model_type) create_folder(checkpoints_dir) _temp_submission_path = os.path.join( workspace, '_temp_submissions', filename, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'taxonomy_level={}'.format(taxonomy_level), 'holdout_fold={}'.format(holdout_fold), model_type, '_submission.csv') create_folder(os.path.dirname(_temp_submission_path)) validate_statistics_path = os.path.join( workspace, 'statistics', filename, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'taxonomy_level={}'.format(taxonomy_level), 'holdout_fold={}'.format(holdout_fold), model_type, 'validate_statistics.pickle') create_folder(os.path.dirname(validate_statistics_path)) annotation_path = os.path.join(dataset_dir, 'annotations.csv') yaml_path = os.path.join(dataset_dir, 'dcase-ust-taxonomy.yaml') logs_dir = os.path.join( workspace, 'logs', filename, args.mode, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'taxonomy_level={}'.format(taxonomy_level), 'holdout_fold={}'.format(holdout_fold), model_type) create_logging(logs_dir, 'w') logging.info(args) if cuda: logging.info('Using GPU.') else: logging.info('Using CPU. 
Set --cuda flag to use GPU.') # Load scalar scalar = load_scalar(scalar_path) # Model Model = eval(model_type) model = Model(classes_num, seq_len, mel_bins, cuda) if cuda: model.cuda() # Optimizer optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999), eps=1e-08, weight_decay=0., amsgrad=True) print('cliqueNet parameters:', sum(param.numel() for param in model.parameters())) # Data generator data_generator = DataGenerator(train_hdf5_path=train_hdf5_path, validate_hdf5_path=validate_hdf5_path, holdout_fold=holdout_fold, scalar=scalar, batch_size=batch_size) # Evaluator evaluator = Evaluator(model=model, data_generator=data_generator, taxonomy_level=taxonomy_level, cuda=cuda, verbose=False) # Statistics validate_statistics_container = StatisticsContainer( validate_statistics_path) train_bgn_time = time.time() iteration = 0 # Train on mini batches for batch_data_dict in data_generator.generate_train(): # Evaluate if iteration % 200 == 0: logging.info('------------------------------------') logging.info('Iteration: {}, {} level statistics:'.format( iteration, taxonomy_level)) train_fin_time = time.time() # Evaluate on training data if mini_data: raise Exception('`mini_data` flag must be set to False to use ' 'the official evaluation tool!') train_statistics = evaluator.evaluate(data_type='train', max_iteration=None) # Evaluate on validation data if holdout_fold != 'none': validate_statistics = evaluator.evaluate( data_type='validate', submission_path=_temp_submission_path, annotation_path=annotation_path, yaml_path=yaml_path, max_iteration=None) validate_statistics_container.append_and_dump( iteration, validate_statistics) train_time = train_fin_time - train_bgn_time validate_time = time.time() - train_fin_time logging.info('Train time: {:.3f} s, validate time: {:.3f} s' ''.format(train_time, validate_time)) train_bgn_time = time.time() # Save model if iteration % 1000 == 0 and iteration > 0: checkpoint = { 'iteration': iteration, 'model': model.state_dict(), 'optimizer': optimizer.state_dict() } checkpoint_path = os.path.join( checkpoints_dir, '{}_iterations.pth'.format(iteration)) torch.save(checkpoint, checkpoint_path) logging.info('Model saved to {}'.format(checkpoint_path)) # Reduce learning rate if reduce_lr and iteration % 200 == 0 and iteration > 0: for param_group in optimizer.param_groups: param_group['lr'] *= 0.9 # Move data to GPU for key in batch_data_dict.keys(): if key in ['feature', 'fine_target', 'coarse_target']: batch_data_dict[key] = move_data_to_gpu( batch_data_dict[key], cuda) # Train model.train() batch_output = model(batch_data_dict['feature']) # loss batch_target = batch_data_dict['{}_target'.format(taxonomy_level)] loss = binary_cross_entropy(batch_output, batch_target) # Backward optimizer.zero_grad() loss.backward() optimizer.step() # Stop learning if iteration == 3000: break iteration += 1
def train(args): '''Training. Model will be saved after several iterations. Args: dataset_dir: string, directory of dataset workspace: string, directory of workspace data_type: 'train_weak' | 'train_synthetic' holdout_fold: '1' | 'none', set 1 for development and none for training on all data without validation.' model_type: string, e.g. 'Cnn_9layers_AvgPooling' loss_type: 'clipwise_binary_crossentropy' | 'framewise_binary_crossentropy' batch_size: int cuda: bool mini_data: bool, set True for debugging on a small part of data ''' # Arugments & parameters dataset_dir = args.dataset_dir workspace = args.workspace data_type = args.data_type holdout_fold = args.holdout_fold model_type = args.model_type loss_type = args.loss_type batch_size = args.batch_size cuda = args.cuda and torch.cuda.is_available() mini_data = args.mini_data filename = args.filename mel_bins = config.mel_bins frames_per_second = config.frames_per_second classes_num = config.classes_num max_iteration = None # Number of mini-batches to evaluate on training data reduce_lr = True # Paths if mini_data: prefix = 'minidata_' else: prefix = '' if loss_type == 'clipwise_binary_crossentropy': strong_target_training = False elif loss_type == 'framewise_binary_crossentropy': strong_target_training = True else: raise Exception('Incorrect argument!') train_relative_name = get_relative_path_no_extension(data_type) validate_relative_name = get_relative_path_no_extension('validation') train_hdf5_path = os.path.join( workspace, 'features', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), '{}.h5'.format(train_relative_name)) validate_hdf5_path = os.path.join( workspace, 'features', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), '{}.h5'.format(validate_relative_name)) scalar_path = os.path.join( workspace, 'scalars', '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 'train/weak.h5') train_metadata_path = os.path.join(dataset_dir, 'metadata', '{}.csv'.format(train_relative_name)) validate_metadata_path = os.path.join( dataset_dir, 'metadata', 'validation', '{}.csv'.format(validate_relative_name)) checkpoints_dir = os.path.join( workspace, 'checkpoints', filename, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), '{}'.format(train_relative_name), 'holdout_fold={}'.format(holdout_fold), model_type, 'loss_type={}'.format(loss_type)) create_folder(checkpoints_dir) temp_submission_path = os.path.join( workspace, '_temp', 'submissions', filename, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), '{}'.format(train_relative_name), 'holdout_fold={}'.format(holdout_fold), model_type, 'loss_type={}'.format(loss_type), '_temp_submission.csv') create_folder(os.path.dirname(temp_submission_path)) validate_statistics_path = os.path.join( workspace, 'statistics', filename, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), '{}'.format(train_relative_name), 'holdout_fold={}'.format(holdout_fold), model_type, 'loss_type={}'.format(loss_type), 'validate_statistics.pickle') create_folder(os.path.dirname(validate_statistics_path)) logs_dir = os.path.join( args.workspace, 'logs', filename, args.mode, '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), '{}'.format(train_relative_name), 'holdout_fold={}'.format(holdout_fold), model_type, 'loss_type={}'.format(loss_type)) create_logging(logs_dir, filemode='w') logging.info(args) if cuda: logging.info('Using GPU.') else: logging.info('Using CPU. 
Set --cuda flag to use GPU.') # Load scalar scalar = load_scalar(scalar_path) # Model Model = eval(model_type) model = Model(classes_num, strong_target_training) if cuda: model.cuda() loss_func = eval(loss_type) # Optimizer optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999), eps=1e-08, weight_decay=0., amsgrad=True) # Data generator data_generator = DataGenerator(train_hdf5_path=train_hdf5_path, validate_hdf5_path=validate_hdf5_path, holdout_fold=holdout_fold, scalar=scalar, batch_size=batch_size) # Evaluator evaluator = Evaluator(model=model, data_generator=data_generator, cuda=cuda, verbose=False) # Statistics validate_statistics_container = StatisticsContainer( validate_statistics_path) train_bgn_time = time.time() iteration = 0 # Train on mini batches for batch_data_dict in data_generator.generate_train(): # Evaluate if iteration % 200 == 0: logging.info('------------------------------------') logging.info('Iteration: {}'.format(iteration)) train_fin_time = time.time() train_statistics = evaluator.evaluate( data_type='train', metadata_path=train_metadata_path, submission_path=temp_submission_path, max_iteration=max_iteration) if holdout_fold != 'none': validate_statistics = evaluator.evaluate( data_type='validate', metadata_path=validate_metadata_path, submission_path=temp_submission_path, max_iteration=max_iteration) validate_statistics_container.append_and_dump( iteration, validate_statistics) train_time = train_fin_time - train_bgn_time validate_time = time.time() - train_fin_time logging.info('Train time: {:.3f} s, validate time: {:.3f} s' ''.format(train_time, validate_time)) train_bgn_time = time.time() # Save model if iteration % 1000 == 0 and iteration > 0: checkpoint = { 'iteration': iteration, 'model': model.state_dict(), 'optimizer': optimizer.state_dict() } checkpoint_path = os.path.join( checkpoints_dir, '{}_iterations.pth'.format(iteration)) torch.save(checkpoint, checkpoint_path) logging.info('Model saved to {}'.format(checkpoint_path)) # Reduce learning rate if reduce_lr and iteration % 200 == 0 and iteration > 0: for param_group in optimizer.param_groups: param_group['lr'] *= 0.9 # Move data to GPU for key in batch_data_dict.keys(): if key in ['feature', 'weak_target', 'strong_target']: batch_data_dict[key] = move_data_to_gpu( batch_data_dict[key], cuda) # Train model.train() batch_output_dict = model(batch_data_dict['feature']) # loss loss = loss_func(batch_output_dict, batch_data_dict) # Backward optimizer.zero_grad() loss.backward() optimizer.step() # Stop learning if iteration == 5000: break iteration += 1
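# ---- clipwise vs framewise loss sketch ----------------------------------------
# `loss_type` selects between training on clip-level (weak) tags and frame-level
# (strong) annotations. The actual `clipwise_binary_crossentropy` and
# `framewise_binary_crossentropy` functions live elsewhere in the repo; the
# sketch below shows one plausible form of each, assuming the model output dict
# carries 'clipwise_output' (batch x classes) and 'framewise_output'
# (batch x frames x classes) -- both names are assumptions about the interface,
# not the original implementation.
import torch.nn.functional as F

def _example_clipwise_bce(output_dict, target_dict):
    return F.binary_cross_entropy(
        output_dict['clipwise_output'], target_dict['weak_target'])

def _example_framewise_bce(output_dict, target_dict):
    frames_num = output_dict['framewise_output'].shape[1]
    # Strong targets are assumed to already be framewise multi-hot matrices.
    return F.binary_cross_entropy(
        output_dict['framewise_output'],
        target_dict['strong_target'][:, :frames_num, :])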