Exemple #1
0
def inference_validation(args):
    '''Inference and calculate metrics on validation data. 
    
    Loads the checkpoint saved at `iteration`, then runs the Evaluator on the 
    'validate' data. All values below are read as attributes of `args`. 
    
    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      taxonomy_level: 'fine' | 'coarse'
      model_type: string, e.g. 'Cnn_9layers_MaxPooling'
      iteration: int
      holdout_fold: '1', which means using validation data
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
      visualize: bool
      filename: string, directory level used in the checkpoint, submission 
          and log paths
      mode: string, directory level used in the log path
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    taxonomy_level = args.taxonomy_level
    model_type = args.model_type
    iteration = args.iteration
    holdout_fold = args.holdout_fold
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    visualize = args.visualize
    filename = args.filename

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second

    labels = get_labels(taxonomy_level)
    classes_num = len(labels)

    # Paths
    prefix = 'minidata_' if mini_data else ''

    # Directory names that are shared by several of the paths below
    feature_name = '{}logmel_{}frames_{}melbins'.format(
        prefix, frames_per_second, mel_bins)
    taxonomy_name = 'taxonomy_level={}'.format(taxonomy_level)
    fold_name = 'holdout_fold={}'.format(holdout_fold)

    train_hdf5_path = os.path.join(
        workspace, 'features', feature_name, 'train.h5')

    validate_hdf5_path = os.path.join(
        workspace, 'features', feature_name, 'validate.h5')

    scalar_path = os.path.join(
        workspace, 'scalars', feature_name, 'train.h5')

    checkpoint_path = os.path.join(
        workspace, 'checkpoints', filename, feature_name, taxonomy_name,
        fold_name, model_type, '{}_iterations.pth'.format(iteration))

    submission_path = os.path.join(
        workspace, 'submissions', filename, feature_name, taxonomy_name,
        fold_name, model_type, 'submission.csv')
    create_folder(os.path.dirname(submission_path))

    annotation_path = os.path.join(dataset_dir, 'annotations.csv')

    yaml_path = os.path.join(dataset_dir, 'dcase-ust-taxonomy.yaml')

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode, feature_name, taxonomy_name,
        fold_name, model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Load model
    # NOTE(review): eval() executes whatever string is in args.model_type;
    # only safe while that value comes from a trusted source.
    Model = eval(model_type)
    model = Model(classes_num)

    # Map the stored tensors to CPU so that a checkpoint saved on a GPU can
    # still be loaded when CUDA is not available; the model is moved to the
    # GPU below when requested.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = DataGenerator(train_hdf5_path=train_hdf5_path,
                                   validate_hdf5_path=validate_hdf5_path,
                                   holdout_fold=holdout_fold,
                                   scalar=scalar,
                                   batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          taxonomy_level=taxonomy_level,
                          cuda=cuda,
                          verbose=True)

    # Evaluate on validation data
    evaluator.evaluate(data_type='validate',
                       submission_path=submission_path,
                       annotation_path=annotation_path,
                       yaml_path=yaml_path,
                       max_iteration=None)

    # Visualize
    if visualize:
        evaluator.visualize(data_type='validate')
Exemple #2
0
def inference_evaluation(args):
    '''Inference on evaluation data. 
    
    Loads the checkpoint named 'best2.pth', forwards the evaluation features 
    through the model and writes the outputs with write_submission_csv. All 
    values below are read as attributes of `args`. 
    
    Args: 
      workspace: string, directory of workspace
      taxonomy_level: 'fine' | 'coarse'
      model_type: string, e.g. 'Cnn_9layers_MaxPooling'
      holdout_fold: 'none', which means using model trained on all development data
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
      filename: string, directory level used in the checkpoint, submission 
          and log paths
      mode: string, directory level used in the log path
    
    Note: 
      `dataset_dir` and `iteration` are not read by this function; the 
      checkpoint file name is fixed to 'best2.pth'. 
    '''

    # Arguments & parameters
    workspace = args.workspace
    taxonomy_level = args.taxonomy_level
    model_type = args.model_type
    holdout_fold = args.holdout_fold
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second

    labels = get_labels(taxonomy_level)
    classes_num = len(labels)

    # Paths
    prefix = 'minidata_' if mini_data else ''

    # Directory names that are shared by several of the paths below
    feature_name = '{}logmel_{}frames_{}melbins'.format(
        prefix, frames_per_second, mel_bins)
    taxonomy_name = 'taxonomy_level={}'.format(taxonomy_level)
    fold_name = 'holdout_fold={}'.format(holdout_fold)

    evaluate_hdf5_path = os.path.join(
        workspace, 'features', feature_name, 'evaluate.h5')

    scalar_path = os.path.join(
        workspace, 'scalars', feature_name, 'train.h5')

    checkpoint_path = os.path.join(
        workspace, 'checkpoints', filename, feature_name, taxonomy_name,
        fold_name, model_type, 'best2.pth')

    submission_path = os.path.join(
        workspace, 'submissions', filename, feature_name, taxonomy_name,
        fold_name, model_type, 'best2_submission.csv')
    create_folder(os.path.dirname(submission_path))

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode, feature_name, taxonomy_name,
        fold_name, model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Load model
    # NOTE(review): eval() executes whatever string is in args.model_type;
    # only safe while that value comes from a trusted source.
    Model = eval(model_type)
    model = Model(classes_num)

    # Map the stored tensors to CPU so that a checkpoint saved on a GPU can
    # still be loaded when CUDA is not available; the model is moved to the
    # GPU below when requested.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = TestDataGenerator(hdf5_path=evaluate_hdf5_path,
                                       scalar=scalar,
                                       batch_size=batch_size)

    # Forward
    output_dict = forward(model=model,
                          generate_func=data_generator.generate(),
                          cuda=cuda,
                          return_target=False)

    # Write submission
    write_submission_csv(audio_names=output_dict['audio_name'],
                         outputs=output_dict['output'],
                         taxonomy_level=taxonomy_level,
                         submission_path=submission_path)
def inference_validation(args):
    '''Inference and calculate metrics on validation data. 
    
    The workspace directory is taken from the module-level WORKSPACE 
    constant; every other value below is read as an attribute of `args`. 
    
    Args: 
      train_source: 'curated' | 'noisy' | 'curated_and_noisy'
      segment_seconds: float, duration of audio recordings to be padded or split
      hop_seconds: float, hop seconds between segments
      pad_type: 'constant' | 'repeat'
      holdout_fold: '1', '2', '3', '4'
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      iteration: int, load model of this iteration
      batch_size: int
      resume: bool, when True the checkpoint is looked up in the 'resume' 
          sub-directory of the model directory
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
      visualize: bool, visualize the logmel spectrogram of segments
      filename: string, directory level used in the checkpoint and log paths
      mode: string, directory level used in the log path
    '''

    # Arguments & parameters
    workspace = WORKSPACE
    train_source = args.train_source
    segment_seconds = args.segment_seconds
    hop_seconds = args.hop_seconds
    pad_type = args.pad_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    iteration = args.iteration
    batch_size = args.batch_size
    resume = args.resume
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    visualize = args.visualize
    filename = args.filename

    mel_bins = config.mel_bins
    classes_num = config.classes_num
    frames_per_second = config.frames_per_second

    # Paths
    prefix = 'minidata_' if mini_data else ''

    # Directory names that are shared by several of the paths below
    feature_name = '{}logmel_{}frames_{}melbins'.format(
        prefix, frames_per_second, mel_bins)
    source_name = 'train_source={}'.format(train_source)
    segment_name = 'segment={}s,hop={}s,pad_type={}'.format(
        segment_seconds, hop_seconds, pad_type)
    fold_name = 'holdout_fold={}'.format(holdout_fold)

    curated_feature_hdf5_path = os.path.join(
        workspace, 'features', feature_name, 'train_curated.h5')

    noisy_feature_hdf5_path = os.path.join(
        workspace, 'features', feature_name, 'train_noisy.h5')

    curated_cross_validation_path = os.path.join(
        workspace, 'cross_validation_metadata',
        'train_curated_cross_validation.csv')

    noisy_cross_validation_path = os.path.join(
        workspace, 'cross_validation_metadata',
        'train_noisy_cross_validation.csv')

    scalar_path = os.path.join(
        workspace, 'scalars', feature_name, 'train_noisy.h5')

    # NOTE(review): unlike the feature, scalar and log paths, the checkpoint
    # directory carries no 'minidata_' prefix - confirm this matches where
    # training stores its checkpoints.
    checkpoint_parts = [
        workspace, 'checkpoints', filename,
        'logmel_{}frames_{}melbins'.format(frames_per_second, mel_bins),
        source_name, segment_name, fold_name, model_type]
    if resume:
        # Checkpoints of resumed runs live one directory level deeper
        checkpoint_parts.append('resume')
    checkpoint_parts.append('{}_iterations.pth'.format(iteration))
    checkpoint_path = os.path.join(*checkpoint_parts)

    figs_dir = os.path.join(workspace, 'figures')
    create_folder(figs_dir)

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode, feature_name, source_name,
        segment_name, fold_name, model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    # NOTE(review): eval() executes whatever string is in args.model_type;
    # only safe while that value comes from a trusted source.
    Model = eval(model_type)
    if model_type == 'cbam_ResNet18':
        model = Model(18, classes_num * 2, 'CBAM')
    else:
        model = Model(classes_num * 2)

    # Map the stored tensors to CPU so that a checkpoint saved on a GPU can
    # still be loaded when CUDA is not available; the model is moved to the
    # GPU below when requested.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = DataGenerator(
        curated_feature_hdf5_path=curated_feature_hdf5_path,
        noisy_feature_hdf5_path=noisy_feature_hdf5_path,
        curated_cross_validation_path=curated_cross_validation_path,
        noisy_cross_validation_path=noisy_cross_validation_path,
        train_source=train_source,
        holdout_fold=holdout_fold,
        segment_seconds=segment_seconds,
        hop_seconds=hop_seconds,
        pad_type=pad_type,
        scalar=scalar,
        batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          cuda=cuda)

    # Evaluate on both validation sources; the returned statistics are not
    # used here
    for target_source in ['curated', 'noisy']:
        evaluator.evaluate(
            data_type='validate',
            target_source=target_source,
            max_iteration=None,
            verbose=True)

        # Visualize
        if visualize:
            save_fig_path = os.path.join(figs_dir,
                                         '{}_logmel.png'.format(target_source))

            evaluator.visualize(
                data_type='validate',
                target_source=target_source,
                save_fig_path=save_fig_path,
                max_iteration=None,
                verbose=False)
def inference_test(args):
    '''Inference on test data and write out submission file. 
    
    Segment-level outputs of the model are converted to clip-level 
    predictions with segment_prediction_to_clip_prediction (arithmetic 
    average) before being passed to write_submission. The workspace directory 
    is taken from the module-level WORKSPACE constant; every other value 
    below is read as an attribute of `args`. 
    
    Args: 
      train_source: 'curated' | 'noisy' | 'curated_and_noisy'
      segment_seconds: float, duration of audio recordings to be padded or split
      hop_seconds: float, hop seconds between segments
      pad_type: 'constant' | 'repeat'
      holdout_fold: fold used in the checkpoint and submission paths
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      iteration: int, load model of this iteration
      batch_size: int
      resume: bool, when True the checkpoint and the submission are placed in 
          the 'resume' sub-directory of the model directory
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
      filename: string, directory level used in the checkpoint and 
          submission paths
    '''

    # Arguments & parameters
    workspace = WORKSPACE
    train_source = args.train_source
    segment_seconds = args.segment_seconds
    hop_seconds = args.hop_seconds
    pad_type = args.pad_type
    model_type = args.model_type
    iteration = args.iteration
    batch_size = args.batch_size
    resume = args.resume
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    holdout_fold = args.holdout_fold  # Use model trained on full data without validation
    mel_bins = config.mel_bins
    classes_num = config.classes_num
    frames_per_second = config.frames_per_second

    # Paths
    prefix = 'minidata_' if mini_data else ''

    feature_name = '{}logmel_{}frames_{}melbins'.format(
        prefix, frames_per_second, mel_bins)

    test_feature_hdf5_path = os.path.join(
        workspace, 'features', feature_name, 'test.h5')

    scalar_path = os.path.join(
        workspace, 'scalars', feature_name, 'train_noisy.h5')

    # Directory levels shared by the checkpoint and the submission.
    # NOTE(review): unlike the feature and scalar paths, these carry no
    # 'minidata_' prefix - confirm this matches where training stores its
    # checkpoints.
    run_parts = [
        filename,
        'logmel_{}frames_{}melbins'.format(frames_per_second, mel_bins),
        'train_source={}'.format(train_source),
        'segment={}s,hop={}s,pad_type={}'.format(
            segment_seconds, hop_seconds, pad_type),
        'holdout_fold={}'.format(holdout_fold),
        model_type]
    if resume:
        # Resumed runs use one extra directory level
        run_parts.append('resume')
    run_dir = os.path.join(*run_parts)

    checkpoint_path = os.path.join(
        workspace, 'checkpoints', run_dir,
        '{}_iterations.pth'.format(iteration))

    submission_path = os.path.join(
        workspace, 'submissions', run_dir,
        '{}_iterations_submission.csv'.format(iteration))
    create_folder(os.path.dirname(submission_path))

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    # NOTE(review): eval() executes whatever string is in args.model_type;
    # only safe while that value comes from a trusted source.
    Model = eval(model_type)
    if model_type == 'cbam_ResNet18':
        model = Model(18, classes_num * 2, 'CBAM')
    else:
        model = Model(classes_num * 2)

    # Map the stored tensors to CPU so that a checkpoint saved on a GPU can
    # still be loaded when CUDA is not available; the model is moved to the
    # GPU below when requested.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = TestDataGenerator(
        test_feature_hdf5_path=test_feature_hdf5_path,
        segment_seconds=segment_seconds,
        hop_seconds=hop_seconds,
        pad_type=pad_type,
        scalar=scalar,
        batch_size=batch_size)

    generate_func = data_generator.generate_test()

    # Results of segments
    output_dict = forward_infer(model=model,
                                generate_func=generate_func,
                                cuda=cuda)

    # Results of audio recordings
    result_dict = segment_prediction_to_clip_prediction(output_dict,
                                                        average='arithmetic')

    # Write submission
    write_submission(result_dict, submission_path)
def inference_evaluation(args):
    '''Inference on evaluation data and write out submission file. 
    
    The model is always loaded from the 'holdout_fold=none' checkpoint of the 
    corresponding 'development' sub-directory. All values below are read as 
    attributes of `args`. 
    
    Args: 
      subtask: 'a' | 'b' | 'c', corresponds to 3 subtasks in DCASE2019 Task1
      data_type: 'leaderboard' | 'evaluation'
      workspace: string, directory of workspace
      model_type: string, e.g. 'Cnn_9layers'
      iteration: int
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
      filename: string, directory level used in the checkpoint and log paths
      mode: string, directory level used in the log path
    
    Raises: 
      ValueError: if subtask is not one of 'a', 'b' or 'c'
    '''
    # Arguments & parameters
    subtask = args.subtask
    data_type = args.data_type
    workspace = args.workspace
    model_type = args.model_type
    iteration = args.iteration
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    holdout_fold = 'none'

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second

    in_domain_classes_num = len(config.labels) - 1

    # Paths
    prefix = 'minidata_' if mini_data else ''

    sub_dir = get_subdir(subtask, data_type)
    trained_sub_dir = get_subdir(subtask, 'development')

    # Directory names that are shared by several of the paths below
    feature_name = '{}logmel_{}frames_{}melbins'.format(
        prefix, frames_per_second, mel_bins)
    fold_name = 'holdout_fold={}'.format(holdout_fold)

    feature_hdf5_path = os.path.join(
        workspace, 'features', feature_name, '{}.h5'.format(sub_dir))

    scalar_path = os.path.join(
        workspace, 'scalars', feature_name, '{}.h5'.format(trained_sub_dir))

    checkpoint_path = os.path.join(
        workspace, 'checkpoints', filename, feature_name,
        '{}'.format(trained_sub_dir), fold_name, model_type,
        '{}_iterations.pth'.format(iteration))

    # NOTE(review): the submission path has no `filename` level, unlike the
    # checkpoint and log paths - confirm this is intended.
    submission_path = os.path.join(
        workspace, 'submissions', feature_name, sub_dir, fold_name,
        model_type, '{}_iterations'.format(iteration), 'submission.csv')
    create_folder(os.path.dirname(submission_path))

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode, feature_name,
        '{}'.format(sub_dir), fold_name, model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Load model
    # NOTE(review): eval() executes whatever string is in args.model_type;
    # only safe while that value comes from a trusted source.
    Model = eval(model_type)

    if subtask in ['a', 'b']:
        model = Model(in_domain_classes_num, activation='logsoftmax')

    elif subtask == 'c':
        model = Model(in_domain_classes_num, activation='sigmoid')

    else:
        # Without this branch `model` would be unbound and the failure would
        # only show up as an UnboundLocalError further down.
        raise ValueError(
            "Incorrect subtask {!r}, must be 'a', 'b' or 'c'".format(subtask))

    # Map the stored tensors to CPU so that a checkpoint saved on a GPU can
    # still be loaded when CUDA is not available; the model is moved to the
    # GPU below when requested.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = EvaluationDataGenerator(
        feature_hdf5_path=feature_hdf5_path,
        scalar=scalar,
        batch_size=batch_size)

    generate_func = data_generator.generate_evaluation(data_type)

    # Inference
    output_dict = forward(model, generate_func, cuda, return_input=False,
                          return_target=False)

    # Write submission
    write_submission(output_dict, subtask, data_type, submission_path)
Exemple #6
0
def inference_validation(args):
    '''Inference and calculate metrics on validation data. 
    
    Loads the checkpoint saved at `iteration`, then runs the Evaluator on the 
    'validate' data. All values below are read as attributes of `args`. 
    
    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      data_type: 'train_weak' | 'train_synthetic'
      holdout_fold: '1'
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      loss_type: 'clipwise_binary_crossentropy' | 'framewise_binary_crossentropy'
      iteration: int, load model of this iteration
      batch_size: int
      cuda: bool
      visualize: bool
      mini_data: bool, set True for debugging on a small part of data
      filename: string, directory level used in the checkpoint, submission 
          and log paths
      mode: string, directory level used in the log path
    
    Raises: 
      Exception: if loss_type is not one of the two supported values
    '''
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    data_type = args.data_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    loss_type = args.loss_type
    iteration = args.iteration
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    visualize = args.visualize
    mini_data = args.mini_data
    filename = args.filename

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    classes_num = config.classes_num

    # Paths
    prefix = 'minidata_' if mini_data else ''

    # The loss type decides the second argument given to the model
    if loss_type == 'clipwise_binary_crossentropy':
        strong_target_training = False
    elif loss_type == 'framewise_binary_crossentropy':
        strong_target_training = True
    else:
        raise Exception('Incorrect argument!')

    train_relative_name = get_relative_path_no_extension(data_type)
    validate_relative_name = get_relative_path_no_extension('validation')

    # Directory names that are shared by several of the paths below
    feature_name = '{}logmel_{}frames_{}melbins'.format(
        prefix, frames_per_second, mel_bins)
    train_name = '{}'.format(train_relative_name)
    fold_name = 'holdout_fold={}'.format(holdout_fold)
    loss_name = 'loss_type={}'.format(loss_type)

    validate_metadata_path = os.path.join(
        dataset_dir, 'metadata', 'validation',
        '{}.csv'.format(validate_relative_name))

    train_hdf5_path = os.path.join(
        workspace, 'features', feature_name,
        '{}.h5'.format(train_relative_name))

    validate_hdf5_path = os.path.join(
        workspace, 'features', feature_name,
        '{}.h5'.format(validate_relative_name))

    scalar_path = os.path.join(
        workspace, 'scalars', feature_name, 'train/weak.h5')

    checkpoint_path = os.path.join(
        workspace, 'checkpoints', filename, feature_name, train_name,
        fold_name, model_type, loss_name,
        '{}_iterations.pth'.format(iteration))

    submission_path = os.path.join(
        workspace, 'submissions', filename, feature_name, train_name,
        fold_name, model_type, loss_name, 'validation_submission.csv')
    create_folder(os.path.dirname(submission_path))

    logs_dir = os.path.join(
        args.workspace, 'logs', filename, args.mode, feature_name,
        train_name, fold_name, model_type, loss_name)
    create_logging(logs_dir, filemode='w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Load model
    # NOTE(review): eval() executes whatever string is in args.model_type;
    # only safe while that value comes from a trusted source.
    Model = eval(model_type)
    model = Model(classes_num, strong_target_training)

    # Map the stored tensors to CPU so that a checkpoint saved on a GPU can
    # still be loaded when CUDA is not available; the model is moved to the
    # GPU below when requested.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = DataGenerator(train_hdf5_path=train_hdf5_path,
                                   validate_hdf5_path=validate_hdf5_path,
                                   holdout_fold=holdout_fold,
                                   scalar=scalar,
                                   batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          cuda=cuda,
                          verbose=True)

    evaluator.evaluate(data_type='validate',
                       metadata_path=validate_metadata_path,
                       submission_path=submission_path)

    if visualize:
        evaluator.visualize(data_type='validate')
Exemple #7
0
def train(args):
    '''Train a model on mini batches with periodic evaluation and saving. 
    
    Every 100 iterations the model is evaluated, every 200 iterations the 
    learning rate is multiplied by 0.9, every 1000 iterations a checkpoint is 
    saved, and training stops once iteration 10000 has been processed. All 
    values below are read as attributes of `args`. 
    
    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      audio_type: 'foa' | 'mic'
      holdout_fold: 1 | 2 | 3 | 4 | -1, where -1 indicates using all data 
          without validation for training
      model_name: string, e.g. 'Cnn_9layers'
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
      filename: string, directory level used in the model, temporary 
          submission and log paths
      mode: string, directory level used in the log path
    '''

    # Settings taken from the arguments
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    audio_type = args.audio_type
    holdout_fold = args.holdout_fold
    model_name = args.model_name
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    # Settings taken from the config module, plus fixed training options
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    classes_num = config.classes_num
    max_validate_num = 10  # Number of audio recordings to validate
    reduce_lr = True  # Reduce learning rate after several iterations

    # Directory names used to build the paths
    prefix = 'minidata_' if mini_data else ''
    feature_tag = '{}{}_{}_logmel_{}frames_{}melbins'.format(
        prefix, audio_type, 'dev', frames_per_second, mel_bins)
    model_tag = '{}_{}'.format(model_name, feature_tag)
    fold_tag = 'holdout_fold={}'.format(holdout_fold)

    metadata_dir = os.path.join(dataset_dir, 'metadata_dev')
    features_dir = os.path.join(workspace, 'features', feature_tag)
    scalar_path = os.path.join(workspace, 'scalars', feature_tag, 'scalar.h5')

    models_dir = os.path.join(
        workspace, 'models', filename, model_tag, fold_tag)
    create_folder(models_dir)

    temp_submissions_dir = os.path.join(
        workspace, '_temp', 'submissions', filename, model_tag)
    create_folder(temp_submissions_dir)

    logs_dir = os.path.join(
        args.workspace, 'logs', filename, args.mode, model_tag, fold_tag)
    create_logging(logs_dir, filemode='w')
    logging.info(args)

    # Scalar used by the data generator
    scalar = load_scalar(scalar_path)

    # Build the model from its class name
    Model = eval(model_name)
    model = Model(classes_num)
    if cuda:
        model.cuda()

    optimizer = optim.Adam(
        model.parameters(),
        lr=1e-3,
        betas=(0.9, 0.999),
        eps=1e-08,
        weight_decay=0.)

    data_generator = DataGenerator(
        features_dir=features_dir,
        scalar=scalar,
        batch_size=batch_size,
        holdout_fold=holdout_fold)

    evaluator = Evaluator(
        model=model,
        data_generator=data_generator,
        max_validate_num=max_validate_num,
        cuda=cuda)

    train_bgn_time = time.time()

    # One pass of this loop is one training iteration, counted from 0
    for iteration, batch_data_dict in enumerate(
            data_generator.generate_train()):

        # Periodic evaluation
        if iteration % 100 == 0:
            logging.info('------------------------------------')
            logging.info('iteration: {}'.format(iteration))

            train_fin_time = time.time()
            train_time = train_fin_time - train_bgn_time

            train_list_dict = evaluator.evaluate(data_type='train')
            evaluator.metrics(
                train_list_dict, temp_submissions_dir, metadata_dir)

            # A validation split only exists when a fold is held out
            if holdout_fold != -1:
                validate_list_dict = evaluator.evaluate(data_type='validate')
                evaluator.metrics(
                    validate_list_dict, temp_submissions_dir, metadata_dir)

            validate_time = time.time() - train_fin_time

            logging.info(
                'train time: {:.3f} s, validate time: {:.3f} s'.format(
                    train_time, validate_time))

            # Restart the timer so evaluation is not counted as training
            train_bgn_time = time.time()

        # Periodic checkpoint, skipped for the untrained model
        if iteration > 0 and iteration % 1000 == 0:
            save_path = os.path.join(
                models_dir, 'md_{}_iters.pth'.format(iteration))

            torch.save(
                {'iteration': iteration,
                 'model': model,
                 'optimizer': optimizer},
                save_path)
            logging.info('Model saved to {}'.format(save_path))

        # Periodic learning rate decay
        if reduce_lr and iteration > 0 and iteration % 200 == 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Put every entry of the batch on the target device
        for key in batch_data_dict:
            batch_data_dict[key] = move_data_to_gpu(batch_data_dict[key], cuda)

        # Forward pass and loss
        model.train()
        batch_output_dict = model(batch_data_dict['feature'])
        loss = event_spatial_loss(batch_output_dict, batch_data_dict)

        # Backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Iteration 10000 is the last one that is trained on
        if iteration == 10000:
            break
def train(args):
    '''Training. Model will be saved after several iterations. 

    Every mini batch is augmented with mixup before the forward pass. 

    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      subtask: 'a' | 'b' | 'c', corresponds to 3 subtasks in DCASE2019 Task1
      data_type: 'development' | 'evaluation'
      holdout_fold: '1' | 'none', set 1 for development and none for training 
          on all data without validation
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
      filename: string, sub directory name used under the checkpoints, 
          statistics and logs directories
      mode: string, sub directory name used under the logs directory
      fixed: string, 'True' to build the model with fixed=True and to 
          optimize only the parameters that require gradients
      finetune: string, 'True' to initialize the model from a saved checkpoint
      ite_train: int, iteration at which training stops
      ite_eva: int, evaluation only runs on iterations greater than this
      ite_store: int, checkpoints are only saved on iterations greater than 
          this

    Raises: 
      ValueError: if subtask is not one of 'a', 'b', 'c'
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    subtask = args.subtask
    data_type = args.data_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    # Both flags arrive as strings, only the exact text 'True' enables them
    freeze_layers = args.fixed == 'True'
    finetune = args.finetune == 'True'
    ite_train = args.ite_train
    ite_eva = args.ite_eva
    ite_store = args.ite_store

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    reduce_lr = True

    sources_to_evaluate = get_sources(subtask)
    in_domain_classes_num = len(config.labels) - 1

    # Paths
    prefix = 'minidata_' if mini_data else ''
    feature_tag = '{}logmel_{}frames_{}melbins'.format(
        prefix, frames_per_second, mel_bins)
    fold_tag = 'holdout_fold={}'.format(holdout_fold)

    sub_dir = get_subdir(subtask, data_type)
    sub_dir_tag = '{}'.format(sub_dir)

    train_csv = os.path.join(dataset_dir, sub_dir, 'meta.csv')

    validate_csv = os.path.join(dataset_dir, sub_dir, 'evaluation_setup', 
        'fold1_evaluate.csv')

    feature_hdf5_path = os.path.join(workspace, 'features', feature_tag, 
        '{}.h5'.format(sub_dir))

    scalar_path = os.path.join(workspace, 'scalars', feature_tag, 
        '{}.h5'.format(sub_dir))

    checkpoints_dir = os.path.join(workspace, 'checkpoints', filename, 
        feature_tag, sub_dir_tag, fold_tag, model_type)
    create_folder(checkpoints_dir)

    validate_statistics_path = os.path.join(workspace, 'statistics', filename, 
        feature_tag, sub_dir_tag, fold_tag, model_type, 
        'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))

    logs_dir = os.path.join(workspace, 'logs', filename, args.mode, 
        feature_tag, sub_dir_tag, fold_tag, model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    if cuda:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    # NOTE(review): eval() on a command line argument executes arbitrary 
    # code; only pass trusted model names.
    Model = eval(model_type)

    if subtask in ['a', 'b']:
        model = Model(in_domain_classes_num, activation='logsoftmax', 
            fixed=freeze_layers)
        loss_func = nll_loss

    elif subtask == 'c':
        model = Model(in_domain_classes_num, activation='sigmoid')
        loss_func = F.binary_cross_entropy

    else:
        # Fail here with a clear message instead of a NameError on `model`
        raise ValueError(
            "subtask must be 'a', 'b' or 'c', got {!r}".format(subtask))

    if cuda:
        model.cuda()

    # Optimizer
    if freeze_layers:
        # Only hand the parameters that still require gradients to Adam
        trainable_params = filter(
            lambda p: p.requires_grad, model.parameters())
    else:
        trainable_params = model.parameters()

    optimizer = optim.Adam(trainable_params, lr=1e-3, betas=(0.9, 0.999), 
        eps=1e-08, weight_decay=0., amsgrad=True)

    if finetune:
        # NOTE(review): absolute path of the original author's machine; 
        # presumably it should be derived from `workspace` - confirm before 
        # changing.
        model_path = '/home/cdd/code2/dcase2020_task1/workspace/checkpoints/main/logmel_86frames_40melbins/TAU-urban-acoustic-scenes-2020-mobile-development/holdout_fold=1/' + model_type + '/2000_iterations.pth'
        # Map the stored tensors to the device that is actually in use, so 
        # that fine-tuning also works when CUDA is unavailable or disabled
        device = torch.device('cuda' if cuda else 'cpu')
        checkpoint = torch.load(model_path, map_location=device)
        model.load_state_dict(checkpoint['model'])

    # Data generator
    data_generator = DataGenerator(
        feature_hdf5_path=feature_hdf5_path, 
        train_csv=train_csv, 
        validate_csv=validate_csv, 
        holdout_fold=holdout_fold, 
        scalar=scalar, 
        batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(
        model=model, 
        data_generator=data_generator, 
        subtask=subtask, 
        cuda=cuda)

    # Statistics
    validate_statistics_container = StatisticsContainer(validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0

    # Keys of a mini batch that have to live on the same device as the model
    device_keys = ('feature', 'feature_gamm', 'feature_mfcc', 
        'feature_panns', 'target')

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate
        if iteration % 200 == 0 and iteration > ite_eva:
            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()

            # The returned training statistics are not used here
            for source in sources_to_evaluate:
                evaluator.evaluate(
                    data_type='train', 
                    source=source, 
                    max_iteration=None, 
                    verbose=False)

            if holdout_fold != 'none':
                for source in sources_to_evaluate:
                    validate_statistics = evaluator.evaluate(
                        data_type='validate', 
                        source=source, 
                        max_iteration=None, 
                        verbose=False)

                    validate_statistics_container.append_and_dump(
                        iteration, source, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info(
                'Train time: {:.3f} s, validate time: {:.3f} s'
                ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model
        if iteration % 200 == 0 and iteration > ite_store:
            checkpoint = {
                'iteration': iteration, 
                'model': model.state_dict(), 
                'optimizer': optimizer.state_dict()}

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.93

        # Move data to GPU
        for key in batch_data_dict.keys():
            if key in device_keys:
                batch_data_dict[key] = move_data_to_gpu(batch_data_dict[key], cuda)

        # Train, using mixup on all four input features and the target
        model.train()
        mixed_x1, mixed_x2, mixed_x3, mixed_x4, y_a, y_b, lam = mixup_data(
            x1=batch_data_dict['feature'], 
            x2=batch_data_dict['feature_gamm'], 
            x3=batch_data_dict['feature_mfcc'], 
            x4=batch_data_dict['feature_panns'], 
            y=batch_data_dict['target'], 
            alpha=0.2)
        # The second value returned by the model is not used by this loop
        batch_output, _ = model(mixed_x1, mixed_x2, mixed_x3, mixed_x4)

        # NOTE(review): 10 is presumably the number of classes, i.e. a 
        # (batch, classes) output - confirm against the model definitions.
        if batch_output.shape[1] == 10: # single scale models
            loss = mixup_criterion(loss_func, batch_output, y_a, y_b, lam)
        else:                  # multi scale models
            # The last slice along dim 1 is left out of the loss. 
            # NOTE(review): confirm that this is intended.
            loss = sum(
                mixup_criterion(
                    loss_func, batch_output[:, scale, :], y_a, y_b, lam)
                for scale in range(batch_output.shape[1] - 1))

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == ite_train:
            break

        iteration += 1
Exemple #9
0
def train(args):
    '''Run the training loop. Checkpoints are written every 1000 iterations. 

    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      train_source: 'curated' | 'noisy' | 'curated_and_noisy'
      segment_seconds: float, duration of audio recordings to be padded or split
      hop_seconds: float, hop seconds between segments
      pad_type: 'constant' | 'repeat'
      holdout_fold: '1', '2', '3', '4' | 'none', set `none` for training 
          on all data without validation
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
      filename: string, sub directory name used under the checkpoints, 
          statistics and logs directories
      mode: string, sub directory name used under the logs directory
    '''

    # Values taken from the parsed arguments
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    train_source = args.train_source
    segment_seconds = args.segment_seconds
    hop_seconds = args.hop_seconds
    pad_type = args.pad_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    # Values taken from the config module and fixed training settings
    mel_bins = config.mel_bins
    classes_num = config.classes_num
    frames_per_second = config.frames_per_second
    max_iteration = 500  # Number of mini-batches to evaluate on training data
    reduce_lr = False

    # Path fragments shared by several directories
    prefix = 'minidata_' if mini_data else ''
    feature_tag = '{}logmel_{}frames_{}melbins'.format(
        prefix, frames_per_second, mel_bins)
    run_tags = (
        feature_tag,
        'train_source={}'.format(train_source),
        'segment={}s,hop={}s,pad_type={}'.format(
            segment_seconds, hop_seconds, pad_type),
        'holdout_fold={}'.format(holdout_fold),
        model_type,
    )

    # Input files
    curated_feature_hdf5_path = os.path.join(
        workspace, 'features', feature_tag, 'train_curated.h5')
    noisy_feature_hdf5_path = os.path.join(
        workspace, 'features', feature_tag, 'train_noisy.h5')
    curated_cross_validation_path = os.path.join(
        workspace, 'cross_validation_metadata',
        'train_curated_cross_validation.csv')
    noisy_cross_validation_path = os.path.join(
        workspace, 'cross_validation_metadata',
        'train_noisy_cross_validation.csv')
    scalar_path = os.path.join(
        workspace, 'scalars', feature_tag, 'train_noisy.h5')

    # Output locations
    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename, *run_tags)
    create_folder(checkpoints_dir)

    validate_statistics_path = os.path.join(
        workspace, 'statistics', filename, *run_tags,
        'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode, *run_tags)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Build the model; the class is looked up by name
    Model = eval(model_type)
    model = Model(classes_num)
    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(
        model.parameters(), lr=1e-3, betas=(0.9, 0.999), eps=1e-08,
        weight_decay=0., amsgrad=True)

    # Data generator
    data_generator = DataGenerator(
        curated_feature_hdf5_path=curated_feature_hdf5_path,
        noisy_feature_hdf5_path=noisy_feature_hdf5_path,
        curated_cross_validation_path=curated_cross_validation_path,
        noisy_cross_validation_path=noisy_cross_validation_path,
        train_source=train_source,
        holdout_fold=holdout_fold,
        segment_seconds=segment_seconds,
        hop_seconds=hop_seconds,
        pad_type=pad_type,
        scalar=scalar,
        batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(
        model=model, data_generator=data_generator, cuda=cuda)

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    target_sources = ['curated', 'noisy']
    device_keys = ['feature', 'mask', 'target']
    train_bgn_time = time.time()

    # Train on mini batches, counting iterations from 0
    for iteration, batch_data_dict in enumerate(
            data_generator.generate_train()):

        # Evaluate every 500 iterations, including iteration 0
        if iteration % 500 == 0:
            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()

            # Evaluate on a part of the training data; the returned
            # statistics are not stored
            logging.info('Train statistics:')
            for target_source in target_sources:
                evaluator.evaluate(
                    data_type='train',
                    target_source=target_source,
                    max_iteration=max_iteration,
                    verbose=False)

            # Evaluate on holdout validation data
            if holdout_fold != 'none':
                logging.info('Validate statistics:')

                for target_source in target_sources:
                    fold_statistics = evaluator.evaluate(
                        data_type='validate',
                        target_source=target_source,
                        max_iteration=None,
                        verbose=False)
                    validate_statistics_container.append(
                        iteration, target_source, fold_statistics)

                validate_statistics_container.dump()

            now = time.time()
            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_fin_time - train_bgn_time,
                                   now - train_fin_time))

            train_bgn_time = time.time()

        # Save model
        if iteration > 0 and iteration % 1000 == 0:
            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))
            torch.save(
                {
                    'iteration': iteration,
                    'model': model.state_dict(),
                    'optimizer': optimizer.state_dict()
                },
                checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate (switched off through reduce_lr above)
        if reduce_lr and iteration > 0 and iteration % 200 == 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU
        for key in batch_data_dict.keys():
            if key not in device_keys:
                continue
            batch_data_dict[key] = move_data_to_gpu(
                batch_data_dict[key], cuda)

        # Forward pass and loss
        model.train()
        batch_output = model(batch_data_dict['feature'])
        loss = binary_cross_entropy(batch_output, batch_data_dict['target'])

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == 20000:
            break
Exemple #10
0
def inference_validation(args):
    '''Inference validation data. 

    For a single fold the saved model is loaded, the validation data is 
    evaluated and the metrics are calculated. For holdout_fold == -1 the 
    metrics are calculated from the prediction files that already exist in 
    the submissions directory. 

    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      audio_type: 'foa' | 'mic'
      holdout_fold: 1 | 2 | 3 | 4 | -1, where -1 indicates calculating metrics
          on all 1, 2, 3 and 4 folds. 
      model_name: string, e.g. 'Cnn_9layers'
      iteration: int, iteration of the checkpoint to load
      batch_size: int
      cuda: bool
      visualize: bool
      mini_data: bool, set True for debugging on a small part of data
      filename: string, sub directory name used under the submissions, logs 
          and models directories
      mode: string, sub directory name used under the logs directory
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    audio_type = args.audio_type
    holdout_fold = args.holdout_fold
    model_name = args.model_name
    iteration = args.iteration
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    visualize = args.visualize
    mini_data = args.mini_data
    filename = args.filename

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second

    # Paths
    prefix = 'minidata_' if mini_data else ''

    feature_tag = '{}{}_{}_logmel_{}frames_{}melbins'.format(
        prefix, audio_type, 'dev', frames_per_second, mel_bins)
    model_feature_tag = '{}_{}'.format(model_name, feature_tag)

    metadata_dir = os.path.join(dataset_dir, 'metadata_dev')

    submissions_dir = os.path.join(
        workspace, 'submissions', filename, model_feature_tag,
        'iteration={}'.format(iteration))
    create_folder(submissions_dir)

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode, model_feature_tag,
        'holdout_fold={}'.format(holdout_fold))
    create_logging(logs_dir, filemode='w')
    logging.info(args)

    # Inference and calculate metrics for a fold
    if holdout_fold != -1:

        features_dir = os.path.join(workspace, 'features', feature_tag)

        scalar_path = os.path.join(
            workspace, 'scalars', feature_tag, 'scalar.h5')

        # The model directory never carries the mini data prefix. 
        # NOTE(review): presumably the model trained on the full data is 
        # reused for mini data runs - confirm.
        checkpoint_path = os.path.join(
            workspace, 'models', filename,
            '{}_{}{}_{}_logmel_{}frames_{}melbins'.format(
                model_name, '', audio_type, 'dev', frames_per_second,
                mel_bins), 'holdout_fold={}'.format(holdout_fold),
            'md_{}_iters.pth'.format(iteration))

        # Load scalar
        scalar = load_scalar(scalar_path)

        # Load model. Without CUDA the stored tensors are mapped to the CPU, 
        # otherwise a checkpoint saved on a GPU could not be deserialized. 
        checkpoint = torch.load(
            checkpoint_path, map_location=None if cuda else 'cpu')
        # The checkpoint holds the model object itself
        model = checkpoint['model']

        if cuda:
            model.cuda()

        # Data generator
        data_generator = DataGenerator(features_dir=features_dir,
                                       scalar=scalar,
                                       batch_size=batch_size,
                                       holdout_fold=holdout_fold)

        # Evaluator
        evaluator = Evaluator(model=model,
                              data_generator=data_generator,
                              cuda=cuda)

        # Calculate metrics
        data_type = 'validate'
        list_dict = evaluator.evaluate(data_type=data_type)
        evaluator.metrics(list_dict=list_dict,
                          submissions_dir=submissions_dir,
                          metadata_dir=metadata_dir)

        # Visualize reference and predicted events, elevation and azimuth
        if visualize:
            evaluator.visualize(data_type=data_type)

    # Calculate metrics for all folds
    else:
        prediction_names = os.listdir(submissions_dir)
        prediction_paths = [
            os.path.join(submissions_dir, name) for name in prediction_names]

        metrics = calculate_metrics(metadata_dir=metadata_dir,
                                    prediction_paths=prediction_paths)

        logging.info('Metrics of {} files: '.format(len(prediction_names)))
        for key, value in metrics.items():
            logging.info('    {:<20} {:.3f}'.format(key + ' :', value))
def train(args):
    '''Training. Model will be saved after several iterations. 
    
    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      taxonomy_level: 'fine' | 'coarse'
      model_type: string, e.g. 'Cnn_9layers_MaxPooling'
      holdout_fold: '1' | 'None', where '1' indicates using validation and 
          'None' indicates using full data for training
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arugments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    taxonomy_level = args.taxonomy_level
    model_type = args.model_type
    holdout_fold = args.holdout_fold
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    plt_x = []
    plt_y = []
    T_max = 300
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    max_iteration = 10  # Number of mini-batches to evaluate on training data
    reduce_lr = True

    labels = get_labels(taxonomy_level)
    classes_num = len(labels)

    def mixup_data(x1, x2, y, alpha=1.0, use_cuda=True):  # data augmentation (mixup)
        '''Blend each sample of a batch with a randomly chosen partner.

        Both inputs are mixed with the same coefficient and the same
        permutation of the batch.

        Args:
          x1: first input batch, indexed along dim 0
          x2: second input batch, indexed along dim 0
          y: targets, indexed along dim 0
          alpha: parameter of the Beta(alpha, alpha) distribution the mixing
              coefficient is drawn from; if not positive, the coefficient is 1
          use_cuda: bool, move the permutation indices to the GPU

        Returns:
          (mixed_x1, mixed_x2, y_a, y_b, lam), where y_a are the original
          targets, y_b the targets of the partners and lam the coefficient
        '''
        # Draw the mixing coefficient; lam = 1 leaves the inputs unchanged
        lam = np.random.beta(alpha, alpha) if alpha > 0 else 1

        # Random permutation of 0 .. n - 1 that selects the partner of each
        # sample, created on the CPU and optionally moved to the GPU
        num_samples = x1.size()[0]
        partner = torch.randperm(num_samples)
        if use_cuda:
            partner = partner.cuda()

        # Mix the data
        blended = []
        for batch in (x1, x2):
            blended.append(lam * batch + (1 - lam) * batch[partner, :])
        mixed_x1, mixed_x2 = blended

        return mixed_x1, mixed_x2, y, y[partner], lam

    def mixup_criterion(criterion, pred, y_a, y_b, lam):
        '''Return the criterion of both target sets, weighted by lam and 1 - lam.'''
        loss_a = criterion(pred, y_a)
        loss_b = criterion(pred, y_b)
        return lam * loss_a + (1 - lam) * loss_b

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    train_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train.h5')

    validate_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'validate.h5')

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train.h5')

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_folder(checkpoints_dir)

    _temp_submission_path = os.path.join(
        workspace, '_temp_submissions', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type, '_submission.csv')
    create_folder(os.path.dirname(_temp_submission_path))

    validate_statistics_path = os.path.join(
        workspace, 'statistics', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))
    loss_path = os.path.join(
        workspace, 'loss',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_folder(loss_path)

    annotation_path = os.path.join(dataset_dir, 'annotations.csv')

    yaml_path = os.path.join(dataset_dir, 'dcase-ust-taxonomy.yaml')

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)

    create_logging(logs_dir, 'w')
    logging.info(args)

    if cuda:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    Model = eval(model_type)
    model = Model(classes_num)
    logging.info(
        " Space_Duo_Cnn_9_Avg  多一层 258*258 不共用FC,必须带时空标签 用loss 监测,使用去零one hot "
    )

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=1e-3,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0.,
                           amsgrad=True)

    logging.info('model parm:{} '.format(
        sum(param.numel() for param in model.parameters())))
    #计算模型参数量

    # Data generator
    data_generator = DataGenerator(train_hdf5_path=train_hdf5_path,
                                   validate_hdf5_path=validate_hdf5_path,
                                   holdout_fold=holdout_fold,
                                   scalar=scalar,
                                   batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          taxonomy_level=taxonomy_level,
                          cuda=cuda,
                          verbose=False)

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0
    best_inde = {}
    best_inde['micro_auprc'] = np.array([0.0])
    best_inde['micro_f1'] = np.array([0.0])
    best_inde['macro_auprc'] = np.array([0.0])
    best_inde['average_precision'] = np.array([0.0])
    best_inde['sum'] = best_inde['micro_auprc'] + best_inde[
        'micro_f1'] + best_inde['macro_auprc']
    last_loss1 = []
    last_loss2 = []
    last_loss = []
    best_map = 0
    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate
        if iteration % 200 == 0:
            logging.info('------------------------------------')
            logging.info('Iteration: {}, {} level statistics:'.format(
                iteration, taxonomy_level))

            train_fin_time = time.time()

            # Evaluate on training data
            if mini_data:
                raise Exception('`mini_data` flag must be set to False to use '
                                'the official evaluation tool!')

            train_statistics = evaluator.evaluate(data_type='train',
                                                  max_iteration=None)
            if iteration > 5000:
                if best_map < np.mean(train_statistics['average_precision']):
                    best_map = np.mean(train_statistics['average_precision'])
                    logging.info('best_map= {}'.format(best_map))
                    # logging.info('iter= {}'.format(iteration))
                    checkpoint = {
                        'iteration': iteration,
                        'model': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'indicators': train_statistics
                    }
                    checkpoint_path = os.path.join(checkpoints_dir,
                                                   'best7.pth')
                    torch.save(checkpoint, checkpoint_path)
                    logging.info(
                        'best_models saved to {}'.format(checkpoint_path))

            # Evaluate on validation data
            if holdout_fold != 'none':
                validate_statistics = evaluator.evaluate(
                    data_type='validate',
                    submission_path=_temp_submission_path,
                    annotation_path=annotation_path,
                    yaml_path=yaml_path,
                    max_iteration=None)

                validate_statistics_container.append_and_dump(
                    iteration, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Reduce learning rate
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9
        batch_data2_dict = batch_data_dict.copy()
        n = []

        for i, l in enumerate(batch_data2_dict['coarse_target']):
            k = 0
            for j in range(0, 8):
                if l[j] > 0.6:
                    l[j] = 1
                else:
                    l[j] = 0
                    k += 1
                if k == 8:
                    if taxonomy_level == 'coarse':
                        n.append(i)

        for i, l in enumerate(batch_data2_dict['fine_target']):
            k = 0
            for j in range(0, 29):
                if l[j] > 0.6:
                    l[j] = 1
                else:
                    l[j] = 0
                    k += 1
                if k == 29:
                    if taxonomy_level == 'fine':
                        n.append(i)

        batch_data2_dict['fine_target'] = np.delete(
            batch_data2_dict['fine_target'], n, axis=0)
        batch_data2_dict['coarse_target'] = np.delete(
            batch_data2_dict['coarse_target'], n, axis=0)
        batch_data2_dict['audio_name'] = np.delete(
            batch_data2_dict['audio_name'], n, axis=0)
        batch_data2_dict['feature'] = np.delete(batch_data2_dict['feature'],
                                                n,
                                                axis=0)
        batch_data2_dict['spacetime'] = np.delete(
            batch_data2_dict['spacetime'], n, axis=0)
        if batch_data2_dict['audio_name'].size == 0:
            iteration += 1
            continue
        #使用 概率数据请注释下行,使用去零onehot数据不用注释
        batch_data_dict = batch_data2_dict

        # if iteration <8655:
        #      batch_data_dict = batch_data2_dict
        # elif iteration >=8655 and  iteration % 2 == 0:
        #     batch_data_dict = batch_data2_dict

        # Move data to GPU                                       ,'external_target','external_feature'
        for key in batch_data_dict.keys():
            if key in ['feature', 'fine_target', 'coarse_target', 'spacetime']:
                batch_data_dict[key] = move_data_to_gpu(
                    batch_data_dict[key], cuda)
        # Train
        model.train()
        # 使用mix_up  数据增强
        feature1, spacetime1, targets1_a, targets1_b, lam1 = mixup_data(
            batch_data_dict['feature'],
            batch_data_dict['spacetime'],
            batch_data_dict['fine_target'],
            alpha=1.0,
            use_cuda=True)
        feature2, spacetime2, targets2_a, targets2_b, lam2 = mixup_data(
            batch_data_dict['feature'],
            batch_data_dict['spacetime'],
            batch_data_dict['coarse_target'],
            alpha=1.0,
            use_cuda=True)
        batch_output1 = model.forward1(feature1, spacetime1)
        batch_output2 = model.forward2(feature2, spacetime2)
        lam1 = int(lam1)
        lam2 = int(lam2)
        loss1 = (lam1 * binary_cross_entropy(batch_output1, targets1_a) +
                 (1 - lam1) * binary_cross_entropy(batch_output1, targets1_b))
        loss2 = (lam2 * binary_cross_entropy(batch_output2, targets2_a) +
                 (1 - lam2) * binary_cross_entropy(batch_output2, targets2_b))

        #不使用mix_up  数据增强,请使用以下代码
        # batch_target1 = batch_data_dict['fine_target']
        # batch_output1 = model.forward1(batch_data_dict['feature'], batch_data_dict['spacetime'])
        # batch_target2 = batch_data_dict['coarse_target']
        # batch_output2 = model.forward2(batch_data_dict['feature'], batch_data_dict['spacetime'])
        # loss1 = binary_cross_entropy(batch_output1, batch_target1)
        # loss2 = binary_cross_entropy(batch_output2, batch_target2)

        loss = loss1 + loss2

        #使用loss监测请使用以下代码否者注释
        if iteration > 4320:
            new_loss = loss.item()
            if len(last_loss) < 5:
                last_loss.append(new_loss)
            else:
                cha = 0
                for i in range(4):
                    cha += abs(last_loss[i + 1] - last_loss[i])
                if new_loss > last_loss[4] and cha >= (new_loss -
                                                       last_loss[4]) > cha / 2:
                    for i in range(4):
                        last_loss[i] = last_loss[i + 1]
                    last_loss[4] = new_loss
                    logging.info(' drop iteration:{}'.format(iteration))
                    iteration += 1
                    continue
                elif new_loss > last_loss[4] and (new_loss -
                                                  last_loss[4]) > cha / 2.75:
                    for i in range(4):
                        last_loss[i] = last_loss[i + 1]
                    last_loss[4] = new_loss
                    logging.info(' low weightiteration:{}'.format(iteration))
                    loss = loss / 2

                else:
                    for i in range(4):
                        last_loss[i] = last_loss[i + 1]
                    last_loss[4] = new_loss

        # # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if iteration % 50 == 0:
            plt_x.append(iteration)
            plt_y.append(loss)

        if iteration % 13000 == 0 and iteration != 0:
            plt.figure(1)
            plt.suptitle('test result ', fontsize='18')
            plt.plot(plt_x, plt_y, 'r-', label='loss')
            plt.legend(loc='best')
            plt.savefig(
                loss_path + '/' +
                time.strftime('%m%d_%H%M%S', time.localtime(time.time())) +
                'loss.jpg')
            plt.savefig(loss_path + '/loss.jpg')

        # Stop learning
        if iteration == 13000:
            # logging.info("best_micro_auprc:{:.3f}".format(best_inde['micro_auprc']))
            # logging.info("best_micro_f1:{:.3f}".format(best_inde['micro_f1']))
            # logging.info("best_macro_auprc:{:.3f}".format(best_inde['macro_auprc']))
            # labels = get_labels(taxonomy_level)
            # for k, label in enumerate(labels):
            #     logging.info('    {:<40}{:.3f}'.format(label, best_inde['average_precision'][k]))
            break
        iteration += 1
Exemple #12
0
def train(args):
    '''Train a model on mini batches, validating and checkpointing
    periodically.

    Every 200 iterations the validation split is evaluated (unless
    holdout_fold is 'none') and, after iteration 0, the learning rate of
    every parameter group is multiplied by 0.9. Every 1000 iterations
    (after iteration 0) a checkpoint is written. Training stops once
    iteration 5000 has been processed.

    Args:
      args: namespace-like object. The attributes read here are:
        dataset_dir: string, directory of dataset
        workspace: string, directory of workspace
        audio_type: 'foa' | 'mic'
        holdout_fold: '1' | '2' | '3' | '4' | 'none', set to none if using
          all data without validation to train
        model_type: string, e.g. 'Cnn_9layers_AvgPooling'
        batch_size: int
        cuda: bool, the GPU is used only if CUDA is also available
        mini_data: bool, set True for debugging on a small part of data
        filename: string, used as a sub-directory of the output paths
        mode: string, used as a sub-directory of the logs path
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    audio_type = args.audio_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    classes_num = config.classes_num
    max_validate_num = None  # Number of audio recordings to validate
    reduce_lr = True  # Reduce learning rate after several iterations

    # Path components shared by every output location
    prefix = 'minidata_' if mini_data else ''
    feature_tag = '{}{}_{}_logmel_{}frames_{}melbins'.format(
        prefix, audio_type, 'dev', frames_per_second, mel_bins)
    fold_tag = 'holdout_fold={}'.format(holdout_fold)

    # Input paths
    metadata_dir = os.path.join(dataset_dir, 'metadata_dev')
    features_dir = os.path.join(workspace, 'features', feature_tag)
    scalar_path = os.path.join(workspace, 'scalars', feature_tag,
                               'scalar.h5')

    # Output paths
    checkpoints_dir = os.path.join(workspace, 'checkpoints', filename,
                                   feature_tag, model_type, fold_tag)
    create_folder(checkpoints_dir)

    # All folds result should write to the same directory
    temp_submissions_dir = os.path.join(workspace, '_temp', 'submissions',
                                        filename, feature_tag, model_type)
    create_folder(temp_submissions_dir)

    validate_statistics_path = os.path.join(workspace, 'statistics',
                                            filename, feature_tag, fold_tag,
                                            model_type,
                                            'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))

    logs_dir = os.path.join(workspace, 'logs', filename, args.mode,
                            feature_tag, fold_tag, model_type)
    create_logging(logs_dir, filemode='w')
    logging.info(args)

    device_message = ('Using GPU.' if cuda else
                      'Using CPU. Set --cuda flag to use GPU.')
    logging.info(device_message)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model: the class is looked up by name among the names in scope
    model_class = eval(model_type)
    model = model_class(classes_num)
    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=1e-3,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0.,
                           amsgrad=True)

    # Data generator
    data_generator = DataGenerator(features_dir=features_dir,
                                   scalar=scalar,
                                   batch_size=batch_size,
                                   holdout_fold=holdout_fold)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          cuda=cuda)

    # Statistics
    statistics_container = StatisticsContainer(validate_statistics_path)

    train_bgn_time = time.time()

    # Train on mini batches
    for iteration, batch_data_dict in enumerate(
            data_generator.generate_train()):

        on_eval_step = iteration % 200 == 0

        # Evaluate
        if on_eval_step:
            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()

            # Uncomment for evaluating on training dataset
            # train_statistics = evaluator.evaluate(
            #     data_type='train',
            #     metadata_dir=metadata_dir,
            #     submissions_dir=temp_submissions_dir,
            #     max_validate_num=max_validate_num)

            if holdout_fold != 'none':
                validate_statistics = evaluator.evaluate(
                    data_type='validate',
                    metadata_dir=metadata_dir,
                    submissions_dir=temp_submissions_dir,
                    max_validate_num=max_validate_num)
                statistics_container.append_and_dump(iteration,
                                                     validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time
            logging.info(
                'Train time: {:.3f} s, validate time: {:.3f} s'.format(
                    train_time, validate_time))

            # Restart the training stopwatch after evaluation
            train_bgn_time = time.time()

        # Save model
        if iteration > 0 and iteration % 1000 == 0:
            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))
            torch.save(
                {
                    'iteration': iteration,
                    'model': model.state_dict(),
                    'optimizer': optimizer.state_dict()
                }, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate
        if reduce_lr and on_eval_step and iteration > 0:
            for group in optimizer.param_groups:
                group['lr'] *= 0.9

        # Move data to GPU
        for name in batch_data_dict.keys():
            batch_data_dict[name] = move_data_to_gpu(batch_data_dict[name],
                                                     cuda)

        # Train
        model.train()
        batch_output_dict = model(batch_data_dict['feature'])
        loss = event_spatial_loss(batch_output_dict, batch_data_dict)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == 5000:
            break
Exemple #13
0
def inference_validation(dataset_dir, workspace, holdout_fold='1',
                         model_type='Cnn_9layers_AvgPooling', iteration=1000,
                         batch_size=32, cuda=True, visualize=True,
                         filename='train'):
    '''Inference validation data and calculate metrics.

    The keyword arguments default to the values that used to be hard-coded
    in this function, so calling it with only `dataset_dir` and `workspace`
    behaves as before (except that the GPU is now only used when CUDA is
    actually available).

    The feature, scalar and checkpoint paths built here do not depend on
    an audio type or a mini-data prefix, so those settings are not
    parameters of this function.

    Args:
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      holdout_fold: '1' | '2' | '3' | '4' | 'none', where 'none' represents
          summary and print results of all folds 1, 2, 3 and 4.
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      iteration: int, load model of this iteration
      batch_size: int
      cuda: bool, the GPU is used only if CUDA is also available
      visualize: bool, call the evaluator's visualize step after evaluating
      filename: string, sub-directory of `workspace` holding the
          checkpoints, submissions and logs
    '''

    # Fall back to the CPU instead of crashing when no GPU is present
    cuda = cuda and torch.cuda.is_available()

    classes_num = config.classes_num

    # Paths
    metadata_dir = os.path.join(dataset_dir, 'metadata_dev')

    submissions_dir = os.path.join(workspace, filename, 'submissions',
                                   model_type,
                                   'iteration={}'.format(iteration))
    create_folder(submissions_dir)

    logs_dir = os.path.join(workspace, filename, 'logs', model_type)
    create_logging(logs_dir, filemode='w')

    # Inference and calculate metrics for a fold
    if holdout_fold != 'none':

        features_dir = os.path.join(workspace, 'features')
        scalar_path = os.path.join(workspace, 'scalars', 'scalar.h5')
        checkpoint_path = os.path.join(
            workspace, filename, 'checkpoints', model_type,
            '{}_iterations.pth'.format(iteration))

        # Load scalar
        scalar = load_scalar(scalar_path)

        # Load model
        # NOTE(review): eval() resolves the class from its name; model_type
        # must never come from untrusted input.
        model_class = eval(model_type)
        model = model_class(classes_num)

        # Without CUDA the stored tensors have to be remapped to the CPU,
        # otherwise a checkpoint saved from a GPU cannot be deserialized.
        checkpoint = torch.load(checkpoint_path,
                                map_location=None if cuda else 'cpu')
        model.load_state_dict(checkpoint['model'])

        if cuda:
            model.cuda()

        # Data generator
        data_generator = DataGenerator(
            features_dir=features_dir,
            scalar=scalar,
            batch_size=batch_size,
            holdout_fold=holdout_fold)

        # Evaluator
        evaluator = Evaluator(
            model=model,
            data_generator=data_generator,
            cuda=cuda)

        # Calculate metrics
        data_type = 'validate'

        evaluator.evaluate(
            data_type=data_type,
            metadata_dir=metadata_dir,
            submissions_dir=submissions_dir,
            max_validate_num=None)

        # Visualize reference and predicted events, elevation and azimuth
        if visualize:
            evaluator.visualize(data_type=data_type)

    # Calculate metrics for all 4 folds from the files already present in
    # submissions_dir
    else:
        prediction_names = os.listdir(submissions_dir)
        prediction_paths = [os.path.join(submissions_dir, name)
                            for name in prediction_names]

        metrics = calculate_metrics(metadata_dir=metadata_dir,
                                    prediction_paths=prediction_paths)

        logging.info('Metrics of {} files: '.format(len(prediction_names)))
        for key in metrics.keys():
            logging.info('    {:<20} {:.3f}'.format(key + ' :', metrics[key]))
Exemple #14
0
def train(args):
    '''Training with mixup. Model will be saved after several iterations.

    Every 200 iterations the model is evaluated on the training data and,
    unless holdout_fold is 'none', on the validation data. After iteration
    5000 the checkpoint with the best mean average precision on the
    training data is kept as 'best2.pth'. A regular checkpoint is written
    every 1000 iterations and the learning rate is multiplied by 0.9 every
    200 iterations. Training stops once iteration 10000 has been
    processed, at which point the recorded loss curve is saved as an image.

    Args:
      args: namespace-like object. The attributes read here are:
        dataset_dir: string, directory of dataset
        workspace: string, directory of workspace
        taxonomy_level: 'fine' | 'coarse'
        model_type: string, e.g. 'Cnn_9layers_MaxPooling'
        holdout_fold: '1' | 'none', where '1' indicates using validation
            and 'none' indicates using full data for training
        batch_size: int
        cuda: bool, the GPU is used only if CUDA is also available
        mini_data: bool, set True for debugging on a small part of data
        filename: string, used as a sub-directory of the output paths
        mode: string, used as a sub-directory of the logs path

    Raises:
      Exception: at the first evaluation step when `mini_data` is True.
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    taxonomy_level = args.taxonomy_level
    model_type = args.model_type
    holdout_fold = args.holdout_fold
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    plt_x = []  # Iterations at which the loss was sampled
    plt_y = []  # Loss values sampled at those iterations
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    reduce_lr = True

    labels = get_labels(taxonomy_level)
    classes_num = len(labels)

    # Path components shared by every input / output location
    prefix = 'minidata_' if mini_data else ''
    feature_tag = '{}logmel_{}frames_{}melbins'.format(
        prefix, frames_per_second, mel_bins)
    taxonomy_tag = 'taxonomy_level={}'.format(taxonomy_level)
    fold_tag = 'holdout_fold={}'.format(holdout_fold)

    # Paths
    train_hdf5_path = os.path.join(workspace, 'features', feature_tag,
                                   'train.h5')

    validate_hdf5_path = os.path.join(workspace, 'features', feature_tag,
                                      'validate.h5')

    scalar_path = os.path.join(workspace, 'scalars', feature_tag, 'train.h5')

    checkpoints_dir = os.path.join(workspace, 'checkpoints', filename,
                                   feature_tag, taxonomy_tag, fold_tag,
                                   model_type)
    create_folder(checkpoints_dir)

    _temp_submission_path = os.path.join(workspace, '_temp_submissions',
                                         filename, feature_tag, taxonomy_tag,
                                         fold_tag, model_type,
                                         '_submission.csv')
    create_folder(os.path.dirname(_temp_submission_path))

    validate_statistics_path = os.path.join(workspace, 'statistics', filename,
                                            feature_tag, taxonomy_tag,
                                            fold_tag, model_type,
                                            'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))

    annotation_path = os.path.join(dataset_dir, 'annotations.csv')

    yaml_path = os.path.join(dataset_dir, 'dcase-ust-taxonomy.yaml')

    logs_dir = os.path.join(workspace, 'logs', filename, args.mode,
                            feature_tag, taxonomy_tag, fold_tag, model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    if cuda:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    # NOTE(review): eval() resolves the class from its name; model_type must
    # never come from untrusted input.
    Model = eval(model_type)
    model = Model(classes_num)

    if cuda:
        model.cuda()

    # Loss used by the mixup criterion. Built once instead of on every
    # iteration, and only moved to the GPU when one is actually in use.
    criterion = nn.BCELoss()
    if cuda:
        criterion.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=1e-3,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0.,
                           amsgrad=True)

    # Data generator
    data_generator = DataGenerator(train_hdf5_path=train_hdf5_path,
                                   validate_hdf5_path=validate_hdf5_path,
                                   holdout_fold=holdout_fold,
                                   scalar=scalar,
                                   batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          taxonomy_level=taxonomy_level,
                          cuda=cuda,
                          verbose=False)

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0
    best_map = 0  # Best mean average precision seen on the training data

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate
        if iteration % 200 == 0:
            logging.info('------------------------------------')
            logging.info('Iteration: {}, {} level statistics:'.format(
                iteration, taxonomy_level))

            train_fin_time = time.time()

            # Evaluate on training data
            if mini_data:
                raise Exception('`mini_data` flag must be set to False to use '
                                'the official evaluation tool!')

            train_statistics = evaluator.evaluate(data_type='train',
                                                  max_iteration=None)

            # Keep the best model, but only once training has had time to
            # settle (after iteration 5000).
            # NOTE(review): "best" is judged on the training data, not on
            # the validation data - confirm this is intended.
            if iteration > 5000:
                current_map = np.mean(train_statistics['average_precision'])
                if best_map < current_map:
                    best_map = current_map
                    logging.info('best_map= {}'.format(best_map))
                    checkpoint = {
                        'iteration': iteration,
                        'model': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'indicators': train_statistics
                    }
                    checkpoint_path = os.path.join(checkpoints_dir,
                                                   'best2.pth')
                    torch.save(checkpoint, checkpoint_path)
                    logging.info(
                        'best_models saved to {}'.format(checkpoint_path))

            # Evaluate on validation data
            if holdout_fold != 'none':
                validate_statistics = evaluator.evaluate(
                    data_type='validate',
                    submission_path=_temp_submission_path,
                    annotation_path=annotation_path,
                    yaml_path=yaml_path,
                    max_iteration=None)

                validate_statistics_container.append_and_dump(
                    iteration, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info(
                'Train time: {:.3f} s, validate time: {:.3f} s'.format(
                    train_time, validate_time))

            # Restart the training stopwatch after evaluation
            train_bgn_time = time.time()

        # Save model
        if iteration % 1000 == 0 and iteration > 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU (only the entries that are fed to the model/loss)
        for key in batch_data_dict.keys():
            if key in ['feature', 'fine_target', 'coarse_target', 'spacetime']:
                batch_data_dict[key] = move_data_to_gpu(
                    batch_data_dict[key], cuda)

        # Mixup augmentation. Pass the resolved `cuda` flag rather than a
        # hard-coded True so that CPU-only runs are not forced onto the GPU.
        feature, spacetime, targets_a, targets_b, lam = mixup_data(
            batch_data_dict['feature'],
            batch_data_dict['spacetime'],
            batch_data_dict['{}_target'.format(taxonomy_level)],
            alpha=1.0,
            use_cuda=cuda)

        # Train
        model.train()
        batch_output = model(feature, spacetime)

        # Loss
        loss = mixup_criterion(criterion, batch_output, targets_a, targets_b,
                               lam)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Sample the loss curve
        if iteration % 100 == 0:
            plt_x.append(iteration)
            plt_y.append(loss.item())

        # Save the loss curve
        # NOTE(review): the output directory is a hard-coded, user-specific
        # absolute path; it has to exist on the machine running this.
        if iteration % 10000 == 0 and iteration != 0:
            plt.figure(1)
            plt.suptitle('test result ', fontsize='18')
            plt.plot(plt_x, plt_y, 'r-', label='loss')
            plt.legend(loc='best')
            plt.savefig(
                '/home/fangjunyan/count/' +
                time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time())) +
                '{}'.format(holdout_fold) + '{}.jpg'.format(taxonomy_level))

        # Stop learning
        if iteration == 10000:
            break

        iteration += 1
def train(args):
    '''Training. Model will be saved after several iterations.

    The workspace root is read from the module-level WORKSPACE constant,
    not from `args`.

    Args:
      args: namespace providing the following attributes:
        train_source: 'curated' | 'noisy' | 'curated_and_noisy'
        segment_seconds: float, duration of audio recordings to be padded
            or split
        hop_seconds: float, hop seconds between segments
        pad_type: 'constant' | 'repeat'
        holdout_fold: '1', '2', '3', '4' | 'none', set `none` for training
            on all data without validation
        model_type: string, e.g. 'Cnn_9layers_AvgPooling'
        n_epoch: int, number of epochs used to derive the total number of
            iterations
        batch_size: int
        valid_source: 'curated' | 'noisy' | 'both', sources evaluated on
            the holdout fold
        pretrained: 'none', or the path of a checkpoint whose 'model' entry
            is loaded before training starts
        cuda: bool
        mini_data: bool, set True for debugging on a small part of data
        filename: string, sub-directory used under checkpoints, statistics
            and logs
        mode: string, sub-directory used under logs

    Raises:
      ValueError: if validation is enabled (holdout_fold != 'none') and
          valid_source is not one of 'curated', 'noisy' or 'both'.
    '''

    # Arguments & parameters
    workspace = WORKSPACE
    train_source = args.train_source
    segment_seconds = args.segment_seconds
    hop_seconds = args.hop_seconds
    pad_type = args.pad_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    n_epoch = args.n_epoch
    batch_size = args.batch_size
    valid_source = args.valid_source
    pretrained = args.pretrained
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    mel_bins = config.mel_bins
    classes_num = config.classes_num
    frames_per_second = config.frames_per_second
    reduce_lr = False

    # Valid source. Resolved up front so that an unsupported value fails with
    # a clear message before any data or model is loaded, instead of an
    # unbound-name error inside the training loop. The value is only needed
    # when a holdout fold is evaluated, so it is only enforced in that case.
    target_sources = {
        'curated': ['curated'],
        'noisy': ['noisy'],
        'both': ['curated', 'noisy'],
    }.get(valid_source)
    if holdout_fold != 'none' and target_sources is None:
        raise ValueError(
            "valid_source must be 'curated', 'noisy' or 'both', got {!r}"
            .format(valid_source))

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    feature_dir_name = '{}logmel_{}frames_{}melbins'.format(
        prefix, frames_per_second, mel_bins)

    curated_feature_hdf5_path = os.path.join(
        workspace, 'features', feature_dir_name, 'train_curated.h5')

    noisy_feature_hdf5_path = os.path.join(
        workspace, 'features', feature_dir_name, 'train_noisy.h5')

    curated_cross_validation_path = os.path.join(
        workspace, 'cross_validation_metadata',
        'train_curated_cross_validation.csv')

    noisy_cross_validation_path = os.path.join(
        workspace, 'cross_validation_metadata',
        'train_noisy_cross_validation.csv')

    scalar_path = os.path.join(
        workspace, 'scalars', feature_dir_name, 'train_noisy.h5')

    # Checkpoints, statistics and logs share one run-specific sub-path. A run
    # that starts from a pretrained checkpoint is kept apart in an extra
    # 'resume' directory.
    run_subdirs = [
        feature_dir_name,
        'train_source={}'.format(train_source),
        'segment={}s,hop={}s,pad_type={}'.format(segment_seconds,
                                                 hop_seconds, pad_type),
        'holdout_fold={}'.format(holdout_fold),
        model_type,
    ]
    if pretrained != 'none':
        run_subdirs.append('resume')
    run_subdir = os.path.join(*run_subdirs)

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename, run_subdir)
    create_folder(checkpoints_dir)

    validate_statistics_path = os.path.join(
        workspace, 'statistics', filename, run_subdir,
        'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode, run_subdir)
    create_logging(logs_dir, 'w')

    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    # NOTE(review): eval() executes whatever string is passed as model_type;
    # only use with trusted input. An explicit name -> class mapping would be
    # safer.
    Model = eval(model_type)
    if model_type == 'cbam_ResNet18':
        model = Model(18, classes_num * 2, 'CBAM')
    else:
        model = Model(classes_num * 2)

    if pretrained != 'none':
        model.load_state_dict(torch.load(pretrained)['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = DataGenerator(
        curated_feature_hdf5_path=curated_feature_hdf5_path,
        noisy_feature_hdf5_path=noisy_feature_hdf5_path,
        curated_cross_validation_path=curated_cross_validation_path,
        noisy_cross_validation_path=noisy_cross_validation_path,
        train_source=train_source,
        holdout_fold=holdout_fold,
        segment_seconds=segment_seconds,
        hop_seconds=hop_seconds,
        pad_type=pad_type,
        scalar=scalar,
        batch_size=batch_size)

    # Calculate total iteration required for n_epoch
    iter_per_epoch = np.ceil(
        len(data_generator.train_segments_indexes) / batch_size).astype(int)
    total_iter = iter_per_epoch * n_epoch

    # Learning-rate schedule boundaries, in epochs and in iterations:
    # linear warm-up until `warmup_iters`, scheduler-driven until
    # `flat_iters`, then a fixed learning rate.
    epoch_to_warm = 10
    epoch_to_flat = 200
    warmup_iters = epoch_to_warm * iter_per_epoch
    flat_iters = epoch_to_flat * iter_per_epoch

    def _warmup_lr(optimizer,
                   iteration,
                   iter_per_epoch,
                   epoch_to_warm,
                   min_lr=0,
                   max_lr=0.0035):
        '''Set a linearly increasing learning rate on every param group.

        The rate grows from min_lr by a constant step per iteration such
        that max_lr would be reached after epoch_to_warm epochs. Returns the
        learning rate that was set.
        '''
        delta = (max_lr - min_lr) / iter_per_epoch / epoch_to_warm
        lr = min_lr + delta * iteration
        for p in optimizer.param_groups:
            p['lr'] = lr
        return lr

    # Optimizer
    criterion = FocalLoss(2)
    # metric_loss = RingLoss(type='auto', loss_weight=1.0)
    metric_loss = ArcFaceLoss()
    if cuda:
        metric_loss.cuda()
    optimizer = Nadam(model.parameters(),
                      lr=0.0035,
                      betas=(0.9, 0.999),
                      eps=1e-8,
                      weight_decay=0,
                      schedule_decay=4e-3)
    scheduler = CosineLRWithRestarts(
        optimizer,
        batch_size,
        len(data_generator.train_segments_indexes),
        restart_period=epoch_to_flat - epoch_to_warm + 1,
        t_mult=1,
        verbose=True)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          cuda=cuda)

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0
    epoch = 0

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate
        if iteration % 2500 == 0:
            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()

            # Only the holdout validation data is evaluated here; training
            # data is not evaluated.
            if holdout_fold != 'none':
                logging.info('Validate statistics:')

                for target_source in target_sources:
                    validate_curated_statistics = evaluator.evaluate(
                        data_type='validate',
                        target_source=target_source,
                        max_iteration=None,
                        verbose=False)

                    validate_statistics_container.append(
                        iteration, target_source, validate_curated_statistics)

                validate_statistics_container.dump()

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model
        if iteration % 2500 == 0 and iteration > 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate (disabled while reduce_lr is False)
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU
        for key in batch_data_dict.keys():
            if key in ['feature', 'mask', 'target']:
                batch_data_dict[key] = move_data_to_gpu(
                    batch_data_dict[key], cuda)

        # Train
        model.train()
        batch_feature, batch_output = model(batch_data_dict['feature'],
                                            is_infer=False)

        # loss: classification loss on the output plus the metric loss on
        # the first value returned by the model
        loss = criterion(batch_output,
                         batch_data_dict['target']) + metric_loss(
                             batch_feature, batch_data_dict['target'])

        # Backward
        optimizer.zero_grad()

        # LR Warm up: the rate is overwritten before this iteration's step
        if iteration < warmup_iters:
            _warmup_lr(optimizer,
                       iteration,
                       iter_per_epoch,
                       epoch_to_warm=epoch_to_warm,
                       min_lr=0,
                       max_lr=0.0035)

        loss.backward()
        optimizer.step()

        # Between warm-up and the flat phase the scheduler drives the rate:
        # step() once the generator pointer has passed the last training
        # segment, batch_step() on every iteration.
        if warmup_iters <= iteration < flat_iters:
            if data_generator.pointer >= len(
                    data_generator.train_segments_indexes):
                scheduler.step()
            scheduler.batch_step()

        # Show LR information
        if iteration % iter_per_epoch == 0 and iteration != 0:
            epoch += 1
            if epoch % 10 == 0:
                for p in optimizer.param_groups:
                    logging.info(
                        'Learning rate at epoch {:3d} / iteration {:5d} is: {:.6f}'
                        .format(epoch, iteration, p['lr']))

        # Stop learning
        if iteration == total_iter:
            break

        iteration += 1

        # Hand over to the scheduler right when warm-up ends
        if iteration == warmup_iters:
            scheduler.step()

        # Pin the learning rate once the flat phase starts
        if iteration == flat_iters:
            for param_group in optimizer.param_groups:
                param_group['lr'] = 1e-5
Exemple #16
0
def train(args):
    '''Train a model on one subtask and checkpoint it periodically.

    Every 200 iterations the model is evaluated on the train and validate
    splits for each source of the subtask and the validate statistics are
    stored; every 1000 iterations a checkpoint is written. Training stops
    at iteration 15000.

    Args:
      args: namespace providing the following attributes:
        dataset_dir: string, directory of dataset
        workspace: string, directory of workspace
        subtask: 'a' | 'b' | 'c', corresponds to 3 subtasks in DCASE2019
            Task1
        data_type: 'development' | 'evaluation'
        holdout_fold: '1' | 'none', set 1 for development and none for
            training on all data without validation
        model_type: string, e.g. 'Cnn_9layers_AvgPooling'
        batch_size: int
        cuda: bool
        mini_data: bool, set True for debugging on a small part of data
        filename: string, sub-directory used under checkpoints, statistics
            and logs
        mode: string, sub-directory used under logs
    '''

    # Settings read from the arguments
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    subtask = args.subtask
    data_type = args.data_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    # Settings read from the project config / fixed for this script
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    max_iteration = None  # Number of mini-batches to evaluate on training data
    reduce_lr = True

    sources_to_evaluate = get_sources(subtask)
    in_domain_classes_num = len(config.labels) - 1

    # Path fragments shared by all workspace locations
    prefix = 'minidata_' if mini_data else ''
    sub_dir = get_subdir(subtask, data_type)

    feature_folder = '{}logmel_{}frames_{}melbins'.format(
        prefix, frames_per_second, mel_bins)
    h5_name = '{}.h5'.format(sub_dir)

    def _workspace_h5(top_dir):
        # <workspace>/<top_dir>/<feature folder>/<sub_dir>.h5
        return os.path.join(workspace, top_dir, feature_folder, h5_name)

    setup_dir = os.path.join(dataset_dir, sub_dir, 'evaluation_setup')
    train_csv = os.path.join(setup_dir, 'fold1_train.csv')
    validate_csv = os.path.join(setup_dir, 'fold1_evaluate.csv')

    feature_hdf5_path = _workspace_h5('features')
    feature_hdf5_path_left = _workspace_h5('features_left')
    feature_hdf5_path_right = _workspace_h5('features_right')
    feature_hdf5_path_side = _workspace_h5('features_side')

    scalar_path = _workspace_h5('scalars')
    scalar_path_left = _workspace_h5('scalars_left')
    scalar_path_right = _workspace_h5('scalars_right')
    scalar_path_side = _workspace_h5('scalars_side')

    # Run-specific tail shared by checkpoints, statistics and logs
    run_parts = (feature_folder, '{}'.format(sub_dir),
                 'holdout_fold={}'.format(holdout_fold), model_type)

    checkpoints_dir = os.path.join(workspace, 'checkpoints', filename,
                                   *run_parts)
    create_folder(checkpoints_dir)

    validate_statistics_path = os.path.join(
        os.path.join(workspace, 'statistics', filename, *run_parts),
        'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))

    logs_dir = os.path.join(workspace, 'logs', filename, args.mode,
                            *run_parts)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Scalars for the four feature variants
    scalar = load_scalar(scalar_path)
    scalar_left = load_scalar(scalar_path_left)
    scalar_right = load_scalar(scalar_path_right)
    scalar_side = load_scalar(scalar_path_side)

    # Model and matching loss; the model class is looked up by name
    Model = eval(model_type)

    if subtask in ('a', 'b'):
        model = Model(in_domain_classes_num, activation='logsoftmax')
        loss_func = nll_loss
    elif subtask == 'c':
        model = Model(in_domain_classes_num, activation='sigmoid')
        loss_func = F.binary_cross_entropy

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=1e-3,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0.,
                           amsgrad=True)

    # Data generator
    data_generator = DataGenerator(
        feature_hdf5_path=feature_hdf5_path,
        feature_hdf5_path_left=feature_hdf5_path_left,
        feature_hdf5_path_right=feature_hdf5_path_right,
        feature_hdf5_path_side=feature_hdf5_path_side,
        train_csv=train_csv,
        validate_csv=validate_csv,
        scalar=scalar,
        scalar_left=scalar_left,
        scalar_right=scalar_right,
        scalar_side=scalar_side,
        batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          subtask=subtask,
                          cuda=cuda)

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0

    # One iteration per tuple of (main, left, right, side) mini-batches
    for batch_main, batch_left, batch_right, batch_side in \
            data_generator.generate_train():

        # Periodic evaluation
        if iteration % 200 == 0:
            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()

            # Train split first for every source; the result is not stored
            for source in sources_to_evaluate:
                evaluator.evaluate(data_type='train',
                                   source=source,
                                   max_iteration=max_iteration,
                                   verbose=False)

            # Then the validate split, whose statistics are persisted
            for source in sources_to_evaluate:
                validate_statistics = evaluator.evaluate(
                    data_type='validate',
                    source=source,
                    max_iteration=None,
                    verbose=False)

                validate_statistics_container.append_and_dump(
                    iteration, source, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Periodic checkpoint
        if iteration > 0 and iteration % 1000 == 0:
            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(
                {
                    'iteration': iteration,
                    'model': model.state_dict(),
                    'optimizer': optimizer.state_dict()
                }, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Learning-rate decay: multiply by 0.9 every 200 iterations
        if reduce_lr and iteration > 0 and iteration % 200 == 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move the feature and target entries of every batch dict to GPU
        for data_dict, gpu_keys in (
                (batch_main, ('feature', 'target')),
                (batch_left, ('feature_left', 'target')),
                (batch_right, ('feature_right', 'target')),
                (batch_side, ('feature_side', 'target'))):
            for key in data_dict.keys():
                if key in gpu_keys:
                    data_dict[key] = move_data_to_gpu(data_dict[key], cuda)

        # Forward pass in training mode
        model.train()
        batch_output = model(data=batch_main['feature'],
                             data_left=batch_left['feature_left'],
                             data_right=batch_right['feature_right'],
                             data_side=batch_side['feature_side'])

        # Loss against the targets of the main batch
        loss = loss_func(batch_output, batch_main['target'])

        # Backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == 15000:
            break

        iteration += 1
def get_train_features(args):
    '''Get features from training set.

    Loads the checkpoint of the requested iteration, runs the model over the
    'train_feature_map' split of the data generator and writes the resulting
    feature maps to an HDF5 file under <workspace>/feature_map.

    The workspace root is read from the module-level WORKSPACE constant,
    not from `args`.

    Args:
      args: namespace providing the following attributes:
        train_source: 'curated' | 'noisy' | 'curated_and_noisy'
        segment_seconds: float, duration of audio recordings to be padded
            or split
        hop_seconds: float, hop seconds between segments
        pad_type: 'constant' | 'repeat'
        holdout_fold: '1', '2', '3', '4'
        model_type: string, e.g. 'Cnn_9layers_AvgPooling'
        iteration: int, load model of this iteration
        batch_size: int
        cuda: bool
        mini_data: bool, set True for debugging on a small part of data
        filename: string, sub-directory used under checkpoints and
            feature_map
    '''

    # Arguments & parameters
    workspace = WORKSPACE
    train_source = args.train_source
    segment_seconds = args.segment_seconds
    hop_seconds = args.hop_seconds
    pad_type = args.pad_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    iteration = args.iteration
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    mel_bins = config.mel_bins
    classes_num = config.classes_num
    frames_per_second = config.frames_per_second

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    feature_dir_name = '{}logmel_{}frames_{}melbins'.format(
        prefix, frames_per_second, mel_bins)

    curated_feature_hdf5_path = os.path.join(
        workspace, 'features', feature_dir_name, 'train_curated.h5')

    noisy_feature_hdf5_path = os.path.join(
        workspace, 'features', feature_dir_name, 'train_noisy.h5')

    curated_cross_validation_path = os.path.join(
        workspace, 'cross_validation_metadata',
        'train_curated_cross_validation.csv')

    noisy_cross_validation_path = os.path.join(
        workspace, 'cross_validation_metadata',
        'train_noisy_cross_validation.csv')

    scalar_path = os.path.join(
        workspace, 'scalars', feature_dir_name, 'train_noisy.h5')

    # NOTE(review): unlike the feature and scalar paths above, the checkpoint
    # and feature-map paths do not include the 'minidata_' prefix, so with
    # mini_data=True they still point at the non-minidata run directory.
    # Confirm this is intended.
    run_subdir = os.path.join(
        'logmel_{}frames_{}melbins'.format(frames_per_second, mel_bins),
        'train_source={}'.format(train_source),
        'segment={}s,hop={}s,pad_type={}'.format(segment_seconds,
                                                 hop_seconds, pad_type),
        'holdout_fold={}'.format(holdout_fold),
        model_type)

    checkpoint_path = os.path.join(
        workspace, 'checkpoints', filename, run_subdir,
        '{}_iterations.pth'.format(iteration))

    feature_map_path = os.path.join(
        workspace, 'feature_map', filename, run_subdir,
        '{}_iterations_feature_map.h5'.format(iteration))
    create_folder(os.path.dirname(feature_map_path))

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    # NOTE(review): eval() executes whatever string is passed as model_type;
    # only use with trusted input.
    Model = eval(model_type)
    if model_type == 'cbam_ResNet18':
        model = Model(18, classes_num * 2, 'CBAM')
    else:
        model = Model(classes_num * 2)

    # The model is still on the CPU at this point, so load the checkpoint
    # onto the CPU as well. This also lets a checkpoint saved from a GPU be
    # loaded on a host without CUDA; the model is moved to the GPU below
    # when requested.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = DataGenerator(
        curated_feature_hdf5_path=curated_feature_hdf5_path,
        noisy_feature_hdf5_path=noisy_feature_hdf5_path,
        curated_cross_validation_path=curated_cross_validation_path,
        noisy_cross_validation_path=noisy_cross_validation_path,
        train_source=train_source,
        holdout_fold=holdout_fold,
        segment_seconds=segment_seconds,
        hop_seconds=hop_seconds,
        pad_type=pad_type,
        scalar=scalar,
        batch_size=batch_size)

    generate_func = data_generator.generate_validate('train_feature_map',
                                                     train_source)

    # Results of segments
    output_dict = forward_dist(model=model,
                               generate_func=generate_func,
                               cuda=cuda,
                               return_target=True)

    # get train features
    train_features = get_feature_map(output_dict)

    # Write HDF5 file; the context manager closes the file even if writing
    # one of the datasets fails.
    with h5py.File(feature_map_path, 'w') as hf:
        for k, v in train_features.items():
            hf.create_dataset(name=k, data=v, dtype=np.float32)
def inference_validation(args):
    '''Run the evaluator on the validation split of one subtask.

    Args:
      args: namespace providing the following attributes:
        dataset_dir: string, directory of dataset
        subtask: 'a' | 'b' | 'c', corresponds to 3 subtasks in DCASE2019
            Task1
        data_type: 'development'
        workspace: string, directory of workspace
        model_type: string, e.g. 'Cnn_9layers'
        holdout_fold: passed to the data generator and used in the
            checkpoint and log paths
        iteration: int, used in the checkpoint file name
        batch_size: int
        cuda: bool
        mini_data: bool, set True for debugging on a small part of data
        visualize: bool, additionally call the evaluator's visualize step
        filename: string, sub-directory used under checkpoints and logs
        mode: string, sub-directory used under logs
    '''
    # Settings read from the arguments
    dataset_dir = args.dataset_dir
    subtask = args.subtask
    data_type = args.data_type
    workspace = args.workspace
    model_type = args.model_type
    holdout_fold = args.holdout_fold
    iteration = args.iteration
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    visualize = args.visualize
    filename = args.filename

    # Settings read from the project config
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second

    sources = get_sources(subtask)
    in_domain_classes_num = len(config.labels) - 1

    # Path fragments shared by all workspace locations
    prefix = 'minidata_' if mini_data else ''
    sub_dir = get_subdir(subtask, data_type)

    feature_folder = '{}logmel_{}frames_{}melbins'.format(
        prefix, frames_per_second, mel_bins)
    h5_name = '{}.h5'.format(sub_dir)
    run_parts = (feature_folder, '{}'.format(sub_dir),
                 'holdout_fold={}'.format(holdout_fold), model_type)

    setup_dir = os.path.join(dataset_dir, sub_dir, 'evaluation_setup')
    train_csv = os.path.join(setup_dir, 'fold1_train.csv')
    validate_csv = os.path.join(setup_dir, 'fold1_evaluate.csv')

    feature_hdf5_path = os.path.join(workspace, 'features', feature_folder,
                                     h5_name)
    scalar_path = os.path.join(workspace, 'scalars', feature_folder, h5_name)

    checkpoint_path = os.path.join(
        os.path.join(workspace, 'checkpoints', filename, *run_parts),
        '{}_iterations.pth'.format(iteration))

    logs_dir = os.path.join(workspace, 'logs', filename, args.mode,
                            *run_parts)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Build the model and pick the matching loss; the model class is looked
    # up by name
    Model = eval(model_type)

    if subtask in ('a', 'b'):
        model = Model(in_domain_classes_num, activation='logsoftmax')
        loss_func = nll_loss
    elif subtask == 'c':
        model = Model(in_domain_classes_num, activation='sigmoid')
        loss_func = F.binary_cross_entropy

    # NOTE(review): checkpoint loading is disabled below, so the evaluator
    # runs on the freshly constructed model and checkpoint_path is computed
    # but never read. Confirm whether this is intentional.
    #checkpoint = torch.load(checkpoint_path)
    #model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = DataGenerator(feature_hdf5_path=feature_hdf5_path,
                                   train_csv=train_csv,
                                   validate_csv=validate_csv,
                                   holdout_fold=holdout_fold,
                                   scalar=scalar,
                                   batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          subtask=subtask,
                          cuda=cuda)

    # Subtasks 'a' and 'c' are evaluated on source 'a' only; subtask 'b' is
    # evaluated on sources 'a', 'b' and 'c' in that order
    if subtask in ('a', 'c'):
        validate_sources = ['a']
    elif subtask == 'b':
        validate_sources = ['a', 'b', 'c']
    else:
        validate_sources = []

    for source in validate_sources:
        evaluator.evaluate(data_type='validate', source=source, verbose=True)

    # Visualize log mel spectrogram
    if visualize:
        evaluator.visualize(data_type='validate', source='a')
def get_infer_features(args):
    '''Extract clip-level features with a trained model and save them to HDF5.

    Loads the checkpoint selected by the arguments, runs the model over the
    chosen set with `forward_dist`, aggregates the segment-level outputs with
    `segment_feature_to_clip_feature` (arithmetic average) and writes every
    entry of the result to an HDF5 file under `<WORKSPACE>/feature_map`.

    Args (attributes read from `args`):
      train_source: used in the checkpoint / output paths and passed to the
          validation data generator
      segment_seconds, hop_seconds, pad_type: used in the checkpoint / output
          paths and passed to the data generator
      holdout_fold: used in the checkpoint / output paths and passed to the
          validation data generator
      model_type: string, name of the model class to instantiate
      iteration: iteration number of the checkpoint to load
      batch_size: passed to the data generator
      infer_set: 'valid' | 'test'
      cuda: bool, the GPU is only used when CUDA is also available
      mini_data: bool, selects the 'minidata_' prefixed feature / scalar files
      filename: used as a sub-directory of the checkpoint / output paths
    '''

    # Arguments & parameters
    workspace = WORKSPACE
    train_source = args.train_source
    segment_seconds = args.segment_seconds
    hop_seconds = args.hop_seconds
    pad_type = args.pad_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    iteration = args.iteration
    batch_size = args.batch_size
    infer_set = args.infer_set
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    assert infer_set in ['valid', 'test']

    mel_bins = config.mel_bins
    classes_num = config.classes_num
    frames_per_second = config.frames_per_second

    # Paths
    prefix = 'minidata_' if mini_data else ''

    prefixed_feature_name = '{}logmel_{}frames_{}melbins'.format(
        prefix, frames_per_second, mel_bins)
    features_dir = os.path.join(workspace, 'features', prefixed_feature_name)
    cross_validation_dir = os.path.join(workspace, 'cross_validation_metadata')

    curated_feature_hdf5_path = os.path.join(features_dir, 'train_curated.h5')
    noisy_feature_hdf5_path = os.path.join(features_dir, 'train_noisy.h5')
    test_feature_hdf5_path = os.path.join(features_dir, 'test.h5')

    curated_cross_validation_path = os.path.join(
        cross_validation_dir, 'train_curated_cross_validation.csv')
    noisy_cross_validation_path = os.path.join(
        cross_validation_dir, 'train_noisy_cross_validation.csv')

    scalar_path = os.path.join(
        workspace, 'scalars', prefixed_feature_name, 'train_noisy.h5')

    # NOTE: unlike the feature / scalar paths, the checkpoint and output
    # paths do not carry the 'minidata_' prefix.
    experiment_dir = os.path.join(
        filename,
        'logmel_{}frames_{}melbins'.format(frames_per_second, mel_bins),
        'train_source={}'.format(train_source),
        'segment={}s,hop={}s,pad_type={}'.format(
            segment_seconds, hop_seconds, pad_type),
        'holdout_fold={}'.format(holdout_fold),
        model_type)

    checkpoint_path = os.path.join(
        workspace, 'checkpoints', experiment_dir,
        '{}_iterations.pth'.format(iteration))

    feature_map_path = os.path.join(
        workspace, 'feature_map', experiment_dir,
        '{}_iterations_{}_feature_map.h5'.format(iteration, infer_set))
    create_folder(os.path.dirname(feature_map_path))

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    # NOTE(review): eval() on a command-line value executes arbitrary code;
    # only acceptable while `model_type` comes from a trusted source.
    Model = eval(model_type)
    if model_type == 'cbam_ResNet18':
        model = Model(18, classes_num * 2, 'CBAM')
    else:
        model = Model(classes_num * 2)

    # Map the tensors to CPU when no GPU is used, so that a checkpoint saved
    # from a GPU run can still be loaded on a CPU-only machine.
    checkpoint = torch.load(
        checkpoint_path, map_location=None if cuda else 'cpu')
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    if infer_set == 'valid':
        data_generator = DataGenerator(
            curated_feature_hdf5_path=curated_feature_hdf5_path,
            noisy_feature_hdf5_path=noisy_feature_hdf5_path,
            curated_cross_validation_path=curated_cross_validation_path,
            noisy_cross_validation_path=noisy_cross_validation_path,
            train_source=train_source,
            holdout_fold=holdout_fold,
            segment_seconds=segment_seconds,
            hop_seconds=hop_seconds,
            pad_type=pad_type,
            scalar=scalar,
            batch_size=batch_size)
        generate_func = data_generator.generate_validate(
            'validate', train_source)

    elif infer_set == 'test':
        data_generator = TestDataGenerator(
            test_feature_hdf5_path=test_feature_hdf5_path,
            segment_seconds=segment_seconds,
            hop_seconds=hop_seconds,
            pad_type=pad_type,
            scalar=scalar,
            batch_size=batch_size)
        generate_func = data_generator.generate_test()

    # Results of segments; targets are only requested for the 'valid' set
    output_dict = forward_dist(model=model,
                               generate_func=generate_func,
                               cuda=cuda,
                               return_target=(infer_set == 'valid'))

    infer_features = segment_feature_to_clip_feature(output_dict,
                                                     average='arithmetic')

    # Write HDF5 file. The context manager closes the file even when writing
    # one of the datasets fails.
    with h5py.File(feature_map_path, 'w') as hf:
        for k, v in infer_features.items():
            if k == 'audio_name':
                # Names are stored as UTF-8 byte strings
                v = [x.encode('utf8') for x in v]
                hf.create_dataset(name=k, data=v)
            else:
                hf.create_dataset(name=k, data=v, dtype=np.float32)
Exemple #20
0
def train(args):
    '''Training. Model will be saved after several iterations.

    Every 200 iterations the model is evaluated (and the learning rate is
    decayed), every 1000 iterations a checkpoint is written, and training
    stops after iteration 3000.

    Args:
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      taxonomy_level: 'fine' | 'coarse'
      model_type: string, e.g. 'Cnn_9layers_MaxPooling'
      holdout_fold: '1' | 'none'; any value other than 'none' enables the
          evaluation on validation data during training
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
      filename: string, used as a sub-directory of the output paths
      mode: string, used as a sub-directory of the logs directory

    Raises:
      Exception: at the first evaluation if `mini_data` is set.
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    taxonomy_level = args.taxonomy_level
    model_type = args.model_type
    holdout_fold = args.holdout_fold
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    seq_len = 640
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    reduce_lr = True

    # Schedule, expressed in iterations
    evaluate_interval = 200
    checkpoint_interval = 1000
    lr_decay_interval = 200
    stop_iteration = 3000

    labels = get_labels(taxonomy_level)
    classes_num = len(labels)

    # Paths
    prefix = 'minidata_' if mini_data else ''

    feature_name = '{}logmel_{}frames_{}melbins'.format(
        prefix, frames_per_second, mel_bins)
    features_dir = os.path.join(workspace, 'features', feature_name)

    train_hdf5_path = os.path.join(features_dir, 'train.h5')
    validate_hdf5_path = os.path.join(features_dir, 'validate.h5')
    scalar_path = os.path.join(workspace, 'scalars', feature_name, 'train.h5')

    # Sub-path shared by all outputs of this experiment
    experiment_dir = os.path.join(
        feature_name,
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold),
        model_type)

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename, experiment_dir)
    create_folder(checkpoints_dir)

    _temp_submission_path = os.path.join(
        workspace, '_temp_submissions', filename, experiment_dir,
        '_submission.csv')
    create_folder(os.path.dirname(_temp_submission_path))

    validate_statistics_path = os.path.join(
        workspace, 'statistics', filename, experiment_dir,
        'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))

    annotation_path = os.path.join(dataset_dir, 'annotations.csv')

    yaml_path = os.path.join(dataset_dir, 'dcase-ust-taxonomy.yaml')

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode, experiment_dir)
    create_logging(logs_dir, 'w')
    logging.info(args)

    if cuda:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    # NOTE(review): eval() on a command-line value executes arbitrary code;
    # only acceptable while `model_type` comes from a trusted source.
    Model = eval(model_type)
    model = Model(classes_num, seq_len, mel_bins, cuda)

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=1e-3,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0.,
                           amsgrad=True)
    print('cliqueNet parameters:',
          sum(param.numel() for param in model.parameters()))

    # Data generator
    data_generator = DataGenerator(train_hdf5_path=train_hdf5_path,
                                   validate_hdf5_path=validate_hdf5_path,
                                   holdout_fold=holdout_fold,
                                   scalar=scalar,
                                   batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          taxonomy_level=taxonomy_level,
                          cuda=cuda,
                          verbose=False)

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()

    # Train on mini batches
    for iteration, batch_data_dict in enumerate(
            data_generator.generate_train()):

        # Evaluate
        if iteration % evaluate_interval == 0:
            logging.info('------------------------------------')
            logging.info('Iteration: {}, {} level statistics:'.format(
                iteration, taxonomy_level))

            train_fin_time = time.time()

            # Evaluate on training data
            if mini_data:
                raise Exception('`mini_data` flag must be set to False to use '
                                'the official evaluation tool!')

            # The returned training statistics are not used further here.
            evaluator.evaluate(data_type='train', max_iteration=None)

            # Evaluate on validation data
            if holdout_fold != 'none':
                validate_statistics = evaluator.evaluate(
                    data_type='validate',
                    submission_path=_temp_submission_path,
                    annotation_path=annotation_path,
                    yaml_path=yaml_path,
                    max_iteration=None)

                validate_statistics_container.append_and_dump(
                    iteration, validate_statistics)

            # `validate_time` covers everything done since `train_fin_time`,
            # i.e. the evaluation on training data as well.
            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model
        if iteration % checkpoint_interval == 0 and iteration > 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate
        if reduce_lr and iteration % lr_decay_interval == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU
        for key in ('feature', 'fine_target', 'coarse_target'):
            if key in batch_data_dict:
                batch_data_dict[key] = move_data_to_gpu(
                    batch_data_dict[key], cuda)

        # Train
        model.train()
        batch_output = model(batch_data_dict['feature'])

        # loss
        batch_target = batch_data_dict['{}_target'.format(taxonomy_level)]
        loss = binary_cross_entropy(batch_output, batch_target)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == stop_iteration:
            break
Exemple #21
0
def train(args):
    '''Train a model and periodically evaluate and checkpoint it.

    Every 200 iterations the model is evaluated (and, after iteration 0, the
    learning rate is multiplied by 0.9), every 1000 iterations a checkpoint
    is saved, and training stops after iteration 5000.

    Args:
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      data_type: 'train_weak' | 'train_synthetic'
      holdout_fold: '1' | 'none', set 1 for development and none for training
          on all data without validation.
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      loss_type: 'clipwise_binary_crossentropy' | 'framewise_binary_crossentropy'
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    data_type = args.data_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    loss_type = args.loss_type
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    classes_num = config.classes_num
    max_iteration = None  # Number of mini-batches to evaluate on training data
    reduce_lr = True

    # Paths
    prefix = 'minidata_' if mini_data else ''

    # Only the two known loss types are accepted; the framewise loss is the
    # one that trains on strong targets.
    if loss_type not in ('clipwise_binary_crossentropy',
                         'framewise_binary_crossentropy'):
        raise Exception('Incorrect argument!')
    strong_target_training = (loss_type == 'framewise_binary_crossentropy')

    train_relative_name = get_relative_path_no_extension(data_type)
    validate_relative_name = get_relative_path_no_extension('validation')

    feature_name = '{}logmel_{}frames_{}melbins'.format(
        prefix, frames_per_second, mel_bins)
    features_dir = os.path.join(workspace, 'features', feature_name)

    train_hdf5_path = os.path.join(
        features_dir, '{}.h5'.format(train_relative_name))
    validate_hdf5_path = os.path.join(
        features_dir, '{}.h5'.format(validate_relative_name))

    scalar_path = os.path.join(
        workspace, 'scalars', feature_name, 'train/weak.h5')

    train_metadata_path = os.path.join(
        dataset_dir, 'metadata', '{}.csv'.format(train_relative_name))
    validate_metadata_path = os.path.join(
        dataset_dir, 'metadata', 'validation',
        '{}.csv'.format(validate_relative_name))

    # Sub-path shared by every output of this experiment
    experiment_dir = os.path.join(
        filename,
        feature_name,
        '{}'.format(train_relative_name),
        'holdout_fold={}'.format(holdout_fold),
        model_type,
        'loss_type={}'.format(loss_type))

    checkpoints_dir = os.path.join(workspace, 'checkpoints', experiment_dir)
    create_folder(checkpoints_dir)

    temp_submission_path = os.path.join(
        workspace, '_temp', 'submissions', experiment_dir,
        '_temp_submission.csv')
    create_folder(os.path.dirname(temp_submission_path))

    validate_statistics_path = os.path.join(
        workspace, 'statistics', experiment_dir, 'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))

    # The logs directory additionally contains the run mode
    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        feature_name,
        '{}'.format(train_relative_name),
        'holdout_fold={}'.format(holdout_fold),
        model_type,
        'loss_type={}'.format(loss_type))
    create_logging(logs_dir, filemode='w')
    logging.info(args)

    logging.info(
        'Using GPU.' if cuda else 'Using CPU. Set --cuda flag to use GPU.')

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    Model = eval(model_type)
    model = Model(classes_num, strong_target_training)
    if cuda:
        model.cuda()

    loss_func = eval(loss_type)

    # Optimizer
    optimizer = optim.Adam(
        model.parameters(),
        lr=1e-3,
        betas=(0.9, 0.999),
        eps=1e-08,
        weight_decay=0.,
        amsgrad=True)

    # Data generator
    data_generator = DataGenerator(
        train_hdf5_path=train_hdf5_path,
        validate_hdf5_path=validate_hdf5_path,
        holdout_fold=holdout_fold,
        scalar=scalar,
        batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(
        model=model,
        data_generator=data_generator,
        cuda=cuda,
        verbose=False)

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    gpu_keys = ('feature', 'weak_target', 'strong_target')
    train_bgn_time = time.time()

    # Train on mini batches
    for iteration, batch_data_dict in enumerate(
            data_generator.generate_train()):

        is_eval_step = (iteration % 200 == 0)
        past_first_step = iteration > 0

        # Evaluate
        if is_eval_step:
            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()

            train_statistics = evaluator.evaluate(
                data_type='train',
                metadata_path=train_metadata_path,
                submission_path=temp_submission_path,
                max_iteration=max_iteration)

            # Validation is skipped when training on all data
            if holdout_fold != 'none':
                validate_statistics = evaluator.evaluate(
                    data_type='validate',
                    metadata_path=validate_metadata_path,
                    submission_path=temp_submission_path,
                    max_iteration=max_iteration)

                validate_statistics_container.append_and_dump(
                    iteration, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model
        if past_first_step and iteration % 1000 == 0:
            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(
                {
                    'iteration': iteration,
                    'model': model.state_dict(),
                    'optimizer': optimizer.state_dict()
                },
                checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate
        if reduce_lr and is_eval_step and past_first_step:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU
        for key in gpu_keys:
            if key in batch_data_dict:
                batch_data_dict[key] = move_data_to_gpu(
                    batch_data_dict[key], cuda)

        # Train
        model.train()
        batch_output_dict = model(batch_data_dict['feature'])

        # loss
        loss = loss_func(batch_output_dict, batch_data_dict)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == 5000:
            break