Example #1
0
def train(args):
    '''Train a model on mini batches with periodic evaluation and saving.

    Every 100 iterations the evaluator is run with data_type='train' (and
    with data_type='validate' unless holdout_fold is -1). Every 1000
    iterations a checkpoint is written to the models directory. Every 200
    iterations the learning rate is multiplied by 0.9. The loop stops once
    iteration 10000 has been trained.

    Args:
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      audio_type: 'foa' | 'mic'
      holdout_fold: 1 | 2 | 3 | 4 | -1, where -1 indicates using all data
          without validation for training
      model_name: string, e.g. 'Cnn_9layers'
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
      filename: string, sub-directory name used under the models,
          temporary submissions and logs directories
      mode: string, sub-directory name used under the logs directory
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    root = args.workspace
    audio_type = args.audio_type
    fold = args.holdout_fold
    arch_name = args.model_name
    batch_size = args.batch_size
    # Only use the GPU when it was requested and is actually available
    use_cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    run_name = args.filename

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    classes_num = config.classes_num
    max_validate_num = 10  # Number of audio recordings to validate
    decay_lr = True  # Reduce learning rate after several iterations

    # Shared name fragments for all workspace paths
    prefix = 'minidata_' if mini_data else ''
    data_tag = '{}{}_{}_logmel_{}frames_{}melbins'.format(
        prefix, audio_type, 'dev', frames_per_second, mel_bins)
    run_tag = '{}_{}'.format(arch_name, data_tag)
    fold_tag = 'holdout_fold={}'.format(fold)

    # Paths
    metadata_dir = os.path.join(dataset_dir, 'metadata_dev')
    features_dir = os.path.join(root, 'features', data_tag)
    scalar_path = os.path.join(root, 'scalars', data_tag, 'scalar.h5')

    models_dir = os.path.join(root, 'models', run_name, run_tag, fold_tag)
    create_folder(models_dir)

    temp_submissions_dir = os.path.join(
        root, '_temp', 'submissions', run_name, run_tag)
    create_folder(temp_submissions_dir)

    logs_dir = os.path.join(
        args.workspace, 'logs', run_name, args.mode, run_tag, fold_tag)
    create_logging(logs_dir, filemode='w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    # NOTE(review): eval() resolves the class from a command-line string;
    # only safe as long as model_name comes from a trusted source.
    model_class = eval(arch_name)
    model = model_class(classes_num)
    if use_cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(
        model.parameters(),
        lr=1e-3,
        betas=(0.9, 0.999),
        eps=1e-08,
        weight_decay=0.)

    # Data generator
    data_generator = DataGenerator(
        features_dir=features_dir,
        scalar=scalar,
        batch_size=batch_size,
        holdout_fold=fold)

    # Evaluator
    evaluator = Evaluator(
        model=model,
        data_generator=data_generator,
        max_validate_num=max_validate_num,
        cuda=use_cuda)

    # Start of the current timing window (reset after each evaluation)
    window_start = time.time()

    # Train on mini batches
    for iteration, batch in enumerate(data_generator.generate_train()):

        # Evaluate
        if iteration % 100 == 0:
            logging.info('------------------------------------')
            logging.info('iteration: {}'.format(iteration))

            eval_start = time.time()

            train_results = evaluator.evaluate(data_type='train')
            evaluator.metrics(
                train_results, temp_submissions_dir, metadata_dir)

            if fold != -1:
                validate_results = evaluator.evaluate(data_type='validate')
                evaluator.metrics(
                    validate_results, temp_submissions_dir, metadata_dir)

            # Time spent training since the last evaluation, and time
            # spent in the evaluation just completed
            logging.info(
                'train time: {:.3f} s, validate time: {:.3f} s'.format(
                    eval_start - window_start, time.time() - eval_start))

            window_start = time.time()

        # Save model
        if iteration > 0 and iteration % 1000 == 0:
            save_path = os.path.join(
                models_dir, 'md_{}_iters.pth'.format(iteration))
            # The model and optimizer objects themselves are stored
            torch.save(
                {'iteration': iteration,
                 'model': model,
                 'optimizer': optimizer},
                save_path)
            logging.info('Model saved to {}'.format(save_path))

        # Reduce learning rate
        if decay_lr and iteration > 0 and iteration % 200 == 0:
            for group in optimizer.param_groups:
                group['lr'] *= 0.9

        # Move data to GPU
        for name in batch.keys():
            batch[name] = move_data_to_gpu(batch[name], use_cuda)

        # Train
        model.train()
        output_dict = model(batch['feature'])
        loss = event_spatial_loss(output_dict, batch)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == 10000:
            break
Example #2
0
def inference_validation(args):
    '''Inference validation data.

    When holdout_fold is one of the real folds, the checkpoint for that fold
    and iteration is loaded, the 'validate' data is evaluated and the result
    is passed to evaluator.metrics together with the submissions directory.
    When holdout_fold is -1 no model is loaded; metrics are calculated over
    every file found in the submissions directory.

    Args:
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      audio_type: 'foa' | 'mic'
      holdout_fold: 1 | 2 | 3 | 4 | -1, where -1 indicates calculating metrics
          on all 1, 2, 3 and 4 folds.
      model_name: string, e.g. 'Cnn_9layers'
      iteration: iteration of the checkpoint to load; selects the file
          'md_<iteration>_iters.pth' and the submissions sub-directory
      batch_size: int
      cuda: bool
      visualize: bool
      mini_data: bool, set True for debugging on a small part of data
      filename: string, sub-directory name used under the models,
          submissions and logs directories
      mode: string, sub-directory name used under the logs directory
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    audio_type = args.audio_type
    holdout_fold = args.holdout_fold
    model_name = args.model_name
    iteration = args.iteration
    batch_size = args.batch_size
    # Only use the GPU when it was requested and is actually available
    cuda = args.cuda and torch.cuda.is_available()
    visualize = args.visualize
    mini_data = args.mini_data
    filename = args.filename

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second

    # Shared name fragments for all workspace paths
    prefix = 'minidata_' if mini_data else ''
    base_tag = '{}_{}_logmel_{}frames_{}melbins'.format(
        audio_type, 'dev', frames_per_second, mel_bins)
    data_tag = prefix + base_tag
    model_tag = '{}_{}'.format(model_name, data_tag)
    fold_tag = 'holdout_fold={}'.format(holdout_fold)

    # Paths
    metadata_dir = os.path.join(dataset_dir, 'metadata_dev')

    submissions_dir = os.path.join(
        workspace, 'submissions', filename, model_tag,
        'iteration={}'.format(iteration))
    create_folder(submissions_dir)

    logs_dir = os.path.join(
        args.workspace, 'logs', filename, args.mode, model_tag, fold_tag)
    create_logging(logs_dir, filemode='w')
    logging.info(args)

    # Inference and calculate metrics for a fold
    if holdout_fold != -1:

        features_dir = os.path.join(workspace, 'features', data_tag)
        scalar_path = os.path.join(
            workspace, 'scalars', data_tag, 'scalar.h5')

        # NOTE(review): the checkpoint directory is built WITHOUT the
        # 'minidata_' prefix even when mini_data is set, so a mini_data run
        # loads the checkpoint from the non-minidata models directory.
        # Confirm this is intended.
        checkpoint_path = os.path.join(
            workspace, 'models', filename,
            '{}_{}'.format(model_name, base_tag), fold_tag,
            'md_{}_iters.pth'.format(iteration))

        # Load scalar
        scalar = load_scalar(scalar_path)

        # Load model. Tensors are mapped to CPU first so that a checkpoint
        # saved from a GPU run can still be read when CUDA is unavailable;
        # the model is moved to the GPU below when cuda is enabled.
        # NOTE(review): checkpoint['model'] is used directly as a model
        # object, so the file is a full pickle; recent PyTorch releases
        # default torch.load to weights_only=True, which would reject it.
        # Confirm against the PyTorch version in use.
        checkpoint = torch.load(checkpoint_path,
                                map_location=lambda storage, loc: storage)
        model = checkpoint['model']

        if cuda:
            model.cuda()

        # Data generator
        data_generator = DataGenerator(features_dir=features_dir,
                                       scalar=scalar,
                                       batch_size=batch_size,
                                       holdout_fold=holdout_fold)

        # Evaluator
        evaluator = Evaluator(model=model,
                              data_generator=data_generator,
                              cuda=cuda)

        # Calculate metrics
        data_type = 'validate'
        list_dict = evaluator.evaluate(data_type=data_type)
        evaluator.metrics(list_dict=list_dict,
                          submissions_dir=submissions_dir,
                          metadata_dir=metadata_dir)

        # Visualize reference and predicted events, elevation and azimuth
        if visualize:
            evaluator.visualize(data_type=data_type)

    # Calculate metrics for all folds
    else:
        # Every entry in the submissions directory is treated as a
        # prediction file
        prediction_names = os.listdir(submissions_dir)
        prediction_paths = [os.path.join(submissions_dir, name)
                            for name in prediction_names]

        metrics = calculate_metrics(metadata_dir=metadata_dir,
                                    prediction_paths=prediction_paths)

        logging.info('Metrics of {} files: '.format(len(prediction_names)))
        for key in metrics.keys():
            logging.info('    {:<20} {:.3f}'.format(key + ' :', metrics[key]))