Example #1
    def calculate_loss(self, list_dict):
        event_loss_list = []

        for data_dict in list_dict:
            (output_dict, target_dict) = self._get_output_target_dict(data_dict)

            event_loss = event_spatial_loss(output_dict=output_dict,
                                            target_dict=target_dict,
                                            return_individual_loss=True)

            event_loss_list.append(event_loss)

        return np.mean(event_loss_list)
Example #2
    def calculate_loss(self, list_dict):
        total_loss_list = []
        event_loss_list = []
        position_loss_list = []

        for data_dict in list_dict:
            (output_dict, target_dict) = self._get_output_target_dict(data_dict)

            (total_loss, event_loss,
             position_loss) = event_spatial_loss(output_dict=output_dict,
                                                 target_dict=target_dict,
                                                 return_individual_loss=True)

            total_loss_list.append(total_loss)
            event_loss_list.append(event_loss)
            position_loss_list.append(position_loss)

        return (np.mean(total_loss_list), np.mean(event_loss_list),
                np.mean(position_loss_list))
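
Both methods above call event_spatial_loss with return_individual_loss=True. A minimal stub consistent with Example #2's three-value return is sketched below; the dictionary keys ('event', 'position') and the BCE/MSE split are assumptions for illustration, not the repository's actual implementation.

import torch.nn.functional as F

def event_spatial_loss(output_dict, target_dict, return_individual_loss=False):
    # Event detection: binary cross-entropy on per-class activity
    # (assumed key 'event'; predictions expected in [0, 1]).
    event_loss = F.binary_cross_entropy(output_dict['event'],
                                        target_dict['event'])

    # Localization: mean squared error on source positions (assumed key 'position').
    position_loss = F.mse_loss(output_dict['position'],
                               target_dict['position'])

    total_loss = event_loss + position_loss

    if return_individual_loss:
        return total_loss, event_loss, position_loss
    else:
        return total_loss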
Example #3
def train(args):
    '''Train. The model is saved every 1000 iterations.
    
    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      audio_type: 'foa' | 'mic'
      holdout_fold: 1 | 2 | 3 | 4 | -1, where -1 indicates using all data 
          without validation for training
      model_name: string, e.g. 'Cnn_9layers'
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    audio_type = args.audio_type
    holdout_fold = args.holdout_fold
    model_name = args.model_name
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    classes_num = config.classes_num
    max_validate_num = 10  # Number of audio recordings to validate
    reduce_lr = True  # Reduce learning rate after several iterations

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    metadata_dir = os.path.join(dataset_dir, 'metadata_dev')

    features_dir = os.path.join(
        workspace, 'features',
        '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev',
                                                   frames_per_second,
                                                   mel_bins))

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev',
                                                   frames_per_second,
                                                   mel_bins), 'scalar.h5')

    models_dir = os.path.join(
        workspace, 'models', filename,
        '{}_{}{}_{}_logmel_{}frames_{}melbins'.format(model_name, prefix,
                                                      audio_type, 'dev',
                                                      frames_per_second,
                                                      mel_bins),
        'holdout_fold={}'.format(holdout_fold))
    create_folder(models_dir)

    temp_submissions_dir = os.path.join(
        workspace, '_temp', 'submissions', filename,
        '{}_{}{}_{}_logmel_{}frames_{}melbins'.format(model_name, prefix,
                                                      audio_type, 'dev',
                                                      frames_per_second,
                                                      mel_bins))
    create_folder(temp_submissions_dir)

    logs_dir = os.path.join(
        args.workspace, 'logs', filename, args.mode,
        '{}_{}{}_{}_logmel_{}frames_{}melbins'.format(model_name, prefix,
                                                      audio_type, 'dev',
                                                      frames_per_second,
                                                      mel_bins),
        'holdout_fold={}'.format(holdout_fold))
    create_logging(logs_dir, filemode='w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
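    # eval() resolves the model class by its name; model_name must match a
    # class defined or imported in this module (e.g. 'Cnn_9layers').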
    Model = eval(model_name)
    model = Model(classes_num)

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=1e-3,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0.)

    # Data generator
    data_generator = DataGenerator(features_dir=features_dir,
                                   scalar=scalar,
                                   batch_size=batch_size,
                                   holdout_fold=holdout_fold)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          max_validate_num=max_validate_num,
                          cuda=cuda)

    train_bgn_time = time.time()
    iteration = 0

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate
        if iteration % 100 == 0:

            logging.info('------------------------------------')
            logging.info('iteration: {}'.format(iteration))

            train_fin_time = time.time()
            train_list_dict = evaluator.evaluate(data_type='train')
            evaluator.metrics(train_list_dict, temp_submissions_dir,
                              metadata_dir)

            if holdout_fold != -1:
                validate_list_dict = evaluator.evaluate(data_type='validate')
                evaluator.metrics(validate_list_dict, temp_submissions_dir,
                                  metadata_dir)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model
        if iteration % 1000 == 0 and iteration > 0:
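            # Note: this pickles the entire model and optimizer objects, which
            # ties the checkpoint to the current class definitions; Example #4
            # below saves state_dicts instead.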

            checkpoint = {
                'iteration': iteration,
                'model': model,
                'optimizer': optimizer
            }

            save_path = os.path.join(models_dir,
                                     'md_{}_iters.pth'.format(iteration))

            torch.save(checkpoint, save_path)
            logging.info('Model saved to {}'.format(save_path))

        # Reduce learning rate
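        # (the effective rate is roughly 1e-3 * 0.9 ** (iteration // 200))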
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU
        for key in batch_data_dict.keys():
            batch_data_dict[key] = move_data_to_gpu(batch_data_dict[key], cuda)

        # Train
        model.train()
        batch_output_dict = model(batch_data_dict['feature'])
        loss = event_spatial_loss(batch_output_dict, batch_data_dict)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == 10000:
            break

        iteration += 1
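
A hypothetical invocation of train() is sketched below. The attribute names follow the docstring above, plus the filename and mode attributes the function body reads; the directory paths and values are placeholders.

import argparse

args = argparse.Namespace(
    dataset_dir='/path/to/dataset',    # placeholder path
    workspace='/path/to/workspace',    # placeholder path
    audio_type='foa',
    holdout_fold=1,
    model_name='Cnn_9layers',
    batch_size=32,
    cuda=True,
    mini_data=False,
    filename='main',                   # assumed: typically the script name
    mode='train')

train(args)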
Example #4
def train(args):
    '''Train. The model is saved every 1000 iterations.
    
    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      audio_type: 'foa' | 'mic'
      holdout_fold: '1' | '2' | '3' | '4' | 'none', set to none if using all 
        data without validation to train
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    audio_type = args.audio_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    classes_num = config.classes_num
    max_validate_num = None  # Number of audio recordings to validate; None validates all
    reduce_lr = True  # Reduce learning rate after several iterations

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    metadata_dir = os.path.join(dataset_dir, 'metadata_dev')

    features_dir = os.path.join(
        workspace, 'features',
        '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev',
                                                   frames_per_second,
                                                   mel_bins))

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev',
                                                   frames_per_second,
                                                   mel_bins), 'scalar.h5')

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename,
        '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev',
                                                   frames_per_second,
                                                   mel_bins), model_type,
        'holdout_fold={}'.format(holdout_fold))
    create_folder(checkpoints_dir)

    # Results from all folds should be written to the same directory
    temp_submissions_dir = os.path.join(
        workspace, '_temp', 'submissions', filename,
        '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev',
                                                   frames_per_second,
                                                   mel_bins), model_type)
    create_folder(temp_submissions_dir)

    validate_statistics_path = os.path.join(
        workspace, 'statistics', filename,
        '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev',
                                                   frames_per_second,
                                                   mel_bins),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))

    logs_dir = os.path.join(
        args.workspace, 'logs', filename, args.mode,
        '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev',
                                                   frames_per_second,
                                                   mel_bins),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, filemode='w')
    logging.info(args)

    if cuda:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    Model = eval(model_type)
    model = Model(classes_num)

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=1e-3,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0.,
                           amsgrad=True)

    # Data generator
    data_generator = DataGenerator(features_dir=features_dir,
                                   scalar=scalar,
                                   batch_size=batch_size,
                                   holdout_fold=holdout_fold)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          cuda=cuda)

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate
        if iteration % 200 == 0:

            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()
            '''
            # Uncomment for evaluating on training dataset
            train_statistics = evaluator.evaluate(
                data_type='train', 
                metadata_dir=metadata_dir, 
                submissions_dir=temp_submissions_dir, 
                max_validate_num=max_validate_num)
            '''

            if holdout_fold != 'none':
                validate_statistics = evaluator.evaluate(
                    data_type='validate',
                    metadata_dir=metadata_dir,
                    submissions_dir=temp_submissions_dir,
                    max_validate_num=max_validate_num)

                validate_statistics_container.append_and_dump(
                    iteration, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model
        if iteration % 1000 == 0 and iteration > 0:

            checkpoint = {
                'iteration': iteration,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU
        for key in batch_data_dict.keys():
            batch_data_dict[key] = move_data_to_gpu(batch_data_dict[key], cuda)

        # Train
        model.train()
        batch_output_dict = model(batch_data_dict['feature'])
        loss = event_spatial_loss(batch_output_dict, batch_data_dict)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == 5000:
            break

        iteration += 1
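
Since Example #4 saves state_dicts rather than whole objects, resuming requires rebuilding the model and optimizer first. A minimal sketch, assuming the Cnn_9layers_AvgPooling class from this codebase and a placeholder checkpoint path:

import torch
import torch.optim as optim

checkpoint_path = '/path/to/checkpoints/1000_iterations.pth'  # placeholder
checkpoint = torch.load(checkpoint_path, map_location='cpu')

# Rebuild the model with the same configuration used for training.
model = Cnn_9layers_AvgPooling(config.classes_num)
model.load_state_dict(checkpoint['model'])

# Rebuild the optimizer with the same hyperparameters, then restore its state.
optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999),
                       eps=1e-08, weight_decay=0., amsgrad=True)
optimizer.load_state_dict(checkpoint['optimizer'])

iteration = checkpoint['iteration']  # resume the counter from the checkpoint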