def train(args):
    '''Training. Checkpoints are saved periodically during training.

    `dataset_dir` and `workspace` are taken from the module-level DATASET_DIR
    and WORKSPACE constants.

    Args:
      train_source: 'curated' | 'noisy' | 'curated_and_noisy'
      segment_seconds: float, duration to which audio recordings are padded or split
      hop_seconds: float, hop in seconds between segments
      pad_type: 'constant' | 'repeat'
      holdout_fold: '1', '2', '3', '4' | 'none'; set 'none' to train
          on all data without validation
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      n_epoch: int, number of training epochs
      batch_size: int
      valid_source: 'curated' | 'noisy' | 'both', data source used for validation
      pretrained: string, path to a checkpoint to resume from, or 'none'
      cuda: bool
      mini_data: bool, set True for debugging on a small subset of the data
    '''

    # Arguments & parameters

    dataset_dir = DATASET_DIR
    workspace = WORKSPACE
    train_source = args.train_source
    segment_seconds = args.segment_seconds
    hop_seconds = args.hop_seconds
    pad_type = args.pad_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    n_epoch = args.n_epoch
    batch_size = args.batch_size
    valid_source = args.valid_source
    pretrained = args.pretrained
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    mel_bins = config.mel_bins
    classes_num = config.classes_num
    frames_per_second = config.frames_per_second
    max_iteration = 500  # Number of mini-batches to evaluate on training data
    reduce_lr = False

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    curated_feature_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train_curated.h5')

    noisy_feature_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train_noisy.h5')

    curated_cross_validation_path = os.path.join(
        workspace, 'cross_validation_metadata',
        'train_curated_cross_validation.csv')

    noisy_cross_validation_path = os.path.join(
        workspace, 'cross_validation_metadata',
        'train_noisy_cross_validation.csv')

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train_noisy.h5')

    if pretrained == 'none':
        checkpoints_dir = os.path.join(
            workspace, 'checkpoints', filename,
            '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                                 mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'.format(segment_seconds,
                                                     hop_seconds, pad_type),
            'holdout_fold={}'.format(holdout_fold), model_type)
        create_folder(checkpoints_dir)

        validate_statistics_path = os.path.join(
            workspace, 'statistics', filename,
            '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                                 mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'.format(segment_seconds,
                                                     hop_seconds, pad_type),
            'holdout_fold={}'.format(holdout_fold), model_type,
            'validate_statistics.pickle')
        create_folder(os.path.dirname(validate_statistics_path))

        logs_dir = os.path.join(
            workspace, 'logs', filename, args.mode,
            '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                                 mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'.format(segment_seconds,
                                                     hop_seconds, pad_type),
            'holdout_fold={}'.format(holdout_fold), model_type)
        create_logging(logs_dir, 'w')

    else:
        checkpoints_dir = os.path.join(
            workspace, 'checkpoints', filename,
            '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                                 mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'.format(segment_seconds,
                                                     hop_seconds, pad_type),
            'holdout_fold={}'.format(holdout_fold), model_type, 'resume')
        create_folder(checkpoints_dir)

        validate_statistics_path = os.path.join(
            workspace, 'statistics', filename,
            '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                                 mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'.format(segment_seconds,
                                                     hop_seconds, pad_type),
            'holdout_fold={}'.format(holdout_fold), model_type, 'resume',
            'validate_statistics.pickle')
        create_folder(os.path.dirname(validate_statistics_path))

        logs_dir = os.path.join(
            workspace, 'logs', filename, args.mode,
            '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                                 mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'.format(segment_seconds,
                                                     hop_seconds, pad_type),
            'holdout_fold={}'.format(holdout_fold), model_type, 'resume')
        create_logging(logs_dir, 'w')

    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    Model = eval(model_type)  # resolve the model class from its name
    if model_type == 'cbam_ResNet18':
        model = Model(18, classes_num * 2, 'CBAM')
    else:
        model = Model(classes_num * 2)

    if pretrained != 'none':
        model.load_state_dict(torch.load(pretrained)['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = DataGenerator(
        curated_feature_hdf5_path=curated_feature_hdf5_path,
        noisy_feature_hdf5_path=noisy_feature_hdf5_path,
        curated_cross_validation_path=curated_cross_validation_path,
        noisy_cross_validation_path=noisy_cross_validation_path,
        train_source=train_source,
        holdout_fold=holdout_fold,
        segment_seconds=segment_seconds,
        hop_seconds=hop_seconds,
        pad_type=pad_type,
        scalar=scalar,
        batch_size=batch_size)

    # Total number of iterations needed to cover n_epoch epochs
    iter_per_epoch = np.ceil(
        len(data_generator.train_segments_indexes) / batch_size).astype(int)
    total_iter = iter_per_epoch * n_epoch

    # Learning-rate schedule boundaries
    epoch_to_warm = 10
    epoch_to_flat = 200

    def _warmup_lr(optimizer,
                   iteration,
                   iter_per_epoch,
                   epoch_to_warm,
                   min_lr=0,
                   max_lr=0.0035):
        delta = (max_lr - min_lr) / iter_per_epoch / epoch_to_warm
        lr = min_lr + delta * iteration
        for p in optimizer.param_groups:
            p['lr'] = lr
        return lr
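    # Illustrative warm-up arithmetic (the concrete numbers here are assumed
    # for the example): with min_lr=0, max_lr=0.0035, iter_per_epoch=400 and
    # epoch_to_warm=10, delta = 0.0035 / (400 * 10), so the learning rate rises
    # linearly from 0 at iteration 0 to 0.0035 at iteration 4000, where the
    # cosine schedule defined below takes over.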

    # Losses and optimizer
    criterion = FocalLoss(2)
    # metric_loss = RingLoss(type='auto', loss_weight=1.0)
    metric_loss = ArcFaceLoss()
    if cuda:
        metric_loss.cuda()
    optimizer = Nadam(model.parameters(),
                      lr=0.0035,
                      betas=(0.9, 0.999),
                      eps=1e-8,
                      weight_decay=0,
                      schedule_decay=4e-3)
    scheduler = CosineLRWithRestarts(
        optimizer,
        batch_size,
        len(data_generator.train_segments_indexes),
        restart_period=epoch_to_flat - epoch_to_warm + 1,
        t_mult=1,
        verbose=True)
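    # Resulting schedule, as wired up in the training loop below: linear
    # warm-up from 0 to 0.0035 over the first epoch_to_warm epochs, cosine
    # annealing with restarts until epoch_to_flat, then a constant 1e-5 for
    # the remaining epochs.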

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          cuda=cuda)

    # Data source(s) to validate on
    if valid_source == 'curated':
        target_sources = ['curated']
    elif valid_source == 'noisy':
        target_sources = ['noisy']
    elif valid_source == 'both':
        target_sources = ['curated', 'noisy']
    else:
        raise ValueError('Unknown valid_source: {}'.format(valid_source))

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0
    epoch = 0

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate
        if iteration % 2500 == 0:
            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()

            # Evaluate on part of the training data (currently disabled)
            # logging.info('Train statistics:')

            # for target_source in target_sources:
            #     validate_curated_statistics = evaluator.evaluate(
            #         data_type='train',
            #         target_source=target_source,
            #         max_iteration=max_iteration,
            #         verbose=False)

            # Evaluate on holdout validation data
            if holdout_fold != 'none':
                logging.info('Validate statistics:')

                for target_source in target_sources:
                    validate_statistics = evaluator.evaluate(
                        data_type='validate',
                        target_source=target_source,
                        max_iteration=None,
                        verbose=False)

                    validate_statistics_container.append(
                        iteration, target_source, validate_statistics)

                validate_statistics_container.dump()

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model
        if iteration % 2500 == 0 and iteration > 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU
        for key in batch_data_dict.keys():
            if key in ['feature', 'mask', 'target']:
                batch_data_dict[key] = move_data_to_gpu(
                    batch_data_dict[key], cuda)

        # Train
        model.train()
        batch_feature, batch_output = model(batch_data_dict['feature'],
                                            is_infer=False)

        # Loss: classification (focal) loss plus metric-learning loss on the features
        loss = criterion(batch_output,
                         batch_data_dict['target']) + metric_loss(
                             batch_feature, batch_data_dict['target'])

        # Backward
        optimizer.zero_grad()

        # LR Warm up
        if iteration < epoch_to_warm * iter_per_epoch:
            cur_lr = _warmup_lr(optimizer,
                                iteration,
                                iter_per_epoch,
                                epoch_to_warm=epoch_to_warm,
                                min_lr=0,
                                max_lr=0.0035)

        loss.backward()
        optimizer.step()

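        # Cosine phase: scheduler.step() advances the restart schedule once per
        # pass over the training segments (when the generator pointer wraps),
        # while batch_step() updates the learning rate after every mini-batch.
        # This assumes a CosineLRWithRestarts implementation exposing both
        # step() and batch_step().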
        if (iteration >= epoch_to_warm * iter_per_epoch
                and iteration < epoch_to_flat * iter_per_epoch):
            if data_generator.pointer >= len(
                    data_generator.train_segments_indexes):
                scheduler.step()
            scheduler.batch_step()

        # Show LR information
        if iteration % iter_per_epoch == 0 and iteration != 0:
            epoch += 1
            if epoch % 10 == 0:
                for p in optimizer.param_groups:
                    logging.info(
                        'Learning rate at epoch {:3d} / iteration {:5d} is: {:.6f}'
                        .format(epoch, iteration, p['lr']))

        # Stop learning
        if iteration == total_iter:
            break

        iteration += 1

        if iteration == epoch_to_warm * iter_per_epoch:
            scheduler.step()

        if iteration == epoch_to_flat * iter_per_epoch:
            for param_group in optimizer.param_groups:
                param_group['lr'] = 1e-5
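
# Minimal invocation sketch (not part of the original listing). It assumes the
# module defines DATASET_DIR, WORKSPACE, config and the helpers used in
# train(), and only illustrates which attributes `args` is expected to carry;
# the flag names and defaults below are assumptions, not the repository's CLI.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Train an audio tagging model.')
    parser.add_argument('--train_source', type=str, default='curated_and_noisy')
    parser.add_argument('--segment_seconds', type=float, default=5.0)
    parser.add_argument('--hop_seconds', type=float, default=2.5)
    parser.add_argument('--pad_type', type=str, default='repeat')
    parser.add_argument('--holdout_fold', type=str, default='1')
    parser.add_argument('--model_type', type=str, default='Cnn_9layers_AvgPooling')
    parser.add_argument('--n_epoch', type=int, default=200)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--valid_source', type=str, default='curated')
    parser.add_argument('--pretrained', type=str, default='none')
    parser.add_argument('--cuda', action='store_true')
    parser.add_argument('--mini_data', action='store_true')
    args = parser.parse_args()

    # train() also reads args.filename and args.mode; set them explicitly here.
    args.filename = 'main'
    args.mode = 'train'

    train(args)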
Example 2
def train(args):
    '''Training. Checkpoints are saved periodically during training.

    Args:
      dataset_dir: string, directory of the dataset
      workspace: string, directory of the workspace
      train_source: 'curated' | 'noisy' | 'curated_and_noisy'
      segment_seconds: float, duration to which audio recordings are padded or split
      hop_seconds: float, hop in seconds between segments
      pad_type: 'constant' | 'repeat'
      holdout_fold: '1', '2', '3', '4' | 'none'; set 'none' to train
          on all data without validation
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small subset of the data
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    train_source = args.train_source
    segment_seconds = args.segment_seconds
    hop_seconds = args.hop_seconds
    pad_type = args.pad_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    mel_bins = config.mel_bins
    classes_num = config.classes_num
    frames_per_second = config.frames_per_second
    max_iteration = 500  # Number of mini-batches to evaluate on training data
    reduce_lr = False

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    curated_feature_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train_curated.h5')

    noisy_feature_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train_noisy.h5')

    curated_cross_validation_path = os.path.join(
        workspace, 'cross_validation_metadata',
        'train_curated_cross_validation.csv')

    noisy_cross_validation_path = os.path.join(
        workspace, 'cross_validation_metadata',
        'train_noisy_cross_validation.csv')

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train_noisy.h5')

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'train_source={}'.format(train_source),
        'segment={}s,hop={}s,pad_type={}'.format(segment_seconds, hop_seconds,
                                                 pad_type),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_folder(checkpoints_dir)

    validate_statistics_path = os.path.join(
        workspace, 'statistics', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'train_source={}'.format(train_source),
        'segment={}s,hop={}s,pad_type={}'.format(segment_seconds, hop_seconds,
                                                 pad_type),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'train_source={}'.format(train_source),
        'segment={}s,hop={}s,pad_type={}'.format(segment_seconds, hop_seconds,
                                                 pad_type),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    Model = eval(model_type)  # resolve the model class from its name
    model = Model(classes_num)

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=1e-3,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0.,
                           amsgrad=True)

    # Data generator
    data_generator = DataGenerator(
        curated_feature_hdf5_path=curated_feature_hdf5_path,
        noisy_feature_hdf5_path=noisy_feature_hdf5_path,
        curated_cross_validation_path=curated_cross_validation_path,
        noisy_cross_validation_path=noisy_cross_validation_path,
        train_source=train_source,
        holdout_fold=holdout_fold,
        segment_seconds=segment_seconds,
        hop_seconds=hop_seconds,
        pad_type=pad_type,
        scalar=scalar,
        batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          cuda=cuda)

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate
        if iteration % 500 == 0:
            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()

            # Evaluate on part of the training data
            logging.info('Train statistics:')

            for target_source in ['curated', 'noisy']:
                train_statistics = evaluator.evaluate(
                    data_type='train',
                    target_source=target_source,
                    max_iteration=max_iteration,
                    verbose=False)

            # Evaluate on holdout validation data
            if holdout_fold != 'none':
                logging.info('Validate statistics:')

                for target_source in ['curated', 'noisy']:
                    validate_statistics = evaluator.evaluate(
                        data_type='validate',
                        target_source=target_source,
                        max_iteration=None,
                        verbose=False)

                    validate_statistics_container.append(
                        iteration, target_source, validate_statistics)

                validate_statistics_container.dump()

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model
        if iteration % 1000 == 0 and iteration > 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU
        for key in batch_data_dict.keys():
            if key in ['feature', 'mask', 'target']:
                batch_data_dict[key] = move_data_to_gpu(
                    batch_data_dict[key], cuda)

        # Train
        model.train()
        batch_output = model(batch_data_dict['feature'])

        # Loss: multi-label binary cross-entropy
        loss = binary_cross_entropy(batch_output, batch_data_dict['target'])

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == 20000:
            break

        iteration += 1