Example #1
def train(args):
    '''Train. Model will be saved after several iterations. 
    
    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      audio_type: 'foa' | 'mic'
      holdout_fold: '1' | '2' | '3' | '4' | 'none', set to none if using all 
        data without validation to train
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    audio_type = args.audio_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    classes_num = config.classes_num
    max_validate_num = None  # Number of audio recordings to validate
    reduce_lr = True  # Reduce learning rate after several iterations

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    metadata_dir = os.path.join(dataset_dir, 'metadata_dev')

    features_dir = os.path.join(
        workspace, 'features',
        '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev',
                                                   frames_per_second,
                                                   mel_bins))

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev',
                                                   frames_per_second,
                                                   mel_bins), 'scalar.h5')

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename,
        '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev',
                                                   frames_per_second,
                                                   mel_bins), model_type,
        'holdout_fold={}'.format(holdout_fold))
    create_folder(checkpoints_dir)

    # Results from all folds should be written to the same directory
    temp_submissions_dir = os.path.join(
        workspace, '_temp', 'submissions', filename,
        '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev',
                                                   frames_per_second,
                                                   mel_bins), model_type)
    create_folder(temp_submissions_dir)

    validate_statistics_path = os.path.join(
        workspace, 'statistics', filename,
        '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev',
                                                   frames_per_second,
                                                   mel_bins),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))

    logs_dir = os.path.join(
        args.workspace, 'logs', filename, args.mode,
        '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev',
                                                   frames_per_second,
                                                   mel_bins),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, filemode='w')
    logging.info(args)

    if cuda:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    Model = eval(model_type)
    model = Model(classes_num)

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=1e-3,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0.,
                           amsgrad=True)

    # Data generator
    data_generator = DataGenerator(features_dir=features_dir,
                                   scalar=scalar,
                                   batch_size=batch_size,
                                   holdout_fold=holdout_fold)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          cuda=cuda)

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate
        if iteration % 200 == 0:

            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()
            '''
            # Uncomment for evaluating on training dataset
            train_statistics = evaluator.evaluate(
                data_type='train', 
                metadata_dir=metadata_dir, 
                submissions_dir=temp_submissions_dir, 
                max_validate_num=max_validate_num)
            '''

            if holdout_fold != 'none':
                validate_statistics = evaluator.evaluate(
                    data_type='validate',
                    metadata_dir=metadata_dir,
                    submissions_dir=temp_submissions_dir,
                    max_validate_num=max_validate_num)

                validate_statistics_container.append_and_dump(
                    iteration, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model
        if iteration % 1000 == 0 and iteration > 0:

            checkpoint = {
                'iteration': iteration,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU
        for key in batch_data_dict.keys():
            batch_data_dict[key] = move_data_to_gpu(batch_data_dict[key], cuda)

        # Train
        model.train()
        batch_output_dict = model(batch_data_dict['feature'])
        loss = event_spatial_loss(batch_output_dict, batch_data_dict)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == 5000:
            break

        iteration += 1
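
A minimal sketch of how a `train` entry point with the argument list documented above is typically wired up with argparse subcommands. The parser name `parse_train_args`, the description string, the defaults, and the way `filename` is derived from `__file__` are assumptions for illustration, not the repository's actual runner code.

import argparse
import os


def parse_train_args():
    # Hypothetical parser matching the docstring of train() above.
    parser = argparse.ArgumentParser(description='DCASE SELD training example')
    subparsers = parser.add_subparsers(dest='mode')

    parser_train = subparsers.add_parser('train')
    parser_train.add_argument('--dataset_dir', type=str, required=True)
    parser_train.add_argument('--workspace', type=str, required=True)
    parser_train.add_argument('--audio_type', type=str, choices=['foa', 'mic'], required=True)
    parser_train.add_argument('--holdout_fold', type=str,
                              choices=['1', '2', '3', '4', 'none'], required=True)
    parser_train.add_argument('--model_type', type=str, required=True)
    parser_train.add_argument('--batch_size', type=int, default=32)
    parser_train.add_argument('--cuda', action='store_true', default=False)
    parser_train.add_argument('--mini_data', action='store_true', default=False)

    args = parser.parse_args()
    args.filename = os.path.splitext(os.path.basename(__file__))[0]
    return args
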
def inference_validation(args):
    '''Inference and calculate metrics on validation data. 
    
    Args: 
      dataset_dir: string, directory of dataset
      subtask: 'a' | 'b' | 'c', corresponds to 3 subtasks in DCASE2019 Task1
      data_type: 'development'
      workspace: string, directory of workspace
      model_type: string, e.g. 'Cnn_9layers'
      holdout_fold: '1' | 'none'
      iteration: int
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
      visualize: bool
    '''
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    subtask = args.subtask
    data_type = args.data_type
    workspace = args.workspace
    model_type = args.model_type
    holdout_fold = args.holdout_fold
    iteration = args.iteration
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    visualize = args.visualize
    filename = args.filename
    
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    
    sources = get_sources(subtask)
    in_domain_classes_num = len(config.labels) - 1
    
    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''
        
    sub_dir = get_subdir(subtask, data_type)
    
    train_csv = os.path.join(dataset_dir, sub_dir, 'evaluation_setup', 
        'fold1_train.csv')
        
    validate_csv = os.path.join(dataset_dir, sub_dir, 'evaluation_setup', 
        'fold1_evaluate.csv')
                
    feature_hdf5_path = os.path.join(workspace, 'features', 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}.h5'.format(sub_dir))
        
    scalar_path = os.path.join(workspace, 'scalars', 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}.h5'.format(sub_dir))
        
    checkpoint_path = os.path.join(workspace, 'checkpoints', filename, 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}'.format(sub_dir), 'holdout_fold={}'.format(holdout_fold), 
        model_type, '{}_iterations.pth'.format(iteration))
    
    logs_dir = os.path.join(workspace, 'logs', filename, args.mode, 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}'.format(sub_dir), 'holdout_fold={}'.format(holdout_fold), 
        model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)
        
    # Load scalar
    scalar = load_scalar(scalar_path)

    # Load model
    Model = eval(model_type)
    
    if subtask in ['a', 'b']:
        model = Model(in_domain_classes_num, activation='logsoftmax')
        loss_func = nll_loss
        
    elif subtask == 'c':
        model = Model(in_domain_classes_num, activation='sigmoid')
        loss_func = F.binary_cross_entropy
        
    # Load trained model weights from the checkpoint
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model'])
    
    if cuda:
        model.cuda()
        
    # Data generator
    data_generator = DataGenerator(
        feature_hdf5_path=feature_hdf5_path, 
        train_csv=train_csv, 
        validate_csv=validate_csv, 
        holdout_fold=holdout_fold, 
        scalar=scalar, 
        batch_size=batch_size)
    
    # Evaluator
    evaluator = Evaluator(
        model=model, 
        data_generator=data_generator, 
        subtask=subtask, 
        cuda=cuda)
    
    if subtask in ['a', 'c']:
        evaluator.evaluate(data_type='validate', source='a', verbose=True)
        
    elif subtask == 'b':
        evaluator.evaluate(data_type='validate', source='a', verbose=True)
        evaluator.evaluate(data_type='validate', source='b', verbose=True)
        evaluator.evaluate(data_type='validate', source='c', verbose=True)
    
    # Visualize log mel spectrogram
    if visualize:
        evaluator.visualize(data_type='validate', source='a')

    # --- Tail of a separate feature-extraction routine; its enclosing function
    # definition is not included in this excerpt. ---
    hf.create_dataset(name='fold', data=folds, dtype=np.int32)

    hf.close()

    logging.info('Write out hdf5 file to {}'.format(hdf5_path))
    logging.info('Time spent: {} s'.format(time.time() - write_hdf5_time))


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='')
    subparsers = parser.add_subparsers(dest='mode')

    parser_logmel = subparsers.add_parser('logmel')
    parser_logmel.add_argument('--workspace', type=str, required=True)
    parser_logmel.add_argument('--scene_type', type=str, required=True)
    parser_logmel.add_argument('--snr', type=int, required=True)

    args = parser.parse_args()

    logs_dir = os.path.join(args.workspace, 'logs', get_filename(__file__))
    create_folder(logs_dir)
    logging = create_logging(logs_dir, filemode='w')

    logging.info(args)

    if args.mode == 'logmel':
        calculate_logmel_features(args)

    else:
        raise Exception('Incorrect arguments!')
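
The scripts above lean on a few small workspace helpers. The sketches below show one plausible way `create_folder`, `get_filename` and `create_logging` could be implemented; the originals in the source repository may differ (for example by numbering the log files instead of reusing 'print.log').

import logging
import os


def create_folder(fd):
    # Create the directory (and any missing parents) if it does not exist yet.
    os.makedirs(fd, exist_ok=True)


def get_filename(path):
    # 'foo/bar/baz.py' -> 'baz'
    return os.path.splitext(os.path.basename(path))[0]


def create_logging(log_dir, filemode):
    # Log to a file inside log_dir and echo to the console,
    # then hand back the configured logging module.
    create_folder(log_dir)
    log_path = os.path.join(log_dir, 'print.log')
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s %(levelname)s %(message)s',
        filename=log_path,
        filemode=filemode)
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    logging.getLogger('').addHandler(console)
    return logging
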
def pack_waveforms_to_hdf5(args):
    """Pack waveforms to a single hdf5 file.
    """

    # Arguments & parameters
    audios_dir = args.audios_dir
    csv_path = args.csv_path
    waveform_hdf5_path = args.waveform_hdf5_path
    target_hdf5_path = args.target_hdf5_path
    mini_data = args.mini_data

    audio_length = config.audio_length
    classes_num = config.classes_num
    sample_rate = config.sample_rate

    # Paths
    if mini_data:
        prefix = 'mini_'
        waveform_hdf5_path += '.mini'
        target_hdf5_path += '.mini'
    else:
        prefix = ''

    create_folder(os.path.dirname(waveform_hdf5_path))
    create_folder(os.path.dirname(target_hdf5_path))

    logs_dir = '_logs/pack_waveforms_to_hdf5/{}{}'.format(
        prefix, get_filename(csv_path))
    create_folder(logs_dir)
    create_logging(logs_dir, filemode='w')
    logging.info('Write logs to {}'.format(logs_dir))

    # Read csv file
    meta_dict = read_metadata(csv_path)

    if mini_data:
        mini_num = 10
        for key in meta_dict.keys():
            meta_dict[key] = meta_dict[key][0:mini_num]

    audios_num = len(meta_dict['audio_name'])

    # Pack waveform to hdf5
    total_time = time.time()

    with h5py.File(waveform_hdf5_path, 'w') as hf:
        hf.create_dataset('audio_name', shape=((audios_num, )), dtype='S20')
        hf.create_dataset('waveform',
                          shape=((audios_num, audio_length)),
                          dtype=np.int16)
        hf.create_dataset('target',
                          shape=((audios_num, classes_num)),
                          dtype=bool)  # np.bool alias was removed in NumPy 1.24
        hf.attrs.create('sample_rate', data=sample_rate, dtype=np.int32)

        # Read audio
        for n in range(audios_num):
            audio_path = os.path.join(audios_dir, meta_dict['audio_name'][n])

            if os.path.isfile(audio_path):
                logging.info('{} {}'.format(n, audio_path))
                (audio, _) = librosa.core.load(audio_path,
                                               sr=sample_rate,
                                               mono=True)
                audio = pad_or_truncate(audio, audio_length)

                hf['audio_name'][n] = meta_dict['audio_name'][n].encode()
                hf['waveform'][n] = float32_to_int16(audio)
                hf['target'][n] = meta_dict['target'][n]
            else:
                logging.info('{} File does not exist! {}'.format(
                    n, audio_path))

        # Pack target to hdf5
        hdf5_name = target_hdf5_path.split('/')[-1]

        with h5py.File(target_hdf5_path, 'w') as target_hf:
            target_hf.create_dataset('audio_name',
                                     data=hf['audio_name'][:],
                                     dtype='S20')
            target_hf.create_dataset('hdf5_name',
                                     data=[hdf5_name.encode()] * audios_num,
                                     dtype='S40')
            target_hf.create_dataset('index_in_hdf5',
                                     data=np.arange(audios_num),
                                     dtype=np.int32)
            target_hf.create_dataset('target',
                                     data=hf['target'][:],
                                     dtype=bool)  # np.bool alias was removed in NumPy 1.24

    logging.info('Write to {}'.format(waveform_hdf5_path))
    logging.info('Write to {}'.format(target_hdf5_path))
    logging.info('Pack hdf5 time: {:.3f}'.format(time.time() - total_time))
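
`pack_waveforms_to_hdf5` relies on two small waveform helpers whose definitions are not shown here. The sketches below capture the behaviour implied by the calls above (fixed-length clips stored as int16); the clipping step and the `int16_to_float32` inverse are assumptions added for completeness.

import numpy as np


def pad_or_truncate(x, audio_length):
    # Zero-pad or cut a 1-D waveform to exactly audio_length samples.
    if len(x) <= audio_length:
        return np.concatenate((x, np.zeros(audio_length - len(x), dtype=x.dtype)))
    return x[:audio_length]


def float32_to_int16(x):
    # Assumes the waveform is already scaled to [-1, 1] (librosa output).
    x = np.clip(x, -1.0, 1.0)
    return (x * 32767.0).astype(np.int16)


def int16_to_float32(x):
    # Inverse mapping, used when reading waveforms back from the hdf5 file.
    return (x / 32767.0).astype(np.float32)
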
Example #5
def train(args):

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    pretrained_checkpoint_path = args.pretrained_checkpoint_path
    freeze_base = args.freeze_base
    loss_type = args.loss_type
    augmentation = args.augmentation
    learning_rate = args.learning_rate
    batch_size = args.batch_size
    few_shots = args.few_shots
    random_seed = args.random_seed
    resume_iteration = args.resume_iteration
    stop_iteration = args.stop_iteration
    device = 'cuda' if (args.cuda and torch.cuda.is_available()) else 'cpu'
    mini_data = args.mini_data
    filename = args.filename

    loss_func = get_loss_func(loss_type)
    pretrain = True if pretrained_checkpoint_path else False
    num_workers = 16
    
    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    train_hdf5_path = os.path.join(workspace, 'features', 
        '{}training.waveform.h5'.format(prefix))

    test_hdf5_path = os.path.join(workspace, 'features', 
        '{}testing.waveform.h5'.format(prefix))

    evaluate_hdf5_path = os.path.join(workspace, 'features', 
        '{}evaluation.waveform.h5'.format(prefix))

    test_reference_csv_path = os.path.join(dataset_dir, 'metadata', 
        'groundtruth_strong_label_testing_set.csv')
        
    evaluate_reference_csv_path = os.path.join(dataset_dir, 'metadata', 
        'groundtruth_strong_label_evaluation_set.csv')

    checkpoints_dir = os.path.join(workspace, 'checkpoints', filename, 
        'holdout_fold={}'.format(holdout_fold), model_type, 
        'pretrain={}'.format(pretrain), 'loss_type={}'.format(loss_type), 
        'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size), 
        'few_shots={}'.format(few_shots), 'random_seed={}'.format(random_seed), 
        'freeze_base={}'.format(freeze_base))
    create_folder(checkpoints_dir)

    tmp_submission_path = os.path.join(workspace, '_tmp_submission', 
        '{}{}'.format(prefix, filename), 'holdout_fold={}'.format(holdout_fold), 
        model_type, 'pretrain={}'.format(pretrain), 'loss_type={}'.format(loss_type), 
        'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size), 
        'few_shots={}'.format(few_shots), 'random_seed={}'.format(random_seed), 
        'freeze_base={}'.format(freeze_base), '_submission.csv')
    create_folder(os.path.dirname(tmp_submission_path))

    statistics_path = os.path.join(workspace, 'statistics', 
        '{}{}'.format(prefix, filename), 'holdout_fold={}'.format(holdout_fold), 
        model_type, 'pretrain={}'.format(pretrain), 'loss_type={}'.format(loss_type), 
        'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size), 
        'few_shots={}'.format(few_shots), 'random_seed={}'.format(random_seed), 
        'freeze_base={}'.format(freeze_base), 'statistics.pickle')
    create_folder(os.path.dirname(statistics_path))

    predictions_dir = os.path.join(workspace, 'predictions', 
        '{}{}'.format(prefix, filename), 'holdout_fold={}'.format(holdout_fold), 
        model_type, 'pretrain={}'.format(pretrain), 
        'loss_type={}'.format(loss_type), 'augmentation={}'.format(augmentation), 
        'few_shots={}'.format(few_shots), 'random_seed={}'.format(random_seed), 
        'freeze_base={}'.format(freeze_base), 'batch_size={}'.format(batch_size))
    create_folder(predictions_dir)

    logs_dir = os.path.join(workspace, 'logs', '{}{}'.format(prefix, filename), 
        'holdout_fold={}'.format(holdout_fold), model_type, 
        'pretrain={}'.format(pretrain), 'loss_type={}'.format(loss_type), 
        'augmentation={}'.format(augmentation), 'few_shots={}'.format(few_shots), 
        'random_seed={}'.format(random_seed), 'freeze_base={}'.format(freeze_base), 
        'batch_size={}'.format(batch_size))
    create_logging(logs_dir, 'w')
    logging.info(args)

    if 'cuda' in device:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')
    
    # Model (audio/feature hyper-parameters assumed to come from the global `config` module)
    sample_rate, window_size, hop_size = config.sample_rate, config.window_size, config.hop_size
    mel_bins, fmin, fmax, classes_num = config.mel_bins, config.fmin, config.fmax, config.classes_num
    Model = eval(model_type)
    model = Model(sample_rate, window_size, hop_size, mel_bins, fmin, fmax, 
        classes_num)

    # Statistics
    statistics_container = StatisticsContainer(statistics_path)

    if pretrain:
        logging.info('Load pretrained model from {}'.format(pretrained_checkpoint_path))
        model.load_from_pretrain(pretrained_checkpoint_path)

    if resume_iteration:
        resume_checkpoint_path = os.path.join(checkpoints_dir, '{}_iterations.pth'.format(resume_iteration))
        logging.info('Load resume model from {}'.format(resume_checkpoint_path))
        resume_checkpoint = torch.load(resume_checkpoint_path)
        model.load_state_dict(resume_checkpoint['model'])
        statistics_container.load_state_dict(resume_iteration)
        iteration = resume_checkpoint['iteration']
    else:
        iteration = 0

    # Parallel
    print('GPU number: {}'.format(torch.cuda.device_count()))
    model = torch.nn.DataParallel(model)

    if 'cuda' in device:
        model.to(device)

    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, 
        betas=(0.9, 0.999), eps=1e-08, weight_decay=0., amsgrad=True)

    train_dataset = DCASE2017Task4Dataset(hdf5_path=train_hdf5_path)
    test_dataset = DCASE2017Task4Dataset(hdf5_path=test_hdf5_path)
    evaluate_dataset = DCASE2017Task4Dataset(hdf5_path=evaluate_hdf5_path)

    train_sampler = TrainSampler(
        hdf5_path=train_hdf5_path, 
        batch_size=batch_size * 2 if 'mixup' in augmentation else batch_size, 
        few_shots=few_shots, 
        random_seed=random_seed)

    test_sampler = EvaluateSampler(dataset_size=len(test_dataset), batch_size=batch_size)
    evaluate_sampler = EvaluateSampler(dataset_size=len(evaluate_dataset), batch_size=batch_size)

    collector = Collator()

    train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
        batch_sampler=train_sampler, collate_fn=collector, 
        num_workers=num_workers, pin_memory=True)

    test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 
        batch_sampler=test_sampler, collate_fn=collector, 
        num_workers=num_workers, pin_memory=True)

    evaluate_loader = torch.utils.data.DataLoader(dataset=evaluate_dataset, 
        batch_sampler=evaluate_sampler, collate_fn=collector, 
        num_workers=num_workers, pin_memory=True)

    if 'mixup' in augmentation:
        mixup_augmenter = Mixup(mixup_alpha=1.)
        
    # Evaluator
    test_evaluator = Evaluator(
        model=model, 
        generator=test_loader)

    evaluate_evaluator = Evaluator(
        model=model, 
        generator=evaluate_loader)

    train_bgn_time = time.time()
    
    # Train on mini batches
    for batch_data_dict in train_loader:
        
        # Evaluate
        if iteration % 1000 == 0:
            if resume_iteration > 0 and iteration == resume_iteration:
                pass
            else:
                logging.info('------------------------------------')
                logging.info('Iteration: {}'.format(iteration))

                train_fin_time = time.time()

                for (data_type, evaluator, reference_csv_path) in [
                    ('test', test_evaluator, test_reference_csv_path), 
                    ('evaluate', evaluate_evaluator, evaluate_reference_csv_path)]:

                    logging.info('{} statistics:'.format(data_type))

                    (statistics, predictions) = evaluator.evaluate(
                        reference_csv_path, tmp_submission_path)

                    statistics_container.append(data_type, iteration, statistics)

                    prediction_path = os.path.join(predictions_dir, 
                        '{}_iterations.prediction.{}.h5'.format(iteration, data_type))

                    write_out_prediction(predictions, prediction_path)
                
                statistics_container.dump()

                train_time = train_fin_time - train_bgn_time
                validate_time = time.time() - train_fin_time

                logging.info(
                    'Train time: {:.3f} s, validate time: {:.3f} s'
                    ''.format(train_time, validate_time))

                train_bgn_time = time.time()

        # Save model 
        if iteration % 10000 == 0 and iteration > 49999:
            checkpoint = {
                'iteration': iteration, 
                'model': model.module.state_dict(), 
                'optimizer': optimizer.state_dict()}

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))
                
            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))
        
        if 'mixup' in augmentation:
            batch_data_dict['mixup_lambda'] = mixup_augmenter.get_lambda(len(batch_data_dict['waveform']))

        # Move data to GPU
        for key in batch_data_dict.keys():
            batch_data_dict[key] = move_data_to_device(batch_data_dict[key], device)
        
        # Train
        model.train()

        if 'mixup' in augmentation:
            batch_output_dict = model(batch_data_dict['waveform'], batch_data_dict['mixup_lambda'])
            batch_target_dict = {'target': do_mixup(batch_data_dict['target'], batch_data_dict['mixup_lambda'])}
        else:
            batch_output_dict = model(batch_data_dict['waveform'], None)
            batch_target_dict = {'target': batch_data_dict['target']}

        # loss
        loss = loss_func(batch_output_dict, batch_target_dict)
        print(iteration, loss)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == stop_iteration:
            break 
            
        iteration += 1
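
Example #5 passes a per-example `mixup_lambda` vector to the model and mixes the targets with `do_mixup`. A minimal sketch of that helper pair follows, assuming consecutive samples in the doubled batch are mixed in pairs; the actual `Mixup` class in the source repository may differ in detail.

import numpy as np
import torch


class Mixup(object):
    def __init__(self, mixup_alpha, random_seed=1234):
        self.mixup_alpha = mixup_alpha
        self.random_state = np.random.RandomState(random_seed)

    def get_lambda(self, batch_size):
        # One weight per sample: samples 2k and 2k + 1 receive lam and (1 - lam).
        lambdas = []
        for _ in range(0, batch_size, 2):
            lam = self.random_state.beta(self.mixup_alpha, self.mixup_alpha)
            lambdas.extend([lam, 1.0 - lam])
        return np.array(lambdas[:batch_size], dtype=np.float32)


def do_mixup(x, mixup_lambda):
    # Mix consecutive examples: out[k] = lam[2k] * x[2k] + lam[2k + 1] * x[2k + 1].
    # x: (batch_size, ...) tensor; mixup_lambda: (batch_size,) tensor on the same device.
    lam = mixup_lambda.view(-1, *([1] * (x.dim() - 1)))
    return lam[0::2] * x[0::2] + lam[1::2] * x[1::2]
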
Example #6
def train(args):
    '''Training. Model will be saved after several iterations. 
    
    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      taxonomy_level: 'fine' | 'coarse'
      model_type: string, e.g. 'Cnn_9layers_MaxPooling'
      holdout_fold: '1' | 'None', where '1' indicates using validation and 
          'None' indicates using full data for training
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    taxonomy_level = args.taxonomy_level
    model_type = args.model_type
    holdout_fold = args.holdout_fold
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    plt_x = []
    plt_y = []
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    max_iteration = 10  # Number of mini-batches to evaluate on training data
    reduce_lr = True

    labels = get_labels(taxonomy_level)
    classes_num = len(labels)

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    train_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train.h5')

    validate_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'validate.h5')

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train.h5')

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_folder(checkpoints_dir)

    _temp_submission_path = os.path.join(
        workspace, '_temp_submissions', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type, '_submission.csv')
    create_folder(os.path.dirname(_temp_submission_path))

    validate_statistics_path = os.path.join(
        workspace, 'statistics', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))

    annotation_path = os.path.join(dataset_dir, 'annotations.csv')

    yaml_path = os.path.join(dataset_dir, 'dcase-ust-taxonomy.yaml')

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    if cuda:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    Model = eval(model_type)
    model = Model(classes_num)

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=1e-3,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0.,
                           amsgrad=True)

    # Data generator
    data_generator = DataGenerator(train_hdf5_path=train_hdf5_path,
                                   validate_hdf5_path=validate_hdf5_path,
                                   holdout_fold=holdout_fold,
                                   scalar=scalar,
                                   batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          taxonomy_level=taxonomy_level,
                          cuda=cuda,
                          verbose=False)

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0
    best_inde = {}
    best_inde['micro_auprc'] = np.array([0.0])
    best_inde['micro_f1'] = np.array([0.0])
    best_inde['macro_auprc'] = np.array([0.0])
    best_inde['average_precision'] = np.array([0.0])
    best_inde['sum'] = best_inde['micro_auprc'] + best_inde[
        'micro_f1'] + best_inde['macro_auprc']
    best_map = 0

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate
        if iteration % 200 == 0:
            logging.info('------------------------------------')
            logging.info('Iteration: {}, {} level statistics:'.format(
                iteration, taxonomy_level))

            train_fin_time = time.time()

            # Evaluate on training data
            if mini_data:
                raise Exception('`mini_data` flag must be set to False to use '
                                'the official evaluation tool!')

            train_statistics = evaluator.evaluate(data_type='train',
                                                  max_iteration=None)
            if iteration > 5000:
                if best_map < np.mean(train_statistics['average_precision']):
                    best_map = np.mean(train_statistics['average_precision'])
                    logging.info('best_map= {}'.format(best_map))
                    # logging.info('iter= {}'.format(iteration))
                    checkpoint = {
                        'iteration': iteration,
                        'model': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'indicators': train_statistics
                    }
                    checkpoint_path = os.path.join(checkpoints_dir,
                                                   'best2.pth')
                    torch.save(checkpoint, checkpoint_path)
                    logging.info(
                        'best_models saved to {}'.format(checkpoint_path))

            # Evaluate on validation data
            if holdout_fold != 'none':
                validate_statistics = evaluator.evaluate(
                    data_type='validate',
                    submission_path=_temp_submission_path,
                    annotation_path=annotation_path,
                    yaml_path=yaml_path,
                    max_iteration=None)

                validate_statistics_container.append_and_dump(
                    iteration, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model
        if iteration % 1000 == 0 and iteration > 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU
        for key in batch_data_dict.keys():
            if key in ['feature', 'fine_target', 'coarse_target', 'spacetime']:
                batch_data_dict[key] = move_data_to_gpu(
                    batch_data_dict[key], cuda)

        feature, spacetime, targets_a, targets_b, lam = mixup_data(
            batch_data_dict['feature'],
            batch_data_dict['spacetime'],
            batch_data_dict['{}_target'.format(taxonomy_level)],
            alpha=1.0,
            use_cuda=True)

        # Train
        model.train()
        criterion = nn.BCELoss().cuda()
        batch_output = model(feature, spacetime)

        # loss
        #batch_target = batch_data_dict['{}_target'.format(taxonomy_level)]
        loss = mixup_criterion(criterion, batch_output, targets_a, targets_b,
                               lam)
        #loss = binary_cross_entropy(batch_output, batch_target)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if iteration % 100 == 0:
            plt_x.append(iteration)
            plt_y.append(loss.item())
        if iteration % 10000 == 0 and iteration != 0:
            plt.figure(1)
            plt.suptitle('test result ', fontsize='18')
            plt.plot(plt_x, plt_y, 'r-', label='loss')
            plt.legend(loc='best')
            plt.savefig(
                '/home/fangjunyan/count/' +
                time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time())) +
                '{}'.format(holdout_fold) + '{}.jpg'.format(taxonomy_level))
        # Stop learning
        if iteration == 10000:
            break

        iteration += 1
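
Several of these examples convert numpy batches with `move_data_to_gpu` before the forward pass. A plausible minimal implementation, assuming float arrays become float tensors and integer arrays become long tensors:

import torch


def move_data_to_gpu(x, cuda):
    # Convert a numpy batch to a torch tensor and optionally move it to the GPU.
    if 'float' in str(x.dtype):
        x = torch.Tensor(x)
    elif 'int' in str(x.dtype):
        x = torch.LongTensor(x)
    else:
        return x  # e.g. arrays of audio names are left untouched
    if cuda:
        x = x.cuda()
    return x
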
Example #7
def train(args):
    '''Training. Model will be saved after several iterations.

    Args:
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      subtask: 'a' | 'b' | 'c', corresponds to 3 subtasks in DCASE2019 Task1
      data_type: 'development' | 'evaluation'
      holdout_fold: '1' | 'none', set 1 for development and none for training
          on all data without validation
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    subtask = args.subtask
    data_type = args.data_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    max_iteration = None  # Number of mini-batches to evaluate on training data
    reduce_lr = True

    sources_to_evaluate = get_sources(subtask)
    in_domain_classes_num = len(config.labels) - 1

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    sub_dir = get_subdir(subtask, data_type)

    train_csv = os.path.join(dataset_dir, sub_dir, 'evaluation_setup',
                             'fold1_train.csv')

    validate_csv = os.path.join(dataset_dir, sub_dir, 'evaluation_setup',
                                'fold1_evaluate.csv')

    feature_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        '{}.h5'.format(sub_dir))
    feature_hdf5_path_left = os.path.join(
        workspace, 'features_left',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        '{}.h5'.format(sub_dir))
    feature_hdf5_path_right = os.path.join(
        workspace, 'features_right',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        '{}.h5'.format(sub_dir))
    feature_hdf5_path_side = os.path.join(
        workspace, 'features_side',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        '{}.h5'.format(sub_dir))
    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        '{}.h5'.format(sub_dir))
    scalar_path_left = os.path.join(
        workspace, 'scalars_left',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        '{}.h5'.format(sub_dir))
    scalar_path_right = os.path.join(
        workspace, 'scalars_right',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        '{}.h5'.format(sub_dir))
    scalar_path_side = os.path.join(
        workspace, 'scalars_side',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        '{}.h5'.format(sub_dir))
    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), '{}'.format(sub_dir),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_folder(checkpoints_dir)

    validate_statistics_path = os.path.join(
        workspace, 'statistics', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), '{}'.format(sub_dir),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'validate_statistics.pickle')

    create_folder(os.path.dirname(validate_statistics_path))

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), '{}'.format(sub_dir),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)
    scalar_left = load_scalar(scalar_path_left)
    scalar_right = load_scalar(scalar_path_right)
    scalar_side = load_scalar(scalar_path_side)
    # Model
    Model = eval(model_type)

    if subtask in ['a', 'b']:
        model = Model(in_domain_classes_num, activation='logsoftmax')
        loss_func = nll_loss

    elif subtask == 'c':
        model = Model(in_domain_classes_num, activation='sigmoid')
        loss_func = F.binary_cross_entropy

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=1e-3,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0.,
                           amsgrad=True)

    # Data generator
    data_generator = DataGenerator(
        feature_hdf5_path=feature_hdf5_path,
        feature_hdf5_path_left=feature_hdf5_path_left,
        feature_hdf5_path_right=feature_hdf5_path_right,
        feature_hdf5_path_side=feature_hdf5_path_side,
        train_csv=train_csv,
        validate_csv=validate_csv,
        scalar=scalar,
        scalar_left=scalar_left,
        scalar_right=scalar_right,
        scalar_side=scalar_side,
        batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          subtask=subtask,
                          cuda=cuda)

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0

    # Train on mini batches
    for batch_data_dict, batch_data_dict_left, batch_data_dict_right, \
            batch_data_dict_side in data_generator.generate_train():

        # Evaluate
        if iteration % 200 == 0:
            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()

            for source in sources_to_evaluate:
                train_statistics = evaluator.evaluate(data_type='train',
                                                      source=source,
                                                      max_iteration=None,
                                                      verbose=False)

            for source in sources_to_evaluate:
                validate_statistics = evaluator.evaluate(data_type='validate',
                                                         source=source,
                                                         max_iteration=None,
                                                         verbose=False)

                validate_statistics_container.append_and_dump(
                    iteration, source, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model
        if iteration % 1000 == 0 and iteration > 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU
        for key in batch_data_dict.keys():
            if key in ['feature', 'target']:
                batch_data_dict[key] = move_data_to_gpu(
                    batch_data_dict[key], cuda)

        for key in batch_data_dict_left.keys():
            if key in ['feature_left', 'target']:
                batch_data_dict_left[key] = move_data_to_gpu(
                    batch_data_dict_left[key], cuda)

        for key in batch_data_dict_right.keys():
            if key in ['feature_right', 'target']:
                batch_data_dict_right[key] = move_data_to_gpu(
                    batch_data_dict_right[key], cuda)

        for key in batch_data_dict_side.keys():
            if key in ['feature_side', 'target']:
                batch_data_dict_side[key] = move_data_to_gpu(
                    batch_data_dict_side[key], cuda)

        # Train
        model.train()
        batch_output = model(data=batch_data_dict['feature'],
                             data_left=batch_data_dict_left['feature_left'],
                             data_right=batch_data_dict_right['feature_right'],
                             data_side=batch_data_dict_side['feature_side'])

        # loss
        loss = loss_func(batch_output, batch_data_dict['target'])

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == 15000:
            break

        iteration += 1
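
Example #7 pairs a log-softmax output head with `nll_loss` for subtasks a and b. One plausible definition of that loss, assuming one-hot targets and log-probability outputs, is sketched below; the repository's own helper may be written differently.

import torch


def nll_loss(output, target):
    # output: (batch_size, classes_num) log-probabilities from a log-softmax head.
    # target: (batch_size, classes_num) one-hot encoded labels.
    return -torch.mean(torch.sum(target * output, dim=-1))
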
def train(args):
    '''Training. Model will be saved after several iterations. 
    
    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      taxonomy_level: 'fine' | 'coarse'
      model_type: string, e.g. 'Cnn_9layers_MaxPooling'
      holdout_fold: '1' | 'None', where '1' indicates using validation and 
          'None' indicates using full data for training
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    taxonomy_level = args.taxonomy_level
    model_type = args.model_type
    holdout_fold = args.holdout_fold
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    plt_x = []
    plt_y = []
    T_max = 300
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    max_iteration = 10  # Number of mini-batches to evaluate on training data
    reduce_lr = True

    labels = get_labels(taxonomy_level)
    classes_num = len(labels)

    def mixup_data(x1, x2, y, alpha=1.0, use_cuda=True):
        '''Mixup data augmentation: returns mixed inputs, pairs of targets, and lambda.'''
        if alpha > 0:
            lam = np.random.beta(alpha, alpha)  # Sample the mixing weight from Beta(alpha, alpha)
        else:
            lam = 1

        batch_size = x1.size()[0]
        if use_cuda:
            index = torch.randperm(batch_size).cuda()  # Random permutation of 0..batch_size-1, on GPU
        else:
            index = torch.randperm(batch_size)  # CPU fallback

        mixed_x1 = lam * x1 + (1 - lam) * x1[index, :]
        mixed_x2 = lam * x2 + (1 - lam) * x2[index, :]  # Mix both input streams with the same permutation
        y_a, y_b = y, y[index]
        return mixed_x1, mixed_x2, y_a, y_b, lam

    def mixup_criterion(criterion, pred, y_a, y_b, lam):
        return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    train_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train.h5')

    validate_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'validate.h5')

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train.h5')

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_folder(checkpoints_dir)

    _temp_submission_path = os.path.join(
        workspace, '_temp_submissions', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type, '_submission.csv')
    create_folder(os.path.dirname(_temp_submission_path))

    validate_statistics_path = os.path.join(
        workspace, 'statistics', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))
    loss_path = os.path.join(
        workspace, 'loss',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_folder(loss_path)

    annotation_path = os.path.join(dataset_dir, 'annotations.csv')

    yaml_path = os.path.join(dataset_dir, 'dcase-ust-taxonomy.yaml')

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)

    create_logging(logs_dir, 'w')
    logging.info(args)

    if cuda:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    Model = eval(model_type)
    model = Model(classes_num)
    logging.info(
        'Space_Duo_Cnn_9_Avg: one extra 258*258 layer, separate FC heads, '
        'spatiotemporal labels required, loss monitoring enabled, zero-stripped one-hot targets')

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=1e-3,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0.,
                           amsgrad=True)

    # Log the total number of model parameters
    logging.info('model params: {}'.format(
        sum(param.numel() for param in model.parameters())))

    # Data generator
    data_generator = DataGenerator(train_hdf5_path=train_hdf5_path,
                                   validate_hdf5_path=validate_hdf5_path,
                                   holdout_fold=holdout_fold,
                                   scalar=scalar,
                                   batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          taxonomy_level=taxonomy_level,
                          cuda=cuda,
                          verbose=False)

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0
    best_inde = {}
    best_inde['micro_auprc'] = np.array([0.0])
    best_inde['micro_f1'] = np.array([0.0])
    best_inde['macro_auprc'] = np.array([0.0])
    best_inde['average_precision'] = np.array([0.0])
    best_inde['sum'] = best_inde['micro_auprc'] + best_inde[
        'micro_f1'] + best_inde['macro_auprc']
    last_loss1 = []
    last_loss2 = []
    last_loss = []
    best_map = 0
    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate
        if iteration % 200 == 0:
            logging.info('------------------------------------')
            logging.info('Iteration: {}, {} level statistics:'.format(
                iteration, taxonomy_level))

            train_fin_time = time.time()

            # Evaluate on training data
            if mini_data:
                raise Exception('`mini_data` flag must be set to False to use '
                                'the official evaluation tool!')

            train_statistics = evaluator.evaluate(data_type='train',
                                                  max_iteration=None)
            if iteration > 5000:
                if best_map < np.mean(train_statistics['average_precision']):
                    best_map = np.mean(train_statistics['average_precision'])
                    logging.info('best_map= {}'.format(best_map))
                    # logging.info('iter= {}'.format(iteration))
                    checkpoint = {
                        'iteration': iteration,
                        'model': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'indicators': train_statistics
                    }
                    checkpoint_path = os.path.join(checkpoints_dir,
                                                   'best7.pth')
                    torch.save(checkpoint, checkpoint_path)
                    logging.info(
                        'best_models saved to {}'.format(checkpoint_path))

            # Evaluate on validation data
            if holdout_fold != 'none':
                validate_statistics = evaluator.evaluate(
                    data_type='validate',
                    submission_path=_temp_submission_path,
                    annotation_path=annotation_path,
                    yaml_path=yaml_path,
                    max_iteration=None)

                validate_statistics_container.append_and_dump(
                    iteration, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Reduce learning rate
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9
        batch_data2_dict = batch_data_dict.copy()
        n = []

        for i, l in enumerate(batch_data2_dict['coarse_target']):
            k = 0
            for j in range(0, 8):
                if l[j] > 0.6:
                    l[j] = 1
                else:
                    l[j] = 0
                    k += 1
                if k == 8:
                    if taxonomy_level == 'coarse':
                        n.append(i)

        for i, l in enumerate(batch_data2_dict['fine_target']):
            k = 0
            for j in range(0, 29):
                if l[j] > 0.6:
                    l[j] = 1
                else:
                    l[j] = 0
                    k += 1
                if k == 29:
                    if taxonomy_level == 'fine':
                        n.append(i)

        batch_data2_dict['fine_target'] = np.delete(
            batch_data2_dict['fine_target'], n, axis=0)
        batch_data2_dict['coarse_target'] = np.delete(
            batch_data2_dict['coarse_target'], n, axis=0)
        batch_data2_dict['audio_name'] = np.delete(
            batch_data2_dict['audio_name'], n, axis=0)
        batch_data2_dict['feature'] = np.delete(batch_data2_dict['feature'],
                                                n,
                                                axis=0)
        batch_data2_dict['spacetime'] = np.delete(
            batch_data2_dict['spacetime'], n, axis=0)
        if batch_data2_dict['audio_name'].size == 0:
            iteration += 1
            continue
        # To train on the raw probability targets, comment out the next line;
        # keep it when using the zero-stripped one-hot targets.
        batch_data_dict = batch_data2_dict

        # if iteration <8655:
        #      batch_data_dict = batch_data2_dict
        # elif iteration >=8655 and  iteration % 2 == 0:
        #     batch_data_dict = batch_data2_dict

        # Move data to GPU ('external_target' and 'external_feature' are not used here)
        for key in batch_data_dict.keys():
            if key in ['feature', 'fine_target', 'coarse_target', 'spacetime']:
                batch_data_dict[key] = move_data_to_gpu(
                    batch_data_dict[key], cuda)
        # Train
        model.train()
        # Mixup data augmentation
        feature1, spacetime1, targets1_a, targets1_b, lam1 = mixup_data(
            batch_data_dict['feature'],
            batch_data_dict['spacetime'],
            batch_data_dict['fine_target'],
            alpha=1.0,
            use_cuda=True)
        feature2, spacetime2, targets2_a, targets2_b, lam2 = mixup_data(
            batch_data_dict['feature'],
            batch_data_dict['spacetime'],
            batch_data_dict['coarse_target'],
            alpha=1.0,
            use_cuda=True)
        batch_output1 = model.forward1(feature1, spacetime1)
        batch_output2 = model.forward2(feature2, spacetime2)
        # Keep the mixup coefficients as floats: casting them to int would
        # collapse the interpolated loss onto a single target set.
        loss1 = (lam1 * binary_cross_entropy(batch_output1, targets1_a) +
                 (1 - lam1) * binary_cross_entropy(batch_output1, targets1_b))
        loss2 = (lam2 * binary_cross_entropy(batch_output2, targets2_a) +
                 (1 - lam2) * binary_cross_entropy(batch_output2, targets2_b))

        # To train without mixup augmentation, use the following code instead
        # batch_target1 = batch_data_dict['fine_target']
        # batch_output1 = model.forward1(batch_data_dict['feature'], batch_data_dict['spacetime'])
        # batch_target2 = batch_data_dict['coarse_target']
        # batch_output2 = model.forward2(batch_data_dict['feature'], batch_data_dict['spacetime'])
        # loss1 = binary_cross_entropy(batch_output1, batch_target1)
        # loss2 = binary_cross_entropy(batch_output2, batch_target2)

        loss = loss1 + loss2

        # To enable the loss-spike monitor, use the following code; otherwise comment it out
        if iteration > 4320:
            new_loss = loss.item()
            if len(last_loss) < 5:
                last_loss.append(new_loss)
            else:
                cha = 0
                for i in range(4):
                    cha += abs(last_loss[i + 1] - last_loss[i])
                if new_loss > last_loss[4] and cha >= (new_loss -
                                                       last_loss[4]) > cha / 2:
                    for i in range(4):
                        last_loss[i] = last_loss[i + 1]
                    last_loss[4] = new_loss
                    logging.info('drop iteration: {}'.format(iteration))
                    iteration += 1
                    continue
                elif new_loss > last_loss[4] and (new_loss -
                                                  last_loss[4]) > cha / 2.75:
                    for i in range(4):
                        last_loss[i] = last_loss[i + 1]
                    last_loss[4] = new_loss
                    logging.info('low-weight iteration: {}'.format(iteration))
                    loss = loss / 2

                else:
                    for i in range(4):
                        last_loss[i] = last_loss[i + 1]
                    last_loss[4] = new_loss

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if iteration % 50 == 0:
            plt_x.append(iteration)
            plt_y.append(loss.item())  # store a float, not the graph-attached tensor

        if iteration % 13000 == 0 and iteration != 0:
            plt.figure(1)
            plt.suptitle('training loss', fontsize=18)
            plt.plot(plt_x, plt_y, 'r-', label='loss')
            plt.legend(loc='best')
            plt.savefig(
                loss_path + '/' +
                time.strftime('%m%d_%H%M%S', time.localtime(time.time())) +
                'loss.jpg')
            plt.savefig(loss_path + '/loss.jpg')

        # Stop learning
        if iteration == 13000:
            # logging.info("best_micro_auprc:{:.3f}".format(best_inde['micro_auprc']))
            # logging.info("best_micro_f1:{:.3f}".format(best_inde['micro_f1']))
            # logging.info("best_macro_auprc:{:.3f}".format(best_inde['macro_auprc']))
            # labels = get_labels(taxonomy_level)
            # for k, label in enumerate(labels):
            #     logging.info('    {:<40}{:.3f}'.format(label, best_inde['average_precision'][k]))
            break
        iteration += 1
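
# The training loop above relies on a `mixup_data` helper that is not shown in
# this example. The sketch below is an assumption, not the author's code: it
# follows the standard mixup recipe (lambda drawn from Beta(alpha, alpha), the
# batch mixed with a random permutation of itself) and matches the five-value
# return signature used above.
import numpy as np
import torch


def mixup_data(x, spacetime, target, alpha=1.0, use_cuda=True):
    """Sketch of a mixup helper (assumed, not from the original repository).

    Mixes `x` and `spacetime` with a random permutation of the batch and
    returns the original and permuted targets together with lambda.
    `use_cuda` is kept only for signature compatibility; the device is taken
    from `x`.
    """
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1.0

    batch_size = x.size(0)
    index = torch.randperm(batch_size, device=x.device)

    mixed_x = lam * x + (1 - lam) * x[index]
    mixed_spacetime = lam * spacetime + (1 - lam) * spacetime[index]
    target_a, target_b = target, target[index]
    return mixed_x, mixed_spacetime, target_a, target_b, lam
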
Beispiel #9
0
def train(args):
    '''Training. Model will be saved after several iterations. 
    
    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      train_source: 'curated' | 'noisy' | 'curated_and_noisy'
      segment_seconds: float, duration of audio recordings to be padded or split
      hop_seconds: float, hop seconds between segments
      pad_type: 'constant' | 'repeat'
      holdout_fold: '1', '2', '3', '4' | 'none', set `none` for training 
          on all data without validation
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    train_source = args.train_source
    segment_seconds = args.segment_seconds
    hop_seconds = args.hop_seconds
    pad_type = args.pad_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    mel_bins = config.mel_bins
    classes_num = config.classes_num
    frames_per_second = config.frames_per_second
    max_iteration = 500  # Number of mini-batches to evaluate on training data
    reduce_lr = False

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    curated_feature_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train_curated.h5')

    noisy_feature_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train_noisy.h5')

    curated_cross_validation_path = os.path.join(
        workspace, 'cross_validation_metadata',
        'train_curated_cross_validation.csv')

    noisy_cross_validation_path = os.path.join(
        workspace, 'cross_validation_metadata',
        'train_noisy_cross_validation.csv')

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train_noisy.h5')

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'train_source={}'.format(train_source),
        'segment={}s,hop={}s,pad_type={}'.format(segment_seconds, hop_seconds,
                                                 pad_type),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_folder(checkpoints_dir)

    validate_statistics_path = os.path.join(
        workspace, 'statistics', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'train_source={}'.format(train_source),
        'segment={}s,hop={}s,pad_type={}'.format(segment_seconds, hop_seconds,
                                                 pad_type),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'train_source={}'.format(train_source),
        'segment={}s,hop={}s,pad_type={}'.format(segment_seconds, hop_seconds,
                                                 pad_type),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    Model = eval(model_type)
    model = Model(classes_num)

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=1e-3,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0.,
                           amsgrad=True)

    # Data generator
    data_generator = DataGenerator(
        curated_feature_hdf5_path=curated_feature_hdf5_path,
        noisy_feature_hdf5_path=noisy_feature_hdf5_path,
        curated_cross_validation_path=curated_cross_validation_path,
        noisy_cross_validation_path=noisy_cross_validation_path,
        train_source=train_source,
        holdout_fold=holdout_fold,
        segment_seconds=segment_seconds,
        hop_seconds=hop_seconds,
        pad_type=pad_type,
        scalar=scalar,
        batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          cuda=cuda)

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate
        if iteration % 500 == 0:
            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()

            # Evaluate on partial of train data
            logging.info('Train statistics:')

            for target_source in ['curated', 'noisy']:
                validate_curated_statistics = evaluator.evaluate(
                    data_type='train',
                    target_source=target_source,
                    max_iteration=max_iteration,
                    verbose=False)

            # Evaluate on holdout validation data
            if holdout_fold != 'none':
                logging.info('Validate statistics:')

                for target_source in ['curated', 'noisy']:
                    validate_curated_statistics = evaluator.evaluate(
                        data_type='validate',
                        target_source=target_source,
                        max_iteration=None,
                        verbose=False)

                    validate_statistics_container.append(
                        iteration, target_source, validate_curated_statistics)

                validate_statistics_container.dump()

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model
        if iteration % 1000 == 0 and iteration > 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU
        for key in batch_data_dict.keys():
            if key in ['feature', 'mask', 'target']:
                batch_data_dict[key] = move_data_to_gpu(
                    batch_data_dict[key], cuda)

        # Train
        model.train()
        batch_output = model(batch_data_dict['feature'])

        # loss
        loss = binary_cross_entropy(batch_output, batch_data_dict['target'])

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == 20000:
            break

        iteration += 1
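
# Both training loops above call a `binary_cross_entropy` helper that is not
# shown. A minimal sketch, assuming the model outputs probabilities (i.e. a
# sigmoid is already applied inside the network):
import torch.nn.functional as F


def binary_cross_entropy(output, target):
    """Sketch (assumed): elementwise BCE on probabilities in [0, 1]."""
    return F.binary_cross_entropy(output, target)
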
def pack_maps_dataset_to_hdf5(args):
    """MAPS is a piano dataset only used for evaluating our piano transcription
    system (optional). Ref:

    [1] Emiya, Valentin. "MAPS: A piano database for multipitch 
    estimation and automatic transcription of music." 2016.

    Load & resample MAPS audio files, then write to hdf5 files.

    Args:
      dataset_dir: str, directory of dataset
      workspace: str, directory of your workspace
    """

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace

    sample_rate = config.sample_rate
    pianos = ['ENSTDkCl', 'ENSTDkAm']

    # Paths
    waveform_hdf5s_dir = os.path.join(workspace, 'hdf5s', 'maps')

    logs_dir = os.path.join(workspace, 'logs', get_filename(__file__))
    create_logging(logs_dir, filemode='w')
    logging.info(args)

    feature_time = time.time()
    count = 0

    # Load & resample each audio file to a hdf5 file
    for piano in pianos:
        sub_dir = os.path.join(dataset_dir, piano, 'MUS')

        audio_names = [os.path.splitext(name)[0] for name in os.listdir(sub_dir) 
            if os.path.splitext(name)[-1] == '.mid']
        
        for audio_name in audio_names:
            print('{} {}'.format(count, audio_name))
            audio_path = '{}.wav'.format(os.path.join(sub_dir, audio_name))
            midi_path = '{}.mid'.format(os.path.join(sub_dir, audio_name))

            (audio, _) = librosa.core.load(audio_path, sr=sample_rate, mono=True)
            midi_dict = read_maps_midi(midi_path)
            
            packed_hdf5_path = os.path.join(waveform_hdf5s_dir, '{}.h5'.format(audio_name))
            create_folder(os.path.dirname(packed_hdf5_path))

            with h5py.File(packed_hdf5_path, 'w') as hf:
                hf.attrs.create('split', data='test'.encode(), dtype='S20')
                hf.attrs.create('midi_filename', data='{}.mid'.format(audio_name).encode(), dtype='S100')
                hf.attrs.create('audio_filename', data='{}.wav'.format(audio_name).encode(), dtype='S100')
                hf.create_dataset(name='midi_event', data=[e.encode() for e in midi_dict['midi_event']], dtype='S100')
                hf.create_dataset(name='midi_event_time', data=midi_dict['midi_event_time'], dtype=np.float32)
                hf.create_dataset(name='waveform', data=float32_to_int16(audio), dtype=np.int16)
            
            count += 1

    logging.info('Write hdf5 to {}'.format(packed_hdf5_path))
    logging.info('Time: {:.3f} s'.format(time.time() - feature_time))
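
# pack_maps_dataset_to_hdf5 stores waveforms with a `float32_to_int16` helper
# that is not shown. A plausible sketch, assuming waveforms normalized to
# [-1, 1] and scaled to the int16 range:
import numpy as np


def float32_to_int16(x):
    """Sketch (assumed): scale a float32 waveform in [-1, 1] to int16."""
    assert np.max(np.abs(x)) <= 1.0
    return (x * 32767.).astype(np.int16)
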
def train(args):
    """Train and evaluate.

    Args:
      dataset_dir: str
      workspace: str
      holdout_fold: '1'
      model_type: str, e.g., 'Cnn_9layers_Gru_FrameAtt'
      loss_type: str, e.g., 'clip_bce'
      augmentation: str, e.g., 'mixup'
      learning_rate, float
      batch_size: int
      resume_iteration: int
      stop_iteration: int
      device: 'cuda' | 'cpu'
      mini_data: bool
    """

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    loss_type = args.loss_type
    augmentation = args.augmentation
    learning_rate = args.learning_rate
    batch_size = args.batch_size
    resume_iteration = args.resume_iteration
    stop_iteration = args.stop_iteration
    device = 'cuda' if (args.cuda and torch.cuda.is_available()) else 'cpu'
    mini_data = args.mini_data
    filename = args.filename

    loss_func = get_loss_func(loss_type)
    num_workers = 8

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    train_hdf5_path = os.path.join(workspace, 'hdf5s',
                                   '{}training.h5'.format(prefix))

    test_hdf5_path = os.path.join(workspace, 'hdf5s',
                                  '{}testing.h5'.format(prefix))

    evaluate_hdf5_path = os.path.join(workspace, 'hdf5s',
                                      '{}evaluation.h5'.format(prefix))

    test_reference_csv_path = os.path.join(
        dataset_dir, 'metadata', 'groundtruth_strong_label_testing_set.csv')

    evaluate_reference_csv_path = os.path.join(
        dataset_dir, 'metadata', 'groundtruth_strong_label_evaluation_set.csv')

    checkpoints_dir = os.path.join(workspace, 'checkpoints', '{}{}'.format(
        prefix, filename), 'holdout_fold={}'.format(holdout_fold),
                                   'model_type={}'.format(model_type),
                                   'loss_type={}'.format(loss_type),
                                   'augmentation={}'.format(augmentation),
                                   'batch_size={}'.format(batch_size))
    create_folder(checkpoints_dir)

    tmp_submission_path = os.path.join(
        workspace, '_tmp_submission', '{}{}'.format(prefix, filename),
        'holdout_fold={}'.format(holdout_fold),
        'model_type={}'.format(model_type), 'loss_type={}'.format(loss_type),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size), '_submission.csv')
    create_folder(os.path.dirname(tmp_submission_path))

    statistics_path = os.path.join(workspace, 'statistics', '{}{}'.format(
        prefix, filename), 'holdout_fold={}'.format(holdout_fold),
                                   'model_type={}'.format(model_type),
                                   'loss_type={}'.format(loss_type),
                                   'augmentation={}'.format(augmentation),
                                   'batch_size={}'.format(batch_size),
                                   'statistics.pickle')
    create_folder(os.path.dirname(statistics_path))

    logs_dir = os.path.join(workspace, 'logs', '{}{}'.format(prefix, filename),
                            'holdout_fold={}'.format(holdout_fold),
                            'model_type={}'.format(model_type),
                            'loss_type={}'.format(loss_type),
                            'augmentation={}'.format(augmentation),
                            'batch_size={}'.format(batch_size))
    create_logging(logs_dir, 'w')
    logging.info(args)

    if 'cuda' in device:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Model
    assert model_type, 'Please specify model_type!'
    Model = eval(model_type)
    model = Model(sample_rate, window_size, hop_size, mel_bins, fmin, fmax,
                  classes_num)

    # Statistics (created before the resume block so a resumed run can reload
    # its saved state below)
    statistics_container = StatisticsContainer(statistics_path)

    if resume_iteration:
        resume_checkpoint_path = os.path.join(
            checkpoints_dir, '{}_iterations.pth'.format(resume_iteration))
        logging.info(
            'Load resume model from {}'.format(resume_checkpoint_path))
        resume_checkpoint = torch.load(resume_checkpoint_path)
        model.load_state_dict(resume_checkpoint['model'])
        statistics_container.load_state_dict(resume_iteration)
        iteration = resume_checkpoint['iteration']
    else:
        iteration = 0

    # Parallel
    print('GPU number: {}'.format(torch.cuda.device_count()))
    model = torch.nn.DataParallel(model)

    if 'cuda' in device:
        model.to(device)

    # Optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=learning_rate,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0.,
                           amsgrad=True)

    # Dataset
    dataset = DCASE2017Task4Dataset()

    # Sampler
    train_sampler = TrainSampler(hdf5_path=train_hdf5_path,
                                 batch_size=batch_size *
                                 2 if 'mixup' in augmentation else batch_size)

    test_sampler = TestSampler(hdf5_path=test_hdf5_path, batch_size=batch_size)

    evaluate_sampler = TestSampler(hdf5_path=evaluate_hdf5_path,
                                   batch_size=batch_size)

    # Data loader
    train_loader = torch.utils.data.DataLoader(dataset=dataset,
                                               batch_sampler=train_sampler,
                                               collate_fn=collate_fn,
                                               num_workers=num_workers,
                                               pin_memory=True)

    test_loader = torch.utils.data.DataLoader(dataset=dataset,
                                              batch_sampler=test_sampler,
                                              collate_fn=collate_fn,
                                              num_workers=num_workers,
                                              pin_memory=True)

    evaluate_loader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_sampler=evaluate_sampler,
        collate_fn=collate_fn,
        num_workers=num_workers,
        pin_memory=True)

    if 'mixup' in augmentation:
        mixup_augmenter = Mixup(mixup_alpha=1.)

    # Evaluator
    evaluator = Evaluator(model=model)

    # (statistics_container is created above, before the resume block)

    train_bgn_time = time.time()

    # Train on mini batches
    for batch_data_dict in train_loader:

        # Evaluate
        if (iteration % 1000 == 0
                and iteration > resume_iteration):  # or (iteration == 0):

            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()

            for (data_type, data_loader, reference_csv_path) in [
                ('test', test_loader, test_reference_csv_path),
                ('evaluate', evaluate_loader, evaluate_reference_csv_path)
            ]:

                # Calculate statistics
                (statistics, _) = evaluator.evaluate(data_loader,
                                                     reference_csv_path,
                                                     tmp_submission_path)

                logging.info('{} statistics:'.format(data_type))
                logging.info('    Clipwise mAP: {:.3f}'.format(
                    np.mean(statistics['clipwise_ap'])))
                logging.info('    Framewise mAP: {:.3f}'.format(
                    np.mean(statistics['framewise_ap'])))
                logging.info('    {}'.format(
                    statistics['sed_metrics']['overall']['error_rate']))

                statistics_container.append(data_type, iteration, statistics)

            statistics_container.dump()

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model
        if iteration % 10000 == 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.module.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        if 'mixup' in augmentation:
            batch_data_dict['mixup_lambda'] = mixup_augmenter.get_lambda(
                batch_size=len(batch_data_dict['waveform']))

        # Move data to GPU
        for key in batch_data_dict.keys():
            batch_data_dict[key] = move_data_to_device(batch_data_dict[key],
                                                       device)

        # Train
        model.train()

        if 'mixup' in augmentation:
            batch_output_dict = model(batch_data_dict['waveform'],
                                      batch_data_dict['mixup_lambda'])
            batch_target_dict = {
                'target':
                do_mixup(batch_data_dict['target'],
                         batch_data_dict['mixup_lambda'])
            }
        else:
            batch_output_dict = model(batch_data_dict['waveform'], None)
            batch_target_dict = {'target': batch_data_dict['target']}

        # loss
        loss = loss_func(batch_output_dict, batch_target_dict)
        print(iteration, loss)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == stop_iteration:
            break

        iteration += 1
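
# When mixup is enabled above, targets are combined with a `do_mixup` helper.
# The sketch below is an assumption about its behaviour: the sampler doubles
# the batch, and consecutive pairs (0, 1), (2, 3), ... are mixed with
# per-example lambdas.
def do_mixup(x, mixup_lambda):
    """Sketch (assumed): mix consecutive pairs of a doubled batch.

    x: (batch_size * 2, ...), mixup_lambda: (batch_size * 2,)
    Returns: (batch_size, ...)
    """
    out = (x[0::2].transpose(0, -1) * mixup_lambda[0::2] +
           x[1::2].transpose(0, -1) * mixup_lambda[1::2]).transpose(0, -1)
    return out
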
def pack_maestro_dataset_to_hdf5(args):
    """Load & resample MAESTRO audio files, then write to hdf5 files.

    Args:
      dataset_dir: str, directory of dataset
      workspace: str, directory of your workspace
    """

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace

    sample_rate = config.sample_rate

    # Paths
    csv_path = os.path.join(dataset_dir, 'maestro-v2.0.0.csv')
    waveform_hdf5s_dir = os.path.join(workspace, 'hdf5s', 'maestro')

    logs_dir = os.path.join(workspace, 'logs', get_filename(__file__))
    create_logging(logs_dir, filemode='w')
    logging.info(args)

    # Read meta dict
    meta_dict = read_metadata(csv_path)

    audios_num = len(meta_dict['canonical_composer'])
    logging.info('Total audios number: {}'.format(audios_num))

    feature_time = time.time()

    # Load & resample each audio file to a hdf5 file
    for n in range(audios_num):
        logging.info('{} {}'.format(n, meta_dict['midi_filename'][n]))

        # Read midi
        midi_path = os.path.join(dataset_dir, meta_dict['midi_filename'][n])
        midi_dict = read_midi(midi_path)

        # Load audio
        audio_path = os.path.join(dataset_dir, meta_dict['audio_filename'][n])
        (audio, _) = librosa.core.load(audio_path, sr=sample_rate, mono=True)

        packed_hdf5_path = os.path.join(waveform_hdf5s_dir, '{}.h5'.format(
            os.path.splitext(meta_dict['audio_filename'][n])[0]))

        create_folder(os.path.dirname(packed_hdf5_path))

        with h5py.File(packed_hdf5_path, 'w') as hf:
            hf.attrs.create('canonical_composer', data=meta_dict['canonical_composer'][n].encode(), dtype='S100')
            hf.attrs.create('canonical_title', data=meta_dict['canonical_title'][n].encode(), dtype='S100')
            hf.attrs.create('split', data=meta_dict['split'][n].encode(), dtype='S20')
            hf.attrs.create('year', data=meta_dict['year'][n].encode(), dtype='S10')
            hf.attrs.create('midi_filename', data=meta_dict['midi_filename'][n].encode(), dtype='S100')
            hf.attrs.create('audio_filename', data=meta_dict['audio_filename'][n].encode(), dtype='S100')
            hf.attrs.create('duration', data=meta_dict['duration'][n], dtype=np.float32)

            hf.create_dataset(name='midi_event', data=[e.encode() for e in midi_dict['midi_event']], dtype='S100')
            hf.create_dataset(name='midi_event_time', data=midi_dict['midi_event_time'], dtype=np.float32)
            hf.create_dataset(name='waveform', data=float32_to_int16(audio), dtype=np.int16)
        
    logging.info('Write hdf5 to {}'.format(packed_hdf5_path))
    logging.info('Time: {:.3f} s'.format(time.time() - feature_time))
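
# pack_maestro_dataset_to_hdf5 reads the MAESTRO metadata through a
# `read_metadata` helper that is not shown. A minimal sketch, assuming the
# standard maestro-v2.0.0.csv column names accessed above:
import csv


def read_metadata(csv_path):
    """Sketch (assumed): parse maestro-v2.0.0.csv into a dict of column lists."""
    keys = ['canonical_composer', 'canonical_title', 'split', 'year',
            'midi_filename', 'audio_filename', 'duration']
    meta_dict = {key: [] for key in keys}

    with open(csv_path, 'r') as f:
        for row in csv.DictReader(f):
            for key in keys:
                value = float(row[key]) if key == 'duration' else row[key]
                meta_dict[key].append(value)

    return meta_dict
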
Beispiel #13
0
def train(args):

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    pretrained_checkpoint_path = args.pretrained_checkpoint_path
    freeze_base = args.freeze_base
    loss_type = args.loss_type
    augmentation = args.augmentation
    learning_rate = args.learning_rate
    batch_size = args.batch_size
    resume_iteration = args.resume_iteration
    stop_iteration = args.stop_iteration
    device = 'cuda' if (args.cuda and torch.cuda.is_available()) else 'cpu'
    filename = args.filename
    num_workers = 8

    loss_func = get_loss_func(loss_type)
    pretrain = True if pretrained_checkpoint_path else False

    # TODO: restore the path to the full set of processed data

    hdf5_path = os.path.join(workspace, 'features_ramas',
                             'waveform_meta_test.h5')
    # hdf5_path = os.path.join(workspace, 'features', 'waveform.h5')

    checkpoints_dir = os.path.join(workspace, 'checkpoints')
    create_folder(checkpoints_dir)

    statistics_path = os.path.join(workspace, 'statistics', filename,
                                   'holdout_fold={}'.format(holdout_fold),
                                   model_type, 'pretrain={}'.format(pretrain),
                                   'loss_type={}'.format(loss_type),
                                   'augmentation={}'.format(augmentation),
                                   'batch_size={}'.format(batch_size),
                                   'freeze_base={}'.format(freeze_base),
                                   'statistics.pickle')
    create_folder(os.path.dirname(statistics_path))

    logs_dir = os.path.join(workspace, 'logs', filename,
                            'holdout_fold={}'.format(holdout_fold), model_type,
                            'pretrain={}'.format(pretrain),
                            'loss_type={}'.format(loss_type),
                            'augmentation={}'.format(augmentation),
                            'batch_size={}'.format(batch_size),
                            'freeze_base={}'.format(freeze_base))
    create_logging(logs_dir, 'w')
    logging.info(args)

    if 'cuda' in device:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Model
    Model = eval(model_type)

    # TODO: classes_num (4) is hard-coded here, which is not ideal
    model = Model(sample_rate, window_size, hop_size, mel_bins, fmin, fmax, 4,
                  freeze_base)

    # Statistics
    statistics_container = StatisticsContainer(statistics_path)

    if pretrain:
        logging.info(
            'Load pretrained model from {}'.format(pretrained_checkpoint_path))
        model.load_from_pretrain(pretrained_checkpoint_path)

    # Parallel
    print('GPU number: {}'.format(torch.cuda.device_count()))
    model = torch.nn.DataParallel(model)

    dataset = GtzanDataset()

    validate_sampler = EvaluateSampler(hdf5_path=hdf5_path,
                                       holdout_fold=holdout_fold,
                                       batch_size=1)

    validate_loader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_sampler=validate_sampler,
        collate_fn=collate_fn,
        num_workers=num_workers,
        pin_memory=True)

    if 'cuda' in device:
        model.to(device)

    # Evaluator
    evaluator = Evaluator(model=model)

    torch.manual_seed(729720439)

    statistics, output_dict = evaluator.evaluate(validate_loader)
    logging.info('Validate precision: {:.3f}'.format(statistics['precision']))
    logging.info('Validate recall: {:.3f}'.format(statistics['recall']))
    logging.info('Validate f_score: {:.3f}'.format(statistics['f_score']))
    logging.info('\n' + str(statistics['cm']))

    df_audio = pd.read_csv(
        '/home/den/DATASETS/AUDIO/preprocessed/ramas/meta_test.csv')
    df_audio = df_audio[df_audio['cur_label'].isin(
        ['ang', 'hap', 'sad', 'neu'])]

    temp_df = pd.DataFrame(columns=['cur_name', 'hap', 'ang', 'sad', 'neu'])
    temp_df['cur_name'] = output_dict['audio_name']
    temp_df.loc[:, ['hap', 'ang', 'sad', 'neu']] = np.vstack(
        output_dict['clipwise_output2'])

    merge_df = pd.merge(df_audio, temp_df, on='cur_name', how='inner')
    merge_df.to_csv(
        '/home/den/Documents/diploma/panns/panns_ramas_inference.csv',
        index=False)
def train(args):

    # Arguments & parameters
    window_size = args.window_size
    hop_size = args.hop_size
    mel_bins = args.mel_bins
    fmin = args.fmin
    fmax = args.fmax
    model_type = args.model_type
    pretrained_checkpoint_path = args.pretrained_checkpoint_path
    freeze_base = args.freeze_base
    freeze_base = True  # override the command-line flag: the base model is always frozen here
    device = 'cuda' if (args.cuda and torch.cuda.is_available()) else 'cpu'
    sample_rate = config.sample_rate
    classes_num = config.classes_num
    pretrain = True if pretrained_checkpoint_path else False

    # Model
    Model = eval(model_type)
    model = Model(sample_rate, window_size, hop_size, mel_bins, fmin, fmax,
                  classes_num, freeze_base)

    # Load pretrained model
    if pretrain:
        logging.info(
            'Load pretrained model from {}'.format(pretrained_checkpoint_path))
        model.load_from_pretrain(pretrained_checkpoint_path)

    # Parallel
    print('GPU number: {}'.format(torch.cuda.device_count()))
    model = torch.nn.DataParallel(model)

    if 'cuda' in device:
        model.to(device)

    print('Loaded pretrained model successfully!')
    ###############Copying main.py####################
    workspace_input = args.workspace_input
    workspace_output = args.workspace_output
    data_type = 'balanced_train'
    loss_type = 'clip_bce'
    balanced = 'balanced'
    augmentation = 'none'
    batch_size = 1
    learning_rate = 1e-3
    resume_iteration = 0
    early_stop = 100000
    device = torch.device('cuda') if (args.cuda and torch.cuda.is_available()) \
        else torch.device('cpu')
    filename = args.filename
    num_workers = 8
    clip_samples = config.clip_samples
    loss_func = get_loss_func(loss_type)
    black_list_csv = 'metadata/black_list/groundtruth_weak_label_evaluation_set.csv'
    previous_loss = None

    train_indexes_hdf5_path = os.path.join(workspace_input, 'hdf5s', 'indexes',
                                           '{}.h5'.format(data_type))

    eval_bal_indexes_hdf5_path = os.path.join(workspace_input, 'hdf5s',
                                              'indexes', 'balanced_train.h5')

    eval_test_indexes_hdf5_path = os.path.join(workspace_input, 'hdf5s',
                                               'indexes', 'eval.h5')

    checkpoints_dir = os.path.join(
        workspace_output, 'checkpoints', filename,
        'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'
        .format(sample_rate, window_size, hop_size, mel_bins, fmin,
                fmax), 'data_type={}'.format(data_type), model_type,
        'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size))
    create_folder(checkpoints_dir)

    statistics_path = os.path.join(
        workspace_output, 'statistics', filename,
        'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'
        .format(sample_rate, window_size, hop_size, mel_bins, fmin,
                fmax), 'data_type={}'.format(data_type), model_type,
        'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size), 'statistics.pkl')
    create_folder(os.path.dirname(statistics_path))

    logs_dir = os.path.join(
        workspace_output, 'logs', filename,
        'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'
        .format(sample_rate, window_size, hop_size, mel_bins, fmin,
                fmax), 'data_type={}'.format(data_type), model_type,
        'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size))

    create_logging(logs_dir, filemode='w')
    logging.info(args)

    if 'cuda' in str(device):
        logging.info('Using GPU.')
        device = 'cuda'
    else:
        logging.info('Using CPU.')
        device = 'cpu'

    # Model
    Model = eval(model_type)
    model = Model(sample_rate=sample_rate,
                  window_size=window_size,
                  hop_size=hop_size,
                  mel_bins=mel_bins,
                  fmin=fmin,
                  fmax=fmax,
                  classes_num=classes_num,
                  freeze_base=freeze_base)
    params_num = count_parameters(model)
    # flops_num = count_flops(model, clip_samples)
    logging.info('Parameters num: {}'.format(params_num))
    # logging.info('Flops num: {:.3f} G'.format(flops_num / 1e9))

    # Dataset will be used by DataLoader later. Dataset takes a meta as input
    # and returns a waveform and a target.
    dataset = AudioSetDataset(clip_samples=clip_samples,
                              classes_num=classes_num)

    # Train sampler
    # Use the index paths assembled with os.path.join above instead of raw
    # string concatenation, which breaks when workspace_input has no trailing
    # separator.
    (train_sampler, train_collector) = get_train_sampler(
        balanced, augmentation, train_indexes_hdf5_path, black_list_csv,
        batch_size)

    # Evaluate sampler
    eval_bal_sampler = EvaluateSampler(
        indexes_hdf5_path=eval_bal_indexes_hdf5_path, batch_size=batch_size)

    eval_test_sampler = EvaluateSampler(
        indexes_hdf5_path=eval_test_indexes_hdf5_path, batch_size=batch_size)

    eval_collector = Collator(mixup_alpha=None)

    # Data loader
    train_loader = torch.utils.data.DataLoader(dataset=dataset,
                                               batch_sampler=train_sampler,
                                               collate_fn=train_collector,
                                               num_workers=num_workers,
                                               pin_memory=True)

    eval_bal_loader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_sampler=eval_bal_sampler,
        collate_fn=eval_collector,
        num_workers=num_workers,
        pin_memory=True)

    eval_test_loader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_sampler=eval_test_sampler,
        collate_fn=eval_collector,
        num_workers=num_workers,
        pin_memory=True)

    # Evaluator
    bal_evaluator = Evaluator(model=model, generator=eval_bal_loader)
    test_evaluator = Evaluator(model=model, generator=eval_test_loader)

    # Statistics
    statistics_container = StatisticsContainer(statistics_path)

    # Optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=learning_rate,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0.,
                           amsgrad=True)

    train_bgn_time = time.time()
    if resume_iteration > 0:
        resume_checkpoint_path = os.path.join(
            workspace_input, 'checkpoints', filename,
            'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'
            .format(sample_rate, window_size, hop_size, mel_bins, fmin,
                    fmax), 'data_type={}'.format(data_type), model_type,
            'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
            'augmentation={}'.format(augmentation),
            'batch_size={}'.format(batch_size),
            '{}_iterations.pth'.format(resume_iteration))

        logging.info('Loading checkpoint {}'.format(resume_checkpoint_path))
        if torch.cuda.is_available():
            checkpoint = torch.load(resume_checkpoint_path)
        else:
            checkpoint = torch.load(resume_checkpoint_path, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
        train_sampler.load_state_dict(checkpoint['sampler'])
        statistics_container.load_state_dict(resume_iteration)
        iteration = checkpoint['iteration']

    else:
        iteration = 0

    # Parallel
    print('GPU number: {}'.format(torch.cuda.device_count()))
    model = torch.nn.DataParallel(model)

    if 'cuda' in str(device):
        model.to(device)

    time1 = time.time()

    for iterate_n, batch_data_dict in enumerate(train_loader):
        """batch_data_dict: {
            'audio_name': (batch_size [*2 if mixup],), 
            'waveform': (batch_size [*2 if mixup], clip_samples), 
            'target': (batch_size [*2 if mixup], classes_num), 
            (if mixup) 'mixup_lambda': (batch_size * 2,)}
        """

        # Evaluate
        if (iteration % 2000 == 0
                and iteration > resume_iteration) or (iteration == 0):
            train_fin_time = time.time()

            bal_statistics = bal_evaluator.evaluate()
            test_statistics = test_evaluator.evaluate()

            logging.info('Validate bal mAP: {:.3f}'.format(
                np.mean(bal_statistics['average_precision'])))

            logging.info('Validate test mAP: {:.3f}'.format(
                np.mean(test_statistics['average_precision'])))

            statistics_container.append(iteration,
                                        bal_statistics,
                                        data_type='bal')
            statistics_container.append(iteration,
                                        test_statistics,
                                        data_type='test')
            statistics_container.dump()

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info(
                'iteration: {}, train time: {:.3f} s, validate time: {:.3f} s'
                ''.format(iteration, train_time, validate_time))

            logging.info('------------------------------------')

            train_bgn_time = time.time()

        # Save model
        if iteration % 20000 == 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.module.state_dict(),
                'optimizer': optimizer.state_dict(),
                'sampler': train_sampler.state_dict()
            }

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Move data to device
        for key in batch_data_dict.keys():
            batch_data_dict[key] = move_data_to_device(batch_data_dict[key],
                                                       device)

        # Forward
        model.train()
        if 'mixup' in augmentation:
            batch_output_dict = model(batch_data_dict['waveform'],
                                      batch_data_dict['mixup_lambda'])
            """{'clipwise_output': (batch_size, classes_num), ...}"""

            batch_target_dict = {
                'target':
                do_mixup(batch_data_dict['target'],
                         batch_data_dict['mixup_lambda'])
            }
            """{'target': (batch_size, classes_num)}"""
        else:
            batch_output_dict = model(batch_data_dict['waveform'], None)
            """{'clipwise_output': (batch_size, classes_num), ...}"""

            batch_target_dict = {'target': batch_data_dict['target']}
            """{'target': (batch_size, classes_num)}"""
        loss = loss_func(batch_output_dict, batch_target_dict)
        # Loss
        # try:
        #     loss = loss_func(batch_output_dict, batch_target_dict)
        # except:
        #     tensor = batch_output_dict['clipwise_output'].detach().numpy()
        #     arr = -1. * np.where(tensor > 0,0.,tensor)
        #     batch_output_dict['clipwise_output'] = torch.tensor(np.where(arr > 1,1.,arr),requires_grad=True)
        #     loss = loss_func(batch_output_dict, batch_target_dict)
        # Backward
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if iteration % 10 == 0:
            print('--- Iteration: {}, train time: {:.3f} s / 10 iterations ---'\
                .format(iteration, time.time() - time1))
            time1 = time.time()

        iteration += 1

        # Stop learning
        if iteration == early_stop:
            break
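
# The fine-tuning loop above logs the model size with `count_parameters` and
# moves batches with `move_data_to_device`; neither helper is shown here.
# Minimal sketches under the obvious assumptions:
import numpy as np
import torch


def count_parameters(model):
    """Sketch (assumed): number of trainable parameters in a torch.nn.Module."""
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


def move_data_to_device(x, device):
    """Sketch (assumed): move numeric arrays/tensors to `device`, pass through
    everything else (e.g. lists of audio names)."""
    if isinstance(x, np.ndarray) and x.dtype.kind in ('f', 'i', 'u', 'b'):
        x = torch.from_numpy(x)
    if torch.is_tensor(x):
        return x.to(device)
    return x
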
Beispiel #15
0
def inference_validation(args):
    '''Inference and calculate metrics on validation data. 
    
    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      taxonomy_level: 'fine' | 'coarse'
      model_type: string, e.g. 'Cnn_9layers_MaxPooling'
      iteration: int
      holdout_fold: '1', which means using validation data
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
      visualize: bool
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    taxonomy_level = args.taxonomy_level
    model_type = args.model_type
    iteration = args.iteration
    holdout_fold = args.holdout_fold
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    visualize = args.visualize
    filename = args.filename

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second

    labels = get_labels(taxonomy_level)
    classes_num = len(labels)

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    train_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train.h5')

    validate_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'validate.h5')

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train.h5')

    checkpoint_path = os.path.join(
        workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type,
        '{}_iterations.pth'.format(iteration))

    submission_path = os.path.join(
        workspace, 'submissions', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type, 'submission.csv')
    create_folder(os.path.dirname(submission_path))

    annotation_path = os.path.join(dataset_dir, 'annotations.csv')

    yaml_path = os.path.join(dataset_dir, 'dcase-ust-taxonomy.yaml')

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Load model
    Model = eval(model_type)
    model = Model(classes_num)
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = DataGenerator(train_hdf5_path=train_hdf5_path,
                                   validate_hdf5_path=validate_hdf5_path,
                                   holdout_fold=holdout_fold,
                                   scalar=scalar,
                                   batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          taxonomy_level=taxonomy_level,
                          cuda=cuda,
                          verbose=True)

    # Evaluate on validation data
    evaluator.evaluate(data_type='validate',
                       submission_path=submission_path,
                       annotation_path=annotation_path,
                       yaml_path=yaml_path,
                       max_iteration=None)

    # Visualize
    if visualize:
        evaluator.visualize(data_type='validate')
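
# Several of these examples normalize log-mel features with a `scalar` loaded
# from HDF5. A minimal sketch of `load_scalar`, assuming the file stores
# per-mel-bin 'mean' and 'std' datasets:
import h5py


def load_scalar(scalar_path):
    """Sketch (assumed): load per-bin mean and std from a scalar HDF5 file."""
    with h5py.File(scalar_path, 'r') as hf:
        mean = hf['mean'][:]
        std = hf['std'][:]
    return {'mean': mean, 'std': std}
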
Beispiel #16
0
def inference_validation(args):
    '''Inference validation data. 
    
    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      audio_type: 'foa' | 'mic'
      holdout_fold: 1 | 2 | 3 | 4 | -1, where -1 indicates calculating metrics
          on all 1, 2, 3 and 4 folds. 
      model_name: string, e.g. 'Cnn_9layers'
      batch_size: int
      cuda: bool
      visualize: bool
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    audio_type = args.audio_type
    holdout_fold = args.holdout_fold
    model_name = args.model_name
    iteration = args.iteration
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    visualize = args.visualize
    mini_data = args.mini_data
    filename = args.filename

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    metadata_dir = os.path.join(dataset_dir, 'metadata_dev')

    submissions_dir = os.path.join(
        workspace, 'submissions', filename,
        '{}_{}{}_{}_logmel_{}frames_{}melbins'.format(model_name, prefix,
                                                      audio_type, 'dev',
                                                      frames_per_second,
                                                      mel_bins),
        'iteration={}'.format(iteration))
    create_folder(submissions_dir)

    logs_dir = os.path.join(
        args.workspace, 'logs', filename, args.mode,
        '{}_{}{}_{}_logmel_{}frames_{}melbins'.format(model_name, prefix,
                                                      audio_type, 'dev',
                                                      frames_per_second,
                                                      mel_bins),
        'holdout_fold={}'.format(holdout_fold))
    create_logging(logs_dir, filemode='w')
    logging.info(args)

    # Inference and calculate metrics for a fold
    if holdout_fold != -1:

        features_dir = os.path.join(
            workspace, 'features',
            '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type,
                                                       'dev',
                                                       frames_per_second,
                                                       mel_bins))

        scalar_path = os.path.join(
            workspace, 'scalars',
            '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type,
                                                       'dev',
                                                       frames_per_second,
                                                       mel_bins), 'scalar.h5')

        checkpoint_path = os.path.join(
            workspace, 'models', filename,
            '{}_{}{}_{}_logmel_{}frames_{}melbins'.format(
                model_name, '', audio_type, 'dev', frames_per_second,
                mel_bins), 'holdout_fold={}'.format(holdout_fold),
            'md_{}_iters.pth'.format(iteration))

        # Load scalar
        scalar = load_scalar(scalar_path)

        # Load model
        checkpoint = torch.load(checkpoint_path)
        model = checkpoint['model']

        if cuda:
            model.cuda()

        # Data generator
        data_generator = DataGenerator(features_dir=features_dir,
                                       scalar=scalar,
                                       batch_size=batch_size,
                                       holdout_fold=holdout_fold)

        # Evaluator
        evaluator = Evaluator(model=model,
                              data_generator=data_generator,
                              cuda=cuda)

        # Calculate metrics
        data_type = 'validate'
        list_dict = evaluator.evaluate(data_type=data_type)
        evaluator.metrics(list_dict=list_dict,
                          submissions_dir=submissions_dir,
                          metadata_dir=metadata_dir)

        # Visualize reference and predicted events, elevation and azimuth
        if visualize:
            evaluator.visualize(data_type=data_type)

    # Calculate metrics for all folds
    else:
        prediction_names = os.listdir(submissions_dir)
        prediction_paths = [os.path.join(submissions_dir, name) for \
            name in prediction_names]

        metrics = calculate_metrics(metadata_dir=metadata_dir,
                                    prediction_paths=prediction_paths)

        logging.info('Metrics of {} files: '.format(len(prediction_names)))
        for key in metrics.keys():
            logging.info('    {:<20} {:.3f}'.format(key + ' :', metrics[key]))
Beispiel #17
0
def inference_evaluation(args):
    '''Inference on evaluation data. 
    
    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      taxonomy_level: 'fine' | 'coarse'
      model_type: string, e.g. 'Cnn_9layers_MaxPooling'
      iteration: int
      holdout_fold: 'none', which means using model trained on all development data
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    taxonomy_level = args.taxonomy_level
    model_type = args.model_type
    iteration = args.iteration
    holdout_fold = args.holdout_fold
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second

    labels = get_labels(taxonomy_level)
    classes_num = len(labels)

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    evaluate_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'evaluate.h5')

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train.h5')

    checkpoint_path = os.path.join(
        workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type, 'best2.pth')

    submission_path = os.path.join(
        workspace, 'submissions', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'best2_submission.csv')
    create_folder(os.path.dirname(submission_path))

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Load model
    Model = eval(model_type)
    model = Model(classes_num)
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = TestDataGenerator(hdf5_path=evaluate_hdf5_path,
                                       scalar=scalar,
                                       batch_size=batch_size)

    # Forward
    output_dict = forward(model=model,
                          generate_func=data_generator.generate(),
                          cuda=cuda,
                          return_target=False)

    # Write submission
    write_submission_csv(audio_names=output_dict['audio_name'],
                         outputs=output_dict['output'],
                         taxonomy_level=taxonomy_level,
                         submission_path=submission_path)
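
# inference_evaluation writes predictions through `write_submission_csv`,
# which is not shown. A sketch under the assumption that the submission is a
# CSV with an 'audio_filename' column followed by one probability column per
# label of the chosen taxonomy level (`get_labels` is the helper already used
# in these examples):
import csv


def write_submission_csv(audio_names, outputs, taxonomy_level, submission_path):
    """Sketch (assumed): write clipwise probabilities to a submission CSV."""
    labels = get_labels(taxonomy_level)

    with open(submission_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['audio_filename'] + labels)
        for audio_name, output in zip(audio_names, outputs):
            writer.writerow([audio_name] + list(output))
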
Beispiel #18
0
def train(args):
    '''Train. Model will be saved after several iterations. 
    
    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      audio_type: 'foa' | 'mic'
      holdout_fold: 1 | 2 | 3 | 4 | -1, where -1 indicates using all data 
          without validation for training
      model_name: string, e.g. 'Cnn_9layers'
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    audio_type = args.audio_type
    holdout_fold = args.holdout_fold
    model_name = args.model_name
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    classes_num = config.classes_num
    max_validate_num = 10  # Number of audio recordings to validate
    reduce_lr = True  # Reduce learning rate after several iterations

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    metadata_dir = os.path.join(dataset_dir, 'metadata_dev')

    features_dir = os.path.join(
        workspace, 'features',
        '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev',
                                                   frames_per_second,
                                                   mel_bins))

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev',
                                                   frames_per_second,
                                                   mel_bins), 'scalar.h5')

    models_dir = os.path.join(
        workspace, 'models', filename,
        '{}_{}{}_{}_logmel_{}frames_{}melbins'.format(model_name, prefix,
                                                      audio_type, 'dev',
                                                      frames_per_second,
                                                      mel_bins),
        'holdout_fold={}'.format(holdout_fold))
    create_folder(models_dir)

    temp_submissions_dir = os.path.join(
        workspace, '_temp', 'submissions', filename,
        '{}_{}{}_{}_logmel_{}frames_{}melbins'.format(model_name, prefix,
                                                      audio_type, 'dev',
                                                      frames_per_second,
                                                      mel_bins))
    create_folder(temp_submissions_dir)

    logs_dir = os.path.join(
        args.workspace, 'logs', filename, args.mode,
        '{}_{}{}_{}_logmel_{}frames_{}melbins'.format(model_name, prefix,
                                                      audio_type, 'dev',
                                                      frames_per_second,
                                                      mel_bins),
        'holdout_fold={}'.format(holdout_fold))
    create_logging(logs_dir, filemode='w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    Model = eval(model_name)
    model = Model(classes_num)

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=1e-3,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0.)

    # Data generator
    data_generator = DataGenerator(features_dir=features_dir,
                                   scalar=scalar,
                                   batch_size=batch_size,
                                   holdout_fold=holdout_fold)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          max_validate_num=max_validate_num,
                          cuda=cuda)

    train_bgn_time = time.time()
    iteration = 0

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate
        if iteration % 100 == 0:

            logging.info('------------------------------------')
            logging.info('iteration: {}'.format(iteration))

            train_fin_time = time.time()
            train_list_dict = evaluator.evaluate(data_type='train')
            evaluator.metrics(train_list_dict, temp_submissions_dir,
                              metadata_dir)

            if holdout_fold != -1:
                validate_list_dict = evaluator.evaluate(data_type='validate')
                evaluator.metrics(validate_list_dict, temp_submissions_dir,
                                  metadata_dir)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model
        if iteration % 1000 == 0 and iteration > 0:

            checkpoint = {
                'iteration': iteration,
                'model': model,
                'optimizer': optimizer
            }

            save_path = os.path.join(models_dir,
                                     'md_{}_iters.pth'.format(iteration))

            torch.save(checkpoint, save_path)
            logging.info('Model saved to {}'.format(save_path))

        # Reduce learning rate
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU
        for key in batch_data_dict.keys():
            batch_data_dict[key] = move_data_to_gpu(batch_data_dict[key], cuda)

        # Train
        model.train()
        batch_output_dict = model(batch_data_dict['feature'])
        loss = event_spatial_loss(batch_output_dict, batch_data_dict)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == 10000:
            break

        iteration += 1
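
For reference, the 0.9 factor applied every 200 iterations above gives a geometric learning-rate schedule. A tiny stand-alone sketch of the value in effect at a given iteration (purely illustrative, not part of the original code):

def decayed_lr(iteration, base_lr=1e-3, decay=0.9, every=200):
    """Learning rate in effect at a given iteration under the schedule above."""
    return base_lr * decay ** (iteration // every)

# Example: by iteration 1000 the rate has been decayed five times,
# so decayed_lr(1000) == 1e-3 * 0.9 ** 5, roughly 5.9e-4.
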
Example #19
def train(args):
    """Train AudioSet tagging model. 

    Args:
      dataset_dir: str
      workspace: str
      data_type: 'balanced_train' | 'full_train'
      window_size: int
      hop_size: int
      mel_bins: int
      model_type: str
      loss_type: 'clip_bce'
      balanced: 'none' | 'balanced' | 'alternate'
      augmentation: 'none' | 'mixup'
      batch_size: int
      learning_rate: float
      resume_iteration: int
      early_stop: int
      accumulation_steps: int
      cuda: bool
    """

    # Arguments & parameters
    workspace = args.workspace
    data_type = args.data_type
    sample_rate = args.sample_rate
    window_size = args.window_size
    hop_size = args.hop_size
    mel_bins = args.mel_bins
    fmin = args.fmin
    fmax = args.fmax
    model_type = args.model_type
    loss_type = args.loss_type
    balanced = args.balanced
    augmentation = args.augmentation
    batch_size = args.batch_size
    learning_rate = args.learning_rate
    resume_iteration = args.resume_iteration
    early_stop = args.early_stop
    device = torch.device(
        'cuda' if args.cuda and torch.cuda.is_available() else 'cpu')
    filename = args.filename

    num_workers = 128
    prefetch_factor = 4

    #os.environ["MASTER_ADDR"] = "localhost"
    #os.environ["MASTER_PORT"] = "12355"
    #dist.init_process_group("nccl", rank=rank, world_size=args.world_size)

    clip_samples = config.clip_samples
    classes_num = config.classes_num
    loss_func = get_loss_func(loss_type)

    # Paths
    black_list_csv = None

    train_indexes_hdf5_path = os.path.join(workspace, 'hdf5s', 'indexes',
                                           '{}.h5'.format(data_type))

    eval_bal_indexes_hdf5_path = os.path.join(workspace, 'hdf5s', 'indexes',
                                              'balanced_train.h5')

    eval_test_indexes_hdf5_path = os.path.join(workspace, 'hdf5s', 'indexes',
                                               'eval.h5')

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename,
        'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'
        .format(sample_rate, window_size, hop_size, mel_bins, fmin,
                fmax), 'data_type={}'.format(data_type), model_type,
        'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size),
        datetime.datetime.now().strftime("%d%m%Y_%H%M%S"))

    #if rank == 0:
    create_folder(checkpoints_dir)

    statistics_path = os.path.join(
        workspace, 'statistics', filename,
        'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'
        .format(sample_rate, window_size, hop_size, mel_bins, fmin,
                fmax), 'data_type={}'.format(data_type), model_type,
        'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size), 'statistics.pkl')

    #if rank == 0:
    create_folder(os.path.dirname(statistics_path))

    logs_dir = os.path.join(
        workspace, 'logs', filename,
        'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'
        .format(sample_rate, window_size, hop_size, mel_bins, fmin,
                fmax), 'data_type={}'.format(data_type), model_type,
        'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size))

    create_logging(logs_dir, filemode='w')
    logging.info(args)

    if 'cuda' in str(device):
        logging.info('Using GPU.')
        device = 'cuda'
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')
        device = 'cpu'

    # Model
    Model = eval(model_type)
    model = Model(sample_rate=sample_rate,
                  window_size=window_size,
                  hop_size=hop_size,
                  mel_bins=mel_bins,
                  fmin=fmin,
                  fmax=fmax,
                  classes_num=classes_num)

    params_num = count_parameters(model)
    # flops_num = count_flops(model, clip_samples)
    logging.info('Parameters num: {}'.format(params_num))
    # logging.info('Flops num: {:.3f} G'.format(flops_num / 1e9))

    # Dataset will be used by DataLoader later. Dataset takes a meta as input
    # and return a waveform and a target.
    dataset = AudioSetDataset(sample_rate=sample_rate)

    # Train sampler
    if balanced == 'none':
        Sampler = TrainSampler
    elif balanced == 'balanced':
        Sampler = BalancedTrainSampler
    elif balanced == 'alternate':
        Sampler = AlternateTrainSampler

    train_sampler = Sampler(
        indexes_hdf5_path=train_indexes_hdf5_path,
        batch_size=batch_size * 2 if 'mixup' in augmentation else batch_size,
        black_list_csv=black_list_csv)

    # Evaluate sampler
    eval_bal_sampler = EvaluateSampler(
        indexes_hdf5_path=eval_bal_indexes_hdf5_path,
        batch_size=2 * batch_size)

    eval_test_sampler = EvaluateSampler(
        indexes_hdf5_path=eval_test_indexes_hdf5_path,
        batch_size=2 * batch_size)

    # Data loader
    train_loader = torch.utils.data.DataLoader(dataset=dataset,
                                               batch_sampler=train_sampler,
                                               collate_fn=collate_fn,
                                               num_workers=num_workers,
                                               pin_memory=True,
                                               prefetch_factor=prefetch_factor)

    eval_bal_loader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_sampler=eval_bal_sampler,
        collate_fn=collate_fn,
        num_workers=num_workers,
        pin_memory=True,
        prefetch_factor=prefetch_factor)

    eval_test_loader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_sampler=eval_test_sampler,
        collate_fn=collate_fn,
        num_workers=num_workers,
        pin_memory=True,
        prefetch_factor=prefetch_factor)

    if 'mixup' in augmentation:
        mixup_augmenter = Mixup(mixup_alpha=1.)

    # Evaluator
    evaluator = Evaluator(model=model)

    # Statistics
    statistics_container = StatisticsContainer(statistics_path)

    # Optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=learning_rate,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0.,
                           amsgrad=True)

    train_bgn_time = time.time()

    # Resume training
    if resume_iteration > 0:
        resume_checkpoint_path = os.path.join(
            workspace, 'checkpoints', filename,
            'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'
            .format(sample_rate, window_size, hop_size, mel_bins, fmin,
                    fmax), 'data_type={}'.format(data_type), model_type,
            'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
            'augmentation={}'.format(augmentation),
            'batch_size={}'.format(batch_size),
            '{}_iterations.pth'.format(resume_iteration))

        logging.info('Loading checkpoint {}'.format(resume_checkpoint_path))
        checkpoint = torch.load(resume_checkpoint_path)
        model.load_state_dict(checkpoint['model'])
        train_sampler.load_state_dict(checkpoint['sampler'])
        statistics_container.load_state_dict(resume_iteration)
        iteration = checkpoint['iteration']

    else:
        iteration = 0

    # Parallel
    print('GPU number: {}'.format(torch.cuda.device_count()))
    model = torch.nn.DataParallel(model)

    if 'cuda' in str(device):
        model.to(device)
        #model = model.cuda(rank)

    #model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[rank])
    #print([(s[0], s[1].is_cuda) for s in model.named_parameters()])

    time1 = time.time()

    prev_bal_map = 0.0
    prev_test_map = 0.0
    save_bal_model = 0
    save_test_model = 0

    for batch_data_dict in train_loader:
        """batch_data_dict: {
            'audio_name': (batch_size [*2 if mixup],), 
            'waveform': (batch_size [*2 if mixup], clip_samples), 
            'target': (batch_size [*2 if mixup], classes_num), 
            (if exists) 'mixup_lambda': (batch_size * 2,)}
        """
        #print(batch_data_dict)
        # Evaluate
        if (iteration % 2000 == 0
                and iteration > resume_iteration) or (iteration == -1):
            train_fin_time = time.time()

            bal_statistics = evaluator.evaluate(eval_bal_loader)
            test_statistics = evaluator.evaluate(eval_test_loader)

            bal_map = np.mean(bal_statistics['average_precision'])
            test_map = np.mean(test_statistics['average_precision'])

            logging.info('Validate bal mAP: {:.3f}'.format(bal_map))
            logging.info('Validate test mAP: {:.3f}'.format(test_map))

            # Only flag a checkpoint save when mAP improves on the previous
            # evaluation
            save_bal_model = 1 if bal_map > prev_bal_map else 0
            save_test_model = 1 if test_map > prev_test_map else 0
            prev_bal_map = bal_map
            prev_test_map = test_map

            statistics_container.append(iteration,
                                        bal_statistics,
                                        data_type='bal')
            statistics_container.append(iteration,
                                        test_statistics,
                                        data_type='test')
            statistics_container.dump()

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info(
                'iteration: {}, train time: {:.3f} s, validate time: {:.3f} s'
                ''.format(iteration, train_time, validate_time))

            logging.info('------------------------------------')

            train_bgn_time = time.time()

        # Save model
        if iteration % 100000 == 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.module.state_dict(),
                'sampler': train_sampler.state_dict()
            }

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        if save_bal_model:
            checkpoint = {
                'iteration': iteration,
                'model': model.module.state_dict(),
                'sampler': train_sampler.state_dict()
            }

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations_bal.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))
            save_bal_model = 0

        if save_test_model:
            checkpoint = {
                'iteration': iteration,
                'model': model.module.state_dict(),
                'sampler': train_sampler.state_dict()
            }

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations_test.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))
            save_test_model = 0

        # Mixup lambda
        if 'mixup' in augmentation:
            batch_data_dict['mixup_lambda'] = mixup_augmenter.get_lambda(
                batch_size=len(batch_data_dict['waveform']))

        # Move data to device
        for key in batch_data_dict.keys():
            batch_data_dict[key] = move_data_to_device(batch_data_dict[key],
                                                       device)

        # Forward
        model.train()

        if 'mixup' in augmentation:
            batch_output_dict = model(batch_data_dict['waveform'],
                                      batch_data_dict['mixup_lambda'])
            """{'clipwise_output': (batch_size, classes_num), ...}"""

            batch_target_dict = {
                'target':
                do_mixup(batch_data_dict['target'],
                         batch_data_dict['mixup_lambda'])
            }
            """{'target': (batch_size, classes_num)}"""
        else:
            batch_output_dict = model(batch_data_dict['waveform'], None)
            """{'clipwise_output': (batch_size, classes_num), ...}"""

            batch_target_dict = {'target': batch_data_dict['target']}
            """{'target': (batch_size, classes_num)}"""

        # Loss
        loss = loss_func(batch_output_dict, batch_target_dict)

        # Backward
        loss.backward()
        print(loss)

        optimizer.step()
        optimizer.zero_grad()

        if iteration % 10 == 0:
            print('--- Iteration: {}, train time: {:.3f} s / 10 iterations ---'\
                .format(iteration, time.time() - time1))
            time1 = time.time()

        # Stop learning
        if iteration == early_stop:
            break

        iteration += 1
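
`Mixup.get_lambda` and `do_mixup` are assumed helpers from the surrounding codebase. They appear to implement the usual pairwise mixup on a doubled batch; a minimal sketch of that idea follows (illustrative, not necessarily identical to the repository's implementation):

import numpy as np
import torch

def get_mixup_lambda(batch_size, mixup_alpha=1.0):
    """Draw one Beta(alpha, alpha) weight per pair; each pair gets lam and 1 - lam."""
    lam = np.random.beta(mixup_alpha, mixup_alpha, size=batch_size // 2)
    return torch.Tensor(np.stack([lam, 1.0 - lam], axis=1).flatten())

def do_mixup_sketch(x, mixup_lambda):
    """Mix even-indexed and odd-indexed examples of a (2N, ...) tensor into (N, ...)."""
    lam = mixup_lambda.view(-1, *([1] * (x.dim() - 1)))  # broadcast over trailing dims
    return x[0::2] * lam[0::2] + x[1::2] * lam[1::2]
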
Example #20
def pack_waveforms_to_hdf5(args):
    """Pack waveform and target of several audio clips to a single hdf5 file. 
    This can speed up loading and training.
    """

    # Arguments & parameters
    audios_dir = args.audios_dir
    csv_path = args.csv_path
    waveforms_hdf5_path = args.waveforms_hdf5_path
    mini_data = args.mini_data

    clip_samples = config.clip_samples
    classes_num = config.classes_num
    sample_rate = config.sample_rate
    id_to_ix = config.id_to_ix

    # Paths
    if mini_data:
        prefix = 'mini_'
        waveforms_hdf5_path += '.mini'
    else:
        prefix = ''

    create_folder(os.path.dirname(waveforms_hdf5_path))

    logs_dir = '_logs/pack_waveforms_to_hdf5/{}{}'.format(
        prefix, get_filename(csv_path))
    create_folder(logs_dir)
    create_logging(logs_dir, filemode='w')
    logging.info('Write logs to {}'.format(logs_dir))

    # Read csv file
    meta_dict = read_metadata(csv_path, classes_num, id_to_ix)

    if mini_data:
        mini_num = 10
        for key in meta_dict.keys():
            meta_dict[key] = meta_dict[key][0:mini_num]

    audios_num = len(meta_dict['audio_name'])

    # Pack waveform to hdf5
    total_time = time.time()

    with h5py.File(waveforms_hdf5_path, 'w') as hf:
        hf.create_dataset('audio_name', shape=((audios_num, )), dtype='S20')
        hf.create_dataset('waveform',
                          shape=((audios_num, clip_samples)),
                          dtype=np.int16)
        hf.create_dataset('target',
                          shape=((audios_num, classes_num)),
                          dtype=np.bool_)  # the np.bool alias was removed in NumPy >= 1.24
        hf.attrs.create('sample_rate', data=sample_rate, dtype=np.int32)

        # Pack waveform & target of several audio clips to a single hdf5 file
        for n in range(audios_num):
            audio_path = os.path.join(audios_dir, meta_dict['audio_name'][n])

            if os.path.isfile(audio_path):
                logging.info('{} {}'.format(n, audio_path))
                (audio, _) = librosa.core.load(audio_path,
                                               sr=sample_rate,
                                               mono=True)
                audio = pad_or_truncate(audio, clip_samples)

                hf['audio_name'][n] = meta_dict['audio_name'][n].encode()
                hf['waveform'][n] = float32_to_int16(audio)
                hf['target'][n] = meta_dict['target'][n]
            else:
                logging.info('{} File does not exist! {}'.format(
                    n, audio_path))

    logging.info('Write to {}'.format(waveforms_hdf5_path))
    logging.info('Pack hdf5 time: {:.3f}'.format(time.time() - total_time))
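
`pad_or_truncate` and `float32_to_int16` are assumed utility functions; sketches of what they presumably do (fix the clip length, then quantise to int16 for compact HDF5 storage) are given below for illustration only:

import numpy as np

def pad_or_truncate_sketch(audio, clip_samples):
    """Zero-pad or truncate a 1-D waveform to exactly clip_samples."""
    if len(audio) < clip_samples:
        return np.concatenate(
            [audio, np.zeros(clip_samples - len(audio), dtype=audio.dtype)])
    return audio[:clip_samples]

def float32_to_int16_sketch(audio):
    """Quantise a float waveform in [-1, 1] to int16 for compact storage."""
    return (np.clip(audio, -1.0, 1.0) * 32767.0).astype(np.int16)
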
Example #21
def train(args):
    """Train AudioSet tagging model. 

    Args:
      dataset_dir: str
      workspace: str
      data_type: 'balanced_train' | 'full_train'
      frames_per_second: int
      mel_bins: int
      model_type: str
      loss_type: 'clip_bce'
      balanced: 'balanced'
      augmentation: 'none' | 'mixup'
      batch_size: int
      learning_rate: float
      resume_iteration: int
      early_stop: int
      accumulation_steps: int
      cuda: bool
    """

    # Arguments & parameters
    # dataset_dir = args.dataset_dir
    workspace = args.workspace
    data_type = args.data_type
    window_size = args.window_size
    hop_size = args.hop_size
    mel_bins = args.mel_bins
    fmin = args.fmin
    fmax = args.fmax
    model_type = args.model_type
    loss_type = args.loss_type
    balanced = args.balanced
    augmentation = args.augmentation
    batch_size = args.batch_size
    learning_rate = args.learning_rate
    resume_iteration = args.resume_iteration
    early_stop = args.early_stop
    device = torch.device(
        'cuda' if args.cuda and torch.cuda.is_available() else 'cpu')
    filename = args.filename

    num_workers = 0
    sample_rate = config.sample_rate
    audio_length = config.audio_length
    classes_num = config.classes_num
    assert loss_type == 'clip_bce'

    # Paths
    black_list_csv = os.path.join(workspace, 'black_list',
                                  'dcase2017task4.csv')

    waveform_hdf5s_dir = os.path.join(workspace, 'hdf5s', 'waveforms')

    # Target hdf5 path
    eval_train_targets_hdf5_path = os.path.join(workspace, 'hdf5s', 'targets',
                                                'balanced_train.h5')

    eval_test_targets_hdf5_path = os.path.join(workspace, 'hdf5s', 'targets',
                                               'eval.h5')

    if data_type == 'balanced_train':
        train_targets_hdf5_path = os.path.join(workspace, 'hdf5s', 'targets',
                                               'balanced_train.h5')
    elif data_type == 'full_train':
        train_targets_hdf5_path = os.path.join(workspace, 'hdf5s', 'targets',
                                               'full_train.h5')

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename,
        'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'
        .format(sample_rate, window_size, hop_size, mel_bins, fmin,
                fmax), 'data_type={}'.format(data_type), model_type,
        'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size))
    create_folder(checkpoints_dir)

    statistics_path = os.path.join(
        workspace, 'statistics', filename,
        'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'
        .format(sample_rate, window_size, hop_size, mel_bins, fmin,
                fmax), 'data_type={}'.format(data_type), model_type,
        'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size), 'statistics.pkl')
    create_folder(os.path.dirname(statistics_path))

    logs_dir = os.path.join(
        workspace, 'logs', filename,
        'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'
        .format(sample_rate, window_size, hop_size, mel_bins, fmin,
                fmax), 'data_type={}'.format(data_type), model_type,
        'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size))

    create_logging(logs_dir, filemode='w')
    logging.info(args)

    if 'cuda' in str(device):
        logging.info('Using GPU.')
        device = 'cuda'
    else:
        logging.info('Using CPU.')
        device = 'cpu'

    # Model
    model = Cnn13(audio_length, sample_rate, window_size, hop_size, mel_bins,
                  fmin, fmax, classes_num)
    model.summary()
    logging.info('Parameters number: {}'.format(model.count_params()))

    # Optimizer
    optimizer = keras.optimizers.Adam(lr=learning_rate,
                                      beta_1=0.9,
                                      beta_2=0.999,
                                      amsgrad=True)

    # Loss
    loss = keras.losses.binary_crossentropy

    model.compile(loss=loss, optimizer=optimizer)

    # Dataset will be used by DataLoader later. Provide an index and return
    # waveform and target of audio
    train_dataset = AudioSetDataset(target_hdf5_path=train_targets_hdf5_path,
                                    waveform_hdf5s_dir=waveform_hdf5s_dir,
                                    audio_length=audio_length,
                                    classes_num=classes_num)

    bal_dataset = AudioSetDataset(
        target_hdf5_path=eval_train_targets_hdf5_path,
        waveform_hdf5s_dir=waveform_hdf5s_dir,
        audio_length=audio_length,
        classes_num=classes_num)

    test_dataset = AudioSetDataset(
        target_hdf5_path=eval_test_targets_hdf5_path,
        waveform_hdf5s_dir=waveform_hdf5s_dir,
        audio_length=audio_length,
        classes_num=classes_num)

    # Sampler
    if balanced == 'balanced':
        if 'mixup' in augmentation:
            train_sampler = BalancedSamplerMixup(
                target_hdf5_path=train_targets_hdf5_path,
                black_list_csv=black_list_csv,
                batch_size=batch_size,
                start_mix_epoch=1)
            train_collector = Collator(mixup_alpha=1.)
            assert batch_size % torch.cuda.device_count() == 0, \
                'Batch size must be divisible by the GPU count for mixup to work properly.'
        else:
            train_sampler = BalancedSampler(
                target_hdf5_path=train_targets_hdf5_path,
                black_list_csv=black_list_csv,
                batch_size=batch_size)
            train_collector = Collator(mixup_alpha=None)

    bal_sampler = EvaluateSampler(dataset_size=len(bal_dataset),
                                  batch_size=batch_size)

    test_sampler = EvaluateSampler(dataset_size=len(test_dataset),
                                   batch_size=batch_size)

    eval_collector = Collator(mixup_alpha=None)

    # Data loader
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_sampler=train_sampler,
                                               collate_fn=train_collector,
                                               num_workers=num_workers,
                                               pin_memory=True)

    bal_loader = torch.utils.data.DataLoader(dataset=bal_dataset,
                                             batch_sampler=bal_sampler,
                                             collate_fn=eval_collector,
                                             num_workers=num_workers,
                                             pin_memory=True)

    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_sampler=test_sampler,
                                              collate_fn=eval_collector,
                                              num_workers=num_workers,
                                              pin_memory=True)

    # Evaluator
    bal_evaluator = Evaluator(model=model, generator=bal_loader)

    test_evaluator = Evaluator(model=model, generator=test_loader)

    # Statistics
    statistics_container = StatisticsContainer(statistics_path)

    train_bgn_time = time.time()

    # Resume training
    if resume_iteration > 0:
        resume_weights_path = os.path.join(
            checkpoints_dir,
            '{}_iterations.weights.h5'.format(resume_iteration))
        resume_sampler_path = os.path.join(
            checkpoints_dir,
            '{}_iterations.sampler.h5'.format(resume_iteration))
        iteration = resume_iteration

        model.load_weights(resume_weights_path)
        sampler_state_dict = cPickle.load(open(resume_sampler_path, 'rb'))
        train_sampler.load_state_dict(sampler_state_dict)
        statistics_container.load_state_dict(resume_iteration)

    else:
        iteration = 0

    t_ = time.time()

    for batch_data_dict in train_loader:

        # Evaluate
        if (iteration % 2000 == 0
                and iteration > resume_iteration) or (iteration == 0):
            train_fin_time = time.time()

            bal_statistics = bal_evaluator.evaluate()
            test_statistics = test_evaluator.evaluate()

            logging.info('Validate bal mAP: {:.3f}'.format(
                np.mean(bal_statistics['average_precision'])))

            logging.info('Validate test mAP: {:.3f}'.format(
                np.mean(test_statistics['average_precision'])))

            statistics_container.append(iteration,
                                        bal_statistics,
                                        data_type='bal')
            statistics_container.append(iteration,
                                        test_statistics,
                                        data_type='test')
            statistics_container.dump()

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info(
                'iteration: {}, train time: {:.3f} s, validate time: {:.3f} s'
                ''.format(iteration, train_time, validate_time))

            logging.info('------------------------------------')

            train_bgn_time = time.time()

        # Save model
        # if iteration % 20000 == 0 and iteration > resume_iteration:
        if iteration == 10:
            weights_path = os.path.join(
                checkpoints_dir, '{}_iterations.weights.h5'.format(iteration))

            sampler_path = os.path.join(
                checkpoints_dir, '{}_iterations.sampler.h5'.format(iteration))

            model.save_weights(weights_path)
            cPickle.dump(train_sampler.state_dict(), open(sampler_path, 'wb'))

            logging.info('Model weights saved to {}'.format(weights_path))
            logging.info('Sampler saved to {}'.format(sampler_path))
        '''
        if 'mixup' in augmentation:
            batch_output_dict = model(batch_data_dict['waveform'], batch_data_dict['mixup_lambda'])
            batch_target_dict = {'target': do_mixup(batch_data_dict['target'], batch_data_dict['mixup_lambda'])}
        else:
            batch_output_dict = model(batch_data_dict['waveform'], None)
            batch_target_dict = {'target': batch_data_dict['target']}
        '''

        loss = model.train_on_batch(x=batch_data_dict['waveform'],
                                    y=batch_data_dict['target'])
        print(iteration, loss)

        iteration += 1

        # Stop learning
        if iteration == early_stop:
            break
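
The `Evaluator` objects above are assumed to return a dictionary whose 'average_precision' entry holds one value per class; the logged mAP is simply their mean. A hedged sketch of how such statistics could be computed from clip-level targets and scores with scikit-learn (illustrative, not the repository's actual evaluator):

import numpy as np
from sklearn.metrics import average_precision_score

def clipwise_statistics_sketch(targets, outputs):
    """targets: (clips_num, classes_num) binary array; outputs: scores of the same shape."""
    average_precision = average_precision_score(targets, outputs, average=None)
    return {'average_precision': average_precision,
            'mAP': float(np.mean(average_precision))}
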
def train(args):
    '''Training. Model will be saved after several iterations. 
    
    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      train_source: 'curated' | 'noisy' | 'curated_and_noisy'
      segment_seconds: float, duration of audio recordings to be padded or split
      hop_seconds: float, hop seconds between segments
      pad_type: 'constant' | 'repeat'
      holdout_fold: '1' | '2' | '3' | '4' | 'none', set to 'none' for training 
          on all data without validation
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters

    dataset_dir = DATASET_DIR
    workspace = WORKSPACE
    train_source = args.train_source
    segment_seconds = args.segment_seconds
    hop_seconds = args.hop_seconds
    pad_type = args.pad_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    n_epoch = args.n_epoch
    batch_size = args.batch_size
    valid_source = args.valid_source
    pretrained = args.pretrained
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    mel_bins = config.mel_bins
    classes_num = config.classes_num
    frames_per_second = config.frames_per_second
    max_iteration = 500  # Number of mini-batches to evaluate on training data
    reduce_lr = False

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    curated_feature_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train_curated.h5')

    noisy_feature_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train_noisy.h5')

    curated_cross_validation_path = os.path.join(
        workspace, 'cross_validation_metadata',
        'train_curated_cross_validation.csv')

    noisy_cross_validation_path = os.path.join(
        workspace, 'cross_validation_metadata',
        'train_noisy_cross_validation.csv')

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train_noisy.h5')

    if pretrained == 'none':
        checkpoints_dir = os.path.join(
            workspace, 'checkpoints', filename,
            '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                                 mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'.format(segment_seconds,
                                                     hop_seconds, pad_type),
            'holdout_fold={}'.format(holdout_fold), model_type)
        create_folder(checkpoints_dir)

        validate_statistics_path = os.path.join(
            workspace, 'statistics', filename,
            '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                                 mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'.format(segment_seconds,
                                                     hop_seconds, pad_type),
            'holdout_fold={}'.format(holdout_fold), model_type,
            'validate_statistics.pickle')
        create_folder(os.path.dirname(validate_statistics_path))

        logs_dir = os.path.join(
            workspace, 'logs', filename, args.mode,
            '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                                 mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'.format(segment_seconds,
                                                     hop_seconds, pad_type),
            'holdout_fold={}'.format(holdout_fold), model_type)
        create_logging(logs_dir, 'w')

    else:
        checkpoints_dir = os.path.join(
            workspace, 'checkpoints', filename,
            '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                                 mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'.format(segment_seconds,
                                                     hop_seconds, pad_type),
            'holdout_fold={}'.format(holdout_fold), model_type, 'resume')
        create_folder(checkpoints_dir)

        validate_statistics_path = os.path.join(
            workspace, 'statistics', filename,
            '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                                 mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'.format(segment_seconds,
                                                     hop_seconds, pad_type),
            'holdout_fold={}'.format(holdout_fold), model_type, 'resume',
            'validate_statistics.pickle')
        create_folder(os.path.dirname(validate_statistics_path))

        logs_dir = os.path.join(
            workspace, 'logs', filename, args.mode,
            '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                                 mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'.format(segment_seconds,
                                                     hop_seconds, pad_type),
            'holdout_fold={}'.format(holdout_fold), model_type, 'resume')
        create_logging(logs_dir, 'w')

    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    Model = eval(model_type)
    if model_type == 'cbam_ResNet18':
        model = Model(18, classes_num * 2, 'CBAM')
    else:
        model = Model(classes_num * 2)

    if pretrained != 'none':
        model.load_state_dict(torch.load(pretrained)['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = DataGenerator(
        curated_feature_hdf5_path=curated_feature_hdf5_path,
        noisy_feature_hdf5_path=noisy_feature_hdf5_path,
        curated_cross_validation_path=curated_cross_validation_path,
        noisy_cross_validation_path=noisy_cross_validation_path,
        train_source=train_source,
        holdout_fold=holdout_fold,
        segment_seconds=segment_seconds,
        hop_seconds=hop_seconds,
        pad_type=pad_type,
        scalar=scalar,
        batch_size=batch_size)

    # Calculate total iteration required for n_epoch
    iter_per_epoch = np.ceil(
        len(data_generator.train_segments_indexes) / batch_size).astype(int)
    total_iter = iter_per_epoch * n_epoch

    # Define Warm-up LR scheduler
    epoch_to_warm = 10
    epoch_to_flat = 200

    def _warmup_lr(optimizer,
                   iteration,
                   iter_per_epoch,
                   epoch_to_warm,
                   min_lr=0,
                   max_lr=0.0035):
        delta = (max_lr - min_lr) / iter_per_epoch / epoch_to_warm
        lr = min_lr + delta * iteration
        for p in optimizer.param_groups:
            p['lr'] = lr
        return lr

    # Optimizer
    criterion = FocalLoss(2)
    # metric_loss = RingLoss(type='auto', loss_weight=1.0)
    metric_loss = ArcFaceLoss()
    if cuda:
        metric_loss.cuda()
    optimizer = Nadam(model.parameters(),
                      lr=0.0035,
                      betas=(0.9, 0.999),
                      eps=1e-8,
                      weight_decay=0,
                      schedule_decay=4e-3)
    scheduler = CosineLRWithRestarts(
        optimizer,
        batch_size,
        len(data_generator.train_segments_indexes),
        restart_period=epoch_to_flat - epoch_to_warm + 1,
        t_mult=1,
        verbose=True)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          cuda=cuda)

    # Valid source
    if valid_source == 'curated':
        target_sources = ['curated']
    elif valid_source == 'noisy':
        target_sources = ['noisy']
    elif valid_source == 'both':
        target_sources = ['curated', 'noisy']

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0
    epoch = 0

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate
        if iteration % 2500 == 0:
            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()

            # Evaluate on partial of train data
            # logging.info('Train statistics:')

            # for target_source in target_sources:
            #     validate_curated_statistics = evaluator.evaluate(
            #         data_type='train',
            #         target_source=target_source,
            #         max_iteration=max_iteration,
            #         verbose=False)

            # Evaluate on holdout validation data
            if holdout_fold != 'none':
                logging.info('Validate statistics:')

                for target_source in target_sources:
                    validate_curated_statistics = evaluator.evaluate(
                        data_type='validate',
                        target_source=target_source,
                        max_iteration=None,
                        verbose=False)

                    validate_statistics_container.append(
                        iteration, target_source, validate_curated_statistics)

                validate_statistics_container.dump()

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model
        if iteration % 2500 == 0 and iteration > 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU
        for key in batch_data_dict.keys():
            if key in ['feature', 'mask', 'target']:
                batch_data_dict[key] = move_data_to_gpu(
                    batch_data_dict[key], cuda)

        # Train
        model.train()
        batch_feature, batch_output = model(batch_data_dict['feature'],
                                            is_infer=False)

        # loss
        loss = criterion(batch_output,
                         batch_data_dict['target']) + metric_loss(
                             batch_feature, batch_data_dict['target'])

        # Backward
        optimizer.zero_grad()

        # LR Warm up
        if iteration < epoch_to_warm * iter_per_epoch:
            cur_lr = _warmup_lr(optimizer,
                                iteration,
                                iter_per_epoch,
                                epoch_to_warm=epoch_to_warm,
                                min_lr=0,
                                max_lr=0.0035)

        loss.backward()
        optimizer.step()

        if iteration >= epoch_to_warm * iter_per_epoch and iteration < epoch_to_flat * iter_per_epoch:
            if data_generator.pointer >= len(
                    data_generator.train_segments_indexes):
                scheduler.step()
            scheduler.batch_step()

        # Show LR information
        if iteration % iter_per_epoch == 0 and iteration != 0:
            epoch += 1
            if epoch % 10 == 0:
                for p in optimizer.param_groups:
                    logging.info(
                        'Learning rate at epoch {:3d} / iteration {:5d} is: {:.6f}'
                        .format(epoch, iteration, p['lr']))

        # Stop learning
        if iteration == total_iter:
            break

        iteration += 1

        if iteration == epoch_to_warm * iter_per_epoch:
            scheduler.step()

        if iteration == epoch_to_flat * iter_per_epoch:
            for param_group in optimizer.param_groups:
                param_group['lr'] = 1e-5
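
The `_warmup_lr` helper above ramps the learning rate linearly from min_lr to max_lr over the first epoch_to_warm epochs, i.e. lr = min_lr + (max_lr - min_lr) * iteration / (iter_per_epoch * epoch_to_warm). A quick numeric check with illustrative values:

# Sanity check of the warm-up formula above (values are illustrative only)
iter_per_epoch, epoch_to_warm, min_lr, max_lr = 100, 10, 0.0, 0.0035
delta = (max_lr - min_lr) / iter_per_epoch / epoch_to_warm  # 3.5e-6 per iteration
print(min_lr + delta * 500)  # 0.00175, i.e. halfway to max_lr at iteration 500
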
def download_wavs(args):
    """Download videos and extract audio in wav format.
    """

    # Paths
    csv_path = args.csv_path
    audios_dir = args.audios_dir
    mini_data = args.mini_data

    if mini_data:
        logs_dir = '_logs/download_dataset_minidata/{}'.format(
            get_filename(csv_path))
    else:
        logs_dir = '_logs/download_dataset/{}'.format(get_filename(csv_path))

    create_folder(audios_dir)
    create_folder(logs_dir)
    create_logging(logs_dir, filemode='w')
    logging.info('Download log is saved to {}'.format(logs_dir))

    # Read csv
    with open(csv_path, 'r') as f:
        lines = f.readlines()

    lines = lines[3:]  # Remove csv head info

    if mini_data:
        lines = lines[0:10]  # Download small data for debug

    download_time = time.time()

    # Download
    for (n, line) in enumerate(lines):

        items = line.split(', ')
        audio_id = items[0]
        start_time = float(items[1])
        end_time = float(items[2])
        duration = end_time - start_time

        logging.info('{} {} start_time: {:.1f}, end_time: {:.1f}'.format(
            n, audio_id, start_time, end_time))

        # Download full video of whatever format
        video_name = os.path.join(audios_dir, '_Y{}.%(ext)s'.format(audio_id))
        os.system("youtube-dl --quiet -o '{}' -x https://www.youtube.com/watch?v={}"\
            .format(video_name, audio_id))

        video_paths = glob.glob(
            os.path.join(audios_dir, '_Y' + audio_id + '.*'))

        # If download successful
        if len(video_paths) > 0:
            video_path = video_paths[0]  # Choose one video

            # Prepend 'Y' because some video ids start with '-', which would
            # otherwise cause problems
            audio_path = os.path.join(audios_dir, 'Y' + audio_id + '.wav')

            # Extract audio in wav format
            os.system("ffmpeg -loglevel panic -i {} -ac 1 -ar 32000 -ss {} -t 00:00:{} {} "\
                .format(video_path,
                str(datetime.timedelta(seconds=start_time)), duration,
                audio_path))

            # Remove downloaded video
            os.system("rm {}".format(video_path))

            logging.info("Download and convert to {}".format(audio_path))

    logging.info(
        'Download finished! Time spent: {:.3f} s'.format(time.time() -
                                                         download_time))

    logging.info('Logs can be viewed in {}'.format(logs_dir))
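
The download loop above builds shell commands by string formatting. Since video ids can contain characters such as '-', an argument-list call via subprocess.run is a safer variant of the same ffmpeg step; this is a hedged sketch with the same flags, not a change to the original code:

import subprocess

def extract_wav_sketch(video_path, start_time, duration, audio_path):
    """Equivalent of the os.system ffmpeg call above, without shell quoting.
    ffmpeg accepts plain seconds for -ss and -t."""
    subprocess.run(
        ['ffmpeg', '-loglevel', 'panic', '-i', video_path,
         '-ac', '1', '-ar', '32000',
         '-ss', str(start_time), '-t', str(duration), audio_path],
        check=False)
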
def inference_validation(args):
    '''Inference and calculate metrics on validation data. 
    
    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      train_source: 'curated' | 'noisy' | 'curated_and_noisy'
      segment_seconds: float, duration of audio recordings to be padded or split
      hop_seconds: float, hop seconds between segments
      pad_type: 'constant' | 'repeat'
      holdout_fold: '1', '2', '3', '4'
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      iteration: int, load model of this iteration
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
      visualize: bool, visualize the logmel spectrogram of segments
    '''

    # Arguments & parameters
    dataset_dir = DATASET_DIR
    workspace = WORKSPACE
    train_source = args.train_source
    segment_seconds = args.segment_seconds
    hop_seconds = args.hop_seconds
    pad_type = args.pad_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    iteration = args.iteration
    batch_size = args.batch_size
    resume = args.resume
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    visualize = args.visualize
    filename = args.filename

    mel_bins = config.mel_bins
    classes_num = config.classes_num
    frames_per_second = config.frames_per_second

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    curated_feature_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train_curated.h5')

    noisy_feature_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train_noisy.h5')

    curated_cross_validation_path = os.path.join(
        workspace, 'cross_validation_metadata',
        'train_curated_cross_validation.csv')

    noisy_cross_validation_path = os.path.join(
        workspace, 'cross_validation_metadata',
        'train_noisy_cross_validation.csv')

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train_noisy.h5')

    if not resume:
        checkpoint_path = os.path.join(
            workspace, 'checkpoints', filename,
            'logmel_{}frames_{}melbins'.format(frames_per_second, mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'
            ''.format(segment_seconds, hop_seconds,
                      pad_type), 'holdout_fold={}'
            ''.format(holdout_fold), model_type,
            '{}_iterations.pth'.format(iteration))
    else:
        checkpoint_path = os.path.join(
            workspace, 'checkpoints', filename,
            'logmel_{}frames_{}melbins'.format(frames_per_second, mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'
            ''.format(segment_seconds, hop_seconds,
                      pad_type), 'holdout_fold={}'
            ''.format(holdout_fold), model_type, 'resume',
            '{}_iterations.pth'.format(iteration))

    figs_dir = os.path.join(workspace, 'figures')
    create_folder(figs_dir)

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'train_source={}'.format(train_source),
        'segment={}s,hop={}s,pad_type={}'
        ''.format(segment_seconds, hop_seconds, pad_type),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    Model = eval(model_type)
    if model_type == 'cbam_ResNet18':
        model = Model(18, classes_num * 2, 'CBAM')
    else:
        model = Model(classes_num * 2)

    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = DataGenerator(
        curated_feature_hdf5_path=curated_feature_hdf5_path,
        noisy_feature_hdf5_path=noisy_feature_hdf5_path,
        curated_cross_validation_path=curated_cross_validation_path,
        noisy_cross_validation_path=noisy_cross_validation_path,
        train_source=train_source,
        holdout_fold=holdout_fold,
        segment_seconds=segment_seconds,
        hop_seconds=hop_seconds,
        pad_type=pad_type,
        scalar=scalar,
        batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          cuda=cuda)

    # Evaluate
    for target_source in ['curated', 'noisy']:
        validate_curated_statistics = evaluator.evaluate(
            data_type='validate',
            target_source=target_source,
            max_iteration=None,
            verbose=True)

        # Visualize
        if visualize:
            save_fig_path = os.path.join(figs_dir,
                                         '{}_logmel.png'.format(target_source))

            validate_curated_statistics = evaluator.visualize(
                data_type='validate',
                target_source=target_source,
                save_fig_path=save_fig_path,
                max_iteration=None,
                verbose=False)
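
`torch.load(checkpoint_path)` above restores tensors onto the device they were saved from, which fails on a CPU-only machine if the checkpoint was written on a GPU. A small hedged variant of the loading step that maps tensors explicitly, using the same 'model' checkpoint key as the training code above:

import torch

def load_checkpoint_sketch(checkpoint_path, model, cuda):
    """Load a training checkpoint, mapping GPU tensors to CPU when CUDA is absent."""
    map_location = torch.device('cuda') if cuda else torch.device('cpu')
    checkpoint = torch.load(checkpoint_path, map_location=map_location)
    model.load_state_dict(checkpoint['model'])
    return checkpoint
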
def train(args):
    '''Training. Model will be saved after several iterations. 
    
    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      subtask: 'a' | 'b' | 'c', corresponds to 3 subtasks in DCASE2019 Task1
      data_type: 'development' | 'evaluation'
      holdout_fold: '1' | 'none', set 1 for development and none for training 
          on all data without validation
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''
    
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    subtask = args.subtask
    data_type = args.data_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    fixed = args.fixed
    finetune = args.finetune
    ite_train = args.ite_train
    ite_eva = args.ite_eva
    ite_store = args.ite_store
    
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    max_iteration = None      # Number of mini-batches to evaluate on training data
    reduce_lr = True
    
    sources_to_evaluate = get_sources(subtask)
    in_domain_classes_num = len(config.labels) - 1
    
    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''
    
    sub_dir = get_subdir(subtask, data_type)
    
    train_csv = os.path.join(dataset_dir, sub_dir, 'meta.csv')
        
    validate_csv = os.path.join(dataset_dir, sub_dir, 'evaluation_setup', 
        'fold1_evaluate.csv')
                
    feature_hdf5_path = os.path.join(workspace, 'features', 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}.h5'.format(sub_dir))
        
    scalar_path = os.path.join(workspace, 'scalars', 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}.h5'.format(sub_dir))
        
    checkpoints_dir = os.path.join(workspace, 'checkpoints', filename, 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}'.format(sub_dir), 'holdout_fold={}'.format(holdout_fold), 
        model_type)
    create_folder(checkpoints_dir)

    validate_statistics_path = os.path.join(workspace, 'statistics', filename, 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}'.format(sub_dir), 'holdout_fold={}'.format(holdout_fold), 
        model_type, 'validate_statistics.pickle')
    
    create_folder(os.path.dirname(validate_statistics_path))
    
    logs_dir = os.path.join(workspace, 'logs', filename, args.mode, 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}'.format(sub_dir), 'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    if cuda:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Load scalar
    scalar = load_scalar(scalar_path)
    
    # Model
    Model = eval(model_type)
    
    if subtask in ['a', 'b']:
        model = Model(in_domain_classes_num, activation='logsoftmax',
                      fixed=(fixed == 'True'))
        loss_func = nll_loss

    elif subtask == 'c':
        model = Model(in_domain_classes_num, activation='sigmoid')
        loss_func = F.binary_cross_entropy

    if cuda:
        model.cuda()
    
    # Optimizer (only pass trainable parameters when part of the model is fixed)
    if fixed == 'True':
        params = filter(lambda p: p.requires_grad, model.parameters())
    else:
        params = model.parameters()
    optimizer = optim.Adam(params, lr=1e-3, betas=(0.9, 0.999), eps=1e-08,
                           weight_decay=0., amsgrad=True)

    if finetune == 'True':
        # Hard-coded checkpoint of the same model trained from scratch for 2000
        # iterations; adjust this path to your own workspace.
        model_path = ('/home/cdd/code2/dcase2020_task1/workspace/checkpoints/main/'
                      'logmel_86frames_40melbins/'
                      'TAU-urban-acoustic-scenes-2020-mobile-development/'
                      'holdout_fold=1/' + model_type + '/2000_iterations.pth')
        map_location = torch.device('cuda' if cuda else 'cpu')
        checkpoint = torch.load(model_path, map_location=map_location)
        model.load_state_dict(checkpoint['model'])
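        # Checkpoints written by these scripts are dicts of the form
        # {'iteration': int, 'model': state_dict, 'optimizer': state_dict}
        # (see the save code below), so only the model weights are restored here
        # and the optimizer starts fresh.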
        
    # Data generator
    data_generator = DataGenerator(
        feature_hdf5_path=feature_hdf5_path, 
        train_csv=train_csv, 
        validate_csv=validate_csv, 
        holdout_fold=holdout_fold, 
        scalar=scalar, 
        batch_size=batch_size)
    
    # Evaluator
    evaluator = Evaluator(
        model=model, 
        data_generator=data_generator, 
        subtask=subtask, 
        cuda=cuda)
    
    # Statistics
    validate_statistics_container = StatisticsContainer(validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0
    
    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():
        
        # Evaluate every 200 iterations once training has passed ite_eva iterations
        if iteration % 200 == 0 and iteration > ite_eva:
            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()

            for source in sources_to_evaluate:
                train_statistics = evaluator.evaluate(
                    data_type='train', 
                    source=source, 
                    max_iteration=None, 
                    verbose=False)
            
            if holdout_fold != 'none':
                for source in sources_to_evaluate:
                    validate_statistics = evaluator.evaluate(
                        data_type='validate', 
                        source=source, 
                        max_iteration=None, 
                        verbose=False)

                    validate_statistics_container.append_and_dump(
                        iteration, source, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info(
                'Train time: {:.3f} s, validate time: {:.3f} s'
                ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model
        if iteration % 200 == 0 and iteration > ite_store:
            checkpoint = {
                'iteration': iteration, 
                'model': model.state_dict(), 
                'optimizer': optimizer.state_dict()}

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))
                
            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))
            
        # Reduce learning rate
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.93
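        # Note: this gives a geometric schedule, lr(iteration) ≈ 1e-3 * 0.93 ** (iteration // 200),
        # so after 2000 iterations the learning rate has dropped to roughly 4.8e-4.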
        
        # Move data to GPU
        for key in batch_data_dict.keys():
            if key in ['feature', 'feature_gamm', 'feature_mfcc', 'feature_panns', 'target']:
                batch_data_dict[key] = move_data_to_gpu(batch_data_dict[key], cuda)
        
        # Train with mixup. (Without mixup, the forward pass would simply be
        # model(feature, feature_gamm, feature_mfcc, feature_panns) followed by
        # loss_func(batch_output, batch_data_dict['target']).)
        model.train()
        mixed_x1, mixed_x2, mixed_x3, mixed_x4, y_a, y_b, lam = mixup_data(
            x1=batch_data_dict['feature'],
            x2=batch_data_dict['feature_gamm'],
            x3=batch_data_dict['feature_mfcc'],
            x4=batch_data_dict['feature_panns'],
            y=batch_data_dict['target'],
            alpha=0.2)
        batch_output, batch_loss = model(mixed_x1, mixed_x2, mixed_x3, mixed_x4)

        if batch_output.shape[1] == 10:  # single-scale models: output is class scores
            loss = mixup_criterion(loss_func, batch_output, y_a, y_b, lam)
        else:  # multi-scale models: one prediction per scale
            losses = []
            for ite in range(batch_output.shape[1] - 1):
                losses.append(
                    mixup_criterion(loss_func, batch_output[:, ite, :], y_a, y_b, lam))
            loss = sum(losses)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning (ite_train is e.g. 12000 when training from scratch)
        if iteration == ite_train:
            break
            
        iteration += 1
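
# The mixup helpers used above are not shown in this snippet; they are assumed to
# follow standard mixup (Zhang et al., 2018). A minimal sketch under that
# assumption (names and signatures are illustrative, numpy/torch imports are
# presumed), not the repository's actual implementation:
#
#     def mixup_data(x1, x2, x3, x4, y, alpha=0.2):
#         lam = np.random.beta(alpha, alpha)
#         index = torch.randperm(x1.size(0), device=x1.device)
#         mix = lambda x: lam * x + (1. - lam) * x[index]
#         return mix(x1), mix(x2), mix(x3), mix(x4), y, y[index], lam
#
#     def mixup_criterion(loss_func, pred, y_a, y_b, lam):
#         return lam * loss_func(pred, y_a) + (1. - lam) * loss_func(pred, y_b)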
Example #26
0
def train(args):
    """Train a piano transcription system.

    Args:
      workspace: str, directory of your workspace
      model_type: str, e.g. 'Regressonset_regressoffset_frame_velocity_CRNN'
      loss_type: str, e.g. 'regress_onset_offset_frame_velocity_bce'
      augmentation: str, e.g. 'none'
      batch_size: int
      learning_rate: float
      reduce_iteration: int
      resume_iteration: int
      early_stop: int
      device: 'cuda' | 'cpu'
      mini_data: bool
    """

    # Arguments & parameters
    workspace = args.workspace
    model_type = args.model_type
    loss_type = args.loss_type
    augmentation = args.augmentation
    max_note_shift = args.max_note_shift
    batch_size = args.batch_size
    learning_rate = args.learning_rate
    reduce_iteration = args.reduce_iteration
    resume_iteration = args.resume_iteration
    early_stop = args.early_stop
    device = torch.device('cuda' if args.cuda and torch.cuda.is_available()
                          else 'cpu')
    mini_data = args.mini_data
    filename = args.filename

    sample_rate = config.sample_rate
    segment_seconds = config.segment_seconds
    hop_seconds = config.hop_seconds
    segment_samples = int(segment_seconds * sample_rate)
    frames_per_second = config.frames_per_second
    classes_num = config.classes_num
    num_workers = 8

    # Loss function
    loss_func = get_loss_func(loss_type)

    # Paths
    hdf5s_dir = os.path.join(workspace, 'hdf5s', 'maestro')

    checkpoints_dir = os.path.join(workspace, 'checkpoints', filename,
                                   model_type,
                                   'loss_type={}'.format(loss_type),
                                   'augmentation={}'.format(augmentation),
                                   'max_note_shift={}'.format(max_note_shift),
                                   'batch_size={}'.format(batch_size))
    create_folder(checkpoints_dir)

    statistics_path = os.path.join(workspace, 'statistics', filename,
                                   model_type,
                                   'loss_type={}'.format(loss_type),
                                   'augmentation={}'.format(augmentation),
                                   'max_note_shift={}'.format(max_note_shift),
                                   'batch_size={}'.format(batch_size),
                                   'statistics.pkl')
    create_folder(os.path.dirname(statistics_path))

    logs_dir = os.path.join(workspace, 'logs', filename, model_type,
                            'loss_type={}'.format(loss_type),
                            'augmentation={}'.format(augmentation),
                            'max_note_shift={}'.format(max_note_shift),
                            'batch_size={}'.format(batch_size))
    create_folder(logs_dir)

    create_logging(logs_dir, filemode='w')
    logging.info(args)

    if 'cuda' in str(device):
        logging.info('Using GPU.')
        device = 'cuda'
    else:
        logging.info('Using CPU.')
        device = 'cpu'

    # Model
    Model = eval(model_type)
    model = Model(frames_per_second=frames_per_second, classes_num=classes_num)

    if augmentation == 'none':
        augmentor = None
    elif augmentation == 'aug':
        augmentor = Augmentor()
    else:
        raise Exception('Incorrect augmentation!')

    # Dataset
    train_dataset = MaestroDataset(hdf5s_dir=hdf5s_dir,
                                   segment_seconds=segment_seconds,
                                   frames_per_second=frames_per_second,
                                   max_note_shift=max_note_shift,
                                   augmentor=augmentor)

    evaluate_dataset = MaestroDataset(hdf5s_dir=hdf5s_dir,
                                      segment_seconds=segment_seconds,
                                      frames_per_second=frames_per_second,
                                      max_note_shift=0)

    # Sampler for training
    train_sampler = Sampler(hdf5s_dir=hdf5s_dir,
                            split='train',
                            segment_seconds=segment_seconds,
                            hop_seconds=hop_seconds,
                            batch_size=batch_size,
                            mini_data=mini_data)

    # Sampler for evaluation
    evaluate_train_sampler = TestSampler(hdf5s_dir=hdf5s_dir,
                                         split='train',
                                         segment_seconds=segment_seconds,
                                         hop_seconds=hop_seconds,
                                         batch_size=batch_size,
                                         mini_data=mini_data)

    evaluate_validate_sampler = TestSampler(hdf5s_dir=hdf5s_dir,
                                            split='validation',
                                            segment_seconds=segment_seconds,
                                            hop_seconds=hop_seconds,
                                            batch_size=batch_size,
                                            mini_data=mini_data)

    evaluate_test_sampler = TestSampler(hdf5s_dir=hdf5s_dir,
                                        split='test',
                                        segment_seconds=segment_seconds,
                                        hop_seconds=hop_seconds,
                                        batch_size=batch_size,
                                        mini_data=mini_data)

    # Dataloader
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_sampler=train_sampler,
                                               collate_fn=collate_fn,
                                               num_workers=num_workers,
                                               pin_memory=True)

    evaluate_train_loader = torch.utils.data.DataLoader(
        dataset=evaluate_dataset,
        batch_sampler=evaluate_train_sampler,
        collate_fn=collate_fn,
        num_workers=num_workers,
        pin_memory=True)

    validate_loader = torch.utils.data.DataLoader(
        dataset=evaluate_dataset,
        batch_sampler=evaluate_validate_sampler,
        collate_fn=collate_fn,
        num_workers=num_workers,
        pin_memory=True)

    test_loader = torch.utils.data.DataLoader(
        dataset=evaluate_dataset,
        batch_sampler=evaluate_test_sampler,
        collate_fn=collate_fn,
        num_workers=num_workers,
        pin_memory=True)
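    # Because batch_sampler objects are passed, these DataLoaders must not also be
    # given batch_size/shuffle/sampler/drop_last; each sampler is expected to yield
    # whole batches of segment metadata which collate_fn assembles into arrays.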

    # Evaluator
    evaluator = SegmentEvaluator(model, batch_size)

    # Statistics
    statistics_container = StatisticsContainer(statistics_path)

    # Optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=learning_rate,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0.,
                           amsgrad=True)

    # Resume training
    if resume_iteration > 0:
        resume_checkpoint_path = os.path.join(
            workspace, 'checkpoints', filename, model_type,
            'loss_type={}'.format(loss_type),
            'augmentation={}'.format(augmentation),
            'batch_size={}'.format(batch_size),
            '{}_iterations.pth'.format(resume_iteration))

        logging.info('Loading checkpoint {}'.format(resume_checkpoint_path))
        checkpoint = torch.load(resume_checkpoint_path)
        model.load_state_dict(checkpoint['model'])
        train_sampler.load_state_dict(checkpoint['sampler'])
        statistics_container.load_state_dict(resume_iteration)
        iteration = checkpoint['iteration']

    else:
        iteration = 0

    # Parallel
    print('GPU number: {}'.format(torch.cuda.device_count()))
    model = torch.nn.DataParallel(model)
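    # DataParallel replicates the model on every visible GPU and splits each batch
    # along dim 0; the parameters now live under model.module, which is why the
    # checkpoint below saves model.module.state_dict().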

    if 'cuda' in str(device):
        model.to(device)

    train_bgn_time = time.time()

    for batch_data_dict in train_loader:

        # Evaluation
        if iteration % 5000 == 0:  # and iteration > 0:
            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()

            evaluate_train_statistics = evaluator.evaluate(
                evaluate_train_loader)
            validate_statistics = evaluator.evaluate(validate_loader)
            test_statistics = evaluator.evaluate(test_loader)

            logging.info(
                '    Train statistics: {}'.format(evaluate_train_statistics))
            logging.info(
                '    Validation statistics: {}'.format(validate_statistics))
            logging.info('    Test statistics: {}'.format(test_statistics))

            statistics_container.append(iteration,
                                        evaluate_train_statistics,
                                        data_type='train')
            statistics_container.append(iteration,
                                        validate_statistics,
                                        data_type='validation')
            statistics_container.append(iteration,
                                        test_statistics,
                                        data_type='test')
            statistics_container.dump()

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model
        if iteration % 20000 == 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.module.state_dict(),
                'sampler': train_sampler.state_dict()
            }

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate
        if iteration % reduce_iteration == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to device
        for key in batch_data_dict.keys():
            batch_data_dict[key] = move_data_to_device(batch_data_dict[key],
                                                       device)

        model.train()
        batch_output_dict = model(batch_data_dict['waveform'])

        loss = loss_func(model, batch_output_dict, batch_data_dict)

        print(iteration, loss)

        # Backward
        loss.backward()

        optimizer.step()
        optimizer.zero_grad()

        # Stop learning
        if iteration == early_stop:
            break

        iteration += 1
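
# A hypothetical command line for the training entry point above. The script name,
# sub-command and numeric values are placeholders; only the flag names follow the
# args consumed in train():
#
#   python main.py train --workspace=<WORKSPACE> \
#       --model_type='Regressonset_regressoffset_frame_velocity_CRNN' \
#       --loss_type='regress_onset_offset_frame_velocity_bce' \
#       --augmentation='none' --max_note_shift=0 --batch_size=16 \
#       --learning_rate=1e-3 --reduce_iteration=10000 --resume_iteration=0 \
#       --early_stop=100000 --cuda
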
def inference_evaluation(args):
    '''Inference on evaluation data and write out submission file. 
    
    Args: 
      subtask: 'a' | 'b' | 'c', corresponds to 3 subtasks in DCASE2019 Task1
      data_type: 'leaderboard' | 'evaluation'
      workspace: string, directory of workspace
      model_type: string, e.g. 'Cnn_9layers'
      iteration: int
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
      visualize: bool
    '''

    # Arguments & parameters
    subtask = args.subtask
    data_type = args.data_type
    workspace = args.workspace
    model_type = args.model_type
    iteration = args.iteration
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    holdout_fold = 'none'
    
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    
    in_domain_classes_num = len(config.labels) - 1
    
    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''
        
    sub_dir = get_subdir(subtask, data_type)
    trained_sub_dir = get_subdir(subtask, 'development')
    
    feature_hdf5_path = os.path.join(workspace, 'features', 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}.h5'.format(sub_dir))
        
    scalar_path = os.path.join(workspace, 'scalars', 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}.h5'.format(trained_sub_dir))
        
    checkpoint_path = os.path.join(workspace, 'checkpoints', filename, 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}'.format(trained_sub_dir), 'holdout_fold={}'.format(holdout_fold), 
        model_type, '{}_iterations.pth'.format(iteration))
    
    submission_path = os.path.join(workspace, 'submissions', 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        sub_dir, 'holdout_fold={}'.format(holdout_fold), model_type, 
        '{}_iterations'.format(iteration), 'submission.csv')
    create_folder(os.path.dirname(submission_path))

    logs_dir = os.path.join(workspace, 'logs', filename, args.mode, 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}'.format(sub_dir), 'holdout_fold={}'.format(holdout_fold), 
        model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)
        
    # Load scalar
    scalar = load_scalar(scalar_path)

    # Load model
    Model = eval(model_type)
    
    if subtask in ['a', 'b']:
        model = Model(in_domain_classes_num, activation='logsoftmax')
        loss_func = nll_loss
        
    elif subtask == 'c':
        model = Model(in_domain_classes_num, activation='sigmoid')
        loss_func = F.binary_cross_entropy
        
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model'])
    
    if cuda:
        model.cuda()
        
    # Data generator
    data_generator = EvaluationDataGenerator(
        feature_hdf5_path=feature_hdf5_path, 
        scalar=scalar, 
        batch_size=batch_size)
    
    generate_func = data_generator.generate_evaluation(data_type)

    # Inference
    output_dict = forward(model, generate_func, cuda, return_input=False, 
        return_target=False)

    # Write submission
    write_submission(output_dict, subtask, data_type, submission_path)
Example #28
0
def train(args):
    """Train AudioSet tagging model. 

    Args:
      dataset_dir: str
      workspace: str
      data_type: 'balanced_train' | 'unbalanced_train'
      frames_per_second: int
      mel_bins: int
      model_type: str
      loss_type: 'bce'
      balanced: bool
      augmentation: str
      batch_size: int
      learning_rate: float
      resume_iteration: int
      early_stop: int
      accumulation_steps: int
      cuda: bool
    """

    # Arguments & parameters
    workspace = args.workspace
    data_type = args.data_type
    window_size = args.window_size
    hop_size = args.hop_size
    mel_bins = args.mel_bins
    fmin = args.fmin
    fmax = args.fmax
    model_type = args.model_type
    loss_type = args.loss_type
    balanced = args.balanced
    augmentation = args.augmentation
    batch_size = args.batch_size
    learning_rate = args.learning_rate
    resume_iteration = args.resume_iteration
    early_stop = args.early_stop
    device = torch.device('cuda' if args.cuda and torch.cuda.is_available()
                          else 'cpu')
    filename = args.filename

    num_workers = 8
    sample_rate = config.sample_rate
    clip_samples = config.clip_samples
    classes_num = config.classes_num
    loss_func = get_loss_func(loss_type)
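    # For loss_type='bce', get_loss_func is assumed to return a clip-wise binary
    # cross-entropy of roughly the form
    #   F.binary_cross_entropy(output_dict['clipwise_output'], target_dict['target']),
    # matching the call loss_func(batch_output_dict, batch_target_dict) below.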

    # Paths
    black_list_csv = os.path.join(workspace, 'black_list',
                                  'dcase2017task4.csv')

    train_indexes_hdf5_path = os.path.join(workspace, 'hdf5s', 'indexes',
                                           '{}.h5'.format(data_type))

    eval_bal_indexes_hdf5_path = os.path.join(workspace, 'hdf5s', 'indexes',
                                              'balanced_train.h5')

    eval_test_indexes_hdf5_path = os.path.join(workspace, 'hdf5s', 'indexes',
                                               'eval.h5')

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename,
        'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'
        .format(sample_rate, window_size, hop_size, mel_bins, fmin,
                fmax), 'data_type={}'.format(data_type), model_type,
        'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size))
    create_folder(checkpoints_dir)

    statistics_path = os.path.join(
        workspace, 'statistics', filename,
        'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'
        .format(sample_rate, window_size, hop_size, mel_bins, fmin,
                fmax), 'data_type={}'.format(data_type), model_type,
        'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size), 'statistics.pkl')
    create_folder(os.path.dirname(statistics_path))

    logs_dir = os.path.join(
        workspace, 'logs', filename,
        'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'
        .format(sample_rate, window_size, hop_size, mel_bins, fmin,
                fmax), 'data_type={}'.format(data_type), model_type,
        'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size))

    create_logging(logs_dir, filemode='w')
    logging.info(args)

    if 'cuda' in str(device):
        logging.info('Using GPU.')
        device = 'cuda'
    else:
        logging.info('Using CPU.')
        device = 'cpu'

    # Model
    Model = eval(model_type)
    model = Model(sample_rate=sample_rate,
                  window_size=window_size,
                  hop_size=hop_size,
                  mel_bins=mel_bins,
                  fmin=fmin,
                  fmax=fmax,
                  classes_num=classes_num)

    params_num = count_parameters(model)
    # flops_num = count_flops(model, clip_samples)
    logging.info('Parameters num: {}'.format(params_num))
    # logging.info('Flops num: {:.3f} G'.format(flops_num / 1e9))

    # Dataset will be used by DataLoader later. Dataset takes a meta as input
    # and returns a waveform and a target.
    dataset = AudioSetDataset(clip_samples=clip_samples,
                              classes_num=classes_num)

    # Train sampler
    (train_sampler,
     train_collector) = get_train_sampler(balanced, augmentation,
                                          train_indexes_hdf5_path,
                                          black_list_csv, batch_size)

    # Evaluate sampler
    eval_bal_sampler = EvaluateSampler(
        indexes_hdf5_path=eval_bal_indexes_hdf5_path, batch_size=batch_size)

    eval_test_sampler = EvaluateSampler(
        indexes_hdf5_path=eval_test_indexes_hdf5_path, batch_size=batch_size)

    eval_collector = Collator(mixup_alpha=None)
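    # mixup_alpha=None presumably disables mixup in the evaluation collator, so
    # evaluation batches keep their original (waveform, target) pairs.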

    # Data loader
    train_loader = torch.utils.data.DataLoader(dataset=dataset,
                                               batch_sampler=train_sampler,
                                               collate_fn=train_collector,
                                               num_workers=num_workers,
                                               pin_memory=True)

    eval_bal_loader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_sampler=eval_bal_sampler,
        collate_fn=eval_collector,
        num_workers=num_workers,
        pin_memory=True)

    eval_test_loader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_sampler=eval_test_sampler,
        collate_fn=eval_collector,
        num_workers=num_workers,
        pin_memory=True)

    # Evaluator
    bal_evaluator = Evaluator(model=model, generator=eval_bal_loader)
    test_evaluator = Evaluator(model=model, generator=eval_test_loader)

    # Statistics
    statistics_container = StatisticsContainer(statistics_path)

    # Optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=learning_rate,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0.,
                           amsgrad=True)

    train_bgn_time = time.time()

    # Resume training
    if resume_iteration > 0:
        resume_checkpoint_path = os.path.join(
            workspace, 'checkpoints', filename,
            'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'
            .format(sample_rate, window_size, hop_size, mel_bins, fmin,
                    fmax), 'data_type={}'.format(data_type), model_type,
            'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
            'augmentation={}'.format(augmentation),
            'batch_size={}'.format(batch_size),
            '{}_iterations.pth'.format(resume_iteration))

        logging.info('Loading checkpoint {}'.format(resume_checkpoint_path))
        checkpoint = torch.load(resume_checkpoint_path)
        model.load_state_dict(checkpoint['model'])
        train_sampler.load_state_dict(checkpoint['sampler'])
        statistics_container.load_state_dict(resume_iteration)
        iteration = checkpoint['iteration']

    else:
        iteration = 0

    # Parallel
    print('GPU number: {}'.format(torch.cuda.device_count()))
    model = torch.nn.DataParallel(model)

    if 'cuda' in str(device):
        model.to(device)

    time1 = time.time()

    for batch_data_dict in train_loader:
        """batch_data_dict: {
            'audio_name': (batch_size [*2 if mixup],), 
            'waveform': (batch_size [*2 if mixup], clip_samples), 
            'target': (batch_size [*2 if mixup], classes_num), 
            (if mixup is used) 'mixup_lambda': (batch_size * 2,)}
        """

        # Evaluate
        if (iteration % 2000 == 0
                and iteration > resume_iteration) or (iteration == 0):
            train_fin_time = time.time()

            bal_statistics = bal_evaluator.evaluate()
            test_statistics = test_evaluator.evaluate()

            logging.info('Validate bal mAP: {:.3f}'.format(
                np.mean(bal_statistics['average_precision'])))

            logging.info('Validate test mAP: {:.3f}'.format(
                np.mean(test_statistics['average_precision'])))

            statistics_container.append(iteration,
                                        bal_statistics,
                                        data_type='bal')
            statistics_container.append(iteration,
                                        test_statistics,
                                        data_type='test')
            statistics_container.dump()

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info(
                'iteration: {}, train time: {:.3f} s, validate time: {:.3f} s'
                ''.format(iteration, train_time, validate_time))

            logging.info('------------------------------------')

            train_bgn_time = time.time()

        # Save model
        if iteration % 20000 == 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.module.state_dict(),
                'optimizer': optimizer.state_dict(),
                'sampler': train_sampler.state_dict()
            }

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Move data to device
        for key in batch_data_dict.keys():
            batch_data_dict[key] = move_data_to_device(batch_data_dict[key],
                                                       device)

        # Forward
        model.train()

        if 'mixup' in augmentation:
            batch_output_dict = model(batch_data_dict['waveform'],
                                      batch_data_dict['mixup_lambda'])
            """{'clipwise_output': (batch_size, classes_num), ...}"""

            batch_target_dict = {
                'target':
                do_mixup(batch_data_dict['target'],
                         batch_data_dict['mixup_lambda'])
            }
            """{'target': (batch_size, classes_num)}"""
        else:
            batch_output_dict = model(batch_data_dict['waveform'], None)
            """{'clipwise_output': (batch_size, classes_num), ...}"""

            batch_target_dict = {'target': batch_data_dict['target']}
            """{'target': (batch_size, classes_num)}"""

        # Loss
        loss = loss_func(batch_output_dict, batch_target_dict)

        # Backward
        loss.backward()
        print(loss)

        optimizer.step()
        optimizer.zero_grad()

        if iteration % 10 == 0:
            print('--- Iteration: {}, train time: {:.3f} s / 10 iterations ---'\
                .format(iteration, time.time() - time1))
            time1 = time.time()

        iteration += 1

        # Stop learning
        if iteration == early_stop:
            break
Example #29
0
def train(args):
    '''Training. Model will be saved after several iterations. 
    
    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      taxonomy_level: 'fine' | 'coarse'
      model_type: string, e.g. 'Cnn_9layers_MaxPooling'
      holdout_fold: '1' | 'none', where '1' indicates using validation and
          'none' indicates using full data for training
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    taxonomy_level = args.taxonomy_level
    model_type = args.model_type
    holdout_fold = args.holdout_fold
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    seq_len = 640
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    max_iteration = 10  # Number of mini-batches to evaluate on training data
    reduce_lr = True

    labels = get_labels(taxonomy_level)
    classes_num = len(labels)

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    train_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train.h5')

    validate_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'validate.h5')

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train.h5')

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_folder(checkpoints_dir)

    _temp_submission_path = os.path.join(
        workspace, '_temp_submissions', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type, '_submission.csv')
    create_folder(os.path.dirname(_temp_submission_path))

    validate_statistics_path = os.path.join(
        workspace, 'statistics', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))

    annotation_path = os.path.join(dataset_dir, 'annotations.csv')

    yaml_path = os.path.join(dataset_dir, 'dcase-ust-taxonomy.yaml')

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    if cuda:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    Model = eval(model_type)
    model = Model(classes_num, seq_len, mel_bins, cuda)

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=1e-3,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0.,
                           amsgrad=True)
    print('cliqueNet parameters:',
          sum(param.numel() for param in model.parameters()))

    # Data generator
    data_generator = DataGenerator(train_hdf5_path=train_hdf5_path,
                                   validate_hdf5_path=validate_hdf5_path,
                                   holdout_fold=holdout_fold,
                                   scalar=scalar,
                                   batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          taxonomy_level=taxonomy_level,
                          cuda=cuda,
                          verbose=False)

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate
        if iteration % 200 == 0:
            logging.info('------------------------------------')
            logging.info('Iteration: {}, {} level statistics:'.format(
                iteration, taxonomy_level))

            train_fin_time = time.time()

            # Evaluate on training data
            if mini_data:
                raise Exception('`mini_data` flag must be set to False to use '
                                'the official evaluation tool!')

            train_statistics = evaluator.evaluate(data_type='train',
                                                  max_iteration=None)

            # Evaluate on validation data
            if holdout_fold != 'none':
                validate_statistics = evaluator.evaluate(
                    data_type='validate',
                    submission_path=_temp_submission_path,
                    annotation_path=annotation_path,
                    yaml_path=yaml_path,
                    max_iteration=None)

                validate_statistics_container.append_and_dump(
                    iteration, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model
        if iteration % 1000 == 0 and iteration > 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU
        for key in batch_data_dict.keys():
            if key in ['feature', 'fine_target', 'coarse_target']:
                batch_data_dict[key] = move_data_to_gpu(
                    batch_data_dict[key], cuda)

        # Train
        model.train()
        batch_output = model(batch_data_dict['feature'])

        # loss
        batch_target = batch_data_dict['{}_target'.format(taxonomy_level)]
        loss = binary_cross_entropy(batch_output, batch_target)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == 3000:
            break

        iteration += 1
Example #30
0
def train(args):

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    pretrained_checkpoint_path = args.pretrained_checkpoint_path
    freeze_base = args.freeze_base
    loss_type = args.loss_type
    augmentation = args.augmentation
    learning_rate = args.learning_rate
    batch_size = args.batch_size
    resume_iteration = args.resume_iteration
    stop_iteration = args.stop_iteration
    device = 'cuda' if (args.cuda and torch.cuda.is_available()) else 'cpu'
    filename = args.filename
    num_workers = 8

    # The model constructor below needs these settings; the original snippet never
    # defines them, so reading them from config (as other examples do) is an assumption.
    sample_rate = config.sample_rate
    window_size = config.window_size
    hop_size = config.hop_size
    mel_bins = config.mel_bins
    fmin = config.fmin
    fmax = config.fmax
    classes_num = config.classes_num

    loss_func = get_loss_func(loss_type)
    pretrain = True if pretrained_checkpoint_path else False

    hdf5_path = os.path.join(workspace, 'features', 'waveform.h5')

    checkpoints_dir = os.path.join(workspace, 'checkpoints', filename,
                                   'holdout_fold={}'.format(holdout_fold),
                                   model_type, 'pretrain={}'.format(pretrain),
                                   'loss_type={}'.format(loss_type),
                                   'augmentation={}'.format(augmentation),
                                   'batch_size={}'.format(batch_size),
                                   'freeze_base={}'.format(freeze_base))
    create_folder(checkpoints_dir)

    statistics_path = os.path.join(workspace, 'statistics', filename,
                                   'holdout_fold={}'.format(holdout_fold),
                                   model_type, 'pretrain={}'.format(pretrain),
                                   'loss_type={}'.format(loss_type),
                                   'augmentation={}'.format(augmentation),
                                   'batch_size={}'.format(batch_size),
                                   'freeze_base={}'.format(freeze_base),
                                   'statistics.pickle')
    create_folder(os.path.dirname(statistics_path))

    logs_dir = os.path.join(workspace, 'logs', filename,
                            'holdout_fold={}'.format(holdout_fold), model_type,
                            'pretrain={}'.format(pretrain),
                            'loss_type={}'.format(loss_type),
                            'augmentation={}'.format(augmentation),
                            'batch_size={}'.format(batch_size),
                            'freeze_base={}'.format(freeze_base))
    create_logging(logs_dir, 'w')
    logging.info(args)

    if 'cuda' in device:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Model
    Model = eval(model_type)
    model = Model(sample_rate, window_size, hop_size, mel_bins, fmin, fmax,
                  classes_num, freeze_base)

    # Statistics
    statistics_container = StatisticsContainer(statistics_path)

    if pretrain:
        logging.info(
            'Load pretrained model from {}'.format(pretrained_checkpoint_path))
        model.load_from_pretrain(pretrained_checkpoint_path)

    if resume_iteration:
        resume_checkpoint_path = os.path.join(
            checkpoints_dir, '{}_iterations.pth'.format(resume_iteration))
        logging.info(
            'Load resume model from {}'.format(resume_checkpoint_path))
        resume_checkpoint = torch.load(resume_checkpoint_path)
        model.load_state_dict(resume_checkpoint['model'])
        statistics_container.load_state_dict(resume_iteration)
        iteration = resume_checkpoint['iteration']
    else:
        iteration = 0

    # Parallel
    print('GPU number: {}'.format(torch.cuda.device_count()))
    model = torch.nn.DataParallel(model)

    dataset = GtzanDataset()

    # Data generator
    train_sampler = TrainSampler(
        hdf5_path=hdf5_path,
        holdout_fold=holdout_fold,
        batch_size=batch_size * 2 if 'mixup' in augmentation else batch_size)

    validate_sampler = EvaluateSampler(hdf5_path=hdf5_path,
                                       holdout_fold=holdout_fold,
                                       batch_size=batch_size)

    # Data loader
    train_loader = torch.utils.data.DataLoader(dataset=dataset,
                                               batch_sampler=train_sampler,
                                               collate_fn=collate_fn,
                                               num_workers=num_workers,
                                               pin_memory=True)

    validate_loader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_sampler=validate_sampler,
        collate_fn=collate_fn,
        num_workers=num_workers,
        pin_memory=True)

    if 'cuda' in device:
        model.to(device)

    # Optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=learning_rate,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0.,
                           amsgrad=True)

    if 'mixup' in augmentation:
        mixup_augmenter = Mixup(mixup_alpha=1.)
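        # With mixup_alpha=1., get_lambda presumably draws lambda ~ Beta(1, 1)
        # (i.e. uniform on [0, 1]) per pair of consecutive samples in the doubled
        # batch, returning interleaved coefficients (lambda, 1 - lambda).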

    # Evaluator
    evaluator = Evaluator(model=model)

    train_bgn_time = time.time()

    # Train on mini batches
    for batch_data_dict in train_loader:

        # Evaluate
        if iteration % 200 == 0 and iteration > 0:
            if resume_iteration > 0 and iteration == resume_iteration:
                pass
            else:
                logging.info('------------------------------------')
                logging.info('Iteration: {}'.format(iteration))

                train_fin_time = time.time()

                statistics = evaluator.evaluate(validate_loader)
                logging.info('Validate accuracy: {:.3f}'.format(
                    statistics['accuracy']))

                statistics_container.append(iteration, statistics, 'validate')
                statistics_container.dump()

                train_time = train_fin_time - train_bgn_time
                validate_time = time.time() - train_fin_time

                logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                             ''.format(train_time, validate_time))

                train_bgn_time = time.time()

        # Save model
        if iteration % 2000 == 0 and iteration > 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.module.state_dict()
            }

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        if 'mixup' in augmentation:
            batch_data_dict['mixup_lambda'] = mixup_augmenter.get_lambda(
                len(batch_data_dict['waveform']))

        # Move data to GPU
        for key in batch_data_dict.keys():
            batch_data_dict[key] = move_data_to_device(batch_data_dict[key],
                                                       device)

        # Train
        model.train()

        if 'mixup' in augmentation:
            batch_output_dict = model(batch_data_dict['waveform'],
                                      batch_data_dict['mixup_lambda'])
            """{'clipwise_output': (batch_size, classes_num), ...}"""

            batch_target_dict = {
                'target':
                do_mixup(batch_data_dict['target'],
                         batch_data_dict['mixup_lambda'])
            }
            """{'target': (batch_size, classes_num)}"""
        else:
            batch_output_dict = model(batch_data_dict['waveform'], None)
            """{'clipwise_output': (batch_size, classes_num), ...}"""

            batch_target_dict = {'target': batch_data_dict['target']}
            """{'target': (batch_size, classes_num)}"""

        # loss
        loss = loss_func(batch_output_dict, batch_target_dict)
        print(iteration, loss)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == stop_iteration:
            break

        iteration += 1