Example #1
def main():

    # Parse Arguments
    parser = argparse.ArgumentParser(description='Train WaveRNN Vocoder')
    parser.add_argument('--lr',
                        '-l',
                        type=float,
                        help='[float] override hparams.py learning rate')
    parser.add_argument('--batch_size',
                        '-b',
                        type=int,
                        help='[int] override hparams.py batch size')
    parser.add_argument('--force_train',
                        '-f',
                        action='store_true',
                        help='Forces the model to train past total steps')
    parser.add_argument('--gta',
                        '-g',
                        action='store_true',
                        help='train wavernn on GTA features')
    parser.add_argument(
        '--force_cpu',
        '-c',
        action='store_true',
        help='Forces CPU-only training, even when in CUDA capable environment')
    parser.add_argument('--hp_file',
                        metavar='FILE',
                        default='hparams.py',
                        help='The file to use for the hyperparameters')
    args = parser.parse_args()

    hp.configure(args.hp_file)  # load hparams from file
    if args.lr is None:
        args.lr = hp.voc_lr
    if args.batch_size is None:
        args.batch_size = hp.voc_batch_size

    paths = Paths(hp.data_path, hp.voc_model_id, hp.tts_model_id)

    batch_size = args.batch_size
    force_train = args.force_train
    train_gta = args.gta
    lr = args.lr

    if not args.force_cpu and torch.cuda.is_available():
        device = torch.device('cuda')
        if batch_size % torch.cuda.device_count() != 0:
            raise ValueError(
                '`batch_size` must be evenly divisible by n_gpus!')
    else:
        device = torch.device('cpu')
    print('Using device:', device)

    print('\nInitialising Model...\n')

    # Instantiate WaveRNN Model
    voc_model = WaveRNN(rnn_dims=hp.voc_rnn_dims,
                        fc_dims=hp.voc_fc_dims,
                        bits=hp.bits,
                        pad=hp.voc_pad,
                        upsample_factors=hp.voc_upsample_factors,
                        feat_dims=hp.num_mels,
                        compute_dims=hp.voc_compute_dims,
                        res_out_dims=hp.voc_res_out_dims,
                        res_blocks=hp.voc_res_blocks,
                        hop_length=hp.hop_length,
                        sample_rate=hp.sample_rate,
                        mode=hp.voc_mode).to(device)

    # Check to make sure the hop length is correctly factorised
    assert np.cumprod(hp.voc_upsample_factors)[-1] == hp.hop_length

    optimizer = optim.Adam(voc_model.parameters())
    restore_checkpoint('voc',
                       paths,
                       voc_model,
                       optimizer,
                       create_if_missing=True)

    train_set, test_set = get_vocoder_datasets(paths.data, batch_size,
                                               train_gta)

    total_steps = 10_000_000 if force_train else hp.voc_total_steps

    simple_table([
        ('Remaining', str(
            (total_steps - voc_model.get_step()) // 1000) + 'k Steps'),
        ('Batch Size', batch_size), ('LR', lr),
        ('Sequence Len', hp.voc_seq_len), ('GTA Train', train_gta)
    ])

    loss_func = F.cross_entropy if voc_model.mode == 'RAW' else discretized_mix_logistic_loss

    voc_train_loop(paths, voc_model, loss_func, optimizer, train_set, test_set,
                   lr, total_steps)

    print('Training Complete.')
    print(
        'To continue training increase voc_total_steps in hparams.py or use --force_train'
    )
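A note on the assert above: WaveRNN's upsampling network stretches the mel frames back to audio-sample resolution, so the product of hp.voc_upsample_factors must equal hp.hop_length. A minimal sketch of that invariant with illustrative values (the real numbers live in hparams.py):

import numpy as np

# Illustrative values only; the real ones come from hparams.py.
voc_upsample_factors = (5, 5, 11)   # per-layer upsampling of the mel frames
hop_length = 275                    # audio samples per mel frame

# Same check as the assert in the training script: the upsample layers
# must multiply out to exactly one hop of audio samples per mel frame.
assert np.cumprod(voc_upsample_factors)[-1] == hop_length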
Example #2
def main():
    # Parse Arguments
    parser = argparse.ArgumentParser(description='Train Tacotron TTS')
    parser.add_argument('--force_train',
                        '-f',
                        action='store_true',
                        help='Forces the model to train past total steps')
    parser.add_argument('--force_gta',
                        '-g',
                        action='store_true',
                        help='Force the model to create GTA features')
    parser.add_argument(
        '--force_cpu',
        '-c',
        action='store_true',
        help='Forces CPU-only training, even when in CUDA capable environment')
    parser.add_argument('--hp_file',
                        metavar='FILE',
                        default='hparams.py',
                        help='The file to use for the hyperparameters')
    args = parser.parse_args()

    hp.configure(args.hp_file)  # Load hparams from file
    paths = Paths(hp.data_path, hp.voc_model_id, hp.tts_model_id)

    force_train = args.force_train
    force_gta = args.force_gta

    if not args.force_cpu and torch.cuda.is_available():
        device = torch.device('cuda')
        for session in hp.tts_schedule:
            _, _, _, batch_size = session
            if batch_size % torch.cuda.device_count() != 0:
                raise ValueError(
                    '`batch_size` must be evenly divisible by n_gpus!')
    else:
        device = torch.device('cpu')
    print('Using device:', device)

    # Instantiate Tacotron Model
    print('\nInitialising Tacotron Model...\n')
    model = Tacotron(embed_dims=hp.tts_embed_dims,
                     num_chars=len(symbols),
                     encoder_dims=hp.tts_encoder_dims,
                     decoder_dims=hp.tts_decoder_dims,
                     n_mels=hp.num_mels,
                     fft_bins=hp.num_mels,
                     postnet_dims=hp.tts_postnet_dims,
                     encoder_K=hp.tts_encoder_K,
                     lstm_dims=hp.tts_lstm_dims,
                     postnet_K=hp.tts_postnet_K,
                     num_highways=hp.tts_num_highways,
                     dropout=hp.tts_dropout,
                     stop_threshold=hp.tts_stop_threshold).to(device)

    optimizer = optim.Adam(model.parameters())
    restore_checkpoint('tts', paths, model, optimizer, create_if_missing=True)

    if not force_gta:
        for i, session in enumerate(hp.tts_schedule):
            current_step = model.get_step()

            r, lr, max_step, batch_size = session

            training_steps = max_step - current_step

            # Do we need to change to the next session?
            if current_step >= max_step:
                # Are there no further sessions than the current one?
                if i == len(hp.tts_schedule) - 1:
                    # There are no more sessions. Check if we force training.
                    if force_train:
                        # Don't finish the loop - train forever
                        training_steps = 999_999_999
                    else:
                        # We have completed training. Breaking is same as continue
                        break
                else:
                    # There is a following session, go to it
                    continue

            model.r = r

            simple_table([('Steps with r=%s' % repr(r),
                           str(training_steps // 1000) + 'k Steps'),
                          ('Batch Size', batch_size), ('Learning Rate', lr),
                          ('Outputs/Step (r)', model.r)])

            train_set, attn_example = get_tts_datasets(paths.data, batch_size,
                                                       r)
            tts_train_loop(paths, model, optimizer, train_set, lr,
                           training_steps, attn_example)

        print('Training Complete.')
        print(
            'To continue training increase tts_total_steps in hparams.py or use --force_train\n'
        )

    print('Creating Ground Truth Aligned Dataset...\n')

    train_set, attn_example = get_tts_datasets(paths.data, 8, model.r)
    create_gta_features(model, train_set, paths.gta)

    print(
        '\n\nYou can now train WaveRNN on GTA features - use python train_wavernn.py --gta\n'
    )
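For reference, each entry of hp.tts_schedule unpacked above is a 4-tuple (r, lr, max_step, batch_size), where r is the reduction factor shown as 'Outputs/Step (r)' in the table. A hypothetical schedule, shown only to illustrate the expected shape (the real values belong in hparams.py):

# Hypothetical schedule; each session is (r, lr, max_step, batch_size).
# Training walks through the sessions in order, lowering r (decoder
# outputs per step) as the model converges.
tts_schedule = [
    (7, 1e-3,  10_000, 32),
    (5, 1e-4, 100_000, 32),
    (2, 1e-4, 180_000, 16),
    (1, 1e-4, 350_000,  8),
]

for r, lr, max_step, batch_size in tts_schedule:
    print(f'r={r}  lr={lr}  until step {max_step}  batch={batch_size}')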
Example #3
                            postnet_k=hp.forward_postnet_K,
                            postnet_dims=hp.forward_postnet_dims,
                            prenet_k=hp.forward_prenet_K,
                            prenet_dims=hp.forward_prenet_dims,
                            highways=hp.forward_num_highways,
                            dropout=hp.forward_dropout,
                            n_mels=hp.num_mels).to(device)

    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    print(f'num params {params}')

    optimizer = optim.Adam(model.parameters())
    restore_checkpoint('forward',
                       paths,
                       model,
                       optimizer,
                       create_if_missing=True)

    if force_gta:
        print('Creating Ground Truth Aligned Dataset...\n')
        train_set, val_set = get_tts_datasets(paths.data,
                                              8,
                                              r=1,
                                              model_type='forward')
        create_gta_features(model, train_set, val_set, paths.gta)
        print(
            '\n\nYou can now train WaveRNN on GTA features - use python train_wavernn.py --gta\n'
        )
    else:
        trainer = ForwardTrainer(paths)
Example #4
                     fft_bins=hp.num_mels,
                     postnet_dims=hp.tts_postnet_dims,
                     encoder_K=hp.tts_encoder_K,
                     lstm_dims=hp.tts_lstm_dims,
                     postnet_K=hp.tts_postnet_K,
                     num_highways=hp.tts_num_highways,
                     dropout=hp.tts_dropout,
                     stop_threshold=hp.tts_stop_threshold).to(device)

    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    print(f'Num Params: {params}')
    optimizer = optim.Adam(model.parameters())
    restore_checkpoint('tts',
                       paths,
                       model,
                       optimizer,
                       create_if_missing=True,
                       device=device)

    if args.force_gta:
        print('Creating Ground Truth Aligned Dataset...\n')
        train_set, val_set = get_tts_datasets(paths.data, 8, model.r)
        create_gta_features(model, train_set, val_set, paths.gta)
        print(
            '\n\nYou can now train WaveRNN on GTA features - use python train_wavernn.py --gta\n'
        )
    elif args.force_align:
        print('Creating Attention Alignments and Pitch Values...')
        train_set, val_set = get_tts_datasets(paths.data, 1, model.r)
        create_align_features(model, train_set, val_set,
                              paths.alg)  # paths.phon_pitch)
Example #5
def main(argv):
    args = parser.parse_args(argv)

    # Load configuration
    conf = Configuration.from_json(args.config)
    conf.args = args
    if args.conf:
        new_conf_entries = {}
        for arg in args.conf:
            key, value = arg.split('=')
            new_conf_entries[key] = value
        conf.update(new_conf_entries)

    # Setup log directory
    if args.run_dir:
        conf.run_dir = args.run_dir
    elif args.resume:
        if os.path.exists(args.resume):
            conf.run_dir = os.path.dirname(args.resume)
    if not conf.has_attr('run_dir'):
        run_name = conf.get_attr('run_name', default='unnamed_run')
        conf.run_dir = get_run_dir(args.log_dir, run_name)
    if not args.dry:
        if not os.path.isdir(conf.run_dir):
            os.mkdir(conf.run_dir)

    setup_logging(conf.run_dir, 'train', args.verbose, args.dry)

    logging.info('Commandline arguments: {}'.format(' '.join(argv)))

    if not args.dry:
        logging.info('This run is saved to: {}'.format(conf.run_dir))
        config_path = get_config_path(conf.run_dir)
        conf.serialize(config_path)

    if args.cuda != '':
        try:
            args.cuda = utils.set_cuda_env(args.cuda)
        except Exception:
            logging.critical('No free GPU on this machine. Aborting run.')
            return
        logging.info('Running on GPU {}'.format(args.cuda))

    if args.verbose:
        logging.debug(str(conf))

    utils.set_random_seeds(conf.seed)

    # Setup model
    logging.info('Setting up training runner {}'.format(conf.runner_type))
    runner = build_runner(conf, conf.runner_type, args.cuda, mode='train')

    if args.print_model:
        print(str(runner))

    if args.print_parameters:
        print_model_parameters(runner)

    # Handle resuming from checkpoint
    restore_state = None
    if args.resume:
        if os.path.exists(args.resume):
            restore_state = restore_checkpoint(args.resume, runner)
            logging.info('Restored checkpoint from {}'.format(args.resume))
        else:
            logging.critical(('Checkpoint {} to restore '
                              'from not found').format(args.resume))
            return

    use_tensorboard = conf.get_attr('use_tensorboard',
                                    default=DEFAULT_USE_TENSORBOARD)
    if use_tensorboard and not args.dry:
        from tensorboardX import SummaryWriter
        summary_writer = SummaryWriter(conf.run_dir)
        logging.debug('Using tensorboardX summary writer')
    else:
        summary_writer = None

    # Load datasets
    num_workers = conf.get_attr('num_data_workers',
                                default=DEFAULT_NUM_WORKERS)
    num_train_samples = conf.get_attr('num_train_subset_samples', default=None)
    num_val_samples = conf.get_attr('num_validation_subset_samples',
                                    default=None)

    train_dataset_name = conf.get_attr('train_dataset', alternative='dataset')
    logging.info('Loading training dataset {}'.format(train_dataset_name))
    train_dataset = load_dataset(conf, args.data_dir, train_dataset_name,
                                 'train')
    train_sampler = maybe_get_subset_sampler(num_train_samples, train_dataset)
    train_loader = DataLoader(dataset=train_dataset,
                              num_workers=num_workers,
                              batch_size=conf.batch_size,
                              sampler=train_sampler,
                              shuffle=train_sampler is None,
                              worker_init_fn=utils.set_worker_seeds)

    val_dataset_name = conf.get_attr('validation_dataset',
                                     alternative='dataset')
    logging.info('Loading validation dataset {}'.format(val_dataset_name))
    val_dataset = load_dataset(conf, args.data_dir, val_dataset_name, 'val')
    val_sampler = maybe_get_subset_sampler(num_val_samples, val_dataset)
    val_loader = DataLoader(dataset=val_dataset,
                            num_workers=num_workers,
                            batch_size=conf.get_attr('validation_batch_size',
                                                     default=conf.batch_size),
                            sampler=val_sampler,
                            shuffle=False,
                            worker_init_fn=utils.set_worker_seeds)

    # Setup validation checkpoints
    chkpt_metrics = conf.get_attr('validation_checkpoint_metrics', default=[])
    chkpt_metric_dirs = {
        metric: os.path.join(conf.run_dir, 'best_' + metric)
        for metric in chkpt_metrics
    }
    for metric_dir in chkpt_metric_dirs.values():
        if not args.dry and not os.path.isdir(metric_dir):
            os.mkdir(metric_dir)

    # Setup early stopping
    if conf.has_attr('early_stopping'):
        from training.early_stopping import EarlyStopper
        early_stoppers = [
            EarlyStopper(conf.early_stopping['metric_name'],
                         conf.early_stopping['patience'],
                         conf.early_stopping.get('min_value', None),
                         conf.early_stopping.get('max_difference', None))
        ]
    elif conf.has_attr('early_stoppers'):
        from training.early_stopping import EarlyStopper
        early_stoppers = []
        for early_stopping_conf in conf.early_stoppers:
            min_value = early_stopping_conf.get('min_value', None)
            max_diff = early_stopping_conf.get('max_difference', None)
            early_stoppers.append(
                EarlyStopper(early_stopping_conf['metric_name'],
                             early_stopping_conf['patience'], min_value,
                             max_diff))
    else:
        early_stoppers = []

    logging.info('Starting training run of {} epochs'.format(conf.num_epochs))

    # Train
    try:
        train_net(conf, runner, train_loader, val_loader, args.cuda,
                  chkpt_metric_dirs, restore_state, summary_writer,
                  early_stoppers)
    except KeyboardInterrupt:
        if summary_writer is not None:
            summary_writer.close()
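The --conf handling at the top of main() simply splits each 'key=value' argument on '=' and merges the result into the configuration via conf.update(); the values are still strings at that point. A quick sketch of that behaviour (the script name is hypothetical):

# e.g. invoked as:  python train.py --config conf.json --conf batch_size=16 num_epochs=50
conf_args = ['batch_size=16', 'num_epochs=50']

new_conf_entries = {}
for arg in conf_args:
    key, value = arg.split('=')
    new_conf_entries[key] = value

print(new_conf_entries)  # {'batch_size': '16', 'num_epochs': '50'} - note: string values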
Example #6
def main(argv):
  args = parser.parse_args(argv)

  if args.cuda != '':
    try:
      args.cuda = utils.set_cuda_env(args.cuda)
    except Exception:
      print('No free GPU on this machine. Aborting run.')
      return
    print('Running on GPU {}'.format(args.cuda))

  # Load configuration
  conf = Configuration.from_json(args.config)
  conf.args = args
  if args.conf:
    new_conf_entries = {}
    for arg in args.conf:
      key, value = arg.split('=')
      new_conf_entries[key] = value
    conf.update(new_conf_entries)
  if args.verbose:
    print(conf)

  utils.set_random_seeds(conf.seed)

  # Setup model
  runner = build_runner(conf, conf.runner_type, args.cuda, mode='test')

  # Handle resuming from checkpoint
  if args.checkpoint != 'NONE':
    if os.path.exists(args.checkpoint):
      _ = restore_checkpoint(args.checkpoint, runner, cuda=args.cuda)
      print('Restored checkpoint from {}'.format(args.checkpoint))
    else:
      print('Checkpoint {} to restore from not found'.format(args.checkpoint))
      return

  # Evaluate on full image, not crops
  conf.full_image = True

  # Load datasets
  mode = 'dataset'
  if len(args.files_or_dirs) == 0:
    datasets = [load_dataset(conf, args.data_dir, conf.validation_dataset, args.fold)]
  else:
    datasets = []
    for f in args.files_or_dirs:
      if is_dataset(f):
        dataset = load_dataset(conf, args.data_dir, f, args.fold)
        datasets.append(dataset)
      else:
        mode = 'image'
        transform = get_sr_transform(conf, 'test', downscale=False)
        datasets = [make_sr_dataset_from_folder(conf, f, transform,
                                                inference=True)
                    for f in args.files_or_dirs]

  num_workers = conf.get_attr('num_data_workers', default=DEFAULT_NUM_WORKERS)

  # Evaluate all datasets
  for dataset in datasets:
    loader = DataLoader(dataset=dataset,
                        num_workers=num_workers,
                        batch_size=1,
                        shuffle=False)

    if mode == 'dataset':
      data, _, val_metrics = runner.validate(loader, len(loader))

      print('Average metrics for {}'.format(dataset.name))
      for metric_name, metric in val_metrics.items():
        print('     {}: {}'.format(metric_name, metric))
    else:
      data = runner.infer(loader)

    if args.infer or args.dump:
      if mode == 'dataset':
        output_dir = get_run_dir(args.out_dir, dataset.name)
        if not os.path.isdir(output_dir):
          os.mkdir(output_dir)

      file_idx = 0
      for batch in data:
        if mode == 'image':
          output_dir = os.path.dirname(dataset.images[file_idx])

        named_batch = runner.get_named_outputs(batch)
        inputs = named_batch['input']
        predictions = named_batch['prediction']
        targets = named_batch['target']
        for (inp, target, prediction) in zip(inputs, targets, predictions):
          image_file = os.path.basename(dataset.images[file_idx])
          name, _ = os.path.splitext(image_file)
          file_idx += 1

          if args.dump:
            input_file = os.path.join(output_dir,
                                      '{}_input.png'.format(name))
            save_image(inp.data, input_file)
            target_file = os.path.join(output_dir,
                                       '{}_target.png'.format(name))
            save_image(target.data, target_file)
          pred_file = os.path.join(output_dir,
                                   '{}_pred.png'.format(name))
          save_image(prediction.data, pred_file)
Example #7
    parser.add_argument('--gta',
                        '-g',
                        action='store_true',
                        help='train wavernn on GTA features')
    parser.add_argument('--config',
                        metavar='FILE',
                        default='config.yaml',
                        help='The config containing all hyperparams.')
    args = parser.parse_args()

    config = read_config(args.config)
    paths = Paths(config['data_path'], config['voc_model_id'],
                  config['tts_model_id'])
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    print('Using device:', device)
    print('\nInitialising Model...\n')
    voc_model = WaveRNN.from_config(config).to(device)
    dsp = DSP.from_config(config)
    assert np.cumprod(
        config['vocoder']['model']['upsample_factors'])[-1] == dsp.hop_length

    optimizer = optim.Adam(voc_model.parameters())
    restore_checkpoint(model=voc_model,
                       optim=optimizer,
                       path=paths.voc_checkpoints / 'latest_model.pt',
                       device=device)

    voc_trainer = VocTrainer(paths=paths, dsp=dsp, config=config)
    voc_trainer.train(voc_model, optimizer, train_gta=args.gta)
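This variant is config-driven: everything the snippet touches comes out of the YAML file read by read_config(). A hypothetical fragment, written as the Python dict the snippet expects and limited to the keys actually accessed above (values are illustrative):

# Illustrative only; read_config(args.config) would return a dict shaped like this.
config = {
    'data_path': 'data/',
    'voc_model_id': 'wavernn_default',
    'tts_model_id': 'tacotron_default',
    'vocoder': {
        'model': {
            # The product of these factors must equal dsp.hop_length,
            # exactly as the assert in the snippet checks.
            'upsample_factors': [5, 5, 11],
        },
    },
}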
Example #8
                  config['tts_model_id'])

    assert len(os.listdir(paths.alg)) > 0, f'Could not find alignment files in {paths.alg}, please predict ' \
                                           f'alignments first with python train_tacotron.py --force_align!'

    force_gta = args.force_gta
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')
    print('Using device:', device)

    # Instantiate Forward TTS Model
    print('\nInitialising Forward TTS Model...\n')
    model = ForwardTacotron.from_config(config).to(device)
    optimizer = optim.Adam(model.parameters())
    restore_checkpoint(model=model,
                       optim=optimizer,
                       path=paths.forward_checkpoints / 'latest_model.pt',
                       device=device)

    if force_gta:
        print('Creating Ground Truth Aligned Dataset...\n')
        train_set, val_set = get_tts_datasets(paths.data,
                                              8,
                                              r=1,
                                              model_type='forward',
                                              filter_attention=False,
                                              max_mel_len=None)
        create_gta_features(model, train_set, val_set, paths.gta)
        print(
            '\n\nYou can now train WaveRNN on GTA features - use python train_wavernn.py --gta\n'
        )
    else:
Example #9
def main(argv):
    args = parser.parse_args(argv)

    if args.cuda != '':
        try:
            args.cuda = utils.set_cuda_env(args.cuda)
        except Exception:
            print('No free GPU on this machine. Aborting run.')
            return
        print('Running on GPU {}'.format(args.cuda))

    # Load configuration
    conf = Configuration.from_json(args.config)
    conf.args = args
    if args.conf:
        new_conf_entries = {}
        for arg in args.conf:
            key, value = arg.split('=')
            new_conf_entries[key] = value
        conf.update(new_conf_entries)
    if args.verbose:
        print(conf)

    utils.set_random_seeds(conf.seed)

    # Setup model
    runner = build_runner(conf,
                          conf.runner_type,
                          args.cuda,
                          mode='train',
                          resume=args.resume is not None)

    if args.print_model:
        print(str(runner))

    # Handle resuming from checkpoint
    restore_state = None
    if args.resume:
        if os.path.exists(args.resume):
            restore_state = restore_checkpoint(args.resume, runner)
            conf.run_dir = os.path.dirname(args.resume)
            print('Restored checkpoint from {}'.format(args.resume))
        else:
            print('Checkpoint {} to restore from not found'.format(
                args.resume))
            return

    # Setup log directory
    if args.run_dir:
        conf.run_dir = args.run_dir
    if not conf.has_attr('run_dir'):
        run_name = conf.get_attr('run_name', default='unnamed_run')
        conf.run_dir = get_run_dir(args.log_dir, run_name)
    if not args.dry:
        if not os.path.isdir(conf.run_dir):
            os.mkdir(conf.run_dir)
        print('This run is saved to: {}'.format(conf.run_dir))
        config_path = get_config_path(conf.run_dir)
        conf.serialize(config_path)

    use_tensorboard = conf.get_attr('use_tensorboard',
                                    default=DEFAULT_USE_TENSORBOARD)
    if use_tensorboard and not args.dry:
        from tensorboardX import SummaryWriter
        summary_writer = SummaryWriter(conf.run_dir)
    else:
        summary_writer = None

    # Load datasets
    num_workers = conf.get_attr('num_data_workers',
                                default=DEFAULT_NUM_WORKERS)
    num_train_samples = conf.get_attr('num_train_subset_samples', default=None)
    num_val_samples = conf.get_attr('num_validation_subset_samples',
                                    default=None)

    train_dataset_name = conf.get_attr('train_dataset', alternative='dataset')
    train_dataset = load_dataset(conf, args.data_dir, train_dataset_name,
                                 'train')
    train_sampler = maybe_get_subset_sampler(num_train_samples, train_dataset)
    train_loader = DataLoader(dataset=train_dataset,
                              num_workers=num_workers,
                              batch_size=conf.batch_size,
                              sampler=train_sampler,
                              shuffle=train_sampler is None)

    val_dataset_name = conf.get_attr('validation_dataset',
                                     alternative='dataset')
    val_dataset = load_dataset(conf, args.data_dir, val_dataset_name, 'val')
    val_sampler = maybe_get_subset_sampler(num_val_samples, val_dataset)
    val_loader = DataLoader(dataset=val_dataset,
                            num_workers=num_workers,
                            batch_size=conf.get_attr('validation_batch_size',
                                                     default=conf.batch_size),
                            sampler=val_sampler,
                            shuffle=False)

    chkpt_metrics = conf.get_attr('validation_checkpoint_metrics', default=[])
    chkpt_metric_dirs = {
        metric: os.path.join(conf.run_dir, 'best_' + metric)
        for metric in chkpt_metrics
    }
    for metric_dir in chkpt_metric_dirs.values():
        if not args.dry and not os.path.isdir(metric_dir):
            os.mkdir(metric_dir)

    # Train
    try:
        train_net(conf, runner, train_loader, val_loader, args.cuda,
                  chkpt_metric_dirs, restore_state, summary_writer)
    except KeyboardInterrupt:
        if summary_writer is not None:
            summary_writer.close()
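Both this script and Example #5 build their training DataLoader with shuffle=train_sampler is None. That is the standard PyTorch pattern: DataLoader rejects shuffle=True when an explicit sampler is supplied, so shuffling is only requested when no subset sampler is in play. A minimal sketch of the same pattern (dataset and sample count are made up):

import torch
from torch.utils.data import DataLoader, TensorDataset, SubsetRandomSampler

dataset = TensorDataset(torch.arange(100).float())

# Pretend the config asked for a 10-sample training subset.
num_train_samples = 10
train_sampler = SubsetRandomSampler(range(num_train_samples)) if num_train_samples else None

loader = DataLoader(dataset,
                    batch_size=4,
                    sampler=train_sampler,
                    shuffle=train_sampler is None)  # shuffle only when no sampler is set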
Example #10
def main():
    # Parse Arguments
    parser = argparse.ArgumentParser(description='Train Tacotron TTS')
    parser.add_argument('--force_train',
                        '-f',
                        action='store_true',
                        help='Forces the model to train past total steps')
    parser.add_argument('--force_gta',
                        '-g',
                        action='store_true',
                        help='Force the model to create GTA features')
    parser.add_argument(
        '--force_cpu',
        '-c',
        action='store_true',
        help='Forces CPU-only training, even when in CUDA capable environment')
    parser.add_argument('--hp_file',
                        metavar='FILE',
                        default='hparams.py',
                        help='The file to use for the hyperparameters')
    args = parser.parse_args()

    hp.configure(args.hp_file)  # Load hparams from file

    paths = Paths(hp.data_path, hp.voc_model_id, hp.tts_model_id)

    force_gta = args.force_gta

    if not args.force_cpu and torch.cuda.is_available():
        device = torch.device('cuda')
        for session in hp.forward_schedule:
            _, _, batch_size = session
            if batch_size % torch.cuda.device_count() != 0:
                raise ValueError(
                    '`batch_size` must be evenly divisible by n_gpus!')
    else:
        device = torch.device('cpu')
    print('Using device:', device)

    # Instantiate Forward TTS Model
    print('\nInitialising Forward TTS Model...\n')
    model = ForwardTacotron(embed_dims=hp.forward_embed_dims,
                            num_chars=len(symbols),
                            durpred_rnn_dims=hp.forward_durpred_rnn_dims,
                            durpred_conv_dims=hp.forward_durpred_conv_dims,
                            rnn_dim=hp.forward_rnn_dims,
                            postnet_k=hp.forward_postnet_K,
                            postnet_dims=hp.forward_postnet_dims,
                            prenet_k=hp.forward_prenet_K,
                            prenet_dims=hp.forward_prenet_dims,
                            highways=hp.forward_num_highways,
                            dropout=hp.forward_dropout,
                            n_mels=hp.num_mels).to(device)

    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    print(f'num params {params}')

    optimizer = optim.Adam(model.parameters())
    restore_checkpoint('forward',
                       paths,
                       model,
                       optimizer,
                       create_if_missing=True)

    if not force_gta:
        for i, session in enumerate(hp.forward_schedule):
            current_step = model.get_step()

            lr, max_step, batch_size = session

            training_steps = max_step - current_step

            simple_table([('Steps', str(training_steps // 1000) + 'k Steps'),
                          ('Batch Size', batch_size), ('Learning Rate', lr)])

            train_set, mel_example = get_tts_datasets(paths.data,
                                                      batch_size,
                                                      1,
                                                      alignments=True)
            train_loop(paths, model, optimizer, train_set, lr, training_steps,
                       mel_example)

    train_set, mel_example = get_tts_datasets(paths.data,
                                              8,
                                              1,
                                              alignments=True)
    create_gta_features(model, train_set, paths.gta)
    print('Training Complete.')
Example #11
    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    print('Using device:', device)

    print('\nInitialising Model...\n')

    # Instantiate WaveRNN Model
    voc_model = WaveRNN(rnn_dims=hp.voc_rnn_dims,
                        fc_dims=hp.voc_fc_dims,
                        bits=hp.bits,
                        pad=hp.voc_pad,
                        upsample_factors=hp.voc_upsample_factors,
                        feat_dims=hp.num_mels,
                        compute_dims=hp.voc_compute_dims,
                        res_out_dims=hp.voc_res_out_dims,
                        res_blocks=hp.voc_res_blocks,
                        hop_length=hp.hop_length,
                        sample_rate=hp.sample_rate,
                        mode=hp.voc_mode).to(device)

    # Check to make sure the hop length is correctly factorised
    assert np.cumprod(hp.voc_upsample_factors)[-1] == hp.hop_length

    optimizer = optim.Adam(voc_model.parameters())
    restore_checkpoint('voc', paths, voc_model, optimizer, create_if_missing=True)

    voc_trainer = VocTrainer(paths)
    voc_trainer.train(voc_model, optimizer, train_gta=args.gta)
Example #12
def main(argv):
  args = parser.parse_args(argv)

  setup_logging(os.path.dirname(args.checkpoint), 'eval',
                args.verbose, args.dry)

  logging.info('Commandline arguments: {}'.format(' '.join(argv)))

  if args.cuda != '':
    try:
      args.cuda = utils.set_cuda_env(args.cuda)
    except Exception:
      logging.critical('No free GPU on this machine. Aborting run.')
      return
    logging.info('Running on GPU {}'.format(args.cuda))

  # Load configuration
  conf = Configuration.from_json(args.config)
  conf.args = args
  if args.conf:
    new_conf_entries = {}
    for arg in args.conf:
      key, value = arg.split('=')
      new_conf_entries[key] = value
    conf.update(new_conf_entries)

  if args.verbose:
    logging.debug(conf)

  utils.set_random_seeds(conf.seed)

  if args.raw:
    # This is a hack to suppress the output transform when we request raw data
    conf.application = 'none'
    if conf.has_attr('tasks'):
      for name, task in conf.tasks.items():
        if 'application' in task:
          logging.debug(('Changing output transform in task {} '
                         'from {} to none').format(name,
                                                   task['application']))
          task['application'] = 'none'

  # Setup model
  runner = build_runner(conf, conf.runner_type, args.cuda, mode='test')

  # Handle resuming from checkpoint
  if args.checkpoint != 'NONE':
    if os.path.exists(args.checkpoint):
      _ = restore_checkpoint(args.checkpoint, runner, cuda=args.cuda)
      logging.info('Restored checkpoint from {}'.format(args.checkpoint))
    else:
      logging.critical(('Checkpoint {} to restore '
                       'from not found').format(args.checkpoint))
      return

  # Load datasets
  mode = 'dataset'
  if len(args.files_or_dirs) == 0:
    datasets = [load_dataset(conf, args.data_dir,
                             conf.validation_dataset, args.fold)]
  else:
    datasets = []
    for f in args.files_or_dirs:
      if is_dataset(f):
        dataset = load_dataset(conf, args.data_dir, f, args.fold)
        datasets.append(dataset)

  if args.raw:
    mode = 'raw'

  num_samples = conf.get_attr('num_validation_subset_samples',
                              default=None)

  # Evaluate all datasets
  for dataset in datasets:
    logging.info('Evaluating dataset {}'.format(dataset.name))

    sampler = maybe_get_subset_sampler(num_samples, dataset)
    loader = DataLoader(dataset=dataset,
                        num_workers=DEFAULT_NUM_WORKERS,
                        batch_size=1,
                        sampler=sampler,
                        shuffle=False)

    if mode == 'dataset':
      data, _, val_metrics = runner.validate(loader, len(loader))

      res_str = 'Average metrics for {}\n'.format(dataset.name)
      for metric_name, metric in val_metrics.items():
        res_str += '     {}: {}\n'.format(metric_name, metric)
      logging.info(res_str)
    else:
      data = runner.infer(loader)

    if not args.dry and (args.infer or args.dump):
      if mode == 'dataset' or mode == 'raw':
        conf_name = os.path.splitext(os.path.basename(conf.file))[0]
        output_dir = get_run_dir(args.out_dir, '{}_{}'.format(dataset.name,
                                                              conf_name))
        if not os.path.isdir(output_dir):
          os.mkdir(output_dir)

      logging.info('Writing images to {}'.format(output_dir))

      file_idx = 0
      for batch in data:
        if mode == 'image':
          output_dir = os.path.dirname(dataset.images[file_idx])

        named_batch = runner.get_named_outputs(batch)
        inp = named_batch['input']

        if 'prediction' in named_batch:
          batch_size = named_batch['prediction'].shape[0]
          filenames = [dataset.get_filename(idx)
                       for idx in range(file_idx, file_idx + batch_size)]
          save_output_images(dataset, inp, named_batch['prediction'],
                             named_batch['target'], output_dir,
                             filenames, 'default', args.dump, args.raw)

        file_idx += len(filenames)

      logging.info(('Finished writing images for '
                   'dataset {}').format(dataset.name))