def specific_default_hparams(cls):
        """Return default HParams"""
        hparams = HParams(
            num_layers=4,  # number of Multihead Attn + ffn blocks
            d_model=128,  # internal model dimension
            dff=512,
            num_heads=8,
            dropout_rate=0.1,
            lowerdim=256,
            attn_version=1,
            do_classification=True,  # softmax classification
            class_weight=1.0,
            class_buffer_layers=0,  # buffer FC layers before classifier
            class_dropout=0.1,  # dropout rate for each class buffer layer
            do_reconstruction=True,  # reconstruction
            recon_weight=1.0,
            blind_decoder_mask=True,  # if True, the decoder knows padding location of the input

            # training params
            is_training=True,
            optimizer='Adam',  # SGD, Adam, sgd, adam
            lr=0.01,  # initial lr
            lr_scheduler='WarmupDecay',  # defined in core.lr_scheduler_tf20
            warmup_steps=10000,
        )
        return hparams
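Whichever project these snippets come from, the HParams object they build is essentially the same thing: a lightweight container whose keyword arguments become attributes and whose values can later be overridden. As a rough, hypothetical sketch (SimpleHParams and its methods below are illustrative stand-ins, not the API of any of the quoted projects):

class SimpleHParams:
    """Minimal stand-in for the HParams containers used in these examples."""

    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)

    def update(self, overrides):
        # merge a dict of overrides over the stored defaults
        self.__dict__.update(overrides)
        return self

    def values(self):
        # plain-dict view, e.g. for **-unpacking into a model constructor
        return dict(self.__dict__)


hp = SimpleHParams(num_layers=4, d_model=128, lr=0.01)
hp.update({'lr': 0.001})
print(hp.lr, hp.values())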
Example #2
def get_default_hparams():
    return HParams(dec_emb_size=128,
                   dec_hidden_size=128,
                   dec_dropout=0.1,
                   dec_layers=1,
                   dec_bidirectional=True,
                   dec_use_deep_fusion=True,
                   dec_use_shallow_fusion=False)
Example #3
def get_default_hparams():
    return HParams(z_size=args.z_size,
                   act_func=F.elu,
                   has_flow=args.has_flow,
                   n_flows=args.n_flows,
                   wide_encoder=args.wide_encoder,
                   cuda=args.cuda,
                   hamiltonian_flow=False)
Example #4
def get_default_hparams():
    return HParams(
        tm_init=False,
        tm_overfitted=False,
        tm_bin_path=os.path.join(SE_DIR, "se.bin"),
        tm_top_size=3,
        tm_train_dataset_path=os.path.join(DATASET_DIR, "he-en"),
        tm_50_50=True
    )
Example #5
def get_default_hparams():
    return HParams(
        use_cuda=False,
        max_length=15,
        batch_size=128,
        n_epochs=40,
        clip=0.25,
        starting_learning_rate=1e-3,  # todo
        learning_rate_strategy="constant_decay",  # todo
        optimizer="Adam",  # todo
        prefix="",
        model_name="",
        logdir="",
        use_tm_on_test=False,
        n_tm_epochs=10,
        cuda_visible_devices=1,
        force_override=False)
Example #6
import argparse
import logging
import os
from pprint import pprint

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf
from scipy.stats import entropy

from datasets import get_dataset
from models import get_model
from utils.hparams import HParams

sns.set()

parser = argparse.ArgumentParser()
parser.add_argument('--cfg_file', type=str)
parser.add_argument('--gpu', type=str, default='0')
parser.add_argument('--batch_size', type=int, default=1)
parser.add_argument('--num_samples', type=int, default=100)
args = parser.parse_args()

params = HParams(args.cfg_file)
params.gpu = args.gpu
params.batch_size = args.batch_size
params.num_samples = args.num_samples
pprint(params.dict)

os.environ['CUDA_VISIBLE_DEVICES'] = params.gpu
np.random.seed(params.seed)
tf.set_random_seed(params.seed)

save_dir = os.path.join(params.exp_dir, 'cmi_greedy')
os.makedirs(save_dir, exist_ok=True)

############################################################
logging.basicConfig(filename=save_dir + '/cmi_greedy.log',
                    filemode='w',
Example #7
            ['--model', args.model, '--dataset', args.dataset]))

    if args.dataset is None and args.file is None:
        raise ValueError('dataset or file args must be set.')

    if args.dataset and args.file:
        print('Both dataset and file args were set. Ignoring file args.')

    # GPU configuration
    setup_gpu(args.gpu, args.allow_growth)

    # Loading model
    model, meta = load_model(args.model, return_meta=True,
                             mode='predict', decoder=(not args.no_decoder))

    args = HParams(**meta['training_args']).update(vars(args_nondefault))

    # Features extractor
    input_parser = utils.get_from_module('preprocessing.audio',
                                         args.input_parser,
                                         params=args.input_parser_params)

    # Recovering text parser
    label_parser = utils.get_from_module('preprocessing.text',
                                         args.label_parser,
                                         params=args.label_parser_params)

    if args.dataset is not None:
        data_gen = DatasetGenerator(input_parser, label_parser,
                                    batch_size=1, seed=0, mode='predict',
                                    shuffle=False)
Example #8
    parser.add_argument('--output_file', type=str, default=None)

    parser.add_argument('--input_parser', type=str, default=None)
    parser.add_argument('--input_parser_params', nargs='+', default=[])

    parser.add_argument('--label_parser', type=str, default=None)
    parser.add_argument('--label_parser_params', nargs='+', default=[])

    parser.add_argument('--override', action='store_true')

    args = parser.parse_args()

    parser = utils.get_from_module('datasets*', args.parser, regex=True)
    print('input args ', args.input_parser_params)
    input_parser = utils.get_from_module('preprocessing.audio',
                                         args.input_parser,
                                         params=args.input_parser_params)
    label_parser = utils.get_from_module('preprocessing.text',
                                         args.label_parser,
                                         params=args.label_parser_params)

    dataset = parser(args.dataset_dir,
                     **HParams().parse(args.parser_params).values())

    output_file = dataset.to_h5(fname=args.output_file,
                                input_parser=input_parser,
                                label_parser=label_parser,
                                override=args.override)

    print('Dataset %s saved at %s' % (parser.name, output_file))
Example #9
              args.allow_growth,
              log_device_placement=args.verbose > 1)

    # Initial configuration
    epoch_offset = 0
    meta = None

    if args.load:
        args_nondefault = utils.parse_nondefault_args(args,
                                                      parser.parse_args([]))

        logger.info('Loading model...')
        model, meta = load_model(args.load, return_meta=True)

        logger.info('Loading parameters...')
        args = HParams(**meta['training_args']).update(vars(args_nondefault))

        epoch_offset = len(meta['epochs'])
        logger.info('Current epoch: %d' % epoch_offset)

        if args_nondefault.lr:
            logger.info('Setting current learning rate to %f...' % args.lr)
            K.set_value(model.optimizer.lr, args.lr)

    else:
        logger.info('Creating model...')
        # Recovering all valid models
        model_fn = utils.get_from_module('core.models', args.model)
        # Loading model
        model = model_fn(**(HParams().parse(args.model_params).values()))
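Examples #7, #9 and #13 all restore hyperparameters from a checkpoint's metadata and then overlay only the command-line options the user actually changed, via HParams(**meta['training_args']).update(vars(args_nondefault)). A hedged sketch of what the parse_nondefault_args step presumably computes (an illustration, not the projects' actual implementation):

import argparse


def nondefault_args(args, defaults):
    # hypothetical helper: keep only options whose values differ from the
    # parser defaults, so they can safely override checkpointed hparams
    return {k: v for k, v in vars(args).items() if vars(defaults).get(k) != v}


parser = argparse.ArgumentParser()
parser.add_argument('--lr', type=float, default=0.001)
parser.add_argument('--batch_size', type=int, default=32)

args = parser.parse_args(['--lr', '0.01'])  # the user changed only --lr
overrides = nondefault_args(args, parser.parse_args([]))
print(overrides)  # {'lr': 0.01} -> merged over the restored training args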
Example #10
hparams = HParams(

    # dataset parameters
    data_dir="./data/LJSpeech-1.1/",  # directory to the data
    # directory to save all precomputed inputs
    save_data_dir="./data/LJSpeech-1.1/tacotron",
    # output variables in dataloader
    out_variables=["mel", "linear", "text"],
    n_frames=162,  # number of frames in mel spectrogram
    text_len=188,  # maximum text length
    sr=20000,  # sampling rate used to read audios
    # length of windowed signal after padding with zeros
    n_fft=2048,
    n_mels=80,  # number of mel filters

    # audio samples between adjacent STFT columns
    hop_length=250,
    win_length=1000,  # window length
    ref_db=20,  # reference decibel
    max_db=100,  # maximum decibel
    mel_fmin=0.0,  # minimum mel bank
    mel_fmax=None,  # maximum mel bank
    preemphasis=0.97,  # preemphasis factor

    # dictionary
    vocab="~ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!'(),-.:;?,_ ",
    r=5,  # number of frames generated on each timestep
    n_iter=60,  # number of iterations for Griffin-Lim
    power=1.5,  # power used for Griffin-Lim

    # number of dimensions used for character embedding
    symbols_embedding_dim=256,
    prenet_channels=(256, 128),  # number channels for prenet
    # number of dimensions used for encoder embedding
    encoder_embedding_dim=256,
    attention_dim=256,  # dimension of attention
    # number of dimensions used for postnet embedding
    postnet_embedding_dim=256,
    batch_size=32,  # batch size
    epoch=1001,  # number of epochs
    # number of iterations before printing to log file
    print_frequency=50,
    weight_decay=0.0,  # weight decay
    max_norm=1.0,  # maximum norm used in clip_grad_by_norm
    alpha=0.001,  # learning rate
    warmup=4000,  # number of iterations for warmup
    epochs_per_checkpoint=50,  # number of epochs for each checkpoint
    output_path="./log/tacotron/",  # directory to save results
    seed=123456,  # random seed
)
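For the STFT settings above, the frame timing follows directly from the sampling rate: a hop_length of 250 samples at sr=20000 Hz is 12.5 ms between frames, a win_length of 1000 samples is a 50 ms analysis window, and the n_frames=162 mel frames therefore cover roughly two seconds of audio. A quick check (plain arithmetic only, no project code involved):

sr = 20000         # sampling rate (Hz)
hop_length = 250   # audio samples between adjacent STFT columns
win_length = 1000  # analysis window length in samples
n_frames = 162     # mel frames per training example

print(hop_length / sr * 1000)      # 12.5 ms hop
print(win_length / sr * 1000)      # 50.0 ms window
print(n_frames * hop_length / sr)  # ~2.0 s of audio per example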
Example #11
import argparse
import json

import numpy as np

from utils.files import get_files


parser = argparse.ArgumentParser(description='Preprocessing for WaveRNN and Tacotron')
parser.add_argument('--path', '-p', default=None, help='directly point to dataset path (overrides hparams.wav_path)')
parser.add_argument('--extension', '-e', default='.wav', help='file extension to search for in dataset folder')
parser.add_argument('--cfg', type=str, default=None)
parser.add_argument('--mel_guia', type=str, default=None)
parser.add_argument('--wav_guia', type=str, default=None)
args = parser.parse_args()

hp = HParams(args.cfg)

extension = args.extension
# fall back to the hparams wav_path when --path is not given
path = args.path if args.path is not None else hp.wav_path


def convert_file(in_path, out_path):
    # load the output waveform to be predicted
    y = load_wav(out_path)
    if len(y) < hp.voc_seq_len * 3:
        # skip too short files
        return None, None
    peak = np.abs(y).max()
    if hp.peak_norm or peak > 1.0:
        y /= peak
    # load the input waveform from which melspec is computed
    x = load_wav(in_path)
    mel = melspectrogram(x)
    if hp.voc_mode == 'RAW':
Example #12
def get_default_hparams():
    return HParams(enc_emb_size=128,
                   enc_hidden_size=128,
                   enc_dropout=0.1,
                   enc_layers=1,
                   enc_bidirectional=True)
Example #13
def main(args):

    # hack in ProgbarLogger: avoid printing the dummy losses
    keras.callbacks.ProgbarLogger = lambda: ProgbarLogger(
        show_metrics=['loss', 'decoder_ler', 'val_loss', 'val_decoder_ler'])

    # GPU configuration
    setup_gpu(args.gpu,
              args.allow_growth,
              log_device_placement=args.verbose > 1)

    # Initial configuration
    epoch_offset = 0
    meta = None

    default_args = parser.parse_args([
        args.mode,
        '--dataset',
        args.dataset,
    ])

    args_nondefault = utils.parse_nondefault_args(args, default_args)

    if args.mode == 'eval':
        model, meta = load_model(args.load, return_meta=True, mode='eval')

        args = HParams(**meta['training_args']).update(vars(args_nondefault))
        args.mode = 'eval'
    else:
        if args.load:

            print('Loading model...')
            model, meta = load_model(args.load, return_meta=True)

            print('Loading parameters...')
            args = HParams(**meta['training_args']).update(
                vars(args_nondefault))

            epoch_offset = len(meta['epochs'])
            print('Current epoch: %d' % epoch_offset)

            if args_nondefault.lr:
                print('Setting current learning rate to %f...' % args.lr)
                K.set_value(model.optimizer.lr, args.lr)
        else:
            print('Creating model...')
            # Load model
            model = sbrt2017(num_hiddens=args.num_hiddens,
                             var_dropout=args.var_dropout,
                             dropout=args.dropout,
                             weight_decay=args.weight_decay)

            print('Setting the optimizer...')
            # Optimization
            opt = Adam(lr=args.lr, clipnorm=args.clipnorm)

            # Compile with dummy loss
            model.compile(loss={
                'ctc': ctc_dummy_loss,
                'decoder': decoder_dummy_loss
            },
                          optimizer=opt,
                          metrics={'decoder': ler},
                          loss_weights=[1, 0])

    print('Creating results folder...')
    if args.save is None:
        args.save = os.path.join('results',
                                 'sbrt2017_%s' % (datetime.datetime.now()))
    if not os.path.isdir(args.save):
        os.makedirs(args.save)

    if args.mode == 'train':
        print('Adding callbacks')
        # Callbacks
        model_ckpt = MetaCheckpoint(os.path.join(args.save, 'model.h5'),
                                    training_args=args,
                                    meta=meta)
        best_ckpt = MetaCheckpoint(os.path.join(args.save, 'best.h5'),
                                   monitor='val_decoder_ler',
                                   save_best_only=True,
                                   mode='min',
                                   training_args=args,
                                   meta=meta)
        callback_list = [model_ckpt, best_ckpt]

    print('Getting the text parser...')
    # Recovering text parser
    label_parser = preprocessing.SimpleCharParser()

    print('Getting the data generator...')
    # Data generator
    data_gen = DatasetGenerator(None,
                                label_parser,
                                batch_size=args.batch_size,
                                seed=args.seed)

    # iterators over datasets
    train_flow, valid_flow, test_flow = None, None, None
    num_val_samples = num_test_samples = 0

    print(str(vars(args)))
    print('Generating flow...')

    if args.mode == 'train':
        train_flow, valid_flow, test_flow = data_gen.flow_from_fname(
            args.dataset, datasets=['train', 'valid', 'test'])
        num_val_samples = valid_flow.len
        print('Initializing training...')
        # Fit the model
        model.fit_generator(train_flow,
                            samples_per_epoch=train_flow.len,
                            nb_epoch=args.num_epochs,
                            validation_data=valid_flow,
                            nb_val_samples=num_val_samples,
                            max_q_size=10,
                            nb_worker=1,
                            callbacks=callback_list,
                            verbose=1,
                            initial_epoch=epoch_offset)

        del model
        model = load_model(os.path.join(args.save, 'best.h5'), mode='eval')
    else:
        test_flow = data_gen.flow_from_fname(args.dataset, datasets='test')

    print('Evaluating model on test set')
    metrics = model.evaluate_generator(test_flow,
                                       test_flow.len,
                                       max_q_size=10,
                                       nb_worker=1)

    msg = ('Total loss: %.4f\nCTC Loss: %.4f\nLER: %.2f%%'
           % (metrics[0], metrics[1], metrics[3] * 100))
    print(msg)

    K.clear_session()
Example #14
    # Model settings
    parser.add_argument('--num_hiddens', default=1024, type=int)
    parser.add_argument('--var_dropout', default=0.2, type=float)
    parser.add_argument('--dropout', default=0, type=float)
    parser.add_argument('--weight-decay', default=1e-4, type=float)

    # Hyper parameters
    parser.add_argument('--num_epochs', default=100, type=int)
    parser.add_argument('--lr', default=0.001, type=float)
    parser.add_argument('--momentum', default=0.9, type=float)
    parser.add_argument('--clipnorm', default=400, type=float)
    parser.add_argument('--batch_size', default=32, type=int)
    # End of hyper parameters

    # Dataset definitions
    parser.add_argument('--dataset', default=None, type=str, required=True)

    # Other configs
    parser.add_argument('--save', default=None, type=str)
    parser.add_argument('--gpu', default='0', type=str)
    parser.add_argument('--allow_growth', default=False, action='store_true')
    parser.add_argument('--verbose', default=0, type=int)
    parser.add_argument('--seed', default=None, type=float)

    args = parser.parse_args()

    args = HParams(**vars(args))

    main(args)