def run_nmt(path):
    external_validation_script = get_external_validation_script(path)

    validerr = train(saveto=conf.work_dir + "/model.npz",
                    external_validation_script=external_validation_script,
                    datasets=[path["train_src"], path["train_trg"]],
                    valid_datasets=[path["dev_src"], path["dev_trg"]],
                    dictionaries=[path["dict_src"], path["dict_trg"]],
                    n_words_src=ast.literal_eval(conf.n_words_src),
                    n_words=ast.literal_eval(conf.n_words),
                    maxlen=ast.literal_eval(conf.maxlen),
                    dim=ast.literal_eval(conf.dim),
                    dim_word=ast.literal_eval(conf.dim_word),
                    factors=ast.literal_eval(conf.factors),
                    dim_per_factor=ast.literal_eval(conf.dim_per_factor),
                    batch_size=ast.literal_eval(conf.batch_size),
                    valid_batch_size=ast.literal_eval(conf.valid_batch_size),
                    reload_=ast.literal_eval(conf.reload_),
                    overwrite=ast.literal_eval(conf.overwrite),
                    optimizer=conf.optimizer,
                    lrate=ast.literal_eval(conf.lrate),
                    dispFreq=ast.literal_eval(conf.dispFreq),
                    validFreq=ast.literal_eval(conf.validFreq),
                    saveFreq=ast.literal_eval(conf.saveFreq),
                    sampleFreq=ast.literal_eval(conf.sampleFreq),
                    use_dropout=ast.literal_eval(conf.use_dropout),
                    dropout_embedding=ast.literal_eval(conf.dropout_embedding),
                    dropout_hidden=ast.literal_eval(conf.dropout_hidden),
                    dropout_source=ast.literal_eval(conf.dropout_source),
                    dropout_target=ast.literal_eval(conf.dropout_target),
                    shuffle_each_epoch=ast.literal_eval(conf.shuffle_each_epoch),
                    max_epochs=ast.literal_eval(conf.max_epochs),
                    finish_after=ast.literal_eval(conf.finish_after),
                    finetune=ast.literal_eval(conf.finetune),
                    finetune_only_last=ast.literal_eval(conf.finetune_only_last),
                    sort_by_length=ast.literal_eval(conf.sort_by_length),
                    use_domain_interpolation=ast.literal_eval(conf.use_domain_interpolation),
                    domain_interpolation_min=ast.literal_eval(conf.domain_interpolation_min),
                    domain_interpolation_inc=ast.literal_eval(conf.domain_interpolation_inc),
                    domain_interpolation_indomain_datasets=ast.literal_eval(conf.domain_interpolation_indomain_datasets),
                    maxibatch_size=ast.literal_eval(conf.maxibatch_size),
                    decay_c=ast.literal_eval(conf.decay_c),
                    map_decay_c=ast.literal_eval(conf.map_decay_c),
                    alpha_c=ast.literal_eval(conf.alpha_c),
                    clip_c=ast.literal_eval(conf.clip_c),
                    patience=ast.literal_eval(conf.patience),
                    encoder=conf.encoder,
                    decoder=conf.decoder,
                    embs=conf.external_embeddings,
                    emb_type=conf.emb_type)
    return validerr
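The conf object and get_external_validation_script are defined outside this snippet. A minimal sketch of what conf might look like, assuming an INI-style config whose options are all read back as strings (which is why ast.literal_eval is applied to every numeric or boolean value above; the Conf class, file name, and [train] section are assumptions):

import ast
try:
    from configparser import ConfigParser  # Python 3
except ImportError:
    from ConfigParser import ConfigParser  # Python 2

class Conf(object):
    """Expose each option of the [train] section as a string attribute."""
    def __init__(self, path):
        parser = ConfigParser()
        parser.read(path)
        for key, value in parser.items('train'):
            setattr(self, key, value)

conf = Conf('train.ini')  # hypothetical config file
# every attribute is a string, e.g. conf.batch_size == '80';
# ast.literal_eval('80') -> 80 and ast.literal_eval('True') -> True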
Example #2
def run_nmt(path):
    external_validation_script = get_external_validation_script(path)

    validerr = train(saveto=conf.work_dir + "/model.npz",
                    external_validation_script=external_validation_script,
                    datasets=[path["train_src"], path["train_trg"]],
                    valid_datasets=[path["dev_src"], path["dev_trg"]],
                    dictionaries=[path["dict_src"], path["dict_trg"]],
                    n_words_src=int(conf.n_words_src),
                    n_words=int(conf.n_words),
                    maxlen=int(conf.maxlen),
                    dim_word=int(conf.dim_word),
                    dim=int(conf.dim),
                    batch_size=int(conf.batch_size),
                    valid_batch_size=int(conf.valid_batch_size),
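                    # caution: if conf values are strings, bool('False') is
                    # True; ast.literal_eval (as in the first example)
                    # parses 'True'/'False' correctly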
                    reload_=bool(conf.reload_),
                    overwrite=bool(conf.overwrite),
                    optimizer=conf.optimizer,
                    lrate=float(conf.lrate),
                    dispFreq=int(conf.dispFreq),
                    validFreq=int(conf.validFreq),
                    saveFreq=int(conf.saveFreq),
                    sampleFreq=int(conf.sampleFreq),
                    use_dropout=bool(conf.use_dropout),
                    dropout_embedding=float(conf.dropout_embedding),
                    dropout_hidden=float(conf.dropout_hidden),
                    dropout_source=float(conf.dropout_source),
                    dropout_target=float(conf.dropout_target),
                    shuffle_each_epoch=bool(conf.shuffle_each_epoch),
                    max_epochs=int(conf.max_epochs),
                    finish_after=int(conf.finish_after),
                    finetune=bool(conf.finetune),
                    decay_c=float(conf.decay_c),
                    alpha_c=float(conf.alpha_c),
                    clip_c=float(conf.clip_c),
                    patience=int(conf.patience),
                    encoder=conf.encoder,
                    decoder=conf.decoder)
    return validerr
Example #4
def main(job_id, params):
    print params
    validerr = train(saveto=params['model'][0],
                     reload_=params['reload'][0],
                     dim_word=params['dim_word'][0],
                     dim=params['dim'][0],
                     n_words=params['n-words'][0],
                     n_words_src=params['n-words'][0],
                     decay_c=params['decay-c'][0],
                     clip_c=params['clip-c'][0],
                     lrate=params['learning-rate'][0],
                     optimizer=params['optimizer'][0],
                     maxlen=50,
                     batch_size=80,
                     valid_batch_size=80,
                     datasets=[
                         WDIR + 'hq.20170623.cs-en.train.bpe.en',
                         WDIR + 'hq.20170623.cs-en.train.bpe.cs'
                     ],
                     valid_datasets=[
                         WDIR + 'hq.20170623.cs-en.dev.bpe.en',
                         WDIR + 'hq.20170623.cs-en.dev.bpe.cs'
                     ],
                     dictionaries=[
                         WDIR + 'hq.20170623.cs-en.train.bpe.en.json',
                         WDIR + 'hq.20170623.cs-en.train.bpe.cs.json'
                     ],
                     validFreq=20000,
                     dispFreq=20000,
                     saveFreq=40000,
                     sampleFreq=20000,
                     use_dropout=params['use-dropout'][0],
                     overwrite=False,
                     external_validation_script=WDIR + 'validate.sh',
                     objective='MRT')
    return validerr
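The params dict follows the convention of hyperparameter tuners such as Spearmint, which pass each value wrapped in a one-element list, hence the [0] indexing above. A hypothetical invocation (all values are assumptions):

params = {
    'model': ['model.npz'],
    'reload': [True],
    'dim_word': [500],
    'dim': [1024],
    'n-words': [40000],
    'decay-c': [0.0],
    'clip-c': [1.0],
    'learning-rate': [0.0001],
    'optimizer': ['adam'],
    'use-dropout': [False],
}
main(job_id=0, params=params)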
Example #5
if __name__ == '__main__':
    validerr = train(saveto='model/model.npz',
                    reload_=True,
                    init_accumulators_path='model/accumulators.iter440000.npz',
                    model_reload_path='model/model.iter440000.npz',
                    dim_word=500,
                    dim=1024,
                    n_words=VOCAB_SIZE,
                    n_words_src=VOCAB_SIZE,
                    decay_c=0.,
                    clip_c=1.,
                    lrate=0.0001,
                    optimizer='adadelta',
                    maxlen=50,
                    batch_size=80,
                    valid_batch_size=80,
                    datasets=[DATA_DIR + '/' + TRAIN + '.bpe.' + SRC, DATA_DIR + '/' + TRAIN + '.bpe.' + TGT],
                    valid_datasets=[DATA_DIR + '/' + DEV + '.bpe.' + SRC, DATA_DIR + '/' + DEV + '.bpe.' + TGT],
                    dictionaries=[DATA_DIR + '/' + TRAIN + '.bpe.' + SRC + '.json',DATA_DIR + '/' + TRAIN + '.bpe.' + TGT + '.json'],
                    validFreq=10000,
                    dispFreq=1000,
                    saveFreq=10000,
                    sampleFreq=10000,
                    use_dropout=False,
                    dropout_embedding=0.2, # dropout for input embeddings (0: no dropout)
                    dropout_hidden=0.2, # dropout for hidden layers (0: no dropout)
                    dropout_source=0.1, # dropout source words (0: no dropout)
                    dropout_target=0.1, # dropout target words (0: no dropout)
                    overwrite=False,
                    external_validation_script='./validate.sh')
    print validerr
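This example resumes a run from iteration 440000; the model weights (model_reload_path) and the optimizer accumulators (init_accumulators_path) presumably should come from the same saved iteration, e.g.:

# sketch: pair the weight and accumulator checkpoints by iteration
iteration = 440000
model_reload_path = 'model/model.iter%d.npz' % iteration
init_accumulators_path = 'model/accumulators.iter%d.npz' % iteration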
Example #6
if __name__ == '__main__':
    validerr = train(saveto='model/model.npz',
                    reload_=True,
                    dim_word=256,
                    dim=512,
                    n_words=VOCAB_SIZE,
                    n_words_src=VOCAB_SIZE,
                    decay_c=0.,
                    clip_c=1.,
                    lrate=0.0001,
                    optimizer='adam',
                    maxlen=100,
                    batch_size=80,
                    valid_batch_size=80,
                    datasets=[DATA_DIR + '/train.bpe.' + SRC, DATA_DIR + '/train.bpe.' + TGT],
                    valid_datasets=[DATA_DIR + '/tun.bpe.' + SRC, DATA_DIR + '/tun.bpe.' + TGT],
                    dictionaries=[DATA_DIR + '/train.bpe.' + SRC + '.json',DATA_DIR + '/train.bpe.' + TGT + '.json'],
                    validFreq=10000,
                    dispFreq=1000,
                    saveFreq=10000,
                    sampleFreq=10000,
                    use_dropout=True,
                    dropout_embedding=0.2, # dropout for input embeddings (0: no dropout)
                    dropout_hidden=0.2, # dropout for hidden layers (0: no dropout)
                    dropout_source=0.1, # dropout source words (0: no dropout)
                    dropout_target=0.1, # dropout target words (0: no dropout)
                    overwrite=False,
                    external_validation_script='./validate.sh')
    print validerr
Example #7
 validerr = train(
     saveto='model/model.npz',
     reload_=True,
     dim_word=256,
     dim=512,
     n_words=PE_VOCAB_SIZE,
     n_words_src=SRC_VOCAB_SIZE,
     decay_c=0.,
     clip_c=1.,
     lrate=0.0001,
     optimizer='adadelta',
     maxlen=50,
     batch_size=32,
     valid_batch_size=32,
     datasets=[SRC_TRAIN, TRG_TRAIN],
     valid_datasets=[SRC_DEV, TRG_DEV],
     dictionaries=[MT_VOCAB, SRC_VOCAB, PE_VOCAB],
     tie_encoder_decoder_embeddings=False,
     factors=2,
     dim_per_factor=[128, 128],
     validFreq=5000,
     dispFreq=500,
     saveFreq=5000,
     sampleFreq=5000,
     use_dropout=False,
     dropout_embedding=0.2,  # dropout for input embeddings (0: no dropout)
     dropout_hidden=0.2,  # dropout for hidden layers (0: no dropout)
     dropout_source=0.1,  # dropout source words (0: no dropout)
     dropout_target=0.1,  # dropout target words (0: no dropout)
     overwrite=False,
     external_validation_script='./validate.sh')
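With factors=2, the dictionaries list is expected to hold one vocabulary per source factor plus one for the target, which is why it has three entries here. A quick sanity check, assuming that convention (file names are placeholders):

factors = 2
dictionaries = ['mt_vocab.json', 'src_vocab.json', 'pe_vocab.json']  # hypothetical
assert len(dictionaries) == factors + 1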
Example #8
 validerr = train(
     saveto='model/model.npz',
     reload_=True,
     dim_word=500,
     factors=5,
     dim_per_factor=[360, 5, 115, 10, 10],
     dim=1024,
     n_words=VOCAB_SIZE,
     n_words_src=VOCAB_SIZE,
     decay_c=0.,
     clip_c=1.,
     lrate=0.0001,
     optimizer='adadelta',
     maxlen=50,
     batch_size=80,
     valid_batch_size=80,
     datasets=[
         DATA_DIR + '/corpus.factors.' + SRC,
         DATA_DIR + '/corpus.bpe.' + TGT
     ],
     valid_datasets=[
         DATA_DIR + '/newstest2013.factors.' + SRC,
         DATA_DIR + '/newstest2013.bpe.' + TGT
     ],
     dictionaries=[
         DATA_DIR + '/corpus.bpe.' + SRC + '.json',
         DATA_DIR + '/corpus.factors.1.' + SRC + '.json',
         DATA_DIR + '/corpus.factors.2.' + SRC + '.json',
         DATA_DIR + '/corpus.factors.3.' + SRC + '.json',
         DATA_DIR + '/corpus.factors.4.' + SRC + '.json',
         DATA_DIR + '/corpus.bpe.' + TGT + '.json'
     ],
     validFreq=10000,
     dispFreq=1000,
     saveFreq=30000,
     sampleFreq=10000,
     use_dropout=False,
     dropout_embedding=0.2,  # dropout for input embeddings (0: no dropout)
     dropout_hidden=0.2,  # dropout for hidden layers (0: no dropout)
     dropout_source=0.1,  # dropout source words (0: no dropout)
     dropout_target=0.1,  # dropout target words (0: no dropout)
     overwrite=False,
     external_validation_script='validate.sh')
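In this factored setup the per-factor embedding sizes add up to dim_word (360 + 5 + 115 + 10 + 10 = 500). A one-line check worth running before launching, a sketch based on that constraint:

dim_word = 500
dim_per_factor = [360, 5, 115, 10, 10]
assert sum(dim_per_factor) == dim_word, 'dim_per_factor must sum to dim_word'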
Example #9
 validerr = train(
     saveto="%smodel.npz" % LOC,
     reload_=True,
     patience=50,
     dim_word=1000,
     dim=1024,
     shuffle_each_epoch=True,
     clip_c=1.,
     decay_c=1.0e-8,
     lrate=0.0001,
     optimizer='adadelta',
     maxlen=201,
     batch_size=80,
     valid_batch_size=80,
     datasets=[
         DATA_DIR + '/PATH/TO/SOURCES_FROM_LOG/' + SRC, LOG + '.tgt'
     ],  # first: file with log source sentences, second: file with translations from the model that created the log
     valid_datasets=[
         DATA_DIR + '/PATH/TO/VALIDATION_SOURCE/' + SRC,
         DATA_DIR + '/PATH/TO/VALIDATION_REFERENCE/' + TGT
     ],
     dictionaries=[
         DATA_DIR + '/PATH/TO/SOURCE_DICTIONARY_FROM_BASE_MODEL/' + SRC +
         '.json', DATA_DIR + '/PATH/TO/TARGET_DICTIONARY_FROM_BASE_MODEL/' +
         TGT + '.json'
     ],
     validFreq=100,
     sampleFreq=100,
     saveFreq=100,
     dispFreq=100,
     use_dropout=False,
     dropout_embedding=0.0,  # dropout for input embeddings (0: no dropout)
     dropout_hidden=0.0,  # dropout for hidden layers (0: no dropout)
     dropout_source=0.0,  # dropout source words (0: no dropout)
     dropout_target=0.0,  # dropout target words (0: no dropout)
     overwrite=True,
     objective='CL',
     cl_deterministic=True,
     cl_log=LOG + '.json',
     cl_reweigh=True,
     cl_word_rewards=True,
     cl_external_reward=NEMATUS + 'PATH/TO/WORD_REWARDS',
     external_validation_script='./validate_nlmaps.sh %s' % LOC)
Example #10
if __name__ == '__main__':
    validerr = train(saveto='model/model.npz',
                     reload_=True,
                     dim_word=512,
                     dim=1028,
                     n_words=PE_VOCAB_SIZE,
                     n_words_src=SRC_AND_MT_VOCAB_SIZE,
                     decay_c=0.,
                     clip_c=1.,
                     lrate=0.0001,
                     optimizer='adadelta',
                     maxlen=100,
                     batch_size=32,
                     valid_batch_size=32,
                     datasets=[SRC_TRAIN, TRG_TRAIN],
                     valid_datasets=[SRC_DEV, TRG_DEV],
                     dictionaries=[
                         SRC_VOCAB, FACTOR_1_VOCAB, FACTOR_2_VOCAB,
                         FACTOR_3_VOCAB, PE_VOCAB
                     ],
                     factors=4,
                     dim_per_factor=[256, 64, 128, 64],
                     validFreq=5000,
                     dispFreq=500,
                     saveFreq=5000,
                     sampleFreq=5000,
                     overwrite=False,
                     external_validation_script='./validate.sh')
    print validerr
Example #11
if __name__ == '__main__':
    train(
        saveto=config['work.dir'] + '/' + 'model.npz',
        reload_=True,
        dim_word=500,
        dim=1024,
        n_words_src=50000,
        n_words=50000,
        decay_c=0.,
        clip_c=1.,
        lrate=0.0001,
        optimizer='adam',
        maxlen=100,
        batch_size=40,
        valid_batch_size=40,
        datasets=[config["train.src"], config["train.trg"]],
        valid_datasets=[config["valid.src"], config["valid.trg"]],
        dictionaries=[config["dict.src"], config["dict.trg"]],
        validFreq=10000,
        dispFreq=1000,
        saveFreq=10000,
        sampleFreq=10000,
        use_dropout=True,
        max_epochs=5,
        shuffle_each_epoch=True,
        dropout_embedding=0.1,  # dropout for input embeddings (0: no dropout)
        dropout_hidden=0.1,  # dropout for hidden layers (0: no dropout)
        dropout_source=0.1,  # dropout source words (0: no dropout)
        dropout_target=0.1,  # dropout target words (0: no dropout)
        overwrite=False,
        external_validation_script=config["valid.script"])
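config above is a plain mapping built elsewhere; a minimal sketch, assuming a JSON file with flat keys like 'work.dir' and 'train.src' (the file name is an assumption):

import json

with open('experiment.json') as f:  # hypothetical config file
    config = json.load(f)
# expected keys: 'work.dir', 'train.src', 'train.trg', 'valid.src',
# 'valid.trg', 'dict.src', 'dict.trg', 'valid.script'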
Example #12
import os

VOCAB_SIZE = 35000
SRC = "lv"
TGT = "en"
DATA_DIR = os.path.expanduser("~/data/LvEn/")  # expand '~' so the files can be opened

from nematus.nmt import train

if __name__ == '__main__':
    validerr = train(saveto='model/LvEn.npz',
                     datasets=[
                         DATA_DIR + '/full.bpe.' + SRC,
                         DATA_DIR + '/full.bpe.' + TGT
                     ],
                     valid_datasets=[
                         DATA_DIR + '/dev.bpe.' + SRC,
                         DATA_DIR + '/dev.bpe.' + TGT
                     ],
                     dictionaries=[
                         DATA_DIR + '/full.bpe.' + SRC + '.json',
                         DATA_DIR + '/full.bpe.' + TGT + '.json'
                     ],
                     validFreq=15000,
                     dispFreq=2000,
                     saveFreq=15000,
                     sampleFreq=10000,
                     use_dropout=True,
                     external_validation_script='./validate.sh')
    print validerr
Example #13
if __name__ == '__main__':
    validerr = train(saveto='model/model.npz',
                    reload_=True,
                    dim_word=500,
                    dim=1024,
                    n_words=VOCAB_SIZE,
                    n_words_src=VOCAB_SIZE,
                    decay_c=0.,
                    clip_c=1.,
                    lrate=0.0001,
                    optimizer='adadelta',
                    maxlen=50,
                    batch_size=80,
                    valid_batch_size=80,
                    datasets=[DATA_DIR + '/corpus.bpe.' + SRC, DATA_DIR + '/corpus.bpe.' + TGT],
                    valid_datasets=[DATA_DIR + '/newsdev2016.bpe.' + SRC, DATA_DIR + '/newsdev2016.bpe.' + TGT],
                    dictionaries=[DATA_DIR + '/corpus.bpe.' + SRC + '.json',DATA_DIR + '/corpus.bpe.' + TGT + '.json'],
                    validFreq=10000,
                    dispFreq=1000,
                    saveFreq=30000,
                    sampleFreq=10000,
                    use_dropout=False,
                    dropout_embedding=0.2, # dropout for input embeddings (0: no dropout)
                    dropout_hidden=0.2, # dropout for hidden layers (0: no dropout)
                    dropout_source=0.1, # dropout source words (0: no dropout)
                    dropout_target=0.1, # dropout target words (0: no dropout)
                    overwrite=False,
                    external_validation_script='./validate.sh')
    print validerr
Example #14
    validerr = train(saveto=work_dir + '/model.npz',
                    reload_=True,
                    dim_word=620,
                    dim=620,
                    n_words=vocab_tgt,
                    n_words_src=vocab_src,
                    decay_c=0.,
                    clip_c=1.,
                    lrate=0.001,
                    optimizer='adadelta',
                    maxlen=50,
                    batch_size=100,
                    valid_batch_size=100,
                    datasets=[train_src, train_tgt],
                    valid_datasets=[dev_src, dev_tgt],
                    dictionaries=[dict_src, dict_tgt],
                    validFreq=1000,
                    dispFreq=100000,
                    saveFreq=1000,
                    sampleFreq=100000,
                    use_dropout=True,
                    dropout_embedding=0.2, # dropout for input embeddings (0: no dropout)
                    dropout_hidden=0.2, # dropout for hidden layers (0: no dropout)
                    dropout_source=0.1, # dropout source words (0: no dropout)
                    dropout_target=0.1, # dropout target words (0: no dropout)
                    overwrite=False,
                    finish_after=720000,
                    max_epochs=10,
                    external_validation_script=validation_script)
    print validerr
Example #15
if __name__ == '__main__':
    train(saveto= config['work.dir'] + '/' + 'model.npz',
        factors=[1, 1],
        encoders=2,
        reload_=True,
        dim_word=500,
        dim=1024,
        n_words_src=[30000, 30000],
        n_words=30000,
        decay_c=0.,
        clip_c=1.,
        lrate=0.0001,
        optimizer='adadelta',
        maxlen=100,
        batch_size=30,
        valid_batch_size=30,
        datasets=[config["train.src"], config["train.src"], config["train.trg"]],
        valid_datasets=[config["valid.src"], config["valid.src"], config["valid.trg"]],
        dictionaries=[[config["dict.src"]], [config["dict.src"]], config["dict.trg"]],
        validFreq=10000,
        dispFreq=1000,
        saveFreq=30000,
        sampleFreq=10000,
        use_dropout=False,
        shuffle_each_epoch=False,
        dropout_embedding=0.2, # dropout for input embeddings (0: no dropout)
        dropout_hidden=0.2, # dropout for hidden layers (0: no dropout)
        dropout_source=0.1, # dropout source words (0: no dropout)
        dropout_target=0.1, # dropout target words (0: no dropout)
        overwrite=False)
        #external_validation_script=config["valid.script"])
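This example targets a multi-source variant of nematus: encoders=2 reads two source streams (here the same file twice) followed by one target, so datasets lists encoders + 1 files. A sketch of that invariant (paths are placeholders):

encoders = 2
datasets = ['train.src', 'train.src', 'train.trg']  # hypothetical paths
assert len(datasets) == encoders + 1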
Example #16
 validerr = train(
     saveto=model + '/model.npz',
     reload_=True,
     dim_word=500,
     dim=1024,
     n_words_tgt=VOCAB_SIZE_TGT,
     n_words_src=VOCAB_SIZE_SRC,
     decay_c=0.,
     clip_c=1.,
     lrate=0.0001,
     optimizer='adam',  #adam,adadelta
     maxlen=50,
     batch_size=80,
     valid_batch_size=80,
     datasets=[
         DATA_DIR + '/train.bpe.' + SRCTAG,
         DATA_DIR + '/train.bpe.' + TRGTAG
     ],
     valid_datasets=[
         DATA_DIR + '/dev.bpe.' + SRCTAG, DATA_DIR + '/dev.bpe.' + TRGTAG
     ],
     dictionaries=[
         DATA_DIR + '/train.bpe.' + SRCTAG + '.json',
         DATA_DIR + '/train.bpe.' + TRGTAG + '.json'
     ],
     validFreq=10000,  #10000,3000
     dispFreq=1000,  #1000,100
     saveFreq=30000,  #30000,10000
     #sampleFreq=10000,
     sampleFreq=0,  # do not generate samples
     use_dropout=True,
     dropout_embedding=0.2,  # dropout for input embeddings (0: no dropout)
     dropout_hidden=0.2,  # dropout for hidden layers (0: no dropout)
     dropout_source=0.1,  # dropout source words (0: no dropout)
     dropout_target=0.1,  # dropout target words (0: no dropout)
     overwrite=False,
     external_validation_script='./validate.sh')
Example #17
# start training from best model from previous experiment
STARTING_MODEL = '/media/1tb_drive/nematus_ape_experiments/ape_qe/en-de_models/en-de_concat_src_mt/model/model.npz.npz.best_bleu'

if __name__ == '__main__':
    validerr = train(saveto=os.path.join('model/model.npz'),
                     prior_model=STARTING_MODEL,
                     reload_=True,
                     dim_word=256,
                     dim=1028,
                     n_words=PE_VOCAB_SIZE,
                     n_words_src=SRC_AND_MT_VOCAB_SIZE,
                     decay_c=0.,
                     clip_c=1.,
                     lrate=0.0001,
                     optimizer='adadelta',
                     maxlen=100,
                     batch_size=32,
                     valid_batch_size=32,
                     datasets=[SRC_TRAIN, TRG_TRAIN],
                     valid_datasets=[SRC_DEV, TRG_DEV],
                     dictionaries=[SRC_VOCAB, PE_VOCAB],
                     factors=1,
                     validFreq=1000,
                     dispFreq=500,
                     saveFreq=1000,
                     sampleFreq=1000,
                     overwrite=False,
                     external_validation_script='./fine_tune_validate.sh')
    print validerr
Example #18
 validerr = train(
     saveto='model/model.npz',
     finish_after=300000,
     reload_=True,
     dim_word=500,
     dim=1024,
     n_words=TRG_VOCAB_SIZE,
     n_words_src=SRC_VOCAB_SIZE,
     decay_c=0.,
     clip_c=1.,
     lrate=0.02,
     optimizer='adadelta',
     maxlen=50,
     batch_size=60,
     valid_batch_size=60,
     datasets=[
         DATA_DIR + '/' + TRAIN_FILE + '.bpe.' + SRC,
         DATA_DIR + '/' + TRAIN_FILE + '.bpe.' + TGT
     ],
     valid_datasets=[
         DATA_DIR + '/' + DEV_FILE + '.bpe.' + SRC,
         DATA_DIR + '/' + DEV_FILE + '.bpe.' + TGT
     ],
     dictionaries=[
         DATA_DIR + '/' + TRAIN_FILE + '.bpe.' + SRC + '.json',
         DATA_DIR + '/' + TRAIN_FILE + '.bpe.' + TGT + '.json'
     ],
     validFreq=3000,
     dispFreq=500,
     saveFreq=6000,
     sampleFreq=3000,
     use_dropout=False,
     dropout_embedding=0.2,  # dropout for input embeddings (0: no dropout)
     dropout_hidden=0.2,  # dropout for hidden layers (0: no dropout)
     dropout_source=0.1,  # dropout source words (0: no dropout)
     dropout_target=0.1,  # dropout target words (0: no dropout)
     overwrite=False,
     external_validation_script='./validate.sh')
Example #19
 validerr = train(
     alpha_c=0.,  # alignment regularization
     batch_size=80,
     clip_c=1.,
     datasets=[
         DATA_DIR + '/train.bpe.' + SRC, DATA_DIR + '/train.bpe.' + TGT
     ],
     decay_c=0.,
     decoder='gru_cond',
     dictionaries=[
         DATA_DIR + '/train.bpe.' + SRC + '.json',
         DATA_DIR + '/train.bpe.' + TGT + '.json'
     ],
     dim=1024,
     dim_per_factor=None,
     dim_word=500,
     dispFreq=1000,
     dropout_embedding=0.2,  # dropout for input embeddings 0: no dropout
     dropout_hidden=0.2,  # dropout for hidden layers 0: no dropout
     dropout_source=0.1,  # dropout source words 0: no dropout
     dropout_target=0.1,  # dropout target words 0: no dropout
     encoder='gru',
     external_validation_script='./validate.sh',
     factors=1,
     finetune=False,
     finish_after=10000000,  # finish after this many updates
     lrate=0.0001,
     max_epochs=5000,
     maxibatch_size=20,
     maxlen=50,
     n_words=VOCAB_SIZE,
     n_words_src=VOCAB_SIZE,
     optimizer='adadelta',
     overwrite=False,
     reload_=True,
     sampleFreq=1000,
     saveFreq=1000,
     saveto='nematus-model/model.npz',
     use_dropout=False,
     validFreq=1000,
     valid_batch_size=80,
     valid_datasets=[
         DATA_DIR + '/tune.bpe.' + SRC, DATA_DIR + '/tune.bpe.' + TGT
     ])