def main(_):
    # params is a module-level config object in the source project
    if params.test:
        test(params)
    elif params.preprocess:
        preprocess(params)
    else:
        train(params)
Example #2
def main(job_id, params):
    print 'Anything printed here will end up in the output directory for job #%d' % job_id
    print params
    trainerr, validerr, testerr = train(
        saveto=params['model'][0],
        reload_=params['reload'][0],
        dim_word=params['dim_word'][0],
        dim=params['dim'][0],
        n_words=params['n-words'][0],
        n_words_src=params['n-words'][0],
        decay_c=params['decay-c'][0],
        lrate=params['learning-rate'][0],
        optimizer=params['optimizer'][0],
        maxlen=20,
        batch_size=16,
        valid_batch_size=16,
        validFreq=1000,
        dispFreq=1,
        saveFreq=1000,
        sampleFreq=1000,
        dataset='wmt14enfr',
        dictionary='/data/lisatmp3/chokyun/wmt14/parallel-corpus/en-fr/vocab.fr.pkl',
        use_dropout=True if params['use-dropout'][0] else False)
    return validerr
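Note: most of the examples on this page follow the same hook convention: a hyperparameter tuner (such as Spearmint, whose job wrapper is where "Anything printed here will end up in the output directory for job #N" comes from) imports the script and calls main(job_id, params), passing every hyperparameter as a single-element list, hence the params['dim'][0]-style indexing. A minimal driver sketch, with a hypothetical params dict whose keys mirror Example #2:

# Hypothetical driver sketch: calls the hook directly, outside any tuner.
# The dict values are illustrative, not taken from any example on this page.
if __name__ == '__main__':
    hypothetical_params = {
        'model': ['model.npz'],
        'reload': [False],
        'dim_word': [512],
        'dim': [1024],
        'n-words': [30000],
        'decay-c': [0.],
        'learning-rate': [0.0001],
        'optimizer': ['adadelta'],
        'use-dropout': [False],
    }
    main(0, hypothetical_params)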
Example #3
def main(job_id, params):
    print params
    validerr = train(saveto=params['model'][0],
                     reload_=params['reload'][0],
                     dim_word=params['dim_word'][0],
                     dim=params['dim'][0],
                     n_words=params['n-words'][0],
                     n_words_src=params['n-words'][0],
                     decay_c=params['decay-c'][0],
                     clip_c=params['clip-c'][0],
                     lrate=params['learning-rate'][0],
                     optimizer=params['optimizer'][0], 
                     patience=1000,
                     maxlen=50,
                     batch_size=32,
                     valid_batch_size=32,
                     validFreq=100,
                     dispFreq=10,
                     saveFreq=100,
                     sampleFreq=100,
                     datasets=['../data/hal/train/tok/en', 
                               '../data/hal/train/tok/ja'],
                     valid_datasets=['../data/hal/dev/tok/en',
                                     '../data/hal/dev/tok/ja'],
                     dictionaries=['../data/hal/train/tok/en.pkl', 
                                   '../data/hal/train/tok/ja.pkl'],
                     use_dropout=params['use-dropout'][0])
    return validerr
Example #4
File: train.py Project: isofun/NLP
def main(job_id, params):
    # print params

    validerr = train(
        saveto=params['saveto'][0],
        loadfrom=params['loadfrom'][0],
        reload_=params['reload'][0],
        dim_word=params['dim_word'][0],
        dim=params['dim'][0],
        n_words_src=params['n-words-src'][0],
        decay_c=params['decay-c'][0],
        clip_c=params['clip-c'][0],
        lrate=params['learning-rate'][0],
        optimizer=params['optimizer'][0],
        maxlen=50,
        batch_size=80,
        valid_batch_size=80,
        validFreq=100,
        dispFreq=10,
        saveFreq=20000,
        sampleFreq=100,
        max_epochs=5000,  # maximum number of epochs
        patience=1000,  # early stop patience with BLEU score
        finish_after=1000000,  # max updates
        datasets=['../data/cn.1w_with_unk.txt'],
        valid_datasets=['../NIST/MT02/en0'],
        dictionaries=['../data/en.txt.shuf.pkl'],
        use_dropout=params['use-dropout'][0],
        overwrite=False,
        **bleuvalid_params)  # bleuvalid_params is defined elsewhere in the source file

    return validerr
Example #5
def main(job_id, params):
    print(params)
    validerr = train(datasets=[
        'data/training/news-commentary-v9.fr-en.fr.tok',
        'data/training/news-commentary-v9.fr-en.en.tok'
    ],
                     valid_datasets=[
                         'data/dev/newstest2013.fr.tok',
                         'data/dev/newstest2013.en.tok'
                     ],
                     dictionaries=[
                         'data/training/news-commentary-v9.fr-en.fr.tok.pkl',
                         'data/training/news-commentary-v9.fr-en.en.tok.pkl'
                     ],
                     saveto=params['model'][0],
                     reload_=params['reload'][0],
                     dim_word=params['dim_word'][0],
                     dim=params['dim'][0],
                     n_words=params['n-words'][0],
                     n_words_src=params['n-words'][0],
                     decay_c=params['decay-c'][0],
                     clip_c=params['clip-c'][0],
                     lrate=params['learning-rate'][0],
                     optimizer=params['optimizer'][0],
                     maxlen=15,
                     batch_size=32,
                     valid_batch_size=32,
                     validFreq=100,
                     dispFreq=100,
                     saveFreq=100,
                     sampleFreq=1000,
                     patience=10,
                     use_dropout=params['use-dropout'][0],
                     overwrite=False)
    return validerr
Example #6
def main(job_id, params):
    print params
    validerr = train(saveto=params['model'][0],
                     reload_=params['reload'][0],
                     dim_word=params['dim_word'][0],
                     dim=params['dim'][0],
                     n_words=params['n-words'][0],
                     n_words_src=params['n-words'][0],
                     decay_c=params['decay-c'][0],
                     lrate=params['learning-rate'][0],
                     optimizer=params['optimizer'][0],
                     maxlen_src=300,
                     maxlen_trg=75,
                     batch_size=32,
                     valid_batch_size=32,
                     datasets=['/Users/HyNguyen/Documents/Research/Data/stories_4nncnn/test.content.tok',
                               '/Users/HyNguyen/Documents/Research/Data/stories_4nncnn/test.summary.tok'],
                     valid_datasets=['/Users/HyNguyen/Documents/Research/Data/stories_4nncnn/train100.content.tok',
                                     '/Users/HyNguyen/Documents/Research/Data/stories_4nncnn/train100.summary.tok'],
                     dictionaries=['/Users/HyNguyen/Documents/Research/Data/stories_4nncnn/dict.content.pkl',
                                   '/Users/HyNguyen/Documents/Research/Data/stories_4nncnn/dict.summary.pkl'],
                     validFreq=5000,
                     dispFreq=10,
                     saveFreq=5000,
                     sampleFreq=1000,
                     use_dropout=params['use-dropout'][0],
                     overwrite=False)
    return validerr
Example #7
def main(job_id, params):
    print ('timestamp {} {}'.format('running', time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
    print (params)
    validerr = train(saveto=params['model'][0],
                     reload_=params['reload'][0],
                     dim_word=params['dim_word'][0],
                     dim=params['dim'][0],
                     n_words=params['n-words'][0],
                     n_words_src=params['n-words'][0],
                     decay_c=params['decay-c'][0],
                     clip_c=params['clip-c'][0],
                     lrate=params['learning-rate'][0],
                     optimizer=params['optimizer'][0],
                     patience=10000,
                     maxlen=60,
                     batch_size=80,
                     validFreq_fine=2000,
                     validFreq=10000,
                     val_burn_in=20000,
                     val_burn_in_fine=400000,
                     dispFreq=20,
                     saveFreq=2000,
                     sampleFreq=200,
                     datasets=['/data/ycli/resource/wmt2017/deen/corpus.tc.en.bpe',
                               '/data/ycli/resource/wmt2017/deen/corpus.tc.de.bpe'],
                     valid_datasets=['/data/ycli/resource/wmt2017/deen/valid/valid_en_bpe',
                                     '/data/ycli/resource/wmt2017/deen/valid/valid_de_bpe',
                                     './data/valid_out'],
                     dictionaries=['/data/ycli/resource/wmt2017/deen/vocab/v30-bpe/vocab_en.pkl',
                                   '/data/ycli/resource/wmt2017/deen/vocab/v30-bpe/vocab_de.pkl'],
                     use_dropout=params['use-dropout'][0],
                     overwrite=False,
                     valid_mode=params['valid_mode'][1],
                     bleu_script=params['bleu_script'][0])
    return validerr
Example #8
def main(job_id, params):
    print 'Anything printed here will end up in the output directory for job #%d' % job_id
    print params
    trainerr, validerr, testerr = train(
        saveto=params['model'][0],
        reload_=params['reload'][0],
        dim_word=params['dim_word'][0],
        dim=params['dim'][0],
        encoder='gru',
        decoder='gru_cond',
        hiero='gru_hiero',  # or None
        n_words_src=params['n-words-src'][0],
        n_words=params['n-words'][0],
        decay_c=params['decay-c'][0],
        alpha_c=params['alpha-c'][0],
        lrate=params['learning-rate'][0],
        optimizer=params['optimizer'][0],
        maxlen=50,
        batch_size=64,
        valid_batch_size=64,
        validFreq=1000,
        dispFreq=1,
        saveFreq=500,
        sampleFreq=10,
        dataset='openmt15zhen',
        dictionary='./openmt15/vocab.en.pkl',
        dictionary_src='./openmt15/vocab.zh.pkl',
        use_dropout=True if params['use-dropout'][0] else False)
    return validerr
Example #9
def main(job_id, params):
    print params
    validerr = train(saveto=params['model'][0],
                     reload_=params['reload'][0],
                     dim_word=params['dim_word'][0],
                     dim=params['dim'][0],
                     n_words=params['n-words'][0],
                     n_words_src=params['n-words'][0],
                     decay_c=params['decay-c'][0],
                     clip_c=params['clip-c'][0],
                     lrate=params['learning-rate'][0],
                     optimizer=params['optimizer'][0],
                     patience=1000,
                     maxlen=50,
                     batch_size=32,
                     valid_batch_size=32,
                     validFreq=100,
                     dispFreq=10,
                     saveFreq=1000,
                     sampleFreq=100,
                     datasets=['/home/zhouh/Data/nmt/corpus.ch',
                               '/home/zhouh/Data/nmt/corpus.en'],
                     valid_datasets=['/home/zhouh/Data/nmt/devntest/MT02/MT02.src',
                                     '/home/zhouh/Data/nmt/devntest/MT02/reference0'],
                     dictionaries=['/home/zhouh/Data/nmt/corpus.ch.pkl',
                                   '/home/zhouh/Data/nmt/corpus.en.pkl'],
                     use_dropout=params['use-dropout'][0],
                     overwrite=False)
    return validerr
Example #10
def main(job_id, params):
    print params
    validerr = train(saveto=params['model'][0],
                     reload_=params['reload'][0],
                     dim_word=params['dim_word'][0],
                     dim=params['dim'][0],
                     n_words=params['n-words'][0],
                     n_words_src=params['n-words'][0],
                     decay_c=params['decay-c'][0],
                     clip_c=params['clip-c'][0],
                     lrate=params['learning-rate'][0],
                     optimizer=params['optimizer'][0],
                     maxlen=250,
                     batch_size=32,
                     valid_batch_size=32,
                     datasets=['../data/newstest2011.content.tok',
                               '../data/newstest2011.summary.tok'],
                     valid_datasets=['../data/newstest2011.content.tok',
                                     '../data/newstest2011.summary.tok'],
                     dictionaries=['../data/all_content-summary.content.tok.bpe.pkl',
                                   '../data/all_content-summary.summary.tok.bpe.pkl'],
                     validFreq=5000,
                     dispFreq=10,
                     saveFreq=5000,
                     sampleFreq=1000,
                     use_dropout=params['use-dropout'][0],
                     overwrite=False)
    return validerr
Example #11
def main(job_id, params):
    print params
    basedir = '/data/lisatmp3/firatorh/nmt/europarlv7'
    validerr = train(saveto=params['model'][0],
                     reload_=params['reload'][0],
                     dim_word=params['dim_word'][0],
                     dim=params['dim'][0],
                     n_words=params['n-words'][0],
                     n_words_src=params['n-words'][0],
                     decay_c=params['decay-c'][0],
                     clip_c=params['clip-c'][0],
                     lrate=params['learning-rate'][0],
                     optimizer=params['optimizer'][0],
                     maxlen=15,
                     batch_size=32,
                     valid_batch_size=32,
                     datasets=['%s/europarl-v7.fr-en.fr.tok' % basedir,
                               '%s/europarl-v7.fr-en.en.tok' % basedir],
                     valid_datasets=['%s/newstest2011.fr.tok' % basedir,
                                     '%s/newstest2011.en.tok' % basedir],
                     dictionaries=['%s/europarl-v7.fr-en.fr.tok.pkl' % basedir,
                                   '%s/europarl-v7.fr-en.en.tok.pkl' % basedir],
                     validFreq=500000,
                     dispFreq=1,
                     saveFreq=100,
                     sampleFreq=50,
                     use_dropout=params['use-dropout'][0])
    return validerr
Example #12
def main(job_id, params):
    print 'Anything printed here will end up in the output directory for job #%d' % job_id
    print params
    trainerr, validerr, testerr = train(saveto=params['model'][0],
                                        reload_=params['reload'][0],
                                        dim_word=params['dim_word'][0],
                                        dim=params['dim'][0],
                                        encoder='gru',
                                        decoder='gru_cond_simple',
                                        hiero=None, #'gru_hiero', # or None
                                        n_words_src=params['n-words-src'][0],
                                        n_words=params['n-words'][0],
                                        decay_c=params['decay-c'][0],
                                        alpha_c=params['alpha-c'][0],
                                        lrate=params['learning-rate'][0],
                                        optimizer=params['optimizer'][0], 
                                        maxlen=100,
                                        batch_size=64,
                                        valid_batch_size=64,
                                        validFreq=1000,
                                        dispFreq=1,
                                        saveFreq=500,
                                        sampleFreq=10,
                                        dataset='stan',
                                        dictionary='./stan/vocab_and_data_sub_europarl/vocab_sub_europarl.fr.pkl',
                                        dictionary_src='./stan/vocab_and_data_sub_europarl/vocab_sub_europarl.en.pkl',
                                        use_dropout=False)
    return validerr
Example #13
def main(job_id, params):
    print "Anything printed here will end up in the output directory for job #%d" % job_id
    print params
    trainerr, validerr, testerr = train(
        saveto=params["model"][0],
        reload_=params["reload"][0],
        dim_word=params["dim_word"][0],
        dim=params["dim"][0],
        n_words=params["n-words"][0],
        n_words_src=params["n-words"][0],
        decay_c=params["decay-c"][0],
        lrate=params["learning-rate"][0],
        optimizer=params["optimizer"][0],
        maxlen=20,
        batch_size=16,
        valid_batch_size=16,
        validFreq=1000,
        dispFreq=1,
        saveFreq=1000,
        sampleFreq=1000,
        dataset="wmt14enfr",
        dictionary="/data/lisatmp3/chokyun/wmt14/parallel-corpus/en-fr/vocab.fr.pkl",
        use_dropout=True if params["use-dropout"][0] else False,
    )
    return validerr
Example #14
def main(job_id, params):
    print params
    validerr = train(saveto=params['model'][0],
                     reload_=params['reload'][0],
                     dim_word=params['dim_word'][0],
                     dim=params['dim'][0],
                     n_words=params['n-words'][0],
                     n_words_src=params['n-words-src'][0],
                     decay_c=params['decay-c'][0],
                     clip_c=params['clip-c'][0],
                     lrate=params['learning-rate'][0],
                     optimizer=params['optimizer'][0],
                     patience=1000,
                     maxlen=50,
                     batch_size=32,
                     valid_batch_size=32,
                     validFreq=100,
                     dispFreq=10,
                     saveFreq=100,
                     sampleFreq=100,
                     datasets=['../../data/train.en.tok',
                               '../../data/train.de.tok'],
                     valid_datasets=['../../data/val.en.tok',
                                     '../../data/val.de.tok'],
                     dictionaries=['../../data/train.en.tok.pkl',
                                   '../../data/train.de.tok.pkl'],
                     use_dropout=params['use-dropout'][0],
                     overwrite=False)
    return validerr
Example #15
def main(job_id, params):
    print params
    basedir = "/data/lisatmp3/firatorh/nmt/europarlv7"
    validerr = train(
        saveto=params["model"][0],
        reload_=params["reload"][0],
        dim_word=params["dim_word"][0],
        dim=params["dim"][0],
        n_words=params["n-words"][0],
        n_words_src=params["n-words"][0],
        decay_c=params["decay-c"][0],
        clip_c=params["clip-c"][0],
        lrate=params["learning-rate"][0],
        optimizer=params["optimizer"][0],
        maxlen=15,
        batch_size=32,
        valid_batch_size=32,
        datasets=["%s/europarl-v7.fr-en.fr.tok" % basedir, "%s/europarl-v7.fr-en.en.tok" % basedir],
        valid_datasets=["%s/newstest2011.fr.tok" % basedir, "%s/newstest2011.en.tok" % basedir],
        dictionaries=["%s/europarl-v7.fr-en.fr.tok.pkl" % basedir, "%s/europarl-v7.fr-en.en.tok.pkl" % basedir],
        validFreq=500000,
        dispFreq=1,
        saveFreq=100,
        sampleFreq=50,
        use_dropout=params["use-dropout"][0],
        overwrite=False,
    )
    return validerr
Example #16
def main(job_id, params):
    print 'Anything printed here will end up in the output directory for job #%d' % job_id
    print params
    trainerr, validerr, testerr = train(
        saveto=params['model'][0],
        reload_=params['reload'][0],
        dim_word=params['dim_word'][0],
        dim=params['dim'][0],
        n_words=params['n-words'][0],
        n_words_src=params['n-words-src'][0],
        decay_c=params['decay-c'][0],
        alpha_c=params['alpha-c'][0],
        lrate=params['learning-rate'][0],
        optimizer=params['optimizer'][0],
        maxlen=20,
        batch_size=16,
        valid_batch_size=16,
        validFreq=1000,
        dispFreq=1,
        saveFreq=500,
        sampleFreq=10,
        dataset='iwslt14zhen',
        dictionary='/data/lisatmp3/firatorh/nmt/zh-en_lm/trainedModels/unionFinetuneRnd/union_dict.pkl',
        use_dropout=True if params['use-dropout'][0] else False)
    return validerr
Example #17
def main(job_id, params):
    print 'Anything printed here will end up in the output directory for job #%d' % job_id
    print params
    trainerr, validerr, testerr = train(
        saveto=params['model'][0],
        reload_=params['reload'][0],
        dim_word=params['dim_word'][0],
        dim=params['dim'][0],
        n_words=params['n-words'][0],
        n_words_src=params['n-words'][0],
        decay_c=params['decay-c'][0],
        lrate=params['learning-rate'][0],
        optimizer=params['optimizer'][0],
        maxlen=20,
        batch_size=160,
        valid_batch_size=16,
        validFreq=1000,
        dispFreq=1,
        saveFreq=1000,
        sampleFreq=1000,
        dataset='mydata',
        dictionary='v_dst_wi.pkl',
        dictionary_src='v_src_wi.pkl',
        use_dropout=True if params['use-dropout'][0] else False)
    return validerr
Example #18
def main(job_id, params):
    print params
    username = os.environ['USER']
    validerr = train(
        saveto=params['model'][0],
        reload_=params['reload'][0],
        dim_word=params['dim_word'][0],
        dim=params['dim'][0],
        n_words=params['n-words'][0],
        n_words_src=params['n-words'][0],
        decay_c=params['decay-c'][0],
        lrate=params['learning-rate'][0],
        optimizer=params['optimizer'][0],
        maxlen=50,
        batch_size=32,
        valid_batch_size=32,
        datasets=[
            '/ichec/home/users/%s/data/all.en.concat.shuf.gz' % username,
            '/ichec/home/users/%s/data/all.fr.concat.shuf.gz' % username],
        valid_datasets=[
            '/ichec/home/users/%s/data/newstest2011.en.tok' % username,
            '/ichec/home/users/%s/data/newstest2011.fr.tok' % username],
        dictionaries=[
            '/ichec/home/users/%s/data/all.en.concat.gz.pkl' % username,
            '/ichec/home/users/%s/data/all.fr.concat.gz.pkl' % username],
        validFreq=5000,
        dispFreq=10,
        saveFreq=5000,
        sampleFreq=1000,
        use_dropout=params['use-dropout'][0],
        overwrite=False)
    return validerr
Example #19
def main(job_id, params):
    print params
    validerr = train(saveto=params['model'][0],
                     reload_=params['reload'][0],
                     dim_word=params['dim_word'][0],
                     dim=params['dim'][0],
                     n_words=params['n-words'][0],
                     n_words_src=params['n-words'][0],
                     decay_c=params['decay-c'][0],
                     clip_c=params['clip-c'][0],
                     lrate=params['learning-rate'][0],
                     optimizer=params['optimizer'][0],
                     maxlen=50,
                     batch_size=32,
                     valid_batch_size=32,
                     datasets=['/home/xqli/data/big/big.ch',
                               '/home/xqli/data/big/big.en'],
                     valid_datasets=['/home/xqli/data/nist/03.seg',
                                     '/home/xqli/data/nist/03.en'],
                     dictionaries=['/home/xqli/data/big/big.ch.pkl',
                                   '/home/xqli/data/big/big.en.pkl'],
                     validFreq=5000,
                     dispFreq=10,
                     saveFreq=5000,
                     sampleFreq=1000,
                     use_dropout=params['use-dropout'][0])
    return validerr
Example #20
def main(job_id, params):
    print params

    bleu_params = {
        'valid_path': '../data/validate/',
        'temp_dir': '../temp/',
        'translate_script': 'translate_gpu.py',
        'bleu_script': 'multi-bleu.perl'
    }
    validerr = train(
        saveto=params['model'][0],
        reload_=params['reload'][0],
        dim=params['dim'][0],
        n_words=params['n-words'][0],
        n_words_src=params['n-words'][0],
        decay_c=params['decay-c'][0],
        clip_c=params['clip-c'][0],
        lrate=params['learning-rate'][0],
        optimizer=params['optimizer'][0],
        patience=1000,
        patience_bleu=100,
        maxlen=100,
        batch_size=100,
        valid_batch_size=100,
        validFreq=100,
        dispFreq=10,
        saveFreq=100,
        sampleFreq=100,
        datasets=['../data/cn.txt.sort', '../data/en.txt.sort'],
        valid_datasets=['../data/MT02.cn.dev', '../data/MT02.en.dev'],
        dictionaries=['../data/cn.txt.pkl', '../data/en.txt.pkl'],
        use_dropout=params['use-dropout'][0],
        overwrite=True,
        **bleu_params)
    return validerr
Example #21
def main(job_id, params):
    print params
    validerr = train(
        saveto=params['model'][0],
        reload_=params['reload'][0],
        dim_word=params['dim_word'][0],
        dim=params['dim'][0],
        n_words=params['n-words'][0],
        n_words_src=params['n-words'][0],
        decay_c=params['decay-c'][0],
        lrate=params['learning-rate'][0],
        optimizer=params['optimizer'][0],
        maxlen=50,
        batch_size=32,
        valid_batch_size=32,
        datasets=[
            '/home/ubuntu/codes/dl4mt-tutorial/data/europarl-v7.fr-en.en.tok',
            '/home/ubuntu/codes/dl4mt-tutorial/data/europarl-v7.fr-en.fr.tok'
        ],
        valid_datasets=[
            '/home/ubuntu/codes/dl4mt-tutorial/data/newstest2011.en.tok',
            '/home/ubuntu/codes/dl4mt-tutorial/data/newstest2011.fr.tok'
        ],
        dictionaries=[
            '/home/ubuntu/codes/dl4mt-tutorial/data/europarl-v7.fr-en.en.tok.pkl',
            '/home/ubuntu/codes/dl4mt-tutorial/data/europarl-v7.fr-en.fr.tok.pkl'
        ],
        validFreq=5000,
        dispFreq=10,
        saveFreq=5000,
        sampleFreq=1000,
        use_dropout=params['use-dropout'][0],
        overwrite=False)
    return validerr
Example #22
def main(job_id, params):
    print params
    username = os.environ['USER']
    validerr = train(
        saveto=params['model'][0],
        reload_=params['reload'][0],
        dim_word=params['dim_word'][0],
        dim=params['dim'][0],
        n_words=params['n-words'][0],
        n_words_src=params['n-words'][0],
        decay_c=params['decay-c'][0],
        lrate=params['learning-rate'][0],
        optimizer=params['optimizer'][0],
        maxlen=50,
        batch_size=32,
        valid_batch_size=32,
        datasets=[
            '/ichec/home/users/%s/data/all.en.concat.shuf.gz' % username,
            '/ichec/home/users/%s/data/all.fr.concat.shuf.gz' % username
        ],
        valid_datasets=[
            '/ichec/home/users/%s/data/newstest2011.en.tok' % username,
            '/ichec/home/users/%s/data/newstest2011.fr.tok' % username
        ],
        dictionaries=[
            '/ichec/home/users/%s/data/all.en.concat.gz.pkl' % username,
            '/ichec/home/users/%s/data/all.fr.concat.gz.pkl' % username
        ],
        validFreq=5000,
        dispFreq=10,
        saveFreq=5000,
        sampleFreq=1000,
        use_dropout=params['use-dropout'][0],
        overwrite=False)
    return validerr
Example #23
def main(job_id, params):
    print params
    validerr = train(
        saveto=params['model'][0],
        reload_=params['reload'][0],
        dim_word=params['dim_word'][0],
        dim_enc=params['dim_enc'],  # multi layer
        dim_dec=params['dim_dec'][0],
        dim_attention=params['dim_attention'][0],
        dim_coverage=params['dim_coverage'][0],
        kernel_coverage=params['kernel_coverage'][0],
        down_sample=params['down_sample'],
        dim_target=params['dim_target'][0],
        dim_feature=params['dim_feature'][0],
        decay_c=params['decay-c'][0],
        clip_c=params['clip-c'][0],
        lrate=params['learning-rate'][0],
        model_cost_coeff=params['model_cost_coeff'][0],
        optimizer=params['optimizer'][0],
        patience=15,
        maxlen=params['maxlen'][0],
        batch_size=8,
        valid_batch_size=8,
        validFreq=-1,
        dispFreq=100,
        saveFreq=-1,
        sampleFreq=-1,
        datasets=['../data/online-train.pkl', '../data/train_data_v1.txt'],
        valid_datasets=['../data/online-test.pkl', '../data/test_data_v1.txt'],
        dictionaries=['../data/dictionary.txt'],
        valid_output=['./result/valid_decode_result.txt'],
        valid_result=['./result/valid.wer'],
        use_dropout=params['use-dropout'][0])
    return validerr
Example #24
def main(job_id, params):
    print 'Anything printed here will end up in the output directory for job #%d' % job_id
    print params
    trainerr, validerr, testerr = train(saveto=params['model'][0],
                                        reload_=params['reload'][0],
                                        dim_word=params['dim_word'][0],
                                        dim=params['dim'][0],
                                        n_words=params['n-words'][0],
                                        n_words_src=params['n-words-src'][0],
                                        decay_c=params['decay-c'][0],
                                        alpha_c=params['alpha-c'][0],
                                        lrate=params['learning-rate'][0],
                                        optimizer=params['optimizer'][0], 
                                        encoder='gru',
                                        decoder='gru_cond', #'gru_cond_simple',
                                        maxlen=30,
                                        batch_size=128,
                                        valid_batch_size=128,
                                        validFreq=1000,
                                        dispFreq=1,
                                        saveFreq=500,
                                        sampleFreq=500,
                                        dataset='trans_enhi', 
                                        dictionary='/data/lisatmp3/chokyun/transliteration/TranslitDataset/vocab.hi.pkl',
                                        dictionary_src='/data/lisatmp3/chokyun/transliteration/TranslitDataset/vocab.en.pkl',
                                        use_dropout=True if params['use-dropout'][0] else False)
    return validerr
Example #25
def main(job_id, params):
    print params
    validerr = train(saveto=params['model'][0],
                     reload_=params['reload'][0],
                     dim_word=params['dim_word'][0],
                     dim=params['dim'][0],
                     n_words=params['n-words'][0],
                     n_words_src=params['n-words'][0],
                     decay_c=params['decay-c'][0],
                     clip_c=params['clip-c'][0],
                     lrate=params['learning-rate'][0],
                     optimizer=params['optimizer'][0], 
                     patience=1000,
                     maxlen=50,
                     batch_size=32,
                     valid_batch_size=32,
                     validFreq=100,
                     dispFreq=10,
                     saveFreq=100,
                     sampleFreq=100,
                     datasets=['/veu4/usuaris29/mruiz/tfg-imagenes/train/train.un.zh', 
                               '/veu4/usuaris29/mruiz/tfg-imagenes/train/train.un.es'],
                     valid_datasets=['/veu4/usuaris29/mruiz/tfg-imagenes/dev/dev.un.zh',
                                     '/veu4/usuaris29/mruiz/tfg-imagenes/dev/dev.un.es'],
                     dictionaries=['/veu4/usuaris29/mruiz/tfg-imagenes/train/vocab.zh.pkl', 
                                   '/veu4/usuaris29/mruiz/tfg-imagenes/train/vocab.es.pkl'],
                     use_dropout=params['use-dropout'][0])
    return validerr
Example #26
def main(job_id, params):
    print(params)
    validerr = train(
        datasets=['data/all.en.concat.shuf.gz', 'data/all.fr.concat.shuf.gz'],
        valid_datasets=[
            'data/newstest2011.en.tok', 'data/newstest2011.fr.tok'
        ],
        dictionaries=[
            'data/all.en.concat.gz.pkl', 'data/all.fr.concat.gz.pkl'
        ],
        saveto=params['model'][0],
        reload_=params['reload'][0],
        dim_word=params['dim_word'][0],
        dim=params['dim'][0],
        n_words=params['n-words'][0],
        n_words_src=params['n-words'][0],
        decay_c=params['decay-c'][0],
        clip_c=params['clip-c'][0],
        lrate=params['learning-rate'][0],
        optimizer=params['optimizer'][0],
        maxlen=50,
        batch_size=32,
        valid_batch_size=32,
        validFreq=5000,
        dispFreq=10,
        saveFreq=5000,
        sampleFreq=1000,
        use_dropout=params['use-dropout'][0],
        overwrite=False)
    return validerr
Example #27
def main(job_id, params):
    print params
    validerr = train(
        saveto=params['model'][0],
        reload_=params['reload'][0],
        dim_word=params['dim_word'][0],
        dim=params['dim'][0],
        n_words=params['n-words'][0],
        n_words_src=params['n-words'][0],
        decay_c=params['decay-c'][0],
        clip_c=params['clip-c'][0],
        lrate=params['learning-rate'][0],
        optimizer=params['optimizer'][0],
        patience=1000,
        maxlen=50,
        batch_size=32,
        valid_batch_size=32,
        validFreq=100,
        dispFreq=10,
        saveFreq=100,
        sampleFreq=100,
        datasets=['../data/hal/train/tok/en', '../data/hal/train/tok/fr'],
        valid_datasets=['../data/hal/dev/tok/en', '../data/hal/dev/tok/fr'],
        dictionaries=[
            '../data/hal/train/tok/en.pkl', '../data/hal/train/tok/fr.pkl'
        ],
        use_dropout=params['use-dropout'][0],
        overwrite=False)
    return validerr
Example #28
def main(job_id, params):
    print params
    validerr = train(saveto=params['model'][0],
                     reload_=params['reload'][0],
                     dim_word=params['dim_word'][0],
                     dim=params['dim'][0],
                     n_words=params['n-words'][0],
                     n_words_src=params['n-words'][0],
                     decay_c=params['decay-c'][0],
                     lrate=params['learning-rate'][0],
                     optimizer=params['optimizer'][0],
                     maxlen=50,
                     batch_size=32,
                     valid_batch_size=32,
                     datasets=['/ichec/home/users/%s/data/europarl-v7.fr-en.en.tok' % os.environ['USER'],
                               '/ichec/home/users/%s/data/europarl-v7.fr-en.fr.tok' % os.environ['USER']],
                     valid_datasets=['/ichec/home/users/%s/data/newstest2011.en.tok' % os.environ['USER'],
                                     '/ichec/home/users/%s/data/newstest2011.fr.tok' % os.environ['USER']],
                     dictionaries=['/ichec/home/users/%s/data/europarl-v7.fr-en.en.tok.pkl' % os.environ['USER'],
                                   '/ichec/home/users/%s/data/europarl-v7.fr-en.fr.tok.pkl' % os.environ['USER']],
                     validFreq=5000,
                     dispFreq=10,
                     saveFreq=5000,
                     sampleFreq=1000,
                     use_dropout=params['use-dropout'][0])
    return validerr
Example #29
def main(job_id, params):
    print params
    basedir = '/data/lisatmp3/firatorh/nmt/europarlv7'
    validerr = train(saveto=params['model'][0],
                     reload_=params['reload'][0],
                     dim_word=params['dim_word'][0],
                     dim=params['dim'][0],
                     n_words=params['n-words'][0],
                     n_words_src=params['n-words'][0],
                     decay_c=params['decay-c'][0],
                     clip_c=params['clip-c'][0],
                     lrate=params['learning-rate'][0],
                     optimizer=params['optimizer'][0],
                     maxlen=15,
                     batch_size=32,
                     valid_batch_size=32,
                     datasets=[
                         '%s/europarl-v7.fr-en.fr.tok' % basedir,
                         '%s/europarl-v7.fr-en.en.tok' % basedir
                     ],
                     valid_datasets=[
                         '%s/newstest2011.fr.tok' % basedir,
                         '%s/newstest2011.en.tok' % basedir
                     ],
                     dictionaries=[
                         '%s/europarl-v7.fr-en.fr.tok.pkl' % basedir,
                         '%s/europarl-v7.fr-en.en.tok.pkl' % basedir
                     ],
                     validFreq=500000,
                     dispFreq=1,
                     saveFreq=100,
                     sampleFreq=50,
                     use_dropout=params['use-dropout'][0])
    return validerr
Example #30
def main(job_id, params):
    print params
    validerr = train(saveto=params['model'][0],
                     reload_=params['reload'][0],
                     dim_word=params['dim_word'][0],
                     dim_enc=params['dim_enc'], # multi layer
                     dim_dec=params['dim_dec'][0], 
                     dim_coverage=params['dim_coverage'][0],
                     down_sample=params['down_sample'],
                     dim_target=params['dim_target'][0],
                     dim_feature=params['dim_feature'][0],
                     decay_c=params['decay-c'][0],
                     clip_c=params['clip-c'][0],
                     lrate=params['learning-rate'][0],
                     gamma=params['alphas-gamma'][0],
                     optimizer=params['optimizer'][0], 
                     patience=15,
                     maxlen=params['maxlen'][0],
                     batch_size=8,
                     valid_batch_size=8,
                     validFreq=-1,
                     dispFreq=100,
                     saveFreq=-1,
                     sampleFreq=-1,
                     datasets=['../data/online-train.pkl',
                               '../data/train_caption.txt',
                               '../data/align-online-train.pkl'],
                     valid_datasets=['../data/online-test.pkl',
                                     '../data/test_caption.txt'],
                     dictionaries=['../data/dictionary.txt'],
                     use_dropout=params['use-dropout'][0])
    return validerr
Example #31
    def test_DPM(self):
        """
        Verifies that the correct cost is calculated for DPM.
        :return: 0 on success
        """
        logging.info("Starting Test for DPM..")
        working_dir = DATA_DIR + "DPM/"
        prepare_base_model(working_dir)
        cost, prepared_rewards, prepared_word_propensities, reweigh_sum = \
            train(saveto="%smodel.npz" % working_dir,
                  reload_=True,
                  shuffle_each_epoch=False,
                  datasets=DATA_SETS,
                  dictionaries=DICTIONARIES,
                  objective='CL',
                  cl_deterministic=True,
                  cl_log=LOG_PREFIX + ".json",
                  unittest=True)

        true_cost = -0.7047157462492611
        self.assertAlmostEqual(true_cost, cost)
        true_prepared_rewards = numpy.array([
            1., 0.8222672, 1., 0.8274377, 0.7813821, 0.7813821, 0.6673543, 1.,
            0.6093617, 1.
        ])
        numpy.testing.assert_almost_equal(true_prepared_rewards,
                                          prepared_rewards)
        true_prepared_word_propensities = numpy.zeros(shape=(22, 10))
        numpy.testing.assert_almost_equal(true_prepared_word_propensities,
                                          prepared_word_propensities)
        true_reweigh_sum = 0.0
        numpy.testing.assert_almost_equal(true_reweigh_sum, reweigh_sum)
        shutil.rmtree(working_dir)
        logging.info("Finished Test for DPM..")
        return 0
Example #32
    def test_DPM_T_OSL(self):
        """
        Verifies that the correct cost is calculated for DPM+T+OSL
        :return: 0 on success
        """
        logging.info("Starting Test for DPM+T+OSL..")
        working_dir = DATA_DIR + "DPM_T_OSL/"
        prepare_base_model(working_dir)
        cost, prepared_rewards, prepared_word_propensities, reweigh_sum = \
            train(saveto="%smodel.npz" % working_dir,
                  reload_=True,
                  shuffle_each_epoch=False,
                  datasets=DATA_SETS,
                  dictionaries=DICTIONARIES,
                  objective='CL',
                  cl_deterministic=True,
                  cl_log=LOG_PREFIX + ".json",
                  cl_external_reward=WORD_REWARD,
                  cl_reweigh=True,
                  cl_word_rewards=True,
                  unittest=True)

        true_cost = -1.1665413125898798
        self.assertAlmostEqual(true_cost, cost)
        true_prepared_rewards = numpy.array(
            [[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
             [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
             [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
             [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
             [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
             [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
             [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
             [1., 1., 1., 1., 1., 1., 0., 1., 0., 1.],
             [1., 1., 1., 1., 0., 0., 0., 1., 1., 1.],
             [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
             [1., 0., 1., 1., 1., 1., 1., 1., 0., 1.],
             [1., 1., 1., 0., 1., 1., 0., 0., 0., 0.],
             [1., 1., 1., 1., 1., 1., 0., 0., 0., 0.],
             [1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],
             [1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],
             [1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],
             [1., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
             [1., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
             [1., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
             [1., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
             [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
             [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])
        numpy.testing.assert_almost_equal(true_prepared_rewards,
                                          prepared_rewards)
        true_prepared_word_propensities = numpy.zeros(shape=(22, 10))
        numpy.testing.assert_almost_equal(true_prepared_word_propensities,
                                          prepared_word_propensities)
        true_reweigh_sum = 0.826496987098
        numpy.testing.assert_almost_equal(true_reweigh_sum, reweigh_sum)
        shutil.rmtree(working_dir)
        logging.info("Finished Test for DPM+T+OSL..")
        return 0
Example #33
def main(job_id, params):
    print params
    validerr = train(
        saveto=params['model'][0],
        reload_=params['reload'][0],
        dim_word=params['dim_word'][0],
        dim_relation=params['dim_relation'][0],
        dim_enc=params['dim_enc'],  # multi layer
        dim_dec=params['dim_dec'][0],
        dim_coverage=params['dim_coverage'][0],
        down_sample=params['down_sample'],
        dim_attention=params['dim_attention'][0],
        dim_reattention=params['dim_reattention'][0],
        dim_target=params['dim_target'][0],
        dim_retarget=params['dim_retarget'][0],
        dim_feature=params['dim_feature'][0],
        decay_c=params['decay-c'][0],
        clip_c=params['clip-c'][0],
        lrate=params['learning-rate'][0],
        la=params['lambda-align'][0],
        lb=params['lambda-realign'][0],
        optimizer=params['optimizer'][0],
        patience=12,
        max_xlen=params['max_xlen'][0],
        max_ylen=params['max_ylen'][0],
        batch_size=8,
        valid_batch_size=8,
        validFreq=-1,
        validStart=-10,
        dispFreq=100,
        saveFreq=-1,
        sampleFreq=-1,
        # root_path is defined elsewhere in the source file
        datasets=[
            root_path + '9feature-train-dis-0.005-revise-pad-v5.pkl',
            root_path + '9feature-train-dis-0.005-revise-pad-v5-mask.pkl',
            root_path + 'train-label-r1.pkl',
            root_path + 'align-train-dis-0.005-revise-pad-v5-r1.pkl',
            root_path + 'related-align-train-dis-0.005-revise-pad-v5-r1.pkl'
        ],
        valid_datasets=[
            root_path + '9feature-valid-dis-0.005-revise-pad-v5.pkl',
            root_path + '9feature-valid-dis-0.005-revise-pad-v5-mask.pkl',
            root_path + 'test-label-r1.pkl',
            root_path + 'align-test-dis-0.005-revise-pad-v5-r1.pkl',
            root_path + 'related-align-test-dis-0.005-revise-pad-v5-r1.pkl'
        ],
        dictionaries=[
            root_path + 'dictionary.txt',
            root_path + '6relation_dictionary.txt',
        ],
        valid_output=[
            './result/symbol_relation/', './result/alignment/',
            './result/relation_alignment/'
        ],
        valid_result=['./result/valid.cer'],
        use_dropout=params['use-dropout'][0])
    return validerr
Example #34
def main(job_id, params):
    re_load = False
    save_file_name = 'bpe2char_biscale_decoder_attc_adam'
    source_dataset = params['train_data_path'] + params['source_dataset']
    target_dataset = params['train_data_path'] + params['target_dataset']
    valid_source_dataset = params['dev_data_path'] + params[
        'valid_source_dataset']
    valid_target_dataset = params['dev_data_path'] + params[
        'valid_target_dataset']
    source_dictionary = params['train_data_path'] + params['source_dictionary']
    target_dictionary = params['train_data_path'] + params['target_dictionary']

    print params, params['save_path'], save_file_name
    validerr = train(
        max_epochs=int(params['max_epochs']),
        patience=int(params['patience']),
        dim_word=int(params['dim_word']),
        dim_word_src=int(params['dim_word_src']),
        save_path=params['save_path'],
        save_file_name=save_file_name,
        re_load=re_load,
        enc_dim=int(params['enc_dim']),
        dec_dim=int(params['dec_dim']),
        n_words=int(params['n_words']),
        n_words_src=int(params['n_words_src']),
        decay_c=float(params['decay_c']),
        lrate=float(params['learning_rate']),
        optimizer=params['optimizer'],
        maxlen=int(params['maxlen']),
        maxlen_trg=int(params['maxlen_trg']),
        maxlen_sample=int(params['maxlen_sample']),
        batch_size=int(params['batch_size']),
        valid_batch_size=int(params['valid_batch_size']),
        sort_size=int(params['sort_size']),
        validFreq=int(params['validFreq']),
        dispFreq=int(params['dispFreq']),
        saveFreq=int(params['saveFreq']),
        sampleFreq=int(params['sampleFreq']),
        clip_c=int(params['clip_c']),
        datasets=[source_dataset, target_dataset],
        valid_datasets=[valid_source_dataset, valid_target_dataset],
        dictionaries=[source_dictionary, target_dictionary],
        use_dropout=int(params['use_dropout']),
        source_word_level=int(params['source_word_level']),
        target_word_level=int(params['target_word_level']),
        # layers, init_params, build_model, build_sampler and gen_sample are
        # imported elsewhere in the source file
        layers=layers,
        save_every_saveFreq=1,
        use_bpe=1,
        init_params=init_params,
        build_model=build_model,
        build_sampler=build_sampler,
        gen_sample=gen_sample,
    )
    return validerr
Example #35
def main(job_id, params):
    re_load = False
    save_file_name = 'bpe2bpe_two_layer_gru_decoder_adam'
    source_dataset = params['train_data_path'] + params['source_dataset']
    target_dataset = params['train_data_path'] + params['target_dataset']
    valid_source_dataset = params['dev_data_path'] + params['valid_source_dataset']
    valid_target_dataset = params['dev_data_path'] + params['valid_target_dataset']
    source_dictionary = params['train_data_path'] + params['source_dictionary']
    target_dictionary = params['train_data_path'] + params['target_dictionary']

    print params, params['save_path'], save_file_name
    validerr = train(
        max_epochs=int(params['max_epochs']),
        patience=int(params['patience']),
        dim_word=int(params['dim_word']),
        dim_word_src=int(params['dim_word_src']),
        save_path=params['save_path'],
        save_file_name=save_file_name,
        re_load=re_load,
        enc_dim=int(params['enc_dim']),
        dec_dim=int(params['dec_dim']),
        n_words=int(params['n_words']),
        n_words_src=int(params['n_words_src']),
        decay_c=float(params['decay_c']),
        lrate=float(params['learning_rate']),
        optimizer=params['optimizer'],
        maxlen=int(params['maxlen']),
        maxlen_trg=int(params['maxlen_trg']),
        maxlen_sample=int(params['maxlen_sample']),
        batch_size=int(params['batch_size']),
        valid_batch_size=int(params['valid_batch_size']),
        sort_size=int(params['sort_size']),
        validFreq=int(params['validFreq']),
        dispFreq=int(params['dispFreq']),
        saveFreq=int(params['saveFreq']),
        sampleFreq=int(params['sampleFreq']),
        clip_c=int(params['clip_c']),
        datasets=[source_dataset, target_dataset],
        valid_datasets=[valid_source_dataset, valid_target_dataset],
        dictionaries=[source_dictionary, target_dictionary],
        use_dropout=int(params['use_dropout']),
        source_word_level=int(params['source_word_level']),
        target_word_level=int(params['target_word_level']),
        layers=layers,
        save_every_saveFreq=1,
        use_bpe=1,
        init_params=init_params,
        build_model=build_model,
        build_sampler=build_sampler,
        gen_sample=gen_sample
    )
    return validerr
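Note: unlike the Spearmint-style hooks elsewhere on this page, the two functions above expect a flat dict of string-valued settings (e.g. parsed from a config file) and cast each value themselves with int()/float(). A hypothetical fragment of such a dict, with illustrative values only:

# Illustrative only; key names match what the functions above read.
params = {
    'max_epochs': '20',
    'patience': '5',
    'learning_rate': '0.0001',
    'batch_size': '64',
    'train_data_path': './data/',
    'source_dataset': 'corpus.bpe.en',
    'target_dataset': 'corpus.bpe.de',
    # ...remaining keys are string-valued in the same way
}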
Example #36
def main(job_id, params):
    print params
    validerr = train(
        saveto=params['model'][0],
        bn_saveto=params['bn_model'][0],
        reload_=params['reload'][0],
        dim_word=params['dim_word'][0],
        dim_dec=params['dim_dec'][0],
        dim_attention=params['dim_attention'][0],
        dim_coverage=params['dim_coverage'][0],
        kernel_coverage=params['kernel_coverage'],
        kernel_conv1=params['kernel_conv1'],
        stride_conv1=params['stride_conv1'],
        channel_conv1=params['channel_conv1'][0],
        GrowthRate=params['GrowthRate'][0],
        DenseBlock=params['DenseBlock'],
        Bottleneck=params['Bottleneck'][0],
        Transition=params['Transition'][0],
        dim_target=params['dim_target'][0],
        input_channels=params['input_channels'][0],
        decay_c=params['decay-c'][0],
        clip_c=params['clip-c'][0],
        lrate=params['learning-rate'][0],
        optimizer=params['optimizer'][0],
        patience=15,
        maxlen=params['maxlen'][0],
        maxImagesize=params['maxImagesize'][0],
        batch_Imagesize=500000,
        valid_batch_Imagesize=500000,
        batch_size=16,
        valid_batch_size=16,
        validFreq=-1,
        dispFreq=100,
        saveFreq=-1,
        sampleFreq=-1,
        datasets=['../data/offline-train.pkl', '../data/train_data_v1.txt'],
        valid_datasets=[
            '../data/offline-test.pkl', '../data/test_data_v1.txt'
        ],
        dictionaries=['../data/dictionary.txt'],
        valid_output=['./result/valid_decode_result.txt'],
        valid_result=['./result/valid.wer'],
        use_dropout=params['use-dropout'][0])
    return validerr
Example #37
def main(job_id, params):
    print params
    trainerr, validerr, testerr = train(saveto=params['model'][0],
                                        reload_=params['reload'][0],
                                        dim_word=params['dim_word'][0],
                                        dim=params['dim'][0],
                                        n_words=params['n-words'][0],
                                        n_words_src=params['n-words'][0],
                                        decay_c=params['decay-c'][0],
                                        lrate=params['learning-rate'][0],
                                        optimizer=params['optimizer'][0], 
                                        maxlen=50,
                                        batch_size=4,
                                        valid_batch_size=4,
                                        validFreq=5000,
                                        dispFreq=10,
                                        saveFreq=5000,
                                        sampleFreq=10,
                                        use_dropout=params['use-dropout'][0])
    return validerr
Example #38
def main(job_id, params):
    print params
    validerr = train(
        saveto=params['model'][0],
        reload_=params['reload'][0],
        dim_word=params['dim_word'][0],
        dim_chunk=params['dim_chunk'][0],
        dim_chunk_hidden=params['dim_chunk_hidden'][0],
        dim=params['dim'][0],
        n_words=params['n-words'][0],
        n_words_src=params['n-words'][0],
        decay_c=params['decay-c'][0],
        clip_c=params['clip-c'][0],
        lrate=params['learning-rate'][0],
        optimizer=params['optimizer'][0],
        patience=1000,
        batch_size=2,
        valid_batch_size=2,
        validFreq=3,
        dispFreq=10,
        saveFreq=10,
        sampleFreq=10,
        maxlen_chunk=30,  # maximum length in chunks
        maxlen_chunk_words=50,  # maximum words per chunk
        datasets=[
            '/home/zhouh/workspace/python/nmtdata/small.ch',
            '/home/zhouh/workspace/python/nmtdata/small.en.chunked'
        ],
        valid_datasets=[
            '/home/zhouh/workspace/python/nmtdata/small.ch',
            '/home/zhouh/workspace/python/nmtdata/small.en.chunked'
        ],
        dictionaries=[
            '/home/zhouh/workspace/python/nmtdata/small.ch.pkl',
            '/home/zhouh/workspace/python/nmtdata/small.en.chunked.pkl'
        ],
        dictionary_chunk='/home/zhouh/workspace/python/nmtdata/small.en.chunked.chunktag.pkl',
        use_dropout=params['use-dropout'][0],
        overwrite=False)
    return validerr
Example #39
def main():
    now = datetime.datetime.now(dateutil.tz.tzlocal())
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')

    root_log_dir = "./logs/"
    exp_name = "encDecAtt_%s" % timestamp

    train_err, valid_err, test_err = train(
        dim_word=dim_word,
        dim=dim,
        encoder=encoder,
        decoder=decoder,
        hiero=None,  # 'gru_hiero', # or None
        patience=patience,
        max_epochs=max_epochs,
        dispFreq=dispFreq,
        decay_c=0.,
        alpha_c=0.,
        diag_c=0.,
        lrate=0.01,
        n_words_src=n_words_src,
        n_words=n_words,
        maxlen=maxlen,
        optimizer=optimizer,
        batch_size=batch_size,
        valid_batch_size=valid_batch_size,
        saveto=saveto,
        validFreq=validFreq,
        saveFreq=saveFreq,
        sampleFreq=sampleFreq,
        dataset=dataset,
        dictionary=dictionary,
        dictionary_src=dictionary,
        use_dropout=False,
        reload_=reload_,
        correlation_coeff=0.1,
        clip_c=1.,
        dataset_=dataset_,
        use_context=use_context,
        dim_context=dim_context,
        dataset_size=dataset_size)
Example #40
def main(job_id, params):
    print(params)
    data_name = '../../data'
    validerr = train(
        saveto=params['model'][0],
        reload_=params['reload'][0],
        dim_word=params['dim_word'][0],
        dim=params['dim'][0],
        n_words=params['n-words'][0],
        n_words_src=params['n-words'][0],
        decay_c=params['decay-c'][0],
        lrate=params['learning-rate'][0],
        optimizer=params['optimizer'][0],
        maxlen=40,
        batch_size=32,
        valid_batch_size=32,
        datasets=[
            data_name + '/q_train.txt', data_name + '/r_train.txt',
            data_name + '/s_train.txt'
        ],
        valid_datasets=[
            data_name + '/q_val.txt', data_name + '/r_val.txt',
            data_name + '/s_val.txt'
        ],
        dictionaries=[data_name + '/dict.pkl', data_name + '/dict.pkl'],
        validFreq=100,
        dispFreq=100,
        saveFreq=100,
        sampleFreq=100,
        use_dropout=params['use-dropout'][0],
        overwrite=True,
        max_epochs=30,
        senti_num=2,
        senti_dim=64,
        weight_d=1.,
        weight_h=1.,
        style_class=True,
        style_adv=False,
        adv_thre=1,
        patience=10000)
    return validerr
Example #41
def main(job_id, params):
    print(params)
    validerr = train(
        saveto=params['model'][0],
        reload_=params['reload'][0],
        dim_word=params['dim_word'][0],
        dim=params['dim'][0],
        n_words=params['n-words'][0],
        n_words_src=params['n-words'][0],
        decay_c=params['decay-c'][0],
        clip_c=params['clip-c'][0],
        lrate=params['learning-rate'][0],
        optimizer=params['optimizer'][0],
        patience=1000,
        maxlen=50,
        batch_size=32,
        valid_batch_size=32,
        validFreq=100,
        dispFreq=100,
        saveFreq=1000,
        sampleFreq=1000,
        datasets=[
            '/home/chenhd/data/zh2en/tree/corpus.ch',
            '/home/chenhd/data/zh2en/tree/corpus.en'
        ],
        valid_datasets=[
            '/home/chenhd/data/zh2en/devntest/MT02/MT02.src',
            '/home/chenhd/data/zh2en/devntest/MT02/reference0'
        ],
        dictionaries=[
            '/home/chenhd/data/zh2en/tree/corpus.ch.pkl',
            '/home/chenhd/data/zh2en/tree/corpus.en.pkl'
        ],
        treeset=[
            '/home/chenhd/data/zh2en/tree/corpus.ch.tree',
            '/home/chenhd/data/zh2en/devntest/MT02/MT02.ce.tree'
        ],
        use_dropout=params['use-dropout'][0],
        # shuffle_each_epoch=True,
        overwrite=False)
    return validerr
Example #42
import time


def main(job_id, params):
    print('timestamp {} {}'.format(
        'running', time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
    print(params)
    validerr = train(saveto=params['model'][0],
                     reload_=params['reload'][0],
                     dim_word=params['dim_word'][0],
                     dim=params['dim'][0],
                     n_words=params['n-words'][0],
                     n_words_src=params['n-words'][0],
                     decay_c=params['decay-c'][0],
                     clip_c=params['clip-c'][0],
                     lrate=params['learning-rate'][0],
                     optimizer=params['optimizer'][0],
                     patience=1000,
                     maxlen=50,
                     batch_size=80,
                     validFreq_fine=5000,
                     validFreq=5000,
                     val_burn_in=20000,
                     val_burn_in_fine=90000,
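                     # val_burn_in / val_burn_in_fine presumably delay
                     # validation until that many updates have elapsed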
                     dispFreq=20,
                     saveFreq=2000,
                     sampleFreq=200,
                     datasets=[
                         '/home/ycli/resource/hw/ch.txt.shuffle',
                         '/home/ycli/resource/hw/en.txt.shuffle'
                     ],
                     valid_datasets=[
                         '/home/ycli/resource/hw/valid/valid_src',
                         '/home/ycli/resource/hw/valid/valid_trg',
                         './data/valid_out'
                     ],
                     dictionaries=[
                         '/home/ycli/resource/hw/vocab/vocab_src.pkl',
                         '/home/ycli/resource/hw/vocab/vocab_trg.pkl'
                     ],
                     use_dropout=params['use-dropout'][0],
                     overwrite=False)
    return validerr
Example #43
def main(job_id, params):
    print(params)
    validerr = train(
        saveto=params['model'][0],
        bn_saveto=params['bn_model'][0],
        reload_=params['reload'][0],
        dim_word=params['dim_word'][0],
        kernel_Convenc=params['kernel_Convenc'],
        dim_ConvBlock=params['dim_ConvBlock'],
        layersNum_block=params['layersNum_block'],
        dim_dec=params['dim_dec'][0],
        dim_attention=params['dim_attention'][0],
        dim_coverage=params['dim_coverage'][0],
        kernel_coverage=params['kernel_coverage'],
        dim_target=params['dim_target'][0],
        input_channels=params['input_channels'][0],
        decay_c=params['decay-c'][0],
        clip_c=params['clip-c'][0],
        lrate=params['learning-rate'][0],
        optimizer=params['optimizer'][0],
        patience=15,
        maxlen=params['maxlen'][0],
        maxImagesize=params['maxImagesize'][0],
        batch_Imagesize=500000,
        valid_batch_Imagesize=500000,
        batch_size=8,
        valid_batch_size=8,
        validFreq=-1,
        dispFreq=100,
        saveFreq=-1,
        sampleFreq=-1,
        datasets=['../data/offline-train.pkl', '../data/train_caption.txt'],
        valid_datasets=[
            '../data/offline-test.pkl', '../data/test_caption.txt'
        ],
        dictionaries=['../data/dictionary.txt'],
        valid_output=['./result/valid_decode_result.txt'],
        valid_result=['./result/valid.wer'],
        use_dropout=params['use-dropout'][0])
    return validerr
Example #44
def main(job_id, params):
    print(params)
    validerr = train(
        saveto=params['model'][0],
        reload_=params['reload'][0],
        dim_word=params['dim_word'][0],
        dim_chunk=params['dim_chunk'][0],
        dim_chunk_hidden=params['dim_chunk_hidden'][0],
        dim=params['dim'][0],
        n_words=params['n-words'][0],
        n_words_src=params['n-words'][0],
        decay_c=params['decay-c'][0],
        clip_c=params['clip-c'][0],
        lrate=params['learning-rate'][0],
        optimizer=params['optimizer'][0],
        patience=10000,
        batch_size=32,
        valid_batch_size=32,
        validFreq=100,
        dispFreq=10,
        saveFreq=1000,
        sampleFreq=100,
        maxlen_chunk_words=50,  # presumably the maximum number of words per chunk
        datasets=[
            '/home/zhouh/Data/nmt/hms.ch.filter',
            '/home/zhouh/Data/nmt/hms.en.filter.chunked'
        ],
        valid_datasets=[
            '/home/zhouh/Data/nmt/devntest/MT02/MT02.src',
            '/home/zhouh/Data/nmt/devntest/MT02/reference0.tag.chunked.chunked'
        ],
        dictionaries=[
            '/home/zhouh/Data/nmt/hms.ch.filter.pkl',
            '/home/zhouh/Data/nmt/hms.en.filter.chunked.pkl'
        ],
        dictionary_chunk='/home/zhouh/Data/nmt/hms.en.filter.chunked.chunktag.pkl',
        use_dropout=params['use-dropout'][0],
        overwrite=False)
    return validerr
Example #45
def main(job_id, params):
    print('Anything printed here will end up in the output directory for job #%d' % job_id)
    print(params)
    trainerr, validerr, testerr = train(saveto=params['model'][0],
                                        reload_=params['reload'][0],
                                        dim_word=params['dim_word'][0],
                                        dim=params['dim'][0],
                                        n_words=params['n-words'][0],
                                        n_words_src=params['n-words'][0],
                                        decay_c=params['decay-c'][0],
                                        lrate=params['learning-rate'][0],
                                        optimizer=params['optimizer'][0], 
                                        maxlen=20,
                                        batch_size=160,
                                        valid_batch_size=16,
                                        validFreq=1000,
                                        dispFreq=1,
                                        saveFreq=1000,
                                        sampleFreq=1000,
                                        dataset='mydata',
                                        dictionary='v_dst_wi.pkl',
                                        dictionary_src='v_src_wi.pkl',
                                        use_dropout=bool(params['use-dropout'][0]))
    return validerr
Example #46
import logging


def main():
    logging.basicConfig(format="%(asctime)s : %(levelname)s : %(message)s", level=logging.INFO)
    args = setup_args()
    logging.info(args)

    validerr = train(
        saveto=args.model + ".npz",
        reload_=False,
        dim=args.dimhidden,
        dim_word=args.dimword,
        n_words=args.targetwords,
        n_words_src=args.srcwords,
        decay_c=args.decay,
        clip_c=args.clipc,
        alpha_c=args.alphac,
        lrate=args.lr,
        optimizer="adam",
        patience=1000,
        maxlen=args.maxlen,
        batch_size=args.batch,
        valid_batch_size=args.batch,
        validFreq=args.validfreq,
        dispFreq=args.dispfreq,
        saveFreq=args.savefreq,
        sampleFreq=args.samplefreq,
        baseDir=args.basedir,
        word2vecFile=args.wordvec,
        datasets=["train_src.txt", "train_target.txt"],
        valid_datasets=["valid_src.txt", "valid_target.txt"],
        # dictionaries=['src.txt.pkl', 'target.txt.pkl'],
        dictionaries=["all.txt.pkl"],
        use_dropout=False,
        overwrite=True,
    )

    logging.info("FINAL Validation error: " + str(validerr))
Example #47
def main(job_id, params):
    print('Anything printed here will end up in the output directory for job #%d' % job_id)
    print(params)
    trainerr, validerr, testerr = train(saveto=params['model'][0],
                                        reload_=params['reload'][0],
                                        dim_word=params['dim_word'][0],
                                        dim=params['dim'][0],
                                        n_words=params['n-words'][0],
                                        n_words_src=params['n-words-src'][0],
                                        decay_c=params['decay-c'][0],
                                        alpha_c=params['alpha-c'][0],
                                        lrate=params['learning-rate'][0],
                                        optimizer=params['optimizer'][0], 
                                        maxlen=20,
                                        batch_size=16,
                                        valid_batch_size=16,
                                        validFreq=1000,
                                        dispFreq=1,
                                        saveFreq=500,
                                        sampleFreq=10,
                                        dataset='iwslt14zhen', 
                                        dictionary='/data/lisatmp3/firatorh/nmt/zh-en_lm/trainedModels/unionFinetuneRnd/union_dict.pkl',
                                        use_dropout=bool(params['use-dropout'][0]))
    return validerr
Example #48

if __name__ == '__main__':
    validerr = train(saveto='model/model.npz',
                    reload_=True,
                    dim_word=500,
                    dim=1024,
                    n_words=VOCAB_SIZE,
                    n_words_src=VOCAB_SIZE,
                    decay_c=0.,
                    clip_c=1.,
                    lrate=0.0001,
                    optimizer='adadelta',
                    maxlen=50,
                    batch_size=80,
                    valid_batch_size=80,
                    datasets=[DATA_DIR + '/corpus.bpe.' + SRC, DATA_DIR + '/corpus.bpe.' + TGT],
                    valid_datasets=[DATA_DIR + '/newsdev2016.bpe.' + SRC, DATA_DIR + '/newsdev2016.bpe.' + TGT],
                    dictionaries=[DATA_DIR + '/corpus.bpe.' + SRC + '.json', DATA_DIR + '/corpus.bpe.' + TGT + '.json'],
                    validFreq=10000,
                    dispFreq=1000,
                    saveFreq=30000,
                    sampleFreq=10000,
                    use_dropout=False,
                    dropout_embedding=0.2, # dropout for input embeddings (0: no dropout)
                    dropout_hidden=0.2, # dropout for hidden layers (0: no dropout)
                    dropout_source=0.1, # dropout source words (0: no dropout)
                    dropout_target=0.1, # dropout target words (0: no dropout)
                    overwrite=False,
                    external_validation_script='validate.sh')
    print(validerr)
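Example #48 assumes that VOCAB_SIZE, DATA_DIR, SRC and TGT were defined earlier in the script, outside this excerpt. A hypothetical sketch, with placeholder values only:

# Hypothetical definitions assumed by Example #48; values are placeholders.
VOCAB_SIZE = 90000            # size of the shared BPE vocabulary
DATA_DIR = '/path/to/data'    # directory holding the corpus.bpe.* files
SRC, TGT = 'de', 'en'         # source / target language suffixes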
Example #49
trainerr, validerr, testerr = train(saveto=modelName,
                                    reload_=False,
                                    dim_word=dim_word,
                                    dim=dim_model,
                                    encoder='gru',
                                    decoder='gru_cond_double',
                                    # decoder='gru_cond',
                                    hiero=None,  # alternative: 'gru_hiero'
                                    max_epochs=100,
                                    n_words_src=n_words_src,
                                    n_words=n_words_trg,
                                    optimizer='adadelta',
                                    decay_c=0.,
                                    alpha_c=0.,
                                    diag_c=0.,  # not used with adadelta
                                    lrate=lr,
                                    patience=10,
                                    maxlen=50,
                                    batch_size=batch_size,
                                    valid_batch_size=batch_size,
                                    validFreq=nb_batch_epoch,  # how often (in batches) to compute the cost on the train, valid and test sets
                                    dispFreq=nb_batch_epoch,  # how often (in batches) to display the cost of a batch (1 = every batch)
                                    saveFreq=nb_batch_epoch,  # how often (in batches) to save the model
                                    sampleFreq=nb_batch_epoch,  # how often (in batches) to generate samples
                                    dataset=dataset,
                                    dictionary=dictionary_trg,
                                    dictionary_src=dictionary_src,
                                    use_dropout=False,
                                    clip_c=1.)
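Like Example #39, this last snippet depends on module-level settings defined elsewhere in its script. A hypothetical sketch of those definitions, with names taken from the call above and purely illustrative values:

# Hypothetical settings assumed by Example #49; values are placeholders.
modelName = 'model.npz'
dim_word, dim_model = 512, 1024
n_words_src = n_words_trg = 30000
lr = 0.0001
batch_size = 32
nb_batch_epoch = 1000         # batches per epoch; reused for all *Freq args
dataset = 'mydata'
dictionary_src = 'v_src_wi.pkl'
dictionary_trg = 'v_dst_wi.pkl'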