Esempio n. 1
0
def get_config_EsFr2En_single():
    """Assemble the configuration stored under ``'esfr2en_single'``.

    Starts from ``prototype_config_mSrc`` for the ``es_en`` and ``fr_en``
    entries and then overrides the batch sizes, vocabulary paths and
    sizes, dataset paths, checkpointing values and log-probability sets.

    Returns:
        The populated configuration wrapped in ``ReadOnlyDict``.
    """
    pair_names = ['es_en', 'fr_en']
    encoder_ids, decoder_ids = get_enc_dec_ids_mSrc(pair_names)

    # Base directory handed to every ``get_paths`` call below.
    data_root = 'dl4mt-multi-src/data'

    def resolve(ids, template):
        # Thin wrapper so the base directory is spelled out only once.
        return get_paths(ids, template, data_root)

    # Prototype first, experiment-specific overrides afterwards.
    cfg = prototype_config_mSrc(pair_names)
    cfg['saveto'] = 'esfr2en_single'
    cfg['batch_sizes'] = get_odict(pair_names, 80)

    # Vocabulary files and their sizes (two source sizes, one target size).
    cfg['src_vocabs'] = resolve(encoder_ids, paths.src_vocabs)
    cfg['trg_vocabs'] = resolve(decoder_ids, paths.trg_vocabs)
    cfg['src_vocab_sizes'] = get_odict_pair(encoder_ids, [20624, 20335])
    cfg['trg_vocab_sizes'] = get_odict(decoder_ids, 20212)

    # Training data.
    cfg['src_datas'] = resolve(pair_names, paths.src_datas)
    cfg['trg_datas'] = resolve(pair_names, paths.trg_datas)

    # Checkpointing / validation schedule.
    cfg['save_freq'] = 5000
    cfg['val_burn_in'] = 1

    # Sets used for log-probability validation.
    cfg['log_prob_sets'] = resolve(pair_names, paths.log_prob_sets)

    return ReadOnlyDict(cfg)
Esempio n. 2
0
def get_config_multiWay():
    """Assemble the configuration stored under ``'multiWay'``.

    Starts from ``prototype_config_multiCG_08`` for the ``fi_en``,
    ``de_en`` and ``en_de`` entries and then fills in vocabulary and
    dataset paths, BLEU validation settings and log-probability sets.

    Returns:
        The populated configuration wrapped in ``ReadOnlyDict``.
    """
    pair_names = ['fi_en', 'de_en', 'en_de']
    encoder_ids, decoder_ids = get_enc_dec_ids(pair_names)

    # Base directory handed to every ``get_paths`` call; empty here.
    root = ''

    def resolve(ids, template):
        # Thin wrapper so the base directory is spelled out only once.
        return get_paths(ids, template, root)

    # Prototype first, experiment-specific overrides afterwards.
    cfg = prototype_config_multiCG_08(pair_names)
    cfg['saveto'] = 'multiWay'

    # Vocabulary and training data locations.
    cfg['src_vocabs'] = resolve(encoder_ids, src_vocabs)
    cfg['trg_vocabs'] = resolve(decoder_ids, trg_vocabs)
    cfg['src_datas'] = resolve(pair_names, src_datas)
    cfg['trg_datas'] = resolve(pair_names, trg_datas)

    # BLEU-based validation settings.
    cfg['save_freq'] = 5000
    # NOTE(review): with an empty base directory this evaluates to
    # '/multi-bleu.perl' -- confirm that is the intended location.
    cfg['bleu_script'] = root + '/multi-bleu.perl'
    cfg['val_sets'] = resolve(pair_names, val_sets_src)
    cfg['val_set_grndtruths'] = resolve(pair_names, val_sets_ref)
    # Reads the 'cgs' entry of the prototype and the 'saveto' set above.
    cfg['val_set_outs'] = get_val_set_outs(cfg['cgs'], cfg['saveto'])
    cfg['val_burn_in'] = 1

    # Sets used for log-probability validation.
    cfg['log_prob_sets'] = resolve(pair_names, log_prob_sets)

    return ReadOnlyDict(cfg)
Esempio n. 3
0
def get_config_multiWay():
    """Assemble the configuration stored under ``'multiWay'``.

    Starts from ``prototype_config_multiCG_08`` for the ``fi_en``,
    ``de_en`` and ``en_de`` entries and then fills in vocabulary and
    dataset paths, BLEU validation settings and log-probability sets.

    Returns:
        The populated configuration wrapped in ``ReadOnlyDict``.
    """
    pair_names = ['fi_en', 'de_en', 'en_de']
    encoder_ids, decoder_ids = get_enc_dec_ids(pair_names)

    # Base directory handed to every ``get_paths`` call; empty here.
    root = ''

    def resolve(ids, template):
        # Thin wrapper so the base directory is spelled out only once.
        return get_paths(ids, template, root)

    # Prototype first, experiment-specific overrides afterwards.
    cfg = prototype_config_multiCG_08(pair_names)
    cfg['saveto'] = 'multiWay'

    # Vocabulary and training data locations.
    cfg['src_vocabs'] = resolve(encoder_ids, src_vocabs)
    cfg['trg_vocabs'] = resolve(decoder_ids, trg_vocabs)
    cfg['src_datas'] = resolve(pair_names, src_datas)
    cfg['trg_datas'] = resolve(pair_names, trg_datas)

    # BLEU-based validation settings.
    cfg['save_freq'] = 5000
    # NOTE(review): with an empty base directory this evaluates to
    # '/multi-bleu.perl' -- confirm that is the intended location.
    cfg['bleu_script'] = root + '/multi-bleu.perl'
    cfg['val_sets'] = resolve(pair_names, val_sets_src)
    cfg['val_set_grndtruths'] = resolve(pair_names, val_sets_ref)
    # Reads the 'cgs' entry of the prototype and the 'saveto' set above.
    cfg['val_set_outs'] = get_val_set_outs(cfg['cgs'], cfg['saveto'])
    cfg['val_burn_in'] = 1

    # Sets used for log-probability validation.
    cfg['log_prob_sets'] = resolve(pair_names, log_prob_sets)

    return ReadOnlyDict(cfg)
Esempio n. 4
0
def get_config_single():
    """Assemble the configuration stored under ``'single'``.

    Starts from ``prototype_config_multiCG_08`` for the lone ``de_en``
    entry and then sets batch size, schedule, vocabulary and dataset
    paths, BLEU validation settings and log-probability sets.

    Returns:
        The populated configuration wrapped in ``ReadOnlyDict``.
    """
    pair_names = ['de_en']
    cfg = prototype_config_multiCG_08(pair_names)
    encoder_ids, decoder_ids = get_enc_dec_ids(pair_names)

    # Base directory handed to every ``get_paths`` call; empty here.
    root = ''

    def resolve(ids, template):
        # Thin wrapper so the base directory is spelled out only once.
        return get_paths(ids, template, root)

    cfg['saveto'] = 'single'

    # One entry per pair name; there is only 'de_en' in this setup.
    cfg['batch_sizes'] = OrderedDict((name, 80) for name in pair_names)
    cfg['schedule'] = OrderedDict((name, 12) for name in pair_names)

    # Vocabulary and training data locations.
    cfg['src_vocabs'] = resolve(encoder_ids, src_vocabs)
    cfg['trg_vocabs'] = resolve(decoder_ids, trg_vocabs)
    cfg['src_datas'] = resolve(pair_names, src_datas)
    cfg['trg_datas'] = resolve(pair_names, trg_datas)

    # Checkpointing and BLEU-based validation settings.
    cfg['save_freq'] = 5000
    cfg['val_burn_in'] = 60000
    # NOTE(review): with an empty base directory this evaluates to
    # '/multi-bleu.perl' -- confirm that is the intended location.
    cfg['bleu_script'] = root + '/multi-bleu.perl'
    cfg['val_sets'] = resolve(pair_names, val_sets_src)
    cfg['val_set_grndtruths'] = resolve(pair_names, val_sets_ref)
    # Reads the 'cgs' entry of the prototype and the 'saveto' set above.
    cfg['val_set_outs'] = get_val_set_outs(cfg['cgs'], cfg['saveto'])

    # Sets used for log-probability validation.
    cfg['log_prob_sets'] = resolve(pair_names, log_prob_sets)

    return ReadOnlyDict(cfg)
Esempio n. 5
0
def get_config_single():
    """Assemble the configuration stored under ``'single'``.

    Starts from ``prototype_config_multiCG_08`` for the lone ``de_en``
    entry and then sets batch size, schedule, vocabulary and dataset
    paths, BLEU validation settings and log-probability sets.

    Returns:
        The populated configuration wrapped in ``ReadOnlyDict``.
    """
    pair_names = ['de_en']
    cfg = prototype_config_multiCG_08(pair_names)
    encoder_ids, decoder_ids = get_enc_dec_ids(pair_names)

    # Base directory handed to every ``get_paths`` call; empty here.
    root = ''

    def resolve(ids, template):
        # Thin wrapper so the base directory is spelled out only once.
        return get_paths(ids, template, root)

    cfg['saveto'] = 'single'

    # One entry per pair name; there is only 'de_en' in this setup.
    cfg['batch_sizes'] = OrderedDict((name, 80) for name in pair_names)
    cfg['schedule'] = OrderedDict((name, 12) for name in pair_names)

    # Vocabulary and training data locations.
    cfg['src_vocabs'] = resolve(encoder_ids, src_vocabs)
    cfg['trg_vocabs'] = resolve(decoder_ids, trg_vocabs)
    cfg['src_datas'] = resolve(pair_names, src_datas)
    cfg['trg_datas'] = resolve(pair_names, trg_datas)

    # Checkpointing and BLEU-based validation settings.
    cfg['save_freq'] = 5000
    cfg['val_burn_in'] = 60000
    # NOTE(review): with an empty base directory this evaluates to
    # '/multi-bleu.perl' -- confirm that is the intended location.
    cfg['bleu_script'] = root + '/multi-bleu.perl'
    cfg['val_sets'] = resolve(pair_names, val_sets_src)
    cfg['val_set_grndtruths'] = resolve(pair_names, val_sets_ref)
    # Reads the 'cgs' entry of the prototype and the 'saveto' set above.
    cfg['val_set_outs'] = get_val_set_outs(cfg['cgs'], cfg['saveto'])

    # Sets used for log-probability validation.
    cfg['log_prob_sets'] = resolve(pair_names, log_prob_sets)

    return ReadOnlyDict(cfg)