Esempio n. 1
0
# Needed so that lattice and Unigram can be imported
import sys
sys.path.append("../src/")
from UnigramTrainerAlign import train_align

if __name__ == "__main__":
    # Values that are identical for both corpus sides; each per-side dict
    # below adds only its own input ("file") and output ("voc") paths.
    shared_settings = dict(
        shrinking_rate=0.75,
        use_original_make_seed=True,
        desired_voc_size=8000,
        seed_sentence_piece_size=1e5,
    )

    # "file" and "voc" are listed first so the resulting key order is
    # file, voc, followed by the shared settings.
    en_settings = dict(
        file="../corpus/train50K.en",
        voc="../test50K/voc/align.allA.en.voc",
        **shared_settings
    )
    ja_settings = dict(
        file="../corpus/train50K.jap",
        voc="../test50K/voc/align.allA.jap.voc",
        **shared_settings
    )

    # allA=True is forwarded as-is; its meaning is defined by train_align
    # in UnigramTrainerAlign, which is not shown here.
    train_align(en_settings, ja_settings, allA=True)
Esempio n. 2
0
        "seed_sentence_piece_size": 1e5
    }
    # Settings for the "allA" run on the .jap side of the corpus. Within the
    # lines visible here it is referenced only by a commented-out
    # train_align call further down, so it is not used by the active call.
    arg_ja_allA = {
        "file": "../corpus/train.jap",
        "voc": "./res_voc/align.allA.jap.voc",
        "shrinking_rate": 0.75,
        "use_original_make_seed": True,
        "desired_voc_size": 8000,
        "seed_sentence_piece_size": 1e5
    }
    # Settings for the "alter" run, .en side: input corpus path and the path
    # used for the "voc" entry, plus the same numeric settings as above.
    arg_en_alter = {
        "file": "../corpus/train.en",
        "voc": "./res_voc/align.alter.en.voc",
        "shrinking_rate": 0.75,
        "desired_voc_size": 8000,
        "use_original_make_seed": True,
        "seed_sentence_piece_size": 1e5
    }
    # Settings for the "alter" run, .jap side.
    arg_ja_alter = {
        "file": "../corpus/train.jap",
        "voc": "./res_voc/align.alter.jap.voc",
        "shrinking_rate": 0.75,
        "use_original_make_seed": True,
        "desired_voc_size": 8000,
        "seed_sentence_piece_size": 1e5
    }
    # Alternative runs, currently disabled. NOTE(review): arg_mini_en,
    # arg_mini_ja, arg_en, arg_ja and arg_en_allA are not defined in the
    # visible lines -- presumably they are defined earlier in the script;
    # verify before re-enabling any of these calls.
    #train_align(arg_mini_en,arg_mini_ja,alter=True)
    #train_align(arg_en,arg_ja)
    #train_align(arg_en_allA,arg_ja_allA,allA=True)
    # Active run: only the two "alter" dicts are passed, with alter=True.
    train_align(arg_en_alter, arg_ja_alter, alter=True)
Esempio n. 3
0
# Needed so that lattice and Unigram can be imported
import sys
sys.path.append("../src/")
from UnigramTrainerAlign import train_align

if __name__ == "__main__":
    # Options common to both corpus sides.
    # NOTE(review): the size key here is "vocab_size", while other example
    # scripts calling train_align use "desired_voc_size" -- confirm which
    # key train_align actually reads.
    training_options = {
        "use_original_make_seed": True,
        "shrinking_rate": 0.75,
        "vocab_size": 8000,
        "seed_sentence_piece_size": 1e5,
    }

    # Start each dict with its own paths, then append the common options so
    # the key order is file, voc, followed by the options above.
    arg_en = {
        "file": "../corpus/train.en",
        "voc": "../testFull/voc/align.alpha001.en.voc",
    }
    arg_en.update(training_options)

    arg_ja = {
        "file": "../corpus/train.jap",
        "voc": "../testFull/voc/align.alpha001.jap.voc",
    }
    arg_ja.update(training_options)

    # alpha=0.01 is forwarded unchanged to train_align.
    train_align(arg_en, arg_ja, alpha=0.01)
Esempio n. 4
0
# Needed so that lattice and Unigram can be imported
import sys
sys.path.append("../src/")
from UnigramTrainerAlign import train_align

if __name__ == "__main__":
    # Settings shared by the .en and .jap sides of the 50K corpus.
    common = {
        "use_original_make_seed": True,
        "shrinking_rate": 0.75,
        "desired_voc_size": 8000,
        "seed_sentence_piece_size": 1e5,
    }

    # Per-side dicts: own paths first, then the shared settings unpacked
    # after them, giving the key order file, voc, <shared settings>.
    en_side = {
        "file": "../corpus/train50K.en",
        "voc": "../test50K/voc/align.en.voc",
        **common
    }
    ja_side = {
        "file": "../corpus/train50K.jap",
        "voc": "../test50K/voc/align.jap.voc",
        **common
    }

    # No extra keyword arguments: train_align runs with its own defaults.
    train_align(en_side, ja_side)
Esempio n. 5
0
# Needed so that lattice and Unigram can be imported
import sys
sys.path.append("../src/")
from UnigramTrainerAlign import train_align

# Module-level switches: both values are copied into each settings dict
# below, and `debug` is also passed to train_align as a keyword argument.
debug = True
debug_dir = "./debug_alpha1/"

if __name__ == "__main__":
    # NOTE(review): the size key here is "vocab_size", while other example
    # scripts calling train_align use "desired_voc_size" -- confirm which
    # key train_align actually reads.
    # The two dicts list "debug" and "vocab_size" in different orders; each
    # is spelled out separately so that order is kept as written.
    small_en = dict(
        file="../corpus/train5K.en",
        voc="./res_voc/dummy.en.voc",
        use_original_make_seed=True,
        vocab_size=4000,
        debug=debug,
        debug_dir=debug_dir,
    )
    small_ja = dict(
        file="../corpus/train5K.jap",
        voc="./res_voc/dummy.jap.voc",
        use_original_make_seed=True,
        debug=debug,
        vocab_size=4000,
        debug_dir=debug_dir,
    )

    # alpha=1 and the debug flag are forwarded unchanged to train_align.
    train_align(small_en, small_ja, debug=debug, alpha=1)