Exemple #1
0
def generate_lm(easy_config, training_filename):
    """Build and run the IRSTLM ``build-lm.sh`` command for the target language.

    The command exports ``IRSTLM``, reads
    ``<easy_lm>/<training_filename>.sb.<target_id>`` and writes
    ``<easy_lm>/<training_filename>.lm.<target_id>``. It is handed to
    write_step and then executed with os.system.
    """
    irstlm_home = easy_config.irstlm_path
    target_lang = exp_config["target_id"]
    sb_path = os.path.join(easy_config.easy_lm, training_filename + ".sb." + target_lang)
    lm_path = os.path.join(easy_config.easy_lm, training_filename + ".lm." + target_lang)
    build_cmd = (
        "export IRSTLM=" + irstlm_home + "; " + irstlm_home + "bin/build-lm.sh"
        + " -n 5"
        + " -i " + sb_path
        + " -t ./tmp -p -s improved-kneser-ney -o " + lm_path
    )
    write_step(build_cmd, easy_config)
    os.system(build_cmd)
Exemple #2
0
def limiting_sentence_length(easy_config, training_filename):
    """Build and run the Moses ``clean-corpus-n.perl`` command.

    The command line carries the ``.true`` corpus prefix, the source and
    target language ids, the ``.clean`` output prefix, the literal ``1`` and
    ``exp_config["sentence_length"]``. It is handed to write_step and then
    executed with os.system.
    """
    script = easy_config.mosesdecoder_path + "scripts/training/clean-corpus-n.perl "
    # The language ids are appended to the last path component on purpose:
    # the joined string is only ever used as shell text, not as a real path.
    corpus_args = os.path.join(
        easy_config.easy_corpus,
        training_filename + ".true " + exp_config["source_id"] + " " + exp_config["target_id"])
    output_args = os.path.join(
        easy_config.easy_corpus,
        training_filename + ".clean  1 " + exp_config["sentence_length"])
    clean_cmd = script + " " + corpus_args + " " + output_args
    write_step(clean_cmd, easy_config)
    os.system(clean_cmd)
Exemple #3
0
def limiting_sentence_length(easy_config, training_filename):
    """Build and run the Moses ``clean-corpus-n.perl`` command.

    Arguments on the command line, in order: the ``.true`` corpus prefix,
    the source id, the target id, the ``.clean`` output prefix, ``1`` and
    ``exp_config["sentence_length"]``. The command is handed to write_step
    and then executed with os.system.
    """
    corpus_dir = easy_config.easy_corpus
    # Both joins deliberately embed extra shell arguments in the last
    # component; the result is only used as command-line text.
    input_part = os.path.join(
        corpus_dir,
        training_filename + ".true " + exp_config["source_id"] + " " + exp_config["target_id"])
    output_part = os.path.join(
        corpus_dir,
        training_filename + ".clean  1 " + exp_config["sentence_length"])
    clean_cmd = easy_config.mosesdecoder_path + "scripts/training/clean-corpus-n.perl "
    clean_cmd += " " + input_part
    clean_cmd += " " + output_part
    write_step(clean_cmd, easy_config)
    os.system(clean_cmd)
Exemple #4
0
def t_filter_model_given_input(easy_config, path, filename):
    """Build and run the Moses ``filter-model-given-input.pl`` command.

    Arguments passed, in order: ``<path>/filtered-<filename>``,
    ``<easy_tuning>/moses.ini`` and ``<path>/<filename>``. The command is
    handed to write_step and then executed with os.system.
    """
    filter_script = (easy_config.mosesdecoder_path
                     + "scripts/training/filter-model-given-input.pl ")
    filtered_dir = os.path.join(path, "filtered-" + filename)
    moses_ini = easy_config.easy_tuning + "/moses.ini "
    input_file = os.path.join(path, filename)
    filter_cmd = filter_script + " " + filtered_dir + " " + moses_ini + " " + input_file
    write_step(filter_cmd, easy_config)
    os.system(filter_cmd)
Exemple #5
0
def generate_lm(easy_config, training_filename):
    """Build and run the IRSTLM ``build-lm.sh`` command for the target language.

    Input is ``<easy_lm>/<training_filename>.sb.<target_id>`` and output is
    ``<easy_lm>/<training_filename>.lm.<target_id>``. The command (prefixed
    with an ``export IRSTLM=...`` statement) is handed to write_step and then
    executed with os.system.
    """
    lm_dir = easy_config.easy_lm
    lang_id = exp_config["target_id"]
    source_file = os.path.join(lm_dir, training_filename + ".sb." + lang_id)
    model_file = os.path.join(lm_dir, training_filename + ".lm." + lang_id)

    lm_cmd = "export IRSTLM=" + easy_config.irstlm_path + "; "
    lm_cmd += easy_config.irstlm_path + "bin/build-lm.sh"
    lm_cmd += " -n 5"
    lm_cmd += " -i " + source_file
    lm_cmd += " -t ./tmp -p -s improved-kneser-ney -o " + model_file

    write_step(lm_cmd, easy_config)
    os.system(lm_cmd)
Exemple #6
0
def t_filter_model_given_input(easy_config, path, filename):
    """Build and run the Moses ``filter-model-given-input.pl`` command.

    The script receives ``<path>/filtered-<filename>``, then
    ``<easy_tuning>/moses.ini``, then ``<path>/<filename>``. The command is
    handed to write_step and then executed with os.system.
    """
    filter_cmd = (easy_config.mosesdecoder_path
                  + "scripts/training/filter-model-given-input.pl ")
    arguments = (
        os.path.join(path, "filtered-" + filename),
        easy_config.easy_tuning + "/moses.ini ",
        os.path.join(path, filename),
    )
    for argument in arguments:
        filter_cmd = filter_cmd + " " + argument
    write_step(filter_cmd, easy_config)
    os.system(filter_cmd)
Exemple #7
0
def shuff(easy_config, training_filename):
    """Build and run the nmt ``preprocess/shuffle-hdf5.py`` command.

    Arguments passed, in order: the source and target
    ``binarized_text.<id>.h5`` files, then the source and target
    ``binarized_text.<id>.shuf.h5`` files, all under
    ``easy_config.easy_corpus``. ``training_filename`` is not used.
    """
    lang_ids = (exp_config["source_id"], exp_config["target_id"])
    shuffle_cmd = "python " + easy_config.nmt_path + "preprocess/shuffle-hdf5.py "
    # Outer loop over suffixes keeps the order: both .h5 files first,
    # then both .shuf.h5 files.
    for suffix in (".h5", ".shuf.h5"):
        for lang_id in lang_ids:
            shuffle_cmd += " " + os.path.join(
                easy_config.easy_corpus, "binarized_text." + lang_id + suffix)
    write_step(shuffle_cmd, easy_config)
    os.system(shuffle_cmd)
Exemple #8
0
def truecaser(easy_config, training_filename):
    """Build and run ``train-truecaser.perl`` for the source, then the target id.

    For each language id the model argument is
    ``<easy_truecaser>/truecase-model.<id>`` and the corpus argument is
    ``<easy_corpus>/<training_filename>.tok.<id>``. Both commands are built
    first; each is then handed to write_step and executed with os.system.
    """
    trainer = easy_config.mosesdecoder_path + "scripts/recaser/train-truecaser.perl --model "
    commands = []
    for lang_id in (exp_config["source_id"], exp_config["target_id"]):
        model_path = os.path.join(easy_config.easy_truecaser, "truecase-model." + lang_id)
        corpus_path = os.path.join(easy_config.easy_corpus, training_filename + ".tok." + lang_id)
        commands.append(trainer + " " + model_path + " --corpus " + " " + corpus_path)
    for command in commands:
        write_step(command, easy_config)
        os.system(command)
Exemple #9
0
def hdf5(easy_config, training_filename):
    """Build and run nmt ``preprocess/convert-pkl2hdf5.py`` for both language ids.

    For the source id and then the target id, the script is given
    ``<easy_corpus>/binarized_text.<id>.pkl`` followed by
    ``<easy_corpus>/binarized_text.<id>.h5``. Both commands are built first;
    each is then handed to write_step and executed with os.system.
    ``training_filename`` is not used.
    """
    converter = "python " + easy_config.nmt_path + "preprocess/convert-pkl2hdf5.py "
    corpus_dir = easy_config.easy_corpus
    commands = []
    for lang_id in (exp_config["source_id"], exp_config["target_id"]):
        pkl_path = os.path.join(corpus_dir, "binarized_text." + lang_id + ".pkl")
        h5_path = os.path.join(corpus_dir, "binarized_text." + lang_id + ".h5")
        commands.append(converter + " " + pkl_path + " " + h5_path)
    for command in commands:
        write_step(command, easy_config)
        os.system(command)
Exemple #10
0
def truecaser(easy_config, training_filename):
    """Build and run ``train-truecaser.perl`` once per language id.

    The source id is processed first, then the target id. Each command names
    ``<easy_truecaser>/truecase-model.<id>`` as the model and
    ``<easy_corpus>/<training_filename>.tok.<id>`` as the corpus. Both
    commands are built before either is handed to write_step and executed
    with os.system.
    """
    def build_command(lang_id):
        # One train-truecaser invocation for a single language id.
        return (
            easy_config.mosesdecoder_path + "scripts/recaser/train-truecaser.perl --model "
            + " " + os.path.join(easy_config.easy_truecaser, "truecase-model." + lang_id)
            + " --corpus "
            + " " + os.path.join(easy_config.easy_corpus, training_filename + ".tok." + lang_id)
        )

    commands = [build_command(exp_config[key]) for key in ("source_id", "target_id")]
    for command in commands:
        write_step(command, easy_config)
        os.system(command)
Exemple #11
0
def prepare_neural_language_model(easy_config):
    """Build and launch the NPLM ``prepareNeuralLM`` command in the background.

    The command reads ``<easy_nplm><training_filename>.true.<target_id>`` and
    names ``words``, ``train.ngrams`` and ``validation.ngrams`` under
    ``easy_config.easy_nplm`` as its output files; stdout and stderr go to
    ``<easy_nplm>prepareout.out``. The command is handed to write_step and
    then executed with os.system.
    """
    # NOTE(review): training_filename is not a parameter of this function;
    # presumably it is a module-level name -- confirm it is defined before
    # this function is called.
    nplm_dir = easy_config.easy_nplm
    command1 = (
        easy_config.nplm_path + "bin/prepareNeuralLM "
        + " --train_text " + nplm_dir + training_filename + ".true." + exp_config["target_id"]
        + " --ngram_size 5 "
        + " --vocab_size 20000 "
        + " --write_words_file " + nplm_dir + "words "
        + " --train_file " + nplm_dir + "train.ngrams "
        + " --validation_size 500 "
        + " --validation_file " + nplm_dir + "validation.ngrams "
        # os.system runs the command through /bin/sh, where the bash-only
        # ">& file" form is not portable; "> file 2>&1" is the POSIX
        # equivalent and behaves the same under bash.
        + " > " + nplm_dir + "prepareout.out 2>&1 &"
    )
    write_step(command1, easy_config)
    os.system(command1)
Exemple #12
0
def prepare_neural_language_model(easy_config):
    """Build and launch the NPLM ``prepareNeuralLM`` command in the background.

    Input text is ``<easy_nplm><training_filename>.true.<target_id>``; the
    words file, n-gram training file and validation file are all named under
    ``easy_config.easy_nplm``. Stdout and stderr are redirected to
    ``<easy_nplm>prepareout.out``. The command is handed to write_step and
    then executed with os.system.
    """
    # NOTE(review): training_filename is not a parameter here; presumably a
    # module-level name -- confirm it exists when this function runs.
    out_dir = easy_config.easy_nplm
    command1 = (
        easy_config.nplm_path + "bin/prepareNeuralLM "
        + " --train_text " + out_dir + training_filename + ".true." + exp_config["target_id"]
        + " --ngram_size 5 "
        + " --vocab_size 20000 "
        + " --write_words_file " + out_dir + "words "
        + " --train_file " + out_dir + "train.ngrams "
        + " --validation_size 500 "
        + " --validation_file " + out_dir + "validation.ngrams "
        # POSIX-portable redirection: the bash-only ">& file" is rejected by
        # shells such as dash, which os.system may invoke as /bin/sh.
        + " > " + out_dir + "prepareout.out 2>&1 &"
    )
    write_step(command1, easy_config)
    os.system(command1)
Exemple #13
0
def invert(easy_config, training_filename):
    """Build and run nmt ``preprocess/invert-dict.py`` for both language ids.

    For the source id and then the target id, the script is given
    ``<easy_corpus>/vocab.<id>.pkl`` followed by
    ``<easy_corpus>/ivocab.<id>.pkl``. A banner line is printed first. Both
    commands are built before either is handed to write_step and executed
    with os.system. ``training_filename`` is not used.
    """
    # Call form of print: same output under Python 2, and valid syntax under
    # Python 3 (the bare print statement is a SyntaxError there).
    print("-----------  invert ------------")
    inverter = "python " + easy_config.nmt_path + "preprocess/invert-dict.py "
    corpus_dir = easy_config.easy_corpus
    commands = []
    for lang_id in (exp_config["source_id"], exp_config["target_id"]):
        vocab_path = os.path.join(corpus_dir, "vocab." + lang_id + ".pkl")
        inverted_path = os.path.join(corpus_dir, "ivocab." + lang_id + ".pkl")
        commands.append(inverter + " " + vocab_path + " " + inverted_path)
    for command in commands:
        write_step(command, easy_config)
        os.system(command)
Exemple #14
0
def tuning_process(easy_config, devfilename):
    """Build and launch the Moses ``mert-moses.pl`` command in the background.

    The command is run under ``nohup nice`` with ``easy_config.easy_tuning``
    as working directory, the ``<devfilename>.true.<source_id>`` and
    ``<devfilename>.true.<target_id>`` files from that directory, the
    ``bin/moses_chart`` decoder and ``<easy_train>/model/moses.ini``.
    Stdout and stderr go to ``<easy_tuning>/mert.out``. The command is
    handed to write_step and then executed with os.system.
    """
    moses_home = easy_config.mosesdecoder_path
    tuning_dir = easy_config.easy_tuning
    threads = exp_config["threads"]
    dev_source = os.path.join(tuning_dir, devfilename + ".true." + exp_config["source_id"])
    dev_target = os.path.join(tuning_dir, devfilename + ".true." + exp_config["target_id"])
    command1 = (
        "nohup nice " + moses_home + "scripts/training/mert-moses.pl "
        + "--decoder-flags=\"-threads " + threads + "\""
        + " -threads " + threads
        + " -maximum-iterations " + exp_config["tuning_max_iterations"]
        + " -working-dir " + tuning_dir
        + " " + dev_source
        + " " + dev_target
        + " " + moses_home + "bin/moses_chart "
        + os.path.join(easy_config.easy_train, "model/moses.ini ")
        + " --mertdir " + moses_home + "bin/"
        # os.system runs the command through /bin/sh. The bash-only
        # "&> file" is parsed by a POSIX sh as "cmd &" followed by an empty
        # "> file", so the log would stay empty; "> file 2>&1" is portable
        # and equivalent under bash.
        + " > " + os.path.join(tuning_dir, "mert.out") + " 2>&1 &"
    )
    write_step(command1, easy_config)
    os.system(command1)
Exemple #15
0
def tuning_process(easy_config, devfilename):
    """Build and launch the Moses ``mert-moses.pl`` command in the background.

    Runs under ``nohup nice``. The command names ``easy_config.easy_tuning``
    as working directory, the two ``<devfilename>.true.<id>`` files (source
    id first, then target id), the ``bin/moses_chart`` decoder and
    ``<easy_train>/model/moses.ini``. Stdout and stderr are redirected to
    ``<easy_tuning>/mert.out``. The command is handed to write_step and then
    executed with os.system.
    """
    decoder_home = easy_config.mosesdecoder_path
    work_dir = easy_config.easy_tuning
    thread_count = exp_config["threads"]
    log_path = os.path.join(work_dir, "mert.out")
    command1 = (
        "nohup nice " + decoder_home + "scripts/training/mert-moses.pl "
        + "--decoder-flags=\"-threads " + thread_count + "\""
        + " -threads " + thread_count
        + " -maximum-iterations " + exp_config["tuning_max_iterations"]
        + " -working-dir " + work_dir
        + " " + os.path.join(work_dir, devfilename + ".true." + exp_config["source_id"])
        + " " + os.path.join(work_dir, devfilename + ".true." + exp_config["target_id"])
        + " " + decoder_home + "bin/moses_chart "
        + os.path.join(easy_config.easy_train, "model/moses.ini ")
        + " --mertdir " + decoder_home + "bin/"
        # POSIX-portable redirection: under a non-bash /bin/sh the original
        # "&> file" backgrounds the command and merely truncates the file.
        + " > " + log_path + " 2>&1 &"
    )
    write_step(command1, easy_config)
    os.system(command1)
Exemple #16
0
def extract_training(easy_config, training_filename):
    """Build and run the Moses ``bilingual-lm/extract_training.py`` command.

    Options are filled from ``easy_config`` (working dir, corpus prefix
    ``<training_filename>.clean``, alignment file under ``easy_train``) and
    from ``exp_config`` (language ids, vocabulary pruning sizes, context
    sizes). The command is handed to write_step and then executed with
    os.system.
    """
    options = (
        (" --working-dir ", easy_config.easy_bnplm),
        (" --corpus ", os.path.join(easy_config.easy_corpus, training_filename + ".clean")),
        (" --source-language ", exp_config["source_id"]),
        (" --target-language ", exp_config["target_id"]),
        (" --align ", os.path.join(easy_config.easy_train, "model/aligned.grow-diag-final-and")),
        (" --prune-target-vocab ", exp_config["target_vocb"]),
        (" --prune-source-vocab ", exp_config["source_vocb"]),
        (" --target-context ", exp_config["bnplm_target_context"]),
        (" --source-context ", exp_config["bnplm_source_context"]),
    )
    extract_cmd = easy_config.mosesdecoder_path + "scripts/training/bilingual-lm/extract_training.py"
    for flag, value in options:
        extract_cmd = extract_cmd + flag + value
    write_step(extract_cmd, easy_config)
    os.system(extract_cmd)
Exemple #17
0
def tuning_truecase(easy_config, devfilename):
    """Build and run ``truecase.perl`` on the tuning files of both language ids.

    For the source id and then the target id, the model is
    ``<easy_truecaser>/truecase-model.<id>``, stdin is
    ``<easy_tuning>/<devfilename>.tok.<id>`` and stdout is
    ``<easy_tuning>/<devfilename>.true.<id>``. Both commands are built
    before either is handed to write_step and executed with os.system.
    """
    script = easy_config.mosesdecoder_path + "scripts/recaser/truecase.perl --model"
    tuning_dir = easy_config.easy_tuning
    commands = []
    for lang_id in (exp_config["source_id"], exp_config["target_id"]):
        model_path = os.path.join(easy_config.easy_truecaser, "truecase-model." + lang_id)
        tokenised = os.path.join(tuning_dir, devfilename + ".tok." + lang_id)
        truecased = os.path.join(tuning_dir, devfilename + ".true." + lang_id)
        commands.append(script + " " + model_path + " < " + tokenised + " > " + truecased)
    for command in commands:
        write_step(command, easy_config)
        os.system(command)
Exemple #18
0
def tuning_truecase(easy_config, devfilename):
    """Build and run ``truecase.perl`` once per language id on the tuning set.

    Source id first, then target id. Each command uses
    ``<easy_truecaser>/truecase-model.<id>`` as model, reads
    ``<easy_tuning>/<devfilename>.tok.<id>`` and writes
    ``<easy_tuning>/<devfilename>.true.<id>``. Both commands are built
    before either is handed to write_step and executed with os.system.
    """
    def build_command(lang_id):
        # One truecase.perl invocation with shell input/output redirection.
        return (
            easy_config.mosesdecoder_path + "scripts/recaser/truecase.perl --model"
            + " " + os.path.join(easy_config.easy_truecaser, "truecase-model." + lang_id)
            + " < " + os.path.join(easy_config.easy_tuning, devfilename + ".tok." + lang_id)
            + " > " + os.path.join(easy_config.easy_tuning, devfilename + ".true." + lang_id)
        )

    commands = [build_command(exp_config[key]) for key in ("source_id", "target_id")]
    for command in commands:
        write_step(command, easy_config)
        os.system(command)
Exemple #19
0
def extract_training(easy_config, training_filename):
    """Build and run the Moses ``bilingual-lm/extract_training.py`` command.

    The command names the ``easy_bnplm`` working directory, the
    ``<easy_corpus>/<training_filename>.clean`` corpus prefix, the language
    ids, the ``model/aligned.grow-diag-final-and`` file under ``easy_train``
    and the pruning and context values from ``exp_config``. It is handed to
    write_step and then executed with os.system.
    """
    corpus_prefix = os.path.join(easy_config.easy_corpus, training_filename + ".clean")
    alignment = os.path.join(easy_config.easy_train, "model/aligned.grow-diag-final-and")

    extract_cmd = easy_config.mosesdecoder_path + "scripts/training/bilingual-lm/extract_training.py"
    extract_cmd += " --working-dir " + easy_config.easy_bnplm
    extract_cmd += " --corpus " + corpus_prefix
    extract_cmd += " --source-language " + exp_config["source_id"]
    extract_cmd += " --target-language " + exp_config["target_id"]
    extract_cmd += " --align " + alignment
    extract_cmd += " --prune-target-vocab " + exp_config["target_vocb"]
    extract_cmd += " --prune-source-vocab " + exp_config["source_vocb"]
    extract_cmd += " --target-context " + exp_config["bnplm_target_context"]
    extract_cmd += " --source-context " + exp_config["bnplm_source_context"]

    write_step(extract_cmd, easy_config)
    os.system(extract_cmd)
Exemple #20
0
def train_neural_network(easy_config):
    """Build and launch the NPLM ``trainNeuralNetwork`` command in the background.

    The command names ``train.ngrams``, ``validation.ngrams``, ``words`` and
    the ``model`` prefix under ``easy_config.easy_nplm``, and takes the
    thread count from ``exp_config["threads"]``. Stdout and stderr go to
    ``<easy_nplm>nplmtrain.out``. The command is handed to write_step and
    then executed with os.system.
    """
    nplm_dir = easy_config.easy_nplm
    command1 = (
        easy_config.nplm_path + "bin/trainNeuralNetwork "
        + " --train_file " + nplm_dir + "train.ngrams "
        + " --validation_file " + nplm_dir + "validation.ngrams "
        + " --num_epochs 30 "
        + " --input_words_file " + nplm_dir + "words "
        + " --model_prefix " + nplm_dir + "model "
        + " --input_embedding_dimension 150 "
        + " --num_hidden 0"
        + " --output_embedding_dimension 750 "
        + " --num_threads " + exp_config["threads"]
        # os.system runs the command through /bin/sh, where the bash-only
        # ">& file" form is not portable; "> file 2>&1" is the POSIX
        # equivalent and behaves the same under bash.
        + " > " + nplm_dir + "nplmtrain.out 2>&1 &"
    )
    write_step(command1, easy_config)
    os.system(command1)
Exemple #21
0
def translation_model(easy_config, training_filename):
    """Build and launch the Moses ``train-model.perl`` command in the background.

    Runs under ``nohup nice`` with ``easy_config.easy_train`` as root
    directory, the ``<easy_corpus>/<training_filename>.clean`` corpus prefix,
    the language ids from ``exp_config``, and an ``-lm`` argument pointing at
    ``<easy_lm>/<training_filename>.blm.<target_id>``. Stdout and stderr go
    to ``<easy_train>/training.out``. The command is handed to write_step
    and then executed with os.system.
    """
    target_lang = exp_config["target_id"]
    corpus_prefix = os.path.join(easy_config.easy_corpus, training_filename + ".clean")
    lm_file = os.path.join(easy_config.easy_lm, training_filename + ".blm." + target_lang)
    log_file = os.path.join(easy_config.easy_train, "training.out")
    command1 = (
        "nohup nice " + easy_config.mosesdecoder_path + "scripts/training/train-model.perl "
        + " -mgiza -mgiza-cpus " + exp_config["threads"] + " -cores 2 "
        + " -root-dir " + easy_config.easy_train
        + " -corpus " + " " + corpus_prefix
        + " -f " + exp_config["source_id"] + " -e " + target_lang
        + " -alignment grow-diag-final-and "
        + " " + exp_config["phrase"]
        + " msd-bidirectional-fe -lm 0:" + exp_config["n-gram"] + ":"
        + lm_file + ":8 "
        + " -external-bin-dir " + easy_config.giza_path + "bin"
        # os.system runs the command through /bin/sh, where the bash-only
        # ">& file" form is not portable; "> file 2>&1" is the POSIX
        # equivalent and behaves the same under bash.
        + " > " + log_file + " 2>&1 &"
    )
    write_step(command1, easy_config)
    os.system(command1)
Exemple #22
0
def translation_model(easy_config, training_filename):
    """Build and launch the Moses ``train-model.perl`` command in the background.

    The command runs under ``nohup nice``. It names ``easy_config.easy_train``
    as root directory, ``<easy_corpus>/<training_filename>.clean`` as corpus
    prefix, the source and target ids as ``-f`` / ``-e``, and
    ``<easy_lm>/<training_filename>.blm.<target_id>`` inside the ``-lm``
    argument. Stdout and stderr are redirected to
    ``<easy_train>/training.out``. The command is handed to write_step and
    then executed with os.system.
    """
    train_dir = easy_config.easy_train
    command1 = (
        "nohup nice " + easy_config.mosesdecoder_path + "scripts/training/train-model.perl "
        + " -mgiza -mgiza-cpus " + exp_config["threads"] + " -cores 2 "
        + " -root-dir " + train_dir
        + " -corpus " + " " + os.path.join(easy_config.easy_corpus, training_filename + ".clean")
        + " -f " + exp_config["source_id"] + " -e " + exp_config["target_id"]
        + " -alignment grow-diag-final-and "
        + " " + exp_config["phrase"]
        + " msd-bidirectional-fe -lm 0:" + exp_config["n-gram"] + ":"
        + os.path.join(easy_config.easy_lm, training_filename + ".blm." + exp_config["target_id"]) + ":8 "
        + " -external-bin-dir " + easy_config.giza_path + "bin"
        # POSIX-portable redirection: the bash-only ">& file" is rejected by
        # shells such as dash, which os.system may invoke as /bin/sh.
        + " > " + os.path.join(train_dir, "training.out") + " 2>&1 &"
    )
    write_step(command1, easy_config)
    os.system(command1)
Exemple #23
0
def t_truecasing(easy_config, testfilename):
    """Build and run ``truecase.perl`` on the test files of both language ids.

    For the source id and then the target id, the model is
    ``<easy_truecaser>/truecase-model.<id>``, stdin is
    ``<easy_evaluation>/<testfilename>.tok.<id>`` and stdout is
    ``<easy_evaluation>/<testfilename>.true.<id>``. Both commands are built
    before either is handed to write_step and executed with os.system.
    """
    script = easy_config.mosesdecoder_path + "scripts/recaser/truecase.perl --model"
    eval_dir = easy_config.easy_evaluation
    commands = []
    for lang_id in (exp_config["source_id"], exp_config["target_id"]):
        model_path = os.path.join(easy_config.easy_truecaser, "truecase-model." + lang_id)
        tokenised = os.path.join(eval_dir, testfilename + ".tok." + lang_id)
        truecased = os.path.join(eval_dir, testfilename + ".true." + lang_id)
        commands.append(script + " " + model_path + " < " + tokenised + " > " + truecased)
    for command in commands:
        write_step(command, easy_config)
        os.system(command)
Exemple #24
0
def prepare_corpus(easy_config):
    """Build and run tokenizer and truecase commands for the target-language text.

    The first command runs ``tokenizer.perl`` on
    ``<training_corpus><training_filename>.<target_id>`` and writes
    ``<easy_nplm><training_filename>.tok.<target_id>``; the second runs
    ``truecase.perl`` on that file and writes
    ``<easy_nplm><training_filename>.true.<target_id>``. Both commands are
    built first; each is then handed to write_step and executed with
    os.system.
    """
    # NOTE(review): training_filename is not a parameter of this function;
    # presumably it is a module-level name -- confirm.
    moses_home = easy_config.mosesdecoder_path
    target_lang = exp_config["target_id"]
    tokenize_cmd = (
        moses_home + "scripts/tokenizer/tokenizer.perl -l " + target_lang
        + " -threads " + exp_config["threads"]
        + " -no-escape 1 "
        + " < " + exp_config["training_corpus"] + training_filename + "." + target_lang
        + " > " + easy_config.easy_nplm + training_filename + ".tok." + target_lang
    )
    truecase_cmd = (
        moses_home + "scripts/recaser/truecase.perl --model "
        + " " + easy_config.easy_truecaser + "truecase-model." + target_lang
        + " < " + easy_config.easy_nplm + training_filename + ".tok." + target_lang
        + " > " + easy_config.easy_nplm + training_filename + ".true." + target_lang
    )
    for command in (tokenize_cmd, truecase_cmd):
        write_step(command, easy_config)
        os.system(command)
Exemple #25
0
def train_neural_network(easy_config):
    """Build and launch the NPLM ``trainNeuralNetwork`` command in the background.

    Training, validation, words and model-prefix paths are all named under
    ``easy_config.easy_nplm``; the thread count comes from
    ``exp_config["threads"]``. Stdout and stderr are redirected to
    ``<easy_nplm>nplmtrain.out``. The command is handed to write_step and
    then executed with os.system.
    """
    out_dir = easy_config.easy_nplm
    command1 = (
        easy_config.nplm_path + "bin/trainNeuralNetwork "
        + " --train_file " + out_dir + "train.ngrams "
        + " --validation_file " + out_dir + "validation.ngrams "
        + " --num_epochs 30 "
        + " --input_words_file " + out_dir + "words "
        + " --model_prefix " + out_dir + "model "
        + " --input_embedding_dimension 150 "
        + " --num_hidden 0"
        + " --output_embedding_dimension 750 "
        + " --num_threads " + exp_config["threads"]
        # POSIX-portable redirection: the bash-only ">& file" is rejected by
        # shells such as dash, which os.system may invoke as /bin/sh.
        + " > " + out_dir + "nplmtrain.out 2>&1 &"
    )
    write_step(command1, easy_config)
    os.system(command1)
Exemple #26
0
def tokenisation(easy_config, training_filename):
    """Build and run ``tokenizer.perl`` for the source id, then the target id.

    Each command reads ``<training_corpus><training_filename>.<id>`` and
    writes ``<easy_corpus>/<training_filename>.tok.<id>``. Both commands are
    built before either is handed to write_step and executed with os.system.
    """
    tokenizer = easy_config.mosesdecoder_path + "scripts/tokenizer/tokenizer.perl -l "
    commands = []
    for lang_id in (exp_config["source_id"], exp_config["target_id"]):
        raw_text = exp_config["training_corpus"] + training_filename + "." + lang_id
        tokenised = os.path.join(easy_config.easy_corpus, training_filename + ".tok." + lang_id)
        commands.append(
            tokenizer + lang_id
            + " -threads " + exp_config["threads"]
            + " -no-escape 1 "
            + " < " + raw_text + " > "
            + " " + tokenised
        )
    for command in commands:
        write_step(command, easy_config)
        os.system(command)
Exemple #27
0
def t_truecasing(easy_config, testfilename):
    """Build and run ``truecase.perl`` once per language id on the test set.

    Source id first, then target id. Each command uses
    ``<easy_truecaser>/truecase-model.<id>`` as model, reads
    ``<easy_evaluation>/<testfilename>.tok.<id>`` and writes
    ``<easy_evaluation>/<testfilename>.true.<id>``. Both commands are built
    before either is handed to write_step and executed with os.system.
    """
    def build_command(lang_id):
        # One truecase.perl invocation with shell input/output redirection.
        return (
            easy_config.mosesdecoder_path + "scripts/recaser/truecase.perl --model"
            + " " + os.path.join(easy_config.easy_truecaser, "truecase-model." + lang_id)
            + " < " + os.path.join(easy_config.easy_evaluation, testfilename + ".tok." + lang_id)
            + " > " + os.path.join(easy_config.easy_evaluation, testfilename + ".true." + lang_id)
        )

    commands = [build_command(exp_config[key]) for key in ("source_id", "target_id")]
    for command in commands:
        write_step(command, easy_config)
        os.system(command)
Exemple #28
0
def run_test(easy_config, testfilename):
    """Build and run the Moses decoding command, then the multi-bleu command.

    The decoder (under ``nohup nice``) uses the ``moses.ini`` in
    ``<easy_evaluation>/filtered-<testfilename>.true.<source_id>/``, reads
    ``<testfilename>.true.<source_id>``, writes
    ``<testfilename>.translated.<target_id>`` and sends stderr to
    ``<testfilename>.out``. ``multi-bleu.perl -lc`` is then given
    ``<testfilename>.true.<target_id>`` with the translated file on stdin.
    Both commands are built first; each is then handed to write_step and
    executed with os.system.
    """
    eval_dir = easy_config.easy_evaluation
    source_lang = exp_config["source_id"]
    target_lang = exp_config["target_id"]
    translated = os.path.join(eval_dir, testfilename + ".translated." + target_lang)
    decode_cmd = (
        "nohup nice " + easy_config.mosesdecoder_path + "bin/moses "
        + " -threads " + exp_config["threads"]
        + " -f " + os.path.join(
            eval_dir, "filtered-" + testfilename + ".true." + source_lang + "/moses.ini ")
        + " < " + os.path.join(eval_dir, testfilename + ".true." + source_lang)
        + " > " + translated
        + " 2> " + os.path.join(eval_dir, testfilename + ".out") + " "
    )
    bleu_cmd = (
        easy_config.mosesdecoder_path + "scripts/generic/multi-bleu.perl "
        + " -lc " + os.path.join(eval_dir, testfilename + ".true." + target_lang)
        + " < " + translated
    )
    for command in (decode_cmd, bleu_cmd):
        write_step(command, easy_config)
        os.system(command)
Exemple #29
0
def run_test(easy_config, testfilename):
    """Build and run the Moses decoding command, then the multi-bleu command.

    All files live under ``easy_config.easy_evaluation``. The decoder reads
    ``<testfilename>.true.<source_id>`` using the ``moses.ini`` of the
    ``filtered-<testfilename>.true.<source_id>`` directory, and writes
    ``<testfilename>.translated.<target_id>`` (stderr to
    ``<testfilename>.out``). ``multi-bleu.perl -lc`` is given
    ``<testfilename>.true.<target_id>`` and reads the translated file on
    stdin. Both commands are built first; each is then handed to write_step
    and executed with os.system.
    """
    def in_eval(name):
        # Path of a file inside the evaluation directory.
        return os.path.join(easy_config.easy_evaluation, name)

    source_input = testfilename + ".true." + exp_config["source_id"]
    translated_name = testfilename + ".translated." + exp_config["target_id"]

    decode_cmd = "nohup nice " + easy_config.mosesdecoder_path + "bin/moses "
    decode_cmd += " -threads " + exp_config["threads"]
    decode_cmd += " -f " + in_eval("filtered-" + source_input + "/moses.ini ")
    decode_cmd += " < " + in_eval(source_input)
    decode_cmd += " > " + in_eval(translated_name)
    decode_cmd += " 2> " + in_eval(testfilename + ".out") + " "

    bleu_cmd = easy_config.mosesdecoder_path + "scripts/generic/multi-bleu.perl "
    bleu_cmd += " -lc " + in_eval(testfilename + ".true." + exp_config["target_id"])
    bleu_cmd += " < " + in_eval(translated_name)

    for command in (decode_cmd, bleu_cmd):
        write_step(command, easy_config)
        os.system(command)
Exemple #30
0
def train_nplm(easy_config, training_filename):
    """Build and launch the Moses ``bilingual-lm/train_nplm.py`` command in the background.

    The command names the ``easy_bnplm`` working directory, the
    ``<easy_corpus>/<training_filename>.clean`` corpus prefix, the NPLM home
    and the ``bnplm_*`` / ``threads`` values from ``exp_config``. Stdout and
    stderr go to ``<easy_bnplm>/nplm.out``. The command is handed to
    write_step and then executed with os.system.
    """
    log_file = os.path.join(easy_config.easy_bnplm, "nplm.out")
    command1 = (
        easy_config.mosesdecoder_path + "scripts/training/bilingual-lm/train_nplm.py"
        + " --working-dir " + easy_config.easy_bnplm
        + " --corpus " + os.path.join(easy_config.easy_corpus, training_filename + ".clean ")
        + " --nplm-home " + easy_config.nplm_path
        + " --ngram-size " + exp_config["bnplm_ngram"]
        + " --epochs " + exp_config["bnplm_epochs"]
        + " --learning-rate " + exp_config["bnplm_learning_rate"]
        + " --hidden " + exp_config["bnplm_hidden"]
        + " --input-embedding " + exp_config["bnplm_input_embedding"]
        # NOTE(review): --output-embedding reuses the "bnplm_hidden" value;
        # confirm this is intended rather than a dedicated config key.
        + " --output-embedding " + exp_config["bnplm_hidden"]
        + " --threads " + exp_config["threads"]
        # os.system runs the command through /bin/sh. The bash-only
        # "&> file" is parsed by a POSIX sh as "cmd &" followed by an empty
        # "> file", so the log would stay empty; "> file 2>&1" is portable
        # and equivalent under bash.
        + " > " + log_file + " 2>&1 &"
    )
    write_step(command1, easy_config)
    os.system(command1)
Exemple #31
0
def tokenisation(easy_config, training_filename):
    """Build and run ``tokenizer.perl`` once per language id.

    Source id first, then target id. Input is
    ``<training_corpus><training_filename>.<id>`` and output is
    ``<easy_corpus>/<training_filename>.tok.<id>``. Both commands are built
    before either is handed to write_step and executed with os.system.
    """
    def build_command(lang_id):
        # One tokenizer.perl invocation with shell input/output redirection.
        return (
            easy_config.mosesdecoder_path + "scripts/tokenizer/tokenizer.perl -l " + lang_id
            + " -threads " + exp_config["threads"]
            + " -no-escape 1 "
            + " < " + exp_config["training_corpus"] + training_filename + "." + lang_id + " > "
            + " " + os.path.join(easy_config.easy_corpus, training_filename + ".tok." + lang_id)
        )

    commands = [build_command(exp_config[key]) for key in ("source_id", "target_id")]
    for command in commands:
        write_step(command, easy_config)
        os.system(command)
Exemple #32
0
def train_nplm(easy_config, training_filename):
    """Build and launch the Moses ``bilingual-lm/train_nplm.py`` command in the background.

    Working directory is ``easy_config.easy_bnplm``; the corpus prefix is
    ``<easy_corpus>/<training_filename>.clean``; the remaining options come
    from ``easy_config.nplm_path`` and the ``bnplm_*`` / ``threads`` entries
    of ``exp_config``. Stdout and stderr are redirected to
    ``<easy_bnplm>/nplm.out``. The command is handed to write_step and then
    executed with os.system.
    """
    work_dir = easy_config.easy_bnplm
    command1 = (
        easy_config.mosesdecoder_path + "scripts/training/bilingual-lm/train_nplm.py"
        + " --working-dir " + work_dir
        + " --corpus " + os.path.join(easy_config.easy_corpus, training_filename + ".clean ")
        + " --nplm-home " + easy_config.nplm_path
        + " --ngram-size " + exp_config["bnplm_ngram"]
        + " --epochs " + exp_config["bnplm_epochs"]
        + " --learning-rate " + exp_config["bnplm_learning_rate"]
        + " --hidden " + exp_config["bnplm_hidden"]
        + " --input-embedding " + exp_config["bnplm_input_embedding"]
        # NOTE(review): --output-embedding is fed from "bnplm_hidden";
        # verify whether a separate output-embedding setting was meant.
        + " --output-embedding " + exp_config["bnplm_hidden"]
        + " --threads " + exp_config["threads"]
        # POSIX-portable redirection: under a non-bash /bin/sh the original
        # "&> file" backgrounds the command and merely truncates the file.
        + " > " + os.path.join(work_dir, "nplm.out") + " 2>&1 &"
    )
    write_step(command1, easy_config)
    os.system(command1)
Exemple #33
0
def test_on_train(easy_config):
    """Filter the model for the ``OF.clean`` source file, then decode and score it.

    First calls t_filter_model_given_input on
    ``OF.clean.<source_id>`` in ``easy_config.easy_overfitting``. Then builds
    a Moses decoding command (output ``OF.translated.<target_id>``, stderr
    ``OF.clean.out``) and a ``multi-bleu.perl -lc`` command against
    ``OF.clean.<target_id>``; each is handed to write_step and executed with
    os.system.
    """
    overfit_dir = easy_config.easy_overfitting
    filename = "OF.clean." + exp_config["source_id"]
    t_filter_model_given_input(easy_config, overfit_dir, filename)

    translated = os.path.join(overfit_dir, "OF.translated." + exp_config["target_id"])
    decode_cmd = (
        "nohup nice " + easy_config.mosesdecoder_path + "bin/moses "
        + " -threads " + exp_config["threads"]
        + " -f " + os.path.join(overfit_dir, "filtered-" + filename + "/moses.ini ")
        + " < " + os.path.join(overfit_dir, filename)
        + " > " + translated
        + " 2> " + os.path.join(overfit_dir, "OF.clean.out") + " "
    )
    bleu_cmd = (
        easy_config.mosesdecoder_path + "scripts/generic/multi-bleu.perl "
        + " -lc " + os.path.join(overfit_dir, "OF.clean." + exp_config["target_id"])
        + " < " + translated
    )
    for command in (decode_cmd, bleu_cmd):
        write_step(command, easy_config)
        os.system(command)
Exemple #34
0
def pkl(easy_config, training_filename):
    """Build and run nmt ``preprocess/preprocess.py`` for both language ids.

    For the source id and then the target id, the script is given
    ``<easy_corpus>/<training_filename>.clean.<id>``, ``-d`` with
    ``vocab.<id>.pkl``, ``-v`` with the matching ``*_vocb`` value from
    ``exp_config``, ``-b`` with ``binarized_text.<id>.pkl`` and a trailing
    ``-p``. Both commands are built before either is handed to write_step
    and executed with os.system.
    """
    corpus_dir = easy_config.easy_corpus
    preprocess = "python " + easy_config.nmt_path + "preprocess/preprocess.py "
    commands = []
    for id_key, vocab_key in (("source_id", "source_vocb"), ("target_id", "target_vocb")):
        lang_id = exp_config[id_key]
        commands.append(
            preprocess
            + os.path.join(corpus_dir, training_filename + ".clean." + lang_id)
            + " -d " + os.path.join(corpus_dir, "vocab." + lang_id + ".pkl")
            + " -v " + exp_config[vocab_key]
            + " -b " + os.path.join(corpus_dir, "binarized_text." + lang_id + ".pkl")
            + " -p "
        )
    for command in commands:
        write_step(command, easy_config)
        os.system(command)
Exemple #35
0
def test_on_train(easy_config):
    """Filter the model for the ``OF.clean`` source file, then decode and score it.

    Calls t_filter_model_given_input for ``OF.clean.<source_id>`` in
    ``easy_config.easy_overfitting`` before building anything else. The
    decoding command writes ``OF.translated.<target_id>`` (stderr to
    ``OF.clean.out``); the ``multi-bleu.perl -lc`` command compares it with
    ``OF.clean.<target_id>``. Each command is handed to write_step and
    executed with os.system.
    """
    def in_overfit(name):
        # Path of a file inside the overfitting directory.
        return os.path.join(easy_config.easy_overfitting, name)

    filename = "OF.clean." + exp_config["source_id"]
    t_filter_model_given_input(easy_config, easy_config.easy_overfitting, filename)

    decode_cmd = "nohup nice " + easy_config.mosesdecoder_path + "bin/moses "
    decode_cmd += " -threads " + exp_config["threads"]
    decode_cmd += " -f " + in_overfit("filtered-" + filename + "/moses.ini ")
    decode_cmd += " < " + in_overfit(filename)
    decode_cmd += " > " + in_overfit("OF.translated." + exp_config["target_id"])
    decode_cmd += " 2> " + in_overfit("OF.clean.out") + " "

    bleu_cmd = easy_config.mosesdecoder_path + "scripts/generic/multi-bleu.perl "
    bleu_cmd += " -lc " + in_overfit("OF.clean." + exp_config["target_id"])
    bleu_cmd += " < " + in_overfit("OF.translated." + exp_config["target_id"])

    for command in (decode_cmd, bleu_cmd):
        write_step(command, easy_config)
        os.system(command)
Exemple #36
0
def prepare_corpus(easy_config):
    """Build and run tokenizer and truecase commands for the target-language text.

    ``tokenizer.perl`` reads
    ``<training_corpus><training_filename>.<target_id>`` and writes
    ``<easy_nplm><training_filename>.tok.<target_id>``; ``truecase.perl``
    then reads that file and writes
    ``<easy_nplm><training_filename>.true.<target_id>``. Both commands are
    built first; each is then handed to write_step and executed with
    os.system.
    """
    # NOTE(review): training_filename is not a parameter here; presumably a
    # module-level name -- confirm it exists when this function runs.
    lang_id = exp_config["target_id"]
    nplm_prefix = easy_config.easy_nplm + training_filename
    tokenised = nplm_prefix + ".tok." + lang_id

    tokenize_cmd = easy_config.mosesdecoder_path + "scripts/tokenizer/tokenizer.perl -l " + lang_id
    tokenize_cmd += " -threads " + exp_config["threads"]
    tokenize_cmd += " -no-escape 1 "
    tokenize_cmd += " < " + exp_config["training_corpus"] + training_filename + "." + lang_id
    tokenize_cmd += " > " + tokenised

    truecase_cmd = easy_config.mosesdecoder_path + "scripts/recaser/truecase.perl --model "
    truecase_cmd += " " + easy_config.easy_truecaser + "truecase-model." + lang_id
    truecase_cmd += " < " + tokenised
    truecase_cmd += " > " + nplm_prefix + ".true." + lang_id

    for command in (tokenize_cmd, truecase_cmd):
        write_step(command, easy_config)
        os.system(command)
Exemple #37
0
def cross_corpus(id1, mt_type, tag, easy_config):
    """Translate or score the test set with the system identified by ``id1``.

    Three commands are built up front:

    * an nmt ``sample.py`` beam-search command using the state and model
      files under ``<easy_workspace>nmt/<id1>/`` (run in the background);
    * a Moses decoding command using ``<easy_workspace>tuning/<id1>/moses.ini``;
    * a ``multi-bleu.perl -lc`` command comparing
      ``<testfilename>.translated.<id1>.<target_id>`` with
      ``<testfilename>.true.<target_id>``.

    Which one runs depends on the arguments: ``tag == "tr"`` with
    ``mt_type == "nmt"`` runs the first, ``tag == "tr"`` with
    ``mt_type == "smt"`` runs the second, and ``tag == "te"`` runs the
    third. The chosen command is handed to write_step and executed with
    os.system; any other combination runs nothing.
    """
    # NOTE(review): testfilename is not a parameter of this function;
    # presumably it is a module-level name -- confirm.
    eval_dir = easy_config.easy_evaluation
    source_input = eval_dir + testfilename + ".true." + exp_config["source_id"]
    translated = eval_dir + testfilename + ".translated." + id1 + "." + exp_config["target_id"]
    command1 = (
        "python " + easy_config.nmt_path + "sample.py"
        + " --beam-search "
        + " --beam-size 12"
        + " --state " + easy_config.easy_workspace + "nmt/" + id1 + "/" + "search_state.pkl "
        + " --source " + source_input
        + " --trans " + translated
        + " " + easy_config.easy_workspace + "nmt/" + id1 + "/" + "search_model.npz"
        # os.system runs the command through /bin/sh, where the bash-only
        # ">& file" form is not portable; "> file 2>&1" is the POSIX
        # equivalent and behaves the same under bash.
        + " > " + eval_dir + id1 + "_out.txt 2>&1 &"
    )
    command2 = (
        "nohup nice " + easy_config.mosesdecoder_path + "bin/moses "
        + " -threads " + exp_config["threads"]
        + " -f " + easy_config.easy_workspace + "tuning/" + id1 + "/" + "moses.ini "
        + " < " + source_input
        + " > " + translated
        + " 2> " + eval_dir + id1 + "_out.txt"
    )
    command3 = (
        easy_config.mosesdecoder_path + "scripts/generic/multi-bleu.perl "
        + " -lc " + eval_dir + testfilename + ".true." + exp_config["target_id"]
        + " < " + translated
    )
    if tag == "tr" and mt_type == "nmt":
        write_step(command1, easy_config)
        os.system(command1)
    elif tag == "tr" and mt_type == "smt":
        write_step(command2, easy_config)
        os.system(command2)
    # Fixed: the original tested the undefined name "Tag" and called
    # write_step with the undefined "comMand3" and without easy_config,
    # so every call ended in a NameError.
    if tag == "te":
        write_step(command3, easy_config)
        os.system(command3)
Exemple #38
0
def cross_corpus(id1, mt_type, tag, easy_config):
    """Translate or score the test set using the system stored under ``id1``.

    Three shell commands are prepared and at most one is executed:

    * ``tag == "tr"`` and ``mt_type == "nmt"``: run ``sample.py`` (beam
      search, beam size 12) with the state/model files under
      ``<easy_workspace>nmt/<id1>/``.
    * ``tag == "tr"`` and ``mt_type == "smt"``: run the Moses decoder with
      ``<easy_workspace>tuning/<id1>/moses.ini``.
    * ``tag == "te"``: run ``multi-bleu.perl -lc`` with the target-side
      ``.true.`` file as reference and the translated file on stdin.

    Each executed command is first passed to ``write_step`` and then run via
    ``os.system``. Any other ``tag``/``mt_type`` combination does nothing.

    Args:
        id1: identifier of the trained system; used in workspace paths and
            in the name of the translated/log files.
        mt_type: ``"nmt"`` or ``"smt"``; only consulted when ``tag == "tr"``.
        tag: ``"tr"`` to translate, ``"te"`` to score.
        easy_config: configuration object providing ``nmt_path``,
            ``mosesdecoder_path``, ``easy_workspace`` and ``easy_evaluation``.
    """
    evaluation_dir = easy_config.easy_evaluation
    workspace_dir = easy_config.easy_workspace

    # Files shared by several of the commands below.
    source_file = evaluation_dir + testfilename + ".true." + exp_config["source_id"]
    reference_file = evaluation_dir + testfilename + ".true." + exp_config["target_id"]
    translated_file = evaluation_dir + testfilename + ".translated." + id1 + "." + exp_config["target_id"]
    log_file = evaluation_dir + id1 + "_out.txt"

    # NOTE(review): ">&" is a bash/csh-style redirect and os.system goes
    # through /bin/sh -- confirm the target host's sh accepts it.
    command1 = "python " + easy_config.nmt_path + "sample.py"\
      + " --beam-search "\
      + " --beam-size 12"\
      + " --state " + workspace_dir + "nmt/" + id1 + "/" + "search_state.pkl "\
      + " --source " + source_file\
      + " --trans " + translated_file\
      + " " + workspace_dir + "nmt/" + id1 + "/" + "search_model.npz"\
      + " >& " + log_file + " &"
    command2 = "nohup nice " + easy_config.mosesdecoder_path + "bin/moses "\
      + " -threads " + exp_config["threads"]\
      + " -f " + workspace_dir + "tuning/" + id1 + "/" + "moses.ini "\
      + " < " + source_file\
      + " > " + translated_file\
      + " 2> " + log_file
    command3 = easy_config.mosesdecoder_path + "scripts/generic/multi-bleu.perl "\
      + " -lc " + reference_file\
      + " < " + translated_file

    if tag == "tr" and mt_type == "nmt":
        write_step(command1, easy_config)
        os.system(command1)
    elif tag == "tr" and mt_type == "smt":
        write_step(command2, easy_config)
        os.system(command2)
    # Fixed: the original tested the undefined name `Tag` and passed the
    # undefined `comMand3` to write_step without easy_config, so every call
    # ended in a NameError.
    if tag == "te":
        write_step(command3, easy_config)
        os.system(command3)
Exemple #39
0
def generate_sb (easy_config, training_filename) :
  """Run IRSTLM's add-start-end.sh over the truecased target-language corpus.

  Reads <easy_corpus>/<training_filename>.true.<target_id> on stdin and
  redirects stdout to <easy_lm>/<training_filename>.sb.<target_id>. The
  command string is passed to write_step and then executed with os.system.
  """
  target_id = exp_config["target_id"]
  true_file = os.path.join(easy_config.easy_corpus, training_filename + ".true." + target_id)
  sb_file = os.path.join(easy_config.easy_lm, training_filename + ".sb." + target_id)
  # The separate " " piece keeps the doubled space of the original command.
  pieces = [
    easy_config.irstlm_path, "bin/add-start-end.sh < ",
    " ", true_file,
    " > ", sb_file,
  ]
  command1 = "".join(pieces)
  write_step (command1, easy_config)
  os.system(command1)
Exemple #40
0
def generate_blm (easy_config, training_filename) :
  """Run Moses' build_binary on the ARPA language model.

  Input is <easy_lm>/<training_filename>.arpa.<target_id>; the output path
  is <easy_lm>/<training_filename>.blm.<target_id>. The command string is
  passed to write_step and then executed with os.system.
  """
  target_id = exp_config["target_id"]
  arpa_file = os.path.join(easy_config.easy_lm, training_filename + ".arpa." + target_id)
  blm_file = os.path.join(easy_config.easy_lm, training_filename + ".blm." + target_id)
  # The separate " " piece keeps the doubled space of the original command.
  pieces = [
    easy_config.mosesdecoder_path, "bin/build_binary -i ",
    " ", arpa_file,
    " ", blm_file,
  ]
  command1 = "".join(pieces)
  write_step (command1, easy_config)
  os.system(command1)
Exemple #41
0
def generate_blm(easy_config, training_filename):
    """Binarise the ARPA language model with Moses' build_binary.

    The command takes <easy_lm>/<training_filename>.arpa.<target_id> as
    input and <easy_lm>/<training_filename>.blm.<target_id> as output. It is
    handed to write_step and then run through os.system.
    """
    lang = exp_config["target_id"]
    lm_dir = easy_config.easy_lm
    arpa_path = os.path.join(lm_dir, training_filename + ".arpa." + lang)
    binary_path = os.path.join(lm_dir, training_filename + ".blm." + lang)

    # Built incrementally; spacing (including the doubled space after "-i")
    # is identical to the single-expression form.
    command1 = easy_config.mosesdecoder_path
    command1 += "bin/build_binary -i "
    command1 += " " + arpa_path
    command1 += " " + binary_path

    write_step(command1, easy_config)
    os.system(command1)
Exemple #42
0
def generate_sb(easy_config, training_filename):
    """Pipe the truecased target corpus through IRSTLM's add-start-end.sh.

    stdin is <easy_corpus>/<training_filename>.true.<target_id> and stdout
    goes to <easy_lm>/<training_filename>.sb.<target_id>. The command is
    handed to write_step and then run through os.system.
    """
    lang = exp_config["target_id"]
    corpus_path = os.path.join(easy_config.easy_corpus, training_filename + ".true." + lang)
    marked_path = os.path.join(easy_config.easy_lm, training_filename + ".sb." + lang)

    # Built incrementally; spacing (including the doubled space after "<")
    # is identical to the single-expression form.
    command1 = easy_config.irstlm_path
    command1 += "bin/add-start-end.sh < "
    command1 += " " + corpus_path
    command1 += " > " + marked_path

    write_step(command1, easy_config)
    os.system(command1)
Exemple #43
0
def generate_arpa(easy_config, training_filename):
    """Run IRSTLM's compile-lm in text mode on the gzipped language model.

    The command takes <easy_lm>/<training_filename>.lm.<target_id>.gz as
    input and <easy_lm>/<training_filename>.arpa.<target_id> as output. It
    is handed to write_step and then run through os.system.
    """
    lang = exp_config["target_id"]
    lm_dir = easy_config.easy_lm
    gz_path = os.path.join(lm_dir, training_filename + ".lm." + lang + ".gz")
    arpa_path = os.path.join(lm_dir, training_filename + ".arpa." + lang)

    # Built incrementally; spacing (including the doubled space after
    # "--text=yes") is identical to the single-expression form.
    command1 = easy_config.irstlm_path
    command1 += "bin/compile-lm --text=yes "
    command1 += " " + gz_path
    command1 += " " + arpa_path

    write_step(command1, easy_config)
    os.system(command1)
Exemple #44
0
def overfitting_prepare(easy_config, training_filename, sampling_base = 30):
  """Call easycorpus.sampling_file on the cleaned source and target corpora.

  For the source language id and then the target language id, the call
  receives <easy_corpus>/<training_filename>.clean.<id> as its first
  argument, <easy_overfitting>/OF.clean.<id> as its second and
  sampling_base as its third. Afterwards the step name
  "overfitting_prepare" is passed to write_step.
  """
  # Source side first, then target side, matching the original call order.
  for id_key in ("source_id", "target_id"):
    clean_file = os.path.join(easy_config.easy_corpus, training_filename + ".clean." + exp_config[id_key])
    sampled_file = easy_config.easy_overfitting + "/OF.clean." + exp_config[id_key]
    easycorpus.sampling_file(clean_file, sampled_file, sampling_base)
  write_step("overfitting_prepare", easy_config)
Exemple #45
0
def generate_arpa (easy_config, training_filename) :
  """Convert the gzipped IRSTLM model to text form with compile-lm.

  Input is <easy_lm>/<training_filename>.lm.<target_id>.gz; the output path
  is <easy_lm>/<training_filename>.arpa.<target_id>. The command string is
  passed to write_step and then executed with os.system.
  """
  target_id = exp_config["target_id"]
  lm_gz_file = os.path.join(easy_config.easy_lm, training_filename + ".lm." + target_id + ".gz")
  arpa_file = os.path.join(easy_config.easy_lm, training_filename + ".arpa." + target_id)
  # The separate " " piece keeps the doubled space of the original command.
  pieces = [
    easy_config.irstlm_path, "bin/compile-lm --text=yes ",
    " ", lm_gz_file,
    " ", arpa_file,
  ]
  command1 = "".join(pieces)
  write_step (command1, easy_config)
  os.system(command1)