def nmt_check_overfitting(easy_config):
    """Translate the overfitting set with the trained NMT model and score it.

    Steps, in order:

    1. Count the lines of the cleaned source-side training corpus
       (``<training_filename>.clean.<source_id>`` in
       ``easy_config.easy_corpus``) with ``wc -l``.
    2. Call ``nmt.overfitting_prepare`` with ``base = max(10, lines // 3000)``.
       NOTE(review): presumably ``base`` is a sampling interval used to build
       the ``OF.clean.*`` files read below -- confirm against ``nmt``.
    3. Run ``sample.py`` (beam search, beam size 12) on
       ``OF.clean.<source_id>``, writing ``ontrain.<target_id>``.
    4. Run Moses' ``multi-bleu.perl -lc`` with ``OF.clean.<target_id>`` as the
       reference argument and ``ontrain.<target_id>`` on stdin.

    Both shell commands are logged with ``write_step`` before being executed
    with ``os.system``; their exit statuses are not checked.

    Args:
        easy_config: configuration object; this function reads its
            ``easy_corpus``, ``easy_train``, ``easy_overfitting``,
            ``nmt_path`` and ``mosesdecoder_path`` attributes.
    """
    import commands  # Python 2 only; imported locally as in the original.

    training_filename = utils.get_filename(exp_config["training_corpus"])
    source_corpus = os.path.join(
        easy_config.easy_corpus,
        training_filename + '.clean.' + exp_config["source_id"])

    # Split on arbitrary whitespace: some wc implementations (BSD/macOS)
    # left-pad the count, so split(' ')[0] would be '' and int() would fail.
    wc_output = commands.getstatusoutput('wc -l ' + source_corpus)[1]
    lines = int(wc_output.split()[0])
    # Single-expression form prints the same text under the Python 2
    # print statement as the former `print "pairs ", lines`.
    print("pairs  %s" % lines)

    from nmt import overfitting_prepare
    # Never go below 10; larger corpora scale the value with their size.
    base = max(10, lines // 3000)
    overfitting_prepare(easy_config, training_filename, base)

    command1 = ("python " + easy_config.nmt_path + "sample.py"
                + " --beam-search "
                + " --beam-size 12"
                + " --state "
                + os.path.join(easy_config.easy_train, "search_state.pkl")
                + " --source "
                + os.path.join(easy_config.easy_overfitting,
                               "OF.clean." + exp_config["source_id"])
                + " --trans "
                + os.path.join(easy_config.easy_overfitting,
                               "ontrain." + exp_config["target_id"])
                + " "
                + os.path.join(easy_config.easy_train, "search_model.npz"))
    command2 = (easy_config.mosesdecoder_path
                + "scripts/generic/multi-bleu.perl "
                + " -lc "
                + easy_config.easy_overfitting + "/OF.clean."
                + exp_config["target_id"]
                + " < "
                + easy_config.easy_overfitting + "/ontrain."
                + exp_config["target_id"])

    # Log each command before running it; the second runs regardless of
    # whether the first succeeded, as in the original.
    write_step(command1, easy_config)
    os.system(command1)
    write_step(command2, easy_config)
    os.system(command2)
def smt_check_train(easy_config):
    """Prepare the overfitting set and evaluate the SMT system on it.

    Counts the lines of the cleaned source-side training corpus
    (``<training_filename>.clean.<source_id>`` in ``easy_config.easy_corpus``)
    with ``wc -l``, calls ``nmt.overfitting_prepare`` with
    ``base = max(10, lines // 3000)``, then calls ``test_on_train``.

    NOTE(review): presumably ``base`` is a sampling interval over the training
    corpus -- confirm against ``nmt.overfitting_prepare``.

    Args:
        easy_config: configuration object; this function reads its
            ``easy_corpus`` attribute and passes the object on to
            ``overfitting_prepare`` and ``test_on_train``.
    """
    import commands  # Python 2 only; imported locally as in the original.

    training_filename = utils.get_filename(exp_config["training_corpus"])
    source_corpus = os.path.join(
        easy_config.easy_corpus,
        training_filename + '.clean.' + exp_config["source_id"])

    # Split on arbitrary whitespace: some wc implementations (BSD/macOS)
    # left-pad the count, so split(' ')[0] would be '' and int() would fail.
    wc_output = commands.getstatusoutput('wc -l ' + source_corpus)[1]
    lines = int(wc_output.split()[0])
    # Single-expression form prints the same text under the Python 2
    # print statement as the former `print "pairs ", lines`.
    print("pairs  %s" % lines)

    from nmt import overfitting_prepare
    # Never go below 10; larger corpora scale the value with their size.
    base = max(10, lines // 3000)
    overfitting_prepare(easy_config, training_filename, base)

    test_on_train(easy_config)