def main(argv):
    """Train a translation model and a language model and save both to disk.

    Options are parsed from ``sys.argv[1:]``:

    -h, --help          print the help text and exit
    -r, --rawfn FILE    first training input (required)
    -t, --tokfn FILE    optional second training input; when given, the
                        ``*_par_files`` training methods are called with
                        both files
    -n, --nval N        integer handed to the language-model trainer
                        (default 3)
    -o, --opref PREFIX  output prefix (required); the models are written to
                        ``PREFIX.tm`` and ``PREFIX.lm``
    -v, --verbose       flag passed through to the training calls

    Args:
        argv: accepted for compatibility with the caller but not read;
            the options always come from ``sys.argv[1:]``.

    Exits with status 2 on a usage error (unknown option, missing required
    option, non-integer ``-n``) and with status 0 after printing help.
    """
    # take parameters (None means "option not given")
    rfilename = None
    tfilename = None
    nval = 3
    n_given = False
    opref = None
    verbose = False

    try:
        # "verbose" has to be registered here, otherwise getopt rejects
        # --verbose before the option loop below ever sees it.
        opts, _ = getopt.getopt(
            sys.argv[1:], "hr:t:n:o:v",
            ["help", "rawfn=", "tokfn=", "nval=", "opref=", "verbose"])
    except getopt.GetoptError:
        print_help()
        sys.exit(2)

    if not opts:
        print_help()
        sys.exit()

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            print_help()
            sys.exit()
        elif opt in ("-r", "--rawfn"):
            rfilename = arg
        elif opt in ("-t", "--tokfn"):
            tfilename = arg
        elif opt in ("-n", "--nval"):
            try:
                nval = int(arg)
            except ValueError:
                # report bad input as a usage error instead of a traceback
                sys.stderr.write("Error! -n parameter must be an integer\n")
                sys.exit(2)
            n_given = True
        elif opt in ("-o", "--opref"):
            opref = arg
        elif opt in ("-v", "--verbose"):
            verbose = True

    # check parameters
    if rfilename is None:
        sys.stderr.write("Error! -r parameter not given\n")
        sys.exit(2)
    if opref is None:
        sys.stderr.write("Error! -o parameter not given\n")
        sys.exit(2)

    # print parameters
    sys.stderr.write("r is %s\n" % rfilename)
    if tfilename is not None:
        sys.stderr.write("t is %s\n" % tfilename)
    if n_given:
        sys.stderr.write("n is %d\n" % nval)
    sys.stderr.write("o is %s\n" % opref)

    # train translation model; the inputs are closed as soon as training ends
    sys.stderr.write("Training translation model...\n")
    tmodel = smtpr.TransModel()
    with io.open(rfilename, 'r', encoding="utf-8") as rfile:
        if tfilename is not None:
            with io.open(tfilename, 'r', encoding="utf-8") as tfile:
                tmodel.train_tok_tm_par_files(rfile, tfile, verbose)
        else:
            tmodel.train_tok_tm(rfile, verbose)

    # print translation model; closing the file guarantees it is flushed
    with io.open(opref + ".tm", 'w', encoding='utf-8') as tmfile:
        tmodel.print_model_to_file(tmfile)

    # train language model; the inputs are opened afresh so that reading
    # starts from the beginning of each file again
    sys.stderr.write("Training language model...\n")
    lmodel = smtpr.LangModel()
    with io.open(rfilename, 'r', encoding="utf-8") as rfile:
        if tfilename is not None:
            with io.open(tfilename, 'r', encoding="utf-8") as tfile:
                lmodel.train_tok_lm_par_files(rfile, tfile, nval, verbose)
        else:
            lmodel.train_tok_lm(rfile, nval, verbose)

    # print language model
    with io.open(opref + ".lm", 'w', encoding='utf-8') as lmfile:
        lmodel.print_model_to_file(lmfile)
def main(argv):
    """Load a translation and a language model and recase the input text.

    Options are parsed from ``sys.argv[1:]``:

    -h, --help          print the help text and exit
    -f, --filename FILE input file; standard input is read when omitted
    -m, --mpref PREFIX  model prefix (required); the models are loaded from
                        ``PREFIX.tm`` and ``PREFIX.lm``
    -i, --interp P      float handed to ``LangModel.set_interp_prob``
                        (default ``smtpr._global_lm_interp_prob``)
    -w, --weights "W.." whitespace-separated floats handed to the decoder
                        (default ``[1, 1, 1, 1]``)
    -v, --verbose       flag passed through to ``Decoder.recase``

    Args:
        argv: accepted for compatibility with the caller but not read;
            the options always come from ``sys.argv[1:]``.

    Exits with status 2 on a usage error (unknown option, missing ``-m``,
    non-numeric ``-i`` or ``-w``) and with status 0 after printing help.
    """
    # take parameters (None means "option not given")
    filename = None
    mpref = None
    ival = smtpr._global_lm_interp_prob
    i_given = False
    weights = [1, 1, 1, 1]
    weight_str = None
    verbose = False

    try:
        # "verbose" has to be registered here, otherwise getopt rejects
        # --verbose before the option loop below ever sees it.
        opts, _ = getopt.getopt(
            sys.argv[1:], "hf:m:i:w:v",
            ["help", "filename=", "mpref=", "interp=", "weights=",
             "verbose"])
    except getopt.GetoptError:
        print_help()
        sys.exit(2)

    if not opts:
        print_help()
        sys.exit()

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            print_help()
            sys.exit()
        elif opt in ("-f", "--filename"):
            filename = arg
        elif opt in ("-m", "--mpref"):
            mpref = arg
        elif opt in ("-i", "--interp"):
            try:
                ival = float(arg)
            except ValueError:
                # report bad input as a usage error instead of a traceback
                sys.stderr.write("Error! -i parameter must be a number\n")
                sys.exit(2)
            i_given = True
        elif opt in ("-w", "--weights"):
            weight_str = arg
            try:
                weights = [float(token) for token in weight_str.split()]
            except ValueError:
                sys.stderr.write(
                    "Error! -w parameter must be a list of numbers\n")
                sys.exit(2)
        elif opt in ("-v", "--verbose"):
            verbose = True

    # check parameters
    if mpref is None:
        sys.stderr.write("Error! -m parameter not given\n")
        sys.exit(2)

    # print parameters
    if filename is not None:
        sys.stderr.write("f is %s\n" % filename)
    sys.stderr.write("m is %s\n" % mpref)
    if i_given:
        sys.stderr.write("i is %f\n" % ival)
    if weight_str is not None:
        sys.stderr.write("w is \"%s\"\n" % weight_str)

    # open input
    if filename is not None:
        infile = io.open(filename, 'r', encoding="utf-8")
    else:
        # fallback to stdin; closefd=False keeps the process-wide stdin
        # descriptor open when the wrapper is closed below
        infile = io.open(sys.stdin.fileno(), 'r', encoding='utf8',
                         closefd=False)

    with infile:
        # load translation model
        tmodel = smtpr.TransModel()
        tmfilename = mpref + ".tm"
        with io.open(tmfilename, 'r', encoding="utf-8") as tmfile:
            sys.stderr.write(
                "Loading translation model from file %s ...\n" % tmfilename)
            tmodel.load(tmfile)

        # load language model
        lmodel = smtpr.LangModel()
        lmfilename = mpref + ".lm"
        with io.open(lmfilename, 'r', encoding="utf-8") as lmfile:
            sys.stderr.write(
                "Loading language model from file %s ...\n" % lmfilename)
            lmodel.load(lmfile)
        lmodel.set_interp_prob(ival)

        # recase the input with the loaded models
        decoder = smtpr.Decoder(tmodel, lmodel, weights)
        sys.stderr.write("Recasing...\n")
        decoder.recase(infile, verbose)