def main(): usage = "Usage: %prog [-h] [-r|-t|-l] [[sub_options] arg]"+"\n\nUpdate:7/6/2016.17:31" parser = OptionParser(usage,add_help_option=False) parser.add_option("-h", "--help", action="help",help="Display Help Menu Again.") group = OptionGroup(parser, "Prepare-Training-Save Options") group.add_option("-r","--run",dest="train", action="store_true", help="Start Prepare-Training-Save.",default=False) ## group.add_option("--rs",dest="sourcefile_r",default="zh-en/10000.ted.zh", help="Sourcefile name.[default: %default]") group.add_option("--rt",dest="targetfile_r",default="zh-en/10000.ted.en", help="Targetfile name.[default: %default]") group.add_option("--rlm",dest="lexical",default="zh-en/zh2e_f", help="Lexical_Model name.[default: %default]") group.add_option("--rm",dest="outputmodel",default="object/model.p", help="Output maximum entropy model name.[default: %default]") group.add_option("--rf",dest="outputfeature",default="no", help="Output featuresets name.[default: %default]") group.add_option("--re",dest="testset",default=0.02,type="float", help="The percentage of testset size.[default: %default]") group.add_option("--rc",dest="cores",default=12,type="int", help="The amount of cores.[default: %default]") #group.add_option("--ro",dest="order",default="e2f", help="The order of lexical model.[default: %default]") group.add_option("--rTFPN",dest="TFPN",default=1000,type="int", help="The amount of testset for TFPN.[default: %default]") ## parser.add_option_group(group) group2 = OptionGroup(parser, "Load-Test-Find Options") group2.add_option("-t","--test",dest="test", action="store_true", help="Start Load-Test-Find. ",default=False) group2.add_option("--ts",dest="sourcefile_t",default="zh-en/2000.test.zh", help="Sourcefile name.[default: %default]") group2.add_option("--tt",dest="targetfile_t",default="zh-en/2000.test.en", help="Targetfile name.[default: %default]") group2.add_option("--tlm",dest="lexical",default="zh-en/zh2e_f", help="Lexical_Model name.[default: %default]") #group2.add_option("--tor",dest="order",default="e2f", help="The order of lexical model.[default: %default]") group2.add_option("--tm",dest="model",default="object/model.p", help="maximum entropy model name.[default: %default]") group2.add_option("--tw",dest="wrong_rate_t",default=0.95,type="float", help="WRONG RATE greater than.[default: %default]") group2.add_option("--to",dest="ok_rate_t",default=0.95,type="float", help="OK RATE greater than.[default: %default]") group2.add_option("--tc",dest="cores",default=12,type="int", help="The amount of cores.[default: %default]") group2.add_option("--tf",dest="outputfile",default="error/2000.test", help="Outputfile(ok,mid,wrong) ,total 3 files.[default: %default]") group2.add_option("--te",dest="testset",default=0.07,type="float", help="The percentage of testset size.[default: %default]") group2.add_option("--ts_text",dest="sourcefile_t_origin",default="", help="unprocess of sourcefile name.[default: %default]") group2.add_option("--tt_text",dest="targetfile_t_origin",default="", help="unprocess of targetfile name.[default: %default]") parser.add_option_group(group2) group4 = OptionGroup(parser, "Load-Test-Match Options") group4.add_option("-m","--match",dest="match", action="store_true", help="Start Load-Test-Match. ",default=False) group4.add_option("--ms",dest="sourcefile_m",default="zh-en/200.test.zh", help="Sourcefile name.[default: %default]") group4.add_option("--mt",dest="targetfile_m",default="zh-en/200.test.en", help="Targetfile name.[default: %default]") group4.add_option("--mlm",dest="lexical",default="zh-en/zh2e_f", help="Lexical_Model name.[default: %default]") #group2.add_option("--tor",dest="order",default="e2f", help="The order of lexical model.[default: %default]") group4.add_option("--mm",dest="model",default="object/model.p", help="maximum entropy model name.[default: %default]") group4.add_option("--mh",dest="ok_rate",default=0.95,type="float", help="OK RATE greater than.[default: %default]") group4.add_option("--mw",dest="win_size",default=0.06,type="float", help="Windows size = % of corpus.[default: %default]") group4.add_option("--mc",dest="cores",default=12,type="int", help="The amount of cores.[default: %default]") group4.add_option("--mo",dest="outputfile_m",default="match/1000.test.match", help="Outputfile name.[default: %default]") group4.add_option("--m_sort",dest="m_sort",default=True, action="store_false",help="Disable sort the original file.[default: %default]") group4.add_option("--ms_text",dest="sourcefile_t_origin",default="", help="unprocess of sourcefile name.[default: %default]") group4.add_option("--mt_text",dest="targetfile_t_origin",default="", help="unprocess of targetfile name.[default: %default]") parser.add_option_group(group4) group3 = OptionGroup(parser, "Lexfilter Options") group3.add_option("-l", "--lexfilter",dest="lex", action="store_true", help="Start filtering lexical model.",default=False) group3.add_option("--ll",dest="ori_lex",default="zh-en/zh2e", help="Original Lexical Model name.[default: %default]") group3.add_option("--lo",dest="order",default="e2f", help="The order of lexical model.[default: %default]") group3.add_option("--lh",dest="threshold",default=0.01,type="float", help="Filter Word's Pro is less than.[default: %default]") group3.add_option("--lt",dest="top",default=False, action="store_true",help="Enable Filter Top 10 Words.[default: %default]") group3.add_option("--ls",dest="symbol",default=True, action="store_false",help="Disable filter symbol.[default: %default]") group3.add_option("--lc",dest="chinese",default=True, action="store_false",help="Disable filter chinese.[default: %default]") parser.add_option_group(group3) (options, args) = parser.parse_args() ''' print 'args', print args ''' ''' print 'options', print options ''' tmp_c = int(options.train) + int(options.test) + int(options.lex) + int(options.match) if tmp_c > 1: parser.error("you only can pick one of the main actions.") elif tmp_c == 0: parser.error("you have atleast pick one of the main actions.") ''' if len(args) != 1: parser.error("incorrect number of arguments") if options.verbose: print "reading %s..." % options.filename ''' if options.train: #this guy run train option. max_args = {'zh_dir':options.sourcefile_r,'en_dir':options.targetfile_r, 'cores':options.cores,'lex_table':options.lexical,'len_test_sets':options.testset} m_c.auto(max_args) ##TFPN m_c.TFPN(options.TFPN,0) ##after training model save_args = {'class_output_file':options.outputmodel,'f_sets_output_file':options.outputfeature,'id_f':0} m_c.save(save_args) elif options.test: pre_args = {'class_file':options.model,'f_sets_file':'no'} m_c.load(pre_args) find_args = {'zh_dir':options.sourcefile_t,'en_dir':options.targetfile_t, 'wrong_rate':options.wrong_rate_t,'ok_rate':options.ok_rate_t,'cores':options.cores,'output':options.outputfile,'lex_table':options.lexical ,'len_test_sets':options.testset,'targetfile_t_origin':options.targetfile_t_origin,'sourcefile_t_origin':options.sourcefile_t_origin} #omg i will change it later m_c.find_wrong(find_args) elif options.match: pre_args = {'class_file':options.model,'f_sets_file':'no'} m_c.load(pre_args) match_args = {'zh_dir':options.sourcefile_m,'en_dir':options.targetfile_m, 'ok_rate':options.ok_rate,'cores':options.cores,'output':options.outputfile_m,'lex_table':options.lexical ,'win_size':options.win_size,'sort':options.m_sort,'targetfile_t_origin':options.targetfile_t_origin,'sourcefile_t_origin':options.sourcefile_t_origin} #omg i will change it later m_c.find_match(match_args) elif options.lex: lex_args = {'filename':options.ori_lex,'less_than':options.threshold,'order':options.order,'top':options.top,'symbol':options.symbol,'chinese':options.chinese} m_c.l(lex_args)
def main(): usage = "Usage: %prog [-h] [-r|-t|-l] [[sub_options] arg]" + "\n\nUpdate:15/9/2015.20:35" parser = OptionParser(usage, add_help_option=False) parser.add_option("-h", "--help", action="help", help="Display Help Menu Again.") group4 = OptionGroup(parser, "Load-Match(Wiki) Options") group4.add_option("-m", "--match", dest="match", action="store_true", help="Start Load-Test-Match. ", default=False) group4.add_option( "--ms", dest="sourcefile_m", default="zh-en/200.test.zh", help="Sourcefile name.[default: %default]" ) group4.add_option( "--mt", dest="targetfile_m", default="zh-en/200.test.en", help="Targetfile name.[default: %default]" ) group4.add_option("--mlm", dest="lexical", default="zh-en/zh2e_f", help="Lexical_Model name.[default: %default]") # group2.add_option("--tor",dest="order",default="e2f", help="The order of lexical model.[default: %default]") group4.add_option( "--mm", dest="model", default="object/model.p", help="maximum entropy model name.[default: %default]" ) group4.add_option( "--mh", dest="ok_rate", default=0.95, type="float", help="OK RATE greater than.[default: %default]" ) # group4.add_option("--mw",dest="win_size",default=0.06,type="float", help="Windows size = % of corpus.[default: %default]") group4.add_option("--mc", dest="cores", default=12, type="int", help="The amount of cores.[default: %default]") group4.add_option( "--mo", dest="outputfile_m", default="match/1000.test.match", help="Outputfile name.[default: %default]" ) group4.add_option( "--m_sort", dest="m_sort", default=True, action="store_false", help="Disable sort the original file.[default: %default]", ) parser.add_option_group(group4) (options, args) = parser.parse_args() """ print 'args', print args """ """ print 'options', print options """ tmp_c = int(options.match) if tmp_c > 1: parser.error("you only can pick one of the main actions.") elif tmp_c == 0: parser.error("you have atleast pick one of the main actions.") """ if len(args) != 1: parser.error("incorrect number of arguments") if options.verbose: print "reading %s..." % options.filename """ if options.match: pre_args = {"class_file": options.model, "f_sets_file": "no"} m_c.load(pre_args) match_args = { "zh_dir": options.sourcefile_m, "en_dir": options.targetfile_m, "ok_rate": options.ok_rate, "cores": options.cores, "output": options.outputfile_m, "lex_table": options.lexical, "sort": options.m_sort, } # omg i will change it later m_c.find_wiki_match(match_args)