def training_job(corpus_fname, k, p, seed, static, dev_fname, model_out_prefix):
    """Train one model, score it on the dev corpus, then delete the model files.

    The training corpus is read with ``io.conll_to_sents`` and only sentences
    accepted by ``isprojective.is_projective`` are kept.  After 20 training
    iterations the dev corpus is parsed with the ``.weights.FINAL`` file and
    two artefacts are written next to the model prefix:

    * ``<prefix>.dev.parsed`` -- the parsed dev sentences (``io.out_conll``).
    * ``<prefix>.dev.scores`` -- one line with UAS/LAS, with and without
      punctuation.

    Finally ``<prefix>.weights.FINAL`` and ``<prefix>.model`` are removed.

    Args:
        corpus_fname: path of the training corpus.
        k, p: arguments forwarded to ``ExplorePolicy``; they are also embedded
            in the output file prefix.  Not used for the policy when
            ``static`` is true.
        seed: passed to ``random.seed`` right before training and embedded in
            the output file prefix.
        static: when true, train with ``explore_policy=None`` and use the
            ``-static-`` output prefix.
        dev_fname: path of the corpus handed to ``parse_corpus``.
        model_out_prefix: leading part of every file name produced here.
    """
    # Close the corpus as soon as it has been read instead of leaking the handle.
    with open(corpus_fname) as corpus:
        sents = [s for s in io.conll_to_sents(corpus)
                 if isprojective.is_projective(s)]
    print("training  %s %s %s %s %s" % (corpus_fname, k, p, seed, len(sents)))

    # Only build an exploration policy when it is actually going to be used.
    if static:
        TRAIN_OUT_FILE = "%s-ef.kps-static-seed%s" % (model_out_prefix, seed)
        explore = None
    else:
        TRAIN_OUT_FILE = "%s-ef.kps-k%s-p%s-seed%s" % (model_out_prefix, k, p, seed)
        explore = ExplorePolicy(k, p)

    model = Model("features/znp.py", "%s.weights" % TRAIN_OUT_FILE)
    model.save("%s.model" % TRAIN_OUT_FILE)

    # Seed immediately before training so shuffling is reproducible per seed.
    random.seed(seed)
    train(sents, model, dev=None, ITERS=20, save_every=None,
          explore_policy=explore, shuffle_sents=True)
    print("training of %s %s %s %s done" % (corpus_fname, k, p, seed))

    print("parsing")
    parsed = parse_corpus(dev_fname, TRAIN_OUT_FILE + ".weights.FINAL",
                          "features/znp.py")
    # `with` guarantees the output is flushed and closed even if writing fails.
    with open(TRAIN_OUT_FILE + ".dev.parsed", "w") as outf:
        for sent in parsed:
            io.out_conll(sent, outf, parent='pparent', prel='pprel')

    # NOTE(review): `eval` is presumably the project's scoring function bound
    # elsewhere in this file; the builtin `eval` does not take these arguments.
    # The third returned value is not used here.
    uas, las, _ = eval(parsed)
    puas, plas, _ = eval(parsed, ignore_punct=True)

    with open(TRAIN_OUT_FILE + ".dev.scores", "w") as outf:
        outf.write("UAS: %s LAS: %s NP_UAS: %s NP_LAS: %s\n"
                   % (uas, las, puas, plas))

    # Only the parsed output and the scores are kept; drop the model files.
    print("deleting")
    os.unlink(TRAIN_OUT_FILE + ".weights.FINAL")
    os.unlink(TRAIN_OUT_FILE + ".model")
type="int", default=20) parser.add_option("--every", dest="save_every", action="store", type="int", default=1) opts, args = parser.parse_args() if len(args) < 1 or not (opts.model_file or opts.features_file): parser.print_usage() sys.exit(1) TRAIN_FILE = args[0] DEV_FILE = args[1] if len(args) > 1 else None FEATURES = opts.features_file MODEL = opts.model_file model = Model(FEATURES, "%s.weights" % MODEL) model.save("%s.model" % MODEL) dev = [s for s in io.conll_to_sents(file(DEV_FILE))] if DEV_FILE else [] train_sents = list(io.conll_to_sents(file(TRAIN_FILE))) print len(train_sents) train_sents = [s for s in train_sents if isprojective.is_projective(s)] print len(train_sents) train(train_sents, model, dev, opts.iters, save_every=opts.save_every)
# Materialise both corpora so that len() can be taken of each.
train_sents = list(train_sents)
gold_sents = list(gold_sents)
print("%s %s" % (len(train_sents), len(gold_sents)))

# Explicit check rather than `assert`: asserts are stripped under `python -O`,
# which would let misaligned train/gold corpora reach the trainer unnoticed.
if len(train_sents) != len(gold_sents):
    raise ValueError("train and gold corpora differ in size: %d vs %d"
                     % (len(train_sents), len(gold_sents)))

# Seed before training so that sentence shuffling is reproducible.
random.seed(opts.random_seed)

explore = ExplorePolicy(2, 0.9) if opts.follow_incorrect else None  # almost always

if opts.labeled:
    # Imported lazily: only the labeled mode needs this trainer.
    from easyfirst import train_labeled
    train_labeled(train_sents, gold_sents, model, dev, opts.iters,
                  save_every=opts.save_every, explore_policy=explore,
                  shuffle_sents=True)
else:
    train(attachonly, train_sents, gold_sents, model, dev, opts.iters,
          save_every=opts.save_every, explore_policy=explore,
          shuffle_sents=True)
# Command-line driver: register options, load the corpora, train the model.
parser.add_option("-o", "--model", dest="model_file")
# NOTE(review): the default is the *string* "None", which is truthy, so the
# usage check below can only ever fail on a missing positional argument.
# Left unchanged because how Model treats "None" is not visible here -- confirm.
parser.add_option("-f", "--features", dest="features_file", default="None")
parser.add_option("--iters", dest="iters", action="store", type="int", default=20)
parser.add_option("--every", dest="save_every", action="store", type="int", default=1)

opts, args = parser.parse_args()

if len(args) < 1 or not (opts.model_file or opts.features_file):
    parser.print_usage()
    sys.exit(1)

# First positional argument is the training file, the optional second one
# is the dev file.
TRAIN_FILE = args[0]
DEV_FILE = args[1] if len(args) > 1 else None
FEATURES = opts.features_file
MODEL = opts.model_file

model = Model(FEATURES, "%s.weights" % MODEL)
model.save("%s.model" % MODEL)

# Read each corpus inside a `with` block so the file handle is closed as soon
# as the sentences are in memory (the handles used to be leaked).
if DEV_FILE:
    with open(DEV_FILE) as dev_fh:
        dev = list(io.conll_to_sents(dev_fh))
else:
    dev = []

with open(TRAIN_FILE) as train_fh:
    train_sents = list(io.conll_to_sents(train_fh))
print(len(train_sents))

# Keep only the sentences that pass the projectivity check; the two counts
# printed show how many were dropped.
train_sents = [s for s in train_sents if isprojective.is_projective(s)]
print(len(train_sents))

train(train_sents, model, dev, opts.iters, save_every=opts.save_every)
# Load the optional dev corpus and the training corpus.  Each file is read
# inside a `with` block so its handle is closed once the sentences are in
# memory (the handles used to be leaked).
if DEV_FILE:
    with open(DEV_FILE) as dev_fh:
        dev = list(io.conll_to_sents(dev_fh))
else:
    dev = []

with open(TRAIN_FILE) as train_fh:
    train_sents = list(io.conll_to_sents(train_fh))
print(len(train_sents))

# Keep only the sentences that pass the projectivity check; the two counts
# printed show how many were dropped.
train_sents = [s for s in train_sents if isprojective.is_projective(s)]
print(len(train_sents))

# Seed before training so that sentence shuffling is reproducible.
random.seed(opts.random_seed)

explore = ExplorePolicy(2, 0.9) if opts.follow_incorrect else None  # almost always

if opts.labeled:
    # Imported lazily: only the labeled mode needs this trainer.
    from easyfirst import train_labeled
    train_labeled(train_sents, model, dev, opts.iters,
                  save_every=opts.save_every, explore_policy=explore,
                  shuffle_sents=True)
else:
    train(train_sents, model, dev, opts.iters,
          save_every=opts.save_every, explore_policy=explore,
          shuffle_sents=True)