コード例 #1
0
def training_job(corpus_fname, k, p, seed, static, dev_fname, model_out_prefix):
   sents = [s for s in io.conll_to_sents(file(corpus_fname)) if isprojective.is_projective(s)]
   print "training ",corpus_fname,k,p,seed,len(sents)

   explore = ExplorePolicy(k,p)
   TRAIN_OUT_FILE = "%s-ef.kps-k%s-p%s-seed%s" % (model_out_prefix, k, p, seed)
   if static:
      TRAIN_OUT_FILE = "%s-ef.kps-static-seed%s" % (model_out_prefix, seed)
      explore=None

   model = Model("features/znp.py", "%s.weights" % TRAIN_OUT_FILE)
   model.save("%s.model" % TRAIN_OUT_FILE)
   random.seed(seed)
   train(sents, model, dev=None, ITERS=20,save_every=None,explore_policy=explore,shuffle_sents=True)
   print "training of",corpus_fname,k,p,seed,"done"
   print "parsing"

   parsed = parse_corpus(dev_fname, TRAIN_OUT_FILE + ".weights.FINAL", "features/znp.py")
   outf = file(TRAIN_OUT_FILE + ".dev.parsed","w")
   for sent in parsed:
      io.out_conll(sent, outf, parent='pparent',prel='pprel')
   uas,las,complete = eval(parsed)
   puas,plas,complete = eval(parsed,ignore_punct=True)
   outf.close()
   outf = file(TRAIN_OUT_FILE + ".dev.scores","w")
   print >> outf, "UAS:",uas,"LAS:",las,"NP_UAS:",puas,"NP_LAS:",plas
   outf.close()

   print "deleting"
   os.unlink(TRAIN_OUT_FILE + ".weights.FINAL")
   os.unlink(TRAIN_OUT_FILE + ".model")
コード例 #2
0
def training_job(corpus_fname, k, p, seed, static, dev_fname,
                 model_out_prefix):
    from training import online_greedy_train
    sents = [
        s for s in io.conll_to_sents(file(corpus_fname))
        if isprojective.is_projective(s)
    ]
    print "training ", corpus_fname, k, p, seed, len(sents)
    labels = set()
    for sent in sents:
        for tok in sent:
            if tok['prel'] == '_': tok['prel'] = 'dep'
            #tok['prel'] = 'dep'
            labels.add(tok['prel'])

    oracle = ArcHybridStaticOracle() if static else ArcHybridDynamicOracle()
    explore = None if static else ExplorePolicy(k, p)
    print "start"
    feature_extractor = features.extractors.get("hybrid.1")
    action_map, params = online_greedy_train(
        sents,
        transition_system=ArcHybridState,
        oracle=oracle,
        feature_extractor=feature_extractor,
        labels=labels,
        iterations=15,
        explore_policy=ExplorePolicy(k, p),
        random_seed=seed,
        shuffle_corpus=True)
    print "end"
    params.finalize()
    TRAIN_OUT_FILE = "%s-hybrid-k%s-p%s-seed%s" % (model_out_prefix, k, p,
                                                   seed)
    if static:
        TRAIN_OUT_FILE = "%s-hybrid-static-seed%s" % (model_out_prefix, seed)
    params.dump(file(TRAIN_OUT_FILE, "w"), sparse=True)
    pickle.dump(action_map, file(TRAIN_OUT_FILE + ".amap", "w"))
    print "training of", corpus_fname, k, p, seed, "done"

    print "parsing"
    parsed = parse_corpus(dev_fname, TRAIN_OUT_FILE, feature_extractor,
                          ArcHybridState)
    print "writing"
    outf = file(TRAIN_OUT_FILE + ".dev.parsed", "w")
    for sent in parsed:
        io.out_conll(sent, outf, parent='pparent', prel='pprel')
    uas, las, complete = eval(parsed)
    puas, plas, complete = eval(parsed, ignore_punct=True)
    outf.close()
    outf = file(TRAIN_OUT_FILE + ".dev.scores", "w")
    print >> outf, "UAS:", uas, "LAS:", las, "NP_UAS:", puas, "NP_LAS:", plas
    print "UAS:", uas, "LAS:", las, "NP_UAS:", puas, "NP_LAS:", plas
    outf.close()

    print "deleting"
    os.unlink(TRAIN_OUT_FILE)
    os.unlink(TRAIN_OUT_FILE + ".amap")
コード例 #3
0
ファイル: BEParser.py プロジェクト: binnd/BEParser2
def main():
    model_dir = 'test_model'
    train_file = 'data'
    test_file = 'data'
    beam_size = 1
    output_file = None
    iter = 200
    is_train = False
    if is_train:
        train_data = list(read_corpus(train_file))
        print len(train_data)
        train_sents = [s for s in train_data if isprojective.is_projective(s)]
        print len(train_sents)
        train(model_dir, train_sents, iter, beam_size)
    else:
        test_data = list(read_corpus(test_file))
        test(model_dir, test_data, output_file, beam_size, str(iter))
コード例 #4
0
                  type="int",
                  default=20)
parser.add_option("--every",
                  dest="save_every",
                  action="store",
                  type="int",
                  default=1)

opts, args = parser.parse_args()

if len(args) < 1 or not (opts.model_file or opts.features_file):
    parser.print_usage()
    sys.exit(1)

TRAIN_FILE = args[0]
DEV_FILE = args[1] if len(args) > 1 else None
FEATURES = opts.features_file
MODEL = opts.model_file

model = Model(FEATURES, "%s.weights" % MODEL)
model.save("%s.model" % MODEL)

dev = [s for s in io.conll_to_sents(file(DEV_FILE))] if DEV_FILE else []

train_sents = list(io.conll_to_sents(file(TRAIN_FILE)))
print len(train_sents)
train_sents = [s for s in train_sents if isprojective.is_projective(s)]
print len(train_sents)

train(train_sents, model, dev, opts.iters, save_every=opts.save_every)
コード例 #5
0
attachonly = False
if TRAIN_FILE[-3:] == "E00":
    print TRAIN_FILE[-3:]
    attachonly = True

model = Model(FEATURES, "%s.weights" % MODEL)
model.save("%s.model" % MODEL)

dev = [s for s in io.conll_to_sents(file(DEV_FILE))] if DEV_FILE else []

train_sents = list(io.conll_to_sents(file(TRAIN_FILE)))
gold_sents = list(io.conll_to_sents(file(GOLD_FILE)))
print len(train_sents), len(gold_sents)
nonproj = [(s_g, s_t) for s_g, s_t in zip(gold_sents, train_sents)
           if isprojective.is_projective(s_g)]
gold_sents, train_sents = zip(*nonproj)
train_sents = list(train_sents)
gold_sents = list(gold_sents)
print len(train_sents), len(gold_sents)
assert len(train_sents) == len(gold_sents)

random.seed(opts.random_seed)
if opts.follow_incorrect:
    explore = ExplorePolicy(2, 0.9)  # almost always
else:
    explore = None
if (opts.labeled):
    from easyfirst import train_labeled
    train_labeled(train_sents,
                  gold_sents,
コード例 #6
0
ファイル: train.py プロジェクト: pombredanne/nlp-3
parser.add_option("-o","--model",dest="model_file")
parser.add_option("-f","--features",dest="features_file",default="None")
parser.add_option("--iters",dest="iters",action="store",type="int",default=20)
parser.add_option("--every",dest="save_every",action="store",type="int",default=1)

opts, args = parser.parse_args()

if len(args)<1 or not (opts.model_file or opts.features_file):
   parser.print_usage()
   sys.exit(1)

TRAIN_FILE = args[0]
DEV_FILE   = args[1] if len(args)>1 else None
FEATURES   = opts.features_file
MODEL      = opts.model_file


model = Model(FEATURES, "%s.weights" % MODEL)
model.save("%s.model" % MODEL)


dev = [s for s in io.conll_to_sents(file(DEV_FILE))] if DEV_FILE else []

train_sents = list(io.conll_to_sents(file(TRAIN_FILE)))
print len(train_sents)
train_sents = [s for s in train_sents if isprojective.is_projective(s)]
print len(train_sents)

train(train_sents, model, dev, opts.iters,save_every=opts.save_every)

コード例 #7
0
ファイル: easyfirst.py プロジェクト: pombredanne/nlp-3
def load_sentences(filename,ONLY_PROJECTIVE=False):
   """Return the sentences `io.conll_to_sents` reads from `filename` as a list.

   Args:
      filename: path of the file to read.
      ONLY_PROJECTIVE: when true, keep only the sentences accepted by
         `isprojective.is_projective`; otherwise keep every sentence.
   """
   # The list is fully built inside the `with` block, so the file handle is
   # closed on return instead of being left to the garbage collector.
   with open(filename) as conll_file:
      return [s for s in io.conll_to_sents(conll_file)
              if (not ONLY_PROJECTIVE) or isprojective.is_projective(s)]
コード例 #8
0
# Remaining flag definitions, command-line parsing, corpus loading and the
# module-level run settings.
gflags.DEFINE_integer("random_seed", 0, "Random seed.")

gflags.DEFINE_integer("save_every", 0, "Dump a model every k iterations.")

# Parse the command line; the returned list holds what is left after flag
# parsing and is printed for inspection.
args = FLAGS(sys.argv)
print args

# args[1] is taken as the data file.
DATA_FILE = args[1]

featExt = extractors.get(FLAGS.feature_extractor)

sents = list(io.conll_to_sents(file(DATA_FILE)))

# NOTE: `True or ...` short-circuits, so FLAGS.only_proj is never consulted:
# whenever FLAGS.train is set, only projective sentences are kept.
if FLAGS.train and (True or FLAGS.only_proj):
    import isprojective
    sents = [s for s in sents if isprojective.is_projective(s)]

# Optionally relabel tokens whose relation is '_' as "dep", in place.
if FLAGS.add_dep_label:
    for sent in sents:
        for tok in sent:
            if tok['prel'] == '_': tok['prel'] = "dep"

# NOTE(review): EXPLORE and LABELED are presumably read by code further down
# the file (not visible here) -- confirm before changing them.
EXPLORE = 1

LABELED = True

# Run mode follows the --train flag.
MODE = "train" if FLAGS.train else "test"
#MODE="test_system"

system = FLAGS.system
コード例 #9
0
def load_sentences(filename, ONLY_PROJECTIVE=False):
    """Collect the sentences `io.conll_to_sents` yields for `filename`.

    With `ONLY_PROJECTIVE` set, sentences rejected by
    `isprojective.is_projective` are skipped; otherwise every sentence is
    returned.  The result is always a list.
    """
    kept = []
    for sent in io.conll_to_sents(file(filename)):
        # Drop a sentence only when filtering is on and it fails the check.
        if ONLY_PROJECTIVE and not isprojective.is_projective(sent):
            continue
        kept.append(sent)
    return kept