"which tagger to use; change the default to the one you want to use on q 3.2" ) try: args = parser.parse_args() except IOError as msg: parser.error(str(msg)) data = read_twitter(trainfile=args.train, evalfiles=args.eval) import tagger if args.tagger == "logreg": tagger = tagger.LogisticRegressionTagger() elif args.tagger == "crf": tagger = tagger.CRFPerceptron() else: sys.stderr.write("Did not properly select tagger!") sys.exit(1) # Train the tagger tagger.fit_data(data.train_sents, data.train_labels) # Evaluation (also writes out predictions) trainoutfile = "{}/{}.pred".format(args.outdir, os.path.basename(args.train)) print "### Train evaluation; writing to {}".format(trainoutfile) data.train_preds = tagger.evaluate_data(data.train_sents, data.train_labels) write_preds(trainoutfile, data.train_sents, data.train_labels, data.train_preds)
type=int, default=4, help="batch size") try: args = parser.parse_args() except IOError as msg: parser.error(str(msg)) # print os.getcwd() # data = read_twitter(trainfile=args.train, evalfiles=args.eval) import tagger if args.tagger == "logreg": tagger = tagger.LogisticRegressionTagger() elif args.tagger == "crf": tagger = tagger.CRFPerceptron(args.epochs, args.batch_size) else: sys.stderr.write("Did not properly select tagger!") sys.exit(1) # Train the tagger tagger.fit_data(data.train_sents, data.train_labels) # Evaluation (also writes out predictions) for evalstr, evalset in zip(args.eval, data.eval): evaloutfile = "{}/{}.pred".format(args.outdir, os.path.basename(evalstr)) print("### evaluation of {}; writing to {}".format( evalstr, evaloutfile)) preds = tagger.evaluate_data(evalset["sents"], evalset["labels"]) write_preds(evaloutfile, evalset["sents"], evalset["labels"], preds)