"which tagger to use; change the default to the one you want to use on q 3.2"
    )

    # Parse the command line. An IOError raised while parsing is routed
    # through parser.error() so it is reported as a usage error rather than
    # as a traceback.
    try:
        args = parser.parse_args()
    except IOError as msg:
        parser.error(str(msg))

    # Load the training file and the evaluation files given on the command
    # line.
    data = read_twitter(trainfile=args.train, evalfiles=args.eval)

    # Imported under an alias so that the `tagger` variable below only ever
    # holds the model instance and does not rebind the module name.
    import tagger as tagger_module

    if args.tagger == "logreg":
        tagger = tagger_module.LogisticRegressionTagger()
    elif args.tagger == "crf":
        tagger = tagger_module.CRFPerceptron()
    else:
        # Trailing newline keeps the shell prompt off the message line.
        sys.stderr.write("Did not properly select tagger!\n")
        sys.exit(1)

    # Train the tagger
    tagger.fit_data(data.train_sents, data.train_labels)

    # Evaluation (also writes out predictions)
    # os.path.join avoids a doubled separator when outdir already ends with
    # one, and does not yield a root-anchored path when outdir is empty.
    trainoutfile = os.path.join(
        args.outdir, "{}.pred".format(os.path.basename(args.train)))
    # Parenthesised single-argument print is valid on Python 2 and Python 3.
    print("### Train evaluation; writing to {}".format(trainoutfile))
    data.train_preds = tagger.evaluate_data(data.train_sents,
                                            data.train_labels)
    write_preds(trainoutfile, data.train_sents, data.train_labels,
                data.train_preds)
예제 #2
0
                        type=int,
                        default=4,
                        help="batch size")
    # Parse the command line. An IOError raised while parsing is routed
    # through parser.error() so it is reported as a usage error rather than
    # as a traceback.
    try:
        args = parser.parse_args()
    except IOError as msg:
        parser.error(str(msg))

    # Load the training file and the evaluation files given on the command
    # line.
    data = read_twitter(trainfile=args.train, evalfiles=args.eval)

    # Imported under an alias so that the `tagger` variable below only ever
    # holds the model instance and does not rebind the module name.
    import tagger as tagger_module

    if args.tagger == "logreg":
        tagger = tagger_module.LogisticRegressionTagger()
    elif args.tagger == "crf":
        tagger = tagger_module.CRFPerceptron(args.epochs, args.batch_size)
    else:
        # Trailing newline keeps the shell prompt off the message line.
        sys.stderr.write("Did not properly select tagger!\n")
        sys.exit(1)

    # Train the tagger
    tagger.fit_data(data.train_sents, data.train_labels)

    # Evaluation (also writes out predictions)
    # args.eval and data.eval are walked in lockstep: each evaluation file
    # name is paired with the data set at the same position.
    for evalstr, evalset in zip(args.eval, data.eval):
        # os.path.join avoids a doubled separator when outdir already ends
        # with one, and does not yield a root-anchored path when outdir is
        # empty.
        evaloutfile = os.path.join(
            args.outdir, "{}.pred".format(os.path.basename(evalstr)))
        print("### evaluation of {}; writing to {}".format(
            evalstr, evaloutfile))
        preds = tagger.evaluate_data(evalset["sents"], evalset["labels"])
        write_preds(evaloutfile, evalset["sents"], evalset["labels"], preds)