Ejemplo n.º 1
0
    if logpath:
        logdir = os.makedirs(os.path.dirname(logpath), exist_ok=True)
        log_f = open(c.get('log_path'), 'w', encoding='utf-8')

    # Now do the testing and training
    train_postagger(c['train_file'],
                    c['model'],
                    c['delimeter'])
    test_postagger(c['test_file'],
                   c['model'],
                   c['out_file'],
                   c['delimeter'])
    time.sleep(1)

    # Evaluate...
    slashtags_eval(c['gold_file'], c['out_file'], c['delimeter'], log_f)

class TestPeriodTagging(unittest.TestCase):
    """Check tagger output length for lines with a period in the middle.

    Two lines, each containing a standalone ``.`` before the end of the
    line, are passed one after the other to a single ``StanfordPOSTagger``
    built from ``tagger_model``.  The expected result lengths (11 and 10)
    equal the number of whitespace-separated tokens in each input line.
    """

    def runTest(self, result=None):
        # ``result`` is accepted for signature compatibility; it is not read.
        tagger = StanfordPOSTagger(tagger_model)

        # Both lines are tagged, in order, on the same tagger instance
        # before any assertion runs.
        lines = (
            'this is a test . with a period in the middle\n',
            'and a second . to make sure the feed advances.\n',
        )
        tagged_first, tagged_second = [tagger.tag(line) for line in lines]

        self.assertEqual(len(tagged_first), 11)
        self.assertEqual(len(tagged_second), 10)


Ejemplo n.º 2
0
        print("Tagger training complete.")
        tagger_path = tagger_file.name
    else:
        print('Loading tagger from "{}"'.format(args.tagger))
        tagger_path = args.tagger

    # =============================================================================
    # Next, strip the tags from the test file into a temporary file.
    # =============================================================================
    raw_tmp = NamedTemporaryFile()

    remove_tags(args.test, raw_tmp.name)
    # =============================================================================
    # Figure out if we want to save the output path
    # =============================================================================
    if args.output:
        outpath = args.output
    else:
        output_file = NamedTemporaryFile('w', encoding='utf-8')
        outpath = output_file.name

    print('Running tagger on "{}"'.format(args.test))
    test_postagger(raw_tmp.name, tagger_path, outpath)

    print("RESULTS ON SENTENCES OF ALL LENGTHS")
    slashtags_eval(args.test, outpath, args.delimiter, details=True, matrix=False, length_limit=None)

    print("RESULTS ON SENTENCES OF <=10")
    slashtags_eval(args.test, outpath, args.delimiter, details=True, matrix=False, length_limit=10)