# Open the log file only when a log path is configured, and remember whether
# this code opened it so that it is the only handle closed afterwards.
opened_log = False
if logpath:
    # os.makedirs() returns None, so keep the directory name itself; skip the
    # call when the path has no directory part, because os.makedirs('')
    # raises FileNotFoundError.
    logdir = os.path.dirname(logpath)
    if logdir:
        os.makedirs(logdir, exist_ok=True)
    # NOTE(review): the file is opened via c.get('log_path') rather than
    # `logpath`; presumably they are the same value -- confirm against the
    # code that sets `logpath`.
    log_f = open(c.get('log_path'), 'w', encoding='utf-8')
    opened_log = True

try:
    # Now do the testing and training
    train_postagger(c['train_file'], c['model'], c['delimeter'])
    test_postagger(c['test_file'], c['model'], c['out_file'], c['delimeter'])

    # NOTE(review): presumably gives the tagger output time to reach disk
    # before it is evaluated -- confirm whether this pause is still needed.
    time.sleep(1)

    # Evaluate...
    slashtags_eval(c['gold_file'], c['out_file'], c['delimeter'], log_f)
finally:
    # Flush and release the log file even if training/testing/eval raised.
    if opened_log:
        log_f.close()


class TestPeriodTagging(unittest.TestCase):
    """Check that a sentence-internal period does not split the tagger output.

    Two sentences, each containing a free-standing ``.`` token, are tagged
    back to back with the same tagger instance. Each result is expected to
    hold one entry per whitespace-separated token of its own input.
    """

    def runTest(self, result=None):
        """Tag both sentences and assert the expected token counts."""
        tagger = StanfordPOSTagger(tagger_model)

        first_tagged = tagger.tag('this is a test . with a period in the middle\n')
        second_tagged = tagger.tag('and a second . to make sure the feed advances.\n')

        # 11 and 10 are the whitespace-separated token counts of the inputs.
        self.assertEqual(len(first_tagged), 11)
        self.assertEqual(len(second_tagged), 10)
print("Tagger training complete.") tagger_path = tagger_file.name else: print('Loading tagger from "{}"'.format(args.tagger)) tagger_path = args.tagger # ============================================================================= # Next, strip the tags from the test file into a temporary file. # ============================================================================= raw_tmp = NamedTemporaryFile() remove_tags(args.test, raw_tmp.name) # ============================================================================= # Figure out if we want to save the output path # ============================================================================= if args.output: outpath = args.output else: output_file = NamedTemporaryFile('w', encoding='utf-8') outpath = output_file.name print('Running tagger on "{}"'.format(args.test)) test_postagger(raw_tmp.name, tagger_path, outpath) print("RESULTS ON SENTENCES OF ALL LENGTHS") slashtags_eval(args.test, outpath, args.delimiter, details=True, matrix=False, length_limit=None) print("RESULTS ON SENTENCES OF <=10") slashtags_eval(args.test, outpath, args.delimiter, details=True, matrix=False, length_limit=10)