def tag(test_items=None, WS="", tokenize=False, tokenizer_context=0,
        left_context=1, right_context=1, gazetteer=None,
        min_tok_freq=25, min_lem_freq=25, mode=""):
    """
    Tags a list of (potentially annotated) test tokens.

    Parameters
    ----------
    test_items : list or None
        Input items. In "tag" mode these are raw tokens; in "test"/"crossval"
        modes each item is either the utterance marker "<utt>" or an annotated
        tuple whose first element is the token string. Defaults to an empty list.
    WS : str
        Workspace identifier passed through to the Tokenizer and MaxentTagger.
    tokenize : bool
        If True, run (and in eval modes, evaluate) a Tokenizer first; otherwise
        the input is assumed to be tokenized already.
    tokenizer_context : int
        Context size forwarded to the Tokenizer.
    left_context, right_context : int
        Context window sizes forwarded to the MaxentTagger.
    gazetteer
        Optional gazetteer forwarded to both tokenizer and tagger.
    min_tok_freq, min_lem_freq : int
        Frequency thresholds forwarded to the MaxentTagger.
    mode : str
        One of "tag", "test", or "crossval"; controls whether gold annotations
        are expected and whether evaluation results are returned.

    Returns
    -------
    In "test"/"crossval" mode: the tagger's evaluation results (extended with
    (token_acc, token_f1) when ``tokenize`` is True). In "tag" mode: the
    tagged items.
    """
    # Avoid the mutable-default-argument pitfall: the original `test_items=[]`
    # default would be shared across calls.
    if test_items is None:
        test_items = []

    tokenized_tokens = []
    token_acc, token_f1 = None, None

    if tokenize:
        # load and apply a tokenizer:
        tokenizer = Tokenizer(context=tokenizer_context, WS=WS)
        if mode == "tag":
            tokenized_tokens = tokenizer.tokenize(test_items=test_items,
                                                  gazetteer=gazetteer)
        elif mode in ("test", "crossval"):
            # Strip annotations down to lowercased token strings, keeping the
            # utterance boundary markers as-is.
            items = []
            for item in test_items:
                if item == "<utt>":
                    items.append(item)
                else:
                    items.append(item[0].lower())
            token_acc, token_f1 = tokenizer.eval_tokenizer(test_items=items,
                                                           gazetteer=gazetteer)
            # return the original tokens since we only tokenize for
            # evaluation purposes:
            tokenized_tokens = items
    else:
        # assume the input has been properly tokenized already:
        if mode == "tag":
            tokenized_tokens = test_items
        elif mode == "test":
            tokenized_tokens = tuple(item[0].lower() for item in test_items)

    sequential_tagger = MaxentTagger(WS=WS,
                                     left_context=left_context,
                                     right_context=right_context,
                                     min_tok_freq=min_tok_freq,
                                     min_lem_freq=min_lem_freq)
    sequential_tagger.load_models()
    tagged_items = sequential_tagger.tag(tokenized=tokenized_tokens,
                                         gazetteer=gazetteer)

    if mode in ("crossval", "test"):
        # Compare the tagger's output against the gold annotations; in the
        # tokenize branch, also report the tokenizer's own scores.
        results = sequential_tagger.evaluate_tags_and_lemmas(
            gold_items=test_items, silver_items=tagged_items)
        if tokenize:
            results.extend((token_acc, token_f1))
        return results
    else:
        return tagged_items