def setUp(self) -> None:
    # Build a fresh GeniaTagger and the expected tagging for one fixed
    # biomedical sentence before each test.
    # NOTE(review): this appears to duplicate GeniaTaggerTests.setUp defined
    # later in the file — confirm whether this stray copy is intentional.
    self.tagger = GeniaTagger()
    # Sample sentence fed to the tagger under test.
    self.sentence = "Inhibition of NF-kappa beta activation reversed " \
                    "the anti-apoptotic effect of isochamaejasmin."
    # Expected output, one Token per word:
    # Token(word, stem, POS tag, phrase chunk, named-entity chunk).
    # Chunk labels use IOB2 tagging (B-/I- prefix, 'O' = outside).
    self.tokens = [
        Token('Inhibition', 'Inhibition', 'NN', 'B-NP', 'O'),
        Token('of', 'of', 'IN', 'B-PP', 'O'),
        Token('NF-kappa', 'NF-kappa', 'NN', 'B-NP', 'B-protein'),
        Token('beta', 'beta', 'NN', 'I-NP', 'I-protein'),
        Token('activation', 'activation', 'NN', 'I-NP', 'O'),
        Token('reversed', 'reverse', 'VBD', 'B-VP', 'O'),
        Token('the', 'the', 'DT', 'B-NP', 'O'),
        Token('anti-apoptotic', 'anti-apoptotic', 'JJ', 'I-NP', 'O'),
        Token('effect', 'effect', 'NN', 'I-NP', 'O'),
        Token('of', 'of', 'IN', 'B-PP', 'O'),
        Token('isochamaejasmin', 'isochamaejasmin', 'NN', 'B-NP', 'O'),
        Token('.', '.', '.', 'O', 'O')
    ]
class GeniaTaggerTests(TestCase):
    """Integration tests for the GeniaTagger wrapper.

    Each test tags one fixed biomedical sentence and compares the
    tagger's output tokens against a hand-checked expectation.
    """

    # TODO: test "empty" word/stem handling works on geniatagger return
    # lines of the form: \t\t<pos>\t<phrase>\t<entity>

    def setUp(self):
        """Create a fresh tagger plus the sentence and expected tokens."""
        self.tagger = GeniaTagger()
        self.sentence = ("Inhibition of NF-kappa beta activation reversed "
                         "the anti-apoptotic effect of isochamaejasmin.")
        # (word, stem, POS, phrase chunk, entity chunk) per token;
        # chunk/entity labels follow IOB2 tagging.
        expected = [
            ('Inhibition', 'Inhibition', 'NN', 'B-NP', 'O'),
            ('of', 'of', 'IN', 'B-PP', 'O'),
            ('NF-kappa', 'NF-kappa', 'NN', 'B-NP', 'B-protein'),
            ('beta', 'beta', 'NN', 'I-NP', 'I-protein'),
            ('activation', 'activation', 'NN', 'I-NP', 'O'),
            ('reversed', 'reverse', 'VBD', 'B-VP', 'O'),
            ('the', 'the', 'DT', 'B-NP', 'O'),
            ('anti-apoptotic', 'anti-apoptotic', 'JJ', 'I-NP', 'O'),
            ('effect', 'effect', 'NN', 'I-NP', 'O'),
            ('of', 'of', 'IN', 'B-PP', 'O'),
            ('isochamaejasmin', 'isochamaejasmin', 'NN', 'B-NP', 'O'),
            ('.', '.', '.', 'O', 'O'),
        ]
        self.tokens = [Token(*fields) for fields in expected]

    def tearDown(self):
        """Drop the tagger so its subprocess/resources go away per test."""
        del self.tagger

    def testTagger(self):
        """Tag the sample sentence twice; each run must match exactly."""
        for _ in range(2):
            self.tagger.send(self.sentence)
            for position, produced in enumerate(self.tagger):
                self.assertTupleEqual(produced, self.tokens[position])

    def testBadPath(self):
        """A bad binary or dictionary path must fail the constructor."""
        self.assertRaises(AssertionError, GeniaTagger, "/fail", "whatever")
        self.assertRaises(AssertionError, GeniaTagger, "whatever", "/fail")
logging.basicConfig(level=args.loglevel, format='%(asctime)s %(levelname)s: %(message)s') if args.output == NORMALIZED: method = normalize elif args.output == TABULAR: method = tagging elif args.output == ALIGNED: method = align else: parser.error("unknown output option " + args.output) method = lambda *args: None try: pos_tagger = GeniaTagger() ner_tagger = NerSuite(args.model) qualifier_list = [l.strip() for l in args.qranks] raw_dict_data = [ dictionaryReader(d, qualifier_list, args.separator) for d in args.dictionary ] # a tokenizer that skips Unicode Categories Zs and Pd: tokenizer = WordTokenizer(skipTags={'space'}, skipOrthos={'e'}) dictionaries = [ Dictionary(stream, tokenizer) for stream in raw_dict_data ] logging.info("initialized %s dictionaries", len(dictionaries)) lst = [dictionaries, tokenizer, pos_tagger, ner_tagger] kwds = dict(sep=args.separator, tag_all_nouns=args.nouns,