Example #1
0
 def setUp(self):
     self.tagger = GeniaTagger()
     self.sentence = "Inhibition of NF-kappa beta activation reversed " \
         "the anti-apoptotic effect of isochamaejasmin."
     self.tokens = [
         Token('Inhibition', 'Inhibition', 'NN', 'B-NP', 'O'),
         Token('of', 'of', 'IN', 'B-PP', 'O'),
         Token('NF-kappa', 'NF-kappa', 'NN', 'B-NP', 'B-protein'),
         Token('beta', 'beta', 'NN', 'I-NP', 'I-protein'),
         Token('activation', 'activation', 'NN', 'I-NP', 'O'),
         Token('reversed', 'reverse', 'VBD', 'B-VP', 'O'),
         Token('the', 'the', 'DT', 'B-NP', 'O'),
         Token('anti-apoptotic', 'anti-apoptotic', 'JJ', 'I-NP', 'O'),
         Token('effect', 'effect', 'NN', 'I-NP', 'O'),
         Token('of', 'of', 'IN', 'B-PP', 'O'),
         Token('isochamaejasmin', 'isochamaejasmin', 'NN', 'B-NP', 'O'),
         Token('.', '.', '.', 'O', 'O')
     ]
Example #2
0
class GeniaTaggerTests(TestCase):

    # TODO: test "empty" word/stem handling works on geniatagger return lines of the form:
    # \t\t<pos>\t<phrase>\t<entity>

    def setUp(self):
        self.tagger = GeniaTagger()
        self.sentence = "Inhibition of NF-kappa beta activation reversed " \
            "the anti-apoptotic effect of isochamaejasmin."
        self.tokens = [
            Token('Inhibition', 'Inhibition', 'NN', 'B-NP', 'O'),
            Token('of', 'of', 'IN', 'B-PP', 'O'),
            Token('NF-kappa', 'NF-kappa', 'NN', 'B-NP', 'B-protein'),
            Token('beta', 'beta', 'NN', 'I-NP', 'I-protein'),
            Token('activation', 'activation', 'NN', 'I-NP', 'O'),
            Token('reversed', 'reverse', 'VBD', 'B-VP', 'O'),
            Token('the', 'the', 'DT', 'B-NP', 'O'),
            Token('anti-apoptotic', 'anti-apoptotic', 'JJ', 'I-NP', 'O'),
            Token('effect', 'effect', 'NN', 'I-NP', 'O'),
            Token('of', 'of', 'IN', 'B-PP', 'O'),
            Token('isochamaejasmin', 'isochamaejasmin', 'NN', 'B-NP', 'O'),
            Token('.', '.', '.', 'O', 'O')
        ]

    def tearDown(self):
        del self.tagger

    def testTagger(self):
        for dummy in range(2):
            self.tagger.send(self.sentence)

            for idx, token in enumerate(iter(self.tagger)):
                self.assertTupleEqual(token, self.tokens[idx])

    def testBadPath(self):
        self.assertRaises(AssertionError, GeniaTagger, "/fail", "whatever")
        self.assertRaises(AssertionError, GeniaTagger, "whatever", "/fail")
Example #3
0
    logging.basicConfig(level=args.loglevel,
                        format='%(asctime)s %(levelname)s: %(message)s')

    if args.output == NORMALIZED:
        method = normalize
    elif args.output == TABULAR:
        method = tagging
    elif args.output == ALIGNED:
        method = align
    else:
        parser.error("unknown output option " + args.output)
        method = lambda *args: None

    try:
        pos_tagger = GeniaTagger()
        ner_tagger = NerSuite(args.model)
        qualifier_list = [l.strip() for l in args.qranks]
        raw_dict_data = [
            dictionaryReader(d, qualifier_list, args.separator)
            for d in args.dictionary
        ]
        # a tokenizer that skips Unicode Categories Zs and Pd:
        tokenizer = WordTokenizer(skipTags={'space'}, skipOrthos={'e'})
        dictionaries = [
            Dictionary(stream, tokenizer) for stream in raw_dict_data
        ]
        logging.info("initialized %s dictionaries", len(dictionaries))
        lst = [dictionaries, tokenizer, pos_tagger, ner_tagger]
        kwds = dict(sep=args.separator,
                    tag_all_nouns=args.nouns,