Ejemplo n.º 1
0
	def testWalk(self):
		"""A single dictionary term in a sentence is tagged B/I; all other tokens are O."""
		dictionary = Dictionary([('key', 'the term', 42)], DictionaryTests.tokenizer)
		sentence = "Here is the term we're looking for."
		words = [
			sentence[begin:end]
			for begin, end, _tag, _ortho in DictionaryTests.tokenizer.tokenize(sentence)
		]
		outside = Dictionary.O
		begin_tag = Dictionary.B % 'key'
		inside_tag = Dictionary.I % 'key'
		expected = [outside] * 2 + [begin_tag, inside_tag] + [outside] * 6
		self.assertEqual(list(dictionary.walk(words)), expected)
Ejemplo n.º 2
0
	def testCapitalizationAlts(self):
		"""Entries still match when the sentence uses a different capitalization."""
		dictionary = Dictionary(
			[('NEUROD1', 'NEUROD', 100),
			 ('NEUROD2', 'NEUROD2', 100)],
			DictionaryTests.tokenizer
		)
		sentence = "Transfection of vectors expressing neuroD and neuroD2 into P19 cells."
		words = [
			sentence[begin:end]
			for begin, end, _tag, _ortho in DictionaryTests.tokenizer.tokenize(sentence)
		]
		outside = Dictionary.O
		# The base tags are formatted with 'NEUROD'; the trailing digit of each
		# matched entry ('1' or '2') is appended to the formatted tag.
		begin1 = Dictionary.B % 'NEUROD' + "1"
		inside1 = Dictionary.I % 'NEUROD' + "1"
		begin2 = Dictionary.B % 'NEUROD' + "2"
		inside2 = Dictionary.I % 'NEUROD' + "2"
		expected = (
			[outside] * 4
			+ [begin1, inside1, outside, begin2, inside2, inside2]
			+ [outside] * 5
		)
		self.assertEqual(list(dictionary.walk(words)), expected)
Ejemplo n.º 3
0
	def testFullWalk(self):
		"""With overlapping entries, the walk tags the full phrase as 'key' and the apostrophe as 'apo'."""
		dictionary = Dictionary(
			[('alt', 'the term we', 84),
			 ('key', 'the term we', 42),
			 ('apo', "'", 1),
			 ('part', 'term we', 21)],
			DictionaryTests.tokenizer
		)
		sentence = "Here is the term we're looking for."
		words = [
			sentence[begin:end]
			for begin, end, _tag, _ortho in DictionaryTests.tokenizer.tokenize(sentence)
		]
		outside = Dictionary.O
		expected = [
			outside, outside,
			Dictionary.B % 'key', Dictionary.I % 'key', Dictionary.I % 'key',
			Dictionary.B % 'apo',
			outside, outside, outside, outside,
		]
		self.assertEqual(list(dictionary.walk(words)), expected)
Ejemplo n.º 4
0
	def testExamples(self):
		"""Verify full B/I/O tag sequences on two realistic biomedical sentences."""
		dictionary = Dictionary(
			[('NR1D1', 'rev erb α', 1),
			 ('NR1D1', 'rev erb alpha', 1),
			 ('PPARA', 'PPAR', 1)],
			DictionaryTests.tokenizer
		)
		outside = Dictionary.O
		begin_nr, inside_nr = Dictionary.B % 'NR1D1', Dictionary.I % 'NR1D1'
		begin_ppar = Dictionary.B % 'PPARA'
		cases = [
			("A functional Rev-erb alpha responsive element located in the human Rev-erb alpha promoter mediates a repressing activity.",
			 [outside] * 2 + [begin_nr, inside_nr, inside_nr] + [outside] * 6
			 + [begin_nr, inside_nr, inside_nr] + [outside] * 6),
			("A positive PPAR-response element in the human apoA-I promoter nonfunctional in rats.",
			 [outside] * 2 + [begin_ppar] + [outside] * 13),
		]

		for sentence, expected in cases:
			words = [
				sentence[begin:end]
				for begin, end, _tag, _ortho in DictionaryTests.tokenizer.tokenize(sentence)
			]
			actual = list(dictionary.walk(words))
			# Length check first gives a clearer failure than the full-list diff.
			self.assertEqual(len(actual), len(expected))
			self.assertEqual(actual, expected)
Ejemplo n.º 5
0
	def testCreateDictionary(self):
		"""Constructing a Dictionary builds the expected token trie at .root."""
		dictionary = Dictionary([('key', 'The Term', 42, 21)], DictionaryTests.tokenizer)
		# The entry 'The Term' becomes a two-level trie whose leaf carries
		# the ranks [42, 21] and the key 'key'.
		expected_root = Node(The=Node(Term=Node(([42, 21], 'key'))))
		self.assertEqual(dictionary.root, expected_root)
Ejemplo n.º 6
0
    else:
        # Unrecognized output target: report it and install a no-op handler.
        # NOTE(review): argparse's parser.error normally raises SystemExit,
        # which would make the following assignment unreachable — confirm
        # the parser type actually returns here.
        parser.error("unknown output option " + args.output)
        method = lambda *args: None

    try:
        # Build the tagging pipeline; any setup failure is handled by the
        # except clause (not visible in this chunk).
        pos_tagger = GeniaTagger()
        ner_tagger = NerSuite(args.model)
        # One qualifier name per line of args.qranks, whitespace-stripped —
        # presumably a ranked list; confirm against dictionaryReader's contract.
        qualifier_list = [l.strip() for l in args.qranks]
        raw_dict_data = [
            dictionaryReader(d, qualifier_list, args.separator)
            for d in args.dictionary
        ]
        # a tokenizer that skips Unicode Categories Zs and Pd:
        tokenizer = WordTokenizer(skipTags={'space'}, skipOrthos={'e'})
        dictionaries = [
            Dictionary(stream, tokenizer) for stream in raw_dict_data
        ]
        logging.info("initialized %s dictionaries", len(dictionaries))
        # Positional arguments for the selected output method: dictionaries,
        # tokenizer, POS tagger, NER tagger, then the input file list below.
        lst = [dictionaries, tokenizer, pos_tagger, ner_tagger]
        kwds = dict(sep=args.separator,
                    tag_all_nouns=args.nouns,
                    use_greek_letters=args.greek)

        if args.files:
            lst.append(args.files)
        else:
            # No input files given: fall back to reading standard input.
            lst.append([sys.stdin])

        method(*lst, **kwds)

        # Explicitly drop the NER tagger — presumably it holds an external
        # process/resource worth releasing promptly; confirm in NerSuite.
        del ner_tagger