예제 #1
0
def pretrained_tagger(request):
    """Build a ``pos.Tagger`` on the CPU from the ``--tagger`` option.

    The model path is read from ``request.config``.  When the option is
    empty or unset, ``pytest.skip`` is invoked instead of building a tagger.
    """
    model_path = request.config.getoption("--tagger")
    if model_path:
        return pos.Tagger(model_file=model_path, device="cpu")
    # No model path supplied: nothing to load, so skip.
    pytest.skip("No --tagger given")
예제 #2
0
#!/usr/bin/env python3
import sys
import re

import rnc
import pos

# Collect everything the reader yields from the three input files, in order.
# A fresh rnc.Reader() is created for each file.
sentences = [
	sentence
	for corpus_path in ('tmp/media1.xml', 'tmp/media2.xml', 'tmp/media3.xml')
	for sentence in rnc.Reader().read(corpus_path)
]

# Captures the leading run of word characters / hyphens of a string, i.e.
# everything up to the first other character or the end of the string.
# Written as a raw string: '\w' in a plain literal is an invalid escape
# sequence (SyntaxWarning since Python 3.12).  The former .format(...) call
# was dropped because the pattern contains no replacement field, so it never
# changed the text; the compiled pattern is unchanged.
re_pos = re.compile(r'([\w-]+)(?:[^\w-]|$)')

# Tagger built with default constructor arguments (no model file or device
# is passed here); the loop below calls its get_label_id().
tagger = pos.Tagger()

sentence_labels = []
sentence_words = []
for sentence in sentences:
	labels = []
	words = []
	for word in sentence:
		gr = word[1]['gr']
		m = re_pos.match(gr)
		if not m:
			print(gr, file = sys.stderr)
		pos = m.group(1)
		if pos == 'ANUM':
			pos = 'A-NUM'
		label = tagger.get_label_id(pos)
		if not label: