def get_tagger():
    """
    Return a tagging function chosen from the application settings.

    The module-level `settings` object is consulted: when
    `settings.USE_FREELING` is true the Freeling backend
    (`quepy.freeling.run_freeling`) is used, otherwise the NLTK backend
    (`quepy.nltktagger.run_nltktagger`) is used. The backend module is
    imported only inside the selected branch.

    The returned value is a function that receives a unicode string and
    returns a list of `Word` instances. A warning is logged for every word
    whose `pos` attribute is not in `PENN_TAGSET`; such words are still
    returned.
    """
    if settings.USE_FREELING:
        from quepy.freeling import run_freeling

        def tagger_function(string):
            # The setting is looked up on every call, not when the tagger
            # is built.
            return run_freeling(string, settings.FREELING_CMD)
    else:
        from quepy.nltktagger import run_nltktagger

        def tagger_function(string):
            # The setting is looked up on every call, not when the tagger
            # is built.
            return run_nltktagger(string, settings.NLTK_DATA_PATH)

    def wrapper(string):
        assert_valid_encoding(string)
        # Materialize the result before inspecting it: if the backend hands
        # back a lazy iterator, the validation loop below would otherwise
        # consume it and the caller would receive an exhausted iterator
        # instead of the documented list.
        words = list(tagger_function(string))
        for word in words:
            if word.pos not in PENN_TAGSET:
                logger.warning("Tagger emmited a non-penn "
                               "POS tag {!r}".format(word.pos))
        return words

    return wrapper
def test_run_freeling(self):
    """
    Check that `run_freeling` runs a command and parses its output.

    `sysutils.ExecutionContext` and `freeling._parse_freeling_output` are
    temporarily replaced with fakes that only record that they were used.
    The originals are restored in a `finally` clause so that a failing
    assertion or an exception raised by `run_freeling` cannot leave the
    fakes installed for the tests that run afterwards.
    """
    class FakeStringIO(StringIO):
        # Stand-in for a temporary file: an in-memory buffer that also
        # exposes a `name` attribute.
        name = "some_name"

    class FakeExecutionCtx(object):
        # Class-level flag so the test can observe the call without holding
        # a reference to the instance created inside `run_freeling`.
        runcmd_called = False

        def __init__(self, *args, **kwargs):
            pass

        def runcmd(self, cmd, stdin=None):
            self.cmd = cmd
            FakeExecutionCtx.runcmd_called = True
            return (StringIO(), StringIO())

        def tmpfile(self, name):
            return FakeStringIO()

    class FakeFunction(object):
        # Callable that records whether, and with which arguments, it was
        # invoked.
        def __init__(self):
            self.called = False
            self.args = None
            self.kwargs = None

        def __call__(self, *args, **kwargs):
            self.called = True
            self.args = args
            self.kwargs = kwargs

    bkp_ctx = sysutils.ExecutionContext
    bkp_parse_output = freeling._parse_freeling_output
    fake_parse_output = FakeFunction()

    try:
        sysutils.ExecutionContext = FakeExecutionCtx
        freeling._parse_freeling_output = fake_parse_output

        freeling.run_freeling(u"who is Tom Cruise?", FREELING_CMD)

        self.assertTrue(FakeExecutionCtx.runcmd_called)
        self.assertTrue(fake_parse_output.called)
    finally:
        # Always undo the monkeypatching, even when the test fails.
        sysutils.ExecutionContext = bkp_ctx
        freeling._parse_freeling_output = bkp_parse_output
def test_real_run(self):
    """
    Run `run_freeling` without fakes and check the POS tag of each word.

    Every returned item must be a `freeling.Word`, and its `pos` must match
    the tag listed below for its `token`.
    """
    pos_by_token = {
        u"who": u"WP",
        u"is": u"VBZ",
        u"Tom Cruise": u"NNP",
        u"?": u".",
    }

    tagged_words = list(
        freeling.run_freeling(u"who is Tom Cruise?", FREELING_CMD)
    )

    for tagged in tagged_words:
        self.assertIsInstance(tagged, freeling.Word)
        self.assertEqual(tagged.pos, pos_by_token[tagged.token])