Exemple #1
0
def test_parse_raw():
    """Compare the output of parse_raw for _SENT against _PARSE.

    Both texts are split on newlines, empty lines are discarded, and the
    remaining lines are compared as sets, so line order and duplicate
    lines do not matter.
    """
    # NOTE(review): check_status presumably verifies the parser is usable
    # before the real test runs -- confirm against its definition.
    check_status(AlpinoParser())
    actual = set(filter(None, parse_raw(_SENT).split("\n")))
    expected = set(filter(None, _PARSE.split("\n")))
    assert_equal(actual, expected)
Exemple #2
0
def test_parse():
    """Compare AlpinoParser.process output for _SENT against _PARSE.

    Both texts are split on newlines, empty lines are discarded, and the
    remaining lines are compared as sets, so line order and duplicate
    lines do not matter.
    """
    parser = AlpinoParser()
    # NOTE(review): check_status presumably verifies the parser is usable
    # before the real test runs -- confirm against its definition.
    check_status(parser)
    actual = set(filter(None, parser.process(_SENT).split("\n")))
    expected = set(filter(None, _PARSE.split("\n")))
    assert_equal(actual, expected)
Exemple #3
0
def test_convert():
    """Convert the processed _SENT to CSV and check the resulting rows.

    The CSV is read back with csv.DictReader; the test expects three
    rows, with the first row carrying the document id, the lemma 'Toob'
    and a 'parent' value equal to the second row's 'id'.
    """
    parser = AlpinoParser()
    check_status(parser)
    parsed = parser.process(_SENT)
    csv_text = parser.convert(123, parsed, "csv")
    rows = [row for row in csv.DictReader(StringIO(csv_text))]
    print(rows)
    assert_equal(len(rows), 3)

    # Safe to index now: the length check above guarantees three rows.
    first, second = rows[0], rows[1]
    # The document id was passed as an int but comes back from CSV as text.
    assert_equal(first['doc'], '123')

    assert_equal(first['lemma'], 'Toob')
    assert_equal(first['parent'], second['id'])
Exemple #4
0
def test_process():
    """
    Test CoreNLP processing

    Requires a CoreNLP server listening at port 9000, which can be
    started with, e.g.:
    docker run -dp 9000:9000 chilland/corenlp-docker
    """
    lemmatizer = CoreNLPLemmatizer()
    check_status(lemmatizer)
    raw_output = lemmatizer.process("two words")
    # The raw result must contain the lemma of "words" as an XML element.
    assert_in("<lemma>word</lemma>", raw_output)

    # Convert the raw result to CSV and read it back as one dict per token.
    csv_text = lemmatizer.convert(1, raw_output, format="csv")
    rows = list(csv.DictReader(StringIO(csv_text)))
    assert_equal(len(rows), 2)
    assert_equal(rows[1]['lemma'], "word")
Exemple #5
0
def test_process():
    """
    Test Frog lemmatizing

    Requires a running Frog server. The example command below serves
    Frog on port 9887 (which port FrogLemmatizer connects to is
    configured elsewhere -- confirm):
    sudo docker run -dp 9887:9887 proycon/lamachine frog -S 9887 --skip=pm
    """
    lemmatizer = FrogLemmatizer()
    check_status(lemmatizer)
    csv_text = lemmatizer.process("Nederlandse woordjes")
    print(csv_text)
    # The processed output is read directly as CSV, one row per token.
    rows = list(csv.DictReader(StringIO(csv_text)))
    assert_equal(len(rows), 2)
    first = rows[0]
    assert_equal(first["lemma"], "nederlands")
    assert_equal(first["ner"], "B-LOC")
Exemple #6
0
def test_alpino_unicode():
    """Test what happens with non-ascii characters in input."""
    check_status(AlpinoParser())
    sentence = "Bjarnfre\xf0arson leeft"
    # tokenize should leave the text (including the non-ascii character)
    # unchanged and only add a final line break.
    # NOTE(review): the original note also mentions conversion to utf-8;
    # that is not visible from this test -- confirm in tokenize.
    expected = sentence + "\n"
    assert_equal(tokenize(sentence), expected)
Exemple #7
0
def test_tokenize():
    """Check tokenize on a two-sentence text with punctuation and pipes.

    Per the expected value: the comma and question mark become separate
    tokens, the pipe characters are dropped, and each sentence ends up on
    its own newline-terminated line.
    """
    check_status(AlpinoParser())
    raw = u"D\xedt is een zin, met komma |nietwaar|? En nog 'n zin"
    # One literal per expected output line.
    wanted = (
        u"D\xedt is een zin , met komma nietwaar ?\n"
        u"En nog 'n zin\n"
    )
    assert_equal(tokenize(raw), wanted)