Example #1
0
def test_language_disambiguation():
    # Build an English Wiktionary parser backed by the title database that
    # data_path() locates at 'input/en_titles.db'.
    title_path = data_path('input/en_titles.db')
    parser = EnWiktionarySemantics(titledb=title_path)

    # Parsing ENTRY must yield exactly three items.
    entries = parser.parse_structured_entry(ENTRY)
    eq_(len(entries), 3)

    # Only the keys listed in TARGET are compared; any additional keys on a
    # parsed item are not checked.
    for position, actual in enumerate(entries):
        for field, expected_value in TARGET[position].items():
            eq_(expected_value, actual[field])
def test_language_disambiguation():
    # The parser is constructed with the title database found through
    # data_path(); ENTRY is then parsed as a structured entry.
    semantics = EnWiktionarySemantics(titledb=data_path('input/en_titles.db'))
    results = semantics.parse_structured_entry(ENTRY)

    # Exactly three parsed items are expected.
    eq_(len(results), 3)

    # Compare each parsed item against the TARGET entry at the same index,
    # looking only at the keys that TARGET entry defines.
    for i, parsed in enumerate(results):
        wanted = TARGET[i]
        for key in wanted:
            eq_(wanted[key], parsed[key])
Example #3
0
def test_language_disambiguation():
    # Parse ENTRY with an English parser that uses the title database at
    # 'input/en_titles.db', and require the whole result to equal TARGET.
    parser = EnWiktionarySemantics(titledb=data_path('input/en_titles.db'))
    parsed = parser.parse_structured_entry(ENTRY)
    eq_(parsed, TARGET)
# coding: utf-8
from __future__ import unicode_literals
from nose.tools import eq_

from conceptnet5.wiktparse.rules import (EdgeInfo, LinkedText,
                                         EnWiktionarySemantics,
                                         DeWiktionarySemantics)

# Parser instances shared by the tests, keyed by language code.  Each one is
# built once, at import time, by calling its semantics class with that code;
# check_output() looks parsers up here by `lang`.
PARSERS = {
    code: semantics_class(code)
    for code, semantics_class in (
        ('en', EnWiktionarySemantics),
        ('de', DeWiktionarySemantics),
    )
}


def check_output(lang, rule_name, text, expected):
    """Assert that parsing `text` gives `expected`.

    Shared helper for the test generators in this module.  The parser is
    looked up in PARSERS by `lang`, `text` is parsed with the rule named
    `rule_name`, and the parse result is compared to `expected` with eq_.
    """
    parser = PARSERS[lang]
    eq_(expected, parser.parse(text, rule_name))


def test_en_etymology():
    test_list = [('{{compound|blow|fish}}', [
        EdgeInfo('en', 'blow', None, 'DerivedFrom'),
        EdgeInfo('en', 'fish', None, 'DerivedFrom')
    ]),
                 ('{{etycomp|blow|fish}}', [
                     EdgeInfo('en', 'blow', None, 'EtymologicallyDerivedFrom'),
                     EdgeInfo('en', 'fish', None, 'EtymologicallyDerivedFrom')
def test_language_disambiguation():
    # An English parser, given the title database located via data_path(),
    # must turn ENTRY into a result equal to TARGET.
    db_path = data_path('input/en_titles.db')
    actual = EnWiktionarySemantics(titledb=db_path).parse_structured_entry(ENTRY)
    eq_(actual, TARGET)