Beispiel #1
0
def test_numeral_tables(tmprepo):
    """Check the table count and one known lexeme parsed from ``Abui.htm``.

    ``tmprepo`` is indexed with the keys ``'glottolog'`` and ``'raw'``.
    """
    catalog = Glottolog(tmprepo['glottolog'])
    abui_page = tmprepo['raw'] / 'Abui.htm'
    record = list(find_tables([abui_page]))[0]
    # The record must carry exactly seven fields; the unpacking below
    # relies on this having been checked first.
    assert len(record) == 7
    base_name, tables, file_name, title_name, source, base, comment = record

    entry = NumeralsEntry(
        base_name=base_name,
        tables=tables,
        file_name=file_name,
        title_name=title_name,
        codes=catalog.languoids_by_code(),
        iso=catalog.iso.languages,
        source=source,
        base=base,
        comment=comment,
    )

    assert len(entry.tables) == 8
    lexemes = entry.get_numeral_lexemes()
    assert lexemes[0][0][6][0] == 'tä.ˈlä.mä'
Beispiel #2
0
def test_num_entry(tmprepo, x, expected):
    """Check the base name and first glottocode of the entry built from ``x``.

    ``x`` is a file name joined onto ``tmprepo['raw']``; ``expected`` is
    compared against ``entry.glottocodes[0]``.
    """
    html_path = tmprepo['raw'] / x
    catalog = Glottolog(tmprepo['glottolog'])
    record = list(find_tables([html_path]))[0]

    # Position of each NumeralsEntry keyword inside the parsed record.
    field_positions = {
        'base_name': 0,
        'tables': 1,
        'file_name': 2,
        'title_name': 3,
        'source': 4,
        'base': 5,
        'comment': 6,
    }
    record_fields = {
        keyword: record[position]
        for keyword, position in field_positions.items()
    }
    entry = NumeralsEntry(
        codes=catalog.languoids_by_code(),
        iso=catalog.iso.languages,
        **record_fields,
    )

    assert entry.base_name == Path(html_path).stem
    assert entry.glottocodes[0] == expected
Beispiel #3
0
def test_fuzzy_number_matching(tmprepo):
    """Check ``parse_number`` on selected cells of a table from ``Aari.htm``.

    The table at index 1 of the parsed entry is flattened into rows of
    non-empty, stripped cell texts before the cells are parsed.
    """
    catalog = Glottolog(tmprepo['glottolog'])
    aari_page = tmprepo['raw'] / 'Aari.htm'
    record = list(find_tables([aari_page]))[0]

    base_name, tables = record[0], record[1]
    file_name, title_name = record[2], record[3]
    source, base, comment = record[4], record[5], record[6]
    entry = NumeralsEntry(
        base_name=base_name,
        tables=tables,
        file_name=file_name,
        title_name=title_name,
        codes=catalog.languoids_by_code(),
        iso=catalog.iso.languages,
        source=source,
        base=base,
        comment=comment,
    )

    # One list per <tr>, holding the stripped text of each non-empty <td>.
    rows = [
        [
            text
            for text in (cell.text.strip() for cell in tr.find_all('td'))
            if text
        ]
        for tr in entry.tables[1].find_all('tr')
    ]

    # Table is roughly structured like this:
    # 1 | 21
    # 2 | 22
    # 3 | 23
    # ...
    # 10 | 30
    # ..
    # 20 | 2000

    first_row, tenth_row, twentieth_row = rows[0], rows[9], rows[19]
    assert parse_number(first_row[0]) == 1
    assert parse_number(first_row[1]) == 21
    assert parse_number(tenth_row[0]) == 10
    assert parse_number(twentieth_row[1]) == 2000