def main():
    """Parse each requested Wiktionary edition and write its translation pairs.

    Arguments are read from the module-level ``argv``:

    * ``argv[1]`` -- configuration file name, passed on to ``Wiktionary``.
    * ``argv[2:]`` -- the wikicodes to process, or the literal ``all``
      followed by ``argv[3]``, a file listing one wikicode per line.
      ``all`` without a list file is treated as an ordinary wikicode.

    A failure while handling one wikicode is reported and the loop carries on
    with the next one; nothing is re-raised from the loop.

    Raises:
        IndexError: if ``argv`` has fewer than three items.
    """
    if argv[2] == 'all' and len(argv) > 3:
        # Context manager: the list file is closed as soon as it has been
        # read instead of staying open for the rest of the run.
        with open(argv[3]) as wc_f:
            wikicodes = [wc.strip() for wc in wc_f]
    else:
        wikicodes = [wc.strip() for wc in argv[2:]]
    cfg_fn = argv[1]
    logger = logging.getLogger('wikt2dict')
    for wc in wikicodes:
        # Bound before the try so the NotImplementedError handler can tell
        # whether the constructor got far enough to return an object.
        wiktionary = None
        try:
            # Each print() receives one pre-built string, so the output is
            # the same under Python 2's print statement and Python 3.
            print('Parsing ' + wc + 'wiktionary')
            wiktionary = Wiktionary(wc, cfg_fn)
            logger.info('%s Wiktionary object built',
                        wiktionary.cfg['fullname'])
            wiktionary.parse_all_articles()
            logger.info('%s Wiktionary articles parsed',
                        wiktionary.cfg['fullname'])
            print('   Extracted {0} pairs'.format(
                len(wiktionary.article_parser.pairs)))
            wiktionary.write_pairs()
            logger.info('%s Wiktionary translations written to file',
                        wiktionary.cfg['fullname'])
        except NotImplementedError:
            # Prefer the configured full name; fall back to the raw wikicode
            # when the object or its name is not available.
            if wiktionary and wiktionary.cfg and wiktionary.cfg['fullname']:
                label = wiktionary.cfg['fullname']
            else:
                label = wc
            logger.error('%s Wiktionary unrecognized parser type', label)
        except AttributeError as e:
            print(e)
        except Exception as e:
            # Last-resort boundary: report and move on to the next wikicode.
            print('%s %s' % (wc, str(e)))
def main():
    """Parse each requested Wiktionary edition and write its translation pairs.

    Arguments are read from the module-level ``argv``:

    * ``argv[1]`` -- configuration file name, passed on to ``Wiktionary``.
    * ``argv[2:]`` -- the wikicodes to process, or the literal ``all``
      followed by ``argv[3]``, a file listing one wikicode per line.
      ``all`` without a list file is treated as an ordinary wikicode.

    Any exception raised while handling one wikicode is reported and the loop
    continues with the next one.

    Raises:
        IndexError: if ``argv`` has fewer than three items.
    """
    if argv[2] == 'all' and len(argv) > 3:
        # Context manager: close the list file once its lines are read
        # rather than leaving the handle open for the whole run.
        with open(argv[3]) as wc_f:
            wikicodes = [wc.strip() for wc in wc_f]
    else:
        wikicodes = [wc.strip() for wc in argv[2:]]
    cfg_fn = argv[1]
    logger = logging.getLogger('wikt2dict')
    for wc in wikicodes:
        # Single-argument print() gives the same output on Python 2 and 3.
        print(wc)
        try:
            wiktionary = Wiktionary(wc, cfg_fn)
            logger.info('%s Wiktionary object built',
                        wiktionary.cfg['fullname'])
            wiktionary.parse_all_articles()
            logger.info('%s Wiktionary articles parsed',
                        wiktionary.cfg['fullname'])
            wiktionary.write_pairs()
            logger.info('%s Wiktionary translations written to file',
                        wiktionary.cfg['fullname'])
        except AttributeError as e:
            # Still skipped, but no longer silently: leave a trace in the
            # log so a skipped edition can be diagnosed afterwards.
            logger.error('%s Wiktionary skipped after AttributeError: %s',
                         wc, e)
        except Exception as e:
            # Last-resort boundary: report and move on to the next wikicode.
            print('%s %s' % (wc, str(e)))
def main():
    """Run the parse-and-write pipeline for every wikicode on the command line.

    Arguments are read from the module-level ``argv``:

    * ``argv[1]`` -- configuration file name, passed on to ``Wiktionary``.
    * ``argv[2:]`` -- the wikicodes to process, or the literal ``all``
      followed by ``argv[3]``, a file listing one wikicode per line.
      ``all`` without a list file is treated as an ordinary wikicode.

    Errors raised for one wikicode are reported (logged or printed, depending
    on their type) and do not stop the remaining wikicodes from being tried.

    Raises:
        IndexError: if ``argv`` has fewer than three items.
    """
    if argv[2] == 'all' and len(argv) > 3:
        # Read the list inside a context manager so the file handle is
        # released immediately instead of leaking until interpreter exit.
        with open(argv[3]) as wc_f:
            wikicodes = [wc.strip() for wc in wc_f]
    else:
        wikicodes = [wc.strip() for wc in argv[2:]]
    cfg_fn = argv[1]
    logger = logging.getLogger('wikt2dict')
    for wc in wikicodes:
        # Reset per iteration, outside the try, so the handler below never
        # sees the object left over from the previous wikicode.
        wiktionary = None
        try:
            # print() with one pre-formatted string is output-identical on
            # Python 2 and Python 3.
            print('Parsing ' + wc + 'wiktionary')
            wiktionary = Wiktionary(wc, cfg_fn)
            logger.info('%s Wiktionary object built',
                        wiktionary.cfg['fullname'])
            wiktionary.parse_all_articles()
            logger.info('%s Wiktionary articles parsed',
                        wiktionary.cfg['fullname'])
            print('   Extracted {0} pairs'.format(
                len(wiktionary.article_parser.pairs)))
            wiktionary.write_pairs()
            logger.info('%s Wiktionary translations written to file',
                        wiktionary.cfg['fullname'])
        except NotImplementedError:
            # Use the configured full name when it is available, otherwise
            # identify the edition by its wikicode.
            if wiktionary and wiktionary.cfg and wiktionary.cfg['fullname']:
                logger.error('%s Wiktionary unrecognized parser type',
                             wiktionary.cfg['fullname'])
            else:
                logger.error('%s Wiktionary unrecognized parser type', wc)
        except AttributeError as e:
            print(e)
        except Exception as e:
            # Last-resort boundary: report and move on to the next wikicode.
            print('%s %s' % (wc, str(e)))
import pytest

from wiktionary import Wiktionary, Declensions

# Shared objects created once at import time; the tests below read
# `wiktionary` and `entry` directly.
wiktionary = Wiktionary("tests/hestur.xml")
# The page looked up by the title "hestur".
page = wiktionary.get_by_title("hestur")
# First item produced by the page's get_entries(); raises StopIteration at
# import time if there is none.
entry = next(page.get_entries())


def test_database_discovers_templates():
    """Looking up the "kk sb 01" declension template yields a result."""
    template = wiktionary.get_declension_template("kk sb 01")
    assert template is not None


def test_entry_title():
    """The shared entry reports "hestur" as its name."""
    expected = "hestur"
    assert entry.name == expected


def test_entry_declension_arguments():
    """The first two declension arguments are "hest" and "ur", in that order."""
    first = entry.declension_arguments[0]
    assert first == "hest"
    second = entry.declension_arguments[1]
    assert second == "ur"


def test_entry_category():
    """The shared entry's category is "nafnorð".

    This test used to be named ``test_entry_part_of_speech``, the same as the
    test that follows it. The later definition replaced this one when the
    module was imported, so this assertion was never collected or run.

    NOTE(review): because it was not running before, confirm that
    ``entry.category`` is still the intended attribute.
    """
    assert entry.category == "nafnorð"


def test_entry_part_of_speech():
    """The shared entry's part of speech is "Karlkynsnafnorð"."""
    assert entry.part_of_speech == "Karlkynsnafnorð"


def test_entry_is_icelandic():
# Example #5
# 0
import click

from database import db as sqldb
from wiktionary import Wiktionary, Declensions
from frequencies import Frequencies
from models import Form, Lemma, Translation, MODELS

# Module-level setup, executed as soon as the script is run.
# NOTE(review): `wikitionary` is spelled differently from the `Wiktionary`
# class; the population loop further down uses this same spelling.
wikitionary = Wiktionary("articles.xml")
frequencies = Frequencies("frequency.csv")
d = Declensions(wikitionary)

# Drop the tables for MODELS, then create them again, on every run.
# NOTE(review): presumably this discards any rows already stored -- confirm
# before pointing the script at a database whose contents matter.
sqldb.drop_tables(MODELS)
sqldb.create_tables(MODELS)

# Entry names the population loop skips (compared against entry.name).
known_failures = [
    'Mið-Afríkulýðveldið', 'mar', 'endurnýjanleg orka', 'Garðabær'
]

with open("failures.txt", "w") as out:
    failures = []
    count = 0

    with click.progressbar(wikitionary.pages, label="populating") as pages:
        for page in pages:
            for entry in page.get_entries():
                try:
                    if entry.name in known_failures:
                        continue

                    if not entry.is_icelandic:
                        continue
# Example #6
# 0
from wiktionary import Wiktionary, Declensions

# Title to look up; used for both the page lookup and the declension query.
word = "matseðill"

db = Wiktionary("articles.xml")

d = Declensions(db)

# Fetch the page with that title and collect everything get_entries()
# produces into a list.
page = db.get_by_title(word)
entries = list(page.get_entries())

# Print the to_dict() form of each entry, one per line.
for entry in entries:
    print(entry.to_dict())

# Query the Declensions object for the same word.
declensions = d.get_declensions(word)