Beispiel #1
0
def load(chunk_len=200, debug=False):
    """Read '#'-separated "word#description" lines and feed them to a Bulk.

    ``source_slug``, ``name_slug`` and ``slug`` are not defined in this
    function; they are read from the enclosing scope (not visible here --
    presumably module-level constants, TODO confirm).

    Args:
        chunk_len: forwarded unchanged as the third argument of ``Bulk``.
        debug: forwarded unchanged as the second argument of ``check_word``.

    Returns:
        None. Accepted entries are handed to ``bulk.add`` and the run is
        finished with ``bulk.process()``.
    """
    # Single-argument print() produces the same output on Python 2 and also
    # parses on Python 3, unlike the bare ``print x`` statement.
    print(source_slug)
    print(name_slug)
    bulk = Bulk(source_slug, name_slug, chunk_len)
    lines = get_lines(slug)
    for line in lines:
        # NOTE(review): the two-name unpacking requires exactly one '#' per
        # line; any other count raises ValueError.
        word, desc = line.split('#')
        word = prettify(word)
        desc = prettify(desc)
        if not check_word(word, debug):  # "-", "." and " "
            # Entries rejected by check_word are skipped.
            continue
        bulk.add(word, desc)
    bulk.process()
Beispiel #2
0
def load(chunk_len=200, debug=False):
    """Load one value list per (source_slug, name_slug) pair in ``slugs``.

    ``slugs`` is not defined in this function; it is read from the enclosing
    scope (not visible here) and must yield 2-item pairs.

    For every pair a fresh ``Bulk`` is created, the lines returned by
    ``get_lines("<source_slug>_<name_slug>")`` are cleaned with ``prettify``
    and filtered with ``check_word``, and the accepted values are added to
    the bulk before ``bulk.process()`` is called.

    Args:
        chunk_len: forwarded unchanged as the third argument of ``Bulk``.
        debug: forwarded unchanged as the second argument of ``check_word``.

    Returns:
        None.
    """
    for source_slug, name_slug in slugs:
        # Single-argument print() produces the same output on Python 2 and
        # also parses on Python 3, unlike the bare ``print x`` statement.
        print(source_slug)
        print(name_slug)
        bulk = Bulk(source_slug, name_slug, chunk_len)
        # The key passed to get_lines is the two slugs joined by "_".
        slug = "%s_%s" % (source_slug, name_slug)
        lines = get_lines(slug)
        for value in lines:
            value = prettify(value, remove_dot=False)
            if not check_word(value, debug):  # "-" and "."
                # Values rejected by check_word are skipped.
                continue
            bulk.add(value)
        bulk.process()
Beispiel #3
0
def load(chunk_len=200, debug=False):
    """Read "word#description" lines and feed upper-cased words to a Bulk.

    ``source_slug``, ``name_slug`` and ``slug`` are not defined in this
    function; they are read from the enclosing scope (not visible here --
    presumably module-level constants, TODO confirm).

    Args:
        chunk_len: forwarded unchanged as the third argument of ``Bulk``.
        debug: forwarded unchanged as the second argument of ``check_word``.

    Returns:
        None. Accepted entries are handed to ``bulk.add`` and the run is
        finished with ``bulk.process()``.
    """
    # Single-argument print() produces the same output on Python 2 and also
    # parses on Python 3, unlike the bare ``print x`` statement.
    print(source_slug)
    print(name_slug)
    bulk = Bulk(source_slug, name_slug, chunk_len)
    lines = get_lines(slug)
    for line in lines:
        # Split on the first '#' only, so later '#' characters stay in desc.
        # NOTE(review): a line without any '#' raises ValueError here.
        word, desc = line.split('#', 1)
        # todo: does not cover two cases (sample entries quoted verbatim):
        # - Господин# (ж. р. госпожа)#, владелец, ...
        # - Приходить# (прийти# ), прибыть, ...

        # todo: entries containing commas could be handled separately:
        # - аутентичный (автентичный, отентичный)
        # - барон, баронет
        # - бросать деньги (за окно, на ветер)

        word = prettify(word).upper()
        # remove_begin is defined elsewhere; presumably it drops a leading
        # ',' or '||' from the description -- verify against its definition.
        desc = prettify(remove_begin(desc, [',', '||']).strip())
        if not check_word(word, debug):  # "-", "," and " "
            # Entries rejected by check_word are skipped.
            continue
        bulk.add(word, desc)
    bulk.process()