예제 #1
0
def create_index(data, path, desc, debug=True, push=False, use_other=True,
                 alphabet=None, force_wiki_title=None):
    """Build a reversed index over `data` and emit one page per section.

    A ReversedIndexBuilder is created from the keys of `data`.  For every
    letter of `alphabet` (plus '-' when `use_other` is true) and then for
    every entry of `builder.separate_pages`, the page content is fetched with
    `builder.get(...)` and optionally written locally and/or saved to the
    wiki.

    Args:
        data: mapping whose keys are indexed; also passed to the builder.
        path: '/'-separated relative path, used both under PAGES_DIR and as
            part of the wiki title.
        desc: description passed to save_wiki_page.
        debug: when true, create the folders for `path` under PAGES_DIR and
            pass each page to debug_write.
        push: when true, pass each page to save_wiki_page.
        use_other: when true, also process the '-' section; when false,
            separate pages whose name starts with '-' are skipped.
        alphabet: iterable of letters.  Despite the None default it is
            iterated here, so passing None raises TypeError.
        force_wiki_title: if set, replaces the default
            "<wiki_prefix>/<path>" base of the wiki title.
    """
    builder = ReversedIndexBuilder(data.keys(), 'ru', data, alphabet=alphabet)

    if debug:
        # Create each intermediate folder of `path` one level at a time.
        subpath = PAGES_DIR
        for folder in path.split('/'):
            subpath = join(subpath, folder)
            if not exists(subpath):
                mkdir(subpath)

    file_path = join(PAGES_DIR, path)

    wiki_prefix = u"Участник:Vitalik"

    def publish(page_title, content):
        # The base title is built lazily, only when a page is really pushed.
        base_title = force_wiki_title or "%s/%s" % (wiki_prefix, path)
        save_wiki_page("%s/%s" % (base_title, page_title), content, desc)

    # Work on a copy: `letters += '-'` on a caller-supplied list would extend
    # that list in place (and the builder was given the same object).
    letters = list(alphabet)
    if use_other:
        letters.append('-')

    for letter in letters:
        title_letter = "-%s" % letter.lower()
        print(title_letter)
        content = builder.get(letter)
        if debug:
            debug_write(file_path, title_letter, content)
        if push:
            publish(title_letter, content)

    for pages in builder.separate_pages.values():
        # Shorter page names are handled first.
        for page in sorted(pages, key=len):
            # Page titles are the lower-cased page name written backwards.
            title_page = "-%s" % page.lower()[::-1]
            print(title_page)
            if not use_other and page[0] == '-':
                continue
            content = builder.get(page)
            if debug:
                debug_write(file_path, title_page, content)
            if push:
                publish(title_page, content)
def get_unknown_inflection_simple():
    """Report words with an unknown inflection kind, one page per group.

    Loads WordInflection rows whose kind is 'f ?', 'm ?' or 'n ?', walks them
    ordered by the reversed word, and groups those for which
    check_correct_inflection returns a non-negative number under the key
    "<gender><number>".  Each group is rendered with gen_report_simple,
    passed to debug_write and saved with save_wiki_page.
    """
    queryset = WordInflection.objects.filter(kind__in=[u'f ?', u'm ?', u'n ?'])
    report = dict()
    for entry in sorted(queryset, key=lambda item: item.word[::-1]):
        print(entry.word)
        valid_num = check_correct_inflection(entry.word, entry.gender)
        if valid_num >= 0:
            group = "%s%d" % (entry.gender, valid_num)
            report.setdefault(group, list()).append(
                (entry.word, entry.content))

    wiki_prefix = u"Участник:Vitalik/Массовое редактирование/Словоизменение/сущ/simple"
    desc = u"Создание списков для массового редактирования"
    file_path = join(PAGES_DIR, u'Массовое редактирование (simple)')
    for key, entries in report.items():
        print(key)
        # The first two characters of the key are passed on separately.
        content = gen_report_simple(entries, key[0], key[1])
        filename = "%s.txt" % (key)
        print(filename)
        debug_write(file_path, filename, content)
        save_wiki_page("%s/%s" % (wiki_prefix, key), content, desc)
예제 #3
0
파일: get_stubs.py 프로젝트: 2vitalik/words
def load_stubs():
    """Dump every template of the article-stub category via debug_write.

    Articles returned by load_category whose title does not start with the
    template namespace prefix are ignored; for the rest the prefix is removed
    from the title before the content is written.
    """
    namespace = u'Шаблон:'
    for article in load_category(u'Категория:Викисловарь:Шаблоны_для_создания_статей'):
        full_title = article.title()
        if full_title.startswith(namespace):
            name = full_title[len(namespace):]
            text = article.get()
            target_dir = join(PAGES_DIR, u'Шаблоны-болванки')
            debug_write(target_dir, name, text)
예제 #4
0
def report_headers(data, path, desc, debug=True, push=False):
    alphabet = u'АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ'
    # alphabet = None
    builder = HeadersBuilder(data.keys(), 'ru', data, max_words_on_page=500,
                             alphabet=alphabet)

    # for letter in sorted(builder.counts_1.keys(), cmp=wiki_cmp):
    #     print letter, builder.counts_1[letter]
    # for letter in sorted(builder.counts_2.keys(), cmp=wiki_cmp):
    #     print letter, builder.counts_2[letter]

    if debug:
        folders = path.split('/')
        subpath = PAGES_DIR
        for folder in folders:
            subpath = join(subpath, folder)
            if not exists(subpath):
                mkdir(subpath)

    file_path = join(PAGES_DIR, path)

    wiki_prefix = u"Участник:Vitalik"

    # for letter in russian_letters() + ['_']:
    for letter in alphabet + '-':
    # for letter in ['-', u'А', u'Б', u'В', u'Г', ]:
        print letter
        content = builder.get(letter)
        # letter = ur(letter)
        debug_write(file_path, letter, content)
        if push:
            wiki_title = "%s/%s/%s" % (wiki_prefix, path, letter)
            save_wiki_page(wiki_title, content, desc)
        # sleep(1)
        # break
    # exit()

    #todo: делить на 3 буквенные статьи!!!
    for letter, pages in builder.separate_pages.items():
        for page in pages:
            print dt(), page
            if page in [u'Пер', u'Про', u'Под', u'При', u'Раз', u'Рас',
                        u'Пре', u'Пол', u'Кол', u'Мик', u'Кон', u'Ком']:
                print u'→ too big!'
                continue
            content = builder.get(page)
            # page = ur(page)
            debug_write(file_path, page, content)
            if push:
                wiki_title = "%s/%s/%s" % (wiki_prefix, path, page)
                save_wiki_page(wiki_title, content, desc)
def get_unknown_inflection_full():
    """Report words with an unknown inflection kind, paged by 100 entries.

    Loads WordInflection rows whose kind is 'f ?', 'm ?' or 'n ?', walks them
    ordered by the reversed word, and groups those for which
    check_correct_inflection returns a non-negative number under the key
    "<gender><number>".  Each group is split into chunks of 100 entries;
    every chunk is rendered with gen_report_full, passed to debug_write and
    saved with save_wiki_page.  When save_wiki_page returns a true value, the
    content is also stored in a WordInflectionMassEdit row titled
    u'сущ/<key>/<page number>'.
    """
    page_size = 100

    items = WordInflection.objects.filter(kind__in=[u'f ?', u'm ?', u'n ?'])
    report = dict()
    for word in sorted(items, key=lambda item: item.word[::-1]):
        print(word.word)
        valid_num = check_correct_inflection(word.word, word.gender)
        if valid_num < 0:
            continue
        key = "%s%d" % (word.gender, valid_num)
        report.setdefault(key, []).append((word.word, word.content))

    file_path = join(PAGES_DIR, u'Массовое редактирование')
    wiki_prefix = u"Участник:Vitalik/Массовое редактирование/Словоизменение/сущ"
    desc = u"Полное обновление данных"
    for key, entries in report.items():
        print(key)
        # Ceiling division: the number of chunks actually produced below.
        # (`len / 100 + 1` reported one page too many for exact multiples.)
        page_count = (len(entries) + page_size - 1) // page_size
        for i, chunk in enumerate(chunks(entries, page_size), 1):
            # NOTE(review): key[0] / key[1] are the first two characters of
            # the key; this matches (gender, number) only while both are a
            # single character -- confirm.
            content = gen_report_full(chunk, key[0], key[1], page_count)
            filename = "%s_%d.txt" % (key, i)
            print(filename)
            debug_write(file_path, filename, content)
            wiki_title = "%s/%s/%s" % (wiki_prefix, key, i)
            changed = save_wiki_page(wiki_title, content, desc)
            if changed:
                db_title = u'сущ/%s/%s' % (key, i)
                item, created = WordInflectionMassEdit.objects.get_or_create(
                    title=db_title)
                item.content = content
                item.save()
예제 #6
0
def create_index(words, path, desc, debug=True, push=False, use_other=True,
                 alphabet=None, force_wiki_title=None, max_words_on_page=None,
                 force_letters=None, header=None, force_wiki_prefix=None,
                 wiki_save_only_total=False, need_div=True, words_details=None):
    """Build an index over `words` and emit one page per section.

    An IndexBuilder is created for `words`.  For every letter (plus '-' when
    `use_other` is true) and then for every entry of
    `builder.separate_pages`, the page content is fetched with
    `builder.get(...)` and optionally written locally and/or saved to the
    wiki.  When both `push` and `header` are set, a summary page named
    u"Итого" with the total `len(words)` is saved as well.

    Args:
        words: collection of words to index; its length is the total.
        path: '/'-separated relative path, used both under PAGES_DIR and as
            part of the wiki title.
        desc: description passed to save_wiki_page for index pages.
        debug: when true, create the folders for `path` under PAGES_DIR and
            pass each page to debug_write.
        push: when true, save pages with save_wiki_page.
        use_other: when true, also process the '-' section; when false,
            separate pages whose name starts with '-' are skipped.
        alphabet: letters for the builder; russian_alphabet() when empty.
        force_wiki_title: if set, replaces the "<wiki_prefix>/<path>" base
            of every wiki title.
        max_words_on_page: builder page size; 2000 when not given.
        force_letters: if set, iterated instead of `alphabet`.
        header: passed to the builder and used in the summary page.
        force_wiki_prefix: if set, replaces the default wiki prefix.
        wiki_save_only_total: when true, only the summary page is saved to
            the wiki; the per-letter and separate pages are not.
        need_div: passed through to the builder.
        words_details: passed through to the builder.
    """
    if not alphabet:
        alphabet = russian_alphabet()
    builder = IndexBuilder(words, 'ru', alphabet=alphabet,
                           max_words_on_page=max_words_on_page or 2000,
                           header=header, need_div=need_div,
                           words_details=words_details)

    if debug:
        # Create each intermediate folder of `path` one level at a time.
        subpath = PAGES_DIR
        for folder in path.split('/'):
            subpath = join(subpath, folder)
            if not exists(subpath):
                mkdir(subpath)

    file_path = join(PAGES_DIR, path)

    wiki_prefix = force_wiki_prefix or u"Участник:Vitalik"

    def wiki_title_for(page_name):
        # The base title is built lazily, only when a page is really saved.
        base_title = force_wiki_title or "%s/%s" % (wiki_prefix, path)
        return "%s/%s" % (base_title, page_name)

    # Work on a copy: `letters += '-'` on a list would extend the caller's
    # `force_letters` / `alphabet` in place (the builder shares `alphabet`).
    letters = list(force_letters or alphabet)
    if use_other:
        letters.append('-')

    save_pages = push and not wiki_save_only_total

    for letter in letters:
        content = builder.get(letter)
        if debug:
            debug_write(file_path, letter, content)
        if save_pages:
            save_wiki_page(wiki_title_for(letter), content, desc)

    for pages in builder.separate_pages.values():
        # Shorter page names are handled first.
        for page in sorted(pages, key=len):
            if not use_other and page[0] == '-':
                continue
            content = builder.get(page)
            if debug:
                debug_write(file_path, page, content)
            if save_pages:
                save_wiki_page(wiki_title_for(page), content, desc)

    if push and header:
        content = u"""{{Алфавит|%s|lang=ru}}

== Итого ==
Итого в индексе "%s": '''<onlyinclude>%s</onlyinclude>''' статей
""" % (header, header, len(words))
        save_wiki_page(wiki_title_for(u"Итого"), content,
                       u'Итого: %s статей' % len(words))