def create_index(data, path, desc, debug=True, push=False, use_other=True,
                 alphabet=None, force_wiki_title=None):
    """Render every page of a ReversedIndexBuilder index and store/publish it.

    One page is produced per letter of `alphabet` (plus the '-' page when
    `use_other` is true), followed by every page listed in
    `builder.separate_pages`.

    Parameters:
        data: mapping whose keys are handed to ReversedIndexBuilder together
            with the mapping itself.
        path: '/'-separated location, used both below PAGES_DIR and as part
            of the wiki title.
        desc: edit summary passed to save_wiki_page.
        debug: when true, create the directories for `path` under PAGES_DIR
            and pass every page to debug_write.
        push: when true, pass every page to save_wiki_page.
        use_other: when true, also emit the '-' page; when false, separate
            pages whose name starts with '-' are skipped.
        alphabet: iterable of letters; required in practice (see Raises).
        force_wiki_title: replaces the default "<prefix>/<path>" wiki root.

    Raises:
        TypeError: if `alphabet` is None. The original code failed with the
            same exception type later on, after the builder had been created.
    """
    if alphabet is None:
        raise TypeError("create_index() requires an explicit alphabet")

    builder = ReversedIndexBuilder(data.keys(), 'ru', data, alphabet=alphabet)

    if debug:
        # Create each directory level of `path` below PAGES_DIR if missing.
        subpath = PAGES_DIR
        for folder in path.split('/'):
            subpath = join(subpath, folder)
            if not exists(subpath):
                mkdir(subpath)
    file_path = join(PAGES_DIR, path)
    wiki_prefix = u"Участник:Vitalik"

    def emit(page_title, content):
        # Single place that writes the local copy and/or pushes to the wiki.
        if debug:
            debug_write(file_path, page_title, content)
        if push:
            wiki_root = force_wiki_title or "%s/%s" % (wiki_prefix, path)
            save_wiki_page("%s/%s" % (wiki_root, page_title), content, desc)

    # Work on a copy: `letters += '-'` on a caller-supplied list would extend
    # the caller's alphabet in place on every call.
    letters = list(alphabet)
    if use_other:
        letters.append('-')

    for letter in letters:
        title_letter = "-%s" % letter.lower()
        print(title_letter)
        emit(title_letter, builder.get(letter))

    for pages in builder.separate_pages.values():
        # Shorter page names first.
        for page in sorted(pages, key=len):
            # Separate pages are titled with the lower-cased, reversed name.
            title_page = "-%s" % page.lower()[::-1]
            print(title_page)
            if not use_other and page[0] == '-':
                continue
            emit(title_page, builder.get(page))
def get_unknown_inflection_simple():
    """Group words with an unknown inflection kind and publish one list per group.

    Reads WordInflection rows whose kind is 'f ?', 'm ?' or 'n ?', orders them
    by the reversed spelling of the word, and asks check_correct_inflection
    for a number per word (negative results are skipped). Words are grouped
    under the key "<gender><number>"; each group is rendered with
    gen_report_simple, handed to debug_write and saved with save_wiki_page.
    """
    candidates = WordInflection.objects.filter(
        kind__in=[u'f ?', u'm ?', u'n ?'])

    grouped = dict()
    for entry in sorted(candidates, key=lambda item: item.word[::-1]):
        print(entry.word)
        valid_num = check_correct_inflection(entry.word, entry.gender)
        if valid_num < 0:
            continue
        group_key = "%s%d" % (entry.gender, valid_num)
        grouped.setdefault(group_key, list()).append(
            (entry.word, entry.content))

    file_path = join(PAGES_DIR, u'Массовое редактирование (simple)')
    wiki_prefix = u"Участник:Vitalik/Массовое редактирование/Словоизменение/сущ/simple"
    desc = u"Создание списков для массового редактирования"

    for group_key, rows in grouped.items():
        print(group_key)
        # The first two characters of the key are passed on as separate
        # arguments, exactly as the key was built above.
        content = gen_report_simple(rows, group_key[0], group_key[1])
        filename = "%s.txt" % (group_key)
        print(filename)
        debug_write(file_path, filename, content)
        save_wiki_page("%s/%s" % (wiki_prefix, group_key), content, desc)
def load_stubs():
    """Dump every article-stub template of the category via debug_write.

    Iterates over the articles returned by load_category for the stub
    template category, ignores titles that do not start with the
    u'Шаблон:' prefix, and passes the remaining ones (prefix stripped)
    together with their content to debug_write.
    """
    prefix = u'Шаблон:'
    category = u'Категория:Викисловарь:Шаблоны_для_создания_статей'
    for article in load_category(category):
        full_title = article.title()
        if full_title.startswith(prefix):
            short_title = full_title[len(prefix):]
            content = article.get()
            target_dir = join(PAGES_DIR, u'Шаблоны-болванки')
            debug_write(target_dir, short_title, content)
def report_headers(data, path, desc, debug=True, push=False):
    """Render every page of a HeadersBuilder report and store/publish it.

    One page is produced per letter of the Russian alphabet plus '-', then
    one per entry of `builder.separate_pages`, except for a fixed list of
    three-letter pages that are skipped as too big.

    Parameters:
        data: mapping whose keys are handed to HeadersBuilder together with
            the mapping itself.
        path: '/'-separated location, used both below PAGES_DIR and inside
            the wiki title "<prefix>/<path>/<page>".
        desc: edit summary passed to save_wiki_page.
        debug: when true, create the directories for `path` under PAGES_DIR
            and pass every page to debug_write.
        push: when true, pass every page to save_wiki_page.
    """
    alphabet = u'АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ'
    builder = HeadersBuilder(data.keys(), 'ru', data, max_words_on_page=500,
                             alphabet=alphabet)

    if debug:
        # Create each directory level of `path` below PAGES_DIR if missing.
        subpath = PAGES_DIR
        for folder in path.split('/'):
            subpath = join(subpath, folder)
            if not exists(subpath):
                mkdir(subpath)
    file_path = join(PAGES_DIR, path)
    wiki_prefix = u"Участник:Vitalik"

    # Pages skipped below because they are too big.
    oversized_pages = frozenset([
        u'Пер', u'Про', u'Под', u'При', u'Раз', u'Рас',
        u'Пре', u'Пол', u'Кол', u'Мик', u'Кон', u'Ком',
    ])

    def emit(page_title, content):
        # The local copy is written only in debug mode: the target
        # directories are created only then, and the `debug` flag used to be
        # ignored here (unlike in create_index).
        if debug:
            debug_write(file_path, page_title, content)
        if push:
            wiki_title = "%s/%s/%s" % (wiki_prefix, path, page_title)
            save_wiki_page(wiki_title, content, desc)

    for letter in alphabet + '-':
        print(letter)
        emit(letter, builder.get(letter))

    # TODO: split into three-letter articles!
    for pages in builder.separate_pages.values():
        for page in pages:
            print(u"%s %s" % (dt(), page))
            if page in oversized_pages:
                print(u'→ too big!')
                continue
            emit(page, builder.get(page))
def get_unknown_inflection_full():
    """Group words with an unknown inflection kind and publish paged reports.

    Reads WordInflection rows whose kind is 'f ?', 'm ?' or 'n ?', orders them
    by the reversed spelling of the word, and asks check_correct_inflection
    for a number per word (negative results are skipped). Words are grouped
    under the key "<gender><number>". Each group is split into pages of 100
    entries; every page is rendered with gen_report_full, handed to
    debug_write and saved with save_wiki_page. When save_wiki_page reports a
    change, the content is also stored in WordInflectionMassEdit under
    u'сущ/<key>/<page number>'.
    """
    page_size = 100

    candidates = WordInflection.objects.filter(
        kind__in=[u'f ?', u'm ?', u'n ?'])

    report = dict()
    for entry in sorted(candidates, key=lambda item: item.word[::-1]):
        print(entry.word)
        valid_num = check_correct_inflection(entry.word, entry.gender)
        if valid_num < 0:
            continue
        key = "%s%d" % (entry.gender, valid_num)
        report.setdefault(key, list()).append((entry.word, entry.content))

    file_path = join(PAGES_DIR, u'Массовое редактирование')
    wiki_prefix = u"Участник:Vitalik/Массовое редактирование/Словоизменение/сущ"
    desc = u"Полное обновление данных"

    for key, items in report.items():
        print(key)
        # Ceiling division: the former `len(items) / 100 + 1` reported one
        # page too many whenever the count was an exact multiple of 100.
        # NOTE(review): assumes chunks() yields consecutive slices of at most
        # `page_size` items - confirm against its definition.
        page_count = (len(items) + page_size - 1) // page_size
        for i, chunk in enumerate(chunks(items, page_size), 1):
            content = gen_report_full(chunk, key[0], key[1], page_count)
            filename = "%s_%d.txt" % (key, i)
            print(filename)
            debug_write(file_path, filename, content)
            wiki_title = "%s/%s/%s" % (wiki_prefix, key, i)
            changed = save_wiki_page(wiki_title, content, desc)
            if changed:
                # Mirror the saved page in the database.
                db_title = u'сущ/%s/%s' % (key, i)
                item, _created = WordInflectionMassEdit.objects.get_or_create(
                    title=db_title)
                item.content = content
                item.save()
def create_index(words, path, desc, debug=True, push=False, use_other=True,
                 alphabet=None, force_wiki_title=None, max_words_on_page=None,
                 force_letters=None, header=None, force_wiki_prefix=None,
                 wiki_save_only_total=False, need_div=True,
                 words_details=None):
    """Render every page of an IndexBuilder index and store/publish it.

    One page is produced per letter (plus the '-' page when `use_other` is
    true), followed by every page listed in `builder.separate_pages`. When
    both `push` and `header` are set, a final u"Итого" page with the number
    of words is saved as well.

    Parameters:
        words: collection handed to IndexBuilder; its length is reported on
            the u"Итого" page.
        path: '/'-separated location, used both below PAGES_DIR and as part
            of the wiki title.
        desc: edit summary passed to save_wiki_page for the index pages.
        debug: when true, create the directories for `path` under PAGES_DIR
            and pass every page to debug_write.
        push: when true, pages are saved with save_wiki_page.
        use_other: when true, also emit the '-' page; when false, separate
            pages whose name starts with '-' are skipped.
        alphabet: letters for the builder; russian_alphabet() when falsy.
        force_wiki_title: replaces the default "<prefix>/<path>" wiki root.
        max_words_on_page: passed to IndexBuilder; 2000 when falsy.
        force_letters: letters to emit pages for instead of `alphabet`.
        header: passed to IndexBuilder and used on the u"Итого" page.
        force_wiki_prefix: replaces the default wiki prefix.
        wiki_save_only_total: when true, only the u"Итого" page is pushed.
        need_div, words_details: passed through to IndexBuilder.
    """
    if not alphabet:
        alphabet = russian_alphabet()
    builder = IndexBuilder(words, 'ru', alphabet=alphabet,
                           max_words_on_page=max_words_on_page or 2000,
                           header=header, need_div=need_div,
                           words_details=words_details)

    if debug:
        # Create each directory level of `path` below PAGES_DIR if missing.
        subpath = PAGES_DIR
        for folder in path.split('/'):
            subpath = join(subpath, folder)
            if not exists(subpath):
                mkdir(subpath)
    file_path = join(PAGES_DIR, path)
    wiki_prefix = force_wiki_prefix or u"Участник:Vitalik"

    def wiki_title_for(page_title):
        # "<forced title or prefix/path>/<page title>"
        wiki_root = force_wiki_title or "%s/%s" % (wiki_prefix, path)
        return "%s/%s" % (wiki_root, page_title)

    def emit(page_title, content):
        # Single place that writes the local copy and/or pushes to the wiki.
        if debug:
            debug_write(file_path, page_title, content)
        if push and not wiki_save_only_total:
            save_wiki_page(wiki_title_for(page_title), content, desc)

    # Work on a copy: `letters += '-'` on a list would extend the caller's
    # `force_letters` / `alphabet` (or the list returned by
    # russian_alphabet()) in place on every call.
    letters = list(force_letters or alphabet)
    if use_other:
        letters.append('-')

    for letter in letters:
        emit(letter, builder.get(letter))

    for pages in builder.separate_pages.values():
        # Shorter page names first.
        for page in sorted(pages, key=len):
            if not use_other and page[0] == '-':
                continue
            emit(page, builder.get(page))

    if push and header:
        total = len(words)
        # NOTE(review): the literal is kept exactly as it appears in the
        # source; wiki headings normally need their own line, so confirm
        # whether line breaks belong inside it.
        content = u"""{{Алфавит|%s|lang=ru}} == Итого == Итого в индексе "%s": '''<onlyinclude>%s</onlyinclude>''' статей """ % (header, header, total)
        save_wiki_page(wiki_title_for(u"Итого"), content,
                       u'Итого: %s статей' % total)