def _reply_buttons(self):
    """Build the inline keyboard for this page: a language switcher,
    a homonym switcher, and a refresh button when neither applies."""
    rows = []

    # Language selector — only shown when more than one language exists.
    # The current language is marked with a leading bullet.
    if len(self.lang_keys) > 1:
        lang_items = []
        for lang in self.lang_keys:
            label = f'• {lang}' if lang == self.lang_key else f'{lang}'
            lang_items.append((label, f'{self.title}|{lang}|0'))
        for row in chunks(lang_items, 4):
            rows.append([
                telegram.InlineKeyboardButton(label, callback_data=payload)
                for label, payload in row
            ])

    # Homonym selector — only shown when several homonyms exist.
    # Buttons are numbered from 1; the current one gets a bullet.
    if self.homonyms_count > 1:
        homonym_items = []
        for index in range(self.homonyms_count):
            label = (f'• {index+1}' if index == self.homonym_index
                     else f'{index+1}')
            homonym_items.append((label, f'{self.title}|{self.lang_key}|{index}'))
        for row in chunks(homonym_items, 6):
            rows.append([
                telegram.InlineKeyboardButton(label, callback_data=payload)
                for label, payload in row
            ])

    # Single language and single homonym: offer a refresh button instead.
    if len(self.lang_keys) == 1 and self.homonyms_count == 1:
        refresh = telegram.InlineKeyboardButton(
            'Обновить',
            callback_data=f'{self.title}|{self.lang_keys[0]}|0'
        )
        rows.append([refresh])

    return telegram.InlineKeyboardMarkup(rows)
def _parse(self):
    """Split ``self.content`` by ``self.parse_pattern`` into a top
    fragment and a dict of keyed child sections."""
    parts = self.parse_pattern.split(self.content)

    # No header matched at all: everything becomes one unnamed section.
    if len(parts) == 1:
        self._top = ''
        self._sub_sections = {
            '': self.child_section_type(self, '', '', parts[0], self.silent),
        }
        return

    self._top = parts.pop(0)
    self._sub_sections = dict()
    if self.copy_top_to_sub_sections:
        # Also expose the top fragment under the empty key.
        self._sub_sections[''] = self.child_section_type(
            self, '', '', self._top, self.silent)

    # After pop(0) the remainder comes in triples produced by the split:
    # (full_header, header, content).
    for full_header, header, content in chunks(parts, 3):
        if header in self._sub_sections and not self.silent:
            raise Exception(f'Duplicated section `{header}` on the page '
                            f'"{self.title}"')
        section = self.child_section_type(
            self, full_header, header, content, self.silent)
        key = section.key
        # The section's own key may differ from the raw header;
        # guard against collisions on that key too.
        if key in self._sub_sections and not self.silent:
            raise Exception(f'Duplicated header key `{key}` on the page '
                            f'"{self.title}"')
        self._sub_sections[key] = section
def check_page(self, page) -> list:
    """Return the language headers that occur more than once in
    ``self.content`` (one entry per extra occurrence)."""
    parts = TR.lang_header.split(self.content)
    if len(parts) == 1:
        return []  # probably a redirect
    parts.pop(0)

    duplicates = []
    seen = set()
    # Split output comes in triples: (full_header, header, content).
    for full_header, header, content in chunks(parts, 3):
        if header in seen:
            duplicates.append(header)
        seen.add(header)
    return duplicates
def save_pages():
    """Regenerate the reverse-index report pages of Russian verbs,
    one group of pages per ending, splitting large groups into
    numbered subpages.

    Side effects: reads ``endings_ru_new.json`` and writes wiki pages
    via ``save_page``; prints a progress line per saved page.
    """
    endings = json.loads(read('endings_ru_new.json'))
    for ending, entries in endings.items():
        # Sort by the reversed title so verbs sharing a suffix are adjacent
        # (this is a reverse index).
        entries.sort(key=lambda x: x[0][::-1])
        values = [entry[1] for entry in entries]
        # 3700 entries per subpage — presumably chosen to stay under the
        # wiki's page-size limit; TODO confirm.
        for i, chunk in enumerate(chunks(values, 3700)):
            content = f"= Глаголы на '''-{ending}''' =\n" + ''.join(chunk)
            save_page(f'User:Vitalik/reports/verbs/-{ending}/{i+1}', content,
                      'Обратный список глаголов по окончаниям')
            print(ending, len(chunk))
def check_page(self, page) -> list:
    """Return ``(lang, header)`` pairs for second-level headers that are
    duplicated within a single language section of the page."""
    duplicates = []
    languages = page.languages.last_dict(unique=True)
    for lang, language_obj in languages.items():
        parts = R.second_header.split(language_obj.content)
        if len(parts) == 1:
            continue  # probably a redirect
        parts.pop(0)
        seen = set()
        # Split output comes in triples: (full_header, header, content).
        for full_header, header, content in chunks(parts, 3):
            if header in seen:
                duplicates.append((lang, header))
            seen.add(header)
    return duplicates
def group_entries(self):
    """Group entry titles by ending key (or a special category), split
    oversized groups into numbered ``@N`` subpages, and return the
    groups ordered by ``self.keys``.

    Returns:
        dict: group key -> list of titles, in canonical key order;
        unknown keys sort last, alphabetically.
    """
    grouped = defaultdict(list)
    for title, details in self.entries.items():
        aspect, stress, impersonal = details  # unpack
        if impersonal:
            res_key = '(безличные)'
        elif '-' in title:
            res_key = '(через дефис)'
        else:
            res_key = '-???'
            # First matching ending key (keys like '-ать') wins.
            for key in self.keys:
                if key.startswith('-') and title.endswith(key[1:]):
                    res_key = key
                    break
        grouped[res_key].append(title)

    # Split oversized groups into '<key> @1', '<key> @2', ... subpages.
    for key, entries in grouped.copy().items():
        max_size = 2000
        # todo: implement a smarter split at convenient boundary letters
        if len(entries) > max_size:
            for i, chunk in enumerate(chunks(entries, max_size)):
                grouped[f'{key} @{i+1}'] = chunk
            del grouped[key]

    def sort_key(pair):
        check_key, value = pair
        if check_key in self.keys:
            return self.keys.index(check_key), 0
        m = re.search(r'(.*) @(\d+)$', check_key)
        if m:
            check_key, page = m.groups()
            if check_key in self.keys:
                # FIX: compare subpage numbers numerically — the string
                # form ordered '@10' before '@2'.
                return self.keys.index(check_key), int(page)
        # Unknown keys go last, sorted alphabetically among themselves.
        return float('inf'), check_key

    return dict(sorted(grouped.items(), key=sort_key))