def convert_file(dev, file, _from, _to, out):
    # Convert one file between the Python and Lua representations.
    # `dev` selects the dev/prod variant used in path lookup and header text;
    # `_from`/`_to` are 'py' or 'lua'; `out` is forwarded to get_path().
    dev_str = 'dev' if dev else 'prod'
    py_file = get_path(dev, 'py', file, out=out)
    lua_file = get_path(dev, 'lua', file, out=out)
    in_file = py_file if _from == 'py' else lua_file
    out_file = py_file if _to == 'py' else lua_file
    content = read(in_file)
    # Strip the source-language header ({dev} substituted) before rewriting.
    content = \
        content.replace(before[_from].replace('{dev}', dev_str).strip(), '')
    for pattern, replace in regexps_before[_from]:
        content = re.sub(pattern, replace, content, flags=re.DOTALL)
    for pattern, replace in regexps_DOTALL[_from]:
        content = re.sub(pattern, replace, content, flags=re.DOTALL)
    for pattern, replace in regexps_MULTILINE[_from]:
        # These rules may embed the dev/prod marker on both sides.
        pattern = pattern.replace('{dev}', dev_str)
        replace = replace.replace('{dev}', dev_str)
        content = re.sub(pattern, replace, content, flags=re.MULTILINE)
    # Re-run the DOTALL rules several times so constructs produced by earlier
    # substitutions are themselves converted (bounded fixed-point, 5 passes).
    for i in range(5):
        for pattern, replace in regexps_DOTALL[_from]:
            content = re.sub(pattern, replace, content, flags=re.DOTALL)
    for pattern, replace in regexps_after[_from]:
        content = re.sub(pattern, replace, content, flags=re.DOTALL)
    # Prepend the target-language header and normalize surrounding whitespace.
    content = \
        f"{before[_to].replace('{dev}', dev_str)}\n\n" \
        f"{content.strip()}\n".lstrip()
    write(out_file, content)
def convert_file(filename, _from, _to):
    """Convert `filename` between the Python and Lua representations.

    `_from`/`_to` are 'py' or 'lua'; the direction determines which of the
    two candidate paths is read and which is written.
    """
    # BUGFIX: `filename` was never used — the paths were built from a
    # constant literal instead of f'{filename}.py' / f'{filename}.lua'.
    py_file = join(get_path('py'), f'{filename}.py')
    lua_file = join(get_path('lua'), f'{filename}.lua')
    in_file = py_file if _from == 'py' else lua_file
    out_file = py_file if _to == 'py' else lua_file
    content = read(in_file)
    # Strip the source-language header before applying the rewrite rules.
    content = content.replace(before[_from].strip(), '')
    for pattern, replace in regexps_before[_from]:
        content = re.sub(pattern, replace, content, flags=re.DOTALL)
    for pattern, replace in regexps_DOTALL[_from]:
        content = re.sub(pattern, replace, content, flags=re.DOTALL)
    for pattern, replace in regexps_MULTILINE[_from]:
        content = re.sub(pattern, replace, content, flags=re.MULTILINE)
    # Repeat the DOTALL rules so constructs produced by earlier substitutions
    # are themselves converted (bounded fixed-point, 5 passes).
    for _ in range(5):
        for pattern, replace in regexps_DOTALL[_from]:
            content = re.sub(pattern, replace, content, flags=re.DOTALL)
    for pattern, replace in regexps_after[_from]:
        content = re.sub(pattern, replace, content, flags=re.DOTALL)
    # Prepend the target-language header.
    content = f"{before[_to]}\n\n{content.strip()}\n".lstrip()
    write(out_file, content)
def get_contents_and_title(self):
    """Load the block file into line contents and tab-separated titles."""
    raw = read(self.path)
    # An empty file is legitimate here — e.g. the block held a single
    # article that was later deleted.
    self.contents = raw.split('\n') if raw else []
    self.titles = [entry.split('\t')[0] for entry in self.contents]
def __init__(self, path):
    """Record the block path and eagerly load its raw content.

    Raises StorageError when the file at `path` is empty or unreadable
    as a non-empty string.
    """
    # print(path)
    self.path = path
    # Inheritors are responsible for populating these two.
    self.contents = self.titles = None
    self.block_content = read(self.path)
    if not self.block_content:
        raise StorageError(f'Block is empty: "{self.path}"')
def get_contents_and_title(self):
    """Load the block file, tolerating emptiness unless a backup exists."""
    block = read(self.path)
    if not block:
        # An empty block is only an error when a .bak sibling exists —
        # otherwise it may simply have had its last article removed.
        if exists(f'{self.path}.bak'):
            raise StorageError(f'Block is empty: "{self.path}"')
        self.contents = []
    else:
        self.contents = block.split('\n')
    self.titles = [row.split('\t')[0] for row in self.contents]
def save_pages():
    """Publish reverse-sorted verb lists, one wiki page per 3700 entries."""
    endings = json.loads(read('endings_ru_new.json'))
    for ending, entries in endings.items():
        # Reverse-alphabetical ordering: sort by the reversed word.
        entries.sort(key=lambda entry: entry[0][::-1])
        values = [entry[1] for entry in entries]
        for i, chunk in enumerate(chunks(values, 3700)):
            content = f"= Глаголы на '''-{ending}''' =\n" + ''.join(chunk)
            save_page(
                f'User:Vitalik/reports/verbs/-{ending}/{i+1}',
                content,
                'Обратный список глаголов по окончаниям',
            )
            print(ending, len(chunk))
def download_page(title, path):
    """Download wiki page `title` into the local file `path`.

    Prints one status suffix: ' - No page' (title missing), ' - NEW'
    (file created), ' - OK' (content changed, file rewritten), or
    ' - Not changed'.
    """
    print(f'- {title}', end='')
    try:
        content = load_page(title) + '\n'
    except NoPage:
        print(' - No page')
        return
    # Keep the dev prefix active in local copies.
    content = content.replace("\n-- dev_prefix = 'User:Vitalik/'",
                              "\ndev_prefix = 'User:Vitalik/'")
    if exists(path):
        old_content = read(path)
        if old_content != content:
            # BUGFIX: the branches were swapped — the original printed
            # ' - OK' WITHOUT saving the new content, and rewrote the file
            # (printing ' - Not changed') when nothing had changed.
            write(path, content)
            print(' - OK')
        else:
            print(' - Not changed')
    else:
        write(path, content)
        print(' - NEW')
def get_stats():
    """Scrape per-language statistics pages and persist today's snapshot.

    Returns (old_data, new_data): the previously active snapshot and the
    freshly scraped one.
    """
    new_data = {}
    for lang in langs:
        page = read(html_path(lang))
        cells = re.findall('<td class="mw-statistics-numbers">(.*?)</td>',
                           page)
        # Pick the five metrics of interest and keep only their digits;
        # cells that contain no digits at all contribute nothing.
        values = []
        for index in (0, 1, 3, 5, 6):
            digits = re.sub('[^0-9]', '', cells[index])
            if digits:
                values.append(digits)
        if values:
            # NOTE(review): if one of the five cells is digit-free, the
            # later values shift into the wrong keys — this assumes all
            # five are always present; confirm against the scraped HTML.
            new_data[lang] = {
                'goods': int(values[0]),
                'pages': int(values[1]),
                'edits': int(values[2]),
                'users': int(values[3]),
                'active': int(values[4]),
            }
    history_filename = join(conf.data_path, 'daily_stats', 'history',
                            'values', f'{dtf("Ym/Ymd")}.json')
    active_filename = join(conf.data_path, 'daily_stats', 'active.json')
    old_data = json_load(active_filename)
    json_dump(history_filename, new_data)
    json_dump(active_filename, new_data)
    return old_data, new_data
def __init__(self, path, tables, max_counts=None, lock_slug=''):
    """Open a storage rooted at `path` with one handler per table.

    An empty `lock_slug` means the storage is not locked.
    """
    self.path = path
    self.lock_slug = lock_slug
    self.locked = False
    if lock_slug:
        self.lock(lock_slug)
    self.handlers = {}
    for table, handler_type in tables.items():
        table_path = join(path, table)
        # Prefer the persisted per-table limit; fall back to the caller's
        # `max_counts` mapping; otherwise the table is misconfigured.
        max_count_path = join(table_path, '_sys', 'max_count')
        if exists(max_count_path):
            max_count = int(read(max_count_path))
        elif max_counts and table in max_counts:
            max_count = max_counts[table]
        else:
            raise Exception('Undefined `max_count` for storage')
        handler_class = self.handler_types[handler_type]
        self.handlers[table] = handler_class(table_path, max_count)
    self._titles = None
    self._titles_set = None
def load_languages():
    # todo: move this to some `lib`
    """Parse language codes and cleaned-up lowercase names from
    language-data.lua, resolving `table.copy` aliases."""
    path = join(conf.SYNC_PATH, 'data', 'language-data.lua')
    content = read(path)
    languages = dict()
    # Matches entries of the form:
    #     m["en"] = { name = "Английск{ий} [язык]", ... }
    entries = re.findall(r'm\["([^"]+)"\] *= *{\s*name *= *"([^"]+)"',
                         content)
    for lang, name in entries:
        name = re.sub(r'\[[^]]+\]', '', name)  # removes: [язык]
        name = re.sub(r'[{}]', '', name)  # removes: {ий}
        name = re.sub(r"'[^']+'", '', name)  # removes: '...'
        languages[lang] = name.strip().lower()
    # Matches alias entries of the form:
    #     m["rus"] = table.copy(m["ru"])
    # BUGFIX: the pattern had a stray trailing `"` after `\)`, which never
    # occurs in the Lua source, so aliases were silently never resolved.
    entries = re.findall(r'm\["([^"]+)"\] *= *table\.copy\(m\["([^"]+)"\]\)',
                         content)
    for lang1, lang2 in entries:
        languages[lang1] = languages[lang2]
    return languages
def load_redirects(self):
    """Return the stored redirect entries, one per line."""
    raw = read(self.redirects_filename)
    return raw.split('\n')
def latest_updated(self):
    """Parse and return the stored last-update timestamp, or None."""
    filename = self.latest_updated_filename
    if exists(filename):
        return dtp(read(filename))
    return None
def all_pages_start_from(self):
    """Return the persisted resume point, or None when not yet saved."""
    filename = self.all_pages_start_from_filename
    return read(filename) if exists(filename) else None
def get(cls):
    """Read the stored timestamp and return it converted via convert_date."""
    raw = read(cls.filename).strip()
    moment = datetime.strptime(raw, '%Y-%m-%d %H:%M:%S')
    return convert_date(moment)
def get_contents_and_title(self):
    """Split the block into SEPARATOR-delimited sections; the first section
    is a newline-separated title list."""
    block = read(self.path)
    if not block:
        raise StorageError(f'Block is empty: "{self.path}"')
    sections = block.split(SEPARATOR)
    self.contents = sections
    self.titles = sections[0].split('\n')
def load_articles(self):
    """Return the stored articles, one entry per line."""
    raw = read(self.articles_filename)
    return raw.split('\n')
def load(cls):
    """Read the active file and split it on the class separator."""
    content = read(cls.active_filename)
    return content.split(cls.sep)
def read_file(dev, filename):
    """Read `filename` with CRs stripped; outside dev mode, comment out
    the dev prefix line."""
    content = read(filename).replace('\r', '')
    if dev:
        return content
    return content.replace("\ndev_prefix = 'User:Vitalik/'",
                           "\n-- dev_prefix = 'User:Vitalik/'")
def get(self, request, *args, **kwargs):
    """Serve the resolved file's content as preformatted HTML.

    SECURITY FIX: the raw text is now HTML-escaped before interpolation —
    without escaping, a file containing `</pre><script>…` would be parsed
    and executed by the browser (stored XSS).
    """
    from html import escape  # local import keeps this fix self-contained
    root, path, full_path = self.get_path(kwargs)
    content = read(full_path)
    return HttpResponse(f'<pre>{escape(content)}</pre>')