Beispiel #1
0
def convert_file(dev, file, _from, _to, out):
    """Convert one file between its `py` and `lua` representations.

    The `_from` variant of `file` is read, the header stored in
    `before[_from]` is removed, the regex rule sets registered for `_from`
    are applied, and the result is written to the `_to` variant prefixed
    with the `before[_to]` header.

    Args:
        dev: Truthy selects the word 'dev', falsy 'prod'; that word replaces
            every `{dev}` placeholder in the headers and MULTILINE rules.
        file: File identifier, forwarded to `get_path`.
        _from: Source format; 'py' picks the Python path, any other value
            the Lua path.
        _to: Target format, same convention as `_from`.
        out: Forwarded to `get_path` as its `out` keyword.
    """
    flavour = 'dev' if dev else 'prod'

    py_path = get_path(dev, 'py', file, out=out)
    lua_path = get_path(dev, 'lua', file, out=out)
    source_path = py_path if _from == 'py' else lua_path
    target_path = py_path if _to == 'py' else lua_path

    def run_rules(text, rules, flags):
        # Apply every (pattern, replacement) pair in order.
        for regex, substitute in rules:
            text = re.sub(regex, substitute, text, flags=flags)
        return text

    text = read(source_path)
    old_header = before[_from].replace('{dev}', flavour).strip()
    text = text.replace(old_header, '')

    text = run_rules(text, regexps_before[_from], re.DOTALL)
    text = run_rules(text, regexps_DOTALL[_from], re.DOTALL)

    # Lazy generator: each rule has `{dev}` filled in right before it is
    # applied, exactly one rule at a time.
    multiline_rules = (
        (regex.replace('{dev}', flavour), substitute.replace('{dev}', flavour))
        for regex, substitute in regexps_MULTILINE[_from]
    )
    text = run_rules(text, multiline_rules, re.MULTILINE)

    # The DOTALL rules get five further passes (presumably so that nested
    # constructs are fully rewritten -- TODO confirm).
    for _ in range(5):
        text = run_rules(text, regexps_DOTALL[_from], re.DOTALL)

    text = run_rules(text, regexps_after[_from], re.DOTALL)

    new_header = before[_to].replace('{dev}', flavour)
    write(target_path, f"{new_header}\n\n{text.strip()}\n".lstrip())
Beispiel #2
0
def generate_lists():
    """Bucket matching pages by title ending and dump the buckets as JSON.

    Iterates `storage.iterate_pages`, keeps single-word titles whose
    `page.ru.content` contains the `{{гл ru` marker (presumably the Russian
    verb template), and appends a `(title, wiki line)` pair to every bucket
    of the module-level `endings` mapping whose key the title ends with.
    Titles matching no key go to the `'?'` bucket.  Finally the elapsed time
    is printed and `endings` is written to `endings_ru_new.json`.
    """
    start_time = datetime.now()
    for title, page in storage.iterate_pages(silent=True):
        # Skip multi-word expressions.
        if ' ' in title:
            continue
        if '{{гл ru' not in page.ru.content:
            continue
        print(title)

        # Aspect: perfective, imperfective or unknown.
        aspect = get_aspect(page)
        # Stress taken from the {{по-слогам}} template.
        stress = get_stress(page)

        # Group by ending; a title may land in several buckets.
        entry = (title, f"# [[{title}]] ({aspect}) {stress}\n")
        matching = [ending for ending in endings if title.endswith(ending)]
        for ending in matching or ['?']:
            endings[ending].append(entry)

    print(datetime.now() - start_time)
    write('endings_ru_new.json', json.dumps(endings, indent=4))
Beispiel #3
0
def convert_file(filename, _from, _to):
    """Convert `filename` between its `py` and `lua` representations.

    Reads the `_from` variant, removes the `before[_from]` header, applies
    the regex rule sets registered for `_from`, and writes the result to the
    `_to` variant prefixed with the `before[_to]` header.

    Args:
        filename: Base name without extension; `.py` / `.lua` is appended.
        _from: Source format; 'py' picks the Python file, any other value
            the Lua file.
        _to: Target format, same convention as `_from`.
    """
    py_path = join(get_path('py'), f'{filename}.py')
    lua_path = join(get_path('lua'), f'{filename}.lua')
    source_path = py_path if _from == 'py' else lua_path
    target_path = py_path if _to == 'py' else lua_path

    def run_rules(text, rules, flags):
        # Apply every (pattern, replacement) pair in order.
        for regex, substitute in rules:
            text = re.sub(regex, substitute, text, flags=flags)
        return text

    text = read(source_path)
    text = text.replace(before[_from].strip(), '')

    text = run_rules(text, regexps_before[_from], re.DOTALL)
    text = run_rules(text, regexps_DOTALL[_from], re.DOTALL)
    text = run_rules(text, regexps_MULTILINE[_from], re.MULTILINE)

    # The DOTALL rules get five further passes (presumably so that nested
    # constructs are fully rewritten -- TODO confirm).
    for _ in range(5):
        text = run_rules(text, regexps_DOTALL[_from], re.DOTALL)

    text = run_rules(text, regexps_after[_from], re.DOTALL)

    new_header = before[_to]
    write(target_path, f"{new_header}\n\n{text.strip()}\n".lstrip())
Beispiel #4
0
def dump_stats():
    """Download each language's `Special:Statistics` page and store it.

    Prints `dtf('Ymd')`, then for every entry of the module-level `langs`
    fetches the statistics page of that Wiktionary edition and writes the
    decoded response body to `html_path(lang)`.
    """
    print(dtf('Ymd'))
    for lang in langs:
        print('Dumping language:', lang)
        response = requests.get(
            f'https://{lang}.wiktionary.org/wiki/Special:Statistics')
        target = html_path(lang)
        # bytes.decode() with no argument decodes as UTF-8.
        html = response.content.decode()
        write(target, html)
Beispiel #5
0
 def lock(self, lock_slug):
     """Create the lock file for `lock_slug` and flag this object as locked.

     The lock file receives the value returned by `dt()`.

     Raises:
         StorageError: If the lock file already exists.
     """
     path = self.lock_filename(lock_slug)
     if exists(path):
         message = f'Can\'t lock: Storage is already locked: "{path}"'
         raise StorageError(message)
     write(path, dt())
     self.locked = True
Beispiel #6
0
    def block_path(self, title):
        """Return the path of the block file responsible for `title`.

        Candidate paths are built from a root directory named after the
        `char_info` name of the first character (or 'OTHER' when its
        category is not one of Ll/Lo/Lu/Pd), extended by one level per
        character code for `MAX_DEPTH` levels (code 0 past the end of the
        title).  The first candidate that does not exist is created with
        `self.default_empty(prefix)` and returned; the first candidate that
        is a regular file is returned.  Existing non-file candidates are
        skipped.

        Raises:
            LockedError: If a candidate is still locked on the third check.
            BlockNotFound: If no candidate could be used.
        """
        category, name = char_info(title[0])
        if category not in ('Ll', 'Lo', 'Lu', 'Pd'):
            name = 'OTHER'

        root = join(self.handler.path, name)
        candidates = [(name, root)]

        current = root
        for depth in range(MAX_DEPTH):
            # Titles shorter than MAX_DEPTH are padded with code 0.
            code = ord(title[depth]) if depth < len(title) else 0
            current = join(current, f'{code} - {hex(code)}')
            candidates.append((title[:depth + 1], current))

        for prefix, candidate in candidates:
            for attempt in range(3):
                if not exists(candidate):
                    print(f"`write`: {candidate}")
                    write(candidate, self.default_empty(prefix))
                    return candidate
                if is_locked(candidate):
                    if attempt >= 2:
                        raise LockedError(candidate)
                    # Report, wait a second and re-check the same candidate.
                    post_to_slack(
                        'recent-errors',
                        f'Locked #{attempt}: {prefix}, {candidate}')
                    time.sleep(1)
                    continue
                if isfile(candidate):
                    return candidate

        # fixme: should never happen?
        raise BlockNotFound(f"Path does't exist for title '{title}'")
Beispiel #7
0
 def wrapped(*args, **kwargs):
     """Run `func` while holding the lock file named by `slug`.

     If the lock file already exists, a message is printed and None is
     returned without calling `func`.  Otherwise the lock file is created,
     `func` is called with the given arguments, and the lock file is removed
     afterwards even when `func` raises.
     """
     lock_path = join(conf.root_path, 'sys', 'lock', slug)
     if exists(lock_path):
         print(dt(), f'Already locked: `{slug}`')
         return None
     write(lock_path, '')
     try:
         result = func(*args, **kwargs)
     finally:
         os.remove(lock_path)
     return result
Beispiel #8
0
def download_module(title, path):
    """Download the page `title` and save it under `path`.

    The commented-out `dev_prefix` line in the downloaded text is
    uncommented.  `path` is a template containing the literal `[.out]`
    marker: the text is written once with the marker removed and, unless the
    module-level `debug` flag is truthy, once more with it replaced by
    `.out`.
    """
    print(f'- {title}', end='')
    text = load_page(title) + '\n'
    text = text.replace(
        "\n-- dev_prefix = 'User:Vitalik/'",
        "\ndev_prefix = 'User:Vitalik/'",
    )
    suffixes = [''] if debug else ['', '.out']
    for suffix in suffixes:
        write(path.replace('[.out]', suffix), text)
    print(' - OK')
Beispiel #9
0
 def lock(self, lock_slug):
     """Create the lock file for `lock_slug` and flag this object as locked.

     If the lock file already exists, the conflict is posted to the
     'recent-errors' Slack channel (using a short alias for known storage
     paths) before the exception is raised.

     Raises:
         StorageAlreadyLocked: If the lock file already exists.
     """
     lock_path = self.lock_filename(lock_slug)
     if not exists(lock_path):
         write(lock_path, dt())
         self.locked = True
         return

     # Short aliases for well-known lock files; unknown paths are reported
     # verbatim.
     aliases = {
         '/home/vitalik/storages/wiktionary/storage'
         '/authors/sys/lock_recent': '*authors*',
     }
     storage_name = aliases.get(lock_path, lock_path)
     post_to_slack(
         'recent-errors',
         f':lock: `{Logger.slug}` Storage is locked: {storage_name}')
     raise StorageAlreadyLocked(
         f'Can\'t lock: Storage is already locked: "{lock_path}"')
Beispiel #10
0
def download_page(title, path):
    """Download the page `title` and store it at `path` if it changed.

    The commented-out `dev_prefix` line in the downloaded text is
    uncommented before comparison and saving.  Progress is printed on one
    line: the title followed by one of

    * `No page`     -- `load_page` raised `NoPage`; nothing is written;
    * `NEW`         -- `path` did not exist; the file is created;
    * `OK`          -- the stored text differed; the file is overwritten;
    * `Not changed` -- the stored text is identical; nothing is written.

    Fix: the last two branches used to be swapped, so changed pages were
    reported as OK without being saved while identical pages were rewritten.
    """
    print(f'- {title}', end='')
    try:
        content = load_page(title) + '\n'
    except NoPage:
        print(' - No page')
        return
    content = content.replace("\n-- dev_prefix = 'User:Vitalik/'",
                              "\ndev_prefix = 'User:Vitalik/'")
    if not exists(path):
        write(path, content)
        print(' - NEW')
    elif read(path) != content:
        # Only touch the file when the downloaded text actually differs.
        write(path, content)
        print(' - OK')
    else:
        print(' - Not changed')
Beispiel #11
0
def update_langs():
    """Write per-language title lists, with and without redirects.

    For every page from `storage.iterate_pages` the title is recorded under
    each entry of `page.languages.keys` (used as an attribute, not called --
    presumably a property; verify against the page class).  One text file
    per language is written to `<PARSED_STORAGE_PATH>/lists/langs`, and a
    second set, limited to titles absent from `storage.redirects_set`, to
    the `articles` subdirectory.  A falsy language key is stored as `-`.
    """
    all_articles = defaultdict(list)
    without_redirects = defaultdict(list)
    # NOTE: an alternative based on iterate_pages_with_info() and
    # page.is_redirect was left commented out in the original code.
    for title, page in storage.iterate_pages(silent=True):
        for lang in page.languages.keys:
            all_articles[lang].append(title)
            if title in storage.redirects_set:
                continue
            without_redirects[lang].append(title)

    def dump(directory, titles_by_lang):
        # One newline-separated file per language.
        for lang, titles in titles_by_lang.items():
            write(f'{directory}/{lang or "-"}.txt', '\n'.join(titles))

    dump(join(conf.PARSED_STORAGE_PATH, 'lists', 'langs'), all_articles)
    dump(join(conf.PARSED_STORAGE_PATH, 'lists', 'langs', 'articles'),
         without_redirects)

    print('ok')
Beispiel #12
0
def convert_dir(dev, _from, _to):
    """Convert every file in `declension_files` and round-trip the result.

    Aborts with a printed error if `compare_dir(dev, _to)` is falsy before
    the conversion.  Each file is converted `_from` -> `_to`, the result is
    copied to its `out=True` location, and that copy is converted back
    `_to` -> `_from`.  Afterwards `compare_dir(dev, _from)` is checked and
    an error is printed if it is falsy.
    """
    if not compare_dir(dev, _to):
        print(f'Ошибка: папки `{_to}` не синхронизированы до конвертации.')
        return

    for file in declension_files:
        # Forward conversion.
        convert_file(dev, file, _from, _to, out=False)

        # Copy the result into the `.out` tree.
        converted = get_path(dev, _to, file, out=False)
        mirror = get_path(dev, _to, file, out=True)
        # Writing an empty file first creates missing directories.
        write(mirror, '')
        copy(converted, mirror)

        # Backward conversion on the `.out` copy.
        convert_file(dev, file, _to, _from, out=True)

    if not compare_dir(dev, _from):
        print(
            f'Ошибка: папки `{_from}` не синхронизированы после конвертации.')
Beispiel #13
0
 def save(self):
     """Back up the block file, rewrite it, then run the parent's `save`.

     The existing file is copied to `<path>.bak`, after which
     `self.contents` joined with `SEPARATOR` is written to `self.path`.
     """
     backup_path = f'{self.path}.bak'
     copy(self.path, backup_path)
     serialized = SEPARATOR.join(self.contents)
     write(self.path, serialized)
     super(ContentsBlockHandler, self).save()
Beispiel #14
0
 def latest_updated(self, value):
     """Store `value`, formatted by `dt(value, utc=True)`, in the file
     named by `self.latest_updated_filename`.

     NOTE(review): presumably a property setter; the decorator is not
     visible here.
     """
     target = self.latest_updated_filename
     stamp = dt(value, utc=True)
     write(target, stamp)
Beispiel #15
0
 def save(self):
     """Back up the block file, rewrite it, then run the parent's `save`.

     The existing file is copied to `<path>.bak`, after which
     `self.contents` joined with newlines is written to `self.path`.
     """
     backup_path = f'{self.path}.bak'
     copy(self.path, backup_path)
     serialized = '\n'.join(self.contents)
     write(self.path, serialized)
     super(SimpleBlockHandler, self).save()
Beispiel #16
0
 def set(cls, value):
     """Write `value` to `cls.filename` as `YYYY-MM-DD HH:MM:SS`.

     `value` must provide `strftime` (e.g. a `datetime`).
     """
     target = cls.filename
     stamp = value.strftime('%Y-%m-%d %H:%M:%S')
     write(target, stamp)
Beispiel #17
0
def sync_save(title, content):
    """Write `content` to the file that `sync_path(title)` resolves to."""
    write(sync_path(title), content)
Beispiel #18
0
 def save_data(self, path, prefix, titles):
     """Write a block file: a header section followed by one section per title.

     The header is `Prefix: <prefix>` followed by the sorted titles, one per
     line.  It is followed by `self.data(title)` for each title in the same
     order, all sections joined with `SEPARATOR`.

     Note: `titles` is sorted in place, so the caller's list is reordered.
     """
     titles.sort()
     listing = '\n'.join(titles)
     sections = [f"Prefix: {prefix}\n{listing}"]
     sections.extend(self.data(title) for title in titles)
     write(path, SEPARATOR.join(sections))
Beispiel #19
0
 def save_data(self, path, prefix, titles):
     """Write one `title<TAB>data` line per title, in sorted title order.

     `prefix` is accepted but not used by this implementation.
     """
     body = '\n'.join(
         f'{title}\t{self.data(title)}' for title in sorted(titles))
     write(path, body)
Beispiel #20
0
 def all_pages_start_from(self, value):
     """Write `value` to the file named by
     `self.all_pages_start_from_filename`.

     NOTE(review): presumably a property setter; the decorator is not
     visible here.
     """
     target = self.all_pages_start_from_filename
     write(target, value)
Beispiel #21
0
 def save_articles(self, articles):
     """Write `articles`, one per line, to `self.articles_filename`."""
     target = self.articles_filename
     listing = '\n'.join(articles)
     write(target, listing)
Beispiel #22
0
 def save_redirects(self, redirects):
     """Write `redirects`, one per line, to `self.redirects_filename`."""
     target = self.redirects_filename
     listing = '\n'.join(redirects)
     write(target, listing)
 def save(cls):
     """Write `cls.data`, joined with `cls.sep`, to both the history file
     and the active file (history first).
     """
     serialized = cls.sep.join(cls.data)
     for target in (cls.history_filename, cls.active_filename):
         write(target, serialized)
Beispiel #24
0
 def create_fs(self):
     """Create the directory structure and record `max_count`.

     Calls `self.create_dir` on `self.path` with `self.structure` at level
     1, then writes `str(self.max_count)` to `<path>/_sys/max_count`.
     """
     self.create_dir(self.path, self.structure, level=1)
     counter_path = join(self.path, '_sys', 'max_count')
     write(counter_path, str(self.max_count))