Python parse Examples

Programming Language: Python

Namespace/Package Name: tools.parsers.utils

Method/Function: parse

Examples at hotexamples.com: 4

Python parse - 4 examples found. These are the top-rated real-world Python examples of tools.parsers.utils.parse, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

def parse_fb2(file_name: Path, get_transitions_func: Callable = None, coin_flip: List[str] = None):
    """Parse an FB2 file section by section and save the resulting book.

    The file's bytes are parsed, the book info is printed, and every
    ``body > section`` element is added to the book via ``book.add_section``.
    The book is finally saved under ``DIR_DUMP_BOOKS / file_name.name``.

    Args:
        file_name: Path of the FB2 file to read.
        get_transitions_func: Optional callable that receives the list of a
            section's direct child tags and returns its transitions. When
            omitted (or falsy) the module's ``get_transitions`` is used.
        coin_flip: Optional list of section ids that must be stored with
            ``coin_flip=True``. ``None`` or an empty list marks no section.
    """
    print(file_name)

    root = parse(file_name.read_bytes())
    book = parse_book_info(root)

    print('title:', book.title)
    print('author:', book.author)
    print('annotation:', repr(book.annotation))
    print('coverpage_id:', book.coverpage_id)
    print('sequence_name:', book.sequence_name)
    print('sequence_num:', book.sequence_num)
    print('publisher:', book.publisher)
    print('images:', list(book.images))

    dir_book = DIR_DUMP_BOOKS / file_name.name

    # Both choices are loop-invariant, so resolve them once up front.
    find_transitions = get_transitions_func or get_transitions
    coin_flip_ids = coin_flip or ()

    for section in root.select('body > section'):
        # Sections whose title yields no id are stored under the id "0".
        section_id = clear_number(section.select_one('title > p')) or "0"

        # Remove the title tags that carry the page numbers
        for title in section.select('title'):
            title.decompose()

        tags = section.find_all(recursive=False)

        # Transitions and images are collected before preprocess_tags() runs
        # on the same tag list.
        transitions = find_transitions(tags)
        images = get_images(tags)

        preprocess_tags(tags)
        section_tag = parse(''.join(map(str, tags)))
        html_section = get_section_text(section_tag, None if section_id == '0' else section_id)

        book.add_section(
            id=section_id,
            text=html_section,
            transitions=transitions,
            images=images,
            # Compare the section *id* with the configured ids; testing the
            # tag object itself against a list of strings never matches.
            coin_flip=section_id in coin_flip_ids,
        )

    print('sections:', len(book.sections))

    book.save(dir_book)

Example #2

Show file

from tools.parsers.book import parse_book_info
from tools.parsers.utils import (
    parse, get_section_text, preprocess_tags, get_transitions, clear_number, get_images, DIR
)


# Pages on which a coin has to be flipped
COIN_FLIP = [
    '52'
]


file_name = DIR / 'Ужастики-2' / 'Stayn_Beregis-Lilovoy-Pasty-_RuLit_Me.fb2'
print(file_name)

# Parse the raw file bytes, then build the book object whose fields are
# printed below.
root = parse(file_name.read_bytes())
book = parse_book_info(root)

# Print the extracted book fields for a quick visual check.
# print('book:', book)
print('title:', book.title)
print('author:', book.author)
print('annotation:', repr(book.annotation))
print('coverpage_id:', book.coverpage_id)
print('sequence_name:', book.sequence_name)
print('sequence_num:', book.sequence_num)
print('publisher:', book.publisher)
print('images:', list(book.images))

# NOTE(review): DIR_DUMP_BOOKS and defaultdict are not imported in the visible
# part of this script - confirm they are in scope.
# Presumably the per-book output directory, named after the source file.
dir_book = DIR_DUMP_BOOKS / file_name.name

section_by_tags = defaultdict(list)

Example #3

Show file

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

__author__ = 'ipetrash'

from pathlib import Path
from tools.parsers.utils import parse

def to_int_or_none(value):
    """Convert *value* to ``int``; return ``None`` when it cannot be converted.

    ``None`` (e.g. a missing attribute) and non-numeric strings both yield
    ``None``. Only ``TypeError`` and ``ValueError`` are handled, so unrelated
    errors and ``KeyboardInterrupt`` are no longer swallowed.
    """
    try:
        return int(value)
    except (TypeError, ValueError):
        return None


# Print the sequence number (padded to width 4) and the path of every FB2 file
# found in the directory.
for file_name in Path('Ужастики-2').glob('*.fb2'):
    root = parse(file_name.read_bytes())

    title_info_tag = root.select_one('description > title-info')

    # A file without <title-info> has no sequence either; guard the lookup so
    # such a file is reported with "None" instead of raising AttributeError.
    sequence_tag = title_info_tag.select_one('sequence') if title_info_tag is not None else None
    sequence_name = sequence_tag.get('name') if sequence_tag else None
    sequence_num = to_int_or_none(sequence_tag.get('number') if sequence_tag else None)

    print(f'{str(sequence_num):4}', file_name)

Example #4

Show file

File: main__chelovekoszhimalki_RuLit_Net_264788_fb2.py Project: gil9red/telegram_bot__gamebook

def parse_fb2(file_name: Path, coin_flip: List[str]):
    """Parse an FB2 file whose sections all live in one ``<section>`` and save it.

    The children of the first ``body > section`` element are walked in order.
    A tag for which ``is_start_section`` is true opens a new section keyed by
    its plain text; the tags that follow are collected into that section until
    the next start tag. Tags seen before the first start tag are dropped. Each
    collected section is added to the book, which is then saved under
    ``DIR_DUMP_BOOKS / file_name.name``.

    Args:
        file_name: Path of the FB2 file to read.
        coin_flip: Section ids that must be stored with ``coin_flip=True``.
            ``None`` is treated as "no such sections".
    """
    print(file_name)

    root = parse(file_name.read_bytes())
    book = parse_book_info(root)

    print('title:', book.title)
    print('author:', book.author)
    print('annotation:', repr(book.annotation))
    print('coverpage_id:', book.coverpage_id)
    print('sequence_name:', book.sequence_name)
    print('sequence_num:', book.sequence_num)
    print('publisher:', book.publisher)
    print('images:', list(book.images))

    dir_book = DIR_DUMP_BOOKS / file_name.name

    section_by_tags = defaultdict(list)
    current_tags = None  # stays None until the first section start is seen

    for tag in root.select_one('body > section').children:
        # Skip nodes without a tag name (e.g. NavigableString text nodes)
        if not tag.name:
            continue

        if is_start_section(tag):
            # A repeated section key replaces the earlier list, as before.
            current_tags = []
            section_by_tags[get_plaintext(tag)] = current_tags
            continue

        if current_tags is not None:
            current_tags.append(tag)

    # Tolerate coin_flip=None instead of failing on the membership test below.
    coin_flip_ids = coin_flip or ()

    for section, tags in section_by_tags.items():
        # Transitions and images are collected before preprocess_tags() runs
        # on the same tag list.
        transitions = get_transitions(tags)
        images = get_images(tags)

        preprocess_tags(tags)
        section_tag = parse(''.join(map(str, tags)))
        html_section = get_section_text(section_tag, section)

        book.add_section(
            id=section,
            text=html_section,
            transitions=transitions,
            images=images,
            coin_flip=section in coin_flip_ids,
        )

    print('sections:', len(book.sections))

    book.save(dir_book)