Python examples of telegraph.utils.html_to_nodes

Example #1

0

Show file

File: test_html_converter.py Project: python273/telegraph

 def test_html_to_nodes_multi_line(self):
     """Both multi-line HTML fixtures must convert to the same node list."""
     for html in (HTML_MULTI_LINES, HTML_MULTI_LINES1):
         self.assertEqual(html_to_nodes(html), HTML_MULTI_LINES_NODES_LIST)

Example #2

0

Show file

File: servant_handlers.py Project: l0rem/XakepTgPoster

def new_post(client, message):
    """Repost the article linked in ``message`` as a Telegraph page.

    Takes the last URL found in ``message.text``, downloads and parses that
    page, uploads its cover image, rebuilds the article body as a reduced
    HTML string, converts it with ``tutils.html_to_nodes``, creates a
    Telegraph page and sends the title and page URL through ``bot``.

    Args:
        client: Passed in by the caller; not used in this function.
        message: Incoming message; only ``message.text`` is read.
    """
    article_url = re.findall(r'(https?://\S+)', message.text)[-1]
    raw_html = requests.get(article_url).text
    page = BeautifulSoup(raw_html, features="html.parser")

    try:
        title = page.find('div', class_='bdaia-post-title').find('h1').next
    except AttributeError:
        # NOTE(review): this fallback stores the <h1> element itself (or
        # None), not its text as the branch above does -- confirm intent.
        title = page.find('h1', class_='post-title')
    content = page.find('div', class_='bdaia-post-content')

    # The cover URL is embedded in the anchor's inline ``style`` attribute;
    # the trailing ');' is stripped from the matched URL.
    cover_anchor = page.find('a', class_='bdaia-featured-img-cover')
    image_url = re.findall(r'(https?://\S+)', cover_anchor['style'])[-1]
    image_url = image_url.replace(');', '')

    response = requests.get(image_url, stream=True)
    with open('img.png', 'wb') as out_file:
        shutil.copyfileobj(response.raw, out_file)

    cover_url = 'http://telegra.ph' + tupload.upload_file('img.png')[0]
    fragments = ['<img src=\"{}\"></img>'.format(cover_url)]

    for elem in content:
        markup = str(elem)

        # Drop nested divs, style blocks and anything mentioning 'script'.
        if elem.name in ('div', 'style') or 'script' in markup:
            continue

        if elem.name == 'blockquote':
            # NOTE(review): '<a>' only matches an anchor written without
            # attributes -- confirm this is the intended check.
            if '<a>' in markup:
                quote = ''.join(str(child) for child in elem)
                twitter_link = elem.find_all('a')[-1]['href']
                fragments.append('<p><blockquote>' + quote + '</blockquote></p>')
                fragments.append(
                    '<p><a href=\"{}\">Link to tweet</a></p>'.format(
                        twitter_link))
            else:
                fragments.append(markup)
            continue

        if 'iframe' in markup:
            # Embedded frames are replaced by a plain link to their source.
            url = elem.find('iframe')['src']
            fragments.append(
                '<p><a href=\"{}\">Link to video</a></p>'.format(url))
            continue

        if elem.name == 'h2':
            # Sub-headings are rendered as bold paragraphs.
            fragments.append('<p><b>{}</b></p>'.format(elem.next))
            continue

        fragments.append(markup)

    text = ''.join(fragments)
    nodes = tutils.html_to_nodes(text)

    tg_page = telegraph.create_page(content=nodes,
                                    title=title,
                                    author_name='@Lor3m',
                                    author_url='https://t.me/Lor3m')
    created_page = tg_page['url']

    announcement = '{}\n\n{}'.format(title, created_page)
    bot.send_message(
        -1001347924999,
        announcement,
        parse_mode=ParseMode.HTML,
    )

Example #3

0

Show file

File: test_html_converter.py Project: python273/telegraph

    def test_clear_whitespace(self):
        """Check how whitespace is trimmed when converting HTML to nodes.

        In the expected nodes the leading spaces of ' E ' and ' F ' are
        dropped, while 'B ', ' D ' and the lone ' ' inside <b> are kept.
        """
        source = (
            '\n<p><i>A</i><b> </b><b>B <i>C</i><i><b></b></i>'
            ' D </b> E </p><p> F </p>\n'
        )
        bold_children = [
            'B ',
            {'tag': 'i', 'children': ['C']},
            {'tag': 'i', 'children': [{'tag': 'b'}]},
            ' D ',
        ]
        first_paragraph = {
            'tag': 'p',
            'children': [
                {'tag': 'i', 'children': ['A']},
                {'tag': 'b', 'children': [' ']},
                {'tag': 'b', 'children': bold_children},
                'E ',
            ],
        }
        second_paragraph = {'tag': 'p', 'children': ['F ']}

        self.assertEqual(
            html_to_nodes(source),
            [first_paragraph, second_paragraph],
        )

Example #4

0

Show file

File: test_html_converter.py Project: weakish/telegraph

 def test_html_to_nodes_not_allowed_tag(self):
     """A <script> tag must be rejected with NotAllowedTag."""
     self.assertRaises(
         NotAllowedTag,
         html_to_nodes,
         '<script src="localhost"></script>',
     )

Example #5

0

Show file

File: test_html_converter.py Project: weakish/telegraph

 def test_html_to_nodes_invalid_html(self):
     """Mis-nested closing tags must be rejected with InvalidHTML."""
     self.assertRaises(InvalidHTML, html_to_nodes, '<p><b></p></b>')

Example #6

0

Show file

File: test_html_converter.py Project: weakish/telegraph

 def test_html_to_nodes(self):
     """The reference HTML fixture converts to the reference node list."""
     converted = html_to_nodes(HTML_TEST_STR)
     self.assertEqual(converted, NODES_TEST_LIST)

Example #7

0

Show file

File: test_html_converter.py Project: weakish/telegraph

 def test_no_starttag_node(self):
     """The HTML_NO_STARTTAG fixture must be rejected with InvalidHTML."""
     self.assertRaises(InvalidHTML, html_to_nodes, HTML_NO_STARTTAG)

Example #8

0

Show file

File: telegraph.py Project: Dank-del/MultiUserbot

def telegraph(c: Client, msg: Message):
    """Publish message text as a Telegraph page and report the link.

    The text comes from the command message itself when ``msg.command`` has
    more than one element, otherwise from the replied-to message. It is
    converted with ``utils.html_to_nodes``, every resulting node is wrapped
    in its own ``p`` node, and the JSON document is uploaded to
    ``https://edit.telegra.ph/save``. Results and errors are reported by
    editing ``msg``.

    Args:
        c: Client passed in by the caller; not used in this function.
        msg: Message that triggered the command.

    Returns:
        ``1`` when the page could not be created (no text source, empty
        text, no nodes, or an ``error`` key in the server response);
        ``None`` after a successful upload.
    """
    # Local import keeps this block self-contained; serialising with json
    # replaces the hand-assembled JSON string.
    import json

    if len(msg.command) > 1:
        targetmsg = msg
        # Choose between text and caption *before* slicing: slicing a
        # missing (None) text raised TypeError, so the caption fallback
        # could never be reached.
        text = (targetmsg.text or targetmsg.caption or "")[len("/telegraph"):]
    elif msg.reply_to_message:
        targetmsg = msg.reply_to_message
        text = targetmsg.text
    else:
        msg.edit_text(
            "Please reply to a message or specify the text with <code>/telegraph Some Text Here</code>"
        )
        return 1

    author = targetmsg.from_user.first_name
    title = msg.chat.username or msg.chat.title or msg.chat.first_name

    if not text:
        msg.edit_text(
            f"{Emoji.NEWSPAPER} Telegraph\n"
            f"\n"
            f"{Emoji.CROSS_MARK} <b>Error:</b> <code>Invalid message</code>")
        # Stop here like the other error paths; continuing would fail on
        # the conversion below.
        return 1

    nodes = utils.html_to_nodes(text)

    if not nodes:
        msg.edit_text(
            f"{Emoji.NEWSPAPER} Telegraph\n"
            f"\n"
            f"{Emoji.CROSS_MARK} <b>Error:</b> <code>Invalid text!</code>")
        # Nothing to publish; continuing would index into an empty list.
        return 1

    # Each node becomes the single child of its own paragraph. json.dumps
    # quotes strings and serialises nested nodes correctly, so no manual
    # quote substitution is needed. ensure_ascii=False keeps non-ASCII text
    # as raw characters in the uploaded payload.
    content = json.dumps(
        [{"tag": "p", "children": [node]} for node in nodes],
        ensure_ascii=False,
    )

    files = {
        "Data": (
            "content.html",
            io.BytesIO(content.encode()),
            "plain/text",
        )
    }
    data = {
        "title": title,
        "author": author,
        "author_url": "https://github.com/GodSaveTheDoge",
        "save_hash": "",
        "page_id": "0",
    }

    r = requests.post("https://edit.telegra.ph/save",
                      files=files,
                      data=data,
                      headers=HEADERS).json()

    if "error" in r:
        msg.edit_text(
            f"{Emoji.NEWSPAPER} Telegraph\n"
            f"\n"
            f"{Emoji.CROSS_MARK} <b>Error:</b> <code>{r['error']}</code>")
        return 1

    msg.edit_text(
        f"{Emoji.NEWSPAPER} Telegraph\n"
        f"\n"
        f"{Emoji.PAGE_WITH_CURL} <b>Title:</b> <code>{title}</code>\n"
        f"{Emoji.LINK} <b>Link:</b> https://telegra.ph/{r['path']}\n"
        f"{Emoji.PEN} <b>Author:</b> <code>{author}</code>",
        parse_mode="html",
    )

Example #9

0

Show file

File: test_html_converter.py Project: python273/telegraph

 def test_uppercase_tags(self):
     """An upper-case <P> tag is expected to come out as a lower-case 'p' node."""
     expected = [{'tag': 'p', 'children': ['Hello']}]
     converted = html_to_nodes("<P>Hello</P>")
     self.assertEqual(converted, expected)

Example #10

0

Show file

File: test_html_converter.py Project: python273/telegraph

 def test_pre_whitespace_preserved(self):
     """Newlines inside <pre> are expected to be kept verbatim."""
     expected = [{'tag': 'pre', 'children': ['\nhello\nworld']}]
     converted = html_to_nodes("<pre>\nhello\nworld</pre>")
     self.assertEqual(converted, expected)

Example #11

0

Show file

File: test_html_converter.py Project: python273/telegraph

 def test_clear_whitespace_1(self):
     """Round-trip HTML through nodes and compare with the trimmed HTML."""
     source = '\n<p><i>A</i><b> </b><b>B <i>C</i><i><b></b></i> D </b> E </p><p> F </p>\n'
     trimmed = '<p><i>A</i><b> </b><b>B <i>C</i><i><b></b></i> D </b>E </p><p>F </p>'
     round_tripped = nodes_to_html(html_to_nodes(source))
     self.assertEqual(round_tripped, trimmed)