def test_html_to_nodes_multi_line(self):
    """Both multi-line HTML fixtures must parse to the same node list."""
    for markup in (HTML_MULTI_LINES, HTML_MULTI_LINES1):
        self.assertEqual(html_to_nodes(markup), HTML_MULTI_LINES_NODES_LIST)
def _find_tag(elem, tag_name):
    """Return ``elem`` itself or its first descendant named ``tag_name``, else ``None``."""
    if getattr(elem, 'name', None) is None:
        # Plain text node: a word such as "description" is not a <script> tag.
        return None
    if elem.name == tag_name:
        return elem
    return elem.find(tag_name)


def _extract_title(page):
    """Return the post title as plain text, or ``''`` when no heading is found."""
    container = page.find('div', class_='bdaia-post-title')
    heading = container.find('h1') if container is not None else None
    if heading is None:
        # Alternative page layout: the heading carries the class itself.
        heading = page.find('h1', class_='post-title')
    return heading.get_text(strip=True) if heading is not None else ''


def new_post(client, message):
    """Mirror the article linked in ``message`` to a Telegraph page and announce it.

    The last ``http(s)`` link found in the message text is downloaded, its
    cover image is re-uploaded through ``tupload``, the post body is reduced
    to simple HTML, and the resulting Telegraph URL is sent to a hard-coded
    chat together with the title.

    Args:
        client: Unused by this function; kept for the handler signature.
        message: Object whose ``text`` attribute may contain the article link.

    Returns:
        None. Returns early, without doing anything, when the message
        contains no link.

    Note:
        The page is expected to contain an ``a.bdaia-featured-img-cover``
        element with a ``style`` URL and a ``div.bdaia-post-content``
        element; their absence is not guarded and raises ``TypeError``.
    """
    # Local import so this block does not depend on the file's import header.
    from html import escape

    request_timeout = 30  # seconds; keeps the handler from hanging forever

    links = re.findall(r'(https?://\S+)', message.text or '')
    if not links:
        # Nothing to mirror (no URL, or a message without text).
        return
    link = links[-1]

    parsed = requests.get(link, timeout=request_timeout).text
    page = BeautifulSoup(parsed, features="html.parser")
    title = _extract_title(page)
    content = page.find('div', class_='bdaia-post-content')

    # The cover URL is embedded in an inline style (``url(...);``).
    cover_style = page.find('a', class_='bdaia-featured-img-cover')['style']
    image_url = re.findall(r'(https?://\S+)', cover_style)[-1].replace(');', '')

    # Both context managers close before the upload, so the file is fully
    # flushed and the streamed connection is released.
    with requests.get(image_url, stream=True, timeout=request_timeout) as response, \
            open('img.png', 'wb') as out_file:
        shutil.copyfileobj(response.raw, out_file)
    cover = tupload.upload_file('img.png')[0]
    cover_url = 'http://telegra.ph' + cover

    parts = ['<img src="{}"></img>'.format(cover_url)]
    for elem in content:
        name = getattr(elem, 'name', None)
        markup = str(elem)

        # Skip layout wrappers, styles and anything that is or contains a script.
        if name in ('div', 'style') or _find_tag(elem, 'script') is not None:
            continue

        if name == 'blockquote':
            # NOTE(review): the literal '<a>' only matches anchors without
            # attributes, so real tweet embeds (``<a href=...>``) likely never
            # reach this branch - confirm intent before changing the check.
            if '<a>' in markup:
                quote = ''.join(str(child) for child in elem)
                twitter_link = elem.find_all('a')[-1]['href']
                parts.append(
                    '<p><blockquote>' + quote + '</blockquote></p>'
                    + '<p><a href="{}">Link to tweet</a></p>'.format(twitter_link)
                )
            else:
                parts.append(markup)
            continue

        iframe = _find_tag(elem, 'iframe')
        if iframe is not None and iframe.get('src'):
            # Embedded players are replaced with a plain link to the video.
            parts.append(
                '<p><a href="{}">Link to video</a></p>'.format(iframe['src'])
            )
        elif name == 'h2':
            parts.append('<p><b>{}</b></p>'.format(elem.next))
        else:
            parts.append(markup)

    nodes = tutils.html_to_nodes(''.join(parts))
    tg_page = telegraph.create_page(
        content=nodes,
        title=title,
        author_name='@Lor3m',
        author_url='https://t.me/Lor3m',
    )
    created_page = tg_page['url']
    bot.send_message(
        -1001347924999,
        # HTML parse mode requires <, > and & in plain text to be escaped.
        '{}\n\n{}'.format(escape(title, quote=False), created_page),
        parse_mode=ParseMode.HTML,
    )
def test_clear_whitespace(self):
    """Check the node tree produced for markup with whitespace around and inside tags."""
    markup = (
        '\n<p><i>A</i><b> </b><b>B <i>C</i><i><b></b></i>'
        ' D </b> E </p><p> F </p>\n'
    )

    # Expected tree, assembled from the inside out.
    nested_bold = {
        'tag': 'b',
        'children': [
            'B ',
            {'tag': 'i', 'children': ['C']},
            {'tag': 'i', 'children': [{'tag': 'b'}]},
            ' D ',
        ],
    }
    first_paragraph = {
        'tag': 'p',
        'children': [
            {'tag': 'i', 'children': ['A']},
            {'tag': 'b', 'children': [' ']},
            nested_bold,
            'E ',
        ],
    }
    second_paragraph = {'tag': 'p', 'children': ['F ']}

    self.assertEqual(
        html_to_nodes(markup),
        [first_paragraph, second_paragraph],
    )
def test_html_to_nodes_not_allowed_tag(self):
    """A <script> element must be rejected with NotAllowedTag."""
    forbidden_markup = '<script src="localhost"></script>'
    self.assertRaises(NotAllowedTag, html_to_nodes, forbidden_markup)
def test_html_to_nodes_invalid_html(self):
    """Closing tags in the wrong order must raise InvalidHTML."""
    misnested_markup = '<p><b></p></b>'
    self.assertRaises(InvalidHTML, html_to_nodes, misnested_markup)
def test_html_to_nodes(self):
    """The reference HTML fixture must parse to the reference node list."""
    parsed = html_to_nodes(HTML_TEST_STR)
    self.assertEqual(parsed, NODES_TEST_LIST)
def test_no_starttag_node(self):
    """Parsing the HTML_NO_STARTTAG fixture must raise InvalidHTML."""
    self.assertRaises(InvalidHTML, html_to_nodes, HTML_NO_STARTTAG)
def telegraph(c: Client, msg: Message):
    """Publish text to telegra.ph and edit ``msg`` with the resulting link.

    The text comes either from the command's own arguments
    (``/telegraph Some Text Here``) or, when the command has no arguments,
    from the message being replied to. Every parsed node is wrapped in its
    own ``<p>`` element before being posted to ``edit.telegra.ph/save``.

    Args:
        c: Client instance; not used by this function.
        msg: The command message. It is edited in place with either the
            result or an error description.

    Returns:
        ``1`` when the request was rejected (no text source, empty or
        unparsable text, or an error reported by telegra.ph); ``None`` on
        success.
    """
    # Local import so this block does not depend on the file's import header.
    import json

    command_prefix = "/telegraph"
    if len(msg.command) > 1:
        targetmsg = msg
        # The command may arrive as plain text or as a media caption; either
        # one can be None, so pick the populated one before slicing.
        text = (targetmsg.text or targetmsg.caption or "")[len(command_prefix):]
    elif msg.reply_to_message:
        targetmsg = msg.reply_to_message
        text = targetmsg.text
    else:
        msg.edit_text(
            "Please reply to a message or specify the text with <code>/telegraph Some Text Here</code>"
        )
        return 1

    author = targetmsg.from_user.first_name
    title = msg.chat.username or msg.chat.title or msg.chat.first_name

    if not text:
        msg.edit_text(
            f"{Emoji.NEWSPAPER} Telegraph\n"
            f"\n"
            f"{Emoji.CROSS_MARK} <b>Error:</b> <code>Invalid message</code>")
        # Stop here: there is nothing to parse or publish.
        return 1

    nodes = utils.html_to_nodes(text)
    if not nodes:
        msg.edit_text(
            f"{Emoji.NEWSPAPER} Telegraph\n"
            f"\n"
            f"{Emoji.CROSS_MARK} <b>Error:</b> <code>Invalid text!</code>")
        return 1

    # json.dumps quotes string nodes and escapes quotes/backslashes, so no
    # manual quote substitution is needed to obtain valid JSON.
    content = json.dumps(
        [{"tag": "p", "children": [node]} for node in nodes]
    )
    files = {
        "Data": (
            "content.html",
            io.BytesIO(content.encode()),
            "plain/text",
        )
    }
    data = {
        "title": title,
        "author": author,
        "author_url": "https://github.com/GodSaveTheDoge",
        "save_hash": "",
        "page_id": "0",
    }
    r = requests.post(
        "https://edit.telegra.ph/save",
        files=files,
        data=data,
        headers=HEADERS,
        timeout=30,  # seconds; avoids blocking the handler indefinitely
    ).json()

    if "error" in r:
        msg.edit_text(
            f"{Emoji.NEWSPAPER} Telegraph\n"
            f"\n"
            f"{Emoji.CROSS_MARK} <b>Error:</b> <code>{r['error']}</code>")
        return 1

    msg.edit_text(
        f"{Emoji.NEWSPAPER} Telegraph\n"
        f"\n"
        f"{Emoji.PAGE_WITH_CURL} <b>Title:</b> <code>{title}</code>\n"
        f"{Emoji.LINK} <b>Link:</b> https://telegra.ph/{r['path']}\n"
        f"{Emoji.PEN} <b>Author:</b> <code>{author}</code>",
        parse_mode="html",
    )
def test_uppercase_tags(self):
    """An upper-case <P> element must come back as a lower-case 'p' node."""
    expected_nodes = [{'tag': 'p', 'children': ['Hello']}]
    self.assertEqual(html_to_nodes("<P>Hello</P>"), expected_nodes)
def test_pre_whitespace_preserved(self):
    """Newlines inside a <pre> element must be kept verbatim in the node text."""
    parsed = html_to_nodes("<pre>\nhello\nworld</pre>")
    expected_nodes = [{'tag': 'pre', 'children': ['\nhello\nworld']}]
    self.assertEqual(parsed, expected_nodes)
def test_clear_whitespace_1(self):
    """Round-trip markup through html_to_nodes and nodes_to_html and compare the HTML."""
    source_markup = '\n<p><i>A</i><b> </b><b>B <i>C</i><i><b></b></i> D </b> E </p><p> F </p>\n'
    expected_markup = '<p><i>A</i><b> </b><b>B <i>C</i><i><b></b></i> D </b>E </p><p>F </p>'
    round_tripped = nodes_to_html(html_to_nodes(source_markup))
    self.assertEqual(round_tripped, expected_markup)