def main():
    """Print the links found on the hard-coded test page that stay under it.

    Loads the HTML for the test URL with ``urlOpen.get_html``, passes it
    through ``textParser.tags_filter_head_and_script``, then walks the
    ``(url, text)`` pairs produced by ``get_url_and_url_text``. Only pairs
    whose URL starts with the test URL are printed.
    """
    test_url = "http://gazeta.ru/"
    page = textParser.tags_filter_head_and_script(urlOpen.get_html(test_url))

    for url, text in get_url_and_url_text(page, test_url):
        if not url.startswith(test_url):
            # Skip links whose URL does not begin with the test URL.
            continue
        # The format string ends in "\n" and print() appends its own newline,
        # so every printed pair is followed by a blank line.
        print("{} {}\n".format(url, text))
def parse(source_url: str):
    """Yield one ``model.NewsData`` per link pair extracted from a page.

    The HTML for ``source_url`` is loaded with ``urlOpen.get_html`` and
    passed through ``textParser.tags_filter_head_and_script`` before
    ``get_url_and_url_text`` is asked for ``(url, text)`` pairs.

    Args:
        source_url: Address of the page to load; also forwarded to
            ``get_url_and_url_text`` as its second argument.

    Yields:
        ``model.NewsData`` with ``url`` and ``title`` taken from the pair,
        ``pub_date`` set to ``datetime.now()`` at the moment the item is
        produced, and ``summary`` set to ``None``.
    """
    raw_html = urlOpen.get_html(source_url)
    cleaned_html = textParser.tags_filter_head_and_script(raw_html)
    link_pairs = get_url_and_url_text(cleaned_html, source_url)

    for link_url, link_text in link_pairs:
        # No publication date is read from the page here: the timestamp is
        # simply the time at which this record is generated.
        yield model.NewsData(
            url=link_url,
            title=link_text,
            pub_date=datetime.now(),
            summary=None,
        )