def worker(news):
    """Download a news page and build its text record and outgoing links.

    Args:
        news: Object exposing ``url`` and ``pk`` attributes.

    Returns:
        A ``(NewsText, url_list)`` tuple when ``urlOpen.get_html`` returns a
        truthy page, otherwise ``None``. The ``NewsText`` instance is only
        constructed here; this function does not save it.
    """
    page = urlOpen.get_html(news.url)
    # Progress output: the primary key followed by a space, one per line.
    print(str(news.pk) + " ")
    if not page:
        return None

    raw_text = textParser.get_text_from_html(page)
    # Links are collected from the text before its tags are stripped.
    url_list = list(
        aParser.get_a_from_news_text(news_url=news.url, text=raw_text)
    )
    stripped_text = aParser.remove_all_tags(raw_text)
    prepared_text = text_prerparer.text_preparer(stripped_text)
    return NewsText(news=news, text=prepared_text), url_list
def parse_news_text(news_text: NewsText):
    """Link every URL found in a news text to its news item and mark it parsed.

    For each URL yielded by ``aParser.get_a_from_news_text`` an existing
    ``UrlInText`` row with that URL is reused, or a new one is created, and
    the news item is added to its ``news`` relation. Afterwards the tags are
    removed from ``news_text.text``, ``is_parsed`` is set to ``True`` and the
    record is saved.

    Args:
        news_text: The ``NewsText`` record to process; it is modified and
            saved in place.

    Returns:
        None.
    """
    # Progress output terminated by a carriage return instead of a newline.
    print(str(news_text.pk) + " ", end='\r')

    news = news_text.news
    for url in aParser.get_a_from_news_text(news_url=news.url, text=news_text.text):
        # first() fetches the row (or None) in a single query, replacing the
        # previous exists() check followed by a second indexed fetch.
        url_in_text = UrlInText.objects.filter(url=url).first()
        if url_in_text is None:
            url_in_text = UrlInText.objects.create(url=url)
        url_in_text.news.add(news)

    # Tags are stripped only after the links have been extracted above.
    news_text.text = aParser.remove_all_tags(news_text.text)
    news_text.is_parsed = True
    news_text.save()
def worker(news):
    """Fetch the HTML behind ``news.url`` and turn it into prepared text.

    Args:
        news: Object exposing ``url`` and ``pk`` attributes.

    Returns:
        ``(NewsText(news=news, text=...), url_list)`` if a truthy HTML value
        was fetched; ``None`` when ``urlOpen.get_html`` returned a falsy one.
    """
    fetched = urlOpen.get_html(news.url)
    # The primary key is printed whether or not the fetch produced anything.
    print(str(news.pk) + " ")
    if not fetched:
        return None

    body = textParser.get_text_from_html(fetched)
    # Extract the links first; the tag removal below runs on the same text.
    links = aParser.get_a_from_news_text(news_url=news.url, text=body)
    url_list = list(links)
    body = text_prerparer.text_preparer(aParser.remove_all_tags(body))
    return NewsText(news=news, text=body), url_list
def worker(news_text: NewsText):
    """Collect the URLs found in an existing news text.

    Args:
        news_text: Record exposing ``news.url`` and ``text``.

    Returns:
        A ``(news_text, url_list)`` tuple, where ``url_list`` is a list of
        everything yielded by ``aParser.get_a_from_news_text``.
    """
    source_url = news_text.news.url
    content = news_text.text
    found = aParser.get_a_from_news_text(news_url=source_url, text=content)
    return news_text, list(found)
def worker(news_text: NewsText):
    """Pair a news text with the list of links extracted from it.

    Args:
        news_text: Record exposing ``news.url`` and ``text``.

    Returns:
        The same ``news_text`` object together with a list built from the
        result of ``aParser.get_a_from_news_text``.
    """
    url_list = list(
        aParser.get_a_from_news_text(
            news_url=news_text.news.url,
            text=news_text.text,
        )
    )
    return news_text, url_list