def worker(news):
    """Download one news page and build its ``NewsText`` plus the links found in it.

    The HTML at ``news.url`` is fetched, the article text is extracted, the
    links inside that text are collected, and only then is the text stripped
    of tags and passed through ``text_prerparer.text_preparer``.

    Args:
        news: Object exposing ``url`` and ``pk``; it is handed to ``NewsText``
            as the ``news`` argument.

    Returns:
        ``(NewsText, url_list)`` when ``urlOpen.get_html`` returned something
        truthy, otherwise ``None``. The ``NewsText`` is only constructed;
        ``save()`` is not called here.
    """
    html = urlOpen.get_html(news.url)
    # Progress marker: the primary key followed by a space, one per line.
    print(str(news.pk) + " ")
    if not html:
        # Nothing was downloaded; callers must be ready for None.
        return None

    text = textParser.get_text_from_html(html)
    # Links are collected while the text still contains its tags.
    url_list = list(aParser.get_a_from_news_text(news_url=news.url, text=text))
    text = aParser.remove_all_tags(text)
    text = text_prerparer.text_preparer(text)
    return NewsText(news=news, text=text), url_list
def parse_news(n=None):
    """Fetch and store the text of news items whose ``is_parsed`` flag is False.

    For each selected item the page at ``item.url`` is downloaded. When the
    download yields HTML, the extracted text is stored through
    ``NewsText.objects.create`` and the item is saved with ``is_parsed=True``.
    Items whose download yields nothing are left untouched.

    Args:
        n: Upper slice bound applied to the unparsed queryset; ``None``
            (the default) leaves the slice open-ended.
    """
    pending = News.objects.filter(is_parsed=False)[:n]
    for item in pending.iterator():
        progress = str(item.id) + " "
        print(progress)
        page = urlOpen.get_html(item.url)  # 0.19 - 2.5 s
        if not page:
            continue
        body = textParser.get_text_from_html(page)  # 0.0099 - 0.026 s
        NewsText.objects.create(news=item, text=body)
        item.is_parsed = True
        item.save()  # 0.004 with atomic and 0.23 without
def parse_news(n=None):
    """Parse up to ``n`` news items that have not been parsed yet.

    Each item's id is printed, its page is downloaded, and if HTML came back
    the extracted text is saved as a ``NewsText`` row and the item is flagged
    ``is_parsed`` and saved.

    Args:
        n: Maximum number of unparsed items to take (slice end); ``None``
            means the queryset is not truncated.
    """
    unparsed = News.objects.filter(is_parsed=False)
    for entry in unparsed[:n].iterator():
        print(str(entry.id) + " ")
        markup = urlOpen.get_html(entry.url)  # 0.19 - 2.5 s
        if markup:
            NewsText.objects.create(
                news=entry,
                text=textParser.get_text_from_html(markup),  # 0.0099 - 0.026 s
            )
            entry.is_parsed = True
            entry.save()  # 0.004 with atomic and 0.23 without
def worker(news):
    """Fetch a news page and return its prepared text together with its links.

    Steps, in order: download ``news.url``; print the item's primary key;
    extract the article text; collect the links from that text; strip all
    tags; run ``text_prerparer.text_preparer`` on the result.

    Args:
        news: Object with ``url`` and ``pk`` attributes; passed to
            ``NewsText`` as ``news``.

    Returns:
        A ``(NewsText, url_list)`` tuple on success, or ``None`` when
        ``urlOpen.get_html`` returned a falsy value. The ``NewsText``
        instance is constructed but not saved by this function.
    """
    html = urlOpen.get_html(news.url)
    print(str(news.pk) + " ")
    if not html:
        # Explicit rather than falling off the end: no HTML, no result.
        return None

    raw_text = textParser.get_text_from_html(html)
    # Collect links first; tag removal happens only afterwards.
    url_list = list(
        aParser.get_a_from_news_text(news_url=news.url, text=raw_text)
    )
    prepared = text_prerparer.text_preparer(aParser.remove_all_tags(raw_text))
    return NewsText(news=news, text=prepared), url_list
def worker(input_q: JoinableQueue, output: Queue):
    """Consume tasks from ``input_q`` until the ``"end"`` sentinel is received.

    For every task the page at ``task.url`` is downloaded and, if HTML came
    back, run through the text extractor. The task's URL is then put on
    ``output``. Every item taken from ``input_q``, including the sentinel,
    is acknowledged with ``task_done()`` so that ``input_q.join()`` can return.

    Args:
        input_q: Queue of task objects exposing ``url``, terminated by the
            string ``"end"``.
        output: Queue that receives the URL of each processed task.
    """
    from django import db

    # Drop the current database connection before starting the loop.
    # NOTE(review): presumably so this process does not share a connection
    # inherited from its parent; confirm against the caller.
    db.connection.close()

    while True:
        task = input_q.get()
        if task == "end":
            # The sentinel was obtained with get() as well; without this
            # acknowledgement a producer blocked in input_q.join() never wakes.
            input_q.task_done()
            break
        try:
            html = urlOpen.get_html(task.url)
            if html:
                # The extracted text is not used further; only the call is kept.
                textParser.get_text_from_html(html)
        finally:
            # Acknowledge the task even if fetching or parsing raised.
            input_q.task_done()
        output.put(task.url)
    print("exit")
def worker(news):
    """Download a news page and wrap its extracted text in a ``NewsText``.

    Args:
        news: Object exposing ``id`` and ``url``; passed to ``NewsText`` as
            the ``news`` argument.

    Returns:
        An unsaved ``NewsText`` when ``urlOpen.get_html`` returned HTML,
        otherwise ``None``.
    """
    progress = str(news.id) + " "
    print(progress)
    page = urlOpen.get_html(news.url)
    if not page:
        return None
    extracted = textParser.get_text_from_html(page)
    return NewsText(news=news, text=extracted)
def worker(news):
    """Build a ``NewsText`` for ``news`` from the page at ``news.url``.

    The item's id is printed first, then the page is fetched and its text
    extracted.

    Args:
        news: Object with ``id`` and ``url`` attributes.

    Returns:
        A ``NewsText`` (constructed, not saved) if the download produced
        HTML; ``None`` if it did not.
    """
    print(str(news.id) + " ")
    markup = urlOpen.get_html(news.url)
    return (
        NewsText(news=news, text=textParser.get_text_from_html(markup))
        if markup
        else None
    )