def handle(self, *args, **options):
    """Rebuild the article keyword index.

    Flushes the cached Redis key, then enqueues one reindex job per
    article for the rq workers to process asynchronously.
    """
    # Parenthesized single-arg print: identical output on Python 2,
    # forward-compatible with Python 3.
    print("in process")
    ## delete the redis key
    redis_conn.delete("RedisStore:article")
    # iterator() streams rows instead of materializing the whole table
    # in memory; results are identical to .all().
    for article in Article.objects.all().iterator():
        q.enqueue(article.make_keyword_index)
    print("workers are working on reindexing, type`rqinfo` to check progress")
def add_article(request):
    """Register the posted URL for the requesting user.

    Gets or creates the Article for ``url``, links it to the user, and
    defers the heavy fetch/parse work to an rq worker the first time
    this user posts that URL.  Returns an empty dict on success.
    """
    # Normalize a missing or empty "url" field to None.
    url = request.POST.get("url") or None
    if url is None:
        raise ParseError("require url parameter")
    article, _ = Article.objects.get_or_create(original_url=url)
    post, is_new = UserPostArticle.objects.get_or_create(
        article=article, user=request.user)
    # post process in rq worker — only for a newly created link.
    if is_new:
        q.enqueue(post.defer_process)
    return {}
def handle(self, *args, **options):
    """Scrape article links from news.dbanotes.net.

    Posts every ``.title a`` link as the first user in the database (a
    "machine" account) and enqueues background processing for links
    that were not already posted.
    """
    # filter() with no arguments is a no-op queryset clone; first()
    # alone is the idiomatic equivalent.
    machine = User.objects.first()
    ## try http://news.dbanotes.net/
    d = pq(url="http://news.dbanotes.net/")
    for element in d(".title a"):
        url = pq(element).attr("href")
        article, _ = Article.objects.get_or_create(original_url=url)
        upa, created = UserPostArticle.objects.get_or_create(
            article=article, user=machine)
        # post process in rq worker
        if created:
            q.enqueue(upa.defer_process)
def handle(self, *args, **options):
    """Scrape article links from news.dbanotes.net.

    NOTE(review): this command appears to duplicate another identical
    scraper command — consider consolidating them.

    Posts every ``.title a`` link as the first user in the database (a
    "machine" account) and enqueues background processing for links
    that were not already posted.
    """
    # filter() with no arguments is a no-op queryset clone; first()
    # alone is the idiomatic equivalent.
    machine = User.objects.first()
    ## try http://news.dbanotes.net/
    d = pq(url="http://news.dbanotes.net/")
    for element in d(".title a"):
        url = pq(element).attr("href")
        article, _ = Article.objects.get_or_create(original_url=url)
        upa, created = UserPostArticle.objects.get_or_create(
            article=article, user=machine)
        # post process in rq worker
        if created:
            q.enqueue(upa.defer_process)
def defer_process(self):
    """Fetch and extract the posted article's readable content.

    Runs inside an rq worker: downloads ``self.article.original_url``,
    extracts a readable body and title via readability, marks the
    article finished, registers it in the Redis primary-id set, and
    chains an image-fetching job.  No-op if already finished.
    """
    from readability.readability import Document  # local: heavy import, worker-only
    article = self.article
    if not article.finished:
        header = {
            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:32.0) Gecko/20100101 Firefox/32.0',
        }
        # FIXME(security): verify=False disables TLS certificate
        # validation; kept because some posted sites may have broken
        # certs, but this allows MITM — confirm before relying on it.
        response = requests.get(article.original_url, headers=header,
                                verify=False)
        response.encoding = chardet.detect(response.content)["encoding"]
        html = response.text
        # Parse once and reuse: Document() re-parses the HTML on every
        # construction, so the original double call did the work twice.
        doc = Document(html)
        article.content = doc.summary()
        article.title = doc.short_title()
        article.primary = True  # TODO
        article.finished = True
        redis_conn.sadd(Article.ALL_PRIMARY_IDS_KEY, article.id)
        article.save()
        q.enqueue(article._catch_image)
def handle(self, *args, **options):
    """Import articles from a Pocket export HTML file.

    Reads the file at ``options["pocket_path"]``, posts every anchor's
    href as the first user in the database, and enqueues background
    processing for newly created posts.
    """
    # Parenthesized single-arg print: identical output on Python 2,
    # forward-compatible with Python 3.
    print("in process")
    file_path = options["pocket_path"]
    # NOTE(review): no explicit parser is passed to BeautifulSoup, so
    # the result depends on which parsers are installed — consider
    # pinning one (e.g. "html.parser").  Left unchanged to preserve
    # current behavior.
    with open(file_path, 'r') as pocket_file:
        soup = BeautifulSoup(pocket_file.read())
    # filter() with no arguments is a no-op; first() is equivalent.
    user = User.objects.first()
    for link in soup.find_all('a'):
        url = link.get('href')
        article, _ = Article.objects.get_or_create(original_url=url)
        upa, created = UserPostArticle.objects.get_or_create(
            article=article, user=user)
        # post process in rq worker
        if created:
            q.enqueue(upa.defer_process)
    print("done")
def handle(self, *args, **options):
    """Import articles from a Pocket export HTML file.

    NOTE(review): this command appears to duplicate another identical
    Pocket-import command — consider consolidating them.

    Reads the file at ``options["pocket_path"]``, posts every anchor's
    href as the first user in the database, and enqueues background
    processing for newly created posts.
    """
    # Parenthesized single-arg print: identical output on Python 2,
    # forward-compatible with Python 3.
    print("in process")
    file_path = options["pocket_path"]
    # NOTE(review): no explicit parser is passed to BeautifulSoup, so
    # the result depends on which parsers are installed — consider
    # pinning one (e.g. "html.parser").  Left unchanged to preserve
    # current behavior.
    with open(file_path, 'r') as pocket_file:
        soup = BeautifulSoup(pocket_file.read())
    # filter() with no arguments is a no-op; first() is equivalent.
    user = User.objects.first()
    for link in soup.find_all('a'):
        url = link.get('href')
        article, _ = Article.objects.get_or_create(original_url=url)
        upa, created = UserPostArticle.objects.get_or_create(
            article=article, user=user)
        # post process in rq worker
        if created:
            q.enqueue(upa.defer_process)
    print("done")
def defer_process(self):
    """Fetch and extract the posted article's readable content.

    NOTE(review): appears to duplicate another identical
    ``defer_process`` definition in this codebase — consolidate.

    Runs inside an rq worker: downloads ``self.article.original_url``,
    extracts a readable body and title via readability, marks the
    article finished, registers it in the Redis primary-id set, and
    chains an image-fetching job.  No-op if already finished.
    """
    from readability.readability import Document  # local: heavy import, worker-only
    article = self.article
    if not article.finished:
        header = {
            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:32.0) Gecko/20100101 Firefox/32.0',
        }
        # FIXME(security): verify=False disables TLS certificate
        # validation; kept because some posted sites may have broken
        # certs, but this allows MITM — confirm before relying on it.
        response = requests.get(article.original_url, headers=header,
                                verify=False)
        response.encoding = chardet.detect(response.content)["encoding"]
        html = response.text
        # Parse once and reuse: Document() re-parses the HTML on every
        # construction, so the original double call did the work twice.
        doc = Document(html)
        article.content = doc.summary()
        article.title = doc.short_title()
        article.primary = True  # TODO
        article.finished = True
        redis_conn.sadd(Article.ALL_PRIMARY_IDS_KEY, article.id)
        article.save()
        q.enqueue(article._catch_image)