Beispiel #1
0
	def retrieve(self, url):
		"""Collect the article HTML for ``url`` into a ParserResponse.

		Every page returned by ``self._pages(url)`` is downloaded and parsed
		as an ``Article`` (with ``keep_article_html=True``), and each page's
		``article_html`` is appended to the response content in order.

		When ``self._config.strip_images`` is truthy, ``<img ...>`` tags are
		removed from the combined content. If the content ends up empty, the
		response's ``error`` is set to a message naming the URL.

		Returns the ParserResponse.
		"""
		page_urls = self._pages(url)
		result = ParserResponse()

		for page_url in page_urls:
			doc = Article(page_url, keep_article_html=True)
			doc.download()
			doc.parse()
			result.content += doc.article_html

		if self._config.strip_images:
			img_tag = re.compile('<img [^<]+?>')
			result.content = img_tag.sub('', result.content)

		if not result.content:
			result.error = "Parser could not get content for URL '%s'" % url

		return result
Beispiel #2
0
	def retrieve(self, url):
		"""Public method to retrieve content for an article URL.

		Sends a GET request to ``Parser.Endpoint`` with the configured
		Readability token and the article URL, decodes the JSON reply and
		wraps the outcome in a ParserResponse.

		Args:
			url: URL of the article to parse.

		Returns:
			A ParserResponse. ``content`` holds the ``content`` field of the
			reply, with ``<img ...>`` tags removed when
			``self._config.strip_images`` is truthy. When no content was
			obtained, ``error`` is set: to the error reported by the service
			if the reply contained one, otherwise to a generic message
			naming the URL.

		Exceptions raised by ``requests.get`` or ``json.loads`` are not
		caught here and propagate to the caller.
		"""
		request = requests.get(Parser.Endpoint, params={'token': self._config.readability.token, 'url': url})
		article = json.loads(request.text)
		response = ParserResponse()
		service_error = None

		# A reply that is valid JSON but not an object (e.g. a bare string or
		# list) has no fields to read; treat it as an empty reply so it falls
		# through to the generic "no content" error instead of raising.
		if not isinstance(article, dict):
			article = {}

		if 'error' in article:
			# .get(): an error payload without 'messages' must not raise
			# KeyError. The one-element tuple keeps the formatting correct
			# even if 'messages' is itself a tuple.
			service_error = "Readability parser error: %s" % (article.get('messages'),)
			response.error = service_error

		# Skip a JSON null 'content'; assigning it would make re.sub() below
		# raise TypeError.
		if article.get('content') is not None:
			response.content = article['content']

		if self._config.strip_images and response.content:
			response.content = re.sub('<img [^<]+?>', '', response.content)

		# Only fall back to the generic message when the service did not
		# report an error of its own; otherwise the specific reason would be
		# overwritten (an error reply carries no content).
		if not response.content and service_error is None:
			response.error = "Parser could not get content for URL '%s'" % (url)

		return response