Example #1
0
 def complement(self):
     """Fetch each entry's article and fill in details from the page.

     Generator. For every entry in ``self.entries`` the entry URL is
     requested (10 second timeout) and the entry is updated in place:

     * ``url``     -- the final URL reported by the response
     * ``image``   -- first ``og:image`` / ``twitter:image:src`` meta
                      value found, or ``''`` when the page has neither
     * ``title``   -- ``document.short_title()``
     * ``content`` -- ``document.summary()``

     Entries whose request raises ``requests.RequestException`` are
     logged as warnings and skipped; they are not yielded.

     Yields:
         The same entry objects from ``self.entries``, after update.
     """
     for entry in self.entries:
         try:
             response = requests.get(entry.url, timeout=10)
         except requests.RequestException as excp:
             # Log the exception object itself: ``.message`` does not
             # exist on Python 3 exceptions, so reading it here would
             # raise AttributeError and abort the whole generator.
             logger.warning('Exception requesting article %s: %s',
                            entry.url, excp)
             continue
         document = Document(response.content, url=response.url)
         # Image extraction first
         # NOTE(review): relies on the private ``_html()`` hook to make
         # ``document.html`` available before any public method is
         # called -- confirm against the Document implementation in use.
         document._html()  # Trigger parsing
         # Open Graph images are listed first so they take priority over
         # the Twitter card image when both are present.
         images = document.html.xpath(
             '//meta[@property="og:image"]/@content')
         images += document.html.xpath(
             '//meta[@name="twitter:image:src"]/@content')
         # Content extraction second
         entry.url = response.url
         entry.image = (images or [''])[0]
         entry.title = document.short_title()
         entry.content = document.summary()
         yield entry