def complement(self):
    """Fetch each entry's article and fill in its image, title and content.

    Iterates over ``self.entries``, downloads ``entry.url`` and parses the
    response with ``Document``. For every successfully fetched entry the
    following attributes are overwritten before it is yielded:

    * ``url``     -- the final URL reported by the response
    * ``image``   -- first ``og:image`` / ``twitter:image:src`` meta value,
      or ``''`` when the page declares neither
    * ``title``   -- ``document.short_title()``
    * ``content`` -- ``document.summary()``

    Entries whose request raises ``requests.RequestException`` are logged
    and skipped; they are not yielded.

    Yields:
        The same entry objects from ``self.entries``, mutated in place.
    """
    # Checked in priority order: Open Graph first, then the Twitter card.
    image_queries = (
        '//meta[@property="og:image"]/@content',
        '//meta[@name="twitter:image:src"]/@content',
    )

    for entry in self.entries:
        try:
            response = requests.get(entry.url, timeout=10)
        except requests.RequestException as excp:
            # Log the exception object itself: ``excp.message`` does not
            # exist on Python 3 and would raise AttributeError here,
            # aborting the whole generator instead of skipping one entry.
            logger.warning(
                'Exception requesting article %s: %s', entry.url, excp)
            continue

        document = Document(response.content, url=response.url)

        # Image extraction first.
        # Trigger parsing so that ``document.html`` is available below.
        document._html()
        images = []
        for query in image_queries:
            images += document.html.xpath(query)

        # Content extraction second.
        entry.url = response.url
        entry.image = images[0] if images else ''
        entry.title = document.short_title()
        entry.content = document.summary()
        yield entry