Beispiel #1
0
    def _find_image_url(self):
        """Return the URL of an image representing the site at ``self._url``.

        Candidates are tried in order and the first hit wins:

        1. An ``<img>`` nested inside a link back to the home page
           (presumably the site logo); its ``src`` is passed through
           ``self._absolute_url``.
        2. A ``<link>`` element whose ``rel`` contains ``icon``; its ``href``
           is passed through ``self._absolute_url`` as well.
        3. ``/favicon.ico`` at the root of the site.

        For the first two steps only non-empty URLs accepted by
        ``is_supported_image`` are used; the final fallback is returned
        without any check.
        """
        netloc = urlparse.urlparse(self._url).netloc

        # Every href form treated as "a link to the home page", in priority
        # order: root-relative links first, then links ending in the host.
        home_link_selectors = (
            'a[href="/"] img',
            'a[href="/."] img',
            'a[href$="%s"] img' % netloc,
            'a[href$="%s/"] img' % netloc,
            'a[href$="http://%s"] img' % netloc,
            'a[href$="http://%s/"] img' % netloc,
        )
        for selector in home_link_selectors:
            for img_element in self._soup.select(selector):
                image_url = img_element.get("src")
                if image_url and is_supported_image(image_url):
                    return self._absolute_url(image_url)

        for link_element in self._soup.select('link[rel*="icon"]'):
            link_url = link_element.get("href")
            if link_url and is_supported_image(link_url):
                return self._absolute_url(link_url)

        # The leading slash anchors the fallback at the site root. A bare
        # "favicon.ico" is resolved relative to the path of self._url, so a
        # page such as http://host/blog/post would yield /blog/favicon.ico.
        return urlparse.urljoin(self._url, "/favicon.ico")
Beispiel #2
0
 def get_preview_image_url(self):
     """Return the ``src`` of the first supported image, or ``None``.

     Images are taken from ``self._unmassaged_soup`` using the selector
     ``div#body div.copy.post-body img``. The first non-empty ``src``
     accepted by ``is_supported_image`` is returned unchanged.
     """
     candidates = self._unmassaged_soup.select('div#body div.copy.post-body img')
     sources = (candidate.get('src') for candidate in candidates)
     # Lazy scan: stops at the first acceptable source, None if there is none.
     return next(
         (source for source in sources if source and is_supported_image(source)),
         None,
     )
Beispiel #3
0
 def get_preview_image_url(self):
     """Return the ``data-src`` of the first supported image, or ``None``.

     Images are taken from ``self._unmassaged_soup`` using the selector
     ``article.full-article img``. The URL is read from the ``data-src``
     attribute (not ``src``) and returned unchanged.
     """
     for candidate in self._unmassaged_soup.select('article.full-article img'):
         deferred_src = candidate.get('data-src')
         if not deferred_src:
             # Attribute missing or empty: nothing to evaluate for this tag.
             continue
         if is_supported_image(deferred_src):
             return deferred_src
     return None
Beispiel #4
0
 def get_preview_image_url(self):
     """Return the ``src`` of the first supported image, or ``None``.

     Images are taken from ``self._unmassaged_soup`` using the selector
     ``div.l-main-container div.l-main div.article-entry.text img``. The
     first non-empty ``src`` accepted by ``is_supported_image`` is
     returned unchanged.
     """
     selector = 'div.l-main-container div.l-main div.article-entry.text img'
     for node in self._unmassaged_soup.select(selector):
         src = node.get('src')
         usable = bool(src) and is_supported_image(src)
         if usable:
             return src
     return None