def _find_image_url(self):
    """Pick an image URL that represents the site behind ``self._url``.

    Candidates are tried in this order:

    1. ``img`` elements nested in links that point at the site root
       (``/``, ``/.``, or an ``href`` ending in the URL's network location,
       with or without a trailing slash / ``http://`` prefix); the ``src``
       attribute is used.
    2. ``link`` elements whose ``rel`` contains ``icon``; the ``href``
       attribute is used.
    3. ``favicon.ico`` joined onto ``self._url``.

    For 1 and 2 the first non-empty value accepted by
    ``is_supported_image`` wins and is returned via ``self._absolute_url``.
    """
    host = urlparse.urlparse(self._url).netloc

    # Every selector is evaluated up front, in this order, so that images
    # from earlier (more specific) home-link patterns are preferred.
    home_link_selectors = (
        'a[href="/"] img',
        'a[href="/."] img',
        'a[href$="%s"] img' % host,
        'a[href$="%s/"] img' % host,
        'a[href$="http://%s"] img' % host,
        'a[href$="http://%s/"] img' % host,
    )
    logo_candidates = []
    for selector in home_link_selectors:
        logo_candidates.extend(self._soup.select(selector))

    def first_supported(elements, attribute):
        # Return the first non-empty, supported attribute value, else None.
        for element in elements:
            value = element.get(attribute)
            if value and is_supported_image(value):
                return value
        return None

    chosen = first_supported(logo_candidates, "src")
    if chosen is None:
        # Only query for icon links once the logo search has come up empty.
        chosen = first_supported(
            self._soup.select('link[rel*="icon"]'), "href")
    if chosen is not None:
        return self._absolute_url(chosen)

    # NOTE(review): a relative "favicon.ico" resolves against the directory
    # of self._url, not the site root -- confirm that is intended.
    return urlparse.urljoin(self._url, "favicon.ico")
def get_preview_image_url(self):
    """Return the ``src`` of the first supported image in the post body.

    Looks through ``self._unmassaged_soup`` for ``img`` tags matching
    ``div#body div.copy.post-body img`` and returns the first non-empty
    ``src`` value accepted by ``is_supported_image``. The value is returned
    exactly as it appears in the markup. Returns ``None`` when no image
    qualifies.
    """
    sources = (
        tag.get('src')
        for tag in self._unmassaged_soup.select(
            'div#body div.copy.post-body img')
    )
    return next(
        (src for src in sources if src and is_supported_image(src)),
        None,
    )
def get_preview_image_url(self):
    """Return the ``data-src`` of the first supported image in the article.

    Looks through ``self._unmassaged_soup`` for ``img`` tags matching
    ``article.full-article img`` and returns the first non-empty
    ``data-src`` value accepted by ``is_supported_image``. Note that the
    ``data-src`` attribute is read here, not ``src``. The value is returned
    exactly as it appears in the markup. Returns ``None`` when no image
    qualifies.
    """
    sources = (
        tag.get('data-src')
        for tag in self._unmassaged_soup.select('article.full-article img')
    )
    return next(
        (src for src in sources if src and is_supported_image(src)),
        None,
    )
def get_preview_image_url(self):
    """Return the ``src`` of the first supported image in the article entry.

    Looks through ``self._unmassaged_soup`` for ``img`` tags matching
    ``div.l-main-container div.l-main div.article-entry.text img`` and
    returns the first non-empty ``src`` value accepted by
    ``is_supported_image``. The value is returned exactly as it appears in
    the markup. Returns ``None`` when no image qualifies.
    """
    sources = (
        tag.get('src')
        for tag in self._unmassaged_soup.select(
            'div.l-main-container div.l-main div.article-entry.text img')
    )
    return next(
        (src for src in sources if src and is_supported_image(src)),
        None,
    )