def get_direct_link(url):
    """
    Resolve a non-gallery imgur URL to its direct image (or video) link.

    Resolution order:
      1. If `url` already is a direct link, it is returned untouched.
      2. If requesting `url` ends up on a URL containing 'i.img', that final
         URL is returned.
      3. Otherwise the page text is scanned, tag by tag, for the first element
         value that is a direct link for this image name.

    Returns None when nothing matches.
    """
    if is_direct_link(url):
        return url

    image_name = url.split("/")[-1]
    response = http_downloader.open_request(url, stream=False)
    if 'i.img' in response.url:
        # Redirected to valid Image.
        return response.url

    # (tag, attribute, animation_only), scanned in this exact order.
    # The first two rows parse the page for an image; the last two check for
    # embedded video and therefore also require an animation extension.
    scan_order = (
        ('link', 'href', False),
        ('img', 'src', False),
        ('meta', 'content', True),
        ('source', 'src', True),
    )
    for tag, attr, animation_only in scan_order:
        for candidate in stringutil.html_elements(response.text, tag, attr):
            if not is_direct_link(candidate, image_name):
                continue
            if animation_only and not any(
                    (ext in candidate) for ext in imgur_animation_exts):
                continue
            return urllib.parse.urljoin('https://i.imgur.com/', candidate)
    return None
def _submission(self, post): """ Handle a Submission. """ # out("[Post](%s): %s" % (post.subreddit.display_name, post.title) ) self.type = 'Submission' self.id = str(post.fullname) self.title = str(post.title) self.subreddit = str(post.subreddit.display_name) if post.author is None: self.author = 'Deleted' else: self.author = str(post.author.name) self.over_18 = post.over_18 self.num_comments = post.num_comments self.score = post.score self.body = post.selftext if post.selftext.strip() != '': # This post probably doesn't have a URL, and has selftext instead. for url in stringutil.html_elements(post.selftext_html, 'a', 'href'): self.add_url(url) if getattr(post, 'is_gallery', False) and getattr( post, 'media_metadata', False): for k, img in post.media_metadata.items(): try: self.add_url(img['s']['u']) except: stringutil.error('Unable to parse URL from reddit album.') elif post.url is not None and post.url.strip() != '': self.add_url(post.url)
def _comment(self, c):
    """
    Handle a Comment object.

    Fills this element's fields from comment `c` and registers every
    ``<a href>`` found in the comment's HTML body through ``self.add_url``.
    Several fields are resolved with ``self._comment_field``, which receives
    the comment and two attribute names.
    NOTE(review): presumably the second name is a fallback looked up on the
    parent submission - confirm against ``_comment_field``.
    """
    lookup = self._comment_field
    author = c.author

    self.type = 'Comment'
    self.id = str(c.fullname)
    self.parent = lookup(c, 'link_id', 'fullname')
    self.title = lookup(c, 'link_title', 'title')
    self.subreddit = str(c.subreddit.display_name)
    # A falsy author is recorded as 'Deleted'.
    self.author = str(author.name) if author else 'Deleted'
    self.over_18 = lookup(c, 'over_18', 'over_18')
    self.num_comments = lookup(c, 'num_comments', 'num_comments')
    self.score = lookup(c, 'score', 'score')
    self.body = c.body

    for link in stringutil.html_elements(c.body_html, 'a', 'href'):
        self.add_url(link)
def _submission(self, post): """ Handle a Submission. """ # out("[Post](%s): %s" % (post.subreddit.display_name, post.title) ) self.type = 'Submission' self.id = str(post.fullname) self.title = str(post.title) self.subreddit = str(post.subreddit.display_name) if post.author is None: self.author = 'Deleted' else: self.author = str(post.author.name) self.over_18 = post.over_18 self.num_comments = post.num_comments self.score = post.score self.body = post.selftext if post.selftext.strip() != '': # This post probably doesn't have a URL, and has selftext instead. for url in stringutil.html_elements(post.selftext_html, 'a', 'href'): self.add_url(url) if post.url is not None and post.url.strip() != '': self.add_url(post.url)
def test_html_elements(self):
    """
    All HTML elements should be found.

    Calls ``html_elements`` with only the HTML string and expects the href
    values of both the outer and the nested ``<a>`` tags; the ``<afake>``
    tag's href must not be reported.
    """
    html = '''<afake href="nope"></fake><a href="test"><a href="test-nested"></a></a>'''
    found = sorted(su.html_elements(html))
    expected = sorted(['test', 'test-nested'])
    self.assertEqual(found, expected)