Example #1
0
def get_direct_link(url):
    """
    Resolve a non-gallery URL to a direct media link.

    The URL is returned unchanged when `is_direct_link` already accepts it.
    Otherwise the URL is requested: if the response's final URL contains
    'i.img', that URL is returned. Failing that, the response body is scanned
    for element attributes that `is_direct_link` accepts together with the
    last path segment of `url`; the first hit is returned, joined onto
    'https://i.imgur.com/'. Returns None when nothing matches.
    """
    if is_direct_link(url):
        return url

    base_img = url.split("/")[-1]
    req = http_downloader.open_request(url, stream=False)
    if 'i.img' in req.url:
        # The request was redirected straight to an image.
        return req.url

    # (tag, attribute, animation_only) -- scanned in this order. The last two
    # cover embedded video, so a hit must also contain an animation extension.
    searches = (
        ('link', 'href', False),
        ('img', 'src', False),
        ('meta', 'content', True),
        ('source', 'src', True),
    )
    for tag, attr, animation_only in searches:
        for candidate in stringutil.html_elements(req.text, tag, attr):
            if not is_direct_link(candidate, base_img):
                continue
            if animation_only and not any(
                    ext in candidate for ext in imgur_animation_exts):
                continue
            return urllib.parse.urljoin('https://i.imgur.com/', candidate)
    return None
Example #2
0
 def _submission(self, post):
     """ Handle a Submission.

     Copies the post's identifying fields onto this object and passes every
     URL found for it to ``self.add_url``:

     * links in the selftext HTML, when the selftext is not blank;
     * for gallery posts that carry ``media_metadata``, each entry's
       ``['s']['u']`` value;
     * otherwise ``post.url``, when it is set and not blank.

     :param post: The submission to read from.
     """
     self.type = 'Submission'
     self.id = str(post.fullname)
     self.title = str(post.title)
     self.subreddit = str(post.subreddit.display_name)
     if post.author is None:
         # No author object is available; record a placeholder name.
         self.author = 'Deleted'
     else:
         self.author = str(post.author.name)
     self.over_18 = post.over_18
     self.num_comments = post.num_comments
     self.score = post.score
     self.body = post.selftext
     if post.selftext.strip() != '':
         # This post probably doesn't have a URL, and has selftext instead.
         for url in stringutil.html_elements(post.selftext_html, 'a',
                                             'href'):
             self.add_url(url)
     if getattr(post, 'is_gallery', False) and getattr(
             post, 'media_metadata', False):
         # Only the metadata values are needed; the keys are not used.
         for img in post.media_metadata.values():
             try:
                 self.add_url(img['s']['u'])
             except Exception:
                 # Best-effort: one unusable album entry must not abort the
                 # rest. Catching Exception (rather than a bare except) lets
                 # KeyboardInterrupt and SystemExit propagate.
                 stringutil.error('Unable to parse URL from reddit album.')
     elif post.url is not None and post.url.strip() != '':
         self.add_url(post.url)
	def _comment(self, c):
		""" Handle a Comment object.

		Copies the comment's fields onto this object and passes every link
		found in the comment's HTML body to ``self.add_url``. Several fields
		are resolved through ``self._comment_field`` rather than read directly
		(presumably falling back to the parent submission -- confirm against
		that helper).

		:param c: The comment to read from.
		"""
		self.type = 'Comment'
		self.id = str(c.fullname)
		self.parent = self._comment_field(c, 'link_id', 'fullname')
		self.title = self._comment_field(c, 'link_title', 'title')
		self.subreddit = str(c.subreddit.display_name)
		# A falsy author is recorded under a placeholder name.
		self.author = str(c.author.name) if c.author else 'Deleted'
		# These three use the same name for both lookup keys.
		for field in ('over_18', 'num_comments', 'score'):
			setattr(self, field, self._comment_field(c, field, field))
		self.body = c.body
		for link in stringutil.html_elements(c.body_html, 'a', 'href'):
			self.add_url(link)
	def _submission(self, post):
		""" Handle a Submission.

		Copies the post's fields onto this object, then passes to
		``self.add_url`` every link found in a non-blank selftext, followed
		by ``post.url`` when it is set and not blank.

		:param post: The submission to read from.
		"""
		self.type = 'Submission'
		self.id = str(post.fullname)
		self.title = str(post.title)
		self.subreddit = str(post.subreddit.display_name)
		# A missing author is recorded under a placeholder name.
		self.author = 'Deleted' if post.author is None else str(post.author.name)
		self.over_18 = post.over_18
		self.num_comments = post.num_comments
		self.score = post.score

		text = post.selftext
		self.body = text
		if text.strip() != '':
			# Self posts carry their links inside the rendered body.
			for link in stringutil.html_elements(post.selftext_html, 'a', 'href'):
				self.add_url(link)

		target = post.url
		if target is not None and target.strip() != '':
			self.add_url(target)
Example #5
0
	def test_html_elements(self):
		""" Nested <a> hrefs are returned; the look-alike <afake> tag is not. """
		markup = '<afake href="nope"></fake><a href="test"><a href="test-nested"></a></a>'
		found = sorted(su.html_elements(markup))
		expected = sorted(['test', 'test-nested'])
		self.assertEqual(found, expected)