def test_url_mutation(self):
    """Check that ``is_reddit_url()`` follows later changes to ``hostname``.

    The hostname is reassigned after the ``UrlParser`` has been built, so
    the result must reflect the new value rather than the originally
    parsed one.
    """
    # A foreign URL re-pointed at the site's own domain counts as reddit.
    foreign = UrlParser("http://example.com/")
    foreign.hostname = g.domain
    self.assertTrue(foreign.is_reddit_url())

    # A URL on the site's own domain re-pointed elsewhere no longer does.
    local = UrlParser("http://%s/" % g.domain)
    local.hostname = "example.com"
    self.assertFalse(local.is_reddit_url())
def _get_scrape_url(link):
    """Choose the URL that should be scraped for ``link``.

    Self posts: the URLs found in ``link.selftext`` are walked in order,
    skipping reddit URLs.  The first one hosted on imgur or carrying an
    image extension is returned immediately; otherwise the first
    non-reddit URL is returned, or ``None`` when there is none.

    Link posts: ``link.url`` is returned unchanged unless the
    "imgur_gif_conversion" feature is enabled for the link's subreddit
    and the URL is an imgur ``.gif``, in which case the extension is
    swapped for ``gifv``.
    """
    if link.is_self:
        fallback = None
        for candidate in extract_urls_from_markdown(link.selftext):
            parsed = UrlParser(candidate)
            if parsed.is_reddit_url():
                continue

            # This is an optimization for "proof images" in AMAs.
            if (is_subdomain(parsed.netloc, 'imgur.com') or
                    parsed.has_image_extension()):
                return candidate

            # Remember the first acceptable URL in case nothing better
            # turns up later in the text.
            if not fallback:
                fallback = candidate
        return fallback

    sr_name = link.subreddit_slow.name
    if not feature.is_enabled("imgur_gif_conversion", subreddit=sr_name):
        return link.url

    parsed = UrlParser(link.url)
    is_imgur_gif = (
        is_subdomain(parsed.hostname, "imgur.com") and
        parsed.path_extension().lower() == "gif"
    )
    if not is_imgur_gif:
        return link.url

    # If it's a gif link on imgur, replacing it with gifv should
    # give us the embedly friendly video url
    parsed.set_extension("gifv")
    return parsed.unparse()
def format_output_url(cls, url, **kw):
    """
    Helper method used during redirect to ensure that the redirect
    url (assisted by frame busting code or javascript) will point
    to the correct domain and not have any extra dangling get
    parameters.  The extensions are also made to match and the
    resulting url is utf8 encoded.

    Note: for development purposes, also checks that the port
    matches the request port.

    Arguments:
      * url: the redirect target to normalize.
      * **kw: forwarded to ``UrlParser.mk_cname``; ``port`` defaults to
        ``request.port`` when the caller does not supply it.

    Returns the utf8-encoded url.

    Raises ValueError if the encoded url contains any whitespace
    character.
    """
    u = UrlParser(url)

    # only urls that belong to this site are rewritten; anything else is
    # just re-serialized and encoded below
    if u.is_reddit_url():
        # default the port to the one of the current request unless the
        # caller chose one explicitly
        if "port" not in kw:
            kw["port"] = request.port

        # disentangle the cname (for urls that would have
        # cnameframe=1 in them)
        u.mk_cname(**kw)

        # make sure the extensions agree with the current page
        if c.extension:
            u.set_extension(c.extension)

    # unparse and encode it in utf8
    rv = _force_unicode(u.unparse()).encode("utf8")
    if any(ch.isspace() for ch in rv):
        raise ValueError("Space characters in redirect URL: [%r]" % rv)
    return rv
def format_output_url(cls, url, **kw):
    """
    Helper method used during redirect to ensure that the redirect
    url (assisted by frame busting code or javascript) will point
    to the correct domain and not have any extra dangling get
    parameters.  The extensions are also made to match and the
    resulting url is utf8 encoded.

    Note: for development purposes, also checks that the port
    matches the request port.

    Arguments:
      * url: the redirect target to normalize.
      * **kw: forwarded to ``UrlParser.mk_cname``; ``port`` defaults to
        ``request.port`` when the caller does not supply it.

    Returns the utf8-encoded url.  The request is aborted with a 400
    if the encoded url contains a carriage return or a newline.
    """
    u = UrlParser(url)

    # only urls that belong to this site are rewritten; anything else is
    # just re-serialized and encoded below
    if u.is_reddit_url():
        # default the port to the one of the current request unless the
        # caller chose one explicitly
        if 'port' not in kw:
            kw['port'] = request.port

        # disentangle the cname (for urls that would have
        # cnameframe=1 in them)
        u.mk_cname(**kw)

        # make sure the extensions agree with the current page
        if c.extension:
            u.set_extension(c.extension)

    # unparse and encode it in utf8
    rv = _force_unicode(u.unparse()).encode('utf8')
    # refuse line breaks in the redirect target
    if "\n" in rv or "\r" in rv:
        abort(400)
    return rv
def format_output_url(cls, url, **kw):
    """
    Helper method used during redirect to ensure that the redirect
    url (assisted by frame busting code or javascript) will point
    to the correct domain and not have any extra dangling get
    parameters.  The extensions are also made to match and the
    resulting url is utf8 encoded.

    Note: for development purposes, also checks that the port
    matches the request port.

    Arguments:
      * url: the redirect target to normalize.
      * preserve_extension (keyword, default True): when true and
        ``c.extension`` is set, the url's extension is set to
        ``c.extension``.  It is removed from ``kw`` before the rest is
        forwarded.
      * **kw: remaining keywords are forwarded to
        ``UrlParser.mk_cname``; ``port`` defaults to ``request.port``
        when the caller does not supply it.

    Returns the utf8-encoded url.  The request is aborted with a 400
    if the encoded url contains a carriage return or a newline.
    """
    # pulled out first so that it is never passed on to mk_cname
    preserve_extension = kw.pop("preserve_extension", True)
    u = UrlParser(url)

    # only urls that belong to this site are rewritten; anything else is
    # just re-serialized and encoded below
    if u.is_reddit_url():
        # default the port to the one of the current request unless the
        # caller chose one explicitly
        if 'port' not in kw:
            kw['port'] = request.port

        # disentangle the cname (for urls that would have
        # cnameframe=1 in them)
        u.mk_cname(**kw)

        # make sure the extensions agree with the current page
        if preserve_extension and c.extension:
            u.set_extension(c.extension)

    # unparse and encode it in utf8
    rv = _force_unicode(u.unparse()).encode('utf8')
    # refuse line breaks in the redirect target
    if "\n" in rv or "\r" in rv:
        abort(400)
    return rv
def add_sr(path, sr_path=True, nocname=False, force_hostname=False,
           retain_extension=True, force_https=False, force_extension=None):
    """
    Rewrite a path or full url so that it carries the current
    subreddit, hostname, scheme and extension.

    Arguments:

     * path: a relative path or a full-fledged url.  Anchors ('#...')
       and 'javascript:' urls are returned untouched.

     * sr_path: if True, the subreddit path of c.site is added to the
       url's path.

     * nocname: deprecated; accepted for compatibility and not used.

     * force_hostname: if True, the hostname is replaced even when the
       url already has one.  A url without a hostname always gets one.

     * retain_extension: if True (and force_extension is None), the
       extension is set from c.render_style for the 'mobile' and
       'compact' styles and for the style named by
       g.extension_subdomain_mobile_v2_render_style.

     * force_https: force the URL scheme to https.

     * force_extension: if not None, this extension is set and
       retain_extension is ignored.

    For caching purposes: note that this function uses:
      c.render_style, c.site.name
    """
    # anchors and javascript: pseudo-urls are handed back as they are
    if path.startswith(('#', 'javascript:')):
        return path

    u = UrlParser(path)

    if sr_path:
        u.path_add_subreddit(c.site)

    if not u.hostname or force_hostname:
        u.hostname = get_domain(subreddit=False)

    if (c.secure and u.is_reddit_url()) or force_https:
        u.scheme = "https"

    # an explicitly requested extension wins over the render style
    if force_extension is not None:
        u.set_extension(force_extension)
        return u.unparse()

    if not retain_extension:
        return u.unparse()

    style = c.render_style
    if style == 'mobile':
        u.set_extension('mobile')
    elif style == 'compact':
        u.set_extension('compact')
    # SaidIt CUSTOM
    elif style == g.extension_subdomain_mobile_v2_render_style:
        u.set_extension(g.extension_subdomain_mobile_v2_render_style)

    return u.unparse()
def add_sr(path, sr_path=True, nocname=False, force_hostname=False,
           retain_extension=True, force_https=False):
    """
    Rewrite a path or full url so that it carries the current
    subreddit, hostname, scheme and extension.

    Arguments:

     * path: a relative path or a full-fledged url.  Anchors ('#...')
       and 'javascript:' urls are returned untouched.

     * sr_path: if True, and a cname is not in use (c.cname is false
       or nocname is True), the subreddit path of c.site is added to
       the url's path.

     * nocname: ignore c.cname, both for the sr_path decision above
       and when asking get_domain for the hostname.

     * force_hostname: if True, the hostname is replaced even when the
       url already has one.  A url without a hostname always gets one:
       request.host when c.secure is set, otherwise the result of
       get_domain.

     * retain_extension: if True, sets the extension to match
       c.render_style when that is 'mobile' or 'compact'.

     * force_https: force the URL scheme to https.

    For caching purposes: note that this function uses:
      c.cname, c.render_style, c.site.name
    """
    # anchors and javascript: pseudo-urls are handed back as they are
    if path.startswith(('#', 'javascript:')):
        return path

    u = UrlParser(path)

    if sr_path and (nocname or not c.cname):
        u.path_add_subreddit(c.site)

    if not u.hostname or force_hostname:
        u.hostname = (
            request.host if c.secure
            else get_domain(cname=(c.cname and not nocname),
                            subreddit=False)
        )

    if (c.secure and u.is_reddit_url()) or force_https:
        u.scheme = "https"

    if retain_extension:
        # only these render styles are reflected in the url
        for extension in ('mobile', 'compact'):
            if c.render_style == extension:
                u.set_extension(extension)
                break

    return u.unparse()
def add_sr(
        path, sr_path=True, nocname=False, force_hostname=False,
        retain_extension=True, force_https=False):
    """
    Given a path (a relative path or a full-fledged url), return it
    updated with the current subreddit path, hostname, scheme and
    extension, as selected by the arguments:

     * sr_path: if True, and a cname is not in use (c.cname is false
       or nocname is True), add the subreddit path of c.site to the
       url's path.

     * nocname: ignore c.cname, both for the sr_path decision above
       and when asking get_domain for the hostname.

     * force_hostname: if True, replace the hostname even when the url
       already has one.  A url without a hostname always gets one:
       request.host when c.secure is set, otherwise the result of
       get_domain.

     * retain_extension: if True, set the extension to match
       c.render_style when that is 'mobile' or 'compact'.

     * force_https: force the URL scheme to https.

    Anchors ('#...') and 'javascript:' urls are returned untouched.

    For caching purposes: note that this function uses:
      c.cname, c.render_style, c.site.name
    """
    # nothing to rewrite for a bare anchor or a javascript: pseudo-url
    is_passthrough = path.startswith(('#', 'javascript:'))
    if is_passthrough:
        return path

    u = UrlParser(path)

    if sr_path and (nocname or not c.cname):
        u.path_add_subreddit(c.site)

    if not u.hostname or force_hostname:
        if c.secure:
            new_hostname = request.host
        else:
            use_cname = c.cname and not nocname
            new_hostname = get_domain(cname=use_cname, subreddit=False)
        u.hostname = new_hostname

    if (c.secure and u.is_reddit_url()) or force_https:
        u.scheme = "https"

    if not retain_extension:
        return u.unparse()

    if c.render_style == 'mobile':
        u.set_extension('mobile')
    elif c.render_style == 'compact':
        u.set_extension('compact')
    return u.unparse()
def make_scraper(url):
    """Return a ``_LiveUpdateScraper`` for a live-thread url, else ``None``.

    A url qualifies when it is a reddit url whose path starts with
    "/live/" and has a non-empty segment right after that prefix; that
    segment is passed to ``_LiveUpdateScraper`` as the event id.
    """
    parsed = UrlParser(url)
    if not parsed.is_reddit_url():
        return None

    prefix = "/live/"
    if not parsed.path.startswith(prefix):
        return None

    # Same segment as path.split("/")[2].  A path that starts with
    # "/live/" always has that segment, so an IndexError can never signal
    # a missing id; an absent id shows up as an empty string instead and
    # has to be rejected explicitly.
    event_id = parsed.path[len(prefix):].split("/", 1)[0]
    if not event_id:
        return None

    return _LiveUpdateScraper(event_id)
def run(self, url):
    """Resolve a live-thread url by delegating to ``VLiveUpdateEvent.run``.

    Returns ``None`` when ``url`` is empty, is not a reddit url, or its
    path does not start with ``/live/<id>``.  Otherwise the id captured
    from the path is handed to ``VLiveUpdateEvent.run`` and its result
    is returned.
    """
    if not url:
        return None

    parsed = UrlParser(url)

    # TODO: We should probably set error messages in these cases.
    # The path is only inspected for reddit urls.
    match = parsed.is_reddit_url() and re.match(r'/live/(\w+)/?', parsed.path)
    if not match:
        return None

    return VLiveUpdateEvent.run(self, match.group(1))
def run(self, url):
    """Look up the ``LiveUpdateEvent`` that a live-thread url points at.

    Returns ``None`` when ``url`` is empty, is not a reddit url, its
    path does not start with ``/live/<id>``, or the lookup raises
    ``tdb_cassandra.NotFound``.  Otherwise returns what
    ``models.LiveUpdateEvent._byID`` gives back for the captured id.
    """
    if not url:
        return None

    parsed = UrlParser(url)

    # TODO: We should probably set error messages in these cases.
    # The path is only inspected for reddit urls.
    match = parsed.is_reddit_url() and re.match(r'/live/(\w+)/?', parsed.path)
    if not match:
        return None

    event_id = match.group(1)
    try:
        return models.LiveUpdateEvent._byID(event_id)
    except tdb_cassandra.NotFound:
        return None
def _get_scrape_url(link): if not link.is_self: return link.url urls = extract_urls_from_markdown(link.selftext) second_choice = None for url in urls: p = UrlParser(url) if p.is_reddit_url(): continue # If we don't find anything we like better, use the first image. if not second_choice: second_choice = url # This is an optimization for "proof images" in AMAs. if is_subdomain(p.netloc, 'imgur.com') or p.has_image_extension(): return url return second_choice
def add_ext_to_link(link):
    """Rewrite ``link["href"]`` through ``add_sr`` when it is a reddit url.

    ``link`` must support ``.get("href")`` and item assignment
    (presumably a parsed HTML anchor element -- confirm against the
    caller).  It is modified in place; nothing is returned.  Links
    without an ``href`` and links to non-reddit urls are left untouched.
    """
    href = link.get("href")
    if href is None:
        # No target at all, so there is nothing to parse or rewrite.
        return

    if UrlParser(href).is_reddit_url():
        link["href"] = add_sr(href, sr_path=False)
def add_ext_to_link(link):
    """Rewrite ``link['href']`` through ``add_sr`` when it is a reddit url.

    ``link`` must support ``.get('href')`` and item assignment
    (presumably a parsed HTML anchor element -- confirm against the
    caller).  It is modified in place; nothing is returned.  Links
    without an ``href`` and links to non-reddit urls are left untouched.
    """
    href = link.get('href')
    if href is None:
        # No target at all, so there is nothing to parse or rewrite.
        return

    if UrlParser(href).is_reddit_url():
        link['href'] = add_sr(href, sr_path=False)