Example #1
    def test_url_mutation(self):
        u = UrlParser("http://example.com/")
        u.hostname = g.domain
        self.assertTrue(u.is_reddit_url())

        u = UrlParser("http://%s/" % g.domain)
        u.hostname = "example.com"
        self.assertFalse(u.is_reddit_url())
Example #3
def _get_scrape_url(link):
    if not link.is_self:
        sr_name = link.subreddit_slow.name
        if not feature.is_enabled("imgur_gif_conversion", subreddit=sr_name):
            return link.url
        p = UrlParser(link.url)
        # If it's a gif link on imgur, replacing it with gifv should
        # give us the embedly friendly video url
        if is_subdomain(p.hostname, "imgur.com"):
            if p.path_extension().lower() == "gif":
                p.set_extension("gifv")
                return p.unparse()
        return link.url

    urls = extract_urls_from_markdown(link.selftext)
    second_choice = None
    for url in urls:
        p = UrlParser(url)
        if p.is_reddit_url():
            continue
        # If we don't find anything we like better, use the first image.
        if not second_choice:
            second_choice = url
        # This is an optimization for "proof images" in AMAs.
        if is_subdomain(p.netloc, 'imgur.com') or p.has_image_extension():
            return url

    return second_choice
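A standalone sketch of the imgur gif-to-gifv rewrite used above, written against the Python 3 standard library (UrlParser, is_subdomain and feature are reddit-codebase helpers and are not importable here):

from urllib.parse import urlsplit, urlunsplit

def imgur_gif_to_gifv(url):
    # Same idea as the UrlParser branch above: if the host is imgur.com or one
    # of its subdomains and the path ends in .gif, swap the extension to .gifv
    # so an embedly-friendly video URL can be scraped instead.
    parts = urlsplit(url)
    host = (parts.hostname or "").lower()
    if host == "imgur.com" or host.endswith(".imgur.com"):
        if parts.path.lower().endswith(".gif"):
            return urlunsplit(parts._replace(path=parts.path[:-4] + ".gifv"))
    return url

print(imgur_gif_to_gifv("http://i.imgur.com/example.gif"))
# -> http://i.imgur.com/example.gifv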
Example #4
    def format_output_url(cls, url, **kw):
        """
        Helper method used during redirect to ensure that the redirect
        url (assisted by frame busting code or javascript) will point
        to the correct domain and not have any extra dangling get
        parameters.  The extensions are also made to match and the
        resulting url is utf8 encoded.

        Note: for development purposes, also checks that the port
        matches the request port
        """
        u = UrlParser(url)

        if u.is_reddit_url():
            # make sure to pass the port along if not 80
            if not kw.has_key("port"):
                kw["port"] = request.port

            # disentangle the cname (for urls that would have
            # cnameframe=1 in them)
            u.mk_cname(**kw)

            # make sure the extensions agree with the current page
            if c.extension:
                u.set_extension(c.extension)

        # unparse and encode it in utf8
        rv = _force_unicode(u.unparse()).encode("utf8")
        if any(ch.isspace() for ch in rv):
            raise ValueError("Space characters in redirect URL: [%r]" % rv)
        return rv
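The closing whitespace check above presumably guards the redirect URL before it lands in a Location header; a minimal standalone illustration of the same test (plain Python, nothing reddit-specific):

def is_safe_redirect(rv):
    # Mirrors the check above: any space, tab, CR or LF in the final URL is
    # rejected, which blocks header-splitting payloads.
    return not any(ch.isspace() for ch in rv)

print(is_safe_redirect("http://example.com/r/pics.json"))           # True
print(is_safe_redirect("http://example.com/\r\nSet-Cookie: a=b"))   # False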
Example #6
    def format_output_url(cls, url, **kw):
        """
        Helper method used during redirect to ensure that the redirect
        url (assisted by frame busting code or javascript) will point
        to the correct domain and not have any extra dangling get
        parameters.  The extensions are also made to match and the
        resulting url is utf8 encoded.

        Note: for development purposes, also checks that the port
        matches the request port
        """
        u = UrlParser(url)

        if u.is_reddit_url():
            # make sure to pass the port along if not 80
            if not kw.has_key('port'):
                kw['port'] = request.port

            # disentangle the cname (for urls that would have
            # cnameframe=1 in them)
            u.mk_cname(**kw)

            # make sure the extensions agree with the current page
            if c.extension:
                u.set_extension(c.extension)

        # unparse and encode it in utf8
        rv = _force_unicode(u.unparse()).encode('utf8')
        if "\n" in rv or "\r" in rv:
            abort(400)
        return rv
Example #7
    def format_output_url(cls, url, **kw):
        """
        Helper method used during redirect to ensure that the redirect
        url (assisted by frame busting code or javascript) will point
        to the correct domain and not have any extra dangling get
        parameters.  The extensions are also made to match and the
        resulting url is utf8 encoded.

        Note: for development purposes, also checks that the port
        matches the request port
        """
        preserve_extension = kw.pop("preserve_extension", True)
        u = UrlParser(url)

        if u.is_reddit_url():
            # make sure to pass the port along if not 80
            if not kw.has_key('port'):
                kw['port'] = request.port

            # disentangle the cname (for urls that would have
            # cnameframe=1 in them)
            u.mk_cname(**kw)

            # make sure the extensions agree with the current page
            if preserve_extension and c.extension:
                u.set_extension(c.extension)

        # unparse and encode it in utf8
        rv = _force_unicode(u.unparse()).encode('utf8')
        if "\n" in rv or "\r" in rv:
            abort(400)
        return rv
Example #8
def add_sr(path,
           sr_path=True,
           nocname=False,
           force_hostname=False,
           retain_extension=True,
           force_https=False,
           force_extension=None):
    """
    Given a path (which may be a full-fledged url or a relative path),
    parses the path and updates it to include the subreddit path
    according to the rules set by its arguments:

     * sr_path: if a cname is not used for the domain, updates the
       path to include c.site.path.

     * nocname: deprecated.

     * force_hostname: if True, force the url's hostname to be updated
       even if it is already set in the path. If false, the path will still
       have its domain updated if no hostname is specified in the url.

     * retain_extension: if True, sets the extension according to
       c.render_style.

     * force_https: force the URL scheme to https

    For caching purposes: note that this function uses:
      c.render_style, c.site.name

    """
    # don't do anything if it is just an anchor
    if path.startswith(('#', 'javascript:')):
        return path

    u = UrlParser(path)
    if sr_path:
        u.path_add_subreddit(c.site)

    if not u.hostname or force_hostname:
        u.hostname = get_domain(subreddit=False)

    if (c.secure and u.is_reddit_url()) or force_https:
        u.scheme = "https"

    if force_extension is not None:
        u.set_extension(force_extension)
    elif retain_extension:
        if c.render_style == 'mobile':
            u.set_extension('mobile')

        elif c.render_style == 'compact':
            u.set_extension('compact')

        # SaidIt CUSTOM
        elif c.render_style == g.extension_subdomain_mobile_v2_render_style:
            u.set_extension(g.extension_subdomain_mobile_v2_render_style)

    return u.unparse()
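A rough standalone sketch of the render-style branch at the end of add_sr (the names here are illustrative, not the real reddit API): force_extension wins outright, otherwise the mobile and compact render styles map to extensions of the same name.

def pick_extension(render_style, force_extension=None, retain_extension=True):
    # force_extension takes priority even over the render style; otherwise
    # only the special render styles translate into an extension.
    if force_extension is not None:
        return force_extension
    if retain_extension and render_style in ("mobile", "compact"):
        return render_style
    return None

print(pick_extension("compact"))                        # -> compact
print(pick_extension("html", force_extension="json"))   # -> json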
Example #9
def add_sr(path,
           sr_path=True,
           nocname=False,
           force_hostname=False,
           retain_extension=True,
           force_https=False):
    """
    Given a path (which may be a full-fledged url or a relative path),
    parses the path and updates it to include the subreddit path
    according to the rules set by its arguments:

     * sr_path: if a cname is not used for the domain, updates the
       path to include c.site.path.

     * nocname: when updating the hostname, overrides the value of
       c.cname to set the hostname to g.domain.  The default behavior
       is to set the hostname consistent with c.cname.

     * force_hostname: if True, force the url's hostname to be updated
       even if it is already set in the path, and subject to the
       c.cname/nocname combination.  If false, the path will still
       have its domain updated if no hostname is specified in the url.

     * retain_extension: if True, sets the extension according to
       c.render_style.

     * force_https: force the URL scheme to https

    For caching purposes: note that this function uses:
      c.cname, c.render_style, c.site.name
    """
    # don't do anything if it is just an anchor
    if path.startswith(('#', 'javascript:')):
        return path

    u = UrlParser(path)
    if sr_path and (nocname or not c.cname):
        u.path_add_subreddit(c.site)

    if not u.hostname or force_hostname:
        if c.secure:
            u.hostname = request.host
        else:
            u.hostname = get_domain(cname=(c.cname and not nocname),
                                    subreddit=False)

    if (c.secure and u.is_reddit_url()) or force_https:
        u.scheme = "https"

    if retain_extension:
        if c.render_style == 'mobile':
            u.set_extension('mobile')

        elif c.render_style == 'compact':
            u.set_extension('compact')

    return u.unparse()
Example #10
def add_sr(
        path, sr_path=True, nocname=False, force_hostname=False,
        retain_extension=True, force_https=False):
    """
    Given a path (which may be a full-fledged url or a relative path),
    parses the path and updates it to include the subreddit path
    according to the rules set by its arguments:

     * sr_path: if a cname is not used for the domain, updates the
       path to include c.site.path.

     * nocname: when updating the hostname, overrides the value of
       c.cname to set the hostname to g.domain.  The default behavior
       is to set the hostname consistent with c.cname.

     * force_hostname: if True, force the url's hostname to be updated
       even if it is already set in the path, and subject to the
       c.cname/nocname combination.  If false, the path will still
       have its domain updated if no hostname is specified in the url.

     * retain_extension: if True, sets the extension according to
       c.render_style.

     * force_https: force the URL scheme to https

    For caching purposes: note that this function uses:
      c.cname, c.render_style, c.site.name
    """
    # don't do anything if it is just an anchor
    if path.startswith(('#', 'javascript:')):
        return path

    u = UrlParser(path)
    if sr_path and (nocname or not c.cname):
        u.path_add_subreddit(c.site)

    if not u.hostname or force_hostname:
        if c.secure:
            u.hostname = request.host
        else:
            u.hostname = get_domain(cname = (c.cname and not nocname),
                                    subreddit = False)

    if (c.secure and u.is_reddit_url()) or force_https:
        u.scheme = "https"

    if retain_extension:
        if c.render_style == 'mobile':
            u.set_extension('mobile')

        elif c.render_style == 'compact':
            u.set_extension('compact')

    return u.unparse()
Example #12
def make_scraper(url):
    parsed = UrlParser(url)

    if parsed.is_reddit_url():
        if parsed.path.startswith("/live/"):
            try:
                event_id = parsed.path.split("/")[2]
            except IndexError:
                return
            else:
                return _LiveUpdateScraper(event_id)
Example #13
    def run(self, url):
        if not url:
            return None

        u = UrlParser(url)
        # TODO: We should probably set error messages in these cases.
        if not u.is_reddit_url():
            return None

        event_id = re.match(r'/live/(\w+)/?', u.path)
        if not event_id:
            return None

        return VLiveUpdateEvent.run(self, event_id.group(1))
Example #16
    def run(self, url):
        if not url:
            return None

        u = UrlParser(url)
        # TODO: We should probably set error messages in these cases.
        if not u.is_reddit_url():
            return None

        event_id = re.match(r'/live/(\w+)/?', u.path)
        if not event_id:
            return None

        try:
            return models.LiveUpdateEvent._byID(event_id.group(1))
        except tdb_cassandra.NotFound:
            return None
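The run() variants above both pull the event id out of a reddit live URL's path with the same regular expression; a standalone sketch of just that step:

import re

def live_event_id(path):
    # r'/live/(\w+)/?' anchors at the start of the path and captures the
    # event id segment; anything after the trailing slash is ignored.
    m = re.match(r'/live/(\w+)/?', path)
    return m.group(1) if m else None

print(live_event_id("/live/abc123/updates"))  # -> abc123
print(live_event_id("/r/pics/"))              # -> None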
Example #17
def _get_scrape_url(link):
    if not link.is_self:
        return link.url

    urls = extract_urls_from_markdown(link.selftext)
    second_choice = None
    for url in urls:
        p = UrlParser(url)
        if p.is_reddit_url():
            continue
        # If we don't find anything we like better, use the first image.
        if not second_choice:
            second_choice = url
        # This is an optimization for "proof images" in AMAs.
        if is_subdomain(p.netloc, 'imgur.com') or p.has_image_extension():
            return url

    return second_choice
Example #19
    def add_ext_to_link(link):
        url = UrlParser(link.get("href"))
        if url.is_reddit_url():
            link["href"] = add_sr(link.get("href"), sr_path=False)
Example #20
    def add_ext_to_link(link):
        url = UrlParser(link.get('href'))
        if url.is_reddit_url():
            link['href'] = add_sr(link.get('href'), sr_path=False)