def _get_scrape_url(link):
    """Pick the URL to send to the media scraper for a link.

    For non-self posts this is the link's own URL (with imgur .gif
    rewritten to .gifv when the feature flag is on).  For self posts,
    scan the selftext markdown for a likely image URL.
    """
    if not link.is_self:
        sr_name = link.subreddit_slow.name
        if not feature.is_enabled("imgur_gif_conversion", subreddit=sr_name):
            return link.url
        p = UrlParser(link.url)
        # If it's a gif link on imgur, replacing it with gifv should
        # give us the embedly friendly video url
        if is_subdomain(p.hostname, "imgur.com"):
            if p.path_extension().lower() == "gif":
                p.set_extension("gifv")
                return p.unparse()
        return link.url

    # Self post: look for a scrapeable URL in the body text.
    urls = extract_urls_from_markdown(link.selftext)
    second_choice = None
    for url in urls:
        p = UrlParser(url)
        if p.is_reddit_url():
            continue
        # If we don't find anything we like better, use the first image.
        if not second_choice:
            second_choice = url
        # This is an optimization for "proof images" in AMAs.
        if is_subdomain(p.netloc, 'imgur.com') or p.has_image_extension():
            return url
    # May be None if every URL was a reddit URL or there were none.
    return second_choice
def test_default_prefix(self):
    """A missing or unknown extension falls back to the www subdomain."""
    parsed = UrlParser('http://i.reddit.com/r/redditdev')
    parsed.switch_subdomain_by_extension()
    self.assertEquals('http://www.reddit.com/r/redditdev', parsed.unparse())

    parsed = UrlParser('http://i.reddit.com/r/redditdev')
    parsed.switch_subdomain_by_extension('does-not-exist')
    self.assertEquals('http://www.reddit.com/r/redditdev', parsed.unparse())
def test_url_mutation(self):
    """Reassigning hostname flips is_reddit_url() in both directions."""
    parsed = UrlParser("http://example.com/")
    parsed.hostname = g.domain
    self.assertTrue(parsed.is_reddit_url())

    parsed = UrlParser("http://%s/" % g.domain)
    parsed.hostname = "example.com"
    self.assertFalse(parsed.is_reddit_url())
def make_anchored_permalink(self, link=None, sr=None, context=1, anchor=None):
    """Build a permalink carrying a ?context= param and a #fragment.

    The fragment defaults to this thing's id36 when no anchor is given.
    """
    if link:
        base = self.make_permalink(link, sr)
    else:
        base = self.make_permalink_slow()

    permalink = UrlParser(base)
    permalink.update_query(context=context)
    if anchor:
        permalink.fragment = anchor
    else:
        permalink.fragment = self._id36
    return permalink.unparse()
def test_normal_urls(self):
    """Known extensions map to their corresponding subdomains."""
    parsed = UrlParser('http://www.reddit.com/r/redditdev')
    parsed.switch_subdomain_by_extension('compact')
    compact_url = parsed.unparse()
    self.assertEquals('http://i.reddit.com/r/redditdev', compact_url)

    parsed = UrlParser(compact_url)
    parsed.switch_subdomain_by_extension('mobile')
    mobile_url = parsed.unparse()
    self.assertEquals('http://simple.reddit.com/r/redditdev', mobile_url)
def test_same_url(self):
    """Equality ignores query-parameter order and works for built-up URLs."""
    first = UrlParser('http://example.com:8000/a;b?foo=bar&bar=baz#spam')
    second = UrlParser('http://example.com:8000/a;b?bar=baz&foo=bar#spam')
    self.assertEquals(first, second)

    built = UrlParser('')
    built.scheme = 'http'
    built.hostname = 'example.com'
    built.port = 8000
    built.path = '/a'
    built.params = 'b'
    built.update_query(foo='bar', bar='baz')
    built.fragment = 'spam'
    self.assertEquals(first, built)
def validate_secure_oembed(self, oembed):
    """Check the "secure" embed is safe to embed, and not a placeholder"""
    if not oembed.get("html"):
        return False

    # Get the embed.ly iframe's src
    iframe_src = lxml.html.fromstring(oembed['html']).get('src')
    if not iframe_src:
        return False
    iframe_src_url = UrlParser(iframe_src)

    # Per embed.ly support: If the URL for the provider is HTTP, we're
    # gonna get a placeholder image instead
    # NOTE(review): query_dict.get('src') may be None if the iframe src
    # has no src param — assumes UrlParser tolerates None input; confirm.
    provider_src_url = UrlParser(iframe_src_url.query_dict.get('src'))
    return not provider_src_url.scheme or provider_src_url.scheme == "https"
def test_sign_url(self):
    """The same signature results whether w=100 comes from the URL or
    from update_query()."""
    expected = ('http://examples.imgix.net/frog.jpg'
                '?w=100&s=cd3bdf071108af73b15c21bdcee5e49c')

    url = UrlParser('http://examples.imgix.net/frog.jpg?w=100')
    signed = self.provider._sign_url(url, 'abcdef')
    self.assertEqual(signed.unparse(), expected)

    url = UrlParser('http://examples.imgix.net/frog.jpg')
    url.update_query(w=100)
    signed = self.provider._sign_url(url, 'abcdef')
    self.assertEqual(signed.unparse(), expected)
def _do_content_purge(self, url):
    """Does the purge of the content from CloudFlare."""
    data = {
        'files': [
            url,
        ]
    }

    timer = g.stats.get_timer("providers.cloudflare.content_purge")
    timer.start()

    # Get the proper zone id for the purge cache url based on hostname
    if UrlParser(url).hostname == g.image_hosting_domain:
        purge_key_url = g.secrets['cloudflare_purge_key_imagehosting_url']
    else:
        purge_key_url = g.secrets['cloudflare_purge_key_url']

    # NOTE(review): if requests.delete raises, timer.stop() never runs;
    # the response status is also not checked — presumably best-effort.
    response = requests.delete(
        purge_key_url,
        headers={
            'X-Auth-Email': g.secrets['cloudflare_email_address'],
            'X-Auth-Key': g.secrets['cloudflare_api_key'],
            'content-type': 'application/json',
        },
        data=json.dumps(data),
    )
    timer.stop()
def POST_options(self, all_langs, pref_lang, **kw):
    """Validate and persist user preference changes, then redirect to
    the prefs page with ?done=true."""
    #temporary. eventually we'll change pref_clickgadget to an
    #integer preference
    kw['pref_clickgadget'] = kw['pref_clickgadget'] and 5 or 0
    if c.user.pref_show_promote is None:
        kw['pref_show_promote'] = None
    elif not kw.get('pref_show_promote'):
        kw['pref_show_promote'] = False

    # users who can't see over-18 content also get profanity filtering
    if not kw.get("pref_over_18") or not c.user.pref_over_18:
        kw['pref_no_profanity'] = True

    if kw.get("pref_no_profanity") or c.user.pref_no_profanity:
        kw['pref_label_nsfw'] = True

    # ad-related prefs are gold-only; force them on otherwise
    if not c.user.gold:
        kw['pref_show_adbox'] = True
        kw['pref_show_sponsors'] = True

    self.set_options(all_langs, pref_lang, **kw)
    u = UrlParser(c.site.path + "prefs")
    u.update_query(done='true')
    if c.cname:
        u.put_in_frame()
    return self.redirect(u.unparse())
def POST_options(self, all_langs, pref_lang, **kw):
    """Validate and persist user preference changes, then redirect to
    the prefs page with ?done=true."""
    #temporary. eventually we'll change pref_clickgadget to an
    #integer preference
    kw['pref_clickgadget'] = kw['pref_clickgadget'] and 5 or 0
    if c.user.pref_show_promote is None:
        kw['pref_show_promote'] = None
    elif not kw.get('pref_show_promote'):
        kw['pref_show_promote'] = False

    # users who can't see over-18 content also get profanity filtering
    if not kw.get("pref_over_18") or not c.user.pref_over_18:
        kw['pref_no_profanity'] = True

    if kw.get("pref_no_profanity") or c.user.pref_no_profanity:
        kw['pref_label_nsfw'] = True

    # default all the gold options to on if they don't have gold
    if not c.user.gold:
        for pref in ('pref_show_adbox',
                     'pref_show_sponsors',
                     'pref_show_sponsorships',
                     'pref_highlight_new_comments',
                     'pref_monitor_mentions'):
            kw[pref] = True

    self.set_options(all_langs, pref_lang, **kw)
    u = UrlParser(c.site.path + "prefs")
    u.update_query(done='true')
    if c.cname:
        u.put_in_frame()
    return self.redirect(u.unparse())
def _update_redirect_uri(base_redirect_uri, params, as_fragment=False):
    """Attach OAuth response params to the client's redirect URI.

    Params go into the fragment (implicit grant) when as_fragment is
    set, otherwise into the query string.
    """
    redirect = UrlParser(base_redirect_uri)
    if as_fragment:
        redirect.fragment = urlencode(params)
    else:
        redirect.update_query(**params)
    return redirect.unparse()
def process(link):
    """Yield (listing key, sort value, timestamp, fullname) tuples for
    every domain listing this link belongs in.

    Generator used by the by-domain listing precomputer; spam/deleted
    links yield nothing.
    """
    assert link.thing_type == 'link'

    timestamp = link.timestamp
    fname = make_fullname(Link, link.thing_id)

    if not link.spam and not link.deleted:
        if link.url:
            domains = UrlParser(link.url).domain_permutations()
        else:
            domains = []
        ups, downs = link.ups, link.downs

        for tkey, oldest in oldests.iteritems():
            if timestamp > oldest:
                sc = score(ups, downs)
                contr = controversy(ups, downs)
                h = _hot(ups, downs, timestamp)

                for domain in domains:
                    yield ('domain/top/%s/%s' % (tkey, domain),
                           sc, timestamp, fname)
                    yield ('domain/controversial/%s/%s' % (tkey, domain),
                           contr, timestamp, fname)
                    # hot/new only make sense for the all-time bucket
                    if tkey == "all":
                        yield ('domain/hot/%s/%s' % (tkey, domain),
                               h, timestamp, fname)
                        yield ('domain/new/%s/%s' % (tkey, domain),
                               timestamp, timestamp, fname)
def GET_framebuster(self, what = None, blah = None):
    """
    renders the contents of the iframe which, on a cname, checks if
    the user is currently logged into reddit.

    if this page is hit from the primary domain, redirects to the
    cnamed domain version of the site.  If the user is logged in,
    this cnamed version will drop a boolean session cookie on that
    domain so that subsequent page reloads will be caught in
    middleware and a frame will be inserted around the content.

    If the user is not logged in, previous session cookies will be
    emptied so that subsequent refreshes will not be rendered in
    that pesky frame.
    """
    if not c.site.domain:
        return ""
    elif c.cname:
        return FrameBuster(login = (what == "login")).render()
    else:
        path = "/framebuster/"
        if c.user_is_loggedin:
            path += "login/"
        # random component defeats caching of the redirect target
        u = UrlParser(path + str(random.random()))
        u.mk_cname(require_frame = False, subreddit = c.site,
                   port = request.port)
        return self.redirect(u.unparse())
    # the user is not logged in or there is no cname.
    # NOTE(review): unreachable — every branch above returns.
    return FrameBuster(login = False).render()
def _process_data(self, wiki_xml):
    """This method processes the wiki data and extracts what is used.

    Scans a MediaWiki XML export for pages in [[Category:Sequences]]
    and collects the lesswrong.com article paths each one links to.
    Returns {'sequences': [{'title': ..., 'articles': [...]}, ...]}.
    """
    MEDIAWIKI_NS = 'http://www.mediawiki.org/xml/export-0.3/'
    sequences = []
    # markdown-ish wiki links of the form [http://lesswrong.com/lw/... text]
    lw_url_re = re.compile(r'\[(http://lesswrong\.com/lw/[^ ]+) [^\]]+\]')

    for page in wiki_xml.getroot().iterfind(
            './/{%s}page' % MEDIAWIKI_NS):
        # TODO: Change to use iterparse
        # Get the titles
        title = page.findtext('{%s}title' % MEDIAWIKI_NS)

        # See if this page is a sequence page
        sequence_elem = page.xpath(
            "mw:revision[1]/mw:text[contains(., '[[Category:Sequences]]')]",
            namespaces={'mw': MEDIAWIKI_NS})
        if sequence_elem:
            sequence_elem = sequence_elem[0]
            articles = []
            # Find all the lesswrong urls
            for match in lw_url_re.finditer(sequence_elem.text):
                article_url = UrlParser(match.group(1))
                # Only store the path to the article
                article_path = article_url.path
                # Ensure path ends in slash
                # NOTE(review): assumes the path is non-empty — an empty
                # path would raise IndexError here; confirm inputs.
                if article_path[-1] != '/':
                    article_path += '/'
                articles.append(article_path)
            sequences.append({'title': title, 'articles': articles})

    return {'sequences': sequences}
def process_message(msgs, chan):
    """Update get_domain_links(), the Links by domain precomputed query.

    get_domain_links() is a CachedResult which is stored in permacache. To
    update these objects we need to do a read-modify-write which requires
    obtaining a lock. Sharding these updates by domain allows us to run
    multiple consumers (but ideally just one per shard) to avoid lock
    contention.
    """
    from r2.lib.db.queries import add_queries, get_domain_links

    # message bodies are link fullnames; set-comprehension dedupes them
    link_names = {msg.body for msg in msgs}
    links = Link._by_fullname(link_names, return_dict=False)
    print 'Processing %r' % (links, )

    # group links by every domain permutation they belong to
    links_by_domain = defaultdict(list)
    for link in links:
        parsed = UrlParser(link.url)

        # update the listings for all permutations of the link's domain
        for domain in parsed.domain_permutations():
            links_by_domain[domain].append(link)

    for d, links in links_by_domain.iteritems():
        with g.stats.get_timer("link_vote_processor.domain_queries"):
            add_queries(
                queries=[
                    get_domain_links(d, sort, "all")
                    for sort in SORTS],
                insert_items=links,
            )
def resize_image(self, image, width=None, censor_nsfw=False, max_ratio=None):
    """Build an imgix URL that resizes (and optionally censors) an image.

    Raises NotLargeEnough when the requested width exceeds the source
    image's width.  Returns the (possibly signed) imgix URL string.
    """
    url = UrlParser(image['url'])
    url.hostname = g.imgix_domain
    # Let's encourage HTTPS; it's cool, works just fine on HTTP pages, and
    # will prevent insecure content warnings on HTTPS pages.
    url.scheme = 'https'

    if max_ratio:
        url.update_query(fit='crop')
        # http://www.imgix.com/docs/reference/size#param-crop
        url.update_query(crop='faces,entropy')
        url.update_query(arh=max_ratio)

    if width:
        if width > image['width']:
            raise NotLargeEnough()
        # http://www.imgix.com/docs/reference/size#param-w
        url.update_query(w=width)

    if censor_nsfw:
        # Do an initial blur to make sure we're getting rid of icky
        # details.
        #
        # http://www.imgix.com/docs/reference/stylize#param-blur
        url.update_query(blur=600)
        # And then add pixellation to help the image compress well.
        #
        # http://www.imgix.com/docs/reference/stylize#param-px
        url.update_query(px=32)
    if g.imgix_signing:
        url = self._sign_url(url, g.secrets['imgix_signing_token'])
    return url.unparse()
def add_sr(path, sr_path=True, nocname=False, force_hostname=False):
    """
    Given a path (which may be a full-fledged url or a relative path),
    parses the path and updates it to include the subreddit path
    according to the rules set by its arguments:

      * force_hostname: if True, force the url's hostname to be updated
        even if it is already set in the path, and subject to the
        c.cname/nocname combination.  If false, the path will still
        have its domain updated if no hostname is specified in the url.

      * nocname: when updating the hostname, overrides the value of
        c.cname to set the hostname to g.domain.  The default behavior
        is to set the hostname consistent with c.cname.

      * sr_path: if a cname is not used for the domain, updates the
        path to include c.site.path.
    """
    u = UrlParser(path)
    if sr_path and (nocname or not c.cname):
        u.path_add_subreddit(c.site)

    if not u.hostname or force_hostname:
        u.hostname = get_domain(cname=(c.cname and not nocname),
                                subreddit=False)

    # mobile render style gets the .mobile extension on every link
    if c.render_style == 'mobile':
        u.set_extension('mobile')

    return u.unparse()
def url_for_title(self, title):
    """Uses the MediaWiki API to get the URL for a wiki page
    with the given title.

    Returns the full wiki URL, or None if the title is None or the
    lookup/parse fails.  Results are cached for 12 hours.
    """
    if title is None:
        return None

    from pylons import g
    cache_key = ('wiki_url_%s' % title).encode('ascii', 'ignore')
    wiki_url = g.cache.get(cache_key)
    if wiki_url is None:
        # http://www.mediawiki.org/wiki/API:Query_-_Properties#info_.2F_in
        api = UrlParser(g.wiki_api_url)
        api.update_query(action='query', titles=title, prop='info',
                         format='yaml', inprop='url')

        try:
            response = urlopen(api.unparse()).read()
            parsed_response = yaml.load(response, Loader=yaml.CLoader)
            page = parsed_response['query']['pages'][0]
            # inside the try: a missing/None 'fullurl' is a failed lookup
            wiki_url = page['fullurl'].strip()
        except Exception:
            # network, parse, or schema failure: report "no url" and
            # don't cache the failure
            return None

        # Things are created every couple of days so 12 hours seems
        # to be a reasonable cache time
        # BUG FIX: previously this wrote to g.permacache while reading
        # from g.cache, so the cache could never hit; write to the same
        # cache we read from.
        g.cache.set(cache_key, wiki_url, time=3600 * 12)

    return wiki_url
def format_output_url(cls, url, **kw):
    """
    Helper method used during redirect to ensure that the redirect
    url (assisted by frame busting code or javascript) will point to
    the correct domain and not have any extra dangling get parameters.
    The extensions are also made to match and the resulting url is
    utf8 encoded.

    Note: for development purposes, also checks that the port matches
    the request port
    """
    u = UrlParser(url)

    if u.is_reddit_url():
        # make sure to pass the port along if not 80
        if not kw.has_key('port'):
            kw['port'] = request.port

        # disentangle the cname (for urls that would have
        # cnameframe=1 in them)
        u.mk_cname(**kw)

    # make sure the extensions agree with the current page
    if c.extension:
        u.set_extension(c.extension)

    # unparse and encode it in utf8
    rv = _force_unicode(u.unparse()).encode('utf8')
    # reject header-splitting attempts
    if "\n" in rv or "\r" in rv:
        abort(400)
    return rv
def purge_url(self, url):
    """Purge an image (by url) from imgix.

    Reference: http://www.imgix.com/docs/tutorials/purging-images

    Note that as mentioned in the imgix docs, in order to remove
    an image, this function should be used *after* already removing
    the image from our source, or imgix will just re-fetch and
    replace the image with a new copy even after purging.
    """
    parsed = UrlParser(url)

    # rewrite serving hostnames to their purge counterparts
    if parsed.hostname == g.imgix_domain:
        parsed.hostname = g.imgix_purge_domain
    elif parsed.hostname == g.imgix_gif_domain:
        parsed.hostname = g.imgix_gif_purge_domain

    requests.post(
        "https://api.imgix.com/v2/image/purger",
        auth=(g.secrets["imgix_api_key"], ""),
        data={"url": parsed.unparse()},
    )
def POST_request_promo(self, srnames, is_mobile_web, platform, loid, is_refresh):
    """Request an ad from adzerk for the given subreddit names and
    render the winning promoted link (or adserver markup) for the
    client.  Returns nothing when there is no ad to show."""
    self.OPTIONS_request_promo()

    if not srnames:
        return

    # backwards compat
    if platform is None:
        platform = "mobile_web" if is_mobile_web else "desktop"

    srnames = srnames.split('+')

    # request multiple ads in case some are hidden by the builder due
    # to the user's hides/preferences
    response = adzerk_request(srnames, self.get_uid(loid), platform=platform)

    if not response:
        g.stats.simple_event('adzerk.request.no_promo')
        return

    # for adservers, adzerk returns markup so we pass it to the client
    if isinstance(response, AdserverResponse):
        g.stats.simple_event('adzerk.request.adserver')
        return responsive(response.body)

    res_by_campaign = {r.campaign: r for r in response}
    adserver_click_urls = {r.campaign: r.click_url for r in response}
    tuples = [promote.PromoTuple(r.link, 1., r.campaign) for r in response]
    builder = CampaignBuilder(tuples, wrap=default_thing_wrapper(),
                              keep_fn=promote.promo_keep_fn,
                              num=1, skip=True)
    listing = LinkListing(builder, nextprev=False).listing()
    promote.add_trackers(listing.things, c.site,
                         adserver_click_urls=adserver_click_urls)
    promote.update_served(listing.things)
    if listing.things:
        g.stats.simple_event('adzerk.request.valid_promo')
        if is_refresh:
            g.stats.simple_event('adzerk.request.auto_refresh')

        w = listing.things[0]
        r = res_by_campaign[w.campaign]

        # serve impression pixels from our own pixel domain
        up = UrlParser(r.imp_pixel)
        up.hostname = "pixel.redditmedia.com"
        w.adserver_imp_pixel = up.unparse()
        w.adserver_upvote_pixel = r.upvote_pixel
        w.adserver_downvote_pixel = r.downvote_pixel
        w.adserver_click_url = r.click_url
        w.num = ""
        return responsive(w.render(), space_compress=True)
    else:
        g.stats.simple_event('adzerk.request.skip_promo')
def allowed_media_preview_url(url):
    """Return True when url may be shown as a media preview: either it
    has a static image extension or its host is whitelisted."""
    parsed = UrlParser(url)
    if parsed.has_static_image_extension():
        return True
    return any(is_subdomain(parsed.hostname, allowed_domain)
               for allowed_domain in g.media_preview_domain_whitelist)
def GET_framebuster(self):
    """Redirect logged-in users on a cnamed site into the frame page;
    otherwise answer "fail"."""
    if not (c.site.domain and c.user_is_loggedin):
        return "fail"

    frame_url = UrlParser(c.site.path + "/frame")
    frame_url.put_in_frame()
    c.cname = True
    return self.redirect(frame_url.unparse())
def resize_image(self, image, width=None, file_type=None, censor_nsfw=False,
                 max_ratio=None):
    """Build an imgix URL that resizes/converts (and optionally censors)
    an image.  GIF sources being served as gif/mp4 go through the
    dedicated gif imgix domain.  Raises NotLargeEnough when the
    requested width exceeds the source image's width."""
    url = UrlParser(image['url'])
    # gif sources use the gif domain unless being converted to a
    # non-mp4 still format
    is_gif = url.path.endswith('.gif') and (file_type == 'mp4' or
                                            not file_type)

    if is_gif:
        url.hostname = g.imgix_gif_domain
    else:
        url.hostname = g.imgix_domain

    # Let's encourage HTTPS; it's cool, works just fine on HTTP pages, and
    # will prevent insecure content warnings on HTTPS pages.
    url.scheme = 'https'

    # g.s3_media_direct affects how preview image urls are stored
    # True: http://{s3_media_domain}/mybucket/helloworld.jpg
    # False: http://mybucket/helloworld.jpg
    # If it's True, we'll need to strip the bucket out of the path
    if g.s3_media_direct:
        path_parts = url.path.split('/')
        path_parts.pop(1)
        url.path = '/'.join(path_parts)

    if max_ratio:
        url.update_query(fit='crop')
        # http://www.imgix.com/docs/reference/size#param-crop
        url.update_query(crop='faces,entropy')
        url.update_query(arh=max_ratio)

    if width:
        if width > image['width']:
            raise NotLargeEnough()
        # http://www.imgix.com/docs/reference/size#param-w
        url.update_query(w=width)

    if file_type and file_type in ('gif', 'jpg', 'png', 'mp4'):
        url.update_query(fm=file_type)

    # We need to disable fragmented mp4s for proper playback in Firefox
    if file_type == 'mp4':
        url.update_query(**{'mp4-fragmented': 'false'})

    if censor_nsfw:
        # Do an initial blur to make sure we're getting rid of icky
        # details.
        #
        # http://www.imgix.com/docs/reference/stylize#param-blur
        url.update_query(blur=600)
        # And then add pixellation to help the image compress well.
        #
        # http://www.imgix.com/docs/reference/stylize#param-px
        url.update_query(px=32)
    if g.imgix_signing:
        if is_gif:
            url = self._sign_url(url, g.secrets['imgix_gif_signing_token'])
        else:
            url = self._sign_url(url, g.secrets['imgix_signing_token'])
    return url.unparse()
def add_sr(path, sr_path=True, nocname=False, force_hostname=False, retain_extension=True, force_https=False, force_extension=None): """ Given a path (which may be a full-fledged url or a relative path), parses the path and updates it to include the subreddit path according to the rules set by its arguments: * sr_path: if a cname is not used for the domain, updates the path to include c.site.path. * nocname: deprecated. * force_hostname: if True, force the url's hostname to be updated even if it is already set in the path. If false, the path will still have its domain updated if no hostname is specified in the url. * retain_extension: if True, sets the extention according to c.render_style. * force_https: force the URL scheme to https For caching purposes: note that this function uses: c.render_style, c.site.name """ # don't do anything if it is just an anchor if path.startswith(('#', 'javascript:')): return path u = UrlParser(path) if sr_path: u.path_add_subreddit(c.site) if not u.hostname or force_hostname: u.hostname = get_domain(subreddit=False) if (c.secure and u.is_reddit_url()) or force_https: u.scheme = "https" if force_extension is not None: u.set_extension(force_extension) elif retain_extension: if c.render_style == 'mobile': u.set_extension('mobile') elif c.render_style == 'compact': u.set_extension('compact') # SaidIt CUSTOM elif c.render_style == g.extension_subdomain_mobile_v2_render_style: u.set_extension(g.extension_subdomain_mobile_v2_render_style) return u.unparse()
def maps_from_things(things, boost_only=False):
    """We only know how to do links for now.

    Build the list of field dicts to push to the search index, one per
    thing.  With boost_only, emit only the vote/comment-count fields.
    Things whose author was deleted (or that raise AttributeError) are
    skipped.
    """
    maps = []
    if not boost_only:
        # we can avoid looking these up at all if only the boosts were
        # updated
        author_ids = [thing.author_id for thing in things
                      if hasattr(thing, 'author_id')]
        accounts = Account._byID(author_ids, data=True, return_dict=True)
        sr_ids = [thing.sr_id for thing in things
                  if hasattr(thing, 'sr_id')]
        srs = Subreddit._byID(sr_ids, data=True, return_dict=True)

    for thing in things:
        try:
            d = dict(fullname=thing._fullname,
                     ups=thing._ups,
                     downs=thing._downs,
                     num_comments=getattr(thing, 'num_comments', 0))
            if not boost_only:
                a = accounts[thing.author_id]
                sr = srs[thing.sr_id]
                if a._deleted:
                    # if the author was deleted, we won't updated it in
                    # indextank at all
                    continue
                d.update(dict(
                    fullname=thing._fullname,
                    subreddit=sr.name,
                    reddit=sr.name,
                    text=' '.join([thing.title, a.name, sr.name]),
                    author=a.name,
                    timestamp=thing._date.strftime("%s"),
                    sr_id=str(thing.sr_id),
                    over18=yesno(sr.over_18),
                    is_self=yesno(thing.is_self),
                ))
                if thing.is_self:
                    d['site'] = g.domain
                    if thing.selftext:
                        d['selftext'] = thing.selftext
                else:
                    d['url'] = thing.url
                    d['site'] = ' '.join(
                        UrlParser(thing.url).domain_permutations())
            maps.append(d)
        except AttributeError:
            # incompletely-loaded things are silently skipped
            pass
    return maps
def add_sr(path, sr_path=True, nocname=False, force_hostname=False, retain_extension=True, force_https=False): """ Given a path (which may be a full-fledged url or a relative path), parses the path and updates it to include the subreddit path according to the rules set by its arguments: * sr_path: if a cname is not used for the domain, updates the path to include c.site.path. * nocname: when updating the hostname, overrides the value of c.cname to set the hostname to g.domain. The default behavior is to set the hostname consistent with c.cname. * force_hostname: if True, force the url's hostname to be updated even if it is already set in the path, and subject to the c.cname/nocname combination. If false, the path will still have its domain updated if no hostname is specified in the url. * retain_extension: if True, sets the extention according to c.render_style. * force_https: force the URL scheme to https For caching purposes: note that this function uses: c.cname, c.render_style, c.site.name """ # don't do anything if it is just an anchor if path.startswith(('#', 'javascript:')): return path u = UrlParser(path) if sr_path and (nocname or not c.cname): u.path_add_subreddit(c.site) if not u.hostname or force_hostname: if c.secure: u.hostname = request.host else: u.hostname = get_domain(cname=(c.cname and not nocname), subreddit=False) if (c.secure and u.is_reddit_url()) or force_https: u.scheme = "https" if retain_extension: if c.render_style == 'mobile': u.set_extension('mobile') elif c.render_style == 'compact': u.set_extension('compact') return u.unparse()
def _key_from_url(cls, url):
    """Normalize a url into a utf8 cache key.

    For case-insensitive domains the whole url is lowercased; for
    case-sensitive ones only the hostname is.
    """
    if utils.domain(url) not in g.case_sensitive_domains:
        return _force_utf8(UrlParser.base_url(url.lower()))

    # Convert only hostname to lowercase
    parsed = UrlParser(url)
    parsed.hostname = parsed.hostname.lower()
    return _force_utf8(UrlParser.base_url(parsed.unparse()))
def redirect_to_host(hostname, path=None):
    """Redirect (307) to the specified path and host.

    Defaults to the current request path when path is None.
    """
    if path is None:
        path = request.path

    u = UrlParser(path)
    u.hostname = hostname

    # 307 redirect so request method is retained
    abort(307, location=u.unparse())