Example #1
0
    def process_message(msgs, chan):
        """Update get_domain_links(), the Links by domain precomputed query.

        get_domain_links() is a CachedResult which is stored in permacache. To
        update these objects we need to do a read-modify-write which requires
        obtaining a lock. Sharding these updates by domain allows us to run
        multiple consumers (but ideally just one per shard) to avoid lock
        contention.

        """

        from r2.lib.db.queries import add_queries, get_domain_links

        link_names = {msg.body for msg in msgs}
        links = Link._by_fullname(link_names, return_dict=False)
        print 'Processing %r' % (links,)

        links_by_domain = defaultdict(list)
        for link in links:
            parsed = UrlParser(link.url)

            # update the listings for all permutations of the link's domain
            for domain in parsed.domain_permutations():
                links_by_domain[domain].append(link)

        for d, links in links_by_domain.iteritems():
            with g.stats.get_timer("link_vote_processor.domain_queries"):
                add_queries(
                    queries=[
                        get_domain_links(d, sort, "all") for sort in SORTS],
                    insert_items=links,
                )
Example #2
0
    def process_message(msgs, chan):
        """Update get_domain_links(), the Links by domain precomputed query.

        get_domain_links() is a CachedResult which is stored in permacache. To
        update these objects we need to do a read-modify-write which requires
        obtaining a lock. Sharding these updates by domain allows us to run
        multiple consumers (but ideally just one per shard) to avoid lock
        contention.

        """

        from r2.lib.db.queries import add_queries, get_domain_links

        link_names = {msg.body for msg in msgs}
        links = Link._by_fullname(link_names, return_dict=False)
        print 'Processing %r' % (links, )

        links_by_domain = defaultdict(list)
        for link in links:
            parsed = UrlParser(link.url)

            # update the listings for all permutations of the link's domain
            for domain in parsed.domain_permutations():
                links_by_domain[domain].append(link)

        for d, links in links_by_domain.iteritems():
            with g.stats.get_timer("link_vote_processor.domain_queries"):
                add_queries(
                    queries=[
                        get_domain_links(d, sort, "all") for sort in SORTS
                    ],
                    insert_items=links,
                )
Example #3
0
def add_to_domain_query_q(link):
    """Enqueue *link* for by-domain listing reprocessing.

    Links whose URL produces no domain permutations are skipped.
    When sharded queues are enabled, the destination queue is chosen
    by hashing the link's hostname into one of ten shards.
    """
    parsed = UrlParser(link.url)
    if not parsed.domain_permutations():
        # no valid domains found
        return

    if not g.shard_domain_query_queues:
        queue_name = "domain_query_q"
    else:
        queue_name = "domain_query_%s_q" % (hash(parsed.hostname) % 10)
    amqp.add_item(queue_name, link._fullname)
Example #4
0
def add_to_domain_query_q(link):
    """Queue *link* so its domain listings get recomputed.

    Does nothing when the URL yields no domain permutations. With
    sharding enabled, the hostname hash picks one of ten shard queues;
    otherwise everything goes through the single shared queue.
    """
    url_parts = UrlParser(link.url)
    domains = url_parts.domain_permutations()
    if not domains:
        # nothing worth indexing for this URL
        return

    queue = "domain_query_q"
    if g.shard_domain_query_queues:
        shard = hash(url_parts.hostname) % 10
        queue = "domain_query_%s_q" % shard
    amqp.add_item(queue, link._fullname)
Example #5
0
    def process(thing):
        """Yield (key, value, timestamp, fullname) listing entries for thing.

        For every interval whose cutoff the thing's timestamp clears,
        entries are emitted for the author's user listings; non-spam
        links additionally get subreddit and per-domain listing entries,
        each under the top / upvote / controversial sorts.
        """
        if thing.deleted:
            return

        thing_cls = thingcls_by_name[thing.thing_type]
        fname = make_fullname(thing_cls, thing.thing_id)
        thing_score = score(thing.ups, thing.downs)
        thing_upvotes = upvotes(thing.ups)
        thing_controversy = controversy(thing.ups, thing.downs)

        for interval, cutoff in cutoff_by_interval.iteritems():
            # the thing is too old for this interval
            if thing.timestamp < cutoff:
                continue

            user_key = "user/%s/top/%s/%d" % (
                thing.thing_type, interval, thing.author_id)
            yield (user_key, thing_score, thing.timestamp, fname)
            user_key = "user/%s/%s/%s/%d" % (
                thing.thing_type, g.voting_upvote_path, interval,
                thing.author_id)
            yield (user_key, thing_upvotes, thing.timestamp, fname)
            user_key = "user/%s/%s/%s/%d" % (
                thing.thing_type, g.voting_controversial_path, interval,
                thing.author_id)
            yield (user_key, thing_controversy, thing.timestamp, fname)

            if thing.spam:
                continue
            if thing.thing_type != "link":
                continue

            sr_key = "sr/link/top/%s/%d" % (interval, thing.sr_id)
            yield (sr_key, thing_score, thing.timestamp, fname)
            sr_key = "sr/link/%s/%s/%d" % (
                g.voting_upvote_path, interval, thing.sr_id)
            yield (sr_key, thing_upvotes, thing.timestamp, fname)
            sr_key = "sr/link/%s/%s/%d" % (
                g.voting_controversial_path, interval, thing.sr_id)
            yield (sr_key, thing_controversy, thing.timestamp, fname)

            if not thing.url:
                continue
            try:
                parsed = UrlParser(thing.url)
            except ValueError:
                continue

            for domain in parsed.domain_permutations():
                dom_key = "domain/link/top/%s/%s" % (interval, domain)
                yield (dom_key, thing_score, thing.timestamp, fname)
                dom_key = "domain/link/%s/%s/%s" % (
                    g.voting_upvote_path, interval, domain)
                yield (dom_key, thing_upvotes, thing.timestamp, fname)
                dom_key = "domain/link/%s/%s/%s" % (
                    g.voting_controversial_path, interval, domain)
                yield (dom_key, thing_controversy, thing.timestamp, fname)
Example #6
0
    def process(thing):
        """Yield (key, value, timestamp, fullname) listing entries for thing.

        Every qualifying interval gets user-listing entries for the
        author under the top and controversial sorts; non-spam links
        also get subreddit and per-domain entries.
        """
        if thing.deleted:
            return

        thing_cls = thingcls_by_name[thing.thing_type]
        fname = make_fullname(thing_cls, thing.thing_id)
        thing_score = score(thing.ups, thing.downs)
        thing_controversy = controversy(thing.ups, thing.downs)

        def entry(key, value):
            # every emitted tuple shares the timestamp and fullname
            return (key, value, thing.timestamp, fname)

        for interval, cutoff in cutoff_by_interval.iteritems():
            if thing.timestamp < cutoff:
                continue

            yield entry("user/%s/top/%s/%d" % (
                thing.thing_type, interval, thing.author_id), thing_score)
            yield entry("user/%s/controversial/%s/%d" % (
                thing.thing_type, interval, thing.author_id),
                thing_controversy)

            # spam and non-link things get no sr/domain entries
            if thing.spam or thing.thing_type != "link":
                continue

            yield entry("sr/link/top/%s/%d" % (interval, thing.sr_id),
                        thing_score)
            yield entry("sr/link/controversial/%s/%d" % (
                interval, thing.sr_id), thing_controversy)

            if not thing.url:
                continue
            try:
                parsed = UrlParser(thing.url)
            except ValueError:
                continue

            for domain in parsed.domain_permutations():
                yield entry("domain/link/top/%s/%s" % (interval, domain),
                            thing_score)
                yield entry("domain/link/controversial/%s/%s" % (
                    interval, domain), thing_controversy)
Example #7
0
    def time_listing_iter(self, thing, cutoff_by_interval):
        """Yield (key, value, timestamp, fullname) entries for *thing*.

        For each interval the thing's timestamp clears, one entry per
        LISTING_SORTS sort is produced for the author's user listings;
        non-spam links also get subreddit and per-domain entries.
        """
        if thing.deleted:
            return

        fname = make_fullname(self.thing_cls, thing.thing_id)
        ts = thing.timestamp
        sort_values = {name: fn(thing)
                       for name, fn in self.LISTING_SORTS.iteritems()}

        for interval, cutoff in cutoff_by_interval.iteritems():
            if ts < cutoff:
                continue

            for sort_name, val in sort_values.iteritems():
                yield (self.make_key("user", sort_name, interval,
                                     thing.author_id),
                       val, ts, fname)

            # spam and non-link things only appear in user listings
            if thing.spam or thing.thing_type != "link":
                continue

            for sort_name, val in sort_values.iteritems():
                yield (self.make_key("sr", sort_name, interval,
                                     thing.sr_id),
                       val, ts, fname)

            if not thing.url:
                continue
            try:
                parsed = UrlParser(thing.url)
            except ValueError:
                continue

            for domain in parsed.domain_permutations():
                for sort_name, val in sort_values.iteritems():
                    yield (self.make_key("domain", sort_name, interval,
                                         domain),
                           val, ts, fname)
Example #8
0
    def time_listing_iter(self, thing, cutoff_by_interval):
        """Yield listing entries (key, value, timestamp, fullname) for thing.

        Emits one entry per configured sort for the author's user
        listings in every interval the timestamp falls inside; links
        that are not spam additionally get subreddit and domain entries.
        """
        if thing.deleted:
            return

        fullname = make_fullname(self.thing_cls, thing.thing_id)
        scores = {k: func(thing) for k, func in self.LISTING_SORTS.iteritems()}

        for interval, cutoff in cutoff_by_interval.iteritems():
            if thing.timestamp < cutoff:
                continue

            # author listings always get entries
            for sort in scores:
                key = self.make_key("user", sort, interval, thing.author_id)
                yield (key, scores[sort], thing.timestamp, fullname)

            if thing.spam:
                continue
            if thing.thing_type != "link":
                continue

            for sort in scores:
                key = self.make_key("sr", sort, interval, thing.sr_id)
                yield (key, scores[sort], thing.timestamp, fullname)

            if not thing.url:
                continue
            try:
                parsed = UrlParser(thing.url)
            except ValueError:
                continue

            for dom in parsed.domain_permutations():
                for sort in scores:
                    key = self.make_key("domain", sort, interval, dom)
                    yield (key, scores[sort], thing.timestamp, fullname)