def moderator_messages(sr_ids):
    from r2.models import Subreddit

    srs = Subreddit._byID(sr_ids)
    sr_ids = [
        sr_id for sr_id, sr in srs.iteritems()
        if sr.is_moderator_with_perms(c.user, 'mail')
    ]

    def multi_load_tree(sr_ids):
        res = {}
        for sr_id in sr_ids:
            trees = subreddit_messages_nocache(srs[sr_id])
            if trees:
                res[sr_id] = trees
        return res

    res = sgm(g.permacache, sr_ids, miss_fn=multi_load_tree,
              prefix=sr_messages_key(""))

    res = {
        sr_id: filter_new_modmail(srs[sr_id], trees)
        for sr_id, trees in res.iteritems()
    }

    return sorted(chain(*res.values()), key=tree_sort_fn, reverse=True)

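# A minimal sketch of the set-get-multi pattern the sgm() calls in this file
# rely on, inferred from how it is used here rather than taken from its actual
# implementation, and assuming a memcached-style cache object with
# get_multi/set_multi: batch-read the prefixed keys, hand only the misses to
# miss_fn, write miss_fn's results back, and return a dict keyed by the
# caller's original keys. The real helper also accepts the time, stale,
# found_fn, and ignore_set_errors options seen below.
def sgm_sketch(cache, keys, miss_fn, prefix=''):
    keys = list(keys)
    by_prefixed_key = {prefix + str(key): key for key in keys}

    # batch read; anything found is returned under the original key
    found = {}
    hits = cache.get_multi(by_prefixed_key.keys())
    for prefixed_key, value in hits.iteritems():
        found[by_prefixed_key[prefixed_key]] = value

    # only the misses reach miss_fn, and its results are cached for next time
    still_need = [key for key in keys if key not in found]
    if still_need and miss_fn:
        fetched = miss_fn(still_need)
        cache.set_multi({prefix + str(key): value
                         for key, value in fetched.iteritems()})
        found.update(fetched)

    return found
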
def for_srs(cls, srid36, to_omit, count, source, match_set=True):
    # It's usually better to use get_recommendations() than to call this
    # function directly because it does privacy filtering.

    srid36s = tup(srid36)
    to_omit = set(to_omit)
    to_omit.update(srid36s)  # don't show the originals

    rowkeys = ['%s.%s' % (source, srid36) for srid36 in srid36s]

    # fetch multiple sets of recommendations, one for each input srid36
    d = sgm(g.cache, rowkeys, SRRecommendation._byID, prefix='srr.')
    rows = d.values()

    if match_set:
        sorted_recs = SRRecommendation._merge_and_sort_by_count(rows)
        # heuristic: if input set is large, rec should match more than one
        min_count = math.floor(.1 * len(srid36s))
        sorted_recs = (rec[0] for rec in sorted_recs if rec[1] > min_count)
    else:
        sorted_recs = SRRecommendation._merge_roundrobin(rows)

    # remove duplicates and ids listed in to_omit
    filtered = []
    for r in sorted_recs:
        if r not in to_omit:
            filtered.append(r)
            to_omit.add(r)

    return filtered[:count]

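# Hypothetical direct call, shown only to illustrate the arguments and the
# match_set heuristic; the id36s and the 'sims' source label are made up, and
# the comment above points real callers at get_recommendations() instead.
# With three seed subreddits min_count is floor(0.3) = 0, so the count filter
# is a no-op; with twenty seeds it would be 2, keeping only recommendations
# that appear in at least three of the fetched rows.
recs = SRRecommendation.for_srs(
    ['2qh0u', '2qh1i', '2qh33'],  # hypothetical subreddit id36s
    to_omit=[],
    count=10,
    source='sims',  # hypothetical recommendation source name
)
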
def organization_by_ips(ips):
    ips, is_single = tup(ips, ret_is_single=True)

    organization_by_ip = sgm(
        cache=g.gencache,
        keys=ips,
        miss_fn=_organization_by_ips,
        prefix='geoip:org_',
        time=GEOIP_CACHE_TIME,
        ignore_set_errors=True,
    )

    if is_single and organization_by_ip:
        return organization_by_ip[ips[0]]
    else:
        return organization_by_ip

def location_by_ips(ips):
    ips, is_single = tup(ips, ret_is_single=True)

    location_by_ip = sgm(
        cache=g.gencache,
        keys=ips,
        miss_fn=_location_by_ips,
        prefix='geoip:loc_',
        time=GEOIP_CACHE_TIME,
        ignore_set_errors=True,
    )

    if is_single and location_by_ip:
        return location_by_ip[ips[0]]
    else:
        return location_by_ip

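# The two geoip wrappers above assume miss functions with this shape: sgm
# hands them only the uncached IPs and expects a dict keyed by those same IPs,
# so addresses the lookup cannot resolve are simply omitted (and retried on a
# later request). The canned data below stands in for a real geoip call.
def _location_by_ips_sketch(ips):
    known = {'127.0.0.1': {'country_code': None, 'region': None}}
    return {ip: known[ip] for ip in ips if ip in known}

# Because of tup(ips, ret_is_single=True), both wrappers accept either a
# single address or a list: location_by_ips('10.0.0.1') returns one record
# (or an empty dict on a miss), while location_by_ips(['10.0.0.1', '10.0.0.2'])
# returns a dict keyed by IP.
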
def get_live_promotions(sr_names):
    sanitized_names = [SPECIAL_NAMES.get(name, name) for name in sr_names]

    promos_by_sanitized_name = sgm(
        cache=g.gencache,
        keys=sanitized_names,
        miss_fn=_get_live_promotions,
        prefix='srpromos:',
        time=60,
        stale=True,
    )

    promos_by_srname = {
        REVERSED_NAMES.get(name, name): val
        for name, val in promos_by_sanitized_name.iteritems()
    }

    return itertools.chain.from_iterable(promos_by_srname.itervalues())

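# SPECIAL_NAMES and REVERSED_NAMES are assumed to be a pair of inverse maps
# that swap subreddit names which would not make safe or unambiguous cache
# keys (names containing spaces, for instance) for a sentinel before the sgm
# call, then swap them back afterwards. The entry below is illustrative, not
# the project's actual constant.
SPECIAL_NAMES_SKETCH = {' reddit.com': '_reddit.com'}
REVERSED_NAMES_SKETCH = {v: k for k, v in SPECIAL_NAMES_SKETCH.iteritems()}
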
def normalized_rising(sr_ids):
    if not sr_ids:
        return []

    tuples_by_srid = sgm(
        cache=g.gencache,
        keys=sr_ids,
        miss_fn=get_rising_tuples,
        prefix='rising:',
        time=g.page_cache_time,
    )

    merged = heapq.merge(*tuples_by_srid.values())

    return [link_name for norm_score, score, link_name in merged]

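# heapq.merge assumes each per-subreddit list is already sorted and lazily
# yields one globally sorted stream, so the order of the combined listing is
# whatever ordering get_rising_tuples bakes into its tuples. The made-up
# values below assume the scores are stored negated so the ascending merge
# produces a best-first listing; the real storage format is not shown here.
import heapq

tuples_by_srid_example = {
    1: [(-0.9, -450, 'link_a'), (-0.2, -80, 'link_c')],
    2: [(-0.7, -300, 'link_b')],
}
merged_example = heapq.merge(*tuples_by_srid_example.values())
print [name for norm_score, score, name in merged_example]
# -> ['link_a', 'link_b', 'link_c']
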
def moderator_messages(sr_ids):
    from r2.models import Subreddit

    srs = Subreddit._byID(sr_ids)
    sr_ids = [sr_id for sr_id, sr in srs.iteritems()
              if sr.is_moderator_with_perms(c.user, "mail")]

    def multi_load_tree(sr_ids):
        res = {}
        for sr_id in sr_ids:
            trees = subreddit_messages_nocache(srs[sr_id])
            if trees:
                res[sr_id] = trees
        return res

    res = sgm(g.permacache, sr_ids, miss_fn=multi_load_tree,
              prefix=sr_messages_key(""))

    return sorted(chain(*res.values()), key=tree_sort_fn, reverse=True)

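# Hypothetical call site, assuming a helper is available that returns the ids
# of the subreddits c.user moderates; shown only to illustrate that the
# function takes candidate sr_ids and does its own 'mail' permission filtering
# before building the message trees.
candidate_sr_ids = Subreddit.reverse_moderator_ids(c.user)  # assumed helper
modmail_trees = moderator_messages(candidate_sr_ids)
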
def _fast_query(cls, thing1s, thing2s, name, data=True, eager_load=True,
                thing_data=True, thing_stale=False):
    """looks up all the relationships between thing1_ids and thing2_ids
    and caches them"""

    cache_key_lookup = dict()

    # We didn't find these keys in the cache, look them up in the
    # database
    def lookup_rel_ids(uncached_keys):
        rel_ids = {}

        # Lookup thing ids and name from cache key
        t1_ids = set()
        t2_ids = set()
        names = set()
        for cache_key in uncached_keys:
            (thing1, thing2, name) = cache_key_lookup[cache_key]
            t1_ids.add(thing1._id)
            t2_ids.add(thing2._id)
            names.add(name)

        q = cls._query(
            cls.c._thing1_id == t1_ids,
            cls.c._thing2_id == t2_ids,
            cls.c._name == names)

        for rel in q:
            rel_ids[cls._fast_cache_key_from_parts(
                cls.__name__,
                rel._thing1_id,
                rel._thing2_id,
                str(rel._name)
            )] = rel._id

        for cache_key in uncached_keys:
            if cache_key not in rel_ids:
                rel_ids[cache_key] = None

        return rel_ids

    # make lookups for thing ids and names
    thing1_dict = dict((t._id, t) for t in tup(thing1s))
    thing2_dict = dict((t._id, t) for t in tup(thing2s))

    names = map(str, tup(name))

    # permute all of the pairs via cartesian product
    rel_tuples = itertools.product(
        thing1_dict.values(),
        thing2_dict.values(),
        names)

    # create cache keys for all permutations and initialize lookup
    for t in rel_tuples:
        thing1, thing2, name = t
        cache_key = cls._fast_cache_key_from_parts(
            cls.__name__,
            thing1._id,
            thing2._id,
            name)
        cache_key_lookup[cache_key] = t

    # get the relation ids from the cache or query the db
    res = sgm(cls._fast_cache, cache_key_lookup.keys(), lookup_rel_ids)

    # get the relation objects
    rel_ids = {rel_id for rel_id in res.itervalues()
               if rel_id is not None}
    rels = cls._byID_rel(
        rel_ids,
        eager_load=eager_load,
        thing_stale=thing_stale)

    # Takes aggregated results from cache and db (res) and transforms
    # the values from ids to Relations.
    res_obj = {}
    for cache_key, rel_id in res.iteritems():
        t = cache_key_lookup[cache_key]
        rel = rels[rel_id] if rel_id is not None else None
        res_obj[t] = rel

    return res_obj

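# Hypothetical use, assuming an account-to-account Friend relation exists in
# this codebase. The point is the shape of the result: a dict keyed by
# (thing1, thing2, name) tuples whose values are Relation objects, or None
# where no such relationship exists.
rels = Friend._fast_query(viewer, candidates, 'friend')
friended = [thing2 for (thing1, thing2, name), rel in rels.iteritems()
            if rel is not None]
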
def _byID(cls, ids, return_dict=True, properties=None):
    ids, is_single = tup(ids, True)

    if properties is not None:
        asked_properties = frozenset(properties)
        willask_properties = set(properties)

    if not len(ids):
        if is_single:
            raise InvariantException("whastis?")
        return {}

    # all keys must be strings or directly convertable to strings
    assert all(isinstance(_id, basestring) or str(_id) for _id in ids)

    def reject_bad_partials(cached, still_need):
        # tell sgm that the match it found in the cache isn't good
        # enough if it's a partial that doesn't include our
        # properties. we still need to look those items up to get
        # the properties that we're after
        stillfind = set()

        for k, v in cached.iteritems():
            if properties is None:
                if v._partial is not None:
                    # there's a partial in the cache but we're not
                    # looking for partials
                    stillfind.add(k)
            elif v._partial is not None and not asked_properties.issubset(v._partial):
                # we asked for a partial, and this is a partial,
                # but it doesn't have all of the properties that
                # we need
                stillfind.add(k)

                # other callers in our request are now expecting
                # to find the properties that were on that
                # partial, so we'll have to preserve them
                for prop in v._partial:
                    willask_properties.add(prop)

        for k in stillfind:
            del cached[k]
            still_need.add(k)

    def lookup(l_ids):
        if properties is None:
            rows = cls._cf.multiget(l_ids, column_count=max_column_count)

            # if we got max_column_count columns back for a row, it was
            # probably clipped. in this case, we should fetch the remaining
            # columns for that row and add them to the result.
            if cls._fetch_all_columns:
                for key, row in rows.iteritems():
                    if len(row) == max_column_count:
                        last_column_seen = next(reversed(row))
                        cols = cls._cf.xget(key,
                                            column_start=last_column_seen,
                                            buffer_size=max_column_count)
                        row.update(cols)
        else:
            rows = cls._cf.multiget(l_ids, columns=willask_properties)

        l_ret = {}
        for t_id, row in rows.iteritems():
            t = cls._from_serialized_columns(t_id, row)
            if properties is not None:
                # make sure that the item is marked as a _partial
                t._partial = willask_properties
            l_ret[t._id] = t

        return l_ret

    ret = sgm(
        cache=cls._local_cache,
        keys=ids,
        miss_fn=lookup,
        prefix=cls._cache_prefix(),
        found_fn=reject_bad_partials,
    )

    if is_single and not ret:
        raise NotFound("<%s %r>" % (cls.__name__, ids[0]))
    elif is_single:
        assert len(ret) == 1
        return ret.values()[0]
    elif return_dict:
        return ret
    else:
        return filter(None, (ret.get(i) for i in ids))

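# Assumed contract of sgm's found_fn hook, which reject_bad_partials above
# depends on: it receives the cache hits (keyed like the caller's keys) and
# the mutable set of keys still to be fetched, and may demote hits back into
# misses so that miss_fn reloads them. A stripped-down version of that shape:
def found_fn_sketch(cached, still_need):
    # reject cache hits whose value is None so miss_fn re-fetches them;
    # reject_bad_partials applies the same idea to partially-loaded objects
    for key, value in cached.items():
        if value is None:
            del cached[key]
            still_need.add(key)
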