Example #1
def location_by_ips(ips):
    ips, is_single = tup(ips, ret_is_single=True)
    location_by_ip = sgm(g.cache, ips, miss_fn=_location_by_ips, prefix="location_by_ip", time=GEOIP_CACHE_TIME)
    if is_single and location_by_ip:
        return location_by_ip[ips[0]]
    else:
        return location_by_ip
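The miss function sgm calls receives only the keys it could not find in the cache and must return a dict keyed by those same values so the results can be written back with set_multi. A minimal sketch of that contract with a stand-in lookup (the real _location_by_ips is defined elsewhere in this codebase):

def _location_by_ips(ips):
    # sgm passes just the cache misses; the returned dict is cached and
    # merged into the hits. geoip_backend is a hypothetical stand-in.
    return dict((ip, geoip_backend.lookup(ip)) for ip in ips)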
Example #2
    def _byID(cls, ids):
        ids, is_single = tup(ids, True)

        if not len(ids):
            if is_single:
                raise InvariantException("whastis?")
            else:
                return {}

        # all keys must be strings or directly convertible to strings
        assert all(isinstance(_id, basestring) and str(_id) for _id in ids)

        def lookup(l_ids):
            rows = cls.cf.multiget(l_ids, column_count=max_column_count)

            l_ret = {}
            for t_id, row in rows.iteritems():
                t = cls._from_serialized_columns(t_id, row)
                l_ret[t._id] = t

            return l_ret

        ret = cache.sgm(thing_cache, ids, lookup, prefix=cls._cache_prefix())

        if is_single and not ret:
            raise NotFound("<%s %r>" % (cls.__name__,
                                        ids[0]))
        elif is_single:
            assert len(ret) == 1
            return ret.values()[0]

        return ret
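Nearly every example here opens with tup(), which normalizes a scalar-or-list argument and reports which form the caller passed. A minimal sketch of that helper, reconstructed from how these call sites use it (the bare second positional argument and ret_is_single are the same flag):

def tup(item, ret_is_single=False):
    # strings have no __iter__ in Python 2, so they count as single items
    if hasattr(item, '__iter__'):
        return (item, False) if ret_is_single else item
    else:
        return ((item,), True) if ret_is_single else (item,)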
Example #3
def get_live_promotions(sr_names):
    sanitized_names = [SPECIAL_NAMES.get(name, name) for name in sr_names]
    promos_by_sanitized_name = sgm(
        g.cache, sanitized_names, miss_fn=_get_live_promotions, prefix="live_promotions", time=60, stale=True
    )
    promos_by_srname = {REVERSED_NAMES.get(name, name): val for name, val in promos_by_sanitized_name.iteritems()}
    return itertools.chain.from_iterable(promos_by_srname.itervalues())
Example #4
    def _byID(cls, ids, data=False, return_dict=True, extra_props=None):
        ids, single = tup(ids, True)
        prefix = thing_prefix(cls.__name__)

        def items_db(ids):
            items = cls._get_item(cls._type_id, ids)
            for i in items.keys():
                items[i] = cls._build(i, items[i])

            return items

        bases = sgm(cache, ids, items_db, prefix)

        #check to see if we found everything we asked for
        if any(i not in bases for i in ids):
            missing = [i for i in ids if i not in bases]
            raise NotFound, '%s %s' % (cls.__name__, missing)

        if data:
            need = [v for v in bases.itervalues() if not v._loaded]
            if need:
                cls._load_multi(need)

        #e.g. add the sort prop
        if extra_props:
            for _id, props in extra_props.iteritems():
                for k, v in props.iteritems():
                    bases[_id].__setattr__(k, v, False)

        if single:
            return bases[ids[0]]
        elif return_dict:
            return bases
        else:
            return filter(None, (bases.get(i) for i in ids))
Example #5
    def _byID(cls, ids, data=False, return_dict=True, extra_props=None,
              stale=False, ignore_missing=False):
        ids, single = tup(ids, True)
        prefix = thing_prefix(cls.__name__)

        if not all(x <= tdb.MAX_THING_ID for x in ids):
            raise NotFound('huge thing_id in %r' % ids)

        def count_found(ret, still_need):
            cls._cache.stats.cache_report(
                hits=len(ret), misses=len(still_need),
                cache_name='sgm.%s' % cls.__name__)

        if not cls._cache.stats:
            count_found = None

        def items_db(ids):
            items = cls._get_item(cls._type_id, ids)
            for i in items.keys():
                items[i] = cls._build(i, items[i])

            return items

        bases = sgm(cls._cache, ids, items_db, prefix, stale=stale,
                    found_fn=count_found)

        # Check to see if we found everything we asked for
        missing = []
        for i in ids:
            if i not in bases:
                missing.append(i)
            elif bases[i] and bases[i]._id != i:
                g.log.error("thing.py: Doppleganger on byID: %s got %s for %s" %
                            (cls.__name__, bases[i]._id, i))
                bases[i] = items_db([i]).values()[0]
                bases[i]._cache_myself()
        if missing and not ignore_missing:
            raise NotFound, '%s %s' % (cls.__name__, missing)
        for i in missing:
            ids.remove(i)

        if data:
            need = []
            for v in bases.itervalues():
                if not v._loaded:
                    need.append(v)
            if need:
                cls._load_multi(need)

        if extra_props:
            for _id, props in extra_props.iteritems():
                for k, v in props.iteritems():
                    bases[_id].__setattr__(k, v, False)

        if single:
            return bases[ids[0]] if ids else None
        elif return_dict:
            return bases
        else:
            return filter(None, (bases.get(i) for i in ids))
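All of these call sites share the same read-through shape. A minimal sketch of what sgm ("set/get multi") does, assuming a memcached-style cache exposing get_multi/set_multi; the real version also handles stale reads and per-key stats:

def sgm(cache, keys, miss_fn, prefix='', time=0, found_fn=None, **kw):
    keys = list(keys)
    ret = cache.get_multi(keys, prefix=prefix)           # cache hits
    still_need = set(k for k in keys if k not in ret)    # cache misses
    if found_fn:
        # hook may mutate both collections, e.g. to count stats or
        # reject unusable hits (see Example #39)
        found_fn(ret, still_need)
    if still_need and miss_fn:
        fresh = miss_fn(list(still_need))                # compute misses
        cache.set_multi(fresh, prefix=prefix, time=time) # write back
        ret.update(fresh)
    return ret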
Example #6
def normalized_hot(sr_ids, obey_age_limit=True, ageweight=None):
    timer = g.stats.get_timer("normalized_hot")
    timer.start()

    if not sr_ids:
        return []

    if ageweight and feature.is_enabled("scaled_normalized_hot"):
        tuples_by_srid = get_hot_tuples(sr_ids, ageweight=ageweight)
    else:
        tuples_by_srid = sgm(g.cache, sr_ids, miss_fn=get_hot_tuples,
                             prefix='normalized_hot', time=g.page_cache_time)

    if obey_age_limit:
        cutoff = datetime.now(g.tz) - timedelta(days=g.HOT_PAGE_AGE)
        oldest = epoch_seconds(cutoff)
    else:
        oldest = 0.

    merged = heapq.merge(*tuples_by_srid.values())
    generator = (link_name for ehot, hot, link_name, timestamp in merged
                           if timestamp > oldest)
    ret = list(itertools.islice(generator, MAX_LINKS))
    timer.stop()
    return ret
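heapq.merge only yields a globally sorted stream when each input sequence is itself sorted ascending, which is why the per-subreddit tuple lists are stored pre-sorted. A toy illustration of the merge step in isolation:

import heapq

a = [(0.1, 'link_x'), (0.4, 'link_y')]  # each input sorted ascending
b = [(0.2, 'link_z')]
print list(heapq.merge(a, b))
# [(0.1, 'link_x'), (0.2, 'link_z'), (0.4, 'link_y')]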
Example #7
    def _byID(cls, ids, data=False, return_dict=True, extra_props=None, stale=False):
        ids, single = tup(ids, True)
        prefix = thing_prefix(cls.__name__)

        if not all(x <= tdb.MAX_THING_ID for x in ids):
            raise NotFound("huge thing_id in %r" % ids)

        def count_found(ret, still_need):
            cache.stats.cache_report(hits=len(ret), misses=len(still_need), cache_name="sgm.%s" % cls.__name__)

        if not cache.stats:
            count_found = None

        def items_db(ids):
            items = cls._get_item(cls._type_id, ids)
            for i in items.keys():
                items[i] = cls._build(i, items[i])

            return items

        bases = sgm(cache, ids, items_db, prefix, stale=stale, found_fn=count_found)

        # check to see if we found everything we asked for
        for i in ids:
            if i not in bases:
                missing = [i for i in ids if i not in bases]
                raise NotFound, "%s %s" % (cls.__name__, missing)
            if bases[i] and bases[i]._id != i:
                g.log.error("thing.py: Doppleganger on byID: %s got %s for %s" % (cls.__name__, bases[i]._id, i))
                bases[i] = items_db([i]).values()[0]
                bases[i]._cache_myself()

        if data:
            need = []
            for v in bases.itervalues():
                v._asked_for_data = True
                if not v._loaded:
                    need.append(v)
            if need:
                cls._load_multi(need)
        ### The following is really handy for debugging who's forgetting data=True:
        #       else:
        #           for v in bases.itervalues():
        #                if v._id in (1, 2, 123):
        #                    raise ValueError

        # e.g. add the sort prop
        if extra_props:
            for _id, props in extra_props.iteritems():
                for k, v in props.iteritems():
                    bases[_id].__setattr__(k, v, False)

        if single:
            return bases[ids[0]]
        elif return_dict:
            return bases
        else:
            return filter(None, (bases.get(i) for i in ids))
Example #8
    def _by_name(cls, names, stale=False, _update = False):
        '''
        Usages:
        1. Subreddit._by_name('funny') # single sr name
        Searches for a single subreddit. Returns a single Subreddit object or
        raises NotFound if the subreddit doesn't exist.
        2. Subreddit._by_name(['aww','iama']) # list of sr names
        Searches for a list of subreddits. Returns a dict mapping srnames to
        Subreddit objects. Items that were not found are omitted from the dict.
        If no items are found, an empty dict is returned.
        '''
        #lower name here so there is only one cache
        names, single = tup(names, True)

        to_fetch = {}
        ret = {}

        for name in names:
            ascii_only = str(name.decode("ascii", errors="ignore"))
            lname = ascii_only.lower()

            if lname in cls._specials:
                ret[name] = cls._specials[lname]
            elif len(lname) > Subreddit.MAX_SRNAME_LENGTH:
                g.log.debug("Subreddit._by_name() ignoring invalid srname (too long): %s", lname)
            else:
                to_fetch[lname] = name

        if to_fetch:
            def _fetch(lnames):
                q = cls._query(lower(cls.c.name) == lnames,
                               cls.c._spam == (True, False),
                               limit = len(lnames),
                               data=True)
                try:
                    srs = list(q)
                except UnicodeEncodeError:
                    print "Error looking up SRs %r" % (lnames,)
                    raise

                return dict((sr.name.lower(), sr._id)
                            for sr in srs)

            srs = {}
            srids = sgm(g.cache, to_fetch.keys(), _fetch, prefix='subreddit.byname', stale=stale)
            if srids:
                srs = cls._byID(srids.values(), data=True, return_dict=False, stale=stale)

            for sr in srs:
                ret[to_fetch[sr.name.lower()]] = sr

        if ret and single:
            return ret.values()[0]
        elif not ret and single:
            raise NotFound, 'Subreddit %s' % name
        else:
            return ret
Example #9
def organization_by_ips(ips):
    ips, is_single = tup(ips, ret_is_single=True)
    organization_by_ip = sgm(g.cache, ips, miss_fn=_organization_by_ips,
                             prefix='organization_by_ip',
                             time=GEOIP_CACHE_TIME)
    if is_single and organization_by_ip:
        return organization_by_ip[ips[0]]
    else:
        return organization_by_ip
Example #10
        def _fast_query(cls, thing1s, thing2s, name, data=True):
            """looks up all the relationships between thing1_ids and thing2_ids
            and caches them"""
            prefix = thing_prefix(cls.__name__)

            thing1_dict = dict((t._id, t) for t in tup(thing1s))
            thing2_dict = dict((t._id, t) for t in tup(thing2s))

            thing1_ids = thing1_dict.keys()
            thing2_ids = thing2_dict.keys()

            name = tup(name)

            pairs = set((x, y, n)
                        for x in thing1_ids
                        for y in thing2_ids
                        for n in name)

            def items_db(pairs):
                t1_ids = set()
                t2_ids = set()
                names = set()
                for t1, t2, name in pairs:
                    t1_ids.add(t1)
                    t2_ids.add(t2)
                    names.add(name)

                q = cls._query(cls.c._thing1_id == t1_ids,
                               cls.c._thing2_id == t2_ids,
                               cls.c._name == names,
                               eager_load = True,
                               data = data)

                rel_ids = {}
                for rel in q:
                    #TODO an alternative for multiple
                    #relations with the same keys
                    #l = rel_ids.setdefault((rel._thing1_id, rel._thing2_id), [])
                    #l.append(rel._id)
                    rel_ids[(rel._thing1._id, rel._thing2._id, rel._name)] = rel._id
                
                for p in pairs:
                    if p not in rel_ids:
                        rel_ids[p] = None
                        
                return rel_ids

            res = sgm(cache, pairs, items_db, prefix)
            #convert the keys back into objects
            #we can assume the rels will be in the cache and just call
            #_byID lots
            res_obj = {}
            for k, rid in res.iteritems():
                obj_key = (thing1_dict[k[0]], thing2_dict[k[1]], k[2])
                res_obj[obj_key] = cls._byID(rid, data=data) if rid else None
                
            return res_obj
Example #11
def normalized_rising(sr_ids):
    if not sr_ids:
        return []

    tuples_by_srid = sgm(g.cache, sr_ids, miss_fn=get_rising_tuples,
                         prefix='normalized_rising', time=g.page_cache_time)

    merged = heapq.merge(*tuples_by_srid.values())

    return [link_name for norm_score, score, link_name in merged]
Example #12
        def _fast_query(cls, thing1s, thing2s, name, data=True, eager_load=True, thing_data=False):
            """looks up all the relationships between thing1_ids and
               thing2_ids and caches them"""
            prefix = thing_prefix(cls.__name__)

            thing1_dict = dict((t._id, t) for t in tup(thing1s))
            thing2_dict = dict((t._id, t) for t in tup(thing2s))

            thing1_ids = thing1_dict.keys()
            thing2_ids = thing2_dict.keys()

            name = tup(name)

            # permute all of the pairs
            pairs = set((x, y, n) for x in thing1_ids for y in thing2_ids for n in name)

            def lookup_rel_ids(pairs):
                rel_ids = {}

                t1_ids = set()
                t2_ids = set()
                names = set()
                for t1, t2, name in pairs:
                    t1_ids.add(t1)
                    t2_ids.add(t2)
                    names.add(name)

                if t1_ids and t2_ids and names:
                    q = cls._query(cls.c._thing1_id == t1_ids, cls.c._thing2_id == t2_ids, cls.c._name == names)
                else:
                    q = []

                for rel in q:
                    rel_ids[(rel._thing1_id, rel._thing2_id, rel._name)] = rel._id

                for p in pairs:
                    if p not in rel_ids:
                        rel_ids[p] = None

                return rel_ids

            # get the relation ids from the cache or query the db
            res = sgm(cls._cache, pairs, lookup_rel_ids, prefix)

            # get the relation objects
            rel_ids = {rel_id for rel_id in res.itervalues() if rel_id is not None}
            rels = cls._byID_rel(rel_ids, data=data, eager_load=eager_load, thing_data=thing_data)

            res_obj = {}
            for (thing1_id, thing2_id, name), rel_id in res.iteritems():
                pair = (thing1_dict[thing1_id], thing2_dict[thing2_id], name)
                rel = rels[rel_id] if rel_id is not None else None
                res_obj[pair] = rel

            return res_obj
Example #13
def location_by_ips(ips):
    ips, is_single = tup(ips, ret_is_single=True)
    location_by_ip = sgm(g.cache,
                         ips,
                         miss_fn=_location_by_ips,
                         prefix='location_by_ip',
                         time=GEOIP_CACHE_TIME)
    if is_single and location_by_ip:
        return location_by_ip[ips[0]]
    else:
        return location_by_ip
Example #14
def organization_by_ips(ips):
    ips, is_single = tup(ips, ret_is_single=True)
    organization_by_ip = sgm(g.cache,
                             ips,
                             miss_fn=_organization_by_ips,
                             prefix='organization_by_ip',
                             time=GEOIP_CACHE_TIME)
    if is_single and organization_by_ip:
        return organization_by_ip[ips[0]]
    else:
        return organization_by_ip
Example #15
    def get(cls, sr_ids):
        """Return a dictionary of sr_id -> list of ads for each of sr_ids"""
        # Mangling: Caller convention is to use empty string for FRONT_PAGE
        sr_ids = [(sr_id or cls.FRONT_PAGE) for sr_id in sr_ids]
        adweights = sgm(cls.cache, sr_ids, cls._load_multi,
                        prefix=cls.cache_prefix, stale=True)
        results = {sr_id: cls.from_columns(adweights[sr_id])
                   for sr_id in adweights}
        if cls.FRONT_PAGE in results:
            results[''] = results.pop(cls.FRONT_PAGE)
        return results
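A hypothetical call showing the empty-string convention above (AdWeight and the thing id are stand-in names): the caller passes '' for the front page, the method swaps in cls.FRONT_PAGE for the cache lookup, then swaps it back out of the result.

ads = AdWeight.get(['', 't5_2qh0u'])
# -> {'': [...front-page ads...], 't5_2qh0u': [...]}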
Example #16
    def _byID(cls, ids, data=False, return_dict=True, extra_props=None):
        ids, single = tup(ids, True)
        prefix = thing_prefix(cls.__name__)

        if not all(x <= tdb.MAX_THING_ID for x in ids):
            raise NotFound('huge thing_id in %r' % ids)

        def items_db(ids):
            items = cls._get_item(cls._type_id, ids)
            for i in items.keys():
                items[i] = cls._build(i, items[i])

            return items

        bases = sgm(cache, ids, items_db, prefix)

        #check to see if we found everything we asked for
        for i in ids:
            if i not in bases:
                missing = [i for i in ids if i not in bases]
                raise NotFound, '%s %s' % (cls.__name__, missing)
            if bases[i] and bases[i]._id != i:
                g.log.error("thing.py: Doppleganger on byID: %s got %s for %s" %
                            (cls.__name__, bases[i]._id, i))
                bases[i] = items_db([i]).values()[0]
                bases[i]._cache_myself()


        if data:
            need = []
            for v in bases.itervalues():
                v._asked_for_data = True
                if not v._loaded:
                    need.append(v)
            if need:
                cls._load_multi(need)
### The following is really handy for debugging who's forgetting data=True:
#       else:
#           for v in bases.itervalues():
#                if v._id in (1, 2, 123):
#                    raise ValueError

        #e.g. add the sort prop
        if extra_props:
            for _id, props in extra_props.iteritems():
                for k, v in props.iteritems():
                    bases[_id].__setattr__(k, v, False)

        if single:
            return bases[ids[0]]
        elif return_dict:
            return bases
        else:
            return filter(None, (bases.get(i) for i in ids))
Example #17
        def _fast_query_all_names(cls, thing1s, thing2s, data=True):
            """looks up all the relationships between thing1_ids and thing2_ids
            and caches them
            """
            prefix = thing_prefix(cls.__name__)

            thing1_dict = dict((t._id, t) for t in thing1s)
            thing2_dict = dict((t._id, t) for t in thing2s)

            thing1_ids = thing1_dict.keys()
            thing2_ids = thing2_dict.keys()

            pairs = set((x, y)
                        for x in thing1_ids
                        for y in thing2_ids)

            def items_db(pairs):
                t1_ids = set()
                t2_ids = set()
                for t1, t2 in pairs:
                    t1_ids.add(t1)
                    t2_ids.add(t2)

                q = cls._query(cls.c._thing1_id == t1_ids,
                               cls.c._thing2_id == t2_ids,
                               eager_load = True,
                               data = data)

                rel_ids = {}
                for rel in q:
                    l = rel_ids.setdefault((rel._thing1_id, rel._thing2_id), [])
                    l.append(rel._id)

                for p in pairs:
                    if p not in rel_ids:
                        rel_ids[p] = []

                return rel_ids

            res = sgm(cache, pairs, items_db, prefix)
            #convert the keys back into objects
            #we can assume the rels will be in the cache and just call
            #_byID lots
            res_obj = {}
            for k, rids in res.iteritems():
                for rid in rids:
                    obj_key = (thing1_dict[k[0]], thing2_dict[k[1]])
                    result = cls._byID(rid, data=data) if rid else None
                    if res_obj.get(obj_key) is None:
                        res_obj[obj_key] = result

            return res_obj
Example #18
def location_by_ips(ips):
    ips, is_single = tup(ips, ret_is_single=True)
    location_by_ip = sgm(
        cache=g.gencache,
        keys=ips,
        miss_fn=_location_by_ips,
        prefix='geoip:loc_',
        time=GEOIP_CACHE_TIME,
    )
    if is_single and location_by_ip:
        return location_by_ip[ips[0]]
    else:
        return location_by_ip
Example #19
def organization_by_ips(ips):
    ips, is_single = tup(ips, ret_is_single=True)
    organization_by_ip = sgm(
        cache=g.gencache,
        keys=ips,
        miss_fn=_organization_by_ips,
        prefix='geoip:org_',
        time=GEOIP_CACHE_TIME,
    )
    if is_single and organization_by_ip:
        return organization_by_ip[ips[0]]
    else:
        return organization_by_ip
Example #20
def normalized_rising(sr_ids):
    if not sr_ids:
        return []

    tuples_by_srid = sgm(g.cache,
                         sr_ids,
                         miss_fn=get_rising_tuples,
                         prefix='normalized_rising',
                         time=g.page_cache_time)

    merged = heapq.merge(*tuples_by_srid.values())

    return [link_name for norm_score, score, link_name in merged]
Example #21
def get_live_promotions(sr_names):
    sanitized_names = [SPECIAL_NAMES.get(name, name) for name in sr_names]
    promos_by_sanitized_name = sgm(g.cache,
                                   sanitized_names,
                                   miss_fn=_get_live_promotions,
                                   prefix='live_promotions',
                                   time=60,
                                   stale=True)
    promos_by_srname = {
        REVERSED_NAMES.get(name, name): val
        for name, val in promos_by_sanitized_name.iteritems()
    }
    return itertools.chain.from_iterable(promos_by_srname.itervalues())
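For the round-trip above to be lossless, REVERSED_NAMES must be the exact inverse of SPECIAL_NAMES. A hypothetical pair (the real contents are defined alongside get_live_promotions):

SPECIAL_NAMES = {'': '-frontpage-'}  # stand-in: names unsafe as cache keys
REVERSED_NAMES = dict((v, k) for k, v in SPECIAL_NAMES.iteritems())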
Example #22
    def _by_name(cls, names, stale=False, _update=False):
        #lower name here so there is only one cache
        names, single = tup(names, True)

        to_fetch = {}
        ret = {}

        for name in names:
            lname = name.lower()

            if lname in cls._specials:
                ret[name] = cls._specials[lname]
            else:
                to_fetch[lname] = name

        if to_fetch:

            def _fetch(lnames):
                q = cls._query(lower(cls.c.name) == lnames,
                               cls.c._spam == (True, False),
                               limit=len(lnames),
                               data=True)
                try:
                    srs = list(q)
                except UnicodeEncodeError:
                    print "Error looking up SRs %r" % (lnames, )
                    raise

                return dict((sr.name.lower(), sr._id) for sr in srs)

            srs = {}
            srids = sgm(g.cache,
                        to_fetch.keys(),
                        _fetch,
                        prefix='subreddit.byname',
                        stale=stale)
            if srids:
                srs = cls._byID(srids.values(),
                                data=True,
                                return_dict=False,
                                stale=stale)

            for sr in srs:
                ret[to_fetch[sr.name.lower()]] = sr

        if ret and single:
            return ret.values()[0]
        elif not ret and single:
            raise NotFound, 'Subreddit %s' % name
        else:
            return ret
Example #23
    def _by_name(cls, names, _update = False):
        #lower name here so there is only one cache
        names, single = tup(names, True)

        to_fetch = {}
        ret = {}

        _specials = dict(friends = Friends,
                         randnsfw = RandomNSFW,
                         random = Random,
                         mod = Mod,
                         contrib = Contrib,
                         all = All)

        for name in names:
            lname = name.lower()

            if lname in _specials:
                ret[name] = _specials[lname]
            else:
                to_fetch[lname] = name

        if to_fetch:
            def _fetch(lnames):
                q = cls._query(lower(cls.c.name) == lnames,
                               cls.c._spam == (True, False),
                               limit = len(lnames),
                               data=True)
                try:
                    srs = list(q)
                except UnicodeEncodeError:
                    print "Error looking up SRs %r" % (lnames,)
                    raise

                return dict((sr.name.lower(), sr._id)
                            for sr in srs)

            srs = {}
            srids = sgm(g.cache, to_fetch.keys(), _fetch, prefix='subreddit.byname')
            if srids:
                srs = cls._byID(srids.values(), data=True, return_dict=False)

            for sr in srs:
                ret[to_fetch[sr.name.lower()]] = sr

        if ret and single:
            return ret.values()[0]
        elif not ret and single:
            raise NotFound, 'Subreddit %s' % name
        else:
            return ret
Example #24
def moderator_messages(sr_ids):
    from r2.models import Subreddit

    def multi_load_tree(sr_ids):
        srs = Subreddit._byID(sr_ids, return_dict=False)
        res = {}
        for sr in srs:
            trees = subreddit_messages_nocache(sr)
            if trees:
                res[sr._id] = trees
        return res

    res = sgm(g.permacache, sr_ids, miss_fn=multi_load_tree, prefix=sr_messages_key(""))

    return sorted(chain(*res.values()), key=tree_sort_fn, reverse=True)
Example #25
    def get(cls, sr_ids):
        """Return a dictionary of sr_id -> list of ads for each of sr_ids"""
        # Mangling: Caller convention is to use empty string for FRONT_PAGE
        sr_ids = [(sr_id or cls.FRONT_PAGE) for sr_id in sr_ids]
        adweights = sgm(cls.cache,
                        sr_ids,
                        cls._load_multi,
                        prefix=cls.cache_prefix,
                        stale=True)
        results = {
            sr_id: cls.from_columns(adweights[sr_id])
            for sr_id in adweights
        }
        if cls.FRONT_PAGE in results:
            results[''] = results.pop(cls.FRONT_PAGE)
        return results
Example #26
def moderator_messages(sr_ids):
    from r2.models import Subreddit

    srs = Subreddit._byID(sr_ids)
    sr_ids = [sr_id for sr_id, sr in srs.iteritems() if sr.is_moderator_with_perms(c.user, "mail")]

    def multi_load_tree(sr_ids):
        res = {}
        for sr_id in sr_ids:
            trees = subreddit_messages_nocache(srs[sr_id])
            if trees:
                res[sr_id] = trees
        return res

    res = sgm(g.permacache, sr_ids, miss_fn=multi_load_tree, prefix=sr_messages_key(""))

    return sorted(chain(*res.values()), key=tree_sort_fn, reverse=True)
Example #27
def moderator_messages(user):
    from r2.models import Subreddit
    sr_ids = Subreddit.reverse_moderator_ids(user)

    def multi_load_tree(sr_ids):
        srs = Subreddit._byID(sr_ids, return_dict = False)
        res = {}
        for sr in srs:
            trees = subreddit_messages_nocache(sr)
            if trees:
                res[sr._id] = trees
        return res

    res = sgm(g.permacache, sr_ids, miss_fn = multi_load_tree,
              prefix = sr_messages_key(""))

    return sorted(chain(*res.values()), key = tree_sort_fn, reverse = True)
Example #28
    def _byID(cls, ids, data=False, return_dict=True, extra_props=None):
        ids, single = tup(ids, True)
        prefix = thing_prefix(cls.__name__)

        if not all(x <= tdb.MAX_THING_ID for x in ids):
            raise NotFound('huge thing_id in %r' % ids)

        def items_db(ids):
            items = cls._get_item(cls._type_id, ids)
            for i in items.keys():
                items[i] = cls._build(i, items[i])

            return items

        bases = sgm(cache, ids, items_db, prefix)

        #check to see if we found everything we asked for
        for i in ids:
            if i not in bases:
                missing = [i for i in ids if i not in bases]
                raise NotFound, '%s %s' % (cls.__name__, missing)
            if bases[i] and bases[i]._id != i:
                g.log.error("thing.py: Doppleganger on byID: %s got %s for %s" %
                            (cls.__name__, bases[i]._id, i))
                bases[i] = items_db([i]).values()[0]
                bases[i]._cache_myself()


        if data:
            need = [v for v in bases.itervalues() if not v._loaded]
            if need:
                cls._load_multi(need)

        #e.g. add the sort prop
        if extra_props:
            for _id, props in extra_props.iteritems():
                for k, v in props.iteritems():
                    bases[_id].__setattr__(k, v, False)

        if single:
            return bases[ids[0]]
        elif return_dict:
            return bases
        else:
            return filter(None, (bases.get(i) for i in ids))
Example #29
    def get_items(self, *a, **kw):
        """Wrapper around builder's get_items that caches the rendering."""
        builder_items = self.builder.get_items(*a, **kw)

        #render cache
        #fn to render non-boring items
        fullnames = {}
        for i in self.builder.item_iter(builder_items):
            rs = c.render_style
            key = i.render_class.cache_key(i)
            if key:
                fullnames[key + rs + c.lang] = i

        def render_items(names):
            r = {}
            for i in names:
                item = fullnames[i]
                r[i] = item.render()
            return r

        rendered_items = sgm(g.rendercache,
                             fullnames,
                             render_items,
                             'render_',
                             time=g.page_cache_time)

        #replace the render function
        for k, v in rendered_items.iteritems():

            def make_fn(v):
                default = c.render_style
                default_render = fullnames[k].render

                def r(style=default):
                    if style != c.render_style:
                        return default_render(style=style)
                    return v

                return r

            fullnames[k].render = make_fn(v)

        return builder_items
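make_fn exists to pin v (and the current item's render function) at loop time; without the extra scope, every r closure would see only the last iteration's values. A standalone demonstration of the late-binding pitfall it avoids:

fns = [lambda: i for i in range(3)]
print [f() for f in fns]                          # [2, 2, 2] -- late binding
fns = [(lambda v: lambda: v)(i) for i in range(3)]
print [f() for f in fns]                          # [0, 1, 2] -- pinned per loop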
Example #30
def moderator_messages(sr_ids):
    from r2.models import Subreddit

    srs = Subreddit._byID(sr_ids)
    sr_ids = [sr_id for sr_id, sr in srs.iteritems()
              if sr.is_moderator_with_perms(c.user, 'mail')]

    def multi_load_tree(sr_ids):
        res = {}
        for sr_id in sr_ids:
            trees = subreddit_messages_nocache(srs[sr_id])
            if trees:
                res[sr_id] = trees
        return res

    res = sgm(g.permacache, sr_ids, miss_fn = multi_load_tree,
              prefix = sr_messages_key(""))

    return sorted(chain(*res.values()), key = tree_sort_fn, reverse = True)
Example #31
    def _byID(cls, ids, data=False, return_dict=True, extra_props=None):
        ids, single = tup(ids, True)
        prefix = thing_prefix(cls.__name__)

        def items_db(ids):
            items = cls._get_item(cls._type_id, ids)
            for i in items.keys():
                items[i] = cls._build(i, items[i])

            #avoid race condition when incrementing int props (data int
            #props are set in load_multi)
            for prop in cls._int_props:
                keys = dict((i, getattr(item, prop))
                            for i, item in items.iteritems())
                cache.set_multi(keys, prefix + prop + '_' )

            return items

        bases = sgm(cache, ids, items_db, prefix)

        #check to see if we found everything we asked for
        if any(i not in bases for i in ids):
            missing = [i for i in ids if i not in bases]
            raise NotFound, '%s %s' % (cls.__name__, missing)

        if data:
            need = [v for v in bases.itervalues() if not v._loaded]
            if need:
                cls._load_multi(need)

        #e.g. add the sort prop
        if extra_props:
            for _id, props in extra_props.iteritems():
                for k, v in props.iteritems():
                    bases[_id].__setattr__(k, v, False)

        if single:
            return bases[ids[0]]
        elif return_dict:
            return bases
        else:
            return filter(None, (bases.get(i) for i in ids))
Example #32
    def _by_name(cls, names, stale=False, _update=False):
        # lower name here so there is only one cache
        names, single = tup(names, True)

        to_fetch = {}
        ret = {}

        for name in names:
            lname = name.lower()

            if lname in cls._specials:
                ret[name] = cls._specials[lname]
            else:
                to_fetch[lname] = name

        if to_fetch:

            def _fetch(lnames):
                q = cls._query(lower(cls.c.name) == lnames, cls.c._spam == (True, False), limit=len(lnames), data=True)
                try:
                    srs = list(q)
                except UnicodeEncodeError:
                    print "Error looking up SRs %r" % (lnames,)
                    raise

                return dict((sr.name.lower(), sr._id) for sr in srs)

            srs = {}
            srids = sgm(g.cache, to_fetch.keys(), _fetch, prefix="subreddit.byname", stale=stale)
            if srids:
                srs = cls._byID(srids.values(), data=True, return_dict=False, stale=stale)

            for sr in srs:
                ret[to_fetch[sr.name.lower()]] = sr

        if ret and single:
            return ret.values()[0]
        elif not ret and single:
            raise NotFound, "Subreddit %s" % name
        else:
            return ret
Example #33
    def _byID(cls, ids, data=False, return_dict=True, extra_props=None):
        ids, single = tup(ids, True)
        prefix = thing_prefix(cls.__name__)

        def items_db(ids):
            items = cls._get_item(cls._type_id, ids)
            for i in items.keys():
                items[i] = cls._build(i, items[i])

            #avoid race condition when incrementing int props (data int
            #props are set in load_multi)
            for prop in cls._int_props:
                keys = dict((i, getattr(item, prop))
                            for i, item in items.iteritems())
                cache.set_multi(keys, prefix + prop + '_' )

            return items

        bases = sgm(cache, ids, items_db, prefix)

        if not bases:
            raise NotFound, '%s %s' % (cls.__name__, ids)

        if data:
            need = [v for v in bases.itervalues() if not v._loaded]
            if need:
                cls._load_multi(need)

        #e.g. add the sort prop
        if extra_props:
            for _id, props in extra_props.iteritems():
                for k, v in props.iteritems():
                    bases[_id].__setattr__(k, v, False)

        if single:
            return bases[ids[0]]
        elif return_dict:
            return bases
        else:
        return filter(None, (bases.get(i) for i in ids))
Example #34
    def _byID(cls, ids):
        ids, is_single = tup(ids, True)

        if not len(ids):
            if is_single:
                raise InvariantException("whastis?")
            else:
                return {}

        # all keys must be strings or directly convertible to strings
        assert all(isinstance(_id, basestring) and str(_id) for _id in ids)

        def lookup(l_ids):
            # TODO: if we get back max_column_count columns for a
            # given row, check a flag on the class as to whether to
            # refetch for more of them. This could be important with
            # large Views, for instance
            rows = cls._cf.multiget(l_ids, column_count=max_column_count)

            l_ret = {}
            for t_id, row in rows.iteritems():
                t = cls._from_serialized_columns(t_id, row)
                l_ret[t._id] = t

            return l_ret

        ret = cache.sgm(thing_cache, ids, lookup, prefix=cls._cache_prefix())

        if is_single and not ret:
            raise NotFound("<%s %r>" % (cls.__name__,
                                        ids[0]))
        elif is_single:
            assert len(ret) == 1
            return ret.values()[0]

        return ret
Example #35
    def get_items(self, *a, **kw):
        """Wrapper around builder's get_items that caches the rendering."""
        builder_items = self.builder.get_items(*a, **kw)

        #render cache
        #fn to render non-boring items
        fullnames = {}
        for i in self.builder.item_iter(builder_items):
            rs = c.render_style
            key = i.cache_key(i)
            if key:
                fullnames[key + rs + c.lang] = i

        def render_items(names):
            r = {}
            for i in names:
                item = fullnames[i]
                r[i] = item.render()
            return r

        rendered_items = sgm(cache, fullnames, render_items, 'render_',
                             time = g.page_cache_time)

        #replace the render function
        for k, v in rendered_items.iteritems():
            def make_fn(v):
                default = c.render_style
                default_render = fullnames[k].render
                def r(style = default):
                    if style != c.render_style:
                        return default_render(style = style)
                    return v
                return r
            fullnames[k].render = make_fn(v)
        
        return builder_items
Example #36
        def _fast_query(cls, thing1s, thing2s, name, data=True, eager_load=True,
                        thing_data=False, timestamp_optimize = False):
            """looks up all the relationships between thing1_ids and
               thing2_ids and caches them"""
            prefix = thing_prefix(cls.__name__)

            thing1_dict = dict((t._id, t) for t in tup(thing1s))
            thing2_dict = dict((t._id, t) for t in tup(thing2s))

            thing1_ids = thing1_dict.keys()
            thing2_ids = thing2_dict.keys()

            name = tup(name)

            # permute all of the pairs
            pairs = set((x, y, n)
                        for x in thing1_ids
                        for y in thing2_ids
                        for n in name)

            def items_db(pairs):
                rel_ids = {}

                t1_ids = set()
                t2_ids = set()
                names = set()
                for t1, t2, name in pairs:
                    t1_ids.add(t1)
                    t2_ids.add(t2)
                    names.add(name)

                if t1_ids and t2_ids and names:
                    q = cls._query(cls.c._thing1_id == t1_ids,
                                   cls.c._thing2_id == t2_ids,
                                   cls.c._name == names,
                                   eager_load = eager_load,
                                   thing_data = thing_data,
                                   data = data)
                else:
                    q = []

                for rel in q:
                    #TODO an alternative for multiple
                    #relations with the same keys
                    #l = rel_ids.setdefault((rel._thing1_id, rel._thing2_id), [])
                    #l.append(rel._id)
                    rel_ids[(rel._thing1_id, rel._thing2_id, rel._name)] = rel._id

                for p in pairs:
                    if p not in rel_ids:
                        rel_ids[p] = None

                return rel_ids

            res = sgm(cache, pairs, items_db, prefix)

            #convert the keys back into objects

            # populate the local cache in batch
            cls._byID(filter(None, res.values()), data=data)

            # now we can assume the rels will be in the cache and just
            # call _byID lots
            res_obj = {}
            for k, rid in res.iteritems():
                obj_key = (thing1_dict[k[0]], thing2_dict[k[1]], k[2])
                res_obj[obj_key] = cls._byID(rid, data=data) if rid else None
                
            return res_obj
Example #37
    def reported(cls, users=None, things=None, return_dict=True, amount=None):

        # nothing given, nothing to give back
        if not users and not things:
            return {} if return_dict else []

        if users: users = tup(users)
        if things: things = tup(things)

        # if both are given, we can use fast_query
        if users and things:
            return cls.fastreported(users, things)

        # type_dict stores id keyed on (type, rel_key)
        type_dict = {}

        # if users, we have to search all the rel types on thing1_id
        if users:
            db_key = '_thing1_id'
            uid = [t._id for t in users]
            for key in cls.rels.keys():
                type_dict[(Account, key)] = uid

        # if things, we have to search only on types present in the list
        if things:
            db_key = '_thing2_id'
            for t in things:
                key = (t.__class__, (Account, t.__class__))
                type_dict.setdefault(key, []).append(t._id)

        def db_func(rel, db_key, amount):
            def _db_func(ids):
                q = rel._query(getattr(rel.c, db_key) == ids, data=True)
                if amount is not None:
                    q._filter(rel.c._name == str(amount))
                r_ids = {}

                # fill up the report listing from the query
                for r in q:
                    key = getattr(r, db_key)
                    r_ids.setdefault(key, []).append(r._id)

                # add blanks where no results were returned
                for i in ids:
                    if i not in r_ids:
                        r_ids[i] = []

                return r_ids

            return _db_func

        rval = []
        for (thing_class, rel_key), ids in type_dict.iteritems():
            rel = cls.rels[rel_key]
            prefix = cls._cache_prefix(rel, thing_class, amount=amount)

            # load from cache
            res = sgm(cache, ids, db_func(rel, db_key, amount), prefix)

            # append *objects* to end of list
            res1 = []
            for x in res.values():
                res1.extend(x)
            if res1:
                rval.extend(rel._byID(res1, data=True, return_dict=False))

        if return_dict:
            return dict(((r._thing1, r._thing2, cls._field), r) for r in rval)
        return rval
Example #38
        def _fast_query(cls,
                        thing1s,
                        thing2s,
                        name,
                        data=True,
                        eager_load=True,
                        thing_data=False,
                        thing_stale=False):
            """looks up all the relationships between thing1_ids and
               thing2_ids and caches them"""

            cache_key_lookup = dict()

            # We didn't find these keys in the cache, look them up in the
            # database
            def lookup_rel_ids(uncached_keys):
                rel_ids = {}

                # Lookup thing ids and name from cache key
                t1_ids = set()
                t2_ids = set()
                names = set()
                for cache_key in uncached_keys:
                    (thing1, thing2, name) = cache_key_lookup[cache_key]
                    t1_ids.add(thing1._id)
                    t2_ids.add(thing2._id)
                    names.add(name)

                q = cls._query(cls.c._thing1_id == t1_ids,
                               cls.c._thing2_id == t2_ids,
                               cls.c._name == names)

                for rel in q:
                    rel_ids[cls._fast_cache_key_from_parts(
                        cls.__name__, rel._thing1_id, rel._thing2_id,
                        str(rel._name))] = rel._id

                for cache_key in uncached_keys:
                    if cache_key not in rel_ids:
                        rel_ids[cache_key] = None

                return rel_ids

            # make lookups for thing ids and names
            thing1_dict = dict((t._id, t) for t in tup(thing1s))
            thing2_dict = dict((t._id, t) for t in tup(thing2s))

            names = map(str, tup(name))

            # permute all of the pairs via cartesian product
            rel_tuples = itertools.product(thing1_dict.values(),
                                           thing2_dict.values(), names)

            # create cache keys for all permutations and initialize lookup
            for t in rel_tuples:
                thing1, thing2, name = t
                cache_key = cls._fast_cache_key_from_parts(
                    cls.__name__, thing1._id, thing2._id, name)
                cache_key_lookup[cache_key] = t

            # get the relation ids from the cache or query the db
            res = sgm(cls._fast_cache, cache_key_lookup.keys(), lookup_rel_ids)

            # get the relation objects
            rel_ids = {
                rel_id
                for rel_id in res.itervalues() if rel_id is not None
            }
            rels = cls._byID_rel(rel_ids,
                                 data=data,
                                 eager_load=eager_load,
                                 thing_data=thing_data,
                                 thing_stale=thing_stale)

            # Takes aggregated results from cache and db (res) and transforms
            # the values from ids to Relations.
            res_obj = {}
            for cache_key, rel_id in res.iteritems():
                t = cache_key_lookup[cache_key]
                rel = rels[rel_id] if rel_id is not None else None
                res_obj[t] = rel

            return res_obj
Example #39
    def _byID(cls, ids, properties=None):
        ids, is_single = tup(ids, True)

        if properties is not None:
            asked_properties = frozenset(properties)
            willask_properties = set(properties)

        if not len(ids):
            if is_single:
                raise InvariantException("whastis?")
            return {}

        # all keys must be strings or directly convertible to strings
        assert all(isinstance(_id, basestring) and str(_id) for _id in ids)

        def reject_bad_partials(cached, still_need):
            # tell sgm that the match it found in the cache isn't good
            # enough if it's a partial that doesn't include our
            # properties. we still need to look those items up to get
            # the properties that we're after
            stillfind = set()

            for k, v in cached.iteritems():
                if properties is None:
                    if v._partial is not None:
                        # there's a partial in the cache but we're not
                        # looking for partials
                        stillfind.add(k)
                elif v._partial is not None and not asked_properties.issubset(
                        v._partial):
                    # we asked for a partial, and this is a partial,
                    # but it doesn't have all of the properties that
                    # we need
                    stillfind.add(k)

                    # other callers in our request are now expecting
                    # to find the properties that were on that
                    # partial, so we'll have to preserve them
                    for prop in v._partial:
                        willask_properties.add(prop)

            for k in stillfind:
                del cached[k]
                still_need.add(k)

        def lookup(l_ids):
            # TODO: if we get back max_column_count columns for a
            # given row, check a flag on the class as to whether to
            # refetch for more of them. This could be important with
            # large Views, for instance

            if properties is None:
                rows = cls._cf.multiget(l_ids, column_count=max_column_count)
            else:
                rows = cls._cf.multiget(l_ids, columns=willask_properties)

            l_ret = {}
            for t_id, row in rows.iteritems():
                t = cls._from_serialized_columns(t_id, row)
                if properties is not None:
                    # make sure that the item is marked as a _partial
                    t._partial = willask_properties
                l_ret[t._id] = t

            return l_ret

        ret = cache.sgm(thing_cache,
                        ids,
                        lookup,
                        prefix=cls._cache_prefix(),
                        found_fn=reject_bad_partials)

        if is_single and not ret:
            raise NotFound("<%s %r>" % (cls.__name__, ids[0]))
        elif is_single:
            assert len(ret) == 1
            return ret.values()[0]

        return ret
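The found_fn hook used above follows a mutate-in-place contract: it receives the dict of cache hits and the set of still-missing keys, and anything it removes from the former and adds to the latter is re-fetched by the lookup function. A minimal hook demonstrating just the contract:

def reject_all(cached, still_need):
    # force every cache hit to be looked up again
    for k in list(cached):
        del cached[k]
        still_need.add(k)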
Example #40
        def _fast_query(cls, thing1s, thing2s, name, data=True, eager_load=True,
                        thing_data=False, thing_stale=False):
            """looks up all the relationships between thing1_ids and
               thing2_ids and caches them"""

            cache_key_lookup = dict()

            # We didn't find these keys in the cache, look them up in the
            # database
            def lookup_rel_ids(uncached_keys):
                rel_ids = {}

                # Lookup thing ids and name from cache key
                t1_ids = set()
                t2_ids = set()
                names = set()
                for cache_key in uncached_keys:
                    (thing1, thing2, name) = cache_key_lookup[cache_key]
                    t1_ids.add(thing1._id)
                    t2_ids.add(thing2._id)
                    names.add(name)

                q = cls._query(
                        cls.c._thing1_id == t1_ids,
                        cls.c._thing2_id == t2_ids,
                        cls.c._name == names)

                for rel in q:
                    rel_ids[cls._fast_cache_key_from_parts(
                        cls.__name__,
                        rel._thing1_id,
                        rel._thing2_id,
                        str(rel._name)
                    )] = rel._id

                for cache_key in uncached_keys:
                    if cache_key not in rel_ids:
                        rel_ids[cache_key] = None

                return rel_ids

            # make lookups for thing ids and names
            thing1_dict = dict((t._id, t) for t in tup(thing1s))
            thing2_dict = dict((t._id, t) for t in tup(thing2s))

            names = map(str, tup(name))

            # permute all of the pairs via cartesian product
            rel_tuples = itertools.product(
                thing1_dict.values(),
                thing2_dict.values(),
                names)

            # create cache keys for all permutations and initialize lookup
            for t in rel_tuples:
                thing1, thing2, name = t
                cache_key = cls._fast_cache_key_from_parts(
                    cls.__name__,
                    thing1._id,
                    thing2._id,
                    name)
                cache_key_lookup[cache_key] = t

            # get the relation ids from the cache or query the db
            res = sgm(cls._fast_cache, cache_key_lookup.keys(), lookup_rel_ids)

            # get the relation objects
            rel_ids = {rel_id for rel_id in res.itervalues()
                              if rel_id is not None}
            rels = cls._byID_rel(
                rel_ids,
                data=data,
                eager_load=eager_load,
                thing_data=thing_data,
                thing_stale=thing_stale)

            # Takes aggregated results from cache and db (res) and transforms
            # the values from ids to Relations.
            res_obj = {}
            for cache_key, rel_id in res.iteritems():
                t = cache_key_lookup[cache_key]
                rel = rels[rel_id] if rel_id is not None else None
                res_obj[t] = rel

            return res_obj
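The fast-query cache keys in Examples #38 and #40 are flat strings built from the relation class name and the (thing1, thing2, name) triple, so they can be handed to sgm with no extra prefix. A hypothetical reconstruction of the key builder (the real one is a method on the class):

def _fast_cache_key_from_parts(cls_name, thing1_id, thing2_id, name):
    # any injective, memcached-safe encoding of the four parts works
    return 'rel:%s:%s:%s:%s' % (cls_name, thing1_id, thing2_id, name)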
Example #41
    def _byID(cls,
              ids,
              data=False,
              return_dict=True,
              stale=False,
              ignore_missing=False):
        ids, single = tup(ids, ret_is_single=True)
        prefix = thing_prefix(cls.__name__)

        for x in ids:
            if not isinstance(x, (int, long)):
                raise ValueError('non-integer thing_id in %r' % ids)
            if x > tdb.MAX_THING_ID:
                raise NotFound('huge thing_id in %r' % ids)
            elif x < tdb.MIN_THING_ID:
                raise NotFound('negative thing_id in %r' % ids)

        if not single and not ids:
            if return_dict:
                return {}
            else:
                return []

        cls.record_lookup(data=data, delta=len(ids))

        def count_found(ret, still_need):
            cls._cache.stats.cache_report(hits=len(ret),
                                          misses=len(still_need),
                                          cache_name='sgm.%s' % cls.__name__)

        if not cls._cache.stats:
            count_found = None

        def items_db(ids):
            items = cls._get_item(cls._type_id, ids)
            for i in items.keys():
                items[i] = cls._build(i, items[i])

            # caching happens in sgm, but is less intrusive to count here
            cls.record_cache_write(event="cache", delta=len(items))

            return items

        bases = sgm(cls._cache,
                    ids,
                    items_db,
                    prefix,
                    time=THING_CACHE_TTL,
                    stale=stale,
                    found_fn=count_found,
                    stat_subname=cls.__name__)

        # Check to see if we found everything we asked for
        missing = []
        for i in ids:
            if i not in bases:
                missing.append(i)
            elif bases[i] and bases[i]._id != i:
                g.log.error(
                    "thing.py: Doppleganger on byID: %s got %s for %s" %
                    (cls.__name__, bases[i]._id, i))
                bases[i] = items_db([i]).values()[0]
                bases[i]._cache_myself()
        if missing and not ignore_missing:
            raise NotFound, '%s %s' % (cls.__name__, missing)
        for i in missing:
            ids.remove(i)

        if data:
            need = []
            for v in bases.itervalues():
                if not v._loaded:
                    need.append(v)
            if need:
                cls._load_multi(need)

        if single:
            return bases[ids[0]] if ids else None
        elif return_dict:
            return bases
        else:
            return filter(None, (bases.get(i) for i in ids))
Example #42
        def _fast_query(cls, thing1s, thing2s, name, data=True, eager_load=True,
                        thing_data=False, timestamp_optimize = False):
            """looks up all the relationships between thing1_ids and
               thing2_ids and caches them"""
            prefix = thing_prefix(cls.__name__)

            thing1_dict = dict((t._id, t) for t in tup(thing1s))
            thing2_dict = dict((t._id, t) for t in tup(thing2s))

            thing1_ids = thing1_dict.keys()
            thing2_ids = thing2_dict.keys()

            name = tup(name)

            def can_skip_lookup(t1, t2, name):
                # we can't possibly have voted on things that were
                # created after the last time we voted. for relations
                # that have an invariant like this we can avoid doing
                # these lookups as long as the relation takes
                # responsibility for keeping the timestamp up-to-date
                thing1 = thing1_dict[t1]
                thing2 = thing2_dict[t2]

                last_done = thing_utils.get_last_modified_for_cls(
                    thing1, cls._type_name)

                if not last_done:
                    return False

                if thing2._date > last_done:
                    return True

                return False

            # permute all of the pairs
            pairs = set((x, y, n)
                        for x in thing1_ids
                        for y in thing2_ids
                        for n in name)

            def items_db(pairs):
                rel_ids = {}

                t1_ids = set()
                t2_ids = set()
                names = set()
                for t1, t2, name in pairs:
                    if timestamp_optimize and can_skip_lookup(t1, t2, name):
                        continue
                    t1_ids.add(t1)
                    t2_ids.add(t2)
                    names.add(name)

                if t1_ids and t2_ids and names:
                    q = cls._query(cls.c._thing1_id == t1_ids,
                                   cls.c._thing2_id == t2_ids,
                                   cls.c._name == names,
                                   eager_load = eager_load,
                                   thing_data = thing_data,
                                   data = data)
                else:
                    q = []

                for rel in q:
                    #TODO an alternative for multiple
                    #relations with the same keys
                    #l = rel_ids.setdefault((rel._thing1_id, rel._thing2_id), [])
                    #l.append(rel._id)
                    rel_ids[(rel._thing1_id, rel._thing2_id, rel._name)] = rel._id

                for p in pairs:
                    if p not in rel_ids:
                        rel_ids[p] = None

                return rel_ids

            res = sgm(cache, pairs, items_db, prefix)

            # convert the keys back into objects

            # populate the local cache in batch
            cls._byID(filter(None, res.values()), data=data)

            # now we can assume the rels will be in the cache and just
            # call _byID lots
            res_obj = {}
            for k, rid in res.iteritems():
                obj_key = (thing1_dict[k[0]], thing2_dict[k[1]], k[2])
                res_obj[obj_key] = cls._byID(rid, data=data) if rid else None
                
            return res_obj
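For orientation, a hypothetical call site for the _fast_query above might look like the following; Vote, user, links, and the attributes printed are illustrative names, not the real call site.

# Hypothetical usage: which of these links has this user voted on?
rels = Vote._fast_query(user, links, ('1', '-1'))
for (account, link, direction), rel in rels.iteritems():
    if rel is not None:
        print '%s voted %s on %s' % (account.name, direction, link._id)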
Example #43
0
        def _fast_query(cls, thing1s, thing2s, name, data=True, eager_load=True,
                        timestamp_optimize = False):
            """looks up all the relationships between thing1_ids and
               thing2_ids and caches them"""
            prefix = thing_prefix(cls.__name__)

            thing1_dict = dict((t._id, t) for t in tup(thing1s))
            thing2_dict = dict((t._id, t) for t in tup(thing2s))

            thing1_ids = thing1_dict.keys()
            thing2_ids = thing2_dict.keys()

            name = tup(name)

            def can_skip_lookup(t1, t2, name):
                # we can't possibly have voted on things that were
                # created after the last time we voted. for relations
                # that have an invariant like this we can avoid doing
                # these lookups as long as the relation takes
                # responsibility for keeping the timestamp up-to-date
                thing1 = thing1_dict[t1]
                thing2 = thing2_dict[t2]

                # check to see if we have the history information
                if not thing1._loaded:
                    return False
                if not hasattr(thing1, 'fast_query_timestamp'):
                    return False

                last_done = thing1.fast_query_timestamp.get(cls._type_name,None)

                if not last_done:
                    return False

                if thing2._date > last_done:
                    return True

                return False

            # permute all of the pairs
            pairs = set((x, y, n)
                        for x in thing1_ids
                        for y in thing2_ids
                        for n in name)

            def items_db(pairs):
                rel_ids = {}

                t1_ids = set()
                t2_ids = set()
                names = set()
                for t1, t2, name in pairs:
                    if timestamp_optimize and can_skip_lookup(t1, t2, name):
                        continue
                    t1_ids.add(t1)
                    t2_ids.add(t2)
                    names.add(name)

                if t1_ids and t2_ids and names:
                    q = cls._query(cls.c._thing1_id == t1_ids,
                                   cls.c._thing2_id == t2_ids,
                                   cls.c._name == names,
                                   eager_load = eager_load,
                                   data = data)
                else:
                    # every pair was skipped by the timestamp optimization;
                    # don't run a query with empty constraint sets
                    q = []

                for rel in q:
                    #TODO an alternative for multiple
                    #relations with the same keys
                    #l = rel_ids.setdefault((rel._thing1_id, rel._thing2_id), [])
                    #l.append(rel._id)
                    rel_ids[(rel._thing1_id, rel._thing2_id, rel._name)] = rel._id

                for p in pairs:
                    if p not in rel_ids:
                        rel_ids[p] = None

                return rel_ids

            res = sgm(cache, pairs, items_db, prefix)

            # convert the keys back into objects

            # populate the local cache in batch
            cls._byID(filter(None, res.values()), data=data)

            # now we can assume the rels will be in the cache and just
            # call _byID lots
            res_obj = {}
            for k, rid in res.iteritems():
                obj_key = (thing1_dict[k[0]], thing2_dict[k[1]], k[2])
                res_obj[obj_key] = cls._byID(rid, data=data) if rid else None
                
            return res_obj
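The timestamp optimization in can_skip_lookup rests on a one-way invariant: a relation to a thing can only exist if the thing predates the account's last recorded action of that type. A small illustration with made-up timestamps:

# Illustration of the can_skip_lookup invariant (made-up values):
# the user last voted at 14:00, so a link created at 15:00 cannot
# have been voted on, and the lookup for that pair can be skipped.
from datetime import datetime

last_done = datetime(2012, 6, 1, 14, 0)     # last recorded vote
link_created = datetime(2012, 6, 1, 15, 0)  # thing2._date

assert link_created > last_done  # same test, so this pair is skipped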
Example #44
0
    def _byID(cls, ids, return_dict=True, properties=None):
        ids, is_single = tup(ids, True)

        if properties is not None:
            asked_properties = frozenset(properties)
            willask_properties = set(properties)

        if not len(ids):
            if is_single:
                raise InvariantException("whastis?")
            return {}

        # all keys must be strings or directly convertible to strings
        assert all(isinstance(_id, basestring) or str(_id) for _id in ids)

        def reject_bad_partials(cached, still_need):
            # tell sgm that the match it found in the cache isn't good
            # enough if it's a partial that doesn't include our
            # properties. we still need to look those items up to get
            # the properties that we're after
            stillfind = set()

            for k, v in cached.iteritems():
                if properties is None:
                    if v._partial is not None:
                        # there's a partial in the cache but we're not
                        # looking for partials
                        stillfind.add(k)
                elif v._partial is not None and not asked_properties.issubset(v._partial):
                    # we asked for a partial, and this is a partial,
                    # but it doesn't have all of the properties that
                    # we need
                    stillfind.add(k)

                    # other callers in our request are now expecting
                    # to find the properties that were on that
                    # partial, so we'll have to preserve them
                    for prop in v._partial:
                        willask_properties.add(prop)

            for k in stillfind:
                del cached[k]
                still_need.add(k)

        def lookup(l_ids):
            # TODO: if we get back max_column_count columns for a
            # given row, check a flag on the class as to whether to
            # refetch for more of them. This could be important with
            # large Views, for instance

            if properties is None:
                rows = cls._cf.multiget(l_ids, column_count=max_column_count)
            else:
                rows = cls._cf.multiget(l_ids, columns = willask_properties)

            l_ret = {}
            for t_id, row in rows.iteritems():
                t = cls._from_serialized_columns(t_id, row)
                if properties is not None:
                    # make sure that the item is marked as a _partial
                    t._partial = willask_properties
                l_ret[t._id] = t

            return l_ret

        ret = cache.sgm(thing_cache, ids, lookup, prefix=cls._cache_prefix(),
                        found_fn=reject_bad_partials)

        if is_single and not ret:
            raise NotFound("<%s %r>" % (cls.__name__,
                                        ids[0]))
        elif is_single:
            assert len(ret) == 1
            return ret.values()[0]
        elif return_dict:
            return ret
        else:
            return filter(None, (ret.get(i) for i in ids))
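The _partial bookkeeping above is the subtle part: a cached row may carry only a subset of columns, and a hit is only usable if that subset covers everything the caller asked for. A toy version of the subset test from reject_bad_partials, with stand-in column names:

# A cached row carrying only these columns...
cached_partial = frozenset(['name', 'date'])
# ...is no good to a caller who also wants 'karma':
asked_properties = frozenset(['name', 'karma'])
assert not asked_properties.issubset(cached_partial)
# so the key is refetched, asking for the union to keep other
# callers' expectations about the cached partial intact
willask_properties = asked_properties | cached_partial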
Example #45
0
    def _byID(cls,
              ids,
              data=False,
              return_dict=True,
              extra_props=None,
              stale=False,
              ignore_missing=False):
        ids, single = tup(ids, True)
        prefix = thing_prefix(cls.__name__)

        for x in ids:
            if not isinstance(x, (int, long)):
                raise ValueError('non-integer thing_id in %r' % ids)
            if x > tdb.MAX_THING_ID:
                raise NotFound('huge thing_id in %r' % ids)
            elif x < tdb.MIN_THING_ID:
                raise NotFound('negative thing_id in %r' % ids)

        def count_found(ret, still_need):
            cls._cache.stats.cache_report(hits=len(ret),
                                          misses=len(still_need),
                                          cache_name='sgm.%s' % cls.__name__)

        if not cls._cache.stats:
            count_found = None

        def items_db(ids):
            items = cls._get_item(cls._type_id, ids)
            for i in items.keys():
                items[i] = cls._build(i, items[i])

            return items

        bases = sgm(cls._cache,
                    ids,
                    items_db,
                    prefix,
                    stale=stale,
                    found_fn=count_found)

        # Check to see if we found everything we asked for
        missing = []
        for i in ids:
            if i not in bases:
                missing.append(i)
            elif bases[i] and bases[i]._id != i:
                g.log.error(
                    "thing.py: Doppelganger on byID: %s got %s for %s" %
                    (cls.__name__, bases[i]._id, i))
                bases[i] = items_db([i]).values()[0]
                bases[i]._cache_myself()
        if missing and not ignore_missing:
            raise NotFound, '%s %s' % (cls.__name__, missing)
        for i in missing:
            ids.remove(i)

        if data:
            need = []
            for v in bases.itervalues():
                if not v._loaded:
                    need.append(v)
            if need:
                cls._load_multi(need)

        if extra_props:
            for _id, props in extra_props.iteritems():
                for k, v in props.iteritems():
                    bases[_id].__setattr__(k, v, False)

        if single:
            return bases[ids[0]] if ids else None
        elif return_dict:
            return bases
        else:
            return filter(None, (bases.get(i) for i in ids))
Example #46
0
def get_live_promotions(sr_names):
    promos_by_srname = sgm(g.cache, sr_names, miss_fn=_get_live_promotions,
                           prefix='live_promotions', time=60)
    return itertools.chain.from_iterable(promos_by_srname.itervalues())
Example #47
0
    def _byID(cls, ids, data=True, return_dict=True, stale=False,
              ignore_missing=False):
        # data props are ALWAYS loaded, data keyword is meaningless
        ids, single = tup(ids, ret_is_single=True)

        for x in ids:
            if not isinstance(x, (int, long)):
                raise ValueError('non-integer thing_id in %r' % ids)
            if x > tdb.MAX_THING_ID:
                raise NotFound('huge thing_id in %r' % ids)
            elif x < tdb.MIN_THING_ID:
                raise NotFound('negative thing_id in %r' % ids)

        if not single and not ids:
            if return_dict:
                return {}
            else:
                return []

        cls.record_lookup(data=data, delta=len(ids))

        def count_found_and_reject_unloaded(ret, still_need):
            unloaded_ids = {
                _id for _id, thing in ret.iteritems() if not thing._loaded}
            for _id in unloaded_ids:
                del ret[_id]
                still_need.add(_id)

            # the stats guard lives inside the hook so the rejection of
            # unloaded things above still runs when stats are disabled
            if cls._cache.stats:
                cls._cache.stats.cache_report(
                    hits=len(ret), misses=len(still_need),
                    cache_name='sgm.%s' % cls.__name__)

        def get_things_from_db(ids):
            props_by_id = cls._get_item(cls._type_id, ids)
            data_props_by_id = cls._get_data(cls._type_id, ids)

            try:
                essentials = object.__getattribute__(cls, "_essentials")
            except AttributeError:
                essentials = ()

            things_by_id = {}
            for _id, props in props_by_id.iteritems():
                thing = cls._build(_id, props)
                data_props = data_props_by_id.get(_id, {})
                thing._t.update(data_props)
                thing._loaded = True

                for data_prop in essentials:
                    if data_prop not in thing._t:
                        print "Warning: %s is missing %s" % (thing._fullname, data_prop)

                things_by_id[_id] = thing

            # caching happens in sgm, but is less intrusive to count here
            cls.record_cache_write(event="cache", delta=len(things_by_id))

            return things_by_id

        things_by_id = sgm(cls._cache, ids, miss_fn=get_things_from_db,
            prefix=cls._cache_prefix(), time=THING_CACHE_TTL, stale=stale,
            found_fn=count_found_and_reject_unloaded, stat_subname=cls.__name__)

        # Check to see if we found everything we asked for
        missing = [_id for _id in ids if _id not in things_by_id]
        if missing and not ignore_missing:
            raise NotFound, '%s %s' % (cls.__name__, missing)

        if missing:
            ids = [_id for _id in ids if _id not in missing]

        if single:
            return things_by_id[ids[0]] if ids else None
        elif return_dict:
            return things_by_id
        else:
            return filter(None, (things_by_id.get(_id) for _id in ids))
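count_found_and_reject_unloaded shows the second job a found_fn can do besides stats: evicting cache hits that were written before their data props finished loading. A stripped-down illustration of that veto, with stand-in objects:

# FakeThing stands in for a cached Thing; only _loaded matters here.
class FakeThing(object):
    def __init__(self, _id, loaded):
        self._id = _id
        self._loaded = loaded

ret = {1: FakeThing(1, True), 2: FakeThing(2, False)}
still_need = set([3])

# the veto: unloaded hits go back into the miss set for refetching
for _id in [k for k, t in ret.iteritems() if not t._loaded]:
    del ret[_id]
    still_need.add(_id)

assert set(ret) == set([1]) and still_need == set([2, 3])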
Example #48
0
        def _fast_query(cls,
                        thing1s,
                        thing2s,
                        name,
                        data=True,
                        eager_load=True,
                        thing_data=False):
            """looks up all the relationships between thing1_ids and
               thing2_ids and caches them"""
            prefix = thing_prefix(cls.__name__)

            thing1_dict = dict((t._id, t) for t in tup(thing1s))
            thing2_dict = dict((t._id, t) for t in tup(thing2s))

            thing1_ids = thing1_dict.keys()
            thing2_ids = thing2_dict.keys()

            name = tup(name)

            # permute all of the pairs
            pairs = set((x, y, n) for x in thing1_ids for y in thing2_ids
                        for n in name)

            def lookup_rel_ids(pairs):
                rel_ids = {}

                t1_ids = set()
                t2_ids = set()
                names = set()
                for t1, t2, name in pairs:
                    t1_ids.add(t1)
                    t2_ids.add(t2)
                    names.add(name)

                if t1_ids and t2_ids and names:
                    q = cls._query(cls.c._thing1_id == t1_ids,
                                   cls.c._thing2_id == t2_ids,
                                   cls.c._name == names)
                else:
                    q = []

                for rel in q:
                    rel_ids[(rel._thing1_id, rel._thing2_id,
                             rel._name)] = rel._id

                for p in pairs:
                    if p not in rel_ids:
                        rel_ids[p] = None

                return rel_ids

            # get the relation ids from the cache or query the db
            res = sgm(cls._cache, pairs, lookup_rel_ids, prefix)

            # get the relation objects
            rel_ids = {
                rel_id
                for rel_id in res.itervalues() if rel_id is not None
            }
            rels = cls._byID_rel(rel_ids,
                                 data=data,
                                 eager_load=eager_load,
                                 thing_data=thing_data)

            res_obj = {}
            for (thing1_id, thing2_id, name), rel_id in res.iteritems():
                pair = (thing1_dict[thing1_id], thing2_dict[thing2_id], name)
                rel = rels[rel_id] if rel_id is not None else None
                res_obj[pair] = rel

            return res_obj
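Note the two-phase shape here: sgm caches only the small (thing1_id, thing2_id, name) -> rel_id mapping, with None as a cached negative result, and a single batched _byID_rel then resolves the ids to objects. A schematic of that intermediate mapping, with invented ids:

# Phase 1 result: the cached mapping (invented ids); None means the
# absence of the relation is itself cached.
res = {
    (1, 10, 'vote'): 501,   # relation exists, rel_id 501
    (1, 11, 'vote'): None,  # confirmed miss, no db hit needed next time
}

# Phase 2 fetches only the real ids, in one batch.
rel_ids = set(rid for rid in res.itervalues() if rid is not None)
assert rel_ids == set([501])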
Example #49
0
    def _byID(cls,
              ids,
              data=True,
              return_dict=True,
              stale=False,
              ignore_missing=False):
        # data props are ALWAYS loaded, data keyword is meaningless
        ids, single = tup(ids, ret_is_single=True)

        for x in ids:
            if not isinstance(x, (int, long)):
                raise ValueError('non-integer thing_id in %r' % ids)
            if x > tdb.MAX_THING_ID:
                raise NotFound('huge thing_id in %r' % ids)
            elif x < tdb.MIN_THING_ID:
                raise NotFound('negative thing_id in %r' % ids)

        if not single and not ids:
            if return_dict:
                return {}
            else:
                return []

        cls.record_lookup(data=data, delta=len(ids))

        def count_found_and_reject_unloaded(ret, still_need):
            unloaded_ids = {
                _id
                for _id, thing in ret.iteritems() if not thing._loaded
            }
            for _id in unloaded_ids:
                del ret[_id]
                still_need.add(_id)

            if cls._cache.stats:
                cls._cache.stats.cache_report(hits=len(ret),
                                              misses=len(still_need),
                                              cache_name='sgm.%s' %
                                              cls.__name__)

        def get_things_from_db(ids):
            props_by_id = cls._get_item(cls._type_id, ids)
            data_props_by_id = cls._get_data(cls._type_id, ids)

            things_by_id = {}
            for _id, props in props_by_id.iteritems():
                thing = cls._build(_id, props)
                data_props = data_props_by_id.get(_id, {})
                thing._t.update(data_props)
                thing._loaded = True

                if not all(data_prop in thing._t
                           for data_prop in cls._essentials):
                    # a Thing missing an essential prop is invalid
                    # this can happen if a process looks up the Thing as it's
                    # created but between when the props and the data props are
                    # written
                    g.log.error("%s missing essentials, got %s", thing,
                                thing._t)
                    g.stats.simple_event("thing.load.missing_essentials")
                    continue

                things_by_id[_id] = thing

            # caching happens in sgm, but is less intrusive to count here
            cls.record_cache_write(event="cache", delta=len(things_by_id))

            return things_by_id

        things_by_id = sgm(cls._cache,
                           ids,
                           miss_fn=get_things_from_db,
                           prefix=cls._cache_prefix(),
                           time=THING_CACHE_TTL,
                           stale=stale,
                           found_fn=count_found_and_reject_unloaded,
                           stat_subname=cls.__name__)

        # Check to see if we found everything we asked for
        missing = [_id for _id in ids if _id not in things_by_id]
        if missing and not ignore_missing:
            raise NotFound, '%s %s' % (cls.__name__, missing)

        if missing:
            ids = [_id for _id in ids if _id not in missing]

        if single:
            return things_by_id[ids[0]] if ids else None
        elif return_dict:
            return things_by_id
        else:
            return filter(None, (things_by_id.get(_id) for _id in ids))
Example #50
0
 def cup_info_multi(cls, ids):
     ids = [int(i) for i in ids]
     # Is this dumb? Why call sgm() with miss_fn=None, rather than just
     # calling g.hardcache.get_multi()?
     return sgm(g.hardcache, ids, miss_fn=None, prefix="cup_info-")
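One plausible answer to the comment's question: with miss_fn=None, sgm still applies the key prefix and maps results back to the original ids, so the call is roughly shorthand for a prefixed get_multi. A sketch of that equivalence, assuming the get_multi interface from the sketch near the top of this section:

# Roughly what sgm(cache, ids, miss_fn=None, prefix=p) buys you
# (assumed cache interface, not the real implementation):
def prefixed_get_multi(cache, ids, prefix):
    prefixed = dict((prefix + str(i), i) for i in ids)
    found = cache.get_multi(prefixed.keys())
    return dict((prefixed[k], v) for k, v in found.iteritems())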
Example #51
0
    def _by_name(cls, names, stale=False, _update=False):
        '''
        Usages:
        1. Subreddit._by_name('funny') # single sr name
        Searches for a single subreddit. Returns a single Subreddit object or
        raises NotFound if the subreddit doesn't exist.
        2. Subreddit._by_name(['aww','iama']) # list of sr names
        Searches for a list of subreddits. Returns a dict mapping srnames to
        Subreddit objects. Items that were not found are omitted from the dict.
        If no items are found, an empty dict is returned.
        '''
        # lowercase the name here so there is only one cache entry per name
        names, single = tup(names, True)

        to_fetch = {}
        ret = {}

        for name in names:
            lname = name.lower()

            if lname in cls._specials:
                ret[name] = cls._specials[lname]
            elif len(lname) > Subreddit.MAX_SRNAME_LENGTH:
                g.log.debug(
                    "Subreddit._by_name() ignoring invalid srname (too long): %s",
                    lname)
            else:
                to_fetch[lname] = name

        if to_fetch:

            def _fetch(lnames):
                q = cls._query(lower(cls.c.name) == lnames,
                               cls.c._spam == (True, False),
                               limit=len(lnames),
                               data=True)
                try:
                    srs = list(q)
                except UnicodeEncodeError:
                    print "Error looking up SRs %r" % (lnames, )
                    raise

                return dict((sr.name.lower(), sr._id) for sr in srs)

            srs = {}
            srids = sgm(g.cache,
                        to_fetch.keys(),
                        _fetch,
                        prefix='subreddit.byname',
                        stale=stale)
            if srids:
                srs = cls._byID(srids.values(),
                                data=True,
                                return_dict=False,
                                stale=stale)

            for sr in srs:
                ret[to_fetch[sr.name.lower()]] = sr

        if ret and single:
            return ret.values()[0]
        elif not ret and single:
            raise NotFound, 'Subreddit %s' % names[0]
        else:
            return ret
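A usage example matching the docstring above; the subreddit names are illustrative:

sr = Subreddit._by_name('funny')           # single name -> one Subreddit,
                                           # or NotFound if it doesn't exist
srs = Subreddit._by_name(['aww', 'iama'])  # list -> dict of name -> Subreddit
for name, sr in srs.iteritems():
    print name, sr._id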
Example #52
0
def get_live_promotions(sr_names):
    promos_by_srname = sgm(g.cache, sr_names, miss_fn=_get_live_promotions,
                           prefix='live_promotions', time=60)
    return itertools.chain.from_iterable(promos_by_srname.itervalues())