def get_hot(srs, only_fullnames=False):
    """Get the (fullname, hotness, epoch_seconds) for the hottest links
       in each subreddit. Use the query-cache to avoid some lookups if
       we can."""
    from r2.lib.db.thing import Query
    from r2.lib.db.queries import CachedResults

    ret = []

    queries = [sr.get_links('hot', 'all') for sr in srs]

    # fetch these all in one go
    cachedresults = filter(lambda q: isinstance(q, CachedResults), queries)
    CachedResults.fetch_multi(cachedresults)

    # walk srs and queries together so cached_query() sees the subreddit
    # that actually produced each query (a bare `for q in queries` would
    # leave `sr` pointing at the last subreddit from the list-comp above)
    for sr, q in zip(srs, queries):
        if isinstance(q, Query):
            links = cached_query(q, sr)
            res = [(link._fullname, link._hot, epoch_seconds(link._date))
                   for link in links]
        elif isinstance(q, CachedResults):
            # we're relying on an implementation detail of CachedResults
            # here, where it's storing tuples that look exactly like the
            # return-type we want, to make our sorting a bit cheaper
            res = list(q.data)

        # remove any that are too old
        age_limit = epoch_seconds(utils.timeago('%d days' % g.HOT_PAGE_AGE))
        res = [(fname if only_fullnames else (fname, hot, date))
               for (fname, hot, date) in res
               if date > age_limit]

        ret.append(res)

    return ret

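# Every snippet below leans on epoch_seconds() to turn tz-aware datetimes
# into sortable floats. The real helper is defined elsewhere in the
# codebase; this is only a minimal sketch assuming the conventional
# seconds-since-Unix-epoch definition (_EPOCH and _epoch_seconds_sketch
# are illustrative names, not the actual implementation):
from datetime import datetime
import pytz

_EPOCH = datetime(1970, 1, 1, tzinfo=pytz.UTC)

def _epoch_seconds_sketch(date):
    # return a float so sub-second precision survives into the sort keys
    td = date - _EPOCH
    return td.days * 86400 + td.seconds + td.microseconds / 1e6
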
def _run_changed(msgs, chan):
    start = datetime.now(g.tz)

    changed = map(lambda x: strordict_fullname(x.body), msgs)

    boost = set()
    add = set()

    # an item can request that only its boost fields be updated, so we
    # need to separate those out
    for item in changed:
        fname = item["fullname"]
        boost_only = item.get("boost_only", False)

        if fname in add:
            # we're already going to do all of the work
            continue

        if boost_only:
            boost.add(fname)
        else:
            if fname in boost:
                # we've previously seen an instance of this fname that
                # requested that only its boosts be updated, but now we
                # have to update the whole thing
                boost.remove(fname)
            add.add(fname)

    things = Thing._by_fullname(boost | add, data=True, return_dict=True)

    boost_time = add_time = 0.0
    if boost:
        boost_time = inject([things[fname] for fname in boost],
                            boost_only=True)
    if add:
        add_time = inject([things[fname] for fname in add])

    totaltime = epoch_seconds(datetime.now(g.tz)) - epoch_seconds(start)

    print ("%s: %d messages: %d docs (%.2fs), %d boosts (%.2fs) in %.2fs "
           "(%d duplicates, %s remaining)"
           % (start,
              len(changed),
              len(add), add_time,
              len(boost), boost_time,
              totaltime,
              len(changed) - len(things),
              msgs[-1].delivery_info.get("message_count", "unknown")))

def time_listings(times = ('year','month','week','day','hour')): oldests = dict((t, epoch_seconds(timeago('1 %s' % t))) for t in times) @mr_tools.dataspec_m_thing(("url", str),('sr_id', int),) def process(link): assert link.thing_type == 'link' timestamp = link.timestamp fname = make_fullname(Link, link.thing_id) if not link.spam and not link.deleted: sr_id = link.sr_id if link.url: domains = UrlParser(link.url).domain_permutations() else: domains = [] ups, downs = link.ups, link.downs for tkey, oldest in oldests.iteritems(): if timestamp > oldest: sc = score(ups, downs) contr = controversy(ups, downs) yield ('sr-top-%s-%d' % (tkey, sr_id), sc, timestamp, fname) yield ('sr-controversial-%s-%d' % (tkey, sr_id), contr, timestamp, fname) for domain in domains: yield ('domain/top/%s/%s' % (tkey, domain), sc, timestamp, fname) yield ('domain/controversial/%s/%s' % (tkey, domain), contr, timestamp, fname) mr_tools.mr_map(process)
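# The listings above key their tuples on score() and controversy(), which
# are defined elsewhere in the codebase. A hedged sketch of the commonly
# published forms (_score_sketch/_controversy_sketch are illustrative
# names, and the exact controversy formula in production may differ):
def _score_sketch(ups, downs):
    return ups - downs

def _controversy_sketch(ups, downs):
    # largest when the vote total is big and the split is close to even
    if ups <= 0 or downs <= 0:
        return 0
    magnitude = ups + downs
    balance = float(downs) / ups if ups > downs else float(ups) / downs
    return magnitude ** balance
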
def process(link):
    assert link.thing_type == 'link'

    author_id = link.author_id
    timestamp = link.timestamp
    fname = make_fullname(Link, link.thing_id)

    yield 'user-submitted-%d' % author_id, timestamp, fname

    if not link.spam:
        sr_id = link.sr_id
        ups, downs = link.ups, link.downs

        yield ('sr-hot-all-%d' % sr_id, _hot(ups, downs, timestamp),
               timestamp, fname)
        yield 'sr-new-all-%d' % sr_id, timestamp, fname
        yield 'sr-top-all-%d' % sr_id, score(ups, downs), timestamp, fname
        yield ('sr-controversial-all-%d' % sr_id, controversy(ups, downs),
               timestamp, fname)

        for time in '1 year', '1 month', '1 week', '1 day', '1 hour':
            if timestamp > epoch_seconds(timeago(time)):
                tkey = time.split(' ')[1]
                yield ('sr-top-%s-%d' % (tkey, sr_id),
                       score(ups, downs), timestamp, fname)
                yield ('sr-controversial-%s-%d' % (tkey, sr_id),
                       controversy(ups, downs), timestamp, fname)

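# process() above also ranks by _hot(), defined elsewhere. A sketch of the
# widely published hot-ranking form, assuming timestamp is epoch seconds
# (published versions subtract a fixed site-epoch offset from the
# timestamp first; _hot_sketch and its constants are illustrative):
from math import log10

def _hot_sketch(ups, downs, timestamp):
    s = ups - downs
    order = log10(max(abs(s), 1))
    sign = 1 if s > 0 else -1 if s < 0 else 0
    # newer submissions get a steadily growing recency bonus
    return round(sign * order + timestamp / 45000.0, 7)
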
def normalized_hot(sr_ids, obey_age_limit=True):
    timer = g.stats.get_timer("normalized_hot")
    timer.start()

    if not sr_ids:
        return []

    tuples_by_srid = sgm(g.cache, sr_ids, miss_fn=get_hot_tuples,
                         prefix='normalized_hot', time=g.page_cache_time)

    if obey_age_limit:
        cutoff = datetime.now(g.tz) - timedelta(days=g.HOT_PAGE_AGE)
        oldest = epoch_seconds(cutoff)
    else:
        oldest = 0.

    merged = heapq.merge(*tuples_by_srid.values())
    generator = (link_name for ehot, hot, link_name, timestamp in merged
                 if timestamp > oldest)
    ret = list(itertools.islice(generator, MAX_LINKS))
    timer.stop()
    return ret

def time_listings(times=('year', 'month', 'week', 'day', 'hour', 'all')):
    oldests = dict((t, epoch_seconds(timeago('1 %s' % t)))
                   for t in times if t != 'all')
    if 'all' in times:
        oldests['all'] = 0

    @mr_tools.dataspec_m_thing(('author_id', int),)
    def process(link):
        assert link.thing_type == 'link'

        timestamp = link.timestamp
        fname = make_fullname(Link, link.thing_id)

        if not link.spam and not link.deleted:
            author_id = link.author_id
            ups, downs = link.ups, link.downs

            sc = score(ups, downs)
            contr = controversy(ups, downs)
            h = _hot(ups, downs, timestamp)

            for tkey, oldest in oldests.iteritems():
                if timestamp > oldest:
                    yield ('user-top-%s-%d' % (tkey, author_id),
                           sc, timestamp, fname)
                    yield ('user-controversial-%s-%d' % (tkey, author_id),
                           contr, timestamp, fname)
                    if tkey == 'all':
                        yield ('user-new-%s-%d' % (tkey, author_id),
                               timestamp, timestamp, fname)
                        yield ('user-hot-%s-%d' % (tkey, author_id),
                               h, timestamp, fname)

    mr_tools.mr_map(process)

def normalized_hot(sr_ids, obey_age_limit=True, ageweight=None):
    timer = g.stats.get_timer("normalized_hot")
    timer.start()

    if not sr_ids:
        return []

    if ageweight and feature.is_enabled("scaled_normalized_hot"):
        tuples_by_srid = get_hot_tuples(sr_ids, ageweight=ageweight)
    else:
        tuples_by_srid = sgm(g.cache, sr_ids, miss_fn=get_hot_tuples,
                             prefix='normalized_hot', time=g.page_cache_time)

    if obey_age_limit:
        cutoff = datetime.now(g.tz) - timedelta(days=g.HOT_PAGE_AGE)
        oldest = epoch_seconds(cutoff)
    else:
        oldest = 0.

    merged = heapq.merge(*tuples_by_srid.values())
    generator = (link_name for ehot, hot, link_name, timestamp in merged
                 if timestamp > oldest)
    ret = list(itertools.islice(generator, MAX_LINKS))
    timer.stop()
    return ret

def make_period_link(interval, date):
    date = date.replace(tzinfo=g.tz)  # won't be necessary after tz fixup
    if interval == "month":
        if date.month != 12:
            end = date.replace(month=date.month + 1)
        else:
            end = date.replace(month=1, year=date.year + 1)
    else:
        end = date + timedelta_by_name(interval)

    query = urllib.urlencode({
        "syntax": "cloudsearch",
        "restrict_sr": "on",
        "sort": "top",
        "q": "timestamp:{:d}..{:d}".format(int(epoch_seconds(date)),
                                           int(epoch_seconds(end))),
    })
    return "/r/%s/search?%s" % (c.site.name, query)

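# For illustration: a "month" period starting 2015-06-01 would produce a
# URL shaped like the following (subreddit name hypothetical, timestamps
# assuming UTC; urllib.urlencode over a dict makes parameter order vary):
#   /r/pics/search?syntax=cloudsearch&restrict_sr=on&sort=top
#       &q=timestamp%3A1433116800..1435708800
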
def inject(things, boost_only=False):
    things = [x for x in things if isinstance(x, indextank_indexed_types)]

    update_things = [x for x in things
                     if not x._spam and not x._deleted
                     and x.promoted is None
                     and getattr(x, 'sr_id', None) != -1]
    delete_things = [x for x in things if x._spam or x._deleted]

    if update_things:
        maps = maps_from_things(update_things, boost_only=boost_only)

    indexstart = epoch_seconds(datetime.now(g.tz))

    if update_things:
        inject_maps(maps, boost_only=boost_only)
    if delete_things:
        for thing in delete_things:
            delete_thing(thing)

    return epoch_seconds(datetime.now(g.tz)) - indexstart

def _get_sort_value(comment, sort, link=None, children=None):
    if sort == "_date":
        return epoch_seconds(comment._date)
    if sort == '_qa':
        # Responder is usually the OP, but there could be support for
        # adding other answerers in the future.
        responder_ids = link.responder_ids
        return comment._qa(children, responder_ids)
    return getattr(comment, sort)

def _get_cutoffs(intervals):
    cutoffs = {}
    for interval in intervals:
        if interval == "all":
            cutoffs["all"] = 0.0
        else:
            cutoffs[interval] = epoch_seconds(timeago("1 %s" % interval))
    return cutoffs

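# Example (values hypothetical): run at epoch second 1400000000,
# _get_cutoffs(("hour", "day", "all")) returns roughly
#   {"hour": 1399996400.0, "day": 1399913600.0, "all": 0.0}
# so anything older than its interval's cutoff is excluded, while "all"
# keeps everything.
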
def _get_sort_value(comment, sort, link, children=None):
    if sort == "_date":
        return epoch_seconds(comment._date)
    if sort == '_qa':
        # Responder is usually the OP, but there could be support for
        # adding other answerers in the future.
        responder_ids = link.responder_ids
        return comment._qa(children, responder_ids)
    return getattr(comment, sort)

def create(cls, thing1, thing2s, pgvote, vote_info):
    assert len(thing2s) == 1

    voter = pgvote._thing1
    votee = pgvote._thing2

    rowkey = cls._rowkey(pgvote._date.astimezone(VOTE_TIMEZONE).date())
    colname = (voter._id36, votee._id36)
    details = {
        "direction": pgvote._name,
        "date": epoch_seconds(pgvote._date),
    }
    cls._set_values(rowkey, {colname: json.dumps(details)})

def time_listings(times=('all',)):
    oldests = dict((t, epoch_seconds(timeago('1 %s' % t)))
                   for t in times if t != "all")
    oldests['all'] = epoch_seconds(timeago('10 years'))

    @mr_tools.dataspec_m_thing(("url", str),)
    def process(link):
        assert link.thing_type == 'link'

        timestamp = link.timestamp
        fname = make_fullname(Link, link.thing_id)

        if not link.spam and not link.deleted:
            if link.url:
                domains = UrlParser(link.url).domain_permutations()
            else:
                domains = []
            ups, downs = link.ups, link.downs

            for tkey, oldest in oldests.iteritems():
                if timestamp > oldest:
                    sc = score(ups, downs)
                    contr = controversy(ups, downs)
                    h = _hot(ups, downs, timestamp)
                    # use a distinct local name: assigning to `upvotes`
                    # would shadow the upvotes() helper and raise
                    # UnboundLocalError at the call
                    upvote_count = upvotes(ups)

                    for domain in domains:
                        yield ('domain/top/%s/%s' % (tkey, domain),
                               sc, timestamp, fname)
                        yield ('domain/%s/%s/%s'
                               % (g.voting_upvote_path, tkey, domain),
                               upvote_count, timestamp, fname)
                        yield ('domain/%s/%s/%s'
                               % (g.voting_controversial_path, tkey, domain),
                               contr, timestamp, fname)
                        if tkey == "all":
                            yield ('domain/hot/%s/%s' % (tkey, domain),
                                   h, timestamp, fname)
                            yield ('domain/new/%s/%s' % (tkey, domain),
                                   timestamp, timestamp, fname)

    mr_tools.mr_map(process)

def time_listings(intervals):
    cutoff_by_interval = {interval: epoch_seconds(timeago("1 %s" % interval))
                          for interval in intervals}

    @mr_tools.dataspec_m_thing(
        ("url", str),
        ("sr_id", int),
        ("author_id", int),
    )
    def process(thing):
        if thing.deleted:
            return

        thing_cls = thingcls_by_name[thing.thing_type]
        fname = make_fullname(thing_cls, thing.thing_id)
        thing_score = score(thing.ups, thing.downs)
        thing_controversy = controversy(thing.ups, thing.downs)

        for interval, cutoff in cutoff_by_interval.iteritems():
            if thing.timestamp < cutoff:
                continue

            yield ("user/%s/top/%s/%d" %
                   (thing.thing_type, interval, thing.author_id),
                   thing_score, thing.timestamp, fname)
            yield ("user/%s/controversial/%s/%d" %
                   (thing.thing_type, interval, thing.author_id),
                   thing_controversy, thing.timestamp, fname)

            if thing.spam:
                continue

            if thing.thing_type == "link":
                yield ("sr/link/top/%s/%d" % (interval, thing.sr_id),
                       thing_score, thing.timestamp, fname)
                yield ("sr/link/controversial/%s/%d" %
                       (interval, thing.sr_id),
                       thing_controversy, thing.timestamp, fname)

                if thing.url:
                    for domain in UrlParser(thing.url).domain_permutations():
                        yield ("domain/link/top/%s/%s" % (interval, domain),
                               thing_score, thing.timestamp, fname)
                        yield ("domain/link/controversial/%s/%s" %
                               (interval, domain),
                               thing_controversy, thing.timestamp, fname)

    mr_tools.mr_map(process)

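# The keys generated above follow a kind/type/sort/interval/id scheme; a
# link with author_id=1234 in sr_id=42 during the "week" interval would
# emit keys like (ids and domain hypothetical):
#   user/link/top/week/1234
#   sr/link/top/week/42
#   domain/link/top/week/example.com
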
def time_listings(times=('all',)):
    oldests = dict((t, epoch_seconds(timeago('1 %s' % t)))
                   for t in times if t != "all")
    oldests['all'] = epoch_seconds(timeago('10 years'))

    @mr_tools.dataspec_m_thing(("url", str),)
    def process(link):
        assert link.thing_type == 'link'

        timestamp = link.timestamp
        fname = make_fullname(Link, link.thing_id)

        if not link.spam and not link.deleted:
            if link.url:
                domains = UrlParser(link.url).domain_permutations()
            else:
                domains = []
            ups, downs = link.ups, link.downs

            for tkey, oldest in oldests.iteritems():
                if timestamp > oldest:
                    sc = score(ups, downs)
                    contr = controversy(ups, downs)
                    h = _hot(ups, downs, timestamp)

                    for domain in domains:
                        yield ('domain/top/%s/%s' % (tkey, domain),
                               sc, timestamp, fname)
                        yield ('domain/controversial/%s/%s' % (tkey, domain),
                               contr, timestamp, fname)
                        if tkey == "all":
                            yield ('domain/hot/%s/%s' % (tkey, domain),
                                   h, timestamp, fname)
                            yield ('domain/new/%s/%s' % (tkey, domain),
                                   timestamp, timestamp, fname)

    mr_tools.mr_map(process)

def make_item_tuple(self, item):
    """Given a single 'item' from the result of a query build the tuple
       that will be stored in the query cache. It is effectively the
       fullname of the item after passing through the filter plus the
       columns of the unfiltered item to sort by."""
    filtered_item = self.filter(item)
    lst = [filtered_item._fullname]
    for col in self.sort_cols:
        # take the property of the original
        attr = getattr(item, col)
        # convert dates to epochs to take less space
        if isinstance(attr, datetime):
            attr = epoch_seconds(attr)
        lst.append(attr)
    return tuple(lst)

def create(cls, thing1, thing2s, pgvote, vote_info):
    assert len(thing2s) == 1

    voter = pgvote._thing1
    votee = pgvote._thing2

    details = dict(
        direction=pgvote._name,
        date=epoch_seconds(pgvote._date),
        valid_user=pgvote.valid_user,
        valid_thing=pgvote.valid_thing,
        ip=getattr(pgvote, "ip", ""),
    )
    if vote_info and isinstance(vote_info, basestring):
        details['vote_info'] = vote_info

    cls._set_values(votee._id36, {voter._id36: json.dumps(details)})

def create(cls, thing1, thing2s, pgvote):
    assert len(thing2s) == 1

    voter = pgvote._thing1
    votee = pgvote._thing2

    details = dict(
        direction=pgvote._name,
        date=epoch_seconds(pgvote._date),
        valid_user=pgvote.valid_user,
        valid_thing=pgvote.valid_thing,
        ip=getattr(pgvote, "ip", ""),
        organic=getattr(pgvote, "organic", False),
    )
    cls._set_values(voter._id36, {votee._id36: json.dumps(details)})

def create(cls, thing1, thing2s, pgvote):
    assert len(thing2s) == 1

    voter = pgvote._thing1
    votee = pgvote._thing2

    details = dict(
        direction=pgvote._name,
        date=epoch_seconds(pgvote._date),
        valid_user=pgvote.valid_user,
        valid_thing=pgvote.valid_thing,
        ip=getattr(pgvote, "ip", ""),
        organic=getattr(pgvote, "organic", False),
    )
    cls._set_values(votee._id36, {voter._id36: json.dumps(details)})

def _make_item_tuple(self, item):
    """Return an item tuple from the result of a query.

    The item tuple is used to sort the items in a query without having
    to look them up.

    """
    filtered_item = self.filter(item)
    lst = [filtered_item._fullname]
    for col in self.sort_cols:
        # take the property of the original
        attr = getattr(item, col)
        # convert dates to epochs to take less space
        if isinstance(attr, datetime.datetime):
            attr = epoch_seconds(attr)
        lst.append(attr)
    return tuple(lst)

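# Example (fullname and values hypothetical): for a query sorted on
# ('_hot', '_date'), the stored tuple would look like
#   ("t3_abc123", 4231.0, 1400000000.0)
# i.e. the filtered fullname first, then one sort column per entry, with
# datetimes already collapsed to epoch floats.
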
def backfill_vote_details(cls):
    ninety_days = timedelta(days=90).total_seconds()
    for chunk in in_chunks(cls._all(), size=100):
        detail_chunk = defaultdict(dict)
        try:
            with VoterIPByThing._cf.batch(
                    write_consistency_level=cls._write_consistency_level) as b:
                for vote_list in chunk:
                    thing_id36 = vote_list._id
                    thing_fullname = vote_list.votee_fullname
                    details = vote_list.decode_details()
                    for detail in details:
                        voter_id36 = detail["voter_id"]
                        if "ip" in detail and detail["ip"]:
                            ip = detail["ip"]
                            redacted = dict(detail)
                            del redacted["ip"]
                            cast = detail["date"]
                            now = epoch_seconds(
                                datetime.utcnow().replace(tzinfo=g.tz))
                            ttl = ninety_days - (now - cast)
                            oneweek = ""
                            if ttl < 3600 * 24 * 7:
                                oneweek = "(<= one week left)"
                            print "Inserting %s with IP ttl %d %s" % (
                                redacted, ttl, oneweek)
                            detail_chunk[thing_id36][voter_id36] = \
                                json.dumps(redacted)
                            if ttl <= 0:
                                print "Skipping bogus ttl for %s: %d" % (
                                    redacted, ttl)
                                continue
                            b.insert(thing_fullname, {voter_id36: ip},
                                     ttl=ttl)
        except Exception:
            # Getting some really weird spurious errors here; complaints
            # about negative TTLs even though they can't possibly be
            # negative, errors from cass that have an explanation of
            # "(why=')". Just going to brute-force this through. We might
            # lose 100 here and there but mostly it'll be intact.
            pass

        for votee_id36, valuedict in detail_chunk.iteritems():
            cls._set_values(votee_id36, valuedict)

def process(link): assert link.thing_type == "link" author_id = link.author_id timestamp = link.timestamp fname = make_fullname(Link, link.thing_id) yield "user-submitted-%d" % author_id, timestamp, fname if not link.spam: sr_id = link.sr_id ups, downs = link.ups, link.downs yield ("sr-hot-all-%d" % sr_id, _hot(ups, downs, timestamp), timestamp, fname) yield "sr-new-all-%d" % sr_id, timestamp, fname yield "sr-top-all-%d" % sr_id, score(ups, downs), timestamp, fname yield ("sr-controversial-all-%d" % sr_id, controversy(ups, downs), timestamp, fname) for time in "1 year", "1 month", "1 week", "1 day", "1 hour": if timestamp > epoch_seconds(timeago(time)): tkey = time.split(" ")[1] yield ("sr-top-%s-%d" % (tkey, sr_id), score(ups, downs), timestamp, fname) yield ("sr-controversial-%s-%d" % (tkey, sr_id), controversy(ups, downs), timestamp, fname)
def get_hot_tuples(sr_ids, ageweight=None):
    queries_by_sr_id = {sr_id: _get_links(sr_id, sort='hot', time='all')
                        for sr_id in sr_ids}
    CachedResults.fetch_multi(queries_by_sr_id.values(), stale=True)

    tuples_by_srid = {sr_id: [] for sr_id in sr_ids}
    now_seconds = epoch_seconds(datetime.now(g.tz))

    for sr_id, q in queries_by_sr_id.iteritems():
        if not q.data:
            continue

        hot_factor = get_hot_factor(q.data[0], now_seconds, ageweight)

        for link_name, hot, timestamp in q.data[:MAX_PER_SUBREDDIT]:
            effective_hot = hot / hot_factor
            # heapq.merge sorts from smallest to largest so we need to
            # flip ehot and hot to get the hottest links first
            tuples_by_srid[sr_id].append(
                (-effective_hot, -hot, link_name, timestamp)
            )

    return tuples_by_srid

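# Why the tuples above lead with negated hotness: heapq.merge() yields its
# pre-sorted inputs in ascending order, so negating ehot/hot makes the
# hottest links come out first. A toy check (names hypothetical):
import heapq

a = [(-0.9, 'link_a'), (-0.1, 'link_c')]
b = [(-0.5, 'link_b')]
assert [name for _, name in heapq.merge(a, b)] == \
    ['link_a', 'link_b', 'link_c']
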
def normalized_hot(sr_ids, obey_age_limit=True, ageweight=None):
    timer = g.stats.get_timer("normalized_hot")
    timer.start()

    if not sr_ids:
        return []

    if not feature.is_enabled("scaled_normalized_hot"):
        ageweight = None
    tuples_by_srid = get_hot_tuples(sr_ids, ageweight=ageweight)

    if obey_age_limit:
        cutoff = datetime.now(g.tz) - timedelta(days=g.HOT_PAGE_AGE)
        oldest = epoch_seconds(cutoff)
    else:
        oldest = 0.

    merged = heapq.merge(*tuples_by_srid.values())
    generator = (link_name for ehot, hot, link_name, timestamp in merged
                 if timestamp > oldest)
    ret = list(itertools.islice(generator, MAX_LINKS))
    timer.stop()
    return ret

def time_listings(times=('year', 'month', 'week', 'day', 'hour')):
    oldests = dict((t, epoch_seconds(timeago('1 %s' % t)))
                   for t in times)

    @mr_tools.dataspec_m_thing(('sr_id', int),)
    def process(link):
        assert link.thing_type == 'link'

        timestamp = link.timestamp
        fname = make_fullname(Link, link.thing_id)

        if not link.spam:
            sr_id = link.sr_id
            ups, downs = link.ups, link.downs

            for tkey, oldest in oldests.iteritems():
                if timestamp > oldest:
                    yield ('sr-top-%s-%d' % (tkey, sr_id),
                           score(ups, downs), timestamp, fname)
                    yield ('sr-controversial-%s-%d' % (tkey, sr_id),
                           controversy(ups, downs), timestamp, fname)

    mr_tools.mr_map(process)

def _restrict_recent(recent):
    now = datetime.now(g.tz)
    since = epoch_seconds(now - recent)
    return 'timestamp:%i..' % since

def _restrict_recent(recent):
    now = datetime.now(g.tz)
    since = epoch_seconds(now - recent)
    return 'timestamp:[%i TO *]' % since

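# The two _restrict_recent() variants express the same open-ended range in
# different query syntaxes: the first yields e.g. "timestamp:1400000000.."
# (cloudsearch-style), the second "timestamp:[1400000000 TO *]"
# (Lucene/Solr-style range); the timestamps shown are hypothetical.
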
def _get_sort_value(comment, sort):
    if sort == "_date":
        return epoch_seconds(comment._date)
    return getattr(comment, sort)

def date_to_adzerk(d):
    utc_date = d - promote.timezone_offset
    epoch_milliseconds = int(epoch_seconds(utc_date) * 1000)
    return '/Date(%s)/' % epoch_milliseconds

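# date_to_adzerk() emits the Microsoft JSON date format: a date falling at
# epoch second 1400000000 (hypothetical) serializes as
# '/Date(1400000000000)/', i.e. milliseconds inside the wrapper.
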
def _serialize_date(cls, date):
    return str(epoch_seconds(date))