def port_cassavotes():
    """One-off migration: copy link/comment votes from the relational
    (tdb_sql) store into the Cassandra vote column families.

    Streams every Account->Link and Account->Comment vote rel, newest
    first, converts thing ids to base-36 (the Cassandra key form),
    copies the whitelisted optional data attributes, and writes each
    row at consistency level ONE (fastest; acceptable for a
    re-runnable backfill).
    """
    from r2.models import Vote, Account, Link, Comment
    # NOTE: CassandraVote was previously imported here but never used.
    from r2.models.vote import CassandraLinkVote, CassandraCommentVote
    from r2.lib.db.tdb_cassandra import CL
    from r2.lib.utils import fetch_things2, to36, progress

    # (relational rel class, cassandra rel class) pairs to port
    ts = [(Vote.rel(Account, Link), CassandraLinkVote),
          (Vote.rel(Account, Comment), CassandraCommentVote)]

    # only these optional vote data attributes are carried over
    dataattrs = set(['valid_user', 'valid_thing', 'ip', 'organic'])

    for prel, crel in ts:
        # `desc` is presumably a module-level import — sorts newest-first
        vq = prel._query(sort=desc('_date'),
                         data=True,
                         eager_load=False)
        vq = fetch_things2(vq)          # batched iteration over the query
        vq = progress(vq, persec=True)  # throughput logging while porting
        for v in vq:
            t1 = to36(v._thing1_id)  # voter (Account id, base-36)
            t2 = to36(v._thing2_id)  # votee (Link/Comment id, base-36)
            cv = crel(thing1_id=t1, thing2_id=t2,
                      date=v._date, name=v._name)
            for dkey, dval in v._t.iteritems():
                if dkey in dataattrs:
                    setattr(cv, dkey, dval)
            cv._commit(write_consistency_level=CL.ONE)
def port_cassavotes():
    """One-off migration: copy link/comment votes from the relational
    (tdb_sql) store into the Cassandra vote column families.

    NOTE(review): appears byte-identical to another port_cassavotes
    definition in this file; the later `def` shadows the earlier one at
    import time — confirm and drop the duplicate.
    """
    from r2.models import Vote, Account, Link, Comment
    from r2.models.vote import CassandraVote, CassandraLinkVote, CassandraCommentVote
    from r2.lib.db.tdb_cassandra import CL
    from r2.lib.utils import fetch_things2, to36, progress

    # (relational rel class, cassandra rel class) pairs to port
    ts = [(Vote.rel(Account, Link), CassandraLinkVote),
          (Vote.rel(Account, Comment), CassandraCommentVote)]

    # only these optional vote data attributes are carried over
    dataattrs = set(['valid_user', 'valid_thing', 'ip', 'organic'])

    for prel, crel in ts:
        # `desc` is presumably a module-level import — sorts newest-first
        vq = prel._query(sort=desc('_date'),
                         data=True,
                         eager_load=False)
        vq = fetch_things2(vq)          # batched iteration over the query
        vq = progress(vq, persec=True)  # throughput logging while porting
        for v in vq:
            t1 = to36(v._thing1_id)  # voter (Account id, base-36)
            t2 = to36(v._thing2_id)  # votee (Link/Comment id, base-36)
            cv = crel(thing1_id = t1, thing2_id = t2,
                      date=v._date, name=v._name)
            for dkey, dval in v._t.iteritems():
                if dkey in dataattrs:
                    setattr(cv, dkey, dval)
            # CL.ONE: fastest write consistency; fine for a backfill
            cv._commit(write_consistency_level=CL.ONE)
def vote_stats(config):
    """Count link and comment votes cast in the past day.

    Returns a dict with 'link_vote_count_past_day',
    'comment_vote_count_past_day' and their sum 'vote_count_past_day'.
    `config` is unused here — presumably kept for the stats-runner
    calling convention; confirm.

    NOTE(review): this file contains several `vote_stats` definitions
    with differing signatures; the last one defined wins at import time.
    """
    stats = {}
    link_votes = Vote.rel(Account, Link)
    comment_votes = Vote.rel(Account, Comment)
    for name, rel in (('link', link_votes),
                      ('comment', comment_votes)):
        # element 0 of get_rel_table(...) is the rel table itself
        table = get_rel_table(rel._type_id)[0]
        q = table.count(table.c.date > timeago('1 day'))
        stats[name+'_vote_count_past_day'] = q.execute().fetchone()[0]
    stats['vote_count_past_day'] = stats['link_vote_count_past_day'] + stats['comment_vote_count_past_day']
    return stats
def vote_stats(config, ranges):
    """Count link and comment votes cast yesterday.

    `ranges['yesterday']` is a (start, end) datetime pair; only votes
    strictly inside the open interval are counted.  `config` is unused
    here — presumably part of the stats-runner calling convention;
    confirm.
    """
    stats = {}
    link_votes = Vote.rel(Account, Link)
    comment_votes = Vote.rel(Account, Comment)
    for name, rel in (('link', link_votes),
                      ('comment', comment_votes)):
        # element 0 of get_rel_table(...) is the rel table itself
        table = get_rel_table(rel._type_id)[0]
        q = table.count((table.c.date > ranges['yesterday'][0]) &
                        (table.c.date < ranges['yesterday'][1]))
        stats[name + '_vote_count_yesterday'] = q.execute().fetchone()[0]
    stats['vote_count_yesterday'] = stats['link_vote_count_yesterday'] + stats[
        'comment_vote_count_yesterday']
    return stats
def vote_stats(config, ranges):
    """Count link and comment votes cast yesterday.

    NOTE(review): functionally identical to another
    vote_stats(config, ranges) definition in this file (formatting
    aside); the last definition wins at import time — confirm and drop
    the duplicate.
    """
    stats = {}
    link_votes = Vote.rel(Account, Link)
    comment_votes = Vote.rel(Account, Comment)
    for name, rel in (('link', link_votes),
                      ('comment', comment_votes)):
        # element 0 of get_rel_table(...) is the rel table itself
        table = get_rel_table(rel._type_id)[0]
        # strict inequalities: endpoints of the interval are excluded
        q = table.count(
            (table.c.date > ranges['yesterday'][0]) &
            (table.c.date < ranges['yesterday'][1]))
        stats[name+'_vote_count_yesterday'] = q.execute().fetchone()[0]
    stats['vote_count_yesterday'] = stats['link_vote_count_yesterday'] + stats['comment_vote_count_yesterday']
    return stats
def user_vote_change_comments(period=None, user=None):
    """Sum comment-vote up- and downvotes separately per author.

    Upvote and downvote totals are accumulated independently via SQL
    CASE (downvotes are reported as positive numbers).  Optionally
    restricted to votes/comments newer than `period` seconds ago and/or
    to a single `user`.

    Returns [(author_id, (upvote_total, downvote_total)), ...].
    """
    rel = Vote.rel(Account, Comment)
    type = tdb.rel_types_id[rel._type_id]
    # rt = rel table
    # dt = data table
    rt, account_tt, comment_tt, dt = type.rel_table
    aliases = tdb.alias_generator()
    author_dt = dt.alias(aliases.next())
    # vote amount is stored as a string name; cast for arithmetic
    amount = sa.cast(rt.c.name, sa.Integer)
    cols = [
        author_dt.c.value,
        sa.func.sum(sa.case([(amount > 0, amount)], else_=0)),
        sa.func.sum(sa.case([(amount < 0, amount * -1)], else_=0)),
    ]
    query = sa.and_(
        author_dt.c.thing_id == rt.c.rel_id,
        author_dt.c.key == "author_id",
        comment_tt.c.thing_id == rt.c.thing2_id
    )
    if period is not None:
        earliest = datetime.now(g.tz) - timedelta(0, period)
        query.clauses.extend((rt.c.date >= earliest,
                              comment_tt.c.date >= earliest))
    if user is not None:
        # data-table values are stored as strings
        query.clauses.append(author_dt.c.value == str(user._id))
    s = sa.select(cols, query, group_by=author_dt.c.value)
    rows = s.execute().fetchall()
    return [(int(r[0]), (r[1], r[2])) for r in rows]
def read_votes(self, cls2, karma_kind, kv_namespace):
    """Scan all Account->cls2 votes in id order, tallying karma.

    Resumable: after each batch with results, the next id to read is
    checkpointed in the key-value store under
    `<kv_namespace>.cur_read_id`, so a crashed run picks up where it
    left off.

    :param cls2: thing class voted on (e.g. Link or Comment)
    :param karma_kind: karma bucket passed through to tally_karma
    :param kv_namespace: checkpoint key prefix
    """
    STEP = 100  # vote ids per batch
    rel = Vote.rel(Account, cls2)
    max_id = self.max_rel_type_id(rel)
    id_start = int(self.state.kvstore.get(kv_namespace + '.cur_read_id', '0'))
    print('Scanning {0}. Highest vote id is {1}; starting at {2}'.format(
        rel._type_name, max_id, id_start))
    # xrange bounds are evaluated once here; the rebinding of max_id
    # inside the loop only affects the progress line and the checkpoint.
    for id_low in xrange(id_start, max_id + 1, STEP):
        votes = list(self.query_rel_id_range(rel, id_low, id_low + STEP))
        print('{0}: {1}, {2} of {3}'.format(
            datetime.now().isoformat(' '), rel._type_name, id_low, max_id))
        for vote in votes:
            thing = cls2._byID(vote._thing2_id, data=True)
            amt = int(vote._name)  # can be either positive or negative
            self.state.tally_karma(thing.author_id, thing.sr_id,
                                   karma_kind, amt)
        if votes:
            # checkpoint one past the highest id actually seen
            max_id = max(v._id for v in votes)
            self.state.kvstore[kv_namespace + '.cur_read_id'] = str(max_id + 1)
            self.state.commit()
    print('Done with {0}!'.format(rel._type_name))
def user_vote_change_links(period=None, user=None):
    """Sum each author's link-vote karma change, weighted per subreddit.

    Vote amounts are multiplied by the owning subreddit's
    post_karma_multiplier, falling back to g.post_karma_multiplier.
    Optionally restricted to votes/links newer than `period` seconds ago
    and/or to a single `user`.

    Returns [(author_id, weighted_vote_sum), ...].
    """
    rel = Vote.rel(Account, Link)
    type = tdb.rel_types_id[rel._type_id]
    # rt = rel table
    # dt = data table
    rt, account_tt, link_tt, dt = type.rel_table
    aliases = tdb.alias_generator()
    author_dt = dt.alias(aliases.next())
    # second data-table alias: the link's own data row (for sr_id)
    link_dt = tdb.types_id[Link._type_id].data_table[0].alias(aliases.next())

    # Create an SQL CASE statement for the subreddit vote multiplier
    cases = []
    for subreddit in subreddits_with_custom_karma_multiplier():
        cases.append( (sa.cast(link_dt.c.value,sa.Integer) == subreddit._id,
                       subreddit.post_karma_multiplier) )
    cases.append( (True, g.post_karma_multiplier) ) # The default article multiplier

    query = sa.and_(author_dt.c.thing_id == rt.c.rel_id,
                    author_dt.c.key == 'author_id',
                    link_tt.c.thing_id == rt.c.thing2_id,
                    link_dt.c.key == 'sr_id',
                    link_dt.c.thing_id == rt.c.thing2_id)
    if period is not None:
        earliest = datetime.now(g.tz) - timedelta(0, period)
        query.clauses.extend((rt.c.date >= earliest,
                              link_tt.c.date >= earliest))
    if user is not None:
        # data-table values are stored as strings
        query.clauses.append(author_dt.c.value == str(user._id))
    s = sa.select([author_dt.c.value,
                   sa.func.sum(sa.cast(rt.c.name, sa.Integer) * sa.case(cases))],
                  query,
                  group_by=author_dt.c.value)
    rows = s.execute().fetchall()
    return [(int(r.value), r.sum) for r in rows]
def export_rel_votes(self, votes_on_cls, table):
    # Vote.vote(c.user, link, action == 'like', request.ip)
    """Dump every Account->votes_on_cls vote row into `table`.

    Iterates vote ids one at a time, skipping holes (NotFound) and rows
    with broken attributes, inserting the rest and reporting progress.

    NOTE(review): xrange(max_id) covers ids 0..max_id-1 — if
    max_rel_type_id returns the highest existing id (rather than a
    count), the last vote is never exported; confirm the intended
    semantics.
    """
    processed = 0
    rel = Vote.rel(Account, votes_on_cls)
    max_id = self.max_rel_type_id(rel)
    print >>sys.stderr, "%d %s to process" % (max_id, table.name)
    for vote_id in xrange(max_id):
        try:
            vote = rel._byID(vote_id, data=True)
        except NotFound:
            # id gaps are normal; just move on
            continue
        try:
            row = (
                vote._id,
                vote._thing1_id, # Account
                vote._thing2_id, # Link/Comment (votes_on_cls)
                vote._name,      # Vote value
                vote._date
            )
        except AttributeError:
            print >>sys.stderr, " vote with id %d is broken, skipping" % vote_id
            continue
        table.insert(values=row).execute()
        processed += 1
        self.update_progress(processed)
def user_vote_change_links(period = '1 day'):
    """Weighted link-vote karma change per author within `period`.

    Vote amounts are multiplied by the owning subreddit's
    post_karma_multiplier (default g.post_karma_multiplier).  Only
    votes and links newer than `period` ago are counted.

    Returns [(author_id, weighted_vote_sum), ...].

    NOTE(review): user_vote_change_links is defined several times in
    this file with differing signatures; the last definition wins.
    """
    rel = Vote.rel(Account, Link)
    type = tdb.rel_types_id[rel._type_id]
    # rt = rel table
    # dt = data table
    rt, account_tt, link_tt, dt = type.rel_table
    aliases = tdb.alias_generator()
    author_dt = dt.alias(aliases.next())
    # second data-table alias: the link's own data row (for sr_id)
    link_dt = tdb.types_id[Link._type_id].data_table[0].alias(aliases.next())

    # Create an SQL CASE statement for the subreddit vote multiplier
    cases = []
    for subreddit in subreddits_with_custom_karma_multiplier():
        cases.append( (sa.cast(link_dt.c.value,sa.Integer) == subreddit._id,
                       subreddit.post_karma_multiplier) )
    cases.append( (True, g.post_karma_multiplier) ) # The default article multiplier

    date = utils.timeago(period)
    s = sa.select([author_dt.c.value,
                   sa.func.sum(sa.cast(rt.c.name, sa.Integer) * sa.case(cases))],
                  sa.and_(rt.c.date >= date,
                          author_dt.c.thing_id == rt.c.rel_id,
                          author_dt.c.key == 'author_id',
                          link_tt.c.thing_id == rt.c.thing2_id,
                          link_tt.c.date >= date,
                          link_dt.c.key == 'sr_id',
                          link_dt.c.thing_id == rt.c.thing2_id),
                  group_by = author_dt.c.value)
    rows = s.execute().fetchall()
    return [(int(r.value), r.sum) for r in rows]
def user_vote_change_comments(period=None, user=None):
    """Sum comment-vote up- and downvotes separately per author.

    Returns [(author_id, (upvote_total, downvote_total)), ...]; the
    downvote total is reported as a positive number.

    NOTE(review): appears functionally identical to another
    user_vote_change_comments(period, user) definition in this file —
    the last definition wins at import time; confirm and deduplicate.
    """
    rel = Vote.rel(Account, Comment)
    type = tdb.rel_types_id[rel._type_id]
    # rt = rel table
    # dt = data table
    rt, account_tt, comment_tt, dt = type.rel_table
    aliases = tdb.alias_generator()
    author_dt = dt.alias(aliases.next())
    # vote amount is stored as a string name; cast for arithmetic
    amount = sa.cast(rt.c.name, sa.Integer)
    cols = [
        author_dt.c.value,
        sa.func.sum(sa.case([(amount > 0, amount)], else_=0)),
        sa.func.sum(sa.case([(amount < 0, amount * -1)], else_=0)),
    ]
    query = sa.and_(author_dt.c.thing_id == rt.c.rel_id,
                    author_dt.c.key == 'author_id',
                    comment_tt.c.thing_id == rt.c.thing2_id)
    if period is not None:
        earliest = datetime.now(g.tz) - timedelta(0, period)
        query.clauses.extend(
            (rt.c.date >= earliest, comment_tt.c.date >= earliest))
    if user is not None:
        # data-table values are stored as strings
        query.clauses.append(author_dt.c.value == str(user._id))
    s = sa.select(cols, query, group_by=author_dt.c.value)
    rows = s.execute().fetchall()
    return [(int(r[0]), (r[1], r[2])) for r in rows]
def vote_scan(self, cls2, karma_kind, mult_func):
    """Tally per-author up/down karma from every Account->cls2 vote.

    Accumulates abs(amount) into self.new_values[author_id][key], where
    the key encodes direction ('ups'/'downs'), karma_kind, and the
    subreddit name.

    NOTE(review): `mult_func` is currently ignored — the multiplier is
    hard-coded to 1 (see the commented-out call below); confirm whether
    that is intentional.
    """
    rel = Vote.rel(Account, cls2)
    # NOTE(review): materializes the entire vote rel in memory; fine for
    # a one-off script, painful at scale.
    votes = list(rel._query())
    for vote in votes:
        thing = cls2._byID(vote._thing2_id, data=True)
        sr = thing.subreddit_slow
        mult = 1 #mult_func(thing)
        amt = int(vote._name)
        # a zero vote falls into the 'ups' bucket (>= 0)
        gravity = 'ups' if amt >= 0 else 'downs'
        key = 'karma_{0}_{1}_{2}'.format(gravity, karma_kind, sr.name)
        self.new_values[thing.author_id][key] += abs(amt * mult)
def user_vote_change_links(period=None, user=None):
    """Separate weighted up- and down-vote totals per author for links.

    Each vote is multiplied by the owning subreddit's
    post_karma_multiplier (default g.post_karma_multiplier); up- and
    downvotes are summed independently, with downvotes reported as
    positive numbers.  Optionally restricted to rows newer than
    `period` seconds ago and/or to a single `user`.

    Returns [(author_id, (upvote_total, downvote_total)), ...].
    """
    rel = Vote.rel(Account, Link)
    type = tdb.rel_types_id[rel._type_id]
    # rt = rel table
    # dt = data table
    rt, account_tt, link_tt, dt = type.rel_table
    aliases = tdb.alias_generator()
    author_dt = dt.alias(aliases.next())
    # second data-table alias: the link's own data row (for sr_id)
    link_dt = tdb.types_id[Link._type_id].data_table[0].alias(aliases.next())

    # Create an SQL CASE statement for the subreddit vote multiplier
    cases = []
    for subreddit in subreddits_with_custom_karma_multiplier():
        cases.append((sa.cast(link_dt.c.value, sa.Integer) == subreddit._id,
                      subreddit.post_karma_multiplier))
    cases.append( (True, g.post_karma_multiplier)) # The default article multiplier
    weight_cases = sa.case(cases)

    # vote amount is stored as a string name; cast for arithmetic
    amount = sa.cast(rt.c.name, sa.Integer)
    cols = [
        author_dt.c.value,
        sa.func.sum(sa.case([(amount > 0, amount * weight_cases)], else_=0)),
        sa.func.sum(
            sa.case([(amount < 0, amount * -1 * weight_cases)], else_=0)),
    ]

    query = sa.and_(author_dt.c.thing_id == rt.c.rel_id,
                    author_dt.c.key == 'author_id',
                    link_tt.c.thing_id == rt.c.thing2_id,
                    link_dt.c.key == 'sr_id',
                    link_dt.c.thing_id == rt.c.thing2_id)
    if period is not None:
        # Note: limiting this to only recent items is intentional
        # http://lesswrong.com/r/discussion/lw/ghy/minor_separate_upvotes_and_downvotes_implimented/8d7f
        earliest = datetime.now(g.tz) - timedelta(0, period)
        query.clauses.extend(
            (rt.c.date >= earliest, link_tt.c.date >= earliest))
    if user is not None:
        # data-table values are stored as strings
        query.clauses.append(author_dt.c.value == str(user._id))

    s = sa.select(cols, query, group_by=author_dt.c.value)
    rows = s.execute().fetchall()
    return [(int(r[0]), (r[1], r[2])) for r in rows]
def get_likes(user, items):
    """Look up `user`'s votes on `items`.

    Returns {(user, item): True|False|None} — True for an upvote,
    False for a downvote, None for no vote.  Resolution order: the
    prequeued (in-flight) vote cache first; then items that cannot have
    been voted on are short-circuited to None; the remainder is fetched
    from Cassandra via Vote.likes.

    Fix: as previously written, BOTH branches of the inner
    _can_skip_lookup check assigned None, which marked every link and
    comment as unvoted and made the Vote.likes call dead.  The `else`
    belongs to the isinstance check (non-votable types get None), per
    the comments' stated intent.
    """
    if not user or not items:
        return {}
    res = {}

    # check the prequeued_vote_keys
    keys = {}
    for item in items:
        if (user, item) in res:
            continue
        key = prequeued_vote_key(user, item)
        keys[key] = (user, item)
    if keys:
        r = g.cache.get_multi(keys.keys())
        for key, v in r.iteritems():
            res[keys[key]] = (True if v == '1'
                              else False if v == '-1'
                              else None)

    # avoid requesting items that they can't have voted on (we're
    # still using the tdb_sql Thing API for this). TODO: we should do
    # this before the prequeued_vote_keys lookup, so that in extreme
    # cases we can avoid hitting the cache for them at all, but in the
    # current state that precludes brand new comments from appearing
    # to have been voted on
    for item in items:
        if (user, item) in res:
            continue
        # we can only vote on links and comments
        if isinstance(item, (Link, Comment)):
            rel = Vote.rel(user.__class__, item.__class__)
            if rel._can_skip_lookup(user, item):
                res[(user, item)] = None
        else:
            # non-votable type: definitely no vote
            res[(user, item)] = None

    # now hit Cassandra with the remainder
    likes = Vote.likes(user, [i for i in items if (user, i) not in res])
    res.update(likes)

    return res
def get_likes(user, items):
    """Look up `user`'s votes on `items`.

    Returns {(user, item): True|False|None} — True for an upvote,
    False for a downvote, None for no vote.  Resolution order: the
    prequeued (in-flight) vote cache first; then items that cannot have
    been voted on are short-circuited to None; the remainder is fetched
    from Cassandra via Vote.likes.

    Fix: as previously written, BOTH branches of the inner
    _can_skip_lookup check assigned None, which marked every link and
    comment as unvoted and made the Vote.likes call dead.  The `else`
    belongs to the isinstance check (non-votable types get None), per
    the comments' stated intent.

    NOTE(review): this definition is duplicated elsewhere in this file;
    the last `def` wins at import time — deduplicate.
    """
    if not user or not items:
        return {}
    res = {}

    # check the prequeued_vote_keys
    keys = {}
    for item in items:
        if (user, item) in res:
            continue
        key = prequeued_vote_key(user, item)
        keys[key] = (user, item)
    if keys:
        r = g.cache.get_multi(keys.keys())
        for key, v in r.iteritems():
            res[keys[key]] = (True if v == '1'
                              else False if v == '-1'
                              else None)

    # avoid requesting items that they can't have voted on (we're
    # still using the tdb_sql Thing API for this). TODO: we should do
    # this before the prequeued_vote_keys lookup, so that in extreme
    # cases we can avoid hitting the cache for them at all, but in the
    # current state that precludes brand new comments from appearing
    # to have been voted on
    for item in items:
        if (user, item) in res:
            continue
        # we can only vote on links and comments
        if isinstance(item, (Link, Comment)):
            rel = Vote.rel(user.__class__, item.__class__)
            if rel._can_skip_lookup(user, item):
                res[(user, item)] = None
        else:
            # non-votable type: definitely no vote
            res[(user, item)] = None

    # now hit Cassandra with the remainder
    likes = Vote.likes(user, [i for i in items if (user, i) not in res])
    res.update(likes)

    return res
def user_vote_change_links(period=None, user=None):
    """Separate weighted up- and down-vote totals per author for links.

    Returns [(author_id, (upvote_total, downvote_total)), ...]; the
    downvote total is reported as a positive number.

    NOTE(review): functionally identical (formatting aside) to another
    user_vote_change_links(period, user) definition in this file — the
    last definition wins at import time; confirm and deduplicate.
    """
    rel = Vote.rel(Account, Link)
    type = tdb.rel_types_id[rel._type_id]
    # rt = rel table
    # dt = data table
    rt, account_tt, link_tt, dt = type.rel_table
    aliases = tdb.alias_generator()
    author_dt = dt.alias(aliases.next())
    # second data-table alias: the link's own data row (for sr_id)
    link_dt = tdb.types_id[Link._type_id].data_table[0].alias(aliases.next())

    # Create an SQL CASE statement for the subreddit vote multiplier
    cases = []
    for subreddit in subreddits_with_custom_karma_multiplier():
        cases.append( (sa.cast(link_dt.c.value,sa.Integer) == subreddit._id,
                       subreddit.post_karma_multiplier) )
    cases.append( (True, g.post_karma_multiplier) ) # The default article multiplier
    weight_cases = sa.case(cases)

    # vote amount is stored as a string name; cast for arithmetic
    amount = sa.cast(rt.c.name, sa.Integer)
    cols = [
        author_dt.c.value,
        sa.func.sum(sa.case([(amount > 0, amount * weight_cases)], else_=0)),
        sa.func.sum(sa.case([(amount < 0, amount * -1 * weight_cases)], else_=0)),
    ]

    query = sa.and_(author_dt.c.thing_id == rt.c.rel_id,
                    author_dt.c.key == 'author_id',
                    link_tt.c.thing_id == rt.c.thing2_id,
                    link_dt.c.key == 'sr_id',
                    link_dt.c.thing_id == rt.c.thing2_id)
    if period is not None:
        # Note: limiting this to only recent items is intentional
        # http://lesswrong.com/r/discussion/lw/ghy/minor_separate_upvotes_and_downvotes_implimented/8d7f
        earliest = datetime.now(g.tz) - timedelta(0, period)
        query.clauses.extend((rt.c.date >= earliest,
                              link_tt.c.date >= earliest))
    if user is not None:
        # data-table values are stored as strings
        query.clauses.append(author_dt.c.value == str(user._id))

    s = sa.select(cols, query, group_by=author_dt.c.value)
    rows = s.execute().fetchall()
    return [(int(r[0]), (r[1], r[2])) for r in rows]
def top_user_change(period = '1 day'):
    """Return the ten accounts whose link-vote total changed the most
    within `period`, as [(author_id, vote_sum), ...] sorted descending.
    """
    vote_rel = Vote.rel(Account, Link)
    rel_tbl, account_tbl, link_tbl, data_tbl = tdb.get_rel_table(vote_rel._type_id)
    author_alias = data_tbl.alias()
    cutoff = utils.timeago(period)

    # vote amount is stored as a string name; cast before summing
    karma_sum = sa.func.sum(sa.cast(rel_tbl.c.name, sa.Integer))
    conditions = sa.and_(rel_tbl.c.date > cutoff,
                         author_alias.c.thing_id == rel_tbl.c.rel_id,
                         author_alias.c.key == 'author_id')
    stmt = sa.select([author_alias.c.value, karma_sum],
                     conditions,
                     group_by=author_alias.c.value,
                     order_by=sa.desc(karma_sum),
                     limit=10)
    return [(int(row.value), row.sum) for row in stmt.execute().fetchall()]
def user_vote_change_comments(period = '1 day'):
    """Total comment-vote karma change per author within `period`,
    as [(author_id, vote_sum), ...].
    """
    vote_rel = Vote.rel(Account, Comment)
    rel_type = tdb.rel_types_id[vote_rel._type_id]
    # rel table, both thing tables, data table
    rel_tbl, account_tbl, comment_tbl, data_tbl = rel_type.rel_table
    gen = tdb.alias_generator()
    author_alias = data_tbl.alias(gen.next())
    cutoff = utils.timeago(period)

    # vote amount is stored as a string name; cast before summing
    karma_sum = sa.func.sum(sa.cast(rel_tbl.c.name, sa.Integer))
    conditions = sa.and_(rel_tbl.c.date > cutoff,
                         author_alias.c.thing_id == rel_tbl.c.rel_id,
                         author_alias.c.key == 'author_id')
    stmt = sa.select([author_alias.c.value, karma_sum],
                     conditions,
                     group_by=author_alias.c.value)
    return [(int(row.value), row.sum) for row in stmt.execute().fetchall()]
def top_user_change(period="1 day"):
    """Top 10 accounts by summed link-vote change within `period`.

    NOTE(review): top_user_change is defined more than once in this
    file; the last definition wins at import time — deduplicate.
    """
    rel = Vote.rel(Account, Link)
    type = tdb.rel_types_id[rel._type_id]
    # rt = rel table
    # dt = data table
    rt, account, link, dt = type.rel_table
    aliases = tdb.alias_generator()
    author = dt.alias(aliases.next())
    date = utils.timeago(period)
    # vote amount is stored as a string name; cast before summing
    s = sa.select(
        [author.c.value, sa.func.sum(sa.cast(rt.c.name, sa.Integer))],
        sa.and_(rt.c.date > date,
                author.c.thing_id == rt.c.rel_id,
                author.c.key == "author_id"),
        group_by=author.c.value,
        order_by=sa.desc(sa.func.sum(sa.cast(rt.c.name, sa.Integer))),
        limit=10,
    )
    rows = s.execute().fetchall()
    return [(int(r.value), r.sum) for r in rows]
def top_user_change(period='1 day'):
    """Top 10 accounts by summed link-vote change within `period`.

    NOTE(review): functionally identical (quoting/formatting aside) to
    another top_user_change definition in this file; the last
    definition wins at import time — deduplicate.
    """
    rel = Vote.rel(Account, Link)
    type = tdb.rel_types_id[rel._type_id]
    # rt = rel table
    # dt = data table
    rt, account, link, dt = type.rel_table
    aliases = tdb.alias_generator()
    author = dt.alias(aliases.next())
    date = utils.timeago(period)
    # vote amount is stored as a string name; cast before summing
    s = sa.select(
        [author.c.value, sa.func.sum(sa.cast(rt.c.name, sa.Integer))],
        sa.and_(rt.c.date > date,
                author.c.thing_id == rt.c.rel_id,
                author.c.key == 'author_id'),
        group_by=author.c.value,
        order_by=sa.desc(sa.func.sum(sa.cast(rt.c.name, sa.Integer))),
        limit=10)
    rows = s.execute().fetchall()
    return [(int(r.value), r.sum) for r in rows]
def user_vote_change_links(period='1 day'):
    """Weighted link-vote karma change per author within `period`.

    Vote amounts are multiplied by the owning subreddit's
    post_karma_multiplier (default g.post_karma_multiplier).  Only
    votes and links newer than `period` ago are counted.

    NOTE(review): user_vote_change_links is defined several times in
    this file with differing signatures; the last definition wins.
    """
    rel = Vote.rel(Account, Link)
    type = tdb.rel_types_id[rel._type_id]
    # rt = rel table
    # dt = data table
    rt, account_tt, link_tt, dt = type.rel_table
    aliases = tdb.alias_generator()
    author_dt = dt.alias(aliases.next())
    # second data-table alias: the link's own data row (for sr_id)
    link_dt = tdb.types_id[Link._type_id].data_table[0].alias(aliases.next())

    # Create an SQL CASE statement for the subreddit vote multiplier
    cases = []
    for subreddit in subreddits_with_custom_karma_multiplier():
        cases.append((sa.cast(link_dt.c.value, sa.Integer) == subreddit._id,
                      subreddit.post_karma_multiplier))
    cases.append( (True, g.post_karma_multiplier)) # The default article multiplier

    date = utils.timeago(period)
    s = sa.select([
        author_dt.c.value,
        sa.func.sum(sa.cast(rt.c.name, sa.Integer) * sa.case(cases))
    ],
                  sa.and_(rt.c.date >= date,
                          author_dt.c.thing_id == rt.c.rel_id,
                          author_dt.c.key == 'author_id',
                          link_tt.c.thing_id == rt.c.thing2_id,
                          link_tt.c.date >= date,
                          link_dt.c.key == 'sr_id',
                          link_dt.c.thing_id == rt.c.thing2_id),
                  group_by=author_dt.c.value)
    rows = s.execute().fetchall()
    return [(int(r.value), r.sum) for r in rows]
def user_vote_change_comments(period='1 day'):
    """Total comment-vote karma change per author within `period`,
    counting only votes AND comments newer than the cutoff, as
    [(author_id, vote_sum), ...].
    """
    vote_rel = Vote.rel(Account, Comment)
    rel_type = tdb.rel_types_id[vote_rel._type_id]
    # rel table, both thing tables, data table
    rel_tbl, account_tbl, comment_tbl, data_tbl = rel_type.rel_table
    gen = tdb.alias_generator()
    author_alias = data_tbl.alias(gen.next())
    cutoff = utils.timeago(period)

    # vote amount is stored as a string name; cast before summing
    karma_sum = sa.func.sum(sa.cast(rel_tbl.c.name, sa.Integer))
    conditions = sa.and_(rel_tbl.c.date >= cutoff,
                         author_alias.c.thing_id == rel_tbl.c.rel_id,
                         author_alias.c.key == 'author_id',
                         comment_tbl.c.thing_id == rel_tbl.c.thing2_id,
                         comment_tbl.c.date >= cutoff)
    stmt = sa.select([author_alias.c.value, karma_sum],
                     conditions,
                     group_by=author_alias.c.value)
    return [(int(row.value), row.sum) for row in stmt.execute().fetchall()]
def rel_query(rel, thing_id, name, filters = []):
    """General relationship query."""
    # NOTE(review): `filters` has a mutable default; harmless here since
    # it is never mutated, but a tuple default would be safer.
    q = rel._query(rel.c._thing1_id == thing_id,
                   rel.c._t2_deleted == False,
                   rel.c._name == name,
                   sort = desc('_date'),
                   eager_load = True,
                   # skip loading thing data when the query cache serves it
                   thing_data = not g.use_query_cache
                   )
    if filters:
        q._filter(*filters)
    return q

# Account -> Link vote relation used by the liked/disliked queries below
vote_rel = Vote.rel(Account, Link)

# cached-query decorators keyed on the rel's thing2
cached_userrel_query = cached_query(UserQueryCache, filter_thing2)
cached_srrel_query = cached_query(SubredditQueryCache, filter_thing2)
migrating_cached_userrel_query = migrating_cached_query(UserQueryCache, filter_thing2)
migrating_cached_srrel_query = migrating_cached_query(SubredditQueryCache, filter_thing2)

@migrating_cached_userrel_query
def get_liked(user):
    # '1' is an upvote in the rel's _name encoding
    return rel_query(vote_rel, user, '1')

@migrating_cached_userrel_query
def get_disliked(user):
    # '-1' is a downvote
    return rel_query(vote_rel, user, '-1')

# NOTE(review): the function decorated below is not visible in this
# chunk — the definition appears truncated here.
@cached_userrel_query
def user_rel_query(rel, user, name, filters=[]):
    """General user relationship query."""
    # NOTE(review): `filters` has a mutable default; harmless here since
    # it is never mutated, but a tuple default would be safer.
    q = rel._query(rel.c._thing1_id == user._id,
                   rel.c._t2_deleted == False,
                   rel.c._name == name,
                   sort=desc('_date'),
                   eager_load=True,
                   # skip loading thing data when the query cache serves it
                   thing_data=not g.use_query_cache)
    if filters:
        q._filter(*filters)
    return make_results(q, filter_thing2)

# Account -> Link vote relation shared by the queries below
vote_rel = Vote.rel(Account, Link)

def get_liked(user):
    # '1' is an upvote in the rel's _name encoding
    return user_rel_query(vote_rel, user, '1')

def get_disliked(user):
    # '-1' is a downvote
    return user_rel_query(vote_rel, user, '-1')

def get_hidden(user):
    return user_rel_query(SaveHide, user, 'hide')

# NOTE(review): this definition is truncated at the end of the chunk —
# its body is not visible here.
def get_saved(user):