def get_all_comments(self):
    from r2.lib.db import queries
    from r2.models import Comment
    from r2.controllers.errors import UserRequiredException

    if not c.user_is_loggedin:
        raise UserRequiredException

    friends = self.get_important_friends(c.user._id)

    if not friends:
        return []

    if g.use_query_cache:
        # with the precomputer enabled, this Subreddit only supports
        # being sorted by 'new'. it would be nice to have a
        # cleaner UI than just blatantly ignoring their sort,
        # though
        sort = 'new'
        time = 'all'

        friends = Account._byID(friends, return_dict=False)

        crs = [queries.get_comments(friend, sort, time)
               for friend in friends]
        return queries.MergedCachedResults(crs)
    else:
        q = Comment._query(Comment.c.author_id == friends,
                           sort=desc('_date'), data=True)
        return q
def get_deleted_comments(user_id):
    return Comment._query(Comment.c.author_id == user_id,
                          Comment.c._deleted == True,
                          Comment.c._spam == (True, False),
                          sort=db_sort('new'))
def post_process_post(self, post):
    """Perform post processing to rewrite URLs and generate mapping
       between old and new permalinks"""
    post.article = self.rewrite_ob_urls(post.article)
    post._commit()

    comments = Comment._query(Comment.c.link_id == post._id, data=True)
    for comment in comments:
        comment.body = self.rewrite_ob_urls(comment.body)
        comment._commit()
def comment_exists(post, comment):
    # Check if this comment already exists using a brute-force compare on
    # content. BeautifulSoup is used to parse the body as HTML so that
    # markup can be stripped before comparing.
    content = ''.join(BeautifulSoup(comment['body']).findAll(text=True))
    key = re_non_alphanum.sub('', content)

    existing_comments = Comment._query(Comment.c.link_id == post._id,
                                       Comment.c.ob_imported == True,
                                       data=True)
    for existing_comment in existing_comments:
        author = Account._byID(existing_comment.author_id, data=True)
        content = ''.join(BeautifulSoup(existing_comment.body).findAll(text=True))
        existing_key = re_non_alphanum.sub('', content)
        if key == existing_key:
            print " Skipping existing %s" % comment_excerpt(comment)
            return True
        # else:
        #     print "%s *|NOT|* %s" % (key, existing_key)

    return False
def _load_link_comments(link_id):
    from r2.models import Comment
    q = Comment._query(Comment.c.link_id == link_id,
                       Comment.c._deleted == (True, False),
                       Comment.c._spam == (True, False),
                       optimize_rules=True,
                       data=True)
    comments = list(q)
    cids = [c._id for c in comments]

    # make a tree
    comment_tree = {}
    for cm in comments:
        p_id = cm.parent_id
        comment_tree.setdefault(p_id, []).append(cm._id)

    # calculate the depths
    depth = {}
    level = 0
    cur_level = comment_tree.get(None, ())
    while cur_level:
        next_level = []
        for cm_id in cur_level:
            depth[cm_id] = level
            next_level.extend(comment_tree.get(cm_id, ()))
        cur_level = next_level
        level += 1

    # calc the number of children
    num_children = {}
    for cm_id in cids:
        num = 0
        todo = [cm_id]
        iteration_count = 0
        while todo:
            if iteration_count > MAX_ITERATIONS:
                raise Exception("bad comment tree for link %s" % link_id)
            more = comment_tree.get(todo.pop(0), ())
            num += len(more)
            todo.extend(more)
            iteration_count += 1
        num_children[cm_id] = num

    num_comments = sum(1 for c in comments if not c._deleted)

    return cids, comment_tree, depth, num_children, num_comments
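# A minimal sketch (not part of the original codebase) of how the structures
# returned by _load_link_comments above could be consumed: walk the tree
# depth-first so replies appear under their parents. The helper name
# walk_comment_tree is hypothetical.
def walk_comment_tree(comment_tree, depth):
    # start from the top-level comments (children of the None sentinel)
    stack = list(comment_tree.get(None, []))
    ordered = []
    while stack:
        cm_id = stack.pop(0)
        ordered.append((cm_id, depth[cm_id]))
        # prepend children so each comment's subtree stays contiguous
        stack[:0] = comment_tree.get(cm_id, [])
    return ordered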
def fix_images(dryrun=True):
    from r2.models import Link, Comment

    links = Link._query(Link.c.ob_permalink != None, data=True)
    for link in links:
        ob_url = link.ob_permalink.strip()
        print "Processing %s" % ob_url

        new_content = process_content(link.article)
        if not dryrun:
            link.article = new_content
            link._commit()

        comments = Comment._query(Comment.c.link_id == link._id, data=True)
        for comment in comments:
            new_content = process_content(comment.body)
            if not dryrun:
                comment.body = new_content
                comment._commit()
def _load_br_criticisms(link_id):
    from r2.models import Comment
    q = Comment._query(Comment.c.link_id == link_id,
                       Comment.c._deleted == (True, False),
                       Comment.c._spam == (True, False),
                       optimize_rules=True,
                       data=True)
    comments = list(q)
    brs = [c for c in comments if c.bestresponse == True]

    # collect the "best response" comments plus every ancestor up to the root
    outs = [c._id for c in brs]
    ups = [c.parent_id for c in brs if c.parent_id]
    while ups:
        brs = [c for c in comments if c._id in ups]
        outs.extend([c._id for c in brs])
        ups = [c.parent_id for c in brs if c.parent_id]
    return outs
def _populate(after_id=None, estimate=54301242):
    from r2.models import Comment, CommentSortsCache, desc
    from r2.lib.db import tdb_cassandra
    from r2.lib import utils

    # larger has a chance to decrease the number of Cassandra writes,
    # but the probability is low
    chunk_size = 5000

    q = Comment._query(Comment.c._spam == (True, False),
                       Comment.c._deleted == (True, False),
                       sort=desc('_date'))

    if after_id is not None:
        q._after(Comment._byID(after_id))

    q = utils.fetch_things2(q, chunk_size=chunk_size)
    q = utils.progress(q, verbosity=chunk_size, estimate=estimate)

    for chunk in utils.in_chunks(q, chunk_size):
        chunk = filter(lambda x: hasattr(x, 'link_id'), chunk)
        update_comment_votes(chunk,
                             write_consistency_level=tdb_cassandra.CL.ONE)
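# Presumably run by hand from an interactive shell when backfilling the
# Cassandra comment-vote data; the checkpoint id below is hypothetical.
#   _populate()                            # full backfill, newest first
#   _populate(after_id=last_processed_id)  # resume after an interruption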
def spam_account_comments(self, account, query_limit=10000, spam_limit=500):
    from r2.lib.db.operators import asc, desc, timeago

    q = Comment._query(Comment.c.author_id == account._id,
                       Comment.c._spam == False,
                       sort=desc('_date'),
                       data=False)
    q._limit = query_limit
    things = list(q)

    processed = 0
    for item in things:
        if processed < spam_limit:
            verdict = getattr(item, "verdict", None)
            if not verdict or not verdict.endswith("-approved"):
                processed += 1
                admintools.spam(item, auto=False, moderator_banned=False,
                                banner=None, train_spam=True)
def _load_link_comments(link_id):
    from r2.models import Comment
    q = Comment._query(Comment.c.link_id == link_id,
                       Comment.c._deleted == (True, False),
                       Comment.c._spam == (True, False),
                       data=True)
    comments = list(q)
    cids = [c._id for c in comments]

    # make a tree
    comment_tree = {}
    for cm in comments:
        p_id = cm.parent_id
        comment_tree.setdefault(p_id, []).append(cm._id)

    # calculate the depths
    depth = {}
    level = 0
    cur_level = comment_tree.get(None, ())
    while cur_level:
        next_level = []
        for cm_id in cur_level:
            depth[cm_id] = level
            next_level.extend(comment_tree.get(cm_id, ()))
        cur_level = next_level
        level += 1

    # calc the number of children
    num_children = {}
    for cm_id in cids:
        num = 0
        todo = [cm_id]
        while todo:
            more = comment_tree.get(todo.pop(0), ())
            num += len(more)
            todo.extend(more)
        num_children[cm_id] = num

    return cids, comment_tree, depth, num_children
def get_spam_comments(sr_id):
    return Comment._query(Comment.c.sr_id == sr_id,
                          Comment.c._spam == True,
                          sort=db_sort('new'))
def _query_comments(self, *args):
    kwargs = {'data': True}
    q = Comment._query(*args, **kwargs)
    comments = list(q)
    return comments
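# Hypothetical usage of the _query_comments helper above: any Comment._query
# rules are passed positionally and data=True is supplied automatically. The
# receiver name 'importer' is an assumption, not from the original code.
#   comments = importer._query_comments(Comment.c.link_id == post._id,
#                                       Comment.c.ob_imported == True)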
def get_all_comments():
    """the master /comments page"""
    return Comment._query(sort=desc('_date'))
def get_all_comments():
    """the master /comments page"""
    q = Comment._query(sort=desc('_date'))
    return make_results(q)
def get_spam_comments(sr):
    q_c = Comment._query(Comment.c.sr_id == sr._id,
                         Comment.c._spam == True,
                         sort=db_sort('new'))
    return make_results(q_c)
def get_reported_comments(sr):
    q_c = Comment._query(Comment.c.reported != 0,
                         Comment.c.sr_id == sr._id,
                         Comment.c._spam == False,
                         sort=db_sort('new'))
    return make_results(q_c)
def get_spam_filtered_comments(sr_id):
    return Comment._query(Comment.c.sr_id == sr_id,
                          Comment.c._spam == True,
                          Comment.c.verdict != 'mod-removed',
                          sort=db_sort('new'))
def get_reported_comments(sr_id):
    return Comment._query(Comment.c.reported != 0,
                          Comment.c.sr_id == sr_id,
                          Comment.c._spam == False,
                          sort=db_sort('new'))
def _get_criticisms(user_id, sort, time):
    q = Comment._query(Comment.c.author_id == user_id,
                       Comment.c.criticism == True,
                       sort=db_sort(sort),
                       data=True)
    return make_results(q)
def _get_sr_comments(sr_id):
    """the subreddit /r/foo/comments page"""
    q = Comment._query(Comment.c.sr_id == sr_id,
                       sort=desc('_date'))
    return make_results(q)
from collections import defaultdict
from datetime import datetime

from pylons import g

from r2.lib.db.operators import desc
from r2.lib.utils import fetch_things2
from r2.models import calculate_server_seconds, Comment, Link, Subreddit

LINK_GILDING_START = datetime(2014, 2, 1, 0, 0, tzinfo=g.tz)
COMMENT_GILDING_START = datetime(2012, 10, 1, 0, 0, tzinfo=g.tz)

queries = [
    Link._query(Link.c.gildings != 0,
                Link.c._date > LINK_GILDING_START,
                data=True,
                sort=desc('_date')),
    Comment._query(Comment.c.gildings != 0,
                   Comment.c._date > COMMENT_GILDING_START,
                   data=True,
                   sort=desc('_date')),
]

seconds_by_srid = defaultdict(int)
gilding_price = g.gold_month_price.pennies

for q in queries:
    for things in fetch_things2(q, chunks=True, chunk_size=100):
        print things[0]._fullname
        for thing in things:
            seconds_per_gilding = calculate_server_seconds(gilding_price,
                                                           thing._date)
            seconds_by_srid[thing.sr_id] += int(thing.gildings *
                                                seconds_per_gilding)

for sr_id, seconds in seconds_by_srid.items():
    sr = Subreddit._byID(sr_id, data=True)
    print "%s: %s seconds" % (sr.name, seconds)