def _mock_comment(id=1, author_id=1, link_id=1, sr_id=1, can_comment=True,
                  can_view_promo=True, is_moderator=False, **kwargs):
    kwargs['id'] = id
    kwargs['author_id'] = author_id
    kwargs['link_id'] = link_id
    comment = Comment(**kwargs)
    VByName.run = MagicMock(return_value=comment)

    link = Link(id=link_id)
    Link._byID = MagicMock(return_value=link)

    sr = Subreddit(id=sr_id)
    comment.subreddit_slow = sr
    link.subreddit_slow = sr

    Subreddit.can_comment = MagicMock(return_value=can_comment)
    Link.can_view_promo = MagicMock(return_value=can_view_promo)
    Subreddit.is_moderator = MagicMock(return_value=is_moderator)

    return comment
def _run_new_comment(msg):
    fname = msg.body
    comment = Comment._by_fullname(fname, data=True)
    sr = Subreddit._byID(comment.sr_id)

    add_queries([get_all_comments(), get_sr_comments(sr)],
                insert_items=[comment])
def process_comments_on_post(post, comments):
    for comment in comments:
        if comment_exists(post, comment):
            continue

        # Prepare data for import
        ip = '127.0.0.1'
        naive_date = datetime.datetime.strptime(comment['dateCreated'],
                                                DATE_FORMAT)
        # Pick the non-daylight-savings offset
        local_date = INPUT_TIMEZONE.localize(naive_date, is_dst=False)
        utc_date = local_date.astimezone(pytz.utc)

        # Determine account to use for this comment
        account = get_or_create_account(comment['author'])

        if not dryrun:
            # Create new comment
            new_comment, inbox_rel = Comment._new(account, post, None,
                                                  comment['body'], ip,
                                                  date=utc_date)
            new_comment.is_html = True
            new_comment.ob_imported = True
            new_comment._commit()

        try:
            # round-trip through UTF-8 so invalid bytes raise UnicodeError
            # and we fall through to the plain message below
            print " Imported as '%s' %s" % (
                account.name.decode('utf-8').encode('utf-8'),
                comment_excerpt(comment).decode('utf-8').encode('utf-8'))
        except UnicodeError:
            print " Imported comment"
def process_comment(self, comment_data, comment, post):
    # Prepare data for import
    ip = '127.0.0.1'

    if comment_data:
        naive_date = datetime.datetime.strptime(comment_data['dateCreated'],
                                                DATE_FORMAT)
        # Pick the non-daylight-savings offset
        local_date = INPUT_TIMEZONE.localize(naive_date, is_dst=False)
        utc_date = local_date.astimezone(pytz.utc)

        # Determine account to use for this comment
        account = self._get_or_create_account(comment_data['author'],
                                              comment_data['authorEmail'])

    if comment_data and not comment:
        # Create new comment
        comment, inbox_rel = Comment._new(account, post, None,
                                          comment_data['body'], ip,
                                          date=utc_date)
        comment.is_html = True
        comment.ob_imported = True
        comment._commit()
    elif comment_data and comment:
        # Overwrite existing comment
        comment.author_id = account._id
        comment.body = comment_data['body']
        comment.ip = ip
        comment._date = utc_date
        comment.is_html = True
        comment.ob_imported = True
        comment._commit()
    elif not comment_data and comment:
        # Not enough comment data being imported to overwrite all comments
        print ('WARNING: More comments in lesswrong than we are importing, '
               'ignoring additional comment in lesswrong')
def process_message(msg):
    from r2.lib.db.queries import (
        add_queries,
        add_to_commentstree_q,
        get_comments,
    )

    vote_data = json.loads(msg.body)

    hook = hooks.get_hook('vote.validate_vote_data')
    if hook.call_until_return(msg=msg, vote_data=vote_data) is False:
        # Corrupt records in the queue. Ignore them.
        print "Ignoring invalid vote by %s on %s %s" % (
            vote_data.get('user_id', '<unknown>'),
            vote_data.get('thing_fullname', '<unknown>'),
            vote_data)
        return

    timer = g.stats.get_timer("comment_vote_processor")
    timer.start()

    user = Account._byID(vote_data.pop("user_id"))
    comment = Comment._by_fullname(vote_data.pop("thing_fullname"))

    print "Processing vote by %s on %s %s" % (user, comment, vote_data)

    try:
        vote = Vote(
            user,
            comment,
            direction=vote_data["direction"],
            date=datetime.utcfromtimestamp(vote_data["date"]),
            data=vote_data["data"],
            event_data=vote_data.get("event_data"),
        )
    except TypeError as e:
        # a vote on an invalid type got in the queue, just skip it
        g.log.exception("Invalid type: %r", e.message)
        return

    vote.commit()
    timer.intermediate("create_vote_object")

    vote_valid = vote.is_automatic_initial_vote or vote.effects.affects_score
    comment_valid = not (comment._spam or comment._deleted)
    if vote_valid and comment_valid:
        author = Account._byID(comment.author_id)
        add_queries(
            queries=[get_comments(author, sort, 'all') for sort in SORTS],
            insert_items=comment,
        )
        timer.intermediate("author_queries")

        # update the score periodically when a comment has many votes
        update_threshold = g.live_config['comment_vote_update_threshold']
        update_period = g.live_config['comment_vote_update_period']
        num_votes = comment.num_votes
        if num_votes <= update_threshold or num_votes % update_period == 0:
            add_to_commentstree_q(comment)

    timer.stop()
    timer.flush()
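# A minimal, standalone sketch of the throttling rule used above: while a
# comment is young (few votes) every vote triggers a comments-tree update,
# after which only every `update_period`-th vote does. The default values
# here are hypothetical; the live values come from g.live_config.
def should_update_comments_tree(num_votes, update_threshold=10,
                                update_period=5):
    return (num_votes <= update_threshold or
            num_votes % update_period == 0)

assert should_update_comments_tree(3)       # young comment: always update
assert not should_update_comments_tree(11)  # 11 votes, not a multiple of 5
assert should_update_comments_tree(15)      # every 5th vote thereafter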
def activate_names_requested_in(link):
    tree = get_comment_tree(link)
    acceptable_names = []
    if tree.tree:
        top_level_cids = tree.tree[None]
        comments = chain.from_iterable(
            Comment._byID(chunk, return_dict=False, data=True)
            for chunk in in_chunks(top_level_cids))

        for comment in sorted(comments, key=lambda c: c._ups, reverse=True):
            if comment._spam or comment._deleted:
                continue
            sanitized = comment.body.strip()
            match = valid_name_re.search(sanitized)
            if match:
                acceptable_names.append((comment, match.group(1)))

    # we activate one name for each 100% of rev goal met
    names = acceptable_names[:link.revenue_bucket]
    activate_names(link, names)

    activated_names = [name for comment, name in names]
    link.server_names = activated_names
    link.flair_text = ", ".join(activated_names) if names else "/dev/null"
    link.flair_css_class = "goal-bucket-%d" % link.revenue_bucket
    link._commit()
def get_all_comments(self):
    from r2.lib.db import queries
    from r2.models import Comment
    from r2.controllers.errors import UserRequiredException

    if not c.user_is_loggedin:
        raise UserRequiredException

    friends = self.get_important_friends(c.user._id)

    if not friends:
        return []

    if g.use_query_cache:
        # with the precomputer enabled, this Subreddit only supports
        # being sorted by 'new'. it would be nice to have a cleaner UI
        # than just blatantly ignoring their sort, though
        sort = 'new'
        time = 'all'

        friends = Account._byID(friends, return_dict=False)

        crs = [queries.get_comments(friend, sort, time)
               for friend in friends]
        return queries.MergedCachedResults(crs)
    else:
        q = Comment._query(Comment.c.author_id == friends,
                           sort=desc('_date'), data=True)
        return q
def _run_commentstree(msgs, chan):
    comments = Comment._by_fullname([msg.body for msg in msgs],
                                    data=True, return_dict=False)
    print 'Processing %r' % (comments,)
    add_comment_tree(comments)
def get_deleted_comments(user_id):
    return Comment._query(Comment.c.author_id == user_id,
                          Comment.c._deleted == True,
                          Comment.c._spam == (True, False),
                          sort=db_sort("new"))
def _mock_comment(self, id=1, author_id=1, link_id=1, sr_id=1,
                  can_comment=True, can_view_promo=True, is_moderator=False,
                  **kwargs):
    kwargs['id'] = id
    kwargs['author_id'] = author_id
    kwargs['link_id'] = link_id
    kwargs['sr_id'] = sr_id
    comment = Comment(**kwargs)
    self.autopatch(VByName, "run", return_value=comment)

    link = Link(id=link_id, sr_id=sr_id)
    self.autopatch(Link, "_byID", return_value=link)

    sr = Subreddit(id=sr_id)
    self.autopatch(Subreddit, "_byID", return_value=sr)

    self.autopatch(Subreddit, "can_comment", return_value=can_comment)
    self.autopatch(Link, "can_view_promo", return_value=can_view_promo)
    self.autopatch(Subreddit, "is_moderator", return_value=is_moderator)

    return comment
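# `autopatch` isn't defined in this section. A plausible minimal helper,
# assuming a unittest.TestCase subclass and the `mock` library, wraps
# mock.patch.object() and registers the undo with addCleanup(), so patches
# can't leak between tests the way the bare `Klass.attr = MagicMock()`
# assignments in the other _mock_comment variants can.
import unittest

import mock  # `unittest.mock` on Python 3


class RedditTestCase(unittest.TestCase):
    def autopatch(self, target, attribute, **kwargs):
        patcher = mock.patch.object(target, attribute, **kwargs)
        patched = patcher.start()
        self.addCleanup(patcher.stop)
        return patched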
def process_comment(self, comment_data, comment, post, comment_dictionary):
    # Prepare data for import
    ip = '127.0.0.1'

    if comment_data:
        naive_date = datetime.datetime.strptime(comment_data['dateCreated'],
                                                DATE_FORMAT)
        # Pick the non-daylight-savings offset
        local_date = INPUT_TIMEZONE.localize(naive_date, is_dst=False)
        utc_date = local_date.astimezone(pytz.utc)

        # Determine account to use for this comment
        account = self._get_or_create_account(comment_data['author'],
                                              comment_data['authorEmail'])

    if comment_data and not comment_data['author'].endswith(
            "| The Effective Altruism Blog"):
        if not comment:
            # Create new comment
            comment, inbox_rel = Comment._new(account, post, None,
                                              comment_data['body'], ip,
                                              date=utc_date)
            if str(comment_data['commentParent']) in comment_dictionary:
                comment.parent_id = comment_dictionary[
                    str(comment_data['commentParent'])]
            comment.is_html = True
            comment.ob_imported = True
            comment._commit()
            comment_dictionary[str(comment_data['commentId'])] = comment._id
        else:
            # Overwrite existing comment
            if str(comment_data['commentParent']) in comment_dictionary:
                comment.parent_id = comment_dictionary[
                    str(comment_data['commentParent'])]
            comment.author_id = account._id
            comment.body = comment_data['body']
            comment.ip = ip
            comment._date = utc_date
            comment.is_html = True
            comment.ob_imported = True
            comment._commit()
            comment_dictionary[str(comment_data['commentId'])] = comment._id
def rollback_account_votes(self, account):
    query = LinkVotesByAccount._cf.xget(account._id36)
    for thing_id, vote_state in query:
        link = Link._byID36(thing_id)
        if int(vote_state) == int(
                Vote.SERIALIZED_DIRECTIONS[Vote.DIRECTIONS.onon]):  # 3
            link._incr("_ups", -1)
            link._incr("_downs", -1)
        elif int(vote_state) == int(
                Vote.SERIALIZED_DIRECTIONS[Vote.DIRECTIONS.onoff]):  # 4
            link._incr("_ups", -1)
        elif int(vote_state) == int(
                Vote.SERIALIZED_DIRECTIONS[Vote.DIRECTIONS.offon]):  # 5
            link._incr("_downs", -1)

    query = CommentVotesByAccount._cf.xget(account._id36)
    for thing_id, vote_state in query:
        comment = Comment._byID36(thing_id)
        if int(vote_state) == int(
                Vote.SERIALIZED_DIRECTIONS[Vote.DIRECTIONS.onon]):  # 3
            comment._incr("_ups", -1)
            comment._incr("_downs", -1)
        elif int(vote_state) == int(
                Vote.SERIALIZED_DIRECTIONS[Vote.DIRECTIONS.onoff]):  # 4
            comment._incr("_ups", -1)
        elif int(vote_state) == int(
                Vote.SERIALIZED_DIRECTIONS[Vote.DIRECTIONS.offon]):  # 5
            comment._incr("_downs", -1)
def _mock_comment(id=1, author_id=1, link_id=1, sr_id=1, **kwargs):
    kwargs['id'] = id
    kwargs['author_id'] = author_id
    kwargs['link_id'] = link_id
    comment = Comment(**kwargs)
    VByName.run = MagicMock(return_value=comment)

    link = Link(id=link_id)
    Link._byID = MagicMock(return_value=link)

    sr = Subreddit(id=sr_id)
    comment.subreddit_slow = MagicMock(return_value=sr)
    comment.subreddit_slow.is_moderator = MagicMock(return_value=False)
    link.subreddit = sr

    return comment
def _run_new_comments(msgs, chan):
    fnames = [msg.body for msg in msgs]

    comments = Comment._by_fullname(fnames, data=True, return_dict=False)
    add_queries([get_all_comments()], insert_items=comments)

    bysrid = _by_srid(comments, False)
    for srid, sr_comments in bysrid.iteritems():
        add_queries([_get_sr_comments(srid)], insert_items=sr_comments)
def post_process_post(self, post):
    """Perform post processing to rewrite URLs and generate the mapping
    between old and new permalinks."""
    post.article = self.rewrite_ob_urls(post.article)
    post._commit()

    comments = Comment._query(Comment.c.link_id == post._id, data=True)
    for comment in comments:
        comment.body = self.rewrite_ob_urls(comment.body)
        comment._commit()
def comment_reply_effect(comment):
    if comment.parent_id is not None:
        parent = Comment._byID(comment.parent_id, data=True)
    else:
        parent = Link._byID(comment.link_id, data=True)

    all_effects = effects.get_all_effects([parent._fullname])
    parent_effects = all_effects.get(parent._fullname, [])
    for item_name in parent_effects:
        item = items.get_item(item_name)
        item.on_reply(c.user, parent)
def _populate(after_id=None, estimate=54301242):
    from r2.models import Comment, CommentSortsCache, desc
    from r2.lib.db import tdb_cassandra
    from r2.lib import utils

    # a larger chunk size has a chance to decrease the number of
    # Cassandra writes, but the probability is low
    chunk_size = 5000

    q = Comment._query(Comment.c._spam == (True, False),
                       Comment.c._deleted == (True, False),
                       sort=desc("_date"))
    if after_id is not None:
        q._after(Comment._byID(after_id))

    q = utils.fetch_things2(q, chunk_size=chunk_size)
    q = utils.progress(q, verbosity=chunk_size, estimate=estimate)

    for chunk in utils.in_chunks(q, chunk_size):
        chunk = filter(lambda x: hasattr(x, "link_id"), chunk)
        update_comment_votes(chunk,
                             write_consistency_level=tdb_cassandra.CL.ONE)
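# r2.lib.utils.in_chunks (used above and in activate_names_requested_in)
# batches a lazy iterator into lists. A minimal equivalent, shown here as
# an assumption about its behavior rather than the actual implementation:
def in_chunks(iterable, size):
    chunk = []
    for item in iterable:
        chunk.append(item)
        if len(chunk) == size:
            yield chunk
            chunk = []
    if chunk:
        yield chunk

assert list(in_chunks(xrange(7), 3)) == [[0, 1, 2], [3, 4, 5], [6]]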
def perform_actions(self, item, data):
    """Execute all the rule's actions against the item."""
    for key, target in self.targets.iteritems():
        target_item = self.get_target_item(item, data, key)
        target.perform_actions(target_item, data)

    if self.comment:
        comment = self.build_message(self.comment, item, data,
                                     disclaimer=True)

        # TODO: shouldn't have to do all this manually
        if isinstance(item, Comment):
            link = data["link"]
            parent_comment = item
        else:
            link = item
            parent_comment = None
        new_comment, inbox_rel = Comment._new(
            ACCOUNT, link, parent_comment, comment, None)
        new_comment.distinguished = "yes"
        new_comment._commit()
        queries.queue_vote(ACCOUNT, new_comment, True, None)
        queries.new_comment(new_comment, inbox_rel)

        g.stats.simple_event("automoderator.comment")

    if self.modmail:
        message = self.build_message(self.modmail, item, data,
                                     permalink=True)
        subject = replace_placeholders(
            self.modmail_subject, data, self.matches)
        subject = subject[:100]

        new_message, inbox_rel = Message._new(ACCOUNT, data["subreddit"],
                                              subject, message, None)
        new_message.distinguished = "yes"
        new_message._commit()
        queries.new_message(new_message, inbox_rel)

        g.stats.simple_event("automoderator.modmail")

    if self.message and not data["author"]._deleted:
        message = self.build_message(self.message, item, data,
                                     disclaimer=True, permalink=True)
        subject = replace_placeholders(
            self.message_subject, data, self.matches)
        subject = subject[:100]

        new_message, inbox_rel = Message._new(ACCOUNT, data["author"],
                                              subject, message, None)
        queries.new_message(new_message, inbox_rel)

        g.stats.simple_event("automoderator.message")

    PerformedRulesByThing.mark_performed(item, self)
def validate_blob(custom):
    """Validate payment_blob and return a dict with everything looked up."""
    ret = {}

    if not custom:
        raise GoldException('no custom')

    payment_blob = g.hardcache.get('payment_blob-%s' % str(custom))
    if not payment_blob:
        raise GoldException('no payment_blob')

    if not ('account_id' in payment_blob and
            'account_name' in payment_blob):
        raise GoldException('no account_id')
    try:
        buyer = Account._byID(payment_blob['account_id'], data=True)
        ret['buyer'] = buyer
    except NotFound:
        raise GoldException('bad account_id')

    if not buyer.name.lower() == payment_blob['account_name'].lower():
        raise GoldException('buyer mismatch')

    goldtype = payment_blob['goldtype']
    ret['goldtype'] = goldtype

    if goldtype == 'gift':
        recipient_name = payment_blob.get('recipient', None)
        if not recipient_name:
            raise GoldException('gift missing recipient')
        try:
            recipient = Account._by_name(recipient_name)
            ret['recipient'] = recipient
        except NotFound:
            raise GoldException('bad recipient')
        comment_fullname = payment_blob.get('comment', None)
        if comment_fullname:
            try:
                ret['comment'] = Comment._by_fullname(comment_fullname)
            except NotFound:
                raise GoldException('bad comment')
        ret['signed'] = payment_blob.get('signed', False)
        giftmessage = payment_blob.get('giftmessage')
        giftmessage = _force_unicode(giftmessage) if giftmessage else None
        ret['giftmessage'] = giftmessage
    elif goldtype not in ('onetime', 'autorenew', 'creddits'):
        raise GoldException('bad goldtype')

    return ret
def _run_commentstree(msg):
    fname = msg.body
    comment = Comment._by_fullname(fname, data=True)
    link = Link._byID(comment.link_id, data=True)

    try:
        add_comment_tree(comment, link)
    except KeyError:
        # Hackity hack. Try to recover from a corrupted comment tree
        print "Trying to fix broken comments-tree."
        link_comments(link._id, _update=True)
        add_comment_tree(comment, link)
def activate_names(link, names):
    for comment, name in names:
        # find a slot to assign a name to. we'll prefer nodes that are
        # currently empty, and failing that find the
        # least-recently-modified node.
        ROOT = "/gold/server-names"
        slot_names = g.zookeeper.get_children(ROOT)
        slots = [(slot_name, g.zookeeper.get(os.path.join(ROOT, slot_name)))
                 for slot_name in slot_names]
        slots.sort(key=lambda (path, (data, stat)): (bool(data), stat.mtime))
        slot_path = os.path.join(ROOT, slots[0][0])

        comment_data = {'name': str(name),
                        'permalink': comment.make_permalink_slow()}
        g.zookeeper.set(slot_path, json.dumps(comment_data))

        lock = g.zookeeper.Lock(slot_path)
        lock_contenders = lock.contenders()
        old_name = lock_contenders[0] if lock_contenders else ""
        old_name = old_name or "one of our servers"

        # reply to the user
        wp = WikiPage.get(SERVERNAME_SR, "templates/success-reply")
        template = random.choice(wp._get("content").split("\r\n---\r\n"))
        comment, inbox_rel = Comment._new(
            author=SYSTEM_ACCOUNT,
            link=link,
            parent=comment,
            body=template % {
                "old-name": old_name,
                "new-name": name,
            },
            ip="127.0.0.1",
        )
        queries.new_comment(comment, inbox_rel)

        # update the link's text
        wp = WikiPage.get(SERVERNAME_SR, "templates/goldisms")
        goldism = random.choice(wp._get("content").split("\r\n---\r\n"))
        wp = WikiPage.get(SERVERNAME_SR, "templates/selftext-success")
        template = wp._get("content")
        link.selftext = template % {
            "old-name": old_name,
            "new-name": name,
            "goldism": goldism,
        }
        link._commit()
def activate_names(link, names):
    for comment, name in names:
        # find a slot to assign a name to. we'll prefer nodes that are
        # currently empty, and failing that find the
        # least-recently-modified node.
        ROOT = "/gold/server-names"
        slot_names = g.zookeeper.get_children(ROOT)
        slots = [(slot_name, g.zookeeper.get(os.path.join(ROOT, slot_name)))
                 for slot_name in slot_names]
        slots.sort(key=lambda (path, (data, stat)): (bool(data), stat.mtime))
        slot_path = os.path.join(ROOT, slots[0][0])

        comment_data = {'name': str(name),
                        'permalink': comment.make_permalink_slow()}
        g.zookeeper.set(slot_path, json.dumps(comment_data))

        lock = g.zookeeper.Lock(slot_path)
        lock_contenders = lock.contenders()
        old_name = lock_contenders[0] if lock_contenders else ""
        old_name = old_name or "one of our servers"

        # reply to the user
        wp = WikiPage.get(SERVERNAME_SR, "templates/success-reply")
        template = random.choice(wp._get("content").split("\r\n---\r\n"))
        comment, inbox_rel = Comment._new(
            author=SYSTEM_ACCOUNT,
            link=link,
            parent=comment,
            body=template % {
                "old-name": old_name,
                "new-name": name,
            },
            ip="127.0.0.1",
        )
        queries.queue_vote(SYSTEM_ACCOUNT, comment, dir=True, ip="127.0.0.1")
        queries.new_comment(comment, inbox_rel)

        # update the link's text
        wp = WikiPage.get(SERVERNAME_SR, "templates/goldisms")
        goldism = random.choice(wp._get("content").split("\r\n---\r\n"))
        wp = WikiPage.get(SERVERNAME_SR, "templates/selftext-success")
        template = wp._get("content")
        link.selftext = template % {
            "old-name": old_name,
            "new-name": name,
            "goldism": goldism,
        }
        link._commit()
def fix_bare_links(apply=False):
    from r2.models import Comment
    from r2.lib.db.thing import NotFound

    fbefore = codecs.open('fix_bare_links_before.txt', 'w', 'utf-8')
    fafter = codecs.open('fix_bare_links_after.txt', 'w', 'utf-8')

    comment_id = 1
    try:
        # The comments are retrieved like this to prevent the API from
        # attempting to load all comments at once and then iterating
        # over them
        while True:
            comment = Comment._byID(comment_id, data=True)

            if (getattr(comment, 'ob_imported', False) and
                    getattr(comment, 'is_html', False)):
                body = comment.body
                if isinstance(body, str):
                    try:
                        body = body.decode('utf-8')
                    except UnicodeDecodeError:
                        print >>sys.stderr, ("UnicodeDecodeError, using "
                                             "'ignore' error mode, comment: "
                                             "%d" % comment._id)
                        body = body.decode('utf-8', 'ignore')

                new_content = rewrite_bare_links(body)

                if new_content != body:
                    print >>fbefore, body
                    print >>fafter, new_content
                    if apply:
                        comment.body = new_content
                        comment._commit()
                        try:
                            print >>sys.stderr, ("Rewrote comment %s" %
                                comment.make_permalink_slow().encode('utf-8'))
                        except UnicodeError:
                            print >>sys.stderr, ("Rewrote comment with "
                                                 "id: %d" % comment._id)

            comment_id += 1
    except NotFound:
        # Assumes that comment ids are sequential and never deleted
        # (which I believe to be true) -- wjm
        print >>sys.stderr, "Comment %d not found, exiting" % comment_id
        return
def message_notification_email(data):
    """Queues a system email for a new message notification."""
    from r2.lib.pages import MessageNotificationEmail

    MAX_EMAILS_PER_DAY = 1000
    MESSAGE_THROTTLE_KEY = 'message_notification_emails'

    # If our counter's expired, initialize it again.
    g.cache.add(MESSAGE_THROTTLE_KEY, 0, time=24 * 60 * 60)

    for datum in data.itervalues():
        datum = json.loads(datum)
        user = Account._byID36(datum['to'], data=True)
        comment = Comment._by_fullname(datum['comment'], data=True)

        # In case a user has enabled the preference while it was enabled
        # for them, but we've since turned it off. We need to explicitly
        # state the user because we're not in the context of an HTTP
        # request from them.
        if not feature.is_enabled('orangereds_as_emails', user=user):
            continue

        if g.cache.get(MESSAGE_THROTTLE_KEY) > MAX_EMAILS_PER_DAY:
            raise Exception(
                'Message notification emails: safety limit exceeded!')

        mac = generate_notification_email_unsubscribe_token(
            datum['to'], user_email=user.email,
            user_password_hash=user.password)
        base = g.https_endpoint or g.origin
        unsubscribe_link = base + '/mail/unsubscribe/%s/%s' % (datum['to'],
                                                               mac)

        templateData = {
            'sender_username': datum.get('from', ''),
            'comment': comment,
            'permalink': datum['permalink'],
            'unsubscribe_link': unsubscribe_link,
        }
        _system_email(
            user.email,
            MessageNotificationEmail(**templateData).render(style='email'),
            Email.Kind.MESSAGE_NOTIFICATION,
            from_address=g.notification_email)

        g.stats.simple_event('email.message_notification.queued')
        g.cache.incr(MESSAGE_THROTTLE_KEY)
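# The daily-limit pattern above relies on memcached semantics: add() is a
# no-op when the key already exists, so the counter only resets once its
# 24-hour TTL expires. A self-contained in-memory stand-in illustrating
# those semantics (not r2's actual cache client):
import time


class ExpiringCounter(object):
    def __init__(self):
        self._store = {}  # key -> (value, expires_at)

    def add(self, key, value, time_=24 * 60 * 60):
        # only takes effect if the key is absent or expired
        now = time.time()
        entry = self._store.get(key)
        if entry is None or entry[1] <= now:
            self._store[key] = (value, now + time_)

    def get(self, key):
        entry = self._store.get(key)
        if entry and entry[1] > time.time():
            return entry[0]
        return None

    def incr(self, key):
        entry = self._store.get(key)
        if entry:
            self._store[key] = (entry[0] + 1, entry[1])


cache = ExpiringCounter()
cache.add('message_notification_emails', 0)
cache.add('message_notification_emails', 0)  # no-op: key already present
cache.incr('message_notification_emails')
assert cache.get('message_notification_emails') == 1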
def comment_exists(post, comment):
    # Check if this comment already exists using brutal compare on content.
    # BeautifulSoup is used to parse as HTML in order to remove markup
    content = ''.join(BeautifulSoup(comment['body']).findAll(text=True))
    key = re_non_alphanum.sub('', content)

    existing_comments = Comment._query(Comment.c.link_id == post._id,
                                       Comment.c.ob_imported == True,
                                       data=True)
    for existing_comment in existing_comments:
        author = Account._byID(existing_comment.author_id, data=True)
        content = ''.join(
            BeautifulSoup(existing_comment.body).findAll(text=True))
        existing_key = re_non_alphanum.sub('', content)
        if key == existing_key:
            print " Skipping existing %s" % comment_excerpt(comment)
            return True
        # else:
        #     print "%s *|NOT|* %s" % (key, existing_key)

    return False
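# The dedup key above is deliberately brutal: strip the markup, then strip
# everything non-alphanumeric, so formatting differences can't defeat the
# comparison. A standalone sketch of the same normalization, assuming the
# Python 2 BeautifulSoup 3 API used above; re_non_alphanum's exact pattern
# isn't shown in this section, so the definition here is a plausible guess:
import re

from BeautifulSoup import BeautifulSoup

re_non_alphanum = re.compile(r'[^a-zA-Z0-9]')


def content_key(html_body):
    text = ''.join(BeautifulSoup(html_body).findAll(text=True))
    return re_non_alphanum.sub('', text)

assert (content_key('<p>Hello,  <b>world</b>!</p>') ==
        content_key('Hello world'))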
def _run_commentstree(msgs, chan):
    fnames = [msg.body for msg in msgs]
    comments = Comment._by_fullname(fnames, data=True, return_dict=False)
    links = Link._byID(set(cm.link_id for cm in comments),
                       data=True, return_dict=True)

    # add the comment to the comments-tree
    for comment in comments:
        l = links[comment.link_id]
        try:
            add_comment_tree(comment, l)
        except KeyError:
            # Hackity hack. Try to recover from a corrupted comment tree
            print "Trying to fix broken comments-tree."
            link_comments(l._id, _update=True)
            add_comment_tree(comment, l)
def _load_link_comments(link_id):
    from r2.models import Comment
    q = Comment._query(Comment.c.link_id == link_id,
                       Comment.c._deleted == (True, False),
                       Comment.c._spam == (True, False),
                       optimize_rules=True,
                       data=True)
    comments = list(q)
    cids = [c._id for c in comments]

    # make a tree
    comment_tree = {}
    for cm in comments:
        p_id = cm.parent_id
        comment_tree.setdefault(p_id, []).append(cm._id)

    # calculate the depths
    depth = {}
    level = 0
    cur_level = comment_tree.get(None, ())
    while cur_level:
        next_level = []
        for cm_id in cur_level:
            depth[cm_id] = level
            next_level.extend(comment_tree.get(cm_id, ()))
        cur_level = next_level
        level += 1

    # calculate the number of children
    num_children = {}
    for cm_id in cids:
        num = 0
        todo = [cm_id]
        iteration_count = 0
        while todo:
            if iteration_count > MAX_ITERATIONS:
                raise Exception("bad comment tree for link %s" % link_id)
            more = comment_tree.get(todo.pop(0), ())
            num += len(more)
            todo.extend(more)
            iteration_count += 1
        num_children[cm_id] = num

    num_comments = sum(1 for c in comments if not c._deleted)

    return cids, comment_tree, depth, num_children, num_comments
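# A toy run of the depth computation above, small enough to check by hand.
# None keys the top level (comments whose parent is the link itself), and
# a breadth-first walk assigns each comment its depth:
comment_tree = {None: [1, 2], 1: [3, 4], 3: [5]}

depth = {}
level = 0
cur_level = comment_tree.get(None, ())
while cur_level:
    next_level = []
    for cm_id in cur_level:
        depth[cm_id] = level
        next_level.extend(comment_tree.get(cm_id, ()))
    cur_level = next_level
    level += 1

assert depth == {1: 0, 2: 0, 3: 1, 4: 1, 5: 2}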
def comment_event(self, new_comment, request=None, context=None):
    """Create a 'comment' event for event-collector.

    new_comment: An r2.models.Comment object
    request, context: Should be pylons.request & pylons.c respectively
    """
    from r2.models import Comment, Link

    event = Event(
        topic="comment_events",
        event_type="ss.comment",
        time=new_comment._date,
        request=request,
        context=context,
        truncatable_field="comment_body",
    )

    event.add("comment_id", new_comment._id)
    event.add("comment_fullname", new_comment._fullname)
    event.add_text("comment_body", new_comment.body)

    post = Link._byID(new_comment.link_id)
    event.add("post_id", post._id)
    event.add("post_fullname", post._fullname)
    event.add("post_created_ts", to_epoch_milliseconds(post._date))
    if post.promoted:
        event.add("post_is_promoted", bool(post.promoted))

    if new_comment.parent_id:
        parent = Comment._byID(new_comment.parent_id)
    else:
        # If this is a top-level comment, parent is the same as the post
        parent = post
    event.add("parent_id", parent._id)
    event.add("parent_fullname", parent._fullname)
    event.add("parent_created_ts", to_epoch_milliseconds(parent._date))

    event.add("user_neutered", new_comment.author_slow._spam)

    event.add_subreddit_fields(new_comment.subreddit_slow)

    self.save_event(event)
def fix_images(dryrun=True):
    from r2.models import Link, Comment

    links = Link._query(Link.c.ob_permalink != None, data=True)
    for link in links:
        ob_url = link.ob_permalink.strip()
        print "Processing %s" % ob_url

        new_content = process_content(link.article)
        if not dryrun:
            link.article = new_content
            link._commit()

        comments = Comment._query(Comment.c.link_id == link._id, data=True)
        for comment in comments:
            new_content = process_content(comment.body)
            if not dryrun:
                comment.body = new_content
                comment._commit()
def on_use(self, user, target):
    link = Link._byID(target.link_id)
    comment_tree = get_comment_tree(link)
    child_ids = comment_tree.tree[target._id]
    grandchild_ids = []
    for child_id in child_ids:
        grandchild_ids.extend(comment_tree.tree[child_id])

    comments = Comment._byID(child_ids + grandchild_ids,
                             data=True, return_dict=True)
    children = [comments[cid] for cid in child_ids]
    grandchildren = [comments[cid] for cid in grandchild_ids]

    for comment in itertools.chain([target], children, grandchildren):
        effects.add_effect(user, comment, self.item_name)

    self.apply_damage_and_log(user, [target], self.direct_damage)
    self.apply_damage_and_log(user, children, self.child_damage)
    self.apply_damage_and_log(user, grandchildren, self.grandchild_damage)
def _load_br_criticisms(link_id):
    from r2.models import Comment
    q = Comment._query(Comment.c.link_id == link_id,
                       Comment.c._deleted == (True, False),
                       Comment.c._spam == (True, False),
                       optimize_rules=True,
                       data=True)
    comments = list(q)
    brs = [c for c in comments if c.bestresponse == True]

    # collect each best-response comment's id, then walk its ancestor
    # chain up to the root, collecting those ids too
    outs = [c._id for c in brs]
    ups = [c.parent_id for c in brs if c.parent_id]
    while ups:
        brs = [c for c in comments if c._id in ups]
        outs.extend([c._id for c in brs])
        ups = [c.parent_id for c in brs if c.parent_id]
    return outs
def spam_account_comments(self, account, query_limit=10000, spam_limit=500):
    from r2.lib.db.operators import asc, desc, timeago

    q = Comment._query(Comment.c.author_id == account._id,
                       Comment.c._spam == False,
                       sort=desc('_date'),
                       data=False)
    q._limit = query_limit
    things = list(q)

    processed = 0
    for item in things:
        if processed < spam_limit:
            verdict = getattr(item, "verdict", None)
            if not verdict or not verdict.endswith("-approved"):
                processed += 1
                admintools.spam(item, auto=False,
                                moderator_banned=False,
                                banner=None,
                                train_spam=True)
def _load_link_comments(link_id):
    from r2.models import Comment
    q = Comment._query(Comment.c.link_id == link_id,
                       Comment.c._deleted == (True, False),
                       Comment.c._spam == (True, False),
                       data=True)
    comments = list(q)
    cids = [c._id for c in comments]

    # make a tree
    comment_tree = {}
    for cm in comments:
        p_id = cm.parent_id
        comment_tree.setdefault(p_id, []).append(cm._id)

    # calculate the depths
    depth = {}
    level = 0
    cur_level = comment_tree.get(None, ())
    while cur_level:
        next_level = []
        for cm_id in cur_level:
            depth[cm_id] = level
            next_level.extend(comment_tree.get(cm_id, ()))
        cur_level = next_level
        level += 1

    # calculate the number of children
    num_children = {}
    for cm_id in cids:
        num = 0
        todo = [cm_id]
        while todo:
            more = comment_tree.get(todo.pop(0), ())
            num += len(more)
            todo.extend(more)
        num_children[cm_id] = num

    return cids, comment_tree, depth, num_children
def _get_sr_comments(sr_id):
    """the subreddit /r/foo/comments page"""
    q = Comment._query(Comment.c.sr_id == sr_id,
                       sort=desc('_date'))
    return make_results(q)
def get_all_comments():
    """the master /comments page"""
    q = Comment._query(sort=desc('_date'))
    return make_results(q)
def link_comments_and_sort(link_id, sort):
    from r2.models import Comment, CommentSortsCache

    # This has grown sort of organically over time. Right now the
    # cache of the comments tree consists of three keys:
    # 1. The comments_key: A tuple of
    #      (cids, comment_tree, depth, num_children)
    #    given:
    #      cids         =:= [comment_id]
    #      comment_tree =:= dict(comment_id -> [comment_id])
    #      depth        =:= dict(comment_id -> int depth)
    #      num_children =:= dict(comment_id -> int num_children)
    # 2. The parent_comments_key =:= dict(comment_id -> parent_id)
    # 3. The comments_sorts keys =:= dict(comment_id36 -> float).
    #    These are represented by a Cassandra model (CommentSortsCache)
    #    rather than a permacache key. One of these exists for each
    #    sort (hot, new, etc)

    # performance hack: preload these into the LocalCache at the same
    # time
    g.permacache.get_multi([comments_key(link_id),
                            parent_comments_key(link_id)])

    cids, cid_tree, depth, num_children = link_comments(link_id)

    # load the sorter
    sorter = _get_comment_sorter(link_id, sort)

    sorter_needed = []
    if cids and not sorter:
        sorter_needed = cids
        g.log.debug("comment_tree.py: sorter (%s) cache miss for Link %s"
                    % (sort, link_id))
        sorter = {}

    sorter_needed = [x for x in cids if x not in sorter]
    if cids and sorter_needed:
        g.log.debug(
            "Error in comment_tree: sorter %r inconsistent "
            "(missing %d e.g. %r)"
            % (sort_comments_key(link_id, sort), len(sorter_needed),
               sorter_needed[:10]))
        if not g.disallow_db_writes:
            update_comment_votes(Comment._byID(sorter_needed, data=True,
                                               return_dict=False))

        sorter.update(_comment_sorter_from_cids(sorter_needed, sort))

    # load the parents
    key = parent_comments_key(link_id)
    parents = g.permacache.get(key)
    if parents is None:
        g.log.debug("comment_tree.py: parents cache miss for Link %s"
                    % link_id)
        parents = {}
    elif cids and not all(x in parents for x in cids):
        g.log.debug("Error in comment_tree: parents inconsistent for "
                    "Link %s" % link_id)
        parents = {}

    if not parents and len(cids) > 0:
        with g.make_lock(lock_key(link_id)):
            # reload from the cache so the sorter and parents are
            # maximally consistent
            r = g.permacache.get(comments_key(link_id))
            cids, cid_tree, depth, num_children = r

            key = parent_comments_key(link_id)
            if not parents:
                parents = _parent_dict_from_tree(cid_tree)
                g.permacache.set(key, parents)

    return cids, cid_tree, depth, num_children, parents, sorter
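# _parent_dict_from_tree isn't shown in this section. Given how its result
# is used above (dict(comment_id -> parent_id)), a plausible implementation
# is a straight inversion of the {parent_id: [child_ids]} tree, with None
# as the parent of top-level comments:
def _parent_dict_from_tree(cid_tree):
    parents = {}
    for parent_id, child_ids in cid_tree.iteritems():
        for child_id in child_ids:
            parents[child_id] = parent_id
    return parents

assert _parent_dict_from_tree({None: [1], 1: [2, 3]}) == {
    1: None, 2: 1, 3: 1}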
    Subreddit,
)

LINK_GILDING_START = datetime(2014, 2, 1, 0, 0, tzinfo=g.tz)
COMMENT_GILDING_START = datetime(2012, 10, 1, 0, 0, tzinfo=g.tz)

queries = [
    Link._query(
        Link.c.gildings != 0,
        Link.c._date > LINK_GILDING_START,
        data=True,
        sort=desc('_date'),
    ),
    Comment._query(
        Comment.c.gildings != 0,
        Comment.c._date > COMMENT_GILDING_START,
        data=True,
        sort=desc('_date'),
    ),
]

seconds_by_srid = defaultdict(int)
gilding_price = g.gold_month_price.pennies

for q in queries:
    for things in fetch_things2(q, chunks=True, chunk_size=100):
        print things[0]._fullname
        for thing in things:
            seconds_per_gilding = calculate_server_seconds(
                gilding_price, thing._date)
            seconds_by_srid[thing.sr_id] += int(thing.gildings *