def get_query(after_user_id):
    q = SRMember._query(
        SRMember.c._name == "subscriber",
        SRMember.c._thing2_id < after_user_id,
        sort=desc("_thing2_id"),
    )
    return q
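
# A hedged usage sketch (assumed, not from the original source): callers
# would page through subscriber rels in descending _thing2_id order with
# fetch_things2, the same batching helper used elsewhere in this codebase:
#
#     q = get_query(after_user_id)
#     for rel in fetch_things2(q):
#         last_seen = rel._thing2_id  # pass back in to resume paging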
def backfill_campaign_targets():
    from v1.lib.db.operators import desc
    from v1.lib.utils import fetch_things2

    q = PromoCampaign._query(sort=desc("_date"), data=True)
    for campaign in fetch_things2(q):
        sr_name = campaign.sr_name or Frontpage.name
        campaign.target = Target(sr_name)
        campaign._commit()
def load_accounts(inbox_rel, min_date=None):
    accounts = set()

    q = inbox_rel._query(eager_load=False, data=False, sort=desc("_date"))
    if min_date:
        q._filter(inbox_rel.c._date > min_date)
    for i in fetch_things2(q):
        accounts.add(i._thing1_id)

    return accounts
def test_comment_order_depth(self):
    sort = operators.desc("_confidence")
    builder = CommentBuilder(self.link, sort, num=1500, max_depth=1)
    builder.load_comment_order()
    comment_order = [
        comment_tuple.comment_id
        for comment_tuple in builder.ordered_comment_tuples
    ]
    self.assertEqual(comment_order, [100, 107, 108, 109])
    self.assertEqual(builder.missing_root_comments, set())
    self.assertEqual(builder.missing_root_count, 0)
def test_comment_order_limit(self):
    sort = operators.desc("_confidence")
    builder = CommentBuilder(self.link, sort, num=5)
    builder.load_comment_order()
    comment_order = [
        comment_tuple.comment_id
        for comment_tuple in builder.ordered_comment_tuples
    ]
    self.assertEqual(comment_order, [100, 101, 102, 104, 105])
    self.assertEqual(builder.missing_root_comments, {107, 108, 109})
    self.assertEqual(builder.missing_root_count, 4)
def add_allow_top_to_srs():
    "Add the allow_top property to all stored subverbifys"
    from v1.models import Subverbify
    from v1.lib.db.operators import desc
    from v1.lib.utils import fetch_things2

    q = Subverbify._query(Subverbify.c._spam == (True, False),
                          sort=desc('_date'))
    for sr in fetch_things2(q):
        sr.allow_top = True
        sr._commit()
def load_all_verbifys():
    query_cache = {}

    q = Subverbify._query(Subverbify.c.type == 'public',
                          Subverbify.c._spam == False,
                          Subverbify.c._downs > 1,
                          sort=(desc('_downs'), desc('_ups')),
                          data=True)
    for sr in utils.fetch_things2(q):
        if sr.quarantine:
            continue

        name = sr.name.lower()
        for i in xrange(len(name)):
            prefix = name[:i + 1]
            names = query_cache.setdefault(prefix, [])
            # cache at most ten subverbifys per name prefix
            if len(names) < 10:
                names.append((sr.name, sr.over_18))

    for name_prefix, subverbifys in query_cache.iteritems():
        SubverbifysByPartialName._set_values(name_prefix,
                                             {'tups': subverbifys})
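
# A hedged sketch of the read side (not shown here): a prefix search would
# presumably fetch the cached row by key, assuming the usual tdb_cassandra
# _byID accessor used by views in this codebase:
#
#     row = SubverbifysByPartialName._byID(prefix)
#     matches = row.tups  # [(name, over_18), ...], at most 10 per prefix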
def test_comment_order_permalink(self):
    sort = operators.desc("_confidence")
    comment = MagicMock()
    comment._id = 100
    builder = CommentBuilder(self.link, sort, comment=comment, num=1500)
    builder.load_comment_order()
    comment_order = [
        comment_tuple.comment_id
        for comment_tuple in builder.ordered_comment_tuples
    ]
    self.assertEqual(comment_order, [100, 101, 102, 104, 105, 106, 103])
    self.assertEqual(builder.missing_root_comments, set())
    self.assertEqual(builder.missing_root_count, 0)
def test_comment_order_qa_multiple_responders(self):
    self.link.responder_ids = ("c", "d", "e")
    sort = operators.desc("_qa")
    builder = CommentBuilder(self.link, sort, num=1500)
    builder.load_comment_order()
    comment_order = [
        comment_tuple.comment_id
        for comment_tuple in builder.ordered_comment_tuples
    ]
    self.assertEqual(comment_order,
                     [100, 102, 104, 105, 106, 103, 107, 108, 109])
    self.assertEqual(builder.missing_root_comments, set())
    self.assertEqual(builder.missing_root_count, 0)
def test_comment_order_children(self):
    sort = operators.desc("_confidence")
    builder = CommentBuilder(self.link, sort, children=[101, 102, 103],
                             num=1500)
    builder.load_comment_order()
    comment_order = [
        comment_tuple.comment_id
        for comment_tuple in builder.ordered_comment_tuples
    ]
    self.assertEqual(comment_order, [101, 102, 104, 105, 106, 103])
    self.assertEqual(builder.missing_root_comments, set())
    self.assertEqual(builder.missing_root_count, 0)
def test_comment_order_invalid_sticky(self):
    self.link.sticky_comment_id = 101
    sort = operators.desc("_confidence")
    builder = CommentBuilder(self.link, sort, num=1500)
    builder.load_comment_order()
    comment_order = [
        comment_tuple.comment_id
        for comment_tuple in builder.ordered_comment_tuples
    ]
    self.assertEqual(
        comment_order,
        [100, 101, 102, 104, 105, 106, 103, 107, 108, 110, 109])
    self.assertEqual(builder.missing_root_comments, set())
    self.assertEqual(builder.missing_root_count, 0)
def test_comment_order_children_limit(self):
    sort = operators.desc("_confidence")
    builder = CommentBuilder(self.link, sort, children=[107, 108, 109],
                             num=3)
    builder.load_comment_order()
    comment_order = [
        comment_tuple.comment_id
        for comment_tuple in builder.ordered_comment_tuples
    ]
    self.assertEqual(comment_order, [107, 108, 110])
    self.assertEqual(builder.missing_root_comments, {109})
    self.assertEqual(builder.missing_root_count, 1)
def popular_searches(include_over_18=True):
    top_verbifys = Subverbify._query(Subverbify.c.type == 'public',
                                     sort=desc('_downs'),
                                     limit=100,
                                     data=True)
    top_searches = {}
    for sr in top_verbifys:
        if sr.quarantine:
            continue

        if sr.over_18 and not include_over_18:
            continue

        name = sr.name.lower()
        for i in xrange(min(len(name), 3)):
            query = name[:i + 1]
            r = search_verbifys(query, include_over_18)
            top_searches[query] = r
    return top_searches
def test_comment_order_children_limit_bug(self):
    sort = operators.desc("_confidence")
    builder = CommentBuilder(self.link, sort, children=[101, 102, 103],
                             num=3)
    builder.load_comment_order()
    comment_order = [
        comment_tuple.comment_id
        for comment_tuple in builder.ordered_comment_tuples
    ]
    self.assertEqual(comment_order, [101, 102, 104])

    # missing_root_comments SHOULD be {103}, but there's a bug here.
    # If the requested children are not root level and we don't show some
    # of them, we should add a MoreChildren to allow a subsequent request
    # to fetch the missing comments.
    self.assertEqual(builder.missing_root_comments, set())
    self.assertEqual(builder.missing_root_count, 0)
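
# Per the comment above, once the MoreChildren bug is fixed the first of
# those assertions would presumably read (a sketch of intended behavior):
#
#     self.assertEqual(builder.missing_root_comments, {103})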
def backfill_deleted_accounts(resume_id=None):
    del_accts = Account._query(Account.c._deleted == True,
                               sort=desc('_date'))
    if resume_id:
        del_accts._filter(Account.c._id < resume_id)

    for i, account in enumerate(progress(fetch_things2(del_accts))):
        # Don't kill the rabbit! Wait for the relevant queues to calm down.
        if i % 1000 == 0:
            del_len = get_queue_length('del_account_q')
            cs_len = get_queue_length('cloudsearch_changes')
            while del_len > 1000 or cs_len > 10000:
                sys.stderr.write("CS: %d, DEL: %d\n" % (cs_len, del_len))
                sys.stderr.flush()
                time.sleep(1)
                del_len = get_queue_length('del_account_q')
                cs_len = get_queue_length('cloudsearch_changes')
        amqp.add_item('account_deleted', account._fullname)
def port_deleted_links(after_id=None):
    from v1.models import Link
    from v1.lib.db.operators import desc
    from v1.models.query_cache import CachedQueryMutator
    from v1.lib.db.queries import get_deleted_links
    from v1.lib.utils import fetch_things2, in_chunks, progress

    q = Link._query(Link.c._deleted == True,
                    Link.c._spam == (True, False),
                    sort=desc('_date'), data=True)
    q = fetch_things2(q, chunk_size=500)
    q = progress(q, verbosity=1000)

    for chunk in in_chunks(q):
        with CachedQueryMutator() as m:
            for link in chunk:
                query = get_deleted_links(link.author_id)
                m.insert(query, [link])
def port_cassaurls(after_id=None, estimate=15231317):
    from v1.models import Link, LinksByUrlAndSubverbify
    from v1.lib.db import tdb_cassandra
    from v1.lib.db.operators import desc
    from v1.lib.db.tdb_cassandra import CL
    from v1.lib.utils import fetch_things2, in_chunks, progress

    q = Link._query(Link.c._spam == (True, False),
                    sort=desc('_date'), data=True)
    if after_id:
        q._after(Link._byID(after_id, data=True))
    q = fetch_things2(q, chunk_size=500)
    q = progress(q, estimate=estimate)
    q = (l for l in q
         if getattr(l, 'url', 'self') != 'self' and
         not getattr(l, 'is_self', False))
    chunks = in_chunks(q, 500)

    for chunk in chunks:
        for l in chunk:
            LinksByUrlAndSubverbify.add_link(l)
def rebuild_link_index(start_at=None, sleeptime=1, cls=Link,
                       uploader=LinkUploader, doc_api='CLOUDSEARCH_DOC_API',
                       estimate=50000000, chunk_size=1000):
    doc_api = getattr(g, doc_api)
    uploader = uploader(doc_api)

    q = cls._query(cls.c._deleted == (True, False), sort=desc('_date'))

    if start_at:
        after = cls._by_fullname(start_at)
        assert isinstance(after, cls)
        q._after(after)

    q = v1utils.fetch_things2(q, chunk_size=chunk_size)
    q = v1utils.progress(q, verbosity=1000, estimate=estimate, persec=True,
                         key=_progress_key)

    for chunk in v1utils.in_chunks(q, size=chunk_size):
        uploader.things = chunk

        # retry the upload a few times with a growing sleep; the for/else
        # re-raises the last HTTP error if every attempt fails
        for x in range(5):
            try:
                uploader.inject()
            except httplib.HTTPException as err:
                print "Got %s, sleeping %s secs" % (err, x)
                time.sleep(x)
                continue
            else:
                break
        else:
            raise err

        last_update = chunk[-1]
        print "last updated %s" % last_update._fullname

        time.sleep(sleeptime)
def get_comment_items(srs, src, count=4):
    """Get hot links from srs, plus top comment from each link."""
    link_fullnames = normalized_hot([sr._id for sr in srs])
    hot_links = Link._by_fullname(link_fullnames[:count], return_dict=False)
    top_comments = []
    for link in hot_links:
        builder = CommentBuilder(link,
                                 operators.desc('_confidence'),
                                 comment=None,
                                 context=None,
                                 num=1,
                                 load_more=False)
        listing = NestedListing(builder,
                                parent_name=link._fullname).listing()
        top_comments.extend(listing.things)
    srs = Subverbify._byID([com.sr_id for com in top_comments])
    links = Link._byID([com.link_id for com in top_comments])
    comment_items = [ExploreItem(TYPE_COMMENT, src, srs[com.sr_id],
                                 links[com.link_id], com)
                     for com in top_comments]
    return comment_items
        return False

    # do not keep messages which were deleted on recipient
    if (isinstance(msg, Message) and
            msg.to_id == account._id and
            msg.del_on_recipient):
        return False

    # don't show user their own unread stuff
    if msg.author_id == account._id:
        return False

    return True


resume_id = long(sys.argv[1]) if len(sys.argv) > 1 else None

msg_accounts = Account._query(sort=desc("_date"), data=True)
if resume_id:
    msg_accounts._filter(Account.c._id < resume_id)

for account in progress(fetch_things2(msg_accounts), estimate=resume_id):
    current_inbox_count = account.inbox_count
    unread_messages = list(queries.get_unread_inbox(account))

    if account._id % 100000 == 0:
        g.reset_caches()

    if not len(unread_messages):
        if current_inbox_count:
            account._incr('inbox_count', -current_inbox_count)
    else:
@classmethod
def by_account_cache(cls, account_id):
    q = Trophy._query(Trophy.c._thing1_id == account_id,
                      sort=desc('_date'))
    q._limit = 500
    return [t._id for t in q]
def gen_keys():
    yield promoted_memo_key

    # just let this one do its own writing
    load_all_verbifys()

    yield queries.get_all_comments().iden

    l_q = Link._query(
        Link.c._spam == (True, False),
        Link.c._deleted == (True, False),
        sort=desc('_date'),
        data=True,
    )
    for link in fetch_things2(l_q, verbosity):
        yield comments_key(link._id)
        yield last_modified_key(link, 'comments')

    a_q = Account._query(
        Account.c._spam == (True, False),
        sort=desc('_date'),
    )
    for account in fetch_things2(a_q, verbosity):
        yield messages_key(account._id)
        yield last_modified_key(account, 'overview')
        yield last_modified_key(account, 'commented')
        yield last_modified_key(account, 'submitted')
        yield last_modified_key(account, 'liked')
        yield last_modified_key(account, 'disliked')
        yield queries.get_comments(account, 'new', 'all').iden
        yield queries.get_submitted(account, 'new', 'all').iden
        yield queries.get_liked(account).iden
        yield queries.get_disliked(account).iden
        yield queries.get_hidden(account).iden
        yield queries.get_saved(account).iden
        yield queries.get_inbox_messages(account).iden
        yield queries.get_unread_messages(account).iden
        yield queries.get_inbox_comments(account).iden
        yield queries.get_unread_comments(account).iden
        yield queries.get_inbox_selfreply(account).iden
        yield queries.get_unread_selfreply(account).iden
        yield queries.get_sent(account).iden

    sr_q = Subverbify._query(
        Subverbify.c._spam == (True, False),
        sort=desc('_date'),
    )
    for sr in fetch_things2(sr_q, verbosity):
        yield last_modified_key(sr, 'stylesheet_contents')
        yield queries.get_links(sr, 'hot', 'all').iden
        yield queries.get_links(sr, 'new', 'all').iden

        for sort in 'top', 'controversial':
            for time in 'hour', 'day', 'week', 'month', 'year', 'all':
                yield queries.get_links(sr, sort, time,
                                        merge_batched=False).iden

        yield queries.get_spam_links(sr).iden
        yield queries.get_spam_comments(sr).iden
        yield queries.get_reported_links(sr).iden
        yield queries.get_reported_comments(sr).iden
        yield queries.get_subverbify_messages(sr).iden
        yield queries.get_unread_subverbify_messages(sr).iden
@classmethod
def by_award_cache(cls, award_id):
    q = Trophy._query(Trophy.c._thing2_id == award_id,
                      sort=desc('_date'))
    q._limit = 50
    return [t._id for t in q]
from collections import defaultdict
from datetime import datetime

from pylons import app_globals as g

from v1.lib.db.operators import desc
from v1.lib.utils import fetch_things2
from v1.models import (
    calculate_server_seconds,
    Comment,
    Link,
    Subverbify,
)

LINK_SILDING_START = datetime(2014, 2, 1, 0, 0, tzinfo=g.tz)
COMMENT_SILDING_START = datetime(2012, 10, 1, 0, 0, tzinfo=g.tz)

queries = [
    Link._query(
        Link.c.sildings != 0,
        Link.c._date > LINK_SILDING_START,
        data=True,
        sort=desc('_date'),
    ),
    Comment._query(
        Comment.c.sildings != 0,
        Comment.c._date > COMMENT_SILDING_START,
        data=True,
        sort=desc('_date'),
    ),
]

seconds_by_srid = defaultdict(int)
silding_price = g.sodium_month_price.pennies

for q in queries:
    for things in fetch_things2(q, chunks=True, chunk_size=100):
        print things[0]._fullname
def get_sr_counts():
    srs = utils.fetch_things2(Subverbify._query(sort=desc("_date")))
    return dict((sr._fullname, sr._ups) for sr in srs)
# All portions of the code written by verbify are Copyright (c) 2006-2015
# verbify Inc. All Rights Reserved.
###############################################################################

import urllib2

from pylons import app_globals as g

from v1.lib.db.operators import desc
from v1.lib.utils import fetch_things2
from v1.lib.media import upload_media
from v1.models.subverbify import Subverbify
from v1.models.wiki import WikiPage, ImagesByWikiPage


all_subverbifys = Subverbify._query(sort=desc("_date"))
for sr in fetch_things2(all_subverbifys):
    images = sr.images.copy()
    images.pop("/empties/", None)

    if not images:
        continue

    print 'Processing /r/%s (id36: %s)' % (sr.name, sr._id36)

    # upgrade old-style image ids to urls
    for name, image_url in images.items():
        if not isinstance(image_url, int):
            continue

        print "  upgrading image %r" % image_url
class SortMenu(NavMenu):
    name = 'sort'
    hidden_options = []
    button_cls = QueryButton

    # these are _ prefixed to avoid colliding with NavMenu attributes
    _default = 'hot'
    _options = ('hot', 'new', 'top', 'old', 'controversial')
    _type = 'lightdrop'
    _title = N_("sorted by")

    def __init__(self, default=None, title='', base_path='', separator='|',
                 _id='', css_class=''):
        options = self.make_buttons()
        default = default or self._default
        base_path = base_path or request.path
        title = title or _(self._title)
        NavMenu.__init__(self, options, default=default, title=title,
                         type=self._type, base_path=base_path,
                         separator=separator, _id=_id, css_class=css_class)

    def make_buttons(self):
        buttons = []
        for name in self._options:
            css_class = 'hidden' if name in self.hidden_options else ''
            button = self.button_cls(self.make_title(name), name, self.name,
                                     css_class=css_class)
            buttons.append(button)
        return buttons

    def make_title(self, attr):
        return menu[attr]

    _mapping = {
        "hot": operators.desc('_hot'),
        "new": operators.desc('_date'),
        "old": operators.asc('_date'),
        "top": operators.desc('_score'),
        "controversial": operators.desc('_controversy'),
        "confidence": operators.desc('_confidence'),
        "random": operators.shuffled('_confidence'),
        "qa": operators.desc('_qa'),
    }
    _reverse_mapping = {v: k for k, v in _mapping.iteritems()}

    @classmethod
    def operator(cls, sort):
        return cls._mapping.get(sort)

    @classmethod
    def sort(cls, operator):
        return cls._reverse_mapping.get(operator)
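
# Usage sketch (illustrative, not from the original source): the two
# classmethods round-trip between sort names and query operators, e.g.
#
#     SortMenu.operator('top')                 # -> operators.desc('_score')
#     SortMenu.sort(operators.desc('_score'))  # -> 'top'
#
# Both lookups use dict.get(), so an unrecognized sort name or operator
# returns None rather than raising KeyError.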
#
# The Original Developer is the Initial Developer. The Initial Developer of
# the Original Code is verbify Inc.
#
# All portions of the code written by verbify are Copyright (c) 2006-2015
# verbify Inc. All Rights Reserved.
###############################################################################

"""Ensure modmsgtime is properly set on all accounts.

See the comment in Account.is_moderator_somewhere for possible values of this
attribute now.

"""

from v1.lib.db.operators import desc
from v1.lib.utils import fetch_things2, progress
from v1.models import Account, Subverbify


all_accounts = Account._query(sort=desc("_date"))
for account in progress(fetch_things2(all_accounts)):
    is_moderator_somewhere = bool(Subverbify.reverse_moderator_ids(account))
    if is_moderator_somewhere:
        if not account.modmsgtime:
            account.modmsgtime = False
        else:
            # the account already has a date for modmsgtime,
            # meaning unread mail
            pass
    else:
        account.modmsgtime = None

    account._commit()