def test_get_slave(self, mock_get_replica):
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter('always')
        multidb.get_slave()
    assert mock_get_replica.called
    assert len(w) == 1
    assert issubclass(w[-1].category, DeprecationWarning)

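# A minimal sketch of the shim the test above exercises, assuming get_slave()
# was kept as a deprecated alias for get_replica() (names follow
# django-multidb-router; the exact warning text is a guess):
def get_slave():
    warnings.warn('get_slave() is deprecated, use get_replica() instead.',
                  DeprecationWarning, stacklevel=2)
    return get_replica()
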
def update_user_ratings():
    """Update add-on author's ratings."""
    cursor = connections[multidb.get_slave()].cursor()
    # We build this query ahead of time because the cursor complains about
    # data truncation if it does the parameters. Also, this query is
    # surprisingly quick, <1sec for 6100 rows returned.
    q = """
        SELECT addons_users.user_id as user_id, AVG(rating) as avg_rating
        FROM reviews
            INNER JOIN versions
            INNER JOIN addons_users
            INNER JOIN addons
        ON reviews.version_id = versions.id
            AND addons.id = versions.addon_id
            AND addons_users.addon_id = addons.id
        WHERE reviews.reply_to IS NULL
            AND reviews.rating > 0
            AND addons.status IN (%s)
        GROUP BY addons_users.user_id
        """ % (",".join(map(str, VALID_STATUSES)))
    cursor.execute(q)
    d = cursor.fetchall()
    cursor.close()

    ts = [update_user_ratings_task.subtask(args=[chunk])
          for chunk in chunked(d, 1000)]
    TaskSet(ts).apply_async()

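# chunked() above comes from the project's utility helpers; a minimal sketch
# of the behavior these crons rely on (successive n-sized slices), assuming
# the input is a sliceable sequence:
def chunked(seq, n):
    """Yield successive n-sized chunks of seq."""
    for i in range(0, len(seq), n):
        yield seq[i:i + n]
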
def recs():
    start = time.time()
    cursor = connections[multidb.get_slave()].cursor()
    cursor.execute("""
        SELECT addon_id, collection_id
        FROM synced_addons_collections ac
        INNER JOIN addons ON
            (ac.addon_id=addons.id AND inactive=0 AND status=4
             AND addontype_id <> 9 AND current_version IS NOT NULL)
        ORDER BY addon_id, collection_id""")
    qs = cursor.fetchall()
    recs_log.info('%.2fs (query) : %s rows' % (time.time() - start, len(qs)))

    addons = _group_addons(qs)
    recs_log.info('%.2fs (groupby) : %s addons' %
                  ((time.time() - start), len(addons)))

    if not len(addons):
        return

    # Check our memory usage.
    try:
        p = subprocess.Popen('%s -p%s -o rss' % (settings.PS_BIN, os.getpid()),
                             shell=True, stdout=subprocess.PIPE)
        recs_log.info('%s bytes' % ' '.join(p.communicate()[0].split()))
    except Exception:
        log.error('Could not call ps', exc_info=True)

    sim = recommend.similarity  # Locals are faster.
    sims, start, timers = {}, [time.time()], {'calc': [], 'sql': []}

    def write_recs():
        calc = time.time()
        timers['calc'].append(calc - start[0])
        try:
            _dump_recs(sims)
        except Exception:
            recs_log.error('Error dumping recommendations. SQL issue.',
                           exc_info=True)
        sims.clear()
        timers['sql'].append(time.time() - calc)
        start[0] = time.time()

    for idx, (addon, collections) in enumerate(addons.iteritems(), 1):
        xs = [(other, sim(collections, cs))
              for other, cs in addons.iteritems()]
        # Sort by similarity and keep the top N.
        others = sorted(xs, key=operator.itemgetter(1), reverse=True)
        sims[addon] = [(k, v) for k, v in others[:11] if k != addon]
        # Flush to the database every 50 addons.
        if idx % 50 == 0:
            write_recs()
    else:
        # This else belongs to the for loop: it runs once after the final
        # iteration to flush whatever is left in sims.
        write_recs()

    avg_len = sum(len(v) for v in addons.itervalues()) / float(len(addons))
    recs_log.info('%s addons: average length: %.2f' % (len(addons), avg_len))
    recs_log.info('Processing time: %.2fs' % sum(timers['calc']))
    recs_log.info('SQL time: %.2fs' % sum(timers['sql']))

def update_perf():
    cursor = connections[multidb.get_slave()].cursor()
    # The baseline is where addon_id is null.
    cursor.execute(
        "SELECT AVG(average) FROM perf_results WHERE addon_id IS NULL")
    baseline = cursor.fetchone()[0]

    # The perf_results table is a mess right now, so pull out one row
    # for each addon by finding the MAX(created) and then the AVG(average)
    # since there are many rows with the same (addon, created).
    # This scheme completely ignores app, os, and test.
    cursor.execute("""
        SELECT J.addon_id, AVG(average) av FROM perf_results P INNER JOIN
            (SELECT addon_id, MAX(created) c FROM perf_results
             GROUP BY addon_id) J
        ON ((P.addon_id=J.addon_id) AND P.created=J.c)
        WHERE test='ts' GROUP BY P.addon_id HAVING av > %s""", (baseline,))
    # A bunch of (addon, perf_average) pairs.
    perf = cursor.fetchall()
    with establish_connection() as conn:
        for chunk in chunked(perf, 25):
            tasks.update_perf.apply_async(args=[baseline, chunk],
                                          connection=conn)
    cursor.close()

def test_pinned_reads(self):
    """Test PinningMasterSlaveRouter.db_for_read() when pinned and when
    not."""
    router = PinningMasterSlaveRouter()

    eq_(router.db_for_read(TestModel), get_slave())
    pin_this_thread()
    eq_(router.db_for_read(TestModel), MASTER_DATABASE)

def test_pinned_reads(self):
    """Test PinningMasterSlaveRouter.db_for_read() when pinned and when
    not."""
    router = PinningMasterSlaveRouter()

    eq_(router.db_for_read(None), get_slave())
    pin_this_thread()
    eq_(router.db_for_read(None), DEFAULT_DB_ALIAS)

def test_db_write_decorator(self):

    def read_view(req):
        eq_(router.db_for_read(None), get_slave())
        return HttpResponse()

    @db_write
    def write_view(req):
        eq_(router.db_for_read(None), DEFAULT_DB_ALIAS)
        return HttpResponse()

    router = PinningMasterSlaveRouter()
    eq_(router.db_for_read(None), get_slave())
    write_view(HttpRequest())
    read_view(HttpRequest())

def test_db_write_decorator(self):

    def read_view(req):
        eq_(router.db_for_read(TestModel), get_slave())
        return HttpResponse()

    @db_write
    def write_view(req):
        eq_(router.db_for_read(TestModel), MASTER_DATABASE)
        return HttpResponse()

    router = PinningMasterSlaveRouter()
    eq_(router.db_for_read(TestModel), get_slave())
    write_view(HttpRequest())
    read_view(HttpRequest())

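# The pinning tests above depend on a thread-local flag; a minimal sketch of
# the primitives, modeled on django-multidb-router (the real package also
# sets and expires pins from middleware via a cookie):
import threading

_locals = threading.local()


def pin_this_thread():
    """Route this thread's reads to the master until unpinned."""
    _locals.pinned = True


def unpin_this_thread():
    _locals.pinned = False


def this_thread_is_pinned():
    return getattr(_locals, 'pinned', False)
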
def update_addon_average_daily_users():
    """Update add-ons ADU totals."""
    raise_if_reindex_in_progress("amo")
    cursor = connections[multidb.get_slave()].cursor()
    q = """SELECT addon_id, AVG(`count`)
           FROM update_counts
           WHERE `date` > DATE_SUB(CURDATE(), INTERVAL 7 DAY)
           GROUP BY addon_id
           ORDER BY addon_id"""
    cursor.execute(q)
    d = cursor.fetchall()
    cursor.close()

    ts = [_update_addon_average_daily_users.subtask(args=[chunk])
          for chunk in chunked(d, 250)]
    TaskSet(ts).apply_async()

def update_addon_average_daily_users():
    """Update add-ons ADU totals."""
    raise_if_reindex_in_progress('amo')
    cursor = connections[multidb.get_slave()].cursor()
    q = """SELECT addon_id, AVG(`count`)
           FROM update_counts
           WHERE `date` > DATE_SUB(CURDATE(), INTERVAL 7 DAY)
           GROUP BY addon_id
           ORDER BY addon_id"""
    cursor.execute(q)
    d = cursor.fetchall()
    cursor.close()

    ts = [_update_addon_average_daily_users.subtask(args=[chunk])
          for chunk in chunked(d, 250)]
    TaskSet(ts).apply_async()

def update_addon_download_totals():
    """Update add-on total and average downloads."""
    cursor = connections[multidb.get_slave()].cursor()
    # We need to use SQL for this until
    # http://code.djangoproject.com/ticket/11003 is resolved
    q = """SELECT addon_id, AVG(count), SUM(count)
           FROM download_counts
           USE KEY (`addon_and_count`)
           GROUP BY addon_id
           ORDER BY addon_id"""
    cursor.execute(q)
    d = cursor.fetchall()
    cursor.close()

    ts = [_update_addon_download_totals.subtask(args=[chunk])
          for chunk in chunked(d, 250)]
    TaskSet(ts).apply_async()

def update_addon_average_daily_users():
    """Update add-ons ADU totals."""
    if settings.IGNORE_NON_CRITICAL_CRONS:
        return

    cursor = connections[multidb.get_slave()].cursor()
    q = """SELECT addon_id, AVG(`count`)
           FROM update_counts
           WHERE `date` > DATE_SUB(CURDATE(), INTERVAL 7 DAY)
           GROUP BY addon_id
           ORDER BY addon_id"""
    cursor.execute(q)
    d = cursor.fetchall()
    cursor.close()

    ts = [_update_addon_average_daily_users.subtask(args=[chunk])
          for chunk in chunked(d, 1000)]
    TaskSet(ts).apply_async()

def update_addon_average_daily_users():
    """Update add-ons ADU totals."""
    cursor = connections[multidb.get_slave()].cursor()
    # We need to use SQL for this until
    # http://code.djangoproject.com/ticket/11003 is resolved
    q = """SELECT addon_id, AVG(`count`)
           FROM update_counts
           USE KEY (`addon_and_count`)
           GROUP BY addon_id
           ORDER BY addon_id"""
    cursor.execute(q)
    d = cursor.fetchall()
    cursor.close()

    with establish_connection() as conn:
        for chunk in chunked(d, 1000):
            _update_addon_average_daily_users.apply_async(args=[chunk],
                                                          connection=conn)

def update_addon_average_daily_users():
    """Update add-ons ADU totals."""
    if not waffle.switch_is_active('local-statistics-processing'):
        return False

    raise_if_reindex_in_progress('amo')
    cursor = connections[multidb.get_slave()].cursor()
    q = """SELECT addon_id, AVG(`count`)
           FROM update_counts
           WHERE `date` > DATE_SUB(CURDATE(), INTERVAL 13 DAY)
           GROUP BY addon_id
           ORDER BY addon_id"""
    cursor.execute(q)
    d = cursor.fetchall()
    cursor.close()

    ts = [_update_addon_average_daily_users.subtask(args=[chunk])
          for chunk in chunked(d, 250)]
    group(ts).apply_async()

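# Note: this version schedules the chunks with group(ts).apply_async()
# instead of TaskSet(ts).apply_async(); TaskSet was deprecated in Celery 3.0
# and group is its direct replacement for firing a batch of subtasks.
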
def update_addon_download_totals():
    """Update add-on total and average downloads."""
    cursor = connections[multidb.get_slave()].cursor()
    # We need to use SQL for this until
    # http://code.djangoproject.com/ticket/11003 is resolved
    q = """SELECT addon_id, AVG(count), SUM(count)
           FROM download_counts
           USE KEY (`addon_and_count`)
           JOIN addons ON download_counts.addon_id=addons.id
           WHERE addons.status != %s
           GROUP BY addon_id
           ORDER BY addon_id"""
    cursor.execute(q, [amo.STATUS_DELETED])
    d = cursor.fetchall()
    cursor.close()

    ts = [_update_addon_download_totals.subtask(args=[chunk])
          for chunk in chunked(d, 250)]
    TaskSet(ts).apply_async()

def update_addon_download_totals():
    """Update add-on total and average downloads."""
    cursor = connections[multidb.get_slave()].cursor()
    # We need to use SQL for this until
    # http://code.djangoproject.com/ticket/11003 is resolved
    q = """SELECT addon_id, AVG(count), SUM(count)
           FROM download_counts
           USE KEY (`addon_and_count`)
           JOIN addons ON download_counts.addon_id=addons.id
           WHERE addons.addontype_id != %s
           AND addons.status != %s
           GROUP BY addon_id
           ORDER BY addon_id"""
    cursor.execute(q, [amo.ADDON_WEBAPP, amo.STATUS_DELETED])
    d = cursor.fetchall()
    cursor.close()

    ts = [_update_addon_download_totals.subtask(args=[chunk])
          for chunk in chunked(d, 250)]
    TaskSet(ts).apply_async()

def update_addon_download_totals():
    """Update add-on total and average downloads."""
    if settings.IGNORE_NON_CRITICAL_CRONS:
        return

    cursor = connections[multidb.get_slave()].cursor()
    # We need to use SQL for this until
    # http://code.djangoproject.com/ticket/11003 is resolved
    q = """SELECT addon_id, AVG(count), SUM(count)
           FROM download_counts
           USE KEY (`addon_and_count`)
           GROUP BY addon_id
           ORDER BY addon_id"""
    cursor.execute(q)
    d = cursor.fetchall()
    cursor.close()

    ts = [_update_addon_download_totals.subtask(args=[chunk])
          for chunk in chunked(d, 1000)]
    TaskSet(ts).apply_async()

def get_trans(items):
    if not items:
        return

    connection = connections[multidb.get_slave()]
    cursor = connection.cursor()

    model = items[0].__class__
    sql, params = build_query(model, connection)
    item_dict = dict((item.pk, item) for item in items)
    ids = ','.join(map(str, item_dict.keys()))

    cursor.execute(sql.format(ids='(%s)' % ids), tuple(params))
    step = len(trans_fields)
    for row in cursor.fetchall():
        # We put the item's pk as the first selected field.
        item = item_dict[row[0]]
        for index, field in enumerate(model._meta.translated_fields):
            start = 1 + step * index
            t = Translation(*row[start:start + step])
            if t.id is not None and t.localized_string is not None:
                setattr(item, field.name, t)

def get_trans(items):
    if not items:
        return

    connection = connections[multidb.get_slave()]
    cursor = connection.cursor()

    model = items[0].__class__
    sql, params = build_query(model, connection)
    item_dict = dict((item.pk, item) for item in items)
    ids = ",".join(map(str, item_dict.keys()))

    cursor.execute(sql.format(ids="(%s)" % ids), tuple(params))
    step = len(trans_fields)
    for row in cursor.fetchall():
        # We put the item's pk as the first selected field.
        item = item_dict[row[0]]
        for index, field in enumerate(model._meta.translated_fields):
            start = 1 + step * index
            t = Translation(*row[start:start + step])
            if t.id is not None and t.localized_string is not None:
                setattr(item, field.name, t)

def update_addon_download_totals():
    """Update add-on total and average downloads."""
    if not waffle.switch_is_active('local-statistics-processing'):
        return False

    cursor = connections[multidb.get_slave()].cursor()
    # We need to use SQL for this until
    # http://code.djangoproject.com/ticket/11003 is resolved
    q = """SELECT addon_id, AVG(count), SUM(count)
           FROM download_counts
           USE KEY (`addon_and_count`)
           JOIN addons ON download_counts.addon_id=addons.id
           WHERE addons.status != %s
           GROUP BY addon_id
           ORDER BY addon_id"""
    cursor.execute(q, [amo.STATUS_DELETED])
    d = cursor.fetchall()
    cursor.close()

    ts = [_update_addon_download_totals.subtask(args=[chunk])
          for chunk in chunked(d, 250)]
    group(ts).apply_async()

def collections_add_slugs():
    """Give slugs to any slugless collections."""
    q = Collection.objects.filter(slug=None)
    ids = q.values_list('id', flat=True)
    task_log.info('%s collections without slugs' % len(ids))
    max_length = Collection._meta.get_field('slug').max_length
    cnt = itertools.count()
    # Chunk it so we don't do huge queries.
    for chunk in chunked(ids, 300):
        for c in q.no_cache().filter(id__in=chunk):
            c.slug = c.nickname or slugify(c.name)[:max_length]
            if not c.slug:
                c.slug = 'collection'
            c.save(force_update=True)
            task_log.info(u'%s. %s => %s' % (next(cnt), c.name, c.slug))

    # Uniquify slug names by user.
    cursor = connections[multidb.get_slave()].cursor()
    dupes = cursor.execute("""
        SELECT user_id, slug FROM (
            SELECT user_id, slug, COUNT(1) AS cnt
            FROM collections c
            INNER JOIN collections_users cu ON c.id = cu.collection_id
            GROUP BY user_id, slug) j
        WHERE j.cnt > 1""")
    task_log.info('Uniquifying %s (user, slug) pairs' % dupes)
    cnt = itertools.count()
    for user, slug in cursor.fetchall():
        q = Collection.objects.filter(slug=slug, collectionuser__user=user)
        # Skip the first one since it's unique without any appendage.
        for idx, c in enumerate(q[1:]):
            # Give enough space for appending a two-digit number.
            slug = c.slug[:max_length - 3]
            c.slug = u'%s-%s' % (slug, idx + 1)
            c.save(force_update=True)
            task_log.info(u'%s. %s => %s' % (next(cnt), slug, c.slug))

def test_db_for_read(self):
    self.assertEquals(MasterSlaveRouter().db_for_read(None), get_slave())

def test_allow_migrate(self):
    """Make sure allow_migrate() does the right thing for both masters
    and slaves."""
    router = MasterSlaveRouter()
    assert router.allow_migrate(DEFAULT_DB_ALIAS, None, None)
    assert not router.allow_migrate(get_slave(), None, None)

def test_db_for_read(self):
    eq_(MasterSlaveRouter().db_for_read(None), get_slave())

def read_view(req):
    eq_(router.db_for_read(None), get_slave())
    return HttpResponse()

def test_allow_syncdb(self):
    """Make sure allow_syncdb() does the right thing for both masters
    and slaves."""
    router = MasterSlaveRouter()
    assert router.allow_syncdb(DEFAULT_DB_ALIAS, None)
    assert not router.allow_syncdb(get_slave(), None)

def test_allow_syncdb(self):
    """Make sure allow_syncdb() does the right thing for both masters
    and slaves."""
    router = MasterSlaveRouter()
    assert router.allow_syncdb(MASTER_DATABASE, None)
    assert router.allow_syncdb(get_slave(), None) is False
    assert router.allow_syncdb('other', None) is None

def test_db_for_read(self):
    eq_(MasterSlaveRouter().db_for_read(TestModel), get_slave())

def read_view(req):
    self.assertEquals(router.db_for_read(None), get_slave())
    return HttpResponse()

def test_db_for_read_allowed_apps(self):
    load_setting('MULTIDB_APPS', ['myapp'])
    eq_(MasterSlaveRouter().db_for_read(TestModel), get_slave())
    load_setting('MULTIDB_APPS', ['anotherapp'])
    assert not MasterSlaveRouter().db_for_read(TestModel)
    clear_setting('MULTIDB_APPS')

def test_allow_migrate(self):
    """Make sure allow_migrate() does the right thing for both masters
    and slaves."""
    router = MasterSlaveRouter()
    assert router.allow_migrate(DEFAULT_DB_ALIAS, 'dummy')
    assert not router.allow_migrate(get_slave(), 'dummy')

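# A minimal sketch of the router and slave picker these tests exercise,
# modeled on django-multidb-router (round-robin over
# settings.SLAVE_DATABASES; details may differ between versions):
import itertools
import random

from django.conf import settings

DEFAULT_DB_ALIAS = 'default'

# Shuffle once so the first slave isn't always hammered at startup, then
# hand out aliases round-robin.
_slaves = list(getattr(settings, 'SLAVE_DATABASES', [])) or [DEFAULT_DB_ALIAS]
random.shuffle(_slaves)
slaves = itertools.cycle(_slaves)


def get_slave():
    """Return the alias of the next slave database."""
    return next(slaves)


class MasterSlaveRouter(object):
    def db_for_read(self, model, **hints):
        """Send reads to a slave."""
        return get_slave()

    def db_for_write(self, model, **hints):
        """Send writes to the master."""
        return DEFAULT_DB_ALIAS

    def allow_migrate(self, db, app_label, model_name=None, **hints):
        """Only migrate on the master; slaves pick up schema changes via
        replication."""
        return db == DEFAULT_DB_ALIAS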