def update_perf():
    cursor = connections[multidb.get_slave()].cursor()
    # The baseline is where addon_id is null.
    cursor.execute(
        "SELECT AVG(average) FROM perf_results WHERE addon_id IS NULL")
    baseline = cursor.fetchone()[0]
    # The perf_results table is a mess right now, so pull out one row
    # for each addon by finding the MAX(created) and then the AVG(average)
    # since there are many rows with the same (addon, created).
    # This scheme completely ignores app, os, and test.
    cursor.execute("""
        SELECT J.addon_id, AVG(average) av FROM perf_results P
        INNER JOIN
            (SELECT addon_id, MAX(created) c FROM perf_results
             GROUP BY addon_id) J
            ON ((P.addon_id=J.addon_id) AND P.created=J.c)
        WHERE test='ts'
        GROUP BY P.addon_id
        HAVING av > %s""", (baseline,))
    # A bunch of (addon, perf_average) pairs.
    perf = cursor.fetchall()
    with establish_connection() as conn:
        for chunk in chunked(perf, 25):
            tasks.update_perf.apply_async(args=[baseline, chunk],
                                          connection=conn)
    cursor.close()

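# The ``chunked`` helper used throughout these jobs is not shown in these
# snippets; a minimal sketch, assuming it simply yields successive
# fixed-size slices of a sequence:
def chunked(seq, n):
    """Yield successive n-sized slices of seq."""
    for i in range(0, len(seq), n):
        yield seq[i:i + n]
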
def test_bulkrecordindexer_index_delete_by_record(app, queue):
    """Test utility class BulkRecordIndexer index/delete by record object."""
    with app.app_context():
        with establish_connection() as c:
            recid = uuid.uuid4()
            record = Record.create({'title': 'Test'}, id_=recid)
            db.session.commit()
            indexer = BulkRecordIndexer()
            indexer.index(record)
            indexer.delete(record)

            consumer = Consumer(
                connection=c,
                queue=indexer.mq_queue.name,
                exchange=indexer.mq_exchange.name,
                routing_key=indexer.mq_routing_key)

            messages = list(consumer.iterqueue())
            [m.ack() for m in messages]

            assert len(messages) == 2
            data0 = messages[0].decode()
            assert data0['id'] == str(recid)
            assert data0['op'] == 'index'
            data1 = messages[1].decode()
            assert data1['id'] == str(recid)
            assert data1['op'] == 'delete'

def update_user_ratings():
    """Update add-on author's ratings."""
    cursor = connections[multidb.get_slave()].cursor()
    # We build this query ahead of time because the cursor complains about
    # data truncation if we let it interpolate the parameters.  Also, this
    # query is surprisingly quick, <1sec for 6100 rows returned.
    q = """SELECT
               addons_users.user_id as user_id,
               AVG(rating) as avg_rating
           FROM reviews
               INNER JOIN versions
               INNER JOIN addons_users
               INNER JOIN addons
           ON reviews.version_id = versions.id
               AND addons.id = versions.addon_id
               AND addons_users.addon_id = addons.id
           WHERE reviews.reply_to IS NULL
               AND reviews.rating > 0
               AND addons.status IN (%s)
           GROUP BY addons_users.user_id
           """ % (",".join(map(str, VALID_STATUSES)))

    cursor.execute(q)
    d = cursor.fetchall()
    cursor.close()

    with establish_connection() as conn:
        for chunk in chunked(d, 1000):
            _update_user_ratings.apply_async(args=[chunk], connection=conn)

def process_upvotes():
    """Process all currently gathered upvotes by saving them to the
    database."""
    connection = establish_connection()
    consumer = Consumer(connection=connection,
                        queue="test",
                        exchange="test",
                        routing_key="test",
                        exchange_type="direct")

    # First process the messages: count the upvotes for every post.
    upvotes_for_post = {}
    messages_for_post = {}
    for message in consumer.iterqueue():
        id = message.body
        upvotes_for_post[id] = upvotes_for_post.get(id, 0) + 1
        # We also need to keep the message objects so we can ack the
        # messages as processed when we are finished with them.
        if id in messages_for_post:
            messages_for_post[id].append(message)
        else:
            messages_for_post[id] = [message]

    # Then increment the upvotes in the database so we only need one
    # UPDATE/INSERT for each post.
    for id, vote_count in upvotes_for_post.items():
        # Message bodies arrive as strings, so cast to int for the lookup.
        p = Post.objects.get(id=int(id))
        p.upvotes += vote_count
        p.save()
        # Now that the upvotes have been registered for this post we can
        # acknowledge the messages.
        [message.ack() for message in messages_for_post[id]]

    consumer.close()
    connection.close()

def update_package_activity():
    """Recalculates package activity rating for all packages."""
    ids = Package.objects.all().values_list('id', flat=True)
    log.info("Updating package activity for %s packages" % len(ids))
    with establish_connection() as conn:
        for chunk in chunked(ids, 100):
            tasks.calculate_activity_rating.apply_async(args=[chunk],
                                                        connection=conn)

def process_actions(actions):
    """Process queue actions."""
    queue = current_app.config['INDEXER_MQ_QUEUE']
    with establish_connection() as c:
        q = queue(c)
        for action in actions:
            q = action(q)

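# Hypothetical usage sketch for ``process_actions``: the loop above does
# ``q = action(q)``, so each action receives the bound queue and must
# return it for the next action to chain off the same object.
def _purge(q):
    q.purge()
    return q

# process_actions([_purge])  # requires an application context
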
def indexer_queue(app):
    """Bulk indexer celery queue."""
    queue = app.config['INDEXER_MQ_QUEUE']
    with establish_connection() as conn:
        q = queue(conn)
        yield q.declare()
        q.delete()

def revoke_tasks(self, request, queryset):
    connection = establish_connection()
    try:
        for state in queryset:
            revoke(state.task_id, connection=connection)
    finally:
        connection.close()

def due_prescriptions(self):
    # Note: ``__isnotnull=True`` is not a valid Django lookup; the
    # correct spelling is ``__isnull=False``.
    prescriptions = Prescription.objects.filter(
        dose_cleared__isnull=False,
        date_dose_cleared__lt=now()
    )
    connection = establish_connection()

def index_all():
    """This reindexes all the known packages and libraries."""
    ids = Package.objects.all().values_list('id', flat=True)
    log.info("Indexing %s packages" % len(ids))
    with establish_connection() as conn:
        for chunk in chunked(ids, 100):
            tasks.index_all.apply_async(args=[chunk], connection=conn)

def addon_reviews_ratings():
    """Update all add-on total_reviews and average/bayesian ratings."""
    addons = Addon.objects.values_list('id', flat=True)
    with establish_connection() as conn:
        for chunk in chunked(addons, 100):
            # args must be a list wrapping the chunk, as in the sibling
            # jobs; passing the chunk bare would splat it into positionals.
            tasks.cron_review_aggregate.apply_async(args=[chunk],
                                                    connection=conn)

def eventtop():
    sys.stderr.write("-> celeryev: starting capture...\n")
    state = State()
    display = CursesMonitor(state)
    display.init_screen()
    refresher = DisplayThread(display)
    refresher.start()
    conn = establish_connection()
    recv = EventReceiver(conn, handlers={"*": state.event})
    try:
        consumer = recv.consumer()
        consumer.consume()
        while True:
            try:
                conn.connection.drain_events()
            except socket.timeout:
                pass
    except Exception:
        refresher.shutdown = True
        refresher.join()
        display.resetscreen()
        raise
    except (KeyboardInterrupt, SystemExit):
        conn and conn.close()
        refresher.shutdown = True
        refresher.join()
        display.resetscreen()

def rate_limit_tasks(self, request, queryset):
    tasks = set([task.name for task in queryset])
    opts = self.model._meta
    app_label = opts.app_label
    if request.POST.get("post"):
        rate = request.POST["rate_limit"]
        connection = establish_connection()
        try:
            for task_name in tasks:
                rate_limit(task_name, rate, connection=connection)
        finally:
            connection.close()
        return None

    context = {
        "title": _("Rate limit selection"),
        "queryset": queryset,
        "object_name": force_unicode(opts.verbose_name),
        "action_checkbox_name": helpers.ACTION_CHECKBOX_NAME,
        "opts": opts,
        "root_path": self.admin_site.root_path,
        "app_label": app_label,
    }

    return render_to_response(self.rate_limit_confirmation_template,
                              context,
                              context_instance=RequestContext(request))

def tick(self):
    """Run a tick, that is one iteration of the scheduler.

    Executes all due tasks.

    """
    debug = self.logger.debug
    error = self.logger.error

    remaining_times = []
    connection = establish_connection()
    try:
        for entry in self.schedule.values():
            is_due, next_time_to_run = self.is_due(entry)
            if is_due:
                debug("Scheduler: Sending due task %s" % entry.name)
                try:
                    result = self.apply_async(entry, connection=connection)
                except SchedulingError, exc:
                    error("Scheduler: %s" % exc)
                else:
                    debug("%s sent. id->%s" % (entry.name, result.task_id))
            if next_time_to_run:
                remaining_times.append(next_time_to_run)
    finally:
        connection.close()

    return min(remaining_times + [self.max_interval])

def handle(self, *args, **kw):
    qs = Version.objects.filter(version_int=None)
    print 'Found %s versions that need updating' % qs.count()
    with establish_connection() as conn:
        for pks in chunked(list(qs.values_list('pk', flat=True)), 1000):
            add_version_int.delay(pks)
    print '... added to celery.'

def process_data():
    connection = establish_connection()
    consumer = Consumer(connection=connection,
                        queue="data",
                        exchange="data",
                        routing_key="increment_data",
                        exchange_type="direct")

    data_to_save = {}
    for message in consumer.iterqueue():
        data = message.body
        data = loads(data)
        checksum = data['checksum']
        data_to_save.setdefault(checksum, []).append(data)

    from sentry.models import Group

    for checksum in data_to_save:
        data = data_to_save[checksum]
        merged_times_seen = len(data)
        data = merge_data(data)
        data['timestamp'] = datetime.strptime(data['timestamp'],
                                              '%Y-%m-%d %H:%M:%S')
        event = Group.objects.from_kwargs(**data)
        event.group.update(times_seen=F('times_seen') + merged_times_seen - 1)

    consumer.close()
    connection.close()

def test_bulkrecordindexer_index_delete_by_record_id(app, queue):
    """Test utility class BulkRecordIndexer index/delete by record id."""
    with app.app_context():
        with establish_connection() as c:
            indexer = BulkRecordIndexer()
            id1 = uuid.uuid4()
            indexer.index_by_id(id1)
            indexer.delete_by_id(id1)

            consumer = Consumer(
                connection=c,
                queue=indexer.mq_queue.name,
                exchange=indexer.mq_exchange.name,
                routing_key=indexer.mq_routing_key)

            messages = list(consumer.iterqueue())
            [m.ack() for m in messages]

            assert len(messages) == 2
            data0 = messages[0].decode()
            assert data0['id'] == str(id1)
            assert data0['op'] == 'index'
            data1 = messages[1].decode()
            assert data1['id'] == str(id1)
            assert data1['op'] == 'delete'

def handle(self, *args, **options):
    # Imported here to avoid circular import loops.
    from amo.utils import chunked, slugify
    from users.models import UserProfile
    from users.tasks import _delete_users

    if not args:
        print "Usage: manage.py delete_users <file>"
        sys.exit(1)

    if not os.path.exists(args[0]):
        print "File not found: %s" % args[0]
        sys.exit(1)

    f = open(args[0], 'r')
    data = True
    print "Reading %s" % args[0]
    while data:
        data = f.readlines(100000)  # 100000 bytes is about 13500 user ids.
        data = [x.strip() for x in data]  # Strip trailing newlines.
        print "Sending %s users to celery" % len(data)
        with establish_connection() as conn:
            for chunk in chunked(data, 100):
                _delete_users.apply_async(args=[chunk], connection=conn)
    f.close()
    print "All done."

def test_indexer_bulk_index(app, queue):
    """Test delay indexing."""
    with app.app_context():
        with establish_connection() as c:
            indexer = RecordIndexer()
            id1 = uuid.uuid4()
            id2 = uuid.uuid4()
            indexer.bulk_index([id1, id2])
            indexer.bulk_delete([id1, id2])

            consumer = Consumer(
                connection=c,
                queue=indexer.mq_queue.name,
                exchange=indexer.mq_exchange.name,
                routing_key=indexer.mq_routing_key)

            messages = list(consumer.iterqueue())
            [m.ack() for m in messages]

            assert len(messages) == 4
            data0 = messages[0].decode()
            assert data0['id'] == str(id1)
            assert data0['op'] == 'index'
            data2 = messages[2].decode()
            assert data2['id'] == str(id1)
            assert data2['op'] == 'delete'

def eventtop():
    sys.stderr.write("-> celeryev: starting capture...\n")
    state = State()
    display = CursesMonitor(state)
    display.init_screen()
    refresher = DisplayThread(display)
    refresher.start()
    conn = establish_connection()
    recv = EventReceiver(conn, handlers={"*": state.event})
    try:
        consumer = recv.consumer()
        consumer.consume()
        while True:
            try:
                conn.connection.drain_events()
            except (socket.timeout, socket.error):
                pass
    except Exception:
        refresher.shutdown = True
        refresher.join()
        display.resetscreen()
        raise
    except (KeyboardInterrupt, SystemExit):
        conn and conn.close()
        refresher.shutdown = True
        refresher.join()
        display.resetscreen()

def process_bulk_queue(version_type=None, queue=None, es_bulk_kwargs=None,
                       stats_only=True):
    """Process bulk indexing queue.

    :param str version_type: Elasticsearch version type.
    :param Queue queue: Queue to use.
    :param dict es_bulk_kwargs: Passed to
        :func:`elasticsearch:elasticsearch.helpers.bulk`.
    :param boolean stats_only: if `True` only report number of
        successful/failed operations instead of just number of successful
        and a list of error responses.

    Note: You can start multiple versions of this task.
    """
    from .cli.index import connect_queue
    connected_queue = None
    if queue:
        connection = establish_connection()
        connected_queue = connect_queue(connection, queue)
    indexer = IlsRecordsIndexer(version_type=version_type,
                                queue=connected_queue,
                                routing_key=queue)
    return indexer.process_bulk_queue(es_bulk_kwargs=es_bulk_kwargs,
                                      stats_only=stats_only)

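# ``connect_queue`` is imported from the local CLI module and is not shown
# in these snippets; a plausible sketch, assuming it binds a named kombu
# queue to a channel on the given connection:
from kombu import Queue

def connect_queue(connection, name):
    # Binding gives the queue a channel so declare()/purge()/delete()
    # actually talk to the broker.
    return Queue(name, routing_key=name)(connection.default_channel)
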
def update_addons_collections_downloads():
    """Update addons+collections download totals."""
    d = (AddonCollectionCount.objects.values("addon", "collection")
         .annotate(sum=Sum("count")))
    with establish_connection() as conn:
        for chunk in chunked(d, 600):
            tasks.update_addons_collections_downloads.apply_async(
                args=[chunk], connection=conn)

def update_collections_total():
    """Update collections downloads totals."""
    d = (CollectionCount.objects.values("collection_id")
         .annotate(sum=Sum("count")))
    with establish_connection() as conn:
        for chunk in chunked(d, 1000):
            tasks.update_collections_total.apply_async(args=[chunk],
                                                       connection=conn)

def update_addons_current_version():
    """Update the current_version field of the addons."""
    d = (Addon.objects.valid().exclude(type=amo.ADDON_PERSONA)
         .values_list('id'))
    with establish_connection() as conn:
        for chunk in chunked(d, 1000):
            _update_addons_current_version.apply_async(args=[chunk],
                                                       connection=conn)

def connect(self, conn=None):
    if conn:
        conn.close()
    self.say("-> connecting to %s." % info.format_broker_info())
    conn = establish_connection()
    conn.connect()
    self.say("-> connected.")
    return conn

def eventdump():
    sys.stderr.write("-> celeryev: starting capture...\n")
    dumper = Dumper()
    conn = establish_connection()
    recv = EventReceiver(conn, handlers={"*": dumper.on_event})
    try:
        recv.capture()
    except (KeyboardInterrupt, SystemExit):
        conn and conn.close()

def update_weekly_votes():
    """Keep the num_votes_past_week value accurate."""
    questions = Question.objects.all().values_list('pk', flat=True)
    with establish_connection() as conn:
        for chunk in chunked(questions, 200):
            update_question_vote_chunk.apply_async(args=[chunk],
                                                   connection=conn)

def update_addons_current_version():
    """Update the current_version field of the addons."""
    d = (Addon.objects.filter(inactive=False, status__in=amo.VALID_STATUSES)
         .exclude(type=amo.ADDON_PERSONA).values_list('id'))
    with establish_connection() as conn:
        for chunk in chunked(d, 100):
            _update_addons_current_version.apply_async(args=[chunk],
                                                       connection=conn)

def init_queue(name):
    """Initialize indexing queue.

    :param name: Name of queue.
    """
    with establish_connection() as connection:
        queue = connect_queue(connection, name)
        result = queue.declare()
        click.secho(f'Queue has been initialized: {result}', fg='green')

# Assumes ``from contextlib import contextmanager``; the decorator is
# needed for the generator to honor the context-manager contract the
# docstring describes.
@contextmanager
def create_producer(self):
    """Context manager that yields an instance of ``Producer``."""
    with establish_connection() as conn:
        yield Producer(
            conn,
            exchange=self.mq_exchange,
            routing_key=self.mq_routing_key,
            auto_declare=True,
        )

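# Usage sketch for ``create_producer``. The payload shape is an
# assumption, mirroring the {'id': ..., 'op': ...} messages asserted in
# the indexer tests above:
# with indexer.create_producer() as producer:
#     producer.publish({'id': str(record_id), 'op': 'index'})
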
def evdump():
    sys.stderr.write("-> evdump: starting capture...\n")
    dumper = Dumper()
    conn = establish_connection()
    recv = EventReceiver(conn, handlers={"*": dumper.on_event})
    try:
        recv.capture()
    except (KeyboardInterrupt, SystemExit):
        conn and conn.close()

def send_increment_upvotes(for_post_id):
    """Send a message to increment the upvote count for a post."""
    exchange = Exchange("test", type="direct")
    queue = Queue("test", exchange, routing_key="test")
    connection = establish_connection()
    channel = connection.channel()
    producer = Producer(channel, exchange, routing_key="test")
    producer.publish(str(for_post_id))
    connection.close()

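# End-to-end sketch pairing this producer with the ``process_upvotes``
# consumer defined earlier: publish a few votes, then drain the queue.
# for post_id in (1, 1, 2):
#     send_increment_upvotes(post_id)
# process_upvotes()
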
def rebuild_kb():
    """Re-render all documents in the KB in chunks."""
    cache.delete(settings.WIKI_REBUILD_TOKEN)
    d = (Document.objects.using("default")
         .filter(current_revision__isnull=False)
         .values_list("id", flat=True))
    with establish_connection() as conn:
        for chunk in chunked(d, 100):
            _rebuild_kb_chunk.apply_async(args=[chunk], connection=conn)

def reindex_addons():
    from . import tasks
    ids = (Addon.objects.values_list('id', flat=True)
           .filter(_current_version__isnull=False,
                   status__in=amo.VALID_STATUSES,
                   disabled_by_user=False))
    with establish_connection() as conn:
        for chunk in chunked(sorted(list(ids)), 150):
            tasks.index_addons.apply_async(args=[chunk], connection=conn)

def fix(self, base, task):
    with establish_connection() as cxn:
        print 'Searching the nfs...'
        # ``suffix`` is assumed to be defined at module level (elided
        # here); it names the suffix that marks busted files.
        files = list(path.path(base).walkfiles('*%s' % suffix))
        print '%s busted files under %s.' % (len(files), base)
        for src in files:
            dst = src.replace(suffix, '')
            log.info('Resizing %s to %s' % (src, dst))
            task.apply_async(args=[src, dst], connection=cxn)

def rebuild_kb():
    """Re-render all documents in the KB in chunks."""
    cache.delete(settings.WIKI_REBUILD_TOKEN)
    d = (Document.objects.using('default')
         .filter(current_revision__isnull=False)
         .values_list('id', flat=True))
    with establish_connection() as conn:
        for chunk in chunked(d, 100):
            _rebuild_kb_chunk.apply_async(args=[chunk], connection=conn)

def consumer(app, queue):
    """Get a consumer on the queue object for testing bulk operations."""
    # TODO: Move this fixture to pytest-invenio
    with establish_connection() as c:
        yield Consumer(
            connection=c,
            queue=queue.name,
            exchange=queue.exchange.name,
            routing_key=queue.routing_key
        )

def consumer(app, queue):
    """Get a consumer on the queue object for testing bulk operations."""
    # TODO: Move this fixture to pytest-invenio
    with establish_connection() as c:
        yield Consumer(
            connection=c,
            queue=app.config['INDEXER_MQ_QUEUE'].name,
            exchange=app.config['INDEXER_MQ_EXCHANGE'].name,
            routing_key=app.config['INDEXER_MQ_ROUTING_KEY'],
        )

def update_addons_collections_downloads():
    """Update addons+collections download totals."""
    d = (AddonCollectionCount.objects.values('addon', 'collection')
         .annotate(sum=Sum('count')))
    with establish_connection() as conn:
        for chunk in chunked(d, 600):
            tasks.update_addons_collections_downloads.apply_async(
                args=[chunk], connection=conn)

def update_collections_total():
    """Update collections downloads totals."""
    d = (CollectionCount.objects.values('collection_id')
         .annotate(sum=Sum('count')))
    with establish_connection() as conn:
        for chunk in chunked(d, 1000):
            tasks.update_collections_total.apply_async(args=[chunk],
                                                       connection=conn)

def elasticsearch_index_init(alembic, verbose):
    """Initialize the elasticsearch indices and indexing queue."""
    for _ in current_search.create(ignore=[400]):
        pass
    for _ in current_search.put_templates(ignore=[400]):
        pass
    queue = current_app.config['INDEXER_MQ_QUEUE']
    with establish_connection() as c:
        q = queue(c)
        q.declare()

def update_collections_subscribers():
    """Update collections subscribers totals."""
    d = (CollectionWatcher.objects.values('collection_id')
         .annotate(count=Count('collection'))
         .extra(where=['DATE(created)=%s'],
                params=[datetime.date.today()]))
    with establish_connection() as conn:
        for chunk in chunked(d, 1000):
            _update_collections_subscribers.apply_async(args=[chunk],
                                                        connection=conn)

def queue(app):
    """Declare and clean the indexer queue."""
    # TODO: Move this fixture to pytest-invenio
    queue = app.config['INDEXER_MQ_QUEUE']
    with establish_connection() as c:
        q = queue(c)
        q.declare()
        q.purge()
    return queue

def elasticsearch_index_destroy(alembic, verbose):
    """Destroy the elasticsearch indices and indexing queue."""
    for _ in current_search.delete(ignore=[400, 404]):
        pass
    queue = current_app.config['INDEXER_MQ_QUEUE']
    with establish_connection() as c:
        q = queue(c)
        try:
            q.delete()
        except amqp.exceptions.NotFound:
            pass

def queue(app):
    """Get queue object for testing bulk operations."""
    queue = app.config['INDEXER_MQ_QUEUE']
    with app.app_context():
        with establish_connection() as c:
            q = queue(c)
            q.declare()
            q.purge()
    return queue

def queue(app):
    """Get queue object for testing the CLI."""
    queue = app.config['INDEXER_MQ_QUEUE']
    with app.app_context():
        with establish_connection() as c:
            q = queue(c)
            q.declare()
            q.purge()
    return queue
