def unpublish(doc_locale_slug_pairs, log=None, completion_message=None,
              invalidate_cdn_cache=True):
    """
    Delete one or more documents from the S3 bucket serving the document API.
    """
    if not log:
        log = unpublish.get_logger()

    s3_bucket = get_s3_bucket()
    if not s3_bucket:
        log.info('Skipping unpublish of {!r}: no S3 bucket configured'.format(
            doc_locale_slug_pairs))
        return

    keys_to_delete = (get_s3_key(locale=locale, slug=slug)
                      for locale, slug in doc_locale_slug_pairs)

    for chunk in chunked(keys_to_delete, S3_MAX_KEYS_PER_DELETE):
        response = s3_bucket.delete_objects(
            Delete={
                'Objects': [{'Key': key} for key in chunk]
            }
        )
        for info in response.get('Deleted', ()):
            log.info('Unpublished {}'.format(info['Key']))
        for info in response.get('Errors', ()):
            log.error('Unable to unpublish {}: ({}) {}'.format(
                info['Key'], info['Code'], info['Message']))

    if completion_message:
        log.info(completion_message)

    if invalidate_cdn_cache:
        request_cdn_cache_invalidation.delay(doc_locale_slug_pairs)


def unpublish(doc_locale_slug_pairs, log=None, completion_message=None):
    """
    Delete one or more documents from the S3 bucket serving the document API.
    """
    if not log:
        log = unpublish.get_logger()

    s3_bucket = get_s3_bucket()
    if not s3_bucket:
        log.info('Skipping unpublish of {!r}: no S3 bucket configured'.format(
            doc_locale_slug_pairs))
        return

    keys_to_delete = (get_s3_key(locale=locale, slug=slug)
                      for locale, slug in doc_locale_slug_pairs)

    for chunk in chunked(keys_to_delete, S3_MAX_KEYS_PER_DELETE):
        response = s3_bucket.delete_objects(
            Delete={
                'Objects': [{'Key': key} for key in chunk]
            }
        )
        for info in response.get('Deleted', ()):
            log.info('Unpublished {}'.format(info['Key']))
        for info in response.get('Errors', ()):
            log.error('Unable to unpublish {}: ({}) {}'.format(
                info['Key'], info['Code'], info['Message']))

    if completion_message:
        log.info(completion_message)


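# Both unpublish() variants above lean on a `chunked` helper so that each
# delete_objects call stays under S3_MAX_KEYS_PER_DELETE keys. The real
# helper lives elsewhere in the codebase; the sketch below is only an
# assumption of what it needs to do: yield successive lists of at most `n`
# items from any iterable, including the generator of keys built above.
from itertools import islice


def chunked(iterable, n):
    """Yield lists of at most `n` items taken from `iterable`."""
    iterator = iter(iterable)
    while True:
        chunk = list(islice(iterator, n))
        if not chunk:
            return
        yield chunk

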
def handle(self, *args, **options):
    Logger = namedtuple('Logger', 'info, error')
    log = Logger(info=self.stdout.write, error=self.stderr.write)

    if options['all'] or options['locale']:
        filters = {}
        if options['locale'] and not options['all']:
            locale = options['locale']
            log.info(
                'Publishing all documents in locale {}'.format(locale))
            filters.update(locale=locale)
        else:
            locale = None
            log.info('Publishing all documents')
        chunk_size = max(options['chunk_size'], 1)
        docs = Document.objects.filter(**filters)
        doc_pks = docs.values_list('id', flat=True)
        num_docs = len(doc_pks)
        num_tasks = int(ceil(num_docs / float(chunk_size)))
        log.info('...found {} documents.'.format(num_docs))
        # Let's publish the documents in a group of chunks, where the
        # tasks in the group can be run in parallel.
        tasks = []
        for i, chunk in enumerate(chunked(doc_pks, chunk_size)):
            message = 'Published chunk #{} of {}'.format(i + 1, num_tasks)
            tasks.append(publish.si(chunk, completion_message=message))
        if num_tasks == 1:
            msg = ('Launching a single task handling '
                   'all {} documents.'.format(num_docs))
        else:
            msg = ('Launching {} parallelizable tasks, each handling '
                   'at most {} documents.'.format(num_tasks, chunk_size))
        log.info(msg)
        group(*tasks).apply_async()
    else:
        paths = options['paths']
        if not paths:
            raise CommandError(
                'Need at least one document path to publish')
        doc_pks = []
        get_doc_pk = Document.objects.values_list('id', flat=True).get
        for path in paths:
            if path.startswith('/'):
                path = path[1:]
            locale, sep, slug = path.partition('/')
            head, sep, tail = slug.partition('/')
            if head == 'docs':
                slug = tail
            try:
                doc_pk = get_doc_pk(locale=locale, slug=slug)
            except Document.DoesNotExist:
                msg = 'Document with locale={} and slug={} does not exist'
                log.error(msg.format(locale, slug))
            else:
                doc_pks.append(doc_pk)
        publish(doc_pks, log=log)


def handle(self, *args, **options):
    Logger = namedtuple('Logger', 'info, error')
    log = Logger(info=self.stdout.write, error=self.stderr.write)

    if options['all'] or options['locale']:
        filters = {}
        if options['locale'] and not options['all']:
            locale = options['locale']
            log.info('Publishing all documents in locale {}'.format(locale))
            filters.update(locale=locale)
        else:
            locale = None
            log.info('Publishing all documents')
        chunk_size = max(options['chunk_size'], 1)
        docs = Document.objects.filter(**filters)
        doc_pks = docs.values_list('id', flat=True)
        num_docs = len(doc_pks)
        num_tasks = int(ceil(num_docs / float(chunk_size)))
        log.info('...found {} documents.'.format(num_docs))
        # Let's publish the documents in a group of chunks, where the
        # tasks in the group can be run in parallel.
        tasks = []
        for i, chunk in enumerate(chunked(doc_pks, chunk_size)):
            message = 'Published chunk #{} of {}'.format(i + 1, num_tasks)
            tasks.append(publish.si(chunk, completion_message=message))
        if num_tasks == 1:
            msg = ('Launching a single task handling '
                   'all {} documents.'.format(num_docs))
        else:
            msg = ('Launching {} parallelizable tasks, each handling '
                   'at most {} documents.'.format(num_tasks, chunk_size))
        log.info(msg)
        group(*tasks).apply_async()
    else:
        paths = options['paths']
        if not paths:
            raise CommandError('Need at least one document path to publish')
        doc_pks = []
        get_doc_pk = Document.objects.values_list('id', flat=True).get
        for path in paths:
            if path.startswith('/'):
                path = path[1:]
            locale, sep, slug = path.partition('/')
            head, sep, tail = slug.partition('/')
            if head == 'docs':
                slug = tail
            try:
                doc_pk = get_doc_pk(locale=locale, slug=slug)
            except Document.DoesNotExist:
                msg = 'Document with locale={} and slug={} does not exist'
                log.error(msg.format(locale, slug))
            else:
                doc_pks.append(doc_pk)
        publish(doc_pks, log=log)


def chain_clean_docs(doc_pks, user_pk):
    tasks = []
    count = 0
    total = len(doc_pks)
    n = int(ceil(total / 5))
    chunks = chunked(doc_pks, n)

    for chunk in chunks:
        count += len(chunk)
        tasks.append(clean_document_chunk.si(chunk, user_pk))
        percent_complete = int(ceil((count / total) * 100))
        tasks.append(
            email_document_progress.si("clean_document", percent_complete,
                                       total))
    chain(*tasks).apply_async()


def render_stale_documents(log=None): """Simple task wrapper for rendering stale documents""" stale_docs = Document.objects.get_by_stale_rendering().distinct() stale_docs_count = stale_docs.count() if stale_docs_count == 0: # not stale documents to render return if log is None: # fetch a logger in case none is given log = render_stale_documents.get_logger() log.info("Found %s stale documents" % stale_docs_count) stale_pks = stale_docs.values_list("pk", flat=True) render_tasks = [render_document_chunk.si(pks) for pks in chunked(stale_pks, 5)] chain(*render_tasks).apply_async()
def reindex_all(cls, chunk_size=500, index=None, percent=100): """Rebuild ElasticSearch indexes. :arg chunk_size: how many documents to bulk index as a single chunk. :arg index: the `Index` object to reindex into. Uses the current promoted index if none provided. :arg percent: 1 to 100--the percentage of the db to index. """ from kuma.search.models import Index from kuma.search.tasks import prepare_index, finalize_index from kuma.wiki.tasks import index_documents index = index or Index.objects.get_current() # Get the list of document IDs to index. indexable = WikiDocumentType.get_indexable(percent) total = len(indexable) total_chunks = int(ceil(total / chunk_size)) pre_task = prepare_index.si(index.pk) post_task = finalize_index.si(index.pk) if not total: # If there's no data we still create the index and finalize it. chain(pre_task, post_task).apply_async() else: index_tasks = [ index_documents.si(chunk, index.pk) for chunk in chunked(indexable, chunk_size) ] chord_flow(pre_task, index_tasks, post_task).apply_async() message = _( "Indexing %(total)d documents into %(total_chunks)d chunks of " "size %(size)d into index %(index)s." % { "total": total, "total_chunks": total_chunks, "size": chunk_size, "index": index.prefixed_name, } ) return message
def render_stale_documents(log=None): """Simple task wrapper for rendering stale documents""" stale_docs = Document.objects.get_by_stale_rendering().distinct() stale_docs_count = stale_docs.count() if stale_docs_count == 0: # not stale documents to render return if log is None: # fetch a logger in case none is given log = render_stale_documents.get_logger() log.info('Found %s stale documents' % stale_docs_count) stale_pks = stale_docs.values_list('pk', flat=True) render_tasks = [render_document_chunk.si(pks) for pks in chunked(stale_pks, 5)] chain(*render_tasks).apply_async()
def chain_render_docs(self, docs):
    tasks = []
    count = 0
    total = len(docs)
    n = int(ceil(total / 5))
    chunks = chunked(docs, n)

    for chunk in chunks:
        count += len(chunk)
        tasks.append(
            render_document_chunk.si(chunk, self.cache_control,
                                     self.base_url, self.options['force']))
        percent_complete = int(ceil((count / total) * 100))
        tasks.append(
            email_render_document_progress.si(percent_complete, total))

    # Make it so.
    chain(*tasks).apply_async()


def reindex_all(cls, chunk_size=500, index=None, percent=100): """Rebuild ElasticSearch indexes. :arg chunk_size: how many documents to bulk index as a single chunk. :arg index: the `Index` object to reindex into. Uses the current promoted index if none provided. :arg percent: 1 to 100--the percentage of the db to index. """ from kuma.search.models import Index from kuma.search.tasks import prepare_index, finalize_index from kuma.wiki.tasks import index_documents index = index or Index.objects.get_current() # Get the list of document IDs to index. indexable = WikiDocumentType.get_indexable(percent) total = len(indexable) total_chunks = int(ceil(total / chunk_size)) pre_task = prepare_index.si(index.pk) post_task = finalize_index.si(index.pk) if not total: # If there's no data we still create the index and finalize it. chain(pre_task, post_task).apply_async() else: index_tasks = [index_documents.si(chunk, index.pk) for chunk in chunked(indexable, chunk_size)] chord_flow(pre_task, index_tasks, post_task).apply_async() message = _( 'Indexing %(total)d documents into %(total_chunks)d chunks of ' 'size %(size)d into index %(index)s.' % { 'total': total, 'total_chunks': total_chunks, 'size': chunk_size, 'index': index.prefixed_name } ) return message
def render_stale_documents(log=None): """Simple task wrapper for rendering stale documents""" stale_docs = Document.objects.get_by_stale_rendering().distinct() stale_docs_count = stale_docs.count() if stale_docs_count == 0: # not stale documents to render return if log is None: # fetch a logger in case none is given log = render_stale_documents.get_logger() log.info("Found %s stale documents" % stale_docs_count) stale_pks = stale_docs.values_list("pk", flat=True) pre_task = acquire_render_lock.si() render_tasks = [render_document_chunk.si(pks) for pks in chunked(stale_pks, 5)] post_task = release_render_lock.si() chord_flow(pre_task, render_tasks, post_task).apply_async()
def chain_render_docs(self, docs, cache_control, base_url, force,
                      invalidate_cdn_cache=False):
    tasks = []
    count = 0
    total = len(docs)
    n = int(ceil(total / 5))
    chunks = chunked(docs, n)

    for chunk in chunks:
        count += len(chunk)
        tasks.append(
            render_document_chunk.si(chunk, cache_control, base_url,
                                     force, invalidate_cdn_cache))
        percent_complete = int(ceil((count / total) * 100))
        tasks.append(
            email_document_progress.si('render_document', percent_complete,
                                       total))

    # Make it so.
    chain(*tasks).apply_async()


def render_stale_documents(log=None): """Simple task wrapper for rendering stale documents""" stale_docs = Document.objects.get_by_stale_rendering().distinct() stale_docs_count = stale_docs.count() if stale_docs_count == 0: # not stale documents to render return if log is None: # fetch a logger in case none is given log = render_stale_documents.get_logger() log.info('Found %s stale documents' % stale_docs_count) stale_pks = stale_docs.values_list('pk', flat=True) pre_task = acquire_render_lock.si() render_tasks = [render_document_chunk.si(pks) for pks in chunked(stale_pks, 5)] post_task = release_render_lock.si() chord_flow(pre_task, render_tasks, post_task).apply_async()
def handle(self, *args, **options): Logger = namedtuple("Logger", "info, error") log = Logger(info=self.stdout.write, error=self.stderr.write) if options["all"] or options["locale"]: if options["locale"] and options["all"]: raise CommandError( "Specifying --locale with --all is the same as --all" ) filters = {} if options["locale"]: locale = options["locale"] log.info("Publishing all documents in locale {}".format(locale)) filters.update(locale=locale) else: log.info("Publishing all documents") chunk_size = max(options["chunk_size"], 1) docs = Document.objects.filter(**filters) doc_pks = docs.values_list("id", flat=True) num_docs = len(doc_pks) num_tasks = int(ceil(num_docs / float(chunk_size))) log.info("...found {} documents.".format(num_docs)) # Let's publish the documents in a group of chunks, where the # tasks in the group can be run in parallel. tasks = [] for i, chunk in enumerate(chunked(doc_pks, chunk_size)): message = "Published chunk #{} of {}".format(i + 1, num_tasks) tasks.append( publish.si( chunk, completion_message=message, invalidate_cdn_cache=False ) ) if num_tasks == 1: msg = "Launching a single task handling " "all {} documents.".format( num_docs ) else: msg = ( "Launching {} paralellizable tasks, each handling " "at most {} documents.".format(num_tasks, chunk_size) ) log.info(msg) group(*tasks).apply_async() else: paths = options["paths"] if not paths: raise CommandError("Need at least one document path to publish") doc_pks = [] get_doc_pk = Document.objects.values_list("id", flat=True).get for path in paths: if path.startswith("/"): path = path[1:] locale, sep, slug = path.partition("/") head, sep, tail = slug.partition("/") if head == "docs": slug = tail try: doc_pk = get_doc_pk(locale=locale, slug=slug) except Document.DoesNotExist: msg = "Document with locale={} and slug={} does not exist" log.error(msg.format(locale, slug)) else: doc_pks.append(doc_pk) publish( doc_pks, log=log, invalidate_cdn_cache=(not options["skip_cdn_invalidation"]), )