Example #1
def unpublish(doc_locale_slug_pairs, log=None, completion_message=None,
              invalidate_cdn_cache=True):
    """
    Delete one or more documents from the S3 bucket serving the document API.
    """
    if not log:
        log = unpublish.get_logger()

    s3_bucket = get_s3_bucket()
    if not s3_bucket:
        log.info('Skipping unpublish of {!r}: no S3 bucket configured'.format(
            doc_locale_slug_pairs))
        return

    keys_to_delete = (get_s3_key(locale=locale, slug=slug)
                      for locale, slug in doc_locale_slug_pairs)

    for chunk in chunked(keys_to_delete, S3_MAX_KEYS_PER_DELETE):
        response = s3_bucket.delete_objects(
            Delete={
                'Objects': [{'Key': key} for key in chunk]
            }
        )
        for info in response.get('Deleted', ()):
            log.info('Unpublished {}'.format(info['Key']))
        for info in response.get('Errors', ()):
            log.error('Unable to unpublish {}: ({}) {}'.format(
                info['Key'], info['Code'], info['Message']))

    if completion_message:
        log.info(completion_message)

    if invalidate_cdn_cache:
        request_cdn_cache_invalidation.delay(doc_locale_slug_pairs)
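
Every example on this page leans on a chunked(iterable, n) helper to split work into bounded batches (here, at most S3_MAX_KEYS_PER_DELETE keys per delete_objects call). A minimal sketch of such a helper, written here only for illustration and assuming it yields successive lists of at most n items from any iterable, including generators like keys_to_delete above:

from itertools import islice

def chunked(iterable, n):
    # Illustrative sketch only; the helper actually imported by these
    # examples may differ in detail, but they all rely on this behaviour:
    # yield successive lists of at most n items from the iterable.
    iterator = iter(iterable)
    while True:
        chunk = list(islice(iterator, n))
        if not chunk:
            return
        yield chunk

print(list(chunked(range(7), 3)))  # [[0, 1, 2], [3, 4, 5], [6]]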
Example #2
def unpublish(doc_locale_slug_pairs, log=None, completion_message=None):
    """
    Delete one or more documents from the S3 bucket serving the document API.
    """
    if not log:
        log = unpublish.get_logger()

    s3_bucket = get_s3_bucket()
    if not s3_bucket:
        log.info('Skipping unpublish of {!r}: no S3 bucket configured'.format(
            doc_locale_slug_pairs))
        return

    keys_to_delete = (get_s3_key(locale=locale, slug=slug)
                      for locale, slug in doc_locale_slug_pairs)

    for chunk in chunked(keys_to_delete, S3_MAX_KEYS_PER_DELETE):
        response = s3_bucket.delete_objects(
            Delete={
                'Objects': [{'Key': key} for key in chunk]
            }
        )
        for info in response.get('Deleted', ()):
            log.info('Unpublished {}'.format(info['Key']))
        for info in response.get('Errors', ()):
            log.error('Unable to unpublish {}: ({}) {}'.format(
                info['Key'], info['Code'], info['Message']))

    if completion_message:
        log.info(completion_message)
Example #3
 def handle(self, *args, **options):
     Logger = namedtuple('Logger', 'info, error')
     log = Logger(info=self.stdout.write, error=self.stderr.write)
     if options['all'] or options['locale']:
         filters = {}
         if options['locale'] and not options['all']:
             locale = options['locale']
             log.info(
                 'Publishing all documents in locale {}'.format(locale))
             filters.update(locale=locale)
         else:
             locale = None
             log.info('Publishing all documents')
         chunk_size = max(options['chunk_size'], 1)
         docs = Document.objects.filter(**filters)
         doc_pks = docs.values_list('id', flat=True)
         num_docs = len(doc_pks)
         num_tasks = int(ceil(num_docs / float(chunk_size)))
         log.info('...found {} documents.'.format(num_docs))
         # Let's publish the documents in a group of chunks, where the
         # tasks in the group can be run in parallel.
         tasks = []
         for i, chunk in enumerate(chunked(doc_pks, chunk_size)):
             message = 'Published chunk #{} of {}'.format(i + 1, num_tasks)
             tasks.append(publish.si(chunk, completion_message=message))
         if num_tasks == 1:
             msg = ('Launching a single task handling '
                    'all {} documents.'.format(num_docs))
         else:
             msg = ('Launching {} parallelizable tasks, each handling '
                    'at most {} documents.'.format(num_tasks, chunk_size))
         log.info(msg)
         group(*tasks).apply_async()
     else:
         paths = options['paths']
         if not paths:
             raise CommandError(
                 'Need at least one document path to publish')
         doc_pks = []
         get_doc_pk = Document.objects.values_list('id', flat=True).get
         for path in paths:
             if path.startswith('/'):
                 path = path[1:]
             locale, sep, slug = path.partition('/')
             head, sep, tail = slug.partition('/')
             if head == 'docs':
                 slug = tail
             try:
                 doc_pk = get_doc_pk(locale=locale, slug=slug)
             except Document.DoesNotExist:
                 msg = 'Document with locale={} and slug={} does not exist'
                 log.error(msg.format(locale, slug))
             else:
                 doc_pks.append(doc_pk)
         publish(doc_pks, log=log)
Example #4
 def handle(self, *args, **options):
     Logger = namedtuple('Logger', 'info, error')
     log = Logger(info=self.stdout.write, error=self.stderr.write)
     if options['all'] or options['locale']:
         filters = {}
         if options['locale'] and not options['all']:
             locale = options['locale']
             log.info('Publishing all documents in locale {}'.format(locale))
             filters.update(locale=locale)
         else:
             locale = None
             log.info('Publishing all documents')
         chunk_size = max(options['chunk_size'], 1)
         docs = Document.objects.filter(**filters)
         doc_pks = docs.values_list('id', flat=True)
         num_docs = len(doc_pks)
         num_tasks = int(ceil(num_docs / float(chunk_size)))
         log.info('...found {} documents.'.format(num_docs))
         # Let's publish the documents in a group of chunks, where the
         # tasks in the group can be run in parallel.
         tasks = []
         for i, chunk in enumerate(chunked(doc_pks, chunk_size)):
             message = 'Published chunk #{} of {}'.format(i + 1, num_tasks)
             tasks.append(publish.si(chunk, completion_message=message))
         if num_tasks == 1:
             msg = ('Launching a single task handling '
                    'all {} documents.'.format(num_docs))
         else:
             msg = ('Launching {} parallelizable tasks, each handling '
                    'at most {} documents.'.format(num_tasks, chunk_size))
         log.info(msg)
         group(*tasks).apply_async()
     else:
         paths = options['paths']
         if not paths:
             raise CommandError('Need at least one document path to publish')
         doc_pks = []
         get_doc_pk = Document.objects.values_list('id', flat=True).get
         for path in paths:
             if path.startswith('/'):
                 path = path[1:]
             locale, sep, slug = path.partition('/')
             head, sep, tail = slug.partition('/')
             if head == 'docs':
                 slug = tail
             try:
                 doc_pk = get_doc_pk(locale=locale, slug=slug)
             except Document.DoesNotExist:
                 msg = 'Document with locale={} and slug={} does not exist'
                 log.error(msg.format(locale, slug))
             else:
                 doc_pks.append(doc_pk)
         publish(doc_pks, log=log)
Example #5
def chain_clean_docs(doc_pks, user_pk):
    tasks = []
    count = 0
    total = len(doc_pks)
    n = int(ceil(total / 5))
    chunks = chunked(doc_pks, n)

    for chunk in chunks:
        count += len(chunk)
        tasks.append(clean_document_chunk.si(chunk, user_pk))
        percent_complete = int(ceil((count / total) * 100))
        tasks.append(
            email_document_progress.si("clean_document", percent_complete,
                                       total))

    chain(*tasks).apply_async()
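
The chunk and progress arithmetic in chain_clean_docs is easier to follow with concrete numbers. A standalone sketch (not part of the codebase) using a hypothetical batch of 42 document PKs, which ceil(42 / 5) = 9 splits into chunks of 9, 9, 9, 9 and 6:

from math import ceil

total = 42                          # hypothetical number of document PKs
n = int(ceil(total / 5))            # 9 PKs per chunk, so at most 5 chunks
count = 0
for size in (9, 9, 9, 9, 6):        # chunk sizes chunked(doc_pks, 9) would yield
    count += size
    # percent_complete reported after each chunk: 22, 43, 65, 86, 100
    print(int(ceil((count / total) * 100)))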
Example #6
def render_stale_documents(log=None):
    """Simple task wrapper for rendering stale documents"""
    stale_docs = Document.objects.get_by_stale_rendering().distinct()
    stale_docs_count = stale_docs.count()
    if stale_docs_count == 0:
        # no stale documents to render
        return

    if log is None:
        # fetch a logger in case none is given
        log = render_stale_documents.get_logger()

    log.info("Found %s stale documents" % stale_docs_count)
    stale_pks = stale_docs.values_list("pk", flat=True)

    render_tasks = [render_document_chunk.si(pks) for pks in chunked(stale_pks, 5)]
    chain(*render_tasks).apply_async()
Example #7
    def reindex_all(cls, chunk_size=500, index=None, percent=100):
        """Rebuild ElasticSearch indexes.

        :arg chunk_size: how many documents to bulk index as a single chunk.
        :arg index: the `Index` object to reindex into. Uses the current
            promoted index if none provided.
        :arg percent: 1 to 100--the percentage of the db to index.

        """
        from kuma.search.models import Index
        from kuma.search.tasks import prepare_index, finalize_index
        from kuma.wiki.tasks import index_documents

        index = index or Index.objects.get_current()

        # Get the list of document IDs to index.
        indexable = WikiDocumentType.get_indexable(percent)

        total = len(indexable)
        total_chunks = int(ceil(total / chunk_size))

        pre_task = prepare_index.si(index.pk)
        post_task = finalize_index.si(index.pk)

        if not total:
            # If there's no data we still create the index and finalize it.
            chain(pre_task, post_task).apply_async()
        else:
            index_tasks = [
                index_documents.si(chunk, index.pk)
                for chunk in chunked(indexable, chunk_size)
            ]
            chord_flow(pre_task, index_tasks, post_task).apply_async()

        message = _(
            "Indexing %(total)d documents into %(total_chunks)d chunks of "
            "size %(size)d into index %(index)s."
            % {
                "total": total,
                "total_chunks": total_chunks,
                "size": chunk_size,
                "index": index.prefixed_name,
            }
        )
        return message
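
Unlike the plain chain(...) used elsewhere on this page, reindex_all hands its per-chunk tasks to chord_flow so the chunks can run in parallel between prepare_index and finalize_index. The helper itself is not shown here; a minimal sketch of what it could look like with Celery primitives, assuming it simply chains the pre task into a chord whose callback is the post task (the real chord_flow may differ):

from celery import chain, chord

def chord_flow(pre_task, tasks, post_task):
    # Hypothetical sketch: run pre_task first, then every task in `tasks`
    # in parallel, then post_task once the whole group has finished.
    return chain(pre_task, chord(header=tasks, body=post_task))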
Example #8
def render_stale_documents(log=None):
    """Simple task wrapper for rendering stale documents"""
    stale_docs = Document.objects.get_by_stale_rendering().distinct()
    stale_docs_count = stale_docs.count()
    if stale_docs_count == 0:
        # no stale documents to render
        return

    if log is None:
        # fetch a logger in case none is given
        log = render_stale_documents.get_logger()

    log.info('Found %s stale documents' % stale_docs_count)
    stale_pks = stale_docs.values_list('pk', flat=True)

    render_tasks = [render_document_chunk.si(pks)
                    for pks in chunked(stale_pks, 5)]
    chain(*render_tasks).apply_async()
Example #9
    def chain_render_docs(self, docs):
        tasks = []
        count = 0
        total = len(docs)
        n = int(ceil(total / 5))
        chunks = chunked(docs, n)

        for chunk in chunks:
            count += len(chunk)
            tasks.append(
                render_document_chunk.si(chunk, self.cache_control,
                                         self.base_url, self.options['force']))
            percent_complete = int(ceil((count / total) * 100))
            tasks.append(
                email_render_document_progress.si(percent_complete, total))

        # Make it so.
        chain(*tasks).apply_async()
Example #10
    def reindex_all(cls, chunk_size=500, index=None, percent=100):
        """Rebuild ElasticSearch indexes.

        :arg chunk_size: how many documents to bulk index as a single chunk.
        :arg index: the `Index` object to reindex into. Uses the current
            promoted index if none provided.
        :arg percent: 1 to 100--the percentage of the db to index.

        """
        from kuma.search.models import Index
        from kuma.search.tasks import prepare_index, finalize_index
        from kuma.wiki.tasks import index_documents

        index = index or Index.objects.get_current()

        # Get the list of document IDs to index.
        indexable = WikiDocumentType.get_indexable(percent)

        total = len(indexable)
        total_chunks = int(ceil(total / chunk_size))

        pre_task = prepare_index.si(index.pk)
        post_task = finalize_index.si(index.pk)

        if not total:
            # If there's no data we still create the index and finalize it.
            chain(pre_task, post_task).apply_async()
        else:
            index_tasks = [index_documents.si(chunk, index.pk)
                           for chunk in chunked(indexable, chunk_size)]
            chord_flow(pre_task, index_tasks, post_task).apply_async()

        message = _(
            'Indexing %(total)d documents into %(total_chunks)d chunks of '
            'size %(size)d into index %(index)s.' % {
                'total': total,
                'total_chunks': total_chunks,
                'size': chunk_size,
                'index': index.prefixed_name
            }
        )
        return message
Example #11
def render_stale_documents(log=None):
    """Simple task wrapper for rendering stale documents"""
    stale_docs = Document.objects.get_by_stale_rendering().distinct()
    stale_docs_count = stale_docs.count()
    if stale_docs_count == 0:
        # no stale documents to render
        return

    if log is None:
        # fetch a logger in case none is given
        log = render_stale_documents.get_logger()

    log.info("Found %s stale documents" % stale_docs_count)
    stale_pks = stale_docs.values_list("pk", flat=True)

    pre_task = acquire_render_lock.si()
    render_tasks = [render_document_chunk.si(pks) for pks in chunked(stale_pks, 5)]
    post_task = release_render_lock.si()

    chord_flow(pre_task, render_tasks, post_task).apply_async()
Example #12
    def chain_render_docs(self, docs, cache_control, base_url, force,
                          invalidate_cdn_cache=False):
        tasks = []
        count = 0
        total = len(docs)
        n = int(ceil(total / 5))
        chunks = chunked(docs, n)

        for chunk in chunks:
            count += len(chunk)
            tasks.append(
                render_document_chunk.si(chunk, cache_control, base_url,
                                         force, invalidate_cdn_cache))
            percent_complete = int(ceil((count / total) * 100))
            tasks.append(
                email_document_progress.si('render_document', percent_complete,
                                           total))

        # Make it so.
        chain(*tasks).apply_async()
Example #13
def render_stale_documents(log=None):
    """Simple task wrapper for rendering stale documents"""
    stale_docs = Document.objects.get_by_stale_rendering().distinct()
    stale_docs_count = stale_docs.count()
    if stale_docs_count == 0:
        # no stale documents to render
        return

    if log is None:
        # fetch a logger in case none is given
        log = render_stale_documents.get_logger()

    log.info('Found %s stale documents' % stale_docs_count)
    stale_pks = stale_docs.values_list('pk', flat=True)

    pre_task = acquire_render_lock.si()
    render_tasks = [render_document_chunk.si(pks)
                    for pks in chunked(stale_pks, 5)]
    post_task = release_render_lock.si()

    chord_flow(pre_task, render_tasks, post_task).apply_async()
Example #14
 def handle(self, *args, **options):
     Logger = namedtuple("Logger", "info, error")
     log = Logger(info=self.stdout.write, error=self.stderr.write)
     if options["all"] or options["locale"]:
         if options["locale"] and options["all"]:
             raise CommandError(
                 "Specifying --locale with --all is the same as --all"
             )
         filters = {}
         if options["locale"]:
             locale = options["locale"]
             log.info("Publishing all documents in locale {}".format(locale))
             filters.update(locale=locale)
         else:
             log.info("Publishing all documents")
         chunk_size = max(options["chunk_size"], 1)
         docs = Document.objects.filter(**filters)
         doc_pks = docs.values_list("id", flat=True)
         num_docs = len(doc_pks)
         num_tasks = int(ceil(num_docs / float(chunk_size)))
         log.info("...found {} documents.".format(num_docs))
         # Let's publish the documents in a group of chunks, where the
         # tasks in the group can be run in parallel.
         tasks = []
         for i, chunk in enumerate(chunked(doc_pks, chunk_size)):
             message = "Published chunk #{} of {}".format(i + 1, num_tasks)
             tasks.append(
                 publish.si(
                     chunk, completion_message=message, invalidate_cdn_cache=False
                 )
             )
         if num_tasks == 1:
             msg = "Launching a single task handling " "all {} documents.".format(
                 num_docs
             )
         else:
             msg = (
                 "Launching {} paralellizable tasks, each handling "
                 "at most {} documents.".format(num_tasks, chunk_size)
             )
         log.info(msg)
         group(*tasks).apply_async()
     else:
         paths = options["paths"]
         if not paths:
             raise CommandError("Need at least one document path to publish")
         doc_pks = []
         get_doc_pk = Document.objects.values_list("id", flat=True).get
         for path in paths:
             if path.startswith("/"):
                 path = path[1:]
             locale, sep, slug = path.partition("/")
             head, sep, tail = slug.partition("/")
             if head == "docs":
                 slug = tail
             try:
                 doc_pk = get_doc_pk(locale=locale, slug=slug)
             except Document.DoesNotExist:
                 msg = "Document with locale={} and slug={} does not exist"
                 log.error(msg.format(locale, slug))
             else:
                 doc_pks.append(doc_pk)
         publish(
             doc_pks,
             log=log,
             invalidate_cdn_cache=(not options["skip_cdn_invalidation"]),
         )
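
The path handling in the else branch above is worth tracing: a leading slash is stripped, the first segment becomes the locale, and a leading docs/ segment is dropped from the slug. A standalone walk-through of that partition logic with a hypothetical path:

path = "/en-US/docs/Web/HTML"            # hypothetical document path
if path.startswith("/"):
    path = path[1:]                      # "en-US/docs/Web/HTML"
locale, sep, slug = path.partition("/")  # locale == "en-US"
head, sep, tail = slug.partition("/")    # head == "docs"
if head == "docs":
    slug = tail                          # slug == "Web/HTML"
print(locale, slug)                      # en-US Web/HTML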