Example #1
    def handle(self, **options):
        """
        The idea is to update the last_contribution_date field.
        """
        today = datetime.now()
        yesterday = today - timedelta(days=1)

        # Support Forum answers
        user_ids = list(
            Answer.objects.filter(
                created__gte=yesterday, created__lt=today
            ).values_list("creator_id", flat=True)
        )

        # KB Edits
        user_ids += list(
            Revision.objects.filter(
                created__gte=yesterday, created__lt=today
            ).values_list("creator_id", flat=True)
        )

        # KB Reviews
        user_ids += list(
            Revision.objects.filter(
                reviewed__gte=yesterday, reviewed__lt=today
            ).values_list("reviewer_id", flat=True)
        )

        # Note:
        # Army of Awesome replies are live indexed. No need to do anything here.

        index_task.delay(to_class_path(UserMappingType), list(set(user_ids)))
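
The snippet queues a reindex but leaves the actual field update to the mapping type. A minimal sketch of what that update could look like, assuming a hypothetical Profile model holding last_contribution_date (not shown in the source):

from django.db.models import Max

def update_last_contribution(user_id):
    # Hypothetical helper: take the most recent contribution across the
    # same three sources the command queries above.
    dates = [
        Answer.objects.filter(creator_id=user_id).aggregate(d=Max("created"))["d"],
        Revision.objects.filter(creator_id=user_id).aggregate(d=Max("created"))["d"],
        Revision.objects.filter(reviewer_id=user_id).aggregate(d=Max("reviewed"))["d"],
    ]
    dates = [d for d in dates if d is not None]
    if dates:
        # Profile and its last_contribution_date field are assumptions here.
        Profile.objects.filter(user_id=user_id).update(last_contribution_date=max(dates))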
Example #2
def reindex(mapping_type_names):
    """Reindex all instances of a given mapping type with celery tasks

    :arg mapping_type_names: list of mapping types to reindex

    """
    outstanding = Record.objects.outstanding().count()
    if outstanding > 0:
        raise ReindexError('There are %s outstanding chunks.' % outstanding)

    batch_id = create_batch_id()

    # Break up all the things we want to index into chunks. This
    # chunkifies by class then by chunk size.
    chunks = []
    for cls, indexable in get_indexable(mapping_types=mapping_type_names):
        chunks.extend((cls, chunk) for chunk in chunked(indexable, CHUNK_SIZE))

    for cls, id_list in chunks:
        index = cls.get_index()
        chunk_name = 'Indexing: %s %d -> %d' % (cls.get_mapping_type_name(),
                                                id_list[0], id_list[-1])
        rec = Record.objects.create(batch_id=batch_id, name=chunk_name)
        index_chunk_task.delay(index, batch_id, rec.id,
                               (to_class_path(cls), id_list))
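
reindex leans on chunked to split the id stream into CHUNK_SIZE pieces. A minimal sketch of such a helper, consistent with how it's used here (each chunk supports id_list[0] and id_list[-1] indexing); the real kitsune.search.utils.chunked may differ:

def chunked(iterable, chunk_size):
    # Yield successive lists of at most chunk_size items.
    chunk = []
    for item in iterable:
        chunk.append(item)
        if len(chunk) == chunk_size:
            yield chunk
            chunk = []
    if chunk:
        yield chunk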
Example #3
    def handle(self, **options):
        # Set up logging so it doesn't send Ricky email.
        logging.basicConfig(level=logging.ERROR)

        # Get a list of ids of questions we're going to change. We need
        # a list of ids so that we can feed it to the update, but then
        # also know what we need to update in the index.
        days_180 = datetime.now() - timedelta(days=180)
        q_ids = list(
            Question.objects.filter(is_archived=False).filter(
                created__lte=days_180).values_list("id", flat=True))

        if q_ids:
            log.info("Updating %d questions", len(q_ids))

            sql = """
                UPDATE questions_question
                SET is_archived = 1
                WHERE id IN (%s)
                """ % ",".join(map(str, q_ids))

            cursor = connection.cursor()
            cursor.execute(sql)
            if not transaction.get_connection().in_atomic_block:
                transaction.commit()

            if settings.ES_LIVE_INDEXING:
                try:
                    # So... the first time this runs, it'll handle 160K
                    # questions or so which stresses everything. Thus we
                    # do it in chunks because otherwise this won't work.
                    #
                    # After we've done this for the first time, we can nix
                    # the chunking code.

                    from kitsune.search.utils import chunked

                    for chunk in chunked(q_ids, 100):

                        # Fetch all the documents we need to update.
                        es_docs = get_documents(QuestionMappingType, chunk)

                        log.info("Updating %d index documents", len(es_docs))

                        documents = []

                        # For each document, update the data and stick it
                        # back in the index.
                        for doc in es_docs:
                            doc["question_is_archived"] = True
                            doc["indexed_on"] = int(time.time())
                            documents.append(doc)

                        QuestionMappingType.bulk_index(documents)

                except ES_EXCEPTIONS:
                    # Something happened with ES, so let's push index
                    # updating into an index_task which retries when it
                    # fails because of ES issues.
                    index_task.delay(to_class_path(QuestionMappingType), q_ids)
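
For reference, the raw SQL above has a one-statement ORM equivalent; a sketch, assuming the same Question model. Like the raw SQL, QuerySet.update() bypasses save() and model signals:

# Equivalent ORM bulk update, no string interpolation of ids needed.
Question.objects.filter(id__in=q_ids).update(is_archived=True)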
Example #4
def reindex_questions_answers(sender, instance, **kw):
    """When a question is saved, we need to reindex it's answers.

    This is needed because the solution may have changed."""
    if instance.id:
        answer_ids = instance.answers.all().values_list("id", flat=True)
        index_task.delay(to_class_path(AnswerMetricsMappingType), list(answer_ids))
Example #5
def update_question_vote_chunk(data):
    """Update num_votes_past_week for a number of questions."""

    # First we recalculate num_votes_past_week in the db.
    log.info("Calculating past week votes for %s questions." % len(data))

    ids = ",".join(map(str, data))
    sql = ("""
        UPDATE questions_question q
        SET num_votes_past_week = (
            SELECT COUNT(created)
            FROM questions_questionvote qv
            WHERE qv.question_id = q.id
            AND qv.created >= DATE(SUBDATE(NOW(), 7))
        )
        WHERE q.id IN (%s);
        """ % ids)
    cursor = connection.cursor()
    cursor.execute(sql)
    if not transaction.get_connection().in_atomic_block:
        transaction.commit()

    # Next we update our index with the changes we made directly in
    # the db.
    if data and settings.ES_LIVE_INDEXING:
        # Get the data we just updated from the database.
        sql = ("""
            SELECT id, num_votes_past_week
            FROM questions_question
            WHERE id in (%s);
            """ % ids)
        cursor = connection.cursor()
        cursor.execute(sql)

        # Since this returns (id, num_votes_past_week) tuples, we can
        # convert that directly to a dict.
        id_to_num = dict(cursor.fetchall())

        try:
            # Fetch all the documents we need to update.
            from kitsune.questions.models import QuestionMappingType
            from kitsune.search import es_utils

            es_docs = es_utils.get_documents(QuestionMappingType, data)

            # For each document, update the data and stick it back in the
            # index.
            for doc in es_docs:
                # Note: Need to keep this in sync with
                # Question.extract_document.
                num = id_to_num[int(doc["id"])]
                doc["question_num_votes_past_week"] = num

                QuestionMappingType.index(doc, id_=doc["id"])
        except ES_EXCEPTIONS:
            # Something happened with ES, so let's push index updating
            # into an index_task which retries when it fails because
            # of ES issues.
            index_task.delay(to_class_path(QuestionMappingType),
                             list(id_to_num.keys()))
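
Both this task and the archiving command above fall back to index_task on ES trouble because it retries. A minimal sketch of that retry pattern with celery; the real kitsune task likely differs, and from_class_path is an assumed inverse of to_class_path:

from celery import shared_task

@shared_task(bind=True, max_retries=3, default_retry_delay=60)
def index_task_sketch(self, cls_path, id_list):
    # Hypothetical retrying indexer: resolve the mapping type from its
    # class path, then reindex the given ids, retrying on ES failures.
    cls = from_class_path(cls_path)  # assumption: inverse of to_class_path
    try:
        for doc in es_utils.get_documents(cls, id_list):
            cls.index(doc, id_=doc["id"])
    except ES_EXCEPTIONS as exc:
        raise self.retry(exc=exc)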
Example #7
    def handle(self, **options):
        index_task.delay(to_class_path(DocumentMappingType),
                         DocumentMappingType.get_indexable())
Example #8
def test_to_class_path():
    eq_(to_class_path(FooBarClassOfAwesome),
        'kitsune.search.tests.test_utils:FooBarClassOfAwesome')
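
The test pins down the "module.path:ClassName" format. A sketch of a to_class_path that satisfies it (the real implementation may differ):

def to_class_path(cls):
    # Render a class as "<module>:<name>", matching the assertion above.
    return "%s:%s" % (cls.__module__, cls.__name__)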
Example #10
    def unindex_later(self):
        """Register myself to be unindexed at the end of the request."""
        _local_tasks().add(
            (unindex_task.delay, (to_class_path(self.get_mapping_type()), (self.pk,)))
        )
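
unindex_later defers work into a per-request task set rather than firing immediately. A sketch of that pattern, assuming a thread-local set that some end-of-request hook flushes (names are illustrative):

import threading

_locals = threading.local()

def _local_tasks():
    # Per-thread set of (callable, args) pairs queued during a request.
    if not hasattr(_locals, "tasks"):
        _locals.tasks = set()
    return _locals.tasks

def flush_local_tasks():
    # Called when the request finishes: fire each queued task once.
    tasks = _local_tasks()
    for fun, args in tasks:
        fun(*args)
    tasks.clear()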