Python get_documents Examples, kitsune.search.es_utils.get_documents Python Examples

Example #1

0

Show file

File: cron.py Project: 1234-/kitsune

def auto_archive_old_questions():
    """Archive all questions that were created over 180 days ago"""
    # Set up logging so it doesn't send Ricky email.
    logging.basicConfig(level=logging.ERROR)

    # Get a list of ids of questions we're going to go change. We need
    # a list of ids so that we can feed it to the update, but then
    # also know what we need to update in the index.
    days_180 = datetime.now() - timedelta(days=180)
    q_ids = list(Question.objects.filter(is_archived=False)
                                 .filter(created__lte=days_180)
                                 .values_list('id', flat=True))

    if q_ids:
        log.info('Updating %d questions', len(q_ids))

        sql = """
            UPDATE questions_question
            SET is_archived = 1
            WHERE id IN (%s)
            """ % ','.join(map(str, q_ids))

        cursor = connection.cursor()
        cursor.execute(sql)
        if not transaction.get_connection().in_atomic_block:
            transaction.commit()

        if settings.ES_LIVE_INDEXING:
            try:
                # So... the first time this runs, it'll handle 160K
                # questions or so which stresses everything. Thus we
                # do it in chunks because otherwise this won't work.
                #
                # After we've done this for the first time, we can nix
                # the chunking code.

                from kitsune.search.utils import chunked
                for chunk in chunked(q_ids, 100):

                    # Fetch all the documents we need to update.
                    es_docs = get_documents(QuestionMappingType, chunk)

                    log.info('Updating %d index documents', len(es_docs))

                    documents = []

                    # For each document, update the data and stick it
                    # back in the index.
                    for doc in es_docs:
                        doc[u'question_is_archived'] = True
                        doc[u'indexed_on'] = int(time.time())
                        documents.append(doc)

                    QuestionMappingType.bulk_index(documents)

            except ES_EXCEPTIONS:
                # Something happened with ES, so let's push index
                # updating into an index_task which retries when it
                # fails because of ES issues.
                index_task.delay(QuestionMappingType, q_ids)

Example #2

0

Show file

def auto_archive_old_questions():
    """Archive all questions that were created over 180 days ago"""
    # Set up logging so it doesn't send Ricky email.
    logging.basicConfig(level=logging.ERROR)

    # Get a list of ids of questions we're going to go change. We need
    # a list of ids so that we can feed it to the update, but then
    # also know what we need to update in the index.
    days_180 = datetime.now() - timedelta(days=180)
    q_ids = list(
        Question.objects.filter(is_archived=False).filter(
            created__lte=days_180).values_list('id', flat=True))

    if q_ids:
        log.info('Updating %d questions', len(q_ids))

        sql = """
            UPDATE questions_question
            SET is_archived = 1
            WHERE id IN (%s)
            """ % ','.join(map(str, q_ids))

        cursor = connection.cursor()
        cursor.execute(sql)
        transaction.commit_unless_managed()

        if settings.ES_LIVE_INDEXING:
            try:
                # So... the first time this runs, it'll handle 160K
                # questions or so which stresses everything. Thus we
                # do it in chunks because otherwise this won't work.
                #
                # After we've done this for the first time, we can nix
                # the chunking code.

                from kitsune.search.utils import chunked
                for chunk in chunked(q_ids, 100):

                    # Fetch all the documents we need to update.
                    es_docs = get_documents(QuestionMappingType, chunk)

                    log.info('Updating %d index documents', len(es_docs))

                    documents = []

                    # For each document, update the data and stick it
                    # back in the index.
                    for doc in es_docs:
                        doc[u'question_is_archived'] = True
                        doc[u'indexed_on'] = int(time.time())
                        documents.append(doc)

                    QuestionMappingType.bulk_index(documents)

            except ES_EXCEPTIONS:
                # Something happened with ES, so let's push index
                # updating into an index_task which retries when it
                # fails because of ES issues.
                index_task.delay(QuestionMappingType, q_ids)

Example #3

0

Show file

    def test_analyzer_choices(self):
        """Check that the indexer picked the right analyzer."""

        ids = [d.id for d in list(self.docs.values())]
        docs = es_utils.get_documents(DocumentMappingType, ids)
        for doc in docs:
            locale = doc['locale']
            eq_(doc['_analyzer'], self.locale_data[locale]['analyzer'])

Example #4

0

Show file

File: test_es.py Project: Acidburn0zzz/kitsune

    def test_analyzer_choices(self):
        """Check that the indexer picked the right analyzer."""

        ids = [d.id for d in self.docs.values()]
        docs = es_utils.get_documents(DocumentMappingType, ids)
        for doc in docs:
            locale = doc['locale']
            eq_(doc['_analyzer'], self.locale_data[locale]['analyzer'])

Example #5

0

Show file

File: tasks.py Project: ziegeer/kitsune

def update_question_vote_chunk(data):
    """Update num_votes_past_week for a number of questions."""

    # First we recalculate num_votes_past_week in the db.
    log.info('Calculating past week votes for %s questions.' % len(data))

    ids = ','.join(map(str, data))
    sql = """
        UPDATE questions_question q
        SET num_votes_past_week = (
            SELECT COUNT(created)
            FROM questions_questionvote qv
            WHERE qv.question_id = q.id
            AND qv.created >= DATE(SUBDATE(NOW(), 7))
        )
        WHERE q.id IN (%s);
        """ % ids
    cursor = connection.cursor()
    cursor.execute(sql)
    if not transaction.get_connection().in_atomic_block:
        transaction.commit()

    # Next we update our index with the changes we made directly in
    # the db.
    if data and settings.ES_LIVE_INDEXING:
        # Get the data we just updated from the database.
        sql = """
            SELECT id, num_votes_past_week
            FROM questions_question
            WHERE id in (%s);
            """ % ids
        cursor = connection.cursor()
        cursor.execute(sql)

        # Since this returns (id, num_votes_past_week) tuples, we can
        # convert that directly to a dict.
        id_to_num = dict(cursor.fetchall())

        try:
            # Fetch all the documents we need to update.
            from kitsune.questions.models import QuestionMappingType
            from kitsune.search import es_utils
            es_docs = es_utils.get_documents(QuestionMappingType, data)

            # For each document, update the data and stick it back in the
            # index.
            for doc in es_docs:
                # Note: Need to keep this in sync with
                # Question.extract_document.
                num = id_to_num[int(doc[u'id'])]
                doc[u'question_num_votes_past_week'] = num

                QuestionMappingType.index(doc, id_=doc['id'])
        except ES_EXCEPTIONS:
            # Something happened with ES, so let's push index updating
            # into an index_task which retries when it fails because
            # of ES issues.
            index_task.delay(QuestionMappingType, id_to_num.keys())

Example #6

0

Show file

File: 0003_set_archived.py Project: rik/kitsune

    def change_and_reindex(self, orm, is_archived, is_locked):
        """Locks all questions that were created over 180 days ago"""
        # Get a list of ids of questions we're going to go change. We need
        # a list of ids so that we can feed it to the update, but then
        # also know what we need to update in the index.
        days_180 = datetime.now() - timedelta(days=180)
        assert is_archived != is_locked
        f = Q(created__lte=days_180)
        if is_archived:
            f |= Q(is_locked=True)
        if is_locked:
            f |= Q(is_archived=True)

        # Update the DB
        (orm.Question.objects.filter(f).update(
            is_archived=is_archived, is_locked=is_locked))

        # Using the efficient .update() of query sets doesn't emit any
        # signals, so live indexing won't automatically happen. This
        # does it manually.
        if settings.ES_LIVE_INDEXING:
            q_ids = list(
                orm.Question.objects.filter(f).values_list('id', flat=True))
            try:
                # This is going to process about 200K questions in
                # production, so it will take a while and stress
                # everything. To alleviate this stress, it is
                # divided into chunks.

                for chunk in chunked(q_ids, 1000):
                    # Fetch all the documents we need to update.
                    es_docs = get_documents(QuestionMappingType, chunk)

                    documents = []

                    # For each document, update the data and stick it
                    # back in the index.
                    for doc in es_docs:
                        doc[u'question_is_locked'] = is_locked
                        doc[u'question_is_archived'] = is_archived
                        doc[u'indexed_on'] = int(time.time())
                        documents.append(doc)

                    if documents:
                        QuestionMappingType.bulk_index(documents)

            except ES_EXCEPTIONS:
                # Something happened with ES, so let's push index
                # updating into an index_task which retries when it
                # fails because of ES issues.
                index_task.delay(QuestionMappingType, q_ids)

Example #7

0

Show file

    def test_get_documents(self):
        q = QuestionFactory()
        self.refresh()

        docs = es_utils.get_documents(QuestionMappingType, [q.id])
        eq_(docs[0]['id'], q.id)

Example #8

0

Show file

File: test_es.py Project: Acidburn0zzz/kitsune

    def test_get_documents(self):
        q = question(save=True)
        self.refresh()

        docs = es_utils.get_documents(QuestionMappingType, [q.id])
        eq_(docs[0]['id'], q.id)

Example #9

0

Show file

File: test_es.py Project: kapalselam/kitsune

    def test_get_documents(self):
        q = question(save=True)
        self.refresh()

        docs = es_utils.get_documents(QuestionMappingType, [q.id])
        eq_(docs[0]['id'], q.id)

Example #10

0

Show file

File: auto_archive_old_questions.py Project: willoughbyrm/kitsune

    def handle(self, **options):
        # Set up logging so it doesn't send Ricky email.
        logging.basicConfig(level=logging.ERROR)

        # Get a list of ids of questions we're going to go change. We need
        # a list of ids so that we can feed it to the update, but then
        # also know what we need to update in the index.
        days_180 = datetime.now() - timedelta(days=180)
        q_ids = list(
            Question.objects.filter(is_archived=False).filter(
                created__lte=days_180).values_list("id", flat=True))

        if q_ids:
            log.info("Updating %d questions", len(q_ids))

            sql = """
                UPDATE questions_question
                SET is_archived = 1
                WHERE id IN (%s)
                """ % ",".join(map(str, q_ids))

            cursor = connection.cursor()
            cursor.execute(sql)
            if not transaction.get_connection().in_atomic_block:
                transaction.commit()

            if settings.ES_LIVE_INDEXING:
                # elastic v7 code:
                answer_ids = list(
                    Answer.objects.filter(question_id__in=q_ids).values_list(
                        "id", flat=True))
                index_objects_bulk.delay("QuestionDocument", q_ids)
                index_objects_bulk.delay("AnswerDocument", answer_ids)

                # elastic v2 code:
                try:
                    # So... the first time this runs, it'll handle 160K
                    # questions or so which stresses everything. Thus we
                    # do it in chunks because otherwise this won't work.
                    #
                    # After we've done this for the first time, we can nix
                    # the chunking code.

                    from kitsune.search.utils import chunked

                    for chunk in chunked(q_ids, 100):

                        # Fetch all the documents we need to update.
                        es_docs = get_documents(QuestionMappingType, chunk)

                        log.info("Updating %d index documents", len(es_docs))

                        documents = []

                        # For each document, update the data and stick it
                        # back in the index.
                        for doc in es_docs:
                            doc["question_is_archived"] = True
                            doc["indexed_on"] = int(time.time())
                            documents.append(doc)

                        QuestionMappingType.bulk_index(documents)

                except ES_EXCEPTIONS:
                    # Something happened with ES, so let's push index
                    # updating into an index_task which retries when it
                    # fails because of ES issues.
                    index_task.delay(to_class_path(QuestionMappingType), q_ids)

Example #11

0

Show file

File: test_es.py Project: chupahanks/kitsune

    def test_get_documents(self):
        q = QuestionFactory()
        self.refresh()

        docs = es_utils.get_documents(QuestionMappingType, [q.id])
        eq_(docs[0]["id"], q.id)