コード例 #1
0
ファイル: reindexer.py プロジェクト: ziqizh/h
def reindex(session, es, request):
    """Reindex all annotations into a new index, and update the alias.

    A new index is configured, its name is stored under
    ``SETTING_NEW_INDEX`` for the duration of the run, all annotations are
    batch-indexed into it (annotations that fail are retried once) and the
    alias is then switched to the new index.  The alias is switched even if
    some annotations still failed after the retry; those are only logged.

    :param session: database session handed to ``BatchIndexer``
    :param es: search client object passed to the index helpers and to
        ``BatchIndexer``
    :param request: request used to look up the ``settings`` service and to
        commit through ``request.tm``
    :raises RuntimeError: if ``get_aliased_index(es)`` returns ``None``
    """

    if get_aliased_index(es) is None:
        raise RuntimeError('cannot reindex if current index is not aliased')

    settings = request.find_service(name='settings')

    new_index = configure_index(es)

    try:
        # Store the name of the index being built and commit immediately,
        # before the batch run starts.
        settings.put(SETTING_NEW_INDEX, new_index)
        request.tm.commit()

        indexer = BatchIndexer(session,
                               es,
                               request,
                               target_index=new_index,
                               op_type='create')

        errored = indexer.index()
        if errored:
            log.debug('failed to index {} annotations, retrying...'.format(
                len(errored)))
            # Give the failed annotations exactly one more attempt.
            errored = indexer.index(errored)
            if errored:
                # ``Logger.warn`` is a deprecated alias of ``warning``.
                log.warning('failed to index {} annotations: {!r}'.format(
                    len(errored), errored))

        update_aliased_index(es, new_index)

    finally:
        # Always remove the in-progress marker, whether or not the run
        # succeeded.
        settings.delete(SETTING_NEW_INDEX)
        request.tm.commit()
コード例 #2
0
ファイル: indexer.py プロジェクト: gnott/h
def reindex_user_annotations(userid):
    """Re-index every annotation whose ``userid`` column matches ``userid``.

    The annotation ids are read through ``celery.request.db`` and handed to
    a ``BatchIndexer``; any ids it reports back as failed are logged.

    :param userid: value the annotations are filtered by
    """
    request = celery.request
    rows = request.db.query(models.Annotation.id).filter_by(userid=userid)
    annotation_ids = [row.id for row in rows]

    failed = BatchIndexer(request.db, request.es, request).index(annotation_ids)
    if failed:
        log.warning('Failed to re-index annotations %s', failed)
コード例 #3
0
ファイル: indexer.py プロジェクト: rolmovel/h
def reindex_user_annotations(userid):
    """Push all annotations filtered by ``userid`` through the batch indexer.

    Ids are collected from ``models.Annotation`` using the database session
    on ``celery.request``; ids the indexer returns as failed are logged as
    a warning.

    :param userid: value used to filter the annotations
    """
    req = celery.request
    id_query = req.db.query(models.Annotation.id).filter_by(userid=userid)
    ids = [record.id for record in id_query]

    batch = BatchIndexer(req.db, req.es, req)
    not_indexed = batch.index(ids)
    if not_indexed:
        log.warning('Failed to re-index annotations %s', not_indexed)
コード例 #4
0
ファイル: indexer.py プロジェクト: nanotate-tool/h
def reindex_annotations_in_date_range(start_date, end_date, max_annotations=250000):
    """Re-index the annotations updated within a date range.

    Annotation ids are selected where ``Annotation.updated`` lies between
    the two bounds (both inclusive), capped at ``max_annotations`` rows, and
    streamed into a ``BatchIndexer``.  Ids the indexer returns as failed
    are logged as a warning.

    :param start_date: lower bound on ``Annotation.updated`` (inclusive)
    :param end_date: upper bound on ``Annotation.updated`` (inclusive)
    :param max_annotations: row limit applied to the query

    """
    log.info(f"Re-indexing from {start_date} to {end_date}...")

    request = celery.request
    batch = BatchIndexer(request.db, request.es, request)

    in_range = (
        request.db.query(Annotation.id)
        .filter(Annotation.updated >= start_date)
        .filter(Annotation.updated <= end_date)
        .limit(max_annotations)
    )
    # Hand over a generator so ids are consumed lazily by the indexer.
    failed = batch.index(row.id for row in in_range)

    if failed:
        log.warning("Failed to re-index annotations into ES6 %s", failed)

    log.info("Re-index from %s to %s complete.", start_date, end_date)
コード例 #5
0
ファイル: move_uri.py プロジェクト: hypothesis/h
def move_uri(ctx, old, new):
    """
    Move annotations and document equivalence data from one URL to another.

    This will **replace** the annotation's ``target_uri`` and all the
    document uri's ``claimant``, plus the matching ``uri`` for self-claim and
    canonical uris.

    The user is asked for confirmation first; anything other than ``y``
    aborts without making changes.  Annotations that fail to re-index are
    reported on stderr.

    :param ctx: context object whose ``obj["bootstrap"]`` callable returns
        the request to work with
    :param old: URL currently stored on the matching records
    :param new: URL to store in its place
    """

    request = ctx.obj["bootstrap"]()

    annotations = _fetch_annotations(request.db, old)
    docuris_claimant = _fetch_document_uri_claimants(request.db, old)
    docuris_uri = _fetch_document_uri_canonical_self_claim(request.db, old)

    prompt = (
        "Changing all annotations and document data matching:\n"
        '"{old}"\nto:\n"{new}"\n'
        "This will affect {ann_count} annotations, {doc_claimant} "
        "document uri claimants, and {doc_uri} document uri self-claims "
        "or canonical uris.\n"
        "Are you sure? [y/N]"
    ).format(
        old=old,
        new=new,
        ann_count=len(annotations),
        doc_claimant=len(docuris_claimant),
        doc_uri=len(docuris_uri),
    )
    c = click.prompt(prompt, default="n", show_default=False)

    if c != "y":
        print("Aborted")
        return

    for annotation in annotations:
        annotation.target_uri = new

    for docuri in docuris_claimant:
        docuri.claimant = new

    for docuri in docuris_uri:
        docuri.uri = new

    if annotations:
        indexer = BatchIndexer(request.db, request.es, request)
        ids = [a.id for a in annotations]
        errored = indexer.index(ids)
        if errored:
            # Report indexing failures instead of silently discarding the
            # ids returned by the indexer.
            click.echo(
                "Failed to re-index annotations: {!r}".format(errored), err=True
            )

    request.db.flush()

    # If more than one document now matches the new URL, merge them.
    documents = models.Document.find_by_uris(request.db, [new])
    if documents.count() > 1:
        merge_documents(request.db, documents)

    request.tm.commit()
コード例 #6
0
def move_uri(ctx, old, new):
    """
    Move annotations and document equivalence data from one URL to another.

    This will **replace** the annotation's ``target_uri`` and all the
    document uri's ``claimant``, plus the matching ``uri`` for self-claim and
    canonical uris.

    Confirmation is requested before anything is changed; any answer other
    than ``y`` aborts.  Annotations that fail to re-index are reported on
    stderr.

    :param ctx: context object whose ``obj["bootstrap"]`` callable returns
        the request to work with
    :param old: URL currently stored on the matching records
    :param new: URL to store in its place
    """

    request = ctx.obj["bootstrap"]()

    annotations = _fetch_annotations(request.db, old)
    docuris_claimant = _fetch_document_uri_claimants(request.db, old)
    docuris_uri = _fetch_document_uri_canonical_self_claim(request.db, old)

    prompt = ("Changing all annotations and document data matching:\n"
              '"{old}"\nto:\n"{new}"\n'
              "This will affect {ann_count} annotations, {doc_claimant} "
              "document uri claimants, and {doc_uri} document uri self-claims "
              "or canonical uris.\n"
              "Are you sure? [y/N]").format(
                  old=old,
                  new=new,
                  ann_count=len(annotations),
                  doc_claimant=len(docuris_claimant),
                  doc_uri=len(docuris_uri),
              )
    c = click.prompt(prompt, default="n", show_default=False)

    if c != "y":
        print("Aborted")
        return

    for annotation in annotations:
        annotation.target_uri = new

    for docuri in docuris_claimant:
        docuri.claimant = new

    for docuri in docuris_uri:
        docuri.uri = new

    if annotations:
        indexer = BatchIndexer(request.db, request.es, request)
        ids = [a.id for a in annotations]
        errored = indexer.index(ids)
        if errored:
            # Report indexing failures instead of silently discarding the
            # ids returned by the indexer.
            click.echo("Failed to re-index annotations: {!r}".format(errored),
                       err=True)

    request.db.flush()

    # If more than one document now matches the new URL, merge them.
    documents = models.Document.find_by_uris(request.db, [new])
    if documents.count() > 1:
        merge_documents(request.db, documents)

    request.tm.commit()
コード例 #7
0
def reindex(session, es, request):
    """Reindex all annotations into a new index, and update the alias.

    A new index is configured, its name is stored in the
    ``reindex.new_index`` setting for the duration of the run, all
    annotations are batch-indexed into it (failed ones are retried once),
    the alias is switched to the new index and the previously aliased index
    is deleted.  Annotations that still fail after the retry are only
    logged; the switch-over happens regardless.

    :param session: database session handed to ``BatchIndexer``
    :param es: search client object passed to the index helpers and to
        ``BatchIndexer``
    :param request: request used to look up the ``settings`` and ``nipsa``
        services and to commit through ``request.tm``
    :raises RuntimeError: if ``get_aliased_index(es)`` returns ``None``
    """

    current_index = get_aliased_index(es)
    if current_index is None:
        raise RuntimeError('cannot reindex if current index is not aliased')

    settings = request.find_service(name='settings')

    # Preload userids of shadowbanned users.
    nipsa_svc = request.find_service(name='nipsa')
    nipsa_svc.fetch_all_flagged_userids()

    new_index = configure_index(es)
    log.info('configured new index {}'.format(new_index))
    # The same setting name is used for every ES version; the former
    # version check assigned the identical value and has been dropped.
    setting_name = 'reindex.new_index'

    try:
        # Store the name of the index being built and commit immediately,
        # before the batch run starts.
        settings.put(setting_name, new_index)
        request.tm.commit()

        log.info('reindexing annotations into new index {}'.format(new_index))
        indexer = BatchIndexer(session,
                               es,
                               request,
                               target_index=new_index,
                               op_type='create')

        errored = indexer.index()
        if errored:
            log.debug('failed to index {} annotations, retrying...'.format(
                len(errored)))
            # Give the failed annotations exactly one more attempt.
            errored = indexer.index(errored)
            if errored:
                # ``Logger.warn`` is a deprecated alias of ``warning``.
                log.warning('failed to index {} annotations: {!r}'.format(
                    len(errored), errored))

        log.info('making new index {} current'.format(new_index))
        update_aliased_index(es, new_index)

        log.info('removing previous index {}'.format(current_index))
        delete_index(es, current_index)

    finally:
        # Always remove the in-progress marker, whether or not the run
        # succeeded.
        settings.delete(setting_name)
        request.tm.commit()
コード例 #8
0
    def test_it_does_not_error_if_annotations_already_indexed(
            self, db_session, es_client, factories, pyramid_request):
        """Only genuine failures are returned as errored ids.

        ``streaming_bulk`` is stubbed to report one success, one generic
        failure and one "document already exists" failure; only the id of
        the generic failure is expected back from ``index()``.
        """
        annotations = factories.Annotation.create_batch(3)
        expected_errored_ids = {annotations[1].id}

        bulk_results = [
            (True, {}),
            (False, {
                "create": {
                    "error": "some error",
                    "_id": annotations[1].id
                }
            }),
            (
                False,
                {
                    "create": {
                        "error": "document already exists",
                        "_id": annotations[2].id,
                    }
                },
            ),
        ]

        # Patch only for the duration of the call so the real
        # ``streaming_bulk`` is restored afterwards and the stub cannot
        # leak into other tests.
        with mock.patch.object(elasticsearch.helpers, "streaming_bulk",
                               mock.Mock(return_value=bulk_results)):
            errored = BatchIndexer(db_session, es_client, pyramid_request,
                                   es_client.index, "create").index()

        assert errored == expected_errored_ids
コード例 #9
0
ファイル: reindexer.py プロジェクト: hypothesis/h
def reindex(session, es, request):
    """Rebuild the search index from scratch and make it the current one.

    Steps: configure a new index, store its name in the
    ``reindex.new_index`` setting while the run is in progress, batch-index
    all annotations into it (retrying failures once), switch the alias to
    the new index and delete the previously aliased one.  The setting is
    removed again in all cases.

    :param session: database session handed to ``BatchIndexer``
    :param es: search client object passed to the index helpers and to
        ``BatchIndexer``
    :param request: request used to look up the ``settings`` and ``nipsa``
        services and to commit through ``request.tm``
    :raises RuntimeError: if ``get_aliased_index(es)`` returns ``None``
    """

    previous_index = get_aliased_index(es)
    if previous_index is None:
        raise RuntimeError("cannot reindex if current index is not aliased")

    settings_svc = request.find_service(name="settings")

    # Load the userids of shadowbanned users ahead of the batch run.
    request.find_service(name="nipsa").fetch_all_flagged_userids()

    fresh_index = configure_index(es)
    log.info("configured new index {}".format(fresh_index))
    marker = "reindex.new_index"

    try:
        settings_svc.put(marker, fresh_index)
        request.tm.commit()

        log.info("reindexing annotations into new index {}".format(fresh_index))
        batch = BatchIndexer(
            session, es, request, target_index=fresh_index, op_type="create"
        )

        failed = batch.index()
        if failed:
            retry_msg = "failed to index {} annotations, retrying...".format(
                len(failed)
            )
            log.debug(retry_msg)
            # One retry only; whatever still fails is reported below.
            failed = batch.index(failed)
        if failed:
            failure_msg = "failed to index {} annotations: {!r}".format(
                len(failed), failed
            )
            log.warning(failure_msg)

        log.info("making new index {} current".format(fresh_index))
        update_aliased_index(es, fresh_index)

        log.info("removing previous index {}".format(previous_index))
        delete_index(es, previous_index)

    finally:
        # Clear the in-progress marker regardless of the outcome.
        settings_svc.delete(marker)
        request.tm.commit()
コード例 #10
0
    def test_it_accepts_different_indexes(self, target_index, es_client):
        """The indexer targets ``target_index``, or the client's index if unset."""
        indexer = BatchIndexer(
            session=sentinel.db,
            es_client=es_client,
            request=sentinel.request,
            target_index=target_index,
        )

        # Compute the expectation first: written inline, the conditional
        # expression binds looser than ``==`` and the falsy case would only
        # assert that ``es_client.index`` is truthy.
        expected_index = target_index if target_index else es_client.index
        assert indexer._target_index == expected_index
コード例 #11
0
ファイル: reindexer.py プロジェクト: bibliotechie/h
def reindex(session, es, request):
    """Build a fresh index from all annotations and point the alias at it.

    Steps: configure a new index, store its name in the
    ``reindex.new_index`` setting while the run is in progress, batch-index
    all annotations into it (retrying failures once), switch the alias to
    the new index and delete the previously aliased one.  The setting is
    removed again in all cases.

    :param session: database session handed to ``BatchIndexer``
    :param es: search client object passed to the index helpers and to
        ``BatchIndexer``
    :param request: request used to look up the ``settings`` and ``nipsa``
        services and to commit through ``request.tm``
    :raises RuntimeError: if ``get_aliased_index(es)`` returns ``None``
    """

    previous_index = get_aliased_index(es)
    if previous_index is None:
        raise RuntimeError("cannot reindex if current index is not aliased")

    settings_svc = request.find_service(name="settings")

    # Load the userids of shadowbanned users ahead of the batch run.
    request.find_service(name="nipsa").fetch_all_flagged_userids()

    fresh_index = configure_index(es)
    log.info("configured new index %s", fresh_index)
    marker = "reindex.new_index"

    try:
        settings_svc.put(marker, fresh_index)
        request.tm.commit()

        log.info("reindexing annotations into new index %s", fresh_index)
        batch = BatchIndexer(
            session,
            es,
            request,
            target_index=fresh_index,
            op_type="create",
        )

        failed = batch.index()
        if failed:
            log.debug("failed to index %d annotations, retrying...", len(failed))
            # One retry only; whatever still fails is reported below.
            failed = batch.index(failed)
        if failed:
            log.warning("failed to index %d annotations: %r", len(failed), failed)

        log.info("making new index %s current", fresh_index)
        update_aliased_index(es, fresh_index)

        log.info("removing previous index %s", previous_index)
        delete_index(es, previous_index)

    finally:
        # Clear the in-progress marker regardless of the outcome.
        settings_svc.delete(marker)
        request.tm.commit()
コード例 #12
0
ファイル: service_factory.py プロジェクト: zhiiker/hypothesis
def factory(_context, request):
    """Build a ``SearchIndexService`` wired to ``request``.

    The service receives the request itself, its ``es`` and ``db``
    attributes, the ``settings`` service and a ``Queue`` that wraps a
    ``BatchIndexer`` built from the same three objects.

    :param _context: not used in the body
    :param request: request supplying ``es``, ``db`` and ``find_service``
    """
    es_client = request.es
    session = request.db
    settings = request.find_service(name="settings")

    indexer = BatchIndexer(session, es_client, request)
    queue = Queue(db=session, es=es_client, batch_indexer=indexer)

    return SearchIndexService(
        request=request,
        es_client=es_client,
        session=session,
        settings=settings,
        queue=queue,
    )
コード例 #13
0
def batch_indexer(db_session, es_client, pyramid_request, moderation_service):
    """Return a ``BatchIndexer`` built from the session, client and request.

    ``moderation_service`` is not used in the body; presumably it is
    requested only so that it is set up first -- TODO confirm.
    """
    indexer = BatchIndexer(db_session, es_client, pyramid_request)
    return indexer
コード例 #14
0
def batch_indexer(  # pylint:disable=unused-argument
    db_session, es_client, pyramid_request, moderation_service
):
    """Return a ``BatchIndexer`` built from the session, client and request.

    ``moderation_service`` is not used in the body (hence the pylint
    directive); presumably it is requested only so that it is set up
    first -- TODO confirm.
    """
    indexer = BatchIndexer(db_session, es_client, pyramid_request)
    return indexer