Example 1
def es(request):
    client = request.registry["elasticsearch.client"]
    doc_types = request.registry.get("search.doc_types", set())
    index_name = request.registry["elasticsearch.index"]
    index = get_index(
        index_name,
        doc_types,
        using=client,
        shards=request.registry.get("elasticsearch.shards", 1),
        replicas=request.registry.get("elasticsearch.replicas", 0),
    )
    return index.search()
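
This helper returns an elasticsearch_dsl Search bound to the configured index. In a Pyramid application it is typically exposed as a reified request attribute; below is a minimal sketch of that wiring, assuming the attribute name "es" and an illustrative query, neither of which appears in the example itself.

def includeme(config):
    # Hypothetical wiring: expose the helper above as ``request.es``.
    # ``reify=True`` builds the Search object once per request and caches it.
    config.add_request_method(es, name="es", reify=True)

# Illustrative use inside a view:
#   results = request.es.query("match", name="flask")[:25].execute()
#   for hit in results:
#       print(hit.meta.id, hit.meta.score)
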
Example 2
def reindex_project(self, request, project_name):
    r = redis.StrictRedis.from_url(request.registry.settings["celery.scheduler_url"])
    try:
        with SearchLock(r, timeout=15, blocking_timeout=1):
            client = request.registry["elasticsearch.client"]
            doc_types = request.registry.get("search.doc_types", set())
            index_name = request.registry["elasticsearch.index"]
            get_index(
                index_name,
                doc_types,
                using=client,
                shards=request.registry.get("elasticsearch.shards", 1),
                replicas=request.registry.get("elasticsearch.replicas", 0),
            )

            for _ in parallel_bulk(
                client, _project_docs(request.db, project_name), index=index_name
            ):
                pass
    except redis.exceptions.LockError as exc:
        raise self.retry(countdown=60, exc=exc)
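
SearchLock itself is not part of these snippets. It behaves like a context manager around a redis-py lock with a fixed key, so only one (re)index task touches the index at a time, and a LockError propagates when the lock cannot be acquired within blocking_timeout. A plausible sketch follows; the "search-index" key name is an assumption.

import redis

class SearchLock:
    # Hypothetical reconstruction of the helper used above: wraps redis-py's
    # Lock so it can be used as a context manager that raises LockError when
    # the lock cannot be acquired within ``blocking_timeout``.
    def __init__(self, redis_client, timeout=None, blocking_timeout=None):
        self.lock = redis_client.lock(
            "search-index",  # assumed lock key
            timeout=timeout,
            blocking_timeout=blocking_timeout,
        )

    def __enter__(self):
        if self.lock.acquire():
            return self
        raise redis.exceptions.LockError("Could not acquire lock!")

    def __exit__(self, type, value, tb):
        self.lock.release()
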
Example 3
def reindex(self, request):
    """
    Recreate the Search Index.
    """
    r = redis.StrictRedis.from_url(request.registry.settings["celery.scheduler_url"])
    try:
        with SearchLock(r, timeout=30 * 60, blocking_timeout=30):
            p = urllib.parse.urlparse(request.registry.settings["elasticsearch.url"])
            qs = urllib.parse.parse_qs(p.query)
            kwargs = {
                "hosts": [urllib.parse.urlunparse(p[:2] + ("",) * 4)],
                "verify_certs": True,
                "ca_certs": certifi.where(),
                "timeout": 30,
                "retry_on_timeout": True,
                "serializer": serializer.serializer,
            }
            aws_auth = bool(qs.get("aws_auth", False))
            if aws_auth:
                aws_region = qs.get("region", ["us-east-1"])[0]
                kwargs["connection_class"] = elasticsearch.RequestsHttpConnection
                kwargs["http_auth"] = requests_aws4auth.AWS4Auth(
                    request.registry.settings["aws.key_id"],
                    request.registry.settings["aws.secret_key"],
                    aws_region,
                    "es",
                )
            client = elasticsearch.Elasticsearch(**kwargs)
            number_of_replicas = request.registry.get("elasticsearch.replicas", 0)
            refresh_interval = request.registry.get("elasticsearch.interval", "1s")

            # We use a randomly named index so that we can do a zero downtime reindex.
            # Essentially we'll use a randomly named index which we will use until all
            # of the data has been reindexed, at which point we'll point an alias at
            # our randomly named index, and then delete the old randomly named index.

            # Create the new index and associate all of our doc types with it.
            index_base = request.registry["elasticsearch.index"]
            random_token = binascii.hexlify(os.urandom(5)).decode("ascii")
            new_index_name = "{}-{}".format(index_base, random_token)
            doc_types = request.registry.get("search.doc_types", set())
            shards = request.registry.get("elasticsearch.shards", 1)

            # Create the new index with zero replicas and index refreshes disabled
            # while we are bulk indexing.
            new_index = get_index(
                new_index_name,
                doc_types,
                using=client,
                shards=shards,
                replicas=0,
                interval="-1",
            )
            new_index.create(wait_for_active_shards=shards)

            # From this point on, if any error occurs, we want to be able to delete our
            # in progress index.
            try:
                request.db.execute("SET statement_timeout = '600s'")

                for _ in parallel_bulk(
                    client, _project_docs(request.db), index=new_index_name
                ):
                    pass
            except:  # noqa
                new_index.delete()
                raise
            finally:
                request.db.rollback()
                request.db.close()

            # Now that we've finished indexing all of our data we can update the
            # replicas and refresh intervals.
            client.indices.put_settings(
                index=new_index_name,
                body={
                    "index": {
                        "number_of_replicas": number_of_replicas,
                        "refresh_interval": refresh_interval,
                    }
                },
            )

            # Point the alias at our new randomly named index and delete the old index.
            if client.indices.exists_alias(name=index_base):
                to_delete = set()
                actions = []
                for name in client.indices.get_alias(name=index_base):
                    to_delete.add(name)
                    actions.append({"remove": {"index": name, "alias": index_base}})
                actions.append({"add": {"index": new_index_name, "alias": index_base}})
                client.indices.update_aliases({"actions": actions})
                client.indices.delete(",".join(to_delete))
            else:
                client.indices.put_alias(name=index_base, index=new_index_name)
    except redis.exceptions.LockError as exc:
        raise self.retry(countdown=60, exc=exc)
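
None of the examples include get_index itself. It presumably wraps elasticsearch_dsl's Index: bind it to the client passed as using, register each doc type, and apply the shard, replica, and refresh-interval settings the callers pass in. A rough sketch under those assumptions is below; note that older elasticsearch_dsl releases spell the registration index.doc_type rather than index.document, and the default interval here is a guess.

from elasticsearch_dsl import Index

def get_index(name, doc_types, *, using, shards=1, replicas=0, interval="1s"):
    # Assumed shape of the helper used by the examples: an Index bound to
    # the given client, with the doc types registered and the index-level
    # settings applied before .create() or .search() is called on it.
    index = Index(name, using=using)
    for doc_type in doc_types:
        index.document(doc_type)
    index.settings(
        number_of_shards=shards,
        number_of_replicas=replicas,
        refresh_interval=interval,
    )
    return index
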
Example 4
def reindex(request):
    """
    Recreate the Search Index.
    """
    p = urllib.parse.urlparse(request.registry.settings["elasticsearch.url"])
    client = elasticsearch.Elasticsearch(
        [urllib.parse.urlunparse(p[:2] + ("",) * 4)],
        verify_certs=True,
        ca_certs=certifi.where(),
        timeout=30,
        retry_on_timeout=True,
        serializer=serializer.serializer,
    )
    number_of_replicas = request.registry.get("elasticsearch.replicas", 0)
    refresh_interval = request.registry.get("elasticsearch.interval", "1s")

    # We use a randomly named index so that we can do a zero downtime reindex.
    # Essentially we'll use a randomly named index which we will use until all
    # of the data has been reindexed, at which point we'll point an alias at
    # our randomly named index, and then delete the old randomly named index.

    # Create the new index and associate all of our doc types with it.
    index_base = request.registry["elasticsearch.index"]
    random_token = binascii.hexlify(os.urandom(5)).decode("ascii")
    new_index_name = "{}-{}".format(index_base, random_token)
    doc_types = request.registry.get("search.doc_types", set())
    shards = request.registry.get("elasticsearch.shards", 1)

    # Create the new index with zero replicas and index refreshes disabled
    # while we are bulk indexing.
    new_index = get_index(
        new_index_name,
        doc_types,
        using=client,
        shards=shards,
        replicas=0,
        interval="-1",
    )
    new_index.create(wait_for_active_shards=shards)

    # From this point on, if any error occurs, we want to be able to delete our
    # in progress index.
    try:
        request.db.execute("SET statement_timeout = '600s'")

        for _ in parallel_bulk(client, _project_docs(request.db)):
            pass
    except:  # noqa
        new_index.delete()
        raise
    finally:
        request.db.rollback()
        request.db.close()

    # Now that we've finished indexing all of our data we can optimize it and
    # update the replicas and refresh intervals.
    client.indices.forcemerge(index=new_index_name)
    client.indices.put_settings(
        index=new_index_name,
        body={
            "index": {
                "number_of_replicas": number_of_replicas,
                "refresh_interval": refresh_interval,
            }
        },
    )

    # Point the alias at our new randomly named index and delete the old index.
    if client.indices.exists_alias(name=index_base):
        to_delete = set()
        actions = []
        for name in client.indices.get_alias(name=index_base):
            to_delete.add(name)
            actions.append({"remove": {"index": name, "alias": index_base}})
        actions.append({"add": {"index": new_index_name, "alias": index_base}})
        client.indices.update_aliases({"actions": actions})
        client.indices.delete(",".join(to_delete))
    else:
        client.indices.put_alias(name=index_base, index=new_index_name)
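
_project_docs is the other external piece: a generator of bulk actions consumed by elasticsearch.helpers.parallel_bulk. A hedged sketch of what it could look like follows; Release and its columns are placeholder names, not the project's real model. Where parallel_bulk is called without the index= keyword, as in the example above, each yielded action would also need an "_index" key.

def _project_docs(db, project_name=None):
    # Illustrative sketch only: stream the rows to index and yield one
    # bulk action per document.  ``Release`` and its columns are
    # placeholders for the real ORM model.
    query = db.query(Release)
    if project_name is not None:
        query = query.filter(Release.project_name == project_name)

    for release in query.yield_per(1000):
        yield {
            "_id": release.project_name,
            "_source": {
                "name": release.project_name,
                "version": release.version,
                "summary": release.summary,
            },
        }
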
Example 5
def reindex(self, request):
    """
    Recreate the Search Index.
    """
    r = redis.StrictRedis.from_url(request.registry.settings["celery.scheduler_url"])
    try:
        with SearchLock(r, timeout=30 * 60, blocking_timeout=30):
            p = urllib.parse.urlparse(request.registry.settings["elasticsearch.url"])
            client = elasticsearch.Elasticsearch(
                [urllib.parse.urlunparse(p[:2] + ("",) * 4)],
                verify_certs=True,
                ca_certs=certifi.where(),
                timeout=30,
                retry_on_timeout=True,
                serializer=serializer.serializer,
            )
            number_of_replicas = request.registry.get("elasticsearch.replicas", 0)
            refresh_interval = request.registry.get("elasticsearch.interval", "1s")

            # We use a randomly named index so that we can do a zero downtime reindex.
            # Essentially we'll use a randomly named index which we will use until all
            # of the data has been reindexed, at which point we'll point an alias at
            # our randomly named index, and then delete the old randomly named index.

            # Create the new index and associate all of our doc types with it.
            index_base = request.registry["elasticsearch.index"]
            random_token = binascii.hexlify(os.urandom(5)).decode("ascii")
            new_index_name = "{}-{}".format(index_base, random_token)
            doc_types = request.registry.get("search.doc_types", set())
            shards = request.registry.get("elasticsearch.shards", 1)

            # Create the new index with zero replicas and index refreshes disabled
            # while we are bulk indexing.
            new_index = get_index(
                new_index_name,
                doc_types,
                using=client,
                shards=shards,
                replicas=0,
                interval="-1",
            )
            new_index.create(wait_for_active_shards=shards)

            # From this point on, if any error occurs, we want to be able to delete our
            # in progress index.
            try:
                request.db.execute("SET statement_timeout = '600s'")

                for _ in parallel_bulk(
                    client, _project_docs(request.db), index=new_index_name
                ):
                    pass
            except:  # noqa
                new_index.delete()
                raise
            finally:
                request.db.rollback()
                request.db.close()

            # Now that we've finished indexing all of our data we can update the
            # replicas and refresh intervals.
            client.indices.put_settings(
                index=new_index_name,
                body={
                    "index": {
                        "number_of_replicas": number_of_replicas,
                        "refresh_interval": refresh_interval,
                    }
                },
            )

            # Point the alias at our new randomly named index and delete the old index.
            if client.indices.exists_alias(name=index_base):
                to_delete = set()
                actions = []
                for name in client.indices.get_alias(name=index_base):
                    to_delete.add(name)
                    actions.append({"remove": {"index": name, "alias": index_base}})
                actions.append({"add": {"index": new_index_name, "alias": index_base}})
                client.indices.update_aliases({"actions": actions})
                client.indices.delete(",".join(to_delete))
            else:
                client.indices.put_alias(name=index_base, index=new_index_name)
    except redis.exceptions.LockError as exc:
        raise self.retry(countdown=60, exc=exc)
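
Both task variants call self.retry, which only works on a bound Celery task, and the LockError branch turns a busy lock into a retry a minute later. A generic sketch of how such a task might be registered and scheduled is shown below; the app name, task options, task name, and crontab are assumptions, and in the project these snippets come from the request argument is injected by custom task machinery rather than passed by the caller.

from celery import Celery
from celery.schedules import crontab

app = Celery("search")  # assumed application name

@app.task(bind=True, ignore_result=True, acks_late=True)
def reindex(self, request):
    # Body as in the examples above.  ``request`` would come from the
    # project's own task machinery, not from Celery itself.
    ...

# Illustrative beat schedule: run the full reindex once a day at 03:00.
app.conf.beat_schedule = {
    "daily-reindex": {
        "task": "tasks.reindex",  # assumed task name
        "schedule": crontab(hour=3, minute=0),
    },
}
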