Example #1
def reindex(config, **kwargs):
    """
    Recreate the Search Index.
    """
    client = config.registry["elasticsearch.client"]
    db = Session(bind=config.registry["sqlalchemy.engine"])

    # We use a randomly named index so that we can do a zero-downtime reindex.
    # Essentially we'll write into the randomly named index until all of the
    # data has been reindexed, at which point we'll point an alias at it and
    # then delete the old randomly named index.

    # Create the new index and associate all of our doc types with it.
    index_base = config.registry["elasticsearch.index"]
    random_token = binascii.hexlify(os.urandom(5)).decode("ascii")
    new_index_name = "{}-{}".format(index_base, random_token)
    doc_types = config.registry.get("search.doc_types", set())
    new_index = get_index(
        new_index_name,
        doc_types,
        using=client,
        shards=config.registry.get("elasticsearch.shards", 1),
        replicas=config.registry.get("elasticsearch.replicas", 1),
    )
    new_index.create()

    # From this point on, if any error occurs, we want to be able to delete our
    # in progress index.
    try:
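        # Use a SERIALIZABLE READ ONLY DEFERRABLE transaction so the export
        # runs against a single consistent snapshot without risking
        # serialization failures or blocking concurrent writers.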
        db.execute(
            """ BEGIN TRANSACTION
                ISOLATION LEVEL SERIALIZABLE
                READ ONLY
                DEFERRABLE
            """
        )
        db.execute("SET statement_timeout = '600s'")

        bulk(client, _project_docs(db))
    except:
        new_index.delete()
        raise
    finally:
        db.rollback()
        db.close()

    # Now that we've finished indexing all of our data, we'll point the alias
    # at our new randomly named index and delete the old index.
    if client.indices.exists_alias(name=index_base):
        to_delete = set()
        actions = []
        for name in client.indices.get_alias(name=index_base):
            to_delete.add(name)
            actions.append({"remove": {"index": name, "alias": index_base}})
        actions.append({"add": {"index": new_index_name, "alias": index_base}})
        client.indices.update_aliases({"actions": actions})
        client.indices.delete(",".join(to_delete))
    else:
        client.indices.put_alias(name=index_base, index=new_index_name)
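
The get_index helper is defined elsewhere in the code base and is not shown on this page. A minimal sketch of what it could look like, assuming it builds an elasticsearch_dsl Index; only the call signature is taken from the examples, while the body and module path are assumptions:

from elasticsearch_dsl import Index


def get_index(name, doc_types, *, using, shards=1, replicas=0, interval="1s"):
    # Bind an index object to the given client ("using"), register every
    # document class with it, and apply the shard/replica/refresh settings.
    # Sketch only; the real helper may differ.
    index = Index(name, using=using)
    for doc_type in doc_types:
        index.document(doc_type)
    index.settings(
        number_of_shards=shards,
        number_of_replicas=replicas,
        refresh_interval=interval,
    )
    return index
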
Example #2
def populate_db(config):
    """
    Sync the Warehouse database with initial sponsors list.
    Once this command has been executed, you shouldn't need to run
    it again.
    """
    # Imported here because we don't want to trigger an import from anything
    # but warehouse.cli at the module scope.
    from warehouse.db import Session

    session = Session(bind=config.registry["sqlalchemy.engine"])

    for data in SPONSORS_DICTS:
        name = data["name"]
        sponsor = session.query(Sponsor).filter_by(name=name).one_or_none()
        if sponsor:
            print(f"Skipping {name} sponsor because it already exists.")
            continue

        params = data.copy()
        img = params.pop("image")
        params["is_active"] = True
        params["link_url"] = params.pop("url")
        params["activity_markdown"] = "\n\n".join(params.pop("activity",
                                                             [])).strip()
        params["color_logo_url"] = BLACK_BASE_URL + img
        if params["footer"] or params["infra_sponsor"]:
            params["white_logo_url"] = WHITE_BASE_URL + img

        sponsor = Sponsor(**params)
        try:
            session.add(sponsor)
            session.commit()
            print(f"{name} sponsor created with success.")
        except Exception as e:
            session.rollback()
            print(f"Error while creating {name} sponsor:")
            print(f"\t{e}")
Example #3
def shell(config, type_):
    """
    Open up a Python shell with Warehouse preconfigured in it.
    """

    # Imported here because we don't want to trigger an import from anything
    # but warehouse.cli at the module scope.
    from warehouse.db import Session

    if type_ is None:
        type_ = autodetect()

    runner = {"bpython": bpython, "ipython": ipython, "plain": plain}[type_]

    session = Session(bind=config.registry["sqlalchemy.engine"])

    try:
        runner(config=config, db=session)
    except ImportError:
        raise click.ClickException(
            "The {!r} shell is not available.".format(type_)
        ) from None
Example #4
def reindex(config, **kwargs):
    """
    Recreate the Search Index.
    """
    client = config.registry["elasticsearch.client"]
    db = Session(bind=config.registry["sqlalchemy.engine"])
    number_of_replicas = config.registry.get("elasticsearch.replicas", 0)
    refresh_interval = config.registry.get("elasticsearch.interval", "1s")

    # We use a randomly named index so that we can do a zero-downtime reindex.
    # Essentially we'll write into the randomly named index until all of the
    # data has been reindexed, at which point we'll point an alias at it and
    # then delete the old randomly named index.

    # Create the new index and associate all of our doc types with it.
    index_base = config.registry["elasticsearch.index"]
    random_token = binascii.hexlify(os.urandom(5)).decode("ascii")
    new_index_name = "{}-{}".format(index_base, random_token)
    doc_types = config.registry.get("search.doc_types", set())

    # Create the new index with zero replicas and index refreshes disabled
    # while we are bulk indexing.
    new_index = get_index(
        new_index_name,
        doc_types,
        using=client,
        shards=config.registry.get("elasticsearch.shards", 1),
        replicas=0,
        interval="-1",
    )
    new_index.create()

    # From this point on, if any error occurs, we want to be able to delete our
    # in progress index.
    try:
        db.execute("SET statement_timeout = '600s'")

        for _ in parallel_bulk(client, _project_docs(db)):
            pass
    except:
        new_index.delete()
        raise
    finally:
        db.rollback()
        db.close()

    # Now that we've finished indexing all of our data we can optimize it and
    # update the replicas and refresh intervals.
    client.indices.forcemerge(index=new_index_name)
    client.indices.put_settings(
        index=new_index_name,
        body={
            "index": {
                "number_of_replicas": number_of_replicas,
                "refresh_interval": refresh_interval,
            }
        },
    )

    # Point the alias at our new randomly named index and delete the old index.
    if client.indices.exists_alias(name=index_base):
        to_delete = set()
        actions = []
        for name in client.indices.get_alias(name=index_base):
            to_delete.add(name)
            actions.append({"remove": {"index": name, "alias": index_base}})
        actions.append({"add": {"index": new_index_name, "alias": index_base}})
        client.indices.update_aliases({"actions": actions})
        client.indices.delete(",".join(to_delete))
    else:
        client.indices.put_alias(name=index_base, index=new_index_name)
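
parallel_bulk returns a lazy generator of (success, info) tuples, which is why the loop above has to iterate over it for any indexing to happen at all. A small variant that also inspects each per-document result might look like this; the helper name is made up for illustration:

from elasticsearch.helpers import parallel_bulk


def _index_all(client, docs):
    # Drain the generator so the bulk requests are actually sent, and fail
    # loudly on any document that reports an indexing error.
    for ok, info in parallel_bulk(client, docs):
        if not ok:
            raise RuntimeError("Failed to index document: {!r}".format(info))
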
Example #5
def reindex(config, **kwargs):
    """
    Recreate the Search Index.
    """
    client = config.registry["elasticsearch.client"]
    db = Session(bind=config.registry["sqlalchemy.engine"])
    number_of_replicas = config.registry.get("elasticsearch.replicas", 0)
    refresh_interval = config.registry.get("elasticsearch.interval", "1s")

    # We use a randomly named index so that we can do a zero-downtime reindex.
    # Essentially we'll write into the randomly named index until all of the
    # data has been reindexed, at which point we'll point an alias at it and
    # then delete the old randomly named index.

    # Create the new index and associate all of our doc types with it.
    index_base = config.registry["elasticsearch.index"]
    random_token = binascii.hexlify(os.urandom(5)).decode("ascii")
    new_index_name = "{}-{}".format(index_base, random_token)
    doc_types = config.registry.get("search.doc_types", set())
    shards = config.registry.get("elasticsearch.shards", 1)

    # Create the new index with zero replicas and index refreshes disabled
    # while we are bulk indexing.
    new_index = get_index(
        new_index_name,
        doc_types,
        using=client,
        shards=shards,
        replicas=0,
        interval="-1",
    )
    new_index.create(wait_for_active_shards=shards)

    # From this point on, if any error occurs, we want to be able to delete our
    # in progress index.
    try:
        db.execute("SET statement_timeout = '600s'")

        for _ in parallel_bulk(client, _project_docs(db)):
            pass
    except:  # noqa
        new_index.delete()
        raise
    finally:
        db.rollback()
        db.close()

    # Now that we've finished indexing all of our data we can optimize it and
    # update the replicas and refresh intervals.
    client.indices.forcemerge(index=new_index_name)
    client.indices.put_settings(
        index=new_index_name,
        body={
            "index": {
                "number_of_replicas": number_of_replicas,
                "refresh_interval": refresh_interval,
            }
        }
    )

    # Point the alias at our new randomly named index and delete the old index.
    if client.indices.exists_alias(name=index_base):
        to_delete = set()
        actions = []
        for name in client.indices.get_alias(name=index_base):
            to_delete.add(name)
            actions.append({"remove": {"index": name, "alias": index_base}})
        actions.append({"add": {"index": new_index_name, "alias": index_base}})
        client.indices.update_aliases({"actions": actions})
        client.indices.delete(",".join(to_delete))
    else:
        client.indices.put_alias(name=index_base, index=new_index_name)
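
All three reindex variants feed bulk / parallel_bulk from a _project_docs(db) generator that is not shown on this page. A heavily simplified sketch of the idea, assuming a SQLAlchemy Project model with a to_dict() serializer; both names, the query, and the index name are placeholders, not the real Warehouse code:

def _project_docs(db):
    # Stream projects out of the database and yield one bulk action per
    # document.  bulk()/parallel_bulk() in the examples are not given an
    # index= argument, so each action names its own target index.
    for project in db.query(Project).yield_per(1000):
        yield {
            "_index": "warehouse",  # placeholder target index or alias
            "_id": project.normalized_name,
            "_source": project.to_dict(),
        }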