Example #1
def _index_program_enrolled_users_chunk(program_enrollments):
    """
    Add/update a list of ProgramEnrollment records in Elasticsearch

    Args:
        program_enrollments (list of ProgramEnrollments):
            List of ProgramEnrollments to serialize and index

    Returns:
        int: Number of items inserted into Elasticsearch
    """

    conn = get_conn()
    insert_count, errors = bulk(
        conn,
        (serialize_program_enrolled_user(program_enrollment)
         for program_enrollment in program_enrollments),
        index=settings.ELASTICSEARCH_INDEX,
        doc_type=USER_DOC_TYPE,
    )
    if len(errors) > 0:
        raise ReindexException(
            "Error during bulk insert: {errors}".format(errors=errors))
    refresh_index()
    return insert_count
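A helper like this is typically driven by a wrapper that splits the full set of enrollments into batches. A minimal sketch of such a driver, assuming a chunks() utility like the one used in Example #2 (the wrapper name and default chunk size here are hypothetical):

def index_program_enrolled_users(program_enrollments, chunk_size=100):
    """
    Hypothetical driver: index ProgramEnrollments in fixed-size chunks.

    Returns:
        int: Total number of items inserted into Elasticsearch
    """
    count = 0
    for chunk in chunks(program_enrollments, chunk_size=chunk_size):
        count += _index_program_enrolled_users_chunk(chunk)
    return count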
Example #2
def index_items(documents, object_type, **kwargs):
    """
    Index documents into every active alias for the given object type

    Args:
        documents (iterable of dict): An iterable of Elasticsearch documents to index
        object_type (str): the ES object type
    """
    conn = get_conn()
    # bulk() could break the iterable into chunks itself, but we chunk here so
    # that the same materialized chunk can be indexed into every active alias;
    # a bare generator would be exhausted after the first alias.
    for chunk in chunks(documents,
                        chunk_size=settings.ELASTICSEARCH_INDEXING_CHUNK_SIZE):
        for alias in get_active_aliases(conn, [object_type]):
            _, errors = bulk(
                conn,
                chunk,
                index=alias,
                doc_type=GLOBAL_DOC_TYPE,
                chunk_size=settings.ELASTICSEARCH_INDEXING_CHUNK_SIZE,
                **kwargs,
            )
            if len(errors) > 0:
                raise ReindexException(
                    f"Error during bulk {object_type} insert: {errors}")
Example #3
def _index_chunk(chunk, *, index):
    """
    Add/update a list of records in Elasticsearch

    Args:
        chunk (list):
            List of serialized items to index
        index (str): An Elasticsearch index

    Returns:
        int: Number of items inserted into Elasticsearch
    """

    conn = get_conn(verify_indices=[index])
    insert_count, errors = bulk(
        conn,
        chunk,
        index=index,
        doc_type=GLOBAL_DOC_TYPE,
    )
    if len(errors) > 0:
        raise ReindexException(
            "Error during bulk insert: {errors}".format(errors=errors))

    refresh_index(index)
    return insert_count
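refresh_index() is another helper that is not shown. Refreshing after a bulk insert makes the new documents visible to search immediately, instead of after the next automatic refresh; a plausible one-liner, assuming the keyword-only get_conn() from Example #6:

def refresh_index(index):
    """
    Sketch of the assumed helper: force a refresh so freshly indexed
    documents become searchable right away.
    """
    conn = get_conn(verify_indices=[index])
    conn.indices.refresh(index=index)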
Example #4
def get_conn(verify=True):
    """
    Lazily create the connection.
    """
    # pylint: disable=global-statement
    # This is ugly. Any suggestions on a way that doesn't require "global"?
    global _CONN
    global _CONN_VERIFIED

    do_verify = False
    if _CONN is None:
        _CONN = connections.create_connection(hosts=[URL])
        # Verify connection on first connect if verify=True.
        do_verify = verify

    if verify and not _CONN_VERIFIED:
        # If we have a connection but haven't verified before, do it now.
        do_verify = True

    if not do_verify:
        if not verify:
            # We only skip verification if we're reindexing or
            # deleting the index. Make sure we verify next time we connect.
            _CONN_VERIFIED = False
        return _CONN

    # Make sure everything exists.
    if not _CONN.indices.exists(INDEX_NAME):
        raise ReindexException(
            "Unable to find index {index_name}".format(index_name=INDEX_NAME))

    mapping = _CONN.indices.get_mapping()
    if INDEX_NAME not in mapping:
        raise ReindexException(
            "No mappings found in index {index_name}".format(
                index_name=INDEX_NAME))

    # Reuse the mapping fetched above instead of issuing a second request.
    mappings = mapping[INDEX_NAME]["mappings"]
    if DOC_TYPE not in mappings.keys():
        raise ReindexException(
            "Mapping {doc_type} not found".format(doc_type=DOC_TYPE))

    _CONN_VERIFIED = True
    return _CONN
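get_conn() relies on module-level state and constants that sit outside the snippet. The surrounding module presumably declares something like the following (the concrete values here are made up; in the Django-settings variants they come from settings.ELASTICSEARCH_URL and settings.ELASTICSEARCH_INDEX instead):

from elasticsearch_dsl import connections

# One shared connection per process, created lazily by get_conn().
_CONN = None
# Whether the index/mapping existence checks have passed for _CONN.
_CONN_VERIFIED = False

# Hypothetical values; real ones would come from configuration.
URL = "http://localhost:9200"
INDEX_NAME = "learning_resources"
DOC_TYPE = "learningresource"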
Example #5
def get_conn(verify=True):
    """
    Lazily create the connection.
    """
    # pylint: disable=global-statement
    global _CONN
    global _CONN_VERIFIED

    do_verify = False
    if _CONN is None:
        headers = None
        if settings.ELASTICSEARCH_X_API_KEY is not None:
            headers = {'X-Api-Key': settings.ELASTICSEARCH_X_API_KEY}
        _CONN = connections.create_connection(
            hosts=[settings.ELASTICSEARCH_URL], headers=headers)
        # Verify connection on first connect if verify=True.
        do_verify = verify

    if verify and not _CONN_VERIFIED:
        # If we have a connection but haven't verified before, do it now.
        do_verify = True

    if not do_verify:
        if not verify:
            # We only skip verification if we're reindexing or
            # deleting the index. Make sure we verify next time we connect.
            _CONN_VERIFIED = False
        return _CONN

    # Make sure everything exists.
    index_name = settings.ELASTICSEARCH_INDEX
    if not _CONN.indices.exists(index_name):
        raise ReindexException(
            "Unable to find index {index_name}".format(index_name=index_name))

    mappings = _CONN.indices.get_mapping()[index_name]["mappings"]
    for doc_type in DOC_TYPES:
        if doc_type not in mappings.keys():
            raise ReindexException(
                "Mapping {doc_type} not found".format(doc_type=doc_type))

    _CONN_VERIFIED = True
    return _CONN
Example #6
def get_conn(*, verify=True, verify_indices=None):
    """
    Lazily create the connection.

    Args:
        verify (bool): If true, check the presence of indices and mappings
        verify_indices (list of str): If set, check the presence of these indices. Else use the defaults.

    Returns:
        elasticsearch.client.Elasticsearch: An Elasticsearch client
    """
    # pylint: disable=global-statement
    global _CONN
    global _CONN_VERIFIED

    do_verify = False
    if _CONN is None:
        http_auth = settings.ELASTICSEARCH_HTTP_AUTH
        use_ssl = http_auth is not None
        _CONN = connections.create_connection(
            hosts=[settings.ELASTICSEARCH_URL],
            http_auth=http_auth,
            use_ssl=use_ssl,
            # make sure we verify SSL certificates (off by default)
            verify_certs=use_ssl)
        # Verify connection on first connect if verify=True.
        do_verify = verify

    if verify and not _CONN_VERIFIED:
        # If we have a connection but haven't verified before, do it now.
        do_verify = True

    if not do_verify:
        if not verify:
            # We only skip verification if we're reindexing or
            # deleting the index. Make sure we verify next time we connect.
            _CONN_VERIFIED = False
        return _CONN

    # Make sure everything exists.
    if verify_indices is None:
        verify_indices = set()
        for index_type in ALL_INDEX_TYPES:
            verify_indices = verify_indices.union(get_aliases(index_type))
    for verify_index in verify_indices:
        if not _CONN.indices.exists(verify_index):
            raise ReindexException("Unable to find index {index_name}".format(
                index_name=verify_index))

    _CONN_VERIFIED = True
    return _CONN
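With this keyword-only signature, callers can skip verification entirely or verify only the indices they are about to touch, for example:

# Reindexing or deleting: the target index may not exist yet, so skip checks.
conn = get_conn(verify=False)

# Normal indexing: verify just the index being written to
# ("discussions_abc123" is a made-up backing index name).
conn = get_conn(verify_indices=["discussions_abc123"])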
Example #7
def finish_recreate_index(results, backing_indices):
    """
    Swap reindex backing index with default backing index

    Args:
        results (list or bool): Task results to check for errors
        backing_indices (dict): The backing elasticsearch indices keyed by object type
    """
    errors = merge_strings(results)
    if errors:
        raise ReindexException(
            f"Errors occurred during recreate_index: {errors}")

    log.info(
        "Done with temporary index. Pointing default aliases to newly created backing indexes..."
    )
    for obj_type, backing_index in backing_indices.items():
        api.switch_indices(backing_index, obj_type)
    log.info("recreate_index has finished successfully!")
Example #8
def _index_resource_chunk(resource_ids):
    """Add/update records in Elasticsearch."""

    # Terms assigned to the resources.
    term_info = get_resource_terms(resource_ids)

    ensure_vocabulary_mappings(term_info)

    # Perform bulk insert using Elasticsearch directly.
    conn = get_conn()
    resources = LearningResource.objects.filter(id__in=resource_ids).iterator()
    insert_count, errors = bulk(
        conn,
        (resource_to_dict(x, term_info[x.id]) for x in resources),
        index=INDEX_NAME,
        doc_type=DOC_TYPE,
    )

    if errors:
        raise ReindexException(
            "Error during bulk insert: {errors}".format(errors=errors))
    refresh_index()

    return insert_count
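resource_to_dict() is assumed to serialize one resource per Elasticsearch document. For bulk() to overwrite existing documents on reindex rather than duplicate them, each document should carry a stable _id; a sketch of the expected shape (the field names here are invented):

def resource_to_dict(resource, term_info):
    """
    Hypothetical serializer; the real field list isn't shown in the example.
    """
    return {
        "_id": resource.id,  # stable id so repeated indexing overwrites
        "title": resource.title,
        "terms": term_info,
    }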