def _index_program_enrolled_users_chunk(program_enrollments):
    """
    Add/update a list of ProgramEnrollment records in Elasticsearch

    Args:
        program_enrollments (list of ProgramEnrollments): List of ProgramEnrollments to serialize and index

    Returns:
        int: Number of items inserted into Elasticsearch
    """
    conn = get_conn()
    insert_count, errors = bulk(
        conn,
        (serialize_program_enrolled_user(program_enrollment) for program_enrollment in program_enrollments),
        index=settings.ELASTICSEARCH_INDEX,
        doc_type=USER_DOC_TYPE,
    )
    if len(errors) > 0:
        raise ReindexException(
            "Error during bulk insert: {errors}".format(errors=errors))
    refresh_index()
    return insert_count


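# A minimal usage sketch for the chunked indexer above, assuming a `chunks`
# helper that splits an iterable into lists (see the sketch after index_items).
# Only _index_program_enrolled_users_chunk comes from the code above; the
# wrapper function and its name are hypothetical.
def index_program_enrolled_users(program_enrollments, chunk_size=100):
    """Index the given enrollments in chunks, returning the total insert count."""
    count = 0
    for chunk in chunks(program_enrollments, chunk_size=chunk_size):
        count += _index_program_enrolled_users_chunk(chunk)
    return count

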
def index_items(documents, object_type, **kwargs):
    """
    Index the given documents for an object type

    Args:
        documents (iterable of dict): An iterable of Elasticsearch documents to index
        object_type (str): the ES object type
    """
    conn = get_conn()
    # bulk will also break an iterable into chunks. However, we should do this here so that
    # we can use the same documents when indexing to multiple aliases.
    for chunk in chunks(documents, chunk_size=settings.ELASTICSEARCH_INDEXING_CHUNK_SIZE):
        for alias in get_active_aliases(conn, [object_type]):
            _, errors = bulk(
                conn,
                chunk,
                index=alias,
                doc_type=GLOBAL_DOC_TYPE,
                chunk_size=settings.ELASTICSEARCH_INDEXING_CHUNK_SIZE,
                **kwargs,
            )
            if len(errors) > 0:
                raise ReindexException(
                    f"Error during bulk {object_type} insert: {errors}")


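# `chunks` is called above but not defined in these excerpts. A minimal sketch
# of such a helper using itertools.islice; the real implementation may differ.
from itertools import islice


def chunks(iterable, *, chunk_size):
    """Yield lists of at most chunk_size items from iterable."""
    iterator = iter(iterable)
    while True:
        chunk = list(islice(iterator, chunk_size))
        if not chunk:
            return
        yield chunk

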
def _index_chunk(chunk, *, index):
    """
    Add/update a list of records in Elasticsearch

    Args:
        chunk (list): List of serialized items to index
        index (str): An Elasticsearch index

    Returns:
        int: Number of items inserted into Elasticsearch
    """
    conn = get_conn(verify_indices=[index])
    insert_count, errors = bulk(
        conn,
        chunk,
        index=index,
        doc_type=GLOBAL_DOC_TYPE,
    )
    if len(errors) > 0:
        raise ReindexException(
            "Error during bulk insert: {errors}".format(errors=errors))
    refresh_index(index)
    return insert_count


def get_conn(verify=True):
    """
    Lazily create the connection.
    """
    # pylint: disable=global-statement
    # This is ugly. Any suggestions on a way that doesn't require "global"?
    global _CONN
    global _CONN_VERIFIED

    do_verify = False
    if _CONN is None:
        _CONN = connections.create_connection(hosts=[URL])
        # Verify connection on first connect if verify=True.
        do_verify = verify

    if verify and not _CONN_VERIFIED:
        # If we have a connection but haven't verified before, do it now.
        do_verify = True

    if not do_verify:
        if not verify:
            # We only skip verification if we're reindexing or
            # deleting the index. Make sure we verify next time we connect.
            _CONN_VERIFIED = False
        return _CONN

    # Make sure everything exists.
    if not _CONN.indices.exists(INDEX_NAME):
        raise ReindexException(
            "Unable to find index {index_name}".format(index_name=INDEX_NAME))

    mapping = _CONN.indices.get_mapping()
    if INDEX_NAME not in mapping:
        raise ReindexException(
            "No mappings found in index {index_name}".format(
                index_name=INDEX_NAME))

    # Reuse the mapping we already fetched instead of issuing a second request.
    mappings = mapping[INDEX_NAME]["mappings"]
    if DOC_TYPE not in mappings.keys():
        raise ReindexException(
            "Mapping {doc_type} not found".format(doc_type=DOC_TYPE))

    _CONN_VERIFIED = True
    return _CONN


def get_conn(verify=True):
    """
    Lazily create the connection.
    """
    # pylint: disable=global-statement
    global _CONN
    global _CONN_VERIFIED

    do_verify = False
    if _CONN is None:
        headers = None
        if settings.ELASTICSEARCH_X_API_KEY is not None:
            headers = {'X-Api-Key': settings.ELASTICSEARCH_X_API_KEY}
        _CONN = connections.create_connection(
            hosts=[settings.ELASTICSEARCH_URL], headers=headers)
        # Verify connection on first connect if verify=True.
        do_verify = verify

    if verify and not _CONN_VERIFIED:
        # If we have a connection but haven't verified before, do it now.
        do_verify = True

    if not do_verify:
        if not verify:
            # We only skip verification if we're reindexing or
            # deleting the index. Make sure we verify next time we connect.
            _CONN_VERIFIED = False
        return _CONN

    # Make sure everything exists.
    index_name = settings.ELASTICSEARCH_INDEX
    if not _CONN.indices.exists(index_name):
        raise ReindexException(
            "Unable to find index {index_name}".format(index_name=index_name))

    mappings = _CONN.indices.get_mapping()[index_name]["mappings"]
    for doc_type in DOC_TYPES:
        if doc_type not in mappings.keys():
            raise ReindexException(
                "Mapping {doc_type} not found".format(doc_type=doc_type))

    _CONN_VERIFIED = True
    return _CONN


def get_conn(*, verify=True, verify_indices=None):
    """
    Lazily create the connection.

    Args:
        verify (bool): If true, check the presence of indices and mappings
        verify_indices (list of str): If set, check the presence of these indices. Else use the defaults.

    Returns:
        elasticsearch.client.Elasticsearch: An Elasticsearch client
    """
    # pylint: disable=global-statement
    global _CONN
    global _CONN_VERIFIED

    do_verify = False
    if _CONN is None:
        http_auth = settings.ELASTICSEARCH_HTTP_AUTH
        use_ssl = http_auth is not None
        _CONN = connections.create_connection(
            hosts=[settings.ELASTICSEARCH_URL],
            http_auth=http_auth,
            use_ssl=use_ssl,
            # make sure we verify SSL certificates (off by default)
            verify_certs=use_ssl)
        # Verify connection on first connect if verify=True.
        do_verify = verify

    if verify and not _CONN_VERIFIED:
        # If we have a connection but haven't verified before, do it now.
        do_verify = True

    if not do_verify:
        if not verify:
            # We only skip verification if we're reindexing or
            # deleting the index. Make sure we verify next time we connect.
            _CONN_VERIFIED = False
        return _CONN

    # Make sure everything exists.
    if verify_indices is None:
        verify_indices = set()
        for index_type in ALL_INDEX_TYPES:
            verify_indices = verify_indices.union(get_aliases(index_type))
    for verify_index in verify_indices:
        if not _CONN.indices.exists(verify_index):
            raise ReindexException("Unable to find index {index_name}".format(
                index_name=verify_index))

    _CONN_VERIFIED = True
    return _CONN


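# All three get_conn variants above rely on module-level state that is not
# shown in these excerpts. A minimal sketch of that scaffolding, as an
# assumption about how the module is laid out:
_CONN = None            # cached Elasticsearch connection, created lazily
_CONN_VERIFIED = False  # whether indices/mappings have been verified

# A caller that is about to recreate or delete the index would typically skip
# verification, e.g. conn = get_conn(verify=False); the next call with
# verify=True then re-checks that the indices and mappings exist.

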
def finish_recreate_index(results, backing_indices):
    """
    Swap the reindex backing indices with the default backing indices

    Args:
        results (list or bool): Results indicating whether any errors occurred
        backing_indices (dict): The backing Elasticsearch indices keyed by object type
    """
    errors = merge_strings(results)
    if errors:
        raise ReindexException(
            f"Errors occurred during recreate_index: {errors}")
    log.info(
        "Done with temporary index. Pointing default aliases to newly created backing indexes..."
    )
    for obj_type, backing_index in backing_indices.items():
        api.switch_indices(backing_index, obj_type)
    log.info("recreate_index has finished successfully!")


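# `merge_strings` above flattens the nested task results down to a list of
# error strings. A minimal sketch, assuming results can nest lists and strings
# arbitrarily (with non-string leaves meaning success); the real helper may
# differ.
def merge_strings(list_or_str):
    """Recursively flatten nested lists, keeping only the string items."""
    if isinstance(list_or_str, str):
        return [list_or_str]
    if isinstance(list_or_str, list):
        return [
            item
            for piece in list_or_str
            for item in merge_strings(piece)
        ]
    return []

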
def _index_resource_chunk(resource_ids):
    """Add/update records in Elasticsearch."""
    # Terms assigned to the resources.
    term_info = get_resource_terms(resource_ids)
    ensure_vocabulary_mappings(term_info)

    # Perform bulk insert using Elasticsearch directly.
    conn = get_conn()
    resources = LearningResource.objects.filter(id__in=resource_ids).iterator()
    insert_count, errors = bulk(
        conn,
        (resource_to_dict(x, term_info[x.id]) for x in resources),
        index=INDEX_NAME,
        doc_type=DOC_TYPE,
    )
    if errors:
        raise ReindexException(
            "Error during bulk insert: {errors}".format(errors=errors))
    refresh_index()
    return insert_count