import logging

from django.conf import settings
from elasticsearch.helpers import bulk, scan

# Module-local helpers (_get_registered, get_elasticsearch, bulk_iterate,
# DEFAULT_INDEX_SETTINGS) are assumed to be defined elsewhere in the package.
logger = logging.getLogger(__name__)


def delete_index():
    """
    Deletes the ElasticSearch indices.
    """
    for indexer in _get_registered():
        es = get_elasticsearch(indexer)
        es.indices.delete(index=indexer.index, ignore=404)

def _delete_items(indexer, pks):
    es = get_elasticsearch(indexer)

    def mapper(pk):
        # Emit a bulk "delete" action for each primary key.
        return {
            "_index": indexer.index,
            "_id": pk,
            "_op_type": "delete",
        }

    # Tolerate documents that are already gone.
    bulk(es, map(mapper, pks), raise_on_error=False)

def update_index(remove=True):
    """
    Performs a full update of the ElasticSearch index.
    """
    logger.info('Updating index')
    create_index()
    for indexer in _get_registered():
        es = get_elasticsearch(indexer)
        bulk(es, indexer.iterate_items(remove=remove))

def patch_index(updates):
    """
    Performs a partial update of the ElasticSearch index.

    `updates` maps a doc type to the list of primary keys of items
    which have changed or been deleted.
    """
    updates_str = ['%s: %d items' % (k, len(v)) for k, v in updates.items()]
    logger.info('Patching index (%s)' % ', '.join(updates_str))
    indexers = _get_registered()
    for doc_type, pks in updates.items():
        for indexer in indexers:
            if indexer.doc_type == doc_type:
                es = get_elasticsearch(indexer)
                bulk(es, indexer.partial_items(pks))

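# Hedged usage sketch: re-index items 1 and 2 and drop item 3 if it has been
# deleted from the database. "Book" is a hypothetical doc_type, not part of
# the original module; any registered indexer's doc_type works here.
patch_index({'Book': [1, 2, 3]})
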
def _create_index(indexer):
    body = {
        "mappings": {},
        "settings": getattr(settings, "REST_SEARCH_INDEX_SETTINGS",
                            DEFAULT_INDEX_SETTINGS),
    }
    if indexer.mappings is not None:
        body["mappings"] = indexer.mappings

    es = get_elasticsearch(indexer)
    if not es.indices.exists(indexer.index):
        logger.info("Creating index %s" % indexer.index)
        es.indices.create(index=indexer.index, body=body)

def create_index():
    """
    Creates the ElasticSearch index if it does not exist.
    """
    conns = {}
    for indexer in _get_registered():
        es = get_elasticsearch(indexer)
        if es not in conns:
            conns[es] = {'mappings': {}, 'settings': es._settings}
        if indexer.mappings is not None:
            mappings = conns[es]['mappings']
            mappings[indexer.doc_type] = indexer.mappings
    for es, body in conns.items():
        if not es.indices.exists(es._index):
            es.indices.create(index=es._index, body=body)

def _index_items(indexer, pks):
    es = get_elasticsearch(indexer)
    seen_pks = set()

    def bulk_mapper(block_size=1000):
        # Fetch and serialize items in fixed-size blocks to bound memory use.
        for i in range(0, len(pks), block_size):
            chunk = pks[i:i + block_size]
            qs = indexer.get_queryset().filter(pk__in=chunk)
            data = indexer.serializer_class(qs, many=True).data
            for item in data:
                seen_pks.add(item["id"])
                yield {
                    "_index": indexer.index,
                    "_id": item["id"],
                    "_source": item,
                }

    bulk(es, bulk_mapper())
    return seen_pks

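# Hedged sketch of how _index_items and _delete_items could be combined for
# a partial update: index the changed primary keys, then delete whichever of
# them no longer exist in the database. The helper name "_patch_items" and
# the pairing are illustrative assumptions, not part of the original module.
def _patch_items(indexer, pks):
    seen_pks = _index_items(indexer, pks)
    _delete_items(indexer, set(pks) - seen_pks)
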
def partial_items(self, pks):
    """
    Generates items to perform a partial update of the index.

    `pks` is a list of primary keys of items which have changed or
    been deleted.
    """
    es = get_elasticsearch(self)
    index = es._index
    queryset = self.get_queryset().filter(pk__in=pks)

    # index current items
    removed = set(pks)
    for item in bulk_iterate(queryset):
        removed.discard(item.pk)
        yield self.__add_item(item, index=index)

    # remove obsolete items
    for pk in removed:
        yield self.__remove_item(pk, index=index)

def iterate_items(self, remove=True):
    """
    Generates items to perform a full resync of the index.
    """
    es = get_elasticsearch(self)
    index = es._index
    queryset = self.get_queryset()

    # index current items
    ids = set()
    for item in bulk_iterate(queryset):
        ids.add(item.pk)
        yield self.__add_item(item, index=index)

    # remove obsolete items
    if remove:
        for i in scan(es, index=index, doc_type=self.doc_type, fields=[]):
            pk = int(i['_id'])
            if pk not in ids:
                yield self.__remove_item(pk, index=index)

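# Hedged sketch: driving a full resync for a single indexer by hand.
# "BookIndexer" is a hypothetical Indexer subclass; update_index() above
# performs this same bulk() call for every registered indexer.
indexer = BookIndexer()
es = get_elasticsearch(indexer)
bulk(es, indexer.iterate_items(remove=True))
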
def search(self, **kwargs):
    es = get_elasticsearch(self)
    return es.search(index=es._index, doc_type=self.doc_type, **kwargs)


def scan(self, **kwargs):
    es = get_elasticsearch(self)
    return scan(es, index=es._index, doc_type=self.doc_type, **kwargs)

def search(self, **kwargs):
    es = get_elasticsearch(self)
    return es.search(index=self.index, **kwargs)


def scan(self, **kwargs):
    es = get_elasticsearch(self)
    return scan(es, index=self.index, **kwargs)
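

# Hedged usage sketch for search() and scan(). "BookIndexer" and the
# "title" field are illustrative assumptions, not part of the original code.
indexer = BookIndexer()

# search() returns the raw ElasticSearch response dict in one round trip.
results = indexer.search(body={'query': {'match': {'title': 'django'}}})
for hit in results['hits']['hits']:
    print(hit['_id'], hit['_source'])

# scan() lazily iterates over every matching document via the scroll API,
# avoiding the result-window limit of a plain search.
for hit in indexer.scan(query={'query': {'match_all': {}}}):
    print(hit['_id'])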