def delete_collection(collection_id, sync=False):
    """Remove a collection's document from the collections index.

    Missing documents are tolerated (404 is ignored), so this is safe
    to call for collections that were never indexed.
    """
    es.delete(
        index=collections_index(),
        doc_type='doc',
        id=str(collection_id),
        refresh=refresh_sync(sync),
        ignore=[404],
    )
def delete_entity(entity_id, exclude=None, sync=False):
    """Delete an entity from the index.

    :param entity_id: ID of the entity to remove.
    :param exclude: optional schema whose index should be skipped.
    :param sync: when True, wait for completion and refresh the index.
    """
    # The `ids` query requires `values` to be an array; passing a bare
    # string makes Elasticsearch reject the query.
    query = {'query': {'ids': {'values': [str(entity_id)]}}}
    es.delete_by_query(
        index=entities_read_index(exclude=exclude),
        body=query,
        wait_for_completion=sync,
        refresh=refresh_sync(sync),
    )
def index_collection(collection, sync=False):
    """Write a collection into the collections index.

    Soft-deleted collections are removed from the index instead.
    """
    if collection.deleted_at is not None:
        return delete_collection(collection.id)
    data = get_collection(collection.id)
    if data is None:
        return
    log.info(
        "[%s] Index: %s (%s things)...",
        collection,
        data.get("label"),
        data.get("count"),
    )
    # Searchable text: raw label plus normalised label/foreign_id/summary.
    data["text"] = [
        data.get("label"),
        normalize(data.get("label")),
        normalize(data.get("foreign_id")),
        normalize(data.get("summary")),
    ]
    data.pop("id", None)
    return index_safe(
        collections_index(), collection.id, data, refresh=refresh_sync(sync)
    )
def index_collection(collection, sync=False):
    """Index a collection.

    Soft-deleted collections are removed from the index instead.

    :param collection: collection model object with ``id`` and ``deleted_at``.
    :param sync: when True, refresh the index so the write is visible.
    """
    if collection.deleted_at is not None:
        return delete_collection(collection.id)
    data = get_collection(collection.id)
    if data is None:
        # get_collection can return None; calling .pop on it would
        # raise AttributeError.
        return
    data.pop('id', None)
    return index_safe(collections_index(), collection.id, data,
                      refresh=refresh_sync(sync))
def delete_entity(entity_id, exclude=None, sync=False):
    """Delete an entity from the index.

    Removes the entity from every index it appears in, except the write
    index for ``exclude`` when that schema is given.
    """
    skip_index = None
    if exclude is not None:
        skip_index = entities_write_index(exclude)
    for hit in entities_by_ids(entity_id, excludes='*'):
        index = hit.get('_index')
        if index == skip_index:
            continue
        es.delete(index=index, id=entity_id, refresh=refresh_sync(sync))
def index_entity(entity, sync=False):
    """Index an entity, or delete it when it is soft-deleted."""
    if entity.deleted_at is not None:
        return delete_entity(entity.id)
    entity_id, index, data = index_operation(entity.to_dict())
    # This is required if an entity changes its type:
    # delete_entity(entity_id, exclude=proxy.schema, sync=False)
    return index_safe(index, entity_id, data, refresh=refresh_sync(sync))
def index_single(obj, proxy, data, texts, sync=False):
    """Indexing aspects common to entities and documents."""
    data = finalize_index(proxy, data, texts)
    data.update({
        'bulk': False,
        'collection_id': obj.collection_id,
        'created_at': obj.created_at,
        'updated_at': obj.updated_at,
    })
    index = entities_write_index(proxy.schema)
    if settings.ENTITIES_INDEX_SPLIT:
        # If the schema changed, a stale copy may linger in another
        # type index; purge it before writing the fresh document.
        delete_entity(obj.id, exclude=proxy.schema, sync=False)
    return index_safe(index, obj.id, data, refresh=refresh_sync(sync))
def index_notification(event, actor_id, params, channels, sync=False):
    """Index a notification.

    :param event: event descriptor providing ``name`` and ``params``.
    :param actor_id: ID of the actor who triggered the event.
    :param params: mapping of event parameter names to entities/values.
    :param channels: channels to deliver the notification to.
    :param sync: when True, refresh the index so the write is visible.
    """
    params = params or {}
    params = {n: get_entity_id(params.get(n)) for n in event.params.keys()}
    # Sort for a deterministic document ID: set iteration order varies
    # between processes, which would make hash_data() unstable and allow
    # duplicate notifications for the same event.
    channels = sorted({c for c in channels if c is not None})
    data = {
        'actor_id': actor_id,
        'params': params,
        'event': event.name,
        'channels': channels,
        'created_at': datetime.utcnow(),
    }
    index = notifications_index()
    id_ = hash_data((actor_id, event.name, channels, params))
    return index_safe(index, id_, data, refresh=refresh_sync(sync))
def index_collection(collection, sync=False):
    """Index a collection, deleting it instead when soft-deleted."""
    if collection.deleted_at is not None:
        return delete_collection(collection.id)
    log.info("Index [%s]: %s", collection.id, collection.label)
    data = get_collection(collection.id)
    # Searchable text: the raw label, then normalised variants of a few
    # descriptive fields.
    texts = [data.get('label')]
    for field in ('label', 'foreign_id', 'summary'):
        texts.append(normalize(data.get(field)))
    data['text'] = texts
    data.pop('id', None)
    return index_safe(collections_index(), collection.id, data,
                      refresh=refresh_sync(sync))
def index_bulk(collection_id, entities):
    """Index a set of entities.

    Serialises bulk indexing runs behind a shared cache lock so only
    one worker writes at a time.
    """
    lock = cache.lock(cache.key('index_bulk'))
    lock.acquire(blocking=True)
    try:
        actions = _index_updates(collection_id, entities)
        # Oversized chunk so everything goes out in a single request.
        return bulk(es,
                    actions,
                    chunk_size=len(actions) + 1,
                    max_retries=10,
                    initial_backoff=2,
                    request_timeout=REQUEST_TIMEOUT,
                    timeout=TIMEOUT,
                    refresh=refresh_sync(True))
    except BulkIndexError as err:
        log.warning('Indexing error: %s', err)
    finally:
        try:
            lock.release()
        except Exception:
            log.exception("Cannot release index lock.")
def delete_entity(entity_id, exclude=None, sync=False):
    """Delete an entity from the index.

    Removes the entity's document from every index it appears in
    (except the write index of `exclude`, when given), then purges any
    documents that reference it via their `entities` field.
    """
    if exclude is not None:
        # Translate the schema into its write index name so it can be
        # compared against each hit's `_index` below.
        exclude = entities_write_index(exclude)
    for entity in entities_by_ids(entity_id, excludes='*'):
        index = entity.get('_index')
        if index == exclude:
            continue
        try:
            es.delete(index=index, id=entity_id, refresh=refresh_sync(sync))
            # Also delete documents that point at this entity.
            q = {'term': {'entities': entity_id}}
            query_delete(entities_read_index(), q, sync=sync)
        except NotFoundError:
            # This is expected in some cases. For example, when 2 Things are
            # connected by an Interval and all the 3 entities get deleted
            # simultaneously, Aleph tries to delete the Interval thrice due to
            # recursive deletion of adjacent entities. ElasticSearch throws a
            # 404 in that case.
            # In those cases, we want to skip both the `es.delete` step and
            # the `query_delete` step.
            log.warning("Delete failed for entity %s - not found", entity_id)
            continue
def delete_collection(collection_id, sync=False):
    """Delete all documents from a particular collection.

    A missing document is ignored (404), so repeated deletes are safe.
    """
    es.delete(
        index=collections_index(),
        id=str(collection_id),
        refresh=refresh_sync(sync),
        ignore=[404],
    )