def delete_collection(collection_id, sync=False):
    """Remove a collection's document from the collections index.

    The document is addressed by the stringified ``collection_id`` and
    ``sync`` is handed to ``es.delete`` unchanged as its ``refresh`` flag.
    A 404 status is listed under ``ignore``, so deleting a document that is
    not in the index is not treated as an error.
    """
    index = collections_index()
    doc_id = str(collection_id)
    es.delete(index, doc_type='doc', id=doc_id, refresh=sync, ignore=[404])
def get_collection(collection_id):
    """Look up a single collection document in the collections index.

    The ``text`` field is excluded from the fetched source and a 404 status
    is listed under ``ignore``. The raw response is run through
    ``unpack_result`` and that value is returned.
    """
    request = {
        'index': collections_index(),
        'doc_type': 'doc',
        'id': collection_id,
        'ignore': [404],
        '_source_exclude': ['text'],
    }
    return unpack_result(es.get(**request))
def index_collection(collection, sync=False):
    """Write a collection to the collections index, or drop it if deleted.

    Args:
        collection: object exposing ``id`` and ``deleted_at``.
        sync: forwarded to ``delete_collection`` on the deletion path and,
            through ``refresh_sync``, to ``index_safe`` on the write path.

    Returns:
        The result of ``delete_collection`` when ``deleted_at`` is set,
        otherwise the result of ``index_safe``.
    """
    if collection.deleted_at is not None:
        # Forward ``sync`` so a synchronous caller gets the same refresh
        # behaviour when the collection is removed as when it is written.
        return delete_collection(collection.id, sync=sync)

    # NOTE(review): the result is used as a dict below; confirm that
    # get_collection cannot return None for a non-deleted collection.
    data = get_collection(collection.id)
    # The id is passed to index_safe as its own argument, so it is removed
    # from the payload.
    data.pop('id', None)
    return index_safe(
        collections_index(),
        collection.id,
        data,
        refresh=refresh_sync(sync),
    )
def index_collection(collection, sync=False):
    """Write a collection to the collections index, or drop it if deleted.

    The indexed payload is the result of ``get_collection`` with an extra
    ``text`` list holding the raw label followed by the normalized label,
    foreign id and summary.

    Args:
        collection: object exposing ``id``, ``label`` and ``deleted_at``.
        sync: forwarded to ``delete_collection`` on the deletion path and,
            through ``refresh_sync``, to ``index_safe`` on the write path.

    Returns:
        The result of ``delete_collection`` when ``deleted_at`` is set,
        otherwise the result of ``index_safe``.
    """
    if collection.deleted_at is not None:
        # Forward ``sync`` so a synchronous caller gets the same refresh
        # behaviour when the collection is removed as when it is written.
        return delete_collection(collection.id, sync=sync)

    log.info("Index [%s]: %s", collection.id, collection.label)
    # NOTE(review): the result is used as a dict below; confirm that
    # get_collection cannot return None for a non-deleted collection.
    data = get_collection(collection.id)

    # Raw label first, then the normalized forms of the searchable fields.
    text = [data.get('label')]
    text.extend(
        normalize(data.get(field))
        for field in ('label', 'foreign_id', 'summary')
    )
    data['text'] = text

    # The id is passed to index_safe as its own argument, so it is removed
    # from the payload.
    data.pop('id', None)
    return index_safe(
        collections_index(),
        collection.id,
        data,
        refresh=refresh_sync(sync),
    )
def index_collection(collection, sync=False):
    """Build the index document for a collection and write it to the index.

    A collection whose ``deleted_at`` is set is removed from the index
    instead. Otherwise the document is assembled from the collection's own
    attributes, its creator and team roles, and the figures returned by
    ``get_collection_stats`` (document count, per-schema counts, countries
    and languages).

    Args:
        collection: the collection object to index.
        sync: forwarded to ``delete_collection`` on the deletion path and
            passed as ``refresh`` to ``index_safe`` on the write path.

    Returns:
        The result of ``delete_collection`` when ``deleted_at`` is set,
        otherwise the result of ``index_safe``.
    """
    if collection.deleted_at is not None:
        # Forward ``sync`` so a synchronous caller gets the same refresh
        # behaviour when the collection is removed as when it is written.
        return delete_collection(collection.id, sync=sync)

    data = {
        'foreign_id': collection.foreign_id,
        'created_at': collection.created_at,
        'updated_at': collection.updated_at,
        'label': collection.label,
        'kind': collection.kind,
        'summary': collection.summary,
        'category': Collection.DEFAULT,
        'publisher': collection.publisher,
        'publisher_url': collection.publisher_url,
        'info_url': collection.info_url,
        'data_url': collection.data_url,
        'casefile': collection.casefile,
        'secret': collection.secret,
        'collection_id': collection.id,
        'schemata': {},
        'team': [],
    }
    # Snapshot the string-valued fields now, before the category override
    # and the nested creator/team entries are added.
    # NOTE(review): this means the default category, not the effective
    # one, is what lands in the text - confirm that is intended.
    texts = [value for value in data.values() if isinstance(value, str)]

    if collection.category in Collection.CATEGORIES:
        data['category'] = collection.category

    creator = collection.creator
    if creator is not None:
        data['creator'] = {
            'id': creator.id,
            'type': creator.type,
            'name': creator.name,
        }
        texts.append(creator.name)

    for role in collection.team:
        data['team'].append({
            'id': role.id,
            'type': role.type,
            'name': role.name,
        })
        texts.append(role.name)

    stats = get_collection_stats(collection.id)
    data['count'] = stats['count']

    # Expose entity counts per schema, restricted to schemata that the
    # model knows and that descend from Entity.THING.
    thing = model.get(Entity.THING)
    for schema_name, count in stats['schemata'].items():
        schema = model.get(schema_name)
        if schema is not None and schema.is_a(thing):
            data['schemata'][schema.name] = count

    # If no countries or languages are set on the collection, fall back to
    # the ones reported by the stats.
    countries = ensure_list(collection.countries)
    countries = countries or stats['countries'].keys()
    data['countries'] = registry.country.normalize_set(countries)

    languages = ensure_list(collection.languages)
    languages = languages or stats['languages'].keys()
    data['languages'] = registry.language.normalize_set(languages)

    # The normalized copies must be materialised as a list first: extending
    # a list from a lazy iterator over itself would never terminate.
    texts.extend([normalize(text, ascii=True) for text in texts])
    data['text'] = index_form(texts)
    return index_safe(collections_index(), collection.id, data, refresh=sync)
def get_index(self):
    """Return the collections index, as given by ``collections_index()``."""
    index = collections_index()
    return index
def all_indexes():
    """Return the collections and entities read indexes, comma-joined.

    The collections index comes first, followed by the entities read index.
    """
    indexes = [collections_index(), entities_read_index()]
    return ','.join(indexes)
def delete_collection(collection_id, sync=False):
    """Remove a collection's document from the collections index.

    The document is addressed by the stringified ``collection_id``; the
    ``refresh`` flag given to ``es.delete`` is ``refresh_sync(sync)``.
    A 404 status is listed under ``ignore``, so deleting a document that is
    not in the index is not treated as an error.
    """
    index = collections_index()
    doc_id = str(collection_id)
    refresh = refresh_sync(sync)
    es.delete(index, id=doc_id, refresh=refresh, ignore=[404])