def index_collection(collection, sync=False):
    """Write a collection's document to the collections index.

    A collection whose ``deleted_at`` is set is removed from the index
    instead. If ``get_collection`` yields ``None`` nothing is indexed and
    ``None`` is returned.

    :param collection: object exposing ``id`` and ``deleted_at``.
    :param sync: forwarded to ``refresh_sync()``; the result is passed to
        ``index_safe`` as its ``refresh`` argument.
    :return: the result of ``delete_collection`` or ``index_safe``, or
        ``None`` when there is no data for the collection.
    """
    if collection.deleted_at is not None:
        return delete_collection(collection.id)

    data = get_collection(collection.id)
    if data is None:
        return

    log.info(
        "[%s] Index: %s (%s things)...",
        collection,
        data.get("label"),
        data.get("count"),
    )

    # The text field holds the raw label followed by normalized variants
    # of the label, foreign id and summary.
    text = [data.get("label")]
    for field in ("label", "foreign_id", "summary"):
        text.append(normalize(data.get(field)))
    data["text"] = text

    # The identifier is passed to index_safe separately, so it is dropped
    # from the document body.
    data.pop("id", None)
    refresh = refresh_sync(sync)
    return index_safe(collections_index(), collection.id, data, refresh=refresh)
def index_collection(collection, sync=False):
    """Index a collection.

    A collection whose ``deleted_at`` is set is removed from the index
    instead of being written.

    :param collection: object exposing ``id`` and ``deleted_at``.
    :param sync: forwarded to ``refresh_sync()``; the result is passed to
        ``index_safe`` as its ``refresh`` argument.
    :return: the result of ``delete_collection`` or ``index_safe``, or
        ``None`` when ``get_collection`` has no data for the collection.
    """
    if collection.deleted_at is not None:
        return delete_collection(collection.id)

    data = get_collection(collection.id)
    if data is None:
        # Nothing to index; without this guard the ``pop`` below would
        # raise AttributeError on ``None``.
        return None

    # The identifier is passed to index_safe separately, so it is dropped
    # from the document body.
    data.pop('id', None)
    return index_safe(collections_index(), collection.id, data,
                      refresh=refresh_sync(sync))
def index_single(obj, data, texts):
    """Apply the indexing steps shared by entities and documents.

    The given ``data`` mapping is updated in place with bookkeeping fields
    taken from ``obj``, then passed through ``finalize_index`` and
    ``clean_dict`` before being written to the entity index.

    :param obj: object exposing ``id``, ``model``, ``collection``,
        ``created_at`` and ``updated_at``.
    :param data: mutable mapping holding the document body.
    :param texts: passed on unchanged to ``finalize_index``.
    :return: whatever ``index_safe`` returns.
    """
    data['bulk'] = False
    data['roles'] = obj.collection.roles
    data['collection_id'] = obj.collection.id
    for stamp in ('created_at', 'updated_at'):
        data[stamp] = getattr(obj, stamp)

    document = clean_dict(finalize_index(data, obj.model, texts))
    return index_safe(entity_index(), obj.id, document)
def index_entity(entity, sync=False):
    """Write a single entity to its index.

    An entity whose ``deleted_at`` is set is removed from the index
    instead.

    :param entity: object exposing ``id``, ``deleted_at`` and ``to_dict()``.
    :param sync: forwarded to ``refresh_sync()``; the result is passed to
        ``index_safe`` as its ``refresh`` argument.
    :return: the result of ``delete_entity`` or ``index_safe``.
    """
    if entity.deleted_at is not None:
        return delete_entity(entity.id)

    # index_operation yields the document id, the target index and the body.
    doc_id, target_index, body = index_operation(entity.to_dict())

    # NOTE: the following step is required if an entity changes its type,
    # but it is currently disabled:
    # delete_entity(entity_id, exclude=proxy.schema, sync=False)
    return index_safe(target_index, doc_id, body, refresh=refresh_sync(sync))
def index_single(obj, proxy, data, texts, sync=False):
    """Apply the indexing steps shared by entities and documents.

    The document produced by ``finalize_index`` is extended with
    bookkeeping fields taken from ``obj`` and written to the entity index.

    :param obj: object exposing ``id``, ``collection``, ``created_at`` and
        ``updated_at``.
    :param proxy: passed on unchanged to ``finalize_index``.
    :param data: passed on unchanged to ``finalize_index``.
    :param texts: passed on unchanged to ``finalize_index``.
    :param sync: when truthy, ``index_safe`` is called with
        ``refresh='wait_for'``; otherwise with ``refresh=False``.
    :return: whatever ``index_safe`` returns.
    """
    document = finalize_index(proxy, data, texts)
    document['bulk'] = False
    document['collection_id'] = obj.collection.id
    for stamp in ('created_at', 'updated_at'):
        document[stamp] = getattr(obj, stamp)

    if sync:
        refresh = 'wait_for'
    else:
        refresh = False
    return index_safe(entity_index(), obj.id, document, refresh=refresh)
def index_single(obj, proxy, data, texts, sync=False):
    """Apply the indexing steps shared by entities and documents.

    The document produced by ``finalize_index`` is extended with
    bookkeeping fields taken from ``obj`` and written to the index
    selected by ``entities_write_index(proxy.schema)``.

    :param obj: object exposing ``id``, ``collection_id``, ``created_at``
        and ``updated_at``.
    :param proxy: object exposing ``schema``; also passed to
        ``finalize_index``.
    :param data: passed on unchanged to ``finalize_index``.
    :param texts: passed on unchanged to ``finalize_index``.
    :param sync: forwarded to ``refresh_sync()``; the result is passed to
        ``index_safe`` as its ``refresh`` argument.
    :return: whatever ``index_safe`` returns.
    """
    document = finalize_index(proxy, data, texts)
    document['bulk'] = False
    document['collection_id'] = obj.collection_id
    for stamp in ('created_at', 'updated_at'):
        document[stamp] = getattr(obj, stamp)

    target_index = entities_write_index(proxy.schema)
    refresh = refresh_sync(sync)

    # With split indexes enabled, delete_entity is invoked for this id with
    # the current schema excluded -- presumably to clear copies stored under
    # other schemata; verify against delete_entity.
    if settings.ENTITIES_INDEX_SPLIT:
        delete_entity(obj.id, exclude=proxy.schema, sync=False)

    return index_safe(target_index, obj.id, document, refresh=refresh)
def index_notification(event, actor_id, params, channels, sync=False):
    """Write a notification document to the notifications index.

    Only the parameter names declared in ``event.params`` are kept; each
    value is mapped through ``get_entity_id``. ``None`` channels are
    dropped and duplicates removed. The document id is a hash of the actor,
    event name, channels and resolved parameters.

    :param event: object exposing ``name`` and a ``params`` mapping.
    :param actor_id: stored as ``actor_id`` in the document.
    :param params: mapping of parameter values, or a falsy value for none.
    :param channels: iterable of channels; ``None`` entries are ignored.
    :param sync: forwarded to ``refresh_sync()``; the result is passed to
        ``index_safe`` as its ``refresh`` argument.
    :return: whatever ``index_safe`` returns.
    """
    supplied = params or {}
    resolved = {}
    for name in event.params.keys():
        resolved[name] = get_entity_id(supplied.get(name))

    unique_channels = list({c for c in channels if c is not None})
    document = {
        'actor_id': actor_id,
        'params': resolved,
        'event': event.name,
        'channels': unique_channels,
        'created_at': datetime.utcnow(),
    }
    target_index = notifications_index()
    doc_id = hash_data((actor_id, event.name, unique_channels, resolved))
    return index_safe(target_index, doc_id, document,
                      refresh=refresh_sync(sync))
def index_collection(collection, sync=False):
    """Index a collection.

    A collection whose ``deleted_at`` is set is removed from the index
    instead of being written.

    :param collection: object exposing ``id``, ``label`` and ``deleted_at``.
    :param sync: forwarded to ``refresh_sync()``; the result is passed to
        ``index_safe`` as its ``refresh`` argument.
    :return: the result of ``delete_collection`` or ``index_safe``, or
        ``None`` when ``get_collection`` has no data for the collection.
    """
    if collection.deleted_at is not None:
        return delete_collection(collection.id)

    log.info("Index [%s]: %s", collection.id, collection.label)
    data = get_collection(collection.id)
    if data is None:
        # Nothing to index; without this guard the ``data.get`` calls below
        # would raise AttributeError on ``None``.
        return None

    # The text field holds the raw label followed by normalized variants
    # of the label, foreign id and summary.
    text = [data.get('label')]
    text.extend(
        normalize(data.get(field))
        for field in ('label', 'foreign_id', 'summary')
    )
    data['text'] = text

    # The identifier is passed to index_safe separately, so it is dropped
    # from the document body.
    data.pop('id', None)
    return index_safe(collections_index(), collection.id, data,
                      refresh=refresh_sync(sync))
def index_notification(event, actor_id, params, channels, sync=False):
    """Write a notification document to the notifications index.

    Each parameter value is mapped through ``get_entity_id``; values that
    resolve to ``None`` are left out and the rest are stored as strings.
    ``None`` channels are dropped and duplicates removed.

    :param event: object exposing ``name``.
    :param actor_id: stored as ``actor_id`` in the document.
    :param params: mapping of parameter values, or a falsy value for none.
    :param channels: iterable of channels; ``None`` entries are ignored.
    :param sync: passed to ``index_safe`` as its ``sync`` argument.
    :return: whatever ``index_safe`` returns.
    """
    params = params or {}
    entity_ids = (
        (name, get_entity_id(value)) for name, value in params.items()
    )
    resolved = {
        name: str(entity_id)
        for name, entity_id in entity_ids
        if entity_id is not None
    }

    unique_channels = list({c for c in channels if c is not None})
    document = {
        "actor_id": actor_id,
        "params": resolved,
        "event": event.name,
        "channels": unique_channels,
        "created_at": datetime.utcnow(),
    }
    target_index = notifications_index()
    # The id is hashed from the parameters as supplied by the caller, not
    # from the stringified ids stored in the document.
    doc_id = hash_data((actor_id, event.name, unique_channels, params))
    return index_safe(target_index, doc_id, document, sync=sync)
def index_collection(collection, sync=False):
    """Build a collection's search document and write it to the index.

    A collection whose ``deleted_at`` is set is removed from the index
    instead. The document combines the collection's own attributes, its
    creator and team, and the statistics from ``get_collection_stats``.

    :param collection: collection object providing the attributes read
        below (``id``, ``label``, ``creator``, ``team``, ...).
    :param sync: passed to ``index_safe`` as its ``refresh`` argument.
    :return: the result of ``delete_collection`` or ``index_safe``.
    """
    if collection.deleted_at is not None:
        return delete_collection(collection.id)

    data = {
        'foreign_id': collection.foreign_id,
        'created_at': collection.created_at,
        'updated_at': collection.updated_at,
        'label': collection.label,
        'kind': collection.kind,
        'summary': collection.summary,
        'category': Collection.DEFAULT,
        'publisher': collection.publisher,
        'publisher_url': collection.publisher_url,
        'info_url': collection.info_url,
        'data_url': collection.data_url,
        'casefile': collection.casefile,
        'secret': collection.secret,
        'collection_id': collection.id,
        'schemata': {},
        'team': [],
    }
    # Collected before the category override below, so the searchable text
    # reflects the values currently in ``data`` (including the default
    # category when it is a string).
    texts = [value for value in data.values() if isinstance(value, str)]

    if collection.category in Collection.CATEGORIES:
        data['category'] = collection.category

    creator = collection.creator
    if creator is not None:
        data['creator'] = {
            'id': creator.id,
            'type': creator.type,
            'name': creator.name,
        }
        texts.append(creator.name)

    for member in collection.team:
        data['team'].append({
            'id': member.id,
            'type': member.type,
            'name': member.name,
        })
        texts.append(member.name)

    stats = get_collection_stats(collection.id)
    data['count'] = stats['count']

    # Expose per-schema entity counts, limited to schemata known to the
    # model that descend from Thing.
    thing = model.get(Entity.THING)
    for schema_name, count in stats['schemata'].items():
        schema = model.get(schema_name)
        if schema is None or not schema.is_a(thing):
            continue
        data['schemata'][schema.name] = count

    # When the collection declares no countries or languages, fall back to
    # the ones found in the statistics.
    countries = ensure_list(collection.countries) or stats['countries'].keys()
    data['countries'] = registry.country.normalize_set(countries)

    languages = ensure_list(collection.languages) or stats['languages'].keys()
    data['languages'] = registry.language.normalize_set(languages)

    # Build the normalized variants as a full list first: extending from a
    # lazy iterator over ``texts`` would never terminate.
    normalized = [normalize(text, ascii=True) for text in texts]
    texts.extend(normalized)
    data['text'] = index_form(texts)
    return index_safe(collections_index(), collection.id, data, refresh=sync)
def index_collection(collection):
    """Build a collection's search document and write it to the index.

    A collection whose ``deleted_at`` is set is removed from the index
    instead. Otherwise the document is assembled from the collection's
    attributes, its creator and team, and an aggregation query over the
    entities index; the collections index is refreshed after the write.

    :param collection: collection object providing the attributes read
        below (``id``, ``label``, ``creator``, ``team``, ...).
    :return: the result of ``delete_collection`` or ``index_safe``.
    """
    if collection.deleted_at is not None:
        return delete_collection(collection.id)

    data = {
        'foreign_id': collection.foreign_id,
        'created_at': collection.created_at,
        'updated_at': collection.updated_at,
        'label': collection.label,
        'kind': collection.kind,
        'summary': collection.summary,
        'category': Collection.DEFAULT,
        'publisher': collection.publisher,
        'publisher_url': collection.publisher_url,
        'info_url': collection.info_url,
        'data_url': collection.data_url,
        'casefile': collection.casefile,
        'roles': collection.roles,
        'schemata': {},
        'team': [],
    }
    # Collected before the category override below, so the searchable text
    # reflects the values currently in ``data``.
    texts = [value for value in data.values() if isinstance(value, str)]

    if collection.category in Collection.CATEGORIES:
        data['category'] = collection.category

    creator = collection.creator
    if creator is not None:
        data['creator'] = {
            'id': creator.id,
            'type': creator.type,
            'name': creator.name,
        }
        texts.append(creator.name)

    for member in collection.team:
        data['team'].append({
            'id': member.id,
            'type': member.type,
            'name': member.name,
        })
        texts.append(member.name)

    # Compute some statistics on the content of the collection: a count of
    # matching entities plus term buckets for schema, countries, languages.
    filters = [
        {'term': {'collection_id': collection.id}},
        {'term': {'schemata': Entity.THING}},
    ]
    bucket_sizes = (('schema', 1000), ('countries', 500), ('languages', 100))
    query = {
        'size': 0,
        'query': {'bool': {'filter': filters}},
        'aggs': {
            field: {'terms': {'field': field, 'size': size}}
            for field, size in bucket_sizes
        },
    }
    result = search_safe(index=entities_index(), body=query)
    aggregations = result.get('aggregations')
    data['count'] = result['hits']['total']

    # Expose entity counts keyed by schema.
    data['schemata'].update({
        bucket['key']: bucket['doc_count']
        for bucket in aggregations['schema']['buckets']
    })

    # When the collection declares no countries or languages, fall back to
    # the bucket keys returned by the aggregation.
    countries = collection.countries
    if countries is None or len(countries) == 0:
        buckets = aggregations['countries']['buckets']
        countries = [bucket['key'] for bucket in buckets]
    data['countries'] = exactitude.countries.normalize_set(countries)

    languages = collection.languages
    if languages is None or len(languages) == 0:
        buckets = aggregations['languages']['buckets']
        languages = [bucket['key'] for bucket in buckets]
    data['languages'] = exactitude.languages.normalize_set(languages)

    # Build the normalized variants as a full list first: extending from a
    # lazy iterator over ``texts`` would never terminate.
    normalized = [normalize(text, ascii=True) for text in texts]
    texts.extend(normalized)
    data['text'] = index_form(texts)

    indexed = index_safe(collections_index(), collection.id, data)
    refresh_index(index=collections_index())
    return indexed