def index_collection(collection):
    """Write a collection document to the search index.

    A collection whose ``deleted_at`` is set is not indexed; it is passed
    to ``delete_collection`` instead and that call's result is returned.
    Otherwise the document is assembled from the collection's attributes,
    its creator (when one is set) and the mapping returned by
    ``get_collection_stats``, then stored under the collection's id.
    """
    if collection.deleted_at is not None:
        return delete_collection(collection.id)

    # Attributes copied onto the document under the same key.
    copied_fields = (
        'foreign_id', 'created_at', 'updated_at', 'label', 'summary',
        'category', 'countries', 'languages', 'managed', 'roles',
    )
    data = {name: getattr(collection, name) for name in copied_fields}

    creator = collection.creator
    if creator is not None:
        data['creator'] = {
            key: getattr(creator, key) for key in ('id', 'type', 'name')
        }

    stats = get_collection_stats(collection.id)
    data.update(stats)

    es.index(index=collection_index(),
             doc_type=collection_type(),
             id=collection.id,
             body=data)
def upgrade_search():
    """Add any missing properties to the index mappings.

    For the collection, entity and record indexes in turn: create the
    index (400/404 responses are ignored), put its mapping for the
    ``doc`` document type, open it (400/404 ignored) and refresh it.
    """
    targets = (
        (collection_index(), COLLECTION_MAPPING),
        (entity_index(), ENTITY_MAPPING),
        (record_index(), RECORD_MAPPING),
    )
    for name, mapping in targets:
        log.info("Creating index: %s", name)
        # Creation is attempted unconditionally; error responses with
        # status 404 or 400 are ignored rather than raised.
        es.indices.create(name, ignore=[404, 400])
        es.indices.put_mapping(index=name, doc_type='doc', body=mapping)
        es.indices.open(index=name, ignore=[400, 404])
        es.indices.refresh(index=name)
def setUp(self):
    """Reset the search indexes and the database before a test.

    The first call in a process (tracked by a marker attribute set on
    ``TestCase``) deletes the indexes and rebuilds them through
    ``upgrade_search``. Later calls only delete every document from the
    existing indexes. The database is then destroyed, recreated and the
    system roles are created, on every call.
    """
    if hasattr(TestCase, '_global_test_state'):
        # Indexes were already built by an earlier call: just empty them.
        match_everything = {'query': {'match_all': {}}}
        targets = [collection_index(), entity_index(), record_index()]
        es.delete_by_query(index=targets,
                           body=match_everything,
                           refresh=True,
                           conflicts='proceed')
    else:
        TestCase._global_test_state = True
        delete_index()
        upgrade_search()

    destroy_db()
    db.create_all()
    create_system_roles()
def all_indexes():
    """Return the collection, entity and record indexes as a list.

    The list holds the results of ``collection_index()``,
    ``entity_index()`` and ``record_index()``, in that order.
    """
    resolvers = (collection_index, entity_index, record_index)
    return [resolve() for resolve in resolvers]
def index_collection(collection):
    """Index a collection.

    A collection whose ``deleted_at`` is set is passed to
    ``delete_collection`` and that call's result is returned. Otherwise a
    document is built from the collection's attributes and creator,
    enriched with statistics aggregated over the collection's entities
    (total count, per-schema counts, and fallback countries/languages),
    and written to the collection index under the collection's id.
    """
    if collection.deleted_at is not None:
        return delete_collection(collection.id)

    data = {
        'foreign_id': collection.foreign_id,
        'created_at': collection.created_at,
        'updated_at': collection.updated_at,
        'label': collection.label,
        'summary': collection.summary,
        'category': collection.category,
        'countries': collection.countries,
        'languages': collection.languages,
        'managed': collection.managed,
        'roles': collection.roles,
        'schemata': {},
    }
    texts = [
        collection.label,
        collection.foreign_id,
        collection.summary,
        collection.category,
    ]

    if collection.creator is not None:
        data['creator'] = {
            'id': collection.creator.id,
            'type': collection.creator.type,
            'name': collection.creator.name,
        }
        texts.append(collection.creator.name)

    # Compute some statistics on the content of a collection.
    query = {
        'size': 0,
        'query': {
            'bool': {
                'filter': [
                    {'term': {'collection_id': collection.id}},
                    {'term': {'schemata': Entity.THING}},
                ]
            }
        },
        'aggs': {
            'schema': {'terms': {'field': 'schema', 'size': 1000}},
            'countries': {'terms': {'field': 'countries', 'size': 500}},
            'languages': {'terms': {'field': 'languages', 'size': 100}},
        }
    }
    result = es.search(index=entities_index(), body=query)
    data['count'] = result['hits']['total']

    # The response is read with .get(), so the aggregations section may be
    # absent; treat a missing section or bucket list as "no buckets"
    # instead of failing on a None subscript.
    aggregations = result.get('aggregations') or {}

    def buckets(name):
        return aggregations.get(name, {}).get('buckets', [])

    # expose entities by schema count.
    for bucket in buckets('schema'):
        data['schemata'][bucket['key']] = bucket['doc_count']

    # if no countries or langs are given, take the most common from the data.
    if not data.get('countries'):
        data['countries'] = [b['key'] for b in buckets('countries')]
    if not data.get('languages'):
        data['languages'] = [b['key'] for b in buckets('languages')]

    # Index every text both as given and in its match_form() variant.
    texts.extend([match_form(t) for t in texts])
    data['text'] = index_form(texts)
    es.index(index=collection_index(),
             doc_type='doc',
             id=collection.id,
             body=data)
def flush_index():
    """Refresh the collection, entity and record indexes, in that order,
    so that all indexing changes are applied."""
    for resolve in (collection_index, entity_index, record_index):
        es.indices.refresh(index=resolve())
def delete_index():
    """Delete the collection, entity and record indexes, in that order.

    Responses with status 404 or 400 are ignored for each deletion.
    """
    for resolve in (collection_index, entity_index, record_index):
        es.indices.delete(resolve(), ignore=[404, 400])
def index_collection(collection):
    """Index a collection.

    A collection whose ``deleted_at`` is set is passed to
    ``delete_collection`` and that call's result is returned. Otherwise a
    document is built from the collection's attributes and creator,
    enriched with statistics aggregated over the collection's entities
    (total count, per-schema counts, and fallback countries/languages),
    and handed to ``index_doc``; the result of ``index_doc`` is returned.
    """
    if collection.deleted_at is not None:
        return delete_collection(collection.id)

    data = {
        'foreign_id': collection.foreign_id,
        'created_at': collection.created_at,
        'updated_at': collection.updated_at,
        'label': collection.label,
        'kind': collection.kind,
        'summary': collection.summary,
        'category': collection.category,
        'publisher': collection.publisher,
        'publisher_url': collection.publisher_url,
        'info_url': collection.info_url,
        'data_url': collection.data_url,
        'casefile': collection.casefile,
        'roles': collection.roles,
        'schemata': {},
    }
    # Only the string-valued attributes feed the full-text field.
    texts = [v for v in data.values() if isinstance(v, six.string_types)]

    if collection.creator is not None:
        data['creator'] = {
            'id': collection.creator.id,
            'type': collection.creator.type,
            'name': collection.creator.name,
        }
        texts.append(collection.creator.name)

    # Compute some statistics on the content of a collection.
    query = {
        'size': 0,
        'query': {
            'bool': {
                'filter': [
                    {'term': {'collection_id': collection.id}},
                    {'term': {'schemata': Entity.THING}},
                ]
            }
        },
        'aggs': {
            'schema': {'terms': {'field': 'schema', 'size': 1000}},
            'countries': {'terms': {'field': 'countries', 'size': 500}},
            'languages': {'terms': {'field': 'languages', 'size': 100}},
        }
    }
    result = es.search(index=entities_index(), body=query)
    data['count'] = result['hits']['total']

    # The response is read with .get(), so the aggregations section may be
    # absent; treat a missing section or bucket list as "no buckets"
    # instead of failing on a None subscript.
    aggregations = result.get('aggregations') or {}

    def buckets(name):
        return aggregations.get(name, {}).get('buckets', [])

    # expose entities by schema count.
    for bucket in buckets('schema'):
        data['schemata'][bucket['key']] = bucket['doc_count']

    # if no countries or langs are given, take the most common from the data.
    countries = collection.countries
    if not countries:
        countries = [b['key'] for b in buckets('countries')]
    data['countries'] = exactitude.countries.normalize_set(countries)

    languages = collection.languages
    if not languages:
        languages = [b['key'] for b in buckets('languages')]
    data['languages'] = exactitude.languages.normalize_set(languages)

    # Index every text both as given and in its normalize(ascii=True) form.
    texts.extend([normalize(t, ascii=True) for t in texts])
    data['text'] = index_form(texts)
    return index_doc(collection_index(), collection.id, data)
def clear_index():
    """Delete every document from the collection, entity and record
    indexes, keeping the indexes themselves in place.

    Runs a single ``match_all`` delete-by-query across the three indexes
    and refreshes them afterwards.
    """
    indexes = [collection_index(), entity_index(), record_index()]
    q = {'query': {'match_all': {}}}
    # conflicts='proceed' keeps the wipe going when a document changes
    # underneath the query instead of aborting part-way with a version
    # conflict; this matches the identical wipe performed in setUp().
    es.delete_by_query(index=indexes,
                       body=q,
                       refresh=True,
                       conflicts='proceed')