def upgrade_search():
    """Add any missing properties to the index mappings.

    For the collection, entity and record indexes in turn: create the index
    from a copy of ``INDEX_SETTINGS``, put its mapping for the ``doc`` type,
    then open, refresh and clear the cache of that index. The create, open,
    refresh and clear-cache calls are issued with ``ignore`` set for HTTP
    400 and 404 responses.
    """
    INDEXES = [
        # Use collection_index(), the same helper the other index
        # functions in this module call.
        (collection_index(), COLLECTION_MAPPING),
        (entity_index(), ENTITY_MAPPING),
        (record_index(), RECORD_MAPPING),
    ]
    for (index, mapping) in INDEXES:
        log.info("Creating index: %s", index)
        # Work on a copy so the record-specific override below does not
        # modify the shared INDEX_SETTINGS object.
        settings = deepcopy(INDEX_SETTINGS)
        if index == record_index():
            # optimise records for bulk write
            settings['index']['refresh_interval'] = '-1'
        es.indices.create(index, body=settings, ignore=[404, 400])
        es.indices.put_mapping(index=index, doc_type='doc', body=mapping)
        es.indices.open(index=index, ignore=[400, 404])
        es.indices.refresh(index=index, ignore=[400, 404])
        es.indices.clear_cache(index=index, ignore=[400, 404])
def upgrade_search():
    """Create the search indexes and put the mapping for each of them.

    Handles the collection, entity and record indexes, in that order. Each
    index is created, given its ``doc`` mapping, opened and refreshed.
    """
    targets = (
        (collection_index(), COLLECTION_MAPPING),
        (entity_index(), ENTITY_MAPPING),
        (record_index(), RECORD_MAPPING),
    )
    for name, properties in targets:
        log.info("Creating index: %s", name)
        indices = es.indices
        indices.create(name, ignore=[404, 400])
        indices.put_mapping(index=name, doc_type='doc', body=properties)
        indices.open(index=name, ignore=[400, 404])
        indices.refresh(index=name)
def setUp(self):
    """Reset the search indexes and the database before a test.

    The first call (detected through the ``_global_test_state`` attribute
    on ``TestCase``) deletes the indexes and runs ``upgrade_search()``;
    later calls delete every document from the three indexes instead.
    """
    if hasattr(TestCase, '_global_test_state'):
        # Not the first setUp: empty the indexes with a match_all delete.
        index_names = [collection_index(), entity_index(), record_index()]
        match_everything = {'query': {'match_all': {}}}
        es.delete_by_query(
            index=index_names,
            body=match_everything,
            refresh=True,
            conflicts='proceed',
        )
    else:
        # First setUp: set the flag, then rebuild the indexes.
        TestCase._global_test_state = True
        delete_index()
        upgrade_search()

    # Runs on every call, whichever branch was taken above.
    destroy_db()
    db.create_all()
    create_system_roles()
def generate_records(document):
    """Yield one index record dict per DocumentRecord row of ``document``.

    Rows are selected by ``DocumentRecord.document_id == document.id``.
    Each yielded dict carries ``_id``, ``_index``, ``_type`` and a
    ``_source`` payload built from the row and the document.
    """
    rows = db.session.query(DocumentRecord).filter(
        DocumentRecord.document_id == document.id
    )
    for row in rows:
        action = {
            '_id': row.id,
            '_index': record_index(),
            '_type': record_type(),
        }
        action['_source'] = {
            'document_id': document.id,
            'collection_id': document.collection_id,
            'index': row.index,
            'sheet': row.sheet,
            'text': index_form(row.texts),
        }
        yield action
def generate_records(document):
    """Generate index records, based on document rows or pages.

    Yields one dict per ``DocumentRecord`` whose ``document_id`` matches
    ``document.id``. Each dict carries ``_id``, ``_index``, ``_type`` and a
    ``_source`` payload. A progress message is logged after every 1000
    records yielded.
    """
    q = db.session.query(DocumentRecord)
    q = q.filter(DocumentRecord.document_id == document.id)
    # Count from 1 so the logged figure equals the number of records
    # already yielded (a zero-based index reports one record too few).
    for count, record in enumerate(q, 1):
        yield {
            '_id': record.id,
            '_index': record_index(),
            '_type': 'doc',
            '_source': {
                'document_id': document.id,
                'collection_id': document.collection_id,
                'index': record.index,
                'text': index_form(record.texts)
            }
        }
        if count % 1000 == 0:
            log.info("Indexed [%s]: %s records...", document.id, count)
def all_indexes():
    """Return the collection, entity and record index names as a list."""
    getters = (collection_index, entity_index, record_index)
    return [get_name() for get_name in getters]
def flush_index():
    """Refresh the collection, entity and record indexes, one at a time."""
    for get_name in (collection_index, entity_index, record_index):
        es.indices.refresh(index=get_name())
def delete_index():
    """Delete the collection, entity and record indexes.

    Each delete call is issued with ``ignore`` set for HTTP 404 and 400.
    """
    for get_name in (collection_index, entity_index, record_index):
        es.indices.delete(get_name(), ignore=[404, 400])
def clear_index():
    """Delete all documents from the collection, entity and record indexes.

    Issues a single ``delete_by_query`` with a ``match_all`` query across
    the three indexes, with ``refresh=True``.
    """
    targets = [collection_index(), entity_index(), record_index()]
    es.delete_by_query(
        index=targets,
        body={'query': {'match_all': {}}},
        refresh=True,
    )