def fill_index(session, batch_size=1000):
    """Rebuild the search index from every document that is not a redirect.

    Streams all documents of each configured type into Elasticsearch in
    bulk batches of ``batch_size``, printing progress at most once per
    second, then records the sync timestamp obtained before the run.

    :param session: database session used to read documents and sync state.
    :param batch_size: number of bulk actions buffered per flush.
    """
    client = elasticsearch_config['client']
    index_name = elasticsearch_config['index']

    started_at = datetime.now()
    # Mutable cell so the nested `progress` closure can rebind it.
    last_report = [None]

    # Capture the sync timestamp up-front so changes made while indexing
    # are picked up by the next incremental sync.
    _, date_now = es_sync.get_status(session)

    total = session.query(Document) \
        .filter(Document.redirects_to.is_(None)) \
        .count()

    def progress(count, total_count):
        # Throttle console output to at most one line per second.
        due = last_report[0] is None or \
            last_report[0] + timedelta(seconds=1) < datetime.now()
        if due:
            print('{0} of {1}'.format(count, total_count))
            last_report[0] = datetime.now()

    count = 0
    batch = ElasticBatch(client, batch_size)
    with batch:
        for doc_type in document_types:
            print('Importing document type {}'.format(doc_type))
            to_search_document = search_documents[doc_type].to_search_document
            for doc in sync.get_documents(session, doc_type, batch_size):
                batch.add(to_search_document(doc, index_name))
                count += 1
                progress(count, total)

    es_sync.mark_as_updated(session, date_now)
    duration = datetime.now() - started_at
    print('Done (duration: {0})'.format(duration))
def sync_deleted_documents(session, deleted_documents, batch_size):
    """Remove deleted documents from the Elasticsearch index.

    :param session: database session (unused here, kept for a signature
        consistent with the other sync functions).
    :param deleted_documents: iterable of ``(document_id, doc_type)``
        pairs; each pair becomes one bulk ``delete`` action.
    :param batch_size: number of bulk actions buffered per flush.
    """
    client = elasticsearch_config['client']
    index = elasticsearch_config['index']
    batch = ElasticBatch(client, batch_size)
    removed = 0
    with batch:
        for document_id, doc_type in deleted_documents:
            action = {
                '_index': index,
                '_id': document_id,
                '_type': doc_type,
                'id': document_id,
                '_op_type': 'delete'
            }
            batch.add(action)
            removed += 1
    log.info('Removed {} document(s)'.format(removed))
def sync_documents(session, changed_documents, batch_size=1000):
    """Update the search index for a set of changed documents.

    Resolves the changed documents (plus their dependent documents) per
    document type, loads them from the database and pushes them to
    Elasticsearch in bulk.

    :param session: database session used to load the documents.
    :param changed_documents: the changed documents, grouped per type by
        ``get_documents_per_type``.
    :param batch_size: number of bulk actions buffered per flush.

    Fix: the original body referenced ``batch_size`` without it being a
    parameter or a local, raising ``NameError`` at runtime (the sibling
    functions in this file receive it as a parameter). Added here as a
    keyword parameter with a default of 1000 — the same default the
    file's ``fill_index`` uses — so existing two-argument callers keep
    working.
    """
    client = elasticsearch_config['client']
    batch = ElasticBatch(client, batch_size)
    with batch:
        docs_per_type = get_documents_per_type(changed_documents)
        # Documents referencing the changed ones must be re-indexed too.
        add_dependent_documents(session, docs_per_type)
        for doc_type, document_ids in docs_per_type.items():
            if document_ids:
                docs = get_documents(session, doc_type, document_ids)
                create_search_documents(doc_type, docs, batch)
def fill_index(session, batch_size=1000):
    """Rebuild the search index from every document that is not a redirect.

    Streams all documents of each configured type into Elasticsearch in
    bulk batches, printing progress at most once per second, then records
    the sync timestamp obtained before the run.

    :param session: database session used to read documents and sync state.
    :param batch_size: number of bulk actions buffered per flush.

    NOTE(review): this re-defines ``fill_index`` declared earlier in the
    file and, being later, shadows it — confirm which definition is the
    intended one.

    Fix: the original body used ``batch_size`` in ``ElasticBatch(client,
    batch_size)`` without defining it, raising ``NameError`` at runtime
    (assuming no unseen module-level ``batch_size`` global — TODO confirm).
    Added as a keyword parameter defaulting to 1000 and passed through to
    ``sync.get_documents``, matching the earlier ``fill_index`` variant;
    existing single-argument callers keep working.
    """
    client = elasticsearch_config['client']
    index_name = elasticsearch_config['index']
    status = {
        'start_time': datetime.now(),
        'last_progress_update': None
    }
    # Capture the sync timestamp up-front so changes made while indexing
    # are picked up by the next incremental sync.
    _, date_now = es_sync.get_status(session)
    total = session.query(Document). \
        filter(Document.redirects_to.is_(None)).count()

    def progress(count, total_count):
        # Throttle console output to at most one line per second.
        if status['last_progress_update'] is None or \
                status['last_progress_update'] + timedelta(seconds=1) < \
                datetime.now():
            print('{0} of {1}'.format(count, total_count))
            status['last_progress_update'] = datetime.now()

    batch = ElasticBatch(client, batch_size)
    count = 0
    with batch:
        for doc_type in document_types:
            print('Importing document type {}'.format(doc_type))
            to_search_document = search_documents[doc_type].to_search_document
            for doc in sync.get_documents(session, doc_type, batch_size):
                batch.add(to_search_document(doc, index_name))
                count += 1
                progress(count, total)

    es_sync.mark_as_updated(session, date_now)
    duration = datetime.now() - status['start_time']
    print('Done (duration: {0})'.format(duration))