Example #1
0
def fill_index(session, batch_size=1000):
    """Rebuild the whole search index from the database.

    Every document that is not a redirect is converted to its search
    representation and pushed to Elasticsearch in batches. Progress is
    printed at most once per second. When done, the sync status is marked
    as updated with the timestamp taken before indexing started, so that
    changes made during the run are picked up by the next incremental sync.
    """
    es_client = elasticsearch_config['client']
    es_index = elasticsearch_config['index']

    started_at = datetime.now()
    # Single-element list so the closure below can rebind the timestamp.
    last_report = [None]

    _, sync_date = es_sync.get_status(session)

    doc_total = session.query(Document) \
        .filter(Document.redirects_to.is_(None)) \
        .count()

    def report_progress(done):
        # Throttle console output to one line per second at most.
        now = datetime.now()
        if last_report[0] is None or \
                last_report[0] + timedelta(seconds=1) < now:
            print('{0} of {1}'.format(done, doc_total))
            last_report[0] = now

    batch = ElasticBatch(es_client, batch_size)
    processed = 0
    with batch:
        for doc_type in document_types:
            print('Importing document type {}'.format(doc_type))
            build_search_doc = search_documents[doc_type].to_search_document

            for document in sync.get_documents(session, doc_type, batch_size):
                batch.add(build_search_doc(document, es_index))
                processed += 1
                report_progress(processed)

    es_sync.mark_as_updated(session, sync_date)

    print('Done (duration: {0})'.format(datetime.now() - started_at))
Example #2
0
def sync_deleted_documents(session, deleted_documents, batch_size):
    """Remove the given documents from the search index.

    :param deleted_documents: iterable of ``(document_id, doc_type)`` pairs
        identifying the index entries to delete.
    :param batch_size: number of delete actions buffered per bulk request.
    """
    index_name = elasticsearch_config['index']
    es_batch = ElasticBatch(elasticsearch_config['client'], batch_size)
    removed = 0
    with es_batch:
        for doc_id, doc_type in deleted_documents:
            # Bulk 'delete' action for this document.
            es_batch.add({
                '_op_type': 'delete',
                '_index': index_name,
                '_type': doc_type,
                '_id': doc_id,
                'id': doc_id
            })
            removed += 1
    log.info('Removed {} document(s)'.format(removed))
Example #3
0
def sync_documents(session, changed_documents, batch_size=1000):
    """Push changed documents (and their dependents) to the search index.

    :param session: database session used to load the documents.
    :param changed_documents: iterable describing the documents that
        changed, grouped per type via ``get_documents_per_type``.
    :param batch_size: number of index actions buffered per bulk request.
        Previously this name was referenced without being defined in the
        function (NameError unless a module-level global existed); it is now
        an explicit, backward-compatible keyword parameter, consistent with
        ``fill_index(session, batch_size=1000)``.
    """
    client = elasticsearch_config['client']
    batch = ElasticBatch(client, batch_size)
    with batch:
        docs_per_type = get_documents_per_type(changed_documents)
        # Documents referencing the changed ones must be re-indexed too.
        add_dependent_documents(session, docs_per_type)
        for doc_type, document_ids in docs_per_type.items():
            if document_ids:
                docs = get_documents(session, doc_type, document_ids)
                create_search_documents(doc_type, docs, batch)
Example #4
0
def fill_index(session, batch_size=1000):
    """Rebuild the whole search index from the database.

    Every document that is not a redirect is converted to its search
    representation and pushed to Elasticsearch in batches. Progress is
    printed at most once per second. When done, the sync status is marked
    as updated with the timestamp taken before indexing started.

    :param session: database session used to query documents.
    :param batch_size: number of index actions buffered per bulk request.
        Previously ``batch_size`` was referenced without being defined
        anywhere in the function (NameError unless a module-level global
        existed); it is now an explicit, backward-compatible keyword
        parameter matching the other ``fill_index`` variant.
    """
    client = elasticsearch_config['client']
    index_name = elasticsearch_config['index']

    status = {
        'start_time': datetime.now(),
        'last_progress_update': None
    }

    # Capture the sync timestamp BEFORE indexing so that concurrent changes
    # are re-processed by the next incremental sync.
    _, date_now = es_sync.get_status(session)

    total = session.query(Document). \
        filter(Document.redirects_to.is_(None)).count()

    def progress(count, total_count):
        # Throttle console output to at most one line per second.
        if status['last_progress_update'] is None or \
                status['last_progress_update'] + timedelta(seconds=1) < \
                datetime.now():
            print('{0} of {1}'.format(count, total_count))
            status['last_progress_update'] = datetime.now()

    batch = ElasticBatch(client, batch_size)
    count = 0
    with batch:
        for doc_type in document_types:
            print('Importing document type {}'.format(doc_type))
            to_search_document = search_documents[doc_type].to_search_document

            # NOTE(review): unlike the other variant, batch_size is not
            # forwarded to sync.get_documents here — confirm whether that
            # overload exists before changing this call.
            for doc in sync.get_documents(session, doc_type):
                batch.add(to_search_document(doc, index_name))

                count += 1
                progress(count, total)

    es_sync.mark_as_updated(session, date_now)

    duration = datetime.now() - status['start_time']
    print('Done (duration: {0})'.format(duration))