Example #1
from typing import List

def bulk_add_documents(
    elastic_search: ElasticSearcher,
    elastic_doc: ElasticDocument,
    index: str,
    chunk_size: int,
    field_data: List[dict],
    flatten_doc=False,
):
    new_docs = apply_custom_processing(elastic_search, flatten_doc)
    actions = apply_field_changes_generator(new_docs, index, field_data)
    # refresh="wait_for" makes the call return only once the new documents are visible to search.
    elastic_doc.bulk_add_generator(actions=actions,
                                   chunk_size=chunk_size,
                                   refresh="wait_for")
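A minimal usage sketch for the helper above; the ElasticSearcher/ElasticDocument constructor arguments and the field_data contents are illustrative assumptions, not taken from the example itself.

# Hypothetical call site; constructor arguments and field_data contents
# are assumptions for illustration only.
searcher = ElasticSearcher(indices=["source_index"])
writer = ElasticDocument(index="target_index")
bulk_add_documents(
    elastic_search=searcher,
    elastic_doc=writer,
    index="target_index",
    chunk_size=100,
    field_data=[{"field": "text"}],
    flatten_doc=False,
)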
Example #2
    def post(self, request, pk: int):
        ed = ElasticDocument(index=None)

        # Validate payload and project permissions.
        serializer: InsertDocumentsSerializer = self.get_serializer(data=request.data)
        serializer.is_valid(raise_exception=True)
        project = get_object_or_404(Project, pk=pk)
        if not project.users.filter(pk=request.user.pk).exists():
            raise PermissionDenied("You do not have permissions for this project!")

        # Split the documents based on whether they target an allowed index, a forbidden one, or lack index details altogether.
        documents = serializer.validated_data["documents"]
        split_fields = serializer.validated_data["split_text_in_fields"]
        indices = project.get_indices()

        correct_actions, failed_actions, missing_actions = self._split_documents_per_index(allowed_indices=indices, documents=documents)
        missing_actions, index_name, has_new_index = self._normalize_missing_index_values(missing_actions, project.pk, indices)
        split_actions = self._split_text(correct_actions + missing_actions, split_fields)

        if has_new_index:
            ed.core.create_index(index_name)
            ed.core.add_texta_facts_mapping(index_name)
            index, is_created = Index.objects.get_or_create(name=index_name, is_open=True)
            project.indices.add(index)

        # Send the documents to Elasticsearch.
        success_count, errors = ed.bulk_add_generator(actions=split_actions, stats_only=False)
        return Response(
            {
                "successfully_indexed": success_count,
                "errors": errors,
                "failed_index_permissions": len(failed_actions)
            }
        )
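For orientation, a sketch of a request payload and the response shape this view produces; only the top-level keys "documents" and "split_text_in_fields" and the three response keys come from the code above, the per-document bodies are assumptions.

# Illustrative payload; the document bodies are assumptions, only the
# top-level keys are read from serializer.validated_data above.
payload = {
    "documents": [
        {"text": "first document body"},
        {"text": "second document body"},
    ],
    "split_text_in_fields": ["text"],
}
# A successful POST returns a response shaped like:
# {"successfully_indexed": 2, "errors": [], "failed_index_permissions": 0}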
Example #3
def bulk_add_documents(elastic_search: ElasticSearcher, elastic_doc: ElasticDocument, index: str, chunk_size: int, flatten_doc=False):
    new_docs = apply_elastic_search(elastic_search, flatten_doc)
    actions = annotator_bulk_generator(new_docs, index)
    # refresh="wait_for" makes the call return only once the new documents are visible to search.
    elastic_doc.bulk_add_generator(actions=actions, chunk_size=chunk_size, refresh="wait_for")
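Note on the refresh argument: passing refresh="wait_for" makes the bulk call block until Elasticsearch has refreshed the index and the new documents are visible to search, whereas refresh=False (the Elasticsearch default) returns immediately and leaves the documents to become searchable on the next periodic refresh.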