from typing import List


def bulk_add_documents(
    elastic_search: ElasticSearcher,
    elastic_doc: ElasticDocument,
    index: str,
    chunk_size: int,
    field_data: List[dict],
    flatten_doc=False,
):
    new_docs = apply_custom_processing(elastic_search, flatten_doc)
    actions = apply_field_changes_generator(new_docs, index, field_data)
    # Block until the new documents are searchable before returning, hence refresh="wait_for".
    elastic_doc.bulk_add_generator(actions=actions, chunk_size=chunk_size, refresh="wait_for")
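# A minimal usage sketch, not part of the module: the ElasticSearcher
# constructor arguments and the field_data payload shape are assumptions
# for illustration, not confirmed by the code above.
if __name__ == "__main__":
    searcher = ElasticSearcher(indices=["my_index"])  # assumed constructor signature
    doc_handler = ElasticDocument(index="my_index")   # mirrors the ElasticDocument(index=...) usage below
    bulk_add_documents(
        elastic_search=searcher,
        elastic_doc=doc_handler,
        index="my_index",
        chunk_size=100,
        field_data=[{"path": "text", "field_type": "text"}],  # hypothetical field description
        flatten_doc=False,
    )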
def post(self, request, pk: int):
    ed = ElasticDocument(index=None)

    # Validate the payload and the user's project permissions.
    serializer: InsertDocumentsSerializer = self.get_serializer(data=request.data)
    serializer.is_valid(raise_exception=True)
    project = get_object_or_404(Project, pk=pk)
    if not project.users.filter(pk=request.user.pk).exists():
        raise PermissionDenied("You do not have permissions for this project!")

    # Split the documents by whether their target index is accessible to the
    # project or index information is missing from the document entirely.
    documents = serializer.validated_data["documents"]
    split_fields = serializer.validated_data["split_text_in_fields"]
    indices = project.get_indices()
    correct_actions, failed_actions, missing_actions = self._split_documents_per_index(allowed_indices=indices, documents=documents)
    missing_actions, index_name, has_new_index = self._normalize_missing_index_values(missing_actions, project.pk, indices)
    split_actions = self._split_text(correct_actions + missing_actions, split_fields)

    # Documents that lacked index information go into a fresh project index.
    if has_new_index:
        ed.core.create_index(index_name)
        ed.core.add_texta_facts_mapping(index_name)
        index, is_created = Index.objects.get_or_create(name=index_name, is_open=True)
        project.indices.add(index)

    # Send the documents to Elasticsearch.
    success_count, errors = ed.bulk_add_generator(actions=split_actions, stats_only=False)

    return Response(
        {
            "successfully_indexed": success_count,
            "errors": errors,
            "failed_index_permissions": len(failed_actions),
        }
    )
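# A hedged client-side sketch of calling this endpoint. The route, host, and
# auth scheme are hypothetical; only the payload keys ("documents",
# "split_text_in_fields") and the response keys come from the view above.
import requests

payload = {
    "documents": [{"text": "Example document body.", "author": "jane"}],  # hypothetical document shape
    "split_text_in_fields": ["text"],
}
response = requests.post(
    "http://localhost:8000/projects/1/elastic/documents/",  # hypothetical route
    json=payload,
    headers={"Authorization": "Token <token>"},  # hypothetical auth scheme
)
print(response.json())  # {"successfully_indexed": ..., "errors": [...], "failed_index_permissions": ...}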
def bulk_add_documents(elastic_search: ElasticSearcher, elastic_doc: ElasticDocument, index: str, chunk_size: int, flatten_doc=False):
    new_docs = apply_elastic_search(elastic_search, flatten_doc)
    actions = annotator_bulk_generator(new_docs, index)
    # Block until the new documents are searchable before returning, hence refresh="wait_for".
    elastic_doc.bulk_add_generator(actions=actions, chunk_size=chunk_size, refresh="wait_for")