def get(self, request, pk: int, index: str, document_id: str):
    """Return a single document fetched by ID from the given index.

    Args:
        request: The incoming request, forwarded to the permission check.
        pk: Project primary key, forwarded to the permission check.
        index: Name of the index the document is looked up in.
        document_id: ID of the document to fetch.

    Returns:
        A ``Response`` wrapping whatever ``ElasticDocument.get`` returned.

    Raises:
        NotFound: If the lookup yields a falsy result.
    """
    # Presumably raises when the user may not access the index/project;
    # the helper is defined outside this block.
    validate_index_and_project_perms(request, pk, index)

    found = ElasticDocument(index).get(document_id)
    if found:
        return Response(found)
    raise NotFound(f"Could not find document with ID '{document_id}' from index '{index}'!")
def post(self, request, pk: int, index: str, document_id: str):
    """Remove the given facts from a document and store the result.

    A stored fact is removed when at least one fact in the request payload
    is a subset of it, i.e. every key/value pair of the requested fact is
    present in the stored fact. All other stored facts are kept exactly
    once and in their original order.

    Args:
        request: The incoming request; ``request.data`` is validated by the
            view's serializer and may carry a ``facts`` list.
        pk: Project primary key, forwarded to the permission check.
        index: Name of the index holding the document.
        document_id: ID of the document whose facts are edited.

    Returns:
        A ``Response`` with a confirmation message.

    Raises:
        NotFound: If the document lookup yields a falsy result.
    """
    validate_index_and_project_perms(request, pk, index)

    serializer = self.get_serializer(data=request.data)
    # raise_exception=True makes the serializer raise on invalid input,
    # so no branching on the return value is needed.
    serializer.is_valid(raise_exception=True)

    ed = ElasticDocument(index)
    document = ed.get(document_id, fields=[TEXTA_TAGS_KEY])
    if not document:
        raise NotFound(f"Could not find document with ID '{document_id}' from index '{index}'!")

    document = document.get("_source")
    target_facts = serializer.validated_data.get("facts", [])
    existing_facts = document.get(TEXTA_TAGS_KEY, [])

    # Keep a stored fact only if NO requested fact matches it. The previous
    # nested loop appended the stored fact once per non-matching request
    # fact, which duplicated survivors, failed to remove facts when several
    # targets were given, and dropped everything for an empty target list.
    document[TEXTA_TAGS_KEY] = [
        existing_fact
        for existing_fact in existing_facts
        if not any(fact.items() <= existing_fact.items() for fact in target_facts)
    ]

    ed.update(index, document_id, doc=document)
    return Response({"message": f"Removed given facts from document with the ID of {document_id}!"})
def pull_document_by_id(self, request, pk=None, project_pk=None):
    """Fetch one document by ID from the annotator's indices.

    Args:
        request: The incoming request; ``request.data`` must validate
            against the view's serializer and provide ``document_id``.
        pk: Unused in this method body.
        project_pk: Unused in this method body.

    Returns:
        A ``Response`` with the document after it has been passed through
        ``self._process_document_output``, or a 404 ``Response`` with a
        message when the lookup yields a falsy result.
    """
    annotator: Annotator = self.get_object()

    serializer = self.get_serializer(data=request.data)
    serializer.is_valid(raise_exception=True)

    elastic = ElasticDocument(index=annotator.get_indices())
    wanted_id = serializer.validated_data["document_id"]
    raw_document = elastic.get(wanted_id)

    # Guard clause: bail out early when nothing was found.
    if not raw_document:
        return Response({"message": "No such document!"}, status=status.HTTP_404_NOT_FOUND)

    processed = self._process_document_output(raw_document, annotator)
    return Response(processed)
def post(self, request, project_pk: int):
    """Look up a document by ID across the project's indices.

    Args:
        request: The incoming request; ``request.data`` must validate
            against ``ProjectDocumentSerializer`` and provide ``indices``
            and ``doc_id``.
        project_pk: Primary key of the project to search in.

    Returns:
        A 200 ``Response`` wrapping the result of ``ElasticDocument.get``.
        NOTE(review): the lookup result is passed through unchecked, so a
        missing document is returned as-is rather than as an error.

    Raises:
        SerializerNotValid: If the payload fails validation.
        ProjectValidationFailed: If no indices could be resolved.
        InvalidInputDocument: If ``doc_id`` is falsy.
    """
    project: Project = get_object_or_404(Project, pk=project_pk)
    self.check_object_permissions(request, project)

    serializer = ProjectDocumentSerializer(data=request.data)
    if not serializer.is_valid():
        raise SerializerNotValid(detail=serializer.errors)
    payload = serializer.validated_data

    indices = project.get_available_or_all_project_indices(payload["indices"])
    if not indices:
        raise ProjectValidationFailed(detail="No indices supplied and project has no indices")

    doc_id = payload["doc_id"]
    if not doc_id:
        raise InvalidInputDocument(detail="No doc_id supplied")

    return Response(ElasticDocument(index=indices).get(doc_id), status=status.HTTP_200_OK)
def skip_document(self, request, pk=None, project_pk=None):
    """Mark a document as skipped unless it has already been annotated.

    A document counts as already annotated when any entry under its
    ``texta_annotator`` field carries a truthy ``processed_timestamp_utc``.

    Args:
        request: The incoming request; ``request.data`` must validate
            against the view's serializer and provide ``document_id`` and
            ``index``. ``request.user`` is recorded as the skipping user.
        pk: Unused in this method body.
        project_pk: Unused in this method body.

    Returns:
        A ``Response`` whose ``detail`` says the document was skipped or is
        already annotated, or a 404 ``Response`` when the document lookup
        yields a falsy result.
    """
    serializer: DocumentIDSerializer = self.get_serializer(data=request.data)
    serializer.is_valid(raise_exception=True)

    annotator: Annotator = self.get_object()
    ed = ElasticDocument(index=annotator.get_indices())
    document_id = serializer.validated_data["document_id"]

    document = ed.get(document_id)
    if not document:
        # Without this guard a missing document crashed on the "_source"
        # lookup below instead of producing a client error.
        return Response(
            {"detail": f"Could not find document with ID: {document_id}"},
            status=404,
        )

    # "or []" keeps the original tolerance for a missing or empty field.
    texta_annotations = document["_source"].get("texta_annotator") or []
    already_annotated = any(
        texta_annotation.get("processed_timestamp_utc")
        for texta_annotation in texta_annotations
    )
    if already_annotated:
        return Response({"detail": f"Document with ID: {document_id} is already annotated"})

    annotator.skip_document(
        document_id,
        serializer.validated_data["index"],
        user=request.user,
    )
    return Response({"detail": f"Skipped document with ID: {document_id}"})