Example #1
0
 def get(self, request, pk: int, index: str, document_id: str):
     """Return one document from ``index``, looked up by ``document_id``.

     ``validate_index_and_project_perms`` is called with the request, the
     project key and the index before the lookup takes place.

     Raises:
         NotFound: when the lookup yields a falsy result.
     """
     validate_index_and_project_perms(request, pk, index)
     found = ElasticDocument(index).get(document_id)
     if found:
         return Response(found)
     raise NotFound(f"Could not find document with ID '{document_id}' from index '{index}'!")
Example #2
0
    def post(self, request, pk: int, index: str, document_id: str):
        """Remove the facts listed in the request body from one document.

        A stored fact is dropped when at least one submitted fact is a subset
        of it, i.e. every key/value pair of the submitted fact also appears in
        the stored fact. Every other stored fact is kept exactly once, in its
        original order. Submitting no facts leaves the stored facts unchanged.

        Raises:
            NotFound: when the document lookup yields a falsy result.
        """
        validate_index_and_project_perms(request, pk, index)
        serializer = self.get_serializer(data=request.data)
        # raise_exception=True means invalid input never gets past this line.
        serializer.is_valid(raise_exception=True)

        ed = ElasticDocument(index)
        document = ed.get(document_id, fields=[TEXTA_TAGS_KEY])
        if not document:
            raise NotFound(
                f"Could not find document with ID '{document_id}' from index '{index}'!"
            )

        source = document.get("_source")
        target_facts = serializer.validated_data.get("facts", [])
        existing_facts = source.get(TEXTA_TAGS_KEY, [])

        # Decide once per stored fact. Appending inside a nested loop over the
        # targets would keep a fact that matches only some targets, duplicate
        # the ones matching none, and drop everything when no targets are given.
        remaining_facts = [
            existing_fact
            for existing_fact in existing_facts
            if not any(
                target.items() <= existing_fact.items() for target in target_facts
            )
        ]

        source[TEXTA_TAGS_KEY] = remaining_facts
        ed.update(index, document_id, doc=source)
        return Response({
            "message":
            f"Removed given facts from document with the ID of {document_id}!"
        })
Example #3
0
    def pull_document_by_id(self, request, pk=None, project_pk=None):
        """Look up a document by ID in the annotator's indices.

        Responds with the output of ``_process_document_output`` for the
        document, or with a 404 message payload when the lookup yields a
        falsy result.
        """
        annotator: Annotator = self.get_object()
        payload = self.get_serializer(data=request.data)
        payload.is_valid(raise_exception=True)

        elastic = ElasticDocument(index=annotator.get_indices())
        hit = elastic.get(payload.validated_data["document_id"])
        if not hit:
            return Response({"message": "No such document!"},
                            status=status.HTTP_404_NOT_FOUND)

        return Response(self._process_document_output(hit, annotator))
Example #4
0
    def post(self, request, project_pk: int):
        """Return the lookup result for ``doc_id`` from the project's indices.

        Raises:
            SerializerNotValid: when the request body fails validation.
            ProjectValidationFailed: when no indices are resolved for the project.
            InvalidInputDocument: when the validated ``doc_id`` is falsy.
        """
        project: Project = get_object_or_404(Project, pk=project_pk)
        self.check_object_permissions(request, project)

        incoming = ProjectDocumentSerializer(data=request.data)
        if not incoming.is_valid():
            raise SerializerNotValid(detail=incoming.errors)
        fields = incoming.validated_data

        usable_indices = project.get_available_or_all_project_indices(fields["indices"])
        if not usable_indices:
            raise ProjectValidationFailed(detail="No indices supplied and project has no indices")

        wanted_id = fields["doc_id"]
        if not wanted_id:
            raise InvalidInputDocument(detail="No doc_id supplied")

        return Response(
            ElasticDocument(index=usable_indices).get(wanted_id),
            status=status.HTTP_200_OK,
        )
Example #5
0
    def skip_document(self, request, pk=None, project_pk=None):
        """Skip a document for this annotator unless it is already annotated.

        The document is looked up in the annotator's indices. When any entry
        under its ``texta_annotator`` field has a truthy
        ``processed_timestamp_utc``, the document is reported as already
        annotated and nothing is skipped. Otherwise the skip is recorded
        through ``annotator.skip_document``.

        Returns:
            A ``Response`` carrying a ``detail`` message. The status is 404
            when the document lookup yields a falsy result.
        """
        serializer: DocumentIDSerializer = self.get_serializer(
            data=request.data)
        serializer.is_valid(raise_exception=True)
        annotator: Annotator = self.get_object()

        ed = ElasticDocument(index=annotator.get_indices())
        document_id = serializer.validated_data["document_id"]
        document = ed.get(document_id)
        if not document:
            # Without this guard a missing document fails on the subscript
            # below with a TypeError instead of a clean client error.
            return Response(
                {"detail": f"Could not find document with ID: {document_id}"},
                status=404,
            )

        # `or []` keeps a present-but-empty/None field from breaking iteration.
        texta_annotations = document["_source"].get("texta_annotator") or []
        already_annotated = any(
            annotation.get("processed_timestamp_utc")
            for annotation in texta_annotations
        )
        if already_annotated:
            return Response({
                "detail":
                f"Document with ID: {document_id} is already annotated"
            })

        annotator.skip_document(document_id,
                                serializer.validated_data["index"],
                                user=request.user)
        return Response({
            "detail":
            f"Skipped document with ID: {document_id}"
        })