Esempio n. 1
0
def _save_annotation(annotator_data: Dict, user, field_value_id=None) -> Dict:
    """
    Create or update a document field value from an annotation.

    Accepts the JSON structure generated by annotator.js. Only the first
    entry of ``ranges`` is used; its offsets select the annotated text from
    the document's full text.

    :param annotator_data: annotation payload (``document_id``, ``field_id``,
        ``value`` and ``ranges`` are read).
    :param user: user passed on to the field type adapter.
    :param field_value_id: when truthy, the existing field value with this
        primary key is updated instead of a new one being saved.
    :return: DTO of the resulting field value.
    """
    document = Document.objects.get(pk=annotator_data['document_id'])
    field = DocumentField.objects.get(pk=annotator_data.get('field_id'))
    new_value = annotator_data.get('value')

    first_range = annotator_data['ranges'][0]
    start = first_range['startOffset']
    end = first_range['endOffset']
    annotated_text = document.full_text[start:end]

    adapter = FIELD_TYPES_REGISTRY.get(field.type)

    if not field_value_id:
        saved = adapter.save_value(document, field, start, end,
                                   annotated_text, new_value, user, True)
    else:
        existing = DocumentFieldValue.objects.get(pk=field_value_id)
        saved = adapter.update(existing, document, field, start, end,
                               annotated_text, new_value, user)

    _trigger_retraining_model(document)

    return _to_dto(saved)
Esempio n. 2
0
    def get(self, request, subscription_id, content_format, **_kwargs):
        """
        Render an example notification for a subscription, optionally sending it.

        The document comes from the ``PARAM_DOCUMENT`` query parameter when it
        is provided; otherwise the first document of the subscription's
        document type is used. For "document assigned" / "document changed"
        events a random subset of fields receives example values so that the
        notification contains changes.

        :return: the rendered notification as HTML or plain text (depending on
            ``content_format``); a 400 response when no suitable document is
            found; a 500 response with the collected error when sending fails.
        """
        send_email = as_bool(request.GET, self.PARAM_SEND, False)

        subscription = DocumentNotificationSubscription.objects.get(pk=subscription_id)
        document_type = subscription.document_type

        requested_document_id = as_int(request.GET, self.PARAM_DOCUMENT, None)
        if requested_document_id:
            document = Document.objects.filter(document_type=document_type, pk=requested_document_id).first()
            if not document:
                message = 'Document with id = {0} not found or has wrong type.'.format(requested_document_id)
                return HttpResponseBadRequest(message)
        else:
            document = Document.objects.filter(document_type=document_type).first()
            if not document:
                message = ('Document id not provided and '
                           'there are no example documents of type {0}.'.format(document_type.code))
                return HttpResponseBadRequest(message)

        handlers = build_field_handlers(document_type, include_suggested_fields=False)
        current_values = get_document_field_values(document_type, document.pk, handlers=handlers)

        example_changes = {}
        change_events = {DocumentAssignedEvent.code, DocumentChangedEvent.code}
        if subscription.event in change_events and current_values:
            for handler in handlers:
                # Only fields whose random draw is <= 0.3 get an example change.
                if random.random() > 0.3:
                    continue
                adapter = FIELD_TYPES_REGISTRY.get(handler.field_type)  # type: FieldType
                field = DocumentField.objects.filter(code=handler.field_code).first()
                if not field:
                    continue
                example_changes[handler.field_code] = (
                    adapter.example_python_value(field=field),
                    current_values.get(handler.field_code),
                )

        notification = render_notification(already_sent_user_ids=set(),
                                           subscription=subscription,
                                           document=document,
                                           field_handlers=handlers,
                                           field_values=current_values,
                                           changes=example_changes,
                                           changed_by_user=request.user)
        if not notification:
            return HttpResponse('Notification contains no data.', status=200)

        wants_html = content_format == self.FORMAT_HTML
        content = notification.html if wants_html else notification.txt
        content_type = 'text/html' if wants_html else 'text/plain'

        if send_email:
            collecting_log = ErrorCollectingLogger()
            notification.send(log=collecting_log)
            error = collecting_log.get_error()
            if error:
                return HttpResponseServerError(content=error, content_type='application/json')

        return HttpResponse(content=content, content_type=content_type, status=200)
Esempio n. 3
0
def _save_annotation(annotator_data: Dict, user, field_value_id=None) -> Dict:
    """
    Store a new annotation as a document field value.

    Accepts the JSON structure generated by annotator.js. The first entry of
    ``ranges`` defines the annotated span; the first sentence text unit
    overlapping that span is passed to the field type adapter together with
    the value.

    :param annotator_data: annotation payload (``document_id``, ``field_id``,
        ``value`` and ``ranges`` are read).
    :param user: user passed on to the field type adapter.
    :param field_value_id: accepted but not used by this implementation.
    :return: DTO of the saved field value.
    """
    document = Document.objects.get(pk=annotator_data['document_id'])
    field = DocumentField.objects.get(pk=annotator_data.get('field_id'))
    new_value = annotator_data.get('value')

    first_range = annotator_data['ranges'][0]
    start = first_range['startOffset']
    end = first_range['endOffset']
    annotated_text = document.full_text[start:end]

    adapter = FIELD_TYPES_REGISTRY.get(field.type)

    # A sentence overlaps the selection when it starts no later than the
    # selection ends and ends no earlier than the selection starts.
    overlapping_sentences = TextUnit.objects.filter(
        document=document,
        unit_type='sentence',
        location_start__lte=end,
        location_end__gte=start)
    sentence = overlapping_sentences.first()

    saved = adapter.save_value(document, field, start, end, annotated_text,
                               sentence, new_value, user, True)

    _trigger_retraining_model(document, field)

    return _to_dto(saved)
    def load_field_values(task: ExtendedTask, document: Document,
                          document_fields: dict, field_owners: dict) -> Dict:
        """
        Convert raw field values keyed by field code / alias into detected values.

        Field codes and aliases are matched case-insensitively against the
        fields of the document's type. Each raw value (or each item, when the
        raw value is a list) is passed through the field type adapter; values
        the adapter turns into ``None`` are dropped.

        :param task: task used to log a warning for unknown field aliases.
        :param document: document whose type defines the known fields.
        :param document_fields: mapping of field code / alias to a raw value
            or a list of raw values; ``None`` entries are skipped.
        :param field_owners: mapping of field code / alias to the user stored
            on the created values.
        :return: mapping of field to a list of ``DetectedFieldValue``; empty
            when the document has no type.
        """
        document_type = document.document_type
        fields_to_values = {}

        if not document_type:
            return fields_to_values

        field_code_aliases = document_type.field_code_aliases

        field_codes_to_fields = {
            f.code.lower(): f
            for f in document_type.fields.all()
        }

        if field_code_aliases:
            # Aliases resolve to the field of the code they point to; an alias
            # pointing to an unknown code resolves to None and is reported
            # below like any other unknown alias.
            field_codes_to_fields.update({
                field_alias.lower():
                field_codes_to_fields.get(field_code.lower())
                for field_alias, field_code in field_code_aliases.items()
                if field_alias and field_code
            })

        for field_alias, field_value_text in document_fields.items():
            if field_value_text is None:
                continue

            field = field_codes_to_fields.get(field_alias.lower())
            if not field:
                task.log_warn(
                    'Field alias "{0}" not found for document type {1}'.format(
                        field_alias, document_type.code))
                continue
            field_type_adapter = FIELD_TYPES_REGISTRY.get(
                field.type)  # type: FieldType
            field_owner = field_owners.get(field_alias)

            # isinstance (rather than an exact type check) also treats list
            # subclasses as a collection of candidate values.
            if isinstance(field_value_text, list):
                possible_value_texts = field_value_text
            else:
                possible_value_texts = [field_value_text]

            for possible_value_text in possible_value_texts:
                maybe_value = field_type_adapter.extract_from_possible_value_text(
                    field, possible_value_text)
                if maybe_value is None:
                    continue
                fields_to_values.setdefault(field, []).append(
                    DetectedFieldValue(field, maybe_value, user=field_owner))

        return fields_to_values
    def detect_field_values(cls, log: ProcessLogger, doc: Document,
                            field: DocumentField) -> List[DetectedFieldValue]:
        """
        Detect values of ``field`` from the values of the fields it depends on.

        The stored (not user-removed) values of the dependency fields are
        merged into python values, converted to strings and matched against
        the field detectors configured for the document type and field.
        For fields that are neither multi-value nor choice fields, detection
        stops at the first detected value.

        :param log: not used by this implementation.
        :param doc: document whose stored field values are examined.
        :param field: field to detect values for.
        :return: list of detected values (without a text unit).
        """
        source_fields = list(field.depends_on_fields.all())

        stored_values = doc.documentfieldvalue_set \
            .filter(removed_by_user=False) \
            .filter(field__in=source_fields)

        merged_values = merge_document_field_values_to_python_value(
            list(stored_values))

        # Restrict to the dependency fields; missing values become None.
        values_by_code = {
            source_field.code: merged_values.get(source_field.code)
            for source_field in source_fields
        }

        detectors = DocumentFieldDetector.objects.filter(
            document_type=doc.document_type, field=field)
        adapter = FIELD_TYPES_REGISTRY.get(field.type)  # type: FieldType

        def accepts_single_value():
            # Evaluated lazily, only once something has been matched.
            return not adapter.multi_value and not field.is_choice_field()

        found = []  # type: List[DetectedFieldValue]

        for raw_value in values_by_code.values():
            if not raw_value:
                continue
            text = str(raw_value)

            for detector in detectors:
                if not detector.matches(text):
                    continue

                value = detector.detected_value
                hint_name = None
                if adapter.value_aware:
                    hint_name = detector.extraction_hint or ValueExtractionHint.TAKE_FIRST.name
                    value, hint_name = adapter.get_or_extract_value(
                        doc, field, value, hint_name, text)
                    if value is None:
                        continue

                found.append(DetectedFieldValue(field, value, None, hint_name))

                if accepts_single_value():
                    break

            if found and accepts_single_value():
                break

        return found
Esempio n. 6
0
    def detect_field_values(cls, log: ProcessLogger, doc: Document,
                            field: DocumentField) -> List[DetectedFieldValue]:
        """
        Detect values of ``field`` in the text units of a document.

        Stop-word detection on the full document text runs first; when it
        reports a detection, its result (or an empty list) is returned.
        Otherwise the document's text units of the field's unit type are
        scanned in order of location and matched against the field detectors
        configured for the document type and field. For fields that are
        neither multi-value nor choice fields, scanning stops at the first
        detected value.

        :param log: not used by this implementation.
        :param doc: document to scan.
        :param field: field to detect values for.
        :return: list of detected values, each linked to its text unit.
        """
        stop_words_hit, stop_words_values = detect_with_stop_words_by_field_and_full_text(
            field, doc.full_text)
        if stop_words_hit:
            return stop_words_values or []

        text_units = TextUnit.objects \
            .filter(document=doc) \
            .filter(unit_type=field.text_unit_type) \
            .order_by('location_start', 'pk')

        detectors = DocumentFieldDetector.objects.filter(
            document_type=doc.document_type, field=field)

        adapter = FIELD_TYPES_REGISTRY.get(field.type)  # type: FieldType

        def accepts_single_value():
            # Evaluated lazily, only once something has been matched.
            return not adapter.multi_value and not field.is_choice_field()

        found = []  # type: List[DetectedFieldValue]

        for text_unit in text_units.iterator():
            for detector in detectors:
                if not detector.matches(text_unit.text):
                    continue

                value = detector.detected_value
                hint_name = None
                if adapter.value_aware:
                    hint_name = detector.extraction_hint or ValueExtractionHint.TAKE_FIRST.name
                    value, hint_name = adapter.get_or_extract_value(
                        doc, field, value, hint_name, text_unit.text)
                    if value is None:
                        continue

                found.append(DetectedFieldValue(field, value, text_unit, hint_name))

                if accepts_single_value():
                    break

            if found and accepts_single_value():
                break

        return found
Esempio n. 7
0
    def delete(self, request, *args, pk):
        """
        Delete the annotation (document field value) with primary key ``pk``.

        The deletion is delegated to the adapter of the value's field type,
        then ``_trigger_retraining_model`` is called for the document and field.

        :return: JSON response with the DTO of the deleted field value.
        """
        target = DocumentFieldValue.objects.get(pk=pk)
        # Read the relations before the value is deleted.
        document, field = target.document, target.field

        adapter = FIELD_TYPES_REGISTRY.get(field.type)
        adapter.delete(target)

        _trigger_retraining_model(document, field, request.user.id)
        return JsonResponse(_to_dto(target))
Esempio n. 8
0
def detect_with_stop_words_by_field_and_full_text(field: DocumentField, full_text: str) -> Tuple[bool, Optional[List]]:
    """
    Try to detect a field value by applying the field's stop words to a text.

    :param field: field providing the stop words and the field type.
    :param full_text: text checked against the stop words.
    :return: ``(False, None)`` when the field requires text annotations, has
        no stop words, or nothing was detected; ``(True, None)`` when a stop
        word was detected without a value; otherwise ``(True, [value])`` with
        a single ``DetectedFieldValue`` built by the field type adapter.
    """
    nothing_detected = (False, None)

    if field.requires_text_annotations:
        return nothing_detected

    compiled_stop_words = compile_stop_words(field.stop_words)
    if not compiled_stop_words:
        return nothing_detected

    adapter = FIELD_TYPES_REGISTRY.get(field.type)  # type: FieldType
    detected, raw_value = detect_value_with_stop_words(compiled_stop_words, full_text)
    if not detected:
        return nothing_detected

    if raw_value is None:
        return True, None

    parsed_value = adapter.extract_from_possible_value_text(field, raw_value)
    return True, [DetectedFieldValue(field, parsed_value)]
Esempio n. 9
0
    def post(self, request, *args, **kwargs):
        """
        Suggest a field value before an annotation is created.

        Accepts the same JSON structure as the annotation saving endpoint;
        ``document_id``, ``field_id`` and ``quote`` are read from it.

        :return: response of the form ``{'suggested_value': ...}`` holding the
            value the field type adapter suggests for the quoted text.
        """
        payload = request.data
        document = Document.objects.get(pk=payload['document_id'])
        field = DocumentField.objects.get(pk=payload.get('field_id'))
        quoted_text = payload['quote']

        adapter = FIELD_TYPES_REGISTRY.get(field.type)
        suggestion = adapter.suggest_value(document, field, quoted_text)

        return Response({'suggested_value': suggestion})
Esempio n. 10
0
    def delete(self, request, *args, pk):
        """
        Delete an annotation (document field value) by primary key.

        Values that have ``created_by_id`` set are deleted through the field
        type adapter; values without it are only marked as removed by user.
        Either way the value is flagged dirty inside the same transaction, and
        ``_trigger_retraining_model`` is called afterwards.

        :return: response with the DTO of the affected field value.
        """
        target = DocumentFieldValue.objects.get(pk=pk)
        document, field = target.document, target.field
        adapter = FIELD_TYPES_REGISTRY.get(field.type)

        with transaction.atomic():
            if field_value_has_no_creator := target.created_by_id is None:
                adapter.mark_removed_by_user(target)
            else:
                adapter.delete(target)

            DocumentTypeField.objects.set_dirty_for_value(target)

        _trigger_retraining_model(document, field, request.user.id)
        return Response(_to_dto(target))
Esempio n. 11
0
    def detect_field_values_with_regexps(document: Document,
                                         field: DocumentField,
                                         sentence_text_units: List[TextUnit],
                                         do_not_write: bool) -> int:
        """
        Detect values of ``field`` in the given text units and save them.

        Every text unit is matched against the field detectors configured for
        the document type and field. For fields that are neither multi-value
        nor choice fields, only the first detected value per text unit is kept.

        :param document: document the text units belong to.
        :param field: field to detect values for.
        :param sentence_text_units: text units to scan.
        :param do_not_write: passed through to ``save_detected_values``.
        :return: the result of ``DetectFieldValues.save_detected_values``.
        """
        detectors = DocumentFieldDetector.objects.filter(
            document_type=document.document_type, field=field)
        adapter = FIELD_TYPES_REGISTRY.get(field.type)  # type: FieldType

        found = []  # type: List[DetectedFieldValue]

        for text_unit in sentence_text_units:
            for detector in detectors:
                if not detector.matches(text_unit.text):
                    continue

                value = detector.detected_value
                hint_name = None
                if adapter.value_aware:
                    hint_name = detector.extraction_hint or ValueExtractionHint.TAKE_FIRST.name
                    value, hint_name = adapter.get_or_extract_value(
                        document, field, value, hint_name, text_unit.text)
                    if value is None:
                        continue

                # NOTE(review): other call sites in this file pass the field
                # first (DetectedFieldValue(field, value, text_unit, hint_name));
                # confirm this argument order against the constructor.
                found.append(DetectedFieldValue(text_unit, value, hint_name))

                if not adapter.multi_value and not field.is_choice_field():
                    break

        return DetectFieldValues.save_detected_values(
            document, field, adapter, found, do_not_write)
Esempio n. 12
0
    def detect_initial_field_values_for_document_field(document, field,
                                                       sentence_text_units,
                                                       do_not_write,
                                                       main_task) -> int:
        """
        Detect initial values of ``field`` in the given text units and save them.

        Every text unit is matched against the field detectors configured for
        the document type and field; each detection is collected as a
        ``(text_unit, value, hint)`` tuple. For fields that are neither
        multi-value nor choice fields, only the first detected value per text
        unit is kept.

        :param document: document the text units belong to.
        :param field: field to detect values for.
        :param sentence_text_units: text units to scan.
        :param do_not_write: passed through to ``save_detected_values``.
        :param main_task: not used by this implementation.
        :return: the result of ``DetectFieldValues.save_detected_values``.
        """
        detectors = DocumentFieldDetector.objects.filter(
            document_type=document.document_type, field=field)
        adapter = FIELD_TYPES_REGISTRY.get(field.type)

        found = []

        for text_unit in sentence_text_units:
            for detector in detectors:
                if not detector.matches(text_unit.text):
                    continue

                value = detector.detected_value
                hint = None
                if adapter.value_aware:
                    hint = detector.extraction_hint or ValueExtractionHint.TAKE_FIRST.name
                    value, hint = adapter.get_or_extract_value(
                        document, field, value, hint, text_unit.text)
                    if value is None:
                        continue

                found.append((text_unit, value, hint))

                if not adapter.multi_value and not field.is_choice_field():
                    break

        return DetectFieldValues.save_detected_values(
            document, field, adapter, found, do_not_write)
Esempio n. 13
0
 def destroy(self, request, *args, **kwargs):
     """
     Delete the requested object through the adapter of its field's type.

     :return: empty response with status 204 (no content).
     """
     target = self.get_object()
     FIELD_TYPES_REGISTRY.get(target.field.type).delete(target)
     return Response(status=status.HTTP_204_NO_CONTENT)
Esempio n. 14
0
 def save(self, **kwargs):
     """
     Store the adapter for the validated field's type on ``self.adapter``,
     then delegate to the parent ``save``.
     """
     validated_field = self.validated_data['field']
     adapter = FIELD_TYPES_REGISTRY.get(validated_field.type)
     self.adapter = adapter
     return super().save(**kwargs)
Esempio n. 15
0
    def detect_field_values(cls, log: ProcessLogger, doc: Document,
                            field: DocumentField) -> List[DetectedFieldValue]:
        """
        Detect values of ``field`` from the values of the fields it depends on.

        The stored (not user-removed) values of the dependency fields are
        merged into python values. When the field has stop words, they are
        applied first to the newline-joined dependency values; a stop-word
        detection short-circuits everything else. Otherwise each non-empty
        dependency value is converted to a string and matched against the
        detectors of the field. For fields that are neither multi-value nor
        choice fields, detection stops at the first detected value.

        :param log: not used by this implementation.
        :param doc: document whose stored field values are examined.
        :param field: field to detect values for.
        :return: list of detected values (without a text unit).
        """
        depends_on_fields = list(field.depends_on_fields.all())

        qs_document_field_values = doc.documentfieldvalue_set \
            .filter(removed_by_user=False) \
            .filter(field__in=depends_on_fields)

        field_code_to_value = merge_document_field_values_to_python_value(
            list(qs_document_field_values))

        # Restrict to the dependency fields; missing values become None.
        field_code_to_value = {
            f.code: field_code_to_value.get(f.code)
            for f in depends_on_fields
        }

        if field.stop_words:
            # Skip missing (None) values: str(None) would otherwise put the
            # literal text "None" into the text checked against the stop words.
            depends_on_full_text = '\n'.join(
                str(v) for v in field_code_to_value.values() if v is not None)
            detected_with_stop_words, detected_values \
                = detect_with_stop_words_by_field_and_full_text(field, depends_on_full_text)
            if detected_with_stop_words:
                return detected_values or list()

        field_detectors = DocumentFieldDetector.objects.filter(field=field)
        field_type_adapter = FIELD_TYPES_REGISTRY.get(
            field.type)  # type: FieldType

        detected_values = list()  # type: List[DetectedFieldValue]

        for depends_on_value in field_code_to_value.values():
            if not depends_on_value:
                continue
            depends_on_value = str(depends_on_value)
            for field_detector in field_detectors:  # type: DocumentFieldDetector
                matching_string = field_detector.matching_string(
                    depends_on_value, text_is_sentence=False)
                if matching_string is None:
                    continue

                value = field_detector.get_validated_detected_value(field)
                hint_name = None
                if field_type_adapter.requires_value:
                    hint_name = field_detector.extraction_hint or ValueExtractionHint.TAKE_FIRST.name
                    value, hint_name = field_type_adapter \
                        .get_or_extract_value(doc,
                                              field, value,
                                              hint_name,
                                              matching_string)
                    if value is None:
                        continue

                detected_values.append(
                    DetectedFieldValue(field, value, None, hint_name))

                # Single-value fields keep only the first detection.
                if not (field_type_adapter.multi_value
                        or field.is_choice_field()):
                    break

            if detected_values and not (field_type_adapter.multi_value
                                        or field.is_choice_field()):
                break

        return detected_values