Example #1
0
    def load_doc(task: ExtendedTask, document: Document, document_fields: Dict, run_detect_field_values: bool,
                 filed_owners: dict = None):
        """
        Save a document together with its loaded field values and refresh the cached field values.

        All DB work happens inside a single ``transaction.atomic()`` block:
        the document is saved (inserted when it has no pk yet); for an already existing document
        the DocumentFieldValue rows which are not removed by user and have neither creator nor
        modifier are deleted; the loaded values are stored via ``field_detection.save_detected_values``;
        finally the field values are detected / cached.

        :param task: task used for loading the field values and for logging
        :param document: document to save
        :param document_fields: raw field data passed to ``LoadDocumentWithFields.load_field_values``
        :param run_detect_field_values: if True, detection is called with True as its third argument;
            otherwise it is called with False and its result is passed to
            ``field_value_cache.cache_field_values(..., save=True)``
        :param filed_owners: optional owners mapping passed to ``load_field_values``; an empty dict
            is used when it is not provided
        """
        owners = filed_owners or {}
        fields_to_values = LoadDocumentWithFields.load_field_values(task, document, document_fields, owners)
        logger = CeleryTaskLogger(task)

        with transaction.atomic():
            is_new = document.pk is None
            document.save(force_insert=is_new)

            if not is_new:
                # Drop the values which were neither created nor modified by a user
                # and which the user did not mark as removed.
                stale_values = DocumentFieldValue.objects.filter(document=document,
                                                                 removed_by_user=False,
                                                                 created_by__isnull=True,
                                                                 modified_by__isnull=True)
                stale_values.delete()

            for field, values in fields_to_values.items():
                field_detection.save_detected_values(document, field, values)

            if not run_detect_field_values:
                detected = field_detection.detect_and_cache_field_values_for_document(logger, document, False)
                field_value_cache.cache_field_values(document, detected, save=True)
            else:
                field_detection.detect_and_cache_field_values_for_document(logger, document, True)

        message = 'Loaded {0} field values for document #{1} ({2})'
        task.log_info(message.format(len(fields_to_values), document.pk, document.name))
Example #2
0
 def import_document(self, values: Dict[str, Any]):
     """
     Create and save a Document from a dict of exported values and register it in the importer state.

     Plain attributes are copied as is; document type and project ids are translated through
     ``self.document_types`` / ``self.project_ids``; the 't' flags are converted to booleans.
     After saving, the new pk is recorded in ``self.document_ids`` (keyed by the source id),
     ``self.document_src_paths`` and ``self.initially_loaded_docs``.

     :param values: exported document values keyed by column name
     """
     doc = Document()

     # Attributes copied verbatim from the source values.
     for attr in ('name', 'description', 'source', 'source_type',
                  'paragraphs', 'sentences', 'title'):
         setattr(doc, attr, values[attr])

     # Identifiers which have to be mapped / converted.
     source_type_key = str(values['document_type_id'])
     doc.document_type_id = self.document_types[source_type_key]
     doc.project_id = self.project_ids[values['project_id']]
     doc.status_id = str(values['status_id'])

     for attr in ('language', 'file_size'):
         setattr(doc, attr, values[attr])

     assign_date = values['assign_date']
     if not pd.isnull(assign_date):
         doc.assign_date = assign_date

     # Boolean columns are represented as 't' for true.
     doc.delete_pending = values['delete_pending'] == 't'
     doc.processed = values['processed'] == 't'

     for attr in ('folder', 'document_class', 'fields_dirty'):
         setattr(doc, attr, values[attr])

     # Any non-null assignee is replaced with the target user.
     if not pd.isnull(values['assignee_id']):
         doc.assignee = self.target_user

     doc.source_path = values['source_path']
     doc.save()

     new_pk = doc.pk
     self.document_ids[values['id']] = new_pk
     self.document_src_paths[new_pk] = doc.source_path
     self.initially_loaded_docs.append(new_pk)
Example #3
0
def cache_field_values(doc: Document,
                       suggested_field_values: Optional[
                           List[DetectedFieldValue]],
                       save: bool = True) -> Dict[str, Any]:
    """
    Build the DB-form field values of the document and optionally store them in Document.field_values.

    The DocumentFieldValue objects of the document (except the ones removed by user) are merged
    per field into python values and converted with FieldType.merged_python_value_to_db().
    For every detectable field a suggested value is additionally stored under the uid returned
    by Document.get_suggested_field_uid().
    :param doc: document to build the field values for
    :param suggested_field_values: detected values used as suggestions; when empty or None
        the suggested values already present in doc.field_values are kept
    :param save: if True, the result is assigned to doc.field_values and the document is saved
    :return: dict of field uid / suggested field uid -> DB-form value
    """
    doc_type = doc.document_type  # type: DocumentType
    # TODO: get/save field value for specific field
    fields = list(doc_type.fields.all())

    # Merge the stored values of each field into a single python value.
    merged_by_field = dict.fromkeys(fields)
    for stored in doc.documentfieldvalue_set.all():
        if not stored.removed_by_user:
            owner = stored.field
            type_handler = FIELD_TYPES_REGISTRY[owner.type]  # type: FieldType
            merged_by_field[owner] = type_handler.merge_multi_python_values(
                merged_by_field.get(owner), stored.python_value)

    db_values = {
        f.uid: FIELD_TYPES_REGISTRY[f.type].merged_python_value_to_db(merged_by_field[f])
        for f in fields
    }

    suggested_by_code = None  # type: Optional[Dict[str, Any]]
    if suggested_field_values:
        suggested_by_code = merge_detected_field_values_to_python_value(suggested_field_values)

    for f in fields:  # type: DocumentField
        type_handler = f.get_field_type()  # type: FieldType
        if not f.is_detectable():
            continue

        suggested_uid = Document.get_suggested_field_uid(f.uid)
        if suggested_by_code:
            suggested_db = type_handler.merged_python_value_to_db(suggested_by_code.get(f.code))
        elif doc.field_values:
            # No new suggestions: keep the previously cached one.
            suggested_db = doc.field_values.get(suggested_uid)
        else:
            suggested_db = None
        db_values[suggested_uid] = suggested_db

    if save:
        doc.field_values = db_values
        doc.save()

    return db_values
def cache_generic_values(doc: Document, save: bool = True,
                         log: ProcessLogger = None):
    """
    Recalculate doc.generic_data with get_generic_values() and optionally persist it.

    :param doc: document whose generic data is refreshed
    :param save: if True, only the 'generic_data' column is saved and a DocumentChangedEvent
        with generic_fields_changed=True is passed to events.on_document_change()
    :param log: logger passed to the event
    """
    doc.generic_data = get_generic_values(doc)
    if not save:
        return

    doc.save(update_fields=['generic_data'])
    change_event = events.DocumentChangedEvent(log=log,
                                               document=doc,
                                               system_fields_changed=False,
                                               generic_fields_changed=True,
                                               user_fields_changed=False,
                                               pre_detected_field_values=None)
    events.on_document_change(change_event)
def cache_generic_values(doc: Document,
                         save: bool = True,
                         log: ProcessLogger = None,
                         fire_doc_changed_event: bool = True):
    """
    Recalculate doc.generic_data with get_generic_values() and optionally persist it.

    :param doc: document whose generic data is refreshed
    :param save: if True, only the 'generic_data' column is saved
    :param log: logger passed to the document changed signal
    :param fire_doc_changed_event: if True (and save is True), signals.fire_document_changed()
        is called with generic_fields_changed=True
    """
    doc.generic_data = get_generic_values(doc)
    if not save:
        return

    doc.save(update_fields=['generic_data'])
    if not fire_doc_changed_event:
        return

    # Only the generic fields are reported as changed.
    changed_flags = dict(system_fields_changed=False,
                         generic_fields_changed=True,
                         user_fields_changed=False)
    signals.fire_document_changed(sender=cache_generic_values,
                                  log=log,
                                  document=doc,
                                  pre_detected_field_values=None,
                                  **changed_flags)
Example #6
0
def cache_generic_values(doc: Document, save: bool = True):
    """
    Calculate aggregated values for the document and put them into doc.generic_data.

    The aggregates are selected with a single query filtered by the document pk: max of
    'documentcluster', distinct ', '-joined party names, max currency amount and currency,
    min and max date. The first (or None, if the query returns nothing) row of values is
    assigned to doc.generic_data.

    :param doc: document to calculate the values for
    :param save: if True, only the 'generic_data' column of the document is saved
    """
    party_names = StringAgg('textunit__partyusage__party__name',
                            delimiter=', ',
                            distinct=True)
    aggregates = {
        'cluster_id': Max('documentcluster'),
        'parties': party_names,
        'max_currency_amount': Max('textunit__currencyusage__amount'),
        'max_currency_name': Max('textunit__currencyusage__currency'),
        'min_date': Min('textunit__dateusage__date'),
        'max_date': Max('textunit__dateusage__date'),
    }

    annotated = Document.objects.filter(pk=doc.pk).annotate(**aggregates)
    # Select exactly the annotated columns, in the order they were declared.
    doc.generic_data = annotated.values(*aggregates).first()

    if save:
        doc.save(update_fields=['generic_data'])
Example #7
0
    def load_doc(task: ExtendedTask,
                 document: Document,
                 field_values_alias_to_value: Dict[str, Any],
                 run_detect_field_values: bool,
                 field_owners: Dict[str, User] = None):
        """
        Save a document with its loaded field values and then either detect field values or
        notify about the document change.

        Inside a single ``transaction.atomic()`` block the document is saved (inserted when it has
        no pk yet), a DocumentMetadata record with ``{'parsed_by': None}`` is created and every
        loaded value is stored through ``DocumentFieldRepository.update_field_value_with_dto``.

        :param task: task used for loading the field values and for logging
        :param document: document to save
        :param field_values_alias_to_value: raw values passed to
            ``LoadDocumentWithFields.load_field_values``
        :param run_detect_field_values: if True, field detection is run (save=True,
            clear_old_values=False); otherwise the document changed signal is fired with
            document_initial_load=True and all "changed" flags set
        :param field_owners: optional field code -> user mapping; the user found for the field code
            (or None) is passed as the author of the field value
        """
        # Imported locally, exactly as in the original implementation.
        import apps.document.repository.document_field_repository as dfr

        owners_by_code = field_owners or {}
        fields_to_values = LoadDocumentWithFields.load_field_values(task, document, field_values_alias_to_value)
        logger = CeleryTaskLogger(task)
        repository = dfr.DocumentFieldRepository()

        with transaction.atomic():
            is_new = document.pk is None
            document.save(force_insert=is_new)
            DocumentMetadata.objects.create(document=document, metadata={'parsed_by': None})

            for field, value_dto in fields_to_values.items():
                owner = owners_by_code.get(field.code)
                repository.update_field_value_with_dto(document=document,
                                                       field=field,
                                                       field_value_dto=value_dto,
                                                       user=owner)

            if not run_detect_field_values:
                # No detection requested: report the initial load with everything marked as changed.
                signals.fire_document_changed(sender=task,
                                              log=logger,
                                              document=document,
                                              changed_by_user=None,
                                              document_initial_load=True,
                                              system_fields_changed=True,
                                              generic_fields_changed=True,
                                              user_fields_changed=True)
            else:
                field_detection.detect_and_cache_field_values_for_document(log=logger,
                                                                           document=document,
                                                                           save=True,
                                                                           clear_old_values=False)

        loaded_details = ';\n'.join(f'{f}: {dto.field_value}' for f, dto in fields_to_values.items())
        task.log_info('Loaded {0} field values for document #{1} ({2}): {3}'
                      .format(len(fields_to_values), document.pk, document.name, loaded_details))
Example #8
0
def cache_field_values(doc: Document,
                       suggested_field_values: Optional[
                           List[DetectedFieldValue]],
                       save: bool = True,
                       log: ProcessLogger = None) -> Dict[str, Any]:
    """
    Build the DB-form field values of the document and optionally store them in Document.field_values.

    The DocumentFieldValue objects of the document (except the ones removed by user) are merged
    per field into python values and converted with FieldType.merged_python_value_to_db().
    For every detectable field a suggested value is additionally stored under the uid returned
    by Document.get_suggested_field_uid().
    :param doc: document to build the field values for
    :param suggested_field_values: detected values used as suggestions; when empty or None
        the suggested values already present in doc.field_values are kept
    :param save: if True, the values are assigned to doc.field_values (non-None values of
        related info fields are replaced with their length), the document is saved and
        a DocumentChangedEvent with user_fields_changed=True is passed to events.on_document_change()
    :param log: logger passed to the document changed event
    :return: dict of field uid / suggested field uid -> DB-form value (related info values
        are returned as is, not as lengths)
    """
    doc_type = doc.document_type  # type: DocumentType
    # TODO: get/save field value for specific field
    fields = list(doc_type.fields.all())

    related_info_uids = set()
    for f in fields:
        if f.is_related_info_field():
            related_info_uids.add(f.uid)

    # Merge the stored values of each field into a single python value.
    merged_by_field = dict.fromkeys(fields)
    for stored in doc.documentfieldvalue_set.all():
        if not stored.removed_by_user:
            owner = stored.field
            type_handler = FIELD_TYPES_REGISTRY[owner.type]  # type: FieldType
            merged_by_field[owner] = type_handler.merge_multi_python_values(
                merged_by_field.get(owner), stored.python_value)

    db_values = {
        f.uid: FIELD_TYPES_REGISTRY[f.type].merged_python_value_to_db(merged_by_field[f])
        for f in fields
    }

    suggested_by_code = None  # type: Optional[Dict[str, Any]]
    if suggested_field_values:
        suggested_by_code = merge_detected_field_values_to_python_value(suggested_field_values)

    for f in fields:  # type: DocumentField
        type_handler = f.get_field_type()  # type: FieldType
        if not f.is_detectable():
            continue

        suggested_uid = Document.get_suggested_field_uid(f.uid)
        if suggested_by_code:
            suggested_db = type_handler.merged_python_value_to_db(suggested_by_code.get(f.code))
        elif doc.field_values:
            # No new suggestions: keep the previously cached one.
            suggested_db = doc.field_values.get(suggested_uid)
        else:
            suggested_db = None
        db_values[suggested_uid] = suggested_db

    if save:
        # Related info fields are cached as the number of their values.
        cached = {}
        for uid, value in db_values.items():
            if value is not None and uid in related_info_uids:
                cached[uid] = len(value)
            else:
                cached[uid] = value
        doc.field_values = cached
        doc.save()

        change_event = events.DocumentChangedEvent(
            log=log,
            document=doc,
            system_fields_changed=False,
            generic_fields_changed=False,
            user_fields_changed=True,
            pre_detected_field_values=suggested_by_code)
        events.on_document_change(change_event)

    return db_values