Exemple #1
0
    def load_doc(task: ExtendedTask, document: Document, document_fields: Dict, run_detect_field_values: bool,
                 filed_owners: dict = None):
        """
        Save a document together with its loaded field values and run field detection for it.

        The field values are obtained from ``LoadDocumentWithFields.load_field_values`` before the
        transaction is opened. Everything else (saving the document, removing old values, storing the
        loaded values, running detection) happens inside a single ``transaction.atomic()`` block.

        :param task: task passed to ``load_field_values`` and used for logging
        :param document: document to save; it is inserted when it has no primary key yet
        :param document_fields: passed through to ``load_field_values``
        :param run_detect_field_values: when true, detection is run with ``save=True``; otherwise it is run
            with ``save=False`` and the returned values are written with ``cache_field_values(save=True)``
        :param filed_owners: optional mapping passed through to ``load_field_values``;
            an empty dict is used when it is not provided
        """
        owners = filed_owners or {}
        loaded_values = LoadDocumentWithFields.load_field_values(task, document, document_fields, owners)
        task_logger = CeleryTaskLogger(task)

        with transaction.atomic():
            is_new = document.pk is None
            document.save(force_insert=is_new)

            if not is_new:
                # For an already stored document drop the values that were neither
                # created/modified by a user nor marked as removed by a user.
                stale_values = DocumentFieldValue.objects.filter(document=document,
                                                                 removed_by_user=False,
                                                                 created_by__isnull=True,
                                                                 modified_by__isnull=True)
                stale_values.delete()

            for field, values in loaded_values.items():
                field_detection.save_detected_values(document, field, values)

            if not run_detect_field_values:
                # Detection does not store anything itself here, so the cache is written explicitly.
                dfvs = field_detection.detect_and_cache_field_values_for_document(task_logger, document, False)
                field_value_cache.cache_field_values(document, dfvs, save=True)
            else:
                field_detection.detect_and_cache_field_values_for_document(task_logger, document, True)

        summary = 'Loaded {0} field values for document #{1} ({2})'
        task.log_info(summary.format(len(loaded_values), document.pk, document.name))
 def cache_document_fields_for_doc_ids(_task: ExtendedTask, doc_ids: Set):
     """
     Rebuild cached field values for every document whose primary key is in ``doc_ids``.

     For each document the generic values are cached first, then field detection is run with
     ``save=False`` and ``clear_old_values=False``, and the values it returns are written with
     ``cache_field_values(save=True)``.

     :param _task: task wrapped into a ``CeleryTaskLogger`` for logging
     :param doc_ids: primary keys of the documents to process
     """
     documents = Document.objects.filter(pk__in=doc_ids)
     for document in documents:
         task_logger = CeleryTaskLogger(_task)
         field_value_cache.cache_generic_values(document, log=task_logger)
         detected = field_detection.detect_and_cache_field_values_for_document(
             task_logger, document, False, clear_old_values=False)
         field_value_cache.cache_field_values(document, detected, save=True, log=task_logger)
Exemple #3
0
def detect_and_cache_field_values_for_document(log: ProcessLogger,
                                               document: Document,
                                               save: bool = True):
    """
    Detects field values for a document and stores their DocumentFieldValue objects as well as
    Document.field_value. These two should always be consistent.

    All fields of the document's type are processed in the order returned by
    ``order_field_detection``, each with the strategy registered for its ``value_detection_strategy``.

    :param log: logger passed to the detection strategies and to the field value cache
    :param document: document to run the detection for
    :param save: when true, detected values are stored per field and the field value cache is written
        at the end; for a document whose status is not active the per-field storing is skipped
        (the cache is still written)
    :return: list with the detected values of all fields
    """
    store_detected = save
    if save:
        status = document.status
        if status and not status.is_active:
            log.info(
                'Forbidden storing detected field values for document with "completed"'
                ' status, document #{} ({})'.format(document.id, document.name))
            store_detected = False

    doc_type = document.document_type  # type: DocumentType

    deps_prefetch = Prefetch('depends_on_fields', queryset=DocumentField.objects.only('uid').all())
    fields = list(doc_type.fields.all().prefetch_related(deps_prefetch))

    # One pass over the fields builds both the dependency list and the code -> field lookup.
    codes_with_deps = []
    field_by_code = {}  # type: Dict[str, DocumentField]
    for doc_field in fields:
        codes_with_deps.append((doc_field.code, doc_field.get_depends_on_codes() or set()))
        field_by_code[doc_field.code] = doc_field

    detection_order = order_field_detection(codes_with_deps)

    cache_primed = False
    all_detected = []
    for code in detection_order:
        field = field_by_code[code]  # type: DocumentField
        strategy = FIELD_DETECTION_STRATEGY_REGISTRY[
            field.value_detection_strategy]  # type: FieldDetectionStrategy

        if not cache_primed and strategy.uses_cached_document_field_values(field):
            # Build Document.field_values once, the first time a strategy reports that it uses it.
            document.field_values = field_value_cache.cache_field_values(
                document, None, save=False)
            cache_primed = True

        found = strategy.detect_field_values(log, document, field)  # type: List[DetectedFieldValue]
        if not found:
            continue

        all_detected.extend(found)
        if store_detected:
            save_detected_values(document, field, found)

    if save:
        field_value_cache.cache_field_values(document, all_detected, save=True, log=log)

    return all_detected
Exemple #4
0
def detect_and_cache_field_values(log: ProcessLogger,
                                  doc: Document,
                                  field: DocumentField,
                                  save: bool = True) -> Optional[List[DetectedFieldValue]]:
    """
    Detect the values of a single field of a document and optionally store them.

    The strategy is taken from ``FIELD_DETECTION_STRATEGY_REGISTRY`` by the field's
    ``value_detection_strategy``; ``STRATEGY_DISABLED`` is used when the field has none.

    :param log: logger passed to the strategy and to the field value cache
    :param doc: document to detect the values in
    :param field: field to detect the values for
    :param save: when true, the detected values are stored with ``save_detected_values`` and then
        the field value cache is written
    :return: whatever the strategy's ``detect_field_values`` returned
    """
    strategy_code = field.value_detection_strategy
    if strategy_code:
        strategy = FIELD_DETECTION_STRATEGY_REGISTRY[strategy_code]
    else:
        strategy = STRATEGY_DISABLED

    if strategy.uses_cached_document_field_values(field):
        # The strategy reads Document.field_values, so build that structure before detecting.
        doc.field_values = field_value_cache.cache_field_values(doc, None, save=False)

    found = strategy.detect_field_values(log, doc, field)
    if not save:
        return found

    save_detected_values(doc, field, found)
    field_value_cache.cache_field_values(doc, found, save=True, log=log)
    return found
def detect_and_cache_field_values(log: ProcessLogger,
                                  doc: Document,
                                  field: DocumentField,
                                  save: bool = True) -> Optional[List[DetectedFieldValue]]:
    """
    Detect the values of a single field of a document and optionally store them.

    The strategy is taken from ``FIELD_DETECTION_STRATEGY_REGISTRY`` by the field's
    ``value_detection_strategy``; ``STRATEGY_DISABLED`` is used when the field has none.
    When the strategy reports that it uses cached document field values, they are built with
    ``cache_field_values(doc, None, save=False)`` and handed to the strategy; otherwise ``None`` is passed.

    :param log: logger passed to the strategy and to the field value cache
    :param doc: document to detect the values in
    :param field: field to detect the values for
    :param save: when true, the field value cache is written first and then the detected values
        are stored with ``save_detected_values``
    :return: whatever the strategy's ``detect_field_values`` returned
    """
    strategy_code = field.value_detection_strategy
    if strategy_code:
        strategy = FIELD_DETECTION_STRATEGY_REGISTRY[strategy_code]
    else:
        strategy = STRATEGY_DISABLED

    cached_values = field_value_cache.cache_field_values(doc, None, save=False) \
        if strategy.uses_cached_document_field_values(field) else None

    found = strategy.detect_field_values(log, doc, field, cached_values)
    if not save:
        return found

    field_value_cache.cache_field_values(doc, found, save=True, log=log)
    save_detected_values(doc, field, found)
    return found
def detect_and_cache_field_values_for_document(log: ProcessLogger,
                                               document: Document,
                                               save: bool = True,
                                               clear_old_values: bool = True,
                                               changed_by_user: User = None,
                                               system_fields_changed: bool = False,
                                               generic_fields_changed: bool = False,
                                               document_initial_load: bool = False,
                                               ignore_field_codes: Set[str] = None,
                                               updated_field_codes: List[str] = None):
    """
    Detects field values for a document and stores their DocumentFieldValue objects as well as Document.field_value.
    These two should always be consistent.

    Fields of the document's type are processed in the order returned by ``order_field_detection``.
    The cached field values are rebuilt (without saving) before every field and passed to its
    detection strategy.

    :param log: logger passed to the detection strategies and to the field value cache
    :param document: document to run the detection for
    :param save: when true, detected values are stored per field and the field value cache is written
        at the end; for a document whose status is not active the per-field storing is skipped
        (the cache is still written)
    :param clear_old_values: when true (and values are being stored), values of the field that were
        neither created/modified by a user nor removed by a user are deleted before storing new ones
    :param changed_by_user: passed through to ``field_value_cache.cache_field_values``
    :param system_fields_changed: passed through to ``field_value_cache.cache_field_values``
    :param generic_fields_changed: passed through to ``field_value_cache.cache_field_values``
    :param document_initial_load: passed through to ``field_value_cache.cache_field_values``
    :param ignore_field_codes: codes of the fields to skip entirely
    :param updated_field_codes: if set, we search for changed and dependent fields only
    :return: list with the detected values of all processed fields
    :raises Exception: any error raised while detecting a field value is logged and re-raised
    """

    save_cache = save
    save_detected = save
    if save and document.status and not document.status.is_active:
        log.info('Forbidden storing detected field values for document with "completed"'
                 ' status, document #{} ({})'.format(document.id, document.name))
        save_detected = False

    document_type = document.document_type  # type: DocumentType

    all_fields = document_type.fields \
        .all() \
        .prefetch_related(Prefetch('depends_on_fields', queryset=DocumentField.objects.only('uid').all()))

    all_fields = list(all_fields)

    fields_and_deps = [(f.code, f.get_depends_on_codes() or set()) for f in all_fields]
    # NOTE(review): an empty result from get_dependent_fields is falsy, so the filter in the loop
    # below is skipped and every field is processed - confirm this is intended.
    required_fields = get_dependent_fields(fields_and_deps, set(updated_field_codes)) \
        if updated_field_codes else None

    sorted_codes = order_field_detection(fields_and_deps)
    all_fields_code_to_field = {f.code: f for f in all_fields}  # type: Dict[str, DocumentField]

    res = []
    for field_code in sorted_codes:
        if ignore_field_codes and field_code in ignore_field_codes:
            continue
        if required_fields and field_code not in required_fields:
            continue

        field = all_fields_code_to_field[field_code]  # type: DocumentField
        # NOTE(review): unlike detect_and_cache_field_values there is no fallback for an empty
        # value_detection_strategy here, and a failed lookup happens outside the try below,
        # so it is not logged with the document/field context - confirm fields always have a strategy.
        field_detection_strategy = FIELD_DETECTION_STRATEGY_REGISTRY[
            field.value_detection_strategy]  # type: FieldDetectionStrategy

        try:
            # Rebuilt for every field: values stored for earlier fields in this loop
            # may be needed by the fields processed after them.
            field_vals = field_value_cache.cache_field_values(document, None, save=False)
            detected_values = field_detection_strategy.detect_field_values(log,
                                                                           document,
                                                                           field,
                                                                           field_vals)  # type: List[DetectedFieldValue]
        except Exception as e:
            msg = '''Unable to detect field value. 
            Document type: {0} 
            Document: {1} 
            Field: {2}'''.format(document_type.code, document.pk, field.code)
            log.error(render_error(msg, e))
            # Bare raise re-raises the active exception as is.
            raise

        if save_detected and clear_old_values:
            # Delete previously detected values
            # to avoid accumulating garbage on each iteration.
            DocumentFieldValue.objects \
                .filter(document=document,
                        field=field,
                        removed_by_user=False,
                        created_by__isnull=True,
                        modified_by__isnull=True) \
                .exclude(field__value_detection_strategy=DocumentField.VD_DISABLED) \
                .delete()

        if detected_values:
            res.extend(detected_values)
            if save_detected:
                save_detected_values(document, field, detected_values)

    if save_cache:
        field_value_cache.cache_field_values(document, suggested_field_values=res,
                                             save=True, log=log,
                                             changed_by_user=changed_by_user,
                                             system_fields_changed=system_fields_changed,
                                             generic_fields_changed=generic_fields_changed,
                                             document_initial_load=document_initial_load)

    return res