def detect_and_cache_field_values_for_document(log: ProcessLogger,
                                               document: Document,
                                               save: bool = True):
    """
    Run field detection for every field of the document's type and, optionally,
    persist the results.

    Fields are processed in the order produced by ``order_field_detection`` from
    each field's code and its depends-on codes. When ``save`` is set, the detected
    values are stored per field (unless the document status is not active) and
    ``Document.field_values`` is rebuilt through ``field_value_cache``.

    :param log: logger used for the info message and passed to the strategies.
    :param document: document to detect field values for.
    :param save: when False nothing is stored; the detected values are only returned.
    :return: list with the detected values of all fields, in processing order.
    """
    # The cache refresh follows ``save`` alone; storing the detected values is
    # additionally blocked for documents whose status is not active.
    persist_detected = save
    if save:
        status = document.status
        if status and not status.is_active:
            log.info(
                'Forbidden storing detected field values for document with "completed"'
                ' status, document #{} ({})'.format(document.id, document.name))
            persist_detected = False

    doc_type = document.document_type  # type: DocumentType

    # Only the uid of each dependency is loaded together with the fields.
    depends_on_prefetch = Prefetch('depends_on_fields',
                                   queryset=DocumentField.objects.only('uid').all())
    fields = list(doc_type.fields.all().prefetch_related(depends_on_prefetch))

    codes_with_deps = [(fld.code, fld.get_depends_on_codes() or set()) for fld in fields]
    ordered_codes = order_field_detection(codes_with_deps)
    fields_by_code = {fld.code: fld for fld in fields}  # type: Dict[str, DocumentField]

    cache_primed = False
    detected = []
    for code in ordered_codes:
        field = fields_by_code[code]  # type: DocumentField
        strategy = FIELD_DETECTION_STRATEGY_REGISTRY[
            field.value_detection_strategy]  # type: FieldDetectionStrategy

        # Build Document.field_values once, right before the first strategy
        # that declares it reads the cached values.
        if not cache_primed and strategy.uses_cached_document_field_values(field):
            document.field_values = field_value_cache.cache_field_values(
                document, None, save=False)
            cache_primed = True

        values = strategy.detect_field_values(log, document, field)  # type: List[DetectedFieldValue]
        if not values:
            continue

        detected.extend(values)
        # NOTE(review): saving is assumed to be nested under the non-empty check
        # (nesting reconstructed from flattened text) - confirm.
        if persist_detected:
            save_detected_values(document, field, values)

    if save:
        field_value_cache.cache_field_values(document, detected, save=True, log=log)

    return detected
def cache_field_values(doc: Document,
                       suggested_field_values: Optional[List[DetectedFieldValue]],
                       save: bool = True) -> Dict[str, Any]:
    """
    Build the ``Document.field_values`` mapping for a document.

    The document's DocumentFieldValue objects (except those removed by user) are
    merged per field with the field type's ``merge_multi_python_values`` and
    converted with ``merged_python_value_to_db``; results are keyed by field uid.
    For every detectable field a second entry is stored under
    ``Document.get_suggested_field_uid(uid)``: it is taken from
    ``suggested_field_values`` when they are provided and non-empty, otherwise the
    entry already present in ``doc.field_values`` (if any) is carried over.

    :param doc: document to build the mapping for.
    :param suggested_field_values: freshly detected values or None / empty.
    :param save: when True the mapping is assigned to ``doc.field_values`` and
                 the document is saved.
    :return: the built mapping (field uid / suggested field uid -> DB value).
    """
    doc_type = doc.document_type  # type: DocumentType
    # TODO: get/save field value for specific field
    fields = list(doc_type.fields.all())

    # Merge all not-removed values of the document per field.
    merged_by_field = dict.fromkeys(fields)
    for field_value in doc.documentfieldvalue_set.all():
        if field_value.removed_by_user:
            continue
        owner = field_value.field
        owner_type = FIELD_TYPES_REGISTRY[owner.type]  # type: FieldType
        merged_by_field[owner] = owner_type.merge_multi_python_values(
            merged_by_field.get(owner), field_value.python_value)

    db_values = {
        fld.uid: FIELD_TYPES_REGISTRY[fld.type].merged_python_value_to_db(merged_by_field[fld])
        for fld in fields
    }

    suggested_by_code = None  # type: Dict[str, Any]
    if suggested_field_values:
        suggested_by_code = merge_detected_field_values_to_python_value(suggested_field_values)

    for fld in fields:  # type: DocumentField
        fld_type = fld.get_field_type()  # type: FieldType
        if not fld.is_detectable():
            continue

        suggested_uid = Document.get_suggested_field_uid(fld.uid)
        if suggested_by_code:
            suggested_db_value = fld_type.merged_python_value_to_db(
                suggested_by_code.get(fld.code))
        elif doc.field_values:
            # No new suggestions: keep what the document already had.
            suggested_db_value = doc.field_values.get(suggested_uid)
        else:
            suggested_db_value = None
        db_values[suggested_uid] = suggested_db_value

    if save:
        doc.field_values = db_values
        doc.save()

    return db_values
def detect_and_cache_field_values(log: ProcessLogger,
                                  doc: Document,
                                  field: DocumentField,
                                  save: bool = True) -> Optional[List[DetectedFieldValue]]:
    """
    Detect the values of a single field of a document and optionally persist them.

    The strategy is looked up in ``FIELD_DETECTION_STRATEGY_REGISTRY`` by
    ``field.value_detection_strategy``; ``STRATEGY_DISABLED`` is used when the field
    has no strategy set.

    :param log: logger passed to the detection strategy and to the cache update.
    :param doc: document to detect the field values in.
    :param field: field to detect the values for.
    :param save: when True the detected values are stored and
                 ``Document.field_values`` is rebuilt and saved.
    :return: whatever the strategy detected (may be None or empty).
    """
    strategy = FIELD_DETECTION_STRATEGY_REGISTRY[field.value_detection_strategy] \
        if field.value_detection_strategy else STRATEGY_DISABLED

    if strategy.uses_cached_document_field_values(field):
        # Pre-cache Document.field_values structure for the usage in field detection strategies
        doc.field_values = field_value_cache.cache_field_values(doc, None, save=False)

    detected_values = strategy.detect_field_values(log, doc, field)

    if save:
        # The strategy may return None or nothing; only hand real results to
        # save_detected_values, the same way the document-level detection does.
        if detected_values:
            save_detected_values(doc, field, detected_values)
        field_value_cache.cache_field_values(doc, detected_values, save=True, log=log)

    return detected_values
def cache_field_values(doc: Document,
                       suggested_field_values: Optional[List[DetectedFieldValue]],
                       save: bool = True,
                       log: ProcessLogger = None) -> Dict[str, Any]:
    """
    Build the ``Document.field_values`` mapping for a document and optionally store it.

    The document's DocumentFieldValue objects (except those removed by user) are
    merged per field with the field type's ``merge_multi_python_values`` and
    converted with ``merged_python_value_to_db``; results are keyed by field uid.
    For every detectable field a second entry is stored under
    ``Document.get_suggested_field_uid(uid)``: it is taken from
    ``suggested_field_values`` when they are provided and non-empty, otherwise the
    entry already present in ``doc.field_values`` (if any) is carried over.

    When saving, non-None values of related-info fields are replaced by their
    length in the stored copy only; the returned mapping keeps the full values.

    :param doc: document to build the mapping for.
    :param suggested_field_values: freshly detected values or None / empty.
    :param save: when True the mapping is written to ``doc.field_values``, the
                 document is saved and a DocumentChangedEvent is fired.
    :param log: logger handed over to the DocumentChangedEvent.
    :return: the built mapping (field uid / suggested field uid -> DB value).
    """
    doc_type = doc.document_type  # type: DocumentType
    # TODO: get/save field value for specific field
    fields = list(doc_type.fields.all())

    related_info_uids = set()
    for fld in fields:
        if fld.is_related_info_field():
            related_info_uids.add(fld.uid)

    # Merge all not-removed values of the document per field.
    merged_by_field = dict.fromkeys(fields)
    for field_value in doc.documentfieldvalue_set.all():
        if field_value.removed_by_user:
            continue
        owner = field_value.field
        owner_type = FIELD_TYPES_REGISTRY[owner.type]  # type: FieldType
        merged_by_field[owner] = owner_type.merge_multi_python_values(
            merged_by_field.get(owner), field_value.python_value)

    db_values = {
        fld.uid: FIELD_TYPES_REGISTRY[fld.type].merged_python_value_to_db(merged_by_field[fld])
        for fld in fields
    }

    suggested_by_code = None  # type: Dict[str, Any]
    if suggested_field_values:
        suggested_by_code = merge_detected_field_values_to_python_value(suggested_field_values)

    for fld in fields:  # type: DocumentField
        fld_type = fld.get_field_type()  # type: FieldType
        if not fld.is_detectable():
            continue

        suggested_uid = Document.get_suggested_field_uid(fld.uid)
        if suggested_by_code:
            suggested_db_value = fld_type.merged_python_value_to_db(
                suggested_by_code.get(fld.code))
        elif doc.field_values:
            # No new suggestions: keep what the document already had.
            suggested_db_value = doc.field_values.get(suggested_uid)
        else:
            suggested_db_value = None
        db_values[suggested_uid] = suggested_db_value

    if save:
        stored_values = {}
        for uid, value in db_values.items():
            if value is not None and uid in related_info_uids:
                # Related-info fields are stored as the size of their value.
                stored_values[uid] = len(value)
            else:
                stored_values[uid] = value
        doc.field_values = stored_values
        doc.save()

        # NOTE(review): the event is assumed to be fired only on save
        # (nesting reconstructed from flattened text) - confirm.
        change_event = events.DocumentChangedEvent(
            log=log,
            document=doc,
            system_fields_changed=False,
            generic_fields_changed=False,
            user_fields_changed=True,
            pre_detected_field_values=suggested_by_code)
        events.on_document_change(change_event)

    return db_values
def detect_and_cache_field_values_for_document(
        log: ProcessLogger,
        document: Document,
        save: bool = True,
        clear_old_values: bool = True,
        changed_by_user: User = None,
        system_fields_changed: bool = False,
        generic_fields_changed: bool = False,
        document_initial_load: bool = False,
        ignore_field_codes: Set[str] = None):
    """
    Detects field values for a document and stores their DocumentFieldValue objects as well as
    Document.field_values. These two should always be consistent.

    Fields are processed in the order produced by ``order_field_detection`` from each
    field's code and its depends-on codes.

    :param log: logger used for info / error messages and passed to the strategies.
    :param document: document to detect field values for.
    :param save: when False nothing is stored; the detected values are only returned.
    :param clear_old_values: when True (and detected values are being stored), the
        DocumentFieldValue objects of the processed field which are not removed by user
        and have neither ``created_by`` nor ``modified_by`` set are deleted first.
    :param changed_by_user: forwarded to ``field_value_cache.cache_field_values``.
    :param system_fields_changed: forwarded to ``field_value_cache.cache_field_values``.
    :param generic_fields_changed: forwarded to ``field_value_cache.cache_field_values``.
    :param document_initial_load: forwarded to ``field_value_cache.cache_field_values``.
    :param ignore_field_codes: codes of the fields to skip entirely (no detection,
        no clearing of old values).
    :return: list with the detected values of all processed fields.
    :raises Exception: any error raised by a detection strategy is logged and re-raised.
    """
    save_cache = save
    save_detected = save
    if save and document.status and not document.status.is_active:
        log.info(
            'Forbidden storing detected field values for document with "completed"'
            ' status, document #{} ({})'.format(document.id, document.name))
        # The cache is still refreshed below; only storing detected values is blocked.
        save_detected = False

    document_type = document.document_type  # type: DocumentType

    all_fields = document_type.fields \
        .all() \
        .prefetch_related(Prefetch('depends_on_fields',
                                   queryset=DocumentField.objects.only('uid').all()))
    all_fields = list(all_fields)

    fields_and_deps = [(f.code, f.get_depends_on_codes() or set()) for f in all_fields]
    sorted_codes = order_field_detection(fields_and_deps)
    all_fields_code_to_field = {f.code: f for f in all_fields}  # type: Dict[str, DocumentField]

    field_values_pre_cached = False

    res = []
    for field_code in sorted_codes:
        if ignore_field_codes and field_code in ignore_field_codes:
            continue

        field = all_fields_code_to_field[field_code]  # type: DocumentField
        field_detection_strategy = FIELD_DETECTION_STRATEGY_REGISTRY[
            field.value_detection_strategy]  # type: FieldDetectionStrategy

        if not field_values_pre_cached \
                and field_detection_strategy.uses_cached_document_field_values(field):
            # Pre-cache Document.field_values structure for the usage in field detection strategies
            document.field_values = field_value_cache.cache_field_values(
                document, None, save=False)
            field_values_pre_cached = True

        try:
            detected_values = field_detection_strategy.detect_field_values(
                log, document, field)  # type: List[DetectedFieldValue]
        except Exception as e:
            msg = '''Unable to detect field value. Document type: {0} Document: {1} Field: {2}'''.format(
                document_type.code, document.pk, field.code)
            log.error(render_error(msg, e))
            # Bare raise re-raises the active exception as is.
            raise

        if save_detected and clear_old_values:
            # Delete previously detected values
            # to avoid accumulating garbage on each iteration.
            DocumentFieldValue.objects \
                .filter(document=document,
                        field=field,
                        removed_by_user=False,
                        created_by__isnull=True,
                        modified_by__isnull=True) \
                .delete()

        if detected_values:
            res.extend(detected_values)
            if save_detected:
                save_detected_values(document, field, detected_values)

    if save_cache:
        field_value_cache.cache_field_values(
            document,
            suggested_field_values=res,
            save=True,
            log=log,
            changed_by_user=changed_by_user,
            system_fields_changed=system_fields_changed,
            generic_fields_changed=generic_fields_changed,
            document_initial_load=document_initial_load)

    return res