def load_doc(task: ExtendedTask,
             document: Document,
             document_fields: Dict,
             run_detect_field_values: bool,
             filed_owners: dict = None):
    """Persist a document with its pre-loaded field values and optionally run
    field value detection for it.

    :param task: task used for logging progress.
    :param document: document to save (inserted if it has no pk yet).
    :param document_fields: raw field data to convert into field values.
    :param run_detect_field_values: when True, persist detected values too;
        otherwise detection results are only cached on the document.
    :param filed_owners: optional mapping of field owners passed through to
        the value loader.
    """
    owners = filed_owners or {}
    fields_to_values = LoadDocumentWithFields.load_field_values(task, document, document_fields, owners)
    log = CeleryTaskLogger(task)

    with transaction.atomic():
        is_new_document = document.pk is None
        document.save(force_insert=is_new_document)

        if not is_new_document:
            # Drop previously auto-detected values; values touched by a user
            # (removed_by_user / created_by / modified_by set) are preserved.
            DocumentFieldValue.objects \
                .filter(document=document,
                        removed_by_user=False,
                        created_by__isnull=True,
                        modified_by__isnull=True) \
                .delete()

        for field, values in fields_to_values.items():
            field_detection.save_detected_values(document, field, values)

        if run_detect_field_values:
            field_detection.detect_and_cache_field_values_for_document(log, document, True)
        else:
            dfvs = field_detection.detect_and_cache_field_values_for_document(log, document, False)
            field_value_cache.cache_field_values(document, dfvs, save=True)

        task.log_info('Loaded {0} field values for document #{1} ({2})'
                      .format(len(fields_to_values), document.pk, document.name))
def cache_document_fields_for_doc_ids(_task: ExtendedTask, doc_ids: Set):
    """Re-detect and re-cache field values for the given documents.

    For each document: refresh its cached generic values, run field detection
    without persisting DocumentFieldValue rows (save=False,
    clear_old_values=False), then store the suggested values in the
    document's field value cache.

    :param _task: task used only for logging.
    :param doc_ids: primary keys of the documents to process.
    """
    # The logger merely wraps the task, so build it once instead of
    # re-creating it on every loop iteration.
    log = CeleryTaskLogger(_task)
    for doc in Document.objects.filter(pk__in=doc_ids):
        field_value_cache.cache_generic_values(doc, log=log)
        suggested_values = field_detection.detect_and_cache_field_values_for_document(
            log, doc, False, clear_old_values=False)
        field_value_cache.cache_field_values(doc, suggested_values, save=True, log=log)
def detect_and_cache_field_values_for_document(log: ProcessLogger,
                                               document: Document,
                                               save: bool = True):
    """
    Detects field values for a document and stores their DocumentFieldValue objects as well as Document.field_value.
    These two should always be consistent.
    :param log: logger used for progress / warning messages.
    :param document: document whose fields are detected.
    :param save: when True, persist both the detected values and the cache.
    :return: list of DetectedFieldValue accumulated over all fields.
    """
    # "save" controls two independent writes; they are tracked separately
    # because save_detected may be switched off below while the cache write
    # stays enabled.
    save_cache = save
    save_detected = save
    # Completed (inactive-status) documents must not get new detected values,
    # but their cache may still be refreshed.
    if save and document.status and not document.status.is_active:
        log.info(
            'Forbidden storing detected field values for document with "completed"'
            ' status, document #{} ({})'.format(document.id, document.name))
        save_detected = False

    document_type = document.document_type  # type: DocumentType
    # Prefetch dependency links so get_depends_on_codes() below does not hit
    # the DB once per field.
    all_fields = document_type.fields \
        .all() \
        .prefetch_related(Prefetch('depends_on_fields', queryset=DocumentField.objects.only('uid').all()))

    all_fields = list(all_fields)
    # Order fields so that a field is detected only after the fields it
    # depends on have been processed.
    fields_and_deps = [(f.code, f.get_depends_on_codes() or set()) for f in all_fields]
    sorted_codes = order_field_detection(fields_and_deps)
    all_fields_code_to_field = {f.code: f for f in all_fields}  # type: Dict[str, DocumentField]

    # Document.field_values is pre-cached lazily: only once, and only if some
    # strategy actually needs it.
    field_values_pre_cached = False

    res = list()
    for field_code in sorted_codes:
        field = all_fields_code_to_field[field_code]  # type: DocumentField
        field_detection_strategy = FIELD_DETECTION_STRATEGY_REGISTRY[
            field.value_detection_strategy]  # type: FieldDetectionStrategy

        if not field_values_pre_cached \
                and field_detection_strategy.uses_cached_document_field_values(field):
            # Pre-cache Document.field_values structure for the usage in field detection strategies
            document.field_values = field_value_cache.cache_field_values(document, None, save=False)
            field_values_pre_cached = True

        detected_values = field_detection_strategy.detect_field_values(log,
                                                                       document,
                                                                       field)  # type: List[DetectedFieldValue]
        if detected_values:
            res.extend(detected_values)
            if save_detected:
                save_detected_values(document, field, detected_values)

    if save_cache:
        field_value_cache.cache_field_values(document, res, save=True, log=log)

    return res
def detect_and_cache_field_values(log: ProcessLogger,
                                  doc: Document,
                                  field: DocumentField,
                                  save: bool = True) -> Optional[List[DetectedFieldValue]]:
    """Detect values of a single field on a document; optionally persist them.

    :param log: logger for detection messages.
    :param doc: document to run detection on.
    :param field: the field whose values are detected.
    :param save: when True, store the detected values and refresh the cache.
    :return: the detected field values.
    """
    if field.value_detection_strategy:
        strategy = FIELD_DETECTION_STRATEGY_REGISTRY[field.value_detection_strategy]
    else:
        strategy = STRATEGY_DISABLED

    if strategy.uses_cached_document_field_values(field):
        # Pre-cache Document.field_values structure for the usage in field detection strategies
        cached = field_value_cache.cache_field_values(doc, None, save=False)
        doc.field_values = cached

    detected_values = strategy.detect_field_values(log, doc, field)

    if save:
        save_detected_values(doc, field, detected_values)
        field_value_cache.cache_field_values(doc, detected_values, save=True, log=log)

    return detected_values
def detect_and_cache_field_values(log: ProcessLogger,
                                  doc: Document,
                                  field: DocumentField,
                                  save: bool = True) -> Optional[List[DetectedFieldValue]]:
    """Detect values of a single field on a document; optionally persist them.

    The cached field values of the document are computed only when the chosen
    strategy declares it needs them, and are handed to the strategy directly
    (the document object itself is left untouched).

    :param log: logger for detection messages.
    :param doc: document to run detection on.
    :param field: the field whose values are detected.
    :param save: when True, refresh the cache and store the detected values.
    :return: the detected field values.
    """
    if field.value_detection_strategy:
        strategy = FIELD_DETECTION_STRATEGY_REGISTRY[field.value_detection_strategy]
    else:
        strategy = STRATEGY_DISABLED

    doc_field_values = None
    if strategy.uses_cached_document_field_values(field):
        doc_field_values = field_value_cache.cache_field_values(doc, None, save=False)

    detected_values = strategy.detect_field_values(log, doc, field, doc_field_values)

    if save:
        # Cache first, then persist the DocumentFieldValue rows.
        field_value_cache.cache_field_values(doc, detected_values, save=True, log=log)
        save_detected_values(doc, field, detected_values)

    return detected_values
def detect_and_cache_field_values_for_document(log: ProcessLogger,
                                               document: Document,
                                               save: bool = True,
                                               clear_old_values: bool = True,
                                               changed_by_user: User = None,
                                               system_fields_changed: bool = False,
                                               generic_fields_changed: bool = False,
                                               document_initial_load: bool = False,
                                               ignore_field_codes: Set[str] = None,
                                               updated_field_codes: List[str] = None):
    """
    Detects field values for a document and stores their DocumentFieldValue objects as well as Document.field_value.
    These two should always be consistent.
    :param log: logger for progress / error messages.
    :param document: document whose fields are detected.
    :param save: when True, persist detected values and refresh the cache.
    :param clear_old_values: when True, delete previously auto-detected values
        per field before saving the new ones.
    :param changed_by_user: user to attribute the cache update to, if any.
    :param system_fields_changed: forwarded to the cache update.
    :param generic_fields_changed: forwarded to the cache update.
    :param document_initial_load: forwarded to the cache update.
    :param ignore_field_codes: field codes to skip entirely.
    :param updated_field_codes: if set, we search for changed and dependent fields only.
    :return: list of DetectedFieldValue accumulated over all processed fields.
    """
    # "save" controls two independent writes tracked separately: persisting
    # detected values may be disabled below while the cache write stays on.
    save_cache = save
    save_detected = save
    # Completed (inactive-status) documents must not receive new detected
    # values, but their cache may still be refreshed.
    if save and document.status and not document.status.is_active:
        log.info('Forbidden storing detected field values for document with "completed"'
                 ' status, document #{} ({})'.format(document.id, document.name))
        save_detected = False

    document_type = document.document_type  # type: DocumentType
    # Prefetch dependency links so get_depends_on_codes() below does not hit
    # the DB once per field.
    all_fields = document_type.fields \
        .all() \
        .prefetch_related(Prefetch('depends_on_fields', queryset=DocumentField.objects.only('uid').all()))

    all_fields = list(all_fields)
    fields_and_deps = [(f.code, f.get_depends_on_codes() or set()) for f in all_fields]
    # Restrict processing to the updated fields and everything depending on
    # them, when the caller told us what changed.
    required_fields = get_dependent_fields(fields_and_deps, set(updated_field_codes)) \
        if updated_field_codes else None

    # Order fields so that a field is detected only after its dependencies.
    sorted_codes = order_field_detection(fields_and_deps)
    all_fields_code_to_field = {f.code: f for f in all_fields}  # type: Dict[str, DocumentField]

    res = list()
    for field_code in sorted_codes:
        if ignore_field_codes and field_code in ignore_field_codes:
            continue
        if required_fields and field_code not in required_fields:
            continue

        field = all_fields_code_to_field[field_code]  # type: DocumentField
        field_detection_strategy = FIELD_DETECTION_STRATEGY_REGISTRY[
            field.value_detection_strategy]  # type: FieldDetectionStrategy

        try:
            # Re-computed per field: earlier iterations may have saved values
            # that later fields depend on.
            field_vals = field_value_cache.cache_field_values(document, None, save=False)
            detected_values = field_detection_strategy.detect_field_values(
                log, document, field, field_vals)  # type: List[DetectedFieldValue]
        except Exception as e:
            msg = '''Unable to detect field value. Document type: {0} Document: {1} Field: {2}'''.format(
                document_type.code, document.pk, field.code)
            log.error(render_error(msg, e))
            # Bare raise re-raises the active exception without disturbing
            # its traceback ("raise e" needlessly re-binds it).
            raise

        if save_detected and clear_old_values:
            # Delete previously detected values
            # to avoid accumulating garbage on each iteration.
            DocumentFieldValue.objects \
                .filter(document=document,
                        field=field,
                        removed_by_user=False,
                        created_by__isnull=True,
                        modified_by__isnull=True) \
                .exclude(field__value_detection_strategy=DocumentField.VD_DISABLED) \
                .delete()

        if detected_values:
            res.extend(detected_values)
            if save_detected:
                save_detected_values(document, field, detected_values)

    if save_cache:
        field_value_cache.cache_field_values(document, suggested_field_values=res,
                                             save=True, log=log,
                                             changed_by_user=changed_by_user,
                                             system_fields_changed=system_fields_changed,
                                             generic_fields_changed=generic_fields_changed,
                                             document_initial_load=document_initial_load)

    return res