def calc_formula(self, field_code, type_code, formula, fields_to_values,
                 form_field, formula_name='formula'):
    """Dry-run a formula on example values and report failures on the form.

    The formula is executed through
    ``FormulaBasedFieldDetectionStrategy.calc_formula``; its result is
    discarded, only failures matter here.

    Args:
        field_code: Code of the field the formula belongs to.
        type_code: Field type code passed through to the formula executor.
        formula: Formula source text to execute.
        fields_to_values: Mapping of field codes to example values that the
            formula is executed against.
        form_field: Name of the form field the error lines are attached to
            via ``self.add_error``.
        formula_name: Human-readable label of the formula used in the error
            messages.

    Raises:
        forms.ValidationError: If execution fails with anything other than
            ``DocumentFieldFormulaError``; the message carries the example
            values and the formatted traceback.
    """
    try:
        FormulaBasedFieldDetectionStrategy.calc_formula(
            field_code, type_code, formula, fields_to_values)
    except DocumentFieldFormulaError as formula_error:
        cause = formula_error.base_error
        cause_class_name = type(cause).__name__
        cause_text = str(cause)

        header = 'Error caught while trying to execute {0} on example values:'.format(
            formula_name)
        # One "name=value" line per example value the formula was run with.
        value_lines = [
            '{0}={1}'.format(name, formula_error.field_values[name])
            for name in formula_error.field_values
        ]
        summary = "{0}. {1} in {2} of field '{3}' at line {4}".format(
            cause_class_name, cause_text, formula_name,
            formula_error.field_code, formula_error.line_number)

        self.add_error(form_field, [header] + value_lines + [summary])
    except Exception:
        # Anything unexpected is surfaced as a form-level validation error
        # together with the full traceback.
        stack_text = traceback.format_exc()
        message = 'Tried to eval {0} on example values:\n{1}\nGot error:\n{2}'.format(
            formula_name, str(fields_to_values), stack_text)
        raise forms.ValidationError(message)
def detect_field_values(cls,
                        log: ProcessLogger,
                        doc: Document,
                        field: DocumentField) -> List[DetectedFieldValue]:
    """Detect field values, falling back to formula-based detection.

    The parent implementation is tried first. If it raises
    ``ClassifierModel.DoesNotExist`` (presumably because no classifier model
    is stored for the field -- confirm against the parent strategy), the
    values are detected by ``FormulaBasedFieldDetectionStrategy`` instead.

    Args:
        log: Logger passed through to the underlying strategy.
        doc: Document to detect the values in.
        field: Field whose values are being detected.

    Returns:
        The detected values from whichever strategy produced them.
    """
    try:
        detected = super().detect_field_values(log, doc, field)
    except ClassifierModel.DoesNotExist:
        detected = FormulaBasedFieldDetectionStrategy.detect_field_values(
            log, doc, field)
    return detected
def clean(self):
    """Run the cross-field validation of the document field form.

    Checks performed, in order:

    1. ``stop_words`` can be compiled and applied to a dummy text.
    2. ``classifier_init_script`` can be used to initialise a classifier.
    3. The field and the fields it depends on can be ordered for detection
       (``order_field_detection`` raising ``ValueError`` is reported as a
       non-field error).
    4. The selected Python-coded field exists and its type matches the
       field type chosen on the form.
    5. The formula, if any, executes on example values of the fields it
       depends on.

    Problems are attached to the form with ``self.add_error``.

    Returns:
        ``self.cleaned_data``.

    Raises:
        forms.ValidationError: If the formula fails with anything other than
            ``DocumentFieldFormulaError``; the message carries the example
            values and the formatted traceback.
    """
    # NOTE(review): super().clean() is not called here. If this form is a
    # ModelForm, that skips Django's uniqueness validation -- confirm that
    # this is intended.
    field_code = self.cleaned_data.get('code')
    formula = self.cleaned_data.get('formula')
    type_code = self.cleaned_data.get('type')
    depends_on_fields = list(self.cleaned_data.get('depends_on_fields') or [])

    # Django removes a field from cleaned_data when the field's own
    # validation failed. Indexing cleaned_data directly would then raise
    # KeyError and crash the request instead of showing the form errors,
    # so the two checks below only run when their value is present.
    if 'stop_words' in self.cleaned_data:
        try:
            compiled_stop_words = compile_stop_words(self.cleaned_data['stop_words'])
            # Result is irrelevant; the call only proves the compiled
            # stop words are usable.
            detect_value_with_stop_words(compiled_stop_words, 'dummy text')
        except Exception as err:
            self.add_error('stop_words', str(err))

    if 'classifier_init_script' in self.cleaned_data:
        try:
            FieldBasedMLOnlyFieldDetectionStrategy.init_classifier_impl(
                field_code, self.cleaned_data['classifier_init_script'])
        except ScriptError as err:
            self.add_error('classifier_init_script', str(err).split('\n'))

    # Dependency check: 'xxx' stands in for the code of a field whose code
    # is not available.
    fields_and_deps = {
        field_code or 'xxx': {f.code for f in depends_on_fields}
    }
    fields_and_deps = self._extract_field_and_deps(depends_on_fields, fields_and_deps)
    try:
        order_field_detection(list(fields_and_deps.items()))
    except ValueError as ve:
        self.add_error(None, str(ve))

    # Example values the formula is dry-run against.
    fields_to_values = {
        field.code: FIELD_TYPES_REGISTRY[field.type].example_python_value(field)
        for field in depends_on_fields
    }

    python_coded_field_code = self.cleaned_data.get('python_coded_field')
    if python_coded_field_code:
        python_coded_field = PYTHON_CODED_FIELDS_REGISTRY.get(python_coded_field_code)
        if not python_coded_field:
            self.add_error(
                'python_coded_field',
                'Unknown Python-coded field: {0}'.format(python_coded_field_code))
        elif type_code != python_coded_field.type:
            self.add_error(
                'type',
                'Python-coded field {0} is of type {1} but {2} is specified'
                ' as the field type'.format(python_coded_field.title,
                                            python_coded_field.type,
                                            type_code))

    # Nothing to dry-run without both a non-blank formula and a field type.
    if not formula or not formula.strip() or not type_code:
        return self.cleaned_data

    try:
        FormulaBasedFieldDetectionStrategy.calc_formula(
            field_code, type_code, formula, fields_to_values)
    except DocumentFieldFormulaError as ex:
        base_error_class = type(ex.base_error).__name__
        base_error_msg = str(ex.base_error)
        lines = ["Error caught while trying to execute formula on example values:"]
        lines.extend(
            '{0}={1}'.format(field_name, ex.field_values[field_name])
            for field_name in ex.field_values)
        lines.append(
            "{0}. {1} in formula of field '{2}' at line {3}".format(
                base_error_class, base_error_msg, ex.field_code, ex.line_number))
        self.add_error('formula', lines)
    except Exception:
        # Anything unexpected is surfaced as a form-level validation error
        # together with the full traceback.
        trace = traceback.format_exc()
        raise forms.ValidationError(
            'Tried to eval formula on example values:\n{0}\nGot error:\n{1}'
            .format(str(fields_to_values), trace))

    return self.cleaned_data
# Imports used by the field detection strategy registry, followed by the registry itself.
# STRATEGY_DISABLED is one shared DisabledFieldDetectionStrategy instance and is also the
# last entry of _FIELD_DETECTION_STRATEGIES. FIELD_DETECTION_STRATEGY_REGISTRY maps each
# strategy instance's `code` attribute to that instance; if two strategies shared a code,
# the one listed later would replace the earlier one in the registry.
from apps.document.fields_detection.regexps_and_text_based_ml_field_detection import \ RegexpsAndTextBasedMLFieldDetectionStrategy, TextBasedMLFieldDetectionStrategy from apps.document.fields_detection.regexps_field_detection import RegexpsOnlyFieldDetectionStrategy, \ FieldBasedRegexpsDetectionStrategy from apps.document.fields_processing import field_value_cache from apps.document.fields_processing.field_processing_utils import merge_detected_field_values_to_python_value, \ order_field_detection from apps.document.models import ClassifierModel from apps.document.models import Document, DocumentType, DocumentField STRATEGY_DISABLED = DisabledFieldDetectionStrategy() _FIELD_DETECTION_STRATEGIES = [ FieldBasedMLOnlyFieldDetectionStrategy(), FormulaAndFieldBasedMLFieldDetectionStrategy(), FormulaBasedFieldDetectionStrategy(), RegexpsOnlyFieldDetectionStrategy(), RegexpsAndTextBasedMLFieldDetectionStrategy(), TextBasedMLFieldDetectionStrategy(), PythonCodedFieldDetectionStrategy(), FieldBasedRegexpsDetectionStrategy(), STRATEGY_DISABLED ] FIELD_DETECTION_STRATEGY_REGISTRY = { st.code: st for st in _FIELD_DETECTION_STRATEGIES } def train_document_field_detector_model( log: ProcessLogger,