Ejemplo n.º 1
0
    def clean(self):
        """Run cross-field validation for the document field form.

        Values are read from ``self.cleaned_data`` and problems are reported
        through ``self.add_error`` so that one submission can surface several
        errors at once.

        Checks performed, in order:

        * the field type code is present in ``FIELD_TYPE_REGISTRY``;
        * the "unsure" choice value and the "unsure" thresholds agree with
          the parsed choice values;
        * stop words compile and can be applied to a sample text;
        * the classifier init script is accepted by ``init_classifier_impl``;
        * the dependency graph is accepted by ``order_field_detection``;
        * the Python-coded field, if given, is registered and has the same
          type as the form;
        * ``formula`` and ``hide_until_python`` are passed to
          ``self.calc_formula`` together with example values;
        * the default value round-trips through the field type;
        * ``value_regexp`` compiles;
        * ``self.validate_field_code()`` is invoked;
        * when the type of an existing field changes, the detected values of
          its field detectors are re-validated against the new type.

        Returns:
            ``self.cleaned_data``.
        """
        data = self.cleaned_data
        field_code = data.get('code')
        formula = data.get('formula')
        type_code = data.get('type')
        depends_on_fields = list(data.get('depends_on_fields') or [])
        document_type = data.get('document_type')
        classifier_init_script = data['classifier_init_script']
        stop_words = data.get('stop_words')
        hide_until_python = data['hide_until_python']
        default_value = data.get('default_value')
        unsure_choice_value = data[self.UNSURE_CHOICE_VALUE]
        choice_values = DocumentField.parse_choice_values(data['choices'])
        unsure_thresholds_by_value = data.get(self.UNSURE_THRESHOLDS)

        try:
            field_type = FIELD_TYPE_REGISTRY[type_code]
        except KeyError:
            # Keep the name bound: the checks below consult ``field_type`` and
            # must be skipped (not crash) once the type is known to be invalid.
            field_type = None
            self.add_error('type', 'Unknown field type "{}".'.format(type_code))

        if unsure_choice_value and (not choice_values or unsure_choice_value not in choice_values):
            self.add_error(self.UNSURE_CHOICE_VALUE, '"Unsure choice value" must be listed in the choice values.')

        if unsure_thresholds_by_value is not None:
            if not hasattr(unsure_thresholds_by_value, 'items'):
                self.add_error(self.UNSURE_THRESHOLDS, 'Must be a dict of choice values to float thresholds [0..1]')
            else:
                if not choice_values:
                    self.add_error(self.UNSURE_THRESHOLDS, '"Unsure" thresholds are set but choice values are not.')
                if not unsure_choice_value:
                    self.add_error(self.UNSURE_THRESHOLDS, '"Unsure" thresholds are set but '
                                                           '"unsure" choice value is not.')

                if choice_values and unsure_choice_value:
                    # Messages are built with format() rather than "+" so that a
                    # non-string key is reported instead of raising TypeError.
                    for k, v in unsure_thresholds_by_value.items():
                        if k == unsure_choice_value:
                            self.add_error(self.UNSURE_THRESHOLDS, 'Please set thresholds only for "sure" choice '
                                                                   'values and not for {0}'.format(k))
                        elif k not in choice_values:
                            self.add_error(self.UNSURE_THRESHOLDS, 'Value not in choice values: {0}'.format(k))
                        if not isinstance(v, (int, float)) or v < 0 or v > 1:
                            self.add_error(self.UNSURE_THRESHOLDS,
                                           'Threshold should be a float value between 0 and 1: {0}'.format(k))

        try:
            stop_words = compile_stop_words(stop_words)
            # Dry run on a sample text so that problems show up at form time.
            detect_value_with_stop_words(stop_words, 'dummy text')
        except Exception as err:
            self.add_error('stop_words', str(err))

        try:
            init_classifier_impl(field_code, classifier_init_script)
        except ScriptError as err:
            # One form error per line of the script error message.
            self.add_error('classifier_init_script', str(err).split('\n'))

        # 'xxx' stands in for the code of a field whose code is not available.
        fields_and_deps = {field_code or 'xxx': {f.code for f in depends_on_fields}}
        fields_and_deps = self._extract_field_and_deps(depends_on_fields, fields_and_deps)
        try:
            order_field_detection(list(fields_and_deps.items()))
        except ValueError as ve:
            self.add_error(None, str(ve))

        fields_to_values = {field.code: FIELD_TYPE_REGISTRY[field.type].example_python_value(field)
                            for field in depends_on_fields}

        python_coded_field_code = self.cleaned_data.get('python_coded_field')
        if python_coded_field_code:
            python_coded_field = PYTHON_CODED_FIELDS_REGISTRY.get(python_coded_field_code)
            if not python_coded_field:
                self.add_error('python_coded_field', 'Unknown Python-coded field: {0}'.format(python_coded_field_code))
            elif type_code != python_coded_field.type:
                self.add_error('type', 'Python-coded field {0} is of type {1} but {2} is specified'
                                       ' as the field type'.format(python_coded_field.title,
                                                                   python_coded_field.type,
                                                                   type_code))

        if formula and formula.strip() and type_code:
            self.calc_formula(field_code, type_code, formula, fields_to_values, 'formula')

        hide_until_python = hide_until_python.strip() if hide_until_python else None
        if hide_until_python:
            # ``document_type`` is read with .get() and may be absent; fall back
            # to no sibling fields instead of failing on ``None.fields``.
            sibling_fields = document_type.fields.all() if document_type is not None else []
            fields_to_values = {field.code: FIELD_TYPE_REGISTRY[field.type].example_python_value(field)
                                for field in sibling_fields}
            if field_code:
                fields_to_values.pop(field_code, None)
            # Only add the example value of this field when its type resolved;
            # an unknown type has already been reported above.
            if type_code and field_type is not None:
                fields_to_values[field_code] = field_type.example_python_value(self.instance)

            self.calc_formula(field_code,
                              None,
                              hide_until_python,
                              fields_to_values,
                              'hide_until_python',
                              formula_name='hide until python')

        if default_value is not None:
            if type_code == RelatedInfoField.code:
                self.add_error('default_value', 'Related info field can\'t have default value')
            elif field_type is not None \
                    and field_type.extract_from_possible_value(self.instance, default_value) != default_value:
                self.add_error('default_value', 'Wrong value for type {0}. Example: {1}'
                               .format(type_code, json.dumps(field_type.example_python_value(self.instance))))

        try:
            DocumentField.compile_value_regexp(self.cleaned_data['value_regexp'])
        except Exception as exc:
            self.add_error('value_regexp', exc)

        self.validate_field_code()

        if self.initial and 'type' in self.changed_data:
            wrong_field_detector_pks = []
            for field_detector in DocumentFieldDetector.objects.filter(field=self.instance):
                try:
                    DocumentFieldDetector.validate_detected_value(type_code, field_detector.detected_value)
                except Exception:
                    # format() instead of "+": the primary key need not be a str.
                    wrong_field_detector_pks.append('#{0}'.format(field_detector.pk))
            if wrong_field_detector_pks:
                self.add_error('type', 'Detected value is not allowed for this field type, please unset detected value '
                                       'for this field detectors: {0}'.format(', '.join(wrong_field_detector_pks)))

        return self.cleaned_data
Ejemplo n.º 2
0
    def clean(self):
        field_code = self.cleaned_data.get('code')
        formula = self.cleaned_data.get('formula')
        type_code = self.cleaned_data.get('type')
        depends_on_fields = self.cleaned_data.get('depends_on_fields') or []
        document_type = self.cleaned_data.get('document_type')
        depends_on_fields = list(depends_on_fields)
        classifier_init_script = self.cleaned_data['classifier_init_script']
        stop_words = self.cleaned_data['stop_words']
        hide_until_python = self.cleaned_data['hide_until_python']
        default_value = self.cleaned_data['default_value']

        try:
            stop_words = compile_stop_words(stop_words)
            detect_value_with_stop_words(stop_words, 'dummy text')
        except Exception as err:
            self.add_error('stop_words', str(err))

        try:
            FieldBasedMLOnlyFieldDetectionStrategy.init_classifier_impl(
                field_code, classifier_init_script)
        except ScriptError as err:
            self.add_error('classifier_init_script', str(err).split('\n'))

        fields_and_deps = {
            self.cleaned_data.get('code') or 'xxx':
            {f.code
             for f in depends_on_fields}
        }
        fields_and_deps = self._extract_field_and_deps(depends_on_fields,
                                                       fields_and_deps)
        fields_and_deps = [(code, deps)
                           for code, deps in fields_and_deps.items()]
        try:
            order_field_detection(fields_and_deps)
        except ValueError as ve:
            self.add_error(None, str(ve))

        fields_to_values = {
            field.code:
            FIELD_TYPES_REGISTRY[field.type].example_python_value(field)
            for field in depends_on_fields
        }

        python_coded_field_code = self.cleaned_data.get('python_coded_field')
        if python_coded_field_code:
            python_coded_field = PYTHON_CODED_FIELDS_REGISTRY.get(
                python_coded_field_code)
            if not python_coded_field:
                self.add_error(
                    'python_coded_field',
                    'Unknown Python-coded field: {0}'.format(
                        python_coded_field_code))
            else:
                if type_code != python_coded_field.type:
                    self.add_error(
                        'type',
                        'Python-coded field {0} is of type {1} but {2} is specified'
                        ' as the field type'.format(python_coded_field.title,
                                                    python_coded_field.type,
                                                    type_code))

        if formula and formula.strip() and type_code:
            self.calc_formula(field_code, type_code, formula, fields_to_values,
                              'formula')

        hide_until_python = hide_until_python.strip(
        ) if hide_until_python else None
        if hide_until_python:
            fields_to_values = {
                field.code:
                FIELD_TYPES_REGISTRY[field.type].example_python_value(field)
                for field in list(document_type.fields.all())
            }
            code = self.instance.code if self.instance else None
            if code and code in fields_to_values:
                del fields_to_values[code]
            if type_code:
                fields_to_values[field_code] = FIELD_TYPES_REGISTRY[type_code] \
                    .example_python_value(DocumentField(**self.cleaned_data))

            self.calc_formula(field_code,
                              None,
                              hide_until_python,
                              fields_to_values,
                              'hide_until_python',
                              formula_name='hide until python')

        if default_value and type_code == RelatedInfoField.code:
            self.add_error('default_value',
                           'Related info field can\'t have default value')

        try:
            DocumentField.compile_value_regexp(
                self.cleaned_data['value_regexp'])
        except Exception as exc:
            self.add_error('value_regexp', exc)

        # Ensure field code is not too long for Postgres column names
        # We use field codes to build column names for Postgres tables.
        # Max length of column name is 63. We escape them to snake case and sometimes add postfixes to them.
        # Lets assume that we should have max 23 chars for postfixes and max 40 chars for the field code.
        field_code_escaped = escape_column_name(field_code)
        if len(field_code_escaped) > self.MAX_ESCAPED_FIELD_CODE_LEN:
            self.add_error(
                'code',
                '''Field code is too long. Field codes are used to build column names of DB tables.
Escaped version should have max {max_length} chars but it is {length} chars long. Current escaped version of the 
specified field code is: "{field_code_escaped}"'''.format(
                    max_length=self.MAX_ESCAPED_FIELD_CODE_LEN,
                    length=len(field_code_escaped),
                    field_code_escaped=field_code_escaped))
        if not self.R_AZ.search(field_code_escaped):
            self.add_error(
                'code',
                '''Field codes are used to build column names of DB tables. Escaped version of 
the specified field code should contain at least one latin letter. Current escaped version of the specified field 
code is: "{0}"'''.format(field_code_escaped))

        if self.initial and 'type' in self.changed_data:
            wrong_field_detector_pks = []
            for field_detector in DocumentFieldDetector.objects.filter(
                    field=self.instance):
                try:
                    DocumentFieldDetector.validate_detected_value(
                        type_code, field_detector.detected_value)
                except Exception:
                    wrong_field_detector_pks.append('#' + field_detector.pk)
            if wrong_field_detector_pks:
                self.add_error(
                    'type',
                    'Detected value is not allowed for this field type, please unset detected value '
                    'for this field detectors: {0}'.format(
                        ', '.join(wrong_field_detector_pks)))

        return self.cleaned_data