def __init__(
         self,
         field_code: str,
         field_type: str,
         field_title: str,
         table_name: str,
         default_value=None,
         field_column_name_base: str = None,
) -> None:
     """Initialize the handler and derive its currency and amount column names.

     All arguments are forwarded unchanged, positionally, to the parent
     initializer. The column names are built by appending ``_cur`` and
     ``_amt`` to ``self.field_column_name_base`` and escaping the result.
     """
     super().__init__(field_code, field_type, field_title, table_name,
                      default_value, field_column_name_base)
     # NOTE(review): the base is read back from self, so it is presumably
     # populated by the parent initializer - confirm.
     name_base = self.field_column_name_base
     self.currency_column = escape_column_name(name_base + '_cur')
     self.amount_column = escape_column_name(name_base + '_amt')
 def __init__(
         self,
         field_code: str,
         field_type: str,
         field_title: str,
         table_name: str,
         default_value=None,
         field_column_name_base: str = None,
 ) -> None:
     """Initialize the handler and derive its numerator/denominator columns.

     All arguments are forwarded unchanged, positionally, to the parent
     initializer. The column names are built by appending ``_num`` and
     ``_den`` to ``self.field_column_name_base`` and escaping the result.
     """
     super().__init__(field_code, field_type, field_title, table_name,
                      default_value, field_column_name_base)
     # NOTE(review): the base is read back from self, so it is presumably
     # populated by the parent initializer - confirm.
     name_base = self.field_column_name_base
     self.numerator = escape_column_name(name_base + '_num')
     self.denominator = escape_column_name(name_base + '_den')
 def __init__(
         self,
         field_code: str,
         field_type: str,
         field_title: str,
         table_name: str,
         default_value=None,
         field_column_name_base: str = None,
 ) -> None:
     """Initialize the handler and derive its value and text column names.

     All arguments are forwarded unchanged, positionally, to the parent
     initializer. ``column`` is the escaped column name base; ``text_column``
     is the escaped base with ``_txt`` appended.
     """
     super().__init__(field_code, field_type, field_title, table_name,
                      default_value, field_column_name_base)
     # NOTE(review): the base is read back from self, so it is presumably
     # populated by the parent initializer - confirm.
     name_base = self.field_column_name_base
     self.column = escape_column_name(name_base)
     # The '_txt' suffix is appended to the already escaped name; the suffix
     # itself is not passed through escape_column_name.
     self.text_column = escape_column_name(name_base) + '_txt'
 def __init__(
         self,
         field_code: str,
         field_type: str,
         field_title: str,
         table_name: str,
         default_value=None,
         field_column_name_base: str = None,
 ) -> None:
     """Initialize the handler and derive its document ids/links columns.

     All arguments are forwarded unchanged, positionally, to the parent
     initializer. The column names are built by appending ``_ids`` and
     ``_lnks`` to ``self.field_column_name_base`` and escaping the result.
     """
     super().__init__(field_code, field_type, field_title, table_name,
                      default_value, field_column_name_base)
     # NOTE(review): the base is read back from self, so it is presumably
     # populated by the parent initializer - confirm.
     name_base = self.field_column_name_base
     self.document_ids_column = escape_column_name(name_base + '_ids')
     self.document_links_column = escape_column_name(name_base + '_lnks')
 def __init__(
         self,
         field_code: str,
         field_title: str,
         table_name: str,
         default_value: str = None,
         field_column_name_base: str = None,
 ) -> None:
     """Initialize the handler and derive its output and text-search columns.

     All arguments are forwarded unchanged, positionally, to the parent
     initializer. ``output_column`` is the escaped column name base;
     ``text_search_column`` is the escaped base plus ``_text_search``.
     """
     super().__init__(
         field_code, field_title, table_name, default_value,
         field_column_name_base)
     # NOTE(review): the base is read back from self, so it is presumably
     # populated by the parent initializer - confirm.
     name_base = self.field_column_name_base
     self.output_column = escape_column_name(name_base)
     self.text_search_column = escape_column_name(name_base + '_text_search')
Beispiel #6
0
 def __init__(
         self,
         field_code: str,
         field_type: str,
         field_title: str,
         table_name: str,
         default_value=None,
         field_column_name_base: str = None,
         is_suggested: bool = False,
 ) -> None:
     """Initialize the handler and derive its numerator/consequent columns.

     All arguments, including ``is_suggested``, are forwarded unchanged and
     positionally to the parent initializer. The column names are built by
     appending ``_num`` and ``_con`` to ``self.field_column_name_base`` and
     escaping the result.
     """
     super().__init__(
         field_code, field_type, field_title, table_name,
         default_value, field_column_name_base, is_suggested)
     # NOTE(review): the base is read back from self, so it is presumably
     # populated by the parent initializer - confirm.
     name_base = self.field_column_name_base
     self.numerator = escape_column_name(name_base + '_num')
     self.consequent = escape_column_name(name_base + '_con')
Beispiel #7
0
 def __init__(
         self,
         field_code: str,
         field_type: str,
         field_title: str,
         table_name: str,
         default_value: str = None,
         field_column_name_base: str = None,
         is_suggested: bool = False,
         output_column_char_limit: int = None,
 ) -> None:
     """Initialize the handler with output and text-search columns.

     Every argument except ``output_column_char_limit`` is forwarded
     unchanged and positionally to the parent initializer.
     ``output_column_char_limit`` is only stored on the instance here.
     """
     super().__init__(
         field_code, field_type, field_title, table_name,
         default_value, field_column_name_base, is_suggested)
     self.output_column_char_limit = output_column_char_limit
     # NOTE(review): the base is read back from self, so it is presumably
     # populated by the parent initializer - confirm.
     name_base = self.field_column_name_base
     self.output_column = escape_column_name(name_base)
     self.text_search_column = escape_column_name(name_base + '_text_search')
Beispiel #8
0
 def __init__(
         self,
         field_code: str,
         field_type: str,
         field_title: str,
         table_name: str,
         default_value: bool = None,
         field_column_name_base: str = None,
         is_suggested: bool = False,
 ) -> None:
     """Initialize the handler and derive its single column name.

     All arguments are forwarded unchanged, positionally, to the parent
     initializer. ``column`` is the escaped ``self.field_column_name_base``.
     """
     super().__init__(
         field_code, field_type, field_title, table_name,
         default_value, field_column_name_base, is_suggested)
     # NOTE(review): the base is read back from self, so it is presumably
     # populated by the parent initializer - confirm.
     name_base = self.field_column_name_base
     self.column = escape_column_name(name_base)
Beispiel #9
0
 def __init__(
         self,
         field_code: str,
         field_type: str,
         field_title: str,
         table_name: str,
         default_value: str = None,
         field_column_name_base: str = None,
         column_output_char_limit: int = None,
 ) -> None:
     """Initialize the handler with a single column and a char limit.

     Every argument except ``column_output_char_limit`` is forwarded
     unchanged and positionally to the parent initializer.
     ``column_output_char_limit`` is only stored on the instance here.
     """
     super().__init__(
         field_code, field_type, field_title, table_name,
         default_value, field_column_name_base)
     # NOTE(review): the base is read back from self, so it is presumably
     # populated by the parent initializer - confirm.
     name_base = self.field_column_name_base
     self.column = escape_column_name(name_base)
     self.column_output_char_limit = column_output_char_limit
 def __init__(
         self,
         field_code: str,
         field_title: str,
         table_name: str,
         default_value=None,
         field_column_name_base: str = None,
 ) -> None:
     """Store the common field handler attributes on the instance.

     ``field_column_name_base`` falls back to the escaped ``field_code``
     when the passed value is falsy (``None`` or an empty string).
     """
     super().__init__()
     self.field_code = field_code
     if field_column_name_base:
         self.field_column_name_base = field_column_name_base
     else:
         # No explicit base given: derive it from the field code.
         self.field_column_name_base = escape_column_name(field_code)
     self.field_title = field_title
     self.table_name = table_name
     self.default_value = default_value
 def __init__(
         self,
         field_code: str,
         field_type: str,
         field_title: str,
         table_name: str,
         default_value: str = None,
         field_column_name_base: str = None,
         column_output_char_limit: int = None,
         explicit_text_conversion: bool = False,
 ) -> None:
     """Initialize the handler with a single column and output options.

     The first six arguments are forwarded unchanged and positionally to
     the parent initializer. ``column_output_char_limit`` and
     ``explicit_text_conversion`` are only stored on the instance here.
     """
     super().__init__(
         field_code, field_type, field_title, table_name,
         default_value, field_column_name_base)
     # NOTE(review): the base is read back from self, so it is presumably
     # populated by the parent initializer - confirm.
     name_base = self.field_column_name_base
     self.column = escape_column_name(name_base)
     self.column_output_char_limit = column_output_char_limit
     self.explicit_text_conversion = explicit_text_conversion
 def __init__(
         self,
         field_code: str,
         field_type: Optional[str] = None,
         field_title: Optional[str] = None,
         table_name: Optional[str] = None,
         default_value=None,
         field_column_name_base: str = None,
 ) -> None:
     """Store the common field handler attributes on the instance.

     Fallbacks applied when the passed value is falsy:

     * ``field_column_name_base`` becomes the escaped ``field_code``;
     * ``field_title`` becomes ``field_code``.

     ``is_annotation`` is always initialized to ``False``.
     """
     super().__init__()
     self.field_code = field_code
     self.field_type = field_type
     if field_column_name_base:
         self.field_column_name_base = field_column_name_base
     else:
         # No explicit base given: derive it from the field code.
         self.field_column_name_base = escape_column_name(field_code)
     self.field_title = field_title if field_title else field_code
     self.table_name = table_name
     self.default_value = default_value
     self.is_annotation = False
Beispiel #13
0
 def __init__(
         self,
         field_code: str,
         field_type: str,
         field_title: str,
         table_name: str,
         default_value=None,
         field_column_name_base: str = None,
         is_suggested: bool = False,
 ) -> None:
     """Store the common field handler attributes on the instance.

     ``field_column_name_base`` falls back to the escaped ``field_code``
     when the passed value is falsy (``None`` or an empty string).
     """
     super().__init__()
     self.field_code = field_code
     self.field_type = field_type
     if field_column_name_base:
         self.field_column_name_base = field_column_name_base
     else:
         # No explicit base given: derive it from the field code.
         self.field_column_name_base = escape_column_name(field_code)
     self.field_title = field_title
     self.table_name = table_name
     self.default_value = default_value
     self.is_suggested = is_suggested
def build_field_handlers(document_type: DocumentType, table_name: str = None,
                         include_generic_fields: bool = True,
                         include_user_fields: bool = True,
                         include_suggested_fields: bool = True,
                         exclude_hidden_always_fields: bool = False) \
        -> List[field_handlers.FieldHandler]:
    """Build the list of field handlers for the given document type.

    System field handlers always come first, followed by generic field
    handlers (optional) and one handler per ``DocumentField`` of the
    document type (optional), ordered by ``order`` and ``code``.

    :param document_type: document type whose fields are handled.
    :param table_name: target table name; when falsy it is derived from
        ``document_type.code`` via ``doc_fields_table_name``.
    :param include_generic_fields: add the generic field handlers.
    :param include_user_fields: add handlers for the document type's fields.
    :param include_suggested_fields: for each user field that is detectable
        and not read-only, also add a "suggested" handler whose code gets the
        ``_suggested`` suffix and whose column name base gets ``_sug``.
    :param exclude_hidden_always_fields: skip user fields having
        ``hidden_always`` set.
    :return: list of field handlers.
    """
    res = list()  # type: List[field_handlers.FieldHandler]

    if not table_name:
        table_name = doc_fields_table_name(document_type.code)

    res.extend(_build_system_field_handlers(table_name))

    if include_generic_fields:
        res.extend(_build_generic_field_handlers(table_name))

    # Prevent repeating column names.
    # Lets assume generic field codes are unique as well as system field codes.
    # Assigning 1 to their usage count for further taking it into account when building
    # column name bases for the user fields.
    field_code_use_counts = {
        field_handler.field_column_name_base: 1
        for field_handler in res
    }

    if include_user_fields:
        doc_field_qr = DocumentField.objects.filter(
            document_type=document_type)
        if exclude_hidden_always_fields:
            doc_field_qr = doc_field_qr.filter(hidden_always=False)

        for field in doc_field_qr.order_by('order',
                                           'code'):  # type: DocumentField
            field_type = field.get_field_type()  # type: field_types.FieldType

            # Escape field code and take max N chars for using as the column name base
            field_code_escaped = escape_column_name(
                field.code)[:DOCUMENT_FIELD_CODE_MAX_LEN]

            # If we already have this escaped field code in the dict then
            # attach an index to it to avoid repeating of the column names.
            field_code_use_count = field_code_use_counts.get(
                field_code_escaped)
            if field_code_use_count is not None:
                escaped_base = field_code_escaped
                # Make next repeated column name to be column_1, column_2, ...
                # cutting the field code so the result still fits into N chars.
                # Keep probing: the generated name may itself already be taken
                # by another field whose own escaped code looks like "column_1".
                while True:
                    counter_str = str(field_code_use_count)
                    field_code_use_count += 1
                    field_code_escaped = \
                        escaped_base[:DOCUMENT_FIELD_CODE_MAX_LEN - len(counter_str) - 1] \
                        + '_' + counter_str
                    if field_code_escaped not in field_code_use_counts:
                        break
                field_code_use_counts[escaped_base] = field_code_use_count

            # Register the final name (original or generated) so that fields
            # processed later cannot reuse it.
            field_code_use_counts[field_code_escaped] = 1

            field_handler_class = FIELD_DB_SUPPORT_REGISTRY[field_type.code]
            field_handler = field_handler_class(
                field.code,
                field_type.code,
                field.title,
                table_name,
                field.default_value,
                field_column_name_base=field_code_escaped,
                is_suggested=False)
            res.append(field_handler)
            if include_suggested_fields and field.is_detectable(
            ) and not field.read_only:
                field_code_suggested = field.code + '_suggested'
                field_handler_suggested = field_handler_class(
                    field_code_suggested,
                    field_type.code,
                    field.title + ': Suggested',
                    table_name,
                    field.default_value,
                    field_column_name_base=field_code_escaped + '_sug',
                    is_suggested=True)
                res.append(field_handler_suggested)

    return res
Beispiel #15
0
    def process(self, **kwargs):
        """Normalize document field codes and refresh data derived from them.

        Inside a single transaction, for every document type:

        * field codes which differ from their escaped and truncated form are
          replaced by that form, with a numeric suffix when it is already used;
        * ``long_code`` is recomputed and saved when it changes;
        * ``hide_until_js`` is regenerated from ``hide_until_python``;
        * values of ``field_code_aliases`` of the document type are remapped
          to the new field codes.

        After the transaction a CSV listing of all renamed field codes is
        written to the task log. ``kwargs`` is not used.
        """

        with transaction.atomic():

            # Rows of (document type code, old field code, new field code).
            csv_log = list()  # type: List[Tuple[str, str, str]]
            for document_type in DocumentType.objects.all():  # type: DocumentType
                # Old field code -> new field code, for this document type only.
                changed_field_codes = dict()  # type: Dict[str, str]
                field_code_use_counts = dict()  # type: Dict[str, int]

                # First pass: register every existing code as used once.
                # NOTE(review): RE_FIELD_CODE_NUM presumably matches
                # "<base><separator><number>"; group 1 is used as the base code
                # and group 2 as the number - confirm against its definition.
                for code in DocumentField.objects \
                        .filter(document_type=document_type) \
                        .order_by('order', 'code') \
                        .values_list('code', flat=True):
                    field_code_use_counts[code] = 1
                    m = self.RE_FIELD_CODE_NUM.fullmatch(code)
                    if m:
                        base = m.group(1)
                        num = int(m.group(2))
                        # Remember the highest number already seen for this base.
                        old_num = field_code_use_counts.get(base) or 0
                        field_code_use_counts[base] = max(old_num, num)

                # Second pass: fix the codes which need escaping / truncating.
                for field in DocumentField.objects \
                        .filter(document_type=document_type) \
                        .order_by('order', 'code'):  # type: DocumentField
                    field_code_escaped = escape_column_name(field.code)[:DOCUMENT_FIELD_CODE_MAX_LEN]

                    if field.code == field_code_escaped:
                        # Code is already in its final form: only bump its use
                        # count and refresh the long code when it is stale.
                        field_code_use_counts[field.code] = (field_code_use_counts.get(field.code) or 0) + 1
                        long_code = DocumentField.get_long_code(field, document_type)
                        if field.long_code != long_code:
                            self.log_info('Updating field long code {0} to {1}'
                                          .format(field.long_code, long_code))
                            field.long_code = long_code
                            field.save(update_fields={'long_code'})
                    else:
                        field_code_use_count = field_code_use_counts.get(field_code_escaped)
                        if field_code_use_count is not None:
                            field_code_use_counts[field_code_escaped] = field_code_use_count + 1
                            counter_str = str(field_code_use_count)

                            # make next repeated column name to be column1, column2, ...
                            # make it fitting into N chars by cutting the field code on the required
                            # number of chars to fit the num
                            field_code_escaped = field_code_escaped[:DOCUMENT_FIELD_CODE_MAX_LEN - len(counter_str) - 1] \
                                                 + '_' + counter_str
                        else:
                            # The escaped code has not been seen yet, so this
                            # stores a use count of 1.
                            field_code_use_counts[field_code_escaped] \
                                = (field_code_use_counts.get(field_code_escaped) or 0) + 1

                        self.log_info('Updating field {0}.{1} to {2}'
                                      .format(document_type.code, field.code, field_code_escaped))
                        changed_field_codes[field.code] = field_code_escaped
                        csv_log.append((document_type.code, field.code, field_code_escaped))
                        field.code = field_code_escaped

                        # The long code is computed after the new code is assigned.
                        field.long_code = DocumentField.get_long_code(field, document_type)

                        field.save(update_fields={'code', 'long_code'})

                    # Regenerate the JavaScript version of the "hide until" formula;
                    # an empty Python formula results in an empty JS formula.
                    hide_until_js = jiphy.to.javascript(field.hide_until_python) if field.hide_until_python else ''
                    if hide_until_js != field.hide_until_js:
                        field.hide_until_js = hide_until_js
                        self.log_info('Updating hide_until_js for field {0}.{1}'
                                      .format(document_type.code, field.code))
                        field.save(update_fields={'hide_until_js'})

                # Point alias values at the new field codes; aliases of
                # unchanged codes keep their value.
                if len(changed_field_codes) > 0 and document_type.field_code_aliases:
                    updated_aliases = {k: changed_field_codes.get(v) or v
                                       for k, v in document_type.field_code_aliases.items()}
                    self.log_info('Updating field code aliases of document type {0}"\n{1}'
                                  .format(document_type.code, updated_aliases))
                    document_type.field_code_aliases = updated_aliases
                    document_type.save(update_fields={'field_code_aliases'})

        # Dump all renames as CSV into the task log.
        output = io.StringIO()
        writer = csv.writer(output)
        writer.writerow(('Document Type', 'Old Field Code', 'New Field Code'))
        for r in csv_log:
            writer.writerow(r)
        self.log_info('\n\n\n------------------\n'
                      'Changed fields csv:\n' + output.getvalue() + '\n------------------')
Beispiel #16
0
    def clean(self):
        field_code = self.cleaned_data.get('code')
        formula = self.cleaned_data.get('formula')
        type_code = self.cleaned_data.get('type')
        depends_on_fields = self.cleaned_data.get('depends_on_fields') or []
        document_type = self.cleaned_data.get('document_type')
        depends_on_fields = list(depends_on_fields)
        classifier_init_script = self.cleaned_data['classifier_init_script']
        stop_words = self.cleaned_data['stop_words']
        hide_until_python = self.cleaned_data['hide_until_python']
        default_value = self.cleaned_data['default_value']

        try:
            stop_words = compile_stop_words(stop_words)
            detect_value_with_stop_words(stop_words, 'dummy text')
        except Exception as err:
            self.add_error('stop_words', str(err))

        try:
            FieldBasedMLOnlyFieldDetectionStrategy.init_classifier_impl(
                field_code, classifier_init_script)
        except ScriptError as err:
            self.add_error('classifier_init_script', str(err).split('\n'))

        fields_and_deps = {
            self.cleaned_data.get('code') or 'xxx':
            {f.code
             for f in depends_on_fields}
        }
        fields_and_deps = self._extract_field_and_deps(depends_on_fields,
                                                       fields_and_deps)
        fields_and_deps = [(code, deps)
                           for code, deps in fields_and_deps.items()]
        try:
            order_field_detection(fields_and_deps)
        except ValueError as ve:
            self.add_error(None, str(ve))

        fields_to_values = {
            field.code:
            FIELD_TYPES_REGISTRY[field.type].example_python_value(field)
            for field in depends_on_fields
        }

        python_coded_field_code = self.cleaned_data.get('python_coded_field')
        if python_coded_field_code:
            python_coded_field = PYTHON_CODED_FIELDS_REGISTRY.get(
                python_coded_field_code)
            if not python_coded_field:
                self.add_error(
                    'python_coded_field',
                    'Unknown Python-coded field: {0}'.format(
                        python_coded_field_code))
            else:
                if type_code != python_coded_field.type:
                    self.add_error(
                        'type',
                        'Python-coded field {0} is of type {1} but {2} is specified'
                        ' as the field type'.format(python_coded_field.title,
                                                    python_coded_field.type,
                                                    type_code))

        if formula and formula.strip() and type_code:
            self.calc_formula(field_code, type_code, formula, fields_to_values,
                              'formula')

        hide_until_python = hide_until_python.strip(
        ) if hide_until_python else None
        if hide_until_python:
            fields_to_values = {
                field.code:
                FIELD_TYPES_REGISTRY[field.type].example_python_value(field)
                for field in list(document_type.fields.all())
            }
            code = self.instance.code if self.instance else None
            if code and code in fields_to_values:
                del fields_to_values[code]
            if type_code:
                fields_to_values[field_code] = FIELD_TYPES_REGISTRY[type_code] \
                    .example_python_value(DocumentField(**self.cleaned_data))

            self.calc_formula(field_code,
                              None,
                              hide_until_python,
                              fields_to_values,
                              'hide_until_python',
                              formula_name='hide until python')

        if default_value and type_code == RelatedInfoField.code:
            self.add_error('default_value',
                           'Related info field can\'t have default value')

        try:
            DocumentField.compile_value_regexp(
                self.cleaned_data['value_regexp'])
        except Exception as exc:
            self.add_error('value_regexp', exc)

        # Ensure field code is not too long for Postgres column names
        # We use field codes to build column names for Postgres tables.
        # Max length of column name is 63. We escape them to snake case and sometimes add postfixes to them.
        # Lets assume that we should have max 23 chars for postfixes and max 40 chars for the field code.
        field_code_escaped = escape_column_name(field_code)
        if len(field_code_escaped) > self.MAX_ESCAPED_FIELD_CODE_LEN:
            self.add_error(
                'code',
                '''Field code is too long. Field codes are used to build column names of DB tables.
Escaped version should have max {max_length} chars but it is {length} chars long. Current escaped version of the 
specified field code is: "{field_code_escaped}"'''.format(
                    max_length=self.MAX_ESCAPED_FIELD_CODE_LEN,
                    length=len(field_code_escaped),
                    field_code_escaped=field_code_escaped))
        if not self.R_AZ.search(field_code_escaped):
            self.add_error(
                'code',
                '''Field codes are used to build column names of DB tables. Escaped version of 
the specified field code should contain at least one latin letter. Current escaped version of the specified field 
code is: "{0}"'''.format(field_code_escaped))

        if self.initial and 'type' in self.changed_data:
            wrong_field_detector_pks = []
            for field_detector in DocumentFieldDetector.objects.filter(
                    field=self.instance):
                try:
                    DocumentFieldDetector.validate_detected_value(
                        type_code, field_detector.detected_value)
                except Exception:
                    wrong_field_detector_pks.append('#' + field_detector.pk)
            if wrong_field_detector_pks:
                self.add_error(
                    'type',
                    'Detected value is not allowed for this field type, please unset detected value '
                    'for this field detectors: {0}'.format(
                        ', '.join(wrong_field_detector_pks)))

        return self.cleaned_data
Beispiel #17
0
 def __init__(
         self,
         field_code: str,
         field_title: str,
         table_name: str,
 ) -> None:
     """Initialize the handler and derive its output and text-search columns.

     All arguments are forwarded unchanged, positionally, to the parent
     initializer. ``output_column`` is the escaped ``field_code``;
     ``text_search_column`` is the escaped code plus ``_text_search``.
     """
     super().__init__(field_code, field_title, table_name)
     text_search_name = field_code + '_text_search'
     self.output_column = escape_column_name(field_code)
     self.text_search_column = escape_column_name(text_search_name)
Beispiel #18
0
 def __init__(
         self,
         field_code: str,
         field_title: str,
         table_name: str,
 ) -> None:
     """Initialize the handler and derive its currency and amount columns.

     All arguments are forwarded unchanged, positionally, to the parent
     initializer. The column names are the escaped ``field_code`` with the
     ``_currency`` and ``_amount`` suffixes appended before escaping.
     """
     super().__init__(field_code, field_title, table_name)
     currency_name = field_code + '_currency'
     amount_name = field_code + '_amount'
     self.currency_column = escape_column_name(currency_name)
     self.amount_column = escape_column_name(amount_name)
def doc_fields_table_name(document_type_code: str) -> str:
    """Return the escaped table name for the given document type code.

    The name is ``TABLE_NAME_PREFIX`` followed by the document type code,
    passed as a whole through ``escape_column_name``.
    """
    raw_table_name = TABLE_NAME_PREFIX + document_type_code
    return escape_column_name(raw_table_name)
Beispiel #20
0
 def __init__(
         self,
         field_code: str,
         field_title: str,
         table_name: str,
 ) -> None:
     """Initialize the handler and derive its single column name.

     All arguments are forwarded unchanged, positionally, to the parent
     initializer. ``column`` is the escaped ``field_code``.
     """
     super().__init__(field_code, field_title, table_name)
     escaped_code = escape_column_name(field_code)
     self.column = escaped_code