def __init__(self,
             field_code: str,
             field_type: str,
             field_title: str,
             table_name: str,
             default_value=None,
             field_column_name_base: str = None) -> None:
    """
    Initialize the parent state and derive the currency/amount column names.

    All arguments are forwarded unchanged to the parent initializer. The two
    column names are built from ``self.field_column_name_base`` (expected to
    be set by the parent initializer) with the ``_cur`` and ``_amt`` suffixes
    and then passed through ``escape_column_name``.
    """
    super().__init__(field_code, field_type, field_title, table_name,
                     default_value, field_column_name_base)
    column_name_base = self.field_column_name_base
    self.currency_column = escape_column_name(column_name_base + '_cur')
    self.amount_column = escape_column_name(column_name_base + '_amt')
def __init__(self,
             field_code: str,
             field_type: str,
             field_title: str,
             table_name: str,
             default_value=None,
             field_column_name_base: str = None) -> None:
    """
    Initialize the parent state and derive the numerator/denominator columns.

    All arguments are forwarded unchanged to the parent initializer. The two
    column names are built from ``self.field_column_name_base`` (expected to
    be set by the parent initializer) with the ``_num`` and ``_den`` suffixes
    and then passed through ``escape_column_name``.
    """
    super().__init__(field_code, field_type, field_title, table_name,
                     default_value, field_column_name_base)
    column_name_base = self.field_column_name_base
    self.numerator = escape_column_name(column_name_base + '_num')
    self.denominator = escape_column_name(column_name_base + '_den')
def __init__(self,
             field_code: str,
             field_type: str,
             field_title: str,
             table_name: str,
             default_value=None,
             field_column_name_base: str = None) -> None:
    """
    Initialize the parent state and derive the value and text column names.

    All arguments are forwarded unchanged to the parent initializer.
    ``column`` is the escaped ``self.field_column_name_base`` (expected to be
    set by the parent initializer); ``text_column`` is that same escaped base
    followed by ``_txt``.
    """
    super().__init__(field_code, field_type, field_title, table_name,
                     default_value, field_column_name_base)
    column_name_base = self.field_column_name_base
    self.column = escape_column_name(column_name_base)
    # The '_txt' suffix is appended AFTER escaping, so the suffix itself is
    # never passed through escape_column_name.
    self.text_column = escape_column_name(column_name_base) + '_txt'
def __init__(self,
             field_code: str,
             field_type: str,
             field_title: str,
             table_name: str,
             default_value=None,
             field_column_name_base: str = None) -> None:
    """
    Initialize the parent state and derive the document ids/links columns.

    All arguments are forwarded unchanged to the parent initializer. The two
    column names are built from ``self.field_column_name_base`` (expected to
    be set by the parent initializer) with the ``_ids`` and ``_lnks`` suffixes
    and then passed through ``escape_column_name``.
    """
    super().__init__(field_code, field_type, field_title, table_name,
                     default_value, field_column_name_base)
    column_name_base = self.field_column_name_base
    self.document_ids_column = escape_column_name(column_name_base + '_ids')
    self.document_links_column = escape_column_name(column_name_base + '_lnks')
def __init__(self,
             field_code: str,
             field_title: str,
             table_name: str,
             default_value: str = None,
             field_column_name_base: str = None) -> None:
    """
    Initialize the parent state and derive the output/text-search columns.

    All arguments are forwarded unchanged to the parent initializer.
    ``output_column`` is the escaped ``self.field_column_name_base`` (expected
    to be set by the parent initializer); ``text_search_column`` is the
    escaped base with the ``_text_search`` suffix.
    """
    super().__init__(field_code, field_title, table_name,
                     default_value, field_column_name_base)
    column_name_base = self.field_column_name_base
    self.output_column = escape_column_name(column_name_base)
    self.text_search_column = escape_column_name(column_name_base + '_text_search')
def __init__(self,
             field_code: str,
             field_type: str,
             field_title: str,
             table_name: str,
             default_value=None,
             field_column_name_base: str = None,
             is_suggested: bool = False) -> None:
    """
    Initialize the parent state and derive the numerator/consequent columns.

    All arguments are forwarded unchanged to the parent initializer. The two
    column names are built from ``self.field_column_name_base`` (expected to
    be set by the parent initializer) with the ``_num`` and ``_con`` suffixes
    and then passed through ``escape_column_name``.
    """
    super().__init__(field_code, field_type, field_title, table_name,
                     default_value, field_column_name_base, is_suggested)
    column_name_base = self.field_column_name_base
    self.numerator = escape_column_name(column_name_base + '_num')
    self.consequent = escape_column_name(column_name_base + '_con')
def __init__(self,
             field_code: str,
             field_type: str,
             field_title: str,
             table_name: str,
             default_value: str = None,
             field_column_name_base: str = None,
             is_suggested: bool = False,
             output_column_char_limit: int = None) -> None:
    """
    Initialize the parent state and derive the output/text-search columns.

    Every argument except ``output_column_char_limit`` is forwarded unchanged
    to the parent initializer; ``output_column_char_limit`` is only stored on
    the instance. ``output_column`` is the escaped
    ``self.field_column_name_base`` (expected to be set by the parent
    initializer) and ``text_search_column`` is the escaped base with the
    ``_text_search`` suffix.
    """
    super().__init__(field_code, field_type, field_title, table_name,
                     default_value, field_column_name_base, is_suggested)
    column_name_base = self.field_column_name_base
    self.output_column = escape_column_name(column_name_base)
    self.text_search_column = escape_column_name(column_name_base + '_text_search')
    self.output_column_char_limit = output_column_char_limit
def __init__(self,
             field_code: str,
             field_type: str,
             field_title: str,
             table_name: str,
             default_value: bool = None,
             field_column_name_base: str = None,
             is_suggested: bool = False) -> None:
    """
    Initialize the parent state and derive the single escaped column name.

    All arguments are forwarded unchanged to the parent initializer.
    ``column`` is ``self.field_column_name_base`` (expected to be set by the
    parent initializer) passed through ``escape_column_name``.
    """
    super().__init__(field_code, field_type, field_title, table_name,
                     default_value, field_column_name_base, is_suggested)
    column_name_base = self.field_column_name_base
    self.column = escape_column_name(column_name_base)
def __init__(self,
             field_code: str,
             field_type: str,
             field_title: str,
             table_name: str,
             default_value: str = None,
             field_column_name_base: str = None,
             column_output_char_limit: int = None) -> None:
    """
    Initialize the parent state, the escaped column name and the char limit.

    Every argument except ``column_output_char_limit`` is forwarded unchanged
    to the parent initializer; ``column_output_char_limit`` is only stored on
    the instance. ``column`` is ``self.field_column_name_base`` (expected to
    be set by the parent initializer) passed through ``escape_column_name``.
    """
    super().__init__(field_code, field_type, field_title, table_name,
                     default_value, field_column_name_base)
    column_name_base = self.field_column_name_base
    self.column = escape_column_name(column_name_base)
    self.column_output_char_limit = column_output_char_limit
def __init__(self,
             field_code: str,
             field_title: str,
             table_name: str,
             default_value=None,
             field_column_name_base: str = None) -> None:
    """
    Store the field description on the instance.

    ``field_column_name_base`` falls back to ``escape_column_name(field_code)``
    when the passed value is falsy (``None`` or an empty string).
    """
    super().__init__()
    self.field_code = field_code
    if field_column_name_base:
        self.field_column_name_base = field_column_name_base
    else:
        self.field_column_name_base = escape_column_name(field_code)
    self.field_title = field_title
    self.table_name = table_name
    self.default_value = default_value
def __init__(self,
             field_code: str,
             field_type: str,
             field_title: str,
             table_name: str,
             default_value: str = None,
             field_column_name_base: str = None,
             column_output_char_limit: int = None,
             explicit_text_conversion: bool = False) -> None:
    """
    Initialize the parent state, the escaped column name and output options.

    ``column_output_char_limit`` and ``explicit_text_conversion`` are only
    stored on the instance; the remaining arguments are forwarded unchanged to
    the parent initializer. ``column`` is ``self.field_column_name_base``
    (expected to be set by the parent initializer) passed through
    ``escape_column_name``.
    """
    super().__init__(field_code, field_type, field_title, table_name,
                     default_value, field_column_name_base)
    column_name_base = self.field_column_name_base
    self.column = escape_column_name(column_name_base)
    self.column_output_char_limit = column_output_char_limit
    self.explicit_text_conversion = explicit_text_conversion
def __init__(self,
             field_code: str,
             field_type: Optional[str] = None,
             field_title: Optional[str] = None,
             table_name: Optional[str] = None,
             default_value=None,
             field_column_name_base: str = None) -> None:
    """
    Store the field description on the instance.

    Fallbacks applied when the passed value is falsy:

    * ``field_column_name_base`` -> ``escape_column_name(field_code)``
    * ``field_title`` -> ``field_code``

    ``is_annotation`` is always initialized to ``False``.
    """
    super().__init__()
    self.field_code = field_code
    self.field_type = field_type
    if field_column_name_base:
        self.field_column_name_base = field_column_name_base
    else:
        self.field_column_name_base = escape_column_name(field_code)
    self.field_title = field_title if field_title else field_code
    self.table_name = table_name
    self.default_value = default_value
    self.is_annotation = False
def __init__(self,
             field_code: str,
             field_type: str,
             field_title: str,
             table_name: str,
             default_value=None,
             field_column_name_base: str = None,
             is_suggested: bool = False) -> None:
    """
    Store the field description on the instance.

    ``field_column_name_base`` falls back to ``escape_column_name(field_code)``
    when the passed value is falsy (``None`` or an empty string).
    """
    super().__init__()
    self.field_code = field_code
    self.field_type = field_type
    if field_column_name_base:
        self.field_column_name_base = field_column_name_base
    else:
        self.field_column_name_base = escape_column_name(field_code)
    self.field_title = field_title
    self.table_name = table_name
    self.default_value = default_value
    self.is_suggested = is_suggested
def build_field_handlers(document_type: DocumentType,
                         table_name: str = None,
                         include_generic_fields: bool = True,
                         include_user_fields: bool = True,
                         include_suggested_fields: bool = True,
                         exclude_hidden_always_fields: bool = False) \
        -> List[field_handlers.FieldHandler]:
    """
    Build the list of field handlers for the given document type.

    The result always starts with the system field handlers, optionally
    followed by the generic ones and by one handler per user-defined
    ``DocumentField`` of the document type (ordered by ``order``, ``code``).
    For a user field that is detectable and not read-only an extra
    "suggested" handler is appended when ``include_suggested_fields`` is set.

    :param document_type: document type whose fields are processed.
    :param table_name: target table name; when falsy it is derived with
        ``doc_fields_table_name(document_type.code)``.
    :param include_generic_fields: add the generic field handlers.
    :param include_user_fields: add handlers for the user-defined fields.
    :param include_suggested_fields: add "suggested" twins of user fields.
    :param exclude_hidden_always_fields: skip fields with ``hidden_always`` set.
    :return: list of the built field handlers.
    """
    table_name = table_name or doc_fields_table_name(document_type.code)

    handlers = list(_build_system_field_handlers(table_name))  # type: List[field_handlers.FieldHandler]
    if include_generic_fields:
        handlers.extend(_build_generic_field_handlers(table_name))

    # Prevent repeating column names.
    # System and generic column name bases are assumed to be unique, so each of
    # them starts with a usage count of 1. The counts are then consulted when
    # building the column name bases of the user fields.
    used_column_bases = {handler.field_column_name_base: 1 for handler in handlers}

    if not include_user_fields:
        return handlers

    user_fields = DocumentField.objects.filter(document_type=document_type)
    if exclude_hidden_always_fields:
        user_fields = user_fields.filter(hidden_always=False)

    for field in user_fields.order_by('order', 'code'):  # type: DocumentField
        field_type = field.get_field_type()  # type: field_types.FieldType

        # Escape the field code and keep at most N chars of it as the column name base.
        column_base = escape_column_name(field.code)[:DOCUMENT_FIELD_CODE_MAX_LEN]

        times_used = used_column_bases.get(column_base)
        if times_used is None:
            used_column_bases[column_base] = 1
        else:
            # This base is already taken: bump its counter and attach the
            # previous count as a "_<n>" suffix, cutting the base so that the
            # result still fits into N chars.
            used_column_bases[column_base] = times_used + 1
            suffix = '_' + str(times_used)
            column_base = column_base[:DOCUMENT_FIELD_CODE_MAX_LEN - len(suffix)] + suffix

        handler_class = FIELD_DB_SUPPORT_REGISTRY[field_type.code]
        handlers.append(handler_class(field.code,
                                      field_type.code,
                                      field.title,
                                      table_name,
                                      field.default_value,
                                      field_column_name_base=column_base,
                                      is_suggested=False))

        if include_suggested_fields and field.is_detectable() and not field.read_only:
            handlers.append(handler_class(field.code + '_suggested',
                                          field_type.code,
                                          field.title + ': Suggested',
                                          table_name,
                                          field.default_value,
                                          field_column_name_base=column_base + '_sug',
                                          is_suggested=True))

    return handlers
def process(self, **kwargs):
    """
    Rewrite document field codes to their escaped, length-limited form.

    For every document type and each of its fields (ordered by ``order``,
    ``code``) the method:

    * computes ``escape_column_name(field.code)`` cut to
      ``DOCUMENT_FIELD_CODE_MAX_LEN`` chars;
    * if the code is already in that form, only refreshes ``long_code`` when
      it differs from ``DocumentField.get_long_code(...)``;
    * otherwise assigns the escaped code (with a ``_<n>`` suffix when the
      escaped code is already counted as used), refreshes ``long_code`` and
      saves the field;
    * regenerates ``hide_until_js`` from ``hide_until_python`` and saves it
      when it changed;
    * rewrites the values of ``document_type.field_code_aliases`` that point
      to renamed field codes.

    Finally a CSV listing (document type, old code, new code) of all renamed
    fields is written to the log via ``self.log_info``.

    ``kwargs`` are accepted but not used by this method.
    """
    with transaction.atomic():
        csv_log = list()  # type: List[Tuple[str, str, str]]
        for document_type in DocumentType.objects.all():  # type: DocumentType
            changed_field_codes = dict()  # type: Dict[str, str]
            field_code_use_counts = dict()  # type: Dict[str, int]

            # First pass: register every existing code as used once and, for
            # codes matching RE_FIELD_CODE_NUM (group 1 - base, group 2 - number),
            # remember the biggest number seen for the base.
            for code in DocumentField.objects \
                    .filter(document_type=document_type) \
                    .order_by('order', 'code') \
                    .values_list('code', flat=True):
                field_code_use_counts[code] = 1
                m = self.RE_FIELD_CODE_NUM.fullmatch(code)
                if m:
                    base = m.group(1)
                    num = int(m.group(2))
                    old_num = field_code_use_counts.get(base) or 0
                    field_code_use_counts[base] = max(old_num, num)

            # Second pass: rename the fields whose code differs from its escaped form.
            for field in DocumentField.objects \
                    .filter(document_type=document_type) \
                    .order_by('order', 'code'):  # type: DocumentField
                field_code_escaped = escape_column_name(field.code)[:DOCUMENT_FIELD_CODE_MAX_LEN]

                if field.code == field_code_escaped:
                    # The code is already in the escaped form: keep it and only
                    # bring long_code up to date.
                    field_code_use_counts[field.code] = (field_code_use_counts.get(field.code) or 0) + 1
                    long_code = DocumentField.get_long_code(field, document_type)
                    if field.long_code != long_code:
                        self.log_info('Updating field long code {0} to {1}'
                                      .format(field.long_code, long_code))
                        field.long_code = long_code
                        field.save(update_fields={'long_code'})
                else:
                    field_code_use_count = field_code_use_counts.get(field_code_escaped)
                    if field_code_use_count is not None:
                        field_code_use_counts[field_code_escaped] = field_code_use_count + 1
                        counter_str = str(field_code_use_count)

                        # The escaped code is already counted as used: append
                        # "_<count>" to it, cutting the code by the required number
                        # of chars so that the result still fits into
                        # DOCUMENT_FIELD_CODE_MAX_LEN chars.
                        field_code_escaped = field_code_escaped[:DOCUMENT_FIELD_CODE_MAX_LEN - len(counter_str) - 1] \
                            + '_' + counter_str
                    else:
                        field_code_use_counts[field_code_escaped] \
                            = (field_code_use_counts.get(field_code_escaped) or 0) + 1

                    self.log_info('Updating field {0}.{1} to {2}'
                                  .format(document_type.code, field.code, field_code_escaped))
                    changed_field_codes[field.code] = field_code_escaped
                    csv_log.append((document_type.code, field.code, field_code_escaped))
                    field.code = field_code_escaped
                    field.long_code = DocumentField.get_long_code(field, document_type)
                    field.save(update_fields={'code', 'long_code'})

                # Regenerate the JavaScript version of the "hide until" expression
                # from its Python source; an empty Python expression gives ''.
                hide_until_js = jiphy.to.javascript(field.hide_until_python) if field.hide_until_python else ''
                if hide_until_js != field.hide_until_js:
                    field.hide_until_js = hide_until_js
                    self.log_info('Updating hide_until_js for field {0}.{1}'
                                  .format(document_type.code, field.code))
                    field.save(update_fields={'hide_until_js'})

            # Re-point the aliases whose target field code has been renamed above.
            if len(changed_field_codes) > 0 and document_type.field_code_aliases:
                updated_aliases = {k: changed_field_codes.get(v) or v
                                   for k, v in document_type.field_code_aliases.items()}
                self.log_info('Updating field code aliases of document type {0}"\n{1}'
                              .format(document_type.code, updated_aliases))
                document_type.field_code_aliases = updated_aliases
                document_type.save(update_fields={'field_code_aliases'})

        # Dump all renames as CSV text into the log.
        output = io.StringIO()
        writer = csv.writer(output)
        writer.writerow(('Document Type', 'Old Field Code', 'New Field Code'))
        for r in csv_log:
            writer.writerow(r)
        self.log_info('\n\n\n------------------\n'
                      'Changed fields csv:\n' + output.getvalue() + '\n------------------')
def clean(self):
    """
    Run the cross-field validation of the document field form.

    Problems are reported with ``self.add_error`` instead of being raised.
    The checks, in order:

    * stop words compile and can be applied to a dummy text;
    * the classifier init script can be initialized;
    * the field dependencies can be ordered by ``order_field_detection``;
    * the selected Python-coded field exists and matches the field type;
    * the formula and the "hide until" expression can be calculated on
      example values;
    * a related info field has no default value;
    * the value regexp compiles;
    * the escaped field code is short enough and matches ``self.R_AZ``;
    * when the type of an existing field is changed, the detected values of
      its field detectors are still allowed for the new type.

    :return: ``self.cleaned_data``.
    """
    field_code = self.cleaned_data.get('code')
    formula = self.cleaned_data.get('formula')
    type_code = self.cleaned_data.get('type')
    depends_on_fields = self.cleaned_data.get('depends_on_fields') or []
    document_type = self.cleaned_data.get('document_type')
    depends_on_fields = list(depends_on_fields)
    classifier_init_script = self.cleaned_data['classifier_init_script']
    stop_words = self.cleaned_data['stop_words']
    hide_until_python = self.cleaned_data['hide_until_python']
    default_value = self.cleaned_data['default_value']

    try:
        stop_words = compile_stop_words(stop_words)
        detect_value_with_stop_words(stop_words, 'dummy text')
    except Exception as err:
        self.add_error('stop_words', str(err))

    try:
        FieldBasedMLOnlyFieldDetectionStrategy.init_classifier_impl(
            field_code, classifier_init_script)
    except ScriptError as err:
        self.add_error('classifier_init_script', str(err).split('\n'))

    # Map "field code -> codes it depends on", starting from this field
    # ('xxx' stands in for a code that is missing from cleaned_data).
    fields_and_deps = {
        self.cleaned_data.get('code') or 'xxx': {f.code for f in depends_on_fields}
    }
    fields_and_deps = self._extract_field_and_deps(depends_on_fields, fields_and_deps)
    fields_and_deps = [(code, deps) for code, deps in fields_and_deps.items()]
    try:
        order_field_detection(fields_and_deps)
    except ValueError as ve:
        self.add_error(None, str(ve))

    fields_to_values = {
        field.code: FIELD_TYPES_REGISTRY[field.type].example_python_value(field)
        for field in depends_on_fields
    }

    python_coded_field_code = self.cleaned_data.get('python_coded_field')
    if python_coded_field_code:
        python_coded_field = PYTHON_CODED_FIELDS_REGISTRY.get(python_coded_field_code)
        if not python_coded_field:
            self.add_error(
                'python_coded_field',
                'Unknown Python-coded field: {0}'.format(python_coded_field_code))
        else:
            if type_code != python_coded_field.type:
                self.add_error(
                    'type',
                    'Python-coded field {0} is of type {1} but {2} is specified'
                    ' as the field type'.format(python_coded_field.title,
                                                python_coded_field.type,
                                                type_code))

    if formula and formula.strip() and type_code:
        self.calc_formula(field_code, type_code, formula, fields_to_values, 'formula')

    hide_until_python = hide_until_python.strip() if hide_until_python else None
    if hide_until_python:
        # The "hide until" expression is evaluated against example values of
        # all fields of the document type, with the previous code of the
        # edited field removed and its current code/type added.
        fields_to_values = {
            field.code: FIELD_TYPES_REGISTRY[field.type].example_python_value(field)
            for field in list(document_type.fields.all())
        }
        code = self.instance.code if self.instance else None
        if code and code in fields_to_values:
            del fields_to_values[code]
        if type_code:
            fields_to_values[field_code] = FIELD_TYPES_REGISTRY[type_code] \
                .example_python_value(DocumentField(**self.cleaned_data))

        self.calc_formula(field_code,
                          None,
                          hide_until_python,
                          fields_to_values,
                          'hide_until_python',
                          formula_name='hide until python')

    if default_value and type_code == RelatedInfoField.code:
        self.add_error('default_value', 'Related info field can\'t have default value')

    try:
        DocumentField.compile_value_regexp(self.cleaned_data['value_regexp'])
    except Exception as exc:
        self.add_error('value_regexp', exc)

    # Ensure field code is not too long for Postgres column names.
    # We use field codes to build column names for Postgres tables.
    # Max length of column name is 63. We escape them to snake case and sometimes add postfixes to them.
    # Lets assume that we should have max 23 chars for postfixes and max 40 chars for the field code.
    field_code_escaped = escape_column_name(field_code)
    if len(field_code_escaped) > self.MAX_ESCAPED_FIELD_CODE_LEN:
        too_long_message = (
            'Field code is too long. Field codes are used to build column names of DB tables. '
            'Escaped version should have max {max_length} chars but it is {length} chars long. '
            'Current escaped version of the specified field code is: "{field_code_escaped}"')
        self.add_error(
            'code',
            too_long_message.format(max_length=self.MAX_ESCAPED_FIELD_CODE_LEN,
                                    length=len(field_code_escaped),
                                    field_code_escaped=field_code_escaped))

    if not self.R_AZ.search(field_code_escaped):
        no_letter_message = (
            'Field codes are used to build column names of DB tables. '
            'Escaped version of the specified field code should contain at least one latin letter. \n'
            'Current escaped version of the specified field code is: "{0}"')
        self.add_error('code', no_letter_message.format(field_code_escaped))

    if self.initial and 'type' in self.changed_data:
        wrong_field_detector_pks = []
        for field_detector in DocumentFieldDetector.objects.filter(field=self.instance):
            try:
                DocumentFieldDetector.validate_detected_value(
                    type_code, field_detector.detected_value)
            except Exception:
                # The primary key is not guaranteed to be a str (e.g. integer
                # or UUID keys); convert it explicitly, otherwise the
                # concatenation raises TypeError and the validation error
                # below is never reported.
                wrong_field_detector_pks.append('#' + str(field_detector.pk))
        if wrong_field_detector_pks:
            self.add_error(
                'type',
                'Detected value is not allowed for this field type, please unset detected value '
                'for this field detectors: {0}'.format(', '.join(wrong_field_detector_pks)))

    return self.cleaned_data
def __init__(self, field_code: str, field_title: str, table_name: str) -> None:
    """
    Initialize the parent state and derive the output/text-search columns.

    ``output_column`` is the escaped ``field_code``; ``text_search_column``
    is the escaped ``field_code`` with the ``_text_search`` suffix.
    """
    super().__init__(field_code, field_title, table_name)
    text_search_name = field_code + '_text_search'
    self.output_column = escape_column_name(field_code)
    self.text_search_column = escape_column_name(text_search_name)
def __init__(self, field_code: str, field_title: str, table_name: str) -> None:
    """
    Initialize the parent state and derive the currency/amount column names.

    Both names are built from ``field_code`` with the ``_currency`` and
    ``_amount`` suffixes and then passed through ``escape_column_name``.
    """
    super().__init__(field_code, field_title, table_name)
    currency_name = field_code + '_currency'
    self.currency_column = escape_column_name(currency_name)
    amount_name = field_code + '_amount'
    self.amount_column = escape_column_name(amount_name)
def doc_fields_table_name(document_type_code: str) -> str:
    """
    Build the table name for the given document type code.

    The name is ``TABLE_NAME_PREFIX`` followed by the code, passed through
    ``escape_column_name``.
    """
    unescaped_name = TABLE_NAME_PREFIX + document_type_code
    return escape_column_name(unescaped_name)
def __init__(self, field_code: str, field_title: str, table_name: str) -> None:
    """
    Initialize the parent state and derive the single escaped column name.

    ``column`` is ``field_code`` passed through ``escape_column_name``.
    """
    super().__init__(field_code, field_title, table_name)
    escaped_code = escape_column_name(field_code)
    self.column = escaped_code