Exemple #1
0
 def remove_fields(self, *fields, multiple: bool = False, inplace: bool = True):
     """
     Remove the given fields from this object's field list.

     :param fields: fields (or field names) to remove; normalized by arg.update() and arg.get_names().
     :param multiple: in-place mode only: if False, stop after the first removed field;
         if True, remove every field whose name matches.
     :param inplace: if True, mutate the list returned by self.get_fields() and return None;
         if False, return self.make_new(...) built from the fields that do not match.
     :return: None in in-place mode, otherwise the result of self.make_new().
     """
     removing_fields = arg.update(fields)
     removing_field_names = arg.get_names(removing_fields)
     existing_fields = self.get_fields()
     if inplace:
         # Iterate over a snapshot: removing from a list while iterating over that same list
         # shifts the remaining items and makes the loop skip the element that follows
         # each removed one (so adjacent matches survived when multiple=True).
         for e in list(existing_fields):
             if arg.get_name(e) in removing_field_names:
                 existing_fields.remove(e)
                 if not multiple:
                     break
     else:
         # NOTE(review): this branch drops every matching field regardless of `multiple`;
         # kept as in the original, confirm whether that asymmetry is intended.
         new_fields = [f for f in existing_fields if arg.get_name(f) not in removing_field_names]
         return self.make_new(new_fields)
Exemple #2
0
 def __init__(self, name: Name, value: Union[Value, Auto] = AUTO, update: bool = False):
     """
     Store name and value on the instance.

     Nothing is changed when the instance reports itself as already initialized,
     unless update is set.

     :param name: object passed through get_name() before being stored.
     :param value: value to store; when self._auto_value is truthy it is first
         passed through Auto.acquire(value, name).
     :param update: force re-assignment even if self._is_initialized() is true.
     """
     if not update and self._is_initialized():
         return
     resolved_name = get_name(name)
     if self._auto_value:
         value = Auto.acquire(value, resolved_name)
     self.name = resolved_name
     self.value = value
Exemple #3
0
 def get_parsed_line(
     self,
     line: str,
     item_type: Union[ItemType, Auto] = AUTO,
     struct: Union[Array, StructInterface, Auto] = AUTO,
 ) -> Item:
     """
     Parse one text line into an item of the requested type.

     :param line: raw line of text.
     :param item_type: target item type; when left as AUTO it is resolved
         through self.get_default_item_type.
     :param struct: struct (or sequence of column keys) describing the row; a StructInterface
         additionally supplies per-field converters.
     :return: the line itself (ItemType.Line), the parsed row (Row/Any/Auto),
         a dict keyed by field names (Record) or a built StructRow.
     :raises ValueError: for any other item type.
     """
     item_type = arg.delayed_acquire(item_type, self.get_default_item_type)
     if item_type == ItemType.Line:
         return line

     # Split the line with the delimiter configured on this object.
     parse = fs.csv_loads(delimiter=self.get_delimiter())
     row = parse(line)

     if isinstance(struct, StructInterface):
         converters = struct.get_converters()
         convert_row = self._get_row_converter(converters=converters)
         row = convert_row(row)

     if item_type in (ItemType.Row, ItemType.Any, ItemType.Auto):
         return row

     # Without a defined struct, the column positions are used as field keys.
     if not arg.is_defined(struct, check_name=False):
         struct = list(range(len(row)))

     if item_type == ItemType.Record:
         return {arg.get_name(key): cell for key, cell in zip(struct, row)}
     if item_type == ItemType.StructRow:
         return ItemType.StructRow.build(data=row, struct=struct)

     template = 'item_type {} is not supported for {}.parse_lines()'
     raise ValueError(template.format(item_type, self.__class__.__name__))
Exemple #4
0
 def get_one_column_values(self,
                           column: Field,
                           as_list: bool = False) -> Iterable:
     """
     Return the values of one column taken from every record.

     The previous implementation mixed `yield` and `return list(...)` in one body, which made
     the whole function a generator: with as_list=True the caller received an empty generator
     instead of a list. The values are now produced by a generator expression, so the
     function itself is an ordinary function and can return a real list.

     :param column: field (or field name) to extract; resolved with arg.get_name().
     :param as_list: if True, return a list; otherwise return a lazy iterator.
     :return: list or generator of record.get(column) for each record of self.get_records().
     """
     column = arg.get_name(column)
     # NOTE: self.get_records() is now called at call time rather than on first iteration;
     # the per-record lookups themselves stay lazy.
     values = (r.get(column) for r in self.get_records())
     return list(values) if as_list else values
Exemple #5
0
 def warning(self,
             msg: str,
             category: Optional[Type] = None,
             stacklevel: Optional[int] = None) -> None:
     """
     Log a message at the Warning level.

     :param msg: message text.
     :param category: optional warning category; its name is put in front of the message,
         but only when stacklevel is given.
     :param stacklevel: optional index into inspect.stack(); when truthy, the message is
         prefixed with the file name and line number of that frame.
     """
     if stacklevel:
         # The stack lookup must stay in this function: moving it into a helper
         # would shift the frame index.
         frame_record = stack()[stacklevel]
         caller = getframeinfo(frame_record[0])
         if category:
             category_name = arg.get_name(category)
         else:
             category_name = ''
         msg = '{}:{}: {} {}'.format(caller.filename, caller.lineno, category_name, msg)
     self.log(msg=msg, level=LoggingLevel.Warning)
Exemple #6
0
 def get_str_fields_count(self, types: Array = (str, int, float, bool)) -> str:
     """
     Build a one-line summary of how many fields there are of each listed type.

     :param types: types to count separately; everything else is reported as "other".
     :return: text like '<total> total = <n1> <type1> + <n2> <type2> + <rest> other'.
     """
     total_count = self.get_fields_count()
     # One (count, type name) pair per requested type, in the given order.
     counted_types = [
         (self.get_type_count(t), arg.get_name(t, or_callable=False))
         for t in types
     ]
     known_count = sum(count for count, _ in counted_types)
     other_count = total_count - known_count
     described = ['{} {}'.format(count, type_name) for count, type_name in counted_types]
     str_fields_count = ' + '.join(described)
     return '{} total = {} + {} other'.format(total_count, str_fields_count, other_count)
Exemple #7
0
 def get_type_count(self, field_type: Type = AUTO, by_prefix: bool = True) -> int:
     """
     Count the fields whose type name matches the given type.

     :param field_type: type to look for; converted to a name with arg.get_name().
     :param by_prefix: if True, a field matches when its type name starts with the wanted
         name; if False, the names must be equal.
     :return: number of matching fields from self.get_fields().
     """
     field_type_name = arg.get_name(field_type, or_callable=False)

     if by_prefix:
         def matches(type_name):
             return type_name.startswith(field_type_name)
     else:
         def matches(type_name):
             return type_name == field_type_name

     return sum(1 for f in self.get_fields() if matches(f.get_type_name()))
Exemple #8
0
def get_compatible_expression_tuples(expressions: dict) -> dict:
    """
    Convert a mapping of expressions into a new dict keyed by names.

    Each key is passed through arg.get_name(). Each value is converted depending on its kind:
    a list/tuple goes through get_selection_tuple(), an expression description provides
    its own get_selection_tuple(), anything else goes through get_name_or_function().

    :param expressions: mapping of field (or name) to expression.
    :return: new dict with the converted keys and values.
    """
    def _to_compatible(description):
        if isinstance(description, (list, tuple)):
            return get_selection_tuple(description)
        if is_expression_description(description):
            return description.get_selection_tuple()
        return get_name_or_function(description)

    prepared_expressions = {}
    for key, description in expressions.items():
        # Resolve the name before the value, as a separate statement, to keep the call order.
        name = arg.get_name(key)
        prepared_expressions[name] = _to_compatible(description)
    return prepared_expressions
Exemple #9
0
def get_field_value_from_record(
        field: Union[FieldID, Callable], record: Record,
        default: Value = None, skip_missing: bool = True,
) -> Value:
    """
    Extract one value from a record.

    :param field: a callable applied to the whole record, or a field identifier
        resolved to a key with arg.get_name().
    :param record: mapping to read from.
    :param default: value returned for a missing key when skip_missing is True.
    :param skip_missing: if True, use record.get(); if False, index the record directly
        (a plain dict raises KeyError for a missing key).
    :return: result of the callable or the value stored under the field key.
    """
    if isinstance(field, Callable):
        return field(record)
    key = arg.get_name(field)
    if not skip_missing:
        return record[key]
    return record.get(key, default)
Exemple #10
0
    def validate_about(self, standard: StructInterface, ignore_moved: bool = False) -> Native:
        """
        Compare the fields of this struct with an expected standard and replace them with annotated copies.

        Every resulting field gets a validity flag (set_valid) and, when something is wrong,
        a caption prefixed with a tag in square brackets: DUPLICATE, DUPLICATE_IN_DATA, MOVED,
        UNEXPECTED, DUPLICATE_IN_STRUCT or MISSING_IN_FILE.

        :param standard: expected struct; converted with self.convert_to_native().
        :param ignore_moved: if True, a field found at a different position than expected
            is not tagged as MOVED.
        :return: self, with its fields replaced in place by the annotated ones.
        """
        expected_struct = self.convert_to_native(standard)
        # Working copy: holds the expected fields not yet matched by a received field.
        remaining_struct = expected_struct.copy()
        assert isinstance(expected_struct, FlatStruct)
        assert isinstance(remaining_struct, FlatStruct)
        updated_struct = FlatStruct([])
        # First pass: walk over the fields actually present in this struct.
        for pos_received, f_received in enumerate(self.get_fields()):
            assert isinstance(f_received, AdvancedField)
            f_name = f_received.get_name()
            if f_name in updated_struct.get_field_names():
                # Name has already been processed in this pass: repeated field, marked invalid.
                is_valid = False
                warning = 'DUPLICATE_IN_DATA' if f_name in remaining_struct.get_field_names() else 'DUPLICATE'
                f_expected = updated_struct.get_field_description(f_name)
                f_updated = f_expected.set_valid(is_valid, inplace=False)
            elif f_name in expected_struct.get_field_names():
                # Known field: valid; tagged only if its position differs and moves are not ignored.
                is_valid = True
                pos_expected = expected_struct.get_field_position(f_name)
                warning = None if pos_received == pos_expected or ignore_moved else 'MOVED'
                f_expected = expected_struct.get_field_description(f_name)
                f_updated = f_expected.set_valid(is_valid, inplace=False)
            else:
                # Field is absent from the expected struct: keep the received one, marked invalid.
                is_valid = False
                warning = 'UNEXPECTED'
                message = 'field has been found in actual struct, but not in expected standard struct'
                caption = '{} ({})'.format(f_received.get_caption(), message)
                f_updated = f_received.set_valid(is_valid, inplace=False).set_caption(caption, inplace=False)
            if warning:
                # Put the warning tag in front of the (possibly empty) caption.
                caption = '[{}] {}'.format(warning, f_updated.get_caption() or '')
                f_updated = f_updated.set_caption(caption, inplace=False)
            updated_struct.append_field(f_updated)
            if f_name in remaining_struct.get_field_names():
                # NOTE(review): `multiple` is not passed here, so how many same-named entries
                # are dropped depends on remove_fields() defaults - confirm.
                remaining_struct.remove_fields(f_name, inplace=True)

        # Second pass: expected fields left unmatched by the received fields.
        for f_remaining in remaining_struct.get_columns():
            f_name = arg.get_name(f_remaining)
            is_valid = False
            f_expected = expected_struct.get_field_description(f_name)
            if f_name in updated_struct.get_field_names():
                warning = 'DUPLICATE_IN_STRUCT'
            else:
                warning = 'MISSING_IN_FILE'
            caption = '[{}] {}'.format(warning, f_expected.get_caption() or '')
            f_updated = f_expected.set_valid(is_valid, inplace=False).set_caption(caption, inplace=False)
            updated_struct.append_field(f_updated)
        # Replace own fields with the annotated ones.
        self.set_fields(updated_struct.get_fields(), inplace=True)
        return self
Exemple #11
0
def get_dialect_type_from_conn_type_name(
        conn_type: Union[DynamicEnum, str],
        default: DialectType = DialectType.Python,
        other: DialectType = DialectType.String,
) -> DialectType:
    """
    Choose a dialect type from the name of a connection type.

    :param conn_type: connection type (or its name); None selects the default dialect.
    :param default: dialect used when conn_type is None.
    :param other: dialect used when the name contains neither 'Postgres' nor 'Click'.
    :return: DialectType instance; a non-DialectType choice is resolved
        through DialectType.find_instance().
    """
    if conn_type is None:
        chosen = default
    else:
        conn_name = get_name(conn_type)
        chosen = (
            DialectType.Postgres if 'Postgres' in conn_name
            else DialectType.Clickhouse if 'Click' in conn_name
            else other
        )
    if isinstance(chosen, DialectType):
        return chosen
    return DialectType.find_instance(chosen)
Exemple #12
0
 def add_column(self,
                name: Field,
                values: Iterable,
                ignore_errors: bool = False) -> Native:
     """
     Return a stream whose items are extended with one more column.

     Each item of self.get_items() is merged with {name: value}, pairing items and values
     positionally.

     The previous implementation first wired `values` into a lazy map() and only afterwards
     materialized it with list(values) for the length check. For a one-shot iterator this
     consumed the same iterator twice, producing either an empty column or a false
     assertion failure. Materialization now happens before the map is built.

     :param name: field (or field name) of the new column; resolved with arg.get_name().
     :param values: values of the new column, one per item.
     :param ignore_errors: if False and the stream is in memory, assert that the number
         of values equals self.get_count().
     :return: new stream (converted with to_memory() when this stream is in memory).
     :raises AssertionError: on count mismatch (in-memory stream, ignore_errors=False).
     """
     name = arg.get_name(name)
     is_in_memory = self.is_in_memory()
     must_check_count = is_in_memory and not ignore_errors
     if must_check_count and not isinstance(values, ARRAY_TYPES):
         # Materialize once, up front, so len() and the lazy map below see the same data.
         values = list(values)
     # Build the merging function once instead of once per item
     # (assumes fs.merge_two_items() is a stateless factory - TODO confirm).
     merge = fs.merge_two_items()
     items = map(lambda i, v: merge(i, {name: v}), self.get_items(), values)
     stream = self.stream(items)
     if is_in_memory:
         if must_check_count:
             msg = 'for add_column() stream and values must have same items count, got {} != {}'
             assert self.get_count() == len(values), msg.format(
                 self.get_count(), len(values))
         stream = stream.to_memory()
     return stream
Exemple #13
0
 def log(
     self,
     msg: Union[str, list, tuple],
     level: Level = arg.AUTO,
     logger: Union[BaseLogger, arg.Auto] = arg.AUTO,
     end: Union[str, arg.Auto] = arg.AUTO,
     verbose: bool = True,
     truncate: bool = True,
     category: Optional[Type] = None,
     stacklevel: Optional[int] = None,
 ) -> None:
     """
     Send a message to the logger and/or show it.

     :param msg: message text, an exception, or an iterable of message parts.
     :param level: logging level; AUTO means Info when verbose, otherwise Debug.
     :param logger: logger to use; AUTO is resolved through self.get_base_logger.
     :param end: passed to self.show().
     :param verbose: selects the default level and enables showing messages
         whose level is not suitable for logging.
     :param truncate: passed to self.show().
     :param category: optional category whose name is put in front of the message.
     :param stacklevel: optional number of the caller frame; when truthy, the message is
         prefixed with that frame's file name (without path) and line number.
     :raises TypeError: if msg is neither a string, an exception, nor an iterable.
     """
     fallback_level = LoggingLevel.Info if verbose else LoggingLevel.Debug
     level = arg.acquire(level, fallback_level)
     logger = arg.delayed_acquire(logger, self.get_base_logger)

     # Normalize the message into a list of parts.
     if isinstance(msg, BaseException):
         msg = str(msg)
     if isinstance(msg, str):
         parts = [msg]
     elif isinstance(msg, Iterable):
         parts = list(msg)
     else:
         raise TypeError('Expected msg as str or list[str], got {}'.format(msg))

     if category:
         parts = [arg.get_name(category)] + parts
     if stacklevel:
         # +1 skips the frame of log() itself; the lookup must stay inside this function.
         caller = getframeinfo(stack()[stacklevel + 1][0])
         file_name_without_path = caller.filename.split('\\')[-1].split('/')[-1]
         parts = ['{}:{}:'.format(file_name_without_path, caller.lineno)] + parts

     # parts is always a list at this point, so it is always formatted into one message.
     text = self.format_message(*parts)

     if not isinstance(level, LoggingLevel):
         level = LoggingLevel(level)
     if logger and self.is_suitable_level(level):
         logging_method = getattr(logger, level.get_method_name())
         logging_method(text)
     if verbose and not self.is_suitable_level(level):
         self.show(text, end=end, truncate=truncate)
0
 def detect(cls, obj, default: Union[Optional[DynamicEnum], str] = 'str') -> EnumItem:
     """
     Detect the dialect type that corresponds to an object.

     :param obj: DialectType (returned as is), a name, a class (its __name__ is used)
         or any other object resolved with get_name().
     :param default: dialect (or its name) used when nothing matches.
     :return: result of DialectType.find_instance(name) if truthy; otherwise Postgres or
         Clickhouse when the name contains 'Postgres' or 'Click'; otherwise the default,
         resolved through DialectType.find_instance() unless it already is a DialectType.
     """
     if isinstance(obj, DialectType):
         return obj

     if isinstance(obj, str):
         name = obj
     elif isclass(obj):
         name = obj.__name__
     else:
         name = get_name(obj)

     found = DialectType.find_instance(name)
     if found:
         return found

     # No direct match: fall back to well-known substrings of the name.
     fallback = (
         DialectType.Postgres if 'Postgres' in name
         else DialectType.Clickhouse if 'Click' in name
         else default
     )
     if isinstance(fallback, DialectType):
         return fallback
     return DialectType.find_instance(fallback)
Exemple #15
0
 def set_file(
         self, file: Union[File, Name],
         encoding: str = DEFAULT_ENCODING,
         level: Level = DEFAULT_LOGGING_LEVEL,
         formatter: Formatter = DEFAULT_FORMATTER,
         if_not_added: bool = True,
 ) -> LoggerInterface:
     """
     Attach a file handler for the given file to this logger.

     :param file: file object or file name; resolved to a name with arg.get_name().
     :param encoding: encoding passed to logging.FileHandler.
     :param level: logging level of the new handler; resolved with arg.get_value().
     :param formatter: logging.Formatter or a format string to build one from.
     :param if_not_added: if True, do nothing when a handler whose baseFilename ends with
         this file name is already attached; if False, always add a new handler.
     :return: self.
     """
     filename = arg.get_name(file)
     level = arg.get_value(level)
     # Fixed typo: the original called the non-existent str.endswidth(), which raised
     # AttributeError as soon as any handler with a baseFilename was present.
     is_added = any(
         hasattr(h, 'baseFilename') and h.baseFilename.endswith(filename)
         for h in self.get_handlers()
     )
     if not (is_added and if_not_added):
         file_handler = logging.FileHandler(filename, encoding=encoding)
         file_handler.setLevel(level)
         if isinstance(formatter, str):
             formatter = logging.Formatter(formatter)
         file_handler.setFormatter(formatter)
         self.add_handler(file_handler)
     return self
Exemple #16
0
 def get_field_position(self, field: Field) -> Optional[FieldNo]:
     """
     Return the position of a field.

     :param field: a FieldNo (returned unchanged) or a field / field name
         looked up in self.get_struct().
     :return: the position, as given or as reported by the struct's get_field_position().
     """
     if isinstance(field, FieldNo):
         return field
     field_name = arg.get_name(field)
     struct = self.get_struct()
     return struct.get_field_position(field_name)
Exemple #17
0
 def get_source_name(self) -> Name:
     """Return the name of the source field, resolved with arg.get_name()."""
     source_field = self.get_source_field()
     return arg.get_name(source_field)
Exemple #18
0
 def get_one_column_values(self, column: Field) -> Iterable:
     """
     Return one column of the underlying dataframe.

     :param column: field (or field name); resolved with arg.get_name().
     :return: result of indexing self.get_dataframe() by the column name.
     """
     name = arg.get_name(column)
     dataframe = self.get_dataframe()
     return dataframe[name]
Exemple #19
0
 def get_target_field_name(self) -> Name:
     """Return the name of the target stored in self._target, resolved with arg.get_name()."""
     target = self._target
     return arg.get_name(target)
Exemple #20
0
 def get_type_name(self) -> str:
     """
     Return the name of this object's type as a string.

     The type from self.get_type() is first reduced with arg.get_value(); if the result
     is not a string yet, it is converted to a name with arg.get_name().
     """
     type_value = arg.get_value(self.get_type())
     if isinstance(type_value, str):
         return str(type_value)
     return str(arg.get_name(type_value))
Exemple #21
0
def get_name_or_function(field) -> Union[int, str, Callable]:
    """
    Return a callable unchanged, or the name of anything else.

    :param field: a callable, or an object resolved with arg.get_name().
    :return: the callable itself or the resolved name.
    """
    return field if isinstance(field, Callable) else arg.get_name(field)