def sorted_group_by(
        self,
        *keys,
        values: Optional[Iterable] = None,
        as_pairs: bool = False,
) -> StreamInterface:
    """Group a sorted stream by ``keys``, building the output struct when needed.

    :param keys: key fields; each may be a field, a field name, or an array
        whose first element identifies the field.
    :param values: fields collected per group; ``None`` means no value fields.
    :param as_pairs: when True the call is delegated to the parent class
        unchanged and no output struct is built.
    :returns: whatever the parent ``sorted_group_by`` returns.
    """
    if as_pairs:
        return super().sorted_group_by(*keys, values=values, as_pairs=True)

    # Materialize once: ``values`` may be None (the default) or a one-shot
    # iterable, and it is needed both here and by the parent implementation.
    if values is not None:
        values = list(values)
    value_fields = values if values is not None else []

    output_struct = FlatStruct([])
    for f in list(keys) + value_fields:
        if isinstance(f, ARRAY_TYPES):
            field_name = get_name(f[0])
        else:
            field_name = get_name(f)
        if f in value_fields:
            # Value fields are typed as Tuple in the output struct.
            field_type = FieldType.Tuple
        elif isinstance(f, FieldInterface) or hasattr(f, 'get_type'):
            field_type = f.get_type()
        else:
            field_type = AUTO
        output_struct.append_field(field_name, field_type)
    return super().sorted_group_by(*keys, values=values, as_pairs=False, output_struct=output_struct)
def _get_value_repr(value: Value, default: str = '-') -> str:
    """Return a short printable representation of ``value``.

    Callables are represented by ``get_name(value, or_callable=False)``,
    ``None`` by ``default``, and anything else by ``repr(value)``.
    """
    if isinstance(value, Callable):
        return get_name(value, or_callable=False)
    if value is None:
        return default
    return repr(value)
def validate_about(self, standard: StructInterface, ignore_moved: bool = False) -> Native:
    """Validate own fields against ``standard`` and replace them with annotated copies.

    Every field of the resulting struct gets a validity flag, and problematic
    fields get a ``[WARNING]`` prefix in their caption: DUPLICATE,
    DUPLICATE_IN_DATA, MOVED, UNEXPECTED, DUPLICATE_IN_STRUCT or
    MISSING_IN_FILE.

    :param standard: expected struct; converted with ``convert_to_native``.
    :param ignore_moved: when True a position mismatch is not reported as
        MOVED; the flag is also passed as ``exclude_duplicates`` when
        appending fields.
    :returns: self, with its fields replaced in place.
    """
    expected_struct = self.convert_to_native(standard)
    remaining_struct = expected_struct.copy()
    assert isinstance(expected_struct, FlatStruct), 'got {}'.format(expected_struct)
    assert isinstance(remaining_struct, FlatStruct), 'got {}'.format(remaining_struct)
    updated_struct = FlatStruct([])

    # Pass 1: classify each received field against the expected struct.
    for position, received in enumerate(self.get_fields()):
        assert isinstance(received, AdvancedField)
        name = received.get_name()
        if name in updated_struct.get_field_names():
            # This name has already been processed earlier in this pass.
            if name in remaining_struct.get_field_names():
                warning = 'DUPLICATE_IN_DATA'
            else:
                warning = 'DUPLICATE'
            known = updated_struct.get_field_description(name)
            annotated = known.set_valid(False, inplace=False)
        elif name in expected_struct.get_field_names():
            expected_position = expected_struct.get_field_position(name)
            if position == expected_position or ignore_moved:
                warning = None
            else:
                warning = 'MOVED'
            known = expected_struct.get_field_description(name)
            annotated = known.set_valid(True, inplace=False)
        else:
            warning = 'UNEXPECTED'
            message = 'field has been found in actual struct, but not in expected standard struct'
            caption = '{} ({})'.format(received.get_caption(), message)
            annotated = received.set_valid(False, inplace=False)
            annotated = annotated.set_caption(caption, inplace=False)
        if warning:
            caption = '[{}] {}'.format(warning, annotated.get_caption() or '')
            annotated = annotated.set_caption(caption, inplace=False)
        updated_struct.append_field(annotated, exclude_duplicates=ignore_moved)
        if name in remaining_struct.get_field_names():
            remaining_struct.remove_fields(name, inplace=True)

    # Pass 2: whatever is left in the expected struct was not consumed above.
    for leftover in remaining_struct.get_columns():
        name = get_name(leftover)
        known = expected_struct.get_field_description(name)
        if name in updated_struct.get_field_names():
            warning = 'DUPLICATE_IN_STRUCT'
        else:
            warning = 'MISSING_IN_FILE'
        caption = '[{}] {}'.format(warning, known.get_caption() or '')
        annotated = known.set_valid(False, inplace=False)
        annotated = annotated.set_caption(caption, inplace=False)
        updated_struct.append_field(annotated, exclude_duplicates=ignore_moved)

    self.set_fields(updated_struct.get_fields(), inplace=True)
    return self
def remove_fields(self, *fields, multiple: bool = False, inplace: bool = True):
    """Remove the given fields from this struct.

    :param fields: fields (or their names) to remove; normalized with
        ``update`` and ``get_names``.
    :param multiple: in-place mode only; when False the removal stops after
        the first matching field, when True every matching field is removed.
    :param inplace: when True the list returned by ``get_fields()`` is
        modified and nothing is returned; when False a new object built by
        ``make_new`` from the non-matching fields is returned.
    """
    removing_fields = update(fields)
    removing_field_names = get_names(removing_fields)
    existing_fields = self.get_fields()
    if inplace:
        # Iterate over a snapshot: removing from the list being iterated
        # would skip the element that follows each removed one.
        for e in list(existing_fields):
            if get_name(e) in removing_field_names:
                existing_fields.remove(e)
                if not multiple:
                    break
    else:
        new_fields = [f for f in existing_fields if get_name(f) not in removing_field_names]
        return self.make_new(new_fields)
def get_compatible_expression_tuples(expressions: dict) -> dict:
    """Normalize a mapping of expressions into ``{field name: prepared value}``.

    Lists and tuples go through ``get_selection_tuple``, expression
    descriptions provide their own ``get_selection_tuple()``, and anything
    else is reduced with ``get_name_or_function``.
    """
    def _prepare(description):
        if isinstance(description, (list, tuple)):
            return get_selection_tuple(description)
        if is_expression_description(description):
            return description.get_selection_tuple()
        return get_name_or_function(description)

    return {get_name(key): _prepare(description) for key, description in expressions.items()}
def __init__(self, func, in_fields, in_values, in_record, message):
    """Store a snapshot of the function call: name, inputs and message.

    Any function whose name contains 'lambda' is recorded simply as 'lambda'.
    The key fields are set to func, in_fields and message.
    """
    func_name = get_name(func, or_callable=False)
    props = dict(
        func='lambda' if 'lambda' in func_name else func_name,
        in_fields=tuple(in_fields),
        in_values=tuple(in_values),
        in_record=tuple(in_record.items()),
        message=message,
    )
    super().__init__(**props)
    self.set_key_fields(['func', 'in_fields', 'message'], inplace=True)
def get_str_fields_count(self, types: Array = (str, int, float, bool)) -> str:
    """Describe the field count as a total split by the given types.

    The result looks like ``'<total> total = <n> <type> + ... + <m> other'``,
    where "other" is the total minus the sum of the per-type counts.
    """
    total_count = self.get_fields_count()
    # One pass over ``types`` yields (count, type name) pairs.
    counted = [(self.get_type_count(t), get_name(t, or_callable=False)) for t in types]
    other_count = total_count - sum(count for count, _ in counted)
    str_fields_count = ' + '.join('{} {}'.format(count, type_name) for count, type_name in counted)
    return '{} total = {} + {} other'.format(total_count, str_fields_count, other_count)
def get_type_count(self, field_type: Type = AUTO, by_prefix: bool = True) -> int:
    """Count the fields whose type name matches ``field_type``.

    :param field_type: type to look for; its name is taken with ``get_name``.
    :param by_prefix: when True a field matches if its type name starts with
        the wanted name, otherwise the names must be equal.
    """
    wanted = get_name(field_type, or_callable=False)

    def _matches(type_name) -> bool:
        if by_prefix:
            return type_name.startswith(wanted)
        return type_name == wanted

    return sum(1 for field in self.get_fields() if _matches(field.get_type_name()))
def get_field_value_from_record(
        field: Union[FieldID, Callable],
        record: Record,
        default: Value = None,
        skip_missing: bool = True,
) -> Value:
    """Extract the value of ``field`` from ``record``.

    A callable ``field`` is applied to the whole record. Otherwise the field
    name is looked up: with ``skip_missing`` the lookup falls back to
    ``default``, without it the record is indexed directly.
    """
    if isinstance(field, Callable):
        return field(record)
    field = get_name(field)
    if not skip_missing:
        return record[field]
    return record.get(field, default)
def get_one_line_repr(
        self,
        str_meta: Union[str, Auto, None] = AUTO,
        max_len: int = JUPYTER_LINE_LEN,
        crop: str = CROP_SUFFIX,
) -> str:
    """Return a one-line representation, building ``str_meta`` when undefined.

    The generated meta consists of the object name (if any) and the shape
    representation (only when ``get_str_count(default=None)`` is not None).
    """
    if not Auto.is_defined(str_meta):
        name = get_name(self)
        description_args = [name] if name else []
        if self.get_str_count(default=None) is not None:
            description_args.append(self.get_shape_repr())
        str_meta = get_str_from_args_kwargs(*description_args)
    return super().get_one_line_repr(str_meta=str_meta, max_len=max_len, crop=crop)
def get_field_getter(self, field: Field) -> Callable:
    """Return a callable that extracts ``field`` from an item.

    :param field: a callable (returned as is), a description object providing
        ``get_function()``, a field number, a field name, or a field object.
    :returns: the getter function.
    """
    if isinstance(field, Callable):
        return field
    # Duck-typing fallback must check the method that is actually called
    # (the previous check looked for 'get_functions' but called get_function).
    if isinstance(field, sn.AbstractDescription) or hasattr(field, 'get_function'):
        return field.get_function()
    if isinstance(field, FieldNo):  # int
        field_no = field
    else:  # FieldName (str) or FieldInterface
        if isinstance(field, FieldName):
            field_name = field
        else:
            field_name = get_name(field)
        field_no = self.get_field_position(field_name)
    return fs.partial(lambda r, n: r[n], field_no)
def format(self, *args, delimiter: str = COLUMN_DELIMITER, skip_errors: bool = False) -> str:
    """Format one item as a delimited string, field by field.

    The item is either the single positional argument (when it is a row or
    record instance) or the positional arguments themselves taken as a row.

    :param delimiter: string placed between formatted values.
    :param skip_errors: for rows, use None for positions beyond the row
        length instead of indexing past the end; also passed to the field
        formatter.
    :raises TypeError: if the item is neither a row nor a record.
    """
    if len(args) == 1 and isinstance(args[0], (*ROW_SUBCLASSES, *RECORD_SUBCLASSES)):
        item = args[0]
    else:
        item = args

    formatted_values = []
    for position, field in enumerate(self.get_fields()):
        if is_row(item):
            if skip_errors and position >= len(item):
                value = None
            else:
                value = item[position]
        elif is_record(item):
            value = item.get(get_name(field))
        else:
            raise TypeError('Expected item as Row or Record, got {}'.format(item))

        has_formatter = isinstance(field, AdvancedField) or hasattr(field, 'format')
        if has_formatter:
            formatted_values.append(field.format(value, skip_errors=skip_errors))
        else:
            formatted_values.append(str(value))
    return delimiter.join(formatted_values)
def get_brief_repr(self) -> str:
    """Return a brief representation in the form ``ClassName('name')``."""
    class_name = self.__class__.__name__
    obj_name = get_name(self, or_callable=False)
    return "{}('{}')".format(class_name, obj_name)
def get_types_dict(self, dialect: Union[DialectType, Auto] = AUTO) -> dict:
    """Return a ``{field name: type}`` mapping for the given dialect.

    Names come from ``get_fields()`` and types from
    ``get_types_list(dialect)``; they are paired positionally.
    """
    field_names = (get_name(field) for field in self.get_fields())
    field_types = self.get_types_list(dialect)
    return dict(zip(field_names, field_types))
def get_name_or_function(field) -> Union[int, str, Callable]:
    """Return ``field`` itself when it is callable, otherwise its name."""
    if not isinstance(field, Callable):
        return get_name(field)
    return field
def get_target_field_name(self) -> Optional[str]:
    """Return the name of the target field, or None when there is no target.

    None is returned both when the object has no ``get_target_field`` method
    and when that method returns a falsy value.
    """
    if not hasattr(self, 'get_target_field'):
        return None
    target_field = self.get_target_field()
    if not target_field:
        return None
    return get_name(target_field)
def get_section_lines(self, section: SqlSection) -> Iterable:
    """Yield the lines of one section.

    The lines come from the method named ``get_<section name in lower
    case>_lines`` on this object.
    """
    section_name = get_name(section).lower()
    lines_getter = self.__getattribute__('get_{}_lines'.format(section_name))
    yield from lines_getter()
def get_type_name(self) -> str:
    """Return the name of this object's type as a plain string.

    The type is first reduced with ``get_value``; if the result is not a
    string, its name is taken with ``get_name``.
    """
    type_value = get_value(self.get_type())
    if isinstance(type_value, str):
        return str(type_value)
    return str(get_name(type_value))
def get_field_position(self, field: Field) -> Optional[FieldNo]:
    """Return the position of ``field``.

    A field number is returned as is; anything else is resolved by name
    through the struct returned by ``get_struct()``.
    """
    if isinstance(field, FieldNo):
        return field
    field_name = get_name(field)
    struct = self.get_struct()
    return struct.get_field_position(field_name)
def get_source_name(self) -> Name:
    """Return the name of the source field."""
    source_field = self.get_source_field()
    return get_name(source_field)
def get_target_field_name(self) -> Name:
    """Return the name of the target field."""
    target_field = self.get_target_field()
    return get_name(target_field)