Exemple #1
0
 def sorted_group_by(
     self,
     *keys,
     values: Optional[Iterable] = None,
     as_pairs: bool = False,
 ) -> StreamInterface:
     """Group an already sorted stream by keys, describing the output struct when not in pairs mode.

     :param keys: fields (or array-like field descriptions) to group by.
     :param values: fields to collect for each group; None means no value fields.
     :param as_pairs: when True, delegate to the parent implementation unchanged.
     :return: stream returned by the parent sorted_group_by().
     """
     if as_pairs:
         return super().sorted_group_by(*keys, values=values, as_pairs=True)
     # Materialize values once: the default None is not iterable, and a one-shot
     # iterable would otherwise be exhausted before the membership test below
     # and before being handed to the parent implementation.
     value_fields = [] if values is None else list(values)
     output_struct = FlatStruct([])
     for f in list(keys) + value_fields:
         # Array-like descriptions are named after their first element.
         if isinstance(f, ARRAY_TYPES):
             field_name = get_name(f[0])
         else:
             field_name = get_name(f)
         if f in value_fields:
             # Value fields are typed as Tuple in the output struct.
             field_type = FieldType.Tuple
         elif isinstance(f, FieldInterface) or hasattr(f, 'get_type'):
             field_type = f.get_type()
         else:
             field_type = AUTO
         output_struct.append_field(field_name, field_type)
     return super().sorted_group_by(
         *keys,
         values=values if values is None else value_fields,
         as_pairs=False,
         output_struct=output_struct,
     )
Exemple #2
0
 def get_output_struct(self) -> StructInterface:
     """Build a FlatStruct of the output columns, typed from the input struct.

     Only types of input fields that are also output columns are carried over;
     the result is validated against the input struct before being returned.
     """
     source_struct = self.get_input_struct()
     columns = self.get_output_columns()
     kept_types = dict()
     for name, field_type in source_struct.get_types_dict().items():
         if name in columns:
             kept_types[name] = field_type
     result = FlatStruct(columns)
     result = result.set_types(kept_types)
     assert isinstance(result, FlatStruct)
     result.validate_about(source_struct, ignore_moved=True)
     return result
Exemple #3
0
 def insert_data(
         self,
         table: Union[Table, Name], data: Data, struct: Struct = None,
         encoding: Optional[str] = None, skip_errors: bool = False,
         skip_lines: Count = 0, skip_first_line: bool = False,
         step: AutoCount = DEFAULT_STEP, verbose: AutoBool = AUTO,
 ) -> tuple:
     """Convert data to a struct stream and insert it into the table.

     :param table: target table object or its name.
     :param data: source data, passed to _get_struct_stream_from_data().
     :param struct: struct description; anything that is not struct-like is
         wrapped into a FlatStruct after a deprecation warning is logged.
     :param encoding: passed through to _get_struct_stream_from_data().
     :param skip_errors: passed through to insert_struct_stream().
     :param skip_lines: number of leading items to skip; undefined means 0.
     :param skip_first_line: passed through to _get_struct_stream_from_data().
     :param step: passed through to insert_struct_stream().
     :param verbose: passed through to the stream builder and the inserter.
     :return: tuple (estimated input count plus skipped lines, result of insert_struct_stream()).
     """
     skip_lines = skip_lines if Auto.is_defined(skip_lines) else 0
     if not isinstance(struct, StructInterface) and not hasattr(struct, 'get_struct_str'):
         warning_text = 'Struct as {} is deprecated, use FlatStruct instead'.format(type(struct))
         self.log(msg=warning_text, level=LoggingLevel.Warning)
         struct = FlatStruct(struct or [])
     stream = self._get_struct_stream_from_data(
         data,
         struct=struct,
         encoding=encoding,
         skip_first_line=skip_first_line,
         verbose=verbose,
     )
     if skip_lines:
         stream = stream.skip(skip_lines)
     if stream.get_stream_type() != StreamType.StructStream:
         # Structure first, then copy the count of the unstructured stream into the new meta.
         structured = stream.structure(struct, skip_bad_rows=True, verbose=True)
         source_count = stream.get_count()
         stream = structured.update_meta(count=source_count)
     initial_count = stream.get_estimated_count() + skip_lines
     final_count = self.insert_struct_stream(
         table,
         stream,
         skip_errors=skip_errors,
         step=step,
         verbose=verbose,
     )
     return initial_count, final_count
Exemple #4
0
 def get_struct_from_database(
     self,
     types: AutoLinks = AUTO,
     set_struct: bool = False,
     skip_missing: bool = False,
     verbose: AutoBool = AUTO,
 ) -> StructInterface:
     """Build a FlatStruct from the result of describe_table().

     :param types: when defined, applied to the struct in place via set_types().
     :param set_struct: when True, the struct is also stored on this object.
     :param skip_missing: when False, an empty struct raises ValueError.
     :param verbose: passed through to describe_table().
     :return: the detected struct.
     :raises ValueError: if the struct is empty and skip_missing is False.
     """
     table_description = self.describe_table(verbose=verbose)
     detected = FlatStruct(table_description)
     if detected.is_empty():
         if not skip_missing:
             error_text = 'Can not get struct for non-existing table {}'.format(self)
             raise ValueError(error_text)
     if Auto.is_defined(types):
         detected.set_types(types, inplace=True)
     if set_struct:
         self.set_struct(detected, inplace=True)
     return detected
Exemple #5
0
 def set_struct(self, struct: GeneralizedStruct,
                inplace: bool) -> Optional[Native]:
     """Normalize the struct argument and pass it to the parent set_struct().

     :param struct: one of:
         - a StructInterface instance or None: passed on unchanged;
         - an array-like of field descriptions (some items array-like themselves)
           or an empty array-like: wrapped into a FlatStruct;
         - an array-like without nested array-like items: treated as a title row
           and passed to FlatStruct.get_struct_detected_by_title_row();
         - AUTO: replaced by the result of get_struct_from_database().
     :param inplace: passed through to the parent set_struct().
     :return: whatever the parent set_struct() returns.
     :raises TypeError: if struct is none of the supported kinds.
     """
     if isinstance(struct, StructInterface) or struct is None:
         pass
     elif isinstance(struct, ARRAY_TYPES):
         # any() rather than max() over a list: max() raises ValueError on an
         # empty sequence. An empty description becomes an empty FlatStruct.
         if not struct or any(isinstance(f, ARRAY_TYPES) for f in struct):
             struct = FlatStruct(struct)
         else:
             struct = FlatStruct.get_struct_detected_by_title_row(struct)
     elif struct == AUTO:
         struct = self.get_struct_from_database()
     else:
         message = 'struct must be StructInterface or tuple with fields_description (got {})'.format(
             type(struct))
         raise TypeError(message)
     return super().set_struct(struct, inplace=inplace)
Exemple #6
0
 def __init__(
     self,
     data: Row,
     struct: Union[Row, StructInterface],
     check: bool = True,
 ):
     """Store the struct and initialize the parent with the (optionally structured) row.

     :param data: row values.
     :param struct: struct object, or a description that is wrapped into a FlatStruct.
     :param check: when True, data is passed through _structure_row() first.
     """
     struct_obj = struct if isinstance(struct, StructInterface) else FlatStruct(struct)
     self._struct = struct_obj
     row = self._structure_row(data, struct_obj) if check else data
     super().__init__(data=row, name='-')
Exemple #7
0
def test_detect_struct_by_title_row():
    """Check the Postgres struct string detected from a title row of column names."""
    columns = ('page_id', 'hits_count', 'conversion_rate')
    detected = FlatStruct.get_struct_detected_by_title_row(columns)
    received = detected.get_struct_str(DialectType.Postgres)
    expected = 'page_id int, hits_count int, conversion_rate numeric'
    assert received == expected, '{} != {}'.format(received, expected)