Beispiel #1
0
 def __init__(
         self,
         data,
         name=AUTO, check=True,
         count=None, less_than=None,
         value_stream_type: Union[StreamType, str] = None,
         source=None, context=None,
         max_items_in_memory=AUTO,
         tmp_files=AUTO,
 ):
     """Initialise the stream and remember the stream type of its values.

     Every argument except ``value_stream_type`` is forwarded unchanged to
     the parent initialiser.

     :param value_stream_type: a ``StreamType`` member, or any value that
         ``StreamType(...)`` accepts; ``None`` selects
         ``StreamType.AnyStream``.
     """
     super().__init__(
         data,
         name=name,
         check=check,
         count=count,
         less_than=less_than,
         source=source,
         context=context,
         max_items_in_memory=max_items_in_memory,
         tmp_files=tmp_files,
     )
     if value_stream_type is None:
         self.value_stream_type = StreamType.AnyStream
         return
     try:
         resolved_type = StreamType(value_stream_type)
     except ValueError:
         # Direct lookup failed: retry with the argument's ``.value``
         # attribute (presumably an enum-like object -- verify with callers).
         resolved_type = StreamType(value_stream_type.value)
     self.value_stream_type = resolved_type or StreamType.AnyStream
Beispiel #2
0
 def to_stream(
         self,
         data: Data = AUTO,
         stream_type: AutoStreamType = AUTO,
         ex: OptionalFields = None,
         **kwargs
 ) -> Stream:
     """Build a stream of the requested type from this object's data.

     :param data: items for the new stream; when not defined they are taken
         from ``get_items_of_type()`` if this object provides it, otherwise
         from ``get_data()``.
     :param stream_type: a type name, a stream class, a ``StreamType`` member
         or any object exposing ``get_class()``; when not defined it is
         obtained from ``self.get_stream_type``.
     :param ex: passed to ``get_compatible_meta()`` as ``ex``.
     :param kwargs: extra meta values; they override the compatible meta.
     :raises TypeError: if ``stream_type`` is none of the supported kinds.
     """
     requested = arg.delayed_acquire(stream_type, self.get_stream_type)
     if isinstance(requested, str):
         target_class = StreamType(requested).get_class()
     elif isclass(requested):
         target_class = requested
     elif isinstance(requested, StreamType) or hasattr(requested, 'get_class'):
         target_class = requested.get_class()
     else:
         raise TypeError('AnyStream.to_stream(data, stream_type): expected StreamType, got {}'.format(requested))

     if not arg.is_defined(data):
         if hasattr(self, 'get_items_of_type'):
             data = self.get_items_of_type(target_class.get_item_type())
         else:
             data = self.get_data()

     meta = self.get_compatible_meta(target_class, ex=ex)
     meta.update(kwargs)
     # Fill in count and source only when the caller did not supply them.
     if 'count' not in meta:
         meta['count'] = self.get_count()
     if 'source' not in meta:
         meta['source'] = self.get_source()
     return self._assume_stream(target_class(data, **meta))
Beispiel #3
0
 def stream(self,
            data: Iterable,
            stream_type: Union[StreamType, Stream, arg.Auto] = arg.AUTO,
            ex: OptionalFields = None,
            **kwargs) -> Stream:
     """Create a stream over ``data`` with meta compatible with its class.

     :param data: items for the new stream.
     :param stream_type: a type name, a stream class or an object exposing
         ``get_class()``; when not defined, ``self.get_stream_type()`` is used.
     :param ex: passed to ``get_compatible_meta()`` as ``ex``.
     :param kwargs: extra meta values; they override the compatible meta.
     """
     stream_type = arg.acquire(stream_type, self.get_stream_type())
     if isinstance(stream_type, str):
         target_class = StreamType(stream_type).get_class()
     elif isclass(stream_type):
         target_class = stream_type
     else:
         target_class = stream_type.get_class()
     # The class is only needed to select the meta fields; the stream itself
     # is built through StreamType.of(...).stream().
     meta = self.get_compatible_meta(target_class, ex=ex)
     meta.update(kwargs)
     builder = StreamType.of(stream_type)
     return builder.stream(data, **meta)
Beispiel #4
0
 def stream(self,
            data: Iterable,
            stream_type: AutoStreamType = AUTO,
            ex: OptionalArguments = None,
            save_name: bool = True,
            save_count: bool = True,
            **kwargs) -> Stream:
     """Build a new stream over ``data``, carrying over this stream's meta.

     :param data: items for the new stream.
     :param stream_type: a type name or an object exposing ``get_class()``;
         when not defined, the new stream has the same class as ``self`` and
         receives the full ``get_meta()``.
     :param ex: passed to ``get_compatible_meta()`` as ``ex`` (only used when
         ``stream_type`` is defined).
     :param save_name: when false, ``name`` is dropped from the carried meta.
     :param save_count: when false, ``count`` is dropped from the carried meta.
     :param kwargs: extra meta values; they override the carried meta.
     :return: the new stream instance.
     """
     if arg.is_defined(stream_type):
         if isinstance(stream_type, str):
             stream_class = StreamType(stream_type).get_class()
         else:
             stream_class = stream_type.get_class()
         meta = self.get_compatible_meta(stream_class, ex=ex)
     else:
         stream_class = self.__class__
         meta = self.get_meta()
     # The meta may not contain these keys (e.g. the compatible meta of a
     # class that has no such field), so drop them without raising KeyError.
     if not save_name:
         meta.pop('name', None)
     if not save_count:
         meta.pop('count', None)
     meta.update(kwargs)
     if 'context' not in meta:
         meta['context'] = self.get_context()
     return stream_class(data, **meta)
Beispiel #5
0
 def parse_json(
         self,
         default_value=None,
         to: Union[StreamType, str] = StreamType.RecordStream) -> Stream:
     """Map the items through ``fs.json_loads`` into a stream of type ``to``.

     :param default_value: forwarded to ``fs.json_loads()``.
     :param to: target stream type, resolved by ``StreamType.find_instance()``.
     """
     target_type = StreamType.find_instance(to)
     assert isinstance(target_type, StreamType)
     json_parser = fs.json_loads(default_value)
     return self.map_to_type(json_parser, stream_type=target_type)
Beispiel #6
0
def stream(stream_type, *args, **kwargs) -> StreamInterface:
    """Instantiate a stream of the given type.

    :param stream_type: either a stream class (as recognised by
        ``is_stream_class()``) or a value accepted by ``StreamType(...)``.
    :param args: positional arguments for the stream constructor.
    :param kwargs: keyword arguments for the stream constructor; ``context``
        is filled in from ``get_context()`` when absent.
    :return: the new stream instance.
    """
    # Test the argument itself: the check used to receive the STREAM_CLASSES
    # constant, so its outcome never depended on ``stream_type``.
    if is_stream_class(stream_type):
        stream_class = stream_type
    else:
        stream_class = StreamType(stream_type).get_class()
    if 'context' not in kwargs:
        kwargs['context'] = get_context()
    return stream_class(*args, **kwargs)
Beispiel #7
0
 def to_stream(self,
               stream_type: AutoStreamType = AUTO,
               *args,
               **kwargs) -> Stream:
     """Dispatch to the ``to_<suffix>`` method matching ``stream_type``.

     The suffix comes from ``StreamType.of(stream_type).get_method_suffix()``;
     when ``stream_type`` is not defined, ``self.get_stream_type()`` is used.
     The resolved stream type is passed to the target method as its first
     positional argument, followed by ``args`` and ``kwargs``.
     """
     stream_type = arg.acquire(stream_type, self.get_stream_type())
     suffix = StreamType.of(stream_type).get_method_suffix()
     converter = self.__getattribute__('to_{}'.format(suffix))
     return converter(stream_type, *args, **kwargs)
Beispiel #8
0
 def flat_map(self, function: Callable, to: AutoStreamType = AUTO) -> Stream:
     """Apply ``function`` to the items with flattening and wrap the result.

     :param function: mapping function, passed to ``_get_mapped_items()``
         with ``flat=True``.
     :param to: target stream type, resolved by ``StreamType.detect()``;
         when not defined, the result has the same class as ``self``.
     :return: a new stream carrying those meta fields of ``self`` that the
         target class also has, except ``count``.
     """
     if arg.is_defined(to):
         stream_class = StreamType.detect(to).get_class()
     else:
         stream_class = self.__class__
     # An empty instance of the target class shows which meta keys it supports.
     new_props_keys = stream_class([]).get_meta().keys()
     props = {k: v for k, v in self.get_meta().items() if k in new_props_keys}
     # The count is not carried over to the mapped stream. ``props`` holds
     # only keys shared with the target class, so 'count' may be absent: drop
     # it without raising KeyError.
     props.pop('count', None)
     items = self._get_mapped_items(function=function, flat=True)
     return stream_class(items, **props)
Beispiel #9
0
 def get_class(cls, other: Stream = None):
     """Resolve ``other`` to a class, defaulting to ``cls``.

     :param other: ``None`` (returns ``cls``), a ``StreamType`` member or
         string (resolved through ``StreamType(other).get_class()``), or a
         class (returned as is).
     :raises TypeError: for any other kind of value.
     """
     if other is None:
         return cls
     if isinstance(other, (StreamType, str)):
         return StreamType(other).get_class()
     if not inspect.isclass(other):
         message = '"other" parameter must be class or StreamType (got {})'
         raise TypeError(message.format(type(other)))
     return other
Beispiel #10
0
 def _get_stream_type(self,
                      stream_type: Union[StreamType,
                                         Auto] = AUTO) -> StreamType:
     """Return ``stream_type`` if defined, otherwise this object's own type.

     Fallback order when ``stream_type`` is not defined:
     ``get_stream_type()``, then ``get_default_stream_type()``, then the
     type detected from ``get_default_item_type()``.
     """
     if Auto.is_defined(stream_type):
         return stream_type
     if hasattr(self, 'get_stream_type'):
         return self.get_stream_type()
     if hasattr(self, 'get_default_stream_type'):
         return self.get_default_stream_type()
     default_item_type = self.get_default_item_type()
     return StreamType.detect(default_item_type)
Beispiel #11
0
 def filter(self,
            *args,
            item_type: ItemType = ItemType.Auto,
            skip_errors: bool = False,
            **kwargs) -> Native:
     """Return a stream holding only the items selected by the given filters.

     :param args: positional filters, forwarded to ``_get_filtered_items()``.
     :param item_type: item type used for filtering; when not defined it is
         obtained from ``self.get_item_type``.
     :param skip_errors: forwarded to ``_get_filtered_items()``.
     :param kwargs: keyword filters, forwarded to ``_get_filtered_items()``.
     """
     item_type = arg.delayed_acquire(item_type, self.get_item_type)
     # The type of the resulting stream is detected from the item type.
     target_type = StreamType.detect(item_type)
     selected = self._get_filtered_items(
         *args,
         item_type=item_type,
         skip_errors=skip_errors,
         **kwargs
     )
     return self._assume_native(
         self.to_stream(data=selected, stream_type=target_type),
     )
Beispiel #12
0
 def to_stream_type(
     self,
     stream_type: StreamType,
     step: AutoCount = AUTO,
     verbose: AutoBool = AUTO,
     **kwargs,
 ) -> Stream:
     """Build a stream of ``stream_type`` from this object's items.

     :param stream_type: target stream type; when not defined it is obtained
         from ``self._get_stream_type``.
     :param step: forwarded to ``_get_items_of_type()`` and
         ``get_stream_kwargs()``.
     :param verbose: forwarded to the same two calls.
     :param kwargs: extra stream arguments; a defined ``data`` entry is used
         as the items instead of reading them from this object.
     """
     stream_type = arg.delayed_acquire(stream_type, self._get_stream_type)
     item_type = self._get_item_type(stream_type)
     items = kwargs.pop('data', None)
     if not arg.is_defined(items):
         items = self._get_items_of_type(item_type, step=step, verbose=verbose)
     return stream_type.stream(
         **self.get_stream_kwargs(data=items, step=step, verbose=verbose, **kwargs)
     )
Beispiel #13
0
 def to_stream(self,
               data: Data = AUTO,
               stream_type: AutoStreamType = AUTO,
               ex: OptionalFields = None,
               **kwargs) -> Stream:
     """Build a stream of the requested type from this object's data.

     :param data: items for the new stream; when not defined they are
         obtained from ``self.get_data``.
     :param stream_type: a type name, a stream class or an object exposing
         ``get_class()``; when not defined it is obtained from
         ``self.get_stream_type``.
     :param ex: passed to ``get_compatible_meta()`` as ``ex``.
     :param kwargs: extra meta values; they override the compatible meta.
     """
     requested = arg.delayed_acquire(stream_type, self.get_stream_type)
     if isinstance(requested, str):
         target_class = StreamType(requested).get_class()
     elif isclass(requested):
         target_class = requested
     else:
         target_class = requested.get_class()
     data = arg.delayed_acquire(data, self.get_data)
     meta = self.get_compatible_meta(target_class, ex=ex)
     meta.update(kwargs)
     # Fill in count and source only when the caller did not supply them.
     if 'count' not in meta:
         meta['count'] = self.get_count()
     if 'source' not in meta:
         meta['source'] = self.get_source()
     new_stream = target_class(data, **meta)
     return new_stream
Beispiel #14
0
 def to_stream(self,
               data: Optional[Iterable] = None,
               stream_type: AutoStreamType = AUTO,
               ex: OptionalFields = None,
               **kwargs) -> Union[RegularStream, Native]:
     """Convert this stream, or wrap ``data``, into the requested stream type.

     * With truthy ``data``: a new stream of the target class is built over
       it, using the compatible meta updated with ``kwargs``.
     * Without it, when the target is ``StreamType.SqlStream``: ``self`` is
       returned.
     * Otherwise the ``to_<suffix>`` method for the target type is called
       with no arguments.

     ``data`` is tested by truthiness, so an empty collection is treated the
     same as ``None``.
     """
     stream_type = Auto.acquire(stream_type, self.get_stream_type())
     if not data:
         if stream_type == StreamType.SqlStream:
             return self
         suffix = StreamType.of(stream_type).get_method_suffix()
         converter = self.__getattribute__('to_{}'.format(suffix))
         return converter()
     target_class = stream_type.get_class()
     meta = self.get_compatible_meta(target_class, ex=ex)
     meta.update(kwargs)
     # Fill in count and source only when the caller did not supply them.
     if 'count' not in meta:
         meta['count'] = self.get_count()
     if 'source' not in meta:
         meta['source'] = self.get_source()
     return target_class(data, **meta)
Beispiel #15
0
 def to_stream_type(
     self,
     stream_type: StreamType,
     step: AutoCount = AUTO,
     verbose: AutoBool = AUTO,
     message: Union[str, Auto, None] = AUTO,
     **kwargs,
 ) -> Stream:
     """Build a stream of ``stream_type`` from this object's items.

     :param stream_type: target stream type; when not defined it is obtained
         from ``self._get_stream_type``.
     :param step: forwarded to ``_get_items_of_type()`` and
         ``get_stream_kwargs()``.
     :param verbose: forwarded to the same two calls.
     :param message: forwarded to ``_get_items_of_type()`` only.
     :param kwargs: extra stream arguments; a defined ``data`` entry is used
         as the items instead of reading them from this object.
     """
     stream_type = Auto.delayed_acquire(stream_type, self._get_stream_type)
     item_type = self._get_item_type(stream_type)
     # Struct rows get this object's struct unless the caller supplied one.
     wants_struct = item_type == ItemType.StructRow
     if wants_struct and hasattr(self, 'get_struct') and 'struct' not in kwargs:
         kwargs['struct'] = self.get_struct()
     items = kwargs.pop('data', None)
     if not Auto.is_defined(items):
         items = self._get_items_of_type(
             item_type,
             step=step,
             verbose=verbose,
             message=message,
         )
     return stream_type.stream(
         **self.get_stream_kwargs(data=items, step=step, verbose=verbose, **kwargs)
     )
Beispiel #16
0
)
# Stream classes keyed by their class name; registered with StreamType below.
DICT_STREAM_CLASSES = {
    'AnyStream': AnyStream,
    'LineStream': LineStream,
    'RowStream': RowStream,
    'KeyValueStream': KeyValueStream,
    'StructStream': StructStream,
    'RecordStream': RecordStream,
    'PandasStream': PandasStream,
    'SqlStream': SqlStream,
}

_context = None  # global


# Register the default stream type name and the class mapping on StreamType.
StreamType.set_default(AnyStream.__name__)
StreamType.set_dict_classes(DICT_STREAM_CLASSES)


@deprecated_with_alternative('StreamType.get_class()')
def get_class(stream_type):
    """Return the stream class for ``stream_type`` (deprecated wrapper)."""
    resolved_type = StreamType(stream_type)
    return resolved_type.get_class()


# Stream type associated with each item type.
DICT_ITEM_TO_STREAM_TYPE = dict((
    (ItemType.Any, StreamType.AnyStream),
    (ItemType.Line, StreamType.LineStream),
    (ItemType.Record, StreamType.RecordStream),
    (ItemType.Row, StreamType.RowStream),
    (ItemType.StructRow, StreamType.StructStream),
))
Beispiel #17
0
def get_class(stream_type):
    """Return the stream class registered for ``stream_type``."""
    resolved_type = StreamType(stream_type)
    return resolved_type.get_class()
Beispiel #18
0
 def to_stream(self,
               stream_type: Union[StreamType, str, Auto] = AUTO,
               **kwargs) -> Stream:
     """Wrap ``self.get_data()`` in a stream of the requested type.

     :param stream_type: passed directly to ``StreamType(...)`` to find the
         stream class.
     :param kwargs: keyword arguments for the stream constructor.
     """
     target_class = StreamType(stream_type).get_class()
     items = self.get_data()
     return target_class(items, **kwargs)
Beispiel #19
0
 def get_stream_type(cls) -> StreamType:
     """Return the ``StreamType`` that ``StreamType.detect()`` gives for ``cls``."""
     detected = StreamType.detect(cls)
     assert isinstance(detected, StreamType)
     return detected
Beispiel #20
0
 def get_stream(self, to=AUTO, verbose: AutoBool = AUTO) -> Stream:
     """Convert this object to a stream of type ``to``.

     :param to: target stream type; when not defined,
         ``self.get_stream_type()`` is used.
     :param verbose: forwarded to ``to_stream_class()``.
     """
     to = arg.acquire(to, self.get_stream_type())
     target_class = StreamType(to).get_class()
     return self.to_stream_class(stream_class=target_class, verbose=verbose)