def __init__(
        self,
        data,
        name=AUTO,
        check=True,
        count=None,
        less_than=None,
        value_stream_type: Union[StreamType, str] = None,
        source=None,
        context=None,
        max_items_in_memory=AUTO,
        tmp_files=AUTO,
):
    """Initialise the stream and store the stream type of its values.

    Every argument except ``value_stream_type`` is forwarded unchanged to the
    parent initialiser.

    :param value_stream_type: a StreamType, a value accepted by ``StreamType(...)``,
        or an object exposing ``.value``; ``None`` selects ``StreamType.AnyStream``.
    """
    parent_options = dict(
        name=name,
        check=check,
        count=count,
        less_than=less_than,
        source=source,
        context=context,
        max_items_in_memory=max_items_in_memory,
        tmp_files=tmp_files,
    )
    super().__init__(data, **parent_options)
    if value_stream_type is not None:
        try:
            value_stream_type = StreamType(value_stream_type)
        except ValueError:
            # Retry with the raw ``.value`` of the object that was passed in.
            value_stream_type = StreamType(value_stream_type.value)
    # ``None`` (or any falsy result) falls back to the generic stream type.
    self.value_stream_type = value_stream_type or StreamType.AnyStream
def to_stream(
        self,
        data: Data = AUTO,
        stream_type: AutoStreamType = AUTO,
        ex: OptionalFields = None,
        **kwargs
) -> Stream:
    """Convert this stream into a stream of another type.

    :param data: items for the new stream; when not defined they are taken
        from ``get_items_of_type()`` (if this object has it) or ``get_data()``.
    :param stream_type: a StreamType, its string value, a stream class, or any
        object exposing ``get_class()``; defaults to ``self.get_stream_type()``.
    :param ex: passed to ``get_compatible_meta()`` as ``ex``.
    :param kwargs: meta overrides applied on top of the compatible meta.
    :raises TypeError: when ``stream_type`` is none of the supported kinds.
    """
    stream_type = arg.delayed_acquire(stream_type, self.get_stream_type)

    # Resolve whatever was passed as a stream type into a concrete class.
    if isinstance(stream_type, str):
        target_class = StreamType(stream_type).get_class()
    elif isclass(stream_type):
        target_class = stream_type
    elif isinstance(stream_type, StreamType) or hasattr(stream_type, 'get_class'):
        target_class = stream_type.get_class()
    else:
        raise TypeError('AnyStream.to_stream(data, stream_type): expected StreamType, got {}'.format(stream_type))

    if not arg.is_defined(data):
        if hasattr(self, 'get_items_of_type'):
            item_type = target_class.get_item_type()
            data = self.get_items_of_type(item_type)
        else:
            data = self.get_data()

    meta = self.get_compatible_meta(target_class, ex=ex)
    meta.update(kwargs)
    # The getters are called only when the caller did not supply the value.
    if 'count' not in meta:
        meta['count'] = self.get_count()
    if 'source' not in meta:
        meta['source'] = self.get_source()

    new_stream = target_class(data, **meta)
    return self._assume_stream(new_stream)
def stream(
        self,
        data: Iterable,
        stream_type: Union[StreamType, Stream, arg.Auto] = arg.AUTO,
        ex: OptionalFields = None,
        **kwargs
) -> Stream:
    """Build a stream over ``data`` that carries this stream's compatible meta.

    :param data: items for the new stream.
    :param stream_type: a StreamType, its string value or a stream class;
        defaults to ``self.get_stream_type()``.
    :param ex: passed to ``get_compatible_meta()`` as ``ex``.
    :param kwargs: meta overrides applied on top of the compatible meta.
    """
    stream_type = arg.acquire(stream_type, self.get_stream_type())

    # The class is needed only to select the meta fields it is compatible with.
    if isclass(stream_type):
        target_class = stream_type
    elif isinstance(stream_type, str):
        target_class = StreamType(stream_type).get_class()
    else:
        target_class = stream_type.get_class()

    meta = self.get_compatible_meta(target_class, ex=ex)
    meta.update(kwargs)
    builder = StreamType.of(stream_type)
    return builder.stream(data, **meta)
def stream(
        self,
        data: Iterable,
        stream_type: AutoStreamType = AUTO,
        ex: OptionalArguments = None,
        save_name: bool = True,
        save_count: bool = True,
        **kwargs
) -> Stream:
    """Build a new stream over ``data`` using the meta of this stream.

    :param data: items for the new stream.
    :param stream_type: a StreamType (or its string value) of the new stream;
        when not defined the class of this stream and its full meta are used.
    :param ex: passed to ``get_compatible_meta()`` as ``ex``.
    :param save_name: when False, ``name`` is removed from the inherited meta.
    :param save_count: when False, ``count`` is removed from the inherited meta.
    :param kwargs: meta overrides applied after the removals above.
    :return: an instance of the resolved stream class.
    """
    if arg.is_defined(stream_type):
        if isinstance(stream_type, str):
            stream_class = StreamType(stream_type).get_class()
        else:
            stream_class = stream_type.get_class()
        meta = self.get_compatible_meta(stream_class, ex=ex)
    else:
        stream_class = self.__class__
        meta = self.get_meta()

    # The compatible meta may already lack these keys (e.g. excluded via ``ex``),
    # so remove them tolerantly instead of raising KeyError.
    if not save_name:
        meta.pop('name', None)
    if not save_count:
        meta.pop('count', None)

    meta.update(kwargs)
    if 'context' not in meta:
        meta['context'] = self.get_context()
    return stream_class(data, **meta)
def parse_json(
        self,
        default_value=None,
        to: Union[StreamType, str] = StreamType.RecordStream,
) -> Stream:
    """Map the items through ``fs.json_loads`` into a stream of type ``to``.

    :param default_value: forwarded to ``fs.json_loads()``
        (presumably the fallback for items that cannot be parsed - TODO confirm).
    :param to: target stream type, resolved with ``StreamType.find_instance()``.
    """
    target_type = StreamType.find_instance(to)
    assert isinstance(target_type, StreamType)
    json_parser = fs.json_loads(default_value)
    return self.map_to_type(json_parser, stream_type=target_type)
def stream(stream_type, *args, **kwargs) -> StreamInterface:
    """Instantiate a stream of the requested type.

    :param stream_type: a stream class, or a value accepted by ``StreamType(...)``.
    :param args: positional arguments for the stream class constructor.
    :param kwargs: keyword arguments for the stream class constructor; when
        ``context`` is absent it is filled from ``get_context()``.
    :return: a new instance of the resolved stream class.
    """
    # Test the argument itself: a stream class is used as is, anything else
    # is resolved through StreamType.
    if is_stream_class(stream_type):
        stream_class = stream_type
    else:
        stream_class = StreamType(stream_type).get_class()
    if 'context' not in kwargs:
        kwargs['context'] = get_context()
    return stream_class(*args, **kwargs)
def to_stream(self, stream_type: AutoStreamType = AUTO, *args, **kwargs) -> Stream:
    """Dispatch to the ``to_<suffix>`` method that matches ``stream_type``.

    The suffix comes from ``StreamType.of(stream_type).get_method_suffix()``;
    the resolved stream type and all remaining arguments are passed on to
    that method.
    """
    stream_type = arg.acquire(stream_type, self.get_stream_type())
    suffix = StreamType.of(stream_type).get_method_suffix()
    converter_name = 'to_{}'.format(suffix)
    converter = self.__getattribute__(converter_name)
    return converter(stream_type, *args, **kwargs)
def flat_map(self, function: Callable, to: AutoStreamType = AUTO) -> Stream:
    """Apply ``function`` to the items with ``flat=True`` and wrap the result.

    :param function: callable passed to ``_get_mapped_items()``.
    :param to: stream type of the result; when not defined the class of this
        stream is used.
    :return: a stream of the resolved class carrying the meta fields that both
        this stream and the target class know, except ``count``.
    """
    if arg.is_defined(to):
        stream_class = StreamType.detect(to).get_class()
    else:
        stream_class = self.__class__
    # An empty instance of the target class reveals which meta keys it accepts.
    new_props_keys = stream_class([]).get_meta().keys()
    props = {k: v for k, v in self.get_meta().items() if k in new_props_keys}
    # The inherited count is dropped (presumably because flat mapping changes
    # the number of items); tolerate target classes whose meta has no count.
    props.pop('count', None)
    items = self._get_mapped_items(function=function, flat=True)
    return stream_class(items, **props)
def get_class(cls, other: Stream = None):
    """Resolve ``other`` into a stream class.

    :param other: ``None`` (returns ``cls``), a StreamType or string
        (resolved via ``StreamType(other).get_class()``), or a class
        (returned as is).
    :raises TypeError: for any other kind of ``other``.
    """
    if other is None:
        return cls
    if isinstance(other, (StreamType, str)):
        return StreamType(other).get_class()
    if inspect.isclass(other):
        return other
    template = '"other" parameter must be class or StreamType (got {})'
    raise TypeError(template.format(type(other)))
def _get_stream_type(self, stream_type: Union[StreamType, Auto] = AUTO) -> StreamType:
    """Return ``stream_type`` if defined, otherwise derive it from this object.

    Fallback order: ``get_stream_type()``, then ``get_default_stream_type()``,
    then ``StreamType.detect()`` applied to ``get_default_item_type()``.
    """
    if Auto.is_defined(stream_type):
        return stream_type
    if hasattr(self, 'get_stream_type'):
        return self.get_stream_type()
    if hasattr(self, 'get_default_stream_type'):
        return self.get_default_stream_type()
    default_item_type = self.get_default_item_type()
    return StreamType.detect(default_item_type)
def filter(
        self,
        *args,
        item_type: ItemType = ItemType.Auto,
        skip_errors: bool = False,
        **kwargs
) -> Native:
    """Return a stream holding only the items selected by the given filters.

    :param args: positional filters forwarded to ``_get_filtered_items()``.
    :param item_type: item type of the stream; defaults to ``self.get_item_type()``.
    :param skip_errors: forwarded to ``_get_filtered_items()``.
    :param kwargs: keyword filters forwarded to ``_get_filtered_items()``.
    """
    item_type = arg.delayed_acquire(item_type, self.get_item_type)
    # The stream type of the result is derived from the item type.
    result_type = StreamType.detect(item_type)
    selected = self._get_filtered_items(
        *args,
        item_type=item_type,
        skip_errors=skip_errors,
        **kwargs
    )
    result = self.to_stream(data=selected, stream_type=result_type)
    return self._assume_native(result)
def to_stream_type(
        self,
        stream_type: StreamType,
        step: AutoCount = AUTO,
        verbose: AutoBool = AUTO,
        **kwargs,
) -> Stream:
    """Build a stream of ``stream_type`` from the items of this object.

    :param stream_type: target stream type; resolved via ``_get_stream_type``
        when not defined.
    :param step: forwarded to ``_get_items_of_type()`` and ``get_stream_kwargs()``.
    :param verbose: forwarded to ``_get_items_of_type()`` and ``get_stream_kwargs()``.
    :param kwargs: extra stream arguments; a defined ``data`` entry replaces
        the items that would otherwise be read from this object.
    """
    stream_type = arg.delayed_acquire(stream_type, self._get_stream_type)
    item_type = self._get_item_type(stream_type)
    explicit_data = kwargs.pop('data', None)
    if arg.is_defined(explicit_data):
        data = explicit_data
    else:
        data = self._get_items_of_type(item_type, step=step, verbose=verbose)
    stream_kwargs = self.get_stream_kwargs(data=data, step=step, verbose=verbose, **kwargs)
    return stream_type.stream(**stream_kwargs)
def to_stream(
        self,
        data: Data = AUTO,
        stream_type: AutoStreamType = AUTO,
        ex: OptionalFields = None,
        **kwargs
) -> Stream:
    """Create a stream of another type from this stream.

    :param data: items for the new stream; defaults to ``self.get_data()``.
    :param stream_type: a StreamType, its string value or a stream class;
        defaults to ``self.get_stream_type()``.
    :param ex: passed to ``get_compatible_meta()`` as ``ex``.
    :param kwargs: meta overrides applied on top of the compatible meta.
    """
    stream_type = arg.delayed_acquire(stream_type, self.get_stream_type)
    if isclass(stream_type):
        target_class = stream_type
    elif isinstance(stream_type, str):
        target_class = StreamType(stream_type).get_class()
    else:
        target_class = stream_type.get_class()

    data = arg.delayed_acquire(data, self.get_data)
    meta = self.get_compatible_meta(target_class, ex=ex)
    meta.update(kwargs)
    # The getters are called only when the caller did not supply the value.
    if 'count' not in meta:
        meta['count'] = self.get_count()
    if 'source' not in meta:
        meta['source'] = self.get_source()
    return target_class(data, **meta)
def to_stream(
        self,
        data: Optional[Iterable] = None,
        stream_type: AutoStreamType = AUTO,
        ex: OptionalFields = None,
        **kwargs
) -> Union[RegularStream, Native]:
    """Convert this stream to ``stream_type``.

    With truthy ``data`` a new stream of the target class is built from it
    together with the compatible meta. Without data, a request for
    ``StreamType.SqlStream`` returns this object itself and any other type is
    delegated to the matching ``to_<suffix>()`` method called without arguments.

    :param data: items for the new stream; falsy values select the branches
        that do not use data.
    :param stream_type: target stream type; defaults to ``self.get_stream_type()``.
    :param ex: passed to ``get_compatible_meta()`` as ``ex``.
    :param kwargs: meta overrides, used only when ``data`` is truthy.
    """
    stream_type = Auto.acquire(stream_type, self.get_stream_type())

    if data:
        target_class = stream_type.get_class()
        meta = self.get_compatible_meta(target_class, ex=ex)
        meta.update(kwargs)
        if 'count' not in meta:
            meta['count'] = self.get_count()
        if 'source' not in meta:
            meta['source'] = self.get_source()
        return target_class(data, **meta)

    if stream_type == StreamType.SqlStream:
        return self

    suffix = StreamType.of(stream_type).get_method_suffix()
    converter = self.__getattribute__('to_{}'.format(suffix))
    return converter()
def to_stream_type(
        self,
        stream_type: StreamType,
        step: AutoCount = AUTO,
        verbose: AutoBool = AUTO,
        message: Union[str, Auto, None] = AUTO,
        **kwargs,
) -> Stream:
    """Build a stream of ``stream_type`` from the items of this object.

    :param stream_type: target stream type; resolved via ``_get_stream_type``
        when not defined.
    :param step: forwarded to ``_get_items_of_type()`` and ``get_stream_kwargs()``.
    :param verbose: forwarded to ``_get_items_of_type()`` and ``get_stream_kwargs()``.
    :param message: forwarded to ``_get_items_of_type()`` only.
    :param kwargs: extra stream arguments; a defined ``data`` entry replaces
        the items that would otherwise be read from this object.
    """
    stream_type = Auto.delayed_acquire(stream_type, self._get_stream_type)
    item_type = self._get_item_type(stream_type)

    # Struct rows get the struct of this object unless the caller passed one.
    needs_struct = (
        item_type == ItemType.StructRow
        and hasattr(self, 'get_struct')
        and 'struct' not in kwargs
    )
    if needs_struct:
        kwargs['struct'] = self.get_struct()

    explicit_data = kwargs.pop('data', None)
    if Auto.is_defined(explicit_data):
        data = explicit_data
    else:
        data = self._get_items_of_type(item_type, step=step, verbose=verbose, message=message)
    stream_kwargs = self.get_stream_kwargs(data=data, step=step, verbose=verbose, **kwargs)
    return stream_type.stream(**stream_kwargs)
)
# Stream classes keyed by their class names; handed to StreamType below.
DICT_STREAM_CLASSES = dict(
    AnyStream=AnyStream,
    LineStream=LineStream,
    RowStream=RowStream,
    KeyValueStream=KeyValueStream,
    StructStream=StructStream,
    RecordStream=RecordStream,
    PandasStream=PandasStream,
    SqlStream=SqlStream,
)

_context = None  # global

# Configure StreamType at import time: the AnyStream class name is passed as
# the default, and the name-to-class mapping above is passed as its classes.
StreamType.set_default(AnyStream.__name__)
StreamType.set_dict_classes(DICT_STREAM_CLASSES)


@deprecated_with_alternative('StreamType.get_class()')
def get_class(stream_type):
    """Return ``StreamType(stream_type).get_class()``.

    Marked deprecated by its decorator, which names ``StreamType.get_class()``
    as the alternative.
    """
    return StreamType(stream_type).get_class()


# Stream type associated with each item type.
DICT_ITEM_TO_STREAM_TYPE = {
    ItemType.Any: StreamType.AnyStream,
    ItemType.Line: StreamType.LineStream,
    ItemType.Record: StreamType.RecordStream,
    ItemType.Row: StreamType.RowStream,
    ItemType.StructRow: StreamType.StructStream,
}
def get_class(stream_type):
    """Return the class that ``StreamType(stream_type)`` reports via ``get_class()``."""
    resolved_type = StreamType(stream_type)
    return resolved_type.get_class()
def to_stream(self, stream_type: Union[StreamType, str, Auto] = AUTO, **kwargs) -> Stream:
    """Wrap ``self.get_data()`` into a stream of the requested type.

    :param stream_type: value passed to ``StreamType(...)`` to pick the class.
    :param kwargs: keyword arguments for the stream class constructor.
    """
    target_type = StreamType(stream_type)
    stream_class = target_type.get_class()
    data = self.get_data()
    return stream_class(data, **kwargs)
def get_stream_type(cls) -> StreamType:
    """Return the StreamType that ``StreamType.detect()`` reports for ``cls``.

    An assertion checks that the detected value is a StreamType instance.
    """
    detected = StreamType.detect(cls)
    assert isinstance(detected, StreamType)
    return detected
def get_stream(self, to=AUTO, verbose: AutoBool = AUTO) -> Stream:
    """Return this object as a stream of type ``to``.

    :param to: target stream type; defaults to ``self.get_stream_type()``.
    :param verbose: forwarded to ``to_stream_class()``.
    """
    to = arg.acquire(to, self.get_stream_type())
    target_class = StreamType(to).get_class()
    return self.to_stream_class(stream_class=target_class, verbose=verbose)