def __init__(
    self,
    filepath_or_buffer: Optional[Any] = None,
    filter_: Optional[Callable[[pd.DataFrame], pd.DataFrame]] = None,
    force_valid_ids: bool = True,
    fillvalues: Optional[Dict[str, Any]] = None,
    as_array: Optional[Any] = None,
    timeout: Optional[float] = None,
    save_context: Optional[Any] = None,  # FIXME seems more like a bool
    recovery: int = 0,  # FIXME seems more like a bool
    recovery_tag: Union[str, int] = "",
    recovery_table_size: int = 3,
    save_step_size: int = 100000,
    **kwds: Any,
) -> None:
    """Progressive CSV source module with optional crash-recovery support.

    Parameters
    ----------
    filepath_or_buffer : path or file-like object passed to ``pd.read_csv``
    filter_ : optional callable applied to each loaded ``DataFrame`` chunk;
        must be callable or ``None``, otherwise ``ProgressiveError`` is raised
    force_valid_ids : forwarded to the id-sanitizing machinery
    fillvalues : default values for the output table columns
    as_array : optional column-to-array grouping specification
    timeout : optional read timeout (seconds) for the CSV input
    save_context : when ``None``, context saving is enabled iff the input is
        recoverable; an explicit value is taken as a boolean override
    recovery : truthy to attempt resuming from previously saved recovery tables
    recovery_tag : tag distinguishing concurrent recovery table sets
    recovery_table_size : number of snapshots kept in the recovery table
    save_step_size : rows between two recovery snapshots
    kwds : remaining keywords, split between the module and ``pd.read_csv``
    """
    super(CSVLoader, self).__init__(**kwds)
    self.tags.add(self.TAG_SOURCE)
    self.default_step_size = kwds.get("chunksize", 1000)  # initial guess
    kwds.setdefault("chunksize", self.default_step_size)
    # Filter out the module keywords from the csv loader keywords
    csv_kwds = filter_kwds(kwds, pd.read_csv)
    # When called with a specified chunksize, pd.read_csv returns a parser
    self.filepath_or_buffer = filepath_or_buffer
    self.force_valid_ids = force_valid_ids
    self.parser: Optional[Parser] = None
    self.csv_kwds = csv_kwds
    # Compression and encoding are handled by our own input stream, so they
    # are neutralized in the kwargs forwarded to pd.read_csv.
    self._compression = csv_kwds.get("compression", "infer")
    csv_kwds["compression"] = None
    self._encoding = csv_kwds.get("encoding", None)
    csv_kwds["encoding"] = None
    self._rows_read = 0
    if filter_ is not None and not callable(filter_):
        raise ProgressiveError("filter parameter should be callable or None")
    self._filter = filter_
    self._input_encoding = None
    self._input_compression = None
    self._input_size = 0  # length of the file or input stream when available
    self._timeout_csv = timeout
    self._table_params: Dict[str, Any] = dict(name=self.name, fillvalues=fillvalues)
    self._as_array = as_array
    # BUG FIX: honor an explicit save_context argument. Previously any
    # non-None value (even True) forced context saving off; only the
    # None case now falls back to is_recoverable().
    if save_context is None:
        self._save_context = bool(is_recoverable(filepath_or_buffer))
    else:
        self._save_context = bool(save_context)
    self._recovery = recovery
    self._recovery_table_size = recovery_table_size
    self._recovery_table: Optional[Table] = None
    self._recovery_table_name = f"csv_loader_recovery_{recovery_tag}"
    self._recovery_table_inv: Optional[Table] = None
    self._recovery_table_inv_name = f"csv_loader_recovery_invariant_{recovery_tag}"
    self._save_step_size = save_step_size
    self._last_saved_id = 0
    # Recovery is only possible when the recovery tables already exist;
    # otherwise fall back to a fresh start and truncate any leftovers.
    if self._recovery and not self.recovery_tables_exist():
        self._recovery = False
    if not self._recovery:
        self.trunc_recovery_tables()
def __init__(self, **kwds: Any) -> None:
    """Module performing a pandas-style merge of its inputs.

    Recognized keywords mirror ``pandas.merge``: ``how='inner'``, ``on=None``,
    ``left_on=None``, ``right_on=None``, ``left_index=False``,
    ``right_index=False``, ``sort=False``, ``suffixes=('_x', '_y')``,
    ``copy=True``, ``indicator=False``.
    """
    super(Merge, self).__init__(**kwds)
    # Keep only the keywords that the merge function itself understands.
    merge_keywords = filter_kwds(kwds, merge)
    self.merge_kwds = merge_keywords
    self._context: Dict[str, Any] = {}
def __init__(
    self,
    filepath_or_buffer: Optional[Any] = None,
    filter_: Optional[Callable[[pd.DataFrame], pd.DataFrame]] = None,
    force_valid_ids: bool = True,
    fillvalues: Optional[Dict[str, Any]] = None,
    throttle: Union[bool, int, float] = False,
    **kwds: Any,
) -> None:
    """Progressive CSV source module.

    Loads the input chunk by chunk through ``pd.read_csv``; a numeric
    ``throttle`` caps the amount of data consumed per step.
    """
    super().__init__(**kwds)
    # First guess for the step size; refined from actual throughput later.
    self.default_step_size = kwds.get("chunksize", 1000)
    kwds.setdefault("chunksize", self.default_step_size)
    # Split the module keywords from those understood by pd.read_csv.
    reader_kwds: Dict[str, Any] = filter_kwds(kwds, pd.read_csv)
    self.filepath_or_buffer = filepath_or_buffer
    self.force_valid_ids = force_valid_ids
    # A truthy numeric throttle is kept as-is; anything else disables it.
    self.throttle = (
        throttle
        if throttle and isinstance(throttle, integer_types + (float, ))
        else False
    )
    # pd.read_csv called with a chunksize returns a parser object.
    self.parser: Optional[pd.TextReader] = None
    self.csv_kwds = reader_kwds
    # Compression, encoding and nrows are managed here, not by pandas.
    self._compression: Any = reader_kwds.get("compression", "infer")
    reader_kwds["compression"] = None
    self._encoding: Any = reader_kwds.get("encoding", None)
    reader_kwds["encoding"] = None
    self._nrows = reader_kwds.get("nrows")
    reader_kwds["nrows"] = None  # nrows clashes with chunksize
    self._rows_read = 0
    if filter_ is not None and not callable(filter_):
        raise ProgressiveError("filter parameter should be callable or None")
    self._filter: Optional[Callable[[pd.DataFrame], pd.DataFrame]] = filter_
    # Stream exposing its current position through the 'tell()' method.
    self._input_stream: Optional[io.IOBase] = None
    self._input_encoding: Optional[str] = None
    self._input_compression: Optional[str] = None
    self._input_size = 0  # length of the file or input stream when available
    self._file_mode = False
    self._table_params: Dict[str, Any] = dict(name=self.name, fillvalues=fillvalues)
def __init__(self, **kwds: Any) -> None:
    """Module joining its two inputs via the ``join`` operation."""
    super(BinJoin, self).__init__(**kwds)
    # Retain only the keywords that the join function accepts.
    join_keywords = filter_kwds(kwds, join)
    self.join_kwds = join_keywords
    self._dialog = Dialog(self)
def __init__(self, **kwds: Any) -> None:
    """Module performing a pandas-style join of its inputs.

    Recognized keywords mirror ``DataFrame.join``: ``on=None``,
    ``how='left'``, ``lsuffix=''``, ``rsuffix=''``, ``sort=False``,
    ``name=None``.
    """
    super(Join, self).__init__(**kwds)
    # Keep only the keywords that the join function itself understands.
    join_keywords = filter_kwds(kwds, join)
    self.join_kwds = join_keywords
def __init__(self, **kwds: Any) -> None:
    """Module pasting its inputs together through the ``join`` operation."""
    super(Paste, self).__init__(**kwds)
    # Only forward keywords accepted by the join function.
    join_keywords = filter_kwds(kwds, join)
    self.join_kwds = join_keywords