def test_expand_user(self):
    filename = '~/sometest'
    expanded_name = common._expand_user(filename)
    self.assertNotEqual(expanded_name, filename)
    self.assertTrue(isabs(expanded_name))
    self.assertEqual(os.path.expanduser(filename), expanded_name)

def test_expand_user(self): filename = "~/sometest" expanded_name = icom._expand_user(filename) assert expanded_name != filename assert os.path.isabs(expanded_name) assert os.path.expanduser(filename) == expanded_name
def test_expand_user(self):
    filename = '~/sometest'
    expanded_name = icom._expand_user(filename)
    assert expanded_name != filename
    assert os.path.isabs(expanded_name)
    assert os.path.expanduser(filename) == expanded_name

def test_expand_user(self):
    filename = '~/sometest'
    expanded_name = common._expand_user(filename)
    self.assertNotEqual(expanded_name, filename)
    assert isabs(expanded_name)
    assert os.path.expanduser(filename) == expanded_name

def test_expand_user(self):
    filename = '~/sometest'
    expanded_name = common._expand_user(filename)
    assert expanded_name != filename
    assert isabs(expanded_name)
    assert os.path.expanduser(filename) == expanded_name

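All of the variants above exercise the same private helper. A minimal sketch of what `_expand_user` is assumed to look like, inferred from the behaviour these tests assert (delegate to `os.path.expanduser` for strings, pass everything else through); the body below is an illustration, not the library's verbatim source:

import os

def _expand_user(filepath_or_buffer):
    # Expand a leading "~" or "~user" to that user's home directory;
    # non-string inputs (e.g. open file handles) pass through unchanged.
    if isinstance(filepath_or_buffer, str):
        return os.path.expanduser(filepath_or_buffer)
    return filepath_or_buffer
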
def test_expand_user_normal_path(self):
    filename = '/somefolder/sometest'
    expanded_name = common._expand_user(filename)
    self.assertEqual(expanded_name, filename)
    self.assertNotIn('~', expanded_name)
    self.assertEqual(os.path.expanduser(filename), expanded_name)

def read(
    self, path, columns=None, storage_options: StorageOptions = None, **kwargs
):
    if is_fsspec_url(path) and "filesystem" not in kwargs:
        # make fsspec instance, which pyarrow will use to open paths
        import_optional_dependency("fsspec")
        import fsspec.core

        fs, path = fsspec.core.url_to_fs(path, **(storage_options or {}))
        should_close = False
    else:
        if storage_options:
            raise ValueError(
                "storage_options passed with buffer or non-fsspec filepath"
            )
        fs = kwargs.pop("filesystem", None)
        should_close = False
        path = _expand_user(path)

    if not fs:
        # no filesystem given: fall back to generic path/buffer handling
        path, _, _, should_close = get_filepath_or_buffer(path)

    kwargs["use_pandas_metadata"] = True
    result = self.api.parquet.read_table(
        path, columns=columns, filesystem=fs, **kwargs
    ).to_pandas()
    if should_close:
        path.close()

    return result

def write(
    self,
    df: DataFrame,
    path: FilePathOrBuffer[AnyStr],
    compression: Optional[str] = "snappy",
    index: Optional[bool] = None,
    storage_options: StorageOptions = None,
    partition_cols: Optional[List[str]] = None,
    **kwargs,
):
    self.validate_dataframe(df)

    from_pandas_kwargs: Dict[str, Any] = {"schema": kwargs.pop("schema", None)}
    if index is not None:
        from_pandas_kwargs["preserve_index"] = index

    table = self.api.Table.from_pandas(df, **from_pandas_kwargs)

    if is_fsspec_url(path) and "filesystem" not in kwargs:
        # make fsspec instance, which pyarrow will use to open paths
        import_optional_dependency("fsspec")
        import fsspec.core

        fs, path = fsspec.core.url_to_fs(path, **(storage_options or {}))
        kwargs["filesystem"] = fs
    else:
        if storage_options:
            raise ValueError(
                "storage_options passed with file object or non-fsspec file path"
            )
        path = _expand_user(path)

    if partition_cols is not None:
        # writes to multiple files under the given path
        self.api.parquet.write_to_dataset(
            table,
            path,
            compression=compression,
            partition_cols=partition_cols,
            **kwargs,
        )
    else:
        # write to single output file
        self.api.parquet.write_table(table, path, compression=compression, **kwargs)

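Assuming the read and write methods above sit behind pandas.read_parquet and pandas.DataFrame.to_parquet with engine="pyarrow" (the pandas-level entry points are an assumption here, as are the bucket name and options), a round-trip usage sketch:

import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})

# Local path: _expand_user resolves the leading "~" before pyarrow opens the file.
df.to_parquet("~/example.parquet", engine="pyarrow", index=False)
roundtrip = pd.read_parquet("~/example.parquet", engine="pyarrow")

# fsspec URL: url_to_fs builds the filesystem and storage_options pass through.
# Bucket and credentials below are illustrative placeholders.
# df.to_parquet(
#     "s3://my-bucket/example.parquet",
#     engine="pyarrow",
#     storage_options={"anon": False},
# )
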
def write( self, df: DataFrame, path, compression="snappy", index: Optional[bool] = None, partition_cols=None, **kwargs, ): self.validate_dataframe(df) from_pandas_kwargs: Dict[str, Any] = { "schema": kwargs.pop("schema", None) } if index is not None: from_pandas_kwargs["preserve_index"] = index table = self.api.Table.from_pandas(df, **from_pandas_kwargs) if is_fsspec_url(path) and "filesystem" not in kwargs: # make fsspec instance, which pyarrow will use to open paths import_optional_dependency("fsspec") import fsspec.core fs, path = fsspec.core.url_to_fs(path) kwargs["filesystem"] = fs else: path = _expand_user(path) if partition_cols is not None: # writes to multiple files under the given path self.api.parquet.write_to_dataset( table, path, compression=compression, partition_cols=partition_cols, **kwargs, ) else: # write to single output file self.api.parquet.write_table(table, path, compression=compression, **kwargs)
def test_expand_user_normal_path(self): filename = "/somefolder/sometest" expanded_name = icom._expand_user(filename) assert expanded_name == filename assert os.path.expanduser(filename) == expanded_name
def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', float_format=None, cols=None, header=True, index=True, index_label=None, mode='w', nanRep=None, encoding=None, compression=None, quoting=None, line_terminator='\n', chunksize=None, tupleize_cols=False, quotechar='"', date_format=None, doublequote=True, escapechar=None, decimal='.'): self.obj = obj if path_or_buf is None: path_or_buf = StringIO() self.path_or_buf = _expand_user(_stringify_path(path_or_buf)) self.sep = sep self.na_rep = na_rep self.float_format = float_format self.decimal = decimal self.header = header self.index = index self.index_label = index_label self.mode = mode self.encoding = encoding self.compression = compression if quoting is None: quoting = csvlib.QUOTE_MINIMAL self.quoting = quoting if quoting == csvlib.QUOTE_NONE: # prevents crash in _csv quotechar = None self.quotechar = quotechar self.doublequote = doublequote self.escapechar = escapechar self.line_terminator = line_terminator self.date_format = date_format self.tupleize_cols = tupleize_cols self.has_mi_columns = (isinstance(obj.columns, MultiIndex) and not self.tupleize_cols) # validate mi options if self.has_mi_columns: if cols is not None: raise TypeError("cannot specify cols with a MultiIndex on the " "columns") if cols is not None: if isinstance(cols, Index): cols = cols.to_native_types(na_rep=na_rep, float_format=float_format, date_format=date_format, quoting=self.quoting) else: cols = list(cols) self.obj = self.obj.loc[:, cols] # update columns to include possible multiplicity of dupes # and make sure sure cols is just a list of labels cols = self.obj.columns if isinstance(cols, Index): cols = cols.to_native_types(na_rep=na_rep, float_format=float_format, date_format=date_format, quoting=self.quoting) else: cols = list(cols) # save it self.cols = cols # preallocate data 2d list self.blocks = self.obj._data.blocks ncols = sum(b.shape[0] for b in self.blocks) self.data = [None] * ncols if chunksize is None: chunksize = (100000 // (len(self.cols) or 1)) or 1 self.chunksize = int(chunksize) self.data_index = obj.index if (isinstance(self.data_index, (DatetimeIndex, PeriodIndex)) and date_format is not None): self.data_index = Index([ x.strftime(date_format) if notna(x) else '' for x in self.data_index ]) self.nlevels = getattr(self.data_index, 'nlevels', 1) if not index: self.nlevels = 0
def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', float_format=None, cols=None, header=True, index=True, index_label=None, mode='w', nanRep=None, encoding=None, compression=None, quoting=None, line_terminator='\n', chunksize=None, tupleize_cols=False, quotechar='"', date_format=None, doublequote=True, escapechar=None, decimal='.'): self.obj = obj if path_or_buf is None: path_or_buf = StringIO() self.path_or_buf = _expand_user(_stringify_path(path_or_buf)) self.sep = sep self.na_rep = na_rep self.float_format = float_format self.decimal = decimal self.header = header self.index = index self.index_label = index_label self.mode = mode self.encoding = encoding self.compression = compression if quoting is None: quoting = csvlib.QUOTE_MINIMAL self.quoting = quoting if quoting == csvlib.QUOTE_NONE: # prevents crash in _csv quotechar = None self.quotechar = quotechar self.doublequote = doublequote self.escapechar = escapechar self.line_terminator = line_terminator self.date_format = date_format self.tupleize_cols = tupleize_cols self.has_mi_columns = (isinstance(obj.columns, MultiIndex) and not self.tupleize_cols) # validate mi options if self.has_mi_columns: if cols is not None: raise TypeError("cannot specify cols with a MultiIndex on the " "columns") if cols is not None: if isinstance(cols, Index): cols = cols.to_native_types(na_rep=na_rep, float_format=float_format, date_format=date_format, quoting=self.quoting) else: cols = list(cols) self.obj = self.obj.loc[:, cols] # update columns to include possible multiplicity of dupes # and make sure sure cols is just a list of labels cols = self.obj.columns if isinstance(cols, Index): cols = cols.to_native_types(na_rep=na_rep, float_format=float_format, date_format=date_format, quoting=self.quoting) else: cols = list(cols) # save it self.cols = cols # preallocate data 2d list self.blocks = self.obj._data.blocks ncols = sum(b.shape[0] for b in self.blocks) self.data = [None] * ncols if chunksize is None: chunksize = (100000 // (len(self.cols) or 1)) or 1 self.chunksize = int(chunksize) self.data_index = obj.index if (isinstance(self.data_index, (DatetimeIndex, PeriodIndex)) and date_format is not None): self.data_index = Index([x.strftime(date_format) if notna(x) else '' for x in self.data_index]) self.nlevels = getattr(self.data_index, 'nlevels', 1) if not index: self.nlevels = 0
def test_expand_user_normal_path(self):
    filename = '/somefolder/sometest'
    expanded_name = icom._expand_user(filename)
    assert expanded_name == filename
    assert os.path.expanduser(filename) == expanded_name

def to_string( self, buf=None, na_rep="NaN", float_format=None, header=True, index=True, length=False, dtype=False, name=False, max_rows=None, min_rows=None, ) -> Optional[str]: """ Render a string representation of the Series. Follows pandas implementation except when ``max_rows=None``. In this scenario, we set ``max_rows={0}`` to avoid accidentally dumping an entire index. This can be overridden by explicitly setting ``max_rows``. See Also -------- :pandas_api_docs:`pandas.Series.to_string` for argument details. """ # In pandas calling 'to_string' without max_rows set, will dump ALL rows - we avoid this # by limiting rows by default. num_rows = len(self) # avoid multiple calls if num_rows <= DEFAULT_NUM_ROWS_DISPLAYED: if max_rows is None: max_rows = num_rows else: max_rows = min(num_rows, max_rows) elif max_rows is None: warnings.warn( f"Series.to_string called without max_rows set " f"- this will return entire index results. " f"Setting max_rows={DEFAULT_NUM_ROWS_DISPLAYED}" f" overwrite if different behaviour is required.", UserWarning, ) max_rows = DEFAULT_NUM_ROWS_DISPLAYED # because of the way pandas handles max_rows=0, not having this throws an error # see eland issue #56 if max_rows == 0: max_rows = 1 # Create a slightly bigger dataframe than display temp_series = self._build_repr(max_rows + 1) if buf is not None: _buf = _expand_user(stringify_path(buf)) else: _buf = StringIO() if num_rows == 0: # Empty series are rendered differently than # series with items. We can luckily use our # example series in this case. temp_series.head(0).to_string( buf=_buf, na_rep=na_rep, float_format=float_format, header=header, index=index, length=length, dtype=dtype, name=name, max_rows=max_rows, ) else: # Create repr of fake series without name, length, dtype summary temp_series.to_string( buf=_buf, na_rep=na_rep, float_format=float_format, header=header, index=index, length=False, dtype=False, name=False, max_rows=max_rows, ) # Create the summary footer = [] if name and self.name is not None: footer.append(f"Name: {self.name}") if length and len(self) > max_rows: footer.append(f"Length: {len(self.index)}") if dtype: footer.append(f"dtype: {temp_series.dtype}") if footer: _buf.write(f"\n{', '.join(footer)}") if buf is None: result = _buf.getvalue() return result
def test_expand_user_normal_path(self):
    filename = '/somefolder/sometest'
    expanded_name = common._expand_user(filename)
    assert expanded_name == filename
    assert os.path.expanduser(filename) == expanded_name