Example #1
0
    def test_expand_user(self):
        """A ``~``-prefixed path must be expanded exactly as ``os.path.expanduser`` does."""
        tilde_path = '~/sometest'
        resolved = common._expand_user(tilde_path)

        # Expansion must actually change the string ...
        self.assertNotEqual(resolved, tilde_path)
        # ... yield an absolute location ...
        self.assertTrue(isabs(resolved))
        # ... and agree with the standard library.
        self.assertEqual(os.path.expanduser(tilde_path), resolved)
Example #2
0
    def test_expand_user(self):
        """``icom._expand_user`` resolves a leading ``~`` like ``os.path.expanduser``."""
        home_relative = "~/sometest"
        result = icom._expand_user(home_relative)

        # The tilde has to be replaced, giving an absolute path identical
        # to what the standard library produces.
        assert result != home_relative
        assert os.path.isabs(result)
        assert os.path.expanduser(home_relative) == result
Example #3
0
    def test_expand_user(self):
        """Check that a home-relative path is expanded to an absolute one."""
        original = '~/sometest'
        expanded = icom._expand_user(original)

        assert expanded != original  # the "~" must have been substituted
        assert os.path.isabs(expanded)
        assert os.path.expanduser(original) == expanded  # stdlib parity
Example #4
0
    def test_expand_user(self):
        """``common._expand_user`` must turn ``~/...`` into an absolute path."""
        user_path = '~/sometest'
        actual = common._expand_user(user_path)

        # Input and output differ once the tilde is expanded.
        self.assertNotEqual(actual, user_path)
        assert isabs(actual)
        # The helper is expected to mirror os.path.expanduser.
        assert os.path.expanduser(user_path) == actual
Example #5
0
    def test_expand_user(self):
        """Expanding ``~/sometest`` gives the same absolute path as the stdlib."""
        unexpanded = '~/sometest'
        got = common._expand_user(unexpanded)

        assert got != unexpanded
        assert isabs(got)
        assert os.path.expanduser(unexpanded) == got
Example #6
0
    def test_expand_user_normal_path(self):
        """A path without ``~`` must pass through ``_expand_user`` unchanged."""
        plain_path = '/somefolder/sometest'
        outcome = common._expand_user(plain_path)

        # Nothing to expand: output equals input and contains no tilde.
        self.assertEqual(outcome, plain_path)
        self.assertNotIn('~', outcome)
        # The standard library leaves such a path alone as well.
        self.assertEqual(os.path.expanduser(plain_path), outcome)
Example #7
0
    def read(
        self, path, columns=None, storage_options: StorageOptions = None, **kwargs
    ):
        """
        Read a parquet source into a DataFrame via ``self.api.parquet.read_table``.

        Parameters
        ----------
        path : str, path object or buffer
            fsspec URLs are resolved with ``fsspec.core.url_to_fs`` unless the
            caller supplied an explicit ``filesystem`` keyword. Anything else
            is passed through ``_expand_user`` and, when no filesystem is
            given, ``get_filepath_or_buffer``.
        columns : optional
            Forwarded unchanged to ``read_table``.
        storage_options : optional
            Extra keyword arguments for ``url_to_fs``; only valid together
            with an fsspec URL.
        **kwargs
            Forwarded to ``read_table``. ``filesystem`` is popped and passed
            explicitly; ``use_pandas_metadata`` is always forced to ``True``.

        Returns
        -------
        The result of ``read_table(...).to_pandas()``.

        Raises
        ------
        ValueError
            If ``storage_options`` is given for a buffer or non-fsspec path.
        """
        # Only get_filepath_or_buffer below may hand us a handle we own.
        should_close = False

        if is_fsspec_url(path) and "filesystem" not in kwargs:
            import_optional_dependency("fsspec")
            import fsspec.core

            fs, path = fsspec.core.url_to_fs(path, **(storage_options or {}))
        else:
            if storage_options:
                raise ValueError(
                    "storage_options passed with buffer or non-fsspec filepath"
                )
            fs = kwargs.pop("filesystem", None)
            path = _expand_user(path)

        if not fs:
            path, _, _, should_close = get_filepath_or_buffer(path)

        kwargs["use_pandas_metadata"] = True
        try:
            result = self.api.parquet.read_table(
                path, columns=columns, filesystem=fs, **kwargs
            ).to_pandas()
        finally:
            # Release the handle even when reading or conversion fails;
            # previously an exception here skipped the close() call.
            if should_close:
                path.close()

        return result
Example #8
0
    def test_expand_user_normal_path(self):
        """An absolute path with no tilde is returned verbatim."""
        absolute = '/somefolder/sometest'
        returned = common._expand_user(absolute)

        self.assertEqual(returned, absolute)
        self.assertNotIn('~', returned)
        self.assertEqual(os.path.expanduser(absolute), returned)
Example #9
0
    def write(
        self,
        df: DataFrame,
        path: FilePathOrBuffer[AnyStr],
        compression: Optional[str] = "snappy",
        index: Optional[bool] = None,
        storage_options: StorageOptions = None,
        partition_cols: Optional[List[str]] = None,
        **kwargs,
    ):
        """
        Convert ``df`` to an Arrow table and write it out as parquet.

        ``schema`` (taken from ``kwargs``) and ``index`` configure the
        ``Table.from_pandas`` conversion; every remaining keyword is forwarded
        to the parquet writer. With ``partition_cols`` the table is written
        through ``write_to_dataset``, otherwise through ``write_table``.

        Raises
        ------
        ValueError
            If ``storage_options`` is supplied for a file object or a
            non-fsspec path.
        """
        self.validate_dataframe(df)

        # "schema" belongs to the conversion step, so it is removed from the
        # writer kwargs here.
        conversion_options: Dict[str, Any] = {"schema": kwargs.pop("schema", None)}
        if index is not None:
            conversion_options["preserve_index"] = index
        arrow_table = self.api.Table.from_pandas(df, **conversion_options)

        if is_fsspec_url(path) and "filesystem" not in kwargs:
            # Build the fsspec filesystem that pyarrow will use to open paths.
            import_optional_dependency("fsspec")
            import fsspec.core

            filesystem, path = fsspec.core.url_to_fs(path, **(storage_options or {}))
            kwargs["filesystem"] = filesystem
        elif storage_options:
            raise ValueError(
                "storage_options passed with file object or non-fsspec file path"
            )
        else:
            path = _expand_user(path)

        if partition_cols is None:
            # Single output file.
            self.api.parquet.write_table(
                arrow_table, path, compression=compression, **kwargs
            )
            return

        # Multiple files are written underneath the given path.
        self.api.parquet.write_to_dataset(
            arrow_table,
            path,
            compression=compression,
            partition_cols=partition_cols,
            **kwargs,
        )
Example #10
0
    def write(
        self,
        df: DataFrame,
        path,
        compression="snappy",
        index: Optional[bool] = None,
        partition_cols=None,
        **kwargs,
    ):
        """
        Serialise ``df`` to parquet at ``path``.

        The frame is validated, converted with ``Table.from_pandas`` (using
        the optional ``schema`` keyword and ``index`` flag) and then written
        either as a partitioned dataset or as a single file. All other
        keywords go straight to the parquet writer.
        """
        self.validate_dataframe(df)

        table_options: Dict[str, Any] = {"schema": kwargs.pop("schema", None)}
        if index is not None:
            table_options["preserve_index"] = index
        pa_table = self.api.Table.from_pandas(df, **table_options)

        wants_fsspec = is_fsspec_url(path) and "filesystem" not in kwargs
        if not wants_fsspec:
            path = _expand_user(path)
        else:
            # pyarrow opens the target through this fsspec filesystem.
            import_optional_dependency("fsspec")
            import fsspec.core

            resolved_fs, path = fsspec.core.url_to_fs(path)
            kwargs["filesystem"] = resolved_fs

        parquet = self.api.parquet
        if partition_cols is not None:
            # Partitioned output: several files under ``path``.
            parquet.write_to_dataset(
                pa_table,
                path,
                compression=compression,
                partition_cols=partition_cols,
                **kwargs,
            )
        else:
            # Plain output: exactly one file.
            parquet.write_table(pa_table, path, compression=compression, **kwargs)
Example #11
0
    def test_expand_user_normal_path(self):
        """Paths without a tilde come back from ``_expand_user`` untouched."""
        regular_path = "/somefolder/sometest"
        produced = icom._expand_user(regular_path)

        # No expansion expected, and the stdlib must agree.
        assert produced == regular_path
        assert os.path.expanduser(regular_path) == produced
Example #12
0
    def __init__(self,
                 obj,
                 path_or_buf=None,
                 sep=",",
                 na_rep='',
                 float_format=None,
                 cols=None,
                 header=True,
                 index=True,
                 index_label=None,
                 mode='w',
                 nanRep=None,
                 encoding=None,
                 compression=None,
                 quoting=None,
                 line_terminator='\n',
                 chunksize=None,
                 tupleize_cols=False,
                 quotechar='"',
                 date_format=None,
                 doublequote=True,
                 escapechar=None,
                 decimal='.'):
        """
        Capture the CSV formatting options and precompute column/index state.

        Notes on the non-trivial arguments:

        - ``path_or_buf``: ``None`` is replaced by a fresh ``StringIO``; the
          value is then passed through ``_stringify_path`` and
          ``_expand_user`` (helpers defined elsewhere).
        - ``quoting``: ``None`` means ``csvlib.QUOTE_MINIMAL``; with
          ``csvlib.QUOTE_NONE`` the stored ``quotechar`` is forced to ``None``.
        - ``cols``: optional column selection; ``self.obj`` is narrowed to it
          with ``.loc[:, cols]``.
        - ``chunksize``: ``None`` derives a default from the column count.
        - ``nanRep``: accepted but never referenced in this constructor.

        Raises
        ------
        TypeError
            If ``cols`` is given while the columns are a ``MultiIndex`` and
            ``tupleize_cols`` is false.
        """

        self.obj = obj

        if path_or_buf is None:
            path_or_buf = StringIO()

        self.path_or_buf = _expand_user(_stringify_path(path_or_buf))
        self.sep = sep
        self.na_rep = na_rep
        self.float_format = float_format
        self.decimal = decimal

        self.header = header
        self.index = index
        self.index_label = index_label
        self.mode = mode
        self.encoding = encoding
        self.compression = compression

        if quoting is None:
            quoting = csvlib.QUOTE_MINIMAL
        self.quoting = quoting

        if quoting == csvlib.QUOTE_NONE:
            # prevents crash in _csv
            quotechar = None
        self.quotechar = quotechar

        self.doublequote = doublequote
        self.escapechar = escapechar

        self.line_terminator = line_terminator

        self.date_format = date_format

        self.tupleize_cols = tupleize_cols
        # MultiIndex columns are only treated as such when not tupleized.
        self.has_mi_columns = (isinstance(obj.columns, MultiIndex)
                               and not self.tupleize_cols)

        # validate mi options
        if self.has_mi_columns:
            if cols is not None:
                raise TypeError("cannot specify cols with a MultiIndex on the "
                                "columns")

        if cols is not None:
            # Normalise the requested labels, then restrict the object to them.
            if isinstance(cols, Index):
                cols = cols.to_native_types(na_rep=na_rep,
                                            float_format=float_format,
                                            date_format=date_format,
                                            quoting=self.quoting)
            else:
                cols = list(cols)
            self.obj = self.obj.loc[:, cols]

        # update columns to include possible multiplicity of dupes
        # and make sure cols is just a list of labels
        # (re-read from self.obj because the selection above may have
        # replaced it)
        cols = self.obj.columns
        if isinstance(cols, Index):
            cols = cols.to_native_types(na_rep=na_rep,
                                        float_format=float_format,
                                        date_format=date_format,
                                        quoting=self.quoting)
        else:
            cols = list(cols)

        # save it
        self.cols = cols

        # preallocate data 2d list
        # (one slot per unit of each block's first axis)
        self.blocks = self.obj._data.blocks
        ncols = sum(b.shape[0] for b in self.blocks)
        self.data = [None] * ncols

        if chunksize is None:
            # Default: 100000 divided by the number of columns, never below 1;
            # the inner ``or 1`` guards against division by zero.
            chunksize = (100000 // (len(self.cols) or 1)) or 1
        self.chunksize = int(chunksize)

        self.data_index = obj.index
        if (isinstance(self.data_index, (DatetimeIndex, PeriodIndex))
                and date_format is not None):
            # Pre-format date-like index values; missing entries become ''.
            self.data_index = Index([
                x.strftime(date_format) if notna(x) else ''
                for x in self.data_index
            ])

        # No index levels are reported when the index is not being written.
        self.nlevels = getattr(self.data_index, 'nlevels', 1)
        if not index:
            self.nlevels = 0
Example #13
0
    def __init__(self, obj, path_or_buf=None, sep=",", na_rep='',
                 float_format=None, cols=None, header=True, index=True,
                 index_label=None, mode='w', nanRep=None, encoding=None,
                 compression=None, quoting=None, line_terminator='\n',
                 chunksize=None, tupleize_cols=False, quotechar='"',
                 date_format=None, doublequote=True, escapechar=None,
                 decimal='.'):
        """
        Store the CSV writer configuration and derive column/index metadata.

        ``path_or_buf`` defaults to a new ``StringIO`` and is run through
        ``_stringify_path`` and ``_expand_user`` (helpers defined elsewhere).
        ``quoting`` defaults to ``csvlib.QUOTE_MINIMAL``, and ``quotechar`` is
        stored as ``None`` when quoting is ``csvlib.QUOTE_NONE``. When ``cols``
        is given, ``self.obj`` is reduced to those columns. ``nanRep`` is
        accepted but not used anywhere in this constructor.

        Raises
        ------
        TypeError
            If ``cols`` is passed together with ``MultiIndex`` columns while
            ``tupleize_cols`` is false.
        """

        self.obj = obj

        if path_or_buf is None:
            path_or_buf = StringIO()

        self.path_or_buf = _expand_user(_stringify_path(path_or_buf))
        self.sep = sep
        self.na_rep = na_rep
        self.float_format = float_format
        self.decimal = decimal

        self.header = header
        self.index = index
        self.index_label = index_label
        self.mode = mode
        self.encoding = encoding
        self.compression = compression

        if quoting is None:
            quoting = csvlib.QUOTE_MINIMAL
        self.quoting = quoting

        if quoting == csvlib.QUOTE_NONE:
            # prevents crash in _csv
            quotechar = None
        self.quotechar = quotechar

        self.doublequote = doublequote
        self.escapechar = escapechar

        self.line_terminator = line_terminator

        self.date_format = date_format

        self.tupleize_cols = tupleize_cols
        # True only for MultiIndex columns that are not being tupleized.
        self.has_mi_columns = (isinstance(obj.columns, MultiIndex) and
                               not self.tupleize_cols)

        # validate mi options
        if self.has_mi_columns:
            if cols is not None:
                raise TypeError("cannot specify cols with a MultiIndex on the "
                                "columns")

        if cols is not None:
            # Convert the selection to plain labels before slicing the object.
            if isinstance(cols, Index):
                cols = cols.to_native_types(na_rep=na_rep,
                                            float_format=float_format,
                                            date_format=date_format,
                                            quoting=self.quoting)
            else:
                cols = list(cols)
            self.obj = self.obj.loc[:, cols]

        # update columns to include possible multiplicity of dupes
        # and make sure cols is just a list of labels
        # (taken from self.obj, which may have been narrowed above)
        cols = self.obj.columns
        if isinstance(cols, Index):
            cols = cols.to_native_types(na_rep=na_rep,
                                        float_format=float_format,
                                        date_format=date_format,
                                        quoting=self.quoting)
        else:
            cols = list(cols)

        # save it
        self.cols = cols

        # preallocate data 2d list
        # (length is the sum of every block's shape[0])
        self.blocks = self.obj._data.blocks
        ncols = sum(b.shape[0] for b in self.blocks)
        self.data = [None] * ncols

        if chunksize is None:
            # 100000 // number of columns, with both the divisor and the
            # result clamped to at least 1.
            chunksize = (100000 // (len(self.cols) or 1)) or 1
        self.chunksize = int(chunksize)

        self.data_index = obj.index
        if (isinstance(self.data_index, (DatetimeIndex, PeriodIndex)) and
                date_format is not None):
            # Render date-like index entries up front; missing values -> ''.
            self.data_index = Index([x.strftime(date_format) if notna(x) else
                                     '' for x in self.data_index])

        # nlevels is zeroed when the index is not going to be written.
        self.nlevels = getattr(self.data_index, 'nlevels', 1)
        if not index:
            self.nlevels = 0
Example #14
0
    def test_expand_user_normal_path(self):
        """``icom._expand_user`` is a no-op for a path that has no ``~``."""
        untouched = '/somefolder/sometest'
        value = icom._expand_user(untouched)

        assert value == untouched
        assert os.path.expanduser(untouched) == value  # same as the stdlib
Example #15
0
    def to_string(
        self,
        buf=None,
        na_rep="NaN",
        float_format=None,
        header=True,
        index=True,
        length=False,
        dtype=False,
        name=False,
        max_rows=None,
        min_rows=None,
    ) -> Optional[str]:
        """
        Render a string representation of the Series.

        Follows pandas implementation except when ``max_rows=None``. In this scenario, ``max_rows`` is limited to
        ``DEFAULT_NUM_ROWS_DISPLAYED`` (with a ``UserWarning`` when that truncates the output) to avoid
        accidentally dumping an entire index. This can be overridden by explicitly setting ``max_rows``.

        Parameters
        ----------
        buf : str, path-like or object with a ``write`` method, optional
            Destination for the rendered text. A path is opened for writing
            (UTF-8); a file-like object is written to directly.
        na_rep, float_format, header, index
            Forwarded to the underlying ``to_string`` call.
        length, dtype, name : bool
            Select which entries appear in the summary footer.
        max_rows : int, optional
            Maximum number of rows to render; see above for the default.
        min_rows
            Accepted but not used by this implementation.

        Returns
        -------
        str or None
            The rendered text when ``buf`` is ``None``, otherwise ``None``.

        Raises
        ------
        TypeError
            If ``buf`` is neither a path nor an object with a ``write`` method.

        See Also
        --------
        :pandas_api_docs:`pandas.Series.to_string`
            for argument details.
        """
        # In pandas calling 'to_string' without max_rows set, will dump ALL rows - we avoid this
        # by limiting rows by default.
        num_rows = len(self)  # avoid multiple calls
        if num_rows <= DEFAULT_NUM_ROWS_DISPLAYED:
            if max_rows is None:
                max_rows = num_rows
            else:
                max_rows = min(num_rows, max_rows)
        elif max_rows is None:
            warnings.warn(
                f"Series.to_string called without max_rows set "
                f"- this will return entire index results. "
                f"Setting max_rows={DEFAULT_NUM_ROWS_DISPLAYED}"
                f" overwrite if different behaviour is required.",
                UserWarning,
            )
            max_rows = DEFAULT_NUM_ROWS_DISPLAYED

        # because of the way pandas handles max_rows=0, not having this throws an error
        # see eland issue #56
        if max_rows == 0:
            max_rows = 1

        # Create a slightly bigger dataframe than display
        temp_series = self._build_repr(max_rows + 1)

        target = None if buf is None else _expand_user(stringify_path(buf))
        if target is not None and not (
            hasattr(target, "write") or isinstance(target, str)
        ):
            raise TypeError("buf is not a file name and it has no write method")

        # Always render into memory first. The footer below is appended with
        # ``write``, which failed with AttributeError when ``buf`` was a path
        # (a plain ``str``) and the footer was written straight to it.
        rendered = StringIO()

        if num_rows == 0:
            # Empty series are rendered differently than
            # series with items. We can luckily use our
            # example series in this case.
            temp_series.head(0).to_string(
                buf=rendered,
                na_rep=na_rep,
                float_format=float_format,
                header=header,
                index=index,
                length=length,
                dtype=dtype,
                name=name,
                max_rows=max_rows,
            )
        else:
            # Create repr of fake series without name, length, dtype summary
            temp_series.to_string(
                buf=rendered,
                na_rep=na_rep,
                float_format=float_format,
                header=header,
                index=index,
                length=False,
                dtype=False,
                name=False,
                max_rows=max_rows,
            )

            # Create the summary
            footer = []
            if name and self.name is not None:
                footer.append(f"Name: {self.name}")
            # Reuse the cached row count instead of calling len(self) again.
            if length and num_rows > max_rows:
                footer.append(f"Length: {len(self.index)}")
            if dtype:
                footer.append(f"dtype: {temp_series.dtype}")

            if footer:
                rendered.write(f"\n{', '.join(footer)}")

        result = rendered.getvalue()
        if target is None:
            return result

        if hasattr(target, "write"):
            target.write(result)
        else:
            # ``target`` is a filesystem path at this point.
            with open(target, "w", encoding="utf-8", newline="") as handle:
                handle.write(result)
        return None
Example #16
0
    def test_expand_user_normal_path(self):
        """A tilde-free absolute path is left exactly as it was given."""
        given = '/somefolder/sometest'
        observed = common._expand_user(given)

        # Identity for ordinary paths, consistent with os.path.expanduser.
        assert observed == given
        assert os.path.expanduser(given) == observed