def test_open_fh_compression(decompress, compression):
    """Round-trip text through sh.open wrapping an in-memory buffer."""
    buf = io.BytesIO()
    # Write through the compression wrapper...
    with sh.open(buf, "w", compression=compression) as f:
        f.write("hello world")
    # ...and confirm the raw buffer holds the compressed payload
    assert decompress(buf.getvalue()) == b"hello world"
    # Rewind and read it back through the same wrapper
    buf.seek(0)
    with sh.open(buf, "r", compression=compression) as f:
        assert f.read() == "hello world"
def test_open_binary(tmpdir, openfunc, ext, compression):
    """Binary write / verify / append / read round-trip via sh.open."""
    target = '%s/test_open%s' % (tmpdir, ext)
    with sh.open(target, 'wb', compression=compression) as f:
        f.write(b"Hello world")
    # Cross-check with the reference opener for this compression format
    with openfunc(target, 'rb') as f:
        assert f.read() == b"Hello world"
    # Append mode must preserve the existing payload
    with sh.open(target, 'ab', compression=compression) as f:
        f.write(b" and universe")
    with sh.open(target, 'rb', compression=compression) as f:
        assert f.read() == b"Hello world and universe"
def test_open_binary(str_or_path, tmpdir, openfunc, ext, compression):
    """Binary round-trip through sh.open, with both str and Path targets."""
    target = str_or_path(f"{tmpdir}/test_open{ext}")
    with sh.open(target, "wb", compression=compression) as f:
        f.write(b"Hello world")
    # Verify with the matching reference opener for this format
    with openfunc(target, "rb") as f:
        assert f.read() == b"Hello world"
    # 'ab' must append rather than truncate
    with sh.open(target, "ab", compression=compression) as f:
        f.write(b" and universe")
    with sh.open(target, "rb", compression=compression) as f:
        assert f.read() == b"Hello world and universe"
def test_open_context(tmpdir, openfunc, ext, compression):
    """sh.open as a context manager: environment variables in the path are
    resolved and the file descriptor is closed on block exit.
    """
    os.environ['UNITTEST_BASH'] = str(tmpdir)
    path = '%s/test_open%s' % (tmpdir, ext)
    path_env = '$UNITTEST_BASH/test_open%s' % ext
    with sh.open(path_env, 'w', compression=compression) as f:
        f.write("Hello world")
    check_fd_was_closed('test_open')
    # Content landed at the resolved location
    with openfunc(path, 'rt') as f:
        assert f.read() == "Hello world"
    with sh.open(path_env, 'a', compression=compression) as f:
        f.write(" and universe")
    check_fd_was_closed('test_open')
    with sh.open(path_env, 'r', compression=compression) as f:
        assert f.read() == "Hello world and universe"
    check_fd_was_closed('test_open')
def test_open_context(str_or_path, tmpdir, openfunc, ext, compression):
    """Context-manager usage of sh.open with env-var expansion in the path
    (str or Path) and fd-closure checks after every block.
    """
    os.environ["UNITTEST_BASH"] = str(tmpdir)
    resolved = f"{tmpdir}/test_open{ext}"
    unresolved = str_or_path(f"$UNITTEST_BASH/test_open{ext}")
    with sh.open(unresolved, "w", compression=compression) as f:
        f.write("Hello world")
    check_fd_was_closed("test_open")
    # The write went to the expanded path
    with openfunc(resolved, "rt") as f:
        assert f.read() == "Hello world"
    with sh.open(unresolved, "a", compression=compression) as f:
        f.write(" and universe")
    check_fd_was_closed("test_open")
    with sh.open(unresolved, "r", compression=compression) as f:
        assert f.read() == "Hello world and universe"
    check_fd_was_closed("test_open")
def test_open_exclusive_success(str_or_path, tmpdir, openfunc, ext, compression):
    """Exclusive-create mode 'x' succeeds when the target does not exist."""
    target = str_or_path(f"{tmpdir}/test_open{ext}")
    with sh.open(target, "x", compression=compression) as f:
        f.write("Hello world")
    with openfunc(target, "rt") as f:
        assert f.read() == "Hello world"
def test_open_kwargs(tmpdir, openfunc, ext, compression, newline):
    """Extra **kwargs (newline is used as the probe) are forwarded
    untouched to the underlying open function."""
    target = f"{tmpdir}/test_open{ext}"
    with sh.open(target, "w", compression=compression, newline=newline) as f:
        f.write("Hello\nworld")
    # Read back raw bytes to see which newline was actually written
    with openfunc(target, "rb") as f:
        assert f.read() == b"Hello" + newline.encode("utf8") + b"world"
def test_open_nocontext(tmpdir, openfunc, ext, compression):
    """sh.open used without a with-block still releases its fd on close()."""
    target = '%s/test_open%s' % (tmpdir, ext)
    f = sh.open(target, 'w', compression=compression)
    f.write("Hello world")
    f.close()
    check_fd_was_closed('test_open')
    with openfunc(target, 'rt') as f:
        assert f.read() == "Hello world"
def test_open_kwargs(tmpdir, openfunc, ext, compression, newline):
    # Any extra keyword arguments must reach the underlying open function
    # unchanged; newline is the probe here.
    target = '%s/test_open%s' % (tmpdir, ext)
    with sh.open(target, 'w', compression=compression, newline=newline) as f:
        f.write("Hello\nworld")
    # Inspect the raw bytes to confirm which newline was written
    with openfunc(target, 'rb') as f:
        assert f.read() == b"Hello" + newline.encode('utf8') + b"world"
def test_open_nocontext(str_or_path, tmpdir, openfunc, ext, compression):
    """Explicit close() (no with-block) must still close the descriptor."""
    target = str_or_path(f"{tmpdir}/test_open{ext}")
    writer = sh.open(target, "w", compression=compression)
    writer.write("Hello world")
    writer.close()
    check_fd_was_closed("test_open")
    with openfunc(target, "rt") as f:
        assert f.read() == "Hello world"
def test_concatenate_t4(str_or_path, tmpdir):
    """concatenate when the output file does not exist yet."""
    out = str_or_path(f"{tmpdir}/out")
    in1 = str_or_path(f"{tmpdir}/in1")
    in2 = str_or_path(f"{tmpdir}/in2")
    for path, payload in ((in1, "2"), (in2, "3")):
        with sh.open(path, "w") as f:
            f.write(payload)
    # Append mode against a missing target behaves like write mode
    sh.concatenate([in1, in2], out, "a")
    with sh.open(out) as f:
        assert f.read() == "2\n3\n"
    # Default mode overwrites and yields the same result
    sh.concatenate([in1, in2], out)
    with sh.open(out) as f:
        assert f.read() == "2\n3\n"
def test_concatenate_t4(tmpdir):
    # concatenate when the output file does not exist beforehand
    dst = '%s/out' % tmpdir
    src1 = '%s/in1' % tmpdir
    src2 = '%s/in2' % tmpdir
    with sh.open(src1, 'w') as f:
        f.write('2')
    with sh.open(src2, 'w') as f:
        f.write('3')
    # 'a' on a missing file creates it
    sh.concatenate([src1, src2], dst, 'a')
    with sh.open(dst) as f:
        assert f.read() == '2\n3\n'
    # default mode ('w') produces the same content
    sh.concatenate([src1, src2], dst)
    with sh.open(dst) as f:
        assert f.read() == '2\n3\n'
def test_concatenate_b(str_or_path, tmpdir):
    """concatenate in binary mode: bytes are joined verbatim, with no
    newline insertion."""
    out = str_or_path(f"{tmpdir}/out")
    in1 = str_or_path(f"{tmpdir}/in1")
    in2 = str_or_path(f"{tmpdir}/in2")
    for path, payload in ((out, b"1"), (in1, b"2"), (in2, b"3")):
        with sh.open(path, "wb") as f:
            f.write(payload)
    sh.concatenate([in1, in2], out, "ab")
    with sh.open(out, "rb") as f:
        assert f.read() == b"123"
    sh.concatenate([in1, in2], out, "wb")
    with sh.open(out, "rb") as f:
        assert f.read() == b"23"
def test_concatenate_t3(tmpdir):
    # concatenate when the output file already exists but is empty
    dst = '%s/out' % tmpdir
    src1 = '%s/in1' % tmpdir
    src2 = '%s/in2' % tmpdir
    # Touch the empty output file
    with sh.open(dst, 'w'):
        pass
    with sh.open(src1, 'w') as f:
        f.write('2\n')
    with sh.open(src2, 'w') as f:
        f.write('3\n')
    # Appending to an empty file adds no spurious separator
    sh.concatenate([src1, src2], dst, 'a')
    with sh.open(dst) as f:
        assert f.read() == '2\n3\n'
    sh.concatenate([src1, src2], dst)
    with sh.open(dst) as f:
        assert f.read() == '2\n3\n'
def test_concatenate_b(tmpdir):
    # Binary-mode concatenate: raw bytes, no newline handling
    dst = '%s/out' % tmpdir
    src1 = '%s/in1' % tmpdir
    src2 = '%s/in2' % tmpdir
    with sh.open(dst, 'wb') as f:
        f.write(b'1')
    with sh.open(src1, 'wb') as f:
        f.write(b'2')
    with sh.open(src2, 'wb') as f:
        f.write(b'3')
    sh.concatenate([src1, src2], dst, 'ab')
    with sh.open(dst, 'rb') as f:
        assert f.read() == b'123'
    sh.concatenate([src1, src2], dst, 'wb')
    with sh.open(dst, 'rb') as f:
        assert f.read() == b'23'
def test_concatenate_t3(str_or_path, tmpdir):
    """concatenate when the output file already exists and is empty."""
    out = str_or_path(f"{tmpdir}/out")
    in1 = str_or_path(f"{tmpdir}/in1")
    in2 = str_or_path(f"{tmpdir}/in2")
    # Touch an empty output file first
    with sh.open(out, "w"):
        pass
    for path, payload in ((in1, "2\n"), (in2, "3\n")):
        with sh.open(path, "w") as f:
            f.write(payload)
    sh.concatenate([in1, in2], out, "a")
    with sh.open(out) as f:
        assert f.read() == "2\n3\n"
    sh.concatenate([in1, in2], out)
    with sh.open(out) as f:
        assert f.read() == "2\n3\n"
def test_concatenate_t2(str_or_path, tmpdir, newline):
    """Output exists and is non-empty; every file already ends with a
    newline, so no extra separator must be inserted."""
    out = str_or_path(f"{tmpdir}/out")
    in1 = str_or_path(f"{tmpdir}/in1")
    in2 = str_or_path(f"{tmpdir}/in2")
    for path, payload in ((out, "1\n"), (in1, "2\n3\n"), (in2, "4\n")):
        with sh.open(path, "w", newline=newline) as f:
            f.write(payload)
    nl = newline.encode("utf-8")
    sh.concatenate([in1, in2], out, "a", newline=newline)
    with sh.open(out, "rb") as f:
        assert f.read() == b"1" + nl + b"2" + nl + b"3" + nl + b"4" + nl
    sh.concatenate([in1, in2], out, newline=newline)
    with sh.open(out, "rb") as f:
        assert f.read() == b"2" + nl + b"3" + nl + b"4" + nl
def test_concatenate_t2(tmpdir, newline):
    # Output already exists and is non-empty; all files end with a newline
    dst = '%s/out' % tmpdir
    src1 = '%s/in1' % tmpdir
    src2 = '%s/in2' % tmpdir
    with sh.open(dst, 'w', newline=newline) as f:
        f.write('1\n')
    with sh.open(src1, 'w', newline=newline) as f:
        f.write('2\n3\n')
    with sh.open(src2, 'w', newline=newline) as f:
        f.write('4\n')
    nl = newline.encode('utf-8')
    sh.concatenate([src1, src2], dst, 'a', newline=newline)
    with sh.open(dst, 'rb') as f:
        assert f.read() == b'1' + nl + b'2' + nl + b'3' + nl + b'4' + nl
    sh.concatenate([src1, src2], dst, newline=newline)
    with sh.open(dst, 'rb') as f:
        assert f.read() == b'2' + nl + b'3' + nl + b'4' + nl
def read_csv(path_or_buf, unstack=True):
    """Parse an NDCSV file into a :class:`xarray.DataArray`.

    This function is conceptually similar to :func:`pandas.read_csv`, except
    that it only works for files that are strictly formatted according to
    :doc:`format` and, by design, does not offer any of the many config
    switches available in :func:`pandas.read_csv`.

    :param path_or_buf:
        One of:

        - .csv file path (:class:`str` or any :class:`os.PathLike`, e.g.
          :class:`pathlib.Path`)
        - .csv.gz / .csv.bz2 / .csv.xz file path (the compression algorithm
          is inferred automatically)
        - file-like object open for reading. It must support rewinding
          through ``seek(0)``.

    :param bool unstack:
        Set to True (the default) to automatically unstack any and all
        stacked dimensions in the output xarray, using first-seen order.
        Note that this differs from :meth:`xarray.DataArray.unstack`, which
        may occasionally use alphabetical order instead. All indices must be
        unique for the unstack to succeed. Non-index coords can be
        duplicated.

        Set to False to return the stacked dimensions as they appear in the
        CSV file.

    :returns:
        xarray.DataArray
    """
    import os

    # Accept pathlib.Path and other PathLike objects in addition to str.
    # Previously a Path fell through and was treated as a buffer.
    if isinstance(path_or_buf, (str, os.PathLike)):
        with sh.open(path_or_buf) as fh:
            return read_csv(fh, unstack=unstack)

    xa = _buf_to_xarray(path_or_buf)
    assert xa.ndim in (0, 1, 2)
    xa = _coords_format_conversion(xa)
    assert xa.ndim in (0, 1, 2)
    if xa.ndim == 1:
        # Single row of data: unpack the only dimension
        xa = _unpack(xa, xa.dims[0], unstack)
    elif xa.ndim == 2:
        # Unpack rows first, then columns
        dims = xa.dims
        xa = _unpack(xa, dims[0], unstack)
        xa = _unpack(xa, dims[1], unstack)
    return xa
def test_concatenate_t1(str_or_path, tmpdir, newline):
    """Output exists and is non-empty; inputs end without a trailing
    newline. Also exercises transparent compression (.gz / .bz2)."""
    out = str_or_path(f"{tmpdir}/out.gz")
    in1 = str_or_path(f"{tmpdir}/in1")
    in2 = str_or_path(f"{tmpdir}/in2.bz2")
    for path, payload in ((out, "1"), (in1, "2\n3"), (in2, "4")):
        with sh.open(path, "w") as f:
            f.write(payload)
    nl = newline.encode("utf-8")
    sh.concatenate([in1, in2], out, "a", newline=newline)
    with sh.open(out, "rb") as f:
        assert f.read() == b"1" + nl + b"2" + nl + b"3" + nl + b"4" + nl
    # mode defaults to 'w'
    sh.concatenate([in1, in2], out, newline=newline)
    with sh.open(out, "rb") as f:
        assert f.read() == b"2" + nl + b"3" + nl + b"4" + nl
def test_concatenate_t1(tmpdir, newline):
    # Output already exists and is non-empty; inputs lack a trailing
    # newline. Compression is inferred from the .gz / .bz2 extensions.
    dst = '%s/out.gz' % tmpdir
    src1 = '%s/in1' % tmpdir
    src2 = '%s/in2.bz2' % tmpdir
    with sh.open(dst, 'w') as f:
        f.write('1')
    with sh.open(src1, 'w') as f:
        f.write('2\n3')
    with sh.open(src2, 'w') as f:
        f.write('4')
    nl = newline.encode('utf-8')
    sh.concatenate([src1, src2], dst, 'a', newline=newline)
    with sh.open(dst, 'rb') as f:
        assert f.read() == b'1' + nl + b'2' + nl + b'3' + nl + b'4' + nl
    # mode defaults to 'w'
    sh.concatenate([src1, src2], dst, newline=newline)
    with sh.open(dst, 'rb') as f:
        assert f.read() == b'2' + nl + b'3' + nl + b'4' + nl
def write_csv(
    array: xarray.DataArray | pandas.Series | pandas.DataFrame,
    path_or_buf: str | IO | None = None,
):
    """Write an n-dimensional array to an NDCSV file.

    Any number of dimensions are supported. If the array has more than two
    dimensions, all dimensions beyond the first are automatically stacked
    together on the columns of the CSV file; if you want to stack dimensions
    on the rows you'll need to manually invoke
    :meth:`xarray.DataArray.stack` beforehand.

    This function is conceptually similar to :meth:`pandas.DataFrame.to_csv`,
    except that none of the many configuration settings is made available to
    the end user, in order to ensure consistency in the output file.

    :param array:
        One of:

        - :class:`xarray.DataArray`
        - :class:`pandas.Series`
        - :class:`pandas.DataFrame`

    :param path_or_buf:
        One of:

        - .csv file path (:class:`str` or any :class:`os.PathLike`, e.g.
          :class:`pathlib.Path`)
        - .csv.gz / .csv.bz2 / .csv.xz file path (the compression algorithm
          is inferred automatically)
        - file-like object open for writing
        - None (the result is returned as a string)

    :raises TypeError:
        if ``array`` is not one of the supported types
    """
    import os

    if path_or_buf is None:
        # Render to an in-memory buffer and return its contents
        buf = io.StringIO()
        write_csv(array, buf)
        return buf.getvalue()

    # Accept pathlib.Path and other PathLike objects in addition to str.
    # Previously a Path fell through and was treated as an open buffer.
    if isinstance(path_or_buf, (str, os.PathLike)):
        # Automatically detect .csv or .csv.gz extension
        with sh.open(path_or_buf, "w") as fh:
            write_csv(array, fh)
        return

    if isinstance(array, xarray.DataArray):
        _write_csv_dataarray(array, path_or_buf)
    elif isinstance(array, (pandas.Series, pandas.DataFrame)):
        _write_csv_pandas(array, path_or_buf)
    else:
        raise TypeError(
            "Input data is not a xarray.DataArray, pandas.Series or pandas.DataFrame"
        )
def test_open_encoding(tmpdir, openfunc, ext, compression):
    """sh.open must default to utf-8 with a replacing error handler."""
    TEXT = "Crème brûlée"
    TEXT_REPLACED = "Cr�me br�l�e"
    path_utf8 = '%s/test_utf8%s' % (tmpdir, ext)
    path_latin1 = '%s/test_latin1%s' % (tmpdir, ext)
    with openfunc(path_utf8, 'wt', encoding='utf-8') as f:
        f.write(TEXT)
    with openfunc(path_latin1, 'wt', encoding='latin1') as f:
        f.write(TEXT)
    # Default encoding must be utf-8
    with sh.open(path_utf8, compression=compression) as f:
        assert f.read() == TEXT
    # An explicit encoding is honoured
    with sh.open(path_latin1, compression=compression, encoding='latin1') as f:
        assert f.read() == TEXT
    # Default error handler replaces undecodable bytes
    with sh.open(path_latin1, compression=compression) as f:
        assert f.read() == TEXT_REPLACED
    # errors='strict' raises instead of replacing
    with pytest.raises(UnicodeDecodeError):
        with sh.open(path_latin1, errors='strict', compression=compression) as f:
            f.read()
def test_open_encoding(tmpdir, openfunc, ext, compression):
    """Default encoding (utf-8) and error handling (replace) of sh.open."""
    TEXT = "Crème brûlée"
    TEXT_REPLACED = "Cr�me br�l�e"
    path_utf8 = f"{tmpdir}/test_utf8{ext}"
    path_latin1 = f"{tmpdir}/test_latin1{ext}"
    # Write the same text in two different encodings
    with openfunc(path_utf8, "wt", encoding="utf-8") as f:
        f.write(TEXT)
    with openfunc(path_latin1, "wt", encoding="latin1") as f:
        f.write(TEXT)
    # utf-8 must be the default encoding
    with sh.open(path_utf8, compression=compression) as f:
        assert f.read() == TEXT
    # An explicit encoding is passed through
    with sh.open(path_latin1, compression=compression, encoding="latin1") as f:
        assert f.read() == TEXT
    # Undecodable bytes are replaced by default
    with sh.open(path_latin1, compression=compression) as f:
        assert f.read() == TEXT_REPLACED
    # With errors="strict" the decode failure surfaces
    with pytest.raises(UnicodeDecodeError):
        with sh.open(path_latin1, errors="strict", compression=compression) as f:
            f.read()
def test_open_exclusive_failure(tmpdir, openfunc, ext, compression):
    """Exclusive-create mode 'x' refuses to clobber an existing file."""
    target = f"{tmpdir}/test_open{ext}"
    # Pre-create the target so the exclusive open must fail
    with open(target, "w"):
        pass
    with pytest.raises(FileExistsError):
        sh.open(target, "x", compression=compression)
def test_open_fh_no_compression(compression):
    """Opening a raw file object with this compression setting must be
    rejected with TypeError."""
    stream = io.BytesIO()
    with pytest.raises(TypeError):
        sh.open(stream, compression=compression)
def test_open_exclusive_failure(tmpdir, openfunc, ext, compression):
    # 'x' mode must raise when the target file already exists
    target = '%s/test_open%s' % (tmpdir, ext)
    # Touch the file first
    with open(target, 'w'):
        pass
    with pytest.raises(FileExistsError):
        sh.open(target, 'x', compression=compression)
def test_open_fd_invalid_compression():
    """Combining a raw OS file descriptor with compression raises
    TypeError."""
    read_fd, _ = os.pipe()
    with pytest.raises(TypeError):
        sh.open(read_fd, "rb", compression="gzip")
def test_open_invalid_compression():
    """An unrecognised compression name raises ValueError."""
    with pytest.raises(ValueError):
        sh.open("foo", compression="unk")
def test_open_fd():
    """sh.open accepts raw OS file descriptors (here: a pipe pair)."""
    read_fd, write_fd = os.pipe()
    # buffering=0 so the write is visible to the reader immediately
    with sh.open(read_fd, "rb", buffering=0) as reader:
        with sh.open(write_fd, "wb", buffering=0) as writer:
            writer.write(b"hello world\n")
            assert reader.readline() == b"hello world\n"