def test_filesystem_uri(tmpdir): from pyarrow import orc table = pa.table({"a": [1, 2, 3]}) directory = tmpdir / "data_dir" directory.mkdir() path = directory / "data.orc" orc.write_table(table, str(path)) # filesystem object result = orc.read_table(path, filesystem=fs.LocalFileSystem()) assert result.equals(table) # filesystem URI result = orc.read_table("data_dir/data.orc", filesystem=util._filesystem_uri(tmpdir)) assert result.equals(table)
def test_readwrite(tmpdir): from pyarrow import orc a = pa.array([1, None, 3, None]) b = pa.array([None, "Arrow", None, "ORC"]) table = pa.table({"int64": a, "utf8": b}) file = tmpdir.join("test.orc") orc.write_table(table, file) output_table = orc.read_table(file) assert table.equals(output_table) output_table = orc.read_table(file, []) assert 4 == output_table.num_rows assert 0 == output_table.num_columns output_table = orc.read_table(file, columns=["int64"]) assert 4 == output_table.num_rows assert 1 == output_table.num_columns