def test_json_compressed(compression):
    """Round-trip ddf through to_json/read_json with an explicit codec."""
    if compression == 'xz' and lzma is None:
        pytest.skip(
            "LZMA not available. Please install backports.lzma on Python 2.")
    with tmpdir() as tmp:
        dd.to_json(ddf, tmp, compression=compression)
        result = dd.read_json(os.path.join(tmp, '*'), compression=compression)
        # Index is not preserved by the JSON round trip, hence check_index=False.
        assert_eq(df, result.compute(), check_index=False)
def test_json_compressed(compression):
    """Writing and reading back with the same codec must reproduce df."""
    if lzma is None and compression == 'xz':
        pytest.skip(
            "LZMA not available. Please install backports.lzma on Python 2."
        )
    with tmpdir() as path:
        dd.to_json(ddf, path, compression=compression)
        pattern = os.path.join(path, '*')
        roundtripped = dd.read_json(pattern, compression=compression)
        # Row order/index may differ after the round trip.
        assert_eq(df, roundtripped.compute(), check_index=False)
def persist(self, output):
    """Persist ``output`` to ``self.loc``.

    A dask DataFrame is written as CSV or JSON according to ``self.ext``.
    Any other value is coerced to a dask Bag (with a conversion warning if
    it was not one already) and written as line-delimited JSON text files.

    Args:
        output: A ``df.core.DataFrame``, a ``db.core.Bag``, or any iterable
            of JSON-serializable records.

    Raises:
        Exception: if ``self.ext`` is neither ``"csv"`` nor ``"json"`` for a
            DataFrame output.
    """
    # NOTE(review): ``assert`` is stripped under ``python -O``; raise
    # ValueError here if a None output must always be rejected.
    assert output is not None
    if isinstance(output, df.core.DataFrame):
        if self.ext == "csv":
            df.to_csv(output, self.loc, index=False, encoding="utf-8")
        elif self.ext == "json":  # BUG FIX: was ``self.wxt`` (AttributeError)
            df.to_json(output, self.loc, encoding="utf-8")
        else:
            raise Exception(self.ext + " not supported")
    else:
        if not isinstance(output, db.core.Bag):
            # ``Logger.warn`` is a deprecated alias; ``warning`` is canonical.
            logging.getLogger("system").warning("WARNING: converting to bag")
        # ``collections.Iterable`` was removed in Python 3.10; the ABC lives
        # in ``collections.abc``.
        assert isinstance(output, collections.abc.Iterable)
        output = db.from_sequence(output, npartitions=self.npartitions)
        output.map(json.dumps).to_textfiles(self.loc)
def test_read_json_inferred_compression():
    """read_json should infer gzip compression from the .gz suffix."""
    with tmpdir() as path:
        target = os.path.join(path, "*.json.gz")
        dd.to_json(ddf, target, compression="gzip")
        # No compression argument here: inference from the extension.
        loaded = dd.read_json(target)
        assert_eq(df, loaded.compute(), check_index=False)
def test_json_compressed(compression):
    """Compressed write followed by compressed read must round-trip df."""
    with tmpdir() as workdir:
        dd.to_json(ddf, workdir, compression=compression)
        glob_path = os.path.join(workdir, "*")
        restored = dd.read_json(glob_path, compression=compression)
        assert_eq(df, restored.compute(), check_index=False)
def test_read_json_inferred_compression():
    """A *.json.gz glob should make read_json pick gzip automatically."""
    with tmpdir() as path:
        pattern = os.path.join(path, '*.json.gz')
        dd.to_json(ddf, pattern, compression='gzip')
        # Deliberately omit compression= so inference is exercised.
        result = dd.read_json(pattern)
        assert_eq(df, result.compute(), check_index=False)
def test_write_orient_not_records_and_lines():
    """to_json must reject lines=True with any orient other than records."""
    with tmpfile("json") as f, pytest.raises(ValueError, match="Line-delimited JSON"):
        dd.to_json(ddf, f, orient="split", lines=True)