def test_aggregate_count(self, use_numba):
    """Check that ``count()`` aggregated by "g" equals 2 for every group."""
    if use_numba and not dataiter.USE_NUMBA:
        pytest.skip("No Numba")
    # Force the requested Numba setting for the duration of the check.
    with patch("dataiter.USE_NUMBA", use_numba):
        frame = DataFrame(g=GROUPS)
        grouped = frame.group_by("g")
        counts = grouped.aggregate(n=count())
        assert (counts.n == 2).all()
def test_split(self):
    """Split on columns "x" and "y" and compare each piece via ``tolist()``."""
    frame = DataFrame(
        x=[1, 2, 2, 3, 3, 3],
        y=[1, 1, 1, 1, 1, 2],
    )
    pieces = frame.split("x", "y")
    as_lists = [piece.tolist() for piece in pieces]
    assert as_lists == [[0], [1, 2], [3, 4], [5]]
def test_cbind_broadcast(self):
    """Bind a single-value frame to vehicles.csv and check the added column."""
    original = test.data_frame("vehicles.csv")
    extra = DataFrame(test=1)
    combined = original.cbind(extra)
    # Row count is unchanged and exactly one column is added.
    assert combined.nrow == original.nrow
    assert combined.ncol == original.ncol + 1
    assert np.all(combined.test == 1)
    # Dropping the added column must give back the original frame.
    assert combined.unselect("test") == original
def read_csv(path, *, encoding="utf-8", sep=",", header=True, columns=None, dtypes=None):
    """
    Return the result of ``DataFrame.read_csv`` called with the given arguments.

    `columns` and `dtypes` default to ``None``, which is replaced by a fresh
    empty list and a fresh empty dict respectively. This avoids sharing one
    mutable default object between calls. All other arguments are forwarded
    unchanged.
    """
    columns = [] if columns is None else columns
    dtypes = {} if dtypes is None else dtypes
    return DataFrame.read_csv(
        path,
        encoding=encoding,
        sep=sep,
        header=header,
        columns=columns,
        dtypes=dtypes,
    )
def test_aggregate(self, function, input, output, use_numba):
    """
    Aggregate column "a" by group "g" with `function` and compare to `output`.

    On a mismatch the input frame, the expected vector and the actual
    result are printed before the assertion error is re-raised.
    """
    if use_numba and not dataiter.USE_NUMBA:
        pytest.skip("No Numba")
    with patch("dataiter.USE_NUMBA", use_numba):
        frame = DataFrame(g=GROUPS, a=input)
        grouped = frame.group_by("g")
        result = grouped.aggregate(a=function("a"))
        wanted = Vector(output)
        try:
            assert result.a.equal(wanted)
        except AssertionError:
            # Dump diagnostics, one item per line, then let the failure through.
            for item in ("", frame, "Expected:", wanted, "Got:", result.a):
                print(item)
            raise
def test_from_pandas(self):
    """Convert vehicles.csv to pandas and back and check nothing is lost."""
    import pandas as pd
    original = test.data_frame("vehicles.csv")
    pandas_frame = original.to_pandas()
    assert isinstance(pandas_frame, pd.DataFrame)
    row_count, column_count = pandas_frame.shape[0], pandas_frame.shape[1]
    assert row_count == original.nrow
    assert column_count == original.ncol
    restored = DataFrame.from_pandas(pandas_frame)
    assert restored == original
def to_data_frame(self):
    """
    Convert this list to a :class:`.DataFrame` and return it.

    The data frame is built from the keyword arguments given by
    ``self._to_columns()``.

    >>> data = di.read_json("data/listings.json")
    >>> data.to_data_frame()
    """
    # NOTE(review): function-level import, presumably to avoid a circular
    # import at module load time -- confirm.
    from dataiter import DataFrame
    columns = self._to_columns()
    return DataFrame(**columns)
def test___init___given_data_frame_column(self):
    """Build a frame from a ``DataFrameColumn`` and check its values."""
    column = DataFrameColumn([1, 2, 3])
    frame = DataFrame(a=column)
    assert frame.a.tolist() == [1, 2, 3]
def test___init___empty(self):
    """A frame built without arguments has no rows, columns or column names."""
    empty = DataFrame()
    # Dimensions are both zero.
    assert empty.nrow == 0
    assert empty.ncol == 0
    # Column containers are falsy.
    assert not empty.columns
    assert not empty.colnames
def test___init___broadcast(self):
    """A one-item list and a scalar are both expanded to the longest column."""
    frame = DataFrame(a=[1, 2, 3], b=[1], c=1)
    full = frame.a.tolist()
    assert full == [1, 2, 3]
    from_single_item_list = frame.b.tolist()
    assert from_single_item_list == [1, 1, 1]
    from_scalar = frame.c.tolist()
    assert from_scalar == [1, 1, 1]
def test_write_pickle(self):
    """Write vehicles.csv data to a pickle file and check it reads back equal."""
    import os
    orig = test.data_frame("vehicles.csv")
    handle, path = tempfile.mkstemp(".pkl")
    # mkstemp returns an open OS-level descriptor; close it so it is not
    # leaked (only the path is used below).
    os.close(handle)
    try:
        orig.write_pickle(path)
        data = DataFrame.read_pickle(path)
        assert data == orig
    finally:
        # mkstemp leaves deletion of the file to the caller.
        os.remove(path)
def test_read_json(self):
    """Read downloads.json from a string path and check its dimensions."""
    json_path = str(test.get_data_path("downloads.json"))
    frame = DataFrame.read_json(json_path)
    assert frame.nrow == 905
    assert frame.ncol == 3
def read_npz(path, *, allow_pickle=True):
    """
    Return the result of ``DataFrame.read_npz`` for `path`.

    `allow_pickle` is forwarded unchanged.

    NOTE(review): `allow_pickle` defaults to ``True``. If it ends up in
    ``numpy.load``, loading an untrusted file could execute arbitrary
    code -- confirm and consider documenting this for callers.
    """
    frame = DataFrame.read_npz(path, allow_pickle=allow_pickle)
    return frame
def test_read_pickle_path(self):
    """Check that ``DataFrame.read_pickle`` accepts a ``Path`` object."""
    import os
    orig = test.data_frame("vehicles.csv")
    handle, path = tempfile.mkstemp(".pkl")
    # mkstemp returns an open OS-level descriptor; close it so it is not
    # leaked (only the path is used below).
    os.close(handle)
    try:
        orig.write_pickle(path)
        DataFrame.read_pickle(Path(path))
    finally:
        # mkstemp leaves deletion of the file to the caller.
        os.remove(path)
def test_read_npz_path(self):
    """Check that ``DataFrame.read_npz`` accepts a ``Path`` object."""
    import os
    orig = test.data_frame("vehicles.csv")
    handle, path = tempfile.mkstemp(".npz")
    # mkstemp returns an open OS-level descriptor; close it so it is not
    # leaked (only the path is used below).
    os.close(handle)
    try:
        orig.write_npz(path)
        DataFrame.read_npz(Path(path))
    finally:
        # mkstemp leaves deletion of the file to the caller.
        os.remove(path)
def test_read_npz(self):
    """Write vehicles.csv data to an NPZ file and check it reads back equal."""
    import os
    orig = test.data_frame("vehicles.csv")
    handle, path = tempfile.mkstemp(".npz")
    # mkstemp returns an open OS-level descriptor; close it so it is not
    # leaked (only the path is used below).
    os.close(handle)
    try:
        orig.write_npz(path)
        data = DataFrame.read_npz(path)
        assert data == orig
    finally:
        # mkstemp leaves deletion of the file to the caller.
        os.remove(path)
def test_read_json_path(self):
    """Read vehicles.json from the value of ``test.get_data_path`` as is."""
    # NOTE(review): presumably a pathlib.Path, unlike the str() variants
    # used by the other read tests -- confirm.
    json_path = test.get_data_path("vehicles.json")
    DataFrame.read_json(json_path)
def test_read_json_dtypes(self):
    """Request object dtype for "make" and "model" and check both columns."""
    json_path = str(test.get_data_path("vehicles.json"))
    requested = {"make": object, "model": object}
    frame = DataFrame.read_json(json_path, dtypes=requested)
    make_column = frame.make
    assert make_column.is_object()
    model_column = frame.model
    assert model_column.is_object()
def test_read_json_columns(self):
    """Read only "make" and "model" and check the resulting column names."""
    wanted = ["make", "model"]
    json_path = str(test.get_data_path("vehicles.json"))
    frame = DataFrame.read_json(json_path, columns=wanted)
    assert frame.colnames == ["make", "model"]
def test___init___given_list(self):
    """Build a frame from a plain list and check its values."""
    values = [1, 2, 3]
    frame = DataFrame(a=values)
    assert frame.a.tolist() == [1, 2, 3]
def test___delattr__(self):
    """Deleting a column attribute removes the column from the frame."""
    column = DataFrameColumn([1, 2, 3])
    frame = DataFrame(a=column)
    assert "a" in frame
    del frame.a
    assert "a" not in frame
def test_read_csv(self):
    """Read vehicles.csv from a string path and check its dimensions."""
    csv_path = str(test.get_data_path("vehicles.csv"))
    frame = DataFrame.read_csv(csv_path)
    assert frame.nrow == 33442
    assert frame.ncol == 12
def test_modify_group_wise(self):
    """Group-wise modify sets "f" to the inverse of each group's row count."""
    def inverse_group_size(group):
        return 1 / group.nrow

    frame = DataFrame(g=[1, 2, 2, 3, 3, 3])
    grouped = frame.group_by("g")
    frame = grouped.modify(f=inverse_group_size)
    # Group sizes are 1, 2 and 3 respectively.
    assert frame.f.tolist() == [1, 1 / 2, 1 / 2, 1 / 3, 1 / 3, 1 / 3]
def test_write_json(self):
    """Write downloads.json data to a JSON file and check it reads back equal."""
    import os
    orig = test.data_frame("downloads.json")
    handle, path = tempfile.mkstemp(".json")
    # mkstemp returns an open OS-level descriptor; close it so it is not
    # leaked (only the path is used below).
    os.close(handle)
    try:
        orig.write_json(path)
        data = DataFrame.read_json(path)
        assert data == orig
    finally:
        # mkstemp leaves deletion of the file to the caller.
        os.remove(path)
def test_to_json(self):
    """Serialize with ``to_json`` and check ``from_json`` restores the frame."""
    original = test.data_frame("downloads.json")
    serialized = original.to_json()
    restored = DataFrame.from_json(serialized)
    assert restored == original
def test_read_csv_path(self):
    """Read vehicles.csv from the value of ``test.get_data_path`` as is."""
    # NOTE(review): presumably a pathlib.Path, unlike the str() variant
    # used by test_read_csv -- confirm.
    csv_path = test.get_data_path("vehicles.csv")
    DataFrame.read_csv(csv_path)
def test_nrow(self):
    """``nrow`` reports 10 for a frame built from ``range(10)``."""
    frame = DataFrame(x=range(10))
    row_count = nrow(frame)
    assert row_count == 10
def to_string(self, *, max_rows=None, max_width=None):
    """
    Return a string representation with geometries shown as type tags.

    Each item of ``self.geometry`` is replaced by ``<TYPE>``, where ``TYPE``
    is the item's ``"type"`` value. The modified data is then formatted by
    ``DataFrame.to_string``, with `max_rows` and `max_width` forwarded
    unchanged.
    """
    tags = [f"<{x['type']}>" for x in self.geometry]
    data = self.modify(geometry=Vector.fast(tags, object))
    # Forward the options by keyword: this method declares them
    # keyword-only, and positional forwarding would raise TypeError if
    # DataFrame.to_string declares them keyword-only as well.
    return DataFrame.to_string(data, max_rows=max_rows, max_width=max_width)