def test_encode_df():
    """Check that df2msg encodes columns, labels and an optional index.

    The weather CSV is loaded and extended with a categorical column and a
    float column before encoding. The frame is encoded twice: once as read
    (no index expected in the message) and once with 'DATE' set as index.
    """
    labels = {
        'int': 7,
        'str': 'wassup?',
    }
    df = pd.read_csv('{}/weather.csv'.format(here))
    df['STATION_CAT'] = df['STATION'].astype('category')
    # The `np.float` alias was removed from NumPy (1.24); it was just the
    # builtin `float`, so use that directly.
    df['WDF2_F'] = df['WDF2'].astype(float)

    msg = pbutils.df2msg(df, labels)
    names = [col.name for col in msg.columns]
    assert set(names) == set(df.columns), 'columns mismatch'
    assert not msg.indices, 'has index'
    assert pbutils.pb2py(msg.labels) == labels, 'lables mismatch'

    # Now with index
    index_name = 'DATE'
    df = df.set_index(index_name)
    msg = pbutils.df2msg(df, None)
    names = [col.name for col in msg.columns]
    assert set(names) == set(df.columns), 'columns mismatch'
    assert msg.indices, 'no index'
    assert msg.indices[0].name == index_name, 'bad index name'
def test_index_cols():
    """Check that `index_cols` moves the chosen columns into msg.indices.

    Two columns of a random numeric frame are picked as index columns; the
    encoded message must list them under `indices` and only the remaining
    columns under `columns`.
    """
    cols = list('abcdef')
    size = 10
    df = pd.DataFrame({col: np.random.rand(size) for col in cols})

    # replace=False: np.random.choice samples with replacement by default,
    # which could pick the same column twice and make the test depend on
    # the random draw.
    index_cols = np.random.choice(cols, size=2, replace=False)
    expected_cols = set(col for col in cols if col not in index_cols)

    msg = pbutils.df2msg(df, index_cols=index_cols)
    assert set(col.name for col in msg.columns) == expected_cols, \
        'bad columns'
    assert set(col.name for col in msg.indices) == set(index_cols), \
        'bad indices'
def test_multi_index():
    """Round-trip a MultiIndex frame through df2msg and check index lengths.

    The frame is serialized to bytes, parsed back as an `fpb.Frame`, and
    every index column in the parsed message must hold one string value
    per row of the frame.
    """
    level_names = ['first', 'second']
    pairs = [
        (outer, inner)
        for outer in ('bar', 'baz', 'foo', 'qux')
        for inner in ('one', 'two')
    ]
    frame = pd.DataFrame(
        index=pd.MultiIndex.from_tuples(pairs, names=level_names))
    frame['x'] = range(len(frame))

    payload = pbutils.df2msg(frame).SerializeToString()
    decoded = fpb.Frame.FromString(payload)

    for index_col in decoded.indices:
        assert len(index_col.strings) == len(frame), 'bad index length'
def _read(self, *args, **kw):
    """Return a stand-in response whose `raw` stream holds `self.data`.

    Each frame in `self.data` is encoded with `df2msg`, serialized, and
    written to an in-memory buffer as a length header (packed with
    `http.header_fmt`) followed by the message bytes. The buffer is rewound
    before being exposed. Positional and keyword arguments are ignored.

    Returns:
        The `Response` class itself (not an instance), with class
        attributes `raw` (the buffer) and `ok` (True).
    """
    buf = BytesIO()
    for frame in self.data:
        payload = df2msg(frame, None).SerializeToString()
        header = struct.pack(http.header_fmt, len(payload))
        buf.write(header)
        buf.write(payload)
    # Rewind so that a reader starts at the first header.
    buf.seek(0)

    class Response:
        ok = True
        raw = buf

    return Response
def test_encode_df():
    """Check that df2msg encodes columns, labels and an optional index.

    The weather CSV is encoded twice: first as read, with labels (the
    message must carry every column, no indices, and the same labels), then
    with the 'DATE' column moved into the frame's index and no labels (the
    message must carry the remaining columns and a first index named
    'DATE').
    """
    csv_path = '{}/weather.csv'.format(here)
    frame = pd.read_csv(csv_path)
    labels = {'int': 7, 'str': 'wassup?'}

    encoded = pbutils.df2msg(frame, labels)
    col_names = {col.name for col in encoded.columns}
    assert col_names == set(frame.columns), 'columns mismatch'
    assert not encoded.indices, 'has index'
    assert pbutils.pb2py(encoded.labels) == labels, 'lables mismatch'

    # Second pass: move DATE out of the columns and into the index.
    index_name = 'DATE'
    date_values = frame.pop(index_name)
    frame.index = date_values

    encoded = pbutils.df2msg(frame, None)
    col_names = {col.name for col in encoded.columns}
    assert col_names == set(frame.columns), 'columns mismatch'
    assert encoded.indices, 'no index'
    first_index = encoded.indices[0]
    assert first_index.name == index_name, 'bad index name'