def test_train_test_split(self):
    """Check that a non-streaming train/test split keeps every row.

    The two text parts returned by ``train_test_split`` are parsed back
    with ``StreamingDataFrame.read_str``, concatenated, and compared to
    the full source frame after sorting on ``cint``.
    """
    # NOTE(review): another definition with this same name is visible in
    # this file view; if both live in one class, only the later one runs.
    source = dummy_streaming_dataframe(100)
    train_text, test_text = source.train_test_split(
        index=False, streaming=False)

    # Parse each part back and materialise it as a regular DataFrame.
    train_df = StreamingDataFrame.read_str(train_text).to_dataframe()
    test_df = StreamingDataFrame.read_str(test_text).to_dataframe()

    expected = source.to_dataframe()
    merged = pandas.concat([train_df, test_df])

    # Both parts together must have exactly the shape of the source.
    self.assertEqual(expected.shape, merged.shape)

    # Row order differs after the split; sort and reset the index so the
    # frames can be compared directly.
    merged = merged.sort_values("cint")
    merged = merged.reset_index(drop=True)
    self.assertEqualDataFrame(merged, expected)
def test_train_test_split(self):
    """Check ``read_str`` argument handling and the split round trip.

    First verifies that ``StreamingDataFrame.read_str`` raises
    ``ValueError`` for ``chunksize=None`` and for ``iterator=False``, and
    that it can be called with UTF-8 encoded bytes.  Then parses both
    parts of a non-streaming train/test split, concatenates them, and
    compares the result to the full source frame after sorting on
    ``cint``.
    """
    # NOTE(review): another definition with this same name is visible in
    # this file view; if both live in one class, only the later one runs.
    source = dummy_streaming_dataframe(100)
    train_text, test_text = source.train_test_split(
        index=False, streaming=False)

    # Each of these keyword arguments is expected to be rejected.
    for rejected in ({"chunksize": None}, {"iterator": False}):
        self.assertRaise(
            lambda kw=rejected: StreamingDataFrame.read_str(
                train_text, **kw),
            ValueError)

    # Smoke check: the call with bytes input must not raise; the result
    # is intentionally discarded.
    StreamingDataFrame.read_str(train_text.encode('utf-8'))

    # Parse each part back and materialise it as a regular DataFrame.
    train_df = StreamingDataFrame.read_str(train_text).to_dataframe()
    test_df = StreamingDataFrame.read_str(test_text).to_dataframe()

    expected = source.to_dataframe()
    merged = pandas.concat([train_df, test_df])

    # Both parts together must have exactly the shape of the source.
    self.assertEqual(expected.shape, merged.shape)

    # Row order differs after the split; sort and reset the index so the
    # frames can be compared directly.
    merged = merged.sort_values("cint")
    merged = merged.reset_index(drop=True)
    self.assertEqualDataFrame(merged, expected)