def testReadCSV(self):
    """Check the DataFrame that ``read_csv`` builds from a small CSV file.

    A 3x3 int64 pandas frame is written to ``test.csv`` inside a fresh
    temporary directory and read back with ``index_col=0`` and
    ``chunk_bytes=10``.  The untiled result must be backed by a
    ``DataFrameReadCSV`` op and expose the three original columns; after
    tiling it must hold four chunks, each carrying the original columns
    and dtypes, with more than one distinct ``index_value`` key among
    them.
    """
    work_dir = tempfile.mkdtemp()
    csv_path = os.path.join(work_dir, 'test.csv')
    try:
        rows = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
        expected = pd.DataFrame(
            np.array(rows), columns=['a', 'b', 'c'], dtype=np.int64
        )
        expected.to_csv(csv_path)

        # NOTE(review): chunk_bytes=10 is presumably small on purpose so
        # the file is split into several chunks -- confirm with read_csv.
        frame = read_csv(csv_path, index_col=0, chunk_bytes=10)
        self.assertIsInstance(frame.op, DataFrameReadCSV)
        self.assertEqual(frame.shape[1], 3)
        pd.testing.assert_index_equal(
            expected.columns, frame.columns_value.to_pandas()
        )

        tiled = frame.tiles()
        self.assertEqual(len(tiled.chunks), 4)

        expected_columns = expected.columns
        expected_dtypes = expected.dtypes
        seen_index_keys = set()
        for piece in tiled.chunks:
            seen_index_keys.add(piece.index_value.key)
            pd.testing.assert_index_equal(
                expected_columns, piece.columns_value.to_pandas()
            )
            pd.testing.assert_series_equal(expected_dtypes, piece.dtypes)

        # The chunks must not all share a single index_value key.
        self.assertGreater(len(seen_index_keys), 1)
    finally:
        # Remove the temporary directory whether or not the checks passed.
        shutil.rmtree(work_dir)
def test_read_csv():
    """Check the DataFrame that ``read_csv`` builds from a small CSV file.

    A 3x3 int64 pandas frame is written to ``test.csv`` inside a fresh
    temporary directory and read back with ``index_col=0`` and
    ``chunk_bytes=10``.  The untiled result must be backed by a
    ``DataFrameReadCSV`` op and expose the three original columns; after
    ``tile`` it must hold four chunks, each carrying the original columns
    and dtypes, with more than one distinct ``index_value`` key among
    them.
    """
    work_dir = tempfile.mkdtemp()
    csv_path = os.path.join(work_dir, 'test.csv')
    try:
        rows = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
        expected = pd.DataFrame(
            np.array(rows), columns=['a', 'b', 'c'], dtype=np.int64
        )
        expected.to_csv(csv_path)

        # NOTE(review): chunk_bytes=10 is presumably small on purpose so
        # the file is split into several chunks -- confirm with read_csv.
        frame = read_csv(csv_path, index_col=0, chunk_bytes=10)
        assert isinstance(frame.op, DataFrameReadCSV)
        assert frame.shape[1] == 3
        pd.testing.assert_index_equal(
            expected.columns, frame.columns_value.to_pandas()
        )

        tiled = tile(frame)
        assert len(tiled.chunks) == 4

        expected_columns = expected.columns
        expected_dtypes = expected.dtypes
        seen_index_keys = set()
        for piece in tiled.chunks:
            seen_index_keys.add(piece.index_value.key)
            pd.testing.assert_index_equal(
                expected_columns, piece.columns_value.to_pandas()
            )
            pd.testing.assert_series_equal(expected_dtypes, piece.dtypes)

        # The chunks must not all share a single index_value key.
        assert len(seen_index_keys) > 1
    finally:
        # Remove the temporary directory whether or not the checks passed.
        shutil.rmtree(work_dir)