def test_dataset(version):
    """Round-trip a table through several Feather files via FeatherDataset.

    A 100-row, 100-column table of random normal samples is cut into five
    equal, consecutive row slices.  Each slice is written to its own file
    with ``write_feather`` (``version`` is forwarded unchanged), and the
    files are then read back as one table that must equal the original.
    """
    n_rows, n_cols = 100, 100
    num_files = 5
    paths = [random_path() for _ in range(num_files)]

    columns = {}
    for col in range(n_cols):
        columns["col_" + str(col)] = np.random.randn(n_rows)
    table = pa.table(columns)

    # Record the paths in the shared TEST_FILES list before anything is
    # written (presumably so they get removed afterwards -- confirm against
    # the module's teardown).
    TEST_FILES.extend(paths)

    rows_per_file = n_rows // num_files
    for index, path in enumerate(paths):
        start = index * rows_per_file
        stop = start + rows_per_file
        write_feather(table[start:stop], path, version=version)

    result = FeatherDataset(paths).read_table()
    assert result.equals(table)
def test_dataset(version):
    """Round-trip a DataFrame through several Feather files.

    A 100x100 frame of random normal samples is cut into five equal,
    consecutive row slices.  Each slice is written to its own file with
    ``write_feather`` (``version`` is forwarded unchanged), and
    ``FeatherDataset.read_pandas`` must reproduce the original frame.
    """
    n_rows, n_cols = 100, 100
    num_files = 5
    paths = [random_path() for _ in range(num_files)]

    column_names = ['col_' + str(col) for col in range(n_cols)]
    df = pd.DataFrame(np.random.randn(n_rows, n_cols), columns=column_names)

    # Record the paths in the shared TEST_FILES list before anything is
    # written (presumably so they get removed afterwards -- confirm against
    # the module's teardown).
    TEST_FILES.extend(paths)

    rows_per_file = n_rows // num_files
    for index, path in enumerate(paths):
        start = index * rows_per_file
        stop = start + rows_per_file
        write_feather(df.iloc[start:stop], path, version=version)

    result = FeatherDataset(paths).read_pandas()
    assert_frame_equal(result, df)
def test_dataset(self):
    """Round-trip a DataFrame through several Feather files.

    A 100x100 frame of random normal samples is cut into five equal,
    consecutive row slices.  Each slice is written column by column to its
    own file with ``FeatherWriter``, and ``FeatherDataset.read_pandas`` must
    reproduce the original frame.
    """
    num_values = (100, 100)
    num_files = 5
    paths = [random_path() for _ in range(num_files)]
    df = pd.DataFrame(
        np.random.randn(*num_values),
        columns=['col_' + str(i) for i in range(num_values[1])])

    # Record the paths on the test instance before anything is written
    # (presumably so they get removed afterwards -- confirm against the
    # class's teardown).
    self.test_files.extend(paths)

    rows_per_file = num_values[0] // num_files
    for index, path in enumerate(paths):
        start = index * rows_per_file
        stop = start + rows_per_file

        writer = FeatherWriter()
        writer.open(path)
        try:
            for col in range(num_values[1]):
                writer.write_array(df.columns[col],
                                   df.iloc[start:stop, col])
        finally:
            # Close even when a write fails, so the file handle is not
            # left open.
            writer.close()

    data = FeatherDataset(paths).read_pandas()
    assert_frame_equal(data, df)