def test_file_reader_writer():
    """Check the deprecated IPC reader/writer aliases.

    Each of ``pa.StreamWriter``, ``pa.FileWriter``, ``pa.StreamReader`` and
    ``pa.FileReader`` is expected to emit a ``FutureWarning`` and to hand
    back an instance of the matching ``RecordBatch*`` class.
    """
    columns = [
        pa.array([1, 2, 3, 4]),
        pa.array(['foo', 'bar', 'baz', None]),
        pa.array([True, None, False, True]),
    ]
    record_batch = pa.RecordBatch.from_arrays(columns, ['f0', 'f1', 'f2'])
    schema = record_batch.schema

    # Deprecated writer aliases: stream format first, then file format.
    stream_sink = pa.BufferOutputStream()
    with pytest.warns(FutureWarning):
        legacy_stream_writer = pa.StreamWriter(stream_sink, schema)
    assert isinstance(legacy_stream_writer, pa.RecordBatchStreamWriter)

    file_sink = pa.BufferOutputStream()
    with pytest.warns(FutureWarning):
        legacy_file_writer = pa.FileWriter(file_sink, schema)
    assert isinstance(legacy_file_writer, pa.RecordBatchFileWriter)

    # Write one batch through each writer, then close both so the sinks
    # hold complete payloads for the readers below.
    legacy_file_writer.write_batch(record_batch)
    legacy_stream_writer.write_batch(record_batch)
    legacy_file_writer.close()
    legacy_stream_writer.close()

    stream_payload = stream_sink.get_result()
    file_payload = file_sink.get_result()

    # Deprecated reader aliases.
    with pytest.warns(FutureWarning):
        legacy_stream_reader = pa.StreamReader(stream_payload)
    assert isinstance(legacy_stream_reader, pa.RecordBatchStreamReader)

    with pytest.warns(FutureWarning):
        legacy_file_reader = pa.FileReader(file_payload)
    assert isinstance(legacy_file_reader, pa.RecordBatchFileReader)
def test_read_all(self):
    """``read_all`` returns a table equal to one built from the written batches."""
    batches = self.write_batches()
    file_contents = self._get_source()

    # pa.FileReader is a deprecated alias that emits FutureWarning;
    # construct the non-deprecated reader class directly.
    reader = pa.RecordBatchFileReader(file_contents)

    result = reader.read_all()
    expected = pa.Table.from_batches(batches)
    assert result.equals(expected)
def test_simple_roundtrip(self):
    """Batches read back by index equal the batches that were written."""
    batches = self.write_batches()
    file_contents = self._get_source()

    # pa.FileReader is a deprecated alias that emits FutureWarning;
    # construct the non-deprecated reader class directly.
    reader = pa.RecordBatchFileReader(file_contents)

    assert reader.num_record_batches == len(batches)

    # Keep the written batch and the one read back under distinct names so
    # the loop variable is not overwritten inside the loop body.
    for i, expected in enumerate(batches):
        actual = reader.get_batch(i)
        assert expected.equals(actual)
def read_file(source):
    """Return every record batch in ``source`` as a list, in index order.

    ``source`` is passed straight to ``pa.RecordBatchFileReader``; the
    batches are fetched one by one with ``get_batch``.
    """
    # pa.FileReader is a deprecated alias that emits FutureWarning;
    # construct the non-deprecated reader class directly.
    reader = pa.RecordBatchFileReader(source)
    return [reader.get_batch(i) for i in range(reader.num_record_batches)]