def java_to_python_record_batch(self, root):
    """Export a Java VectorSchemaRoot through the Arrow C Data Interface
    and re-import it on the Python side as a pyarrow.RecordBatch.
    """
    # Allocate the two C Data Interface structs and take their raw addresses.
    schema_struct = ffi.new("struct ArrowSchema*")
    schema_ptr = int(ffi.cast("uintptr_t", schema_struct))
    array_struct = ffi.new("struct ArrowArray*")
    array_ptr = int(ffi.cast("uintptr_t", array_struct))

    # Java fills both structs; no dictionary provider is supplied (None).
    self.java_c.Data.exportVectorSchemaRoot(
        self.java_allocator, root, None,
        self.java_c.ArrowArray.wrap(array_ptr),
        self.java_c.ArrowSchema.wrap(schema_ptr))

    # Consume the exported structs on the Python side.
    return pa.RecordBatch._import_from_c(array_ptr, schema_ptr)
def java_to_python_array(self, vector, dictionary_provider=None):
    """Export a Java vector through the Arrow C Data Interface and
    re-import it on the Python side as a pyarrow.Array.

    `dictionary_provider` is forwarded to the Java exporter and may be
    None when the vector is not dictionary-encoded.
    """
    schema_struct = ffi.new("struct ArrowSchema*")
    schema_ptr = int(ffi.cast("uintptr_t", schema_struct))
    array_struct = ffi.new("struct ArrowArray*")
    array_ptr = int(ffi.cast("uintptr_t", array_struct))

    # Java writes the vector's data and type into the two structs.
    self.java_c.Data.exportVector(
        self.java_allocator, vector, dictionary_provider,
        self.java_c.ArrowArray.wrap(array_ptr),
        self.java_c.ArrowSchema.wrap(schema_ptr))

    return pa.Array._import_from_c(array_ptr, schema_ptr)
def check_export_import_batch(batch_factory):
    """Round-trip a RecordBatch through the C Data Interface and verify
    data equality, allocation accounting, and release semantics.
    """
    c_schema = ffi.new("struct ArrowSchema*")
    ptr_schema = int(ffi.cast("uintptr_t", c_schema))
    c_array = ffi.new("struct ArrowArray*")
    ptr_array = int(ffi.cast("uintptr_t", c_array))

    gc.collect()  # Make sure no Arrow data dangles in a ref cycle
    old_allocated = pa.total_allocated_bytes()

    # Schema is known up front
    batch = batch_factory()
    schema = batch.schema
    py_value = batch.to_pydict()
    batch._export_to_c(ptr_array)
    assert pa.total_allocated_bytes() > old_allocated
    # Delete and recreate C++ object from exported pointer
    del batch
    batch_new = pa.RecordBatch._import_from_c(ptr_array, schema)
    assert batch_new.to_pydict() == py_value
    assert batch_new.schema == schema
    assert pa.total_allocated_bytes() > old_allocated
    del batch_new, schema
    assert pa.total_allocated_bytes() == old_allocated
    # Now released
    with assert_array_released:
        pa.RecordBatch._import_from_c(ptr_array, make_schema())

    # Type is exported and imported at the same time
    batch = batch_factory()
    py_value = batch.to_pydict()
    batch._export_to_c(ptr_array, ptr_schema)
    # Delete and recreate C++ objects from exported pointers
    del batch
    batch_new = pa.RecordBatch._import_from_c(ptr_array, ptr_schema)
    assert batch_new.to_pydict() == py_value
    assert batch_new.schema == batch_factory().schema
    assert pa.total_allocated_bytes() > old_allocated
    del batch_new
    assert pa.total_allocated_bytes() == old_allocated
    # Now released
    with assert_schema_released:
        pa.RecordBatch._import_from_c(ptr_array, ptr_schema)

    # Not a struct type
    pa.int32()._export_to_c(ptr_schema)
    batch_factory()._export_to_c(ptr_array)
    with pytest.raises(ValueError,
                       match="ArrowSchema describes non-struct type"):
        pa.RecordBatch._import_from_c(ptr_array, ptr_schema)
    # Now released
    with assert_schema_released:
        pa.RecordBatch._import_from_c(ptr_array, ptr_schema)
def test_export_import_type():
    """Round-trip a DataType through the C Data Interface, checking
    allocation accounting, release-after-import, and the error raised
    for an invalid format string.
    """
    c_schema = ffi.new("struct ArrowSchema*")
    ptr_schema = int(ffi.cast("uintptr_t", c_schema))

    gc.collect()  # Make sure no Arrow data dangles in a ref cycle
    old_allocated = pa.total_allocated_bytes()

    typ = pa.list_(pa.int32())
    typ._export_to_c(ptr_schema)
    assert pa.total_allocated_bytes() > old_allocated
    # Delete and recreate C++ object from exported pointer
    del typ
    assert pa.total_allocated_bytes() > old_allocated
    typ_new = pa.DataType._import_from_c(ptr_schema)
    assert typ_new == pa.list_(pa.int32())
    assert pa.total_allocated_bytes() == old_allocated
    # Now released
    with assert_schema_released:
        pa.DataType._import_from_c(ptr_schema)

    # Invalid format string
    pa.int32()._export_to_c(ptr_schema)
    bad_format = ffi.new("char[]", b"zzz")
    c_schema.format = bad_format
    with pytest.raises(ValueError,
                       match="Invalid or unsupported format string"):
        pa.DataType._import_from_c(ptr_schema)
    # Now released
    with assert_schema_released:
        pa.DataType._import_from_c(ptr_schema)
def test_imported_batch_reader_error():
    """An IPC stream truncated mid-message should surface an OSError
    from the imported RecordBatchReader, both via read_next_batch()
    and via read_all().
    """
    c_stream = ffi.new("struct ArrowArrayStream*")
    ptr_stream = int(ffi.cast("uintptr_t", c_stream))

    schema = pa.schema([('foo', pa.int32())])
    batches = [
        pa.record_batch([[1, 2, 3]], schema=schema),
        pa.record_batch([[4, 5, 6]], schema=schema),
    ]
    buf = make_serialized(schema, batches)

    # Open a corrupt/incomplete stream and export it
    reader = pa.ipc.open_stream(buf[:-16])
    reader._export_to_c(ptr_stream)
    del reader

    reader_new = pa.RecordBatchReader._import_from_c(ptr_stream)
    batch = reader_new.read_next_batch()
    assert batch == batches[0]
    with pytest.raises(OSError,
                       match="Expected to be able to read 16 bytes "
                             "for message body, got 8"):
        reader_new.read_next_batch()

    # Again, but call read_all()
    reader = pa.ipc.open_stream(buf[:-16])
    reader._export_to_c(ptr_stream)
    del reader

    reader_new = pa.RecordBatchReader._import_from_c(ptr_stream)
    with pytest.raises(OSError,
                       match="Expected to be able to read 16 bytes "
                             "for message body, got 8"):
        reader_new.read_all()
def check_export_import_schema(schema_factory):
    """Round-trip a Schema through the C Data Interface and verify
    equality, allocation accounting, release semantics, and the error
    raised when the exported type is not a struct.
    """
    c_schema = ffi.new("struct ArrowSchema*")
    ptr_schema = int(ffi.cast("uintptr_t", c_schema))

    gc.collect()  # Make sure no Arrow data dangles in a ref cycle
    old_allocated = pa.total_allocated_bytes()

    schema_factory()._export_to_c(ptr_schema)
    assert pa.total_allocated_bytes() > old_allocated
    # Delete and recreate C++ object from exported pointer
    schema_new = pa.Schema._import_from_c(ptr_schema)
    assert schema_new == schema_factory()
    assert pa.total_allocated_bytes() == old_allocated
    del schema_new
    assert pa.total_allocated_bytes() == old_allocated
    # Now released
    with assert_schema_released:
        pa.Schema._import_from_c(ptr_schema)

    # Not a struct type
    pa.int32()._export_to_c(ptr_schema)
    with pytest.raises(ValueError,
                       match="ArrowSchema describes non-struct type"):
        pa.Schema._import_from_c(ptr_schema)
    # Now released
    with assert_schema_released:
        pa.Schema._import_from_c(ptr_schema)
def test_export_import_array():
    """Round-trip an Array through the C Data Interface, first with the
    type known up front, then exporting type and data together.
    """
    c_schema = ffi.new("struct ArrowSchema*")
    ptr_schema = int(ffi.cast("uintptr_t", c_schema))
    c_array = ffi.new("struct ArrowArray*")
    ptr_array = int(ffi.cast("uintptr_t", c_array))

    gc.collect()  # Make sure no Arrow data dangles in a ref cycle
    old_allocated = pa.total_allocated_bytes()

    # Type is known up front
    typ = pa.list_(pa.int32())
    arr = pa.array([[1], [2, 42]], type=typ)
    py_value = arr.to_pylist()
    arr._export_to_c(ptr_array)
    assert pa.total_allocated_bytes() > old_allocated
    # Delete recreate C++ object from exported pointer
    del arr
    arr_new = pa.Array._import_from_c(ptr_array, typ)
    assert arr_new.to_pylist() == py_value
    assert arr_new.type == pa.list_(pa.int32())
    assert pa.total_allocated_bytes() > old_allocated
    del arr_new, typ
    assert pa.total_allocated_bytes() == old_allocated
    # Now released
    with assert_array_released:
        pa.Array._import_from_c(ptr_array, pa.list_(pa.int32()))

    # Type is exported and imported at the same time
    arr = pa.array([[1], [2, 42]], type=pa.list_(pa.int32()))
    py_value = arr.to_pylist()
    arr._export_to_c(ptr_array, ptr_schema)
    # Delete and recreate C++ objects from exported pointers
    del arr
    arr_new = pa.Array._import_from_c(ptr_array, ptr_schema)
    assert arr_new.to_pylist() == py_value
    assert arr_new.type == pa.list_(pa.int32())
    assert pa.total_allocated_bytes() > old_allocated
    del arr_new
    assert pa.total_allocated_bytes() == old_allocated
    # Now released
    with assert_schema_released:
        pa.Array._import_from_c(ptr_array, ptr_schema)
def _next(data_handle: int) -> int:
    """Export the next batch from the enclosing `data_iter` through the
    Arrow C Data Interface and hand it to the native library.

    Returns 1 while batches remain, 0 once the iterator is exhausted.
    The cffi structs are appended to the enclosing `c_schemas` /
    `c_arrays` lists so they stay alive until the consumer is done.
    """
    from pyarrow.cffi import ffi

    try:
        batch = next(data_iter)
    except StopIteration:
        return 0

    # Keep the structs referenced; their addresses are passed to C.
    c_schemas.append(ffi.new("struct ArrowSchema*"))
    c_arrays.append(ffi.new("struct ArrowArray*"))
    ptr_schema = int(ffi.cast("uintptr_t", c_schemas[-1]))
    ptr_array = int(ffi.cast("uintptr_t", c_arrays[-1]))
    # pylint: disable=protected-access
    batch._export_to_c(ptr_array, ptr_schema)
    _check_call(
        _LIB.XGImportArrowRecordBatch(
            ctypes.c_void_p(data_handle),
            ctypes.c_void_p(ptr_array),
            ctypes.c_void_p(ptr_schema),
        ))
    return 1
def test_export_import_schema_float_pointer():
    # Previous versions of the R Arrow library used to pass pointer
    # values as a double; make sure this still works but warns.
    c_schema = ffi.new("struct ArrowSchema*")
    ptr_schema = int(ffi.cast("uintptr_t", c_schema))

    match = "Passing a pointer value as a float is unsafe"
    with pytest.warns(UserWarning, match=match):
        make_schema()._export_to_c(float(ptr_schema))
    with pytest.warns(UserWarning, match=match):
        schema_new = pa.Schema._import_from_c(float(ptr_schema))
    assert schema_new == make_schema()
def test_export_import_batch_reader(reader_factory):
    """Round-trip a RecordBatchReader through the C stream interface
    and verify no memory is leaked and the stream ends up released.
    """
    c_stream = ffi.new("struct ArrowArrayStream*")
    ptr_stream = int(ffi.cast("uintptr_t", c_stream))

    gc.collect()  # Make sure no Arrow data dangles in a ref cycle
    old_allocated = pa.total_allocated_bytes()

    _export_import_batch_reader(ptr_stream, reader_factory)

    assert pa.total_allocated_bytes() == old_allocated
    # Now released
    with assert_stream_released:
        pa.RecordBatchReader._import_from_c(ptr_stream)
def test_batch_roundtrip(self):
    with self.assert_pyarrow_memory_released():
        # make sure that Python -> Go -> Python for record
        # batches works correctly and gets the same data in the end
        schema = self.make_schema()
        batch = self.make_batch()
        schema._export_to_c(self.ptr_schema)
        batch._export_to_c(self.ptr_array)
        del schema
        del batch

        # Fresh structs for Go to export back into.
        c_schema = ffi.new("struct ArrowSchema*")
        c_batch = ffi.new("struct ArrowArray*")
        ptr_schema = int(ffi.cast("uintptr_t", c_schema))
        ptr_batch = int(ffi.cast("uintptr_t", c_batch))

        cgotest.importThenExportRecord(self.ptr_schema, self.ptr_array,
                                       ptr_schema, ptr_batch)
        batch_new = pa.RecordBatch._import_from_c(ptr_batch, ptr_schema)
        assert batch_new == self.make_batch()
        del batch_new
        del c_schema
        del c_batch
def test_schema_roundtrip(self):
    with self.assert_pyarrow_memory_released():
        # make sure that Python -> Go -> Python ends up with
        # the same exact schema
        schema = self.make_schema()
        schema._export_to_c(self.ptr_schema)
        del schema

        # Fresh struct for Go to export back into.
        c_schema = ffi.new("struct ArrowSchema*")
        ptr_schema = int(ffi.cast("uintptr_t", c_schema))

        cgotest.importThenExportSchema(self.ptr_schema, ptr_schema)
        schema_new = pa.Schema._import_from_c(ptr_schema)
        assert schema_new == self.make_schema()
        del c_schema
def test_export_import_field():
    """Round-trip a Field through the C Data Interface, checking
    allocation accounting and release-after-import.
    """
    c_schema = ffi.new("struct ArrowSchema*")
    ptr_schema = int(ffi.cast("uintptr_t", c_schema))

    gc.collect()  # Make sure no Arrow data dangles in a ref cycle
    old_allocated = pa.total_allocated_bytes()

    field = pa.field("test", pa.list_(pa.int32()), nullable=True)
    field._export_to_c(ptr_schema)
    assert pa.total_allocated_bytes() > old_allocated
    # Delete and recreate C++ object from exported pointer
    del field
    assert pa.total_allocated_bytes() > old_allocated

    field_new = pa.Field._import_from_c(ptr_schema)
    assert field_new == pa.field("test", pa.list_(pa.int32()),
                                 nullable=True)
    assert pa.total_allocated_bytes() == old_allocated
    # Now released
    with assert_schema_released:
        pa.Field._import_from_c(ptr_schema)
def java_to_python_field(self, jfield):
    """Export a Java Field through the Arrow C Data Interface and
    re-import it on the Python side as a pyarrow.Field.
    """
    schema_struct = ffi.new("struct ArrowSchema*")
    schema_ptr = int(ffi.cast("uintptr_t", schema_struct))

    # No dictionary provider is supplied (None).
    self.java_c.Data.exportField(
        self.java_allocator, jfield, None,
        self.java_c.ArrowSchema.wrap(schema_ptr))

    return pa.Field._import_from_c(schema_ptr)
def setUp(self):
    """Allocate per-test ArrowSchema/ArrowArray structs and record the
    raw pointers the tests pass across the language boundary.
    """
    self.c_schema = ffi.new("struct ArrowSchema*")
    self.ptr_schema = int(ffi.cast("uintptr_t", self.c_schema))
    self.c_array = ffi.new("struct ArrowArray*")
    self.ptr_array = int(ffi.cast("uintptr_t", self.c_array))