def from_func(_):
    """Build a three-column Arrow table and read column index 1 as int64.

    The unused positional arg exists so this can be mapped over a dataset
    element; the spec keys columns by integer index rather than name.
    """
    int32_col = pa.array([1, 2, 3], type=pa.int32())
    int64_col = pa.array([4, 5, 6], type=pa.int64())
    float32_col = pa.array([7, 8, 9], type=pa.float32())
    arrow_table = pa.Table.from_arrays(
        [int32_col, int64_col, float32_col], ["a", "b", "c"])
    io_tensor = IOTensor.from_arrow(arrow_table, spec={1: tf.int64})
    return io_tensor(1).to_tensor()
def test_arrow_io_tensor_chunked(self):
    """test_arrow_io_tensor_chunked

    Concatenates the same table num_chunks times so each column becomes a
    chunked array, then verifies IOTensor reads across chunk boundaries.
    """
    num_chunks = 2

    # One chunk's worth of truth data (scalars plus fixed-length lists).
    chunk_data = TruthData(
        self.scalar_data + self.list_fixed_data,
        self.scalar_dtypes + self.list_fixed_dtypes,
        self.scalar_shapes + self.list_fixed_shapes,
    )

    # Make a table, then concatenate it with itself to get num_chunks chunks.
    table = self.make_table(chunk_data)
    table = pa.concat_tables([table] * num_chunks)

    # Scale the batch (first) dimension of each truth shape by num_chunks.
    # Was hard-coded as `d + d`, which is only correct when num_chunks == 2.
    output_shapes = self.scalar_shapes + self.list_fixed_shapes
    output_shapes = [
        tf.TensorShape(
            [d * num_chunks if i == 0 else d for i, d in enumerate(shape)])
        for shape in output_shapes
    ]

    truth_data = TruthData(
        [d * num_chunks for d in chunk_data.data],
        self.scalar_dtypes + self.list_fixed_dtypes,
        output_shapes,
    )

    # Sanity check: concatenation actually produced a multi-chunk column.
    self.assertGreater(table[0].num_chunks, 1)

    iot = IOTensor.from_arrow(table)
    self.run_test_case(iot, truth_data, table.column_names)
def test_arrow_io_tensor_lists(self):
    """test_arrow_io_tensor_lists"""
    # Truth data covering only the fixed-length list columns.
    list_truth = TruthData(self.list_fixed_data,
                           self.list_fixed_dtypes,
                           self.list_fixed_shapes)
    arrow_table = self.make_table(list_truth)
    tensor = IOTensor.from_arrow(arrow_table)
    self.run_test_case(tensor, list_truth, arrow_table.column_names)
def from_py_func(filename):
    """Load a table via a py_function-backed resource and read `column`.

    `read_table`, `spec`, and `column` come from the enclosing test scope.
    """
    resource = ArrowIOResource.from_py_function(read_table, [filename])
    io_tensor = IOTensor.from_arrow(resource, spec=spec)
    return io_tensor(column).to_tensor()
def from_file(_):
    """Read all record batches from the Arrow IPC file and return `column`.

    The unused arg allows mapping over a dataset element; `f`, `spec`, and
    `column` come from the enclosing test scope.
    """
    file_reader = pa.RecordBatchFileReader(f.name)
    full_table = file_reader.read_all()
    io_tensor = IOTensor.from_arrow(full_table, spec=spec)
    return io_tensor(column).to_tensor()