예제 #1
0
 def from_func(_):
     """Build a small in-memory Arrow table and return one entry as a tensor.

     The single positional argument is ignored.
     """
     names = ["a", "b", "c"]
     columns = [
         pa.array([1, 2, 3], type=pa.int32()),
         pa.array([4, 5, 6], type=pa.int64()),
         pa.array([7, 8, 9], type=pa.float32()),
     ]
     table = pa.Table.from_arrays(columns, names)
     # Only entry 1 is described in the spec; presumably this is the column
     # index of "b", whose Arrow type is int64 -- confirm against IOTensor.
     arrow_io = IOTensor.from_arrow(table, spec={1: tf.int64})
     return arrow_io(1).to_tensor()
예제 #2
0
    def test_arrow_io_tensor_chunked(self):
        """test_arrow_io_tensor_chunked

        Builds a table from the scalar and fixed-size-list fixtures,
        concatenates ``num_chunks`` copies of it, and runs the shared test
        case against an IOTensor created from the concatenated table.
        """

        num_chunks = 2

        chunk_data = TruthData(
            self.scalar_data + self.list_fixed_data,
            self.scalar_dtypes + self.list_fixed_dtypes,
            self.scalar_shapes + self.list_fixed_shapes,
        )

        # Make a table holding the chunk data repeated once per chunk
        table = self.make_table(chunk_data)
        table = pa.concat_tables([table] * num_chunks)

        # Scale the leading dimension of each expected shape by the number of
        # chunks. Using num_chunks here (rather than a hard-coded doubling)
        # keeps the shapes in step with the table and the truth data below.
        output_shapes = self.scalar_shapes + self.list_fixed_shapes
        output_shapes = [
            tf.TensorShape(
                [d * num_chunks if i == 0 else d
                 for i, d in enumerate(shape)])
            for shape in output_shapes
        ]

        truth_data = TruthData(
            [d * num_chunks for d in chunk_data.data],
            self.scalar_dtypes + self.list_fixed_dtypes,
            output_shapes,
        )

        # Guard: the test is only meaningful if the column really is chunked.
        self.assertGreater(table[0].num_chunks, 1)
        iot = IOTensor.from_arrow(table)
        self.run_test_case(iot, truth_data, table.column_names)
예제 #3
0
    def test_arrow_io_tensor_lists(self):
        """Run the shared IOTensor test case on the fixed-size list fixtures."""
        expected = TruthData(
            self.list_fixed_data,
            self.list_fixed_dtypes,
            self.list_fixed_shapes,
        )

        arrow_table = self.make_table(expected)
        tensor_io = IOTensor.from_arrow(arrow_table)
        self.run_test_case(tensor_io, expected, arrow_table.column_names)
예제 #4
0
 def from_py_func(filename):
     """Return one column as a tensor from a table produced by ``read_table``.

     ``read_table``, ``spec`` and ``column`` are taken from the enclosing
     scope; ``filename`` is forwarded as the sole argument to ``read_table``.
     """
     resource = ArrowIOResource.from_py_function(read_table, [filename])
     arrow_io = IOTensor.from_arrow(resource, spec=spec)
     selected = arrow_io(column)
     return selected.to_tensor()
예제 #5
0
 def from_file(_):
     """Read the whole Arrow file at ``f.name`` and return one column as a tensor.

     The positional argument is ignored; ``f``, ``spec`` and ``column`` are
     taken from the enclosing scope.
     """
     table = pa.RecordBatchFileReader(f.name).read_all()
     arrow_io = IOTensor.from_arrow(table, spec=spec)
     selected = arrow_io(column)
     return selected.to_tensor()