def _convert_internal(self, tensor: TensorAlike) -> List[pa.Array]:
  """Converts a sparse tensor into one values ListArray plus index ListArrays.

  Args:
    tensor: object exposing `indices`, `values` and `dense_shape`.

  Returns:
    A list whose first element is the ListArray of the sparse values and
    whose remaining elements are ListArrays built from indices columns
    1, 2, ..., one per entry of `self._index_column_names`.
  """
  # Transpose the indices (materialized in C-order) so that every column of
  # the original indices becomes one contiguous row that can be sliced out.
  transposed = np.transpose(np.asarray(tensor.indices))
  coo_columns = np.ascontiguousarray(transposed, dtype=np.int64)

  # The first column of the original indices identifies which row each
  # sparse value belongs to.
  row_of_value = pa.array(coo_columns[0, :], type=pa.int64())
  row_count = int(np.asarray(tensor.dense_shape)[0])

  def to_list_array(children):
    # Groups `children` into one list per row, keeping empty lists non-null.
    return array_util.MakeListArrayFromParentIndicesAndValues(
        row_count, row_of_value, children, empty_list_as_null=False)

  sparse_values = pa.array(
      np.asarray(tensor.values), type=self._values_arrow_type)
  list_arrays = [to_list_array(sparse_values)]
  list_arrays.extend(
      to_list_array(pa.array(coo_columns[dim, :], type=pa.int64()))
      for dim in range(1, len(self._index_column_names) + 1))
  return list_arrays
def _convert_internal(self, tensor: TensorAlike) -> List[pa.Array]:
  """Converts a 2-D ragged sparse tensor into a single ListArray.

  Assumptions:
    - the COO indices are sorted (partially checked below);
    - the SparseTensor is 2-D (checked in can_handle());
    - the SparseTensor is ragged.
  Under these assumptions the first column of the COO indices holds "parent
  indices": parent_index[i] == j means the i-th value belongs to the j-th
  sub list. A C++ util then turns parent indices + values into a ListArray.

  Note that the resulting ListArray does not explicitly store the second
  dense dimension. When it is converted back to a SparseTensor with
  tensor_adapter, that dimension is recovered as an upper bound for
  second indices + 1. Hence, if the SparseTensor's second dense dimension is
  not tight, the composition TensorAdapter(TensorsToRecordBatchConverter())
  is not an identity.

  Args:
    tensor: object exposing `indices`, `values` and `dense_shape`.

  Returns:
    A one-element list holding the resulting ListArray.
  """
  shape = np.asarray(tensor.dense_shape)
  coo_indices = np.asarray(tensor.indices)
  row_of_value = coo_indices[:, 0]

  # Sorted parent indices never step backwards; `initial=0` keeps the check
  # well-defined when there are fewer than two values.
  smallest_step = np.min(np.diff(row_of_value), initial=0)
  assert smallest_step >= 0, (
      "The sparse indices must be sorted")

  list_array = array_util.MakeListArrayFromParentIndicesAndValues(
      shape[0],
      pa.array(row_of_value, type=pa.int64()),
      pa.array(np.asarray(tensor.values), type=self._values_arrow_type),
      empty_list_as_null=False)
  return [list_array]
def testMakeListArray(self, num_parents, parent_indices, values,
                      empty_list_as_null, expected):
  """Checks the ListArray built from parent indices and values.

  The result must pass its own `validate()`, must contain no nulls when
  `empty_list_as_null` is false, and must equal `expected`.
  """
  list_array = array_util.MakeListArrayFromParentIndicesAndValues(
      num_parents, parent_indices, values, empty_list_as_null)
  list_array.validate()

  if not empty_list_as_null:
    # Empty lists were requested as non-null, so no null may appear.
    self.assertEqual(list_array.null_count, 0)

  matches = list_array.equals(expected)
  failure_message = "actual: {}, expected: {}".format(list_array, expected)
  self.assertTrue(matches, failure_message)
def _convert_internal(self, tensor: TensorAlike) -> List[pa.Array]:
  """Converts a 2-D ragged sparse tensor into a single ListArray.

  Assumptions:
    - the COO indices are sorted (partially checked below);
    - the SparseTensor is 2-D (checked in can_handle());
    - the SparseTensor is ragged (partially checked below).
  Under these assumptions the first column of the COO indices holds "parent
  indices": parent_index[i] == j means the i-th value belongs to the j-th
  sub list. A C++ util then turns parent indices + values into a ListArray.

  Args:
    tensor: object exposing `indices`, `values` and `dense_shape`.

  Returns:
    A one-element list holding the resulting ListArray.
  """
  shape = np.asarray(tensor.dense_shape)
  coo_indices = np.asarray(tensor.indices)

  # Partial raggedness check: the second dense dimension must be exactly one
  # past the largest second index (skipped when there are no indices).
  assert (coo_indices.size == 0
          or shape[1] == np.max(coo_indices, axis=0)[1] + 1), (
              "SparseTensor is not 2-D ragged")

  row_of_value = coo_indices[:, 0]
  # Sorted parent indices never step backwards; `initial=0` keeps the check
  # well-defined when there are fewer than two values.
  smallest_step = np.min(np.diff(row_of_value), initial=0)
  assert smallest_step >= 0, (
      "The sparse indices must be sorted")

  list_array = array_util.MakeListArrayFromParentIndicesAndValues(
      shape[0],
      pa.array(row_of_value, type=pa.int64()),
      pa.array(np.asarray(tensor.values), type=self._values_arrow_type),
      empty_list_as_null=False)
  return [list_array]
def testMakeListArray(self, num_parents, parent_indices, values, expected):
  """Checks that the ListArray built from parent indices equals `expected`."""
  list_array = array_util.MakeListArrayFromParentIndicesAndValues(
      num_parents, parent_indices, values)
  matches = list_array.equals(expected)
  failure_message = "actual: {}, expected: {}".format(list_array, expected)
  self.assertTrue(matches, failure_message)
def testInvalidInput(self, num_parents, parent_indices, values,
                     expected_error, expected_error_regexp):
  """Checks that invalid arguments are rejected with the expected error.

  The call must raise `expected_error` with a message matching
  `expected_error_regexp`.
  """
  expect_failure = self.assertRaisesRegex(
      expected_error, expected_error_regexp)
  with expect_failure:
    array_util.MakeListArrayFromParentIndicesAndValues(
        num_parents, parent_indices, values)