def call(self, inputs):
    """Look up `inputs` in the table and return indices or bincounts.

    Args:
        inputs: Values handed to `self._table_handler.lookup`. When the
            layer's key dtype is int64 and `inputs.dtype` is int32, the
            values are cast to int64 before the lookup.

    Returns:
        The raw lookup result when `self.output_mode` is `INT`. Otherwise
        the bincounts of the lookup result (sparse or dense depending on
        `self.sparse`), multiplied by `self.tf_idf_weights` when
        `self.output_mode` is `TFIDF`.

    Raises:
        ValueError: If neither `self.max_tokens` nor `self._vocab_size`
            is set (both falsy).
    """
    if not (self.max_tokens or self._vocab_size):
        raise ValueError(
            "You must set the layer's vocabulary before calling it. "
            "Either pass a `vocabulary` argument to the layer, or "
            "call `layer.adapt(dataset)` with some sample data."
        )

    # Record that the layer has been invoked at least once.
    self._called = True

    # int32 inputs are widened so they match an int64-keyed table.
    if self._key_dtype == dtypes.int64 and inputs.dtype == dtypes.int32:
        inputs = math_ops.cast(inputs, dtypes.int64)

    indices = self._table_handler.lookup(inputs)
    if self.output_mode == INT:
        return indices

    binary_output = self.output_mode == BINARY

    # Use the vocabulary size as output depth unless padding to
    # `max_tokens` was requested (or no vocabulary size is known).
    use_vocab_depth = self._vocab_size and not self.pad_to_max_tokens
    depth = self._vocab_size if use_vocab_depth else self.max_tokens

    bincount_fn = (
        category_encoding.sparse_bincount
        if self.sparse
        else category_encoding.dense_bincount
    )
    counts = bincount_fn(indices, depth, binary_output)

    if self.output_mode == TFIDF:
        return math_ops.multiply(counts, self.tf_idf_weights)
    return counts
def call(self, inputs):
    """Look up `inputs`, guard against OOV values, and encode the result.

    Args:
        inputs: Values to look up. Python lists/tuples and NumPy arrays
            are converted to tensors first. Sparse and ragged inputs (as
            reported by `tf_utils.is_sparse` / `tf_utils.is_ragged`) are
            handled through their `values` / `flat_values`.

    Returns:
        An identity of the lookup result when `self.output_mode` is `INT`.
        Otherwise the bincounts of the lookup result (sparse or dense
        depending on `self.sparse`), multiplied by `self.tf_idf_weights`
        when `self.output_mode` is `TF_IDF`. Every returned op is created
        under a control dependency on the OOV assertion, when one exists.

    Raises:
        ValueError: If `self.max_tokens` is falsy and `self._vocab_size`
            is None.
    """
    if isinstance(inputs, (list, tuple, np.ndarray)):
        inputs = ops.convert_to_tensor_v2_with_dispatch(inputs)

    vocabulary_missing = not self.max_tokens and self._vocab_size is None
    if vocabulary_missing:
        raise ValueError(
            "You must set the layer's vocabulary before calling it. "
            "Either pass a `vocabulary` argument to the layer, or "
            "call `layer.adapt(dataset)` with some sample data."
        )

    # Record that the layer has been invoked at least once.
    self._called = True

    # int32 inputs are widened so they match an int64-keyed table.
    needs_upcast = (
        self._key_dtype == dtypes.int64 and inputs.dtype == dtypes.int32
    )
    if needs_upcast:
        inputs = math_ops.cast(inputs, dtypes.int64)

    lookup_result = self._table_handler.lookup(inputs)

    # With no OOV slots on a forward (non-inverted) lookup, any result of
    # -1 is reported through a runtime assertion.
    oov_guards = []
    if self.num_oov_indices == 0 and not self.invert:
        if tf_utils.is_sparse(inputs):
            looked_up, raw_values = lookup_result.values, inputs.values
        elif tf_utils.is_ragged(inputs):
            looked_up = lookup_result.flat_values
            raw_values = inputs.flat_values
        else:
            looked_up, raw_values = lookup_result, inputs

        miss_positions = array_ops.where_v2(math_ops.equal(looked_up, -1))
        missed_inputs = array_ops.gather_nd(raw_values, miss_positions)
        failure_text = string_ops.string_format(
            "When `num_oov_indices=0` all inputs should be in vocabulary, "
            "found OOV values {}, consider setting `num_oov_indices=1`.",
            (missed_inputs,))
        no_misses = math_ops.equal(array_ops.size(miss_positions), 0)
        oov_guards.append(control_flow_ops.Assert(no_misses, [failure_text]))

    # Everything below must be created inside this scope so that the
    # assertion runs before any output is produced.
    with ops.control_dependencies(oov_guards):
        if self.output_mode == INT:
            return array_ops.identity(lookup_result)

        multi_hot_output = self.output_mode == MULTI_HOT

        # Use the vocabulary size as output depth unless padding to
        # `max_tokens` was requested (or the vocabulary size is falsy).
        use_vocab_depth = self._vocab_size and not self.pad_to_max_tokens
        depth = self._vocab_size if use_vocab_depth else self.max_tokens

        bincount_fn = (
            category_encoding.sparse_bincount
            if self.sparse
            else category_encoding.dense_bincount
        )
        counts = bincount_fn(lookup_result, depth, multi_hot_output)

        if self.output_mode == TF_IDF:
            return math_ops.multiply(counts, self.tf_idf_weights)
        return counts
def call(self, inputs):
    """Look up `inputs` in the table and return indices or bincounts.

    Args:
        inputs: Values handed to `self._table_handler.lookup`. When the
            layer's key dtype is int64 and `inputs.dtype` is int32, the
            values are cast to int64 before the lookup.

    Returns:
        The raw lookup result when `self.output_mode` is `INT`; otherwise
        the sparse or dense bincount (chosen by `self.sparse`) of the
        lookup result with depth `self.max_tokens`.

    Raises:
        ValueError: If `self.max_tokens` is falsy.
    """
    if not self.max_tokens:
        raise ValueError(
            "You must set the layer's vocabulary before calling it. "
            "Either pass a `vocabulary` argument to the layer, or "
            "call `layer.adapt(dataset)` with some sample data."
        )

    # int32 inputs are widened so they match an int64-keyed table.
    if self._key_dtype == dtypes.int64 and inputs.dtype == dtypes.int32:
        inputs = math_ops.cast(inputs, dtypes.int64)

    indices = self._table_handler.lookup(inputs)
    if self.output_mode == INT:
        return indices

    binary_output = self.output_mode == BINARY
    bincount_fn = (
        category_encoding.sparse_bincount
        if self.sparse
        else category_encoding.dense_bincount
    )
    return bincount_fn(indices, self.max_tokens, binary_output)
def _encode_output(self, lookup_result):
    """Turn a lookup result into bincount (or TF-IDF weighted) output.

    Args:
        lookup_result: Result of a table lookup; must expose `.shape`
            with a `.rank`. Sparse values (per `tf_utils.is_sparse`) are
            expanded with the sparse expand-dims op, others with the
            dense one.

    Returns:
        The bincounts of `lookup_result` (sparse or dense depending on
        `self.sparse`), multiplied by `self.tf_idf_weights` when
        `self.output_mode` is `TF_IDF`.

    Raises:
        ValueError: If, after any upranking, the result has rank > 2.
    """

    def add_axis(tensor, axis):
        # Sparse tensors need their dedicated expand-dims op.
        if tf_utils.is_sparse(tensor):
            return sparse_ops.sparse_expand_dims(tensor, axis)
        return array_ops.expand_dims(tensor, axis)

    # Kept only for the error message below.
    original_shape = lookup_result.shape

    # A scalar input is always upranked to a single sample.
    if lookup_result.shape.rank == 0:
        lookup_result = add_axis(lookup_result, -1)

    # One-hot upranks only when the final dimension is not already 1.
    if self.output_mode == ONE_HOT and lookup_result.shape[-1] != 1:
        lookup_result = add_axis(lookup_result, -1)

    # TODO(b/190445202): remove output rank restriction.
    result_rank = lookup_result.shape.rank
    if result_rank > 2:
        raise ValueError(
            "Received input shape {}, which would result in output rank {}. "
            "Currently only outputs up to rank 2 are supported for "
            "`output_mode={}`.".format(original_shape, result_rank,
                                       self.output_mode))

    binary_output = self.output_mode in (MULTI_HOT, ONE_HOT)

    # Use the vocabulary size as output depth unless padding to
    # `max_tokens` was requested (or the vocabulary size is falsy).
    use_vocab_depth = self._vocab_size and not self.pad_to_max_tokens
    depth = self._vocab_size if use_vocab_depth else self.max_tokens

    bincount_fn = (
        category_encoding.sparse_bincount
        if self.sparse
        else category_encoding.dense_bincount
    )
    counts = bincount_fn(lookup_result, depth, binary_output)

    if self.output_mode == TF_IDF:
        return math_ops.multiply(counts, self.tf_idf_weights)
    return counts