def call(self, inputs, training=None, mask=None):
    """Produce probabilities for the batch and register the matching loss.

    Args:
        inputs: A pair ``(logits, targets)``. Both elements are passed
            through ``convert_inputs_if_ragged``; only the logits decide
            whether the result is converted back to ragged form.
        training: Chooses between ``_train_probs_loss`` and
            ``_eval_probs_loss``. When ``None`` the Keras learning phase is
            used instead.
        mask: Optional boolean mask that is AND-ed into the loss weights.

    Returns:
        The probabilities returned by the selected branch, passed through
        ``maybe_convert_to_ragged`` with the row lengths of the logits.
    """
    if training is None:
        training = tf.keras.backend.learning_phase()

    logits, targets = inputs
    logits = tf.cast(logits, self.compute_dtype)
    logits, row_lengths = convert_inputs_if_ragged(logits)
    targets, _ = convert_inputs_if_ragged(targets)
    ragged = row_lengths is not None

    # Start from an all-True boolean tensor shaped like the targets. For
    # ragged input it is round-tripped through the ragged form and
    # densified again with ``False`` as the fill value.
    valid = maybe_convert_to_ragged(
        ragged, tf.ones_like(targets, dtype=tf.bool), row_lengths)
    if ragged:
        valid = valid.to_tensor(False)
    if mask is not None:
        valid = tf.logical_and(valid, mask)
    weights = tf.cast(valid, self.compute_dtype)

    def train_branch():
        return self._train_probs_loss(logits, targets, weights)

    def eval_branch():
        return self._eval_probs_loss(logits, targets, weights)

    probs, loss = control_flow_util.smart_cond(
        training, train_branch, eval_branch)
    self.add_loss(loss, inputs=True)

    return maybe_convert_to_ragged(ragged, probs, row_lengths)
def call(self, inputs, training=None, mask=None):
    """Apply the wrapped layer step by step through the backend RNN loop.

    Args:
        inputs: Input tensor; ragged input is converted with
            ``Kr.convert_inputs_if_ragged`` before stepping.
        training: Forwarded to the wrapped layer only when its ``call``
            accepts a ``training`` argument.
        mask: Passed straight to ``Kr.rnn``.

    Returns:
        The per-step outputs collected by ``Kr.rnn``, passed through
        ``Kr.maybe_convert_to_ragged`` with the input's row lengths.
    """
    layer_kwargs = {}
    if generic_utils.has_arg(self.layer.call, 'training'):
        layer_kwargs['training'] = training

    # The static shape is captured before any ragged conversion happens.
    static_shape = Kr.int_shape(inputs)
    inputs, row_lengths = Kr.convert_inputs_if_ragged(inputs)
    ragged = row_lengths is not None

    # NOTE(review): compute_K is defined on the wrapped layer, outside this
    # block; presumably it prepares per-batch state -- confirm.
    self.layer.compute_K(inputs)

    def apply_layer(step_input, _):
        # The loop carries no state, so an empty state list is returned.
        return self.layer(step_input, **layer_kwargs), []

    if ragged:
        num_steps = row_lengths[0]
    else:
        num_steps = static_shape[1]

    _, stepped, _ = Kr.rnn(
        apply_layer,
        inputs,
        initial_states=[],
        input_length=num_steps,
        mask=mask,
        unroll=False)

    return Kr.maybe_convert_to_ragged(ragged, stepped, row_lengths)
def call(self, inputs, training=None, mask=None):
    """Apply the wrapped layer to every step along axis 1 of the input.

    Two strategies are used:

    * When the static batch size is known and ``_always_use_reshape`` is
      falsy, the layer is stepped through ``K.rnn``.
    * Otherwise axis 1 is folded into the batch axis, the layer is called
      once, and the result is unfolded again (ragged inputs go through
      their flat ``values`` instead of a reshape).

    Args:
        inputs: Dense or ragged input tensor.
        training: Forwarded to the wrapped layer only when its ``call``
            accepts a ``training`` argument.
        mask: Given to ``K.rnn`` on the stepping path. On the dense reshape
            path it is flattened and forwarded when the wrapped layer's
            ``call`` accepts ``mask``.

    Returns:
        The wrapped layer's output with axis 1 restored.
    """
    layer_kwargs = {}
    if generic_utils.has_arg(self.layer.call, 'training'):
        layer_kwargs['training'] = training

    static_shape = K.int_shape(inputs)

    if static_shape[0] and not self._always_use_reshape:
        # Batch size is fixed, so step through the input with the RNN loop.
        inputs, row_lengths = K.convert_inputs_if_ragged(inputs)
        ragged = row_lengths is not None

        def apply_layer(step_input, _):
            return self.layer(step_input, **layer_kwargs), []

        _, stepped, _ = K.rnn(
            apply_layer,
            inputs,
            initial_states=[],
            input_length=row_lengths[0] if ragged else static_shape[1],
            mask=mask,
            unroll=False)
        return K.maybe_convert_to_ragged(ragged, stepped, row_lengths)

    # No usable static batch size: any batch size works, so use the
    # reshape-based implementation for performance.
    if isinstance(inputs, ragged_tensor.RaggedTensor):
        flat_result = self.layer(inputs.values, **layer_kwargs)
        return ragged_tensor.RaggedTensor.from_row_lengths(
            flat_result, inputs.nested_row_lengths()[0])

    timesteps = static_shape[1]
    if not timesteps:
        # Fall back to the dynamic size when the static one is unknown.
        timesteps = array_ops.shape(inputs)[1]

    # Fold (num_samples, timesteps, ...) into (num_samples * timesteps, ...).
    folded_shape = self._get_shape_tuple((-1,), inputs, 2)
    inputs = array_ops.reshape(inputs, folded_shape)

    if generic_utils.has_arg(self.layer.call, 'mask') and mask is not None:
        folded_mask_shape = self._get_shape_tuple((-1,), mask, 2)
        layer_kwargs['mask'] = K.reshape(mask, folded_mask_shape)

    y = self.layer(inputs, **layer_kwargs)

    # Unfold back to (num_samples, timesteps, ...).
    full_output_shape = self.compute_output_shape(static_shape).as_list()
    unfolded_shape = self._get_shape_tuple(
        (-1, timesteps), y, 1, full_output_shape[2:])
    y = array_ops.reshape(y, unfolded_shape)

    if not context.executing_eagerly():
        # The reshape can lose static shape information (for example a
        # ``None`` dim that could have been inferred), so restore it.
        y.set_shape(self.compute_output_shape(static_shape))

    return y
def call(self, inputs, training=None, mask=None):
    """Project logits to ``units`` classes, register the loss, return probs.

    All ops are created under the ``'cpu:0'`` device scope.

    Args:
        inputs: A pair ``(logits, targets)``. Both elements are passed
            through ``convert_inputs_if_ragged``; only the logits decide
            whether the result is converted back to ragged form.
        training: Chooses between ``_train_loss`` and ``_eval_loss``. When
            ``None`` the Keras learning phase is used instead.
        mask: Optional boolean mask that is AND-ed into the loss weights.

    Returns:
        Softmax probabilities reshaped to the leading dimensions of the
        logits plus a trailing ``self.units`` axis, passed through
        ``maybe_convert_to_ragged`` with the row lengths of the logits.
    """
    with tf.device('cpu:0'):
        if training is None:
            training = tf.keras.backend.learning_phase()

        logits, targets = inputs
        logits = tf.cast(logits, self.compute_dtype)
        logits, row_lengths = convert_inputs_if_ragged(logits)
        targets, _ = convert_inputs_if_ragged(targets)
        ragged = row_lengths is not None

        # Start from an all-True boolean tensor shaped like the targets.
        # For ragged input it is round-tripped through the ragged form and
        # densified again with ``False`` as the fill value.
        valid = maybe_convert_to_ragged(
            ragged, tf.ones_like(targets, dtype=tf.bool), row_lengths)
        if ragged:
            valid = valid.to_tensor(False)
        if mask is not None:
            valid = tf.logical_and(valid, mask)
        weights = tf.cast(valid, self.compute_dtype)

        # Record the output shape before the inputs are flattened:
        # leading dims of the logits followed by ``self.units``.
        leading_dims = tf.unstack(tf.shape(logits))[:-1]
        result_shape = tf.stack(leading_dims + [self.units])

        flat_logits = tf.reshape(logits, [-1, self.num_channels])
        flat_targets = tf.reshape(targets, [-1])
        flat_weights = tf.reshape(weights, [-1])

        projected = tf.matmul(flat_logits, self.kernel, transpose_b=True)
        projected = tf.nn.bias_add(projected, self.bias)

        # The training branch receives the un-projected inputs, while the
        # eval branch receives the projected logits.
        def train_branch():
            return self._train_loss(flat_logits, flat_targets)

        def eval_branch():
            return self._eval_loss(projected, flat_targets)

        loss = control_flow_util.smart_cond(
            training, train_branch, eval_branch)
        loss = compute_weighted_loss(
            loss,
            sample_weight=flat_weights,
            reduction=self.loss_reduction)
        self.add_loss(loss, inputs=True)

        probs = tf.reshape(tf.nn.softmax(projected), result_shape)
        return maybe_convert_to_ragged(ragged, probs, row_lengths)
def call(self, inputs, **kwargs):
    """Run the wrapped layer on a densified, masked copy of the input.

    Args:
        inputs: Dense or ragged input; converted with
            ``convert_inputs_if_ragged`` before use.
        **kwargs: Only the entries that the wrapped layer's ``call``
            accepts are forwarded; the rest are dropped.

    Returns:
        The wrapped layer's output, passed through
        ``maybe_convert_to_ragged`` with the input's row lengths.
    """
    accepted_kwargs = {
        name: value
        for name, value in kwargs.items()
        if generic_utils.has_arg(self.layer.call, name)
    }

    dense_inputs, row_lengths = convert_inputs_if_ragged(inputs)
    masked_inputs = self.masking_layer(dense_inputs)

    # The wrapped layer's ``call`` is invoked directly, not via ``__call__``.
    dense_outputs = self.layer.call(masked_inputs, **accepted_kwargs)

    was_ragged = row_lengths is not None
    return maybe_convert_to_ragged(was_ragged, dense_outputs, row_lengths)
def call(self, inputs, training=None, mask=None):
    """Apply the wrapped layer to every step of a (possibly nested) input.

    Two strategies are used:

    * When the first static dimension of the first input is known and
      ``_always_use_reshape`` is falsy, the layer is stepped through
      ``backend.rnn``.
    * Otherwise axis 1 is folded into the batch axis, the layer is called
      once, and the result is unfolded again (all-ragged inputs go through
      their flat ``values`` instead of a reshape).

    Args:
        inputs: A tensor or nested structure of tensors, all dense or all
            ragged.
        training: Forwarded to the wrapped layer only when its ``call``
            accepts a ``training`` argument.
        mask: Given to ``backend.rnn`` on the stepping path. On the dense
            reshape path it is flattened and forwarded when the wrapped
            layer's ``call`` accepts ``mask``.

    Returns:
        The wrapped layer's output structure with axis 1 restored.

    Raises:
        ValueError: On the reshape path, when ragged and non-ragged inputs
            are mixed.
    """
    layer_kwargs = {}
    if generic_utils.has_arg(self.layer.call, 'training'):
        layer_kwargs['training'] = training

    def static_shape_of(tensor):
        return tensor_shape.TensorShape(backend.int_shape(tensor))

    input_shape = nest.map_structure(static_shape_of, inputs)
    batch_size = nest.flatten(tf_utils.convert_shapes(input_shape))[0]

    if batch_size and not self._always_use_reshape:
        # Batch size is fixed, so step through the input with the RNN loop.
        inputs, row_lengths = backend.convert_inputs_if_ragged(inputs)
        ragged = row_lengths is not None
        timesteps = nest.flatten(tf_utils.convert_shapes(input_shape))[1]

        def apply_layer(step_input, _):
            return self.layer(step_input, **layer_kwargs), []

        _, stepped, _ = backend.rnn(
            apply_layer,
            inputs,
            initial_states=[],
            input_length=row_lengths[0] if ragged else timesteps,
            mask=mask,
            unroll=False)

        def restore_ragged(output):
            return backend.maybe_convert_to_ragged(
                ragged, output, row_lengths)

        return nest.map_structure(restore_ragged, stepped)

    # No usable static batch size: any batch size works, so use the
    # reshape-based implementation for performance.
    ragged_flags = nest.flatten(
        nest.map_structure(
            lambda x: isinstance(x, ragged_tensor.RaggedTensor), inputs))

    if all(ragged_flags):
        flat_values = nest.map_structure(lambda x: x.values, inputs)
        outer_row_lengths = nest.map_structure(
            lambda x: x.nested_row_lengths()[0], inputs)
        y = self.layer(flat_values, **layer_kwargs)
        return nest.map_structure(
            ragged_tensor.RaggedTensor.from_row_lengths,
            y,
            outer_row_lengths)

    if any(ragged_flags):
        raise ValueError(
            'All inputs has to be either ragged or not, '
            'but not mixed. You passed: {}'.format(inputs))

    timesteps = nest.flatten(tf_utils.convert_shapes(input_shape))[1]
    if not timesteps:
        # Fall back to the dynamic size (taken from the first input) when
        # the static one is unknown.
        dynamic_lengths = nest.map_structure(
            lambda x: array_ops.shape(x)[1], inputs)
        timesteps = generic_utils.to_list(
            nest.flatten(dynamic_lengths))[0]

    # Fold (num_samples, timesteps, ...) into (num_samples * timesteps, ...).
    folded_shapes = nest.map_structure(
        lambda x: self._get_shape_tuple((-1,), x, 2), inputs)
    inputs = nest.map_structure_up_to(
        inputs, array_ops.reshape, inputs, folded_shapes)

    if generic_utils.has_arg(self.layer.call, 'mask') and mask is not None:
        folded_mask_shape = self._get_shape_tuple((-1,), mask, 2)
        layer_kwargs['mask'] = backend.reshape(mask, folded_mask_shape)

    y = self.layer(inputs, **layer_kwargs)

    # Unfold back to (num_samples, timesteps, ...).
    def unfolded_shape(tensor, int_shape):
        return self._get_shape_tuple(
            (-1, timesteps), tensor, 1, int_shape[2:])

    target_shapes = nest.map_structure(
        unfolded_shape, y, self.compute_output_shape(input_shape))
    y = nest.map_structure_up_to(y, array_ops.reshape, y, target_shapes)

    if not context.executing_eagerly():
        # The reshape can lose static shape information (for example a
        # ``None`` dim that could have been inferred), so restore it.
        nest.map_structure_up_to(
            y,
            lambda tensor, shape: tensor.set_shape(shape),
            y,
            self.compute_output_shape(input_shape))

    return y
def call(self, inputs, mask=None, training=None, initial_state=None):
    """Run the LSTM over the input, choosing a kernel implementation.

    When ``_could_use_gpu_kernel`` is falsy the cell is stepped through
    ``K.rnn``. Otherwise the fused implementations are used: in eager mode
    the choice between ``gpu_lstm`` and ``standard_lstm`` is made here, and
    in graph mode it is delegated to ``lstm_with_backend_selection``.

    Args:
        inputs: Dense (zero-padded) or ragged input. A ragged input is
            padded and its row lengths are used for masking.
        mask: Optional mask; when a list is given only its first element
            is used.
        training: Forwarded to the cell / dropout-mask helpers.
        initial_state: Optional initial state, normalized by
            ``_process_inputs``.

    Returns:
        The full sequence (when ``return_sequences``) or the last output;
        as a list followed by the states when ``return_state`` is set, or
        as an ``(output, runtime)`` pair when ``return_runtime`` is set.
    """
    inputs, row_lengths = K.convert_inputs_if_ragged(inputs)
    is_ragged_input = row_lengths is not None
    self._validate_args_if_ragged(is_ragged_input, mask)

    # LSTM does not support constants, so ``None`` is passed for them and
    # the returned constants are discarded.
    inputs, initial_state, _ = self._process_inputs(
        inputs, initial_state, None)

    if isinstance(mask, list):
        mask = mask[0]

    static_shape = K.int_shape(inputs)
    if self.time_major:
        timesteps = static_shape[0]
    else:
        timesteps = static_shape[1]

    if not self._could_use_gpu_kernel:
        # Fall back to stepping the regular LSTM cell.
        self._maybe_reset_cell_dropout_mask(self.cell)

        def step(cell_inputs, cell_states):
            return self.cell(cell_inputs, cell_states, training=training)

        if row_lengths is not None:
            sequence_length = row_lengths
        else:
            sequence_length = timesteps

        last_output, outputs, states = K.rnn(
            step,
            inputs,
            initial_state,
            constants=None,
            go_backwards=self.go_backwards,
            mask=mask,
            unroll=self.unroll,
            input_length=sequence_length,
            time_major=self.time_major,
            zero_output_for_mask=self.zero_output_for_mask)
        runtime = _runtime(_RUNTIME_UNKNOWN)
    else:
        # Use the defun approach for backend implementation swap. The
        # different implementations need the same function signature (the
        # tensor parameters must share shapes and dtypes). Since CuDNN has
        # an extra set of bias, that bias is passed to both the normal and
        # the CuDNN implementation.
        self.reset_dropout_mask()
        dropout_mask = self.get_dropout_mask_for_cell(
            inputs, training, count=4)
        if dropout_mask is not None:
            inputs = inputs * dropout_mask[0]

        gpu_lstm_kwargs = {
            'inputs': inputs,
            'init_h': initial_state[0],
            'init_c': initial_state[1],
            'kernel': self.cell.kernel,
            'recurrent_kernel': self.cell.recurrent_kernel,
            'bias': self.cell.bias,
            'mask': mask,
            'time_major': self.time_major,
            'go_backwards': self.go_backwards,
            'sequence_lengths': row_lengths,
        }
        # The generic implementation takes the same arguments plus the
        # activations and the masking behaviour.
        normal_lstm_kwargs = dict(
            gpu_lstm_kwargs,
            activation=self.activation,
            recurrent_activation=self.recurrent_activation,
            zero_output_for_mask=self.zero_output_for_mask)

        if context.executing_eagerly():
            # Under eager context, check the device placement and prefer
            # the GPU implementation when a GPU is available.
            device_type = _get_context_device_type()
            # Either the user specified GPU, or left it unspecified while
            # a GPU is available.
            gpu_selected = (
                device_type == _GPU_DEVICE_NAME
                or (device_type is None and context.num_gpus() > 0))
            can_use_gpu = gpu_selected and (
                mask is None
                or is_sequence_right_padded(mask, self.time_major))

            if can_use_gpu:
                lstm_result = gpu_lstm(**gpu_lstm_kwargs)
            else:
                lstm_result = standard_lstm(**normal_lstm_kwargs)
        else:
            lstm_result = lstm_with_backend_selection(**normal_lstm_kwargs)

        last_output, outputs, new_h, new_c, runtime = lstm_result
        states = [new_h, new_c]

    if self.stateful:
        updates = [
            state_ops.assign(self.states[index], new_state)
            for index, new_state in enumerate(states)
        ]
        self.add_update(updates)

    if self.return_sequences:
        output = K.maybe_convert_to_ragged(
            is_ragged_input, outputs, row_lengths)
    else:
        output = last_output

    if self.return_state:
        return [output] + list(states)
    if self.return_runtime:
        return output, runtime
    return output