def get_constants(self, inputs, training=None): constants = [] if self.implementation != 0 and 0 < self.dropout < 1: input_shape = K.int_shape(inputs) input_dim = input_shape[-1] ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) ones = K.tile(ones, (1, int(input_dim))) def dropped_inputs(): return K.dropout(ones, self.dropout) dp_mask = [ K.in_train_phase(dropped_inputs, ones, training=training) for _ in range(3) ] constants.append(dp_mask) else: constants.append([K.cast_to_floatx(1.) for _ in range(3)]) if 0 < self.recurrent_dropout < 1: ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) ones = K.tile(ones, (1, self.units)) def dropped_inputs(): # pylint: disable=function-redefined return K.dropout(ones, self.recurrent_dropout) rec_dp_mask = [ K.in_train_phase(dropped_inputs, ones, training=training) for _ in range(3) ] constants.append(rec_dp_mask) else: constants.append([K.cast_to_floatx(1.) for _ in range(3)]) return constants
def call(self, inputs): if not isinstance(inputs, list): raise ValueError('A merge layer should be called ' 'on a list of inputs.') if self._reshape_required: reshaped_inputs = [] input_ndims = list(map(K.ndim, inputs)) if None not in input_ndims: # If ranks of all inputs are available, # we simply expand each of them at axis=1 # until all of them have the same rank. max_ndim = max(input_ndims) for x in inputs: x_ndim = K.ndim(x) for _ in range(max_ndim - x_ndim): x = K.expand_dims(x, 1) reshaped_inputs.append(x) return self._merge_function(reshaped_inputs) else: # Transpose all inputs so that batch size is the last dimension. # (batch_size, dim1, dim2, ... ) -> (dim1, dim2, ... , batch_size) transposed = False for x in inputs: x_ndim = K.ndim(x) if x_ndim is None: x_shape = K.shape(x) batch_size = x_shape[0] new_shape = K.concatenate([x_shape[1:], K.expand_dims(batch_size)]) x_transposed = K.reshape(x, K.stack([batch_size, K.prod(x_shape[1:])])) x_transposed = K.permute_dimensions(x_transposed, (1, 0)) x_transposed = K.reshape(x_transposed, new_shape) reshaped_inputs.append(x_transposed) transposed = True elif x_ndim > 1: dims = list(range(1, x_ndim)) + [0] reshaped_inputs.append(K.permute_dimensions(x, dims)) transposed = True else: # We don't transpose inputs if they are 1D vectors or scalars. reshaped_inputs.append(x) y = self._merge_function(reshaped_inputs) y_ndim = K.ndim(y) if transposed: # If inputs have been transposed, we have to transpose the output too. if y_ndim is None: y_shape = K.shape(y) y_ndim = K.shape(y_shape)[0] batch_size = y_shape[y_ndim - 1] new_shape = K.concatenate( [K.expand_dims(batch_size), y_shape[:y_ndim - 1]]) y = K.reshape(y, (-1, batch_size)) y = K.permute_dimensions(y, (1, 0)) y = K.reshape(y, new_shape) elif y_ndim > 1: dims = [y_ndim - 1] + list(range(y_ndim - 1)) y = K.permute_dimensions(y, dims) return y else: return self._merge_function(inputs)
def _linear(self, x, kernel, bias): in_shape = x.shape if len(in_shape) > 2: x_shape = [int(dim) for dim in x.shape] x = K.reshape(x, (x_shape[0] * x_shape[1], x_shape[2])) x = K.dot(x, kernel) x = K.bias_add(x, bias) if len(in_shape) > 2: x = K.reshape(x, (x_shape[0], x_shape[1], int(kernel.shape[1]))) return x
def _layer_norm(self, x, offset, scale): in_shape = x.shape if len(in_shape) > 2: x_shape = [int(dim) for dim in x.shape] x = K.reshape(x, (x_shape[0] * x_shape[1], x_shape[2])) mean, var = tf.nn.moments(x, [1], keep_dims=True) x = tf.nn.batch_normalization(x, mean, var, offset, scale, K.epsilon()) if len(in_shape) > 2: x = K.reshape(x, x_shape) return x
def call(self, inputs, training=None, mask=None): kwargs = {} if has_arg(self.layer.call, 'training'): kwargs['training'] = training uses_learning_phase = False # pylint: disable=redefined-outer-name input_shape = K.int_shape(inputs) if input_shape[0]: # batch size matters, use rnn-based implementation def step(x, _): global uses_learning_phase # pylint: disable=global-variable-undefined output = self.layer.call(x, **kwargs) if hasattr(output, '_uses_learning_phase'): uses_learning_phase = (output._uses_learning_phase or uses_learning_phase) return output, [] _, outputs, _ = K.rnn( step, inputs, initial_states=[], unroll=False) y = outputs else: # No batch size specified, therefore the layer will be able # to process batches of any size. # We can go with reshape-based implementation for performance. input_length = input_shape[1] if not input_length: input_length = K.shape(inputs)[1] # Shape: (num_samples * timesteps, ...). And track the # transformation in self._input_map. input_uid = tf_layers_util.object_list_uid(inputs) inputs = K.reshape(inputs, (-1,) + input_shape[2:]) self._input_map[input_uid] = inputs # (num_samples * timesteps, ...) y = self.layer.call(inputs, **kwargs) if hasattr(y, '_uses_learning_phase'): uses_learning_phase = y._uses_learning_phase # Shape: (num_samples, timesteps, ...) output_shape = self.compute_output_shape(input_shape).as_list() y = K.reshape(y, (-1, input_length) + tuple(output_shape[2:])) # Apply activity regularizer if any: if (hasattr(self.layer, 'activity_regularizer') and self.layer.activity_regularizer is not None): regularization_loss = self.layer.activity_regularizer(y) self.add_loss(regularization_loss, inputs) if uses_learning_phase: y._uses_learning_phase = True return y
def call(self, inputs, training=None, mask=None): kwargs = {} if has_arg(self.layer.call, 'training'): kwargs['training'] = training uses_learning_phase = False # pylint: disable=redefined-outer-name input_shape = K.int_shape(inputs) if input_shape[0]: # batch size matters, use rnn-based implementation def step(x, _): global uses_learning_phase # pylint: disable=global-variable-undefined output = self.layer.call(x, **kwargs) if hasattr(output, '_uses_learning_phase'): uses_learning_phase = (output._uses_learning_phase or uses_learning_phase) return output, [] _, outputs, _ = K.rnn(step, inputs, initial_states=[], unroll=False) y = outputs else: # No batch size specified, therefore the layer will be able # to process batches of any size. # We can go with reshape-based implementation for performance. input_length = input_shape[1] if not input_length: input_length = K.shape(inputs)[1] # Shape: (num_samples * timesteps, ...). And track the # transformation in self._input_map. input_uid = tf_layers_util.object_list_uid(inputs) inputs = K.reshape(inputs, (-1, ) + input_shape[2:]) self._input_map[input_uid] = inputs # (num_samples * timesteps, ...) y = self.layer.call(inputs, **kwargs) if hasattr(y, '_uses_learning_phase'): uses_learning_phase = y._uses_learning_phase # Shape: (num_samples, timesteps, ...) output_shape = self.compute_output_shape(input_shape).as_list() y = K.reshape(y, (-1, input_length) + tuple(output_shape[2:])) # Apply activity regularizer if any: if (hasattr(self.layer, 'activity_regularizer') and self.layer.activity_regularizer is not None): regularization_loss = self.layer.activity_regularizer(y) self.add_loss(regularization_loss, inputs) if uses_learning_phase: y._uses_learning_phase = True return y
def _time_distributed_dense(x, w, b=None, dropout=None, input_dim=None, output_dim=None, timesteps=None, training=None): """Apply `y . w + b` for every temporal slice y of x. Arguments: x: input tensor. w: weight matrix. b: optional bias vector. dropout: whether to apply dropout (same dropout mask for every temporal slice of the input). input_dim: integer; optional dimensionality of the input. output_dim: integer; optional dimensionality of the output. timesteps: integer; optional number of timesteps. training: training phase tensor or boolean. Returns: Output tensor. """ if not input_dim: input_dim = K.shape(x)[2] if not timesteps: timesteps = K.shape(x)[1] if not output_dim: output_dim = K.shape(w)[1] if dropout is not None and 0. < dropout < 1.: # apply the same dropout pattern at every timestep ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim))) dropout_matrix = K.dropout(ones, dropout) expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps) x = K.in_train_phase(x * expanded_dropout_matrix, x, training=training) # collapse time dimension and batch dimension together x = K.reshape(x, (-1, input_dim)) x = K.dot(x, w) if b is not None: x = K.bias_add(x, b) # reshape to 3D tensor if K.backend() == 'tensorflow': x = K.reshape(x, K.stack([-1, timesteps, output_dim])) x.set_shape([None, None, output_dim]) else: x = K.reshape(x, (-1, timesteps, output_dim)) return x
def call(self, inputs): # In case the target shape is not fully defined, # we need access to the shape of x. target_shape = self.target_shape if -1 in target_shape: # target shape not fully defined target_shape = self._compute_output_shape(inputs.get_shape()) target_shape = target_shape.as_list()[1:] return K.reshape(inputs, (-1,) + tuple(target_shape))
def _multihead_attention(self, memory): """Perform multi-head attention from 'Attention is All You Need'. Implementation of the attention mechanism from https://arxiv.org/abs/1706.03762. Args: memory: Memory tensor to perform attention on. Returns: new_memory: New memory tensor. """ batch_size = int(memory.shape[0]) qkv = self._linear(memory, self.kernel_qkv, self.bias_qkv) # qkv = self._layer_norm(qkv, self.offset_qkv, self.scale_qkv) mem_slots = memory.get_shape().as_list()[1] # Denoted as N. # [B, N, F] -> [B, N, H, F/H] qkv_reshape = K.reshape( qkv, (batch_size, mem_slots, self.num_heads, self.qkv_size)) # [B, N, H, F/H] -> [B, H, N, F/H] qkv_transpose = K.permute_dimensions(qkv_reshape, [0, 2, 1, 3]) q, k, v = tf.split(qkv_transpose, [self.key_size, self.key_size, self.value_size], -1) q *= self.qkv_size**-0.5 dot_product = tf.matmul(q, k, transpose_b=True) # [B, H, N, N] weights = K.softmax(dot_product) output = tf.matmul(weights, v) # [B, H, N, V] # [B, H, N, V] -> [B, N, H, V] output_transpose = K.permute_dimensions(output, [0, 2, 1, 3]) # [B, N, H, V] -> [B, N, H * V] new_memory = K.reshape(output_transpose, (batch_size, mem_slots, self.mem_size)) return new_memory
def compute_spectral_normal(self, training=True): # Spectrally Normalized Weight if self.spectral_normalization: # Get kernel tensor shape W_shape = self.kernel.shape.as_list() # Flatten the Tensor W_mat = K.reshape(self.kernel, [W_shape[-1], -1]) # [out_c, N] sigma, u, v = power_iteration(W_mat, self.u) if training: # Update estimated 1st singular vector self.u.assign(u) return self.kernel / sigma else: return self.kernel
def call(self, inputs, memory, training=None): batch_size = int(inputs.shape[0]) memory = K.reshape(memory, (batch_size, self.mem_slots, self.mem_size)) inputs = self._linear(inputs, self.kernel_in, self.bias_in) inputs_reshape = K.expand_dims(inputs, axis=1) memory_plus_input = K.concatenate([memory, inputs_reshape], axis=1) next_memory = self._attend_over_memory(memory_plus_input) n = inputs_reshape.get_shape().as_list()[1] next_memory = next_memory[:, :-n, :] input_gate, forget_gate = self._create_gates(inputs_reshape, memory) next_memory = input_gate * K.tanh(next_memory) next_memory += forget_gate * memory next_memory = K.batch_flatten(next_memory) return next_memory, (next_memory, )
def call(self, inputs): if not isinstance(inputs, list): raise ValueError('A merge layer should be called ' 'on a list of inputs.') if self._reshape_required: reshaped_inputs = [] input_ndims = list(map(K.ndim, inputs)) if None not in input_ndims: # If ranks of all inputs are available, # we simply expand each of them at axis=1 # until all of them have the same rank. max_ndim = max(input_ndims) for x in inputs: x_ndim = K.ndim(x) for _ in range(max_ndim - x_ndim): x = K.expand_dims(x, 1) reshaped_inputs.append(x) return self._merge_function(reshaped_inputs) else: # Transpose all inputs so that batch size is the last dimension. # (batch_size, dim1, dim2, ... ) -> (dim1, dim2, ... , batch_size) transposed = False for x in inputs: x_ndim = K.ndim(x) if x_ndim is None: x_shape = K.shape(x) batch_size = x_shape[0] new_shape = K.concatenate( [x_shape[1:], K.expand_dims(batch_size)]) x_transposed = K.reshape( x, K.stack([batch_size, K.prod(x_shape[1:])])) x_transposed = K.permute_dimensions( x_transposed, (1, 0)) x_transposed = K.reshape(x_transposed, new_shape) reshaped_inputs.append(x_transposed) transposed = True elif x_ndim > 1: dims = list(range(1, x_ndim)) + [0] reshaped_inputs.append(K.permute_dimensions(x, dims)) transposed = True else: # We don't transpose inputs if they are 1D vectors or scalars. reshaped_inputs.append(x) y = self._merge_function(reshaped_inputs) y_ndim = K.ndim(y) if transposed: # If inputs have been transposed, we have to transpose the output too. if y_ndim is None: y_shape = K.shape(y) y_ndim = K.shape(y_shape)[0] batch_size = y_shape[y_ndim - 1] new_shape = K.concatenate( [K.expand_dims(batch_size), y_shape[:y_ndim - 1]]) y = K.reshape(y, (-1, batch_size)) y = K.permute_dimensions(y, (1, 0)) y = K.reshape(y, new_shape) elif y_ndim > 1: dims = [y_ndim - 1] + list(range(y_ndim - 1)) y = K.permute_dimensions(y, dims) return y else: return self._merge_function(inputs)
def call(self, inputs): return K.reshape(inputs, (K.shape(inputs)[0], ) + self.target_shape)
def call(self, inputs): return K.reshape(inputs, (K.shape(inputs)[0],) + self.target_shape)