def get_constants(self, inputs, training=None): constants = [] if self.implementation != 0 and 0 < self.dropout < 1: input_shape = K.int_shape(inputs) input_dim = input_shape[-1] ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) ones = K.tile(ones, (1, int(input_dim))) def dropped_inputs(): return K.dropout(ones, self.dropout) dp_mask = [ K.in_train_phase(dropped_inputs, ones, training=training) for _ in range(3) ] constants.append(dp_mask) else: constants.append([K.cast_to_floatx(1.) for _ in range(3)]) if 0 < self.recurrent_dropout < 1: ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) ones = K.tile(ones, (1, self.units)) def dropped_inputs(): # pylint: disable=function-redefined return K.dropout(ones, self.recurrent_dropout) rec_dp_mask = [ K.in_train_phase(dropped_inputs, ones, training=training) for _ in range(3) ] constants.append(rec_dp_mask) else: constants.append([K.cast_to_floatx(1.) for _ in range(3)]) return constants
def call(self, inputs, training=None): input_shape = inputs.get_shape().as_list() # Prepare broadcasting shape. ndim = len(input_shape) reduction_axes = list(range(len(input_shape))) del reduction_axes[self.axis] broadcast_shape = [1] * len(input_shape) broadcast_shape[self.axis] = input_shape[self.axis] # Determines whether broadcasting is needed. needs_broadcasting = (sorted(reduction_axes) != list(range(ndim))[:-1]) normed, mean, variance = K.normalize_batch_in_training( inputs, self.gamma, self.beta, reduction_axes, epsilon=self.epsilon) if training in {0, False}: return normed else: self.add_update([ K.moving_average_update(self.moving_mean, mean, self.momentum), K.moving_average_update(self.moving_variance, variance, self.momentum) ], inputs) def normalize_inference(): if needs_broadcasting: # In this case we must explictly broadcast all parameters. broadcast_moving_mean = K.reshape(self.moving_mean, broadcast_shape) broadcast_moving_variance = K.reshape( self.moving_variance, broadcast_shape) if self.center: broadcast_beta = K.reshape(self.beta, broadcast_shape) else: broadcast_beta = None if self.scale: broadcast_gamma = K.reshape(self.gamma, broadcast_shape) else: broadcast_gamma = None return K.batch_normalization(inputs, broadcast_moving_mean, broadcast_moving_variance, broadcast_beta, broadcast_gamma, epsilon=self.epsilon) else: return K.batch_normalization(inputs, self.moving_mean, self.moving_variance, self.beta, self.gamma, epsilon=self.epsilon) # Pick the normalized form corresponding to the training phase. return K.in_train_phase(normed, normalize_inference, training=training)
def call(self, inputs, training=None): def noised(): return inputs + K.random_normal( shape=K.shape(inputs), mean=0., stddev=self.stddev) return K.in_train_phase(noised, inputs, training=training)
def call(self, inputs, training=None): if 0. < self.rate < 1.: noise_shape = self._get_noise_shape(inputs) def dropped_inputs(): return K.dropout(inputs, self.rate, noise_shape, seed=self.seed) return K.in_train_phase(dropped_inputs, inputs, training=training) return inputs
def call(self, inputs, training=None): if 0 < self.rate < 1: def noised(): stddev = np.sqrt(self.rate / (1.0 - self.rate)) return inputs * K.random_normal( shape=K.shape(inputs), mean=1.0, stddev=stddev) return K.in_train_phase(noised, inputs, training=training) return inputs
def call(self, inputs, training=None): input_shape = inputs.get_shape().as_list() # Prepare broadcasting shape. ndim = len(input_shape) reduction_axes = list(range(len(input_shape))) del reduction_axes[self.axis] broadcast_shape = [1] * len(input_shape) broadcast_shape[self.axis] = input_shape[self.axis] # Determines whether broadcasting is needed. needs_broadcasting = (sorted(reduction_axes) != list(range(ndim))[:-1]) normed, mean, variance = K.normalize_batch_in_training( inputs, self.gamma, self.beta, reduction_axes, epsilon=self.epsilon) if training in {0, False}: return normed else: self.add_update([ K.moving_average_update(self.moving_mean, mean, self.momentum), K.moving_average_update(self.moving_variance, variance, self.momentum) ], inputs) def normalize_inference(): if needs_broadcasting: # In this case we must explictly broadcast all parameters. broadcast_moving_mean = K.reshape(self.moving_mean, broadcast_shape) broadcast_moving_variance = K.reshape(self.moving_variance, broadcast_shape) if self.center: broadcast_beta = K.reshape(self.beta, broadcast_shape) else: broadcast_beta = None if self.scale: broadcast_gamma = K.reshape(self.gamma, broadcast_shape) else: broadcast_gamma = None return K.batch_normalization( inputs, broadcast_moving_mean, broadcast_moving_variance, broadcast_beta, broadcast_gamma, epsilon=self.epsilon) else: return K.batch_normalization( inputs, self.moving_mean, self.moving_variance, self.beta, self.gamma, epsilon=self.epsilon) # Pick the normalized form corresponding to the training phase. return K.in_train_phase(normed, normalize_inference, training=training)
def _time_distributed_dense(x, w, b=None, dropout=None, input_dim=None, output_dim=None, timesteps=None, training=None): """Apply `y . w + b` for every temporal slice y of x. Arguments: x: input tensor. w: weight matrix. b: optional bias vector. dropout: wether to apply dropout (same dropout mask for every temporal slice of the input). input_dim: integer; optional dimensionality of the input. output_dim: integer; optional dimensionality of the output. timesteps: integer; optional number of timesteps. training: training phase tensor or boolean. Returns: Output tensor. """ if not input_dim: input_dim = K.shape(x)[2] if not timesteps: timesteps = K.shape(x)[1] if not output_dim: output_dim = K.shape(w)[1] if dropout is not None and 0. < dropout < 1.: # apply the same dropout pattern at every timestep ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim))) dropout_matrix = K.dropout(ones, dropout) expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps) x = K.in_train_phase(x * expanded_dropout_matrix, x, training=training) # collapse time dimension and batch dimension together x = K.reshape(x, (-1, input_dim)) x = K.dot(x, w) if b is not None: x = K.bias_add(x, b) # reshape to 3D tensor if K.backend() == 'tensorflow': x = K.reshape(x, K.stack([-1, timesteps, output_dim])) x.set_shape([None, None, output_dim]) else: x = K.reshape(x, (-1, timesteps, output_dim)) return x
def get_constants(self, inputs, training=None): constants = [] if self.implementation == 0 and 0 < self.dropout < 1: ones = K.zeros_like(inputs) ones = K.sum(ones, axis=1) ones += 1 def dropped_inputs(): return K.dropout(ones, self.dropout) dp_mask = [ K.in_train_phase(dropped_inputs, ones, training=training) for _ in range(4) ] constants.append(dp_mask) else: constants.append([K.cast_to_floatx(1.) for _ in range(4)]) if 0 < self.recurrent_dropout < 1: shape = list(self.kernel_shape) shape[-1] = self.filters ones = K.zeros_like(inputs) ones = K.sum(ones, axis=1) ones = self.input_conv(ones, K.zeros(shape), padding=self.padding) ones += 1. def dropped_inputs(): # pylint: disable=function-redefined return K.dropout(ones, self.recurrent_dropout) rec_dp_mask = [ K.in_train_phase(dropped_inputs, ones, training=training) for _ in range(4) ] constants.append(rec_dp_mask) else: constants.append([K.cast_to_floatx(1.) for _ in range(4)]) return constants
def call(self, inputs, training=None): if 0. < self.rate < 1.: noise_shape = self._get_noise_shape(inputs) alpha = 1.6732632423543772848170429916717 scale = 1.0507009873554804934193349852946 def dropped_inputs(inputs=inputs, rate=self.rate, seed=self.seed): alpha_p = -alpha * scale kept_idx = K.greater_equal( K.random_uniform(noise_shape, seed=seed), rate) kept_idx = K.cast(kept_idx, K.floatx()) a = ((1 - rate) * (1 + rate * alpha_p**2))**-0.5 b = -a * alpha_p * rate x = inputs * kept_idx + alpha_p * (1 - kept_idx) return a * x + b return K.in_train_phase(dropped_inputs, inputs, training=training) return inputs
def call(self, inputs, training=None): if 0. < self.rate < 1.: noise_shape = self._get_noise_shape(inputs) alpha = 1.6732632423543772848170429916717 scale = 1.0507009873554804934193349852946 def dropped_inputs(inputs=inputs, rate=self.rate, seed=self.seed): alpha_p = -alpha * scale kept_idx = K.greater_equal(K.random_uniform(noise_shape, seed=seed), rate) kept_idx = K.cast(kept_idx, K.floatx()) a = ((1 - rate) * (1 + rate * alpha_p ** 2)) ** -0.5 b = -a * alpha_p * rate x = inputs * kept_idx + alpha_p * (1 - kept_idx) return a * x + b return K.in_train_phase(dropped_inputs, inputs, training=training) return inputs