Exemple #1
0
    def call(self, inputs, training=None):
        """Apply batch renormalization to `inputs` along `self.axis`.

        The batch-normalized activations are corrected with the clipped
        factors `r` and `d` (both wrapped in `K.stop_gradient`), which are
        computed from the batch statistics and the running mean/variance,
        and are then scaled by `self.gamma` and shifted by `self.beta`.
        Updates for the running statistics and for `r_max`, `d_max` and `t`
        are registered through `self.add_update`.

        # Arguments
            inputs: Input tensor.
            training: Learning-phase flag.  When it is statically `0` or
                `False` the batch-renormalized tensor is returned directly;
                otherwise `K.in_train_phase` chooses between that tensor
                and a plain batch normalization that uses the running
                statistics.

        # Returns
            The normalized tensor.
        """
        assert self.built, 'Layer must be built before being called'
        input_shape = K.int_shape(inputs)

        # Reduce over every axis except the normalized one.
        reduction_axes = list(range(len(input_shape)))
        del reduction_axes[self.axis]
        broadcast_shape = [1] * len(input_shape)
        broadcast_shape[self.axis] = input_shape[self.axis]

        # Explicit reshaping is only needed when the normalized axis is not
        # the last one.  Both operands must be lists: on Python 3 a list
        # never compares equal to a `range` object, which previously made
        # the non-broadcasting branch unreachable.
        needs_broadcasting = (
            sorted(reduction_axes) != list(range(K.ndim(inputs)))[:-1])

        mean_batch, var_batch = K.moments(inputs,
                                          reduction_axes,
                                          shift=None,
                                          keep_dims=False)
        std_batch = (K.sqrt(var_batch + self.epsilon))

        # NOTE(review): K.get_value is evaluated when call() runs, so the
        # clipping bounds below are presumably fixed at that moment rather
        # than following later updates of r_max / d_max -- confirm.
        r_max_value = K.get_value(self.r_max)
        r = std_batch / (K.sqrt(self.running_variance + self.epsilon))
        r = K.stop_gradient(K.clip(r, 1 / r_max_value, r_max_value))

        d_max_value = K.get_value(self.d_max)
        d = (mean_batch - self.running_mean) / K.sqrt(self.running_variance +
                                                      self.epsilon)
        d = K.stop_gradient(K.clip(d, -d_max_value, d_max_value))

        if not needs_broadcasting:
            x_normed_batch = (inputs - mean_batch) / std_batch
            x_normed = (x_normed_batch * r + d) * self.gamma + self.beta
        else:
            # need broadcasting
            broadcast_mean = K.reshape(mean_batch, broadcast_shape)
            broadcast_std = K.reshape(std_batch, broadcast_shape)
            broadcast_r = K.reshape(r, broadcast_shape)
            broadcast_d = K.reshape(d, broadcast_shape)
            broadcast_beta = K.reshape(self.beta, broadcast_shape)
            broadcast_gamma = K.reshape(self.gamma, broadcast_shape)

            x_normed_batch = (inputs - broadcast_mean) / broadcast_std
            x_normed = (x_normed_batch * broadcast_r +
                        broadcast_d) * broadcast_gamma + broadcast_beta

        # explicit update to moving mean and variance
        # (std_batch**2 equals var_batch + epsilon)
        self.add_update([
            K.moving_average_update(self.running_mean, mean_batch,
                                    self.momentum),
            K.moving_average_update(self.running_variance, std_batch**2,
                                    self.momentum)
        ], inputs)

        # update r_max and d_max
        t_val = K.get_value(self.t)
        r_val = self.r_max_value / (1 +
                                    (self.r_max_value - 1) * np.exp(-t_val))
        d_val = self.d_max_value / (1 + (
            (self.d_max_value / 1e-3) - 1) * np.exp(-(2 * t_val)))
        t_val += float(self.t_delta)

        self.add_update([
            K.update(self.r_max, r_val),
            K.update(self.d_max, d_val),
            K.update(self.t, t_val)
        ], inputs)

        # NOTE(review): with a static inference flag the batch-statistics
        # result is returned, not the running-statistics one used by the
        # dynamic path below -- confirm that this is intended.
        if training in {0, False}:
            return x_normed
        else:

            def normalize_inference():
                """Plain batch normalization using the running statistics."""
                if not needs_broadcasting:
                    x_normed_running = K.batch_normalization(
                        inputs,
                        self.running_mean,
                        self.running_variance,
                        self.beta,
                        self.gamma,
                        epsilon=self.epsilon)

                    return x_normed_running
                else:
                    # need broadcasting
                    broadcast_running_mean = K.reshape(self.running_mean,
                                                       broadcast_shape)
                    broadcast_running_std = K.reshape(self.running_variance,
                                                      broadcast_shape)
                    broadcast_beta = K.reshape(self.beta, broadcast_shape)
                    broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
                    x_normed_running = K.batch_normalization(
                        inputs,
                        broadcast_running_mean,
                        broadcast_running_std,
                        broadcast_beta,
                        broadcast_gamma,
                        epsilon=self.epsilon)

                    return x_normed_running

            # pick the normalized form of inputs corresponding to the training phase
            # for batch renormalization, inference time remains same as batchnorm
            x_normed = K.in_train_phase(x_normed,
                                        normalize_inference,
                                        training=training)

            return x_normed
Exemple #2
0
    def call(self, inputs, training=None):
        """Convert `inputs` to grayscale with `tf.image.rgb_to_grayscale`.

        The same conversion is supplied to `K.in_train_phase` for both the
        training and the inference branch.
        """
        def to_grayscale():
            # Deferred so that K.in_train_phase builds the op itself.
            return tf.image.rgb_to_grayscale(inputs)

        return K.in_train_phase(to_grayscale, to_grayscale,
                                training=training)
    def call(self, x, mask=None):
        """Apply batch renormalization to `x` according to `self.mode`.

        Modes 0 and 2 compute feature-wise statistics over every axis
        except `self.axis`, register moving-average updates for the running
        statistics and, in mode 0 only, switch to a plain batch
        normalization with the running statistics outside the training
        phase.  Mode 1 normalizes sample-wise along `self.axis`.  In every
        mode the correction factors `r` and `d` are clipped and wrapped in
        `K.stop_gradient`, and updates for `r_max`, `d_max` and `t` are
        registered.

        # Arguments
            x: Input tensor.
            mask: Unused by this method.

        # Returns
            The normalized tensor.
        """
        if self.mode == 0 or self.mode == 2:
            assert self.built, 'Layer must be built before being called'
            input_shape = K.int_shape(x)

            # Reduce over every axis except the normalized one.
            reduction_axes = list(range(len(input_shape)))
            del reduction_axes[self.axis]
            broadcast_shape = [1] * len(input_shape)
            broadcast_shape[self.axis] = input_shape[self.axis]

            # Explicit reshaping is only needed when the normalized axis is
            # not the last one.  Both operands must be lists: on Python 3 a
            # list never compares equal to a `range` object, which
            # previously made the non-broadcasting branches unreachable.
            needs_broadcasting = (
                sorted(reduction_axes) != list(range(K.ndim(x)))[:-1])

            # Only the batch statistics are used; the normalized tensor
            # returned alongside them is discarded.
            _, mean_batch, var_batch = K.normalize_batch_in_training(
                x, self.gamma, self.beta, reduction_axes, epsilon=self.epsilon)

            std_batch = (K.sqrt(var_batch + self.epsilon))

            r_max_value = K.get_value(self.r_max)
            r = std_batch / (K.sqrt(self.running_std + self.epsilon))
            r = K.stop_gradient(K.clip(r, 1 / r_max_value, r_max_value))

            d_max_value = K.get_value(self.d_max)
            d = (mean_batch - self.running_mean) / K.sqrt(self.running_std +
                                                          self.epsilon)
            d = K.stop_gradient(K.clip(d, -d_max_value, d_max_value))

            if not needs_broadcasting:
                x_normed_batch = (x - mean_batch) / std_batch
                x_normed = (x_normed_batch * r + d) * self.gamma + self.beta
            else:
                # need broadcasting
                broadcast_mean = K.reshape(mean_batch, broadcast_shape)
                broadcast_std = K.reshape(std_batch, broadcast_shape)
                broadcast_r = K.reshape(r, broadcast_shape)
                broadcast_d = K.reshape(d, broadcast_shape)
                broadcast_beta = K.reshape(self.beta, broadcast_shape)
                broadcast_gamma = K.reshape(self.gamma, broadcast_shape)

                x_normed_batch = (x - broadcast_mean) / broadcast_std
                x_normed = (x_normed_batch * broadcast_r +
                            broadcast_d) * broadcast_gamma + broadcast_beta

            # explicit update to the moving statistics; note that
            # `running_std` is fed std_batch**2, i.e. var_batch + epsilon
            self.add_update([
                K.moving_average_update(self.running_mean, mean_batch,
                                        self.momentum),
                K.moving_average_update(self.running_std, std_batch**2,
                                        self.momentum)
            ], x)

            # update r_max and d_max
            t_val = K.get_value(self.t)
            r_val = self.r_max_value / (
                1 + (self.r_max_value - 1) * np.exp(-t_val))
            d_val = self.d_max_value / (1 + (
                (self.d_max_value / 1e-3) - 1) * np.exp(-(2 * t_val)))
            t_val += float(self.t_delta)

            self.add_update([
                K.update(self.r_max, r_val),
                K.update(self.d_max, d_val),
                K.update(self.t, t_val)
            ], x)

            if self.mode == 0:
                if not needs_broadcasting:
                    x_normed_running = K.batch_normalization(
                        x,
                        self.running_mean,
                        self.running_std,
                        self.beta,
                        self.gamma,
                        epsilon=self.epsilon)
                else:
                    # need broadcasting
                    broadcast_running_mean = K.reshape(self.running_mean,
                                                       broadcast_shape)
                    broadcast_running_std = K.reshape(self.running_std,
                                                      broadcast_shape)
                    broadcast_beta = K.reshape(self.beta, broadcast_shape)
                    broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
                    x_normed_running = K.batch_normalization(
                        x,
                        broadcast_running_mean,
                        broadcast_running_std,
                        broadcast_beta,
                        broadcast_gamma,
                        epsilon=self.epsilon)

                # pick the normalized form of x corresponding to the training phase
                # for batch renormalization, inference time remains same as batchnorm
                x_normed = K.in_train_phase(x_normed, x_normed_running)

        elif self.mode == 1:
            # sample-wise normalization
            m = K.mean(x, axis=self.axis, keepdims=True)
            std = K.sqrt(
                K.var(x, axis=self.axis, keepdims=True) + self.epsilon)
            x_normed_batch = (x - m) / (std + self.epsilon)

            r_max_value = K.get_value(self.r_max)
            r = std / (self.running_std + self.epsilon)
            r = K.stop_gradient(K.clip(r, 1 / r_max_value, r_max_value))

            d_max_value = K.get_value(self.d_max)
            d = (m - self.running_mean) / (self.running_std + self.epsilon)
            d = K.stop_gradient(K.clip(d, -d_max_value, d_max_value))

            x_normed = ((x_normed_batch * r) + d) * self.gamma + self.beta

            # update r_max and d_max
            t_val = K.get_value(self.t)
            r_val = self.r_max_value / (
                1 + (self.r_max_value - 1) * np.exp(-t_val))
            d_val = self.d_max_value / (1 + (
                (self.d_max_value / 1e-3) - 1) * np.exp(-(2 * t_val)))
            t_val += float(self.t_delta)

            self.add_update([
                K.update(self.r_max, r_val),
                K.update(self.d_max, d_val),
                K.update(self.t, t_val)
            ], x)

        return x_normed
Exemple #4
0
    def call(self, inputs, training=None):
        """Batch-normalize `inputs` along `self.axis`.

        # Arguments
            inputs: Input tensor.
            training: Learning-phase flag.  A static `0`/`False` returns
                the moving-statistics normalization directly; any other
                value lets `K.in_train_phase` choose between batch and
                moving statistics.

        # Returns
            The normalized tensor.
        """
        static_shape = K.int_shape(inputs)
        rank = len(static_shape)

        # Statistics are reduced over every axis except the normalized one.
        reduction_axes = list(range(rank))
        del reduction_axes[self.axis]

        # Shape used to reshape per-feature parameters against `inputs`.
        broadcast_shape = [1] * rank
        broadcast_shape[self.axis] = static_shape[self.axis]

        # No reshaping is required when the normalized axis is the last one.
        needs_broadcasting = sorted(reduction_axes) != list(range(rank))[:-1]

        def normalize_inference():
            """Normalize with the moving mean and variance."""
            if not needs_broadcasting:
                return tf.nn.batch_normalization(inputs,
                                                 self.moving_mean,
                                                 self.moving_variance,
                                                 self.beta,
                                                 self.gamma,
                                                 self.epsilon)

            # Every parameter has to be reshaped explicitly in this case.
            mov_mean = K.reshape(self.moving_mean, broadcast_shape)
            mov_var = K.reshape(self.moving_variance, broadcast_shape)
            offset = (K.reshape(self.beta, broadcast_shape)
                      if self.center else None)
            scale = (K.reshape(self.gamma, broadcast_shape)
                     if self.scale else None)
            return tf.nn.batch_normalization(inputs, mov_mean, mov_var,
                                             offset, scale, self.epsilon)

        # Learning phase statically set to inference.
        if training in {0, False}:
            return normalize_inference()

        # Learning phase is dynamic or statically set to training.
        normed_training, mean, variance = _regular_normalize_batch_in_training(
            inputs, self.gamma, self.beta, reduction_axes,
            epsilon=self.epsilon)

        if K.backend() != 'cntk':
            axis_sizes = [K.shape(inputs)[axis] for axis in reduction_axes]
            sample_size = K.cast(K.prod(axis_sizes), dtype=K.dtype(inputs))

            # sample variance - unbiased estimator of population variance
            variance *= sample_size / (sample_size - (1.0 + self.epsilon))

        mean_update = K.moving_average_update(self.moving_mean, mean,
                                              self.momentum)
        variance_update = K.moving_average_update(self.moving_variance,
                                                  variance, self.momentum)
        self.add_update([mean_update, variance_update], inputs)

        # Select the output that matches the learning phase.
        return K.in_train_phase(normed_training,
                                normalize_inference,
                                training=training)
Exemple #5
0
 def call(self, x):
     """Return `x` in the training phase and a one-hot Viterbi decoding otherwise.

     The decoding is produced by `viterbi_decode` from `x`, `self.U`,
     `self.b_start` and `self.b_end`; the one-hot depth is read from
     `self.input_spec[0].shape[2]`.
     """
     decoded = viterbi_decode(x, self.U, self.b_start, self.b_end)
     depth = self.input_spec[0].shape[2]
     return K.in_train_phase(x, K.one_hot(decoded, depth))
Exemple #6
0
def PadSymmetricInTestPhase():
    """Build a `Lambda` layer that pads symmetrically outside the training phase.

    In the training phase the input is passed through unchanged; otherwise
    axes 1 and 2 are padded by 2 entries on each side in 'SYMMETRIC' mode.
    The layer is flagged with `uses_learning_phase = True`.
    """
    layer = Lambda(
        lambda tensor: K.in_train_phase(
            tensor,
            tf.pad(tensor,
                   tf.constant([[0, 0], [2, 2], [2, 2], [0, 0]]),
                   'SYMMETRIC')))
    layer.uses_learning_phase = True
    return layer
Exemple #7
0
 def __call__(self, loss):
     """Add the weighted regularization term to `loss` in the training phase.

     The term is -0.5 * mean(1 + p - exp(p)) over all elements of `self.p`,
     scaled by `self.weight`.  Outside the training phase `loss` is returned
     unchanged.
     """
     mean_term = K.mean(1 + self.p - K.exp(self.p), axis=None)
     penalty = -0.5 * mean_term
     return K.in_train_phase(loss + self.weight * penalty, loss)
Exemple #8
0
 def one_zero(x):
     """Return zeros shaped like `x` in the training phase, ones otherwise."""
     train_value = K.zeros_like(x)
     test_value = K.ones_like(x)
     return K.in_train_phase(train_value, test_value)
Exemple #9
0
 def call(self, x, training=None):
     """Add clipped Gaussian noise to `x` in the training phase only.

     The noise is `self.amount` times a `K.random_normal` sample shaped
     like `x`, clipped to [`self.lower`, `self.upper`].
     """
     def noised():
         scaled_noise = self.amount * K.random_normal(K.shape(x))
         return x + K.clip(scaled_noise, self.lower, self.upper)

     return K.in_train_phase(noised, x, training=training)
Exemple #10
0
 def call(self, x, training=None):
     """Add uniform noise to `x` in the training phase only.

     One noise value is drawn per (axis 0, axis 2) position from
     [`self.lower`, `self.upper`) and shared along axis 1.
     """
     def noised():
         size_axis0 = K.shape(x)[0]
         size_axis2 = K.shape(x)[2]
         noise = K.random_uniform((size_axis0, size_axis2),
                                  self.lower, self.upper)
         # Insert a length-1 middle axis so the noise broadcasts over axis 1.
         return x + noise[:, None, :]

     return K.in_train_phase(noised, x, training=training)
Exemple #11
0
 def call(self, inputs):
     """Look up `inputs` in the embedding matrix, with optional row dropout.

     Indices are cast to int32 when needed.  When `self.dropout_rate` is
     positive, dropout with noise shape `[self.input_dim, 1]` is applied to
     the embedding matrix in the training phase only.
     """
     if K.dtype(inputs) != 'int32':
         inputs = K.cast(inputs, 'int32')

     if self.dropout_rate > 0:
         dropped = K.dropout(self.embeddings,
                             self.dropout_rate,
                             noise_shape=[self.input_dim, 1])
         table = K.in_train_phase(dropped, self.embeddings)
     else:
         table = self.embeddings

     return K.gather(table, inputs)
    def call(self, inputs, training=None):
        """Batch-normalize `inputs` with parameters repeated `self.n` times.

        Each entry of gamma, beta and the moving statistics is tiled
        `self.n` times and flattened before use; the batch statistics are
        averaged back over groups (size chosen from `self.h`) before they
        update the moving averages.

        # Arguments
            inputs: Input tensor.
            training: Learning-phase flag; `None` falls back to
                `K.learning_phase()`.  A non-trainable layer always uses
                the inference path.

        # Returns
            The normalized tensor.

        # Raises
            ValueError: if `self.h` is not one of 'C4', 'D4' or 'Z2'
                (raised when the moving-average updates are built).
        """
        def tile_flat(weight):
            # Repeat every entry self.n times along a new last axis, then
            # flatten to one dimension.
            tiled = K.tile(K.expand_dims(weight, -1), [1, self.n])
            return K.reshape(tiled, [-1])

        # These were moved here from build() because tf2 eager was not
        # tracking gradients:
        repeated_gamma = tile_flat(self.gamma)
        repeated_beta = tile_flat(self.beta)
        repeated_moving_mean = tile_flat(self.moving_mean)
        repeated_moving_variance = tile_flat(self.moving_variance)

        def unrepeat(w):
            """Average `w` over consecutive groups whose size depends on `self.h`."""
            if self.h == 'C4':
                group_size = 4
            elif self.h == 'D4':
                group_size = 8
            elif self.h == 'Z2':
                group_size = 1
            else:
                raise ValueError('Wrong h: %s' % self.h)

            groups = K.int_shape(w)[0] // group_size
            return K.mean(K.reshape(w, (groups, group_size)), -1)

        static_shape = K.int_shape(inputs)
        rank = len(static_shape)

        # Statistics are reduced over every axis except the normalized one.
        reduction_axes = list(range(rank))
        del reduction_axes[self.axis]

        # Shape used to reshape per-feature parameters against `inputs`.
        broadcast_shape = [1] * rank
        broadcast_shape[self.axis] = static_shape[self.axis]

        # No reshaping is required when the normalized axis is the last one.
        needs_broadcasting = sorted(reduction_axes) != list(range(rank))[:-1]

        def normalize_inference():
            """Normalize with the (repeated) moving mean and variance."""
            if not needs_broadcasting:
                return K.batch_normalization(inputs,
                                             repeated_moving_mean,
                                             repeated_moving_variance,
                                             repeated_beta,
                                             repeated_gamma,
                                             epsilon=self.epsilon)

            # Every parameter has to be reshaped explicitly in this case.
            mov_mean = K.reshape(repeated_moving_mean, broadcast_shape)
            mov_var = K.reshape(repeated_moving_variance, broadcast_shape)
            offset = K.reshape(repeated_beta, broadcast_shape)
            scale = K.reshape(repeated_gamma, broadcast_shape)
            return K.batch_normalization(inputs,
                                         mov_mean,
                                         mov_var,
                                         offset,
                                         scale,
                                         epsilon=self.epsilon)

        def resolve_training(flag, trainable_flag):
            """
            Decide whether the layer runs in training or inference mode.
            Modified from https://git.io/JUGHX
            flag: the `training` value the layer was called with.
            trainable_flag: whether the layer is trainable.
            """
            if flag is None:
                flag = K.learning_phase()

            if isinstance(flag, int):
                flag = bool(flag)

            # A non-trainable layer overrides whatever the model passed in.
            if trainable_flag is False:
                flag = False

            return flag

        # Learning phase statically resolved to inference.
        if resolve_training(training, self.trainable) is False:
            return normalize_inference()

        # Learning phase is dynamic or statically set to training.
        normed_training, mean, variance = K.normalize_batch_in_training(
            inputs,
            repeated_gamma,
            repeated_beta,
            reduction_axes,
            epsilon=self.epsilon)

        if K.backend() != 'cntk':
            axis_sizes = [K.shape(inputs)[axis] for axis in reduction_axes]
            sample_size = K.cast(K.prod(axis_sizes), dtype=K.dtype(inputs))

            # sample variance - unbiased estimator of population variance
            variance *= sample_size / (sample_size - (1.0 + self.epsilon))

        mean_update = K.moving_average_update(self.moving_mean,
                                              unrepeat(mean),
                                              self.momentum)
        variance_update = K.moving_average_update(self.moving_variance,
                                                  unrepeat(variance),
                                                  self.momentum)
        self.add_update([mean_update, variance_update], inputs)

        # Select the output that matches the learning phase.
        return K.in_train_phase(normed_training,
                                normalize_inference,
                                training=training)
    def call(self, inputs, training=None):
        """Complex batch normalization of `inputs` along `self.axis`.

        The feature axis is treated as two halves, the first holding the
        real parts and the second the imaginary parts.  The batch mean and,
        when `self.scale` is set, the batch covariances Vrr, Vii and Vri
        are computed and handed to `complex_batchnorm`.

        # Arguments
            inputs: Input tensor.  The real/imaginary split below indexes
                four axes and slices the last one.
            training: Learning-phase flag.  A static `0`/`False` returns
                the batch-statistics result directly; otherwise moving
                average updates are registered and `K.in_train_phase`
                chooses between batch and moving statistics.

        # Returns
            The normalized tensor.

        # Raises
            ValueError: if both `self.scale` and `self.center` are False.
        """
        shape = K.int_shape(inputs)
        rank = len(shape)

        # Reduce over every axis except the feature axis.
        reduction_axes = list(range(rank))
        del reduction_axes[self.axis]

        # Half of the feature axis: the size of the real (or imaginary) part.
        input_dim = shape[self.axis] // 2

        # Per-feature mean, covering the real and the imaginary features.
        mu = K.mean(inputs, axis=reduction_axes)

        broadcast_mu_shape = [1] * rank
        broadcast_mu_shape[self.axis] = shape[self.axis]
        broadcast_mu = K.reshape(mu, broadcast_mu_shape)

        # Subtract the real mean from the real parts and the imaginary mean
        # from the imaginary parts; centred_squared == (x - E(x))^2.
        input_centred = inputs - broadcast_mu if self.center else inputs
        centred_squared = input_centred ** 2

        # Real/imaginary split, written for 4-D inputs (marked 'for Conv2D'
        # by the original author).
        centred_squared_real = centred_squared[:, :, :, :input_dim]
        centred_squared_imag = centred_squared[:, :, :, input_dim:]
        centred_real = input_centred[:, :, :, :input_dim]
        centred_imag = input_centred[:, :, :, input_dim:]

        if self.scale:
            Vrr = K.mean(centred_squared_real, axis=reduction_axes) + self.epsilon
            Vii = K.mean(centred_squared_imag, axis=reduction_axes) + self.epsilon
            # Covariance between real and imaginary parts of each feature map.
            Vri = K.mean(centred_real * centred_imag, axis=reduction_axes)
        elif self.center:
            Vrr = Vii = Vri = None
        else:
            raise ValueError('Error. Both scale and center in batchnorm are set to False.')

        # 1. Normalize the real and imaginary parts with the batch statistics.
        # 2. Unless the learning phase is statically inference, register
        #    updates of moving_mean / moving_Vrr / moving_Vii / moving_Vri
        #    according to self.center and self.scale.
        input_bn = complex_batchnorm(input_centred, Vrr, Vii, Vri,
                                     self.beta, self.gamma_rr, self.gamma_ri,
                                     self.gamma_ii, self.scale, self.center,
                                     axis=self.axis)

        if training in {0, False}:
            return input_bn

        # Learning phase is dynamic or statically set to training.
        update_list = []
        if self.center:
            update_list.append(
                K.moving_average_update(self.moving_mean, mu, self.momentum))
        if self.scale:
            update_list.append(
                K.moving_average_update(self.moving_Vrr, Vrr, self.momentum))
            update_list.append(
                K.moving_average_update(self.moving_Vii, Vii, self.momentum))
            update_list.append(
                K.moving_average_update(self.moving_Vri, Vri, self.momentum))
        self.add_update(update_list, inputs)

        def normalize_inference():
            """Normalize with the moving mean and moving covariances."""
            if self.center:
                inference_centred = inputs - K.reshape(self.moving_mean,
                                                       broadcast_mu_shape)
            else:
                inference_centred = inputs
            return complex_batchnorm(inference_centred,
                                     self.moving_Vrr, self.moving_Vii,
                                     self.moving_Vri, self.beta,
                                     self.gamma_rr, self.gamma_ri,
                                     self.gamma_ii, self.scale, self.center,
                                     axis=self.axis)

        # Select the output that matches the learning phase.
        return K.in_train_phase(input_bn, normalize_inference,
                                training=training)
Exemple #14
0
 def _apply_dropout(self, inputs):
     """Apply dropout at rate `self.dropout` in the training phase only."""
     return K.in_train_phase(K.dropout(inputs, self.dropout), inputs)
Exemple #15
0
    def call(self,
             inputs,
             initial_state=None,
             initial_readout=None,
             ground_truth=None,
             mask=None,
             training=None):
        """Run the wrapped recurrent model over `inputs`.

        # Arguments
            inputs: Input tensor, or a list whose first element is the
                input tensor, followed by the initial states and, when
                `self.readout` is set, the initial readout (with the ground
                truth as last element when `self.teacher_force` is set).
            initial_state: Optional state tensor or list/tuple of state
                tensors; only consulted when `inputs` is not a list.
            initial_readout: Optional initial readout tensor; zeros are
                built when it is missing or an optional placeholder.
            ground_truth: Tensor required when `self.teacher_force` is set.
            mask: Optional mask, or a list whose first element is used.
            training: Learning-phase flag forwarded to `K.in_train_phase`
                when the layer uses the learning phase.

        # Returns
            The full output sequence when `self.return_sequences` is set,
            otherwise the last output; returned as a list followed by the
            final states when `self.return_states` is set.

        # Raises
            Exception: if `self.model` is None, or if teacher forcing is
                enabled without a ground truth.
            ValueError: if the number of initial states does not match the
                number required, or if unrolling is requested while the
                time dimension is undefined.
        """
        # input shape: `(samples, time (padded with zeros), input_dim)`
        # note that the .build() method of subclasses MUST define
        # self.input_spec and self.state_spec with complete input shapes.
        if type(mask) is list:
            mask = mask[0]
        if self.model is None:
            raise Exception('Empty RecurrentModel.')
        num_req_states = self.num_states
        # With readout enabled the last required state is the readout itself.
        if self.readout:
            num_actual_states = num_req_states - 1
        else:
            num_actual_states = num_req_states
        if type(inputs) is list:
            inputs_list = inputs[:]
            inputs = inputs_list.pop(0)
            initial_states = inputs_list[:num_actual_states]
            if len(initial_states) > 0:
                if self._is_optional_input_placeholder(initial_states[0]):
                    initial_states = self.get_initial_state(inputs)
            inputs_list = inputs_list[num_actual_states:]
            if self.readout:
                initial_readout = inputs_list.pop(0)
                if self.teacher_force:
                    ground_truth = inputs_list.pop()
        else:
            if initial_state is not None:
                if not isinstance(initial_state, (list, tuple)):
                    initial_states = [initial_state]
                else:
                    initial_states = list(initial_state)
                if self._is_optional_input_placeholder(initial_states[0]):
                    initial_states = self.get_initial_state(inputs)

            elif self.stateful:
                # Work on a copy: the list is extended in place below
                # (readout, counter, ground truth, decoder input) and the
                # layer's own `self.states` must not grow with every call.
                initial_states = list(self.states)
            else:
                initial_states = self.get_initial_state(inputs)
        if self.readout:
            if initial_readout is None or self._is_optional_input_placeholder(
                    initial_readout):
                # Build an all-zero readout shaped like the model output,
                # with the batch dimension taken from `inputs`.
                output_shape = K.int_shape(_to_list((self.model.output))[0])
                output_ndim = len(output_shape)
                input_ndim = K.ndim(inputs)
                initial_readout = K.zeros_like(inputs)
                slices = [slice(None)] + [0] * (input_ndim - 1)
                initial_readout = initial_readout[slices]  # (batch_size,)
                initial_readout = K.reshape(initial_readout,
                                            (-1, ) + (1, ) * (output_ndim - 1))
                initial_readout = K.tile(initial_readout,
                                         (1, ) + tuple(output_shape[1:]))
            initial_states.append(initial_readout)
            if self.teacher_force:
                if ground_truth is None or self._is_optional_input_placeholder(
                        ground_truth):
                    raise Exception(
                        'ground_truth must be provided for RecurrentModel with teacher_force=True.'
                    )
                if K.backend() == 'tensorflow':
                    with tf.control_dependencies(None):
                        counter = K.zeros((1, ))
                else:
                    counter = K.zeros((1, ))
                counter = K.cast(counter, 'int32')
                # Resulting tail of the state list:
                # [..., counter, ground_truth, readout]
                initial_states.insert(-1, counter)
                initial_states.insert(-1, ground_truth)
                num_req_states += 2
        if len(initial_states) != num_req_states:
            raise ValueError('Layer requires ' + str(num_req_states) +
                             ' states but was passed ' +
                             str(len(initial_states)) + ' initial states.')
        input_shape = K.int_shape(inputs)
        if self.unroll and input_shape[1] is None:
            raise ValueError('Cannot unroll a RNN if the '
                             'time dimension is undefined. \n'
                             '- If using a Sequential model, '
                             'specify the time dimension by passing '
                             'an `input_shape` or `batch_input_shape` '
                             'argument to your first layer. If your '
                             'first layer is an Embedding, you can '
                             'also use the `input_length` argument.\n'
                             '- If using the functional API, specify '
                             'the time dimension by passing a `shape` '
                             'or `batch_shape` argument to your Input layer.')
        # NOTE(review): `training=None` is passed here rather than the
        # caller's `training` value -- confirm that this is intended.
        preprocessed_input = self.preprocess_input(inputs, training=None)
        constants = self.get_constants(inputs, training=None)
        if self.decode:
            # In decode mode the input travels as the first state and the
            # RNN iterates over a dummy sequence of length output_length.
            initial_states.insert(0, inputs)
            preprocessed_input = K.zeros((1, self.output_length, 1))
            input_length = self.output_length
        else:
            input_length = input_shape[1]
        if self.uses_learning_phase:
            # Build the recurrence once per learning phase and select
            # between the two results below.
            with learning_phase_scope(0):
                last_output_test, outputs_test, states_test, updates = rnn(
                    self.step,
                    preprocessed_input,
                    initial_states,
                    go_backwards=self.go_backwards,
                    mask=mask,
                    constants=constants,
                    unroll=self.unroll,
                    input_length=input_length)
            with learning_phase_scope(1):
                last_output_train, outputs_train, states_train, updates = rnn(
                    self.step,
                    preprocessed_input,
                    initial_states,
                    go_backwards=self.go_backwards,
                    mask=mask,
                    constants=constants,
                    unroll=self.unroll,
                    input_length=input_length)

            last_output = K.in_train_phase(last_output_train,
                                           last_output_test,
                                           training=training)
            outputs = K.in_train_phase(outputs_train,
                                       outputs_test,
                                       training=training)
            states = []
            for state_train, state_test in zip(states_train, states_test):
                states.append(
                    K.in_train_phase(state_train,
                                     state_test,
                                     training=training))

        else:
            last_output, outputs, states, updates = rnn(
                self.step,
                preprocessed_input,
                initial_states,
                go_backwards=self.go_backwards,
                mask=mask,
                constants=constants,
                unroll=self.unroll,
                input_length=input_length)
        states = list(states)
        # Strip the bookkeeping entries that were added to the state list
        # above (decoder input, readout, counter and ground truth).
        if self.decode:
            states.pop(0)
        if self.readout:
            states.pop()
            if self.teacher_force:
                states.pop()
                states.pop()
        if len(updates) > 0:
            self.add_update(updates)
        if self.stateful:
            # Carry the final states over to the next call.
            updates = []
            for i in range(len(states)):
                updates.append((self.states[i], states[i]))
            self.add_update(updates, inputs)

        # Properly set learning phase
        if 0 < self.dropout + self.recurrent_dropout:
            last_output._uses_learning_phase = True
            outputs._uses_learning_phase = True

        if self.return_sequences:
            y = outputs
        else:
            y = last_output
        if self.return_states:
            return [y] + states
        else:
            return y
Exemple #16
0
    def call(self, inputs, training=None):
        """Normalize ``inputs`` with a learned blend of three sets of statistics.

        Three mean/variance pairs are computed and mixed with the
        softmax-normalized ``self.mean_weights`` / ``self.variance_weights``:

        * "instance" statistics, reduced over ``reduction`` axes (every axis
          except ``self.axis`` and, when ``self.axis != 0``, the first
          remaining axis -- presumably the batch axis);
        * "layer" statistics, the instance statistics averaged over
          ``self.axis``;
        * "batch" statistics, the instance statistics averaged over axis 0
          in the training path, or ``self.moving_mean`` /
          ``self.moving_variance`` in the inference path.

        Args:
            inputs: Input tensor.
            training: Learning-phase flag forwarded to ``K.in_train_phase``.
                ``0`` / ``False`` selects the inference path directly.

        Returns:
            The normalized tensor, multiplied by ``self.gamma`` when
            ``self.scale`` is set and shifted by ``self.beta`` when
            ``self.center`` is set.
        """
        static_shape = K.int_shape(inputs)
        rank = len(static_shape)

        # Axes to reduce over: drop ``self.axis``, then (unless it is 0) also
        # drop the first axis that remains.
        reduction = list(range(rank))
        reduction.pop(self.axis)
        if self.axis != 0:
            reduction.pop(0)

        # Shape that lets per-feature vectors broadcast against ``inputs``.
        param_shape = [1] * rank
        param_shape[self.axis] = static_shape[self.axis]

        instance_mean = K.mean(inputs, reduction, keepdims=True)
        instance_var = K.var(inputs, reduction, keepdims=True)

        layer_mean = K.mean(instance_mean, self.axis, keepdims=True)
        # E[x^2] per instance; averaging it and subtracting the squared mean
        # yields the variance of the pooled group.
        second_moment = instance_var + K.square(instance_mean)
        layer_var = (K.mean(second_moment, self.axis, keepdims=True)
                     - K.square(layer_mean))

        def blend_and_normalize(batch_mean, batch_var):
            """Mix the three statistics and apply them to ``inputs``."""
            batch_mean = K.reshape(batch_mean, param_shape)
            batch_var = K.reshape(batch_var, param_shape)

            w_mean = K.softmax(self.mean_weights, axis=0)
            w_var = K.softmax(self.variance_weights, axis=0)

            mean = (w_mean[0] * instance_mean
                    + w_mean[1] * layer_mean
                    + w_mean[2] * batch_mean)
            variance = (w_var[0] * instance_var
                        + w_var[1] * layer_var
                        + w_var[2] * batch_var)

            normed = (inputs - mean) / (K.sqrt(variance + self.epsilon))

            if self.scale:
                normed = normed * K.reshape(self.gamma, param_shape)
            if self.center:
                normed = normed + K.reshape(self.beta, param_shape)
            return normed

        def run_training():
            """Use current batch statistics and refresh the moving averages."""
            batch_mean = K.mean(instance_mean, axis=0, keepdims=True)
            batch_var = (K.mean(second_moment, axis=0, keepdims=True)
                         - K.square(batch_mean))

            flat_mean = K.flatten(batch_mean)
            flat_var = K.flatten(batch_var)

            if K.backend() != 'cntk':
                dims = [K.shape(inputs)[axis] for axis in reduction]
                sample_size = K.cast(K.prod(dims), dtype=K.dtype(inputs))

                # Rescale towards the unbiased sample variance before it is
                # folded into the moving average.
                flat_var *= sample_size / (sample_size - (1.0 + self.epsilon))

            moving_updates = [
                K.moving_average_update(self.moving_mean, flat_mean,
                                        self.momentum),
                K.moving_average_update(self.moving_variance, flat_var,
                                        self.momentum),
            ]
            self.add_update(moving_updates)

            return blend_and_normalize(batch_mean, batch_var)

        def run_inference():
            """Use the stored moving averages as the batch statistics."""
            return blend_and_normalize(self.moving_mean, self.moving_variance)

        # A statically-known "not training" flag skips the phase switch.
        if training in {0, False}:
            return run_inference()

        return K.in_train_phase(run_training, run_inference, training=training)
Exemple #17
0
    def call(self, x, training=None):
        """Run the two dense projections on the non-padding positions of ``x``.

        Positions whose features all equal ``self.padding_value`` are removed
        before the projections and re-inserted afterwards, so no computation
        is spent on padding. Re-inserted padding positions are zero-filled
        (``tf.scatter_nd`` leaves unwritten rows at zero).

        Args:
            x: Rank-3 tensor (the shape is unpacked into three entries;
                presumably ``(batch, length, input_dim)``). ``length`` and
                ``input_dim`` must be statically known because they are used
                in ``reshape`` / ``set_shape``.
            training: Learning-phase flag forwarded to ``K.in_train_phase``
                for the dropout between the two projections.

        Returns:
            Tensor reshaped to ``(-1, length, self.hidden_size)``.
        """
        # Only the static sequence length and feature size are needed below.
        _, length, input_dim = x.shape.as_list()

        # A position is padding when every feature equals ``padding_value``.
        is_padding_value = tf.equal(x, self.padding_value)
        is_padding = tf.reduce_all(is_padding_value, axis=-1, keepdims=True)
        is_padding = tf.cast(is_padding, tf.float32)

        pad_mask = tf.reshape(is_padding, [-1])
        non_pad_indices = tf.cast(tf.where(pad_mask < self.epsilon), tf.int32)

        # Flatten to [batch_size*length, input_dim] and drop padding rows.
        x = tf.reshape(x, [-1, input_dim])
        # Remember how many rows there were so the result can be scattered
        # back into a tensor of the same leading size.
        num_positions = tf.shape(x)[0]
        x = tf.gather_nd(x, indices=non_pad_indices)

        # gather_nd loses the static feature size; restore it, then add a
        # leading axis so the projections see a rank-3 tensor.
        x.set_shape([None, input_dim])
        x = tf.expand_dims(x, axis=0)

        output = K.dot(x, self.kernel_filter) + self.bias_filter

        if self.activation is not None:
            output = self.activation(output)

        # Dropout (training phase only).
        if 0.0 < self.rate < 1.0:
            noise_shape = self._get_noise_shape(output)

            def dropped_inputs():
                return K.dropout(
                    x=output,
                    level=self.rate,
                    noise_shape=noise_shape,
                    seed=self.seed)

            output = K.in_train_phase(
                dropped_inputs,
                output,
                training=training)

        # Second dense projection.
        output = K.dot(output, self.kernel_hidden) + self.bias_hidden
        if self.activation is not None:
            output = self.activation(output)

        output = tf.squeeze(output, axis=0)

        # scatter_nd needs the concrete output shape (it does not accept -1
        # as a wildcard), so use the dynamic row count captured above.
        scatter_shape = tf.stack([num_positions, self.hidden_size])
        output = tf.scatter_nd(
            indices=non_pad_indices,
            updates=output,
            shape=scatter_shape)

        out_shape = (-1, length, self.hidden_size)
        output = tf.reshape(output, out_shape)

        return output
Exemple #18
0
 def call(self, layer, inputs, *args, **kwargs):
     """Apply ``layer`` to ``inputs``, randomly bypassing it during training.

     In the training phase a single uniform sample is drawn; when it exceeds
     ``self.rate`` the layer output is used, otherwise ``inputs`` is passed
     through unchanged. Outside the training phase the layer output is
     always used.

     Args:
         layer: Callable applied as ``layer(inputs, *args, **kwargs)``.
         inputs: Tensor fed to ``layer`` and returned as-is when skipped.
         *args: Extra positional arguments forwarded to ``layer``.
         **kwargs: Extra keyword arguments forwarded to ``layer``.

     Returns:
         The phase-dependent output tensor.
     """
     keep_layer = K.random_uniform([]) > self.rate
     # Invoke the wrapped layer once and reuse the result for both phases;
     # calling it twice would duplicate its ops and register any updates it
     # adds a second time.
     layer_output = layer(inputs, *args, **kwargs)
     return K.in_train_phase(
         K.switch(keep_layer, layer_output, inputs),
         layer_output)
Exemple #19
0
 def call(self, inputs, **kwargs):
     """Return ones shaped like ``inputs`` in training, zeros otherwise.

     Args:
         inputs: Tensor whose shape and dtype the result copies.
         **kwargs: May carry ``training``, forwarded to
             ``K.in_train_phase`` (``None`` when absent).

     Returns:
         The tensor selected by ``K.in_train_phase``.
     """
     training_flag = kwargs.get('training')
     train_value = tf.ones_like(inputs)
     test_value = tf.zeros_like(inputs)
     return K.in_train_phase(train_value, test_value, training=training_flag)