Code Example #1
File: AbstractRNN.py Project: aniltrue/MLProject
    def call(self, inputs, **kwargs):
        input_shape = K.int_shape(inputs)

        def step_f(inputs, states):
            output, new_states = self.cell_f.call(inputs, states, **kwargs)

            return output, new_states

        def step_b(inputs, states):
            output, new_states = self.cell_b.call(inputs, states, **kwargs)

            return output, new_states

        initial_states_f = self.cell_f.get_initial_state(inputs)
        initial_states_b = self.cell_b.get_initial_state(inputs)

        last_output_f, outputs_f, states_f = K.rnn(step_f, inputs, initial_states=initial_states_f, go_backwards=False, input_length=input_shape[1])
        last_output_b, outputs_b, states_b = K.rnn(step_b, inputs, initial_states=initial_states_b, go_backwards=True, input_length=input_shape[1])

        last_output = K.concatenate([last_output_f, last_output_b])
        outputs = K.concatenate([outputs_f, outputs_b])

        if self.return_sequences:
            return outputs
        else:
            return last_output
Code Example #2
File: attention.py Project: airenas/punctuation
    def call(self, inputs):
        """
        inputs: [encoder_output_sequence, decoder_output_sequence, encoder_last_state]
        """
        assert type(inputs) == list
        encoder_out_seq, decoder_out_seq, initState = inputs

        projected_context = K.dot(encoder_out_seq, self.W_projected) + self.B_projected

        dec_hidden = decoder_out_seq.shape[2]
        timesteps = encoder_out_seq.shape[1]

        def step(inputs, states):
            state = states[0]
            ha = K.expand_dims(K.dot(state, self.W_a), 1)
            e = K.tanh(projected_context + ha)
            alphas = K.exp(K.dot(e, self.V_a))
            alphas = K.reshape(alphas, (-1, timesteps))
            alphas = alphas / (K.sum(alphas, axis=1, keepdims=True) + K.epsilon())
            weighted_context = encoder_out_seq * alphas[:, :, None]
            weighted_context = K.sum(weighted_context, axis=1)
            return weighted_context, [inputs]
        # initState = K.zeros_like(projected_context[:, 1, 0:dec_hidden])
        _, wc, _ = K.rnn(step, decoder_out_seq, [initState])
        return wc
Code Example #3
    def call(self,
             inputs,
             mask=None,
             training=None,
             initial_state=None,
             constants=None):
        if isinstance(inputs, list):
            if self._num_constants is None: initial_state = inputs[1:]
            else: initial_state = inputs[1:-self._num_constants]
            inputs = inputs[0]
        input_shape = K.int_shape(inputs)
        timesteps = input_shape[1]

        kwargs = {}

        def step(inputs, states):
            constants = states[-self._num_constants:]
            states = states[:-self._num_constants]
            return self.cell.call(inputs,
                                  states,
                                  constants=constants,
                                  **kwargs)

        last_output, outputs, states = K.rnn(step,
                                             inputs,
                                             initial_state,
                                             constants=constants,
                                             go_backwards=self.go_backwards,
                                             mask=mask,
                                             unroll=False,
                                             input_length=timesteps)
        output = outputs if self.return_sequences else last_output
        return output
Code Example #4
    def call(self, x, mask=None):
        input_shape = self.input_spec[0].shape #input_shape = (None, 4, 512, 30, 40)
        initial_states = self.get_initial_states(x) #x.shape = (1, 4, 512, 30, 40) output shape=(1, 512, 30, 40)
        constants = self.get_constants(x)
        preprocessed_input = self.preprocess_input(x) #output shape=(1, 4, 512, 30, 40)

        #print("______________Reached here________________")
        last_output, outputs, states = K.rnn(self.step, preprocessed_input,
                                             initial_states,
                                             go_backwards=False,
                                             mask=mask,
                                             constants=constants,
                                             unroll=False,
                                             input_length=input_shape[1])
        
        #print("At the output ______________________")
        #print(last_output) # shape=(1, 512, 30, 40)
        #print(outputs)     # shape=(1, 4, 512, 30, 40)
        #print(states)      # shape=(1, 512, 30, 40)
        if last_output.get_shape().ndims == 3:            # New version
            last_output = K.expand_dims(last_output, dim=0)
        #if last_output.ndim == 3:
        #    last_output = K.expand_dims(last_output, dim=0)

        print("Attentive ConvLSTM network loaded")
        return last_output
Code Example #5
File: layers.py Project: adowu/bert4one
 def dense_loss(self, y_true, y_pred):
     """y_true must be in one-hot form
     """
     # Derive the mask and cast its dtype
     # [B, T, 1]: checks whether each timestep T is padding
     mask = K.all(K.greater(y_pred, -1e6), axis=2, keepdims=True)
     mask = K.cast(mask, K.floatx())
     # Compute the target score
     y_true, y_pred = y_true * mask, y_pred * mask
     target_score = self.target_score(y_true, y_pred)
     # Recursively compute log Z
     init_states = [y_pred[:, 0]]
     # [B, T, output_dim] [B, T, 1] -> [B, T, output_dim+1]
     # concatenated so that the mask is passed into the rnn
     y_pred = K.concatenate([y_pred, mask], axis=2)
     input_length = K.int_shape(y_pred[:, 1:])[1]
     # K.rnn splits y_pred[:, 1:] along the time dimension and iterates step by step
     log_norm, _, _ = K.rnn(
         self.log_norm_step,
         y_pred[:, 1:],
         init_states,
         input_length=input_length
     )  # log Z vector at the final step
     log_norm = tf.reduce_logsumexp(log_norm, 1)  # logsumexp gives a scalar (per sample)
     # Loss: -log p
     return log_norm - target_score
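self.log_norm_step is referenced above but not shown. As a rough orientation only, here is a minimal sketch of such a step function, assuming a learned transition matrix self.trans of shape [output_dim, output_dim] (the attribute name is an assumption, not part of this snippet):

 def log_norm_step(self, inputs, states):
     # inputs: [B, output_dim + 1] = emission scores for step t plus the mask bit appended above
     # states[0]: [B, output_dim] log-alpha carried over from the previous step
     inputs, mask = inputs[:, :-1], inputs[:, -1:]
     prev = K.expand_dims(states[0], 2)              # [B, output_dim, 1]
     trans = K.expand_dims(self.trans, 0)            # [1, output_dim, output_dim]  (assumed attribute)
     outputs = tf.reduce_logsumexp(prev + trans, 1)  # [B, output_dim]
     outputs = outputs + inputs
     outputs = mask * outputs + (1 - mask) * states[0]  # carry the state through padded steps
     return outputs, [outputs]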
Code Example #6
    def call(self, x, mask=None):
        # input shape: (nb_samples, time (padded with zeros), input_dim)
        input_shape = self.input_spec[0].shape
        if isinstance(x, (tuple, list)):
            # x, *custom_initial = x
            custom_initial = x[1:]
            x = x[0]
        else:
            custom_initial = None
        if K._BACKEND == 'tensorflow':
            if not input_shape[1]:
                raise Exception('When using TensorFlow, you should define '
                                'explicitly the number of timesteps of '
                                'your sequences.\n'
                                'If your first layer is an Embedding, '
                                'make sure to pass it an "input_length" '
                                'argument. Otherwise, make sure '
                                'the first layer has '
                                'an "input_shape" or "batch_input_shape" '
                                'argument, including the time axis. '
                                'Found input shape at layer ' + self.name +
                                ': ' + str(input_shape))
        if self.stateful and custom_initial:
            raise Exception(('Initial states should not be specified '
                             'for stateful LSTMs, since they would overwrite '
                             'the memorized states.'))
        elif custom_initial:
            initial_states = custom_initial
        elif self.stateful:
            initial_states = self.states
        else:
            initial_states = self.get_initial_states(x)
        constants = self.get_constants(x)
        preprocessed_input = self.preprocess_input(x)

        # only use the main input mask
        if isinstance(mask, list):
            mask = mask[0]

        last_output, outputs, states = K.rnn(self.step,
                                             preprocessed_input,
                                             initial_states,
                                             go_backwards=self.go_backwards,
                                             mask=mask,
                                             constants=constants,
                                             unroll=self.unroll,
                                             input_length=input_shape[1])
        if self.stateful:
            self.updates = []
            for i in range(len(states)):
                self.updates.append((self.states[i], states[i]))

        if self.return_sequences:
            return [outputs] + states
        else:
            return [last_output] + states
Code Example #7
File: crf_keras.py Project: jinxueyu/keras-models
 def loss(self, y_true, y_pred):  # the target y_pred must be in one-hot form
     if self.ignore_last_label:
         mask = 1 - y_true[:, :, -1:]
     else:
         mask = K.ones_like(y_pred[:, :, :1])
     y_true, y_pred = y_true[:, :, :self.num_labels], y_pred[:, :, :self.num_labels]
     path_score = self.path_score(y_pred, y_true)  # compute the numerator (in log space)
     init_states = [y_pred[:, 0]]  # initial state
     y_pred = K.concatenate([y_pred, mask])
     log_norm, _, _ = K.rnn(self.log_norm_step, y_pred[:, 1:], init_states)  # compute the Z vector (in log space)
     log_norm = logsumexp(log_norm, 1, keepdims=True)  # compute Z (in log space)
     return log_norm - path_score  # i.e. log(numerator / denominator)
Code Example #8
 def loss(self, y_true, y_pred):  # the target y_pred must be in one-hot form
     mask = 1 - y_true[:, 1:, -1] if self.ignore_last_label else None
     y_true, y_pred = y_true[:, :, :self.num_labels], y_pred[:, :, :self.num_labels]
     init_states = [y_pred[:, 0]]  # initial state
     log_norm, _, _ = K.rnn(self.log_norm_step,
                            y_pred[:, 1:],
                            init_states,
                            mask=mask)  # compute the Z vector (in log space)
     log_norm = tf.math.reduce_logsumexp(log_norm, 1,
                                         keepdims=True)  # compute Z (in log space)
     path_score = self.path_score(y_pred, y_true)  # compute the numerator (in log space)
     return tf.math.subtract(log_norm, path_score)  # i.e. log(numerator / denominator)
Code Example #9
    def recursion(self, input_energy, mask=None, go_backwards=False,
                  return_sequences=True, return_logZ=True, input_length=None):
        """Forward (alpha) or backward (beta) recursion
        If `return_logZ = True`, compute the logZ, the normalization constant:
        \[ Z = \sum_{y1, y2, y3} exp(-E) # energy
          = \sum_{y1, y2, y3} exp(-(u1' y1 + y1' W y2 + u2' y2 + y2' W y3 + u3' y3))
          = sum_{y2, y3} (exp(-(u2' y2 + y2' W y3 + u3' y3))
          sum_{y1} exp(-(u1' y1' + y1' W y2))) \]
        Denote:
            \[ S(y2) := sum_{y1} exp(-(u1' y1 + y1' W y2)), \]
            \[ Z = sum_{y2, y3} exp(log S(y2) - (u2' y2 + y2' W y3 + u3' y3)) \]
            \[ logS(y2) = log S(y2) = log_sum_exp(-(u1' y1' + y1' W y2)) \]
        Note that:
              yi's are one-hot vectors
              u1, u3: boundary energies have been merged
        If `return_logZ = False`, compute the Viterbi's best path lookup table.
        """
        chain_energy = self.chain_kernel
        # shape=(1, F, F): F=num of output features. 1st F is for t-1, 2nd F for t
        chain_energy = K.expand_dims(chain_energy, 0)
        # shape=(B, F), dtype=float32
        prev_target_val = K.zeros_like(input_energy[:, 0, :])

        if go_backwards:
            input_energy = K.reverse(input_energy, 1)
            if mask is not None:
                mask = K.reverse(mask, 1)

        initial_states = [prev_target_val, K.zeros_like(prev_target_val[:, :1])]
        constants = [chain_energy]

        if mask is not None:
            mask2 = K.cast(K.concatenate([mask, K.zeros_like(mask[:, :1])], axis=1),
                           K.floatx())
            constants.append(mask2)

        def _step(input_energy_i, states):
            return self.step(input_energy_i, states, return_logZ)

        target_val_last, target_val_seq, _ = K.rnn(_step, input_energy,
                                                   initial_states,
                                                   constants=constants,
                                                   input_length=input_length,
                                                   unroll=self.unroll)

        if return_sequences:
            if go_backwards:
                target_val_seq = K.reverse(target_val_seq, 1)
            return target_val_seq
        else:
            return target_val_last
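For readers following the docstring, the alpha recursion it describes can be written out directly. Below is a small self-contained sketch (independent of this class, and using a score convention rather than the energy convention above) of the forward pass in log space:

import numpy as np

def log_partition(unary, trans):
    # unary: (T, F) per-step scores; trans: (F, F) transition scores, trans[i, j] = score of tag i -> tag j
    alpha = unary[0]                                         # log-alpha at t = 0, shape (F,)
    for t in range(1, unary.shape[0]):
        scores = alpha[:, None] + trans + unary[t][None, :]  # (F_prev, F_cur)
        m = scores.max(axis=0)
        alpha = m + np.log(np.exp(scores - m).sum(axis=0))   # logsumexp over the previous tag
    m = alpha.max()
    return m + np.log(np.exp(alpha - m).sum())               # log Z

# toy check: 3 timesteps, 2 tags
print(log_partition(np.array([[0.5, 0.1], [0.2, 0.7], [0.3, 0.3]]),
                    np.array([[0.1, -0.2], [0.0, 0.4]])))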
Code Example #10
 def call(self, inputs):
     initial_states = [
         K.zeros((K.shape(inputs)[0], self.units)),
         K.zeros((K.shape(inputs)[0], self.units))
     ]  # define the initial states (all zeros)
     outputs = K.rnn(self.one_step, inputs, initial_states)
     self.distance = 1 - K.mean(
         outputs[1][..., self.units:self.units + self.levels], -1)
     self.distance_in = K.mean(outputs[1][..., self.units + self.levels:],
                               -1)
     if self.return_sequences:
         return outputs[1][..., :self.units]
     else:
         return outputs[0][..., :self.units]
Code Example #11
def _forward(x, reduce_step, initial_states, U):
    """Forward recurrence of the linear chain crf."""
    def _forward_step(energy_matrix_t, states):
        alpha_tm1 = states[-1]
        new_states = reduce_step(K.expand_dims(alpha_tm1, 2) + energy_matrix_t)
        return new_states[0], new_states

    U_shared = K.expand_dims(K.expand_dims(U, 0), 0)

    inputs = K.expand_dims(x[:, 1:, :], 2) + U_shared
    inputs = K.concatenate([inputs, K.zeros_like(inputs[:, -1:, :, :])],
                           axis=1)

    last, values, _ = K.rnn(_forward_step, inputs, initial_states)
    return last, values
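The reduce_step and initial_states arguments are supplied by the caller. A hedged usage sketch (not part of this snippet; the function name and exact call are assumptions): with a log-sum-exp reduction, _forward computes the CRF log partition function, while a max/argmin reduction would yield Viterbi tables.

import tensorflow as tf

def free_energy0(x, U):
    # x: (B, T, F) unary scores, U: (F, F) transition scores
    initial_states = [x[:, 0, :]]                       # alpha_0 = unary scores at t = 0
    last_alpha, alphas = _forward(
        x,
        lambda B: [tf.reduce_logsumexp(B, axis=1)],     # collapse the previous-tag axis
        initial_states,
        U)
    # _forward pads a zero energy matrix at the end, so the last step reduces alpha one
    # final time; every entry of last_alpha is then identical and equals log Z.
    return last_alpha[:, 0]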
Code Example #12
    def viterbi_decoding(self, X, mask=None):
        input_energy = self.activation(K.dot(X, self.kernel) + self.bias)
        if self.use_boundary:
            input_energy = self.add_boundary_energy(input_energy, mask,
                                                    self.left_boundary,
                                                    self.right_boundary)

        argmin_tables = self.recursion(input_energy, mask, return_logZ=False)
        argmin_tables = K.cast(argmin_tables, 'int32')

        # backward to find best path, `initial_best_idx` can be any,
        # as all elements in the last argmin_table are the same
        argmin_tables = K.reverse(argmin_tables, 1)
        # matrix instead of vector is required by tf `K.rnn`
        initial_best_idx = [K.expand_dims(argmin_tables[:, 0, 0])]
        if K.backend() == 'theano':
            from theano import tensor as T
            initial_best_idx = [T.unbroadcast(initial_best_idx[0], 1)]

        def gather_each_row(params, indices):
            n = K.shape(indices)[0]
            if K.backend() == 'theano':
                from theano import tensor as T
                return params[T.arange(n), indices]
            elif K.backend() == 'tensorflow':
                import tensorflow as tf
                indices = K.transpose(K.stack([tf.range(n), indices]))
                return tf.gather_nd(params, indices)
            else:
                raise NotImplementedError

        def find_path(argmin_table, best_idx):
            next_best_idx = gather_each_row(argmin_table, best_idx[0][:, 0])
            next_best_idx = K.expand_dims(next_best_idx)
            if K.backend() == 'theano':
                from theano import tensor as T
                next_best_idx = T.unbroadcast(next_best_idx, 1)
            return next_best_idx, [next_best_idx]

        _, best_paths, _ = K.rnn(find_path,
                                 argmin_tables,
                                 initial_best_idx,
                                 input_length=K.int_shape(X)[1],
                                 unroll=self.unroll)
        best_paths = K.reverse(best_paths, 1)
        best_paths = K.squeeze(best_paths, 2)

        return K.one_hot(best_paths, self.units)
Code Example #13
def _backward(gamma):
    """Backward recurrence of the linear chain crf."""
    gamma = K.cast(gamma, "int32")

    def _backward_step(gamma_t, states):
        y_tm1 = K.squeeze(states[0], 0)
        y_t = batch_gather(gamma_t, y_tm1)
        return y_t, [K.expand_dims(y_t, 0)]

    initial_states = [K.expand_dims(K.zeros_like(gamma[:, 0, 0]), 0)]
    _, y_rev, _ = K.rnn(_backward_step,
                        gamma,
                        initial_states,
                        go_backwards=True)
    y = K.reverse(y_rev, 1)
    return y
Code Example #14
File: AbstractRNN.py Project: aniltrue/MLProject
    def call(self, inputs, **kwargs):
        input_shape = K.int_shape(inputs)

        def step_fn(inputs, states):
            output, new_states = self.cell.call(inputs, states, **kwargs)

            return output, new_states

        initial_states = self.cell.get_initial_state(inputs)

        last_output, outputs, states = K.rnn(step_fn, inputs, initial_states=initial_states, go_backwards=self.reversed, input_length=input_shape[1])

        if self.return_sequences:
            return outputs  # the full output sequence (not the final states)
        else:
            return last_output
Code Example #15
    def call(self, x, training=None, mask=None, states=None):
        """
        x.shape = (batch_size, time_step, dim) = (3, 10, 128); x is the encoder output
        :param Tensor x: Should be the output of the decoder
        :param Tensor states: last state of the decoder
        :param Tensor mask: The mask to apply
        :return: Pointer probabilities
        """

        input_shape = self.input_spec[0].shape
        en_seq = x  # TensorShape([3, 10, 128])
        x_input = x[:,
                    input_shape[1] - 1, :]  # take only the last timestep, TensorShape([3, 128])
        # Repeat a 2D tensor: if x has shape (samples, dim) and n is 2, the output will have
        # shape (samples, 2, dim), repeating the data along the second dimension
        x_input = K.repeat(x_input, input_shape[1])  # TensorShape([3, 10, 128])
        if states:
            initial_states = states
        else:
            initial_states = self.decoder.get_initial_state(x_input)

        constants = []
        '''preprocessed_input.shape TensorShape([64, 10, 128])'''
        preprocessed_input, _, constants = self.decoder.process_inputs(
            x_input, initial_states, constants)
        constants.append(en_seq)
        #self.step(preprocessed_input,initial_states)
        # preprocessed_input has a time dimension; the data at each timestep is passed to the step function
        '''
        K.rnn returns a tuple (last_output, outputs, new_states) and implements the recursive calls to step.

        last_output: tensor of shape (samples, ...), the latest rnn output.

        outputs: tensor of shape (samples, time, ...), where entry outputs[s, t] is the step function
        output for sample s at time t, i.e. the per-step output of shape (batch, 10) (no time dimension).

        new_states: list of tensors, the latest states returned by the step function, each of shape (samples, ...).
        '''
        last_output, outputs, states = K.rnn(
            self.step,
            preprocessed_input,
            initial_states,
            go_backwards=self.decoder.lstm.go_backwards,
            constants=constants,
            input_length=input_shape[1])

        # print('outputs',outputs.shape,outputs)#outputs (batch, 10, 10)
        return outputs
Code Example #16
    def call(self, input, initial_state=None):
        img, text = input

        def step(cell_inputs, cell_states):
            """Step function that will be used by Keras RNN backend."""
            h_tm1 = cell_states[0]
            features = self.attention(img, h_tm1)
            cell_inputs = K.concatenate([cell_inputs, features], axis=-1)

            # inputs projected by all gate matrices at once
            matrix_x = K.dot(cell_inputs, self.kernel)
            matrix_x = K.bias_add(matrix_x, self.input_bias)

            x_z, x_r, x_h = array_ops.split(matrix_x, 3, axis=1)

            # hidden state projected by all gate matrices at once
            matrix_inner = K.dot(h_tm1, self.recurrent_kernel)
            matrix_inner = K.bias_add(matrix_inner, self.recurrent_bias)

            recurrent_z, recurrent_r, recurrent_h = array_ops.split(
                matrix_inner, 3, axis=1)
            z = K.sigmoid(x_z + recurrent_z)
            r = K.sigmoid(x_r + recurrent_r)
            hh = K.tanh(x_h + r * recurrent_h)

            # previous and candidate state mixed by update gate
            h = z * h_tm1 + (1 - z) * hh
            return h, [h]

        if initial_state is None:
            initial_state = (array_ops.zeros(
                (array_ops.shape(text)[0], self.units)), )
        last, sequence, hidden = K.rnn(step,
                                       text,
                                       initial_state,
                                       zero_output_for_mask=self.mask_zeros)
        if self.return_state and self.return_sequences:
            return sequence, hidden
        if self.return_state:
            return last, hidden
        if self.return_sequences:
            return sequence
        return last
Code Example #17
def _forward(x, reduce_step, initial_states, U, mask=None):
    '''Forward recurrence of the linear chain crf.'''
    def _forward_step(energy_matrix_t, states):
        alpha_tm1 = states[-1]
        new_states = reduce_step(K.expand_dims(alpha_tm1, 2) + energy_matrix_t)
        return new_states[0], new_states

    U_shared = K.expand_dims(K.expand_dims(U, 0), 0)

    if mask is not None:
        mask = K.cast(mask, K.floatx())
        mask_U = K.expand_dims(K.expand_dims(mask[:, :-1] * mask[:, 1:], 2), 3)
        U_shared = U_shared * mask_U

    inputs = K.expand_dims(x[:, 1:, :], 2) + U_shared
    inputs = K.concatenate([inputs, K.zeros_like(inputs[:, -1:, :, :])],
                           axis=1)

    last, values, _ = K.rnn(_forward_step, inputs, initial_states)
    return last, values
Code Example #18
File: slayers.py Project: fcihraeipnusnacwh/MRC-CE
	def dense_loss(self, y_true, y_pred):
		"""y_true must be in one-hot form
		"""
		# Derive the mask and cast its dtype
		mask = K.all(K.greater(y_pred, -1e6), axis=2, keepdims=True)
		mask = K.cast(mask, K.floatx())
		# Compute the target score
		y_true, y_pred = y_true * mask, y_pred * mask
		target_score = self.target_score(y_true, y_pred)
		# Recursively compute log Z
		init_states = [y_pred[:, 0]]
		y_pred = K.concatenate([y_pred, mask], axis=2)
		input_length = K.int_shape(y_pred[:, 1:])[1]
		log_norm, _, _ = K.rnn(
			self.log_norm_step,
			y_pred[:, 1:],
			init_states,
			input_length=input_length
		)  # log Z vector at the final step
		log_norm = tf.reduce_logsumexp(log_norm, 1)  # logsumexp gives a scalar (per sample)
		# Loss: -log p
		return log_norm - target_score
Code Example #19
def decide_placement(candidate, mask_template, decay_grad):
    # Determine the possible placements based on the character candidates
    _, char_width = mask_template.shape[:2]
    mask_template_T = K.transpose(
        mask_template[0, :, 0, :])  # shape=(char_dim, char_width)

    def step_fn(
        inputs, states
    ):  # shape=(batch_size, height, char_dim), (batch_size, height, char_width)
        s = states[0]
        placement_t = combine_value_gradient(
            1.0 - s[:, :, :1],
            decay_grad * (1.0 - s[:, :, :1]))  # workaround: learning is too difficult unless the gradient is decayed
        s = s + placement_t * K.dot(inputs, mask_template_T)
        new_state = K.concatenate([s[:, :, 1:],
                                   K.zeros_like(s[:, :, :1])
                                   ])  # shape=(batch_size, height, char_width)
        return placement_t, [new_state]

    initial_state = K.zeros_like(candidate[:, :, :char_width, 0])
    candidate_t = tf.transpose(candidate, perm=[0, 2, 1, 3])
    _, placement_t, _ = K.rnn(step_fn, candidate_t, [initial_state])
    return tf.transpose(placement_t, perm=[0, 2, 1, 3])
Code Example #20
def _backward(gamma, mask):
    '''Backward recurrence of the linear chain crf.'''
    gamma = K.cast(gamma, 'int32')

    def _backward_step(gamma_t, states):
        y_tm1 = K.squeeze(states[0], 0)
        y_t = batch_gather(gamma_t, y_tm1)
        return y_t, [K.expand_dims(y_t, 0)]

    initial_states = [K.expand_dims(K.zeros_like(gamma[:, 0, 0]), 0)]
    _, y_rev, _ = K.rnn(_backward_step,
                        gamma,
                        initial_states,
                        go_backwards=True)
    y = K.reverse(y_rev, 1)

    if mask is not None:
        mask = K.cast(mask, dtype='int32')
        # mask output
        y *= mask
        # set masked values to -1
        y += -(1 - mask)
    return y
Code Example #21
    def call(self, x, constants=None, mask=None, initial_state=None):
        # input shape: (n_samples, time (padded with zeros), input_dim)
        input_shape = self.input_spec[0].shape

        if len(x) > 2:
            initial_state = x[2:]
            x = x[:2]
            assert len(initial_state) >= 1

        static_x = x[1]
        x = x[0]

        if self.layer.stateful:
            initial_states = self.layer.states
        elif initial_state is not None:
            initial_states = initial_state
            if not isinstance(initial_states, (list, tuple)):
                initial_states = [initial_states]
        else:
            initial_states = self.layer.get_initial_state(x)

        if not constants:
            constants = []
        constants += self.get_constants(static_x)

        last_output, outputs, states = K.rnn(
            self.step,
            x,
            initial_states,
            go_backwards=self.layer.go_backwards,
            mask=mask,
            constants=constants,
            unroll=self.layer.unroll,
            input_length=input_shape[1])

        # output has at the moment the form:
        # (real_output, attention)
        # split this now up

        output_dim = self.layer.compute_output_shape(input_shape)[0][-1]
        last_output = last_output[:output_dim]

        attentions = outputs[:, :, output_dim:]
        outputs = outputs[:, :, :output_dim]

        if self.layer.stateful:
            self.updates = []
            for i in range(len(states)):
                self.updates.append((self.layer.states[i], states[i]))

        if self.layer.return_sequences:
            output = outputs
        else:
            output = last_output

        # Properly set learning phase
        if getattr(last_output, '_uses_learning_phase', False):
            output._uses_learning_phase = True
            for state in states:
                state._uses_learning_phase = True

        if self.layer.return_state:
            if not isinstance(states, (list, tuple)):
                states = [states]
            else:
                states = list(states)
            output = [output] + states

        if self.return_attention:
            if not isinstance(output, list):
                output = [output]
            output = output + [attentions]

        return output
Code Example #22
#encoding=utf8
import os
import numpy as np

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
from tensorflow.keras import backend as K
import tensorflow as tf

batch_size = 1
time_step = 3
dim = 2
x = np.random.rand(batch_size, time_step, dim)  # [1, 3, 2] generated input
init_state = np.zeros(1).reshape(1, 1)  # [1, 1] initial value set to 0


def step_func(inputs, states):
    o = K.sum(inputs, axis=1, keepdims=True) + states[0]
    return o, [o]


a, _, _ = K.rnn(step_func, inputs=x, initial_states=[init_state])
print("x", x)
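Since the step function just adds the per-timestep sum of the inputs onto the running state, the last output should equal the total sum of x. A small check appended here for illustration (an addition to the original snippet, assuming TensorFlow 2.x eager execution):

print("last output", K.eval(a))  # shape (1, 1)
print("expected   ", x.sum())    # same value: the sum over all timesteps and dims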
Code Example #23
File: __init__.py Project: blprickett/recurrentshop
import tensorflow.keras.backend as K

if K.backend() == 'tensorflow':
    from .tensorflow_backend import *
    rnn = lambda *args, **kwargs: K.rnn(*args, **kwargs) + ([], )
elif K.backend() == 'theano':
    from .theano_backend import *
else:
    raise Exception(K.backend() + ' backend is not supported.')
Code Example #24
    def call(self, x, constants=None, mask=None, initial_state=None):
        # input shape: (n_samples, time (padded with zeros), input_dim)
        input_shape = self.input_spec.shape

        if self.layer.stateful:
            initial_states = self.layer.states
        elif initial_state is not None:
            initial_states = initial_state
            if not isinstance(initial_states, (list, tuple)):
                initial_states = [initial_states]

            base_initial_state = self.layer.get_initial_state(x)
            if len(base_initial_state) != len(initial_states):
                raise ValueError(
                    "initial_state does not have the correct length. Received length {0} "
                    "but expected {1}".format(len(initial_states),
                                              len(base_initial_state)))
            else:
                # check the states' shapes
                for i in range(len(initial_states)):
                    # initial_states[i][j] != base_initial_state[i][j]:
                    if not initial_states[i].shape.is_compatible_with(
                            base_initial_state[i].shape):
                        raise ValueError(
                            "initial_state does not match the default base state of the layer. "
                            "Received {0} but expected {1}".format(
                                [x.shape for x in initial_states],
                                [x.shape for x in base_initial_state]))
        else:
            initial_states = self.layer.get_initial_state(x)

        # print(initial_states)

        if not constants:
            constants = []

        constants += self.get_constants(x)

        last_output, outputs, states = K.rnn(
            self.step,
            x,
            initial_states,
            go_backwards=self.layer.go_backwards,
            mask=mask,
            constants=constants,
            unroll=self.layer.unroll,
            input_length=input_shape[1])

        if self.layer.stateful:
            self.updates = []
            for i in range(len(states)):
                self.updates.append((self.layer.states[i], states[i]))

        if self.layer.return_sequences:
            output = outputs
        else:
            output = last_output

            # Properly set learning phase
        if getattr(last_output, '_uses_learning_phase', False):
            output._uses_learning_phase = True
            for state in states:
                state._uses_learning_phase = True

        if self.layer.return_state:
            if not isinstance(states, (list, tuple)):
                states = [states]
            else:
                states = list(states)
            return [output] + states
        else:
            return output
Code Example #25
File: Attention.py Project: h0n9670/tts
    def call(self, inputs, verbose=False):

        encoder_out_seq, decoder_out_seq = inputs

        values = encoder_out_seq
        keys = self.memory_layer(values) if self.memory_layer else values

        def energy_step(query, states):
            previous_alignments = states[0]
            if self.rnn_cell:
                c_i = states[1]
                cell_state = states[2:]

                lstm_input = K.concatenate([query, c_i])
                lstm_input = K.expand_dims(lstm_input, 1)

                lstm_out = self.rnn_cell(lstm_input, initial_state=cell_state)
                lstm_output, new_cell_state = lstm_out[0], lstm_out[1:]
                query = lstm_output

            processed_query = self.query_layer(
                query) if self.query_layer else query

            expanded_alignments = K.expand_dims(previous_alignments, axis=2)

            f = self.location_convolution(expanded_alignments)

            processed_location_features = self.location_layer(f)

            e_i = K.sum(
                self.v_a * K.tanh(keys + processed_query +
                                  processed_location_features + self.b_a), [2])

            e_i = K.softmax(e_i)

            if self._cumulate:
                next_state = e_i + previous_alignments
            else:
                next_state = e_i

            if self.rnn_cell:
                new_c_i, _ = context_step(e_i, [c_i])

                return e_i, [next_state, new_c_i, *new_cell_state]
            return e_i, [next_state]

        def context_step(inputs, states):

            alignments = inputs
            expanded_alignments = K.expand_dims(alignments, 1)

            c_i = math_ops.matmul(expanded_alignments, values)
            c_i = K.squeeze(c_i, 1)

            return c_i, [c_i]

        def create_initial_state(inputs, hidden_size):
            fake_state = K.zeros_like(inputs)
            fake_state = K.sum(fake_state, axis=[1, 2])
            fake_state = K.expand_dims(fake_state)
            fake_state = K.tile(fake_state, [1, hidden_size])
            return fake_state

        def get_fake_cell_input(fake_state_c):
            fake_input = K.zeros_like(decoder_out_seq)[:, 0, :]
            fake_input = K.concatenate([fake_state_c, fake_input])
            fake_input = K.expand_dims(fake_input, 1)
            return fake_input

        fake_state_c = create_initial_state(values, values.shape[-1])
        fake_state_e = create_initial_state(values, K.shape(values)[1])
        if self.rnn_cell:
            cell_initial_state = self.rnn_cell.get_initial_state(
                get_fake_cell_input(fake_state_c))
            initial_states_e = [
                fake_state_e, fake_state_c, *cell_initial_state
            ]
        else:
            initial_states_e = [fake_state_e]

        last_out, e_outputs, _ = K.rnn(energy_step, decoder_out_seq,
                                       initial_states_e)

        c_outputs = math_ops.matmul(e_outputs, values)

        return [c_outputs, e_outputs]
Code Example #26
    def call(self, inputs, verbose=False, mask=None):
        """
        inputs: [encoder_output_sequence, decoder_output_sequence]
        """
        assert type(inputs) == list
        # Note: encoder_out_seq is a sequence (length seq); decoder_out_seq is a single output.
        encoder_out_seq, decoder_out_seq = inputs

        encoder_out_seq = _p_shape(encoder_out_seq,
                                   "attention call: input encoder output sequence: encoder_out_seq")
        decoder_out_seq = _p_shape(decoder_out_seq,
                                   "attention call: input decoder output sequence: decoder_out_seq")

        # Implements the energy function e_tj = V * tanh(W * h_j + U * S_t-1 + b)
        # inputs: I first took this to be all of the h_j, but that is wrong; it is the input at one
        #         specific timestep t, not all of them. The trailing "s" is only due to the batch dimension.
        # states: as I understand it, this is S_t-1
        # decode_outs carries no seq dimension; it is decode_out rather than decode_out_seq; the "s" is for the batch
        # Still, the whole encoder_out_seq takes part in the computation at every step:
        # one decoder_out against the whole encoder_out_seq.
        def energy_step(decode_outs, states):  # decode_outs: (batch, dim)
            decode_outs = _p(decode_outs,
                             "energy_step: decode_outs, computing the energy function.........."
                             )  # decode_outs: [1, 20]
            # decoder_seq [N, 30, 512]; 30 is the string length
            en_seq_len, en_hidden = encoder_out_seq.shape[
                1], encoder_out_seq.shape[2]  # 30, 512
            de_hidden = decode_outs.shape[-1]
            #  W * h_j
            reshaped_enc_outputs = K.reshape(
                encoder_out_seq, (-1, en_hidden))  #[b,64,512]=> [b*64,512]
            _p(reshaped_enc_outputs, "reshaped_enc_outputs")

            # W_a[512x512],reshaped_enc_outputs[b*64,512] => [b*64,512] => [b,64,512]
            W_a_dot_s = K.reshape(K.dot(reshaped_enc_outputs, self.W_a),
                                  (-1, en_seq_len, en_hidden))
            # U * S_t - 1,decode_outs[b,512],U_a[512,512] => [b,512]    => [b,1,512]
            U_a_dot_h = K.expand_dims(K.dot(decode_outs, self.U_a),
                                      axis=1)  # <= batch_size, 1, latent_dim

            # Subtle detail: this broadcasts the decoder output across the time axis (64 steps) and adds it to the encoder output [64, 512]

            # tanh ( W * h_j + U * S_t-1 + b ),[b,64,512] = [b*64,512]
            reshaped_Ws_plus_Uh = K.tanh(
                K.reshape(W_a_dot_s + U_a_dot_h, (-1, en_hidden)))
            # V * tanh ( W * h_j + U * S_t-1 + b ), [b*64,512]*[512,1] => [b*64,1] => [b,64]
            e_i = K.reshape(K.dot(reshaped_Ws_plus_Uh, self.V_a),
                            (-1, en_seq_len))
            # softmax(e_tj)
            e_i = K.softmax(e_i)
            e_i = _p(e_i, "energy_step:e_i")
            return e_i, [e_i]

        # This step function is interesting; pay particular attention to its arguments:
        # encoder_out_seq: the encoder outputs h_i at every timestep, [batch, ts, dim]
        # states:
        # inputs: the rnn input at one timestep; here it happens to be the probability the energy function e_ij assigned to this timestep
        # ----------------------------
        # "The step_do function takes two inputs: step_in and states.
        #   step_in is a (batch_size, input_dim) tensor representing the sample xt at the current timestep,
        #   while states is a list representing y_{t-1} and some intermediate variables."
        # e is one of the 30 steps; it is a 64-dimensional probability vector
        def context_step(e, states):  # e: (batch, dim); each input here is a single e
            e = _p(e, "context_step:e")
            states = _p(states, "context_step:states")
            # encoder_out_seq [b, 64, 512] * e [64, 1]
            # dot is matrix multiplication, * is elementwise multiplication
            # [b, 64, 512] * e[64, 1] have different shapes, yet broadcasting lets the product work (verified)
            # In effect this is a weighted sum of the encoder output under the softmax distribution
            c_i = K.sum(encoder_out_seq * K.expand_dims(e, -1), axis=1)
            c_i = _p(c_i,
                     "context_step: c_i, the expectation of h, i.e. the attention ---------------------\n")
            return c_i, [c_i]

        #    (batch_size, enc_seq_len, latent_dim) (b,64,512)
        # => (batch_size, hidden_size)
        # This function builds the initial state value for the GRU.
        def create_inital_state(inputs, hidden_size):  # hidden_size=64
            # print("inputs",inputs)
            # print("hidden_size",hidden_size)
            # print("type(hidden_size)", type(hidden_size))
            # We are not using initial states, but need to pass something to the K.rnn function
            fake_state = K.zeros_like(
                inputs)  # [b,64,512]<= (batch_size, enc_seq_len, latent_dim)
            fake_state = K.sum(fake_state, axis=[1, 2])  # <= (batch_size)
            fake_state = K.expand_dims(fake_state)  # <= (batch_size, 1)
            # print(fake_state)
            # print("------")
            # print(tf.shape(fake_state))
            # print("hidden_size:",hidden_size)

            fake_state = tile(
                fake_state,
                [1, hidden_size])  # <= (batch_size, latent_dim) (b,64)
            return fake_state

        # encoder_out_seq = (batch_size, enc_seq_len, latent_dim)
        # fake_state_c ==   (batch_size, latent_dim)
        # fake_state_e ==   (batch_size, enc_seq); this last dimension is just the sequence of
        #                   attention weights the decoder produces, one value per encoder step
        # K.rnn(step function, input x, initial states): K.rnn takes three basic arguments; the first is the
        # step_do function written above, the second is the input time sequence, the third is the initial states
        # This rnn is the decoder: with e_ij = a(s_i-1, h_j) and j iterated over, K.rnn computes e_ij for every h_j
        # The output e_outputs is a sequence of probabilities

        # e_ij (i fixed, j indexing the encoder h) is fed into a new rnn, which computes the corresponding outputs; this is the real decoder!
        shape = encoder_out_seq.shape.as_list()
        # print("encoder_out_seq.shape:", shape)
        # shape[1] is seq = 64, the sequence length
        fake_state_e = create_inital_state(
            encoder_out_seq, shape[1]
        )  # encoder_out_seq.shape[1]) , fake_state_e (batch,enc_seq_len)
        fake_state_e = _p_shape(fake_state_e, "fake_state_e")

        # The output is a sequence of e values, one per timestep
        ########### ########### ########### K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|
        # This step runs 30 times (the decoder, i.e. the string length); each run yields a
        # 64-dimensional (the encoder time_sequence) probability vector
        last_out, e_outputs, _ = K.rnn(
            step_function=energy_step,
            inputs=decoder_out_seq,
            initial_states=[fake_state_e],  # (b x 64); decoder_out_seq is a sequence, not a single value
        )
        # e_outputs [30,64]

        e_outputs = _p_shape(e_outputs, "energy function e output::::")
        # shape[-1] is the encoder hidden size
        fake_state_c = create_inital_state(encoder_out_seq,
                                           encoder_out_seq.shape[-1])  #
        fake_state_c = _p_shape(fake_state_c, "fake_state_c")
        # print("e_outputs:", e_outputs)

        ########### ########### ########### K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|
        last_out, c_outputs, _ = K.rnn(  # context_step computes the attention expectation, sum(e_ij * encoder_out); output shape (batch, encoder_seq, ...)
            step_function=context_step,
            inputs=e_outputs,
            initial_states=[fake_state_c],
        )
        #c_outputs [b,64,512]
        c_outputs = _p_shape(c_outputs, "attention c output::::")

        # Outputs:
        # attention vector c_outputs: (batch, image seq, 512),
        # attention vector e_outputs: (batch, image seq, image width / 4),
        return c_outputs, e_outputs
Code Example #27
    def call(self, inputs, verbose=False):
        """
        inputs: [encoder_output_sequence, decoder_output_sequence]
        """
        assert type(inputs) == list
        encoder_out_seq, decoder_out_seq = inputs
        if verbose:
            print('encoder_out_seq>', encoder_out_seq.shape)
            print('decoder_out_seq>', decoder_out_seq.shape)

        def energy_step(inputs, states):
            """ Step function for computing energy for a single decoder state """

            assert_msg = "States must be a list. However states {} is of type {}".format(
                states, type(states))
            assert isinstance(states, list) or isinstance(states,
                                                          tuple), assert_msg
            """ Some parameters required for shaping tensors"""
            en_seq_len, en_hidden = encoder_out_seq.shape[
                1], encoder_out_seq.shape[2]
            de_hidden = inputs.shape[-1]
            """ Computing S.Wa where S=[s0, s1, ..., si]"""
            # <= batch_size*en_seq_len, latent_dim
            reshaped_enc_outputs = K.reshape(encoder_out_seq, (-1, en_hidden))
            # <= batch_size*en_seq_len, latent_dim
            W_a_dot_s = K.reshape(K.dot(reshaped_enc_outputs, self.W_a),
                                  (-1, en_seq_len, en_hidden))
            if verbose:
                print('wa.s>', W_a_dot_s.shape)
            """ Computing hj.Ua """
            U_a_dot_h = K.expand_dims(K.dot(inputs, self.U_a),
                                      1)  # <= batch_size, 1, latent_dim
            if verbose:
                print('Ua.h>', U_a_dot_h.shape)
            """ tanh(S.Wa + hj.Ua) """
            # <= batch_size*en_seq_len, latent_dim
            reshaped_Ws_plus_Uh = K.tanh(
                K.reshape(W_a_dot_s + U_a_dot_h, (-1, en_hidden)))
            if verbose:
                print('Ws+Uh>', reshaped_Ws_plus_Uh.shape)
            """ softmax(va.tanh(S.Wa + hj.Ua)) """
            # <= batch_size, en_seq_len
            e_i = K.reshape(K.dot(reshaped_Ws_plus_Uh, self.V_a),
                            (-1, en_seq_len))
            # <= batch_size, en_seq_len
            e_i = K.softmax(e_i)

            if verbose:
                print('ei>', e_i.shape)

            return e_i, [e_i]

        def context_step(inputs, states):
            """ Step function for computing ci using ei """
            # <= batch_size, hidden_size
            c_i = K.sum(encoder_out_seq * K.expand_dims(inputs, -1), axis=1)
            if verbose:
                print('ci>', c_i.shape)
            return c_i, [c_i]

        def create_inital_state(inputs, hidden_size):
            # We are not using initial states, but need to pass something to the K.rnn function
            fake_state = K.zeros_like(
                inputs)  # <= (batch_size, enc_seq_len, latent_dim)
            fake_state = K.sum(fake_state, axis=[1, 2])  # <= (batch_size)
            fake_state = K.expand_dims(fake_state)  # <= (batch_size, 1)
            fake_state = K.tile(fake_state,
                                [1, hidden_size])  # <= (batch_size, latent_dim)
            return fake_state

        fake_state_c = create_inital_state(encoder_out_seq,
                                           encoder_out_seq.shape[-1])
        fake_state_e = create_inital_state(
            encoder_out_seq, encoder_out_seq.shape[1]
        )  # <= (batch_size, enc_seq_len)
        """ Computing energy outputs """
        # e_outputs => (batch_size, de_seq_len, en_seq_len)
        last_out, e_outputs, _ = K.rnn(
            energy_step,
            decoder_out_seq,
            [fake_state_e],
        )
        """ Computing context vectors """
        last_out, c_outputs, _ = K.rnn(
            context_step,
            e_outputs,
            [fake_state_c],
        )

        return c_outputs, e_outputs
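A hedged usage sketch for an attention layer with this call signature (the class name AttentionLayer and the toy encoder/decoder around it are assumptions, not part of this snippet):

from tensorflow.keras.layers import Input, GRU

encoder_inputs = Input(shape=(20, 64))
decoder_inputs = Input(shape=(15, 64))
encoder_out = GRU(128, return_sequences=True)(encoder_inputs)
decoder_out = GRU(128, return_sequences=True)(decoder_inputs)
attn_context, attn_weights = AttentionLayer()([encoder_out, decoder_out])
# attn_context: (batch, 15, 128) context vectors; attn_weights: (batch, 15, 20) alignment weights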
Code Example #28
    def call(self, inputs, verbose=False):
        """
        inputs: [encoder_output_sequence, decoder_output_sequence]
        """
        assert type(inputs) == list
        encoder_out_seq, decoder_out_seq = inputs
        if verbose:
            print('encoder_out_seq>', encoder_out_seq.shape)
            print('decoder_out_seq>', decoder_out_seq.shape)

        def energy_step(inputs, states):
            """ Step function for computing energy for a single decoder state
            inputs: (batchsize * 1 * de_in_dim)
            states: (batchsize * 1 * de_latent_dim)
            """

            assert_msg = "States must be an iterable. Got {} of type {}".format(
                states, type(states))
            assert isinstance(states, list) or isinstance(states,
                                                          tuple), assert_msg
            """ Some parameters required for shaping tensors"""
            en_seq_len, en_hidden = encoder_out_seq.shape[
                1], encoder_out_seq.shape[2]
            de_hidden = inputs.shape[-1]
            """ Computing S.Wa where S=[s0, s1, ..., si]"""
            # <= batch size * en_seq_len * latent_dim
            W_a_dot_s = K.dot(encoder_out_seq, self.W_a)
            """ Computing hj.Ua """
            U_a_dot_h = K.expand_dims(K.dot(inputs, self.U_a),
                                      1)  # <= batch_size, 1, latent_dim
            if verbose:
                print('Ua.h>', U_a_dot_h.shape)
            """ tanh(S.Wa + hj.Ua) """
            # <= batch_size*en_seq_len, latent_dim
            Ws_plus_Uh = K.tanh(W_a_dot_s + U_a_dot_h)
            if verbose:
                print('Ws+Uh>', Ws_plus_Uh.shape)
            """ softmax(va.tanh(S.Wa + hj.Ua)) """
            # <= batch_size, en_seq_len
            e_i = K.squeeze(K.dot(Ws_plus_Uh, self.V_a), axis=-1)
            # <= batch_size, en_seq_len
            e_i = K.softmax(e_i)

            if verbose:
                print('ei>', e_i.shape)

            return e_i, [e_i]

        def context_step(inputs, states):
            """ Step function for computing ci using ei """

            assert_msg = "States must be an iterable. Got {} of type {}".format(
                states, type(states))
            assert isinstance(states, list) or isinstance(states,
                                                          tuple), assert_msg

            # <= batch_size, hidden_size
            c_i = K.sum(encoder_out_seq * K.expand_dims(inputs, -1), axis=1)
            if verbose:
                print('ci>', c_i.shape)
            return c_i, [c_i]

        fake_state_c = K.sum(encoder_out_seq, axis=1)
        fake_state_e = K.sum(encoder_out_seq,
                             axis=2)  # <= (batch_size, enc_seq_len)
        """ Computing energy outputs """
        # e_outputs => (batch_size, de_seq_len, en_seq_len)
        last_out, e_outputs, _ = K.rnn(
            energy_step,
            decoder_out_seq,
            [fake_state_e],
        )
        """ Computing context vectors """
        last_out, c_outputs, _ = K.rnn(
            context_step,
            e_outputs,
            [fake_state_c],
        )

        return c_outputs, e_outputs