Example #1
    def forward(self, x):
        """Compute the stft transform.

        Args:
            x (Variable): shape(B, T), dtype float32, the input waveform.

        Returns:
            (real, imag)
            real (Variable): shape(B, C, 1, T), dtype float32, the real part of the spectrogram. (C = 1 + n_fft // 2)
            imag (Variable): shape(B, C, 1, T), dtype float32, the imaginary part of the spectrogram. (C = 1 + n_fft // 2)
        """
        # x(batch_size, time_steps)
        # pad it first with reflect mode
        pad_start = F.reverse(x[:, 1:1 + self.n_fft // 2], axis=1)
        pad_stop = F.reverse(x[:, -(1 + self.n_fft // 2):-1], axis=1)
        x = F.concat([pad_start, x, pad_stop], axis=-1)

        # to BC1T, C=1
        x = F.unsqueeze(x, axes=[1, 2])
        out = conv2d(x, self.weight, stride=(1, self.hop_length))
        real, imag = F.split(out, 2, dim=1)  # BC1T
        return real, imag
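The reflect padding built from pad_start and pad_stop centers each analysis frame, which matches NumPy's reflect padding mode. Below is a minimal NumPy sketch of that centering and of combining the (real, imag) pair into a magnitude spectrogram; it ignores the analysis window baked into self.weight, and n_fft, hop_length and the array sizes are made-up values for illustration only.

import numpy as np

# Made-up sizes for illustration.
n_fft, hop_length = 8, 2
x = np.arange(20, dtype=np.float32)              # one waveform of shape (T,)

# Same centering as the pad_start / x / pad_stop concat above.
padded = np.pad(x, n_fft // 2, mode="reflect")

# Reference framing + rFFT (no window), one frame per hop.
frames = np.stack([padded[i:i + n_fft]
                   for i in range(0, len(padded) - n_fft + 1, hop_length)])
spec = np.fft.rfft(frames, axis=-1)              # (num_frames, 1 + n_fft // 2)

real, imag = spec.real, spec.imag
magnitude = np.sqrt(real ** 2 + imag ** 2)       # how the two outputs are typically combined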
Example #2
def basic_lstm(input,
               init_hidden,
               init_cell,
               hidden_size,
               num_layers=1,
               sequence_length=None,
               dropout_prob=0.0,
               bidirectional=False,
               batch_first=True,
               param_attr=None,
               bias_attr=None,
               gate_activation=None,
               activation=None,
               forget_bias=1.0,
               dtype='float32',
               name='basic_lstm'):
    """
    LSTM implementation using basic operators; it supports multiple layers and bidirectional LSTMs.

    .. math::
           i_t &= \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + b_i)

           f_t &= \sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + b_f + forget_bias )

           o_t &= \sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + b_o)

           \\tilde{c_t} &= tanh(W_{cx}x_t + W_{ch}h_{t-1} + b_c)

           c_t &= f_t \odot c_{t-1} + i_t \odot \\tilde{c_t}

           h_t &= o_t \odot tanh(c_t)

    Args:
        input (Variable): LSTM input tensor,
                       if batch_first = False, shape should be ( seq_len x batch_size x input_size )
                       if batch_first = True, shape should be ( batch_size x seq_len x input_size )
        init_hidden(Variable|None): The initial hidden state of the LSTM.
                       This is a tensor with shape ( num_layers x batch_size x hidden_size).
                       If bidirectional = True, the shape should be ( num_layers*2 x batch_size x hidden_size),
                       and it can be reshaped to a tensor with shape ( num_layers x 2 x batch_size x hidden_size) to use.
                       If it is None, it will be set to all 0.
        init_cell(Variable|None): The initial cell state of the LSTM.
                       This is a tensor with shape ( num_layers x batch_size x hidden_size).
                       If bidirectional = True, the shape should be ( num_layers*2 x batch_size x hidden_size),
                       and it can be reshaped to a tensor with shape ( num_layers x 2 x batch_size x hidden_size) to use.
                       If it is None, it will be set to all 0.
        hidden_size (int): Hidden size of the LSTM
        num_layers (int): The total number of layers of the LSTM
        sequence_length (Variable|None): A tensor with shape [batch_size] that stores the real length of each instance.
                        This tensor will be converted to a mask that masks the padding ids.
                        If it is None, there are no padding ids.
        dropout_prob(float|0.0): Dropout probability. Dropout is ONLY applied to the output of each layer,
                             NOT between time steps.
        bidirectional (bool|False): Whether the LSTM is bidirectional.
        batch_first (bool|True): The shape format of the input and output tensors. If true,
            the shape format should be :attr:`[batch_size, seq_len, hidden_size]`. If false,
            the shape format should be :attr:`[seq_len, batch_size, hidden_size]`. By default
            this function accepts input and emits output in batch-major form to be consistent
            with most data formats, though it is a bit less efficient because of extra transposes.
        param_attr(ParamAttr|None): The parameter attribute for the learnable
            weight matrix. Note:
            If it is set to None or one attribute of ParamAttr, lstm_unit will
            create ParamAttr as param_attr. If the Initializer of the param_attr
            is not set, the parameter is initialized with Xavier. Default: None.
        bias_attr (ParamAttr|None): The parameter attribute for the bias
            of LSTM unit.
            If it is set to None or one attribute of ParamAttr, lstm_unit will 
            create ParamAttr as bias_attr. If the Initializer of the bias_attr
            is not set, the bias is initialized zero. Default: None.
        gate_activation (function|None): The activation function for gates (actGate).
                                  Default: 'fluid.layers.sigmoid'
        activation (function|None): The activation function for cell (actNode).
                             Default: 'fluid.layers.tanh'
        forget_bias (float|1.0) : Forget bias used to compute the forget gate
        dtype(string): Data type used in this unit
        name(string): Name used to identify parameters and biases

    Returns:
        rnn_out(Tensor), last_hidden(Tensor), last_cell(Tensor)
            - rnn_out is the LSTM hidden output, with shape (seq_len x batch_size x hidden_size). \
              If bidirectional is True, its shape will be ( seq_len x batch_size x hidden_size*2 ).
            - last_hidden is the hidden state of the last step of the LSTM, \
              with shape ( num_layers x batch_size x hidden_size ). \
              If bidirectional is True, its shape will be ( num_layers*2 x batch_size x hidden_size ),
              and it can be reshaped to a tensor with shape ( num_layers x 2 x batch_size x hidden_size ) to use.
            - last_cell is the cell state of the last step of the LSTM, \
              with shape ( num_layers x batch_size x hidden_size ). \
              If bidirectional is True, its shape will be ( num_layers*2 x batch_size x hidden_size ),
              and it can be reshaped to a tensor with shape ( num_layers x 2 x batch_size x hidden_size ) to use.

    Examples:
        .. code-block:: python
            
            import paddle.fluid.layers as layers
            from paddle.fluid.contrib.layers import basic_lstm

            batch_size = 20
            input_size = 128
            hidden_size = 256
            num_layers = 2
            dropout = 0.5
            bidirectional = True
            batch_first = False

            input = layers.data( name = "input", shape = [-1, batch_size, input_size], dtype='float32')
            pre_hidden = layers.data( name = "pre_hidden", shape=[-1, hidden_size], dtype='float32')
            pre_cell = layers.data( name = "pre_cell", shape=[-1, hidden_size], dtype='float32')
            sequence_length = layers.data( name="sequence_length", shape=[-1], dtype='int32')

            rnn_out, last_hidden, last_cell = basic_lstm( input, pre_hidden, pre_cell, \
                    hidden_size, num_layers = num_layers, \
                    sequence_length = sequence_length, dropout_prob=dropout, bidirectional = bidirectional, \
                    batch_first = batch_first)

    """
    fw_unit_list = []

    for i in range(num_layers):
        new_name = name + "_layers_" + str(i)
        if param_attr is not None and param_attr.name is not None:
            layer_param_attr = copy.deepcopy(param_attr)
            layer_param_attr.name += "_fw_w_" + str(i)
        else:
            layer_param_attr = param_attr
        if bias_attr is not None and bias_attr.name is not None:
            layer_bias_attr = copy.deepcopy(bias_attr)
            layer_bias_attr.name += "_fw_b_" + str(i)
        else:
            layer_bias_attr = bias_attr
        fw_unit_list.append(
            BasicLSTMUnit(new_name,
                          hidden_size,
                          param_attr=layer_param_attr,
                          bias_attr=layer_bias_attr,
                          gate_activation=gate_activation,
                          activation=activation,
                          forget_bias=forget_bias,
                          dtype=dtype))
    if bidirectional:
        bw_unit_list = []

        for i in range(num_layers):
            new_name = name + "_reverse_layers_" + str(i)
            if param_attr is not None and param_attr.name is not None:
                layer_param_attr = copy.deepcopy(param_attr)
                layer_param_attr.name += "_bw_w_" + str(i)
            else:
                layer_param_attr = param_attr
            if bias_attr is not None and bias_attr.name is not None:
                layer_bias_attr = copy.deepcopy(bias_attr)
                layer_bias_attr.name += "_bw_b_" + str(i)
            else:
                layer_bias_attr = bias_attr
            bw_unit_list.append(
                BasicLSTMUnit(new_name,
                              hidden_size,
                              param_attr=layer_param_attr,
                              bias_attr=layer_bias_attr,
                              gate_activation=gate_activation,
                              activation=activation,
                              forget_bias=forget_bias,
                              dtype=dtype))

    if batch_first:
        input = layers.transpose(input, [1, 0, 2])

    mask = None
    if sequence_length:
        max_seq_len = layers.shape(input)[0]
        mask = layers.sequence_mask(sequence_length,
                                    maxlen=max_seq_len,
                                    dtype='float32')

        mask = layers.transpose(mask, [1, 0])

    direc_num = 1
    if bidirectional:
        direc_num = 2

    # convert the initial states to [num_layers, direc_num, batch_size, hidden_size]
    if init_hidden:
        init_hidden = layers.reshape(
            init_hidden, shape=[num_layers, direc_num, -1, hidden_size])
        init_cell = layers.reshape(
            init_cell, shape=[num_layers, direc_num, -1, hidden_size])

    # forward direction
    def get_single_direction_output(rnn_input,
                                    unit_list,
                                    mask=None,
                                    direc_index=0):
        rnn = StaticRNN()
        with rnn.step():
            step_input = rnn.step_input(rnn_input)

            if mask:
                step_mask = rnn.step_input(mask)

            for i in range(num_layers):
                if init_hidden:
                    pre_hidden = rnn.memory(init=init_hidden[i, direc_index])
                    pre_cell = rnn.memory(init=init_cell[i, direc_index])
                else:
                    pre_hidden = rnn.memory(batch_ref=rnn_input,
                                            shape=[-1, hidden_size])
                    pre_cell = rnn.memory(batch_ref=rnn_input,
                                          shape=[-1, hidden_size])

                new_hidden, new_cell = unit_list[i](step_input, pre_hidden,
                                                    pre_cell)

                if mask:
                    new_hidden = layers.elementwise_mul(
                        new_hidden, step_mask,
                        axis=0) - layers.elementwise_mul(pre_hidden,
                                                         (step_mask - 1),
                                                         axis=0)
                    new_cell = layers.elementwise_mul(
                        new_cell, step_mask, axis=0) - layers.elementwise_mul(
                            pre_cell, (step_mask - 1), axis=0)

                rnn.update_memory(pre_hidden, new_hidden)
                rnn.update_memory(pre_cell, new_cell)

                rnn.step_output(new_hidden)
                rnn.step_output(new_cell)

                step_input = new_hidden
                if dropout_prob is not None and dropout_prob > 0.0:
                    step_input = layers.dropout(
                        step_input,
                        dropout_prob=dropout_prob,
                        dropout_implementation='upscale_in_train')

            rnn.step_output(step_input)

        rnn_out = rnn()

        last_hidden_array = []
        last_cell_array = []
        rnn_output = rnn_out[-1]
        for i in range(num_layers):
            last_hidden = rnn_out[i * 2]
            last_hidden = last_hidden[-1]
            last_hidden_array.append(last_hidden)
            last_cell = rnn_out[i * 2 + 1]
            last_cell = last_cell[-1]
            last_cell_array.append(last_cell)

        last_hidden_output = layers.concat(last_hidden_array, axis=0)
        last_hidden_output = layers.reshape(
            last_hidden_output, shape=[num_layers, -1, hidden_size])
        last_cell_output = layers.concat(last_cell_array, axis=0)
        last_cell_output = layers.reshape(last_cell_output,
                                          shape=[num_layers, -1, hidden_size])

        return rnn_output, last_hidden_output, last_cell_output
        # seq_len, batch_size, hidden_size

    fw_rnn_out, fw_last_hidden, fw_last_cell = get_single_direction_output(
        input, fw_unit_list, mask, direc_index=0)

    if bidirectional:
        bw_input = layers.reverse(input, axis=[0])
        bw_mask = None
        if mask:
            bw_mask = layers.reverse(mask, axis=[0])
        bw_rnn_out, bw_last_hidden, bw_last_cell = get_single_direction_output(
            bw_input, bw_unit_list, bw_mask, direc_index=1)

        bw_rnn_out = layers.reverse(bw_rnn_out, axis=[0])

        rnn_out = layers.concat([fw_rnn_out, bw_rnn_out], axis=2)
        last_hidden = layers.concat([fw_last_hidden, bw_last_hidden], axis=1)
        last_hidden = layers.reshape(
            last_hidden, shape=[num_layers * direc_num, -1, hidden_size])

        last_cell = layers.concat([fw_last_cell, bw_last_cell], axis=1)
        last_cell = layers.reshape(
            last_cell, shape=[num_layers * direc_num, -1, hidden_size])

        if batch_first:
            rnn_out = layers.transpose(rnn_out, [1, 0, 2])
        return rnn_out, last_hidden, last_cell
    else:

        rnn_out = fw_rnn_out
        last_hidden = fw_last_hidden
        last_cell = fw_last_cell

        if batch_first:
            rnn_out = layers.transpose(rnn_out, [1, 0, 2])

        return rnn_out, last_hidden, last_cell
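For reference, here is a self-contained NumPy sketch of the per-step recurrence that the docstring equations above describe, including the forget_bias term. The fused weight layout, the gate order and all sizes are illustrative assumptions, not the exact parameter layout of BasicLSTMUnit.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lstm_step(x_t, h_prev, c_prev, W, b, forget_bias=1.0):
    # W: (input_size + hidden_size, 4 * hidden_size), b: (4 * hidden_size,)
    gates = np.concatenate([x_t, h_prev], axis=-1) @ W + b
    i, j, f, o = np.split(gates, 4, axis=-1)     # input, candidate, forget, output
    c_t = sigmoid(f + forget_bias) * c_prev + sigmoid(i) * np.tanh(j)
    h_t = sigmoid(o) * np.tanh(c_t)
    return h_t, c_t

# Tiny usage example with made-up sizes.
rng = np.random.default_rng(0)
batch, input_size, hidden_size = 2, 3, 4
W = rng.standard_normal((input_size + hidden_size, 4 * hidden_size)).astype("float32")
b = np.zeros(4 * hidden_size, dtype="float32")
h = c = np.zeros((batch, hidden_size), dtype="float32")
x_t = rng.standard_normal((batch, input_size)).astype("float32")
h, c = lstm_step(x_t, h, c, W, b)                # one time step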
Example #3
def basic_gru(input,
              init_hidden,
              hidden_size,
              num_layers=1,
              sequence_length=None,
              dropout_prob=0.0,
              bidirectional=False,
              batch_first=True,
              param_attr=None,
              bias_attr=None,
              gate_activation=None,
              activation=None,
              dtype='float32',
              name='basic_gru'):
    """
    GRU implementation using basic operators; it supports multiple layers and bidirectional GRUs.

    .. math::
            u_t & = actGate(W_ux xu_{t} + W_uh h_{t-1} + b_u)

            r_t & = actGate(W_rx xr_{t} + W_rh h_{t-1} + b_r)

            m_t & = actNode(W_cx xm_t + W_ch dot(r_t, h_{t-1}) + b_m)

            h_t & = dot(u_t, h_{t-1}) + dot((1-u_t), m_t)

    Args:
        input (Variable): GRU input tensor, 
                       if batch_first = False, shape should be ( seq_len x batch_size x input_size )
                       if batch_first = True, shape should be ( batch_size x seq_len x input_size )
        init_hidden(Variable|None): The initial hidden state of the GRU.
                       This is a tensor with shape ( num_layers x batch_size x hidden_size).
                       If bidirectional = True, the shape should be ( num_layers*2 x batch_size x hidden_size),
                       and it can be reshaped to a tensor with shape ( num_layers x 2 x batch_size x hidden_size) to use.
                       If it is None, it will be set to all 0.
        hidden_size (int): Hidden size of the GRU
        num_layers (int): The total number of layers of the GRU
        sequence_length (Variable|None): A Tensor with shape [batch_size] that stores the real length of each instance.
                        This tensor will be converted to a mask that masks the padding ids.
                        If it is None, there are no padding ids.
        dropout_prob(float|0.0): Dropout probability. Dropout is ONLY applied to the output of each layer,
                             NOT between time steps.
        bidirectional (bool|False): Whether the GRU is bidirectional.
        batch_first (bool|True): The shape format of the input and output tensors. If true,
            the shape format should be :attr:`[batch_size, seq_len, hidden_size]`. If false,
            the shape format should be :attr:`[seq_len, batch_size, hidden_size]`. By default
            this function accepts input and emits output in batch-major form to be consistent
            with most data formats, though it is a bit less efficient because of extra transposes.
        param_attr(ParamAttr|None): The parameter attribute for the learnable
            weight matrix. Note:
            If it is set to None or one attribute of ParamAttr, gru_unit will
            create ParamAttr as param_attr. If the Initializer of the param_attr
            is not set, the parameter is initialized with Xavier. Default: None.
        bias_attr (ParamAttr|None): The parameter attribute for the bias
            of GRU unit.
            If it is set to None or one attribute of ParamAttr, gru_unit will 
            create ParamAttr as bias_attr. If the Initializer of the bias_attr
            is not set, the bias is initialized zero. Default: None.
        gate_activation (function|None): The activation function for gates (actGate).
                                  Default: 'fluid.layers.sigmoid'
        activation (function|None): The activation function for cell (actNode).
                             Default: 'fluid.layers.tanh'
        dtype(string): data type used in this unit
        name(string): name used to identify parameters and biases

    Returns:
        rnn_out(Tensor), last_hidden(Tensor)
            - rnn_out is the GRU hidden output, with shape (seq_len x batch_size x hidden_size). \
              If bidirectional is True, its shape will be ( seq_len x batch_size x hidden_size*2 ).
            - last_hidden is the hidden state of the last step of the GRU, \
              with shape ( num_layers x batch_size x hidden_size ). \
              If bidirectional is True, its shape will be ( num_layers*2 x batch_size x hidden_size ),
              and it can be reshaped to a tensor with shape ( num_layers x 2 x batch_size x hidden_size ) to use.

    Examples:
        .. code-block:: python
            
            import paddle.fluid.layers as layers
            from paddle.fluid.contrib.layers import basic_gru

            batch_size = 20
            input_size = 128
            hidden_size = 256
            num_layers = 2
            dropout = 0.5
            bidirectional = True
            batch_first = False

            input = layers.data( name = "input", shape = [-1, batch_size, input_size], dtype='float32')
            pre_hidden = layers.data( name = "pre_hidden", shape=[-1, hidden_size], dtype='float32')
            sequence_length = layers.data( name="sequence_length", shape=[-1], dtype='int32')


            rnn_out, last_hidden = basic_gru( input, pre_hidden, hidden_size, num_layers = num_layers, \
                    sequence_length = sequence_length, dropout_prob=dropout, bidirectional = bidirectional, \
                    batch_first = batch_first)

    """

    fw_unit_list = []

    for i in range(num_layers):
        new_name = name + "_layers_" + str(i)
        if param_attr is not None and param_attr.name is not None:
            layer_param_attr = copy.deepcopy(param_attr)
            layer_param_attr.name += "_fw_w_" + str(i)
        else:
            layer_param_attr = param_attr
        if bias_attr is not None and bias_attr.name is not None:
            layer_bias_attr = copy.deepcopy(bias_attr)
            layer_bias_attr.name += "_fw_b_" + str(i)
        else:
            layer_bias_attr = bias_attr
        fw_unit_list.append(
            BasicGRUUnit(new_name, hidden_size, layer_param_attr,
                         layer_bias_attr, gate_activation, activation, dtype))
    if bidirectional:
        bw_unit_list = []

        for i in range(num_layers):
            new_name = name + "_reverse_layers_" + str(i)
            if param_attr is not None and param_attr.name is not None:
                layer_param_attr = copy.deepcopy(param_attr)
                layer_param_attr.name += "_bw_w_" + str(i)
            else:
                layer_param_attr = param_attr
            if bias_attr is not None and bias_attr.name is not None:
                layer_bias_attr = copy.deepcopy(bias_attr)
                layer_bias_attr.name += "_bw_b_" + str(i)
            else:
                layer_bias_attr = bias_attr

            bw_unit_list.append(
                BasicGRUUnit(new_name, hidden_size, layer_param_attr,
                             layer_bias_attr, gate_activation, activation,
                             dtype))

    if batch_first:
        input = layers.transpose(input, [1, 0, 2])

    mask = None
    if sequence_length:
        max_seq_len = layers.shape(input)[0]
        mask = layers.sequence_mask(sequence_length,
                                    maxlen=max_seq_len,
                                    dtype='float32')
        mask = layers.transpose(mask, [1, 0])

    direc_num = 1
    if bidirectional:
        direc_num = 2
    if init_hidden:
        init_hidden = layers.reshape(
            init_hidden, shape=[num_layers, direc_num, -1, hidden_size])

    def get_single_direction_output(rnn_input,
                                    unit_list,
                                    mask=None,
                                    direc_index=0):
        rnn = StaticRNN()
        with rnn.step():
            step_input = rnn.step_input(rnn_input)

            if mask:
                step_mask = rnn.step_input(mask)

            for i in range(num_layers):
                if init_hidden:
                    pre_hidden = rnn.memory(init=init_hidden[i, direc_index])
                else:
                    pre_hidden = rnn.memory(batch_ref=rnn_input,
                                            shape=[-1, hidden_size],
                                            ref_batch_dim_idx=1)

                new_hidden = unit_list[i](step_input, pre_hidden)

                if mask:
                    new_hidden = layers.elementwise_mul(
                        new_hidden, step_mask,
                        axis=0) - layers.elementwise_mul(pre_hidden,
                                                         (step_mask - 1),
                                                         axis=0)
                rnn.update_memory(pre_hidden, new_hidden)

                rnn.step_output(new_hidden)

                step_input = new_hidden
                if dropout_prob is not None and dropout_prob > 0.0:
                    step_input = layers.dropout(
                        step_input,
                        dropout_prob=dropout_prob,
                    )

            rnn.step_output(step_input)

        rnn_out = rnn()

        last_hidden_array = []
        rnn_output = rnn_out[-1]
        for i in range(num_layers):
            last_hidden = rnn_out[i]
            last_hidden = last_hidden[-1]
            last_hidden_array.append(last_hidden)

        last_hidden_output = layers.concat(last_hidden_array, axis=0)
        last_hidden_output = layers.reshape(
            last_hidden_output, shape=[num_layers, -1, hidden_size])

        return rnn_output, last_hidden_output
        # seq_len, batch_size, hidden_size

    fw_rnn_out, fw_last_hidden = get_single_direction_output(input,
                                                             fw_unit_list,
                                                             mask,
                                                             direc_index=0)

    if bidirectional:
        bw_input = layers.reverse(input, axis=[0])
        bw_mask = None
        if mask:
            bw_mask = layers.reverse(mask, axis=[0])
        bw_rnn_out, bw_last_hidden = get_single_direction_output(bw_input,
                                                                 bw_unit_list,
                                                                 bw_mask,
                                                                 direc_index=1)

        bw_rnn_out = layers.reverse(bw_rnn_out, axis=[0])

        rnn_out = layers.concat([fw_rnn_out, bw_rnn_out], axis=2)
        last_hidden = layers.concat([fw_last_hidden, bw_last_hidden], axis=1)

        last_hidden = layers.reshape(
            last_hidden, shape=[num_layers * direc_num, -1, hidden_size])

        if batch_first:
            rnn_out = layers.transpose(rnn_out, [1, 0, 2])
        return rnn_out, last_hidden
    else:

        rnn_out = fw_rnn_out
        last_hidden = fw_last_hidden

        if batch_first:
            rnn_out = layers.transpose(rnn_out, [1, 0, 2])

        return rnn_out, last_hidden
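Similarly, a minimal NumPy sketch of the single GRU step that the docstring equations describe. The fused gate weights and their shapes are illustrative assumptions; they do not necessarily match how BasicGRUUnit stores its parameters.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def gru_step(x_t, h_prev, W_g, b_g, W_c, b_c):
    # W_g: (input_size + hidden_size, 2 * hidden_size) fuses the update/reset gates,
    # W_c: (input_size + hidden_size, hidden_size) is the candidate weight.
    gates = sigmoid(np.concatenate([x_t, h_prev], axis=-1) @ W_g + b_g)
    u, r = np.split(gates, 2, axis=-1)           # update gate u_t, reset gate r_t
    m = np.tanh(np.concatenate([x_t, r * h_prev], axis=-1) @ W_c + b_c)
    return u * h_prev + (1.0 - u) * m            # h_t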
Example #4
    def forward(self,
                inputs,
                initial_states=None,
                sequence_length=None,
                **kwargs):
        if F.in_dygraph_mode():

            class OutputArray(object):
                def __init__(self, x):
                    self.array = [x]

                def append(self, x):
                    self.array.append(x)

            def _maybe_copy(state, new_state, step_mask):
                # TODO: use where_op
                new_state = L.elementwise_mul(new_state, step_mask, axis=0) - \
                        L.elementwise_mul(state, (step_mask - 1), axis=0)
                return new_state

            #logging.info("inputs shape: {}".format(inputs.shape))
            flat_inputs = U.flatten(inputs)
            #logging.info("flat inputs len: {}".format(len(flat_inputs)))
            #logging.info("flat inputs[0] shape: {}".format(flat_inputs[0].shape))

            batch_size, time_steps = (
                flat_inputs[0].shape[self.batch_index],
                flat_inputs[0].shape[self.time_step_index])
            #logging.info("batch_size: {}".format(batch_size))
            #logging.info("time_steps: {}".format(time_steps))

            if initial_states is None:
                initial_states = self.cell.get_initial_states(
                    batch_ref=inputs, batch_dim_idx=self.batch_index)

            if not self.time_major:
                # if the first dim is not the time step, swap the first and
                # second dims so the time-step dim comes first
                inputs = U.map_structure(
                    lambda x: L.transpose(x, [1, 0] + list(
                        range(2, len(x.shape)))), inputs)

            if sequence_length is not None:
                mask = L.sequence_mask(
                    sequence_length,
                    maxlen=time_steps,
                    dtype=U.flatten(initial_states)[0].dtype)
                # likewise, put the time-step dim first
                mask = L.transpose(mask, [1, 0])

            if self.is_reverse:
                # if running in reverse, flip the inputs (and the mask)
                # along the time-step dim
                inputs = U.map_structure(lambda x: L.reverse(x, axis=[0]), inputs)
                mask = L.reverse(mask, axis=[0]) if sequence_length is not None else None

            states = initial_states
            outputs = []
            # iterate over the time steps
            for i in range(time_steps):
                # take this time step's input
                step_inputs = U.map_structure(lambda x: x[i], inputs)
                # feed the current input and states to the cell
                # to get the step output and the new states
                step_outputs, new_states = self.cell(step_inputs, states, **kwargs)
                if sequence_length is not None:
                    # if a mask exists, keep the previous state at masked (padding) positions:
                    # _maybe_copy takes new_states where unmasked and states where masked
                    new_states = U.map_structure(
                        partial(_maybe_copy, step_mask=mask[i]),
                        states,
                        new_states)
                states = new_states
                #logging.info("step_output shape: {}".format(step_outputs.shape))

                if i == 0:
                    # at the first step, create an output array for each output
                    outputs = U.map_structure(lambda x: OutputArray(x), step_outputs)
                else:
                    # append each step output to its corresponding array
                    U.map_structure(lambda x, x_array: x_array.append(x), step_outputs, outputs)

            # finally, stack the collected outputs along the time-step dim
            final_outputs = U.map_structure(
                lambda x: L.stack(x.array, axis=self.time_step_index),
                outputs)
            #logging.info("final_outputs shape: {}".format(final_outputs.shape))

            if self.is_reverse:
                # if running in reverse, reverse the final outputs as well
                final_outputs = U.map_structure(
                    lambda x: L.reverse(x, axis=self.time_step_index),
                    final_outputs)

            final_states = new_states

        else:
            final_outputs, final_states = L.rnn(
                self.cell,
                inputs,
                initial_states=initial_states,
                sequence_length=sequence_length,
                time_major=self.time_major,
                is_reverse=self.is_reverse,
                **kwargs)

        return final_outputs, final_states
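The _maybe_copy helper above relies on the identity new*mask - old*(mask - 1) == new*mask + old*(1 - mask): positions whose mask is 0 (padding) keep the previous state, while positions whose mask is 1 take the new state. A small NumPy check of that identity:

import numpy as np

def maybe_copy(state, new_state, step_mask):
    # new*m - old*(m - 1)  ==  new*m + old*(1 - m)
    return new_state * step_mask[:, None] - state * (step_mask[:, None] - 1.0)

state = np.array([[1.0, 1.0], [2.0, 2.0]])
new_state = np.array([[9.0, 9.0], [8.0, 8.0]])
mask = np.array([1.0, 0.0])                      # second instance is padding at this step

print(maybe_copy(state, new_state, mask))
# [[9. 9.]    <- real step: the new state is kept
#  [2. 2.]]   <- padded step: the previous state is carried forward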
Example #5
    def transform(self, img, do_flip):
        if do_flip:
            if isinstance(img, PTensor):
                return layers.reverse(img, 2)
            return np.fliplr(img).copy()
        return img
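np.fliplr flips axis 1, the width axis of an H x W (x C) NumPy image, while layers.reverse(img, 2) flips axis 2, which is the width axis only if the tensor is laid out C x H x W. Below is a small NumPy sketch of that correspondence; the C x H x W layout of PTensor is an assumption here.

import numpy as np

img_hw = np.arange(12, dtype=np.float32).reshape(3, 4)    # H x W
img_chw = img_hw[None, :, :]                              # C x H x W, single channel

flipped_numpy = np.fliplr(img_hw)                         # what the NumPy branch does
flipped_tensor = img_chw[:, :, ::-1]                      # what reverse(img, 2) does on C x H x W

assert (flipped_tensor[0] == flipped_numpy).all()         # both are a horizontal flip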
Example #6
def conditional_gru(input,
                    encode_hidden,
                    init_hidden,
                    encode_hidden_size,
                    hidden_size,
                    num_layers=1,
                    sequence_length=None,
                    dropout_prob=0.0,
                    bidirectional=False,
                    batch_first=True,
                    param_attr=None,
                    bias_attr=None,
                    gate_activation=None,
                    activation=None,
                    dtype="float32",
                    name="conditional_gru"):
    """
        定义一个新的GRU类型,多了参数Cu,Cr,C。GRU的新公式:
        .. math::
            u_t & = actGate(W_ux xu_{t} + W_uh h_{t-1} + C_u h_i + b_u)

            r_t & = actGate(W_rx xr_{t} + W_rh h_{t-1} + C_r h_i + b_r)

            m_t & = actNode(W_cx xm_t + W_ch dot(r_t, h_{t-1}) + C_u h_i + C h_i + b_m)

            h_t & = dot(u_t, h_{t-1}) + dot((1-u_t), m_t)
        其他定义与GRU相同
    Args:
       input (Variable): GRU input tensor,
                      if batch_first = False, shape should be ( seq_len x batch_size x input_size )
                      if batch_first = True, shape should be ( batch_size x seq_len x input_size )
       encode_hidden: The hidden state from the encoder of the GRU. If bidirectional is True, encode_hidden
                      is assumed to contain two parts: the first half is for the forward direction and the
                      second half is for the backward direction.
       encode_hidden_size: The size of encode_hidden. If bidirectional is True, encode_hidden_size covers
                      both halves, i.e. the actual size used for each direction is encode_hidden_size / 2.
       init_hidden(Variable|None): The initial hidden state of the GRU.
                      This is a tensor with shape ( num_layers x batch_size x hidden_size).
                      If bidirectional = True, the shape should be ( num_layers*2 x batch_size x hidden_size),
                      and it can be reshaped to a tensor with shape ( num_layers x 2 x batch_size x hidden_size) to use.
                      If it is None, it will be set to all 0.
       hidden_size (int): Hidden size of the GRU
       num_layers (int): The total number of layers of the GRU
       sequence_length (Variable|None): A Tensor with shape [batch_size] that stores the real length of each instance.
                       This tensor will be converted to a mask that masks the padding ids.
                       If it is None, there are no padding ids.
       dropout_prob(float|0.0): Dropout probability. Dropout is ONLY applied to the output of each layer,
                            NOT between time steps.
       bidirectional (bool|False): Whether the GRU is bidirectional.
       batch_first (bool|True): The shape format of the input and output tensors. If true,
           the shape format should be :attr:`[batch_size, seq_len, hidden_size]`. If false,
           the shape format should be :attr:`[seq_len, batch_size, hidden_size]`. By default
           this function accepts input and emits output in batch-major form to be consistent
           with most data formats, though it is a bit less efficient because of extra transposes.
       param_attr(ParamAttr|None): The parameter attribute for the learnable
           weight matrix. Note:
           If it is set to None or one attribute of ParamAttr, gru_unit will
           create ParamAttr as param_attr. If the Initializer of the param_attr
           is not set, the parameter is initialized with Xavier. Default: None.
       bias_attr (ParamAttr|None): The parameter attribute for the bias
           of GRU unit.
           If it is set to None or one attribute of ParamAttr, gru_unit will
           create ParamAttr as bias_attr. If the Initializer of the bias_attr
           is not set, the bias is initialized zero. Default: None.
       gate_activation (function|None): The activation function for gates (actGate).
                                 Default: 'fluid.layers.sigmoid'
       activation (function|None): The activation function for cell (actNode).
                            Default: 'fluid.layers.tanh'
       dtype(string): data type used in this unit
       name(string): name used to identify parameters and biases

    Returns:
        rnn_out(Tensor), last_hidden(Tensor), all_hidden(Tensor)
            - rnn_out is the GRU hidden output, with shape (seq_len x batch_size x hidden_size). \
              If bidirectional is True, its shape will be ( seq_len x batch_size x hidden_size*2 ).
            - last_hidden is the hidden state of the last step of the GRU, \
              with shape ( num_layers x batch_size x hidden_size ). \
              If bidirectional is True, its shape will be ( num_layers*2 x batch_size x hidden_size ),
              and it can be reshaped to a tensor with shape ( num_layers x 2 x batch_size x hidden_size ) to use.
            - all_hidden contains all the hidden states of the input, including last_hidden and the intermediate hidden states. \
              Its shape is (num_layers x seq_len x batch_size x hidden_size). If bidirectional is True, its shape will be
              (2 x num_layers x seq_len x batch_size x hidden_size).
    """
    if bidirectional:
        encode_hidden, bw_encode_hidden = layers.split(encode_hidden, num_or_sections=2, dim=-1)
        encode_hidden_size = int(encode_hidden_size / 2)

    fw_unit_list = []

    for i in range(num_layers):
        new_name = name + '_layers_' + str(i)
        if param_attr is not None and param_attr.name is not None:
            layer_param_attr = copy.deepcopy(param_attr)
            layer_param_attr.name += '_fw_w_' + str(i)
        else:
            layer_param_attr = param_attr
        if bias_attr is not None and bias_attr.name is not None:
            layer_bias_attr = copy.deepcopy(bias_attr)
            layer_bias_attr.name += '_fw_b_' + str(i)
        else:
            layer_bias_attr = bias_attr

        fw_unit_list.append(
            ConditionalGRUUnit(new_name, encode_hidden_size, hidden_size, layer_param_attr,
                               layer_bias_attr, gate_activation, activation, dtype)
        )

    if bidirectional:
        bw_unit_list = []

        for i in range(num_layers):
            new_name = name + '_reverse_layers_' + str(i)
            if param_attr is not None and param_attr.name is not None:
                layer_param_attr = copy.deepcopy(param_attr)
                layer_param_attr.name += '_bw_w_' + str(i)
            else:
                layer_param_attr = param_attr
            if bias_attr is not None and bias_attr.name is not None:
                layer_bias_attr = copy.deepcopy(bias_attr)
                layer_bias_attr.name += '_bw_b_' + str(i)
            else:
                layer_bias_attr = bias_attr
            bw_unit_list.append(
                ConditionalGRUUnit(new_name, encode_hidden_size, hidden_size, layer_param_attr,
                                   layer_bias_attr, gate_activation, activation, dtype)
            )

    if batch_first:
        input = layers.transpose(input, [1, 0, 2])

    mask = None
    if sequence_length:
        max_seq_len = layers.shape(input)[0]
        mask = layers.sequence_mask(
            sequence_length, maxlen=max_seq_len, dtype='float32'
        )
        mask = layers.transpose(mask, [1, 0])

    direc_num = 1
    if bidirectional:
        direc_num = 2
    if init_hidden:
        init_hidden = layers.reshape(
            init_hidden, shape=[num_layers, direc_num, -1, hidden_size]
        )

    def get_single_direction_output(rnn_input,
                                    encode_hidden,
                                    unit_list,
                                    mask=None,
                                    direc_index=0):
        rnn = StaticRNN()
        #print(rnn_input.shape)
        with rnn.step():
            step_input = rnn.step_input(rnn_input)

            if mask:
                step_mask = rnn.step_input(mask)

            for i in range(num_layers):
                if init_hidden:
                    pre_hidden = rnn.memory(init=init_hidden[i, direc_index])
                else:
                    pre_hidden = rnn.memory(batch_ref=rnn_input,
                                            shape=[-1, hidden_size],
                                            ref_batch_dim_idx=1)
                encode_h = encode_hidden[i]
                pre_encode_hidden = layers.concat([pre_hidden, encode_h], axis=1)
                new_hidden = unit_list[i](step_input, pre_encode_hidden)

                if mask:
                    new_hidden = layers.elementwise_mul(
                        new_hidden, step_mask, axis=0) - layers.elementwise_mul(
                        pre_hidden, (step_mask - 1), axis=0)
                rnn.update_memory(pre_hidden, new_hidden)

                rnn.step_output(new_hidden)

                step_input = new_hidden
                if dropout_prob is not None and dropout_prob > 0.0:
                    step_input = layers.dropout(step_input, dropout_prob=dropout_prob, )

            rnn.step_output(step_input)

        rnn_out = rnn()

        last_hidden_array = []
        all_hidden_array = []  # also collect every layer's hidden states
        rnn_output = rnn_out[-1]

        for i in range(num_layers):
            last_hidden = rnn_out[i]
            all_hidden_array.append(last_hidden)
            last_hidden = last_hidden[-1]
            last_hidden_array.append(last_hidden)

        all_hidden_array = layers.concat(all_hidden_array, axis=0)
        all_hidden_array = layers.reshape(all_hidden_array, shape=[num_layers, input.shape[0], -1, hidden_size])
        last_hidden_output = layers.concat(last_hidden_array, axis=0)
        last_hidden_output = layers.reshape(last_hidden_output, shape=[num_layers, -1, hidden_size])

        return rnn_output, last_hidden_output, all_hidden_array

    fw_rnn_out, fw_last_hidden, fw_all_hidden = get_single_direction_output(
        input, encode_hidden, fw_unit_list, mask, direc_index=0)

    if bidirectional:
        bw_input = layers.reverse(input, axis=[0])
        bw_mask = None
        if mask:
            bw_mask = layers.reverse(mask, axis=[0])
        bw_rnn_out, bw_last_hidden, bw_all_hidden = get_single_direction_output(
            bw_input, bw_encode_hidden, bw_unit_list, bw_mask, direc_index=1)

        bw_rnn_out = layers.reverse(bw_rnn_out, axis=[0])

        rnn_out = layers.concat([fw_rnn_out, bw_rnn_out], axis=2)
        last_hidden = layers.concat([fw_last_hidden, bw_last_hidden], axis=1)
        all_hidden = layers.concat([fw_all_hidden, bw_all_hidden], axis=0)

        last_hidden = layers.reshape(
            last_hidden, shape=[num_layers * direc_num, -1, hidden_size])

        if batch_first:
            rnn_out = layers.transpose(rnn_out, [1, 0, 2])
        return rnn_out, last_hidden, all_hidden
    else:

        rnn_out = fw_rnn_out
        last_hidden = fw_last_hidden
        all_hidden = fw_all_hidden

        if batch_first:
            rnn_out = layers.transpose(rnn_out, [1, 0, 2])

        return rnn_out, last_hidden, all_hidden
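As a reading aid, here is a minimal NumPy sketch of one conditional GRU step in which the encoder hidden state h_enc enters every gate, mirroring how the code above concatenates pre_hidden with encode_h into pre_encode_hidden. The weight shapes and the way the context is fused are illustrative assumptions about ConditionalGRUUnit, not its actual parameter layout.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def conditional_gru_step(x_t, h_prev, h_enc, W_g, b_g, W_c, b_c):
    # W_g: (input_size + hidden_size + encode_hidden_size, 2 * hidden_size)
    # W_c: (input_size + hidden_size + encode_hidden_size, hidden_size)
    ctx = np.concatenate([x_t, h_prev, h_enc], axis=-1)
    u, r = np.split(sigmoid(ctx @ W_g + b_g), 2, axis=-1)   # update and reset gates
    m = np.tanh(np.concatenate([x_t, r * h_prev, h_enc], axis=-1) @ W_c + b_c)
    return u * h_prev + (1.0 - u) * m                        # h_t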
Example #7
    def __call__(self, image: PTensor):
        if isinstance(image, PTensor):
            return self.crop_to_output(layers.reverse(image, 2))
        else:
            return self.crop_to_output(np.flipud(image))
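Here layers.reverse(image, 2) flips axis 2, which is the height axis of a batched N x C x H x W tensor, so it corresponds to the vertical flip np.flipud performs on an H x W NumPy image. A small NumPy sketch of that correspondence; the 4-D NCHW layout of the PTensor input is an assumption.

import numpy as np

image_hw = np.arange(12, dtype=np.float32).reshape(3, 4)  # H x W
image_nchw = image_hw[None, None, :, :]                   # N x C x H x W

flipped_tensor = image_nchw[:, :, ::-1, :]                # what reverse(image, 2) does
flipped_numpy = np.flipud(image_hw)                       # what the NumPy branch does

assert (flipped_tensor[0, 0] == flipped_numpy).all()      # both are a vertical flip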