def forward(self, x):
    """Compute the STFT transform.

    Args:
        x (Variable): shape(B, T), dtype float32, the input waveform.

    Returns:
        (real, imag)
        real (Variable): shape(B, C, 1, T), dtype float32, the real part of the
            spectrogram. (C = 1 + n_fft // 2)
        imag (Variable): shape(B, C, 1, T), dtype float32, the imaginary part of the
            spectrogram. (C = 1 + n_fft // 2)
    """
    # x: (batch_size, time_steps)
    # pad the waveform first, in reflect mode
    pad_start = F.reverse(x[:, 1:1 + self.n_fft // 2], axis=1)
    pad_stop = F.reverse(x[:, -(1 + self.n_fft // 2):-1], axis=1)
    x = F.concat([pad_start, x, pad_stop], axis=-1)

    # to BC1T, C=1
    x = F.unsqueeze(x, axes=[1, 2])
    out = conv2d(x, self.weight, stride=(1, self.hop_length))
    real, imag = F.split(out, 2, dim=1)  # BC1T
    return real, imag
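# The forward above realizes an STFT as a strided convolution: reflect-pad the
# waveform by n_fft // 2 on both sides, then correlate frames with fixed DFT-basis
# filters (self.weight). A minimal NumPy sketch of the same idea, ignoring any
# analysis window the module's weights may include; the helper name and defaults
# here are illustrative, not part of the original module:
import numpy as np

def numpy_stft(x, n_fft=8, hop_length=4):
    # x: (B, T) float32 waveform
    pad = n_fft // 2
    x = np.pad(x, ((0, 0), (pad, pad)), mode="reflect")
    n_frames = 1 + (x.shape[1] - n_fft) // hop_length
    freqs = np.arange(1 + n_fft // 2)
    n = np.arange(n_fft)
    real_basis = np.cos(2 * np.pi * np.outer(freqs, n) / n_fft)   # (C, n_fft)
    imag_basis = -np.sin(2 * np.pi * np.outer(freqs, n) / n_fft)  # (C, n_fft)
    # frame the signal: (B, n_fft, n_frames)
    frames = np.stack(
        [x[:, i * hop_length:i * hop_length + n_fft] for i in range(n_frames)],
        axis=-1)
    real = np.einsum("cn,bnt->bct", real_basis, frames)  # (B, C, n_frames)
    imag = np.einsum("cn,bnt->bct", imag_basis, frames)
    return real, imag

real, imag = numpy_stft(np.random.randn(2, 64).astype("float32"))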
def basic_lstm(input,
               init_hidden,
               init_cell,
               hidden_size,
               num_layers=1,
               sequence_length=None,
               dropout_prob=0.0,
               bidirectional=False,
               batch_first=True,
               param_attr=None,
               bias_attr=None,
               gate_activation=None,
               activation=None,
               forget_bias=1.0,
               dtype='float32',
               name='basic_lstm'):
    """
    LSTM implementation using basic operators; supports multiple layers and bidirectional LSTM.

    .. math::
           i_t &= \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + b_i)

           f_t &= \sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + b_f + forget\_bias)

           o_t &= \sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + b_o)

           \\tilde{c_t} &= tanh(W_{cx}x_t + W_{ch}h_{t-1} + b_c)

           c_t &= f_t \odot c_{t-1} + i_t \odot \\tilde{c_t}

           h_t &= o_t \odot tanh(c_t)

    Args:
        input (Variable): LSTM input tensor. If batch_first = False, shape should be
            ( seq_len x batch_size x input_size ); if batch_first = True, shape should be
            ( batch_size x seq_len x input_size ).
        init_hidden (Variable|None): The initial hidden state of the LSTM, a tensor with
            shape ( num_layers x batch_size x hidden_size ). If bidirectional = True, shape
            should be ( num_layers*2 x batch_size x hidden_size ) and can be reshaped to
            ( num_layers x 2 x batch_size x hidden_size ) to use. If it's None, it will be
            set to all zeros.
        init_cell (Variable|None): The initial cell state of the LSTM, a tensor with shape
            ( num_layers x batch_size x hidden_size ). If bidirectional = True, shape should
            be ( num_layers*2 x batch_size x hidden_size ) and can be reshaped to
            ( num_layers x 2 x batch_size x hidden_size ) to use. If it's None, it will be
            set to all zeros.
        hidden_size (int): Hidden size of the LSTM.
        num_layers (int): The total number of layers of the LSTM.
        sequence_length (Variable|None): A tensor with shape [batch_size] that stores the
            real length of each instance. It will be converted to a mask that masks out the
            padding ids. If it's None, there are no padding ids.
        dropout_prob (float|0.0): Dropout probability. Dropout ONLY works on the output of
            each layer, NOT between time steps.
        bidirectional (bool|False): Whether the LSTM is bidirectional.
        batch_first (bool|True): The shape format of the input and output tensors. If True,
            the shape format should be :attr:`[batch_size, seq_len, hidden_size]`; if False,
            :attr:`[seq_len, batch_size, hidden_size]`. By default this function accepts
            input and emits output in batch-major form to be consistent with most data
            formats, though it is a bit less efficient because of the extra transposes.
        param_attr (ParamAttr|None): The parameter attribute for the learnable weight matrix.
            Note: If it is set to None or one attribute of ParamAttr, lstm_unit will create
            ParamAttr as param_attr. If the Initializer of the param_attr is not set, the
            parameter is initialized with Xavier. Default: None.
        bias_attr (ParamAttr|None): The parameter attribute for the bias of the LSTM unit.
            If it is set to None or one attribute of ParamAttr, lstm_unit will create
            ParamAttr as bias_attr. If the Initializer of the bias_attr is not set, the bias
            is initialized to zero. Default: None.
        gate_activation (function|None): The activation function for gates (actGate).
            Default: 'fluid.layers.sigmoid'.
        activation (function|None): The activation function for the cell (actNode).
            Default: 'fluid.layers.tanh'.
        forget_bias (float|1.0): Forget bias used to compute the forget gate.
        dtype (string): Data type used in this unit.
        name (string): Name used to identify parameters and biases.

    Returns:
        rnn_out (Tensor), last_hidden (Tensor), last_cell (Tensor)
            - rnn_out is the LSTM hidden output, with shape ( seq_len x batch_size x hidden_size );
              if bidirectional is True, its shape will be ( seq_len x batch_size x hidden_size*2 ).
            - last_hidden is the hidden state of the last step of the LSTM, with shape
              ( num_layers x batch_size x hidden_size ); if bidirectional is True, its shape
              will be ( num_layers*2 x batch_size x hidden_size ) and can be reshaped to
              ( num_layers x 2 x batch_size x hidden_size ) to use.
            - last_cell is the cell state of the last step of the LSTM, with shape
              ( num_layers x batch_size x hidden_size ); if bidirectional is True, its shape
              will be ( num_layers*2 x batch_size x hidden_size ) and can be reshaped to
              ( num_layers x 2 x batch_size x hidden_size ) to use.

    Examples:
        .. code-block:: python

            import paddle.fluid.layers as layers
            from paddle.fluid.contrib.layers import basic_lstm

            batch_size = 20
            input_size = 128
            hidden_size = 256
            num_layers = 2
            dropout = 0.5
            bidirectional = True
            batch_first = False

            input = layers.data(
                name="input", shape=[-1, batch_size, input_size], dtype='float32')
            pre_hidden = layers.data(
                name="pre_hidden", shape=[-1, hidden_size], dtype='float32')
            pre_cell = layers.data(
                name="pre_cell", shape=[-1, hidden_size], dtype='float32')
            sequence_length = layers.data(
                name="sequence_length", shape=[-1], dtype='int32')

            rnn_out, last_hidden, last_cell = basic_lstm(
                input, pre_hidden, pre_cell, hidden_size,
                num_layers=num_layers, sequence_length=sequence_length,
                dropout_prob=dropout, bidirectional=bidirectional,
                batch_first=batch_first)
    """
    fw_unit_list = []

    for i in range(num_layers):
        new_name = name + "_layers_" + str(i)
        if param_attr is not None and param_attr.name is not None:
            layer_param_attr = copy.deepcopy(param_attr)
            layer_param_attr.name += "_fw_w_" + str(i)
        else:
            layer_param_attr = param_attr
        if bias_attr is not None and bias_attr.name is not None:
            layer_bias_attr = copy.deepcopy(bias_attr)
            layer_bias_attr.name += "_fw_b_" + str(i)
        else:
            layer_bias_attr = bias_attr
        fw_unit_list.append(
            BasicLSTMUnit(
                new_name,
                hidden_size,
                param_attr=layer_param_attr,
                bias_attr=layer_bias_attr,
                gate_activation=gate_activation,
                activation=activation,
                forget_bias=forget_bias,
                dtype=dtype))

    if bidirectional:
        bw_unit_list = []
        for i in range(num_layers):
            new_name = name + "_reverse_layers_" + str(i)
            if param_attr is not None and param_attr.name is not None:
                layer_param_attr = copy.deepcopy(param_attr)
                layer_param_attr.name += "_bw_w_" + str(i)
            else:
                layer_param_attr = param_attr
            if bias_attr is not None and bias_attr.name is not None:
                layer_bias_attr = copy.deepcopy(bias_attr)
                layer_bias_attr.name += "_bw_b_" + str(i)
            else:
                # fall back to the original bias_attr (not param_attr)
                layer_bias_attr = bias_attr
            bw_unit_list.append(
                BasicLSTMUnit(
                    new_name,
                    hidden_size,
                    param_attr=layer_param_attr,
                    bias_attr=layer_bias_attr,
                    gate_activation=gate_activation,
                    activation=activation,
                    forget_bias=forget_bias,
                    dtype=dtype))

    if batch_first:
        input = layers.transpose(input, [1, 0, 2])

    mask = None
    if sequence_length:
        max_seq_len = layers.shape(input)[0]
        mask = layers.sequence_mask(
            sequence_length, maxlen=max_seq_len, dtype='float32')
        mask = layers.transpose(mask, [1, 0])

    direc_num = 1
    if bidirectional:
        direc_num = 2
    # convert to [num_layers, direc_num, batch_size, hidden_size]
    if init_hidden:
        init_hidden = layers.reshape(
            init_hidden, shape=[num_layers, direc_num, -1, hidden_size])
        init_cell = layers.reshape(
            init_cell, shape=[num_layers, direc_num, -1, hidden_size])

    # single-direction unrolled RNN
    def get_single_direction_output(rnn_input,
                                    unit_list,
                                    mask=None,
                                    direc_index=0):
        rnn = StaticRNN()
        with rnn.step():
            step_input = rnn.step_input(rnn_input)

            if mask:
                step_mask = rnn.step_input(mask)

            for i in range(num_layers):
                if init_hidden:
                    pre_hidden = rnn.memory(init=init_hidden[i, direc_index])
                    pre_cell = rnn.memory(init=init_cell[i, direc_index])
                else:
                    pre_hidden = rnn.memory(
                        batch_ref=rnn_input, shape=[-1, hidden_size])
                    pre_cell = rnn.memory(
                        batch_ref=rnn_input, shape=[-1, hidden_size])

                new_hidden, new_cell = unit_list[i](step_input, pre_hidden,
                                                    pre_cell)

                if mask:
                    new_hidden = layers.elementwise_mul(
                        new_hidden, step_mask,
                        axis=0) - layers.elementwise_mul(
                            pre_hidden, (step_mask - 1), axis=0)
                    new_cell = layers.elementwise_mul(
                        new_cell, step_mask,
                        axis=0) - layers.elementwise_mul(
                            pre_cell, (step_mask - 1), axis=0)

                rnn.update_memory(pre_hidden, new_hidden)
                rnn.update_memory(pre_cell, new_cell)

                rnn.step_output(new_hidden)
                rnn.step_output(new_cell)

                step_input = new_hidden
                if dropout_prob is not None and dropout_prob > 0.0:
                    step_input = layers.dropout(
                        step_input,
                        dropout_prob=dropout_prob,
                        dropout_implementation='upscale_in_train')

            rnn.step_output(step_input)

        rnn_out = rnn()

        last_hidden_array = []
        last_cell_array = []
        rnn_output = rnn_out[-1]
        for i in range(num_layers):
            last_hidden = rnn_out[i * 2]
            last_hidden = last_hidden[-1]
            last_hidden_array.append(last_hidden)
            last_cell = rnn_out[i * 2 + 1]
            last_cell = last_cell[-1]
            last_cell_array.append(last_cell)

        last_hidden_output = layers.concat(last_hidden_array, axis=0)
        last_hidden_output = layers.reshape(
            last_hidden_output, shape=[num_layers, -1, hidden_size])
        last_cell_output = layers.concat(last_cell_array, axis=0)
        last_cell_output = layers.reshape(
            last_cell_output, shape=[num_layers, -1, hidden_size])

        return rnn_output, last_hidden_output, last_cell_output

    # seq_len, batch_size, hidden_size
    fw_rnn_out, fw_last_hidden, fw_last_cell = get_single_direction_output(
        input, fw_unit_list, mask, direc_index=0)

    if bidirectional:
        bw_input = layers.reverse(input, axis=[0])
        bw_mask = None
        if mask:
            bw_mask = layers.reverse(mask, axis=[0])
        bw_rnn_out, bw_last_hidden, bw_last_cell = get_single_direction_output(
            bw_input, bw_unit_list, bw_mask, direc_index=1)

        bw_rnn_out = layers.reverse(bw_rnn_out, axis=[0])

        rnn_out = layers.concat([fw_rnn_out, bw_rnn_out], axis=2)
        last_hidden = layers.concat([fw_last_hidden, bw_last_hidden], axis=1)
        last_hidden = layers.reshape(
            last_hidden, shape=[num_layers * direc_num, -1, hidden_size])
        last_cell = layers.concat([fw_last_cell, bw_last_cell], axis=1)
        last_cell = layers.reshape(
            last_cell, shape=[num_layers * direc_num, -1, hidden_size])

        if batch_first:
            rnn_out = layers.transpose(rnn_out, [1, 0, 2])
        return rnn_out, last_hidden, last_cell
    else:
        rnn_out = fw_rnn_out
        last_hidden = fw_last_hidden
        last_cell = fw_last_cell

        if batch_first:
            rnn_out = layers.transpose(rnn_out, [1, 0, 2])
        return rnn_out, last_hidden, last_cell
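# As the docstring notes, the bidirectional last_hidden / last_cell come back as
# ( num_layers*2 x batch_size x hidden_size ). A hedged sketch (variable names
# illustrative) of splitting them per direction with the same fluid ops used above,
# e.g. to feed one direction's states back in as init_hidden / init_cell:
#
#     last_hidden = layers.reshape(
#         last_hidden, shape=[num_layers, 2, -1, hidden_size])
#     fw_states, bw_states = layers.split(last_hidden, num_or_sections=2, dim=1)
#     fw_states = layers.reshape(fw_states, shape=[num_layers, -1, hidden_size])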
def basic_gru(input,
              init_hidden,
              hidden_size,
              num_layers=1,
              sequence_length=None,
              dropout_prob=0.0,
              bidirectional=False,
              batch_first=True,
              param_attr=None,
              bias_attr=None,
              gate_activation=None,
              activation=None,
              dtype='float32',
              name='basic_gru'):
    """
    GRU implementation using basic operators; supports multiple layers and bidirectional GRU.

    .. math::
            u_t & = actGate(W_{ux} x_{t} + W_{uh} h_{t-1} + b_u)

            r_t & = actGate(W_{rx} x_{t} + W_{rh} h_{t-1} + b_r)

            m_t & = actNode(W_{cx} x_t + W_{ch} dot(r_t, h_{t-1}) + b_m)

            h_t & = dot(u_t, h_{t-1}) + dot((1-u_t), m_t)

    Args:
        input (Variable): GRU input tensor. If batch_first = False, shape should be
            ( seq_len x batch_size x input_size ); if batch_first = True, shape should be
            ( batch_size x seq_len x input_size ).
        init_hidden (Variable|None): The initial hidden state of the GRU, a tensor with
            shape ( num_layers x batch_size x hidden_size ). If bidirectional = True, shape
            should be ( num_layers*2 x batch_size x hidden_size ) and can be reshaped to
            ( num_layers x 2 x batch_size x hidden_size ) to use. If it's None, it will be
            set to all zeros.
        hidden_size (int): Hidden size of the GRU.
        num_layers (int): The total number of layers of the GRU.
        sequence_length (Variable|None): A tensor with shape [batch_size] that stores the
            real length of each instance. It will be converted to a mask that masks out the
            padding ids. If it's None, there are no padding ids.
        dropout_prob (float|0.0): Dropout probability. Dropout ONLY works on the output of
            each layer, NOT between time steps.
        bidirectional (bool|False): Whether the GRU is bidirectional.
        batch_first (bool|True): The shape format of the input and output tensors. If True,
            the shape format should be :attr:`[batch_size, seq_len, hidden_size]`; if False,
            :attr:`[seq_len, batch_size, hidden_size]`. By default this function accepts
            input and emits output in batch-major form to be consistent with most data
            formats, though it is a bit less efficient because of the extra transposes.
        param_attr (ParamAttr|None): The parameter attribute for the learnable weight matrix.
            Note: If it is set to None or one attribute of ParamAttr, gru_unit will create
            ParamAttr as param_attr. If the Initializer of the param_attr is not set, the
            parameter is initialized with Xavier. Default: None.
        bias_attr (ParamAttr|None): The parameter attribute for the bias of the GRU unit.
            If it is set to None or one attribute of ParamAttr, gru_unit will create
            ParamAttr as bias_attr. If the Initializer of the bias_attr is not set, the bias
            is initialized to zero. Default: None.
        gate_activation (function|None): The activation function for gates (actGate).
            Default: 'fluid.layers.sigmoid'.
        activation (function|None): The activation function for the cell (actNode).
            Default: 'fluid.layers.tanh'.
        dtype (string): Data type used in this unit.
        name (string): Name used to identify parameters and biases.

    Returns:
        rnn_out (Tensor), last_hidden (Tensor)
            - rnn_out is the GRU hidden output, with shape ( seq_len x batch_size x hidden_size );
              if bidirectional is True, its shape will be ( seq_len x batch_size x hidden_size*2 ).
            - last_hidden is the hidden state of the last step of the GRU, with shape
              ( num_layers x batch_size x hidden_size ); if bidirectional is True, its shape
              will be ( num_layers*2 x batch_size x hidden_size ) and can be reshaped to
              ( num_layers x 2 x batch_size x hidden_size ).

    Examples:
        .. code-block:: python

            import paddle.fluid.layers as layers
            from paddle.fluid.contrib.layers import basic_gru

            batch_size = 20
            input_size = 128
            hidden_size = 256
            num_layers = 2
            dropout = 0.5
            bidirectional = True
            batch_first = False

            input = layers.data(
                name="input", shape=[-1, batch_size, input_size], dtype='float32')
            pre_hidden = layers.data(
                name="pre_hidden", shape=[-1, hidden_size], dtype='float32')
            sequence_length = layers.data(
                name="sequence_length", shape=[-1], dtype='int32')

            rnn_out, last_hidden = basic_gru(
                input, pre_hidden, hidden_size, num_layers=num_layers,
                sequence_length=sequence_length, dropout_prob=dropout,
                bidirectional=bidirectional, batch_first=batch_first)
    """
    fw_unit_list = []

    for i in range(num_layers):
        new_name = name + "_layers_" + str(i)
        if param_attr is not None and param_attr.name is not None:
            layer_param_attr = copy.deepcopy(param_attr)
            layer_param_attr.name += "_fw_w_" + str(i)
        else:
            layer_param_attr = param_attr
        if bias_attr is not None and bias_attr.name is not None:
            layer_bias_attr = copy.deepcopy(bias_attr)
            layer_bias_attr.name += "_fw_b_" + str(i)
        else:
            layer_bias_attr = bias_attr
        fw_unit_list.append(
            BasicGRUUnit(new_name, hidden_size, layer_param_attr,
                         layer_bias_attr, gate_activation, activation, dtype))

    if bidirectional:
        bw_unit_list = []
        for i in range(num_layers):
            new_name = name + "_reverse_layers_" + str(i)
            if param_attr is not None and param_attr.name is not None:
                layer_param_attr = copy.deepcopy(param_attr)
                layer_param_attr.name += "_bw_w_" + str(i)
            else:
                layer_param_attr = param_attr
            if bias_attr is not None and bias_attr.name is not None:
                layer_bias_attr = copy.deepcopy(bias_attr)
                layer_bias_attr.name += "_bw_b_" + str(i)
            else:
                layer_bias_attr = bias_attr
            bw_unit_list.append(
                BasicGRUUnit(new_name, hidden_size, layer_param_attr,
                             layer_bias_attr, gate_activation, activation,
                             dtype))

    if batch_first:
        input = layers.transpose(input, [1, 0, 2])

    mask = None
    if sequence_length:
        max_seq_len = layers.shape(input)[0]
        mask = layers.sequence_mask(
            sequence_length, maxlen=max_seq_len, dtype='float32')
        mask = layers.transpose(mask, [1, 0])

    direc_num = 1
    if bidirectional:
        direc_num = 2
    if init_hidden:
        init_hidden = layers.reshape(
            init_hidden, shape=[num_layers, direc_num, -1, hidden_size])

    def get_single_direction_output(rnn_input,
                                    unit_list,
                                    mask=None,
                                    direc_index=0):
        rnn = StaticRNN()
        with rnn.step():
            step_input = rnn.step_input(rnn_input)

            if mask:
                step_mask = rnn.step_input(mask)

            for i in range(num_layers):
                if init_hidden:
                    pre_hidden = rnn.memory(init=init_hidden[i, direc_index])
                else:
                    pre_hidden = rnn.memory(
                        batch_ref=rnn_input,
                        shape=[-1, hidden_size],
                        ref_batch_dim_idx=1)

                new_hidden = unit_list[i](step_input, pre_hidden)

                if mask:
                    new_hidden = layers.elementwise_mul(
                        new_hidden, step_mask,
                        axis=0) - layers.elementwise_mul(
                            pre_hidden, (step_mask - 1), axis=0)

                rnn.update_memory(pre_hidden, new_hidden)

                rnn.step_output(new_hidden)

                step_input = new_hidden
                if dropout_prob is not None and dropout_prob > 0.0:
                    step_input = layers.dropout(
                        step_input, dropout_prob=dropout_prob)

            rnn.step_output(step_input)

        rnn_out = rnn()

        last_hidden_array = []
        rnn_output = rnn_out[-1]
        for i in range(num_layers):
            last_hidden = rnn_out[i]
            last_hidden = last_hidden[-1]
            last_hidden_array.append(last_hidden)

        last_hidden_output = layers.concat(last_hidden_array, axis=0)
        last_hidden_output = layers.reshape(
            last_hidden_output, shape=[num_layers, -1, hidden_size])

        return rnn_output, last_hidden_output

    # seq_len, batch_size, hidden_size
    fw_rnn_out, fw_last_hidden = get_single_direction_output(
        input, fw_unit_list, mask, direc_index=0)

    if bidirectional:
        bw_input = layers.reverse(input, axis=[0])
        bw_mask = None
        if mask:
            bw_mask = layers.reverse(mask, axis=[0])
        bw_rnn_out, bw_last_hidden = get_single_direction_output(
            bw_input, bw_unit_list, bw_mask, direc_index=1)

        bw_rnn_out = layers.reverse(bw_rnn_out, axis=[0])

        rnn_out = layers.concat([fw_rnn_out, bw_rnn_out], axis=2)
        last_hidden = layers.concat([fw_last_hidden, bw_last_hidden], axis=1)
        last_hidden = layers.reshape(
            last_hidden, shape=[num_layers * direc_num, -1, hidden_size])

        if batch_first:
            rnn_out = layers.transpose(rnn_out, [1, 0, 2])
        return rnn_out, last_hidden
    else:
        rnn_out = fw_rnn_out
        last_hidden = fw_last_hidden

        if batch_first:
            rnn_out = layers.transpose(rnn_out, [1, 0, 2])
        return rnn_out, last_hidden
def forward(self, inputs, initial_states=None, sequence_length=None, **kwargs):
    if F.in_dygraph_mode():

        class OutputArray(object):
            def __init__(self, x):
                self.array = [x]

            def append(self, x):
                self.array.append(x)

        def _maybe_copy(state, new_state, step_mask):
            # TODO: use where_op
            new_state = L.elementwise_mul(new_state, step_mask, axis=0) - \
                        L.elementwise_mul(state, (step_mask - 1), axis=0)
            return new_state

        flat_inputs = U.flatten(inputs)
        batch_size, time_steps = (
            flat_inputs[0].shape[self.batch_index],
            flat_inputs[0].shape[self.time_step_index])

        if initial_states is None:
            initial_states = self.cell.get_initial_states(
                batch_ref=inputs, batch_dim_idx=self.batch_index)

        if not self.time_major:
            # if the first dim is not the time step, swap dims 0 and 1
            # so that the time step comes first
            inputs = U.map_structure(
                lambda x: L.transpose(x, [1, 0] + list(
                    range(2, len(x.shape)))), inputs)

        if sequence_length is not None:
            mask = L.sequence_mask(
                sequence_length,
                maxlen=time_steps,
                dtype=U.flatten(initial_states)[0].dtype)
            # likewise, put the time step on the first dim
            mask = L.transpose(mask, [1, 0])

        if self.is_reverse:
            # when running in reverse, flip inputs (and mask) along the time dim
            inputs = U.map_structure(lambda x: L.reverse(x, axis=[0]), inputs)
            mask = L.reverse(mask, axis=[0]) if sequence_length is not None else None

        states = initial_states
        outputs = []
        # iterate over the time steps
        for i in range(time_steps):
            # slice out the input of this time step
            step_inputs = U.map_structure(lambda x: x[i], inputs)
            # feed the current input and states to the cell,
            # getting the step output and the new states
            step_outputs, new_states = self.cell(step_inputs, states, **kwargs)
            if sequence_length is not None:
                # where the mask is 0 keep the old state:
                # _maybe_copy uses new_states at unmasked positions and states at masked ones
                new_states = U.map_structure(
                    partial(_maybe_copy, step_mask=mask[i]), states, new_states)
            states = new_states
            if i == 0:
                # at the first step, create one array per output
                outputs = U.map_structure(lambda x: OutputArray(x), step_outputs)
            else:
                # append each step output to its array
                U.map_structure(lambda x, x_array: x_array.append(x),
                                step_outputs, outputs)

        # finally, stack the collected outputs along the time-step dim
        final_outputs = U.map_structure(
            lambda x: L.stack(x.array, axis=self.time_step_index), outputs)

        if self.is_reverse:
            # if running in reverse, flip the final outputs back
            final_outputs = U.map_structure(
                lambda x: L.reverse(x, axis=self.time_step_index), final_outputs)

        final_states = new_states
    else:
        final_outputs, final_states = L.rnn(
            self.cell,
            inputs,
            initial_states=initial_states,
            sequence_length=sequence_length,
            time_major=self.time_major,
            is_reverse=self.is_reverse,
            **kwargs)

    return final_outputs, final_states
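# The _maybe_copy trick above keeps the previous state at padded positions:
# new * mask - old * (mask - 1) equals new * mask + old * (1 - mask), i.e. a
# per-row select between the new and the old state. A tiny NumPy check of that
# identity (data here is made up purely for illustration):
import numpy as np

old = np.array([[1.0, 1.0], [2.0, 2.0]])
new = np.array([[9.0, 9.0], [8.0, 8.0]])
mask = np.array([1.0, 0.0])[:, None]  # second sequence is already finished

blended = new * mask - old * (mask - 1)
assert np.allclose(blended, np.where(mask > 0, new, old))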
def transform(self, img, do_flip):
    if do_flip:
        if isinstance(img, PTensor):
            return layers.reverse(img, 2)
        return np.fliplr(img).copy()
    return img
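# A minimal usage sketch of the transform above for the NumPy branch (the PTensor
# branch needs the surrounding framework); the sample image is illustrative only:
import numpy as np

img = np.arange(12).reshape(3, 4)
flipped = np.fliplr(img).copy()  # what transform(img, do_flip=True) returns for ndarrays
assert (flipped[:, 0] == img[:, -1]).all()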
def conditional_gru(input,
                    encode_hidden,
                    init_hidden,
                    encode_hidden_size,
                    hidden_size,
                    num_layers=1,
                    sequence_length=None,
                    dropout_prob=0.0,
                    bidirectional=False,
                    batch_first=True,
                    param_attr=None,
                    bias_attr=None,
                    gate_activation=None,
                    activation=None,
                    dtype="float32",
                    name="conditional_gru"):
    """
    Defines a new GRU variant that adds the condition parameters C_u, C_r and C.
    The new GRU formulas are:

    .. math::
            u_t & = actGate(W_{ux} x_{t} + W_{uh} h_{t-1} + C_u h_i + b_u)

            r_t & = actGate(W_{rx} x_{t} + W_{rh} h_{t-1} + C_r h_i + b_r)

            m_t & = actNode(W_{cx} x_t + W_{ch} dot(r_t, h_{t-1}) + C h_i + b_m)

            h_t & = dot(u_t, h_{t-1}) + dot((1-u_t), m_t)

    All other definitions are the same as for the standard GRU.

    Args:
        input (Variable): GRU input tensor. If batch_first = False, shape should be
            ( seq_len x batch_size x input_size ); if batch_first = True, shape should be
            ( batch_size x seq_len x input_size ).
        encode_hidden (Variable): The hidden state from the encoder of the GRU. If
            bidirectional is True, encode_hidden is expected to contain two parts: the
            former half for the forward direction and the latter half for the backward
            direction.
        encode_hidden_size (int): The size of encode_hidden. If bidirectional is True,
            encode_hidden_size covers both halves, i.e. the actual size of each
            direction's encode_hidden is encode_hidden_size / 2.
        init_hidden (Variable|None): The initial hidden state of the GRU, a tensor with
            shape ( num_layers x batch_size x hidden_size ). If bidirectional = True, shape
            should be ( num_layers*2 x batch_size x hidden_size ) and can be reshaped to
            ( num_layers x 2 x batch_size x hidden_size ) to use. If it's None, it will be
            set to all zeros.
        hidden_size (int): Hidden size of the GRU.
        num_layers (int): The total number of layers of the GRU.
        sequence_length (Variable|None): A tensor with shape [batch_size] that stores the
            real length of each instance. It will be converted to a mask that masks out the
            padding ids. If it's None, there are no padding ids.
        dropout_prob (float|0.0): Dropout probability. Dropout ONLY works on the output of
            each layer, NOT between time steps.
        bidirectional (bool|False): Whether the GRU is bidirectional.
        batch_first (bool|True): The shape format of the input and output tensors. If True,
            the shape format should be :attr:`[batch_size, seq_len, hidden_size]`; if False,
            :attr:`[seq_len, batch_size, hidden_size]`. By default this function accepts
            input and emits output in batch-major form to be consistent with most data
            formats, though it is a bit less efficient because of the extra transposes.
        param_attr (ParamAttr|None): The parameter attribute for the learnable weight matrix.
            Note: If it is set to None or one attribute of ParamAttr, gru_unit will create
            ParamAttr as param_attr. If the Initializer of the param_attr is not set, the
            parameter is initialized with Xavier. Default: None.
        bias_attr (ParamAttr|None): The parameter attribute for the bias of the GRU unit.
            If it is set to None or one attribute of ParamAttr, gru_unit will create
            ParamAttr as bias_attr. If the Initializer of the bias_attr is not set, the bias
            is initialized to zero. Default: None.
        gate_activation (function|None): The activation function for gates (actGate).
            Default: 'fluid.layers.sigmoid'.
        activation (function|None): The activation function for the cell (actNode).
            Default: 'fluid.layers.tanh'.
        dtype (string): Data type used in this unit.
        name (string): Name used to identify parameters and biases.

    Returns:
        rnn_out (Tensor), last_hidden (Tensor), all_hidden (Tensor)
            - rnn_out is the GRU hidden output, with shape ( seq_len x batch_size x hidden_size );
              if bidirectional is True, its shape will be ( seq_len x batch_size x hidden_size*2 ).
            - last_hidden is the hidden state of the last step of the GRU, with shape
              ( num_layers x batch_size x hidden_size ); if bidirectional is True, its shape
              will be ( num_layers*2 x batch_size x hidden_size ) and can be reshaped to
              ( num_layers x 2 x batch_size x hidden_size ).
            - all_hidden contains all the hidden states of the input, including last_hidden
              and the intermediate hidden states, with shape
              ( num_layers x seq_len x batch_size x hidden_size ); if bidirectional is True,
              its shape will be ( num_layers*2 x seq_len x batch_size x hidden_size ).
    """
    if bidirectional:
        encode_hidden, bw_encode_hidden = layers.split(
            encode_hidden, num_or_sections=2, dim=-1)
        encode_hidden_size = int(encode_hidden_size / 2)

    fw_unit_list = []

    for i in range(num_layers):
        new_name = name + '_layers_' + str(i)
        if param_attr is not None and param_attr.name is not None:
            layer_param_attr = copy.deepcopy(param_attr)
            layer_param_attr.name += '_fw_w_' + str(i)
        else:
            layer_param_attr = param_attr
        if bias_attr is not None and bias_attr.name is not None:
            layer_bias_attr = copy.deepcopy(bias_attr)
            layer_bias_attr.name += '_fw_b_' + str(i)
        else:
            layer_bias_attr = bias_attr
        fw_unit_list.append(
            ConditionalGRUUnit(new_name, encode_hidden_size, hidden_size,
                               layer_param_attr, layer_bias_attr,
                               gate_activation, activation, dtype))

    if bidirectional:
        bw_unit_list = []
        for i in range(num_layers):
            new_name = name + '_reverse_layers_' + str(i)
            if param_attr is not None and param_attr.name is not None:
                layer_param_attr = copy.deepcopy(param_attr)
                layer_param_attr.name += '_bw_w_' + str(i)
            else:
                layer_param_attr = param_attr
            if bias_attr is not None and bias_attr.name is not None:
                layer_bias_attr = copy.deepcopy(bias_attr)
                layer_bias_attr.name += '_bw_b_' + str(i)
            else:
                layer_bias_attr = bias_attr
            bw_unit_list.append(
                ConditionalGRUUnit(new_name, encode_hidden_size, hidden_size,
                                   layer_param_attr, layer_bias_attr,
                                   gate_activation, activation, dtype))

    if batch_first:
        input = layers.transpose(input, [1, 0, 2])

    mask = None
    if sequence_length:
        max_seq_len = layers.shape(input)[0]
        mask = layers.sequence_mask(
            sequence_length, maxlen=max_seq_len, dtype='float32')
        mask = layers.transpose(mask, [1, 0])

    direc_num = 1
    if bidirectional:
        direc_num = 2
    if init_hidden:
        init_hidden = layers.reshape(
            init_hidden, shape=[num_layers, direc_num, -1, hidden_size])

    def get_single_direction_output(rnn_input,
                                    encode_hidden,
                                    unit_list,
                                    mask=None,
                                    direc_index=0):
        rnn = StaticRNN()
        with rnn.step():
            step_input = rnn.step_input(rnn_input)

            if mask:
                step_mask = rnn.step_input(mask)

            for i in range(num_layers):
                if init_hidden:
                    pre_hidden = rnn.memory(init=init_hidden[i, direc_index])
                else:
                    pre_hidden = rnn.memory(
                        batch_ref=rnn_input,
                        shape=[-1, hidden_size],
                        ref_batch_dim_idx=1)
                encode_h = encode_hidden[i]
                pre_encode_hidden = layers.concat([pre_hidden, encode_h],
                                                  axis=1)
                new_hidden = unit_list[i](step_input, pre_encode_hidden)

                if mask:
                    new_hidden = layers.elementwise_mul(
                        new_hidden, step_mask,
                        axis=0) - layers.elementwise_mul(
                            pre_hidden, (step_mask - 1), axis=0)

                rnn.update_memory(pre_hidden, new_hidden)

                rnn.step_output(new_hidden)

                step_input = new_hidden
                if dropout_prob is not None and dropout_prob > 0.0:
                    step_input = layers.dropout(
                        step_input, dropout_prob=dropout_prob)

            rnn.step_output(step_input)

        rnn_out = rnn()

        last_hidden_array = []
        all_hidden_array = []  # added to collect the hidden states of every step
        rnn_output = rnn_out[-1]

        for i in range(num_layers):
            last_hidden = rnn_out[i]
            all_hidden_array.append(last_hidden)
            last_hidden = last_hidden[-1]
            last_hidden_array.append(last_hidden)

        all_hidden_array = layers.concat(all_hidden_array, axis=0)
        all_hidden_array = layers.reshape(
            all_hidden_array,
            shape=[num_layers, input.shape[0], -1, hidden_size])
        last_hidden_output = layers.concat(last_hidden_array, axis=0)
        last_hidden_output = layers.reshape(
            last_hidden_output, shape=[num_layers, -1, hidden_size])

        return rnn_output, last_hidden_output, all_hidden_array

    fw_rnn_out, fw_last_hidden, fw_all_hidden = get_single_direction_output(
        input, encode_hidden, fw_unit_list, mask, direc_index=0)

    if bidirectional:
        bw_input = layers.reverse(input, axis=[0])
        bw_mask = None
        if mask:
            bw_mask = layers.reverse(mask, axis=[0])
        bw_rnn_out, bw_last_hidden, bw_all_hidden = get_single_direction_output(
            bw_input, bw_encode_hidden, bw_unit_list, bw_mask, direc_index=1)

        bw_rnn_out = layers.reverse(bw_rnn_out, axis=[0])

        rnn_out = layers.concat([fw_rnn_out, bw_rnn_out], axis=2)
        last_hidden = layers.concat([fw_last_hidden, bw_last_hidden], axis=1)
        all_hidden = layers.concat([fw_all_hidden, bw_all_hidden], axis=0)
        last_hidden = layers.reshape(
            last_hidden, shape=[num_layers * direc_num, -1, hidden_size])

        if batch_first:
            rnn_out = layers.transpose(rnn_out, [1, 0, 2])
        return rnn_out, last_hidden, all_hidden
    else:
        rnn_out = fw_rnn_out
        last_hidden = fw_last_hidden
        all_hidden = fw_all_hidden

        if batch_first:
            rnn_out = layers.transpose(rnn_out, [1, 0, 2])
        return rnn_out, last_hidden, all_hidden
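# conditional_gru has no Examples section; a hedged usage sketch in the same style
# as the basic_gru example above (shapes follow the Args description, surrounding
# variables such as batch_size and input_size are assumed to be defined):
#
#     import paddle.fluid.layers as layers
#
#     input = layers.data(
#         name="input", shape=[-1, batch_size, input_size], dtype='float32')
#     encode_hidden = layers.data(
#         name="encode_hidden", shape=[-1, encode_hidden_size], dtype='float32')
#     pre_hidden = layers.data(
#         name="pre_hidden", shape=[-1, hidden_size], dtype='float32')
#
#     rnn_out, last_hidden, all_hidden = conditional_gru(
#         input, encode_hidden, pre_hidden, encode_hidden_size, hidden_size,
#         num_layers=num_layers, batch_first=False)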
def __call__(self, image: PTensor):
    if isinstance(image, PTensor):
        return self.crop_to_output(layers.reverse(image, 2))
    else:
        return self.crop_to_output(np.flipud(image))