def train_rnn(item_fc, h_0, output_type):
    """Run a DynamicRNN over the shifted item sequence and emit a per-step Q value.

    Args:
        item_fc: LoD sequence tensor of per-item features.
        h_0: initial GRU hidden state; reordered per sequence by the RNN memory.
        output_type: 'c_Q' or 'max_Q' -- selects which fc head (and its named
            parameters) produces the Q value at each step.

    Returns:
        The DynamicRNN output: a sequence of per-step Q values (last dim 1).

    Raises:
        ValueError: if ``output_type`` is not 'c_Q' or 'max_Q'.  (Previously an
        unknown value silently built an RNN with no ``drnn.output``, which
        fails much later with an unrelated error.)
    """
    if output_type not in ('c_Q', 'max_Q'):
        raise ValueError('unknown output_type: %s' % output_type)

    # Shift the sequence one step to the right (OOV=0 pads the first step),
    # so step t consumes the features of item t-1.
    shifted_item_fc = fluid_sequence_advance(item_fc, OOV=0)
    drnn = fluid.layers.DynamicRNN()
    with drnn.block():
        last_item_fc = drnn.step_input(shifted_item_fc)
        cur_h_0 = drnn.memory(init=h_0, need_reorder=True)

        # step_input strips LoD info; restore it from the memory tensor.
        last_item_fc = layers.lod_reset(last_item_fc, cur_h_0)
        # NOTE(review): hidden size 4 is hard-coded -- confirm it matches the
        # dimensionality of h_0 and of the upstream item features.
        next_h_0 = layers.dynamic_gru(
            last_item_fc,
            size=4,
            h_0=cur_h_0,
            param_attr=fluid.ParamAttr(name="item_gru.w_0"),
            bias_attr=fluid.ParamAttr(name="item_gru.b_0"))

        # The two heads are structurally identical; only the parameter-name
        # prefix differs, so build the names from output_type.  This keeps the
        # exact original names "c_Q_fc.*" / "max_Q_fc.*".
        Q = layers.fc(
            next_h_0,
            1,
            param_attr=fluid.ParamAttr(name="%s_fc.w_0" % output_type),
            bias_attr=fluid.ParamAttr(name="%s_fc.b_0" % output_type))
        drnn.output(Q)

        # update
        drnn.update_memory(cur_h_0, next_h_0)

    drnn_output = drnn()
    return drnn_output
def __call__(self, input, h_0=None):
    """Apply a dynamic GRU to *input*, optionally seeded with hidden state *h_0*.

    Parameter/bias attributes come from ``self.attr_holder``; the GRU output
    sequence is returned unchanged.

    NOTE(review): ``size``, ``is_reverse``, ``gate_activation``,
    ``candidate_activation`` and ``origin_mode`` are free variables here --
    presumably captured from an enclosing scope; confirm they are in scope
    (compare the sibling ``__call__`` that reads them from ``self``).
    """
    gru_kwargs = dict(
        input=input,
        size=size,
        param_attr=self.attr_holder.param_attr,
        bias_attr=self.attr_holder.bias_attr,
        is_reverse=is_reverse,
        gate_activation=gate_activation,
        candidate_activation=candidate_activation,
        h_0=h_0,
        origin_mode=origin_mode)
    return layers.dynamic_gru(**gru_kwargs)
def __call__(self, input, state=None):
    """GRU-cell style call: fc-project *input*, then run a dynamic GRU.

    The fc layer widens *input* to ``3 * hidden_dim`` (the gate/candidate
    pre-activations a dynamic_gru expects); *state* seeds the initial hidden
    state.  Returns the GRU output sequence.
    """
    projected = layers.fc(
        input=input,
        size=self.hidden_dim * 3,
        param_attr=self.input_param_attr,
        bias_attr=self.input_bias_attr)
    gru_out = layers.dynamic_gru(
        input=projected,
        size=self.hidden_dim,
        param_attr=self.param_attr,
        bias_attr=self.bias_attr,
        is_reverse=self.is_reverse,
        gate_activation=self.gate_activation,
        candidate_activation=self.candidate_activation,
        h_0=state,
        origin_mode=self.origin_mode)
    return gru_out
def gru_fun(gru_in, name=None, is_reverse=False):
    """Stack ``num_layers`` fc+dynamic_gru layers over *gru_in*.

    Returns ``(final_fw_out, final_fw_last_hidden)``: the last layer's full
    output sequence, and the per-layer boundary hidden states stacked to shape
    ``[num_layers, batch, hidden_size]``.

    NOTE(review): ``num_layers``, ``hidden_size`` and ``last_mask`` are free
    variables -- presumably captured from an enclosing scope; confirm.
    NOTE(review): the default ``name=None`` crashes at ``name + "_fc_w"``
    (TypeError); callers apparently always pass a name.
    """
    fw_last_array = []
    fw_in = gru_in
    for i in range(num_layers):
        # NOTE(review): parameter names do not include the layer index ``i``,
        # so with num_layers > 1 every layer SHARES the same fc/GRU weights --
        # confirm this sharing is intentional.
        fw_gru_in = layers.fc(
            input=fw_in,
            size=hidden_size * 3,
            param_attr=fluid.ParamAttr(name=name + "_fc_w"),
            bias_attr=fluid.ParamAttr(name=name + "_fc_b"))
        fw_gru_out = layers.dynamic_gru(
            input=fw_gru_in,
            size=hidden_size,
            param_attr=fluid.ParamAttr(name=name + "_w"),
            bias_attr=fluid.ParamAttr(name=name + "_b"),
            origin_mode=True,
            is_reverse=is_reverse)
        fw_in = fw_gru_out
        # The "last" hidden state is the sequence boundary on the side the
        # GRU finishes at: the first step when reversed, else the last step.
        if is_reverse:
            fw_last_hidden = layers.sequence_first_step(fw_gru_out)
        else:
            fw_last_hidden = layers.sequence_last_step(fw_gru_out)
        if last_mask:
            # Zero out (or scale) hidden states per the closure-provided mask.
            fw_last_hidden = layers.elementwise_mul(fw_last_hidden, last_mask, axis=0)
        fw_last_array.append(fw_last_hidden)
    if num_layers == 1:
        # Single layer: just add the leading layer axis.
        final_fw_last_hidden = layers.unsqueeze(fw_last_array[0], axes=[0])
    else:
        # Multiple layers: concat then reshape to [num_layers, batch, hidden].
        final_fw_last_hidden = layers.concat(fw_last_array, axis=0)
        final_fw_last_hidden = layers.reshape(
            final_fw_last_hidden, shape=[num_layers, -1, hidden_size])
    final_fw_out = fw_in
    return final_fw_out, final_fw_last_hidden