def bulit_gru(self, input_var=None, mask_var=None, input_root=None,
              input_e1=None, input_e2=None):
    """Build the GRU network (margin-loss output variant)."""
    # Input layer
    l_in = lasagne.layers.InputLayer(shape=self.input_shape,
                                     input_var=input_var, name="l_in")
    # Mask layer
    l_mask = lasagne.layers.InputLayer(shape=self.mask_shape,
                                       input_var=mask_var, name="l_mask")
    # Input dropout
    l_input_drop = lasagne.layers.DropoutLayer(l_in, p=self.keep_prob_input)

    # Input attention: entity and root inputs
    l_in_root = lasagne.layers.InputLayer(shape=self.input_shape_att,
                                          input_var=input_root, name="l_in_root")
    l_in_e1 = lasagne.layers.InputLayer(shape=self.input_shape_att,
                                        input_var=input_e1, name="l_in_e1")
    l_in_e2 = lasagne.layers.InputLayer(shape=self.input_shape_att,
                                        input_var=input_e2, name="l_in_e2")

    # Two stacked forward GRU layers
    l_gru_forward = lasagne.layers.GRULayer(
        l_input_drop, num_units=self.gru_size, grad_clipping=self.grad_clip,
        resetgate=self.gate_parameters, updategate=self.gate_parameters,
        hidden_update=self.cell_parameters, learn_init=True,
        only_return_final=False, name="l_gru_forward1")
    l_gru_forward = lasagne.layers.GRULayer(
        l_gru_forward, num_units=self.gru_size, grad_clipping=self.grad_clip,
        resetgate=self.gate_parameters, updategate=self.gate_parameters,
        hidden_update=self.cell_parameters, learn_init=True,
        only_return_final=False, name="l_gru_forward2")

    # Two stacked backward GRU layers
    l_gru_backward = lasagne.layers.GRULayer(
        l_input_drop, num_units=self.gru_size, grad_clipping=self.grad_clip,
        resetgate=self.gate_parameters, updategate=self.gate_parameters,
        hidden_update=self.cell_parameters, learn_init=True,
        only_return_final=False, backwards=True, name="l_gru_backward1")
    l_gru_backward = lasagne.layers.GRULayer(
        l_gru_backward, num_units=self.gru_size, grad_clipping=self.grad_clip,
        resetgate=self.gate_parameters, updategate=self.gate_parameters,
        hidden_update=self.cell_parameters, learn_init=True,
        only_return_final=False, backwards=True, name="l_gru_backward2")

    # Merge forward and backward layers element-wise
    l_merge = lasagne.layers.ElemwiseSumLayer([l_gru_forward, l_gru_backward])

    # Keyword attention over entities and root
    l_self_att = SelfAttEntRootLayer((l_in, l_in_e1, l_in_e2, l_in_root, l_merge),
                                     attention_size2=self.attention_size2)
    # # When you need the order [Fe1, Fe2, Froot], use this line instead:
    # l_kas = SelfAttEntRootLayer3((l_self_att, l_merge))

    # Output dropout
    l_merge_drop = lasagne.layers.DropoutLayer(l_self_att,
                                               p=self.keep_prob_gru_output)

    l_merge_fc = lasagne.layers.batch_norm(lasagne.layers.DenseLayer(
        l_merge_drop,
        num_units=250,
        W=lasagne.init.GlorotUniform(gain=1.0), b=lasagne.init.Constant(1.),
        nonlinearity=lasagne.nonlinearities.selu))

    # # Keyword attention (variant)
    # l_self_att_var = SelfAttEntRootLayerVariant(
    #     (l_in, l_in_e1, l_in_e2, l_in_root, l_merge),
    #     attention_size2=self.attention_size2)
    #
    # # Output dropout
    # l_merge_drop_var = lasagne.layers.DropoutLayer(l_self_att_var,
    #                                                p=self.keep_prob_gru_output)
    #
    # l_merge_fc_var = lasagne.layers.batch_norm(lasagne.layers.DenseLayer(
    #     l_merge_drop_var,
    #     num_units=250,
    #     W=lasagne.init.GlorotUniform(gain=1.0), b=lasagne.init.Constant(0.),
    #     nonlinearity=lasagne.nonlinearities.selu))
    #
    # # Merge the two keyword-attention branches
    # l_key_merge = lasagne.layers.ElemwiseSumLayer([l_merge_fc, l_merge_fc_var])

    # Margin-loss output layer.
    # w_classes has shape (class_number, network_output[1]); initialise it
    # uniformly with a Glorot-style bound.
    w_classes_init = np.sqrt(6.0 / (19 + 200))
    l_out_margin = MarginLossLayer(l_merge_fc,
                                   w_classes=lasagne.init.Uniform(w_classes_init),
                                   class_number=19)

    return l_out_margin, l_in, l_mask, l_out_margin, l_out_margin
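# --- Usage sketch (assumption, not part of the original code) ---
# A minimal example of how the margin-loss variant above might be compiled
# into a prediction function. `model` is a hypothetical instance of the class
# defining bulit_gru, and the tensor types below are assumptions that must
# match self.input_shape / self.input_shape_att. No semantics are assumed for
# the custom MarginLossLayer beyond producing one score per class.
#
# import theano
# import theano.tensor as T
# import lasagne
#
# x = T.tensor3('x')        # (batch, seq_len, emb_dim) -- assumed layout
# mask = T.matrix('mask')   # (batch, seq_len); declared but unused in the graph
# root = T.tensor3('root')  # adjust types to the actual input_shape_att
# e1 = T.tensor3('e1')
# e2 = T.tensor3('e2')
#
# l_out, l_in, l_mask, _, _ = model.bulit_gru(x, mask, root, e1, e2)
# scores = lasagne.layers.get_output(l_out, deterministic=True)
# # on_unused_input='ignore' because the mask layer is not wired into the graph
# predict_fn = theano.function([x, mask, root, e1, e2], scores,
#                              on_unused_input='ignore')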
def bulit_gru(self, input_var=None, mask_var=None, input_root=None,
              input_e1=None, input_e2=None, sen_length=None):
    """Build the GRU network (softmax output variant)."""
    # Input layer
    l_in = lasagne.layers.InputLayer(shape=self.input_shape,
                                     input_var=input_var, name="l_in")
    # Mask layer
    l_mask = lasagne.layers.InputLayer(shape=self.mask_shape,
                                       input_var=mask_var, name="l_mask")
    # Input dropout
    l_input_drop = lasagne.layers.DropoutLayer(l_in, p=self.keep_prob_input)

    # Input attention: entity and root inputs
    l_in_root = lasagne.layers.InputLayer(shape=self.input_shape_att,
                                          input_var=input_root, name="l_in_root")
    l_in_e1 = lasagne.layers.InputLayer(shape=self.input_shape_att,
                                        input_var=input_e1, name="l_in_e1")
    l_in_e2 = lasagne.layers.InputLayer(shape=self.input_shape_att,
                                        input_var=input_e2, name="l_in_e2")

    # # Input attention
    # l_att_alpha = InputAttEntRootLayer((l_in, l_in_e1, l_in_e2, l_in_root))

    # Two stacked forward GRU layers
    l_gru_forward = lasagne.layers.GRULayer(
        l_input_drop, num_units=self.gru_size, grad_clipping=self.grad_clip,
        resetgate=self.gate_parameters, updategate=self.gate_parameters,
        hidden_update=self.cell_parameters, learn_init=True,
        only_return_final=False, name="l_gru_forward1")
    l_gru_forward = lasagne.layers.GRULayer(
        l_gru_forward, num_units=self.gru_size, grad_clipping=self.grad_clip,
        resetgate=self.gate_parameters, updategate=self.gate_parameters,
        hidden_update=self.cell_parameters, learn_init=True,
        only_return_final=False, name="l_gru_forward2")
    # # Attention
    # l_att_forward = InputAttentionDotLayer((l_gru_forward, l_att_alpha))

    # Two stacked backward GRU layers
    l_gru_backward = lasagne.layers.GRULayer(
        l_input_drop, num_units=self.gru_size, grad_clipping=self.grad_clip,
        resetgate=self.gate_parameters, updategate=self.gate_parameters,
        hidden_update=self.cell_parameters, learn_init=True,
        only_return_final=False, backwards=True, name="l_gru_backward1")
    l_gru_backward = lasagne.layers.GRULayer(
        l_gru_backward, num_units=self.gru_size, grad_clipping=self.grad_clip,
        resetgate=self.gate_parameters, updategate=self.gate_parameters,
        hidden_update=self.cell_parameters, learn_init=True,
        only_return_final=False, backwards=True, name="l_gru_backward2")
    # # Attention
    # l_att_backward = InputAttentionDotLayer((l_gru_backward, l_att_alpha))

    # Merge forward and backward layers element-wise
    l_merge = lasagne.layers.ElemwiseSumLayer((l_gru_forward, l_gru_backward))
    # l_merge = lasagne.layers.ConcatLayer((l_gru_forward, l_gru_backward), axis=2)

    # Self-attention over entities and root
    # l_self_att = SelfAttentionDotLayer((l_merge, l_att_alpha))
    l_self_att = SelfAttEntRootLayer((l_in, l_in_e1, l_in_e2, l_in_root, l_merge),
                                     attention_size2=self.attention_size2)

    # Output dropout
    l_merge_drop = lasagne.layers.DropoutLayer(l_self_att,
                                               p=self.keep_prob_gru_output)

    l_merge_fc = lasagne.layers.DenseLayer(
        l_merge_drop,
        num_units=50,
        W=lasagne.init.GlorotUniform(gain=1.0), b=lasagne.init.Constant(0.),
        nonlinearity=lasagne.nonlinearities.selu)

    # # Margin loss
    # # w_classes has shape (class_number, network_output[1])
    # w_classes_init = np.sqrt(6.0 / (19 + self.gru_size))
    # l_out_margin = MarginLossLayer(l_merge_drop,
    #                                w_classes=lasagne.init.Uniform(w_classes_init),
    #                                class_number=19)

    # Finally, add the fully-connected output layer with num_classes softmax units
    l_out_margin = lasagne.layers.DenseLayer(
        l_merge_fc, num_units=self.num_classes,
        nonlinearity=lasagne.nonlinearities.softmax)

    return l_out_margin, l_in, l_mask, l_self_att, l_gru_forward
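# --- Training sketch (assumption, not part of the original code) ---
# A minimal, hedged example of wiring the softmax variant above into a Theano
# training function with categorical cross-entropy. `model`, the tensor types,
# and the Adam learning rate are placeholders, not values taken from the
# original repository; adjust them to the real input shapes and hyperparameters.
#
# import theano
# import theano.tensor as T
# import lasagne
#
# x = T.tensor3('x')        # (batch, seq_len, emb_dim) -- assumed layout
# mask = T.matrix('mask')   # declared but unused in the graph
# root = T.tensor3('root')  # adjust types to the actual input_shape_att
# e1 = T.tensor3('e1')
# e2 = T.tensor3('e2')
# y = T.ivector('y')        # integer class labels
#
# l_out, l_in, l_mask, l_att, l_fwd = model.bulit_gru(x, mask, root, e1, e2)
# probs = lasagne.layers.get_output(l_out)  # stochastic pass (dropout active)
# loss = lasagne.objectives.categorical_crossentropy(probs, y).mean()
# params = lasagne.layers.get_all_params(l_out, trainable=True)
# updates = lasagne.updates.adam(loss, params, learning_rate=1e-3)
# train_fn = theano.function([x, mask, root, e1, e2, y], loss,
#                            updates=updates, on_unused_input='ignore')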