import numpy as np
import theano.tensor as T
import lasagne

# Custom layers used below (SelfAttEntRootLayer, InputAttentionLayer,
# MarginLossLayer, SplitInLeft, SplitInRight, SplitInGlobal, ...) are
# project-specific and assumed to be importable from elsewhere in this repo.


def bulit_gru(self, input_var=None, mask_var=None, input_root=None, input_e1=None, input_e2=None):
    """ Build the GRU network """
    # Input layer
    l_in = lasagne.layers.InputLayer(shape=self.input_shape, input_var=input_var, name="l_in")
    # Mask layer
    l_mask = lasagne.layers.InputLayer(shape=self.mask_shape, input_var=mask_var, name="l_mask")
    # Input dropout
    l_input_drop = lasagne.layers.DropoutLayer(l_in, p=self.keep_prob_input)

    """Input attention: entities and root"""
    l_in_root = lasagne.layers.InputLayer(shape=self.input_shape_att, input_var=input_root, name="l_in_root")
    l_in_e1 = lasagne.layers.InputLayer(shape=self.input_shape_att, input_var=input_e1, name="l_in_e1")
    l_in_e2 = lasagne.layers.InputLayer(shape=self.input_shape_att, input_var=input_e2, name="l_in_e2")

    # Two forward GRU layers
    l_gru_forward = lasagne.layers.GRULayer(
        l_input_drop, num_units=self.gru_size, grad_clipping=self.grad_clip,
        resetgate=self.gate_parameters, updategate=self.gate_parameters,
        hidden_update=self.cell_parameters, learn_init=True,
        only_return_final=False, name="l_gru_forward1")
    l_gru_forward = lasagne.layers.GRULayer(
        l_gru_forward, num_units=self.gru_size, grad_clipping=self.grad_clip,
        resetgate=self.gate_parameters, updategate=self.gate_parameters,
        hidden_update=self.cell_parameters, learn_init=True,
        only_return_final=False, name="l_gru_forward2")

    # Two backward GRU layers
    l_gru_backward = lasagne.layers.GRULayer(
        l_input_drop, num_units=self.gru_size, grad_clipping=self.grad_clip,
        resetgate=self.gate_parameters, updategate=self.gate_parameters,
        hidden_update=self.cell_parameters, learn_init=True,
        only_return_final=False, backwards=True, name="l_gru_backward1")
    l_gru_backward = lasagne.layers.GRULayer(
        l_gru_backward, num_units=self.gru_size, grad_clipping=self.grad_clip,
        resetgate=self.gate_parameters, updategate=self.gate_parameters,
        hidden_update=self.cell_parameters, learn_init=True,
        only_return_final=False, backwards=True, name="l_gru_backward2")

    # Merge forward and backward layers
    l_merge = lasagne.layers.ElemwiseSumLayer([l_gru_forward, l_gru_backward])

    # Keyword attention
    l_self_att = SelfAttEntRootLayer((l_in, l_in_e1, l_in_e2, l_in_root, l_merge),
                                     attention_size2=self.attention_size2)
    # # When you need the order [Fe1, Fe2, Froot], use this line:
    # l_kas = SelfAttEntRootLayer3((l_self_att, l_merge))

    # Output dropout
    l_merge_drop = lasagne.layers.DropoutLayer(l_self_att, p=self.keep_prob_gru_output)

    l_merge_fc = lasagne.layers.batch_norm(lasagne.layers.DenseLayer(
        l_merge_drop,
        num_units=250,
        W=lasagne.init.GlorotUniform(gain=1.0), b=lasagne.init.Constant(1.),
        nonlinearity=lasagne.nonlinearities.selu))

    # # Keyword attention (variant)
    # l_self_att_var = SelfAttEntRootLayerVariant((l_in, l_in_e1, l_in_e2, l_in_root, l_merge),
    #                                             attention_size2=self.attention_size2)
    # # Output dropout
    # l_merge_drop_var = lasagne.layers.DropoutLayer(l_self_att_var, p=self.keep_prob_gru_output)
    # l_merge_fc_var = lasagne.layers.batch_norm(lasagne.layers.DenseLayer(
    #     l_merge_drop_var,
    #     num_units=250,
    #     W=lasagne.init.GlorotUniform(gain=1.0), b=lasagne.init.Constant(0.),
    #     nonlinearity=lasagne.nonlinearities.selu))
    # # Merge the two keyword-attention branches
    # l_key_merge = lasagne.layers.ElemwiseSumLayer([l_merge_fc, l_merge_fc_var])

    # Margin loss
    # Init w_classes: its shape is (class_number, network_output[1]),
    # here (19, 250) to match the 250-unit layer feeding it
    w_classes_init = np.sqrt(6.0 / (19 + 250))
    l_out_margin = MarginLossLayer(l_merge_fc,
                                   w_classes=lasagne.init.Uniform(w_classes_init),
                                   class_number=19)
    return l_out_margin, l_in, l_mask, l_out_margin, l_out_margin
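# MarginLossLayer is a custom layer defined elsewhere in this repo; the shape
# comment above says it holds a learned (class_number, feature_dim) matrix
# w_classes, which suggests it scores the sentence representation against a
# class-embedding matrix. The sketch below is a hypothetical NumPy
# illustration of that scoring together with the ranking-style margin loss of
# dos Santos et al. (2015); whether MarginLossLayer implements exactly this
# formulation is an assumption, not something taken from this repo.
import numpy as np

def class_scores(features, w_classes):
    # features:  (batch, feature_dim) network output
    # w_classes: (class_number, feature_dim) learned class matrix
    return features.dot(w_classes.T)  # (batch, class_number)

def ranking_margin_loss(scores, y, m_pos=2.5, m_neg=0.5, gamma=2.0):
    # Margins and scale follow dos Santos et al. (2015); assumed, not
    # verified against MarginLossLayer.
    batch = np.arange(len(y))
    s_pos = scores[batch, y]              # score of the gold class
    competing = scores.copy()
    competing[batch, y] = -np.inf
    s_neg = competing.max(axis=1)         # best competing class score
    return np.mean(np.log1p(np.exp(gamma * (m_pos - s_pos)))
                   + np.log1p(np.exp(gamma * (m_neg + s_neg))))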
def bulit_gru(self, input_var=None, mask_var=None, input_entity_var=None, mask_entity_var=None):
    """ Build the GRU network """
    # Input layer
    l_in = lasagne.layers.InputLayer(shape=self.input_shape, input_var=input_var, name="l_in")

    """Input attention"""
    # l_input_att = self.input_att_gru(input_entity_var, mask_entity_var)
    # Attention layer
    l_att = InputAttentionLayer(l_in, name="attention")

    """BiGRU"""
    # Mask layer
    l_mask = lasagne.layers.InputLayer(shape=self.mask_shape, input_var=mask_var, name="l_mask")
    # Input dropout
    l_input_drop = lasagne.layers.DropoutLayer(l_att, p=self.keep_prob_input)

    # Two forward GRU layers
    l_gru_forward = lasagne.layers.GRULayer(
        l_input_drop, num_units=self.gru_size, grad_clipping=self.grad_clip,
        resetgate=self.gate_parameters, updategate=self.gate_parameters,
        hidden_update=self.cell_parameters, learn_init=True,
        only_return_final=False, name="l_gru_forward1")
    l_gru_forward = lasagne.layers.GRULayer(
        l_gru_forward, num_units=self.gru_size, grad_clipping=self.grad_clip,
        resetgate=self.gate_parameters, updategate=self.gate_parameters,
        hidden_update=self.cell_parameters, learn_init=True,
        only_return_final=True, name="l_gru_forward2")

    # Two backward GRU layers
    l_gru_backward = lasagne.layers.GRULayer(
        l_input_drop, num_units=self.gru_size, grad_clipping=self.grad_clip,
        resetgate=self.gate_parameters, updategate=self.gate_parameters,
        hidden_update=self.cell_parameters, learn_init=True,
        only_return_final=False, backwards=True, name="l_gru_backward1")
    l_gru_backward = lasagne.layers.GRULayer(
        l_gru_backward, num_units=self.gru_size, grad_clipping=self.grad_clip,
        resetgate=self.gate_parameters, updategate=self.gate_parameters,
        hidden_update=self.cell_parameters, learn_init=True,
        only_return_final=True, backwards=True, name="l_gru_backward2")

    # Merge forward and backward layers
    l_merge = lasagne.layers.ElemwiseSumLayer([l_gru_forward, l_gru_backward])

    # Output dropout
    l_merge_drop = lasagne.layers.DropoutLayer(l_merge, p=self.keep_prob_gru_output)

    # Negative margin loss
    # Init w_classes: its shape is (class_number, network_output[1])
    w_classes_init = np.sqrt(6.0 / (19 + self.gru_size))
    l_out_margin = MarginLossLayer(
        l_merge_drop,
        w_classes=lasagne.init.Uniform(w_classes_init),
        class_number=19)
    return l_out_margin, l_in, l_mask, l_att, l_out_margin
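# Note on w_classes_init above: sqrt(6 / (fan_in + fan_out)) is the
# Glorot/Xavier uniform bound, so lasagne.init.Uniform(w_classes_init), which
# samples from [-w_classes_init, w_classes_init], reproduces GlorotUniform for
# the (19, gru_size) class matrix. A quick standalone check (gru_size=300 is
# an assumed example value, not taken from this repo):
import numpy as np

gru_size = 300                                        # assumed example value
bound = np.sqrt(6.0 / (19 + gru_size))                # Glorot uniform bound
w = np.random.uniform(-bound, bound, (19, gru_size))  # same range as lasagne.init.Uniform(bound)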
def bulit_gru(self, input_var=None, mask_var=None):
    """ Build the GRU network """
    # Input layer
    l_in = lasagne.layers.InputLayer(shape=self.input_shape, input_var=input_var, name="l_in")
    # Mask layer
    l_mask = lasagne.layers.InputLayer(shape=self.mask_shape, input_var=mask_var, name="l_mask")
    # Input dropout
    l_input_drop = lasagne.layers.DropoutLayer(l_in, p=self.keep_prob_input)

    # Two forward GRU layers
    l_gru_forward = lasagne.layers.GRULayer(
        l_input_drop, num_units=self.gru_size, grad_clipping=self.grad_clip,
        resetgate=self.gate_parameters, updategate=self.gate_parameters,
        hidden_update=self.cell_parameters, learn_init=True,
        only_return_final=False, name="l_gru_forward1")
    l_gru_forward = lasagne.layers.GRULayer(
        l_gru_forward, num_units=self.gru_size, grad_clipping=self.grad_clip,
        resetgate=self.gate_parameters, updategate=self.gate_parameters,
        hidden_update=self.cell_parameters, learn_init=True,
        only_return_final=True, name="l_gru_forward2")

    # Two backward GRU layers
    l_gru_backward = lasagne.layers.GRULayer(
        l_input_drop, num_units=self.gru_size, grad_clipping=self.grad_clip,
        resetgate=self.gate_parameters, updategate=self.gate_parameters,
        hidden_update=self.cell_parameters, learn_init=True,
        only_return_final=False, backwards=True, name="l_gru_backward1")
    l_gru_backward = lasagne.layers.GRULayer(
        l_gru_backward, num_units=self.gru_size, grad_clipping=self.grad_clip,
        resetgate=self.gate_parameters, updategate=self.gate_parameters,
        hidden_update=self.cell_parameters, learn_init=True,
        only_return_final=True, backwards=True, name="l_gru_backward2")

    # # Forward output dropout
    # l_outdrop_forward = lasagne.layers.DropoutLayer(l_gru_forward, p=self.keep_prob_forward)
    # # Backward output dropout
    # l_outdrop_backward = lasagne.layers.DropoutLayer(l_gru_backward, p=self.keep_prob_backward)

    # Merge forward and backward layers
    l_merge = lasagne.layers.ElemwiseSumLayer([l_gru_forward, l_gru_backward])

    # Output dropout
    l_merge_drop = lasagne.layers.DropoutLayer(l_merge, p=self.keep_prob_gru_output)

    # Margin loss
    # Init w_classes: its shape is (class_number, network_output[1])
    w_classes_init = np.sqrt(6.0 / (19 + self.gru_size))
    l_out_margin = MarginLossLayer(
        l_merge_drop,
        w_classes=lasagne.init.Uniform(w_classes_init),
        class_number=19)

    # # Alternatively, a fully-connected softmax output layer:
    # l_out = lasagne.layers.DenseLayer(
    #     l_merge_drop, num_units=self.num_classes,
    #     nonlinearity=lasagne.nonlinearities.softmax)
    return l_out_margin, l_in, l_mask, l_merge_drop, l_out_margin
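# A minimal compilation sketch (not from this repo) showing how the layers
# returned by bulit_gru plug into the standard Lasagne/Theano training API.
# `margin_loss_from_scores` is a hypothetical stand-in for however the
# training script turns MarginLossLayer's output into a scalar loss.
import theano
import theano.tensor as T

def compile_train(l_out, l_in, l_mask, margin_loss_from_scores, lr=1e-3):
    target_var = T.ivector("targets")
    scores = lasagne.layers.get_output(l_out)            # stochastic pass (dropout active)
    loss = margin_loss_from_scores(scores, target_var)   # scalar training loss (hypothetical)
    params = lasagne.layers.get_all_params(l_out, trainable=True)
    updates = lasagne.updates.adam(loss, params, learning_rate=lr)
    # on_unused_input='warn': this variant builds l_mask but never wires it
    # into the GRU layers via mask_input, so it is an unused graph input.
    return theano.function([l_in.input_var, l_mask.input_var, target_var],
                           loss, updates=updates, on_unused_input="warn")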
def bulit_gru(self, input_var=None, mask_var=None, left_sdp_length=None, sen_length=None):
    """ Build the GRU network """
    # Input layer
    l_in = lasagne.layers.InputLayer(shape=self.input_shape, input_var=input_var, name="l_in")
    # Mask layer
    l_mask = lasagne.layers.InputLayer(shape=self.mask_shape, input_var=mask_var, name="l_mask")
    # Input dropout
    l_input_drop = lasagne.layers.DropoutLayer(l_in, p=self.keep_prob_input)

    '''
    """ CNN """
    # The length of the sentences
    l_sen_length = lasagne.layers.InputLayer(shape=(None, 1), input_var=sen_length, name="l_sen_length")
    # Split the global SDP
    l_split_global_sdp = SplitInGlobal((l_sen_length, l_in))
    l_split_global_sdp = lasagne.layers.ReshapeLayer(l_split_global_sdp, ([0], 1, [1], [2]))
    l_global_sdp_cnn = lasagne.layers.Conv2DLayer(
        l_split_global_sdp, num_filters=500, filter_size=(3, self.cnn_gru_size),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotUniform())
    # Max-pooling over the global SDP
    l_global_sdp_maxpooling = lasagne.layers.GlobalPoolLayer(l_global_sdp_cnn, pool_function=T.max)
    # Output dropout
    l_sdp_drop = lasagne.layers.DropoutLayer(l_global_sdp_maxpooling, p=self.keep_prob_cnn)
    w_classes_init = np.sqrt(6.0 / (19 + 500))
    l_out_margin = MarginLossLayer(l_sdp_drop, w_classes=lasagne.init.Uniform(w_classes_init),
                                   class_number=19)
    return l_out_margin, l_in, l_mask, l_global_sdp_maxpooling, l_out_margin
    '''

    """ 1. Split the GRU output into left/right/global SDP segments and feed each into a CNN """
    # GRU forward
    l_gru_forward = lasagne.layers.GRULayer(
        l_input_drop, num_units=self.cnn_gru_size, mask_input=l_mask, grad_clipping=self.grad_clip,
        resetgate=self.gate_parameters, updategate=self.gate_parameters,
        hidden_update=self.cell_parameters, learn_init=True,
        only_return_final=False, name="l_gru_forward")

    # Left-SDP length input layer
    l_left_sdp_length = lasagne.layers.InputLayer(
        shape=(None, 1), input_var=left_sdp_length, name="l_left_sdp_length")
    # The length of the sentences
    l_sen_length = lasagne.layers.InputLayer(shape=(None, 1), input_var=sen_length, name="l_sen_length")

    # Split the left SDP and the right SDP
    l_split_left_sdp = SplitInLeft((l_left_sdp_length, l_gru_forward))
    l_split_right_sdp = SplitInRight((l_left_sdp_length, l_sen_length, l_gru_forward))
    # Split the global SDP
    l_split_global_sdp = SplitInGlobal((l_sen_length, l_gru_forward))

    # Reshape the layers to 4D
    l_split_left_sdp = lasagne.layers.ReshapeLayer(l_split_left_sdp, ([0], 1, [1], [2]))
    l_split_right_sdp = lasagne.layers.ReshapeLayer(l_split_right_sdp, ([0], 1, [1], [2]))
    l_split_global_sdp = lasagne.layers.ReshapeLayer(l_split_global_sdp, ([0], 1, [1], [2]))

    # Convolutional layer over the left SDP
    l_left_sdp_cnn = lasagne.layers.Conv2DLayer(
        l_split_left_sdp, num_filters=50, filter_size=(2, self.cnn_gru_size),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotUniform())
    # Convolutional layer over the right SDP
    l_right_sdp_cnn = lasagne.layers.Conv2DLayer(
        l_split_right_sdp, num_filters=50, filter_size=(2, self.cnn_gru_size),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotUniform())
    # Convolutional layer over the global SDP
    l_global_sdp_cnn = lasagne.layers.Conv2DLayer(
        l_split_global_sdp, num_filters=50, filter_size=(2, self.cnn_gru_size),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotUniform())

    # Max-pooling over the left SDP
    l_left_sdp_maxpooling = lasagne.layers.ReshapeLayer(
        lasagne.layers.GlobalPoolLayer(l_left_sdp_cnn, pool_function=T.max),
        ([0], [1], 1))
    # Max-pooling over the right SDP
    l_right_sdp_maxpooling = lasagne.layers.ReshapeLayer(
        lasagne.layers.GlobalPoolLayer(l_right_sdp_cnn, pool_function=T.max),
        ([0], [1], 1))
    # Max-pooling over the global SDP
    l_global_sdp_maxpooling = lasagne.layers.ReshapeLayer(
        lasagne.layers.GlobalPoolLayer(l_global_sdp_cnn, pool_function=T.max),
        ([0], [1], 1))

    # Concatenate the global, left, and right SDP features
    l_con_sdp = lasagne.layers.ConcatLayer(
        (l_global_sdp_maxpooling, l_left_sdp_maxpooling, l_right_sdp_maxpooling), axis=2)
    # Output dropout
    l_con_sdp_drop = lasagne.layers.DropoutLayer(l_con_sdp, p=self.keep_prob_cnn)
    # A fully-connected layer of 150 units with dropout on its inputs
    l_con_sdp_den = lasagne.layers.DenseLayer(
        l_con_sdp_drop, num_units=150,
        nonlinearity=lasagne.nonlinearities.rectify)
    # # Alternatively, a fully-connected softmax output layer:
    # l_out = lasagne.layers.DenseLayer(
    #     lasagne.layers.dropout(l_con_sdp_den, p=self.keep_prob_backward),
    #     num_units=self.num_classes,
    #     nonlinearity=lasagne.nonlinearities.softmax)

    """ 2. Feed the input into a BiGRU """
    l_gru_forward2 = lasagne.layers.GRULayer(
        l_input_drop, num_units=self.gru_size, mask_input=l_mask, grad_clipping=self.grad_clip,
        resetgate=self.gate_parameters, updategate=self.gate_parameters,
        hidden_update=self.cell_parameters, learn_init=True,
        only_return_final=True, name="l_gru_forward2")
    # GRU backward
    l_gru_backward = lasagne.layers.GRULayer(
        l_input_drop, num_units=self.gru_size, mask_input=l_mask, grad_clipping=self.grad_clip,
        resetgate=self.gate_parameters, updategate=self.gate_parameters,
        hidden_update=self.cell_parameters, learn_init=True,
        only_return_final=True, backwards=True, name="l_gru_backward")
    # Merge forward and backward layers
    l_merge = lasagne.layers.ElemwiseSumLayer([l_gru_forward2, l_gru_backward])
    # Output dropout
    l_merge_drop = lasagne.layers.DropoutLayer(l_merge, p=self.keep_prob_gru_output)

    """ 3. Concatenate the CNN and GRU features """
    l_cnn_gru = lasagne.layers.ConcatLayer((l_con_sdp_den, l_merge_drop), axis=1)
    # # Highway network
    # l_highway_network = HighwayNetwork1D(l_cnn_gru)

    # Margin loss
    # Init w_classes: its shape is (class_number, network_output[1])
    w_classes_init = np.sqrt(6.0 / (19 + self.gru_size + 150))
    l_out_margin = MarginLossLayer(
        l_cnn_gru,
        w_classes=lasagne.init.Uniform(w_classes_init),
        class_number=19)
    # Wrap the learned class matrix in a layer so it can be returned for inspection
    a = lasagne.layers.ReshapeLayer(
        lasagne.layers.InputLayer(shape=(19, 300), input_var=l_out_margin.w_classes),
        (19, 300))
    # l_out = lasagne.layers.DenseLayer(
    #     lasagne.layers.dropout(l_cnn_gru, p=self.keep_prob_cnn_gur_output),
    #     num_units=self.num_classes,
    #     nonlinearity=lasagne.nonlinearities.softmax)
    return l_out_margin, l_in, l_mask, a, l_out_margin
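# Shape sanity sketch (example values assumed, not from this repo): each SDP
# branch above goes (batch, 1, time, cnn_gru_size) -> Conv2DLayer with 50
# filters of size (2, cnn_gru_size) -> GlobalPoolLayer -> reshape, yielding
# (batch, 50, 1); concatenating the three branches on axis=2 then gives
# (batch, 50, 3). Reproduced here with standard Lasagne layers only:
cnn_gru_size = 100  # assumed example value
l_demo_in = lasagne.layers.InputLayer(shape=(None, 1, None, cnn_gru_size))
l_demo_cnn = lasagne.layers.Conv2DLayer(
    l_demo_in, num_filters=50, filter_size=(2, cnn_gru_size))
l_demo_pool = lasagne.layers.ReshapeLayer(
    lasagne.layers.GlobalPoolLayer(l_demo_cnn, pool_function=T.max),
    ([0], [1], 1))
print(lasagne.layers.get_output_shape(l_demo_pool))  # (None, 50, 1)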