def test_lookup(): table_size =1000 feature_num = 500 lookup_table_layer = LookupTableLayer(table_size, feature_num, 'test_lookup') input = T.shared(np.asarray([[0,1,2,3],[1,2,3,4], [7,8,9,10],[5,6,7,8]], dtype=np.int32)) output_flattern = lookup_table_layer.output(input) output_tensor = lookup_table_layer.output(input,tensor_output=True) flattern_shape = output_flattern.eval().shape tensor_shape = output_tensor.eval().shape assert flattern_shape == (4, 2000), "flattern shape = {0}".format(flattern_shape) assert tensor_shape == (4, 4, 500), "tensor shape = {0}".format(tensor_shape) #lookup_table_layer.save('/home/kingsfield/Data/models') #lookup_table_layer.load('/home/kingsfield/Data/models') f = file('/home/kingsfield/Data/models/test.save', 'wb') cPickle.dump(lookup_table_layer, f, protocol=cPickle.HIGHEST_PROTOCOL) f.close() f = file('/home/kingsfield/Data/models/test.save', 'rb') test = cPickle.load(f) f.close() print test.name,test._table_size,test._feature_num
class WordLevelNeuralModelCore(object):
    """Word-level neural model: embedding lookup -> tanh hidden layer ->
    softmax output, built as a single Theano graph over a word-id matrix."""

    def __init__(self, np_rng = None, **kwargs):
        # Symbolic int matrix of word ids: one row per example window.
        self.word_ids = T.imatrix('input')
        # Copy the required hyper-parameters off kwargs; a missing key
        # raises KeyError exactly as direct indexing did.
        for key in ('word_num', 'window_size', 'feature_num',
                    'hidden_layer_size', 'n_outs'):
            setattr(self, key, kwargs[key])
        # Embedding table feeding a tanh hidden layer over the whole window.
        self.lookup_table_layer = LookupTableLayer(self.word_num, self.feature_num)
        self.hidden_layer = HiddenLayer(
            rng=np_rng,
            input=self.lookup_table_layer.output(self.word_ids),
            n_in=self.window_size * self.feature_num,
            n_out=self.hidden_layer_size,
            activation=T.tanh)
        # Logistic-regression (softmax) layer on top of the hidden units.
        self.output_layer = SoftMaxLayer(
            input=self.hidden_layer.output,
            n_in=self.hidden_layer_size,
            n_out=self.n_outs)
        # All trainable parameters, concatenated layer by layer.
        self.params = self.lookup_table_layer.params() \
            + self.hidden_layer.params() \
            + self.output_layer.params()

    def inputs(self):
        """Return the symbolic input variables of the model."""
        return [self.word_ids]
def __setstate__(self, state):
    """Restore the layer sub-objects from a pickled state dict.

    Each entry of `state` (keyed by attribute name) holds the sub-state of
    one layer; a fresh layer object is built and its own __setstate__ is
    delegated to. Replaces nine copy-pasted stanzas with one data-driven
    loop — same attributes, same state keys, same KeyError/AssertionError
    behavior as before.
    """
    # Guard against unpickling a foreign state blob; kept as `assert` to
    # preserve the AssertionError callers may expect.
    assert state['name'] == "srl-machine"
    layer_specs = [
        ("word_embedding_layer", LookupTableLayer),
        ("pos_embedding_layer", LookupTableLayer),
        ("loc_embedding_layer", LookupTableLayer),
        ("locdiff_word_embedding_layer", LookupTableLayer),
        ("locdiff_verb_embedding_layer", LookupTableLayer),
        ("word_conv_layer", Conv1DMaxPoolLayer),
        ("pos_conv_layer", Conv1DMaxPoolLayer),
        ("locdiff_word_conv_layer", Conv1DMaxPoolLayer),
        ("locdiff_verb_conv_layer", Conv1DMaxPoolLayer),
    ]
    for attr_name, layer_cls in layer_specs:
        layer = layer_cls()
        layer.__setstate__(state[attr_name])
        setattr(self, attr_name, layer)
def test_lookup():
    """Feed a LookupTableLayer's tensor output through Conv1DLayer and
    verify the shape at each stage, including the final per-word reshape."""
    table_size = 1000
    feature_num = 500
    lookup = LookupTableLayer(table_size, feature_num)
    word_ids = T.shared(np.asarray([[0, 1, 2, 3], [1, 2, 3, 4],
                                    [7, 8, 9, 10], [5, 6, 7, 8]],
                                   dtype=np.int32))
    # (batch, window, feature) embedding tensor.
    embedded = lookup.output(word_ids, tensor_output=True)
    tensor_shape = embedded.eval().shape
    assert tensor_shape == (4, 4, 500), "lookup table output tensor shape = {0}".format(tensor_shape)

    rng = np.random.RandomState(1234)
    conv = Conv1DLayer("test", rng, 1, 100, 10)
    # Insert a singleton channel axis before convolving.
    convolved = conv.output(embedded.dimshuffle(0, 'x', 1, 2))
    conv_out_shape = convolved.eval().shape
    assert conv_out_shape == (4,100, 4, 491), "conv1d output tensor shape = {0}".format(conv_out_shape)

    n_batch = conv_out_shape[0]
    n_words = conv_out_shape[2]
    # Move the word axis ahead of the filter axis, then flatten filters x
    # positions into one feature row per word.
    per_word = convolved.dimshuffle(0, 2, 1, 3).reshape((n_batch, n_words, -1))
    re_organized_shape = per_word.eval().shape
    assert re_organized_shape == (4,4, 100* 491), "reorganized output shape = {0}".format(re_organized_shape)
def __init__(self, problem_character = None, nn_architecture = None, trans_mat_prior = None):
    """Build the SRL network: four embedding lookup tables feeding a 1-D
    convolution + max-pool layer, followed by sigmoid perception layers and
    a final softmax perception layer, then compile the predict functions.

    problem_character -- dict of corpus sizes: 'word_num', 'POS_type_num',
        'dist_to_verb_num', 'dist_to_word_num', 'SRL_type_num'.
    nn_architecture -- object carrying the layer dimensions
        (word_feature_dim, pos_feature_dim, dist_feature_dim,
        conv_output_dim, conv_window_height, hidden_layer_output_dims).
    trans_mat_prior -- accepted but currently unused; only the commented-out
        PathTransitionLayer would consume it.

    Raises Exception when either required argument is missing.
    """
    # x shape: [mini-batch size, feature-dim].
    # In this problem [mini-batch feature-dim]
    if ( problem_character is None or nn_architecture is None):
        raise Exception("both problem and architecture must be provided")

    word_num = problem_character['word_num']
    POS_type_num = problem_character['POS_type_num']
    dist_to_verb_num = problem_character['dist_to_verb_num']
    dist_to_word_num = problem_character['dist_to_word_num']

    # Word-id embedding table, shared by word, verb and sentence lookups.
    self.word_embedding_layer = LookupTableLayer(
        table_size = word_num,
        feature_num = nn_architecture.word_feature_dim
    )

    # POS-tag embedding table.
    self.pos_embedding_layer = LookupTableLayer(
        table_size = POS_type_num,
        feature_num = nn_architecture.pos_feature_dim,
    )

    # self.loc_embedding_layer = LookupTableLayer(
    #     table_size = loc_type_num,
    #     feature_num = nn_architecture.dist_feature_dim,
    # )

    # Distance-id embedding tables: word->verb and verb->word distances.
    self.locdiff_word_embedding_layer = LookupTableLayer(
        table_size = dist_to_word_num,
        feature_num = nn_architecture.dist_feature_dim,
    )
    self.locdiff_verb_embedding_layer = LookupTableLayer(
        table_size = dist_to_verb_num,
        feature_num = nn_architecture.dist_feature_dim,
    )

    # Per-position conv input width: 3 word vectors + 3 POS vectors
    # + 4 distance vectors, all concatenated on the feature axis.
    conv_input_dim = nn_architecture.word_feature_dim * 3 + \
        nn_architecture.pos_feature_dim * 3 + \
        nn_architecture.dist_feature_dim * 4
    # (output maps, input channels, window height, window width).
    conv_shape = (nn_architecture.conv_output_dim,
                  1,
                  nn_architecture.conv_window_height,
                  conv_input_dim)
    self.conv_layer = Conv1DMaxPoolLayer(activator_type="sigmoid",
                                         tensor_shape = conv_shape)

    # Layer list used by get_parameter/set_parameter/params elsewhere;
    # order matters there, so keep it stable.
    self.embedding_conv_layers = [self.word_embedding_layer,
                                  self.pos_embedding_layer,
                                  self.locdiff_word_embedding_layer,
                                  self.locdiff_verb_embedding_layer,
                                  self.conv_layer]

    # Stack of sigmoid hidden layers sized by hidden_layer_output_dims,
    # topped with a softmax layer over the SRL label set.
    input_dim = nn_architecture.conv_output_dim
    self.perception_layers = []
    for idx, output_dim in enumerate(nn_architecture.hidden_layer_output_dims):
        hidden_layer = PerceptionLayer(input_dim = input_dim,
                                       output_dim = output_dim,
                                       activator_type = "sigmoid")
        self.perception_layers.append(hidden_layer)
        input_dim = output_dim
    out_layer = PerceptionLayer(input_dim = input_dim,
                                output_dim = problem_character["SRL_type_num"],
                                activator_type = "softmax")
    self.perception_layers.append(out_layer)

    self.cost = create_cost({"type": "cross_entropy"})
    # self.output_layer = PathTransitionLayer('output',
    #                                         class_num=SRL_type_num,
    #                                         trans_mat_prior= trans_mat_prior)
    # self.output_layer = SoftMaxLayer(n_in= nn_architecture.hidden_layer_output_dims[-1],
    #                                  n_out = SRL_type_num,)

    # Compile forward and argmax-predict functions up front.
    # NOTE(review): self.__output is name-mangled to the enclosing class and
    # is defined outside this fragment.
    X = theano.tensor.matrix("X")
    self.__output_func = theano.function([X], outputs = self.__output(X))
    self.__predict_expr = theano.tensor.argmax(self.__output(X), axis = 1)
    self.__predict_func = theano.function([X], outputs = self.__predict_expr)
class SRLNetwork(GradientOptimizable):
    """Semantic-role-labeling network: embedding lookups -> 1-D conv +
    max-pool -> sigmoid perception layers -> softmax over SRL labels.

    Each prediction row of the input matrix X encodes one <word, verb> pair
    plus the whole-sentence context (see __output for the packed layout).
    """

    def __init__(self, problem_character = None, nn_architecture = None, trans_mat_prior = None):
        """Build all layers and compile the predict functions.

        problem_character -- dict of corpus sizes: 'word_num',
            'POS_type_num', 'dist_to_verb_num', 'dist_to_word_num',
            'SRL_type_num'.
        nn_architecture -- object carrying the layer dimensions.
        trans_mat_prior -- accepted but currently unused; only the
            commented-out PathTransitionLayer would consume it.

        Raises Exception when either required argument is missing.
        """
        # x shape: [mini-batch size, feature-dim].
        # In this problem [mini-batch feature-dim]
        if ( problem_character is None or nn_architecture is None):
            raise Exception("both problem and architecture must be provided")

        word_num = problem_character['word_num']
        POS_type_num = problem_character['POS_type_num']
        dist_to_verb_num = problem_character['dist_to_verb_num']
        dist_to_word_num = problem_character['dist_to_word_num']

        # Word-id embedding table, shared by word, verb and sentence lookups.
        self.word_embedding_layer = LookupTableLayer(
            table_size = word_num,
            feature_num = nn_architecture.word_feature_dim
        )

        # POS-tag embedding table.
        self.pos_embedding_layer = LookupTableLayer(
            table_size = POS_type_num,
            feature_num = nn_architecture.pos_feature_dim,
        )

        # self.loc_embedding_layer = LookupTableLayer(
        #     table_size = loc_type_num,
        #     feature_num = nn_architecture.dist_feature_dim,
        # )

        # Distance-id embedding tables: word->verb and verb->word distances.
        self.locdiff_word_embedding_layer = LookupTableLayer(
            table_size = dist_to_word_num,
            feature_num = nn_architecture.dist_feature_dim,
        )
        self.locdiff_verb_embedding_layer = LookupTableLayer(
            table_size = dist_to_verb_num,
            feature_num = nn_architecture.dist_feature_dim,
        )

        # Per-position conv input width: 3 word vectors + 3 POS vectors
        # + 4 distance vectors concatenated on the feature axis.
        conv_input_dim = nn_architecture.word_feature_dim * 3 + \
            nn_architecture.pos_feature_dim * 3 + \
            nn_architecture.dist_feature_dim * 4
        # (output maps, input channels, window height, window width).
        conv_shape = (nn_architecture.conv_output_dim,
                      1,
                      nn_architecture.conv_window_height,
                      conv_input_dim)
        self.conv_layer = Conv1DMaxPoolLayer(activator_type="sigmoid",
                                             tensor_shape = conv_shape)

        # Order matters: get_parameter/set_parameter/params slice the
        # flattened parameter vector by this layer order.
        self.embedding_conv_layers = [self.word_embedding_layer,
                                      self.pos_embedding_layer,
                                      self.locdiff_word_embedding_layer,
                                      self.locdiff_verb_embedding_layer,
                                      self.conv_layer]

        # Sigmoid hidden stack sized by hidden_layer_output_dims, topped
        # with a softmax layer over the SRL label set.
        input_dim = nn_architecture.conv_output_dim
        self.perception_layers = []
        for idx, output_dim in enumerate(nn_architecture.hidden_layer_output_dims):
            hidden_layer = PerceptionLayer(input_dim = input_dim,
                                           output_dim = output_dim,
                                           activator_type = "sigmoid")
            self.perception_layers.append(hidden_layer)
            input_dim = output_dim
        out_layer = PerceptionLayer(input_dim = input_dim,
                                    output_dim = problem_character["SRL_type_num"],
                                    activator_type = "softmax")
        self.perception_layers.append(out_layer)

        self.cost = create_cost({"type": "cross_entropy"})
        # self.output_layer = PathTransitionLayer('output',
        #                                         class_num=SRL_type_num,
        #                                         trans_mat_prior= trans_mat_prior)
        # self.output_layer = SoftMaxLayer(n_in= nn_architecture.hidden_layer_output_dims[-1],
        #                                  n_out = SRL_type_num,)

        # Compile forward and argmax-predict functions up front.
        X = theano.tensor.matrix("X")
        self.__output_func = theano.function([X], outputs = self.__output(X))
        self.__predict_expr = theano.tensor.argmax(self.__output(X), axis = 1)
        self.__predict_func = theano.function([X], outputs = self.__predict_expr)

    def __output(self, X):
        """Symbolic forward pass over the packed feature matrix X.

        Packed column layout (translated from the original Chinese notes);
        columns are unpacked left-to-right via start_idx:
          [0]                     sentence_len
          [1 : 1+L]               sentence_word_id  - global word-id list of the sentence (row 0 only)
          [... : +L]              sentence_pos_id   - global POS-id list of the sentence (row 0 only)
        then one record per <word, verb> pair (per row):
          cur_word_id             word id of the current word
          cur_verb_id             word id of the current verb
          cur_word_pos_id         POS id of the current word
          cur_verb_pos_id         POS id of the current verb
          cur_word_loc_id         location id of the current word  # NOT IN USE
          cur_verb_loc_id         location id of the current verb  # NOT IN USE
          cur_word2verb_dist_id   distance id: current word -> current verb
          cur_verb2word_dist_id   distance id: current verb -> current word
          [... : +L]              other_word2verb_dist_id - distance ids: every word -> current verb
          [... : +L]              other_word2word_dist_id - distance ids: every word -> current word

        Returns the softmax output of the last perception layer,
        shape (batch, SRL_type_num).
        """
        # Walk the packed columns; sentence-level fields come from row 0.
        start_idx = 0
        sentence_len = X[0, start_idx].astype('int32')
        start_idx += 1
        sentence_word_id = X[0, start_idx:start_idx+sentence_len].astype('int32')
        start_idx += sentence_len
        sentence_pos_id = X[0, start_idx:start_idx+sentence_len].astype('int32')
        start_idx += sentence_len
        cur_word_id = X[:, start_idx].astype('int32')
        start_idx += 1
        cur_verb_id = X[:, start_idx].astype('int32')
        start_idx += 1
        cur_word_pos_id = X[:, start_idx].astype('int32')
        start_idx += 1
        cur_verb_pos_id = X[:, start_idx].astype('int32')
        start_idx += 1
        # Location ids are unpacked to keep start_idx aligned but unused below.
        cur_word_loc_id = X[:, start_idx].astype('int32')
        start_idx += 1
        cur_verb_loc_id = X[:, start_idx].astype('int32')
        start_idx += 1
        cur_word2verb_dist_id = X[:, start_idx].astype('int32')
        start_idx += 1
        cur_verb2word_dist_id = X[:,start_idx].astype('int32')
        start_idx += 1
        other_word2verb_dist_id = X[:, start_idx:start_idx+sentence_len].astype('int32')
        start_idx += sentence_len
        other_word2word_dist_id = X[:, start_idx:start_idx+sentence_len].astype('int32')
        start_idx += sentence_len

        # Embed the per-pair ids; the word table is shared between word and
        # verb, the two locdiff tables are deliberately crossed (word->verb
        # distances use the verb table and vice versa).
        wordvec = self.word_embedding_layer.output(
            inputs = cur_word_id  # word_id_input
        )
        verbvec = self.word_embedding_layer.output(
            inputs = cur_verb_id  # verb_id_input
        )
        wordPOSvec = self.pos_embedding_layer.output(
            inputs = cur_word_pos_id  # word_pos_input
        )
        verbPOSvec = self.pos_embedding_layer.output(
            inputs = cur_verb_pos_id  # verb_pos_input
        )
        # wordlocvec = self.loc_embedding_layer.output(
        #     inputs = word_loc_input,
        # )
        # verblocvec = self.loc_embedding_layer.output(
        #     inputs = verb_loc_input,
        # )
        locdiff_word2verb_vec = self.locdiff_verb_embedding_layer.output(
            inputs = cur_word2verb_dist_id
        )
        locdiff_verb2word_vec = self.locdiff_word_embedding_layer.output(
            inputs = cur_verb2word_dist_id
        )
        # Sentence-level embeddings, shared by every row of the batch.
        sentence_word_vec = self.word_embedding_layer.output(
            inputs = sentence_word_id,
        )
        sentence_pos_vec = self.pos_embedding_layer.output(
            inputs = sentence_pos_id,
        )
        other_loc2word_vec = self.locdiff_word_embedding_layer.output(
            inputs = other_word2word_dist_id
        )
        other_loc2verb_vec = self.locdiff_verb_embedding_layer.output(
            inputs = other_word2verb_dist_id
        )

        # NOTE(review): batch size is assumed equal to sentence_len, i.e.
        # one <word, verb> record per sentence word — confirm with callers.
        batch_size = sentence_len
        # Broadcast per-pair vectors across every sentence position and the
        # sentence-level vectors across every batch row, then concatenate on
        # the feature axis to form the (batch, 1, sentence_len, feature)
        # conv input.
        conv_input_feature = T.concatenate(
            (wordvec.dimshuffle(0,"x", "x",1).repeat(sentence_len, axis=2),
             verbvec.dimshuffle(0,"x", "x",1).repeat(sentence_len, axis=2),
             wordPOSvec.dimshuffle(0,"x", "x",1).repeat(sentence_len, axis=2),
             verbPOSvec.dimshuffle(0,"x", "x",1).repeat(sentence_len, axis=2),
             locdiff_word2verb_vec.dimshuffle(0,"x", "x",1).repeat(sentence_len, axis=2),
             locdiff_verb2word_vec.dimshuffle(0,"x", "x",1).repeat(sentence_len, axis=2),
             sentence_word_vec.dimshuffle("x", "x", 0, 1).repeat(batch_size, axis=0),
             sentence_pos_vec.dimshuffle("x", "x", 0, 1).repeat(batch_size, axis=0),
             other_loc2word_vec.dimshuffle(0, "x", 1, 2),
             other_loc2verb_vec.dimshuffle(0, "x", 1, 2),
            ),
            axis=3
        )
        # Conv + max-pool, flattened per row, then through the MLP stack.
        conv_out = self.conv_layer.output(conv_input_feature).reshape((batch_size, -1))
        layer_input = conv_out
        for layer in self.perception_layers:
            layer_input = layer.output(layer_input)
        return layer_input

    def predict(self, X):
        """Return the argmax SRL label per row of X (compiled function)."""
        return self.__predict_func(X)

    def predict_prob(self,X):
        """Return the per-label probability matrix for X (compiled function)."""
        return self.__output_func(X)

    def object_gradient(self, X, y):
        """Return [cost expression, flattened-gradient expression] for (X, y).

        The gradient of the cross-entropy cost is taken w.r.t. every
        parameter and concatenated into one flat vector, matching the layout
        of get_parameter/set_parameter.
        """
        object_expr = self.cost.cost(self.__output(X), y)
        params = self.params()
        grad = T.grad(object_expr, params)
        gradient_vec = []
        for param in grad:
            gradient_vec.append(param.flatten())
        gradient_expr = theano.tensor.concatenate(gradient_vec)
        return [object_expr, gradient_expr]

    def get_parameter(self):
        """Return all parameters as one flat numpy vector (fixed layer order)."""
        all_layes = self.embedding_conv_layers + self.perception_layers
        param_vec = [layer.get_parameter() for layer in all_layes]
        return numpy.concatenate(param_vec)

    def set_parameter(self, param_vec):
        """Scatter a flat parameter vector back into the layers.

        Slices param_vec by each layer's parameter size, in the same layer
        order get_parameter uses.
        """
        all_layes = self.embedding_conv_layers + self.perception_layers
        parameter_size_vec = [layer.get_parameter_size() for layer in all_layes]
        # Cumulative sizes give the start offset of each layer's slice.
        start_idx = [0] + list(numpy.cumsum(parameter_size_vec))
        for idx, layer in enumerate(all_layes):
            layer.set_parameter(param_vec[start_idx[idx]:start_idx[idx] + parameter_size_vec[idx]])

    def params(self):
        """Return the concatenated shared-variable parameter list of all layers."""
        all_layes = self.embedding_conv_layers + self.perception_layers
        return list(itertools.chain.from_iterable([layer.params() for layer in all_layes]))

    def __getstate__(self):
        """Collect each layer's state into a dict for pickling.

        NOTE(review): this references self.word_conv_layer, pos_conv_layer,
        loc_embedding_layer, locdiff_*_conv_layer and self.output_layer,
        none of which __init__ creates (it builds a single self.conv_layer
        and perception_layers instead) — pickling will raise AttributeError.
        This serialization pair looks stale relative to the current
        architecture and needs updating.
        """
        state = dict()
        state['name'] = "srl-machine"
        state['word_embedding_layer'] = self.word_embedding_layer.__getstate__()
        state['word_conv_layer'] = self.word_conv_layer.__getstate__()
        state['pos_embedding_layer'] = self.pos_embedding_layer.__getstate__()
        state['pos_conv_layer'] = self.pos_conv_layer.__getstate__()
        state['loc_embedding_layer'] = self.loc_embedding_layer.__getstate__()
        state['locdiff_word_embedding_layer'] = self.locdiff_word_embedding_layer.__getstate__()
        state['locdiff_word_conv_layer'] = self.locdiff_word_conv_layer.__getstate__()
        state['locdiff_verb_embedding_layer'] = self.locdiff_verb_embedding_layer.__getstate__()
        state['locdiff_verb_conv_layer'] = self.locdiff_verb_conv_layer.__getstate__()
        for idx, hidden_layer in enumerate(self.perception_layers):
            state['hidden_layer_' + str(idx)] = hidden_layer.__getstate__()
        state['output_layer'] = self.output_layer.__getstate__()
        return state

    def __setstate__(self, state):
        """Rebuild layer sub-objects from a pickled state dict.

        NOTE(review): mirrors the stale __getstate__ schema above — it
        restores per-feature conv layers that __init__ never creates, and
        does NOT restore perception_layers, cost, or the compiled Theano
        functions, so an unpickled instance cannot predict. Needs the same
        schema update as __getstate__.
        """
        assert state['name'] == "srl-machine"
        self.word_embedding_layer = LookupTableLayer()
        self.word_embedding_layer.__setstate__(state["word_embedding_layer"])
        self.pos_embedding_layer = LookupTableLayer()
        self.pos_embedding_layer.__setstate__(state["pos_embedding_layer"])
        self.loc_embedding_layer = LookupTableLayer()
        self.loc_embedding_layer.__setstate__(state["loc_embedding_layer"])
        self.locdiff_word_embedding_layer = LookupTableLayer()
        self.locdiff_word_embedding_layer.__setstate__(state["locdiff_word_embedding_layer"])
        self.locdiff_verb_embedding_layer = LookupTableLayer()
        self.locdiff_verb_embedding_layer.__setstate__(state["locdiff_verb_embedding_layer"])
        self.word_conv_layer = Conv1DMaxPoolLayer()
        self.word_conv_layer.__setstate__(state["word_conv_layer"])
        self.pos_conv_layer = Conv1DMaxPoolLayer()
        self.pos_conv_layer.__setstate__(state["pos_conv_layer"])
        self.locdiff_word_conv_layer = Conv1DMaxPoolLayer()
        self.locdiff_word_conv_layer.__setstate__(state["locdiff_word_conv_layer"])
        self.locdiff_verb_conv_layer = Conv1DMaxPoolLayer()
        self.locdiff_verb_conv_layer.__setstate__(state["locdiff_verb_conv_layer"])