def __init__(self, K, vocab_size, num_chars, W_init, nhidden, embed_dim, dropout, train_emb, char_dim, use_feat, gating_fn, save_attn=False):
    """Build the reader network and compile its train/validate functions.

    Stores the hyper-parameters on the instance, declares the symbolic
    Theano inputs, constructs the network through ``self.build_network``
    and compiles ``self.train_fn`` / ``self.validate_fn``.
    """
    # Keep every hyper-parameter around for build_network / training.
    self.nhidden = nhidden
    self.embed_dim = embed_dim
    self.dropout = dropout
    self.train_emb = train_emb
    self.char_dim = char_dim
    self.learning_rate = LEARNING_RATE
    self.num_chars = num_chars
    self.use_feat = use_feat
    self.save_attn = save_attn
    self.gating_fn = gating_fn
    # Character embeddings are active only when a char dimension was given.
    self.use_chars = self.char_dim != 0

    # Default word-embedding matrix: Glorot-normal sample.
    if W_init is None:
        W_init = lasagne.init.GlorotNormal().sample((vocab_size, self.embed_dim))

    # Symbolic inputs: token tensors, masks, answer/cloze indices, features.
    doc_var = T.itensor3('doc')
    query_var = T.itensor3('quer')
    cand_var = T.wtensor3('cand')
    docmask_var = T.bmatrix('doc_mask')
    qmask_var = T.bmatrix('q_mask')
    candmask_var = T.bmatrix('c_mask')
    target_var = T.ivector('ans')
    feat_var = T.imatrix('feat')
    doc_toks = T.imatrix('dchars')
    qry_toks = T.imatrix('qchars')
    tok_var = T.imatrix('tok')
    tok_mask = T.bmatrix('tok_mask')
    cloze_var = T.ivector('cloze')
    self.inps = [doc_var, doc_toks, query_var, qry_toks, cand_var, target_var,
                 docmask_var, qmask_var, tok_var, tok_mask, candmask_var,
                 feat_var, cloze_var]

    # Two output heads: one for training, one for validation.
    (self.predicted_probs, predicted_probs_val, self.network, W_emb,
     attentions) = self.build_network(K, vocab_size, W_init)

    self.loss_fn = T.nnet.categorical_crossentropy(
        self.predicted_probs, target_var).mean()
    self.eval_fn = lasagne.objectives.categorical_accuracy(
        self.predicted_probs, target_var).mean()
    loss_fn_val = T.nnet.categorical_crossentropy(
        predicted_probs_val, target_var).mean()
    eval_fn_val = lasagne.objectives.categorical_accuracy(
        predicted_probs_val, target_var).mean()

    self.params = L.get_all_params(self.network, trainable=True)
    updates = lasagne.updates.adam(self.loss_fn, self.params,
                                   learning_rate=self.learning_rate)

    # Compiled entry points; attentions are returned at validation time.
    self.train_fn = theano.function(
        self.inps,
        [self.loss_fn, self.eval_fn, self.predicted_probs],
        updates=updates,
        on_unused_input='warn')
    self.validate_fn = theano.function(
        self.inps,
        [loss_fn_val, eval_fn_val, predicted_probs_val] + attentions,
        on_unused_input='warn')
def BuildModel(modelSpecs, forTrain=True):
    """Create a ResNet4DistMatrix predictor and its symbolic inputs.

    Returns ``(distancePredictor, x, y, xmask, ymask, xem, labelList,
    weightList)``. ``labelList`` and ``weightList`` stay empty when
    ``forTrain`` is False (prediction mode).
    """
    rng = np.random.RandomState()

    # x holds sequential features, y holds matrix (pairwise) features.
    x = T.tensor3('x')
    y = T.tensor4('y')
    # Masks for x and y, respectively.
    xmask = T.bmatrix('xmask')
    ymask = T.btensor3('ymask')

    xem = None
    if config.EmbeddingUsed(modelSpecs):
        xem = T.tensor3('xem')
        distancePredictor = ResNet4DistMatrix(rng,
                                              seqInput=x,
                                              matrixInput=y,
                                              mask_seq=xmask,
                                              mask_matrix=ymask,
                                              embedInput=xem,
                                              modelSpecs=modelSpecs)
    else:
        distancePredictor = ResNet4DistMatrix(rng,
                                              seqInput=x,
                                              matrixInput=y,
                                              mask_seq=xmask,
                                              mask_matrix=ymask,
                                              modelSpecs=modelSpecs)

    # One label tensor per response; shape (batchSize, seqLen, seqLen) or
    # (batchSize, seqLen, seqLen, valueDims). "w" tensors carry 16-bit ints
    # for discrete labels.
    labelList = []
    if forTrain:
        for response in modelSpecs['responses']:
            labelType = Response2LabelType(response)
            rValDims = config.responseValueDims[labelType]
            if labelType.startswith('Discrete'):
                make_label = T.wtensor4 if rValDims > 1 else T.wtensor3
            else:
                make_label = T.tensor4 if rValDims > 1 else T.tensor3
            labelList.append(make_label('Tlabel4' + response))

    # Optional per-label sample weights, shape (batchSize, seqLen, seqLen).
    weightList = []
    if labelList and modelSpecs['UseSampleWeight']:
        weightList = [T.tensor3('Tweight4' + response)
                      for response in modelSpecs['responses']]

    return distancePredictor, x, y, xmask, ymask, xem, labelList, weightList
def __init__(self, K, vocab_size, W_init, regularizer, rlambda, nhidden, embed_dim, dropout, train_emb, subsample):
    """Build the two-tower reader and compile train/validate functions.

    The embedding matrix is optionally perturbed with Glorot noise and
    penalised for drifting away from its initial value (``rlambda`` times
    the chosen norm of ``W_emb - W_init``).
    """
    self.nhidden = nhidden
    self.embed_dim = embed_dim
    self.dropout = dropout
    self.train_emb = train_emb
    self.subsample = subsample

    # Embedding-drift penalty uses l2 unless 'l1' was requested.
    norm = lasagne.regularization.l2 if regularizer == 'l2' else lasagne.regularization.l1

    if W_init is None:
        W_init = lasagne.init.GlorotNormal().sample((vocab_size, self.embed_dim))

    # Symbolic inputs and their masks.
    doc_var = T.itensor3('doc')
    query_var = T.itensor3('quer')
    cand_var = T.wtensor3('cand')
    docmask_var = T.bmatrix('doc_mask')
    qmask_var = T.bmatrix('q_mask')
    candmask_var = T.bmatrix('c_mask')
    target_var = T.ivector('ans')

    # Perturb the starting embeddings only when the drift penalty is active.
    if rlambda > 0.:
        W_pert = W_init + lasagne.init.GlorotNormal().sample(W_init.shape)
    else:
        W_pert = W_init

    (predicted_probs, predicted_probs_val, self.doc_net, self.q_net,
     W_emb) = self.build_network(K, vocab_size, doc_var, query_var, cand_var,
                                 docmask_var, qmask_var, candmask_var, W_pert)

    # Cross-entropy plus the embedding-drift regulariser, for both heads.
    loss_fn = T.nnet.categorical_crossentropy(predicted_probs, target_var).mean() + \
        rlambda*norm(W_emb-W_init)
    eval_fn = lasagne.objectives.categorical_accuracy(predicted_probs, target_var).mean()
    loss_fn_val = T.nnet.categorical_crossentropy(predicted_probs_val, target_var).mean() + \
        rlambda*norm(W_emb-W_init)
    eval_fn_val = lasagne.objectives.categorical_accuracy(predicted_probs_val, target_var).mean()

    params = L.get_all_params(self.doc_net, trainable=True) + \
        L.get_all_params(self.q_net, trainable=True)
    updates = lasagne.updates.adam(loss_fn, params, learning_rate=LEARNING_RATE)

    # Both compiled functions take the same inputs.
    inputs = [doc_var, query_var, cand_var, target_var,
              docmask_var, qmask_var, candmask_var]
    self.train_fn = theano.function(inputs,
                                    [loss_fn, eval_fn, predicted_probs],
                                    updates=updates)
    self.validate_fn = theano.function(inputs,
                                       [loss_fn_val, eval_fn_val, predicted_probs_val])
def __init__(self, K, vocab_size, num_chars, W_init, regularizer, rlambda, nhidden, embed_dim, dropout, train_emb, subsample, char_dim, use_feat):
    """Build the reader (with optional char embeddings and extra features)
    and compile its train/validate functions.

    The embedding matrix may be perturbed with Glorot noise and is
    penalised for drifting from ``W_init`` by ``rlambda`` times the chosen
    norm of ``W_emb - W_init``.
    """
    # Hyper-parameters retained for build_network / training.
    self.nhidden = nhidden
    self.embed_dim = embed_dim
    self.dropout = dropout
    self.train_emb = train_emb
    self.subsample = subsample
    self.char_dim = char_dim
    self.learning_rate = LEARNING_RATE
    self.num_chars = num_chars
    self.use_feat = use_feat

    # Embedding-drift penalty uses l2 unless 'l1' was requested.
    norm = lasagne.regularization.l2 if regularizer == 'l2' else lasagne.regularization.l1
    # Character embeddings are active only when a char dimension was given.
    self.use_chars = self.char_dim != 0

    if W_init is None:
        W_init = lasagne.init.GlorotNormal().sample((vocab_size, self.embed_dim))

    # Symbolic inputs: token tensors, masks, answer/cloze indices, features.
    doc_var = T.itensor3('doc')
    query_var = T.itensor3('quer')
    cand_var = T.wtensor3('cand')
    docmask_var = T.bmatrix('doc_mask')
    qmask_var = T.bmatrix('q_mask')
    candmask_var = T.bmatrix('c_mask')
    target_var = T.ivector('ans')
    feat_var = T.imatrix('feat')
    doc_toks = T.imatrix('dchars')
    qry_toks = T.imatrix('qchars')
    tok_var = T.imatrix('tok')
    tok_mask = T.bmatrix('tok_mask')
    cloze_var = T.ivector('cloze')
    self.inps = [doc_var, doc_toks, query_var, qry_toks, cand_var, target_var,
                 docmask_var, qmask_var, tok_var, tok_mask, candmask_var,
                 feat_var, cloze_var]

    # Perturb the starting embeddings only when the drift penalty is active.
    if rlambda > 0.:
        W_pert = W_init + lasagne.init.GlorotNormal().sample(W_init.shape)
    else:
        W_pert = W_init

    (self.predicted_probs, predicted_probs_val, self.doc_net, self.q_net,
     W_emb) = self.build_network(K, vocab_size, W_pert)

    # Cross-entropy plus the embedding-drift regulariser, for both heads.
    self.loss_fn = T.nnet.categorical_crossentropy(self.predicted_probs, target_var).mean() + \
        rlambda*norm(W_emb-W_init)
    self.eval_fn = lasagne.objectives.categorical_accuracy(
        self.predicted_probs, target_var).mean()
    loss_fn_val = T.nnet.categorical_crossentropy(predicted_probs_val, target_var).mean() + \
        rlambda*norm(W_emb-W_init)
    eval_fn_val = lasagne.objectives.categorical_accuracy(
        predicted_probs_val, target_var).mean()

    # NOTE(review): q_net appears to be a list of layers here — confirm
    # against build_network.
    self.params = L.get_all_params([self.doc_net] + self.q_net, trainable=True)
    updates = lasagne.updates.adam(self.loss_fn, self.params,
                                   learning_rate=self.learning_rate)

    self.train_fn = theano.function(
        self.inps,
        [self.loss_fn, self.eval_fn, self.predicted_probs],
        updates=updates,
        on_unused_input='warn')
    self.validate_fn = theano.function(
        self.inps,
        [loss_fn_val, eval_fn_val, predicted_probs_val],
        on_unused_input='warn')
def TestResNet4DistMatrix():
    # Smoke test: builds a small ResNet4DistMatrix, loads a pickled dataset
    # and prints normalized per-class confusion matrices.
    x = T.tensor3('x')
    y = T.tensor4('y')
    xmask = T.bmatrix('xmask')
    ymask = T.btensor3('ymask')
    selection = T.wtensor3('selection')
    import cPickle  # Python 2 pickle module
    fh = open('seqDataset4HF.pkl')
    data = cPickle.load(fh)
    fh.close()
    # Feature dimensions are taken from the loaded data itself.
    distancePredictor = ResNet4DistMatrix(rng=np.random.RandomState(), seqInput=x, matrixInput=y, n_in_seq=data[0][0].shape[2], n_in_matrix=data[1][0].shape[3], n_hiddens_seq=[3, 5], n_hiddens_matrix=[2], hwsz_seq=4, hwsz_matrix=4, mask_seq=xmask, mask_matrix=ymask)
    # Disabled forward-pass check (kept as an inert string literal):
    """
    f = theano.function([x, y, xmask, ymask], distancePredictor.output_1d)
    g = theano.function([x, y, xmask, ymask], distancePredictor.output_2d)
    """
    dataLen = 300
    batchSize = 60
    # Random synthetic inputs; masks are empty (second dim 0).
    a = np.random.uniform(0, 1, (batchSize, dataLen, 20)).astype(np.float32)
    b = np.random.uniform(0, 1, (batchSize, dataLen, dataLen, 3)).astype(np.float32)
    amask = np.zeros((batchSize, 0)).astype(np.int8)
    bmask = np.zeros((batchSize, 0, dataLen)).astype(np.int8)
    sel = np.ones((batchSize, dataLen, dataLen)).astype(np.int8)
    #print a
    #print b
    # Random labels in {0, 1, 2}; values >= 2 are clamped to 2.
    c = np.random.uniform(0, 3, (batchSize, dataLen, dataLen)).round().astype(np.int8)
    np.putmask(c, c >= 2, 2)
    # Disabled hand-crafted labels (kept as an inert string literal):
    """
    c[0, 1, 13]=1
    c[0, 2, 15]=1
    c[0, 4, 16]=1
    c[0, 1, 27]=1
    c[0, 2, 28]=1
    c[0, 4, 29]=1
    c[1, 0, 13]=2
    c[1, 1, 15]=2
    c[1, 3, 16]=2
    c[2, 0, 23]=2
    c[2, 1, 25]=2
    c[2, 3, 26]=2
    """
    #sel = c
    #out1d = f(a, b, amask, bmask)
    #out2d = g(a, b, amask, bmask)
    #print out1d
    #print out2d
    # Compile only the confusion-matrix output; loss/error/accuracy graphs
    # are built but not compiled here.
    z = T.btensor3('z')
    loss = distancePredictor.loss(z, selection)
    errs = distancePredictor.ErrorsByRange(z)
    accs = distancePredictor.TopAccuracyByRange(z)
    confM = distancePredictor.confusionMatrix(z)
    h = theano.function([x, y, xmask, ymask, selection, z], confM, on_unused_input='ignore')
    #l, e, accu = h(a, b, amask, bmask, sel, c)
    # Accumulate confusion matrices over the first 5 minibatches of data.
    cms = []
    for i in np.arange(5):
        cm = h(data[0][i], data[1][i], data[2][i], data[3][i], data[4][i], data[5][i])
        print(cm)
        cms.append(cm)
    sumofcms = np.sum(cms, axis=0) * 1.
    # Normalize each row so every class's confusions sum to 1.
    for i in range(sumofcms.shape[0]):
        sumofcms[i] /= np.sum(sumofcms[i])
    confusions = sumofcms
    print(confusions)
    print(np.sum(confusions[0]))
    print(np.sum(confusions[1]))
    print(np.sum(confusions[2]))
    # NOTE(review): the trailing triple-quote below is unbalanced in the
    # visible source — confirm against the full file before running.
    """
def __init__(self, rng, batchsize=100, activation=relu):
    """Build the char+word CNN classifier and compile train/test functions.

    Parameters
    ----------
    rng : random state used to initialise the layer weights.
    batchsize : number of tweets per minibatch.
    activation : nonlinearity for the word-level conv and hidden layers.
    """
    import char_load
    (num_sent, char_cnt, word_cnt, max_word_len, max_sen_len,
     k_chr, k_wrd, x_chr, x_wrd, y) = char_load.read("tweets_clean.txt")

    dim_word = 30   # word embedding size
    dim_char = 5    # char embedding size
    cl_word = 300   # word-level conv feature maps
    cl_char = 50    # char-level conv feature maps
    k_word = k_wrd  # word-level filter width
    k_char = k_chr  # char-level filter width

    (data_train_word, data_test_word, data_train_char, data_test_char,
     target_train, target_test) = train_test_split(
        x_wrd, x_chr, y, random_state=1234, test_size=0.1)

    x_train_word = theano.shared(np.asarray(data_train_word, dtype='int16'), borrow=True)
    x_train_char = theano.shared(np.asarray(data_train_char, dtype='int16'), borrow=True)
    y_train = theano.shared(np.asarray(target_train, dtype='int8'), borrow=True)
    x_test_word = theano.shared(np.asarray(data_test_word, dtype='int16'), borrow=True)
    x_test_char = theano.shared(np.asarray(data_test_char, dtype='int16'), borrow=True)
    y_test = theano.shared(np.asarray(target_test, dtype='int8'), borrow=True)

    # FIX: floor division so the batch counts stay integers under Python 3;
    # true division yields a float and breaks range()-style iteration.
    self.n_train_batches = x_train_word.get_value(borrow=True).shape[0] // batchsize
    self.n_test_batches = x_test_word.get_value(borrow=True).shape[0] // batchsize

    """symbol definition"""
    index = T.iscalar()
    x_wrd = T.wmatrix('x_wrd')
    x_chr = T.wtensor3('x_chr')
    y = T.bvector('y')
    train = T.iscalar('train')

    """network definition"""
    # Char path: embed chars, convolve within each word, max-pool to one
    # cl_char-dimensional vector per word.
    layer_char_embed_input = x_chr  # .reshape((batchsize, max_sen_len, max_word_len))
    layer_char_embed = EmbedIDLayer(
        rng,
        layer_char_embed_input,
        n_input=char_cnt,
        n_output=dim_char
    )
    layer1_input = layer_char_embed.output.reshape(
        (batchsize * max_sen_len, 1, max_word_len, dim_char)
    )
    layer1 = ConvolutionalLayer(
        rng,
        layer1_input,
        filter_shape=(cl_char, 1, k_char, dim_char),  # cl_char = number of filters
        image_shape=(batchsize * max_sen_len, 1, max_word_len, dim_char)
    )
    layer2 = MaxPoolingLayer(
        layer1.output,
        poolsize=(max_word_len - k_char + 1, 1)
    )

    # Word path: plain word-id embedding.
    layer_word_embed_input = x_wrd  # .reshape((batchsize, max_sen_len))
    layer_word_embed = EmbedIDLayer(
        rng,
        layer_word_embed_input,
        n_input=word_cnt,
        n_output=dim_word
    )

    # Concatenate word and char representations along the feature axis.
    layer3_word_input = layer_word_embed.output.reshape(
        (batchsize, 1, max_sen_len, dim_word))
    layer3_char_input = layer2.output.reshape(
        (batchsize, 1, max_sen_len, cl_char))
    layer3_input = T.concatenate(
        [layer3_word_input, layer3_char_input],
        axis=3
    )  # shape (batchsize, 1, max_sen_len, dim_word+cl_char)
    layer3 = ConvolutionalLayer(
        rng,
        layer3_input,
        filter_shape=(cl_word, 1, k_word, dim_word + cl_char),  # 1 = number of input channels
        image_shape=(batchsize, 1, max_sen_len, dim_word + cl_char),
        activation=activation
    )
    layer4 = MaxPoolingLayer(
        layer3.output,
        poolsize=(max_sen_len - k_word + 1, 1)
    )

    # Classifier head: two fully-connected layers with dropout.
    layer5_input = layer4.output.reshape((batchsize, cl_word))
    layer5 = FullyConnectedLayer(
        rng,
        dropout(rng, layer5_input, train),
        n_input=cl_word,
        n_output=50,
        activation=activation
    )
    layer6_input = layer5.output
    layer6 = FullyConnectedLayer(
        rng,
        dropout(rng, layer6_input, train, p=0.1),
        n_input=50,
        n_output=2,
        activation=None
    )

    result = Result(layer6.output, y)
    loss = result.negative_log_likelihood()
    accuracy = result.accuracy()
    params = layer6.params \
        + layer5.params \
        + layer3.params \
        + layer_word_embed.params \
        + layer1.params \
        + layer_char_embed.params
    updates = RMSprop(learning_rate=0.001, params=params).updates(loss)

    # train flag (1/0) toggles dropout via the `train` symbolic input.
    self.train_model = theano.function(
        inputs=[index],
        outputs=[loss, accuracy],
        updates=updates,
        givens={
            x_wrd: x_train_word[index * batchsize: (index + 1) * batchsize],
            x_chr: x_train_char[index * batchsize: (index + 1) * batchsize],
            y: y_train[index * batchsize: (index + 1) * batchsize],
            train: np.cast['int32'](1)
        }
    )
    self.test_model = theano.function(
        inputs=[index],
        outputs=[loss, accuracy],
        givens={
            x_wrd: x_test_word[index * batchsize: (index + 1) * batchsize],
            x_chr: x_test_char[index * batchsize: (index + 1) * batchsize],
            y: y_test[index * batchsize: (index + 1) * batchsize],
            train: np.cast['int32'](0)
        }
    )
def __init__(self, rng, batchsize=100, activation=relu):
    """Build the char+word CNN classifier and compile train/test functions.

    Parameters
    ----------
    rng : random state used to initialise the layer weights.
    batchsize : number of tweets per minibatch.
    activation : nonlinearity for the word-level conv and hidden layers.
    """
    import char_load
    (num_sent, char_cnt, word_cnt, max_word_len, max_sen_len,
     k_chr, k_wrd, x_chr, x_wrd, y) = char_load.read("tweets_clean.txt")

    dim_word = 30   # word embedding size
    dim_char = 5    # char embedding size
    cl_word = 300   # word-level conv feature maps
    cl_char = 50    # char-level conv feature maps
    k_word = k_wrd  # word-level filter width
    k_char = k_chr  # char-level filter width

    (data_train_word, data_test_word, data_train_char, data_test_char,
     target_train, target_test) = train_test_split(
        x_wrd, x_chr, y, random_state=1234, test_size=0.1)

    x_train_word = theano.shared(np.asarray(data_train_word, dtype='int16'), borrow=True)
    x_train_char = theano.shared(np.asarray(data_train_char, dtype='int16'), borrow=True)
    y_train = theano.shared(np.asarray(target_train, dtype='int8'), borrow=True)
    x_test_word = theano.shared(np.asarray(data_test_word, dtype='int16'), borrow=True)
    x_test_char = theano.shared(np.asarray(data_test_char, dtype='int16'), borrow=True)
    y_test = theano.shared(np.asarray(target_test, dtype='int8'), borrow=True)

    # FIX: floor division so the batch counts stay integers under Python 3;
    # true division yields a float and breaks range()-style iteration.
    self.n_train_batches = x_train_word.get_value(borrow=True).shape[0] // batchsize
    self.n_test_batches = x_test_word.get_value(borrow=True).shape[0] // batchsize

    """symbol definition"""
    index = T.iscalar()
    x_wrd = T.wmatrix('x_wrd')
    x_chr = T.wtensor3('x_chr')
    y = T.bvector('y')
    train = T.iscalar('train')

    """network definition"""
    # Char path: embed chars, convolve within each word, max-pool to one
    # cl_char-dimensional vector per word.
    layer_char_embed_input = x_chr  # .reshape((batchsize, max_sen_len, max_word_len))
    layer_char_embed = EmbedIDLayer(
        rng,
        layer_char_embed_input,
        n_input=char_cnt,
        n_output=dim_char
    )
    layer1_input = layer_char_embed.output.reshape(
        (batchsize * max_sen_len, 1, max_word_len, dim_char)
    )
    layer1 = ConvolutionalLayer(
        rng,
        layer1_input,
        filter_shape=(cl_char, 1, k_char, dim_char),  # cl_char = number of filters
        image_shape=(batchsize * max_sen_len, 1, max_word_len, dim_char)
    )
    layer2 = MaxPoolingLayer(
        layer1.output,
        poolsize=(max_word_len - k_char + 1, 1)
    )

    # Word path: plain word-id embedding.
    layer_word_embed_input = x_wrd  # .reshape((batchsize, max_sen_len))
    layer_word_embed = EmbedIDLayer(
        rng,
        layer_word_embed_input,
        n_input=word_cnt,
        n_output=dim_word
    )

    # Concatenate word and char representations along the feature axis.
    layer3_word_input = layer_word_embed.output.reshape(
        (batchsize, 1, max_sen_len, dim_word))
    layer3_char_input = layer2.output.reshape(
        (batchsize, 1, max_sen_len, cl_char))
    layer3_input = T.concatenate(
        [layer3_word_input, layer3_char_input],
        axis=3
    )  # shape (batchsize, 1, max_sen_len, dim_word+cl_char)
    layer3 = ConvolutionalLayer(
        rng,
        layer3_input,
        filter_shape=(cl_word, 1, k_word, dim_word + cl_char),  # 1 = number of input channels
        image_shape=(batchsize, 1, max_sen_len, dim_word + cl_char),
        activation=activation
    )
    layer4 = MaxPoolingLayer(
        layer3.output,
        poolsize=(max_sen_len - k_word + 1, 1)
    )

    # Classifier head: two fully-connected layers with dropout.
    layer5_input = layer4.output.reshape((batchsize, cl_word))
    layer5 = FullyConnectedLayer(
        rng,
        dropout(rng, layer5_input, train),
        n_input=cl_word,
        n_output=50,
        activation=activation
    )
    layer6_input = layer5.output
    layer6 = FullyConnectedLayer(
        rng,
        dropout(rng, layer6_input, train, p=0.1),
        n_input=50,
        n_output=2,
        activation=None
    )

    result = Result(layer6.output, y)
    loss = result.negative_log_likelihood()
    accuracy = result.accuracy()
    params = layer6.params \
        + layer5.params \
        + layer3.params \
        + layer_word_embed.params \
        + layer1.params \
        + layer_char_embed.params
    updates = RMSprop(learning_rate=0.001, params=params).updates(loss)

    # train flag (1/0) toggles dropout via the `train` symbolic input.
    self.train_model = theano.function(
        inputs=[index],
        outputs=[loss, accuracy],
        updates=updates,
        givens={
            x_wrd: x_train_word[index * batchsize: (index + 1) * batchsize],
            x_chr: x_train_char[index * batchsize: (index + 1) * batchsize],
            y: y_train[index * batchsize: (index + 1) * batchsize],
            train: np.cast['int32'](1)
        }
    )
    self.test_model = theano.function(
        inputs=[index],
        outputs=[loss, accuracy],
        givens={
            x_wrd: x_test_word[index * batchsize: (index + 1) * batchsize],
            x_chr: x_test_char[index * batchsize: (index + 1) * batchsize],
            y: y_test[index * batchsize: (index + 1) * batchsize],
            train: np.cast['int32'](0)
        }
    )
def BuildModel(modelSpecs, forTrain=True):
    """Create a ResNet4DistMatrix predictor and its symbolic inputs.

    In training mode returns ``(distancePredictor, x, y, xmask, ymask,
    xem, labelList, weightList, box, trainByRefLoss)``; in prediction
    mode only ``(distancePredictor, x, y, xmask, ymask, xem)``.
    """
    rng = np.random.RandomState()

    # x holds sequential features, y holds matrix (pairwise) features.
    x = T.tensor3('x')
    y = T.tensor4('y')
    # Masks for x and y, respectively.
    xmask = T.bmatrix('xmask')
    ymask = T.btensor3('ymask')

    xem = None
    if config.EmbeddingUsed(modelSpecs):
        xem = T.tensor3('xem')

    # Bounding box for cropping a big protein distance matrix at any position.
    box = None
    if forTrain:
        box = T.ivector('boundingbox')

    # trainByRefLoss is 1 or -1; when present, training mixes reference loss
    # with the real-data loss.
    trainByRefLoss = None
    if forTrain and config.TrainByRefLoss(modelSpecs):
        trainByRefLoss = T.iscalar('trainByRefLoss')

    distancePredictor = ResNet4DistMatrix(rng,
                                          seqInput=x,
                                          matrixInput=y,
                                          mask_seq=xmask,
                                          mask_matrix=ymask,
                                          embedInput=xem,
                                          boundingbox=box,
                                          modelSpecs=modelSpecs)

    # One label tensor per response; shape (batchSize, seqLen, seqLen) or
    # (batchSize, seqLen, seqLen, valueDims). "w" tensors carry 16-bit ints
    # for discrete labels.
    labelList = []
    if forTrain:
        for response in modelSpecs['responses']:
            labelType = Response2LabelType(response)
            rValDims = GetResponseValueDims(response)
            if labelType.startswith('Discrete'):
                make_label = T.wtensor4 if rValDims > 1 else T.wtensor3
            else:
                make_label = T.tensor4 if rValDims > 1 else T.tensor3
            labelList.append(make_label('Tlabel4' + response))

    # Optional per-label sample weights, shape (batchSize, seqLen, seqLen).
    weightList = []
    if labelList and config.UseSampleWeight(modelSpecs):
        weightList = [T.tensor3('Tweight4' + response)
                      for response in modelSpecs['responses']]

    if forTrain:
        return (distancePredictor, x, y, xmask, ymask, xem,
                labelList, weightList, box, trainByRefLoss)
    else:
        return distancePredictor, x, y, xmask, ymask, xem
def __init__(self, rng, batchsize=100, activation=relu):
    """Build the char+word CNN classifier and compile train/test functions.

    Parameters
    ----------
    rng : random state used to initialise the layer weights.
    batchsize : number of tweets per minibatch.
    activation : nonlinearity for the word-level conv and hidden layers.
    """
    import loader
    (numsent, charcnt, wordcnt, maxwordlen, maxsenlen,
     kchr, kwrd, xchr, xwrd, y) = loader.read("tweets_clean.txt")

    dimword = 30  # word embedding size
    dimchar = 5   # char embedding size
    clword = 300  # word-level conv feature maps
    clchar = 50   # char-level conv feature maps
    kword = kwrd  # word-level filter width
    kchar = kchr  # char-level filter width

    (datatrainword, datatestword, datatrainchar, datatestchar,
     targettrain, targettest) = train_test_split(
        xwrd, xchr, y, random_state=1234, test_size=0.1)

    xtrainword = theano.shared(np.asarray(datatrainword, dtype='int16'), borrow=True)
    xtrainchar = theano.shared(np.asarray(datatrainchar, dtype='int16'), borrow=True)
    ytrain = theano.shared(np.asarray(targettrain, dtype='int8'), borrow=True)
    xtestword = theano.shared(np.asarray(datatestword, dtype='int16'), borrow=True)
    xtestchar = theano.shared(np.asarray(datatestchar, dtype='int16'), borrow=True)
    ytest = theano.shared(np.asarray(targettest, dtype='int8'), borrow=True)

    # FIX: floor division so the batch counts stay integers under Python 3;
    # true division yields a float and breaks range()-style iteration.
    self.ntrainbatches = xtrainword.get_value(borrow=True).shape[0] // batchsize
    self.ntestbatches = xtestword.get_value(borrow=True).shape[0] // batchsize

    # Symbolic inputs; `train` (1/0) toggles dropout.
    index = T.iscalar()
    xwrd = T.wmatrix('xwrd')
    xchr = T.wtensor3('xchr')
    y = T.bvector('y')
    train = T.iscalar('train')

    # Char path: embed chars, convolve within each word, max-pool to one
    # clchar-dimensional vector per word.
    layercharembedinput = xchr
    layercharembed = EmbedIDLayer(rng,
                                  layercharembedinput,
                                  ninput=charcnt,
                                  noutput=dimchar)
    layer1input = layercharembed.output.reshape(
        (batchsize * maxsenlen, 1, maxwordlen, dimchar))
    layer1 = ConvolutionalLayer(rng,
                                layer1input,
                                filter_shape=(clchar, 1, kchar, dimchar),
                                image_shape=(batchsize * maxsenlen, 1,
                                             maxwordlen, dimchar))
    layer2 = MaxPoolingLayer(layer1.output,
                             poolsize=(maxwordlen - kchar + 1, 1))

    # Word path: plain word-id embedding.
    layerwordembedinput = xwrd
    layerwordembed = EmbedIDLayer(rng,
                                  layerwordembedinput,
                                  ninput=wordcnt,
                                  noutput=dimword)

    # Concatenate word and char representations along the feature axis.
    layer3wordinput = layerwordembed.output.reshape(
        (batchsize, 1, maxsenlen, dimword))
    layer3charinput = layer2.output.reshape(
        (batchsize, 1, maxsenlen, clchar))
    layer3input = T.concatenate([layer3wordinput, layer3charinput], axis=3)
    layer3 = ConvolutionalLayer(rng,
                                layer3input,
                                filter_shape=(clword, 1, kword,
                                              dimword + clchar),
                                image_shape=(batchsize, 1, maxsenlen,
                                             dimword + clchar),
                                activation=activation)
    layer4 = MaxPoolingLayer(layer3.output,
                             poolsize=(maxsenlen - kword + 1, 1))

    # Classifier head: two fully-connected layers with dropout.
    layer5input = layer4.output.reshape((batchsize, clword))
    layer5 = FullyConnectedLayer(rng,
                                 dropout(rng, layer5input, train),
                                 ninput=clword,
                                 noutput=50,
                                 activation=activation)
    layer6input = layer5.output
    layer6 = FullyConnectedLayer(rng,
                                 dropout(rng, layer6input, train, p=0.1),
                                 ninput=50,
                                 noutput=2,
                                 activation=None)

    result = Result(layer6.output, y)
    loss = result.negativeloglikelihood()
    accuracy = result.accuracy()
    params = layer6.params\
        +layer5.params\
        +layer3.params\
        +layerwordembed.params\
        +layer1.params\
        +layercharembed.params
    updates = RMSprop(learningrate=0.001, params=params).updates(loss)

    self.trainmodel = theano.function(
        inputs=[index],
        outputs=[loss, accuracy],
        updates=updates,
        givens={
            xwrd: xtrainword[index * batchsize:(index + 1) * batchsize],
            xchr: xtrainchar[index * batchsize:(index + 1) * batchsize],
            y: ytrain[index * batchsize:(index + 1) * batchsize],
            train: np.cast['int32'](1)
        })
    self.testmodel = theano.function(
        inputs=[index],
        outputs=[loss, accuracy],
        givens={
            xwrd: xtestword[index * batchsize:(index + 1) * batchsize],
            xchr: xtestchar[index * batchsize:(index + 1) * batchsize],
            y: ytest[index * batchsize:(index + 1) * batchsize],
            train: np.cast['int32'](0)
        })