def __init__(self, n_voc, trainset, testset, dataname, classes, prefix):
    if prefix is not None:
        prefix += '/'
    self.trainset = trainset
    self.testset = testset

    # Symbolic inputs: padded word-index matrix plus masks and lengths.
    docs = T.imatrix()
    label = T.ivector()
    length = T.fvector()
    sentencenum = T.fvector()
    wordmask = T.fmatrix()
    sentencemask = T.fmatrix()
    maxsentencenum = T.iscalar()
    isTrain = T.iscalar()  # declared but unused in this model
    rng = numpy.random

    # Hierarchical network: word embedding -> word LSTM -> sentence vectors
    # (mean pooling) -> sentence LSTM -> document vector (mean pooling) ->
    # hidden layer -> softmax classifier.
    layers = []
    layers.append(EmbLayer(rng, docs, n_voc, 200, 'emblayer', dataname, prefix))
    layers.append(LSTMLayer(rng, layers[-1].output, wordmask, 200, 200, 'wordlstmlayer', prefix))
    layers.append(MeanPoolLayer(layers[-1].output, length))
    layers.append(SentenceSortLayer(layers[-1].output, maxsentencenum))
    layers.append(LSTMLayer(rng, layers[-1].output, sentencemask, 200, 200, 'sentencelstmlayer', prefix))
    layers.append(MeanPoolLayer(layers[-1].output, sentencenum))
    layers.append(HiddenLayer(rng, layers[-1].output, 200, 200, 'fulllayer', prefix))
    layers.append(HiddenLayer(rng, layers[-1].output, 200, int(classes), 'softmaxlayer', prefix,
                              activation=T.nnet.softmax))
    self.layers = layers

    # Negative log-likelihood of the gold labels under the softmax output.
    cost = -T.mean(T.log(layers[-1].output)[T.arange(label.shape[0]), label], acc_dtype='float32')
    correct = T.sum(T.eq(T.argmax(layers[-1].output, axis=1), label), acc_dtype='int32')
    err = T.argmax(layers[-1].output, axis=1) - label
    mse = T.sum(err * err)

    params = []
    for layer in layers:
        params += layer.params
    # L2 regularisation on every parameter except the embedding table.
    L2_rate = numpy.float32(1e-5)
    for param in params[1:]:
        cost += T.sum(L2_rate * (param * param), acc_dtype='float32')
    gparams = [T.grad(cost, param) for param in params]
    updates = AdaUpdates(params, gparams, 0.95, 1e-6)

    self.train_model = theano.function(
        inputs=[docs, label, length, sentencenum, wordmask, sentencemask, maxsentencenum],
        outputs=cost,
        updates=updates,
    )
    self.test_model = theano.function(
        inputs=[docs, label, length, sentencenum, wordmask, sentencemask, maxsentencenum],
        outputs=[correct, mse],
    )
def __init__(self, n_voc, trainset, testset, em_path, classes, prefix):
    if prefix is not None:
        prefix += '/'
    self.trainset = trainset
    self.testset = testset

    docs = T.imatrix()
    label = T.ivector()
    length = T.fvector()
    sentencenum = T.fvector()
    wordmask = T.fmatrix()
    sentencemask = T.fmatrix()
    maxsentencenum = T.iscalar()
    isTrain = T.iscalar()  # declared but unused in this model
    rng = numpy.random

    # Same hierarchical stack as above, but the embedding layer is
    # initialised from the pre-trained embedding file at em_path.
    layers = []
    layers.append(EmbLayer(em_path, rng, docs, n_voc, 200, 'emblayer', prefix))
    layers.append(LSTMLayer(rng, layers[-1].output, wordmask, 200, 200, 'wordlstmlayer', prefix))
    layers.append(MeanPoolLayer(layers[-1].output, length))
    layers.append(SentenceSortLayer(layers[-1].output, maxsentencenum))
    layers.append(LSTMLayer(rng, layers[-1].output, sentencemask, 200, 200, 'sentencelstmlayer', prefix))
    layers.append(MeanPoolLayer(layers[-1].output, sentencenum))
    layers.append(HiddenLayer(rng, layers[-1].output, 200, 200, 'fulllayer', prefix))
    # Keep this layer's output as the document feature vector; the compiled
    # function returns it as a numpy.ndarray.
    feature_vector = layers[-1].output
    layers.append(HiddenLayer(rng, layers[-1].output, 200, int(classes), 'softmaxlayer', prefix,
                              activation=T.nnet.softmax))
    # The softmax layer outputs one row per document (2 columns for binary
    # classification): the class-membership probabilities, which sum to 1.
    predict_probability = layers[-1].output
    self.layers = layers

    cost = -T.mean(T.log(layers[-1].output)[T.arange(label.shape[0]), label], acc_dtype='float32')
    correct = T.sum(T.eq(T.argmax(layers[-1].output, axis=1), label), acc_dtype='int32')
    err = T.argmax(layers[-1].output, axis=1) - label
    mse = T.sum(err * err)
    # Predicted class index (0 or 1) for each document.
    predict_label = T.argmax(layers[-1].output, axis=1)

    params = []
    for layer in layers:
        params += layer.params
    # L2 regularisation on every parameter except the embedding table.
    L2_rate = numpy.float32(1e-5)
    for param in params[1:]:
        cost += T.sum(L2_rate * (param * param), acc_dtype='float32')
    gparams = [T.grad(cost, param) for param in params]
    updates = AdaUpdates(params, gparams, 0.95, 1e-6)

    self.train_model = theano.function(
        inputs=[docs, label, length, sentencenum, wordmask, sentencemask, maxsentencenum],
        outputs=[cost, feature_vector],
        updates=updates,
        mode='FAST_RUN',
    )
    self.test_model = theano.function(
        inputs=[docs, label, length, sentencenum, wordmask, sentencemask, maxsentencenum],
        outputs=[correct, mse, predict_label, feature_vector, predict_probability],
        mode='FAST_RUN',
    )
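# Usage sketch for the variant above (an assumption, not part of the repo:
# the wrapping class name `Model` and the batch variables are hypothetical;
# batches must be padded and masked by the repo's own data-loading code,
# which is not shown here):
#
#   model = Model(n_voc, trainset, testset, em_path, classes, prefix)
#   correct, mse, pred_labels, feats, probs = model.test_model(
#       docs, label, length, sentencenum, wordmask, sentencemask, maxsentencenum)
#   # `feats` is the 200-dim document representation from 'fulllayer';
#   # `probs` holds the softmax rows, `pred_labels` their argmax.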
def __init__(self, n_voc, n_usr, n_prd, trainset, testset, dataname, classes, prefix):
    if prefix is not None:
        prefix += '/'
    self.trainset = trainset
    self.testset = testset

    docs = T.imatrix()
    label = T.ivector()
    usr = T.ivector()
    prd = T.ivector()
    wordmask = T.fmatrix()
    sentencemask = T.fmatrix()
    maxsentencenum = T.iscalar()
    isTrain = T.iscalar()  # declared but unused in this model
    rng = numpy.random

    # Embedding tables for words, users, and products.
    layers = []
    docsemb = EmbLayer(rng, docs, n_voc, 200, 'emblayer', dataname, prefix)
    Uemb = UsrEmbLayer(rng, n_usr, 200, 'usremblayer', prefix)
    Pemb = PrdEmbLayer(rng, n_prd, 200, 'prdemblayer', prefix)
    layers.append(docsemb)
    layers.append(Uemb)
    layers.append(Pemb)

    # Word level: LSTM over words, then user/product-conditioned attention
    # composes each sentence vector.
    layers.append(LSTMLayer(rng, docsemb.output, wordmask, 200, 200, 'wordlstmlayer', prefix))
    uemb_sentence = GetuEmbLayer(usr, Uemb.output, maxsentencenum, 'uemb_sentence', prefix)
    pemb_sentence = GetpEmbLayer(prd, Pemb.output, maxsentencenum, 'pemb_sentence', prefix)
    layers.append(AttentionLayer(rng, layers[-1].output, uemb_sentence.output, pemb_sentence.output,
                                 wordmask, 200, 200, 200, 200, 'wordattentionLayer', prefix))

    # Sentence level: LSTM over sentence vectors, then a second
    # user/product-conditioned attention composes the document vector.
    layers.append(SentenceSortLayer(layers[-1].output, maxsentencenum, prefix))
    layers.append(LSTMLayer(rng, layers[-1].output, sentencemask, 200, 200, 'sentencelstmlayer', prefix))
    uemb_doc = GetuEmbLayer(usr, Uemb.output, maxsentencenum, 'uemb_doc', prefix)
    pemb_doc = GetpEmbLayer(prd, Pemb.output, maxsentencenum, 'pemb_doc', prefix)
    layers.append(AttentionLayer(rng, layers[-1].output, uemb_doc.output, pemb_doc.output,
                                 sentencemask, 200, 200, 200, 200, 'sentenceattentionLayer', prefix))
    layers.append(HiddenLayer(rng, layers[-1].output, 200, 200, 'fulllayer', prefix))
    layers.append(HiddenLayer(rng, layers[-1].output, 200, int(classes), 'softmaxlayer', prefix,
                              activation=T.nnet.softmax))
    self.layers = layers

    cost = -T.mean(T.log(layers[-1].output)[T.arange(label.shape[0]), label], acc_dtype='float32')
    correct = T.sum(T.eq(T.argmax(layers[-1].output, axis=1), label), acc_dtype='int32')
    err = T.argmax(layers[-1].output, axis=1) - label
    mse = T.sum(err * err)

    params = []
    for layer in layers:
        params += layer.params
    # L2 regularisation on every parameter except the three embedding tables.
    L2_rate = numpy.float32(1e-5)
    for param in params[3:]:
        cost += T.sum(L2_rate * (param * param), acc_dtype='float32')
    gparams = [T.grad(cost, param) for param in params]
    updates = AdaUpdates(params, gparams, 0.95, 1e-6)

    self.train_model = theano.function(
        inputs=[docs, label, usr, prd, wordmask, sentencemask, maxsentencenum],
        outputs=cost,
        updates=updates,
    )
    self.test_model = theano.function(
        inputs=[docs, label, usr, prd, wordmask, sentencemask, maxsentencenum],
        outputs=[correct, mse],
    )
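# Usage sketch for the user/product attention model (hypothetical names, as
# above; `usr` and `prd` are per-document integer id vectors indexing the
# user and product embedding tables):
#
#   model = Model(n_voc, n_usr, n_prd, trainset, testset, dataname, classes, prefix)
#   cost = model.train_model(docs, label, usr, prd,
#                            wordmask, sentencemask, maxsentencenum)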
def __init__(self, n_voc, trainset, testset, dataname, classes, prefix):
    if prefix is not None:
        prefix += '/'
    self.trainset = trainset
    self.testset = testset
    self.classes = int(classes)

    docs = T.imatrix()
    label = T.imatrix()  # multi-label: one binary indicator row per document
    length = T.fvector()
    wordmask = T.fmatrix()
    sentencemask = T.fmatrix()
    maxsentencenum = T.iscalar()
    sentencenum = T.fvector()
    isTrain = T.iscalar()  # declared but unused in this model
    rng = numpy.random

    # Alternative stack with attention instead of mean pooling (kept for reference):
    # layers = []
    # layers.append(EmbLayer(rng, docs, n_voc, 50, 'emblayer', dataname, prefix))
    # layers.append(LSTMLayer(rng, layers[-1].output, wordmask, 50, 50, 'wordlstmlayer', prefix))
    # layers.append(SimpleAttentionLayer(rng, layers[-1].output, wordmask, 50, 50, 'wordattentionlayer', prefix))
    # layers.append(SentenceSortLayer(layers[-1].output, maxsentencenum, prefix))
    # layers.append(LSTMLayer(rng, layers[-1].output, sentencemask, 50, 50, 'sentencelstmlayer', prefix))
    # layers.append(SimpleAttentionLayer(rng, layers[-1].output, sentencemask, 50, 50, 'sentenceattentionlayer', prefix))
    # layers.append(HiddenLayer(rng, layers[-1].output, 50, 50, 'fulllayer', prefix))
    # layers.append(HiddenLayer(rng, layers[-1].output, 50, int(classes), 'softmaxlayer', prefix, activation=T.nnet.sigmoid))
    # self.layers = layers

    # Mean-pooling stack with per-label sigmoid outputs for multi-label
    # classification.
    layers = []
    layers.append(EmbLayer(rng, docs, n_voc, 50, 'emblayer', dataname, prefix))
    layers.append(LSTMLayer(rng, layers[-1].output, wordmask, 50, 50, 'wordlstmlayer', prefix))
    layers.append(MeanPoolLayer(layers[-1].output, length))
    layers.append(SentenceSortLayer(layers[-1].output, maxsentencenum))
    layers.append(LSTMLayer(rng, layers[-1].output, sentencemask, 50, 50, 'sentencelstmlayer', prefix))
    layers.append(MeanPoolLayer(layers[-1].output, sentencenum))
    layers.append(HiddenLayer(rng, layers[-1].output, 50, 50, 'fulllayer', prefix))
    layers.append(HiddenLayer(rng, layers[-1].output, 50, int(classes), 'softmaxlayer', prefix,
                              activation=T.nnet.sigmoid))
    self.layers = layers

    predict = layers[-1].output
    # Per-label binary cross-entropy, summed over labels and averaged over
    # documents.
    cost = T.nnet.binary_crossentropy(layers[-1].output, label).sum(1)
    cost = cost.mean()

    # Multi-label correctness metrics, computed outside the graph; kept as
    # commented reference below:
    # predicted_value = ((layers[-1].output) >= EVALUTION_THRESHOLD_FOR_MULTI_LABEL).astype(int)
    # predicted_value = predicted_value.astype(bool)
    # true_value = label.astype(bool)
    # equal = true_value == predicted_value
    # match = np.sum(equal, axis=1) == np.size(equal, axis=1)
    # # value 1: exact match ratio
    # exact_match_ratio = np.sum(match) / np.size(match)
    # true_and_predict = np.sum(true_value & predicted_value, axis=1)
    # true_or_predict = np.sum(true_value | predicted_value, axis=1)
    # # value 2: accuracy
    # accuracy = np.mean(true_and_predict / true_or_predict)
    # # value 3: precision
    # precision = np.mean(true_and_predict / (np.sum(predicted_value, axis=1) + 1e-9))
    # # value 4: recall
    # recall = np.mean(true_and_predict / np.sum(true_value, axis=1))
    # # F1 measure
    # F1_Measure = np.mean((true_and_predict * 2) / (np.sum(true_value, axis=1) + np.sum(predicted_value, axis=1)))
    # # Hamming loss
    # HammingLoss = np.mean(true_value ^ predicted_value)
    # # per-label counts for micro/macro F1
    # TP = np.sum(true_value & predicted_value, axis=0, dtype=np.int32)
    # FP = np.sum((~true_value) & predicted_value, axis=0, dtype=np.int32)
    # FN = np.sum(true_value & (~predicted_value), axis=0, dtype=np.int32)
    # _P = np.sum(TP) / (np.sum(TP) + np.sum(FP) + 1e-9)
    # _R = np.sum(TP) / (np.sum(TP) + np.sum(FN) + 1e-9)
    # Micro_F1 = (2 * _P * _R) / (_P + _R)
    # _P_t = TP / (TP + FP + 1e-9)
    # _R_t = TP / (TP + FN + 1e-9)
    # Macro_F1 = np.mean((2 * _P_t * _R_t) / (_P_t + _R_t + 1e-9))

    # Single-label loss and metrics from the original model (not applicable
    # to multi-label output):
    # cost = -T.mean(T.log(layers[-1].output)[T.arange(label.shape[0]), label], acc_dtype='float32')
    # correct = T.sum(T.eq(T.argmax(layers[-1].output, axis=1), label), acc_dtype='int32')
    # err = T.argmax(layers[-1].output, axis=1) - label
    # mse = T.sum(err * err)

    params = []
    for layer in layers:
        params += layer.params
    # L2 regularisation on every parameter except the embedding table.
    L2_rate = numpy.float32(1e-5)
    for param in params[1:]:
        cost += T.sum(L2_rate * (param * param), acc_dtype='float32')
    gparams = [T.grad(cost, param) for param in params]
    updates = AdaUpdates(params, gparams, 0.95, 1e-6)

    self.train_model = theano.function(
        inputs=[docs, label, length, sentencenum, wordmask, sentencemask, maxsentencenum],
        outputs=cost,
        updates=updates,
    )
    self.test_model = theano.function(
        inputs=[docs, length, sentencenum, wordmask, sentencemask, maxsentencenum],
        outputs=predict,
    )
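# Evaluation sketch for the multi-label model (a minimal sketch mirroring the
# commented-out metric code above; the 0.5 threshold, `model`, and the batch
# variables are assumptions, not values fixed by the repo):
#
#   import numpy as np
#   probs = model.test_model(docs, length, sentencenum,
#                            wordmask, sentencemask, maxsentencenum)
#   predicted = probs >= 0.5                 # per-label sigmoid threshold
#   true = batch_label.astype(bool)
#   hamming_loss = np.mean(true ^ predicted)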