def predict_test(sess, model):
    """Score the test batches, print local P/R/F1 and save the class scores.

    For each of the ``n_te_batches`` batches returned by ``get_batch_t`` the
    model's ``y_pred`` is evaluated and passed through ``softmax``.  For every
    sample the five highest scores are handed to ``findindex``, whose result
    decides how many of the top-ranked labels are kept as the prediction.
    The (prediction, ground truth) pairs are scored with ``score_eval`` and
    the stacked score matrix is written to
    ``local_scores_path + model_name + '_test.npy'``.

    Args:
        sess: session object whose ``run(fetches, feed_dict)`` evaluates the model.
        model: object exposing ``y_pred``, ``X1_inputs``, ``X2_inputs``,
            ``batch_size``, ``tst`` and ``keep_prob``.

    Returns:
        None.  Results are printed and saved to disk.
    """
    time0 = time.time()
    predict_labels_list = list()  # kept label ids, one array per sample
    marked_labels_list = list()   # ground-truth labels, one entry per sample
    predict_scores = list()       # one softmax score matrix per batch
    for batch_idx in tqdm(xrange(n_te_batches)):
        [X1_batch, X2_batch, y_batch] = get_batch_t(batch_idx)
        marked_labels_list.extend(y_batch)
        # NOTE(review): the return value is unused.  The call is kept because
        # to_categorical is defined elsewhere and might modify the labels in
        # place (the original author observed the stored labels ending up
        # shifted by -1) -- confirm before removing.
        to_categorical(y_batch)
        _batch_size = len(X1_batch)
        fetches = [model.y_pred]  # raw score for every class
        # NOTE(review): predict_dev in this file additionally feeds
        # model.X1_length / model.X2_length; confirm whether the test path
        # needs them as well.
        feed_dict = {model.X1_inputs: X1_batch, model.X2_inputs: X2_batch,
                     model.batch_size: _batch_size,
                     model.tst: True, model.keep_prob: 1.0}
        predict_labels = softmax(sess.run(fetches, feed_dict)[0])
        predict_scores.append(predict_labels)
        for sample_scores in predict_labels:
            # Sort once: class ids ordered from highest to lowest score.
            ranked = sample_scores.argsort()[::-1]
            top5score = sample_scores[ranked[:5]]  # five largest scores
            cut = findindex(top5score)
            if cut is None:
                keep = 5    # no cut point reported: keep the whole top five
            elif cut == 0:
                keep = 1    # always predict at least the best label
            else:
                keep = cut
            predict_labels_list.append(ranked[:keep])
    # Materialise the pairs so they can be sliced and iterated more than once.
    predict_label_and_marked_label_list = list(
        zip(predict_labels_list, marked_labels_list))
    # Original author's note: a sample looks like
    # (array([ 15, 327, 307, 478, 10]), [8, 15, 307, 0]) while the raw labels
    # are [9, 16, 308, 1], i.e. both sides appear shifted by -1 (cause unknown).
    print(predict_label_and_marked_label_list[0:2])
    precision, recall, f1 = score_eval(predict_label_and_marked_label_list)
    print('Local test p=%g, r=%g, f1=%g' % (precision, recall, f1))
    # Stack the list directly: wrapping it in np.asarray first fails on recent
    # NumPy when the final batch has fewer rows than the others.
    predict_scores = np.vstack(predict_scores)
    print('predict_scores:', predict_scores.shape)
    local_scores_name = local_scores_path + model_name + '_test.npy'
    np.save(local_scores_name, predict_scores)  # persist the per-class scores
    print('local_scores.shape=', predict_scores.shape)
    print('Writed the test scores into %s, time %g s' % (local_scores_name, time.time() - time0))
def predict_dev(sess, model):
    """Score the validation batches, print local P/R/F1 and save the scores.

    For each of the ``n_va_batches`` batches returned by ``get_batch`` the
    model's ``y_pred`` is evaluated (feeding the sequence lengths computed by
    ``get_sequence_length``) and passed through ``softmax``.  For every sample
    the five highest scores are handed to ``findindex``, whose result decides
    how many of the top-ranked labels are kept as the prediction.  The
    (prediction, ground truth) pairs are scored with ``score_eval`` and the
    stacked score matrix is written to
    ``local_scores_path + model_name + '_dev.npy'``.

    Args:
        sess: session object whose ``run(fetches, feed_dict)`` evaluates the model.
        model: object exposing ``y_pred``, ``X1_inputs``, ``X2_inputs``,
            ``batch_size``, ``X1_length``, ``X2_length``, ``tst`` and
            ``keep_prob``.

    Returns:
        None.  Results are printed and saved to disk.
    """
    time0 = time.time()
    predict_labels_list = list()  # kept label ids, one array per sample
    marked_labels_list = list()   # ground-truth labels, one entry per sample
    predict_scores = list()       # one softmax score matrix per batch
    for batch_idx in tqdm(xrange(n_va_batches)):  # validation set
        [X1_batch, X2_batch, y_batch] = get_batch(batch_idx)
        X1_length, X2_length = get_sequence_length(X1_batch, X2_batch)
        marked_labels_list.extend(y_batch)
        # NOTE(review): the return value is unused.  The call is kept because
        # to_categorical is defined elsewhere and might modify the labels in
        # place (the original author observed the stored labels ending up
        # shifted by -1) -- confirm before removing.
        to_categorical(y_batch)
        _batch_size = len(X1_batch)
        fetches = [model.y_pred]  # raw score for every class
        feed_dict = {model.X1_inputs: X1_batch, model.X2_inputs: X2_batch,
                     model.batch_size: _batch_size,
                     model.X1_length: X1_length, model.X2_length: X2_length,
                     model.tst: True, model.keep_prob: 1.0}
        predict_labels = softmax(sess.run(fetches, feed_dict)[0])
        predict_scores.append(predict_labels)
        for sample_scores in predict_labels:
            # Sort once: class ids ordered from highest to lowest score.
            ranked = sample_scores.argsort()[::-1]
            top5score = sample_scores[ranked[:5]]  # five largest scores
            cut = findindex(top5score)
            if cut is None:
                keep = 5    # no cut point reported: keep the whole top five
            elif cut == 0:
                keep = 1    # always predict at least the best label
            else:
                keep = cut
            predict_labels_list.append(ranked[:keep])
    # Materialise the pairs so they can be sliced and iterated more than once.
    predict_label_and_marked_label_list = list(
        zip(predict_labels_list, marked_labels_list))
    # Original author's note: a sample looks like
    # (array([ 15, 327, 307, 478, 10]), [8, 15, 307, 0]) while the raw labels
    # are [9, 16, 308, 1], i.e. both sides appear shifted by -1 (cause unknown).
    print(predict_label_and_marked_label_list[0:2])
    precision, recall, f1 = score_eval(predict_label_and_marked_label_list)
    print('Local valid p=%g, r=%g, f1=%g' % (precision, recall, f1))
    # Stack the list directly: wrapping it in np.asarray first fails on recent
    # NumPy when the final batch has fewer rows than the others.
    predict_scores = np.vstack(predict_scores)
    print('predict_scores:', predict_scores.shape)
    local_scores_name = local_scores_path + model_name + '_dev.npy'
    np.save(local_scores_name, predict_scores)  # persist the per-class scores
    print('local_scores.shape=', predict_scores.shape)
    print('Writed the dev scores into %s, time %g s' % (local_scores_name, time.time() - time0))