def predict():
    """Run K-fold training and prediction with DCModel.

    Loads embeddings, vocabularies and the training/test sets, shuffles the
    training data deterministically, then for each of ``config.KFOLD`` folds
    trains a fresh model, keeps the epoch with the best dev score, and copies
    that epoch's prediction file to ``./Data/result/best_<fold>``.

    Side effects: deletes everything under ``./Data/result/`` before the run
    and writes per-fold result files there.  Returns None.
    """
    word_weights, tag_weights = load_embedding()
    word_voc, tag_voc, label_voc = load_voc()

    # Train data.
    sentences, tags, labels = load_train_data(word_voc, tag_voc, label_voc)

    # Shuffle all three arrays with ONE shared permutation so rows stay
    # aligned.  This is deterministic and yields exactly the same ordering
    # as the old "reseed the global RNG before each of three shuffles"
    # trick (np.random.permutation is a shuffle of arange under the same
    # seed), but without mutating global NumPy RNG state.
    perm = np.random.RandomState(137).permutation(len(labels))
    sentences, tags, labels = sentences[perm], tags[perm], labels[perm]

    # Test data; gold labels are unknown at prediction time.
    sentences_test, tags_test = load_test_data(word_voc, tag_voc, label_voc)
    labels_test = None

    # Clear previous results.  os.system (unlike os.popen, which returns a
    # pipe and runs asynchronously unless read) blocks until the command
    # finishes, so stale files are really gone before training starts.
    os.system('rm ./Data/result/*')

    # Precompute the K-fold train/dev index splits.
    kf = KFold(n_splits=config.KFOLD)
    splits = list(kf.split(labels))

    for num, (train_index, dev_index) in enumerate(splits):
        sentences_train, sentences_dev = sentences[train_index], sentences[dev_index]
        tags_train, tags_dev = tags[train_index], tags[dev_index]
        labels_train, labels_dev = labels[train_index], labels[dev_index]

        # A fresh model per fold so folds do not share learned weights.
        model = DCModel(
            config.MAX_LEN, word_weights, tag_weights,
            result_path='./Data/result/result.txt', label_voc=label_voc)

        # Fit; the model tracks its best dev score and the epoch it occurred.
        model.fit(
            sentences_train, tags_train, labels_train,
            sentences_dev, tags_dev, labels_dev,
            sentences_test, tags_test, labels_test,
            config.BATCH_SIZE, config.NB_EPOCH,
            keep_prob=config.KEEP_PROB,
            word_keep_prob=config.WORD_KEEP_PROB,
            tag_keep_prob=config.TAG_KEEP_PROB)

        print(model.get_best_score())
        [p_test, r_test, f_test], nb_epoch = model.get_best_score()

        # Keep the best epoch's predictions for this fold.  nb_epoch is
        # 0-based while the result files are numbered from 1, hence +1.
        command = 'cp ./Data/result/epoch_%d.csv ./Data/result/best_%d' % (nb_epoch + 1, num)
        print(command)
        os.system(command)  # blocking, unlike os.popen
        print(p_test, r_test, f_test, '\n')

        # Release TF resources before the next fold.
        model.clear_model()
        del model
from load_data import load_embedding, load_voc, load_train_data, load_test_data import time from generator import BatchGenerator from TFNN.layers.EmbeddingLayer import Embedding from sklearn.model_selection import KFold from triggerType_to_trigger import get_trigger ''' For Chinese word segmentation. ''' #############################1.load data ###################################### class_type = 3 training_count = 16796 test_count = 2570 word_weights, tag_weights = load_embedding() #矩阵形式 word_voc, tag_voc, label_voc = load_voc() #字典形式 sentences, tags, labels = load_train_data(word_voc, tag_voc, label_voc, class_type, training_count) Xend_sentence, Xend_tag_test, yend_test = load_test_data( word_voc, tag_voc, label_voc, class_type, test_count) #划分训练集,测试集(这里的y为词性tag kf = KFold(n_splits=10) train_indices, dev_indices = [], [] for train_index, dev_index in kf.split(labels): train_indices.append(train_index) dev_indices.append(dev_index) for num in range(10): train_index, dev_index = train_indices[num], dev_indices[num] sentences_train, sentences_dev = sentences[train_index], sentences[