# --- Sensitive-value user-data section: build (if missing) and load the
# --- pickled base/user datasets, then convert training chars to int32.
# NOTE(review): `trainfile`, `testfile`, `word2v_file`, `char2v_file`,
# `base_datafile` and `get_data` are assumed to be defined earlier in the
# file — confirm before running this section standalone.
dataname = 'cckscner.user.data.SensitiV'
user_datafile = "./model/model_data/" + dataname + ".pkl"
batch_size = 8
data_split = 1
retrain = False
Test = True
valid = False
Label = True

# Preprocess raw text into the pickled dataset only on first run.
if not os.path.exists(user_datafile):
    print("Process data....")
    get_data(trainfile=trainfile, testfile=testfile,
             w2v_file=word2v_file, c2v_file=char2v_file,
             base_datafile=base_datafile, user_datafile=user_datafile,
             w2v_k=300, c2v_k=100, data_split=data_split, maxlen=50)
# Fixed typo in message ("extisted" -> "existed").
print("data has existed: " + user_datafile)

print('loading base data ...')
# Use a context manager so the file handle is closed (the original
# `pickle.load(open(...))` leaked it).
with open(base_datafile, 'rb') as f:
    char_vob, target_vob, \
        idex_2char, idex_2target, \
        char_W, \
        char_k, \
        max_s = pickle.load(f)

print('loading user data ...')
with open(user_datafile, 'rb') as f:
    train, train_SensitiV, train_label, \
        test, test_SensitiV, test_label = pickle.load(f)

trainx_char = np.asarray(train, dtype="int32")
# --- CCKS18 CNER char+word CRF driver: preprocess data if missing, then
# --- train (or retrain) the end-to-end model and optionally test it.
# NOTE(review): `trainfile`, `modelname`, `get_data` and `train_e2e_model`
# are assumed to be defined earlier in the file — confirm before running.
testfile = "./data/test.txt"
char2v_file = "./data/CCKS18CNER_Char2Vec.txt"
word2v_file = "./data/CCKS18CNER_Word2Vec.txt"
datafile = "./data/model/data5.pkl"
modelfile = "./data/model/model_char_word_CRF51.h5"
resultdir = "./data/result/"
batch_size = 32
retrain = True
Test = True
valid = False
Label = True

# Preprocess raw text into the pickled dataset only on first run.
# Fixed typo in message ("Precess" -> "Process").
if not os.path.exists(datafile):
    print("Process data....")
    get_data(trainfile=trainfile, testfile=testfile,
             w2v_file=word2v_file, char2v_file=char2v_file,
             datafile=datafile, w2v_k=100, char_emd_dim=100, maxlen=50)

if not os.path.exists(modelfile):
    # First-time training: the dataset exists but no saved model yet.
    # Fixed typo in message ("extisted" -> "existed").
    print("Lstm data has existed: " + datafile)
    print("Training EE model....")
    print(modelfile)
    train_e2e_model(modelname, datafile, modelfile, resultdir,
                    npochos=100, hidden_dim=200,
                    batch_size=batch_size, retrain=False)
else:
    # A saved model exists; continue training only when requested.
    if retrain:
        print("ReTraining EE model....")
        train_e2e_model(modelname, datafile, modelfile, resultdir,
                        npochos=100, hidden_dim=200,
                        batch_size=batch_size, retrain=retrain)

if Test:
    # NOTE(review): the test call itself is outside this visible chunk;
    # the body presumably continues below.
    print("test EE model....")
# --- Alternate (MSRA) data section, currently disabled; the active code
# --- below reuses `datafile` defined earlier in the file.
# word2v_file = ""
# dataname = 'MSRA.data_WSUI.2'
# datafile = "./data/model_data/" + dataname + ".pkl"
# batch_size = 32

# ------------------2
retrain = False
Test = True
valid = False
Label = True

# Preprocess raw text into the pickled dataset only on first run.
# NOTE(review): `trainfile`, `testfile`, `char2v_file`, `datafile` and
# `get_data` must be defined earlier in the file.
if not os.path.exists(datafile):
    print("Process data....")
    get_data(trainfile=trainfile, testfile=testfile, w2v_file='',
             c2v_file=char2v_file, datafile=datafile,
             w2v_k=300, c2v_k=100, maxlen=50)
# Fixed typo in message ("extisted" -> "existed").
print("data has existed: " + datafile)

print('loading data ...')
# Use a context manager so the file handle is closed (the original
# `pickle.load(open(...))` leaked it).
with open(datafile, 'rb') as f:
    train, train_label, test, test_label, \
        train_posi, test_posi, train_word, test_word, \
        char_vob, target_vob, posi_vob, word_vob, \
        idex_2char, idex_2target, \
        character_W, posi_W, word_W, \
        character_k, posi_k, word_k, max_s = pickle.load(f)

trainx_char = np.asarray(train, dtype="int32")
trainy = np.asarray(train_label, dtype="int32")