def evaluation_mbrnn():
    """Evaluate a DynamicWeightHybridModel on the noise-free test split.

    Only the test split is loaded (``load_train=False``, ``noise_percent=0``).
    The samples are converted with the word2vec model, wrapped in float32
    tensors and passed to ``evaluate``.  Precision, recall and f1-score are
    printed and the model weights are then saved to ``DW_HYBRID_600_PATH``.

    NOTE(review): more functions named ``evaluation_mbrnn`` appear later in
    this source; if they share one module the last definition wins.
    """
    samples, labels = load_data_for_rnn_new_add_noise(
        data_set=3,
        train_num=1,
        test_num=400,
        load_train=False,
        noise_percent=0,
    )
    model = DynamicWeightHybridModel(
        time_step=TIME_STEP,
        feature_size=WORD2VEC_FEATURE_NUM,
        rnn_utils=RNN_UTILS,
        rnn_layers_num=RNN_LAYERS_NUM,
        brnn_model_path=BRNN_700_PATH,
        fnn_model_path=JOINT_100_PATH,
    )
    offset = 0

    # The return value is ignored: the label list is adjusted in place.
    process_rnn_label_list(labels, time_step=model.time_step, begin=offset)
    samples = trans_to_wordvec_by_word2vec(
        samples,
        feature_size=WORD2VEC_FEATURE_NUM,
        word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
        type='rnn',
        time_step=model.time_step,
        begin=offset,
    )
    samples = tf.constant(samples, dtype=tf.float32)
    labels = tf.constant(labels, dtype=tf.float32)

    # With return_weight=True evaluate yields a pair; the second item is not
    # needed here.
    metrics, _weights = model.evaluate(samples, labels, return_weight=True)
    report = 'test data precision:{}, recall:{}, f1-score:{}'
    print(report.format(metrics['precision'], metrics['recall'],
                        metrics['f1-score']))
    model.save_weights(DW_HYBRID_600_PATH)
def evaluation_mbrnn_load_model():
    """Evaluate a GRUModel restored from ``BRNN_700_PATH`` on noisy test data.

    The data is loaded with ``noise_type='shuffle'`` and ``noise_percent=10``.
    Only the test split is used: it is vectorised with the word2vec model,
    wrapped in float32 tensors and evaluated.  Precision, recall and f1-score
    are printed.

    NOTE(review): other functions named ``evaluation_mbrnn_load_model``
    appear later in this source; if they share one module the last definition
    wins.
    """
    # The loader returns four values in this mode; the train split is unused.
    _, _, test_x, test_y = load_data_for_rnn_new_add_noise(
        data_set=3,
        train_num=100,
        test_num=400,
        noise_percent=10,
        noise_type='shuffle',
    )
    brnn = GRUModel(
        time_step=TIME_STEP,
        feature_size=WORD2VEC_FEATURE_NUM,
        rnn_utils=RNN_UTILS,
        rnn_layers_num=RNN_LAYERS_NUM,
    )
    brnn.load_weights(BRNN_700_PATH)
    begin = 0

    # Modifies the label list in place so all entries share one dimension.
    process_rnn_label_list(test_y, time_step=brnn.time_step, begin=begin)
    # Use the same feature size the model was built with, not a separate
    # hard-coded literal, so the input width cannot drift from the model's.
    test_x = trans_to_wordvec_by_word2vec(
        test_x,
        feature_size=WORD2VEC_FEATURE_NUM,
        word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
        type='rnn',
        time_step=brnn.time_step,
        begin=begin,
    )
    test_x = tf.constant(test_x, dtype=tf.float32)
    test_y = tf.constant(test_y, dtype=tf.float32)

    ev = brnn.evaluate(test_x, test_y)
    template = 'test data precision:{}, recall:{}, f1-score:{}'
    print(template.format(ev['precision'], ev['recall'], ev['f1-score']))
def evaluation_mbrnn_load_model():
    """Evaluate a restored KnowledgeDistillModel on noisy test data.

    The model is built with ``fnn_model_path=FNN_700_PATH`` and its weights
    are loaded from ``KNOWLEDGE_DISTILL_MODEL_700_PATH``.  The test split
    (``noise_type='shuffle'``, ``noise_percent=10``) is evaluated twice: once
    with the default ``evaluate`` call (printed under ``b-brnn:``) and once
    with ``choose=1`` (printed under ``fnn:``).

    NOTE(review): other functions named ``evaluation_mbrnn_load_model``
    appear in this source; if they share one module the last definition wins.
    """
    # The loader returns four values in this mode; the train split is unused.
    _, _, test_x, test_y = load_data_for_rnn_new_add_noise(
        data_set=3,
        train_num=1,
        test_num=400,
        noise_type='shuffle',
        noise_percent=10,
    )
    brnn = KnowledgeDistillModel(
        time_step=TIME_STEP,
        feature_size=WORD2VEC_FEATURE_NUM,
        rnn_utils=RNN_UTILS,
        rnn_layers_num=RNN_LAYERS_NUM,
        fnn_model_path=FNN_700_PATH,
    )
    brnn.load_weights(KNOWLEDGE_DISTILL_MODEL_700_PATH)
    begin = 0

    # The return value is ignored: the label list is adjusted in place.
    process_rnn_label_list(test_y, time_step=brnn.time_step, begin=begin)
    # Use the same feature size the model was built with, not a separate
    # hard-coded literal, so the input width cannot drift from the model's.
    test_x = trans_to_wordvec_by_word2vec(
        test_x,
        feature_size=WORD2VEC_FEATURE_NUM,
        word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
        type='rnn',
        time_step=brnn.time_step,
        begin=begin,
    )
    test_x = tf.constant(test_x, dtype=tf.float32)
    test_y = tf.constant(test_y, dtype=tf.float32)

    template = 'test data precision:{}, recall:{}, f1-score:{}'

    ev = brnn.evaluate(test_x, test_y)
    print('b-brnn:')
    print(template.format(ev['precision'], ev['recall'], ev['f1-score']))

    ev = brnn.evaluate(test_x, test_y, choose=1)
    print('fnn:')
    print(template.format(ev['precision'], ev['recall'], ev['f1-score']))
def evaluation_joint():
    """Train an FNNModel on noisy data, evaluate it and save its weights.

    Data is loaded with ``noise_percent=10``.  Both splits are vectorised
    with the word2vec model and wrapped in float32 tensors.  The model is
    fitted (``batchsz=5``, ``epochs=12``) and evaluated with ``choose=0``.
    Precision, recall and f1-score are printed, then the weights are saved to
    ``JOINT_100_PATH``.
    """
    # Single source for the feature width shared by the model and both
    # word2vec conversions.
    feature_size = 100

    train_x, train_y, test_x, test_y = load_data_for_rnn_new_add_noise(
        data_set=3, train_num=700, test_num=400, noise_percent=10)
    brnn = FNNModel(time_step=12, feature_size=feature_size)
    begin = 0

    # Modifies the label lists in place so all entries share one dimension.
    process_rnn_label_list(train_y, time_step=brnn.time_step, begin=begin)
    process_rnn_label_list(test_y, time_step=brnn.time_step, begin=begin)

    train_x = trans_to_wordvec_by_word2vec(
        train_x,
        feature_size=feature_size,
        word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
        type='rnn',
        time_step=brnn.time_step,
        begin=begin,
    )
    test_x = trans_to_wordvec_by_word2vec(
        test_x,
        feature_size=feature_size,
        word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
        type='rnn',
        time_step=brnn.time_step,
        begin=begin,
    )
    train_x = tf.constant(train_x, dtype=tf.float32)
    train_y = tf.constant(train_y, dtype=tf.float32)
    test_x = tf.constant(test_x, dtype=tf.float32)
    test_y = tf.constant(test_y, dtype=tf.float32)

    brnn.fit(train_x, train_y, batchsz=5, epochs=12)
    ev = brnn.evaluate(test_x, test_y, choose=0)
    template = 'test data precision:{}, recall:{}, f1-score:{}'
    print(template.format(ev['precision'], ev['recall'], ev['f1-score']))
    brnn.save_weights(JOINT_100_PATH)
def evaluation_load_model():
    """Evaluate an FNNModel restored from ``JOINT_100_PATH`` on noisy test data.

    Only the test split is loaded (``load_train=False``,
    ``noise_percent=10``).  It is vectorised with the word2vec model, wrapped
    in float32 tensors and evaluated with ``choose=1``.  Precision, recall
    and f1-score are printed.
    """
    samples, labels = load_data_for_rnn_new_add_noise(
        load_train=False,
        data_set=3,
        train_num=700,
        test_num=400,
        noise_percent=10,
    )
    model = FNNModel(time_step=12, feature_size=100)
    model.load_weights(JOINT_100_PATH)
    offset = 0

    # The return value is ignored: the label list is adjusted in place.
    process_rnn_label_list(labels, time_step=model.time_step, begin=offset)
    samples = trans_to_wordvec_by_word2vec(
        samples,
        feature_size=100,
        word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
        type='rnn',
        time_step=model.time_step,
        begin=offset,
    )
    samples = tf.constant(samples, dtype=tf.float32)
    labels = tf.constant(labels, dtype=tf.float32)

    metrics = model.evaluate(samples, labels, choose=1)
    report = 'test data precision:{}, recall:{}, f1-score:{}'
    print(report.format(metrics['precision'], metrics['recall'],
                        metrics['f1-score']))
def evaluation_mbrnn_best_params():
    """Report f1-scores of a restored KnowledgeDistillModel over 11 noise levels.

    Eleven test sets are loaded with ``noise_percent`` running from 0 to 10,
    vectorised with the word2vec model and wrapped in float32 tensors.  The
    model restored from ``KNOWLEDGE_DISTILL_MODEL_700_PATH`` is evaluated on
    every set twice: with the default ``evaluate`` call (summary labelled
    ``brnn``) and with ``choose=1`` (summary labelled ``fnn``).  Each
    f1-score and the mean per pass are printed.

    NOTE(review): the original comment says the anomaly ratio runs from 0 to
    100% and the printed label is ``i * 10``, but the loader receives
    ``noise_percent=i`` (0..10).  Confirm the loader's units.
    """
    test_xs, test_ys = [], []
    # Anomaly ratio sweep (original comment: "from 0 to 100%").
    for level in range(11):
        raw_x, raw_y = load_data_for_rnn_new_add_noise(
            data_set=3,
            train_num=1,
            test_num=400,
            noise_percent=level,
            load_train=False,
        )
        test_xs.append(raw_x)
        test_ys.append(raw_y)

    begin = 0
    for idx, (raw_x, raw_y) in enumerate(zip(test_xs, test_ys)):
        # The return value is ignored: the label list is adjusted in place.
        process_rnn_label_list(raw_y, time_step=TIME_STEP, begin=begin)
        vectors = trans_to_wordvec_by_word2vec(
            raw_x,
            feature_size=WORD2VEC_FEATURE_NUM,
            word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
            type='rnn',
            time_step=TIME_STEP,
            begin=begin,
        )
        test_xs[idx] = tf.constant(vectors, dtype=tf.float32)
        test_ys[idx] = tf.constant(raw_y, dtype=tf.float32)
    print('-----------------------------------------------')

    model = KnowledgeDistillModel(
        time_step=TIME_STEP,
        feature_size=WORD2VEC_FEATURE_NUM,
        rnn_utils=RNN_UTILS,
        rnn_layers_num=RNN_LAYERS_NUM,
    )
    model.load_weights(KNOWLEDGE_DISTILL_MODEL_700_PATH)
    set_count = len(test_xs)

    def report_mean_f1(summary_label, **evaluate_kwargs):
        # Print the f1-score of every test set, then the mean over all sets.
        total = 0
        for idx, (inputs, targets) in enumerate(zip(test_xs, test_ys)):
            scores = model.evaluate(inputs, targets, **evaluate_kwargs)
            print(idx * 10, '%:', scores['f1-score'])
            total += scores['f1-score']
        total /= set_count
        print(summary_label, total)

    report_mean_f1('brnn mean f1-score:')
    report_mean_f1('fnn mean f1-score:', choose=1)
def evaluation_hbrnn():
    """Train an HBRNN model, then print its test precision / recall / f1-score.

    Data comes from ``load_data_for_rnn_new`` (``train_num=50``,
    ``test_num=400``).  Both splits are vectorised with the word2vec model
    using ``type='attention'`` and wrapped in float32 tensors.  The tensor
    shapes are printed before the model is fitted (``batchsz=10``,
    ``epochs=15``) and evaluated on the test split.
    """
    # Single source for values shared by the model and the conversions.
    time_step = 12
    feature_size = 100

    train_x, train_y, test_x, test_y = load_data_for_rnn_new(
        data_set=3, train_num=50, test_num=400)
    h_brnn = HBRNN(
        time_step=time_step,
        feature_size=feature_size,
        rnn_utils=64,
        rnn_layers_num=1,
        hidden_vector_size=64,
        word_num=MAX_LEN,
    )
    begin = 0

    # Modifies the label lists in place so all entries share one dimension.
    process_rnn_label_list(train_y, time_step=h_brnn.time_step, begin=begin)
    process_rnn_label_list(test_y, time_step=h_brnn.time_step, begin=begin)

    train_x = trans_to_wordvec_by_word2vec(
        train_x,
        feature_size=feature_size,
        word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
        type='attention',
        time_step=h_brnn.time_step,
        begin=begin,
    )
    test_x = trans_to_wordvec_by_word2vec(
        test_x,
        feature_size=feature_size,
        word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
        type='attention',
        time_step=h_brnn.time_step,
        begin=begin,
    )
    train_x = tf.constant(train_x, dtype=tf.float32)
    train_y = tf.constant(train_y, dtype=tf.float32)
    test_x = tf.constant(test_x, dtype=tf.float32)
    test_y = tf.constant(test_y, dtype=tf.float32)
    print(train_x.shape, train_y.shape)
    print(test_x.shape, test_y.shape)

    h_brnn.fit(train_x, train_y, batchsz=10, epochs=15)
    ev = h_brnn.evaluate(test_x, test_y)
    template = 'test data precision:{}, recall:{}, f1-score:{}'
    print(template.format(ev['precision'], ev['recall'], ev['f1-score']))
def evaluation_mbrnn():
    """Train a GRUModel on 'swap'-noised data, evaluate it and save its weights.

    Data is loaded with ``noise_type='swap'`` and ``noise_percent=10``.  Both
    splits are vectorised with the word2vec model and wrapped in float32
    tensors.  The model is fitted (``batchsz=5``, ``epochs=15``) and
    evaluated on the test split.  Precision, recall and f1-score are printed,
    then the weights are saved to ``BRNN_600_PATH``.

    NOTE(review): other functions named ``evaluation_mbrnn`` appear in this
    source; if they share one module the last definition wins.
    """
    # Single source for the feature width shared by the model and both
    # word2vec conversions.
    feature_size = 100

    train_x, train_y, test_x, test_y = load_data_for_rnn_new_add_noise(
        data_set=3,
        train_num=600,
        test_num=400,
        noise_percent=10,
        noise_type='swap',
    )
    brnn = GRUModel(time_step=12, feature_size=feature_size, rnn_utils=64,
                    rnn_layers_num=2)
    begin = 0

    # Modifies the label lists in place so all entries share one dimension.
    process_rnn_label_list(train_y, time_step=brnn.time_step, begin=begin)
    process_rnn_label_list(test_y, time_step=brnn.time_step, begin=begin)

    train_x = trans_to_wordvec_by_word2vec(
        train_x,
        feature_size=feature_size,
        word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
        type='rnn',
        time_step=brnn.time_step,
        begin=begin,
    )
    test_x = trans_to_wordvec_by_word2vec(
        test_x,
        feature_size=feature_size,
        word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
        type='rnn',
        time_step=brnn.time_step,
        begin=begin,
    )
    train_x = tf.constant(train_x, dtype=tf.float32)
    train_y = tf.constant(train_y, dtype=tf.float32)
    test_x = tf.constant(test_x, dtype=tf.float32)
    test_y = tf.constant(test_y, dtype=tf.float32)

    brnn.fit(train_x, train_y, batchsz=5, epochs=15)
    ev = brnn.evaluate(test_x, test_y)
    template = 'test data precision:{}, recall:{}, f1-score:{}'
    print(template.format(ev['precision'], ev['recall'], ev['f1-score']))
    brnn.save_weights(BRNN_600_PATH)
def evaluation_mbrnn1():
    """Fit a TeacherModel on the test split of data set 1.

    The model is built from ``BBRNN_MODEL_PATH`` and ``FNN_MODEL_PATH``.  The
    test split is vectorised with the word2vec model, wrapped in float32
    tensors and passed to ``fit`` (``batchsz=10``, ``epochs=10``).  Nothing
    is evaluated, printed or saved here.

    NOTE(review): ``fit`` is called on the test split and the train split is
    never used.  Confirm this is intentional.
    """
    # The loader returns four values in this mode; the train split is unused.
    _, _, test_x, test_y = load_data_for_rnn_new_add_noise(
        data_set=1, train_num=1, test_num=400)
    brnn = TeacherModel(
        time_step=TIME_STEP,
        feature_size=WORD2VEC_FEATURE_NUM,
        rnn_utils=RNN_UTILS,
        rnn_layers_num=RNN_LAYERS_NUM,
        brnn_model_path=BBRNN_MODEL_PATH,
        fnn_model_path=FNN_MODEL_PATH,
    )
    begin = 0

    # The return value is ignored: the label list is adjusted in place.
    process_rnn_label_list(test_y, time_step=brnn.time_step, begin=begin)
    test_x = trans_to_wordvec_by_word2vec(
        test_x,
        feature_size=WORD2VEC_FEATURE_NUM,
        word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
        type='rnn',
        time_step=brnn.time_step,
        begin=begin,
    )
    test_x = tf.constant(test_x, dtype=tf.float32)
    test_y = tf.constant(test_y, dtype=tf.float32)

    brnn.fit(test_x, test_y, batchsz=10, epochs=10)
def evaluation_mbrnn():
    """Train a GRUModel1, evaluate it on the test split and save its weights.

    Data comes from ``load_data_for_rnn_new`` (``train_num=500``,
    ``test_num=400``).  Both splits are vectorised with the word2vec model
    and wrapped in float32 tensors.  The model is fitted (``batchsz=10``,
    ``epochs=15``) and evaluated.  Precision, recall and f1-score are
    printed, then the weights are saved under ``ROOT_PATH``.

    NOTE(review): earlier functions in this source are also named
    ``evaluation_mbrnn``; if they share one module the last definition wins.
    """
    # Single source for the feature width shared by the model and both
    # word2vec conversions.
    feature_size = 100

    train_x, train_y, test_x, test_y = load_data_for_rnn_new(
        data_set=3, train_num=500, test_num=400)
    brnn = GRUModel1(time_step=12, feature_size=feature_size, rnn_utils=64,
                     rnn_layers_num=2)
    begin = 0

    # Modifies the label lists in place so all entries share one dimension.
    process_rnn_label_list(train_y, time_step=brnn.time_step, begin=begin)
    process_rnn_label_list(test_y, time_step=brnn.time_step, begin=begin)

    train_x = trans_to_wordvec_by_word2vec(
        train_x,
        feature_size=feature_size,
        word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
        type='rnn',
        time_step=brnn.time_step,
        begin=begin,
    )
    test_x = trans_to_wordvec_by_word2vec(
        test_x,
        feature_size=feature_size,
        word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
        type='rnn',
        time_step=brnn.time_step,
        begin=begin,
    )
    train_x = tf.constant(train_x, dtype=tf.float32)
    train_y = tf.constant(train_y, dtype=tf.float32)
    test_x = tf.constant(test_x, dtype=tf.float32)
    test_y = tf.constant(test_y, dtype=tf.float32)

    brnn.fit(train_x, train_y, batchsz=10, epochs=15)
    ev = brnn.evaluate(test_x, test_y)
    template = 'test data precision:{}, recall:{}, f1-score:{}'
    print(template.format(ev['precision'], ev['recall'], ev['f1-score']))

    # The path is built with a literal backslash separator, as in the
    # original; it is kept unchanged so the save location does not move.
    model_path = ROOT_PATH + '\\wbrnn_feature_block_attention_11_24_19_30'
    brnn.save_weights(model_path)
def evaluation_mbrnn_load_model():
    """Compare a restored DynamicWeightHybridModel on clean and noisy test data.

    Two test sets are loaded with ``noise_type='shuffle'``: one with
    ``noise_percent=0`` and one with ``noise_percent=10``.  Both are
    vectorised with the word2vec model and wrapped in float32 tensors.  The
    model restored from ``DW_HYBRID_600_PATH`` is evaluated on each with
    ``return_weight=True``.  Precision, recall and f1-score are printed, the
    clean set first.  The returned weights of both sets are drawn as a
    scatter plot, saved as ``lspd_weight_distribution.pdf`` under
    ``ROOT_PATH`` and then shown.

    NOTE(review): earlier functions in this source are also named
    ``evaluation_mbrnn_load_model``; if they share one module the last
    definition wins.
    """
    # One value drives both loader calls and the x axis of the plot.
    test_num = 400

    test_x, test_y = load_data_for_rnn_new_add_noise(
        data_set=3,
        train_num=1,
        test_num=test_num,
        noise_type='shuffle',
        noise_percent=0,
        load_train=False,
    )
    test_x_noise, test_y_noise = load_data_for_rnn_new_add_noise(
        data_set=3,
        train_num=1,
        test_num=test_num,
        noise_type='shuffle',
        noise_percent=10,
        load_train=False,
    )
    brnn = DynamicWeightHybridModel(
        time_step=TIME_STEP,
        feature_size=WORD2VEC_FEATURE_NUM,
        rnn_utils=RNN_UTILS,
        rnn_layers_num=RNN_LAYERS_NUM,
    )
    brnn.load_weights(DW_HYBRID_600_PATH)
    begin = 0

    # Clean test set.  The label list is adjusted in place.
    process_rnn_label_list(test_y, time_step=brnn.time_step, begin=begin)
    # Use the same feature size the model was built with, not a separate
    # hard-coded literal, so the input width cannot drift from the model's.
    test_x = trans_to_wordvec_by_word2vec(
        test_x,
        feature_size=WORD2VEC_FEATURE_NUM,
        word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
        type='rnn',
        time_step=brnn.time_step,
        begin=begin,
    )
    test_x = tf.constant(test_x, dtype=tf.float32)
    test_y = tf.constant(test_y, dtype=tf.float32)

    # Noisy test set, prepared the same way.
    process_rnn_label_list(test_y_noise, time_step=brnn.time_step,
                           begin=begin)
    test_x_noise = trans_to_wordvec_by_word2vec(
        test_x_noise,
        feature_size=WORD2VEC_FEATURE_NUM,
        word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
        type='rnn',
        time_step=brnn.time_step,
        begin=begin,
    )
    test_x_noise = tf.constant(test_x_noise, dtype=tf.float32)
    test_y_noise = tf.constant(test_y_noise, dtype=tf.float32)

    ev_noise, weight_noise = brnn.evaluate(test_x_noise, test_y_noise,
                                           return_weight=True)
    ev, weight = brnn.evaluate(test_x, test_y, return_weight=True)

    template = 'test data precision:{}, recall:{}, f1-score:{}'
    print(template.format(ev['precision'], ev['recall'], ev['f1-score']))
    print(template.format(ev_noise['precision'], ev_noise['recall'],
                          ev_noise['f1-score']))

    # One x position per test sample.
    x = list(range(test_num))
    # First scatter series: noisy set, red triangles, 50% opacity.
    plt.scatter(x, weight_noise, c="r", alpha=0.5,
                label="abnormal test set", s=15, marker='^')
    # Second scatter series: clean set, green stars, 50% opacity.
    plt.scatter(x, weight, c="g", alpha=0.5,
                label="normal test set", s=15, marker='*')
    plt.xlabel('index of resume sample')
    plt.ylabel('total weight value of each time step ')
    plt.legend(loc='best')
    plt.title('weight value distribution')
    plt.savefig(ROOT_PATH + '\\lspd_weight_distribution.pdf')
    plt.show()