def evaluation_joint():
    """Train an ``FNNModel`` on noisy resume data, print test metrics, save weights.

    Loads data set 3 (700 training / 400 test samples, ``noise_percent=10``),
    fits for 12 epochs with batch size 5, evaluates with ``choose=0`` and
    writes the weights to ``JOINT_100_PATH``.
    """
    train_x, train_y, test_x, test_y = load_data_for_rnn_new_add_noise(
        data_set=3, train_num=700, test_num=400, noise_percent=10)

    model = FNNModel(time_step=12, feature_size=100)
    steps = model.time_step
    begin = 0

    # The label lists are modified in place so that they share one dimension.
    process_rnn_label_list(train_y, time_step=steps, begin=begin)
    process_rnn_label_list(test_y, time_step=steps, begin=begin)

    # Both splits are vectorised with exactly the same word2vec settings.
    embed_options = dict(
        feature_size=100,
        word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
        type='rnn',
        time_step=steps,
        begin=begin,
    )
    train_x = trans_to_wordvec_by_word2vec(train_x, **embed_options)
    test_x = trans_to_wordvec_by_word2vec(test_x, **embed_options)

    train_x = tf.constant(train_x, dtype=tf.float32)
    train_y = tf.constant(train_y, dtype=tf.float32)
    test_x = tf.constant(test_x, dtype=tf.float32)
    test_y = tf.constant(test_y, dtype=tf.float32)

    model.fit(train_x, train_y, batchsz=5, epochs=12)
    ev = model.evaluate(test_x, test_y, choose=0)

    template = 'test data precision:{}, recall:{}, f1-score:{}'
    print(template.format(ev['precision'], ev['recall'], ev['f1-score']))

    model.save_weights(JOINT_100_PATH)
def evaluation_mbrnn_load_model():
    """Evaluate a ``GRUModel`` restored from ``BRNN_700_PATH`` on noisy test data.

    The loader is asked for both splits (``noise_type='shuffle'``,
    ``noise_percent=10``) but only the test split is used; nothing is trained.
    """
    _, _, test_x, test_y = load_data_for_rnn_new_add_noise(
        data_set=3,
        train_num=100,
        test_num=400,
        noise_percent=10,
        noise_type='shuffle',
    )

    model = GRUModel(
        time_step=TIME_STEP,
        feature_size=WORD2VEC_FEATURE_NUM,
        rnn_utils=RNN_UTILS,
        rnn_layers_num=RNN_LAYERS_NUM,
    )
    model.load_weights(BRNN_700_PATH)

    begin = 0
    steps = model.time_step

    # Labels are adjusted in place before the texts are vectorised.
    process_rnn_label_list(test_y, time_step=steps, begin=begin)
    test_x = trans_to_wordvec_by_word2vec(
        test_x,
        feature_size=100,
        word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
        type='rnn',
        time_step=steps,
        begin=begin,
    )
    test_x = tf.constant(test_x, dtype=tf.float32)
    test_y = tf.constant(test_y, dtype=tf.float32)

    ev = model.evaluate(test_x, test_y)
    template = 'test data precision:{}, recall:{}, f1-score:{}'
    print(template.format(ev['precision'], ev['recall'], ev['f1-score']))
def evaluation_mbrnn():
    """Evaluate a ``DynamicWeightHybridModel`` on clean test data and save it.

    The model is built from ``BRNN_700_PATH`` and ``JOINT_100_PATH``, evaluated
    on 400 test samples loaded with ``noise_percent=0`` and then written to
    ``DW_HYBRID_600_PATH``.
    """
    test_x, test_y = load_data_for_rnn_new_add_noise(
        data_set=3, train_num=1, test_num=400, load_train=False, noise_percent=0)

    hybrid = DynamicWeightHybridModel(
        time_step=TIME_STEP,
        feature_size=WORD2VEC_FEATURE_NUM,
        rnn_utils=RNN_UTILS,
        rnn_layers_num=RNN_LAYERS_NUM,
        brnn_model_path=BRNN_700_PATH,
        fnn_model_path=JOINT_100_PATH,
    )
    steps = hybrid.time_step
    begin = 0

    process_rnn_label_list(test_y, time_step=steps, begin=begin)
    test_x = trans_to_wordvec_by_word2vec(
        test_x,
        feature_size=WORD2VEC_FEATURE_NUM,
        word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
        type='rnn',
        time_step=steps,
        begin=begin,
    )
    test_x = tf.constant(test_x, dtype=tf.float32)
    test_y = tf.constant(test_y, dtype=tf.float32)

    # The weights returned alongside the metrics are not needed here.
    ev, _ = hybrid.evaluate(test_x, test_y, return_weight=True)
    template = 'test data precision:{}, recall:{}, f1-score:{}'
    print(template.format(ev['precision'], ev['recall'], ev['f1-score']))

    hybrid.save_weights(DW_HYBRID_600_PATH)
def evaluation_mbrnn_load_model():
    """Evaluate a restored ``KnowledgeDistillModel`` in its two evaluation modes.

    Weights come from ``KNOWLEDGE_DISTILL_MODEL_700_PATH``. The model is
    evaluated once with default arguments (printed under ``b-brnn:``) and once
    with ``choose=1`` (printed under ``fnn:``) on shuffled-noise test data.
    """
    _, _, test_x, test_y = load_data_for_rnn_new_add_noise(
        data_set=3,
        train_num=1,
        test_num=400,
        noise_type='shuffle',
        noise_percent=10,
    )

    distilled = KnowledgeDistillModel(
        time_step=TIME_STEP,
        feature_size=WORD2VEC_FEATURE_NUM,
        rnn_utils=RNN_UTILS,
        rnn_layers_num=RNN_LAYERS_NUM,
        fnn_model_path=FNN_700_PATH,
    )
    distilled.load_weights(KNOWLEDGE_DISTILL_MODEL_700_PATH)

    begin = 0
    steps = distilled.time_step
    process_rnn_label_list(test_y, time_step=steps, begin=begin)
    test_x = trans_to_wordvec_by_word2vec(
        test_x,
        feature_size=100,
        word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
        type='rnn',
        time_step=steps,
        begin=begin,
    )
    test_x = tf.constant(test_x, dtype=tf.float32)
    test_y = tf.constant(test_y, dtype=tf.float32)

    template = 'test data precision:{}, recall:{}, f1-score:{}'
    # Each entry: heading to print, extra keyword arguments for evaluate().
    for heading, extra in (('b-brnn:', {}), ('fnn:', {'choose': 1})):
        ev = distilled.evaluate(test_x, test_y, **extra)
        print(heading)
        print(template.format(ev['precision'], ev['recall'], ev['f1-score']))
def evaluation_hbrnn():
    """Train an ``HBRNN`` on 50 resumes and print its metrics on 400 test resumes.

    Texts are vectorised with ``type='attention'``; the tensor shapes of both
    splits are printed before training (15 epochs, batch size 10).
    """
    train_x, train_y, test_x, test_y = load_data_for_rnn_new(
        data_set=3, train_num=50, test_num=400)

    model = HBRNN(
        time_step=12,
        feature_size=100,
        rnn_utils=64,
        rnn_layers_num=1,
        hidden_vector_size=64,
        word_num=MAX_LEN,
    )
    steps = model.time_step
    begin = 0

    # The label lists are modified in place so that they share one dimension.
    process_rnn_label_list(train_y, time_step=steps, begin=begin)
    process_rnn_label_list(test_y, time_step=steps, begin=begin)

    embed_options = dict(
        feature_size=100,
        word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
        type='attention',
        time_step=steps,
        begin=begin,
    )
    train_x = trans_to_wordvec_by_word2vec(train_x, **embed_options)
    test_x = trans_to_wordvec_by_word2vec(test_x, **embed_options)

    train_x = tf.constant(train_x, dtype=tf.float32)
    train_y = tf.constant(train_y, dtype=tf.float32)
    test_x = tf.constant(test_x, dtype=tf.float32)
    test_y = tf.constant(test_y, dtype=tf.float32)
    print(train_x.shape, train_y.shape)
    print(test_x.shape, test_y.shape)

    model.fit(train_x, train_y, batchsz=10, epochs=15)
    ev = model.evaluate(test_x, test_y)
    template = 'test data precision:{}, recall:{}, f1-score:{}'
    print(template.format(ev['precision'], ev['recall'], ev['f1-score']))
def evaluation_mbrnn():
    """Train a two-layer ``GRUModel`` on swap-noise data, report metrics, save it.

    Uses 600 training / 400 test samples of data set 3 with
    ``noise_type='swap'`` and ``noise_percent=10``; trains for 15 epochs with
    batch size 5 and stores the weights at ``BRNN_600_PATH``.
    """
    train_x, train_y, test_x, test_y = load_data_for_rnn_new_add_noise(
        data_set=3,
        train_num=600,
        test_num=400,
        noise_percent=10,
        noise_type='swap',
    )

    model = GRUModel(time_step=12, feature_size=100, rnn_utils=64,
                     rnn_layers_num=2)
    steps = model.time_step
    begin = 0

    # The label lists are modified in place so that they share one dimension.
    process_rnn_label_list(train_y, time_step=steps, begin=begin)
    process_rnn_label_list(test_y, time_step=steps, begin=begin)

    embed_options = dict(
        feature_size=100,
        word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
        type='rnn',
        time_step=steps,
        begin=begin,
    )
    train_x = trans_to_wordvec_by_word2vec(train_x, **embed_options)
    test_x = trans_to_wordvec_by_word2vec(test_x, **embed_options)

    train_x = tf.constant(train_x, dtype=tf.float32)
    train_y = tf.constant(train_y, dtype=tf.float32)
    test_x = tf.constant(test_x, dtype=tf.float32)
    test_y = tf.constant(test_y, dtype=tf.float32)

    model.fit(train_x, train_y, batchsz=5, epochs=15)
    ev = model.evaluate(test_x, test_y)
    template = 'test data precision:{}, recall:{}, f1-score:{}'
    print(template.format(ev['precision'], ev['recall'], ev['f1-score']))

    model.save_weights(BRNN_600_PATH)
def evaluation_load_model():
    """Evaluate an ``FNNModel`` restored from ``JOINT_100_PATH`` with ``choose=1``.

    Only the test split (400 samples, ``noise_percent=10``) is loaded.
    """
    test_x, test_y = load_data_for_rnn_new_add_noise(
        load_train=False,
        data_set=3,
        train_num=700,
        test_num=400,
        noise_percent=10,
    )

    model = FNNModel(time_step=12, feature_size=100)
    model.load_weights(JOINT_100_PATH)

    begin = 0
    steps = model.time_step
    process_rnn_label_list(test_y, time_step=steps, begin=begin)
    test_x = trans_to_wordvec_by_word2vec(
        test_x,
        feature_size=100,
        word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
        type='rnn',
        time_step=steps,
        begin=begin,
    )
    test_x = tf.constant(test_x, dtype=tf.float32)
    test_y = tf.constant(test_y, dtype=tf.float32)

    ev = model.evaluate(test_x, test_y, choose=1)
    template = 'test data precision:{}, recall:{}, f1-score:{}'
    print(template.format(ev['precision'], ev['recall'], ev['f1-score']))
def evaluation_mbrnn():
    """Train a ``GRUModel1`` on 500 resumes, print test metrics and save weights.

    Trains for 15 epochs with batch size 10 and saves under ``ROOT_PATH`` with
    the suffix ``wbrnn_feature_block_attention_11_24_19_30``.
    """
    train_x, train_y, test_x, test_y = load_data_for_rnn_new(
        data_set=3, train_num=500, test_num=400)

    model = GRUModel1(time_step=12, feature_size=100, rnn_utils=64,
                      rnn_layers_num=2)
    steps = model.time_step
    begin = 0

    # The label lists are modified in place so that they share one dimension.
    process_rnn_label_list(train_y, time_step=steps, begin=begin)
    process_rnn_label_list(test_y, time_step=steps, begin=begin)

    embed_options = dict(
        feature_size=100,
        word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
        type='rnn',
        time_step=steps,
        begin=begin,
    )
    train_x = trans_to_wordvec_by_word2vec(train_x, **embed_options)
    test_x = trans_to_wordvec_by_word2vec(test_x, **embed_options)

    train_x = tf.constant(train_x, dtype=tf.float32)
    train_y = tf.constant(train_y, dtype=tf.float32)
    test_x = tf.constant(test_x, dtype=tf.float32)
    test_y = tf.constant(test_y, dtype=tf.float32)

    model.fit(train_x, train_y, batchsz=10, epochs=15)
    ev = model.evaluate(test_x, test_y)
    template = 'test data precision:{}, recall:{}, f1-score:{}'
    print(template.format(ev['precision'], ev['recall'], ev['f1-score']))

    weights_path = ROOT_PATH + '\\wbrnn_feature_block_attention_11_24_19_30'
    model.save_weights(weights_path)
def test_model():
    """Segment one resume file and print the predictions of a saved ``FNNModel``.

    Each segmented module is echoed followed by a separator line, then the
    raw prediction and the prediction mapped through ``number2label`` are
    printed together.
    """
    resume_path = 'D:\\Download\\简历模板.docx'
    classifier = FNNModel(time_step=12, feature_size=100)
    classifier.load_weights(FNN_MODEL_PATH)

    resume_modules = segment_one_resume_from_file(resume_path)
    for section in resume_modules:
        print(section)
        print('-------------------------------------------------------------------------')

    # The vectoriser receives a list of resumes; here it holds a single one.
    batch = [resume_modules]
    inputs = trans_to_wordvec_by_word2vec(
        batch,
        feature_size=100,
        word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
        type='rnn',
        time_step=classifier.time_step,
    )

    raw_prediction = classifier.predict(inputs)
    labelled_prediction = classifier.predict(inputs, number2label=number2label)
    print(raw_prediction, labelled_prediction)
def evaluation_mbrnn_best_params():
    """Report per-noise-level and mean F1 of a restored ``KnowledgeDistillModel``.

    Eleven test sets are loaded with ``noise_percent`` 0..10 (the original
    comment describes this as an anomaly ratio from 0 to 100%, and results are
    printed as ``level * 10`` percent). The model is evaluated on every set
    with default arguments ("brnn") and again with ``choose=1`` ("fnn").
    """
    test_xs, test_ys = [], []
    for level in range(0, 11):
        features, labels = load_data_for_rnn_new_add_noise(
            data_set=3,
            train_num=1,
            test_num=400,
            noise_percent=level,
            load_train=False,
        )
        test_xs.append(features)
        test_ys.append(labels)

    begin = 0
    for idx, labels in enumerate(test_ys):
        process_rnn_label_list(labels, time_step=TIME_STEP, begin=begin)
        test_xs[idx] = trans_to_wordvec_by_word2vec(
            test_xs[idx],
            feature_size=WORD2VEC_FEATURE_NUM,
            word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
            type='rnn',
            time_step=TIME_STEP,
            begin=begin,
        )
        test_xs[idx] = tf.constant(test_xs[idx], dtype=tf.float32)
        test_ys[idx] = tf.constant(labels, dtype=tf.float32)
        print('-----------------------------------------------')

    model = KnowledgeDistillModel(
        time_step=TIME_STEP,
        feature_size=WORD2VEC_FEATURE_NUM,
        rnn_utils=RNN_UTILS,
        rnn_layers_num=RNN_LAYERS_NUM,
    )
    model.load_weights(KNOWLEDGE_DISTILL_MODEL_700_PATH)

    set_count = len(test_xs)

    def mean_f1(**evaluate_options):
        # Evaluate every noise level, echo its F1 and return the average.
        total = 0
        for idx, features in enumerate(test_xs):
            ev = model.evaluate(features, test_ys[idx], **evaluate_options)
            print(idx * 10, '%:', ev['f1-score'])
            total += ev['f1-score']
        return total / set_count

    print('brnn mean f1-score:', mean_f1())
    print('fnn mean f1-score:', mean_f1(choose=1))
def test_model():
    """Run a restored ``DynamicWeightHybridModel`` on one hard-coded sample resume.

    The resume modules are given inline (segmenting ``file_path`` is disabled),
    each module is printed followed by a separator line, and the raw
    prediction plus the prediction mapped through ``number2label`` are printed.
    """
    # Only referenced by the disabled segmentation call below.
    file_path = 'D:\\Download\\简历模板.docx'
    brnn_save = DynamicWeightHybridModel(time_step=TIME_STEP,
                                         feature_size=WORD2VEC_FEATURE_NUM,
                                         rnn_utils=RNN_UTILS,
                                         rnn_layers_num=RNN_LAYERS_NUM)
    brnn_save.load_weights(DW_HYBRID_700_PATH)
    # Disabled: one_resume = segment_one_resume_from_file(file_path)
    # Inline sample: one string per resume module, in document order.
    one_resume = [
        """计算机中级 英语""",
        """ """,
        """姓 名:孙XX 性 别:男 出生年月:1992.07 籍 贯:广东湛江 身 高:170cm 政治面貌:团员 学 历:高技/专科 专 业:室内设计 手 机:13XXXXXXXX94 电子邮箱:[email protected] 在读院校:广州市XXXXXXXXX术学院 """,
        """ 计算机中级 英语 """,
        """ 深圳印刷玩具兼职开机员, 一味餐厅兼职后厨; 河源精雕装饰材料店兼职; 泰康人寿职员; """,
        """ 在学校担任班干部- “橘阳话剧社”社员; 加入“英语爱好者学会”成为了一名英语爱好者; """,
        """ 掌握WORD、EXCEL、POWERPOINT、AutoCAD、3ds max、 精通AutoCAD绘图与建模; 在大学期间,培养了我较强的组织能力和较强的责任心。课余时间一直在腾讯课堂增强专业知识,完善各个方面的能力。"""
    ]
    for module in one_resume:
        print(module)
        print(
            '-------------------------------------------------------------------------'
        )
    # The vectoriser receives a list of resumes; here it holds a single one.
    text_list = [one_resume]
    inputs = trans_to_wordvec_by_word2vec(
        text_list,
        feature_size=100,
        word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
        type='rnn',
        time_step=brnn_save.time_step)
    # First value: predict() with defaults; second: predict() given number2label.
    print(brnn_save.predict(inputs),
          brnn_save.predict(inputs, number2label=number2label))
def evaluation_mbrnn1():
    """Fit a ``TeacherModel`` built from saved B-BRNN and FNN model paths.

    NOTE(review): ``fit`` is called on the *test* split returned by the
    loader and the training split is discarded - confirm this is intended.
    """
    _, _, test_x, test_y = load_data_for_rnn_new_add_noise(
        data_set=1, train_num=1, test_num=400)

    teacher = TeacherModel(
        time_step=TIME_STEP,
        feature_size=WORD2VEC_FEATURE_NUM,
        rnn_utils=RNN_UTILS,
        rnn_layers_num=RNN_LAYERS_NUM,
        brnn_model_path=BBRNN_MODEL_PATH,
        fnn_model_path=FNN_MODEL_PATH,
    )
    steps = teacher.time_step
    begin = 0

    process_rnn_label_list(test_y, time_step=steps, begin=begin)
    test_x = trans_to_wordvec_by_word2vec(
        test_x,
        feature_size=WORD2VEC_FEATURE_NUM,
        word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
        type='rnn',
        time_step=steps,
        begin=begin,
    )
    test_x = tf.constant(test_x, dtype=tf.float32)
    test_y = tf.constant(test_y, dtype=tf.float32)

    teacher.fit(test_x, test_y, batchsz=10, epochs=10)
def segment_one_resume_list_format(txt_list, nn_model_w=None, word2vec_model=None):
    """
    Split the rows of one resume into modules and attach a tag to each module.

    Rows whose ``Label`` is 0 are appended to the module being accumulated.
    Any other row closes that module: the accumulated text is vectorised,
    classified with ``nn_model_w`` and stored under the predicted tag.

    :param txt_list: DataFrame, one row per text line
        [ [txt,font1,font2,...,label,title], [txt,font1,font2,...,label,title], ...];
        the columns read here are 'Label', 'Text' and 'Title'
    :param nn_model_w: classifier with a ``predict`` method; loaded via
        ``load_models`` when None
    :param word2vec_model: word2vec model forwarded to the vectoriser; loaded
        from ``word2vec_model_path_zhwiki_rnn_update_20_923`` when None
    :return: [[key,txt], [key,txt],...]
    """
    length = len(txt_list)
    modules = []
    key_loc = 0  # index of the most recent row with a non-zero Label
    last_key = 'base_info'  # key used for the trailing module if no such row was seen
    one_module = ''
    if nn_model_w is None:
        nn_model_w = load_models(muti_textcnn_api_model_update2_path_zhwiki_corpus_word2vec)
        # Disabled: nn_model_w.build((None, 350, 100))
        nn_model_w.summary()
    if word2vec_model is None:
        word2vec_model = gensim.models.word2vec.Word2Vec.load(word2vec_model_path_zhwiki_rnn_update_20_923)
    for i in range(length):
        if txt_list.iloc[i, :]['Label'] == 0:
            # Body line: extend the current module (every line gets a leading '\n').
            one_module += '\n' + txt_list.iloc[i, :]['Text']
        else:
            # Non-zero Label: classify whatever has been accumulated so far.
            # NOTE(review): this also runs when one_module is still empty,
            # e.g. when the very first row has a non-zero Label.
            text_array = trans_to_wordvec_by_word2vec([one_module], feature_size=WORD2VEC_FEATURE_NUM, type='cnn',
                                                      max_len=MAX_LEN, word2vec_model=word2vec_model)  # convert to word vectors
            res = nn_model_w.predict(text_array)
            res_list = list(res[0])
            # The position of the highest score selects the tag.
            tag = number2label[res_list.index(max(res[0]))]
            # [1:] drops the leading '\n' added during accumulation.
            modules.append([tag, one_module[1:]])
            one_module = ''
            key_loc = i
            # NOTE(review): the 'Text' of this row itself is not added to any module.
            last_key = txt_list.iloc[i, :]['Title']
    # Flush the trailing module unless the last non-zero-Label row was the final row.
    # NOTE(review): this module is keyed by last_key (a 'Title' value or
    # 'base_info'), not by a model prediction. Because key_loc starts at 0,
    # a one-row input with Label 0 is silently dropped, and an empty input
    # yields [['base_info', '']] - confirm both are intended.
    if key_loc != length-1:
        modules.append([last_key, one_module[1:]])
    return modules
def test_model():
    """Segment one resume file and print the predictions of a saved ``GRUModel``.

    Each segmented module is echoed followed by a separator line, then the
    raw prediction and the prediction mapped through ``number2label`` are
    printed together.
    """
    resume_path = 'D:\\Download\\简历模板.docx'
    weights_path = ROOT_PATH + '\\wbrnn_feature_h_brnn_固定投票权重_11_24_15_05'

    classifier = GRUModel(time_step=12, feature_size=100, rnn_utils=64,
                          rnn_layers_num=1)
    classifier.load_weights(weights_path)

    resume_modules = segment_one_resume_from_file(resume_path)
    for section in resume_modules:
        print(section)
        print('-------------------------------------------------------------------------')

    # The vectoriser receives a list of resumes; here it holds a single one.
    batch = [resume_modules]
    inputs = trans_to_wordvec_by_word2vec(
        batch,
        feature_size=100,
        word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
        type='rnn',
        time_step=classifier.time_step,
    )

    raw_prediction = classifier.predict(inputs)
    labelled_prediction = classifier.predict(inputs, number2label=number2label)
    print(raw_prediction, labelled_prediction)
def test_svm():
    """Train an ``SVC`` for several training-set sizes and print weighted metrics.

    For every size, data set 3 is loaded with 400 test samples, texts are
    vectorised with ``type='full'``, an ``SVC`` with default settings is
    fitted, and the weighted precision, recall and F1 computed by
    scikit-learn are printed.

    The previous hand-rolled accuracy/precision/recall computation was
    removed: its results were never printed or returned, and its
    fixed-length per-class lists raised ``IndexError`` for any label >= 10.
    """
    # Training-set sizes to sweep; an earlier run used 100..700 in steps of 100.
    train_sizes = [5, 10, 20, 50, 60, 70, 100]

    for train_num in train_sizes:
        train_x, train_y, test_x, test_y = load_data_for_single_muti_classification(
            data_set=3, train_num=train_num, test_num=400)
        print(len(train_x), len(train_y))

        train_x = trans_to_wordvec_by_word2vec(
            train_x,
            feature_size=100,
            word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
            type='full')
        test_x = trans_to_wordvec_by_word2vec(
            test_x,
            feature_size=100,
            word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
            type='full')

        model = SVC()
        model.fit(train_x, train_y)
        pre = model.predict(test_x)

        # Three blank lines frame the metrics of each run, as before.
        print('')
        print('')
        print('')
        print('sklearn-precision-score:',
              precision_score(test_y, pre, average='weighted'))
        print('sklearn-recall-score:',
              recall_score(test_y, pre, average='weighted'))
        print('sklearn-f1-score:', f1_score(test_y, pre, average='weighted'))
        print('')
        print('')
        print('')
def evaluation_mbrnn_load_model():
    """Compare a restored ``DynamicWeightHybridModel`` on clean and noisy test data.

    Two test sets are loaded (``noise_percent`` 0 and 10, shuffle noise), the
    model from ``DW_HYBRID_600_PATH`` is evaluated on both, the metrics are
    printed (clean first), and the weights returned by ``evaluate`` are drawn
    as two scatter series saved to ``lspd_weight_distribution.pdf``.
    """
    shared_load_options = dict(
        data_set=3,
        train_num=1,
        test_num=400,
        noise_type='shuffle',
        load_train=False,
    )
    test_x, test_y = load_data_for_rnn_new_add_noise(
        noise_percent=0, **shared_load_options)
    test_x_noise, test_y_noise = load_data_for_rnn_new_add_noise(
        noise_percent=10, **shared_load_options)

    hybrid = DynamicWeightHybridModel(
        time_step=TIME_STEP,
        feature_size=WORD2VEC_FEATURE_NUM,
        rnn_utils=RNN_UTILS,
        rnn_layers_num=RNN_LAYERS_NUM,
    )
    hybrid.load_weights(DW_HYBRID_600_PATH)
    begin = 0

    def to_tensors(texts, labels):
        # Adjust the labels in place, vectorise the texts, wrap both as tensors.
        process_rnn_label_list(labels, time_step=hybrid.time_step, begin=begin)
        vectors = trans_to_wordvec_by_word2vec(
            texts,
            feature_size=100,
            word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
            type='rnn',
            time_step=hybrid.time_step,
            begin=begin,
        )
        return (tf.constant(vectors, dtype=tf.float32),
                tf.constant(labels, dtype=tf.float32))

    test_x, test_y = to_tensors(test_x, test_y)
    test_x_noise, test_y_noise = to_tensors(test_x_noise, test_y_noise)

    ev_noise, weight_noise = hybrid.evaluate(test_x_noise, test_y_noise,
                                             return_weight=True)
    ev, weight = hybrid.evaluate(test_x, test_y, return_weight=True)

    template = 'test data precision:{}, recall:{}, f1-score:{}'
    for metrics in (ev, ev_noise):
        print(template.format(metrics['precision'], metrics['recall'],
                              metrics['f1-score']))

    # One x position per test sample.
    sample_index = list(range(0, 400))
    # First series: weights on the noisy test set (red triangles).
    plt.scatter(sample_index, weight_noise, c="r", alpha=0.5,
                label="abnormal test set", s=15, marker='^')
    # Second series: weights on the clean test set (green stars, 50% opacity).
    plt.scatter(sample_index, weight, c="g", alpha=0.5,
                label="normal test set", s=15, marker='*')
    plt.xlabel('index of resume sample')
    plt.ylabel('total weight value of each time step ')
    plt.legend(loc='best')
    plt.title('weight value distribution')
    plt.savefig(ROOT_PATH + '\\lspd_weight_distribution.pdf')
    plt.show()
# coding=utf-8 """ @File : text_clustering.py @Author: Xu Qiqiang @Date : 2020/11/11 0011 """ from load_resume_data import load_data_for_single_muti_classification from feature_engineer import trans_to_wordvec_by_word2vec from special_string import * from sklearn.cluster import KMeans from sklearn.metrics import adjusted_mutual_info_score, silhouette_score from sklearn.decomposition import PCA if __name__ == '__main__': train_x, train_y, test_x, test_y = load_data_for_single_muti_classification( data_set=1, train_num=100, test_num=10) train_x = trans_to_wordvec_by_word2vec( train_x, feature_size=100, word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923, type='full') # pca = PCA(n_components=10) # train_x = pca.fit_transform(train_x) model = KMeans(n_clusters=10) model.fit(train_x) res = model.predict(train_x) # print(train_y) # print(adjusted_mutual_info_score(train_y, res)) print(silhouette_score(train_x, res))