Esempio n. 1
0
 def evaluation_joint():
     """Train an ``FNNModel`` on noisy resume data, print test metrics, save weights.

     Data is loaded with ``load_data_for_rnn_new_add_noise`` (``data_set=3``,
     ``train_num=700``, ``test_num=400``, ``noise_percent=10``). Label lists
     are adjusted in place by ``process_rnn_label_list`` and the texts are
     converted by ``trans_to_wordvec_by_word2vec`` before everything is
     wrapped in float32 tensors. The model is fitted, evaluated with
     ``choose=0`` and its weights are written to ``JOINT_100_PATH``.
     """
     raw_train, train_labels, raw_test, test_labels = load_data_for_rnn_new_add_noise(
         data_set=3, train_num=700, test_num=400, noise_percent=10)
     brnn = FNNModel(time_step=12, feature_size=100)
     begin = 0

     # Modifies each label list in place so that all share the same dimensions.
     for labels in (train_labels, test_labels):
         process_rnn_label_list(labels, time_step=brnn.time_step, begin=begin)

     def to_vectors(samples):
         # Same vectorisation settings for the training and the test texts.
         return trans_to_wordvec_by_word2vec(
             samples,
             feature_size=100,
             word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
             type='rnn',
             time_step=brnn.time_step,
             begin=begin)

     train_vectors = to_vectors(raw_train)
     test_vectors = to_vectors(raw_test)

     train_x = tf.constant(train_vectors, dtype=tf.float32)
     train_y = tf.constant(train_labels, dtype=tf.float32)
     test_x = tf.constant(test_vectors, dtype=tf.float32)
     test_y = tf.constant(test_labels, dtype=tf.float32)

     brnn.fit(train_x, train_y, batchsz=5, epochs=12)
     ev = brnn.evaluate(test_x, test_y, choose=0)
     print('test data precision:{}, recall:{}, f1-score:{}'.format(
         ev['precision'], ev['recall'], ev['f1-score']))
     # NOTE(review): the constant is named JOINT_100 while train_num is 700 -
     # confirm this is the intended target path.
     brnn.save_weights(JOINT_100_PATH)
Esempio n. 2
0
 def evaluation_mbrnn_load_model():
     """Evaluate a ``GRUModel`` restored from ``BRNN_700_PATH`` on a noisy test split.

     The loader is called with ``noise_type='shuffle'`` and
     ``noise_percent=10``. Only the test split is prepared and evaluated;
     the training split returned by the loader is discarded. Precision,
     recall and f1-score of the evaluation are printed.
     """
     # The loader call is unchanged and still returns four values here;
     # only the test part is needed for a load-and-evaluate run.
     _, _, test_x, test_y = load_data_for_rnn_new_add_noise(
         data_set=3,
         train_num=100,
         test_num=400,
         noise_percent=10,
         noise_type='shuffle')
     brnn = GRUModel(time_step=TIME_STEP,
                     feature_size=WORD2VEC_FEATURE_NUM,
                     rnn_utils=RNN_UTILS,
                     rnn_layers_num=RNN_LAYERS_NUM)
     brnn.load_weights(BRNN_700_PATH)
     begin = 0
     # Modifies the label list in place so that all labels share the same dimensions.
     process_rnn_label_list(test_y, time_step=brnn.time_step, begin=begin)
     test_x = trans_to_wordvec_by_word2vec(
         test_x,
         # Use the same constant the model was built with instead of a
         # separate literal, so the two cannot drift apart.
         feature_size=WORD2VEC_FEATURE_NUM,
         word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
         type='rnn',
         time_step=brnn.time_step,
         begin=begin)
     test_x, test_y = tf.constant(test_x, dtype=tf.float32), tf.constant(
         test_y, dtype=tf.float32)
     ev = brnn.evaluate(test_x, test_y)
     template = 'test data precision:{}, recall:{}, f1-score:{}'
     print(template.format(ev['precision'], ev['recall'], ev['f1-score']))
Esempio n. 3
0
 def evaluation_mbrnn():
     """Evaluate a ``DynamicWeightHybridModel`` on the test split and save its weights.

     The model is constructed from ``BRNN_700_PATH`` and ``JOINT_100_PATH``.
     Only the test split is loaded (``load_train=False``,
     ``noise_percent=0``). Precision, recall and f1-score are printed and
     the weights are written to ``DW_HYBRID_600_PATH``.
     """
     samples, labels = load_data_for_rnn_new_add_noise(
         data_set=3,
         train_num=1,
         test_num=400,
         load_train=False,
         noise_percent=0)
     brnn = DynamicWeightHybridModel(
         time_step=TIME_STEP,
         feature_size=WORD2VEC_FEATURE_NUM,
         rnn_utils=RNN_UTILS,
         rnn_layers_num=RNN_LAYERS_NUM,
         brnn_model_path=BRNN_700_PATH,
         fnn_model_path=JOINT_100_PATH)
     begin = 0

     # The label list is adjusted in place; the return value is not used.
     process_rnn_label_list(labels, time_step=brnn.time_step, begin=begin)
     vectors = trans_to_wordvec_by_word2vec(
         samples,
         feature_size=WORD2VEC_FEATURE_NUM,
         word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
         type='rnn',
         time_step=brnn.time_step,
         begin=begin)
     test_x = tf.constant(vectors, dtype=tf.float32)
     test_y = tf.constant(labels, dtype=tf.float32)

     # evaluate() returns a (metrics, weights) pair here; the weights are not needed.
     ev, _ = brnn.evaluate(test_x, test_y, return_weight=True)
     print('test data precision:{}, recall:{}, f1-score:{}'.format(
         ev['precision'], ev['recall'], ev['f1-score']))
     brnn.save_weights(DW_HYBRID_600_PATH)
Esempio n. 4
0
 def evaluation_mbrnn_load_model():
     """Evaluate a restored ``KnowledgeDistillModel`` twice on a noisy test split.

     The model is built with ``fnn_model_path=FNN_700_PATH`` and its weights
     are loaded from ``KNOWLEDGE_DISTILL_MODEL_700_PATH``. The test split
     (``noise_type='shuffle'``, ``noise_percent=10``) is evaluated once
     with the default settings (printed under ``b-brnn:``) and once with
     ``choose=1`` (printed under ``fnn:``).
     """
     # The loader still returns four values for this call; the training
     # split is not needed when only evaluating.
     _, _, test_x, test_y = load_data_for_rnn_new_add_noise(
         data_set=3,
         train_num=1,
         test_num=400,
         noise_type='shuffle',
         noise_percent=10)
     brnn = KnowledgeDistillModel(time_step=TIME_STEP,
                                  feature_size=WORD2VEC_FEATURE_NUM,
                                  rnn_utils=RNN_UTILS,
                                  rnn_layers_num=RNN_LAYERS_NUM,
                                  fnn_model_path=FNN_700_PATH)
     brnn.load_weights(KNOWLEDGE_DISTILL_MODEL_700_PATH)
     begin = 0
     # Adjusts the label list in place.
     process_rnn_label_list(test_y, time_step=brnn.time_step, begin=begin)
     test_x = trans_to_wordvec_by_word2vec(
         test_x,
         # Same constant as the model's feature_size, not an independent literal.
         feature_size=WORD2VEC_FEATURE_NUM,
         word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
         type='rnn',
         time_step=brnn.time_step,
         begin=begin)
     test_x, test_y = tf.constant(test_x, dtype=tf.float32), tf.constant(
         test_y, dtype=tf.float32)
     template = 'test data precision:{}, recall:{}, f1-score:{}'

     ev = brnn.evaluate(test_x, test_y)
     print('b-brnn:')
     print(template.format(ev['precision'], ev['recall'], ev['f1-score']))

     ev = brnn.evaluate(test_x, test_y, choose=1)
     print('fnn:')
     print(template.format(ev['precision'], ev['recall'], ev['f1-score']))
Esempio n. 5
0
 def evaluation_hbrnn():
     """Train an ``HBRNN`` on resume data and print its test metrics.

     Data comes from ``load_data_for_rnn_new`` (``data_set=3``,
     ``train_num=50``, ``test_num=400``). Texts are converted with
     ``type='attention'``. The shapes of the resulting tensors are printed
     before the model is fitted and evaluated.
     """
     raw_train, train_labels, raw_test, test_labels = load_data_for_rnn_new(
         data_set=3, train_num=50, test_num=400)
     h_brnn = HBRNN(
         time_step=12,
         feature_size=100,
         rnn_utils=64,
         rnn_layers_num=1,
         hidden_vector_size=64,
         word_num=MAX_LEN)
     begin = 0

     # Modifies each label list in place so that all share the same dimensions.
     for labels in (train_labels, test_labels):
         process_rnn_label_list(labels, time_step=h_brnn.time_step, begin=begin)

     def to_vectors(samples):
         # Shared vectorisation settings for both splits.
         return trans_to_wordvec_by_word2vec(
             samples,
             feature_size=100,
             word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
             type='attention',
             time_step=h_brnn.time_step,
             begin=begin)

     train_vectors = to_vectors(raw_train)
     test_vectors = to_vectors(raw_test)

     train_x = tf.constant(train_vectors, dtype=tf.float32)
     train_y = tf.constant(train_labels, dtype=tf.float32)
     test_x = tf.constant(test_vectors, dtype=tf.float32)
     test_y = tf.constant(test_labels, dtype=tf.float32)
     print(train_x.shape, train_y.shape)
     print(test_x.shape, test_y.shape)

     h_brnn.fit(train_x, train_y, batchsz=10, epochs=15)
     ev = h_brnn.evaluate(test_x, test_y)
     print('test data precision:{}, recall:{}, f1-score:{}'.format(
         ev['precision'], ev['recall'], ev['f1-score']))
Esempio n. 6
0
    def evaluation_mbrnn():
        """Train a ``GRUModel`` on noisy resume data, print test metrics, save weights.

        Data is loaded with ``noise_percent=10`` and ``noise_type='swap'``
        (``train_num=600``, ``test_num=400``). After fitting, precision,
        recall and f1-score on the test split are printed and the weights
        are written to ``BRNN_600_PATH``.
        """
        raw_train, train_labels, raw_test, test_labels = load_data_for_rnn_new_add_noise(
            data_set=3,
            train_num=600,
            test_num=400,
            noise_percent=10,
            noise_type='swap')
        brnn = GRUModel(time_step=12,
                        feature_size=100,
                        rnn_utils=64,
                        rnn_layers_num=2)
        begin = 0

        # Modifies each label list in place so that all share the same dimensions.
        for labels in (train_labels, test_labels):
            process_rnn_label_list(labels,
                                   time_step=brnn.time_step,
                                   begin=begin)

        # Keyword arguments shared by both vectorisation calls.
        vectorise_options = dict(
            feature_size=100,
            word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
            type='rnn',
            begin=begin)
        train_vectors = trans_to_wordvec_by_word2vec(
            raw_train, time_step=brnn.time_step, **vectorise_options)
        test_vectors = trans_to_wordvec_by_word2vec(
            raw_test, time_step=brnn.time_step, **vectorise_options)

        train_x = tf.constant(train_vectors, dtype=tf.float32)
        train_y = tf.constant(train_labels, dtype=tf.float32)
        test_x = tf.constant(test_vectors, dtype=tf.float32)
        test_y = tf.constant(test_labels, dtype=tf.float32)

        brnn.fit(train_x, train_y, batchsz=5, epochs=15)
        ev = brnn.evaluate(test_x, test_y)
        print('test data precision:{}, recall:{}, f1-score:{}'.format(
            ev['precision'], ev['recall'], ev['f1-score']))
        brnn.save_weights(BRNN_600_PATH)
Esempio n. 7
0
 def evaluation_load_model():
     """Evaluate an ``FNNModel`` restored from ``JOINT_100_PATH`` on the noisy test split.

     Only the test split is loaded (``load_train=False``). The model is
     evaluated with ``choose=1`` and precision, recall and f1-score are
     printed.
     """
     samples, labels = load_data_for_rnn_new_add_noise(
         load_train=False,
         data_set=3,
         train_num=700,
         test_num=400,
         noise_percent=10)
     brnn = FNNModel(time_step=12, feature_size=100)
     brnn.load_weights(JOINT_100_PATH)
     begin = 0

     # The label list is adjusted in place; the return value is not used.
     process_rnn_label_list(labels, time_step=brnn.time_step, begin=begin)
     vectors = trans_to_wordvec_by_word2vec(
         samples,
         feature_size=100,
         word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
         type='rnn',
         time_step=brnn.time_step,
         begin=begin)
     test_x = tf.constant(vectors, dtype=tf.float32)
     test_y = tf.constant(labels, dtype=tf.float32)

     ev = brnn.evaluate(test_x, test_y, choose=1)
     print('test data precision:{}, recall:{}, f1-score:{}'.format(
         ev['precision'], ev['recall'], ev['f1-score']))
Esempio n. 8
0
 def evaluation_mbrnn():
     """Train a ``GRUModel1`` on resume data, print test metrics, save weights.

     Data comes from ``load_data_for_rnn_new`` (``data_set=3``,
     ``train_num=500``, ``test_num=400``). After fitting, the test metrics
     are printed and the weights are saved under ``ROOT_PATH``.
     """
     raw_train, train_labels, raw_test, test_labels = load_data_for_rnn_new(
         data_set=3, train_num=500, test_num=400)
     brnn = GRUModel1(time_step=12,
                      feature_size=100,
                      rnn_utils=64,
                      rnn_layers_num=2)
     begin = 0

     # Modifies each label list in place so that all share the same dimensions.
     for labels in (train_labels, test_labels):
         process_rnn_label_list(labels, time_step=brnn.time_step, begin=begin)

     def to_vectors(samples):
         # Shared vectorisation settings for both splits.
         return trans_to_wordvec_by_word2vec(
             samples,
             feature_size=100,
             word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
             type='rnn',
             time_step=brnn.time_step,
             begin=begin)

     train_vectors = to_vectors(raw_train)
     test_vectors = to_vectors(raw_test)

     train_x = tf.constant(train_vectors, dtype=tf.float32)
     train_y = tf.constant(train_labels, dtype=tf.float32)
     test_x = tf.constant(test_vectors, dtype=tf.float32)
     test_y = tf.constant(test_labels, dtype=tf.float32)

     brnn.fit(train_x, train_y, batchsz=10, epochs=15)
     ev = brnn.evaluate(test_x, test_y)
     print('test data precision:{}, recall:{}, f1-score:{}'.format(
         ev['precision'], ev['recall'], ev['f1-score']))

     # The target is built by plain concatenation with a backslash separator.
     weights_path = ROOT_PATH + '\\wbrnn_feature_block_attention_11_24_19_30'
     brnn.save_weights(weights_path)
Esempio n. 9
0
    def test_model():
        """Run a restored ``FNNModel`` on one resume file and print the predictions.

        The resume at the hard-coded path is split by
        ``segment_one_resume_from_file``; every module is printed followed
        by a separator line. The modules are then vectorised as a batch of
        one resume and ``predict`` is called twice: once without and once
        with ``number2label``. Both results are printed on one line.
        """
        file_path = 'D:\\Download\\简历模板.docx'
        brnn_save = FNNModel(time_step=12, feature_size=100)
        brnn_save.load_weights(FNN_MODEL_PATH)

        one_resume = segment_one_resume_from_file(file_path)
        separator = '-------------------------------------------------------------------------'
        for module in one_resume:
            print(module)
            print(separator)

        # The vectoriser receives a list of resumes, here a single one.
        inputs = trans_to_wordvec_by_word2vec(
            [one_resume],
            feature_size=100,
            word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
            type='rnn',
            time_step=brnn_save.time_step)

        raw_prediction = brnn_save.predict(inputs)
        labelled_prediction = brnn_save.predict(inputs, number2label=number2label)
        print(raw_prediction, labelled_prediction)
Esempio n. 10
0
    def evaluation_mbrnn_best_params():
        """Print per-noise-level and mean f1-scores of a restored ``KnowledgeDistillModel``.

        Eleven test sets are loaded with ``noise_percent`` running from 0 to
        10, prepared, and evaluated twice: with the default ``evaluate``
        settings (mean reported as ``brnn``) and with ``choose=1`` (mean
        reported as ``fnn``).
        """
        test_xs, test_ys = [], []
        # Original note: noise ratio runs from 0 to 100%.
        # NOTE(review): the loader receives 0..10 while the printed label is
        # index * 10 - confirm the unit expected by noise_percent.
        for noise_level in range(11):
            samples, labels = load_data_for_rnn_new_add_noise(
                data_set=3,
                train_num=1,
                test_num=400,
                noise_percent=noise_level,
                load_train=False)
            test_xs.append(samples)
            test_ys.append(labels)

        begin = 0
        for idx, (samples, labels) in enumerate(zip(test_xs, test_ys)):
            # Labels are adjusted in place before being wrapped in a tensor.
            process_rnn_label_list(labels, time_step=TIME_STEP, begin=begin)
            vectors = trans_to_wordvec_by_word2vec(
                samples,
                feature_size=WORD2VEC_FEATURE_NUM,
                word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
                type='rnn',
                time_step=TIME_STEP,
                begin=begin)
            test_xs[idx] = tf.constant(vectors, dtype=tf.float32)
            test_ys[idx] = tf.constant(labels, dtype=tf.float32)
        print('-----------------------------------------------')

        model = KnowledgeDistillModel(time_step=TIME_STEP,
                                      feature_size=WORD2VEC_FEATURE_NUM,
                                      rnn_utils=RNN_UTILS,
                                      rnn_layers_num=RNN_LAYERS_NUM)
        model.load_weights(KNOWLEDGE_DISTILL_MODEL_700_PATH)
        length = len(test_xs)

        def mean_f1(**evaluate_options):
            # Evaluate every test set in order, print its f1-score and
            # return the mean over all sets.
            total = 0
            for idx, (inputs, targets) in enumerate(zip(test_xs, test_ys)):
                ev = model.evaluate(inputs, targets, **evaluate_options)
                print(idx * 10, '%:', ev['f1-score'])
                total += ev['f1-score']
            return total / length

        print('brnn mean f1-score:', mean_f1())
        print('fnn mean f1-score:', mean_f1(choose=1))
Esempio n. 11
0
    def test_model():
        """Run a restored ``DynamicWeightHybridModel`` on an inline sample resume.

        The sample resume is a list of text modules. Every module is printed
        followed by a separator line, the modules are vectorised as a batch
        of one resume, and ``predict`` is called twice (without and with
        ``number2label``); both results are printed on one line.
        """
        brnn_save = DynamicWeightHybridModel(time_step=TIME_STEP,
                                             feature_size=WORD2VEC_FEATURE_NUM,
                                             rnn_utils=RNN_UTILS,
                                             rnn_layers_num=RNN_LAYERS_NUM)
        brnn_save.load_weights(DW_HYBRID_700_PATH)
        # The modules are given inline; a previous variant obtained them with
        # segment_one_resume_from_file(<path to a .docx file>) instead.
        one_resume = [
            """计算机中级 英语""", """ """,
            """姓 名:孙XX           性 别:男           出生年月:1992.07
                        籍 贯:广东湛江         身 高:170cm        政治面貌:团员
                        学 历:高技/专科        专 业:室内设计
                        手 机:13XXXXXXXX94
                        电子邮箱:[email protected]
                        在读院校:广州市XXXXXXXXX术学院
                        """, """
                        计算机中级      英语
                        """, """
                        深圳印刷玩具兼职开机员,
                        一味餐厅兼职后厨;
                        河源精雕装饰材料店兼职;
                        泰康人寿职员;
                        """, """
                        在学校担任班干部-
                        “橘阳话剧社”社员;
                        加入“英语爱好者学会”成为了一名英语爱好者;
                        """, """                      
                        掌握WORD、EXCEL、POWERPOINT、AutoCAD、3ds max、
                        精通AutoCAD绘图与建模;
                        在大学期间,培养了我较强的组织能力和较强的责任心。课余时间一直在腾讯课堂增强专业知识,完善各个方面的能力。"""
        ]
        for module in one_resume:
            print(module)
            print(
                '-------------------------------------------------------------------------'
            )
        # The vectoriser receives a list of resumes, here a single one.
        text_list = [one_resume]
        inputs = trans_to_wordvec_by_word2vec(
            text_list,
            # Same constant the model was built with, not an independent literal.
            feature_size=WORD2VEC_FEATURE_NUM,
            word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
            type='rnn',
            time_step=brnn_save.time_step)

        print(brnn_save.predict(inputs),
              brnn_save.predict(inputs, number2label=number2label))
 def evaluation_mbrnn1():
     """Fit a ``TeacherModel`` on the prepared test split of data set 1.

     The model is constructed from ``BBRNN_MODEL_PATH`` and
     ``FNN_MODEL_PATH``. Nothing is printed or saved by this function.

     NOTE(review): ``fit`` receives the *test* tensors and the training
     split returned by the loader is never used - confirm this is intended.
     """
     _, _, samples, labels = load_data_for_rnn_new_add_noise(
         data_set=1, train_num=1, test_num=400)
     brnn = TeacherModel(
         time_step=TIME_STEP,
         feature_size=WORD2VEC_FEATURE_NUM,
         rnn_utils=RNN_UTILS,
         rnn_layers_num=RNN_LAYERS_NUM,
         brnn_model_path=BBRNN_MODEL_PATH,
         fnn_model_path=FNN_MODEL_PATH)
     begin = 0

     # The label list is adjusted in place; the return value is not used.
     process_rnn_label_list(labels, time_step=brnn.time_step, begin=begin)
     vectors = trans_to_wordvec_by_word2vec(
         samples,
         feature_size=WORD2VEC_FEATURE_NUM,
         word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
         type='rnn',
         time_step=brnn.time_step,
         begin=begin)
     test_x = tf.constant(vectors, dtype=tf.float32)
     test_y = tf.constant(labels, dtype=tf.float32)

     brnn.fit(test_x, test_y, batchsz=10, epochs=10)
Esempio n. 13
0
def segment_one_resume_list_format(txt_list, nn_model_w=None, word2vec_model=None):
    """
    Split one resume, given as a table of text lines, into labelled modules.

    Rows whose ``Label`` is 0 are body lines: their ``Text`` is appended to
    the module currently being collected. Any other ``Label`` marks a title
    row: the text collected so far is classified with ``nn_model_w`` and
    emitted under the predicted tag, and a new module is started. The title
    row's own ``Text`` is not added to any module. Text remaining after the
    last title row is emitted under that row's ``Title`` value
    (``'base_info'`` when no title row was seen).

    :param txt_list: DataFrame with the columns ``Text``, ``Label`` and ``Title``, one row per line
    :param nn_model_w: classifier exposing ``predict``; when None it is loaded with ``load_models`` and its summary is printed
    :param word2vec_model: model handed to ``trans_to_wordvec_by_word2vec``; when None it is loaded with gensim
    :return: [[key,txt], [key,txt],...]
    """
    modules = []
    last_key = 'base_info'
    one_module = ''
    if nn_model_w is None:
        nn_model_w = load_models(muti_textcnn_api_model_update2_path_zhwiki_corpus_word2vec)
        nn_model_w.summary()
    if word2vec_model is None:
        word2vec_model = gensim.models.word2vec.Word2Vec.load(word2vec_model_path_zhwiki_rnn_update_20_923)
    for i in range(len(txt_list)):
        row = txt_list.iloc[i]
        if row['Label'] == 0:
            one_module += '\n' + row['Text']
            continue
        # Title row: classify what has been collected so far and start over.
        text_array = trans_to_wordvec_by_word2vec([one_module], feature_size=WORD2VEC_FEATURE_NUM, type='cnn', max_len=MAX_LEN,
                                                  word2vec_model=word2vec_model)  # convert to word vectors
        res = nn_model_w.predict(text_array)
        scores = list(res[0])
        tag = number2label[scores.index(max(scores))]
        # Every collected line was prefixed with '\n'; drop the leading one.
        modules.append([tag, one_module[1:]])
        one_module = ''
        last_key = row['Title']
    # Flush the trailing module. Testing the collected text (instead of
    # comparing the last title index with the last row index) keeps the text
    # of a one-row table, which was previously dropped. `not modules` keeps
    # the existing result for an empty table: one empty 'base_info' module.
    if one_module or not modules:
        modules.append([last_key, one_module[1:]])
    return modules
Esempio n. 14
0
    def test_model():
        """Run a restored ``GRUModel`` on one resume file and print the predictions.

        Weights are loaded from a path under ``ROOT_PATH``. The resume at
        the hard-coded file path is split by ``segment_one_resume_from_file``;
        every module is printed followed by a separator line. The modules
        are vectorised as a batch of one resume and ``predict`` is called
        twice (without and with ``number2label``); both results are printed
        on one line.
        """
        file_path = 'D:\\Download\\简历模板.docx'
        model_path = ROOT_PATH + '\\wbrnn_feature_h_brnn_固定投票权重_11_24_15_05'
        brnn_save = GRUModel(
            time_step=12,
            feature_size=100,
            rnn_utils=64,
            rnn_layers_num=1)
        brnn_save.load_weights(model_path)

        one_resume = segment_one_resume_from_file(file_path)
        separator = '-------------------------------------------------------------------------'
        for module in one_resume:
            print(module)
            print(separator)

        # The vectoriser receives a list of resumes, here a single one.
        inputs = trans_to_wordvec_by_word2vec(
            [one_resume],
            feature_size=100,
            word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
            type='rnn',
            time_step=brnn_save.time_step)

        raw_prediction = brnn_save.predict(inputs)
        labelled_prediction = brnn_save.predict(inputs, number2label=number2label)
        print(raw_prediction, labelled_prediction)
Esempio n. 15
0
    def test_svm():
        """Train an ``SVC`` for several training-set sizes and print weighted scores.

        For each ``train_num`` the data is loaded with
        ``load_data_for_single_muti_classification`` (``data_set=3``,
        ``test_num=400``), both splits are vectorised with ``type='full'``,
        an ``SVC`` with default settings is fitted, and the weighted
        precision, recall and f1-score computed by scikit-learn on the test
        predictions are printed.

        The earlier hand-rolled accuracy / weighted precision / weighted
        recall computation was removed: its results were never printed or
        returned, and its fixed ten-slot buckets could not hold labels >= 10.
        """
        train_sizes = [5, 10, 20, 50, 60, 70, 100]
        for train_num in train_sizes:
            train_x, train_y, test_x, test_y = load_data_for_single_muti_classification(
                data_set=3, train_num=train_num, test_num=400)
            print(len(train_x), len(train_y))
            train_x = trans_to_wordvec_by_word2vec(
                train_x,
                feature_size=100,
                word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
                type='full')
            test_x = trans_to_wordvec_by_word2vec(
                test_x,
                feature_size=100,
                word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
                type='full')
            model = SVC()
            model.fit(train_x, train_y)

            pre = model.predict(test_x)

            # Three blank lines frame each report, as before.
            for _ in range(3):
                print('')
            print('sklearn-precision-score:',
                  precision_score(test_y, pre, average='weighted'))
            print('sklearn-recall-score:',
                  recall_score(test_y, pre, average='weighted'))
            print('sklearn-f1-score:', f1_score(test_y,
                                                pre,
                                                average='weighted'))
            for _ in range(3):
                print('')
Esempio n. 16
0
    def evaluation_mbrnn_load_model():
        """Compare a restored ``DynamicWeightHybridModel`` on clean and noisy test data.

        Two test splits are loaded with ``noise_type='shuffle'``: one with
        ``noise_percent=0`` and one with ``noise_percent=10``. Both are
        prepared the same way and evaluated with ``return_weight=True``.
        The metrics of the clean split and then of the noisy split are
        printed, and the returned weights of both splits are drawn as a
        scatter plot that is saved under ``ROOT_PATH`` and shown.
        """
        # Single source for the sample count: used by the loader calls and
        # for the x axis of the plot.
        test_num = 400

        def load_test_split(noise_percent):
            # Load only the test split for the given noise setting.
            return load_data_for_rnn_new_add_noise(data_set=3,
                                                   train_num=1,
                                                   test_num=test_num,
                                                   noise_type='shuffle',
                                                   noise_percent=noise_percent,
                                                   load_train=False)

        test_x, test_y = load_test_split(0)
        test_x_noise, test_y_noise = load_test_split(10)
        brnn = DynamicWeightHybridModel(time_step=TIME_STEP,
                                        feature_size=WORD2VEC_FEATURE_NUM,
                                        rnn_utils=RNN_UTILS,
                                        rnn_layers_num=RNN_LAYERS_NUM)
        brnn.load_weights(DW_HYBRID_600_PATH)
        begin = 0

        def to_tensors(samples, labels):
            # Adjust the labels in place, vectorise the texts and wrap both
            # in float32 tensors.
            process_rnn_label_list(labels,
                                   time_step=brnn.time_step,
                                   begin=begin)
            vectors = trans_to_wordvec_by_word2vec(
                samples,
                # Same constant the model was built with, not a separate literal.
                feature_size=WORD2VEC_FEATURE_NUM,
                word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
                type='rnn',
                time_step=brnn.time_step,
                begin=begin)
            return (tf.constant(vectors, dtype=tf.float32),
                    tf.constant(labels, dtype=tf.float32))

        test_x, test_y = to_tensors(test_x, test_y)
        test_x_noise, test_y_noise = to_tensors(test_x_noise, test_y_noise)

        ev_noise, weight_noise = brnn.evaluate(test_x_noise,
                                               test_y_noise,
                                               return_weight=True)
        ev, weight = brnn.evaluate(test_x, test_y, return_weight=True)
        template = 'test data precision:{}, recall:{}, f1-score:{}'
        print(template.format(ev['precision'], ev['recall'], ev['f1-score']))
        print(
            template.format(ev_noise['precision'], ev_noise['recall'],
                            ev_noise['f1-score']))

        # One x position per test sample.
        x = list(range(test_num))
        # Noisy split: red triangles, 50% opacity.
        plt.scatter(x,
                    weight_noise,
                    c="r",
                    alpha=0.5,
                    label="abnormal test set",
                    s=15,
                    marker='^')
        # Clean split: green stars, 50% opacity.
        plt.scatter(x,
                    weight,
                    c="g",
                    alpha=0.5,
                    label="normal test set",
                    s=15,
                    marker='*')
        plt.xlabel('index of resume sample')
        plt.ylabel('total weight value of each time step ')
        plt.legend(loc='best')
        plt.title('weight value distribution')
        plt.savefig(ROOT_PATH + '\\lspd_weight_distribution.pdf')
        plt.show()
Esempio n. 17
0
# coding=utf-8
"""
@File  : text_clustering.py
@Author: Xu Qiqiang
@Date  : 2020/11/11 0011
"""
from load_resume_data import load_data_for_single_muti_classification
from feature_engineer import trans_to_wordvec_by_word2vec
from special_string import *
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_mutual_info_score, silhouette_score
from sklearn.decomposition import PCA

if __name__ == '__main__':
    # Cluster the vectorised training texts with KMeans (10 clusters) and
    # print the silhouette score of the resulting assignment.
    train_x, train_y, test_x, test_y = load_data_for_single_muti_classification(
        data_set=1,
        train_num=100,
        test_num=10,
    )
    features = trans_to_wordvec_by_word2vec(
        train_x,
        feature_size=100,
        word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
        type='full',
    )
    # A PCA reduction of the features and an adjusted-mutual-information
    # comparison against train_y were tried earlier and are disabled.
    kmeans = KMeans(n_clusters=10)
    kmeans.fit(features)
    cluster_ids = kmeans.predict(features)
    print(silhouette_score(features, cluster_ids))