コード例 #1
0
    def test1(self):
        json_ltp = demjson.decode_file("./" + "ltp_0_test.json")
        data_ltp = json_ltp["data"]

        number_of_phrase = len(data_ltp)

        for j in range(3):
            ltp_entities_info = nerstat.find_entity_info(data_ltp[j], "ltp")
            entities = ltp_entities_info["entity"]
            ner_tags = ltp_entities_info["entity_type"]
            entity_unicode_lens = ltp_entities_info["entity_unicode_len"]
            startposes = ltp_entities_info["startpos"]
            endposes = ltp_entities_info["endpos"]
            num = len(entities)
            for i in range(num):
                print entities[i].decode("utf-8")
                print ner_tags[i]
                print startposes[i], endposes[i], entity_unicode_lens[i]
コード例 #2
0
ファイル: test_nerstat.py プロジェクト: jiyzhang/NER
    def test2(self):

        jsonObject = {}
        jsonObjArray = []

        # create phrase_id <--> doc_id dictionary for look up doc_id by phrase_id
        # for boson NER
        myDocDic = nerstat.setupDoc_ID_Dic()

        # 文件个数
        for i in range(1):  # there is only 1 file for boson
            # for i in range(22):
            print i
            json_corenlp = demjson.decode_file("./" + "corenlp_" + str(i) +
                                               "_test.json",
                                               encoding="utf8")
            json_ltp = demjson.decode_file("./" + "ltp_" + str(i) +
                                           "_test.json",
                                           encoding="utf8")
            json_hanlp = demjson.decode_file("./" + "hanlp_" + str(i) +
                                             "_test.json",
                                             encoding="utf8")
            json_fnlp = demjson.decode_file("./" + "fnlp_" + str(i) +
                                            "_test.json",
                                            encoding="utf8")
            json_fool = demjson.decode_file("./" + "foolnltk_" + str(i) +
                                            "_test.json",
                                            encoding="utf8")

            # data array
            data_corenlp = json_corenlp["data"]
            data_ltp = json_ltp["data"]
            data_hanlp = json_hanlp["data"]
            data_fnlp = json_fnlp["data"]
            data_fool = json_fool["data"]

            number_of_phrase = len(data_ltp)

            for j in range(1):
                # for j in range(3):
                # 合并organization/personal/location, 获取entities
                corenlp_entities_info = nerstat.find_entity_info(
                    data_corenlp[j], "corenlp")
                ltp_entities_info = nerstat.find_entity_info(
                    data_ltp[j], "ltp")
                hanlp_entities_info = nerstat.find_entity_info(
                    data_hanlp[j], "hanlp")
                fnlp_entities_info = nerstat.find_entity_info(
                    data_fnlp[j], "fnlp")
                fool_entities_info = nerstat.find_foolnltk_entity(data_fool[j])

                # ltp: 1, corenlp: 2, hanlp 4, 根据sum来得知两两是否相同

                # 实体名、实体长度、实体起始位置
                # 1. 先比较三者的实体个数是否相等

                # 2. 实际比较
                # 2.1 PERSON 个数,有几个相同
                # 2.2 LOCATION 个数,有几个相同
                # 2.3 ORGANIZATION个数,有几个相同

                # 通过set的 &, in, not in来处理

                phrase_id = corenlp_entities_info["phrase_id"]

                # ----------------------------------------------------------
                doc_id, sentence = myDocDic[phrase_id]
                boson_entities_info = nerstat.find_boson_entity(
                    doc_id, phrase_id)
                # ----------------------------------------------------------

                # update space info
                spaceinfo = self.getspaceinfo(sentence)
                # ltp_entities_info   = self.updateentityoffset(spaceinfo, ltp_entities_info)
                # fnlp_entities_info  = self.updateentityoffset(spaceinfo, fnlp_entities_info)
                # boson_entities_info = self.updateentityoffset(spaceinfo, boson_entities_info)
                self.updateentityoffset(spaceinfo, ltp_entities_info)
                self.updateentityoffset(spaceinfo, fnlp_entities_info)
                self.updateentityoffset(spaceinfo, boson_entities_info)

                print "test verification 0"

                self.printentityinfo("corenlp", corenlp_entities_info)
                self.printentityinfo("ltp", ltp_entities_info)
                self.printentityinfo("hanlp", hanlp_entities_info)
                self.printentityinfo("fnlp", fnlp_entities_info)
                self.printentityinfo("fool", fool_entities_info)
                self.printentityinfo("boson", boson_entities_info)

                np_entity_corenlp = np.array(corenlp_entities_info["entity"])
                np_entity_ltp = np.array(ltp_entities_info["entity"])
                np_entity_hanlp = np.array(hanlp_entities_info["entity"])
                np_entity_fnlp = np.array(fnlp_entities_info["entity"])
                np_entity_fool = np.array(fool_entities_info["entity"])
                np_entity_boson = np.array(boson_entities_info["entity"])

                np_type_corenlp = np.array(
                    corenlp_entities_info["entity_type"])
                np_type_ltp = np.array(ltp_entities_info["entity_type"])
                np_type_hanlp = np.array(hanlp_entities_info["entity_type"])
                np_type_fnlp = np.array(fnlp_entities_info["entity_type"])
                np_type_fool = np.array(fool_entities_info["entity_type"])
                np_type_boson = np.array(boson_entities_info["entity_type"])

                # for the overlap of entity between corenlp, ltp and hanlp
                np_startpos_corenlp = np.array(
                    corenlp_entities_info["startpos"])
                np_startpos_ltp = np.array(ltp_entities_info["startpos"])
                np_startpos_hanlp = np.array(hanlp_entities_info["startpos"])
                np_startpos_fnlp = np.array(fnlp_entities_info["startpos"])
                np_startpos_fool = np.array(fool_entities_info["startpos"])
                np_startpos_boson = np.array(boson_entities_info["startpos"])

                np_endpos_corenlp = np.array(corenlp_entities_info["endpos"])
                np_endpos_ltp = np.array(ltp_entities_info["endpos"])
                np_endpos_hanlp = np.array(hanlp_entities_info["endpos"])
                np_endpos_fnlp = np.array(fnlp_entities_info["endpos"])
                np_endpos_fool = np.array(fool_entities_info["endpos"])
                np_endpos_boson = np.array(boson_entities_info["endpos"])

                np_entitylen_corenlp = np.array(
                    corenlp_entities_info["entity_unicode_len"])
                np_entitylen_ltp = np.array(
                    ltp_entities_info["entity_unicode_len"])
                np_entitylen_hanlp = np.array(
                    hanlp_entities_info["entity_unicode_len"])
                np_entitylen_fnlp = np.array(
                    fnlp_entities_info["entity_unicode_len"])
                np_entitylen_fool = np.array(
                    fool_entities_info["entity_unicode_len"])
                np_entitylen_boson = np.array(
                    boson_entities_info["entity_unicode_len"])

                print "first verification"
                print "-------------corenlp----------------"
                self.printnp("   np_entity_corenlp: ", np_entity_corenlp)
                self.printnp("     np_type_corenlp: ", np_type_corenlp)
                self.printnp(" np_startpos_corenlp: ", np_startpos_corenlp)
                self.printnp("   np_endpos_corenlp: ", np_endpos_corenlp)
                self.printnp("np_entitylen_corenlp: ", np_entitylen_corenlp)

                print "-------------ltp----------------"
                self.printnp("   np_entity_ltp: ", np_entity_ltp)
                self.printnp("     np_type_ltp: ", np_type_ltp)
                self.printnp(" np_startpos_ltp: ", np_startpos_ltp)
                self.printnp("   np_endpos_ltp: ", np_endpos_ltp)
                self.printnp("np_entitylen_ltp: ", np_entitylen_ltp)

                print "-------------hanlp----------------"
                self.printnp("   np_entity_hanlp: ", np_entity_hanlp)
                self.printnp("     np_type_hanlp: ", np_type_hanlp)
                self.printnp(" np_startpos_hanlp: ", np_startpos_hanlp)
                self.printnp("   np_endpos_hanlp: ", np_endpos_hanlp)
                self.printnp("np_entitylen_hanlp: ", np_entitylen_hanlp)

                print "-------------fnlp----------------"
                self.printnp("   np_entity_fnlp: ", np_entity_fnlp)
                self.printnp("     np_type_fnlp: ", np_type_fnlp)
                self.printnp(" np_startpos_fnlp: ", np_startpos_fnlp)
                self.printnp("   np_endpos_fnlp: ", np_endpos_fnlp)
                self.printnp("np_entitylen_fnlp: ", np_entitylen_fnlp)

                print "-------------fool----------------"
                self.printnp("   np_entity_fool: ", np_entity_fool)
                self.printnp("     np_type_fool: ", np_type_fool)
                self.printnp(" np_startpos_fool: ", np_startpos_fool)
                self.printnp("   np_endpos_fool: ", np_endpos_fool)
                self.printnp("np_entitylen_fool: ", np_entitylen_fool)

                print "-------------boson----------------"
                self.printnp("   np_entity_boson: ", np_entity_boson)
                self.printnp("     np_type_boson: ", np_type_boson)
                self.printnp(" np_startpos_boson: ", np_startpos_boson)
                self.printnp("   np_endpos_boson: ", np_endpos_boson)
                self.printnp("np_entitylen_boson: ", np_entitylen_boson)

                subJsonObject = {}
                subJsonObject["phrase_id"] = phrase_id

                # 统计每个NER中PERSON、LOCATION, ORGANIZATION的个数

                # for i in ["PERSON", "LOCATION", "ORGANIZATION"]:
                c_person_indexes = nerstat.find_all_index(
                    np_type_corenlp, "PERSON")
                c_location_indexes = nerstat.find_all_index(
                    np_type_corenlp, "LOCATION")
                c_organization_indexes = nerstat.find_all_index(
                    np_type_corenlp, "ORGANIZATION")

                # for i in ["PERSON", "LOCATION", "ORGANIZATION"]:
                l_person_indexes = nerstat.find_all_index(
                    np_type_ltp, "PERSON")
                l_location_indexes = nerstat.find_all_index(
                    np_type_ltp, "LOCATION")
                l_organization_indexes = nerstat.find_all_index(
                    np_type_ltp, "ORGANIZATION")

                # for i in ["PERSON", "LOCATION", "ORGANIZATION"]:
                h_person_indexes = nerstat.find_all_index(
                    np_type_hanlp, "PERSON")
                h_location_indexes = nerstat.find_all_index(
                    np_type_hanlp, "LOCATION")
                h_organization_indexes = nerstat.find_all_index(
                    np_type_hanlp, "ORGANIZATION")

                # FNLP
                # for i in ["PERSON", "LOCATION", "ORGANIZATION"]:
                f_person_indexes = nerstat.find_all_index(
                    np_type_fnlp, "PERSON")
                f_location_indexes = nerstat.find_all_index(
                    np_type_fnlp, "LOCATION")
                f_organization_indexes = nerstat.find_all_index(
                    np_type_fnlp, "ORGANIZATION")

                # foolnltk
                # for i in ["PERSON", "LOCATION", "ORGANIZATION"]:
                o_person_indexes = nerstat.find_all_index(
                    np_type_fool, "PERSON")
                o_location_indexes = nerstat.find_all_index(
                    np_type_fool, "LOCATION")
                o_organization_indexes = nerstat.find_all_index(
                    np_type_fool, "ORGANIZATION")

                # boson_ner
                # for i in ["PERSON", "LOCATION", "ORGANIZATION"]:
                b_person_indexes = nerstat.find_all_index(
                    np_type_boson, "PERSON")
                b_location_indexes = nerstat.find_all_index(
                    np_type_boson, "LOCATION")
                b_organization_indexes = nerstat.find_all_index(
                    np_type_boson, "ORGANIZATION")

                print "second verification"

                print "c_person_indexes: ", c_person_indexes
                print "c_organization_indexes: ", c_organization_indexes

                print "l_person_indexes: ", l_person_indexes
                print "l_organization_indexes: ", l_organization_indexes

                print "h_person_indexes: ", h_person_indexes
                print "h_organization_indexes: ", h_organization_indexes

                print "f_person_indexes: ", f_person_indexes
                print "f_organization_indexes: ", f_organization_indexes

                print "o_person_indexes: ", o_person_indexes
                print "o_organization_indexes: ", o_organization_indexes

                print "b_person_indexes: ", b_person_indexes
                print "b_organization_indexes: ", b_organization_indexes

                corenlp_amount_stat = [
                    len(c_person_indexes),
                    len(c_location_indexes),
                    len(c_organization_indexes)
                ]
                ltp_amount_stat = [
                    len(l_person_indexes),
                    len(l_location_indexes),
                    len(l_organization_indexes)
                ]
                hanlp_amount_stat = [
                    len(h_person_indexes),
                    len(h_location_indexes),
                    len(h_organization_indexes)
                ]
                fnlp_amount_stat = [
                    len(f_person_indexes),
                    len(f_location_indexes),
                    len(f_organization_indexes)
                ]
                fool_amount_stat = [
                    len(o_person_indexes),
                    len(o_location_indexes),
                    len(o_organization_indexes)
                ]
                boson_amount_stat = [
                    len(b_person_indexes),
                    len(b_location_indexes),
                    len(b_organization_indexes)
                ]

                # ------------------------------------------------
                subJsonObject["corenlp"] = corenlp_amount_stat
                subJsonObject["ltp"] = ltp_amount_stat
                subJsonObject["hanlp"] = hanlp_amount_stat
                subJsonObject["fnlp"] = fnlp_amount_stat
                subJsonObject["fool"] = fool_amount_stat
                subJsonObject["boson"] = boson_amount_stat
                # ------------------------------------------------

                # ## 2018-01-31 set operation deleted
                # for overlap inforamtion

                bc_overlapped, bc_matched = nerstat.find_overlaps(
                    boson_entities_info, b_person_indexes, b_location_indexes,
                    b_organization_indexes, corenlp_entities_info,
                    c_person_indexes, c_location_indexes,
                    c_organization_indexes)

                bl_overlapped, bl_matched = nerstat.find_overlaps(
                    boson_entities_info, b_person_indexes, b_location_indexes,
                    b_organization_indexes, ltp_entities_info,
                    l_person_indexes, l_location_indexes,
                    l_organization_indexes)

                bh_overlapped, bh_matched = nerstat.find_overlaps(
                    boson_entities_info, b_person_indexes, b_location_indexes,
                    b_organization_indexes, hanlp_entities_info,
                    h_person_indexes, h_location_indexes,
                    h_organization_indexes)

                bf_overlapped, bf_matched = nerstat.find_overlaps(
                    boson_entities_info, b_person_indexes, b_location_indexes,
                    b_organization_indexes, fnlp_entities_info,
                    f_person_indexes, f_location_indexes,
                    f_organization_indexes)

                bo_overlapped, bo_matched = nerstat.find_overlaps(
                    boson_entities_info, b_person_indexes, b_location_indexes,
                    b_organization_indexes, fool_entities_info,
                    o_person_indexes, o_location_indexes,
                    o_organization_indexes)

                # ------------------------------------------------
                subJsonObject["bc"] = bc_matched
                subJsonObject["bl"] = bl_matched
                subJsonObject["bh"] = bh_matched
                subJsonObject["bf"] = bf_matched
                subJsonObject["bo"] = bo_matched
                # ------------------------------------------------
                # ------------------------------------------------
                subJsonObject["bc_overlapped"] = bc_overlapped
                subJsonObject["bl_overlapped"] = bl_overlapped
                subJsonObject["bh_overlapped"] = bh_overlapped
                subJsonObject["bf_overlapped"] = bf_overlapped
                subJsonObject["bo_overlapped"] = bo_overlapped
                # subJsonObject["clh"] = clh
                # ------------------------------------------------

                print "verification 3"

                print "bc_matched", bc_matched
                print "bl_matched", bl_matched
                print "bh_matched", bh_matched
                print "bf_matched", bf_matched
                print "bo_matched", bo_matched

                print "bc_overlapped", bc_overlapped
                print "bl_overlapped", bl_overlapped
                print "bh_overlapped", bh_overlapped
                print "bf_overlapped", bf_overlapped
                print "bo_overlapped", bo_overlapped

                jsonObjArray.append(subJsonObject)

            jsonObject["stats"] = jsonObjArray
            #
            print "*" * 20
            print "writing to : test_o_" + str(i) + ".json"

            # demjson.encode_to_file("./" + str(i) + ".json", encoding="utf-8")

            with open("test_o_" + str(i) + ".json", "w") as fp:
                json.dump(jsonObject, fp)
            print "done."
コード例 #3
0
ファイル: find_Entity_Diff.py プロジェクト: jiyzhang/NER
    def test1(self):
        # jsonObject = {}
        # jsonObjArray = []

        # create phrase_id <--> doc_id dictionary for look up doc_id by phrase_id
        # for boson NER
        myDocDic, myTextDic = setupDic()

        # 文件个数
        for i in range(1):  # there is only 1 file for boson
            # for i in range(22):
            print i

            json_corenlp = demjson.decode_file("./" + "corenlp_" + str(i) +
                                               ".json")
            json_ltp = demjson.decode_file("./" + "ltp_" + str(i) + ".json")
            json_hanlp = demjson.decode_file("./" + "hanlp_" + str(i) +
                                             ".json")
            json_fnlp = demjson.decode_file("./" + "fnlp_" + str(i) + ".json")
            json_fool = demjson.decode_file("./" + "foolnltk_" + str(i) +
                                            ".json")

            # data array
            data_corenlp = json_corenlp["data"]
            data_ltp = json_ltp["data"]
            data_hanlp = json_hanlp["data"]
            data_fnlp = json_fnlp["data"]
            data_fool = json_fool["data"]

            number_of_phrase = len(data_corenlp)

            for j in range(100):
                #for j in range(number_of_phrase):
                # for j in range(3):
                # 合并organization/personal/location, 获取entities
                print "No. " + str(j)
                corenlp_entities_info = nerstat.find_entity_info(
                    data_corenlp[j], "corenlp")
                ltp_entities_info = nerstat.find_entity_info(
                    data_ltp[j], "ltp")
                hanlp_entities_info = nerstat.find_entity_info(
                    data_hanlp[j], "hanlp")
                fnlp_entities_info = nerstat.find_entity_info(
                    data_fnlp[j], "fnlp")
                fool_entities_info = nerstat.find_foolnltk_entity(data_fool[j])

                # ltp: 1, corenlp: 2, hanlp 4, 根据sum来得知两两是否相同

                # 实体名、实体长度、实体起始位置
                # 1. 先比较三者的实体个数是否相等

                # 2. 实际比较
                # 2.1 PERSON 个数,有几个相同
                # 2.2 LOCATION 个数,有几个相同
                # 2.3 ORGANIZATION个数,有几个相同

                # 通过set的 &, in, not in来处理

                phrase_id = corenlp_entities_info["phrase_id"]

                # ----------------------------------------------------------
                doc_id = myDocDic[phrase_id]
                phrase = myTextDic[phrase_id]
                boson_entities_info = nerstat.find_boson_entity(
                    doc_id, phrase_id)
                # ----------------------------------------------------------

                np_entity_corenlp = np.array(corenlp_entities_info["entity"])
                np_entity_ltp = np.array(ltp_entities_info["entity"])
                np_entity_hanlp = np.array(hanlp_entities_info["entity"])
                np_entity_fnlp = np.array(fnlp_entities_info["entity"])
                np_entity_fool = np.array(fool_entities_info["entity"])
                np_entity_boson = np.array(boson_entities_info["entity"])

                np_type_corenlp = np.array(
                    corenlp_entities_info["entity_type"])
                np_type_ltp = np.array(ltp_entities_info["entity_type"])
                np_type_hanlp = np.array(hanlp_entities_info["entity_type"])
                np_type_fnlp = np.array(fnlp_entities_info["entity_type"])
                np_type_fool = np.array(fool_entities_info["entity_type"])
                np_type_boson = np.array(boson_entities_info["entity_type"])

                #phrase = "".join(np_entity_corenlp)

                #
                # # for the overlap of entity between corenlp, ltp and hanlp
                # np_startpos_corenlp = np.array(corenlp_entities_info["startpos"])
                # np_startpos_ltp = np.array(ltp_entities_info["startpos"])
                # np_startpos_hanlp = np.array(hanlp_entities_info["startpos"])
                # np_startpos_fnlp = np.array(fnlp_entities_info["startpos"])
                # np_startpos_fool = np.array(fool_entities_info["startpos"])
                # np_startpos_boson = np.array(boson_entities_info["startpos"])
                #
                # np_endpos_corenlp = np.array(corenlp_entities_info["endpos"])
                # np_endpos_ltp = np.array(ltp_entities_info["endpos"])
                # np_endpos_hanlp = np.array(hanlp_entities_info["endpos"])
                # np_endpos_fnlp = np.array(fnlp_entities_info["endpos"])
                # np_endpos_fool = np.array(fool_entities_info["endpos"])
                # np_endpos_boson = np.array(boson_entities_info["endpos"])
                #
                # np_entitylen_corenlp = np.array(corenlp_entities_info["entity_unicode_len"])
                # np_entitylen_ltp = np.array(ltp_entities_info["entity_unicode_len"])
                # np_entitylen_hanlp = np.array(hanlp_entities_info["entity_unicode_len"])
                # np_entitylen_fnlp = np.array(fnlp_entities_info["entity_unicode_len"])
                # np_entitylen_fool = np.array(fool_entities_info["entity_unicode_len"])
                # np_entitylen_boson = np.array(boson_entities_info["entity_unicode_len"])

                #     subJsonObject = {}
                #     subJsonObject["phrase_id"] = phrase_id
                #
                #     # 统计每个NER中PERSON、LOCATION, ORGANIZATION的个数
                #
                # for i in ["PERSON", "LOCATION", "ORGANIZATION"]:
                c_person_indexes = find_all_index(np_type_corenlp, "PERSON")
                c_location_indexes = find_all_index(np_type_corenlp,
                                                    "LOCATION")
                c_organization_indexes = find_all_index(
                    np_type_corenlp, "ORGANIZATION")

                # # for detecting the overlap of entity between corenlp, ltp and hanlp
                c_person_list = np_entity_corenlp[c_person_indexes]
                c_location_list = np_entity_corenlp[c_location_indexes]
                c_organization_list = np_entity_corenlp[c_organization_indexes]

                # # set
                # c_person_set = set(np_entity_corenlp[c_person_indexes])
                # c_location_set = set(np_entity_corenlp[c_location_indexes])
                # c_organization_set = set(np_entity_corenlp[c_organization_indexes])

                # for i in ["PERSON", "LOCATION", "ORGANIZATION"]:
                l_person_indexes = find_all_index(np_type_ltp, "PERSON")
                l_location_indexes = find_all_index(np_type_ltp, "LOCATION")
                l_organization_indexes = find_all_index(
                    np_type_ltp, "ORGANIZATION")

                l_person_list = np_entity_ltp[l_person_indexes]
                l_location_list = np_entity_ltp[l_location_indexes]
                l_organization_list = np_entity_ltp[l_organization_indexes]

                # for i in ["PERSON", "LOCATION", "ORGANIZATION"]:
                h_person_indexes = find_all_index(np_type_hanlp, "PERSON")
                h_location_indexes = find_all_index(np_type_hanlp, "LOCATION")
                h_organization_indexes = find_all_index(
                    np_type_hanlp, "ORGANIZATION")

                h_person_list = np_entity_hanlp[h_person_indexes]
                h_location_list = np_entity_hanlp[h_location_indexes]
                h_organization_list = np_entity_hanlp[h_organization_indexes]

                # FNLP
                # for i in ["PERSON", "LOCATION", "ORGANIZATION"]:
                f_person_indexes = find_all_index(np_type_fnlp, "PERSON")
                f_location_indexes = find_all_index(np_type_fnlp, "LOCATION")
                f_organization_indexes = find_all_index(
                    np_type_fnlp, "ORGANIZATION")

                f_person_list = np_entity_fnlp[f_person_indexes]
                f_location_list = np_entity_fnlp[f_location_indexes]
                f_organization_list = np_entity_fnlp[f_organization_indexes]

                # foolnltk
                # for i in ["PERSON", "LOCATION", "ORGANIZATION"]:
                o_person_indexes = find_all_index(np_type_fool, "PERSON")
                o_location_indexes = find_all_index(np_type_fool, "LOCATION")
                o_organization_indexes = find_all_index(
                    np_type_fool, "ORGANIZATION")

                o_person_list = np_entity_fool[o_person_indexes]
                o_location_list = np_entity_fool[o_location_indexes]
                o_organization_list = np_entity_fool[o_organization_indexes]

                # boson_ner
                # for i in ["PERSON", "LOCATION", "ORGANIZATION"]:
                b_person_indexes = find_all_index(np_type_boson, "PERSON")
                b_location_indexes = find_all_index(np_type_boson, "LOCATION")
                b_organization_indexes = find_all_index(
                    np_type_boson, "ORGANIZATION")

                b_person_list = np_entity_boson[b_person_indexes]
                b_location_list = np_entity_boson[b_location_indexes]
                b_organization_list = np_entity_boson[b_organization_indexes]

                print phrase
                print "corenlp"
                print " " + "person: ",
                for e in c_person_list:
                    print e,
                print ""
                print " " + "organization:",
                for e in c_organization_list:
                    print e,
                print ""

                print "ltp"
                print " " + "person: ",
                for e in l_person_list:
                    print e,
                print ""
                print " " + "organization:",
                for e in l_organization_list:
                    print e,
                print ""

                print "HanLP"
                print " " + "person: ",
                for e in h_person_list:
                    print e,
                print ""
                print " " + "organization:",
                for e in h_organization_list:
                    print e,
                print ""

                print "FNLP"
                print " " + "person: ",
                for e in f_person_list:
                    print e,
                print ""
                print " " + "organization:",
                for e in f_organization_list:
                    print e,
                print ""

                print "foolNLTK"
                print " " + "person: ",
                for e in o_person_list:
                    print e,
                print ""
                print " " + "organization:",
                for e in o_organization_list:
                    print e,
                print ""

                try:
                    print "BosonNER"
                    print " " + "person: ",
                    for e in b_person_list:
                        print e,
                    print ""
                    print " " + "organization:",
                    for e in b_organization_list:
                        print e,
                    print ""
                except UnicodeDecodeError, e:
                    print e.message()