Code Example #1
    def start_fix_exist_aliases_for_aliases_type(self, alias_type):
        self.logger = Logger(self.logger_file_name).get_log()
        self.logger.info("-----------------------start-----------------------")
        self.logger.info("generate api aliases for alias type=%d", alias_type)

        session = self.get_session()
Code Example #2
    def start_import(self, graphClient):
        self.logger = Logger(self.logger_file_name).get_log()
        if not self.session:
            self.session = EngineFactory.create_session()
        self.graphClient = graphClient
        all_relation_list = self.session.query(APIRelation).all()
        for api_relation in all_relation_list:
            self.import_one_relation(api_relation)
        print("import api entity relation complete")
Code Example #3
    def start_import(self, graphClient):
        self.logger = Logger(self.logger_file_name).get_log()
        if not self.session:
            self.session = EngineFactory.create_session()
        self.graphClient = graphClient

        all_apis = self.session.query(APIEntity).all()
        for api_entity in all_apis:
            self.import_one_api_entity(api_entity)
        print("import api entity complete")
Code Example #4
    def start_import(self, graphClient):
        self.logger = Logger(self.logger_file_name).get_log()
        if not self.session:
            self.session = EngineFactory.create_session()
        self.graphClient = graphClient
        all_apis = self.session.query(APIEntity).all()
        for api_entity in all_apis:
            api_id = api_entity.id
            api_document_website_list = APIDocumentWebsite.get_document_website_list_by_api_id(
                self.session, api_id)
            self.import_document_website_to_one_entity(
                api_id, api_document_website_list)

        print("import api doc url complete")
Code Example #5
    def init(self, vector_dir_path="./model/"):
        self.kg_models = KnowledgeGraphFeafureModels()
        self.kg_models.init(vector_dir_path=vector_dir_path)

        self._session = EngineFactory.create_session(echo=False)
        self._entity_extractor = EntityExtractor()

        # self._tf_idf_model = TFIDFModel()
        # self._tf_idf_model.load(dict_type=2)

        self.qa_searcher = QAEntitySearcher()
        client = GraphClient(server_number=4)
        self.semanticSearchAccessor = SemanticSearchAccessor(client)
        self.defaultAccessor = DefaultGraphAccessor(client)
        self._logger = Logger("QAResultSearch").get_log()
Code Example #6
    def __init__(self, sentence_list, logger=None):
        self._api_searcher = APISearcher()
        if logger is None:
            self.logger = Logger("TreeView").get_log()
        else:
            self.logger = logger
        self._tree = self.get_api_tree_from_sentence_list(sentence_list)
Code Example #7
    def start_import(self):
        self.logger = Logger(self.logger_file_name).get_log()
        if not self.session:
            self.session = EngineFactory.create_session()
        self.init_knowledge_table()

        cur = ConnectionFactory.create_cursor_by_knowledge_table(
            self.data_source_knowledge_table)

        select_sql = "select {primary_key_name},{html_column} from {table}".format(
            primary_key_name=self.primary_key_name,
            html_column=self.html_column,
            table=self.table)
        cur.execute(select_sql)
        data_list = cur.fetchall()
        result_tuples = []
        for row_data in data_list:
            primary_key = row_data[0]
            html_text = row_data[1]

            if KnowledgeTableColumnMapRecord.exist_import_record(
                    session=self.session,
                    start_knowledge_table=self.data_source_knowledge_table,
                    end_knowledge_table=self.api_html_table,
                    start_row_id=primary_key,
                    start_row_name=self.html_column):
                self.logger.info("%d has been import to new table",
                                 primary_key)
                continue
            api_html_text = self.create_from_one_row_data(
                primary_key, html_text)

            if api_html_text:
                api_html_text = api_html_text.create(self.session,
                                                     autocommit=False)
                result_tuples.append((api_html_text, primary_key))
            else:
                self.logger.warn("None api_html_text fot %s", str(row_data))
                continue

            if len(result_tuples) > self.commit_step:
                self.commit_to_server_for_column_map(map_tuples=result_tuples)
                result_tuples = []
        self.commit_to_server_for_column_map(map_tuples=result_tuples)
        self.logger.info("import api html completed!")
        cur.close()
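
The buffered flush in start_import above is a recurring idiom in these importers: rows accumulate until commit_step is exceeded, the batch is committed, and one final commit after the loop handles the remainder. A minimal, self-contained sketch of the same idiom; flush() stands in for commit_to_server_for_column_map, and all names are illustrative:

COMMIT_STEP = 5000

def flush(batch):
    # stand-in for commit_to_server_for_column_map(map_tuples=...)
    print("committing %d rows" % len(batch))

def import_rows(rows):
    buffered = []
    for row in rows:
        buffered.append(row)
        if len(buffered) > COMMIT_STEP:
            flush(buffered)  # one commit per batch bounds transaction size
            buffered = []
    flush(buffered)          # final partial batch

import_rows(range(12001))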
Code Example #8
    def start_generate_aliases_for_api_type(self,
                                            alias_type,
                                            api_type=ALL_API):
        self.logger = Logger(self.logger_file_name).get_log()
        self.logger.info("-----------------------start-----------------------")
        self.logger.info("generate api aliases for api type=%d alias type=%d",
                         api_type, alias_type)

        session = self.get_session()

        api_alias_generator = APIAliasGeneratorFactory.create_generator(
            alias_type)
        if api_alias_generator is None:
            self.logger.error("Not Implemented Generator for %d ", alias_type)
            return

        if api_type == APIAliasesTableFuller.ALL_API:
            count_api = session.query(APIEntity).count()
            api_list_query = session.query(APIEntity)
        else:
            count_api = session.query(APIEntity).filter(
                APIEntity.api_type == api_type).count()
            api_list_query = session.query(APIEntity).filter(
                APIEntity.api_type == api_type)

        start_index_list = range(0, count_api, self.commit_step)

        for start_index in start_index_list:
            end_index = min(start_index + self.commit_step, count_api)
            for api in api_list_query.all()[start_index:end_index]:
                try:
                    api_alias_list = api_alias_generator.generate_aliases(api)
                    self.add_aliases_to_api_entity(api, api_alias_list,
                                                   session)
                except Exception:
                    traceback.print_exc()

            self.logger.info("complete %d-%d", start_index, end_index)
            session.commit()
        self.logger.info("complete all")
Code Example #9
class APIRelationImporter:
    def __init__(self):
        self.logger_file_name = "import_api_relation_to_neo4j"
        self.logger = None
        self.graphClient = None
        self.session = None

    def start_import(self, graphClient):
        self.logger = Logger(self.logger_file_name).get_log()
        if not self.session:
            self.session = EngineFactory.create_session()
        self.graphClient = graphClient
        all_relation_list = self.session.query(APIRelation).all()
        for api_relation in all_relation_list:
            self.import_one_relation(api_relation)
        print("import api entity relation complete")

    def import_one_relation(self, api_relation):
        if api_relation is not None:
            relation_type = APIRelation.get_type_string(
                api_relation.relation_type)
            start_api_id = api_relation.start_api_id
            end_api_id = api_relation.end_api_id
            start_node = self.graphClient.find_node_by_api_id(start_api_id)
            end_node = self.graphClient.find_node_by_api_id(end_api_id)
            if start_node is not None and end_node is not None:
                if relation_type == "belong to":
                    relation_type = self.transfer_belong_to_type(start_api_id)
                    relationship = Relationship(end_node, relation_type,
                                                start_node)
                    self.graphClient.graph.merge(relationship)
                else:
                    relationship = Relationship(start_node, relation_type,
                                                end_node)
                    self.graphClient.graph.merge(relationship)
                self.logger.info("create or merge relation" +
                                 str(relationship))
            else:
                self.logger.warn(
                    "failed to create relation because start node or end node is None.")
        else:
            self.logger.warn(
                "failed to create relation because api relation is None.")

    def transfer_belong_to_type(self, start_api_id):
        if start_api_id is not None:
            start_api_entity = APIEntity.find_by_id(self.session, start_api_id)
            if start_api_entity is not None:
                start_api_type = start_api_entity.api_type
                type_str = APIEntity.get_simple_type_string(start_api_type)
                type_str = type_str.replace("api", "")
                relation_str = "has" + type_str
                return relation_str
        return None
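
A presumed entry-point sketch tying the importers in these examples together. The import for GraphClient is an assumption (none of the snippets show its module path); the construction GraphClient(server_number=4) mirrors Code Example #5, and nodes must exist before relations and website properties can attach to them:

if __name__ == "__main__":
    client = GraphClient(server_number=4)              # assumed import, as in Code Example #5
    AllAPIEntityImporter().start_import(client)        # nodes first
    APIRelationImporter().start_import(client)         # then relations between nodes
    APIDocumentWebsiteImporter().start_import(client)  # finally doc-website properties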
Code Example #10
    def init_importer(self, client=None):
        if client is None:
            return
        self.logger = Logger(self.logger_file_name).get_log()

        self.wikidata_accessor = WikiDataGraphAccessor(client)
        property_name_dict = {}
        session = EngineFactory.create_session()
        wikidata_property_list = session.query(
            WikiDataProperty.wd_item_id, WikiDataProperty.property_name).all()
        for wikidata_property in wikidata_property_list:
            property_name_dict[wikidata_property.wd_item_id] = wikidata_property.property_name
        print("loaded all property names")
        self.wiki_creator = WikiDataNodeCreator(
            wikidata_graph_accessor=self.wikidata_accessor,
            init_property_from_file=False)
        self.wiki_creator.init_property_clean_Util(
            property_name_dict=property_name_dict)
        print("init wiki_creator")
Code Example #11
class APIDocumentWebsiteImporter:
    def __init__(self):
        self.logger_file_name = "import_api_document_website_to_neo4j"
        self.logger = None
        self.graphClient = None
        self.session = None

    def start_import(self, graphClient):
        self.logger = Logger(self.logger_file_name).get_log()
        if not self.session:
            self.session = EngineFactory.create_session()
        self.graphClient = graphClient
        all_apis = self.session.query(APIEntity).all()
        for api_entity in all_apis:
            api_id = api_entity.id
            api_document_website_list = APIDocumentWebsite.get_document_website_list_by_api_id(
                self.session, api_id)
            self.import_document_website_to_one_entity(
                api_id, api_document_website_list)

        print("import api doc url complete")

    def import_document_website_to_one_entity(self, api_id,
                                              api_document_website_list):
        if api_id is not None and api_document_website_list is not None:
            node = self.graphClient.find_node_by_api_id(api_id)
            if node is not None:
                index = 1
                for each in api_document_website_list:
                    website = each[0]
                    key = "api_document_website#" + str(index)
                    index += 1
                    if key not in dict(node).keys():
                        node[key] = website
                self.graphClient.push_node(node)
                self.logger.info("add document website property " + str(node))
            else:
                self.logger.warn(
                    "failed to add document website property because node is None")
        else:
            self.logger.warn(
                "failed to add document website property because api id is None")
Code Example #12
class AllAPIEntityImporter:
    def __init__(self):
        self.logger_file_name = "import_api_entity_to_neo4j"
        self.logger = None
        self.graphClient = None
        self.session = None

    def start_import(self, graphClient):
        self.logger = Logger(self.logger_file_name).get_log()
        if not self.session:
            self.session = EngineFactory.create_session()
        self.graphClient = graphClient

        all_apis = self.session.query(APIEntity).all()
        for api_entity in all_apis:
            self.import_one_api_entity(api_entity)
        print("import api entity complete")

    def import_one_api_entity(self, api_entity):
        api_type = APIEntity.get_simple_type_string(api_entity.api_type)
        print(api_type)
        property_dict = api_entity.__dict__
        property_dict.pop("_sa_instance_state")
        property_dict["api_id"] = property_dict.pop("id")
        builder = NodeBuilder()
        builder.add_entity_label().add_property(
            **property_dict).add_api_label().add_label(api_type)

        node = builder.build()
        # printing the node raises "ValueError: Invalid identifier" when the
        # node's qualified name is "byte" or "int", so this stays commented out
        # print node

        try:
            self.graphClient.create_or_update_api_node(node=node)
            self.logger.info('create node for api entity %s',
                             property_dict['api_id'])
        except Exception:
            self.logger.warn('failed to create node for api entity %s',
                             property_dict['api_id'])
            self.logger.exception('exception while creating api entity node')
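
import_one_api_entity converts a SQLAlchemy entity into a plain property dict by reading __dict__, dropping the _sa_instance_state bookkeeping attribute, and renaming the primary key. A standalone sketch of that conversion as a reusable helper; unlike the code above, it copies the dict so the live ORM object is not mutated (the helper name is illustrative):

def entity_to_property_dict(entity, id_key="api_id"):
    """Copy a SQLAlchemy ORM object's loaded column values into a plain dict."""
    property_dict = dict(entity.__dict__)            # copy, leave the ORM object intact
    property_dict.pop("_sa_instance_state", None)    # SQLAlchemy internal bookkeeping
    property_dict[id_key] = property_dict.pop("id")  # expose the primary key under a new name
    return property_dict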
Code Example #13
from db.engine_factory import ConnectionFactory, EngineFactory
from db.model import KnowledgeTableRowMapRecord, APIRelation, KnowledgeTableColumnMapRecord
from db.model_factory import KnowledgeTableFactory
from shared.logger_util import Logger

logger = Logger("import_belong_to_relation_for_jdk_method").get_log()

cur = ConnectionFactory.create_cursor_for_jdk_importer()
session = EngineFactory.create_session()
jdk_method_knowledge_table = KnowledgeTableFactory.get_jdk_method_table(
    session)

jdk_class_knowledge_table = KnowledgeTableFactory.get_jdk_class_table(session)
api_knowledge_table = KnowledgeTableFactory.get_api_entity_table(session)

api_relation_table = KnowledgeTableFactory.get_api_relation_table(session)

COMMIT_STEP = 5000


def is_imported(row_id):
    return KnowledgeTableColumnMapRecord.exist_import_record(
        session, jdk_method_knowledge_table, api_relation_table, row_id,
        "class_id")


def create_method_belong_to_relation(old_method_id, old_class_id):
    logger.info("old_method_id=%d old_class_id=%d", old_method_id,
Code Example #14
from shared.logger_util import Logger
from skgraph.graph.accessor.graph_accessor import GraphAccessor

_logger = Logger("QAGraphAccessor").get_log()


class QAGraphAccessor(GraphAccessor):
    def entity_linking_by_fulltext_search(self, name, top_number=50):
        """
        linking a name to a node
        :param name: the name search
        :param top_number: the top number of search result,defalt is top 10
        :return value is a list,each element is a dict d,
        get the node from d['node'],get the score of the result from d['weight']
        """
        try:
            query = "call apoc.index.search('entity', '{name}', {top_number}) YIELD node, weight return node,weight"
            query = query.format(name=name, top_number=top_number)
            record_list = self.graph.run(query)
            result_tuple = []
            for record in record_list:
                result_tuple.append({
                    "node": record["node"],
                    "weight": record["weight"]
                })

            return result_tuple
        except Exception:
            _logger.exception("parameters=%r", name)
            return []
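
Formatting name directly into the Cypher string above breaks as soon as the search term contains a quote. A sketch of the same call using query parameters instead; py2neo's graph.run accepts keyword parameters, and the $param syntax assumes Neo4j 3.x (which the apoc.index.search procedure already implies):

        query = ("call apoc.index.search('entity', $name, $top_number) "
                 "YIELD node, weight return node, weight")
        record_list = self.graph.run(query, name=name, top_number=top_number)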
Code Example #15
import codecs
import json
import os
import random

from WikiDataSPARQLWrapper import WikiDataSPARQLWrapper
from WikiDataEntityIDStorage import WikiDataEntityIDStorage
from shared.logger_util import Logger

_logger = Logger(logger="wikidata_item_id_selector").get_log()


class WikiDataItemSelector:
    wikiDataEntityIDStorage = WikiDataEntityIDStorage()
    seed_property_id_set = set([
        'P31',   # instance of
        'P279',  # subclass of
        'P361',  # part of
    ])

    def __init__(self):
        self.logger = _logger
        self.wikiDataEntityIDStorage.load()

        start_deny_id_list_path = os.path.join('.', 'start_deny_id_list.txt')
        if os.path.isfile(start_deny_id_list_path):
            print "start deny id file  exist"
            self.seed_deny_id_set = set(json.load(codecs.open(start_deny_id_list_path, 'r', 'utf-8')))
        else:
            print "start deny id file not exist"
Code Example #16
class SentenceLevelSemanticSearch:
    SORT_FUNCTION_ENTITIES_BRIDGE = 3
    SORT_FUNCTION_AVERAGE_ENTITY_GRAPH_SIMILAR = 4

    SORT_FUNCTION_AVERAGE_VECTOR = 2

    SORT_FUNCTION_NOT_AVERAGE_GRAPH_VECTOR = 1
    SORT_FUNCTION_SELECT_PART_ENTITY_LINK = 5

    def __init__(self):
        self._session = None
        self.kg_models = None
        self._entity_extractor = None

        # self._tf_idf_model = None

        self.qa_searcher = None
        self.semanticSearchAccessor = None
        self.defaultAccessor = None
        self._logger = None

    def init(self, vector_dir_path="./model/"):
        self.kg_models = KnowledgeGraphFeafureModels()
        self.kg_models.init(vector_dir_path=vector_dir_path)

        self._session = EngineFactory.create_session(echo=False)
        self._entity_extractor = EntityExtractor()

        # self._tf_idf_model = TFIDFModel()
        # self._tf_idf_model.load(dict_type=2)

        self.qa_searcher = QAEntitySearcher()
        client = GraphClient(server_number=4)
        self.semanticSearchAccessor = SemanticSearchAccessor(client)
        self.defaultAccessor = DefaultGraphAccessor(client)
        self._logger = Logger("QAResultSearch").get_log()

    def semantic_search(self,
                        query_text,
                        each_np_candidate_entity_num=50,
                        sort_function=SORT_FUNCTION_SELECT_PART_ENTITY_LINK,
                        sentence_limit=20,
                        weight_context_sim=0.6,
                        weight_graph_sim=0.4):
        try:
            qa_info_manager = self.get_candidate_sentences(query_text,
                                                           each_np_candidate_entity_num=each_np_candidate_entity_num)

            # sentence_list=qa_info_manager.get_candidate_sentence_list()
            #
            # entity_for_qa_set
            # entity_for_qa_set.print_informat()
            # entity_list = entity_for_qa_set.get_entity_node_list()
            # chunk_to_related_entity_list_map = entity_for_qa_set.keyword_2_entitynodemap

            self._logger.info("entity_list =%d sentence_list=%d" % (
                qa_info_manager.get_entity_size(), qa_info_manager.get_sentence_size()))
            # for n in entity_list:
            #     print("entity", n)
            new_sentence_list = []
            # if sort_function == SentenceLevelSemanticSearch.SORT_FUNCTION_NOT_AVERAGE_GRAPH_VECTOR:
            #     new_sentence_list = self.sort_sentence_by_build_graph_vector_for_query_in_semantic_weight(query_text,
            #                                                                                               sentence_list=sentence_list,
            #                                                                                               entity_list=entity_list,
            #                                                                                               weight_context_sim=weight_context_sim,
            #                                                                                               weight_graph_sim=weight_graph_sim)
            #
            # if sort_function == SentenceLevelSemanticSearch.SORT_FUNCTION_AVERAGE_VECTOR:
            #     new_sentence_list = self.sort_sentence_by_build_average_graph_vector_for_query(query_text,
            #                                                                                    sentence_list=sentence_list,
            #                                                                                    entity_list=entity_list,
            #                                                                                    weight_context_sim=weight_context_sim,
            #                                                                                    weight_graph_sim=weight_graph_sim
            #                                                                                    )
            #
            # if sort_function == SentenceLevelSemanticSearch.SORT_FUNCTION_ENTITIES_BRIDGE:
            #     new_sentence_list = self.sort_sentence_by_entities_as_bridge(query_text,
            #                                                                  sentence_list=sentence_list,
            #                                                                  entity_list=entity_list,
            #                                                                  weight_context_sim=weight_context_sim,
            #                                                                  weight_graph_sim=weight_graph_sim)
            #
            # if sort_function == SentenceLevelSemanticSearch.SORT_FUNCTION_AVERAGE_ENTITY_GRAPH_SIMILAR:
            #     new_sentence_list = self.sort_sentence_by_entities_for_graph_similarity_as_bridge(query_text,
            #                                                                                       sentence_list=sentence_list,
            #                                                                                       entity_list=entity_list,
            #                                                                                       weight_context_sim=weight_context_sim,
            #                                                                                       weight_graph_sim=weight_graph_sim)

            if sort_function == SentenceLevelSemanticSearch.SORT_FUNCTION_SELECT_PART_ENTITY_LINK:
                new_sentence_list = self.sort_sentence_by_select_part_entity_as_bridge(query_text,
                                                                                       qa_info_manager=qa_info_manager,
                                                                                       weight_context_sim=weight_context_sim,
                                                                                       weight_graph_sim=weight_graph_sim,
                                                                                       )


            result_list = qa_info_manager.fill_api_id_in_result_list(new_sentence_list[:sentence_limit])

            self._logger.info("result_list =%d " % len(result_list))

            return result_list
        except Exception:
            self._logger.exception("----qaexception----")
            traceback.print_exc()
            return []

    def get_candidate_sentences(self, query_text, each_np_candidate_entity_num=20):

        chunk_list = self.get_chunk_from_text(query_text)
        print("chunk num=%d %s" % (len(chunk_list), ",".join(chunk_list)))

        qa_info_manager = self.search_entity_by_fulltext(chunk_list, each_np_candidate_entity_num)
        qa_info_manager.start_create_node_info_collection()

        print("related entity for qa", qa_info_manager)

        entity_for_qa_list = qa_info_manager.get_all_entity_for_qa_list()
        print("entity_for_qa_list num=%d" % len(entity_for_qa_list))

        sentence_list = self.search_sentence_by_entity_for_qa_list(entity_for_qa_list)
        print("sentence_list num=%d" % len(sentence_list))
        qa_info_manager.add_sentence_node_list(sentence_list)

        return qa_info_manager

    def expand_the_chunk_by_words(self, final_chunk_list):
        expanded_words = []
        for chunk in final_chunk_list:
            expanded_words.append(chunk)
            for word in chunk.split(" "):
                expanded_words.append(word)
        print("word set", expanded_words)
        return list(set(expanded_words))

    def get_chunk_from_text(self, text):

        final_chunk_list = self._entity_extractor.get_all_possible_key_word_from_text(text)

        return final_chunk_list

    def search_entity_by_fulltext(self, chunk_list, each_np_candidate_entity_num=20):
        qa_info_manager = QACacheInfoManager(semanticSearchAccessor=self.semanticSearchAccessor,
                                             defaultSearchAccessor=self.defaultAccessor,
                                             kg_models=self.kg_models)
        for chunk in chunk_list:
            related_entity_list = self.qa_searcher.search_related_entity(chunk, each_np_candidate_entity_num)
            qa_info_manager.add(chunk, related_entity_list)
            related_entity_for_api = self.qa_searcher.search_related_entity_for_api(chunk, each_np_candidate_entity_num)
            qa_info_manager.add(chunk, related_entity_for_api)
        return qa_info_manager

    def search_all_entity_by_fulltext_by_half(self, chunk, each_np_candidate_entity_num=20):
        qa_info_manager = QACacheInfoManager(semanticSearchAccessor=self.semanticSearchAccessor,
                                             defaultSearchAccessor=self.defaultAccessor,
                                             kg_models=self.kg_models)

        related_entity_for_api = self.qa_searcher.search_related_entity_for_api(chunk, each_np_candidate_entity_num // 2)
        qa_info_manager.add(chunk, related_entity_for_api)

        related_entity_list = self.qa_searcher.search_related_entity(chunk, each_np_candidate_entity_num // 2)
        qa_info_manager.add(chunk, related_entity_list)

        return qa_info_manager

    def search_sentence_by_entity_for_qa_list(self, entity_for_qa_list):
        entity_id_string_list = [str(entity_for_qa.kg_id) for entity_for_qa in entity_for_qa_list]
        entity_id_string_list = list(set(entity_id_string_list))
        return self.semanticSearchAccessor.search_sentence_by_entity_list(entity_id_string_list=entity_id_string_list)

    def get_relation_by_nodes(self, node_list):
        return self.semanticSearchAccessor.get_nodes_relation(node_list)

    def sort_sentence_by_entities_as_bridge(self, question,
                                            sentence_list,
                                            entity_list,
                                            weight_context_sim=0.5,
                                            weight_graph_sim=0.5
                                            ):
        self._logger.info("run sort_sentence_by_entities_as_bridge get result=%d" % len(sentence_list))

        question_vec = self.kg_models.get_question_entity_vector(question)

        entity_vec_list, entity_graph_vec_list = self.kg_models.get_vectors_for_entity_list(entity_list)
        sentence_vec_list, sentence_graph_vec_list = self.kg_models.get_vectors_for_entity_list(sentence_list)

        qe_sim_np = MatrixCalculation.compute_cossin_for_vec_to_matrix_normalize(question_vec, entity_vec_list)
        qe_sim_np = qe_sim_np / qe_sim_np.sum()

        kg_context_sim = MatrixCalculation.compute_cossin_for_matrix_to_matrix_normalize(entity_vec_list,
                                                                                         sentence_vec_list)
        kg_graph_sim = MatrixCalculation.compute_cossin_for_matrix_to_matrix_normalize(entity_graph_vec_list,
                                                                                       sentence_graph_vec_list)

        qs_context_sim = weight_context_sim * qe_sim_np * kg_context_sim
        qs_graph_sim = weight_graph_sim * qe_sim_np * kg_graph_sim

        qs_sim = qs_context_sim + qs_graph_sim
        qs_sim = qs_sim.tolist()[0]
        qs_context_sim = qs_context_sim.tolist()[0]
        qs_graph_sim = qs_graph_sim.tolist()[0]

        for sum_sim, sentence, context_sim, graph_sim in zip(qs_sim, sentence_list, qs_context_sim, qs_graph_sim):
            sentence["qs_sim"] = sum_sim
            sentence["qs_context_sim"] = context_sim
            sentence["qs_graph_sim"] = graph_sim

        result = []
        for sentence in sentence_list:
            result.append({
                "kg_id": self.defaultAccessor.get_id_for_node(sentence),
                "sentence_id": sentence["sentence_id"],
                "sentence_type": sentence["sentence_type_code"],
                "text": sentence["sentence_text"],
                "qs_sim": sentence["qs_sim"],
                "qs_context_sim": sentence["qs_context_sim"],
                "qs_graph_sim": sentence["qs_graph_sim"]

            })
        self._logger.info("run sort_sentence_by_entities_as_bridge get result num=%d" % len(result))
        result.sort(key=lambda k: (k.get('qs_sim', 0)), reverse=True)

        return result

    def sort_sentence_by_entities_for_graph_similarity_as_bridge(self, question,
                                                                 sentence_list,
                                                                 entity_list,
                                                                 weight_context_sim=0.5,
                                                                 weight_graph_sim=0.5
                                                                 ):
        self._logger.info(
            "run sort_sentence_by_entities_for_graph_similarity_as_bridge get result=%d" % len(sentence_list))

        question_context_vec = self.kg_models.get_question_entity_vector(question)

        entity_vec_list, entity_graph_vec_list = self.kg_models.get_vectors_for_entity_list(entity_list)
        sentence_vec_list, sentence_graph_vec_list = self.kg_models.get_vectors_for_entity_list(sentence_list)

        qe_sim_np = np.ones((1, len(entity_list)))
        qe_sim_np = qe_sim_np / qe_sim_np.sum()
        qs_context_sim = MatrixCalculation.compute_cossin_for_vec_to_matrix_normalize(question_context_vec,
                                                                                      sentence_vec_list)

        kg_graph_sim = MatrixCalculation.compute_cossin_for_matrix_to_matrix_normalize(entity_graph_vec_list,
                                                                                       sentence_graph_vec_list)

        qs_context_sim = weight_context_sim * qs_context_sim
        qs_graph_sim = weight_graph_sim * qe_sim_np * kg_graph_sim

        qs_sim = qs_context_sim + qs_graph_sim
        qs_sim = qs_sim.tolist()[0]
        qs_context_sim = qs_context_sim.tolist()[0]
        qs_graph_sim = qs_graph_sim.tolist()[0]

        for sum_sim, sentence, context_sim, graph_sim in zip(qs_sim, sentence_list, qs_context_sim, qs_graph_sim):
            sentence["qs_sim"] = sum_sim
            sentence["qs_context_sim"] = context_sim
            sentence["qs_graph_sim"] = graph_sim

        result = []
        for sentence in sentence_list:
            result.append({
                "kg_id": self.defaultAccessor.get_id_for_node(sentence),
                "sentence_id": sentence["sentence_id"],
                "sentence_type": sentence["sentence_type_code"],
                "text": sentence["sentence_text"],
                "qs_sim": sentence["qs_sim"],
                "qs_context_sim": sentence["qs_context_sim"],
                "qs_graph_sim": sentence["qs_graph_sim"]

            })
        self._logger.info("run sort_sentence_by_entities_as_bridge get result num=%d" % len(result))
        result.sort(key=lambda k: (k.get('qs_sim', 0)), reverse=True)

        print("sorted result")
        for t in result:
            print("test sort", t)
        print(result[:100])

        return result

    def sort_sentence_by_build_average_graph_vector_for_query(self, question, sentence_list, entity_list,
                                                              weight_context_sim=0.5, weight_graph_sim=0.5
                                                              ):
        self._logger.info(
            "run sort_sentence_by_build_average_graph_vector_for_query get sentence_list=%d" % len(sentence_list))

        kg_models = self.kg_models
        question_context_vec = kg_models.get_question_entity_vector(question)
        entity_vec_list, entity_graph_vec_list = self.kg_models.get_vectors_for_entity_list(entity_list)
        sentence_vec_list, sentence_graph_vec_list = self.kg_models.get_vectors_for_entity_list(sentence_list)

        entity_list, entity_vec_list, entity_graph_vec_list = self.remove_the_not_related_entity(entity_graph_vec_list,
                                                                                                 entity_list,
                                                                                                 entity_vec_list,
                                                                                                 question_context_vec)

        query_graph_vector = kg_models.get_question_graph_vector_by_average_all_entities(
            question=question,
            entity_graph_vec_list=entity_graph_vec_list)

        qs_context_sim = MatrixCalculation.compute_cossin_for_vec_to_matrix_normalize(question_context_vec,
                                                                                      sentence_vec_list)

        qs_graph_sim = MatrixCalculation.compute_cossin_for_vec_to_matrix_normalize(query_graph_vector,
                                                                                    sentence_graph_vec_list)
        qs_context_sim = weight_context_sim * qs_context_sim

        qs_graph_sim = weight_graph_sim * qs_graph_sim

        qs_sim = qs_context_sim + qs_graph_sim

        qs_sim = qs_sim.tolist()[0]
        qs_context_sim = qs_context_sim.tolist()[0]
        qs_graph_sim = qs_graph_sim.tolist()[0]

        for sum_sim, sentence, context_sim, graph_sim in zip(qs_sim, sentence_list, qs_context_sim, qs_graph_sim):
            sentence["qs_sim"] = sum_sim
            sentence["qs_context_sim"] = context_sim
            sentence["qs_graph_sim"] = graph_sim

        result = []
        for sentence in sentence_list:
            result.append({
                "kg_id": self.defaultAccessor.get_id_for_node(sentence),
                "sentence_id": sentence["sentence_id"],
                "text": sentence["sentence_text"],
                "sentence_type": sentence["sentence_type_code"],
                "qs_sim": sentence["qs_sim"],
                "qs_context_sim": sentence["qs_context_sim"],
                "qs_graph_sim": sentence["qs_graph_sim"]

            })
        self._logger.info("run sort_sentence_by_build_average_graph_vector_for_query get result num=%d" % len(result))
        result.sort(key=lambda k: (k.get('qs_sim', 0)), reverse=True)

        return result

    def sort_sentence_by_select_part_entity_as_bridge(self, question,
                                                      qa_info_manager,
                                                      # sentence_list,
                                                      # entity_list,
                                                      weight_context_sim=0.6,
                                                      weight_graph_sim=0.4,
                                                      # chunk_to_related_entity_list_map=None,
                                                      ):
        self._logger.info(
            "run sort part entity result=%d" % qa_info_manager.get_sentence_size())

        print("entity for node")
        qa_info_manager.print_entities()
        print("sentence for node")
        # qa_info_manager.print_sentences()

        entity_info_collection = qa_info_manager.get_entity_info_collection()
        sentence_info_collection = qa_info_manager.get_sentence_info_collection()
        entity_info_collection.init_vectors(self.kg_models)
        sentence_info_collection.init_vectors(self.kg_models)
        sentence_list = sentence_info_collection.get_entity_list()
        entity_vec_list = entity_info_collection.get_entity_context_list()
        entity_graph_vec_list = entity_info_collection.get_entity_graph_list()
        entity_list = entity_info_collection.get_entity_list()
        sentence_vec_list = sentence_info_collection.get_entity_context_list()
        sentence_graph_vec_list = sentence_info_collection.get_entity_graph_list()

        question_context_vec = self.kg_models.get_question_entity_vector(question)

        entity_list, entity_vec_list, entity_graph_vec_list = self.get_top_related_entity_info_list(
            question_context_vec=question_context_vec, qa_info_manager=qa_info_manager)

        # entity_list, entity_vec_list, entity_graph_vec_list = self.remove_the_not_related_entity_by_only_save_one_for_each(
        #     entity_graph_vec_list=entity_graph_vec_list, entity_vec_list=entity_vec_list, entity_list=entity_list,
        #     question_context_vec=question_context_vec,
        #     qa_info_manager=qa_info_manager
        #
        # )

        qs_context_sim = MatrixCalculation.compute_cossin_for_vec_to_matrix_normalize(question_context_vec,
                                                                                      sentence_vec_list)
        # todo:change to the average graph similarity
        # qs_graph_sim = self.get_graph_similarity_by_average_entity_graph_vector(entity_graph_vec_list, question,
        #                                                                         sentence_graph_vec_list)

        qs_graph_sim = self.get_query_to_sentence_graph_sim_by_select_top_enttity(entity_graph_vec_list, entity_list,
                                                                                  entity_vec_list,
                                                                                  sentence_graph_vec_list,
                                                                                  sentence_vec_list)

        qs_context_sim = weight_context_sim * qs_context_sim
        qs_graph_sim = weight_graph_sim * qs_graph_sim

        qs_sim = qs_context_sim + qs_graph_sim
        qs_sim = qs_sim.tolist()[0]
        qs_context_sim = qs_context_sim.tolist()[0]
        qs_graph_sim = qs_graph_sim.tolist()[0]

        for sum_sim, sentence, context_sim, graph_sim in zip(qs_sim, sentence_list, qs_context_sim, qs_graph_sim):
            sentence["qs_sim"] = sum_sim
            sentence["qs_context_sim"] = context_sim
            sentence["qs_graph_sim"] = graph_sim

        result = []
        for sentence in sentence_list:
            result.append({
                "kg_id": self.defaultAccessor.get_id_for_node(sentence),
                "sentence_id": sentence["sentence_id"],
                "sentence_type": sentence["sentence_type_code"],
                "text": sentence["sentence_text"],
                "qs_sim": sentence["qs_sim"],
                "qs_context_sim": sentence["qs_context_sim"],
                "qs_graph_sim": sentence["qs_graph_sim"]

            })
        self._logger.info("run sort_sentence_by_entities_as_bridge get result num=%d" % len(result))
        result.sort(key=lambda k: (k.get('qs_sim', 0)), reverse=True)

        print(result[:100])

        return result

    def get_graph_similarity_by_average_entity_graph_vector(self, entity_graph_vec_list, question,
                                                            sentence_graph_vec_list):
        query_graph_vector = self.kg_models.get_question_graph_vector_by_average_all_entities(
            question=question,
            entity_graph_vec_list=entity_graph_vec_list)
        qs_graph_sim = MatrixCalculation.compute_cossin_for_vec_to_matrix_normalize(query_graph_vector,
                                                                                    sentence_graph_vec_list)
        return qs_graph_sim

    def get_graph_similarity_average_entity_graph_vector_similarity(self, entity_graph_vec_list, question,
                                                                    sentence_graph_vec_list):
        # query_graph_vector = self.kg_models.get_question_graph_vector_by_average_all_entities(
        #     question=question,
        #     entity_graph_vec_list=entity_graph_vec_list)
        qs_graph_sim = MatrixCalculation.compute_cossin_for_matrix_to_matrix_normalize(sentence_graph_vec_list,
                                                                                       entity_graph_vec_list)
        return np.mean(qs_graph_sim, axis=1)

    def get_query_to_sentence_graph_sim_by_select_top_enttity(self, entity_graph_vec_list, entity_list, entity_vec_list,
                                                              sentence_graph_vec_list, sentence_vec_list):
        # kg_se_graph_sim = MatrixCalculation.compute_cossin_for_matrix_to_matrix_normalize(sentence_graph_vec_list,
        #                                                                                   entity_graph_vec_list,
        #                                                                                   )
        kg_se_context_sim = MatrixCalculation.compute_cossin_for_matrix_to_matrix_normalize(
            sentence_vec_list,
            entity_vec_list)
        # TODO
        # kg_se_sim = 0.5 * kg_se_graph_sim + 0.5 * kg_se_context_sim
        kg_se_sim = kg_se_context_sim

        print("final entity list", len(entity_list), entity_list)
        select_linking_entity_num = min(5, len(entity_list))
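        # keep, per sentence, a one-hot row selecting its top-n most similar entities;
        # multiplying by the entity graph vectors then averages those n graph vectors
        # into a per-sentence pseudo query graph vector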
        onehot_maxsim_se_matrix = MatrixCalculation.get_most_similar_top_n_entity_as_matrix(
            top_n=select_linking_entity_num, s_e_similarity_matrix=kg_se_sim)
        s_query_graph_vec_matrix = onehot_maxsim_se_matrix * np.matrix(
            entity_graph_vec_list) / select_linking_entity_num
        qs_graph_sim = MatrixCalculation.compute_cossin_for_one_to_one_in_two_list_normalize(sentence_graph_vec_list,
                                                                                             s_query_graph_vec_matrix.getA())
        return qs_graph_sim

    def remove_the_not_related_entity_by_only_save_one_for_each(self, entity_graph_vec_list, entity_list,
                                                                entity_vec_list, question_context_vec,
                                                                qa_info_manager):
        chunk_to_related_entity_list_map = qa_info_manager.keyword_2_entitynodemap
        qe_sim_np = MatrixCalculation.compute_cossin_for_vec_to_matrix_normalize(question_context_vec,
                                                                                 entity_vec_list)

        entity_info_sumary_list = []
        for (entity, sim, entity_vec, entity_graph_vec) in zip(entity_list, qe_sim_np.getA()[0], entity_vec_list,
                                                               entity_graph_vec_list):
            print("after first removing sim=", sim, "entity=", entity)
            entity_info_sumary_list.append({"entity": entity,
                                            "sim": sim,
                                            "entity_vec": entity_vec,
                                            "entity_graph_vec": entity_graph_vec
                                            })

        entity_info_sumary_list.sort(key=lambda k: (k.get('sim', 0)), reverse=True)

        valid_word_set = set([])
        word_to_related_entity_list_map = {}

        for chunk, related_entity_list in chunk_to_related_entity_list_map.items():
            word = chunk
            if word not in valid_word_set:
                valid_word_set.add(word)
                word_to_related_entity_list_map[word] = related_entity_list
            else:
                word_to_related_entity_list_map[word].extend(related_entity_list)

        # clean_entity_info_list = self.get_clean_entity_for_each_word_by_max_similarity(entity_info_sumary_list,
        #                                                                                word_to_related_entity_list_map)
        #
        clean_entity_info_list = self.get_clean_entity_for_each_word_by_max_n_similarity(entity_info_sumary_list,
                                                                                         word_to_related_entity_list_map)

        new_entity_list = []
        new_entity_graph_vec_list = []
        new_entity_vec_list = []
        for entity_info_sumary in clean_entity_info_list:
            new_entity_list.append(entity_info_sumary["entity"])
            new_entity_graph_vec_list.append(entity_info_sumary["entity_graph_vec"])
            new_entity_vec_list.append(entity_info_sumary["entity_vec"])
            print("final save sim=", entity_info_sumary["sim"], "entity=", entity_info_sumary["entity"])

        return new_entity_list, new_entity_vec_list, new_entity_graph_vec_list

    def get_top_related_entity_info_list(self, question_context_vec,
                                         qa_info_manager):

        node_info_collection = qa_info_manager.get_node_info_collection()
        node_info_collection.fill_each_entity_with_similary_to_question(question_context_vec)
        node_info_collection.sort_by_qe_sim()

        # selected_entity_info_list = qa_info_manager.get_top_node_info_by_each_keywords_three_different_type()
        selected_entity_info_list = qa_info_manager.get_top_node_info_by_each_keywords()

        new_entity_list = []
        new_entity_vec_list = []
        new_entity_graph_vec_list = []
        for node_info in selected_entity_info_list:
            new_entity_list.append(node_info.entity_node)
            new_entity_vec_list.append(node_info.entity_context_vec)
            new_entity_graph_vec_list.append(node_info.entity_graph_vec)

        return new_entity_list, new_entity_vec_list, new_entity_graph_vec_list

    def get_clean_entity_for_each_word_by_max_n_similarity(self, entity_info_sumary_list,
                                                           word_to_related_entity_list_map):
        clean_entity_kg_id_list = set([])
        print("start get_clean_entity_infi_sumary_list ")
        word_name_entity_mark = {}
        for valid_word, related_entity_list in word_to_related_entity_list_map.items():
            print("valid word=", valid_word)

            entity_info_list = self.get_first_from_entity_info_sumary_list_and_in_related_entity_list(
                entity_info_sumary_list, related_entity_list, 3)

            # for entity_info in entity_info_list:
            print("get candidate for word=", valid_word, entity_info_list)
            word_name_entity_mark[valid_word] = entity_info_list

        # deduplicate entities across all words after collecting every candidate
        clean_entity_info_list = []
        clean_entity_kg_id_list = set([])

        for word, entity_info_list in word_name_entity_mark.items():
            for entity_info in entity_info_list:
                kg_id = self.defaultAccessor.get_id_for_node(entity_info["entity"])
                if kg_id not in clean_entity_kg_id_list:
                    clean_entity_info_list.append(entity_info)
                    clean_entity_kg_id_list.add(kg_id)
                    print("valid word=", word, entity_info["entity"])
        return clean_entity_info_list

    def get_clean_entity_for_each_word_by_max_similarity(self, entity_info_sumary_list,
                                                         word_to_related_entity_list_map):
        clean_entity_kg_id_list = set([])
        print("start get_clean_entity_infi_sumary_list ")
        word_name_entity_mark = {}
        for valid_word, related_entity_list in word_to_related_entity_list_map.items():
            print("valid word=", valid_word)

            entity_info_list = self.get_first_from_entity_info_sumary_list_and_in_related_entity_list(
                entity_info_sumary_list, related_entity_list)

            for entity_info in entity_info_list:
                print("get candidate for word=", valid_word, entity_info["entity"])
                word_name_entity_mark[valid_word] = entity_info

        # deduplicate entities across all words after collecting every candidate
        clean_entity_info_list = []
        clean_entity_kg_id_list = set([])

        for word, entity_info in word_name_entity_mark.items():
            kg_id = self.defaultAccessor.get_id_for_node(entity_info["entity"])
            if kg_id not in clean_entity_kg_id_list:
                clean_entity_info_list.append(entity_info)
                clean_entity_kg_id_list.add(kg_id)
                print("valid word=", word, entity_info["entity"])
        return clean_entity_info_list

    def get_clean_entity_infi_sumary_list(self, entity_info_sumary_list, word_to_related_entity_list_map):
        clean_entity_kg_id_list = set([])
        print("start get_clean_entity_infi_sumary_list ")
        word_name_entity_mark = {}
        for valid_word, related_entity_list in word_to_related_entity_list_map.items():
            print("valid word=", valid_word)

            entity_info_list = self.get_first_from_entity_info_sumary_list_and_in_related_entity_list(
                entity_info_sumary_list, related_entity_list)

            for entity_info in entity_info_list:
                kg_id = self.defaultAccessor.get_id_for_node(entity_info["entity"])
                print("get candidate for word=", valid_word, entity_info["entity"])

                if kg_id not in clean_entity_kg_id_list:
                    if valid_word not in word_name_entity_mark.keys():
                        word_name_entity_mark[valid_word] = entity_info
                    else:
                        old_entity_info = word_name_entity_mark[valid_word]
                        if entity_info["sim"] > old_entity_info["sim"]:
                            word_name_entity_mark[valid_word] = entity_info

                    for seperate_name in valid_word.split(" "):
                        if seperate_name not in word_name_entity_mark.keys():
                            word_name_entity_mark[seperate_name] = entity_info
                        else:
                            old_entity_info = word_name_entity_mark[seperate_name]
                            if entity_info["sim"] > old_entity_info["sim"]:
                                word_name_entity_mark[seperate_name] = entity_info

                clean_entity_kg_id_list.add(kg_id)

        # deduplicate entities across all words after collecting every candidate
        clean_entity_info_list = []
        clean_entity_kg_id_list = set([])

        for word, entity_info in word_name_entity_mark.items():
            kg_id = self.defaultAccessor.get_id_for_node(entity_info["entity"])
            if kg_id not in clean_entity_kg_id_list:
                clean_entity_info_list.append(entity_info)
                clean_entity_kg_id_list.add(kg_id)
                print("valid word=", word, entity_info["entity"])
        return clean_entity_info_list

    def remove_the_not_related_entity(self, entity_graph_vec_list, entity_list, entity_vec_list, question_context_vec):

        qe_sim_np = MatrixCalculation.compute_cossin_for_vec_to_matrix_normalize(question_context_vec,
                                                                                 entity_vec_list)
        print("qeustion to entity similary")

        new_entity_list = []
        new_entity_vec_list = []
        new_entity_graph_vec_list = []
        qe_sim_clean = []
        for (entity, sim, entity_vec, entity_graph_vec) in zip(entity_list, qe_sim_np.getA()[0], entity_vec_list,
                                                               entity_graph_vec_list):
            print("sim=", sim, "entity=", entity)
            if sim > MIN_RELATED_ENTITY_SIMILARITY:
                print("adding ", entity)
                new_entity_list.append(entity)
                new_entity_vec_list.append(entity_vec)
                new_entity_graph_vec_list.append(entity_graph_vec)
                qe_sim_clean.append(sim)

        entity_list = new_entity_list
        entity_vec_list = new_entity_vec_list
        entity_graph_vec_list = new_entity_graph_vec_list

        new_entity_list = []
        new_entity_vec_list = []
        new_entity_graph_vec_list = []

        entity_info_sumary_list = []

        for (entity, sim, entity_vec, entity_graph_vec) in zip(entity_list, qe_sim_clean, entity_vec_list,
                                                               entity_graph_vec_list):
            print("after first removing sim=", sim, "entity=", entity)
            entity_info_sumary_list.append({"entity": entity,
                                            "sim": sim,
                                            "entity_vec": entity_vec,
                                            "entity_graph_vec": entity_graph_vec
                                            })

        entity_info_sumary_list.sort(key=lambda k: (k.get('sim', 0)), reverse=True)

        api_class_name_set = set([])

        new_entity_info_sumary_list = []
        for entity_info_sumary in entity_info_sumary_list:
            if entity_info_sumary["entity"].has_label("api"):
                qualified_name = entity_info_sumary["entity"]["qualified_name"]
                if qualified_name in api_class_name_set:
                    continue
                if "(" in qualified_name:
                    simple_name = qualified_name.split("(")[0]

                    class_name = ".".join(simple_name.split(".")[:-1])
                    if class_name in api_class_name_set:
                        continue
                    else:
                        api_class_name_set.add(class_name)
                        new_entity_info_sumary_list.append(entity_info_sumary)
                else:
                    api_class_name_set.add(qualified_name)
                    new_entity_info_sumary_list.append(entity_info_sumary)
            else:
                new_entity_info_sumary_list.append(entity_info_sumary)

        for entity_info_sumary in new_entity_info_sumary_list:
            new_entity_list.append(entity_info_sumary["entity"])
            new_entity_graph_vec_list.append(entity_info_sumary["entity_graph_vec"])
            new_entity_vec_list.append(entity_info_sumary["entity_vec"])
            print("final save sim=", entity_info_sumary["sim"], "entity=", entity_info_sumary["entity"])

        return new_entity_list, new_entity_vec_list, new_entity_graph_vec_list

    def sort_sentence_by_build_graph_vector_for_query_in_semantic_weight(self, question, sentence_list, entity_list,
                                                                         weight_context_sim=0.5, weight_graph_sim=0.5
                                                                         ):

        self._logger.info(
            "run sort_sentence_by_build_graph_vector_for_query_in_semantic_weight get sentence_list=%d" % len(
                sentence_list))

        kg_models = self.kg_models
        question_context_vec = kg_models.get_question_entity_vector(question)
        entity_vec_list, entity_graph_vec_list = self.kg_models.get_vectors_for_entity_list(entity_list)
        sentence_vec_list, sentence_graph_vec_list = self.kg_models.get_vectors_for_entity_list(sentence_list)

        query_graph_vector = kg_models.get_question_graph_vector_by_semantic_weight_all_entities(
            question_context_vec=question_context_vec,
            entity_context_vec_list=entity_vec_list,
            entity_graph_vec_list=entity_graph_vec_list)

        qs_context_sim = MatrixCalculation.compute_cossin_for_vec_to_matrix_normalize(question_context_vec,
                                                                                      sentence_vec_list)

        qs_graph_sim = MatrixCalculation.compute_cossin_for_vec_to_matrix_normalize(query_graph_vector,
                                                                                    sentence_graph_vec_list)
        qs_context_sim = weight_context_sim * qs_context_sim

        qs_graph_sim = weight_graph_sim * qs_graph_sim

        qs_sim = qs_context_sim + qs_graph_sim

        qs_sim = qs_sim.tolist()[0]
        qs_context_sim = qs_context_sim.tolist()[0]
        qs_graph_sim = qs_graph_sim.tolist()[0]

        for sum_sim, sentence, context_sim, graph_sim in zip(qs_sim, sentence_list, qs_context_sim, qs_graph_sim):
            sentence["qs_sim"] = sum_sim
            sentence["qs_context_sim"] = context_sim
            sentence["qs_graph_sim"] = graph_sim

        result = []
        for sentence in sentence_list:
            result.append({
                "kg_id": self.defaultAccessor.get_id_for_node(sentence),
                "sentence_id": sentence["sentence_id"],
                "text": sentence["sentence_text"],
                "qs_sim": sentence["qs_sim"],
                "qs_context_sim": sentence["qs_context_sim"],
                "qs_graph_sim": sentence["qs_graph_sim"]
            })
        self._logger.info(
            "run sort_sentence_by_build_graph_vector_for_query_in_semantic_weight get result=%d" % len(result))
        result.sort(key=lambda k: (k.get('qs_sim', 0)), reverse=True)

        return result

    def get_all_entity(self, entity_for_qa_list):
        entity_id_string_list = [str(entity_for_qa.kg_id) for entity_for_qa in entity_for_qa_list]
        entity_id_string_list = list(set(entity_id_string_list))
        return self.semanticSearchAccessor.get_all_entity(entity_id_string_list=entity_id_string_list)

    def get_first_from_entity_info_sumary_list_and_in_related_entity_list(self, entity_info_sumary_list,
                                                                          related_entity_list, top_relate_entity_num=1):
        return_result_list = []
        for entity_info in entity_info_sumary_list:
            kg_id = self.defaultAccessor.get_id_for_node(entity_info["entity"])
            entity = self.get_entity_from_entity_list_by_kgid(kg_id, related_entity_list)
            if entity is not None:
                return_result_list.append(entity_info)
                if len(return_result_list) >= top_relate_entity_num:
                    return return_result_list
        # return whatever matched, even if fewer than top_relate_entity_num
        return return_result_list

    def get_entity_from_entity_list_by_kgid(self, kg_id, related_entity_list):
        for related_entity in related_entity_list:
            if related_entity.kg_id == kg_id:
                return related_entity
        return None
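The ranking in sort_sentence_by_build_graph_vector_for_query_in_semantic_weight above reduces to a weighted sum of two cosine similarities: question-to-sentence in context-vector space and query-graph-to-sentence in graph-vector space. A minimal numpy sketch of that combination follows; the names are illustrative, and the project's real MatrixCalculation helpers are not shown here.

import numpy as np

def rank_sentences(question_vec, sentence_vecs,
                   query_graph_vec, sentence_graph_vecs,
                   w_context=0.5, w_graph=0.5):
    def cos(v, m):
        # cosine similarity of one vector against every matrix row
        v = v / np.linalg.norm(v)
        m = m / np.linalg.norm(m, axis=1, keepdims=True)
        return m.dot(v)

    # weighted sum of context similarity and graph similarity
    sim = (w_context * cos(question_vec, sentence_vecs)
           + w_graph * cos(query_graph_vec, sentence_graph_vecs))
    return np.argsort(-sim)  # sentence indices, most similar first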
Code example #17
from db.engine_factory import EngineFactory
from db.cursor_factory import ConnectionFactory
from db.model import KnowledgeTableRowMapRecord, APIDocumentWebsite, KnowledgeTableColumnMapRecord
from db.model_factory import KnowledgeTableFactory
from shared.logger_util import Logger

logger = Logger("import_doc_website_for_jdk_package").get_log()

cur = ConnectionFactory.create_cursor_for_jdk_importer()
session = EngineFactory.create_session()
jdk_package_knowledge_table = KnowledgeTableFactory.get_jdk_package_table(
    session)
api_knowledge_table = KnowledgeTableFactory.get_api_entity_table(session)

api_document_website_table = KnowledgeTableFactory.get_api_document_website_table(
    session)


def create_doc_website_relation(old_package_id, doc_website):
    if doc_website is None:
        logger.error("no doc_website for %d", old_package_id)
        return None
    new_package_api_entity_id = KnowledgeTableRowMapRecord.get_end_row_id(
        session=session,
        start_knowledge_table=jdk_package_knowledge_table,
        end_knowledge_table=api_knowledge_table,
        start_row_id=old_package_id)
    if new_package_api_entity_id is None:
        logger.error("no new_package_api_entity_id for %d", old_package_id)
        return None
Code example #18
from tagme import wiki_title

from factory import NodeBuilder
from graph_accessor import GraphAccessor, DefaultGraphAccessor
from shared.logger_util import Logger

_logger = Logger("WikipediaGraphAccessor").get_log()


class WikipediaGraphAccessor(GraphAccessor):
    def create_wikipedia_item_entity_by_url(self, url):
        if not url.startswith("https://en.wikipedia.org/"):
            return None
        accessor = DefaultGraphAccessor(self)
        node = accessor.get_node_by_wikipedia_link(url)
        if node is None:
            property_dict = {
                "name": wiki_title(url.split("/")[-1]),
                "url": url,
                "site:enwiki": url
            }
            if "(" in property_dict["name"]:
                alias = [(property_dict["name"].split(" ("))[0]]
                property_dict["alias"] = alias
            node = NodeBuilder().add_entity_label().add_label(
                "wikipedia").add_property(**property_dict).build()
            self.graph.merge(node)
            _logger.info("create wikipedia node" + str(property_dict))
        return node
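A hedged usage sketch for the accessor above, wired the same way the other snippets in this collection construct accessors from a GraphClient; the server_number and URL are examples, not values from the source.

accessor = WikipediaGraphAccessor(GraphClient(server_number=1))  # wiring assumed
url = "https://en.wikipedia.org/wiki/Hash_table"
node = accessor.create_wikipedia_item_entity_by_url(url)
# a second call finds the node merged above instead of creating a duplicate,
# because the accessor looks the URL up before building a new node
same_node = accessor.create_wikipedia_item_entity_by_url(url)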
Code example #19
from graph_accessor import DefaultGraphAccessor
from shared.logger_util import Logger

_logger = Logger("APIGraphAccessor").get_log()


class APIGraphAccessor(DefaultGraphAccessor):
    """
    a GraphAccessor for API node query
    """
    def get_parameter_nodes_of_method(self, method_node_id):
        """
        get all parameter nodes belong to one method
        :param method_node_id: method node id
        :return: parameter nodes list
        """
        try:
            query = 'Match  (n:`java method parameter`)-[r:`belong to`]->(m:`java method`)  where ID(m)={method_node_id} return distinct n'.format(
                method_node_id=method_node_id)
            node_list = []
            result = self.graph.run(query)
            for n in result:
                node_list.append(n['n'])
            return node_list

        except Exception as error:
            _logger.exception("failed to query parameter nodes: %s", error)
            return []

    def get_parent_class_node_for_method_node(self, method_node_id):
        """
Code example #20
from py2neo import Relationship

from factory import NodeBuilder
from graph_accessor import GraphAccessor
from shared.logger_util import Logger

_logger = Logger("AliasGraphAccessor").get_log()


class AliasGraphAccessor(GraphAccessor):
    def find_root_entity_by_alias_name(self, alias):
        query = "match (n:alias)-[:alias]-(m) where n.name='{alias}' return  m"
        query = query.format(alias=alias)
        return self.graph.run(query)

    def build_alias_relation(self, alias, node):
        end_node = self.find_or_create_alias_node(alias)
        relation = Relationship(node, 'alias', end_node)
        self.graph.merge(relation)

    def find_or_create_alias_node(self, alias):
        end_node = NodeBuilder().add_as_alias().add_one_property(
            property_name='name', property_value=alias).build()
        self.graph.merge(end_node)
        return end_node

    def create_alias_node_for_name(self, name, node_id):
        alias_node = NodeBuilder().add_as_alias().add_one_property(
            "name", name).build()
        self.graph.merge(alias_node)
        alias_node_link_id_list = alias_node["link_id"]
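As a usage sketch: build_alias_relation merges both the alias node and the relation, so repeated calls are idempotent. The entity-node lookup below is an assumption for illustration; this snippet does not show how entity nodes are fetched.

accessor = AliasGraphAccessor(GraphClient(server_number=0))  # wiring assumed
entity_node = DefaultGraphAccessor(accessor).get_node_by_id(42)  # assumed helper
if entity_node is not None:
    accessor.build_alias_relation("hashmap", entity_node)
    # entities linked to the alias can then be queried back by name
    result = accessor.find_root_entity_by_alias_name("hashmap")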
Code example #21
import gc

from db.engine_factory import ConnectionFactory, EngineFactory
from db.model import KnowledgeTableRowMapRecord, APIEntity, APIDocumentWebsite
from db.model_factory import KnowledgeTableFactory
from shared.logger_util import Logger

logger = Logger("import_doc_website_for_jdk_method").get_log()

cur = ConnectionFactory.create_cursor_for_jdk_importer()
session = EngineFactory.create_session()
jdk_method_knowledge_table = KnowledgeTableFactory.get_jdk_method_table(
    session)
api_knowledge_table = KnowledgeTableFactory.get_api_entity_table(session)


def create_doc_website_relation(method_name, full_declaration, qualified_name,
                                class_website):
    # print class_website
    if "http://docs.oracle.com/javase/7/docs/api/" in class_website:
        return None
    if full_declaration:
        if method_name[0] == "_":
            method_name = "Z:Z" + method_name
        if "(" in qualified_name and ")" in qualified_name:
            left_bracket_index = qualified_name.find("(")
            right_bracket_index = qualified_name.find(")")
            param_str = qualified_name[left_bracket_index +
                                       1:right_bracket_index]
            if "," in param_str:
                param_list = param_str.split(",")
Code example #22
from db.engine_factory import ConnectionFactory, EngineFactory
from db.model import KnowledgeTableRowMapRecord, APIRelation, KnowledgeTableColumnMapRecord
from db.model_factory import KnowledgeTableFactory
from shared.logger_util import Logger

logger = Logger("import_belong_to_relation_for_jdk_class").get_log()

cur = ConnectionFactory.create_cursor_for_jdk_importer()
session = EngineFactory.create_session()
jdk_package_knowledge_table = KnowledgeTableFactory.get_jdk_package_table(
    session)
jdk_class_knowledge_table = KnowledgeTableFactory.get_jdk_class_table(session)
api_knowledge_table = KnowledgeTableFactory.get_api_entity_table(session)

api_relation_table = KnowledgeTableFactory.get_api_relation_table(session)


def create_class_belong_to_relation(old_class_id, old_package_id):
    if old_package_id is None:
        logger.error("no old_package_id for %d", old_class_id)
        return None
    new_class_api_entity_id = KnowledgeTableRowMapRecord.get_end_row_id(
        session=session,
        start_knowledge_table=jdk_class_knowledge_table,
        end_knowledge_table=api_knowledge_table,
        start_row_id=old_class_id)
    if new_class_api_entity_id is None:
        logger.error("no new_class_api_entity_id for %d", old_class_id)
        return None
    new_package_api_entity_id = KnowledgeTableRowMapRecord.get_end_row_id(
        session=session,
Code example #23
from db.engine_factory import ConnectionFactory, EngineFactory
from db.model import KnowledgeTableRowMapRecord, APIEntity
from db.model_factory import KnowledgeTableFactory
from db.util.code_text_process import clean_html_text
from shared.logger_util import Logger

session = EngineFactory.create_session()

logger = Logger("import_android_class").get_log()
IMPORT_DATA_SOURCE_TABLE_NAME = "androidAPI_class"
package_knowledge_table = KnowledgeTableFactory.find_knowledge_table_by_name(
    session=session, name="androidAPI_package")
class_knowledge_table = KnowledgeTableFactory.find_knowledge_table_by_name(
    session=session, name=IMPORT_DATA_SOURCE_TABLE_NAME)
api_knowledge_table = KnowledgeTableFactory.find_knowledge_table_by_name(
    session=session, name=APIEntity.__tablename__)


def get_package_full_name_by_old_package_id(package_id):
    return KnowledgeTableRowMapRecord.get_end_row_id(
        session=session,
        start_knowledge_table=package_knowledge_table,
        end_knowledge_table=api_knowledge_table,
        start_row_id=package_id)


def get_qualified_name_of_package(package_id):
    api_entity_id = get_package_full_name_by_old_package_id(
        package_id=package_id)
    api_entity = APIEntity.find_by_id(session, api_entity_id)
    if api_entity:
Code example #24
from db.engine_factory import EngineFactory
from db.model import DocumentText, DocumentAnnotationStatus
from shared.logger_util import Logger

logger = Logger("import_return_value_relation_for_jdk_method").get_log()
session = EngineFactory.create_session()


def import_jdk_document_annotation_status():
    doc_id_list = DocumentText.get_doc_id_list(session)
    if doc_id_list is not None:
        for each in doc_id_list:
            doc_id = each[0]
            print(doc_id)
            document_annotation_status = DocumentAnnotationStatus(doc_id, DocumentAnnotationStatus.STATUS_TO_ANNOTATE)
            print(document_annotation_status)
            document_annotation_status.create(session, autocommit=False)
    session.commit()


if __name__ == "__main__":
    import_jdk_document_annotation_status()
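Note the transaction pattern above: each DocumentAnnotationStatus is created with autocommit=False, and session.commit() runs once after the loop, so the whole status import lands in a single transaction instead of one commit per document id.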
Code example #25
File: search.py  Project: zk467701860/SKnowledgeGraph
    def __init__(self, session=None, logger=None):
        self.__session = session
        if logger is None:
            self.logger = Logger("DBSearcher").get_log()
        else:
            self.logger = logger
Code example #26
import Queue  # Python 2 stdlib module; needed for the buffer queue below
import MySQLdb
# Mysql connect

from py2neo import Relationship

from skgraph.graph.accessor.graph_accessor import GraphClient
from skgraph.graph.accessor.graph_client_for_wikidata import WikiDataGraphAccessor
from shared.logger_util import Logger

conn = None
cur = None
graphClient = WikiDataGraphAccessor(GraphClient(server_number=1))
# neo4j connect
connect_graph = graphClient.graph
logger = Logger("android_sdk_importer").get_log()
# buffer
q = Queue.Queue()
n = 0


def get_android_sdk_node():
    labels = ["library"]
    property_dict = {"name": "android API"}
    from skgraph.graph.accessor.factory import NodeBuilder
    nodeBuilder = NodeBuilder().add_labels(*labels).add_property(
        **property_dict)
    android_sdk_node = nodeBuilder.build()
    connect_graph.merge(android_sdk_node, "library", "name")
    return android_sdk_node
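The three-argument merge call above relies on py2neo matching on a primary label and property key: the node is matched as (:library {name: "android API"}) and only created if absent, which makes get_android_sdk_node safe to call repeatedly. A small sketch of the effect:

# both calls resolve to the same graph node: the first merge creates it,
# the second matches it by the primary label "library" and key "name"
node_a = get_android_sdk_node()
node_b = get_android_sdk_node()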
Code example #27
from py2neo import Relationship

from graph_accessor import GraphAccessor
from shared.logger_util import Logger
from skgraph.graph.accessor.factory import NodeBuilder

_logger = Logger("SentenceAccessor").get_log()


class DomainEntityAccessor(GraphAccessor):
    def get_all_domain_entity(self):
        try:
            query = "MATCH (n:`domain entity`) return n"
            result = self.graph.run(query)
            node_list = []
            for n in result:
                node_list.append(n['n'])
            return node_list
        except Exception as error:
            _logger.exception("failed to query domain entities: %s", error)
            return []

    def create_entity_to_general_concept_relation(self, entity,
                                                  wikipedia_entity):
        relation = Relationship(entity, 'may link', wikipedia_entity)
        self.graph.merge(relation)

    def delete_all_domain_entity_to_wikipedia_relation(self):
        try:
            query = "MATCH (n:`domain entity`)-[r:`may link`]-(m:wikipedia) delete r"
            self.graph.run(query)
Code example #28
import logging
import sys

from flask import Flask
from flask_cors import CORS

from db.engine_factory import EngineFactory
from shared.logger_util import Logger, SQLAlchemyHandler
from skgraph.graph.accessor.graph_accessor import DefaultGraphAccessor, GraphClient
from skgraph.graph.apientitylinking import APIEntityLinking
from skgraph.graph.label_util import LabelUtil
from skgraph.graph.node_cleaner import GraphJsonParser
# QuestionAnswerSystem and SOPostSearcher are also used below; their module
# paths are not visible in this snippet, so no import is guessed for them

reload(sys)
sys.setdefaultencoding("utf-8")

app = Flask(__name__)
CORS(app)
db_handler = SQLAlchemyHandler()
db_handler.setLevel(logging.WARN)  # Only serious messages
app.logger.addHandler(db_handler)

logger = Logger("neo4jServer").get_log()
logger.info("create logger")

graphClient = DefaultGraphAccessor(GraphClient(server_number=1))
logger.info("create graphClient")

api_entity_linker = APIEntityLinking()
logger.info("create api_entity_linker object")

questionAnswerSystem = QuestionAnswerSystem()
logger.info("create questionAnswerSystem")

dbSOPostSearcher = SOPostSearcher(EngineFactory.create_so_session(),
                                  logger=app.logger)
logger.info("create SO POST Searcher")
Code example #29
import codecs
import json

from py2neo import Relationship

from skgraph.graph.accessor.graph_accessor import GraphClient
from skgraph.graph.accessor.graph_client_for_awesome import AwesomeGraphAccessor
from shared.logger_util import Logger

_logger = Logger("AwesomeImporter").get_log()

awesomeGraphAccessor = AwesomeGraphAccessor(GraphClient(server_number=0))
baseGraphClient = awesomeGraphAccessor.graph

file_name = "complete_list_of_awesome_list_collect_relation.json"
with codecs.open(file_name, 'r', 'utf-8') as f:
    relation_list = json.load(f)
for relation in relation_list:
    start_url = relation["start_url"]
    relation_str = relation["relation"]
    end_url = relation["end_url"]
    start_node = awesomeGraphAccessor.find_awesome_list_entity(start_url)
    end_node = awesomeGraphAccessor.find_awesome_list_entity(end_url)
    if start_node is not None and end_node is not None:
        relationship = Relationship(start_node, relation_str, end_node)
        baseGraphClient.merge(relationship)
        _logger.info("create or merge relation" + str(relation))
    else:
        _logger.warn("fail create relation" + str(relation))
Code example #30
from bs4 import BeautifulSoup

from db.engine_factory import EngineFactory
from db.cursor_factory import ConnectionFactory
from db.model import KnowledgeTableRowMapRecord, APIEntity
from db.model_factory import KnowledgeTableFactory
from db.util.code_text_process import parse_declaration_html_with_full_qualified_type, clean_declaration_html, \
    clean_html_text_with_format
from shared.logger_util import Logger

session = EngineFactory.create_session()

logger = Logger("import_android_method").get_log()
IMPORT_DATA_SOURCE_TABLE_NAME = "androidAPI_method"
class_knowledge_table = KnowledgeTableFactory.find_knowledge_table_by_name(
    session=session, name="androidAPI_class")

method_knowledge_table = KnowledgeTableFactory.find_knowledge_table_by_name(
    session=session, name="androidAPI_method")

api_knowledge_table = KnowledgeTableFactory.find_knowledge_table_by_name(
    session=session, name=APIEntity.__tablename__)


def get_api_entity_id_by_old_class_id(old_class_id):
    return KnowledgeTableRowMapRecord.get_end_row_id(
        session=session,
        start_knowledge_table=class_knowledge_table,
        end_knowledge_table=api_knowledge_table,
        start_row_id=old_class_id)
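The KnowledgeTableRowMapRecord.get_end_row_id lookup recurs across these importers: it resolves a row id from a crawler-specific table into the id of the corresponding row in the unified APIEntity table. A schematic sketch of what such a lookup does; the column names are assumptions, since the real implementation in db.model is not shown in these snippets.

# schematic only: assumed column names, not the project's actual schema
def get_end_row_id(session, start_knowledge_table, end_knowledge_table,
                   start_row_id):
    record = session.query(KnowledgeTableRowMapRecord).filter_by(
        start_knowledge_table_id=start_knowledge_table.id,
        end_knowledge_table_id=end_knowledge_table.id,
        start_row_id=start_row_id).first()
    return record.end_row_id if record is not None else None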