def test_get_annotation_by_index(self):
    """The annotation count for (doc_id=1, sentence_index=1) is expected to be
    the sentinel -1 (no annotation recorded)."""
    session = EngineFactory.create_session()
    annotation = DocumentSentenceTextAnnotation.get_annotation_count_by_index(
        session, 1, 1)
    expected = -1
    # print() call: was a Python 2 print statement, inconsistent with the
    # Python 3 print() syntax used elsewhere in this file.
    print(annotation)
    self.assertEqual(expected, annotation)
Example #2
0
    def test_full_text_searcher(self):
        """Smoke-test the general-concept full text search with a sample query."""
        db_session = EngineFactory.create_session(autocommit=True)
        concept_searcher = GeneralConceptEntitySearcher(session=db_session)
        hits = concept_searcher.search("apk file")
        print(len(hits))
        print(hits)
def start_fix_duplicate_domain_entity():
    """Clean up the domain-entity table.

    Entities whose name is at most two characters long are deleted together
    with their sentence relations; for every remaining name, duplicated
    entities are merged. Commits after each merged name and once at the end.
    """
    session = EngineFactory.create_session()
    domain_entity_name_list = DomainEntity.get_all_domain_entity_name_distinct(
        session)
    print("distinct name=%d" % len(domain_entity_name_list))
    for team_domain_entity in domain_entity_name_list:
        name = team_domain_entity.name
        print("current name=%s" % name)
        # Single lookup shared by both branches (the original duplicated it).
        all_same_name_domain_entity_list = \
            DomainEntity.get_all_domain_entity_with_same_name(session, name)
        if len(name) <= 2:
            # Names of one or two characters are treated as noise and removed.
            print("delete name=%s" % name)
            for domain_entity in all_same_name_domain_entity_list:
                delete_domain_entity_and_relation_to_sentence(
                    session=session, domain_entity=domain_entity)
            # No per-name commit here (matches original flow); deletions are
            # flushed by a later commit.
            continue

        same_list_of_list = get_same_domain_entity_list(
            all_same_name_domain_entity_list)
        for same_list in same_list_of_list:
            merge_domain_entity_and_relation(session=session,
                                             same_list=same_list)
        session.commit()
    session.commit()
Example #4
0
    def init(self, path="word2vec_api_software_wiki.txt", binary=True):
        """Open a DB session, connect to the graph server, and load the
        entity word2vec model from *path* (``binary`` selects the format)."""
        self.session = EngineFactory.create_session()
        graph_client = GraphClient(server_number=4)
        self.graphClient = DefaultGraphAccessor(graph_client)
        self.entity_vector_model = EntityVectorComputeModel()
        self.entity_vector_model.init_word2vec_model(path=path, binary=binary)
        print("init complete")
 def test_full_text_search_in_nature_language_for_alias(self):
     """Natural-language full text search for "Json" over APIAlias is
     expected to yield exactly 19 aliases."""
     db_session = EngineFactory.create_session()
     db_searcher = DBSearcher(db_session)
     alias_list = db_searcher.full_text_search_in_nature_language("Json", APIAlias)
     for found_alias in alias_list:
         print(found_alias)
     self.assertEqual(19, len(alias_list))
Example #6
0
    def get_session(self):
        """Return the cached DB session, creating it lazily on first use."""
        if self.__session:
            return self.__session
        # TODO: obtain the session from a factory instance initialised in
        # the constructor instead of creating it here.
        self.__session = EngineFactory.create_session(autocommit=True,
                                                      echo=False)
        return self.__session
Example #7
0
    def init(self):
        """Load the DB session, the three entity vector models, and the
        general-concept searcher."""
        self.session = EngineFactory.create_session()
        load_vectors = EntityVectorModel.load
        self.wikipedia_vector_map = load_vectors("wikipedia.binary.txt", binary=True)
        self.domain_entity_vector_map = load_vectors("domain_entity.binary.txt", binary=True)
        self.api_entity_vector_map = load_vectors("api.binary.txt", binary=True)
        self.general_concept_searcher = GeneralConceptEntitySearcher(session=self.session)
        print("init complete")
    def test_API_entity_searcher(self):
        """Searching API entities for "XML" with result_limit=20 should fill
        the limit exactly."""
        db_session = EngineFactory.create_session()
        api_searcher = APISearcher(db_session)
        found = api_searcher.search_api_entity("XML", result_limit=20)
        for api_entity in found:
            print(api_entity)
        self.assertEqual(20, len(found))
Example #9
0
 def test_full_text_search_for_os_answer(self):
     """Sentence-answer search for "java" is expected to return exactly 10 rows."""
     db_session = EngineFactory.create_session()
     sentence_searcher = SentenceSearcher(db_session)
     answer_rows = sentence_searcher.search_sentence_answer("java", 10)
     print(answer_rows)
     for answer_row in answer_rows:
         print(answer_row)
     self.assertEqual(10, len(answer_rows))
Example #10
0
 def test_full_text_search_for_domain_qa(self):
     """Related-entity QA search for "java" is expected to return exactly 10 rows."""
     db_session = EngineFactory.create_session()
     qa_searcher = QA_FullText_EntityList(db_session)
     related_rows = qa_searcher.search_related_entity("java", 10)
     print(related_rows)
     for related_row in related_rows:
         print(related_row)
     self.assertEqual(10, len(related_rows))
    def test_API_aliases_searcher(self):
        """Alias search for "XML" is expected to return exactly 500 rows."""
        db_session = EngineFactory.create_session()
        alias_searcher = APISearcher(db_session)
        alias_rows = alias_searcher.search_api_aliases("XML")
        for alias_row in alias_rows:
            print(alias_row)
        self.assertEqual(500, len(alias_rows))
Example #12
0
 def init(self):
     """Create the DB session and two empty annotation DataFrames.

     ``df`` collects raw annotations (column ``type``); ``df_same`` collects
     vote-resolved ones (column ``vote_type``)."""
     self.session = EngineFactory.create_session()
     shared_columns = ['sentence_id', 'doc_id', 'sentence_index', 'text']
     self.df = pd.DataFrame(columns=shared_columns + ['type'])
     self.df_same = pd.DataFrame(columns=shared_columns + ['vote_type'])
 def start_import(self, graphClient):
     """Import every APIRelation row from the DB into the graph via
     ``import_one_relation``."""
     self.logger = Logger(self.logger_file_name).get_log()
     # Lazily create the session only when the caller did not inject one.
     if not self.session:
         self.session = EngineFactory.create_session()
     self.graphClient = graphClient
     for relation in self.session.query(APIRelation).all():
         self.import_one_relation(relation)
     print("import api entity relation complete")
Example #14
0
 def test_get_unfinished_doc_list(self):
     """Every doc id in 1..94893 is expected to still be unfinished."""
     session = EngineFactory.create_session()
     unfinished_doc_list = DocumentAnnotationStatus.get_unfinished_doc_list(session=session)
     # print() call: was a Python 2 print statement, inconsistent with the
     # Python 3 print() syntax used elsewhere in this file.
     print(unfinished_doc_list)
     # Each row is a one-element tuple; keep only the doc id.
     result = [each[0] for each in unfinished_doc_list]
     expected = list(range(1, 94894))
     self.assertEqual(expected, result)
    def start_import(self, graphClient):
        """Import every APIEntity row from the DB into the graph via
        ``import_one_api_entity``."""
        self.logger = Logger(self.logger_file_name).get_log()
        # Lazily create the session only when the caller did not inject one.
        if not self.session:
            self.session = EngineFactory.create_session()
        self.graphClient = graphClient
        for entity in self.session.query(APIEntity).all():
            self.import_one_api_entity(entity)
        print("import api entity complete")
Example #16
0
    def test_search(self):
        """Combined graph + API search for "string buffer" should return 10 hits."""
        db_session = EngineFactory.create_session(autocommit=True)
        entity_searcher = APISearcher(session=db_session)
        graph = GraphClient(server_number=1)
        util = SearchUtil(graph, entity_searcher)
        hits = util.search("string buffer", 10)
        print(hits)
        self.assertEqual(len(hits), 10)
Example #17
0
    def build_table():
        """Copy api / wikidata / domain-entity nodes from Neo4j into MySQL.

        Each Cypher query is paired (by position) with the label recorded
        for its rows.
        """
        session = EngineFactory.create_session()
        # Clearing the table is irreversible — deliberately left commented.
        # EntityForQA.clear_table(session)
        sql_list = ["MATCH (n:api) RETURN id(n),n",
                    "MATCH(n:wikidata) RETURN id(n), n",
                    "MATCH (n:`domain entity`) RETURN id(n), n.`domain_entity:name`,n.domain_entity_id"]
        label_list = ["api", "wikidata", "domain entity"]
        # Loop variable renamed from 'str', which shadowed the builtin.
        for sql, label in zip(sql_list, label_list):
            Neo4j2MySQL.neo4j_to_db(session, sql, label)
    def test_exist_import_record(self):
        """An import record mapping the jdk_method table to the api_relation
        table for row 1 via "class_id" is expected to exist."""
        session = EngineFactory.create_session()
        jdk_method_knowledge_table = KnowledgeTableFactory.get_jdk_method_table(session)
        api_relation_table = KnowledgeTableFactory.get_api_relation_table(session)

        # NOTE(review): fetched but unused in the assertion below — kept so
        # the factory call still runs; confirm whether it can be removed.
        api_knowledge_table = KnowledgeTableFactory.get_api_entity_table(session)

        result = KnowledgeTableColumnMapRecord.exist_import_record(
            session, jdk_method_knowledge_table, api_relation_table, 1,
            "class_id")
        # print() call: was a Python 2 print statement, inconsistent with the
        # Python 3 print() syntax used elsewhere in this file.
        print(result)

        self.assertEqual(result, True)
    def start_import(self, graphClient):
        """Attach documentation-website URLs to every imported API entity."""
        self.logger = Logger(self.logger_file_name).get_log()
        # Lazily create the session only when the caller did not inject one.
        if not self.session:
            self.session = EngineFactory.create_session()
        self.graphClient = graphClient
        for api_entity in self.session.query(APIEntity).all():
            api_id = api_entity.id
            website_list = APIDocumentWebsite.get_document_website_list_by_api_id(
                self.session, api_id)
            self.import_document_website_to_one_entity(api_id, website_list)

        print("import api doc url complete")
    def test_API_entity_searcher_in_tuple(self):
        """Table-driven check of search_api_entity result sizes per api_type."""
        db_session = EngineFactory.create_session()
        api_searcher = APISearcher(db_session)
        cases = [
            ("XML", APIEntity.API_TYPE_ALL_API_ENTITY, 427),
            ("json", APIEntity.API_TYPE_ALL_API_ENTITY, 10),
            ("http", APIEntity.API_TYPE_METHOD, 31),
        ]
        for query, api_type, expected_size in cases:
            found = api_searcher.search_api_entity(query, api_type=api_type)
            for entity in found:
                print(entity)

            self.assertEqual(expected_size, len(found))
    def fix_the_valid_problem_for_paragraph_and_sentence(self):
        """Propagate document invalidation down to paragraphs/sentences and
        invalidate degenerate (empty or too-short) texts.

        Three passes, each ending in a commit:
          1. every paragraph/sentence belonging to an invalid document gets
             valid=0 (committed in batches of ``step`` documents),
          2. still-valid paragraphs with degenerate text get valid=0,
          3. still-valid sentences, same criterion.
        """
        session = EngineFactory.create_session()

        def _is_degenerate(text):
            # Empty/missing, at most 3 characters after stripping, or fewer
            # than three space-separated tokens. (Replaces duplicated inline
            # checks that compared with `== None` instead of `is None`.)
            if text is None or text == "":
                return True
            stripped = text.strip()
            return len(stripped) <= 3 or len(stripped.split(" ")) <= 2

        # Pass 1: fix the problem of duplicate documents — invalidate all
        # children of already-invalid documents.
        all_invalid_document_list = session.query(DocumentText).filter_by(
            valid=0).all()
        count = 0
        step = 3000
        for invalid_document in all_invalid_document_list:
            invalid_paragraphs = session.query(
                DocumentParagraphText).filter(
                    DocumentParagraphText.doc_id == invalid_document.id).all()
            invalid_sentences = session.query(
                DocumentSentenceText).filter(
                    DocumentSentenceText.doc_id == invalid_document.id).all()

            for paragraph in invalid_paragraphs:
                paragraph.valid = 0
            for sentence in invalid_sentences:
                sentence.valid = 0

            count = count + 1
            if count > step:
                session.commit()
                count = 0
        session.commit()

        # Pass 2: invalidate degenerate paragraphs.
        all_paragraph_list = session.query(DocumentParagraphText).filter_by(
            valid=1).all()
        for paragraph in all_paragraph_list:
            if _is_degenerate(paragraph.text):
                paragraph.valid = 0
        session.commit()

        # Pass 3: invalidate degenerate sentences.
        all_sentence_list = session.query(DocumentSentenceText).filter_by(
            valid=1).all()
        for sentence in all_sentence_list:
            if _is_degenerate(sentence.text):
                sentence.valid = 0
        session.commit()
Example #22
0
 def search_first_sentence_by_api_id(api_id):
     """Return the text of the first sentence linked to *api_id*, or None.

     None is returned when no relation row exists, when the relation carries
     a falsy sentence id, or when the sentence text is missing/empty.

     Bug fixed: the original subscripted ``.first()[0]`` directly, which
     raised TypeError (None is not subscriptable) whenever the query matched
     no row — the ``if sentence_id:`` guard ran too late to help.
     """
     session = EngineFactory.create_session()
     relation_row = session.query(
         SentenceToAPIEntityRelation.sentence_id).filter(
             SentenceToAPIEntityRelation.api_id == api_id).first()
     if relation_row is None or not relation_row[0]:
         return None
     sentence_row = session.query(DocumentSentenceText.text).filter(
         DocumentSentenceText.id == relation_row[0]).first()
     if sentence_row is None or not sentence_row[0]:
         return None
     return sentence_row[0]
    def init(self, vector_dir_path="./model/"):
        """Wire up every component the QA result search needs: KG feature
        models (from *vector_dir_path*), DB session, entity extractor, QA
        searcher, and the graph accessors."""
        self.kg_models = KnowledgeGraphFeafureModels()
        self.kg_models.init(vector_dir_path=vector_dir_path)

        self._session = EngineFactory.create_session(echo=False)
        self._entity_extractor = EntityExtractor()

        # TF-IDF model currently disabled:
        # self._tf_idf_model = TFIDFModel()
        # self._tf_idf_model.load(dict_type=2)

        self.qa_searcher = QAEntitySearcher()
        graph_client = GraphClient(server_number=4)
        self.semanticSearchAccessor = SemanticSearchAccessor(graph_client)
        self.defaultAccessor = DefaultGraphAccessor(graph_client)
        self._logger = Logger("QAResultSearch").get_log()
    def start_import(self):
        """Store each value-instance API's short description as a
        detail-description HTML row, then commit once."""
        self.session = EngineFactory.create_session()
        detail_type = APIHTMLText.HTML_TYPE_API_DETAIL_DESCRIPTION

        for api_entity in APIEntity.get_all_value_instance_api(
                session=self.session):
            description = api_entity.short_description
            if description is None or description == "":
                # Nothing to store for this entity.
                continue
            html_row = APIHTMLText(api_id=api_entity.id,
                                   html=description,
                                   html_type=detail_type)
            html_row.find_or_create(session=self.session, autocommit=False)
        self.session.commit()
    def test_API_entity_searcher_in_tuple_by_limit(self):
        """Table-driven check of search_api_entity result sizes when a
        result_limit is supplied."""
        db_session = EngineFactory.create_session()
        api_searcher = APISearcher(db_session)
        cases = [
            ("XML", APIEntity.API_TYPE_ALL_API_ENTITY, 10, 10),
            ("json", APIEntity.API_TYPE_ALL_API_ENTITY, 10, 6),
            ("http", APIEntity.API_TYPE_METHOD, 20, 20),
            ("java", APIEntity.API_TYPE_CLASS, 15, 0),
        ]
        for query, api_type, limit, expected_size in cases:
            found = api_searcher.search_api_entity(query,
                                                   api_type=api_type,
                                                   result_limit=limit)
            for entity in found:
                print(entity)

            self.assertEqual(expected_size, len(found))
Example #26
0
def save_sentence_annotation():
    """Persist a batch of sentence annotations posted as JSON.

    Each item must carry doc_id, sentence_index, type and username; the
    request fails as soon as any item misses one of these keys.

    Bug fixed: the original combined the key checks with ``and``, so an item
    was only rejected when it missed ALL FOUR keys — any partially-formed
    item then raised KeyError on the lookups below.
    """
    session = EngineFactory.create_session()
    if not request.json:
        return "fail"
    required_keys = ("doc_id", "sentence_index", "type", "username")
    for each in request.json:
        if any(key not in each for key in required_keys):
            return "fail"
        doc_id = each["doc_id"]
        sentence_index = each["sentence_index"]
        # Renamed from 'type', which shadowed the builtin.
        annotation_type = each["type"]
        username = each["username"]
        sentence_text_annotation = DocumentSentenceTextAnnotation(
            doc_id, sentence_index, annotation_type, username)
        sentence_text_annotation.find_or_create(session, autocommit=False)
        # An existing record may carry a stale type; overwrite it in place.
        if sentence_text_annotation.type != annotation_type:
            sentence_text_annotation.type = annotation_type
    session.commit()
    return "save successful"
Example #27
0
def get_sentence_list():
    """Answer an "os_question" JSON request with up to 10 matching sentences,
    each serialised as {"doc_id": ..., "text": ...}."""
    if not request.json:
        return "fail"
    payload = request.json
    if "os_question" not in payload:
        return "fail"
    question = payload["os_question"]
    session = EngineFactory.create_session()
    searcher = SentenceSearcher(session)
    sentence_data = searcher.search_sentence_answer(question, 10)
    # Rows are (doc_id, text); drop rows without usable text.
    sentence_list = [
        {"doc_id": row[0], "text": row[1]}
        for row in sentence_data
        if row[1] is not None and row[1] != ""
    ]
    result = {"sentence_list": sentence_list}
    return jsonify(result)
    def get_test_node(self, label_name):
        """Sample candidate node ids of *label_name*, filter out invalid ones,
        and return a list of dicts with name/sentence/related-wiki attached.

        NOTE(review): the while loop samples 700 ids and then sets
        ``node_num = len(example)`` (always 700 >= 500), so it performs
        exactly one pass; kept as-is to preserve behaviour — confirm whether
        repeated sampling until 500 *accepted* nodes was intended.
        (Removed only the no-op statement ``id_list = id_list``.)
        """
        temp_id_list = self.get_node_id_list(label_name)
        node_num = 0
        id_list = []
        while node_num < 500:
            example = random.sample(temp_id_list, 700)
            for id_num in example:
                name, des = self.get_node_by_id(id_num, label_name)
                if label_name == "domain entity":
                    # Domain entities must have at least one related sentence.
                    sentence = self.get_sentence_related_to_node(id_num, label_name)
                    if len(sentence) == 0:
                        print(id_num)
                        continue
                elif name.endswith("(E)") or des == "#Null":
                    # Skip error-marked names and nodes with no description.
                    continue
                id_list.append(id_num)
            node_num = len(example)

        session = EngineFactory.create_session()
        general_concept_searcher = GeneralConceptEntitySearcher(session)
        node_list = []
        for item in id_list:
            dic = {}
            dic["name"], full_des = self.get_node_by_id(item, label_name)
            dic["id"] = item
            dic["node_type"] = label_name

            # if not self.is_valid(dic['name'], general_concept_searcher):
            #     continue
            if not self.api_is_valid(dic["name"], general_concept_searcher, session):
                continue

            if label_name == "domain entity":
                dic["sentence"] = self.get_sentence_related_to_node(item, label_name)
                if len(dic["sentence"]) == 0:
                    continue
            else:
                dic["sentence"] = full_des.replace("#", "").replace("\r", "").replace("\n", "").encode("raw_unicode_escape")
            dic["related_wiki"] = self.get_wikidata_related_to_node(item, label_name)
            node_list.append(dic)
        return node_list
    def test_search_api_entity_by_id_list(self):
        """Table-driven check of query_api_entity result sizes for several
        id lists and api_types."""
        db_session = EngineFactory.create_session()
        api_searcher = APISearcher(db_session)

        cases = [
            ([1, 2, 3, 1111, 4444, 6666, 1],
             APIEntity.API_TYPE_ALL_API_ENTITY, 6),
            ([1, 2, 3, 4, 6, 7, 9, 1111, 4444, 6666, 1, 111, 444, 7777, 2345,
              12222, 12222, 12225, 12227],
             APIEntity.API_TYPE_ALL_API_ENTITY, 17),
            ([1, 12222, 12225, 12227, 12228, 12220, 12229],
             APIEntity.API_TYPE_METHOD, 6),
        ]
        for query_id_list, api_type, expected_size in cases:
            found = api_searcher.query_api_entity(query_id_list, api_type)
            for entity in found:
                print(entity)

            self.assertEqual(expected_size, len(found))
Example #30
0
    def test_get_api_relation(self):
        """Dump the in/out relations of java.lang.String for manual
        inspection (no assertions).

        All output uses print() — the original used Python 2 print
        statements, inconsistent with the rest of this file.
        """
        session = EngineFactory.create_session()
        api = APIEntity.find_by_id(session, 462)
        # NOTE(review): the qualifier lookup below overwrites the id lookup
        # above — kept as-is; confirm whether the first call is still needed.
        api = APIEntity.find_by_qualifier(session, "java.lang.String")
        print("api =", api)
        print(api.document_websites)
        print("out_relation")
        for r in api.out_relation:
            print(r)
            print("start_api=", r.start_api)
            print("end_api=", r.end_api)
            print("------------")

        print("in_relation")

        for r in api.in_relation:
            print(r)
            print("start_api=", r.start_api)
            print("end_api=", r.end_api)
            print("------------")