Beispiel #1
0
 def __init__(self, doc_collection):
     """Load the jabref v3.7 graph and initialise empty entity bookkeeping.

     :param doc_collection: document collection stored on the instance as-is.
     """
     self.graph_data = GraphData.load(
         PathUtil.graph_data(pro_name="jabref", version="v3.7"))
     self.doc_collection = doc_collection
     # Entity names and their scores start out empty.
     self.entity_words = set()
     self.entity_2_score = {}
     self.counter = 0
     # Location of the entity JSON file inside the output directory.
     self.entity_path = str(Path(OUTPUT_DIR).joinpath("entity.json"))
Beispiel #2
0
 def __init__(self,
              doc_collection,
              graph_data_path=PathUtil.graph_data(pro_name="jabref",
                                                  version="v3.10")):
     """Attach a graph, a document collection and a classifier to the instance.

     :param doc_collection: document collection stored on the instance as-is.
     :param graph_data_path: either an already loaded ``GraphData`` object or
         a path that ``GraphData.load`` accepts. The default path is computed
         once, when the function is defined.
     """
     already_loaded = isinstance(graph_data_path, GraphData)
     self.graph_data: GraphData = (graph_data_path if already_loaded else
                                   GraphData.load(graph_data_path))
     self.doc_collection = doc_collection
     self.functionClassifier = FastTextClassifier()
     # Undirected networkx copy of the loaded graph.
     self.G = nx.Graph(self.graph_data.graph)
Beispiel #3
0
 def __init__(self, input_graph_version):
     """Set up output paths, cached record text, the input graph and lookup sets.

     :param input_graph_version: version string of the jabref graph to load.
     """
     output_dir = Path(OUTPUT_DIR)
     self.save_expand_res_path = str(
         output_dir.joinpath("prefix_suffix_relations.pickle"))
     self.api_id_2_record_text_path = str(
         output_dir.joinpath("api_id_2_record.pickle"))
     # The record-text pickle is read before the graph is loaded.
     self.api_id_2_record_text = Tool.load_pickle(
         self.api_id_2_record_text_path)
     self.graph_data = GraphData.load(
         PathUtil.graph_data(pro_name="jabref", version=input_graph_version))

     # Relation-name groups.
     relation = RelationNameConstant
     self.func_relation_set = {
         relation.has_Functionality_Relation,
         relation.Functionality_Compare_Relation,
         relation.has_Behavior_Relation,
     }
     self.concept_classification = {relation.Ontology_IS_A_Relation}
     self.membership = {relation.Ontology_Derive_Relation}
     self.characteristic = {
         relation.has_Feature_Relation,
         relation.has_Constraint_Relation,
     }
     self.category_name_2_id = {}

     # Code-entity categories treated as "class" and as "method".
     category = CodeEntityCategory
     self.type_of_class = {
         category.CATEGORY_CLASS,
         category.CATEGORY_INTERFACE,
         category.CATEGORY_EXCEPTION_CLASS,
         category.CATEGORY_ERROR_CLASS,
         category.CATEGORY_ENUM_CLASS,
         category.CATEGORY_ANNOTATION_CLASS,
     }
     self.type_of_method = {
         category.CATEGORY_METHOD,
         category.CATEGORY_CONSTRUCT_METHOD,
         category.CATEGORY_BASE_OVERRIDE_METHOD,
     }
     self.CODE_NAME_UTIL = CodeElementNameUtil()
Beispiel #4
0
from pathlib import Path

from sekg.pipeline.base import KGBuildPipeline

from definitions import OUTPUT_DIR
from project.extractor_module.structure_extractor.category_structure_extractor import CategoryStructureExtractor
from project.extractor_module.structure_extractor.characteristic_structure_extractor import CharacteristicStructureExtractor
from project.extractor_module.structure_extractor.func_name_extractor import FuncNameExtractor
from project.utils.path_util import PathUtil

if __name__ == '__main__':
    # Stage 1: run CharacteristicStructureExtractor on graph v1 and save v1.1.
    pipeline = KGBuildPipeline()
    pro_name = "jabref"
    graph_data_v1_path = PathUtil.graph_data(pro_name=pro_name, version="v1")
    pipeline.load_graph(graph_data_v1_path)
    component1 = CharacteristicStructureExtractor()
    component1.set_json_save_path(
        Path(OUTPUT_DIR) / "json" / "name_characteristic.json")
    component1.set_save_path(
        PathUtil.graph_data(pro_name=pro_name, version="v1.1"))
    # Component label means "extract features from names and structure".
    pipeline.add_component("从名称和结构中抽取特征", component1)
    pipeline.run()

    # Stage 2: a fresh pipeline on graph v1.1 with FuncNameExtractor.
    # NOTE(review): graph_data_v1_path now points at v1.1 despite its name.
    pipeline = KGBuildPipeline()
    pro_name = "jabref"
    graph_data_v1_path = PathUtil.graph_data(pro_name=pro_name, version="v1.1")
    pipeline.load_graph(graph_data_v1_path)
    component1 = FuncNameExtractor()
    component1.set_json_save_path(
        Path(OUTPUT_DIR) / "json" / "name_functionality.json")
    component1.set_save_path(
from project.utils.path_util import PathUtil
from sekg.ir.doc.wrapper import MultiFieldDocumentCollection, MultiFieldDocument
from sekg.graph.exporter.graph_data import GraphData
from sekg.graph.exporter.graph_data import NodeInfo
import json
from definitions import OUTPUT_DIR
from pathlib import Path

# Input/output locations for the jabref document collection and graph.
pro_name = 'jabref'
dc_file_location = PathUtil.doc(pro_name=pro_name, version='v1')
graph_data_file_location = PathUtil.graph_data(pro_name=pro_name,
                                               version='v1.8')
dc_file_destination = PathUtil.doc(pro_name=pro_name, version='v1.1')
comment_json_file = Path(OUTPUT_DIR) / "json" / "mid_2_dp_comment.json"
qualified_name_json_file = Path(
    OUTPUT_DIR) / "json" / "mid_2_qualified_name.json"

if __name__ == '__main__':
    doc_collection: MultiFieldDocumentCollection = MultiFieldDocumentCollection.load(
        dc_file_location)
    graph_data: GraphData = GraphData.load(graph_data_file_location)

    # Both inputs are read line by line, one JSON document per line.
    # The context managers close the files; the original left them open.
    with open(comment_json_file, 'r') as comment_file:
        comments = comment_file.readlines()
    comment_list = [json.loads(line) for line in comments]

    with open(qualified_name_json_file, 'r') as qualified_name_file:
        names = qualified_name_file.readlines()
    qualified_name_list = [json.loads(line) for line in names]
"""

Classify methods.
Methods are divided into five categories: accessor, mutator, creational, constructor, undefined.

"""

from sekg.graph.exporter.graph_data import GraphData, NodeInfo
from project.utils.path_util import PathUtil
from nltk.corpus import wordnet as wn

pro_name = "jabref"
graph_data_path = PathUtil.graph_data(pro_name=pro_name, version="v3.4")
# The graph is loaded as soon as this module is imported.
graph_data: GraphData = GraphData.load(graph_data_path)

# Keyword tuples, one per method category.
accessor_key_word = (
    "get", "toString", "find", "search", "test",
    "contains", "is", "has", "show",
)
mutator_key_word = (
    "set", "add", "delete", "move", "remove",
    "parse", "insert", "extract", "open",
)
creational_key_word = ("copy", "construct", "create")

# For every WordNet noun/verb synset, keep the part of its name before the
# first dot.
nouns = {synset.name().partition('.')[0] for synset in wn.all_synsets('n')}
verbs = {synset.name().partition('.')[0] for synset in wn.all_synsets('v')}


def get_pure_method_name_without_parameter(qualified_name=None):
    """Return the bare method name from a qualified method name.

    Everything from the first "(" onward is dropped, then everything up to
    and including the last "." of what remains.

    :param qualified_name: e.g. "org.jabref.Foo.bar(java.lang.String)".
    :return: the text after the last "." and before the first "(", e.g. "bar".
    :raises ValueError: if qualified_name is None or the empty string.
    """
    # Compare by value; "is" against a string literal tests object identity.
    if qualified_name is None or qualified_name == "":
        raise ValueError("qualified name needed")
    # partition() keeps the whole string when there is no "(". Slicing with
    # find()'s -1 result would silently drop the last character instead.
    without_parameters = qualified_name.partition("(")[0]
    return without_parameters.rpartition(".")[2]


# Do the most basic classification based on a list of keywords
from project.utils.path_util import PathUtil
from sekg.graph.exporter.graph_data import GraphData, NodeInfo
from project.classification_module import method_classification
from nltk.corpus import wordnet as wn
from project.classification_module.method_classification import split

if __name__ == '__main__':
    # 1. Collect every method node in the graph.
    # 2. Pass each qualified_name to the classifier to decide its label.
    pro_name = "jabref"
    graph_data_path = PathUtil.graph_data(pro_name=pro_name, version="v1.6")
    graph_data: GraphData = GraphData.load(graph_data_path)
    graph_data_output_path = PathUtil.graph_data(pro_name=pro_name,
                                                 version='v1.7')
    methods_id: set = graph_data.get_node_ids_by_label("method")
    # For every WordNet noun/verb synset, keep the part of its name before
    # the first dot.
    nouns = {x.name().split('.', 1)[0] for x in wn.all_synsets('n')}
    verbs = {x.name().split('.', 1)[0] for x in wn.all_synsets('v')}

    # Per-label counters; index 0 counts "accessor".
    count = [0, 0, 0, 0, 0]
    for i in methods_id:
        node: NodeInfo = graph_data.find_nodes_by_ids(i)[0]
        qualified_name = node['properties']['qualified_name']
        label = method_classification.basic_classification(qualified_name)
        # Labels are compared by value: "is" tests object identity and only
        # matched by accident of string interning.
        if label == "undefined":
            first_word = split(camel_case=qualified_name)
            if first_word in verbs:
                label = "mutator"
            # Checked second on purpose of the original code: a word that is
            # both a verb and a noun ends up as "accessor".
            if first_word in nouns:
                label = "accessor"

        if label == "accessor":
            count[0] += 1
Beispiel #8
0
from sekg.graph.exporter.graph_data import NodeInfo, GraphData
from sekg.ir.doc.wrapper import MultiFieldDocumentCollection, MultiFieldDocument

from project.utils.path_util import PathUtil

# Load the jabref v1 graph and document collection.
pro_name = "jabref"
doc_path = PathUtil.doc(pro_name=pro_name, version="v1")
graph_data_path = PathUtil.graph_data(pro_name="jabref", version="v1")
graph_data = GraphData.load(graph_data_path)
doc_collection: MultiFieldDocumentCollection = MultiFieldDocumentCollection.load(
    doc_path)

# API whose documentation is printed, identified by its qualified name.
api_name = "org.jabref.model.metadata.event.MetaDataChangedEvent"
node = graph_data.find_one_node_by_property(
    property_name='qualified_name', property_value=api_name)
api_id = node["id"]
doc: MultiFieldDocument = doc_collection.get_by_id(api_id)

# Collect the three description fields of the document under 'doc_info'.
return_data = {
    'doc_info': {
        field: doc.get_doc_text_by_field(field)
        for field in ('full_html_description',
                      'full_description',
                      'sentence_description')
    },
    'api_name': api_name,
}
print(return_data)
Beispiel #9
0
from project.utils.path_util import PathUtil
from sekg.graph.exporter.graph_data import GraphData, NodeInfo
from sekg.ir.doc.wrapper import MultiFieldDocument, MultiFieldDocumentCollection
import json
import definitions
from pathlib import Path

# Versioned inputs and the save location for the document collection.
pro_name = 'jabref'
graph_data_path = PathUtil.graph_data(pro_name=pro_name, version='v3.9')
doc_collection_path = PathUtil.doc(pro_name=pro_name, version='v3.1')
doc_collection_save_path = PathUtil.doc(pro_name=pro_name, version='v3.2')

# JSON inputs under <ROOT_DIR>/output/json.
api_to_example_json_path = Path(definitions.ROOT_DIR).joinpath(
    "output", "json", "api_2_example_sorted.json")
mid_to_method_info_json_path = Path(definitions.ROOT_DIR).joinpath(
    "output", "json", "mid_2_method_info_without_comment.json")

# Both are loaded as soon as this module is imported.
graph_data: GraphData = GraphData.load(graph_data_path)
doc_collection: MultiFieldDocumentCollection = MultiFieldDocumentCollection.load(
    doc_collection_path)
'''
Extract sample code for the doc files
'''


def find_doc(qualified_name):
    # Look the node up by its exact qualified_name first.
    node: NodeInfo = graph_data.find_one_node_by_property(
        property_name='qualified_name', property_value=qualified_name)
    if node is None:
        # No exact match: fall back to the prefix-style lookup.
        # NOTE(review): "starts with" semantics are taken from the helper's
        # name only; confirm against GraphData.
        node: NodeInfo = graph_data.find_one_node_by_property_value_starts_with(
            property_name='qualified_name',
            property_value_starter=qualified_name)
Beispiel #10
0
            print(self.counter)
            self.graph_data.add_relation(start_id, relation_str, end_id)
        except Exception as e:
            print(e)

    def load_entity_words(self):
        """Fill ``entity_words`` and ``entity_2_score`` from the entity JSON file.

        Each record read from ``self.entity_path`` must provide the keys
        "entity_name" and "tf_idf".
        """
        records = self.load_json(self.entity_path)
        for record in records:
            name = record["entity_name"]
            self.entity_words.add(name)
            self.entity_2_score[name] = record["tf_idf"]

    def load_json(self, path):
        """Read the file at ``path`` and return its parsed JSON content.

        :param path: location of the JSON file to read.
        :return: whatever ``json.load`` produces for the file.
        """
        with open(path, "r") as handle:
            return json.load(handle)

    def save_graph(self, output_path):
        """Save ``self.graph_data`` to ``output_path``.

        :param output_path: destination passed straight to ``graph_data.save``.
        """
        self.graph_data.save(output_path)


if __name__ == '__main__':
    # Link API entities using the v3.3 documents, then save graph v3.8.
    pro_name = "jabref"
    data_dir = PathUtil.doc(pro_name=pro_name, version="v3.3")
    doc_collection: MultiFieldDocumentCollection = (
        MultiFieldDocumentCollection.load(data_dir))

    entity_service = EntityService(doc_collection)
    entity_service.link_all_api_entity()

    linked_graph_path = str(
        PathUtil.graph_data(pro_name="jabref", version="v3.8"))
    entity_service.save_graph(linked_graph_path)
    print(f"counter:{entity_service.counter}")
from project.utils.path_util import PathUtil
from sekg.graph.exporter.graph_data import GraphData, NodeInfo
from project.classification_module import method_classification
from nltk.corpus import wordnet as wn
from project.classification_module.method_classification import split
'''
Classify methods
'''

if __name__ == '__main__':
    # 1. Collect every method node in the graph.
    # 2. Pass each qualified_name to the classifier to decide its label.
    pro_name = "jabref"
    graph_data_path = PathUtil.graph_data(pro_name=pro_name, version="v3.4")
    graph_data: GraphData = GraphData.load(graph_data_path)
    graph_data_output_path = PathUtil.graph_data(pro_name=pro_name,
                                                 version='v3.5')
    methods_id: set = graph_data.get_node_ids_by_label("method")
    # For every WordNet noun/verb synset, keep the part of its name before
    # the first dot.
    nouns = {x.name().split('.', 1)[0] for x in wn.all_synsets('n')}
    verbs = {x.name().split('.', 1)[0] for x in wn.all_synsets('v')}

    count = [0, 0, 0, 0, 0]
    for i in methods_id:
        node: NodeInfo = graph_data.find_nodes_by_ids(i)[0]
        qualified_name = node['properties']['qualified_name']
        label = method_classification.basic_classification(qualified_name)
        # The label is compared by value: "is" tests object identity and only
        # matched by accident of string interning.
        if label == "undefined":
            first_word = split(camel_case=qualified_name)
            if first_word in verbs:
                label = "mutator"
            # Checked second, so a word that is both a verb and a noun ends
            # up as "accessor".
            if first_word in nouns:
                label = "accessor"
import networkx as nx

from sekg.graph.exporter.graph_data import GraphData, NodeInfo
from project.utils.path_util import PathUtil

if __name__ == '__main__':
    # Compute PageRank on graph v1.3 and save the scored graph as v1.4.
    pro_name = "jabref"
    graph_data_path = PathUtil.graph_data(pro_name=pro_name, version="v1.3")
    output_path = PathUtil.graph_data(pro_name=pro_name, version="v1.4")

    graph_data: GraphData = GraphData.load(graph_data_path)
    nx_graph = nx.Graph(graph_data.graph)
    # result maps node_id -> PageRank value.
    result = nx.pagerank(nx_graph)
    # Store each value in the matching node's properties. Iterating the
    # result itself replaces the hard-coded range(41167), which only worked
    # for a graph with exactly that many nodes and contiguous ids from 1.
    for node_id, pr_value in result.items():
        node: NodeInfo = graph_data.find_nodes_by_ids(node_id)[0]
        node["properties"]["pr_value"] = pr_value
    graph_data.save(output_path)
Beispiel #13
0
                node_id)
            if node_doc:
                full_description = node_doc.get_doc_text_by_field(
                    'full_description')
                for concept_list_item in self.concepts_list:
                    concept_node_id = -1
                    for concept in concept_list_item:
                        if concept in self.concept_2_node_id:
                            concept_node_id = self.concept_2_node_id[concept]
                            break
                    if concept_node_id >= 0:
                        for concept in concept_list_item:
                            if concept in full_description:
                                self.graph.add_relation(
                                    node_id, "has concept", concept_node_id)
                                break
        print("relation添加完毕")


if __name__ == "__main__":
    # Add concepts and relations to graph v3.8 and save the result as v3.9.
    concept_and_relation_path = Path(DATA_DIR) / "concept_and_relation"
    source_graph_path = PathUtil.graph_data("jabref", "v3.8")
    source_doc_path = PathUtil.doc(pro_name="jabref", version='v3.3')
    concepts_json = str(concept_and_relation_path / "concepts.json")
    relations_json = str(concept_and_relation_path / "relations.json")

    concept_2_graph = Concept2Graph(source_graph_path, source_doc_path,
                                    concepts_json, relations_json)
    concept_2_graph.add_concept_2_graph()
    concept_2_graph.add_relation_2_graph()

    target_graph_path = PathUtil.graph_data("jabref", "v3.9")
    concept_2_graph.graph.save(target_graph_path)
    print("图导入完成")
Beispiel #14
0
from pathlib import Path

from sekg.pipeline.base import KGBuildPipeline

from definitions import OUTPUT_DIR
from project.extractor_module.structure_extractor.characteristic_structure_extractor import CharacteristicStructureExtractor
from project.utils.path_util import PathUtil

if __name__ == '__main__':
    # Run CharacteristicStructureExtractor on graph v1 and save graph v1.1.
    pro_name = "jabref"
    pipeline = KGBuildPipeline()
    graph_data_v1_path = PathUtil.graph_data(pro_name=pro_name, version="v1")
    pipeline.load_graph(graph_data_v1_path)

    component1 = CharacteristicStructureExtractor()
    json_save_path = Path(OUTPUT_DIR) / "json" / "name_characteristic.json"
    component1.set_json_save_path(json_save_path)
    graph_save_path = PathUtil.graph_data(pro_name=pro_name, version="v1.1")
    component1.set_save_path(graph_save_path)

    pipeline.add_component("从名称和结构中抽取特征", component1)
    pipeline.run()
Beispiel #15
0
        label_info = {"entity"}
        type_class = self.get_record_entity_type_by_relation(statement.r_name)
        label_info.add(str(type_class.LABEL))
        label_info.add(str("statement"))
        node_properties = {
            type_class.PRIMARY_PROPERTY_NAME: statement.e_name,
        }
        for extra_info_key in statement.extra_info:
            node_properties[extra_info_key] = statement.extra_info[
                extra_info_key]
        node_properties["which_extractor"] = statement.which_extractor
        node_properties["e_type"] = statement.e_type
        node_properties["s_name"] = statement.s_name
        node_properties["r_name"] = statement.r_name
        graph_id = self.graph_data.add_node(
            label_info,
            node_properties,
            primary_property_name=type_class.PRIMARY_PROPERTY_NAME)
        return graph_id


if __name__ == '__main__':
    # Build the simple graph from v1.4, save it as v1.5, and print the
    # local start and end times around the run.
    start_time = time.asctime(time.localtime())
    print(start_time)

    api_diff_graph_builder = APIDiffGraphBuilder(input_graph_version="v1.4")
    api_diff_graph_builder.build_simple_graph()
    output_graph_path = PathUtil.graph_data(pro_name="jabref", version="v1.5")
    api_diff_graph_builder.graph_data.save(output_graph_path)

    end_time = time.asctime(time.localtime())
    print(end_time)