Python GraphDataの例、kgdt.models.graph.GraphData Pythonの例

コード例 #1

0

ファイルを表示

ファイル: base.py プロジェクト: FudanSELab/kgdt

 def __init__(self):
     """Create the object with empty registries and fresh, empty data stores."""
     # Double-underscore attributes are name-mangled by Python, i.e. private to this class.
     # Lookup table for components (presumably component name -> component instance; confirm).
     self.__name2component = {}
     # Ordered list used together with the lookup table (presumably component names; confirm).
     self.__component_order = []
     # Start with a new, empty GraphData.
     self.__graph_data = GraphData()
     # Start with a new, empty MultiFieldDocumentCollection.
     self.__doc_collection = MultiFieldDocumentCollection()
     # Listener registries, initially empty (presumably keyed by component name; confirm).
     self.__before_run_component_listeners = {}
     self.__after_run_component_listeners = {}

コード例 #2

0

ファイルを表示

    def train(cls, graph_data: GraphData or str or Path, *properties):
        """
        Create a searcher and train it on graph data for the given name properties.

        NOTE: the annotation ``GraphData or str or Path`` is a runtime expression that
        evaluates to its first truthy operand, so it does not express a union; the
        accepted types are the ones listed below.

        :param graph_data: a GraphData instance, or the location of a saved graph given
            as a str or a pathlib.Path (it is then loaded with GraphData.load).
        :param properties: the properties that need to be searched on, could be more than one. e.g., "name","qualified_name","labels_en"
        :return: a new instance of ``cls`` on which ``start_training`` has been called.
        :raises Exception: if graph_data is None, is of an unsupported type, or
            GraphData.load returns None.
        """
        # todo: add some config arguments, to control whether lower the case, split the words.
        if graph_data is None:
            raise Exception("Input GraphData object not exist")

        graph_data_source = None
        # isinstance rather than an exact type comparison: Path(...) instantiates a
        # concrete subclass (PosixPath / WindowsPath), so `type(x) == Path` never
        # matched a real path object and Path inputs were always rejected.
        if isinstance(graph_data, (str, Path)):
            graph_data_source = GraphData.load(str(graph_data))
        elif isinstance(graph_data, GraphData):
            graph_data_source = graph_data

        if graph_data_source is None:
            raise Exception("can't find the graph data")

        searcher = cls()
        searcher.start_training(graph_data_source, *properties)
        return searcher

コード例 #3

0

ファイルを表示

ファイル: base.py プロジェクト: FudanSELab/kgdt

    def load_graph(self, graph_data_path):
        """
        Load a graph with GraphData.load and make it the current graph data.

        The loaded graph is also passed to ``set_graph_data`` of every component
        listed in the component order, so they all see the new graph.

        :param graph_data_path: location handed to GraphData.load
        """
        self.__graph_data = GraphData.load(graph_data_path)

        # Propagate the new graph to the components, following the stored order.
        for name in self.__component_order:
            self.__name2component[name].set_graph_data(self.__graph_data)

        print("load graph")

コード例 #4

0

ファイルを表示

ファイル: neo4j.py プロジェクト: FudanSELab/kgdt

    def node_csv2graphdata(file, graph: GraphData = None, csv_id=GraphData.DEFAULT_KEY_NODE_ID, csv_labels=GraphData.DEFAULT_KEY_NODE_LABELS):
        """
        Import the nodes described by a CSV file into a GraphData.

        Every CSV row becomes one ``graph.add_node`` call. The id and labels cells are
        parsed as Python literals. Any other non-empty cell becomes a node property:
        cells starting with '[' are parsed as a literal (kept as text if that fails),
        cells that are valid integers become int, everything else stays a string.

        :param file: full path of the node CSV file
        :param graph: the GraphData to import into; a new one is created when omitted
        :param csv_id: name of the CSV column holding the node id,
            defaults to GraphData.DEFAULT_KEY_NODE_ID
        :param csv_labels: name of the CSV column holding the node labels,
            defaults to GraphData.DEFAULT_KEY_NODE_LABELS
        :return: the GraphData after the nodes were imported
        """
        # SECURITY: cell contents used to be passed to eval(), which executes arbitrary
        # code found in the CSV file. ast.literal_eval only accepts Python literals
        # (numbers, strings, lists, sets, dicts, ...); cells that relied on arbitrary
        # expressions are no longer evaluated.
        import ast

        # Errors literal_eval is documented to raise on malformed input.
        unparsable = (ValueError, TypeError, SyntaxError, MemoryError, RecursionError)

        # Only create a graph when none was passed; a truthiness test could discard
        # a caller-supplied graph that merely evaluates as false.
        if graph is None:
            graph = GraphData()
        count = 0
        # newline='' is what the csv module requires so that line breaks inside
        # quoted fields are read back unchanged.
        with open(file, 'r', encoding="utf-8", newline='') as csvfile:
            for row in csv.DictReader(csvfile):
                node_id = None
                node_labels = set()
                node_dic = {}
                for row_k, row_v in row.items():
                    if row_k == csv_id:
                        node_id = ast.literal_eval(row_v)
                        continue
                    if row_k == csv_labels:
                        node_labels = ast.literal_eval(row_v)
                        continue
                    # DictReader fills cells missing from a short row with None.
                    if row_v is None or row_v == '':
                        continue
                    if not isinstance(row_v, str):
                        # Surplus cells of an over-long row arrive as a list; keep as-is.
                        node_dic[row_k] = row_v
                        continue
                    if row_v.startswith('['):
                        try:
                            node_dic[row_k] = ast.literal_eval(row_v)
                        except unparsable:
                            # Not a valid literal after all: keep the raw text.
                            node_dic[row_k] = row_v
                        continue
                    try:
                        node_dic[row_k] = int(row_v)
                    except ValueError:
                        node_dic[row_k] = row_v
                result = graph.add_node(node_labels, node_dic, node_id)
                # add_node's result is compared with -1; any other result counts as imported.
                if result != -1:
                    count += 1
        print("从", file, "一共导入graphdata节点个数:   ", count)
        return graph

コード例 #5

0

ファイルを表示

ファイル: neo4j.py プロジェクト: FudanSELab/kgdt

    def import_all_graph_data(self, graph_data: GraphData, clear=True):
        """
        Import every node and relation of a GraphData through the graph accessor.

        An index on (DEFAULT_LABEL, DEFAULT_PRIMARY_KEY) is created first. Nodes are
        imported one by one with ``import_one_entity``; relations are then created
        between the nodes looked up by their primary key.

        :param graph_data: the GraphData whose nodes and relations are imported
        :param clear: when true (the default), all existing relations and nodes are
            deleted before importing
        :return: None
        """
        IndexGraphAccessor(self.graph_accessor).create_index(label=self.DEFAULT_LABEL,
                                                             property_name=self.DEFAULT_PRIMARY_KEY)

        if clear:
            self.graph_accessor.delete_all_relations()
            self.graph_accessor.delete_all_nodes()

        # todo: this is slow, need to speed up, maybe not commit on every step
        for node_id in graph_data.get_node_ids():
            ## todo: fix this by not using 'properties','labels'
            info = graph_data.get_node_info_dict(node_id)
            self.import_one_entity(node_id, info['properties'], info['labels'])

        print("all entity imported")

        def lookup(primary_key_value):
            # Find an imported node by its primary key value.
            return self.graph_accessor.find_node(primary_label=self.DEFAULT_LABEL,
                                                 primary_property=self.DEFAULT_PRIMARY_KEY,
                                                 primary_property_value=primary_key_value)

        for start_node_id, r_name, end_node_id in graph_data.get_relations():
            start_node = lookup(start_node_id)
            end_node = lookup(end_node_id)

            if start_node is None or end_node is None:
                print("fail create relation because start node or end node is none.")
                continue

            # Best effort: a failing relation is reported and the import goes on.
            try:
                self.graph_accessor.create_relation_without_duplicate(start_node, r_name, end_node)
            except Exception:
                traceback.print_exc()
        print("all relation imported")

        print("all graph data import finish")

コード例 #6

0

ファイルを表示

 def __init__(self, graph_data=None, doc_collection=None):
     """
     Store the given graph data and document collection, creating empty ones when omitted.

     :param graph_data: graph data to use; a new GraphData is created when None
     :param doc_collection: document collection to use; a new
         MultiFieldDocumentCollection is created when None
     """
     self.graph_data = GraphData() if graph_data is None else graph_data
     self.doc_collection = (
         MultiFieldDocumentCollection() if doc_collection is None else doc_collection
     )
     # Listener lists start out empty.
     self.__before_run_listeners = []
     self.__after_run_listeners = []

コード例 #7

0

ファイルを表示

    def start_training(self, graph_data: GraphData, *properties):
        """
        Reset this searcher and feed it the values of the given properties of every node.

        For each node and each requested property, a non-empty value is passed to
        ``add_from_property_value`` together with the node id. A value whose type is
        exactly list or set is unpacked and each element is added separately.

        :param graph_data: the GraphData instance
        :param properties: the properties that need to be searched on. e.g., "name","qualified_name","labels_en"
        :return: None
        """
        # todo: add some config arguments, to control whether lower the case, split the words.
        self.clear()
        for node_id in graph_data.get_node_ids():
            props = graph_data.get_properties_for_node(node_id=node_id)
            for key in properties:
                value = props.get(key, None)
                if value:
                    # Treat a scalar as a one-element collection so both cases share one loop.
                    candidates = value if type(value) in (list, set) else (value,)
                    for candidate in candidates:
                        self.add_from_property_value(candidate, node_id)

コード例 #8

0

ファイルを表示

ファイル: neo4j.py プロジェクト: FudanSELab/kgdt

    def export_all_graph_data(self, graph, node_label):
        """
        Read the nodes with the given label, and their relations, into a new GraphData.

        :param graph: graph handle passed to DataExporterAccessor
        :param node_label: label used to select the nodes and relations to export
        :return: a GraphData holding the exported nodes and relations
        """
        exporter = DataExporterAccessor(graph=graph)
        exported_nodes = exporter.get_all_nodes(node_label=node_label)
        exported = GraphData()

        for item in exported_nodes:
            # The node's own identity is reused as the node id in the GraphData.
            exported.add_node(node_labels=list(item.labels),
                              node_id=item.identity,
                              node_properties=dict(item))

        print("load entity complete, num=%d" % len(exported_nodes))
        exported_relations = exporter.get_all_relation(node_label=node_label)
        print("load relation complete,num=%d" % len(exported_relations))
        exported.set_relations(relations=exported_relations)

        return exported

コード例 #9

0

ファイルを表示

ファイル: neo4j.py プロジェクト: FudanSELab/kgdt

 def relation_csv2graphdata(file, graph=None, start_name=GraphData.DEFAULT_KEY_RELATION_START_ID,
                            relation_type_name=GraphData.DEFAULT_KEY_RELATION_TYPE, end_name=GraphData.DEFAULT_KEY_RELATION_END_ID):
     """
     Import the relations described by a CSV file into a GraphData.

     Every row with a start id, a relation type and an end id becomes one
     ``graph.add_relation`` call; the two ids are converted to int. Rows in which
     one of the three cells is empty or missing are skipped.

     :param file: full path of the relation CSV file
     :param graph: the GraphData to import into; a new one is created when omitted
     :param start_name: name of the CSV column holding the start node id,
         defaults to GraphData.DEFAULT_KEY_RELATION_START_ID
     :param relation_type_name: name of the CSV column holding the relation type,
         defaults to GraphData.DEFAULT_KEY_RELATION_TYPE
     :param end_name: name of the CSV column holding the end node id,
         defaults to GraphData.DEFAULT_KEY_RELATION_END_ID
     :return: the GraphData after the relations were imported
     :raises KeyError: if one of the three columns is absent from the CSV header
     :raises ValueError: if a start or end id cell is not an integer
     """
     # Previously a missing graph made the function return an empty GraphData
     # without reading the file; create the graph and carry on with the import.
     if graph is None:
         graph = GraphData()
     count = 0
     # newline='' is what the csv module requires so that line breaks inside
     # quoted fields are read back unchanged.
     with open(file, 'r', encoding="utf-8", newline='') as csvfile:
         for row in csv.DictReader(csvfile):
             start_id = row[start_name]
             relation_type = row[relation_type_name]
             end_id = row[end_name]
             # Empty cells are '' and cells missing from a short row are None.
             if not (start_id and relation_type and end_id):
                 continue
             if graph.add_relation(int(start_id), relation_type, int(end_id)):
                 count += 1
     print("从", file, "一共导入graphdata关系个数:   ", count)
     return graph

コード例 #10

0

ファイルを表示