def get_connection_multigraph_weighted(name2dp, connections):
    """Build a multigraph of connections annotated with resource types.

    For every connection an edge ``dp1 -> dp2`` is ensured to exist, and
    the resource type obtained from ``name2dp[dp1].get_rtype(s1)`` is
    appended to the list stored under ``'spaces'`` for that node pair.

    :param name2dp: mapping from a name to an object offering ``get_rtype``.
    :param connections: iterable of objects with ``dp1``, ``dp2`` and ``s1``.
    :return: the populated ``MultiDiGraph``.

    NOTE(review): ``G.edge`` is the networkx 1.x adjacency attribute; it
    appears to have been removed in networkx 2.0 -- confirm the pinned
    version before upgrading.
    """
    graph = MultiDiGraph()
    for conn in connections:
        src, dst = conn.dp1, conn.dp2
        if graph.has_edge(src, dst):
            # Reuse the list accumulated by earlier connections.
            spaces = graph.edge[src][dst]['spaces']
        else:
            spaces = []
            graph.add_edge(src, dst)
        spaces.append(name2dp[conn.dp1].get_rtype(conn.s1))
        graph.edge[src][dst]['spaces'] = spaces
    return graph
def get_connection_multigraph_weighted(name2dp, connections):
    """Build a multigraph of connections annotated with resource types.

    For every connection an edge ``dp1 -> dp2`` is ensured to exist, and
    the resource type obtained from ``name2dp[dp1].get_rtype(s1)`` is
    appended to the list stored under ``'spaces'`` for that node pair.

    :param name2dp: mapping from a name to an object offering ``get_rtype``.
    :param connections: iterable of objects with ``dp1``, ``dp2`` and ``s1``.
    :return: the populated ``MultiDiGraph``.

    NOTE(review): ``G.edge`` is the networkx 1.x adjacency attribute; it
    appears to have been removed in networkx 2.0 -- confirm the pinned
    version before upgrading.
    """
    graph = MultiDiGraph()
    for conn in connections:
        src, dst = conn.dp1, conn.dp2
        if graph.has_edge(src, dst):
            # Reuse the list accumulated by earlier connections.
            spaces = graph.edge[src][dst]['spaces']
        else:
            spaces = []
            graph.add_edge(src, dst)
        spaces.append(name2dp[conn.dp1].get_rtype(conn.s1))
        graph.edge[src][dst]['spaces'] = spaces
    return graph
def add_all_edges(g1: nx.MultiDiGraph, g2: nx.MultiDiGraph, preserve: bool = True) -> int:
    """
    Copy every edge of the source graph (``g2``) into the target graph (``g1``).

    An edge whose ``(u, v, key)`` triple is already present in ``g1`` is
    handed to ``merge_edge``; any other edge is added with its data.

    Parameters
    ----------
    g1: networkx.MultiDiGraph
        Target graph (modified in place)
    g2: networkx.MultiDiGraph
        Source graph
    preserve: bool
        Forwarded to ``merge_edge``; whether or not to preserve
        conflicting properties

    Returns
    -------
    int
        Number of edges that were merged (i.e. already existed in ``g1``)

    """
    logging.info(f"Adding {g2.number_of_edges()} edges from {g2} to {g1}")
    merged = 0
    for subject, obj, edge_key, edge_data in g2.edges(keys=True, data=True):
        if not g1.has_edge(subject, obj, edge_key):
            # Brand-new edge: nothing to reconcile.
            g1.add_edge(subject, obj, edge_key, **edge_data)
            continue
        merge_edge(g1, subject, obj, edge_key, edge_data, preserve)
        merged += 1
    return merged
def calculate_dijkstra(source: int, layered_graph: LayeredGraph, ) -> Tuple[MultiDiGraph, MultiDiGraph]:
    """Run a Dijkstra-style search from ``source`` and build a path tree.

    Every node of ``layered_graph`` gets a record holding its best known
    cost, the arc through which that cost was reached, a visited flag and
    its depth (number of arcs from ``source``). Relaxed arcs are stored in
    a ``MultiDiGraph`` with a ``COST`` edge attribute.

    After the search, leaves are popped one by one and removed from the
    tree when the truncation test below succeeds; a parent that is left
    without outgoing tree edges is then examined as a leaf too.

    :param source: node of ``layered_graph`` to start from.
    :param layered_graph: graph to search; must offer ``out_edges``,
        ``origin_nodes`` and ``origin_node_index``.
    :return: ``(tree, not_truncated_tree)`` -- the pruned tree and a copy
        of the tree taken before pruning.
    """
    node_info = {node: {COST: inf, ARC: None, VISITED: False, DEPTH: inf} for node in layered_graph}
    node_info[source][COST] = 0
    node_info[source][DEPTH] = 0
    tree = MultiDiGraph()
    # presumably `insert` / `extract_minimum` are heap push / pop -- verify against the imports
    to_visit = []
    insert(to_visit, (0, source))
    # Per origin node: the ((cost, depth), node) entries recorded for its images.
    images = {node: [] for node in layered_graph.origin_nodes}
    insert(images[layered_graph.origin_node_index(source)], ((0, 0), source))
    leafs = set()
    while len(to_visit) > 0:
        to_open = extract_minimum(to_visit)[1]
        # Stale queue entries for already-settled nodes are skipped.
        if node_info[to_open][VISITED]:
            continue
        node_info[to_open][VISITED] = True
        for arc in layered_graph.out_edges(to_open, data=True):
            dest = arc[1]
            # A destination without outgoing edges is a leaf candidate for pruning.
            if not layered_graph.out_edges(dest):
                leafs.add(dest)
            new_cost = node_info[to_open][COST] + arc[2][WEIGHT]
            new_depth = node_info[to_open][DEPTH] + 1
            if node_info[dest][COST] > new_cost:
                node_info[dest][COST] = new_cost
                node_info[dest][ARC] = arc
                node_info[dest][DEPTH] = new_depth
                insert(to_visit, (new_cost, dest))
                insert(images[layered_graph.origin_node_index(dest)], ((new_cost, new_depth), dest))
                if tree.has_edge(to_open, dest):  # TODO change on link to arc
                    tree.remove_edge(to_open, dest)
                tree.add_edge(to_open, dest, COST=new_cost)
    # node_info truncation - tree truncation
    not_truncated_tree = MultiDiGraph(tree)
    while leafs:
        leaf = leafs.pop()
        path_cost = node_info[leaf][COST]
        path_depth = node_info[leaf][DEPTH]
        # NOTE(review): if `nsmallest` is `heapq.nsmallest` it returns a list, so
        # comparing it with a tuple is always unequal and every popped leaf is
        # removed -- confirm whether `nsmallest(...)[0]` was intended.
        if nsmallest(1, images[layered_graph.origin_node_index(leaf)]) != ((path_cost, path_depth), leaf):
            parent = node_info[leaf][ARC][0]
            tree.remove_node(leaf)
            # The parent became a leaf of the tree; examine it as well.
            if not tree.out_edges(parent):
                leafs.add(parent)
    return tree, not_truncated_tree
def distributed_induction(graph: nx.MultiDiGraph, sample: nx.MultiDiGraph, partition_map: PartitionMap, ownership: Set[Vertex]):
    """Induce edges of ``graph`` into ``sample`` across MPI ranks.

    Edges of ``graph`` that are not yet in ``sample`` but whose source node
    is, are bucketed by a randomly chosen owner of their target node. The
    bucket of the local rank is resolved immediately against ``ownership``;
    all buckets are then passed to ``query_inductions``.

    :param graph: full graph whose edges are considered.
    :param sample: sampled graph, extended in place.
    :param partition_map: provides ``get_owners(node)`` for a node.
    :param ownership: nodes owned by the local rank.
    """
    # Step 1: collect non-sampled edges, bucketed per owning rank
    edge_queries = [[] for _ in range(mpi.size)]
    for candidate in graph.edges:
        if sample.has_edge(*candidate) or not sample.has_node(candidate[0]):
            continue
        target_owners = partition_map.get_owners(candidate[1])
        # Select only one of the owners randomly
        chosen_rank = random.choice(target_owners)
        edge_queries[chosen_rank].append(candidate)

    # Step 2: resolve induction of locally owned nodes
    local_queries = edge_queries[mpi.rank]
    for candidate in local_queries:
        if candidate[1] in ownership:
            sample.add_edge(*candidate)
    local_queries.clear()

    # Step 3: hand the per-rank queries over to query_inductions
    query_inductions(sample, edge_queries, ownership)
class NxGraph(BaseGraph):
    """
    NxGraph is a wrapper that provides methods to interact with a networkx.MultiDiGraph.

    NxGraph extends kgx.graph.base_graph.BaseGraph and implements all the methods from BaseGraph.
    """

    def __init__(self):
        super().__init__()
        self.graph = MultiDiGraph()
        self.name = None

    def add_node(self, node: str, **kwargs: Any) -> None:
        """
        Add a node to the graph.

        Parameters
        ----------
        node: str
            Node identifier
        **kwargs: Any
            Any additional node properties. If a ``data`` keyword is given,
            its value is used as the property dictionary instead.

        """
        data = kwargs["data"] if "data" in kwargs else kwargs
        self.graph.add_node(node, **data)

    def add_edge(self, subject_node: str, object_node: str, edge_key: Optional[str] = None, **kwargs: Any) -> None:
        """
        Add an edge to the graph.

        Parameters
        ----------
        subject_node: str
            The subject (source) node
        object_node: str
            The object (target) node
        edge_key: Optional[str]
            The edge key
        kwargs: Any
            Any additional edge properties. If a ``data`` keyword is given,
            its value is used as the property dictionary instead.

        """
        data = kwargs["data"] if "data" in kwargs else kwargs
        # The result of the underlying networkx call is passed through unchanged.
        return self.graph.add_edge(subject_node, object_node, key=edge_key, **data)

    def add_node_attribute(self, node: str, attr_key: str, attr_value: Any) -> None:
        """
        Add an attribute to a given node.

        Parameters
        ----------
        node: str
            The node identifier
        attr_key: str
            The key for an attribute
        attr_value: Any
            The value corresponding to the key

        """
        self.graph.add_node(node, **{attr_key: attr_value})

    def add_edge_attribute(
        self,
        subject_node: str,
        object_node: str,
        edge_key: Optional[str],
        attr_key: str,
        attr_value: Any,
    ) -> None:
        """
        Add an attribute to a given edge.

        Parameters
        ----------
        subject_node: str
            The subject (source) node
        object_node: str
            The object (target) node
        edge_key: Optional[str]
            The edge key
        attr_key: str
            The attribute key
        attr_value: Any
            The attribute value

        """
        self.graph.add_edge(subject_node, object_node, key=edge_key, **{attr_key: attr_value})

    def update_node_attribute(self, node: str, attr_key: str, attr_value: Any, preserve: bool = False) -> Dict:
        """
        Update an attribute of a given node.

        Parameters
        ----------
        node: str
            The node identifier
        attr_key: str
            The key for an attribute
        attr_value: Any
            The value corresponding to the key
        preserve: bool
            Whether or not to preserve existing values for the given attr_key

        Returns
        -------
        Dict
            A dictionary corresponding to the updated node properties

        """
        node_data = self.graph.nodes[node]
        updated = prepare_data_dict(node_data, {attr_key: attr_value}, preserve=preserve)
        self.graph.add_node(node, **updated)
        return updated

    def update_edge_attribute(
        self,
        subject_node: str,
        object_node: str,
        edge_key: Optional[str],
        attr_key: str,
        attr_value: Any,
        preserve: bool = False,
    ) -> Dict:
        """
        Update an attribute of a given edge.

        Parameters
        ----------
        subject_node: str
            The subject (source) node
        object_node: str
            The object (target) node
        edge_key: Optional[str]
            The edge key; it must identify an existing edge
        attr_key: str
            The attribute key
        attr_value: Any
            The attribute value
        preserve: bool
            Whether or not to preserve existing values for the given attr_key

        Returns
        -------
        Dict
            A dictionary corresponding to the updated edge properties

        Raises
        ------
        KeyError
            If there is no edge ``(subject_node, object_node, edge_key)``

        """
        # Look the edge up directly. ``MultiDiGraph.edges(nbunch, ...)`` treats
        # its first argument as a bunch of *nodes*, so passing the
        # (subject, object, key) triple there yields every edge leaving those
        # nodes rather than the single requested edge.
        edge_data = self.graph[subject_node][object_node][edge_key]
        updated = prepare_data_dict(edge_data, {attr_key: attr_value}, preserve)
        self.graph.add_edge(subject_node, object_node, key=edge_key, **updated)
        return updated

    def get_node(self, node: str) -> Dict:
        """
        Get a node and its properties.

        Parameters
        ----------
        node: str
            The node identifier

        Returns
        -------
        Dict
            The node dictionary, or an empty dictionary if the node does not exist

        """
        n = {}
        if self.graph.has_node(node):
            n = self.graph.nodes[node]
        return n

    def get_edge(self, subject_node: str, object_node: str, edge_key: Optional[str] = None) -> Dict:
        """
        Get an edge and its properties.

        Parameters
        ----------
        subject_node: str
            The subject (source) node
        object_node: str
            The object (target) node
        edge_key: Optional[str]
            The edge key

        Returns
        -------
        Dict
            The edge dictionary, or an empty dictionary if the edge does not exist

        """
        e = {}
        if self.graph.has_edge(subject_node, object_node, edge_key):
            e = self.graph.get_edge_data(subject_node, object_node, edge_key)
        return e

    def nodes(self, data: bool = True) -> Dict:
        """
        Get all nodes in a graph.

        Parameters
        ----------
        data: bool
            Whether or not to fetch node properties

        Returns
        -------
        Dict
            A dictionary of nodes

        """
        return self.graph.nodes(data)

    def edges(self, keys: bool = False, data: bool = True) -> Dict:
        """
        Get all edges in a graph.

        Parameters
        ----------
        keys: bool
            Whether or not to include edge keys
        data: bool
            Whether or not to fetch edge properties

        Returns
        -------
        Dict
            A dictionary of edges

        """
        return self.graph.edges(keys=keys, data=data)

    def in_edges(self, node: str, keys: bool = False, data: bool = False) -> List:
        """
        Get all incoming edges for a given node.

        Parameters
        ----------
        node: str
            The node identifier
        keys: bool
            Whether or not to include edge keys
        data: bool
            Whether or not to fetch edge properties

        Returns
        -------
        List
            A list of edges

        """
        return self.graph.in_edges(node, keys=keys, data=data)

    def out_edges(self, node: str, keys: bool = False, data: bool = False) -> List:
        """
        Get all outgoing edges for a given node.

        Parameters
        ----------
        node: str
            The node identifier
        keys: bool
            Whether or not to include edge keys
        data: bool
            Whether or not to fetch edge properties

        Returns
        -------
        List
            A list of edges

        """
        return self.graph.out_edges(node, keys=keys, data=data)

    def nodes_iter(self) -> Generator:
        """
        Get an iterable to traverse through all the nodes in a graph.

        Returns
        -------
        Generator
            A generator for nodes where each element is a Tuple that
            contains (node_id, node_data)

        """
        yield from self.graph.nodes(data=True)

    def edges_iter(self) -> Generator:
        """
        Get an iterable to traverse through all the edges in a graph.

        Returns
        -------
        Generator
            A generator for edges where each element is a 4-tuple that
            contains (subject, object, edge_key, edge_data)

        """
        for u, v, k, data in self.graph.edges(keys=True, data=True):
            yield u, v, k, data

    def remove_node(self, node: str) -> None:
        """
        Remove a given node from the graph.

        Parameters
        ----------
        node: str
            The node identifier

        """
        self.graph.remove_node(node)

    def remove_edge(self, subject_node: str, object_node: str, edge_key: Optional[str] = None) -> None:
        """
        Remove a given edge from the graph.

        Parameters
        ----------
        subject_node: str
            The subject (source) node
        object_node: str
            The object (target) node
        edge_key: Optional[str]
            The edge key

        """
        self.graph.remove_edge(subject_node, object_node, edge_key)

    def has_node(self, node: str) -> bool:
        """
        Check whether a given node exists in the graph.

        Parameters
        ----------
        node: str
            The node identifier

        Returns
        -------
        bool
            Whether or not the given node exists

        """
        return self.graph.has_node(node)

    def has_edge(self, subject_node: str, object_node: str, edge_key: Optional[str] = None) -> bool:
        """
        Check whether a given edge exists in the graph.

        Parameters
        ----------
        subject_node: str
            The subject (source) node
        object_node: str
            The object (target) node
        edge_key: Optional[str]
            The edge key

        Returns
        -------
        bool
            Whether or not the given edge exists

        """
        return self.graph.has_edge(subject_node, object_node, key=edge_key)

    def number_of_nodes(self) -> int:
        """
        Returns the number of nodes in a graph.

        Returns
        -------
        int

        """
        return self.graph.number_of_nodes()

    def number_of_edges(self) -> int:
        """
        Returns the number of edges in a graph.

        Returns
        -------
        int

        """
        return self.graph.number_of_edges()

    def degree(self):
        """
        Get the degree of all the nodes in a graph.
        """
        return self.graph.degree()

    def clear(self) -> None:
        """
        Remove all the nodes and edges in the graph.
        """
        self.graph.clear()

    @staticmethod
    def set_node_attributes(graph: BaseGraph, attributes: Dict) -> None:
        """
        Set nodes attributes from a dictionary of key-values.

        Parameters
        ----------
        graph: kgx.graph.base_graph.BaseGraph
            The graph to modify
        attributes: Dict
            A dictionary of node identifier to key-value pairs

        """
        return set_node_attributes(graph.graph, attributes)

    @staticmethod
    def set_edge_attributes(graph: BaseGraph, attributes: Dict) -> None:
        """
        Set edge attributes from a dictionary of key-values.

        Parameters
        ----------
        graph: kgx.graph.base_graph.BaseGraph
            The graph to modify
        attributes: Dict
            A dictionary of edge identifier to key-value pairs

        Returns
        -------
        Any

        """
        return set_edge_attributes(graph.graph, attributes)

    @staticmethod
    def get_node_attributes(graph: BaseGraph, attr_key: str) -> Dict:
        """
        Get all nodes that have a value for the given attribute ``attr_key``.

        Parameters
        ----------
        graph: kgx.graph.base_graph.BaseGraph
            The graph to read from
        attr_key: str
            The attribute key

        Returns
        -------
        Dict
            A dictionary where nodes are the keys and the values
            are the attribute values for ``attr_key``

        """
        return get_node_attributes(graph.graph, attr_key)

    @staticmethod
    def get_edge_attributes(graph: BaseGraph, attr_key: str) -> Dict:
        """
        Get all edges that have a value for the given attribute ``attr_key``.

        Parameters
        ----------
        graph: kgx.graph.base_graph.BaseGraph
            The graph to read from
        attr_key: str
            The attribute key

        Returns
        -------
        Dict
            A dictionary where edges are the keys and the values
            are the attribute values for ``attr_key``

        """
        return get_edge_attributes(graph.graph, attr_key)

    @staticmethod
    def relabel_nodes(graph: BaseGraph, mapping: Dict) -> None:
        """
        Relabel identifiers for a series of nodes based on mappings.

        The relabelling is applied to ``graph.graph`` in place (``copy=False``).

        Parameters
        ----------
        graph: kgx.graph.base_graph.BaseGraph
            The graph to modify
        mapping: Dict
            A dictionary of mapping where the key is the old identifier
            and the value is the new identifier.

        """
        relabel_nodes(graph.graph, mapping, copy=False)
class GraphBuilder:
    """Incrementally build a ``MultiDiGraph`` from JSON node and edge definitions.

    ``nodes`` and ``edges`` count the calls to ``add_node`` / ``add_edge``.
    """

    def __init__(self, model: Model):
        self.model = model
        self.graph = MultiDiGraph()
        self.nodes = 0
        self.edges = 0

    def add_from_json(self, js: Json) -> None:
        """Add the vertex or edge described by ``js``.

        A document with ``"id"`` and the reported section is a vertex; a
        document with ``"from"`` and ``"to"`` is an edge.

        :raises AttributeError: if ``js`` is neither a vertex nor an edge.
        """
        if "id" in js and Section.reported in js:
            self.add_node(
                js["id"],
                js[Section.reported],
                js.get(Section.desired, None),
                js.get(Section.metadata, None),
                js.get("search", None),
                js.get("replace", False) is True,
            )
        elif "from" in js and "to" in js:
            self.add_edge(js["from"], js["to"], js.get("edge_type", EdgeType.default))
        else:
            raise AttributeError(f"Format not understood! Got {json.dumps(js)} which is neither vertex nor edge.")

    def add_node(
        self,
        node_id: str,
        reported: Json,
        desired: Optional[Json] = None,
        metadata: Optional[Json] = None,
        search: Optional[str] = None,
        replace: bool = False,
    ) -> None:
        """Validate ``reported`` against the model and add it as a node.

        :param node_id: identifier of the node.
        :param reported: reported section; replaced by the coerced value
            when ``model.check_valid`` returns one.
        :param desired: optional desired section.
        :param metadata: optional metadata section.
        :param search: pre-computed search string; when it is not a string
            the reported section is flattened instead.
        :param replace: mark the node as a replace node. The node is also
            marked when ``metadata["replace"]`` is ``True``.
        """
        self.nodes += 1
        # validate kind of this reported json
        coerced = self.model.check_valid(reported)
        reported = reported if coerced is None else coerced
        kind = self.model[reported]
        # create content hash
        sha = GraphBuilder.content_hash(reported, desired, metadata)
        # flat all properties into a single string for search
        flat = search if isinstance(search, str) else (GraphBuilder.flatten(reported, kind))
        # Combine both sources of the flag explicitly. The former one-liner
        # `replace | metadata.get(...) is True if metadata else False` parsed as
        # `((replace | ...) is True) if metadata else False` and therefore
        # dropped the `replace` argument whenever metadata was missing or empty.
        replace_from_metadata = bool(metadata) and metadata.get("replace", False) is True
        self.graph.add_node(
            node_id,
            id=node_id,
            reported=reported,
            desired=desired,
            metadata=metadata,
            hash=sha,
            kind=kind,
            kinds=list(kind.kind_hierarchy()),
            kinds_set=kind.kind_hierarchy(),
            flat=flat,
            replace=replace or replace_from_metadata,
        )

    def add_edge(self, from_node: str, to_node: str, edge_type: str) -> None:
        """Add an edge of ``edge_type``, keyed by ``GraphAccess.edge_key``."""
        self.edges += 1
        key = GraphAccess.edge_key(from_node, to_node, edge_type)
        self.graph.add_edge(from_node, to_node, key, edge_type=edge_type)

    @staticmethod
    def content_hash(js: Json, desired: Optional[Json] = None, metadata: Optional[Json] = None) -> str:
        """Return the SHA-256 hex digest over the given sections.

        ``desired`` and ``metadata`` only contribute when they are truthy.
        """
        sha256 = hashlib.sha256()
        # all content hashes will be different, when the version changes
        sha256.update(ContentHashVersion.to_bytes(2, "big"))
        sha256.update(json.dumps(js, sort_keys=True).encode("utf-8"))
        if desired:
            sha256.update(json.dumps(desired, sort_keys=True).encode("utf-8"))
        if metadata:
            sha256.update(json.dumps(metadata, sort_keys=True).encode("utf-8"))
        return sha256.hexdigest()

    @staticmethod
    def flatten(js: Json, kind: Kind) -> str:
        """Join all leaf values of ``js`` into one space-separated string.

        ``None`` and boolean values are skipped; values of a
        ``DateTimeKind`` have ``Z`` and ``T`` replaced by spaces.
        """
        result = ""

        def dispatch(value: Any, k: Kind) -> None:
            nonlocal result
            if isinstance(value, dict):
                for prop, elem in value.items():
                    sub = (
                        k.property_kind_of(prop, AnyKind())
                        if isinstance(k, ComplexKind)
                        else (k.value_kind if isinstance(k, DictionaryKind) else AnyKind())
                    )
                    dispatch(elem, sub)
            elif isinstance(value, list):
                sub = k.inner if isinstance(k, ArrayKind) else AnyKind()
                for elem in value:
                    dispatch(elem, sub)
            elif value is None or isinstance(value, bool):
                pass
            else:
                # in case of date time: "2017-05-30T22:04:34Z" -> "2017-05-30 22:04:34"
                if isinstance(k, DateTimeKind):
                    value = re.sub("[ZT]", " ", value)
                if result:
                    result += " "
                result += str(value).strip()

        dispatch(js, kind)
        return result

    def check_complete(self) -> None:
        """Check the built graph and normalize the id of the root node.

        Asserts that every node has a reported section and that only known
        edge types are used. If the root node is of kind ``graph_root`` but
        its id is not ``"root"``, the node and its outgoing edges are moved
        to the id ``"root"``.
        """
        # check that all vertices are given, that were defined in any edge definition
        # note: DiGraph will create an empty vertex node automatically
        for node_id, node in self.graph.nodes(data=True):
            assert node.get(Section.reported), f"{node_id} was used in an edge definition but not provided as vertex!"

        edge_types = {edge[2] for edge in self.graph.edges(data="edge_type")}
        al = EdgeType.all
        assert not edge_types.difference(al), f"Graph contains unknown edge types! Given: {edge_types}. Known: {al}"
        # make sure there is only one root node
        rid = GraphAccess.root_id(self.graph)
        root_node = self.graph.nodes[rid]

        # make sure a root of kind graph_root is stored under the id "root"
        if value_in_path(root_node, NodePath.reported_kind) == "graph_root" and rid != "root":
            # re-add the node under the id "root" and drop the wrongly named one
            root_node = self.graph.nodes[rid]
            root_node["id"] = "root"
            self.graph.add_node("root", **root_node)

            # list(): the successors are materialized because edges are removed while looping
            for succ in list(self.graph.successors(rid)):
                for edge_type in EdgeType.all:
                    key = GraphAccess.edge_key(rid, succ, edge_type)
                    if self.graph.has_edge(rid, succ, key):
                        self.graph.remove_edge(rid, succ, key)
                        self.add_edge("root", succ, edge_type)
            self.graph.remove_node(rid)
class GraphData(SaveLoad): """ the store of a graph data. each node is represent as a dict of node info named 'node_json', Example Format for 'node_json': { "id": 1, "properties": {"name":"bob","age":1}, "labels": ["entity","man"] } >>> graphdata=GraphData() graphdata.create_index_on_property("name","aliases","qualified_name") # save a graphdata to disk graphdata.save("test.v1.graph") # load a graphdata from disk graphdata=Graphdata.load("test.v1.graph") >>> """ DEFAULT_KEY_NODE_ID = "id" # the key name for the node id, every node must have it. DEFAULT_KEY_NODE_PROPERTIES = "properties" # the key name for the node properties, every node must have it. DEFAULT_KEY_NODE_LABELS = "labels" # the key name for the node labels, every node must have it. DEFAULT_KEYS = [ DEFAULT_KEY_NODE_ID, DEFAULT_KEY_NODE_PROPERTIES, DEFAULT_KEY_NODE_LABELS ] UNASSIGNED_NODE_ID = -1 # a node without a id specify, a newly created node, its id is -1 DEFAULT_KEY_RELATION_START_ID = "startId" DEFAULT_KEY_RELATION_TYPE = "relationType" DEFAULT_KEY_RELATION_END_ID = "endId" def __init__(self): # two map for self.__init_graph() def clear(self): self.__init_graph() def __init_graph(self): self.graph = MultiDiGraph() self.max_node_id = 0 self.label_to_ids_map = {} self.index_collection = GraphIndexCollection() self.relation_type_to_num_map = {} def create_index_on_property(self, *property_name_list): """ create index on some properties. It makes the query on the corresponding property faster. :param property_name_list: one or one more property names. 
:return: """ self.index_collection.create_index_on_property(*property_name_list) def find_all_shortest_paths(self, startId, endId): """ 找到所有的最短路 :param startId: :param endId: :return: """ shortest_paths = all_shortest_paths(self.graph, startId, endId) return shortest_paths def find_shortest_path(self, startId, endId): """ 找到一个最短路 :param startId: :param endId: :return: """ shortest_paths = shortest_path(self.graph, startId, endId) return shortest_paths def set_nodes(self, nodes): for n in nodes: self.add_node(node_id=n[self.DEFAULT_KEY_NODE_ID], node_properties=n[self.DEFAULT_KEY_NODE_PROPERTIES], node_labels=n[self.DEFAULT_KEY_NODE_LABELS]) def add_labels(self, *labels): """ add a list of label to the graph :param labels: :return: """ for label in labels: if not label: return if label not in self.label_to_ids_map.keys(): self.label_to_ids_map[label] = set([]) def add_label_by_node_id(self, node_id, label): """ add a label to a node :param node_id: the node id which the label need to add :param label: the label that need to added :return: True, add successful.False, add fail. 
""" if not label: return False node_json = self.get_node_info_dict(node_id) if not node_json: return False node_json[GraphData.DEFAULT_KEY_NODE_LABELS].add(label) self.label_to_ids_map[label].add(node_id) return True def get_node_ids_by_label(self, label): if label not in self.label_to_ids_map.keys(): return set([]) return self.label_to_ids_map[label] def add_label_by_label(self, label, new_label): """ add a label to node in graph, the node must has the specific label :param new_label: the new_label add to node :param label: the node must has the label :return: """ for node_id in self.get_node_ids_by_label(label): self.add_label_by_node_id(node_id, new_label) def add_label_to_all(self, label): """ add a label to node in graph :param label: :return: """ if not label: return self.add_labels(label) for node_id in self.get_node_ids(): self.add_label_by_node_id(node_id, label) def add_node(self, node_labels, node_properties, node_id=UNASSIGNED_NODE_ID, primary_property_name=""): """ add a node json to the graph :param node_id: the node_id to identify the node, if not given, it will be add as new node and give a node id :param node_properties: a dict of node properties, key-value pair :param node_labels: a set of node labels :param primary_property_name:make sure the node_json["properties"][primary_property_name] is unique in GraphData. if no passing, the node json will be add to graph without check. otherwise, only the node json with unique property value ( property value is got by primary_property_name ) will be added to the GraphData. :return:-1, means that adding node json fail. 
otherwise, return the id of the newly added node """ if primary_property_name: if primary_property_name not in node_properties: print( "node json must have a primary_property_name ( %r ) in properties " % primary_property_name) return self.UNASSIGNED_NODE_ID node_json = self.find_one_node_by_property( property_name=primary_property_name, property_value=node_properties[primary_property_name]) if node_json: return node_json[self.DEFAULT_KEY_NODE_ID] if node_id == self.UNASSIGNED_NODE_ID: node_id = self.max_node_id + 1 self.max_node_id = self.max_node_id + 1 new_node_json = { self.DEFAULT_KEY_NODE_ID: node_id, self.DEFAULT_KEY_NODE_PROPERTIES: node_properties, self.DEFAULT_KEY_NODE_LABELS: set(node_labels) } self.graph.add_node(node_id, **new_node_json) if self.max_node_id < node_id: self.max_node_id = node_id self.add_labels(*new_node_json[self.DEFAULT_KEY_NODE_LABELS]) for label in new_node_json[self.DEFAULT_KEY_NODE_LABELS]: self.label_to_ids_map[label].add(node_id) self.index_collection.add_node( node_id=node_id, node_properties=new_node_json[ GraphData.DEFAULT_KEY_NODE_PROPERTIES]) return node_id def update_node_property_by_node_id(self, node_id, node_properties): if not node_id in list(self.get_node_ids()): return self.UNASSIGNED_NODE_ID node_json = self.get_node_info_dict(node_id) update_node_id = node_json[self.DEFAULT_KEY_NODE_ID] update_node_properties = node_json[self.DEFAULT_KEY_NODE_PROPERTIES] update_node_labels = node_json[self.DEFAULT_KEY_NODE_LABELS] for k, v in node_properties.items(): update_node_properties[k] = v update_node_json = { self.DEFAULT_KEY_NODE_ID: update_node_id, self.DEFAULT_KEY_NODE_PROPERTIES: update_node_properties, self.DEFAULT_KEY_NODE_LABELS: update_node_labels } self.graph.add_node(update_node_id, **update_node_json) self.index_collection.add_node(node_id=update_node_id, node_properties=update_node_properties) return update_node_id def update_node_by_node_id(self, node_id, node_labels, node_properties): if not node_id in 
list(self.get_node_ids()): return self.UNASSIGNED_NODE_ID node_json = self.get_node_info_dict(node_id) update_node_id = node_json[self.DEFAULT_KEY_NODE_ID] update_node_properties = node_json[self.DEFAULT_KEY_NODE_PROPERTIES] update_node_labels = node_json[self.DEFAULT_KEY_NODE_LABELS] for k, v in node_properties.items(): update_node_properties[k] = v for label in node_labels: update_node_labels.add(label) update_node_json = { self.DEFAULT_KEY_NODE_ID: update_node_id, self.DEFAULT_KEY_NODE_PROPERTIES: update_node_properties, self.DEFAULT_KEY_NODE_LABELS: update_node_labels } self.graph.add_node(update_node_id, **update_node_json) self.add_labels(*update_node_labels) for label in update_node_labels: self.label_to_ids_map[label].add(node_id) self.index_collection.add_node(node_id=update_node_id, node_properties=update_node_properties) return update_node_id def update_node_property_value_by_node_id(self, node_id, node_property_name, node_proprty_value): if not node_id in list(self.get_node_ids()): return self.UNASSIGNED_NODE_ID if node_property_name == "": return node_id node_property = {node_property_name: node_proprty_value} return self.update_node_property_by_node_id(node_id, node_property) def remove_node(self, node_id): if node_id not in self.graph.nodes: return None # print(type(self.graph.nodes)) node_json = self.graph.nodes[node_id] in_relations = set(self.graph.in_edges(node_id, keys=True)) out_relations = set(self.graph.out_edges(node_id, keys=True)) self.graph.remove_node(node_id) for label in node_json[self.DEFAULT_KEY_NODE_LABELS]: self.label_to_ids_map[label].remove(node_id) self.index_collection.remove_node(node_id) return node_json, out_relations, in_relations def remove_all_nodes(self): ids = self.get_node_ids() for id in ids: self.remove_node(id) return True def merge_node(self, node_labels, node_properties, primary_property_name): """ merge a node json to the graph, that is if we can't not find the node with primary_property_value match the given 
node. we will add a new node, if we found, we will add copy all properties given to the exist node, copy all labels to the exist node. properties will be updated by this merge. That is, if the node to be merged has the same attributes as the existing node, the attributes of the new node are used. :param node_properties: a dict of node properties, key-value pair :param node_labels: a set of node labels :param primary_property_name: The name of the property to check, the merged node and the new node are the same on this property. :return:-1, means that adding node json fail. otherwise, return the id of the newly added(merged) node.If it already exists, the id of this merged node will not change. """ if not primary_property_name: print("primary_property_name must given on merge") return GraphData.UNASSIGNED_NODE_ID if primary_property_name not in node_properties: print( "node json must have a primary_property_name ( %r ) in properties " % primary_property_name) return self.UNASSIGNED_NODE_ID node_json = self.find_one_node_by_property( property_name=primary_property_name, property_value=node_properties[primary_property_name]) if not node_json: return self.add_node(node_labels=node_labels, node_properties=node_properties, node_id=GraphData.UNASSIGNED_NODE_ID) merge_node_id = node_json[self.DEFAULT_KEY_NODE_ID] merge_properties = node_json[self.DEFAULT_KEY_NODE_PROPERTIES] for k, v in node_properties.items(): merge_properties[k] = v merge_labels = set(node_json[self.DEFAULT_KEY_NODE_LABELS]) for label in node_labels: merge_labels.add(label) return self.add_node(node_labels=merge_labels, node_properties=merge_properties, node_id=merge_node_id) def add_node_with_multi_primary_property(self, node_labels, node_properties, node_id=UNASSIGNED_NODE_ID, primary_property_names=None): """ add a node json to the graph :param node_id: the node_id to identify the node, if not given, it will be add as new node and give a node id :param node_properties: a dict of node properties, 
key-value pair :param node_labels: a set of node labels :param primary_property_names:a list of primary properties. make sure the node_json["properties"][primary_property_name] is unique in GraphData. if no passing, the node json will be add to graph without check. otherwise, only the node json with unique property value ( property value is got by primary_property_name ) will be added to the GraphData. :return:-1, means that adding node json fail. otherwise, return the id of the newly added node """ if primary_property_names is None: primary_property_names = [] match_properties = {} for primary_property_name in primary_property_names: if primary_property_name not in node_properties: print( "node json must have a primary_property_name ( %r ) in properties " % primary_property_name) return self.UNASSIGNED_NODE_ID match_properties[primary_property_name] = node_properties[ primary_property_name] node_json = self.find_one_node_by_properties(**match_properties) if node_json: return node_json[self.DEFAULT_KEY_NODE_ID] if node_id == self.UNASSIGNED_NODE_ID: node_id = self.max_node_id + 1 new_node_json = { self.DEFAULT_KEY_NODE_ID: node_id, self.DEFAULT_KEY_NODE_PROPERTIES: node_properties, self.DEFAULT_KEY_NODE_LABELS: set(node_labels) } self.graph.add_node(node_id, **new_node_json) if self.max_node_id < node_id: self.max_node_id = node_id self.add_labels(*new_node_json[self.DEFAULT_KEY_NODE_LABELS]) for label in new_node_json[self.DEFAULT_KEY_NODE_LABELS]: self.label_to_ids_map[label].add(node_id) self.index_collection.add_node( node_id=node_id, node_properties=new_node_json[ GraphData.DEFAULT_KEY_NODE_PROPERTIES]) return node_id def merge_node_with_multi_primary_property(self, node_labels, node_properties, primary_property_names=None): """ merge a node json to the graph, that is if we can't not find the node with primary_property_value match the given node. 
we will add a new node, if we found, we will add copy all properties given to the exist node, copy all labels to the exist node. properties will be updated by this merge. That is, if the node to be merged has the same attributes as the existing node, the attributes of the new node are used. :param node_properties: a dict of node properties, key-value pair :param node_labels: a set of node labels :param primary_property_names: The list of name of the property to check, the merged node and the new node are the same on this property. :return:-1, means that adding node json fail. otherwise, return the id of the newly added(merged) node.If it already exists, the id of this merged node will not change. """ if not primary_property_names: print("primary_property_names must given on merge") return GraphData.UNASSIGNED_NODE_ID match_properties = {} for primary_property_name in primary_property_names: if primary_property_name not in node_properties: print( "node json must have a primary_property_name ( %r ) in properties " % primary_property_name) return self.UNASSIGNED_NODE_ID match_properties[primary_property_name] = node_properties[ primary_property_name] node_json = self.find_one_node_by_properties(**match_properties) if not node_json: return self.add_node(node_labels=node_labels, node_properties=node_properties, node_id=GraphData.UNASSIGNED_NODE_ID) merge_node_id = node_json[self.DEFAULT_KEY_NODE_ID] merge_properties = node_json[self.DEFAULT_KEY_NODE_PROPERTIES] for k, v in node_properties.items(): merge_properties[k] = v merge_labels = set(node_json[self.DEFAULT_KEY_NODE_LABELS]) for label in node_labels: merge_labels.add(label) return self.add_node(node_labels=merge_labels, node_properties=merge_properties, node_id=merge_node_id) def refresh_indexer(self): """ refresh the index on all properties. 
:return: """ index_properties = self.index_collection.get_index_property() index_properties = list(index_properties) del self.index_collection self.index_collection = GraphIndexCollection() self.create_index_on_property(*index_properties) for node_id, node_json in self.graph.nodes(data=True): if node_json is None: continue node_properties_json = node_json[self.DEFAULT_KEY_NODE_PROPERTIES] self.index_collection.add_node(node_id, node_properties_json) def find_one_node_by_property(self, property_name, property_value): if self.index_collection.is_property_indexed(property_name): candidate_node_ids = list( self.index_collection.find_ids(property_name, property_value=property_value)) if len(candidate_node_ids) == 0: return None return self.get_node_info_dict(candidate_node_ids[0]) for node_id, node_json in self.graph.nodes(data=True): node_properties_json = node_json[self.DEFAULT_KEY_NODE_PROPERTIES] if property_name in node_properties_json.keys( ) and node_properties_json[property_name] == property_value: return node_json return None def find_nodes_by_ids(self, *ids): result = [] for node_id in ids: node_json = self.get_node_info_dict(node_id) if node_json: result.append(node_json) return result def find_nodes_by_property(self, property_name, property_value): if self.index_collection.is_property_indexed(property_name): candidate_node_ids = list( self.index_collection.find_ids(property_name, property_value=property_value)) return self.find_nodes_by_ids(*candidate_node_ids) nodes = [] for node_id, node_json in self.graph.nodes(data=True): node_properties_json = node_json[self.DEFAULT_KEY_NODE_PROPERTIES] if property_name in node_properties_json.keys( ) and node_properties_json[property_name] == property_value: nodes.append(node_json) return nodes def find_one_node_by_property_value_starts_with(self, property_name, property_value_starter): """ find a node which its property value is string and the string is startswith a given string :param property_name: :param 
property_value_starter: :return: """ for node_id, node_json in self.graph.nodes(data=True): node_properties_json = node_json[self.DEFAULT_KEY_NODE_PROPERTIES] if property_name not in node_properties_json.keys(): continue property_value = node_properties_json[property_name] if type(property_value) != str: continue if property_value.startswith(property_value_starter): return node_json return None def find_nodes_by_property_value_starts_with(self, property_name, property_value_starter): """ find all nodes which its property value is string and the string is startswith a given string :param property_name: :param property_value_starter: :return: """ nodes = [] for node_id, node_json in self.graph.nodes(data=True): node_properties_json = node_json[self.DEFAULT_KEY_NODE_PROPERTIES] if property_name not in node_properties_json.keys(): continue property_value = node_properties_json[property_name] if type(property_value) != str: continue if property_value.startswith(property_value_starter): nodes.append(node_json) return nodes def __find_node_ids_by_index_properties(self, **index_properties): result_ids = self.get_node_ids() for property_name, property_value in index_properties.items(): result_ids = result_ids.intersection( self.index_collection.find_ids(property_name=property_name, property_value=property_value)) return result_ids def find_one_node_by_properties(self, **properties): indexed_properties = {} unindexed_properties = {} for property_name, property_value in properties.items(): if self.index_collection.is_property_indexed( property_name=property_name): indexed_properties[property_name] = property_value else: unindexed_properties[property_name] = property_value candidate_node_ids = self.__find_node_ids_by_index_properties( **indexed_properties) if len(candidate_node_ids) == 0: return None if len(unindexed_properties) == 0: return self.get_node_info_dict(list(candidate_node_ids)[0]) for node_id in candidate_node_ids: node_json = 
self.get_node_info_dict(node_id=node_id) node_properties_json = node_json[self.DEFAULT_KEY_NODE_PROPERTIES] is_match = True for property_name, property_value in unindexed_properties.items(): if property_name not in node_properties_json.keys( ) or node_properties_json[property_name] != property_value: is_match = False break if is_match: return node_json return None def set_relations(self, relations): for t in relations: self.add_relation(startId=t[self.DEFAULT_KEY_RELATION_START_ID], relationType=t[self.DEFAULT_KEY_RELATION_TYPE], endId=t[self.DEFAULT_KEY_RELATION_END_ID]) def add_relation(self, startId, relationType, endId): """ add a new relation to graphData, if exist, not add. :param startId: :param relationType: :param endId: :return:False, the relation is already exist adding fail, True, add the relation successsful """ # if startId == GraphData.UNASSIGNED_NODE_ID: # return False # if endId == GraphData.UNASSIGNED_NODE_ID: # return False if startId not in self.graph.nodes or endId not in self.graph.nodes: return False if self.exist_relation(startId=startId, relationType=relationType, endId=endId): return False self.__add_one_relation_count(relationType) self.graph.add_edge(startId, endId, relationType) return True def __add_one_relation_count(self, relation_type): relation_type_to_num_map = self.get_relation_type_to_num_map() relation_type_to_num_map[relation_type] = relation_type_to_num_map.get( relation_type, 0) + 1 def __remove_one_relation_count(self, relation_type): relation_type_to_num_map = self.get_relation_type_to_num_map() relation_type_to_num_map[relation_type] = max( 0, relation_type_to_num_map.get(relation_type, 0) - 1) def add_relation_with_property(self, startId, relationType, endId, **kwargs): if startId not in self.graph.nodes or endId not in self.graph.nodes: return False if self.exist_relation(startId=startId, relationType=relationType, endId=endId): return False self.__add_one_relation_count(relationType) self.graph.add_edge(startId, endId, 
relationType, **kwargs) return True def remove_relation(self, startId, relationType, endId): if not self.exist_relation( startId=startId, relationType=relationType, endId=endId): return False self.__remove_one_relation_count(relationType) self.graph.remove_edge(startId, endId, relationType) return True def remove_all_relations(self): relation_pairs = self.get_relation_pairs() for relation_pair in relation_pairs: relations = self.get_relations(start_id=relation_pair[0], end_id=relation_pair[1]) for relation in relations: self.remove_relation(relation[0], relation[1], relation[2]) return True def exist_relation(self, startId, relationType, endId): return self.graph.has_edge(startId, endId, relationType) def exist_any_relation(self, startId, endId): return self.graph.has_edge(startId, endId) def get_relations(self, start_id=None, relation_type=None, end_id=None): candidates = None if start_id is not None: candidates = self.get_all_out_relations(start_id) if end_id is not None: tmp = self.get_all_in_relations(end_id) if candidates is not None: candidates &= tmp else: candidates = tmp candidates = self.get_relation_pairs_with_type( ) if candidates is None else candidates if relation_type is not None: candidates = set( filter(lambda r: r[1] == relation_type, candidates)) return candidates def get_all_relations(self, id_1, id_2): result = set([]) result = result | self.get_relations(start_id=id_1, end_id=id_2) result = result | self.get_relations(start_id=id_2, end_id=id_1) return result def get_edge_extra_info(self, start_id, end_id, relation_name, extra_key): relation_dict = self.graph.get_edge_data(start_id, end_id) if relation_name in relation_dict: if extra_key in relation_dict[relation_name]: return relation_dict[relation_name][extra_key] return "" def get_node_num(self): return len(self.graph.nodes) def get_relation_num(self): return len(self.graph.edges) def get_node_ids(self): return set(self.graph.nodes) def get_relation_pairs(self): # todo:cache the result? 
""" get the relation list in [(startId,endId)] format :return: """ pairs = set(self.graph.edges(keys=False)) return pairs def get_relation_pairs_with_type(self): """ get the relation list in [(startId,endId)] format :return: """ pairs = {(r[0], r[2], r[1]) for r in self.graph.edges(keys=True)} return pairs def get_all_out_relations(self, node_id): if node_id not in self.graph.nodes: return set() return {(r[0], r[2], r[1]) for r in self.graph.out_edges(node_id, keys=True)} def get_all_in_relations(self, node_id): if node_id not in self.graph.nodes: return set() return {(r[0], r[2], r[1]) for r in self.graph.in_edges(node_id, keys=True)} def update_node_index(self, node_id): node_info = self.get_node_info_dict(node_id=node_id) node_properties = node_info[self.DEFAULT_KEY_NODE_PROPERTIES] self.index_collection.add_node(node_id=node_id, node_properties=node_properties) def get_node_info_dict(self, node_id): """ get the node info dict, :param node_id: the node id :return: """ return self.graph.nodes.get(node_id, None) def get_properties_for_node(self, node_id, key_node_properties=DEFAULT_KEY_NODE_PROPERTIES ): """ get the node properties part from node info dict :param key_node_properties: specify the key of key_node_properties, default is "properties" :param node_id: the node id :return: {} if the node not exist """ node_info_dict = self.get_node_info_dict(node_id) if node_info_dict is None: return {} return node_info_dict[key_node_properties] def get_labels_for_node(self, node_id, key_node_labels=DEFAULT_KEY_NODE_LABELS): """ get the node properties part from node info dict :param key_node_labels: specify the key of node_labels, default is "labels" :param node_id: the node id :return: [] if the node not exist """ node_info_dict = self.get_node_info_dict(node_id) if node_info_dict is None: return [] return node_info_dict[key_node_labels] def get_all_labels(self): """ get all labels as set for current node. :return: a set of labels. 
""" return set(self.label_to_ids_map.keys()) def get_all_relation_types(self): """ get all relation types in graph data :return: a set of relation type strings """ return set(self.get_relation_type_to_num_map().keys()) def get_relation_count_by_type(self, relation_type): relation_type_to_num_map = self.get_relation_type_to_num_map() return relation_type_to_num_map.get(relation_type, 0) def get_relation_type_to_num_map(self): return self.relation_type_to_num_map def __count_relation_type_to_num_map(self): relation_type_to_num_map = {} relation_type_to_relation = {} for r in self.get_relation_pairs_with_type(): if r[1] not in relation_type_to_relation: relation_type_to_relation[r[1]] = set() relation_type_to_relation[r[1]].add(r) for k, v in relation_type_to_relation.items(): relation_type_to_num_map[k] = len(v) return relation_type_to_num_map def print_label_count(self): print("Label Num=%d" % len(self.label_to_ids_map.keys())) for k, v in self.label_to_ids_map.items(): print("<Label:%r Num:%d>" % (k, len(v))) def print_graph_info(self): print("----- Graph Info ------") print(self) self.print_label_count() self.print_relation_info() print("-----------------------") def print_relation_info(self): relation_type_to_num_map = self.get_relation_type_to_num_map() print("Relation Num=%d" % len(relation_type_to_num_map.keys())) for k, v in relation_type_to_num_map.items(): print("<Relation:%r Num:%d>" % (k, v)) def __repr__(self): return "<GraphData nodeNum=%d relNum=%d maxNodeId=%d>" % ( self.get_node_num(), self.get_relation_num(), self.max_node_id) def subgraph(self, node_ids): """ get a sub graph of graph data which keep only given nodes and relations between nodes :param node_ids: the kept node ids in graph :return: a graph that keep all things. """ graph_data = deepcopy(self) remove_nodes = set(self.get_node_ids()) - node_ids for node_id in remove_nodes: graph_data.remove_node(node_id) return graph_data