Exemple #1
0
def get_connection_multigraph_weighted(name2dp, connections):
    """
    Build a multigraph holding one edge per connected ``(dp1, dp2)`` pair.

    For every connection ``c`` the value ``name2dp[c.dp1].get_rtype(c.s1)``
    is appended to a list kept under ``'spaces'`` for the pair
    ``(c.dp1, c.dp2)``, in the order the connections are visited.

    NOTE(review): ``G.edge`` looks like the networkx 1.x adjacency
    attribute -- confirm the pinned networkx version before changing this.
    """
    G = MultiDiGraph()
    for conn in connections:
        src, dst = conn.dp1, conn.dp2
        if G.has_edge(src, dst):
            # Pair already seen: keep extending its existing list.
            spaces = G.edge[src][dst]['spaces']
        else:
            G.add_edge(src, dst)
            spaces = []
        spaces.append(name2dp[conn.dp1].get_rtype(conn.s1))
        G.edge[src][dst]['spaces'] = spaces

    return G
Exemple #2
0
def get_connection_multigraph_weighted(name2dp, connections):
    """
    Build a multigraph holding one edge per connected ``(dp1, dp2)`` pair.

    For every connection ``c`` the value ``name2dp[c.dp1].get_rtype(c.s1)``
    is appended to a list kept under ``'spaces'`` for the pair
    ``(c.dp1, c.dp2)``, in the order the connections are visited.

    NOTE(review): ``G.edge`` looks like the networkx 1.x adjacency
    attribute -- confirm the pinned networkx version before changing this.
    """
    G = MultiDiGraph()
    for conn in connections:
        src, dst = conn.dp1, conn.dp2
        if G.has_edge(src, dst):
            # Pair already seen: keep extending its existing list.
            spaces = G.edge[src][dst]['spaces']
        else:
            G.add_edge(src, dst)
            spaces = []
        spaces.append(name2dp[conn.dp1].get_rtype(conn.s1))
        G.edge[src][dst]['spaces'] = spaces

    return G
    
Exemple #3
0
def add_all_edges(g1: nx.MultiDiGraph, g2: nx.MultiDiGraph, preserve: bool = True) -> int:
    """
    Copy every edge of the source graph (``g2``) into the target graph (``g1``).

    An edge whose ``(u, v, key)`` triple already exists in ``g1`` is merged
    through ``merge_edge``; any other edge is added with its data as-is.

    Parameters
    ----------
    g1: networkx.MultiDiGraph
        Target graph, modified in place
    g2: networkx.MultiDiGraph
        Source graph
    preserve: bool
        Forwarded to ``merge_edge``; whether or not to preserve conflicting properties

    Returns
    -------
    int
        Number of edges that already existed in ``g1`` and were merged

    """
    logging.info(f"Adding {g2.number_of_edges()} edges from {g2} to {g1}")
    merged = 0
    for subject, obj, edge_key, edge_data in g2.edges(keys=True, data=True):
        if not g1.has_edge(subject, obj, edge_key):
            # Brand-new edge: nothing to reconcile.
            g1.add_edge(subject, obj, edge_key, **edge_data)
            continue
        merge_edge(g1, subject, obj, edge_key, edge_data, preserve)
        merged += 1
    return merged
def calculate_dijkstra(source: int, layered_graph: LayeredGraph, ) -> Tuple[MultiDiGraph, MultiDiGraph]:
    """
    Run a Dijkstra-style search from ``source`` over ``layered_graph`` and prune the result.

    Parameters
    ----------
    source: int
        Node of ``layered_graph`` the search starts from
    layered_graph: LayeredGraph
        Graph to search; arcs are read with ``out_edges(..., data=True)`` and
        their data dict must contain a ``WEIGHT`` entry

    Returns
    -------
    Tuple[MultiDiGraph, MultiDiGraph]
        ``(tree, not_truncated_tree)``: the graph of relaxed arcs after the
        leaf truncation pass, and a copy of it taken before truncation

    """
    # Per-node bookkeeping: best known cost, the arc that produced it,
    # whether the node has been expanded, and its depth (arc count).
    node_info = {node: {COST: inf, ARC: None, VISITED: False, DEPTH: inf} for node in layered_graph}
    node_info[source][COST] = 0
    node_info[source][DEPTH] = 0
    tree = MultiDiGraph()

    # NOTE(review): `insert` / `extract_minimum` are presumably heap push/pop
    # helpers defined elsewhere -- confirm.
    to_visit = []
    insert(to_visit, (0, source))
    # For each origin node, the ((cost, depth), node) records of its reached images.
    images = {node: [] for node in layered_graph.origin_nodes}
    insert(images[layered_graph.origin_node_index(source)], ((0, 0), source))
    leafs = set()

    while len(to_visit) > 0:
        to_open = extract_minimum(to_visit)[1]
        # A node can be queued several times; expand it only once.
        if node_info[to_open][VISITED]:
            continue
        node_info[to_open][VISITED] = True

        for arc in layered_graph.out_edges(to_open, data=True):
            dest = arc[1]
            # Nodes without outgoing arcs in the layered graph seed the truncation pass.
            if not layered_graph.out_edges(dest):
                leafs.add(dest)
            new_cost = node_info[to_open][COST] + arc[2][WEIGHT]
            new_depth = node_info[to_open][DEPTH] + 1
            if node_info[dest][COST] > new_cost:
                node_info[dest][COST] = new_cost
                node_info[dest][ARC] = arc
                node_info[dest][DEPTH] = new_depth
                insert(to_visit, (new_cost, dest))
                insert(images[layered_graph.origin_node_index(dest)], ((new_cost, new_depth), dest))
                if tree.has_edge(to_open, dest):  # TODO change on link to arc
                    tree.remove_edge(to_open, dest)
                # The keyword stores the cost under the literal attribute name
                # "COST", not under the value of the COST constant.
                tree.add_edge(to_open, dest, COST=new_cost)

    # node_info truncation - tree truncation
    not_truncated_tree = MultiDiGraph(tree)
    while leafs:
        leaf = leafs.pop()
        path_cost = node_info[leaf][COST]
        path_depth = node_info[leaf][DEPTH]
        # NOTE(review): if `nsmallest` is heapq.nsmallest it returns a list, so
        # comparing it with this tuple would always be unequal -- confirm intent.
        if nsmallest(1, images[layered_graph.origin_node_index(leaf)]) != ((path_cost, path_depth), leaf):
            parent = node_info[leaf][ARC][0]
            tree.remove_node(leaf)
            # A parent left without outgoing edges becomes a leaf candidate itself.
            if not tree.out_edges(parent):
                leafs.add(parent)

    return tree, not_truncated_tree
Exemple #5
0
def distributed_induction(graph: nx.MultiDiGraph, sample: nx.MultiDiGraph,
                          partition_map: PartitionMap, ownership: Set[Vertex]):
    """
    Add to ``sample`` the edges of ``graph`` that leave an already sampled node.

    Candidate edges are queued per rank (one randomly chosen owner of the
    edge's target node). The local rank's queue is resolved directly against
    ``ownership``; all queues are then handed to ``query_inductions``.
    """
    # Step 1: queue every non-sampled edge whose source node is in the sample
    pending = [[] for _ in range(mpi.size)]
    for edge in graph.edges:
        if sample.has_edge(*edge) or not sample.has_node(edge[0]):
            continue
        owners = partition_map.get_owners(edge[1])
        # Select only one of the owners randomly
        pending[random.choice(owners)].append(edge)

    # Step 2: resolve the edges queued for this rank without any communication
    local_queue = pending[mpi.rank]
    for edge in local_queue:
        if edge[1] in ownership:
            sample.add_edge(*edge)
    local_queue.clear()

    # Step 3: hand the remaining per-rank queues to query_inductions
    query_inductions(sample, pending, ownership)
Exemple #6
0
class NxGraph(BaseGraph):
    """
    NxGraph is a wrapper that provides methods to interact with a networkx.MultiDiGraph.

    NxGraph extends kgx.graph.base_graph.BaseGraph and implements all the methods from BaseGraph.
    """
    def __init__(self):
        super().__init__()
        self.graph = MultiDiGraph()
        self.name = None

    def add_node(self, node: str, **kwargs: Any) -> None:
        """
        Add a node to the graph.

        Parameters
        ----------
        node: str
            Node identifier
        **kwargs: Any
            Any additional node properties. When a ``data`` keyword is given,
            its value is used as the property dictionary instead.

        """
        data = kwargs.get("data", kwargs)
        self.graph.add_node(node, **data)

    def add_edge(self,
                 subject_node: str,
                 object_node: str,
                 edge_key: str = None,
                 **kwargs: Any) -> None:
        """
        Add an edge to the graph.

        Parameters
        ----------
        subject_node: str
            The subject (source) node
        object_node: str
            The object (target) node
        edge_key: Optional[str]
            The edge key
        kwargs: Any
            Any additional edge properties. When a ``data`` keyword is given,
            its value is used as the property dictionary instead.

        Returns
        -------
        Whatever ``networkx.MultiDiGraph.add_edge`` returns is passed through.

        """
        data = kwargs.get("data", kwargs)
        return self.graph.add_edge(subject_node,
                                   object_node,
                                   key=edge_key,
                                   **data)

    def add_node_attribute(self, node: str, attr_key: str,
                           attr_value: Any) -> None:
        """
        Add an attribute to a given node.

        Parameters
        ----------
        node: str
            The node identifier
        attr_key: str
            The key for an attribute
        attr_value: Any
            The value corresponding to the key

        """
        self.graph.add_node(node, **{attr_key: attr_value})

    def add_edge_attribute(
        self,
        subject_node: str,
        object_node: str,
        edge_key: Optional[str],
        attr_key: str,
        attr_value: Any,
    ) -> None:
        """
        Add an attribute to a given edge.

        Parameters
        ----------
        subject_node: str
            The subject (source) node
        object_node: str
            The object (target) node
        edge_key: Optional[str]
            The edge key
        attr_key: str
            The attribute key
        attr_value: Any
            The attribute value

        """
        self.graph.add_edge(subject_node,
                            object_node,
                            key=edge_key,
                            **{attr_key: attr_value})

    def update_node_attribute(self,
                              node: str,
                              attr_key: str,
                              attr_value: Any,
                              preserve: bool = False) -> Dict:
        """
        Update an attribute of a given node.

        Parameters
        ----------
        node: str
            The node identifier
        attr_key: str
            The key for an attribute
        attr_value: Any
            The value corresponding to the key
        preserve: bool
            Whether or not to preserve existing values for the given attr_key

        Returns
        -------
        Dict
            A dictionary corresponding to the updated node properties

        """
        node_data = self.graph.nodes[node]
        updated = prepare_data_dict(node_data, {attr_key: attr_value},
                                    preserve=preserve)
        self.graph.add_node(node, **updated)
        return updated

    def update_edge_attribute(
        self,
        subject_node: str,
        object_node: str,
        edge_key: Optional[str],
        attr_key: str,
        attr_value: Any,
        preserve: bool = False,
    ) -> Dict:
        """
        Update an attribute of a given edge.

        The current properties of exactly the edge
        ``(subject_node, object_node, edge_key)`` are merged with the new
        attribute. When that edge does not exist (or ``edge_key`` is None),
        the merge starts from an empty dictionary and the edge is created.

        Parameters
        ----------
        subject_node: str
            The subject (source) node
        object_node: str
            The object (target) node
        edge_key: Optional[str]
            The edge key
        attr_key: str
            The attribute key
        attr_value: Any
            The attribute value
        preserve: bool
            Whether or not to preserve existing values for the given attr_key

        Returns
        -------
        Dict
            A dictionary corresponding to the updated edge properties

        """
        # Look the edge up by its exact (subject, object, key) triple. Handing
        # that triple to ``graph.edges(...)`` would treat it as a bunch of
        # *nodes* and yield the out-edges of each of them, so the first hit
        # could be the data of an unrelated edge.
        edge_data = None
        if edge_key is not None:
            edge_data = self.graph.get_edge_data(subject_node,
                                                 object_node,
                                                 key=edge_key)
        if edge_data is None:
            # No such edge yet: ``add_edge`` below creates it.
            edge_data = {}
        updated = prepare_data_dict(edge_data, {attr_key: attr_value},
                                    preserve)
        self.graph.add_edge(subject_node, object_node, key=edge_key, **updated)
        return updated

    def get_node(self, node: str) -> Dict:
        """
        Get a node and its properties.

        Parameters
        ----------
        node: str
            The node identifier

        Returns
        -------
        Dict
            The node dictionary, or an empty dictionary when the node is absent

        """
        n = {}
        if self.graph.has_node(node):
            n = self.graph.nodes[node]
        return n

    def get_edge(self,
                 subject_node: str,
                 object_node: str,
                 edge_key: Optional[str] = None) -> Dict:
        """
        Get an edge and its properties.

        Parameters
        ----------
        subject_node: str
            The subject (source) node
        object_node: str
            The object (target) node
        edge_key: Optional[str]
            The edge key

        Returns
        -------
        Dict
            The edge dictionary, or an empty dictionary when the edge is absent

        """
        e = {}
        if self.graph.has_edge(subject_node, object_node, edge_key):
            e = self.graph.get_edge_data(subject_node, object_node, edge_key)
        return e

    def nodes(self, data: bool = True) -> Dict:
        """
        Get all nodes in a graph.

        Parameters
        ----------
        data: bool
            Whether or not to fetch node properties

        Returns
        -------
        Dict
            The result of ``networkx.MultiDiGraph.nodes(data)``

        """
        return self.graph.nodes(data)

    def edges(self, keys: bool = False, data: bool = True) -> Dict:
        """
        Get all edges in a graph.

        Parameters
        ----------
        keys: bool
            Whether or not to include edge keys
        data: bool
            Whether or not to fetch edge properties

        Returns
        -------
        Dict
            The result of ``networkx.MultiDiGraph.edges(keys=keys, data=data)``

        """
        return self.graph.edges(keys=keys, data=data)

    def in_edges(self,
                 node: str,
                 keys: bool = False,
                 data: bool = False) -> List:
        """
        Get all incoming edges for a given node.

        Parameters
        ----------
        node: str
            The node identifier
        keys: bool
            Whether or not to include edge keys
        data: bool
            Whether or not to fetch edge properties

        Returns
        -------
        List
            A list of edges

        """
        return self.graph.in_edges(node, keys=keys, data=data)

    def out_edges(self,
                  node: str,
                  keys: bool = False,
                  data: bool = False) -> List:
        """
        Get all outgoing edges for a given node.

        Parameters
        ----------
        node: str
            The node identifier
        keys: bool
            Whether or not to include edge keys
        data: bool
            Whether or not to fetch edge properties

        Returns
        -------
        List
            A list of edges

        """
        return self.graph.out_edges(node, keys=keys, data=data)

    def nodes_iter(self) -> Generator:
        """
        Get an iterable to traverse through all the nodes in a graph.

        Returns
        -------
        Generator
            A generator for nodes where each element is a Tuple that
            contains (node_id, node_data)

        """
        yield from self.graph.nodes(data=True)

    def edges_iter(self) -> Generator:
        """
        Get an iterable to traverse through all the edges in a graph.

        Returns
        -------
        Generator
            A generator for edges where each element is a 4-tuple that
            contains (subject, object, edge_key, edge_data)

        """
        yield from self.graph.edges(keys=True, data=True)

    def remove_node(self, node: str) -> None:
        """
        Remove a given node from the graph.

        Parameters
        ----------
        node: str
            The node identifier

        """
        self.graph.remove_node(node)

    def remove_edge(self,
                    subject_node: str,
                    object_node: str,
                    edge_key: Optional[str] = None) -> None:
        """
        Remove a given edge from the graph.

        Parameters
        ----------
        subject_node: str
            The subject (source) node
        object_node: str
            The object (target) node
        edge_key: Optional[str]
            The edge key

        """
        self.graph.remove_edge(subject_node, object_node, edge_key)

    def has_node(self, node: str) -> bool:
        """
        Check whether a given node exists in the graph.

        Parameters
        ----------
        node: str
            The node identifier

        Returns
        -------
        bool
            Whether or not the given node exists

        """
        return self.graph.has_node(node)

    def has_edge(self,
                 subject_node: str,
                 object_node: str,
                 edge_key: Optional[str] = None) -> bool:
        """
        Check whether a given edge exists in the graph.

        Parameters
        ----------
        subject_node: str
            The subject (source) node
        object_node: str
            The object (target) node
        edge_key: Optional[str]
            The edge key

        Returns
        -------
        bool
            Whether or not the given edge exists

        """
        return self.graph.has_edge(subject_node, object_node, key=edge_key)

    def number_of_nodes(self) -> int:
        """
        Returns the number of nodes in a graph.

        Returns
        -------
        int

        """
        return self.graph.number_of_nodes()

    def number_of_edges(self) -> int:
        """
        Returns the number of edges in a graph.

        Returns
        -------
        int

        """
        return self.graph.number_of_edges()

    def degree(self):
        """
        Get the degree of all the nodes in a graph.
        """
        return self.graph.degree()

    def clear(self) -> None:
        """
        Remove all the nodes and edges in the graph.
        """
        self.graph.clear()

    @staticmethod
    def set_node_attributes(graph: BaseGraph, attributes: Dict) -> None:
        """
        Set node attributes from a dictionary of key-values.

        Parameters
        ----------
        graph: kgx.graph.base_graph.BaseGraph
            The graph to modify
        attributes: Dict
            A dictionary of node identifier to key-value pairs

        """
        return set_node_attributes(graph.graph, attributes)

    @staticmethod
    def set_edge_attributes(graph: BaseGraph, attributes: Dict) -> None:
        """
        Set edge attributes from a dictionary of key-values.

        Parameters
        ----------
        graph: kgx.graph.base_graph.BaseGraph
            The graph to modify
        attributes: Dict
            A dictionary of edge identifier to key-value pairs

        Returns
        -------
        Any

        """
        return set_edge_attributes(graph.graph, attributes)

    @staticmethod
    def get_node_attributes(graph: BaseGraph, attr_key: str) -> Dict:
        """
        Get all nodes that have a value for the given attribute ``attr_key``.

        Parameters
        ----------
        graph: kgx.graph.base_graph.BaseGraph
            The graph to read from
        attr_key: str
            The attribute key

        Returns
        -------
        Dict
            A dictionary where nodes are the keys and the values
            are the attribute values for ``attr_key``

        """
        return get_node_attributes(graph.graph, attr_key)

    @staticmethod
    def get_edge_attributes(graph: BaseGraph, attr_key: str) -> Dict:
        """
        Get all edges that have a value for the given attribute ``attr_key``.

        Parameters
        ----------
        graph: kgx.graph.base_graph.BaseGraph
            The graph to read from
        attr_key: str
            The attribute key

        Returns
        -------
        Dict
            A dictionary where edges are the keys and the values
            are the attribute values for ``attr_key``

        """
        return get_edge_attributes(graph.graph, attr_key)

    @staticmethod
    def relabel_nodes(graph: BaseGraph, mapping: Dict) -> None:
        """
        Relabel identifiers for a series of nodes based on mappings.

        The relabeling happens in place (``copy=False``).

        Parameters
        ----------
        graph: kgx.graph.base_graph.BaseGraph
            The graph to modify
        mapping: Dict
            A dictionary of mapping where the key is the old identifier
            and the value is the new identifier.

        """
        relabel_nodes(graph.graph, mapping, copy=False)
Exemple #7
0
class GraphBuilder:
    """
    Incrementally build a ``MultiDiGraph`` from vertex and edge definitions.

    ``nodes`` and ``edges`` count the ``add_node`` / ``add_edge`` calls made so far.
    """

    def __init__(self, model: Model):
        self.model = model
        self.graph = MultiDiGraph()
        self.nodes = 0
        self.edges = 0

    def add_from_json(self, js: Json) -> None:
        """
        Add one vertex or one edge described by ``js``.

        A document with ``"id"`` and a reported section is a vertex; a
        document with ``"from"`` and ``"to"`` is an edge.

        Raises
        ------
        AttributeError
            If ``js`` is neither a vertex nor an edge definition
        """
        if "id" in js and Section.reported in js:
            self.add_node(
                js["id"],
                js[Section.reported],
                js.get(Section.desired, None),
                js.get(Section.metadata, None),
                js.get("search", None),
                js.get("replace", False) is True,
            )
        elif "from" in js and "to" in js:
            self.add_edge(js["from"], js["to"], js.get("edge_type", EdgeType.default))
        else:
            raise AttributeError(f"Format not understood! Got {json.dumps(js)} which is neither vertex nor edge.")

    def add_node(
        self,
        node_id: str,
        reported: Json,
        desired: Optional[Json] = None,
        metadata: Optional[Json] = None,
        search: Optional[str] = None,
        replace: bool = False,
    ) -> None:
        """
        Validate ``reported`` against the model and add it as a graph node.

        Parameters
        ----------
        node_id: str
            Identifier of the node
        reported: Json
            Reported section; replaced by the coerced version when
            ``model.check_valid`` returns one
        desired: Optional[Json]
            Desired section, included in the content hash when truthy
        metadata: Optional[Json]
            Metadata section, included in the content hash when truthy
        search: Optional[str]
            Precomputed search string; when it is not a string the flattened
            ``reported`` section is used instead
        replace: bool
            Marks the node as a replace node. The node is also marked when
            ``metadata["replace"]`` is literally ``True``.
        """
        self.nodes += 1
        # validate kind of this reported json
        coerced = self.model.check_valid(reported)
        reported = reported if coerced is None else coerced
        kind = self.model[reported]
        # create content hash
        sha = GraphBuilder.content_hash(reported, desired, metadata)
        # flat all properties into a single string for search
        flat = search if isinstance(search, str) else (GraphBuilder.flatten(reported, kind))
        # An explicit ``replace=True`` must hold even when no metadata is given.
        # The previous single expression parsed as
        # ``((replace | x) is True) if metadata else False`` and dropped it.
        metadata_replace = metadata.get("replace", False) is True if metadata else False
        self.graph.add_node(
            node_id,
            id=node_id,
            reported=reported,
            desired=desired,
            metadata=metadata,
            hash=sha,
            kind=kind,
            kinds=list(kind.kind_hierarchy()),
            kinds_set=kind.kind_hierarchy(),
            flat=flat,
            replace=replace or metadata_replace,
        )

    def add_edge(self, from_node: str, to_node: str, edge_type: str) -> None:
        """Add an edge of ``edge_type`` keyed by ``GraphAccess.edge_key``."""
        self.edges += 1
        key = GraphAccess.edge_key(from_node, to_node, edge_type)
        self.graph.add_edge(from_node, to_node, key, edge_type=edge_type)

    @staticmethod
    def content_hash(js: Json, desired: Optional[Json] = None, metadata: Optional[Json] = None) -> str:
        """
        Return the SHA-256 hex digest over the version and the given sections.

        ``desired`` and ``metadata`` only contribute when they are truthy.
        Sections are serialized with sorted keys.
        """
        sha256 = hashlib.sha256()
        # all content hashes will be different, when the version changes
        sha256.update(ContentHashVersion.to_bytes(2, "big"))
        sha256.update(json.dumps(js, sort_keys=True).encode("utf-8"))
        if desired:
            sha256.update(json.dumps(desired, sort_keys=True).encode("utf-8"))
        if metadata:
            sha256.update(json.dumps(metadata, sort_keys=True).encode("utf-8"))
        return sha256.hexdigest()

    @staticmethod
    def flatten(js: Json, kind: Kind) -> str:
        """
        Flatten all leaf values of ``js`` into one space separated string.

        ``None`` and boolean values are skipped; every other leaf is
        stringified and stripped. ``kind`` is walked alongside the value so
        that date time values can be normalized.
        """
        result = ""

        def dispatch(value: Any, k: Kind) -> None:
            nonlocal result
            if isinstance(value, dict):
                for prop, elem in value.items():
                    sub = (
                        k.property_kind_of(prop, AnyKind())
                        if isinstance(k, ComplexKind)
                        else (k.value_kind if isinstance(k, DictionaryKind) else AnyKind())
                    )
                    dispatch(elem, sub)
            elif isinstance(value, list):
                sub = k.inner if isinstance(k, ArrayKind) else AnyKind()
                for elem in value:
                    dispatch(elem, sub)
            elif value is None or isinstance(value, bool):
                pass
            else:
                # in case of date time: "2017-05-30T22:04:34Z" -> "2017-05-30 22:04:34"
                if isinstance(k, DateTimeKind):
                    value = re.sub("[ZT]", " ", value)
                if result:
                    result += " "
                result += str(value).strip()

        dispatch(js, kind)
        return result

    def check_complete(self) -> None:
        """
        Assert that the graph is consistent and normalize the root node id.

        Checks that every node has a reported section and that only known
        edge types are used. If the root node is of kind ``graph_root`` but
        has an id other than ``"root"``, it is re-created under the id
        ``"root"`` and its outgoing edges are moved over.
        """
        # check that all vertices are given, that were defined in any edge definition
        # note: DiGraph will create an empty vertex node automatically
        for node_id, node in self.graph.nodes(data=True):
            assert node.get(Section.reported), f"{node_id} was used in an edge definition but not provided as vertex!"

        edge_types = {edge[2] for edge in self.graph.edges(data="edge_type")}
        al = EdgeType.all
        assert not edge_types.difference(al), f"Graph contains unknown edge types! Given: {edge_types}. Known: {al}"
        # make sure there is only one root node
        rid = GraphAccess.root_id(self.graph)
        root_node = self.graph.nodes[rid]

        # make sure the root node uses the well known id "root"
        if value_in_path(root_node, NodePath.reported_kind) == "graph_root" and rid != "root":
            # re-create the node under the id "root" with the same attributes
            root_node["id"] = "root"
            self.graph.add_node("root", **root_node)

            # move every outgoing edge of the old root over to the new one
            for succ in list(self.graph.successors(rid)):
                for edge_type in EdgeType.all:
                    key = GraphAccess.edge_key(rid, succ, edge_type)
                    if self.graph.has_edge(rid, succ, key):
                        self.graph.remove_edge(rid, succ, key)
                        self.add_edge("root", succ, edge_type)
            # remove node with wrong id
            self.graph.remove_node(rid)
Exemple #8
0
class GraphData(SaveLoad):
    """
    The store of graph data.

    Each node is represented as a dict of node info named 'node_json'.
    Example format for 'node_json':

     {
        "id": 1,
        "properties": {"name":"bob","age":1},
        "labels": ["entity","man"]
    }

    >>>
    graphdata=GraphData()
    graphdata.create_index_on_property("name","aliases","qualified_name")

    # save a graphdata to disk
    graphdata.save("test.v1.graph")

    # load a graphdata from disk
    graphdata=GraphData.load("test.v1.graph")
    >>>
    """

    DEFAULT_KEY_NODE_ID = "id"  # the key name for the node id, every node must have it.
    DEFAULT_KEY_NODE_PROPERTIES = "properties"  # the key name for the node properties, every node must have it.
    DEFAULT_KEY_NODE_LABELS = "labels"  # the key name for the node labels, every node must have it.

    DEFAULT_KEYS = [
        DEFAULT_KEY_NODE_ID, DEFAULT_KEY_NODE_PROPERTIES,
        DEFAULT_KEY_NODE_LABELS
    ]
    UNASSIGNED_NODE_ID = -1  # a node without a id specify, a newly created node, its id is -1

    DEFAULT_KEY_RELATION_START_ID = "startId"
    DEFAULT_KEY_RELATION_TYPE = "relationType"
    DEFAULT_KEY_RELATION_END_ID = "endId"

    def __init__(self):
        """Create an empty graph store by initializing all internal containers."""
        self.__init_graph()

    def clear(self):
        """Discard all stored data by re-initializing every internal container."""
        self.__init_graph()

    def __init_graph(self):
        """(Re)create the empty containers that hold the graph state."""
        self.graph = MultiDiGraph()
        # highest node id seen so far; used to hand out ids for new nodes
        self.max_node_id = 0
        # label -> set of ids of the nodes carrying that label
        self.label_to_ids_map = {}
        self.index_collection = GraphIndexCollection()
        self.relation_type_to_num_map = {}

    def create_index_on_property(self, *property_name_list):
        """
        Create an index on some properties, which makes queries on those properties faster.

        Delegates to ``self.index_collection.create_index_on_property``.

        :param property_name_list: one or more property names.
        :return:
        """
        self.index_collection.create_index_on_property(*property_name_list)

    def find_all_shortest_paths(self, startId, endId):
        """
        Find all shortest paths between two nodes.

        :param startId: id of the node the paths start from
        :param endId: id of the node the paths end at
        :return: the result of ``all_shortest_paths`` on the underlying graph
        """
        return all_shortest_paths(self.graph, startId, endId)

    def find_shortest_path(self, startId, endId):
        """
        Find one shortest path between two nodes.

        :param startId: id of the node the path starts from
        :param endId: id of the node the path ends at
        :return: the result of ``shortest_path`` on the underlying graph
        """
        return shortest_path(self.graph, startId, endId)

    def set_nodes(self, nodes):
        for n in nodes:
            self.add_node(node_id=n[self.DEFAULT_KEY_NODE_ID],
                          node_properties=n[self.DEFAULT_KEY_NODE_PROPERTIES],
                          node_labels=n[self.DEFAULT_KEY_NODE_LABELS])

    def add_labels(self, *labels):
        """
        add a list of label to the graph
        :param labels:
        :return:
        """

        for label in labels:
            if not label:
                return
            if label not in self.label_to_ids_map.keys():
                self.label_to_ids_map[label] = set([])

    def add_label_by_node_id(self, node_id, label):
        """
        add a label to a node
        :param node_id: the node id which the label need to add
        :param label: the label that need to added
        :return: True, add successful.False, add fail.
        """
        if not label:
            return False
        node_json = self.get_node_info_dict(node_id)
        if not node_json:
            return False
        node_json[GraphData.DEFAULT_KEY_NODE_LABELS].add(label)
        self.label_to_ids_map[label].add(node_id)
        return True

    def get_node_ids_by_label(self, label):
        if label not in self.label_to_ids_map.keys():
            return set([])
        return self.label_to_ids_map[label]

    def add_label_by_label(self, label, new_label):
        """
        add a label to node in graph, the node must has the specific label
        :param new_label: the new_label add to node
        :param label: the node must has the label
        :return:
        """

        for node_id in self.get_node_ids_by_label(label):
            self.add_label_by_node_id(node_id, new_label)

    def add_label_to_all(self, label):
        """
        add a label to node in graph
        :param label:
        :return:
        """
        if not label:
            return
        self.add_labels(label)
        for node_id in self.get_node_ids():
            self.add_label_by_node_id(node_id, label)

    def add_node(self,
                 node_labels,
                 node_properties,
                 node_id=UNASSIGNED_NODE_ID,
                 primary_property_name=""):
        """
        add a node json to the graph
        :param node_id: the node_id to identify the node, if not given, it will be add as new node and give a node id
        :param node_properties: a dict of node properties, key-value pair
        :param node_labels: a set of node labels
        :param primary_property_name:make sure the node_json["properties"][primary_property_name] is unique in GraphData.
         if no passing, the node json will be add to graph without check. otherwise, only the node json
        with unique property value ( property value is got by primary_property_name ) will be added to the GraphData.
                :return:-1, means that adding node json fail. otherwise, return the id of the newly added node
        """
        if primary_property_name:
            if primary_property_name not in node_properties:
                print(
                    "node json must have a primary_property_name ( %r ) in properties "
                    % primary_property_name)
                return self.UNASSIGNED_NODE_ID

            node_json = self.find_one_node_by_property(
                property_name=primary_property_name,
                property_value=node_properties[primary_property_name])
            if node_json:
                return node_json[self.DEFAULT_KEY_NODE_ID]

        if node_id == self.UNASSIGNED_NODE_ID:
            node_id = self.max_node_id + 1
            self.max_node_id = self.max_node_id + 1

        new_node_json = {
            self.DEFAULT_KEY_NODE_ID: node_id,
            self.DEFAULT_KEY_NODE_PROPERTIES: node_properties,
            self.DEFAULT_KEY_NODE_LABELS: set(node_labels)
        }

        self.graph.add_node(node_id, **new_node_json)

        if self.max_node_id < node_id:
            self.max_node_id = node_id

        self.add_labels(*new_node_json[self.DEFAULT_KEY_NODE_LABELS])
        for label in new_node_json[self.DEFAULT_KEY_NODE_LABELS]:
            self.label_to_ids_map[label].add(node_id)
        self.index_collection.add_node(
            node_id=node_id,
            node_properties=new_node_json[
                GraphData.DEFAULT_KEY_NODE_PROPERTIES])
        return node_id

    def update_node_property_by_node_id(self, node_id, node_properties):
        """
        Merge ``node_properties`` into the properties of an existing node.

        :param node_id: the id of the node to update
        :param node_properties: a dict of properties to set or overwrite
        :return: -1 if ``node_id`` is not among ``get_node_ids()``; otherwise
         the id of the updated node
        """
        known_ids = list(self.get_node_ids())
        if node_id not in known_ids:
            return self.UNASSIGNED_NODE_ID

        current = self.get_node_info_dict(node_id)
        target_id = current[self.DEFAULT_KEY_NODE_ID]
        merged_properties = current[self.DEFAULT_KEY_NODE_PROPERTIES]
        labels = current[self.DEFAULT_KEY_NODE_LABELS]
        for name, value in node_properties.items():
            merged_properties[name] = value

        self.graph.add_node(
            target_id,
            **{
                self.DEFAULT_KEY_NODE_ID: target_id,
                self.DEFAULT_KEY_NODE_PROPERTIES: merged_properties,
                self.DEFAULT_KEY_NODE_LABELS: labels,
            })
        self.index_collection.add_node(node_id=target_id,
                                       node_properties=merged_properties)
        return target_id

    def update_node_by_node_id(self, node_id, node_labels, node_properties):
        if not node_id in list(self.get_node_ids()):
            return self.UNASSIGNED_NODE_ID

        node_json = self.get_node_info_dict(node_id)
        update_node_id = node_json[self.DEFAULT_KEY_NODE_ID]
        update_node_properties = node_json[self.DEFAULT_KEY_NODE_PROPERTIES]
        update_node_labels = node_json[self.DEFAULT_KEY_NODE_LABELS]
        for k, v in node_properties.items():
            update_node_properties[k] = v
        for label in node_labels:
            update_node_labels.add(label)
        update_node_json = {
            self.DEFAULT_KEY_NODE_ID: update_node_id,
            self.DEFAULT_KEY_NODE_PROPERTIES: update_node_properties,
            self.DEFAULT_KEY_NODE_LABELS: update_node_labels
        }
        self.graph.add_node(update_node_id, **update_node_json)
        self.add_labels(*update_node_labels)
        for label in update_node_labels:
            self.label_to_ids_map[label].add(node_id)
        self.index_collection.add_node(node_id=update_node_id,
                                       node_properties=update_node_properties)
        return update_node_id

    def update_node_property_value_by_node_id(self, node_id,
                                              node_property_name,
                                              node_proprty_value):
        if not node_id in list(self.get_node_ids()):
            return self.UNASSIGNED_NODE_ID
        if node_property_name == "":
            return node_id
        node_property = {node_property_name: node_proprty_value}
        return self.update_node_property_by_node_id(node_id, node_property)

    def remove_node(self, node_id):
        if node_id not in self.graph.nodes:
            return None
        # print(type(self.graph.nodes))
        node_json = self.graph.nodes[node_id]
        in_relations = set(self.graph.in_edges(node_id, keys=True))
        out_relations = set(self.graph.out_edges(node_id, keys=True))
        self.graph.remove_node(node_id)

        for label in node_json[self.DEFAULT_KEY_NODE_LABELS]:
            self.label_to_ids_map[label].remove(node_id)

        self.index_collection.remove_node(node_id)

        return node_json, out_relations, in_relations

    def remove_all_nodes(self):
        ids = self.get_node_ids()
        for id in ids:
            self.remove_node(id)
        return True

    def merge_node(self, node_labels, node_properties, primary_property_name):
        """
        merge a node json to the graph, that is if we can't not find the node with primary_property_value match the given node.
        we will add a new node, if we found, we will add copy all properties given to the exist node, copy all labels to the exist node.
        properties will be updated by this merge. That is, if the node to be merged has the same attributes as the existing node, the attributes of the new node are used.

        :param node_properties: a dict of node properties, key-value pair
        :param node_labels: a set of node labels
        :param primary_property_name: The name of the property to check, the merged node and the new node are the same on this property.
        :return:-1, means that adding node json fail. otherwise, return the id of the newly added(merged) node.If it already exists, the id of this merged node will not change.
        """

        if not primary_property_name:
            print("primary_property_name must given on merge")
            return GraphData.UNASSIGNED_NODE_ID

        if primary_property_name not in node_properties:
            print(
                "node json must have a primary_property_name ( %r ) in properties "
                % primary_property_name)
            return self.UNASSIGNED_NODE_ID

        node_json = self.find_one_node_by_property(
            property_name=primary_property_name,
            property_value=node_properties[primary_property_name])

        if not node_json:
            return self.add_node(node_labels=node_labels,
                                 node_properties=node_properties,
                                 node_id=GraphData.UNASSIGNED_NODE_ID)

        merge_node_id = node_json[self.DEFAULT_KEY_NODE_ID]
        merge_properties = node_json[self.DEFAULT_KEY_NODE_PROPERTIES]
        for k, v in node_properties.items():
            merge_properties[k] = v

        merge_labels = set(node_json[self.DEFAULT_KEY_NODE_LABELS])
        for label in node_labels:
            merge_labels.add(label)

        return self.add_node(node_labels=merge_labels,
                             node_properties=merge_properties,
                             node_id=merge_node_id)

    def add_node_with_multi_primary_property(self,
                                             node_labels,
                                             node_properties,
                                             node_id=UNASSIGNED_NODE_ID,
                                             primary_property_names=None):
        """
        add a node json to the graph
        :param node_id: the node_id to identify the node, if not given, it will be add as new node and give a node id
        :param node_properties: a dict of node properties, key-value pair
        :param node_labels: a set of node labels
        :param primary_property_names:a list of primary properties. make sure the node_json["properties"][primary_property_name] is unique in GraphData.
         if no passing, the node json will be add to graph without check. otherwise, only the node json
        with unique property value ( property value is got by primary_property_name ) will be added to the GraphData.
                :return:-1, means that adding node json fail. otherwise, return the id of the newly added node
        """

        if primary_property_names is None:
            primary_property_names = []

        match_properties = {}

        for primary_property_name in primary_property_names:
            if primary_property_name not in node_properties:
                print(
                    "node json must have a primary_property_name ( %r ) in properties "
                    % primary_property_name)
                return self.UNASSIGNED_NODE_ID
            match_properties[primary_property_name] = node_properties[
                primary_property_name]

        node_json = self.find_one_node_by_properties(**match_properties)
        if node_json:
            return node_json[self.DEFAULT_KEY_NODE_ID]

        if node_id == self.UNASSIGNED_NODE_ID:
            node_id = self.max_node_id + 1

        new_node_json = {
            self.DEFAULT_KEY_NODE_ID: node_id,
            self.DEFAULT_KEY_NODE_PROPERTIES: node_properties,
            self.DEFAULT_KEY_NODE_LABELS: set(node_labels)
        }

        self.graph.add_node(node_id, **new_node_json)
        if self.max_node_id < node_id:
            self.max_node_id = node_id

        self.add_labels(*new_node_json[self.DEFAULT_KEY_NODE_LABELS])
        for label in new_node_json[self.DEFAULT_KEY_NODE_LABELS]:
            self.label_to_ids_map[label].add(node_id)
        self.index_collection.add_node(
            node_id=node_id,
            node_properties=new_node_json[
                GraphData.DEFAULT_KEY_NODE_PROPERTIES])

        return node_id

    def merge_node_with_multi_primary_property(self,
                                               node_labels,
                                               node_properties,
                                               primary_property_names=None):
        """
        merge a node json to the graph, that is if we can't not find the node with primary_property_value match the given node.
        we will add a new node, if we found, we will add copy all properties given to the exist node, copy all labels to the exist node.
        properties will be updated by this merge. That is, if the node to be merged has the same attributes as the existing node, the attributes of the new node are used.

        :param node_properties: a dict of node properties, key-value pair
        :param node_labels: a set of node labels
        :param primary_property_names: The list of name of the property to check, the merged node and the new node are the same on this property.
        :return:-1, means that adding node json fail. otherwise, return the id of the newly added(merged) node.If it already exists, the id of this merged node will not change.
        """

        if not primary_property_names:
            print("primary_property_names must given on merge")
            return GraphData.UNASSIGNED_NODE_ID
        match_properties = {}

        for primary_property_name in primary_property_names:
            if primary_property_name not in node_properties:
                print(
                    "node json must have a primary_property_name ( %r ) in properties "
                    % primary_property_name)
                return self.UNASSIGNED_NODE_ID
            match_properties[primary_property_name] = node_properties[
                primary_property_name]

        node_json = self.find_one_node_by_properties(**match_properties)
        if not node_json:
            return self.add_node(node_labels=node_labels,
                                 node_properties=node_properties,
                                 node_id=GraphData.UNASSIGNED_NODE_ID)

        merge_node_id = node_json[self.DEFAULT_KEY_NODE_ID]
        merge_properties = node_json[self.DEFAULT_KEY_NODE_PROPERTIES]
        for k, v in node_properties.items():
            merge_properties[k] = v

        merge_labels = set(node_json[self.DEFAULT_KEY_NODE_LABELS])
        for label in node_labels:
            merge_labels.add(label)

        return self.add_node(node_labels=merge_labels,
                             node_properties=merge_properties,
                             node_id=merge_node_id)

    def refresh_indexer(self):
        """
        rebuild the index collection from scratch: a new collection is created, the same
        properties are indexed again and every node of the graph is added to it again.
        :return:
        """
        # remember what was indexed before the old collection is thrown away
        indexed_property_names = list(
            self.index_collection.get_index_property())
        del self.index_collection
        self.index_collection = GraphIndexCollection()

        self.create_index_on_property(*indexed_property_names)
        for node_id, node_json in self.graph.nodes(data=True):
            if node_json is not None:
                self.index_collection.add_node(
                    node_id, node_json[self.DEFAULT_KEY_NODE_PROPERTIES])

    def find_one_node_by_property(self, property_name, property_value):
        if self.index_collection.is_property_indexed(property_name):
            candidate_node_ids = list(
                self.index_collection.find_ids(property_name,
                                               property_value=property_value))
            if len(candidate_node_ids) == 0:
                return None
            return self.get_node_info_dict(candidate_node_ids[0])

        for node_id, node_json in self.graph.nodes(data=True):
            node_properties_json = node_json[self.DEFAULT_KEY_NODE_PROPERTIES]
            if property_name in node_properties_json.keys(
            ) and node_properties_json[property_name] == property_value:
                return node_json
        return None

    def find_nodes_by_ids(self, *ids):
        result = []
        for node_id in ids:
            node_json = self.get_node_info_dict(node_id)
            if node_json:
                result.append(node_json)
        return result

    def find_nodes_by_property(self, property_name, property_value):
        if self.index_collection.is_property_indexed(property_name):
            candidate_node_ids = list(
                self.index_collection.find_ids(property_name,
                                               property_value=property_value))

            return self.find_nodes_by_ids(*candidate_node_ids)

        nodes = []
        for node_id, node_json in self.graph.nodes(data=True):
            node_properties_json = node_json[self.DEFAULT_KEY_NODE_PROPERTIES]
            if property_name in node_properties_json.keys(
            ) and node_properties_json[property_name] == property_value:
                nodes.append(node_json)
        return nodes

    def find_one_node_by_property_value_starts_with(self, property_name,
                                                    property_value_starter):
        """
        find a node which its property value is string and the string is startswith a given string
        :param property_name:
        :param property_value_starter:
        :return:
        """
        for node_id, node_json in self.graph.nodes(data=True):
            node_properties_json = node_json[self.DEFAULT_KEY_NODE_PROPERTIES]
            if property_name not in node_properties_json.keys():
                continue

            property_value = node_properties_json[property_name]
            if type(property_value) != str:
                continue
            if property_value.startswith(property_value_starter):
                return node_json
        return None

    def find_nodes_by_property_value_starts_with(self, property_name,
                                                 property_value_starter):
        """
        find all nodes which its property value is string and the string is startswith a given string
        :param property_name:
        :param property_value_starter:
        :return:
        """
        nodes = []
        for node_id, node_json in self.graph.nodes(data=True):
            node_properties_json = node_json[self.DEFAULT_KEY_NODE_PROPERTIES]
            if property_name not in node_properties_json.keys():
                continue

            property_value = node_properties_json[property_name]
            if type(property_value) != str:
                continue
            if property_value.startswith(property_value_starter):
                nodes.append(node_json)
        return nodes

    def __find_node_ids_by_index_properties(self, **index_properties):
        result_ids = self.get_node_ids()

        for property_name, property_value in index_properties.items():
            result_ids = result_ids.intersection(
                self.index_collection.find_ids(property_name=property_name,
                                               property_value=property_value))

        return result_ids

    def find_one_node_by_properties(self, **properties):
        indexed_properties = {}
        unindexed_properties = {}
        for property_name, property_value in properties.items():
            if self.index_collection.is_property_indexed(
                    property_name=property_name):
                indexed_properties[property_name] = property_value
            else:
                unindexed_properties[property_name] = property_value

        candidate_node_ids = self.__find_node_ids_by_index_properties(
            **indexed_properties)

        if len(candidate_node_ids) == 0:
            return None

        if len(unindexed_properties) == 0:
            return self.get_node_info_dict(list(candidate_node_ids)[0])

        for node_id in candidate_node_ids:
            node_json = self.get_node_info_dict(node_id=node_id)
            node_properties_json = node_json[self.DEFAULT_KEY_NODE_PROPERTIES]

            is_match = True
            for property_name, property_value in unindexed_properties.items():
                if property_name not in node_properties_json.keys(
                ) or node_properties_json[property_name] != property_value:
                    is_match = False
                    break
            if is_match:
                return node_json

        return None

    def set_relations(self, relations):
        for t in relations:
            self.add_relation(startId=t[self.DEFAULT_KEY_RELATION_START_ID],
                              relationType=t[self.DEFAULT_KEY_RELATION_TYPE],
                              endId=t[self.DEFAULT_KEY_RELATION_END_ID])

    def add_relation(self, startId, relationType, endId):
        """
        add a new relation to graphData, if exist, not add.
        :param startId:
        :param relationType:
        :param endId:
        :return:False, the relation is already exist adding fail, True, add the relation successsful
        """
        # if startId == GraphData.UNASSIGNED_NODE_ID:
        #     return False
        # if endId == GraphData.UNASSIGNED_NODE_ID:
        #     return False

        if startId not in self.graph.nodes or endId not in self.graph.nodes:
            return False

        if self.exist_relation(startId=startId,
                               relationType=relationType,
                               endId=endId):
            return False

        self.__add_one_relation_count(relationType)

        self.graph.add_edge(startId, endId, relationType)
        return True

    def __add_one_relation_count(self, relation_type):
        relation_type_to_num_map = self.get_relation_type_to_num_map()
        relation_type_to_num_map[relation_type] = relation_type_to_num_map.get(
            relation_type, 0) + 1

    def __remove_one_relation_count(self, relation_type):
        relation_type_to_num_map = self.get_relation_type_to_num_map()
        relation_type_to_num_map[relation_type] = max(
            0,
            relation_type_to_num_map.get(relation_type, 0) - 1)

    def add_relation_with_property(self, startId, relationType, endId,
                                   **kwargs):
        if startId not in self.graph.nodes or endId not in self.graph.nodes:
            return False

        if self.exist_relation(startId=startId,
                               relationType=relationType,
                               endId=endId):
            return False

        self.__add_one_relation_count(relationType)
        self.graph.add_edge(startId, endId, relationType, **kwargs)
        return True

    def remove_relation(self, startId, relationType, endId):
        if not self.exist_relation(
                startId=startId, relationType=relationType, endId=endId):
            return False
        self.__remove_one_relation_count(relationType)

        self.graph.remove_edge(startId, endId, relationType)
        return True

    def remove_all_relations(self):
        relation_pairs = self.get_relation_pairs()
        for relation_pair in relation_pairs:
            relations = self.get_relations(start_id=relation_pair[0],
                                           end_id=relation_pair[1])
            for relation in relations:
                self.remove_relation(relation[0], relation[1], relation[2])
        return True

    def exist_relation(self, startId, relationType, endId):
        return self.graph.has_edge(startId, endId, relationType)

    def exist_any_relation(self, startId, endId):
        return self.graph.has_edge(startId, endId)

    def get_relations(self, start_id=None, relation_type=None, end_id=None):
        candidates = None
        if start_id is not None:
            candidates = self.get_all_out_relations(start_id)
        if end_id is not None:
            tmp = self.get_all_in_relations(end_id)
            if candidates is not None:
                candidates &= tmp
            else:
                candidates = tmp
        candidates = self.get_relation_pairs_with_type(
        ) if candidates is None else candidates

        if relation_type is not None:
            candidates = set(
                filter(lambda r: r[1] == relation_type, candidates))
        return candidates

    def get_all_relations(self, id_1, id_2):
        result = set([])
        result = result | self.get_relations(start_id=id_1, end_id=id_2)
        result = result | self.get_relations(start_id=id_2, end_id=id_1)
        return result

    def get_edge_extra_info(self, start_id, end_id, relation_name, extra_key):
        relation_dict = self.graph.get_edge_data(start_id, end_id)
        if relation_name in relation_dict:
            if extra_key in relation_dict[relation_name]:
                return relation_dict[relation_name][extra_key]
        return ""

    def get_node_num(self):
        return len(self.graph.nodes)

    def get_relation_num(self):
        return len(self.graph.edges)

    def get_node_ids(self):
        return set(self.graph.nodes)

    def get_relation_pairs(self):
        # todo:cache the result?
        """
        get the relation list in [(startId,endId)] format
        :return:
        """
        pairs = set(self.graph.edges(keys=False))

        return pairs

    def get_relation_pairs_with_type(self):
        """
        get the relation list in [(startId,endId)] format
        :return:
        """
        pairs = {(r[0], r[2], r[1]) for r in self.graph.edges(keys=True)}
        return pairs

    def get_all_out_relations(self, node_id):
        if node_id not in self.graph.nodes:
            return set()
        return {(r[0], r[2], r[1])
                for r in self.graph.out_edges(node_id, keys=True)}

    def get_all_in_relations(self, node_id):
        if node_id not in self.graph.nodes:
            return set()
        return {(r[0], r[2], r[1])
                for r in self.graph.in_edges(node_id, keys=True)}

    def update_node_index(self, node_id):

        node_info = self.get_node_info_dict(node_id=node_id)
        node_properties = node_info[self.DEFAULT_KEY_NODE_PROPERTIES]
        self.index_collection.add_node(node_id=node_id,
                                       node_properties=node_properties)

    def get_node_info_dict(self, node_id):
        """
        get the node info dict,
        :param node_id: the node id
        :return:
        """
        return self.graph.nodes.get(node_id, None)

    def get_properties_for_node(self,
                                node_id,
                                key_node_properties=DEFAULT_KEY_NODE_PROPERTIES
                                ):
        """
        get the node properties part from node info dict
        :param key_node_properties: specify the key of key_node_properties, default is "properties"
        :param node_id: the node id
        :return: {} if the node not exist
        """
        node_info_dict = self.get_node_info_dict(node_id)
        if node_info_dict is None:
            return {}

        return node_info_dict[key_node_properties]

    def get_labels_for_node(self,
                            node_id,
                            key_node_labels=DEFAULT_KEY_NODE_LABELS):
        """
        get the node properties part from node info dict
        :param key_node_labels: specify the key of node_labels, default is "labels"
        :param node_id: the node id
        :return: [] if the node not exist
        """
        node_info_dict = self.get_node_info_dict(node_id)
        if node_info_dict is None:
            return []

        return node_info_dict[key_node_labels]

    def get_all_labels(self):
        """
        get all labels as set for current node.
        :return: a set of labels.
        """
        return set(self.label_to_ids_map.keys())

    def get_all_relation_types(self):
        """
        get all relation types in graph data
        :return: a set of relation type strings
        """

        return set(self.get_relation_type_to_num_map().keys())

    def get_relation_count_by_type(self, relation_type):
        relation_type_to_num_map = self.get_relation_type_to_num_map()
        return relation_type_to_num_map.get(relation_type, 0)

    def get_relation_type_to_num_map(self):
        return self.relation_type_to_num_map

    def __count_relation_type_to_num_map(self):
        relation_type_to_num_map = {}
        relation_type_to_relation = {}
        for r in self.get_relation_pairs_with_type():
            if r[1] not in relation_type_to_relation:
                relation_type_to_relation[r[1]] = set()
            relation_type_to_relation[r[1]].add(r)
        for k, v in relation_type_to_relation.items():
            relation_type_to_num_map[k] = len(v)
        return relation_type_to_num_map

    def print_label_count(self):
        print("Label Num=%d" % len(self.label_to_ids_map.keys()))
        for k, v in self.label_to_ids_map.items():
            print("<Label:%r Num:%d>" % (k, len(v)))

    def print_graph_info(self):
        print("----- Graph Info ------")
        print(self)
        self.print_label_count()
        self.print_relation_info()
        print("-----------------------")

    def print_relation_info(self):
        relation_type_to_num_map = self.get_relation_type_to_num_map()
        print("Relation Num=%d" % len(relation_type_to_num_map.keys()))
        for k, v in relation_type_to_num_map.items():
            print("<Relation:%r Num:%d>" % (k, v))

    def __repr__(self):
        return "<GraphData nodeNum=%d relNum=%d maxNodeId=%d>" % (
            self.get_node_num(), self.get_relation_num(), self.max_node_id)

    def subgraph(self, node_ids):
        """
        get a sub graph of graph data which keep only given nodes and relations between nodes
        :param node_ids: the kept node ids in graph
        :return: a graph that keep all things.
        """
        graph_data = deepcopy(self)

        remove_nodes = set(self.get_node_ids()) - node_ids
        for node_id in remove_nodes:
            graph_data.remove_node(node_id)

        return graph_data