예제 #1
0
def load_ogbn_arxiv(sess=None, prefix=None):
    """Load ogbn_arxiv graph.

    The ogbn-arxiv dataset is a directed graph, representing the citation
    network between all Computer Science (CS) arXiv papers indexed by
    Microsoft Academic Graph (MAG). See more details here:

        https://ogb.stanford.edu/docs/nodeprop/#ogbn-arxiv

    Args:
        sess (:class:`graphscope.Session`): Load graph within the session.
            Default session will be used when setting to None. Defaults to None.
        prefix: `PathLike` object that represents a path.
            With standalone mode, set prefix None will try to download from
            source URL. Defaults to None.

    Returns:
        :class:`graphscope.framework.graph.GraphDAGNode`:
            A Graph node which graph type is ArrowProperty, evaluated in eager mode.

    Examples:
        .. code:: python

        >>> # lazy mode
        >>> import graphscope
        >>> from graphscope.dataset import load_ogbn_arxiv
        >>> sess = graphscope.session(mode="lazy")
        >>> g = load_ogbn_arxiv(sess, "/path/to/dataset")
        >>> g1 = sess.run(g)

        >>> # eager mode
        >>> import graphscope
        >>> from graphscope.dataset import load_ogbn_arxiv
        >>> sess = graphscope.session(mode="eager")
        >>> g = load_ogbn_arxiv(sess, "/path/to/dataset")
    """
    if prefix is None:
        fname = "ogbn_arxiv.tar.gz"
        fpath = download_file(
            fname,
            origin=f"{DATA_SITE}/{fname}",
            extract=True,
            file_hash="d920922681e8369da5dc8e0f28fffae2eb0db056dc626097f4159351d4ea4389",
        )
        # The extracted directory is assumed to be named after the archive
        # without its ".tar.gz" suffix (ogbn_arxiv.tar.gz -> ogbn_arxiv).
        prefix = fpath[: -len(".tar.gz")]
    else:
        # Environment variables such as $HOME in the given path are expanded.
        prefix = os.path.expandvars(prefix)

    if sess is None:
        sess = get_default_session()

    nodes_path = os.path.join(prefix, "nodes.csv")
    edges_path = os.path.join(prefix, "edge.csv")

    graph = sess.g().add_vertices(nodes_path, "paper")
    return graph.add_edges(edges_path, "citation")
예제 #2
0
    def __init__(self, incoming_graph_data=None, default_label=None, **attr):
        """Initialize the graph, optionally populating it from existing data.

        Parameters
        ----------
        incoming_graph_data : optional (default: None)
            Data used to populate the graph. When it is not None it is passed
            to ``to_networkx_graph(incoming_graph_data, create_using=self)``.
            When ``self._is_gs_graph`` reports it as a graphscope graph, the
            graph is forced into distributed mode and, if no session is set
            yet, the session is looked up from the data's ``session_id``.
        default_label : optional (default: None)
            Stored as ``self._default_label``.
        attr : keyword arguments, optional
            Two hidden options are popped first: ``create_empty_in_engine``
            (default True) and ``dist`` (default False). All remaining
            key=value pairs are merged into ``self.graph``.
        """

        # Re-bind the factories on the instance. The right-hand sides resolve
        # through normal attribute lookup (presumably class-level defaults).
        self.graph_attr_dict_factory = self.graph_attr_dict_factory
        self.node_dict_factory = self.node_dict_factory
        self.adjlist_outer_dict_factory = self.adjlist_outer_dict_factory
        self.cache = self.graph_cache_factory(self)

        # init node and adj (must be after cache)
        self.graph = self.graph_attr_dict_factory()
        self._node = self.node_dict_factory(self)
        self._adj = self.adjlist_outer_dict_factory(self)
        # Successors share the adjacency object; predecessors get their own.
        self._succ = self._adj
        self._pred = self.adjlist_outer_dict_factory(self, pred=True)

        self._key = None
        self._op = None
        self._graph_type = self._graph_type
        self._schema = GraphSchema()

        # cache for add_node and add_edge
        self._add_node_cache = []
        self._add_edge_cache = []
        self._remove_node_cache = []
        self._remove_edge_cache = []

        create_empty_in_engine = attr.pop(
            "create_empty_in_engine", True
        )  # a hidden parameter
        self._distributed = attr.pop("dist", False)
        # NOTE(review): self._session is read below before this method ever
        # assigns it; presumably it is provided as a class-level attribute
        # by the session-bound nx module -- confirm.
        if incoming_graph_data is not None and self._is_gs_graph(incoming_graph_data):
            # convert from gs graph always use distributed mode
            self._distributed = True
            if self._session is None:
                self._session = get_session_by_id(incoming_graph_data.session_id)
        self._default_label = default_label
        self._default_label_id = -1

        # Fall back to the default session when none was resolved above.
        if self._session is None:
            self._session = get_default_session()

        # Create an empty engine-side graph unless the data is a gs graph or
        # the caller disabled it through the hidden parameter.
        if not self._is_gs_graph(incoming_graph_data) and create_empty_in_engine:
            graph_def = init_empty_graph_in_engine(
                self, self.is_directed(), self._distributed
            )
            self._key = graph_def.key

        # attempt to load graph with data
        if incoming_graph_data is not None:
            to_networkx_graph(incoming_graph_data, create_using=self)
            self.cache.warmup()

        # load graph attributes (must be after to_networkx_graph)
        self.graph.update(attr)
        self._saved_signature = self.signature
        self._is_client_view = False
    def test_import(self):
        """Check that graphs are bound to the session whose ``nx`` built them.

        A graph built from the top-level ``graphscope.nx`` module must report
        the default session's id, while graphs built from ``session.nx()``
        must report the id of that session.
        """
        import graphscope.nx as nx_default

        try:
            nx1 = self.session1.nx()
            nx2 = self.session2.nx()
            G = nx_default.Graph()
            G1 = nx1.Graph()
            G2 = nx2.Graph()
            assert G.session_id == get_default_session().session_id
            assert G1.session_id == self.session1.session_id
            assert G2.session_id == self.session2.session_id
        finally:
            # Close both sessions even if an assertion above fails, and make
            # sure a failure while closing the first does not skip the second.
            try:
                self.session1.close()
            finally:
                self.session2.close()
예제 #4
0
 def finish(self):
     """Run the optional preprocessor once and mark this object finished.

     Does nothing when ``self.finished`` is already true. Otherwise, if a
     preprocessor is set, it is called with the current source, the storage
     options, the options as a dict and the default session; the pair it
     returns replaces ``self.protocol`` and ``self.source``.
     """
     if not self.finished:
         preprocess = self.preprocessor
         if preprocess is not None:
             processed = preprocess(
                 self.source,
                 self.storage_options,
                 self.options.to_dict(),
                 get_default_session(),
             )
             self.protocol, self.source = processed
             logger.debug(
                 f"processed protocol = {self.protocol}, source = {self.source}"
             )
         self.finished = True
예제 #5
0
    def process_vineyard(self, source):
        """Open ``source`` through vineyard and record the resulting stream.

        The ``vineyard://`` prefix is stripped, a ``file://`` scheme is added
        when the source has none, and ``str(self.options)`` is appended after
        a ``#`` (or after ``&`` when the source already contains ``#``). The
        connection settings are read from the info of ``self.session``, or of
        the default session when no session is set.

        On success ``self.protocol`` is ``"vineyard"`` and ``self.source`` is
        the ``repr`` of the object returned by ``vineyard.io.open``.

        Raises:
            RuntimeError: If the ``vineyard`` module is not available.
        """
        if vineyard is None:
            raise RuntimeError("Vineyard is not installed")

        vineyard_scheme = "vineyard://"
        if source.startswith(vineyard_scheme):
            source = source[len(vineyard_scheme):]
        if not urlparse(source).scheme:
            source = "file://%s" % source

        # Pick the template according to whether a '#' is already present.
        template = "%s&%s" if "#" in source else "%s#%s"
        source = template % (source, str(self.options))

        sess = self.session if self.session is not None else get_default_session()
        info = sess.info
        engine_conf = info["engine_config"]
        rpc_endpoint = engine_conf["vineyard_rpc_endpoint"]
        ipc_socket = engine_conf["vineyard_socket"]
        hosts = info["engine_hosts"].split(",")

        # A "namespace" entry selects the kubernetes deployment, in which
        # case every host is prefixed with that namespace.
        has_namespace = "namespace" in info
        if has_namespace:
            hosts = ["%s:%s" % (info["namespace"], host) for host in hosts]
        deployment = "kubernetes" if has_namespace else "ssh"
        num_workers = info["num_workers"]

        self.protocol = "vineyard"
        stream = vineyard.io.open(
            source,
            mode="r",
            vineyard_endpoint=rpc_endpoint,
            vineyard_ipc_socket=ipc_socket,
            hosts=hosts,
            num_workers=num_workers,
            deployment=deployment,
        )
        self.source = repr(stream)
        logger.debug("opened vineyard stream id = %s", self.source)
예제 #6
0
def load_ogbn_proteins(sess=None, prefix=None):
    """Load ogbn_proteins graph.

    The ogbn-proteins dataset is an undirected, weighted, and typed (according
    to species) graph. Nodes represent proteins, and edges indicate different
    types of biologically meaningful associations between proteins, e.g.,
    physical interactions, co-expression or homology. All edges come with
    8-dimensional features, where each dimension represents the approximate
    confidence of a single association type and takes values between 0 and 1
    (the larger the value is, the more confident we are about the
    association). The proteins come from 8 species.
    See more details here:

        https://ogb.stanford.edu/docs/nodeprop/#ogbn-proteins

    Args:
        sess (:class:`graphscope.Session`): Load graph within the session.
            Default session will be used when setting to None. Defaults to None.
        prefix: `PathLike` object that represents a path.
            With standalone mode, set prefix None will try to download from
            source URL. Defaults to None.

    Returns:
        :class:`graphscope.framework.graph.GraphDAGNode`:
            A Graph node which graph type is ArrowProperty, evaluated in eager mode.

    Examples:
        .. code:: python

        >>> # lazy mode
        >>> import graphscope
        >>> from graphscope.dataset import load_ogbn_proteins
        >>> sess = graphscope.session(mode="lazy")
        >>> g = load_ogbn_proteins(sess, "/path/to/dataset")
        >>> g1 = sess.run(g)

        >>> # eager mode
        >>> import graphscope
        >>> from graphscope.dataset import load_ogbn_proteins
        >>> sess = graphscope.session(mode="eager")
        >>> g = load_ogbn_proteins(sess, "/path/to/dataset")
    """
    if prefix is None:
        fname = "ogbn_proteins.tar.gz"
        fpath = download_file(
            fname,
            origin=f"{DATA_SITE}/{fname}",
            extract=True,
            file_hash="ea427e520bf068f3d6788d940b3bdc6773b965d792f2fa4a52311eab478acbde",
        )
        # The extracted directory is assumed to be named after the archive
        # without its ".tar.gz" suffix (ogbn_proteins.tar.gz -> ogbn_proteins).
        prefix = fpath[: -len(".tar.gz")]
    else:
        # Environment variables such as $HOME in the given path are expanded.
        prefix = os.path.expandvars(prefix)

    if sess is None:
        sess = get_default_session()

    nodes_path = os.path.join(prefix, "nodes.csv")
    edges_path = os.path.join(prefix, "edge.csv")

    graph = sess.g().add_vertices(nodes_path, "proteins")
    return graph.add_edges(edges_path, "associations")
예제 #7
0
def load_u2i(sess=None, prefix=None, directed=True):
    """Load the user-to-item (u2i) dataset.

    The dataset consists of 5241 nodes, which represent both user and item
    nodes, and 42876 edges, which represent buying relationships. It is owned
    by graphlearn and can be downloaded from here:

        https://github.com/alibaba/graph-learn/blob/graphscope/examples/data/u2i.py

    Args:
        sess (:class:`graphscope.Session`): Load graph within the session.
            Default session will be used when setting to None. Defaults to None.
        prefix: `PathLike` object that represents a path.
            With standalone mode, set prefix None will try to download from
            source URL. Defaults to None.
        directed (bool, optional): Determine to load a directed or undirected graph.
            Defaults to True.

    Returns:
        :class:`graphscope.framework.graph.GraphDAGNode`:
            A Graph node which graph type is ArrowProperty, evaluated in eager mode.

    Examples:
        .. code:: python

        >>> # lazy mode
        >>> import graphscope
        >>> from graphscope.dataset import load_u2i
        >>> sess = graphscope.session(mode="lazy")
        >>> g = load_u2i(sess, "/path/to/dataset")
        >>> g1 = sess.run(g)

        >>> # eager mode
        >>> import graphscope
        >>> from graphscope.dataset import load_u2i
        >>> sess = graphscope.session(mode="eager")
        >>> g = load_u2i(sess, "/path/to/dataset")

    """
    if prefix is None:
        fname = "u2i.tar.gz"
        origin = f"{DATA_SITE}/u2i.tar.gz"
        fpath = download_file(
            fname,
            origin=origin,
            extract=True,
            file_hash="b5475a0f6f13b0964ba0c38804d06003a44627653df3371d938e47fb9eedced6",
        )
        # assumed dirname is u2i after extracting from u2i.tar.gz
        prefix = fpath[0:-7]
    else:
        prefix = os.path.expandvars(prefix)

    if sess is None:
        sess = get_default_session()

    node_file = os.path.join(prefix, "node.csv")
    edge_file = os.path.join(prefix, "edge.csv")

    graph = sess.g(directed=directed)

    # Both vertex labels are read from the same tab-separated node file; each
    # call gets its own Loader instance.
    for vertex_label in ("u", "i"):
        graph = graph.add_vertices(
            Loader(node_file, delimiter="\t"),
            label=vertex_label,
            properties=[("feature", "str")],
            vid_field="id",
        )

    # Forward edges (u -> i) use the file's columns as-is.
    graph = graph.add_edges(
        Loader(edge_file, delimiter="\t"),
        label="u-i",
        properties=["weight"],
        src_label="u",
        dst_label="i",
        src_field="src_id",
        dst_field="dst_id",
    )
    # Reverse edges (i -> u) read the same file with the two columns swapped.
    graph = graph.add_edges(
        Loader(edge_file, delimiter="\t"),
        label="u-i_reverse",
        properties=["weight"],
        src_label="i",
        dst_label="u",
        src_field="dst_id",
        dst_field="src_id",
    )
    return graph
예제 #8
0
def load_from(
    edges: Union[Mapping[str, Union[LoaderVariants, Sequence, Mapping]],
                 LoaderVariants, Sequence],
    vertices: Union[Mapping[str, Union[LoaderVariants, Sequence, Mapping]],
                    LoaderVariants, Sequence, None, ] = None,
    directed=True,
    oid_type="int64_t",
    generate_eid=True,
    vformat=None,
    eformat=None,
) -> Graph:
    """Load a Arrow property graph using a list of vertex/edge specifications.

    .. deprecated:: version 0.3
       Use :class:`graphscope.Graph()` instead.

    - Use Dict of tuples to setup a graph.
        We can use a dict to set vertex and edge configurations,
        which can be used to build graphs.

        Examples:

        .. code:: ipython

            g = graphscope_session.load_from(
                edges={
                    "group": [
                        (
                            "file:///home/admin/group.e",
                            ["group_id", "member_size"],
                            ("leader_student_id", "student"),
                            ("member_student_id", "student"),
                        ),
                        (
                            "file:///home/admin/group_for_teacher_student.e",
                            ["group_id", "group_name", "establish_date"],
                            ("teacher_in_charge_id", "teacher"),
                            ("member_student_id", "student"),
                        ),
                    ]
                },
                vertices={
                    "student": (
                        "file:///home/admin/student.v",
                        ["name", "lesson_nums", "avg_score"],
                        "student_id",
                    ),
                    "teacher": (
                        "file:///home/admin/teacher.v",
                        ["name", "salary", "age"],
                        "teacher_id",
                    ),
                },
            )

        Here 'group' is the label of the edges, while 'student' and 'teacher'
        are the labels of the vertices. The 'group' edges link 'student' to
        'student' and 'teacher' to 'student'.

    - Use Dict of dict to setup a graph.
        We can also give each element inside the tuple a meaningful name,
        makes it more understandable.

        Examples:

        .. code:: ipython

            g = graphscope_session.load_from(
                edges={
                    "group": [
                        {
                            "loader": "file:///home/admin/group.e",
                            "properties": ["group_id", "member_size"],
                            "source": ("leader_student_id", "student"),
                            "destination": ("member_student_id", "student"),
                        },
                        {
                            "loader": "file:///home/admin/group_for_teacher_student.e",
                            "properties": ["group_id", "group_name", "establish_date"],
                            "source": ("teacher_in_charge_id", "teacher"),
                            "destination": ("member_student_id", "student"),
                        },
                    ]
                },
                vertices={
                    "student": {
                        "loader": "file:///home/admin/student.v",
                        "properties": ["name", "lesson_nums", "avg_score"],
                        "vid": "student_id",
                    },
                    "teacher": {
                        "loader": "file:///home/admin/teacher.v",
                        "properties": ["name", "salary", "age"],
                        "vid": "teacher_id",
                    },
                },
            )

    Args:
        edges: Edge configuration of the graph. If an existing graph
            (:class:`Graph`, ``nx.Graph`` or a vineyard object) is passed
            instead, it is handed directly to the default session and all
            other arguments are ignored.
        vertices (optional): Vertices configurations of the graph. Defaults to None.
            If None, we assume all edge's src_label and dst_label are deduced and unambiguous.
        directed (bool, optional): Indicate whether the graph
            should be treated as directed or undirected. Defaults to True.
        oid_type (str, optional): ID type of graph. Can be "int64_t" or "string". Defaults to "int64_t".
        generate_eid (bool, optional): Whether to generate a unique edge id for each edge. Generated eid will be placed
            in third column. This feature is for cooperating with interactive engine.
            If you only need to work with analytical engine, set it to False. Defaults to True.
        vformat (optional): Forwarded unchanged to the vertex-parameter
            normalization. Defaults to None.
        eformat (optional): Forwarded unchanged to the edge-parameter
            normalization. Defaults to None.

    Returns:
        Graph: The graph created within the default session.

    Raises:
        ValueError: If ``oid_type`` does not normalize to "int64_t" or
            "std::string".
    """

    # Don't import the :code:`nx` in top-level statements to improve the
    # performance of :code:`import graphscope`.
    from graphscope import nx

    sess = get_default_session()

    # An already-built graph needs no loader: let the session wrap it as-is.
    if isinstance(edges, (Graph, nx.Graph, *VineyardObjectTypes)):
        return sess.g(edges)

    oid_type = utils.normalize_data_type_str(oid_type)
    if oid_type not in ("int64_t", "std::string"):
        raise ValueError("oid_type can only be int64_t or string.")

    v_labels = normalize_parameter_vertices(vertices, oid_type, vformat)
    e_labels = normalize_parameter_edges(edges, oid_type, eformat)

    # generate and add a loader op to dag
    loader_op = dag_utils.create_loader(v_labels + e_labels)
    sess.dag.add_op(loader_op)

    # construct create graph op
    config = {
        types_pb2.DIRECTED: utils.b_to_attr(directed),
        types_pb2.OID_TYPE: utils.s_to_attr(oid_type),
        types_pb2.GENERATE_EID: utils.b_to_attr(generate_eid),
        types_pb2.VID_TYPE: utils.s_to_attr("uint64_t"),
        types_pb2.IS_FROM_VINEYARD_ID: utils.b_to_attr(False),
    }
    op = dag_utils.create_graph(
        sess.session_id,
        graph_def_pb2.ARROW_PROPERTY,
        inputs=[loader_op],
        attrs=config,
    )
    return sess.g(op)
예제 #9
0
    def __init__(self, incoming_graph_data=None, **attr):
        """Initialize a graph with edges, name, or graph attributes

        Parameters
        ----------
        incoming_graph_data : input graph (optional, default: None)
            Data to initialize graph. If None (default) an empty
            graph is created.  The data can be any format that is supported
            by the to_nx_graph() function, currently including edge list,
            dict of dicts, dict of lists, NetworkX graph, NumPy matrix
            or 2d ndarray, Pandas DataFrame, SciPy sparse matrix, or a graphscope
            graph.

        attr : keyword arguments, optional (default= no attributes)
            Attributes to add to graph as key=value pairs. The hidden key
            ``create_empty_in_engine`` (default True) is popped first and is
            not stored as a graph attribute.

        Raises
        ------
        ValueError
            If no default session is available.

        See Also
        --------
        convert

        Examples
        --------
        >>> G = nx.Graph()  # or DiGraph
        >>> G = nx.Graph(name='my graph')
        >>> e = [(1, 2), (2, 3), (3, 4)]  # list of edges
        >>> G = nx.Graph(e)

        Arbitrary graph attribute pairs (key=value) may be assigned

        >>> G = nx.Graph(e, day="Friday")
        >>> G.graph
        {'day': 'Friday'}

        """
        # The graph is always bound to the default session; only its id is kept.
        sess = get_default_session()
        if sess is None:
            raise ValueError(
                "Cannot find a default session. "
                "Please register a session using graphscope.session(...).as_default()"
            )
        self._session_id = sess.session_id

        self._key = None
        self._op = None
        # Re-bind on the instance; the right-hand side resolves through normal
        # attribute lookup (presumably a class-level default).
        self._graph_type = self._graph_type
        self._schema = GraphSchema()
        self._schema.init_nx_schema()
        create_empty_in_engine = attr.pop("create_empty_in_engine",
                                          True)  # a hidden parameter
        # Create an empty engine-side graph unless the data is a graphscope
        # graph (handled further below) or the caller disabled it.
        if not self.is_gs_graph(
                incoming_graph_data) and create_empty_in_engine:
            graph_def = empty_graph_in_engine(self, self.is_directed())
            self._key = graph_def.key

        self.graph_attr_dict_factory = self.graph_attr_dict_factory
        self.node_dict_factory = self.node_dict_factory
        self.adjlist_dict_factory = self.adjlist_dict_factory

        self.graph = self.graph_attr_dict_factory()
        self._node = self.node_dict_factory(self)
        self._adj = self.adjlist_dict_factory(self)
        self._pred = self.adjlist_dict_factory(self, types_pb2.PREDS_BY_NODE)
        # Successors share the adjacency object.
        self._succ = self._adj
        # attempt to load graph with data
        if incoming_graph_data is not None:
            if self.is_gs_graph(incoming_graph_data):
                # A graphscope graph is converted directly; the key and the
                # schema are taken from the conversion result and the source.
                graph_def = from_gs_graph(incoming_graph_data, self)
                self._key = graph_def.key
                self._schema.init_nx_schema(incoming_graph_data.schema)
            else:
                to_nx_graph(incoming_graph_data, create_using=self)
        # load graph attributes (must be after to_nx_graph)
        self.graph.update(attr)
        self._saved_signature = self.signature
예제 #10
0
def load_p2p_network(sess=None, prefix=None, directed=False):
    """Load p2p graph.

    A peer-to-peer dataset derived from Gnutella peer-to-peer network, August 31 2002,
    with generated data on vertices and edges. See more details here:

        http://snap.stanford.edu/data/p2p-Gnutella31.html

    Args:
        sess (:class:`graphscope.Session`): Load graph within the session.
            Default session will be used when setting to None. Defaults to None.
        prefix: `PathLike` object that represents a path.
            With standalone mode, set prefix None will try to download from
            source URL. Defaults to None.
        directed (bool, optional): Determine to load a directed or undirected graph.
            Defaults to False.

    Returns:
        :class:`graphscope.framework.graph.GraphDAGNode`:
            A Graph node which graph type is ArrowProperty, evaluated in eager mode.

    Examples:
        .. code:: python

        >>> # lazy mode
        >>> import graphscope
        >>> from graphscope.dataset import load_p2p_network
        >>> sess = graphscope.session(mode="lazy")
        >>> g = load_p2p_network(sess, "/path/to/dataset")
        >>> g1 = sess.run(g)

        >>> # eager mode
        >>> import graphscope
        >>> from graphscope.dataset import load_p2p_network
        >>> sess = graphscope.session(mode="eager")
        >>> g = load_p2p_network(sess, "/path/to/dataset")
    """
    if prefix is None:
        fname = "p2p_network.tar.gz"
        fpath = download_file(
            fname,
            origin=f"{DATA_SITE}/{fname}",
            extract=True,
            file_hash="117131735186caff23ea127beec61b5396662c0815fc7918186451fe957e8c2f",
        )
        # The extracted directory is assumed to be named after the archive
        # without its ".tar.gz" suffix (p2p_network.tar.gz -> p2p_network).
        prefix = fpath[: -len(".tar.gz")]
    else:
        # Environment variables such as $HOME in the given path are expanded.
        prefix = os.path.expandvars(prefix)

    if sess is None:
        sess = get_default_session()

    vertex_path = os.path.join(prefix, "p2p-31_property_v_0")
    edge_path = os.path.join(prefix, "p2p-31_property_e_0")

    graph = sess.g(directed=directed).add_vertices(vertex_path, "host")
    return graph.add_edges(
        edge_path,
        "connect",
        src_label="host",
        dst_label="host",
    )
예제 #11
0
    def __init__(
        self,
        session=None,
        incoming_data=None,
        oid_type="int64",
        directed=True,
        generate_eid=True,
    ):
        """Construct a :class:`Graph` object.

        Args:
            session: Session the graph is created in. The default session is
                used when it is None. Defaults to None.
            incoming_data: Graph can be initialized through various type of sources,
                which can be one of:
                    - :class:`Operation`
                    - :class:`nx.Graph`
                    - :class:`Graph`
                    - :class:`vineyard.Object`, :class:`vineyard.ObjectID` or :class:`vineyard.ObjectName`
                Defaults to None, in which case no pending operation is set.
            oid_type (str): ID type of the graph. After normalization it must
                be "int64_t" or "std::string". Defaults to "int64".
            directed (bool): Stored as ``self._directed``. Defaults to True.
            generate_eid (bool): Stored as ``self._generate_eid``.
                Defaults to True.

        Raises:
            ValueError: If the normalized ``oid_type`` is neither "int64_t"
                nor "std::string".
            RuntimeError: If ``incoming_data`` is of an unsupported type.
        """

        self._key = None
        self._graph_type = types_pb2.ARROW_PROPERTY
        self._vineyard_id = 0
        self._schema = GraphSchema()
        if session is None:
            session = get_default_session()
        self._session = session
        self._detached = False

        self._interactive_instance_launching_thread = None
        self._interactive_instance_list = []
        self._learning_instance_list = []

        # Hold uncompleted operation for lazy evaluation
        self._pending_op = None
        # Hold a reference to base graph of modify operation,
        # to avoid being garbage collected
        self._base_graph = None

        oid_type = utils.normalize_data_type_str(oid_type)
        if oid_type not in ("int64_t", "std::string"):
            raise ValueError("oid_type can only be int64_t or string.")
        self._oid_type = oid_type
        self._directed = directed
        self._generate_eid = generate_eid

        self._unsealed_vertices = {}
        self._unsealed_edges = {}
        # Used to display schema without load into vineyard,
        # and do sanity checking for newly added vertices and edges.
        self._v_labels = []
        self._e_labels = []
        self._e_relationships = []

        if incoming_data is not None:
            # Don't import the :code:`NXGraph` in top-level statements to improve the
            # performance of :code:`import graphscope`.
            from graphscope.experimental import nx

            # Each supported source type is turned into a pending operation;
            # the checks run in this order and the first match wins.
            if isinstance(incoming_data, Operation):
                self._pending_op = incoming_data
                # A project operation yields a projected graph type.
                if self._pending_op.type == types_pb2.PROJECT_GRAPH:
                    self._graph_type = types_pb2.ARROW_PROJECTED
            elif isinstance(incoming_data, nx.Graph):
                self._pending_op = self._from_nx_graph(incoming_data)
            elif isinstance(incoming_data, Graph):
                self._pending_op = self._copy_from(incoming_data)
            elif isinstance(
                    incoming_data,
                (vineyard.Object, vineyard.ObjectID, vineyard.ObjectName)):
                self._pending_op = self._from_vineyard(incoming_data)
            else:
                raise RuntimeError("Not supported incoming data.")
예제 #12
0
def load_ogbn_mag(sess=None, prefix=None):
    """Load the ogbn-mag graph.

    The ogbn-mag dataset is a heterogeneous network composed of a subset
    of the Microsoft Academic Graph (MAG). See more details here:

        https://ogb.stanford.edu/docs/nodeprop/#ogbn-mag

    Args:
        sess (:class:`graphscope.Session`): Load graph within the session.
            Default session will be used when setting to None. Defaults to None.
        prefix: `PathLike` object that represents a path.
            With standalone mode, set prefix None will try to download from
            source URL. Defaults to None.

    Returns:
        :class:`graphscope.framework.graph.GraphDAGNode`:
            A Graph node which graph type is ArrowProperty, evaluated in eager mode.

    Examples:
        .. code:: python

        >>> # lazy mode
        >>> import graphscope
        >>> from graphscope.dataset import load_ogbn_mag
        >>> sess = graphscope.session(mode="lazy")
        >>> g = load_ogbn_mag(sess, "/path/to/dataset")
        >>> g1 = sess.run(g)

        >>> # eager mode
        >>> import graphscope
        >>> from graphscope.dataset import load_ogbn_mag
        >>> sess = graphscope.session(mode="eager")
        >>> g = load_ogbn_mag(sess, "/path/to/dataset")
    """
    if prefix is None:
        fname = "ogbn_mag_small.tar.gz"
        origin = f"{DATA_SITE}/ogbn_mag_small.tar.gz"
        fpath = download_file(
            fname,
            origin=origin,
            extract=True,
            file_hash="ccd128ab673e5d7dd1cceeaa4ba5d65b67a18212c4a27b0cd090359bd7042b10",
        )
        # assumed dirname is ogbn_mag_small after extracting from ogbn_mag_small.tar.gz
        prefix = fpath[0:-7]
    else:
        prefix = os.path.expandvars(prefix)

    if sess is None:
        sess = get_default_session()

    # (file name, vertex label), added in this order.
    vertex_specs = (
        ("paper.csv", "paper"),
        ("author.csv", "author"),
        ("institution.csv", "institution"),
        ("field_of_study.csv", "field_of_study"),
    )
    # (file name, edge label, source vertex label, destination vertex label).
    edge_specs = (
        ("author_affiliated_with_institution.csv", "affiliated", "author", "institution"),
        ("paper_has_topic_field_of_study.csv", "hasTopic", "paper", "field_of_study"),
        ("paper_cites_paper.csv", "cites", "paper", "paper"),
        ("author_writes_paper.csv", "writes", "author", "paper"),
    )

    graph = sess.g()
    for file_name, vertex_label in vertex_specs:
        graph = graph.add_vertices(os.path.join(prefix, file_name), vertex_label)
    for file_name, edge_label, src, dst in edge_specs:
        graph = graph.add_edges(
            os.path.join(prefix, file_name),
            edge_label,
            src_label=src,
            dst_label=dst,
        )

    return graph
예제 #13
0
def load_cora(sess=None, prefix=None, directed=False):
    """Load cora datasets.

    The Cora dataset consists of 2708 scientific publications classified into one of seven classes.
    The citation network consists of 5429 links. Each publication in the dataset is described by a
    0/1-valued word vector indicating the absence/presence of the corresponding word from the dictionary.
    See more details here:

        https://linqs.soe.ucsc.edu/data

    Args:
        sess (:class:`graphscope.Session`): Load graph within the session.
            Default session will be used when setting to None. Defaults to None.
        prefix: `PathLike` object that represents a path.
            With standalone mode, set prefix None will try to download from
            source URL. Defaults to None.
        directed (bool, optional): Determine to load a directed or undirected graph.
            Defaults to False.

    Returns:
        :class:`graphscope.framework.graph.GraphDAGNode`:
            A Graph node which graph type is ArrowProperty, evaluated in eager mode.

    Examples:
        .. code:: python

        >>> # lazy mode
        >>> import graphscope
        >>> from graphscope.dataset import load_cora
        >>> sess = graphscope.session(mode="lazy")
        >>> g = load_cora(sess, "/path/to/dataset")
        >>> g1 = sess.run(g)

        >>> # eager mode
        >>> import graphscope
        >>> from graphscope.dataset import load_cora
        >>> sess = graphscope.session(mode="eager")
        >>> g = load_cora(sess, "/path/to/dataset")
    """
    if prefix is None:
        fname = "cora.tar.gz"
        fpath = download_file(
            fname,
            origin=f"{DATA_SITE}/{fname}",
            extract=True,
            file_hash="2dae0c5ec6eca4321fc94614381d6c74a216726b930e4de228bc15fa1ab504e8",
        )
        # The extracted directory is assumed to be named after the archive
        # without its ".tar.gz" suffix (cora.tar.gz -> cora).
        prefix = fpath[: -len(".tar.gz")]
    else:
        # Environment variables such as $HOME in the given path are expanded.
        prefix = os.path.expandvars(prefix)

    if sess is None:
        sess = get_default_session()

    node_path = os.path.join(prefix, "node.csv")
    edge_path = os.path.join(prefix, "edge.csv")

    graph = sess.g(directed=directed).add_vertices(node_path, "paper")
    return graph.add_edges(
        edge_path,
        "cites",
        src_label="paper",
        dst_label="paper",
    )
예제 #14
0
# Property columns loaded for every LDBC vertex label, in loading order.
# Each vertex table lives in "<label>_0_0.csv" and is keyed by its "id" column.
_LDBC_VERTEX_PROPERTIES = (
    ("comment", ["creationDate", "locationIP", "browserUsed", "content", "length"]),
    ("organisation", ["type", "name", "url"]),
    ("tagclass", ["name", "url"]),
    (
        "person",
        [
            "firstName",
            "lastName",
            "gender",
            "birthday",
            "creationDate",
            "locationIP",
            "browserUsed",
        ],
    ),
    ("forum", ["title", "creationDate"]),
    ("place", ["name", "url", "type"]),
    (
        "post",
        [
            "imageFile",
            "creationDate",
            "locationIP",
            "browserUsed",
            "language",
            "content",
            "length",
        ],
    ),
    ("tag", ["name", "url"]),
)

# Entity name used in the id-column headers of the edge files, per vertex label.
_LDBC_HEADER_NAMES = {
    "comment": "Comment",
    "organisation": "Organisation",
    "tagclass": "TagClass",
    "person": "Person",
    "forum": "Forum",
    "place": "Place",
    "post": "Post",
    "tag": "Tag",
}

# Relations grouped by edge label, in loading order. Each relation is
# (source vertex label, destination vertex label, edge property columns) and is
# stored in "<source>_<edge label>_<destination>_0_0.csv".
_LDBC_EDGE_RELATIONS = (
    ("replyOf", [("comment", "comment", []), ("comment", "post", [])]),
    ("isPartOf", [("place", "place", [])]),
    ("isSubclassOf", [("tagclass", "tagclass", [])]),
    (
        "hasTag",
        [("forum", "tag", []), ("comment", "tag", []), ("post", "tag", [])],
    ),
    ("knows", [("person", "person", ["creationDate"])]),
    ("hasModerator", [("forum", "person", [])]),
    ("hasInterest", [("person", "tag", [])]),
    (
        "isLocatedIn",
        [
            ("post", "place", []),
            ("comment", "place", []),
            ("organisation", "place", []),
            ("person", "place", []),
        ],
    ),
    ("hasType", [("tag", "tagclass", [])]),
    ("hasCreator", [("post", "person", []), ("comment", "person", [])]),
    ("containerOf", [("forum", "post", [])]),
    ("hasMember", [("forum", "person", ["joinDate"])]),
    ("workAt", [("person", "organisation", ["workFrom"])]),
    (
        "likes",
        [
            ("person", "comment", ["creationDate"]),
            ("person", "post", ["creationDate"]),
        ],
    ),
    ("studyAt", [("person", "organisation", ["classYear"])]),
)


def load_ldbc(sess=None, prefix=None, directed=True):
    """Load the LDBC sample dataset as an ArrowProperty graph.

    Eight vertex labels and fifteen edge labels are read from pipe-delimited
    CSV files (with a header row) located under ``prefix`` and handed to
    ``sess.load_from`` with ``generate_eid=True``.

    Args:
        sess (:class:`graphscope.Session`): Load graph within the session.
            Default session will be used when setting to None. Defaults to None.
        prefix: `PathLike` object that represents a path. Environment variables
            in it are expanded. With standalone mode, setting prefix to None
            will try to download the dataset from the source URL.
            Defaults to None.
        directed (bool, optional): Determine to load a directed or undirected graph.
            Defaults to True.

    Returns:
        :class:`graphscope.framework.graph.GraphDAGNode`:
            A Graph node whose graph type is ArrowProperty, evaluated in eager mode.

    Examples:
        .. code:: python

        >>> # lazy mode
        >>> import graphscope
        >>> from graphscope.dataset import load_ldbc
        >>> sess = graphscope.session(mode="lazy")
        >>> g = load_ldbc(sess, "/path/to/dataset", True)
        >>> g1 = sess.run(g)

        >>> # eager mode
        >>> import graphscope
        >>> from graphscope.dataset import load_ldbc
        >>> sess = graphscope.session(mode="eager")
        >>> g = load_ldbc(sess, "/path/to/dataset", True)

    """
    if prefix is not None:
        prefix = os.path.expandvars(prefix)
    else:
        fname = "ldbc_sample.tar.gz"
        origin = f"{DATA_SITE}/ldbc_sample.tar.gz"
        fpath = download_file(
            fname,
            origin=origin,
            extract=True,
            file_hash="1a3d3c36fbf416c2a02ca4163734192eed602649220d7ceef2735fc11173fc6c",
        )
        # assumed dirname is ldbc_sample after extracting from ldbc_sample.tar.gz,
        # i.e. the archive path without its ".tar.gz" suffix
        prefix = fpath[: -len(".tar.gz")]

    if sess is None:
        sess = get_default_session()

    def table_loader(stem):
        # Every LDBC table is a pipe-delimited "<stem>_0_0.csv" with a header row.
        return Loader(
            os.path.join(prefix, f"{stem}_0_0.csv"), header_row=True, delimiter="|"
        )

    def id_column(label, second_of_same_label=False):
        # When both endpoints share a label, the destination id column is
        # addressed as "<Name>.id.1" to tell it apart from the source column
        # "<Name>.id" (presumably how the reader renames the duplicated
        # header -- verify against the CSV files).
        column = f"{_LDBC_HEADER_NAMES[label]}.id"
        return f"{column}.1" if second_of_same_label else column

    # Fresh property lists are created on every call so callers never share
    # (or mutate) the module-level tables.
    vertices = {
        label: (table_loader(label), list(properties), "id")
        for label, properties in _LDBC_VERTEX_PROPERTIES
    }

    edges = {
        edge_label: [
            (
                table_loader(f"{src}_{edge_label}_{dst}"),
                list(properties),
                (id_column(src), src),
                (id_column(dst, second_of_same_label=(src == dst)), dst),
            )
            for src, dst, properties in relations
        ]
        for edge_label, relations in _LDBC_EDGE_RELATIONS
    }
    return sess.load_from(edges, vertices, directed, generate_eid=True)
예제 #15
0
def load_ogbl_collab(sess=None, prefix=None):
    """Load the ogbl-collab graph.

    The ogbl-collab dataset is an undirected graph, representing a subset of
    the collaboration network between authors indexed by MAG. Each node
    represents an author and edges indicate the collaboration between authors.
    All nodes come with 128-dimensional features, obtained by averaging the
    word embeddings of papers that are published by the authors. All edges are
    associated with two pieces of meta-information: the year and the edge
    weight, representing the number of co-authored papers published in that
    year. The graph can be viewed as a dynamic multi-graph since there can be
    multiple edges between two nodes if they collaborate in more than one year.
    See more details here:

        https://ogb.stanford.edu/docs/linkprop/#ogbl-collab

    Args:
        sess (:class:`graphscope.Session`): Load graph within the session.
            Default session will be used when setting to None. Defaults to None.
        prefix: `PathLike` object that represents a path. Environment variables
            in it are expanded. With standalone mode, setting prefix to None
            will try to download the dataset from the source URL.
            Defaults to None.

    Returns:
        :class:`graphscope.framework.graph.GraphDAGNode`:
            A Graph node whose graph type is ArrowProperty, evaluated in eager mode.

    Examples:
        .. code:: python

        >>> # lazy mode
        >>> import graphscope
        >>> from graphscope.dataset import load_ogbl_collab
        >>> sess = graphscope.session(mode="lazy")
        >>> g = load_ogbl_collab(sess, "/path/to/dataset")
        >>> g1 = sess.run(g)

        >>> # eager mode
        >>> import graphscope
        >>> from graphscope.dataset import load_ogbl_collab
        >>> sess = graphscope.session(mode="eager")
        >>> g = load_ogbl_collab(sess, "/path/to/dataset")
    """
    if prefix is None:
        archive_path = download_file(
            "ogbl_collab.tar.gz",
            origin=f"{DATA_SITE}/ogbl_collab.tar.gz",
            extract=True,
            file_hash="abb49a2f7c6c16ed355ea83ec7ce65ece1278eec40e6fef6ee9918b4383ae459",
        )
        # The archive is assumed to unpack into a directory named like the
        # archive itself minus the ".tar.gz" suffix (i.e. "ogbl_collab").
        dataset_dir = archive_path[: -len(".tar.gz")]
    else:
        dataset_dir = os.path.expandvars(prefix)

    session = get_default_session() if sess is None else sess

    # NOTE(review): the dataset is described as undirected, but the graph is
    # created with the session's default `directed` setting -- confirm intent.
    collab = session.g()
    collab = collab.add_vertices(os.path.join(dataset_dir, "nodes.csv"), "author")
    return collab.add_edges(os.path.join(dataset_dir, "edge.csv"), "collaboration")
예제 #16
0
def load_from(
    edges: Union[Mapping[str, Union[LoaderVariants, Sequence, Mapping]],
                 LoaderVariants, Sequence],
    vertices: Union[Mapping[str, Union[LoaderVariants, Sequence, Mapping]],
                    LoaderVariants, Sequence, None, ] = None,
    directed=True,
    oid_type="int64_t",
    generate_eid=True,
) -> Graph:
    """Load an Arrow property graph using a list of vertex/edge specifications.

    The graph is created within the default session.

    - Use Dict of tuples to setup a graph.
        We can use a dict to set vertex and edge configurations,
        which can be used to build graphs.

        Examples:

        .. code:: ipython

            g = graphscope_session.load_from(
                edges={
                    "group": [
                        (
                            "file:///home/admin/group.e",
                            ["group_id", "member_size"],
                            ("leader_student_id", "student"),
                            ("member_student_id", "student"),
                        ),
                        (
                            "file:///home/admin/group_for_teacher_student.e",
                            ["group_id", "group_name", "establish_date"],
                            ("teacher_in_charge_id", "teacher"),
                            ("member_student_id", "student"),
                        ),
                    ]
                },
                vertices={
                    "student": (
                        "file:///home/admin/student.v",
                        ["name", "lesson_nums", "avg_score"],
                        "student_id",
                    ),
                    "teacher": (
                        "file:///home/admin/teacher.v",
                        ["name", "salary", "age"],
                        "teacher_id",
                    ),
                },
            )

        Here "group" is an edge label with two relations (student -> student
        and teacher -> student), while "student" and "teacher" are vertex
        labels. Each edge tuple is (loader, properties, source, destination)
        and each vertex tuple is (loader, properties, vid).

    - Use Dict of dict to setup a graph.
        We can also give each element inside the tuple a meaningful name,
        which makes it more understandable.

        Examples:

        .. code:: ipython

            g = graphscope_session.load_from(
                edges={
                    "group": [
                        {
                            "loader": "file:///home/admin/group.e",
                            "properties": ["group_id", "member_size"],
                            "source": ("leader_student_id", "student"),
                            "destination": ("member_student_id", "student"),
                        },
                        {
                            "loader": "file:///home/admin/group_for_teacher_student.e",
                            "properties": ["group_id", "group_name", "establish_date"],
                            "source": ("teacher_in_charge_id", "teacher"),
                            "destination": ("member_student_id", "student"),
                        },
                    ]
                },
                vertices={
                    "student": {
                        "loader": "file:///home/admin/student.v",
                        "properties": ["name", "lesson_nums", "avg_score"],
                        "vid": "student_id",
                    },
                    "teacher": {
                        "loader": "file:///home/admin/teacher.v",
                        "properties": ["name", "salary", "age"],
                        "vid": "teacher_id",
                    },
                },
            )

    Args:
        edges: Edge configuration of the graph. If it is already a ``Graph``,
            an ``nx.Graph`` or one of ``VineyardObjectTypes``, it is wrapped
            into a ``Graph`` directly and all other arguments are ignored.
        vertices (optional): Vertices configurations of the graph. Defaults to None.
            If None, we assume all edge's src_label and dst_label are deduced and unambiguous.
        directed (bool, optional): Indicate whether the graph
            should be treated as directed or undirected. Defaults to True.
        oid_type (str, optional): ID type of graph. Can be "int64_t" or "string". Defaults to "int64_t".
        generate_eid (bool, optional): Whether to generate a unique edge id for each edge. Generated eid will be placed
            in third column. This feature is for cooperating with interactive engine.
            If you only need to work with analytical engine, set it to False. Defaults to True.

    Returns:
        Graph: The graph bound to the default session.

    Raises:
        ValueError: If no default session is available.
    """

    # Don't import :code:`nx` in top-level statements, to improve the
    # performance of :code:`import graphscope`.
    from graphscope.experimental import nx

    sess = get_default_session()
    if sess is None:
        raise ValueError("No default session found.")

    # Already-built graph-like objects need no loading; wrap them as they are.
    if isinstance(edges, (Graph, nx.Graph, *VineyardObjectTypes)):
        return Graph(sess.session_id, edges)

    oid_type = utils.normalize_data_type_str(oid_type)
    e_labels, v_labels = _sanity_check(
        normalize_parameter_edges(edges),
        normalize_parameter_vertices(vertices),
    )
    config = _get_config(e_labels, v_labels, directed, oid_type, generate_eid)
    op = dag_utils.create_graph(
        sess.session_id, types_pb2.ARROW_PROPERTY, attrs=config
    )
    # Running the op yields the graph definition that backs the Graph handle.
    graph_def = sess.run(op)
    return Graph(sess.session_id, graph_def)
예제 #17
0
def load_ogbl_ddi(sess=None, prefix=None):
    """Load the ogbl-ddi graph.

    The ogbl-ddi dataset is a homogeneous, unweighted, undirected graph,
    representing the drug-drug interaction network. Each node represents an
    FDA-approved or experimental drug. Edges represent interactions between
    drugs and can be interpreted as a phenomenon where the joint effect of
    taking the two drugs together is considerably different from the expected
    effect in which drugs act independently of each other.
    See more details here:

        https://ogb.stanford.edu/docs/linkprop/#ogbl-ddi

    Args:
        sess (:class:`graphscope.Session`): Load graph within the session.
            Default session will be used when setting to None. Defaults to None.
        prefix: `PathLike` object that represents a path. Environment variables
            in it are expanded. With standalone mode, setting prefix to None
            will try to download the dataset from the source URL.
            Defaults to None.

    Returns:
        :class:`graphscope.framework.graph.GraphDAGNode`:
            A Graph node whose graph type is ArrowProperty, evaluated in eager mode.

    Examples:
        .. code:: python

        >>> # lazy mode
        >>> import graphscope
        >>> from graphscope.dataset import load_ogbl_ddi
        >>> sess = graphscope.session(mode="lazy")
        >>> g = load_ogbl_ddi(sess, "/path/to/dataset")
        >>> g1 = sess.run(g)

        >>> # eager mode
        >>> import graphscope
        >>> from graphscope.dataset import load_ogbl_ddi
        >>> sess = graphscope.session(mode="eager")
        >>> g = load_ogbl_ddi(sess, "/path/to/dataset")
    """
    if prefix is None:
        tarball = download_file(
            "ogbl_ddi.tar.gz",
            origin=f"{DATA_SITE}/ogbl_ddi.tar.gz",
            extract=True,
            file_hash="2a66bf265a217fd6148ba1f0ed9c9a297e778bf539b2b7262edf4a0dc1f4c8b9",
        )
        # The extracted directory is assumed to be the tarball path without
        # its ".tar.gz" suffix (i.e. "ogbl_ddi").
        root = tarball[: -len(".tar.gz")]
    else:
        root = os.path.expandvars(prefix)

    if sess is None:
        sess = get_default_session()

    drugs = sess.g().add_vertices(os.path.join(root, "nodes.csv"), "drug")
    return drugs.add_edges(os.path.join(root, "edge.csv"), "effect")
예제 #18
0
def load_ppi(sess=None, prefix=None, directed=False):
    """Load protein-protein links datasets.

    In protein-protein links graph, every node represents a protein, and edges
    represent the links between them. See more details here:

        https://humgenomics.biomedcentral.com/articles/10.1186/1479-7364-3-3-291

    Args:
        sess (:class:`graphscope.Session`): Load graph within the session.
            Default session will be used when setting to None. Defaults to None.
        prefix: `PathLike` object that represents a path. Environment variables
            in it are expanded. With standalone mode, setting prefix to None
            will try to download the dataset from the source URL.
            Defaults to None.
        directed (bool, optional): Determine to load a directed or undirected graph.
            Defaults to False.

    Returns:
        :class:`graphscope.framework.graph.GraphDAGNode`:
            A Graph node whose graph type is ArrowProperty, evaluated in eager mode.

    Examples:
        .. code:: python

        >>> # lazy mode
        >>> import graphscope
        >>> from graphscope.dataset import load_ppi
        >>> sess = graphscope.session(mode="lazy")
        >>> g = load_ppi(sess, "/path/to/dataset")
        >>> g1 = sess.run(g)

        >>> # eager mode
        >>> import graphscope
        >>> from graphscope.dataset import load_ppi
        >>> sess = graphscope.session(mode="eager")
        >>> g = load_ppi(sess, "/path/to/dataset")
    """
    if prefix is not None:
        prefix = os.path.expandvars(prefix)
    else:
        fname = "ppi.tar.gz"
        origin = f"{DATA_SITE}/ppi.tar.gz"
        fpath = download_file(
            fname,
            origin=origin,
            extract=True,
            file_hash="2ffe7207626f5b177cb05871b65ee7c95fc9ebc45cc9f628d36efef8b5c0b642",
        )
        # assumed dirname is ppi after extracting from ppi.tar.gz, i.e. the
        # archive path without its ".tar.gz" suffix
        prefix = fpath[: -len(".tar.gz")]

    if sess is None:
        sess = get_default_session()

    graph = sess.g(directed=directed)
    graph = graph.add_vertices(os.path.join(prefix, "node.csv"), "protein")
    # Both endpoints of a "link" edge are proteins.
    return graph.add_edges(
        os.path.join(prefix, "edge.csv"),
        "link",
        src_label="protein",
        dst_label="protein",
    )
예제 #19
0
def load_modern_graph(sess=None, prefix=None, directed=True):
    """Load the modern graph.

    The modern graph consists of 6 vertices and 6 edges and is useful for
    testing the basic functionalities. It has "person" and "software"
    vertices connected by "knows" (person -> person) and "created"
    (person -> software) edges, each read from a pipe-delimited CSV file.

    Args:
        sess (:class:`graphscope.Session`): Load graph within the session.
            Default session will be used when setting to None. Defaults to None.
        prefix (str): `PathLike` object that represents a path. Environment
            variables in it are expanded. With standalone mode, setting prefix
            to None will try to download the dataset from the source URL.
            Defaults to None.
        directed (bool, optional): Determine to load a directed or undirected graph.
            Defaults to True.

    Returns:
        :class:`graphscope.framework.graph.GraphDAGNode`:
            A Graph node whose graph type is ArrowProperty, evaluated in eager mode.

    Examples:
        .. code:: python

        >>> # lazy mode
        >>> import graphscope
        >>> from graphscope.dataset.modern_graph import load_modern_graph
        >>> sess = graphscope.session(mode="lazy")
        >>> g = load_modern_graph(sess, "/path/to/dataset", True)
        >>> g1 = sess.run(g)

        >>> # eager mode
        >>> import graphscope
        >>> from graphscope.dataset.modern_graph import load_modern_graph
        >>> sess = graphscope.session(mode="eager")
        >>> g = load_modern_graph(sess, "/path/to/dataset", True)
    """
    if prefix is None:
        archive = download_file(
            "modern_graph.tar.gz",
            origin=f"{DATA_SITE}/modern_graph.tar.gz",
            extract=True,
            file_hash="a67c02191ea9dfa618a83d94087349a25937b92973f42206a28fdf6fa5299dec",
        )
        # The archive is assumed to unpack into a directory named like the
        # archive itself minus the ".tar.gz" suffix (i.e. "modern_graph").
        data_dir = archive[: -len(".tar.gz")]
    else:
        data_dir = os.path.expandvars(prefix)

    session = get_default_session() if sess is None else sess

    def pipe_delimited(filename):
        # All modern-graph tables use "|" as the column separator.
        return Loader(os.path.join(data_dir, filename), delimiter="|")

    modern = session.g(directed=directed)
    modern = modern.add_vertices(
        pipe_delimited("person.csv"), "person", ["name", ("age", "int")], "id"
    )
    modern = modern.add_vertices(
        pipe_delimited("software.csv"), "software", ["name", "lang"], "id"
    )
    # Both edge tables start from a person and carry a single "weight"
    # property; they differ only in the edge label and destination label.
    for edge_label, target_label in (("knows", "person"), ("created", "software")):
        modern = modern.add_edges(
            pipe_delimited(f"{edge_label}.csv"),
            edge_label,
            ["weight"],
            src_label="person",
            dst_label=target_label,
            src_field="src_id",
            dst_field="dst_id",
        )
    return modern