Beispiel #1
0
def load_ogbn_arxiv(sess=None, prefix=None):
    """Load the ogbn_arxiv graph.

    The ogbn-arxiv dataset is a directed graph, representing the citation
    network between all Computer Science (CS) arXiv papers indexed by
    Microsoft Academic Graph (MAG). See more details here:

        https://ogb.stanford.edu/docs/nodeprop/#ogbn-arxiv

    Vertices are read from ``nodes.csv`` with label ``paper`` and edges from
    ``edge.csv`` with label ``citation``, both located under ``prefix``.

    Args:
        sess (:class:`graphscope.Session`): Load graph within the session.
            Default session will be used when setting to None. Defaults to None.
        prefix: `PathLike` object that represents the dataset directory.
            Environment variables in the path are expanded. When None, the
            ``ogbn_arxiv.tar.gz`` archive is fetched from ``DATA_SITE`` through
            ``download_file`` and its extraction directory is used instead.
            Defaults to None.

    Returns:
        :class:`graphscope.framework.graph.GraphDAGNode`:
            A Graph node which graph type is ArrowProperty, evaluated in eager mode.

    Examples:
        .. code:: python

            >>> # lazy mode
            >>> import graphscope
            >>> from graphscope.dataset import load_ogbn_arxiv
            >>> sess = graphscope.session(mode="lazy")
            >>> g = load_ogbn_arxiv(sess, "/path/to/dataset")
            >>> g1 = sess.run(g)

            >>> # eager mode
            >>> import graphscope
            >>> from graphscope.dataset import load_ogbn_arxiv
            >>> sess = graphscope.session(mode="eager")
            >>> g = load_ogbn_arxiv(sess, "/path/to/dataset")
    """
    if prefix is None:
        archive = "ogbn_arxiv.tar.gz"
        fpath = download_file(
            archive,
            origin=f"{DATA_SITE}/{archive}",
            extract=True,
            file_hash="d920922681e8369da5dc8e0f28fffae2eb0db056dc626097f4159351d4ea4389",
        )
        # The archive is assumed to unpack into a directory named after it,
        # so dropping the ".tar.gz" suffix yields the dataset directory.
        prefix = fpath[: -len(".tar.gz")]
    else:
        prefix = os.path.expandvars(prefix)

    if sess is None:
        sess = get_default_session()

    nodes_path = os.path.join(prefix, "nodes.csv")
    edges_path = os.path.join(prefix, "edge.csv")

    graph = sess.g()
    graph = graph.add_vertices(nodes_path, "paper")
    graph = graph.add_edges(edges_path, "citation")
    return graph
Beispiel #2
0
def load_ppi(sess=None, prefix=None, directed=False):
    """Load protein-protein links datasets.

    In protein-protein links graph, every node represents a protein, and edges
    represent the links between them. See more details here:

        https://humgenomics.biomedcentral.com/articles/10.1186/1479-7364-3-3-291

    Vertices are read from ``node.csv`` with label ``protein`` and edges from
    ``edge.csv`` with label ``link`` (protein -> protein), both located under
    ``prefix``.

    Args:
        sess (:class:`graphscope.Session`): Load graph within the session.
            Default session will be used when setting to None. Defaults to None.
        prefix: `PathLike` object that represents the dataset directory.
            Environment variables in the path are expanded. When None, the
            ``ppi.tar.gz`` archive is fetched from ``DATA_SITE`` through
            ``download_file`` and its extraction directory is used instead.
            Defaults to None.
        directed (bool, optional): Determine to load a directed or undirected graph.
            Defaults to False.

    Returns:
        :class:`graphscope.framework.graph.GraphDAGNode`:
            A Graph node which graph type is ArrowProperty, evaluated in eager mode.

    Examples:
        .. code:: python

            >>> # lazy mode
            >>> import graphscope
            >>> from graphscope.dataset import load_ppi
            >>> sess = graphscope.session(mode="lazy")
            >>> g = load_ppi(sess, "/path/to/dataset")
            >>> g1 = sess.run(g)

            >>> # eager mode
            >>> import graphscope
            >>> from graphscope.dataset import load_ppi
            >>> sess = graphscope.session(mode="eager")
            >>> g = load_ppi(sess, "/path/to/dataset")
    """
    if prefix is None:
        archive = "ppi.tar.gz"
        fpath = download_file(
            archive,
            origin=f"{DATA_SITE}/{archive}",
            extract=True,
            file_hash="2ffe7207626f5b177cb05871b65ee7c95fc9ebc45cc9f628d36efef8b5c0b642",
        )
        # The archive is assumed to unpack into a directory named "ppi",
        # so dropping the ".tar.gz" suffix yields the dataset directory.
        prefix = fpath[: -len(".tar.gz")]
    else:
        prefix = os.path.expandvars(prefix)

    if sess is None:
        sess = get_default_session()

    nodes_path = os.path.join(prefix, "node.csv")
    edges_path = os.path.join(prefix, "edge.csv")

    graph = sess.g(directed=directed)
    graph = graph.add_vertices(nodes_path, "protein")
    graph = graph.add_edges(
        edges_path,
        "link",
        src_label="protein",
        dst_label="protein",
    )
    return graph
Beispiel #3
0
def load_ogbn_proteins(sess=None, prefix=None):
    """Load the ogbn_proteins graph.

    The ogbn-proteins dataset is an undirected, weighted, and typed (according
    to species) graph. Nodes represent proteins, and edges indicate different
    types of biologically meaningful associations between proteins, e.g.,
    physical interactions, co-expression or homology [1,2]. All edges come
    with 8-dimensional features, where each dimension represents the
    approximate confidence of a single association type and takes values
    between 0 and 1 (the larger the value is, the more confident we are about
    the association). The proteins come from 8 species.
    See more details here:

        https://ogb.stanford.edu/docs/nodeprop/#ogbn-proteins

    Vertices are read from ``nodes.csv`` with label ``proteins`` and edges
    from ``edge.csv`` with label ``associations``, both located under
    ``prefix``.

    Args:
        sess (:class:`graphscope.Session`): Load graph within the session.
            Default session will be used when setting to None. Defaults to None.
        prefix: `PathLike` object that represents the dataset directory.
            Environment variables in the path are expanded. When None, the
            ``ogbn_proteins.tar.gz`` archive is fetched from ``DATA_SITE``
            through ``download_file`` and its extraction directory is used
            instead. Defaults to None.

    Returns:
        :class:`graphscope.framework.graph.GraphDAGNode`:
            A Graph node which graph type is ArrowProperty, evaluated in eager mode.

    Examples:
        .. code:: python

            >>> # lazy mode
            >>> import graphscope
            >>> from graphscope.dataset import load_ogbn_proteins
            >>> sess = graphscope.session(mode="lazy")
            >>> g = load_ogbn_proteins(sess, "/path/to/dataset")
            >>> g1 = sess.run(g)

            >>> # eager mode
            >>> import graphscope
            >>> from graphscope.dataset import load_ogbn_proteins
            >>> sess = graphscope.session(mode="eager")
            >>> g = load_ogbn_proteins(sess, "/path/to/dataset")
    """
    if prefix is None:
        archive = "ogbn_proteins.tar.gz"
        fpath = download_file(
            archive,
            origin=f"{DATA_SITE}/{archive}",
            extract=True,
            file_hash="ea427e520bf068f3d6788d940b3bdc6773b965d792f2fa4a52311eab478acbde",
        )
        # The archive is assumed to unpack into a directory named after it,
        # so dropping the ".tar.gz" suffix yields the dataset directory.
        prefix = fpath[: -len(".tar.gz")]
    else:
        prefix = os.path.expandvars(prefix)

    if sess is None:
        sess = get_default_session()

    nodes_path = os.path.join(prefix, "nodes.csv")
    edges_path = os.path.join(prefix, "edge.csv")

    graph = sess.g()
    graph = graph.add_vertices(nodes_path, "proteins")
    graph = graph.add_edges(edges_path, "associations")
    return graph
Beispiel #4
0
def load_u2i(sess=None, prefix=None, directed=True):
    """Load the user2item dataset.

    The user-2-item dataset consists of 5241 nodes, which represent both user
    and item nodes, and 42876 edges that represent buying relationships. This
    dataset is owned by graphlearn; you can download it from here:

        https://github.com/alibaba/graph-learn/blob/graphscope/examples/data/u2i.py

    Both vertex labels (``u`` and ``i``) are loaded from the tab-separated
    ``node.csv``; the edge labels ``u-i`` and ``u-i_reverse`` are both loaded
    from the tab-separated ``edge.csv``, the latter with source and
    destination columns swapped.

    Args:
        sess (:class:`graphscope.Session`): Load graph within the session.
            Default session will be used when setting to None. Defaults to None.
        prefix: `PathLike` object that represents the dataset directory.
            Environment variables in the path are expanded. When None, the
            ``u2i.tar.gz`` archive is fetched from ``DATA_SITE`` through
            ``download_file`` and its extraction directory is used instead.
            Defaults to None.
        directed (bool, optional): Determine to load a directed or undirected graph.
            Defaults to True.

    Returns:
        :class:`graphscope.framework.graph.GraphDAGNode`:
            A Graph node which graph type is ArrowProperty, evaluated in eager mode.

    Examples:
        .. code:: python

            >>> # lazy mode
            >>> import graphscope
            >>> from graphscope.dataset import load_u2i
            >>> sess = graphscope.session(mode="lazy")
            >>> g = load_u2i(sess, "/path/to/dataset")
            >>> g1 = sess.run(g)

            >>> # eager mode
            >>> import graphscope
            >>> from graphscope.dataset import load_u2i
            >>> sess = graphscope.session(mode="eager")
            >>> g = load_u2i(sess, "/path/to/dataset")

    """
    if prefix is None:
        archive = "u2i.tar.gz"
        fpath = download_file(
            archive,
            origin=f"{DATA_SITE}/{archive}",
            extract=True,
            file_hash="b5475a0f6f13b0964ba0c38804d06003a44627653df3371d938e47fb9eedced6",
        )
        # assumed dirname is u2i after extracting from u2i.tar.gz
        prefix = fpath[:-7]
    else:
        prefix = os.path.expandvars(prefix)

    if sess is None:
        sess = get_default_session()

    def tsv(filename):
        # Every call builds a fresh loader for a tab-separated file in prefix.
        return Loader(os.path.join(prefix, filename), delimiter="\t")

    graph = sess.g(directed=directed)

    # Users and items live in the same node file; register it once per label.
    for vertex_label in ("u", "i"):
        graph = graph.add_vertices(
            tsv("node.csv"),
            label=vertex_label,
            properties=[("feature", "str")],
            vid_field="id",
        )

    # Forward user -> item edges.
    graph = graph.add_edges(
        tsv("edge.csv"),
        label="u-i",
        properties=["weight"],
        src_label="u",
        dst_label="i",
        src_field="src_id",
        dst_field="dst_id",
    )
    # Reverse item -> user edges, read from the same file with columns swapped.
    graph = graph.add_edges(
        tsv("edge.csv"),
        label="u-i_reverse",
        properties=["weight"],
        src_label="i",
        dst_label="u",
        src_field="dst_id",
        dst_field="src_id",
    )
    return graph
Beispiel #5
0
def load_ogbl_ddi(sess=None, prefix=None):
    """Load the ogbl_ddi graph.

    The ogbl-ddi dataset is a homogeneous, unweighted, undirected graph,
    representing the drug-drug interaction network [1]. Each node represents
    an FDA-approved or experimental drug. Edges represent interactions between
    drugs and can be interpreted as a phenomenon where the joint effect of
    taking the two drugs together is considerably different from the expected
    effect in which drugs act independently of each other.
    See more details here:

        https://ogb.stanford.edu/docs/linkprop/#ogbl-ddi

    Vertices are read from ``nodes.csv`` with label ``drug`` and edges from
    ``edge.csv`` with label ``effect``, both located under ``prefix``.

    Args:
        sess (:class:`graphscope.Session`): Load graph within the session.
            Default session will be used when setting to None. Defaults to None.
        prefix: `PathLike` object that represents the dataset directory.
            Environment variables in the path are expanded. When None, the
            ``ogbl_ddi.tar.gz`` archive is fetched from ``DATA_SITE`` through
            ``download_file`` and its extraction directory is used instead.
            Defaults to None.

    Returns:
        :class:`graphscope.framework.graph.GraphDAGNode`:
            A Graph node which graph type is ArrowProperty, evaluated in eager mode.

    Examples:
        .. code:: python

            >>> # lazy mode
            >>> import graphscope
            >>> from graphscope.dataset import load_ogbl_ddi
            >>> sess = graphscope.session(mode="lazy")
            >>> g = load_ogbl_ddi(sess, "/path/to/dataset")
            >>> g1 = sess.run(g)

            >>> # eager mode
            >>> import graphscope
            >>> from graphscope.dataset import load_ogbl_ddi
            >>> sess = graphscope.session(mode="eager")
            >>> g = load_ogbl_ddi(sess, "/path/to/dataset")
    """
    if prefix is None:
        archive = "ogbl_ddi.tar.gz"
        fpath = download_file(
            archive,
            origin=f"{DATA_SITE}/{archive}",
            extract=True,
            file_hash="2a66bf265a217fd6148ba1f0ed9c9a297e778bf539b2b7262edf4a0dc1f4c8b9",
        )
        # The archive is assumed to unpack into a directory named after it,
        # so dropping the ".tar.gz" suffix yields the dataset directory.
        prefix = fpath[: -len(".tar.gz")]
    else:
        prefix = os.path.expandvars(prefix)

    if sess is None:
        sess = get_default_session()

    nodes_path = os.path.join(prefix, "nodes.csv")
    edges_path = os.path.join(prefix, "edge.csv")

    graph = sess.g()
    graph = graph.add_vertices(nodes_path, "drug")
    graph = graph.add_edges(edges_path, "effect")
    return graph
Beispiel #6
0
def load_ogbl_collab(sess=None, prefix=None):
    """Load the ogbl_collab graph.

    The ogbl-collab dataset is an undirected graph, representing a subset of
    the collaboration network between authors indexed by MAG. Each node
    represents an author and edges indicate the collaboration between authors.
    All nodes come with 128-dimensional features, obtained by averaging the
    word embeddings of papers that are published by the authors. All edges are
    associated with two meta-information: the year and the edge weight,
    representing the number of co-authored papers published in that year. The
    graph can be viewed as a dynamic multi-graph since there can be multiple
    edges between two nodes if they collaborate in more than one year.
    See more details here:

        https://ogb.stanford.edu/docs/linkprop/#ogbl-collab

    Vertices are read from ``nodes.csv`` with label ``author`` and edges from
    ``edge.csv`` with label ``collaboration``, both located under ``prefix``.

    Args:
        sess (:class:`graphscope.Session`): Load graph within the session.
            Default session will be used when setting to None. Defaults to None.
        prefix: `PathLike` object that represents the dataset directory.
            Environment variables in the path are expanded. When None, the
            ``ogbl_collab.tar.gz`` archive is fetched from ``DATA_SITE``
            through ``download_file`` and its extraction directory is used
            instead. Defaults to None.

    Returns:
        :class:`graphscope.framework.graph.GraphDAGNode`:
            A Graph node which graph type is ArrowProperty, evaluated in eager mode.

    Examples:
        .. code:: python

            >>> # lazy mode
            >>> import graphscope
            >>> from graphscope.dataset import load_ogbl_collab
            >>> sess = graphscope.session(mode="lazy")
            >>> g = load_ogbl_collab(sess, "/path/to/dataset")
            >>> g1 = sess.run(g)

            >>> # eager mode
            >>> import graphscope
            >>> from graphscope.dataset import load_ogbl_collab
            >>> sess = graphscope.session(mode="eager")
            >>> g = load_ogbl_collab(sess, "/path/to/dataset")
    """
    if prefix is None:
        archive = "ogbl_collab.tar.gz"
        fpath = download_file(
            archive,
            origin=f"{DATA_SITE}/{archive}",
            extract=True,
            file_hash="abb49a2f7c6c16ed355ea83ec7ce65ece1278eec40e6fef6ee9918b4383ae459",
        )
        # The archive is assumed to unpack into a directory named after it,
        # so dropping the ".tar.gz" suffix yields the dataset directory.
        prefix = fpath[: -len(".tar.gz")]
    else:
        prefix = os.path.expandvars(prefix)

    if sess is None:
        sess = get_default_session()

    nodes_path = os.path.join(prefix, "nodes.csv")
    edges_path = os.path.join(prefix, "edge.csv")

    graph = sess.g()
    graph = graph.add_vertices(nodes_path, "author")
    graph = graph.add_edges(edges_path, "collaboration")
    return graph
Beispiel #7
0
def load_ogbn_mag(sess=None, prefix=None):
    """Load the ogbn_mag graph.

    The ogbn-mag dataset is a heterogeneous network composed of a subset
    of the Microsoft Academic Graph (MAG). See more details here:

        https://ogb.stanford.edu/docs/nodeprop/#ogbn-mag

    Four vertex labels (``paper``, ``author``, ``institution``,
    ``field_of_study``) and four edge labels (``affiliated``, ``hasTopic``,
    ``cites``, ``writes``) are registered, each from its own CSV file under
    ``prefix``.

    Args:
        sess (:class:`graphscope.Session`): Load graph within the session.
            Default session will be used when setting to None. Defaults to None.
        prefix: `PathLike` object that represents the dataset directory.
            Environment variables in the path are expanded. When None, the
            ``ogbn_mag_small.tar.gz`` archive is fetched from ``DATA_SITE``
            through ``download_file`` and its extraction directory is used
            instead. Defaults to None.

    Returns:
        :class:`graphscope.framework.graph.GraphDAGNode`:
            A Graph node which graph type is ArrowProperty, evaluated in eager mode.

    Examples:
        .. code:: python

            >>> # lazy mode
            >>> import graphscope
            >>> from graphscope.dataset import load_ogbn_mag
            >>> sess = graphscope.session(mode="lazy")
            >>> g = load_ogbn_mag(sess, "/path/to/dataset")
            >>> g1 = sess.run(g)

            >>> # eager mode
            >>> import graphscope
            >>> from graphscope.dataset import load_ogbn_mag
            >>> sess = graphscope.session(mode="eager")
            >>> g = load_ogbn_mag(sess, "/path/to/dataset")
    """
    if prefix is None:
        archive = "ogbn_mag_small.tar.gz"
        fpath = download_file(
            archive,
            origin=f"{DATA_SITE}/{archive}",
            extract=True,
            file_hash="ccd128ab673e5d7dd1cceeaa4ba5d65b67a18212c4a27b0cd090359bd7042b10",
        )
        # assumed dirname is ogbn_mag_small after extracting from ogbn_mag_small.tar.gz
        prefix = fpath[:-7]
    else:
        prefix = os.path.expandvars(prefix)

    if sess is None:
        sess = get_default_session()

    # (file name, vertex label), in registration order.
    vertex_tables = (
        ("paper.csv", "paper"),
        ("author.csv", "author"),
        ("institution.csv", "institution"),
        ("field_of_study.csv", "field_of_study"),
    )
    # (file name, edge label, source vertex label, destination vertex label),
    # in registration order.
    edge_tables = (
        ("author_affiliated_with_institution.csv", "affiliated", "author", "institution"),
        ("paper_has_topic_field_of_study.csv", "hasTopic", "paper", "field_of_study"),
        ("paper_cites_paper.csv", "cites", "paper", "paper"),
        ("author_writes_paper.csv", "writes", "author", "paper"),
    )

    graph = sess.g()
    for filename, label in vertex_tables:
        graph = graph.add_vertices(os.path.join(prefix, filename), label)
    for filename, label, src, dst in edge_tables:
        graph = graph.add_edges(
            os.path.join(prefix, filename),
            label,
            src_label=src,
            dst_label=dst,
        )

    return graph
Beispiel #8
0
def load_cora(sess=None, prefix=None, directed=False):
    """Load cora datasets.

    The Cora dataset consists of 2708 scientific publications classified into
    one of seven classes. The citation network consists of 5429 links. Each
    publication in the dataset is described by a 0/1-valued word vector
    indicating the absence/presence of the corresponding word from the
    dictionary. See more details here:

        https://linqs.soe.ucsc.edu/data

    Vertices are read from ``node.csv`` with label ``paper`` and edges from
    ``edge.csv`` with label ``cites`` (paper -> paper), both located under
    ``prefix``.

    Args:
        sess (:class:`graphscope.Session`): Load graph within the session.
            Default session will be used when setting to None. Defaults to None.
        prefix: `PathLike` object that represents the dataset directory.
            Environment variables in the path are expanded. When None, the
            ``cora.tar.gz`` archive is fetched from ``DATA_SITE`` through
            ``download_file`` and its extraction directory is used instead.
            Defaults to None.
        directed (bool, optional): Determine to load a directed or undirected graph.
            Defaults to False.

    Returns:
        :class:`graphscope.framework.graph.GraphDAGNode`:
            A Graph node which graph type is ArrowProperty, evaluated in eager mode.

    Examples:
        .. code:: python

            >>> # lazy mode
            >>> import graphscope
            >>> from graphscope.dataset import load_cora
            >>> sess = graphscope.session(mode="lazy")
            >>> g = load_cora(sess, "/path/to/dataset")
            >>> g1 = sess.run(g)

            >>> # eager mode
            >>> import graphscope
            >>> from graphscope.dataset import load_cora
            >>> sess = graphscope.session(mode="eager")
            >>> g = load_cora(sess, "/path/to/dataset")
    """
    if prefix is None:
        archive = "cora.tar.gz"
        fpath = download_file(
            archive,
            origin=f"{DATA_SITE}/{archive}",
            extract=True,
            file_hash="2dae0c5ec6eca4321fc94614381d6c74a216726b930e4de228bc15fa1ab504e8",
        )
        # The archive is assumed to unpack into a directory named "cora",
        # so dropping the ".tar.gz" suffix yields the dataset directory.
        prefix = fpath[: -len(".tar.gz")]
    else:
        prefix = os.path.expandvars(prefix)

    if sess is None:
        sess = get_default_session()

    nodes_path = os.path.join(prefix, "node.csv")
    edges_path = os.path.join(prefix, "edge.csv")

    graph = sess.g(directed=directed)
    graph = graph.add_vertices(nodes_path, "paper")
    graph = graph.add_edges(
        edges_path,
        "cites",
        src_label="paper",
        dst_label="paper",
    )
    return graph
Beispiel #9
0
def load_ldbc(sess=None, prefix=None, directed=True):
    """Load the ldbc dataset as an ArrowProperty Graph.

    Eight vertex labels and sixteen edge labels are described, each backed by
    one or more ``|``-delimited CSV files (with a header row) under
    ``prefix``. The whole description is handed to ``sess.load_from`` with
    ``generate_eid=True``.

    Args:
        sess (:class:`graphscope.Session`): Load graph within the session.
            Default session will be used when setting to None. Defaults to None.
        prefix: `PathLike` object that represents the dataset directory.
            Environment variables in the path are expanded. When None, the
            ``ldbc_sample.tar.gz`` archive is fetched from ``DATA_SITE``
            through ``download_file`` and its extraction directory is used
            instead. Defaults to None.
        directed (bool, optional): Determine to load a directed or undirected graph.
            Defaults to True.

    Returns:
        :class:`graphscope.framework.graph.GraphDAGNode`:
            A Graph node which graph type is ArrowProperty, evaluated in eager mode.

    Examples:
        .. code:: python

            >>> # lazy mode
            >>> import graphscope
            >>> from graphscope.dataset import load_ldbc
            >>> sess = graphscope.session(mode="lazy")
            >>> g = load_ldbc(sess, "/path/to/dataset", True)
            >>> g1 = sess.run(g)

            >>> # eager mode
            >>> import graphscope
            >>> from graphscope.dataset import load_ldbc
            >>> sess = graphscope.session(mode="eager")
            >>> g = load_ldbc(sess, "/path/to/dataset", True)

    """
    if prefix is None:
        archive = "ldbc_sample.tar.gz"
        fpath = download_file(
            archive,
            origin=f"{DATA_SITE}/{archive}",
            extract=True,
            file_hash="1a3d3c36fbf416c2a02ca4163734192eed602649220d7ceef2735fc11173fc6c",
        )
        # assumed dirname is ldbc_sample after extracting from ldbc_sample.tar.gz
        prefix = fpath[:-7]
    else:
        prefix = os.path.expandvars(prefix)

    if sess is None:
        sess = get_default_session()

    def table(filename):
        # All LDBC files share one format: header row, "|" as the delimiter.
        return Loader(os.path.join(prefix, filename), header_row=True, delimiter="|")

    # label -> (loader, property columns, vertex id column)
    vertices = {
        "comment": (
            table("comment_0_0.csv"),
            ["creationDate", "locationIP", "browserUsed", "content", "length"],
            "id",
        ),
        "organisation": (
            table("organisation_0_0.csv"),
            ["type", "name", "url"],
            "id",
        ),
        "tagclass": (
            table("tagclass_0_0.csv"),
            ["name", "url"],
            "id",
        ),
        "person": (
            table("person_0_0.csv"),
            [
                "firstName",
                "lastName",
                "gender",
                "birthday",
                "creationDate",
                "locationIP",
                "browserUsed",
            ],
            "id",
        ),
        "forum": (
            table("forum_0_0.csv"),
            ["title", "creationDate"],
            "id",
        ),
        "place": (
            table("place_0_0.csv"),
            ["name", "url", "type"],
            "id",
        ),
        "post": (
            table("post_0_0.csv"),
            [
                "imageFile",
                "creationDate",
                "locationIP",
                "browserUsed",
                "language",
                "content",
                "length",
            ],
            "id",
        ),
        "tag": (
            table("tag_0_0.csv"),
            ["name", "url"],
            "id",
        ),
    }

    # label -> list of (loader, property columns,
    #                   (source column, source label),
    #                   (destination column, destination label))
    edges = {
        "replyOf": [
            (
                table("comment_replyOf_comment_0_0.csv"),
                [],
                ("Comment.id", "comment"),
                ("Comment.id.1", "comment"),
            ),
            (
                table("comment_replyOf_post_0_0.csv"),
                [],
                ("Comment.id", "comment"),
                ("Post.id", "post"),
            ),
        ],
        "isPartOf": [
            (
                table("place_isPartOf_place_0_0.csv"),
                [],
                ("Place.id", "place"),
                ("Place.id.1", "place"),
            )
        ],
        "isSubclassOf": [
            (
                table("tagclass_isSubclassOf_tagclass_0_0.csv"),
                [],
                ("TagClass.id", "tagclass"),
                ("TagClass.id.1", "tagclass"),
            )
        ],
        "hasTag": [
            (
                table("forum_hasTag_tag_0_0.csv"),
                [],
                ("Forum.id", "forum"),
                ("Tag.id", "tag"),
            ),
            (
                table("comment_hasTag_tag_0_0.csv"),
                [],
                ("Comment.id", "comment"),
                ("Tag.id", "tag"),
            ),
            (
                table("post_hasTag_tag_0_0.csv"),
                [],
                ("Post.id", "post"),
                ("Tag.id", "tag"),
            ),
        ],
        "knows": [
            (
                table("person_knows_person_0_0.csv"),
                ["creationDate"],
                ("Person.id", "person"),
                ("Person.id.1", "person"),
            )
        ],
        "hasModerator": [
            (
                table("forum_hasModerator_person_0_0.csv"),
                [],
                ("Forum.id", "forum"),
                ("Person.id", "person"),
            )
        ],
        "hasInterest": [
            (
                table("person_hasInterest_tag_0_0.csv"),
                [],
                ("Person.id", "person"),
                ("Tag.id", "tag"),
            )
        ],
        "isLocatedIn": [
            (
                table("post_isLocatedIn_place_0_0.csv"),
                [],
                ("Post.id", "post"),
                ("Place.id", "place"),
            ),
            (
                table("comment_isLocatedIn_place_0_0.csv"),
                [],
                ("Comment.id", "comment"),
                ("Place.id", "place"),
            ),
            (
                table("organisation_isLocatedIn_place_0_0.csv"),
                [],
                ("Organisation.id", "organisation"),
                ("Place.id", "place"),
            ),
            (
                table("person_isLocatedIn_place_0_0.csv"),
                [],
                ("Person.id", "person"),
                ("Place.id", "place"),
            ),
        ],
        "hasType": [
            (
                table("tag_hasType_tagclass_0_0.csv"),
                [],
                ("Tag.id", "tag"),
                ("TagClass.id", "tagclass"),
            )
        ],
        "hasCreator": [
            (
                table("post_hasCreator_person_0_0.csv"),
                [],
                ("Post.id", "post"),
                ("Person.id", "person"),
            ),
            (
                table("comment_hasCreator_person_0_0.csv"),
                [],
                ("Comment.id", "comment"),
                ("Person.id", "person"),
            ),
        ],
        "containerOf": [
            (
                table("forum_containerOf_post_0_0.csv"),
                [],
                ("Forum.id", "forum"),
                ("Post.id", "post"),
            )
        ],
        "hasMember": [
            (
                table("forum_hasMember_person_0_0.csv"),
                ["joinDate"],
                ("Forum.id", "forum"),
                ("Person.id", "person"),
            )
        ],
        "workAt": [
            (
                table("person_workAt_organisation_0_0.csv"),
                ["workFrom"],
                ("Person.id", "person"),
                ("Organisation.id", "organisation"),
            )
        ],
        "likes": [
            (
                table("person_likes_comment_0_0.csv"),
                ["creationDate"],
                ("Person.id", "person"),
                ("Comment.id", "comment"),
            ),
            (
                table("person_likes_post_0_0.csv"),
                ["creationDate"],
                ("Person.id", "person"),
                ("Post.id", "post"),
            ),
        ],
        "studyAt": [
            (
                table("person_studyAt_organisation_0_0.csv"),
                ["classYear"],
                ("Person.id", "person"),
                ("Organisation.id", "organisation"),
            )
        ],
    }
    return sess.load_from(edges, vertices, directed, generate_eid=True)
Beispiel #10
0
def load_p2p_network(sess=None, prefix=None, directed=False):
    """Build the p2p-Gnutella31 property graph.

    The dataset is derived from the Gnutella peer-to-peer network snapshot of
    August 31 2002, with generated data on vertices and edges. Details:

        http://snap.stanford.edu/data/p2p-Gnutella31.html

    Args:
        sess (:class:`graphscope.Session`): Session in which the graph is
            loaded. When None, the default session is used. Defaults to None.
        prefix: `PathLike` object pointing at the dataset directory;
            environment variables in it are expanded. When None, the
            ``p2p_network.tar.gz`` archive is downloaded from the data site,
            extracted, and the extracted directory is used instead.
            Defaults to None.
        directed (bool, optional): Whether to load a directed graph.
            Defaults to False.

    Returns:
        :class:`graphscope.framework.graph.GraphDAGNode`:
            The graph after a ``host`` vertex label and a ``connect`` edge
            label (host -> host) have been added.

    Examples:
        .. code:: python

        >>> # lazy mode
        >>> import graphscope
        >>> from graphscope.dataset import load_p2p_network
        >>> sess = graphscope.session(mode="lazy")
        >>> g = load_p2p_network(sess, "/path/to/dataset")
        >>> g1 = sess.run(g)

        >>> # eager mode
        >>> import graphscope
        >>> from graphscope.dataset import load_p2p_network
        >>> sess = graphscope.session(mode="eager")
        >>> g = load_p2p_network(sess, "/path/to/dataset")
    """
    if prefix is None:
        archive_name = "p2p_network.tar.gz"
        archive_url = f"{DATA_SITE}/p2p_network.tar.gz"
        archive_path = download_file(
            archive_name,
            origin=archive_url,
            extract=True,
            file_hash="117131735186caff23ea127beec61b5396662c0815fc7918186451fe957e8c2f",
        )
        # The archive is assumed to unpack into a directory named after it,
        # so dropping the 7-character ".tar.gz" suffix yields that directory.
        prefix = archive_path[:-7]
    else:
        prefix = os.path.expandvars(prefix)

    if sess is None:
        sess = get_default_session()

    empty_graph = sess.g(directed=directed)

    # Vertices first: the edge label below refers to the "host" label.
    with_hosts = empty_graph.add_vertices(
        os.path.join(prefix, "p2p-31_property_v_0"),
        "host",
    )
    return with_hosts.add_edges(
        os.path.join(prefix, "p2p-31_property_e_0"),
        "connect",
        src_label="host",
        dst_label="host",
    )
Beispiel #11
0
def load_modern_graph(sess=None, prefix=None, directed=True):
    """Build the "modern" sample property graph.

    The modern graph consists of 6 vertices and 6 edges and is useful for
    testing basic functionality. It is assembled from four "|"-delimited
    files: ``person.csv``, ``software.csv``, ``knows.csv`` and
    ``created.csv``.

    Args:
        sess (:class:`graphscope.Session`): Session in which the graph is
            loaded. When None, the default session is used. Defaults to None.
        prefix (str): `PathLike` object pointing at the dataset directory;
            environment variables in it are expanded. When None, the
            ``modern_graph.tar.gz`` archive is downloaded from the data site,
            extracted, and the extracted directory is used instead.
            Defaults to None.
        directed (bool, optional): Whether to load a directed graph.
            Defaults to True.

    Returns:
        :class:`graphscope.framework.graph.GraphDAGNode`:
            The graph after the ``person`` and ``software`` vertex labels and
            the ``knows`` and ``created`` edge labels have been added.

    Examples:
        .. code:: python

        >>> # lazy mode
        >>> import graphscope
        >>> from graphscope.dataset.modern_graph import load_modern_graph
        >>> sess = graphscope.session(mode="lazy")
        >>> g = load_modern_graph(sess, "/path/to/dataset", True)
        >>> g1 = sess.run(g)

        >>> # eager mode
        >>> import graphscope
        >>> from graphscope.dataset.modern_graph import load_modern_graph
        >>> sess = graphscope.session(mode="eager")
        >>> g = load_modern_graph(sess, "/path/to/dataset", True)
    """
    if prefix is None:
        archive_name = "modern_graph.tar.gz"
        archive_url = f"{DATA_SITE}/modern_graph.tar.gz"
        archive_path = download_file(
            archive_name,
            origin=archive_url,
            extract=True,
            file_hash="a67c02191ea9dfa618a83d94087349a25937b92973f42206a28fdf6fa5299dec",
        )
        # The archive is assumed to unpack into a directory named after it,
        # so dropping the 7-character ".tar.gz" suffix yields that directory.
        prefix = archive_path[:-7]
    else:
        prefix = os.path.expandvars(prefix)

    if sess is None:
        sess = get_default_session()

    def pipe_delimited(csv_name):
        # All four dataset files are read with "|" as the field delimiter.
        return Loader(os.path.join(prefix, csv_name), delimiter="|")

    graph = sess.g(directed=directed)

    # Vertex labels go in first; the edge labels below reference them.
    graph = graph.add_vertices(
        pipe_delimited("person.csv"),
        "person",
        ["name", ("age", "int")],
        "id",
    )
    graph = graph.add_vertices(
        pipe_delimited("software.csv"),
        "software",
        ["name", "lang"],
        "id",
    )

    graph = graph.add_edges(
        pipe_delimited("knows.csv"),
        "knows",
        ["weight"],
        src_label="person",
        dst_label="person",
        src_field="src_id",
        dst_field="dst_id",
    )
    graph = graph.add_edges(
        pipe_delimited("created.csv"),
        "created",
        ["weight"],
        src_label="person",
        dst_label="software",
        src_field="src_id",
        dst_field="dst_id",
    )
    return graph