Ejemplo n.º 1
0
 def test_with_two_labels(self, node_data_dicts):
     """ Check the query generated for dictionary data with two labels.

     The labels are supplied as ``["Person", "Employee"]`` and are
     expected to appear on the CREATE clause as ``:Employee:Person``.
     The SET clause is expected to be the same ``SET _ += r`` form that
     the other dictionary-data tests for this function assert.

     :param node_data_dicts: fixture supplying node data as a list of
         dictionaries
     """
     q, p = unwind_create_nodes_query(node_data_dicts,
                                      labels=["Person", "Employee"])
     assert q == ("UNWIND $data AS r\n"
                  "CREATE (_:Employee:Person)\n"
                  "SET _ += r")
     # The input data is expected back under the "data" parameter.
     assert p == {"data": node_data_dicts}
Ejemplo n.º 2
0
def create_subgraph(tx, subgraph):
    """ Create the not-yet-bound parts of a local :class:`.Subgraph` in
    the remote :class:`.Graph` reached through `tx`.

    Only nodes and relationships whose ``graph`` attribute is
    :const:`None` are sent. Nodes are processed first, one query per
    group returned by ``_node_create_dict``; relationships follow, one
    query per group returned by ``_rel_create_dict``. Each query is
    suffixed with ``RETURN id(_)`` and the returned values are written
    back onto the local entities, matched up by position.

    :param tx: transaction used to run the queries; its ``graph``
        attribute is assigned to every entity that gets created
    :param subgraph: object exposing ``nodes`` and ``relationships``
    :return: :const:`None`
    """
    target_graph = tx.graph

    # --- nodes -----------------------------------------------------
    unbound_nodes = (n for n in subgraph.nodes if n.graph is None)
    for label_set, node_group in _node_create_dict(unbound_nodes).items():
        property_maps = [dict(n) for n in node_group]
        query = cypher_join(
            unwind_create_nodes_query(property_maps, labels=label_set),
            "RETURN id(_)")
        # The i-th returned record is paired with the i-th node sent.
        for position, row in enumerate(tx.run(*query)):
            created = node_group[position]
            created.graph = target_graph
            created.identity = row[0]
            created._remote_labels = label_set

    # --- relationships ---------------------------------------------
    # Built only after the node loop, so start/end identities assigned
    # above are available when the triples below are evaluated.
    unbound_rels = (r for r in subgraph.relationships if r.graph is None)
    for rel_type, rel_group in _rel_create_dict(unbound_rels).items():
        # Lazy [start identity, properties, end identity] triples.
        triples = (
            [rel.start_node.identity, dict(rel), rel.end_node.identity]
            for rel in rel_group
        )
        query = cypher_join(
            unwind_merge_relationships_query(triples, rel_type),
            "RETURN id(_)")
        for position, row in enumerate(tx.run(*query)):
            created = rel_group[position]
            created.graph = target_graph
            created.identity = row[0]
Ejemplo n.º 3
0
Archivo: data.py Proyecto: motey/py2neo
    def __db_create__(self, tx):
        """ Create the unbound parts of this :class:`.Subgraph` in the
        remote :class:`.Graph` reached through `tx`.

        Entities for which ``self._is_bound(entity, tx.graph)`` is true
        are skipped. The remainder are grouped (nodes by their set of
        labels, relationships by the name of their class) and each
        group is sent as one query suffixed with ``RETURN id(_)``. The
        returned values are written back to the local entities, matched
        up by position.

        :param tx: transaction used to run the queries; its ``graph``
            attribute is assigned to every entity that gets created
        """
        graph = tx.graph

        # Group unbound nodes as {frozenset(labels): [Node, ...]}
        nodes_by_labels = {}
        for node in self.nodes:
            if self._is_bound(node, tx.graph):
                continue
            label_key = frozenset(node.labels)
            nodes_by_labels.setdefault(label_key, []).append(node)

        # Group unbound relationships as {type name: [Rel, ...]}
        rels_by_type = {}
        for rel in self.relationships:
            if self._is_bound(rel, tx.graph):
                continue
            type_key = type(rel).__name__
            rels_by_type.setdefault(type_key, []).append(rel)

        # Nodes go first; the relationship rows below read the
        # identities assigned here.
        for label_key, group in nodes_by_labels.items():
            property_maps = [dict(n) for n in group]
            query = cypher_join(
                unwind_create_nodes_query(property_maps, labels=label_key),
                "RETURN id(_)")
            for position, row in enumerate(tx.run(*query)):
                created = group[position]
                created.graph = graph
                created.identity = row[0]
                created._remote_labels = label_key

        for type_key, group in rels_by_type.items():
            # Lazy [start identity, properties, end identity] triples.
            triples = (
                [rel.start_node.identity, dict(rel), rel.end_node.identity]
                for rel in group
            )
            query = cypher_join(
                unwind_merge_relationships_query(triples, type_key),
                "RETURN id(_)")
            for position, row in enumerate(tx.run(*query)):
                created = group[position]
                created.graph = graph
                created.identity = row[0]
Ejemplo n.º 4
0
 def test_with_one_label(self, node_data_dicts):
     """ Check the query generated for dictionary data with one label.

     :param node_data_dicts: fixture supplying node data as a list of
         dictionaries
     """
     expected_query = ("UNWIND $data AS r\n"
                       "CREATE (_:Person)\n"
                       "SET _ += r")
     expected_params = {"data": node_data_dicts}
     query, params = unwind_create_nodes_query(node_data_dicts,
                                               labels=["Person"])
     assert query == expected_query
     assert params == expected_params
Ejemplo n.º 5
0
 def test_list_data(self, node_data_lists, node_keys):
     """ Check the query generated for list data with explicit keys.

     Each key is expected to be mapped to the list position it
     occupies, as spelled out in the SET clause below.

     :param node_data_lists: fixture supplying node data as a list of
         value lists
     :param node_keys: fixture supplying the keys for those values
     """
     expected_query = (
         "UNWIND $data AS r\n"
         "CREATE (_)\n"
         "SET _ += {name: r[0], `family name`: r[1], age: r[2]}")
     expected_params = {"data": node_data_lists}
     query, params = unwind_create_nodes_query(node_data_lists,
                                               keys=node_keys)
     assert query == expected_query
     assert params == expected_params
Ejemplo n.º 6
0
 def test_dict_data(self, node_data_dicts):
     """ Check the query generated for dictionary data with no labels
     and no keys.

     :param node_data_dicts: fixture supplying node data as a list of
         dictionaries
     """
     expected_query = ("UNWIND $data AS r\n"
                       "CREATE (_)\n"
                       "SET _ += r")
     expected_params = {"data": node_data_dicts}
     query, params = unwind_create_nodes_query(node_data_dicts)
     assert query == expected_query
     assert params == expected_params
Ejemplo n.º 7
0
def create_nodes(tx, data, labels=None, keys=None):
    """ Create nodes from an iterable sequence of raw node data.

    The raw node `data` is supplied as either a list of lists or a list
    of dictionaries. If the former, then a list of `keys` must also be
    provided in the same order as the values. This option will also
    generally require fewer bytes to be sent to the server, since key
    duplication is removed. An iterable of extra `labels` can also be
    supplied, which will be attached to all new nodes.

    The example code below shows how to pass raw node data as a list of
    lists:

        >>> from py2neo import Graph
        >>> from py2neo.bulk import create_nodes
        >>> g = Graph()
        >>> keys = ["name", "age"]
        >>> data = [
        ...     ["Alice", 33],
        ...     ["Bob", 44],
        ...     ["Carol", 55],
        ... ]
        >>> create_nodes(g.auto(), data, labels={"Person"}, keys=keys)
        >>> g.nodes.match("Person").count()
        3

    This second example shows how to pass raw node data as a list of
    dictionaries. This alternative can be particularly useful if the
    fields are not uniform across records.

        >>> data = [
        ...     {"name": "Dave", "age": 66},
        ...     {"name": "Eve", "date_of_birth": "1943-10-01"},
        ...     {"name": "Frank"},
        ... ]
        >>> create_nodes(g.auto(), data, labels={"Person"})
        >>> g.nodes.match("Person").count()
        6

    There are obviously practical limits to the amount of data that
    should be included in a single bulk load of this type. For that
    reason, it is advisable to batch the input data into chunks, and
    carry out each in a separate transaction.

    The code below shows how batching can be achieved using a simple
    loop. This assumes that `data` is an iterable of raw node data
    (lists of values, described by `keys`) and steps through that data
    in chunks of size `batch_size` until everything has been consumed.
    Each chunk is materialised with ``list`` because an ``islice``
    object is always truthy, even when it has nothing left to yield;
    testing it directly would never end the loop.

        >>> from itertools import islice
        >>> stream = iter(data)
        >>> batch_size = 10000
        >>> while True:
        ...     batch = list(islice(stream, batch_size))
        ...     if batch:
        ...         create_nodes(g.auto(), batch,
        ...                      labels={"Person"}, keys=keys)
        ...     else:
        ...         break

    There is no universal `batch_size` that performs optimally for all
    use cases. It is recommended to experiment with this value to
    discover what size works best.

    :param tx: :class:`.Transaction` in which to carry out this
        operation
    :param data: node data supplied as a list of lists (if `keys` are
        provided) or a list of dictionaries (if `keys` is :const:`None`)
    :param labels: labels to apply to the created nodes
    :param keys: optional set of keys for the supplied `data` (if
        supplied as value lists)
    """
    # The result is exhausted and discarded; nothing is returned.
    list(tx.run(*unwind_create_nodes_query(data, labels, keys)))