def test11_bidirectional_multiple_edge_type(self):
    """Traverse an undirected pattern over two relationship types.

    Graph: (a)-[E1]->(b), (c)-[E2]->(d). The bidirectional pattern
    [:E1|:E2] must surface each edge in both directions.
    """
    graph = Graph("multi_edge_type", redis_con)
    # Build the four nodes keyed by their 'val' property.
    vertices = {v: Node(properties={'val': v}) for v in ('a', 'b', 'c', 'd')}
    for vertex in vertices.values():
        graph.add_node(vertex)
    graph.add_edge(Edge(vertices['a'], "E1", vertices['b']))
    graph.add_edge(Edge(vertices['c'], "E2", vertices['d']))
    graph.flush()

    query = """MATCH (a)-[:E1|:E2]-(z) RETURN a.val, z.val ORDER BY a.val, z.val"""
    actual_result = graph.query(query)
    expected_result = [['a', 'b'], ['b', 'a'], ['c', 'd'], ['d', 'c']]
    self.env.assertEquals(actual_result.result_set, expected_result)
def test07_transposed_multi_hop(self):
    """Multi-hop traversal mixing forward and transposed edges.

    Graph: (a)-[R]->(b)-[R]->(c)<-[R]-(d)<-[R]-(e).
    """
    redis_con = self.env.getConnection()
    graph = Graph("tran_multi_hop", redis_con)
    # One node per letter, each carrying its letter as 'val'.
    vertices = {v: Node(properties={"val": v}) for v in "abcde"}
    for vertex in vertices.values():
        graph.add_node(vertex)
    # Edges: ab, bc, ed, dc (same order as the original construction).
    for src, dst in (("a", "b"), ("b", "c"), ("e", "d"), ("d", "c")):
        graph.add_edge(Edge(vertices[src], "R", vertices[dst]))
    graph.flush()

    q = """MATCH (a)-[*2]->(b)<-[*2]-(c) RETURN a.val, b.val, c.val ORDER BY a.val, b.val, c.val"""
    actual_result = graph.query(q)
    expected_result = [['a', 'c', 'a'], ['a', 'c', 'e'], ['e', 'c', 'a'], ['e', 'c', 'e']]
    self.env.assertEquals(actual_result.result_set, expected_result)
def test15_update_deleted_entities(self):
    """SET on entities deleted in the same query must set no properties."""
    self.env.flush()
    redis_con = self.env.getConnection()
    redis_graph = Graph("delete_test", redis_con)
    source = Node()
    target = Node()
    redis_graph.add_node(source)
    redis_graph.add_node(target)
    redis_graph.add_edge(Edge(source, "R", target))
    redis_graph.flush()

    # Attempt to update entities after deleting them.
    query = """MATCH (a)-[e]->(b) DELETE a, b SET a.v = 1, e.v = 2, b.v = 3"""
    actual_result = redis_graph.query(query)
    self.env.assertEquals(actual_result.nodes_deleted, 2)
    self.env.assertEquals(actual_result.relationships_deleted, 1)
    # No properties should be set.
    # (Note that this behavior is left unspecified by Cypher.)
    self.env.assertEquals(actual_result.properties_set, 0)

    # Validate that the graph is empty.
    actual_result = redis_graph.query("""MATCH (a) RETURN a""")
    self.env.assertEquals(actual_result.result_set, [])
def test_multi_hashjoins(self):
    """Regression for RedisGraph issue #1124: chained value hash joins."""
    graph = Graph(GRAPH_ID, self.env.getConnection())
    # Construct a 4 node graph valued 1..4.
    for value in range(1, 5):
        graph.add_node(Node(properties={"val": value}))
    graph.flush()

    # Find nodes a,b,c such that a.val = 1, a.val = b.val-1 and b.val = c.val-1.
    q = "MATCH (a {val:1}), (b), (c) WHERE a.val = b.val-1 AND b.val = c.val-1 RETURN a.val, b.val, c.val"
    plan = graph.execution_plan(q)
    # The plan must contain 2 Value Hash Join operations.
    self.env.assertEquals(plan.count("Value Hash Join"), 2)

    # Validate the results.
    actual_result = graph.query(q)
    self.env.assertEquals(actual_result.result_set, [[1, 2, 3]])
def test_CRUD_replication(self):
    """Verify CRUD operations and index creation replicate to a slave."""
    env = self.env
    source_con = env.getConnection()
    replica_con = env.getSlaveConnection()

    # Enable write commands on the slave; required because all RedisGraph
    # commands are registered as write commands.
    replica_con.config_set("slave-read-only", "no")

    # Build a simple two-node graph on the primary.
    graph = Graph(GRAPH_ID, source_con)
    replica = Graph(GRAPH_ID, replica_con)
    src = Node(label='L', properties={'id': 0, 'name': 'a'})
    dst = Node(label='L', properties={'id': 1, 'name': 'b'})
    graph.add_node(src)
    graph.add_node(dst)
    graph.add_edge(Edge(src, 'R', dst))
    graph.flush()

    # Create an index, update one entity and delete the other.
    graph.query("CREATE INDEX ON :L(id)")
    graph.query("MATCH (n:L {id:0}) SET n.id = 2, n.name = 'c'")
    graph.query("MATCH (n:L {id:1}) DELETE n")

    # Give the replica some time to catch up.
    time.sleep(1)

    # The index should be usable on the replica as well.
    q = "MATCH (s:L {id:2}) RETURN s.name"
    plan = graph.execution_plan(q)
    replica_plan = replica.execution_plan(q)
    env.assertIn("Index Scan", plan)
    self.env.assertEquals(replica_plan, plan)

    # Issue the query on both source and replica;
    # the results must be identical.
    result = graph.query(q).result_set
    replica_result = replica.query(q).result_set
    self.env.assertEquals(replica_result, result)
def populate_dense_graph(self, dense_graph_name):
    """Build (once) and return a 10-node graph in which every node pair is
    linked by a 'connected' edge from the higher- to the lower-index node."""
    dense_graph = Graph(dense_graph_name, redis_con)
    # Only populate when the key does not yet exist in Redis.
    if not redis_con.exists(dense_graph_name):
        nodes = [Node(label="n", properties={"val": val}) for val in range(10)]
        for node in nodes:
            dense_graph.add_node(node)
        # Connect each node to every node that precedes it.
        for idx, src in enumerate(nodes):
            for dst in nodes[:idx]:
                dense_graph.add_edge(Edge(src, "connected", dst))
        dense_graph.flush()
    return dense_graph
def introduction_2():
    """Walk through class4pgm basics: register model classes, persist model
    instances, then read everything back through the graph client."""
    import redis
    from redisgraph import Graph
    from class4pgm import ClassManager

    # Connect to the local redis and create a redis graph named "example".
    client = redis.Redis()
    graph = Graph("example", client)
    manager = ClassManager(graph)

    # Register the model classes; expect [True, True, True, True].
    succeeded = manager.insert_defined_class([Person, Student, Teacher, Teach])
    print(succeeded)

    # Persist the model instances, then commit.
    for model in (john, kate, teach):
        manager.model_to_db_object(model, auto_add=True)
    graph.flush()

    # Open new client to retrieve classes and instances.
    client = redis.Redis()
    # The defined classes are retrieved automatically while constructing
    # the graph client's manager.
    manager = ClassManager(graph)

    # Retrieve every node.
    results = graph.query("""Match (a) return a""")
    for result in results.result_set:
        print(manager.db_object_to_model(result[0]))
        # (:ClassDefinitionWrapper {...})
        # ...
        # (:IntlStudent:Student:Person)
        # (:Teacher:Person)

    # Acquire the Teach class and query every edge belonging to it.
    T = manager.get('Teach')
    result = graph.query(f"Match ()-[a:{Teach.__name__}]->() return a")
    for row in result.result_set:
        print(manager.edge_to_model(row[0]))  # ()-[:Teach]->()

    graph.delete()
def test00_test_data_valid_after_rename(self):
    """Renaming a graph key must preserve its data and allow further writes."""
    global graph
    node0 = Node(node_id=0, label="L", properties={'name': 'x', 'age': 1})
    graph.add_node(node0)
    graph.flush()

    # Rename the key, then keep writing through a handle on the new name.
    redis_con.rename(GRAPH_ID, NEW_GRAPH_ID)
    graph = Graph(NEW_GRAPH_ID, redis_con)
    node1 = Node(node_id=0, label="L", properties={'name': 'x', 'age': 1})
    graph.add_node(node1)
    graph.flush()

    query = "MATCH (n) return n"
    query_info = QueryInfo(
        query=query,
        description="Tests data is valid after renaming",
        expected_result=[[node0], [node1]])
    self._assert_resultset_and_expected_mutually_included(graph.query(query), query_info)
def test13_delete_path_elements(self):
    """Delete nodes/edges projected out of a path value."""
    self.env.flush()
    redis_con = self.env.getConnection()
    redis_graph = Graph("delete_test", redis_con)
    src = Node()
    dest = Node()
    redis_graph.add_node(src)
    redis_graph.add_node(dest)
    redis_graph.add_edge(Edge(src, "R", dest))
    redis_graph.flush()

    # Delete the path's first node and first relationship via projections.
    query = """MATCH p = (src)-[e]->(dest) WITH nodes(p)[0] AS node, relationships(p)[0] as edge DELETE node, edge"""
    actual_result = redis_graph.query(query)
    self.env.assertEquals(actual_result.nodes_deleted, 1)
    self.env.assertEquals(actual_result.relationships_deleted, 1)
def test_optional_match(self):
    """OPTIONAL MATCH pads unmatched patterns with NULL columns."""
    redis_graph = Graph('optional', self.r)
    # Build a graph of form (a)-[R]->(b).
    node0 = Node(node_id=0, label="L1", properties={'value': 'a'})
    node1 = Node(node_id=1, label="L1", properties={'value': 'b'})
    edge01 = Edge(node0, "R", node1, edge_id=0)
    for node in (node0, node1):
        redis_graph.add_node(node)
    redis_graph.add_edge(edge01)
    redis_graph.flush()

    # Collect all outgoing edges from both nodes (the second has none).
    query = """MATCH (a) OPTIONAL MATCH (a)-[e]->(b) RETURN a, e, b ORDER BY a.value"""
    result = redis_graph.query(query)
    expected_results = [[node0, edge01, node1], [node1, None, None]]
    self.assertEqual(expected_results, result.result_set)

    redis_graph.delete()
def test_path(self):
    """Return a path object and compare it to a locally-built Path."""
    redis_graph = Graph('social', self.r)
    node0 = Node(node_id=0, label="L1")
    node1 = Node(node_id=1, label="L1")
    edge01 = Edge(node0, "R1", node1, edge_id=0, properties={'value': 1})
    redis_graph.add_node(node0)
    redis_graph.add_node(node1)
    redis_graph.add_edge(edge01)
    redis_graph.flush()

    # Expected: the single path node0 -[edge01]-> node1.
    path01 = Path.new_empty_path().add_node(node0).add_edge(edge01).add_node(node1)
    result = redis_graph.query("MATCH p=(:L1)-[:R1]->(:L1) RETURN p ORDER BY p")
    self.assertEqual([[path01]], result.result_set)

    # All done, remove graph.
    redis_graph.delete()
def test04_repeated_edges(self):
    """Parallel edges between the same endpoints must survive RDB dump/load."""
    for graph_name in ("repeated_edges", "{tag}_repeated_edges"):
        graph = Graph(graph_name, redis_con)
        src = Node(label='p', properties={'name': 'src'})
        dest = Node(label='p', properties={'name': 'dest'})
        edge1 = Edge(src, 'e', dest, properties={'val': 1})
        edge2 = Edge(src, 'e', dest, properties={'val': 2})
        graph.add_node(src)
        graph.add_node(dest)
        graph.add_edge(edge1)
        graph.add_edge(edge2)
        graph.flush()

        # Verify both parallel edges are visible.
        q = """MATCH (a)-[e]->(b) RETURN e.val, a.name, b.name ORDER BY e.val"""
        expected_result = [
            [edge.properties['val'], src.properties['name'], dest.properties['name']]
            for edge in (edge1, edge2)
        ]
        actual_result = graph.query(q)
        self.env.assertEquals(actual_result.result_set, expected_result)

        # Save RDB & load from RDB.
        self.env.dumpAndReload()

        # Verify the edges were properly saved and loaded.
        actual_result = graph.query(q)
        self.env.assertEquals(actual_result.result_set, expected_result)
def test12_delete_unwind_entity(self):
    """DELETE entities produced by UNWIND, both from path projections and
    from a collected list of nodes.

    Fix: the loop counter was bound to an unused name ``i``; use ``_``.
    """
    redis_con = self.env.getConnection()
    redis_graph = Graph("delete_test", redis_con)

    # Create 10 nodes.
    for _ in range(10):
        redis_graph.add_node(Node())
    redis_graph.flush()

    # Unwind path nodes.
    query = """MATCH p = () UNWIND nodes(p) AS node DELETE node"""
    actual_result = redis_graph.query(query)
    self.env.assertEquals(actual_result.nodes_deleted, 10)
    self.env.assertEquals(actual_result.relationships_deleted, 0)

    # Re-create 10 nodes.
    for _ in range(10):
        redis_graph.add_node(Node())
    redis_graph.flush()

    # Unwind collected nodes.
    query = """MATCH (n) WITH collect(n) AS nodes UNWIND nodes AS node DELETE node"""
    actual_result = redis_graph.query(query)
    self.env.assertEquals(actual_result.nodes_deleted, 10)
    self.env.assertEquals(actual_result.relationships_deleted, 0)
def test07_index_scan_and_id(self):
    """Combine an index scan with an id() filter.

    Fix: the original issued ``redis_graph.query(query)`` twice in a row
    and discarded the first result; the redundant round trip is removed.
    """
    redis_con = self.env.getConnection()
    redis_graph = Graph("G", redis_con)
    nodes = []
    for i in range(10):
        node = Node(node_id=i, label='person', properties={'age': i})
        nodes.append(node)
        redis_graph.add_node(node)
    redis_graph.flush()

    # Index the 'age' property.
    query_result = redis_graph.query("""CREATE INDEX ON :person(age)""")
    self.env.assertEqual(1, query_result.indices_created)

    query = """MATCH (n:person) WHERE id(n)>=7 AND n.age<9 RETURN n ORDER BY n.age"""
    plan = redis_graph.execution_plan(query)
    # The plan must use the index and retain the id() filter.
    self.env.assertIn('Index Scan', plan)
    self.env.assertIn('Filter', plan)

    query_result = redis_graph.query(query)
    self.env.assertEqual(2, len(query_result.result_set))
    expected_result = [[nodes[7]], [nodes[8]]]
    self.env.assertEquals(expected_result, query_result.result_set)
class GraphUtils:
    """Provides low level functions to interact with Redis Graph"""

    def __init__(self, redis_proxy: RedisProxy, graph_name="apigraph") -> None:
        """Initialize Graph Utils module

        :param redis_proxy: RedisProxy object created from redis_proxy module
        :param graph_name: Graph Key name to be created in Redis
        :return: None
        """
        self.redis_proxy = redis_proxy
        self.redis_connection = redis_proxy.get_connection()
        self.graph_name = graph_name
        self.redis_graph = Graph(graph_name, self.redis_connection)

    def read(self, match: str, ret: str, where: Optional[str] = None) -> Union[list, None]:
        """Run query to read nodes in Redis and return the result

        :param match: Relationship between queried entities.
        :param ret: Defines which property/ies will be returned.
        :param where: Used to filter results, not mandatory.
        :return: Corresponding Nodes
        """
        query = "MATCH(p{})".format(match)
        if where:
            query += " WHERE(p.{})".format(where)
        query += " RETURN p{}".format(ret)
        query_result = self.redis_graph.query(query)
        # Convert the redisgraph result set into a list of plain dicts.
        return self.process_result(query_result)

    def update(self, match: str, set: str, where: Optional[str] = None) -> list:
        """Run query to update nodes in Redis and return the result

        NOTE: the parameter name ``set`` shadows the builtin; it is kept
        for backward compatibility with existing keyword callers.

        :param match: Relationship between queried entities.
        :param set: The property to be updated.
        :param where: Used to filter results, not mandatory.
        :return: Query results
        """
        query = "MATCH(p{})".format(match)
        if where is not None:
            query += " WHERE(p.{})".format(where)
        query += " SET p.{}".format(set)
        return self.redis_connection.execute_command("GRAPH.QUERY",
                                                     self.graph_name, query)

    def delete(self, where: str, match: str = "") -> list:
        """Run query to delete nodes in Redis and return the result

        :param where: Used to filter which nodes are deleted.
        :param match: Relationship between queried entities, optional.
        :return: Query results
        """
        query = "MATCH(p{})".format(match)
        if where is not None:
            query += " WHERE(p.{})".format(where)
        query += " DELETE p"
        return self.redis_connection.execute_command("GRAPH.QUERY",
                                                     self.graph_name, query)

    def create_relation(self, label_source: str, where_source: str,
                        relation_type: str, label_dest: str,
                        where_dest: str) -> list:
        """Create a relation(edge) between nodes according to WHERE filters.

        :param label_source: Source node label.
        :param where_source: Where statement to filter source node.
        :param relation_type: The name of the relation type to assign.
        :param label_dest: Label name for the destination node.
        :param where_dest: Where statement to filter destination node
        """
        query = "MATCH(s:{} {{{}}}), ".format(label_source, where_source)
        query += "(d:{} {{{}}})".format(label_dest, where_dest)
        query += " CREATE (s)-[:{}]->(d)".format(relation_type)
        return self.redis_connection.execute_command("GRAPH.QUERY",
                                                     self.graph_name, query)

    def add_node(self, label: str, alias: str, properties: dict) -> Node:
        """Add node to the redis graph

        :param label: label for the node.
        :param alias: alias for the node.
        :param properties: properties for the node.
        :return: Created Node
        """
        node = Node(label=label, alias=alias, properties=properties)
        self.redis_graph.add_node(node)
        return node

    def add_edge(self, source_node: Node, predicate: str, dest_node: Node) -> None:
        """Add edge between nodes in redis graph

        :param source_node: source node of the edge.
        :param predicate: relationship between the source and destination node
        :param dest_node: destination node of the edge.
        :return: None
        """
        edge = Edge(source_node, predicate, dest_node)
        self.redis_graph.add_edge(edge)

    def flush(self) -> None:
        """Commit the changes made to the Graph to Redis and reset/flush
        the Nodes and Edges to be added in the next commit"""
        self.redis_graph.flush()

    def process_result(self, result) -> list:
        """Partial data processing for results redis-sets

        Converts each record's properties into a JSON-LD style dict
        (id/type/context renamed to @id/@type/@context).

        Fix: a None record previously triggered a bare ``return``, which
        aborted processing and returned None despite the ``-> list``
        contract; None records are now skipped instead.

        :param result: query result object returned by redisgraph.
        :return: list of processed property dicts.
        """
        response_json_list = []
        if not result.result_set:
            return []
        for return_alias in result.result_set:
            for record in return_alias:
                if record is None:
                    # Skip NULL records (e.g. from OPTIONAL MATCH).
                    continue
                new_record = record.properties
                if new_record:
                    if 'id' in new_record:
                        new_record['@id'] = new_record.pop('id')
                        new_record['@type'] = new_record.pop('type')
                    if 'context' in new_record:
                        new_record['@context'] = new_record.pop('context')
                    response_json_list.append(new_record)
        return response_json_list
# Load the word list and create one 'word' node per unique lower-cased
# word of length > 1, committing in batches of 100.
with open("./data/words_alpha.txt") as file:
    content = file.read()
words = content.split()
words = [w.lower() for w in words if len(w) > 1]
unique_words = set(words)
max_node_count = len(unique_words)

node_count = 0
with progressbar.ProgressBar(max_value=max_node_count) as bar:
    for word in unique_words:
        n = Node(label='word', properties={'value': word})
        redis_graph.add_node(n)
        node_count += 1
        bar.update(node_count)
        # Commit every 100 nodes to keep the pending batch small.
        if (node_count % 100) == 0:
            redis_graph.flush()

# Flush left-overs.
redis_graph.flush()

# Read the conversation corpus, replace punctuation with spaces, and
# prepare to walk consecutive word pairs (one edge per adjacent pair).
with open("./data/TwitterConvCorpus.txt") as file:
    content = file.read()
for c in REMOVE_CHARS:
    content = content.replace(c, ' ')
words = content.split()
words = [w.lower() for w in words if len(w) > 1]
max_edge_count = len(words) - 1
with progressbar.ProgressBar(max_value=max_edge_count) as bar:
    for i in range(len(words) - 1):
        # NOTE(review): loop body truncated in this chunk — continues elsewhere.
def test_CRUD_replication(self):
    """CRUD, index and full-text index operations must replicate to the slave."""
    env = self.env
    source_con = env.getConnection()
    replica_con = env.getSlaveConnection()

    # Enable write commands on the slave; required because all RedisGraph
    # commands are registered as write commands.
    replica_con.config_set("slave-read-only", "no")

    # Build a simple two-node graph on the primary.
    graph = Graph(GRAPH_ID, source_con)
    replica = Graph(GRAPH_ID, replica_con)
    s = Node(label='L', properties={'id': 0, 'name': 'abcd'})
    t = Node(label='L', properties={'id': 1, 'name': 'efgh'})
    graph.add_node(s)
    graph.add_node(t)
    graph.add_edge(Edge(s, 'R', t))
    graph.flush()

    def assert_same_results(q):
        # Run q on the primary first, then on the replica, and compare.
        result = graph.query(q).result_set
        replica_result = replica.query(q).result_set
        self.env.assertEquals(replica_result, result)

    # Exact-match index.
    graph.query("CREATE INDEX ON :L(id)")
    # Full-text index, then add fields to the existing one.
    graph.query("CALL db.idx.fulltext.createNodeIndex('L', 'name')")
    graph.query("CALL db.idx.fulltext.createNodeIndex('L', 'title', 'desc')")
    # Full-text index with an index configuration.
    graph.query("CALL db.idx.fulltext.createNodeIndex({label: 'L1', language: 'german', stopwords: ['a', 'b'] }, 'title', 'desc')")

    # Update one entity, delete the other.
    graph.query("MATCH (n:L {id:1}) SET n.id = 2")
    graph.query("MATCH (n:L {id:0}) DELETE n")

    # Give the replica some time to catch up.
    time.sleep(1)

    # Make sure the index is available on the replica.
    q = "MATCH (s:L {id:2}) RETURN s.name"
    plan = graph.execution_plan(q)
    replica_plan = replica.execution_plan(q)
    env.assertIn("Index Scan", plan)
    self.env.assertEquals(replica_plan, plan)

    # Same query results, node count, node contents and index catalog
    # on both primary and replica.
    assert_same_results(q)
    assert_same_results("MATCH (n) RETURN count(n)")
    assert_same_results("MATCH (n) RETURN n ORDER BY n")
    assert_same_results("CALL db.indexes()")

    # Drop the full-text index, let the replica catch up, re-check catalogs.
    graph.query("CALL db.idx.fulltext.drop('L')")
    time.sleep(1)
    assert_same_results("CALL db.indexes()")
class rgraph(absgraph):
    """RedisGraph backend for the abstract graph interface ``absgraph``.

    Every record is stored as a ':node' labelled vertex. Cypher statements
    are batched in groups of ``self._maxquery`` entities to bound query size.
    Serialization/deserialization and the schema contract come from the
    ``absgraph`` base class (not visible in this file).
    """

    def __init__(self, server, schema):
        # server is a "host:port" string; schema handling is delegated to absgraph.
        host, port = server.split(':')
        self._client_stub = redis.Redis(host=host, port=int(port))
        self._graph = Graph('test', self._client_stub)
        self._maxquery = 100  # max entities per batched Cypher statement
        self.cypher_no_exists = True  # emulate exists() with `!= ""` (see _one_cypher)
        super(rgraph, self).__init__(schema)

    def drop_all(self):
        # Remove the whole graph key from Redis.
        self._graph.delete()

    def get_schema(self):
        pass

    def load_schema(self, schema):
        pass

    def set_index(self):
        # Create an index for every schema predicate flagged with a truthy 'index'.
        for p, d in self.schema.items():
            if not 'index' in d:
                continue
            elif not d['index']:
                continue
            query = 'CREATE INDEX ON :node (%s)' % p
            self.query(query)

    def _is_intinstance(self, p, v):
        # True when predicate p is declared 'int' in the schema but the value
        # arrived as a float/str (e.g. decoded from Redis) and needs coercion.
        return self._int_type(p) and (isinstance(v, np.float_) or isinstance(
            v, float) or isinstance(v, str))

    def add_node(self, node):
        self._graph.add_node(node)

    def add_edge(self, edge):
        self._graph.add_edge(edge)

    def add_nodes(self, nodes):
        # nodes: iterable of property dicts, one ':node' vertex each.
        for i in nodes:
            node = Node(label='node', properties=i)
            self.add_node(node)

    def nquads(self, df, predicates, i):
        """Build up to ``_maxquery`` property dicts from dataframe rows
        starting at index ``i``; returns (dicts, next index)."""
        #d = self.id_predicate
        #nodes = [Node(label='node', properties={'%s'%d: df.iloc[i]['%s'%d],
        #               'numeric': srlz(df.iloc[i]['numeric'])}) for i in range(n, n+m)]
        plen = len(predicates)
        lim = min(i + self._maxquery, len(df))
        nquads = []
        while i < lim:
            properties = {}
            for p in predicates:
                try:
                    s = df.iloc[i][p]
                except:
                    # NOTE(review): bare except; presumably a missing column
                    # falls back to the row index — confirm intent.
                    s = i
                s = self.serialize(s)
                properties[p] = s
            nquads.append(properties)
            i += 1
        return nquads, i

    def nquads_edges(self, graph, label, i=0, nodes=None, neighbors=None, j=0):
        """Collect up to ``_maxquery`` (node, neighbor) pairs from a networkx-like
        graph, resumable via the returned (i, neighbors, j) cursor."""
        if nodes is None:
            nodes = list(graph.nodes())
        edges = []
        budget = self._maxquery
        for node in nodes[i:]:
            if neighbors is None:
                neighbors = list(graph.neighbors(node))
            for k, neigh in enumerate(neighbors[j:]):
                if budget == 0:
                    # Budget exhausted: return a cursor into this node's neighbors.
                    return edges, i, neighbors, j + k, self._maxquery
                edges.append((node, neigh))
                budget -= 1
            i += 1
            neighbors = None
            j = 0
        # NOTE(review): `k` is unbound here if no neighbor loop ever ran
        # (e.g. empty node slice) — would raise NameError; confirm callers
        # never hit that case.
        return edges, i, neighbors, j + k, self._maxquery - budget

    def nquads_edges2(self, edges):
        """Build a single MATCH ... CREATE Cypher statement creating one
        ':edge' relation per (src, dst) identity pair, e.g.:
        GRAPH.QUERY test 'MATCH (a:node {name: "acc-tight5.mps.pkl__v998"}),
        (b:node {name: "acc-tight5.mps.pkl__v998"}), (c:node {name:
        "acc-tight5.mps.pkl__v10"}), (d:node {name: "acc-tight5.mps.pkl__slack-min"})
        CREATE (a)-[:edge]->(b), (c)-[:edge]->(d)' """
        # String identities are quoted; numeric ones are not.
        if isinstance(edges[0][0], str):
            base = '(s%d:node {%s: "%s"}), (d%d:node {%s: "%s"})'
        else:
            base = '(s%d:node {%s: %s}), (d%d:node {%s: %s})'
        l = []
        #q = 'MATCH '
        #for i, e in enumerate(edges):
        #    if i > 0:
        #        q += ', '
        #    q += base%(i, self.id_predicate, e[0], i, self.id_predicate, e[1])
        #    l.append('(s%d)-[:%s]->(d%d)'%(i, self.edge_attribute, i))
        #q += ' CREATE ' + ', '.join(l)
        m = []
        for i, e in enumerate(edges):
            m.append(base % (i, self.id_predicate, e[0], i, self.id_predicate,
                             e[1]))
            l.append('(s%d)-[:%s]->(d%d)' % (i, self.edge_attribute, i))
        q = 'MATCH ' + ', '.join(m) + ' CREATE ' + ', '.join(l)
        return q

    def parse_neighbors(self, res, ret):
        # Fold a (src, dst) result set into ret: {src_id: [dst_id, ...]}.
        if len(res.result_set) == 0:
            return
        # Header row holds "n.<predicate>"; recover the predicate name.
        p = res.result_set[0][0].decode().split('.')[1]  # XXX
        for k, v in res.result_set[1:]:
            k = k.decode()
            v = v.decode()
            # Coerce schema-int values decoded as strings back to int.
            if self._is_intinstance(p, k):
                k = int(float(k))
            if self._is_intinstance(p, v):
                v = int(float(v))
            if k in ret:
                ret[k].append(v)
            else:
                ret[k] = [v]

    def neighbors(self, identities, pred=None, id_pred=None):
        """ We want something like
        GRAPH.QUERY test 'MATCH (n: node
        {name:"acc-tight5.mps.pkl__v998"})-[:edge]->(m) RETURN m.numeric, m.name'
        or better solution like:
        GRAPH.QUERY test 'MATCH (n0: node)-[:edge]->(m) WHERE n0.name =
        "acc-tight5.mps.pkl__v998" OR n0.name = "acc-tight5.mps.pkl__v10"
        RETURN n0.name, m.name'
        """
        if not id_pred:
            id_pred = self.id_predicate
        if not pred:
            pred = self.edge_attribute
        ret = OrderedDict()
        #
        # For small numbers, this is faster
        #
        # NOTE(review): `len(identities) < 1` means this per-identity path is
        # effectively dead (only taken for an empty input) — confirm whether a
        # larger threshold was intended.
        if len(identities) < 1:
            for i in identities:
                q = 'MATCH (s: node {%s: "%s"})-[:%s]->(d) RETURN d.%s' % (
                    id_pred, i, pred, id_pred)
                res = self.query(q)
                ret.update({i: [j[0].decode() for j in res.result_set[1:]]})
            return ret
        #
        # Otherwise batch requests
        #
        if isinstance(identities[0], str):
            where_base = 'n.%s = "%s"'
        else:
            where_base = 'n.%s = %s'
        step = 8  # identities per OR-batched query
        l = 0
        h = step
        lim = len(identities)
        ret = OrderedDict()
        while l < lim:
            b = identities[l:h]
            #where = ['n.%s = "%s"'%(id_pred, i) for i in b]
            where = [where_base % (id_pred, i) for i in b]
            where = ' OR '.join(where)
            query = ('MATCH (n: node)-[:%s]->(m) WHERE %s RETURN n.%s, m.%s' %
                     (pred, where, id_pred, id_pred))
            res = self.query(query)
            self.parse_neighbors(res, ret)
            l = h
            h = h + step
        return ret

    def _int_type(self, predicate):
        # True when the schema declares this predicate as an int.
        return self.schema[predicate]['type'] == 'int'

    def parse_batch(self, res, ret, predicates):
        # Demultiplex a flat result row (columns repeat per node, per predicate)
        # into ret: {predicate: [values...]}.
        r = res.result_set[1]
        npreds = len(predicates)
        for i, p in enumerate(predicates):
            l = [j.decode() for j in r[i::npreds]]
            t = self.deserialize_type(p)
            if t:
                l = self.deserialize(l, t)
            # XXX
            if self._int_type(p) and (isinstance(l[0], np.float_)
                                      or isinstance(l[0], float)
                                      or isinstance(l[0], str)):
                l = [int(float(i)) for i in l]
            #if p in ret:
            #    ret[p].extend(l)
            #else:
            #    ret[p] = l
            ret[p].extend(l)

    def batch(self, identities, predicates, identities_predicate=None):
        """Fetch ``predicates`` for many identities in chunks of 1000,
        returning OrderedDict {predicate: [values...]}."""
        if identities_predicate is None:
            identities_predicate = self.id_predicate
        if isinstance(identities[0], str):
            mtch_base = '(n%d:node {%s: "%s"})'
        else:
            mtch_base = '(n%d:node {%s: %s})'
        step = 1000  # identities per query
        l = 0
        h = step
        lim = len(identities)
        ret = OrderedDict({p: [] for p in predicates})
        while l < lim:
            nodes = identities[l:h]
            mtch = [
                mtch_base % (j, identities_predicate, i)
                for j, i in enumerate(nodes)
            ]
            mtch = ', '.join(mtch)
            rtrn = [
                'n%d.%s' % (j, p) for j in range(len(nodes))
                for p in predicates
            ]
            rtrn = ', '.join(rtrn)
            query = 'MATCH ' + mtch + ' RETURN ' + rtrn
            res = self.query(query)
            # We get something like
            # [[b'n0.numeric', b'n0.name', b'n1.numeric', b'n1.name'], ...
            # res.result_set[1][0] is like b'xazf'
            self.parse_batch(res, ret, predicates)
            l = h
            h = h + step
        return ret

    def missing_values(self, predicate, low, high):
        # This backend never reports missing values.
        return []

    def _one_cypher(self, predicate, identity):
        # Build a query returning one value of `predicate`, either for a
        # specific identity or for any node that has the predicate set.
        #query = 'MATCH (n:node {%s: "%s"}) RETURN n.%s'%(
        #    self.sorted_predicate, identity, predicate)
        if identity:
            if isinstance(identity, str):
                query = 'MATCH (n:node) WHERE n.%s = "%s" RETURN n.%s' % (
                    self.id_predicate, identity, predicate)
            else:
                # NOTE(review): identical to the str branch — numeric
                # identities are still quoted; confirm whether an unquoted
                # form was intended here.
                query = 'MATCH (n:node) WHERE n.%s = "%s" RETURN n.%s' % (
                    self.id_predicate, identity, predicate)
        else:
            if self.cypher_no_exists:
                whr = 'n.%s != ""'
            else:
                whr = 'exists(n.%s)'
            query = ('MATCH (n:node) WHERE ' + whr +
                     ' RETURN n.%s LIMIT 1') % (predicate, predicate)
        return query

    def parse_one(self, res, predicate):
        # First data row, first column, decoded from bytes.
        return res.result_set[1][0].decode()

    def one(self, predicate, identity=None):
        """Return a single (deserialized) value of `predicate`."""
        query = self._one_cypher(predicate, identity)
        res = self.query(query)
        r = self.parse_one(res, predicate)
        t = self.deserialize_type(predicate)
        if t:
            r = self.deserialize([r], t)[0]
        return r

    def _count_cypher(self, name=None):
        # `name` is accepted for interface parity but does not affect the query.
        if name is None:
            name = self.id_predicate
        return 'MATCH (n:node) RETURN COUNT(n)'

    def parse_count(self, res):
        return (int(float(res.result_set[1][0].decode())))

    def count(self, name=None):
        # Number of ':node' vertices in the graph.
        query = self._count_cypher(name)
        res = self.query(query)
        return self.parse_count(res)

    def merge(self):
        # Serialize all pending nodes and edges into one comma-joined
        # fragment and hand it to the client's merge().
        query = ''
        for _, node in self._graph.nodes.items():
            query += str(node) + ','
        for edge in self._graph.edges:
            query += str(edge) + ','
        # Discard leading comma.
        # NOTE(review): `is` compares identity, not equality — works for the
        # interned one-char str in CPython but should be `==`; confirm.
        if query[-1] is ',':
            query = query[:-1]
        self._graph.merge(query)

    def commit(self):
        self._graph.commit()

    def flush(self):
        self._graph.flush()

    def query(self, query):
        return self._graph.query(query)

    def range_cypher(self, low, high, predicates, id_predicate, expand):
        # Build a ranged (or LIMIT-ed, when the sort key is unsortable)
        # query returning the requested predicates.
        unsortable = False
        if id_predicate is None:
            id_predicate = self.id_predicate
        if self.id_predicate_unsortable:
            unsortable = True
        rtrn = ['n.%s' % p for p in predicates]
        rtrn = ', '.join(rtrn)
        if unsortable:
            query = ('MATCH (n: node) RETURN %s ORDER BY n.%s LIMIT %d' %
                     (rtrn, self.sorted_predicate, high - low))
        else:
            pred = self.sorted_predicate
            query = ('MATCH (n: node) WHERE n.%s >= %d AND n.%s < %d '
                     'RETURN %s ORDER BY n.%s' %
                     (pred, low, pred, high, rtrn, pred))
        return query

    def _range_xform(self, ret, predicates):
        # Transpose {predicate: [v0, v1, ...]} into
        # [{predicate: v0, ...}, {predicate: v1, ...}, ...].
        ret2 = [{} for i in range(len(ret[predicates[0]]))]
        for k, vs in ret.items():
            for j, v in enumerate(vs):
                ret2[j][k] = v
        return ret2

    def _range(self, low, high, predicates, id_predicate=None, expand=False):
        query = self.range_cypher(low, high, predicates, id_predicate, expand)
        ret = OrderedDict()
        res = self.query(query)
        res_predicates = [p.decode().split('.')[1] for p in res.result_set[0]]
        for pi, p in enumerate(predicates):
            tmp = [i[pi].decode() for i in res.result_set[1:]]
            ret[p] = tmp
        # for RETURN n.numeric, n.name, n.identity, ret[1:] is like
        # [[b'numericvalue0', b'namevalue0', b'identityvalue0'], ..]
        # Now ret looks like [{'name': [name values]}, {'numeric': []} ..]
        # To be compatible with _dataframe, we transform this to
        # [{'name': 'namevalue0', 'numeric': 'numericvalue0'}, {}, {} ... ]
        ret2 = self._range_xform(ret, predicates)
        return ret2

    def load_df(self, df, predicates, n=0):
        """Load every dataframe row as a node, in _maxquery-sized batches."""
        print('loading nodes')
        while n < len(df):
            nquads, n = self.nquads(df, predicates, n)
            self.add_nodes(nquads)
            self.flush()

    def load_graph(self, g, edge):
        """Load every edge of graph `g`, resuming the edge cursor across
        batches; progress is printed every 10000 source nodes."""
        nodes = list(g.nodes())
        print('loading edges')
        n = 0
        j = 0
        nbrs = None
        num_nodes = len(nodes)
        n_prev = 0
        while n < num_nodes:
            nquads, n, nbrs, j, c = self.nquads_edges(g,
                                                      edge,
                                                      n,
                                                      nodes=nodes,
                                                      neighbors=nbrs,
                                                      j=j)
            query = self.nquads_edges2(nquads)
            self.query(query)
            if n > n_prev + 10000:
                print('%d / %d' % (n, num_nodes))
                n_prev = n