class rgraph(absgraph):
    """RedisGraph-backed implementation of ``absgraph``.

    Every record is stored as a ``:node`` labelled vertex; records are
    connected by ``:edge`` relationships.  All reads/writes go through
    RedisGraph's Cypher dialect via ``self.query``.
    """

    def __init__(self, server, schema):
        """Connect to ``server`` (a ``"host:port"`` string) and open the
        graph named ``'test'``."""
        host, port = server.split(':')
        self._client_stub = redis.Redis(host=host, port=int(port))
        self._graph = Graph('test', self._client_stub)
        # Maximum number of entities folded into a single Cypher query.
        self._maxquery = 100
        # Older RedisGraph has no exists(); emulate it (see _one_cypher).
        self.cypher_no_exists = True
        super().__init__(schema)

    def drop_all(self):
        """Delete the entire graph on the server."""
        self._graph.delete()

    def get_schema(self):
        # Schema introspection is not supported by this backend.
        pass

    def load_schema(self, schema):
        # Schema upload is not supported by this backend.
        pass

    def set_index(self):
        """Create a RedisGraph index for every schema property whose
        descriptor carries a truthy ``'index'`` entry."""
        for prop, desc in self.schema.items():
            if not desc.get('index'):
                continue
            self.query('CREATE INDEX ON :node (%s)' % prop)

    def _is_intinstance(self, p, v):
        """True when predicate ``p`` is declared 'int' in the schema but
        value ``v`` arrived as a float/str and must be coerced back.

        NOTE(review): ``np.float_`` was removed in numpy 2.0; if this
        runs against numpy >= 2.0 it needs ``np.float64`` — confirm.
        """
        return self._int_type(p) and isinstance(v, (np.float_, float, str))

    def add_node(self, node):
        """Queue a prebuilt Node for the next flush/commit."""
        self._graph.add_node(node)

    def add_edge(self, edge):
        """Queue a prebuilt edge for the next flush/commit."""
        self._graph.add_edge(edge)

    def add_nodes(self, nodes):
        """Wrap each property dict in ``nodes`` into a Node and queue it."""
        for props in nodes:
            self.add_node(Node(label='node', properties=props))

    def nquads(self, df, predicates, i):
        """Serialize up to ``self._maxquery`` rows of ``df`` starting at
        row ``i`` into property dicts.

        Returns ``(property_dicts, next_row_index)`` so callers can
        resume where this batch stopped.
        """
        lim = min(i + self._maxquery, len(df))
        batch = []
        while i < lim:
            properties = {}
            for p in predicates:
                try:
                    s = df.iloc[i][p]
                except Exception:
                    # Missing column: fall back to the row index,
                    # matching the original best-effort behavior.
                    s = i
                properties[p] = self.serialize(s)
            batch.append(properties)
            i += 1
        return batch, i

    def nquads_edges(self, graph, label, i=0, nodes=None, neighbors=None, j=0):
        """Collect up to ``self._maxquery`` edges of ``graph``, resumably.

        ``i``/``j`` are the node/neighbor indices to resume from;
        ``nodes``/``neighbors`` may carry partially consumed lists from a
        previous call.  ``label`` is accepted for interface compatibility
        but unused.

        Returns ``(edges, next_i, pending_neighbors, next_j, n_edges)``.
        """
        if nodes is None:
            nodes = list(graph.nodes())
        edges = []
        budget = self._maxquery
        # Initialized so the final return is defined even when no
        # neighbor is enumerated (empty node slice / isolated node);
        # the original raised NameError on ``k`` in that case.
        k = -1
        for node in nodes[i:]:
            if neighbors is None:
                neighbors = list(graph.neighbors(node))
            for k, neigh in enumerate(neighbors[j:]):
                if budget == 0:
                    # Budget exhausted mid-node: report the resume point.
                    return edges, i, neighbors, j + k, self._maxquery
                edges.append((node, neigh))
                budget -= 1
            i += 1
            neighbors = None
            j = 0
        return edges, i, neighbors, j + k, self._maxquery - budget

    def nquads_edges2(self, edges):
        """Build one Cypher query that MATCHes both endpoints of every
        pair in ``edges`` and CREATEs the connecting relationships, e.g.::

            MATCH (s0:node {name: "a"}), (d0:node {name: "b"}), ...
            CREATE (s0)-[:edge]->(d0), ...
        """
        # String identities must be quoted in Cypher, numeric ones not.
        if isinstance(edges[0][0], str):
            base = '(s%d:node {%s: "%s"}), (d%d:node {%s: "%s"})'
        else:
            base = '(s%d:node {%s: %s}), (d%d:node {%s: %s})'
        matches = []
        creates = []
        for idx, (src, dst) in enumerate(edges):
            matches.append(base % (idx, self.id_predicate, src,
                                   idx, self.id_predicate, dst))
            creates.append('(s%d)-[:%s]->(d%d)'
                           % (idx, self.edge_attribute, idx))
        return 'MATCH ' + ', '.join(matches) + ' CREATE ' + ', '.join(creates)

    def parse_neighbors(self, res, ret):
        """Fold a (source-id, neighbor-id) result set into ``ret``
        (mapping id -> list of neighbor ids).  Mutates ``ret``."""
        if len(res.result_set) == 0:
            return
        # Header cell looks like b'n.<predicate>'; recover the predicate
        # so int-typed ids can be coerced below.
        p = res.result_set[0][0].decode().split('.')[1]
        for k, v in res.result_set[1:]:
            k = k.decode()
            v = v.decode()
            if self._is_intinstance(p, k):
                k = int(float(k))
            if self._is_intinstance(p, v):
                v = int(float(v))
            ret.setdefault(k, []).append(v)

    def neighbors(self, identities, pred=None, id_pred=None):
        """Return ``{identity: [neighbor ids]}`` for every id in
        ``identities``, batching several ids per query::

            MATCH (n: node)-[:edge]->(m)
            WHERE n.name = "x" OR n.name = "y" RETURN n.name, m.name
        """
        if not id_pred:
            id_pred = self.id_predicate
        if not pred:
            pred = self.edge_attribute
        ret = OrderedDict()
        # Per-identity queries are faster below this threshold.  The
        # threshold is currently < 1, so this path only catches the
        # empty input (which must not reach identities[0] below).
        if len(identities) < 1:
            for ident in identities:
                q = ('MATCH (s: node {%s: "%s"})-[:%s]->(d) RETURN d.%s'
                     % (id_pred, ident, pred, id_pred))
                res = self.query(q)
                ret.update({ident: [row[0].decode()
                                    for row in res.result_set[1:]]})
            return ret
        # Batch requests: OR several identities into one WHERE clause.
        if isinstance(identities[0], str):
            where_base = 'n.%s = "%s"'
        else:
            where_base = 'n.%s = %s'
        step = 8
        lo, hi = 0, step
        lim = len(identities)
        ret = OrderedDict()
        while lo < lim:
            clause = ' OR '.join(where_base % (id_pred, ident)
                                 for ident in identities[lo:hi])
            query = ('MATCH (n: node)-[:%s]->(m) WHERE %s RETURN n.%s, m.%s'
                     % (pred, clause, id_pred, id_pred))
            res = self.query(query)
            self.parse_neighbors(res, ret)
            lo = hi
            hi += step
        return ret

    def _int_type(self, predicate):
        """True when the schema declares ``predicate`` as 'int'."""
        return self.schema[predicate]['type'] == 'int'

    def parse_batch(self, res, ret, predicates):
        """Decode one batch() result row into ``ret``
        (mapping predicate -> list of values).  Mutates ``ret``."""
        row = res.result_set[1]
        npreds = len(predicates)
        for idx, p in enumerate(predicates):
            # Values for predicate idx are interleaved every npreds cells.
            vals = [cell.decode() for cell in row[idx::npreds]]
            t = self.deserialize_type(p)
            if t:
                vals = self.deserialize(vals, t)
            # Coerce int-typed predicates back from their transport form;
            # guard against an empty column (original indexed vals[0]
            # unconditionally).
            if vals and self._is_intinstance(p, vals[0]):
                vals = [int(float(v)) for v in vals]
            ret[p].extend(vals)

    def batch(self, identities, predicates, identities_predicate=None):
        """Fetch ``predicates`` for every identity, up to 1000 nodes per
        query.

        Returns an OrderedDict mapping predicate -> list of values in
        ``identities`` order.
        """
        if identities_predicate is None:
            identities_predicate = self.id_predicate
        ret = OrderedDict((p, []) for p in predicates)
        if not identities:
            return ret
        if isinstance(identities[0], str):
            mtch_base = '(n%d:node {%s: "%s"})'
        else:
            mtch_base = '(n%d:node {%s: %s})'
        step = 1000
        lo, hi = 0, step
        lim = len(identities)
        while lo < lim:
            nodes = identities[lo:hi]
            mtch = ', '.join(mtch_base % (j, identities_predicate, ident)
                             for j, ident in enumerate(nodes))
            rtrn = ', '.join('n%d.%s' % (j, p)
                             for j in range(len(nodes)) for p in predicates)
            res = self.query('MATCH ' + mtch + ' RETURN ' + rtrn)
            # result_set looks like
            # [[b'n0.numeric', b'n0.name', b'n1.numeric', ...], [values...]]
            self.parse_batch(res, ret, predicates)
            lo = hi
            hi += step
        return ret

    def missing_values(self, predicate, low, high):
        """This backend does not track missing values."""
        return []

    def _one_cypher(self, predicate, identity):
        """Build the Cypher used by one(): fetch ``predicate`` for a
        single node, by identity when given, else for any node that has
        the predicate.

        String identities are quoted; numeric ones are not (the original
        quoted both branches identically, breaking numeric-id lookups).
        """
        if identity:
            if isinstance(identity, str):
                query = ('MATCH (n:node) WHERE n.%s = "%s" RETURN n.%s'
                         % (self.id_predicate, identity, predicate))
            else:
                query = ('MATCH (n:node) WHERE n.%s = %s RETURN n.%s'
                         % (self.id_predicate, identity, predicate))
        else:
            # Older RedisGraph lacks exists(); emulate with an inequality.
            if self.cypher_no_exists:
                whr = 'n.%s != ""'
            else:
                whr = 'exists(n.%s)'
            query = ('MATCH (n:node) WHERE ' + whr +
                     ' RETURN n.%s LIMIT 1') % (predicate, predicate)
        return query

    def parse_one(self, res, predicate):
        """Extract the single value from a one() result set."""
        return res.result_set[1][0].decode()

    def one(self, predicate, identity=None):
        """Return one value of ``predicate`` (for ``identity`` when
        given), deserialized per the schema."""
        query = self._one_cypher(predicate, identity)
        res = self.query(query)
        r = self.parse_one(res, predicate)
        t = self.deserialize_type(predicate)
        if t:
            r = self.deserialize([r], t)[0]
        return r

    def _count_cypher(self, name=None):
        """Cypher for count(); ``name`` is accepted but the count is
        always over all :node vertices."""
        if name is None:
            name = self.id_predicate
        return 'MATCH (n:node) RETURN COUNT(n)'

    def parse_count(self, res):
        """Decode the integer returned by a COUNT query."""
        return int(float(res.result_set[1][0].decode()))

    def count(self, name=None):
        """Return the number of :node vertices in the graph."""
        res = self.query(self._count_cypher(name))
        return self.parse_count(res)

    def merge(self):
        """MERGE all locally queued nodes and edges in one query.

        Uses ``','.join`` instead of the original trailing-comma strip,
        which compared strings with ``is`` and raised IndexError on an
        empty graph.
        """
        parts = [str(node) for _, node in self._graph.nodes.items()]
        parts.extend(str(edge) for edge in self._graph.edges)
        self._graph.merge(','.join(parts))

    def commit(self):
        """Push queued entities to the server."""
        self._graph.commit()

    def flush(self):
        """Flush queued entities to the server."""
        self._graph.flush()

    def query(self, query):
        """Run a raw Cypher query and return the RedisGraph result."""
        return self._graph.query(query)

    def range_cypher(self, low, high, predicates, id_predicate, expand):
        """Build the Cypher used by _range().

        ``expand`` is accepted for interface compatibility but unused.
        """
        if id_predicate is None:
            id_predicate = self.id_predicate
        rtrn = ', '.join('n.%s' % p for p in predicates)
        if self.id_predicate_unsortable:
            # NOTE(review): ``low`` is ignored here (no SKIP clause), so
            # this always returns the first ``high - low`` rows — confirm
            # that is intentional.
            return ('MATCH (n: node) RETURN %s ORDER BY n.%s LIMIT %d'
                    % (rtrn, self.sorted_predicate, high - low))
        pred = self.sorted_predicate
        return ('MATCH (n: node) WHERE n.%s >= %d AND n.%s < %d '
                'RETURN %s ORDER BY n.%s'
                % (pred, low, pred, high, rtrn, pred))

    def _range_xform(self, ret, predicates):
        """Transform ``{pred: [v0, v1, ...]}`` into row dicts
        ``[{pred: v0, ...}, {pred: v1, ...}, ...]`` for _dataframe."""
        rows = [{} for _ in range(len(ret[predicates[0]]))]
        for pred, values in ret.items():
            for j, v in enumerate(values):
                rows[j][pred] = v
        return rows

    def _range(self, low, high, predicates, id_predicate=None, expand=False):
        """Fetch rows in ``[low, high)`` ordered by the sorted predicate.

        Returns a list of per-row dicts (see _range_xform).
        """
        query = self.range_cypher(low, high, predicates, id_predicate, expand)
        res = self.query(query)
        # result_set[1:] rows look like
        # [b'numericvalue0', b'namevalue0', b'identityvalue0'] per node.
        ret = OrderedDict()
        for pi, p in enumerate(predicates):
            ret[p] = [row[pi].decode() for row in res.result_set[1:]]
        return self._range_xform(ret, predicates)

    def load_df(self, df, predicates, n=0):
        """Load dataframe rows as nodes, ``self._maxquery`` at a time."""
        print('loading nodes')
        while n < len(df):
            props, n = self.nquads(df, predicates, n)
            self.add_nodes(props)
            self.flush()

    def load_graph(self, g, edge):
        """Load the edges of a networkx-style graph ``g``, batched, with
        progress output every ~10000 nodes."""
        nodes = list(g.nodes())
        print('loading edges')
        n = 0
        j = 0
        nbrs = None
        num_nodes = len(nodes)
        n_prev = 0
        while n < num_nodes:
            batch, n, nbrs, j, _ = self.nquads_edges(
                g, edge, n, nodes=nodes, neighbors=nbrs, j=j)
            # An all-isolated batch yields no edges; nquads_edges2 would
            # index edges[0] on an empty list.
            if batch:
                self.query(self.nquads_edges2(batch))
            if n > n_prev + 10000:
                print('%d / %d' % (n, num_nodes))
                n_prev = n