Example #1
0
class rgraph(absgraph):
    def __init__(self, server, schema):
        """Create the Redis client and graph handle, then initialise the base.

        Args:
            server: Address string of the form ``"host:port"``.
            schema: Passed unchanged to the parent class constructor.
        """
        # Batch size used by nquads() and nquads_edges().
        self._maxquery = 100
        # When true, _one_cypher() filters with `!= ""` instead of exists().
        self.cypher_no_exists = True

        address = server.split(':')
        host, port = address
        self._client_stub = redis.Redis(host=host, port=int(port))
        self._graph = Graph('test', self._client_stub)

        super(rgraph, self).__init__(schema)

    def drop_all(self):
        """Delete the underlying graph by calling ``delete()`` on it."""
        self._graph.delete()

    def get_schema(self):
        """Placeholder: does nothing and returns ``None``."""
        pass

    def load_schema(self, schema):
        """Placeholder: ignores ``schema``, does nothing and returns ``None``."""
        pass

    def set_index(self):
        for p, d in self.schema.items():
            if not 'index' in d:
                continue
            elif not d['index']:
                continue
            query = 'CREATE INDEX ON :node (%s)' % p
            self.query(query)

    def _is_intinstance(self, p, v):
        return self._int_type(p) and (isinstance(v, np.float_) or isinstance(
            v, float) or isinstance(v, str))

    def add_node(self, node):
        """Forward ``node`` to the underlying graph's ``add_node``."""
        self._graph.add_node(node)

    def add_edge(self, edge):
        """Forward ``edge`` to the underlying graph's ``add_edge``."""
        self._graph.add_edge(edge)

    def add_nodes(self, nodes):
        for i in nodes:
            node = Node(label='node', properties=i)
            self.add_node(node)

    def nquads(self, df, predicates, i):
        #d = self.id_predicate
        #nodes = [Node(label='node', properties={'%s'%d: df.iloc[i]['%s'%d],
        #    'numeric': srlz(df.iloc[i]['numeric'])}) for i in range(n, n+m)]
        plen = len(predicates)
        lim = min(i + self._maxquery, len(df))
        nquads = []
        while i < lim:
            properties = {}
            for p in predicates:
                try:
                    s = df.iloc[i][p]
                except:
                    s = i
                s = self.serialize(s)
                properties[p] = s
            nquads.append(properties)
            i += 1
        return nquads, i

    def nquads_edges(self, graph, label, i=0, nodes=None, neighbors=None, j=0):
        if nodes is None:
            nodes = list(graph.nodes())

        edges = []
        budget = self._maxquery
        for node in nodes[i:]:
            if neighbors is None:
                neighbors = list(graph.neighbors(node))
            for k, neigh in enumerate(neighbors[j:]):
                if budget == 0:
                    return edges, i, neighbors, j + k, self._maxquery
                edges.append((node, neigh))
                budget -= 1
            i += 1
            neighbors = None
            j = 0
        return edges, i, neighbors, j + k, self._maxquery - budget

    def nquads_edges2(self, edges):
        """
        GRAPH.QUERY test 'MATCH (a:node {name: "acc-tight5.mps.pkl__v998"}),
        (b:node {name: "acc-tight5.mps.pkl__v998"}), (c:node {name:
                "acc-tight5.mps.pkl__v10"}), (d:node {name:
                    "acc-tight5.mps.pkl__slack-min"}) CREATE (a)-[:edge]->(b),
                (c)-[:edge]->(d)'
        """
        # get all the nodes
        if isinstance(edges[0][0], str):
            base = '(s%d:node {%s: "%s"}), (d%d:node {%s: "%s"})'
        else:
            base = '(s%d:node {%s: %s}), (d%d:node {%s: %s})'
        l = []
        #q = 'MATCH '
        #for i, e in enumerate(edges):
        #    if i > 0:
        #        q += ', '
        #    q += base%(i, self.id_predicate, e[0], i, self.id_predicate, e[1])
        #    l.append('(s%d)-[:%s]->(d%d)'%(i, self.edge_attribute, i))
        #q += ' CREATE ' + ', '.join(l)
        m = []
        for i, e in enumerate(edges):
            m.append(base %
                     (i, self.id_predicate, e[0], i, self.id_predicate, e[1]))
            l.append('(s%d)-[:%s]->(d%d)' % (i, self.edge_attribute, i))
        q = 'MATCH ' + ', '.join(m) + ' CREATE ' + ', '.join(l)
        return q

    def parse_neighbors(self, res, ret):
        if len(res.result_set) == 0:
            return
        p = res.result_set[0][0].decode().split('.')[1]  # XXX
        for k, v in res.result_set[1:]:
            k = k.decode()
            v = v.decode()
            if self._is_intinstance(p, k):
                k = int(float(k))
            if self._is_intinstance(p, v):
                v = int(float(v))
            if k in ret:
                ret[k].append(v)
            else:
                ret[k] = [v]

    def neighbors(self, identities, pred=None, id_pred=None):
        """
        We want something like
        GRAPH.QUERY test
        'MATCH (n: node {name:"acc-tight5.mps.pkl__v998"})-[:edge]->(m)
        RETURN m.numeric, m.name'

        or better solution like:

        GRAPH.QUERY test
        'MATCH (n0: node)-[:edge]->(m)
           WHERE n0.name = "acc-tight5.mps.pkl__v998"
           OR n0.name = "acc-tight5.mps.pkl__v10"
         RETURN n0.name, m.name'
        """
        if not id_pred:
            id_pred = self.id_predicate
        if not pred:
            pred = self.edge_attribute

        ret = OrderedDict()
        #
        # For small numbers, this is faster
        #
        if len(identities) < 1:
            for i in identities:
                q = 'MATCH (s: node {%s: "%s"})-[:%s]->(d) RETURN d.%s' % (
                    id_pred, i, pred, id_pred)
                res = self.query(q)
                ret.update({i: [j[0].decode() for j in res.result_set[1:]]})
            return ret
        #
        # Otherwise batch requests
        #
        if isinstance(identities[0], str):
            where_base = 'n.%s = "%s"'
        else:
            where_base = 'n.%s = %s'

        step = 8
        l = 0
        h = step
        lim = len(identities)
        ret = OrderedDict()
        while l < lim:
            b = identities[l:h]
            #where = ['n.%s = "%s"'%(id_pred, i) for i in b]
            where = [where_base % (id_pred, i) for i in b]
            where = ' OR '.join(where)
            query = ('MATCH (n: node)-[:%s]->(m) WHERE %s RETURN n.%s, m.%s' %
                     (pred, where, id_pred, id_pred))
            res = self.query(query)
            self.parse_neighbors(res, ret)
            l = h
            h = h + step

        return ret

    def _int_type(self, predicate):
        return self.schema[predicate]['type'] == 'int'

    def parse_batch(self, res, ret, predicates):
        r = res.result_set[1]
        npreds = len(predicates)
        for i, p in enumerate(predicates):
            l = [j.decode() for j in r[i::npreds]]
            t = self.deserialize_type(p)
            if t:
                l = self.deserialize(l, t)
            # XXX
            if self._int_type(p) and (isinstance(l[0], np.float_)
                                      or isinstance(l[0], float)
                                      or isinstance(l[0], str)):
                l = [int(float(i)) for i in l]
            #if p in ret:
            #    ret[p].extend(l)
            #else:
            #    ret[p] = l
            ret[p].extend(l)

    def batch(self, identities, predicates, identities_predicate=None):
        if identities_predicate is None:
            identities_predicate = self.id_predicate

        if isinstance(identities[0], str):
            mtch_base = '(n%d:node {%s: "%s"})'
        else:
            mtch_base = '(n%d:node {%s: %s})'

        step = 1000
        l = 0
        h = step
        lim = len(identities)
        ret = OrderedDict({p: [] for p in predicates})
        while l < lim:
            nodes = identities[l:h]
            mtch = [
                mtch_base % (j, identities_predicate, i)
                for j, i in enumerate(nodes)
            ]
            mtch = ', '.join(mtch)
            rtrn = [
                'n%d.%s' % (j, p) for j in range(len(nodes))
                for p in predicates
            ]
            rtrn = ', '.join(rtrn)
            query = 'MATCH ' + mtch + ' RETURN ' + rtrn
            res = self.query(query)
            # We get something like
            # [[b'n0.numeric', b'n0.name', b'n1.numeric', b'n1.name'], ...
            # res.result_set[1][0] is like b'xazf'

            self.parse_batch(res, ret, predicates)
            l = h
            h = h + step
        return ret

    def missing_values(self, predicate, low, high):
        """Return an empty list; the arguments are not used."""
        return []

    def _one_cypher(self, predicate, identity):
        #query = 'MATCH (n:node {%s: "%s"}) RETURN n.%s'%(
        #        self.sorted_predicate, identity, predicate)
        if identity:
            if isinstance(identity, str):
                query = 'MATCH (n:node) WHERE n.%s = "%s" RETURN n.%s' % (
                    self.id_predicate, identity, predicate)
            else:
                query = 'MATCH (n:node) WHERE n.%s = "%s" RETURN n.%s' % (
                    self.id_predicate, identity, predicate)
        else:
            if self.cypher_no_exists:
                whr = 'n.%s != ""'
            else:
                whr = 'exists(n.%s)'
            query = ('MATCH (n:node) WHERE ' + whr +
                     ' RETURN n.%s LIMIT 1') % (predicate, predicate)
        return query

    def parse_one(self, res, predicate):
        return res.result_set[1][0].decode()

    def one(self, predicate, identity=None):
        query = self._one_cypher(predicate, identity)
        res = self.query(query)
        r = self.parse_one(res, predicate)
        t = self.deserialize_type(predicate)
        if t:
            r = self.deserialize([r], t)[0]
        return r

    def _count_cypher(self, name=None):
        if name is None:
            name = self.id_predicate
        return 'MATCH (n:node) RETURN COUNT(n)'

    def parse_count(self, res):
        return (int(float(res.result_set[1][0].decode())))

    def count(self, name=None):
        query = self._count_cypher(name)
        res = self.query(query)
        return self.parse_count(res)

    def merge(self):
        query = ''
        for _, node in self._graph.nodes.items():
            query += str(node) + ','

        for edge in self._graph.edges:
            query += str(edge) + ','

        # Discard leading comma.
        if query[-1] is ',':
            query = query[:-1]
        self._graph.merge(query)

    def commit(self):
        """Forward to the underlying graph's ``commit()``."""
        self._graph.commit()

    def flush(self):
        """Forward to the underlying graph's ``flush()``."""
        self._graph.flush()

    def query(self, query):
        """Run ``query`` on the underlying graph and return its result."""
        return self._graph.query(query)

    def range_cypher(self, low, high, predicates, id_predicate, expand):
        unsortable = False
        if id_predicate is None:
            id_predicate = self.id_predicate
            if self.id_predicate_unsortable:
                unsortable = True

        rtrn = ['n.%s' % p for p in predicates]
        rtrn = ', '.join(rtrn)
        if unsortable:
            query = ('MATCH (n: node) RETURN %s ORDER BY n.%s LIMIT %d' %
                     (rtrn, self.sorted_predicate, high - low))
        else:
            pred = self.sorted_predicate
            query = ('MATCH (n: node) WHERE n.%s >= %d AND n.%s < %d '
                     'RETURN %s ORDER BY n.%s' %
                     (pred, low, pred, high, rtrn, pred))
        return query

    def _range_xform(self, ret, predicates):
        ret2 = [{} for i in range(len(ret[predicates[0]]))]
        for k, vs in ret.items():
            for j, v in enumerate(vs):
                ret2[j][k] = v
        return ret2

    def _range(self, low, high, predicates, id_predicate=None, expand=False):
        query = self.range_cypher(low, high, predicates, id_predicate, expand)
        ret = OrderedDict()
        res = self.query(query)
        res_predicates = [p.decode().split('.')[1] for p in res.result_set[0]]

        for pi, p in enumerate(predicates):
            tmp = [i[pi].decode() for i in res.result_set[1:]]
            ret[p] = tmp
        # for RETURN n.numeric, n.name, n.identity, ret[1:] is like
        # [[b'numericvalue0', b'namevalue0', b'identityvalue0'], ..]
        # Now ret looks like [{'name': [name values]}, {'numeric': []} ..]
        # To be compatible with _dataframe, we transform this to
        # [{'name': 'namevalue0', 'numeric': 'numericvalue0'}, {}, {} ... ]
        ret2 = self._range_xform(ret, predicates)
        return ret2

    def load_df(self, df, predicates, n=0):
        print('loading nodes')
        while n < len(df):
            nquads, n = self.nquads(df, predicates, n)
            self.add_nodes(nquads)
            self.flush()

    def load_graph(self, g, edge):
        nodes = list(g.nodes())
        print('loading edges')
        n = 0
        j = 0
        nbrs = None
        num_nodes = len(nodes)
        n_prev = 0
        while n < num_nodes:
            nquads, n, nbrs, j, c = self.nquads_edges(g,
                                                      edge,
                                                      n,
                                                      nodes=nodes,
                                                      neighbors=nbrs,
                                                      j=j)
            query = self.nquads_edges2(nquads)
            self.query(query)
            if n > n_prev + 10000:
                print('%d / %d' % (n, num_nodes))
                n_prev = n