Example #1
0
    def __init__(self, graph):
        """Convert ``graph`` into a collection of 8x8 edge tiles ("octiles").

        Parameters
        ----------
        graph:
            Object exposing ``nodes`` and ``edges`` tables. ``edges`` must
            carry an ``'!ij'`` column of ``(i, j)`` node-index pairs and may
            carry an ``'!w'`` column of edge weights; the remaining columns
            are treated as labels.

        Attributes set
        --------------
        n_node, node_type, node, degree, weighted, edge_type, octile_list,
        n_octile, octile_hdr.

        Notes
        -----
        Every edge is stored twice, once at ``(i, j)`` and once at ``(j, i)``.
        Within an octile the element for local row ``r`` and column ``c``
        lives at slot ``r + c * 8``, and the non-zero mask uses the same slot
        number as its bit position so that mask and storage always agree.
        """
        nodes = graph.nodes
        edges = graph.edges
        self.n_node = len(nodes)

        ''' add phantom label if none exists to facilitate C++ interop '''
        if len(nodes.columns) == 0:
            nodes = nodes.assign(labeled=lambda _: False)

        if len(edges.columns) == 1:
            assert(edges.columns[0] == '!ij')
            edges = edges.assign(labeled=lambda _: False)

        ''' determine node type '''
        self.node_type = node_type = rowtype(nodes)
        self.node = umlike(nodes[list(node_type.names)]
                           .to_records(index=False).astype(node_type))

        ''' determine whether graph is weighted, determine edge type,
            and compute node degrees '''
        # NOTE(review): ``self.padded_size`` is read here but not assigned in
        # this method; presumably a property derived from ``n_node`` - confirm.
        self.degree = degree = umzeros(self.padded_size, dtype=np.float32)
        edge_label_type = rowtype(edges, exclude=['!ij', '!w'])
        if '!w' in edges.columns:  # weighted graph
            self.weighted = True
            edge_type = np.dtype([('weight', np.float32),
                                  ('label', edge_label_type)], align=True)
            self.edge_type = edge_type
            for (i, j), w in zip(edges['!ij'], edges['!w']):
                degree[i] += w
                degree[j] += w
        else:
            self.weighted = False
            self.edge_type = edge_type = edge_label_type
            for i, j in edges['!ij']:
                degree[i] += 1.0
                degree[j] += 1.0

        ''' collect non-zero edge octiles '''
        uniq_oct = np.unique([(i - i % 8, j - j % 8)
                              for i, j in edges['!ij']], axis=0)
        # add the mirrored (left, upper) tiles so both triangles are covered
        uniq_oct = np.unique(np.vstack((uniq_oct, uniq_oct[:, -1::-1])),
                             axis=0)
        octile_dict = {(upper, left): [np.uint64(), umzeros(64, edge_type)]
                       for upper, left in uniq_oct}

        # The label fields are the same in both the weighted and unweighted
        # case (``edge_type['label']`` is ``edge_label_type``), so look them
        # up once instead of once per edge.
        label_keys = edge_label_type.names
        one = np.uint64(1)

        for index, row in edges.iterrows():
            i, j = row['!ij']
            label = tuple(row[key] for key in label_keys)
            edge = (row['!w'], label) if self.weighted else label
            r = i % 8
            c = j % 8
            upper = i - r
            left = j - c
            # Slot of (r, c) inside its octile and of the transposed entry
            # (c, r) inside the mirrored octile.
            slot = r + c * 8
            slot_t = c + r * 8
            # The mask bit must match the storage slot. Shifting in uint64
            # keeps bit 63 representable even when the indices are NumPy
            # signed integers.
            octile_dict[(upper, left)][0] |= one << np.uint64(slot)
            octile_dict[(upper, left)][1][slot] = edge
            octile_dict[(left, upper)][0] |= one << np.uint64(slot_t)
            octile_dict[(left, upper)][1][slot_t] = edge

        ''' create edge octiles on GPU '''
        self.octile_list = [Octile(upper, left, nzmask, elements)
                            for (upper, left), (nzmask, elements)
                            in octile_dict.items()]
        self.n_octile = len(self.octile_list)

        ''' collect edge octile structures into continuous buffer '''
        self.octile_hdr = umlike(np.array([x.state for x in self.octile_list],
                                          Octile.dtype))
Example #2
0
    def __init__(self, graph):
        """Build the octile-based representation of ``graph``.

        Parameters
        ----------
        graph:
            Object exposing ``nodes`` and ``edges`` tables. ``nodes`` must
            contain an ``'!i'`` column; ``edges`` must contain ``'!i'`` and
            ``'!j'`` columns and may contain an ``'!w'`` weight column.

        Raises
        ------
        TypeError
            If a non-scalar column is not list-like (list, tuple, ndarray),
            or if a list-like column has non-scalar elements.

        Attributes set
        --------------
        nodes, edges, n_node, node_t, nodes_aos, degree, weighted, edge_t,
        edges_aos, n_octile, octiles.
        """
        # Shallow copies: columns may be added/dropped below without
        # restructuring the caller's tables.
        self.nodes = nodes = graph.nodes.copy(deep=False)
        self.edges = edges = graph.edges.copy(deep=False)
        self.n_node = len(nodes)
        ''' substitute columns corresponding to object-type node/edge
        attributes to their GPU counterparts '''
        # NOTE(review): ``np.issctype`` was removed in NumPy 2.0; this method
        # still requires an older NumPy until that call is migrated.
        for df in [nodes, edges]:
            for key in list(df.columns):
                if np.issctype(df[key].dtype):
                    continue  # scalar columns need no conversion
                if not issubclass(df[key].concrete_type,
                                  (list, tuple, np.ndarray)):
                    raise TypeError(
                        f'Unsupported non-scalar attribute {key} '
                        f'of type {df[key].concrete_type}')
                inner_type = common_min_type.of_types([
                    x.dtype if isinstance(x, np.ndarray) else
                    common_min_type.of_values(x) for x in df[key]
                ])
                if not np.issctype(inner_type):
                    raise TypeError(
                        f'Expect scalar elements in tuple or list '
                        f'attributes, got {inner_type} for attribute {key}.')
                # Concatenate all rows of the column into one flat buffer and
                # expose each row as a slice of it.
                buffer = memoryview(
                    umlike(
                        np.fromiter(it.chain.from_iterable(df[key]),
                                    dtype=inner_type)))
                # ``int``/``object`` replace the ``np.int``/``np.object``
                # aliases, which were removed in NumPy 1.24.
                size = np.fromiter(map(len, df[key]), dtype=int)
                head = np.cumsum(size) - size  # start offset of each row
                # mangle key with type information
                tag = '${key}::frozen_array::{dtype}'.format(
                    key=key, dtype=inner_type.str)
                data = np.empty_like(df[key], dtype=object)
                for i, (h, s) in enumerate(zip(head, size)):
                    data[i] = np.frombuffer(
                        buffer[h:h + s], dtype=inner_type).view(
                            self.CustomType.FrozenArray)
                df[tag] = data
                df.drop([key], inplace=True)
        ''' add phantom label if none exists to facilitate C++ interop '''
        assert (len(nodes.columns) >= 1)
        if len(nodes.columns) == 1:
            nodes['labeled'] = np.zeros(len(nodes), np.bool_)

        assert (len(edges.columns) >= 2)
        if len(edges.columns) == 2:
            assert ('!i' in edges and '!j' in edges)
            edges['labeled'] = np.zeros(len(edges), np.bool_)
        ''' determine node type '''
        i = nodes['!i']
        nodes.drop(['!i'], inplace=True)
        self.node_t = node_t = nodes.rowtype()
        self.nodes_aos = umempty(len(nodes), dtype=node_t)
        # scatter the node records to the positions given by '!i'
        self.nodes_aos[i] = list(nodes.iterstates())
        ''' determine whether graph is weighted, determine edge type,
            and compute node degrees '''
        self.degree = degree = umzeros(self.n_node, dtype=np.float32)
        edge_t = edges.drop(['!i', '!j', '!w']).rowtype()
        self_loops = edges[edges['!i'] == edges['!j']]
        nnz = len(edges)
        if '!w' in edges:  # weighted graph
            self.weighted = True
            np.add.at(degree, edges['!i'], edges['!w'])
            np.add.at(degree, edges['!j'], edges['!w'])
            # a self-loop was added at both endpoints above; undo one of them
            np.subtract.at(degree, self_loops['!i'], self_loops['!w'])

            if edge_t.itemsize != 0:
                labels = list(edges[edge_t.names].iterstates())
            else:
                labels = [None] * len(edges)

            edge_t = np.dtype([('weight', np.float32), ('label', edge_t)],
                              align=True)

            edges_aos = np.fromiter(zip(edges['!w'], labels),
                                    dtype=edge_t,
                                    count=nnz)
        else:
            self.weighted = False
            np.add.at(degree, edges['!i'], 1.0)
            np.add.at(degree, edges['!j'], 1.0)
            # a self-loop was added at both endpoints above; undo one of them
            np.subtract.at(degree, self_loops['!i'], 1.0)
            edges_aos = np.fromiter(edges[edge_t.names].iterstates(),
                                    dtype=edge_t,
                                    count=nnz)
        self.edge_t = edge_t
        # nodes without edges get degree 1 instead of 0
        degree[degree == 0] = 1.0
        ''' collect non-zero edge octiles '''
        indices = np.empty((4, nnz * 2), dtype=np.uint32, order='C')
        i, j, up, lf = indices
        i[:nnz] = edges['!i']
        j[:nnz] = edges['!j']
        # replicate & swap i and j for the lower triangular part
        i[nnz:], j[nnz:] = j[:nnz], i[:nnz]
        # get upper left corner of owner octiles
        up[:] = i - i % 8
        lf[:] = j - j % 8

        # np.unique implies lexical sort
        (lf, up, j, i), perm = np.unique(indices[-1::-1, :],
                                         axis=1,
                                         return_index=True)
        self.edges_aos = umempty(len(i), edge_t)
        self.edges_aos[:] = edges_aos[perm % nnz]  # mod nnz due to symmetry

        # mark the first entry of every run that shares the same (up, lf)
        diff = np.empty_like(up)
        diff[1:] = (up[:-1] != up[1:]) | (lf[:-1] != lf[1:])
        diff[:1] = True
        oct_offset = np.flatnonzero(diff)
        self.n_octile = len(oct_offset)

        # per-octile bit masks: ``nzmasks`` uses bit (row + col * 8),
        # ``nzmasks_r`` uses the transposed bit (col + row * 8)
        nzmasks = np.bitwise_or.reduceat(
            1 << (i - up + (j - lf) * 8).astype(np.uint64), oct_offset)
        nzmasks_r = np.bitwise_or.reduceat(
            1 << (j - lf + (i - up) * 8).astype(np.uint64), oct_offset)

        self.octiles = octiles = umempty(self.n_octile, self.Octile.dtype)
        # the first field is the address of each octile's first element
        # inside ``edges_aos``
        octiles[:] = list(
            zip(
                int(self.edges_aos.base) + oct_offset * edge_t.itemsize,
                nzmasks, nzmasks_r, up[oct_offset], lf[oct_offset]))
Example #3
0
    def __call__(self, graphs, diags, node_kernel, edge_kernel, p, q, eps,
                 ftol, gtol, jobs, starts, gramian, active, gradient, nX, nY,
                 nJ, traits, timer):
        """Register the graphs, generate and load the kernel, and launch it.

        The stages are timed through ``timer.tic``/``timer.toc``:

        1. every graph is registered and its state is stored in one array;
           consecutive graphs are checked with ``_assert_homogeneous``;
        2. a one-element ``uint32`` job counter is allocated;
        3. source code is generated for the node kernel, the edge kernel and
           the starting probability, then rendered through ``self.template``;
        4. the ``graph_maximin_distance`` function is fetched from
           ``self.module``;
        5. the launch configuration and scratch space are computed and the
           kernel parameters are copied to the module's global symbols;
        6. the kernel is launched and ``self.ctx`` is synchronized.

        ``gradient`` may be ``None``, in which case ``np.uintp(0)`` is passed
        in its place. ``q`` is passed twice; the second copy is a placeholder
        for ``q0``. The theta grid of each kernel is uploaded only when
        ``traits.eval_gradient is True``.
        """
        # ---- graphs ----
        timer.tic('transferring graphs to GPU')

        device_graphs = umempty(len(graphs), dtype=OctileGraph.dtype)
        previous = None
        for slot, graph in enumerate(graphs):
            current, state = self._register_graph(graph)
            if slot > 0:
                self._assert_homogeneous(previous, current)
            device_graphs[slot] = state
            previous = current

        # all graphs are homogeneous, so the last one is representative
        is_weighted = previous.weighted
        node_dtype = previous.node_t
        edge_dtype = previous.edge_t

        timer.toc('transferring graphs to GPU')

        # ---- global job counter ----
        timer.tic('allocate global job counter')
        job_counter = umzeros(1, np.uint32)
        timer.toc('allocate global job counter')

        # ---- code generation ----
        timer.tic('code generation')
        if is_weighted:
            # weighted edges combine the weight with the label kernel
            edge_kernel = TensorProduct(weight=Product(),
                                        label=edge_kernel)

        use_theta_grid = traits.eval_gradient is True
        node_src = self.gencode_kernel(node_kernel, 'node_kernel')
        edge_src = self.gencode_kernel(edge_kernel, 'edge_kernel')
        start_src = self.gencode_probability(p, 'p_start')

        with self.template.context(traits=traits) as tpl:
            self.source = tpl.render(
                node_kernel=node_src,
                edge_kernel=edge_src,
                p_start=start_src,
                node_t=decltype(node_dtype),
                edge_t=decltype(edge_dtype),
            )
        timer.toc('code generation')

        # ---- JIT ----
        timer.tic('JIT')
        launch = self.module.get_function('graph_maximin_distance')
        timer.toc('JIT')

        # ---- launch configuration ----
        timer.tic('calculating launch configuration')
        n_blocks = self.device.MULTIPROCESSOR_COUNT * self.block_per_sm
        warp_symbol = self.module.get_global('shmem_bytes_per_warp')
        bytes_per_warp = warp_symbol[1]
        bytes_per_block = ((bytes_per_warp * self.block_size)
                           // self.device.WARP_SIZE)

        largest_graph = np.max([len(g.nodes) for g in graphs])
        pcg_scratch = self.allocate_pcg_scratch(n_blocks, largest_graph)

        # ---- micro kernel parameters ----
        for name, uker in zip(('node_kernel', 'edge_kernel'),
                              (node_kernel, edge_kernel)):
            packed = np.array(
                self.pack_state(uker, diff_grid=use_theta_grid, diff_eps=eps),
                dtype=uker.dtype,
            )

            symbol_ptr, _ = self.module.get_global(name)
            cuda.memcpy_htod(symbol_ptr, packed[:1])

            if not use_theta_grid:
                continue

            grid_ptr, _ = self.module.get_global(f'{name}_diff_grid')
            theta_ptr, _ = self.module.get_global(f'{name}_flat_theta')
            cuda.memcpy_htod(grid_ptr, packed[1:])
            flat_theta = np.fromiter(flatten(uker.theta), dtype=np.float32)
            cuda.memcpy_htod(theta_ptr, flat_theta)

        start_ptr, _ = self.module.get_global('p_start')
        start_state = np.array([p.state], dtype=p.dtype)
        cuda.memcpy_htod(start_ptr, start_state)

        timer.toc('calculating launch configuration')

        # ---- kernel execution ----
        timer.tic('GPU kernel execution')
        gradient_arg = np.uintp(0) if gradient is None else gradient
        stop_prob = np.float32(q)
        launch_args = (
            device_graphs,
            diags,
            pcg_scratch,
            jobs,
            starts,
            gramian,
            active,
            gradient_arg,
            job_counter,
            np.uint32(len(jobs)),
            np.uint32(nX),
            np.uint32(nY),
            np.uint32(nJ),
            stop_prob,
            np.float32(q),  # placeholder for q0
            np.float32(eps),
            np.float32(ftol),
            np.float32(gtol),
        )
        launch(
            *launch_args,
            grid=(n_blocks, 1, 1),
            block=(self.block_size, 1, 1),
            shared=bytes_per_block,
        )
        self.ctx.synchronize()
        timer.toc('GPU kernel execution')
Example #4
0
 def zeros(size, dtype=np.float32):
     """Return ``umzeros(size, dtype)``; ``dtype`` defaults to ``np.float32``."""
     buffer = umzeros(size, dtype)
     return buffer