def _as_float32_boxes(array, name):
    """Return ``array`` as a C-contiguous float32 ``(n, 4)`` CuPy array."""
    array = cp.asarray(array, dtype=cp.float32)
    if array.ndim != 2 or array.shape[1] < 4:
        raise ValueError(
            '%s must have shape (n, >=4), got %r' % (name, array.shape))
    # Only the four coordinate columns are read by the kernel; making the
    # slice contiguous lets it address element (row, col) as 4 * row + col.
    return cp.ascontiguousarray(array[:, :4])


def ious_gpu_1(boxes, query_boxes):
    """Compute the pairwise IoU of query boxes and boxes on the GPU.

    Each row holds four coordinates where columns 0/2 are the min/max along
    one axis and columns 1/3 the min/max along the other (conventionally
    ``x1, y1, x2, y2``). Extents are measured inclusively
    (``max - min + 1``), as in the original kernel source.

    The previous version did not work: it indexed the arrays as
    ``boxes[b, 2]`` inside the CUDA source (the C++ comma operator, i.e.
    ``boxes[2]``), mixed a non-raw array argument with ``size=1``, and never
    stored the computed ratio.

    Args:
        boxes: Array-like of shape ``(n_boxes, >=4)``.
        query_boxes: Array-like of shape ``(n_query_boxes, >=4)``.

    Returns:
        ``cp.ndarray`` of dtype ``float32`` and shape
        ``(n_query_boxes, n_boxes)``; entry ``[q, b]`` is the IoU of
        ``query_boxes[q]`` and ``boxes[b]``, or ``0`` if they do not overlap.

    Raises:
        ValueError: If either input is not 2-D with at least four columns.
    """
    boxes = _as_float32_boxes(boxes, 'boxes')
    query_boxes = _as_float32_boxes(query_boxes, 'query_boxes')

    n_boxes = boxes.shape[0]
    n_query_boxes = query_boxes.shape[0]
    ious = cp.zeros((n_query_boxes, n_boxes), dtype=cp.float32)
    if ious.size == 0:
        # Nothing to compute; also avoids dividing by num_boxes == 0 below.
        return ious

    # The inputs are ``raw`` (indexed manually), so the loop size comes from
    # the only non-raw array: the output. One thread handles one (q, b) pair.
    kernel = cp.ElementwiseKernel(
        'raw float32 boxes, raw float32 query_boxes, int32 num_boxes',
        'float32 iou',
        '''
        const int q = i / num_boxes;
        const int b = i % num_boxes;

        const float bx1 = boxes[4 * b + 0];
        const float by1 = boxes[4 * b + 1];
        const float bx2 = boxes[4 * b + 2];
        const float by2 = boxes[4 * b + 3];
        const float qx1 = query_boxes[4 * q + 0];
        const float qy1 = query_boxes[4 * q + 1];
        const float qx2 = query_boxes[4 * q + 2];
        const float qy2 = query_boxes[4 * q + 3];

        const float iw = min(bx2, qx2) - max(bx1, qx1) + 1.0f;
        const float ih = min(by2, qy2) - max(by1, qy1) + 1.0f;
        if (iw > 0.0f && ih > 0.0f) {
            const float inter = iw * ih;
            const float box_area = (bx2 - bx1 + 1.0f) * (by2 - by1 + 1.0f);
            const float query_area = (qx2 - qx1 + 1.0f) * (qy2 - qy1 + 1.0f);
            iou = inter / (box_area + query_area - inter);
        } else {
            iou = 0.0f;
        }
        ''',
        'intersection_over_unions')
    kernel(boxes, query_boxes, n_boxes, ious)
    return ious
def iou_gpu_0(anchor, gt_box):
    """Compute the intersection over union of one anchor and one gt_box.

    Both boxes are read as four values where indices 0/2 are the min/max
    along one axis and indices 1/3 the min/max along the other
    (conventionally ``x1, y1, x2, y2``). Extents are exclusive
    (``max - min``).

    Not very fast (a kernel launch for a single pair), but works.

    Args:
        anchor: Array-like with at least four elements.
        gt_box: Array-like with at least four elements.

    Returns:
        The kernel's ``float32`` output array holding the single IoU value.
        The value is ``0`` when the union area is not positive (degenerate
        boxes), instead of the NaN a plain ``0 / 0`` would produce.
    """
    # The kernel parameters are declared float32; convert up front so other
    # dtypes (or host arrays) are accepted as well.
    anchor = cp.asarray(anchor, dtype=cp.float32)
    gt_box = cp.asarray(gt_box, dtype=cp.float32)

    kernel = cp.ElementwiseKernel(
        'raw float32 anchor, raw float32 gt_box',
        'float32 iou',
        '''
        const float iw =
            max(0.0f, min(anchor[2], gt_box[2]) - max(anchor[0], gt_box[0]));
        const float ih =
            max(0.0f, min(anchor[3], gt_box[3]) - max(anchor[1], gt_box[1]));
        const float inters = iw * ih;
        const float anchor_area =
            (anchor[2] - anchor[0]) * (anchor[3] - anchor[1]);
        const float gt_area =
            (gt_box[2] - gt_box[0]) * (gt_box[3] - gt_box[1]);
        const float union_area = anchor_area + gt_area - inters;
        iou = union_area > 0.0f ? inters / union_area : 0.0f;
        ''',
        'intersection_over_union')
    # size=1 is required here: every array argument is ``raw``, so there is
    # no array to infer the loop size from, and a single thread computes the
    # single IoU value.
    return kernel(anchor, gt_box, size=1)
# NOTE(review): the two loops below are the tail of a definition whose header
# is not visible in this view; C, LmI, K and n_batch come from that scope.
# Per batch item i: C[i, 1] = LmI . C[i, 0], then for k >= 2 the recurrence
# C[i, k] = 2 * LmI . C[i, k - 1] - C[i, k - 2].
for i in six.moves.range(n_batch):
    C[i, 1] = LmI.dot(C[i, 0])
for k in six.moves.range(2, K):
    for i in six.moves.range(n_batch):
        C[i, k] = 2 * LmI.dot(C[i, k - 1]) - C[i, k - 2]

# csr_matvec is only defined when CUDA is available.
if chainer.cuda.available:
    # Computes y = Lx, with L given by its CSR arrays (data, indices, indptr).
    # x will be flattened in C-order
    # y will be flattened in C-order
    # Both are addressed as (p, n_cols) matrices, where n_cols is the total
    # loop size divided by p. Each thread produces one element y[row, col] by
    # summing data[j] * x[indices[j], col] over the stored entries j of `row`.
    csr_matvec = cupy.ElementwiseKernel(
        'I p, raw T data, raw I indices, raw I indptr, raw T x',
        'T y',
        ''' y = 0; int n_cols = _ind.size() / p; int row_idx = i / n_cols; int col_idx = i % n_cols; for(I j = indptr[row_idx]; j < indptr[(row_idx+1)]; j++) { y += data[j] * x[indices[j] * n_cols + col_idx]; } ''',
        'csr_matvec')


def chebyshev_matvec_gpu(C, x, K, n_batch, LmI_data, LmI_indices, LmI_indptr):
    """Fill ``C`` in place with the Chebyshev recurrence terms on the GPU.

    With ``L`` the sparse matrix described by the CSR arrays ``LmI_data``,
    ``LmI_indices`` and ``LmI_indptr``:

        C[0] = x with its three axes reversed
        C[1] = L C[0]                      (only if K > 1)
        C[k] = 2 L C[k - 1] - C[k - 2]     (for 2 <= k < K)

    Args:
        C: Output buffer indexed by ``k`` along its first axis; ``C.shape[1]``
            is passed to the kernel as the row count.
            Presumably shaped (K, N, c_in, n_batch) -- TODO confirm.
        x: 3-D input array; stored into ``C[0]`` with axes order (2, 1, 0).
        K: Number of terms to compute.
        n_batch: Not used by this function.
        LmI_data: CSR ``data`` array of the matrix.
        LmI_indices: CSR ``indices`` array of the matrix.
        LmI_indptr: CSR ``indptr`` array of the matrix.

    Returns:
        None. ``C`` is modified in place.
    """
    C[0] = x.transpose((2, 1, 0))
    # Row count handed to csr_matvec as ``p``.
    N = C.shape[1]
    if K > 1:
        csr_matvec(N, LmI_data, LmI_indices, LmI_indptr, C[0], C[1])
    for k in six.moves.range(2, K):
        # The kernel first writes L C[k - 1] into C[k]; the next line then
        # turns it into 2 L C[k - 1] - C[k - 2].
        csr_matvec(N, LmI_data, LmI_indices, LmI_indptr, C[k - 1], C[k])
        C[k] = 2 * C[k] - C[k - 2]
import numpy as np import chainer from chainer import cuda from chainer.cuda import cupy from chainer import function if chainer.cuda.available: # x will be flattened in C-order # y will be flattened in C-order gpu_graphpool_fwd = cupy.ElementwiseKernel( 'I p, I p_dim, raw I pooling_inds, raw T x', 'T y, I max_ind', ''' int n_cols = _ind.size() / p; int row_idx = i / n_cols; int col_idx = i % n_cols; int idx0 = pooling_inds[row_idx * p_dim + 0]; int idx1 = pooling_inds[row_idx * p_dim + 1]; T x0 = x[idx0 * n_cols + col_idx]; T x1 = x[idx1 * n_cols + col_idx]; y = max(x0, x1); max_ind = x0 > x1 ? idx0 : idx1; ''', 'gpu_graphpool_fwd') gpu_graphpool_bwd = cupy.ElementwiseKernel( 'I p, I q, raw I max_inds, raw T gy', 'T gx', ''' int n_cols = _ind.size() / p; int row_idx = i / n_cols; int col_idx = i % n_cols; T val = 0; for (int j=0; j < q; j++) { int offset = j * n_cols + col_idx; if (max_inds[offset] == row_idx) {