Example #1
0
def plan_bcm2_threshold_diagonal1(queue, delta, weights, max_weight, tag=None):
    """Plan a kernel that cancels weight updates exceeding a magnitude bound.

    For every operation ``k`` and every element ``(i, j)`` of ``delta[k]``,
    the kernel sets ``delta[k][i, j]`` to zero when
    ``|weights[k][i, j] + delta[k][i, j]| > max_weight[k]`` and leaves it
    unchanged otherwise. ``delta`` is modified in place; ``weights`` and
    ``max_weight`` are only read.

    Despite the ``diagonal`` in the name, the same test is applied to every
    element; the diagonal receives no special treatment.

    Parameters
    ----------
    queue
        Command queue. Its ``context`` is used to build the program and the
        queue itself is handed to the returned ``Plan``.
    delta
        Ragged array of matrices holding the proposed weight updates.
    weights
        Ragged array of matrices with the same length and the same
        per-matrix shapes as ``delta``.
    max_weight
        Device array holding one bound per operation. The kernel reads it
        with the same C type as ``delta``; that is assumed, not checked.
    tag
        Optional label passed through to the ``Plan``.

    Returns
    -------
    Plan
        Plan that launches the kernel over ``(delta.sizes.max(), N)`` work
        items, where ``N`` is the number of operations.
    """
    N = len(delta)
    # The kernel indexes weights_*[k] and max_weights[k] for every k < N.
    assert len(weights) == N
    assert max_weight.size == N

    for arr in (delta, weights):  # matrices
        # Columns are addressed as ``row_start + j`` in the kernel.
        assert (arr.stride1s == 1).all()

    assert (weights.shape0s == delta.shape0s).all()
    assert (weights.shape1s == delta.shape1s).all()

    # Both buffers are declared with the same template type in the kernel.
    assert weights.ctype == delta.ctype

    text = """
    __kernel void bcm2_threshold_diagonal1(
        __global const int *shape0s,
        __global const int *shape1s,
        __global const int *delta_stride0s,
        __global const int *delta_starts,
        __global ${type} *delta_data,
        __global const int *weights_stride0s,
        __global const int *weights_starts,
        __global const ${type} *weights_data,
        __global const ${type} *max_weights
    )
    {
        const int ij = get_global_id(0);
        const int k = get_global_id(1);
        const int shape0 = shape0s[k];
        const int shape1 = shape1s[k];

        // The global size covers the largest matrix, so smaller matrices
        // see surplus work items. Comparing ij with the element count
        // (instead of i with shape0) also avoids dividing by shape1 when
        // matrix k is empty.
        if (ij >= shape0 * shape1)
            return;

        const int i = ij / shape1;
        const int j = ij % shape1;
        __global ${type} *delta = delta_data + delta_starts[k];
        __global const ${type} *weights = weights_data + weights_starts[k];
        const ${type} max_weight = max_weights[k];

        const int d_idx = i*delta_stride0s[k] + j;
        const int w_idx = i*weights_stride0s[k] + j;
        if (fabs(weights[w_idx] + delta[d_idx]) > max_weight) {
            delta[d_idx] = 0;
        }
    }
    """

    textconf = dict(type=delta.ctype)
    text = as_ascii(Template(text, output_encoding='ascii').render(**textconf))

    # Order must match the kernel's parameter list exactly.
    full_args = (
        delta.cl_shape0s, delta.cl_shape1s,
        delta.cl_stride0s, delta.cl_starts, delta.cl_buf,
        weights.cl_stride0s, weights.cl_starts, weights.cl_buf,
        max_weight,
    )
    _fn = cl.Program(queue.context, text).build().bcm2_threshold_diagonal1
    _fn.set_args(*[arr.data for arr in full_args])

    lsize = None
    # One work item per element of the largest matrix, for each operation.
    gsize = (delta.sizes.max(), N)
    plan = Plan(queue, _fn, gsize, lsize=lsize,
                name="cl_bcm2_threshold_diagonal1", tag=tag)
    plan.full_args = full_args     # prevent garbage-collection
    plan.flops_per_call = 4 * delta.sizes.sum()
    plan.bw_per_call = (delta.nbytes + weights.nbytes + max_weight.nbytes)
    return plan
Example #2
0
def plan_stp(queue,
             calcium,
             resources,
             weights,
             delta,
             alpha,
             init_weights,
             tag=None):
    """Plan a kernel that writes a short-term-plasticity weight update.

    For every operation ``k`` and every element ``(i, j)`` the kernel stores

        delta[k][i, j] = (calcium[k][i] * resources[k][i] / 0.2)
                         * init_weights[k][i, j] - weights[k][i, j]

    Only ``delta`` is written. ``alpha`` is passed to the kernel but does not
    enter the computed value.

    Parameters
    ----------
    queue
        Command queue. Its ``context`` is used to build the program and the
        queue itself is handed to the returned ``Plan``.
    calcium, resources
        Ragged arrays of column vectors (``shape1s == 1``), indexed in the
        kernel by the row index ``i``.
    weights, init_weights
        Ragged arrays of matrices with the same per-matrix shapes as
        ``delta`` and unit column stride.
    delta
        Ragged array of matrices that receives the result.
    alpha
        Device array with one entry per operation.
    tag
        Optional label passed through to the ``Plan``.

    Returns
    -------
    Plan
        Plan that launches the kernel over ``(delta.sizes.max(), N)`` work
        items, where ``N`` is the number of operations.
    """
    assert (len(calcium) == len(resources) == len(weights) == len(delta) ==
            alpha.size == len(init_weights))
    N = len(calcium)

    for arr in (calcium, resources):  # vectors
        assert (arr.shape1s == 1).all()
    for arr in (delta, weights, init_weights):  # matrices
        assert (arr.stride1s == 1).all()

    # NOTE(review): the two vector-length checks below were already disabled.
    # The kernel indexes both ``calcium`` and ``resources`` by the row index
    # ``i``, so confirm the intended vector lengths before re-enabling them.
    # assert (resources.shape0s == weights.shape0s).all()
    # assert (calcium.shape0s == weights.shape1s).all()
    assert (weights.shape0s == delta.shape0s).all()
    assert (weights.shape1s == delta.shape1s).all()
    assert (weights.shape0s == init_weights.shape0s).all()
    assert (weights.shape1s == init_weights.shape1s).all()

    assert (calcium.ctype == resources.ctype == weights.ctype == delta.ctype ==
            alpha.ctype == init_weights.ctype)

    text = """
    __kernel void stp(
        __global const int *shape0s,
        __global const int *shape1s,
        __global const int *calcium_stride0s,
        __global const int *calcium_starts,
        __global const ${type} *calcium_data,
        __global const int *resources_stride0s,
        __global const int *resources_starts,
        __global const ${type} *resources_data,
        __global const int *weights_stride0s,
        __global const int *weights_starts,
        __global const ${type} *weights_data,
        __global const int *delta_stride0s,
        __global const int *delta_starts,
        __global ${type} *delta_data,
        __global const ${type} *alphas,
        __global const int *init_weights_stride0s,
        __global const int *init_weights_starts,
        __global const ${type} *init_weights_data
    )
    {
        const int ij = get_global_id(0);
        const int k = get_global_id(1);
        const int shape0 = shape0s[k];
        const int shape1 = shape1s[k];

        // The global size covers the largest matrix, so smaller matrices
        // see surplus work items. Leave before touching any data buffer so
        // those items never read out of range. Comparing ij with the
        // element count also avoids dividing by shape1 when matrix k is
        // empty.
        if (ij >= shape0 * shape1)
            return;

        const int i = ij / shape1;
        const int j = ij % shape1;

        __global ${type} *delta = delta_data + delta_starts[k];
        const ${type} calcium =
            calcium_data[calcium_starts[k] + i*calcium_stride0s[k]];
        const ${type} resources =
            resources_data[resources_starts[k] + i*resources_stride0s[k]];
        const ${type} weight =
            weights_data[weights_starts[k] + i*weights_stride0s[k] + j];
        const ${type} init_weight =
            init_weights_data[
                init_weights_starts[k] + i*init_weights_stride0s[k] + j];

        // alphas is part of the kernel signature but is not used here.
        // 0.2 is a hard-coded constant carried over unchanged.
        delta[i*delta_stride0s[k] + j] =
            ((calcium*resources/0.2)*init_weight) - weight;
    }
    """

    textconf = dict(type=calcium.ctype)
    text = as_ascii(Template(text, output_encoding='ascii').render(**textconf))

    # Order must match the kernel's parameter list exactly.
    full_args = (
        delta.cl_shape0s,
        delta.cl_shape1s,
        calcium.cl_stride0s,
        calcium.cl_starts,
        calcium.cl_buf,
        resources.cl_stride0s,
        resources.cl_starts,
        resources.cl_buf,
        weights.cl_stride0s,
        weights.cl_starts,
        weights.cl_buf,
        delta.cl_stride0s,
        delta.cl_starts,
        delta.cl_buf,
        alpha,
        init_weights.cl_stride0s,
        init_weights.cl_starts,
        init_weights.cl_buf,
    )
    _fn = cl.Program(queue.context, text).build().stp
    _fn.set_args(*[arr.data for arr in full_args])

    lsize = None
    # One work item per element of the largest matrix, for each operation.
    gsize = (delta.sizes.max(), N)
    plan = Plan(queue, _fn, gsize, lsize=lsize, name="cl_stp", tag=tag)
    plan.full_args = full_args  # prevent garbage-collection
    plan.flops_per_call = 6 * delta.sizes.sum()
    plan.bw_per_call = (calcium.nbytes + resources.nbytes + weights.nbytes +
                        delta.nbytes + alpha.nbytes + init_weights.nbytes)
    return plan
Example #3
0
def plan_bcm2(queue, pre, post, theta, delta, alpha, tag=None):
    """Plan a kernel that writes the BCM-style weight update into ``delta``.

    For every operation ``k`` and every element ``(i, j)`` the kernel stores

        delta[k][i, j] = alpha[k] * post[k][i] * (post[k][i] - theta[k][i])
                         * pre[k][j]

    No weight bound is applied here; ``plan_bcm2_threshold_diagonal1`` in
    this file plans a separate kernel that zeroes updates exceeding a
    maximum weight.

    Parameters
    ----------
    queue
        Command queue. Its ``context`` is used to build the program and the
        queue itself is handed to the returned ``Plan``.
    pre
        Ragged array of column vectors whose lengths equal the column counts
        of ``delta``.
    post, theta
        Ragged arrays of column vectors whose lengths equal the row counts
        of ``delta``.
    delta
        Ragged array of matrices (unit column stride) that receives the
        result.
    alpha
        Device array with one scale factor per operation.
    tag
        Optional label passed through to the ``Plan``.

    Returns
    -------
    Plan
        Plan that launches the kernel over ``(delta.sizes.max(), N)`` work
        items, where ``N`` is the number of operations.
    """
    assert len(pre) == len(post) == len(theta) == len(delta) == alpha.size
    N = len(pre)

    for arr in (pre, post, theta):  # vectors
        assert (arr.shape1s == 1).all()
    for arr in (delta,):  # matrices
        assert (arr.stride1s == 1).all()

    assert (post.shape0s == delta.shape0s).all()
    assert (pre.shape0s == delta.shape1s).all()
    assert (post.shape0s == theta.shape0s).all()

    assert (pre.ctype == post.ctype == theta.ctype == delta.ctype ==
            alpha.ctype)

    text = """
    __kernel void bcm2(
        __global const int *shape0s,
        __global const int *shape1s,
        __global const int *pre_stride0s,
        __global const int *pre_starts,
        __global const ${type} *pre_data,
        __global const int *post_stride0s,
        __global const int *post_starts,
        __global const ${type} *post_data,
        __global const int *theta_stride0s,
        __global const int *theta_starts,
        __global const ${type} *theta_data,
        __global const int *delta_stride0s,
        __global const int *delta_starts,
        __global ${type} *delta_data,
        __global const ${type} *alphas
    )
    {
        const int ij = get_global_id(0);
        const int k = get_global_id(1);
        const int shape0 = shape0s[k];
        const int shape1 = shape1s[k];

        // The global size covers the largest matrix, so smaller matrices
        // see surplus work items. Leave before touching any data buffer so
        // those items never read out of range. Comparing ij with the
        // element count also avoids dividing by shape1 when matrix k is
        // empty.
        if (ij >= shape0 * shape1)
            return;

        const int i = ij / shape1;
        const int j = ij % shape1;

        __global ${type} *delta = delta_data + delta_starts[k];
        const ${type} pre = pre_data[pre_starts[k] + j*pre_stride0s[k]];
        const ${type} post = post_data[post_starts[k] + i*post_stride0s[k]];
        const ${type} theta =
            theta_data[theta_starts[k] + i*theta_stride0s[k]];
        const ${type} alpha = alphas[k];

        delta[i*delta_stride0s[k] + j] =
            alpha * post * (post - theta) * pre;
    }
    """

    textconf = dict(type=pre.ctype)
    text = as_ascii(Template(text, output_encoding='ascii').render(**textconf))

    # Order must match the kernel's parameter list exactly.
    full_args = (
        delta.cl_shape0s, delta.cl_shape1s,
        pre.cl_stride0s, pre.cl_starts, pre.cl_buf,
        post.cl_stride0s, post.cl_starts, post.cl_buf,
        theta.cl_stride0s, theta.cl_starts, theta.cl_buf,
        delta.cl_stride0s, delta.cl_starts, delta.cl_buf,
        alpha,
    )

    _fn = cl.Program(queue.context, text).build().bcm2
    _fn.set_args(*[arr.data for arr in full_args])

    lsize = None
    # One work item per element of the largest matrix, for each operation.
    gsize = (delta.sizes.max(), N)
    plan = Plan(queue, _fn, gsize, lsize=lsize, name="cl_bcm2", tag=tag)
    plan.full_args = full_args     # prevent garbage-collection
    plan.flops_per_call = 4 * delta.sizes.sum()
    plan.bw_per_call = (pre.nbytes + post.nbytes + theta.nbytes +
                        delta.nbytes + alpha.nbytes)
    return plan
Example #4
0
def plan_sparse_dot_inc(queue,
                        A_indices,
                        A_indptr,
                        A_data,
                        X,
                        Y,
                        inc=False,
                        tag=None):
    """Implements a sparse matrix-vector multiply: Y += A * X or Y = A * X

    Parameters
    ----------
    queue
        Command queue. Its ``context`` is used to build the program and the
        queue itself is handed to the returned ``Plan``.
    A_indices, A_indptr : PyOpenCL array
        Compressed sparse row (CSR) index specification: the entries of row
        ``r`` occupy positions ``A_indptr[r]`` up to (not including)
        ``A_indptr[r + 1]``, and ``A_indices`` holds their column numbers.
        ``A_indptr`` therefore needs at least one more entry than ``Y`` has
        rows.
    A_data : PyOpenCL array
        Matrix values at those indices
    X, Y : CLRaggedArrays of length 1
        Input/output data.
    inc : bool
        Whether to increment ``Y`` (True), or set it (False).
    tag
        Optional label passed through to the ``Plan``.

    Returns
    -------
    Plan
        Plan that launches one work item per row of ``Y``.

    Raises
    ------
    NotImplementedError
        If ``X`` or ``Y`` is not a contiguous column vector.

    Notes
    -----
    This function crashes when there are >10M nonzero weights. A potential solution
    would be some way to tell each work item to do multiple rows.
    """
    assert len(X) == len(Y) == 1

    for arr in [X, Y]:
        assert (arr.stride1s == 1).all()
        if not ((arr.shape1s == 1).all() and (arr.stride0s == 1).all()):
            raise NotImplementedError(
                "OCL SparseDot only supports matrix-vector currently, not matrix-matrix"
            )

    for arr in [A_indices, A_indptr, A_data]:
        assert len(arr.shape) == 1
        assert arr.strides[0] == arr.dtype.itemsize  # contiguous

    assert A_indices.size == A_data.size
    # Work item ``irow`` reads A_indptr[irow] and A_indptr[irow + 1], and
    # there is one work item per row of Y.
    assert A_indptr.size >= Y.sizes[0] + 1

    assert A_data.ctype == X.ctype == Y.ctype
    assert A_indices.ctype == A_indptr.ctype == "int"

    kern = """
    __kernel void sparsedot_inc(
        __global const int *A_indices,
        __global const int *A_indptr,
        __global const ${dtype} *A_data,
        __global const int *Xstarts,
        __global const ${dtype} *Xdata,
        __global const int *Ystarts,
        __global ${dtype} *Ydata
    )
    {
        // n can later be used to keep track of multiple arrays
        const int n = 0;
        const int irow = get_global_id(0);

        __global const ${dtype} *x = Xdata + Xstarts[n];
        __global ${dtype} *y = Ydata + Ystarts[n];

    %if not inc:
        y[irow] = 0;
    %endif
        const int end = A_indptr[irow + 1];
        for (int k = A_indptr[irow]; k < end; k++) {
            y[irow] += A_data[k] * x[A_indices[k]];
        }
    }
    """
    # Only ``dtype`` and ``inc`` are referenced by the template; the index
    # type is fixed to ``int`` by the assertion above.
    textconf = dict(dtype=A_data.ctype, inc=inc)
    text = as_ascii(Template(kern, output_encoding="ascii").render(**textconf))
    # Order must match the kernel's parameter list exactly.
    full_args = (
        A_indices.base_data,
        A_indptr.base_data,
        A_data.base_data,
        X.cl_starts.data,
        X.cl_buf.data,
        Y.cl_starts.data,
        Y.cl_buf.data,
    )
    _fn = cl.Program(queue.context, text).build().sparsedot_inc
    _fn.set_args(*full_args)

    gsize = (Y.sizes[0], 1)  # this only works for a single operation
    lsize = None
    plan = Plan(queue, _fn, gsize, lsize=lsize, name="cl_sparsedot", tag=tag)
    plan.full_args = full_args  # prevent garbage-collection
    plan.flops_per_call = 2 * A_data.size
    plan.bw_per_call = A_data.nbytes * 3 + A_indices.nbytes + A_indptr.nbytes
    plan.description = "groups: %d; shape: (%d, %d); nonzeros: %d" % (
        1,
        Y.sizes[0],
        X.sizes[0],
        A_data.size,
    )
    return plan