Code example #1
File: kernels.py  Project: schreon/neuronaut
    def softplus(self, activations, bias, dest=None):
        kernel_cache, thread = self.kernel_cache, self.thread

        if dest is None:
            dest = activations

        key = (self.softplus, activations.shape, thread)
        if key not in kernel_cache:
            log.info("compiling " + str(key))
            assert activations.shape[1] == bias.shape[0]

            kernel = PureParallel([
                Parameter('activations', Annotation(activations, 'i')),
                Parameter('bias', Annotation(bias, 'i')),
                Parameter('dest', Annotation(dest, 'o')),
            ],
                                  """
            ${activations.ctype} a = ${activations.load_same};
            ${bias.ctype} b = ${bias.load_idx}(${idxs[1]});
            
            a += b;   
            a = min(max(-45.0f, a), 45.0f);     
            a = log(1.0f + exp(a));
            
            ${dest.store_same}(a);
            """,
                                  guiding_array='activations')

            kernel_cache[key] = kernel.compile(thread, fast_math=True)

        # Run kernel
        kernel_cache[key](activations, bias, dest)

        return dest
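
A standalone sketch of the same softplus kernel outside its class wrapper, in case someone wants to run the snippet directly. It assumes reikna is installed and that PureParallel is importable from reikna.algorithms (older releases ship it under a different module path); the in-place call mirrors the dest=None path above.

import numpy
from reikna.cluda import any_api
from reikna.core import Parameter, Annotation
from reikna.algorithms import PureParallel  # assumed module path; older reikna versions differ

api = any_api()
thread = api.Thread.create()

activations = thread.to_device(numpy.random.randn(8, 4).astype(numpy.float32))
bias = thread.to_device(numpy.zeros(4, dtype=numpy.float32))

kernel = PureParallel(
    [Parameter('activations', Annotation(activations, 'i')),
     Parameter('bias', Annotation(bias, 'i')),
     Parameter('dest', Annotation(activations, 'o'))],
    """
    ${activations.ctype} a = ${activations.load_same};
    a += ${bias.load_idx}(${idxs[1]});       // add the per-column bias
    a = min(max(-45.0f, a), 45.0f);          // clamp to avoid exp overflow
    ${dest.store_same}(log(1.0f + exp(a)));  // softplus
    """,
    guiding_array='activations')

compiled = kernel.compile(thread, fast_math=True)
compiled(activations, bias, activations)   # in place, like the dest=None path above
print(activations.get()[:2])               # softplus(activations + bias)
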
Code example #2
File: kernels.py  Project: schreon/neuronaut
    def nan_to_zeros(self, array, dest=None):
        kernel_cache, thread = self.kernel_cache, self.thread

        if dest is None:
            dest = array

        key = (self.nan_to_zeros, array.shape, thread)
        if key not in kernel_cache:
            log.info("compiling " + str(key))

            kernel = PureParallel([
                Parameter('array', Annotation(array, 'i')),
                Parameter('dest', Annotation(dest, 'o')),
            ],
                                  """
            ${array.ctype} a = ${array.load_same};
            if (isnan(a)) {
                ${dest.store_same}(0.0f);
            } else {
                // pass non-NaN values through so a separate dest array is fully written
                ${dest.store_same}(a);
            }
            """,
                                  guiding_array='array')

            kernel_cache[key] = kernel.compile(thread, fast_math=True)

        # Run kernel
        kernel_cache[key](array, dest)

        return dest
Code example #3
File: kernels.py  Project: schreon/neuronaut
    def sub(self, mat1, mat2, dest):
        """
        Subtract mat2 from mat1.
        ATTENTION: if either input value is NaN, the result will be zero.
        """
        kernel_cache = self.kernel_cache
        thread = self.thread
        key = (self.sub, mat1.dtype, mat1.shape)

        if key not in kernel_cache.keys():
            log.info("compiling " + str(key))
            assert mat1.shape == mat2.shape == dest.shape
            kernel_delta_output = PureParallel([
                Parameter('mat1', Annotation(mat1, 'i')),
                Parameter('mat2', Annotation(mat2, 'i')),
                Parameter('dest', Annotation(dest, 'o'))
            ],
                                               """
            // Elementwise subtraction; NaN in either operand yields zero
            ${mat1.ctype} m1 = ${mat1.load_same};
            ${mat2.ctype} m2 = ${mat2.load_same};
            if (isnan(m1) || isnan(m2)) {
                ${dest.store_same}(0.0f);
            } else {                
                ${dest.ctype} d = m1 - m2;
                ${dest.store_same}(d);
            }
            """,
                                               guiding_array='dest')

            kernel_cache[key] = kernel_delta_output.compile(thread)

        kernel_cache[key](mat1, mat2, dest)
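
For reference, the NaN-masking semantics of this kernel expressed with NumPy on host arrays (a description of what the kernel computes, not code from the project):

import numpy as np

def sub_reference(mat1, mat2):
    # NaN in either operand forces the result to zero, matching the kernel above.
    mask = np.isnan(mat1) | np.isnan(mat2)
    return np.where(mask, 0.0, mat1 - mat2).astype(np.float32)
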
Code example #4
File: kernels.py  Project: schreon/neuronaut
    def add(self, mat1, mat2, dest):
        kernel_cache = self.kernel_cache
        thread = self.thread
        key = (self.add, mat1.dtype, mat1.shape)

        if key not in kernel_cache.keys():
            log.info("compiling " + str(key))
            assert mat1.shape == mat2.shape == dest.shape
            kernel_delta_output = PureParallel([
                Parameter('mat1', Annotation(mat1, 'i')),
                Parameter('mat2', Annotation(mat2, 'i')),
                Parameter('dest', Annotation(dest, 'o'))
            ],
                                               """
            // Elementwise addition
            ${mat1.ctype} m1 = ${mat1.load_same};
            ${mat2.ctype} m2 = ${mat2.load_same};
            ${dest.ctype} d = m1 + m2;
            ${dest.store_same}(d);
            """,
                                               guiding_array='dest')

            kernel_cache[key] = kernel_delta_output.compile(thread)

        kernel_cache[key](mat1, mat2, dest)
Code example #5
File: kernels.py  Project: schreon/neuronaut
    def softplus_derivative(self, activations, delta, dest=None):
        kernel_cache, thread = self.kernel_cache, self.thread

        if dest is None:
            dest = delta

        key = (self.softplus_derivative, activations.shape, thread)
        if key not in kernel_cache:
            log.info("compiling " + str(key))
            kernel = PureParallel([
                Parameter('activations', Annotation(activations, 'i')),
                Parameter('delta', Annotation(delta, 'i')),
                Parameter('dest', Annotation(dest, 'o')),
            ],
                                  """
            ${activations.ctype} a = ${activations.load_same};
            ${delta.ctype} d = ${delta.load_same};
            
            // softplus has already been applied to the activations,
            // so we invert softplus to recover the pre-activation and
            // apply the logistic function (the derivative of softplus)
            a = min(max(-45.0f, a), 45.0f);
            a = 1.0f / (1.0f / (exp(a) - 1.0f) + 1.0f);
            d = d*a;
            
            ${dest.store_same}(d);
            """,
                                  guiding_array='activations')

            kernel_cache[key] = kernel.compile(thread)

        # Run kernel
        kernel_cache[key](activations, delta, dest)

        return dest
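
The expression assigned to a in the kernel inverts the stored softplus value back to the logistic of the original pre-activation; a quick NumPy check of that identity (reference only):

import numpy as np

x = np.linspace(-10.0, 10.0, 7)                    # pre-activations
y = np.log1p(np.exp(x))                            # softplus(x), what 'activations' holds
logistic = 1.0 / (1.0 + np.exp(-x))                # derivative of softplus at x
recovered = 1.0 / (1.0 / (np.exp(y) - 1.0) + 1.0)  # the kernel's expression
assert np.allclose(logistic, recovered)
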
Code example #6
def renormalize_kernel(ctx, array, norm, constraint):
    kernel_cache, thread = ctx.kernel_cache, ctx.thread

    constraint = numpy.float32(constraint)

    key = (renormalize_kernel, array.shape, norm.shape, thread._context)
    if key not in kernel_cache.keys():
        comp = PureParallel([
            Parameter('array', Annotation(array, 'io')),
            Parameter('norm', Annotation(norm, 'i')),
            Parameter('constraint', Annotation(constraint))
        ],
                            """
        // Renormalize if necessary
        float n = ${norm.load_idx}(${idxs[1]});
        float c = ${constraint};
        if ( n > c ) {  
            float a = ${array.load_same};
            a = a * c / n;
            ${array.store_same}(a);
        }
            
        """,
                            guiding_array='array')

        kernel_cache[key] = comp.compile(thread)

    kernel_cache[key](array, norm, constraint)
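
The kernel applies a max-norm constraint per column: whenever a column norm exceeds the constraint, the whole column is scaled back onto it. A NumPy reference of the same rule, assuming norm holds one value per column of array:

import numpy as np

def renormalize_reference(array, norm, constraint):
    # Columns whose norm exceeds the constraint are rescaled by constraint / norm.
    scale = np.where(norm > constraint, constraint / norm, 1.0)
    return array * scale
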
Code example #7
File: transformation.py  Project: xexo7C8/reikna
    def _get_connection_modules(self, output, name, annotation):

        node = self.nodes[name]
        param = Parameter(name, annotation)
        ntr = node.output_ntr if output else node.input_ntr

        m_idx = None
        m_same = None
        m_combined = None

        if ntr is None:
            m_idx = module_leaf_macro(output, param)
        else:
            m_idx = self._get_transformation_module(annotation, ntr)

        subtree_params = self.get_leaf_parameters([name])

        # FIXME: this module won't work at the base level (that is, not in a transformation)
        # unless the 'idx' variables are defined.
        # This behavior was enabled for PureParallel.from_trf(), which defines these variables.
        m_same = module_same_indices(output, param, subtree_params, m_idx)

        m_combined = module_combined(output, param, subtree_params, m_idx)

        return m_idx, m_same, m_combined
Code example #8
File: transformation.py  Project: xexo7C8/reikna
    def _get_transformation_module(self, annotation, ntr):

        param = Parameter(ntr.connector_node_name, annotation)

        tr_args = [Indices(param.annotation.type.shape)]
        connection_names = []
        for tr_param in ntr.trf.signature.parameters.values():
            connection_name = ntr.node_from_tr[tr_param.name]
            connection_names.append(connection_name)

            if connection_name == ntr.connector_node_name:
                if ntr.output:
                    load_same = node_connector(ntr.output)
                    tr_args.append(
                        KernelParameter(param.name,
                                        param.annotation.type,
                                        load_same=load_same))
                else:
                    store_same = node_connector(ntr.output)
                    tr_args.append(
                        KernelParameter(param.name,
                                        param.annotation.type,
                                        store_same=store_same))
            else:
                tr_args.append(
                    self._get_kernel_argobject(connection_name,
                                               tr_param.annotation))

        subtree_params = self.get_leaf_parameters([ntr.connector_node_name])

        return module_transformation(ntr.output, param, subtree_params,
                                     ntr.trf.snippet, tr_args)
Code example #9
    def _process_kernel_arguments(self, args):
        """
        Scan through kernel arguments passed by the user, check types,
        and wrap ad hoc values if necessary.

        Does not change the plan state.
        """
        processed_args = []
        adhoc_idgen = IdGen('_adhoc')
        adhoc_values = {}

        for arg in args:
            if not isinstance(arg, KernelArgument):
                if hasattr(arg, 'shape') and hasattr(arg, 'dtype'):
                    if len(arg.shape) > 0:
                        raise ValueError(
                            "Arrays are not allowed as ad hoc arguments")

                    # Not creating a new persistent scalar with _scalar(),
                    # because the kernel compilation may fail,
                    # in which case we would have to roll back the plan state.
                    # These arguments are local to this kernel anyway,
                    # so there's no need in registering them in the plan.
                    name = self._translator(adhoc_idgen())
                    adhoc_values[name] = arg
                    annotation = Annotation(Type(arg.dtype))
                    arg = KernelArgument(name, annotation.type)
                else:
                    raise TypeError("Unknown argument type: " + str(type(arg)))
            else:
                annotation = self._get_annotation(arg.name)

            processed_args.append(Parameter(arg.name, annotation))

        return processed_args, adhoc_values
Code example #10
File: transformation.py  Project: xexo7C8/reikna
    def _connect(self, ntr):

        # At this point we assume that ``ntr`` describes a valid connection.
        # All sanity checks are performed in ``connect()``.

        for tr_param in ntr.trf.signature.parameters.values():
            node_name = ntr.node_from_tr[tr_param.name]

            if node_name == ntr.connector_node_name:
                ann = self.leaf_parameters[node_name].annotation
                if ann.input and ann.output:
                    # splitting the 'io' leaf
                    updated_role = 'i' if ntr.output else 'o'

                    # Since it is an array parameter, we do not need to worry
                    # about preserving the default value (it can't have one).
                    self.leaf_parameters[node_name] = Parameter(
                        node_name, Annotation(ann.type, role=updated_role))
                else:
                    # 'i' or 'o' leaf is hidden by the transformation
                    del self.leaf_parameters[node_name]

            else:
                if (node_name in self.leaf_parameters
                        and self.leaf_parameters[node_name].annotation.array):
                    ann = self.leaf_parameters[node_name].annotation
                    if (ann.input and ntr.output) or (ann.output
                                                      and not ntr.output):
                        # Joining 'i' and 'o' paths into an 'io' leaf.
                        # Since it is an array parameter, we do not need to worry
                        # about preserving the default value (it can't have one).
                        self.leaf_parameters[node_name] = Parameter(
                            node_name, Annotation(ann.type, role='io'))
                else:
                    self.leaf_parameters[node_name] = tr_param.rename(
                        node_name)

            if node_name not in self.nodes:
                self.nodes[node_name] = Node()

        self.nodes[ntr.connector_node_name] = self.nodes[
            ntr.connector_node_name].connect(ntr)
Code example #11
File: dropout.py  Project: schreon/neuronaut
def dropout(ctx, mat, rand, probability):
    kernel_cache = ctx.kernel_cache
    probability = numpy.float32(probability)
    thread = ctx.thread
    key = (dropout, mat.dtype, mat.shape)

    if key not in kernel_cache.keys():
        log.info("compiling " + str(key))
        kernel = PureParallel([
            Parameter('mat', Annotation(mat, 'o')),
            Parameter('rand', Annotation(mat, 'i')),
            Parameter('probability', Annotation(probability))
        ],
                              """
        ${rand.ctype} r = ${rand.load_same};
        if (r < ${probability}) {            
            ${mat.store_same}(0.0f);
        }
        """,
                              guiding_array='mat')

        kernel_cache[key] = kernel.compile(thread)

    kernel_cache[key](mat, rand, probability)
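
For reference, the effect on host data expressed with NumPy: entries whose pre-drawn uniform random value falls below probability are zeroed in place, and the surviving values are not rescaled here (reference only, not project code):

import numpy as np

def dropout_reference(mat, rand, probability):
    # Zero the entries selected by the pre-generated random matrix.
    mat[rand < probability] = 0.0
    return mat
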
Code example #12
File: kernels.py  Project: schreon/neuronaut
    def scale(self, mat, scalar):
        kernel_cache = self.kernel_cache
        scalar = numpy.float32(scalar)
        thread = self.thread
        key = (self.scale, mat.dtype, mat.shape)

        if key not in kernel_cache.keys():
            log.info("compiling " + str(key))
            kernel = PureParallel([
                Parameter('mat', Annotation(mat, 'io')),
                Parameter('scalar', Annotation(scalar))
            ],
                                  """
            // Scale the matrix in place by the scalar
            ${mat.ctype} m = ${mat.load_same};
            ${mat.ctype} s = ${scalar};
            m *= s;
            ${mat.store_same}(m);
            """,
                                  guiding_array='mat')

            kernel_cache[key] = kernel.compile(thread)

        kernel_cache[key](mat, scalar)
Code example #13
File: kernels.py  Project: schreon/neuronaut
    def copy_minibatch(self, array, indices, minibatch):
        kernel_cache, thread = self.kernel_cache, self.thread

        key = (self.copy_minibatch, minibatch.dtype, minibatch.shape,
               array.shape)

        if key not in kernel_cache.keys():
            log.info("compiling " + str(key))
            assert minibatch.shape[0] == indices.shape[0]
            assert indices.dtype == numpy.int32

            dimensions = numpy.int32(len(array.shape))
            kernel = PureParallel([
                Parameter('array', Annotation(array, 'i')),
                Parameter('indices', Annotation(indices, 'i')),
                Parameter('minibatch', Annotation(minibatch, 'o'))
            ],
                                  """
            SIZE_T idx = ${indices.load_idx}(${idxs[0]});
            %if dimensions == 2:
            ${minibatch.store_same}(${array.load_idx}(idx, ${idxs[1]}));
            %elif dimensions == 3:
            ${minibatch.store_same}(${array.load_idx}(idx, ${idxs[1]}, ${idxs[2]}));
            %else:
            ${minibatch.store_same}(${array.load_idx}(idx));        
            %endif           
            """,
                                  guiding_array='minibatch',
                                  render_kwds=dict(dimensions=dimensions))
            log.info(array.shape)
            log.info(indices.shape)
            log.info(minibatch.shape)
            kernel_cache[key] = kernel.compile(thread)

        kernel_cache[key](array, indices, minibatch)
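
The kernel is a row gather along the first axis; on host arrays the same result is plain NumPy fancy indexing, which covers all three template branches (reference only):

import numpy as np

def copy_minibatch_reference(array, indices):
    # Select the rows listed in `indices`; works for 1-, 2- and 3-dimensional arrays.
    return array[indices]
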
Code example #14
File: lwta.py  Project: schreon/neuronaut
def lwta(ctx, mat, lwta_size):
    kernel_cache = ctx.kernel_cache
    lwta_size = numpy.int32(lwta_size)  # group size is integral; rendered into SIZE_T in the template
    thread = ctx.thread
    key = (lwta, mat.dtype, mat.shape, lwta_size)

    if key not in kernel_cache.keys():
        num_units = mat.shape[1]
        log.info("compiling " + str(key))
        kernel = PureParallel([Parameter('mat', Annotation(mat, 'io'))],
                              """
        SIZE_T this_idx = ${idxs[1]};
        SIZE_T group_size = ${lwta_size};
        // only the first thread per group computes anything
        if (this_idx % group_size == 0) {
            SIZE_T argmax = ${idxs[1]};
            SIZE_T candidate_idx;
            ${mat.ctype} ma = ${mat.load_same};
            ${mat.ctype} candidate_value;
            // find the argmax in the group
            for (SIZE_T i=1; i < group_size; i++) {
                candidate_idx = this_idx + i;
                if (candidate_idx >= ${num_units}) break;
                candidate_value = ${mat.load_idx}(${idxs[0]}, candidate_idx);
                if ( candidate_value > ma) {
                    ma = candidate_value;
                    argmax = candidate_idx;
                }
            }
            // second pass: zero all except argmax
            for (SIZE_T i=0; i < group_size; i++) {
                candidate_idx = this_idx + i;
                if (candidate_idx >= ${num_units}) break;
                if ( candidate_idx != argmax ) {
                    ${mat.store_idx}(${idxs[0]}, candidate_idx, 0.0f);
                }
            }
        }
            
        """,
                              guiding_array='mat',
                              render_kwds=dict(lwta_size=lwta_size,
                                               num_units=num_units))

        kernel_cache[key] = kernel.compile(thread)

    kernel_cache[key](mat)
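
A host-side reference of the same local winner-take-all rule: columns are grouped in blocks of lwta_size and, per row, everything except the block maximum is zeroed (reference only; it operates on a copy rather than in place):

import numpy as np

def lwta_reference(mat, lwta_size):
    out = mat.copy()
    num_units = mat.shape[1]
    for start in range(0, num_units, lwta_size):
        block = out[:, start:start + lwta_size]        # view into out
        winners = block.argmax(axis=1)                 # per-row winner inside the block
        mask = np.zeros(block.shape, dtype=bool)
        mask[np.arange(block.shape[0]), winners] = True
        block[~mask] = 0.0                             # zero the losers
    return out
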
Code example #15
File: transformation.py  Project: xexo7C8/reikna
    def __init__(self, root_parameters):
        # Preserve order of initial root parameters.
        # These can repeat.
        self.root_names = []

        # Keeping whole parameters, because we want to preserve the default values (if any).
        self.root_parameters = {}

        self.nodes = {}  # all nodes of the tree
        self.leaf_parameters = {}  # nodes available for connection

        for param in root_parameters:
            self.root_names.append(param.name)
            if param.name in self.root_parameters and param != self.root_parameters[
                    param.name]:
                # Could be an 'io' parameter used for separate 'i' and 'o' parameters
                # in a nested computation.
                # Need to check types and merge.

                new_ann = param.annotation
                old_param = self.root_parameters[param.name]
                old_ann = old_param.annotation

                # FIXME: Not sure when these can be raised
                assert old_ann.type == new_ann.type
                assert old_param.default == param.default

                # Given the old_param != param, the only possible combinations of roles are
                # 'i' and 'o', 'i' and 'io', 'o' and 'io'.
                # In all cases the resulting role is 'io'.
                new_param = Parameter(param.name,
                                      Annotation(new_ann.type, 'io'),
                                      default=param.default)
                self.root_parameters[param.name] = new_param
                self.leaf_parameters[param.name] = new_param
            else:
                self.nodes[param.name] = Node()
                self.root_parameters[param.name] = param
                self.leaf_parameters[param.name] = param
Code example #16
def sarprop_kernel(ctx, weights, gradient, last_gradient, step_sizes, noise,
                   parameters):
    """ SARPROP update kernel """
    kernel_cache, thread = ctx.kernel_cache, ctx.thread

    assert weights.shape == gradient.shape == last_gradient.shape == step_sizes.shape

    key = (sarprop_kernel, weights.shape, thread._context) + tuple(
        parameters.values())
    if key not in kernel_cache:
        logging.info("compiling " + str(key))
        kernel = PureParallel([
            Parameter('weights', Annotation(weights, 'io')),
            Parameter('gradient', Annotation(gradient, 'i')),
            Parameter('last_gradient', Annotation(last_gradient, 'io')),
            Parameter('step_sizes', Annotation(step_sizes, 'io')),
            Parameter('noise', Annotation(step_sizes, 'i'))
        ],
                              """
        ${weights.ctype} w = ${weights.load_same};
        ${gradient.ctype} g = ${gradient.load_same};
        ${last_gradient.ctype} lg = ${last_gradient.load_same};
        ${step_sizes.ctype} s = ${step_sizes.load_same};
        
        ${noise.ctype} n = ${noise.load_same};
        n = fabs(n);
    
        // Adapt step size
        if (g * lg > 0.0f) {            
            s = min(${reward_factor}f * s, ${max_step_size}f); 
            
            // Apply update
            if (g < 0.0f) {
                w = w - s*n;
            }             
            if (g > 0.0f) {
                w = w + s*n;
            } 
        } else {
            // punish step size
            s = max(${punish_factor}f * s, ${min_step_size}f);
        }

        // If L1 weight decay is greater than zero, apply it
        % if l1_decay > 0.0:
        if (w > 0.0f) {
            w = max(0.0f, w - ${l1_decay}f);
        }
        if (w < 0.0f) {
            w = min(0.0f, w + ${l1_decay}f);
        }        
        % endif;
 
        // If L2 weight decay is greater than zero, apply it
        % if l2_decay > 0.0:
        w *= ${1.0 - l2_decay}f;
        % endif;
               
        // Save last gradient
        lg = g;
            
        ${weights.store_same}(w);
        ${last_gradient.store_same}(lg);
        ${step_sizes.store_same}(s);
        """,
                              guiding_array='weights',
                              render_kwds=parameters)

        kernel_cache[key] = kernel.compile(thread)

    # Run kernel
    kernel_cache[key](weights, gradient, last_gradient, step_sizes, noise)
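
A host-side sketch of the update rule encoded in the template above: step sizes are rewarded when the current and previous gradients agree in sign and punished otherwise, and the noise-scaled step is applied only on agreement. The default factors below and the omission of the L1/L2 decay branches are assumptions made for brevity:

import numpy as np

def sarprop_step_reference(w, g, lg, s, noise, reward_factor=1.2, punish_factor=0.5,
                           min_step_size=1e-6, max_step_size=1.0):
    n = np.abs(noise)
    agree = g * lg > 0.0
    # Adapt the step size first, then move against the gradient where the signs agree.
    s = np.where(agree, np.minimum(reward_factor * s, max_step_size),
                 np.maximum(punish_factor * s, min_step_size))
    w = np.where(agree & (g < 0.0), w - s * n, w)
    w = np.where(agree & (g > 0.0), w + s * n, w)
    return w, g.copy(), s   # new weights, last gradient, adapted step sizes
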