Ejemplo n.º 1
0
    def c_code(self, node, name, inputs, outputs, sub):
        """
        Assemble the code template for this op and return it filled in.

        The template is the concatenation of ``self._basic_setup``, the
        fragments produced by ``contiguity_check``, ``dimension_check``
        and ``output_same_shape``, ``self._images_setup``, a call to
        ``convResponseNormCrossMap`` and a run of closing braces. It is
        interpolated with ``% locals()``, so the local variable names
        bound in this method double as substitution keys.

        Parameters
        ----------
        node, name
            Not used directly here; still visible to the templates
            through ``locals()``.
        inputs
            Must unpack to exactly one entry, bound to ``images``.
        outputs
            Must unpack to exactly two entries, bound to ``targets`` and
            ``denoms``.
        sub
            Mapping; ``sub['fail']`` is exposed to the templates as
            ``fail``.

        Returns
        -------
        The interpolated code string.
        """
        # NOTE: every local below may be referenced as %(name)s by a
        # fragment defined outside this method -- do not rename them.
        (images,) = inputs
        (targets, denoms) = outputs
        fail = sub['fail']

        size_f = self._size_f
        add_scale = self._add_scale
        pow_scale = self._pow_scale
        # Spelled as a literal so it can be pasted into the generated code.
        if self._blocked:
            blocked = "true"
        else:
            blocked = "false"

        class_name = self.__class__.__name__
        class_name_upper = class_name.upper()

        basic_setup = self._basic_setup

        setup_nv_images = "".join((
            contiguity_check("images"),
            dimension_check("images", 4),
            self._images_setup,
        ))
        setup_nv_targets = output_same_shape('targets', 'images')
        setup_nv_denoms = output_same_shape('denoms', 'images')

        do_normalize = """
        convResponseNormCrossMap(nv_images, nv_denoms, nv_targets, numFilters, sizeF,
                                 addScale, powScale, blocked);
        """

        # Braces to close at the end: 2 counted for the images fragments,
        # 1 for the targets fragment and 1 for the denoms fragment
        # (presumably each helper leaves that many scopes open -- the
        # helpers are defined elsewhere, confirm there).
        num_braces = 2 + 1 + 1
        braces = '}' * num_braces + "\n"

        rval = "".join((
            basic_setup,
            setup_nv_images,
            setup_nv_targets,
            setup_nv_denoms,
            do_normalize,
            braces,
        ))

        rval = rval % locals()

        return rval
Ejemplo n.º 2
0
    def c_code(self, node, name, inputs, outputs, sub):
        """
        Assemble the code template for this op and return it filled in.

        The template joins, with newlines, ``self._basic_setup``, a
        fragment declaring ``scaleTargets`` / ``scaleOutput``, the setup
        fragments for ``images``, ``acts``, ``denoms``, ``dout``,
        ``targets`` and ``out_acts``, a call to
        ``convResponseNormCrossMapUndo`` and a run of closing braces. It
        is interpolated with ``% locals()``, so the local variable names
        bound in this method double as substitution keys.

        Parameters
        ----------
        node, name
            Not used directly here; still visible to the templates
            through ``locals()``.
        inputs
            Must unpack to exactly four entries, bound to ``images``,
            ``acts``, ``denoms`` and ``dout``.
        outputs
            Must unpack to exactly two entries, bound to ``targets`` and
            ``out_acts``.
        sub
            Mapping; ``sub['fail']`` is exposed to the templates as
            ``fail``.

        Returns
        -------
        The interpolated code string.
        """
        # NOTE: every local below may be referenced as %(name)s by a
        # fragment defined outside this method -- do not rename them.
        (images, acts, denoms, dout) = inputs
        (targets, out_acts) = outputs
        fail = sub['fail']

        size_f = self._size_f
        add_scale = self._add_scale
        pow_scale = self._pow_scale
        # Flags are spelled as literals so they can be pasted into the
        # generated code (``inplace`` feeds the ``#if`` further down).
        if self._blocked:
            blocked = "true"
        else:
            blocked = "false"
        if self._inplace:
            inplace = "true"
        else:
            inplace = "false"
        # Both scales are coerced to int before being written into the
        # ``float`` declarations of the scaling fragment.
        scale_targets = int(self._scale_targets)
        scale_outputs = int(self._scale_outputs)

        class_name = self.__class__.__name__
        class_name_upper = class_name.upper()

        basic_setup = self._basic_setup
        scaling_setup = """
        float scaleTargets = %(scale_targets)s;
        float scaleOutput = %(scale_outputs)s;
        """

        setup_nv_images = "".join((
            contiguity_check("images"),
            dimension_check("images", 4),
            self._images_setup,
        ))

        setup_acts = "".join((
            contiguity_check("acts"),
            dimension_check("acts", 4),
            """
        { //setup_nv_images brace 1
        const int * acts_dims = CudaNdarray_HOST_DIMS(%(acts)s);
        """,
            ensure_same_shape('acts', 'images'),
            """
        { // setup_nv_acts brace 2
        """,
        ))

        setup_nv_denoms = "".join((
            contiguity_check("denoms"),
            dimension_check("denoms", 4),
            """
        {
        const int *denoms_dims = images_dims;
        """,
            ensure_same_shape("denoms", "images"),
            nv_matrix_create("denoms"),
        ))

        setup_nv_dout = "".join((
            contiguity_check("dout"),
            dimension_check("dout", 4),
            """
        { // setup_nv_dout brace
        const int *dout_dims = CudaNdarray_HOST_DIMS(%(dout)s);
        """,
            ensure_same_shape("dout", "images"),
            nv_matrix_create("dout"),
        ))

        setup_nv_targets = output_same_shape('targets', 'images')

        # Depending on ``inplace`` the generated code either aliases
        # out_acts to acts or prepares a fresh output and copies acts
        # into it.
        setup_nv_out_acts = "".join(("""
        const int *out_acts_dims = images_dims;

        #if %(inplace)s
        // XXX: is this right?
        Py_XDECREF(%(out_acts)s);
        %(out_acts)s = %(acts)s;
        Py_INCREF(%(out_acts)s);
        #else
        if (CudaNdarray_prep_output(& %(out_acts)s, 4, out_acts_dims)) {
            Py_DECREF(%(targets)s);
            %(fail)s;
        }
        if (CudaNdarray_CopyFromCudaNdarray(%(out_acts)s, %(acts)s)) {
            Py_DECREF(%(targets)s);
            Py_DECREF(%(out_acts)s);
            %(fail)s;
        }
        #endif
        """, nv_matrix_create("out_acts")))

        undo_normalize = """
        convResponseNormCrossMapUndo(nv_dout, nv_denoms, nv_images,
                                     nv_out_acts, nv_targets, numFilters,
                                     sizeF, addScale, powScale, blocked,
                                     scaleTargets, scaleOutput);
        """

        # Braces to close at the end, tallied per fragment in the order
        # images, acts, denoms, dout, targets, out_acts (presumably the
        # number of scopes each fragment leaves open -- the helpers are
        # defined elsewhere, confirm there).
        num_braces = 2 + 2 + 2 + 2 + 1 + 1

        rval = "\n".join([
            basic_setup,
            scaling_setup,
            setup_nv_images,
            setup_acts,
            setup_nv_denoms,
            setup_nv_dout,
            setup_nv_targets,
            setup_nv_out_acts,
            undo_normalize,
            "}" * num_braces,
        ])
        return rval % locals()