Exemple #1
0
    def c_support_code_apply(self, node, nodename):
        """Generate the CUDA source for the ``TimCropper_<nodename>`` kernel.

        The source is assembled from text fragments joined by newlines; the
        ``$name`` placeholders in those fragments are then filled in with
        ``string.Template``.

        Args:
            node: Not used by this method.
            nodename: Suffix appended to the kernel name; also forwarded to
                ``common.weightpass_defn``.

        Returns:
            The generated source code as a single string.
        """
        from string import Template

        function_name = "TimCropper_%s" % nodename
        ndim_spatial = len(self.patch_shape)

        arguments = common.defn_arguments("x y W".split())
        threadindex = common.threadindex(ndim_spatial, "y_dims")
        weightpass_defn = common.weightpass_defn(nodename, self.patch_shape, grad=False)

        strings = []
        strings.append("""
        #include <stdio.h>
        #include <sys/time.h>

        $weightpass_defn

        __global__ void $function_name($arguments) {
            $threadindex
        """)

        # Per spatial axis, read the half-open window [a, b) that the loops
        # below iterate over. Axis numbering is offset by 2 because i0 and i1
        # index the two leading axes.
        # NOTE(review): `a` and `b` are read by the kernel but only "x y W" are
        # passed to common.defn_arguments -- confirm they are declared there.
        for i in range(ndim_spatial):
            strings.append("""
            int a%(i2)s = a[i0 * $ndim_spatial + %(i)s],
                b%(i2)s = b[i0 * $ndim_spatial + %(i)s];
            assert(0 <= a%(i2)s); assert(a%(i2)s <= b%(i2)s); assert(b%(i2)s <= x_dims[%(i2)s]);
            """ % dict(i=i, i2=2 + i))

        # loop over input pixel indices i{2, 3, ...}V
        # compute start of input memory
        # NOTE: assumes x, W contiguous
        # FIXME: the a, b windows are not contiguous, so this is all wrong. must adjust for a, b in both x and W indices
        strings.extend([
            "const float* x_pointer = x + i0 * x_strides[0] + i1 * x_strides[1];",
            "const float* W_pointer = W + i0 * W_strides[0] + i1 * W_strides[1];",
            "float result = 0.0f;",
        ])

        # One nested loop per spatial axis; the loops are closed further down.
        for axis in range(2, 2 + ndim_spatial):
            strings.append("""
            x_pointer += a%(i)s;
            for (int i%(i)sV = a%(i)s; i%(i)sV < b%(i)s; ++i%(i)sV) {
            """ % dict(i=axis))

        # The weight of a pixel is the product of its ndim_spatial per-axis
        # weights. They are read through W_pointer, which is offset to the
        # (i0, i1) slice above and advanced by ndim_spatial per pixel below.
        # Reading from the base pointer W instead would reuse the very first
        # weights for every pixel and leave W_pointer unused.
        weight = " * ".join("*(W_pointer + %i)" % j for j in range(ndim_spatial))
        strings.append("result += $weight * (*x_pointer);")

        strings.append("++x_pointer;")
        strings.append("W_pointer += $ndim_spatial;")

        # close the per-axis loops
        strings.extend(["}"] * ndim_spatial)

        # store result at output memory location
        # (the i{2, 3, ...}v indices are expected to come from $threadindex)
        strings.append("size_t y_index = i0 * y_strides[0] + i1 * y_strides[1] + %s;"
                       % " + ".join("i%(i)sv * y_strides[%(i)s]" % dict(i=axis)
                                    for axis in range(2, 2 + ndim_spatial)))
        strings.append("assert(y_index < y_size);")
        strings.append("y[y_index] = result;")

        # close the kernel function
        strings.append("}")

        # Every placeholder used by the fragments above is listed explicitly,
        # so unrelated local variables cannot leak into the template.
        placeholders = dict(
            weightpass_defn=weightpass_defn,
            function_name=function_name,
            arguments=arguments,
            threadindex=threadindex,
            ndim_spatial=ndim_spatial,
            weight=weight,
        )
        return Template("\n".join(strings)).substitute(placeholders)
Exemple #2
0
    def c_support_code_apply(self, node, nodename):
        """Generate the CUDA source for the ``TimCropper_<nodename>`` kernel.

        The source is built as a list of text fragments that are joined by
        newlines; ``$name`` placeholders in the fragments are resolved with
        ``string.Template`` just before returning.

        Args:
            node: Not used by this method.
            nodename: Suffix appended to the kernel name; also forwarded to
                ``common.weightpass_defn``.

        Returns:
            The generated source code as a single string.
        """
        from string import Template

        function_name = "TimCropper_%s" % nodename
        ndim_spatial = len(self.patch_shape)
        # Spatial axes are numbered from 2; i0 and i1 index the leading axes.
        spatial_axes = range(2, 2 + ndim_spatial)

        arguments = common.defn_arguments("x y W".split())
        threadindex = common.threadindex(ndim_spatial, "y_dims")
        weightpass_defn = common.weightpass_defn(nodename,
                                                 self.patch_shape,
                                                 grad=False)

        strings = []
        strings.append("""
        #include <stdio.h>
        #include <sys/time.h>

        $weightpass_defn

        __global__ void $function_name($arguments) {
            $threadindex
        """)

        # Read the half-open window [a, b) of each spatial axis.
        # NOTE(review): `a` and `b` are read by the kernel but only "x y W" are
        # passed to common.defn_arguments -- confirm they are declared there.
        for i in range(ndim_spatial):
            strings.append("""
            int a%(i2)s = a[i0 * $ndim_spatial + %(i)s],
                b%(i2)s = b[i0 * $ndim_spatial + %(i)s];
            assert(0 <= a%(i2)s); assert(a%(i2)s <= b%(i2)s); assert(b%(i2)s <= x_dims[%(i2)s]);
            """ % dict(i=i, i2=2 + i))

        # loop over input pixel indices i{2, 3, ...}V
        # compute start of input memory
        # NOTE: assumes x, W contiguous
        # FIXME: the a, b windows are not contiguous, so this is all wrong. must adjust for a, b in both x and W indices
        strings.append(
            "const float* x_pointer = x + i0 * x_strides[0] + i1 * x_strides[1];"
        )
        strings.append(
            "const float* W_pointer = W + i0 * W_strides[0] + i1 * W_strides[1];"
        )
        strings.append("float result = 0.0f;")

        # Open one nested loop per spatial axis (closed again below).
        for axis in spatial_axes:
            strings.append("""
            x_pointer += a%(i)s;
            for (int i%(i)sV = a%(i)s; i%(i)sV < b%(i)s; ++i%(i)sV) {
            """ % dict(i=axis))

        # Multiply the ndim_spatial per-axis weights of the current pixel.
        # They must be read through W_pointer: it is the pointer that was
        # offset to the (i0, i1) slice and that is advanced per pixel below.
        # Dereferencing the base pointer W would apply the same first weights
        # to every pixel and make W_pointer dead code.
        weight = " * ".join("*(W_pointer + %i)" % j
                            for j in range(ndim_spatial))
        strings.append("result += $weight * (*x_pointer);")

        strings.append("++x_pointer;")
        strings.append("W_pointer += $ndim_spatial;")

        # close the per-axis loops
        strings.extend(["}"] * ndim_spatial)

        # store result at output memory location
        # (the i{2, 3, ...}v indices are expected to come from $threadindex)
        strings.append(
            "size_t y_index = i0 * y_strides[0] + i1 * y_strides[1] + %s;" %
            " + ".join("i%(i)sv * y_strides[%(i)s]" % dict(i=axis)
                       for axis in spatial_axes))
        strings.append("assert(y_index < y_size);")
        strings.append("y[y_index] = result;")

        # close the kernel function
        strings.append("}")

        # Explicit mapping of every placeholder used in the fragments above.
        return Template("\n".join(strings)).substitute(
            dict(weightpass_defn=weightpass_defn,
                 function_name=function_name,
                 arguments=arguments,
                 threadindex=threadindex,
                 ndim_spatial=ndim_spatial,
                 weight=weight))
Exemple #3
0
    def c_support_code_apply(self, node, nodename):
        """Generate the CUDA source for the ``TimCropperGrad_<nodename>`` kernel.

        The source is assembled from text fragments joined by newlines and then
        passed through ``string.Template`` with ``locals()`` as the mapping, so
        every ``$name`` placeholder is resolved from the local variable of the
        same name. Renaming or removing a local can therefore break the
        generated code.

        The generated kernel zeroes its elements of ``dydl`` and ``dyds``,
        then loops over the window ``[a, b)`` of every spatial axis and
        accumulates ``weight * x[x_index]`` into them.

        Args:
            node: Not used by this method.
            nodename: Suffix appended to the kernel name.

        Returns:
            The generated source code as a single string.
        """
        function_name = "TimCropperGrad_%s" % nodename
        weight_function_name = "%s_weight" % function_name
        ndim_spatial = len(self.patch_shape)
        # Index of the extra trailing axis of dydl/dyds (used as $ndim_total).
        ndim_total = 2 + ndim_spatial
        import math

        # NOTE(review): not referenced by any fragment written in this method;
        # presumably consumed as a placeholder by the text returned from
        # common.weightfunction, which also goes through the template -- confirm.
        sqrt2pi = math.sqrt(2 * math.pi)

        strings = []
        strings.append(
            """
        #include <stdio.h>
        """
        )

        # Helper source for $weight_function_name; its contents are defined in
        # `common` and are not visible here.
        strings.append(common.weightfunction(name=weight_function_name, grad=True))

        # Kernel parameter list: size, data pointer, dims and strides for each
        # array. Only x and dCdy get a const data pointer; dydl and dyds are
        # written by the kernel.
        arguments = []
        for var in "dydl dyds dCdy x".split():
            arguments.append("const size_t %s_size" % var)
            arguments.append("%sfloat* %s" % ("const " if var in "x dCdy".split() else "", var))
            arguments.append("const int* %s_dims" % var)
            arguments.append("const int* %s_strides" % var)
        # a, b, l, s are contiguous
        # NOTE(review): a and b are declared `const float*` here but are read
        # into `const int` variables in the kernel body below.
        arguments.extend("const float* %s" % var for var in "abls")
        arguments = ", \n".join(arguments)

        # NOTE(review): dCdy is only used here (for its dims); the computations
        # generated below never read dCdy itself.
        threadindex = common.threadindex(ndim_spatial, "dCdy_dims")

        # generate code that does output initialization and computation. this is
        # best kept together in the python code to reduce confusion.
        initializations = []
        computations = []
        for var in "ls":
            dy = "dyd" + var
            dw = "dwd" + var

            # patch pixel index; we need to compute dydl[0]..dydl[ndim_spatial - 1]
            # for each pixel, so one more subscript is needed to fully qualify
            # the output element.
            # (i0, i1 and the i{2, 3, ...}v indices are expected to come from
            # $threadindex.)
            output_indexish = "i0 * %(dy)s_strides[0] + i1 * %(dy)s_strides[1] + %(rest)s" % dict(
                dy=dy,
                rest=" + ".join("i%(i)sv * %(dy)s_strides[%(i)s]" % dict(dy=dy, i=2 + i) for i in range(ndim_spatial)),
            )

            for j in range(ndim_spatial):
                # j selects the position along the trailing axis $ndim_total.
                index = "%(output_indexish)s + %(j)s * %(dy)s_strides[$ndim_total]" % dict(
                    output_indexish=output_indexish, dy=dy, j=j
                )

                # initialize to zero
                initializations.append(
                    """
                assert(%(index)s < %(dy)s_size);
                %(dy)s[%(index)s] = 0.0f;
                """
                    % dict(dy=dy, index=index)
                )

                # compute contribution
                # for dy/dl[0], weight = dw/dl0 * w1 * w2
                # for dy/dl[1], weight = w0 * dw/dl1 * w2
                # for dy/dl[2], weight = w0 * w1 * dw/dl2
                # etc. and similarly dy/ds
                # (in the generated code the names carry the axis number, which
                # is offset by 2: dwdl2 * w3 * w4, ...)
                weight = " * ".join((dw if i == j else "w") + str(2 + i) for i in range(ndim_spatial))
                computations.append(
                    """
                assert(%(index)s < %(dy)s_size);
                %(dy)s[%(index)s] += %(weight)s * x[x_index];
                """
                    % dict(dy=dy, weight=weight, index=index)
                )

        # Kernel header, followed by the zero-initialization of the outputs.
        strings.append(
            """
        __global__ void $function_name($arguments) {
            $threadindex
        """
        )
        strings.append("\n".join(initializations))

        # Per spatial axis: window bounds a/b, the l and s values, and the
        # weight variables that $weight_function_name is called with.
        for i in range(ndim_spatial):
            strings.append(
                """
            const int a%(i2)s = a[i0 * $ndim_spatial + %(i)s],
                      b%(i2)s = b[i0 * $ndim_spatial + %(i)s];
            const float l%(i2)s = l[i0 * $ndim_spatial + %(i)s],
                        s%(i2)s = s[i0 * $ndim_spatial + %(i)s];
            assert(0 <= a%(i2)s); assert(a%(i2)s <= b%(i2)s); assert(b%(i2)s <= x_dims[%(i2)s]);
            float w%(i2)s = 0, dwdl%(i2)s = 0, dwds%(i2)s = 0;
            """
                % dict(i=i, i2=2 + i)
            )

        # loop over input pixel indices i{2, 3, ...}V
        # (one nested loop per spatial axis; each calls the weight function for
        # its axis before descending into the next loop)
        for i, patch_dim in enumerate(self.patch_shape):
            strings.append(
                """
            for (int i%(i)sV = a%(i)s; i%(i)sV < b%(i)s; ++i%(i)sV) {
                $weight_function_name(w%(i)s, dwdl%(i)s, dwds%(i)s, %(patch_dim)s, i%(i)sv, i%(i)sV, l%(i)s, s%(i)s);
            """
                % dict(i=2 + i, patch_dim=patch_dim)
            )
        # compute input memory location
        strings.append(
            "size_t x_index = i0 * x_strides[0] + i1 * x_strides[1] + %s;"
            % " + ".join("i%(i)sV * x_strides[%(i)s]" % dict(i=2 + i) for i in range(ndim_spatial))
        )
        strings.append("assert(x_index < x_size);")
        strings.append("\n".join(computations))
        # Close the ndim_spatial loops plus the kernel function itself.
        strings.extend("}" * (ndim_spatial + 1))

        from string import Template

        # Placeholders are resolved from the local variables of this method.
        return Template("\n".join(strings)).substitute(locals())
Exemple #4
0
    def c_support_code_apply(self, node, nodename):
        """Generate the CUDA source for the ``TimCropperGrad_<nodename>`` kernel.

        Text fragments are collected in a list, joined by newlines and passed
        through ``string.Template`` with ``locals()`` as the mapping; each
        ``$name`` placeholder is therefore resolved from the local variable of
        the same name, and renaming or removing a local can break the output.

        The generated kernel zeroes its elements of ``dydl`` and ``dyds``,
        then loops over the window ``[a, b)`` of every spatial axis and
        accumulates ``weight * x[x_index]`` into them.

        Args:
            node: Not used by this method.
            nodename: Suffix appended to the kernel name.

        Returns:
            The generated source code as a single string.
        """
        function_name = "TimCropperGrad_%s" % nodename
        weight_function_name = "%s_weight" % function_name
        ndim_spatial = len(self.patch_shape)
        # Index of the extra trailing axis of dydl/dyds (used as $ndim_total).
        ndim_total = 2 + ndim_spatial
        import math
        # NOTE(review): not referenced by any fragment written in this method;
        # presumably consumed as a placeholder by the text returned from
        # common.weightfunction, which also goes through the template -- confirm.
        sqrt2pi = math.sqrt(2 * math.pi)

        strings = []
        strings.append("""
        #include <stdio.h>
        """)

        # Helper source for $weight_function_name; its contents are defined in
        # `common` and are not visible here.
        strings.append(
            common.weightfunction(name=weight_function_name, grad=True))

        # Kernel parameter list: size, data pointer, dims and strides for each
        # array. Only x and dCdy get a const data pointer; dydl and dyds are
        # written by the kernel.
        arguments = []
        for var in "dydl dyds dCdy x".split():
            arguments.append("const size_t %s_size" % var)
            arguments.append(
                "%sfloat* %s" %
                ("const " if var in "x dCdy".split() else "", var))
            arguments.append("const int* %s_dims" % var)
            arguments.append("const int* %s_strides" % var)
        # a, b, l, s are contiguous
        # NOTE(review): a and b are declared `const float*` here but are read
        # into `const int` variables in the kernel body below.
        arguments.extend("const float* %s" % var for var in "abls")
        arguments = ", \n".join(arguments)

        # NOTE(review): dCdy is only used here (for its dims); the computations
        # generated below never read dCdy itself.
        threadindex = common.threadindex(ndim_spatial, "dCdy_dims")

        # generate code that does output initialization and computation. this is
        # best kept together in the python code to reduce confusion.
        initializations = []
        computations = []
        for var in "ls":
            dy = "dyd" + var
            dw = "dwd" + var

            # patch pixel index; we need to compute dydl[0]..dydl[ndim_spatial - 1]
            # for each pixel, so one more subscript is needed to fully qualify
            # the output element.
            # (i0, i1 and the i{2, 3, ...}v indices are expected to come from
            # $threadindex.)
            output_indexish = (
                "i0 * %(dy)s_strides[0] + i1 * %(dy)s_strides[1] + %(rest)s" %
                dict(dy=dy,
                     rest=" + ".join("i%(i)sv * %(dy)s_strides[%(i)s]" %
                                     dict(dy=dy, i=2 + i)
                                     for i in range(ndim_spatial))))

            for j in range(ndim_spatial):
                # j selects the position along the trailing axis $ndim_total.
                index = (
                    "%(output_indexish)s + %(j)s * %(dy)s_strides[$ndim_total]"
                    % dict(output_indexish=output_indexish, dy=dy, j=j))

                # initialize to zero
                initializations.append("""
                assert(%(index)s < %(dy)s_size);
                %(dy)s[%(index)s] = 0.0f;
                """ % dict(dy=dy, index=index))

                # compute contribution
                # for dy/dl[0], weight = dw/dl0 * w1 * w2
                # for dy/dl[1], weight = w0 * dw/dl1 * w2
                # for dy/dl[2], weight = w0 * w1 * dw/dl2
                # etc. and similarly dy/ds
                # (in the generated code the names carry the axis number, which
                # is offset by 2: dwdl2 * w3 * w4, ...)
                weight = " * ".join((dw if i == j else "w") + str(2 + i)
                                    for i in range(ndim_spatial))
                computations.append("""
                assert(%(index)s < %(dy)s_size);
                %(dy)s[%(index)s] += %(weight)s * x[x_index];
                """ % dict(dy=dy, weight=weight, index=index))

        # Kernel header, followed by the zero-initialization of the outputs.
        strings.append("""
        __global__ void $function_name($arguments) {
            $threadindex
        """)
        strings.append("\n".join(initializations))

        # Per spatial axis: window bounds a/b, the l and s values, and the
        # weight variables that $weight_function_name is called with.
        for i in range(ndim_spatial):
            strings.append("""
            const int a%(i2)s = a[i0 * $ndim_spatial + %(i)s],
                      b%(i2)s = b[i0 * $ndim_spatial + %(i)s];
            const float l%(i2)s = l[i0 * $ndim_spatial + %(i)s],
                        s%(i2)s = s[i0 * $ndim_spatial + %(i)s];
            assert(0 <= a%(i2)s); assert(a%(i2)s <= b%(i2)s); assert(b%(i2)s <= x_dims[%(i2)s]);
            float w%(i2)s = 0, dwdl%(i2)s = 0, dwds%(i2)s = 0;
            """ % dict(i=i, i2=2 + i))

        # loop over input pixel indices i{2, 3, ...}V
        # (one nested loop per spatial axis; each calls the weight function for
        # its axis before descending into the next loop)
        for i, patch_dim in enumerate(self.patch_shape):
            strings.append("""
            for (int i%(i)sV = a%(i)s; i%(i)sV < b%(i)s; ++i%(i)sV) {
                $weight_function_name(w%(i)s, dwdl%(i)s, dwds%(i)s, %(patch_dim)s, i%(i)sv, i%(i)sV, l%(i)s, s%(i)s);
            """ % dict(i=2 + i, patch_dim=patch_dim))
        # compute input memory location
        strings.append(
            "size_t x_index = i0 * x_strides[0] + i1 * x_strides[1] + %s;" %
            " + ".join("i%(i)sV * x_strides[%(i)s]" % dict(i=2 + i)
                       for i in range(ndim_spatial)))
        strings.append("assert(x_index < x_size);")
        strings.append("\n".join(computations))
        # Close the ndim_spatial loops plus the kernel function itself.
        strings.extend("}" * (ndim_spatial + 1))

        from string import Template
        # Placeholders are resolved from the local variables of this method.
        return Template("\n".join(strings)).substitute(locals())