Example #1
 def test_hash_and_eq_params_type(self):
     w1 = ParamsType(
         a1=TensorType("int64", (False, False)),
         a2=TensorType("int64", (False, True, False, False, True)),
         a3=Generic(),
     )
     w2 = ParamsType(
         a1=TensorType("int64", (False, False)),
         a2=TensorType("int64", (False, True, False, False, True)),
         a3=Generic(),
     )
     assert w1 == w2
     assert not (w1 != w2)
     assert hash(w1) == hash(w2)
     assert w1.name == w2.name
     # Changing attribute names only.
     w2 = ParamsType(
         a1=TensorType("int64", (False, False)),
         other_name=TensorType("int64", (False, True, False, False, True)),  # a2 -> other_name
         a3=Generic(),
     )
     assert w1 != w2
     # Changing attribute types only.
     w2 = ParamsType(
         a1=TensorType("int64", (False, False)),
         a2=Generic(),  # changing class
         a3=Generic(),
     )
     assert w1 != w2
     # Changing only a type's characteristics (here, the broadcastable pattern).
     w2 = ParamsType(
         a1=TensorType("int64", (False, True)),  # changing broadcasting
         a2=TensorType("int64", (False, True, False, False, True)),
         a3=Generic(),
     )
     assert w1 != w2
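
A companion sketch (hedged, not part of the test above; it assumes the same imports, i.e. ParamsType, Generic and TensorType): ParamsType equality is structural, so the keyword order used at construction time should not matter, since fields appear to be stored sorted by name.

# Hedged sketch: two ParamsType objects built with the same fields in a
# different keyword order should still compare and hash equal.
t1 = ParamsType(x=Generic(), y=TensorType("int64", (False,)))
t2 = ParamsType(y=TensorType("int64", (False,)), x=Generic())
assert t1 == t2 and hash(t1) == hash(t2)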
Example #2
class CGer(BaseBLAS, Ger):
    params_type = ParamsType(destructive=bool_t)

    def c_code(self, node, name, inp, out, sub):
        A, a, x, y = inp
        (Z,) = out
        return ger_c_code(
            A, a, x, y, Z, fail=sub["fail"], params=sub["params"]
        )

    def c_code_cache_version(self):
        return (11, blas_header_version())
Example #3
    def test_params_type_with_enums(self):
        # Test that we fail if we create a params type with common enum names inside different enum types.
        try:
            ParamsType(enum1=EnumList("A", "B", "C"),
                       enum2=EnumList("A", "B", "F"))
        except AttributeError:
            pass
        else:
            raise Exception(
                "ParamsType should fail with common enum names inside different enum types."
            )

        # Test that we fail if we create a params type with common names in both aliases and constants.
        try:
            ParamsType(
                enum1=EnumList(("A", "a"), ("B", "b")),
                enum2=EnumList(("ONE", "a"), ("TWO", "two")),
            )
        except AttributeError:
            # The same definition without the alias/constant name collision
            # must build fine.
            ParamsType(
                enum1=EnumList(("A", "a"), ("B", "b")),
                enum2=EnumList(("ONE", "one"), ("TWO", "two")),
            )
        else:
            raise Exception(
                "ParamsType should fail when there are aliases with same names as some constants."
            )

        # Test that we can access enum values through wrapper directly.
        w = ParamsType(
            enum1=EnumList("A", ("B", "beta"), "C"),
            enum2=EnumList(("D", "delta"), "E", "F"),
        )
        assert w.A == 0 and w.B == 1 and w.C == 2
        assert w.D == 0 and w.E == 1 and w.F == 2
        # Test constants access through aliases.
        assert w.enum_from_alias("beta") == w.B
        assert w.enum_from_alias("delta") == w.D
        # "C" is not an alias, so enum_from_alias falls back to the constant named C.
        assert w.enum_from_alias("C") == w.C
        # Test that other regular wrapper attributes are still available.
        assert len(w.fields) == len(w.types) == w.length
        assert w.name
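
A small hedged sketch building on the wrapper API exercised above (the names are illustrative, not from the test): enum constants are plain integers exposed as attributes on the ParamsType, and enum_from_alias() maps an alias back to the value of its constant.

w_sketch = ParamsType(mode=EnumList(("MODE_ADD", "add"), ("MODE_MUL", "mul")))
assert w_sketch.MODE_ADD == 0 and w_sketch.MODE_MUL == 1
assert w_sketch.enum_from_alias("mul") == w_sketch.MODE_MUL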
Example #4
class QuadraticCOpFunc(ExternalCOp):
    __props__ = ("a", "b", "c")
    params_type = ParamsType(a=tensor_type_0d, b=scalar_type, c=generic_type)

    def __init__(self, a, b, c):
        super().__init__("c_code/test_quadratic_function.c",
                         "APPLY_SPECIFIC(compute_quadratic)")
        self.a = a
        self.b = b
        self.c = c

    def make_node(self, x):
        x = tensor.as_tensor_variable(x)
        return Apply(self, [x], [x.type()])

    def perform(self, node, inputs, output_storage, coefficients):
        x = inputs[0]
        y = output_storage[0]
        y[0] = coefficients.a * (x**2) + coefficients.b * x + coefficients.c
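
A hedged sketch of where the extra perform() argument comes from: the parameter name (here, coefficients) is arbitrary; Theano passes whatever get_params() returns, and the default get_params() of an Op whose params_type is a ParamsType builds a Params object from the Op attributes of the same names. The manual call below is illustrative only, and assumes the external C file referenced in __init__ is available, as in Theano's test suite.

import numpy as np

op = QuadraticCOpFunc(2.0, 3.0, -7.0)  # a, b, c match __props__
out = [None]
# Mimic what Theano would pass as the fourth perform() argument:
params = op.params_type.get_params(op)
op.perform(None, [np.arange(4.0)], [out], params)
print(out[0])  # elementwise 2.0 * x**2 + 3.0 * x - 7.0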
Example #5
class CGemv(BaseBLAS, Gemv):
    params_type = ParamsType(inplace=bool_t)

    def __init__(self, inplace):
        super().__init__(inplace)

    def c_code(self, node, name, inp, out, sub):
        y, alpha, A, x, beta = inp
        (z,) = out
        code = gemv_c_code(
            y,
            A,
            x,
            z,
            alpha,
            beta,
            fail=sub["fail"],
            force_init_beta=check_force_gemv_init(),
            params=sub["params"],
        )
        return code

    def c_code_cache_version(self):
        return (14, blas_header_version(), check_force_gemv_init())
Example #6
class BaseCorr3dMM(gof.OpenMPOp):
    """
    Base class for `Corr3dMM`, `Corr3dMM_gradWeights` and
    `Corr3dMM_gradInputs`. Cannot be used directly.

    Every sub-class must define the internal attribute ``_direction`` outside
    of ``__init__()`` (i.e. as a class attribute). ``_direction`` must take
    one of the following values:

     - "forward" to correlate bottom with weights and store results in top.
     - "backprop weights" to do a valid convolution of bottom with top
       (swapping the first two dimensions) and store results in weights.
     - "backprop inputs" to do a full convolution of top with weights
       (swapping the first two dimensions) and store results in bottom.

    Parameters
    ----------
    border_mode : {'valid', 'full', 'half'}
        Alternatively, the padding size can be specified directly as an
        integer or a tuple of three integers.
    subsample
        Perform subsampling of the output (default: (1, 1, 1)).
    filter_dilation
        Perform dilated correlation (default: (1, 1, 1))
    num_groups
        Perform grouped convolutions (default: 1)
    """

    check_broadcast = False
    __props__ = ("border_mode", "subsample", "filter_dilation", "num_groups")

    _direction = None

    params_type = ParamsType(
        direction=EnumList(
            ("DIRECTION_FORWARD", "forward"),  # 0
            ("DIRECTION_BACKPROP_WEIGHTS", "backprop weights"),  # 1
            ("DIRECTION_BACKPROP_INPUTS", "backprop inputs"),
        ),  # 2
        dH=int64,
        dW=int64,
        dD=int64,
        dilH=int64,
        dilW=int64,
        dilD=int64,
        padH=int64,
        padW=int64,
        padD=int64,
        num_groups=int64,
    )

    def __init__(
            self,
            border_mode="valid",
            subsample=(1, 1, 1),
            filter_dilation=(1, 1, 1),
            openmp=None,
            num_groups=1,
    ):
        super().__init__(openmp=openmp)
        if isinstance(border_mode, int):
            if border_mode < 0:
                raise ValueError("invalid border_mode {}, which must be a "
                                 "non-negative integer".format(border_mode))
            border_mode = (border_mode, border_mode, border_mode)
        if isinstance(border_mode, tuple):
            if len(border_mode) != 3 or min(border_mode) < 0:
                raise ValueError(
                    "invalid border_mode {}, which must be a tuple of "
                    "three non-negative integers".format(border_mode))
            pad_h, pad_w, pad_d = map(int, border_mode)
            border_mode = (pad_h, pad_w, pad_d)
        if not ((isinstance(border_mode, tuple) and min(border_mode) >= 0)
                or border_mode in ("valid", "full", "half")):
            raise ValueError(
                "invalid border_mode {}, which must be either "
                '"valid", "full", "half", an integer or a tuple of three'
                " integers".format(border_mode))
        self.border_mode = border_mode
        if len(subsample) != 3:
            raise ValueError("subsample must have three elements")
        if len(filter_dilation) != 3:
            raise ValueError("filter_dilation must have three elements")
        self.subsample = tuple(subsample)
        self.filter_dilation = tuple(filter_dilation)
        if num_groups < 1:
            raise ValueError("Number of groups should be greater than 0")
        self.num_groups = num_groups

        if not theano.config.blas.ldflags:
            # Theano will use a NumPy C implementation of [sd]gemm_ instead.
            self.blas_type = ""
        else:
            if "openblas" in theano.config.blas.ldflags:
                self.blas_type = "openblas"
            elif "mkl" in theano.config.blas.ldflags:
                self.blas_type = "mkl"
            else:
                self.blas_type = ""

        if self._direction not in [
                "forward", "backprop weights", "backprop inputs"
        ]:
            raise ValueError("_direction must be one of 'forward', "
                             "'backprop weights', 'backprop inputs'")

    @property
    def pad(self):
        if self.border_mode == "half":
            return (-1, -1, -1)
        elif self.border_mode == "full":
            return (-2, -2, -2)
        elif isinstance(self.border_mode, tuple):
            return self.border_mode
        else:
            assert self.border_mode == "valid"
            return (0, 0, 0)

    # Direction should be converted to real enum value,
    # as it is compared to integer later in c_code_helper().
    direction = property(
        lambda self: self.params_type.enum_from_alias(self._direction))

    dH = property(lambda self: self.subsample[0])
    dW = property(lambda self: self.subsample[1])
    dD = property(lambda self: self.subsample[2])

    dilH = property(lambda self: self.filter_dilation[0])
    dilW = property(lambda self: self.filter_dilation[1])
    dilD = property(lambda self: self.filter_dilation[2])

    padH = property(lambda self: self.pad[0])
    padW = property(lambda self: self.pad[1])
    padD = property(lambda self: self.pad[2])

    def __str__(self):
        return "{}{{{}, {}, {}, {}}}".format(
            self.__class__.__name__,
            self.border_mode,
            str(self.subsample),
            str(self.filter_dilation),
            str(self.num_groups),
        )

    @staticmethod
    def as_common_dtype(in1, in2):
        """
        Upcast input variables if necessary.
        """
        dtype = theano.scalar.upcast(in1.dtype, in2.dtype)
        return in1.astype(dtype), in2.astype(dtype)

    def __setstate__(self, d):
        self.__dict__.update(d)
        if not hasattr(self, "num_groups"):
            self.num_groups = 1

    def c_support_code(self):
        ccodes = blas_headers.blas_header_text()
        if self.blas_type == "openblas":
            ccodes += blas_headers.openblas_threads_text()
        elif self.blas_type == "mkl":
            ccodes += blas_headers.mkl_threads_text()
        return ccodes

    def c_libraries(self):
        return ldflags()

    def c_compile_args(self):
        compile_args = ldflags(libs=False, flags=True)
        compile_args += super().c_compile_args()
        return compile_args

    def c_lib_dirs(self):
        return ldflags(libs=False, libs_dir=True)

    def c_header_dirs(self):
        return ldflags(libs=False, include_dir=True)

    def c_headers(self):
        headers = ["<stdio.h>"]
        headers += super().c_headers()
        return headers

    def c_code_cache_version(self):
        # raise this whenever modifying any of the support_code_files
        return (8, self.openmp, blas_header_version())

    def c_support_code_apply(self, node, nodename):
        # REMEMBER TO RAISE c_code_cache_version when changing any of
        # these files
        sub = {}
        dtype = str(node.inputs[0].dtype)
        assert dtype in ("float32", "float64")
        if dtype == "float32":
            sub["gemm"] = "sgemm_"
            sub["float_type"] = "npy_float"
            sub["float_typenum"] = "NPY_FLOAT"
            sub["n_bytes"] = 4
            sub["c_float_type"] = "float"
        else:
            sub["gemm"] = "dgemm_"
            sub["float_type"] = "npy_double"
            sub["float_typenum"] = "NPY_DOUBLE"
            sub["n_bytes"] = 8
            sub["c_float_type"] = "double"

        if self.openmp:
            sub["omp_flags"] = "#pragma omp parallel for schedule(static)"
            sub["omp_get_max_threads"] = "omp_get_max_threads()"
            sub["omp_get_thread_num"] = "omp_get_thread_num()"

            if self.blas_type == "openblas":
                sub["blas_set_num_threads"] = "openblas_set_num_threads"
                sub["blas_get_num_threads"] = "openblas_get_num_threads()"
            elif self.blas_type == "mkl":
                sub["blas_set_num_threads"] = "mkl_set_num_threads"
                sub["blas_get_num_threads"] = "mkl_get_max_threads()"
            else:
                sub["blas_set_num_threads"] = ""
                sub["blas_get_num_threads"] = "0"
        else:
            sub["omp_flags"] = ""
            sub["omp_get_max_threads"] = "1"
            sub["omp_get_thread_num"] = "0"
            sub["blas_set_num_threads"] = ""
            sub["blas_get_num_threads"] = "0"

        files = [os.path.join("c_code", "corr3d_gemm.c")]
        final_code = ""
        for f in files:
            # Read each support file; a context manager closes it promptly.
            with open(os.path.join(os.path.split(__file__)[0], f)) as fp:
                final_code += fp.read()
        return final_code % sub

    def c_code_helper(self,
                      bottom,
                      weights,
                      top,
                      sub,
                      height=None,
                      width=None,
                      depth=None):
        """
        This generates the C code for Corr3dMM (direction="forward"),
        Corr3dMM_gradWeights (direction="backprop weights"), and
        Corr3dMM_gradInputs (direction="backprop inputs").
        Depending on the direction, one of bottom, weights, top will
        receive the output, while the other two serve as inputs.

        :param bottom: Variable name of the input images in the forward pass,
            or the gradient of the input images in backprop wrt. inputs
        :param weights: Variable name of the filters in the forward pass,
            or the gradient of the filters in backprop wrt. weights
        :param top: Variable name of the output images / feature maps in the
            forward pass, or the gradient of the outputs in the backprop passes
        :param sub: Dictionary of substitutions usable to help generate the
            C code.
        :param height: If self.subsample[0] != 1, a variable giving the height
            of the filters for direction="backprop weights" or the height of
            the input images for direction="backprop inputs".

            If self.border_mode == 'half', a variable giving the height of the
            filters for direction="backprop weights".  Ignored otherwise.
        :param width: If self.subsample[1] != 1, a variable giving the width
            of the filters for direction="backprop weights" or the width of the
            input images for direction="backprop inputs".

            If self.border_mode == 'half', a variable giving the width of the
            filters for direction="backprop weights".  Ignored otherwise.
        :param depth: If self.subsample[2] != 1, a variable giving the depth
            of the filters for direction="backprop weights" or the depth of the
            input images for direction="backprop inputs".

            If self.border_mode == 'half', a variable giving the depth of the
            filters for direction="backprop weights".  Ignored otherwise.
        """

        # When subsampling, we cannot unambiguously infer the height and width
        # of bottom and weights from top, so we require them to be given.
        # Similarly, when border_mode="half", we cannot infer the weight size.
        if height:
            height = f"(*(npy_int64 *)(PyArray_DATA({height})))"
        else:
            if ((self.direction != 0) and
                (self.dH != 1)) or ((self.direction == 1) and
                                    (self.padH == -1)):
                raise ValueError(
                    "height must be given for backprop with vertical sampling or border_mode='half'"
                )
            height = "-1"
        if width:
            width = f"(*(npy_int64 *)(PyArray_DATA({width})))"
        else:
            if ((self.direction != 0) and
                (self.dW != 1)) or ((self.direction == 1) and
                                    (self.padW == -1)):
                raise ValueError(
                    "width must be given for backprop with horizontal sampling or border_mode='half'"
                )
            width = "-1"
        if depth:
            depth = f"(*(npy_int64 *)(PyArray_DATA({depth})))"
        else:
            if ((self.direction != 0) and
                (self.dD != 1)) or ((self.direction == 1) and
                                    (self.padD == -1)):
                raise ValueError(
                    "depth must be given for backprop with depth sampling or border_mode='half'"
                )
            depth = "-1"

        return """
    // Mandatory args
    int direction = %(params)s->direction;  // forward, bprop weights, bprop inputs

    // Optional args
    int dH = %(params)s->dH;
    int dW = %(params)s->dW;
    int dD = %(params)s->dD;
    int dilH = %(params)s->dilH;
    int dilW = %(params)s->dilW;
    int dilD = %(params)s->dilD;
    int padH = %(params)s->padH;
    int padW = %(params)s->padW;
    int padD = %(params)s->padD;
    int numgroups = %(params)s->num_groups;

    PyArrayObject * bottom = %(bottom)s;
    PyArrayObject * weights = %(weights)s;
    PyArrayObject * top = %(top)s;
    PyArrayObject * out2 = NULL;
    PyArrayObject **out = NULL;

    switch(%(params)s->direction) {
        case DIRECTION_FORWARD:
            out = &%(top)s;
            break;
        case DIRECTION_BACKPROP_WEIGHTS:
            out = &%(weights)s;
            break;
        case DIRECTION_BACKPROP_INPUTS:
            out = &%(bottom)s;
            break;
        default:
            PyErr_SetString(PyExc_ValueError, "CPU Corr3dMM: Invalid direction.");
            {%(fail)s}
            break;
    }

    // Obtain or infer kernel width, height and depth
    // (we need to know it early to be able to handle auto-padding)
    int kH, kW, kD, dil_kH, dil_kW, dil_kD;
    if (direction != 1) {
        // weight is an input variable, we can just read its shape
        kH = PyArray_DIMS(weights)[2];
        kW = PyArray_DIMS(weights)[3];
        kD = PyArray_DIMS(weights)[4];
    }
    else {
        if (%(height)s != -1) {
            // kernel height is specified (perhaps vertical subsampling or half padding)
            kH = %(height)s;
        }
        else if (padH == -2) {
            // vertical full padding, we can infer the kernel height
            kH = (2 - PyArray_DIMS(bottom)[2] + (PyArray_DIMS(top)[2] - 1) * dH - 1)/ dilH + 1;
        }
        else {
            // explicit padding, we can infer the kernel height
            kH = (PyArray_DIMS(bottom)[2] + 2*padH - (PyArray_DIMS(top)[2] - 1) * dH - 1) / dilH +1;
        }
        if (%(width)s != -1) {
            kW = %(width)s;
        }
        else if (padW == -2) {
            kW = (2 - PyArray_DIMS(bottom)[3] + (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
        }
        else {
            kW = (PyArray_DIMS(bottom)[3] + 2*padW - (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
        }
        if (%(depth)s != -1) {
            kD = %(depth)s;
        }
        else if (padD == -2) {
            kD = (2 - PyArray_DIMS(bottom)[4] + (PyArray_DIMS(top)[4] - 1) * dD - 1) / dilD + 1;
        }
        else {
            kD = (PyArray_DIMS(bottom)[4] + 2*padD - (PyArray_DIMS(top)[4] - 1) * dD - 1) / dilD + 1;
        }
    }

    // Implicit dilated kernel size
    dil_kH = (kH - 1) * dilH + 1;
    dil_kW = (kW - 1) * dilW + 1;
    dil_kD = (kD - 1) * dilD + 1;

    // Auto-padding if requested
    if (padH == -1) {  // vertical half padding
        padH = dil_kH / 2;
    }
    else if (padH == -2) {  // vertical full padding
        padH = dil_kH - 1;
    }
    else if (padH < 0) {
        PyErr_SetString(PyExc_ValueError, "BaseCorr3dMM: padH must be >= -2");
        %(fail)s
    }
    if (padW == -1) {  // horizontal half padding
        padW = dil_kW / 2;
    }
    else if (padW == -2) {  // horizontal full padding
        padW = dil_kW - 1;
    }
    else if (padW < 0) {
        PyErr_SetString(PyExc_ValueError, "BaseCorr3dMM: padW must be >= -2");
        %(fail)s
    }
    if (padD == -1) {  // depth half padding
        padD = dil_kD / 2;
    }
    else if (padD == -2) {  // depth full padding
        padD = dil_kD - 1;
    }
    else if (padD < 0) {
        PyErr_SetString(PyExc_ValueError, "BaseCorr3dMM: padD must be >= -2");
        %(fail)s
    }

    // Infer output shape
    npy_intp out_dim[5];
    switch(direction) {
    case 0:  // forward pass
        // output is top: (batchsize, num_filters, height, width, depth)
        // height and width: top = (bottom + 2*pad - ((weight-1)*dil + 1)) / sample + 1
        out_dim[0] = (npy_intp)PyArray_DIMS(bottom)[0];
        out_dim[1] = (npy_intp)PyArray_DIMS(weights)[0];
        out_dim[2] = (npy_intp)((PyArray_DIMS(bottom)[2] + 2*padH - ((PyArray_DIMS(weights)[2]-1)*dilH + 1)) / dH + 1);
        out_dim[3] = (npy_intp)((PyArray_DIMS(bottom)[3] + 2*padW - ((PyArray_DIMS(weights)[3]-1)*dilW + 1)) / dW + 1);
        out_dim[4] = (npy_intp)((PyArray_DIMS(bottom)[4] + 2*padD - ((PyArray_DIMS(weights)[4]-1)*dilD + 1)) / dD + 1);
        if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0 || out_dim[4] <= 0)
        {
            PyErr_Format(PyExc_ValueError,
                         "Corr3dMM: impossible output shape\\n"
                         "  bottom shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n"
                         "  weights shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n"
                         "  top shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n",
                         (long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
                         (long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
                         (long int)PyArray_DIMS(bottom)[4],
                         (long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
                         (long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
                         (long int)PyArray_DIMS(weights)[4],
                         (long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
                         (long int)out_dim[3], (long int)out_dim[4]);
            %(fail)s
        }
        break;
    case 1:  // backprop wrt. weights
        // output is weights: (num_filters, num_channels, height, width, depth)
        // height and width: weights = (bottom + 2*pad - (top - 1) * sample - 1) / dil + 1
        out_dim[0] = (npy_intp)PyArray_DIMS(top)[1];
        out_dim[1] = (npy_intp)PyArray_DIMS(bottom)[1] / numgroups;
        out_dim[2] = (npy_intp)kH;  // already inferred further above
        out_dim[3] = (npy_intp)kW;  // how convenient
        out_dim[4] = (npy_intp)kD;
        if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0 || out_dim[4] <= 0)
        {
            PyErr_Format(PyExc_ValueError,
                         "Corr3dMM backprop wrt. weights: impossible output shape\\n"
                         "  bottom shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n"
                         "  weights shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n"
                         "  top shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n",
                         (long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
                         (long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
                         (long int)PyArray_DIMS(bottom)[4],
                         (long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
                         (long int)out_dim[3], (long int)out_dim[4],
                         (long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
                         (long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3],
                         (long int)PyArray_DIMS(top)[4]);
            %(fail)s
        }
        break;
    case 2:  // backprop wrt. inputs
        // output is bottom: (batchsize, num_channels, height, width, depth)
        // height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
        out_dim[0] = (npy_intp)PyArray_DIMS(top)[0];
        out_dim[1] = (npy_intp)PyArray_DIMS(weights)[1] * numgroups;
        out_dim[2] = (npy_intp)((%(height)s != -1) ? %(height)s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH);
        out_dim[3] = (npy_intp)((%(width)s != -1) ? %(width)s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW);
        out_dim[4] = (npy_intp)((%(depth)s != -1) ? %(depth)s : (PyArray_DIMS(top)[4] - 1) * dD + (PyArray_DIMS(weights)[4]-1)*dilD + 1 - 2*padD);
        if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0 || out_dim[4] <= 0)
        {
            PyErr_Format(PyExc_ValueError,
                         "Corr3dMM backprop wrt. inputs: impossible output shape\\n"
                         "  bottom shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n"
                         "  weights shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n"
                         "  top shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n",
                         (long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
                         (long int)out_dim[3], (long int)out_dim[4],
                         (long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
                         (long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
                         (long int)PyArray_DIMS(weights)[4],
                         (long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
                         (long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3],
                         (long int)PyArray_DIMS(top)[4]);
            %(fail)s
        }
        break;
    default:
        PyErr_SetString(PyExc_ValueError, "BaseCorr3dMM: direction must be 0, 1, or 2\\n");
        %(fail)s
    }

    // Prepare output array
    int typenum;
    if ( !(*out
           && PyArray_NDIM(*out)==5
           && PyArray_IS_C_CONTIGUOUS(*out)
           && PyArray_DIMS(*out)[0]==out_dim[0]
           && PyArray_DIMS(*out)[1]==out_dim[1]
           && PyArray_DIMS(*out)[2]==out_dim[2]
           && PyArray_DIMS(*out)[3]==out_dim[3]
           && PyArray_DIMS(*out)[4]==out_dim[4]))
    {
        Py_XDECREF(*out);
        if (direction != 1) {
          typenum = PyArray_TYPE(weights);
        }
        else {
          typenum = PyArray_TYPE(bottom);
        }
        //Use PyArray_ZEROS so the output starts zero-initialized.
        *out = (PyArrayObject*)PyArray_ZEROS(5,
                                          out_dim,
                                          typenum,
                                          0);
        if (NULL == *out)
        {
            PyErr_Format(PyExc_RuntimeError,
                    "BaseCorr3dMM: Failed to allocate output of %%lld x %%lld x %%lld x %%lld x %%lld",
                    (long long)out_dim[0], (long long)out_dim[1],
                    (long long)out_dim[2], (long long)out_dim[3], (long long)out_dim[4]);
            %(fail)s
        }
    }

    // Call corr3dMM code
    out2 = corr3dMM(%(bottom)s, %(weights)s, %(top)s, direction,
                    dH, dW, dD, dilH, dilW, dilD, padH, padW, padD,
                    numgroups);
    if (out2==NULL){
       %(fail)s
    }
    assert (out2 == *out);

""" % dict(
            bottom=bottom,
            weights=weights,
            top=top,
            height=height,
            width=width,
            depth=depth,
            fail=sub["fail"],
            params=sub["params"],
        )
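
A minimal hedged sketch of the sub-class contract described in the docstring above: a concrete Op pins down ``_direction`` as a class attribute, and the ``direction`` property then resolves that alias to its integer enum value. The real Corr3dMM, Corr3dMM_gradWeights and Corr3dMM_gradInputs classes also implement make_node and the C code entry points, omitted here.

class Corr3dMMForwardSketch(BaseCorr3dMM):
    # Hypothetical subclass, for illustration only.
    _direction = "forward"

op = Corr3dMMForwardSketch()
assert op.direction == 0  # enum value of DIRECTION_FORWARD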
Example #7
class GpuAdvancedIncSubtensor1(COp):
    """
    Implement AdvancedIncSubtensor1 on the gpu.

    """

    _f16_ok = True
    __props__ = ("inplace", "set_instead_of_inc")
    params_type = ParamsType(
        inplace=bool_t,
        set_instead_of_inc=bool_t,
        context=gpu_context_type,
        # The following params are used in c_init_code_struct(),
        # as inputs are not available in that function.
        ndim_input_0=size_t,
        ndim_input_1=size_t,
        typecode_input_0=int_t,
        typecode_input_1=int_t,
    )

    def __init__(self, inplace=False, set_instead_of_inc=False):
        self.inplace = inplace
        self.set_instead_of_inc = set_instead_of_inc
        if inplace:
            self.destroy_map = {0: [0]}

    def clone_inplace(self):
        return self.__class__(inplace=True, set_instead_of_inc=self.set_instead_of_inc)

    def make_node(self, x, y, ilist):
        ctx_name = infer_context_name(x, y)
        x_ = as_gpuarray_variable(x, ctx_name)
        y_ = as_gpuarray_variable(y, ctx_name)
        ilist_ = tt.as_tensor_variable(ilist)

        assert x_.type.ndim >= y_.type.ndim

        if ilist_.type.dtype not in tt.integer_dtypes:
            raise TypeError("index must be integers")
        if ilist_.type.ndim != 1:
            raise TypeError("index must be vector")
        if x_.type.ndim == 0:
            raise TypeError("cannot index into a scalar")
        if y_.type.ndim > x_.type.ndim:
            if self.set_instead_of_inc:
                opname = "set"
            else:
                opname = "increment"
            raise TypeError(
                "cannot %s x subtensor with ndim=%s by y with ndim=%s "
                % (opname, x_.type.ndim, y_.type.ndim)
            )

        return gof.Apply(self, [x_, y_, ilist_], [x_.type()])

    def get_params(self, node):
        return self.params_type.get_params(
            self,
            context=node.outputs[0].type.context,
            # The following params are used in c_init_code_struct().
            ndim_input_0=node.inputs[0].ndim,
            ndim_input_1=node.inputs[1].ndim,
            typecode_input_0=node.inputs[0].type.typecode,
            typecode_input_1=node.inputs[1].type.typecode,
        )

    # We can't use the parent version that loops on each index
    # as we also need to loop when set_instead_of_inc is True and the
    # parent doesn't loop in that case.
    def perform(self, node, inp, out_, params=None):
        # TODO opt to make this inplace
        x, y, idx = inp
        (out,) = out_

        if not self.inplace:
            x = x.copy()

        out[0] = x

        if len(idx) == 0:
            return

        # Make sure idx is not a GpuArray otherwise we cannot use its
        # content to index x and y (This is because we serve as
        # fallback for _dev20).
        if isinstance(idx, gpuarray.GpuArray):
            idx = np.asarray(idx)

        # If `y` has as many dimensions as `x`, then we want to iterate
        # jointly on `x` and `y`. Otherwise, it means `y` should be
        # broadcasted to fill all relevant rows of `x`.
        if y.ndim == x.ndim and y.shape[0] != 1:
            assert len(y) == len(idx)
            if self.set_instead_of_inc:
                for (j, i) in enumerate(idx):
                    x[i] = y[j]
            else:
                k = get_iadd(node.inputs[0], node.inputs[1])
                for (j, i) in enumerate(idx):
                    k(x[i], y[j], broadcast=True)
        else:
            if y.ndim == x.ndim:
                # First dim is always 1 in this case.
                reshaped_y = y.reshape(y.shape[1:])
            else:
                nb_dims_to_add = (x.ndim - 1) - y.ndim
                reshaped_y = y.reshape((1,) * nb_dims_to_add + y.shape)

            if self.set_instead_of_inc:
                for i in idx:
                    x[i] = reshaped_y
            else:
                k = get_iadd(node.inputs[0], node.inputs[1])
                for i in idx:
                    k(x[i], reshaped_y, broadcast=True)

    def c_headers(self):
        return [
            "<numpy_compat.h>",
            "<gpuarray/error.h>",
            "<gpuarray/array.h>",
            "<gpuarray/elemwise.h>",
            "gpuarray_helper.h",
        ]

    def c_header_dirs(self):
        return [gpuarray_helper_inc_dir()]

    def c_support_code_struct(self, node, nodename):
        return "\nGpuElemwise *iadd;\n"

    def c_init_code_struct(self, node, name, sub):
        return """
        gpuelemwise_arg args[2] = {{0}};
        args[0].name = "a";
        args[0].typecode = %(params)s->typecode_input_0;
        args[0].flags = GE_READ|GE_WRITE;
        args[1].name = "b";
        args[1].typecode = %(params)s->typecode_input_1;
        args[1].flags = GE_READ;
        iadd = GpuElemwise_new(%(params)s->context->ctx, "", "a += b",
                               2, args, %(params)s->ndim_input_1, GE_CONVERT_F16);
        if (iadd == NULL) {
          PyErr_SetString(PyExc_RuntimeError, "Could not intialize inplace add support");
          %(fail)s
        }
        """ % dict(
            params=sub["params"], fail=sub["fail"]
        )

    def c_code(self, node, name, inputs, outputs, sub):
        if node.inputs[0].ndim != node.inputs[1].ndim:
            raise NotImplementedError("This case does not have C code yet.")

        return """
        PyGpuArrayObject *row_x, *row_y;
        size_t nd = %(params)s->ndim_input_0;
        ssize_t *start = NULL, *step = NULL;
        size_t num_indices, j;
        int ret;
        int broadcast_y;

        start = (ssize_t*)malloc(nd * sizeof(ssize_t));
        step = (ssize_t*)malloc(nd * sizeof(ssize_t));
        if (start == NULL || step == NULL) {
            PyErr_NoMemory();
            %(fail)s
        }

        for (j = 0; j < nd; ++j) {
          start[j] = 0;
          step[j] = 1;
        }
        step[0] = 0;
        num_indices = PyArray_SIZE(%(ind)s);
        if (!%(params)s->inplace) {
          %(out)s = theano_try_copy(%(out)s, %(x)s);
          if (%(out)s == NULL) {
            // Exception already set
            %(fail)s
            }
        } else {
          Py_XDECREF(%(out)s);
          %(out)s = %(x)s;
          Py_INCREF(%(out)s);
        }
        if (num_indices != 0) {
          if ((num_indices - 1) > LONG_MAX) {
            PyErr_Format(PyExc_AssertionError,
                         "num_indices %%lld exceeds LONG_MAX + 1", (long long)num_indices);
            %(fail)s
          }
          broadcast_y = PyGpuArray_DIM(%(y)s, 0) == 1;
          for (j = 0; j < num_indices; j++) {
            start[0] = *(dtype_%(ind)s *)PyArray_GETPTR1(%(ind)s, j);
            if (start[0] < 0)
              start[0] += PyGpuArray_DIM(%(out)s, 0);
            if (start[0] < 0 || start[0] >= PyGpuArray_DIM(%(out)s, 0)) {
               PyErr_SetString(PyExc_IndexError, "index out of bounds");
               %(fail)s;
            }
            row_x = pygpu_index(%(out)s, start, (ssize_t *)PyGpuArray_DIMS(%(out)s), step);
            if (row_x == NULL)
              %(fail)s;

            if (broadcast_y)
              start[0] = 0;
            else
              start[0] = j;

            row_y = pygpu_index(%(y)s, start, (ssize_t *)PyGpuArray_DIMS(%(y)s), step);
            if (row_y == NULL) {
              Py_DECREF(row_x);
              %(fail)s;
            }

            if (%(params)s->set_instead_of_inc) {
              ret = GpuArray_setarray(&row_x->ga, &row_y->ga);
            } else {
              void *args[2];
              args[0] = (void *)&row_x->ga;
              args[1] = (void *)&row_y->ga;
              ret = GpuElemwise_call(iadd, args, GE_BROADCAST | GE_PADSHAPE);
            }
            Py_DECREF(row_x);
            Py_DECREF(row_y);
            if (ret != GA_NO_ERROR)
              PyErr_SetString(PyExc_RuntimeError, "Failed to set/inc elements");
          }
        }

        free(start);
        free(step);
        """ % dict(
            x=inputs[0],
            y=inputs[1],
            ind=inputs[2],
            out=outputs[0],
            params=sub["params"],
            fail="""
                   {
                        free(start);
                        free(step);
                        %(fail)s
                   }
                   """
            % dict(fail=sub["fail"]),
        )

    def c_code_cache_version(self):
        return (5,)
Example #8
    def test_params_type_filtering(self):
        shape_tensor5 = (1, 2, 2, 3, 2)
        size_tensor5 = (shape_tensor5[0] * shape_tensor5[1] *
                        shape_tensor5[2] * shape_tensor5[3] * shape_tensor5[4])
        random_tensor = np.random.normal(
            size=size_tensor5).reshape(shape_tensor5)

        w = ParamsType(
            a1=TensorType("int32", (False, False)),
            a2=TensorType("float64", (False, False, False, False, False)),
            a3=Generic(),
        )

        # With a value that does not match the params type.
        o = Params(
            w,
            a1=np.asarray([[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]]).astype("int64"),
            a2=random_tensor.astype("float32"),
            a3=2000,
        )
        # Should fail (o.a1 is not int32, o.a2 is not float64).
        with pytest.raises(TypeError):
            w.filter(o, strict=True)
        # Should fail (wrong dtypes, and downcast is disallowed).
        with pytest.raises(TypeError):
            w.filter(o, strict=False, allow_downcast=False)
        # Should pass.
        w.filter(o, strict=False, allow_downcast=True)

        # With a value that matches the params type.
        o1 = Params(
            w,
            a1=np.asarray([[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]]).astype("int32"),
            a2=random_tensor.astype("float64"),
            a3=2000,
        )
        # All should pass.
        w.filter(o1, strict=True)
        w.filter(o1, strict=False, allow_downcast=False)
        w.filter(o1, strict=False, allow_downcast=True)

        # Check values_eq and values_eq_approx.
        o2 = Params(
            w,
            a1=np.asarray([[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]]).astype("int32"),
            a2=random_tensor.astype("float64"),
            a3=2000,
        )
        assert w.values_eq(o1, o2)
        assert w.values_eq_approx(o1, o2)

        # Check value_eq_approx.
        # NB: It is not obvious exactly which differences values_eq rejects
        # while values_eq_approx accepts, so we just perturb the float values
        # slightly.
        o3 = Params(
            w,
            a1=np.asarray([[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]]).astype("int32"),
            a2=(random_tensor.astype("float32") * 10 / 2.2 * 2.19999999999 /
                10).astype("float64"),
            a3=2000.0 - 0.00000000000000001,
        )
        assert w.values_eq_approx(o1, o3)
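
A hedged micro-example of the distinction the last block exercises: ParamsType delegates comparison to each field's type, so for a float64 tensor field an exact mismatch fails values_eq while a tiny perturbation still passes values_eq_approx (assumed here to use TensorType's allclose-style check with its default tolerances).

wf = ParamsType(v=TensorType("float64", (False,)))
p_a = Params(wf, v=np.asarray([1.0, 2.0]))
p_b = Params(wf, v=np.asarray([1.0, 2.0 + 1e-12]))
assert not wf.values_eq(p_a, p_b)  # exact comparison fails
assert wf.values_eq_approx(p_a, p_b)  # approximate comparison passes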
Example #9
class QuadraticOpFunc(COp):
    __props__ = ("a", "b", "c")
    params_type = ParamsType(a=tensor_type_0d, b=scalar_type, c=generic_type)

    def __init__(self, a, b, c):
        self.a = a
        self.b = b
        self.c = c

    def make_node(self, x):
        x = tensor.as_tensor_variable(x)
        return Apply(self, [x], [x.type()])

    def perform(self, node, inputs, output_storage, coefficients):
        x = inputs[0]
        y = output_storage[0]
        y[0] = coefficients.a * (x**2) + coefficients.b * x + coefficients.c

    def c_code_cache_version(self):
        return (1, 5)

    def c_support_code_apply(self, node, name):
        float_type = node.inputs[0].type.dtype_specs()[1]
        return """
        /* Computes: x = a*x*x + b*x + c for x in tensor. */
        int quadratic_%(name)s(PyArrayObject* tensor, %(float_type)s a, %(float_type)s b, %(float_type)s c) {
            NpyIter* iterator = NpyIter_New(tensor,
                NPY_ITER_READWRITE | NPY_ITER_EXTERNAL_LOOP | NPY_ITER_REFS_OK,
                NPY_KEEPORDER, NPY_NO_CASTING, NULL);
            if(iterator == NULL) {
                PyErr_SetString(PyExc_RuntimeError, "Unable to iterate over a tensor for an elemwise operation.");
                return -1;
            }
            NpyIter_IterNextFunc* get_next = NpyIter_GetIterNext(iterator, NULL);
            char** data_ptr = NpyIter_GetDataPtrArray(iterator);
            npy_intp* stride_ptr = NpyIter_GetInnerStrideArray(iterator);
            npy_intp* innersize_ptr = NpyIter_GetInnerLoopSizePtr(iterator);
            do {
                char* data = *data_ptr;
                npy_intp stride = *stride_ptr;
                npy_intp count = *innersize_ptr;
                while(count) {
                    %(float_type)s x = *((%(float_type)s*)data);
                    *((%(float_type)s*)data) = a*x*x + b*x + c;
                    data += stride;
                    --count;
                }
            } while(get_next(iterator));
            NpyIter_Deallocate(iterator);
            return 0;
        }
        """ % {
            "name": name,
            "float_type": float_type,
        }

    def c_code(self, node, name, inputs, outputs, sub):
        return """
        %(float_type)s a = (%(float_type)s) (*(npy_float64*) PyArray_GETPTR1(%(coeff)s->a, 0)); // 0-D TensorType.
        %(float_type)s b =                                                   %(coeff)s->b;      // Scalar.
        %(float_type)s c =                 (%(float_type)s) PyFloat_AsDouble(%(coeff)s->c);     // Generic.
        Py_XDECREF(%(Y)s);
        %(Y)s = (PyArrayObject*)PyArray_EMPTY(PyArray_NDIM(%(X)s), PyArray_DIMS(%(X)s), PyArray_TYPE(%(X)s), PyArray_IS_F_CONTIGUOUS(%(X)s));
        if (PyArray_CopyInto(%(Y)s, %(X)s) != 0) {
            PyErr_SetString(PyExc_RuntimeError, "Unable to copy input into output.");
            %(fail)s
        };
        if (quadratic_%(name)s(%(Y)s, a, b, c) != 0) {
            PyErr_SetString(PyExc_RuntimeError, "Unable to compute quadratic function.");
            %(fail)s
        }
        """ % dict(
            name=name,
            coeff=sub["params"],
            fail=sub["fail"],
            X=inputs[0],
            Y=outputs[0],
            float_type=node.inputs[0].type.c_element_type(),
        )
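
An end-to-end hedged sketch for QuadraticOpFunc, assuming the definitions from Theano's test module (tensor_type_0d = TensorType("float64", tuple()), scalar_type = Scalar("float64"), generic_type = Generic()); the coefficient values are illustrative.

import numpy as np
import theano
from theano import tensor

x = tensor.matrix("x")
fn = theano.function([x], QuadraticOpFunc(2.0, 3.0, -7.0)(x))
print(fn(np.arange(6.0).reshape(2, 3)))  # elementwise 2.0 * x**2 + 3.0 * x - 7.0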
Example #10
 def test_hash_and_eq_params(self):
     wp1 = ParamsType(
         a=Generic(),
         array=TensorType("int64", (False, )),
         floatting=Scalar("float64"),
         npy_scalar=TensorType("float64", tuple()),
     )
     wp2 = ParamsType(
         a=Generic(),
         array=TensorType("int64", (False, )),
         floatting=Scalar("float64"),
         npy_scalar=TensorType("float64", tuple()),
     )
     w1 = Params(
         wp1,
         a=1,
         array=np.asarray([1, 2, 4, 5, 7]),
         floatting=-4.5,
         npy_scalar=np.asarray(12),
     )
     w2 = Params(
         wp2,
         a=1,
         array=np.asarray([1, 2, 4, 5, 7]),
         floatting=-4.5,
         npy_scalar=np.asarray(12),
     )
     assert w1 == w2
     assert not (w1 != w2)
     assert hash(w1) == hash(w2)
     # Changing attribute names only (a -> other_name).
     wp2_other = ParamsType(
         other_name=Generic(),
         array=TensorType("int64", (False, )),
         floatting=Scalar("float64"),
         npy_scalar=TensorType("float64", tuple()),
     )
     w2 = Params(
         wp2_other,
         other_name=1,
         array=np.asarray([1, 2, 4, 5, 7]),
         floatting=-4.5,
         npy_scalar=np.asarray(12),
     )
     assert w1 != w2
     # Changing attribute values only (now a=2).
     w2 = Params(
         wp2,
         a=2,
         array=np.asarray([1, 2, 4, 5, 7]),
         floatting=-4.5,
         npy_scalar=np.asarray(12),
     )
     assert w1 != w2
     # Changing NumPy array values (5 -> -5).
     w2 = Params(
         wp2,
         a=1,
         array=np.asarray([1, 2, 4, -5, 7]),
         floatting=-4.5,
         npy_scalar=np.asarray(12),
     )
     assert w1 != w2