Example #1
    def run_gradweight(self, inputs_shape, filters_shape, dCdH_shape, subsample=(1, 1)):
        # Permute the shapes from (batch, rows, cols, channels) order to the
        # (batch, channels, rows, cols) layout the CorrMM ops expect.
        inputs_shape = [inputs_shape[i] for i in (0, 3, 1, 2)]
        filters_shape = [filters_shape[i] for i in (0, 3, 1, 2)]
        dCdH_shape = [dCdH_shape[i] for i in (0, 3, 1, 2)]

        inputs_val = np.random.random(inputs_shape).astype(config.floatX)
        dCdH_val = np.random.random(dCdH_shape).astype(config.floatX)
        inputs = gpuarray_shared_constructor(inputs_val)
        dCdH = gpuarray_shared_constructor(dCdH_val)
        # The filters' spatial shape is passed explicitly when subsampling,
        # since it cannot be inferred from dCdH in that case.
        shape = gpuarray_shared_constructor(np.array(filters_shape[2:]))

        if subsample == (1, 1):
            conv_ref = CorrMM_gradWeights(subsample=subsample)(
                ref_cast(inputs), ref_cast(dCdH)
            )
            conv_gemm = GpuCorrMM_gradWeights(subsample=subsample)(inputs, dCdH)
        else:
            conv_ref = CorrMM_gradWeights(subsample=subsample)(
                ref_cast(inputs), ref_cast(dCdH), shape=shape
            )
            conv_gemm = GpuCorrMM_gradWeights(subsample=subsample)(
                inputs, dCdH, shape=shape
            )

        f_ref = theano.function([], conv_ref, mode=mode_without_gpu)
        f = theano.function([], conv_gemm, mode=mode_with_gpu)

        res_ref = f_ref()
        res = f()
        utt.assert_allclose(res_ref, res)
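
For reference, a call to this helper might look like the sketch below. The shapes are illustrative assumptions, not from the source, given in (batch, rows, cols, channels) order as the method expects before it permutes them; for a valid correlation with unit strides, each dCdH spatial extent must equal the input extent minus the filter extent plus one.

    # Hypothetical shapes: 10 - 3 + 1 = 8 out-rows, 12 - 4 + 1 = 9 out-cols.
    self.run_gradweight(
        inputs_shape=(16, 10, 12, 2),  # batch, rows, cols, in-channels
        filters_shape=(7, 3, 4, 2),    # n-filters, f-rows, f-cols, in-channels
        dCdH_shape=(16, 8, 9, 7),      # batch, out-rows, out-cols, n-filters
        subsample=(1, 1),
    )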
Example #2
    def run_gradinput(self, inputs_shape, filters_shape, subsample=(1, 1, 1)):
        inputs_shape = [inputs_shape[i] for i in (0, 4, 1, 2, 3)]
        filters_shape = [filters_shape[i] for i in (0, 4, 1, 2, 3)]

        inputs_val = np.random.random(inputs_shape).astype(config.floatX)
        filters_val = np.random.random(filters_shape).astype(config.floatX)
        inputs = gpuarray_shared_constructor(inputs_val)
        filters = gpuarray_shared_constructor(filters_val)

        # In a gradInputs computation the `inputs` argument plays the role of
        # the output gradient (topgrad), so the bottom (image) shape has to be
        # reconstructed from it and the strides.
        bottom_height = (inputs_shape[2] - 1) * subsample[0] + filters_shape[2]
        bottom_width = (inputs_shape[3] - 1) * subsample[1] + filters_shape[3]
        bottom_depth = (inputs_shape[4] - 1) * subsample[2] + filters_shape[4]
        bottom_shape = gpuarray_shared_constructor(
            np.array([bottom_height, bottom_width, bottom_depth]))

        if subsample == (1, 1, 1):
            conv_ref = Corr3dMMGradInputs(subsample=subsample)(
                kern=ref_cast(filters), topgrad=ref_cast(inputs))
            conv_gemm = GpuCorr3dMM_gradInputs(subsample=subsample)(
                kern=filters, topgrad=inputs)
        else:
            conv_ref = Corr3dMMGradInputs(subsample=subsample)(
                kern=ref_cast(filters),
                topgrad=ref_cast(inputs),
                shape=bottom_shape)
            conv_gemm = GpuCorr3dMM_gradInputs(subsample=subsample)(
                kern=filters, topgrad=inputs, shape=bottom_shape)

        f_ref = theano.function([], conv_ref, mode=mode_without_gpu)
        f = theano.function([], conv_gemm, mode=mode_with_gpu)

        res_ref = f_ref()
        res = f()
        utt.assert_allclose(res_ref, res)
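
A hypothetical invocation follows (the shapes are assumptions); note that `inputs_shape` here describes the output gradient in (batch, d1, d2, d3, channels) order, from which the helper reconstructs the bottom shape.

    # Assumed shapes: bottom extent per axis = (5 - 1) * 2 + 3 = 11.
    self.run_gradinput(
        inputs_shape=(16, 5, 5, 5, 10),  # batch, 3 output dims, n-filters
        filters_shape=(10, 3, 3, 3, 2),  # n-filters, 3 filter dims, in-channels
        subsample=(2, 2, 2),
    )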
Example #3
    def run_conv_valid(
        self,
        inputs_shape,
        filters_shape,
        border_mode="valid",
        filter_dilation=(1, 1),
        subsample=(1, 1),
        unshared=False,
        verify_grad=False,
    ):
        inputs_shape = [inputs_shape[i] for i in (0, 3, 1, 2)]
        if unshared:
            # Unshared filters have six axes; move channels ahead of the
            # filter rows/cols: (nf, out-rows, out-cols, f-rows, f-cols, channels)
            # becomes (nf, out-rows, out-cols, channels, f-rows, f-cols).
            filters_shape = [filters_shape[i] for i in (0, 1, 2, 5, 3, 4)]
        else:
            filters_shape = [filters_shape[i] for i in (0, 3, 1, 2)]

        inputs_val = np.random.random(inputs_shape).astype(config.floatX)
        filters_val = np.random.random(filters_shape).astype(config.floatX)

        inputs = gpuarray_shared_constructor(inputs_val)
        filters = gpuarray_shared_constructor(filters_val)

        conv_ref = CorrMM(
            border_mode=border_mode,
            filter_dilation=filter_dilation,
            subsample=subsample,
            unshared=unshared,
        )(ref_cast(inputs), ref_cast(filters))
        f_ref = theano.function([], conv_ref, mode=mode_without_gpu)

        conv = GpuCorrMM(
            border_mode=border_mode,
            filter_dilation=filter_dilation,
            subsample=subsample,
            unshared=unshared,
        )(inputs, filters)
        f = theano.function([], conv, mode=mode_with_gpu)

        res_ref = f_ref()
        res = f()
        utt.assert_allclose(res_ref, res)

        if verify_grad:
            utt.verify_grad(
                GpuCorrMM(
                    border_mode=border_mode,
                    filter_dilation=filter_dilation,
                    subsample=subsample,
                    unshared=unshared,
                ),
                [inputs_val, filters_val],
                mode=mode_with_gpu,
            )
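
Two hypothetical invocations (all shapes are assumptions): a plain shared-filter case, and an unshared case where the filters carry an extra pair of output-position axes and the gradient is also verified.

    self.run_conv_valid(inputs_shape=(16, 10, 12, 2), filters_shape=(7, 3, 4, 2))
    self.run_conv_valid(
        inputs_shape=(16, 10, 12, 2),
        # n-filters, out-rows, out-cols, f-rows, f-cols, in-channels
        filters_shape=(7, 8, 9, 3, 4, 2),
        unshared=True,
        verify_grad=True,
    )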
Example #4
    def run_gradweight_runtime_algorithm(algo):
        with aesara.config.change_flags(dnn__conv__algo_bwd_filter=algo):
            inputs = TensorType(dtype, _broadcastable)()
            filters = TensorType(dtype, _broadcastable)()
            conv = dnn_conv(
                img=inputs,
                kerns=filters,
                algo=algo,
                precision=dtype,
                subsample=unit_shape,
                dilation=unit_shape,
            )
            grad_w = aesara.gradient.grad(conv.sum(), [filters])
            f = aesara.function([inputs, filters], grad_w, mode=mode_with_gpu)
            # The compiled graph must contain exactly one gradient-of-weights
            # node and no forward or gradient-of-inputs nodes.
            assert 1 == len([
                node for node in f.maker.fgraph.apply_nodes
                if isinstance(node.op, GpuDnnConvGradW)
            ])
            assert not any(
                isinstance(node.op, GpuDnnConv)
                for node in f.maker.fgraph.apply_nodes)
            assert not any(
                isinstance(node.op, GpuDnnConvGradI)
                for node in f.maker.fgraph.apply_nodes)
            # cuDNN computes a true convolution, so the reference correlation
            # needs flipped filters.
            if self.ndim == 3:
                flipped_filters = filters[:, :, ::-1, ::-1, ::-1]
            else:
                flipped_filters = filters[:, :, ::-1, ::-1]
            conv_ref = self.cpu_conv_class(subsample=unit_shape)(
                ref_cast(inputs), flipped_filters)
            grad_w_ref = aesara.gradient.grad(conv_ref.sum(), [filters])
            f_ref = aesara.function([inputs, filters],
                                    grad_w_ref,
                                    mode="FAST_RUN")
            runtime_shapes = self.runtime_shapes
            if algo in ("time_once", "guess_once"):
                # The *_once algorithms select an algorithm on the first call
                # only, so exercise a single shape several times.
                runtime_shapes = [list(runtime_shapes[0])]
                runtime_shapes[0][0] = 5
            for ntimes, (inputs_shape, filters_shape) in runtime_shapes:
                print("Shapes:", inputs_shape, filters_shape)
                for i in range(ntimes):
                    inputs_val = np.random.random(inputs_shape).astype(dtype)
                    filters_val = np.random.random(filters_shape).astype(dtype)
                    gpu_res = f(inputs_val, filters_val)
                    cpu_res = f_ref(inputs_val, filters_val)
                    utt.assert_allclose(cpu_res, np.asarray(gpu_res))
    def run_fwd_runtime_algorithm(algo):
        inputs = theano.tensor.TensorType(dtype, _broadcastable)()
        filters = theano.tensor.TensorType(dtype, _broadcastable)()
        # Scale down the input values to prevent very large absolute errors
        # due to float rounding
        lower_inputs = inputs / 10
        lower_filters = filters / 10
        conv = dnn_conv(
            img=lower_inputs,
            kerns=lower_filters,
            algo=algo,
            precision=dtype,
            subsample=unit_shape,
            dilation=unit_shape,
        )
        f = theano.function([inputs, filters], conv, mode=mode_with_gpu)
        if self.ndim == 3:
            flipped_filters = lower_filters[:, :, ::-1, ::-1, ::-1]
        else:
            flipped_filters = lower_filters[:, :, ::-1, ::-1]
        conv_ref = self.cpu_conv_class(subsample=unit_shape)(
            ref_cast(lower_inputs), flipped_filters)
        f_ref = theano.function([inputs, filters],
                                conv_ref,
                                mode="FAST_RUN")
        runtime_shapes = self.runtime_shapes
        if algo in ("time_once", "guess_once"):
            runtime_shapes = [list(runtime_shapes[0])]
            runtime_shapes[0][0] = 5
        for ntimes, (inputs_shape, filters_shape) in runtime_shapes:
            print("Shapes:", inputs_shape, filters_shape)
            for i in range(ntimes):
                inputs_val = np.random.random(inputs_shape).astype(dtype)
                filters_val = np.random.random(filters_shape).astype(dtype)
                gpu_res = np.asarray(f(inputs_val, filters_val))
                cpu_res = f_ref(inputs_val, filters_val)
                self.scale_numpy_arrays_inplace(cpu_res, gpu_res, 1)
                utt.assert_allclose(cpu_res, gpu_res)
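
Both runtime-algorithm helpers above are driven by a `self.runtime_shapes` fixture. A minimal sketch of what it could contain, assuming 2D shapes in (batch, channels, rows, cols) order and that "time_on_shape_change" is among the accepted cuDNN algorithm choices:

    # Assumed fixture: each entry pairs a repeat count with
    # (inputs_shape, filters_shape).
    self.runtime_shapes = [
        (2, ((8, 3, 16, 16), (4, 3, 5, 5))),
        (2, ((8, 3, 24, 24), (4, 3, 7, 7))),
    ]
    run_fwd_runtime_algorithm("time_on_shape_change")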
    def run_conv_gradweight(self, algo, dtype, precision, parameters):
        (
            inputs_shape,
            filters_shape,
            subsample,
            dilation,
            border_mode,
            conv_mode,
            alpha,
            beta,
        ) = parameters

        inputs_val = np.random.random(inputs_shape).astype(dtype)
        if beta == 0:
            filters_val = None
        else:
            filters_val = np.random.random(filters_shape).astype(dtype)
            filters_val /= 10
        topgrad_val = self.array_like_conv_output(
            inputs_shape, filters_shape, border_mode, subsample, dilation, dtype
        )

        # Scale down the values to keep absolute errors within the tolerances
        # of utt.assert_allclose.
        inputs_val /= 10
        topgrad_val /= 10

        inputs = theano.shared(inputs_val)
        topgrad = theano.shared(topgrad_val)

        # Compile a theano function for the cuDNN implementation
        grad_w = dnn_gradweight(
            inputs,
            topgrad,
            filters_shape,
            alpha=alpha,
            beta=beta,
            out=filters_val,
            border_mode=border_mode,
            subsample=subsample,
            dilation=dilation,
            conv_mode=conv_mode,
            algo=algo,
            precision=precision,
        )

        f = theano.function([], grad_w, mode=mode_with_gpu)

        # Compile a theano function for the reference implementation
        grad_w_ref = self.cpu_gradweight_class(
            border_mode=border_mode, subsample=subsample, filter_dilation=dilation
        )(ref_cast(inputs), ref_cast(topgrad), filters_shape[2:])
        if conv_mode == "conv":
            if inputs.ndim == 5:
                grad_w_ref = grad_w_ref[:, :, ::-1, ::-1, ::-1]
            else:
                grad_w_ref = grad_w_ref[:, :, ::-1, ::-1]
        f_ref = theano.function([], grad_w_ref, mode="FAST_RUN")

        # Compare the results of the two implementations
        res_ref = f_ref()
        res = np.asarray(f())
        if algo in cudnn.deterministic_bwd_filter_algorithms:
            utt.assert_allclose(res, np.asarray(f()))

        atol, rtol = self.get_atol_rtol(algo, dtype, precision)
        if beta == 0:
            cpu_res = alpha * res_ref
        else:
            cpu_res = alpha * res_ref + beta * filters_val
        self.scale_numpy_arrays_inplace(cpu_res, res, alpha)
        utt.assert_allclose(cpu_res, res, rtol=rtol, atol=atol)
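
This helper relies on `self.array_like_conv_output` to build an array shaped like the convolution output, used as the beta-blending operand. A minimal sketch of such a method, assuming Theano's shape helper `get_conv_output_shape` is available:

    from theano.tensor.nnet.abstract_conv import get_conv_output_shape

    def array_like_conv_output(self, inputs_shape, filters_shape, border_mode,
                               subsample, dilation, dtype):
        # Compute the convolution output shape for this geometry and
        # return a random array of that shape.
        out_shape = get_conv_output_shape(
            inputs_shape, filters_shape, border_mode, subsample, dilation
        )
        return np.random.random(out_shape).astype(dtype)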
    def run_conv_fwd(self, algo, dtype, precision, parameters):
        (
            inputs_shape,
            filters_shape,
            subsample,
            dilation,
            border_mode,
            conv_mode,
            alpha,
            beta,
        ) = parameters

        inputs_val = np.random.random(inputs_shape).astype(dtype)
        filters_val = np.random.random(filters_shape).astype(dtype)

        # Scale down the input values to prevent very large absolute errors
        # due to float rounding
        inputs_val /= 10
        filters_val /= 10

        inputs = theano.shared(inputs_val)
        filters = theano.shared(filters_val)

        if beta == 0:
            out = None
        else:
            out = self.array_like_conv_output(
                inputs_shape, filters_shape, border_mode, subsample, dilation, dtype
            )
            out /= 10
        # Compile a theano function for the cuDNN implementation
        conv = dnn_conv(
            img=inputs,
            kerns=filters,
            alpha=alpha,
            beta=beta,
            out=out,
            border_mode=border_mode,
            subsample=subsample,
            dilation=dilation,
            conv_mode=conv_mode,
            algo=algo,
            precision=precision,
        )
        f = theano.function([], conv, mode=mode_with_gpu)

        # If conv_mode is 'conv', the reference implementation should use
        # filters flipped along the width, height and time axes
        if conv_mode == "conv":
            if inputs.ndim == 5:
                flipped_filters = filters[:, :, ::-1, ::-1, ::-1]
            else:
                flipped_filters = filters[:, :, ::-1, ::-1]
        else:
            flipped_filters = filters

        # Compile a theano function for the reference implementation
        conv_ref = self.cpu_conv_class(
            border_mode=border_mode, subsample=subsample, filter_dilation=dilation
        )(ref_cast(inputs), flipped_filters)
        f_ref = theano.function([], conv_ref, mode="FAST_RUN")

        # Compare the results of the two implementations
        res_ref = f_ref()
        res = np.asarray(f())
        if algo in cudnn.deterministic_fwd_algorithms:
            utt.assert_allclose(res, np.asarray(f()))

        atol, rtol = self.get_atol_rtol(algo, dtype, precision)
        if beta == 0:
            cpu_res = alpha * res_ref
        else:
            cpu_res = alpha * res_ref + beta * out
        self.scale_numpy_arrays_inplace(cpu_res, res, alpha)
        utt.assert_allclose(cpu_res, res, rtol=rtol, atol=atol)
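
A hypothetical parameter tuple for this method, matching the unpacking at the top; with a nonzero beta the GPU result is checked against alpha * conv plus beta * out. The shapes and the "none" algorithm name are assumptions for illustration.

    parameters = (
        (2, 3, 8, 8),     # inputs_shape:  batch, channels, rows, cols
        (4, 3, 3, 3),     # filters_shape: n-filters, channels, f-rows, f-cols
        (1, 1),           # subsample
        (1, 1),           # dilation
        "valid",          # border_mode
        "conv",           # conv_mode
        1.5,              # alpha
        0.5,              # beta
    )
    self.run_conv_fwd("none", "float32", "float32", parameters)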
Example #8
    def run_conv_gradinput(self, algo, dtype, precision, parameters):
        (
            inputs_shape,
            filters_shape,
            subsample,
            dilation,
            border_mode,
            conv_mode,
            alpha,
            beta,
        ) = parameters

        if beta == 0:
            inputs_val = None
        else:
            inputs_val = np.random.random(inputs_shape).astype(dtype)
            inputs_val /= 10
        filters_val = np.random.random(filters_shape).astype(dtype)
        topgrad_val = self.array_like_conv_output(
            inputs_shape, filters_shape, border_mode, subsample, dilation, dtype
        )

        # Scale down the values to keep absolute errors within the tolerances
        # of utt.assert_allclose.
        filters_val /= 10
        topgrad_val /= 10

        filters = aesara.shared(filters_val)
        topgrad = aesara.shared(topgrad_val)

        # Compile an Aesara function for the cuDNN implementation
        grad_i = dnn_gradinput(
            filters,
            topgrad,
            inputs_shape,
            alpha=alpha,
            beta=beta,
            out=inputs_val,
            border_mode=border_mode,
            subsample=subsample,
            dilation=dilation,
            conv_mode=conv_mode,
            algo=algo,
            precision=precision,
        )

        f = aesara.function([], grad_i, mode=mode_with_gpu)

        # If conv_mode is 'conv', the reference implementation should use
        # filters flipped along the width, height and time axes
        if conv_mode == "conv":
            if filters.ndim == 5:
                flipped_filters = filters[:, :, ::-1, ::-1, ::-1]
            else:
                flipped_filters = filters[:, :, ::-1, ::-1]
        else:
            flipped_filters = filters

        # Compile an Aesara function for the reference implementation
        grad_i_ref = self.cpu_gradinput_class(
            border_mode=border_mode, subsample=subsample, filter_dilation=dilation
        )(ref_cast(flipped_filters), ref_cast(topgrad), inputs_shape[2:])
        f_ref = aesara.function([], grad_i_ref, mode="FAST_RUN")

        # Compare the results of the two implementations
        res_ref = f_ref()
        res = np.asarray(f())
        if algo in cudnn.deterministic_bwd_data_algorithms:
            utt.assert_allclose(res, np.asarray(f()))

        atol, rtol = self.get_atol_rtol(algo, dtype, precision)
        if beta == 0:
            cpu_res = alpha * res_ref
        else:
            cpu_res = alpha * res_ref + beta * inputs_val
        self.scale_numpy_arrays_inplace(cpu_res, res, alpha)
        utt.assert_allclose(cpu_res, res, rtol=rtol, atol=atol)
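
A hypothetical call with the same tuple layout as in the previous examples; with beta = 0 no initial `inputs_val` is blended into the result. The shapes are assumptions, as is "deterministic" being among the accepted bwd-data algorithm names.

    parameters = (
        (2, 3, 8, 8),     # inputs_shape
        (4, 3, 3, 3),     # filters_shape
        (1, 1), (1, 1),   # subsample, dilation
        "valid", "conv",  # border_mode, conv_mode
        1.0, 0.0,         # alpha, beta
    )
    self.run_conv_gradinput("deterministic", "float32", "float32", parameters)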