Example #1
    def run_conv_gradinput(self, algo, dtype, precision, parameters):
        inputs_shape, filters_shape, subsample, dilation, border_mode, conv_mode, alpha, beta = parameters

        if beta == 0:
            inputs_val = None
        else:
            inputs_val = np.random.random(inputs_shape).astype(dtype)
            inputs_val /= 10
        filters_val = np.random.random(filters_shape).astype(dtype)
        topgrad_val = self.array_like_conv_output(inputs_shape, filters_shape, border_mode, subsample, dilation, dtype)

        # Scale down the values to prevent large absolute errors in utt.assert_allclose due to float rounding.
        filters_val /= 10
        topgrad_val /= 10

        filters = theano.shared(filters_val)
        topgrad = theano.shared(topgrad_val)

        # Compile a theano function for the cuDNN implementation
        grad_i = dnn_gradinput(filters, topgrad, inputs_shape, alpha=alpha, beta=beta, out=inputs_val,
                               border_mode=border_mode, subsample=subsample, dilation=dilation, conv_mode=conv_mode,
                               algo=algo, precision=precision)

        f = theano.function([], grad_i, mode=mode_with_gpu)

        # If conv_mode is 'conv', the reference implementation should use
        # filters flipped along the width, height and (for 3D) time axes
        if conv_mode == 'conv':
            if filters.ndim == 5:
                flipped_filters = filters[:, :, ::-1, ::-1, ::-1]
            else:
                flipped_filters = filters[:, :, ::-1, ::-1]
        else:
            flipped_filters = filters

        # Compile a theano function for the reference implementation
        grad_i_ref = self.cpu_gradinput_class(border_mode=border_mode,
                                              subsample=subsample,
                                              filter_dilation=dilation
                                              )(ref_cast(flipped_filters), ref_cast(topgrad), inputs_shape[2:])
        f_ref = theano.function([], grad_i_ref, mode="FAST_RUN")

        # Compare the results of the two implementations
        res_ref = f_ref()
        res = np.asarray(f())
        if algo in cudnn.deterministic_bwd_data_algorithms:
            utt.assert_allclose(res, np.asarray(f()))

        atol, rtol = self.get_atol_rtol(algo, dtype, precision)
        if beta == 0:
            cpu_res = alpha * res_ref
        else:
            cpu_res = alpha * res_ref + beta * inputs_val
        self.scale_numpy_arrays_inplace(cpu_res, res, alpha)
        utt.assert_allclose(cpu_res, res, rtol=rtol, atol=atol)
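Note: parameters unpacks into the same eight-element tuple here and in run_conv_fwd and run_conv_gradweight below. A hypothetical example of such a tuple, purely for illustration (these shapes and values are not taken from the test suite):

    parameters = ((2, 3, 8, 8),   # inputs_shape: (batch, channels, rows, columns)
                  (4, 3, 5, 5),   # filters_shape: (filters, channels, rows, columns)
                  (1, 1),         # subsample (strides)
                  (1, 1),         # dilation
                  'valid',        # border_mode
                  'conv',         # conv_mode
                  1,              # alpha
                  0)              # beta (0 means no existing output buffer is blended in)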
Example #2
def run_gradweight_runtime_algorithm(algo):
    theano.config.dnn.conv.algo_bwd_filter = algo
    inputs = theano.tensor.TensorType(dtype, _broadcastable)()
    filters = theano.tensor.TensorType(dtype, _broadcastable)()
    # Compile a theano function for the cuDNN gradient-of-weights implementation
    conv = dnn_conv(img=inputs, kerns=filters, algo=algo, precision=dtype,
                    subsample=unit_shape, dilation=unit_shape)
    grad_w = theano.tensor.grad(conv.sum(), [filters])
    f = theano.function([inputs, filters], grad_w, mode=mode_with_gpu)
    # The compiled graph must contain exactly one GpuDnnConvGradW node
    # and no other cuDNN convolution nodes
    assert 1 == len([node for node in f.maker.fgraph.apply_nodes if isinstance(node.op, GpuDnnConvGradW)])
    assert not any(isinstance(node.op, GpuDnnConv) for node in f.maker.fgraph.apply_nodes)
    assert not any(isinstance(node.op, GpuDnnConvGradI) for node in f.maker.fgraph.apply_nodes)
    # dnn_conv flips the filters (conv mode), so the reference implementation
    # must use filters flipped along the spatial axes as well
    if self.ndim == 3:
        flipped_filters = filters[:, :, ::-1, ::-1, ::-1]
    else:
        flipped_filters = filters[:, :, ::-1, ::-1]
    # Compile a theano function for the reference implementation
    conv_ref = self.cpu_conv_class(subsample=unit_shape)(ref_cast(inputs), flipped_filters)
    grad_w_ref = theano.tensor.grad(conv_ref.sum(), [filters])
    f_ref = theano.function([inputs, filters], grad_w_ref, mode='FAST_RUN')
    runtime_shapes = self.runtime_shapes
    # 'time_once' and 'guess_once' select the algorithm only once, so run a
    # single shape pair several times instead of varying the shapes
    if algo in ('time_once', 'guess_once'):
        runtime_shapes = [list(runtime_shapes[0])]
        runtime_shapes[0][0] = 5
    # Compare the two implementations across the runtime shapes
    for ntimes, (inputs_shape, filters_shape) in runtime_shapes:
        print('Shapes:', inputs_shape, filters_shape)
        for i in range(ntimes):
            inputs_val = np.random.random(inputs_shape).astype(dtype)
            filters_val = np.random.random(filters_shape).astype(dtype)
            gpu_res = f(inputs_val, filters_val)
            cpu_res = f_ref(inputs_val, filters_val)
            utt.assert_allclose(cpu_res, np.asarray(gpu_res))
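The loop above implies that self.runtime_shapes is a sequence of (ntimes, (inputs_shape, filters_shape)) pairs, where ntimes is how many times each shape pair is exercised. A minimal sketch of such a structure, with made-up shapes:

    runtime_shapes = [
        (1, ((2, 3, 8, 8), (4, 3, 5, 5))),
        (1, ((3, 4, 9, 9), (5, 4, 3, 3))),
    ]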
Example #3
def run_fwd_runtime_algorithm(algo):
    inputs = theano.tensor.TensorType(dtype, _broadcastable)()
    filters = theano.tensor.TensorType(dtype, _broadcastable)()
    # Scale down the input values to prevent very large absolute errors
    # due to float rounding
    lower_inputs = inputs / 10
    lower_filters = filters / 10
    # Compile a theano function for the cuDNN implementation
    conv = dnn_conv(img=lower_inputs, kerns=lower_filters, algo=algo, precision=dtype,
                    subsample=unit_shape, dilation=unit_shape)
    f = theano.function([inputs, filters], conv, mode=mode_with_gpu)
    # dnn_conv flips the filters (conv mode), so the reference implementation
    # must use filters flipped along the spatial axes as well
    if self.ndim == 3:
        flipped_filters = lower_filters[:, :, ::-1, ::-1, ::-1]
    else:
        flipped_filters = lower_filters[:, :, ::-1, ::-1]
    # Compile a theano function for the reference implementation
    conv_ref = self.cpu_conv_class(subsample=unit_shape)(ref_cast(lower_inputs), flipped_filters)
    f_ref = theano.function([inputs, filters], conv_ref, mode='FAST_RUN')
    runtime_shapes = self.runtime_shapes
    # 'time_once' and 'guess_once' select the algorithm only once, so run a
    # single shape pair several times instead of varying the shapes
    if algo in ('time_once', 'guess_once'):
        runtime_shapes = [list(runtime_shapes[0])]
        runtime_shapes[0][0] = 5
    # Compare the two implementations across the runtime shapes
    for ntimes, (inputs_shape, filters_shape) in runtime_shapes:
        print('Shapes:', inputs_shape, filters_shape)
        for i in range(ntimes):
            inputs_val = np.random.random(inputs_shape).astype(dtype)
            filters_val = np.random.random(filters_shape).astype(dtype)
            gpu_res = np.asarray(f(inputs_val, filters_val))
            cpu_res = f_ref(inputs_val, filters_val)
            self.scale_numpy_arrays_inplace(cpu_res, gpu_res, 1)
            utt.assert_allclose(cpu_res, gpu_res)
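The divisions by 10 throughout these tests keep values small because, for a fixed relative precision, the spacing between representable floats (and therefore the absolute rounding error) grows with magnitude. A standalone illustration with float16, the coarsest dtype these tests may run under:

    import numpy as np

    # Unit in the last place (ulp) of float16 at two magnitudes:
    print(np.spacing(np.float16(100.0)))  # 0.0625
    print(np.spacing(np.float16(10.0)))   # 0.0078125

Scaling the inputs down by 10 thus lowers the absolute error floor by roughly a factor of 10, keeping the absolute tolerance of utt.assert_allclose meaningful.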
Example #4
    def run_conv_fwd(self, algo, dtype, precision, parameters):
        inputs_shape, filters_shape, subsample, dilation, border_mode, conv_mode, alpha, beta = parameters

        inputs_val = np.random.random(inputs_shape).astype(dtype)
        filters_val = np.random.random(filters_shape).astype(dtype)

        # Scale down the input values to prevent very large absolute errors
        # due to float rounding
        inputs_val /= 10
        filters_val /= 10

        inputs = theano.shared(inputs_val)
        filters = theano.shared(filters_val)

        if beta == 0:
            out = None
        else:
            out = self.array_like_conv_output(inputs_shape, filters_shape, border_mode, subsample, dilation, dtype)
            out /= 10
        # Compile a theano function for the cuDNN implementation
        conv = dnn_conv(img=inputs, kerns=filters, alpha=alpha, beta=beta, out=out, border_mode=border_mode,
                        subsample=subsample, dilation=dilation, conv_mode=conv_mode, algo=algo, precision=precision)
        f = theano.function([], conv, mode=mode_with_gpu)

        # If conv_mode is 'conv', the reference implementation should use
        # filters flipped along the width, height and (for 3D) time axes
        if conv_mode == 'conv':
            if inputs.ndim == 5:
                flipped_filters = filters[:, :, ::-1, ::-1, ::-1]
            else:
                flipped_filters = filters[:, :, ::-1, ::-1]
        else:
            flipped_filters = filters

        # Compile a theano function for the reference implementation
        conv_ref = self.cpu_conv_class(border_mode=border_mode,
                                       subsample=subsample,
                                       filter_dilation=dilation)(ref_cast(inputs), flipped_filters)
        f_ref = theano.function([], conv_ref, mode="FAST_RUN")

        # Compare the results of the two implementations
        res_ref = f_ref()
        res = np.asarray(f())
        if algo in cudnn.deterministic_fwd_algorithms:
            utt.assert_allclose(res, np.asarray(f()))

        atol, rtol = self.get_atol_rtol(algo, dtype, precision)
        if beta == 0:
            cpu_res = alpha * res_ref
        else:
            cpu_res = alpha * res_ref + beta * out
        self.scale_numpy_arrays_inplace(cpu_res, res, alpha)
        utt.assert_allclose(cpu_res, res, rtol=rtol, atol=atol)
Example #5
    def run_conv_gradweight(self, algo, dtype, precision, parameters):
        (
            inputs_shape,
            filters_shape,
            subsample,
            dilation,
            border_mode,
            conv_mode,
            alpha,
            beta,
        ) = parameters

        inputs_val = np.random.random(inputs_shape).astype(dtype)
        if beta == 0:
            filters_val = None
        else:
            filters_val = np.random.random(filters_shape).astype(dtype)
            filters_val /= 10
        topgrad_val = self.array_like_conv_output(
            inputs_shape, filters_shape, border_mode, subsample, dilation, dtype
        )

        # Scale down the values to prevent large absolute errors in utt.assert_allclose due to float rounding.
        inputs_val /= 10
        topgrad_val /= 10

        inputs = theano.shared(inputs_val)
        topgrad = theano.shared(topgrad_val)

        # Compile a theano function for the cuDNN implementation
        grad_w = dnn_gradweight(
            inputs,
            topgrad,
            filters_shape,
            alpha=alpha,
            beta=beta,
            out=filters_val,
            border_mode=border_mode,
            subsample=subsample,
            dilation=dilation,
            conv_mode=conv_mode,
            algo=algo,
            precision=precision,
        )

        f = theano.function([], grad_w, mode=mode_with_gpu)

        # Compile a theano function for the reference implementation
        grad_w_ref = self.cpu_gradweight_class(
            border_mode=border_mode, subsample=subsample, filter_dilation=dilation
        )(ref_cast(inputs), ref_cast(topgrad), filters_shape[2:])
        if conv_mode == "conv":
            if inputs.ndim == 5:
                grad_w_ref = grad_w_ref[:, :, ::-1, ::-1, ::-1]
            else:
                grad_w_ref = grad_w_ref[:, :, ::-1, ::-1]
        f_ref = theano.function([], grad_w_ref, mode="FAST_RUN")

        # Compare the results of the two implementations
        res_ref = f_ref()
        res = np.asarray(f())
        if algo in cudnn.deterministic_bwd_filter_algorithms:
            utt.assert_allclose(res, np.asarray(f()))

        atol, rtol = self.get_atol_rtol(algo, dtype, precision)
        if beta == 0:
            cpu_res = alpha * res_ref
        else:
            cpu_res = alpha * res_ref + beta * filters_val
        self.scale_numpy_arrays_inplace(cpu_res, res, alpha)
        utt.assert_allclose(cpu_res, res, rtol=rtol, atol=atol)
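All of the alpha/beta branches above check cuDNN's output-blending convention, where the final result is alpha * conv(...) + beta * previous_output. A tiny NumPy sketch of the arithmetic the tests verify (the array values are made up):

    import numpy as np

    alpha, beta = 2.0, 0.5
    conv_result = np.array([1.0, 2.0, 3.0])    # stand-in for the reference convolution output
    prior_out = np.array([10.0, 10.0, 10.0])   # stand-in for the pre-filled output buffer

    expected = alpha * conv_result + beta * prior_out
    # expected is array([ 7.,  9., 11.])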