def burn():
    """Compile a single dnn convolution and execute it 10000 times.

    Random image, kernel and output buffers are stored in shared variables,
    so the compiled function takes no inputs.  The function graph is printed
    with ``aesara.printing.debugprint`` before the loop starts.

    NOTE(review): presumably intended as a GPU burn-in/stress script; the
    device actually used depends on the Aesara configuration -- confirm.
    """
    sz = 128
    img_shp = [sz, sz, sz, sz]
    kern_shp = [sz // 2, sz, 3, 3]
    out_shp = get_conv_output_shape(img_shp, kern_shp, "valid", (1, 1))

    def rand(shp):
        # Random data of the requested shape, cast to the configured float type.
        return np.random.rand(*shp).astype(config.floatX)

    # The shared variables are the only bindings of img/kern/out that are
    # ever used, so no symbolic placeholders are created beforehand.
    img = aesara.shared(rand(img_shp))
    kern = aesara.shared(rand(kern_shp))
    out = aesara.shared(rand(out_shp))

    # beta 1 is needed to force the reuse of out, otherwise, it is
    # replaced by a GpuAllocEmpty
    o1 = dnn._dnn_conv(img, kern, conv_mode="conv", out=out, beta=1.0)
    mode = aesara.compile.get_default_mode().including("local_remove_all_assert")
    f = aesara.function([], [o1], mode=mode)
    aesara.printing.debugprint(f)

    print("Start computation")
    for _ in range(10000):
        f.fn()
    print("Computation stopped")
def infer_shape(self, fgraph, node, input_shape):
    """Return the convolution output shape for the given input shapes.

    ``input_shape[0]`` is passed as the image shape and ``input_shape[1]``
    as the kernel shape to ``get_conv_output_shape``, together with this
    op's ``border_mode``, ``subsample`` and ``filter_dilation``.  ``fgraph``
    and ``node`` are not used.

    Returns a one-element list holding the computed shape.
    """
    image_shape, kernel_shape = input_shape[0], input_shape[1]
    return [
        get_conv_output_shape(
            image_shape,
            kernel_shape,
            self.border_mode,
            self.subsample,
            self.filter_dilation,
        )
    ]
def array_like_conv_output(
    self, inputs_shape, filters_shape, border_mode, subsample, dilation, dtype
):
    """Return a random array with the inferred convolution output shape.

    The shape is computed by ``get_conv_output_shape`` and then passed
    through ``assert_conv_shape`` before being used to draw values with
    ``np.random.random``; the result is cast to ``dtype``.
    """
    inferred_shape = get_conv_output_shape(
        inputs_shape, filters_shape, border_mode, subsample, dilation
    )
    checked_shape = assert_conv_shape(inferred_shape)
    values = np.random.random(checked_shape)
    return values.astype(dtype)
def get_if_valid_conv_output_shape(case_tuple):
    """Filter function keeping only cases with a valid convolution output shape.

    ``case_tuple`` is indexed positionally: 0 = input shape, 1 = filter
    shape, 2 = subsample, 3 = dilation, 4 = border mode.

    Returns whatever ``assert_conv_shape`` returns for the computed shape,
    or ``False`` when ``assert_conv_shape`` raises ``ValueError``.
    """
    inputs_shape = case_tuple[0]
    filters_shape = case_tuple[1]
    subsample = case_tuple[2]
    dilation = case_tuple[3]
    border_mode = case_tuple[4]

    # The shape computation itself is deliberately outside the ``try``:
    # only the validity check is allowed to fail silently.
    candidate = get_conv_output_shape(
        inputs_shape, filters_shape, border_mode, subsample, dilation
    )
    try:
        checked = assert_conv_shape(candidate)
    except ValueError:
        return False
    return checked
def local_conv2d_gradinputs_cpu(fgraph, node):
    """Rewrite an ``AbstractConv2d_gradInputs`` node in terms of ``ConvOp``.

    Returns ``None`` (or ``False`` for the valid-mode/strided case) when the
    node is not handled, otherwise a one-element list containing the
    replacement variable.  ``fgraph`` is not used.
    """
    op = node.op
    if not isinstance(op, AbstractConv2d_gradInputs) or node.inputs[0].dtype == "float16":
        return None

    # Unpacking also requires the node to have exactly three inputs.
    kern, topgrad, _shape = node.inputs

    both_tensors = isinstance(kern.type, TensorType) and isinstance(
        topgrad.type, TensorType
    )
    if not both_tensors:
        return None
    if op.border_mode not in ("full", "valid"):
        return None
    if not op.filter_flip:
        # Not tested yet
        return None
    if op.num_groups > 1 or op.unshared:
        return None

    if op.border_mode == "valid" and op.subsample != (1, 1):
        # The op doesn't support that anymore.
        return False

    # Conv2d implementation
    dx, dy = op.subsample
    if dx not in (1, 2) or dy not in (1, 2):
        # Not implemented in the gradient of ConvOp
        return None

    unknown_4d = (None, None, None, None)
    op_imshp = unknown_4d if op.imshp is None else op.imshp
    op_kshp = unknown_4d if op.kshp is None else op.kshp

    # With partially unknown shapes only the unstrided case is handled.
    if (None in op_imshp or None in op_kshp) and (dx, dy) != (1, 1):
        return None

    # The ConvOp output mode is the opposite of the abstract op's border mode.
    mode = "valid" if op.border_mode == "full" else "full"

    # Swap the first two axes of the kernels and flip both spatial axes.
    filters = kern.dimshuffle((1, 0, 2, 3))[:, :, ::-1, ::-1]

    outshp = get_conv_output_shape(
        op_imshp,
        op_kshp,
        op.border_mode,
        op.subsample,
        op.filter_dilation,
    )[2:]
    fulloutshp = get_conv_output_shape(op_imshp, op_kshp, op.border_mode, (1, 1))[2:]

    nkern = op_imshp[1]
    imshp = (op_kshp[0], outshp[0], outshp[1])
    imshp_logical = (op_kshp[0], fulloutshp[0], fulloutshp[1])

    conv_op = ConvOp(
        imshp,
        op_kshp[2:],
        nkern,
        op_imshp[0],
        1,
        1,
        output_mode=mode,
        unroll_batch=None,
        unroll_kern=None,
        unroll_patch=None,
        imshp_logical=imshp_logical,
        kshp_logical=None,
        version=-1,
        direction_hint="bprop inputs",
    )

    original_out = node.outputs[0]
    din = conv_op(topgrad, filters)
    copy_stack_trace(original_out, din)
    # patternbroadcast yields a new variable, so the trace is copied again.
    din = aesara.tensor.patternbroadcast(din, original_out.broadcastable)
    copy_stack_trace(original_out, din)
    return [din]
def local_conv2d_gradweight_cpu(fgraph, node):
    """Rewrite an ``AbstractConv2d_gradWeights`` node in terms of ``ConvOp``.

    Returns ``None`` when the node is not handled, otherwise a one-element
    list containing the replacement variable.  ``fgraph`` is not used.
    """
    op = node.op
    if not isinstance(op, AbstractConv2d_gradWeights) or node.inputs[0].dtype == "float16":
        return None

    # Unpacking also requires the node to have exactly three inputs.
    img, topgrad, _shape = node.inputs

    both_tensors = isinstance(img.type, TensorType) and isinstance(
        topgrad.type, TensorType
    )
    if not both_tensors:
        return None
    if op.border_mode not in ("full", "valid"):
        return None
    if not op.filter_flip:
        # Not tested yet
        return None
    if op.num_groups > 1 or op.unshared:
        return None
    if op.border_mode == "valid" and op.subsample != (1, 1):
        return None

    dx, dy = op.subsample
    if dx not in (1, 2) or dy not in (1, 2):
        # Not implemented in the gradient of ConvOp
        return None

    unknown_4d = (None, None, None, None)
    op_imshp = unknown_4d if op.imshp is None else op.imshp
    op_kshp = unknown_4d if op.kshp is None else op.kshp

    if (None in op_imshp or None in op_kshp) and (dx, dy) != (1, 1):
        # We cannot infer the shapes
        return None

    # Determine gradient on kernels
    assert len(op_imshp) == 4 and len(op_kshp) == 4

    outshp = get_conv_output_shape(
        op_imshp,
        op_kshp,
        op.border_mode,
        op.subsample,
        op.filter_dilation,
    )[2:]
    fulloutshp = get_conv_output_shape(op_imshp, op_kshp, op.border_mode, (1, 1))[2:]

    # Both operands get their first two axes swapped.
    swapped_img = img.dimshuffle((1, 0, 2, 3))
    swapped_topgrad = topgrad.dimshuffle((1, 0, 2, 3))

    if op.border_mode == "valid":
        conv_input, filters = swapped_img, swapped_topgrad
        kshp_logical = fulloutshp
        kshp_logical_top_aligned = False
        imshp_logical = None
        bsize, nkern = op_imshp[1], op_kshp[0]
        imshp = (op_imshp[0], op_imshp[2], op_imshp[3])
        kshp = outshp
    elif op.border_mode == "full":
        conv_input, filters = swapped_topgrad, swapped_img
        kshp_logical = None
        kshp_logical_top_aligned = True
        imshp_logical = (op_imshp[0], fulloutshp[0], fulloutshp[1])
        bsize, nkern = op_kshp[0], op_imshp[1]
        imshp = (op_imshp[0], outshp[0], outshp[1])
        kshp = op_imshp[2:]
    else:
        raise NotImplementedError("Only [full,valid] modes are currently supported.")

    # Flip the kernels
    filters = filters[:, :, ::-1, ::-1]

    conv_op = ConvOp(
        imshp,
        kshp,
        nkern,
        bsize,
        1,
        1,
        output_mode="valid",
        unroll_batch=None,
        unroll_kern=None,
        unroll_patch=None,
        imshp_logical=imshp_logical,
        kshp_logical=kshp_logical,
        kshp_logical_top_aligned=kshp_logical_top_aligned,
        direction_hint="bprop weights",
    )

    original_out = node.outputs[0]
    res = conv_op(conv_input, filters)
    copy_stack_trace(original_out, res)

    if op.border_mode == "valid":
        res = res.dimshuffle((1, 0, 2, 3))
        res = res[:, :, ::-1, ::-1]

    # patternbroadcast yields a new variable, so the trace is copied again.
    res = aesara.tensor.patternbroadcast(res, original_out.broadcastable)
    copy_stack_trace(original_out, res)
    return [res]
def local_abstractconv_cudnn_alt(fgraph, node):
    """Build an alternative dnn graph for an abstract 2d convolution node.

    Handles ``AbstractConv2d``, ``AbstractConv2d_gradWeights`` and
    ``AbstractConv2d_gradInputs`` nodes.  Returns ``None`` when the node or
    its configuration is not handled, otherwise a one-element list with the
    replacement variable.  ``fgraph`` is not used.
    """
    op = node.op
    handled_ops = (AbstractConv2d, AbstractConv2d_gradWeights, AbstractConv2d_gradInputs)
    if not isinstance(op, handled_ops):
        return None

    # Dilated filters are only accepted when version() reports >= 6000.
    if version(raises=False) < 6000 and op.filter_dilation != (1, 1):
        return None
    if op.unshared:
        return None
    if isinstance(op.border_mode, tuple) and any(
        isinstance(pad, tuple) for pad in op.border_mode
    ):
        # Asymmetric padding not yet supported
        return None

    inp1 = node.inputs[0]
    inp2 = node.inputs[1]
    if not dnn_available(inp1.type.context_name):
        return None

    border_mode = op.border_mode
    subsample = op.subsample
    filter_dilation = op.filter_dilation
    num_groups = op.num_groups
    precision, _ = get_precision(None, [inp1, inp2])
    conv_mode = "conv" if op.filter_flip else "cross"

    if isinstance(op, AbstractConv2d):
        if border_mode == "half" or subsample != (1, 1) or num_groups != 1:
            return None
        if border_mode == "full":
            direction_hint = "bprop inputs"
        elif border_mode == "valid" and filter_dilation == (1, 1):
            direction_hint = "bprop weights"
        else:
            return None

        rval = dnn_conv(
            inp1,
            inp2,
            border_mode=border_mode,
            subsample=subsample,
            dilation=filter_dilation,
            direction_hint=direction_hint,
            conv_mode=conv_mode,
            num_groups=num_groups,
        )

    elif isinstance(op, AbstractConv2d_gradWeights):
        supported = (
            border_mode == "valid"
            and subsample == (1, 1)
            and filter_dilation == (1, 1)
            and num_groups == 1
        )
        if not supported:
            return None

        img = gpu_contiguous(inp1)
        topgrad = gpu_contiguous(inp2)
        ctx_name = infer_context_name(img, topgrad)

        # Swap the first two axes of both operands before convolving them.
        img = gpu_contiguous(img.dimshuffle(1, 0, 2, 3))
        topgrad = gpu_contiguous(topgrad.dimshuffle(1, 0, 2, 3))

        ishape = [shape_i_op(axis)(img) for axis in range(img.ndim)]
        tshape = [shape_i_op(axis)(topgrad) for axis in range(topgrad.ndim)]
        out_shp = assert_conv_shape(
            get_conv_output_shape(
                ishape,
                tshape,
                border_mode=border_mode,
                subsample=subsample,
                filter_dilation=filter_dilation,
            )
        )

        out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
        desc = GpuDnnConvDesc(
            border_mode=border_mode,
            subsample=subsample,
            dilation=filter_dilation,
            conv_mode="cross",
            precision=precision,
        )(out.shape)
        conv = GpuDnnConv(algo=None, num_groups=num_groups)(img, topgrad, out, desc)

        # The descriptor always uses "cross"; in "conv" mode the result is
        # flipped along both spatial axes afterwards.
        if conv_mode == "conv":
            conv = conv[:, :, ::-1, ::-1]
        rval = as_gpuarray_variable(conv.dimshuffle(1, 0, 2, 3), ctx_name)

    elif isinstance(op, AbstractConv2d_gradInputs):
        supported = border_mode == "valid" and subsample == (1, 1) and num_groups == 1
        if not supported:
            return None

        kerns = gpu_contiguous(inp1.dimshuffle(1, 0, 2, 3))
        topgrad = gpu_contiguous(inp2)
        ctx_name = infer_context_name(kerns, topgrad)

        # This branch uses the opposite convolution mode.
        conv_mode = "cross" if conv_mode == "conv" else "conv"
        desc = GpuDnnConvDesc(
            border_mode="full",
            subsample=subsample,
            dilation=filter_dilation,
            conv_mode=conv_mode,
            precision=precision,
        )(kerns.shape)

        tshape = [shape_i_op(axis)(topgrad) for axis in range(topgrad.ndim)]
        kshape = [shape_i_op(axis)(kerns) for axis in range(kerns.ndim)]
        shape = assert_conv_shape(
            get_conv_output_shape(
                tshape,
                kshape,
                border_mode="full",
                subsample=subsample,
                filter_dilation=filter_dilation,
            )
        )

        out = GpuAllocEmpty(dtype=topgrad.dtype, context_name=ctx_name)(*shape)
        rval = GpuDnnConv(algo=None, num_groups=num_groups)(topgrad, kerns, out, desc)

    return [rval]
args.input_shape, args.filter_shape, args.subsample, args.dilation, args.border_mode, args.conv_mode, args.alpha, args.beta, ) if args.print_infos: CheckDnn.print_infos(count_tests=False) print("======================") print("Running", test, algo, dtype, precision, *parameters) if test == FWD: tests.run_conv_fwd(algo, dtype, precision, parameters) expected_output_shape = get_conv_output_shape( args.input_shape, args.filter_shape, args.border_mode, args.subsample, args.dilation, ) elif test == BWD_FILTER: tests.run_conv_gradweight(algo, dtype, precision, parameters) expected_output_shape = args.filter_shape elif test == BWD_DATA: tests.run_conv_gradinput(algo, dtype, precision, parameters) expected_output_shape = args.input_shape print("Computed shape:", expected_output_shape) print("... OK")