def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns,
                              unroll_batch=0, unroll_kern=0, img=T.dmatrix(),
                              do_print=True, repeat=1, unroll_patch=False,
                              unroll_patch_size=False, verbose=0):
    # build actual input images
    imgval = global_rng.rand(bsize, imshp[0], imshp[1], imshp[2])

    a = T.dmatrix()
    kerns = [a for i in nkerns]
    inputs4 = dmatrix4()
    kerns4 = dmatrix4()

    # for each layer
    ntot = 0
    tctot = 0
    tpytot = 0

    for kshp, kern, nkern, n_layer in zip(kshps, kerns, nkerns,
                                          range(len(nkerns))):
        if do_print:
            print('************* layer %i ***************' % n_layer)
            print(conv_mode, ss, n_layer, kshp, nkern)

        # actual values
        w = global_rng.random_sample(N.r_[nkern, imshp[0], kshp])
        w_flip = flip(w, kshp).reshape(w.shape)

        outshp = N.hstack((nkern,
                           ConvOp.getOutputShape(imshp[1:], kshp, ss,
                                                 conv_mode)))
        time1 = time.time()
        # placeholder output: this benchmark only measures timing, so the
        # values fed to the next layer are zeros
        outval = N.zeros(N.r_[bsize, outshp])

        # ConvOp
        if unroll_patch and not unroll_patch_size:
            conv_op = ConvOp(dx=ss[0], dy=ss[1], output_mode=conv_mode,
                             unroll_patch=unroll_patch,
                             verbose=verbose)(inputs4, kerns4)
        else:
            conv_op = ConvOp(imshp, kshp, nkern, bsize, ss[0], ss[1],
                             conv_mode, unroll_batch=unroll_batch,
                             unroll_kern=unroll_kern,
                             unroll_patch=unroll_patch,
                             verbose=verbose)(inputs4, kerns4)
        l1shp = N.hstack((nkern,
                          ConvOp.getOutputShape(imshp[1:], kshp, ss,
                                                conv_mode)))
        propup2 = function([inputs4, kerns4], conv_op)

        time1 = time.time()
        for i in range(repeat):
            hidval2_ = propup2(imgval, w_flip)
        hidval2 = hidval2_  # [:,:,0::ss[0],0::ss[1]]
        tctot += time.time() - time1

        imshp = tuple(outshp)
        imgval = outval.reshape(bsize, outshp[0], outshp[1], outshp[2])
    return tctot, tpytot, ntot
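# The shape arithmetic that ConvOp.getOutputShape performs can be summarized
# in plain NumPy. A minimal sketch, assuming the ceil((i +/- (k - 1)) / stride)
# rule of older Theano; conv_output_shape is a hypothetical helper, not part
# of the library.
import numpy as np

def conv_output_shape(inshp, kshp, stride, mode):
    # 'valid' shrinks by k - 1, 'full' grows by k - 1, then the stride
    # subsamples with a ceiling division.
    s = -1 if mode == 'valid' else 1
    inshp, kshp = np.array(inshp), np.array(kshp)
    out = np.ceil((inshp + s * kshp - s) / np.array(stride, dtype=float))
    return tuple(int(v) for v in out)

assert conv_output_shape((32, 32), (5, 5), (1, 1), 'valid') == (28, 28)
assert conv_output_shape((32, 32), (5, 5), (1, 1), 'full') == (36, 36)
assert conv_output_shape((32, 32), (5, 5), (2, 2), 'valid') == (14, 14)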
def f_conv(self, x, spec, in_dim, weight_name):
    layer_type, dims = spec
    num_filters = dims[0]
    filter_size = (dims[1], dims[1])
    stride = (dims[2], dims[2])

    bm = 'full' if 'convf' in layer_type else 'valid'

    num_channels = in_dim[0]

    W = self.weight(
        self.rand_init_conv((num_filters, num_channels) + filter_size),
        weight_name)
    if stride != (1, 1):
        f = GpuCorrMM(subsample=stride, border_mode=bm, pad=(0, 0))
        y = f(gpu_contiguous(x), gpu_contiguous(W))
    else:
        assert self.p.batch_size == self.p.valid_batch_size
        y = conv2d(x, W, image_shape=(2 * self.p.batch_size,) + in_dim,
                   filter_shape=((num_filters, num_channels) + filter_size),
                   border_mode=bm)
    output_size = ((num_filters,) +
                   ConvOp.getOutputShape(in_dim[1:], filter_size,
                                         stride, bm))
    return y, output_size
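# The shape bookkeeping of f_conv can be exercised on its own. A runnable
# sketch (f_conv_output_size is a hypothetical helper mirroring the spec
# parsing and the 'valid'/'full' output-shape rule above; not from the
# source):

def f_conv_output_size(spec, in_dim):
    layer_type, dims = spec
    num_filters = dims[0]
    filter_size = (dims[1], dims[1])
    stride = (dims[2], dims[2])
    bm = 'full' if 'convf' in layer_type else 'valid'
    s = -1 if bm == 'valid' else 1
    rows_cols = tuple(-(-(i + s * k - s) // st)  # ceiling division
                      for i, k, st in zip(in_dim[1:], filter_size, stride))
    return (num_filters,) + rows_cols

# 64 filters of size 3x3, stride 1, on a (channels, rows, cols) input:
assert f_conv_output_size(('convv', (64, 3, 1)), (3, 32, 32)) == (64, 30, 30)
assert f_conv_output_size(('convf', (64, 3, 1)), (3, 32, 32)) == (64, 34, 34)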
def get_dim(self, name):
    if name == 'input_':
        return (self.num_channels,) + self.image_size
    if name == 'output':
        return ((self.num_filters,) +
                ConvOp.getOutputShape(self.image_size, self.filter_size,
                                      self.step, self.border_mode))
    return super(Convolutional, self).get_dim(name)
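# A worked check of get_dim with hypothetical brick parameters (not from the
# source): num_channels=3, image_size=(28, 28), num_filters=16,
# filter_size=(5, 5), step=(1, 1), border_mode='valid'.
num_filters, image_size, filter_size, step = 16, (28, 28), (5, 5), (1, 1)
rows_cols = tuple((i - k) // s + 1
                  for i, k, s in zip(image_size, filter_size, step))
print((num_filters,) + rows_cols)  # -> (16, 24, 24); 'input_' is (3, 28, 28)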
def col_shape(self):
    rows_cols = ConvOp.getOutputShape(
        self._img_shape[2:],
        self._filters_shape[2:],
        self._subsample,
        self._border_mode)
    rval = (self._filters_shape[0],) + tuple(rows_cols)
    return rval
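# A worked example for col_shape with hypothetical shapes: a batch of 2 RGB
# 32x32 images and 16 filters of size 5x5, subsample (1, 1), 'valid' mode.
img_shape = (2, 3, 32, 32)     # (batch, channels, rows, cols)
filters_shape = (16, 3, 5, 5)  # (nfilters, channels, rows, cols)
rows_cols = tuple(i - k + 1    # 'valid', stride 1
                  for i, k in zip(img_shape[2:], filters_shape[2:]))
print((filters_shape[0],) + rows_cols)  # -> (16, 28, 28)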
def local_conv2d_gradinputs_cpu(node):
    if not isinstance(node.op, AbstractConv2d_gradInputs):
        return None
    kern, topgrad, shape = node.inputs

    if (not isinstance(kern.type, TensorType) or
            not isinstance(topgrad.type, TensorType)):
        return None
    if node.op.border_mode not in ['full', 'valid']:
        return None
    if not node.op.filter_flip:
        # Not tested yet
        return None

    # Conv 3d implementation, needed when subsample > 2
    if node.op.border_mode == 'valid' and node.op.subsample != (1, 1):
        kern = kern[:, :, ::-1, ::-1]
        shuffled_kern = kern.dimshuffle(0, 2, 3, 'x', 1)
        shuffled_topgrad = topgrad.dimshuffle(0, 2, 3, 'x', 1)
        b = theano.tensor.zeros_like(shuffled_kern[0, 0, 0, 0, :])
        rval = convTransp3D(W=shuffled_kern, b=b,
                            d=(node.op.subsample[0], node.op.subsample[1], 1),
                            H=shuffled_topgrad,
                            RShape=(shape[0], shape[1], 1))
        copy_stack_trace(node.outputs[0], rval)

        rval = theano.tensor.addbroadcast(rval, 3)
        rval = rval.dimshuffle(0, 4, 1, 2)
        rval = theano.tensor.patternbroadcast(rval,
                                              node.outputs[0].broadcastable)
        copy_stack_trace(node.outputs[0], rval)
        return [rval]

    # Conv2d Implementation
    dx, dy = node.op.subsample
    if dx not in (1, 2) or dy not in (1, 2):
        # Not implemented in the gradient of ConvOp
        return None

    if node.op.imshp is None:
        op_imshp = (None, None, None, None)
    else:
        op_imshp = node.op.imshp

    if node.op.kshp is None:
        op_kshp = (None, None, None, None)
    else:
        op_kshp = node.op.kshp

    if None in op_imshp or None in op_kshp:
        if (dx, dy) != (1, 1):
            return None

    mode = 'valid'
    if not node.op.border_mode == 'full':
        mode = 'full'
    filters = kern.dimshuffle((1, 0, 2, 3))
    filters = filters[:, :, ::-1, ::-1]

    outshp = get_conv_output_shape(op_imshp, op_kshp, node.op.border_mode,
                                   node.op.subsample)[2:]
    fulloutshp = get_conv_output_shape(op_imshp, op_kshp,
                                       node.op.border_mode, (1, 1))[2:]

    nkern = op_imshp[1]
    imshp = (op_kshp[0], outshp[0], outshp[1])
    imshp_logical = (op_kshp[0], fulloutshp[0], fulloutshp[1])
    din = ConvOp(imshp, op_kshp[2:], nkern, op_imshp[0],
                 1, 1, output_mode=mode, unroll_batch=None,
                 unroll_kern=None, unroll_patch=None,
                 imshp_logical=imshp_logical,
                 kshp_logical=None,
                 version=-1,
                 direction_hint='bprop inputs')
    din = din(topgrad, filters)
    copy_stack_trace(node.outputs[0], din)
    din = theano.tensor.patternbroadcast(din, node.outputs[0].broadcastable)
    copy_stack_trace(node.outputs[0], din)
    return [din]
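# Why the bprop-wrt-inputs of a 'valid' convolution is a 'full' convolution
# with flipped, channel-transposed filters: a minimal single-channel,
# stride-1 NumPy/SciPy sketch (not the Theano code path itself).
import numpy as np
from scipy import signal

rng = np.random.RandomState(0)
x = rng.rand(8, 8)   # input
w = rng.rand(3, 3)   # filter
gy = rng.rand(6, 6)  # upstream gradient, the shape of the 'valid' output

y = signal.convolve2d(x, w, mode='valid')  # forward pass (flips the filter)
assert y.shape == gy.shape

# dL/dx: 'full' convolution of gy with the filter flipped back, which is
# the same as a 'full' cross-correlation of gy with w.
gx = signal.convolve2d(gy, w[::-1, ::-1], mode='full')
assert gx.shape == x.shape

# Finite-difference check of one entry; L(y) = sum(gy * y) is linear in y.
eps, p, q = 1e-6, 4, 4
x2 = x.copy()
x2[p, q] += eps
num = ((signal.convolve2d(x2, w, mode='valid') - y) * gy).sum() / eps
assert np.isclose(gx[p, q], num, atol=1e-4)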
def local_conv2d_gradweight_cpu(node):
    if not isinstance(node.op, AbstractConv2d_gradWeights):
        return None
    img, topgrad, shape = node.inputs

    if (not isinstance(img.type, TensorType) or
            not isinstance(topgrad.type, TensorType)):
        return None
    if node.op.border_mode not in ['full', 'valid']:
        return None
    if not node.op.filter_flip:
        # Not tested yet
        return

    if node.op.border_mode == 'valid' and (node.op.subsample != (1, 1)):
        # Use the gradient as defined in conv3D, because the implementation
        # by Conv is slow (about 3x slower than conv3D, and probably 10x
        # slower than it could be), and incorrect when subsample > 2.
        # build a "node", that should be equivalent to the one given by
        # self.make_node, but using convGrad3D instead.
        shuffled_img = img.dimshuffle(0, 2, 3, 'x', 1)
        shuffled_topgrad = topgrad.dimshuffle(0, 2, 3, 'x', 1)
        rval = convGrad3D(V=shuffled_img,
                          d=(node.op.subsample[0], node.op.subsample[1], 1),
                          WShape=(shuffled_topgrad.shape[4],
                                  shape[0], shape[1], 1,
                                  shuffled_img.shape[4]),
                          dCdH=shuffled_topgrad)
        copy_stack_trace(node.outputs[0], rval)

        rval = theano.tensor.addbroadcast(rval, 3)
        rval = rval.dimshuffle(0, 4, 1, 2)
        rval = rval[:, :, ::-1, ::-1]
        rval = theano.tensor.patternbroadcast(rval,
                                              node.outputs[0].broadcastable)
        copy_stack_trace(node.outputs[0], rval)
        return [rval]

    dx, dy = node.op.subsample
    if dx not in (1, 2) or dy not in (1, 2):
        # Not implemented in the gradient of ConvOp
        return None

    if node.op.imshp is None:
        op_imshp = (None, None, None, None)
    else:
        op_imshp = node.op.imshp

    if node.op.kshp is None:
        op_kshp = (None, None, None, None)
    else:
        op_kshp = node.op.kshp

    if None in op_imshp or None in op_kshp:
        if (dx, dy) != (1, 1):
            # We cannot infer the shapes
            return None

    # Determine gradient on kernels
    assert len(op_imshp) == 4 and len(op_kshp) == 4

    outshp = get_conv_output_shape(op_imshp, op_kshp, node.op.border_mode,
                                   node.op.subsample)[2:]
    fulloutshp = get_conv_output_shape(op_imshp, op_kshp,
                                       node.op.border_mode, (1, 1))[2:]

    newimg = img.dimshuffle((1, 0, 2, 3))
    newtopgrad = topgrad.dimshuffle((1, 0, 2, 3))

    if node.op.border_mode == 'valid':
        (img, filters) = (newimg, newtopgrad)
        kshp_logical = fulloutshp
        kshp_logical_top_aligned = False
        imshp_logical = None
        (bsize, nkern) = (op_imshp[1], op_kshp[0])
        imshp = (op_imshp[0], op_imshp[2], op_imshp[3])
        kshp = outshp
    elif node.op.border_mode == 'full':
        (img, filters) = (newtopgrad, newimg)
        kshp_logical = None
        kshp_logical_top_aligned = True
        imshp_logical = (op_imshp[0], fulloutshp[0], fulloutshp[1])
        (bsize, nkern) = (op_kshp[0], op_imshp[1])
        imshp = (op_imshp[0], outshp[0], outshp[1])
        kshp = op_imshp[2:]
    else:
        raise NotImplementedError(
            'Only [full,valid] modes are currently supported.')

    # Flip the kernels
    filters = filters[:, :, ::-1, ::-1]

    dw = ConvOp(imshp, kshp, nkern, bsize, 1, 1, output_mode='valid',
                unroll_batch=None, unroll_kern=None, unroll_patch=None,
                imshp_logical=imshp_logical,
                kshp_logical=kshp_logical,
                kshp_logical_top_aligned=kshp_logical_top_aligned,
                direction_hint='bprop weights')
    res = dw(img, filters)
    copy_stack_trace(node.outputs[0], res)

    if node.op.border_mode == 'valid':
        res = res.dimshuffle((1, 0, 2, 3))
        res = res[:, :, ::-1, ::-1]

    res = theano.tensor.patternbroadcast(res, node.outputs[0].broadcastable)
    copy_stack_trace(node.outputs[0], res)
    return [res]
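# The weight gradient this optimizer builds through ConvOp has the same
# closed form in the single-channel, stride-1 case: a 'valid'
# cross-correlation of the input with the upstream gradient, flipped because
# the forward pass flips the filter. A sketch (not the Theano code path):
import numpy as np
from scipy import signal

rng = np.random.RandomState(1)
x = rng.rand(8, 8)
w = rng.rand(3, 3)
gy = rng.rand(6, 6)

y = signal.convolve2d(x, w, mode='valid')

# dL/dw: correlate the input with the output gradient, then flip.
gw = signal.correlate2d(x, gy, mode='valid')[::-1, ::-1]
assert gw.shape == w.shape

# Finite-difference check of one filter tap; L(y) = sum(gy * y).
eps, a, b = 1e-6, 1, 2
w2 = w.copy()
w2[a, b] += eps
num = ((signal.convolve2d(x, w2, mode='valid') - y) * gy).sum() / eps
assert np.isclose(gw[a, b], num, atol=1e-4)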
def local_conv2d_gradinputs_cpu(node):
    if (not isinstance(node.op, AbstractConv2d_gradInputs) or
            node.inputs[0].dtype == "float16"):
        return None
    kern, topgrad, shape = node.inputs

    if not isinstance(kern.type, TensorType) or not isinstance(
            topgrad.type, TensorType):
        return None
    if node.op.border_mode not in ["full", "valid"]:
        return None
    if not node.op.filter_flip:
        # Not tested yet
        return None

    if node.op.num_groups > 1 or node.op.unshared:
        return None

    # Conv 3d implementation, needed when subsample > 2
    if node.op.border_mode == "valid" and node.op.subsample != (1, 1):
        # The op doesn't support that anymore.
        return False

    # Conv2d Implementation
    dx, dy = node.op.subsample
    if dx not in (1, 2) or dy not in (1, 2):
        # Not implemented in the gradient of ConvOp
        return None

    if node.op.imshp is None:
        op_imshp = (None, None, None, None)
    else:
        op_imshp = node.op.imshp

    if node.op.kshp is None:
        op_kshp = (None, None, None, None)
    else:
        op_kshp = node.op.kshp

    if None in op_imshp or None in op_kshp:
        if (dx, dy) != (1, 1):
            return None

    mode = "valid"
    if not node.op.border_mode == "full":
        mode = "full"
    filters = kern.dimshuffle((1, 0, 2, 3))
    filters = filters[:, :, ::-1, ::-1]

    outshp = get_conv_output_shape(
        op_imshp,
        op_kshp,
        node.op.border_mode,
        node.op.subsample,
        node.op.filter_dilation,
    )[2:]
    fulloutshp = get_conv_output_shape(op_imshp, op_kshp,
                                       node.op.border_mode, (1, 1))[2:]

    nkern = op_imshp[1]
    imshp = (op_kshp[0], outshp[0], outshp[1])
    imshp_logical = (op_kshp[0], fulloutshp[0], fulloutshp[1])
    din = ConvOp(
        imshp,
        op_kshp[2:],
        nkern,
        op_imshp[0],
        1,
        1,
        output_mode=mode,
        unroll_batch=None,
        unroll_kern=None,
        unroll_patch=None,
        imshp_logical=imshp_logical,
        kshp_logical=None,
        version=-1,
        direction_hint="bprop inputs",
    )
    din = din(topgrad, filters)
    copy_stack_trace(node.outputs[0], din)
    din = theano.tensor.patternbroadcast(din, node.outputs[0].broadcastable)
    copy_stack_trace(node.outputs[0], din)
    return [din]
def local_conv2d_gradweight_cpu(node):
    if (not isinstance(node.op, AbstractConv2d_gradWeights) or
            node.inputs[0].dtype == "float16"):
        return None
    img, topgrad, shape = node.inputs

    if not isinstance(img.type, TensorType) or not isinstance(
            topgrad.type, TensorType):
        return None
    if node.op.border_mode not in ["full", "valid"]:
        return None
    if not node.op.filter_flip:
        # Not tested yet
        return

    if node.op.num_groups > 1 or node.op.unshared:
        return None

    if node.op.border_mode == "valid" and (node.op.subsample != (1, 1)):
        return None

    dx, dy = node.op.subsample
    if dx not in (1, 2) or dy not in (1, 2):
        # Not implemented in the gradient of ConvOp
        return None

    if node.op.imshp is None:
        op_imshp = (None, None, None, None)
    else:
        op_imshp = node.op.imshp

    if node.op.kshp is None:
        op_kshp = (None, None, None, None)
    else:
        op_kshp = node.op.kshp

    if None in op_imshp or None in op_kshp:
        if (dx, dy) != (1, 1):
            # We cannot infer the shapes
            return None

    # Determine gradient on kernels
    assert len(op_imshp) == 4 and len(op_kshp) == 4

    outshp = get_conv_output_shape(
        op_imshp,
        op_kshp,
        node.op.border_mode,
        node.op.subsample,
        node.op.filter_dilation,
    )[2:]
    fulloutshp = get_conv_output_shape(op_imshp, op_kshp,
                                       node.op.border_mode, (1, 1))[2:]

    newimg = img.dimshuffle((1, 0, 2, 3))
    newtopgrad = topgrad.dimshuffle((1, 0, 2, 3))

    if node.op.border_mode == "valid":
        (img, filters) = (newimg, newtopgrad)
        kshp_logical = fulloutshp
        kshp_logical_top_aligned = False
        imshp_logical = None
        (bsize, nkern) = (op_imshp[1], op_kshp[0])
        imshp = (op_imshp[0], op_imshp[2], op_imshp[3])
        kshp = outshp
    elif node.op.border_mode == "full":
        (img, filters) = (newtopgrad, newimg)
        kshp_logical = None
        kshp_logical_top_aligned = True
        imshp_logical = (op_imshp[0], fulloutshp[0], fulloutshp[1])
        (bsize, nkern) = (op_kshp[0], op_imshp[1])
        imshp = (op_imshp[0], outshp[0], outshp[1])
        kshp = op_imshp[2:]
    else:
        raise NotImplementedError(
            "Only [full,valid] modes are currently supported.")

    # Flip the kernels
    filters = filters[:, :, ::-1, ::-1]

    dw = ConvOp(
        imshp,
        kshp,
        nkern,
        bsize,
        1,
        1,
        output_mode="valid",
        unroll_batch=None,
        unroll_kern=None,
        unroll_patch=None,
        imshp_logical=imshp_logical,
        kshp_logical=kshp_logical,
        kshp_logical_top_aligned=kshp_logical_top_aligned,
        direction_hint="bprop weights",
    )
    res = dw(img, filters)
    copy_stack_trace(node.outputs[0], res)

    if node.op.border_mode == "valid":
        res = res.dimshuffle((1, 0, 2, 3))
        res = res[:, :, ::-1, ::-1]

    res = theano.tensor.patternbroadcast(res, node.outputs[0].broadcastable)
    copy_stack_trace(node.outputs[0], res)
    return [res]
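# These newer rewrites pass node.op.filter_dilation through to
# get_conv_output_shape. One spatial dimension of that computation, as a
# sketch ('valid'/'full' only; conv_out_len is a hypothetical helper assuming
# the dilated-kernel formula used by recent Theano):

def conv_out_len(i, k, stride=1, dilation=1, mode='valid'):
    dk = (k - 1) * dilation + 1             # effective (dilated) kernel size
    pad = 0 if mode == 'valid' else dk - 1  # 'full' pads by dk - 1
    return (i + 2 * pad - dk) // stride + 1

assert conv_out_len(32, 5) == 28
assert conv_out_len(32, 5, mode='full') == 36
assert conv_out_len(32, 3, dilation=2) == 28  # effective kernel size is 5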
def local_conv2d_gradinputs_cpu(node):
    if not isinstance(node.op, AbstractConv2d_gradInputs):
        return None
    kern, topgrad, shape = node.inputs

    if (not isinstance(kern.type, TensorType) or
            not isinstance(topgrad.type, TensorType)):
        return None
    if node.op.border_mode not in ['full', 'valid']:
        return None
    if not node.op.filter_flip:
        # Not tested yet
        return None

    # Conv 3d implementation, needed when subsample > 2
    if node.op.border_mode == 'valid' and node.op.subsample != (1, 1):
        kern = kern[:, :, ::-1, ::-1]
        shuffled_kern = kern.dimshuffle(0, 2, 3, 'x', 1)
        shuffled_topgrad = topgrad.dimshuffle(0, 2, 3, 'x', 1)
        b = theano.tensor.zeros_like(shuffled_kern[0, 0, 0, 0, :])
        rval = convTransp3D(W=shuffled_kern, b=b,
                            d=(node.op.subsample[0], node.op.subsample[1], 1),
                            H=shuffled_topgrad,
                            RShape=(shape[0], shape[1], 1))
        rval = theano.tensor.addbroadcast(rval, 3)
        rval = rval.dimshuffle(0, 4, 1, 2)
        rval = theano.tensor.patternbroadcast(rval,
                                              node.outputs[0].broadcastable)
        return [rval]

    # Conv2d Implementation
    dx, dy = node.op.subsample
    if dx not in (1, 2) or dy not in (1, 2):
        # Not implemented in the gradient of ConvOp
        return None

    if node.op.imshp is None:
        op_imshp = (None, None, None, None)
    else:
        op_imshp = node.op.imshp

    if node.op.kshp is None:
        op_kshp = (None, None, None, None)
    else:
        op_kshp = node.op.kshp

    if None in op_imshp or None in op_kshp:
        if (dx, dy) != (1, 1):
            return None

    mode = 'valid'
    if not node.op.border_mode == 'full':
        mode = 'full'
    filters = kern.dimshuffle((1, 0, 2, 3))
    filters = filters[:, :, ::-1, ::-1]

    outshp = ConvOp.getOutputShape(op_imshp[2:], op_kshp[2:],
                                   node.op.subsample, node.op.border_mode)
    fulloutshp = ConvOp.getOutputShape(op_imshp[2:], op_kshp[2:],
                                       (1, 1), node.op.border_mode)
    nkern = op_imshp[1]
    imshp = (op_kshp[0], outshp[0], outshp[1])
    imshp_logical = (op_kshp[0], fulloutshp[0], fulloutshp[1])
    din = ConvOp(imshp, op_kshp[2:], nkern, op_imshp[0],
                 1, 1, output_mode=mode, unroll_batch=None,
                 unroll_kern=None, unroll_patch=None,
                 imshp_logical=imshp_logical,
                 kshp_logical=None,
                 version=-1,
                 direction_hint='bprop inputs')
    din = din(topgrad, filters)
    din = theano.tensor.patternbroadcast(din, node.outputs[0].broadcastable)
    return [din]
def local_conv2d_gradweight_cpu(node):
    if not isinstance(node.op, AbstractConv2d_gradWeights):
        return None
    img, topgrad, shape = node.inputs

    if (not isinstance(img.type, TensorType) or
            not isinstance(topgrad.type, TensorType)):
        return None
    if node.op.border_mode not in ['full', 'valid']:
        return None
    if not node.op.filter_flip:
        # Not tested yet
        return

    if node.op.border_mode == 'valid' and (node.op.subsample != (1, 1)):
        # Use the gradient as defined in conv3D, because the implementation
        # by Conv is slow (about 3x slower than conv3D, and probably 10x
        # slower than it could be), and incorrect when subsample > 2.
        # build a "node", that should be equivalent to the one given by
        # self.make_node, but using convGrad3D instead.
        shuffled_img = img.dimshuffle(0, 2, 3, 'x', 1)
        shuffled_topgrad = topgrad.dimshuffle(0, 2, 3, 'x', 1)
        rval = convGrad3D(V=shuffled_img,
                          d=(node.op.subsample[0], node.op.subsample[1], 1),
                          WShape=(shuffled_topgrad.shape[4],
                                  shape[0], shape[1], 1,
                                  shuffled_img.shape[4]),
                          dCdH=shuffled_topgrad)
        rval = theano.tensor.addbroadcast(rval, 3)
        rval = rval.dimshuffle(0, 4, 1, 2)
        rval = rval[:, :, ::-1, ::-1]
        rval = theano.tensor.patternbroadcast(rval,
                                              node.outputs[0].broadcastable)
        return [rval]

    dx, dy = node.op.subsample
    if dx not in (1, 2) or dy not in (1, 2):
        # Not implemented in the gradient of ConvOp
        return None

    if node.op.imshp is None:
        op_imshp = (None, None, None, None)
    else:
        op_imshp = node.op.imshp

    if node.op.kshp is None:
        op_kshp = (None, None, None, None)
    else:
        op_kshp = node.op.kshp

    if None in op_imshp or None in op_kshp:
        if (dx, dy) != (1, 1):
            # We cannot infer the shapes
            return None

    # Determine gradient on kernels
    assert len(op_imshp) == 4 and len(op_kshp) == 4

    outshp = ConvOp.getOutputShape(op_imshp[2:], op_kshp[2:],
                                   node.op.subsample, node.op.border_mode)
    fulloutshp = ConvOp.getOutputShape(op_imshp[2:], op_kshp[2:],
                                       (1, 1), node.op.border_mode)
    newimg = img.dimshuffle((1, 0, 2, 3))
    newtopgrad = topgrad.dimshuffle((1, 0, 2, 3))

    if node.op.border_mode == 'valid':
        (img, filters) = (newimg, newtopgrad)
        kshp_logical = fulloutshp
        kshp_logical_top_aligned = False
        imshp_logical = None
        (bsize, nkern) = (op_imshp[1], op_kshp[0])
        imshp = (op_imshp[0], op_imshp[2], op_imshp[3])
        kshp = outshp
    elif node.op.border_mode == 'full':
        (img, filters) = (newtopgrad, newimg)
        kshp_logical = None
        kshp_logical_top_aligned = True
        imshp_logical = (op_imshp[0], fulloutshp[0], fulloutshp[1])
        (bsize, nkern) = (op_kshp[0], op_imshp[1])
        imshp = (op_imshp[0], outshp[0], outshp[1])
        kshp = op_imshp[2:]
    else:
        raise NotImplementedError(
            'Only [full,valid] modes are currently supported.')

    # Flip the kernels
    filters = filters[:, :, ::-1, ::-1]

    dw = ConvOp(imshp, kshp, nkern, bsize, 1, 1, output_mode='valid',
                unroll_batch=None, unroll_kern=None, unroll_patch=None,
                imshp_logical=imshp_logical,
                kshp_logical=kshp_logical,
                kshp_logical_top_aligned=kshp_logical_top_aligned,
                direction_hint='bprop weights')
    res = dw(img, filters)

    if node.op.border_mode == 'valid':
        res = res.dimshuffle((1, 0, 2, 3))
        res = res[:, :, ::-1, ::-1]
    res = theano.tensor.patternbroadcast(res, node.outputs[0].broadcastable)
    return [res]
def exec_multilayer_conv_nnet_old(conv_mode, ss, bsize, imshp, kshps, nkerns,
                                  unroll_batch=0, unroll_kern=0,
                                  img=T.dmatrix(), validate=True,
                                  conv_op_py=False, do_print=True, repeat=1,
                                  unroll_patch=False, unroll_patch_size=False,
                                  verbose=0):
    # build actual input images
    imgval = global_rng.rand(bsize, imshp[0], imshp[1], imshp[2])

    a = T.dmatrix()
    kerns = [a for i in nkerns]
    inputs4 = dmatrix4()
    kerns4 = dmatrix4()

    # for each layer
    ntot = 0
    tctot = 0
    tpytot = 0

    for kshp, kern, nkern, n_layer in zip(kshps, kerns, nkerns,
                                          range(len(nkerns))):
        if do_print:
            print("************* layer %i ***************" % n_layer)
            print(conv_mode, ss, n_layer, kshp, nkern)

        # actual values
        w = global_rng.random_sample(N.r_[nkern, imshp[0], kshp])
        w_flip = flip(w, kshp).reshape(w.shape)

        # manual implementation
        # check first stage
        padimg = imgval
        if conv_mode == "full":
            padimg_shp = N.array(imshp[1:]) + 2 * (N.array(kshp) -
                                                   N.array([1, 1]))
            padimg = N.zeros(N.r_[bsize, imshp[0], padimg_shp])
            padimg[:, :, kshp[0] - 1:-kshp[0] + 1,
                   kshp[1] - 1:-kshp[1] + 1] = imgval

        outshp = N.hstack((nkern,
                           ConvOp.getOutputShape(imshp[1:], kshp, ss,
                                                 conv_mode)))

        time1 = time.time()
        outval = N.zeros(N.r_[bsize, outshp])
        if validate:
            # causes an atexit problem
            from scipy.signal.sigtools import _convolve2d
            from scipy.signal.signaltools import _valfrommode, \
                _bvalfromboundary
            val = _valfrommode(conv_mode)
            bval = _bvalfromboundary("fill")

            for b in range(bsize):  # loop over batches
                for n in range(nkern):  # loop over filters
                    for i in range(imshp[0]):  # loop over input feature maps
                        outval[b, n, ...] += _convolve2d(
                            imgval[b, i, ...], w_flip[n, i, ...], 1,
                            val, bval, 0)[0::ss[0], 0::ss[1]]
            ntot += time.time() - time1

        # ConvOp
        if unroll_patch and not unroll_patch_size:
            conv_op = ConvOp(dx=ss[0], dy=ss[1], output_mode=conv_mode,
                             unroll_patch=unroll_patch,
                             verbose=verbose)(inputs4, kerns4)
        else:
            conv_op = ConvOp(imshp, kshp, nkern, bsize, ss[0], ss[1],
                             conv_mode, unroll_batch=unroll_batch,
                             unroll_kern=unroll_kern,
                             unroll_patch=unroll_patch,
                             verbose=verbose)(inputs4, kerns4)
        # l1shp = N.hstack((nkern,
        #                   ConvOp.getOutputShape(imshp[1:], kshp,
        #                                         ss, conv_mode)))

        propup2 = function([inputs4, kerns4], conv_op)
        propup3 = function([inputs4, kerns4], conv_op, mode=Mode(linker="py"))

        time1 = time.time()
        for i in range(repeat):
            hidval2_ = propup2(imgval, w_flip)
        hidval2 = hidval2_  # [:,:,0::ss[0],0::ss[1]]
        tctot += time.time() - time1

        if conv_op_py:
            time1 = time.time()
            for i in range(repeat):
                hidval3_ = propup3(imgval, w_flip)
            hidval3 = hidval3_  # [:,:,0::ss[0],0::ss[1]]
            tpytot += time.time() - time1
            assert (N.abs(hidval2 - hidval3) < 1e-5).all()
        else:
            tpytot += 0

        if validate:
            temp = N.abs(outval - hidval2)
            assert (temp < 1e-5).all()
        if validate and conv_op_py:
            temp = N.abs(outval - hidval3)
            assert (temp < 1e-5).all()

        imshp = tuple(outshp)
        imgval = outval.reshape(bsize, outshp[0], outshp[1], outshp[2])
    return tctot, tpytot, ntot
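# The validation loop above depends on private SciPy internals (_convolve2d,
# _valfrommode, _bvalfromboundary) that newer SciPy releases removed. A
# public-API sketch of the same reference computation (a hypothetical helper,
# not the original benchmark code):
import numpy as np
from scipy import signal

def reference_conv_layer(imgval, w_flip, ss, conv_mode):
    # Per-batch, per-filter 2D convolution summed over input channels,
    # then subsampled by the stride ss.
    bsize, nchan = imgval.shape[:2]
    nkern = w_flip.shape[0]
    out = None
    for b in range(bsize):          # loop over batches
        for n in range(nkern):      # loop over filters
            acc = sum(signal.convolve2d(imgval[b, i], w_flip[n, i],
                                        mode=conv_mode)
                      for i in range(nchan))[0::ss[0], 0::ss[1]]
            if out is None:
                out = np.zeros((bsize, nkern) + acc.shape)
            out[b, n] = acc
    return out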