def test_compare_1D_and_2D_upsampling_values(self):
    """Compare 1D and 2D upsampling.

    This method verifies that bilinear upsampling done with 1D and 2D
    kernels generates the same result.
    """
    # checking upsampling with ratio 5
    input_x = np.random.rand(5, 4, 6, 7).astype(theano.config.floatX)
    mat_1D = bilinear_upsampling(input=input_x, ratio=5,
                                 batch_size=5, num_input_channels=4,
                                 use_1D_kernel=True)
    mat_2D = bilinear_upsampling(input=input_x, ratio=5,
                                 batch_size=5, num_input_channels=4,
                                 use_1D_kernel=False)
    f_1D = theano.function([], mat_1D, mode=self.compile_mode)
    f_2D = theano.function([], mat_2D, mode=self.compile_mode)
    utt.assert_allclose(f_1D(), f_2D(), rtol=1e-06)

    # checking upsampling with ratio 8
    input_x = np.random.rand(12, 11, 10, 7).astype(theano.config.floatX)
    mat_1D = bilinear_upsampling(input=input_x, ratio=8,
                                 batch_size=12, num_input_channels=11,
                                 use_1D_kernel=True)
    mat_2D = bilinear_upsampling(input=input_x, ratio=8,
                                 batch_size=12, num_input_channels=11,
                                 use_1D_kernel=False)
    f_1D = theano.function([], mat_1D, mode=self.compile_mode)
    f_2D = theano.function([], mat_2D, mode=self.compile_mode)
    utt.assert_allclose(f_1D(), f_2D(), rtol=1e-06)
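For orientation, here is a minimal standalone sketch of the call pattern these tests exercise; the array and its shapes are illustrative, not taken from the test suite. A constant (batch, channels, height, width) input can be upsampled and evaluated directly:

import numpy as np
import theano
from theano.tensor.nnet.abstract_conv import bilinear_upsampling

# a constant (batch, channels, height, width) input
x = np.random.rand(2, 3, 4, 4).astype(theano.config.floatX)

# batch_size and num_input_channels are optional shape hints;
# the output spatial size is the input size times the ratio
up = bilinear_upsampling(input=x, ratio=2,
                         batch_size=2, num_input_channels=3)

print(up.eval().shape)  # (2, 3, 8, 8)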
def get_output(self, input):
    # apply convolution
    if self.stride >= 1:
        conv = conv2d(input=input, filters=self.W,
                      subsample=(self.stride, self.stride),
                      border_mode='half')  # same spatial size
    else:
        # not a proper deconvolution: bilinear upsampling followed by a
        # stride-1 convolution
        try:
            ratio = int(1. / self.stride)
        except ZeroDivisionError:
            print '\nWARNING: stride is 0, cannot derive the upsampling ' \
                  'ratio; assuming stride=0.5 (ratio=2)\n'
            ratio = 2
        up = bilinear_upsampling(input, ratio)
        conv = conv2d(input=up, filters=self.W,
                      subsample=(1, 1), border_mode='half')

    # apply activation
    if self.activation is None:
        self.output = conv + self.b.dimshuffle('x', 0, 'x', 'x')
    else:
        self.output = self.activation(conv + self.b.dimshuffle('x', 0, 'x', 'x'))
    return self.output
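The fractional-stride branch emulates a stride-1/n "deconvolution" by upsampling with ratio = 1/stride and then convolving with stride 1. A minimal sketch of that shape arithmetic, with hypothetical filter shapes (not taken from the layer above):

import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet import conv2d
from theano.tensor.nnet.abstract_conv import bilinear_upsampling

x = T.tensor4('x')  # (batch, channels, height, width)
W = theano.shared(  # hypothetical: 8 filters over 3 channels, 3x3 each
    np.random.randn(8, 3, 3, 3).astype(theano.config.floatX))

stride = 0.5                        # fractional stride
ratio = int(1. / stride)            # -> 2
up = bilinear_upsampling(x, ratio)  # doubles height and width
y = conv2d(input=up, filters=W, subsample=(1, 1),
           border_mode='half')      # 'half' padding keeps the size

f = theano.function([x], y)
print(f(np.zeros((1, 3, 5, 5), dtype=theano.config.floatX)).shape)
# (1, 8, 10, 10)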
def __init__(self, input, ratio, use_1D_kernel=False):
    self.input = input
    self.x = input
    self.output = abstract_conv.bilinear_upsampling(
        input=self.x,
        ratio=ratio,
        use_1D_kernel=use_1D_kernel)
    self.params = []
def get_classmap(model, X, nb_classes, batch_size, num_input_channels, ratio):
    inc = model.layers[0].input
    conv6 = model.layers[-4].output
    conv6_resized = absconv.bilinear_upsampling(conv6, ratio,
                                                batch_size=batch_size,
                                                num_input_channels=num_input_channels)
    WT = model.layers[-1].W.T
    conv6_resized = K.reshape(conv6_resized,
                              (-1, num_input_channels, 224 * 224))
    classmap = K.dot(WT, conv6_resized).reshape((-1, nb_classes, 224, 224))
    get_cmap = K.function([inc], classmap)
    return get_cmap([X])
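One constraint worth noting: the reshape to (-1, num_input_channels, 224 * 224) only works if the upsampled map is exactly 224x224, so the caller must pick ratio such that feature_map_size * ratio == 224. A hypothetical call (model, X and all sizes are illustrative):

# a 14x14 final conv map needs ratio=16, since 14 * 16 = 224
cmap = get_classmap(model, X, nb_classes=10, batch_size=1,
                    num_input_channels=1024, ratio=16)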
def _forward(self):
    if theano.config.device.startswith('gpu'):
        from theano.tensor.nnet.abstract_conv import bilinear_upsampling
    else:
        raise AssertionError('Bilinear interpolation requires GPU and cuDNN.')
    inpt = T.reshape(self.inpt, (self.inpt_depth, self.n_inpt,
                                 self.inpt_height, self.inpt_width))
    pre_res = bilinear_upsampling(input=inpt, ratio=self.up_factor)
    shuffle_res = pre_res.dimshuffle((2, 3, 0, 1))
    res = self._bilinear_upsampling_1D(inpt=shuffle_res, ratio=self.up_factor)
    self.output = res.dimshuffle((2, 3, 0, 1))
    self.output = T.shape_padaxis(self.output, axis=0)
    self.output = T.unbroadcast(self.output, 0)
def apply(self, x):
    if x.ndim == 5:
        out = x.reshape((x.shape[0] * x.shape[1], x.shape[2],
                         x.shape[3], x.shape[4]))
    else:
        out = x
    out = bilinear_upsampling(out, ratio=self.ratio,
                              use_1D_kernel=self.use_1D_kernel)
    if x.ndim == 5:
        out = out.reshape((x.shape[0], x.shape[1], x.shape[2],
                           out.shape[-2], out.shape[-1]))
    return out
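A sketch of the 5D round-trip this apply method performs, using a hypothetical (time, batch, channels, height, width) input; the collapse to 4D is needed because bilinear_upsampling only accepts 4D tensors:

import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet.abstract_conv import bilinear_upsampling

x = T.TensorType(theano.config.floatX, (False,) * 5)('x')  # 5D input
flat = x.reshape((x.shape[0] * x.shape[1], x.shape[2],
                  x.shape[3], x.shape[4]))       # (t*b, c, h, w)
up = bilinear_upsampling(flat, ratio=2)
out = up.reshape((x.shape[0], x.shape[1], x.shape[2],
                  up.shape[-2], up.shape[-1]))   # back to 5D

f = theano.function([x], out)
print(f(np.zeros((6, 2, 3, 10, 10), dtype=theano.config.floatX)).shape)
# (6, 2, 3, 20, 20)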
def test_bilinear_upsampling_reshaping(self):
    # Test bilinear upsampling without giving shape information,
    # i.e. without batch_size and num_input_channels.
    # upsampling a 2x2 matrix for ratios of two and three
    input_x = np.array([[[[1, 2], [3, 4]]]], dtype=theano.config.floatX)
    for ratio in [2, 3]:
        for use_1D_kernel in [True, False]:
            bilin_mat = bilinear_upsampling(input=input_x, ratio=ratio,
                                            batch_size=None,
                                            num_input_channels=None,
                                            use_1D_kernel=use_1D_kernel)
            f = theano.function([], bilin_mat, mode=self.compile_mode)
            up_mat_2d = self.get_upsampled_twobytwo_mat(input_x, ratio)
            utt.assert_allclose(f(), up_mat_2d, rtol=1e-06)
def get_layer_output_function(layer_name, DEFAULT_PAD=1, bilinear_up=None):
    vgg_net = build_model(DEFAULT_PAD=DEFAULT_PAD)
    image_batch = T.tensor4('images')  # expected shape: (None, 3, 224, 224)
    mean_values = vgg_info['mean value'].astype(theano.config.floatX)
    # flip RGB -> BGR and subtract the per-channel means
    image_batch_subtracted = image_batch[:, ::-1] - mean_values[None, :, None, None]
    output_tensor = lasagne.layers.get_output(vgg_net[layer_name],
                                              inputs=image_batch_subtracted)
    if bilinear_up is not None:
        output_tensor = bilinear_upsampling(output_tensor, bilinear_up,
                                            use_1D_kernel=False)
    func = theano.function([image_batch], output_tensor)
    return func
def test_bilinear_upsampling_1D(self):
    """Test bilinear upsampling using 1D kernels.

    This method tests the bilinear_upsampling method
    when using 1D kernels for some upsampling ratios.
    """
    # upsampling a 2x2 matrix for ratios from two to nine
    input_x = np.array([[[[1, 2], [3, 4]]]], dtype=theano.config.floatX)
    for ratio in [2, 3, 4, 5, 6, 7, 8, 9]:
        bilin_mat = bilinear_upsampling(input=input_x, ratio=ratio,
                                        batch_size=1,
                                        num_input_channels=1,
                                        use_1D_kernel=True)
        f = theano.function([], bilin_mat, mode=self.compile_mode)
        up_mat_2d = self.get_upsampled_twobytwo_mat(input_x, ratio)
        utt.assert_allclose(f(), up_mat_2d, rtol=1e-06)
def get_classmap(model, X, ind):
    global nametoid
    inc = model.layers[0].input
    name = 'convolution2d_' + str(ind + 1)
    # namew = 'convolution2d_' + str(2 * (ind + 1))
    layerid = nametoid[name]
    # wlayerid = nametoid[namew]
    conv6 = model.layers[layerid].output
    # wei = model.layers[wlayerid].W
    # conv6 = conv6 * wei
    # conv6_resized = K.resize_images(conv6, 15, 15, 'th')
    conv6_resized = absconv.bilinear_upsampling(conv6, 15,
                                                batch_size=1,
                                                num_input_channels=1)
    conv6_resized = K.spatial_2d_padding(conv6_resized, padding=(1, 1),
                                         dim_ordering='th')
    get_cmap = K.function([inc], conv6_resized)
    return get_cmap([X])
def get_output_for(self, input, **kwargs):
    # 'radio' in the original was a typo for 'ratio'
    upscaled = bilinear_upsampling(input=input, ratio=self.ratio)
    # upscaled = input * 2
    return upscaled
def __init__(self, img_batch, normLoc, batch_size=16, mnist_size=28,
             channels=1, depth=3, minRadius=4, sensorBandwidth=8):
    """
    Recurrent Attention Model from
    "Recurrent Models of Visual Attention" (Mnih et al., 2014)

    Parameters
    ----------
    :type img_batch: a 2D variable, each row an mnist image
    :param img_batch: model inputs

    :type normLoc: variable with size (batch_size x 2)
    :param normLoc: model inputs

    :type batch_size: int
    :param batch_size: batch size

    :type mnist_size: int
    :param mnist_size: length of the mnist square (usually 28)

    :type channels: int
    :param channels: channels of mnist (usually 1)

    :type depth: int
    :param depth: channels of zoom (3 in this paper)

    :type minRadius: int
    :param minRadius: minimum radius of the glimpse

    :type sensorBandwidth: int
    :param sensorBandwidth: length of the glimpse square

    :return self.zooms: (batch, depth, channel, height, width)
    """
    self.batch_size = batch_size
    self.mnist_size = mnist_size
    self.channels = channels
    self.depth = depth
    self.minRadius = minRadius
    self.sensorBandwidth = sensorBandwidth

    # from [-1.0, 1.0] -> [0, 28]
    loc = ((normLoc + 1) / 2) * mnist_size
    loc = T.cast(loc, 'int32')

    # img with size (batch, channels, height, width)
    img = T.reshape(img_batch, (batch_size, channels, mnist_size, mnist_size))
    self.img = img

    zooms = []  # zooms of all the images in batch
    maxRadius = minRadius * (2 ** (depth - 1))  # radius of the largest zoom
    offset = maxRadius

    # zero-pad the batch to (batch, channels, h + 2R, w + 2R)
    img = T.concatenate((T.zeros((batch_size, channels, maxRadius,
                                  mnist_size)), img), axis=2)
    img = T.concatenate((img, T.zeros((batch_size, channels, maxRadius,
                                       mnist_size))), axis=2)
    img = T.concatenate((T.zeros((batch_size, channels,
                                  mnist_size + 2 * maxRadius,
                                  maxRadius)), img), axis=3)
    img = T.concatenate((img, T.zeros((batch_size, channels,
                                       mnist_size + 2 * maxRadius,
                                       maxRadius))), axis=3)
    img = T.cast(img, dtype=theano.config.floatX)

    for k in xrange(batch_size):
        imgZooms = []  # zooms for a single image
        # one_img with size (channels, 2R + size, 2R + size), channels=1 here
        one_img = img[k, :, :, :]

        for i in xrange(depth):
            # r = minR, 2 * minR, ..., (2^(depth - 1)) * minR
            r = minRadius * (2 ** i)
            d_raw = 2 * r  # patch size to be cropped
            loc_k = loc[k, :]  # location of the k-th glimpse, (2, )
            # upper-left corner of the patch
            adjusted_loc = T.cast(offset + loc_k - r, 'int32')

            # crop a zoom patch with size (channels, d_raw, d_raw)
            zoom = one_img[:, adjusted_loc[0]:(adjusted_loc[0] + d_raw),
                           adjusted_loc[1]:(adjusted_loc[1] + d_raw)]

            if r < sensorBandwidth:
                # upscale the patch to the sensor bandwidth
                # via bilinear interpolation
                zoom_reshape = T.reshape(zoom, (1, zoom.shape[0],
                                                zoom.shape[1], zoom.shape[2]))
                zoom_bandwidth = upsample.bilinear_upsampling(
                    zoom_reshape, ratio=(sensorBandwidth / r),
                    batch_size=1, num_input_channels=channels)
                # zoom_bandwidth is with size (channel, height, width)
                zoom_bandwidth = T.reshape(zoom_bandwidth,
                                           (zoom_bandwidth.shape[1],
                                            zoom_bandwidth.shape[2],
                                            zoom_bandwidth.shape[3]))
            elif r > sensorBandwidth:
                # downscale by average pooling over the last 2 dimensions
                zoom_bandwidth = pool.pool_2d(
                    input=zoom,
                    ds=(r / sensorBandwidth, r / sensorBandwidth),
                    mode='average_inc_pad',
                    ignore_border=True)
            else:
                zoom_bandwidth = zoom

            imgZooms.append(zoom_bandwidth)

        zooms.append(T.stack(imgZooms))

    # self.zooms is with size (batch, depth, channel, height, width)
    self.zooms = T.stack(zooms)
def _glimpseSensor(img_batch, normLoc):
    """Calculate the glimpse sensors for a batch of images

    :type img_batch: tensor variable with size (batch_size, 784)
    :param img_batch: batch of images

    :type normLoc: tensor variable with size (batch_size, 2)
    :param normLoc: locations of the batch of images

    :return zooms: tensor variable with size
                   (batch, depth, channel, height, width),
                   the zooms of the batch of images
    """
    # from [-1.0, 1.0] -> [0, 28]
    loc = ((normLoc + 1) / 2) * self.mnist_size
    loc = T.cast(loc, 'int32')

    # img with size (batch, channels, height, width) after reshape
    img = T.reshape(img_batch, (self.batch_size, self.channels,
                                self.mnist_size, self.mnist_size))

    zooms = []  # zooms of all the images in batch
    # radius of the largest zoom
    maxRadius = self.minRadius * (2 ** (self.depth - 1))
    offset = maxRadius

    # zero-pad the batch to (batch, channels, h + 2R, w + 2R)
    img = T.concatenate((T.zeros((self.batch_size, self.channels,
                                  maxRadius, self.mnist_size)), img), axis=2)
    img = T.concatenate((img, T.zeros((self.batch_size, self.channels,
                                       maxRadius, self.mnist_size))), axis=2)
    img = T.concatenate((T.zeros((self.batch_size, self.channels,
                                  self.mnist_size + 2 * maxRadius,
                                  maxRadius)), img), axis=3)
    img = T.concatenate((img, T.zeros((self.batch_size, self.channels,
                                       self.mnist_size + 2 * maxRadius,
                                       maxRadius))), axis=3)
    img = T.cast(img, dtype=theano.config.floatX)

    for k in xrange(self.batch_size):
        imgZooms = []  # zooms for a single image
        # one_img with size (channels, 2R + size, 2R + size), channels=1 here
        one_img = img[k, :, :, :]

        for i in xrange(self.depth):
            # r = minR, 2 * minR, ..., (2^(depth - 1)) * minR
            r = self.minRadius * (2 ** i)
            d_raw = 2 * r  # patch size to be cropped
            loc_k = loc[k, :]  # location of the k-th glimpse, (2, )
            # upper-left corner of the patch
            adjusted_loc = T.cast(offset + loc_k - r, 'int32')

            # crop a zoom patch with size (channels, d_raw, d_raw)
            zoom = one_img[:, adjusted_loc[0]:(adjusted_loc[0] + d_raw),
                           adjusted_loc[1]:(adjusted_loc[1] + d_raw)]

            if r < self.sensorBandwidth:
                # upscale the patch to the sensor bandwidth
                # via bilinear interpolation
                zoom_reshape = T.reshape(zoom, (1, zoom.shape[0],
                                                zoom.shape[1], zoom.shape[2]))
                zoom_bandwidth = upsample.bilinear_upsampling(
                    zoom_reshape, ratio=(self.sensorBandwidth / r),
                    batch_size=1, num_input_channels=self.channels)
                # zoom_bandwidth is with size (channel, height, width)
                zoom_bandwidth = T.reshape(zoom_bandwidth,
                                           (zoom_bandwidth.shape[1],
                                            zoom_bandwidth.shape[2],
                                            zoom_bandwidth.shape[3]))
            elif r > self.sensorBandwidth:
                # downscale by average pooling over the last 2 dimensions
                zoom_bandwidth = pool.pool_2d(
                    input=zoom,
                    ds=(r / self.sensorBandwidth,
                        r / self.sensorBandwidth),
                    mode='average_inc_pad',
                    ignore_border=True)
            else:
                zoom_bandwidth = zoom

            imgZooms.append(zoom_bandwidth)

        zooms.append(T.stack(imgZooms))

    # returned zooms is with size (batch, depth, channel, height, width)
    return T.stack(zooms)
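A worked example of the three branches, using the defaults from the constructor above (minRadius=4, sensorBandwidth=8, depth=3): r takes the values 4, 8, 16, so each branch fires exactly once. At r=4 the 8x8 crop (d_raw = 2r) is bilinearly upsampled with ratio 8/4 = 2; at r=8 the 16x16 crop is kept as-is; at r=16 the 32x32 crop is average-pooled with ds=(2, 2). All three scales therefore end up as 16x16 patches, i.e. 2 * sensorBandwidth on each side.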
def get_output_for(self, inputs, **kwargs):
    return bilinear_upsampling(inputs, ratio=2)
def get_output_for(self, input_, **kwargs):
    return bilinear_upsampling(input_, ratio=self.ratio,
                               batch_size=self.input_shape[0],
                               num_input_channels=self.input_shape[1],
                               use_1D_kernel=self.use_1D_kernel)
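For context, a hedged sketch of the Lasagne layer this method could live in; the class name BilinearUpscaleLayer and its constructor are assumptions, since only get_output_for appears above:

from lasagne.layers import InputLayer, Layer
from theano.tensor.nnet.abstract_conv import bilinear_upsampling

class BilinearUpscaleLayer(Layer):  # hypothetical name
    def __init__(self, incoming, ratio, use_1D_kernel=True, **kwargs):
        super(BilinearUpscaleLayer, self).__init__(incoming, **kwargs)
        self.ratio = ratio
        self.use_1D_kernel = use_1D_kernel

    def get_output_shape_for(self, input_shape):
        return (input_shape[0], input_shape[1],
                input_shape[2] * self.ratio,
                input_shape[3] * self.ratio)

    def get_output_for(self, input_, **kwargs):
        return bilinear_upsampling(input_, ratio=self.ratio,
                                   batch_size=self.input_shape[0],
                                   num_input_channels=self.input_shape[1],
                                   use_1D_kernel=self.use_1D_kernel)

l_up = BilinearUpscaleLayer(InputLayer((8, 16, 32, 32)), ratio=2)
print(l_up.output_shape)  # (8, 16, 64, 64)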
def call(self, x, mask=None):
    return bilinear_upsampling(x, ratio=self.ratio)
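Similarly, a hedged sketch of the Keras 1.x layer this call signature suggests; the class name and output-shape logic are assumptions, and the layer is tied to the Theano backend since bilinear_upsampling is a Theano op:

from keras.engine.topology import Layer
from theano.tensor.nnet.abstract_conv import bilinear_upsampling

class BilinearUpSampling2D(Layer):  # hypothetical name; Theano backend only
    def __init__(self, ratio=2, **kwargs):
        self.ratio = ratio
        super(BilinearUpSampling2D, self).__init__(**kwargs)

    def get_output_shape_for(self, input_shape):
        # 'th' ordering: (batch, channels, height, width)
        return (input_shape[0], input_shape[1],
                input_shape[2] * self.ratio,
                input_shape[3] * self.ratio)

    def call(self, x, mask=None):
        return bilinear_upsampling(x, ratio=self.ratio)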