def _transform(theta, input, downsample_factor):
    """Resample ``input`` through the affine transforms given by ``theta``.

    Parameters
    ----------
    theta : tensor
        Affine parameters; reshaped here to ``(-1, 2, 3)``, i.e. one 2x3
        matrix per leading entry.
    input : tensor
        4D tensor whose shape unpacks as
        ``(num_batch, num_channels, height, width)``.
    downsample_factor : indexable
        ``downsample_factor[0]`` divides the height and
        ``downsample_factor[1]`` divides the width to obtain the output size.

    Returns
    -------
    tensor
        The sampled result laid out as
        ``(num_batch, num_channels, out_height, out_width)``.
    """
    num_batch, num_channels, height, width = input.shape
    affine = T.reshape(theta, (-1, 2, 3))

    # Size of the sampling grid after downsampling, cast to integer.
    out_height = T.cast(height / downsample_factor[0], 'int64')
    out_width = T.cast(width / downsample_factor[1], 'int64')

    # Target coordinates (x_t, y_t, 1), eq. (1) in ref [1].
    target_grid = _meshgrid(out_height, out_width)

    # A x (x_t, y_t, 1)^T -> (x_s, y_s): row 0 of each product holds the
    # source x coordinates, row 1 the source y coordinates.
    source_coords = T.dot(affine, target_grid)
    x_source = source_coords[:, 0].flatten()
    y_source = source_coords[:, 1].flatten()

    # _interpolate is handed the input as (bs, height, width, channels).
    # NOTE(review): presumably it samples the input at the source
    # coordinates -- confirm against its definition.
    channels_last = input.dimshuffle(0, 2, 3, 1)
    sampled = _interpolate(
        channels_last, x_source, y_source, out_height, out_width)

    # Restore the 4D layout, then move channels back to axis 1 (conv format).
    out_shape = (num_batch, out_height, out_width, num_channels)
    return T.reshape(sampled, out_shape).dimshuffle(0, 3, 1, 2)
def get_output_for(self, input, **kwargs):
    """Compute the layer output for ``input``.

    Inputs with more than two dimensions are flattened to two dimensions
    first. The result is ``self.nonlinearity`` applied to the product of
    the input with ``self.W``, combined with ``self.b`` when a bias is set.

    Parameters
    ----------
    input : tensor
        The incoming tensor; only its ``ndim`` is inspected here.
    **kwargs
        Accepted but not used by this method.

    Returns
    -------
    The value returned by ``self.nonlinearity``.
    """
    # Collapse everything past the first axis into one feature axis so the
    # input becomes a batch of feature vectors.
    features = T.flatten(input, 2) if input.ndim > 2 else input
    pre_activation = T.dot(features, self.W)

    if self.b is None:
        return self.nonlinearity(pre_activation)

    # Prepend a broadcastable axis to the bias before combining it.
    # NOTE(review): presumably the '+' / 'xx,1x' arguments request a
    # broadcast addition of the bias row -- confirm against T.broadcast.
    bias_row = T.dimshuffle(self.b, 'x', 0)
    pre_activation = T.broadcast('+', pre_activation, bias_row, 'xx,1x')
    return self.nonlinearity(pre_activation)
def _meshgrid(height, width):
    """Build the sampling grid of eq. (1) in reference [1].

    Equivalent to the following numpy code, but expressed with tensor ops
    so that ``height`` and ``width`` may be symbolic:

        x_t, y_t = np.meshgrid(np.linspace(-1, 1, width),
                               np.linspace(-1, 1, height))
        ones = np.ones(np.prod(x_t.shape))
        grid = np.vstack([x_t.flatten(), y_t.flatten(), ones])

    If the image size were known when the layer is constructed, the grid
    could be precomputed in numpy instead; that hardly affected performance
    when it was tried.

    Returns
    -------
    tensor
        Three stacked rows -- flattened x coordinates, flattened y
        coordinates, and ones -- each of length ``height * width``.
    """
    # x coordinates: a (height, 1) column of ones times a (1, width) row
    # repeats the x ramp on every row of the grid.
    ones_column = T.ones((height, 1))
    x_ramp = _linspace(-1.0, 1.0, width).dimshuffle('x', 0)
    x_coords = T.dot(ones_column, x_ramp)

    # y coordinates: a (height, 1) ramp times a (1, width) row of ones
    # repeats the y ramp down every column of the grid.
    y_ramp = _linspace(-1.0, 1.0, height).dimshuffle(0, 'x')
    ones_row = T.ones((1, width))
    y_coords = T.dot(y_ramp, ones_row)

    # Flatten each plane to a single row and append the row of ones.
    flat_x = x_coords.reshape((1, -1))
    flat_y = y_coords.reshape((1, -1))
    homogeneous = T.ones_like(flat_x)
    return T.concatenate([flat_x, flat_y, homogeneous], axis=0)