def transform_img(theta, input, downsample_factor, interp='bicubic'):
    """Warp a batch of images by per-sample affine transforms.

    Builds the (possibly downsampled) target sampling grid, maps it through
    ``theta`` to source coordinates, and resamples ``input`` with the chosen
    interpolator.  Input and output are in (batch, channels, height, width)
    layout.
    """
    num_batch, num_channels, height, width = input.shape
    theta = T.reshape(theta, (-1, 2, 3))

    # Output size after downsampling; grid of (x_t, y_t, 1), eq (1) in ref [1].
    out_height = T.cast(height / downsample_factor[0], 'int64')
    out_width = T.cast(width / downsample_factor[1], 'int64')
    grid = _meshgrid(out_height, out_width)

    # A x (x_t, y_t, 1)^T -> (x_s, y_s): source coordinate for each target pixel.
    T_g = T.dot(theta, grid)
    x_s_flat = T_g[:, 0].flatten()
    y_s_flat = T_g[:, 1].flatten()

    # Dispatch on interpolation mode.
    interpolator = {'bicubic': _interpolate_bicubic,
                    'bilinear': _interpolate_bilinear}.get(interp)
    if interpolator is None:
        raise ValueError("Interpolation must be \"bilinear\" or \"bicubic\", got \"%s\""%interp)

    # Interpolators expect (bs, height, width, channels) layout.
    input_dim = input.dimshuffle(0, 2, 3, 1)
    input_transformed = interpolator(
        input_dim, x_s_flat, y_s_flat, out_height, out_width)

    output = T.reshape(
        input_transformed,
        (num_batch, out_height, out_width, num_channels))
    return output.dimshuffle(0, 3, 1, 2)  # back to conv layout
def transform_img(theta, input, downsample_factor, interp="bicubic"):
    """Resample each image in ``input`` through its affine transform ``theta``.

    The target grid (downsampled from the input size) is pushed through the
    2x3 transform to get source coordinates, which are then interpolated.
    Tensors use (batch, channels, height, width) layout on both ends.
    """
    # Validate the interpolation mode up front (guard clause).
    if interp == "bicubic":
        interpolator = _interpolate_bicubic
    elif interp == "bilinear":
        interpolator = _interpolate_bilinear
    else:
        raise ValueError('Interpolation must be "bilinear" or "bicubic", got "%s"' % interp)

    num_batch, num_channels, height, width = input.shape
    theta = T.reshape(theta, (-1, 2, 3))

    # Target grid of (x_t, y_t, 1) homogeneous coords, eq (1) in ref [1].
    out_height = T.cast(height / downsample_factor[0], "int64")
    out_width = T.cast(width / downsample_factor[1], "int64")
    grid = _meshgrid(out_height, out_width)

    # A x (x_t, y_t, 1)^T -> (x_s, y_s)
    T_g = T.dot(theta, grid)
    x_s = T_g[:, 0]
    y_s = T_g[:, 1]

    # Interpolation works on (bs, height, width, channels).
    channels_last = input.dimshuffle(0, 2, 3, 1)
    resampled = interpolator(channels_last, x_s.flatten(), y_s.flatten(),
                             out_height, out_width)

    output = T.reshape(resampled, (num_batch, out_height, out_width, num_channels))
    output = output.dimshuffle(0, 3, 1, 2)  # dimshuffle back to conv format
    return output
def _interpolate_bicubic(im, x, y, out_height, out_width):
    """Bicubic interpolation of ``im`` at normalized coordinates (x, y).

    Parameters
    ----------
    im : tensor, (num_batch, height, width, channels) — channels-last image batch.
    x, y : flat tensors of sampling coordinates; clipped to [-1, 1] and then
        scaled to pixel coordinates.
    out_height, out_width : symbolic ints, size of the output sampling grid.

    Returns
    -------
    Tensor of shape (num_batch * out_height * out_width, channels) with the
    interpolated pixel values.
    """
    # *_f are floats
    num_batch, height, width, channels = im.shape
    height_f = T.cast(height, theano.config.floatX)
    width_f = T.cast(width, theano.config.floatX)

    # Clip coordinates to [-1, 1], then scale to [0, width/height - 1].
    x = T.clip(x, -1, 1)
    y = T.clip(y, -1, 1)
    x = (x + 1) / 2 * (width_f - 1)
    y = (y + 1) / 2 * (height_f - 1)

    x0 = T.cast(T.floor(x), "int64")
    y0 = T.cast(T.floor(y), "int64")

    # 4-tap cubic kernel: neighbors at offsets -1..2 around the floor coord.
    offsets = np.arange(-1, 3).astype(int)

    # The image is flattened to (batch*height*width, channels); `base` is the
    # flat offset of each sample's batch slice.
    dim2 = width
    dim1 = width * height
    base = T.repeat(T.arange(num_batch, dtype="int64") * dim1, out_height * out_width)

    def _flat_idx(xx, yy, dim2=dim2):
        # Convert (x, y) pixel coordinates to a linear index.
        return base + yy * dim2 + xx

    y_locs = [y0 + offset for offset in offsets]
    ys = [T.clip(loc, 0, height - 1) for loc in y_locs]

    def _cubic_interp_dim(im_flat, other_idx):
        """Cubic interpolation along x for one fixed (clipped) row index."""
        neighbor_locs = [x0 + offset for offset in offsets]
        neighbor_idx = [T.clip(nloc, 0, width - 1) for nloc in neighbor_locs]
        neighbor_idxs = [_flat_idx(xidx, other_idx) for xidx in neighbor_idx]
        values = [im_flat[idx] for idx in neighbor_idxs]
        weights = [_cubic_conv_weights(dg2(nloc) - x).dimshuffle(0, "x")
                   for nloc in neighbor_locs]
        # Normalized weighted sum along the x direction.
        out = (T.sum([dg2(v) * w for w, v in zip(weights, values)], axis=0)
               / T.sum(weights, axis=0))
        return out

    im_flat = im.reshape((-1, channels))
    # Interpolate along x for each of the 4 candidate rows, then blend along y.
    ims = [_cubic_interp_dim(im_flat, yidx) for yidx in ys]
    yweights = [_cubic_conv_weights(dg2(yloc) - y).dimshuffle(0, "x")
                for yloc in y_locs]
    # Reuse `yweights` rather than rebuilding identical weight subgraphs.
    out = (T.sum([v * w for v, w in zip(ims, yweights)], axis=0)
           / T.sum(yweights, axis=0))
    return out
def create_train_func(layers, grid_size=20):
    """Compile the training function for the transform-prediction network.

    Parameters
    ----------
    layers : dict with keys 'inputa', 'inputb', 'trans' (Lasagne layers).
    grid_size : int, side length of the reference grid used by the loss
        (default 20, matching the original hard-coded value).

    Returns
    -------
    A theano function (Xa_batch, Xb_batch, Tg_batch) -> [Tp, train_loss]
    that also applies Nesterov-momentum updates to the trainable
    parameters of ``layers['trans']``.
    """
    Xa, Xb = T.tensor4('Xa'), T.tensor4('Xb')
    Xa_batch, Xb_batch = T.tensor4('Xa_batch'), T.tensor4('Xb_batch')

    # Predicted transform (non-deterministic pass: dropout etc. active).
    Tp = get_output(
        layers['trans'],
        inputs={
            layers['inputa']: Xa, layers['inputb']: Xb,
        },
        deterministic=False,
    )

    # transforms: ground-truth, predicted
    Tg = T.fmatrix('Tg')
    Tg_batch = T.fmatrix('Tg_batch')
    theta_gt = Tg.reshape((-1, 2, 3))
    theta_pr = Tp.reshape((-1, 2, 3))

    # grids: ground-truth, predicted.  Build the reference grid once and map
    # it through both transforms; the loss compares the warped grids.
    grid = _meshgrid(grid_size, grid_size)
    Gg = T.dot(theta_gt, grid)
    Gp = T.dot(theta_pr, grid)

    train_loss = T.mean(T.sqr(Gg - Gp))

    params = get_all_params(layers['trans'], trainable=True)
    updates = nesterov_momentum(train_loss, params, 1e-3, 0.9)

    corr_func = theano.function(
        inputs=[theano.In(Xa_batch), theano.In(Xb_batch), theano.In(Tg_batch)],
        outputs=[Tp, train_loss],
        updates=updates,
        givens={
            Xa: Xa_batch, Xb: Xb_batch,  # Ia, Ib
            Tg: Tg_batch,                # transform Ia --> Ib
        })

    return corr_func
def _interpolate_bicubic(im, x, y, out_height, out_width):
    """Sample ``im`` with bicubic interpolation at normalized coords (x, y).

    Parameters
    ----------
    im : tensor, (num_batch, height, width, channels) — channels-last images.
    x, y : flat coordinate tensors; clipped to [-1, 1] then rescaled to
        pixel coordinates.
    out_height, out_width : symbolic ints, output sampling-grid size.

    Returns
    -------
    Tensor of shape (num_batch * out_height * out_width, channels).
    """
    # *_f are floats
    num_batch, height, width, channels = im.shape
    height_f = T.cast(height, theano.config.floatX)
    width_f = T.cast(width, theano.config.floatX)

    # Clip coordinates to [-1, 1] and scale to [0, width/height - 1].
    x = T.clip(x, -1, 1)
    y = T.clip(y, -1, 1)
    x = (x + 1) / 2 * (width_f - 1)
    y = (y + 1) / 2 * (height_f - 1)

    x0 = T.cast(T.floor(x), 'int64')
    y0 = T.cast(T.floor(y), 'int64')

    # Cubic convolution uses the 4 neighbors at offsets -1..2.
    offsets = np.arange(-1, 3).astype(int)

    # Flat indexing into the image reshaped to (batch*height*width, channels).
    dim2 = width
    dim1 = width * height
    base = T.repeat(
        T.arange(num_batch, dtype='int64') * dim1, out_height * out_width)

    def _flat_idx(xx, yy, dim2=dim2):
        # (x, y) pixel coordinates -> linear index.
        return base + yy * dim2 + xx

    y_locs = [y0 + offset for offset in offsets]
    ys = [T.clip(loc, 0, height - 1) for loc in y_locs]

    def _cubic_interp_dim(im_flat, other_idx):
        """Cubic interpolation along x at one fixed (clipped) row index."""
        neighbor_locs = [x0 + offset for offset in offsets]
        neighbor_idx = [T.clip(nloc, 0, width - 1) for nloc in neighbor_locs]
        neighbor_idxs = [_flat_idx(xidx, other_idx) for xidx in neighbor_idx]
        values = [im_flat[idx] for idx in neighbor_idxs]
        weights = [_cubic_conv_weights(dg2(nloc) - x).dimshuffle(0, 'x')
                   for nloc in neighbor_locs]
        # Normalized weighted sum along the x direction.
        out = (T.sum([dg2(v) * w for w, v in zip(weights, values)], axis=0)
               / T.sum(weights, axis=0))
        return out

    im_flat = im.reshape((-1, channels))
    # First interpolate along x for each of the 4 rows, then blend along y,
    # reusing the precomputed `yweights` instead of rebuilding them.
    ims = [_cubic_interp_dim(im_flat, yidx) for yidx in ys]
    yweights = [_cubic_conv_weights(dg2(yloc) - y).dimshuffle(0, 'x')
                for yloc in y_locs]
    out = (T.sum([v * w for v, w in zip(ims, yweights)], axis=0)
           / T.sum(yweights, axis=0))
    return out
def create_valid_func(layers, grid_size=20):
    """Compile the validation function for the transform-prediction network.

    Parameters
    ----------
    layers : dict with keys 'inputa', 'inputb', 'trans' (Lasagne layers).
    grid_size : int, side length of the reference grid used by the loss
        (default 20, matching the original hard-coded value).

    Returns
    -------
    A theano function (Xa_batch, Xb_batch, Tg_batch) -> [Tp, valid_loss].
    Unlike the training function it runs deterministically and applies
    no parameter updates.
    """
    Xa, Xb = T.tensor4('Xa'), T.tensor4('Xb')
    Xa_batch, Xb_batch = T.tensor4('Xa_batch'), T.tensor4('Xb_batch')

    # Predicted transform (deterministic pass: dropout etc. disabled).
    Tp = get_output(
        layers['trans'],
        inputs={
            layers['inputa']: Xa, layers['inputb']: Xb,
        },
        deterministic=True,
    )

    # transforms: ground-truth, predicted
    Tg = T.fmatrix('Tg')
    Tg_batch = T.fmatrix('Tg_batch')
    theta_gt = Tg.reshape((-1, 2, 3))
    theta_pr = Tp.reshape((-1, 2, 3))

    # grids: ground-truth, predicted.  Build the reference grid once and map
    # it through both transforms; the loss compares the warped grids.
    grid = _meshgrid(grid_size, grid_size)
    Gg = T.dot(theta_gt, grid)
    Gp = T.dot(theta_pr, grid)

    valid_loss = T.mean(T.sqr(Gg - Gp))

    corr_func = theano.function(
        inputs=[theano.In(Xa_batch), theano.In(Xb_batch), theano.In(Tg_batch)],
        outputs=[Tp, valid_loss],
        givens={
            Xa: Xa_batch, Xb: Xb_batch,  # Ia, Ib
            Tg: Tg_batch,                # transform Ia --> Ib
        })

    return corr_func