def _rand_crop3d(*imgs):
    """Cut the same random window out of every tensor in ``imgs``.

    The window size ``sz`` and the ``kwargs`` dict (from which an optional
    ``'name'`` entry is popped for the name scope) are free variables that
    must be supplied by the enclosing scope; neither is defined here.

    :param imgs: tensors whose first two dimensions are cropped; the
        remaining dimension is kept whole (slice size ``-1``).
        NOTE(review): the slice uses a 3-element begin/size, so inputs are
        presumably rank-3 (H, W, C) - confirm against callers.
    :return: tuple of cropped tensors, one per input, in input order
    """
    scope_name = kwargs.pop('name', None)
    with tf.name_scope(scope_name, 'rand_crop', list(imgs) + [sz]):
        crop_hw = tf.convert_to_tensor(sz, dtype=tf.int32, name="size")
        image_hw = tf.shape(imgs[0])[:2]

        # Runtime guard: the first image must be at least as large as the crop.
        fits = tf.Assert(tf.reduce_all(image_hw >= crop_hw),
                         ["Need value.shape >= size, got", image_hw, crop_hw])
        image_hw = control_flow_ops.with_dependencies([fits], image_hw)

        # Runtime guard: every image (the first one included) must share the
        # same leading two dimensions.
        for other in imgs:
            other_hw = tf.shape(other)[:2]
            same = tf.Assert(tf.reduce_all(image_hw.equal(other_hw)),
                             ["Need same (H,W,?) image.shape[:2] == otherimage.shape[:2], got",
                              image_hw, other_hw])
            image_hw = control_flow_ops.with_dependencies([same], image_hw)

        # Draw a large non-negative integer per axis and fold it into the
        # valid offset range [0, image - crop] with a modulo.  No seed is set.
        offset_limit = image_hw - crop_hw + 1
        raw = tf.random_uniform(tf.shape(image_hw), dtype=crop_hw.dtype,
                                maxval=crop_hw.dtype.max)

        # The trailing dimension is taken as-is: begin at 0, size -1.
        begin = (raw % offset_limit).append(0)
        extent = crop_hw.append(-1)

        return tuple(tf.slice(img, begin, extent) for img in imgs)
def sparsemax(logits, axis=-1, name=None):
    """Sparsemax projection of ``logits`` along ``axis``.

    Follows the structure of the TensorFlow contrib implementation:
    https://github.com/tensorflow/tensorflow/blob/r1.1/tensorflow/contrib/sparsemax/python/ops/sparsemax.py

    The tensor is rotated with ``tf.shiftdim`` so the target axis comes last,
    flattened to 2-D rows, processed row-wise, then reshaped and rotated back.

    :param logits: tf.Tensor
    :param axis: axis to normalise over (default: last)
    :param name: currently unused
    :return: tensor with the same shape as ``logits``
    """
    logits = tf.shiftdim(logits, -axis - 1)
    tshape = tf.shape(logits)
    # After the rotation the target axis is the LAST one, so its length is
    # tshape[-1].  Indexing with the caller's ``axis`` picked an unrelated
    # dimension whenever ``axis`` was not the last axis, and the reshape below
    # then grouped elements into the wrong rows.
    dims = tshape[-1]
    logits = tf.reshape(logits, (-1, dims))
    obs = tf.shape(logits)[0]

    # Centre each row, then sort it.
    # NOTE(review): the algorithm needs each row in DESCENDING order (the
    # reference implementation uses top_k); confirm tf.sort here does that.
    z = logits - tf.mean(logits, axis=1, keepdims=True)
    z_sorted = tf.sort(z)

    # k(z): number of sorted entries satisfying 1 + k * z_k > cumsum_k.
    z_cumsum = tf.cumsum(z_sorted, axis=1)
    k = tf.range(1, dims + 1).astype(dtype=logits.dtype)
    z_check = 1 + k * z_sorted > z_cumsum
    k_z = tf.sum(z_check.astype(tf.int32), axis=1)

    # tau(z): threshold taken from the cumulative sum at position k(z) - 1.
    indices = tf.stack([tf.range(0, obs), k_z - 1], axis=1)
    tau_sum = tf.gather_nd(z_cumsum, indices)
    tau_z = (tau_sum - 1) / k_z.astype(logits.dtype)

    res = tf.maximum(tf.cast(0, logits.dtype), z - tau_z[:, tf.newaxis])

    # Undo the flattening and the axis rotation.
    res = tf.reshape(res, tshape)
    res = tf.shiftdim(res, axis + 1)
    return res
def _rand_crop_offsets(*imgs):
    """Randomly crop batched images, with an independent offset per batch item.

    All tensors in ``imgs`` are cropped with the same per-item offsets.  The
    crop size ``sz`` is a free variable that must be supplied by the enclosing
    scope; it is not defined here.

    :param imgs: tensors whose dimensions 1 and 2 are cropped (the inline
        comment in the original marks them as H, W of a BHWC layout)
    :return: ``(crops, offsets)`` where ``crops`` is the ``tf.map_fn`` result
        (one cropped tensor per input) and ``offsets`` holds the two random
        offsets drawn for each batch item
    """
    with tf.name_scope(None, 'rand_crop', list(imgs) + [sz]):
        value = imgs[0]
        size = tf.convert_to_tensor(sz, dtype=tf.int32, name="size")
        shape = tf.shape(value)[1:3]  # HW of BHWC

        # Runtime guard: the first image must be at least as large as the crop.
        check = tf.Assert(tf.reduce_all(shape >= size),
                          ["Need value.shape >= size, got", shape, size])
        shape = control_flow_ops.with_dependencies([check], shape)

        # Runtime guard: every image must share the same H, W.
        for v in imgs:
            vshape = tf.shape(v)[1:3]
            check = tf.Assert(tf.reduce_all(shape.equal(vshape)),
                              ["Need same (H,W,?) image.shape[1:3] == otherimage.shape[1:3], got",
                               shape, vshape])
            shape = control_flow_ops.with_dependencies([check], shape)

        limit = shape - size + 1

        # One (two-element) offset per batch item; use the static batch size
        # when it is known, otherwise the dynamic one.
        if value.dims[0] is None:
            batchshape = tf.shape(value)[:1].append(2)
        else:
            batchshape = (value.dims[0], 2)

        # Draw large non-negative integers and fold them into [0, limit) with
        # a modulo.  No seed is set.
        offsets = tf.random_uniform(batchshape, dtype=size.dtype,
                                    maxval=size.dtype.max) % limit

        # The channel dimension is taken whole.
        size = size.append(-1)

        def _3d_crop(args):
            """Crop one batch item of every image with that item's offset."""
            values, offset = args
            offset = offset.append(0)
            outs = []
            for img in values:
                out = tf.slice(img, offset, size)
                # Static shape: crop size plus THIS image's channel count.
                # The original read ``v`` here, the leaked variable of the
                # validation loop above, i.e. always the last input image.
                out.set_shape(list(sz) + img.dims[-1:])
                outs.append(out)
            return outs

        crops = tf.map_fn(_3d_crop, [imgs, offsets],
                          dtype=[img.dtype for img in imgs])
        return crops, offsets
def pad_if_need(image, size, offsets=None):
    """Pad ``image`` so that its first two dimensions reach at least ``size``.

    Dimensions that are already large enough receive no padding.  The deficit
    on each axis is split so that ``deficit // 2`` (plus the optional offset)
    goes in front and the remainder goes behind.

    :param image: tensor3d[H,W,C]
    :param size: (int, int) target size (H,W)
    :param offsets: optional pair added to the leading padding of H and W;
        treated as (0, 0) when None
    :return: padded tensor, with its static shape set where it is known
    """
    assert image.ndim == 3
    dynamic_shape = tf.shape(image)

    # Static shape after padding, for the axes whose size is known.
    # NOTE(review): assumes ``image.dims`` yields a mutable list - confirm.
    static_shape = image.dims
    for axis in range(2):
        known = static_shape[axis]
        if known is not None and size[axis] > known:
            static_shape[axis] = size[axis]

    # Per-axis deficit, clamped at zero; the last axis compares the dynamic
    # channel count with itself, so it is never padded.
    target = tf.convert_to_tensor(size).append(dynamic_shape[-1])
    deficit = target - dynamic_shape
    deficit = tf.where(deficit > 0, deficit,
                       tf.zeros(tf.shape(deficit), dtype=tf.int32))

    shift = [0, 0, 0] if offsets is None else list(offsets) + [0]

    pad_before = deficit // 2 + tf.convert_to_tensor(shift)
    pad_after = deficit - pad_before
    paddings = tf.concat(0, [[pad_before], [pad_after]]).T

    padded = tf.pad(image, paddings, 'CONSTANT')
    padded.set_shape(static_shape)
    return padded
def deconv(x, outdim, kernel, stride=1, padding='SAME',
           initializer=tf.he_uniform, bias=False, extra=None, **kwargs):
    """Transposed convolution for inputs with 2 or 3 spatial dimensions.

    The output shape, kernel shape and stride vector come from the helpers
    ``_deconv_outshape``, ``_kernel_shape`` and ``_stride_shape``, which are
    defined outside this block.

    :param x: input tensor; the spatial rank is taken as ``x.ndim - 2``
    :param outdim: number of output channels
    :param kernel: kernel size, forwarded to the shape helpers
    :param stride: stride, forwarded to the shape helpers
    :param padding: padding mode passed to the transpose op
    :param initializer: called with the kernel shape to build the initializer
        of the weight variable ``'W'``
    :param bias: when truthy, add a zero-initialised bias variable ``'b'``
    :param extra: forwarded to ``_deconv_outshape``
    :param kwargs: forwarded to ``tf.get_bias`` only
    :return: output tensor with its static shape set
    :raises NotImplementedError: when the spatial rank is neither 2 nor 3
    """
    spatial_rank = x.ndim - 2

    target_shape = _deconv_outshape(spatial_rank, x.dims, outdim, kernel,
                                    stride, padding, extra)
    static_out = tf.TensorShape(target_shape)
    if target_shape[0] is None:
        # Unknown static batch size: use the dynamic one and pack the shape
        # into a single tensor.
        target_shape[0] = tf.shape(x)[0]
        target_shape = tf.stack(target_shape)

    # The helper receives (outdim, input channels): in/out swapped for deconv.
    filter_shape = _kernel_shape(spatial_rank, kernel, outdim, x.dims[-1])
    strides = _stride_shape(spatial_rank, stride)

    weights = tf.get_weight('W', shape=filter_shape,
                            initializer=initializer(filter_shape))

    # Checked only after the weight has been requested, as in the original.
    if spatial_rank not in (2, 3):
        raise NotImplementedError('not implementd for ndim [{0}]'.format(spatial_rank))
    if spatial_rank == 2:
        result = tf.nn.conv2d_transpose(x, weights, target_shape,
                                        strides=strides, padding=padding)
    else:
        result = tf.nn.conv3d_transpose(x, weights, target_shape,
                                        strides=strides, padding=padding)

    if bias:
        offset = tf.get_bias('b', shape=(outdim, ),
                             initializer=tf.zeros_initializer(), **kwargs)
        result = tf.nn.bias_add(result, offset)

    result.set_shape(static_out)
    return result
def _crop_center_one(imgs, name=None):
    """Cut a centred window of size ``sz`` out of ``imgs``.

    ``sz`` is a free variable that must be supplied by the enclosing scope.
    The cropped axes are ``shape[-3:-1]``; for rank-3 input the last axis is
    kept whole, for rank-4 input the first and last axes are kept whole.  For
    any other rank the 2-element offset/size are passed to ``tf.slice``
    unchanged.

    :param imgs: tensor to crop
    :param name: optional name for the slice op
    :return: the sliced tensor
    """
    crop_hw = tf.convert_to_tensor(sz, dtype=tf.int32, name="size")
    image_hw = tf.shape(imgs)[-3:-1]

    # No runtime size check here: the original notes that the assert has no
    # GPU support and leaves it disabled.
    begin = (image_hw - crop_hw) // 2

    rank = imgs.ndim
    if rank == 3:
        begin = tf.concat(0, [begin, [0]])
        crop_hw = tf.concat(0, [crop_hw, [-1]])
    elif rank == 4:
        begin = tf.concat(0, [[0], begin, [0]])
        crop_hw = tf.concat(0, [[-1], crop_hw, [-1]])

    return tf.slice(imgs, begin, crop_hw, name=name)
def rand_cbox(shape, count, starts=None, sizes=None):
    """Build randomly coloured boxes and composite them into one result.

    :param shape: forwarded to ``rand_bbox``; ``shape[1:3]`` is used as the
        mask size
    :param count: number of boxes
    :param starts: forwarded to ``rand_bbox``
    :param sizes: forwarded to ``rand_bbox``
    :return: random colored box, i.e. the output of ``composite``
    """
    from .blend import composite

    boxes = rand_bbox(shape, count, starts=starts, sizes=sizes)

    # B,Count,H,W, 1
    box_mask = bbox3_to_mask(boxes, shape[1:3]).expand_dims(-1)

    # Use the static batch size when it is known (and non-zero), otherwise
    # the dynamic one.
    batch = boxes.dims[0]
    if not batch:
        batch = tf.shape(boxes)[0]

    # One random 3-value colour per box, broadcast over the box area.
    palette = tf.random_uniform(shape=[batch, count, 1, 1, 3])

    painted = tf.select(box_mask, palette, 0.)
    return composite(painted, order='BL')
def transform_4r(img, theta, outsize=None, oob=None):
    """Resample a batch of images through the transform ``theta``.

    A regular grid of target coordinates spanning [-0.5, 0.5] on both axes is
    built, turned into homogeneous rows ``[x; y; 1]``, multiplied by
    ``theta`` to get source coordinates, and handed to ``sampling_xy``.

    :param img: [BHWC]
    :param theta: [2,3]
    :param outsize: [H',W'] or [H,W] if none
    :param oob: out of bound value [C,] or [1], forwarded to ``sampling_xy``
    :return: [BH'W'C]
    """
    assert img.ndim == 4

    if outsize is None:
        # Prefer the static H, W; fall back to the dynamic shape when either
        # one is unknown.
        static_hw = img.dims[1:3]
        outsize = tf.shape(img)[1:3] if None in static_hw else static_hw

    out_h, out_w = outsize[0], outsize[1]

    # Not used below; kept so the ``img.shapes`` access still takes place.
    _batch, _height, _width, _channels = img.shapes

    # Target coordinates, normalised to [-0.5, 0.5].
    grid_x, grid_y = tf.meshgrid(tf.linspace(-0.5, 0.5, out_w),
                                 tf.linspace(-0.5, 0.5, out_h))
    target_xy1 = tf.stack([grid_x.flat(), grid_y.flat(),
                           tf.ones((out_w * out_h, ))])

    # Matching source coordinates [x; y], shape [2, pixels].
    source_xy = theta.dot(target_xy1)

    return sampling_xy(img, source_xy, outsize, oob=oob)
def sampling_xy_3r(img, xys, outsize=None, oob=None):
    """Differentiable image sampling with bilinear interpolation.

    Source coordinates are expected in the normalised range [-0.5, 0.5]; they
    are mapped to pixel space with ``(xys + 0.5) * [W; H]``.  Each output
    pixel is the weighted sum of its four surrounding source pixels.  Output
    pixels for which neither the floor corner nor the ceil corner lies inside
    the image are replaced by the out-of-bound value.

    :param img: source image [HWC]
    :param xys: source coord [2, H'*W'] if outsize given
    :param outsize: [H',W'] or None; when None it is read from
        ``tf.shape(xys)[1:]`` and ``xys`` is flattened, so xys must have rank 3
    :param oob: out-of-bound fill value; 0. when None
    :return: [H',W',C]
    """
    assert img.ndim == 3

    fill = 0. if oob is None else oob
    fill = tf.convert_to_tensor(fill)

    if outsize is None:
        outsize = tf.shape(xys)[1:]
        xys = xys.flat2d()

    height, width, channels = img.shapes
    extent_xy = tf.stack([width, height]).to_float().reshape((2, 1))

    # Normalised coordinates -> pixel coordinates.
    pos = (xys + 0.5) * extent_xy
    ceil_f = tf.ceil(pos)

    # Interpolation weights, [2, p] each: distance to the ceil index weights
    # the floor neighbour, and vice versa.
    w_floor = ceil_f - pos
    w_ceil = 1. - w_floor

    # Integer indices of the two neighbours per axis, [2, p].
    ceil_i = ceil_f.to_int32()
    floor_i = ceil_i - 1
    xs = tf.stack([floor_i[0], ceil_i[0]])
    ys = tf.stack([floor_i[1], ceil_i[1]])

    # Clamp to the image, and note where clamping changed nothing.
    xs_in = xs.clip_by_value(0, width - 1)
    ys_in = ys.clip_by_value(0, height - 1)
    inside = tf.logical_and(xs_in.equal(xs), ys_in.equal(ys))  # [2, p]
    # In bounds if either the floor or the ceil corner is untouched.
    inside = tf.reduce_any(inside, axis=0, keepdims=True)  # [1, p]
    inside = inside.reshape((-1, 1)).to_float()  # [p, 1], 1 for channel

    def _corner(row, col):
        """Fetch the [p, C] pixels at the (row, col) neighbour choice."""
        return getpixel(img, tf.stack([ys_in[row], xs_in[col]]).T)

    top_left = _corner(0, 0)
    top_right = _corner(0, 1)
    bottom_left = _corner(1, 0)
    bottom_right = _corner(1, 1)

    # Stacked neighbours: [4, p, C].
    corners = tf.stack([top_left, top_right, bottom_left, bottom_right], axis=0)

    # Matching weights: [4, p], then [4, p, 1] to broadcast over channels.
    weights = tf.stack([
        w_floor[1] * w_floor[0],  # left top
        w_floor[1] * w_ceil[0],  # right top
        w_ceil[1] * w_floor[0],  # left bottom
        w_ceil[1] * w_ceil[0],  # right bottom
    ])
    weights = weights.reshape((4, -1, 1))

    blended = tf.sum(weights * corners.to_float(), axis=0)  # [p, C]

    # Fill out-of-bound pixels by broadcasting the fill value.
    fill = fill.reshape((1, -1))
    blended = blended * inside + fill * (1. - inside)

    # [p, C] => [H', W', C]
    return blended.reshape((outsize[0], outsize[1], channels))