def test_forward_floor():
    """Compare TensorFlow and TVM results for a single ``Floor`` op.

    A random float32 array of shape (1, 3, 10, 10) is fed through a graph
    that contains only a placeholder followed by ``tf.floor``.
    """
    data = np.random.uniform(size=(1, 3, 10, 10)).astype(np.float32)
    with tf.Graph().as_default():
        placeholder = tf.placeholder(shape=data.shape, dtype=data.dtype)
        tf.floor(placeholder)
        # The tensor names are the defaults TF assigns in a fresh graph.
        compare_tf_with_tvm(data, 'Placeholder:0', 'Floor:0')
def process_reals(x, lod, mirror_augment, drange_data, drange_net):
    """Pre-process a batch of real images before they reach the networks.

    Steps, in order: cast to float32 and remap the dynamic range from
    ``drange_data`` to ``drange_net``; optionally flip each example along
    axis 3 with probability 0.5; cross-fade with a 2x box-filtered copy by
    the fractional part of ``lod``; finally replicate pixels by a factor of
    ``2 ** floor(lod)`` along axes 2 and 3.

    Args:
        x: rank-4 image tensor (indexed ``s[0]..s[3]`` below; presumably
            NCHW -- confirm against the caller).
        lod: scalar level-of-detail tensor.
        mirror_augment: Python bool enabling the random flip.
        drange_data: dynamic range of the incoming data.
        drange_net: dynamic range expected by the networks.

    Returns:
        The processed image tensor.
    """
    with tf.name_scope('ProcessReals'):
        with tf.name_scope('DynamicRange'):
            x = misc.adjust_dynamic_range(tf.cast(x, tf.float32), drange_data, drange_net)

        if mirror_augment:
            with tf.name_scope('MirrorAugment'):
                dims = tf.shape(x)
                # One uniform draw per example, broadcast over the remaining axes.
                coin = tf.random_uniform([dims[0], 1, 1, 1], 0.0, 1.0)
                coin = tf.tile(coin, [1, dims[1], dims[2], dims[3]])
                x = tf.where(coin < 0.5, x, tf.reverse(x, axis=[3]))

        with tf.name_scope('FadeLOD'):
            # Blend the image with its 2x2-averaged version; the blend weight
            # is the fractional part of lod.
            dims = tf.shape(x)
            coarse = tf.reshape(x, [-1, dims[1], dims[2]//2, 2, dims[3]//2, 2])
            coarse = tf.reduce_mean(coarse, axis=[3, 5], keepdims=True)
            coarse = tf.tile(coarse, [1, 1, 1, 2, 1, 2])
            coarse = tf.reshape(coarse, [-1, dims[1], dims[2], dims[3]])
            x = tfutil.lerp(x, coarse, lod - tf.floor(lod))

        with tf.name_scope('UpscaleLOD'):
            # Nearest-neighbour upscale so the output size does not depend on lod.
            dims = tf.shape(x)
            scale = tf.cast(2 ** tf.floor(lod), tf.int32)
            x = tf.reshape(x, [-1, dims[1], dims[2], 1, dims[3], 1])
            x = tf.tile(x, [1, 1, 1, scale, 1, scale])
            x = tf.reshape(x, [-1, dims[1], dims[2] * scale, dims[3] * scale])
        return x
def _interpolate2d(imgs, x, y):
    """Bilinearly sample ``imgs`` at normalised coordinates ``(x, y)``.

    Args:
        imgs: rank-4 tensor; its four dimensions are read below as
            (n_batch, xlen, ylen, n_channel).
        x: sampling coordinates along dimension 1, in [-1, 1].
        y: sampling coordinates along dimension 2, in [-1, 1].
            NOTE(review): x and y are presumably flat vectors with
            n_batch * xlen * ylen entries (the result is reshaped to that
            size) -- confirm against the caller.

    Returns:
        Float tensor of shape [n_batch, xlen, ylen, n_channel].
    """
    n_batch = tf.shape(imgs)[0]
    xlen = tf.shape(imgs)[1]
    ylen = tf.shape(imgs)[2]
    n_channel = tf.shape(imgs)[3]

    x = tf.to_float(x)
    y = tf.to_float(y)
    xlen_f = tf.to_float(xlen)
    ylen_f = tf.to_float(ylen)
    zero = tf.zeros([], dtype='int32')
    max_x = tf.cast(xlen - 1, 'int32')
    max_y = tf.cast(ylen - 1, 'int32')

    # scale indices from [-1, 1] to [0, xlen - 1] and [0, ylen - 1]
    x = (x + 1.) * (xlen_f - 1.) * 0.5
    y = (y + 1.) * (ylen_f - 1.) * 0.5

    # integer corners of the cell that contains each sample
    x0 = tf.cast(tf.floor(x), 'int32')
    x1 = x0 + 1
    y0 = tf.cast(tf.floor(y), 'int32')
    y1 = y0 + 1

    # clamp the corners to the image so the gather below stays in range
    x0 = tf.clip_by_value(x0, zero, max_x)
    x1 = tf.clip_by_value(x1, zero, max_x)
    y0 = tf.clip_by_value(y0, zero, max_y)
    y1 = tf.clip_by_value(y1, zero, max_y)

    # flat row index into imgs_flat: batch offset + x * ylen + y
    # (_repeat is defined elsewhere; presumably it repeats each batch offset
    # once per sample -- verify)
    base = _repeat(tf.range(n_batch) * xlen * ylen, ylen * xlen)
    base_x0 = base + x0 * ylen
    base_x1 = base + x1 * ylen
    index00 = base_x0 + y0
    index01 = base_x0 + y1
    index10 = base_x1 + y0
    index11 = base_x1 + y1

    # use indices to lookup pixels in the flat image; the channel dimension
    # is kept as the second axis
    imgs_flat = tf.reshape(imgs, [-1, n_channel])
    imgs_flat = tf.to_float(imgs_flat)
    I00 = tf.gather(imgs_flat, index00)
    I01 = tf.gather(imgs_flat, index01)
    I10 = tf.gather(imgs_flat, index10)
    I11 = tf.gather(imgs_flat, index11)

    # and finally calculate interpolated values; the offsets are measured
    # from the *clipped* lower corner
    dx = x - tf.to_float(x0)
    dy = y - tf.to_float(y0)
    w00 = tf.expand_dims((1. - dx) * (1. - dy), 1)
    w01 = tf.expand_dims((1. - dx) * dy, 1)
    w10 = tf.expand_dims(dx * (1. - dy), 1)
    w11 = tf.expand_dims(dx * dy, 1)
    output = tf.add_n([w00*I00, w01*I01, w10*I10, w11*I11])

    # restore the [batch, x, y, channel] layout
    output = tf.reshape(output, [n_batch, xlen, ylen, n_channel])
    return output
def staircase_loss(y_true, y_pred, var_a=16.0, cnst=1.0/255.0):
    """Keras staircase loss.

    The absolute error, shifted down by half a step and clipped to [0, 1],
    is mapped onto a smooth staircase whose steps are ``cnst`` wide and
    ``cnst`` high. ``var_a`` controls how sharp each tanh-shaped step is.

    Args:
        y_true: ground-truth tensor.
        y_pred: predicted tensor.
        var_a: steepness of each step.
        cnst: width and height of one step.

    Returns:
        The loss averaged over the last axis.
    """
    step_height = cnst
    step_width = cnst
    abs_err = K.clip(K.abs(y_true - y_pred) - 0.5 * cnst, 0.0, 1.0)

    in_steps = abs_err / step_width
    step_index = tf.floor(in_steps)
    # Position inside the current step, centred on zero.
    centred = in_steps - step_index - 0.5
    smooth_step = K.tanh(var_a * centred) / (2.0 * K.tanh(var_a / 2.0))

    loss = step_height * (smooth_step + 0.5 + step_index)
    loss = loss + 1e-10
    return K.mean(loss, axis=-1)
def get_output_shape_tensor(self, flatten=None):
    """Return the layer's output shape, built from TensorFlow ops.

    The spatial size after the convolution is ``ceil(in / stride)`` for
    'same' padding and ``ceil((in - filter + 1) / stride)`` for 'valid'
    padding. When pooling is enabled that size is further divided by the
    pool size and rounded up ('same' pooling) or down ('valid' pooling).
    NOTE(review): this assumes the pooling stride equals the pool size --
    confirm against the layer that builds the pool op.

    Args:
        flatten: if true, return ``(batch, height * width * channels)``;
            if ``None``, fall back to ``self.flatten``.

    Returns:
        ``(batch, height, width, channels)`` or the flattened 2-tuple.

    Raises:
        ValueError: if ``self.conv_padding`` or ``self.pool_type`` is
            neither 'same' nor 'valid'.
    """
    if flatten is None:
        flatten = self.flatten

    conv_padding = self.conv_padding.lower()
    if conv_padding not in ('same', 'valid'):
        raise ValueError('Unsupported conv_padding: %r' % (self.conv_padding,))

    def conv_extent(axis):
        # Size along input axis 1 (height) or 2 (width) after the convolution.
        size = self.input_shape[axis]
        if conv_padding == 'valid':
            size = size - self.filter_shape[axis - 1] + 1
        # The ceil has to wrap the whole division, and it has to be tf.ceil:
        # the operand is a tensor, not a NumPy array.
        return tf.ceil(tf.to_float(size) / self.conv_stride[axis])

    def output_extent(axis):
        # Size along `axis` after the convolution and the optional pooling.
        extent = conv_extent(axis)
        if self.pool:
            pool_type = self.pool_type.lower()
            ratio = extent / self.pool_size[axis - 1]
            if pool_type == 'same':
                extent = tf.ceil(ratio)
            elif pool_type == 'valid':
                extent = tf.floor(ratio)
            else:
                raise ValueError('Unsupported pool_type: %r' % (self.pool_type,))
        return tf.to_int32(extent)

    with tf.name_scope(self.layer_name):
        out_shape = (self.input_shape[0],
                     output_extent(1),
                     output_extent(2),
                     self.filter_shape[3])

    if flatten:
        return (out_shape[0], out_shape[1] * out_shape[2] * out_shape[3])
    return out_shape
def sample_img(img, n_samples, img_size=28):
    """Draw random nearest-neighbour pixel samples from a square image.

    Continuous coordinates are drawn uniformly from ``[0, img_size - 1)``,
    rounded to the nearest integer, and used to index the flattened image
    as ``x + y * img_size``.

    Args:
        img: image tensor; it is flattened to 1-D before the lookup, so it
            is presumably a single ``img_size x img_size`` image -- confirm
            against the caller.
        n_samples: number of samples to draw.
        img_size: side length of the image in pixels (default 28).

    Returns:
        ``(sx, sy, samples)``: the continuous coordinates divided by
        ``img_size - 1`` and the sampled pixel values.
    """
    max_index = img_size - 1
    sx = tf.random_uniform((n_samples,), 0, 1) * max_index
    sy = tf.random_uniform((n_samples,), 0, 1) * max_index

    # Only the rounded coordinates are used. The previous floor/ceil
    # variants and the stacked index tensor were never read, and building
    # the latter called tf.pack, which no longer exists in TensorFlow >= 1.0.
    sx_nearest = tf.cast(tf.round(sx), tf.int32)
    sy_nearest = tf.cast(tf.round(sy), tf.int32)

    flat_img = tf.reshape(img, (-1,))
    samples = tf.gather(flat_img, sx_nearest + sy_nearest * img_size)
    return sx / max_index, sy / max_index, samples
def _log_unnormalized_prob(self, x):
    """Return ``-power * log(x)`` on the support and ``-inf`` elsewhere.

    ``x`` is floored first unless ``self.interpolate_nondiscrete`` is set,
    then clamped from below at 1. Any entry that the floor/clamp changed is
    treated as unsupported and mapped to ``-inf``.
    """
    if self.interpolate_nondiscrete:
        candidate = x
    else:
        candidate = tf.floor(x)
    safe_x = tf.maximum(candidate, 1.)

    log_prob = -self.power * tf.log(safe_x)

    # x is supported exactly where clamping/flooring left it unchanged.
    unchanged = tf.equal(x, safe_x)
    is_supported = tf.broadcast_to(unchanged, tf.shape(log_prob))
    minus_inf = np.array(-np.inf, dtype=log_prob.dtype.as_numpy_dtype)
    neg_inf = tf.fill(tf.shape(log_prob), value=minus_inf)
    return tf.where(is_supported, log_prob, neg_inf)
def sparse_dropout(x, keep_prob, noise_shape):
    """Dropout for sparse tensors.

    Args:
        x: sparse tensor whose entries are randomly retained.
        keep_prob: probability of keeping each entry.
        noise_shape: shape of the random mask passed to ``tf.sparse_retain``.

    Returns:
        The retained entries scaled by ``1 / keep_prob``.
    """
    # floor(keep_prob + U[0, 1)) is 1 with probability keep_prob, else 0.
    keep_mask = tf.cast(tf.floor(keep_prob + tf.random_uniform(noise_shape)),
                        dtype=tf.bool)
    kept = tf.sparse_retain(x, keep_mask)
    return kept * (1./keep_prob)
def count_sketch(probs, project_size):
    """Project a tensor with the count-sketch op.

    Two non-trainable variables are created (or reused) per input tensor:
    ``h`` holds a bucket index for every input column and ``s`` a sign of
    +1 or -1. The projection itself is done by ``_sketch_op.count_sketch``.

    Args:
        probs: A `Tensor`; its second dimension must be statically known.
        project_size: output size (`int`).

    Returns:
        A projected sketch `Tensor` with shape [batch_size, project_size].
    """
    scope_name = 'CountSketch_' + probs.name.replace(':', '_')
    with tf.variable_scope(scope_name) as scope:
        input_size = int(probs.get_shape()[1])

        # h and s must be sampled only once per scope: reuse the variables
        # when this scope has been seen before.
        seen_scopes = tf.get_collection('__countsketch')
        if scope.name in seen_scopes:
            scope.reuse_variables()
        tf.add_to_collection('__countsketch', scope.name)

        bucket_var = tf.get_variable(
            'h', [input_size],
            initializer=tf.random_uniform_initializer(0, project_size),
            trainable=False)
        sign_var = tf.get_variable(
            's', [input_size],
            initializer=tf.random_uniform_initializer(0, 2),
            trainable=False)

        buckets = tf.cast(bucket_var, 'int32')
        signs = tf.cast(tf.floor(sign_var) * 2 - 1, 'int32')  # 1 or -1

        sk = _sketch_op.count_sketch(probs, buckets, signs, project_size)
        sk.set_shape([probs.get_shape()[0], project_size])
        return sk
def dropout_sparse(x, keep_prob, num_nonzero_elems):
    """Apply dropout to the stored entries of a sparse tensor.

    Args:
        x: sparse tensor.
        keep_prob: probability of keeping each stored entry.
        num_nonzero_elems: number of stored entries in ``x``; one random
            draw is made per entry.

    Returns:
        The retained entries scaled by ``1 / keep_prob``.
    """
    draws = tf.random_uniform([num_nonzero_elems])
    # floor(keep_prob + U[0, 1)) is 1 with probability keep_prob, else 0.
    keep_mask = tf.cast(tf.floor(keep_prob + draws), dtype=tf.bool)
    kept = tf.sparse_retain(x, keep_mask)
    return kept * (1./keep_prob)
def test_ImageSample(self):
    """Check ``sample`` against a NumPy nearest-neighbour reference."""
    import numpy as np
    h, w = 3, 4

    def np_sample(img, coords):
        # Reference implementation: clamp the integer (y, x) coordinates to
        # the image and look the pixels up directly.
        upper = np.array([img.shape[1] - 1, img.shape[2] - 1])
        coords = np.minimum(np.maximum(coords, 0), upper)
        n_img = img.shape[0]
        xs = coords[:, :, :, 1].reshape((n_img, -1))
        ys = coords[:, :, :, 0].reshape((n_img, -1))

        out_h, out_w = coords.shape[1], coords.shape[2]
        ret = np.zeros((n_img, out_h, out_w, img.shape[3]), dtype='float32')
        for k in range(n_img):
            picked = img[k, ys[k], xs[k], :]
            ret[k, :, :, :] = picked.reshape((out_h, out_w, 3))
        return ret

    bimg = np.random.rand(2, h, w, 3).astype('float32')

    # Random coordinates that deliberately fall outside the image on both sides.
    mat = (np.random.rand(2, 5, 5, 2) - 0.2) * np.array([h + 3, w + 3])
    rounded = np.floor(mat + 0.5).astype('int32')
    true_res = np_sample(bimg, rounded)

    inp, mapping = self.make_variable(bimg, mat)
    output = sample(inp, tf.cast(tf.floor(mapping + 0.5), tf.int32))
    res = self.run_variable(output)

    self.assertTrue((res == true_res).all())
def _cdf(self, y):
    """CDF of the quantized variable Y evaluated at ``y``.

    Contract:
        cdf(y) = 1            if y >= high,
               = 0            if y <  low,
               = P[X <= y]    otherwise,
    where X follows ``self.distribution``. Because Y only has mass on the
    integers, P[Y <= y] equals P[Y <= floor(y)], so ``y`` is floored first.
    """
    lower_cutoff = self._low
    upper_cutoff = self._high

    floored = tf.floor(y)

    # P[X <= j]; this is the answer wherever low <= j < high.
    cdf_values = self.distribution.cdf(floored)

    # Broadcast the floored input to the result's shape: a single
    # distribution may be evaluated on many samples, or the other way round.
    floored = floored + tf.zeros_like(cdf_values)

    # Overwrite the values at and beyond the cutoffs.
    if lower_cutoff is not None:
        cdf_values = tf.where(floored < lower_cutoff,
                              tf.zeros_like(cdf_values),
                              cdf_values)
    if upper_cutoff is not None:
        cdf_values = tf.where(floored >= upper_cutoff,
                              tf.ones_like(cdf_values),
                              cdf_values)
    return cdf_values
def fpn_map_rois_to_levels(boxes):
    """
    Assign boxes to level 2~5.

    The level of a box is ``floor(4 + log2(sqrt(area) / 224 + 1e-6))``;
    everything at or below 2 goes to level 2 and everything at or above 5
    goes to level 5.

    Args:
        boxes (nx4):

    Returns:
        [tf.Tensor]: 4 tensors for level 2-5. Each tensor is a vector of indices of boxes in its level.
        [tf.Tensor]: 4 tensors, the gathered boxes in each level.

    Be careful that the returned tensor could be empty.
    """
    sqrtarea = tf.sqrt(tf_area(boxes))
    log2_scale = tf.log(sqrtarea * (1. / 224) + 1e-6) * (1.0 / np.log(2))
    level = tf.to_int32(tf.floor(4 + log2_scale))

    # RoI levels range from 2~5 (not 6)
    selected = [
        tf.where(level <= 2),
        tf.where(tf.equal(level, 3)),   # == is not supported
        tf.where(tf.equal(level, 4)),
        tf.where(level >= 5)]

    level_ids = []
    for offset, positions in enumerate(selected):
        level_ids.append(
            tf.reshape(positions, [-1], name='roi_level{}_id'.format(offset + 2)))

    num_in_levels = []
    for offset, ids in enumerate(level_ids):
        num_in_levels.append(
            tf.size(ids, name='num_roi_level{}'.format(offset + 2)))
    add_moving_summary(*num_in_levels)

    level_boxes = [tf.gather(boxes, ids) for ids in level_ids]
    return level_ids, level_boxes
def sample(probabilities):
    '''
    Draw a binary sample from a tensor of probabilities

    Each element becomes 1 with its own probability and 0 otherwise, by
    flooring the probability plus uniform noise from [0, 1).

    :param probabilities: A tensor of probabilities given by 'restricted_boltzman_machine.get_probabilities'
    :return: A sampled tensor with the same shape as the input
    '''
    noise = tf.random_uniform(tf.shape(probabilities), 0, 1)
    return tf.floor(probabilities + noise)
def generate_dropout_masks(keep_prob, shape, amount):
    """Build ``amount`` independent inverted-dropout masks.

    Each mask has the given ``shape``; its entries are ``1 / keep_prob``
    with probability ``keep_prob`` and 0 otherwise.

    Returns:
        A list of ``amount`` mask tensors.
    """
    return [
        tf.floor(tf.random_uniform(shape) + (keep_prob)) / (keep_prob)
        for _ in range(amount)
    ]
def quantize(t, quant_scale, max_value=1.0):
    """Quantize a tensor t with each element in [-max_value, max_value].

    The input is clamped to the range, shifted to be non-negative, scaled
    by ``quant_scale`` and rounded to the nearest integer via
    ``floor(. + 0.5)``, then mapped back. The gradient of the ``Floor`` op
    is overridden with the registered "CustomIdG" gradient.
    """
    clamped = tf.minimum(max_value, tf.maximum(t, -max_value))
    scaled = quant_scale * (clamped + max_value) + 0.5
    graph = tf.get_default_graph()
    with graph.gradient_override_map({"Floor": "CustomIdG"}):
        quantized = (tf.floor(scaled) / quant_scale) - max_value
    return quantized
def loop_body(should_continue, k):
    """Resample the non-accepted points."""
    # U is drawn from a range chosen so that the resulting sample K lies in
    # [0, tf.int64.max); an accepted sample is reported as K + 1.
    uniform_draw = tf.random_uniform(
        shape,
        minval=minval_u,
        maxval=maxval_u,
        dtype=self.power.dtype,
        seed=seed())

    # X follows the continuous "hat" density h(x) proportional to x^(-power),
    # obtained by inverting the hat integral H(x) = \int_x^inf h(t) dt.
    proposal = self._hat_integral_inverse(uniform_draw)

    # Rejection-inversion: for X in (K - .5, K + .5) the candidate K is
    # accepted iff X <= H_inverse(H(K + .5) + pmf(K + 1)). Since
    # X = H_inverse(U), that is the same as U <= H(K + .5) + pmf(K + 1).

    # Only points that are still pending get a new candidate,
    # K = floor(X + 0.5).
    k = tf.where(should_continue, tf.floor(proposal + 0.5), k)
    threshold = self._hat_integral(k + .5) + tf.exp(self._log_prob(k + 1))
    accepted = (uniform_draw <= threshold)

    return [should_continue & (~accepted), k]
def rnn_decoder(cell, inputs, initial_state, embedding_size, embedding_length, sequence_length,
                name='RNNDecoder', reuse=False, use_inputs_prob=0.0, static_input=None):
    """Unroll an RNN decoder that feeds back either the true or its own previous token.

    Args:
        cell: RNN cell, called as ``cell(step_input, state)``; must expose
            ``output_size``.
        inputs: per-step token ids; ``inputs[j - 1]`` is looked up in the
            embedding table for step ``j``.
        initial_state: initial cell state; its first dimension gives the
            batch size.
        embedding_size: dimensionality of one embedding vector.
        embedding_length: number of rows in the embedding table, which is
            also the size of the output projection.
        sequence_length: number of decoding steps (Python int).
        name: variable scope name.
        reuse: whether to reuse the variables in that scope.
        use_inputs_prob: probability of feeding the true previous input
            instead of the decoder's own argmax output. One draw is made
            per step and shared by the whole batch.
        static_input: optional tensor concatenated to every step input
            along axis 1.

    Returns:
        ``(states, outputs, outputs_softmax)``: the per-step states (without
        the initial one), the per-step output projections, and the per-step
        softmax tensors, each with an extra axis inserted at position 1.
    """
    with tf.variable_scope(name, reuse=reuse):
        with tf.name_scope("embedding"):
            batch_size = tf.shape(initial_state)[0]
            embedding_table = tf.get_variable(
                name='embedding_table',
                shape=[embedding_length, embedding_size],
                initializer=tf.truncated_normal_initializer(
                    stddev=glorot_mul(embedding_length, embedding_size)),
            )
            # 0 is index for _SOS_ (start of sentence symbol)
            initial_embedding = tf.gather(embedding_table,
                                          tf.zeros(tf.pack([batch_size]), tf.int32))

        states = [initial_state]
        outputs = []
        outputs_softmax = []
        decoder_outputs_argmax_embedding = []

        for j in range(sequence_length):
            with tf.variable_scope(tf.get_variable_scope(), reuse=True if j > 0 else None):
                # Step input: the start symbol at step 0; afterwards either
                # the previous true input (hence index j - 1) or the
                # embedding of the previous argmax output.
                step_input = initial_embedding
                if j > 0:
                    true_input = tf.gather(embedding_table, inputs[j - 1])
                    decoded_input = decoder_outputs_argmax_embedding[-1]
                    # floor(U[p, 1 + p)) is 1 with probability p, else 0.
                    choice = tf.floor(
                        tf.random_uniform([1], use_inputs_prob, 1 + use_inputs_prob, tf.float32))
                    step_input = choice * true_input + (1.0 - choice) * decoded_input

                # Compare against None explicitly: the truth value of a
                # tensor is undefined and raises a TypeError.
                if static_input is not None:
                    step_input = tf.concat(1, [step_input, static_input])

                output, state = cell(step_input, states[-1])

                projection = linear(
                    input=output,
                    input_size=cell.output_size,
                    output_size=embedding_length,
                    name='output_linear_projection'
                )

                outputs.append(projection)
                states.append(state)

                softmax = tf.nn.softmax(projection, name="output_softmax")
                # we do not compute the gradient through argmax
                output_argmax = tf.stop_gradient(tf.argmax(softmax, 1))
                # we do not compute the gradient for embeddings when used
                # with noisy argmax outputs
                output_argmax_embedding = tf.stop_gradient(
                    tf.gather(embedding_table, output_argmax))
                decoder_outputs_argmax_embedding.append(output_argmax_embedding)

                outputs_softmax.append(tf.expand_dims(softmax, 1))

        # remove the initial state
        states = states[1:]

        return states, outputs, outputs_softmax
def ImageSample(inputs, borderMode='repeat'):
    """
    Sample the images using the given coordinates, by bilinear interpolation.
    This was described in the paper:
    `Spatial Transformer Networks <http://arxiv.org/abs/1506.02025>`_.

    This is equivalent to `torch.nn.functional.grid_sample`,
    up to some non-trivial coordinate transformation.

    This implementation returns pixel value at pixel (1, 1) for a floating point coordinate (1.0, 1.0).
    Note that this may not be what you need.

    Args:
        inputs (list): [images, coords]. images has shape NHWC.
            coords has shape (N, H', W', 2), where each pair of the last dimension is a (y, x) real-value
            coordinate.
        borderMode: either "repeat" or "constant" (zero-filled)

    Returns:
        tf.Tensor: a tensor named ``output`` of shape (N, H', W', C).
    """
    log_deprecated("ImageSample", "Please implement it in your own code instead!", "2018-12-01")
    image, mapping = inputs
    assert image.get_shape().ndims == 4 and mapping.get_shape().ndims == 4
    input_shape = image.get_shape().as_list()[1:]
    assert None not in input_shape, \
        "Images in ImageSample layer must have fully-defined shape"
    assert borderMode in ['repeat', 'constant']

    raw_coords = mapping
    coords = tf.maximum(mapping, 0.0)
    lower = tf.floor(coords)
    upper = lower + 1

    # Fractional offset from the lower corner and its complement.
    frac = coords - lower
    inv_frac = 1.0 - frac  # bxh2xw2x2

    lower_y, lower_x = tf.split(lower, 2, 3)
    upper_y, upper_x = tf.split(upper, 2, 3)

    # The two mixed corners: (lower y, upper x) and (upper y, lower x).
    ly_ux = tf.concat([lower_y, upper_x], 3)
    uy_lx = tf.concat([upper_y, lower_x], 3)

    frac_y, frac_x = tf.split(frac, 2, 3)
    inv_frac_y, inv_frac_x = tf.split(inv_frac, 2, 3)

    corner_terms = [
        sample(image, lower) * inv_frac_x * inv_frac_y,
        sample(image, upper) * frac_x * frac_y,
        sample(image, ly_ux) * inv_frac_y * frac_x,
        sample(image, uy_lx) * frac_y * inv_frac_x,
    ]
    ret = tf.add_n(corner_terms, name='sampled')

    if borderMode != 'constant':
        return tf.identity(ret, name='output')

    # 'constant' border: zero every output whose original coordinate lies
    # outside [0, size - 1] in either dimension.
    max_coor = tf.constant([input_shape[0] - 1, input_shape[1] - 1], dtype=tf.float32)
    not_below = tf.greater_equal(raw_coords, 0.0)
    not_above = tf.less_equal(raw_coords, max_coor)
    inside = tf.logical_and(not_below, not_above)  # bxh2xw2x2
    inside = tf.reduce_all(inside, [3])  # bxh2xw2 boolean
    inside = tf.expand_dims(inside, 3)
    ret = ret * tf.cast(inside, tf.float32)
    return tf.identity(ret, name='output')
def imageWarpIm(imageBatch,pMtrxBatch,opt,name=None):
    """Warp a batch of images with per-example 3x3 transforms (bilinear sampling).

    A canonical [-1, 1] x [-1, 1] grid of ``opt.H`` x ``opt.W`` points is
    mapped through ``opt.warpGTmtrx @ pMtrxBatch`` and dehomogenised. The
    result is used as pixel coordinates into ``imageBatch``; samples outside
    the image read as zero.

    Args:
        imageBatch: image batch without a channel axis (one is appended
            here); presumably [batch, opt.H, opt.W] -- confirm with the
            caller.
        pMtrxBatch: batch of transformation matrices, right-multiplied
            onto ``opt.warpGTmtrx``.
        opt: options object providing ``H``, ``W`` and ``warpGTmtrx``.
        name: optional name for the output tensor.

    Returns:
        The warped images, shape [batch, opt.H, opt.W, 1].
    """
    with tf.name_scope("ImWarp"):
        imageBatch = tf.expand_dims(imageBatch,-1)
        batchSize = tf.shape(imageBatch)[0]
        # The source image is indexed row-major, so the width must be opt.W.
        # It used to be opt.H, which only works for square images.
        imageH,imageW = opt.H,opt.W
        H,W = opt.H,opt.W
        warpGTmtrxBatch = tf.tile(tf.expand_dims(opt.warpGTmtrx,0),[batchSize,1,1])
        transMtrxBatch = tf.matmul(warpGTmtrxBatch,pMtrxBatch)
        # warp the canonical coordinates
        X,Y = np.meshgrid(np.linspace(-1,1,W),np.linspace(-1,1,H))
        XYhom = tf.transpose(tf.stack([X.reshape([-1]),Y.reshape([-1]),np.ones([X.size])],axis=1))
        XYhomBatch = tf.tile(tf.expand_dims(XYhom,0),[batchSize,1,1])
        XYwarpHomBatch = tf.matmul(transMtrxBatch,tf.to_float(XYhomBatch))
        XwarpHom,YwarpHom,ZwarpHom = tf.split(XYwarpHomBatch,3,1)
        Xwarp = tf.reshape(XwarpHom/ZwarpHom,[batchSize,H,W])
        Ywarp = tf.reshape(YwarpHom/ZwarpHom,[batchSize,H,W])
        # get the integer sampling coordinates
        Xfloor,Xceil = tf.floor(Xwarp),tf.ceil(Xwarp)
        Yfloor,Yceil = tf.floor(Ywarp),tf.ceil(Ywarp)
        XfloorInt,XceilInt = tf.to_int32(Xfloor),tf.to_int32(Xceil)
        YfloorInt,YceilInt = tf.to_int32(Yfloor),tf.to_int32(Yceil)
        imageIdx = tf.tile(tf.reshape(tf.range(batchSize),[batchSize,1,1]),[1,H,W])
        # Flatten the pixels and append one all-zero row; out-of-image
        # samples are redirected to that row.
        imageVec = tf.reshape(imageBatch,[-1,tf.shape(imageBatch)[3]])
        imageVecOutside = tf.concat([imageVec,tf.zeros([1,tf.shape(imageBatch)[3]])],0)
        idxUL = (imageIdx*imageH+YfloorInt)*imageW+XfloorInt
        idxUR = (imageIdx*imageH+YfloorInt)*imageW+XceilInt
        idxBL = (imageIdx*imageH+YceilInt)*imageW+XfloorInt
        idxBR = (imageIdx*imageH+YceilInt)*imageW+XceilInt
        idxOutside = tf.fill([batchSize,H,W],batchSize*imageH*imageW)

        def insideIm(Xint,Yint):
            # True where the integer pixel coordinate lies inside the image.
            return (Xint>=0)&(Xint<imageW)&(Yint>=0)&(Yint<imageH)

        idxUL = tf.where(insideIm(XfloorInt,YfloorInt),idxUL,idxOutside)
        idxUR = tf.where(insideIm(XceilInt,YfloorInt),idxUR,idxOutside)
        idxBL = tf.where(insideIm(XfloorInt,YceilInt),idxBL,idxOutside)
        idxBR = tf.where(insideIm(XceilInt,YceilInt),idxBR,idxOutside)
        # bilinear interpolation
        Xratio = tf.reshape(Xwarp-Xfloor,[batchSize,H,W,1])
        Yratio = tf.reshape(Ywarp-Yfloor,[batchSize,H,W,1])
        ImUL = tf.to_float(tf.gather(imageVecOutside,idxUL))*(1-Xratio)*(1-Yratio)
        ImUR = tf.to_float(tf.gather(imageVecOutside,idxUR))*(Xratio)*(1-Yratio)
        ImBL = tf.to_float(tf.gather(imageVecOutside,idxBL))*(1-Xratio)*(Yratio)
        ImBR = tf.to_float(tf.gather(imageVecOutside,idxBR))*(Xratio)*(Yratio)
        ImWarpBatch = ImUL+ImUR+ImBL+ImBR
        ImWarpBatch = tf.identity(ImWarpBatch,name=name)
    return ImWarpBatch
def grey_scale_image(self, x):
    """Convert a 3-channel image batch to one floored grey channel.

    A non-trainable, bias-free 1x1 convolution mixes the three input
    channels with the fixed weights 0.114, 0.587 and 0.299 (in channel
    order); the result is floored.

    Args:
        x: rank-4 tensor whose last dimension is statically 3.

    Returns:
        The floored single-channel tensor.
    """
    assert len(x.shape) == 4
    assert x.shape[-1].value == 3, 'number of channels must be 3 (i.e. RGB)'

    channel_weights = tf.constant_initializer([[0.114], [0.587], [0.299]])
    grey = tf.layers.conv2d(
        x, 1, [1, 1],
        padding='same',
        kernel_initializer=channel_weights,
        use_bias=False,
        trainable=False)
    return tf.floor(grey)
def testStudentTWithAbsDfSoftplusSigma(self):
    """The wrapper must expose floor(|df|), mu unchanged and softplus(sigma)."""
    with self.test_session():
        raw_df = tf.constant([-3.2, -4.6])
        raw_mu = tf.constant([-4.2, 3.4])
        raw_sigma = tf.constant([-6.4, -8.8])
        student = ds.StudentTWithAbsDfSoftplusSigma(
            df=raw_df, mu=raw_mu, sigma=raw_sigma)

        expected_df = tf.floor(tf.abs(raw_df)).eval()
        self.assertAllClose(expected_df, student.df.eval())

        self.assertAllClose(raw_mu.eval(), student.mu.eval())

        expected_sigma = tf.nn.softplus(raw_sigma).eval()
        self.assertAllClose(expected_sigma, student.sigma.eval())
def ImageSample(inputs, borderMode='repeat'):
    """
    Sample the template image using the given coordinate, by bilinear interpolation.
    This was described in the paper:
    `Spatial Transformer Networks <http://arxiv.org/abs/1506.02025>`_.

    Args:
        inputs (list): [template, coords]. template has shape NHWC.
            coords has shape (N,H',W',2), where each pair of the last dimension is a (y, x) real-value
            coordinate.
        borderMode: either "repeat" or "constant" (zero-filled)

    Returns:
        tf.Tensor: a tensor named ``output`` of shape (N,H',W',C).
    """
    # TODO borderValue
    template, mapping = inputs
    assert template.get_shape().ndims == 4 and mapping.get_shape().ndims == 4
    input_shape = template.get_shape().as_list()[1:]
    assert None not in input_shape, \
        "Images in ImageSample layer must have fully-defined shape"
    assert borderMode in ['repeat', 'constant']

    unclamped = mapping
    clamped = tf.maximum(mapping, 0.0)
    floor_xy = tf.floor(clamped)
    ceil_xy = floor_xy + 1

    # Bilinear weights: distance to the lower corner and its complement.
    weight_hi = clamped - floor_xy
    weight_lo = 1.0 - weight_hi  # bxh2xw2x2

    floor_y, floor_x = tf.split(floor_xy, 2, 3)
    ceil_y, ceil_x = tf.split(ceil_xy, 2, 3)

    # Mixed corners: (floor y, ceil x) and (ceil y, floor x).
    fy_cx = tf.concat([floor_y, ceil_x], 3)
    cy_fx = tf.concat([ceil_y, floor_x], 3)

    hi_y, hi_x = tf.split(weight_hi, 2, 3)
    lo_y, lo_x = tf.split(weight_lo, 2, 3)

    contributions = [
        sample(template, floor_xy) * lo_x * lo_y,
        sample(template, ceil_xy) * hi_x * hi_y,
        sample(template, fy_cx) * lo_y * hi_x,
        sample(template, cy_fx) * hi_y * lo_x,
    ]
    ret = tf.add_n(contributions, name='sampled')

    if borderMode == 'constant':
        # Zero every output whose original coordinate falls outside
        # [0, size - 1] in either dimension.
        max_coor = tf.constant([input_shape[0] - 1, input_shape[1] - 1], dtype=tf.float32)
        lower_ok = tf.greater_equal(unclamped, 0.0)
        upper_ok = tf.less_equal(unclamped, max_coor)
        in_bounds = tf.logical_and(lower_ok, upper_ok)  # bxh2xw2x2
        in_bounds = tf.reduce_all(in_bounds, [3])  # bxh2xw2 boolean
        in_bounds = tf.expand_dims(in_bounds, 3)
        ret = ret * tf.cast(in_bounds, tf.float32)
    return tf.identity(ret, name='output')
def _log_unnormalized_prob(self, x):
    """Return ``x * log_rate - lgamma(1 + x)`` on the support, ``-inf`` elsewhere.

    ``x`` is floored first unless ``self.interpolate_nondiscrete`` is set,
    then clamped from below at 0. Entries changed by that step (negative
    values, and non-integers when flooring) are unsupported and get
    ``-inf``.
    """
    if self.interpolate_nondiscrete:
        candidate = x
    else:
        candidate = tf.floor(x)
    # Clamping keeps lgamma away from invalid arguments at negative points;
    # those points are overwritten with -inf below.
    safe_x = tf.maximum(candidate, 0.)

    log_prob = safe_x * self.log_rate - tf.lgamma(1. + safe_x)

    unchanged = tf.equal(x, safe_x)
    is_supported = tf.broadcast_to(unchanged, tf.shape(log_prob))
    minus_inf = np.array(-np.inf, dtype=log_prob.dtype.as_numpy_dtype)
    neg_inf = tf.fill(tf.shape(log_prob), value=minus_inf)
    return tf.where(is_supported, log_prob, neg_inf)
def dropout_sparse(x, keep_prob, num_nonzero_elems, dtype=tf.float32):
    """Dropout for sparse tensors. Currently fails for very large sparse tensors (>1M elements)

    Args:
        x: sparse tensor.
        keep_prob: probability of keeping each stored entry.
        num_nonzero_elems: number of stored entries in ``x``; one random
            draw is made per entry.
        dtype: dtype used for the random draws and for the result.

    Returns:
        The retained entries, cast to ``dtype`` and scaled by
        ``1 / keep_prob``.
    """
    threshold = tf.cast(keep_prob, dtype=dtype)
    draws = tf.random_uniform([num_nonzero_elems], dtype=dtype)
    # floor(keep_prob + U[0, 1)) is 1 with probability keep_prob, else 0.
    keep_mask = tf.cast(tf.floor(threshold + draws), dtype=tf.bool)
    kept = tf.sparse_retain(x, keep_mask)
    return tf.cast(kept, dtype) * tf.cast((1./keep_prob), dtype)
def calculate_image(noise_values, phases, shape):
    """Combine ``phases`` noise layers into a three-channel image tensor.

    ``noise_values`` is reshaped to ``[shape[0], shape[1], phases]``; layer
    ``i`` is divided by ``2 ** i``. The layers are summed, and the sum is
    shifted by 1, scaled by 128 and floored. The single resulting channel
    is repeated three times along axis 2.

    NOTE: ``tf.split`` and ``tf.concat`` are called with the axis as the
    first argument, i.e. the pre-1.0 TensorFlow argument order.
    """
    layers = tf.reshape(noise_values, [shape[0], shape[1], phases])
    exponents = tf.linspace(0.0, tf.to_float(phases - 1), phases)
    attenuated = layers / tf.pow(2.0, exponents)
    summed = tf.add_n(tf.split(2, phases, attenuated))
    val = tf.floor((summed + 1.0) * 128)
    return tf.concat(2, [val, val, val])
def _resample_linear(self, inputs, sample_coords):
    """Resample ``inputs`` at ``sample_coords`` with n-linear interpolation.

    Args:
        inputs: tensor with a static shape; dimension 0 is the batch and
            the dimensions between the first and the last are treated as
            spatial.
        sample_coords: tensor whose last axis holds one coordinate per
            spatial dimension of ``inputs``; the dimensions between the
            first and the last are the output spatial size.

    Returns:
        The interpolated samples.
    """
    in_size = inputs.shape.as_list()
    in_spatial_size = in_size[1:-1]
    in_spatial_rank = infer_spatial_rank(inputs)
    batch_size = in_size[0]

    out_spatial_rank = infer_spatial_rank(sample_coords)
    out_spatial_size = sample_coords.shape.as_list()[1:-1]

    # 2-D input with 'ZERO' boundary is delegated to the contrib resampler
    # after swapping the two spatial axes.
    if in_spatial_rank == 2 and self.boundary == 'ZERO':
        inputs = tf.transpose(inputs, [0, 2, 1, 3])
        return tf.contrib.resampler.resampler(inputs, sample_coords)

    # One coordinate tensor per spatial dimension.
    xy = tf.unstack(sample_coords, axis=-1)
    base_coords = [tf.floor(coords) for coords in xy]
    # Lower and upper integer neighbours, passed through the boundary
    # function of this layer (defined elsewhere) before the cast.
    floor_coords = [
        tf.cast(self.boundary_func(x, in_spatial_size[idx]), COORDINATES_TYPE)
        for (idx, x) in enumerate(base_coords)]
    ceil_coords = [
        tf.cast(self.boundary_func(x + 1.0, in_spatial_size[idx]), COORDINATES_TYPE)
        for (idx, x) in enumerate(base_coords)]

    if self.boundary == 'ZERO':
        # Weights are measured against the boundary-adjusted neighbours.
        weight_0 = [tf.expand_dims(x - tf.cast(i, tf.float32), -1)
                    for (x, i) in zip(xy, floor_coords)]
        weight_1 = [tf.expand_dims(tf.cast(i, tf.float32) - x, -1)
                    for (x, i) in zip(xy, ceil_coords)]
    else:
        # Weights are measured against the unadjusted floor, so they
        # always sum to one.
        weight_0 = [tf.expand_dims(x - i, -1)
                    for (x, i) in zip(xy, base_coords)]
        weight_1 = [1.0 - w for w in weight_0]

    # Batch index for every output location, shaped like the coordinates.
    batch_ids = tf.reshape(
        tf.range(batch_size), [batch_size] + [1] * out_spatial_rank)
    batch_ids = tf.tile(batch_ids, [1] + out_spatial_size)
    sc = (floor_coords, ceil_coords)

    def get_knot(binary_code):
        # Gather the corner selected by binary_code: per spatial dimension,
        # 0 picks the floor neighbour and 1 the ceil neighbour.
        coord = [sc[code][ind] for ind, code in enumerate(binary_code)]
        coord = tf.stack([batch_ids] + coord, -1)
        return tf.gather_nd(inputs, coord)

    def _pyramid_combination(two_samples, w_0, w_1):
        # Recursively blend the corner samples: the last weight pair
        # combines the even-indexed and odd-indexed halves, and the
        # remaining pairs are applied within each half.
        if len(w_0) == 1:
            return two_samples[0] * w_1[0] + two_samples[1] * w_0[0]
        f_0 = _pyramid_combination(two_samples[::2], w_0[:-1], w_1[:-1])
        f_1 = _pyramid_combination(two_samples[1::2], w_0[:-1], w_1[:-1])
        return f_0 * w_1[-1] + f_1 * w_0[-1]

    # All 2 ** rank corners, encoded as zero-padded binary digit lists.
    binary_neighbour_ids = [
        [int(c) for c in format(i, '0%ib' % in_spatial_rank)]
        for i in range(2 ** in_spatial_rank)]
    samples = [get_knot(bc) for bc in binary_neighbour_ids]
    return _pyramid_combination(samples, weight_0, weight_1)
def _compare(self, x, use_gpu):
    """Assert that ``tf.floor`` and ``tf.ceil`` agree with NumPy on ``x``.

    Both the values and the static shapes are compared.
    """
    expected_floor = np.floor(x)
    expected_ceil = np.ceil(x)

    with self.test_session(use_gpu=use_gpu) as sess:
        tensor = tf.convert_to_tensor(x)
        floor_op = tf.floor(tensor)
        ceil_op = tf.ceil(tensor)
        actual_floor, actual_ceil = sess.run([floor_op, ceil_op])

    for expected, actual in ((expected_floor, actual_floor),
                             (expected_ceil, actual_ceil)):
        self.assertAllEqual(expected, actual)
    for expected, op in ((expected_floor, floor_op),
                         (expected_ceil, ceil_op)):
        self.assertShapeEqual(expected, op)
def drop_path(net, keep_prob, is_training=True):
    """Drops out a whole example hiddenstate with the specified probability.

    When ``is_training`` is false the input is returned untouched.
    Otherwise one keep/drop decision is drawn per example (noise shape
    ``[batch, 1, 1, 1]``) and kept examples are scaled by ``1 / keep_prob``.
    """
    if not is_training:
        return net

    batch_size = tf.shape(net)[0]
    draws = tf.random_uniform([batch_size, 1, 1, 1], dtype=tf.float32)
    # floor(keep_prob + U[0, 1)) is 1 with probability keep_prob, else 0.
    keep_mask = tf.floor(keep_prob + draws)
    return tf.div(net, keep_prob) * keep_mask
def _step(self, J, voltage, dt):
    """Advance the spiking state by one step.

    The rectified input ``relu(J) * dt`` is added to the voltage; the
    integer part of the voltage becomes the number of spikes emitted this
    step and is subtracted again.

    Returns:
        ``(out, voltage)``: the spike count scaled by ``self.alpha`` and
        the remaining voltage, both wrapped in ``tf.stop_gradient``.
    """
    voltage += tf.nn.relu(J) * dt
    spike_count = tf.floor(voltage)
    voltage -= spike_count
    scaled_spikes = spike_count * self.alpha

    # stop_gradient keeps any NaNs from propagating: they would get through
    # the cond even when the spiking version is not being used at all.
    return tf.stop_gradient(scaled_spikes), tf.stop_gradient(voltage)
# Train with plain gradient descent, then report accuracy.
# `cost`, `hypothesis`, `W`, `X`, `Y`, `x_data` and `y_data` are defined
# earlier in the script (outside this excerpt).

# The learning rate is held in a variable and handed to the optimizer.
a = tf.Variable(0.01)
optimizer = tf.train.GradientDescentOptimizer(a)
train = optimizer.minimize(cost)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    # 1000 full-batch updates; log the cost and weights every 200 steps.
    for step in range(1000):
        sess.run(train, feed_dict={X: x_data, Y: y_data})
        if step % 200 == 0:
            print(step, sess.run(cost, feed_dict={ X: x_data, Y: y_data }), sess.run(W))

    # floor(h + 0.5) rounds the hypothesis to the nearest integer before it
    # is compared with the labels.
    correct_prediction = tf.equal(tf.floor(hypothesis + 0.5), Y)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print( sess.run([ hypothesis, tf.floor(hypothesis + 0.5), correct_prediction, accuracy ], feed_dict={ X: x_data, Y: y_data }))
    print("Accuracy: ", accuracy.eval({X: x_data, Y: y_data}))
def test_Floor(self):
    """Run the shared check on ``tf.floor`` of a random 4x3 input shifted by -0.5."""
    shifted = self.random(4, 3) - 0.5
    self.check(tf.floor(shifted))
def transform_ray_v_depths(ray_depths, v_step, lfsize):
    """Resample ``ray_depths`` along y after shifting each ray by ``v_step``.

    Args:
        ray_depths: rank-4 tensor indexed as [b, y, x, v] (the first three
            sizes are read from its dynamic shape).
        v_step: shift applied along the v axis; it also scales the depth
            when the y coordinate is warped.
        lfsize: sequence whose element 3 is the size of the v axis.

    Returns:
        The bilinearly interpolated tensor, same layout as ``ray_depths``.
    """
    with tf.variable_scope('transform_ray_v_depths') as scope:
        b_sz = tf.shape(ray_depths)[0]
        y_sz = tf.shape(ray_depths)[1]
        x_sz = tf.shape(ray_depths)[2]
        v_sz = lfsize[3]

        # create and reparameterize light field grid
        # (v is centred on zero; b, y and x are plain indices)
        b_vals = tf.to_float(tf.range(b_sz))
        v_vals = tf.to_float(tf.range(v_sz)) - tf.to_float(v_sz - 1) / 2.0
        y_vals = tf.to_float(tf.range(y_sz))
        x_vals = tf.to_float(tf.range(x_sz))

        b, y, x, v = tf.meshgrid(b_vals, y_vals, x_vals, v_vals, indexing='ij')

        # warp coordinates by ray depths: only y moves with the depth, x is
        # unchanged, and v is shifted back to a zero-based index
        y_t = y + v_step * ray_depths
        x_t = x
        v_t = v - v_step + tf.to_float(v_sz - 1) / 2.0

        # indices for linear interpolation
        b_1 = tf.to_int32(b)
        y_1 = tf.to_int32(tf.floor(y_t))
        y_2 = y_1 + 1
        x_1 = tf.to_int32(tf.floor(x_t))
        x_2 = x_1 + 1
        v_1 = tf.to_int32(v_t)

        # clamp every index to the valid range before gathering
        y_1 = tf.clip_by_value(y_1, 0, y_sz - 1)
        y_2 = tf.clip_by_value(y_2, 0, y_sz - 1)
        x_1 = tf.clip_by_value(x_1, 0, x_sz - 1)
        x_2 = tf.clip_by_value(x_2, 0, x_sz - 1)
        v_1 = tf.clip_by_value(v_1, 0, v_sz - 1)

        # assemble interpolation indices (four y/x corners, same b and v)
        interp_pts_1 = tf.stack([b_1, y_1, x_1, v_1], -1)
        interp_pts_2 = tf.stack([b_1, y_2, x_1, v_1], -1)
        interp_pts_3 = tf.stack([b_1, y_1, x_2, v_1], -1)
        interp_pts_4 = tf.stack([b_1, y_2, x_2, v_1], -1)

        # gather light fields to be interpolated
        lf_1 = tf.gather_nd(ray_depths, interp_pts_1)
        lf_2 = tf.gather_nd(ray_depths, interp_pts_2)
        lf_3 = tf.gather_nd(ray_depths, interp_pts_3)
        lf_4 = tf.gather_nd(ray_depths, interp_pts_4)

        # calculate interpolation weights
        # NOTE(review): distances are measured from the *clipped* lower
        # index, so they are not confined to [0, 1] for out-of-range
        # coordinates -- confirm this is intended.
        y_1_f = tf.to_float(y_1)
        x_1_f = tf.to_float(x_1)
        d_y_1 = 1.0 - (y_t - y_1_f)
        d_y_2 = 1.0 - d_y_1
        d_x_1 = 1.0 - (x_t - x_1_f)
        d_x_2 = 1.0 - d_x_1

        w1 = d_y_1 * d_x_1
        w2 = d_y_2 * d_x_1
        w3 = d_y_1 * d_x_2
        w4 = d_y_2 * d_x_2

        lf = tf.add_n([w1 * lf_1, w2 * lf_2, w3 * lf_3, w4 * lf_4])
    return lf
def dropout(x, pkeep, phase=None, mask=None):
    """Apply a (non-rescaled) dropout mask to ``x``.

    Args:
        x: input tensor.
        pkeep: keep probability.
        phase: optional switch condition. When given, the result is
            ``switch(phase, mask * x, pkeep * x)``; when ``None`` the
            masked input is returned directly.
        mask: optional precomputed mask. When ``None`` a fresh 0/1 mask
            with keep probability ``pkeep`` is drawn.

    Returns:
        The masked tensor, or the ``switch`` between masked and
        ``pkeep``-scaled input.
    """
    if mask is None:
        # floor(pkeep + U[0, 1)) is 1 with probability pkeep, else 0.
        mask = tf.floor(pkeep + tf.random_uniform(tf.shape(x)))

    if phase is None:
        return mask * x
    return switch(phase, mask * x, pkeep * x)
def get_predictions_and_loss(self, input_ids, input_mask, text_len,
                             speaker_ids, genre, is_training, gold_starts,
                             gold_ends, cluster_ids, sentence_map):
    """Build the coreference graph: span scoring, pruning, antecedent scoring, loss.

    Steps, in order:
      1. Encode the segments with BERT and flatten them to one word sequence.
      2. Enumerate candidate spans up to `self.max_span_width` that stay
         inside one sentence.
      3. Score candidates and keep the top `k` spans via
         `coref_ops.extract_spans`.
      4. Pick up to `c` antecedents per span with `coarse_to_fine_pruning`
         and, if `config['fine_grained']`, refine span embeddings for
         `config['coref_depth']` rounds.
      5. Compute the loss with `self.softmax_loss` against gold cluster ids.

    Args:
        input_ids: token ids; its dims 0 and 1 are read as
            (num segments, segment length).
        input_mask: mask with the same leading shape as `input_ids`, used to
            flatten per-segment tensors.
        text_len: not referenced in this method.
        speaker_ids: per-token speaker ids, used only when
            `config['use_metadata']` is set.
        genre: index into the genre embedding table.
        is_training: forwarded to BERT and to `self.get_dropout`.
        gold_starts, gold_ends, cluster_ids: gold mention boundaries and
            their cluster ids, forwarded to `self.get_candidate_labels`.
        sentence_map: sentence index of every flattened word.

    Returns:
        A pair `(predictions, loss)` where `predictions` is the list
        `[candidate_starts, candidate_ends, candidate_mention_scores,
        top_span_starts, top_span_ends, top_antecedents,
        top_antecedent_scores]` and `loss` is a scalar (sum over spans).
    """
    model = modeling.BertModel(config=self.bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               use_one_hot_embeddings=False,
                               scope='bert')
    # NOTE: all_encoder_layers, num_sentences, max_sentence_length,
    # antecedent_doc and candidate_sentence_indices are assigned below but
    # never read again in this method.
    all_encoder_layers = model.get_all_encoder_layers()
    mention_doc = model.get_sequence_output()

    # Side effect: stores the dropout setting on the instance.
    self.dropout = self.get_dropout(self.config["dropout_rate"], is_training)

    num_sentences = tf.shape(mention_doc)[0]
    max_sentence_length = tf.shape(mention_doc)[1]
    mention_doc = self.flatten_emb_by_sentence(mention_doc, input_mask)
    num_words = util.shape(mention_doc, 0)
    antecedent_doc = mention_doc

    # ---- Candidate span enumeration ----
    flattened_sentence_indices = sentence_map
    candidate_starts = tf.tile(
        tf.expand_dims(tf.range(num_words), 1),
        [1, self.max_span_width])  # [num_words, max_span_width]
    candidate_ends = candidate_starts + tf.expand_dims(
        tf.range(self.max_span_width), 0)  # [num_words, max_span_width]
    candidate_start_sentence_indices = tf.gather(
        flattened_sentence_indices,
        candidate_starts)  # [num_words, max_span_width]
    # Ends are clamped to the last word only for the lookup; out-of-range
    # ends are removed by candidate_mask below.
    candidate_end_sentence_indices = tf.gather(
        flattened_sentence_indices,
        tf.minimum(candidate_ends,
                   num_words - 1))  # [num_words, max_span_width]
    # Keep spans that end inside the document and do not cross a sentence.
    candidate_mask = tf.logical_and(
        candidate_ends < num_words,
        tf.equal(
            candidate_start_sentence_indices,
            candidate_end_sentence_indices))  # [num_words, max_span_width]
    flattened_candidate_mask = tf.reshape(
        candidate_mask, [-1])  # [num_words * max_span_width]
    candidate_starts = tf.boolean_mask(
        tf.reshape(candidate_starts, [-1]),
        flattened_candidate_mask)  # [num_candidates]
    candidate_ends = tf.boolean_mask(
        tf.reshape(candidate_ends, [-1]),
        flattened_candidate_mask)  # [num_candidates]
    candidate_sentence_indices = tf.boolean_mask(
        tf.reshape(candidate_start_sentence_indices, [-1]),
        flattened_candidate_mask)  # [num_candidates]

    candidate_cluster_ids = self.get_candidate_labels(
        candidate_starts, candidate_ends, gold_starts, gold_ends,
        cluster_ids)  # [num_candidates]

    # ---- Mention scoring ----
    candidate_span_emb = self.get_span_emb(
        mention_doc, mention_doc, candidate_starts,
        candidate_ends)  # [num_candidates, emb]
    candidate_mention_scores = self.get_mention_scores(
        candidate_span_emb, candidate_starts, candidate_ends)
    candidate_mention_scores = tf.squeeze(
        candidate_mention_scores, 1)  # presumably [num_candidates]

    # beam size: floor(num_words * top_span_ratio), hard-capped at 3900
    k = tf.minimum(
        3900,
        tf.to_int32(
            tf.floor(
                tf.to_float(num_words) * self.config["top_span_ratio"])))
    # number of antecedents kept per span, never more than k
    c = tf.minimum(self.config["max_top_antecedents"], k)

    # pull from beam (extract_spans takes batched inputs, hence expand_dims)
    top_span_indices = coref_ops.extract_spans(
        tf.expand_dims(candidate_mention_scores, 0),
        tf.expand_dims(candidate_starts, 0),
        tf.expand_dims(candidate_ends, 0),
        tf.expand_dims(k, 0), num_words, True)  # [1, k]
    top_span_indices.set_shape([1, None])
    top_span_indices = tf.squeeze(top_span_indices, 0)  # [k]

    top_span_starts = tf.gather(candidate_starts, top_span_indices)  # [k]
    top_span_ends = tf.gather(candidate_ends, top_span_indices)  # [k]
    top_span_emb = tf.gather(candidate_span_emb,
                             top_span_indices)  # [k, emb]
    top_span_cluster_ids = tf.gather(candidate_cluster_ids,
                                     top_span_indices)  # [k]
    top_span_mention_scores = tf.gather(candidate_mention_scores,
                                        top_span_indices)  # [k]

    # ---- Metadata features ----
    genre_emb = tf.gather(
        tf.get_variable(
            "genre_embeddings",
            [len(self.genres), self.config["feature_size"]],
            initializer=tf.truncated_normal_initializer(stddev=0.02)),
        genre)  # [emb]
    if self.config['use_metadata']:
        speaker_ids = self.flatten_emb_by_sentence(speaker_ids, input_mask)
        top_span_speaker_ids = tf.gather(speaker_ids, top_span_starts)  # [k]
    else:
        top_span_speaker_ids = None

    # Score of the "no antecedent" option, fixed at zero.
    dummy_scores = tf.zeros([k, 1])  # [k, 1]
    top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.coarse_to_fine_pruning(
        top_span_emb, top_span_mention_scores, c)

    # ---- Segment distance between each span and its antecedents ----
    num_segs, seg_len = util.shape(input_ids, 0), util.shape(input_ids, 1)
    word_segments = tf.tile(tf.expand_dims(tf.range(0, num_segs), 1),
                            [1, seg_len])
    # Segment index of every real (unmasked) word, in flattened order.
    flat_word_segments = tf.boolean_mask(tf.reshape(word_segments, [-1]),
                                         tf.reshape(input_mask, [-1]))
    mention_segments = tf.expand_dims(
        tf.gather(flat_word_segments, top_span_starts), 1)  # [k, 1]
    antecedent_segments = tf.gather(flat_word_segments,
                                    tf.gather(top_span_starts,
                                              top_antecedents))  # [k, c]
    segment_distance = tf.clip_by_value(
        mention_segments - antecedent_segments, 0,
        self.config['max_training_sentences'] -
        1) if self.config['use_segment_distance'] else None  # [k, c]

    # ---- Antecedent scoring (optionally with iterative refinement) ----
    if self.config['fine_grained']:
        for i in range(self.config["coref_depth"]):
            # Variables are created on the first round and reused afterwards.
            with tf.variable_scope("coref_layer", reuse=(i > 0)):
                top_antecedent_emb = tf.gather(
                    top_span_emb, top_antecedents)  # [k, c, emb]
                top_antecedent_scores = top_fast_antecedent_scores + self.get_slow_antecedent_scores(
                    top_span_emb, top_antecedents, top_antecedent_emb,
                    top_antecedent_offsets, top_span_speaker_ids, genre_emb,
                    segment_distance)  # [k, c]
                top_antecedent_weights = tf.nn.softmax(
                    tf.concat([dummy_scores, top_antecedent_scores],
                              1))  # [k, c + 1]
                # Slot 0 (the dummy antecedent) attends to the span itself.
                top_antecedent_emb = tf.concat(
                    [tf.expand_dims(top_span_emb, 1), top_antecedent_emb],
                    1)  # [k, c + 1, emb]
                attended_span_emb = tf.reduce_sum(
                    tf.expand_dims(top_antecedent_weights, 2) *
                    top_antecedent_emb, 1)  # [k, emb]
                with tf.variable_scope("f"):
                    # Gate interpolating between old and attended embedding.
                    f = tf.sigmoid(
                        util.projection(
                            tf.concat([top_span_emb, attended_span_emb], 1),
                            util.shape(top_span_emb, -1)))  # [k, emb]
                    top_span_emb = f * attended_span_emb + (
                        1 - f) * top_span_emb  # [k, emb]
    else:
        top_antecedent_scores = top_fast_antecedent_scores

    top_antecedent_scores = tf.concat(
        [dummy_scores, top_antecedent_scores], 1)  # [k, c + 1]

    # ---- Labels and loss ----
    top_antecedent_cluster_ids = tf.gather(top_span_cluster_ids,
                                           top_antecedents)  # [k, c]
    # log(1) = 0 leaves valid entries untouched; log(0) = -inf is cast to int
    # for masked entries, presumably so that they can never equal a real
    # cluster id -- TODO confirm the cast result on the target platform.
    top_antecedent_cluster_ids += tf.to_int32(
        tf.log(tf.to_float(top_antecedents_mask)))  # [k, c]
    same_cluster_indicator = tf.equal(top_antecedent_cluster_ids,
                                      tf.expand_dims(
                                          top_span_cluster_ids, 1))  # [k, c]
    # Spans with cluster id 0 (or below) are treated as not being mentions.
    non_dummy_indicator = tf.expand_dims(top_span_cluster_ids > 0,
                                         1)  # [k, 1]
    pairwise_labels = tf.logical_and(same_cluster_indicator,
                                     non_dummy_indicator)  # [k, c]
    # The dummy antecedent is correct exactly when no real one is.
    dummy_labels = tf.logical_not(
        tf.reduce_any(pairwise_labels, 1, keepdims=True))  # [k, 1]
    top_antecedent_labels = tf.concat([dummy_labels, pairwise_labels],
                                      1)  # [k, c + 1]
    loss = self.softmax_loss(top_antecedent_scores,
                             top_antecedent_labels)  # [k]
    loss = tf.reduce_sum(loss)  # []

    return [
        candidate_starts, candidate_ends, candidate_mention_scores,
        top_span_starts, top_span_ends, top_antecedents,
        top_antecedent_scores
    ], loss
def make_mask(keep_prob, units):
    """Sample an inverted-dropout mask of shape `[FLAGS.batch_size, units]`.

    Args:
        keep_prob: probability that an entry is kept.
        units: size of the second mask dimension.

    Returns:
        A tensor whose entries are `1 / keep_prob` with probability
        `keep_prob` and 0 otherwise.
    """
    mask_shape = tf.stack([FLAGS.batch_size, units])
    # keep_prob + U[0, 1) lies in [keep_prob, 1 + keep_prob): flooring gives
    # 0. on [keep_prob, 1.0) and 1. on [1.0, 1.0 + keep_prob).
    shifted_noise = keep_prob + tf.random_uniform(mask_shape)
    return tf.floor(shifted_noise) / keep_prob
def execute_floor(self):
    """Return `tf.floor(self.a)`, naming the op `"floor<node_num>"`."""
    op_name = "floor%s" % (self.node_num,)
    return tf.floor(self.a, name=op_name)
def compile(self, force_var_reuse=False, checkpoint=None, use_trt=False,
            precision='FP32'):
    """TensorFlow graph is built here.

    Builds the forward pass on every GPU (or on "/gpu:0" when Horovod is
    used), stores the resulting loss/outputs on the instance and, in
    "train" mode, creates `self.train_op` via `optimize_loss`.

    Args:
        force_var_reuse: if True, variables are reused even on the first
            device.
        checkpoint: forwarded to `build_forward_pass_graph` (non-Horovod
            path only).
        use_trt: forwarded to `build_forward_pass_graph` (non-Horovod path
            only).
        precision: forwarded to `build_forward_pass_graph` (non-Horovod path
            only).

    Raises:
        ValueError: if the forward pass returns outputs that are neither
            None nor a list.
    """
    if 'initializer' not in self.params:
        initializer = None
    else:
        init_dict = self.params.get('initializer_params', {})
        initializer = self.params['initializer'](**init_dict)

    if not self.on_horovod:  # not using Horovod
        # below we follow data parallelism for multi-GPU training
        losses = []
        for gpu_cnt, gpu_id in enumerate(self._gpu_ids):
            with tf.device("/gpu:{}".format(gpu_id)), tf.variable_scope(
                    name_or_scope=tf.get_variable_scope(),
                    # re-using variables across GPUs.
                    reuse=force_var_reuse or (gpu_cnt > 0),
                    initializer=initializer,
                    dtype=self.get_tf_dtype(),
            ):
                deco_print("Building graph on GPU:{}".format(gpu_id))

                if self._interactive:
                    self.get_data_layer(
                        gpu_cnt).create_interactive_placeholders()
                else:
                    self.get_data_layer(gpu_cnt).build_graph()
                input_tensors = self.get_data_layer(gpu_cnt).input_tensors

                loss, self._outputs[
                    gpu_cnt] = self.build_forward_pass_graph(
                        input_tensors,
                        gpu_id=gpu_cnt,
                        checkpoint=checkpoint,
                        use_trt=use_trt,
                        precision=precision)
                if self._outputs[gpu_cnt] is not None and \
                        not isinstance(self._outputs[gpu_cnt], list):
                    raise ValueError(
                        'Decoder outputs have to be either None or list')
                if self._mode == "train" or self._mode == "eval":
                    losses.append(loss)

        # end of for gpu_ind loop
        if self._mode == "train":
            # Training loss is the mean of the per-GPU losses.
            self.loss = tf.reduce_mean(losses)
        if self._mode == "eval":
            self.eval_losses = losses
    else:  # is using Horovod
        # gpu_id should always be zero, since Horovod takes care of isolating
        # different processes to 1 GPU only
        with tf.device("/gpu:0"), tf.variable_scope(
                name_or_scope=tf.get_variable_scope(),
                reuse=force_var_reuse,
                initializer=initializer,
                dtype=self.get_tf_dtype(),
        ):
            deco_print("Building graph in Horovod rank: {}".format(
                self._hvd.rank()))
            self.get_data_layer().build_graph()
            input_tensors = self.get_data_layer().input_tensors

            # NOTE(review): this path calls `_build_forward_pass_graph` and
            # does not forward checkpoint/use_trt/precision, unlike the
            # non-Horovod path above -- confirm this is intended.
            all_loss, self._output = self._build_forward_pass_graph(
                input_tensors, gpu_id=0)
            # The forward pass may return either a loss or a dict holding it
            # under the 'loss' key.
            if isinstance(all_loss, (dict, )):
                loss = all_loss['loss']
            else:
                loss = all_loss

            if self._output is not None and not isinstance(
                    self._output, list):
                raise ValueError(
                    'Decoder outputs have to be either None or list')

            if self._mode == "train":
                self.loss = loss
            if self._mode == "eval":
                self.eval_losses = [loss]

    # Optional hook: models that do not implement it are silently skipped.
    try:
        self._num_objects_per_step = [
            self._get_num_objects_per_step(worker_id)
            for worker_id in range(self.num_gpus)
        ]
    except NotImplementedError:
        pass

    if self._mode == "train":
        if 'lr_policy' not in self.params:
            lr_policy = None
        else:
            # NOTE: when 'lr_policy_params' exists this is the dict stored in
            # self.params, so the defaults added below are written into it.
            lr_params = self.params.get('lr_policy_params', {})
            # adding default decay_steps = max_steps if lr_policy supports it and
            # different value is not provided
            func_params = signature(self.params['lr_policy']).parameters
            if 'decay_steps' in func_params and 'decay_steps' not in lr_params:
                lr_params['decay_steps'] = self._last_step
            if 'steps_per_epoch' in func_params and \
                    'steps_per_epoch' not in lr_params and 'num_epochs' in self.params:
                lr_params['steps_per_epoch'] = self.steps_in_epoch
            lr_policy = lambda gs: self.params['lr_policy'](global_step=gs,
                                                            **lr_params)

        # The placeholder is only created when iter_size > 1; otherwise
        # self.skip_update_ph keeps whatever value it had before this call.
        if self.params.get('iter_size', 1) > 1:
            self.skip_update_ph = tf.placeholder(tf.bool)

        var_list = tf.trainable_variables()
        freeze_variables_regex = self.params.get('freeze_variables_regex',
                                                 None)
        if freeze_variables_regex is not None:
            # Variables whose name matches the regex (from the start of the
            # name, via `match`) are excluded from training.
            pattern = re.compile(freeze_variables_regex)
            var_list = [
                var for var in tf.trainable_variables()
                if not pattern.match(var.name)
            ]

        self.train_op = optimize_loss(
            loss=tf.cast(self.loss, tf.float32) + get_regularization_loss(),
            dtype=self.params['dtype'],
            optimizer=self.params['optimizer'],
            optimizer_params=self.params.get('optimizer_params', {}),
            var_list=var_list,
            clip_gradients=self.params.get('max_grad_norm', None),
            learning_rate_decay_fn=lr_policy,
            summaries=self.params.get('summaries', None),
            larc_params=self.params.get('larc_params', None),
            loss_scaling=self.params.get('loss_scaling', 1.0),
            loss_scaling_params=self.params.get('loss_scaling_params', None),
            on_horovod=self.on_horovod,
            iter_size=self.params.get('iter_size', 1),
            skip_update_ph=self.skip_update_ph,
            model=self)
        tf.summary.scalar(name="train_loss", tensor=self.loss)
        if self.steps_in_epoch:
            # Epoch index = floor(global_step / steps_in_epoch).
            tf.summary.scalar(
                name="epoch",
                tensor=tf.floor(
                    tf.train.get_global_step() /
                    tf.constant(self.steps_in_epoch, dtype=tf.int64)),
            )

        # Only one process prints the variable report.
        if not self.on_horovod or self._hvd.rank() == 0:
            if freeze_variables_regex is not None:
                deco_print('Complete list of variables:')
                for var in tf.trainable_variables():
                    deco_print('{}'.format(var.name), offset=2)
            deco_print("Trainable variables:")
            total_params = 0
            unknown_shape = False
            for var in var_list:
                var_params = 1
                deco_print('{}'.format(var.name), offset=2)
                deco_print('shape: {}, {}'.format(var.get_shape(), var.dtype),
                           offset=4)
                if var.get_shape():
                    for dim in var.get_shape():
                        var_params *= dim.value
                    total_params += var_params
                else:
                    unknown_shape = True
            if unknown_shape:
                deco_print(
                    "Encountered unknown variable shape, can't compute total "
                    "number of parameters.")
            else:
                deco_print(
                    'Total trainable parameters: {}'.format(total_params))
def body1(self, num, object_num, loss, predict, labels, nilboy):
    """Accumulate the detection loss contributed by ground-truth object `num`.

    The signature and return value mirror each other (with `num`
    incremented), which suggests use as a `tf.while_loop` body -- confirm
    against the caller.

    Args:
        num: index of the ground-truth row in `labels` to process.
        object_num: passed through unchanged.
        loss: sequence of four running losses
            [class, object, no-object, coordinate].
        predict: 3-D tensor [cell_size, cell_size, C]. The last axis is
            sliced as: [0, num_classes) class scores,
            [num_classes, num_classes + boxes_per_cell) box confidences, and
            the remainder reshaped to [boxes_per_cell, 4] box parameters.
        labels: [max_objects, 5] rows of (x_center, y_center, w, h, class).
        nilboy: incoming value is ignored; replaced by the responsibility
            mask `I` of this object.

    Returns:
        `(num + 1, object_num, updated_losses, predict, labels, I)`.
    """
    label = labels[num:num + 1, :]
    label = tf.reshape(label, [-1])

    # calculate objects tensor [CELL_SIZE, CELL_SIZE]
    # Box extent converted from pixels to (fractional) grid-cell units.
    min_x = (label[0] - label[2] / 2) / (self.image_size / self.cell_size)
    max_x = (label[0] + label[2] / 2) / (self.image_size / self.cell_size)

    min_y = (label[1] - label[3] / 2) / (self.image_size / self.cell_size)
    max_y = (label[1] + label[3] / 2) / (self.image_size / self.cell_size)

    # Round outwards so that every cell touched by the box is covered.
    min_x = tf.floor(min_x)
    min_y = tf.floor(min_y)

    max_x = tf.ceil(max_x)
    max_y = tf.ceil(max_y)

    # Block of ones the size of the covered cells, zero-padded out to the
    # full grid.
    temp = tf.cast(tf.stack([max_y - min_y, max_x - min_x]), dtype=tf.int32)
    objects = tf.ones(temp, tf.float32)

    temp = tf.cast(
        tf.stack(
            [min_y, self.cell_size - max_y, min_x, self.cell_size - max_x]),
        tf.int32)
    temp = tf.reshape(temp, (2, 2))
    objects = tf.pad(objects, temp, "CONSTANT")

    # calculate responsible tensor [CELL_SIZE, CELL_SIZE]
    # A single 1 at the cell containing the box centre, zero elsewhere.
    center_x = label[0] / (self.image_size / self.cell_size)
    center_x = tf.floor(center_x)

    center_y = label[1] / (self.image_size / self.cell_size)
    center_y = tf.floor(center_y)

    response = tf.ones([1, 1], tf.float32)

    temp = tf.cast(
        tf.stack([
            center_y, self.cell_size - center_y - 1, center_x,
            self.cell_size - center_x - 1
        ]), tf.int32)
    temp = tf.reshape(temp, (2, 2))
    response = tf.pad(response, temp, "CONSTANT")
    # objects = response

    # calculate iou_predict_truth [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
    predict_boxes = predict[:, :, self.num_classes + self.boxes_per_cell:]

    predict_boxes = tf.reshape(
        predict_boxes,
        [self.cell_size, self.cell_size, self.boxes_per_cell, 4])

    # Scale the first two box components by the cell size in pixels and the
    # last two by the image size.
    predict_boxes = predict_boxes * [
        self.image_size / self.cell_size, self.image_size / self.cell_size,
        self.image_size, self.image_size
    ]

    # Pixel offset of every cell's origin; added to the first two box
    # components below.
    base_boxes = np.zeros([self.cell_size, self.cell_size, 4])

    for y in range(self.cell_size):
        for x in range(self.cell_size):
            # nilboy
            base_boxes[y, x, :] = [
                self.image_size / self.cell_size * x,
                self.image_size / self.cell_size * y, 0, 0
            ]
    base_boxes = np.tile(
        np.resize(base_boxes, [self.cell_size, self.cell_size, 1, 4]),
        [1, 1, self.boxes_per_cell, 1])

    predict_boxes = base_boxes + predict_boxes

    iou_predict_truth = self.iou(predict_boxes, label[0:4])
    # calculate C [cell_size, cell_size, boxes_per_cell]
    # Confidence target: IoU in the responsible cell, zero elsewhere.
    C = iou_predict_truth * tf.reshape(response,
                                       [self.cell_size, self.cell_size, 1])

    # calculate I tensor [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
    # I selects the highest-IoU box(es) in the responsible cell.
    I = iou_predict_truth * tf.reshape(response,
                                       (self.cell_size, self.cell_size, 1))

    # NOTE(review): `keep_dims` is the deprecated spelling of `keepdims` in
    # TF 1.x -- confirm the targeted TensorFlow version before changing it.
    max_I = tf.reduce_max(I, 2, keep_dims=True)

    I = tf.cast((I >= max_I), tf.float32) * tf.reshape(
        response, (self.cell_size, self.cell_size, 1))

    # calculate no_I tensor [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
    no_I = tf.ones_like(I, dtype=tf.float32) - I

    # Predicted box confidences.
    p_C = predict[:, :,
                  self.num_classes:self.num_classes + self.boxes_per_cell]

    # calculate truth x,y,sqrt_w,sqrt_h 0-D
    # (x and y are rebound here; the loop indices above are no longer needed)
    x = label[0]
    y = label[1]

    sqrt_w = tf.sqrt(tf.abs(label[2]))
    sqrt_h = tf.sqrt(tf.abs(label[3]))
    # sqrt_w = tf.abs(label[2])
    # sqrt_h = tf.abs(label[3])

    # calculate predict p_x, p_y, p_sqrt_w, p_sqrt_h 3-D [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
    p_x = predict_boxes[:, :, :, 0]
    p_y = predict_boxes[:, :, :, 1]

    # p_sqrt_w = tf.sqrt(tf.abs(predict_boxes[:, :, :, 2])) * ((tf.cast(predict_boxes[:, :, :, 2] > 0, tf.float32) * 2) - 1)
    # p_sqrt_h = tf.sqrt(tf.abs(predict_boxes[:, :, :, 3])) * ((tf.cast(predict_boxes[:, :, :, 3] > 0, tf.float32) * 2) - 1)
    # p_sqrt_w = tf.sqrt(tf.maximum(0.0, predict_boxes[:, :, :, 2]))
    # p_sqrt_h = tf.sqrt(tf.maximum(0.0, predict_boxes[:, :, :, 3]))
    # p_sqrt_w = predict_boxes[:, :, :, 2]
    # p_sqrt_h = predict_boxes[:, :, :, 3]
    # Predicted sizes are clamped to [0, image_size] before the square root.
    p_sqrt_w = tf.sqrt(
        tf.minimum(self.image_size * 1.0,
                   tf.maximum(0.0, predict_boxes[:, :, :, 2])))
    p_sqrt_h = tf.sqrt(
        tf.minimum(self.image_size * 1.0,
                   tf.maximum(0.0, predict_boxes[:, :, :, 3])))

    # calculate truth p 1-D tensor [NUM_CLASSES]
    P = tf.one_hot(tf.cast(label[4], tf.int32),
                   self.num_classes,
                   dtype=tf.float32)

    # calculate predict p_P 3-D tensor [CELL_SIZE, CELL_SIZE, NUM_CLASSES]
    p_P = predict[:, :, 0:self.num_classes]

    # class_loss: applied to every cell covered by the box (`objects`), not
    # only the responsible cell.
    class_loss = tf.nn.l2_loss(
        tf.reshape(objects, (self.cell_size, self.cell_size, 1)) *
        (p_P - P)) * self.class_scale
    # class_loss = tf.nn.l2_loss(tf.reshape(response, (self.cell_size, self.cell_size, 1)) * (p_P - P)) * self.class_scale

    # object_loss
    object_loss = tf.nn.l2_loss(I * (p_C - C)) * self.object_scale
    # object_loss = tf.nn.l2_loss(I * (p_C - (C + 1.0)/2.0)) * self.object_scale

    # noobject_loss: non-responsible boxes are pushed towards confidence 0.
    # noobject_loss = tf.nn.l2_loss(no_I * (p_C - C)) * self.noobject_scale
    noobject_loss = tf.nn.l2_loss(no_I * (p_C)) * self.noobject_scale

    # coord_loss: centre error in cell units; the sqrt-size terms are
    # divided by image_size.
    coord_loss = (tf.nn.l2_loss(I * (p_x - x) /
                                (self.image_size / self.cell_size)) +
                  tf.nn.l2_loss(I * (p_y - y) /
                                (self.image_size / self.cell_size)) +
                  tf.nn.l2_loss(I * (p_sqrt_w - sqrt_w)) / self.image_size +
                  tf.nn.l2_loss(I * (p_sqrt_h - sqrt_h)) /
                  self.image_size) * self.coord_scale

    nilboy = I

    return num + 1, object_num, [
        loss[0] + class_loss, loss[1] + object_loss, loss[2] + noobject_loss,
        loss[3] + coord_loss
    ], predict, labels, nilboy
def preprocess_for_eval(image, labels, bboxes,
                        out_shape=EVAL_SIZE, data_format='NHWC',
                        difficults=None, resize=Resize.WARP_RESIZE,
                        scope='ssd_preprocessing_train'):
    """Whiten and resize an image (and its boxes) for evaluation.

    Args:
        image: rank-3 `Tensor` [height, width, channels] of arbitrary size.
        labels: per-box labels; filtered together with `bboxes` when
            `difficults` is given.
        bboxes: per-box coordinates, or None. A whole-image box
            [0, 0, 1, 1] is tracked alongside them through the resize.
        out_shape: (height, width) of the output when a resize is applied.
        data_format: 'NHWC' or 'NCHW'; the latter transposes the image to
            channels-first.
        difficults: optional per-box flags; boxes whose flag casts to True
            are dropped.
        resize: one of the `Resize` strategies. Any other value leaves the
            image size untouched.
        scope: name scope for the created ops.

    Returns:
        `(image, labels, bboxes, bbox_img)`, where `bbox_img` is the
        whole-image box after the chosen resize.

    Raises:
        ValueError: if `image` is not rank 3.
    """
    with tf.name_scope(scope):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        image = tf_image_whitened(tf.to_float(image),
                                  [_R_MEAN, _G_MEAN, _B_MEAN])

        # Prepend the image rectangle so it is transformed with the boxes.
        whole_image_box = tf.constant([[0., 0., 1., 1.]])
        if bboxes is None:
            bboxes = whole_image_box
        else:
            bboxes = tf.concat([whole_image_box, bboxes], axis=0)

        target_h, target_w = out_shape[0], out_shape[1]

        if resize == Resize.NONE:
            # Keep the image at its original size.
            pass
        elif resize == Resize.CENTRAL_CROP:
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, target_h, target_w)
        elif resize == Resize.PAD_AND_RESIZE:
            # Shrink (never enlarge) so the image fits inside out_shape...
            in_shape = tf.shape(image)
            ratio_h = tf.to_double(target_h / in_shape[0])
            ratio_w = tf.to_double(target_w / in_shape[1])
            shrink = tf.minimum(tf.to_double(1.0),
                                tf.minimum(ratio_h, ratio_w))
            scaled_hw = tf.floor(shrink * tf.to_double(in_shape[0:2]))
            image = tf_image.resize_image(
                image, tf.cast(scaled_hw, tf.int32),
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)
            # ...then pad out to the expected size.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, target_h, target_w)
        elif resize == Resize.WARP_RESIZE:
            # Stretch directly to out_shape, ignoring the aspect ratio.
            image = tf_image.resize_image(
                image, out_shape,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)

        # Separate the tracked image rectangle from the real boxes again.
        bbox_img, bboxes = bboxes[0], bboxes[1:]

        if difficults is not None:
            keep = tf.logical_not(tf.cast(difficults, tf.bool))
            labels = tf.boolean_mask(labels, keep)
            bboxes = tf.boolean_mask(bboxes, keep)

        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))

        return image, labels, bboxes, bbox_img
def sample(probs):
    """Draw a random 0/1 tensor in which each entry is 1 with probability `probs`."""
    import tensorflow as tf

    # probs + U[0, 1) reaches 1 (and floors to 1) with probability probs.
    noise = tf.random.uniform(tf.shape(probs), 0, 1)
    return tf.floor(probs + noise)
def compute_grid_positions(boxes, boundaries, output_size, sample_offset):
    """Compute sampling grid positions and bilinear kernels for each box.

    Args:
        boxes: a 3-D tensor of shape [batch_size, num_boxes, 4] given w.r.t.
            the corresponding feature map. boxes[:, :, 0:2] is the (y, x)
            grid position (float) of the top-left corner and
            boxes[:, :, 2:4] is the box size (h, w) (float) in feature-map
            pixels.
        boundaries: a 3-D tensor of shape [batch_size, num_boxes, 2] holding
            the (y, x) boundary of the feature map for each box. Grid points
            beyond the boundary are clipped.
        output_size: a scalar indicating the output crop size.
        sample_offset: a float in [0, 1], the sub-pixel sample offset from
            each grid point.

    Returns:
        kernel_y: Tensor of size [batch_size, boxes, output_size, 2, 1].
        kernel_x: Tensor of size [batch_size, boxes, output_size, 2, 1].
        box_grid_y0y1: Tensor of size [batch_size, boxes, output_size, 2].
        box_grid_x0x1: Tensor of size [batch_size, boxes, output_size, 2].
    """
    batch_size, num_boxes, _ = boxes.get_shape().as_list()
    kernel_shape = [batch_size, num_boxes, output_size, 2, 1]

    def _axis_grid(axis):
        """Return (neighbour indices, bilinear kernel) for axis 0 (y) or 1 (x)."""
        origin = boxes[:, :, axis]
        extent = boxes[:, :, axis + 2]
        limit = tf.expand_dims(boundaries[:, :, axis], -1)

        # Evenly spaced sample positions across the box along this axis.
        grid = tf.stack(
            [origin + (step + sample_offset) * extent / output_size
             for step in range(output_size)],
            axis=2)

        # Lower/upper neighbouring grid points, clipped into [0, limit].
        lower = tf.minimum(tf.maximum(0., tf.floor(grid)), limit)
        upper = tf.minimum(lower + 1, limit)

        # The RoIAlign feature f can be computed by bilinear interpolation of
        # four neighboring feature points:
        #   f(y, x) = [hy, ly] * [[f00, f01], [f10, f11]] * [hx, lx]^T
        # `low_w` weights the upper neighbour and `high_w` the lower one.
        low_w = grid - lower
        high_w = 1.0 - low_w
        kernel = tf.reshape(tf.stack([high_w, low_w], axis=3), kernel_shape)
        return tf.stack([lower, upper], axis=-1), kernel

    box_gridx0x1, kernel_x = _axis_grid(1)
    box_gridy0y1, kernel_y = _axis_grid(0)
    return kernel_y, kernel_x, box_gridy0y1, box_gridx0x1
def __call__(self, inputs, state, scope=None):
    """Run one step of ZoneoutLSTMCell.

    Args:
        inputs: input Tensor, 2D, `[batch, num_units]`.
        state: if `state_is_tuple` is False, this must be a state Tensor,
            `2-D, [batch, state_size]`. If `state_is_tuple` is True, this
            must be a tuple of state Tensors, both `2-D`, with column sizes
            `c_state` and `m_state`.
        scope: not referenced by this method.

    Returns:
        A tuple containing:
        - A `2-D, [batch, output_dim]`, Tensor representing the output of
          the ZoneoutLSTMCell after reading `inputs` when previous state was
          `state`. Here output_dim is: num_proj if num_proj was set,
          num_units otherwise.
        - Tensor(s) representing the new state of ZoneoutLSTMCell after
          reading `inputs` when the previous state was `state`. Same type
          and shape(s) as `state`.

    Raises:
        ValueError: If input size cannot be inferred from inputs via static
            shape inference.
    """
    num_proj = self._num_units if self._num_proj is None else self._num_proj

    if self._state_is_tuple:
        (c_prev, h_prev) = state
    else:
        c_prev = tf.slice(state, [0, 0], [-1, self._num_units])
        h_prev = tf.slice(state, [0, self._num_units], [-1, num_proj])

    input_size = inputs.get_shape().with_rank(2)[1]
    if input_size.value is None:
        raise ValueError(
            "Could not infer input size from inputs.get_shape()[-1]")

    # i = input_gate, j = new_input, f = forget_gate, o = output_gate
    lstm_matrix = _linear([inputs, h_prev], 4 * self._num_units, True)
    i, j, f, o = array_ops.split(lstm_matrix, 4, axis=1)

    # diagonal (peephole) connections
    dtype = inputs.dtype
    if self._use_peepholes:
        w_f_diag = tf.get_variable("W_F_diag", shape=[self._num_units],
                                   dtype=dtype)
        w_i_diag = tf.get_variable("W_I_diag", shape=[self._num_units],
                                   dtype=dtype)
        w_o_diag = tf.get_variable("W_O_diag", shape=[self._num_units],
                                   dtype=dtype)

    with tf.name_scope("zoneout"):
        # Binary mask for the cell state. Despite the `keep_prob_*` names,
        # these hold the zoneout probabilities, i.e. the probability of
        # keeping the PREVIOUS state: floor(p + U[0, 1)) is 1 w.p. p.
        keep_prob_cell = tf.convert_to_tensor(self.zoneout_prob_cell,
                                              dtype=c_prev.dtype)
        random_tensor_cell = keep_prob_cell
        random_tensor_cell += tf.random_uniform(tf.shape(c_prev),
                                                seed=None,
                                                dtype=c_prev.dtype)
        binary_mask_cell = tf.floor(random_tensor_cell)
        # dtype is given explicitly: tf.ones defaults to float32, which
        # cannot be combined with a mask of any other float type.
        binary_mask_cell_complement = tf.ones(
            tf.shape(c_prev), dtype=c_prev.dtype) - binary_mask_cell

        # Binary mask for the output (hidden) state.
        keep_prob_output = tf.convert_to_tensor(self.zoneout_prob_output,
                                                dtype=h_prev.dtype)
        random_tensor_output = keep_prob_output
        random_tensor_output += tf.random_uniform(tf.shape(h_prev),
                                                  seed=None,
                                                  dtype=h_prev.dtype)
        binary_mask_output = tf.floor(random_tensor_output)
        binary_mask_output_complement = tf.ones(
            tf.shape(h_prev), dtype=h_prev.dtype) - binary_mask_output

    # Candidate cell state of a regular LSTM step.
    if self._use_peepholes:
        c_temp = c_prev * tf.sigmoid(f + self._forget_bias + w_f_diag * c_prev) + \
            tf.sigmoid(i + w_i_diag * c_prev) * self._activation(j)
    else:
        c_temp = c_prev * tf.sigmoid(f + self._forget_bias) + tf.sigmoid(
            i) * self._activation(j)

    # Apply zoneout to the cell state. The gate uses the CELL probability on
    # both the peephole and the non-peephole path; the non-peephole path
    # previously tested zoneout_prob_output, which disabled cell zoneout
    # whenever output zoneout was off.
    if self.is_training and self.zoneout_prob_cell > 0.0:
        c = binary_mask_cell * c_prev + binary_mask_cell_complement * c_temp
    else:
        # like dropout, use expectation in inference
        c = keep_prob_cell * c_prev + (1 - keep_prob_cell) * c_temp

    if self._cell_clip:
        c = tf.clip_by_value(c, -self._cell_clip, self._cell_clip)

    # Candidate output of a regular LSTM step.
    if self._use_peepholes:
        h_temp = tf.sigmoid(o + w_o_diag * c) * self._activation(c)
    else:
        h_temp = tf.sigmoid(o) * self._activation(c)

    # Apply zoneout to the output.
    if self.is_training and self.zoneout_prob_output > 0.0:
        h = binary_mask_output * h_prev + binary_mask_output_complement * h_temp
    else:
        # as dropout, use expectation in inference
        h = keep_prob_output * h_prev + (1 - keep_prob_output) * h_temp

    # apply projection
    if self._num_proj is not None:
        # NOTE(review): reads `self.num_units` while the rest of the method
        # reads `self._num_units` -- confirm the class exposes both.
        w_proj = tf.get_variable("W_P", [self.num_units, num_proj],
                                 dtype=dtype)

        h = tf.matmul(h, w_proj)
        if self._proj_clip is not None:
            h = tf.clip_by_value(h, -self._proj_clip, self._proj_clip)

    new_state = (tf.contrib.rnn.LSTMStateTuple(c, h)
                 if self._state_is_tuple else tf.concat([c, h], axis=1))

    return h, new_state
def _interpolate(im, x, y, out_size):
    """Bilinearly sample `im` at normalized coordinates (`x`, `y`).

    Args:
        im: image batch indexed as [batch, height, width, channels].
        x: flat sampling x coordinates, normalized to [-1, 1].
        y: flat sampling y coordinates, normalized to [-1, 1].
        out_size: (out_height, out_width); their product is the number of
            sample points per batch element.

    Returns:
        A [num_points, channels] float32 tensor of interpolated values.
    """
    with tf.compat.v1.variable_scope('_interpolate'):
        im_shape = tf.shape(input=im)
        batch, rows, cols, depth = (im_shape[0], im_shape[1], im_shape[2],
                                    im_shape[3])
        points_per_image = out_size[0] * out_size[1]

        lowest = tf.zeros([], dtype='int32')
        row_max = tf.cast(rows - 1, 'int32')
        col_max = tf.cast(cols - 1, 'int32')

        # scale indices from [-1, 1] to [0, width/height]
        px = (tf.cast(x, 'float32') + 1.0) * tf.cast(cols, 'float32') / 2.0
        py = (tf.cast(y, 'float32') + 1.0) * tf.cast(rows, 'float32') / 2.0

        # Neighbouring pixel indices, clipped to the image.
        col_floor = tf.cast(tf.floor(px), 'int32')
        row_floor = tf.cast(tf.floor(py), 'int32')
        col_lo = tf.clip_by_value(col_floor, lowest, col_max)
        col_hi = tf.clip_by_value(col_floor + 1, lowest, col_max)
        row_lo = tf.clip_by_value(row_floor, lowest, row_max)
        row_hi = tf.clip_by_value(row_floor + 1, lowest, row_max)

        # Flat index = batch offset + row * width + column.
        batch_offset = _repeat(tf.range(batch) * (cols * rows),
                               points_per_image)
        row_lo_offset = batch_offset + row_lo * cols
        row_hi_offset = batch_offset + row_hi * cols

        # use indices to lookup pixels in the flat image and restore
        # channels dim
        pixels = tf.cast(tf.reshape(im, tf.stack([-1, depth])), 'float32')
        top_left = tf.gather(pixels, row_lo_offset + col_lo)
        bottom_left = tf.gather(pixels, row_hi_offset + col_lo)
        top_right = tf.gather(pixels, row_lo_offset + col_hi)
        bottom_right = tf.gather(pixels, row_hi_offset + col_hi)

        # Each corner is weighted by the area of the opposite rectangle,
        # measured against the clipped indices.
        col_lo_f = tf.cast(col_lo, 'float32')
        col_hi_f = tf.cast(col_hi, 'float32')
        row_lo_f = tf.cast(row_lo, 'float32')
        row_hi_f = tf.cast(row_hi, 'float32')
        w_top_left = tf.expand_dims((col_hi_f - px) * (row_hi_f - py), 1)
        w_bottom_left = tf.expand_dims((col_hi_f - px) * (py - row_lo_f), 1)
        w_top_right = tf.expand_dims((px - col_lo_f) * (row_hi_f - py), 1)
        w_bottom_right = tf.expand_dims((px - col_lo_f) * (py - row_lo_f), 1)

        return tf.add_n([
            w_top_left * top_left,
            w_bottom_left * bottom_left,
            w_top_right * top_right,
            w_bottom_right * bottom_right,
        ])
# Weights W1 = tf.Variable(tf.random_uniform([n_input, n_hidden], -1.0, 1.0)) W2 = tf.Variable(tf.random_uniform([n_hidden, n_output], -1.0, 1.0)) # Bias b1 = tf.Variable(tf.zeros([n_hidden])) b2 = tf.Variable(tf.zeros([n_output])) L2 = tf.sigmoid(tf.matmul(X, W1) + b1) hy = tf.sigmoid(tf.matmul(L2, W2) + b2) cost = tf.reduce_mean(-Y*tf.log(hy) - (1-Y) * tf.log(1-hy)) optimizer = tf.train.GradientDescentOptimizer(lr).minimize(cost) init = tf.global_variables_initializer() with tf.Session() as sess: sess.run(init) for step in range(epochs): _, c = sess.run([optimizer, cost], feed_dict = {X: x_data, Y: y_data}) if step % display_step == 0: print("Cost: ", c) answer = tf.equal(tf.floor(hy + 0.1), Y) accuracy = tf.reduce_mean(tf.cast(answer, "float")) print(sess.run([hy], feed_dict = {X: x_data, Y: y_data})) print("Accuracy: ", accuracy.eval({X: x_data, Y: y_data}))
def _bilinear_interpolate_3D(self, input_feature, x, y, z, name):
    """Sample `input_feature` at fractional (x, y, z) positions.

    Each sample is a weighted sum of its 8 surrounding grid values, with
    weights given by the volume of the opposite sub-box (i.e. trilinear
    interpolation, despite the method name).

    Args:
        input_feature: feature volume, reshaped here to
            [-1, self.num_channels]. The index arithmetic below implies a
            (batch, y, x, z) memory order with sizes
            (num_batch, height, width, depth).
        x, y, z: sampling coordinates in grid units; flattened to 1-D here.
        name: prefix for the variable scope.

    Returns:
        Tensor reshaped to [num_batch, kernel_size[0] * height,
        kernel_size[1] * width, kernel_size[2] * depth, num_channels].
    """
    with tf.variable_scope(name + "/_bilinear_interpolate"):
        # flatten to 1D
        x = tf.reshape(x, [-1])
        y = tf.reshape(y, [-1])
        z = tf.reshape(z, [-1])  # [N*3W*3H*3D]

        # data type conversion
        x = tf.cast(x, "float32")
        y = tf.cast(y, "float32")
        z = tf.cast(z, "float32")
        zero = tf.zeros([], dtype="int32")
        max_x = tf.cast(self.width - 1, "int32")
        max_y = tf.cast(self.height - 1, "int32")
        max_z = tf.cast(self.depth - 1, "int32")

        # find 8 grid locations
        x0 = tf.cast(tf.floor(x), "int32")
        x1 = x0 + 1
        y0 = tf.cast(tf.floor(y), "int32")
        y1 = y0 + 1
        z0 = tf.cast(tf.floor(z), "int32")
        z1 = z0 + 1

        # clip coordinates that fall outside the feature map volume
        x0 = tf.clip_by_value(x0, zero, max_x)
        x1 = tf.clip_by_value(x1, zero, max_x)
        y0 = tf.clip_by_value(y0, zero, max_y)
        y1 = tf.clip_by_value(y1, zero, max_y)
        z0 = tf.clip_by_value(z0, zero, max_z)
        z1 = tf.clip_by_value(z1, zero, max_z)  # [N*3H*3W*3D]

        # flatten input_feature so that the 3-D coordinates can be turned
        # into flat row indices for gathering
        input_feature_flat = tf.reshape(input_feature,
                                        tf.stack([-1, self.num_channels
                                                  ]))  # [N*H*W*D, C]

        # Strides of the flattened volume: x steps by depth, y steps by
        # depth * width, and one batch element spans depth * width * height.
        dimension_3 = self.depth
        dimension_2 = self.depth * self.width
        dimension_1 = self.depth * self.width * self.height

        # Repeat each batch offset once per sample point of that batch
        # element by multiplying with a row vector of ones.
        base = tf.range(self.num_batch) * dimension_1
        repeat = tf.transpose(
            tf.expand_dims(
                tf.ones(shape=(tf.stack([
                    self.num_points * self.height * self.width * self.depth,
                ]))), 1), [1, 0])  # [1, H*W*D*27]
        repeat = tf.cast(repeat, "int32")  # [1, H*W*D*27]
        base = tf.matmul(tf.reshape(base, (-1, 1)),
                         repeat)  # [N, 1] * [1, H*W*D*27] ==> [N, H*W*D*27]
        base = tf.reshape(base, [-1])  # [N*H*W*D*27]

        base_x0 = base + x0 * dimension_3
        base_x1 = base + x1 * dimension_3
        base_y0 = base + y0 * dimension_2
        base_y1 = base + y1 * dimension_2

        # Both base_y* and base_x* already contain `base`, so it is
        # subtracted once: index = base + y * (D*W) + x * D + z.
        # top rectangle of the neighbourhood volume (y = y0)
        index_a0 = base_y0 + base_x0 - base + z0
        index_b0 = base_y0 + base_x1 - base + z0
        index_c0 = base_y0 + base_x0 - base + z1
        index_d0 = base_y0 + base_x1 - base + z1  # [N*3H*3W*3D]

        # bottom rectangle of the neighbourhood volume (y = y1)
        index_a1 = base_y1 + base_x0 - base + z0
        index_b1 = base_y1 + base_x1 - base + z0
        index_c1 = base_y1 + base_x0 - base + z1
        index_d1 = base_y1 + base_x1 - base + z1  # [N*3H*3W*3D]

        # get 8 grid values ([N*H*W*D, C], [N*H*W*D*27])
        value_a0 = tf.gather(input_feature_flat, index_a0)
        value_b0 = tf.gather(input_feature_flat, index_b0)
        value_c0 = tf.gather(input_feature_flat, index_c0)
        value_d0 = tf.gather(input_feature_flat, index_d0)  # [N*3H*3W*3D, C]
        value_a1 = tf.gather(input_feature_flat, index_a1)
        value_b1 = tf.gather(input_feature_flat, index_b1)
        value_c1 = tf.gather(input_feature_flat, index_c1)
        value_d1 = tf.gather(input_feature_flat, index_d1)  # [N*3H*3W*3D, C]

        # calculate 8 volumes: each corner is weighted by the volume of the
        # diagonally opposite sub-box (measured against the clipped indices)
        x0_float = tf.cast(x0, "float32")
        x1_float = tf.cast(x1, "float32")
        y0_float = tf.cast(y0, "float32")
        y1_float = tf.cast(y1, "float32")
        z0_float = tf.cast(z0, "float32")
        z1_float = tf.cast(z1, "float32")
        vol_a0 = tf.expand_dims(
            ((x1_float - x) * (y1_float - y) * (z1_float - z)), 1)
        vol_b0 = tf.expand_dims(
            ((x - x0_float) * (y1_float - y) * (z1_float - z)), 1)
        vol_c0 = tf.expand_dims(
            ((x1_float - x) * (y1_float - y) * (z - z0_float)), 1)
        vol_d0 = tf.expand_dims(
            ((x - x0_float) * (y1_float - y) * (z - z0_float)), 1)
        vol_a1 = tf.expand_dims(
            ((x1_float - x) * (y - y0_float) * (z1_float - z)), 1)
        vol_b1 = tf.expand_dims(
            ((x - x0_float) * (y - y0_float) * (z1_float - z)), 1)
        vol_c1 = tf.expand_dims(
            ((x1_float - x) * (y - y0_float) * (z - z0_float)), 1)
        vol_d1 = tf.expand_dims(
            ((x - x0_float) * (y - y0_float) * (z - z0_float)),
            1)  # [N*3H*3W*3D, 1]

        # weighted sum of the 8 neighbours
        outputs = tf.add_n([
            value_a0 * vol_a0, value_b0 * vol_b0, value_c0 * vol_c0,
            value_d0 * vol_d0, value_a1 * vol_a1, value_b1 * vol_b1,
            value_c1 * vol_c1, value_d1 * vol_d1
        ])
        outputs = tf.reshape(outputs, [
            self.num_batch, self.kernel_size[0] * self.height,
            self.kernel_size[1] * self.width,
            self.kernel_size[2] * self.depth, self.num_channels
        ])
        # NOTE(review): the reshape above orders the axes (height, width,
        # depth); the original trailing comment said [N, 3W, 3H, 3D, C].
        return outputs
def _randomly_negate_tensor(tensor):
    """Return ``tensor`` or ``-tensor``, each with probability one half.

    A scalar uniform sample is shifted by 0.5 and floored, which yields 0 or 1;
    that value, cast to bool, selects the branch taken by ``tf.cond``.
    """
    coin = tf.floor(tf.random.uniform([]) + 0.5)
    keep_sign = tf.cast(coin, tf.bool)
    # True -> tensor is returned unchanged, False -> its negation is returned.
    return tf.cond(keep_sign, lambda: tensor, lambda: -tensor)
def _randint(self, shape: tf.TensorShape, min: int, max: int):
    """Draw int32 values of the given shape from ``self.rand_generator``.

    Samples come from ``self.rand_generator.uniform(minval=min, maxval=max)``
    and are floored before being cast to ``tf.int32``.
    """
    samples = self.rand_generator.uniform(shape=shape, minval=min, maxval=max)
    floored = tf.floor(samples)
    return tf.cast(floored, tf.int32)
def ce_loss(logits, labels, mask=None, top_k_percentage=None, deterministic=False):
    """Softmax cross-entropy with an optional pixel mask and top-k selection.

    Args:
        logits: A tensor of shape (b,h,w,num_classes).
        labels: A tensor of shape (b,h,w,num_classes).
        mask: None or a tensor of shape (b,h,w). When None, every pixel is used.
        top_k_percentage: None or a float in (0.,1.]. When None, a standard
            cross-entropy loss is computed; otherwise only that fraction of the
            pixels contributes to the loss.
        deterministic: If True the top-k pixels are picked directly from the
            log of the normalized loss; if False Gumbel noise is added first,
            i.e. the pixels are sampled.

    Returns:
        A dict with keys 'mean' (masked mean of the loss), 'sum' (per-instance
        masked sum, averaged over the batch) and 'mask' (the mask that was
        applied, reshaped to (batch, -1)).
    """
    n_classes = logits.shape.as_list()[-1]
    flat_logits = tf.reshape(logits, (-1, n_classes), name='reshape_y')
    flat_labels = tf.reshape(labels, (-1, n_classes), name='reshape_t')

    if mask is not None:
        assert mask.shape.as_list()[:3] == labels.shape.as_list()[:3], \
            'The loss mask shape differs from the target shape: {} vs. {}.'.format(
                mask.shape.as_list(), labels.shape.as_list()[:3])
        mask = tf.reshape(mask, (-1, ), name='reshape_mask')
    else:
        # No mask supplied: weight every flattened pixel with 1.
        mask = tf.ones(shape=(flat_labels.shape.as_list()[0], ))

    n_pixels = flat_logits.shape.as_list()[0]
    pixel_xe = tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=flat_labels, logits=flat_logits)

    if top_k_percentage is not None:
        assert 0.0 < top_k_percentage <= 1.0
        k = tf.cast(tf.floor(n_pixels * top_k_percentage), tf.int32)

        # The selection scores must not feed gradients back into the loss.
        frozen_xe = tf.stop_gradient(pixel_xe)
        xe_probs = frozen_xe / tf.reduce_sum(frozen_xe)

        score = tf.log(xe_probs)
        if not deterministic:
            # Gumbel trick: adding Gumbel noise to the log-probabilities and
            # taking the top-k is equivalent to sampling pixels from the
            # categorical distribution given by the normalized loss values.
            score = score + _sample_gumbel(xe_probs.shape.as_list())

        # log(mask) contributes -inf where mask == 0, excluding those pixels.
        score = score + tf.log(mask)
        mask = mask * _topk_mask(score, k)

    # Batch averages for the sum and the mean of the loss.
    n_batch = labels.shape.as_list()[0]
    pixel_xe = tf.reshape(pixel_xe, shape=(n_batch, -1))
    mask = tf.reshape(mask, shape=(n_batch, -1))

    per_instance_sum = tf.reduce_sum(mask * pixel_xe, axis=1)
    loss_sum = tf.reduce_mean(per_instance_sum, axis=0)
    loss_mean = tf.reduce_sum(mask * pixel_xe) / tf.reduce_sum(mask)

    return {'mean': loss_mean, 'sum': loss_sum, 'mask': mask}
def data_loader(FLAGS):
    """Build the queue-based input pipeline for paired LR/HR PNG images.

    Reads these attributes from ``FLAGS``: ``input_dir_LR``, ``input_dir_HR``,
    ``name_queue_capacity``, ``random_crop``, ``mode``, ``crop_size``, ``task``,
    ``flip``, ``batch_size``, ``image_queue_capacity`` and ``queue_thread``.

    Returns:
        A ``Data`` namedtuple with fields ``paths_LR``, ``paths_HR``,
        ``inputs``, ``targets`` (batched tensors), ``image_count`` (number of
        LR png files found) and ``steps_per_epoch``.

    Raises:
        ValueError: if either input directory is the string 'None' or does not
            exist.
        Exception: if the LR directory contains no ``.png`` file.
    """
    with tf.device('/cpu:0'):
        # Define the returned data batches
        Data = collections.namedtuple(
            'Data',
            'paths_LR, paths_HR, inputs, targets, image_count, steps_per_epoch'
        )

        # Check the input directory
        if (FLAGS.input_dir_LR == 'None') or (FLAGS.input_dir_HR == 'None'):
            raise ValueError('Input directory is not provided')

        if (not os.path.exists(FLAGS.input_dir_LR)) or (not os.path.exists(
                FLAGS.input_dir_HR)):
            raise ValueError('Input directory not found')

        image_list_LR = os.listdir(FLAGS.input_dir_LR)
        image_list_LR = [_ for _ in image_list_LR if _.endswith('.png')]
        if len(image_list_LR) == 0:
            raise Exception('No png files in the input directory')

        # The HR paths are derived from the sorted LR file names, so each HR
        # file is expected to carry the same name as its LR counterpart; the
        # HR directory itself is never listed here.
        image_list_LR_temp = sorted(image_list_LR)
        image_list_LR = [
            os.path.join(FLAGS.input_dir_LR, _) for _ in image_list_LR_temp
        ]
        image_list_HR = [
            os.path.join(FLAGS.input_dir_HR, _) for _ in image_list_LR_temp
        ]

        image_list_LR_tensor = tf.convert_to_tensor(image_list_LR,
                                                    dtype=tf.string)
        image_list_HR_tensor = tf.convert_to_tensor(image_list_HR,
                                                    dtype=tf.string)

        with tf.variable_scope('load_image'):
            # define the image list queue
            # image_list_LR_queue = tf.train.string_input_producer(image_list_LR, shuffle=False, capacity=FLAGS.name_queue_capacity)
            # image_list_HR_queue = tf.train.string_input_producer(image_list_HR, shuffle=False, capacity=FLAGS.name_queue_capacity)
            #print('[Queue] image list queue use shuffle: %s'%(FLAGS.mode == 'Train'))
            # One producer slices both path lists together, so output[0] and
            # output[1] always refer to the same list position.
            output = tf.train.slice_input_producer(
                [image_list_LR_tensor, image_list_HR_tensor],
                shuffle=False,
                capacity=FLAGS.name_queue_capacity)

            # Reading and decode the images
            # NOTE(review): `reader` is not referenced again in this function;
            # the files are read with tf.read_file below.
            reader = tf.WholeFileReader(name='image_reader')
            image_LR = tf.read_file(output[0])
            image_HR = tf.read_file(output[1])
            input_image_LR = tf.image.decode_png(image_LR, channels=3)
            input_image_HR = tf.image.decode_png(image_HR, channels=3)
            input_image_LR = tf.image.convert_image_dtype(input_image_LR,
                                                          dtype=tf.float32)
            input_image_HR = tf.image.convert_image_dtype(input_image_HR,
                                                          dtype=tf.float32)

            # Only the LR image's channel count is asserted; the identity ops
            # tie both images to that assertion.
            assertion = tf.assert_equal(
                tf.shape(input_image_LR)[2],
                3,
                message="image does not have 3 channels")
            with tf.control_dependencies([assertion]):
                input_image_LR = tf.identity(input_image_LR)
                input_image_HR = tf.identity(input_image_HR)

            # Normalize the low resolution image to [0, 1], high resolution to [-1, 1]
            a_image = preprocessLR(input_image_LR)
            b_image = preprocess(input_image_HR)

            inputs, targets = [a_image, b_image]

        # The data augmentation part
        with tf.name_scope('data_preprocessing'):
            with tf.name_scope('random_crop'):
                # Check whether perform crop
                if (FLAGS.random_crop is True) and FLAGS.mode == 'train':
                    print('[Config] Use random crop')
                    # Set the shape of the input image. the target will have 4X size
                    input_size = tf.shape(inputs)
                    # NOTE(review): `target_size` is not used afterwards.
                    target_size = tf.shape(targets)
                    # Offsets are drawn in LR pixel coordinates from
                    # [0, size - crop_size) and floored to integers.
                    offset_w = tf.cast(tf.floor(
                        tf.random_uniform([], 0,
                                          tf.cast(input_size[1], tf.float32) -
                                          FLAGS.crop_size)),
                                       dtype=tf.int32)
                    offset_h = tf.cast(tf.floor(
                        tf.random_uniform([], 0,
                                          tf.cast(input_size[0], tf.float32) -
                                          FLAGS.crop_size)),
                                       dtype=tf.int32)

                    # NOTE(review): for any other FLAGS.task value no crop is
                    # applied in this branch.
                    if FLAGS.task == 'SRGAN' or FLAGS.task == 'SRResnet':
                        inputs = tf.image.crop_to_bounding_box(
                            inputs, offset_h, offset_w, FLAGS.crop_size,
                            FLAGS.crop_size)
                        # The HR crop is the LR crop scaled by 4.
                        targets = tf.image.crop_to_bounding_box(
                            targets, offset_h * 4, offset_w * 4,
                            FLAGS.crop_size * 4, FLAGS.crop_size * 4)
                    elif FLAGS.task == 'denoise':
                        inputs = tf.image.crop_to_bounding_box(
                            inputs, offset_h, offset_w, FLAGS.crop_size,
                            FLAGS.crop_size)
                        targets = tf.image.crop_to_bounding_box(
                            targets, offset_h, offset_w, FLAGS.crop_size,
                            FLAGS.crop_size)
                # Do not perform crop
                else:
                    inputs = tf.identity(inputs)
                    targets = tf.identity(targets)

            with tf.variable_scope('random_flip'):
                # Check for random flip:
                if (FLAGS.flip is True) and (FLAGS.mode == 'train'):
                    print('[Config] Use random flip')
                    # Produce the decision of random flip
                    # The same decision is passed to both calls so input and
                    # target receive the same random draw.
                    decision = tf.random_uniform([], 0, 1, dtype=tf.float32)

                    input_images = random_flip(inputs, decision)
                    target_images = random_flip(targets, decision)
                else:
                    input_images = tf.identity(inputs)
                    target_images = tf.identity(targets)

            # Static shapes are declared as crop_size-based even when no crop
            # was performed above.
            if FLAGS.task == 'SRGAN' or FLAGS.task == 'SRResnet':
                input_images.set_shape([FLAGS.crop_size, FLAGS.crop_size, 3])
                target_images.set_shape(
                    [FLAGS.crop_size * 4, FLAGS.crop_size * 4, 3])
            elif FLAGS.task == 'denoise':
                input_images.set_shape([FLAGS.crop_size, FLAGS.crop_size, 3])
                target_images.set_shape([FLAGS.crop_size, FLAGS.crop_size, 3])

        if FLAGS.mode == 'train':
            paths_LR_batch, paths_HR_batch, inputs_batch, targets_batch = tf.train.shuffle_batch(
                [output[0], output[1], input_images, target_images],
                batch_size=FLAGS.batch_size,
                capacity=FLAGS.image_queue_capacity + 4 * FLAGS.batch_size,
                min_after_dequeue=FLAGS.image_queue_capacity,
                num_threads=FLAGS.queue_thread)
        else:
            paths_LR_batch, paths_HR_batch, inputs_batch, targets_batch = tf.train.batch(
                [output[0], output[1], input_images, target_images],
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.queue_thread,
                allow_smaller_final_batch=True)

        steps_per_epoch = int(math.ceil(len(image_list_LR) / FLAGS.batch_size))
        # NOTE(review): a full batch_size is declared here even on the
        # tf.train.batch path, which sets allow_smaller_final_batch=True.
        if FLAGS.task == 'SRGAN' or FLAGS.task == 'SRResnet':
            inputs_batch.set_shape(
                [FLAGS.batch_size, FLAGS.crop_size, FLAGS.crop_size, 3])
            targets_batch.set_shape([
                FLAGS.batch_size, FLAGS.crop_size * 4, FLAGS.crop_size * 4, 3
            ])
        elif FLAGS.task == 'denoise':
            inputs_batch.set_shape(
                [FLAGS.batch_size, FLAGS.crop_size, FLAGS.crop_size, 3])
            targets_batch.set_shape(
                [FLAGS.batch_size, FLAGS.crop_size, FLAGS.crop_size, 3])

    return Data(paths_LR=paths_LR_batch,
                paths_HR=paths_HR_batch,
                inputs=inputs_batch,
                targets=targets_batch,
                image_count=len(image_list_LR),
                steps_per_epoch=steps_per_epoch)
def _tf_half_up(data: tf.Tensor, name: str = 'half_up_rounding') -> tf.Tensor:
    """Return ``floor(data + 0.5)``, built inside the name scope ``name``.

    The 0.5 constant is created with ``data``'s dtype so the addition does not
    mix dtypes.
    """
    with tf.name_scope(name):
        half = tf.constant(0.5, dtype=data.dtype)
        shifted = data + half
        return tf.floor(shifted)
def yolo_loss(_y_true, _y_pred):
    """Weighted squared-error loss between re-encoded predictions and targets.

    Both arguments are indexed with five axes; the last axis is sliced as
    ``0:2``, ``2:4``, ``4`` and ``5:``. The names ``_anchors``, ``_grid_w``,
    ``_grid_h``, ``_norm_w``, ``_norm_h``, ``_box``, ``_class`` and the
    ``_scale_*`` weights are not defined in this block and are read from the
    enclosing scope.

    Returns:
        A scalar tensor: 0.5 times the mean, over the first axis of the
        reshaped loss, of the per-row weighted squared error.
    """
    # Parse the comma-separated anchor string into (w, h) pairs.
    # NOTE(review): the reshape hard-codes 5 anchors while the final reshape
    # uses `_box`; the two agree only if _box == 5 - confirm.
    _anchor = [float(_an.strip()) for _an in _anchors.split(',')]
    _anchor = np.reshape(_anchor, [1, 1, 1, 5, 2])

    # Re-encode raw predictions: sigmoid xy, sqrt of anchor-scaled and
    # grid-normalized wh, sigmoid confidence, softmax class scores.
    _pred_box_xy = tf.sigmoid(_y_pred[:, :, :, :, :2])
    _pred_box_wh = tf.exp(_y_pred[:, :, :, :, 2:4]) * _anchor
    _pred_box_wh = tf.sqrt(
        _pred_box_wh /
        np.reshape([float(_grid_w), float(_grid_h)], [1, 1, 1, 1, 2]))
    _pred_box_conf = tf.expand_dims(tf.sigmoid(_y_pred[:, :, :, :, 4]), -1)
    _pred_box_prob = tf.nn.softmax(_y_pred[:, :, :, :, 5:])
    # _pred_box_prob = tf.expand_dims(tf.sigmoid(_y_pred[:,:,:,:,5]), -1)

    _y_pred = tf.concat(
        [_pred_box_xy, _pred_box_wh, _pred_box_conf, _pred_box_prob], 4)
    print("Y_pred shape: {}".format(_y_pred.shape))

    # Ground truth: channels 0:2 and 2:4 are averaged for the centre and
    # subtracted for the size (presumably two opposite box corners - TODO
    # confirm). The centre is converted to grid units and only its fractional
    # part inside the cell is kept.
    _center_xy = .5 * (_y_true[:, :, :, :, 0:2] + _y_true[:, :, :, :, 2:4])
    _center_xy = _center_xy / np.reshape([(float(_norm_w) / _grid_w),
                                          (float(_norm_h) / _grid_h)],
                                         [1, 1, 1, 1, 2])
    _true_box_xy = _center_xy - tf.floor(_center_xy)
    _true_box_wh = (_y_true[:, :, :, :, 2:4] - _y_true[:, :, :, :, 0:2])
    _true_box_wh = tf.sqrt(
        _true_box_wh /
        np.reshape([float(_norm_w), float(_norm_h)], [1, 1, 1, 1, 2]))

    # Undo the sqrt encoding and scale to grid units to get corners and areas
    # for the IoU computation.
    _pred_tem_wh = tf.pow(_pred_box_wh, 2) * np.reshape([_grid_w, _grid_h],
                                                        [1, 1, 1, 1, 2])
    _pred_box_area = _pred_tem_wh[:, :, :, :, 0] * _pred_tem_wh[:, :, :, :, 1]
    _pred_box_ul = _pred_box_xy - 0.5 * _pred_tem_wh
    _pred_box_br = _pred_box_xy + 0.5 * _pred_tem_wh

    _true_tem_wh = tf.pow(_true_box_wh, 2) * np.reshape([_grid_w, _grid_h],
                                                        [1, 1, 1, 1, 2])
    _true_box_area = _true_tem_wh[:, :, :, :, 0] * _true_tem_wh[:, :, :, :, 1]
    _true_box_ul = _true_box_xy - 0.5 * _true_tem_wh
    _true_box_br = _true_box_xy + 0.5 * _true_tem_wh

    # Intersection rectangle; negative extents are clamped to zero.
    _intersect_ul = tf.maximum(_pred_box_ul, _true_box_ul)
    _intersect_br = tf.minimum(_pred_box_br, _true_box_br)
    _intersect_wh = _intersect_br - _intersect_ul
    _intersect_wh = tf.maximum(_intersect_wh, 0.0)
    _intersect_area = _intersect_wh[:, :, :, :, 0] * _intersect_wh[:, :, :, :, 1]

    _iou = tf.truediv(_intersect_area,
                      _true_box_area + _pred_box_area - _intersect_area)
    print("iou shape: {}".format(_iou.shape))

    # https://blog.csdn.net/dmy88888/article/details/81144835
    # Mark, along axis 3, the entries whose IoU equals the maximum
    # (keep_dims=True so the comparison broadcasts).
    _reduce_max = tf.reduce_max(_iou, [3], True)
    print("reduce_max shape: {}".format(_reduce_max.shape))
    _best_box = tf.equal(_iou, _reduce_max)
    _best_box = tf.to_float(_best_box)
    print("best_box shape{}".format(_best_box.shape))

    # Target confidence is channel 4 of the ground truth, kept only for the
    # best-IoU entries.
    _true_box_conf = tf.expand_dims(_best_box * _y_true[:, :, :, :, 4], -1)
    _true_box_prob = _y_true[:, :, :, :, 5:]

    _y_true = tf.concat(
        [_true_box_xy, _true_box_wh, _true_box_conf, _true_box_prob], 4)
    print("Y_true shape: {}".format(_y_true.shape))

    # https://github.com/magee256/yolo_v2/blob/master/training/yolo_loss.py
    # https://medium.com/@jonathan_hui/real-time-object-detection-with-yolo-yolov2-28b1b93e2088
    # https://trungthanhnguyen0502.github.io/computer%20vision/2018/12/10/yolo_tutorial-2-yolo2-algorithms/
    # Per-channel weights: the 4 coordinate channels and the `_class`
    # probability channels are weighted by the target confidence; the
    # confidence channel blends the no-object and object scales.
    _weight_coor = tf.concat(4 * [_true_box_conf], 4)
    _weight_coor = _scale_coor * _weight_coor
    _weight_conf = _scale_nood * (
        1. - _true_box_conf) + _scale_conf * _true_box_conf
    _weight_prob = tf.concat(_class * [_true_box_conf], 4)
    _weight_prob = _scale_prob * _weight_prob
    _weight = tf.concat([_weight_coor, _weight_conf, _weight_prob], 4)

    # Weighted squared error, summed per flattened row, then averaged.
    _loss = tf.pow(_y_pred - _y_true, 2)
    _loss = _loss * _weight
    _loss = tf.reshape(_loss,
                       [-1, _grid_h * _grid_w * _box * (4 + 1 + _class)])
    _loss = tf.reduce_sum(_loss, 1)
    _loss = .5 * tf.reduce_mean(_loss)
    return _loss
def salmap_u_rendering(salmap_u_lens, ray_u_depths, lfsize):
    """Resample ``salmap_u_lens`` along x with a depth-dependent shift.

    A (b, y, x, u) coordinate grid is built from the first three dimensions of
    ``salmap_u_lens`` and from ``lfsize[2]``. The x coordinate is shifted by
    ``u * ray_u_depths`` (u centred on zero), and the input is read at the four
    clipped integer neighbours of the warped (y, x) position and blended with
    bilinear weights.

    Returns:
        The weighted sum of the four gathered neighbour tensors.
    """
    with tf.variable_scope('salmap_u_rendering'):
        n_b = tf.shape(salmap_u_lens)[0]
        n_y = tf.shape(salmap_u_lens)[1]
        n_x = tf.shape(salmap_u_lens)[2]
        n_u = lfsize[2]

        # Coordinate axes; u is centred on zero and sign-flipped.
        b_axis = tf.to_float(tf.range(n_b))
        u_axis = -1 * (tf.to_float(tf.range(n_u)) - tf.to_float(n_u - 1) / 2.0)
        y_axis = tf.to_float(tf.range(n_y))
        x_axis = tf.to_float(tf.range(n_x))
        grid_b, grid_y, grid_x, grid_u = tf.meshgrid(
            b_axis, y_axis, x_axis, u_axis, indexing='ij')

        # Warp: y is left untouched, x is shifted by u times the ray depth.
        warped_y = grid_y
        warped_x = grid_x - grid_u * ray_u_depths
        # Map the centred u coordinate back to an index value.
        u_index_f = -1 * grid_u + tf.to_float(n_u - 1) / 2.0

        # Integer neighbours around the warped position.
        b_idx = tf.to_int32(grid_b)
        y_lo = tf.to_int32(tf.floor(warped_y))
        y_hi = y_lo + 1
        x_lo = tf.to_int32(tf.floor(warped_x))
        x_hi = x_lo + 1
        u_idx = tf.to_int32(u_index_f)

        # Keep every neighbour inside the valid index range.
        y_lo = tf.clip_by_value(y_lo, 0, n_y - 1)
        y_hi = tf.clip_by_value(y_hi, 0, n_y - 1)
        x_lo = tf.clip_by_value(x_lo, 0, n_x - 1)
        x_hi = tf.clip_by_value(x_hi, 0, n_x - 1)

        # Four corners in the order (y_lo,x_lo), (y_hi,x_lo), (y_lo,x_hi),
        # (y_hi,x_hi); the weights below follow the same order.
        corner_coords = [(y_lo, x_lo), (y_hi, x_lo), (y_lo, x_hi), (y_hi, x_hi)]
        corner_idx = [
            tf.stack([b_idx, cy, cx, u_idx], -1) for cy, cx in corner_coords
        ]
        corner_vals = [tf.gather_nd(salmap_u_lens, idx) for idx in corner_idx]

        # Bilinear weights measured from the (clipped) lower neighbour.
        y_lo_f = tf.to_float(y_lo)
        x_lo_f = tf.to_float(x_lo)
        wy_lo = 1.0 - (warped_y - y_lo_f)
        wy_hi = 1.0 - wy_lo
        wx_lo = 1.0 - (warped_x - x_lo_f)
        wx_hi = 1.0 - wx_lo

        corner_w = [wy_lo * wx_lo, wy_hi * wx_lo, wy_lo * wx_hi, wy_hi * wx_hi]
        rendered = tf.add_n([w * v for w, v in zip(corner_w, corner_vals)])
    return rendered
def sample(probs):
    """Return ``floor(probs + noise)`` with ``noise`` uniform on [0, 1).

    ``noise`` has the same shape as ``probs``. For entries of ``probs`` in
    [0, 1] the result is a random 0/1 value that equals 1 with that
    probability.
    """
    noise = tf.random_uniform(tf.shape(probs), 0, 1)
    return tf.floor(probs + noise)
def _should_apply(prob):
    """Return a scalar bool tensor: ``floor(u + prob)`` cast to bool.

    ``u`` is a scalar float32 uniform sample, so for ``prob`` in [0, 1] the
    result is True with probability ``prob``.
    """
    draw = tf.random_uniform([], dtype=tf.float32)
    return tf.cast(tf.floor(draw + prob), tf.bool)
def get_neighbours(inputs, height):
    """Return the floor of ``inputs`` and that floor plus one, both clipped.

    Both results are clipped to the range [0, height - 1].
    """
    floor_idx = tf.floor(inputs)
    next_idx = floor_idx + 1
    return (tf.clip_by_value(floor_idx, 0, height - 1),
            tf.clip_by_value(next_idx, 0, height - 1))
def body1(self, num, object_num, loss, predict, labels, nilboy):
    """Add the loss contribution of the ``num``-th valid label to ``loss``.

    Args:
      num: index of the label to process among the rows of ``labels`` whose
        column 4 is non-zero.
      object_num: passed through unchanged.
      loss: sequence of four running losses
        [class, object, noobject, coord].
      predict: 3-D tensor indexed as [cell_size, cell_size, channels]. The
        code slices the channels as ``num_classes`` class scores, then
        ``boxes_per_cell`` confidences, then ``boxes_per_cell * 4`` box
        values.
      labels : [max_objects, 5] (x_center, y_center, w, h, class)
      nilboy: ignored on input; replaced by the responsibility mask ``I``.

    Returns:
      Tuple ``(num + 1, object_num, updated loss list, predict, labels,
      nilboy)``.
    """
    # Drop rows whose column 4 casts to False, then pick row `num`.
    filtered_labels = tf.boolean_mask(labels,
                                      tf.cast(labels[:, 4], dtype=tf.bool))
    label = filtered_labels[num:num + 1, :]
    label = tf.reshape(label, [-1])
    # label = tf.Print(label, [label], "\n\nLABEL: ", summarize=5)

    # calculate objects tensor [CELL_SIZE, CELL_SIZE]
    # Box extent converted to cell units.
    min_x = (label[0] - (label[2] / 2)) * self.cell_size
    max_x = (label[0] + (label[2] / 2)) * self.cell_size

    min_y = (label[1] - (label[3] / 2)) * self.cell_size
    max_y = (label[1] + (label[3] / 2)) * self.cell_size

    # due to rounding error the bounding box can slightly leave the picture,
    # which might result in index out of bounds, so clip it
    min_x = tf.clip_by_value(tf.floor(min_x), 0, self.cell_size - 1)
    min_y = tf.clip_by_value(tf.floor(min_y), 0, self.cell_size - 1)
    max_x = tf.clip_by_value(tf.ceil(max_x), 0, self.cell_size - 1)
    max_y = tf.clip_by_value(tf.ceil(max_y), 0, self.cell_size - 1)

    # Block of ones covering the box, zero-padded out to the full grid
    # (rows are y, columns are x).
    temp = tf.cast(tf.stack([max_y - min_y, max_x - min_x]), dtype=tf.int32)
    # temp = tf.Print(temp, [min_x, min_y, max_x, max_y], "\n\nMaximums: ", summarize=4)
    objects = tf.ones(temp, tf.float32)

    temp = tf.cast(
        tf.stack(
            [min_y, self.cell_size - max_y, min_x, self.cell_size - max_x]),
        tf.int32)
    # temp = tf.Print(temp, [temp], "\n\nPadding for objects: ", summarize=4)
    temp = tf.reshape(temp, (2, 2))
    objects = tf.pad(objects, temp, "CONSTANT")

    # calculate objects tensor [CELL_SIZE, CELL_SIZE]
    # calculate responsible tensor [CELL_SIZE, CELL_SIZE]
    # A single 1 at the cell containing the box centre (row = y, col = x).
    center_x = label[0] * self.cell_size
    center_x = tf.floor(center_x)

    center_y = label[1] * self.cell_size
    center_y = tf.floor(center_y)

    response = tf.ones([1, 1], tf.float32)

    temp = tf.cast(
        tf.stack([
            center_y, self.cell_size - center_y - 1, center_x,
            self.cell_size - center_x - 1
        ]), tf.int32)
    temp = tf.reshape(temp, (2, 2))
    response = tf.pad(response, temp, "CONSTANT")
    # objects = response

    # calculate iou_predict_truth [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
    predict_boxes = predict[:, :, self.num_classes + self.boxes_per_cell:]

    predict_boxes = tf.reshape(
        predict_boxes,
        [self.cell_size, self.cell_size, self.boxes_per_cell, 4])

    # Scale the predicted x/y offsets by 1 / cell_size; w and h are left as is.
    predict_boxes = predict_boxes * [
        1 / self.cell_size, 1 / self.cell_size, 1, 1
    ]

    # Per-cell origin added to the predicted offsets.
    # NOTE(review): base_boxes is indexed [x, y] here, while `response` and
    # `objects` above use rows for y and columns for x - this looks
    # transposed; confirm against the code that decodes predictions.
    base_boxes = np.zeros([self.cell_size, self.cell_size, 4])

    for y in range(self.cell_size):
        for x in range(self.cell_size):
            base_boxes[x, y, :] = [
                x / self.cell_size, y / self.cell_size, 0, 0
            ]
    base_boxes = np.tile(
        np.resize(base_boxes, [self.cell_size, self.cell_size, 1, 4]),
        [1, 1, self.boxes_per_cell, 1])

    predict_boxes = base_boxes + predict_boxes

    iou_predict_truth = self.iou(predict_boxes, label[0:4])
    # calculate C [cell_size, cell_size, boxes_per_cell]
    C = iou_predict_truth * tf.reshape(response,
                                       [self.cell_size, self.cell_size, 1])

    # calculate I tensor [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
    # I is 1 for the box with the highest IoU inside the responsible cell.
    I = iou_predict_truth * tf.reshape(response,
                                       (self.cell_size, self.cell_size, 1))

    max_I = tf.reduce_max(I, 2, keep_dims=True)

    I = tf.cast((I >= max_I), tf.float32) * tf.reshape(
        response, (self.cell_size, self.cell_size, 1))

    # calculate no_I tensor [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
    no_I = tf.ones_like(I, dtype=tf.float32) - I

    # Predicted confidences.
    p_C = predict[:, :,
                  self.num_classes:self.num_classes + self.boxes_per_cell]

    # calculate truth x,y,sqrt_w,sqrt_h 0-D
    # (x and y are rebound here; the loop variables above are no longer needed)
    x = label[0]
    y = label[1]

    sqrt_w = tf.sqrt(tf.abs(label[2]))
    sqrt_h = tf.sqrt(tf.abs(label[3]))

    # calculate predict p_x, p_y, p_sqrt_w, p_sqrt_h 3-D [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
    p_x = predict_boxes[:, :, :, 0]
    p_y = predict_boxes[:, :, :, 1]

    # Predicted w/h are clamped to [0, image_size] before the square root.
    p_sqrt_w = tf.sqrt(
        tf.minimum(self.image_size * 1.0,
                   tf.maximum(0.0, predict_boxes[:, :, :, 2])))
    p_sqrt_h = tf.sqrt(
        tf.minimum(self.image_size * 1.0,
                   tf.maximum(0.0, predict_boxes[:, :, :, 3])))

    # calculate truth p 1-D tensor [NUM_CLASSES]
    P = tf.one_hot(tf.cast(label[4], tf.int32),
                   self.num_classes,
                   dtype=tf.float32)

    # calculate predict p_P 3-D tensor [CELL_SIZE, CELL_SIZE, NUM_CLASSES]
    p_P = predict[:, :, 0:self.num_classes]

    # Class loss over every cell covered by the box; object/coord losses only
    # for the responsible box; no-object loss for all other boxes.
    class_loss = tf.nn.l2_loss(
        tf.reshape(objects, (self.cell_size, self.cell_size, 1)) *
        (p_P - P)) * self.class_scale

    object_loss = tf.nn.l2_loss(I * (p_C - C)) * self.object_scale

    noobject_loss = tf.nn.l2_loss(no_I * (p_C)) * self.noobject_scale

    coord_loss = (tf.nn.l2_loss(I * (p_x - x) /
                                (self.image_size / self.cell_size)) +
                  tf.nn.l2_loss(I * (p_y - y) /
                                (self.image_size / self.cell_size)) +
                  tf.nn.l2_loss(I * (p_sqrt_w - sqrt_w)) / self.image_size +
                  tf.nn.l2_loss(I * (p_sqrt_h - sqrt_h)) / self.image_size) * self.coord_scale

    nilboy = I

    return num + 1, object_num, [
        loss[0] + class_loss, loss[1] + object_loss, loss[2] + noobject_loss,
        loss[3] + coord_loss
    ], predict, labels, nilboy
def _bilinear_sampler(im, x, y, name='blinear_sampler'):
    """Bilinearly sample ``im`` at normalized (x, y) coordinates.

    This is the differentiable bilinear sampling mechanism of
    https://arxiv.org/abs/1506.02025. Coordinates are normalized to [-1, 1]:
    (-1, -1) maps to pixel (0, 0) of ``im`` and (1, 1) to its bottom-right
    pixel.

    Args:
      im: Batch of images with shape [B, h, w, channels].
      x: Tensor of normalized x coordinates in [-1, 1], with shape [B, h, w, 1].
      y: Tensor of normalized y coordinates in [-1, 1], with shape [B, h, w, 1].
      name: Name scope for ops.

    Returns:
      Sampled image with shape [B, h, w, channels].
      Principled mask with shape [B, h, w, 1], dtype:float32. A value of 1.0
        in the mask indicates that the corresponding coordinate in the sampled
        image is valid.
    """
    with tf.variable_scope(name):
        flat_x = tf.reshape(x, [-1])
        flat_y = tf.reshape(y, [-1])

        # Static and dynamic image dimensions.
        n_batch = tf.shape(im)[0]
        _, height, width, channels = im.get_shape().as_list()

        flat_x = tf.to_float(flat_x)
        flat_y = tf.to_float(flat_y)
        height_f = tf.cast(height, 'float32')
        width_f = tf.cast(width, 'float32')
        zero = tf.constant(0, dtype=tf.int32)
        y_limit = tf.cast(tf.shape(im)[1] - 1, 'int32')
        x_limit = tf.cast(tf.shape(im)[2] - 1, 'int32')

        # Map [-1, 1] onto pixel coordinates [0, width - 1] / [0, height - 1].
        px = (flat_x + 1.0) * (width_f - 1.0) / 2.0
        py = (flat_y + 1.0) * (height_f - 1.0) / 2.0

        # Integer coordinates of the four surrounding pixels.
        x_lo = tf.cast(tf.floor(px), 'int32')
        x_hi = x_lo + 1
        y_lo = tf.cast(tf.floor(py), 'int32')
        y_hi = y_lo + 1

        # A sample is valid only when all four neighbours are inside the image.
        x_inside = tf.logical_and(x_lo >= zero, x_hi <= x_limit)
        y_inside = tf.logical_and(y_lo >= zero, y_hi <= y_limit)
        mask = tf.logical_and(x_inside, y_inside)
        mask = tf.to_float(mask)

        x_lo = tf.clip_by_value(x_lo, zero, x_limit)
        x_hi = tf.clip_by_value(x_hi, zero, x_limit)
        y_lo = tf.clip_by_value(y_lo, zero, y_limit)
        y_hi = tf.clip_by_value(y_hi, zero, y_limit)

        row_stride = width
        image_stride = width * height

        # Flat offset of each sample's image inside the flattened batch.
        batch_offset = tf.range(n_batch) * image_stride
        batch_offset = tf.reshape(batch_offset, [-1, 1])
        batch_offset = tf.tile(batch_offset, [1, height * width])
        batch_offset = tf.reshape(batch_offset, [-1])

        row_lo = batch_offset + y_lo * row_stride
        row_hi = batch_offset + y_hi * row_stride
        idx_lo_lo = row_lo + x_lo
        idx_hi_lo = row_hi + x_lo
        idx_lo_hi = row_lo + x_hi
        idx_hi_hi = row_hi + x_hi

        # Look the pixels up in the flattened image, keeping the channel axis.
        flat_im = tf.reshape(im, tf.stack([-1, channels]))
        flat_im = tf.to_float(flat_im)
        px_lo_lo = tf.gather(flat_im, idx_lo_lo)
        px_hi_lo = tf.gather(flat_im, idx_hi_lo)
        px_lo_hi = tf.gather(flat_im, idx_lo_hi)
        px_hi_hi = tf.gather(flat_im, idx_hi_hi)

        x_hi_f = tf.to_float(x_hi)
        y_hi_f = tf.to_float(y_hi)

        # Interpolation weights, measured from the (clipped) upper neighbour.
        w_lo_lo = tf.expand_dims(((x_hi_f - px) * (y_hi_f - py)), 1)
        w_hi_lo = tf.expand_dims((x_hi_f - px) * (1.0 - (y_hi_f - py)), 1)
        w_lo_hi = tf.expand_dims(((1.0 - (x_hi_f - px)) * (y_hi_f - py)), 1)
        w_hi_hi = tf.expand_dims(
            ((1.0 - (x_hi_f - px)) * (1.0 - (y_hi_f - py))), 1)

        sampled = tf.add_n([
            w_lo_lo * px_lo_lo, w_hi_lo * px_hi_lo, w_lo_hi * px_lo_hi,
            w_hi_hi * px_hi_hi
        ])
        sampled = tf.reshape(sampled,
                             tf.stack([n_batch, height, width, channels]))
        mask = tf.reshape(mask, tf.stack([n_batch, height, width, 1]))
        return sampled, mask
def __init__(
    self,
    train_envs,
    n_act_dim=envs.N_ACT_DIM,
    n_obs_dim=envs.N_OBS_DIM,
    true_dynamics=None,
):
    """Build the TF graph: per-task Q networks plus one dynamics network.

    The loss is the negative mean log-likelihood of demonstrated actions plus
    ``sq_td_err_penalty`` times the mean squared TD error of the Q networks
    under the learned dynamics model.

    The hyper-parameters ``q_scope``, ``q_n_layers``, ``q_layer_size``,
    ``q_activation``, ``q_output_activation``, ``im_scope``, ``n_layers``,
    ``layer_size``, ``activation``, ``output_activation``, ``gamma``,
    ``sq_td_err_penalty`` and ``learning_rate`` are not parameters of this
    method; they are read from an enclosing scope.

    Args:
      train_envs: List of environments. Uses known rewards
        (``env.unwrapped.R``).
      n_act_dim: number of discrete actions.
      n_obs_dim: number of discrete observations.
      true_dynamics: If None, use dynamics from first training task
        (argmax over axis 2 of ``train_envs[0].unwrapped.T``).
    """
    n_train_tasks = len(train_envs)

    # Demonstration batch: observation, action and task index per sample.
    demo_obs_t_ph = tf.placeholder(tf.int32, [None])
    demo_act_t_ph = tf.placeholder(tf.int32, [None])
    demo_task_t_ph = tf.placeholder(tf.int32, [None])
    demo_batch_size_ph = tf.placeholder(tf.int32)

    # Column vector [0, 1, ..., batch_size - 1] used to build gather indices.
    demo_batch_idxes = tf.reshape(
        tf.range(0, demo_batch_size_ph, 1), [demo_batch_size_ph, 1])

    # Q-values of every task's network on the demo observations, stacked with
    # the task on axis 0. These calls create the per-task variables.
    demo_q_t = tf.stack([
        self._build_mlp(
            self._featurize_obs(demo_obs_t_ph, n_obs_dim),
            n_act_dim,
            q_scope+'-'+str(train_task_idx),
            n_layers=q_n_layers,
            size=q_layer_size,
            activation=q_activation,
            output_activation=q_output_activation,
        ) for train_task_idx in range(n_train_tasks)
    ], axis=0)
    # For each sample keep only the row produced by its own task's network.
    demo_q_t = tf.gather_nd(demo_q_t, tf.concat(
        [tf.expand_dims(demo_task_t_ph, 1), demo_batch_idxes], axis=1))

    # Log-likelihood of the demonstrated action: Q(s, a) - logsumexp_a Q(s, .)
    demo_act_idxes = tf.concat([demo_batch_idxes, tf.reshape(
        demo_act_t_ph, [demo_batch_size_ph, 1])], axis=1)
    demo_act_val_t = tf.gather_nd(demo_q_t, demo_act_idxes)
    state_val_t = tf.reduce_logsumexp(demo_q_t, axis=1)
    act_log_likelihoods = demo_act_val_t - state_val_t
    neg_avg_log_likelihood = -tf.reduce_mean(act_log_likelihoods)

    # Enumerate every (observation, action) pair as a flat index and feed its
    # one-hot encoding to the dynamics network.
    obs_for_obs_tp1_probs = tf.cast(tf.floor(
        tf.range(0, n_obs_dim*n_act_dim, 1) / n_act_dim), dtype=tf.int32)
    act_for_obs_tp1_probs = tf.floormod(tf.range(
        0, n_obs_dim*n_act_dim, 1), n_act_dim)
    obs_tp1_probs_in = tf.one_hot(
        obs_for_obs_tp1_probs*n_act_dim+act_for_obs_tp1_probs,
        n_obs_dim*n_act_dim)
    obs_tp1_probs = self._build_mlp(
        obs_tp1_probs_in,
        n_obs_dim, im_scope, n_layers=n_layers, size=layer_size,
        activation=activation, output_activation=output_activation
    )
    # Indexed as [observation, action, next observation].
    obs_tp1_probs = tf.reshape(
        obs_tp1_probs, [n_obs_dim, n_act_dim, n_obs_dim])

    # Q-values of every task for every observation, reusing the variables
    # created above.
    q_tp1 = tf.stack([
        self._build_mlp(
            self._featurize_obs(tf.range(0, n_obs_dim, 1), n_obs_dim),
            n_act_dim,
            q_scope+'-'+str(train_task_idx),
            n_layers=q_n_layers,
            size=q_layer_size,
            activation=q_activation,
            output_activation=q_output_activation,
            reuse=True,
        ) for train_task_idx in range(n_train_tasks)
    ], axis=0)

    # Soft state value: logsumexp over the action axis.
    v_tp1 = tf.reduce_logsumexp(q_tp1, axis=2)

    # Known rewards of every training environment, stacked on axis 0.
    all_rew = tf.convert_to_tensor(np.stack(
        [env.unwrapped.R for env in train_envs], axis=0), dtype=tf.float32)

    # Expected next-state value and reward under the dynamics model; the sum
    # runs over the next-observation axis (axis 3).
    v_tp1_broad = tf.reshape(v_tp1, [n_train_tasks, 1, 1, n_obs_dim])
    obs_tp1_probs_broad = tf.expand_dims(obs_tp1_probs, 0)

    exp_v_tp1 = tf.reduce_sum(obs_tp1_probs_broad * v_tp1_broad, axis=3)
    exp_rew_t = tf.reduce_sum(obs_tp1_probs_broad * all_rew, axis=3)
    target_t = exp_rew_t + gamma * exp_v_tp1

    # Built with exactly the same calls as q_tp1 (also reuse=True).
    q_t = tf.stack([
        self._build_mlp(
            self._featurize_obs(tf.range(0, n_obs_dim, 1), n_obs_dim),
            n_act_dim,
            q_scope+'-'+str(train_task_idx),
            n_layers=q_n_layers,
            size=q_layer_size,
            activation=q_activation,
            output_activation=q_output_activation,
            reuse=True,
        ) for train_task_idx in range(n_train_tasks)
    ], axis=0)

    td_err = q_t - target_t
    sq_td_err = tf.reduce_mean(td_err**2)

    loss = neg_avg_log_likelihood + sq_td_err_penalty * sq_td_err

    update_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)

    # Expose the placeholders and the ops needed for training and evaluation.
    self.n_act_dim = n_act_dim
    self.n_obs_dim = n_obs_dim
    self.demo_obs_t_ph = demo_obs_t_ph
    self.demo_act_t_ph = demo_act_t_ph
    self.demo_task_t_ph = demo_task_t_ph
    self.demo_batch_size_ph = demo_batch_size_ph
    self.q_t = q_t
    self.loss = loss
    self.neg_avg_log_likelihood = neg_avg_log_likelihood
    self.sq_td_err = sq_td_err
    self.update_op = update_op
    self.obs_tp1_probs = obs_tp1_probs

    if true_dynamics is None:
        self.true_dynamics = np.argmax(train_envs[0].unwrapped.T, axis=2)
    else:
        self.true_dynamics = true_dynamics
#path, _, _, label = tf.decode_csv(val, record_defaults=record_defaults) readfile = tf.read_file(path) image = tf.image.decode_jpeg(readfile, channels=3) image = tf.image.convert_image_dtype(image, dtype=tf.float32) image = tf.cast(image, dtype=tf.float32) image = tf.image.resize_images(image, (model_size, model_size)) if a.augm is True: h, w, ch = image.get_shape() print(image.get_shape()) # transform params CROP_SIZE = int(h) SCALE_SIZE = int(h + 20) rot90_times = tf.random_uniform([1], 0, 5, dtype=tf.int32)[0] crop_offset = tf.cast(tf.floor( tf.random_uniform([2], 0, SCALE_SIZE - CROP_SIZE + 1, seed=seed)), dtype=tf.int32) def transform(img, rot90_times, crop_offset, scale_size=SCALE_SIZE, crop_size=CROP_SIZE): with tf.name_scope('transform'): r = img # rotation r = tf.image.rot90(r, k=rot90_times) # random crop r = tf.image.resize_images(r, [scale_size, scale_size], method=tf.image.ResizeMethod.AREA) r = tf.image.crop_to_bounding_box(r, crop_offset[0],