def test_forward_ceil():
    """Build a graph holding a single `tf.ceil` op over a random float32
    input and pass the tensor names to `compare_tf_with_tvm`."""
    input_shape = (1, 3, 10, 10)
    data = np.random.uniform(size=input_shape).astype(np.float32)
    graph = tf.Graph()
    with graph.as_default():
        placeholder = tf.placeholder(shape=data.shape, dtype=data.dtype)
        # The returned tensor is not needed: the comparison helper is given
        # tensor names ('Ceil:0'), so creating the node is enough.
        tf.ceil(placeholder)
        compare_tf_with_tvm(data, 'Placeholder:0', 'Ceil:0')
def pad_to_multiple(tensor, multiple):
    """Returns the tensor zero padded to the specified multiple.

    Appends 0s to the end of the height and width dimensions (axes 1 and 2)
    of the tensor until both dimensions are a multiple of the input argument
    'multiple'. E.g. given an input tensor of shape [1, 3, 5, 1] and an input
    multiple of 4, PadToMultiple will append 0s so that the resulting tensor
    will be of shape [1, 4, 8, 1].

    Args:
      tensor: rank 4 tensor, where
              tensor -> [batch_size, height, width, channels]. The padding is
              created with the tensor's own dtype, so dtypes other than
              float32 can be concatenated as well.
      multiple: the multiple to pad to.

    Returns:
      padded_tensor: the tensor zero padded to the specified multiple.
    """
    tensor_shape = tensor.get_shape()
    batch_size = static_shape.get_batch_size(tensor_shape)
    tensor_height = static_shape.get_height(tensor_shape)
    tensor_width = static_shape.get_width(tensor_shape)
    tensor_depth = static_shape.get_depth(tensor_shape)

    def _size_and_padded_size(static_size, axis):
        """Return (size, size rounded up to `multiple`) for one axis.

        A statically known size is rounded in Python so the result stays a
        plain int; an unknown size is read from the dynamic shape and
        rounded with TensorFlow ops.
        """
        if static_size is None:
            size = tf.shape(tensor)[axis]
            padded_size = tf.to_int32(
                tf.ceil(tf.to_float(size) / tf.to_float(multiple))) * multiple
        else:
            size = static_size
            padded_size = int(math.ceil(float(size) / multiple) * multiple)
        return size, padded_size

    if batch_size is None:
        batch_size = tf.shape(tensor)[0]

    tensor_height, padded_tensor_height = _size_and_padded_size(
        tensor_height, 1)
    tensor_width, padded_tensor_width = _size_and_padded_size(
        tensor_width, 2)

    if tensor_depth is None:
        tensor_depth = tf.shape(tensor)[3]

    # tf.zeros defaults to float32; matching the input dtype keeps tf.concat
    # valid for any input dtype.
    pad_dtype = tensor.dtype

    # Use tf.concat instead of tf.pad to preserve static shape
    if padded_tensor_height != tensor_height:
        height_pad = tf.zeros([
            batch_size, padded_tensor_height - tensor_height, tensor_width,
            tensor_depth
        ], dtype=pad_dtype)
        tensor = tf.concat([tensor, height_pad], 1)
    if padded_tensor_width != tensor_width:
        # The height has already been padded above, hence the padded height.
        width_pad = tf.zeros([
            batch_size, padded_tensor_height,
            padded_tensor_width - tensor_width, tensor_depth
        ], dtype=pad_dtype)
        tensor = tf.concat([tensor, width_pad], 2)

    return tensor
def _update_lipschitz(self,v,i): config = self.config if len(v.shape) > 1: k = self.config.weight_constraint_k or 100.0000 wi_hat = v if len(v.shape) == 4: #fij = tf.reduce_sum(tf.abs(wi_hat), axis=[0,1]) fij = wi_hat fij = tf.reduce_sum(tf.abs(fij), axis=[1]) fij = tf.reduce_max(fij, axis=[0]) else: fij = wi_hat if self.config.ortho_pnorm == "inf": wp = tf.reduce_max(tf.reduce_sum(tf.abs(fij), axis=0), axis=0) else: # conv wp = tf.reduce_max(tf.reduce_sum(tf.abs(fij), axis=1), axis=0) ratio = (1.0/tf.maximum(1.0, wp/k)) if self.config.weight_bounce: bounce = tf.minimum(1.0, tf.ceil(wp/k-0.999)) ratio -= tf.maximum(0.0, bounce) * 0.2 if self.config.weight_scaleup: up = tf.minimum(1.0, tf.ceil(0.02-wp/k)) ratio += tf.maximum(0.0, up) * k/wp * 0.2 wi = ratio*(wi_hat) #self.gan.metrics['wi'+str(i)]=wp #self.gan.metrics['wk'+str(i)]=ratio #self.gan.metrics['bouce'+str(i)]=bounce return tf.assign(v, wi) return None
def _anchor_component_tf(self):
    """Create the anchors with `generate_anchors_pre_tf` and store the
    results in `self._anchors` and `self._anchor_length`."""
    print('Use TF anchors')
    with tf.variable_scope('ANCHOR_' + self._tag):
        stride = np.float32(self._feat_stride[0])
        # just to get the shape right
        grid_height = tf.to_int32(tf.ceil(self._im_info[0, 0] / stride))
        grid_width = tf.to_int32(tf.ceil(self._im_info[0, 1] / stride))
        generated = generate_anchors_pre_tf(
            grid_height,
            grid_width,
            self._feat_stride[0],
            self._anchor_scales,
            self._anchor_ratios)
        self._anchors, self._anchor_length = generated
def _anchor_component(self):
    """Create the anchors by wrapping `generate_anchors_pre` in `tf.py_func`
    and store the results in `self._anchors` and `self._anchor_length`."""
    with tf.variable_scope('ANCHOR_' + self._tag):
        stride = np.float32(self._feat_stride[0])
        # just to get the shape right
        grid_height = tf.to_int32(tf.ceil(self._im_info[0, 0] / stride))
        grid_width = tf.to_int32(tf.ceil(self._im_info[0, 1] / stride))
        py_func_inputs = [
            grid_height,
            grid_width,
            self._feat_stride,
            self._anchor_scales,
            self._anchor_ratios,
        ]
        anchors, anchor_length = tf.py_func(
            generate_anchors_pre,
            py_func_inputs,
            [tf.float32, tf.int32],
            name="generate_anchors")
        # py_func outputs carry no static shape; restore what is known.
        anchors.set_shape([None, 4])
        anchor_length.set_shape([])
        self._anchors = anchors
        self._anchor_length = anchor_length
def sample_img(img, n_samples, img_size=28):
    """Sample `n_samples` random pixels from a flattened square image.

    Continuous coordinates are drawn uniformly from [0, img_size - 1),
    rounded to the nearest pixel, and looked up in `img` flattened
    row-major (index = x + y * img_size).

    Args:
      img: tensor reshaped to 1-D before the lookup; assumed to hold a single
        `img_size` x `img_size` image -- TODO confirm against callers.
      n_samples: number of points to sample.
      img_size: side length of the image in pixels; must be at least 2.
        Defaults to 28, the value previously hard-coded.

    Returns:
      A tuple `(x, y, samples)`: the continuous coordinates divided by
      `img_size - 1`, and the pixel values gathered at the rounded
      coordinates.
    """
    last_index = img_size - 1
    sx = tf.random_uniform((n_samples,), 0, 1) * last_index
    sy = tf.random_uniform((n_samples,), 0, 1) * last_index
    # Only the nearest-pixel lookup is used. The floor/ceil coordinates and
    # the packed index tensor that used to be built here were never read, and
    # the `tf.pack` call no longer exists in TensorFlow >= 1.0.
    sx_nearest = tf.cast(tf.round(sx), tf.int32)
    sy_nearest = tf.cast(tf.round(sy), tf.int32)
    flat_img = tf.reshape(img, (-1,))
    samples = tf.gather(flat_img, sx_nearest + sy_nearest * img_size)
    return sx / last_index, sy / last_index, samples
def _survival_function(self, y):
    """Survival function P[Y > y] of the quantized variable Y."""
    lower_cutoff = self._low
    upper_cutoff = self._high

    # Recall the promise:
    # survival_function(y) := P[Y > y]
    #                       = 0, if y >= high,
    #                       = 1, if y < low,
    #                       = P[X > y], otherwise.

    # P[Y > j] = P[ceiling(Y) > j] since mass is only at integers, not in
    # between.
    j = tf.ceil(y)

    # P[X > j], used when low < X < high.
    survival = self.distribution.survival_function(j)

    # Broadcast, because it's possible that this is a single distribution
    # being evaluated on a number of samples, or something like that.
    j = j + tf.zeros_like(survival)

    # Re-define values at the cutoffs.
    if lower_cutoff is not None:
        survival = tf.where(
            j < lower_cutoff, tf.ones_like(survival), survival)
    if upper_cutoff is not None:
        survival = tf.where(
            j >= upper_cutoff, tf.zeros_like(survival), survival)

    return survival
def resnet_fpn_backbone(image, num_blocks, freeze_c2=True):
    """Build the ResNet stages used as FPN inputs.

    The two trailing axes of `image` are padded up to a multiple of
    `cfg.FPN.RESOLUTION_REQUIREMENT` before the network is applied.

    Args:
        image: 4-D tensor; axis 1 is treated as channels and axes 2-3 as the
            spatial dimensions.
        num_blocks: sequence of four block counts, one per ResNet group.
        freeze_c2: if True, stop gradients at the output of the first group.

    Returns:
        The outputs of the four groups, `(c2, c3, c4, c5)`.
    """
    spatial_shape = tf.shape(image)[2:]
    mult = float(cfg.FPN.RESOLUTION_REQUIREMENT)
    padded_shape = tf.to_int32(tf.ceil(tf.to_float(spatial_shape) / mult) * mult)
    extra = padded_shape - spatial_shape
    assert len(num_blocks) == 4, num_blocks
    with resnet_argscope():
        chan = image.shape[1]
        pad_base = maybe_reverse_pad(2, 3)
        # The extra rows/columns needed to reach the required multiple are
        # added on the trailing side of each spatial axis.
        paddings = tf.stack([
            [0, 0],
            [0, 0],
            [pad_base[0], pad_base[1] + extra[0]],
            [pad_base[0], pad_base[1] + extra[1]],
        ])
        net = tf.pad(image, paddings)
        net.set_shape([None, chan, None, None])
        net = Conv2D('conv0', net, 64, 7, strides=2, activation=BNReLU, padding='VALID')
        net = tf.pad(net, [[0, 0], [0, 0], maybe_reverse_pad(0, 1), maybe_reverse_pad(0, 1)])
        net = MaxPooling('pool0', net, 3, strides=2, padding='VALID')

        stages = [resnet_group('group0', net, resnet_bottleneck, 64, num_blocks[0], 1)]
        if freeze_c2:
            stages[0] = tf.stop_gradient(stages[0])
        for group_idx, channels in enumerate([128, 256, 512], start=1):
            stages.append(resnet_group(
                'group{}'.format(group_idx), stages[-1], resnet_bottleneck,
                channels, num_blocks[group_idx], 2))
        c2, c3, c4, c5 = stages

    # 32x downsampling up to now
    # size of c5: ceil(input/32)
    return c2, c3, c4, c5
def crop_or_pad(waves, length, channels):
    """Crop or pad wave to have shape [N, length, channels].

    Args:
      waves: A 3D `Tensor` of NLC format.
      length: A Python scalar. The output wave size.
      channels: Number of output waves channels.

    Returns:
      A 3D `Tensor` of NLC format with shape [N, length, channels].
    """
    waves = tf.convert_to_tensor(waves)
    static_batch = waves.shape[0].value
    dynamic_shape = tf.shape(waves)

    # Force audio length: zero-pad short inputs on both sides (the left side
    # gets the extra sample when the amount is odd), then keep the first
    # `length` samples.
    total_pad = tf.maximum(0, length - dynamic_shape[1])
    pad_right = tf.to_int32(tf.to_float(total_pad) / 2.0)
    pad_left = total_pad - pad_right
    paddings = [[0, 0], [pad_left, pad_right], [0, 0]]
    waves = tf.pad(waves, paddings)
    waves = waves[:, :length, :]

    # Force number of channels: tile the channel axis often enough, then keep
    # the first `channels` of them.
    channel_ratio = tf.to_float(channels) / tf.to_float(dynamic_shape[2])
    num_repeats = tf.to_int32(tf.ceil(channel_ratio))
    tiled = tf.tile(waves, [1, 1, num_repeats])
    waves = tiled[:, :, :channels]

    waves.set_shape([static_batch, length, channels])
    return waves
def non_zero_tokens(tokens):
    """Return a mask marking the non-padding positions of `tokens`.

    Receives a tensor of tokens (float) which are zero-padded. Returns a
    tensor of the same shape and dtype, which has the value 1.0 in positions
    with actual tokens (values greater than zero) and 0.0 in positions with
    zero-padding.

    The mask is computed with a direct comparison instead of dividing by the
    per-row maximum, so a row that consists only of padding yields zeros
    rather than NaN (0 / 0).

    :param tokens: zero-padded, non-negative token tensor
    :return: tensor of the same shape and dtype holding 1 for tokens and 0
        for padding
    """
    tokens = tf.convert_to_tensor(tokens)
    return tf.cast(tf.greater(tokens, 0), tokens.dtype)
def reshape_seqs(x, avg_window_size=3, **kwargs):
    """Transpose, pad and reshape a batch of sequences.

    `x` is transposed with permutation [0, 2, 1], its last axis is
    zero-padded at the end up to a multiple of `avg_window_size`, and the
    result is reshaped to `[B, D, avg_window_size, -1]`, where `B` is the
    dynamic size of axis 0 and `D` the static size of the last axis of `x`.
    Extra keyword arguments are accepted and ignored.
    """
    batch = tf.shape(x)[0]
    length = tf.cast(tf.shape(x)[1], tf.float32)
    depth = x.get_shape().as_list()[-1]
    transposed = tf.transpose(x, [0, 2, 1])
    # Number of trailing zeros needed to reach the next multiple of the
    # window size.
    padded_length = tf.ceil(length / avg_window_size) * avg_window_size
    extra_pads = tf.cast(padded_length - length, tf.int32)
    paddings = [[0, 0], [0, 0], [0, extra_pads]]
    padded = tf.pad(transposed, paddings)
    return tf.reshape(padded, [batch, depth, avg_window_size, -1])
def imageWarpIm(imageBatch,pMtrxBatch,opt,name=None):
    """Warp a batch of images with bilinear interpolation.

    A canonical [-1, 1] x [-1, 1] grid of `opt.H` x `opt.W` points is mapped
    through `opt.warpGTmtrx @ pMtrxBatch`; the image is sampled at the
    resulting coordinates, with samples falling outside the image read as
    zero.

    Args:
      imageBatch: batch of images; a trailing channel axis is appended here.
        Assumed to be [batch, opt.H, opt.W] -- the flat pixel indexing below
        relies on it.
      pMtrxBatch: batch of 3x3 warp matrices.
      opt: options object providing `H`, `W` and `warpGTmtrx`.
      name: optional name for the output tensor.

    Returns:
      The warped images, a float tensor of shape [batch, opt.H, opt.W, 1]
      given the assumed input shape.
    """
    with tf.name_scope("ImWarp"):
        imageBatch = tf.expand_dims(imageBatch,-1)
        batchSize = tf.shape(imageBatch)[0]
        numChannels = tf.shape(imageBatch)[3]
        # The source width used to be read from opt.H, which is only correct
        # for square images; the row stride of the flattened image must be
        # the real width.
        imageH,imageW = opt.H,opt.W
        H,W = opt.H,opt.W
        warpGTmtrxBatch = tf.tile(tf.expand_dims(opt.warpGTmtrx,0),[batchSize,1,1])
        transMtrxBatch = tf.matmul(warpGTmtrxBatch,pMtrxBatch)
        # warp the canonical coordinates
        X,Y = np.meshgrid(np.linspace(-1,1,W),np.linspace(-1,1,H))
        XYhom = tf.transpose(tf.stack([X.reshape([-1]),Y.reshape([-1]),np.ones([X.size])],axis=1))
        XYhomBatch = tf.tile(tf.expand_dims(XYhom,0),[batchSize,1,1])
        XYwarpHomBatch = tf.matmul(transMtrxBatch,tf.to_float(XYhomBatch))
        XwarpHom,YwarpHom,ZwarpHom = tf.split(XYwarpHomBatch,3,1)
        Xwarp = tf.reshape(XwarpHom/ZwarpHom,[batchSize,H,W])
        Ywarp = tf.reshape(YwarpHom/ZwarpHom,[batchSize,H,W])
        # get the integer sampling coordinates
        Xfloor,Xceil = tf.floor(Xwarp),tf.ceil(Xwarp)
        Yfloor,Yceil = tf.floor(Ywarp),tf.ceil(Ywarp)
        XfloorInt,XceilInt = tf.to_int32(Xfloor),tf.to_int32(Xceil)
        YfloorInt,YceilInt = tf.to_int32(Yfloor),tf.to_int32(Yceil)
        imageIdx = tf.tile(tf.reshape(tf.range(batchSize),[batchSize,1,1]),[1,H,W])
        imageVec = tf.reshape(imageBatch,[-1,numChannels])
        # One extra all-zero row is appended; out-of-image samples are
        # redirected to it.
        imageVecOutside = tf.concat([imageVec,tf.zeros([1,numChannels])],0)
        idxOutside = tf.fill([batchSize,H,W],batchSize*imageH*imageW)

        def insideIm(Xint,Yint):
            return (Xint>=0)&(Xint<imageW)&(Yint>=0)&(Yint<imageH)

        def cornerValues(Xint,Yint):
            """Gather pixel values at integer coordinates (zero outside)."""
            idx = (imageIdx*imageH+Yint)*imageW+Xint
            idx = tf.where(insideIm(Xint,Yint),idx,idxOutside)
            return tf.to_float(tf.gather(imageVecOutside,idx))

        # bilinear interpolation
        Xratio = tf.reshape(Xwarp-Xfloor,[batchSize,H,W,1])
        Yratio = tf.reshape(Ywarp-Yfloor,[batchSize,H,W,1])
        ImUL = cornerValues(XfloorInt,YfloorInt)*(1-Xratio)*(1-Yratio)
        ImUR = cornerValues(XceilInt,YfloorInt)*(Xratio)*(1-Yratio)
        ImBL = cornerValues(XfloorInt,YceilInt)*(1-Xratio)*(Yratio)
        ImBR = cornerValues(XceilInt,YceilInt)*(Xratio)*(Yratio)
        ImWarpBatch = ImUL+ImUR+ImBL+ImBR
        ImWarpBatch = tf.identity(ImWarpBatch,name=name)
    return ImWarpBatch
def cnn(model, config, scope, connect = None):
    """Add a stack of convolution + pooling layers to `model`.

    All entries are stored in `model` under keys prefixed with `scope`
    ('<scope>_inputs', '<scope>_outputs', '<scope>_out1length', ...). Layer
    hyper-parameters are read from section `scope` of `config`: 'layer_size',
    'clength_size', 'cstep_size', 'plength_size', 'pstep_size' and 'pool'
    (prefix of a `tf.nn.*_pool` function).

    Args:
        model: dict the graph pieces are read from and written to.
        config: ConfigParser-like object (`getint` / `get`).
        scope: name used both as variable/name scope and as key prefix.
        connect: scope prefix of an earlier component whose outputs feed
            this one; when None, new placeholders are created from the
            'global' config section.

    Returns:
        The same `model` dict, updated in place.

    Raises:
        ValueError: if 'layer_size' is smaller than 1 (there would be no
            pooling output to expose).
    """
    def key(name):
        """Return the `model` key / tensor name for `name` in this scope."""
        return '%s_%s' % (scope, name)

    def static_valid_length(length, window, step):
        """Output length (Python int) of a 'VALID' window op on one axis."""
        return int(math.ceil(float(length - window + 1) / float(step)))

    def dynamic_valid_length(length, window, step):
        """Same as `static_valid_length`, for an integer length tensor."""
        return tf.to_int32(tf.ceil(tf.div(
            tf.to_float(tf.subtract(length, window - 1)),
            tf.to_float(step))))

    layer_size = config.getint(scope, 'layer_size')
    if layer_size < 1:
        raise ValueError("'layer_size' of '%s' must be at least 1" % scope)

    with tf.variable_scope(scope), tf.name_scope(scope):
        with tf.variable_scope('inputs'), tf.name_scope('inputs'):
            sizes = {size: config.getint(scope, '%s_size' % size)
                     for size in ['clength', 'cstep', 'plength', 'pstep']}
            if connect is None:
                model[key('in0length')] = config.getint('global', 'batch_size')
                model[key('in1length')] = config.getint('global', 'input_size')
                model[key('in2length')] = tf.placeholder(
                    tf.int32, [model[key('in0length')]], key('in2length'))
                model[key('maxin2length')] = config.getint('global', 'time_size')
                model[key('inputs')] = tf.placeholder(
                    tf.float32,
                    [model[key('maxin2length')], model[key('in0length')],
                     model[key('in1length')]],
                    key('inputs'))
            else:
                model[key('in0length')] = model['%s_out0length' % connect]
                model[key('in1length')] = model['%s_out1length' % connect]
                model[key('in2length')] = model['%s_out2length' % connect]
                model[key('maxin2length')] = model['%s_maxout2length' % connect]
                model[key('inputs')] = model['%s_outputs' % connect]
            # Add a trailing channel axis and swap the first two axes so the
            # tensor can be fed to conv2d.
            model[key('transform')] = tf.transpose(
                tf.reshape(
                    model[key('inputs')],
                    [model[key('maxin2length')], model[key('in0length')],
                     model[key('in1length')], 1]),
                [1, 0, 2, 3], key('transform'))
            model[key('out0length')] = model[key('in0length')]
            model[key('out1length')] = model[key('in1length')]
            model[key('out2length')] = model[key('in2length')]
            model[key('maxout2length')] = model[key('maxin2length')]

        for layer in range(layer_size):
            if layer == 0:
                model[key('transform%i' % layer)] = model[key('transform')]
            else:
                model[key('transform%i' % layer)] = model[key('pooling%i' % (layer - 1))]

            with tf.variable_scope('filter%i' % layer), tf.name_scope('filter%s' % layer):
                model[key('filter%i' % layer)] = tf.Variable(
                    tf.truncated_normal([sizes['clength'], sizes['clength'], 1, 1]))
                model[key('stride%i' % layer)] = [1, sizes['cstep'], sizes['cstep'], 1]

            with tf.variable_scope('convolution%i' % layer), tf.name_scope('convolution%i' % layer):
                model[key('convolution%i' % layer)] = tf.nn.conv2d(
                    model[key('transform%i' % layer)],
                    model[key('filter%i' % layer)],
                    model[key('stride%i' % layer)],
                    'VALID')
                # Track how the convolution shrinks the static and dynamic
                # lengths.
                model[key('out1length')] = static_valid_length(
                    model[key('out1length')], sizes['clength'], sizes['cstep'])
                model[key('out2length')] = dynamic_valid_length(
                    model[key('out2length')], sizes['clength'], sizes['cstep'])
                model[key('maxout2length')] = static_valid_length(
                    model[key('maxout2length')], sizes['clength'], sizes['cstep'])

                pool = getattr(tf.nn, '%s_pool' % config.get(scope, 'pool'))
                model[key('pooling%i' % layer)] = pool(
                    model[key('convolution%i' % layer)],
                    [1, sizes['plength'], sizes['plength'], 1],
                    [1, sizes['pstep'], sizes['pstep'], 1],
                    'VALID')
                # ... and the same bookkeeping for the pooling window.
                model[key('out1length')] = static_valid_length(
                    model[key('out1length')], sizes['plength'], sizes['pstep'])
                model[key('out2length')] = dynamic_valid_length(
                    model[key('out2length')], sizes['plength'], sizes['pstep'])
                model[key('maxout2length')] = static_valid_length(
                    model[key('maxout2length')], sizes['plength'], sizes['pstep'])

        with tf.variable_scope('outputs'), tf.name_scope('outputs'):
            # Drop the channel axis of the last pooling layer and swap the
            # first two axes back.
            model[key('outputs')] = tf.transpose(
                tf.squeeze(model[key('pooling%i' % layer)], [3], key('outputs')),
                [1, 0, 2])
    return model
def _compare(self, x, use_gpu):
    """Check `tf.floor` / `tf.ceil` of `x` against NumPy, values and shapes."""
    expected_floor = np.floor(x)
    expected_ceil = np.ceil(x)
    with self.test_session(use_gpu=use_gpu) as sess:
        tensor = tf.convert_to_tensor(x)
        floor_op = tf.floor(tensor)
        ceil_op = tf.ceil(tensor)
        actual_floor, actual_ceil = sess.run([floor_op, ceil_op])
    self.assertAllEqual(expected_floor, actual_floor)
    self.assertAllEqual(expected_ceil, actual_ceil)
    self.assertShapeEqual(expected_floor, floor_op)
    self.assertShapeEqual(expected_ceil, ceil_op)
def integral(lower, upper):
    """Return `upper - lower` in `config.dtype`, or 1 if a bound is infinite.

    The bounds are cast to `config.dtype`; if the ceiling of `lower` or the
    floor of `upper` is infinite the result is the constant 1.
    """
    dtype = config.dtype
    lower_is_inf = tf.is_inf(tf.ceil(tf.cast(lower, dtype)))
    upper_is_inf = tf.is_inf(tf.floor(tf.cast(upper, dtype)))

    def _one():
        return tf.constant(1, dtype=dtype)

    def _difference():
        return tf.cast(upper, dtype) - tf.cast(lower, dtype)

    any_infinite = tf.logical_or(lower_is_inf, upper_is_inf)
    return tf.cond(any_infinite, _one, _difference)
def clampSlice(self, shouldCeil, transformedCoordinates, index):
    """Take column `index` of `transformedCoordinates` (kept as a
    [rows, 1] slice) and round it up if `shouldCeil` is truthy, down
    otherwise."""
    num_rows = tf.shape(transformedCoordinates)[0]
    column = tf.slice(transformedCoordinates, [0, index], [num_rows, 1])
    rounding = tf.ceil if shouldCeil else tf.floor
    return rounding(column)
def interp(w, i, channel_dim):
    '''
    Bilinearly interpolate feature vectors of `w` at fractional positions.

    Input:
        w: A 4D block tensor of shape (n, h, w, c)
        i: A 2D tensor with one point (x, y, z) per row, i.e.
           [(x_1, y_1, z_1), (x_2, y_2, z_2), ...]. x selects the batch
           element and is converted to an integer as is; y and z are
           fractional positions along the two middle axes of w.
        channel_dim: the channel count c of w.

    The 4D block represents a batch of 3D image feature volumes with c
    channels. The input i holds the points to index into w via
    interpolation. Direct indexing is not possible due to y and z being
    float values.

    Output:
        A tensor with one row per point:
        [
           w[x_1, y_1, z_1, :]
           w[x_2, y_2, z_2, :]
           ...
           w[x_k, y_k, z_k, :]
        ]
        i.e. as many rows as i has.
    '''
    # Flatten everything but the channel axis so that a single flat index
    # (computed by to_idx) selects one channel vector.
    w_as_vector = tf.reshape(w, [-1, channel_dim])
    w_shape = tf.shape(w)

    batch = i[:, 0:1]
    row_floor, row_ceil = tf.floor(i[:, 1:2]), tf.ceil(i[:, 1:2])
    col_floor, col_ceil = tf.floor(i[:, 2:3]), tf.ceil(i[:, 2:3])

    def corner_value(rows, cols):
        """Gather the channel vectors at integer positions (batch, rows, cols)."""
        # tf.concat takes (values, axis) since TensorFlow 1.0, matching the
        # other tf.concat calls in this file.
        coords = tf.to_int32(tf.concat([batch, rows, cols], 1))
        return tf.gather(w_as_vector, to_idx(coords, w_shape))

    upper_l_value = corner_value(row_floor, col_floor)
    upper_r_value = corner_value(row_floor, col_ceil)
    lower_l_value = corner_value(row_ceil, col_floor)
    lower_r_value = corner_value(row_ceil, col_ceil)

    # Fractional offsets from the floor corner along each interpolated axis.
    alpha_lr = tf.expand_dims(i[:, 2] - tf.floor(i[:, 2]), 1)
    alpha_ud = tf.expand_dims(i[:, 1] - tf.floor(i[:, 1]), 1)

    upper_value = (1 - alpha_lr) * upper_l_value + (alpha_lr) * upper_r_value
    lower_value = (1 - alpha_lr) * lower_l_value + (alpha_lr) * lower_r_value
    value = (1 - alpha_ud) * upper_value + (alpha_ud) * lower_value
    return value
def slice_feature_and_anchors(self, image_shape2d, p23456, anchors):
    """Crop the first three feature maps and narrow every anchor set.

    For each stride in `cfg.FPN.ANCHOR_STRIDES`, the first three entries of
    `p23456` are sliced to `ceil(image_shape2d / stride)` on their two
    trailing axes, and `anchors[i]` is replaced by
    `anchors[i].narrow_to(p23456[i])`. Both lists are modified in place;
    nothing is returned.
    """
    for level, stride in enumerate(cfg.FPN.ANCHOR_STRIDES):
        with tf.name_scope('FPN_slice_lvl{}'.format(level)):
            if level < 3:
                # Images are padded for p5, which are too large for p2-p4.
                # This seems to have no effect on mAP.
                feature = p23456[level]
                scaled_shape = tf.to_float(image_shape2d) * (1.0 / stride)
                target_shape = tf.to_int32(tf.ceil(scaled_shape))
                slice_size = tf.concat([[-1, -1], target_shape], axis=0)
                p23456[level] = tf.slice(feature, [0, 0, 0, 0], slice_size)
                p23456[level].set_shape([1, feature.shape[1], None, None])

            anchors[level] = anchors[level].narrow_to(p23456[level])
def bernoulli_sample(x):
    """
    Sample a {0, 1}-valued tensor from a tensor of probabilities in [0, 1],
    using the straight through estimator for the gradient.

    E.g., if x is 0.6, bernoulliSample(x) will be 1 with probability 0.6, and
    0 otherwise, and the gradient will be pass-through (identity).
    """
    graph = tf.get_default_graph()
    gradient_overrides = {"Ceil": "Identity", "Sub": "BernoulliSample_ST"}
    with ops.name_scope("BernoulliSample") as name, \
            graph.gradient_override_map(gradient_overrides):
        # ceil(x - u) with u ~ U[0, 1) is 1 exactly when u < x.
        noise = tf.random_uniform(tf.shape(x))
        return tf.ceil(x - noise, name=name)
def testProbAndGradGivesFiniteResultsForCommonEvents(self):
    """prob() of a quantized Normal and its gradients w.r.t. mu and sigma
    are finite for integer events obtained from values in [-2, 2)."""
    with self.test_session():
        mu = tf.Variable(0.0, name="mu")
        sigma = tf.Variable(1.0, name="sigma")
        base = distributions.Normal(mu=mu, sigma=sigma)
        qdist = distributions.QuantizedDistribution(distribution=base)
        x = tf.ceil(4 * rng.rand(100).astype(np.float32) - 2)

        tf.global_variables_initializer().run()

        proba = qdist.prob(x)
        self._assert_all_finite(proba.eval())

        for grad in tf.gradients(proba, [mu, sigma]):
            self._assert_all_finite(grad.eval())
def _nearest_neighbor_features_per_object_in_chunks(
        reference_embeddings_flat, query_embeddings_flat, reference_labels_flat,
        ref_obj_ids, k_nearest_neighbors, n_chunks):
    """Calculates the nearest neighbor features per object in chunks to save mem.

    Uses chunking to bound the memory use.

    Args:
      reference_embeddings_flat: Tensor of shape [n, embedding_dim],
        the embedding vectors for the reference frame.
      query_embeddings_flat: Tensor of shape [m, embedding_dim], the embedding
        vectors for the query frames.
      reference_labels_flat: Tensor of shape [n], the class labels of the
        reference frame.
      ref_obj_ids: int tensor of unique object ids in the reference labels.
      k_nearest_neighbors: Integer, the number of nearest neighbors to use.
      n_chunks: Integer, the number of chunks to use to save memory
        (set to 1 for no chunking).

    Returns:
      nn_features: A float32 tensor of nearest neighbor features of shape
        [m, n_objects, feature_dim].
    """
    num_queries = tf.cast(tf.shape(query_embeddings_flat)[0], tf.float32)
    chunk_size = tf.cast(tf.ceil(num_queries / n_chunks), tf.int32)
    wrong_label_mask = tf.not_equal(reference_labels_flat,
                                    ref_obj_ids[:, tf.newaxis])
    single_chunk = n_chunks == 1
    chunk_features = []
    for chunk_index in range(n_chunks):
        if single_chunk:
            query_chunk = query_embeddings_flat
        else:
            begin = chunk_index * chunk_size
            end = (chunk_index + 1) * chunk_size
            query_chunk = query_embeddings_flat[begin:end]
        # Use control dependencies to make sure that the chunks are not
        # processed in parallel which would prevent any peak memory savings.
        with tf.control_dependencies(chunk_features):
            features = _nn_features_per_object_for_chunk(
                reference_embeddings_flat, query_chunk, wrong_label_mask,
                k_nearest_neighbors)
        chunk_features.append(features)
    if single_chunk:
        return chunk_features[0]
    return tf.concat(chunk_features, axis=0)
def __init__(self, pool_size=1, **kwargs):
    """
    Replace every group of `pool_size` consecutive batch entries by their
    element-wise median.

    :param int pool_size: size of the pool to take median of (is also used
      as stride size)
    """
    super(BatchMedianPoolingLayer, self).__init__(**kwargs)

    input_placeholder = self.input_data.get_placeholder_as_batch_major()
    input_shape = tf.shape(input_placeholder)

    # get median over pooled batches
    # - reshape input for usage with tf.nn.top_k: move the batch axis last
    #   and split it into (number of pools, pool_size). Floor division keeps
    #   the pool count an integer under Python 3 as well, where `/` on an
    #   int tensor would yield a float that tf.reshape rejects.
    num_pools = input_shape[0] // pool_size
    reshaped_input = tf.reshape(
        tf.transpose(input_placeholder, [1, 2, 0]),
        shape=(input_shape[1], input_shape[2], num_pools, pool_size))
    # - get median of each pool: the smallest of the ceil(pool_size / 2)
    #   largest values
    top_count = tf.cast(
        tf.ceil(tf.constant(pool_size, dtype=tf.float32) / 2), dtype=tf.int32)
    median = tf.nn.top_k(reshaped_input, k=top_count).values[:, :, :, -1]
    median_batch_major = tf.transpose(median, [2, 0, 1])

    self.output.placeholder = median_batch_major
    # Keep the size entry of every `pool_size`-th batch element.
    input_sizes = self.input_data.size_placeholder[
        self.input_data.time_dim_axis_excluding_batch]
    self.output.size_placeholder = {
        self.output.time_dim_axis_excluding_batch: tf.strided_slice(
            input_sizes, [0], tf.shape(input_sizes), [pool_size])}
def test_prob_and_grad_gives_finite_results_for_common_events(self):
    """prob() of a quantized Normal and its gradients w.r.t. mu and sigma
    are finite for integer events obtained from values in [-2, 2)."""
    with self.test_session():
        mu = tf.Variable(0.0, name="mu")
        sigma = tf.Variable(1.0, name="sigma")
        qdist = distributions.QuantizedDistribution(
            base_dist_cls=distributions.Normal,
            mu=mu,
            sigma=sigma)
        uniform_samples = self._rng.rand(100).astype(np.float32)
        x = tf.ceil(4 * uniform_samples - 2)

        tf.initialize_all_variables().run()

        proba = qdist.prob(x)
        self._assert_all_finite(proba.eval())

        for grad in tf.gradients(proba, [mu, sigma]):
            self._assert_all_finite(grad.eval())
def _sample_n(self, n, seed=None):
    """Draw `n` samples from the base distribution, round them up to
    integers and clip them to the `[low, high]` cutoffs that are set."""
    lower_cutoff = self._low
    upper_cutoff = self._high
    with tf.name_scope("transform"):
        n = tf.convert_to_tensor(n, name="n")
        base_samples = self.distribution.sample(n, seed=seed)
        ones = tf.ones_like(base_samples)

        # Snap values to the intervals (j - 1, j].
        quantized = tf.ceil(base_samples)

        if lower_cutoff is not None:
            quantized = tf.where(
                quantized < lower_cutoff, lower_cutoff * ones, quantized)

        if upper_cutoff is not None:
            quantized = tf.where(
                quantized > upper_cutoff, upper_cutoff * ones, quantized)

        return quantized
def encode(self, inputs, sequence_length=None, mode=tf.estimator.ModeKeys.TRAIN):
    """Run the layers in sequence, shortening the time axis between them.

    Before the first layer the time axis is zero-padded to a multiple of
    `reduction_factor ** (len(self.layers) - 1)`. Before every later layer,
    `reduction_factor` consecutive timesteps are merged into the depth axis
    and `sequence_length` (if given) is divided accordingly.

    Returns:
      A tuple `(outputs, state, sequence_length)`: the outputs of the last
      layer, the per-layer states passed through `self.state_reducer.reduce`,
      and the sequence length returned by the last layer.
    """
    layer_states = []

    for layer_index, layer in enumerate(self.layers):
        input_depth = inputs.get_shape().as_list()[-1]

        if layer_index > 0:
            # In other cases, reduce the time dimension.
            merged_shape = [
                tf.shape(inputs)[0], -1, input_depth * self.reduction_factor]
            inputs = tf.reshape(inputs, merged_shape)
            if sequence_length is not None:
                sequence_length = tf.div(sequence_length, self.reduction_factor)
        else:
            # For the first input, make the number of timesteps a multiple of
            # the total reduction factor.
            total_reduction_factor = pow(
                self.reduction_factor, len(self.layers) - 1)

            current_length = tf.shape(inputs)[1]
            factor = tf.divide(
                tf.cast(current_length, tf.float32), total_reduction_factor)
            new_length = tf.cast(tf.ceil(factor), tf.int32) * total_reduction_factor
            padding = new_length - current_length

            inputs = tf.pad(inputs, [[0, 0], [0, padding], [0, 0]])
            inputs.set_shape((None, None, input_depth))

        with tf.variable_scope("layer_{}".format(layer_index)):
            outputs, state, sequence_length = layer.encode(
                inputs, sequence_length=sequence_length, mode=mode)

        layer_states.append(state)
        inputs = outputs

    return (
        outputs,
        self.state_reducer.reduce(layer_states),
        sequence_length)
def sample_patch(image, patch_height, patch_width, colors):
    """Crops image to the desired aspect ratio shape and resizes it.

    If the image has shape H x W, a region with the aspect ratio
    patch_height : patch_width is cut with
    `tf.image.resize_image_with_crop_or_pad`: the full height is kept when
    the matching width fits into W, otherwise the full width is kept. The
    region is then resized to patch_height x patch_width.

    Args:
      image: A 3D `Tensor` of HWC format.
      patch_height: A Python integer. The output images height.
      patch_width: A Python integer. The output images width.
      colors: Number of output image channels.

    Returns:
      A 3D `Tensor` of HWC format with shape [patch_height, patch_width, colors].
    """
    image_shape = tf.shape(image)
    h, w = image_shape[0], image_shape[1]

    # Candidate 1 keeps the full height, candidate 2 keeps the full width.
    h_major_target_h = h
    h_major_target_w = tf.maximum(1, tf.to_int32(
        (h * patch_width) / patch_height))
    w_major_target_h = tf.maximum(1, tf.to_int32(
        (w * patch_height) / patch_width))
    w_major_target_w = w
    target_hw = tf.cond(
        h_major_target_w <= w,
        lambda: tf.convert_to_tensor([h_major_target_h, h_major_target_w]),
        lambda: tf.convert_to_tensor([w_major_target_h, w_major_target_w]))

    # Cut a patch of shape (target_h, target_w).
    image = tf.image.resize_image_with_crop_or_pad(image, target_hw[0],
                                                   target_hw[1])
    # Resize the cropped image to (patch_h, patch_w).
    image = tf.image.resize_images([image], [patch_height, patch_width])[0]

    # Force number of channels: repeat the channel dimension enough
    # number of times and then slice the first `colors` channels.
    # Both operands are cast to float explicitly (as crop_or_pad does), so
    # the division never happens on integers before tf.ceil is applied.
    num_repeats = tf.to_int32(
        tf.ceil(tf.to_float(colors) / tf.to_float(image_shape[2])))
    image = tf.tile(image, [1, 1, num_repeats])
    image = tf.slice(image, [0, 0, 0], [-1, -1, colors])

    image.set_shape([patch_height, patch_width, colors])
    return image
def binary_stochastic_REINFORCE(x, loss_op_name="loss_by_example"):
    """
    Binary stochastic neuron: a sigmoid followed by a random sample from a
    bernoulli distribution with that probability. Uses the REINFORCE
    estimator. See https://arxiv.org/abs/1308.3432.

    NOTE: Requires a loss operation with name matching the argument for
    loss_op_name in the graph. This loss operation should be broken out by
    example (i.e., not a single number for the entire batch).
    """
    graph = tf.get_default_graph()
    gradient_overrides = {"Sigmoid": "BinaryStochastic_REINFORCE",
                          "Ceil": "Identity"}

    with ops.name_scope("BinaryStochasticREINFORCE"), \
            graph.gradient_override_map(gradient_overrides):
        p = tf.sigmoid(x)

        # The first entry of the "REINFORCE" collection is a dict mapping the
        # sigmoid op name to the name of its loss op; create it on first use.
        registry = graph.get_collection("REINFORCE")
        if not registry:
            graph.add_to_collection("REINFORCE", {})
            registry = graph.get_collection("REINFORCE")
        registry[0][p.op.name] = loss_op_name

        noise = tf.random_uniform(tf.shape(x))
        return tf.ceil(p - noise)
def _resample_inv_dst_weighting(self, inputs, sample_coords):
    """
    Resample ``inputs`` at ``sample_coords`` by inverse distance weighting.

    For every sample point the floor and ceil coordinates (each passed
    through ``self.boundary_func``) are combined into ``2 ** spatial_rank``
    neighbours. Each neighbour is weighted by the reciprocal of its squared
    distance to the sample point, the weights are normalised over the
    neighbours, and the weighted neighbour values are summed.

    Side effect: sets ``self.N`` to the number of neighbours.

    :param inputs: tensor to sample from; the spatial sizes are read from
        ``shape[1:-1]``, and axis 0 is unstacked per image
        (assumes batch-first, channels-last layout -- TODO confirm)
    :param sample_coords: tensor of sampling coordinates; its last axis is
        moved to the front before use (presumably one entry per spatial
        dimension of ``inputs`` -- TODO confirm)
    :return: the weighted average of the neighbour values
    """
    in_size = inputs.shape.as_list()
    in_spatial_size = in_size[1:-1]
    in_spatial_rank = infer_spatial_rank(inputs)
    out_rank = len(sample_coords.shape.as_list())

    self.N = 2 ** in_spatial_rank
    # binary code of each neighbour: digit 0 selects the floor coordinate,
    # digit 1 the ceil coordinate, one digit per spatial dimension
    binary_neighbour_ids = [
        [int(c) for c in format(i, '0%ib' % in_spatial_rank)]
        for i in range(self.N)]
    # gather_nd indices [floor-or-ceil, spatial dimension] for each neighbour
    weight_id = [[[c, i] for i, c in enumerate(bc)]
                 for bc in binary_neighbour_ids]

    # move the coordinate axis (last) to the front
    sample_coords = tf.transpose(
        sample_coords, [out_rank - 1, 0] + list(range(1, out_rank - 1)))
    # broadcasting input spatial size for boundary functions
    b_size = tf.reshape(in_spatial_size,
                        [len(in_spatial_size)] + [1] * (out_rank - 1))
    # find floor and ceil coordinates
    all_coords_f = tf.stack([
        self.boundary_func(tf.floor(sample_coords), b_size),
        self.boundary_func(tf.ceil(sample_coords), b_size)])

    # find N weights associated to each output point
    # (the coordinates are shifted by EPS before taking the squared
    # differences to the floor and ceil coordinates)
    diff = tf.stack(
        [tf.squared_difference(sample_coords - EPS, all_coords_f[0]),
         tf.squared_difference(sample_coords + EPS, all_coords_f[1])])

    # gather_nd for both matrices, the same as:
    # point_weights = tf.gather_nd(diff, weight_id)
    # knots_id = tf.gather_nd(all_coords_f, weight_id)
    n_val = tf.gather_nd(tf.stack([diff, all_coords_f], axis=-1), weight_id)
    n_val = tf.unstack(n_val, axis=-1)
    point_weights, knots_id = n_val[0], n_val[1]

    # inverse distance weighting
    # sum_i (w_i*p_i/(sum_j w_j)) w_i = 1/((p-p_i)^2)
    # point_weights shape:
    # `[N, input_rank, b, sp_dim_0, ..., sp_dim_K]`
    # where:
    #  `N` is 2**source data spatial rank
    #  `b` is batch size,
    #  `sp_dim_0` is the output spatial output 0,
    #
    # `point_weights` represents (p - p_i)^2
    #      with i= 0...2**source_rank neighbours
    # (to do: these operations could be refactored as a resampling kernel)
    point_weights = tf.reduce_sum(point_weights, axis=1)
    # skip this as power = 2.0:
    # self.power = 1.0
    # point_weights = tf.pow(point_weights, self.power / 2.0)
    point_weights = tf.reciprocal(point_weights)
    # normalise so that the weights of the N neighbours sum to one
    point_weights = point_weights / tf.reduce_sum(point_weights, axis=0)

    # find N neighbours associated to each output point
    knots_id = tf.transpose(tf.cast(knots_id, COORDINATES_TYPE),
                            [0] + list(range(2, out_rank + 1)) + [1])
    # get values of N neighbours
    samples = [
        tf.gather_nd(img, knots) for (img, knots) in
        zip(tf.unstack(inputs, axis=0), tf.unstack(knots_id, axis=1))]
    samples = tf.stack(samples, axis=1)
    # weighted average over N neighbours
    return tf.reduce_sum(
        samples * tf.expand_dims(point_weights, axis=-1), axis=0)
def resize_to_range(image,
                    label=None,
                    min_size=None,
                    max_size=None,
                    factor=None,
                    align_corners=True,
                    label_layout_is_chw=False,
                    scope=None,
                    method=tf.image.ResizeMethod.BILINEAR):
    """Resizes image or label so their sides are within the provided range.

    The output size can be described by two cases:
    1. If the image can be rescaled so its minimum size is equal to min_size
       without the other side exceeding max_size, then do so.
    2. Otherwise, resize so the largest side is equal to max_size.

    An integer in `range(factor)` is added to the computed sides so that the
    final dimensions are multiples of `factor` plus one.

    Args:
      image: A 3D tensor of shape [height, width, channels].
      label: (optional) A 3D tensor of shape [height, width, channels]
        (default) or [channels, height, width] when
        label_layout_is_chw = True.
      min_size: (scalar) desired size of the smaller image side.
      max_size: (scalar) maximum allowed size of the larger image side. Note
        that the output dimension is no larger than max_size and may be
        slightly smaller than min_size when factor is not None.
      factor: Make output size multiple of factor plus one.
      align_corners: If True, exactly align all 4 corners of input and output.
      label_layout_is_chw: If true, the label has shape
        [channel, height, width]. We support this case because for some
        instance segmentation dataset, the instance segmentation is saved as
        [num_instances, height, width].
      scope: Optional name scope.
      method: Image resize method. Defaults to
        tf.image.ResizeMethod.BILINEAR.

    Returns:
      A list of two elements. The first is the resized image, a 3-D tensor of
      shape [new_height, new_width, channels], resized with the specified
      method so that min(new_height, new_width) == ceil(min_size) or
      max(new_height, new_width) == ceil(max_size). The second is the label
      resized with nearest-neighbour interpolation, or None when no label was
      given.

    Raises:
      ValueError: If the image is not a 3D tensor.
    """
    with tf.name_scope(scope, 'resize_to_range', [image]):
        min_size = tf.cast(min_size, tf.float32)
        has_max_size = max_size is not None
        if has_max_size:
            max_size = tf.cast(max_size, tf.float32)
            # Modify the max_size to be a multiple of factor plus 1 and make
            # sure the max dimension after resizing is no larger than
            # max_size.
            if factor is not None:
                max_size = (max_size + (factor - (max_size - 1) % factor) % factor
                            - factor)

        [orig_height, orig_width, _] = resolve_shape(image, rank=3)
        height = tf.cast(orig_height, tf.float32)
        width = tf.cast(orig_width, tf.float32)

        def _scaled_size(scale):
            """[height, width] scaled by `scale`, rounded up to int32."""
            scaled_height = tf.to_int32(tf.ceil(height * scale))
            scaled_width = tf.to_int32(tf.ceil(width * scale))
            return tf.stack([scaled_height, scaled_width])

        # Calculate the larger of the possible sizes
        shorter_side = tf.minimum(height, width)
        large_size = _scaled_size(min_size / shorter_side)

        new_size = large_size
        if has_max_size:
            # Calculate the smaller of the possible sizes, use that if the
            # larger is too big.
            longer_side = tf.maximum(height, width)
            small_size = _scaled_size(max_size / longer_side)
            too_large = tf.cast(tf.reduce_max(large_size), tf.float32) > max_size
            new_size = tf.cond(too_large,
                               lambda: small_size,
                               lambda: large_size)

        # Ensure that both output sides are multiples of factor plus one.
        if factor is not None:
            new_size = new_size + (factor - (new_size - 1) % factor) % factor

        resized_image = tf.image.resize_images(
            image, new_size, method=method, align_corners=align_corners)

        if label is None:
            return [resized_image, None]

        if label_layout_is_chw:
            # Input label has shape [channel, height, width].
            resized_label = tf.expand_dims(label, 3)
            resized_label = tf.image.resize_nearest_neighbor(
                resized_label, new_size, align_corners=align_corners)
            resized_label = tf.squeeze(resized_label, 3)
        else:
            # Input label has shape [height, width, channel].
            resized_label = tf.image.resize_images(
                label, new_size,
                method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
                align_corners=align_corners)
        return [resized_image, resized_label]
def depth_compress(l_args):
    """Compress an RGB-D image, write the bitstream, and report quality metrics.

    The image named by ``l_args.input`` is loaded with ``load_image_rgbd``,
    resized so that height and width are multiples of 16, split into RGB and
    depth, encoded with ``depth_analysis_transform_3`` and an entropy
    bottleneck, and written to ``l_args.output``.  The latest checkpoint in
    ``l_args.checkpoint_dir`` supplies the model weights.

    Output file layout: two uint16 values with the spatial shape of the
    (resized) image, two uint16 values with the spatial shape of the latent
    ``y``, followed by the compressed string.

    Args:
        l_args: namespace providing ``input``, ``output``, ``num_filters`` and
            ``checkpoint_dir``.

    Returns:
        Tuple ``(mse, psnr, msssim, msssim_db, eval_bpp, bpp)`` where
        ``eval_bpp`` is the information content estimated from the
        likelihoods and ``bpp`` is the actual size including the header.
    """
    # Load input image and add batch dimension.
    x_rgbd = load_image_rgbd(l_args.input)
    x_rgbd = tf.expand_dims(x_rgbd, 0)
    x_rgbd.set_shape([1, None, None, 4])

    # Input image dims should be a multiple of 16.  `resize_images` expects an
    # int32 size, so round up in float and cast back explicitly instead of
    # handing it float tensors.
    spatial_hw = tf.to_float(tf.shape(x_rgbd)[1:3])
    padded_hw = tf.to_int32(tf.ceil(spatial_hw / 16.0) * 16.0)
    x_rgbd = tf.image.resize_images(x_rgbd, padded_hw)

    # Transform and compress the image, then remove batch dimension.
    x, depth = tf.split(x_rgbd, [3, 1], 3)
    y = depth_analysis_transform_3(x, depth, l_args.num_filters)
    entropy_bottleneck = tfc.EntropyBottleneck()
    string = entropy_bottleneck.compress(y)
    string = tf.squeeze(string, axis=0)

    # Transform the quantized image back.
    y_hat, likelihoods = entropy_bottleneck(y, training=False)
    x_hat = synthesis_transform(y_hat, l_args.num_filters)

    num_pixels = tf.to_float(tf.reduce_prod(tf.shape(x)[:-1]))

    # Total number of bits divided by number of pixels.
    eval_bpp = tf.reduce_sum(tf.log(likelihoods)) / (-np.log(2) * num_pixels)

    # Bring both images back to 0..255 range.
    x *= 255
    x_hat = tf.clip_by_value(x_hat, 0, 1)
    x_hat = tf.round(x_hat * 255)

    mse = tf.reduce_mean(tf.squared_difference(x, x_hat))
    psnr = tf.squeeze(tf.image.psnr(x_hat, x, 255))
    msssim = tf.squeeze(tf.image.ssim_multiscale(x_hat, x, 255))

    with tf.Session() as sess:
        # Load the latest model checkpoint, get the compressed string and the
        # tensor shapes.
        latest = tf.train.latest_checkpoint(
            checkpoint_dir=l_args.checkpoint_dir)
        tf.train.Saver().restore(sess, save_path=latest)
        string, x_shape, y_shape = sess.run(
            [string, tf.shape(x), tf.shape(y)])

        # Write a binary file with the shape information and the compressed
        # string.
        with open(l_args.output, "wb") as f:
            f.write(np.array(x_shape[1:-1], dtype=np.uint16).tobytes())
            f.write(np.array(y_shape[1:-1], dtype=np.uint16).tobytes())
            f.write(string)

        # Transform the quantized image back and measure performance.
        eval_bpp, mse, psnr, msssim, num_pixels = sess.run(
            [eval_bpp, mse, psnr, msssim, num_pixels])

        # The actual bits per pixel including overhead: the header is
        # 2 shapes x 2 dims x 2 bytes (uint16) = 8 bytes.
        bpp = (8 + len(string)) * 8 / num_pixels

        print("Mean squared error: {:0.4f}".format(mse))
        print("PSNR (dB): {:0.2f}".format(psnr))
        print("Multiscale SSIM: {:0.4f}".format(msssim))
        print("Multiscale SSIM (dB): {:0.2f}".format(-10 * np.log10(1 - msssim)))
        print("Information content in bpp: {:0.4f}".format(eval_bpp))
        print("Actual bits per pixel: {:0.4f}".format(bpp))

        msssim_db = (-10 * np.log10(1 - msssim))
        return mse, psnr, msssim, msssim_db, eval_bpp, bpp
def body1(self, num, objectNum, loss, predict, labels, nilboy):
    """Accumulate the YOLO loss contributed by one ground-truth object.

    The signature and return value mirror each other, so this looks like the
    body of a ``tf.while_loop`` over the objects of one image -- confirm
    against the caller.

    Args:
        num: index of the object (row of ``labels``) to process.
        objectNum: number of objects in the image; passed through unchanged.
        loss: list ``[class loss, object loss, no-object loss, coord loss]``
            accumulated so far.
        predict: 3-D tensor ``[cell_size, cell_size, num_classes +
            5 * boxes_per_cell]``, laid out as classes, then box confidences,
            then box coordinates.
        labels: ``[max_objects, 5]`` rows of
            ``(x_center, y_center, w, h, class)`` in resized-image pixels.
        nilboy: loop-carried tensor; replaced by the responsibility mask
            ``I`` of this object.

    Returns:
        ``(num + 1, objectNum, updated loss list, predict, labels, nilboy)``.
    """
    # Size of one grid cell in pixels.
    cellPixels = self.imageSize / self.cellSize

    # Get the label selected by `num`.
    label = labels[num]
    label = tf.reshape(label, [-1])

    # Range of grid cells covered by the ground-truth box.
    minX = tf.floor((label[0] - label[2] / 2) / cellPixels)
    maxX = tf.ceil((label[0] + label[2] / 2) / cellPixels)
    minY = tf.floor((label[1] - label[3] / 2) / cellPixels)
    maxY = tf.ceil((label[1] + label[3] / 2) / cellPixels)

    # objects: 1 for every cell overlapped by the box, padded with zeros to
    # the full S x S grid.
    temp = tf.cast(tf.stack([maxY - minY, maxX - minX]), dtype=tf.int32)
    objects = tf.ones(temp, tf.float32)
    temp = tf.cast(tf.stack(
        [minY, self.cellSize - maxY, minX, self.cellSize - maxX]),
                   dtype=tf.int32)
    temp = tf.reshape(temp, (2, 2))
    objects = tf.pad(objects, temp, 'CONSTANT')

    # response: 1 only for the cell containing the box centre, padded to
    # S x S.
    centerX = tf.floor(label[0] / cellPixels)
    centerY = tf.floor(label[1] / cellPixels)
    response = tf.ones([1, 1], tf.float32)
    temp = tf.cast(tf.stack([
        centerY, self.cellSize - centerY - 1, centerX,
        self.cellSize - centerX - 1
    ]),
                   dtype=tf.int32)
    temp = tf.reshape(temp, (2, 2))
    response = tf.pad(response, temp, 'CONSTANT')

    # Predicted boxes: [S, S, boxes_per_cell, 4].
    predictBoxes = predict[:, :, self.numClasses + self.boxesPerCell:]
    predictBoxes = tf.reshape(
        predictBoxes, [self.cellSize, self.cellSize, self.boxesPerCell, 4])
    # Convert the 0-1 predictions to pixels: centres are relative to a cell,
    # width/height are relative to the whole image.
    predictBoxes = predictBoxes * [
        cellPixels, cellPixels, self.imageSize, self.imageSize
    ]

    # Pixel offset of every grid cell's top-left corner: (x, y, 0, 0).
    cellOrigins = cellPixels * np.arange(self.cellSize)
    baseBoxes = np.zeros([self.cellSize, self.cellSize, 4])
    baseBoxes[:, :, 0] = cellOrigins[np.newaxis, :]
    baseBoxes[:, :, 1] = cellOrigins[:, np.newaxis]
    # Give baseBoxes the same shape as predictBoxes.
    baseBoxes = np.tile(
        baseBoxes.reshape([self.cellSize, self.cellSize, 1, 4]),
        [1, 1, self.boxesPerCell, 1])

    # Cell-relative predictions + cell offsets = image-relative predictions.
    predictBoxes = baseBoxes + predictBoxes

    # IoU of every predicted box with the ground-truth box.
    iouPredictTruth = self.iou(predictBoxes, label[0:4])

    # Keep only the cell that contains the object centre.
    centerMask = tf.reshape(response, [self.cellSize, self.cellSize, 1])
    C = iouPredictTruth * centerMask
    I = iouPredictTruth * centerMask

    # Within the centre cell, the box with the highest IoU is responsible.
    maxI = tf.reduce_max(I, 2, keepdims=True)
    I = tf.cast((I >= maxI), tf.float32) * tf.reshape(
        response, (self.cellSize, self.cellSize, 1))

    # noI: [cell size, cell size, boxes per cell], boxes that are not
    # responsible.
    noI = tf.ones_like(I, dtype=tf.float32) - I

    # Predicted box confidences.
    pC = predict[:, :, self.numClasses:self.numClasses + self.boxesPerCell]

    # Ground-truth centre and square-rooted size.
    x = label[0]
    y = label[1]
    sqrtW = tf.sqrt(tf.abs(label[2]))
    sqrtH = tf.sqrt(tf.abs(label[3]))

    # Predicted centre and square-rooted size (size clamped to the image).
    pX = predictBoxes[:, :, :, 0]
    pY = predictBoxes[:, :, :, 1]
    pSqrtW = tf.sqrt(
        tf.minimum(self.imageSize * 1.0,
                   tf.maximum(0.0, predictBoxes[:, :, :, 2])))
    pSqrtH = tf.sqrt(
        tf.minimum(self.imageSize * 1.0,
                   tf.maximum(0.0, predictBoxes[:, :, :, 3])))

    # One-hot ground-truth class and predicted class scores.
    P = tf.one_hot(tf.cast(label[4], tf.int32),
                   self.numClasses,
                   dtype=tf.float32)
    pP = predict[:, :, 0:self.numClasses]

    # classLoss: only cells overlapped by the object.
    classLoss = tf.nn.l2_loss(
        tf.reshape(objects, (self.cellSize, self.cellSize, 1)) *
        (pP - P)) * self.classScale

    # objectLoss / noObjectLoss: confidence of responsible / other boxes.
    objectLoss = tf.nn.l2_loss(I * (pC - C)) * self.objectScale
    noObjectLoss = tf.nn.l2_loss(noI * (pC)) * self.noobjectScale

    # coordLoss is weighted multiplicatively, like the other three terms.
    # Adding the scale would only shift the loss by a constant and leave the
    # coordinate term unweighted.
    coordLoss = (
        tf.nn.l2_loss(I * (pX - x) / cellPixels) +
        tf.nn.l2_loss(I * (pY - y) / cellPixels) +
        tf.nn.l2_loss(I * (pSqrtW - sqrtW)) / self.imageSize +
        tf.nn.l2_loss(I * (pSqrtH - sqrtH)) / self.imageSize
    ) * self.coordScale

    nilboy = I

    return num + 1, objectNum, [
        loss[0] + classLoss,
        loss[1] + objectLoss,
        loss[2] + noObjectLoss,
        loss[3] + coordLoss
    ], predict, labels, nilboy
def calc_loss__slda__tensorflow_graph(
        param_vec=None,
        dim_P=None,
        dataset=None,
        convex_alpha_minus_1=None,
        tau=1.1,
        delta=0.1,
        lambda_w=0.001,
        weight_x=1.0,
        weight_y=1.0,
        weight_pi=1.0,
        return_dict=False,
        rescale_total_loss_by_n_tokens=True,
        frac_train_laps_completed=1.0,
        pi_max_iters_first_train_lap=DefaultDocTopicOptKwargs['pi_max_iters'],
        pi_max_iters=DefaultDocTopicOptKwargs['pi_max_iters'],
        active_proba_thr=0.005,
        **unused_kwargs):
    ''' Build the TF graph for the negated log probability of a bow dataset.

    Loops over documents with `tf.while_loop`; for each document it runs the
    per-document pi optimization and accumulates the x, y and pi terms.

    Args
    ----
    param_vec : flat parameter vector, unpacked with
        `_unflatten_to_common_param_dict__tf_graph(param_vec, **dim_P)`
        into 'topics_KV' and 'w_CK'. Must not be None.
    dim_P : dict of keyword args for the unflatten helper.
    dataset : dict with keys 'doc_indptr_Dp1', 'word_id_U', 'word_ct_U',
        'n_docs', 'y_DC', 'y_rowmask'.
    convex_alpha_minus_1 : multiplies log(pi) in the pi term.
    tau : the topics term is (tau - 1) * sum(log(topics_KV)).
    lambda_w : L2 penalty strength on w_CK.
    weight_x, weight_y, weight_pi : weights on the respective terms.
    frac_train_laps_completed : drives the ramp-up of pi iterations.
    pi_max_iters_first_train_lap, pi_max_iters : start and end of the ramp.

    NOTE(review): `delta`, `return_dict`, `rescale_total_loss_by_n_tokens`
    and `active_proba_thr` are accepted but not read in this function.

    Returns
    -------
    Tuple of 7 tensors:
        (-log_proba_x, -log_proba_y, -log_proba_pi, -log_proba_topics,
         -log_proba_w, pi_DK, y_proba_DC)
    The first five are divided by the total token count
    `sum(word_ct_U)` before negation. `pi_DK` is reshaped to
    (n_docs, K) and `y_proba_DC` to (n_docs, C).
    '''
    # Unpack dataset
    doc_indptr_Dp1 = dataset['doc_indptr_Dp1']
    word_id_U = dataset['word_id_U']
    word_ct_U = dataset['word_ct_U']
    n_docs = dataset['n_docs']
    y_DC = dataset['y_DC']
    y_rowmask = dataset['y_rowmask']

    ## Unpack params
    assert param_vec is not None
    param_dict = _unflatten_to_common_param_dict__tf_graph(param_vec, **dim_P)
    topics_KV = param_dict['topics_KV']
    w_CK = param_dict['w_CK']
    # K and C are the static leading dims of the two parameter matrices.
    K, _ = topics_KV.get_shape().as_list()
    C, _ = w_CK.get_shape().as_list()

    ## Establish kwargs for pi optimization step
    # Use 'ramp up' strategy to gradually increase per-doc iteration costs.
    # At first, perform only pi_max_iters_first_train_lap.
    # Linearly increase until reaching pi_max_iters,
    # which is designed to happen 50% of way through training.
    #
    # frac_progress : float within (0.0, 1.0)
    #     0.0 when frac_lap == 0
    #     0.5 when frac_lap == 0.25
    #     1.0 when frac_lap >= 0.5
    # cur_pi_max_iters : int
    #     Number of pi iters to run now
    assert pi_max_iters_first_train_lap <= pi_max_iters
    frac_progress = tf.minimum(
        tf.cast(1.0, tf.float64),
        2.0 * frac_train_laps_completed)
    cur_pi_max_iters = tf.cast(
        pi_max_iters_first_train_lap
        + tf.ceil(frac_progress * (pi_max_iters - pi_max_iters_first_train_lap)),
        tf.int32)
    # Pack up into the kwargs handed to pi optimization
    pi_opt_kwargs = dict(**DefaultDocTopicOptKwargs)
    pi_opt_kwargs['pi_max_iters'] = cur_pi_max_iters

    # while_loop condition: continue until every document has been visited.
    def has_docs_left(
            d, avg_log_proba_x, avg_log_proba_y,
            avg_log_proba_pi, pi_arr, y_arr):
        return d < n_docs

    # while_loop body: process document d and add its terms to the totals.
    def update_doc(
            d, avg_log_proba_x, avg_log_proba_y,
            avg_log_proba_pi, pi_arr, y_arr):
        # Slice out the word ids / counts belonging to document d.
        start_d = doc_indptr_Dp1[d]
        stop_d = doc_indptr_Dp1[d+1]
        word_id_d_Ud = word_id_U[start_d:stop_d]
        word_ct_d_Ud = word_ct_U[start_d:stop_d]
        pi_d_K, topics_KUd, _, _ = \
            _calc_nef_map_pi_d_K__tensorflow_graph(
                _word_id_d_Ud=word_id_d_Ud,
                _word_ct_d_Ud=word_ct_d_Ud,
                _topics_KV=topics_KV,
                convex_alpha_minus_1=convex_alpha_minus_1,
                **pi_opt_kwargs)
        pi_arr = pi_arr.write(d, pi_d_K)
        # 1e-9 guards against log(0).
        avg_log_proba_pi_d = weight_pi * tf.reduce_sum(
            convex_alpha_minus_1 * tf.log(1e-9 + pi_d_K))
        avg_log_proba_x_d = tf.reduce_sum(
            word_ct_d_Ud * tf.log(tf.matmul(tf.reshape(pi_d_K, (1,K)), topics_KUd)))
        # Count-dependent lgamma terms (look like the multinomial
        # normalizing constant -- confirm).
        avg_log_proba_x_d += (
            tf.lgamma(1.0 + tf.reduce_sum(word_ct_d_Ud))
            - tf.reduce_sum(tf.lgamma(1.0 + word_ct_d_Ud)))
        # Despite the name, this is used as the logit for y.
        log_proba_y_d_C = tf.reduce_sum(
            w_CK * tf.reshape(pi_d_K, (1,K)), axis=1)
        # Rows with y_rowmask[d] <= 0 contribute nothing to the y term.
        avg_log_proba_y_d = tf.cond(
            y_rowmask[d] > 0,
            lambda: -1.0 * tf.reduce_sum(
                tf.nn.sigmoid_cross_entropy_with_logits(logits=log_proba_y_d_C, labels=y_DC[d])),
            lambda: tf.constant(0.0, dtype=tf.float64))
        y_arr = y_arr.write(d, tf.sigmoid(log_proba_y_d_C))
        return (
            d+1,
            avg_log_proba_x + weight_x * avg_log_proba_x_d,
            avg_log_proba_y + weight_y * avg_log_proba_y_d,
            avg_log_proba_pi + avg_log_proba_pi_d,
            pi_arr, y_arr)

    # Initial loop state.
    _avg_log_proba_x = tf.constant(0.0, dtype=tf.float64)
    _avg_log_proba_y = tf.constant(0.0, dtype=tf.float64)
    _avg_log_proba_pi = tf.constant(0.0, dtype=tf.float64)
    # NOTE(review): _K and _convex_alpha_minus_1 are not used below.
    _K = tf.cast(K, tf.float64)
    _convex_alpha_minus_1 = tf.cast(convex_alpha_minus_1, tf.float64)
    _d = 0
    _pi_arr = tf.TensorArray(dtype=tf.float64, size=n_docs)
    _y_arr = tf.TensorArray(dtype=tf.float64, size=n_docs)

    (_d, _avg_log_proba_x, _avg_log_proba_y, _avg_log_proba_pi,
     _pi_arr, _y_arr) = tf.while_loop(
        has_docs_left,
        update_doc,
        loop_vars=[
            _d, _avg_log_proba_x, _avg_log_proba_y, _avg_log_proba_pi,
            _pi_arr, _y_arr])
    _pi_DK = tf.reshape(_pi_arr.concat(), (n_docs, K))
    _y_proba_DC = tf.reshape(_y_arr.concat(), (n_docs, C))

    # Terms that depend only on the global parameters.
    _avg_log_proba_topics = (tau - 1.0) * tf.reduce_sum(tf.log(topics_KV))
    _avg_log_proba_w = -1.0 * (
        weight_y * lambda_w * tf.reduce_sum(tf.square(w_CK)))

    # Normalize every term by the total number of tokens.
    scale_ttl = tf.reduce_sum(word_ct_U)
    _avg_log_proba_x /= scale_ttl
    _avg_log_proba_pi /= scale_ttl
    _avg_log_proba_y /= scale_ttl
    _avg_log_proba_topics /= scale_ttl
    _avg_log_proba_w /= scale_ttl

    # Negate so that the returned scalars are losses to minimize.
    return (
        -1.0 * _avg_log_proba_x,
        -1.0 * _avg_log_proba_y,
        -1.0 * _avg_log_proba_pi,
        -1.0 * _avg_log_proba_topics,
        -1.0 * _avg_log_proba_w,
        _pi_DK, _y_proba_DC)
def compute_num_leapfrog_steps(self, step_size):
    """Return the leapfrog step count for the given step size.

    The count is ``trajectory_length / step_size`` rounded up, as an int64
    tensor.
    """
    fractional_steps = self.trajectory_length / step_size
    whole_steps = tf.ceil(fractional_steps)
    return tf.cast(whole_steps, tf.int64)
def percentile(x,
               q,
               axis=None,
               interpolation=None,
               keep_dims=False,
               validate_args=False,
               name=None):
    """Compute the `q`-th percentile(s) of `x`.

    Given a vector `x`, the `q`-th percentile of `x` is the value `q / 100` of
    the way from the minimum to the maximum in a sorted copy of `x`.

    The values and distances of the two nearest neighbors as well as the
    `interpolation` parameter will determine the percentile if the normalized
    ranking does not match the location of `q` exactly.

    This function is the same as the median if `q = 50`, the same as the
    minimum if `q = 0` and the same as the maximum if `q = 100`.

    Multiple percentiles can be computed at once by using `1-D` vector `q`.
    Dimension zero of the returned `Tensor` will index the different
    percentiles.

    ```python
    # Get 30th percentile with default ('nearest') interpolation.
    x = [1., 2., 3., 4.]
    tfp.stats.percentile(x, q=30.)
    ==> 2.0

    # Get 30th and 70th percentiles with 'lower' interpolation
    x = [1., 2., 3., 4.]
    tfp.stats.percentile(x, q=[30., 70.], interpolation='lower')
    ==> [1., 3.]

    # Get 100th percentile (maximum).  By default, this is computed over
    # every dim
    x = [[1., 2.],
         [3., 4.]]
    tfp.stats.percentile(x, q=100.)
    ==> 4.

    # Treat the leading dim as indexing samples, and find the 100th quantile
    # (max) over all such samples.
    x = [[1., 2.],
         [3., 4.]]
    tfp.stats.percentile(x, q=100., axis=[0])
    ==> [3., 4.]
    ```

    Compare to `numpy.percentile`.

    Args:
      x:  Floating point `N-D` `Tensor` with `N > 0`.  If `axis` is not
        `None`, `x` must have statically known number of dimensions.
      q:  Scalar or vector `Tensor` with values in `[0, 100]`. The
        percentile(s).
      axis:  Optional `0-D` or `1-D` integer `Tensor` with constant values.
        The axis that hold independent samples over which to return the
        desired percentile.  If `None` (the default), treat every dimension
        as a sample dimension, returning a scalar.
      interpolation : {'lower', 'higher', 'nearest'}.  Default: 'nearest'
        (also used when `None` is passed).  This optional parameter specifies
        the interpolation method to use when the desired quantile lies
        between two data points `i < j`:
          * lower: `i`.
          * higher: `j`.
          * nearest: `i` or `j`, whichever is nearest.
      keep_dims:  Python `bool`. If `True`, the last dimension is kept with
        size 1.  If `False`, the last dimension is removed from the output
        shape.
      validate_args:  Whether to add runtime checks of argument validity.
        If False, and arguments are incorrect, correct behavior is not
        guaranteed.
      name:  A Python string name to give this `Op`.  Default is 'percentile'

    Returns:
      A `(rank(q) + N - len(axis))` dimensional `Tensor` of same dtype as `x`,
      or, if `axis` is `None`, a `rank(q)` `Tensor`.  The first `rank(q)`
      dimensions index quantiles for different values of `q`.

    Raises:
      ValueError:  If argument 'interpolation' is not an allowed type.
      ValueError:  If `axis` is given but its value is not statically
        available.
    """
    name = name or 'percentile'
    allowed_interpolations = {'lower', 'higher', 'nearest'}

    if interpolation is None:
        interpolation = 'nearest'
    else:
        if interpolation not in allowed_interpolations:
            raise ValueError('Argument `interpolation` must be in %s. Found %s' %
                             (allowed_interpolations, interpolation))

    with tf.name_scope(name, values=[x, q]):
        x = tf.convert_to_tensor(x, name='x')
        # Double is needed here and below, else we get the wrong index if the
        # array is huge along axis.
        q = tf.to_double(q, name='q')
        # Called for its static check only; the returned ndims is not needed.
        _get_static_ndims(q, expect_ndims_no_more_than=1)

        if validate_args:
            q = control_flow_ops.with_dependencies([
                tf.assert_rank_in(q, [0, 1]),
                tf.assert_greater_equal(q, tf.to_double(0.)),
                tf.assert_less_equal(q, tf.to_double(100.))
            ], q)

        if axis is None:
            # Every element is a sample: flatten to a single vector.
            y = tf.reshape(x, [-1])
        else:
            axis = tf.convert_to_tensor(axis, name='axis', dtype=tf.int32)
            # NOTE(review): the result of this assertion is discarded.
            tf.assert_integer(axis)
            axis_ndims = _get_static_ndims(
                axis, expect_static=True, expect_ndims_no_more_than=1)
            axis_const = tensor_util.constant_value(axis)
            if axis_const is None:
                raise ValueError(
                    'Expected argument `axis` to be statically available. Found: %s' %
                    axis)
            axis = axis_const
            # Normalize a scalar axis to a one-element list of Python ints.
            if axis_ndims == 0:
                axis = [axis]
            axis = [int(a) for a in axis]
            x_ndims = _get_static_ndims(
                x, expect_static=True, expect_ndims_at_least=1)
            axis = _make_static_axis_non_negative(axis, x_ndims)
            # Move dims in axis to the end, since _sort_tensor, which calls
            # top_k, only sorts the last dim.
            y = _move_dims_to_flat_end(x, axis, x_ndims)

        # Indices are computed from the fraction of samples at or above q,
        # which is why 'lower' uses ceil and 'higher' uses floor below
        # (presumably _sort_tensor returns values in descending order --
        # confirm).
        frac_at_q_or_above = 1. - q / 100.
        d = tf.to_double(tf.shape(y)[-1])

        if interpolation == 'lower':
            indices = tf.ceil((d - 1) * frac_at_q_or_above)
        elif interpolation == 'higher':
            indices = tf.floor((d - 1) * frac_at_q_or_above)
        elif interpolation == 'nearest':
            indices = tf.round((d - 1) * frac_at_q_or_above)

        # If d is gigantic, then we would have d == d - 1, even in double...
        # So let's use max/min to avoid out of bounds errors.
        d = tf.shape(y)[-1]
        # d - 1 will be distinct from d in int32.
        indices = tf.clip_by_value(tf.to_int32(indices), 0, d - 1)

        # Sort everything, not just the top 'k' entries, which allows multiple
        # calls to sort only once (under the hood) and use CSE.
        sorted_y = _sort_tensor(y)

        # Gather the indices along the sorted (last) dimension.
        # If q is a vector, the last dim of gathered_y indexes different q[i].
        gathered_y = tf.gather(sorted_y, indices, axis=-1)

        if keep_dims:
            if axis is None:
                # ones_vec is a shape made of rank(x) + rank(q) ones;
                # multiplying by ones of that shape broadcasts gathered_y up
                # to that rank.
                ones_vec = tf.ones(
                    shape=[_get_best_effort_ndims(x) + _get_best_effort_ndims(q)],
                    dtype=tf.int32)
                gathered_y *= tf.ones(ones_vec, dtype=x.dtype)
            else:
                gathered_y = _insert_back_keep_dims(gathered_y, axis)

        # If q is a scalar, then result has the right shape.
        # If q is a vector, then result has trailing dim of shape q.shape,
        # which needs to be rotated to dim 0.
        return util.rotate_transpose(gathered_y, tf.rank(q))
def auto_correlation(x,
                     axis=-1,
                     max_lags=None,
                     center=True,
                     normalize=True,
                     name='auto_correlation'):
    """Auto correlation along one axis.

    Given a `1-D` wide sense stationary (WSS) sequence `X`, the auto
    correlation `RXX` may be defined as (with `E` expectation and `Conj`
    complex conjugate)

    ```
    RXX[m] := E{ W[m] Conj(W[0]) } = E{ W[0] Conj(W[-m]) },
    W[n]   := (X[n] - MU) / S,
    MU     := E{ X[0] },
    S**2   := E{ (X[0] - MU) Conj(X[0] - MU) }.
    ```

    This function takes the viewpoint that `x` is (along one axis) a finite
    sub-sequence of a realization of (WSS) `X`, and then uses `x` to produce
    an estimate of `RXX[m]` as follows:

    After extending `x` from length `L` to `inf` by zero padding, the auto
    correlation estimate `rxx[m]` is computed for `m = 0, 1, ..., max_lags` as

    ```
    rxx[m] := (L - m)**-1 sum_n w[n + m] Conj(w[n]),
    w[n]   := (x[n] - mu) / s,
    mu     := L**-1 sum_n x[n],
    s**2   := L**-1 sum_n (x[n] - mu) Conj(x[n] - mu)
    ```

    The error in this estimate is proportional to `1 / sqrt(len(x) - m)`, so
    users often set `max_lags` small enough so that the entire output is
    meaningful.

    Note that since `mu` is an imperfect estimate of `E{ X[0] }`, and we
    divide by `len(x) - m` rather than `len(x) - m - 1`, our estimate of auto
    correlation contains a slight bias, which goes to zero as
    `len(x) - m --> infinity`.

    Args:
      x:  `float32` or `complex64` `Tensor`.
      axis:  Python `int`. The axis number along which to compute correlation.
        Other dimensions index different batch members.
      max_lags:  Positive `int` tensor.  The maximum value of `m` to consider
        (in equation above).  If `max_lags >= x.shape[axis]`, we effectively
        re-set `max_lags` to `x.shape[axis] - 1`.  `None` (the default) also
        means `x.shape[axis] - 1`.
      center:  Python `bool`.  If `False`, do not subtract the mean estimate
        `mu` from `x[n]` when forming `w[n]`.
      normalize:  Python `bool`.  If `False`, do not divide by the variance
        estimate `s**2` when forming `w[n]`.
      name:  `String` name to prepend to created ops.

    Returns:
      `rxx`: `Tensor` of same `dtype` as `x`.  `rxx.shape[i] = x.shape[i]` for
        `i != axis`, and `rxx.shape[axis] = max_lags + 1`.

    Raises:
      TypeError:  If `x` is not a supported type.
    """
    # Implementation details:
    # Extend length N / 2 1-D array x to length N by zero padding onto the end.
    # Then, set
    #   F[x]_k := sum_n x_n exp{-i 2 pi k n / N }.
    # It is not hard to see that
    #   F[x]_k Conj(F[x]_k) = F[R]_k, where
    #   R_m := sum_n x_n Conj(x_{(n - m) mod N}).
    # One can also check that R_m / (N / 2 - m) is an unbiased estimate of
    # RXX[m].

    # Since F[x] is the DFT of x, this leads us to a zero-padding and FFT/IFFT
    # based version of estimating RXX.
    # Note that this is a special case of the Wiener-Khinchin Theorem.
    with tf.name_scope(name, values=[x]):
        x = tf.convert_to_tensor(x, name='x')

        # Rotate dimensions of x in order to put axis at the rightmost dim.
        # FFT op requires this.
        rank = util.prefer_static_rank(x)
        if axis < 0:
            axis = rank + axis
        shift = rank - 1 - axis
        # Suppose x.shape[axis] = T, so there are T 'time' steps.
        #   ==> x_rotated.shape = B + [T],
        # where B is x_rotated's batch shape.
        x_rotated = util.rotate_transpose(x, shift)

        if center:
            x_rotated -= tf.reduce_mean(x_rotated, axis=-1, keepdims=True)

        # x_len = N / 2 from above explanation.  The length of x along axis.
        # Get a value for x_len that works in all cases.
        x_len = util.prefer_static_shape(x_rotated)[-1]

        # TODO(langmore) Investigate whether this zero padding helps or hurts.
        # At the moment is necessary so that all FFT implementations work.
        # Zero pad to the next power of 2 greater than 2 * x_len, which equals
        # 2**(ceil(Log_2(2 * x_len))).  Note: Log_2(X) = Log_e(X) / Log_e(2).
        x_len_float64 = tf.cast(x_len, np.float64)
        target_length = tf.pow(
            np.float64(2.), tf.ceil(tf.log(x_len_float64 * 2) / np.log(2.)))
        pad_length = tf.cast(target_length - x_len_float64, np.int32)

        # We should have:
        # x_rotated_pad.shape = x_rotated.shape[:-1] + [T + pad_length]
        #                     = B + [T + pad_length]
        x_rotated_pad = util.pad(x_rotated, axis=-1, back=True, count=pad_length)

        dtype = x.dtype
        if not dtype.is_complex:
            if not dtype.is_floating:
                raise TypeError('Argument x must have either float or complex dtype'
                                ' found: {}'.format(dtype))
            # Real input: attach a zero imaginary part so the FFT ops get a
            # complex tensor.
            x_rotated_pad = tf.complex(x_rotated_pad,
                                       dtype.real_dtype.as_numpy_dtype(0.))

        # Autocorrelation is IFFT of power-spectral density (up to some
        # scaling).
        fft_x_rotated_pad = tf.fft(x_rotated_pad)
        spectral_density = fft_x_rotated_pad * tf.conj(fft_x_rotated_pad)
        # shifted_product is R[m] from above detailed explanation.
        # It is the inner product sum_n X[n] * Conj(X[n - m]).
        shifted_product = tf.ifft(spectral_density)

        # Cast back to real-valued if x was real to begin with.
        shifted_product = tf.cast(shifted_product, dtype)

        # Figure out if we can deduce the final static shape, and set max_lags.
        # Use x_rotated as a reference, because it has the time dimension in
        # the far right, and was created before we performed all sorts of crazy
        # shape manipulations.
        know_static_shape = True
        if not x_rotated.shape.is_fully_defined():
            know_static_shape = False
        if max_lags is None:
            max_lags = x_len - 1
        else:
            max_lags = tf.convert_to_tensor(max_lags, name='max_lags')
            max_lags_ = tensor_util.constant_value(max_lags)
            if max_lags_ is None or not know_static_shape:
                # Either max_lags or the shape is dynamic: clamp in-graph.
                know_static_shape = False
                max_lags = tf.minimum(x_len - 1, max_lags)
            else:
                # Everything is static: clamp in Python.
                max_lags = min(x_len - 1, max_lags_)

        # Chop off the padding.
        # We allow users to provide a huge max_lags, but cut it off here.
        # shifted_product_chopped.shape = x_rotated.shape[:-1] + [max_lags + 1]
        shifted_product_chopped = shifted_product[..., :max_lags + 1]

        # If possible, set shape.
        if know_static_shape:
            chopped_shape = x_rotated.shape.as_list()
            chopped_shape[-1] = min(x_len, max_lags + 1)
            shifted_product_chopped.set_shape(chopped_shape)

        # Recall R[m] is a sum of N / 2 - m nonzero terms x[n] Conj(x[n - m]).
        # The other terms were zeros arising only due to zero padding.
        # `denominator = (N / 2 - m)` (defined below) is the proper term to
        # divide by to make this an unbiased estimate of the expectation
        # E[X[n] Conj(X[n - m])].
        x_len = tf.cast(x_len, dtype.real_dtype)
        max_lags = tf.cast(max_lags, dtype.real_dtype)
        denominator = x_len - tf.range(0., max_lags + 1.)
        denominator = tf.cast(denominator, dtype)
        shifted_product_rotated = shifted_product_chopped / denominator

        if normalize:
            # Divide by the lag-0 value so that the result is 1 at lag 0.
            shifted_product_rotated /= shifted_product_rotated[..., :1]

        # Transpose dimensions back to those of x.
        return util.rotate_transpose(shifted_product_rotated, -shift)
def selection_margin(masks, margin):
    """Return a 0/1 selection map built from a box-filtered copy of `masks`.

    `masks` is convolved ('SAME' padding, stride 1) with an all-ones kernel
    of side ``2 * margin + 1``.  The response is rounded up, made
    non-negative and clipped to ``[0, 1]``.
    """
    side = margin * 2 + 1
    box_kernel = tf.ones([side, side, 1, 1])
    unit_strides = [1, 1, 1, 1]
    response = tf.nn.conv2d(masks, box_kernel, unit_strides, 'SAME')
    magnitude = tf.abs(tf.ceil(response))
    return tf.clip_by_value(magnitude, 0, 1)
def preprocess_image_tf(filename, bbox_tensor, keypoints_tensor, mask, D=D):
    """Crop an image around a bounding box and rescale it to a D x D square.

    The JPEG at `filename` is decoded and its mean subtracted.  A square
    window whose side is the larger bbox dimension is centred on the bbox and
    clamped to the image; image and mask are cropped to it, zero-padded back
    to the full square and resized (nearest neighbour) to D x D.  Keypoints
    are shifted and scaled into the same frame.

    Args:
        filename: path of the JPEG file.
        bbox_tensor: bounding box, read as (x, y, width, height).
        keypoints_tensor: rows of (x, y, label) -- assumed from the [2, 1]
            split along axis 1; confirm against the caller.
        mask: 2-D mask aligned with the image.
        D: output side length.

    Returns:
        resized_image: cropped, padded (if needed) image scaled to D x D.
        resized_mask: the mask after the same crop / pad / resize.
        pts: keypoint coordinates in the resized frame, transposed so the
            coordinate axis comes first; out-of-range coordinates are 0.
        labels: transposed float flags, 1.0 where the input label is >= 2,
            and all zero whenever padding was required.
    """
    raw_jpeg = tf.read_file(filename)
    image = tf.cast(tf.image.decode_jpeg(raw_jpeg, channels=3), tf.float32)
    # subtract mean
    image = tf.subtract(image, tf.reduce_mean(image))
    mask = tf.transpose([mask], [1, 2, 0])

    bbox_tensor = tf.to_float(bbox_tensor)
    keypoints_tensor = tf.to_float(keypoints_tensor)

    # Square window: side = max(width, height), centred on the bbox.
    sideLength = tf.reduce_max(bbox_tensor[2:], axis=0)
    half = tf.constant(2.0)
    centerX = tf.floor(bbox_tensor[0] + tf.divide(bbox_tensor[2], half))
    centerY = tf.floor(bbox_tensor[1] + tf.divide(bbox_tensor[3], half))
    center = tf.stack([centerX, centerY])
    half_side = tf.divide(sideLength, half)

    # Image extent in (x, y) order, used to clamp the window corners.
    image_xy = tf.reverse(tf.to_float(tf.shape(image)[:2]), tf.constant([0]))
    corner1 = tf.to_int32(
        tf.minimum(tf.maximum(center - half_side, 0), image_xy))
    corner2 = tf.to_int32(
        tf.minimum(tf.maximum(center + half_side, 0), image_xy))

    # i_shape: size actually available in the image; d_shape: what is missing
    # to reach the full square.
    i_shape = corner2 - corner1
    d_shape = tf.to_int32(sideLength) - i_shape
    scale = tf.divide(tf.constant(D, tf.float32), sideLength)

    cropped_image = tf.image.crop_to_bounding_box(
        image, corner1[1], corner1[0], i_shape[1], i_shape[0])
    cropped_mask = tf.image.crop_to_bounding_box(
        mask, corner1[1], corner1[0], i_shape[1], i_shape[0])

    # Split the missing amount between the two sides of each axis.
    half_missing = tf.divide(d_shape, tf.constant(2))
    dX = tf.floor(half_missing)
    dY = tf.ceil(half_missing)

    pts, labels = tf.split(keypoints_tensor, [2, 1], axis=1)
    pts = pts - tf.to_float(corner1)  # shift keypoints into the crop
    pts = pts + tf.to_float(dX)       # shift keypoints by the padding
    pts = pts * scale                 # scale keypoints

    # set invalid pts to 0
    below_max = tf.to_int32(tf.less(pts, tf.constant(D, tf.float32)))
    above_zero = tf.to_int32(tf.greater(pts, 0))
    valid = below_max * above_zero
    pts = pts * tf.to_float(valid)
    pts = tf.transpose(pts, [1, 0])

    labels = tf.transpose(labels, [1, 0])
    labels = tf.to_float(tf.greater_equal(labels, 2))

    pad_top = tf.to_int32(dX[1])
    pad_left = tf.to_int32(dX[0])
    side_px = tf.to_int32(sideLength)
    padded_image = tf.image.pad_to_bounding_box(
        cropped_image, pad_top, pad_left, side_px, tf.to_int32(sideLength))
    padded_mask = tf.image.pad_to_bounding_box(
        cropped_mask, tf.to_int32(dX[1]), tf.to_int32(dX[0]),
        tf.to_int32(sideLength), tf.to_int32(sideLength))

    # if image size is not square, set labels to zero (so loss will be zero
    # and padding won't affect training).  The flag is 1 only when no padding
    # was needed on either axis.
    is_padded = tf.reduce_min(tf.to_float(tf.less(dX, 1.0)))
    labels = is_padded * labels

    target_size = tf.constant([D, D])
    resized_image = tf.image.resize_images(
        padded_image, target_size, tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    resized_mask = tf.image.resize_images(
        padded_mask, tf.constant([D, D]),
        tf.image.ResizeMethod.NEAREST_NEIGHBOR)

    return resized_image, resized_mask, pts, labels
def build_model(embedding, options):
    """Build the entire computational graph used for training.

    Each input group holds N instances (word-id sequences).  Instances are
    encoded with a word-level BiLSTM plus attention, scored by a small MLP,
    and the group score is the mean of its instance scores.

    Args:
        embedding: embedding matrix looked up with the word ids.
        options: dict; reads 'dim_word', 'dim' and 'use_dropout'.

    Returns:
        Tuple ``(is_training, cost, x, x_mask, y, n_timesteps, group_pred,
        summary)``.  ``cost`` has shape [1, batch]; ``group_pred`` is the
        int64 hard prediction (mean instance score rounded at 0.5).  The
        ``keep_prob`` and ``alpha_balance`` placeholders are not returned.
    """
    with tf.device('/gpu:0'):
        with tf.variable_scope('input'):
            # Word ids: [batch, N instances, words per instance].
            x = tf.placeholder(tf.int64, shape=[None, None, None], name='x')
            # Mask: [batch, N].
            x_mask = tf.placeholder(tf.float32, shape=[None, None],
                                    name='x_mask')
            # Ground-truth group label.
            y = tf.placeholder(tf.int64, shape=[None], name='y')
            keep_prob = tf.placeholder(tf.float32, [], name='keep_prob')
            is_training = tf.placeholder(tf.bool, name='is_training')
            alpha_balance = tf.placeholder(tf.float32, [],
                                           name='alpha_balance')
            # 1.0 wherever the word id is non-zero (id 0 is padding).
            sequence_mask = tf.cast(tf.abs(tf.sign(x)), tf.float32)
            n_timesteps = tf.shape(x)[0]
            # Word embedding.
            emb = tf.nn.embedding_lookup(embedding, x)

    with tf.device('/gpu:0'):
        with tf.name_scope('sentence_enc'):
            batch = tf.shape(emb)[0]
            N = tf.shape(emb)[1]      # instances per group
            word = tf.shape(emb)[2]   # words per instance
            # Instance encoding through BiLSTM + attention.
            with tf.variable_scope(name_or_scope='sentence_enc',
                                   reuse=tf.AUTO_REUSE):
                word_level_inputs = tf.reshape(
                    emb, [batch * N, word, options['dim_word']])
                word_level_mask = tf.reshape(sequence_mask,
                                             [batch * N, word])
                # Word-level BiLSTM; output is [batch*N, words, 2*dim] after
                # concatenating both directions.
                word_encoder_out = bilstm_filter(
                    word_level_inputs, word_level_mask, keep_prob,
                    prefix='sequence_encode', dim=options['dim'],
                    is_training=is_training)
                word_encoder_out = tf.concat(
                    word_encoder_out, 2) * tf.expand_dims(word_level_mask, -1)
                # Word attention.
                word_level_output = attention_v2(
                    word_encoder_out, word_level_mask, name='word_attention',
                    keep=keep_prob, r=10, is_training=is_training)
                if options['use_dropout']:
                    word_level_output = layers.dropout(
                        word_level_output, keep_prob=keep_prob,
                        is_training=is_training, seed=None)
                # att: [batch, N, 2*dim]
                att = tf.reshape(word_level_output,
                                 [batch, N, 2 * options['dim']])

        with tf.name_scope('instance_prediction'):
            logit = tf.layers.dense(
                word_level_output, 150, activation=tf.nn.tanh, use_bias=True,
                kernel_initializer=layers.xavier_initializer(
                    uniform=True, seed=None, dtype=tf.float32),
                name='inst_temp', reuse=tf.AUTO_REUSE)
            if options['use_dropout']:
                logit = layers.dropout(logit, keep_prob=keep_prob,
                                       is_training=is_training, seed=None)
            pred_sig_ = tf.layers.dense(
                logit, 1, activation=None, use_bias=True,
                kernel_initializer=layers.xavier_initializer(
                    uniform=True, seed=None, dtype=tf.float32),
                name='inst_pred', reuse=tf.AUTO_REUSE)
            inst_pred = tf.nn.sigmoid(pred_sig_)  # [batch*N, 1], float32
            L = tf.reshape(inst_pred, [batch, N])

        # Group score: average of the instance predictions, in [0, 1].
        group_ = tf.reduce_sum(L, 1) / tf.cast(N, tf.float32)
        # Hard 0/1 prediction, used for reporting only.
        group_pred = tf.cast(tf.ceil(group_ - 0.5), tf.int64)

    logger.info('Building f_cost...')
    x_simil = Euclidean_distance(att)  # [batch, N, N]
    l_diff = instance_diff(L)          # [batch, N, N]
    simil_cost = tf.reduce_sum(
        tf.multiply(x_simil, l_diff), [1, 2]) / tf.cast(N * N, tf.float32)

    # Log loss on the group score.  It must use the float mean `group_`, not
    # `group_pred`: tf.log does not accept int64, and a rounded 0/1 value
    # would give infinite loss and no gradient.  Clip to keep the log finite.
    eps = 1e-7
    y_float = tf.cast(y, tf.float32)
    group_prob = tf.clip_by_value(group_, eps, 1.0 - eps)
    group_cost = (-y_float * tf.log(group_prob)
                  - (1.0 - y_float) * tf.log(1.0 - group_prob))

    total_cost = simil_cost + alpha_balance * group_cost  # [batch]
    cost = tf.reshape(total_cost, (1, -1))                # [1, batch]
    logger.info('Done')

    with tf.variable_scope('logging'):
        tf.summary.scalar('current_cost', tf.reduce_mean(cost))
        tf.summary.histogram('predicted_value', group_pred)
        summary = tf.summary.merge_all()

    return is_training, cost, x, x_mask, y, n_timesteps, group_pred, summary
def _set_learning_rate(self):
    """Create ``self.global_step`` and the learning-rate tensor ``self.lr``.

    The schedule is selected by ``self.args.learning_rate_strategy``:

    * ``FIXED``: logarithmic warm-up, ``lr * log(step + 1) / log(999)``,
      capped at ``self.args.learning_rate``.
    * ``HALF_COSINE_MAX``: ``lr / 2 * (cos(pi * phase) + 1)``.
    * ``HALF_COSINE_ZERO``: ``lr / 2 * (1 - cos(pi * phase))``.
    * ``COSINE_ZERO``: ``lr / 2 * (1 - cos(2 * pi * phase))``.
    * ``COSINE_MAX``: ``lr / 2 * (1 + cos(2 * pi * phase))``.
    * ``COSINE_ZERO_DECAY``: ``lr / (ceil(step / t_m) + 1) *
      (1 - cos(2 * pi * phase))``.
    * ``CYCLE_LINEAR`` / ``CYCLE_SIN``: ``self.lr`` is a non-trainable
      variable initialised to ``self.args.learning_rate`` (presumably
      assigned by the training loop -- confirm against the caller).

    ``phase`` is ``(global_step mod t_m) / t_m`` and ``t_m`` is the cycle
    length in steps:
    ``ceil(learning_rate_reset_epoch * num_total_samples / batch_size)``.

    Raises:
        NotImplementedError: for any other strategy name.
    """
    self.global_step = tf.get_variable(
        'global_step',
        shape=[],
        dtype=tf.int32,
        initializer=tf.constant_initializer(0),
        trainable=False)

    strategy = self.args.learning_rate_strategy
    base_lr = self.args.learning_rate
    # NOTE: the truncated value of pi is kept on purpose so that existing
    # schedules stay numerically unchanged.
    pi = 3.1415

    def steps_per_cycle():
        # int() keeps tf.constant(..., dtype=tf.int32) valid even when
        # `ceil` hands back a float (Python 2 math.ceil, numpy.ceil).
        return tf.constant(
            int(ceil(self.args.learning_rate_reset_epoch *
                     self.args.num_total_samples / self.args.batch_size)),
            dtype=tf.int32)

    def cycle_cosine(t_m, angle):
        # cos(angle * phase), phase = position inside the current cycle.
        return tf.cos(
            tf.constant(angle, tf.float32) *
            tf.cast(tf.mod(self.global_step, t_m), tf.float32) /
            tf.cast(t_m, tf.float32))

    if strategy == 'FIXED':
        self.lr = tf.minimum(
            base_lr,
            base_lr / tf.log(999.) *
            tf.log(tf.cast(self.global_step, tf.float32) + 1))
    elif strategy == 'HALF_COSINE_MAX':
        # from snapshot paper
        t_m = steps_per_cycle()
        self.lr = (base_lr / 2.0) * (cycle_cosine(t_m, pi) + 1.0)
    elif strategy == 'HALF_COSINE_ZERO':
        # from snapshot paper
        t_m = steps_per_cycle()
        self.lr = (base_lr / 2.0) * (1.0 - cycle_cosine(t_m, pi))
    elif strategy == 'COSINE_ZERO':
        t_m = steps_per_cycle()
        self.lr = (base_lr / 2.0) * (1.0 - cycle_cosine(t_m, 2 * pi))
    elif strategy == 'COSINE_MAX':
        t_m = steps_per_cycle()
        self.lr = (base_lr / 2.0) * (1.0 + cycle_cosine(t_m, 2 * pi))
    elif strategy == 'COSINE_ZERO_DECAY':
        t_m = steps_per_cycle()
        cycles_elapsed = tf.ceil(
            tf.cast(self.global_step, tf.float32) /
            tf.cast(t_m, tf.float32))
        # The "+ 1" belongs to the divisor. Written as `lr / cycles + 1` the
        # expression divided by zero at step 0 (inf * 0 -> NaN) and
        # otherwise added 1.0 to the learning rate.
        self.lr = (base_lr / (cycles_elapsed + 1)) \
            * (1.0 - cycle_cosine(t_m, 2 * pi))
    elif strategy in ['CYCLE_LINEAR', 'CYCLE_SIN']:
        self.lr = tf.get_variable(
            'lr',
            shape=[],
            dtype=tf.float32,
            initializer=tf.constant_initializer(base_lr),
            trainable=False)
    else:
        raise NotImplementedError
def augment_pipeline(
    images,                         # Input images: NCHW, float32, dynamic range [-1,+1].
    labels,                         # Input labels.
    strength         = 1,           # Overall multiplier for augmentation probability; can be a Tensor.
    debug_percentile = None,        # Percentile value for visualizing parameter ranges; None = normal operation.

    # Pixel blitting.
    xflip            = 0,           # Probability multiplier for x-flip.
    rotate90         = 0,           # Probability multiplier for 90 degree rotations.
    xint             = 0,           # Probability multiplier for integer translation.
    xint_max         = 0.125,       # Range of integer translation, relative to image dimensions.

    # General geometric transformations.
    scale            = 0,           # Probability multiplier for isotropic scaling.
    rotate           = 0,           # Probability multiplier for arbitrary rotation.
    aniso            = 0,           # Probability multiplier for anisotropic scaling.
    xfrac            = 0,           # Probability multiplier for fractional translation.
    scale_std        = 0.2,         # Log2 standard deviation of isotropic scaling.
    rotate_max       = 1,           # Range of arbitrary rotation, 1 = full circle.
    aniso_std        = 0.2,         # Log2 standard deviation of anisotropic scaling.
    xfrac_std        = 0.125,       # Standard deviation of fractional translation, relative to image dimensions.

    # Color transformations.
    brightness       = 0,           # Probability multiplier for brightness.
    contrast         = 0,           # Probability multiplier for contrast.
    lumaflip         = 0,           # Probability multiplier for luma flip.
    hue              = 0,           # Probability multiplier for hue rotation.
    saturation       = 0,           # Probability multiplier for saturation.
    brightness_std   = 0.2,         # Standard deviation of brightness.
    contrast_std     = 0.5,         # Log2 standard deviation of contrast.
    hue_max          = 1,           # Range of hue rotation, 1 = full circle.
    saturation_std   = 1,           # Log2 standard deviation of saturation.

    # Image-space filtering.
    imgfilter        = 0,           # Probability multiplier for image-space filtering.
    imgfilter_bands  = [1,1,1,1],   # Probability multipliers for individual frequency bands.
    imgfilter_std    = 1,           # Log2 standard deviation of image-space filter amplification.

    # Image-space corruptions.
    noise            = 0,           # Probability multiplier for additive RGB noise.
    cutout           = 0,           # Probability multiplier for cutout.
    noise_std        = 0.1,         # Standard deviation of additive RGB noise.
    cutout_size      = 0.5,         # Size of the cutout rectangle, relative to image dimensions.
):
    """Build the graph ops that randomly augment a batch of images.

    The stages are built in this order, each only when its probability
    multiplier is greater than zero:

    1. Pixel blitting and general geometric transforms are accumulated into
       one inverse homogeneous 2D matrix ``G_inv`` and the images are
       resampled once.
    2. Color transforms are accumulated into one homogeneous 3D matrix ``C``
       and applied with a single matrix product.
    3. Image-space filtering with a per-sample, per-band gain.
    4. Additive noise and cutout.

    The multipliers are tested with plain Python ``if x > 0`` statements, so
    they are presumably expected to be Python numbers; only ``strength`` is
    documented as possibly being a Tensor.  When ``debug_percentile`` is not
    None, each sampled parameter is overwritten with a value derived from
    that percentile instead of the random draw.

    ``gate_augment_params(p, value, neutral)`` is defined elsewhere;
    presumably it keeps ``value`` with probability ``p`` and substitutes
    ``neutral`` otherwise -- confirm against its definition.

    Returns:
        Tuple ``(images, labels)``.  ``labels`` is returned exactly as passed
        in; ``images`` is reshaped back to ``[batch, channels, height,
        width]`` by every stage that touches it.
    """

    # Determine input shape.
    batch, channels, height, width = images.shape.as_list()
    if batch is None:
        # Static batch size unknown: fall back to the dynamic one.
        batch = tf.shape(images)[0]

    # -------------------------------------
    # Select parameters for pixel blitting.
    # -------------------------------------

    # Initialize inverse homogeneous 2D transform: G_inv @ pixel_out ==> pixel_in
    I_3 = tf.eye(3, batch_shape=[batch])
    G_inv = I_3

    # Apply x-flip with probability (xflip * strength).
    if xflip > 0:
        i = tf.floor(tf.random_uniform([batch], 0, 2))
        i = gate_augment_params(xflip * strength, i, 0)
        if debug_percentile is not None:
            i = tf.floor(tf.broadcast_to(debug_percentile, [batch]) * 2)
        G_inv @= scale_2d_inv(1 - 2 * i, 1)

    # Apply 90 degree rotations with probability (rotate90 * strength).
    if rotate90 > 0:
        i = tf.floor(tf.random_uniform([batch], 0, 4))
        i = gate_augment_params(rotate90 * strength, i, 0)
        if debug_percentile is not None:
            i = tf.floor(tf.broadcast_to(debug_percentile, [batch]) * 4)
        G_inv @= rotate_2d_inv(-np.pi / 2 * i)

    # Apply integer translation with probability (xint * strength).
    if xint > 0:
        t = tf.random_uniform([batch, 2], -xint_max, xint_max)
        t = gate_augment_params(xint * strength, t, 0)
        if debug_percentile is not None:
            t = (tf.broadcast_to(debug_percentile, [batch, 2]) * 2 - 1) * xint_max
        G_inv @= translate_2d_inv(tf.rint(t[:,0] * width), tf.rint(t[:,1] * height))

    # --------------------------------------------------------
    # Select parameters for general geometric transformations.
    # --------------------------------------------------------

    # Apply isotropic scaling with probability (scale * strength).
    if scale > 0:
        s = 2 ** tf.random_normal([batch], 0, scale_std)
        s = gate_augment_params(scale * strength, s, 1)
        if debug_percentile is not None:
            s = 2 ** (tflib.erfinv(tf.broadcast_to(debug_percentile, [batch]) * 2 - 1) * scale_std)
        G_inv @= scale_2d_inv(s, s)

    # Apply pre-rotation with probability p_rot.
    # Rotation is applied twice (before and after anisotropic scaling), each
    # time with p_rot = 1 - sqrt(1 - p), where p = rotate * strength.
    p_rot = 1 - tf.sqrt(tf.cast(tf.maximum(1 - rotate * strength, 0), tf.float32)) # P(pre OR post) = p
    if rotate > 0:
        theta = tf.random_uniform([batch], -np.pi * rotate_max, np.pi * rotate_max)
        theta = gate_augment_params(p_rot, theta, 0)
        if debug_percentile is not None:
            theta = (tf.broadcast_to(debug_percentile, [batch]) * 2 - 1) * np.pi * rotate_max
        G_inv @= rotate_2d_inv(-theta) # Before anisotropic scaling.

    # Apply anisotropic scaling with probability (aniso * strength).
    if aniso > 0:
        s = 2 ** tf.random_normal([batch], 0, aniso_std)
        s = gate_augment_params(aniso * strength, s, 1)
        if debug_percentile is not None:
            s = 2 ** (tflib.erfinv(tf.broadcast_to(debug_percentile, [batch]) * 2 - 1) * aniso_std)
        G_inv @= scale_2d_inv(s, 1 / s)

    # Apply post-rotation with probability p_rot.
    if rotate > 0:
        theta = tf.random_uniform([batch], -np.pi * rotate_max, np.pi * rotate_max)
        theta = gate_augment_params(p_rot, theta, 0)
        if debug_percentile is not None:
            # In debug mode only the pre-rotation is visualized.
            theta = tf.zeros([batch])
        G_inv @= rotate_2d_inv(-theta) # After anisotropic scaling.

    # Apply fractional translation with probability (xfrac * strength).
    if xfrac > 0:
        t = tf.random_normal([batch, 2], 0, xfrac_std)
        t = gate_augment_params(xfrac * strength, t, 0)
        if debug_percentile is not None:
            t = tflib.erfinv(tf.broadcast_to(debug_percentile, [batch, 2]) * 2 - 1) * xfrac_std
        G_inv @= translate_2d_inv(t[:,0] * width, t[:,1] * height)

    # ----------------------------------
    # Execute geometric transformations.
    # ----------------------------------

    # Execute if the transform is not identity.
    # (Identity check: G_inv is still the very same object as I_3 only when
    # none of the branches above rebound it.)
    if G_inv is not I_3:

        # Setup orthogonal lowpass filter.
        Hz = wavelets['sym6']
        Hz = np.asarray(Hz, dtype=np.float32)
        Hz = np.reshape(Hz, [-1, 1, 1]).repeat(channels, axis=1) # [tap, channel, 1]
        Hz_pad = Hz.shape[0] // 4

        # Calculate padding.
        cx = (width - 1) / 2
        cy = (height - 1) / 2
        cp = np.transpose([[-cx, -cy, 1], [cx, -cy, 1], [cx, cy, 1], [-cx, cy, 1]]) # [xyz, idx]
        cp = G_inv @ cp[np.newaxis] # [batch, xyz, idx]
        cp = cp[:, :2, :] # [batch, xy, idx]
        m_lo = tf.ceil(tf.reduce_max(-cp, axis=[0,2]) - [cx, cy] + Hz_pad * 2)
        m_hi = tf.ceil(tf.reduce_max( cp, axis=[0,2]) - [cx, cy] + Hz_pad * 2)
        m_lo = tf.clip_by_value(m_lo, [0, 0], [width-1, height-1])
        m_hi = tf.clip_by_value(m_hi, [0, 0], [width-1, height-1])

        # Pad image and adjust origin.
        images = tf.transpose(images, [0, 2, 3, 1]) # NCHW => NHWC
        pad = [[0, 0], [m_lo[1], m_hi[1]], [m_lo[0], m_hi[0]], [0, 0]]
        images = tf.pad(tensor=images, paddings=pad, mode='REFLECT')
        T_in = translate_2d(cx + m_lo[0], cy + m_lo[1])
        T_out = translate_2d_inv(cx + Hz_pad, cy + Hz_pad)
        G_inv = T_in @ G_inv @ T_out

        # Upsample.
        shape = [batch, tf.shape(images)[1] * 2, tf.shape(images)[2] * 2, channels]
        images = tf.nn.depthwise_conv2d_backprop_input(input_sizes=shape, filter=Hz[np.newaxis, :], out_backprop=images, strides=[1,2,2,1], padding='SAME', data_format='NHWC')
        images = tf.nn.depthwise_conv2d_backprop_input(input_sizes=shape, filter=Hz[:, np.newaxis], out_backprop=images, strides=[1,1,1,1], padding='SAME', data_format='NHWC')
        G_inv = scale_2d(2, 2) @ G_inv @ scale_2d_inv(2, 2) # Account for the increased resolution.

        # Execute transformation.
        # Only the first 8 of the 9 matrix entries are passed on.
        transforms = tf.reshape(G_inv, [-1, 9])[:, :8]
        shape = [(height + Hz_pad * 2) * 2, (width + Hz_pad * 2) * 2]
        images = tf.contrib.image.transform(images=images, transforms=transforms, output_shape=shape, interpolation='BILINEAR')

        # Downsample and crop.
        images = tf.nn.depthwise_conv2d(input=images, filter=Hz[np.newaxis,:], strides=[1,1,1,1], padding='SAME', data_format='NHWC')
        images = tf.nn.depthwise_conv2d(input=images, filter=Hz[:,np.newaxis], strides=[1,2,2,1], padding='SAME', data_format='NHWC')
        images = images[:, Hz_pad : height + Hz_pad, Hz_pad : width + Hz_pad, :]
        images = tf.transpose(images, [0, 3, 1, 2]) # NHWC => NCHW

    # --------------------------------------------
    # Select parameters for color transformations.
    # --------------------------------------------

    # Initialize homogeneous 3D transformation matrix: C @ color_in ==> color_out
    I_4 = tf.eye(4, batch_shape=[batch])
    C = I_4

    # Apply brightness with probability (brightness * strength).
    if brightness > 0:
        b = tf.random_normal([batch], 0, brightness_std)
        b = gate_augment_params(brightness * strength, b, 0)
        if debug_percentile is not None:
            b = tflib.erfinv(tf.broadcast_to(debug_percentile, [batch]) * 2 - 1) * brightness_std
        C = translate_3d(b, b, b) @ C

    # Apply contrast with probability (contrast * strength).
    if contrast > 0:
        c = 2 ** tf.random_normal([batch], 0, contrast_std)
        c = gate_augment_params(contrast * strength, c, 1)
        if debug_percentile is not None:
            c = 2 ** (tflib.erfinv(tf.broadcast_to(debug_percentile, [batch]) * 2 - 1) * contrast_std)
        C = scale_3d(c, c, c) @ C

    # Apply luma flip with probability (lumaflip * strength).
    v = np.array([1, 1, 1, 0]) / np.sqrt(3) # Luma axis.
    if lumaflip > 0:
        i = tf.floor(tf.random_uniform([batch], 0, 2))
        i = gate_augment_params(lumaflip * strength, i, 0)
        if debug_percentile is not None:
            i = tf.floor(tf.broadcast_to(debug_percentile, [batch]) * 2)
        i = tf.reshape(i, [batch, 1, 1])
        C = (I_4 - 2 * np.outer(v, v) * i) @ C # Householder reflection.

    # Apply hue rotation with probability (hue * strength).
    if hue > 0 and channels > 1:
        theta = tf.random_uniform([batch], -np.pi * hue_max, np.pi * hue_max)
        theta = gate_augment_params(hue * strength, theta, 0)
        if debug_percentile is not None:
            theta = (tf.broadcast_to(debug_percentile, [batch]) * 2 - 1) * np.pi * hue_max
        C = rotate_3d(v, theta) @ C # Rotate around v.

    # Apply saturation with probability (saturation * strength).
    if saturation > 0 and channels > 1:
        s = 2 ** tf.random_normal([batch], 0, saturation_std)
        s = gate_augment_params(saturation * strength, s, 1)
        if debug_percentile is not None:
            s = 2 ** (tflib.erfinv(tf.broadcast_to(debug_percentile, [batch]) * 2 - 1) * saturation_std)
        s = tf.reshape(s, [batch, 1, 1])
        C = (np.outer(v, v) + (I_4 - np.outer(v, v)) * s) @ C

    # ------------------------------
    # Execute color transformations.
    # ------------------------------

    # Execute if the transform is not identity.
    if C is not I_4:
        images = tf.reshape(images, [batch, channels, height * width])
        if channels == 3:
            images = C[:, :3, :3] @ images + C[:, :3, 3:]
        elif channels == 1:
            # Single-channel input: average the three color rows of C into one.
            C = tf.reduce_mean(C[:, :3, :], axis=1, keepdims=True)
            images = images * tf.reduce_sum(C[:, :, :3], axis=2, keepdims=True) + C[:, :, 3:]
        else:
            raise ValueError('Image must be RGB (3 channels) or L (1 channel)')
        images = tf.reshape(images, [batch, channels, height, width])

    # ----------------------
    # Image-space filtering.
    # ----------------------

    if imgfilter > 0:
        num_bands = 4
        assert len(imgfilter_bands) == num_bands
        expected_power = np.array([10, 1, 1, 1]) / 13 # Expected power spectrum (1/f).

        # Apply amplification for each band with probability (imgfilter * strength * band_strength).
        g = tf.ones([batch, num_bands]) # Global gain vector (identity).
        for i, band_strength in enumerate(imgfilter_bands):
            t_i = 2 ** tf.random_normal([batch], 0, imgfilter_std)
            t_i = gate_augment_params(imgfilter * strength * band_strength, t_i, 1)
            if debug_percentile is not None:
                t_i = 2 ** (tflib.erfinv(tf.broadcast_to(debug_percentile, [batch]) * 2 - 1) * imgfilter_std) if band_strength > 0 else tf.ones([batch])
            t = tf.ones([batch, num_bands]) # Temporary gain vector.
            t = tf.concat([t[:, :i], t_i[:, np.newaxis], t[:, i+1:]], axis=-1) # Replace i'th element.
            t /= tf.sqrt(tf.reduce_sum(expected_power * tf.square(t), axis=-1, keepdims=True)) # Normalize power.
            g *= t # Accumulate into global gain.

        # Construct filter bank.
        Hz_lo = wavelets['sym2']
        Hz_lo = np.asarray(Hz_lo, dtype=np.float32)     # H(z)
        Hz_hi = Hz_lo * ((-1) ** np.arange(Hz_lo.size)) # H(-z)
        Hz_lo2 = np.convolve(Hz_lo, Hz_lo[::-1]) / 2    # H(z) * H(z^-1) / 2
        Hz_hi2 = np.convolve(Hz_hi, Hz_hi[::-1]) / 2    # H(-z) * H(-z^-1) / 2
        Hz_bands = np.eye(num_bands, 1)                 # Bandpass(H(z), b_i)
        for i in range(1, num_bands):
            Hz_bands = np.dstack([Hz_bands, np.zeros_like(Hz_bands)]).reshape(num_bands, -1)[:, :-1]
            Hz_bands = scipy.signal.convolve(Hz_bands, [Hz_lo2])
            Hz_bands[i, (Hz_bands.shape[1] - Hz_hi2.size) // 2 : (Hz_bands.shape[1] + Hz_hi2.size) // 2] += Hz_hi2

        # Construct combined amplification filter.
        Hz_prime = g @ Hz_bands # [batch, tap]
        Hz_prime = tf.transpose(Hz_prime) # [tap, batch]
        Hz_prime = tf.tile(Hz_prime[:, :, np.newaxis], [1, 1, channels]) # [tap, batch, channels]
        Hz_prime = tf.reshape(Hz_prime, [-1, batch * channels, 1]) # [tap, batch * channels, 1]

        # Apply filter.
        # Batch and channel axes are folded together so that each sample gets
        # its own filter through a single depthwise convolution.
        images = tf.reshape(images, [1, -1, height, width])
        pad = Hz_bands.shape[1] // 2
        pad = [[0,0], [0,0], [pad, pad], [pad, pad]]
        images = tf.pad(tensor=images, paddings=pad, mode='REFLECT')
        images = tf.nn.depthwise_conv2d(input=images, filter=Hz_prime[np.newaxis,:], strides=[1,1,1,1], padding='VALID', data_format='NCHW')
        images = tf.nn.depthwise_conv2d(input=images, filter=Hz_prime[:,np.newaxis], strides=[1,1,1,1], padding='VALID', data_format='NCHW')
        images = tf.reshape(images, [-1, channels, height, width])

    # ------------------------
    # Image-space corruptions.
    # ------------------------

    # Apply additive RGB noise with probability (noise * strength).
    if noise > 0:
        sigma = tf.abs(tf.random_normal([batch], 0, noise_std))
        sigma = gate_augment_params(noise * strength, sigma, 0)
        if debug_percentile is not None:
            sigma = tflib.erfinv(tf.broadcast_to(debug_percentile, [batch])) * noise_std
        sigma = tf.reshape(sigma, [-1, 1, 1, 1])
        images += tf.random_normal([batch, channels, height, width]) * sigma

    # Apply cutout with probability (cutout * strength).
    if cutout > 0:
        size = tf.fill([batch, 2], cutout_size)
        size = gate_augment_params(cutout * strength, size, 0)
        center = tf.random_uniform([batch, 2], 0, 1)
        if debug_percentile is not None:
            size = tf.fill([batch, 2], cutout_size)
            center = tf.broadcast_to(debug_percentile, [batch, 2])
        size = tf.reshape(size, [batch, 2, 1, 1, 1])
        center = tf.reshape(center, [batch, 2, 1, 1, 1])
        coord_x = tf.reshape(tf.range(width, dtype=tf.float32), [1, 1, 1, width])
        coord_y = tf.reshape(tf.range(height, dtype=tf.float32), [1, 1, height, 1])
        # A pixel is kept (mask = 1) when it lies outside the rectangle on at
        # least one axis.
        mask_x = (tf.abs((coord_x + 0.5) / width - center[:, 0]) >= size[:, 0] / 2)
        mask_y = (tf.abs((coord_y + 0.5) / height - center[:, 1]) >= size[:, 1] / 2)
        mask = tf.cast(tf.logical_or(mask_x, mask_y), tf.float32)
        images *= mask

    return images, labels
def pad_to_multiple(tensor, multiple):
    """Zero-pad height and width of a tensor up to the next multiple.

    Zeros are appended at the end of axis 1 (height) and axis 2 (width) until
    both sizes are divisible by `multiple`.  For example, a [1, 3, 5, 1]
    input with `multiple` = 4 comes back with shape [1, 4, 8, 1].

    Args:
      tensor: rank 4 float32 tensor laid out as
        [batch_size, height, width, channels].
      multiple: the multiple to pad to.  A value of 1 returns `tensor`
        untouched.

    Returns:
      padded_tensor: the tensor zero padded to the specified multiple.
    """
    if multiple == 1:
        return tensor

    def _round_up_static(size):
        # Size is known while the graph is built: plain Python arithmetic.
        return int(math.ceil(float(size) / multiple) * multiple)

    def _round_up_dynamic(size):
        # Size is only known at run time: do the same arithmetic with ops.
        ratio = (tf.cast(size, dtype=tf.float32) /
                 tf.cast(multiple, dtype=tf.float32))
        return tf.cast(tf.ceil(ratio), dtype=tf.int32) * multiple

    known_shape = tensor.get_shape()
    batch_size = static_shape.get_batch_size(known_shape)
    tensor_height = static_shape.get_height(known_shape)
    tensor_width = static_shape.get_width(known_shape)
    tensor_depth = static_shape.get_depth(known_shape)

    if batch_size is None:
        batch_size = tf.shape(tensor)[0]

    if tensor_height is not None:
        padded_tensor_height = _round_up_static(tensor_height)
    else:
        tensor_height = tf.shape(tensor)[1]
        padded_tensor_height = _round_up_dynamic(tensor_height)

    if tensor_width is not None:
        padded_tensor_width = _round_up_static(tensor_width)
    else:
        tensor_width = tf.shape(tensor)[2]
        padded_tensor_width = _round_up_dynamic(tensor_width)

    if tensor_depth is None:
        tensor_depth = tf.shape(tensor)[3]

    # Use tf.concat instead of tf.pad to preserve static shape
    if padded_tensor_height != tensor_height:
        rows_to_add = padded_tensor_height - tensor_height
        height_pad = tf.zeros(
            [batch_size, rows_to_add, tensor_width, tensor_depth])
        tensor = tf.concat([tensor, height_pad], 1)

    if padded_tensor_width != tensor_width:
        # The height has already been padded, so the new columns span it.
        cols_to_add = padded_tensor_width - tensor_width
        width_pad = tf.zeros(
            [batch_size, padded_tensor_height, cols_to_add, tensor_depth])
        tensor = tf.concat([tensor, width_pad], 2)

    return tensor
def fm(tensor):
    """Return ``ceil(tensor - MASK_THRESHOLD)`` element-wise.

    Presumably used to binarise a soft mask: values in
    ``(MASK_THRESHOLD, MASK_THRESHOLD + 1]`` map to 1 and values in
    ``(MASK_THRESHOLD - 1, MASK_THRESHOLD]`` map to 0 -- confirm the input
    range against the caller.
    """
    shifted = tf.math.subtract(tensor, MASK_THRESHOLD)
    return tf.ceil(shifted)
def main():
    """Build the training graph, then run the training loop.

    The graph has one fully connected layer followed by a per-time-step
    class scoring layer.  Two losses are added to the 'losses' collection:
    a top-k pooled classification loss ("MILL") weighted by ``args.Lambda``
    and a pairwise similarity loss ("CASL") weighted by
    ``1 - args.Lambda``.  Training uses Adam.  Every 500 iterations the
    iteration counter and a checkpoint are written and ``test`` is called.

    Reads the module-level ``args`` and has no return value.
    """
    # Placeholders
    learning_rate = tf.placeholder(tf.float32)
    feature_seq = tf.placeholder(
        tf.float32, [args.batch_size, args.max_seqlen, args.feature_size])
    labels = tf.placeholder(tf.float32, [args.batch_size, args.num_class])

    # Number of time steps per sample whose feature vector has any non-zero
    # entry.
    seq_len = tf.cast(
        tf.reduce_sum(tf.sign(tf.reduce_max(tf.abs(feature_seq), axis=2)),
                      axis=1), tf.int32)
    # Trim the time axis to the largest count in the batch (assumes the
    # zero padding sits at the end of each sequence -- TODO confirm).
    fseq = feature_seq[:, :tf.reduce_max(seq_len), :]
    # 1 where a time step has a non-zero feature vector, 0 elsewhere.
    sgn = tf.sign(tf.reduce_sum(tf.abs(fseq), keep_dims=True, axis=2))
    seq_len = tf.cast(
        tf.reduce_sum(tf.sign(tf.reduce_max(tf.abs(fseq), axis=2)), axis=1),
        tf.int32)
    # Number of time steps pooled per class: ceil(seq_len / 8).
    k = tf.cast(tf.ceil(tf.cast(seq_len, tf.float32) / 8), tf.int32)

    # Model
    with tf.device('/gpu:0'):
        with tf.variable_scope('Fully_Connected'):
            fc_W = _variable_with_weight_decay(
                'fc_w', [args.feature_size, args.feature_size], 0.0005)
            fc_b = _variable_with_weight_decay('fc_b', [args.feature_size],
                                               0.0000)
            # The weight matrix is tiled over the batch so that tf.matmul can
            # be applied to the 3-D input.
            feature = tf.matmul(
                fseq, tf.tile(tf.expand_dims(fc_W, 0),
                              [args.batch_size, 1, 1])) + fc_b
            feature = tf.nn.relu(feature)
            # NOTE(review): in TF 1.x the second positional argument of
            # tf.nn.dropout is keep_prob -- confirm 0.3 is intended.
            feature = tf.nn.dropout(feature, 0.3)

        with tf.variable_scope('Attention') as an:
            atn_W = _variable_with_weight_decay(
                'atn_w', [args.feature_size, args.num_class], 0.0005)
            atn_b = _variable_with_weight_decay('atn_b', [args.num_class],
                                                0.0000)
            temporal_logits = tf.matmul(
                feature,
                tf.tile(tf.expand_dims(atn_W, 0),
                        [args.batch_size, 1, 1])) + atn_b

    # MILL
    # For every sample: take the k largest scores over time for each class
    # and average them into one logit per class.
    logits = []
    for i in range(args.batch_size):
        tmp, _ = tf.nn.top_k(tf.transpose(
            temporal_logits[i, :seq_len[i], :], [1, 0]), k=k[i])
        logits.append(tf.reduce_mean(tf.transpose(tmp, [1, 0]), axis=0))
    logits = tf.stack(logits)
    mill = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits))
    tf.add_to_collection('losses', mill * args.Lambda)
    tf.summary.scalar('MILL', mill)

    # CASL
    # Softmax over the time axis, restricted to non-padded steps by `sgn`.
    tmp = tf.exp(temporal_logits) * sgn
    attention = tf.div(tmp, tf.reduce_sum(tmp, axis=1, keep_dims=True))
    attn_classwise_feat = tf.matmul(tf.transpose(feature, [0, 2, 1]),
                                    attention)
    # Complementary attention, divided by max(seq_len - 1, 1).
    norm_comp_attention = sgn * (1 - attention) / tf.cast(
        tf.expand_dims(tf.expand_dims(tf.maximum(seq_len - 1, 1), axis=1),
                       axis=1), tf.float32)
    comp_attn_classwise_feat = tf.matmul(tf.transpose(feature, [0, 2, 1]),
                                         norm_comp_attention)
    casl, n_tmp = 0., 0.
    # Samples are consumed as consecutive pairs (i, i + 1); presumably the
    # dataset puts similar videos next to each other -- verify against
    # Dataset.load_data.
    for i in range(0, args.num_similar * 2, 2):
        f1 = attn_classwise_feat[i, :, :]
        f2 = attn_classwise_feat[i + 1, :, :]
        f3 = comp_attn_classwise_feat[i, :, :]
        f4 = comp_attn_classwise_feat[i + 1, :, :]
        # Per-class cosine distances.
        d1 = 1 - tf.reduce_sum(
            f1 * f2, axis=0) / (tf.norm(f1, axis=0) * tf.norm(f2, axis=0))
        d2 = 1 - tf.reduce_sum(
            f1 * f4, axis=0) / (tf.norm(f1, axis=0) * tf.norm(f4, axis=0))
        d3 = 1 - tf.reduce_sum(
            f2 * f3, axis=0) / (tf.norm(f2, axis=0) * tf.norm(f3, axis=0))
        # Hinge terms with margin 0.5, counted only for classes that have a
        # positive label in both samples of the pair.
        casl = casl + tf.reduce_sum(
            tf.maximum(0., d1 - d2 + 0.5) * 0.5 *
            tf.cast(tf.greater(labels[i, :], 0), tf.float32) *
            tf.cast(tf.greater(labels[i + 1, :], 0), tf.float32))
        casl = casl + tf.reduce_sum(
            tf.maximum(0., d1 - d3 + 0.5) * 0.5 *
            tf.cast(tf.greater(labels[i, :], 0), tf.float32) *
            tf.cast(tf.greater(labels[i + 1, :], 0), tf.float32))
        n_tmp = n_tmp + tf.reduce_sum(
            tf.cast(tf.greater(labels[i, :], 0), tf.float32) *
            tf.cast(tf.greater(labels[i + 1, :], 0), tf.float32))
    # NOTE(review): n_tmp is zero when no pair shares a class, which would
    # make this division produce NaN/inf -- confirm the data rules it out.
    casl = casl / n_tmp
    tf.add_to_collection('losses', casl * (1 - args.Lambda))
    tf.summary.scalar('CASL', casl)

    total_loss = tf.add_n(tf.get_collection('losses'), name='total_loss')
    tf.summary.scalar('Total Loss', total_loss)
    apply_gradient_op = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(total_loss)

    # Initialize tensorflow graph
    init = tf.global_variables_initializer()
    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=True)
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(init)
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter('./tensorboards/' + args.model_name,
                                         sess.graph)
    saver = tf.train.Saver(max_to_keep=200)

    # Start from scratch or load model
    if args.pretrained_ckpt is None:
        iter_num = 0
    else:
        # Resume from the iteration number written by the save step below.
        iter_num = np.load('iter_num.npy')
        saver.restore(
            sess,
            tf.train.latest_checkpoint('./ckpt/' + args.pretrained_ckpt +
                                       '/'))

    # Initialize dataset
    dataset = Dataset(args)

    # Start training
    for i in range(iter_num, args.max_iter):

        # Train
        batch_feature_seq, batch_labels = dataset.load_data(
            n_similar=args.num_similar)
        # Normalise every label row so that it sums to 1.
        batch_labels = batch_labels / np.sum(
            batch_labels, axis=1, keepdims=True)
        _, cost, sumry = sess.run(
            [apply_gradient_op, total_loss, merged],
            feed_dict={
                feature_seq: batch_feature_seq,
                labels: batch_labels,
                learning_rate: args.lr
            })
        train_writer.add_summary(sumry, i)
        print('Iteration: %d, Loss: %.5f' % (i, cost))

        if i % 500 == 0:
            #sumry = sess.run(merged, feed_dict={feature_seq: batch_feature_seq, labels:batch_labels, learning_rate: lr, keep_prob: None})
            #train_writer.add_summary(sumry, i)
            np.save('iter_num.npy', i)
            saver.save(sess,
                       './ckpt/' + args.model_name + '/model',
                       global_step=i)
            test(dataset, args, i)
def refine_feature_op(self, points, feature_map, name):
    """Resample a refined feature map at `points` with bilinear weights.

    Two convolution branches (a 1x5 followed by a 5x1, and a 1x1) are
    applied to `feature_map` and summed.  The sum is then sampled at the
    four integer grid corners around every point, the samples are blended
    with bilinear weights, and the blended map is added back onto the
    summed feature.

    Args:
      points: float tensor indexed as [N, >=2]; column 0 is used as x and
        column 1 as y, in feature-map pixel units.  N has to equal
        height * width of `feature_map`, because the result is reshaped to
        [1, height, width, cfgs.FPN_CHANNEL].
      feature_map: tensor indexed as [1, height, width, channels]; the batch
        axis must have size 1.
      name: suffix used for the convolution variable scopes.

    Returns:
      Tensor of shape [1, height, width, cfgs.FPN_CHANNEL].
    """
    map_shape = tf.shape(feature_map)
    h, w = map_shape[1], map_shape[2]
    last_col = tf.cast(w - 1, tf.float32)
    last_row = tf.cast(h - 1, tf.float32)

    px = points[:, 0]
    py = points[:, 1]

    # Integer corner coordinates, clamped to the feature map.  floor/ceil
    # already yield integer values, so clamping needs no second rounding.
    xmin = tf.minimum(last_col, tf.maximum(0.0, tf.floor(px)))
    ymin = tf.minimum(last_row, tf.maximum(0.0, tf.floor(py)))
    xmax = tf.maximum(0.0, tf.minimum(last_col, tf.ceil(px)))
    ymax = tf.maximum(0.0, tf.minimum(last_row, tf.ceil(py)))

    def corner_index(rows, cols):
        # [N, 2] int32 indices in (y, x) order for tf.gather_nd.
        return tf.cast(tf.transpose(tf.stack([rows, cols], axis=0)), tf.int32)

    left_top = corner_index(ymin, xmin)
    right_bottom = corner_index(ymax, xmax)
    left_bottom = corner_index(ymax, xmin)
    right_top = corner_index(ymin, xmax)

    feature_1x5 = slim.conv2d(inputs=feature_map,
                              num_outputs=cfgs.FPN_CHANNEL,
                              kernel_size=[1, 5],
                              weights_initializer=cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                              biases_initializer=cfgs.SUBNETS_BIAS_INITIALIZER,
                              stride=1,
                              activation_fn=None,
                              scope='refine_1x5_{}'.format(name))

    feature5x1 = slim.conv2d(inputs=feature_1x5,
                             num_outputs=cfgs.FPN_CHANNEL,
                             kernel_size=[5, 1],
                             weights_initializer=cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                             biases_initializer=cfgs.SUBNETS_BIAS_INITIALIZER,
                             stride=1,
                             activation_fn=None,
                             scope='refine_5x1_{}'.format(name))

    feature_1x1 = slim.conv2d(inputs=feature_map,
                              num_outputs=cfgs.FPN_CHANNEL,
                              kernel_size=[1, 1],
                              weights_initializer=cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                              biases_initializer=cfgs.SUBNETS_BIAS_INITIALIZER,
                              stride=1,
                              activation_fn=None,
                              scope='refine_1x1_{}'.format(name))

    feature = feature5x1 + feature_1x1

    # Drop ONLY the batch axis.  A bare tf.squeeze would also remove a
    # height, width or channel axis of size 1, after which the (y, x)
    # indices above would address the wrong dimensions.
    feature_hwc = tf.squeeze(feature, axis=0)

    left_top_feature = tf.gather_nd(feature_hwc, left_top)
    right_bottom_feature = tf.gather_nd(feature_hwc, right_bottom)
    left_bottom_feature = tf.gather_nd(feature_hwc, left_bottom)
    right_top_feature = tf.gather_nd(feature_hwc, right_top)

    def corner_weight(dx, dy):
        # [N, 1] weight; broadcasts over the channel axis when multiplied.
        return tf.expand_dims(tf.abs(dx * dy), 1)

    # NOTE: when a coordinate is an exact integer, xmin == xmax (or
    # ymin == ymax) and all four weights vanish for that point.  This is the
    # behaviour of the original formulation and is kept unchanged.
    refine_feature = \
        right_bottom_feature * corner_weight(px - xmin, py - ymin) \
        + left_top_feature * corner_weight(xmax - px, ymax - py) \
        + right_top_feature * corner_weight(px - xmin, ymax - py) \
        + left_bottom_feature * corner_weight(xmax - px, py - ymin)

    refine_feature = tf.reshape(refine_feature, [1, h, w, cfgs.FPN_CHANNEL])

    # refine_feature = tf.reshape(refine_feature, [1, tf.cast(feature_size[1], tf.int32),
    #                                              tf.cast(feature_size[0], tf.int32), 256])

    return refine_feature + feature
def body1(self, num, object_num, loss, predict, labels, nilboy):
    """Add the loss terms of ground-truth object number `num`.

    The signature and the returned tuple line up element by element, so this
    is presumably the body of a tf.while_loop over objects -- confirm against
    the caller.

    Args:
      num: index of the object (row of `labels`) handled in this step.
      object_num: passed through unchanged.
      loss: sequence of four running totals
        [class_loss, object_loss, noobject_loss, coord_loss].
      predict: 3-D tensor [cell_size, cell_size, channels] whose channels are
        read as num_classes class scores, then boxes_per_cell confidences,
        then boxes_per_cell * 4 box coordinates.
      labels: [max_objects, 5] rows of (x_center, y_center, w, h, class).
      nilboy: replaced on return by the responsibility mask of this object.

    Returns:
      (num + 1, object_num, updated four-element loss list, predict, labels,
      responsibility mask).
    """
    grid = self.cell_size
    # Side of one grid cell in image pixels.
    cell_extent = self.image_size / self.cell_size

    truth = tf.reshape(labels[num:num + 1, :], [-1])

    # --- objects mask [CELL_SIZE, CELL_SIZE]: cells overlapped by the box ---
    col_lo = tf.floor((truth[0] - truth[2] / 2) / cell_extent)
    col_hi = tf.ceil((truth[0] + truth[2] / 2) / cell_extent)
    row_lo = tf.floor((truth[1] - truth[3] / 2) / cell_extent)
    row_hi = tf.ceil((truth[1] + truth[3] / 2) / cell_extent)

    # NOTE: tf.pack is the pre-1.0 name of tf.stack; kept as in the original.
    covered_extent = tf.cast(
        tf.pack([row_hi - row_lo, col_hi - col_lo]), dtype=tf.int32)
    objects = tf.ones(covered_extent, tf.float32)
    covered_margins = tf.cast(
        tf.pack([row_lo, grid - row_hi, col_lo, grid - col_hi]), tf.int32)
    objects = tf.pad(objects, tf.reshape(covered_margins, (2, 2)), "CONSTANT")

    # --- response mask [CELL_SIZE, CELL_SIZE]: the cell holding the centre ---
    center_col = tf.floor(truth[0] / cell_extent)
    center_row = tf.floor(truth[1] / cell_extent)
    response = tf.ones([1, 1], tf.float32)
    center_margins = tf.cast(
        tf.pack([
            center_row, grid - center_row - 1,
            center_col, grid - center_col - 1
        ]), tf.int32)
    response = tf.pad(response, tf.reshape(center_margins, (2, 2)), "CONSTANT")
    response_mask = tf.reshape(response, (grid, grid, 1))

    # --- predicted boxes in image coordinates ---
    box_start = self.num_classes + self.boxes_per_cell
    predict_boxes = tf.reshape(predict[:, :, box_start:],
                               [grid, grid, self.boxes_per_cell, 4])
    predict_boxes = predict_boxes * [
        cell_extent, cell_extent, self.image_size, self.image_size
    ]

    # Top-left corner of every cell: x grows along axis 1, y along axis 0.
    cell_index = np.arange(grid)
    base_boxes = np.zeros([grid, grid, 4])
    base_boxes[:, :, 0] = cell_extent * cell_index[np.newaxis, :]
    base_boxes[:, :, 1] = cell_extent * cell_index[:, np.newaxis]
    base_boxes = np.tile(
        np.resize(base_boxes, [grid, grid, 1, 4]),
        [1, 1, self.boxes_per_cell, 1])

    predict_boxes = base_boxes + predict_boxes

    # IoU of every predicted box with the truth box
    # [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
    iou_predict_truth = self.iou(predict_boxes, truth[0:4])

    # C: confidence target, non-zero only in the responsible cell.
    C = iou_predict_truth * response_mask

    # I: 1 for the box with the highest IoU inside the responsible cell.
    best_iou = tf.reduce_max(C, 2, keep_dims=True)
    I = tf.cast((C >= best_iou), tf.float32) * response_mask

    # no_I: complement of I.
    no_I = tf.ones_like(I, dtype=tf.float32) - I

    # Predicted confidences.
    p_C = predict[:, :, self.num_classes:box_start]

    # Truth x, y, sqrt(w), sqrt(h) as 0-D tensors.
    x = truth[0]
    y = truth[1]
    sqrt_w = tf.sqrt(tf.abs(truth[2]))
    sqrt_h = tf.sqrt(tf.abs(truth[3]))

    # Predicted x, y, sqrt(w), sqrt(h); sizes are clamped to [0, image_size]
    # before the square root.
    p_x = predict_boxes[:, :, :, 0]
    p_y = predict_boxes[:, :, :, 1]
    size_cap = self.image_size * 1.0
    p_sqrt_w = tf.sqrt(
        tf.minimum(size_cap, tf.maximum(0.0, predict_boxes[:, :, :, 2])))
    p_sqrt_h = tf.sqrt(
        tf.minimum(size_cap, tf.maximum(0.0, predict_boxes[:, :, :, 3])))

    # Truth class as one-hot [NUM_CLASSES]; predicted class scores
    # [CELL_SIZE, CELL_SIZE, NUM_CLASSES].
    P = tf.one_hot(tf.cast(truth[4], tf.int32),
                   self.num_classes,
                   dtype=tf.float32)
    p_P = predict[:, :, 0:self.num_classes]

    objects_mask = tf.reshape(objects, (grid, grid, 1))
    class_loss = tf.nn.l2_loss(objects_mask * (p_P - P)) * self.class_scale

    object_loss = tf.nn.l2_loss(I * (p_C - C)) * self.object_scale

    noobject_loss = tf.nn.l2_loss(no_I * (p_C)) * self.noobject_scale

    x_term = tf.nn.l2_loss(I * (p_x - x) / cell_extent)
    y_term = tf.nn.l2_loss(I * (p_y - y) / cell_extent)
    w_term = tf.nn.l2_loss(I * (p_sqrt_w - sqrt_w)) / self.image_size
    h_term = tf.nn.l2_loss(I * (p_sqrt_h - sqrt_h)) / self.image_size
    coord_loss = (x_term + y_term + w_term + h_term) * self.coord_scale

    nilboy = I

    updated_loss = [
        loss[0] + class_loss,
        loss[1] + object_loss,
        loss[2] + noobject_loss,
        loss[3] + coord_loss
    ]
    return num + 1, object_num, updated_loss, predict, labels, nilboy
def _static_valid_length(length, window, step):
    """Return ceil((length - window + 1) / step) as a Python int."""
    return int(math.ceil(float(length - window + 1) / float(step)))


def _dynamic_valid_length(length, window, step):
    """Tensor counterpart of ``_static_valid_length`` (int32 result)."""
    return tf.to_int32(
        tf.ceil(
            tf.div(
                tf.to_float(tf.subtract(length, window - 1)),
                tf.to_float(step))))


def cnn(model, config, scope, connect=None):
    """Append a stack of convolution + pooling layers to ``model``.

    Layer hyper-parameters are read from ``config`` section ``scope``
    (``clength_size``, ``cstep_size``, ``plength_size``, ``pstep_size``,
    ``layer_size``, ``pool``).  All tensors and bookkeeping values are stored
    in ``model`` under keys prefixed with ``scope``.

    Args:
        model: dict that is updated in place and returned.
        config: ConfigParser-like object providing ``getint`` / ``get``.
        scope: name used for the TensorFlow scopes, the config section and
            the key prefix in ``model``.
        connect: prefix of an earlier component whose ``*_outputs`` and
            ``*_out*length`` entries feed this one; when ``None`` new
            placeholders are created from the ``global`` config section.

    Returns:
        The same ``model`` dict.

    Raises:
        ValueError: if ``layer_size`` is smaller than 1 (the output is taken
            from the last pooling layer, so at least one layer is required).
    """
    layer_count = config.getint(scope, 'layer_size')
    if layer_count < 1:
        raise ValueError(
            'layer_size for %s must be at least 1, got %d' %
            (scope, layer_count))

    with tf.variable_scope(scope), tf.name_scope(scope):
        with tf.variable_scope('inputs'), tf.name_scope('inputs'):
            sizes = {
                size: config.getint(scope, '%s_size' % size)
                for size in ['clength', 'cstep', 'plength', 'pstep']
            }
            if connect is None:
                model['%s_in0length' % scope] = config.getint(
                    'global', 'batch_size')
                model['%s_in1length' % scope] = config.getint(
                    'global', 'input_size')
                model['%s_in2length' % scope] = tf.placeholder(
                    tf.int32, [model['%s_in0length' % scope]],
                    '%s_in2length' % scope)
                model['%s_maxin2length' % scope] = config.getint(
                    'global', 'time_size')
                model['%s_inputs' % scope] = tf.placeholder(
                    tf.float32, [
                        model['%s_maxin2length' % scope],
                        model['%s_in0length' % scope],
                        model['%s_in1length' % scope]
                    ], '%s_inputs' % scope)
            else:
                model['%s_in0length' % scope] = model['%s_out0length' % connect]
                model['%s_in1length' % scope] = model['%s_out1length' % connect]
                model['%s_in2length' % scope] = model['%s_out2length' % connect]
                model['%s_maxin2length' % scope] = model['%s_maxout2length' %
                                                         connect]
                model['%s_inputs' % scope] = model['%s_outputs' % connect]
            # Add a trailing channel axis and swap the first two axes so the
            # tensor is 4-D as required by tf.nn.conv2d.
            model['%s_transform' % scope] = tf.transpose(
                tf.reshape(model['%s_inputs' % scope], [
                    model['%s_maxin2length' % scope],
                    model['%s_in0length' % scope],
                    model['%s_in1length' % scope], 1
                ]), [1, 0, 2, 3], '%s_transform' % scope)

        model['%s_out0length' % scope] = model['%s_in0length' % scope]
        model['%s_out1length' % scope] = model['%s_in1length' % scope]
        model['%s_out2length' % scope] = model['%s_in2length' % scope]
        model['%s_maxout2length' % scope] = model['%s_maxin2length' % scope]

        for layer in range(layer_count):
            if layer == 0:
                model['%s_transform%i' % (scope, layer)] = model['%s_transform'
                                                                 % scope]
            else:
                model['%s_transform%i' % (scope, layer)] = model[
                    '%s_pooling%i' % (scope, layer - 1)]

            filter_name = 'filter%i' % layer
            with tf.variable_scope(filter_name), tf.name_scope(filter_name):
                model['%s_filter%i' % (scope, layer)] = tf.Variable(
                    tf.truncated_normal(
                        [sizes['clength'], sizes['clength'], 1, 1]))
                model['%s_stride%i' % (scope, layer)] = [
                    1, sizes['cstep'], sizes['cstep'], 1
                ]

            conv_name = 'convolution%i' % layer
            with tf.variable_scope(conv_name), tf.name_scope(conv_name):
                model['%s_convolution%i' % (scope, layer)] = tf.nn.conv2d(
                    model['%s_transform%i' % (scope, layer)],
                    model['%s_filter%i' % (scope, layer)],
                    model['%s_stride%i' % (scope, layer)], 'VALID')
                # Track the sizes after the convolution.
                model['%s_out1length' % scope] = _static_valid_length(
                    model['%s_out1length' % scope], sizes['clength'],
                    sizes['cstep'])
                model['%s_out2length' % scope] = _dynamic_valid_length(
                    model['%s_out2length' % scope], sizes['clength'],
                    sizes['cstep'])
                model['%s_maxout2length' % scope] = _static_valid_length(
                    model['%s_maxout2length' % scope], sizes['clength'],
                    sizes['cstep'])

                # The pooling op is looked up by name, e.g. tf.nn.max_pool.
                pool_fn = getattr(tf.nn, '%s_pool' % config.get(scope, 'pool'))
                model['%s_pooling%i' % (scope, layer)] = pool_fn(
                    model['%s_convolution%i' % (scope, layer)],
                    [1, sizes['plength'], sizes['plength'], 1],
                    [1, sizes['pstep'], sizes['pstep'], 1], 'VALID')
                # Track the sizes after the pooling.
                model['%s_out1length' % scope] = _static_valid_length(
                    model['%s_out1length' % scope], sizes['plength'],
                    sizes['pstep'])
                model['%s_out2length' % scope] = _dynamic_valid_length(
                    model['%s_out2length' % scope], sizes['plength'],
                    sizes['pstep'])
                model['%s_maxout2length' % scope] = _static_valid_length(
                    model['%s_maxout2length' % scope], sizes['plength'],
                    sizes['pstep'])

        with tf.variable_scope('outputs'), tf.name_scope('outputs'):
            # Drop the channel axis and undo the axis swap done on input.
            model['%s_outputs' % scope] = tf.transpose(
                tf.squeeze(model['%s_pooling%i' % (scope, layer_count - 1)],
                           [3], '%s_outputs' % scope), [1, 0, 2])
    return model
def build_bow_cnn_custompool_from_options_dict(x, x_lengths, keep_prob,
                                               options_dict):
    """Build a CNN followed by length-aware pooling over the sequence axis.

    Args:
        x: input tensor passed to ``blocks.build_cnn``.
        x_lengths: per-example sequence lengths before the CNN.
        keep_prob: dropout keep probability for the optional hidden layers.
        options_dict: configuration with keys ``input_shape``,
            ``filter_shapes``, ``pool_shapes``, ``pooling`` (one of
            ``"mean"``, ``"max"``, ``"logsumexp"``), ``r`` (required for
            ``"logsumexp"``), and optionally ``n_hiddens`` and ``d_out``.

    Returns:
        The output tensor after pooling and any fully-connected layers.
    """
    cnn = blocks.build_cnn(x,
                           options_dict["input_shape"],
                           options_dict["filter_shapes"],
                           options_dict["pool_shapes"],
                           padding="VALID")

    # Propagate the sequence lengths through every conv/pool layer so padded
    # frames can be masked out of the final pooling.
    n_padded_after_cnn = cnn.get_shape().as_list()[-2]
    lengths_after_cnn = tf.cast(x_lengths, dtype=TF_DTYPE)
    for i_cnn_layer, pool_shape in enumerate(options_dict["pool_shapes"]):
        filter_width = options_dict["filter_shapes"][i_cnn_layer][1]
        lengths_after_cnn = tf.maximum(
            1.0, lengths_after_cnn - filter_width + 1)
        if pool_shape is not None:
            lengths_after_cnn = tf.ceil(lengths_after_cnn / pool_shape[1])
    # NOTE(review): the lengths are still floating point here; confirm that
    # sequence_mask accepts non-integer lengths.
    mask = sequence_mask(lengths_after_cnn, n_padded_after_cnn)

    # Pooling
    with tf.variable_scope("pooling_final"):
        axis = 1
        pooling = options_dict["pooling"]
        if pooling == "mean":
            assert cnn.get_shape().as_list()[axis] == 1
            cnn = tf.squeeze(cnn, [axis])
            cnn = cnn * tf.cast(mask, dtype=TF_DTYPE)[:, :, None]
            cnn = tf.reduce_sum(cnn, reduction_indices=axis) / tf.cast(
                lengths_after_cnn, dtype=TF_DTYPE)[:, None]
            print("Average pool layer shape: %s" %
                  (cnn.get_shape().as_list(),))
        elif pooling == "max":
            assert cnn.get_shape().as_list()[axis] == 1
            cnn = tf.squeeze(cnn, [axis])
            # Masked frames are zeroed (not set to -inf) before the max.
            cnn = cnn * tf.cast(mask, dtype=TF_DTYPE)[:, :, None]
            cnn = tf.reduce_max(cnn, reduction_indices=axis)
            print("Max pool layer shape: %s" % (cnn.get_shape().as_list(),))
        elif pooling == "logsumexp":
            assert "r" in options_dict
            assert cnn.get_shape().as_list()[axis] == 1
            r = options_dict["r"]

            # Logsumexp-trick to calculate logsumexp score: subtract the
            # per-sequence maximum before exponentiating, add it back after.
            cnn = tf.squeeze(cnn, [axis])
            add_mask = tf.select(
                mask, tf.zeros_like(mask, dtype=tf.float32),
                -np.inf * tf.ones_like(mask, dtype=tf.float32))
            frame_scores_masked = cnn + add_mask[:, :, None]
            max_vec = tf.reduce_max(r * frame_scores_masked,
                                    reduction_indices=axis,
                                    keep_dims=True)
            sequence_score_logsumexp_trick = 1. / r * (
                tf.log(1. / tf.cast(lengths_after_cnn[:, None, None],
                                    dtype=TF_DTYPE)) +
                tf.log(
                    tf.reduce_sum(tf.exp(r * frame_scores_masked - max_vec),
                                  reduction_indices=axis,
                                  keep_dims=True)) + max_vec)
            cnn = tf.squeeze(sequence_score_logsumexp_trick, [axis])
            print("Logsumexp pool layer shape: %s" %
                  (cnn.get_shape().as_list(),))
        else:
            assert False

    # Fully-connected and output layers, if specified
    if "n_hiddens" in options_dict:
        cnn = blocks.build_feedforward(cnn,
                                       options_dict["n_hiddens"],
                                       keep_prob=keep_prob)
    if "d_out" in options_dict:
        with tf.variable_scope("ff_layer_final"):
            cnn = blocks.build_linear(cnn, options_dict["d_out"])
            print("Final linear layer shape: %s" %
                  (cnn.get_shape().as_list(),))

    return cnn
def _inference(self):
    """Build the CNN -> bidirectional LSTM -> regression graph.

    Reads ``self.embeddings``, ``self.sents``, ``self.sent_lengths``,
    ``self.pad``, ``self.labels``, ``self.mode`` and ``self.paras``; sets
    ``self.emb_sents``, ``self.emb_expand``, ``self.pooled_concat``,
    ``self.sent_states``, ``self.predicts``, ``self.mse`` and
    ``self.accuracy``.
    """
    with tf.device('/cpu:0'):
        self.emb_sents = tf.nn.embedding_lookup(self.embeddings, self.sents)
    # Expand dimension so meet input requirement of 2d-conv
    self.emb_expand = tf.expand_dims(self.emb_sents, -1)

    # Convolution network
    with tf.name_scope('cnn'):
        # Sequence length left after pooling with stride max_pool_size.
        max_length = tf.reduce_max(self.sent_lengths)
        div_value = tf.div(tf.cast(max_length, tf.float32),
                           self.paras.max_pool_size)
        reduced_size = tf.cast(tf.ceil(div_value), tf.int32)

        pooled_concat = []
        for filter_size in self.paras.filter_sizes:
            with tf.name_scope('conv-pool-%s' % filter_size):
                # Pad along the sentence axis so the VALID convolution keeps
                # the input length.
                # shape is : [batch_size, sent_length, emb_size, channel]
                num_prio = (filter_size - 1) // 2
                num_post = (filter_size - 1) - num_prio
                # A single concat over pads and input also works when one
                # side needs no padding (filter sizes 1 and 2); concatenating
                # an empty list of pads would fail.
                emb_pad = tf.concat(
                    [self.pad] * num_prio + [self.emb_expand] +
                    [self.pad] * num_post, 1)

                # Prepare filter for conv
                filter_ = tf.get_variable(
                    name='filter-%s' % filter_size,
                    shape=[filter_size, self.paras.embedding_size, 1,
                           self.paras.num_filters])

                # conv: [batch_size, sent_length, 1, num_filters]
                conv = tf.nn.conv2d(
                    input=emb_pad,
                    filter=filter_,
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name='conv')

                # Bias
                b = tf.get_variable(
                    name='bias-%s' % filter_size,
                    shape=[self.paras.num_filters])
                h = tf.nn.relu(tf.nn.bias_add(conv, b))

                # Max pooling over the outputs
                pooled = tf.nn.max_pool(
                    value=h,
                    ksize=[1, self.paras.max_pool_size, 1, 1],
                    strides=[1, self.paras.max_pool_size, 1, 1],
                    padding='SAME',
                    name='pool')
                pooled = tf.reshape(
                    pooled, [-1, reduced_size, self.paras.num_filters])
                pooled_concat.append(pooled)

        # pooled_concat: (batch_size, reduced_size, filter_sizes * num_filters)
        self.pooled_concat = tf.concat(pooled_concat, 2)
        if self.mode == tf.contrib.learn.ModeKeys.TRAIN:
            self.pooled_concat = tf.nn.dropout(
                self.pooled_concat, 1.0 - self.paras.cnn_dropout)

    # RNN network
    with tf.name_scope('rnn'):
        cells_fw = model_helper.create_rnn_cell(
            'lstm', self.paras.cell_num_units, self.paras.num_layers,
            self.paras.rnn_dropout, self.mode)
        cells_bw = model_helper.create_rnn_cell(
            'lstm', self.paras.cell_num_units, self.paras.num_layers,
            self.paras.rnn_dropout, self.mode)
        outputs, output_states = tf.nn.bidirectional_dynamic_rnn(
            cells_fw, cells_bw, inputs=self.pooled_concat, dtype=tf.float32)
        # states_fw: (batch_size, reduced_size, cell_size)
        states_fw, states_bw = outputs
        concat_states = tf.concat([states_fw, states_bw], axis=2)
        # sent_states: (batch_size, 2 * cell_size)
        self.sent_states = tf.reduce_max(concat_states, axis=1)

    with tf.name_scope('classify'):
        hidden1 = tf.contrib.layers.fully_connected(
            inputs=self.sent_states, num_outputs=512)
        hidden2 = tf.contrib.layers.fully_connected(
            inputs=hidden1, num_outputs=5)
        self.predicts = tf.reduce_max(
            tf.contrib.layers.fully_connected(
                inputs=hidden2, activation_fn=None, num_outputs=1),
            axis=1)
        # The error is measured on the rounded prediction.
        self.mse = tf.reduce_mean(
            tf.cast(
                tf.squared_difference(
                    self.labels,
                    tf.cast(tf.round(self.predicts), tf.int32)),
                tf.float32))

    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(
            self.labels, tf.cast(tf.round(self.predicts), tf.int32))
        self.accuracy = tf.reduce_mean(
            tf.cast(correct_prediction, tf.float32))
def test_Ceil(self):
    """Run ``self.check`` on ``tf.ceil`` of ``self.random(4, 3) - 0.5``."""
    shifted = self.random(4, 3) - 0.5
    self.check(tf.ceil(shifted))
def bernoulli_sample(x):
    """Return a tensor whose element yi is "on" with probability xi.

    Uniform noise from [0, 1) with the shape of ``x`` is subtracted from
    ``x`` and the difference is rounded up.
    """
    uniform_noise = tf.random_uniform(tf.shape(x), minval=0, maxval=1)
    return tf.ceil(x - uniform_noise)
def __init__(self, session, num_actions, train_net):
    """Build the distributional Q-network graph.

    The shared part maps a stack of frames to one categorical distribution
    over ``params.NB_ATOMS`` atoms per action.  Depending on ``train_net``
    either the target-projection ops (``train_net`` false) or the loss and
    optimiser ops (``train_net`` true) are added on top.

    Args:
        session: stored as ``self.sess``.
        num_actions: number of actions; sizes the output layer.
        train_net: truthy to build the training ops, falsy to build the
            target-distribution ops.
    """
    self.sess = session

    # Input
    self.x = tf.placeholder(name="state",
                            dtype=tf.uint8,
                            shape=(None, params.STATE_DIMENSIONS[0],
                                   params.STATE_DIMENSIONS[1],
                                   params.HISTORY_LEN))
    # Scale the uint8 input to [0, 1].
    self.normalized_x = tf.cast(self.x, dtype=tf.float32) / 255.0

    with tf.variable_scope("common"):
        # Convolutional Layers: each one consumes the previous output, the
        # first one the normalized input.
        self.conv_outputs = []
        for CONV_LAYER_SPEC in params.CONVOLUTIONAL_LAYERS_SPEC:
            self.conv_outputs.append(
                tf.layers.conv2d(
                    name="conv_layer_" + str(len(self.conv_outputs) + 1),
                    inputs=self.normalized_x
                    if len(self.conv_outputs) == 0 else self.conv_outputs[-1],
                    filters=CONV_LAYER_SPEC["filters"],
                    kernel_size=CONV_LAYER_SPEC["kernel_size"],
                    strides=CONV_LAYER_SPEC["strides"],
                    activation=tf.nn.relu))

        # Flatten
        self.flattened_conv_output = tf.layers.flatten(
            name="conv_output_flattener", inputs=self.conv_outputs[-1])

        # Hidden Layer(s), chained the same way as the conv layers.
        self.dense_outputs = []
        for DENSE_LAYER_SPEC in params.DENSE_LAYERS_SPEC:
            self.dense_outputs.append(
                tf.layers.dense(name="dense_layer_" +
                                str(len(self.dense_outputs) + 1),
                                inputs=self.flattened_conv_output
                                if len(self.dense_outputs) == 0 else
                                self.dense_outputs[-1],
                                units=DENSE_LAYER_SPEC,
                                activation=tf.nn.relu))

        # State-Action-Value Distributions (as a flattened vector)
        self.flattened_q_dist = tf.layers.dense(
            name="flattened_action_value_dist_logits",
            inputs=self.dense_outputs[-1],
            units=num_actions * params.NB_ATOMS)

        # Unflatten to [batch, num_actions, NB_ATOMS]
        self.q_dist_logits = tf.reshape(
            self.flattened_q_dist, [-1, num_actions, params.NB_ATOMS],
            name="reshape_q_dist_logits")

        # Softmax State-Action-Value Distributions (per action)
        self.q_dist = tf.nn.softmax(self.q_dist_logits,
                                    name="action_value_dist",
                                    axis=-1)

        # Multiply bin probabilities by value.  Z holds the atom values,
        # spaced delta_z apart starting at V_MIN.
        self.delta_z = (params.V_MAX - params.V_MIN) / (params.NB_ATOMS - 1)
        self.Z = tf.range(start=params.V_MIN,
                          limit=params.V_MAX + self.delta_z,
                          delta=self.delta_z)
        self.post_mul = self.q_dist * tf.reshape(
            self.Z, [1, 1, params.NB_ATOMS])

        # Take sum to get the expected state-action values for each action
        self.actions = tf.reduce_sum(self.post_mul, axis=2)

        # [0, 1, ..., batch - 1]; used to build gather/scatter indices.
        self.batch_size_range = tf.range(start=0, limit=tf.shape(self.x)[0])

    if not train_net:
        self.targ_q_net_max = tf.summary.scalar(
            "targ_q_net_max", tf.reduce_max(self.actions))
        self.targ_q_net_mean = tf.summary.scalar(
            "targ_q_net_mean", tf.reduce_mean(self.actions))
        self.targ_q_net_min = tf.summary.scalar(
            "targ_q_net_min", tf.reduce_min(self.actions))

        # Find argmax action given expected state-action values at next state
        self.argmax_action = tf.argmax(self.actions,
                                       axis=-1,
                                       output_type=tf.int32)

        # Get its corresponding distribution (this is the target distribution)
        self.argmax_action_distribution = tf.gather_nd(
            self.q_dist,
            tf.stack((self.batch_size_range, self.argmax_action),
                     axis=1))  # Axis = 1 => [N, 2]
        self.mean_argmax_next_state_value = tf.summary.scalar(
            "mean_argmax_q_target",
            tf.reduce_mean(self.Z * self.argmax_action_distribution))

        # Placeholders for reward and terminal flag
        self.r = tf.placeholder(name="reward",
                                dtype=tf.float32,
                                shape=(None, ))
        self.t = tf.placeholder(name="terminal",
                                dtype=tf.uint8,
                                shape=(None, ))

        # Compute Tz (Bellman Operator) on atom of expected state-action-value
        # r + gamma * z clipped to [V_min, V_max]; the discount is the
        # literal 0.99.
        # NOTE(review): the bootstrap term is multiplied by self.t directly,
        # so it vanishes when t == 0 -- confirm callers feed 0 for terminal
        # transitions and 1 otherwise.
        self.Tz = tf.clip_by_value(
            tf.reshape(self.r, [-1, 1]) +
            0.99 * tf.cast(tf.reshape(self.t, [-1, 1]), tf.float32) * self.Z,
            clip_value_min=params.V_MIN,
            clip_value_max=params.V_MAX)

        # Compute bin number (will be floating point).
        self.b = (self.Tz - params.V_MIN) / self.delta_z

        # Lower and Upper Bins.
        self.l = tf.floor(self.b)
        self.u = tf.ceil(self.b)
        # NOTE(review): when b is an exact integer, l == u and both weights
        # below ((u - b) and (b - l)) are zero, so that atom's probability
        # mass is not projected anywhere.

        # Add weight to the lower bin based on distance from upper bin to
        # approximate bin index b. (0--b--1. If b = 0.3. Then, assign bin
        # 0, p(b) * 0.7 weight and bin 1, p(Z = z_b) * 0.3 weight.)
        self.indexable_l = tf.stack(
            (
                tf.reshape(self.batch_size_range, [-1, 1]) * tf.ones(
                    (1, params.NB_ATOMS), dtype=tf.int32
                ),  # BATCH_SIZE_RANGE x NB_ATOMS [[0, ...], [1, ...], ...]
                tf.cast(self.l, dtype=tf.int32)),
            axis=-1)
        self.m_l_vals = self.argmax_action_distribution * (self.u - self.b)
        self.m_l = tf.scatter_nd(tf.reshape(self.indexable_l, [-1, 2]),
                                 tf.reshape(self.m_l_vals, [-1]),
                                 tf.shape(self.l))

        # Add weight to the upper bin based on distance from lower bin to
        # approximate bin index b.
        self.indexable_u = tf.stack(
            (
                tf.reshape(self.batch_size_range, [-1, 1]) * tf.ones(
                    (1, params.NB_ATOMS), dtype=tf.int32
                ),  # BATCH_SIZE_RANGE x NB_ATOMS [[0, ...], [1, ...], ...]
                tf.cast(self.u, dtype=tf.int32)),
            axis=-1)
        self.m_u_vals = self.argmax_action_distribution * (self.b - self.l)
        self.m_u = tf.scatter_nd(tf.reshape(self.indexable_u, [-1, 2]),
                                 tf.reshape(self.m_u_vals, [-1]),
                                 tf.shape(self.u))

        # Add Contributions of both upper and lower parts and
        # stop gradient to not update the target network.
        self.m = tf.stop_gradient(tf.squeeze(self.m_l + self.m_u))
        self.weighted_m = tf.clip_by_value(self.m * self.Z,
                                           clip_value_min=params.V_MIN,
                                           clip_value_max=params.V_MAX)

        self.weighted_m_mean = tf.summary.scalar(
            "mean_q_target", tf.reduce_mean(self.weighted_m))
        self.targ_dist = tf.summary.histogram("target_distribution",
                                              self.weighted_m)
        self.targn_summary = tf.summary.merge([
            self.targ_dist, self.weighted_m_mean, self.targ_q_net_max,
            self.targ_q_net_mean, self.targ_q_net_min,
            self.mean_argmax_next_state_value
        ])
    else:
        self.trn_q_net_max = tf.summary.scalar(
            "trn_q_net_max", tf.reduce_max(self.actions))
        self.trn_q_net_mean = tf.summary.scalar(
            "trn_q_net_mean", tf.reduce_mean(self.actions))
        self.trn_q_net_min = tf.summary.scalar(
            "trn_q_net_min", tf.reduce_min(self.actions))

        # Given you took this action.
        self.action_placeholder = tf.placeholder(name="action",
                                                 dtype=tf.int32,
                                                 shape=[
                                                     None,
                                                 ])

        # Compute Q-Dist. for the action.
        self.action_q_dist = tf.gather_nd(
            self.q_dist,
            tf.stack((self.batch_size_range, self.action_placeholder),
                     axis=1))
        self.weighted_q_dist = tf.clip_by_value(
            self.action_q_dist * self.Z,
            clip_value_min=params.V_MIN,
            clip_value_max=params.V_MAX)
        tnd_summary = tf.summary.histogram("training_net_distribution",
                                           self.weighted_q_dist)
        tnd_mean_summary = tf.summary.scalar(
            "training_net_distribution_mean",
            tf.reduce_mean(self.weighted_q_dist))

        # Get target distribution.
        self.m_placeholder = tf.placeholder(dtype=tf.float32,
                                            shape=(None, params.NB_ATOMS),
                                            name="m_placeholder")

        # Cross-entropy between the fed target distribution and the
        # predicted one; 1e-5 keeps the log finite.
        self.loss_sum = -tf.reduce_sum(
            self.m_placeholder * tf.log(self.action_q_dist + 1e-5), axis=-1)
        self.loss = tf.reduce_mean(self.loss_sum)
        l_summary = tf.summary.scalar("loss", self.loss)

        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=params.LEARNING_RATE, epsilon=params.EPSILON_ADAM)
        gradients, variables = zip(
            *self.optimizer.compute_gradients(self.loss))
        # The summary records the norm before clipping.
        grad_norm_summary = tf.summary.histogram(
            "grad_norm", tf.global_norm(gradients))
        gradients, _ = tf.clip_by_global_norm(gradients,
                                              params.GRAD_NORM_CLIP)
        self.train_step = self.optimizer.apply_gradients(
            zip(gradients, variables))

        self.trnn_summary = tf.summary.merge([
            tnd_mean_summary, tnd_summary, l_summary, grad_norm_summary,
            self.trn_q_net_max, self.trn_q_net_mean, self.trn_q_net_min
        ])
def D_logistic_r1(G, D, opt, training_set, minibatch_size, reals, labels,
                  gamma=10.0):
    """Discriminator logistic loss with a rotation term and R1 penalty.

    Two columns of the discriminator output, starting at column 108, are
    treated as a predicted rotation and compared with the same columns of
    ``labels``; the remaining columns are summed into the real/fake score.

    Args:
        G: generator network (``get_output_for``, ``input_shapes``).
        D: discriminator network (``get_output_for``).
        opt: unused.
        training_set: unused.
        minibatch_size: number of latents to sample.
        reals: real images.
        labels: conditioning labels; also used for the fake images.
        gamma: weight of the gradient penalty (applied as ``gamma * 0.5``).

    Returns:
        ``(loss, reg)``: the per-sample loss and the gradient-penalty term.
    """
    rotation_offset = 108
    _ = opt, training_set
    latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    fake_images_out = G.get_output_for(latents, labels, is_training=True)
    real_scores_out = D.get_output_for(reals, labels, is_training=True)
    fake_scores_out = D.get_output_for(fake_images_out, labels,
                                       is_training=True)

    # Drop the two rotation columns before summing the scores.
    fake_scores_out_without_rotation = tf.concat([
        fake_scores_out[:, :rotation_offset],
        fake_scores_out[:, rotation_offset + 2:]
    ], axis=-1)
    real_scores_out_without_rotation = tf.concat([
        real_scores_out[:, :rotation_offset],
        real_scores_out[:, rotation_offset + 2:]
    ], axis=-1)
    labels_rotation = labels[:, rotation_offset:rotation_offset + 2]
    real_rotations = real_scores_out[:, rotation_offset:rotation_offset + 2]

    fake_scores_out_sum = tf.reduce_sum(fake_scores_out_without_rotation,
                                        axis=1, keepdims=True)
    real_scores_out_sum = tf.reduce_sum(real_scores_out_without_rotation,
                                        axis=1, keepdims=True)

    rotation_distance = tf.norm(labels_rotation - real_rotations,
                                axis=-1, keepdims=True)
    # Remove non set rotation labels: the mask is 0 where both rotation
    # columns of the label are 0.  keepdims=True keeps the mask [N, 1];
    # without it the [N, 1] * [N] product broadcasts to [N, N] and mixes
    # samples.
    rotation_mask = tf.reduce_max(tf.ceil(tf.abs(labels_rotation)),
                                  axis=-1, keepdims=True)
    rotation_distance = rotation_distance * rotation_mask

    real_scores_out_sum = autosummary('Loss/scores/real', real_scores_out_sum)
    fake_scores_out_sum = autosummary('Loss/scores/fake', fake_scores_out_sum)
    rotation_distance = autosummary('Loss/rotation_distance/real',
                                    rotation_distance)

    # -log(1-sigmoid(fake_scores_out))
    loss = tf.nn.softplus(fake_scores_out_sum)
    # -log(sigmoid(real_scores_out))
    loss += tf.nn.softplus(-real_scores_out_sum)
    loss = autosummary('Loss/discriminator_sum', loss)
    loss += tf.square(rotation_distance) * 10

    with tf.name_scope('GradientPenalty'):
        real_grads = tf.gradients(tf.reduce_sum(real_scores_out), [reals])[0]
        gradient_penalty = tf.reduce_sum(tf.square(real_grads),
                                         axis=[1, 2, 3])
        gradient_penalty = autosummary('Loss/gradient_penalty',
                                       gradient_penalty)
        reg = gradient_penalty * (gamma * 0.5)
    return loss, reg
def build(self, x):
    """Run the backprop version of the Circuit.

    Args:
        x: input tensor; its static shape is used for logging and its
            dynamic shape for sizing the hidden states.

    Returns:
        The final ``fb_act_1`` activity when ``self.readout == 'fb'``.

    Raises:
        RuntimeError: if ``self.hidden_init`` is not one of ``'identity'``,
            ``'random'`` or ``'zeros'``.
        NotImplementedError: if ``self.readout`` is not ``'fb'``.
    """
    self.prepare_tensors()

    # Calculate l2 hidden state size
    x_shape = tf.cast(tf.shape(x), tf.float32)
    if self.include_pooling and len(self.intermediate_ff):
        array_pooling_factor = float(
            self.pool_strides[0] ** len(self.intermediate_ff))
        pooling_factor = tf.constant(array_pooling_factor, dtype=tf.float32)
        l2_shape = tf.stack([
            x_shape[0],
            tf.ceil(x_shape[1] / pooling_factor),
            tf.ceil(x_shape[2] / pooling_factor),
            # list(...) so this also works where values() is a view.
            list(self.hgru_ids[1].values())[0]
        ])
    else:
        l2_shape = tf.identity(x_shape)
        self.pooling_factor = 1
        array_pooling_factor = 1
    x_shape = tf.cast(x_shape, tf.int32)
    l2_shape = tf.cast(l2_shape, tf.int32)

    # Static-shape estimate of the second-layer size, for logging only.
    np_xsh = np.array(x.get_shape().as_list()).astype(float)
    np_xsh[1:3] /= array_pooling_factor
    if len(self.hgru_ids) > 1:
        np_xsh[-1] = list(self.hgru_ids[1].values())[0]
        print('*' * 20)
        print('fgru embedding shape is: ')
        print(np_xsh)
        print('*' * 20)
    else:
        print('*' * 20)
        print('Horizontal only: ')

    # Initialize hidden layer activities
    if self.hidden_init == 'identity':
        # tf.identity takes no dtype argument, so copy first and cast after.
        l1_h2 = tf.cast(tf.identity(x), self.dtype)
        l2_h2 = tf.zeros(l2_shape, dtype=self.dtype)
        fb_act_1 = tf.identity(x)
    elif self.hidden_init == 'random':
        l1_h2 = tf.random_normal(x_shape, dtype=self.dtype)
        l2_h2 = tf.random_normal(l2_shape, dtype=self.dtype)
        fb_act_1 = tf.random_normal(x_shape, dtype=self.dtype)
    elif self.hidden_init == 'zeros':
        l1_h2 = tf.zeros(x_shape, dtype=self.dtype)
        l2_h2 = tf.zeros(l2_shape, dtype=self.dtype)
        fb_act_1 = tf.zeros(x_shape, dtype=self.dtype)
    else:
        raise RuntimeError

    # While loop
    if self.while_loop:
        i0 = tf.constant(0)
        elems = [i0, x, l1_h2, l2_h2, fb_act_1]
        returned = tf.while_loop(self.condition,
                                 self.full,
                                 loop_vars=elems,
                                 back_prop=True,
                                 swap_memory=False)
        # Prepare output
        i0, x, l1_h2, l2_h2, fb_act_1 = returned
    else:
        # Unrolled alternative to the tf.while_loop above.
        i0 = 0
        for _ in range(self.timesteps):
            i0, x, l1_h2, l2_h2, fb_act_1 = self.full(i0=i0,
                                                      x=x,
                                                      l1_h2=l1_h2,
                                                      l2_h2=l2_h2,
                                                      fb_act_1=fb_act_1)

    if self.readout == 'fb':
        return fb_act_1
    else:
        raise NotImplementedError('Select an hGRU layer to readout from.')
def G_logistic_ns_pathreg(G, D, opt, training_set, minibatch_size,
                          pl_minibatch_shrink=2, pl_decay=0.01,
                          pl_weight=2.0):
    """Non-saturating generator loss with a rotation term and path-length
    regularization.

    The two label columns starting at column 108 are overwritten with a
    rotation drawn at random from eight fixed 2-D unit vectors; the
    discriminator's output in the same two columns is compared against it.

    Args:
        G: generator network (``get_output_for``, ``input_shapes``,
            ``output_shape``).
        D: discriminator network (``get_output_for``).
        opt: unused.
        training_set: provides ``get_random_labels_tf``.
        minibatch_size: number of latents/labels to sample.
        pl_minibatch_shrink: divisor for the minibatch used by the
            path-length term; values <= 1 reuse the main minibatch.
        pl_decay: step size of the moving average of path lengths.
        pl_weight: weight of the path-length penalty.

    Returns:
        ``(loss, reg)``: the per-sample loss and the path-length term.
    """
    _ = opt
    rotation_offset = 108
    latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    labels = training_set.get_random_labels_tf(minibatch_size)

    # Pick one of eight rotations, 45 degrees apart, for every sample.
    all_rotations = tf.constant(
        [[1.0, 0.0], [0.7071, 0.7071], [0.0, 1.0], [-0.7071, 0.7071],
         [-1.0, 0.0], [-0.7071, -0.7071], [0.0, -1.0], [0.7071, -0.7071]],
        dtype=tf.float32)
    indices = tf.cast(
        tf.floor(tf.random_uniform(shape=[minibatch_size], minval=0,
                                   maxval=8)),
        dtype=tf.int32)
    random_rotation = tf.gather(all_rotations, indices)
    labels = tf.concat([
        labels[:, :rotation_offset], random_rotation,
        labels[:, rotation_offset + 2:]
    ], axis=1)

    fake_images_out, fake_dlatents_out = G.get_output_for(
        latents, labels, is_training=True, return_dlatents=True)
    fake_scores_out = D.get_output_for(fake_images_out, labels,
                                       is_training=True)

    # Drop the two rotation columns before summing the scores.
    fake_scores_out_without_rotation = tf.concat([
        fake_scores_out[:, :rotation_offset],
        fake_scores_out[:, rotation_offset + 2:]
    ], axis=-1)
    labels_rotation = labels[:, rotation_offset:rotation_offset + 2]
    disc_pred_rotations = fake_scores_out[:, rotation_offset:
                                          rotation_offset + 2]

    # -log(sigmoid(fake_scores_out))
    loss = tf.nn.softplus(-tf.reduce_sum(fake_scores_out_without_rotation,
                                         axis=1, keepdims=True))
    loss = autosummary('Loss/generator', loss)

    rotation_distance = tf.norm(labels_rotation - disc_pred_rotations,
                                axis=-1, keepdims=True)
    # Remove non set rotation labels.  keepdims=True keeps the mask [N, 1];
    # without it the [N, 1] * [N] product broadcasts to [N, N] and mixes
    # samples.
    rotation_mask = tf.reduce_max(tf.ceil(tf.abs(labels_rotation)),
                                  axis=-1, keepdims=True)
    rotation_distance = rotation_distance * rotation_mask
    rotation_distance = autosummary('Loss/rotation_distance/generator',
                                    rotation_distance)
    loss += tf.square(rotation_distance) * 10

    # Path length regularization.
    with tf.name_scope('PathReg'):

        # Evaluate the regularization term using a smaller minibatch to
        # conserve memory.
        if pl_minibatch_shrink > 1:
            pl_minibatch = minibatch_size // pl_minibatch_shrink
            pl_latents = tf.random_normal([pl_minibatch] +
                                          G.input_shapes[0][1:])
            pl_labels = training_set.get_random_labels_tf(pl_minibatch)
            fake_images_out, fake_dlatents_out = G.get_output_for(
                pl_latents, pl_labels, is_training=True,
                return_dlatents=True)

        # Compute |J*y|.
        pl_noise = tf.random_normal(tf.shape(fake_images_out)) / np.sqrt(
            np.prod(G.output_shape[2:]))
        pl_grads = tf.gradients(tf.reduce_sum(fake_images_out * pl_noise),
                                [fake_dlatents_out])[0]
        pl_lengths = tf.sqrt(
            tf.reduce_mean(tf.reduce_sum(tf.square(pl_grads), axis=2),
                           axis=1))
        pl_lengths = autosummary('Loss/pl_lengths', pl_lengths)

        # Track exponential moving average of |J*y|.
        with tf.control_dependencies(None):
            pl_mean_var = tf.Variable(name='pl_mean', trainable=False,
                                      initial_value=0.0, dtype=tf.float32)
        pl_mean = pl_mean_var + pl_decay * (tf.reduce_mean(pl_lengths) -
                                            pl_mean_var)
        pl_update = tf.assign(pl_mean_var, pl_mean)

        # Calculate (|J*y|-a)^2.
        with tf.control_dependencies([pl_update]):
            pl_penalty = tf.square(pl_lengths - pl_mean)
            pl_penalty = autosummary('Loss/pl_penalty', pl_penalty)

        # Apply weight.
        #
        # Note: The division in pl_noise decreases the weight by num_pixels,
        # and the reduce_mean in pl_lengths decreases it by
        # num_affine_layers. The effective weight then becomes:
        #
        # gamma_pl = pl_weight / num_pixels / num_affine_layers
        # = 2 / (r^2) / (log2(r) * 2 - 2)
        # = 1 / (r^2 * (log2(r) - 1))
        # = ln(2) / (r^2 * (ln(r) - ln(2))
        #
        reg = pl_penalty * pl_weight

    return loss, reg
def model_fn(features, labels, mode, params):
    """
    This is a function for creating a computational tensorflow graph.
    The function is in format required by tf.estimator.

    Args:
        features: dict; ``features['images']`` is fed to the network.
        labels: dict with ``'heatmaps'``, ``'segmentation_masks'`` and
            ``'loss_masks'``; not read in PREDICT mode.
        mode: a ``tf.estimator.ModeKeys`` value.
        params: dict with ``'depth_multiplier'``, ``'weight_decay'``,
            ``'lr_boundaries'`` and ``'lr_values'``; also passed on to
            ``KeypointSubnet``.

    Returns:
        A ``tf.estimator.EstimatorSpec`` for the given mode.
    """

    is_training = mode == tf.estimator.ModeKeys.TRAIN

    def backbone(images, is_training):
        return mobilenet_v1(images,
                            is_training,
                            depth_multiplier=params['depth_multiplier'])

    subnet = KeypointSubnet(features['images'], is_training, backbone, params)

    if not is_training:
        predictions = subnet.get_predictions()

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Every prediction tensor is exported under its own name.
        export_outputs = tf.estimator.export.PredictOutput({
            name: tf.identity(tensor, name)
            for name, tensor in predictions.items()
        })
        return tf.estimator.EstimatorSpec(
            mode,
            predictions=predictions,
            export_outputs={'outputs': export_outputs})

    # add l2 regularization
    with tf.name_scope('weight_decay'):
        add_weight_decay(params['weight_decay'])
        regularization_loss = tf.losses.get_regularization_loss()
        tf.summary.scalar('regularization_loss', regularization_loss)

    with tf.name_scope('losses'):

        batch_size = tf.shape(labels['heatmaps'])[0]
        normalizer = tf.to_float(batch_size)

        heatmaps = labels['heatmaps']
        # Add a trailing axis to both masks.
        segmentation_masks = tf.expand_dims(labels['segmentation_masks'], 3)
        loss_masks = tf.expand_dims(labels['loss_masks'], 3)

        # The segmentation mask is appended to the heatmaps as one extra
        # channel of the regression target.
        heatmaps = tf.concat([heatmaps, segmentation_masks], axis=3)

        losses = {
            'regression_loss':
            (1.0 / normalizer) *
            tf.nn.l2_loss(loss_masks * (subnet.heatmaps - heatmaps))
        }

        # Auxiliary segmentation loss on channel 0 of the features at levels
        # 2..5; the target mask is halved (rounding up) after every level.
        for level in range(2, 6):
            p = subnet.enriched_features['p' + str(level)]
            f = tf.expand_dims(p[:, :, :, 0], 3)
            losses['segmentation_loss_at_level_' + str(level)] = (
                2.0 / normalizer) * tf.nn.l2_loss(f - segmentation_masks)

            shape = tf.shape(segmentation_masks)
            height, width = shape[1], shape[2]
            # NOTE(review): height and width are integer tensors, so
            # ``height / 2`` relies on true division (Python 3 or
            # ``from __future__ import division``) -- confirm.
            new_size = [
                tf.to_int32(tf.ceil(height / 2)),
                tf.to_int32(tf.ceil(width / 2))
            ]
            segmentation_masks = tf.image.resize_images(segmentation_masks,
                                                        new_size,
                                                        align_corners=True)

    for n, v in losses.items():
        tf.losses.add_loss(v)
        tf.summary.scalar(n, v)
    total_loss = tf.losses.get_total_loss(add_regularization_losses=True)

    with tf.name_scope('eval_metrics'):
        h = tf.shape(heatmaps)[1]
        w = tf.shape(heatmaps)[2]
        area = tf.to_float(h * w)
        # Regression loss divided by batch size and heatmap area.
        per_pixel_reg_loss = tf.nn.l2_loss(
            loss_masks * (subnet.heatmaps - heatmaps)) / (normalizer * area)
        tf.summary.scalar('per_pixel_reg_loss', per_pixel_reg_loss)

    if mode == tf.estimator.ModeKeys.EVAL:
        eval_metric_ops = {
            'eval_regression_loss':
            tf.metrics.mean(losses['regression_loss']),
            'eval_per_pixel_reg_loss':
            tf.metrics.mean(per_pixel_reg_loss),
            'eval_segmentation_loss_at_level_2':
            tf.metrics.mean(losses['segmentation_loss_at_level_2'])
        }
        return tf.estimator.EstimatorSpec(mode,
                                          loss=total_loss,
                                          eval_metric_ops=eval_metric_ops)

    assert mode == tf.estimator.ModeKeys.TRAIN

    with tf.variable_scope('learning_rate'):
        global_step = tf.train.get_global_step()
        learning_rate = tf.train.piecewise_constant(global_step,
                                                    params['lr_boundaries'],
                                                    params['lr_values'])
        tf.summary.scalar('learning_rate', learning_rate)

    # Ops in the UPDATE_OPS collection must run before the optimizer step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops), tf.variable_scope('optimizer'):
        optimizer = tf.train.AdamOptimizer(learning_rate)
        grads_and_vars = optimizer.compute_gradients(total_loss)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step)

    for g, v in grads_and_vars:
        # v.name[:-2] drops the last two characters of the variable name.
        tf.summary.histogram(v.name[:-2] + '_hist', v)
        tf.summary.histogram(v.name[:-2] + '_grad_hist', g)

    # The returned train_op is the EMA update, which runs after the
    # optimizer step through the control dependency.
    with tf.control_dependencies([train_op]), tf.name_scope('ema'):
        ema = tf.train.ExponentialMovingAverage(decay=MOVING_AVERAGE_DECAY,
                                                num_updates=global_step)
        train_op = ema.apply(tf.trainable_variables())

    return tf.estimator.EstimatorSpec(mode,
                                      loss=total_loss,
                                      train_op=train_op)
def build(self, depth_input, boxes, box_indices, ref_depth_min, ref_depth_max,
          n_split, mask_size, img_size, mask_quantile_level):
    """Build an occlusion mask from depth crops of each box.

    Every box is split into sub-boxes by ``self.slice_box_gen``; a depth
    crop is taken for each sub-box and one sorted depth value is picked
    from it.  A cell is marked when that value lies within
    ``[ref_depth_min, ref_depth_max]`` or is below 0.2.

    Args:
        depth_input: depth image passed to ``tf.image.crop_and_resize``.
        boxes: boxes, one row per box; passed to ``self.slice_box_gen``.
        box_indices: image index for every box.
        ref_depth_min: lower depth bound, one value per box.
        ref_depth_max: upper depth bound, one value per box.
        n_split: number of cells per box side; stored on ``self``.
        mask_size: ``(height, width)`` of every depth crop.
        img_size: output size for the nearest-neighbour upsampling.
        mask_quantile_level: factor applied to the non-zero count when
            choosing which sorted depth value to read.

    Returns:
        The upsampled mask tensor named ``occ_mask``.
    """
    with tf.variable_scope("occ_mask"):
        self.n_split = n_split
        self.n_batch = tf.shape(boxes)[0]
        sub_box = self.slice_box_gen(boxes)

        cells_per_box = self.n_split**2
        num_cells = self.n_batch * cells_per_box

        # duplicate the reference depths, once per cell of each box
        ref_depth_min = tf.expand_dims(ref_depth_min, -1)
        ref_depth_min_dup = tf.tile(ref_depth_min, [1, cells_per_box])
        ref_depth_min_dup = tf.reshape(ref_depth_min_dup, [num_cells, 1],
                                       name='duplicated_depth_min')

        ref_depth_max = tf.expand_dims(ref_depth_max, -1)
        ref_depth_max_dup = tf.tile(ref_depth_max, [1, cells_per_box])
        ref_depth_max_dup = tf.reshape(ref_depth_max_dup, [num_cells, 1],
                                       name='duplicated_depth_max')

        # duplicate the box indices the same way
        box_indices = tf.expand_dims(box_indices, -1)
        box_indices_dup = tf.tile(box_indices, [1, cells_per_box])
        box_indices_dup = tf.reshape(box_indices_dup, [num_cells],
                                     name='duplicated_box_indices')

        # must use nearest neighbour method
        depth_size = mask_size[0] * mask_size[1]
        crop_depth = tf.image.crop_and_resize(depth_input,
                                              sub_box,
                                              box_indices_dup,
                                              mask_size,
                                              method='nearest')
        crop_depth = tf.reshape(crop_depth, [num_cells, depth_size])

        # avoid empty case: append two non-zero values to every row
        fill_in = tf.tile([[0.1, 100.0]], [num_cells, 1])
        crop_depth = tf.concat([crop_depth, fill_in], axis=1)
        num_nonzero = tf.count_nonzero(crop_depth, axis=1)

        # Index into the ascending-sorted row; it depends on the number of
        # non-zero depths and the quantile level.
        quantile_idx = tf.ceil(depth_size -
                               tf.cast(num_nonzero, dtype=tf.float32) *
                               mask_quantile_level)
        quantile_idx = tf.cast(quantile_idx, dtype=tf.int32)
        quantile_idx = tf.expand_dims(quantile_idx, -1)
        batch_range = tf.expand_dims(tf.range(0, num_cells), -1)
        cat_idx = tf.concat([batch_range, quantile_idx], axis=1)
        sorted_crop_depth = tf.contrib.framework.sort(crop_depth, axis=1)
        depth_val = tf.gather_nd(sorted_crop_depth, cat_idx)

        # f(x) = 1 if x_min <= x <= x_max
        #        0 otherwise
        # f(x) = g(x, x_min) + g(x_max, x) - 1
        # where
        # g(x, y) = 1 if x >= y
        #           0 otherwise
        occ = self.step_f(depth_val, tf.squeeze(
            ref_depth_min_dup, 1)) + self.step_f(
                tf.squeeze(ref_depth_max_dup, 1), depth_val) - 1
        # Depth values below 0.2 are added to the mask as well.
        dep_zero = tf.cast(tf.less(depth_val, 0.2), dtype=tf.float32)
        occ += dep_zero

        occ = tf.reshape(occ, [self.n_batch, cells_per_box],
                         name='occ_mask_base')
        # Divide each box's mask by its fraction of non-zero cells (0.01
        # avoids a division by zero).
        num_masks = tf.count_nonzero(occ, axis=1)
        mask_weights = (tf.cast(num_masks, tf.float32) + 0.01) / cells_per_box
        occ = occ / tf.expand_dims(mask_weights, -1)
        occ = tf.reshape(occ, [self.n_batch, self.n_split, self.n_split],
                         name='occ_mask_base')
        occ = tf.expand_dims(occ, -1)
        occ_mask = tf.image.resize_nearest_neighbor(occ,
                                                    img_size,
                                                    name='occ_mask')
        return occ_mask
def body1(self, num, object_num, loss, predict, labels, nilboy):
    """Add the loss terms of ground-truth object ``num`` to ``loss``.

    The returned tuple has the same layout as the arguments, which suggests
    the method is used as a loop body (presumably ``tf.while_loop`` - confirm
    at the call site).

    Args:
        num: index of the label row to process.
        object_num: passed through unchanged.
        loss: sequence of four running sums
            ``[class, object, noobject, coord]``.
        predict: 3-D prediction tensor. The channel axis is read as
            ``num_classes`` class scores, then ``boxes_per_cell``
            confidences, then ``boxes_per_cell * 4`` box values.
        labels: 2-D tensor; row ``num`` is read as
            ``(x_center, y_center, w, h, class)``.
        nilboy: ignored on input; replaced by the responsibility mask.

    Returns:
        ``(num + 1, object_num, updated_loss, predict, labels, mask)`` where
        ``mask`` is the ``[cell_size, cell_size, boxes_per_cell]``
        responsibility mask computed for this object.
    """
    grid = self.cell_size
    cell_px = self.image_size / self.cell_size

    truth = tf.reshape(labels[num:num + 1, :], [-1])
    cx = truth[0]
    cy = truth[1]
    box_w = truth[2]
    box_h = truth[3]

    # Cells overlapped by the ground-truth box: ones in the covered
    # rectangle, zero-padded out to the full [grid, grid] map.
    col_lo = tf.floor((cx - box_w / 2) / cell_px)
    col_hi = tf.ceil((cx + box_w / 2) / cell_px)
    row_lo = tf.floor((cy - box_h / 2) / cell_px)
    row_hi = tf.ceil((cy + box_h / 2) / cell_px)
    covered_shape = tf.cast(tf.stack([row_hi - row_lo, col_hi - col_lo]),
                            dtype=tf.int32)
    covered_pad = tf.reshape(
        tf.cast(tf.stack([row_lo, grid - row_hi, col_lo, grid - col_hi]),
                tf.int32),
        (2, 2))
    objects = tf.pad(tf.ones(covered_shape, tf.float32), covered_pad,
                     "CONSTANT")

    # The single cell that contains the box centre.
    center_col = tf.floor(cx / cell_px)
    center_row = tf.floor(cy / cell_px)
    center_pad = tf.reshape(
        tf.cast(tf.stack([center_row, grid - center_row - 1,
                          center_col, grid - center_col - 1]), tf.int32),
        (2, 2))
    response = tf.pad(tf.ones([1, 1], tf.float32), center_pad, "CONSTANT")
    response_3d = tf.reshape(response, (grid, grid, 1))

    # Predicted boxes: scale the raw values, then add each cell's offset.
    box_start = self.num_classes + self.boxes_per_cell
    raw_boxes = tf.reshape(predict[:, :, box_start:],
                           [grid, grid, self.boxes_per_cell, 4])
    scaled_boxes = raw_boxes * [cell_px, cell_px,
                                self.image_size, self.image_size]
    cell_origin = cell_px * np.arange(grid)
    base_boxes = np.zeros([grid, grid, 4])
    base_boxes[:, :, 0] = cell_origin[np.newaxis, :]  # x grows with column
    base_boxes[:, :, 1] = cell_origin[:, np.newaxis]  # y grows with row
    base_boxes = np.tile(base_boxes.reshape([grid, grid, 1, 4]),
                         [1, 1, self.boxes_per_cell, 1])
    predict_boxes = base_boxes + scaled_boxes

    iou_predict_truth = self.iou(predict_boxes, truth[0:4])

    # Confidence target: IoU, kept only in the centre cell.
    target_conf = iou_predict_truth * response_3d
    # Responsibility mask: the box(es) reaching the per-cell maximum,
    # again restricted to the centre cell.
    best_conf = tf.reduce_max(target_conf, 2, keep_dims=True)
    responsible = tf.cast((target_conf >= best_conf), tf.float32) * response_3d
    not_responsible = tf.ones_like(responsible, dtype=tf.float32) - responsible

    pred_conf = predict[:, :, self.num_classes:box_start]

    # Ground-truth size terms (scalars).
    truth_sqrt_w = tf.sqrt(tf.abs(box_w))
    truth_sqrt_h = tf.sqrt(tf.abs(box_h))

    # Predicted centre and size terms; sizes are clamped to
    # [0, image_size] before the square root.
    pred_x = predict_boxes[:, :, :, 0]
    pred_y = predict_boxes[:, :, :, 1]
    max_side = self.image_size * 1.0
    pred_sqrt_w = tf.sqrt(
        tf.minimum(max_side, tf.maximum(0.0, predict_boxes[:, :, :, 2])))
    pred_sqrt_h = tf.sqrt(
        tf.minimum(max_side, tf.maximum(0.0, predict_boxes[:, :, :, 3])))

    # Class terms.
    truth_class = tf.one_hot(tf.cast(truth[4], tf.int32), self.num_classes,
                             dtype=tf.float32)
    pred_class = predict[:, :, 0:self.num_classes]

    class_loss = tf.nn.l2_loss(
        tf.reshape(objects, (grid, grid, 1)) * (pred_class - truth_class)
    ) * self.class_scale
    object_loss = tf.nn.l2_loss(
        responsible * (pred_conf - target_conf)) * self.object_scale
    noobject_loss = tf.nn.l2_loss(
        not_responsible * (pred_conf)) * self.noobject_scale
    coord_loss = (
        tf.nn.l2_loss(responsible * (pred_x - cx) / cell_px)
        + tf.nn.l2_loss(responsible * (pred_y - cy) / cell_px)
        + tf.nn.l2_loss(responsible * (pred_sqrt_w - truth_sqrt_w))
        / self.image_size
        + tf.nn.l2_loss(responsible * (pred_sqrt_h - truth_sqrt_h))
        / self.image_size
    ) * self.coord_scale

    updated_loss = [
        loss[0] + class_loss,
        loss[1] + object_loss,
        loss[2] + noobject_loss,
        loss[3] + coord_loss,
    ]
    return num + 1, object_num, updated_loss, predict, labels, responsible
# Graph-construction fragment: mean-centre the input image, pad it so both
# spatial sides are multiples of ``upsample_factor``, add a batch axis, and
# build the bilinear upsampling filter weights.
number_of_classes = 2

# NOTE(review): the path has no leading "~", so expanduser returns it
# unchanged - confirm whether a home-relative folder was intended.
log_folder = os.path.expanduser('segment_log_folder')
vgg_checkpoint_path = os.path.join(checkpoints_dir, 'vgg_16.ckpt')

# Convert image to float32 before subtracting the
# mean pixel value
image_float = tf.to_float(image_tensor, name='ToFloat')

# First two entries of the dynamic shape (presumably height and width of an
# HWC image - confirm against where image_tensor is created).
original_shape = tf.shape(image_float)[0:2]

# Subtract the mean pixel value from each pixel
mean_centered_image = _mean_image_subtraction(image_float,
                                              [_R_MEAN, _G_MEAN, _B_MEAN])

# Round each side up to the next multiple of upsample_factor:
# ceil(side / factor) * factor.
target_input_size_factor = tf.ceil(
    tf.div(tf.to_float(original_shape), tf.to_float(upsample_factor)))
target_input_size = tf.to_int32(
    tf.multiply(target_input_size_factor, upsample_factor))

# Offset of the original image inside the padded canvas; floor division
# puts any odd leftover pixel on the bottom/right side.
padding_size = (target_input_size - original_shape) // 2

mean_centered_image = tf.image.pad_to_bounding_box(mean_centered_image,
                                                   padding_size[0],
                                                   padding_size[1],
                                                   target_input_size[0],
                                                   target_input_size[1])

# Add a leading batch axis of size 1.
processed_images = tf.expand_dims(mean_centered_image, 0)

upsample_filter_np = bilinear_upsample_weights(upsample_factor,
                                               number_of_classes)
def prediction_layers(
    self,
    features,
    end_points,
    input_shape,
    reuse=None,
    is_training=False,
    scope="pose",
):
    """Build the prediction heads on top of backbone features.

    An intermediate backbone activation is resized, passed through a 1x1
    convolution and concatenated (channel axis 3) with ``features``
    upsampled by a stride-2 transposed convolution. The prediction heads
    are then created on the concatenated tensor.

    Args:
        features: backbone output fed to the transposed convolution.
        end_points: mapping from layer name to intermediate activation.
        input_shape: shape of the network input; entries ``[1:3]`` are
            divided by 16 to size the resized intermediate activation
            (presumably height and width of an NHWC batch - TODO confirm).
        reuse: forwarded to ``tf.variable_scope(scope, reuse=reuse)``.
        is_training: accepted for interface compatibility; not used here.
        scope: variable scope name for the prediction heads.

    Returns:
        dict with ``"part_pred"`` and, depending on the configuration,
        ``"locref"``, ``"pairwise_pred"`` and ``"part_pred_interm"``.

    Raises:
        ValueError: if ``cfg.net_type`` contains none of ``"resnet"``,
            ``"mobilenet"`` or ``"efficientnet"``.
    """
    cfg = self.cfg
    net_type = cfg.net_type

    # Pick the backbone end point that feeds the parallel decoder branch.
    if "resnet" in net_type:
        num_layers = re.findall("resnet_([0-9]*)", net_type)[0]
        layer_name = ("resnet_v1_{}".format(num_layers) +
                      "/block{}/unit_{}/bottleneck_v1")
        mid_pt = layer_name.format(2, 3)
    elif "mobilenet" in net_type:
        mid_pt = "layer_7"
    elif "efficientnet" in net_type:
        mid_pt = "block_" + parallel_layers[net_type.split('-')[1]]
    else:
        # Fail early with a clear message instead of hitting an unbound
        # ``mid_pt`` further down.
        raise ValueError(
            "Unsupported net_type {!r}: expected a resnet, mobilenet or "
            "efficientnet backbone.".format(net_type))

    # Target size of the resized branch: 2 * ceil(input_shape[1:3] / 16).
    final_dims = tf.ceil(
        tf.divide(input_shape[1:3], tf.convert_to_tensor(16)))
    interim_dims = tf.scalar_mul(2, final_dims)
    interim_dims = tf.cast(interim_dims, tf.int32)

    bank_3 = end_points[mid_pt]
    bank_3 = tf.image.resize_images(bank_3, interim_dims)

    with slim.arg_scope(
        [slim.conv2d],
        padding="SAME",
        normalizer_fn=None,
        weights_regularizer=slim.l2_regularizer(cfg.weight_decay),
    ):
        with tf.variable_scope("decoder_filters"):
            bank_3 = slim.conv2d(bank_3, cfg.bank3, 1,
                                 scope="decoder_parallel_1")

    with slim.arg_scope(
        [slim.conv2d_transpose],
        padding="SAME",
        normalizer_fn=None,
        weights_regularizer=slim.l2_regularizer(cfg.weight_decay),
    ):
        with tf.variable_scope("upsampled_features"):
            upsampled_features = slim.conv2d_transpose(features,
                                                       cfg.bank5,
                                                       kernel_size=[3, 3],
                                                       stride=2,
                                                       scope="block4")

    net = tf.concat([bank_3, upsampled_features], 3)

    out = {}
    with tf.variable_scope(scope, reuse=reuse):
        out["part_pred"] = prediction_layer(
            cfg, net, "part_pred",
            cfg.num_joints + cfg.get("num_idchannel", 0))
        if cfg.location_refinement:
            out["locref"] = prediction_layer(cfg, net, "locref_pred",
                                             cfg.num_joints * 2)
        # The two "pairwise_pred" variants are mutually exclusive: which one
        # is built depends on whether dataset_type mentions "multi-animal".
        if cfg.pairwise_predict and "multi-animal" not in cfg.dataset_type:
            out["pairwise_pred"] = prediction_layer(
                cfg, net, "pairwise_pred",
                cfg.num_joints * (cfg.num_joints - 1) * 2)
        if cfg.partaffinityfield_predict and "multi-animal" in cfg.dataset_type:
            out["pairwise_pred"] = prediction_layer(
                cfg, net, "pairwise_pred", cfg.num_limbs * 2)
        if cfg.intermediate_supervision and "efficientnet" not in net_type:
            if "mobilenet" in net_type:
                out["part_pred_interm"] = prediction_layer(
                    cfg,
                    end_points["layer_" +
                               str(cfg["intermediate_supervision_layer"])],
                    "intermediate_supervision",
                    cfg.num_joints,
                )
            elif "resnet" in net_type:
                interm_name = layer_name.format(
                    3, cfg.intermediate_supervision_layer)
                block_interm_out = end_points[interm_name]
                out["part_pred_interm"] = prediction_layer(
                    cfg,
                    block_interm_out,
                    "intermediate_supervision",
                    cfg.num_joints + cfg.get("num_idchannel", 0),
                )
    return out
def G_logistic_ns_pathreg(G, D, opt, training_set, minibatch_size,
                          pl_minibatch_shrink=2, pl_decay=0.01,
                          pl_weight=2.0, int_reg_clip=5.0,
                          rotation_step_size=0.01):
    """Non-saturating logistic generator loss with rotation-aware extras.

    On top of the softplus generator loss this adds
      * a penalty on the distance between the rotation columns of the
        labels and the same columns of the discriminator output,
      * path length regularization on the generator dlatents, and
      * a clipped regularizer on the gradient of the generator output with
        respect to a rotation-perturbed label ("interpolation reg").

    Label columns ``rotation_offset`` and ``rotation_offset + 1`` are
    treated as (cos, sin) of the rotation; both being zero is treated as
    "rotation not set".

    Args:
        G: generator network; ``get_output_for``, ``input_shapes`` and
            ``output_shape`` are used.
        D: discriminator network; ``get_output_for`` is used.
        opt: unused.
        training_set: provides ``get_random_labels_tf(n)``.
        minibatch_size: number of samples for the main loss.
        pl_minibatch_shrink: if > 1, the path length term is evaluated on
            ``minibatch_size // pl_minibatch_shrink`` fresh samples.
        pl_decay: step size of the moving averages of the gradient lengths.
        pl_weight: weight of the path length penalty.
        int_reg_clip: upper clip value of the interpolation penalty.
        rotation_step_size: scales the angle offset used for the second
            interpolation label.

    Returns:
        ``(loss, reg)``: the per-sample generator loss of shape
        ``[minibatch_size, 1]`` and the regularization term.
    """
    _ = opt
    rotation_offset = 108
    latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    labels = training_set.get_random_labels_tf(minibatch_size)

    # Mirror some labels to balance the rotations: rows where the random
    # draw is False get the sign of their sin component flipped.
    # NOTE(review): atan2(0, 0) followed by cos gives 1, so a row whose
    # rotation is not set would become (1, 0) here - confirm that the
    # training set never yields unset rotations.
    random_vector = tf.random_uniform([minibatch_size]) < 0.5
    rotation_cos = tf.expand_dims(labels[:, rotation_offset], axis=-1)
    rotation_sin = tf.expand_dims(labels[:, rotation_offset + 1], axis=-1)
    angle = tf.atan2(rotation_sin, rotation_cos)
    new_rotation_cos = tf.cos(angle)
    new_rotation_sin = tf.sin(angle) * -1
    mirrored_labels = tf.concat([
        labels[:, :rotation_offset],
        new_rotation_cos,
        new_rotation_sin,
        labels[:, rotation_offset + 2:],
    ], axis=1)
    labels = tf.where(random_vector, labels, mirrored_labels)

    # Remove half of front left and front right to balance the rotation
    # label: rows whose cos column equals 0.7071 have their rotation zeroed
    # with probability 0.5.
    zero_rotation = tf.expand_dims(tf.zeros([minibatch_size]), axis=-1)
    removed_labels = tf.concat([
        labels[:, :rotation_offset],
        zero_rotation,
        zero_rotation,
        labels[:, rotation_offset + 2:],
    ], axis=1)
    condition = tf.equal(labels[:, rotation_offset], 0.7071)
    random_vector = tf.random_uniform([minibatch_size]) < 0.5
    remove_condition = tf.logical_and(condition, random_vector)
    labels = tf.where(remove_condition, removed_labels, labels)

    fake_images_out, fake_dlatents_out = G.get_output_for(
        latents, labels, is_training=True, return_dlatents=True)
    fake_scores_out = D.get_output_for(fake_images_out, labels,
                                       is_training=True)

    # Split the discriminator output into the rotation columns and the rest.
    fake_scores_out_without_rotation = tf.concat([
        fake_scores_out[:, :rotation_offset],
        fake_scores_out[:, rotation_offset + 2:],
    ], axis=-1)
    labels_rotation = labels[:, rotation_offset:rotation_offset + 2]
    disc_pred_rotations = fake_scores_out[:,
                                          rotation_offset:rotation_offset + 2]

    # softplus(-x) == -log(sigmoid(x))
    loss = tf.nn.softplus(
        -tf.reduce_sum(fake_scores_out_without_rotation, axis=1,
                       keepdims=True))
    loss = autosummary('Loss/generator', loss)

    rotation_distance = tf.norm(labels_rotation - disc_pred_rotations,
                                axis=-1, keepdims=True)
    # Remove non set rotation labels: the mask is 1 where either rotation
    # component is non-zero. keepdims=True keeps it [N, 1] so it applies
    # row by row; a rank-1 mask would broadcast against the [N, 1]
    # distances to an [N, N] matrix.
    rotation_is_set = tf.reduce_max(tf.ceil(tf.abs(labels_rotation)),
                                    axis=-1, keepdims=True)
    rotation_distance = rotation_distance * rotation_is_set
    rotation_distance = autosummary('Loss/rotation_distance/generator',
                                    rotation_distance)
    loss += tf.square(rotation_distance) * 10

    # Path length regularization.
    with tf.name_scope('PathReg'):

        # Evaluate the regularization term using a smaller minibatch to
        # conserve memory.
        if pl_minibatch_shrink > 1:
            pl_minibatch = minibatch_size // pl_minibatch_shrink
            pl_latents = tf.random_normal([pl_minibatch] +
                                          G.input_shapes[0][1:])
            pl_labels = training_set.get_random_labels_tf(pl_minibatch)
            fake_images_out, fake_dlatents_out = G.get_output_for(
                pl_latents, pl_labels, is_training=True,
                return_dlatents=True)

        # Compute |J*y|.
        pl_noise = tf.random_normal(tf.shape(fake_images_out)) / np.sqrt(
            np.prod(G.output_shape[2:]))
        pl_grads = tf.gradients(tf.reduce_sum(fake_images_out * pl_noise),
                                [fake_dlatents_out])[0]
        pl_lengths = tf.sqrt(
            tf.reduce_mean(tf.reduce_sum(tf.square(pl_grads), axis=2),
                           axis=1))
        pl_lengths = autosummary('Loss/pl_lengths', pl_lengths)

        # Track exponential moving average of |J*y|.
        with tf.control_dependencies(None):
            pl_mean_var = tf.Variable(name='pl_mean', trainable=False,
                                      initial_value=0.0, dtype=tf.float32)
        pl_mean = pl_mean_var + pl_decay * (tf.reduce_mean(pl_lengths) -
                                            pl_mean_var)
        pl_update = tf.assign(pl_mean_var, pl_mean)

        # Calculate (|J*y|-a)^2.
        with tf.control_dependencies([pl_update]):
            pl_penalty = tf.square(pl_lengths - pl_mean)
            pl_penalty = autosummary('Loss/pl_penalty', pl_penalty)

        # Apply weight.
        #
        # Note: The division in pl_noise decreases the weight by num_pixels,
        # and the reduce_mean in pl_lengths decreases it by
        # num_affine_layers. The effective weight then becomes:
        #
        # gamma_pl = pl_weight / num_pixels / num_affine_layers
        # = 2 / (r^2) / (log2(r) * 2 - 2)
        # = 1 / (r^2 * (log2(r) - 1))
        # = ln(2) / (r^2 * (ln(r) - ln(2))
        #
        reg = pl_penalty * pl_weight

        # Interpolation Reg: take the first label, give it a random
        # rotation and a second rotation a small signed step away, and
        # penalise the gradient of the generator output at their midpoint.
        label_int_pl = labels[:1]
        random_angle = tf.random_uniform([1]) * 2 * np.pi
        interpolation_rotation_cos = tf.expand_dims(tf.cos(random_angle),
                                                    axis=-1)
        interpolation_rotation_sin = tf.expand_dims(tf.sin(random_angle),
                                                    axis=-1)
        label_int_pl_1 = tf.concat([
            label_int_pl[:, :rotation_offset],
            interpolation_rotation_cos,
            interpolation_rotation_sin,
            label_int_pl[:, rotation_offset + 2:],
        ], axis=1)

        random_sign = tf.where(
            tf.random_uniform([1], -1, 1) > 0,
            tf.ones([1]), -1 * tf.ones([1]))
        random_angle = (random_angle +
                        rotation_step_size * 2 * 2 * np.pi * random_sign)
        interpolation_rotation_cos = tf.expand_dims(tf.cos(random_angle),
                                                    axis=-1)
        interpolation_rotation_sin = tf.expand_dims(tf.sin(random_angle),
                                                    axis=-1)
        label_int_pl_2 = tf.concat([
            label_int_pl[:, :rotation_offset],
            interpolation_rotation_cos,
            interpolation_rotation_sin,
            label_int_pl[:, rotation_offset + 2:],
        ], axis=1)

        label_interpolate = tfutil.slerp(label_int_pl_1, label_int_pl_2, 0.5)
        pl_grads = tf.gradients(
            G.get_output_for(latents[:1], label_interpolate,
                             randomize_noise=False),
            [label_int_pl_1])[0]
        int_pl_lengths = tf.norm(pl_grads, axis=-1, keepdims=True)

        # Same moving-average scheme as for the path lengths above.
        with tf.control_dependencies(None):
            int_pl_mean_var = tf.Variable(name='int_pl_mean',
                                          trainable=False,
                                          initial_value=0.0,
                                          dtype=tf.float32)
        int_pl_mean = int_pl_mean_var + pl_decay * (
            tf.reduce_mean(int_pl_lengths) - int_pl_mean_var)
        int_pl_update = tf.assign(int_pl_mean_var, int_pl_mean)

        with tf.control_dependencies([int_pl_update]):
            int_pl_penalty = tf.square(int_pl_lengths - int_pl_mean)
            # Clip penalty.
            int_pl_penalty = tf.clip_by_value(int_pl_penalty, 0.0,
                                              int_reg_clip)
            int_pl_penalty = autosummary('Loss/int_pl_penalty',
                                         int_pl_penalty)

        reg += int_pl_penalty

    return loss, reg