def simple_nms(self, scores, iterations, radius):
    """Approximate non-maximum suppression (NMS) on a score heatmap.

    Local maxima are detected with max-pooling and then refined by iterative
    propagation. Contiguous points that share the same score are not
    suppressed, so this is only an approximation of standard NMS.

    Arguments:
        scores: the score heatmap, with shape `[B, H, W]`.
        iterations: an integer; values below 1 return `scores` untouched,
            otherwise the initial maxima detection is followed by
            `iterations - 1` refinement passes.
        radius: an integer scalar, the radius of the NMS window (the pooling
            window is `2 * radius + 1` wide).

    Returns:
        `scores` with every entry that is not kept as a maximum set to zero.
    """
    if iterations < 1:
        return scores

    window = tf.constant(radius, name='radius') * 2 + 1

    def pool(heatmap):
        # max_pool_v2 accepts a tensor-valued ksize. A trailing channel axis
        # is added for the pooling op and stripped again afterwards.
        pooled = gen_nn_ops.max_pool_v2(
            heatmap[..., None],
            ksize=[1, window, window, 1],
            strides=[1, 1, 1, 1],
            padding='SAME')
        return pooled[..., 0]

    blank = tf.zeros_like(scores)
    keep = tf.equal(scores, pool(scores))
    for _ in range(iterations - 1):
        # Everything within the window of an already accepted maximum.
        suppressed = tf.cast(pool(tf.to_float(keep)), tf.bool)
        remaining = tf.where(suppressed, blank, scores)
        # Maxima among the scores that survived suppression.
        candidates = tf.equal(remaining, pool(remaining))
        keep = tf.logical_or(
            keep, tf.logical_and(candidates, tf.logical_not(suppressed)))
    return tf.where(keep, scores, blank)
def max_pooling_layer(input_tensor, ksize, strides=None, padding="VALID", name='max_pool'):
    """Apply max pooling through `gen_nn_ops.max_pool_v2`.

    :param input_tensor: tensor to pool
    :param ksize: pooling window size, forwarded unchanged to the op
    :param strides: window strides; `[1, 1, 1, 1]` is used when None
    :param padding: padding mode forwarded to the op (default "VALID")
    :param name: name given to the pooling op
    :return: the pooled tensor
    """
    # max_pool_v2 is used because it supports dynamically sized pooling.
    effective_strides = [1, 1, 1, 1] if strides is None else strides
    return gen_nn_ops.max_pool_v2(
        input_tensor,
        ksize=ksize,
        strides=effective_strides,
        padding=padding,
        name=name,
    )
def spp_layer2(self, input_tensor, levels=(2, 1), name='SPP_layer'):
    """Multi-level spatial pyramid pooling (SPP) layer.

    For every level `l`, axes 1 and 2 of the input are max-pooled with a
    window of `ceil(dim / l)` and a stride of `floor(dim / l)`. Each pooled
    map is flattened per example and all levels are concatenated.

    Arguments:
        input_tensor: feature map to pool; axis 0 is kept as the batch axis
            and the last axis is treated as channels (presumably
            `[B, H, W, C]` -- confirm against the caller). Also stored on
            `self.sp_tensor`.
        levels: iterable of integer pyramid levels. Defaults to `(2, 1)`.
        name: name of the variable scope wrapping the ops.

    Returns:
        A 2-D tensor reshaped to `[-1, sum(l * l for l in levels) * C]`.
        For the default levels and 256 channels this is `4 * 256 + 256`.
        NOTE(review): this assumes every level yields exactly `l x l` bins,
        which depends on the input height/width -- confirm for the sizes
        used in practice.
    """
    self.sp_tensor = input_tensor
    levels = list(levels)  # iterated twice below

    def _window(extent, level, rounding):
        # tf.math.divide performs true division, so the result is a float;
        # round it, then cast explicitly because the pooling op takes int32
        # window sizes and strides.
        return tf.cast(rounding(tf.math.divide(extent, level)), tf.int32)

    with tf.variable_scope(name):
        dyn_shape = tf.shape(input_tensor)
        batch, height, width = dyn_shape[0], dyn_shape[1], dyn_shape[2]

        pool_outputs = []
        for level in levels:
            pool = gen_nn_ops.max_pool_v2(
                input_tensor,
                ksize=[
                    1,
                    _window(height, level, tf.math.ceil),
                    _window(width, level, tf.math.ceil),
                    1,
                ],
                strides=[
                    1,
                    _window(height, level, tf.math.floor),
                    _window(width, level, tf.math.floor),
                    1,
                ],
                padding='VALID')
            pool_outputs.append(tf.reshape(pool, [batch, -1]))

        # Derive the feature length from the pyramid and channel count
        # instead of hard-coding 4 * 256 + 256. Prefer the static channel
        # size so downstream layers still see a known feature dimension.
        static_shape = input_tensor.get_shape()
        channels = None
        if static_shape.ndims is not None:
            channels = static_shape.as_list()[-1]
        if channels is None:
            channels = dyn_shape[-1]
        bins = sum(level * level for level in levels)

        spp_pool = tf.concat(pool_outputs, 1)
        spp_pool = tf.reshape(spp_pool, [-1, bins * channels])
    return spp_pool
def roi_pooling_single(self, box):
    """Perform RoI max pooling for a single box on the base feature map.

    The region described by `box` is cropped from
    `self.layers['base_model/feature_map']`, zero padded so that its height
    and width are multiples of the configured pool height and width, and
    then max pooled with a kernel (and stride) of `padded_size / pool_size`.

    inputs:
        'box': a single tensor of shape [4,] of format [x1,y1,x2,y2]; a box
        on the feature map for which RoI pooling is performed.

    returns:
        the max pooled slice of the feature map for the region defined by
        'box'. The pooling grid is `cfg.NET.ROI_POOL_HEIGHT` by
        `cfg.NET.ROI_POOL_WIDTH`. NOTE(review): the padding spec covers four
        axes, so the result presumably keeps the feature map's leading batch
        axis, i.e. [1, pool_h, pool_w, channels] -- confirm.
    """
    box_h = tf.cast(tf.math.ceil(box[3] - box[1]), dtype=tf.int32)
    box_w = tf.cast(tf.math.ceil(box[2] - box[0]), dtype=tf.int32)
    box_x = tf.cast(box[0], dtype=tf.int32)
    box_y = tf.cast(box[1], dtype=tf.int32)

    fm = self.layers['base_model/feature_map']
    crop = tf.image.crop_to_bounding_box(fm, box_y, box_x, box_h, box_w)

    pool_h = self.cfg.NET.ROI_POOL_HEIGHT
    pool_w = self.cfg.NET.ROI_POOL_WIDTH

    def _split_padding(extent, bins):
        # Pad up to the next multiple of `bins`. When `extent` already
        # divides evenly a full extra `bins` is added, so the padded size is
        # always a positive multiple. The larger half goes first.
        half = (bins * (tf.math.floordiv(extent, bins) + 1) - extent) / 2
        return tf.to_int32(tf.math.ceil(half)), tf.to_int32(tf.math.floor(half))

    left_pad, right_pad = _split_padding(box_w, pool_w)
    top_pad, bottom_pad = _split_padding(box_h, pool_h)
    pads = [[0, 0], [top_pad, bottom_pad], [left_pad, right_pad], [0, 0]]
    crop = tf.pad(crop, pads)

    # The kernel must be derived from the same pool size used for padding;
    # a hard-coded 7 only matches a 7x7 pooling configuration.
    c_sh = tf.shape(crop)
    k_size = [
        1,
        tf.cast(c_sh[1] / pool_h, dtype=tf.int32),
        tf.cast(c_sh[2] / pool_w, dtype=tf.int32),
        1,
    ]
    mp = gen_nn_ops.max_pool_v2(crop, k_size, k_size, padding='VALID')
    return mp
def spatial_pyramid_pooling(bin_size_list, inputs, rois, feat_stride, padding="VALID"):
    """Spatial pyramid pooling layer from SPP-Net.

    :param bin_size_list: list of int
        Pyramid levels; each number n requests an n x n grid of bins. For
        example [1, 2, 4] means 1x1, 2x2 and 4x4 max pools, i.e. 21 outputs
        per feature map.
    :param inputs: output from the final convolution layer
    :param rois: regions of interest on the images; column 0 is skipped and
        the remaining columns are mapped onto the feature map
    :param feat_stride: convolution stride of the last convolutional layer
    :param padding: padding mode for the pooling op (default "VALID")
    :return: 2-D tensor of shape [batch, vector_size] with one row per roi,
        sized to fit the input of the next fully connected layer
    """
    # Map the image rois onto the feature map; no gradient flows through
    # the box coordinates.
    bboxes = tf.stop_gradient(proposal_util.map_rois_to_feature(
        tf.shape(inputs), rois[:, 1:], feat_stride))

    # NOTE(review): `tf.image.crop_to_bounding_box` expects
    # (image, offset_height, offset_width, target_height, target_width), so
    # this two-argument call does not match its signature. The loop below
    # also iterates the result in Python. The layout of `bboxes` is defined
    # by `proposal_util.map_rois_to_feature`, which is not visible here:
    # unpack it into per-roi offsets and sizes once that layout is confirmed.
    crops = tf.image.crop_to_bounding_box(inputs, bboxes)

    outputs = []
    for crop in crops:
        shape = tf.shape(crop)
        roi_pools = []
        for bin_size in bin_size_list:
            win_h = shape[1] / bin_size
            win_w = shape[2] / bin_size
            pool_size = [1, tf.to_int32(tf.ceil(win_h)), tf.to_int32(tf.ceil(win_w)), 1]
            pool_stride = [1, tf.to_int32(tf.floor(win_h)), tf.to_int32(tf.floor(win_w)), 1]
            # The regular max-pooling wrapper does not accept tensor-valued
            # window sizes and strides; max_pool_v2 does
            # (https://github.com/tensorflow/tensorflow/pull/11875).
            # Pool the roi crop itself: the window sizes above were derived
            # from its shape, not from the full feature map.
            results = gen_nn_ops.max_pool_v2(crop, pool_size, pool_stride, padding=padding)
            # list.append returns None, so collect first and concat after.
            roi_pools.append(tf.layers.flatten(results))
        outputs.append(tf.concat(roi_pools, axis=1))

    # One row per roi keeps the feature length independent of the roi count.
    # NOTE(review): assumes each crop keeps a leading batch axis of 1.
    return tf.concat(outputs, axis=0)
def add_word_embeddings_op(self):
    """Defines self.word_embeddings.

    If self.config.embeddings is None, a randomly initialized matrix of
    shape [nwords, dim_word] is created. Otherwise the word vectors are
    initialized from self.config.embeddings and are trainable only when
    self.config.train_embeddings is set.

    When self.config.use_chars is set, one character-level CNN per entry of
    self.config.filter_sizes is applied (convolution, ReLU, max pooling over
    the character positions, dropout) and the resulting features are
    concatenated to the word vectors. Without character features,
    self.word_embeddings is just the word lookup.

    Also sets self.model_unaware_embedding (the plain word lookup) and, on
    the character path, self.shape (dynamic shape of the char embeddings).
    """
    with tf.variable_scope("words"):
        if self.config.embeddings is None:
            print("WARNING: randomly initializing word vectors")
            _word_embeddings = tf.get_variable(
                name="_word_embeddings",
                dtype=tf.float32,
                shape=[self.config.nwords, self.config.dim_word])
        else:
            _word_embeddings = tf.Variable(
                self.config.embeddings,
                name="_word_embeddings",
                dtype=tf.float32,
                trainable=self.config.train_embeddings)

        word_embeddings = tf.nn.embedding_lookup(
            _word_embeddings, self.word_ids, name="word_embeddings")
        self.model_unaware_embedding = tf.identity(word_embeddings)

    pooled_outputs = []
    if self.config.use_chars:
        for i, filter_size in enumerate(self.config.filter_sizes):
            with tf.variable_scope("char_CNN" + str(i)):
                # Created inside the per-filter scope, so every filter size
                # has its own character embedding table.
                _char_embeddings = tf.get_variable(
                    name="_char_embeddings",
                    dtype=tf.float32,
                    shape=[self.config.nchars, self.config.dim_char])

                # shape = (batch, sentence, word, dim of char emb)
                char_embeddings = tf.nn.embedding_lookup(
                    _char_embeddings, self.char_ids, name="char_embeddings")

                # Merge the batch and sentence axes and add a trailing
                # channel axis so conv2d sees one "image" per word.
                s = tf.shape(char_embeddings)
                self.shape = s
                char_embeddings = tf.reshape(
                    char_embeddings,
                    shape=[s[0] * s[1], s[-2], self.config.dim_char, 1])

                conv_weight = tf.get_variable(
                    shape=[filter_size, self.config.dim_char, 1,
                           self.config.hidden_size_char],
                    initializer=tf.truncated_normal_initializer(stddev=0.01),
                    name='conv_weights')
                conv_bias = tf.get_variable(
                    shape=[self.config.hidden_size_char],
                    initializer=tf.zeros_initializer(),
                    name='conv_bias')

                # shape = [batch * sent_len, word_len - filter_size + 1, 1,
                #          hidden_size_char]
                conv = tf.nn.conv2d(char_embeddings, conv_weight,
                                    strides=[1, 1, 1, 1], padding='VALID')
                conv = tf.nn.relu(tf.nn.bias_add(conv, conv_bias))

                # Max over all remaining character positions; the window
                # length is dynamic, hence max_pool_v2.
                pooled = gen_nn_ops.max_pool_v2(
                    conv,
                    ksize=[1, s[-2] - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID')
                conv = tf.reshape(
                    pooled,
                    shape=[s[0], s[1], self.config.hidden_size_char])
                conv = tf.nn.dropout(conv, self.dropout)
                pooled_outputs.append(conv)

    if pooled_outputs:
        char_features = tf.concat(pooled_outputs, axis=-1)
        self.word_embeddings = tf.concat(
            [word_embeddings, char_features], axis=-1)
    else:
        # No character features (use_chars disabled or no filter sizes):
        # there is nothing to concatenate.
        self.word_embeddings = word_embeddings