Example 1
    def simple_nms(self, scores, iterations, radius):
        """Performs non-maximum suppression (NMS) on the heatmap using max-pooling.
        This method does not suppress contiguous points that have the same score.
        It is an approximation of standard NMS and uses iterative propagation.
        Arguments:
            scores: the score heatmap, with shape `[B, H, W]`.
            iterations: an integer scalar, the number of suppression passes.
            radius: an integer scalar, the radius of the NMS window.
        """
        if iterations < 1: return scores
        radius = tf.constant(radius, name='radius')
        size = radius * 2 + 1

        max_pool = lambda x: gen_nn_ops.max_pool_v2(  # supports dynamic ksize
            x[..., None],
            ksize=[1, size, size, 1],
            strides=[1, 1, 1, 1],
            padding='SAME')[..., 0]
        zeros = tf.zeros_like(scores)
        max_mask = tf.equal(scores, max_pool(scores))
        for _ in range(iterations - 1):
            supp_mask = tf.cast(max_pool(tf.to_float(max_mask)), tf.bool)
            supp_scores = tf.where(supp_mask, zeros, scores)
            new_max_mask = tf.equal(supp_scores, max_pool(supp_scores))
            max_mask = max_mask | (new_max_mask & tf.logical_not(supp_mask))
        return tf.where(max_mask, scores, zeros)
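A minimal usage sketch (not from the source), assuming TensorFlow 1.x and that simple_nms is callable as a free function (i.e. without the self argument):

import numpy as np
import tensorflow as tf

heatmap = tf.placeholder(tf.float32, shape=[None, 480, 640])
# two propagation passes with a 9x9 (radius 4) suppression window
nms_scores = simple_nms(heatmap, iterations=2, radius=4)

with tf.Session() as sess:
    out = sess.run(nms_scores,
                   feed_dict={heatmap: np.random.rand(1, 480, 640)})
print(out.shape)  # (1, 480, 640); non-maxima are zeroed, maxima keep their score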
Example 2
def max_pooling_layer(input_tensor,
                      ksize,
                      strides=None,
                      padding="VALID",
                      name='max_pool'):
    """
    最大池化
    :param input_tensor:
    :param ksize:
    :param strides:
    :param padding:
    :param name:
    :return:
    """
    if strides is None:
        strides = [1, 1, 1, 1]

    # 支持动态大小的池化
    output = gen_nn_ops.max_pool_v2(input_tensor,
                                    ksize=ksize,
                                    strides=strides,
                                    padding=padding,
                                    name=name)
    # tf.nn.max_pool requires a statically known ksize, kept for reference:
    # output = tf.nn.max_pool(
    #     input_tensor,
    #     ksize=ksize,
    #     strides=strides,
    #     padding=padding,
    #     name=name
    # )
    return output
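A short usage sketch (assumed, not part of the source): the pooling window is computed from the input's runtime shape, which the plain tf.nn.max_pool above would reject:

import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None, None, None, 64])
h = tf.shape(x)[1]  # dynamic height, only known at run time
w = tf.shape(x)[2]  # dynamic width
# global max pooling via a window that spans the whole feature map,
# giving an output of shape [batch, 1, 1, 64]
pooled = max_pooling_layer(x, ksize=[1, h, w, 1])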
Example 3
    def spp_layer2(self, input_tensor, levels=[2, 1], name='SPP_layer'):
        """Multi-level SPP layer.
        Works for levels such as [1, 2, 3, 6]."""

        self.sp_tensor = input_tensor

        with tf.variable_scope(name):
            pool_outputs = []
            for l in levels:
                pool = gen_nn_ops.max_pool_v2(
                    self.sp_tensor,
                    ksize=[
                        1,
                        tf.math.ceil(
                            tf.math.divide(tf.shape(self.sp_tensor)[1], l)),
                        tf.math.ceil(
                            tf.math.divide(tf.shape(self.sp_tensor)[2], l)), 1
                    ],
                    strides=[
                        1,
                        tf.math.floor(
                            tf.math.divide(tf.shape(self.sp_tensor)[1], l)),
                        tf.math.floor(
                            tf.math.divide(tf.shape(self.sp_tensor)[2], l)), 1
                    ],
                    padding='VALID')

                pool_outputs.append(
                    tf.reshape(pool, [tf.shape(input_tensor)[0], -1]))

            spp_pool = tf.concat(pool_outputs, 1)

            # NOTE: the flat size is hard-coded for levels=[2, 1] on
            # 256-channel feature maps: 2*2*256 + 1*1*256 = 1280
            spp_pool = tf.reshape(spp_pool, (-1, 4 * 256 + 256))
        return spp_pool
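Hypothetical call (the instance name model is assumed): whatever the spatial size of the input, the SPP output has a fixed length, here 2*2*256 + 1*1*256 = 1280:

feature_map = tf.placeholder(tf.float32, [None, None, None, 256])
spp_vec = model.spp_layer2(feature_map, levels=[2, 1])  # shape [batch, 1280]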
Example 4
    def roi_pooling_single(self, box):
        """
        inputs:

        'box': a single tensor of shape [4,] in the format
        [x1, y1, x2, y2]; this is a box on the feature map
        on which we will perform RoI pooling.

        returns:

        'roi_pool': a tensor of shape [pool_h, pool_w, channels],
        a max-pooled slice of the feature map corresponding
        to the input region defined by 'box'.

        Performs RoI pooling on a single RoI box of the
        form [x1, y1, x2, y2].

        We zero-pad the cropped feature map to ensure that the
        resulting crop's height and width are evenly divisible
        by the pool height and width.
        """
       
        box_h = tf.cast(tf.math.ceil(box[3] - box[1]),dtype=tf.int32)
        box_w = tf.cast(tf.math.ceil(box[2] - box[0]),dtype=tf.int32)
        box_x = tf.cast(box[0],dtype=tf.int32)
        box_y = tf.cast(box[1],dtype=tf.int32)
        fm = self.layers['base_model/feature_map']
        fm_chan = tf.shape(fm)[-1]

        crop = tf.image.crop_to_bounding_box(fm,box_y,box_x,box_h,box_w)   

        pool_h = self.cfg.NET.ROI_POOL_HEIGHT
        pool_w = self.cfg.NET.ROI_POOL_WIDTH

        # Pad symmetrically up to the next multiple of the pool size; ceil puts
        # the extra pixel on the left/top when the total padding is odd
        left_pad = tf.to_int32(tf.math.ceil((pool_w * (tf.math.floordiv(box_w, pool_w) + 1) - box_w) / 2))
        right_pad = tf.to_int32(tf.math.floor((pool_w * (tf.math.floordiv(box_w, pool_w) + 1) - box_w) / 2))
        top_pad = tf.to_int32(tf.math.ceil((pool_h * (tf.math.floordiv(box_h, pool_h) + 1) - box_h) / 2))
        bottom_pad = tf.to_int32(tf.math.floor((pool_h * (tf.math.floordiv(box_h, pool_h) + 1) - box_h) / 2))

        pads = [[0,0],[top_pad,bottom_pad],[left_pad,right_pad],[0,0]]
        crop = tf.pad(crop,pads)
        c_sh = tf.shape(crop)

        # Kernel size (and stride) chosen so the padded crop pools down to
        # exactly [pool_h, pool_w]
        k_size = [1, tf.cast(c_sh[1] / pool_h, dtype=tf.int32),
                  tf.cast(c_sh[2] / pool_w, dtype=tf.int32), 1]

        mp = gen_nn_ops.max_pool_v2(crop, k_size, k_size, padding='VALID')

        return mp
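A worked example of the padding arithmetic above (illustrative values): a crop 30 pixels wide that must pool down to a width of 7:

import math

box_w, pool_w = 30, 7
target = pool_w * (box_w // pool_w + 1)       # 35, the next multiple of 7
left_pad = math.ceil((target - box_w) / 2)    # 3 (ceil takes the extra pixel)
right_pad = math.floor((target - box_w) / 2)  # 2
assert (box_w + left_pad + right_pad) % pool_w == 0
# kernel/stride width: 35 // 7 = 5, giving exactly 7 output columns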
Example 5
def spatial_pyramid_pooling(bin_size_list, inputs, rois, feat_stride, padding="VALID"):
    """
    Spatial pyramid pooling layer from SPP-Net
    :param bin_size_list: list of int
    Specify a pyramid level of multi-size bins, each number in list specifies a nxn bins which is
    a part of fixed output size requirement after pooling. For example [1,2,4] would be 3
    regions with 1x1, 2x2 and 4x4 max pools, so 21 outputs per feature map
    :param inputs: output from final convolution-layer
    :param rois: batch indices, regions of interest on images
    :param feat_stride: convolution stride on last convolutional layer
    :param padding: padding algorithm, defaults to "VALID"
    :return: 2-D tensor of shape [batch, vector_size], with the same type as inputs,
    whose length fits the input size of the next fully connected layer
    """
    # Map images' rois to feature maps
    bboxes = tf.stop_gradient(proposal_util.map_rois_to_feature(
        tf.shape(inputs), rois[:, 1:], feat_stride))
    outputs = []
    # Crop every RoI out of the feature map. crop_to_bounding_box takes scalar
    # offsets and sizes, so unpack one box per RoI (this assumes a statically
    # known number of RoIs and boxes in [y1, x1, y2, x2] order)
    crops = [tf.image.crop_to_bounding_box(inputs, box[0], box[1],
                                           box[2] - box[0], box[3] - box[1])
             for box in tf.unstack(bboxes)]
    for crop in crops:
        shape = tf.shape(crop)
        roi_pools = []
        for bin_size in bin_size_list:
            win_h = shape[1] / bin_size
            win_w = shape[2] / bin_size
            pool_size = [1, tf.to_int32(tf.ceil(win_h)), tf.to_int32(tf.ceil(win_w)), 1]
            pool_stride = [1, tf.to_int32(tf.floor(win_h)), tf.to_int32(tf.floor(win_w)), 1]
            # Original max_pooling function does not support tensor-like pool_size and pool_stride
            # One solution is that max_pooling can be replaced by reduce_max after some transformation
            # on inputs(https://github.com/Sarthak-02/keras-spp/blob/master/SpatialPyramidPooling.py),
            # another solution is reconstructing max_pooling, already done by
            # [https://github.com/yongtang, https://github.com/tensorflow/tensorflow/pull/11875]
            # Pool the current crop rather than the whole input feature map
            results = gen_nn_ops.max_pool_v2(crop, pool_size, pool_stride, padding=padding)
            # list.append returns None, so collect first and concatenate
            # once per RoI after the inner loop
            roi_pools.append(tf.layers.flatten(results))
        outputs.append(tf.concat(roi_pools, axis=1))
    return tf.concat(outputs, axis=1)
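A quick check of the bin count claimed in the docstring: levels [1, 2, 4] yield 1 + 4 + 16 = 21 pooled values per feature-map channel:

bin_size_list = [1, 2, 4]
print(sum(b * b for b in bin_size_list))  # 21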
Example 6
    def add_word_embeddings_op(self):
        """Defines self.word_embeddings
        If self.config.embeddings is not None and is a np array initialized
        with pre-trained word vectors, the word embeddings are just a look-up
        and we don't train the vectors. Otherwise, a random matrix with
        the correct shape is initialized.
        """
        with tf.variable_scope("words"):
            if self.config.embeddings is None:
                print("WARNING: randomly initializing word vectors")
                _word_embeddings = tf.get_variable(
                    name="_word_embeddings",
                    dtype=tf.float32,
                    shape=[self.config.nwords, self.config.dim_word])
            else:
                _word_embeddings = tf.Variable(
                    self.config.embeddings,
                    name="_word_embeddings",
                    dtype=tf.float32,
                    trainable=self.config.train_embeddings)

            word_embeddings = tf.nn.embedding_lookup(_word_embeddings,
                                                     self.word_ids, name="word_embeddings")
        self.model_unaware_embedding = tf.identity(word_embeddings)

        pooled_outputs = []
        for i, filter_size in enumerate(self.config.filter_sizes):
            with tf.variable_scope("char_CNN" + str(i)):
                if self.config.use_chars:
                    _char_embeddings = tf.get_variable(
                        name="_char_embeddings",
                        dtype=tf.float32,
                        shape=[self.config.nchars, self.config.dim_char])

                    # [shape = (batch, sentence, word, dim of char emb)]
                    char_embeddings = tf.nn.embedding_lookup(_char_embeddings,
                                                             self.char_ids, name="char_embeddings")

                    # put the time dimension on axis=1
                    s = tf.shape(char_embeddings)
                    self.shape = s
                    char_embeddings = tf.reshape(char_embeddings,
                                                 shape=[s[0] * s[1], s[-2], self.config.dim_char, 1])

                    conv_weight = tf.get_variable(
                        shape=[filter_size, self.config.dim_char, 1, self.config.hidden_size_char],
                        initializer=tf.truncated_normal_initializer(stddev=0.01),
                        name='conv_weights'
                    )
                    conv_bias = tf.get_variable(
                        shape=[self.config.hidden_size_char],
                        initializer=tf.zeros_initializer(),
                        name='conv_bias'
                    )

                    # shape = [batch*sent_len, word_len - filter_size + 1, 1, self.config.hidden_size_char]
                    conv = tf.nn.conv2d(char_embeddings, conv_weight, strides=[1, 1, 1, 1], padding='VALID')
                    conv = tf.nn.relu(tf.nn.bias_add(conv, conv_bias))
                    pooled = gen_nn_ops.max_pool_v2(conv,
                                                    ksize=[1, s[-2] - filter_size + 1, 1, 1],
                                                    strides=[1, 1, 1, 1],
                                                    padding='VALID')

                    conv = tf.reshape(pooled, shape=[s[0], s[1], self.config.hidden_size_char])
                    conv = tf.nn.dropout(conv, self.dropout)
                    pooled_outputs.append(conv)
        # Concatenate the char features from all filter sizes, once after the loop
        conv = tf.concat(pooled_outputs, axis=-1)

        self.word_embeddings = tf.concat([word_embeddings, conv], axis=-1)
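A self-contained shape walk-through of the char-CNN pooling above, with assumed sizes (640 words of 15 characters each, dim_char=50, filter_size=3, hidden_size_char=64):

import numpy as np
import tensorflow as tf
from tensorflow.python.ops import gen_nn_ops

chars = tf.constant(np.random.rand(640, 15, 50, 1), tf.float32)
weights = tf.constant(np.random.rand(3, 50, 1, 64), tf.float32)
conv = tf.nn.conv2d(chars, weights, strides=[1, 1, 1, 1],
                    padding='VALID')                  # (640, 13, 1, 64)
pooled = gen_nn_ops.max_pool_v2(conv, ksize=[1, 15 - 3 + 1, 1, 1],
                                strides=[1, 1, 1, 1],
                                padding='VALID')      # (640, 1, 1, 64)
print(pooled.shape)  # one max activation per filter, per word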