예제 #1
0
def yolo_head(feats, anchors, num_classes, input_shape):
    """Decode raw final-layer features into bounding-box parameters.

    Args:
        feats: Raw feature tensor. Axes 1 and 2 are used as grid height and
            width; the remaining values are regrouped into ``num_anchors``
            slots of ``num_classes + 5`` entries each.
        anchors: Anchor sizes, one pair of values per anchor.
        num_classes: Number of class scores predicted per anchor.
        input_shape: Two-element shape, reversed before scaling box sizes.

    Returns:
        Tuple ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    feat_dtype = K.dtype(feats)
    anchor_count = len(anchors)
    # Shape the anchors so they broadcast over batch, height and width.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, anchor_count, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    rows = grid_shape[0]
    cols = grid_shape[1]

    # Per-cell (x, y) offsets, one pair for every grid position.
    row_index = K.reshape(K.arange(0, stop=rows), [-1, 1, 1, 1])
    col_index = K.reshape(K.arange(0, stop=cols), [1, -1, 1, 1])
    grid_y = K.tile(row_index, [1, cols, 1, 1])
    grid_x = K.tile(col_index, [rows, 1, 1, 1])
    grid = K.cast(K.concatenate([grid_x, grid_y]), feat_dtype)

    feats = K.reshape(feats, [-1, rows, cols, anchor_count, num_classes + 5])

    # Centres: sigmoid offset inside the cell plus the cell index, divided by
    # the grid size. Sizes: exponential scale applied to the anchor, divided
    # by the reversed input shape.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[::-1], feat_dtype)
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[::-1], feat_dtype)
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    return box_xy, box_wh, box_confidence, box_class_probs
예제 #2
0
    def get_split_averages(input_tensor, input_mask, indices):
        """Split a sequence around an index and summarise the three parts.

        Returns the average of the positions before the index, the value at
        the index, and the average of the positions after the index.

        input_tensor: (batch_size, input_length, input_dim)
        input_mask: (batch_size, input_length), or None
        indices: (batch_size, 1)
        """
        tensor_shape = K.shape(input_tensor)
        zero_input = K.zeros_like(input_tensor)

        # Position ids 0..input_length-1 repeated for every batch row, and
        # the split index repeated for every position.
        positions = K.expand_dims(K.arange(tensor_shape[1]), dim=0)  # (1, input_length)
        positions = K.repeat_elements(positions, tensor_shape[0], 0)  # (batch_size, input_length)
        split_at = K.repeat_elements(indices, tensor_shape[1], 1)  # (batch_size, input_length)

        after_mask = K.greater(positions, split_at)
        before_mask = K.lesser(positions, split_at)
        at_mask = K.equal(positions, split_at)

        # Padded positions must not contribute to either average.
        if input_mask is not None:
            after_mask = switch(input_mask, after_mask, K.zeros_like(after_mask))
            before_mask = switch(input_mask, before_mask, K.zeros_like(before_mask))

        def _masked_sum(position_mask):
            # Zero out unselected positions, then sum over the length axis.
            selected = switch(K.expand_dims(position_mask), input_tensor, zero_input)
            return K.sum(selected, axis=1)  # (batch_size, input_dim)

        def _normalizer(position_mask):
            # Selected-position count, with epsilon to avoid dividing by zero.
            return K.expand_dims(K.sum(position_mask, axis=1) + K.epsilon(), dim=1)  # (batch_size, 1)

        post_sum = _masked_sum(after_mask)
        pre_sum = _masked_sum(before_mask)
        values_at_indices = _masked_sum(at_mask)

        post_average = K.cast(post_sum / _normalizer(after_mask), 'float32')
        pre_average = K.cast(pre_sum / _normalizer(before_mask), 'float32')

        return pre_average, values_at_indices, post_average
예제 #3
0
파일: layers.py 프로젝트: chmp/flowly
 def call(self, x):
     """Return ``self.num`` evenly spaced values from start to stop per batch row.

     The values run from ``self.start`` to ``self.stop`` inclusive and are
     returned with shape ``(batch, self.num, 1)``, where ``batch`` is the
     first dimension of ``x``. Only the batch size of ``x`` is used.
     """
     # With a single point there is no interval to divide; clamp the
     # denominator so the result is ``self.start`` instead of NaN (0 / 0).
     intervals = max(self.num - 1, 1)
     r = K.cast(K.arange(self.num), K.floatx()) / float(intervals)
     r = self.start + (self.stop - self.start) * r
     # (num,) -> (1, num, 1)
     r = K.expand_dims(K.expand_dims(r), axis=0)
     # start/stop may carry another dtype, so cast back to floatx.
     r = K.cast(r, dtype=K.floatx())
     r = K.tile(r, (K.shape(x)[0], 1, 1))
     return r
예제 #4
0
def accumulate(attend_function, inputs, input_length,
                                mask=None, return_probabilities=False):
    '''Compute a running attention over a sequence.

    For every step ``i`` in ``range(input_length)`` the inputs are masked so
    that only steps ``0..i`` are uncovered (later steps are replaced by
    zeros), and ``attend_function`` is applied to that partial sequence. At
    the first step it sees one item, at the second step two items, and so on.

    attend_function: callable taking the uncovered inputs (and the uncovered
        mask when ``mask`` is given). It must return a 2-dimensional tensor,
        or an ``(output, p_vectors)`` pair when ``return_probabilities`` is
        true.
    inputs: tensor with at least 3 dimensions; its first two axes are
        swapped internally so that the stepped-over axis comes first.
    input_length: Python int, number of steps to iterate over.
    mask: optional tensor with ``ndim`` or ``ndim - 1`` dimensions.
    return_probabilities: when true, also collect and return the second
        value produced by ``attend_function``.

    Returns the stacked outputs with the first two axes swapped back, or
    ``[outputs, out_pvecs]`` when ``return_probabilities`` is true.

    this is basically a mod on keras' rnn implementation
    author: bcm
    '''

    ndim = inputs.ndim
    assert ndim >= 3, 'inputs should be at least 3d'

    # Swap the first two axes so the axis we step over comes first.
    axes = [1, 0] + list(range(2, ndim))
    inputs = inputs.dimshuffle(axes)

    if mask is not None:
        if mask.ndim == ndim-1:
            mask = K.expand_dims(mask)
        assert mask.ndim == ndim
        mask = mask.dimshuffle(axes)

    successive_outputs = []
    successive_pvecs = []
    uncover_mask = K.zeros_like(inputs)
    # Step ids shaped (input_length, 1, ..., 1) so they broadcast over inputs.
    uncover_indices = K.arange(input_length)
    for _ in range(ndim-1):
        uncover_indices = K.expand_dims(uncover_indices)

    def make_subset(i, X):
        # Keep steps 0..i of X and replace everything later with zeros.
        return K.switch(uncover_indices <= i, X, uncover_mask)

    for i in range(input_length):
        inputs_i = make_subset(i, inputs)
        # attend_function should marginalise over the step axis, i.e. its
        # output must not contain that dimension.
        if mask is not None:
            # Only subset the mask when there is one; doing it
            # unconditionally fails for the default ``mask=None``.
            output = attend_function(inputs_i, make_subset(i, mask))
        else:
            output = attend_function(inputs_i)
        if return_probabilities:
            output, p_vectors = output
            successive_pvecs.append(p_vectors)
        assert output.ndim == 2, "Your attention function is malfunctioning; the attention accumulator should return 2 dimensional tensors"
        successive_outputs.append(output)

    # Stack along a new leading step axis, then swap the first two axes back.
    outputs = K.pack(successive_outputs)
    axes = [1, 0] + list(range(2, outputs.ndim))
    outputs = outputs.dimshuffle(axes)

    if return_probabilities:
        out_pvecs = K.pack(successive_pvecs)
        # NOTE(review): this reuses the permutation computed from `outputs`,
        # so it assumes the stacked p-vectors have the same rank - confirm.
        out_pvecs = out_pvecs.dimshuffle(axes)
        outputs = [outputs, out_pvecs]

    return outputs
예제 #5
0
 def call(self, x, mask=None):
     """Compose head, preposition and child encodings and score the labels.

     x[0]: (batch_size, input_length, input_dim) encoded sentence
     x[1]: (batch_size, 1) indices of prepositions
     x[2]: optional, (batch_size, input_length - 2) head probabilities; only
         read when ``self.with_attachment_probs`` is set

     Returns the softmax over class scores, (batch_size, num_classes).
     """
     assert isinstance(x, (list, tuple))
     encoded_sentence = x[0]
     prep_indices = K.squeeze(x[1], axis=-1)  # (batch_size,)
     row_ids = K.arange(K.shape(encoded_sentence)[0])  # (batch_size,)
     if not self.with_attachment_probs:
         # The head is taken to be the token right before the preposition.
         head_encoding = encoded_sentence[row_ids, prep_indices - 1, :]  # (batch_size, input_dim)
     else:
         # Equivalent in spirit to K.argmax(x[2]), but argmax is not
         # differentiable, so select the max-probability rows with a mask.
         probs = x[2]
         pad = K.zeros_like(x[2])[:, :2]  # (batch_size, 2)
         padded_probs = K.concatenate([probs, pad])  # (batch_size, input_length)
         row_max = K.expand_dims(K.max(padded_probs, axis=1))  # (batch_size, 1)
         is_max = K.expand_dims(K.equal(padded_probs, row_max))  # (batch_size, input_length, 1)
         picked = K.switch(is_max, encoded_sentence, K.zeros_like(encoded_sentence))
         head_encoding = K.sum(picked, axis=1)  # (batch_size, input_dim)
     prep_encoding = encoded_sentence[row_ids, prep_indices, :]  # (batch_size, input_dim)
     child_encoding = encoded_sentence[row_ids, prep_indices+1, :]  # (batch_size, input_dim)
     '''
     prep_indices = x[1]
     sentence_mask = mask[0]
     if sentence_mask is not None:
         if K.ndim(sentence_mask) > 2:
             # This means this layer came after a Bidirectional layer. Keras has this bug which
             # concatenates input masks instead of output masks.
             # TODO: Fix Bidirectional instead.
             sentence_mask = K.any(sentence_mask, axis=(-2, -1))
     head_encoding, prep_encoding, child_encoding = self.get_split_averages(encoded_sentence, sentence_mask,
                                                                            prep_indices)
     '''
     # Project each role into the shared space: (batch_size, proj_dim) each.
     head_projection = K.dot(head_encoding, self.proj_head)
     prep_projection = K.dot(prep_encoding, self.proj_prep)
     child_projection = K.dot(child_encoding, self.proj_child)

     composition = self.composition_type
     if composition == 'HPCT':
         pre_activation = head_projection + prep_projection + child_projection
     elif composition == 'HPC':
         # Compose preposition and child first, then add the head.
         pre_activation = head_projection + K.tanh(prep_projection + child_projection)
     else:
         # Composition type is HC: the preposition is ignored.
         pre_activation = head_projection + child_projection
     composed_projection = K.tanh(pre_activation)  # (batch_size, proj_dim)

     for hidden_layer in self.hidden_layers:
         composed_projection = K.tanh(K.dot(composed_projection, hidden_layer))

     # (batch_size, num_classes)
     return K.softmax(K.dot(composed_projection, self.scorer))
예제 #6
0
    def call(self, x, mask=None):
        """Look up embeddings from ``self.W``, optionally with row dropout.

        When ``x`` (or ``mask``) is a list, only its first of two elements is
        used. In 'matrix' mode this is a plain gather; in 'tensor' mode the
        weights carry a leading axis that is paired with the leading axis of
        ``x`` and only matching pairs are kept.
        """
        if isinstance(x, list):
            x, _ = x
        if mask is not None and isinstance(mask, list):
            mask, _ = mask

        W = self.W
        if 0. < self.dropout < 1.:
            # Drop whole embedding rows during training and rescale the
            # survivors by 1 / keep probability.
            keep_prob = 1. - self.dropout
            row_dims = self.W._keras_shape[:-1]
            row_mask = K.random_binomial(row_dims, p=keep_prob) * (1. / keep_prob)
            row_mask = K.expand_dims(row_mask)
            W = K.in_train_phase(self.W * row_mask, self.W)

        if self.mode == 'matrix':
            return K.gather(W, x)
        if self.mode != 'tensor':
            raise Exception('sanity check. should not be here.')

        # Quick and dirty: tensor mode only supports 3-dimensional inputs.
        assert K.ndim(x) == 3
        # W is (B, S, F) and x is (B, S, A). Move the sequence axis of W to
        # the front, gather with x, bring both batch axes to the front, and
        # keep only the entries where the two batch positions agree.
        inds = K.arange(self.W._keras_shape[0])
        gathered = K.gather(K.permute_dimensions(W, (1, 0, 2)), x)
        gathered = K.permute_dimensions(gathered, (0, 3, 1, 2, 4))
        return K.gather(gathered, (inds, inds))

        #all_dims = T.arange(len(self.W._keras_shape))
        #first_shuffle = [all_dims[self.embed_dim]] + all_dims[:self.embed_dim] + all_dims[self.embed_dim+1:]
        ## 1. take diagonal from 0th to
        ## chang eof tactics
        ## embed on time or embed on batch. that's all I'm supporting.  
        ## if it's embed on time, then, x.ndim+1 is where batch will be, and is what
        ## i need to take the diagonal over. 
        ## now dim shuffle the xdims + 1 to the front.
        #todo: get second shuffle or maybe find diagonal calculations
        #out = K.gather(W, x)
        #return out

        ### reference
        #A = S(np.arange(60).reshape(3,4,5))
        #x = S(np.random.randint(0, 4, (3,4,10)))
        #x_emb = A.dimshuffle(1,0,2)[x].dimshuffle(0,3,1,2,4)[T.arange(A.shape[0]), T.arange(A.shape[0])]
예제 #7
0
 def idx2pos(self, pid):
     """Map position ids to sinusoidal position vectors of width ``self.v_dim``.

     Each id is multiplied by ``self.v_dim // 2`` inverse frequencies; the
     sine and cosine of every product are placed next to each other in the
     output. Assumes ``pid`` is 2-dimensional (batch, length) - TODO confirm.
     """
     half_dim = self.v_dim // 2
     positions = K.expand_dims(K.cast(pid, 'float32'), 2)

     # Inverse frequencies 1 / 10000^(2j / v_dim) for j in [0, half_dim).
     exponents = 2. / self.v_dim * K.arange(half_dim, dtype='float32')
     inv_freq = K.expand_dims(1. / K.pow(10000., exponents), 0)

     angles = K.dot(positions, inv_freq)
     sin_part = K.expand_dims(K.sin(angles), 3)
     cos_part = K.expand_dims(K.cos(angles), 3)
     # Pair (sin, cos) on a trailing axis, then flatten the pairs to v_dim.
     paired = K.concatenate([sin_part, cos_part], 3)
     target_shape = (K.shape(paired)[0], K.shape(paired)[1], self.v_dim)
     return K.reshape(paired, target_shape)
예제 #8
0
    def __init__(self, landmarks, **kwargs):
        '''
        Set up the layer around a fixed set of landmarks.

        landmarks:
            2-D array of fixed landmarks; its shape is read as
            (num_landmark, num_feature) and a float32 copy is stored.
        '''
        super(GaussianKernel2, self).__init__(**kwargs)
        self.landmarks = landmarks.astype(np.float32)
        landmark_count, feature_count = landmarks.shape
        self.num_landmark = landmark_count
        self.num_feature = feature_count
        # One output per landmark.
        self.output_dim = landmark_count

        # Index tensor 0..output_dim-1, kept for looping over the landmarks.
        self.indx = K.arange(self.output_dim)
예제 #9
0
    def call(self, inputs, mask=None, **kwargs):
        """Apply sequence self-attention to ``inputs``.

        Emission scores come from the additive or multiplicative helper,
        depending on ``self.attention_type``. They are optionally restricted
        to a local window and to unmasked positions, normalised per row, and
        used to form a weighted sum of the inputs.

        Returns the attended tensor, or ``[attended, attention]`` when
        ``self.return_attention`` is set.
        """
        seq_len = K.shape(inputs)[1]

        if self.attention_type == SeqSelfAttention.ATTENTION_TYPE_ADD:
            e = self._call_additive_emission(inputs)
        elif self.attention_type == SeqSelfAttention.ATTENTION_TYPE_MUL:
            e = self._call_multiplicative_emission(inputs)

        if self.attention_activation is not None:
            e = self.attention_activation(e)
        # Subtract the row maximum before exponentiating for stability.
        e = K.exp(e - K.max(e, axis=-1, keepdims=True))

        width = self.attention_width
        if width is not None:
            # Window start per query position: look only backwards when
            # history_only is set, otherwise centre the window.
            start_offset = (width - 1) if self.history_only else width // 2
            lower = K.expand_dims(K.arange(seq_len) - start_offset, axis=-1)
            upper = lower + width
            indices = K.tile(K.expand_dims(K.arange(seq_len), axis=0),
                             [seq_len, 1])
            at_or_after_lower = K.cast(lower <= indices, K.floatx())
            before_upper = K.cast(indices < upper, K.floatx())
            e = e * at_or_after_lower * before_upper

        if mask is not None:
            # Zero both the rows and the columns of masked positions.
            mask = K.expand_dims(K.cast(mask, K.floatx()))
            rows_masked = K.permute_dimensions(e * mask, (0, 2, 1))
            e = K.permute_dimensions(rows_masked * mask, (0, 2, 1))

        # a_{t} = \text{softmax}(e_t)
        normalizer = K.sum(e, axis=-1, keepdims=True)
        a = e / (normalizer + K.epsilon())

        # l_t = \sum_{t'} a_{t, t'} x_{t'}
        v = K.batch_dot(a, inputs)
        if self.attention_regularizer_weight > 0.0:
            self.add_loss(self._attention_regularizer(a))

        return [v, a] if self.return_attention else v
예제 #10
0
    def _call_cat(self, inputs, mask=None):
        """Sample the same spatial locations from every output and concatenate them.

        Runs ``self._call_normal`` on ``inputs``, builds a list of coordinate
        pairs over axes 1 and 2 of ``inputs``, keeps at most
        ``self.n_samples`` of them (after optional ``mask`` selection), and
        gathers the features at those coordinates from each output. The
        gathered features are concatenated along the last axis.

        NOTE(review): ``xx``/``yy`` are updated in place across loop
        iterations, so the halving below is cumulative over successive
        smaller outputs. Statement order matters here.
        """
        outputs = self._call_normal(inputs)

        # Coordinate pairs over axes 1 and 2 of the input, one row per pair.
        xx, xy = tf.meshgrid(K.arange(inputs.shape[1]),
                             K.arange(inputs.shape[2]))
        xx = K.expand_dims(K.flatten(xx), 1)
        xy = K.expand_dims(K.flatten(xy), 1)
        xc = K.concatenate([xx, xy], axis=1)

        # Optionally keep only the coordinate rows selected by the mask.
        if mask is not None:
            xc = xc[mask, :]

        # Never ask for more samples than there are coordinate rows.
        n_samples = min(self.n_samples, xc.shape[0])

        xx = xc[:n_samples, 0]
        yy = xc[:n_samples, 1]

        ## TODO: double-check this against the PyTorch tensor shaping ##

        ls = []
        for i, out in enumerate(outputs):
            # Each output is unpacked as a 4-D shape; b is not used below.
            b, w, h, c = out.shape
            x = out

            # When this output is smaller along axis 1 than the previous one,
            # halve the (already adjusted) coordinates.
            if i > 0 and out.shape[1] < outputs[i - 1].shape[1]:
                xx = K.cast(xx, 'float32') / 2.
                yy = K.cast(yy, 'float32') / 2.

            # Clamp to valid positions of this output and return to integers.
            xx = K.cast(K.clip(xx, 0, out.shape[1] - 1), 'int32')
            yy = K.cast(K.clip(yy, 0, out.shape[2] - 1), 'int32')

            # Flat index into the (w * h) axis of the reshaped output.
            idx = xx * h + yy
            x = tf.gather(K.reshape(x, (-1, w * h, c)), idx, axis=1)
            x = K.expand_dims(x, axis=2)

            ls.append(x)  # NOTICE: the original code does clone() and detach()

        out = K.concatenate(
            ls, axis=-1)  # NOTICE: the original code does contiguous()
        return out
예제 #11
0
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Decode raw YOLO features into box centres, sizes, confidence and classes.

    Args:
        feats: Raw feature tensor. Axes 1 and 2 are used as the grid height
            and width; the values are regrouped into ``num_anchors`` slots of
            ``num_classes + 5`` entries.
        anchors: Anchor sizes, one pair of values per anchor.
        num_classes: Number of class scores per anchor.
        input_shape: Two-element shape, reversed before scaling box sizes.
        calc_loss: When equal to ``True``, return the tensors used for the
            loss instead of the prediction outputs.

    Returns:
        ``(grid, feats, box_xy, box_wh)`` when computing the loss, otherwise
        ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    anchor_count = len(anchors)  # number of anchor boxes
    feat_dtype = K.dtype(feats)

    # Anchors as a tensor of shape [1, 1, 1, num_anchors, 2], i.e.
    # [batch, height, width, num_anchors, box_params].
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, anchor_count, 2])

    # Grid of cell coordinates; also needed when computing the loss.
    grid_shape = K.shape(feats)[1:3]
    grid_h, grid_w = grid_shape[0], grid_shape[1]
    y_column = K.reshape(K.arange(0, stop=grid_h), [-1, 1, 1, 1])
    x_row = K.reshape(K.arange(0, stop=grid_w), [1, -1, 1, 1])
    grid_y = K.tile(y_column, [1, grid_w, 1, 1])
    grid_x = K.tile(x_row, [grid_h, 1, 1, 1])
    grid = K.cast(K.concatenate([grid_x, grid_y]), feat_dtype)

    # Reshape feats to [-1, height, width, num_anchors, num_classes + 5].
    feats = K.reshape(
        feats, [-1, grid_h, grid_w, anchor_count, num_classes + 5])

    # The last axis holds, in order:
    #   0-1: centre point (xy), 2-3: size (wh), 4: confidence,
    #   5+:  per-class scores.
    raw_xy = feats[..., :2]
    raw_wh = feats[..., 2:4]
    box_xy = (K.sigmoid(raw_xy) + grid) / K.cast(grid_shape[::-1], feat_dtype)
    box_wh = K.exp(raw_wh) * anchors_tensor / K.cast(
        input_shape[::-1], feat_dtype)
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    # Loss computation gets grid, feats, box_xy, box_wh.
    if calc_loss == True:
        return grid, feats, box_xy, box_wh

    # Prediction gets box_xy, box_wh, box_confidence, box_class_probs.
    return box_xy, box_wh, box_confidence, box_class_probs
예제 #12
0
    def call(self, x, mask=None):
        """Apply windowed multiplicative self-attention to ``x``.

        Each position attends only to itself and the preceding
        ``self.attention_width - 1`` positions. Scores are ``x W_a x^T``,
        turned into weights with a softmax restricted to that window, and the
        result is the weighted sum of ``x``.

        ``x`` is treated as a 3-D tensor whose axis 1 is the sequence axis.
        ``mask`` is accepted for interface compatibility but is not applied.
        """
        input_len = K.shape(x)[1]

        # e_{t, t'} = x_t^T W_a x_{t'}
        e = K.batch_dot(K.dot(x, self.Wa), K.permute_dimensions(x, (0, 2, 1)))
        # Exponentiate (shifted by the row maximum for numerical stability)
        # so that the normalisation below really is a softmax.
        e = K.exp(e - K.max(e, axis=-1, keepdims=True))

        # Zero every score outside the history window of each position.
        lower = K.arange(0, input_len) - (self.attention_width - 1)
        lower = K.expand_dims(lower, axis=-1)
        upper = lower + self.attention_width
        indices = K.expand_dims(K.arange(0, input_len), axis=0)
        e = e * K.cast(lower <= indices, K.floatx()) * K.cast(indices < upper, K.floatx())

        # a_{t} = \text{softmax}(e_t)
        s = K.sum(e, axis=-1, keepdims=True)
        a = e / (s + K.epsilon())

        # l_t = \sum_{t'} a_{t, t'} x_{t'}
        v = K.batch_dot(a, x)

        return v
예제 #13
0
def kappa_keras(y_true, y_pred):
    """Compute a weighted-kappa based score from one-hot style inputs.

    Both inputs are reduced to integer ratings with ``argmax`` over the last
    axis. Disagreements are weighted by the squared difference between the
    two ratings, and the function returns ``1 - observed / expected`` of
    those weighted disagreements.
    """
    y_true = K.cast(K.argmax(y_true, axis=-1), dtype='int32')
    y_pred = K.cast(K.argmax(y_pred, axis=-1), dtype='int32')

    # Figure out normalized expected values
    min_rating = K.minimum(K.min(y_true), K.min(y_pred))
    max_rating = K.maximum(K.max(y_true), K.max(y_pred))

    # Shift the values so that the lowest value is 0 (to support scales that
    # include negative values). A plain broadcasted subtraction gives the
    # same result as mapping over the elements one by one, without the loop.
    y_true = y_true - min_rating
    y_pred = y_pred - min_rating

    # Build the observed/confusion matrix
    num_ratings = max_rating - min_rating + 1
    observed = tf.math.confusion_matrix(y_true,
                                        y_pred,
                                        num_classes=num_ratings)
    num_scored_items = K.shape(y_true)[0]

    # weights[i, j] = (i - j) ** 2
    weights = K.expand_dims(K.arange(num_ratings), axis=-1) - K.expand_dims(
        K.arange(num_ratings), axis=0)
    weights = K.cast(K.pow(weights, 2), dtype='float64')

    # Rating histograms as fractions; their outer product is the expected
    # matrix under independence.
    hist_true = tf.math.bincount(y_true, minlength=num_ratings)
    hist_true = hist_true[:num_ratings] / num_scored_items
    hist_pred = tf.math.bincount(y_pred, minlength=num_ratings)
    hist_pred = hist_pred[:num_ratings] / num_scored_items
    expected = K.dot(K.expand_dims(hist_true, axis=-1),
                     K.expand_dims(hist_pred, axis=0))

    # Normalize observed array
    observed = observed / num_scored_items

    # If all weights are zero, that means no disagreements matter.
    score = tf.where(K.any(K.not_equal(weights, 0)),
                     K.sum(weights * observed) / K.sum(weights * expected), 0)

    return 1. - score
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters.

    ``feats`` is a 4-D tensor whose axes 1 and 2 form the prediction grid and
    whose last axis packs ``num_anchors * (num_classes + 5)`` values.

    Returns ``(grid, feats, box_xy, box_wh)`` when ``calc_loss`` equals
    ``True`` and ``(box_xy, box_wh, box_confidence, box_class_probs)``
    otherwise.
    """
    n_anchors = len(anchors)
    out_dtype = K.dtype(feats)

    def _axis_index(length, shape):
        # 0..length-1 laid out along a single axis of a 4-D tensor.
        return K.reshape(K.arange(0, stop=length), shape)

    # Reshape to batch, height, width, num_anchors, box_params. Every anchor
    # is a 2-vector, hence the trailing dimension of 2.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, n_anchors, 2])

    # Height and width of the output feature map, as a 1-D tensor of two.
    grid_shape = K.shape(feats)[1:3]
    height, width = grid_shape[0], grid_shape[1]

    # K.tile repeats the tensor along each axis; together these give the
    # coordinates of every cell on the feature map.
    grid_y = K.tile(_axis_index(height, [-1, 1, 1, 1]), [1, width, 1, 1])
    grid_x = K.tile(_axis_index(width, [1, -1, 1, 1]), [height, 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, out_dtype)

    feats = K.reshape(
        feats, [-1, height, width, n_anchors, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    # Centre: sigmoid offset plus the cell coordinate, divided by the grid
    # size to obtain a relative position.
    centre_offset = K.sigmoid(feats[..., :2])
    box_xy = (centre_offset + grid) / K.cast(grid_shape[::-1], out_dtype)
    # Width and height use an exponential scale on the anchor size.
    size_scale = K.exp(feats[..., 2:4])
    box_wh = size_scale * anchors_tensor / K.cast(input_shape[::-1], out_dtype)
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    # The loss path works from labels and does not need the confidence or
    # class outputs; inference does.
    if calc_loss == True:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
예제 #15
0
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters.

    Returns ``(grid, feats, box_xy, box_wh)`` when ``calc_loss`` equals
    ``True``; otherwise ``(box_xy, box_wh, box_confidence,
    box_class_probs)``. ``feats`` in the first tuple is the reshaped tensor
    with axes (batch, height, width, num_anchors, num_classes + 5).
    """
    num_anchors = len(anchors)
    dtype_name = K.dtype(feats)

    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    n_rows = grid_shape[0]
    n_cols = grid_shape[1]

    # grid[y, x, 0] holds the (x, y) position of each cell.
    ys = K.reshape(K.arange(0, stop=n_rows), [-1, 1, 1, 1])
    xs = K.reshape(K.arange(0, stop=n_cols), [1, -1, 1, 1])
    grid = K.concatenate([
        K.tile(xs, [n_rows, 1, 1, 1]),
        K.tile(ys, [1, n_cols, 1, 1]),
    ])
    grid = K.cast(grid, dtype_name)

    feats = K.reshape(
        feats, [-1, n_rows, n_cols, num_anchors, num_classes + 5])

    raw_xy = feats[..., :2]
    raw_wh = feats[..., 2:4]
    raw_confidence = feats[..., 4:5]
    raw_classes = feats[..., 5:]

    # Adjust predictions to each spatial grid point and anchor size.
    grid_size = K.cast(grid_shape[::-1], dtype_name)
    image_size = K.cast(input_shape[::-1], dtype_name)
    box_xy = (K.sigmoid(raw_xy) + grid) / grid_size
    box_wh = K.exp(raw_wh) * anchors_tensor / image_size
    box_confidence = K.sigmoid(raw_confidence)
    box_class_probs = K.sigmoid(raw_classes)

    if calc_loss == True:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
예제 #16
0
def yolo_head(feats,
              anchors,
              num_classes,
              input_shape,
              calc_loss=False):
    """Convert final layer features to bounding box parameters.

    This variant expects ``num_classes + 9`` values per anchor: 2 centre
    offsets, 2 log sizes, 4 sigma values, 1 confidence and the class scores.

    Returns, when ``calc_loss`` equals ``True``:
        ``(grid, box_delta_xy, box_log_wh, box_sigma, box_xy, box_wh,
        box_confidence, box_class_probs)``
    otherwise:
        ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    anchor_count = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, anchor_count, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    height, width = grid_shape[0], grid_shape[1]
    row_ids = K.reshape(K.arange(0, stop=height), [-1, 1, 1, 1])
    col_ids = K.reshape(K.arange(0, stop=width), [1, -1, 1, 1])
    grid_y = K.tile(row_ids, [1, width, 1, 1])
    grid_x = K.tile(col_ids, [height, 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y], axis=-1)

    feats = K.reshape(
        feats, [-1, height, width, anchor_count, num_classes + 9])
    # The grid is cast only after feats has been reshaped.
    feat_dtype = K.dtype(feats)
    grid = K.cast(grid, feat_dtype)

    # Raw (pre-decoding) quantities.
    box_delta_xy = K.sigmoid(feats[..., :2])
    box_log_wh = feats[..., 2:4]
    box_sigma = K.sigmoid(feats[..., 4:8])
    box_confidence = K.sigmoid(feats[..., 8:9])
    box_class_probs = K.sigmoid(feats[..., 9:])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (box_delta_xy + grid) / K.cast(grid_shape[::-1], feat_dtype)
    box_wh = K.exp(box_log_wh) * anchors_tensor / K.cast(
        input_shape[::-1], feat_dtype)

    if calc_loss == True:
        return (grid, box_delta_xy, box_log_wh, box_sigma, box_xy, box_wh,
                box_confidence, box_class_probs)
    return box_xy, box_wh, box_confidence, box_class_probs
 def call(self, x, **kwargs):
     """Build sinusoidal position encodings and combine them with ``x``.

     In 'sum' mode (or when ``self.d_pos_enc`` is unset) the encoding width
     is taken from the last dimension of ``x`` and stored on the layer. The
     encoding is added to ``x`` in 'sum' mode and concatenated in front of
     ``x`` along the last axis in 'concat' mode; any other mode yields None.
     """
     if not self.d_pos_enc or self.mode == 'sum':
         self.d_pos_enc = int(x.shape[-1])
     width = self.d_pos_enc

     inv_freq = 1. / K.pow(10000., 2 * K.arange(width/2, dtype='float32') / width)
     inv_freq = K.expand_dims(inv_freq, 0)

     # K.arange does not support a variable length, so positions 0..len-1
     # are produced as a running sum of ones minus one.
     steps = K.cumsum(K.ones_like(x[:, :, 0]), 1)-1
     steps = K.expand_dims(steps, 2)

     angles = K.dot(steps, inv_freq)
     encoding = K.concatenate([K.cos(angles), K.sin(angles)], 2)

     if self.mode == 'sum':
         return encoding + x
     if self.mode == 'concat':
         return K.concatenate([encoding, x], -1)
     return None
예제 #18
0
def RBF_Soft_Loss(y_true, y_pred):
    """Soft RBF loss summed over the batch.

    For each row, takes the prediction at the column chosen by
    ``softargmax(y_true)`` and adds the softplus-style penalty
    ``log(1 + exp(RBF_LAMBDA - y_pred))`` of every other column.
    """
    lam = RBF_LAMBDA
    target_cols = tf.dtypes.cast(softargmax(y_true), tf.int32)
    y_pred = tf.dtypes.cast(y_pred, tf.float32)
    y_true = tf.dtypes.cast(y_true, tf.float32)

    # (row, column) pairs addressing the target entry of every sample.
    sample_rows = K.arange(K.shape(y_true)[0])
    target_coords = tf.stack([sample_rows, target_cols], axis=1)
    target_pred = tf.gather_nd(y_pred, target_coords)

    # Penalty over all columns, minus the target column's own term.
    penalties = K.log(1 + K.exp(lam - y_pred))
    other_penalty = K.sum(penalties, axis=1) - K.log(1 + K.exp(lam - target_pred))

    return K.sum(target_pred + other_penalty)
예제 #19
0
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    '''Decode a feature map into predicted boxes.

    feats: raw feature tensor; axes 1 and 2 are the grid height and width
    anchors: anchor sizes, one pair per anchor
    num_classes: number of class scores per anchor
    input_shape: two-element shape, reversed before scaling the box sizes

    return: (grid, feats, box_xy, box_wh) when calc_loss equals True,
        otherwise (box_xy, box_wh, box_confidence, box_class_probs)
    '''
    anchor_count = len(anchors)
    feat_dtype = K.dtype(feats)
    # [1, 1, 1, num_anchors, 2]: same rank as the reshaped feature tensor so
    # that it broadcasts in the computations below.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, anchor_count, 2])

    # Build the x/y grid of cell coordinates: (height, width, 1, 2).
    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_rows, grid_cols = grid_shape[0], grid_shape[1]
    grid_y = K.tile(
        K.reshape(K.arange(0, stop=grid_rows), [-1, 1, 1, 1]),
        [1, grid_cols, 1, 1])
    grid_x = K.tile(
        K.reshape(K.arange(0, stop=grid_cols), [1, -1, 1, 1]),
        [grid_rows, 1, 1, 1])
    grid = K.cast(K.concatenate([grid_x, grid_y]), feat_dtype)

    # (batch_size, height, width, num_anchors, num_classes + 5)
    feats = K.reshape(
        feats, [-1, grid_rows, grid_cols, anchor_count, num_classes + 5])

    # Turn raw predictions into relative box values:
    #   box_xy is the box centre:  (sigmoid(t_xy) + cell) / grid size
    #   box_wh is the box size:    exp(t_wh) * anchor / input size
    reversed_grid = K.cast(grid_shape[::-1], feat_dtype)
    reversed_input = K.cast(input_shape[::-1], feat_dtype)
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / reversed_grid
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / reversed_input
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    # The loss computation uses a different set of outputs.
    if calc_loss == True:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
예제 #20
0
    def _create_offset_map(self, output_shape):
        """
        Build the grid map of per-cell offsets for the output feature map
        (as used in the Yolo9000 paper).

        ``output_shape[1]`` and ``output_shape[2]`` are used as the grid
        height and width. The result stacks the column index and the row
        index of every cell on a new last axis, as float32.
        """
        grid_h = tf.cast(output_shape[1], tf.int32)  # feature map height
        grid_w = tf.cast(output_shape[2], tf.int32)  # feature map width
        cell_layout = [-1, grid_h, grid_w, 1]

        # Column index of every cell: 0..W-1 repeated once per row.
        col_ids = tf.cast((K.arange(0, stop=grid_w)), dtype=tf.float32)
        cx = K.reshape(K.tile(col_ids, [grid_h]), cell_layout)

        # Row index of every cell: each row id repeated W times.
        row_ids = K.cast((K.arange(0, stop=grid_h)), dtype=tf.float32)
        row_ids = K.tile(K.reshape(row_ids, [-1, 1]), [1, grid_w])
        cy = K.reshape(K.reshape(row_ids, [-1]), cell_layout)

        return K.cast(tf.stack([cx, cy], -1), tf.float32)
예제 #21
0
 def set_subtensor(tensor, value, *indices):
     """Return ``tensor`` with ``value`` written at the given leading indices.

     Each entry of ``indices`` addresses one leading axis; the whole last
     axis of ``tensor`` is replaced at every addressed position. The input
     tensor itself is not modified.
     """
     n_positions = K.shape(indices[-1])[0]
     last_dim = K.int_shape(tensor)[-1]
     full_shape = K.shape(tensor)

     # Repeat every leading index once per element of the last axis...
     leading = K.tf.transpose(
         K.tf.stack([repeat_elements(idx, last_dim) for idx in indices]))
     # ...and pair it with the running index 0..last_dim-1 of that axis.
     last_axis = K.tf.tile(K.arange(0, last_dim), [n_positions])
     last_axis = K.expand_dims(K.flatten(last_axis), -1)
     coords = K.concatenate([leading, last_axis], axis=-1)

     # Dense mask of the written cells and a dense tensor of the new values.
     written = K.tf.cast(
         K.tf.sparse_to_dense(coords, full_shape, 1), K.tf.bool)
     new_values = K.tf.sparse_to_dense(coords, full_shape, K.flatten(value))
     return K.tf.where(written, new_values, tensor)
예제 #22
0
def yolo_head(feats, anchors, num_classes):
    '''
    Decode the raw model output into box parameters.

    :param feats: the output of the model. one tensor of shape (n_sample, n_row_cell, n_column_cell, n_anchors*(5 + classes))
    :param anchors: anchor boxes; ``len(anchors)`` anchors of two values each
    :param num_classes: the number of classes
    :return: tuple (box_confidence, box_xy, box_wh, box_class_probs), the
        outputs being used for filtering
    '''
    anchor_count = len(anchors)
    # Shaped [1, 1, 1, anchor_count, 2] so it broadcasts over batch and cells.
    anchor_grid = K.reshape(K.variable(anchors), [1, 1, 1, anchor_count, 2])

    # Read the spatial size dynamically so fully convolutional models work.
    spatial = K.shape(feats)[1:3]
    rows, cols = spatial[0], spatial[1]

    # The height index is the fastest-varying one in the flattened cell list.
    height_idx = K.tile(K.arange(0, stop=rows), [cols])
    width_idx = K.tile(K.expand_dims(K.arange(0, stop=cols), 0), [rows, 1])
    width_idx = K.flatten(K.transpose(width_idx))
    cell_index = K.transpose(K.stack([height_idx, width_idx]))
    cell_index = K.cast(
        K.reshape(cell_index, [1, rows, cols, 1, 2]), K.dtype(feats))

    feats = K.reshape(feats, [-1, rows, cols, anchor_count, num_classes + 5])
    grid_dims = K.cast(K.reshape(spatial, [1, 1, 1, 1, 2]), K.dtype(feats))

    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.softmax(feats[..., 5:])

    # Centres: sigmoid offset plus cell index, divided by the grid size.
    # Sizes: exponentiated prediction times the anchor, divided the same way.
    box_xy = (K.sigmoid(feats[..., :2]) + cell_index) / grid_dims
    box_wh = K.exp(feats[..., 2:4]) * anchor_grid / grid_dims

    return box_confidence, box_xy, box_wh, box_class_probs
예제 #23
0
    def _compute_valid_seed_region(self, height, width, depth):
        """Build a mask of the positions where a block seed may be placed.

        A position is valid when each of its three coordinates is at least
        ``self.block_size // 2`` and strictly below the matching extent minus
        that half block size.

        Returns:
            The mask (ones where valid, zeros elsewhere) with one extra axis
            added at the front and one at the back.
        """
        # One coordinate volume per axis, each tiled to (depth, width, height).
        height_coord = K.tile(
            K.expand_dims(K.expand_dims(K.arange(height), axis=0), axis=0),
            [depth, width, 1])
        width_coord = K.tile(
            K.expand_dims(K.expand_dims(K.arange(width), axis=1), axis=0),
            [depth, 1, height])
        depth_coord = K.tile(
            K.expand_dims(K.expand_dims(K.arange(depth), axis=1), axis=1),
            [1, width, height])
        positions = K.concatenate(
            [K.expand_dims(coord, axis=-1)
             for coord in (height_coord, width_coord, depth_coord)],
            axis=-1)

        margin = self.block_size // 2
        extents = (height, width, depth)
        lower_ok = [positions[:, :, :, axis] >= margin for axis in range(3)]
        upper_ok = [positions[:, :, :, axis] < extents[axis] - margin
                    for axis in range(3)]
        inside = K.all(K.stack(lower_ok + upper_ok, axis=-1), axis=-1)

        region = K.switch(
            inside,
            K.ones((height, width, depth)),
            K.zeros((height, width, depth)),
        )
        return K.expand_dims(K.expand_dims(region, axis=0), axis=-1)
예제 #24
0
def yolo_head(feats,
              anchors,
              num_classes,
              input_shape,
              calc_loss=False):
    """Decode one YOLO detection layer into box predictions.

    Args:
        feats: raw output of a detection layer; reshaped here to
            (batch, grid_h, grid_w, num_anchors, num_classes + 5).
        anchors: prior boxes for this layer, two values per anchor.
        num_classes: number of object classes.
        input_shape: size of the network input; it is reversed before the
            box sizes are divided by it (presumably (height, width) --
            confirm against the caller).
        calc_loss: when truthy, return the tensors needed by the loss
            instead of the decoded predictions.

    Returns:
        ``(grid, feats, box_xy, box_wh)`` when ``calc_loss`` is truthy,
        otherwise ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    num_anchors = len(anchors)
    # Reshape the priors to [1, 1, 1, num_anchors, 2] so they broadcast
    # against the reshaped feature layer.
    anchors_tensor = K.reshape(
        K.constant(anchors), [1, 1, 1, num_anchors, 2])

    # Build the (grid_h, grid_w, 1, 2) grid of cell indices, x first then y.
    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
                    [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
                    [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    # Split the channel axis into (anchor, box parameter).
    feats = K.reshape(
        feats,
        [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Decode the raw outputs. Centres are sigmoid offsets inside their cell,
    # normalised by the grid size; sizes are exponentiated predictions scaled
    # by the priors and normalised by the input size.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[::-1], K.dtype(feats))
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    # The loss works on the grid and the reshaped raw features.
    if calc_loss:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
예제 #25
0
def interpret_prediction(prediction, anchors, num_classes):
    """Decode a raw YOLO prediction tensor into boxes, confidence and classes.

    Args:
        prediction: network output; reshaped here to
            (batch, grid_h, grid_w, num_anchors, num_classes + 5).
        anchors: anchor sizes, two values per anchor.
        num_classes: number of object classes.

    Returns:
        ``(box_xy, box_wh, box_confidence, box_class_probs)`` where centres
        and sizes are divided by the grid size (width, height).
    """
    num_anchors = len(anchors)

    pred_shape = tf.shape(prediction)
    grid_h, grid_w = pred_shape[1], pred_shape[2]
    map_shape = [-1, grid_h, grid_w, 1]

    prediction = K.reshape(
        prediction, [-1, grid_h, grid_w, num_anchors, num_classes + 5])

    # Offset map: column index and row index of every grid cell.
    col_offsets = tf.cast(K.arange(0, stop=grid_w), dtype=tf.float32)
    col_offsets = K.reshape(K.tile(col_offsets, [grid_h]), map_shape)

    row_offsets = K.cast(K.arange(0, stop=grid_h), dtype=tf.float32)
    row_offsets = K.tile(K.reshape(row_offsets, [-1, 1]), [1, grid_w])
    row_offsets = K.reshape(K.reshape(row_offsets, [-1]), map_shape)

    cell_offsets = tf.to_float(tf.stack([col_offsets, row_offsets], -1))

    anchors_tensor = tf.to_float(
        K.reshape(anchors, [1, 1, 1, num_anchors, 2]))
    netout_size = tf.to_float(K.reshape([grid_w, grid_h], [1, 1, 1, 1, 2]))

    box_confidence = K.sigmoid(prediction[..., 4:5])
    box_class_probs = K.softmax(prediction[..., 5:])

    # Shift each centre by its cell offset (YOLO-9000 formulation) and
    # express centres and sizes relative to the grid size.
    box_xy = (K.sigmoid(prediction[..., :2]) + cell_offsets) / netout_size
    box_wh = (K.exp(prediction[..., 2:4]) * anchors_tensor) / netout_size

    return box_xy, box_wh, box_confidence, box_class_probs
예제 #26
0
    def _pave_embedding(self, _embedding):
        """Blend embedding entries with their neighbours by segment slot.

        Each entry gets a slot number (its running index modulo the segment
        count, where the count includes ``self.mask_zero``):

        * slots below the first real slot keep their own value,
        * the first real slot takes the value of the next entry,
        * the last slot takes the value of the previous entry,
        * every slot in between takes the mean of both neighbours.
        """
        dtype = _embedding.dtype
        first = int(self.mask_zero)
        seg_count = self.seg_num + self.mask_zero
        total = self.input_dim * self._target_dim * seg_count
        slot = bk.arange(0, total) % seg_count

        # Neighbour views; the boundary entry is duplicated at each end.
        next_entry = bk.concatenate([_embedding[1:], _embedding[-1:]])
        prev_entry = bk.concatenate([_embedding[0:1], _embedding[:-1]])

        unchanged = bk.cast(slot < first, dtype) * _embedding
        at_first = bk.cast(slot == first, dtype) * next_entry
        at_last = bk.cast(slot == seg_count - 1, dtype) * prev_entry
        interior = (
            bk.cast((slot > first) & (slot < seg_count - 1), dtype) *
            (next_entry + prev_entry) / 2)

        return unchanged + at_first + at_last + interior
예제 #27
0
파일: vConv_core.py 프로젝트: AUAShen/vConv
 def init_left(self):
     """
     Generate the left mask and store it on ``self.k_weights_3d_left``.

     The mask is the kernel position index (0 .. kernel.shape[0] - 1) repeated
     across ``kernel.shape[2]`` columns, minus ``self.k_weights[0, :, :]``,
     with a singleton axis inserted at position 1. Also sets the backend
     float type to float32.
     :return: None
     """
     K.set_floatx('float32')
     position_idx = tf.expand_dims(K.arange(self.kernel.shape[0]), 1)
     repeated_idx = K.repeat_elements(
         position_idx, self.kernel.shape[2], axis=1)
     left_offsets = (
         K.cast(repeated_idx, dtype='float32') - self.k_weights[0, :, :])
     self.k_weights_3d_left = tf.expand_dims(left_offsets, 1)
예제 #28
0
 def predict_log_proba(self, X, y):
     """Predict the log probability of each row of ``y`` given ``X``.

     ``X`` is reshaped to ``(-1, self.input_dim)`` and ``y`` to
     ``(-1, self.output_dim)``. Returns a 1-D array with one entry per row of
     ``y``; every entry is ``-inf`` when the model output contains a NaN.
     """
     features = X.reshape((-1, self.input_dim))
     predicted = self.model.predict(features)
     targets = y.reshape((-1, self.output_dim))
     sample_count = targets.shape[0]

     # A NaN in the predicted parameters leaves no usable likelihood.
     if np.any(np.isnan(predicted)):
         return np.full(sample_count, -np.inf)

     param_tensor = K.variable(predicted)
     target_tensor = K.variable(targets)

     def row_log_likelihood(i):
         # Keep the row two-dimensional by slicing instead of indexing.
         return self.log_Gaussian_likelihood(
             target_tensor[i:i + 1, :], param_tensor)

     per_row = K.map_fn(
         row_log_likelihood, K.arange(0, sample_count), dtype='float32')
     return K.eval(per_row).reshape(sample_count)
예제 #29
0
def seq_gather(x):
    """Select one vector from every sequence in a batch.

    ``x`` is a pair ``(seq, idxs)``: ``seq`` has shape [None, seq_len, s_size]
    and ``idxs`` has shape [None, 1]. From the i-th sequence the vector at
    position ``idxs[i]`` is taken, giving an output of shape [None, s_size].
    """
    seq, idxs = x
    positions = K.cast(idxs, 'int32')
    # Pair every position with its batch row to form (row, position) indices.
    rows = K.expand_dims(K.arange(0, K.shape(seq)[0]), 1)
    return K.tf.gather_nd(seq, K.concatenate([rows, positions], 1))
예제 #30
0
 def call(self, inputs, mask=None):
     """Compute sinusoidal position embeddings and combine them per mode.

     * ``MODE_ADD``: positions 0..seq_len-1 are embedded to the width of the
       last input axis and added to ``inputs``.
     * ``MODE_CONCAT``: positions are embedded to ``self.output_dim`` and
       concatenated after ``inputs`` on the last axis.
     * any other mode: ``inputs`` themselves are used as the positions and
       only the embedding (width ``self.output_dim``) is returned.

     Even channels hold the sine and odd channels the cosine of the same
     angle.
     """
     input_shape = K.shape(inputs)
     if self.mode == self.MODE_ADD or self.mode == self.MODE_CONCAT:
         if self.mode == self.MODE_ADD:
             output_dim = input_shape[2]
         else:
             output_dim = self.output_dim
         batch_size, seq_len = input_shape[0], input_shape[1]
         pos_input = K.tile(
             K.expand_dims(K.arange(seq_len), axis=0), [batch_size, 1])
     else:
         output_dim = self.output_dim
         pos_input = inputs
     if K.dtype(pos_input) != K.floatx():
         pos_input = K.cast(pos_input, K.floatx())

     half_dim = output_dim // 2
     evens = K.arange(half_dim) * 2
     odds = K.arange(half_dim) * 2 + 1

     def _angles(channel_idx):
         # position * 10000^(-channel / output_dim) for every channel pair.
         inv_wavelength = 1.0 / K.pow(
             10000.0,
             K.cast(channel_idx, K.floatx()) / K.cast(output_dim, K.floatx()))
         return K.dot(K.expand_dims(pos_input, -1),
                      K.expand_dims(inv_wavelength, 0))

     even_embd = K.sin(_angles(evens))
     # Odd channels reuse the exponent of the even channel just before them.
     odd_embd = K.cos(_angles(odds - 1))

     # Stacking on a new last axis and flattening interleaves sin and cos.
     embd = K.stack([even_embd, odd_embd], axis=-1)
     output = K.reshape(embd, [-1, K.shape(inputs)[1], output_dim])
     if self.mode == self.MODE_CONCAT:
         output = K.concatenate([inputs, output], axis=-1)
     if self.mode == self.MODE_ADD:
         output += inputs
     return output
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters.

    The anchor count comes from the module-level ``anchors_per_level`` rather
    than from ``len(anchors)``, and each anchor carries ``NUM_ANGLES3`` extra
    channels after the class scores.

    Args:
        feats: raw output of a detection layer; reshaped here to
            (batch, grid_h, grid_w, num_anchors,
            num_classes + 5 + NUM_ANGLES3).
        anchors: anchor sizes, reshaped to [1, 1, 1, num_anchors, 2].
        num_classes: number of object classes.
        input_shape: tensor holding the network input size; its last axis is
            reversed before the box sizes are divided by it.
        calc_loss: when truthy, return the tensors needed by the loss.

    Returns:
        ``(grid, feats, box_xy, box_wh)`` when ``calc_loss`` is truthy,
        otherwise ``(box_xy, box_wh, box_confidence, box_class_probs)``.

    Side effects:
        Stores ``[grid_shape, feats, anchors_tensor]`` (with ``feats`` still
        in its original, un-reshaped form) in the module-level ``_var``.
    """
    num_anchors = anchors_per_level
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_y = K.tile(
        tf.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1],
                   name='yolo_head/tile/reshape/grid_y'),
        [1, grid_shape[1], 1, 1])
    grid_x = K.tile(
        tf.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1],
                   name='yolo_head/tile/reshape/grid_x'),
        [grid_shape[0], 1, 1, 1])
    grid = tf.concat([grid_x, grid_y],
                     axis=-1,
                     name='yolo_head/concatenate/grid')
    grid = K.cast(grid, K.dtype(feats))

    # Expose intermediates at module level; this must happen before ``feats``
    # is reshaped so the stored tensor is the raw layer output.
    global _var
    _var = [grid_shape, feats, anchors_tensor]

    feats = tf.reshape(feats, [
        -1, grid_shape[0], grid_shape[1], num_anchors,
        num_classes + 5 + NUM_ANGLES3
    ],
                       name='yolo_head/reshape/feats')

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[..., ::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[..., ::-1], K.dtype(feats))

    box_confidence = K.sigmoid(feats[..., 4:5])
    # Only the class channels are used; the trailing extra channels are not.
    box_class_probs = K.sigmoid(feats[..., 5:5 + num_classes])

    if calc_loss:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
예제 #32
0
def yolo_head(feats, anchors, num_classes):
    """Turn raw YOLO layer output into confidence, box and class tensors.

    ``feats`` is reshaped to
    (batch, height, width, num_anchors, num_classes + 5), assuming channels
    last. Returns ``(box_confidence, box_xy, box_wh, box_class_probs)`` with
    centres and sizes divided by the spatial dimensions of ``feats``.
    """
    n_anchors = len(anchors)
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, n_anchors, 2])

    hw = K.shape(feats)[1:3]  # assuming channels last.
    feat_dtype = K.dtype(feats)

    # Enumerate the cells with the height index varying fastest.
    h_index = K.tile(K.arange(0, stop=hw[0]), [hw[1]])
    w_index = K.expand_dims(K.arange(0, stop=hw[1]), 0)
    w_index = K.flatten(K.transpose(K.tile(w_index, [hw[0], 1])))
    cell_offsets = K.reshape(
        K.transpose(K.stack([h_index, w_index])), [1, hw[0], hw[1], 1, 2])
    cell_offsets = K.cast(cell_offsets, feat_dtype)

    feats = K.reshape(feats, [-1, hw[0], hw[1], n_anchors, num_classes + 5])
    grid_scale = K.cast(K.reshape(hw, [1, 1, 1, 1, 2]), feat_dtype)

    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.softmax(feats[..., 5:])
    box_xy = (K.sigmoid(feats[..., :2]) + cell_offsets) / grid_scale
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / grid_scale

    return box_confidence, box_xy, box_wh, box_class_probs
예제 #33
0
def _head(
    feats: Tensor, anchors: np.ndarray, num_classes: int, input_shape: Tensor
) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
    """
    Decode the raw features of one detection layer into box predictions.

    :param feats: raw layer output; reshaped here to
        (batch, grid_h, grid_w, num_anchors, num_classes + 5)
    :param anchors: anchor sizes, two values per anchor
    :param num_classes: number of object classes
    :param input_shape: network input size; reversed before the box sizes
        are divided by it
    :return: (box_xy, box_wh, box_confidence, box_class_probability)
    """
    anchor_count = len(anchors)
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, anchor_count, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_h, grid_w = grid_shape[0], grid_shape[1]

    # Cell index grid of shape (grid_h, grid_w, 1, 2), x first then y.
    row_ids = K.reshape(K.arange(0, stop=grid_h), [-1, 1, 1, 1])
    col_ids = K.reshape(K.arange(0, stop=grid_w), [1, -1, 1, 1])
    grid = K.concatenate([
        K.tile(col_ids, [grid_h, 1, 1, 1]),
        K.tile(row_ids, [1, grid_w, 1, 1]),
    ])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, grid_h, grid_w, anchor_count, num_classes + 5]
    )
    feat_dtype = K.dtype(feats)

    # Centres are normalised by the grid size, sizes by the input size.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[::-1], feat_dtype
    )
    box_wh = (
        K.exp(feats[..., 2:4]) * anchors_tensor
        / K.cast(input_shape[::-1], feat_dtype)
    )
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probability = K.sigmoid(feats[..., 5:])

    return box_xy, box_wh, box_confidence, box_class_probability
def sparse_amsoftmax_loss(y_true, y_pred, scale=24, margin=0.2):
    """Additive-margin softmax loss for sparse integer labels.

    ``margin`` is subtracted from the logit of the labelled class only, the
    result is multiplied by ``scale`` and passed to sparse categorical
    cross-entropy as logits. The label is read from the first column of
    ``y_true``.
    """
    labels = K.cast(K.expand_dims(y_true[:, 0], 1), 'int32')  # (None, 1)
    rows = K.expand_dims(K.arange(0, K.shape(labels)[0]), 1)
    target_coords = K.concatenate([rows, labels], 1)

    target_logits = K.tf.gather_nd(y_pred, target_coords)
    # Tensor shaped like y_pred holding -margin at each target position.
    margin_tensor = K.tf.scatter_nd(target_coords,
                                    target_logits * 0 + (-margin),
                                    K.tf.shape(y_pred))
    adjusted_logits = K.tf.add(y_pred, margin_tensor)

    return K.sparse_categorical_crossentropy(labels,
                                             scale * adjusted_logits,
                                             from_logits=True)
예제 #35
0
 def call(self, inputs, mask=None, **kwargs):
     """Scaled dot-product attention.

     ``inputs`` is either a list ``[query, key, value]`` or a single tensor
     used for all three. When ``mask`` is a list its second entry is used.
     With ``self.history_only`` each query position attends only to key
     positions at or before it. Returns ``[output, attention]`` when
     ``self.return_attention`` is set, otherwise just the output.
     """
     if isinstance(inputs, list):
         query, key, value = inputs
     else:
         query = key = value = inputs
     if isinstance(mask, list):
         mask = mask[1]

     scale = K.sqrt(K.cast(K.shape(query)[-1], dtype=K.floatx()))
     scores = K.batch_dot(query, key, axes=2) / scale
     # Shift by the row maximum before exponentiating; the shift cancels in
     # the normalisation below.
     weights = K.exp(scores - K.max(scores, axis=-1, keepdims=True))

     if self.history_only:
         query_len, key_len = K.shape(query)[1], K.shape(key)[1]
         key_pos = K.expand_dims(K.arange(0, key_len), axis=0)
         query_pos = K.expand_dims(K.arange(0, query_len), axis=-1)
         allowed = K.cast(key_pos <= query_pos, K.floatx())
         weights *= K.expand_dims(allowed, axis=0)
     if mask is not None:
         weights *= K.cast(K.expand_dims(mask, axis=-2), K.floatx())

     attention = weights / (
         K.sum(weights, axis=-1, keepdims=True) + K.epsilon())
     attended = K.batch_dot(attention, value)
     if self.return_attention:
         return [attended, attention]
     return attended
예제 #36
0
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters.

    Args:
        feats: the network output for one detection layer; reshaped here to
            (batch, grid_h, grid_w, num_anchors, num_classes + 5).
        anchors: anchor sizes, two values per anchor, so ``len(anchors)`` is
            the number of anchors rather than the number of scalars.
        num_classes: number of object classes.
        input_shape: network input size; reversed before the box sizes are
            divided by it.
        calc_loss: when truthy, return the tensors needed by the loss.

    Returns:
        ``(grid, feats, box_xy, box_wh)`` when ``calc_loss`` is truthy,
        otherwise ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    num_anchors = len(anchors)

    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])
    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
                    [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
                    [grid_shape[0], 1, 1, 1])
    # K.concatenate joins on the last axis by default, giving (x, y) pairs
    # that enumerate every grid cell.
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(
        feats,
        [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    # The sigmoid keeps the predicted centre offset in (0, 1), so adding the
    # cell index gives a fractional cell coordinate; dividing by the grid
    # size expresses the centre as a fraction of the image. The division
    # broadcasts over the last axis.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[::-1], K.dtype(feats))
    # exp(prediction) scales the anchor size; the result is normalised by
    # the input size.
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[::-1], K.dtype(feats))

    # Squash objectness and class scores to (0, 1).
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    if calc_loss:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
예제 #37
0
def sparse_amsoftmax_loss(y_true, y_pred, scale=30, margin=0.35):
    """Additive-margin softmax loss for sparse integer labels.

    The label is read from the first column of ``y_true``. ``y_pred`` is
    expected to hold cosine similarities (range [-1, 1]), which is why the
    values are multiplied by ``scale``.
    """
    # Force the labels to shape (None, 1) and dtype int32.
    labels = K.cast(K.expand_dims(y_true[:, 0], 1), 'int32')
    rows = K.expand_dims(K.arange(0, K.shape(labels)[0]), 1)
    target_coords = K.concatenate([rows, labels], 1)

    # Score of the labelled class, picked out with tf.gather_nd.
    target_score = K.expand_dims(K.tf.gather_nd(y_pred, target_coords), 1)
    target_with_margin = target_score - margin

    # Partition function over all scores plus the margin-adjusted target.
    scaled_scores = K.concatenate([y_pred, target_with_margin], 1) * scale
    log_z = K.logsumexp(scaled_scores, 1, keepdims=True)
    # Remove exp(scale * target_score), the un-margined target, from Z.
    log_z = log_z + K.log(1 - K.exp(scale * target_score - log_z))
    return - target_with_margin * scale + log_z
 def call(self, x, mask=None):
     """Compute sinusoidal position embeddings and combine them with ``x``.

     When ``self.size`` is unset, or the mode is ``'sum'``, the embedding
     width is taken from the last axis of ``x`` and stored on ``self.size``.

     Returns:
         ``position_embedding + x`` in ``'sum'`` mode, the embedding
         concatenated in front of ``x`` on axis 2 in ``'concat'`` mode, and
         ``None`` for any other mode.
     """
     if self.size is None or self.mode == 'sum':
         self.size = int(x.shape[-1])
     position_j = 1. / K.pow(10000., 2 * K.arange(self.size / 2, dtype='float32') / self.size)
     position_j = K.expand_dims(position_j, 0)
     # K.arange does not support a variable length, so the position index is
     # built as a cumulative sum of ones along the sequence axis.
     position_i = K.cumsum(K.ones_like(x[:, :, 0]), 1) - 1
     position_i = K.expand_dims(position_i, 2)
     position_ij = K.dot(position_i, position_j)
     position_ij = K.concatenate([K.cos(position_ij), K.sin(position_ij)], 2)
     if self.mode == 'sum':
         return position_ij + x
     elif self.mode == 'concat':
         return K.concatenate([position_ij, x], 2)
예제 #39
0
def sparse_simpler_asoftmax_loss(y_true, y_pred, scale=30):
    """Simplified angular-softmax loss for sparse integer labels.

    The score of the labelled class is replaced by the smaller of itself and
    its quadruple-angle expansion ``1 - 8*c**2 + 8*c**4``. The label is read
    from the first column of ``y_true``; ``y_pred`` is expected to hold
    cosine similarities (range [-1, 1]).
    """
    # Force the labels to shape (None, 1) and dtype int32.
    labels = K.cast(K.expand_dims(y_true[:, 0], 1), 'int32')
    rows = K.expand_dims(K.arange(0, K.shape(labels)[0]), 1)
    target_coords = K.concatenate([rows, labels], 1)

    # Score of the labelled class, picked out with tf.gather_nd.
    target_score = K.expand_dims(K.tf.gather_nd(y_pred, target_coords), 1)

    # Quadruple-angle expansion of the target score.
    quad_angle = (1 - 8 * K.square(target_score)
                  + 8 * K.square(K.square(target_score)))
    # Equivalent to min(target_score, quad_angle).
    target_with_margin = quad_angle - K.relu(quad_angle - target_score)

    # Partition function over all scores plus the margin-adjusted target.
    scaled_scores = K.concatenate([y_pred, target_with_margin], 1) * scale
    log_z = K.logsumexp(scaled_scores, 1, keepdims=True)
    # Remove exp(scale * target_score), the un-margined target, from Z.
    log_z = log_z + K.log(1 - K.exp(scale * target_score - log_z))
    return - target_with_margin * scale + log_z
예제 #40
0
    def test_map(self):
        """``map_fn`` over rows and over row indices both give the row sums."""
        data = np.random.rand(10, 3).astype(np.float32)
        expected = data.sum(axis=1)
        for backend in [KTF, KTH]:
            var = backend.variable(data)
            row_sums = backend.eval(backend.map_fn(backend.sum, var))

            # Walking the indexes also has to work; tensorflow needs the
            # output dtype spelled out for that.
            def sum_row(i):
                return backend.sum(var[i])

            indexed_sums = backend.eval(backend.map_fn(
                sum_row,
                backend.arange(10),
                dtype=backend.floatx()
            ))

            assert (10,) == row_sums.shape
            assert (10,) == indexed_sums.shape
            assert_allclose(expected, row_sums, atol=1e-05)
            assert_allclose(row_sums, indexed_sums, atol=1e-05)
예제 #41
0
    def _interpolate(self, image, sampled_grids, output_size):
        """Bilinearly sample ``image`` at the locations in ``sampled_grids``.

        Args:
            image: 4-D tensor indexed as (batch, height, width, channels);
                height and width must also be statically known.
            sampled_grids: sampling coordinates; row 0 of axis 1 holds x and
                row 1 holds y (presumably normalised to [-1, 1] -- confirm
                against the grid generator).
            output_size: pair whose product is the number of samples per
                image.

        Returns:
            One row of interpolated channel values per sample, for all
            images in the batch, flattened along the first axis.
        """
        image_shape = K.shape(image)
        batch_size = image_shape[0]
        height = image_shape[1]
        width = image_shape[2]
        num_channels = image_shape[3]

        # Map the sampling coordinates onto pixel coordinates.
        sample_x = K.cast(K.flatten(sampled_grids[:, 0:1, :]), dtype='float32')
        sample_y = K.cast(K.flatten(sampled_grids[:, 1:2, :]), dtype='float32')
        sample_x = .5 * (sample_x + 1.0) * K.cast(width, dtype='float32')
        sample_y = .5 * (sample_y + 1.0) * K.cast(height, dtype='float32')

        static_shape = K.int_shape(image)
        max_x = int(static_shape[2] - 1)
        max_y = int(static_shape[1] - 1)

        # Integer corners around every sample, clipped to the image.
        x_floor = K.cast(sample_x, 'int32')
        y_floor = K.cast(sample_y, 'int32')
        left = K.clip(x_floor, 0, max_x)
        right = K.clip(x_floor + 1, 0, max_x)
        top = K.clip(y_floor, 0, max_y)
        bottom = K.clip(y_floor + 1, 0, max_y)

        # Offset of every image's first pixel in the flattened batch.
        batch_offsets = K.expand_dims(
            K.arange(0, batch_size) * (height * width), axis=-1)
        samples_per_image = output_size[0] * output_size[1]
        base = K.flatten(
            K.repeat_elements(batch_offsets, samples_per_image, axis=1))

        row_top = base + top * width
        row_bottom = bottom * width + base

        flat_image = K.cast(
            K.reshape(image, shape=(-1, num_channels)), dtype='float32')
        corner_a = K.gather(flat_image, row_top + left)
        corner_b = K.gather(flat_image, row_bottom + left)
        corner_c = K.gather(flat_image, row_top + right)
        corner_d = K.gather(flat_image, row_bottom + right)

        left_f = K.cast(left, 'float32')
        right_f = K.cast(right, 'float32')
        top_f = K.cast(top, 'float32')
        bottom_f = K.cast(bottom, 'float32')

        # Each corner is weighted by the area of the opposite sub-rectangle.
        weight_a = K.expand_dims((right_f - sample_x) * (bottom_f - sample_y), 1)
        weight_b = K.expand_dims((right_f - sample_x) * (sample_y - top_f), 1)
        weight_c = K.expand_dims((sample_x - left_f) * (bottom_f - sample_y), 1)
        weight_d = K.expand_dims((sample_x - left_f) * (sample_y - top_f), 1)

        return (weight_a * corner_a + weight_b * corner_b
                + weight_c * corner_c + weight_d * corner_d)
예제 #42
0
def yolo_head(feats, anchors, num_classes):
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tensor
        Final convolutional layer features, channels last.
    anchors : array-like
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.

    Returns
    -------
    box_xy : tensor
        x, y box predictions adjusted by spatial location in conv layer.
    box_wh : tensor
        w, h box predictions adjusted by anchors and conv spatial resolution.
    box_conf : tensor
        Probability estimate for whether each box contains any object.
    box_class_pred : tensor
        Probability distribution estimate for each box over class labels.
    """
    anchor_count = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(
        K.variable(anchors), [1, 1, 1, anchor_count, 2])

    # The conv dims are read dynamically so the head works for fully
    # convolutional models.
    # TODO: Remove or add option for static implementation.
    conv_dims = K.shape(feats)[1:3]  # assuming channels last
    conv_height, conv_width = conv_dims[0], conv_dims[1]

    # In YOLO the height index is the inner most iteration.
    height_index = K.tile(K.arange(0, stop=conv_height), [conv_width])
    # TODO: Repeat_elements and tf.split doesn't support dynamic splits, so
    # the width index is built with tile + transpose instead.
    width_index = K.tile(
        K.expand_dims(K.arange(0, stop=conv_width), 0), [conv_height, 1])
    width_index = K.flatten(K.transpose(width_index))

    conv_index = K.transpose(K.stack([height_index, width_index]))
    conv_index = K.reshape(conv_index, [1, conv_height, conv_width, 1, 2])
    conv_index = K.cast(conv_index, K.dtype(feats))

    feats = K.reshape(
        feats,
        [-1, conv_height, conv_width, anchor_count, num_classes + 5])
    conv_scale = K.cast(
        K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    # Note: YOLO iterates over height index before width index.
    box_xy = (K.sigmoid(feats[..., :2]) + conv_index) / conv_scale
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / conv_scale

    return box_xy, box_wh, box_confidence, box_class_probs
예제 #43
0
 def call(self, z, mask=None):
     """Combine the columns of ``x`` using the per-row index ``a``.

     ``z`` is the pair ``(x, a)``. For every row the result is the last
     column of ``x``, minus the mean of all other columns, plus the entry of
     ``x`` selected by the flattened ``a``; a trailing axis is then added.
     """
     x, a = z
     last_column = x[:, -1]
     mean_of_rest = K.mean(x[:, :-1], axis=1)
     selected = x[K.arange(x.shape[0]), K.flatten(a)]
     return K.expand_dims(last_column - mean_of_rest + selected)
예제 #44
0
    def call(self, inputs, training=None, mask=None):
        """Append normalised coordinate channels to ``inputs``.

        For ``self.rank`` of 1, 2 or 3 one coordinate channel is built per
        spatial axis, scaled to the range [-1, 1], and concatenated to
        ``inputs`` along ``self.axis``. For rank 2 and 3, ``channels_first``
        inputs are permuted to channels-last before the coordinates are
        attached and permuted back afterwards. For rank 2 with
        ``self.use_radius`` an extra channel
        ``sqrt((xx - 0.5)**2 + (yy - 0.5)**2)`` is appended on the last axis.

        NOTE(review): ``outputs`` is only assigned inside the three rank
        branches, so any other ``self.rank`` fails at the final ``return``.
        """
        input_shape = K.shape(inputs)

        if self.rank == 1:
            # Three shape entries are read: batch, spatial length, channels.
            input_shape = [input_shape[i] for i in range(3)]
            batch_shape, dim, channels = input_shape

            # Position index 0..dim-1, repeated for every batch element.
            xx_range = K.tile(K.expand_dims(K.arange(0, dim), axis=0),
                              K.stack([batch_shape, 1]))
            xx_range = K.expand_dims(xx_range, axis=-1)

            # Rescale the index from [0, dim-1] to [-1, 1].
            xx_channels = K.cast(xx_range, K.floatx())
            xx_channels = xx_channels / K.cast(dim - 1, K.floatx())
            xx_channels = (xx_channels * 2) - 1.

            outputs = K.concatenate([inputs, xx_channels], axis=self.axis)

        if self.rank == 2:
            if self.data_format == 'channels_first':
                # Work in channels-last layout; undone before returning.
                inputs = K.permute_dimensions(inputs, [0, 2, 3, 1])

            input_shape = [input_shape[i] for i in range(4)]
            batch_shape, dim1, dim2, channels = input_shape

            # xx: index along dim1, broadcast over dim2 via a batched outer
            # product with a column of ones.
            xx_ones = K.ones(K.stack([batch_shape, dim2]), dtype='int32')
            xx_ones = K.expand_dims(xx_ones, axis=-1)

            xx_range = K.tile(K.expand_dims(K.arange(0, dim1), axis=0),
                              K.stack([batch_shape, 1]))
            xx_range = K.expand_dims(xx_range, axis=1)
            xx_channels = K.batch_dot(xx_ones, xx_range, axes=[2, 1])
            xx_channels = K.expand_dims(xx_channels, axis=-1)
            xx_channels = K.permute_dimensions(xx_channels, [0, 2, 1, 3])

            # yy: index along dim2, broadcast over dim1 the same way.
            yy_ones = K.ones(K.stack([batch_shape, dim1]), dtype='int32')
            yy_ones = K.expand_dims(yy_ones, axis=1)

            yy_range = K.tile(K.expand_dims(K.arange(0, dim2), axis=0),
                              K.stack([batch_shape, 1]))
            yy_range = K.expand_dims(yy_range, axis=-1)

            yy_channels = K.batch_dot(yy_range, yy_ones, axes=[2, 1])
            yy_channels = K.expand_dims(yy_channels, axis=-1)
            yy_channels = K.permute_dimensions(yy_channels, [0, 2, 1, 3])

            # Rescale both index channels to [-1, 1].
            xx_channels = K.cast(xx_channels, K.floatx())
            xx_channels = xx_channels / K.cast(dim1 - 1, K.floatx())
            xx_channels = (xx_channels * 2) - 1.

            yy_channels = K.cast(yy_channels, K.floatx())
            yy_channels = yy_channels / K.cast(dim2 - 1, K.floatx())
            yy_channels = (yy_channels * 2) - 1.

            outputs = K.concatenate([inputs, xx_channels, yy_channels], axis=self.axis)

            if self.use_radius:
                # Distance of each coordinate pair from the point (0.5, 0.5).
                rr = K.sqrt(K.square(xx_channels - 0.5) +
                            K.square(yy_channels - 0.5))
                outputs = K.concatenate([outputs, rr], axis=-1)

            if self.data_format == 'channels_first':
                outputs = K.permute_dimensions(outputs, [0, 3, 1, 2])

        if self.rank == 3:
            if self.data_format == 'channels_first':
                # Work in channels-last layout; undone before returning.
                inputs = K.permute_dimensions(inputs, [0, 2, 3, 4, 1])

            input_shape = [input_shape[i] for i in range(5)]
            batch_shape, dim1, dim2, dim3, channels = input_shape

            # xx: index along dim2, broadcast over dim3, then tiled over dim1.
            xx_ones = K.ones(K.stack([batch_shape, dim3]), dtype='int32')
            xx_ones = K.expand_dims(xx_ones, axis=-1)

            xx_range = K.tile(K.expand_dims(K.arange(0, dim2), axis=0),
                              K.stack([batch_shape, 1]))
            xx_range = K.expand_dims(xx_range, axis=1)

            xx_channels = K.batch_dot(xx_ones, xx_range, axes=[2, 1])
            xx_channels = K.expand_dims(xx_channels, axis=-1)
            xx_channels = K.permute_dimensions(xx_channels, [0, 2, 1, 3])

            xx_channels = K.expand_dims(xx_channels, axis=1)
            xx_channels = K.tile(xx_channels,
                                 [1, dim1, 1, 1, 1])

            # yy: index along dim3, broadcast over dim2, then tiled over dim1.
            yy_ones = K.ones(K.stack([batch_shape, dim2]), dtype='int32')
            yy_ones = K.expand_dims(yy_ones, axis=1)

            yy_range = K.tile(K.expand_dims(K.arange(0, dim3), axis=0),
                              K.stack([batch_shape, 1]))
            yy_range = K.expand_dims(yy_range, axis=-1)

            yy_channels = K.batch_dot(yy_range, yy_ones, axes=[2, 1])
            yy_channels = K.expand_dims(yy_channels, axis=-1)
            yy_channels = K.permute_dimensions(yy_channels, [0, 2, 1, 3])

            yy_channels = K.expand_dims(yy_channels, axis=1)
            yy_channels = K.tile(yy_channels,
                                 [1, dim1, 1, 1, 1])

            # zz: index along dim1, tiled over dim2 and dim3.
            zz_range = K.tile(K.expand_dims(K.arange(0, dim1), axis=0),
                              K.stack([batch_shape, 1]))
            zz_range = K.expand_dims(zz_range, axis=-1)
            zz_range = K.expand_dims(zz_range, axis=-1)

            zz_channels = K.tile(zz_range,
                                 [1, 1, dim2, dim3])
            zz_channels = K.expand_dims(zz_channels, axis=-1)

            # Rescale all three index channels to [-1, 1].
            xx_channels = K.cast(xx_channels, K.floatx())
            xx_channels = xx_channels / K.cast(dim2 - 1, K.floatx())
            xx_channels = xx_channels * 2 - 1.

            yy_channels = K.cast(yy_channels, K.floatx())
            yy_channels = yy_channels / K.cast(dim3 - 1, K.floatx())
            yy_channels = yy_channels * 2 - 1.

            zz_channels = K.cast(zz_channels, K.floatx())
            zz_channels = zz_channels / K.cast(dim1 - 1, K.floatx())
            zz_channels = zz_channels * 2 - 1.

            outputs = K.concatenate([inputs, zz_channels, xx_channels, yy_channels],
                                    axis=self.axis)

            if self.data_format == 'channels_first':
                outputs = K.permute_dimensions(outputs, [0, 4, 1, 2, 3])

        return outputs