Example 1
def loop_body(b, ignore_mask):
    # Ground-truth boxes for image b (only grid cells that contain an object).
    true_box = tf.boolean_mask(y_true[layer][b, ..., 0:4],
                               object_mask_bool[b, ..., 0])
    # IOU of every predicted box in image b against every true box.
    iou = box_iou(pred_box[b], true_box)
    best_iou = K.max(iou, axis=-1)
    # Mask value 0 where the best IOU exceeds ignore_thresh: such predictions
    # overlap a real object and are excluded from the no-object loss.
    ignore_mask = ignore_mask.write(
        b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
    return b + 1, ignore_mask
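For context, a minimal sketch of how a body like this is typically driven, assuming the surrounding yolo_loss closure from keras-yolo3 where m is the batch-size tensor and y_true, pred_box, object_mask_bool, and ignore_thresh are in scope:

# Sketch, not repo code: accumulate per-image ignore masks in a TensorArray,
# iterating loop_body over the batch with tf.while_loop.
ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
_, ignore_mask = tf.while_loop(lambda b, *args: b < m, loop_body, [0, ignore_mask])
ignore_mask = ignore_mask.stack()        # [batch, grid_h, grid_w, num_anchors]
ignore_mask = K.expand_dims(ignore_mask, -1)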
Example 2
def yolo_filter_boxes(boxes, box_confidence, box_class_probs, threshold=.6):
    """Filter YOLO boxes based on object and class confidence."""
    box_scores = box_confidence * box_class_probs
    box_classes = K.argmax(box_scores, axis=-1)
    box_class_scores = K.max(box_scores, axis=-1)
    prediction_mask = box_class_scores >= threshold

    # TODO: Expose tf.boolean_mask to Keras backend?
    boxes = tf.boolean_mask(boxes, prediction_mask)
    scores = tf.boolean_mask(box_class_scores, prediction_mask)
    classes = tf.boolean_mask(box_classes, prediction_mask)
    return boxes, scores, classes
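A hedged usage sketch follows; the random tensors are placeholders standing in for real YOLO head outputs on a 19x19 grid with 5 anchors and 80 classes:

import tensorflow as tf
from keras import backend as K  # or tensorflow.keras, depending on the setup

boxes_in = tf.random.uniform((19, 19, 5, 4))
confidence = tf.random.uniform((19, 19, 5, 1))
class_probs = tf.random.uniform((19, 19, 5, 80))
# Broadcasting gives box_scores shape (19, 19, 5, 80); the boolean mask keeps a
# data-dependent number of boxes, hence the leading None dimension.
boxes_out, scores, classes = yolo_filter_boxes(boxes_in, confidence, class_probs)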
Example 3
def viterbi_decode(x, U, b_start=None, b_end=None, mask=None):
    """Computes the best tag sequence y for a given input x, i.e. the one that
    maximizes the value of path_energy."""
    x = add_boundary_energy(x, b_start, b_end, mask)

    alpha_0 = x[:, 0, :]
    gamma_0 = K.zeros_like(alpha_0)
    initial_states = [gamma_0, alpha_0]
    _, gamma = _forward(
        x,
        lambda B: [K.cast(K.argmax(B, axis=1), K.floatx()),
                   K.max(B, axis=1)], initial_states, U, mask)
    y = _backward(gamma, mask)
    return y
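As an illustration of the recursion this implements, here is a plain-NumPy Viterbi decode for a single sequence (viterbi_decode_np is a hypothetical helper, not keras-contrib code); x is assumed to have shape (timesteps, n_tags) with boundary energies already added, and U is the (n_tags, n_tags) transition energy matrix:

import numpy as np

def viterbi_decode_np(x, U):
    # x: (timesteps, n_tags) emission energies; U: (n_tags, n_tags) transitions.
    # Higher energy = better path, matching the argmax/max pair above.
    alpha = x[0]                       # best energy of any path ending in each tag
    backpointers = []
    for t in range(1, len(x)):
        scores = alpha[:, None] + U + x[t][None, :]   # (prev_tag, next_tag)
        backpointers.append(scores.argmax(axis=0))    # best predecessor per tag
        alpha = scores.max(axis=0)
    y = [int(alpha.argmax())]          # best final tag
    for ptr in reversed(backpointers): # follow backpointers to recover the path
        y.append(int(ptr[y[-1]]))
    return y[::-1]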
Example 4
def yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold=.6):
    # p(class = x, object) = p(object) * p(class = x | object)
    box_scores = box_confidence * box_class_probs  # 19x19x5x80

    # box_classes: index of the highest-probability class for each box
    box_classes = K.argmax(box_scores, axis=-1)  # 19x19x5 (1 class index per box)
    # box_class_scores: score of that highest-probability class
    box_class_scores = K.max(box_scores, axis=-1)  # 19x19x5 (1 class score per box)
    # build a mask of boxes whose best class score exceeds the threshold
    filtering_mask = box_class_scores >= threshold
    # keep only the entries selected by the mask
    scores = tf.boolean_mask(box_class_scores, filtering_mask)
    boxes = tf.boolean_mask(boxes, filtering_mask)
    classes = tf.boolean_mask(box_classes, filtering_mask)

    return scores, boxes, classes
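A quick NumPy check of the shape comments above (assuming the usual 19x19 grid, 5 anchors, 80 classes convention):

import numpy as np

conf = np.random.rand(19, 19, 5, 1)
probs = np.random.rand(19, 19, 5, 80)
scores = conf * probs                                # broadcasts to (19, 19, 5, 80)
assert scores.shape == (19, 19, 5, 80)
assert scores.argmax(axis=-1).shape == (19, 19, 5)   # one class index per box
assert scores.max(axis=-1).shape == (19, 19, 5)      # one class score per box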
Example 5
def yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold=0.3):
    """
    通过阈值来过滤对象和分类的置信度。
    
    参数:
        box_confidence  - tensor类型,维度为(19,19,5,1),包含19x19单元格中每个单元格预测的5个锚框中的所有的锚框的pc (一些对象的置信概率)。
        boxes - tensor类型,维度为(19,19,5,4),包含了所有的锚框的(px,py,ph,pw )。
        box_class_probs - tensor类型,维度为(19,19,5,80),包含了所有单元格中所有锚框的所有对象( c1,c2,c3,···,c80 )检测的概率。
        threshold - 实数,阈值,如果分类预测的概率高于它,那么这个分类预测的概率就会被保留。
    
    返回:
        scores - tensor 类型,维度为(None,),包含了保留了的锚框的分类概率。
        boxes - tensor 类型,维度为(None,4),包含了保留了的锚框的(b_x, b_y, b_h, b_w)
        classess - tensor 类型,维度为(None,),包含了保留了的锚框的索引
        
    注意:"None"是因为你不知道所选框的确切数量,因为它取决于阈值。
          比如:如果有10个锚框,scores的实际输出大小将是(10,)
    """

    #第一步:计算锚框的得分
    box_scores = box_confidence * box_class_probs

    #第二步:找到最大值的锚框的索引以及对应的最大值的锚框的分数
    box_classes = K.argmax(box_scores, axis=-1)  #(19*19*5*1)
    box_class_scores = K.max(
        box_scores, axis=-1)  #找到最可能的类,是将最后一个维度进行展开(19*19*5*80)得到(19*19*5*1)

    #第三步:根据阈值创建掩码
    filtering_mask = (box_class_scores >= threshold)

    #对scores, boxes 以及 classes使用掩码
    scores = tf.boolean_mask(box_class_scores, filtering_mask)
    boxes = tf.boolean_mask(boxes, filtering_mask)
    classes = tf.boolean_mask(box_classes, filtering_mask)

    return scores, boxes, classes
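A small NumPy demo of the "None" note above: the number of retained boxes is data dependent and shrinks as the threshold grows (the array is a stand-in for box_class_scores):

import numpy as np

box_class_scores = np.random.rand(19, 19, 5)   # 19*19*5 = 1805 candidate boxes
for t in (0.3, 0.6, 0.9):
    kept = int((box_class_scores >= t).sum())
    print(t, kept)   # e.g. roughly 1264 boxes kept at 0.3, roughly 180 at 0.9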
Example 6
def dual_attn_block(inp, nc, squeeze_factor=8):
    '''
    https://github.com/junfu1115/DANet
    '''
    assert nc // squeeze_factor > 0, f"Input channels must be >= {squeeze_factor}, received nc={nc}"
    x = inp
    shape_x = x.get_shape().as_list()

    # position attention module
    x_pam = Conv2D(nc,
                   kernel_size=3,
                   kernel_regularizer=regularizers.l2(w_l2),
                   kernel_initializer=conv_init,
                   use_bias=False,
                   padding="same")(x)
    x_pam = Activation("relu")(x_pam)
    x_pam = normalization(x_pam, norm, nc)
    f_pam = Conv2D(nc // squeeze_factor,
                   1,
                   kernel_regularizer=regularizers.l2(w_l2))(x_pam)
    g_pam = Conv2D(nc // squeeze_factor,
                   1,
                   kernel_regularizer=regularizers.l2(w_l2))(x_pam)
    h_pam = Conv2D(nc, 1, kernel_regularizer=regularizers.l2(w_l2))(x_pam)
    shape_f_pam = f_pam.get_shape().as_list()
    shape_g_pam = g_pam.get_shape().as_list()
    shape_h_pam = h_pam.get_shape().as_list()
    flat_f_pam = Reshape((-1, shape_f_pam[-1]))(f_pam)
    flat_g_pam = Reshape((-1, shape_g_pam[-1]))(g_pam)
    flat_h_pam = Reshape((-1, shape_h_pam[-1]))(h_pam)
    s_pam = Lambda(lambda x: K.batch_dot(x[0],
                                         Permute((2, 1))(x[1])))(
                                             [flat_g_pam, flat_f_pam])
    beta_pam = Softmax(axis=-1)(s_pam)
    o_pam = Lambda(lambda x: K.batch_dot(x[0], x[1]))([beta_pam, flat_h_pam])
    o_pam = Reshape(shape_x[1:])(o_pam)
    o_pam = Scale()(o_pam)
    out_pam = add([o_pam, x_pam])
    out_pam = Conv2D(nc,
                     kernel_size=3,
                     kernel_regularizer=regularizers.l2(w_l2),
                     kernel_initializer=conv_init,
                     use_bias=False,
                     padding="same")(out_pam)
    out_pam = Activation("relu")(out_pam)
    out_pam = normalization(out_pam, norm, nc)

    # channel attention module
    x_chn = Conv2D(nc,
                   kernel_size=3,
                   kernel_regularizer=regularizers.l2(w_l2),
                   kernel_initializer=conv_init,
                   use_bias=False,
                   padding="same")(x)
    x_chn = Activation("relu")(x_chn)
    x_chn = normalization(x_chn, norm, nc)
    shape_x_chn = x_chn.get_shape().as_list()
    flat_f_chn = Reshape((-1, shape_x_chn[-1]))(x_chn)
    flat_g_chn = Reshape((-1, shape_x_chn[-1]))(x_chn)
    flat_h_chn = Reshape((-1, shape_x_chn[-1]))(x_chn)
    s_chn = Lambda(lambda x: K.batch_dot(Permute((2, 1))(x[0]), x[1]))(
        [flat_g_chn, flat_f_chn])
    s_new_chn = Lambda(lambda x: K.repeat_elements(K.max(x, -1, keepdims=True),
                                                   nc, -1))(s_chn)
    s_new_chn = Lambda(lambda x: x[0] - x[1])([s_new_chn, s_chn])
    beta_chn = Softmax(axis=-1)(s_new_chn)
    o_chn = Lambda(lambda x: K.batch_dot(x[0],
                                         Permute((2, 1))(x[1])))(
                                             [flat_h_chn, beta_chn])
    o_chn = Reshape(shape_x[1:])(o_chn)
    o_chn = Scale()(o_chn)
    out_chn = add([o_chn, x_chn])
    out_chn = Conv2D(nc,
                     kernel_size=3,
                     kernel_regularizer=regularizers.l2(w_l2),
                     kernel_initializer=conv_init,
                     use_bias=False,
                     padding="same")(out_chn)
    out_chn = Activation("relu")(out_chn)
    out_chn = normalization(out_chn, norm, nc)

    out = add([out_pam, out_chn])
    return out
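A shape walk-through of the position-attention energies above (a NumPy sketch, not repo code; the dimensions are arbitrary stand-ins): with flat_g_pam and flat_f_pam of shape (batch, HW, nc // squeeze_factor) and flat_h_pam of shape (batch, HW, nc), the batch_dot produces a (batch, HW, HW) affinity matrix that the Softmax turns into attention weights:

import numpy as np

b, hw, nc, ncs = 2, 16, 32, 4          # batch, H*W, channels, squeezed channels
f = np.random.rand(b, hw, ncs)         # flat_f_pam
g = np.random.rand(b, hw, ncs)         # flat_g_pam
h = np.random.rand(b, hw, nc)          # flat_h_pam
s = g @ f.transpose(0, 2, 1)           # (b, hw, hw) position affinities
beta = np.exp(s) / np.exp(s).sum(-1, keepdims=True)   # softmax over last axis
o = beta @ h                           # (b, hw, nc) attended features
assert o.shape == (b, hw, nc)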
Example 7
def yolo_loss(args,
              anchors,
              num_classes,
              rescore_confidence=False,
              print_loss=False):
    """YOLO localization loss function.

    Parameters
    ----------
    yolo_output : tensor
        Final convolutional layer features.

    true_boxes : tensor
        Ground truth boxes tensor with shape [batch, num_true_boxes, 5]
        containing box x_center, y_center, width, height, and class.

    detectors_mask : array
        0/1 mask for detector positions where there is a matching ground truth.

    matching_true_boxes : array
        Corresponding ground truth boxes for positive detector positions.
        Already adjusted for conv height and width.

    anchors : tensor
        Anchor boxes for model.

    num_classes : int
        Number of object classes.

    rescore_confidence : bool, default=False
        If true then set confidence target to IOU of best predicted box with
        the closest matching ground truth box.

    print_loss : bool, default=False
        If True then use a tf.Print() to print the loss components.

    Returns
    -------
    mean_loss : float
        mean localization loss across minibatch
    """
    (yolo_output, true_boxes, detectors_mask, matching_true_boxes) = args
    num_anchors = len(anchors)
    object_scale = 5
    no_object_scale = 1
    class_scale = 1
    coordinates_scale = 1
    pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo_head(
        yolo_output, anchors, num_classes)

    # Unadjusted box predictions for loss.
    # TODO: Remove extra computation shared with yolo_head.
    yolo_output_shape = K.shape(yolo_output)
    feats = K.reshape(yolo_output, [
        -1, yolo_output_shape[1], yolo_output_shape[2], num_anchors,
        num_classes + 5
    ])
    pred_boxes = K.concatenate(
        (K.sigmoid(feats[..., 0:2]), feats[..., 2:4]), axis=-1)

    # TODO: Adjust predictions by image width/height for non-square images?
    # IOUs may be off due to different aspect ratio.

    # Expand pred x,y,w,h to allow comparison with ground truth.
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    pred_xy = K.expand_dims(pred_xy, 4)
    pred_wh = K.expand_dims(pred_wh, 4)

    pred_wh_half = pred_wh / 2.
    pred_mins = pred_xy - pred_wh_half
    pred_maxes = pred_xy + pred_wh_half

    true_boxes_shape = K.shape(true_boxes)

    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    true_boxes = K.reshape(true_boxes, [
        true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1], true_boxes_shape[2]
    ])
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]

    # Find IOU of each predicted box with each ground truth box.
    true_wh_half = true_wh / 2.
    true_mins = true_xy - true_wh_half
    true_maxes = true_xy + true_wh_half

    intersect_mins = K.maximum(pred_mins, true_mins)
    intersect_maxes = K.minimum(pred_maxes, true_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = intersect_areas / union_areas

    # Best IOUs for each location.
    best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
    best_ious = K.expand_dims(best_ious)

    # A detector has found an object if IOU > thresh for some true box.
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

    # TODO: Darknet region training includes extra coordinate loss for early
    # training steps to encourage predictions to match anchor priors.

    # Determine confidence weights from object and no_object weights.
    # NOTE: YOLO does not use binary cross-entropy here.
    no_object_weights = (no_object_scale * (1 - object_detections) *
                         (1 - detectors_mask))
    no_objects_loss = no_object_weights * K.square(-pred_confidence)

    if rescore_confidence:
        objects_loss = (object_scale * detectors_mask *
                        K.square(best_ious - pred_confidence))
    else:
        objects_loss = (object_scale * detectors_mask *
                        K.square(1 - pred_confidence))
    confidence_loss = objects_loss + no_objects_loss

    # Classification loss for matching detections.
    # NOTE: YOLO does not use categorical cross-entropy loss here.
    matching_classes = K.cast(matching_true_boxes[..., 4], 'int32')
    matching_classes = K.one_hot(matching_classes, num_classes)
    classification_loss = (class_scale * detectors_mask *
                           K.square(matching_classes - pred_class_prob))

    # Coordinate loss for matching detection boxes.
    matching_boxes = matching_true_boxes[..., 0:4]
    coordinates_loss = (coordinates_scale * detectors_mask *
                        K.square(matching_boxes - pred_boxes))

    confidence_loss_sum = K.sum(confidence_loss)
    classification_loss_sum = K.sum(classification_loss)
    coordinates_loss_sum = K.sum(coordinates_loss)
    total_loss = 0.5 * (
        confidence_loss_sum + classification_loss_sum + coordinates_loss_sum)
    if print_loss:
        total_loss = tf.Print(
            total_loss, [
                total_loss, confidence_loss_sum, classification_loss_sum,
                coordinates_loss_sum
            ],
            message='yolo_loss, conf_loss, class_loss, box_coord_loss:')

    return total_loss
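For context, a hedged sketch of how a loss like this is wired into training in the YAD2K style: it is attached as a Lambda layer whose output is the scalar loss, and the model is compiled against an identity loss. model_body, anchors, and num_classes are assumed to come from the surrounding script, and the 13x13x5 shapes assume a 416x416 input:

from keras.layers import Input, Lambda
from keras.models import Model

boxes_input = Input(shape=(None, 5))               # ground-truth boxes
detectors_mask_input = Input(shape=(13, 13, 5, 1))
matching_boxes_input = Input(shape=(13, 13, 5, 5))

model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
                    arguments={'anchors': anchors, 'num_classes': num_classes})(
    [model_body.output, boxes_input, detectors_mask_input, matching_boxes_input])
model = Model([model_body.input, boxes_input,
               detectors_mask_input, matching_boxes_input], model_loss)
# The Lambda output already is the loss, so compile with an identity function.
model.compile(optimizer='adam', loss={'yolo_loss': lambda y_true, y_pred: y_pred})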