Example #1
def fcn_upsample(small,
                 big,
                 ksize=[4, 4],
                 strides=[2, 2],
                 padding='SAME',
                 name=None,
                 outputs_collections=None):
    """
    the upsample block for fcn, the specific strategy is :
        1. [1,1] conv to reduce big's channels so that channels match
        2. trans_conv to recover small's resolution so that resolution match
    :param small: low resolution feature
    :param big: high resolution feature
    :param ksize: trans_conv kernel size
    :param strides: trans_conv kernel stride
    :param padding: trans_conv kernel padding mode
    :param name: name for this op
    :param outputs_collections: add this op's output to outputs_collections
    :return:
    """
    # trans_conv small to big size
    with tf.variable_scope(name, 'fcn_upsample'):
        outc = tensor_shape(small)[-1]
        big = conv2d(big, outc, ksize=[1, 1], activate=None, name='score_conv')
        big_shape = tensor_shape(big)
        big_dim = big_shape[-1]
        trans_conv = trans_conv2d(small,
                                  outc=big_dim,
                                  ksize=ksize,
                                  output_shape=big_shape,
                                  strides=strides,
                                  padding=padding)
        summary = trans_conv + big
    tf.add_to_collection(outputs_collections, summary)
    return summary
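# These snippets all rely on tensor_shape / tensor_rank helpers that are not
# included in the examples. A minimal sketch of what they are assumed to do
# (prefer static dimensions, fall back to dynamic ones):
def tensor_shape(tensor):
    """Return the shape as a Python list, using static dims where known."""
    static = tensor.get_shape().as_list()
    dynamic = tf.unstack(tf.shape(tensor))
    return [s if s is not None else d for s, d in zip(static, dynamic)]

def tensor_rank(tensor):
    """Return the static rank of a tensor."""
    return tensor.get_shape().ndims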
Example #2
def draw_bbox(image, bboxes):
    # tf.image.draw_bounding_boxes expects image [N, H, W, C] and
    # bboxes [N, num_boxes, 4], so add the missing batch dimension if needed
    if tensor_rank(image) == 3:
        image = tf.expand_dims(image, axis=0)
    if tensor_rank(bboxes) == 2:
        bboxes = tf.expand_dims(bboxes, axis=0)
    after = tf.image.draw_bounding_boxes(image, bboxes)
    return after
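# Hedged usage sketch for draw_bbox: tf.image.draw_bounding_boxes takes a float
# image batch and boxes in normalized [y_min, x_min, y_max, x_max] order.
image = tf.random_uniform([2, 128, 128, 3])
bboxes = tf.constant([[[0.1, 0.1, 0.6, 0.6]],
                      [[0.2, 0.3, 0.8, 0.9]]])
drawn = draw_bbox(image, bboxes)  # -> [2, 128, 128, 3]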
def _yolo_detection_loss(locations,
                         scores,
                         encode_locations,
                         encode_labels,
                         encode_ious,
                         pos_th,
                         background_label=0,
                         alpha=[1.0, 5.0, 1.0, 1.0]):
    """
    Calculate loss for one layer,
    encode_labels corresponds to the GT box with highest iou, but this iou can be less than neg_th!
    so need to process and create new labels !
    :param locations: predicted locations [N, H, W, K, 4 ]
    :param scores: predicted scores [N, H, W, K, 21]
    :param encode_locations: [N, H, W, K, 4]
    :param encode_labels: [N, H, W, K]
    :param encode_ious: [N, H, W, K]
    :return:
    """
    positive_mask = tf.logical_and(encode_labels != background_label,
                                   encode_ious > pos_th)
    positive_mask = tf.cast(positive_mask, tf.float32)
    tf.add_to_collection('positive_nums', tf.reduce_sum(positive_mask))
    num_classes = tensor_shape(scores)[-1]
    batch_size = tensor_shape(locations)[0]

    with tf.name_scope('classes_loss'):
        classes_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.cast(
            tf.one_hot(encode_labels, depth=num_classes), dtype=tf.float32),
                                                               logits=scores)
        classes_loss = classes_loss[..., 1:]  # drop the background channel (assumes background_label == 0)
        classes_loss = tf.reduce_sum(classes_loss, axis=-1)
        classes_loss = alpha[0] * tf.reduce_sum(
            classes_loss * positive_mask) / batch_size
        tf.add_to_collection(tf.GraphKeys.LOSSES, classes_loss)

    with tf.name_scope('objectness_loss'):
        # reuse the background channel's logit as the objectness logit
        object_scores = scores[..., background_label]
        tf.add_to_collection('objectness', object_scores)
        object_losses = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=positive_mask, logits=object_scores)
        is_obj_losses = alpha[1] * tf.reduce_sum(
            object_losses * positive_mask) / batch_size
        non_obj_losses = alpha[2] * tf.reduce_sum(
            object_losses * (1.0 - positive_mask)) / batch_size
        tf.add_to_collection(tf.GraphKeys.LOSSES, is_obj_losses)
        tf.add_to_collection(tf.GraphKeys.LOSSES, non_obj_losses)

    with tf.name_scope('bbox_regression_loss'):
        bbox_loss = tf.square(locations - encode_locations)
        bbox_loss = tf.reduce_sum(bbox_loss, axis=-1)
        bbox_loss *= positive_mask
        bbox_loss = alpha[3] * tf.reduce_sum(bbox_loss) / batch_size
        tf.add_to_collection(tf.GraphKeys.LOSSES, bbox_loss)

    return classes_loss, is_obj_losses, non_obj_losses, bbox_loss
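# Every partial loss above is also added to tf.GraphKeys.LOSSES, so a plausible
# way to build the total training objective (an assumption; the aggregation is
# not shown in these snippets):
total_loss = tf.add_n(tf.get_collection(tf.GraphKeys.LOSSES), name='total_loss')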
Example #4
def mAP(tensor1, tensor2):
    # NOTE: despite its name, this computes element-wise accuracy,
    # not mean average precision
    shape1 = tensor_shape(tensor1)
    shape2 = tensor_shape(tensor2)
    type1 = tensor1.dtype
    type2 = tensor2.dtype

    assert shape1 == shape2
    assert type1 == type2

    equal = tf.cast(tf.equal(tensor1, tensor2), tf.float32)
    acc = tf.reduce_mean(equal, name='mAP')
    return acc
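# Quick sanity check for mAP above (element-wise accuracy):
a = tf.constant([1, 2, 3, 4])
b = tf.constant([1, 2, 0, 4])
acc = mAP(a, b)  # evaluates to 0.75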
def layers_loss_new(prediction_gathers,
                    encoding_gathers,
                    pos_th=0.5,
                    neg_th=0.3,
                    neg_ratio=3,
                    alpha=[1.0, 1.0, 1.0],
                    HNM=False):
    gather_pred_locations, gather_pred_scores = prediction_gathers
    gather_truth_locations, gather_truth_labels, gather_truth_ious = encoding_gathers

    concat_pred_locations = []
    concat_pred_scores = []
    concat_truth_locations = []
    concat_truth_labels = []
    concat_truth_ious = []

    batch_size = tensor_shape(gather_pred_scores[0])[0]
    num_classes = tensor_shape(gather_pred_scores[0])[-1]
    for idx in range(len(gather_pred_locations)):
        concat_pred_locations.append(
            tf.reshape(gather_pred_locations[idx], shape=[-1, 4]))
        concat_pred_scores.append(
            tf.reshape(gather_pred_scores[idx], shape=[-1, num_classes]))
        concat_truth_locations.append(
            tf.reshape(gather_truth_locations[idx], shape=[-1, 4]))
        concat_truth_labels.append(
            tf.reshape(gather_truth_labels[idx], shape=[-1]))
        concat_truth_ious.append(tf.reshape(gather_truth_ious[idx],
                                            shape=[-1]))

    concat_pred_locations = tf.concat(concat_pred_locations, axis=0)
    concat_pred_scores = tf.concat(concat_pred_scores, axis=0)
    concat_truth_locations = tf.concat(concat_truth_locations, axis=0)
    concat_truth_labels = tf.concat(concat_truth_labels, axis=0)
    concat_truth_ious = tf.concat(concat_truth_ious, axis=0)

    pos_loss, neg_loss, bbox_loss = _layer_loss(
        locations=concat_pred_locations,
        scores=concat_pred_scores,
        encode_locations=concat_truth_locations,
        encode_labels=concat_truth_labels,
        encode_ious=concat_truth_ious,
        pos_th=pos_th,
        neg_th=neg_th,
        neg_ratio=neg_ratio,
        batch_size=batch_size,
        alpha=alpha,
        HNM=HNM)
    return [pos_loss], [neg_loss], [bbox_loss]
def compare(predictions, labels):
    if tensor_shape(predictions) != tensor_shape(labels):
        h, w = tensor_shape(labels)[1:3]
        predictions = tf.image.resize_nearest_neighbor(predictions, [h, w],
                                                       align_corners=True)

    # treat the 255 void label as always matching
    same = tf.logical_or(tf.equal(predictions, labels), tf.equal(labels, 255))
    same = tf.cast(same, tf.int32)
    paint_ = tf.one_hot(same, depth=2, axis=-1, dtype=predictions.dtype)
    paint_ = tf.squeeze(tf.tensordot(paint_,
                                     tf.cast([[255, 0, 0], [0, 0, 0]],
                                             predictions.dtype),
                                     axes=[[-1], [0]]),
                        axis=3)
    return paint_
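# Hedged usage sketch for compare: given integer label maps of the same shape,
# mismatching pixels are painted red; matches (and 255-void pixels) stay black.
preds = tf.zeros([1, 64, 64, 1], tf.int32)
gts = tf.concat([tf.zeros([1, 64, 32, 1], tf.int32),
                 tf.ones([1, 64, 32, 1], tf.int32)], axis=2)
diff_map = compare(preds, gts)  # [1, 64, 64, 3], right half painted red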
Example #7
def from_sem_to_boundary(anno, nrange=3):
    if tensor_rank(anno) == 3:
        anno = anno[tf.newaxis, ...]
    H, W = tensor_shape(anno)[1:3]

    anno = tf.cast(anno, tf.int32)
    is_bound = tf.zeros_like(anno, dtype=tf.bool)

    # note: the r == 0 iteration only produces identity shifts and is a no-op
    for r in range(nrange):
        pad_anno = tf.pad(anno, [[0, 0], [r, r], [r, r], [0, 0]],
                          mode="SYMMETRIC")

        shifts = []
        for ridx in [-r, 0, r]:
            for cidx in [-r, 0, r]:
                trans_anno = pad_anno[:, (r + ridx):(H + r + ridx),
                                      (r + cidx):(W + r + cidx)]
                shifts.append(trans_anno)

        for shift in shifts:
            shift_boundary = tf.not_equal(shift, anno)
            is_bound = tf.logical_or(is_bound, shift_boundary)

    return is_bound
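# Minimal usage sketch (assumes a static input shape): turn a semantic
# annotation into a boundary mask and make it viewable as a 0/255 image.
anno = tf.random_uniform([1, 65, 65, 1], maxval=21, dtype=tf.int32)
bound = from_sem_to_boundary(anno, nrange=3)  # bool, [1, 65, 65, 1]
bound_img = tf.cast(bound, tf.uint8) * 255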
Example #8
def fcn_32(inputs,
           num_classes=21,
           weight_init=None,
           weight_reg=None,
           bias_init=tf.zeros_initializer,
           bias_reg=None,
           device='cpu'):
    image_shape = tensor_shape(inputs)

    with arg_scope(vgg_arg_scope()):
        fcn32, end_points = vgg_16(inputs,
                                   num_classes=num_classes,
                                   spatial_squeeze=False,
                                   fc_conv_padding='SAME')
    with tf.name_scope('upscale') as ns:
        end_points_collection = ns + '_end_points'
        with arg_scope(
                fcn_arg_scope(weight_init, weight_reg, bias_init, bias_reg,
                              device, end_points_collection)):
            # conv7 deconv and add with pool4 [jump = 16]
            fcn1 = trans_conv2d(fcn32,
                                outc=num_classes,
                                ksize=[64, 64],
                                strides=[32, 32],
                                output_shape=image_shape[:-1] + [num_classes],
                                name='to_1')

            print(tf.get_collection(end_points_collection))
            end_points.update(
                dict([(ep.name, ep)
                      for ep in tf.get_collection(end_points_collection)]))
        end_points[ns + '_to_1'] = fcn1
    return fcn1, end_points
Example #9
def _layer_prediction(feature_map, num_anchors, conv_params, num_classes, scope=None):
    """
    For each location in feature map, predict 4*num_anchors locations and num_classes objectness
    :param feature_map: [None, H, W, C]
    :param scope:
    :return: locations with shape [None, H, W, num_anchors, 4]
             scores with shape [None, H, W, num_anchors, num_classes]
    """
    with tf.variable_scope(scope, 'feature2bbox'):
        # TODO : CHECK ACTIVATION FUNC HERE

        with slim.arg_scope([conv2d],
                            activation_fn=None,
                            normalizer_fn=None,
                            **conv_params):
            locations = conv2d(feature_map,
                               kernel_size=3,
                               num_outputs=num_anchors * 4,
                               scope='conv_loc')

            scores = conv2d(feature_map,
                            kernel_size=3,
                            num_outputs=num_anchors * num_classes,
                            scope='conv_obj')

        partial_shape = (tensor_shape(locations))[1:-1]

        locations = tf.reshape(locations, shape=[-1] + partial_shape + [num_anchors, 4])
        scores = tf.reshape(scores, shape=[-1] + partial_shape + [num_anchors, num_classes])

        return locations, scores
def soft_nms(scores, bboxes, max_output_size, sigma=0.5):
    def gaussian_decay(score, degree, sigma=1.0):
        return score * tf.exp(-degree**2 / sigma)

    bboxes_num = tensor_shape(scores)[0]  # assumes a static number of boxes
    loop_times = min(bboxes_num, max_output_size)
    is_select = tf.zeros(shape=[bboxes_num], dtype=tf.float32)

    def condition(i, scores, is_select):
        return tf.less(i, loop_times)

    def main_body(i, scores, is_select):
        idx = tf.argmax(scores * (1 - is_select))
        # mark idx as one
        is_select = is_select + \
                    tf.cast(tf.one_hot(idx, bboxes_num), tf.float32)

        ious = iou(bboxes, tf.gather(bboxes, idx))
        decay_scores = gaussian_decay(scores, ious, sigma=sigma)

        scores = is_select * scores + (1 - is_select) * decay_scores
        return [i + 1, scores, is_select]

    i = 0
    [i, scores, is_select] = tf.while_loop(cond=condition,
                                           body=main_body,
                                           loop_vars=[i, scores, is_select])
    # [?,]
    idxes = tf.squeeze(tf.where(is_select > 0), axis=-1)
    sorted_scores, sorted_idx = tf.nn.top_k(tf.gather(scores, idxes),
                                            k=loop_times)
    sorted_bbox = tf.gather(bboxes, tf.gather(idxes, sorted_idx))

    return sorted_scores, sorted_bbox
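# soft_nms above assumes an iou(bboxes, box) helper that compares every box
# against one reference box. A minimal sketch under the assumed
# [y_min, x_min, y_max, x_max] convention:
def iou(bboxes, box):
    ymin = tf.maximum(bboxes[:, 0], box[0])
    xmin = tf.maximum(bboxes[:, 1], box[1])
    ymax = tf.minimum(bboxes[:, 2], box[2])
    xmax = tf.minimum(bboxes[:, 3], box[3])
    inter = tf.maximum(ymax - ymin, 0.0) * tf.maximum(xmax - xmin, 0.0)
    area_a = (bboxes[:, 2] - bboxes[:, 0]) * (bboxes[:, 3] - bboxes[:, 1])
    area_b = (box[2] - box[0]) * (box[3] - box[1])
    return inter / (area_a + area_b - inter)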
Example #11
def multi_scale_loss(logits_pyramids,
                     labels,
                     loss_func,
                     resize_labels=False,
                     loss_func_args=None):
    loss_func_args = loss_func_args or {}
    _, label_h, label_w, _ = tensor_shape(labels)
    loss_pyramids = []
    for logits in logits_pyramids:
        _, h, w, _ = tensor_shape(logits)
        if resize_labels:
            resized_labels = tf.image.resize_nearest_neighbor(
                labels, size=[h, w], align_corners=True)
            loss = loss_func(logits, resized_labels, **loss_func_args)
        else:
            resized_logits = tf.image.resize_bilinear(logits,
                                                      size=[label_h, label_w],
                                                      align_corners=True)
            loss = loss_func(resized_logits, labels, **loss_func_args)
        loss_pyramids.append(loss)
    return loss_pyramids
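# Hedged usage sketch: loss_func is expected to accept (logits, labels,
# **loss_func_args). The loss below and the dummy tensors are illustrative
# assumptions, not part of the original code.
def seg_ce_loss(logits, labels, weight=1.0):
    labels = tf.squeeze(labels, axis=-1)
    ce = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                        logits=logits)
    return weight * tf.reduce_mean(ce)

dummy_labels = tf.zeros([2, 64, 64, 1], tf.int32)
dummy_pyramid = [tf.zeros([2, 16, 16, 21]), tf.zeros([2, 32, 32, 21])]
pyramid_losses = multi_scale_loss(dummy_pyramid,
                                  dummy_labels,
                                  loss_func=seg_ce_loss,
                                  loss_func_args={'weight': 1.0})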
Example #12
def fcn_8(inputs,
          num_classes=21,
          weight_init=None,
          weight_reg=None,
          bias_init=tf.zeros_initializer,
          bias_reg=None,
          device='cpu'):
    image_shape = tensor_shape(inputs)

    with arg_scope(
            vgg_arg_scope(weight_init,
                          weight_reg,
                          bias_init,
                          bias_reg,
                          device=device)):
        fcn32, end_points = vgg_16(inputs,
                                   num_classes=num_classes,
                                   spatial_squeeze=False,
                                   fc_conv_padding='SAME')
    with tf.name_scope('upscale') as ns:
        end_points_collection = ns + '_end_points'
        with arg_scope(
                fcn_arg_scope(weight_init, weight_reg, bias_init, bias_reg,
                              device, end_points_collection)):
            # conv7 deconv and add with pool4 [jump = 16]
            pool4 = end_points['vgg_16/pool4:0']
            fcn16 = fcn_upsample(fcn32, pool4, ksize=[4, 4], name='to_16')

            pool3 = end_points['vgg_16/pool3:0']
            fcn8 = fcn_upsample(fcn16, pool3, ksize=[4, 4], name='to_8')

            fcn1 = trans_conv2d(fcn8,
                                outc=num_classes,
                                ksize=[16, 16],
                                strides=[8, 8],
                                output_shape=image_shape[:-1] + [num_classes],
                                name='trans_conv/to_1')

            # print(tf.get_collection(end_points_collection))
            end_points.update(
                dict([(ep.name, ep)
                      for ep in tf.get_collection(end_points_collection)]))
        end_points[ns + '_to_32'] = fcn32
        end_points[ns + '_to_16'] = fcn16
        end_points[ns + '_to_8'] = fcn8
        end_points[ns + '_to_1'] = fcn1

    return fcn1, end_points
def gaussian_blur(img, kernel_size=5, sigma=1.0):
    """
    :param img: [H, W, C] or [N, H, W, C]
    :param kernel_size:
    :param sigma:
    :return:
    """
    if type(kernel_size) is list:
        kernel_size = tf.random_shuffle(kernel_size)[0]

    tf.add_to_collection('kernel_size', kernel_size)
    squeeze = False
    if tensor_rank(img) == 3:
        img = tf.expand_dims(img, axis=0)
        squeeze = True

    # generate gaussian kernel
    g_r = tf.range(kernel_size)

    kernel_size = tf.cast(kernel_size, tf.float32)
    g_r = tf.cast(g_r, tf.float32)

    if sigma is None:
        # https://docs.opencv.org/3.1.0/d4/d86/group__imgproc__filter.html#gac05a120c1ae92a6060dd0db190a61afa
        sigma = 0.3 * ((kernel_size - 1) * 0.5 - 1) + 0.8

    g_r = tf.exp(-1.0 * (g_r - 0.5 * (kernel_size - 1))**2 / (2.0 * sigma**2))
    g_r = g_r / tf.reduce_sum(g_r)
    g_2d = g_r[tf.newaxis, ...] * g_r[..., tf.newaxis]
    g_2d = g_2d[..., tf.newaxis, tf.newaxis]

    kernel_size = tf.cast(kernel_size, tf.int32)
    f = lambda x: tf.nn.conv2d(
        same_padding(x, [kernel_size, kernel_size], [1, 1]),
        filter=g_2d,
        strides=[1, 1, 1, 1],
        padding='VALID',
    )

    blurs = []
    for i in range(tensor_shape(img)[-1]):
        blurs.append(f(img[..., i][..., tf.newaxis]))

    blur = tf.concat(blurs, axis=-1)
    if squeeze:
        blur = tf.squeeze(blur, axis=0)
    return blur
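# gaussian_blur assumes a same_padding(x, ksize, strides) helper that pads the
# input so a subsequent VALID conv keeps the spatial size. A plausible sketch
# for the stride-[1, 1] case used above:
def same_padding(x, ksize, strides):
    pad_h, pad_w = ksize[0] - 1, ksize[1] - 1
    return tf.pad(x, [[0, 0],
                      [pad_h // 2, pad_h - pad_h // 2],
                      [pad_w // 2, pad_w - pad_w // 2],
                      [0, 0]])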
Example #14
def get_image_pyramids(images,
                       scales,
                       method=tf.image.ResizeMethod.BILINEAR,
                       align_corners=True):
    _, h, w, _ = tensor_shape(images)
    scales = sorted(scales)
    image_pyramids = []

    for scale in scales:
        scale_h, scale_w = int(ceil(scale * h)), int(ceil(scale * w))
        resize_image = tf.image.resize_images(
            images,
            size=[scale_h, scale_w],
            method=method,
            align_corners=align_corners)
        image_pyramids.append(resize_image)
    return image_pyramids
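# Usage sketch (assumes `from math import ceil` at module level and a static
# input size):
images = tf.zeros([4, 256, 256, 3])
pyramid = get_image_pyramids(images, scales=[1.0, 0.5, 2.0])
# -> three tensors of sizes 128x128, 256x256 and 512x512 (scales get sorted)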
Example #15
File: ssd.py Project: Mooonside/SSD
def layers_anchors(end_points):
    """
    Gather anchors from layers
    :param end_points:
    :return:
    """
    ys, xs, hs, ws = [], [], [], []
    for idx, key in enumerate(default_params.feat_layers):
        layer = end_points[key]
        y, x, h, w = _layer_anchors(
            tensor_shape(layer)[1:-1], default_params.feat_steps[idx],
            default_params.anchor_scales[idx],
            default_params.anchor_scales[idx + 1],
            default_params.anchor_ratios[idx])
        ys.append(y)
        xs.append(x)
        hs.append(h)
        ws.append(w)
    return ys, xs, hs, ws
Example #16
def fcn_16(inputs,
           num_classes=21,
           weight_init=None,
           weight_reg=None,
           bias_init=tf.zeros_initializer,
           bias_reg=None):
    image_shape = tensor_shape(inputs)

    with arg_scope(vgg_arg_scope()):
        fcn32, end_points = vgg_16(inputs,
                                   num_classes=num_classes,
                                   spatial_squeeze=False,
                                   fc_conv_padding='SAME')
    prefix_name = list(end_points.keys())[0]
    prefix_name = prefix_name[:search('vgg_16', prefix_name).span()[0]]
    with tf.name_scope('upscale') as ns:
        end_points_collection = ns + '_end_points'
        # NOTE: fcn_arg_scope takes a device argument before the collection
        # (cf. fcn_32 / fcn_8); 'cpu' is assumed here since fcn_16 exposes none
        with arg_scope(
                fcn_arg_scope(weight_init, weight_reg, bias_init, bias_reg,
                              'cpu', end_points_collection)):
            # conv7 deconv and add with pool4 [jump = 16]
            pool4 = end_points[prefix_name + 'vgg_16/pool4:0']
            fcn16 = fcn_upsample(fcn32, pool4, ksize=[4, 4], name='to_16')

            fcn1 = trans_conv2d(fcn16,
                                outc=num_classes,
                                ksize=[32, 32],
                                strides=[16, 16],
                                output_shape=image_shape[:-1] + [num_classes],
                                name='to_1')

            print(tf.get_collection(end_points_collection))
            end_points.update(
                dict([(ep.name, ep)
                      for ep in tf.get_collection(end_points_collection)]))
        end_points[ns + '_to_32'] = fcn32
        end_points[ns + '_to_16'] = fcn16
        end_points[ns + '_to_1'] = fcn1

    return fcn1, end_points
def gaussian_edge(
        input,
        kernel=(3, 3),
        sigma=None,
        nearest=3,
        dtype=tf.float32
):
    """
    For each point in input, if input is larger than 0, assign a gaussian distribution around the point
    For multiple gaussian, take their maximum!

    :param input: [N, H, W] or [N, H, W, 1]
    :param kernel: [kh, kw]
    :param sigma: [sh, sw]
    :param dtype: output type
    :return:
    """
    assert input.dtype is tf.int32
    add_tail_axis = False
    if tensor_rank(input) == 4:
        if tensor_shape(input)[-1] == 1:
            input = input[..., 0]
            add_tail_axis = True

    #  sigma = 0.3 * ((ksize - 1) * 0.5 - 1) + 0.8
    if sigma is None:
        sigmax = 0.3 * (kernel[0] * 0.5 - 1) + 0.8
        sigmay = 0.3 * (kernel[1] * 0.5 - 1) + 0.8
        sigma = [sigmax, sigmay]

    edge = gaussian_edge_op(x=input,
                            T=dtype,
                            kernel=kernel,
                            sigma=sigma,
                            nearest=nearest)
    if add_tail_axis:
        edge = edge[..., tf.newaxis]

    return edge
Example #18
File: ssd.py Project: Mooonside/SSD
def _layer_prediction(feature_map,
                      num_anchors,
                      num_classes,
                      l2_norm=False,
                      name=None):
    """
    For each location in feature map, predict 4*num_anchors locations and num_classes objectness
    :param feature_map: [None, H, W, C]
    :param num_classes:
    :param name:
    :return: locations with shape [None, H, W, num_anchors, 4]
             scores with shape [None, H, W, num_anchors, num_classes]
    """
    with tf.variable_scope(name, 'feature2bbox'):
        if l2_norm:
            feature_map = l2_norm_1D(feature_map, scale=True)
        locations = conv2d(feature_map,
                           outc=4 * num_anchors,
                           ksize=[3, 3],
                           activate=None,
                           name='conv_loc')
        scores = conv2d(feature_map,
                        outc=num_anchors * num_classes,
                        ksize=[3, 3],
                        activate=None,
                        name='conv_cls')

        partial_shape = (tensor_shape(feature_map))[1:-1]

        locations = tf.reshape(locations,
                               shape=[-1] + partial_shape + [num_anchors, 4])
        scores = tf.reshape(scores,
                            shape=[-1] + partial_shape +
                            [num_anchors, num_classes])

        # batch size = 1 version
        locations = tf.squeeze(locations, axis=0)
        scores = tf.squeeze(scores, axis=0)
        return locations, scores
Example #19
def locate_boundary(labels):
    """ locate boundaries in labels
    todo: test this function
    :param labels: [N, H, W, C]
    :return: a bool tensor, true indicating boundaries
    """
    H, W = tensor_shape(labels)[1:3]
    # compare each pixel with its bottom neighbor
    pad = tf.pad(labels, [[0, 0], [0, 1], [0, 0], [0, 0]],
                 mode='REFLECT')[:, 1:, :, :]
    boundary = tf.not_equal(pad, labels)
    # compare each pixel with its right neighbor
    pad = tf.pad(labels, [[0, 0], [0, 0], [0, 1], [0, 0]],
                 mode='REFLECT')[:, :, 1:, :]
    boundary = tf.logical_or(boundary, tf.not_equal(pad, labels))

    # dilate the boundary mask over the 8-neighborhood
    expansions = tf.pad(tf.cast(boundary, tf.int32),
                        [[0, 0], [1, 1], [1, 1], [0, 0]], mode='REFLECT')
    for xmove in [-1, 0, 1]:
        for ymove in [-1, 0, 1]:
            boundary = tf.logical_or(
                boundary,
                tf.cast(expansions[:, 1 + xmove:1 + xmove + H,
                                   1 + ymove:1 + ymove + W, :], tf.bool))
    return boundary
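# Quick test sketch for the todo above: a two-region label map should yield a
# boundary band around the column where the regions meet.
labels = tf.concat([tf.zeros([1, 8, 4, 1], tf.int32),
                    tf.ones([1, 8, 4, 1], tf.int32)], axis=2)
bnd = locate_boundary(labels)  # True in a band around columns 3-4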
Example #20
def _get_logits(images,
                model_options,
                outputs_to_num_classes,
                weight_decay=0.0001,
                reuse=tf.AUTO_REUSE,
                is_training=False,
                fine_tune_batch_norm=False):
    """Gets the logits by atrous/image spatial pyramid pooling.

    Args:
      images: A tensor of size [batch, height, width, channels].
      model_options: A ModelOptions instance to configure models.
      outputs_to_num_classes: A map from output type to the number of classes.
      weight_decay: The weight decay for model variables.
      reuse: Reuse the model variables or not.
      is_training: Is training or not.
      fine_tune_batch_norm: Fine-tune the batch norm parameters or not.

    Returns:
      outputs_to_logits: A map from output_type to logits.
    """
    features, end_points = _extract_features(
        images,
        model_options,
        weight_decay=weight_decay,
        reuse=reuse,
        is_training=is_training,
        fine_tune_batch_norm=fine_tune_batch_norm)
    print('ASPP FEATURES', features)
    for i in end_points.keys():
        print(end_points[i])

    DEBUG_VARS.aspp_result = features
    if model_options.decoder_output_stride is not None:
        _, image_h, image_w, _ = tensor_shape(images)
        decoder_height = scale_dimension(
            image_h, 1.0 / model_options.decoder_output_stride)
        decoder_width = scale_dimension(
            image_w, 1.0 / model_options.decoder_output_stride)
        features = refine_by_decoder(features,
                                     end_points,
                                     decoder_height=decoder_height,
                                     decoder_width=decoder_width,
                                     decoder_use_separable_conv=model_options.
                                     decoder_use_separable_conv,
                                     model_variant=model_options.model_variant,
                                     weight_decay=weight_decay,
                                     reuse=reuse,
                                     is_training=is_training,
                                     fine_tune_batch_norm=fine_tune_batch_norm)

    outputs_to_logits = {}
    for output in sorted(outputs_to_num_classes):
        outputs_to_logits[output] = _get_branch_logits(
            features,
            outputs_to_num_classes[output],
            model_options.atrous_rates,
            aspp_with_batch_norm=model_options.aspp_with_batch_norm,
            kernel_size=model_options.logits_kernel_size,
            weight_decay=weight_decay,
            reuse=reuse,
            scope_suffix=output)

        # note: this entry is rewritten on every loop iteration and ends up
        # using the last output's class count
        outputs_to_logits['detection'] = _get_detection(
            end_points,
            num_classes=outputs_to_num_classes[output],
            weight_decay=weight_decay,
            reuse=reuse,
            scope_suffix='scale')

    return outputs_to_logits
def _layer_loss(locations,
                scores,
                encode_locations,
                encode_labels,
                encode_ious,
                pos_th,
                neg_th,
                neg_ratio,
                background_label=0,
                alpha=[1.0, 1.0, 1.0],
                HNM=False,
                batch_size=None):
    """
    Calculate loss for one layer.
    encode_labels corresponds to the GT box with the highest iou, but this iou
    can still be below neg_th, so the labels must be re-processed into new ones.
    :param locations: predicted locations [N, H, W, K, 4 ]
    :param scores: predicted scores [N, H, W, K, 21]
    :param encode_locations: [N, H, W, K, 4]
    :param encode_labels: [N, H, W, K]
    :param encode_ious: [N, H, W, K]
    :return:
    """
    positive_mask = encode_ious > pos_th

    # redefine the labels: anchors assigned to some class with iou <= neg_th should be reassigned to background
    negative_mask = tf.logical_and(encode_ious <= neg_th,
                                   tf.logical_not(positive_mask))
    # background_label for negative and label for positive
    negative_labels = tf.where(
        negative_mask, background_label * tf.cast(negative_mask, tf.int32),
        encode_labels)
    # tf.add_to_collection('debug', negative_labels)
    if batch_size is None:
        batch_size = tensor_shape(locations)[0]

    if HNM:
        positive_num = tf.reduce_sum(tf.cast(positive_mask, tf.int32))
        # calculate background scores
        neg_scores = tf.nn.softmax(scores, axis=-1)[..., background_label]
        neg_scores = tf.where(
            negative_mask,
            neg_scores,
            # set positive anchors' negative score to 1.0 so they are not counted in top_k
            1.0 - tf.cast(negative_mask, tf.float32))
        # compute the number of negatives; batch_size is added so neg_values is never empty
        max_negative_num = tf.reduce_sum(tf.cast(negative_mask, tf.int32))

        negative_num = neg_ratio * positive_num + batch_size
        negative_num = tf.minimum(negative_num, max_negative_num)

        # Hard Negative Mining:
        # find those with lower background scores, but are indeed background!
        neg_values, _ = tf.nn.top_k(tf.reshape(-1.0 * neg_scores, [-1]),
                                    k=negative_num)
        negative_mask = tf.logical_and(negative_mask,
                                       neg_scores < -neg_values[-1])

    positive_mask = tf.cast(positive_mask, tf.float32)
    negative_mask = tf.cast(negative_mask, tf.float32)

    tf.add_to_collection(ZERO_NUMBER_PER_LAYER_SCOPE,
                         tf.reduce_sum(positive_mask * negative_mask))
    tf.add_to_collection(POS_NUMBER_PER_LAYER_SCOPE,
                         tf.reduce_sum(positive_mask))
    tf.add_to_collection(NEG_NUMBER_PER_LAYER_SCOPE,
                         tf.reduce_sum(negative_mask))

    with tf.name_scope('cross_entropy_loss'):
        with tf.name_scope('positive'):
            pos_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=scores, labels=encode_labels)
            pos_loss = tf.div(tf.reduce_sum(pos_loss * positive_mask),
                              batch_size)
            pos_loss *= alpha[0]
            tf.add_to_collection(tf.GraphKeys.LOSSES, pos_loss)

        with tf.name_scope('negative'):
            neg_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=scores, labels=negative_labels)
            neg_loss = tf.div(tf.reduce_sum(neg_loss * negative_mask),
                              batch_size)
            neg_loss *= alpha[1]
            tf.add_to_collection(tf.GraphKeys.LOSSES, neg_loss)

    with tf.name_scope('bbox_regression_loss'):
        bbox_loss = smooth_l1(locations - encode_locations)
        bbox_loss = tf.reduce_sum(bbox_loss, axis=-1)
        bbox_loss = tf.div(tf.reduce_sum(bbox_loss * positive_mask),
                           batch_size)
        bbox_loss *= alpha[2]
        tf.add_to_collection(tf.GraphKeys.LOSSES, bbox_loss)

    return pos_loss, neg_loss, bbox_loss
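# _layer_loss assumes a smooth_l1 helper; the standard Fast R-CNN form would be:
def smooth_l1(x):
    absx = tf.abs(x)
    return tf.where(absx < 1.0, 0.5 * tf.square(x), absx - 0.5)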
Example #22
def deform_conv2d(inputs,
                  num_outputs,
                  kernel_size,
                  stride=1,
                  rate=1,
                  padding='SAME',
                  activation_fn=tf.nn.relu,
                  deformable_group=1,
                  num_groups=1,
                  normalizer_fn=None,
                  weights_initializer=None,
                  weights_regularizer=None,
                  biases_initializer=tf.zeros_initializer,
                  biases_regularizer=None,
                  outputs_collections=None,
                  offsets_collections='offsets',
                  scope=None):
    assert num_outputs % num_groups == 0, 'num_outputs % num_groups != 0'
    kernel_size = [kernel_size, kernel_size] if type(kernel_size) is int else kernel_size
    stride = [stride, stride] if type(stride) is int else stride
    rate = [rate, rate] if type(rate) is int else rate

    with tf.variable_scope(scope, 'deform_conv2d'):
        _, iH, iW, indim = tensor_shape(inputs)
        assert indim % num_groups == 0, 'indim % num_groups != 0'
        assert indim % deformable_group == 0, 'indim % deformable_group != 0'

        offsets = conv2d(
            inputs,
            num_outputs=kernel_size[0] * kernel_size[1] * 2 * deformable_group,
            kernel_size=kernel_size,
            stride=stride,
            rate=rate,
            padding=padding,
            normalizer_fn=None,
            activation_fn=None,
            # may be using zero initializer?
            # weight_init=tf.zeros_initializer,
            weights_initializer=weights_initializer,
            weights_regularizer=weights_regularizer,
            biases_initializer=tf.zeros_initializer,
            biases_regularizer=None,
            outputs_collections=offsets_collections,
            scope='conv_offsets'
        )
        offsets = tf.transpose(offsets, [0, 3, 1, 2])
        # TODO: offsets are zeroed out here, which disables the deformation
        # (presumably left in for debugging)
        offsets *= 0.0

        filters = tf.get_variable(name='weights',
                                  shape= kernel_size + [indim // num_groups, num_outputs],
                                  initializer=weights_initializer,
                                  regularizer=weights_regularizer)

        # transpose filters to required order
        # [outC, inC, ksize, ksize]
        filters = tf.transpose(filters, [3, 2, 0, 1])
        inputs = tf.transpose(inputs, [0, 3, 1, 2])
        conv = deform_conv_op.deform_conv_op(x=inputs,
                                             filter=filters,
                                             offset=offsets,
                                             strides=[1, 1] + stride,
                                             rates=[1, 1] + rate,
                                             num_groups=num_groups,
                                             padding=padding,
                                             deformable_group=deformable_group,
                                             name=scope)
        conv = tf.transpose(conv, [0, 2, 3, 1])

        # tf.add_to_collection(outputs_collections, conv)
        if normalizer_fn is not None:
            conv = normalizer_fn(conv)
        elif biases_initializer is not None:
            biases = tf.get_variable(name='biases',
                                     shape=[num_outputs],
                                     initializer=biases_initializer,
                                     regularizer=biases_regularizer,
                                     collections=BIAS_COLLECTIONS)
            conv = conv + biases

        if activation_fn is not None:
            conv = activation_fn(conv)

    tf.add_to_collection(outputs_collections, conv)
    return conv
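# Hedged usage sketch (requires the compiled deform_conv_op custom op). With
# the offsets zeroed above, this currently behaves like a plain 3x3 conv:
x = tf.zeros([1, 64, 64, 32])
y = deform_conv2d(x, num_outputs=64, kernel_size=3, stride=1, scope='dcn1')
# y: [1, 64, 64, 64]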
Example #23
def deform_conv2d(inputs,
                  outc,
                  ksize,
                  strides=[1, 1],
                  ratios=[1, 1],
                  name=None,
                  padding='SAME',
                  activate=tf.nn.relu,
                  deformable_group=1,
                  num_groups=1,
                  batch_norm=True,
                  group_norm=False,
                  use_bias=None,
                  weight_init=None,
                  weight_reg=None,
                  bias_init=tf.zeros_initializer,
                  bias_reg=None,
                  offset_init=tf.zeros_initializer,
                  offset_reg=None,
                  outputs_collections=None,
                  offsets_collections='offsets'):
    """
    Wrapper for Conv layers
    :param inputs: [N, H, W, C]
    :param outc: output channels
    :param ksize: [hk, wk]
    :param strides: [hs, ws]
    :param ratios: [hr, wr]
    :param name: var_scope & operation name
    :param padding: padding mode
    :param activate: activate function
    :param deformable_group: number of deformable offset groups
    :param num_groups: number of convolution groups
    :param batch_norm: whether performs batch norm
    :param group_norm: whether performs group norm (used when batch_norm is False)
    :param use_bias: whether use bias addition
    :param weight_init: weight initializer
    :param weight_reg: weight regularizer
    :param bias_init: bias initializer
    :param bias_reg: bias regularizer
    :param outputs_collections: add result to some collection
    :return: convolution after activation
    """
    # can't use both
    if use_bias is None:
        use_bias = not batch_norm
    assert not (batch_norm and use_bias)
    assert outc % num_groups == 0, 'outc % num_groups != 0'

    with tf.variable_scope(name, 'deform_conv2d'):
        _, iH, iW, indim = tensor_shape(inputs)
        assert indim % num_groups == 0, 'indim % num_groups != 0'
        assert indim % deformable_group == 0, 'indim % deformable_group != 0'

        # grouped convolution: each group sees indim // num_groups input channels
        filters = get_variable(name='weights',
                               shape=ksize + [indim // num_groups, outc],
                               init=weight_init,
                               reg=weight_reg,
                               collections=WEIGHT_COLLECTIONS)

        # use get_variable merely for debug!
        offsets = conv2d(
            inputs,
            outc=ksize[0] * ksize[1] * 2 * deformable_group,
            ksize=ksize,
            strides=strides,
            ratios=ratios,
            padding=padding,
            batch_norm=False,
            group_norm=False,
            use_bias=True,
            activate=None,
            name='conv_offsets',
            # may be using zero initializer?
            # weight_init=tf.zeros_initializer,
            weight_init=weight_init,
            weight_reg=weight_reg,
            bias_init=tf.zeros_initializer,
            bias_reg=None,
            outputs_collections=offsets_collections)
        offsets = tf.transpose(offsets, [0, 3, 1, 2])
        tf.add_to_collection('offsets', offsets)
        # transpose filters to required order
        # [outC, inC, ksize, ksize]
        filters = tf.transpose(filters, [3, 2, 0, 1])

        inputs = tf.transpose(inputs, [0, 3, 1, 2])
        conv = deform_conv_op.deform_conv_op(x=inputs,
                                             filter=filters,
                                             offset=offsets,
                                             strides=[1, 1] + strides,
                                             rates=[1, 1] + ratios,
                                             num_groups=num_groups,
                                             padding=padding,
                                             deformable_group=deformable_group,
                                             name=name)
        conv = tf.transpose(conv, [0, 2, 3, 1])

        # tf.add_to_collection(outputs_collections, conv)
        if batch_norm:
            conv = batch_norm2d(conv)
        elif group_norm:
            conv = GroupNorm2D(conv)
        elif use_bias:
            biases = get_variable(name='biases',
                                  shape=[outc],
                                  init=bias_init,
                                  reg=bias_reg,
                                  collections=BIAS_COLLECTIONS)
            conv = conv + biases

        if activate is not None:
            conv = activate(conv)

    tf.add_to_collection(outputs_collections, conv)
    return conv
Example #24
def _extract_features(images,
                      model_options,
                      weight_decay=0.0001,
                      reuse=tf.AUTO_REUSE,
                      is_training=False,
                      fine_tune_batch_norm=False):
    """Extracts features by the particular model_variant.

    Args:
      images: A tensor of size [batch, height, width, channels].
      model_options: A ModelOptions instance to configure models.
      weight_decay: The weight decay for model variables.
      reuse: Reuse the model variables or not.
      is_training: Is training or not.
      fine_tune_batch_norm: Fine-tune the batch norm parameters or not.

    Returns:
      concat_logits: A tensor of size [batch, feature_height, feature_width,
        feature_channels], where feature_height/feature_width are determined by
        the images height/width and output_stride.
      end_points: A dictionary from components of the network to the corresponding
        activation.
    """
    # feature extractor is a backbone factory
    DEBUG_VARS.raw_image = images
    features, end_points = feature_extractor.extract_features(
        images,
        output_stride=model_options.output_stride,
        multi_grid=model_options.multi_grid,
        model_variant=model_options.model_variant,
        weight_decay=weight_decay,
        reuse=reuse,
        is_training=is_training,
        fine_tune_batch_norm=fine_tune_batch_norm)

    # TODO:check
    # DEBUG_VARS.xception_feature = end_points['xception_65/entry_flow/conv1_1/Relu:0']
    DEBUG_VARS.xception_feature = features
    if not model_options.aspp_with_batch_norm:
        return features, end_points
    else:
        batch_norm_params = {
            'is_training': is_training and fine_tune_batch_norm,
            'decay': 0.9997,
            'eps': 1e-5,
            'affine': True,
        }
        regularize_func = regularizer('l2', weight_decay)
        with tf.variable_scope(tf.get_variable_scope(), reuse=reuse):
            with arg_scope([sep_conv2d],
                           activate=tf.nn.relu,
                           activate_middle=tf.nn.relu,
                           batch_norm=True,
                           depthwise_weight_reg=None,
                           pointwise_weight_reg=regularize_func,
                           padding='SAME',
                           strides=[1, 1]):
                with arg_scope([conv2d],
                               activate=tf.nn.relu,
                               weight_reg=regularize_func,
                               batch_norm=True,
                               padding='SAME',
                               strides=[1, 1]):
                    # TODO: ASPP IS IMPLEMENTED HERE! Check Out!
                    with arg_scope([batch_norm2d], **batch_norm_params):
                        depth = 256
                        branch_logits = []

                        # TODO: ADD IMAGE POOLING HERE
                        if model_options.add_image_level_feature:
                            # this crop size has been updated to the new scaled one outside, which is the exact size
                            # of this model's inputs
                            _, image_h, image_w, _ = tensor_shape(images)
                            pool_height = scale_dimension(
                                image_h, 1. / model_options.output_stride)
                            pool_width = scale_dimension(
                                image_w, 1. / model_options.output_stride)
                            # global average pooling; the pooled feature should be [N, 1, 1, C]
                            image_feature = avg_pool2d(
                                features, [pool_height, pool_width],
                                [pool_height, pool_width],
                                padding='VALID')
                            # collapse channels to depth after GAP
                            image_feature = conv2d(inputs=image_feature,
                                                   outc=depth,
                                                   ksize=[1, 1],
                                                   name=_IMAGE_POOLING_SCOPE)
                            # TODO:check
                            DEBUG_VARS.image_feature = image_feature
                            # reshape it to final feature map shape
                            image_feature = tf.image.resize_bilinear(
                                image_feature, [pool_height, pool_width],
                                align_corners=True)
                            image_feature.set_shape(
                                [None, pool_height, pool_width, depth])
                            # add image level feature to branch_logits
                            branch_logits.append(image_feature)

                        # Employ a 1x1 convolution.
                        branch_logits.append(
                            conv2d(features,
                                   outc=depth,
                                   ksize=[1, 1],
                                   name=_ASPP_SCOPE + str(0)))

                        if model_options.atrous_rates:
                            # Employ 3x3 convolutions with different atrous rates.
                            DEBUG_VARS.aspp_features = []
                            for i, rate in enumerate(
                                    model_options.atrous_rates, 1):
                                scope = _ASPP_SCOPE + str(i)
                                if model_options.aspp_with_separable_conv:
                                    aspp_features = sep_conv2d(
                                        features,
                                        outc=depth,
                                        ksize=[3, 3],
                                        ratios=[rate, rate],
                                        name=scope)
                                    DEBUG_VARS.aspp_features.append(
                                        aspp_features)
                                else:
                                    aspp_features = conv2d(features,
                                                           outc=depth,
                                                           ksize=[3, 3],
                                                           ratios=[rate, rate],
                                                           name=scope)
                                branch_logits.append(aspp_features)

                        # Merge branch logits.
                        concat_logits = tf.concat(branch_logits, 3)
                        concat_logits = conv2d(inputs=concat_logits,
                                               outc=depth,
                                               ksize=[1, 1],
                                               name=_CONCAT_PROJECTION_SCOPE)
                        DEBUG_VARS.aspp_concat_feature = concat_logits
                        concat_logits = drop_out(
                            concat_logits,
                            kp_prob=0.9,
                            is_training=is_training,
                            name=_CONCAT_PROJECTION_SCOPE + '_dropout')

                        return concat_logits, end_points