Example #1
0
def batch_gpu_nms(dets, thresh):
    """
    Greedy non-maximum suppression over one set of boxes.

    Boxes are visited in descending score order. Every box whose score has
    not been zeroed yet is kept; all boxes whose IoU with it exceeds
    ``thresh`` then get their score zeroed so they are skipped later on.

    :param dets: (N, 5) tensor, columns 0-3 are the box and column 4 the
        score. NOTE(review): the earlier docstring said (Batch, N, 5), but
        the indexing below treats ``dets`` as 2-D -- confirm with callers.
    :param thresh: (1) IoU threshold above which a box is suppressed
    :return:
        keep_idx: list of kept indices (0-d tensors), best score first
        keep:(N) 0-1 mask built with ``torch.zeros`` (default device)
    """
    scores = dets[:, 4]
    boxs = dets[:, :4]
    _, order = scores.sort(0, descending=True)

    # Entries of Tensor.shape are plain Python ints; calling .item() on
    # them (as the previous version did) raises AttributeError.
    num_boxes = dets.shape[0]
    keep_idx = []
    keep = torch.zeros(num_boxes)

    # zeros_like already matches the dtype and device of ``scores``, so the
    # function works for CPU and GPU input without an explicit .cuda().
    zero_scores = torch.zeros_like(scores)

    for idx in order:
        # A zero score means the box was suppressed by an earlier keeper
        # (or never had a score to begin with).
        if scores[idx] == 0:
            continue

        # Tile the current box so it can be compared against every box.
        box_idx = boxs[idx].repeat(num_boxes, 1)
        _iou = iou(box_idx, boxs)

        scores = torch.where(_iou > thresh, zero_scores, scores)

        keep_idx.append(idx)
        keep[idx] = 1

    return keep_idx, keep
Example #2
0
def gpu_nms(dets, thresh, confidence=0.4):
    """
    Greedy non-maximum suppression with a confidence pre-filter.

    :param dets: (N, 5)bbox and score
    :param thresh: (1) the NMS algorithm thresh; candidates whose IoU with
        the current best box is not below it are dropped
    :param confidence: candidates whose score is not strictly greater than
        this value are discarded before NMS starts
    :return:
        keep_idx: list of keeping index (0-d long tensors), best score first
        keep:(N) 0-1 mask
    """
    scores = dets[:, 4]
    boxs = dets[:, :4]
    sorted_scores, sorted_idx = scores.sort(0, descending=True)

    keep_idx = []
    keep = torch.zeros(scores.shape)

    # Column 0 holds the score, column 1 the original row index (stored as
    # float so both fit into one tensor).
    scores_keep = torch.stack([sorted_scores, sorted_idx.float()], dim=1)
    scores_keep = scores_keep[scores_keep[:, 0] > confidence, :]
    boxs_keep = boxs[scores_keep[:, 1].long(), :]

    # NOTE(review): delect_box is defined elsewhere; presumably it filters
    # out unusable boxes -- confirm.
    boxs_keep, scores_keep = delect_box(boxs_keep, scores_keep)

    while scores_keep.shape[0] > 0:
        # Row 0 is always the best remaining candidate.
        idx = scores_keep[0, 1].long()
        box = boxs[idx]

        _iou = iou(torch.stack([box] * scores_keep.shape[0], dim=0), boxs_keep)

        survivors = _iou < thresh
        # Always retire the box that was just selected. Previously the loop
        # relied on its self-IoU being >= thresh, which does not hold for a
        # degenerate box (zero/NaN IoU) or thresh > 1 and then never ended.
        survivors[0] = False

        scores_keep = scores_keep[survivors, :]
        boxs_keep = boxs_keep[survivors, :]

        keep_idx.append(idx)
        keep[idx] = 1

    return keep_idx, keep
Example #3
0
def create_models(backbone_retinanet,
                  num_classes,
                  weights,
                  num_gpus=0,
                  freeze_backbone=False,
                  lr=1e-5,
                  config=None):
    """
    Build the base, training and prediction models and compile the training one.

    Args
        backbone_retinanet: Callable invoked as backbone_retinanet(num_classes, modifier=...) to create the network.
        num_classes: The number of classes to train.
        weights: The weights to load into the model (mismatching layers are skipped).
        num_gpus: The number of GPUs to use for training; a parallel wrapper is only created when this is greater than 1.
        freeze_backbone: If True, freeze_model is passed as the backbone modifier.
        lr: Learning rate handed to the Adam optimizer.
        config: Config parameters. Not read by this function.

    Returns
        model: The base model.
        training_model: The compiled model used for training. Unless num_gpus > 1, this is the same object as model.
        prediction_model: The result of retinanet_bbox(model=model).
    """

    backbone_modifier = None
    if freeze_backbone:
        backbone_modifier = freeze_model

    def build_base_model():
        # Instantiate the network and load the requested weights into it.
        network = backbone_retinanet(num_classes, modifier=backbone_modifier)
        return model_with_weights(network,
                                  weights=weights,
                                  skip_mismatch=True)

    if num_gpus > 1:
        from keras.utils import multi_gpu_model

        # The base model is created under the CPU device scope before it is
        # replicated across GPUs.
        with tf.device('/cpu:0'):
            model = build_base_model()
        training_model = multi_gpu_model(model, gpus=num_gpus)
    else:
        model = build_base_model()
        training_model = model

    # Inference wrapper around the base model.
    prediction_model = retinanet_bbox(model=model)

    # One loss per named output head.
    loss_by_output = {
        'regression': losses.iou(),
        'classification': losses.focal(),
        'centerness': losses.bce(),
    }
    optimizer = keras.optimizers.adam(lr=lr)
    training_model.compile(loss=loss_by_output, optimizer=optimizer)

    return model, training_model, prediction_model
Example #4
0
    def __init__(self, backbone):
        """Store the backbone name, register custom objects and validate.

        Args
            backbone: Backbone identifier kept on the instance; checked by
                self.validate().
        """
        # Custom layer / initializer names mapped to their classes.
        custom_objects = {
            'UpsampleLike': layers.UpsampleLike,
            'PriorProbability': initializers.PriorProbability,
            'RegressBoxes': layers.RegressBoxes,
            'FilterDetections': layers.FilterDetections,
            'Anchors': layers.Anchors,
            'ClipBoxes': layers.ClipBoxes,
        }
        # Loss callables registered by name.
        # NOTE(review): the keys presumably mirror the inner function names
        # of the loss factories -- confirm against the losses module.
        custom_objects.update({
            '_focal': losses.focal(),
            'bce_': losses.bce(),
            'iou_': losses.iou(),
        })
        self.custom_objects = custom_objects

        self.backbone = backbone
        self.validate()
Example #5
0
    def train(self, epochs, backbone_name, evaluation):
        """Compile the model, build generators and callbacks, then fit.

        Args
            epochs: Number of epochs passed to fit_generator.
            backbone_name: Forwarded to create_callbacks.
            evaluation: Forwarded to create_callbacks.

        Returns
            Whatever self.training_model.fit_generator returns.
        """
        # One loss per named output head, optimised with Adam at a fixed
        # learning rate.
        # NOTE(review): compile() is called on self.model while fitting is
        # done on self.training_model -- fine if they are the same object.
        loss_by_output = {
            'regression': losses.iou(),
            'classification': losses.focal(),
            'centerness': losses.bce(),
        }
        optimizer = keras.optimizers.adam(lr=1e-5)
        self.model.compile(loss=loss_by_output, optimizer=optimizer)

        # Data sources for training and validation.
        train_generator, validation_generator = create_generators(
            self.config, self.dataset)

        # Callbacks receive all three model views plus the validation data.
        callbacks = create_callbacks(
            self.config,
            backbone_name,
            self.model,
            self.training_model,
            self.prediction_model,
            validation_generator,
            evaluation,
            self.log_dir,
        )

        fit_options = dict(
            generator=train_generator,
            initial_epoch=0,
            steps_per_epoch=self.config.STEPS_PER_EPOCH,
            epochs=epochs,
            verbose=1,
            callbacks=callbacks,
            max_queue_size=10,
            validation_data=validation_generator,
        )
        return self.training_model.fit_generator(**fit_options)
Example #6
0
def main(args=None):
    """Training entry point.

    Parses the arguments, prepares the session and data generators, obtains
    a model (from a snapshot or freshly created) and runs fit_generator.

    Args
        args: Argument list to parse; sys.argv[1:] is used when None.

    Returns
        Whatever training_model.fit_generator returns.
    """
    args = parse_args(sys.argv[1:] if args is None else args)

    # Object holding backbone-specific information.
    backbone = models.backbone(args.backbone)

    # Fail early on an unsupported keras version.
    check_keras_version()

    # Restrict visible GPUs (if requested) before the session is created.
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    keras.backend.tensorflow_backend.set_session(get_session())

    # Replace the config path with its parsed contents.
    if args.config:
        args.config = read_config_file(args.config)

    train_generator, validation_generator = create_generators(
        args, backbone.preprocess_image)

    if args.snapshot is None:
        # Fresh model: fall back to imagenet weights when none were given.
        weights = args.weights
        if weights is None and args.imagenet_weights:
            weights = backbone.download_imagenet()

        print('Creating model, this may take a second...')
        model, training_model, prediction_model = create_models(
            backbone_retinanet=backbone.retinanet,
            num_classes=train_generator.num_classes(),
            weights=weights,
            num_gpus=args.num_gpus,
            freeze_backbone=args.freeze_backbone,
            lr=args.lr,
            config=args.config)
    else:
        # Resume from a snapshot; the loaded model is trained directly.
        print('Loading model, this may take a second...')
        model = models.load_model(args.snapshot, backbone_name=args.backbone)
        training_model = model

        has_anchor_config = args.config and 'anchor_parameters' in args.config
        anchor_params = (parse_anchor_parameters(args.config)
                         if has_anchor_config else None)
        prediction_model = retinanet_bbox(model=model,
                                          anchor_params=anchor_params)

        # NOTE(review): this branch uses a fixed learning rate of 1e-5 and
        # does not read args.lr, unlike the create_models branch above.
        loss_by_output = {
            'regression': losses.iou(),
            'classification': losses.focal(),
            'centerness': losses.bce(),
        }
        training_model.compile(loss=loss_by_output,
                               optimizer=keras.optimizers.adam(lr=1e-5))

    # Let the generators derive backbone layer shapes from the actual model
    # for these backbone families.
    if any(family in args.backbone for family in ('vgg', 'densenet')):
        shapes_callback = make_shapes_callback(model)
        train_generator.compute_shapes = shapes_callback
        if validation_generator:
            validation_generator.compute_shapes = train_generator.compute_shapes

    # Callbacks are built while validation_generator is still set; it is
    # only dropped afterwards, for the fit call itself.
    callbacks = create_callbacks(
        model,
        training_model,
        prediction_model,
        validation_generator,
        args,
    )

    if not args.compute_val_loss:
        validation_generator = None

    fit_options = dict(
        generator=train_generator,
        initial_epoch=0,
        steps_per_epoch=args.steps,
        epochs=args.epochs,
        verbose=1,
        callbacks=callbacks,
        workers=args.workers,
        use_multiprocessing=args.multiprocessing,
        max_queue_size=args.max_queue_size,
        validation_data=validation_generator,
    )
    return training_model.fit_generator(**fit_options)
Example #7
0
def level_select(cls_pred, regr_pred, gt_boxes, feature_shapes, strides, pos_scale=0.2):
    """
    Pick, for every ground-truth box, the feature level with the lowest loss.

    For each level the predictions inside the box region returned by
    prop_box_graph are scored with focal loss (classification) plus IoU
    loss (regression); the level with the smallest sum wins.

    Args:
        cls_pred: (sum(fh * fw), num_classes)
        regr_pred:  (sum(fh * fw), 4)
        gt_boxes:  (MAX_NUM_GT_BOXES, 5), column 4 is the class label
        feature_shapes: (5, 2), one (fh, fw) pair per level
        strides: one stride per level
        pos_scale: forwarded to prop_box_graph

    Returns:
        (MAX_NUM_GT_BOXES,) int64 tensor with the selected level index per
        box; rows removed by trim_zeros_graph are represented by -1 at the
        end.
    """
    focal_loss = focal()
    iou_loss = iou()

    all_labels = tf.cast(gt_boxes[:, 4], tf.int32)
    boxes, non_zeros = trim_zeros_graph(gt_boxes[:, :4])
    labels = tf.boolean_mask(all_labels, non_zeros)
    num_gt_boxes = tf.shape(boxes)[0]

    # Number of locations per level; used to find each level's offset in
    # the flattened predictions.
    level_areas = tf.reduce_prod(feature_shapes, axis=-1)

    per_level_losses = []
    for level_id, stride in enumerate(strides):
        fh = feature_shapes[level_id][0]
        fw = feature_shapes[level_id][1]
        start_idx = tf.reduce_sum(level_areas[:level_id])
        end_idx = start_idx + fh * fw

        # Restore the (fh, fw, channels) layout of this level's predictions.
        level_cls = tf.reshape(cls_pred[start_idx:end_idx], (fh, fw, tf.shape(cls_pred)[-1]))
        level_regr = tf.reshape(regr_pred[start_idx:end_idx], (fh, fw, tf.shape(regr_pred)[-1]))

        x1, y1, x2, y2 = prop_box_graph(boxes / stride, pos_scale, fw, fh)

        def compute_gt_box_loss(args):
            left, top, right, bottom, gt_box, gt_label = args

            # Classification: target is 1 in the gt_label column, 0 elsewhere.
            cls_patch = level_cls[top:bottom, left:right, :]
            cls_patch = tf.reshape(cls_patch, (-1, tf.shape(cls_patch)[-1]))
            cls_zeros = tf.zeros_like(cls_patch)
            ones_col = tf.ones_like(cls_zeros[:, 0:1])
            cls_target = tf.concat(
                [cls_zeros[:, :gt_label], ones_col, cls_zeros[:, gt_label + 1:]],
                axis=-1)
            loss_cls = focal_loss(K.expand_dims(cls_target, axis=0),
                                  K.expand_dims(cls_patch, axis=0))

            # Regression: non-negative distances from each location centre
            # to the four box sides, divided by 4.
            regr_patch = level_regr[top:bottom, left:right, :]
            regr_patch = tf.reshape(regr_patch, (-1, tf.shape(regr_patch)[-1]))

            centers_x = (K.arange(left, right, dtype=tf.float32) + 0.5) * stride
            centers_y = (K.arange(top, bottom, dtype=tf.float32) + 0.5) * stride
            grid_x, grid_y = tf.meshgrid(centers_x, centers_y)
            grid_x = tf.reshape(grid_x, (-1,))
            grid_y = tf.reshape(grid_y, (-1,))

            dist_l = tf.maximum(grid_x - gt_box[0], 0)
            dist_t = tf.maximum(grid_y - gt_box[1], 0)
            dist_r = tf.maximum(gt_box[2] - grid_x, 0)
            dist_b = tf.maximum(gt_box[3] - grid_y, 0)
            regr_target = tf.stack([dist_l, dist_t, dist_r, dist_b], axis=-1) / 4.0
            loss_regr = iou_loss(K.expand_dims(regr_target, axis=0),
                                 K.expand_dims(regr_patch, axis=0))

            return loss_cls + loss_regr

        per_level_losses.append(
            tf.map_fn(
                compute_gt_box_loss,
                elems=[x1, y1, x2, y2, boxes, labels],
                dtype=tf.float32
            )
        )

    # (num_gt_boxes, num_levels) -> index of the cheapest level per box.
    stacked_losses = tf.stack(per_level_losses, axis=-1)
    selected_levels = tf.argmin(stacked_losses, axis=-1)

    # Pad back to MAX_NUM_GT_BOXES entries with -1.
    padding = tf.ones((MAX_NUM_GT_BOXES - num_gt_boxes), dtype=tf.int64) * -1
    return tf.concat([selected_levels, padding], axis=0)
Example #8
0
    # One pass over the training loader: forward, loss, metric bookkeeping,
    # backward and optimizer step for every batch.
    for (imgs, labels, _) in tqdm(train_loader):

        if args.cuda:
            imgs, labels = imgs.cuda(), labels.cuda()
        optim.zero_grad()
        out = model(imgs)

        batch_train_loss = loss(out, labels)

        # Bring outputs, labels and the loss to the CPU before computing the
        # IoU metric. backward() below is called on this CPU loss tensor.
        if args.cuda:
            out = out.cpu()
            labels = labels.cpu()
            batch_train_loss = batch_train_loss.cpu()

        batch_train_iou = iou(out, labels, threshold=0.5, activation="sigmoid")

        # Accumulate running totals as plain Python numbers.
        train_loss += batch_train_loss.item()
        train_iou += batch_train_iou.item()

        batch_train_loss.backward()
        optim.step()

    # The scheduler is stepped once after the whole loader has been consumed.
    scheduler.step()

    # Restart handling: when the epoch number is a multiple of Tres, move the
    # next restart point out by Tmax * Tmult, lengthen the period Tmax by the
    # factor Tmult, reset the first parameter group's learning rate to lr and
    # start a new CosineAnnealingLR with the longer period.
    # NOTE(review): only param_groups[0] is reset -- confirm the optimizer
    # has a single parameter group.
    if epoch % Tres == 0:
        Tres = Tmax * Tmult + Tres
        Tmax = Tmax * Tmult
        optim.param_groups[0]['lr'] = lr
        scheduler = CosineAnnealingLR(optim, Tmax, eta_min=0.003)
Example #9
0
def build_meta_select_target(cls_pred,
                             regr_pred,
                             gt_boxes,
                             feature_shapes,
                             strides,
                             shrink_ratio=0.2):
    """
    Pick, for every ground-truth box, the feature level with the lowest loss.

    For each level the predictions inside the shrunk, projected box region
    are scored with focal loss (classification) plus IoU loss (regression).
    A level on which the region is empty gets a fixed loss of 1e7. The
    level with the smallest loss is selected per box.

    Args:
        cls_pred: flattened per-location class predictions of all levels
        regr_pred: flattened per-location regression predictions of all levels
        gt_boxes: one row per box; columns 0-3 are the box, column 4 the label
        feature_shapes: one (fh, fw) pair per level
        strides: one stride per level
        shrink_ratio: forwarded to shrink_and_project_boxes

    Returns:
        int32 tensor with one entry per row of gt_boxes: the selected level
        index, with -1 appended for rows removed by trim_padding_boxes.
    """
    focal_loss = focal()
    iou_loss = iou()

    all_labels = tf.cast(gt_boxes[:, 4], tf.int32)
    all_boxes = gt_boxes[:, :4]
    max_gt_boxes = tf.shape(all_boxes)[0]

    boxes, non_zeros = trim_padding_boxes(all_boxes)
    labels = tf.boolean_mask(all_labels, non_zeros)
    num_gt_boxes = tf.shape(boxes)[0]

    # Number of locations per level; used to find each level's offset in
    # the flattened predictions.
    level_areas = tf.reduce_prod(feature_shapes, axis=-1)

    per_level_losses = []
    for level_id, stride in enumerate(strides):
        fh = feature_shapes[level_id][0]
        fw = feature_shapes[level_id][1]
        start_idx = tf.reduce_sum(level_areas[:level_id])
        end_idx = start_idx + fh * fw

        # Restore the (fh, fw, channels) layout of this level's predictions.
        level_cls = tf.reshape(cls_pred[start_idx:end_idx],
                               (fh, fw, tf.shape(cls_pred)[-1]))
        level_regr = tf.reshape(regr_pred[start_idx:end_idx],
                                (fh, fw, tf.shape(regr_pred)[-1]))

        # (num_gt_boxes, ) each
        x1, y1, x2, y2 = shrink_and_project_boxes(boxes,
                                                  fw,
                                                  fh,
                                                  stride,
                                                  shrink_ratio=shrink_ratio)

        def compute_gt_box_loss(args):
            left, top, right, bottom, gt_box, gt_label = args

            def loss_inside_region():
                # Classification: target is 1 in the gt_label column.
                cls_patch = level_cls[top:bottom, left:right, :]
                cls_patch = tf.reshape(cls_patch,
                                       (-1, tf.shape(cls_patch)[-1]))
                cls_zeros = tf.zeros_like(cls_patch)
                ones_col = tf.ones_like(cls_zeros[:, 0:1])
                cls_target = tf.concat([
                    cls_zeros[:, :gt_label],
                    ones_col,
                    cls_zeros[:, gt_label + 1:],
                ], axis=-1)
                loss_cls = focal_loss(tf.expand_dims(cls_target, axis=0),
                                      tf.expand_dims(cls_patch, axis=0))

                # Regression: non-negative distances from each location
                # centre to the four box sides, divided by 4 and by stride.
                regr_patch = level_regr[top:bottom, left:right, :]
                regr_patch = tf.reshape(regr_patch,
                                        (-1, tf.shape(regr_patch)[-1]))

                cols = tf.cast(tf.range(left, right), dtype=tf.float32)
                rows = tf.cast(tf.range(top, bottom), dtype=tf.float32)
                grid_x, grid_y = tf.meshgrid((cols + 0.5) * stride,
                                             (rows + 0.5) * stride)
                grid_x = tf.reshape(grid_x, (-1, ))
                grid_y = tf.reshape(grid_y, (-1, ))

                dist_l = tf.maximum(grid_x - gt_box[0], 0)
                dist_t = tf.maximum(grid_y - gt_box[1], 0)
                dist_r = tf.maximum(gt_box[2] - grid_x, 0)
                dist_b = tf.maximum(gt_box[3] - grid_y, 0)
                regr_target = tf.stack([dist_l, dist_t, dist_r, dist_b],
                                       axis=-1)
                regr_target = regr_target / 4.0 / stride
                loss_regr = iou_loss(tf.expand_dims(regr_target, axis=0),
                                     tf.expand_dims(regr_patch, axis=0))
                return loss_cls + loss_regr

            def loss_for_empty_region():
                # Large constant so this level is never the argmin.
                return tf.constant(1e7, dtype=tf.float32)

            # The region is empty when it has zero width or zero height.
            zero_width = tf.equal(tf.cast(left, tf.int32),
                                  tf.cast(right, tf.int32))
            zero_height = tf.equal(tf.cast(top, tf.int32),
                                   tf.cast(bottom, tf.int32))
            return tf.cond(tf.logical_or(zero_width, zero_height),
                           loss_for_empty_region, loss_inside_region)

        per_level_losses.append(
            tf.map_fn(compute_gt_box_loss,
                      elems=[x1, y1, x2, y2, boxes, labels],
                      dtype=tf.float32))

    # (num_gt_boxes, num_levels) -> index of the cheapest level per box.
    stacked_losses = tf.stack(per_level_losses, axis=-1)
    selected_levels = tf.argmin(stacked_losses, axis=-1, output_type=tf.int32)

    # Pad back to the original number of rows with -1.
    padding = tf.ones((max_gt_boxes - num_gt_boxes), dtype=tf.int32) * -1
    return tf.concat([selected_levels, padding], axis=0)