Beispiel #1
0
    def anchor_target_layer(rpn_cls_score, gt_boxes, gt_ishard, dontcare_areas,
                            im_info, _feat_stride, anchor_scales):
        """
        Compute the RPN training targets in numpy and wrap them as CUDA variables.

        The score tensor is copied to the CPU as a numpy array,
        ``anchor_target_layer_py`` produces the targets, and each of the four
        returned arrays is wrapped with ``network.np_to_variable``.

        rpn_cls_score: for pytorch (1, Ax2, H, W) bg/fg scores of the previous conv layer
        gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class]
        gt_ishard: (G, 1), 1 or 0 indicates difficult or not
        dontcare_areas: (D, 4), areas that may contain small objects but carry no label. D may be 0
        im_info: a list of [image_height, image_width, scale_ratios]
        _feat_stride: the downsampling ratio of the feature map to the original input image
        anchor_scales: the scales applied to the basic anchor (basic anchor is [16, 16])
        ----------
        Returns
        ----------
        rpn_labels: (1, 1, HxA, W), for each anchor, 0 denotes bg, 1 fg, -1 dontcare
        rpn_bbox_targets: (1, 4xA, H, W), distances of the anchors to the gt_boxes
                        (possibly transformed) that are the regression objectives
        rpn_bbox_inside_weights: (1, 4xA, H, W) per-box weights, mainly taken from hyper params in cfg
        rpn_bbox_outside_weights: (1, 4xA, H, W) used to balance fg/bg, because the
                        numbers of bg and fg anchors may differ significantly
        """
        scores_np = rpn_cls_score.data.cpu().numpy()
        targets = anchor_target_layer_py(scores_np, gt_boxes, gt_ishard,
                                         dontcare_areas, im_info, _feat_stride,
                                         anchor_scales)
        labels_np, bbox_targets_np, inside_w_np, outside_w_np = targets

        # Labels are integer codes, so they are wrapped as a LongTensor.
        labels_var = network.np_to_variable(labels_np,
                                            is_cuda=True,
                                            dtype=torch.LongTensor)
        # The regression targets and both weight arrays use the default dtype.
        bbox_targets_var, inside_w_var, outside_w_var = [
            network.np_to_variable(arr, is_cuda=True)
            for arr in (bbox_targets_np, inside_w_np, outside_w_np)
        ]

        return labels_var, bbox_targets_var, inside_w_var, outside_w_var
Beispiel #2
0
    def proposal_target_layer(rpn_rois, gt_boxes, gt_ishard, dontcare_areas,
                              num_classes):
        """
        Sample RoIs and build their classification/regression targets.

        The proposals are copied to the CPU as a numpy array,
        ``proposal_target_layer_py`` does the sampling and target assignment,
        and the five resulting arrays are wrapped as CUDA variables.

        ----------
        rpn_rois:  (1 x H x W x A, 5) [0, x1, y1, x2, y2]
        gt_boxes: (G, 5) [x1 ,y1 ,x2, y2, class] int
        gt_ishard: (G, 1) {0 | 1} 1 indicates hard
        dontcare_areas: (D, 4) [ x1, y1, x2, y2]
        num_classes
        ----------
        Returns
        ----------
        rois: (1 x H x W x A, 5) [0, x1, y1, x2, y2]
        labels: (1 x H x W x A, 1) {0,1,...,_num_classes-1}
        bbox_targets: (1 x H x W x A, K x4) [dx1, dy1, dx2, dy2]
        bbox_inside_weights: (1 x H x W x A, Kx4) 0, 1 masks for computing the loss
        bbox_outside_weights: (1 x H x W x A, Kx4) 0, 1 masks for computing the loss
        """
        proposals_np = rpn_rois.data.cpu().numpy()
        sampled = proposal_target_layer_py(proposals_np, gt_boxes, gt_ishard,
                                           dontcare_areas, num_classes)
        rois_np, labels_np, targets_np, inside_w_np, outside_w_np = sampled

        rois_var = network.np_to_variable(rois_np, is_cuda=True)
        # Class labels are integer indices, hence the LongTensor dtype.
        labels_var = network.np_to_variable(labels_np,
                                            is_cuda=True,
                                            dtype=torch.LongTensor)
        targets_var, inside_w_var, outside_w_var = [
            network.np_to_variable(arr, is_cuda=True)
            for arr in (targets_np, inside_w_np, outside_w_np)
        ]

        return rois_var, labels_var, targets_var, inside_w_var, outside_w_var
Beispiel #3
0
    def forward(self, im_data, gt_boxes=None, gt_classes=None, dontcare=None):
        """Run the detection network and, in training mode, compute ``self.loss``.

        Args:
            im_data: input batch fed to ``self.conv1s``.
            gt_boxes, gt_classes, dontcare: ground-truth data forwarded to
                ``self._build_target``; only read when ``self.training`` is true.

        Returns:
            ``(bbox_pred, iou_pred, prob_pred)``, each laid out as
            (bsize, h x w, num_anchors, ...): box predictions
            ``[sig(tx), sig(ty), exp(tw), exp(th)]``, the objectness
            ``sig(to)`` and the per-class softmax probabilities.

        Side effects:
            In training mode ``self.loss`` is set to
            ``5 * bbox_loss + iou_loss + cls_loss``.
        """
        # Backbone: conv1s -> conv2 -> conv3, with a re-organised copy of
        # conv1s concatenated to conv3 along the channel axis.
        early_feat = self.conv1s(im_data)
        deep_feat = self.conv3(self.conv2(early_feat))
        merged = torch.cat([self.reorg(early_feat), deep_feat], 1)
        conv5 = self.conv5(self.conv4(merged))  # batch_size, out_channels, h, w

        # bsize, c, h, w -> bsize, h, w, c -> bsize, h x w, num_anchors, 5+num_classes
        bsize, _, _, _ = conv5.size()
        head = conv5.permute(0, 2, 3, 1).contiguous().view(
            bsize, -1, cfg.num_anchors, cfg.num_classes + 5)

        # tx, ty, tw, th, to -> sig(tx), sig(ty), exp(tw), exp(th), sig(to)
        centre_pred = F.sigmoid(head[:, :, :, 0:2])
        size_pred = torch.exp(head[:, :, :, 2:4])
        bbox_pred = torch.cat([centre_pred, size_pred], 3)
        iou_pred = F.sigmoid(head[:, :, :, 4:5])

        # Softmax over the class scores, applied on a 2-D view and reshaped back.
        score_pred = head[:, :, :, 5:].contiguous()
        flat_scores = score_pred.view(-1, score_pred.size()[-1])
        prob_pred = F.softmax(flat_scores).view_as(score_pred)

        if not self.training:
            return bbox_pred, iou_pred, prob_pred

        # Targets are built in numpy from detached predictions, then wrapped again.
        box_t, iou_t, cls_t, mask_np = self._build_target(
            bbox_pred.data.cpu().numpy(), gt_boxes, gt_classes, dontcare)
        box_t = net_utils.np_to_variable(box_t)
        iou_t = net_utils.np_to_variable(iou_t)
        cls_t = net_utils.np_to_variable(cls_t)
        mask = net_utils.np_to_variable(mask_np, dtype=torch.FloatTensor)
        num_boxes = torch.sum(mask)

        # Box regression: summed smooth-L1 over masked entries, averaged per box.
        box_mask = mask.expand_as(box_t)
        bbox_loss = F.smooth_l1_loss(box_mask * bbox_pred, box_mask * box_t,
                                     size_average=False) / num_boxes

        # Objectness: plain MSE over every anchor (no mask).
        iou_loss = nn.MSELoss()(iou_pred, iou_t)

        # Classification: MSE on masked class probabilities, divided by num_boxes.
        class_mask = mask.expand_as(score_pred)
        mean_sq_err = nn.MSELoss(size_average=True)
        cls_loss = mean_sq_err(prob_pred * class_mask, cls_t * class_mask) / num_boxes

        self.loss = 5. * bbox_loss + iou_loss + cls_loss

        return bbox_pred, iou_pred, prob_pred
Beispiel #4
0
    def process(self):
        """Read camera frames until a class-14 detection appears; return its features.

        Each iteration preprocesses a frame from ``self.camera``, runs
        ``self.net`` on it and post-processes the predictions with
        ``yolo_utils.postprocess`` using ``self.thresh``. The first detection
        whose class index is 14 ends the loop.

        Returns:
            A float32 numpy array of shape (1, 2) holding
            ``[[diff / self.center[0], area / self.max_area]]`` where ``diff``
            and ``area`` come from ``self._difference`` and ``self._area``
            applied to the box corners.

        Note:
            The method blocks (loops forever) while no class-14 box is found.
        """
        # NOTE(review): 14 is presumably the index of the tracked class in the
        # dataset's label list -- confirm against cfg.
        target_class = 14

        while True:
            image, im_data = preprocess(self.camera)
            im_data = net_utils.np_to_variable(im_data,
                                               is_cuda=True,
                                               volatile=True).permute(
                                                   0, 3, 1, 2)
            bbox_pred, iou_pred, prob_pred = self.net(im_data)

            bbox_pred = bbox_pred.data.cpu().numpy()
            iou_pred = iou_pred.data.cpu().numpy()
            prob_pred = prob_pred.data.cpu().numpy()

            bboxes, scores, cls_inds = yolo_utils.postprocess(
                bbox_pred, iou_pred, prob_pred, image.shape, cfg, self.thresh)

            for box, cls_ind in zip(bboxes, cls_inds):
                if cls_ind != target_class:
                    continue

                topleft = (box[0], box[1])
                bottomright = (box[2], box[3])

                # Store first, then divide in place, so the arithmetic stays in
                # float32 exactly as before.
                out = np.ones((1, 2)).astype('float32')
                out[0][0] = self._difference(topleft, bottomright)
                out[0][0] /= self.center[0]
                out[0][1] = self._area(topleft, bottomright)
                out[0][1] /= self.max_area

                return out
def train_batch(net, sample_batched, train_loss_epoch, coord_loss_epoch, conf_loss_epoch):
    """Run one optimisation step on a batch and return predictions and losses.

    Relies on the module-level names ``args``, ``cfg`` and ``optimizer``.

    Args:
        net: the network; ``net.module.loss`` is used when ``args.mGPUs`` is
            truthy, ``net.loss`` otherwise.
        sample_batched: pair ``(batch, size_index)`` where ``batch`` is a dict
            with the keys 'image', 'gt_boxes', 'gt_classes', 'gt_RT' and
            'dontcare'.
        train_loss_epoch, coord_loss_epoch, conf_loss_epoch: running epoch
            totals; the total, coordinate and object-confidence losses of this
            batch are added to them.

    Returns:
        An 11-tuple: ``(bbox_pred_np, conf_pred_np, prob_pred_np,
        coord_loss_np, conf_objloss_np, conf_noobjloss_np, cls_loss_np,
        train_loss_np, train_loss_epoch, coord_loss_epoch, conf_loss_epoch)``.
        The three prediction arrays hold the first sample of one randomly
        chosen output scale and are meant for visualisation.
    """
    net.train()
    batch = sample_batched[0]
    size_index = sample_batched[1]
    im = batch['image']
    gt_boxes = batch['gt_boxes']
    gt_classes = batch['gt_classes']
    gt_RT = batch['gt_RT']
    dontcare = batch['dontcare']

    # forward
    im_data = net_utils.np_to_variable(im,
                                       is_cuda=True,
                                       volatile=False).permute(0, 3, 1, 2)

    bbox_pred, conf_pred, score_pred = net(im_data)

    # With multi-GPU wrapping the real model (and its loss) lives on net.module.
    loss_fn = net.module.loss if args.mGPUs else net.loss
    coord_loss_var, conf_objloss_var, conf_noobjloss_var, cls_loss_var = loss_fn(
        bbox_pred, conf_pred, score_pred,
        gt_boxes, gt_classes, gt_RT, dontcare,
        size_index)

    loss_var = cfg.lambda_coord * coord_loss_var + cfg.lambda_objconf * conf_objloss_var + \
               cfg.lambda_noobjconf * conf_noobjloss_var + cfg.lambda_class * cls_loss_var

    coord_loss_np = coord_loss_var.data.cpu().numpy()
    conf_objloss_np = conf_objloss_var.data.cpu().numpy()
    conf_noobjloss_np = conf_noobjloss_var.data.cpu().numpy()
    cls_loss_np = cls_loss_var.data.cpu().numpy()
    train_loss_np = loss_var.data.cpu().numpy()
    train_loss_epoch += train_loss_np
    coord_loss_epoch += coord_loss_np
    conf_loss_epoch += conf_objloss_np

    # Class probabilities for each of the three output scales.
    # `range` replaces the Python-2-only `xrange`, which raises NameError on Python 3.
    prob_pred = [
        F.softmax(scores.view(-1, scores.size()[-1]), dim=1).view_as(scores)
        for scores in (score_pred[i] for i in range(3))
    ]

    vis_scaleid = randint(0, 2)
    # For visualisation of predictions in tensorboard: first sample of one scale.
    bbox_pred_np = bbox_pred[vis_scaleid].data[0:1].cpu().numpy()
    conf_pred_np = conf_pred[vis_scaleid].data[0:1].cpu().numpy()
    prob_pred_np = prob_pred[vis_scaleid].data[0:1].cpu().numpy()

    optimizer.zero_grad()
    loss_var.backward()

    # Clip the global gradient norm to 5 before the parameter update.
    torch.nn.utils.clip_grad_norm(net.parameters(), 5)
    optimizer.step()

    return bbox_pred_np, conf_pred_np, prob_pred_np, coord_loss_np, conf_objloss_np, conf_noobjloss_np, cls_loss_np, train_loss_np, train_loss_epoch, coord_loss_epoch, conf_loss_epoch
def main():
    """Run the Darknet19 detector over a fixed image directory and show results.

    Loads the weights named by ``cfg.trained_model``, then for every
    .jpg/.JPG/.png/.PNG file in the hard-coded directory (in sorted order)
    runs detection, draws the boxes, shows the frame in an OpenCV window and
    prints detection/total timing. Pressing 'q' stops the loop.
    """
    image_dir = '/home/cory/cedl/vid/videos/vid04'
    thresh = 0.5
    trained_model = cfg.trained_model

    net = Darknet19()
    net_utils.load_net(trained_model, net)
    net.eval()
    net.cuda()
    print('load model successfully')
    print(net)

    # Collect the images by their last four characters, then order them by path.
    image_extensions = ['.jpg', '.JPG', '.png', '.PNG']
    image_abs_paths = []
    for name in os.listdir(image_dir):
        if name[-4:] in image_extensions:
            image_abs_paths.append(os.path.join(image_dir, name))
    image_abs_paths.sort()

    t_det = Timer()
    t_total = Timer()
    format_str = 'frame: %d, (detection: %.1f fps, %.1f ms) (total: %.1f fps, %.1f ms)'

    for frame_idx, image_path in enumerate(image_abs_paths):
        t_total.tic()
        image, im_data = preprocess(image_path)
        net_input = net_utils.np_to_variable(im_data,
                                             is_cuda=True,
                                             volatile=True)
        net_input = net_input.permute(0, 3, 1, 2)

        # Time the forward pass only.
        t_det.tic()
        bbox_pred, iou_pred, prob_pred = net.forward(net_input)
        det_time = t_det.toc()

        # to numpy
        bbox_np = bbox_pred.data.cpu().numpy()
        iou_np = iou_pred.data.cpu().numpy()
        prob_np = prob_pred.data.cpu().numpy()

        bboxes, scores, cls_inds = yolo_utils.postprocess(
            bbox_np, iou_np, prob_np, image.shape, cfg, thresh)
        im2show = yolo_utils.draw_detection(image, bboxes, scores, cls_inds,
                                            cfg)

        # Shrink very tall frames to a height of 1000 px, keeping the aspect ratio.
        height = im2show.shape[0]
        if height > 1100:
            new_width = int(1000. * float(im2show.shape[1]) / height)
            im2show = cv2.resize(im2show, (new_width, 1000))
        cv2.imshow('test', im2show)

        total_time = t_total.toc()
        timing = (frame_idx, 1. / det_time, det_time * 1000, 1. / total_time,
                  total_time * 1000)
        print(format_str % timing)

        t_det.clear()
        t_total.clear()

        if cv2.waitKey(1) == ord('q'):
            break
Beispiel #7
0
    def getCarinfofromPic(self, content, method='nparray'):
        """Detect objects in ``content`` and return the detections with their crops.

        Args:
            content: the picture, in whatever form ``self.preprocess`` accepts
                for the given ``method``.
            method: input-format selector forwarded to ``self.preprocess``
                (default 'nparray'). Previously this argument was ignored and
                'nparray' was always used.

        Returns:
            ``(bboxes, scores, cls_inds, image, roi)`` where the first three
            come from ``yolo_utils.postprocess``, ``image`` is the image
            returned by ``self.preprocess`` and ``roi`` is a list holding, for
            each box, the crop ``image[y1:y2, x1:x2]``.
        """
        # Forward the caller's choice instead of a hard-coded 'nparray'.
        image, im_data = self.preprocess(content, method=method)
        im_data = net_utils.np_to_variable(im_data,
                                           is_cuda=True,
                                           volatile=True).permute(0, 3, 1, 2)
        bbox_pred, iou_pred, prob_pred = self.net(im_data)

        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()

        bboxes, scores, cls_inds = yolo_utils.postprocess(bbox_pred,
                                                          iou_pred,
                                                          prob_pred,
                                                          image.shape,
                                                          cfg,
                                                          self.thresh,
                                                          size_index=0)

        # Each box is indexed as [x1, y1, x2, y2]; rows are y, columns are x.
        roi = [image[box[1]:box[3], box[0]:box[2]] for box in bboxes]

        return bboxes, scores, cls_inds, image, roi
Beispiel #8
0
    def loss(self, conf_pred, gt_conf):
        """Return the summed squared error of ``conf_pred`` against ``gt_conf``,
        divided by ``len(gt_conf)``.

        ``gt_conf`` is wrapped with ``net_utils.np_to_variable`` before the
        comparison.
        """
        target = net_utils.np_to_variable(gt_conf, volatile=True)
        # size_average=False makes MSELoss return the sum instead of the mean.
        summed_sq_err = nn.MSELoss(size_average=False)(conf_pred, target)
        return summed_sq_err / len(gt_conf)
Beispiel #9
0
 def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key,
                    _feat_stride, anchor_scales):
     """Generate region proposals and return them as a CUDA variable with 5 columns.

     Both network outputs are copied to the CPU as numpy arrays,
     ``proposal_layer_py`` computes the proposals, and the result is wrapped
     again and viewed as (-1, 5).
     """
     cls_prob_np = rpn_cls_prob_reshape.data.cpu().numpy()
     bbox_pred_np = rpn_bbox_pred.data.cpu().numpy()
     proposals = proposal_layer_py(cls_prob_np, bbox_pred_np, im_info,
                                   cfg_key, _feat_stride, anchor_scales)
     proposals_var = network.np_to_variable(proposals, is_cuda=True)
     return proposals_var.view(-1, 5)
Beispiel #10
0
def test_net(net, imdb, max_per_image=300, thresh=0.5, vis=False):
    """Run ``net`` over ``imdb.num_images`` batches and print the losses of each.

    For every batch the network is called with the ground truth, the
    predictions are post-processed with ``yolo_utils.postprocess``, and the
    ``bbox_loss``, ``iou_loss`` and ``cls_loss`` attributes of ``net`` are
    printed.

    Args:
        net: the detection network; must expose ``bbox_loss``, ``iou_loss``
            and ``cls_loss`` after a forward pass.
        imdb: dataset object providing ``num_images`` and
            ``next_batch(size_index=...)``.
        max_per_image: unused; kept for interface compatibility.
        thresh: score threshold passed to ``yolo_utils.postprocess``.
        vis: unused; kept for interface compatibility.

    Returns:
        None.
    """
    # A leftover ``ipdb.set_trace()`` breakpoint used to sit here; it stopped
    # every run at a debugger prompt and has been removed.
    num_images = imdb.num_images

    # Fixed input-size index (a previous author noted this was modified here;
    # it used to come from args.image_size_index).
    size_index = 0

    for _ in range(num_images):

        batch = imdb.next_batch(size_index=size_index)

        ori_im = batch['origin_im'][0]
        im = batch['images']
        gt_boxes = batch['gt_boxes']
        gt_classes = batch['gt_classes']
        dontcare = batch['dontcare']

        im_data = net_utils.np_to_variable(im, is_cuda=True,
                                           volatile=True).permute(0, 3, 1, 2)

        bbox_pred, iou_pred, prob_pred = net(im_data, gt_boxes, gt_classes,
                                             dontcare, size_index)

        # to numpy
        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()

        # The detections are computed but not used further in this function.
        bboxes, scores, cls_inds = yolo_utils.postprocess(
            bbox_pred, iou_pred, prob_pred, ori_im.shape, cfg, thresh,
            size_index)

        # The loss terms are read from attributes on the network.
        bbox_loss = net.bbox_loss.data.cpu().numpy()[0]
        iou_loss = net.iou_loss.data.cpu().numpy()[0]
        cls_loss = net.cls_loss.data.cpu().numpy()[0]
        print('bbox_loss', bbox_loss)
        print('iou_loss', iou_loss)
        print('cls_loss', cls_loss)
Beispiel #11
0
def detect_image(cfg, image_path, net, thresh):
    """Detect objects in the image at ``image_path``.

    The image is preprocessed to ``cfg['inp_size']``, passed through
    ``net.forward`` and the raw predictions are decoded with
    ``yolo_utils.postprocess`` using ``thresh``.

    Returns:
        ``(bboxes, cls_inds, image, scores)`` -- note the order differs from
        the ``(bboxes, scores, cls_inds)`` order of ``postprocess``.
    """
    image, net_input = preprocess(image_path, cfg['inp_size'])
    net_input = net_utils.np_to_variable(net_input, is_cuda=True,
                                         volatile=True)
    net_input = net_input.permute(0, 3, 1, 2)

    bbox_out, iou_out, prob_out = net.forward(net_input)
    # Move every prediction to the CPU as a numpy array.
    bbox_np, iou_np, prob_np = [
        out.data.cpu().numpy() for out in (bbox_out, iou_out, prob_out)
    ]

    bboxes, scores, cls_inds = yolo_utils.postprocess(
        bbox_np, iou_np, prob_np, image.shape, cfg, thresh)
    return bboxes, cls_inds, image, scores
def loss_fxn(gt_boxes, gt_classes, dontcare, size_index, bbox_pred, iou_pred,
             prob_pred):
    """Compute the box, IoU and class losses for one batch.

    Targets and masks are produced by ``build_target`` from numpy copies of
    the predictions. Every loss is a masked sum divided by the total number
    of ground-truth boxes in the batch.

    Returns:
        ``(bbox_loss, iou_loss, cls_loss)``.
    """
    targets = build_target(bbox_pred.data.cpu().numpy(), gt_boxes, gt_classes,
                           dontcare, iou_pred.data.cpu().numpy(), size_index)
    box_t, iou_t, cls_t, box_mask_np, iou_mask_np, cls_mask_np = targets

    box_t = net_utils.np_to_variable(box_t)
    iou_t = net_utils.np_to_variable(iou_t)
    cls_t = net_utils.np_to_variable(cls_t)
    # The masks are wrapped explicitly as float tensors so they can be multiplied in.
    box_mask = net_utils.np_to_variable(box_mask_np, dtype=torch.FloatTensor)
    iou_mask = net_utils.np_to_variable(iou_mask_np, dtype=torch.FloatTensor)
    cls_mask = net_utils.np_to_variable(cls_mask_np, dtype=torch.FloatTensor)

    # Normaliser: total count of ground-truth boxes over the batch.
    num_boxes = sum(len(boxes) for boxes in gt_boxes)

    box_mask = box_mask.expand_as(box_t)
    abs_err_sum = nn.L1Loss(size_average=False)
    bbox_loss = abs_err_sum(bbox_pred * box_mask, box_t * box_mask) / num_boxes
    iou_loss = abs_err_sum(iou_pred * iou_mask, iou_t * iou_mask) / num_boxes

    cls_mask = cls_mask.expand_as(prob_pred)
    # NOTE: the original author flagged this cross-entropy usage as wrong;
    # it is reproduced unchanged here.
    cross_entropy_sum = nn.CrossEntropyLoss(size_average=False)
    cls_loss = cross_entropy_sum(prob_pred * cls_mask,
                                 cls_t * cls_mask) / num_boxes

    return bbox_loss, iou_loss, cls_loss
def training_target(cfg, bbox_pred, class_pred, labels, inp_size, iou_pred):
    """Build the training targets from ``labels`` and return the three losses.

    Ground truth is recovered with ``restore_gt_numpy``, targets and masks
    come from ``_build_target``, and every loss is a masked sum of squared
    errors divided by the total number of ground-truth boxes.

    Args:
        inp_size: network input size as (w, h).

    Returns:
        ``(bbox_loss, iou_loss, class_loss)``.
    """
    gt_boxes, gt_classes = restore_gt_numpy(labels)
    targets = _build_target(cfg, bbox_pred.data.cpu().numpy(), gt_boxes,
                            gt_classes, iou_pred.data.cpu().numpy(), inp_size)
    box_t, iou_t, cls_t, box_mask_np, iou_mask_np, cls_mask_np = targets

    box_t = net_utils.np_to_variable(box_t)
    iou_t = net_utils.np_to_variable(iou_t)
    cls_t = net_utils.np_to_variable(cls_t)
    # The masks are wrapped explicitly as float tensors so they can be multiplied in.
    box_mask = net_utils.np_to_variable(box_mask_np, dtype=torch.FloatTensor)
    iou_mask = net_utils.np_to_variable(iou_mask_np, dtype=torch.FloatTensor)
    cls_mask = net_utils.np_to_variable(cls_mask_np, dtype=torch.FloatTensor)

    # Normaliser: total count of ground-truth boxes over the batch.
    num_boxes = sum(len(boxes) for boxes in gt_boxes)
    box_mask = box_mask.expand_as(box_t)
    cls_mask = cls_mask.expand_as(class_pred)

    # size_average=False -> summed (not averaged) squared error.
    sq_err_sum = nn.MSELoss(size_average=False)
    bbox_loss = sq_err_sum(bbox_pred * box_mask, box_t * box_mask) / num_boxes
    iou_loss = sq_err_sum(iou_pred * iou_mask, iou_t * iou_mask) / num_boxes
    class_loss = sq_err_sum(class_pred * cls_mask,
                            cls_t * cls_mask) / num_boxes
    return bbox_loss, iou_loss, class_loss
Beispiel #14
0
    def forward(self,
                im_data,
                im_info,
                gt_boxes=None,
                gt_ishard=None,
                dontcare_areas=None):
        """Run the region proposal network on one image batch.

        Args:
            im_data: input passed to ``network.np_to_variable``; its axes are
                permuted with (0, 3, 1, 2) before the feature extractor.
            im_info: image information forwarded to the proposal and
                anchor-target layers.
            gt_boxes, gt_ishard, dontcare_areas: ground truth, only used in
                training mode (``gt_boxes`` must then not be None).

        Returns:
            ``(features, rois)``: the backbone feature map and the proposals.

        Side effects:
            In training mode ``self.cross_entropy`` and ``self.loss_box`` are
            set from ``self.build_loss``.
        """
        inputs = network.np_to_variable(im_data, is_cuda=True)
        inputs = inputs.permute(0, 3, 1, 2)
        features = self.features(inputs)

        rpn_hidden = self.conv1(features)

        # Classification branch: reshape to 2 channels, softmax, then reshape
        # back to (number of anchor scales) * 3 * 2 channels.
        rpn_cls_score = self.score_conv(rpn_hidden)
        score_two_ch = self.reshape_layer(rpn_cls_score, 2)
        prob_two_ch = F.softmax(score_two_ch)
        prob_channels = len(self.anchor_scales) * 3 * 2
        rpn_cls_prob_reshape = self.reshape_layer(prob_two_ch, prob_channels)

        # Regression branch.
        rpn_bbox_pred = self.bbox_conv(rpn_hidden)

        # Proposal layer, configured by the current mode.
        if self.training:
            cfg_key = 'TRAIN'
        else:
            cfg_key = 'TEST'
        rois = self.proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred,
                                   im_info, cfg_key, self._feat_stride,
                                   self.anchor_scales)

        # Generate the training labels and build the RPN loss.
        if self.training:
            assert gt_boxes is not None
            rpn_data = self.anchor_target_layer(rpn_cls_score, gt_boxes,
                                                gt_ishard, dontcare_areas,
                                                im_info, self._feat_stride,
                                                self.anchor_scales)
            losses = self.build_loss(score_two_ch, rpn_bbox_pred, rpn_data)
            self.cross_entropy, self.loss_box = losses

        return features, rois
Beispiel #15
0
# Demo script fragment: run the already-constructed `net` over every '.jpg'
# file in `im_path` and draw the detections. `net`, `im_path`, `thresh`,
# `cfg`, `Timer`, `Pool` and `preprocess` are defined outside this fragment.
net.cuda()
net.eval()
print('load model succ...')

# One timer for the forward pass, one for the whole per-image loop body.
t_det = Timer()
t_total = Timer()
# Sorted file names with an exact (case-sensitive) '.jpg' extension ...
im_fnames = sorted((fname
                    for fname in os.listdir(im_path)
                    if os.path.splitext(fname)[-1] == '.jpg'))
# ... turned lazily into paths under im_path.
im_fnames = (os.path.join(im_path, fname) for fname in im_fnames)
# NOTE(review): Pool is presumably multiprocessing.Pool, used so that
# preprocessing runs in a separate worker process -- confirm against the
# imports. The pool is never closed in the visible code.
pool = Pool(processes=1)

# imap yields the (image, im_data) pairs returned by preprocess, in input order.
for i, (image, im_data) in enumerate(pool.imap(
        preprocess, im_fnames, chunksize=1)):
    # Started after the preprocessed frame has been received.
    t_total.tic()
    # assumes im_data is NHWC and the network expects NCHW -- TODO confirm
    im_data = net_utils.np_to_variable(
        im_data, is_cuda=True, volatile=True).permute(0, 3, 1, 2)
    # Time the forward pass only.
    t_det.tic()
    bbox_pred, iou_pred, prob_pred = net(im_data)
    det_time = t_det.toc()
    # to numpy
    bbox_pred = bbox_pred.data.cpu().numpy()
    iou_pred = iou_pred.data.cpu().numpy()
    prob_pred = prob_pred.data.cpu().numpy()

    # print bbox_pred.shape, iou_pred.shape, prob_pred.shape

    # Decode the raw predictions into boxes, scores and class indices.
    bboxes, scores, cls_inds = yolo_utils.postprocess(
        bbox_pred, iou_pred, prob_pred, image.shape, cfg, thresh)

    # Draw the detections; im2show is not used further in this fragment.
    im2show = yolo_utils.draw_detection(image, bboxes, scores, cls_inds, cfg)
Beispiel #16
0
def test_net(net2,
             imdb,
             dataloader,
             args,
             output_dir,
             size_index,
             batch_size,
             objpoints3D,
             corners_3d,
             vertices,
             iter_count=0,
             thresh=0.5,
             vis=True,
             verbose=True,
             summary=None):

    net2.eval()
    cv2.setNumThreads(1)
    test_loss = 0

    pool = Pool(processes=2)

    num_images = imdb.__len__()

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb._num_classes)]

    # timers
    _t = {'network': Timer(), 'postpro': Timer()}
    # corners3d = yolo_utils.threed_corners(dataset='linemod')
    batch_num_summary = randint(0, num_images / batch_size - 1)

    if args.confidence_plotlogs:
        confidence_plotlogs = {}
        confidence_plotlogs['gt_conf'] = []
        confidence_plotlogs['cullnet_conf'] = []

    network_time = 0
    postpro_time = 0

    for i_batch, sample_batched in enumerate(dataloader):

        _t['network'].tic()
        rgb_patches, gt_2dconfs, gt_3dconfs, bboxes = sample_batched[
            'pose_proposals']

        ori_im = np.array(sample_batched['origin_im'])

        if cfg.args.cullnet_confidence == 'conf2d':
            gt_confs = gt_2dconfs
        elif cfg.args.cullnet_confidence == 'conf3d':
            gt_confs = gt_3dconfs

        with torch.no_grad():
            if cfg.args.seg_cullnet:

                confidence_new_batch = []

                rgb_patch_np = np.array(rgb_patches)
                gtconf_patch_np = np.array(gt_confs)
                bboxes_batch = bboxes

                current_batch_size = rgb_patch_np.shape[0]

                subnetwork_batchsize = 128
                if cfg.args.cullnet_type == 'vgg19_bn':
                    subnetwork_batchsize = 128
                if cfg.args.cullnet_type == 'allconvnet':
                    subnetwork_batchsize = 64
                if cfg.args.cullnet_type == 'allconvnet_small':
                    subnetwork_batchsize = 256
                if cfg.args.cullnet_type == 'resnet18':
                    subnetwork_batchsize = 512
                if cfg.args.cullnet_type == 'resnet18_gn' or cfg.args.cullnet_type == 'resnet18concat_gn':
                    subnetwork_batchsize = 320
                if cfg.args.cullnet_type == 'resnet50' or cfg.args.cullnet_type == 'resnet50_gn' or cfg.args.cullnet_type == 'resnet50concat_gn':
                    subnetwork_batchsize = 160

                if cfg.args.sub_bs_test is not None:
                    subnetwork_batchsize = int(cfg.args.sub_bs_test)

                partition_size = (current_batch_size *
                                  cfg.k_proposals_test) / subnetwork_batchsize
                subnetwork_numimages = int(
                    math.ceil(subnetwork_batchsize / cfg.k_proposals_test))

                for i in range(partition_size):
                    rgb_patches_var = net_utils.np_to_variable(
                        rgb_patch_np[i * subnetwork_numimages:(i + 1) *
                                     subnetwork_numimages],
                        is_cuda=True,
                        volatile=True).permute(0, 1, 4, 2, 3)

                    if cfg.args.cullnet_inconf == 'concat':
                        confidence_new = net2(
                            rgb_patches_var.view(-1, 4, cfg.args.cullnet_input,
                                                 cfg.args.cullnet_input))
                    else:
                        confidence_new = net2(
                            rgb_patches_var.view(-1, 3, cfg.args.cullnet_input,
                                                 cfg.args.cullnet_input))

                    gtconf_patch = gtconf_patch_np[i * subnetwork_numimages:(
                        i + 1) * subnetwork_numimages].reshape(-1, 1)

                    if args.confidence_plotlogs:
                        confidence_plotlogs[
                            'gt_conf'] += gtconf_patch[:, 0].tolist()
                        confidence_plotlogs[
                            'cullnet_conf'] += confidence_new[:, 0].tolist()

                    if args.mGPUs:
                        conf_loss_var = net2.module.loss(
                            confidence_new, gtconf_patch)
                    else:
                        conf_loss_var = net2.loss(confidence_new, gtconf_patch)

                    confidence_new_np = confidence_new.data.cpu().numpy()
                    ### debugging purpose
                    # confidence_new_np = gtconf_patch

                    confidence_new_batch.append(confidence_new_np)
                    # bbox_pred.register_hook(extract)

                    conf_loss_np = conf_loss_var.data.cpu().numpy()
                    test_loss += conf_loss_np

                confidence_new_batch = np.array(confidence_new_batch)

                left_overpatches = (current_batch_size * cfg.k_proposals_test
                                    ) % subnetwork_batchsize

                confidence_new_batch = confidence_new_batch.reshape(
                    partition_size * subnetwork_numimages,
                    cfg.k_proposals_test)

                if i_batch == len(dataloader) - 1 and left_overpatches > 0:
                    rgb_patches_var = net_utils.np_to_variable(
                        rgb_patch_np[partition_size * subnetwork_numimages:],
                        is_cuda=True,
                        volatile=True).permute(0, 1, 4, 2, 3)

                    if cfg.args.cullnet_inconf == 'concat':
                        confidence_new = net2(
                            rgb_patches_var.view(-1, 4, cfg.args.cullnet_input,
                                                 cfg.args.cullnet_input))
                    else:
                        confidence_new = net2(
                            rgb_patches_var.view(-1, 3, cfg.args.cullnet_input,
                                                 cfg.args.cullnet_input))

                    gtconf_patch = gtconf_patch_np[
                        partition_size * subnetwork_numimages:].reshape(-1, 1)

                    if args.confidence_plotlogs:
                        confidence_plotlogs[
                            'gt_conf'] += gtconf_patch[:, 0].tolist()
                        confidence_plotlogs[
                            'cullnet_conf'] += confidence_new[:, 0].tolist()

                    if args.mGPUs:
                        conf_loss_var = net2.module.loss(
                            confidence_new, gtconf_patch)
                    else:
                        conf_loss_var = net2.loss(confidence_new, gtconf_patch)

                    confidence_new_np = confidence_new.data.cpu().numpy()

                    ### debugging purpose
                    # confidence_new_np = gtconf_patch
                    confidence_new_np = confidence_new_np.reshape(
                        -1, cfg.k_proposals_test)
                    confidence_new_batch = np.concatenate(
                        (confidence_new_batch, confidence_new_np), 0)
                    # bbox_pred.register_hook(extract)
                    conf_loss_np = conf_loss_var.data.cpu().numpy()
                    test_loss += conf_loss_np

        network_time += _t['network'].toc()

        _t['postpro'].tic()

        if cfg.args.seg_cullnet:
            targets = pool.map(
                partial(yolo_utils.seg_cullnet_postprocess,
                        sample_batched['origin_im'][0].shape, size_index),
                ((bboxes_batch[b], confidence_new_batch[b])
                 for b in range(rgb_patch_np.shape[0])))

            bboxes_batch = [row[0] for row in targets]
            scores_batch = [row[1] for row in targets]
            cls_inds_batch = [row[2] for row in targets]

        ###########
        # targets = pool.map(yolo_utils.final_postprocess,
        #                     ((bboxes_batch[b], scores_batch[b], cls_inds_batch[b])
        #                      for b in range(im_data.shape[0])))
        # bboxes_batch = [row[0] for row in targets]
        # scores_batch = [row[1] for row in targets]
        # cls_inds_batch = [row[2] for row in targets]

        if summary and i_batch == batch_num_summary:
            imnum_summary = randint(0, sample_batched['image'].shape[0] - 1)
            image = sample_batched['origin_im'][imnum_summary]
            # bboxes_sum, scores_sum, cls_inds_sum = yolo_utils.seg_cullnet_postprocess(sample_batched['origin_im'][imnum_summary].shape, size_index, (bboxes_batch[imnum_summary],
            #                                                                   confidence_new_batch[imnum_summary]))
            im2show = yolo_utils.draw_detection(
                image, bboxes_batch[imnum_summary],
                scores_batch[imnum_summary], cls_inds_batch[imnum_summary],
                cfg, imdb._classes, 0.5, objpoints3D, corners_3d, vertices)
            summary.add_image('predict_' + imdb._image_set,
                              cv2.cvtColor(im2show, cv2.COLOR_BGR2RGB),
                              iter_count)

        for batch_id in range(rgb_patch_np.shape[0]):
            if vis:
                det_im = yolo_utils.draw_detection(
                    ori_im[batch_id].copy(), bboxes_batch[batch_id],
                    scores_batch[batch_id], cls_inds_batch[batch_id], cfg,
                    imdb._classes, thresh, objpoints3D, corners_3d, vertices)

            bboxes = bboxes_batch[batch_id]
            scores = scores_batch[batch_id]
            cls_inds = cls_inds_batch[batch_id]

            for j in range(imdb._num_classes):
                inds = np.where(cls_inds == j)[0]
                if len(inds) == 0:
                    all_boxes[j][i_batch * batch_size + batch_id] = np.empty(
                        [0, 2 * args.num_detection_points + 1],
                        dtype=np.float32)
                    continue
                # bboxes[inds] = yolo_utils.refine_2dboxes(bboxes[inds], corners3d[j])
                ##
                bboxes_batch[batch_id][inds] = bboxes[inds]
                ##
                c_bboxes = bboxes[inds]
                c_scores = scores[inds]
                c_dets = np.hstack(
                    (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32,
                                                                copy=False)
                all_boxes[j][i_batch * batch_size + batch_id] = c_dets

            if vis:
                if args.num_detection_points > 9:
                    gt_image = yolo_utils.vis_corner_points(
                        ori_im[batch_id].copy(),
                        np.reshape(sample_batched['origin_gt_boxes'][batch_id],
                                   (2, args.num_detection_points),
                                   order='F'), objpoints3D, vertices)
                    cuboid_gtimage = yolo_utils.vis_corner_cuboids(
                        gt_image,
                        np.reshape(sample_batched['origin_gt_boxes'][batch_id],
                                   (2, args.num_detection_points),
                                   order='F'), objpoints3D, corners_3d)
                else:
                    cuboid_gtimage = yolo_utils.vis_corner_cuboids(
                        ori_im[batch_id].copy(),
                        np.reshape(sample_batched['origin_gt_boxes'][batch_id],
                                   (2, args.num_detection_points),
                                   order='F'))
                im2show = np.hstack(
                    (det_im, cuboid_gtimage, ori_im[batch_id].copy()))

                cv2.imwrite(
                    test_output_dir + '/' +
                    imdb._image_indexes[i_batch * batch_size + batch_id] +
                    '.jpg', im2show)
                # cv2.imshow('test', im2show)
                # cv2.waitKey(0)

        postpro_time += _t['postpro'].toc()

    # print('Culling network time: {:.3f}s ,, Postprocessing time: {:.3f}s'.format(network_time, postpro_time))
    # print('Total Images: {:d}'.format(imdb.__len__()))

    pool.close()
    pool.join()

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    if args.confidence_plotlogs:
        with open(
                'confidence_plotlogs/' + args.class_name + '_cullnet_logs.yml',
                'w') as outfile:
            yaml.dump(confidence_plotlogs, outfile, default_flow_style=False)

    print('Evaluating detections')
    accuracy_epoch, twod_dists, threed_dists = imdb.evaluate_detections(
        all_boxes, output_dir, verbose)
    return accuracy_epoch, twod_dists, threed_dists, test_loss / len(
        dataloader)
            # Multi-scale training step (fragment: the enclosing loop header is
            # not part of this snippet).
            # Original YOLOv2 switches input scale periodically; here a new scale
            # is drawn whenever the counter `i` is a multiple of 10.
            # The scale index is picked at the start of the step rather than the
            # end, otherwise one scale gets trained more than the others when
            # training is stopped and restarted several times.

            if i % 10 == 0:
                size_index = randint(0, len(cfg.multi_scale_inp_size) - 1)
                print('new scale is {}'.format(cfg.multi_scale_inp_size[size_index]))

            # Fetch the mini-batch for the current scale and unpack its fields.
            batch = dataset.fetch_parse(batch_of_index, size_index)
            im = batch['images']
            gt_boxes = batch['gt_boxes']
            gt_classes = batch['gt_classes']
            dontcare = batch['dontcare']
            # NOTE(review): this binds a one-element list literal, not a field of
            # `batch` -- presumably `batch['origin_im']` was intended; confirm.
            origin_im = ['origin_im']

            # Wrap the images as a torch variable on the GPU and reorder the axes
            # with permute(0, 3, 1, 2) (presumably NHWC -> NCHW; confirm).
            im = net_utils.np_to_variable(im, is_cuda=True, volatile=False).permute(0, 3, 1, 2)

            bbox_pred, iou_pred, prob_pred = net(im)

            # NOTE(review): `loss` is called here as a function but is rebound to
            # the summed loss value a few lines below, so a second pass through
            # this code in the same scope would call that value instead.
            bbox_loss_i, iou_loss_i, cls_loss_i = loss(gt_boxes, gt_classes, dontcare, size_index, bbox_pred, iou_pred, prob_pred)

            # Sum the three loss terms and accumulate each into its running total.
            loss = bbox_loss_i + iou_loss_i + cls_loss_i
            bbox_loss += bbox_loss_i.data.cpu().numpy()[0]
            iou_loss += iou_loss_i.data.cpu().numpy()[0]
            cls_loss += cls_loss_i.data.cpu().numpy()[0]
            train_loss += loss.data.cpu().numpy()[0]

            # Clear the old gradients before back-propagating the new loss.
            optimizer.zero_grad()
            loss.backward()
Beispiel #18
0
def test_net(net, imdb, max_per_image=300, thresh=0.5, vis=False):
    """Run the detector over every image of ``imdb`` and pickle the detections.

    Relies on the module-level names ``output_dir``, ``args`` and ``cfg``.

    Args:
        net: detection network; called as ``net(im_data)`` and expected to
            return ``(bbox_pred, iou_pred, prob_pred)``.
        imdb: dataset object providing ``num_images``, ``num_classes`` and
            ``next_batch(size_index=...)``.
        max_per_image: target number of detections kept per image over all
            classes; detections scoring below the ``max_per_image``-th best
            score are dropped. A value ``<= 0`` disables the cap.
        thresh: threshold forwarded to ``yolo_utils.postprocess``.
        vis: when true, display each image with its detections and block
            until a key is pressed.

    Side effects:
        Writes ``all_boxes`` to ``<output_dir>/detections.pkl``, where
        ``all_boxes[cls][image]`` is an ``N x 5`` float32 array of
        ``(x1, y1, x2, y2, score)`` rows.
    """
    num_images = imdb.num_images
    num_classes = imdb.num_classes

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(num_classes)]

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}
    det_file = os.path.join(output_dir, 'detections.pkl')
    size_index = args.image_size_index
    # helper: 0:320, 1:352, 2:384, 3:416, 4:448, 5:480, 6:512, 7:544, 8:576
    # (original author's note: here val_img sometimes is 5123)
    for i in range(num_images):

        batch = imdb.next_batch(size_index=size_index)
        ori_im = batch['origin_im'][0]
        im_data = net_utils.np_to_variable(batch['images'],
                                           is_cuda=True,
                                           volatile=True).permute(0, 3, 1, 2)

        _t['im_detect'].tic()
        with torch.set_grad_enabled(False):
            bbox_pred, iou_pred, prob_pred = net(im_data)
        # Shapes as noted by the original author:
        #   bbox_pred -> (batch, h*w, prior, 4)
        #   iou_pred  -> (batch, h*w, prior, 1)
        #   prob_pred -> (batch, h*w, prior, 20)

        # to numpy
        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()

        # Post-processing yields the final boxes, their scores and class ids.
        bboxes, scores, cls_inds = yolo_utils.postprocess(
            bbox_pred, iou_pred, prob_pred, ori_im.shape, cfg, thresh,
            size_index)
        detect_time = _t['im_detect'].toc()

        _t['misc'].tic()
        # Group the detections by class; each row keeps its score as the
        # last column so it can be read back below.
        for j in range(num_classes):
            inds = np.where(cls_inds == j)[0]
            if len(inds) == 0:
                all_boxes[j][i] = np.empty([0, 5], dtype=np.float32)
                continue
            c_bboxes = bboxes[inds]
            c_scores = scores[inds]
            c_dets = np.hstack(
                (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
            all_boxes[j][i] = c_dets

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                # Every class contributes to image_scores, so every class
                # (including index 0) has to be trimmed against the threshold.
                for j in range(num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        nms_time = _t['misc'].toc()

        if i % 20 == 0:
            print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'.format(
                i + 1, num_images, detect_time, nms_time))  # noqa
            _t['im_detect'].clear()
            _t['misc'].clear()

        if vis:
            im2show = yolo_utils.draw_detection(ori_im,
                                                bboxes,
                                                scores,
                                                cls_inds,
                                                cfg,
                                                thr=0.1)
            # Scale very tall images down to a height of 1000 pixels,
            # keeping the aspect ratio.
            if im2show.shape[0] > 1100:
                im2show = cv2.resize(im2show,
                                     (int(1000. * float(im2show.shape[1]) /
                                          im2show.shape[0]), 1000))  # noqa
            cv2.imshow('test', im2show)
            cv2.waitKey(0)
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
Beispiel #19
0
def test_net_img_only(net, img_list, max_per_image=300, thresh=0.5, vis=False):
    """Run the detector on a plain list of images and write one XML per image.

    Relies on the module-level names ``output_dir``, ``args`` and ``cfg``.

    Args:
        net: detection network; called as ``net(im_data)`` and expected to
            return ``(bbox_pred, iou_pred, prob_pred)``.
        img_list: sequence of image identifiers handed to
            ``test_only_transform`` and ``dataTransform.writeXml``.
        max_per_image: target number of detections kept per image over all
            classes in ``all_boxes``; ``<= 0`` disables the cap.
        thresh: threshold forwarded to ``yolo_utils.postprocess``.
        vis: when true, display each image with its detections and block
            until a key is pressed.

    Side effects:
        Creates the ``result`` directory if missing and writes the detections
        of every image there via ``dataTransform.writeXml``.
    """
    num_images = len(img_list)
    # There is no imdb in this function: the class count comes from cfg, the
    # same value all_boxes is sized with.
    num_classes = cfg.num_classes

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(num_classes)]

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}
    # Not used in the visible part of this function; kept so the lookup of
    # the module-level output_dir behaves as before.
    det_file = os.path.join(output_dir, 'detections.pkl')
    size_index = args.image_size_index
    inp_size = cfg.multi_scale_inp_size
    if not os.path.exists("result"):
        os.mkdir("result")
    dt = dataTransform.dataTransform()
    for i in range(num_images):
        img_name = img_list[i]
        im, _, __, ___, ori_im = test_only_transform(img_name, inp_size,
                                                     size_index)
        # Add a leading batch axis of size 1.
        im = np.reshape(im,
                        newshape=(-1, im.shape[0], im.shape[1], im.shape[2]))
        im_data = net_utils.np_to_variable(im, is_cuda=True,
                                           volatile=True).permute(0, 3, 1, 2)
        # Start the detection timer; the original called toc() below without
        # ever calling tic().
        _t['im_detect'].tic()
        with torch.set_grad_enabled(False):
            bbox_pred, iou_pred, prob_pred = net(im_data)
        # Shapes as noted by the original author:
        #   bbox_pred -> (batch, h*w, prior, 4)
        #   iou_pred  -> (batch, h*w, prior, 1)
        #   prob_pred -> (batch, h*w, prior, 20)

        # to numpy
        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()

        # Post-processing yields the final boxes, their scores and class ids.
        bboxes, scores, cls_inds = yolo_utils.postprocess(
            bbox_pred, iou_pred, prob_pred, ori_im.shape, cfg, thresh,
            size_index)
        detect_time = _t['im_detect'].toc()

        _t['misc'].tic()
        # Group the detections by class; each row keeps its score as the
        # last column so it can be read back below.
        for j in range(num_classes):
            inds = np.where(cls_inds == j)[0]
            if len(inds) == 0:
                all_boxes[j][i] = np.empty([0, 5], dtype=np.float32)
                continue
            c_bboxes = bboxes[inds]
            c_scores = scores[inds]
            c_dets = np.hstack(
                (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
            all_boxes[j][i] = c_dets

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                # Every class contributes to image_scores, so every class
                # (including index 0) has to be trimmed against the threshold.
                for j in range(num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        # save detect_result to xml
        dt.writeXml(img_name, "./result", ori_im, cfg.label_names,
                    cls_inds.tolist(), bboxes.tolist())
        if vis:
            im2show = yolo_utils.draw_detection(ori_im,
                                                bboxes,
                                                scores,
                                                cls_inds,
                                                cfg,
                                                thr=0.5)
            # Scale very tall images down to a height of 1000 pixels,
            # keeping the aspect ratio.
            if im2show.shape[0] > 1100:
                im2show = cv2.resize(im2show,
                                     (int(1000. * float(im2show.shape[1]) /
                                          im2show.shape[0]), 1000))  # noqa
            cv2.imshow('test', im2show)
            cv2.waitKey(0)
Beispiel #20
0
def main():
    """Detect objects in every listed image and save them in KITTI format.

    Loads a trained ``Darknet19`` model, reads one image path per line from a
    hard-coded list file, runs detection on each image and passes the result
    to ``save_as_kitti_format``. ``output_dir`` and ``kitti_output_dir`` are
    deleted and re-created at start-up. With ``vis_enable`` set, every
    detection image is shown and saved; pressing ``q`` stops the loop.
    """
    output_dir = '../output'
    output_template_dir = '../output_template'
    kitti_output_dir = '../kitti_det_output'
    input_file_list = '/home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt'
    # input_file_list = '/home/cory/project/yolo2-pytorch/flow/w01_imgs.txt'
    vis_enable = False
    thresh = 0.5

    trained_model = '/home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_ft_flownet2_joint/' \
                    'kitti_new_2_flow_center_ft_flownet2_joint_30.h5'

    # Start from clean output directories.
    shutil.rmtree(output_dir, ignore_errors=True)
    shutil.rmtree(kitti_output_dir, ignore_errors=True)
    shutil.copytree(output_template_dir, output_dir)
    os.makedirs(kitti_output_dir)

    net = Darknet19(cfg)
    net_utils.load_net(trained_model, net)
    net.eval()
    net.cuda()
    print(trained_model)
    print('load model successfully')

    # Read the whole list up front and close the file right away (the
    # original left the handle open for the lifetime of the process).
    with open(input_file_list) as img_files:
        image_abs_paths = [line.strip() for line in img_files]

    t_det = Timer()
    t_total = Timer()
    for i, image_path in enumerate(image_abs_paths):
        t_total.tic()
        image, im_data = preprocess(image_path)
        im_data = net_utils.np_to_variable(im_data,
                                           is_cuda=True,
                                           volatile=True).permute(0, 3, 1, 2)

        # Time the forward pass on its own.
        t_det.tic()
        bbox_pred, iou_pred, prob_pred = net.forward(im_data)
        det_time = t_det.toc()

        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()

        bboxes, scores, cls_inds = yolo_utils.postprocess(
            bbox_pred, iou_pred, prob_pred, image.shape, cfg, thresh)
        det_obj = detection_objects(bboxes, scores, cls_inds)
        save_as_kitti_format(i, det_obj, kitti_output_dir, src_label='kitti')

        total_time = t_total.toc()
        format_str = 'frame: %d, (detection: %.1f fps, %.1f ms) (total: %.1f fps, %.1f ms) %s'
        print(format_str % (i, 1. / det_time, det_time * 1000, 1. / total_time,
                            total_time * 1000, image_path))

        # Reset both timers after every frame.
        t_det.clear()
        t_total.clear()

        if vis_enable:
            im2show = yolo_utils.draw_detection(image, bboxes, scores,
                                                cls_inds, cfg)
            cv2.imshow('detection', im2show)
            cv2.imwrite(output_dir + '/detection/{:04d}.jpg'.format(i),
                        im2show)
            key = cv2.waitKey(0)
            if key == ord('q'):
                break
Beispiel #21
0
def produce_bbnet_patches(net1, sample_batched, objpoints3D, vertices, obj_diameter):
    """Run the detector on one batch and turn its output into pose proposals.

    ``net1`` is put into eval mode and evaluated without gradients. Its
    outputs at three feature-map scales are post-processed per image with
    ``yolo_utils.postprocess``, concatenated across scales, and handed to
    ``yolo_utils.pose_proposals``. Per-image work runs in a 4-process pool.

    Args:
        net1: network returning ``(bbox_pred_all, conf_pred_all,
            score_pred_all)``, each indexable by scale (0..2).
        sample_batched: pair ``(batch, size_index)``; ``batch`` is a mapping
            with at least ``'image'``, ``'origin_im'`` and ``'origin_gtboxes'``.
        objpoints3D, vertices, obj_diameter: forwarded unchanged to
            ``yolo_utils.pose_proposals``.

    Returns:
        ``(Rt_pr_patch, corner_patch, gtconf2d_patch, gtconf3d_patch,
        bboxes_batch)`` -- five lists with one entry per image -- when
        ``cfg.args.seg_cullnet`` is set; ``None`` otherwise.
    """
    num_scales = 3

    net1.eval()
    batch = sample_batched[0]
    size_index = sample_batched[1]
    im = batch['image']
    non_nms = cfg.args.non_nms
    thresh = cfg.args.thresh

    # NOTE: K is only bound for these two dataset names; any other name
    # makes the pose_proposals call below fail with a NameError.
    if cfg.args.dataset_name == 'LINEMOD':
        K = cfg.cam_K
    elif cfg.args.dataset_name == 'YCB':
        K = cfg.cam_K1

    # Nothing to compute without seg_cullnet; return before spawning workers
    # (the original created the pool here and never closed it).
    if not cfg.args.seg_cullnet:
        return None

    pool = Pool(processes=4)
    try:
        with torch.no_grad():
            im_data = net_utils.np_to_variable(im,
                                               is_cuda=True,
                                               volatile=False).permute(0, 3, 1, 2)
            batch_size = im_data.shape[0]

            bbox_pred_all, conf_pred_all, score_pred_all = net1(im_data)

            postprocess_one = partial(yolo_utils.postprocess,
                                      batch['origin_im'][0].shape, thresh,
                                      size_index, non_nms)

            # Post-process every scale separately; per_scale[s][b] holds the
            # (bboxes, scores, cls_inds) result of image b at scale s.
            per_scale = []
            for scale in range(num_scales):
                score_pred = score_pred_all[scale]
                # Softmax over the last axis (the class scores).
                prob = F.softmax(score_pred.view(-1, score_pred.size()[-1]),
                                 dim=1).view_as(score_pred)
                bbox_pred = bbox_pred_all[scale].data.cpu().numpy()
                conf_pred = conf_pred_all[scale].data.cpu().numpy()
                prob_pred = prob.data.cpu().numpy()
                # Index with [[b]] to keep the leading batch axis (size 1).
                per_scale.append(pool.map(
                    postprocess_one,
                    [(bbox_pred[[b]], conf_pred[[b]], prob_pred[[b]])
                     for b in range(batch_size)]))

            # Concatenate the outputs of all scales for each image.
            per_image = list(zip(*per_scale))
            bboxes_batch = [np.concatenate([rows[0] for rows in scales])
                            for scales in per_image]
            scores_batch = [np.concatenate([rows[1] for rows in scales])
                            for scales in per_image]
            cls_inds_batch = [np.concatenate([rows[2] for rows in scales])
                              for scales in per_image]

            targets = pool.map(
                partial(yolo_utils.pose_proposals, objpoints3D, vertices, K,
                        obj_diameter),
                [(bboxes_batch[b], scores_batch[b], cls_inds_batch[b],
                  batch['origin_gtboxes'][b], batch['origin_im'][b])
                 for b in range(batch_size)])

        Rt_pr_patch = [row[0] for row in targets]
        corner_patch = [row[1] for row in targets]
        gtconf2d_patch = [row[2] for row in targets]
        gtconf3d_patch = [row[3] for row in targets]
        bboxes_batch = [row[4] for row in targets]
    finally:
        # Always release the worker processes, also when a step above raises.
        pool.close()
        pool.join()

    return Rt_pr_patch, corner_patch, gtconf2d_patch, gtconf3d_patch, bboxes_batch
Beispiel #22
0
    def forward(self, im_data, gt_boxes=None, gt_classes=None, dontcare=None,
                size_index=0):
        """Compute box, IoU and class predictions; in training also the losses.

        Args:
            im_data: input image batch fed to ``self.conv1s``.
            gt_boxes: per-image ground-truth boxes; only read in training mode.
            gt_classes: per-image ground-truth classes; only read in training
                mode.
            dontcare: forwarded to ``self._build_target`` in training mode.
            size_index: forwarded to ``self._build_target`` in training mode.

        Returns:
            ``(bbox_pred, iou_pred, prob_pred)``, each shaped
            ``(bsize, h*w, cfg.num_anchors, k)`` with ``k`` = 4, 1 and
            ``cfg.num_classes`` respectively.

        Side effects:
            When ``self.training`` is true, sets ``self.bbox_loss``,
            ``self.iou_loss`` and ``self.cls_loss``: summed squared errors
            over the masked entries, divided by the number of ground-truth
            boxes in the batch.
        """
        # Feature extraction up to the fine-grained layer, whose reorganised
        # output is concatenated with the deeper conv3 features.
        conv1s = self.conv1s(im_data)
        conv2 = self.conv2(conv1s)
        conv3 = self.conv3(conv2)
        conv1s_reorg = self.reorg(conv1s)
        cat_1_3 = torch.cat([conv1s_reorg, conv3], 1)
        conv4 = self.conv4(cat_1_3)
        conv5 = self.conv5(conv4)   # batch_size, out_channels, h, w
        global_average_pool = self.global_average_pool(conv5)
        # for detection
        # bsize, c, h, w -> bsize, h, w, c ->
        #                   bsize, h x w, num_anchors, 5+num_classes
        bsize = global_average_pool.size(0)
        # assert bsize == 1, 'detection only support one image per batch'
        global_average_pool_reshaped = \
            global_average_pool.permute(0, 2, 3, 1).contiguous().view(bsize,
                                                                      -1, cfg.num_anchors, cfg.num_classes + 5)  # noqa

        # Last axis layout: 4 box terms (tx, ty, tw, th), 1 objectness term
        # (to), then one score per class.
        # tx, ty, tw, th, to -> sig(tx), sig(ty), exp(tw), exp(th), sig(to)
        # torch.sigmoid replaces the deprecated F.sigmoid; same result.
        xy_pred = torch.sigmoid(global_average_pool_reshaped[:, :, :, 0:2])
        wh_pred = torch.exp(global_average_pool_reshaped[:, :, :, 2:4])
        bbox_pred = torch.cat([xy_pred, wh_pred], 3)
        iou_pred = torch.sigmoid(global_average_pool_reshaped[:, :, :, 4:5])

        score_pred = global_average_pool_reshaped[:, :, :, 5:].contiguous()
        # Softmax over the class scores of every anchor. The scores are
        # flattened to 2-D, so dim=1 is the class axis; spelling it out avoids
        # the deprecated implicit-dimension choice.
        prob_pred = F.softmax(score_pred.view(-1, score_pred.size()[-1]), dim=1).view_as(score_pred)  # noqa
        # for training
        if self.training:
            bbox_pred_np = bbox_pred.data.cpu().numpy()
            iou_pred_np = iou_pred.data.cpu().numpy()
            # Build the regression/classification targets and their masks
            # from the current predictions and the ground truth.
            _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask = \
                self._build_target(bbox_pred_np,
                                   gt_boxes,
                                   gt_classes,
                                   dontcare,
                                   iou_pred_np,
                                   size_index)

            _boxes = net_utils.np_to_variable(_boxes)
            _ious = net_utils.np_to_variable(_ious)
            _classes = net_utils.np_to_variable(_classes)
            box_mask = net_utils.np_to_variable(_box_mask,
                                                dtype=torch.FloatTensor)
            iou_mask = net_utils.np_to_variable(_iou_mask,
                                                dtype=torch.FloatTensor)
            class_mask = net_utils.np_to_variable(_class_mask,
                                                  dtype=torch.FloatTensor)

            # Total number of ground-truth boxes in the batch; used to
            # normalise the three losses.
            num_boxes = sum((len(boxes) for boxes in gt_boxes))

            # _boxes[:, :, :, 2:4] = torch.log(_boxes[:, :, :, 2:4])
            box_mask = box_mask.expand_as(_boxes)
            self.bbox_loss = nn.MSELoss(size_average=False)(bbox_pred * box_mask, _boxes * box_mask) / num_boxes  # noqa
            self.iou_loss = nn.MSELoss(size_average=False)(iou_pred * iou_mask, _ious * iou_mask) / num_boxes  # noqa

            class_mask = class_mask.expand_as(prob_pred)
            self.cls_loss = nn.MSELoss(size_average=False)(prob_pred * class_mask, _classes * class_mask) / num_boxes  # noqa

        return bbox_pred, iou_pred, prob_pred
Beispiel #23
0
def home():
    """Detect objects in every ``.jpg`` of a requested folder and log them.

    Reads a JSON request body with a ``dir_path`` entry, runs the detector on
    each ``.jpg`` in that directory (sorted by name), writes the annotated
    image to ``<dir_path>/out/`` and inserts one row per image into
    ``images_det`` plus one summary row into ``minute_det``.

    Relies on the module-level names ``request``, ``pool``, ``net``,
    ``thresh``, ``cfg``, ``det_class``, ``conn``, ``t_total``, ``t_det``,
    ``preprocess`` and ``check_path_create``.
    """
    data = request.body.read()
    body = json.loads(data)
    # NOTE(review): dir_path comes from the request body and is used without
    # validation to list and write files -- confirm the caller is trusted.
    im_path = body['dir_path']
    im_fnames = sorted((fname for fname in os.listdir(im_path)
                        if os.path.splitext(fname)[-1] == '.jpg'))
    im_fnames = (os.path.join(im_path, fname) for fname in im_fnames)

    # Per tracked class: number of images of this folder it was detected in.
    min_record_tmp_list = [0] * len(det_class)

    for frame_idx, (image, im_data, fname) in enumerate(
            pool.imap(preprocess, im_fnames, chunksize=1)):
        print(fname)
        t_total.tic()
        im_data = net_utils.np_to_variable(im_data,
                                           is_cuda=True,
                                           volatile=True).permute(0, 3, 1, 2)
        t_det.tic()
        bbox_pred, iou_pred, prob_pred = net(im_data)
        det_time = t_det.toc()
        # to numpy
        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()

        bboxes, scores, cls_inds = yolo_utils.postprocess(
            bbox_pred, iou_pred, prob_pred, image.shape, cfg, thresh)

        im2show = yolo_utils.draw_detection(image, bboxes, scores, cls_inds,
                                            cfg)

        # File name is the last '/'-separated component of the image path.
        filename = fname.split("/")[-1]
        time_folder = im_path

        # write im2show to the out dir
        im_out_path = os.path.join(time_folder, "out")
        check_path_create(im_out_path)
        cv2.imwrite(os.path.join(im_out_path, filename), im2show)

        # One '0'/'1' flag per tracked class. The loop variable is distinct
        # from the frame counter so the log line below reports the frame
        # index (the original reused `i` and printed a class index).
        class_flags = ['0'] * len(det_class)
        for cls_ind in cls_inds:
            try:
                column = det_class.index(cfg.label_names[cls_ind])
            except (ValueError, IndexError, TypeError):
                # Detected class is not tracked in the database; skip it.
                continue
            class_flags[column] = '1'
            min_record_tmp_list[column] += 1

        tmp_list = [filename, time_folder] + class_flags
        # Column names come from det_class; the values are bound through
        # '?' placeholders.
        conn.execute(
            """insert into images_det (name, time_folder, %s)\
                        values (%s)""" %
            (",".join(det_class), ",".join(['?'] * len(tmp_list))), tmp_list)
        conn.commit()
        total_time = t_total.toc()

        if frame_idx % 1 == 0:
            format_str = 'frame: %d, (detection: %.1f Hz, %.1f ms) (total: %.1f Hz, %.1f ms)'
            print(format_str % (frame_idx, 1. / det_time, det_time * 1000,
                                1. / total_time, total_time * 1000))

            t_total.clear()
            t_det.clear()

    # Summary row: the folder followed by the per-class counts as strings.
    tmp_list = [im_path]
    tmp_list.extend(str(count) for count in min_record_tmp_list)
    conn.execute(
        """insert into minute_det (time_folder, %s)
                    values (%s)""" %
        (",".join(det_class), ",".join(['?'] * len(tmp_list))), tmp_list)
    conn.commit()
Beispiel #24
0
# Demo script fragment: run the detector over every .jpg found in `im_path`.
# `net`, `im_path`, `thresh`, `cfg` and `preprocess` are defined outside this
# snippet.
net.cuda()
net.eval()
print('load model succ...')

t_det = Timer()      # times the forward pass only
t_total = Timer()    # started at the top of every loop iteration
# Collect the .jpg file names in sorted order, then lazily join them with the
# directory to get the paths.
im_fnames = sorted((fname
                    for fname in os.listdir(im_path)
                    if os.path.splitext(fname)[-1] == '.jpg'))
im_fnames = (os.path.join(im_path, fname) for fname in im_fnames)
# A single worker process runs `preprocess` on the paths.
pool = Pool(processes=1)

for i, (image, im_data) in enumerate(pool.imap(
        preprocess, im_fnames, chunksize=1)):
    t_total.tic()
    # Wrap as a torch variable on the GPU and reorder the axes with
    # permute(0, 3, 1, 2) (presumably NHWC -> NCHW; confirm).
    im_data = net_utils.np_to_variable(
        im_data, is_cuda=True, volatile=True).permute(0, 3, 1, 2)
    t_det.tic()
    bbox_pred, iou_pred, prob_pred = net(im_data)
    det_time = t_det.toc()
    # to numpy
    bbox_pred = bbox_pred.data.cpu().numpy()
    iou_pred = iou_pred.data.cpu().numpy()
    prob_pred = prob_pred.data.cpu().numpy()

    # print bbox_pred.shape, iou_pred.shape, prob_pred.shape

    # Turn the raw predictions into final boxes, scores and class indices.
    bboxes, scores, cls_inds = yolo_utils.postprocess(
        bbox_pred, iou_pred, prob_pred, image.shape, cfg, thresh)

    # Draw the detections onto the image.
    im2show = yolo_utils.draw_detection(image, bboxes, scores, cls_inds, cfg)
Beispiel #25
0
    # One training step (fragment: the enclosing loop is not part of this
    # snippet). When the dataset has moved on to a new epoch, save the
    # weights, record the epoch number and rebuild the optimizer.
    if imdb.epoch > prev_epoch:
        # save trained weights
        save_name = os.path.join(cfg.train_output_dir, '{}_{}.h5'.format(cfg.exp_name, imdb.epoch))
        net_utils.save_net(save_name, net)
        print('save model: {}'.format(save_name))

        # update check_point file; the context manager closes the file even
        # if the write fails
        with open(os.path.join(cfg.check_point_file), 'w') as ckp:
            ckp.write(str(imdb.epoch))

        # prepare optimizer for next epoch
        optimizer = get_optimizer(cfg, net, imdb.epoch)

    # forward
    im_data = net_utils.np_to_variable(batch['images'], is_cuda=True, volatile=False).permute(0, 3, 1, 2)
    x = net.forward(im_data, batch['gt_boxes'], batch['gt_classes'], batch['dontcare'], network_size)

    # loss: accumulate each term into its running total
    bbox_loss += net.bbox_loss.data.cpu().numpy()[0]
    iou_loss += net.iou_loss.data.cpu().numpy()[0]
    cls_loss += net.cls_loss.data.cpu().numpy()[0]
    train_loss += net.loss.data.cpu().numpy()[0]
    cnt += 1
    # print('train_loss', net.loss.data.cpu().numpy()[0])

    # backward
    optimizer.zero_grad()
    net.loss.backward()
    optimizer.step()
    # Use only half of the image since ZED camera has 2 cameras.
    # Floor division keeps the slice bound an integer.
    frame = frame[:, 0:frame.shape[1] // 2, :]

    # Crop the middle square of the image
    frame_centre = cap.get(3) / 4
    frame_height = cap.get(4)
    # The bounds are computed with true division, so convert them to int
    # before slicing; float slice indices raise a TypeError.
    crop_left = int(frame_centre - frame_height / 2)
    crop_right = int(frame_centre + frame_height / 2)
    frame = frame[:, crop_left:crop_right, :]

    # Preprocess the image
    t_total.tic()

    image, im_data = preprocess(frame)
    im_data = net_utils.np_to_variable(im_data,
                                       use_cuda=cfg.use_cuda,
                                       volatile=True).permute(0, 3, 1, 2)

    # Forward
    t_det.tic()
    bbox_pred, iou_pred, prob_pred = model(im_data)
    det_time = t_det.toc()

    # Postprocess the image
    # to numpy
    bbox_pred = bbox_pred.data.cpu().numpy()

    iou_pred = iou_pred.data.cpu().numpy()

    prob_pred = prob_pred.data.cpu().numpy()
Beispiel #27
0
    def forward(self, im_data, gt_boxes=None, gt_classes=None, dontcare=None,
                size_index=0):
        """Predict boxes, IoU and class probabilities for a batch of images.

        Returns ``(bbox_pred, iou_pred, prob_pred)``, each shaped
        ``(bsize, h*w, cfg.num_anchors, k)`` with ``k`` = 4, 1 and
        ``cfg.num_classes``. In training mode the ground-truth arguments are
        used to build targets and ``self.bbox_loss``, ``self.iou_loss`` and
        ``self.cls_loss`` are set as a side effect.
        """
        # Backbone: the reorganised early features are concatenated with the
        # deeper conv3 features before the last conv stages.
        early = self.conv1s(im_data)
        deep = self.conv3(self.conv2(early))
        fused = torch.cat([self.reorg(early), deep], 1)
        # batch_size, out_channels, h, w
        head = self.global_average_pool(self.conv5(self.conv4(fused)))

        # for detection
        # bsize, c, h, w -> bsize, h, w, c ->
        #                   bsize, h x w, num_anchors, 5+num_classes
        bsize, _channels, _rows, _cols = head.size()
        per_anchor = head.permute(0, 2, 3, 1).contiguous().view(
            bsize, -1, cfg.num_anchors, cfg.num_classes + 5)

        # tx, ty, tw, th, to -> sig(tx), sig(ty), exp(tw), exp(th), sig(to)
        centre = F.sigmoid(per_anchor[:, :, :, 0:2])
        extent = torch.exp(per_anchor[:, :, :, 2:4])
        bbox_pred = torch.cat([centre, extent], 3)
        iou_pred = F.sigmoid(per_anchor[:, :, :, 4:5])

        # Softmax over the class scores of every anchor.
        score_pred = per_anchor[:, :, :, 5:].contiguous()
        flat_scores = score_pred.view(-1, score_pred.size()[-1])
        prob_pred = F.softmax(flat_scores).view_as(score_pred)

        # Inference stops here; the rest only runs in training mode.
        if not self.training:
            return bbox_pred, iou_pred, prob_pred

        # Targets and masks are built in NumPy from the current predictions.
        targets = self._build_target(bbox_pred.data.cpu().numpy(),
                                     gt_boxes,
                                     gt_classes,
                                     dontcare,
                                     iou_pred.data.cpu().numpy(),
                                     size_index)
        box_np, iou_np, cls_np, box_mask_np, iou_mask_np, cls_mask_np = targets

        def as_float_variable(array):
            return net_utils.np_to_variable(array, dtype=torch.FloatTensor)

        box_target = net_utils.np_to_variable(box_np)
        iou_target = net_utils.np_to_variable(iou_np)
        cls_target = net_utils.np_to_variable(cls_np)
        box_mask = as_float_variable(box_mask_np)
        iou_mask = as_float_variable(iou_mask_np)
        class_mask = as_float_variable(cls_mask_np)

        # Number of ground-truth boxes in the whole batch; normalises the
        # three losses.
        num_boxes = sum(len(boxes) for boxes in gt_boxes)

        # Each loss is a sum of squared errors over the masked entries.
        summed_sq_err = nn.MSELoss(size_average=False)

        box_mask = box_mask.expand_as(box_target)
        self.bbox_loss = summed_sq_err(bbox_pred * box_mask,
                                       box_target * box_mask) / num_boxes
        self.iou_loss = summed_sq_err(iou_pred * iou_mask,
                                      iou_target * iou_mask) / num_boxes

        class_mask = class_mask.expand_as(prob_pred)
        self.cls_loss = summed_sq_err(prob_pred * class_mask,
                                      cls_target * class_mask) / num_boxes

        return bbox_pred, iou_pred, prob_pred
Beispiel #28
0
    def forward(self, im_data, gt_boxes=None, gt_classes=None,
                dontcare=None, size_index=0):
        """Run the detector on a batch and, in training mode, set the losses.

        Args:
            im_data: input batch handed to ``self.conv1s``.
            gt_boxes: per-image ground-truth boxes; only read in training
                mode, where ``len()`` of every element is summed to get the
                box count used to normalise the losses.
            gt_classes: forwarded unchanged to ``self._build_target``.
            dontcare: forwarded unchanged to ``self._build_target``.
            size_index: forwarded unchanged to ``self._build_target``.

        Returns:
            ``(bbox_pred, iou_pred, prob_pred)``, each of shape
            ``(bsize, h * w, cfg.num_anchors, k)`` with ``k`` equal to 4, 1
            and ``cfg.num_classes`` respectively.

        Side effects:
            When ``self.training`` is true, ``self.bbox_loss``,
            ``self.iou_loss`` and ``self.cls_loss`` are overwritten.
        """
        # Backbone: the reorganised early features are concatenated with the
        # deeper ones along the channel axis.
        stem = self.conv1s(im_data)
        deep = self.conv3(self.conv2(stem))
        conv4 = self.conv4(torch.cat([self.reorg(stem), deep], 1))
        conv5 = self.conv5(conv4)  # batch_size, out_channels, h, w

        # Single feedback pass (labelled "IFF" in the original code): conv5 is
        # mapped back through conv_back, added to conv4 and re-run.
        conv5 = self.conv5(conv4 + self.conv_back(conv5))

        pooled = self.global_average_pool(conv5)

        # bsize, c, h, w -> bsize, h, w, c -> bsize, h x w, num_anchors,
        # 5 + num_classes.  The 4-way unpack also insists on a 4-D output.
        bsize, _, _, _ = pooled.size()
        channels_last = pooled.permute(0, 2, 3, 1).contiguous()
        preds = channels_last.view(bsize, -1, cfg.num_anchors,
                                   cfg.num_classes + 5)

        # tx, ty, tw, th, to -> sig(tx), sig(ty), exp(tw), exp(th), sig(to)
        xy_pred = F.sigmoid(preds[:, :, :, 0:2])
        wh_pred = torch.exp(preds[:, :, :, 2:4])
        bbox_pred = torch.cat([xy_pred, wh_pred], 3)
        iou_pred = F.sigmoid(preds[:, :, :, 4:5])

        # Class scores are flattened to 2-D for the softmax, then restored.
        score_pred = preds[:, :, :, 5:].contiguous()
        flat_scores = score_pred.view(-1, score_pred.size()[-1])
        prob_pred = F.softmax(flat_scores).view_as(score_pred)

        if not self.training:
            return bbox_pred, iou_pred, prob_pred

        # Targets are built on the CPU from detached numpy copies.
        bbox_pred_np = bbox_pred.data.cpu().numpy()
        iou_pred_np = iou_pred.data.cpu().numpy()
        targets = self._build_target(bbox_pred_np, gt_boxes, gt_classes,
                                     dontcare, iou_pred_np, size_index)
        _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask = targets

        to_var = net_utils.np_to_variable
        _boxes = to_var(_boxes)
        _ious = to_var(_ious)
        _classes = to_var(_classes)
        box_mask = to_var(_box_mask, dtype=torch.FloatTensor)
        iou_mask = to_var(_iou_mask, dtype=torch.FloatTensor)
        class_mask = to_var(_class_mask, dtype=torch.FloatTensor)

        num_boxes = sum(len(per_image) for per_image in gt_boxes)

        def masked_sse(pred, target, mask):
            # Summed (not averaged) squared error over the masked entries.
            return nn.MSELoss(size_average=False)(pred * mask, target * mask)

        box_mask = box_mask.expand_as(_boxes)
        self.bbox_loss = masked_sse(bbox_pred, _boxes, box_mask) / num_boxes
        self.iou_loss = masked_sse(iou_pred, _ious, iou_mask) / num_boxes

        class_mask = class_mask.expand_as(prob_pred)
        self.cls_loss = masked_sse(prob_pred, _classes, class_mask) / num_boxes

        return bbox_pred, iou_pred, prob_pred
Beispiel #29
0
def train_batch(net2, rgb_patch, gtconf_patch, train_loss_epoch, conf_loss_epoch):
    """Train ``net2`` on one batch of proposal patches, split into sub-batches.

    The patches are cut into ``partition_size`` full sub-batches (plus one
    trailing partial sub-batch when the patch count is not a multiple of the
    sub-batch size).  Every sub-batch gets its own forward pass, loss,
    ``optimizer.zero_grad()``, backward pass and ``optimizer.step()``.

    Relies on the module-level names ``cfg``, ``args``, ``optimizer`` and
    ``net_utils``, none of which are defined inside this function.

    Args:
        net2: the network being trained; called on the patches and asked for
            ``loss`` (via ``net2.module`` when ``args.mGPUs`` is set).
        rgb_patch: array-like converted with ``np.array``; indexed as
            (image, proposal, ...) and permuted with ``(0, 1, 4, 2, 3)``, so
            it is presumably (images, k_proposals, H, W, C) - TODO confirm.
        gtconf_patch: array-like of target confidences, sliced exactly like
            ``rgb_patch`` and reshaped to a column before the loss call.
        train_loss_epoch: running total; this batch's loss is added to it.
        conf_loss_epoch: running total; this batch's loss is added to it.

    Returns:
        ``None`` when ``cfg.args.seg_cullnet`` is false.  Otherwise the tuple
        ``(confidence_new_batch, conf_loss_np_subnetwrk, train_loss_np,
        train_loss_epoch, conf_loss_epoch)`` where ``confidence_new_batch``
        has ``cfg.args.k_proposals`` columns.
    """
    net2.train()

    if not cfg.args.seg_cullnet:
        # No training and no return value in this configuration.
        return None

    rgb_patch_np = np.array(rgb_patch)
    gtconf_patch_np = np.array(gtconf_patch)
    current_batch_size = rgb_patch_np.shape[0]
    k_proposals = cfg.args.k_proposals

    # Sub-batch size per network type; anything not listed uses 80.
    batchsize_by_type = {
        'vgg19_bn': 32,
        'allconvnet': 32,
        'allconvnet_small': 128,
        'resnet18': 512,
        'resnet18_gn': 80,
        'resnet18concat_gn': 80,
        'resnet50concat_gn': 80,
        'resnet50_gn': 80,
    }
    subnetwork_batchsize = batchsize_by_type.get(cfg.args.cullnet_type, 80)

    # A non-default --sub_bs overrides the per-type value.
    if cfg.args.sub_bs != 80:
        subnetwork_batchsize = cfg.args.sub_bs

    total_patches = current_batch_size * k_proposals

    # partition_size is the number of full sub-batches for network2 built
    # from one batched output of network1.  Floor division keeps it an int
    # under Python 3 as well (it feeds range(), slicing and reshape below).
    partition_size = total_patches // subnetwork_batchsize
    # NOTE(review): under Python 2 the inner "/" already floors, so the ceil
    # is a no-op there; under Python 3 this really rounds up.  The two agree
    # only when subnetwork_batchsize is a multiple of k_proposals - confirm
    # which one is intended.
    subnetwork_numimages = int(math.ceil(subnetwork_batchsize / k_proposals))

    def _train_step(patches_np, targets):
        """One optimisation step; returns (confidences, loss) as numpy."""
        # NOTE(review): volatile=True on a training input looks suspicious -
        # confirm how net_utils.np_to_variable treats it.
        patches_var = net_utils.np_to_variable(
            patches_np, is_cuda=True, volatile=True).permute(0, 1, 4, 2, 3)

        # 'concat' inputs carry one extra channel on top of RGB.
        in_channels = 4 if cfg.args.cullnet_inconf == 'concat' else 3
        confidence = net2(patches_var.view(-1, in_channels,
                                           cfg.args.cullnet_input,
                                           cfg.args.cullnet_input))

        # With multi-GPU wrapping the loss lives on the wrapped module.
        loss_fn = net2.module.loss if args.mGPUs else net2.loss
        conf_loss_var = loss_fn(confidence, targets)

        optimizer.zero_grad()
        conf_loss_var.backward()
        optimizer.step()

        return (confidence.data.cpu().numpy(),
                conf_loss_var.data.cpu().numpy())

    confidence_new_batch = []
    conf_loss_np_subnetwrk = 0

    for i in range(partition_size):
        if subnetwork_batchsize < k_proposals:
            # A sub-batch is smaller than one image's proposals: take a run
            # of proposals from a single image.
            first_patch = i * subnetwork_batchsize
            b_id = first_patch // k_proposals
            p_id = first_patch % k_proposals
            rgb_chunk = rgb_patch_np[b_id:b_id + 1,
                                     p_id:p_id + subnetwork_batchsize]
            gt_chunk = gtconf_patch_np[b_id:b_id + 1,
                                       p_id:p_id + subnetwork_batchsize]
        else:
            # A sub-batch spans whole images.
            img_lo = i * subnetwork_numimages
            img_hi = img_lo + subnetwork_numimages
            rgb_chunk = rgb_patch_np[img_lo:img_hi]
            gt_chunk = gtconf_patch_np[img_lo:img_hi]

        confidence_new_np, conf_loss_np = _train_step(
            rgb_chunk, gt_chunk.reshape(-1, 1))

        confidence_new_batch.append(confidence_new_np)
        conf_loss_np_subnetwrk += conf_loss_np

    confidence_new_batch = np.array(confidence_new_batch)

    left_overpatches = total_patches % subnetwork_batchsize

    # Regroup the per-sub-batch outputs into one row per image.
    if subnetwork_batchsize < k_proposals:
        num_rows = int(partition_size *
                       (float(subnetwork_batchsize) / k_proposals))
    else:
        num_rows = partition_size * subnetwork_numimages
    confidence_new_batch = confidence_new_batch.reshape(num_rows, k_proposals)

    if left_overpatches > 0:
        # Trailing images that did not fill a complete sub-batch.
        tail_start = partition_size * subnetwork_numimages
        confidence_new_np, conf_loss_np = _train_step(
            rgb_patch_np[tail_start:],
            gtconf_patch_np[tail_start:].reshape(-1, 1))

        confidence_new_np = confidence_new_np.reshape(-1, k_proposals)
        confidence_new_batch = np.concatenate(
            (confidence_new_batch, confidence_new_np), 0)
        conf_loss_np_subnetwrk += conf_loss_np

    # The confidence loss is the only loss term trained here.
    train_loss_np = conf_loss_np_subnetwrk

    train_loss_epoch += train_loss_np
    conf_loss_epoch += conf_loss_np_subnetwrk

    return confidence_new_batch, conf_loss_np_subnetwrk, train_loss_np, train_loss_epoch, conf_loss_epoch
Beispiel #30
0
def test_net(net, imdb, max_per_image=300, thresh=0.5, vis=False):
    """Run ``net`` over every image of ``imdb``, save and evaluate detections.

    Detections are gathered per class and per image, optionally capped at
    ``max_per_image`` per image, pickled to ``detections.pkl`` inside the
    module-level ``output_dir`` and finally passed to
    ``imdb.evaluate_detections``.  This variant always uses size index 0.

    Relies on the module-level names ``output_dir``, ``cfg``, ``Timer``,
    ``net_utils`` and ``yolo_utils``.

    Args:
        net: detection network; called with the image batch and expected to
            return ``(bbox_pred, iou_pred, prob_pred)``.
        imdb: dataset object providing ``num_images``, ``num_classes``,
            ``next_batch`` and ``evaluate_detections``.
        max_per_image: cap on detections kept per image over all classes;
            values ``<= 0`` disable the cap.
        thresh: threshold forwarded to ``yolo_utils.postprocess``.
        vis: when true, each image is shown with its detections drawn and the
            loop blocks on a key press.
    """
    num_images = imdb.num_images
    print('num-images', num_images)

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}
    det_file = os.path.join(output_dir, 'detections.pkl')
    # Fixed on purpose in this variant (rather than read from the arguments).
    size_index = 0

    for i in range(num_images):
        batch = imdb.next_batch(size_index=size_index)
        ori_im = batch['origin_im'][0]
        # Channels-last batch -> channels-first for the network.
        im_data = net_utils.np_to_variable(batch['images'], is_cuda=True,
                                           volatile=True).permute(0, 3, 1, 2)

        _t['im_detect'].tic()
        bbox_pred, iou_pred, prob_pred = net(im_data)

        # to numpy
        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()

        bboxes, scores, cls_inds = yolo_utils.postprocess(bbox_pred,
                                                          iou_pred,
                                                          prob_pred,
                                                          ori_im.shape,
                                                          cfg,
                                                          thresh,
                                                          size_index)
        detect_time = _t['im_detect'].toc()

        _t['misc'].tic()

        # Split this image's detections by class.
        for j in range(imdb.num_classes):
            inds = np.where(cls_inds == j)[0]
            if len(inds) == 0:
                all_boxes[j][i] = np.empty([0, 5], dtype=np.float32)
                continue
            c_dets = np.hstack((bboxes[inds],
                                scores[inds][:, np.newaxis]))
            all_boxes[j][i] = c_dets.astype(np.float32, copy=False)

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i][:, -1]
                                      for j in range(imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                # Class 0 is a real class here (it is collected above and
                # counted in image_scores), so it must be pruned as well.
                for j in range(imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        nms_time = _t['misc'].toc()

        # Progress report on every 20th image; both timers are cleared then.
        if i % 20 == 0:
            print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'.format(i + 1, num_images, detect_time, nms_time))  # noqa
            _t['im_detect'].clear()
            _t['misc'].clear()

        if vis:
            im2show = yolo_utils.draw_detection(ori_im,
                                                bboxes,
                                                scores,
                                                cls_inds,
                                                cfg,
                                                thr=0.1)
            # Shrink very tall images to a height of 1000, keeping the
            # aspect ratio.
            if im2show.shape[0] > 1100:
                im2show = cv2.resize(im2show,
                                     (int(1000. * float(im2show.shape[1]) / im2show.shape[0]), 1000))  # noqa
            cv2.imshow('test', im2show)
            cv2.waitKey(0)

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes, output_dir)
Beispiel #31
0
def test_net(net, imdb, max_per_image=300, thresh=0.5, vis=False):
    """Run ``net`` over every image of ``imdb``, save and evaluate detections.

    Detections are gathered per class and per image, optionally capped at
    ``max_per_image`` per image, pickled to ``detections.pkl`` inside the
    module-level ``output_dir`` and finally passed to
    ``imdb.evaluate_detections``.  The input size index is read from the
    module-level ``args.image_size_index``.

    Relies on the module-level names ``args``, ``output_dir``, ``cfg``,
    ``Timer``, ``net_utils`` and ``yolo_utils``.

    Args:
        net: detection network; called with the image batch and expected to
            return ``(bbox_pred, iou_pred, prob_pred)``.
        imdb: dataset object providing ``num_images``, ``num_classes``,
            ``next_batch`` and ``evaluate_detections``.
        max_per_image: cap on detections kept per image over all classes;
            values ``<= 0`` disable the cap.
        thresh: threshold forwarded to ``yolo_utils.postprocess``.
        vis: when true, each image is shown with its detections drawn and the
            loop blocks on a key press.
    """
    num_images = imdb.num_images

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}
    det_file = os.path.join(output_dir, 'detections.pkl')
    size_index = args.image_size_index

    for i in range(num_images):
        batch = imdb.next_batch(size_index=size_index)
        ori_im = batch['origin_im'][0]
        # Channels-last batch -> channels-first for the network.
        im_data = net_utils.np_to_variable(batch['images'], is_cuda=True,
                                           volatile=True).permute(0, 3, 1, 2)

        _t['im_detect'].tic()
        bbox_pred, iou_pred, prob_pred = net(im_data)

        # to numpy
        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()

        bboxes, scores, cls_inds = yolo_utils.postprocess(bbox_pred,
                                                          iou_pred,
                                                          prob_pred,
                                                          ori_im.shape,
                                                          cfg,
                                                          thresh,
                                                          size_index)
        detect_time = _t['im_detect'].toc()

        _t['misc'].tic()

        # Split this image's detections by class.
        for j in range(imdb.num_classes):
            inds = np.where(cls_inds == j)[0]
            if len(inds) == 0:
                all_boxes[j][i] = np.empty([0, 5], dtype=np.float32)
                continue
            c_dets = np.hstack((bboxes[inds],
                                scores[inds][:, np.newaxis]))
            all_boxes[j][i] = c_dets.astype(np.float32, copy=False)

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i][:, -1]
                                      for j in range(imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                # Class 0 is a real class here (it is collected above and
                # counted in image_scores), so it must be pruned as well.
                for j in range(imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        nms_time = _t['misc'].toc()

        # Progress report on every 20th image; both timers are cleared then.
        if i % 20 == 0:
            print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'.format(i + 1, num_images, detect_time, nms_time))  # noqa
            _t['im_detect'].clear()
            _t['misc'].clear()

        if vis:
            im2show = yolo_utils.draw_detection(ori_im,
                                                bboxes,
                                                scores,
                                                cls_inds,
                                                cfg,
                                                thr=0.1)
            # Shrink very tall images to a height of 1000, keeping the
            # aspect ratio.
            if im2show.shape[0] > 1100:
                im2show = cv2.resize(im2show,
                                     (int(1000. * float(im2show.shape[1]) / im2show.shape[0]), 1000))  # noqa
            cv2.imshow('test', im2show)
            cv2.waitKey(0)

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes, output_dir)
Beispiel #32
0
    def forward(self,
                im_data,
                gt_boxes=None,
                gt_classes=None,
                dontcare=None,
                size_index=0):
        """Run the detector on a batch and, in training mode, set the losses.

        Args:
            im_data: input batch handed to ``self.basenet``.
            gt_boxes: per-image ground-truth boxes; only read in training
                mode, where ``len()`` of every element is summed to get the
                box count used to normalise the losses.
            gt_classes: forwarded unchanged to ``self._build_target``.
            dontcare: forwarded unchanged to ``self._build_target``.
            size_index: forwarded unchanged to ``self._build_target``.

        Returns:
            ``(bbox_pred, iou_pred, prob_pred)``, each of shape
            ``(bsize, h * w, cfg.num_anchors, k)`` with ``k`` equal to 4, 1
            and ``cfg.num_classes`` respectively.

        Side effects:
            When ``self.training`` is true, ``self.bbox_loss``,
            ``self.iou_loss`` and ``self.cls_loss`` are overwritten.
        """
        feature = self.basenet(im_data)
        # The head consumes the backbone output.  The previous code passed an
        # undefined name ``conv4`` here, which raised NameError on every call.
        # NOTE(review): assumes basenet returns a single feature map - confirm.
        conv5 = self.conv5(feature)
        global_average_pool = self.global_average_pool(conv5)

        # for detection
        # bsize, c, h, w -> bsize, h, w, c ->
        #                   bsize, h x w, num_anchors, 5+num_classes
        bsize, _, _, _ = global_average_pool.size()
        # assert bsize == 1, 'detection only support one image per batch'
        global_average_pool_reshaped = \
            global_average_pool.permute(0, 2, 3, 1).contiguous().view(
                bsize, -1, cfg.num_anchors, cfg.num_classes + 5)

        # tx, ty, tw, th, to -> sig(tx), sig(ty), exp(tw), exp(th), sig(to)
        xy_pred = F.sigmoid(global_average_pool_reshaped[:, :, :, 0:2])
        wh_pred = torch.exp(global_average_pool_reshaped[:, :, :, 2:4])
        bbox_pred = torch.cat([xy_pred, wh_pred], 3)
        iou_pred = F.sigmoid(global_average_pool_reshaped[:, :, :, 4:5])

        # Class scores are flattened to 2-D for the softmax, then restored.
        score_pred = global_average_pool_reshaped[:, :, :, 5:].contiguous()
        prob_pred = F.softmax(score_pred.view(
            -1,
            score_pred.size()[-1])).view_as(score_pred)

        # for training
        if self.training:
            # Targets are built on the CPU from detached numpy copies.
            bbox_pred_np = bbox_pred.data.cpu().numpy()
            iou_pred_np = iou_pred.data.cpu().numpy()
            _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask = \
                self._build_target(bbox_pred_np,
                                   gt_boxes,
                                   gt_classes,
                                   dontcare,
                                   iou_pred_np,
                                   size_index)

            _boxes = net_utils.np_to_variable(_boxes)
            _ious = net_utils.np_to_variable(_ious)
            _classes = net_utils.np_to_variable(_classes)
            box_mask = net_utils.np_to_variable(_box_mask,
                                                dtype=torch.FloatTensor)
            iou_mask = net_utils.np_to_variable(_iou_mask,
                                                dtype=torch.FloatTensor)
            class_mask = net_utils.np_to_variable(_class_mask,
                                                  dtype=torch.FloatTensor)

            # Total ground-truth box count over the batch; every loss below
            # is a masked sum of squared errors divided by this count.
            num_boxes = sum((len(boxes) for boxes in gt_boxes))

            # _boxes[:, :, :, 2:4] = torch.log(_boxes[:, :, :, 2:4])
            box_mask = box_mask.expand_as(_boxes)

            self.bbox_loss = nn.MSELoss(size_average=False)(
                bbox_pred * box_mask, _boxes * box_mask) / num_boxes  # noqa
            self.iou_loss = nn.MSELoss(size_average=False)(
                iou_pred * iou_mask, _ious * iou_mask) / num_boxes  # noqa

            class_mask = class_mask.expand_as(prob_pred)
            self.cls_loss = nn.MSELoss(size_average=False)(
                prob_pred * class_mask,
                _classes * class_mask) / num_boxes  # noqa

        return bbox_pred, iou_pred, prob_pred
Beispiel #33
0
def main():
    """Run a trained Darknet19 over the KITTI validation list.

    Recreates the ``output`` and ``kitti_det_output`` directories, loads the
    trained weights, then for every image path in the validation list runs
    the detector, post-processes the predictions, appends the detections to
    ``yolo_flow_kitti_det.txt`` via ``save_as_kitti_format`` and prints
    per-frame timing.  All paths are hard-coded.
    """
    # Start from clean output directories.
    shutil.rmtree('output', ignore_errors=True)
    shutil.copytree('output_template', 'output')
    shutil.rmtree('kitti_det_output', ignore_errors=True)
    os.makedirs('kitti_det_output')

    trained_model = '/home/cory/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5'
    thresh = 0.5

    net = Darknet19()
    net_utils.load_net(trained_model, net)
    net.eval()
    net.cuda()
    print('load model successfully')

    # One image path per line; the context manager closes the list file
    # (it was previously left open for the lifetime of the process).
    with open('/home/cory/yolo2-pytorch/train_data/kitti/kitti_val_images.txt') as img_files:
        image_abs_paths = [line.strip() for line in img_files]

    # Detections are appended to this file, so drop any earlier run's output.
    kitti_filename = 'yolo_flow_kitti_det.txt'
    try:
        os.remove(kitti_filename)
    except OSError:
        pass

    t_det = Timer()
    t_total = Timer()

    # Visualisation is switched off; flip to True to show and save each frame.
    vis_enable = False

    for i, image_path in enumerate(image_abs_paths):
        t_total.tic()
        image, im_data = preprocess(image_path)
        # Channels-last batch -> channels-first for the network.
        im_data = net_utils.np_to_variable(im_data,
                                           is_cuda=True,
                                           volatile=True).permute(0, 3, 1, 2)

        t_det.tic()
        bbox_pred, iou_pred, prob_pred = net.forward(im_data)
        det_time = t_det.toc()

        # to numpy
        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()

        bboxes, scores, cls_inds = yolo_utils.postprocess(
            bbox_pred, iou_pred, prob_pred, image.shape, cfg, thresh)
        det_obj = detection_objects(bboxes, scores, cls_inds)
        save_as_kitti_format(i, det_obj, kitti_filename, src_label='kitti')

        if vis_enable:
            im2show = yolo_utils.draw_detection(image, bboxes, scores,
                                                cls_inds, cfg)

            cv2.imshow('detection', im2show)
            cv2.imwrite('output/detection/{:04d}.jpg'.format(i), im2show)

        total_time = t_total.toc()
        format_str = 'frame: %d, (detection: %.1f fps, %.1f ms) (total: %.1f fps, %.1f ms)'
        print(format_str % (i, 1. / det_time, det_time * 1000, 1. / total_time,
                            total_time * 1000))

        # Clear both timers so the next frame is reported on its own.
        t_det.clear()
        t_total.clear()

        if vis_enable:
            # Block until a key is pressed; 'q' stops the run.
            key = cv2.waitKey(0)
            if key == ord('q'):
                break