Example #1
def _construct_mask(self, pth, prev=False):
    """Build a foreground mask for the frame at `pth`, warp it forward with
    the optical flow, and save it alongside the frame data."""
    video_name, frame_num = self._info_from_pth(pth)
    prev_frame_num = frame_num - 1
    flow = np.load(
        os.path.join(self.flow_dir, video_name,
                     format(frame_num, '08') + '_flow.npy'))
    if prev_frame_num == 0:
        # First frame: use the ground-truth box as a hard binary mask.
        bbox = self.gts[video_name][prev_frame_num]
        x0, y0, x1, y1 = bbox_format(bbox, 'tlxy_wh_2_rect')
        mask = np.zeros([flow.shape[0], flow.shape[1]])
        x0, y0, x1, y1 = int(x0), int(y0), int(x1), int(y1)
        mask[y0:y1, x0:x1] = 1
    else:
        if not prev:
            # Later frames: build a soft mask from the previous frame's
            # detections, one Gaussian bump per detected box.
            # (A variant weights each bump by its detection score.)
            bboxes, scores, det_scores = self._load_bbox(
                video_name, prev_frame_num)
            mask = np.zeros([flow.shape[0], flow.shape[1]])
            for bbox, score in zip(bboxes, det_scores):
                x1, y1, w, h = bbox
                x1, y1, w, h = int(x1), int(y1), int(w), int(h)
                gaussian_mask = self._gaussian_box(h, w)
                mask[y1:y1 + h,
                     x1:x1 + w] = np.maximum(mask[y1:y1 + h, x1:x1 + w],
                                             gaussian_mask)
        else:
            # Reuse the mask already computed for the previous frame.
            mask = np.load(
                os.path.join(
                    self.data_dir, video_name,
                    format(prev_frame_num, '08') + '_fgmask_nd.npy'))
    # Propagate the mask to the current frame with the optical flow.
    mask = flow_warp(mask, flow)
    if not prev:
        np.save(pth + '_fgmask_nd.npy', mask)
    else:
        np.save(pth + '_prev_fgmask_nd.npy', mask)
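The helper flow_warp is not defined in these examples. A minimal sketch of what it could look like, assuming flow is an H x W x 2 array of per-pixel (dx, dy) displacements and using OpenCV's remap; the real implementation in this codebase may differ.

import cv2
import numpy as np

def flow_warp(image, flow):
    # Hypothetical helper (assumption): resample `image` at positions shifted
    # by `flow`, so the mask follows the motion between frames.
    h, w = flow.shape[:2]
    grid_x, grid_y = np.meshgrid(np.arange(w), np.arange(h))
    map_x = (grid_x + flow[:, :, 0]).astype(np.float32)
    map_y = (grid_y + flow[:, :, 1]).astype(np.float32)
    return cv2.remap(image.astype(np.float32), map_x, map_y,
                     interpolation=cv2.INTER_LINEAR)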
Example #2
                    int(cx + w / 2),
                    int(cy + h / 2)
                ]
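                # Candidate foreground region: the pixels inside the current detection box.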
                foreground_candidate_mask = np.zeros(im2.shape)
                foreground_candidate_mask[
                    detection_box[1]:detection_box[3],
                    detection_box[0]:detection_box[2], :] = 1
                state = SiamRPN_init(im2, target_pos, target_sz, net)
                font = cv2.FONT_HERSHEY_SIMPLEX
                data_dir = os.path.join('/home/jianingq/vot_info', video_name)
                if not os.path.exists(data_dir):
                    os.makedirs(data_dir)
                for i in range(0, video_length - 1):
                    im1 = np.copy(video_frames[i])
                    im2 = np.copy(video_frames[i + 1])
                    current_bbox = bbox_format(gts[i], 'tlxy_wh_2_rect')
                    current_bbox = [int(j) for j in current_bbox]
                    next_bbox = bbox_format(gts[i + 1], 'tlxy_wh_2_rect')
                    next_bbox = [int(j) for j in next_bbox]
                    flow = np.load(
                        os.path.join(confidence_dir,
                                     format(i + 1, '08') + '_flow.npy'))
                    entropy_data = np.load(
                        os.path.join(confidence_dir,
                                     format(i + 1, '08') + '_entropy.npy'))
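                    # Normalize the flow entropy to [0, 1]; low entropy is
                    # treated as high confidence in the estimated flow.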
                    entropy = (entropy_data - np.min(entropy_data)) / (
                        np.max(entropy_data) - np.min(entropy_data))
                    confidence = 1 - entropy
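                    # Warp the previous frame onto the current one with the flow
                    # and measure how well the colors agree.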
                    warped_im1 = flow_warp(im1, flow)
                    diff, rgb_diff = color_confidence(warped_im1, im2)
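The helper color_confidence is not shown in these snippets. A minimal sketch, assuming it returns a per-pixel photometric error between the flow-warped previous frame and the current frame together with the raw channel-wise difference; the actual helper may use a different metric.

import numpy as np

def color_confidence(warped_prev, current):
    # Hypothetical helper (assumption): per-pixel color disagreement between
    # the warped previous frame and the current frame.
    rgb_diff = np.abs(current.astype(np.float32) - warped_prev.astype(np.float32))
    diff = np.linalg.norm(rgb_diff, axis=2)  # Euclidean distance over the color channels
    return diff, rgb_diff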
Example #3
                foreground = foreground_rank[1]
                foreground_index = np.where(labeled == foreground)
                binary_labeled[foreground_index] = 1
        else:
            #foreground = np.argmax(np.bincount(patch.flatten()))
            foreground_index = np.where(labeled == foreground)
            binary_labeled[foreground_index] = 1
        # Combined: replicate the binary foreground labeling across three channels.
        bi_mask = np.zeros((np.shape(binary_labeled)[0],
                            np.shape(binary_labeled)[1], 3), dtype='bool')
        bi_mask[:, :, 0] = binary_labeled
        bi_mask[:, :, 1] = binary_labeled
        bi_mask[:, :, 2] = binary_labeled

        current_bbox = bbox_format(gts[i], 'tlxy_wh_2_rect')
        next_bbox = bbox_format(gts[i + 1], 'tlxy_wh_2_rect')
        next_bbox = [int(v) for v in next_bbox]

        next_mask = background_flow(im, detection_box, flow)

        # Track with SiamRPN, passing the flow-based mask and the
        # connected-component mask alongside the frame.
        state = SiamRPN_track_bbox(score_net, state, im,
                                   next_mask[:, :, 0] > 0.99,
                                   bi_mask[:, :, 0], gts[i + 1])

        res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
        # res is (x0, y0, w, h)
        res = [int(v) for v in res]
        # convert to (x0, y0, x1, y1)
        detection_box = [res[0], res[1], res[0] + res[2], res[1] + res[3]]

        foreground_index = np.bitwise_or(next_mask[:, :, 0] > 0.99,
                                         bi_mask[:, :, 0])
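The variables labeled and foreground_rank come from an earlier step that is cut off above. A plausible reconstruction, assuming connected components of a binary foreground mask are labeled with scipy.ndimage and ranked by pixel count, so index 0 is normally the background label and index 1 the largest real component:

import numpy as np
from scipy import ndimage

def rank_components(binary_mask):
    # Hypothetical reconstruction (assumption): label connected components and
    # order the labels by how many pixels they cover, largest first.
    labeled, _ = ndimage.label(binary_mask)
    counts = np.bincount(labeled.flatten())
    foreground_rank = np.argsort(counts)[::-1]
    return labeled, foreground_rank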
Example #4
def tracker_eval_record_data(net, x_crop, target_pos, target_sz, window,
                             scale_z, p, im, next_mask, conf_mask, index_1,
                             index_2, frame_num, data_dir, gtbbox):
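    # Forward the search crop through the SiamRPN head: `delta` holds the box
    # regression offsets, `score` the foreground/background logits.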
    delta, score = net(x_crop)

    delta = delta.permute(1, 2, 3, 0).contiguous().view(4,
                                                        -1).data.cpu().numpy()
    score = F.softmax(score.permute(1, 2, 3, 0).contiguous().view(2, -1),
                      dim=0).data[1, :].cpu().numpy()

    # Decode the anchor-relative offsets into absolute (cx, cy, w, h) boxes.
    delta[0, :] = delta[0, :] * p.anchor[:, 2] + p.anchor[:, 0]
    delta[1, :] = delta[1, :] * p.anchor[:, 3] + p.anchor[:, 1]
    delta[2, :] = np.exp(delta[2, :]) * p.anchor[:, 2]
    delta[3, :] = np.exp(delta[3, :]) * p.anchor[:, 3]

    def change(r):
        return np.maximum(r, 1. / r)

    def sz(w, h):
        pad = (w + h) * 0.5
        sz2 = (w + pad) * (h + pad)
        return np.sqrt(sz2)

    def sz_wh(wh):
        pad = (wh[0] + wh[1]) * 0.5
        sz2 = (wh[0] + pad) * (wh[1] + pad)
        return np.sqrt(sz2)

    # size penalty
    s_c = change(sz(delta[2, :], delta[3, :]) /
                 (sz_wh(target_sz)))  # scale penalty
    r_c = change((target_sz[0] / target_sz[1]) /
                 (delta[2, :] / delta[3, :]))  # ratio penalty

    penalty = np.exp(-(r_c * s_c - 1.) * p.penalty_k)
    pscore = penalty * score
    # window float
    pscore = pscore * (1 - p.window_influence) + window * p.window_influence

    # Background penalty: collect statistics for the 100 highest-scoring anchors.
    inspect_num = 100
    top_score = score.argsort()[-inspect_num:][::-1]  # indices of the top candidates

    temp_result = np.zeros((inspect_num, 4), dtype=int)
    fg_result = np.zeros(inspect_num)
    score_result = np.zeros(inspect_num)
    original_score = np.zeros(inspect_num)
    all_scores = []
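    # For each top candidate, record its geometry, its overlap with the masks,
    # and several quality scores.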
    for i in range(0, inspect_num):
        target = delta[:, top_score[i]] / scale_z
        res_x = target[0] + target_pos[0]
        res_y = target[1] + target_pos[1]
        res_w = target[2]
        res_h = target[3]

        res = cxy_wh_2_rect(np.array([res_x, res_y]), np.array([res_w, res_h]))
        res = [int(l) for l in res]

        temp = [res[0], res[1], (res[0] + res[2]), (res[1] + res[3])]
        # Clip the candidate box to the image bounds, then convert it back to
        # (x0, y0, w, h).
        res = [np.clip(temp[0], 0, next_mask.shape[1] - 1),
               np.clip(temp[1], 0, next_mask.shape[0] - 1),
               np.clip(temp[2], 0, next_mask.shape[1] - 1),
               np.clip(temp[3], 0, next_mask.shape[0] - 1)]
        res[2] = res[2] - res[0]
        res[3] = res[3] - res[1]
        # IoU with the ground-truth box
        iou = calculate_iou([res[0], res[1], res[0] + res[2], res[1] + res[3]],
                            bbox_format(gtbbox, 'tlxy_wh_2_rect'))
        # Fraction of the candidate box covered by the warped foreground mask
        bbox_mask = np.zeros(next_mask.shape)
        bbox_mask[(res[1]):(res[1] + res[3]), (res[0]):(res[0] + res[2])] = 1
        intersection = np.count_nonzero(np.logical_and(next_mask, bbox_mask))
        fg_result[i] = float(intersection) / float(res[3] * res[2])

        # Confidence-overlap score
        conf_intersection = np.count_nonzero(
            np.logical_and(conf_mask, bbox_mask))
        conf_score = float(conf_intersection) / float(res[3] * res[2])
        # Ratio penalty term (from r_c) and scale penalty term (from s_c)
        size_penalty_score = np.exp(r_c[top_score[i]])
        scale_penalty_score = np.exp(s_c[top_score[i]])
        # Cosine-window value at this anchor
        cosine_score = window[top_score[i]]
        temp_result[i, :] = res
        original_score[i] = score[top_score[i]]
        score_result[i] = np.exp(
            -(r_c[top_score[i]] * s_c[top_score[i]] - 1.) * p.penalty_k)
        # These six values form the per-candidate feature vector fed to the
        # scoring network; `iou` above is the corresponding ground-truth IoU.
        scores = [
            original_score[i], fg_result[i], conf_score, size_penalty_score,
            scale_penalty_score, cosine_score
        ]
        all_scores.append(scores)
    # Feed the collected feature vectors to the scoring network and pick the
    # candidate with the largest predicted score.
    x = Variable(torch.from_numpy(np.array(all_scores)).float().cuda())
    y = net(x).data.cpu().numpy()

    # (Optional) the top-ranked boxes and their per-candidate scores can be
    # drawn on `im` with cv2.rectangle / cv2.putText for debugging.
    chosen_id = np.argmax(y)
    target_pos = np.array([
        temp_result[chosen_id, 0] + (temp_result[chosen_id, 2] / 2),
        temp_result[chosen_id, 1] + (temp_result[chosen_id, 3] / 2)
    ])
    target_sz = np.array(
        [temp_result[chosen_id, 2], temp_result[chosen_id, 3]])
    alternative = []
    return target_pos, target_sz, score_result[chosen_id], alternative
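The call net(x) near the end consumes the six-value feature vectors collected in all_scores, so the model used there acts as a per-candidate scorer rather than the SiamRPN network that produced delta and score. A minimal sketch of such a scorer, assuming a small fully connected network; the name score_net, the layer sizes, and the training details are assumptions, not the codebase's actual definition.

import numpy as np
import torch
import torch.nn as nn

# Hypothetical scoring model (assumption): maps the six per-candidate features
# (RPN score, foreground overlap, confidence overlap, ratio penalty term,
# scale penalty term, cosine-window value) to a single quality score.
score_net = nn.Sequential(
    nn.Linear(6, 32),
    nn.ReLU(),
    nn.Linear(32, 1),
)

# Usage mirroring the end of tracker_eval_record_data:
# x = torch.from_numpy(np.array(all_scores)).float()
# y = score_net(x).detach().numpy()   # shape (inspect_num, 1)
# chosen_id = int(np.argmax(y))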