Example #1
def track_video(model, video):
    toc, regions = 0, []
    image_files, gt = video['image_files'], video['gt']
    for f, image_file in enumerate(image_files):
        im = cv2.imread(image_file)  # TODO: batch load
        tic = cv2.getTickCount()
        if f == 0:  # init
            target_pos, target_sz = rect_2_cxy_wh(gt[f])
            state = SiamRPN_init(im, target_pos, target_sz,
                                 model)  # init tracker
            location = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
            regions.append(gt[f])
        elif f > 0:  # tracking
            state = SiamRPN_track(state, im)  # track
            location = cxy_wh_2_rect(state['target_pos'] + 1,  # +1: 1-based (OTB-style) coords
                                     state['target_sz'])
            regions.append(location)
        toc += cv2.getTickCount() - tic

        if args.visualization:  # visualization
            if f == 0:
                cv2.destroyAllWindows()
            if len(gt[f]) == 8:
                cv2.polylines(im,
                              [np.array(gt[f], int).reshape(
                                  (-1, 1, 2))], True, (0, 255, 0), 3)
            else:
                cv2.rectangle(im, (gt[f, 0], gt[f, 1]),
                              (gt[f, 0] + gt[f, 2], gt[f, 1] + gt[f, 3]),
                              (0, 255, 0), 3)
            if len(location) == 8:
                cv2.polylines(im, [location.reshape((-1, 1, 2))], True,
                              (0, 255, 255), 3)
            else:
                location = [int(l) for l in location]
                cv2.rectangle(
                    im, (location[0], location[1]),
                    (location[0] + location[2], location[1] + location[3]),
                    (0, 255, 255), 3)
            cv2.putText(im, str(f), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1,
                        (0, 255, 255), 2)

            cv2.imshow(video['name'], im)
            cv2.waitKey(1)
    toc /= cv2.getTickFrequency()

    # save result
    video_path = join('../test', args.dataset, 'SiamRPN_AlexNet_OTB2015')
    if not isdir(video_path):
        makedirs(video_path)
    result_path = join(video_path, '{:s}.txt'.format(video['name']))
    with open(result_path, "w") as fout:
        for x in regions:
            fout.write(','.join([str(i) for i in x]) + '\n')

    print('({:d}) Video: {:12s} Time: {:02.1f}s Speed: {:3.1f}fps'.format(
        v_id, video['name'], toc, f / toc))
    return f / toc
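
All of these examples shuttle between the tracker's center/size state and top-left rectangles via rect_2_cxy_wh and cxy_wh_2_rect. A minimal sketch of the two helpers, assuming the conventions of DaSiamRPN's utils module (not the verbatim library code):

import numpy as np

def rect_2_cxy_wh(rect):
    # top-left rect [x, y, w, h] -> (center position, size)
    return (np.array([rect[0] + rect[2] / 2, rect[1] + rect[3] / 2]),
            np.array([rect[2], rect[3]]))

def cxy_wh_2_rect(pos, sz):
    # (center position, size) -> top-left rect [x, y, w, h]
    return np.array([pos[0] - sz[0] / 2, pos[1] - sz[1] / 2, sz[0], sz[1]])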
Example #2
    def on_tracking(self):
        # warm up
        for i in range(10):
            self.net.template(
                torch.autograd.Variable(torch.FloatTensor(1, 3, 127,
                                                          127)).cuda())
            self.net(
                torch.autograd.Variable(torch.FloatTensor(1, 3, 255,
                                                          255)).cuda())

        i = 1
        pred_bbx = self.gt_first
        print("{}th frame: {} {} {} {}".format(i, pred_bbx[0], pred_bbx[1],
                                               pred_bbx[2], pred_bbx[3]))
        cx, cy, w, h = pred_bbx[0] + pred_bbx[2] / 2.0, pred_bbx[
            1] + pred_bbx[3] / 2.0, pred_bbx[2], pred_bbx[3]
        i += 1

        target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
        im = cv2.imread(self.path_seq + '/imgs/0001.jpg')  # HxWxC
        state = SiamRPN_init(im, target_pos, target_sz,
                             self.net)  # init tracker

        while i <= self.num_frames:
            self.index_frame = i
            im = cv2.imread(self.path_seq + '/imgs/' + str(i).zfill(4) +
                            '.jpg')
            state = SiamRPN_track(state, im)

            # convert cx, cy, w, h into rect
            res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
            print(f"{i}th frame: ", res)
            i += 1
Example #3
def main():
    # load net
    net_file = join(realpath(dirname(__file__)), 'SiamRPNBIG.model')
    net = SiamRPNBIG()
    net.load_state_dict(torch.load(net_file))
    net.eval().cuda()

    # warm up
    for i in range(10):
        net.temple(  # sic: DaSiamRPN names the template method 'temple'
            torch.autograd.Variable(torch.FloatTensor(1, 3, 127, 127)).cuda())
        net(torch.autograd.Variable(torch.FloatTensor(1, 3, 255, 255)).cuda())

    # start to track
    handle = vot.VOT("polygon")
    Polygon = handle.region()
    cx, cy, w, h = get_axis_aligned_bbox(Polygon)

    image_file = handle.frame()
    if not image_file:
        sys.exit(0)

    target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
    im = cv2.imread(image_file)  # HxWxC
    state = SiamRPN_init(im, target_pos, target_sz, net)  # init tracker
    while True:
        image_file = handle.frame()
        if not image_file:
            break
        im = cv2.imread(image_file)  # HxWxC
        state = SiamRPN_track(state, im)  # track
        res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])

        handle.report(Rectangle(res[0], res[1], res[2], res[3]))
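
get_axis_aligned_bbox is not shown in these examples; a hedged sketch of what it computes, assuming a flat x1, y1, ..., x4, y4 polygon as in Example #8. The usual trick (roughly what DaSiamRPN does) is to take the polygon's bounding box and shrink it so its area matches the polygon's:

import numpy as np

def get_axis_aligned_bbox(region):
    region = np.asarray(region, dtype=float).flatten()
    if region.size == 8:  # rotated 4-point polygon
        xs, ys = region[0::2], region[1::2]
        cx, cy = xs.mean(), ys.mean()
        A1 = (np.linalg.norm(region[0:2] - region[2:4]) *
              np.linalg.norm(region[2:4] - region[4:6]))   # rotated-rect area
        A2 = (xs.max() - xs.min()) * (ys.max() - ys.min())  # bounding-box area
        s = np.sqrt(A1 / A2)  # shrink the bbox to match the polygon's area
        w, h = s * (xs.max() - xs.min()), s * (ys.max() - ys.min())
    else:  # already an axis-aligned x, y, w, h
        cx, cy = region[0] + region[2] / 2, region[1] + region[3] / 2
        w, h = region[2], region[3]
    return cx, cy, w, h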
Example #4
def siam_track(image, img_name, txt_path, states, labels, roi):
    x_min, y_min = roi[0], roi[1]
    x_max, y_max = roi[2], roi[3]
    # img_h, img_w = image.shape[0], image.shape[1]
    result_state = []
    for state in states:
        state_o = SiamRPN_track(state, image)  # track
        result_state.append(state_o)
    txt = os.path.join(txt_path, img_name.replace('.jpg', '.txt'))
    mess = ''
    del_list = []
    for i, state in enumerate(result_state):
        res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
        res = [int(l) for l in res]
        if (res[0] + res[2]) > x_max or (
                res[1] + res[3]) > y_max or res[0] < x_min or res[1] < y_min:
            del_list.append(i)
        else:
            cv2.rectangle(image, (res[0], res[1]),
                          (res[0] + res[2], res[1] + res[3]), (0, 0, 255), 2)
            cv2.putText(image, labels[i], (res[0], res[1] - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            mess += f'{res[0]} {res[1]} {res[0]+res[2]} {res[1]+res[3]} {labels[i]}\n'
    with open(txt, 'w') as file_handle:  # open late and close the file properly
        file_handle.write(mess)
    for i in reversed(del_list):
        del result_state[i]
        del labels[i]
    return result_state, image, labels
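
siam_track expects one SiamRPN state per tracked object. A hypothetical set-up (dets, first_frame, next_frame, and net are assumed names, not part of the example):

import numpy as np

# one tracker state per initial detection, where dets is a list of
# (x, y, w, h) boxes with matching labels
states = [SiamRPN_init(first_frame,
                       np.array([x + w / 2.0, y + h / 2.0]),  # box center
                       np.array([w, h]), net)
          for (x, y, w, h) in dets]
states, vis, labels = siam_track(next_frame, '000001.jpg', 'out_txt',
                                 states, labels, roi=(0, 0, 1920, 1080))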
Example #5
    def track(self, image_file):
        image = cv2.imread(image_file)
        self.state = SiamRPN_track(self.state, image)  # track
        center = self.state["target_pos"] + 1  # shift to 1-based coordinates
        target_sz = self.state["target_sz"]
        box = cxy_wh_2_rect(center, target_sz)
        return box
Example #6
def main(imagedir, gtdir):
    # load net
    net_file = join(realpath(dirname(__file__)), 'SiamRPNBIG.model')
    net = SiamRPNBIG()
    net.load_state_dict(torch.load(net_file))
    net.eval().cuda()

    # warm up
    for i in range(10):
        net.temple(
            torch.autograd.Variable(torch.FloatTensor(1, 3, 127, 127)).cuda())
        net(torch.autograd.Variable(torch.FloatTensor(1, 3, 255, 255)).cuda())

    # start to track
    # get the first frame groundtruth
    gt_file = os.path.join(gtdir, 'gt.txt')
    with open(gt_file, 'r') as f:
        lines = f.readlines()
    gt = []
    for line in lines:
        line = line.split(' ')
        gt.append([int(float(x)) for x in line])
    init_bbox = gt[0]  # top-left x y,w,h
    target_pos, target_sz = rect_2_cxy_wh(
        init_bbox)  # top-left x y,w,h --> center x y,w,h

    image_list = glob.glob(os.path.join(imagedir, '*.jpg'))
    image_list.sort()
    im = cv2.imread(image_list[0])  # HxWxC

    state = SiamRPN_init(im, target_pos, target_sz, net)  # init tracker
    bboxes = []
    for i in range(1, len(gt)):
        im = cv2.imread(image_list[i])  # HxWxC
        state = SiamRPN_track(state, im)  # track
        res = cxy_wh_2_rect(
            state['target_pos'],
            state['target_sz'])  # center x y,w,h --> top-left x y,w,h
        bboxes.append(res.tolist())

    _, precision, precision_auc, iou = _compile_results(gt[1:], bboxes)
    print(' -- Precision (20 px): {:.2f} -- Precision AUC: {:.2f}'
          ' -- IOU: {:.2f} --'.format(precision, precision_auc, iou))

    isSavebbox = True
    if isSavebbox:
        print('saving bbox...')
        res_bbox_file = 'results_bbox.json'
        json.dump(bboxes, open(res_bbox_file, 'w'), indent=2)

    isSavevideo = True
    if isSavevideo:
        print('saving video...')
        save_video(image_list, bboxes)
    print('done')
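
_compile_results is not shown in the snippet. A plausible sketch of the OTB-style metrics it appears to report (precision at a 20 px center-error threshold, precision AUC over a range of thresholds, and mean IoU); the exact implementation is an assumption:

import numpy as np

def _compile_results(gt, bboxes, thresh=20):
    gt, bb = np.asarray(gt, float), np.asarray(bboxes, float)
    dist = np.linalg.norm((gt[:, :2] + gt[:, 2:] / 2) -
                          (bb[:, :2] + bb[:, 2:] / 2), axis=1)  # center error
    precision = np.mean(dist <= thresh)
    precision_auc = np.mean([np.mean(dist <= t) for t in range(1, 51)])
    x1 = np.maximum(gt[:, 0], bb[:, 0])
    y1 = np.maximum(gt[:, 1], bb[:, 1])
    x2 = np.minimum(gt[:, 0] + gt[:, 2], bb[:, 0] + bb[:, 2])
    y2 = np.minimum(gt[:, 1] + gt[:, 3], bb[:, 1] + bb[:, 3])
    inter = np.clip(x2 - x1, 0, None) * np.clip(y2 - y1, 0, None)
    union = gt[:, 2] * gt[:, 3] + bb[:, 2] * bb[:, 3] - inter
    iou = np.mean(inter / union)
    return len(gt), precision, precision_auc, iou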
Example #7
def tracking(im, state, cap):
    """Return cx, cy"""
    state = SiamRPN_track(state, im)  # track
    res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
    cx, cy = res[0] + res[2] / 2, res[1] + res[3] / 2
    res = [int(l) for l in res]
    cv2.rectangle(im, (res[0], res[1]), (res[0] + res[2], res[1] + res[3]),
                  (0, 255, 255), 3)
    cv2.imshow('SiamRPN', im)
    cap.write(im)
    cv2.waitKey(1)
    return cx, cy
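
A hypothetical driver loop for tracking() above. It relies on SiamRPN_track updating the state dict in place (which DaSiamRPN does), and assumes state came from SiamRPN_init on the first frame, video is an opened cv2.VideoCapture, and writer is an opened cv2.VideoWriter:

trajectory = []
while True:
    ok, frame = video.read()
    if not ok:  # end of stream
        break
    cx, cy = tracking(frame, state, writer)  # state is mutated in place
    trajectory.append((cx, cy))
writer.release()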
Example #8
def main():

    vid_file = os.path.expanduser("~/Videos/VID_20190327_195111.mp4")

    cap = cv2.VideoCapture(vid_file)

    # load net
    net = SiamRPNvot()
    net.load_state_dict(
        torch.load(join(realpath(dirname(__file__)), 'SiamRPNVOT.model')))
    net.eval().cuda()

    # # image and init box
    # image_files = sorted(glob.glob('./bag/*.jpg'))
    init_rbox = [
        334.02, 128.36, 438.19, 188.78, 396.39, 260.83, 292.23, 200.41
    ]
    [cx, cy, w, h] = get_axis_aligned_bbox(init_rbox)

    # tracker init
    target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
    ret, im = cap.read()
    if not ret:
        print('Cannot read video file')
        return

    state = SiamRPN_init(im, target_pos, target_sz, net, use_gpu=True)

    toc = 0
    while True:
        # Capture frame-by-frame
        ret, im = cap.read()
        if not ret:  # end of stream
            break

        tic = cv2.getTickCount()
        state = SiamRPN_track(state, im, use_gpu=True)  # track
        toc += cv2.getTickCount() - tic
        res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
        res = [int(l) for l in res]
        cv2.rectangle(im, (res[0], res[1]), (res[0] + res[2], res[1] + res[3]),
                      (0, 255, 255), 3)
        cv2.imshow('SiamRPN', im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # When everything done, release the capture
    cap.release()
    cv2.destroyAllWindows()
Example #9
net.eval().cuda()

# image and init box
image_files = sorted(glob.glob('./testData/*.jpg'))
init_rbox = [3641, 1778, 3810, 1778, 3810, 2313, 3641, 2313]
[cx, cy, w, h] = get_axis_aligned_bbox(init_rbox)

# tracker init
target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
im = cv2.imread(image_files[0])  # HxWxC
state = SiamRPN_init(im, target_pos, target_sz, net, args.model)

# tracking and visualization
toc = 0
for f, image_file in enumerate(image_files):
    im = cv2.imread(image_file)
    # print(im.shape)
    tic = cv2.getTickCount()
    state = SiamRPN_track(state, im)  # track
    toc += cv2.getTickCount() - tic
    res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
    res = [int(l) for l in res]
    # print(res)
    cv2.rectangle(im, (res[0], res[1]), (res[0] + res[2], res[1] + res[3]),
                  (0, 255, 255), 3)
    cv2.imshow('SiamRPN', im)
    cv2.waitKey(1)

print('Tracking Speed {:.1f}fps'.format(
    (len(image_files) - 1) / (toc / cv2.getTickFrequency())))
Example #10
def process_video(net, groundtruth_path, image_path, out_video):
    print('processing sequence', out_video)
    with open(groundtruth_path) as f:
        groundtruth = f.readlines()

    groundtruth = [x.rstrip() for x in groundtruth]

    image_filenames = [
        y for x in walk(image_path) for y in glob(join(x[0], '*.jpg'))
    ]
    image_filenames.sort()

    assert len(image_filenames) == len(groundtruth)

    image = cv2.imread(image_filenames[0])
    height, width = image.shape[:2]
    writer = cv2.VideoWriter(out_video,
                             cv2.VideoWriter_fourcc('X', 'V', 'I', 'D'), 15,
                             (width, height))

    if not writer.isOpened():
        print('Failed to open video')
        return

    # VOT sequence
    # polygon_ = parse_region(groundtruth[0])
    # cx, cy, w, h = get_axis_aligned_bbox(polygon_)
    # target_pos, target_sz = np.array([cx, cy]), np.array([w, h])

    polygon = [float(x) for x in groundtruth[0].split(',')]
    target_pos = np.array([polygon[0] + polygon[2] / 2,
                           polygon[1] + polygon[3] / 2])
    target_sz = np.array([polygon[2], polygon[3]])
    state = SiamRPN_init(image, target_pos, target_sz, net)  # init tracker

    for i in range(len(image_filenames)):
        image = cv2.imread(image_filenames[i])
        polygon = [float(x) for x in groundtruth[i].split(',')]
        polygon = [int(x) for x in polygon]

        # VOT sequence
        # cv2.line(image, (polygon[0], polygon[1]), (polygon[2], polygon[3]), (0, 0, 255), 2)
        # cv2.line(image, (polygon[2], polygon[3]), (polygon[4], polygon[5]), (0, 0, 255), 2)
        # cv2.line(image, (polygon[4], polygon[5]), (polygon[6], polygon[7]), (0, 0, 255), 2)
        # cv2.line(image, (polygon[6], polygon[7]), (polygon[0], polygon[1]), (0, 0, 255), 2)

        cv2.rectangle(image, (polygon[0], polygon[1]),
                      (polygon[0] + polygon[2], polygon[1] + polygon[3]),
                      (0, 0, 255), 2)

        # Start timer
        timer = cv2.getTickCount()

        state = SiamRPN_track(state, image)  # track
        res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
        res = [int(x) for x in res]

        cv2.rectangle(image, (res[0], res[1]),
                      (res[0] + res[2], res[1] + res[3]), (255, 0, 0), 2)

        # Calculate Frames per second (FPS)
        fps = cv2.getTickFrequency() / (cv2.getTickCount() - timer)

        # Display tracker type on frame
        cv2.putText(image, "SiamRPN", (50, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.75,
                    (230, 170, 50), 2)

        # Display FPS on frame
        cv2.putText(image, "FPS : " + str(int(fps)), (50, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.75, (230, 170, 50), 2)

        writer.write(image)

    writer.release()
Example #11
def showImage(subscriber, camera_matrix, kcf_tracker_h):
    global x1, y1, x2, y2, drawing, init, flag, image, getim, start

    flag = 1
    init = False
    drawing = False
    getim = False
    start = False
    x1, x2, y1, y2 = -1, -1, -1, -1
    flag_lose = False
    count_lose = 0

    print('loading model...........')
    net = SiamRPNvot()
    net.load_state_dict(torch.load(path + 'SiamRPNVOT.model'))
    net.eval().cuda()
    z = torch.Tensor(1, 3, 127, 127)
    net.temple(z.cuda())
    x = torch.Tensor(1, 3, 271, 271)
    net(x.cuda())
    print('ready for starting!')

    rospy.Subscriber(subscriber, Image, callback)

    cv2.namedWindow('image')
    cv2.setMouseCallback('image', draw_circle)
    rate = rospy.Rate(50)
    while not rospy.is_shutdown():
        if getim:
            getim = False
            ## !
            d_info = DetectionInfo()
            d_info.frame = 0
            ## !

            if start is False and init is True:
                target_pos = np.array([int((x1 + x2) / 2), int((y1 + y2) / 2)])
                target_sz = np.array([int(x2 - x1), int(y2 - y1)])
                state = SiamRPN_init(image, target_pos, target_sz, net)
                start = True
                flag_lose = False
                continue
            if start is True:
                state = SiamRPN_track(state, image)  # track
                res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
                res = [int(l) for l in res]
                cv2.rectangle(image, (res[0], res[1]),
                              (res[0] + res[2], res[1] + res[3]),
                              (0, 255, 255), 2)

                ## !
                depth = (kcf_tracker_h / state['target_sz'][1] *
                         camera_matrix[1, 1])
                cx = state['target_pos'][0] - image.shape[1] / 2
                cy = state['target_pos'][1] - image.shape[0] / 2
                d_info.position[0] = depth * cx / camera_matrix[0, 0]
                d_info.position[1] = depth * cy / camera_matrix[1, 1]
                d_info.position[2] = depth
                d_info.sight_angle[0] = cx / (image.shape[1] / 2) * math.atan(
                    (image.shape[1] / 2) / camera_matrix[0, 0])
                d_info.sight_angle[1] = cy / (image.shape[0] / 2) * math.atan(
                    (image.shape[0] / 2) / camera_matrix[1, 1])
                d_info.detected = True
                ## !

                cv2.putText(image, str(state['score']),
                            (res[0] + res[2], res[1] + res[3]),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 0), 1)

                if state['score'] < 0.5:
                    count_lose = count_lose + 1
                else:
                    count_lose = 0
                if count_lose > 4:
                    flag_lose = True
            if flag_lose is True:
                cv2.putText(image, 'target lost', (20, 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                ## !
                d_info.detected = False
            if drawing is True:
                cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

            cx = int(image.shape[1] / 2)
            cy = int(image.shape[0] / 2)
            cv2.line(image, (cx - 20, cy), (cx + 20, cy), (255, 255, 255), 2)
            cv2.line(image, (cx, cy - 20), (cx, cy + 20), (255, 255, 255), 2)
            ## !
            pub.publish(d_info)
            cv2.imshow('image', image)
            cv2.waitKey(1)

        rate.sleep()
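
The "## !" block above back-projects the track through a pinhole camera model: the target's known physical height kcf_tracker_h gives depth by similar triangles, and the pixel offsets from the image center give the lateral position. A self-contained restatement of that geometry (estimate_position is an assumed name):

import numpy as np

def estimate_position(target_pos, target_sz, image_shape, camera_matrix, real_h):
    # camera_matrix is the 3x3 intrinsic matrix [[fx, 0, cx0], [0, fy, cy0], [0, 0, 1]];
    # real_h is the target's physical height, in the units of the returned position
    fx, fy = camera_matrix[0, 0], camera_matrix[1, 1]
    depth = real_h / target_sz[1] * fy       # similar triangles: Z = H * fy / h_px
    dx = target_pos[0] - image_shape[1] / 2  # offset from the image center,
    dy = target_pos[1] - image_shape[0] / 2  # used here as the principal point
    return np.array([depth * dx / fx, depth * dy / fy, depth])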
Example #12
def tracker_eval_record_data(net, x_crop, target_pos, target_sz, window,
                             scale_z, p, im, next_mask, conf_mask, index_1,
                             index_2, frame_num, data_dir, gtbbox):
    delta, score = net(x_crop)

    delta = delta.permute(1, 2, 3, 0).contiguous().view(4,
                                                        -1).data.cpu().numpy()
    score = F.softmax(score.permute(1, 2, 3, 0).contiguous().view(2, -1),
                      dim=0).data[1, :].cpu().numpy()

    delta[0, :] = delta[0, :] * p.anchor[:, 2] + p.anchor[:, 0]
    delta[1, :] = delta[1, :] * p.anchor[:, 3] + p.anchor[:, 1]
    delta[2, :] = np.exp(delta[2, :]) * p.anchor[:, 2]
    delta[3, :] = np.exp(delta[3, :]) * p.anchor[:, 3]

    def change(r):
        return np.maximum(r, 1. / r)

    def sz(w, h):
        pad = (w + h) * 0.5
        sz2 = (w + pad) * (h + pad)
        return np.sqrt(sz2)

    def sz_wh(wh):
        pad = (wh[0] + wh[1]) * 0.5
        sz2 = (wh[0] + pad) * (wh[1] + pad)
        return np.sqrt(sz2)

    # size penalty
    s_c = change(sz(delta[2, :], delta[3, :]) /
                 (sz_wh(target_sz)))  # scale penalty
    r_c = change((target_sz[0] / target_sz[1]) /
                 (delta[2, :] / delta[3, :]))  # ratio penalty

    penalty = np.exp(-(r_c * s_c - 1.) * p.penalty_k)
    pscore = penalty * score
    # window float
    pscore = pscore * (1 - p.window_influence) + window * p.window_influence

    #background penalty
    inspect_num = 100
    top_score = score.argsort()[-inspect_num:][::-1]

    temp_result = np.zeros((inspect_num, 4), dtype=int)
    fg_result = np.zeros(inspect_num)
    score_result = np.zeros(inspect_num)
    original_score = np.zeros(inspect_num)
    all_scores = []
    for i in range(0, inspect_num):
        target = delta[:, top_score[i]] / scale_z
        res_x = target[0] + target_pos[0]
        res_y = target[1] + target_pos[1]
        res_w = target[2]
        res_h = target[3]

        res = cxy_wh_2_rect(np.array([res_x, res_y]), np.array([res_w, res_h]))
        res = [int(l) for l in res]

        temp = [res[0], res[1], (res[0] + res[2]), (res[1] + res[3])]
        res = [np.clip(temp[0], 0, next_mask.shape[1] - 1),
               np.clip(temp[1], 0, next_mask.shape[0] - 1),
               np.clip(temp[2], 0, next_mask.shape[1] - 1),
               np.clip(temp[3], 0, next_mask.shape[0] - 1)]
        res[2] = res[2] - res[0]
        res[3] = res[3] - res[1]
        # IoU with the ground truth
        iou = calculate_iou([res[0], res[1], res[0] + res[2], res[1] + res[3]],
                            bbox_format(gtbbox, 'tlxy_wh_2_rect'))
        # foreground score: overlap between the box and the foreground mask
        bbox_mask = np.zeros(next_mask.shape)
        bbox_mask[res[1]:res[1] + res[3], res[0]:res[0] + res[2]] = 1
        intersection = np.count_nonzero(np.logical_and(next_mask, bbox_mask))
        fg_result[i] = float(intersection) / float(res[3] * res[2])

        #conf score
        conf_intersection = np.count_nonzero(
            np.logical_and(conf_mask, bbox_mask))
        conf_score = float(conf_intersection) / float(res[3] * res[2])
        #size_penalty
        size_penalty_score = np.exp(r_c[top_score[i]])
        #scale_penalty
        scale_penalty_score = np.exp(s_c[top_score[i]])
        #cosine score
        cosine_score = window[top_score[i]]
        temp_result[i, :] = res
        original_score[i] = score[top_score[i]]
        score_result[i] = np.exp(
            -(r_c[top_score[i]] * s_c[top_score[i]] - 1.) * p.penalty_k)
        scores = [
            original_score[i], fg_result[i], conf_score, size_penalty_score,
            scale_penalty_score, cosine_score
        ]
        all_scores.append(scores)
        #This is probably the input to your network
        #original_score[i]) , fg_result[i]) , conf_score , size_penalty_score , scale_penalty_score , cosine_score
        #iou is the groundtruth iou
    x = Variable(torch.from_numpy(np.array(all_scores)).float().cuda())
    y = net(x).data.cpu().numpy()
    #pick the bounding box with largest network score

    #visualize top ten box
    """
    for j in range(9,-1,-1):
        cv2.rectangle(im,  (box x0, box y0), (box x1, box y1), (255,255, 0), 3)
    

    target_pos = np.array([box x0 + ((box x1 - box x0)/2), box y0 + ((box y1 - box y0)/2)])
    target_sz = np.array([(box x1 - box x0),(box y1 - box y0)])
    alternative = []
    return target_pos, target_sz, score_result[top_ids[0]], alternative
    """
    # score_result = score_result * score[top_score]
    # #score_result = 0.1248*original_score + 0.4754*fg_result + 0.1267*score_result
    # score_result = score_result * (1 - p.window_influence) + window[top_score] * p.window_influence

    # top_ids = score_result.argsort()[-10:][::-1]
    # font = cv2.FONT_HERSHEY_SIMPLEX
    # best = []
    # for j in range(9,-1,-1):
    #     cv2.putText(im,"%.2f" % fg_result[top_ids[j]],(20,20+j*10), font, 0.4,(255,255,255), 1,cv2.LINE_AA)
    #     cv2.putText(im,"%.2f" % score_result[top_ids[j]],(50,20+j*10), font, 0.4,(255,255,255), 1,cv2.LINE_AA)
    #     cv2.putText(im,"%.2f" % original_score[top_ids[j]],(220,20+j*10), font, 0.4,(0,0,0), 1,cv2.LINE_AA)
    #     res = temp_result[top_ids[j]]
    #     if(original_score[top_ids[j]] > 0.75):
    #         cv2.rectangle(im,  (res[0], res[1]), (res[0] + res[2], res[1] + res[3]), (255,255, 0), 3)
    #     else:
    #         cv2.rectangle(im,  (res[0], res[1]), (res[0] + res[2], res[1] + res[3]), (255,0, 0), 3)
    #     #cv2.putText(im,"%.2f" % score_result[top_ids[j]],(50,20+j*10), font, 0.4,(255,255,255), 1,cv2.LINE_AA)
    #     #cv2.rectangle(im,  (res[0], res[1]), (res[0] + res[2], res[1] + res[3]), (0,0, 255), 3)
    chosen_id = np.argmax(y)
    target_pos = np.array([
        temp_result[chosen_id, 0] + (temp_result[chosen_id, 2] / 2),
        temp_result[chosen_id, 1] + (temp_result[chosen_id, 3] / 2)
    ])
    target_sz = np.array(
        [temp_result[chosen_id, 2], temp_result[chosen_id, 3]])
    alternative = []
    return target_pos, target_sz, score_result[chosen_id], alternative
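
For reference, the size and ratio penalty computed at the top of this function follows the standard SiamRPN update rule: proposals whose scale or aspect ratio drift from the current target size are exponentially down-weighted. A compact restatement:

import numpy as np

def siamrpn_penalty(pred_w, pred_h, target_w, target_h, penalty_k):
    def change(r):
        return np.maximum(r, 1.0 / r)
    def sz(w, h):  # padded equivalent side length
        pad = (w + h) * 0.5
        return np.sqrt((w + pad) * (h + pad))
    s_c = change(sz(pred_w, pred_h) / sz(target_w, target_h))  # scale drift
    r_c = change((target_w / target_h) / (pred_w / pred_h))    # ratio drift
    return np.exp(-(r_c * s_c - 1.0) * penalty_k)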
Example #13
def get_object_center(q, detect_class):

    # classes:
    # 1.Aeroplanes     2.Bicycles   3.Birds       4.Boats           5.Bottles
    # 6.Buses          7.Cars       8.Cats        9.Chairs          10.Cows
    # 11.Dining tables 12.Dogs      13.Horses     14.Motorbikes     15.People
    # 16.Potted plants 17.Sheep     18.Sofas      19.Trains         20.TV/Monitors

    slim = tf.contrib.slim

    # TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(log_device_placement=False,
                            gpu_options=gpu_options)
    isess = tf.InteractiveSession(config=config)

    # Input placeholder.
    net_shape = (300, 300)
    data_format = 'NHWC'
    img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
    # Evaluation pre-processing: resize to SSD net shape.
    image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
        img_input,
        None,
        None,
        net_shape,
        data_format,
        resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    image_4d = tf.expand_dims(image_pre, 0)

    # Define the SSD model.
    reuse = True if 'ssd_net' in locals() else None
    ssd_net = ssd_vgg_300.SSDNet()
    with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
        predictions, localisations, _, _ = ssd_net.net(image_4d,
                                                       is_training=False,
                                                       reuse=reuse)

    # Restore SSD model.
    # ckpt_filename = 'checkpoints/ssd_300_vgg.ckpt'
    ckpt_filename = '../SSD-Tensorflow/checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'

    isess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(isess, ckpt_filename)

    # SSD default anchor boxes.
    ssd_anchors = ssd_net.anchors(net_shape)

    # Main image processing routine.
    def process_image(img,
                      select_threshold=0.5,
                      nms_threshold=.45,
                      net_shape=(300, 300)):
        # Run SSD network.
        rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
            [image_4d, predictions, localisations, bbox_img],
            feed_dict={img_input: img})

        # Get classes and bboxes from the net outputs.
        rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
            rpredictions,
            rlocalisations,
            ssd_anchors,
            select_threshold=select_threshold,
            img_shape=net_shape,
            num_classes=21,
            decode=True)

        rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
        rclasses, rscores, rbboxes = np_methods.bboxes_sort(rclasses,
                                                            rscores,
                                                            rbboxes,
                                                            top_k=400)
        rclasses, rscores, rbboxes = np_methods.bboxes_nms(
            rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
        # Resize bboxes to original image shape. Note: useless for Resize.WARP!
        rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
        return rclasses, rscores, rbboxes

    def get_bboxes(rclasses, rbboxes):
        # get center location of object

        number_classes = rclasses.shape[0]
        object_bboxes = []
        for i in range(number_classes):
            object_bbox = dict()
            object_bbox['i'] = i
            object_bbox['class'] = rclasses[i]
            object_bbox['y_min'] = rbboxes[i, 0]
            object_bbox['x_min'] = rbboxes[i, 1]
            object_bbox['y_max'] = rbboxes[i, 2]
            object_bbox['x_max'] = rbboxes[i, 3]
            object_bboxes.append(object_bbox)
        return object_bboxes

    # load net
    net = SiamRPNvot()
    net.load_state_dict(
        torch.load(
            join(realpath(dirname(__file__)),
                 '../DaSiamRPN-master/code/SiamRPNVOT.model')))

    net.eval()

    # open video capture
    video = cv2.VideoCapture(0)

    if not video.isOpened():
        print("Could not open video")
        sys.exit()

    index = True
    while index:

        # Read first frame.
        ok, frame = video.read()
        if not ok:
            print('Cannot read video file')
            sys.exit()

        # Define an initial bounding box
        height = frame.shape[0]
        width = frame.shape[1]

        rclasses, rscores, rbboxes = process_image(frame)

        bboxes = get_bboxes(rclasses, rbboxes)
        for bbox in bboxes:
            if bbox['class'] == detect_class:
                print(bbox)
                ymin = int(bbox['y_min'] * height)
                xmin = int((bbox['x_min']) * width)
                ymax = int(bbox['y_max'] * height)
                xmax = int((bbox['x_max']) * width)
                cx = (xmin + xmax) / 2
                cy = (ymin + ymax) / 2
                h = ymax - ymin
                w = xmax - xmin
                new_bbox = (cx, cy, w, h)
                print(new_bbox)
                index = False
                break

    # tracker init
    target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
    state = SiamRPN_init(frame, target_pos, target_sz, net)

    # tracking and visualization
    toc = 0
    count_number = 0

    while True:

        # Read a new frame
        ok, frame = video.read()
        if not ok:
            break

        # Start timer
        tic = cv2.getTickCount()

        # Update tracker
        state = SiamRPN_track(state, frame)  # track
        # print(state)

        toc += cv2.getTickCount() - tic

        if state:

            res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
            res = [int(l) for l in res]

            cv2.rectangle(frame, (res[0], res[1]),
                          (res[0] + res[2], res[1] + res[3]), (0, 255, 255), 3)

            count_number += 1
            # set object_center
            object_center = dict()
            object_center['x'] = state['target_pos'][0] / width
            object_center['y'] = state['target_pos'][1] / height
            q.put(object_center)

            if count_number % 40 == 3:  # periodic re-detection; 'not state' was unreachable here
                # Tracking failure
                cv2.putText(frame, "Tracking failure detected", (100, 80),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2)
                index = True
                while index:
                    ok, frame = video.read()
                    rclasses, rscores, rbboxes = process_image(frame)
                    bboxes = get_bboxes(rclasses, rbboxes)
                    for bbox in bboxes:
                        if bbox['class'] == detect_class:
                            ymin = int(bbox['y_min'] * height)
                            xmin = int(bbox['x_min'] * width)
                            ymax = int(bbox['y_max'] * height)
                            xmax = int(bbox['x_max'] * width)
                            cx = (xmin + xmax) / 2
                            cy = (ymin + ymax) / 2
                            h = ymax - ymin
                            w = xmax - xmin
                            new_bbox = (cx, cy, w, h)
                            target_pos, target_sz = np.array(
                                [cx, cy]), np.array([w, h])
                            state = SiamRPN_init(frame, target_pos, target_sz,
                                                 net)

                            p1 = (int(xmin), int(ymin))
                            p2 = (int(xmax), int(ymax))
                            cv2.rectangle(frame, p1, p2, (0, 255, 0), 2, 1)

                            index = False

                            break

        # resize the frame for display
        resized_frame = cv2.resize(frame,
                                   None,
                                   fx=0.65,
                                   fy=0.65,
                                   interpolation=cv2.INTER_AREA)
        # flip horizontally for a mirrored display
        horizontal = cv2.flip(resized_frame, 1, dst=None)

        # show the frame
        cv2.namedWindow("SSD+SiamRPN", cv2.WINDOW_NORMAL)
        cv2.imshow('SSD+SiamRPN', horizontal)

        # Exit if ESC pressed
        k = cv2.waitKey(1) & 0xff
        if k == 27:
            break

    video.release()
    cv2.destroyAllWindows()
Example #14
def showImage():
    
    global x1, y1, x2, y2, drawing, init, flag, image, getim, start
    rospy.init_node('RPN', anonymous=True)
    
    flag = 1
    init = False
    drawing = False
    getim = False
    start = False
    x1, x2, y1, y2 = -1, -1, -1, -1
    flag_lose = False
    count_lose = 0

    print('loading model...........')
    net = SiamRPNvot()
    net.load_state_dict(torch.load(path + 'SiamRPNVOT.model'))
    net.eval().cuda()
    z = torch.Tensor(1, 3, 127, 127)
    net.temple(z.cuda())
    x = torch.Tensor(1, 3, 271, 271)
    net(x.cuda())
    print('ready for starting!')
    
    rospy.Subscriber('/camera/rgb/image_raw', Image, callback)
    pub = rospy.Publisher('/vision/target', Pose, queue_size=10) 
    cv2.namedWindow('image')
    cv2.setMouseCallback('image', draw_circle)
    rate = rospy.Rate(30)
    i = 1
    t = time.time()
    fps = 0
    while not rospy.is_shutdown():
      
        if getim:
            t1 = time.time()
            idd = readid(image)
            
            pose = Pose()
            pose.position.z = 0
            
            if start is False and init is True:
                target_pos = np.array([int((x1+x2)/2), int((y1+y2)/2)])
                target_sz = np.array([int(x2-x1), int(y2-y1)])
                state = SiamRPN_init(image, target_pos, target_sz, net)
                start = True
                flag_lose = False
                continue
                
            if start is True:
            
                state = SiamRPN_track(state, image)  # track              
                res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
                res = [int(l) for l in res]
                cv2.rectangle(image, (res[0], res[1]), (res[0] + res[2], res[1] + res[3]), (0, 255, 255), 2)
                pose.position.x = (state['target_pos'][0]-image.shape[1]/2) / (image.shape[1]/2)
                pose.position.y = (state['target_pos'][1]-image.shape[0]/2) / (image.shape[0]/2)
                cv2.putText(image, str(state['score']), (res[0] + res[2], res[1] + res[3]), cv2.FONT_HERSHEY_SIMPLEX , 0.5, (255,255,0), 1)
                pose.position.z = 1
                if state['score'] < 0.5:
                    count_lose = count_lose + 1
                else:
                    count_lose = 0
                if count_lose > 4:
                    flag_lose = True
                    
            if flag_lose is True:
                cv2.putText(image, 'target is lost!', (200, 200),
                            cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 0), 3)
                pose.position.z = -1
                   
            if drawing is True:              
                cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
            
            cv2.putText(image, '#'+str(idd), (30,30), cv2.FONT_HERSHEY_SIMPLEX , 0.5, (0, 255, 255), 1)
            cx = int(image.shape[1]/2)
            cy = int(image.shape[0]/2)
            cv2.line(image,(cx-20,cy), (cx+20, cy), (255, 255, 255), 2)
            cv2.line(image,(cx, cy-20), (cx, cy+20), (255, 255, 255), 2)
            
            pub.publish(pose)
            
            if start is True:    
               
                i = i + 1
            if i > 5:
                i = 1
                fps = 5 / (time.time()-t)
                t = time.time()
            cv2.putText(image, 'fps='+str(fps), (200,30), cv2.FONT_HERSHEY_SIMPLEX , 0.5, (0, 255, 255), 1)
            
            cv2.imshow('image', image)
            cv2.waitKey(1)
            getim = False

        rate.sleep()
Example #15
def test(score):

    net = SiamRPNvot()
    net.load_state_dict(
        torch.load('/home/traker_hao/code/learn/train_RPN/model/30.model'))
    net.eval().cuda()

    version_name = 'jiasu'

    sequence_path = '/media/traker_hao/data/dataset/UAV1/sequences'
    init_path = '/media/traker_hao/data/dataset/UAV1/annotations'
    result_path = '/home/traker_hao/result/visdrone/' + version_name
    if not os.path.exists(result_path):
        os.mkdir(result_path)

    sequence_names = os.listdir(sequence_path)
    random.shuffle(sequence_names)
    #sequence_names.sort()
    i = 0
    for sequence_name in sequence_names:
        print(sequence_name)
        #if sequence_name != 'Suv':
        #continue
        #sequence_name='uav0000054_00000_s'
        imagenames = os.listdir(sequence_path + '/' + sequence_name)
        imagenames.sort()
        print(i)
        i = i + 1
        print(sequence_path + '/' + sequence_name)
        f = open(
            result_path + '/' + sequence_name + '_' + version_name + '.txt',
            'w')
        inited = False
        fp = open(init_path + '/' + sequence_name + '.txt')
        j = 0
        for imagename in imagenames:
            j = j + 1
            image = cv2.imread(sequence_path + '/' + sequence_name + '/' +
                               imagename)
            #init the tracker
            if inited is False:
                data = fp.readline()
                data = data.strip('\n')
                data = data.split(',')
                [cx, cy, w,
                 h] = (int(data[0]) + int(data[2]) // 2,
                       int(data[1]) + int(data[3]) // 2, int(data[2]),
                       int(data[3]))
                #f.write(str(annos[0]['bbox'][0])+','+str(annos[0]['bbox'][1])+','+str(annos[0]['bbox'][2])+','+str(annos[0]['bbox'][3])+','+str(1.00)+'\n')
                f.write(data[0] + ',' + data[1] + ',' + data[2] + ',' +
                        data[3] + '\n')
                target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
                state = SiamRPN_init(image, target_pos, target_sz, net)
                inited = True

                cv2.rectangle(image,
                              (int(cx) - int(w) // 2, int(cy) - int(h) // 2),
                              (int(cx) + int(w) // 2, int(cy) + int(h) // 2),
                              (0, 255, 0), 3)
                cv2.putText(image, sequence_name, (50, 50), 0, 5e-3 * 200,
                            (0, 255, 0), 2)
                cv2.putText(image, 'initing...', (100, 100), 0, 5e-3 * 200,
                            (0, 255, 0), 2)
                image2 = cv2.resize(image, (960, 540))
                cv2.imshow('aa2', image2)
                cv2.waitKey(1)
            else:

                data = fp.readline()
                data = data.strip('\n')
                data = data.split(',')
                try:
                    truth = (int(data[0]),
                             int(data[1]), int(data[0]) + int(data[2]),
                             int(data[1]) + int(data[3]))
                except (ValueError, IndexError):  # malformed annotation line
                    truth = (0, 0, 0, 0)

                #update the tracker
                #print([cx, cy, w, h])
                tic = cv2.getTickCount()
                t1 = time.time()
                state = SiamRPN_track(state, image)  # track
                #state['target_sz'] = np.array( [int(data[2]), int(data[3])] )

                toc = (cv2.getTickCount() - tic) / cv2.getTickFrequency()
                #print(1/toc)
                #mytracker.target_sz = np.array([int(truth[2]),int(truth[3])])
                res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
                res = [int(l) for l in res]
                cv2.rectangle(image, (res[0], res[1]),
                              (res[0] + res[2], res[1] + res[3]),
                              (0, 255, 255), 2)

                #visualize the result

                cv2.rectangle(image, (int(truth[0]), int(truth[1])),
                              (int(truth[2]), int(truth[3])), (0, 255, 0), 2)
                #mytracker.target_sz=np.array([int(data[2]),int(data[3])])
                #cv2.putText(image, str(iou), (res[0] + res[2], res[1] + res[3]), 0, 5e-3*200, (0,255,0), 2)
            cv2.putText(image, sequence_name, (50, 50), 0, 5e-3 * 200,
                        (0, 255, 0), 2)
            image2 = cv2.resize(image, (960, 540))
            cv2.imshow('aa2', image2)
            if cv2.waitKey(1) == 97:  # 'a' key aborts this sequence
                break
            #if j>209:
            #cv2.waitKey(0)

        f.close()
        fp.close()
Example #16
# for i in range(10):
#     net.temple(torch.autograd.Variable(torch.FloatTensor(1, 3, 127, 127)).cuda())  # selonsy: cuda() runs the computation on the GPU; FloatTensor(1, 3, 127, 127) is a 4-D float tensor
#     net(torch.autograd.Variable(torch.FloatTensor(1, 3, 255, 255)).cuda())

# start to track
handle = vot.VOT("polygon")
Polygon = handle.region()  # region: sends the configuration message to the client and receives the initialization region and the path of the first image; returns the initialization region
cx, cy, w, h = get_axis_aligned_bbox(Polygon)  # get_axis_aligned_bbox: converts the coordinates into the RPN format

image_file = handle.frame()  # frame: fetches a frame (image path) from the client
if not image_file:
    sys.exit(0)

target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
im = cv2.imread(image_file)  # HxWxC
state = SiamRPN_init(im, target_pos, target_sz, net)  # init tracker; SiamRPN_init builds the state dict and runs the template branch
# tracking starts from the first frame, which seems odd: is the given initialization not accurate enough?  # selonsy: possible improvement
while True:  # enter the tracking loop
    image_file = handle.frame()
    if not image_file:
        break
    im = cv2.imread(image_file)  # HxWxC
    state = SiamRPN_track(state, im)  # track; SiamRPN_track runs the detection branch and updates the state
    res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])  # convert center/size into a rectangle

    handle.report(Rectangle(res[0], res[1], res[2], res[3]))  # report the tracking result to the client

print(handle.result)
print(handle.frames)

del handle