Example #1
def track(sess, run_opts, hp, run, design, image_, pos_x, pos_y, target_w,
          target_h, x_sz, scale_factors, final_score_sz, penalty, filename,
          image, templates_z, templates_z_, scores):
    scaled_search_area = x_sz * scale_factors
    scaled_target_w = target_w * scale_factors
    scaled_target_h = target_h * scale_factors

    image_, scores_ = sess.run(
        [image, scores],
        feed_dict={
            siam.pos_x_ph: pos_x,
            siam.pos_y_ph: pos_y,
            siam.x_sz0_ph: scaled_search_area[0],
            siam.x_sz1_ph: scaled_search_area[1],
            siam.x_sz2_ph: scaled_search_area[2],
            templates_z: np.squeeze(templates_z_),
            image: image_,
        },
        **run_opts)
    scores_ = np.squeeze(scores_)
    # penalize change of scale
    scores_[0, :, :] = hp.scale_penalty * scores_[0, :, :]
    scores_[2, :, :] = hp.scale_penalty * scores_[2, :, :]
    # find scale with highest peak (after penalty)
    new_scale_id = np.argmax(np.amax(scores_, axis=(1, 2)))
    # update scaled sizes
    x_sz = (1 - hp.scale_lr
            ) * x_sz + hp.scale_lr * scaled_search_area[new_scale_id]
    target_w = (1 - hp.scale_lr
                ) * target_w + hp.scale_lr * scaled_target_w[new_scale_id]
    target_h = (1 - hp.scale_lr
                ) * target_h + hp.scale_lr * scaled_target_h[new_scale_id]
    # select response with new_scale_id
    score_ = scores_[new_scale_id, :, :]

    my_score = score_

    # normalized scores
    score_ = score_ - np.min(score_)
    score_ = score_ / np.sum(score_)

    # apply displacement penalty
    score_ = (1 - hp.window_influence) * score_ + hp.window_influence * penalty
    pos_x, pos_y = _update_target_position(pos_x, pos_y, score_,
                                           final_score_sz, design.tot_stride,
                                           design.search_sz, hp.response_up,
                                           x_sz)

    bbox = pos_x, pos_y, target_w, target_h
    if run.visualization:
        # convert <cx,cy,w,h> to <x,y,w,h> and save output
        bbox_d = pos_x - target_w / 2, pos_y - target_h / 2, target_w, target_h
        show_frame(image_, bbox_d, 1)

    p = np.asarray(np.unravel_index(np.argmax(score_), np.shape(score_)))
    #print("Score bbox(%i,%i) %s max=%f(%f) at scale %d at %s" % (
    #    pos_x, pos_y, str(my_score.shape), np.max(my_score), np.max(score_), new_scale_id, str(p)))

    return image_, bbox, np.max(score_), new_scale_id, x_sz
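Note: the tracking examples on this page all call a helper _update_target_position that is not shown here. As a rough sketch only (argument names follow the call sites above; the exact upsampling convention of the original SiamFC code may differ), it maps the peak of the penalized score map back into frame coordinates:

# Hedged sketch of the missing _update_target_position helper, inferred from how it is called above.
import numpy as np

def _update_target_position(pos_x, pos_y, score, final_score_sz, tot_stride,
                            search_sz, response_up, x_sz):
    # locate the peak of the (penalized, normalized) score map
    p = np.asarray(np.unravel_index(np.argmax(score), np.shape(score)))
    # displacement of the peak from the centre of the upsampled score map
    center = float(final_score_sz - 1) / 2
    disp_in_area = p - center
    # undo the response upsampling and the network stride: displacement in search-crop pixels
    disp_in_xcrop = disp_in_area * float(tot_stride) / response_up
    # rescale from the fixed-size search crop to the actual search area in the frame
    disp_in_frame = disp_in_xcrop * x_sz / search_sz
    # unravel_index returns (row, col) = (y, x)
    pos_y += disp_in_frame[0]
    pos_x += disp_in_frame[1]
    return pos_x, pos_y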
Example #2
def tracker(hp, run, design, frame_name_list, pos_x, pos_y, target_w, target_h, final_score_sz, siam, start_frame):
    num_frames = np.size(frame_name_list)
    # stores tracker's output for evaluation
    bboxes = np.zeros((num_frames,4))

    scale_factors = hp.scale_step**np.linspace(-np.ceil(hp.scale_num/2), np.ceil(hp.scale_num/2), hp.scale_num)
    # cosine window to penalize large displacements    
    hann_1d = np.expand_dims(np.hanning(final_score_sz), axis=0)
    penalty = np.transpose(hann_1d) * hann_1d
    penalty = penalty / np.sum(penalty)

    context = design.context*(target_w+target_h)
    z_sz = np.sqrt(np.prod((target_w+context)*(target_h+context)))
    x_sz = float(design.search_sz) / design.exemplar_sz * z_sz

    # thresholds to saturate patches shrinking/growing
    min_z = hp.scale_min * z_sz
    max_z = hp.scale_max * z_sz
    min_x = hp.scale_min * x_sz
    max_x = hp.scale_max * x_sz

    # run_metadata = tf.RunMetadata()
    # run_opts = {
    #     'options': tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
    #     'run_metadata': run_metadata,
    # }

    run_opts = {}

    # with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
    # with tf.Session() as sess:
    #     tf.global_variables_initializer().run()
    #     # Coordinate the loading of image files.
    #     coord = tf.train.Coordinator()
    #     threads = tf.train.start_queue_runners(coord=coord)
    if True:  # stands in for the removed tf.Session block so the indented code below runs unchanged
        
        # save first frame position (from ground-truth)
        bboxes[0,:] = pos_x-target_w/2, pos_y-target_h/2, target_w, target_h                

        # image_, templates_z_ = sess.run([image, templates_z], feed_dict={
        #                                                                 siam.pos_x_ph: pos_x,
        #                                                                 siam.pos_y_ph: pos_y,
        #                                                                 siam.z_sz_ph: z_sz,
        #                                                                 filename: frame_name_list[0]})
        image_, templates_z_ = siam.get_template_z(pos_x, pos_y, z_sz, frame_name_list[0], design)
        new_templates_z_ = templates_z_

        t_start = time.time()

        # Get an image from the queue
        for i in range(1, num_frames):
            if i % 10 == 0:
                print('frame: %d' % (i + 1))

            scaled_exemplar = z_sz * scale_factors  
            scaled_search_area = x_sz * scale_factors
            scaled_target_w = target_w * scale_factors
            scaled_target_h = target_h * scale_factors

            image_, scores_ = siam.get_scores(pos_x, pos_y, scaled_search_area, templates_z_, frame_name_list[i], design, final_score_sz)
            scores_ = np.squeeze(scores_)
            # penalize change of scale
            scores_[0,:,:] = hp.scale_penalty*scores_[0,:,:]
            scores_[2,:,:] = hp.scale_penalty*scores_[2,:,:]
            # find scale with highest peak (after penalty)
            new_scale_id = np.argmax(np.amax(scores_, axis=(1,2)))
            
            
            # update scaled sizes
            x_sz = (1-hp.scale_lr)*x_sz + hp.scale_lr*scaled_search_area[new_scale_id]        
            target_w = (1-hp.scale_lr)*target_w + hp.scale_lr*scaled_target_w[new_scale_id]
            target_h = (1-hp.scale_lr)*target_h + hp.scale_lr*scaled_target_h[new_scale_id]
            # select response with new_scale_id
            score_ = scores_[new_scale_id,:,:]
            score_ = score_ - np.min(score_)
            score_ = score_/np.sum(score_)
            # apply displacement penalty
            score_ = (1-hp.window_influence)*score_ + hp.window_influence*penalty
            pos_x, pos_y = _update_target_position(pos_x, pos_y, score_, final_score_sz, design.tot_stride, design.search_sz, hp.response_up, x_sz)
            # convert <cx,cy,w,h> to <x,y,w,h> and save output
            bboxes[i,:] = pos_x-target_w/2, pos_y-target_h/2, target_w, target_h
            # update the target representation with a rolling average
            if hp.z_lr>0:
                # new_templates_z_ = sess.run([templates_z], feed_dict={
                #                                                 siam.pos_x_ph: pos_x,
                #                                                 siam.pos_y_ph: pos_y,
                #                                                 siam.z_sz_ph: z_sz,
                #                                                 image: image_
                #                                                 })
                _, new_templates_z_ = siam.get_template_z(pos_x, pos_y, z_sz, image_, design)

                templates_z_ = (1 - hp.z_lr) * templates_z_ + hp.z_lr * new_templates_z_
            
            # update template patch size
            z_sz = (1 - hp.scale_lr) * z_sz + hp.scale_lr * scaled_exemplar[new_scale_id]
            
            if run.visualization:
                show_frame(image_, bboxes[i,:], 1)

        t_elapsed = time.time() - t_start
        speed = num_frames / t_elapsed

    plt.close('all')

    return bboxes, speed
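The scale pyramid and the cosine window built at the top of this tracker can be inspected in isolation. A minimal sketch, assuming typical values hp.scale_step = 1.04, hp.scale_num = 3 and final_score_sz = 257 (these numbers are illustrative, not taken from this page):

# Standalone look at scale_factors and the Hann-window displacement penalty used above.
import numpy as np

scale_step, scale_num, final_score_sz = 1.04, 3, 257   # assumed hyper-parameters
scale_factors = scale_step ** np.linspace(-np.ceil(scale_num / 2),
                                          np.ceil(scale_num / 2), scale_num)
# -> roughly [0.925, 1.0, 1.082]: one shrunken, one unchanged, one enlarged search crop

hann_1d = np.expand_dims(np.hanning(final_score_sz), axis=0)
penalty = np.transpose(hann_1d) * hann_1d   # outer product gives a 2-D cosine window
penalty = penalty / np.sum(penalty)         # normalized so it can be blended with score_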
Example #3
def tracker_v2(hp, run, design, frame_name_list, pos_x, pos_y, target_w, target_h, final_score_sz,  image, templates_z, scores, start_frame,  path_ckpt, siamNet):
    num_frames = np.size(frame_name_list)
    # stores tracker's output for evaluation
    bboxes = np.zeros((num_frames,4))

    scale_factors = hp.scale_step**np.linspace(-np.ceil(hp.scale_num/2), np.ceil(hp.scale_num/2), hp.scale_num)
    # cosine window to penalize large displacements    
    hann_1d = np.expand_dims(np.hanning(final_score_sz), axis=0)
    penalty = np.transpose(hann_1d) * hann_1d
    penalty = penalty / np.sum(penalty)

    context = design.context*(target_w+target_h)
    z_sz = np.sqrt(np.prod((target_w+context)*(target_h+context)))#(w +2p)*(h+2p)
    x_sz = float(design.search_sz) / design.exemplar_sz * z_sz

    # thresholds to saturate patches shrinking/growing
    min_z = hp.scale_min * z_sz
    max_z = hp.scale_max * z_sz
    min_x = hp.scale_min * x_sz
    max_x = hp.scale_max * x_sz

    # run_metadata = tf.RunMetadata()
    # run_opts = {
    #     'options': tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
    #     'run_metadata': run_metadata,
    # }

    run_opts = {}
    saver = tf.train.Saver() 
    # with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
    with tf.Session() as sess:
        saver.restore(sess, path_ckpt)
        print("Model restored......")
        # Coordinate the loading of image files.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        
        # save first frame position (from ground-truth)
        bboxes[0,:] = pos_x-target_w/2, pos_y-target_h/2, target_w, target_h  


        z_image = cv2.imread(frame_name_list[0])
        """
        cv2.namedWindow('image', cv2.WINDOW_NORMAL)               
        cv2.rectangle(z_image, (int(pos_x-target_w/2), int(pos_y-target_h/2)), (int( pos_x+target_w/2), int(pos_y+target_h/2)), (255,0,0), 2)
        cv2.imshow('image',z_image)
                
        cv2.waitKey(0)
        """
        #z_image = cv2.resize(z_image, (resize_width,resize_height))
        
        image_, templates_z_ = sess.run([image, templates_z], feed_dict={
                                                                        siamNet.batched_pos_x_ph: [pos_x],
                                                                        siamNet.batched_pos_y_ph: [pos_y],
                                                                        siamNet.batched_z_sz_ph: [z_sz],
                                                                        image: [z_image / 255.  - 0.5]})
        new_templates_z_ = templates_z_

        t_start = time.time()
        
        # Get an image from the queue
        for i in range(1, num_frames):        
            scaled_exemplar = z_sz * scale_factors
            scaled_search_area = x_sz * scale_factors
            scaled_target_w = target_w * scale_factors
            scaled_target_h = target_h * scale_factors
            x_image = cv2.imread(frame_name_list[i])

            #x_image = cv2.resize(x_image, (resize_width,resize_height))

            image_, scores_ = sess.run(
                [image, scores],
                feed_dict={
                    siamNet.batched_pos_x_ph: [pos_x],
                    siamNet.batched_pos_y_ph: [pos_y],
                    siamNet.batched_x_sz0_ph: [scaled_search_area[0]],
                    siamNet.batched_x_sz1_ph: [scaled_search_area[1]],
                    siamNet.batched_x_sz2_ph: [scaled_search_area[2]],
                    templates_z: np.squeeze(templates_z_),
                    image: [x_image / 255. - 0.5],
                }, **run_opts)
            """
            plt.imshow(np.squeeze(scores_[0]), cmap = 'gray')
            plt.show()
            plt.pause(5)
            """
            scores_ = np.squeeze(scores_)
            # penalize change of scale
            scores_[0,:,:] = hp.scale_penalty*scores_[0,:,:]
            scores_[2,:,:] = hp.scale_penalty*scores_[2,:,:]
            # find scale with highest peak (after penalty)
            new_scale_id = np.argmax(np.amax(scores_, axis=(1,2)))
            # update scaled sizes
            x_sz = (1-hp.scale_lr)*x_sz + hp.scale_lr*scaled_search_area[new_scale_id]        
            target_w = (1-hp.scale_lr)*target_w + hp.scale_lr*scaled_target_w[new_scale_id]
            target_h = (1-hp.scale_lr)*target_h + hp.scale_lr*scaled_target_h[new_scale_id]
            # select response with new_scale_id
            score_ = scores_[new_scale_id,:,:]
            score_ = score_ - np.min(score_)
            score_ = score_/np.sum(score_)
            # apply displacement penalty
            score_ = (1-hp.window_influence)*score_ + hp.window_influence*penalty
            """
            plt.imshow(np.squeeze(score_), cmap = 'gray')
            plt.show()
            plt.pause(5)
            """
            pos_x, pos_y = _update_target_position(pos_x, pos_y, score_, final_score_sz, design.tot_stride, design.search_sz, hp.response_up, x_sz)
            # convert <cx,cy,w,h> to <x,y,w,h> and save output
            bboxes[i,:] = pos_x-target_w/2, pos_y-target_h/2, target_w, target_h
            # update the target representation with a rolling average
           
            if hp.z_lr>0:
                new_templates_z_ = sess.run([templates_z], feed_dict={
                                                                siamNet.batched_pos_x_ph: [pos_x],
                                                                siamNet.batched_pos_y_ph: [pos_y],
                                                                siamNet.batched_z_sz_ph: [z_sz],
                                                                image: image_
                                                                })

                templates_z_=(1-hp.z_lr)*np.asarray(templates_z_) + hp.z_lr*np.asarray(new_templates_z_)
            
            # update template patch size
            z_sz = (1-hp.scale_lr)*z_sz + hp.scale_lr*scaled_exemplar[new_scale_id]
            
            if run.visualization:
                show_frame((image_[0] + 0.5) * 255 , bboxes[i,:], 1)        
        
        t_elapsed = time.time() - t_start
        speed = num_frames/t_elapsed

        # Finish off the filename queue coordinator.
        coord.request_stop()
        coord.join(threads) 

        # from tensorflow.python.client import timeline
        # trace = timeline.Timeline(step_stats=run_metadata.step_stats)
        # trace_file = open('timeline-search.ctf.json', 'w')
        # trace_file.write(trace.generate_chrome_trace_format())

    plt.close('all')
    
    return bboxes, speed
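tracker_v2 feeds images to the graph normalized to [-0.5, 0.5] (x_image / 255. - 0.5) and undoes that for display with (image_[0] + 0.5) * 255. The template itself is kept fresh with the same rolling average used in the other examples; a minimal numpy sketch, with an assumed learning rate and feature shape:

# Minimal sketch of the exponential-moving-average template update (hp.z_lr);
# the 6x6x128 feature shape and z_lr = 0.01 are assumptions for illustration.
import numpy as np

z_lr = 0.01
templates_z_ = np.random.rand(6, 6, 128)       # exemplar features from the first frame
new_templates_z_ = np.random.rand(6, 6, 128)   # features re-extracted at the new position

# convex combination: mostly keep the old template, slowly absorb the new appearance
templates_z_ = (1 - z_lr) * templates_z_ + z_lr * new_templates_z_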
Example #4
def tracker(hp, run, design, frame_name_list, pos_x, pos_y, target_w, target_h, final_score_sz, filename, image,
            templates_z, scores, start_frame, candidate_scores):
    num_frames = np.size(frame_name_list)
    # stores tracker's output for evaluation
    bboxes = np.zeros((num_frames, 4))

    scale_factors = hp.scale_step ** np.linspace(-np.ceil(hp.scale_num / 2), np.ceil(hp.scale_num / 2), hp.scale_num)
    # cosine window to penalize large displacements    
    hann_1d = np.expand_dims(np.hanning(final_score_sz), axis=0)
    penalty = np.transpose(hann_1d) * hann_1d
    penalty = penalty / np.sum(penalty)

    context = design.context * (target_w + target_h)
    z_sz = np.sqrt(np.prod((target_w + context) * (target_h + context)))
    x_sz = float(design.search_sz) / design.exemplar_sz * z_sz

    # thresholds to saturate patches shrinking/growing
    min_z = hp.scale_min * z_sz
    max_z = hp.scale_max * z_sz
    min_x = hp.scale_min * x_sz
    max_x = hp.scale_max * x_sz

    # run_metadata = tf.RunMetadata()
    # run_opts = {
    #     'options': tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
    #     'run_metadata': run_metadata,
    # }

    run_opts = {}

    # with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        # Coordinate the loading of image files.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        # save first frame position (from ground-truth)
        bboxes[0, :] = pos_x - target_w / 2, pos_y - target_h / 2, target_w, target_h

        image_, templates_z_ = sess.run([image, templates_z], feed_dict={
            siam.pos_x_ph: pos_x,
            siam.pos_y_ph: pos_y,
            siam.z_sz_ph: z_sz,
            filename: frame_name_list[0]})
        new_templates_z_ = templates_z_

        t_start = time.time()

        # Get an image from the queue
        for i in range(1, num_frames):
            scaled_exemplar = z_sz * scale_factors
            scaled_search_area = x_sz * scale_factors
            scaled_target_w = target_w * scale_factors
            scaled_target_h = target_h * scale_factors
            image_, scores_, candidate_scores_ = sess.run(
                [image, scores, candidate_scores],
                feed_dict={
                    siam.pos_x_ph: pos_x,
                    siam.pos_y_ph: pos_y,
                    siam.x_sz0_ph: scaled_search_area[0],
                    siam.x_sz1_ph: scaled_search_area[1],
                    siam.x_sz2_ph: scaled_search_area[2],
                    templates_z: np.squeeze(templates_z_),
                    filename: frame_name_list[i],
                }, **run_opts)
            scores_ = np.squeeze(scores_)
            # penalize change of scale
            scores_[0, :, :] = hp.scale_penalty * scores_[0, :, :]
            scores_[2, :, :] = hp.scale_penalty * scores_[2, :, :]
            # find scale with highest peak (after penalty)
            new_scale_id = np.argmax(np.amax(scores_, axis=(1, 2)))
            # update scaled sizes
            x_sz = (1 - hp.scale_lr) * x_sz + hp.scale_lr * scaled_search_area[new_scale_id]
            target_w = (1 - hp.scale_lr) * target_w + hp.scale_lr * scaled_target_w[new_scale_id]
            target_h = (1 - hp.scale_lr) * target_h + hp.scale_lr * scaled_target_h[new_scale_id]
            # select response with new_scale_id
            score_ = scores_[new_scale_id, :, :]
            score_ = score_ - np.min(score_)
            score_ = score_ / np.sum(score_)
            # apply displacement penalty
            score_ = (1 - hp.window_influence) * score_ + hp.window_influence * penalty
            pos_x, pos_y = _update_target_position(pos_x, pos_y, score_, final_score_sz, design.tot_stride,
                                                   design.search_sz, hp.response_up, x_sz)
            # convert <cx,cy,w,h> to <x,y,w,h> and save output
            bboxes[i, :] = pos_x - target_w / 2, pos_y - target_h / 2, target_w, target_h
            # update the target representation with a rolling average
            if hp.z_lr > 0:
                new_templates_z_ = sess.run([templates_z], feed_dict={
                    siam.pos_x_ph: pos_x,
                    siam.pos_y_ph: pos_y,
                    siam.z_sz_ph: z_sz,
                    image: image_
                })

                templates_z_ = (1 - hp.z_lr) * np.asarray(templates_z_) + hp.z_lr * np.asarray(new_templates_z_)

            # update template patch size
            z_sz = (1 - hp.scale_lr) * z_sz + hp.scale_lr * scaled_exemplar[new_scale_id]

            if run.visualization:
                show_frame(image_, bboxes[i, :], 1)

        t_elapsed = time.time() - t_start
        speed = num_frames / t_elapsed

        # Finish off the filename queue coordinator.
        coord.request_stop()
        coord.join(threads)

        # from tensorflow.python.client import timeline
        # trace = timeline.Timeline(step_stats=run_metadata.step_stats)
        # trace_file = open('timeline-search.ctf.json', 'w')
        # trace_file.write(trace.generate_chrome_trace_format())

    plt.close('all')

    return bboxes, speed
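Every tracker above derives the exemplar crop z_sz and the search crop x_sz from the target size plus a context margin (the (w+2p)*(h+2p) comment in Example #3). A worked sketch with concrete numbers (the target size and design constants are illustrative assumptions):

# Worked example of the crop-size computation repeated at the top of each tracker.
import numpy as np

target_w, target_h = 60.0, 40.0                          # assumed target size in pixels
context_amount, exemplar_sz, search_sz = 0.5, 127, 255   # assumed design values

context = context_amount * (target_w + target_h)              # 50.0 px of context margin
z_sz = np.sqrt((target_w + context) * (target_h + context))   # ~99.5 px exemplar crop in the frame
x_sz = float(search_sz) / exemplar_sz * z_sz                   # ~199.8 px search crop in the frame
print(z_sz, x_sz)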
Example #5
def main_camera():
    cam = cv2.VideoCapture(0)
    if not cam.isOpened():
        exit()

    bboxes = np.zeros((10, 4))

    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] =>   [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
    # instead of
    # [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # build TF graph once for all
    image, templates_z, scores = siam.build_tracking_graph_cam(
        final_score_sz, design, env)

    ret, frame = cam.read()
    print(frame.dtype)
    roi = get_roi(frame)
    pos_x, pos_y, target_w, target_h = convert_roi(roi[0][0], roi[0][1])
    # pos_x, pos_y, target_w, target_h = region_to_bbox(gt[evaluation.start_frame])

    scale_factors = hp.scale_step**np.linspace(-np.ceil(hp.scale_num / 2),
                                               np.ceil(hp.scale_num / 2),
                                               hp.scale_num)
    # cosine window to penalize large displacements
    hann_1d = np.expand_dims(np.hanning(final_score_sz), axis=0)
    penalty = np.transpose(hann_1d) * hann_1d
    penalty = penalty / np.sum(penalty)

    context = design.context * (target_w + target_h)
    z_sz = np.sqrt(np.prod((target_w + context) * (target_h + context)))
    x_sz = float(design.search_sz) / design.exemplar_sz * z_sz

    # thresholds to saturate patches shrinking/growing
    min_z = hp.scale_min * z_sz
    max_z = hp.scale_max * z_sz
    min_x = hp.scale_min * x_sz
    max_x = hp.scale_max * x_sz

    run_opts = {}

    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        tf.global_variables_initializer().run()
        # Coordinate the loading of image files.
        # coord = tf.train.Coordinator()
        # threads = tf.train.start_queue_runners(coord=coord)

        # save first frame position (from ground-truth)
        bboxes[
            0, :] = pos_x - target_w / 2, pos_y - target_h / 2, target_w, target_h

        # TODO: convert roi[0] to the silly siam format
        image_, templates_z_ = sess.run(
            [image, templates_z],
            feed_dict={
                siam.pos_x_ph: pos_x,
                siam.pos_y_ph: pos_y,
                siam.z_sz_ph: z_sz,
                image: frame
            })
        new_templates_z_ = templates_z_

        t_start = time.time()
        num_frames = 0

        # Get an image from the queue
        while True:
            scaled_exemplar = z_sz * scale_factors
            scaled_search_area = x_sz * scale_factors
            scaled_target_w = target_w * scale_factors
            scaled_target_h = target_h * scale_factors
            ret, frame = cam.read()
            num_frames += 1
            image_, scores_ = sess.run(
                [image, scores],
                feed_dict={
                    siam.pos_x_ph: pos_x,
                    siam.pos_y_ph: pos_y,
                    siam.x_sz0_ph: scaled_search_area[0],
                    siam.x_sz1_ph: scaled_search_area[1],
                    siam.x_sz2_ph: scaled_search_area[2],
                    templates_z: np.squeeze(templates_z_),
                    image: frame,
                },
                **run_opts)
            scores_ = np.squeeze(scores_)
            # penalize change of scale
            scores_[0, :, :] = hp.scale_penalty * scores_[0, :, :]
            scores_[2, :, :] = hp.scale_penalty * scores_[2, :, :]
            # find scale with highest peak (after penalty)
            new_scale_id = np.argmax(np.amax(scores_, axis=(1, 2)))
            # update scaled sizes
            x_sz = (1 - hp.scale_lr
                    ) * x_sz + hp.scale_lr * scaled_search_area[new_scale_id]
            target_w = (
                1 - hp.scale_lr
            ) * target_w + hp.scale_lr * scaled_target_w[new_scale_id]
            target_h = (
                1 - hp.scale_lr
            ) * target_h + hp.scale_lr * scaled_target_h[new_scale_id]
            # select response with new_scale_id
            score_ = scores_[new_scale_id, :, :]
            score_ = score_ - np.min(score_)
            score_ = score_ / np.sum(score_)
            # apply displacement penalty
            score_ = (1 - hp.window_influence
                      ) * score_ + hp.window_influence * penalty
            pos_x, pos_y = _update_target_position(pos_x, pos_y, score_,
                                                   final_score_sz,
                                                   design.tot_stride,
                                                   design.search_sz,
                                                   hp.response_up, x_sz)
            # convert <cx,cy,w,h> to <x,y,w,h> and save output
            out = pos_x - target_w / 2, pos_y - target_h / 2, target_w, target_h
            # out = pos_x-target_w/2, pos_y-target_h/2, target_w, target_h
            # update the target representation with a rolling average

            if hp.z_lr > 0:
                new_templates_z_ = sess.run(
                    [templates_z],
                    feed_dict={
                        siam.pos_x_ph: pos_x,
                        siam.pos_y_ph: pos_y,
                        siam.z_sz_ph: z_sz,
                        image: image_
                    })

                templates_z_ = (1 - hp.z_lr) * np.asarray(
                    templates_z_) + hp.z_lr * np.asarray(new_templates_z_)

            # update template patch size
            z_sz = (1 - hp.scale_lr
                    ) * z_sz + hp.scale_lr * scaled_exemplar[new_scale_id]
            key = 0
            if run.visualization:
                key = show_frame(image_, out)

            t_elapsed = time.time() - t_start
            speed = num_frames / t_elapsed
            if key == 120:
                print("Speed", speed)
                sess.close()
                cv2.destroyAllWindows()
                exit()
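This camera variant keeps grabbing frames until show_frame reports key code 120, i.e. the 'x' key. The helper itself is not shown on this page; a minimal sketch of the kind of OpenCV-based implementation the loop assumes (signature and behaviour are guesses based on the call sites):

# Hedged sketch of a show_frame compatible with the webcam loop above:
# draw the box, display the frame, and return the pressed key so the caller can exit on 'x'.
import cv2
import numpy as np

def show_frame(frame, bbox, delay_ms=1):
    x, y, w, h = [int(v) for v in bbox]
    vis = np.ascontiguousarray(frame)
    cv2.rectangle(vis, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.imshow('tracking', vis)
    return cv2.waitKey(delay_ms) & 0xFF   # 120 == ord('x')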
Example #6
def tracker(hp, run, design, frame_name_list, pos_x, pos_y, target_w, target_h, final_score_sz, filename, image, templates_z, scores, start_frame, video_name, frame_sz, z_crops, x_crops, anchor_coord):
    num_frames = np.size(frame_name_list) - start_frame
    # stores tracker's output for evaluation
    bboxes = np.zeros((num_frames,4))
    reinitialize = False
    scale_factors = hp.scale_step**np.linspace(-np.ceil(hp.scale_num/2), np.ceil(hp.scale_num/2), hp.scale_num)
    # cosine window to penalize large displacements    
    hann_1d = np.expand_dims(np.hanning(final_score_sz), axis=0)
    penalty = np.transpose(hann_1d) * hann_1d
    penalty = penalty / np.sum(penalty)

    context = design.context*(target_w+target_h)
    z_sz = np.sqrt(np.prod((target_w+context)*(target_h+context)))
    x_sz = float(design.search_sz) / design.exemplar_sz * z_sz

    # thresholds to saturate patches shrinking/growing
    min_z = hp.scale_min * z_sz
    max_z = hp.scale_max * z_sz
    min_x = hp.scale_min * x_sz
    max_x = hp.scale_max * x_sz

    #detector settings
    options = {"model": "/home/mdinh/siamfc-tf/cfg/yolo-mio.cfg", "pbLoad": "/home/mdinh/siamfc-tf/built_graph/yolo-mio.pb", "metaLoad": "/home/mdinh/siamfc-tf/built_graph/yolo-mio.meta", "gpu": 0.4, "threshold": 0.4}

    tfnet = TFNet(options)
    # run_metadata = tf.RunMetadata()
    # run_opts = {
    #     'options': tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
    #     'run_metadata': run_metadata,
    # }

    run_opts = {}

    # with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        # Coordinate the loading of image files.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        
        # save first frame position (from ground-truth)
        bboxes[0,:] = pos_x-target_w/2, pos_y-target_h/2, target_w, target_h                

        image_, templates_z_ = sess.run([image, templates_z], feed_dict={
                                                                        siam.pos_x_ph: pos_x,
                                                                        siam.pos_y_ph: pos_y,
                                                                        siam.z_sz_ph: z_sz,
                                                                        filename: frame_name_list[start_frame]})
        new_templates_z_ = templates_z_

        t_start = time.time()

        # Get an image from the queue
        for i in range(1, num_frames):        
            scaled_exemplar = z_sz * scale_factors
            scaled_search_area = x_sz * scale_factors
            scaled_target_w = target_w * scale_factors
            scaled_target_h = target_h * scale_factors
            image_, scores_, x_crops_ , anchor_coord__= sess.run(
                [image, scores, x_crops, anchor_coord],
                feed_dict={
                    siam.pos_x_ph: pos_x,
                    siam.pos_y_ph: pos_y,
                    siam.x_sz0_ph: scaled_search_area[0],
                    siam.x_sz1_ph: scaled_search_area[1],
                    siam.x_sz2_ph: scaled_search_area[2],
                    templates_z: np.squeeze(templates_z_),
                    filename: frame_name_list[i + start_frame],
                }, **run_opts)
            scores_ = np.squeeze(scores_)
            # penalize change of scale
            scores_[0,:,:] = hp.scale_penalty*scores_[0,:,:]
            scores_[2,:,:] = hp.scale_penalty*scores_[2,:,:]
            # find scale with highest peak (after penalty)
            new_scale_id = np.argmax(np.amax(scores_, axis=(1,2)))
            # update scaled sizes
            x_sz = (1-hp.scale_lr)*x_sz + hp.scale_lr*scaled_search_area[new_scale_id]        
            target_w = (1-hp.scale_lr)*target_w + hp.scale_lr*scaled_target_w[new_scale_id]
            target_h = (1-hp.scale_lr)*target_h + hp.scale_lr*scaled_target_h[new_scale_id]
            #print("target size -------------")
            #print([target_h, target_w])

            #print("target sipatch size -------------")
            #print(z_sz)
            # select response with new_scale_id
            score_ = scores_[new_scale_id,:,:]

            max_score = score_.max()
            min_score = score_.min()
            score_augmentation = np.full((257, 257), float(0))

            # run the detector on the search crop to check for drift
            result_crop = []
            bbox_crop = []
            best_detection = []
            if abs(pos_x) > frame_sz[1] or abs(pos_y) > frame_sz[0] or pos_x < 0 or pos_y < 0:
                # target centre left the frame: flag the tracker for reinitialization
                OOB = True
                reinitialize = True
            else:
                OOB = False
                result_crop = tfnet.return_predict(x_crops_[new_scale_id, :, :])

            if result_crop:
                maxdetection_crop = max(result_crop, key=lambda x: x['confidence'])



                for detection in result_crop:
                    bbox_crop.append([detection['topleft']['x'], detection['topleft']['y'],
                                          detection['bottomright']['x'] - detection['topleft']['x'],
                                          detection['bottomright']['y'] - detection['topleft']['y']])
                    best_crop = ([maxdetection_crop['topleft']['x'], maxdetection_crop['topleft']['y'],
                                      maxdetection_crop['bottomright']['x'] - maxdetection_crop['topleft']['x'],
                                      maxdetection_crop['bottomright']['y'] - maxdetection_crop['topleft']['y']])

                    # centre of the best detection in crop coordinates, as an int so the
                    # range() checks and slicing below are valid
                    peak_x = int((maxdetection_crop['bottomright']['x'] + maxdetection_crop['topleft']['x']) / 4)
                    peak_y = int((maxdetection_crop['bottomright']['y'] + maxdetection_crop['topleft']['y']) / 4)


                    if peak_x in range(100,200) and peak_y in range(100,200):
                        reinitialize = True

                        # generate an augmentation map
                        score_augmentation = np.full((257, 257), float(min_score))
                        for x in range(peak_x - 5, peak_x + 5):
                            for y in range(peak_y - 5, peak_y + 5):
                                score_augmentation[y, x] = max_score

                    bbox_frame = [ element * scaled_search_area[new_scale_id] / design.search_sz for element in best_crop ]
                    bbox_frame[0] = best_crop[0] + anchor_coord__[new_scale_id, 1]
                    bbox_frame[1] = best_crop[1] + anchor_coord__[new_scale_id, 0]

                    #print(anchor_coord__[new_scale_id, 0])
                    #print(anchor_coord__[new_scale_id, 1])

                    #pos_x = 2 * peak_x * scaled_search_area[new_scale_id] / 512
                    #pos_y = 2 * peak_y * scaled_search_area[new_scale_id] / 512

                    target_h = (bbox_frame[3] + target_h) / 2
                    target_w =  (bbox_frame[2] + target_w) / 2

                    #print(scale_factors[new_scale_id])

                    #print("size in crop -----------")
                    #print([best_crop[3], best_crop[2]])
                    #print("size in frame -----------")
                    #print([target_h_scaled, target_w_scaled])
                    #print("scaled size -------------")
                    #print([target_h, target_w])
            #update score map


            score_updated = (hp.scale_lr)*score_ + (1-hp.scale_lr) *score_augmentation
            score_updated = score_updated - np.min(score_updated)
            score_updated = score_updated / np.sum(score_updated)
            # apply displacement penalty
            score_updated = (1 - hp.window_influence) * score_updated + hp.window_influence * penalty
            #print("old position -----------")
            #print([pos_x, pos_y])
            pos_x, pos_y = _update_target_position(pos_x, pos_y, score_updated, final_score_sz, design.tot_stride, design.search_sz, hp.response_up, x_sz)

            #print("updated position -----------")
            #print([pos_x, pos_y])
            # convert <cx,cy,w,h> to <x,y,w,h> and save output
            bboxes[i,:] = pos_x-target_w/2, pos_y-target_h/2, target_w, target_h
            bbox_detection = [0, 0, 0, 0]
            #print (bboxes[i,:])
            #print (frame_sz)


            # run detector on whole frame if out of bounds

            if OOB == True:
                OOB = False
                result = tfnet.return_predict(image_)
                #print ([sorted([object['confidence'] for object in result])])
                if len(result) > 0:
                    maxdetection = max(result, key=lambda x: x['confidence'])

                   #mindetection = min(result, key=lambda x: x['confidence'])
                    for detection in result:
                        bbox_detection.append([detection['topleft']['x'],detection['topleft']['y'], detection['bottomright']['x'] - detection['topleft']['x'], detection['bottomright']['y'] - detection['topleft']['y']])

                    best_detection = ([maxdetection['topleft']['x'], maxdetection['topleft']['y'],
                                      maxdetection['bottomright']['x'] - maxdetection['topleft']['x'],
                                      maxdetection['bottomright']['y'] - maxdetection['topleft']['y']])



                #print("frame number: " + str(i))
                #print (bbox_detection[0])
                #print (bboxes[i,:])
                #print (bbox_crop)

                # if best_detection:
                #     iou = utils.iou(best_detection, bboxes[i, :])
                #     scale = utils.scale(best_detection, bboxes[i, :])
                #     print (iou)
                #     print(scale)
                #
                #     if iou > 0 and iou < 0.5:
                #         print("drift")
                #         reinitialize = True
                #         bboxes[i, :] = np.asarray(best_detection)
                #         pos_x = (maxdetection['topleft']['x'] + maxdetection['bottomright']['x'])/2
                #         pos_y = (maxdetection['bottomright']['y'] + maxdetection['topleft']['y'])/2
                #
                #
                #         print(pos_x)
                #         print (pos_y)

                #TODO reinitialize when tracker collides with edge with detection closest to the last position



            # update the target representation with a rolling average
            if hp.z_lr>0:
                new_templates_z_ = sess.run([templates_z], feed_dict={
                                                                siam.pos_x_ph: pos_x,
                                                                siam.pos_y_ph: pos_y,
                                                                siam.z_sz_ph: z_sz,
                                                                image: image_
                                                                })

                #if best_crop:

                if reinitialize == True:
                    #print(reinitialize)

                    # assign new target height and width
                    if best_detection and OOB == True:
                        OOB = False
                        bboxes[i, :] = best_detection
                        target_h = best_detection[3]
                        target_w = best_detection[2]
                        pos_x = best_detection[0] + best_detection[2]/2
                        pos_y = best_detection[1] + best_detection[3]/2


                    context = design.context * (target_w + target_h)
                    z_sz = np.sqrt(np.prod((target_w + context) * (target_h + context)))
                    x_sz = float(design.search_sz) / design.exemplar_sz * z_sz

                    # thresholds to saturate patches shrinking/growing
                    min_z = hp.scale_min * z_sz
                    max_z = hp.scale_max * z_sz
                    min_x = hp.scale_min * x_sz
                    max_x = hp.scale_max * x_sz

                    templates_z_ = np.asarray(new_templates_z_)

                    reinitialize = False
                else:
                    templates_z_=(1-hp.z_lr)*np.asarray(templates_z_) + hp.z_lr*np.asarray(new_templates_z_)
                    #print(reinitialize)
            # update template patch size
            z_sz = (1-hp.scale_lr)*z_sz + hp.scale_lr*scaled_exemplar[new_scale_id]
            
            if run.visualization:
                show_frame(image_, bboxes[i,:],bbox_detection, i, video_name,1)
                #show_crops(x_crops_, best_crop,i,video_name, 3)
                #show_scores(scores_,1)
                #show_score(score_,i, video_name,1)
                #show_score(score_augmentation, i, video_name,2)
                #show_score(score_updated,i, video_name, 3)


        #end of loop
        t_elapsed = time.time() - t_start
        speed = num_frames/t_elapsed

        # Finish off the filename queue coordinator.
        coord.request_stop()
        coord.join(threads) 

        # from tensorflow.python.client import timeline
        # trace = timeline.Timeline(step_stats=run_metadata.step_stats)
        # trace_file = open('timeline-search.ctf.json', 'w')
        # trace_file.write(trace.generate_chrome_trace_format())

    plt.close('all')

    return bboxes, speed
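The distinguishing step of this example is the detector-driven score augmentation: a flat map set to the tracker's own maximum inside a small window around the detector's peak, blended with the Siamese response before the position update. A condensed sketch of just that blending (map size, peak location and mixing weight are illustrative assumptions):

# Condensed sketch of Example #6's score augmentation and blending step.
import numpy as np

final_score_sz = 257
score_ = np.random.rand(final_score_sz, final_score_sz)   # stands in for the penalized Siamese response
peak_x, peak_y = 130, 140                                  # detector peak in score-map coordinates (assumed)
scale_lr = 0.59                                            # assumed value of hp.scale_lr

score_augmentation = np.full((final_score_sz, final_score_sz), score_.min())
score_augmentation[peak_y - 5:peak_y + 5, peak_x - 5:peak_x + 5] = score_.max()

# blend and renormalize; the result is what _update_target_position receives
score_updated = scale_lr * score_ + (1 - scale_lr) * score_augmentation
score_updated = score_updated - np.min(score_updated)
score_updated = score_updated / np.sum(score_updated)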
Example #7
def cozmo_program(robot: cozmo.robot.Robot):
	
	global angle
	angle = 25.
	robot.set_head_angle(degrees(angle)).wait_for_completed()
	robot.set_lift_height(0.0).wait_for_completed()
	robot.camera.image_stream_enabled = True
	robot.camera.color_image_enabled = True
	robot.camera.enable_auto_exposure = True
	
	os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
	
	frame = os.path.join(directory, "current.jpeg")

	print("Starting Tensorflow...")
	
	with tf.Session() as sess:
		print("Session successfully started")
		model = load_model('modelv1.07-0.96.hdf5')		
		while True:
			global X, Y, W, H
			global result
			X = 245.
			Y = 165.
			W = 150.
			H = 150.
			
			gt = [X, Y, W, H]
			pos_x, pos_y, target_w, target_h = region_to_bbox(gt)
			frame = os.path.join(directory, "current.jpeg")
			result = 0
			dog_counter = 0
			cat_counter = 0
			background_counter = 0
			next_state = 0
			current_state = 0 #Background: 0, Cat:1, Dog:2
			while True:
				latest_img = robot.world.latest_image
				if latest_img is not None:
					pilImage = latest_img.raw_image
					pilImage.resize((640,480), Image.ANTIALIAS).save(os.path.join(directory, "current.jpeg"), "JPEG") 
				show_frame(np.asarray(Image.open(frame)), [900.,900.,900.,900.], 1)
				img = load_image(frame)
				[result,out_relu,global_average_pooling2d] = sess.run([model.outputs,model.get_layer('out_relu').output\
										   ,model.get_layer('global_average_pooling2d').output ], feed_dict={model.input.name:img})
				next_state = np.argmax(result)
				print('Arg max: ',next_state)
				
				# Initial Current State is Background
				if current_state == 0: 
					print('Background')
					if next_state == 1: # Detected a Cat
						current_state = 1   # Transition to Cat State
						background_counter = 0
						cat_counter = 1
						dog_counter = 0
					elif next_state == 2: # Detected a Dog
						current_state = 2   # Transition to Dog state
						background_counter = 0
						cat_counter = 0
						dog_counter = 1
				# Current State is Cat
				elif current_state == 1: 
					print('\t\t\t\t\t\tCat')
					if next_state == 0:   # Detected Background
						background_counter += 1
						if background_counter >= 6:  # Transition to Background only if Background appeared for more than 6 times
							background_counter = 0
							current_state = 0
							cat_counter = 0
					elif next_state == 1: # Detected Cat itself
						cat_counter +=1
						if cat_counter >= 30:
							print('Cozmo sees a cat')
							dense = model.get_layer('dense').get_weights()
							weights = dense[0].T
							
							testing_counter = 0
							detected_centroid = 0
							xmin_avg = 0
							xmax_avg = 0
							ymin_avg = 0
							ymax_avg = 0
							frame_average = 2
							frame_count = 0
							while True:
								latest_img = robot.world.latest_image
								if latest_img is not None:
									pilImage = latest_img.raw_image
									pilImage.resize((640,480), Image.ANTIALIAS).save(os.path.join(directory, "current.jpeg"), "JPEG")
								img = load_image(frame)
								[result,out_relu,global_average_pooling2d] = sess.run([model.outputs,model.get_layer('out_relu').output\
														   ,model.get_layer('global_average_pooling2d').output ], feed_dict={model.input.name:img})
								
								kernels = out_relu.reshape(7,7,1280)
								final = np.dot(kernels,weights[result[0].argmax()])
								final1 = array_to_img(final.reshape(7,7,1))
								final1 = final1.resize((224,224), Image.ANTIALIAS)
								box = img_to_array(final1).reshape(224,224)
								#box = cv2.blur(box,(30,30))
								temp = (box > box.max()*.8) *1 
								
								temp_adjusted = np.ndarray(shape=np.shape(temp), dtype=np.dtype(np.uint8))
								temp_adjusted[:,:] = np.asarray(temp)*255
								contours, hierarchy = cv2.findContours(temp_adjusted, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[-2:]
								contours = np.array(contours)
								max_area = [0,0] # contours index and area
								for index, contour in enumerate(contours):
									if(max_area[1]< len(contour)):
										max_area = [index,len(contour)]
									
								contours_adjusted = contours[max_area[0]].squeeze(axis=1).T
								
								xmin = contours_adjusted[0].min() * (640./224.)
								ymin = contours_adjusted[1].min() * (480./224.)
								xmax = contours_adjusted[0].max() * (640./224.)
								ymax = contours_adjusted[1].max() * (480./224.)
																
								if result[0].argmax() == 1:
									
									# Frame smoothing
									frame_count = frame_count + 1
									xmin_avg = xmin_avg + xmin
									xmax_avg = xmax_avg + xmax
									ymin_avg = ymin_avg + ymin
									ymax_avg = ymax_avg + ymax
									
									if frame_count % frame_average == 0:
										frame_count = 0
										xmin_avg = xmin_avg/frame_average
										xmax_avg = xmax_avg/frame_average
										ymin_avg = ymin_avg/frame_average
										ymax_avg = ymax_avg/frame_average
										
										print(xmin_avg, end=",")
										print(ymin_avg, end=",")
										print(xmax_avg, end=",")
										print(ymax_avg, end="\n")
										ymin_avg = ymin_avg + (ymax_avg - ymin_avg)/2. - H/2.
										xmin_avg = xmin_avg + (xmax_avg - xmin_avg)/2. - W/2.
										print("150: ",xmin_avg, end=",")
										print("150: ",ymin_avg, end="\n")
										gt = [xmin_avg, ymin_avg, W, H]
										xmin_avg = 0
										xmax_avg = 0
										ymin_avg = 0
										ymax_avg = 0
										
										pos_x, pos_y, target_w, target_h = region_to_bbox(gt)
										bboxes = np.zeros((1, 4))
										#bboxes[0,:] = pos_x-target_w/2, pos_y-target_h/2, target_w, target_h
										bboxes[0,:] = pos_x-W/2, pos_y-H/2, W, H
										print(len(contours))
										testing_counter = testing_counter + 1
										print("Testing_counter: ",testing_counter)
										show_frame(np.asarray(Image.open(frame)), gt, 1)
										print("Cat is detected")								
									
										print("Starting the tracker ...")
										if (bboxes[0,1] + bboxes[0,3]/2) < (Y + H/2 - 40):
											print("Command: Raise the head")
											angle = angle + 0.5
											if angle > 44.5:
												angle = 44.5
										elif (bboxes[0,1] + bboxes[0,3]/2) > (Y + H/2 + 40):
											print("Command: Lower the head")
											angle = angle - 0.5
											if angle < 0:
												angle = 0
										else:
											pass
										
										set_head_angle_action = robot.set_head_angle(degrees(angle), max_speed=20, in_parallel=True)
										
										if straight(bboxes[0,:])[0] != 0 and turn(bboxes[0,:])[0] != 0:
											robot.drive_wheel_motors(straight(bboxes[0,:])[0] + turn(bboxes[0,:])[0], straight(bboxes[0,:])[1] + turn(bboxes[0,:])[1])
											detected_centroid = 0
										elif straight(bboxes[0,:])[0] == 0 and turn(bboxes[0,:])[0] == 0:
											robot.stop_all_motors()
											detected_centroid = detected_centroid + 1
										elif straight(bboxes[0,:])[0] == 0:
											robot.drive_wheel_motors(turn(bboxes[0,:])[0], turn(bboxes[0,:])[1])
											detected_centroid = 0
										elif turn(bboxes[0,:])[0] == 0:
											robot.drive_wheel_motors(straight(bboxes[0,:])[0], straight(bboxes[0,:])[1])
											detected_centroid = 0
										else:
											robot.stop_all_motors()
											detected_centroid = detected_centroid + 1
										
										if detected_centroid > 20//frame_average:
											detected_centroid = 0
											print("Reached a stable state.........\t\t\t\t\t\t\t\t STABLE")
											
											# Go near the object
											
											set_head_angle_action.wait_for_completed()
											robot.abort_all_actions(log_abort_messages=True)
											robot.wait_for_all_actions_completed()
											robot.set_head_angle(degrees(0.5)).wait_for_completed()
											print("Robot's head angle: ",robot.head_angle)
											target_frame_count = 1
											while True:
												latest_img = None
												while latest_img is None:
													latest_img = robot.world.latest_image
												target_frame1 = latest_img.raw_image
												target_frame1 = target_frame1.resize((640,480), Image.ANTIALIAS)
												#target_frame1 = target_frame1.convert('L')
												target_frame1 = np.asarray(target_frame1)
												#orb1 = cv2.ORB_create(500)
												#kp1 = orb1.detect(target_frame1,None)
												#kp1, des1 = orb1.compute(target_frame1, kp1)
												#features_img1 = cv2.drawKeypoints(target_frame1, kp1, None, color=(255,0,0), flags=0)
												#plt.imsave("target_frame1_"+str(target_frame_count)+".jpeg",features_img1)
												plt.imsave("target_frame1_"+str(target_frame_count)+".jpeg",target_frame1)
											
												drive_straight_action = robot.drive_straight(distance=cozmo.util.distance_mm(distance_mm=10),speed=cozmo.util.speed_mmps(10), in_parallel=True)
												drive_straight_action.wait_for_completed()
												robot.set_head_angle(degrees(0.5)).wait_for_completed()
												print("Robot's head angle: ",robot.head_angle)
												latest_img = None
												while latest_img is None:
													latest_img = robot.world.latest_image
												target_frame2 = latest_img.raw_image
												target_frame2 = target_frame2.resize((640,480), Image.ANTIALIAS)
												#target_frame2 = target_frame2.convert('L')
												target_frame2 = np.asarray(target_frame2)
												#orb2 = cv2.ORB_create(500)
												#kp2 = orb2.detect(target_frame2,None)
												#kp2, des2 = orb2.compute(target_frame2, kp2)
												#features_img2 = cv2.drawKeypoints(target_frame2, kp2, None, color=(255,0,0), flags=0)
												#plt.imsave("target_frame2_"+str(target_frame_count)+".jpeg",features_img2)
												plt.imsave("target_frame2_"+str(target_frame_count)+".jpeg",target_frame2)
												target_frame_count = target_frame_count + 1
												'''
												matcher = cv2.DescriptorMatcher_create(cv2.DESCRIPTOR_MATCHER_BRUTEFORCE_HAMMING)
												matches = matcher.match(des1, des2, None)
												
												matches.sort(key=lambda x: x.distance, reverse=False)
												matches = matches[:10]
												imMatches = cv2.drawMatches(target_frame1, kp1, target_frame2, kp2, matches, None)
												cv2.imwrite("matches_tf1_tf2.jpg", imMatches)
												
												points1 = np.zeros((len(matches), 2), dtype=np.float32)
												points2 = np.zeros((len(matches), 2), dtype=np.float32)

												for i, match in enumerate(matches):
													points1[i, :] = kp1[match.queryIdx].pt
													points2[i, :] = kp2[match.trainIdx].pt
													print("Points1 [{}]: {}".format(i,points1[i][0]), points1[i][1],"\tPoints2: ",points2[i][0], points2[i][1]) 
												index = None
												dist1_x = []
												dist2_x = []
												for index in range(len(points1)):
													dist1_x.append((W/2.)-points1[index][0]) # Extract only the x-coordinate
													dist2_x.append((W/2.)-points2[index][0]) # Extract only the x-coordinate
																							
												fw_x = 1./((1./np.array(dist2_x)) - (1./np.array(dist1_x))) # Calculate the image plane to obj plane mapping in x direction
												
												pt1_x = []
												pt2_x = []
												for index in range(len(points1)):
													pt1_x.append(fw_x[index]/(W/2. - points1[index][0])) 
													pt2_x.append(fw_x[index]/(W/2. - points2[index][0]))
													print("Approx. distance[{}]: {}".format(index, pt1_x[index]))
												if len(pt2_x) < 10:
													break
												'''
											sys.exit(0)
											
					else:				   # Detected Dog
						dog_counter += 1
						if dog_counter >= 6:  # Transition to Dog only if Dog appeared for more than 6 times
							cat_counter = 0
							current_state = 2
				# Current State is Dog
				elif current_state == 2:
					print('\t\t\t\t\t\t\t\t\t\t\t\tDog')
					if next_state == 0:	 # Detected Background
						background_counter += 1
						if background_counter >= 6:  # Transition to Background only if Background appeared for more than 6 times
							background_counter = 0
							current_state = 0
							dog_counter = 0 
					elif next_state == 2:   # Detected Dog itself
						dog_counter +=1
						if dog_counter >= 30:
							print('Cozmo sees a Dog')
							robot.drive_wheels(-50, -50)
							time.sleep(3)
							robot.drive_wheels(70, -70)
							time.sleep(2.8)  
							robot.drive_wheels(0, 0)						
							break 
					else:				   # Detected Cat
						cat_counter += 1
						if cat_counter >= 6:  # Transition to Cat only if Cat appeared for more than 6 times
							dog_counter = 0
							current_state = 1			
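Before Example #8, note how the Cozmo example localizes the detected cat: it projects the last convolutional feature map onto the dense-layer weights of the predicted class (a class-activation-map style computation), thresholds at 80% of the peak, and turns the largest contour into a box. A standalone sketch of that projection with random stand-in tensors (the 7x7x1280 shape and three classes mirror the example; everything here is illustrative):

# Standalone sketch of the class-activation-map localization used in the Cozmo example.
import numpy as np

out_relu = np.random.rand(1, 7, 7, 1280)   # stands in for the last conv block output
dense_w = np.random.rand(3, 1280)          # stands in for model.get_layer('dense') weights, transposed
class_id = 1                               # e.g. the "cat" output

kernels = out_relu.reshape(7, 7, 1280)
cam = np.dot(kernels, dense_w[class_id])   # 7x7 class activation map

# threshold at 80% of the peak, as the example does before resizing and cv2.findContours
mask = (cam > 0.8 * cam.max()).astype(np.uint8) * 255
ys, xs = np.nonzero(mask)
if xs.size:
    xmin, xmax, ymin, ymax = xs.min(), xs.max(), ys.min(), ys.max()
    # the example works on a 224x224 upsampled map and rescales to the 640x480 camera frame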
Example #8
def tracker(hp, run, design, frame_name_list, pos_x, pos_y, target_w, target_h,
            final_score_sz, filename, image, templates_z, scores, start_frame):

    num_frames = np.size(frame_name_list)
    # stores tracker's output for evaluation
    bboxes = np.zeros((num_frames, 4))
    bboxesupper = np.zeros((num_frames, 4))
    bboxeslower = np.zeros((num_frames, 4))

    scale_factors = hp.scale_step**np.linspace(-np.ceil(hp.scale_num / 2),
                                               np.ceil(hp.scale_num / 2),
                                               hp.scale_num)
    # cosine window to penalize large displacements
    hann_1d = np.expand_dims(np.hanning(final_score_sz), axis=0)
    penalty = np.transpose(hann_1d) * hann_1d
    penalty = penalty / np.sum(penalty)

    context = design.context * (target_w + target_h)
    z_sz = np.sqrt(np.prod((target_w + context) * (target_h + context)))
    x_sz = float(design.search_sz) / design.exemplar_sz * z_sz

    # thresholds to saturate patches shrinking/growing
    min_z = hp.scale_min * z_sz
    max_z = hp.scale_max * z_sz
    min_x = hp.scale_min * x_sz
    max_x = hp.scale_max * x_sz

    # run_metadata = tf.RunMetadata()
    # run_opts = {
    #     'options': tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
    #     'run_metadata': run_metadata,
    # }

    run_opts = {}

    # with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        # Coordinate the loading of image files.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        # save first frame position (from ground-truth)
        bboxes[
            0, :] = pos_x - target_w / 2, pos_y - target_h / 2, target_w, target_h
        bboxesupper[
            0, :] = pos_x - target_w / 2, pos_y - target_h / 2, target_w, target_h / 2
        bboxeslower[0, :] = pos_x - target_w / 2, pos_y, target_w, target_h / 2

        image_, templates_z_ = sess.run(
            [image, templates_z],
            feed_dict={
                siam.pos_x_ph: pos_x,
                siam.pos_y_ph: pos_y,
                siam.z_sz_ph: z_sz,
                filename: frame_name_list[0]
            })
        image_, templates_z_upper = sess.run(
            [image, templates_z],
            feed_dict={
                siam.pos_x_ph: pos_x,
                siam.pos_y_ph: pos_y - target_h / 2,
                siam.z_sz_ph: z_sz,
                filename: frame_name_list[0]
            })
        image_, templates_z_lower = sess.run(
            [image, templates_z],
            feed_dict={
                siam.pos_x_ph: pos_x,
                siam.pos_y_ph: pos_y + target_h / 2,
                siam.z_sz_ph: z_sz,
                filename: frame_name_list[0]
            })
        new_templates_z_ = templates_z_
        new_templates_z_upper = templates_z_upper
        new_templates_z_lower = templates_z_lower

        t_start = time.time()
        sco_final = np.zeros((3, 257, 257))
        # Get an image from the queue
        distance_transform = dt2d
        for i in range(1, num_frames):
            for j in range(1, 4):
                scaled_exemplar = z_sz * scale_factors
                scaled_search_area = x_sz * scale_factors
                scaled_target_w = target_w * scale_factors
                scaled_target_h = target_h * scale_factors
                image_, scores_ = sess.run(
                    [image, scores],
                    feed_dict={
                        siam.pos_x_ph:
                        pos_x,
                        #siam.pos_y_ph: pos_y,
                        siam.pos_y_ph:
                        pos_y - target_h / 2 if j == 1 else
                        (pos_y + target_h / 2 if j == 2 else pos_y),
                        siam.x_sz0_ph:
                        scaled_search_area[0],
                        siam.x_sz1_ph:
                        scaled_search_area[1],
                        siam.x_sz2_ph:
                        scaled_search_area[2],
                        templates_z:
                        np.squeeze(templates_z_upper) if j == 1 else
                        (np.squeeze(templates_z_lower)
                         if j == 2 else np.squeeze(templates_z_)),
                        filename:
                        frame_name_list[i],
                    },
                    **run_opts)

                if j == 1:
                    templates_zupper = np.squeeze(templates_z_upper)
                    templates_zupper = tf.convert_to_tensor(
                        templates_zupper, np.float32)
                elif j == 2:
                    templates_zlower = np.squeeze(templates_z_lower)
                    templates_zlower = tf.convert_to_tensor(
                        templates_zlower, np.float32)
                else:
                    templates_zmain = np.squeeze(templates_z_)
                    templates_zmain = tf.convert_to_tensor(
                        templates_zmain, np.float32)

                scores_ = np.squeeze(scores_)
                # penalize change of scale
                scores_[0, :, :] = hp.scale_penalty * scores_[0, :, :]
                scores_[2, :, :] = hp.scale_penalty * scores_[2, :, :]
                # find scale with highest peak (after penalty)
                new_scale_id = np.argmax(np.amax(scores_, axis=(1, 2)))
                # update scaled sizes
                x_sz = (
                    1 - hp.scale_lr
                ) * x_sz + hp.scale_lr * scaled_search_area[new_scale_id]
                target_w = (
                    1 - hp.scale_lr
                ) * target_w + hp.scale_lr * scaled_target_w[new_scale_id]
                target_h = (
                    1 - hp.scale_lr
                ) * target_h + hp.scale_lr * scaled_target_h[new_scale_id]
                # select response with new_scale_id
                score_ = scores_[new_scale_id, :, :]
                score_ = score_ - np.min(score_)
                score_ = score_ / np.sum(score_)
                # apply displacement penalty
                #score_ = (1-hp.window_influence)*score_ + hp.window_influence*penalty
                min1 = score_.min()
                max1 = score_.max()
                #score_max = skimage.measure.block_reduce(score_, (5,5), np.max)

                #score_max=ndimage.distance_transform_edt(score_)
                # parameters (w) passed to the dt2d distance transform
                w = [0.1, 0, 0.1, 0]
                score_ = (score_ / score_.max()) * 255
                score_max = distance_transform(score_, w, 4)
                score_max = (((score_max - min1) *
                              (score_max.max() - score_max.min())) /
                             (max1 - min1)) + score_max.min()

                #score_max_norm = Image.fromarray(score_max)

                new_width = 257
                new_height = 257
                #sco = score_max_norm.resize((new_width,new_height),Image.ANTIALIAS)
                #sco = resize(int(score_max), (257, 257))
                sco = score_max
                sco_final[j - 1, :, :] = sco

            #####################################################################################
            sco_f = sco_final[0, :, :] + sco_final[1, :, :] + sco_final[
                2, :, :]
            #sco_f = sco_final[0,:,:]
            pos_x, pos_y = _update_target_position(pos_x, pos_y, sco_f,
                                                   final_score_sz,
                                                   design.tot_stride,
                                                   design.search_sz,
                                                   hp.response_up, x_sz)
            pos_x_upper, pos_y_upper = _update_target_position(
                pos_x, pos_y, sco_final[0, :, :], final_score_sz,
                design.tot_stride, design.search_sz, hp.response_up, x_sz)
            pos_x_lower, pos_y_lower = _update_target_position(
                pos_x, pos_y, sco_final[1, :, :], final_score_sz,
                design.tot_stride, design.search_sz, hp.response_up, x_sz)

            # convert <cx,cy,w,h> to <x,y,w,h> and save output
            bboxes[
                i, :] = pos_x - target_w / 2, pos_y - target_h / 2, target_w, target_h
            bboxesupper[
                i, :] = pos_x_upper - target_w / 2, pos_y_upper - target_h / 2, target_w, target_h / 2
            bboxeslower[
                i, :] = pos_x_lower - target_w / 2, pos_y_lower, target_w, target_h / 2

            # update the target representation with a rolling average
            if hp.z_lr > 0:
                new_templates_z_ = sess.run(
                    [templates_zmain],
                    feed_dict={
                        siam.pos_x_ph: pos_x,
                        siam.pos_y_ph: pos_y,
                        siam.z_sz_ph: z_sz,
                        image: image_
                    })
                new_templates_z_upper = sess.run(
                    [templates_zupper],
                    feed_dict={
                        siam.pos_x_ph: pos_x_upper,
                        siam.pos_y_ph: pos_y_upper,
                        siam.z_sz_ph: z_sz,
                        image: image_
                    })
                new_templates_z_lower = sess.run(
                    [templates_zlower],
                    feed_dict={
                        siam.pos_x_ph: pos_x_lower,
                        siam.pos_y_ph: pos_y_lower,
                        siam.z_sz_ph: z_sz,
                        image: image_
                    })

                templates_z_ = (1 - hp.z_lr) * np.asarray(
                    templates_z_) + hp.z_lr * np.asarray(new_templates_z_)
                templates_z_upper = (1 - hp.z_lr) * np.asarray(
                    templates_z_upper) + hp.z_lr * np.asarray(
                        new_templates_z_upper)
                templates_z_lower = (1 - hp.z_lr) * np.asarray(
                    templates_z_lower) + hp.z_lr * np.asarray(
                        new_templates_z_lower)

            # update template patch size
            z_sz = (1 - hp.scale_lr
                    ) * z_sz + hp.scale_lr * scaled_exemplar[new_scale_id]

            if run.visualization:
                show_frame(image_, bboxes[i, :], bboxesupper[i, :],
                           bboxeslower[i, :], 1)

        t_elapsed = time.time() - t_start
        speed = num_frames / t_elapsed

        # Finish off the filename queue coordinator.
        coord.request_stop()
        coord.join(threads)

        # from tensorflow.python.client import timeline
        # trace = timeline.Timeline(step_stats=run_metadata.step_stats)
        # trace_file = open('timeline-search.ctf.json', 'w')
        # trace_file.write(trace.generate_chrome_trace_format())

    plt.close('all')

    return bboxes, speed
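All of these trackers refresh the exemplar features with the same rolling average, templates_z_ = (1 - z_lr) * old + z_lr * new. Below is a minimal, self-contained sketch of that update; ema_update and its arguments are illustrative names, not identifiers from the code above.

import numpy as np

def ema_update(old_template, new_template, z_lr):
    """Blend previous exemplar features with freshly extracted ones."""
    # z_lr = 0 keeps the first-frame template forever; z_lr = 1 replaces it every frame
    return (1.0 - z_lr) * np.asarray(old_template) + z_lr * np.asarray(new_template)

# toy usage with a 6x6x128 feature map and a 10% learning rate
old = np.zeros((6, 6, 128), dtype=np.float32)
new = np.ones((6, 6, 128), dtype=np.float32)
blended = ema_update(old, new, z_lr=0.1)   # every entry becomes 0.1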
Exemple #9
0
def tracker(hp, run, design, frame_name_list, pos_x, pos_y, target_w, target_h,
            final_score_sz, image, templates_z, scores, path_ckpt, siamNet):
    """
        run the tracking steps under tensorflow session.
        
        Inputs:
            hp, run, design: system parameters.
            
            frame_name_list: a list of paths for all frames in the tracking vedio.
            
            pos_x, pos_y, target_w, target_h: target position and size in the 
                first frame from ground thruth, will be updated during tracking.
            
            final_score_sz: size of the final score map after bilinear interpolation.
            
            image, templates_z, scores: tensors that will be run in tensorflow session.
                See siamese.py for detailed explanation.
                
            path_ckpt: path of the checkpoint file used to retore model variables.
            
            siamNet: an instance of siamese network class.
            
        Returns:
            bboxes: a list of the predicted bboxes
            
            speed: average tracking speed(fps)
    """
    num_frames = np.size(frame_name_list)
    # stores tracker's output for evaluation
    bboxes = np.zeros((num_frames, 4))

    scale_factors = hp.scale_step**np.linspace(-np.ceil(hp.scale_num / 2),
                                               np.ceil(hp.scale_num / 2),
                                               hp.scale_num)
    # cosine window to penalize large displacements
    hann_1d = np.expand_dims(np.hanning(final_score_sz), axis=0)
    penalty = np.transpose(hann_1d) * hann_1d
    penalty = penalty / np.sum(penalty)

    context = design.context * (target_w + target_h)
    z_sz = np.sqrt(np.prod(
        (target_w + context) * (target_h + context)))  #(w +2p)*(h+2p)
    x_sz = float(design.search_sz) / design.exemplar_sz * z_sz

    # thresholds to saturate patches shrinking/growing
    min_z = hp.scale_min * z_sz
    max_z = hp.scale_max * z_sz
    min_x = hp.scale_min * x_sz
    max_x = hp.scale_max * x_sz

    # run_metadata = tf.RunMetadata()
    # run_opts = {
    #     'options': tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
    #     'run_metadata': run_metadata,
    # }

    run_opts = {}
    saver = tf.train.Saver()
    # with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
    with tf.Session() as sess:
        saver.restore(sess, path_ckpt)
        print("Model restored from: ", path_ckpt)
        print("Start tracking......")
        # Coordinate the loading of image files.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        # save first frame position (from ground-truth)
        bboxes[
            0, :] = pos_x - target_w / 2, pos_y - target_h / 2, target_w, target_h

        # Read the first frame as z and feed it into the conv net to get its feature map
        z_image = cv2.imread(frame_name_list[0])
        image_, templates_z_ = sess.run(
            [image, templates_z],
            feed_dict={
                siamNet.batched_pos_x_ph: [pos_x],
                siamNet.batched_pos_y_ph: [pos_y],
                siamNet.batched_z_sz_ph: [z_sz],
                image: [z_image / 255. * 2 - 1]
            })
        new_templates_z_ = templates_z_

        t_start = time.time()

        # Get an image from the queue
        for i in range(1, num_frames):
            scaled_exemplar = z_sz * scale_factors
            scaled_search_area = x_sz * scale_factors
            scaled_target_w = target_w * scale_factors
            scaled_target_h = target_h * scale_factors

            # Read the next frame as x and feed it into the conv net together with the
            # feature map of z to get the final score map
            x_image = cv2.imread(frame_name_list[i])
            image_, scores_ = sess.run(
                [image, scores],
                feed_dict={
                    siamNet.batched_pos_x_ph: [pos_x],
                    siamNet.batched_pos_y_ph: [pos_y],
                    siamNet.batched_x_sz0_ph: [scaled_search_area[0]],
                    siamNet.batched_x_sz1_ph: [scaled_search_area[1]],
                    siamNet.batched_x_sz2_ph: [scaled_search_area[2]],
                    templates_z: np.squeeze(templates_z_),
                    image: [x_image / 255. * 2 - 1],
                },
                **run_opts)

            # visualize the output score map
            """
            plt.imshow(np.squeeze(scores_[0]), cmap = 'gray')
            plt.show()
            plt.pause(5)
            """

            #finalize the score map
            scores_ = np.squeeze(scores_)
            # penalize change of scale
            scores_[0, :, :] = hp.scale_penalty * scores_[0, :, :]
            scores_[2, :, :] = hp.scale_penalty * scores_[2, :, :]
            # find scale with highest peak (after penalty)
            new_scale_id = np.argmax(np.amax(scores_, axis=(1, 2)))
            # update scaled sizes
            x_sz = (1 - hp.scale_lr
                    ) * x_sz + hp.scale_lr * scaled_search_area[new_scale_id]
            target_w = (
                1 - hp.scale_lr
            ) * target_w + hp.scale_lr * scaled_target_w[new_scale_id]
            target_h = (
                1 - hp.scale_lr
            ) * target_h + hp.scale_lr * scaled_target_h[new_scale_id]
            # select response with new_scale_id
            score_ = scores_[new_scale_id, :, :]
            score_ = score_ - np.min(score_)
            score_ = score_ / np.sum(score_)
            # apply displacement penalty
            score_ = (1 - hp.window_influence
                      ) * score_ + hp.window_influence * penalty

            # visualize the finalized score map
            """
            plt.imshow(np.squeeze(score_), cmap = 'gray')
            plt.show()
            plt.pause(5)
            """

            pos_x, pos_y = _update_target_position(pos_x, pos_y, score_,
                                                   final_score_sz,
                                                   design.tot_stride,
                                                   design.search_sz,
                                                   hp.response_up, x_sz)
            # convert <cx,cy,w,h> to <x,y,w,h> and save output
            bboxes[
                i, :] = pos_x - target_w / 2, pos_y - target_h / 2, target_w, target_h
            # update the target representation with a rolling average

            if hp.z_lr > 0:
                new_templates_z_ = sess.run(
                    [templates_z],
                    feed_dict={
                        siamNet.batched_pos_x_ph: [pos_x],
                        siamNet.batched_pos_y_ph: [pos_y],
                        siamNet.batched_z_sz_ph: [z_sz],
                        image: image_
                    })

                templates_z_ = (1 - hp.z_lr) * np.asarray(
                    templates_z_) + hp.z_lr * np.asarray(new_templates_z_)

            # update template patch size
            z_sz = (1 - hp.scale_lr
                    ) * z_sz + hp.scale_lr * scaled_exemplar[new_scale_id]

            if run.visualization:
                show_frame(x_image, bboxes[i, :], 1)

        t_elapsed = time.time() - t_start
        speed = num_frames / t_elapsed

        # Finish off the filename queue coordinator.
        coord.request_stop()
        coord.join(threads)

        # from tensorflow.python.client import timeline
        # trace = timeline.Timeline(step_stats=run_metadata.step_stats)
        # trace_file = open('timeline-search.ctf.json', 'w')
        # trace_file.write(trace.generate_chrome_trace_format())

    plt.close('all')

    return bboxes, speed
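Exemple #9 reads frames with OpenCV and rescales pixels from [0, 255] to [-1, 1] before feeding them to the network (x_image / 255. * 2 - 1). A small sketch of that preprocessing, with a hypothetical file path:

import cv2
import numpy as np

def load_normalized_frame(path):
    """Read a frame with OpenCV and rescale its pixels to the [-1, 1] range."""
    img = cv2.imread(path)                      # BGR uint8, shape (H, W, 3)
    if img is None:
        raise FileNotFoundError(path)
    return img.astype(np.float32) / 255.0 * 2.0 - 1.0

# frame = load_normalized_frame('frames/00000001.jpg')   # hypothetical path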
Exemple #10
0
def tracker(hp, run, design, video, pos_x, pos_y, target_w, target_h,
            final_score_sz, image, templates_z, scores, process1, queue):
    # num_frames = np.size(frame_name_list)
    # stores tracker's output for evaluation
    # bboxes = np.zeros((num_frames,4))
    bboxes = []

    scale_factors = hp.scale_step**np.linspace(-np.ceil(hp.scale_num / 2),
                                               np.ceil(hp.scale_num / 2),
                                               hp.scale_num)
    # cosine window to penalize large displacements
    hann_1d = np.expand_dims(np.hanning(final_score_sz), axis=0)
    penalty = np.transpose(hann_1d) * hann_1d
    penalty = penalty / np.sum(penalty)

    context = design.context * (target_w + target_h)
    z_sz = np.sqrt(np.prod((target_w + context) * (target_h + context)))
    x_sz = float(design.search_sz) / design.exemplar_sz * z_sz

    # thresholds to saturate patches shrinking/growing
    min_z = hp.scale_min * z_sz
    max_z = hp.scale_max * z_sz
    min_x = hp.scale_min * x_sz
    max_x = hp.scale_max * x_sz

    # run_metadata = tf.RunMetadata()
    # run_opts = {
    #     'options': tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
    #     'run_metadata': run_metadata,
    # }

    ## model
    # model_path = '../frozen_inference_graph.pb'
    # odapi = DetectorAPI(path_to_ckpt=model_path)

    run_opts = {}

    # with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        # Coordinate the loading of image files.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        # save first frame position (from ground-truth)
        # bboxes[0,:] = pos_x-target_w/2, pos_y-target_h/2, target_w, target_h
        frame_idx = 1
        # in_bytes = process1.stdout.read(width * height * 3)
        # if not in_bytes :
        #     print ("none")
        #     return
        # video = (np.frombuffer(in_bytes, np.uint8).reshape([height, width, 3]))
        # video = cv2.cvtColor(video, cv2.COLOR_RGB2BGR)
        # box = odapi.processFrame(video, frame_idx)
        # pos_x, pos_y, target_w, target_h = box[0], box[1], box[2], box[3]
        # image = tf.convert_to_tensor(image)
        print(image, type(image), '*' * 10)
        image_, templates_z_ = sess.run(
            [image, templates_z],
            feed_dict={
                siam.pos_x_ph: pos_x,
                siam.pos_y_ph: pos_y,
                siam.z_sz_ph: z_sz,
                image: video
            })
        new_templates_z_ = templates_z_
        # print ('start time: ')
        # t_start = time.time()
        while True:
            frame_idx += 1

            # in_bytes = process1.stdout.read(width * height * 3)
            # if not in_bytes :
            #     print ("none")
            #     continue
            # video = (np.frombuffer(in_bytes, np.uint8).reshape([height, width, 3]))
            video = queue.get()
            # video = cv2.cvtColor(video, cv2.COLOR_RGB2BGR)
            # t_start = time.time()

            # Get an image from the queue
            # for i in range(1, num_frames):
            scaled_exemplar = z_sz * scale_factors
            scaled_search_area = x_sz * scale_factors
            scaled_target_w = target_w * scale_factors
            scaled_target_h = target_h * scale_factors

            image_, scores_ = sess.run(
                [image, scores],
                feed_dict={
                    siam.pos_x_ph: pos_x,
                    siam.pos_y_ph: pos_y,
                    siam.x_sz0_ph: scaled_search_area[0],
                    siam.x_sz1_ph: scaled_search_area[1],
                    siam.x_sz2_ph: scaled_search_area[2],
                    templates_z: np.squeeze(templates_z_),
                    # filename: frame_name_list[i],
                    image: video,
                },
                **run_opts)

            scores_ = np.squeeze(scores_)
            # penalize change of scale
            scores_[0, :, :] = hp.scale_penalty * scores_[0, :, :]
            scores_[2, :, :] = hp.scale_penalty * scores_[2, :, :]
            # find scale with highest peak (after penalty)
            new_scale_id = np.argmax(np.amax(scores_, axis=(1, 2)))
            # update scaled sizes
            x_sz = (1 - hp.scale_lr
                    ) * x_sz + hp.scale_lr * scaled_search_area[new_scale_id]
            target_w = (
                1 - hp.scale_lr
            ) * target_w + hp.scale_lr * scaled_target_w[new_scale_id]
            target_h = (
                1 - hp.scale_lr
            ) * target_h + hp.scale_lr * scaled_target_h[new_scale_id]
            # select response with new_scale_id
            score_ = scores_[new_scale_id, :, :]
            score_ = score_ - np.min(score_)
            score_ = score_ / np.sum(score_)
            # apply displacement penalty
            score_ = (1 - hp.window_influence
                      ) * score_ + hp.window_influence * penalty
            pos_x, pos_y = _update_target_position(pos_x, pos_y, score_,
                                                   final_score_sz,
                                                   design.tot_stride,
                                                   design.search_sz,
                                                   hp.response_up, x_sz)
            # convert <cx,cy,w,h> to <x,y,w,h> and save output
            # bboxes[i,:] = pos_x-target_w/2, pos_y-target_h/2, target_w, target_h
            current_boxes = [
                pos_x - target_w / 2, pos_y - target_h / 2, target_w, target_h
            ]
            # bboxes.append(current_boxes)
            # update the target representation with a rolling average
            print(time.time())
            if hp.z_lr > 0:
                new_templates_z_ = sess.run(
                    [templates_z],
                    feed_dict={
                        siam.pos_x_ph: pos_x,
                        siam.pos_y_ph: pos_y,
                        siam.z_sz_ph: z_sz,
                        image: image_
                    })

                templates_z_ = (1 - hp.z_lr) * np.asarray(
                    templates_z_) + hp.z_lr * np.asarray(new_templates_z_)
            print(time.time())
            # update template patch size
            z_sz = (1 - hp.scale_lr
                    ) * z_sz + hp.scale_lr * scaled_exemplar[new_scale_id]

            if run.visualization:
                # show_frame(image_, bboxes[i,:], 1)
                show_frame(video, current_boxes, 1)

        # t_elapsed = time.time() - t_start
        # speed = frame_idx/t_elapsed

        # Finish off the filename queue coordinator.
        coord.request_stop()
        coord.join(threads)

        # from tensorflow.python.client import timeline
        # trace = timeline.Timeline(step_stats=run_metadata.step_stats)
        # trace_file = open('timeline-search.ctf.json', 'w')
        # trace_file.write(trace.generate_chrome_trace_format())

    plt.close('all')

    return
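Exemple #10 is the streaming variant: frames are not read from disk but pulled from a multiprocessing queue (video = queue.get()) filled by a separate capture process (process1). One plausible producer, assuming frames come from cv2.VideoCapture; the names below are illustrative, not taken from the code above:

import multiprocessing as mp
import cv2

def frame_producer(source, queue):
    """Capture frames from `source` and push them onto `queue` for the tracker loop."""
    cap = cv2.VideoCapture(source)
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        queue.put(frame)
    cap.release()

# hypothetical wiring; the tracker's queue.get() calls consume these frames
# q = mp.Queue(maxsize=8)
# mp.Process(target=frame_producer, args=('input.mp4', q)).start()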
Exemple #11
0
def tracker(hp, run, design, frame_name_list, pos_x, pos_y, target_w, target_h, final_score_sz, filename, image, templates_z, scores, start_frame):
    num_frames = np.size(frame_name_list)
    # stores tracker's output for evaluation
    bboxes = np.zeros((num_frames,4))

    # scale_step is the scale ratio; build the list of scale factors from the configured number of scales
    scale_factors = hp.scale_step**np.linspace(-np.ceil(hp.scale_num/2), np.ceil(hp.scale_num/2), hp.scale_num)
    # cosine window to penalize large displacements
    # define the penalty with a Hanning window: large at the center, small toward the edges
    hann_1d = np.expand_dims(np.hanning(final_score_sz), axis=0)
    penalty = np.transpose(hann_1d) * hann_1d
    penalty = penalty / np.sum(penalty)

    # context grows the box into a square containing the target plus some background; x_sz and z_sz keep the configured ratio
    context = design.context*(target_w+target_h)
    z_sz = np.sqrt(np.prod((target_w+context)*(target_h+context)))
    x_sz = float(design.search_sz) / design.exemplar_sz * z_sz

    # thresholds to saturate patches shrinking/growing
    min_z = hp.scale_min * z_sz
    max_z = hp.scale_max * z_sz
    min_x = hp.scale_min * x_sz
    max_x = hp.scale_max * x_sz

    # run_metadata = tf.RunMetadata()
    # run_opts = {
    #     'options': tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
    #     'run_metadata': run_metadata,
    # }

    run_opts = {}
    # with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        # Coordinate the loading of image files.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        
        # save first frame position (from ground-truth)
        bboxes[0,:] = pos_x-target_w/2, pos_y-target_h/2, target_w, target_h                

        # run the network on z to extract the exemplar feature map
        image_, templates_z_ = sess.run([image, templates_z], feed_dict={
                                                                        siam.pos_x_ph: pos_x,
                                                                        siam.pos_y_ph: pos_y,
                                                                        siam.z_sz_ph: z_sz,
                                                                        filename: frame_name_list[0]})
        new_templates_z_ = templates_z_

        t_start = time.time()

        # Get an image from the queue
        progress = progressbar.ProgressBar(widgets=[progressbar.Bar('=', '[', ']'),
                                                    ' ', progressbar.Percentage(), ' ', progressbar.ETA()])
        for i in progress(range(1, num_frames)):
            # compute z, x and the target box width/height at each scale; w and h start from the first-frame box
            scaled_exemplar = z_sz * scale_factors
            scaled_search_area = x_sz * scale_factors
            scaled_target_w = target_w * scale_factors
            scaled_target_h = target_h * scale_factors

            # run the convolutional part to obtain the score maps
            image_, scores_ = sess.run(
                [image, scores],
                feed_dict={
                    siam.pos_x_ph: pos_x,
                    siam.pos_y_ph: pos_y,
                    # siam.x_sz0_ph: scaled_search_area[0],
                    # siam.x_sz1_ph: scaled_search_area[1],
                    # siam.x_sz2_ph: scaled_search_area[2],
                    siam.x_sz_ph: scaled_search_area,
                    templates_z: np.squeeze(templates_z_),
                    filename: frame_name_list[i],
                }, **run_opts)
            scores_ = np.squeeze(scores_)

            # penalize change of scale
            scores_[0,:,:] = hp.scale_penalty*scores_[0,:,:]
            scores_[2,:,:] = hp.scale_penalty*scores_[2,:,:]

            # find scale with highest peak (after penalty)
            new_scale_id = np.argmax(np.amax(scores_, axis=(1,2)))

            # update scaled sizes
            # scaled_search_area[new_scale_id] is the scale of the highest-scoring peak, but it is not adopted outright:
            # it is blended with the previous frame's x_sz; w and h are blended likewise, and the larger scale_lr is, the less the previous scale matters
            x_sz = (1-hp.scale_lr)*x_sz + hp.scale_lr*scaled_search_area[new_scale_id]        
            target_w = (1-hp.scale_lr)*target_w + hp.scale_lr*scaled_target_w[new_scale_id]
            target_h = (1-hp.scale_lr)*target_h + hp.scale_lr*scaled_target_h[new_scale_id]

            # select response with new_scale_id
            # take the score map of the best scale and normalize it
            score_ = scores_[new_scale_id, :, :]
            score_ = score_ - np.min(score_)
            score_ = score_/np.sum(score_)

            # apply displacement penalty
            # refine the scores by position: the penalty boosts locations closer to the center,
            # then the final box position is derived from the scores
            score_ = (1-hp.window_influence)*score_ + hp.window_influence*penalty
            pos_x, pos_y = _update_target_position(pos_x, pos_y, score_, final_score_sz, design.tot_stride, design.search_sz, hp.response_up, x_sz)

            # convert <cx,cy,w,h> to <x,y,w,h> and save output
            bboxes[i, :] = pos_x-target_w/2, pos_y-target_h/2, target_w, target_h
            # update the target representation with a rolling average
            # apply the box to this frame, run the network to get a new z, and blend it with the previous z using learning rate z_lr
            if hp.z_lr > 0:
                new_templates_z_ = sess.run([templates_z], feed_dict={
                                                                siam.pos_x_ph: pos_x,
                                                                siam.pos_y_ph: pos_y,
                                                                siam.z_sz_ph: z_sz,
                                                                image: image_
                                                                })

                templates_z_=(1-hp.z_lr)*np.asarray(templates_z_) + hp.z_lr*np.asarray(new_templates_z_)
            
            # update template patch size
            z_sz = (1-hp.scale_lr)*z_sz + hp.scale_lr*scaled_exemplar[new_scale_id]
            
            if run.visualization:
                show_frame(image_, bboxes[i,:], 1)        

        t_elapsed = time.time() - t_start
        speed = num_frames/t_elapsed

        # Finish off the filename queue coordinator.
        coord.request_stop()
        coord.join(threads) 

        # from tensorflow.python.client import timeline
        # trace = timeline.Timeline(step_stats=run_metadata.step_stats)
        # trace_file = open('timeline-search.ctf.json', 'w')
        # trace_file.write(trace.generate_chrome_trace_format())

    plt.close('all')

    return bboxes, speed
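Before the frame loop every variant builds the same two ingredients: a geometric pyramid of scale factors and a normalized 2-D Hanning (cosine) window used as the displacement penalty. A standalone sketch with illustrative values for scale_step, scale_num and final_score_sz:

import numpy as np

scale_step, scale_num, final_score_sz = 1.04, 3, 257    # illustrative values

# geometric scale pyramid: [1.04**-2, 1.04**0, 1.04**2] for scale_num = 3
scale_factors = scale_step ** np.linspace(-np.ceil(scale_num / 2),
                                          np.ceil(scale_num / 2),
                                          scale_num)

# outer product of a 1-D Hanning window with itself, normalized to sum to 1
hann_1d = np.expand_dims(np.hanning(final_score_sz), axis=0)
penalty = np.transpose(hann_1d) * hann_1d
penalty = penalty / np.sum(penalty)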
def track_one_sequence(hp,
                       design,
                       frame_name_list,
                       pos_x,
                       pos_y,
                       target_w,
                       target_h,
                       final_score_sz,
                       filename,
                       image,
                       templates_x,
                       templates_z,
                       scores_list,
                       vid_name,
                       dataset_type,
                       sess,
                       visualize_results,
                       save_images,
                       save_bboxes,
                       vot_handle,
                       gt=None):
    """ Handles tracking for one whole sequence. Inputs are fed to the network
    and the results are collected and can be shown on the screen and saved to
    the disk.

    Args:
      hp: namespace: hyperparameters.
      design: namespace: design parameters.
      frame_name_list: string list: list of sorted image paths to be read.
      pos_x: int: horizontal center of the target.
      pos_y: int: vertical center of the target.
      target_w: int: target width.
      target_h: int: target height.
      final_score_sz: int: size of the score map after upsampling.
      filename: string tensor: placeholder for the image path to be read.
      image: 3D tensor: the image read from the path.
      templates_x: 4D tensor: instance features from one or more layers
        concatenated by channels. See siam_mcf_net.inference comments for more
        details.
      templates_z: 4D tensor: exemplar features from one or more layers
        concatenated by channels. See siam_mcf_net.inference comments for more
        details.
      scores_list: 5D tensor: batch of score heatmaps for each of the selected
        layers.
      vid_name: string: name of this sequence (only for saving purposes).
      dataset_type: string: name of this dataset (only for saving purposes).
      sess: an open tf.Session to execute the graph.
      visualize_results: boolean: whether to show the results on the screen.
      save_images: boolean: whether to save image results to the disk.
      save_bboxes: boolean: whether to save bounding boxes to the disk.
      vot_handle: vot handle for running the VOT toolkit.
      gt: Nx4 array: optional ground truth bounding boxes (only for
        visualization purposes).

    Returns:
      Nx4 array: the resulting bounding boxes from the tracking.
      float: the tracking speed in frames per second.
    """
    num_frames = np.size(frame_name_list)
    # stores tracker's output for evaluation
    bboxes = np.zeros((num_frames, 4))

    if save_images:
        res_dir = 'results/%s/frames/%s' % (
            dataset_type, vid_name)
        if not os.path.exists(res_dir):
            os.makedirs(res_dir)

    if save_bboxes:
        bb_res_dir = 'results/%s/bboxes' % (dataset_type)
        if not os.path.exists(bb_res_dir):
            os.makedirs(bb_res_dir)

    # save first frame position (from ground-truth)
    bboxes[0, :] = pos_x-target_w/2, pos_y-target_h/2, target_w, target_h

    if vot_handle is not None:
        frame_path = vot_handle.frame()
    else:
        frame_path = frame_name_list[0]

    tracker = SiamMcfTracker(
        design.context, design.exemplar_sz, design.search_sz, hp.scale_step,
        hp.scale_num, hp.scale_penalty, hp.scale_lr, hp.window_influence,
        design.tot_stride, hp.response_up, final_score_sz, pos_x, pos_y,
        target_w, target_h, frame_path, sess, templates_z, filename)

    t_start = time.time()

    # Get an image from the queue
    for i in range(1, num_frames):
        if vot_handle is not None:
            frame_path = vot_handle.frame()
        else:
            frame_path = frame_name_list[i]

        if save_images or visualize_results:
            image_ = sess.run(image, feed_dict={filename: frame_path})

        bbox = tracker.track(
            frame_path, sess, templates_z, templates_x, scores_list, filename)

        # convert <cx,cy,w,h> to <x,y,w,h> and save output
        bboxes[i, :] = bbox

        if vot_handle is not None:
            vot_rect = vot.Rectangle(bbox[0], bbox[1], bbox[2], bbox[3])
            vot_handle.report(vot_rect)

        if visualize_results:
            show_frame(image_, bboxes[i, :], 1)

        if save_images:
            out_img = Image.fromarray(image_.copy().astype(np.uint8))
            out_draw = ImageDraw.Draw(out_img)

            if gt is not None:
                gt_rect = np.array(region_to_bbox(gt[i, :], False)).astype(
                    np.int32)
                gt_rect[2:] = gt_rect[:2] + gt_rect[2:]

            rect = bboxes[i].copy()
            rect[2:] = rect[:2] + rect[2:]
            rect = rect.astype(np.int32)

            pillow_version = [int(x) for x in PIL.__version__.split('.')]
            if (pillow_version[0] > 5 or
                    (pillow_version[0] == 5 and pillow_version[1] >= 3)):
                if gt is not None:
                    out_draw.rectangle(
                        [tuple(gt_rect[:2]), tuple(gt_rect[2:])],
                        outline=(0, 0, 255),
                        width=2)
                out_draw.rectangle(
                    [tuple(rect[:2]), tuple(rect[2:])],
                    outline=(255, 0, 0),
                    width=3)
            else:
                if gt is not None:
                    out_draw.rectangle(
                        [tuple(gt_rect[:2]), tuple(gt_rect[2:])],
                        outline=(0, 0, 255))
                out_draw.rectangle(
                    [tuple(rect[:2]), tuple(rect[2:])],
                    outline=(255, 0, 0))

            out_img.save(os.path.join(res_dir, '%05d.jpg' % (i + 1)))

    t_elapsed = time.time() - t_start
    speed = num_frames/t_elapsed

    if save_bboxes:
        with open(os.path.join(bb_res_dir, vid_name+'.txt'), 'w') as f:
            for bb in bboxes:
                f.write('%.02f,%.02f,%.02f,%.02f\n' % tuple(bb))

    return bboxes, speed
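Every example converts the tracker's center-based box <cx,cy,w,h> to the top-left form <x,y,w,h> before saving, and to corner form <x1,y1,x2,y2> before drawing with ImageDraw. Two tiny helpers, written only for illustration:

def center_to_topleft(cx, cy, w, h):
    """Convert a center-based box <cx,cy,w,h> to the top-left form <x,y,w,h>."""
    return cx - w / 2.0, cy - h / 2.0, w, h

def topleft_to_corners(x, y, w, h):
    """Convert <x,y,w,h> to the <x1,y1,x2,y2> corners used for drawing."""
    return x, y, x + w, y + h

# e.g. a 40x20 target centred at (100, 50)
print(center_to_topleft(100, 50, 40, 20))    # (80.0, 40.0, 40, 20)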
Exemple #13
0
def tracker(hp, run, design, frame_name_list, objects, final_score_sz,
            filename, image, templates_z, scores, start_frame):
    num_frames = np.size(frame_name_list)
    # stores tracker's output for evaluation
    bboxes = [np.zeros((len(objects), 4)) for i in range(0, num_frames)]

    # save first frame position (from ground-truth)
    for i in range(len(objects)):
        pos_x = objects[i][0]
        pos_y = objects[i][1]
        target_w = objects[i][2]
        target_h = objects[i][3]
        #bboxes[0][i] = objects[i]
        bboxes[0][
            i, :] = pos_x - target_w / 2, pos_y - target_h / 2, target_w, target_h

    scale_factors = hp.scale_step**np.linspace(-np.ceil(hp.scale_num / 2),
                                               np.ceil(hp.scale_num / 2),
                                               hp.scale_num)
    # cosine window to penalize large displacements
    hann_1d = np.expand_dims(np.hanning(final_score_sz), axis=0)
    penalty = np.transpose(hann_1d) * hann_1d
    penalty = penalty / np.sum(penalty)
    """    
    # I don't see these values used anywhere in the code, so I assume it's safe to comment them out
    # thresholds to saturate patches shrinking/growing    
    min_z = hp.scale_min * z_sz
    max_z = hp.scale_max * z_sz
    min_x = hp.scale_min * x_sz
    max_x = hp.scale_max * x_sz
    """

    # These variables use the box data, so they should have different values per object
    # Object Box information
    pos_x = [0] * len(objects)
    pos_y = [0] * len(objects)
    target_w = [0] * len(objects)
    target_h = [0] * len(objects)
    # Other variables
    context = [0] * len(objects)
    z_sz = [0] * len(objects)
    x_sz = [0] * len(objects)
    for o in range(len(objects)):
        pos_x[o] = objects[o][0]
        pos_y[o] = objects[o][1]
        target_w[o] = objects[o][2]
        target_h[o] = objects[o][3]
        context[o] = design.context * (target_w[o] + target_h[o])
        z_sz[o] = np.sqrt(
            np.prod((target_w[o] + context[o]) * (target_h[o] + context[o])))
        x_sz[o] = float(design.search_sz) / design.exemplar_sz * z_sz[o]

    # run_metadata = tf.RunMetadata()
    # run_opts = {
    #     'options': tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
    #     'run_metadata': run_metadata,
    # }
    run_opts = {}

    # with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        # Coordinate the loading of image files.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        # save first frame position (from ground-truth)
        #bboxes[0,:] = pos_x-target_w/2, pos_y-target_h/2, target_w, target_h

        scores_ = [0] * len(objects)
        templates_z_ = [0] * len(objects)
        for o in range(len(objects)):
            #print ('Box {} template! x: {}, y: {}, z_sz: {}'.format(o, pos_x[o], pos_y[o], z_sz[o]) )
            image_, templates_z_[o] = sess.run(
                [image, templates_z],
                feed_dict={
                    #image_, templates_z_res = sess.run([image, templates_z], feed_dict={
                    siam.pos_x_ph:
                    pos_x[o],
                    siam.pos_y_ph:
                    pos_y[o],
                    siam.z_sz_ph:
                    z_sz[o],
                    filename:
                    frame_name_list[0]
                })
            #templates_z_[o] = templates_z_res

        t_start = time.time()

        # Get an image from the queue
        for i in range(1, num_frames):
            for o in range(len(objects)):
                scaled_exemplar = z_sz[o] * scale_factors
                scaled_search_area = x_sz[o] * scale_factors
                scaled_target_w = target_w[o] * scale_factors
                scaled_target_h = target_h[o] * scale_factors

                image_, scores_ = sess.run(
                    [image, scores],
                    feed_dict={
                        siam.pos_x_ph: pos_x[o],
                        siam.pos_y_ph: pos_y[o],
                        siam.x_sz0_ph: scaled_search_area[0],
                        siam.x_sz1_ph: scaled_search_area[1],
                        siam.x_sz2_ph: scaled_search_area[2],
                        templates_z: np.squeeze(templates_z_[o]),
                        filename: frame_name_list[i],
                    },
                    **run_opts)
                scores_ = np.squeeze(scores_)
                # penalize change of scale
                scores_[0, :, :] = hp.scale_penalty * scores_[0, :, :]
                scores_[2, :, :] = hp.scale_penalty * scores_[2, :, :]
                # find scale with highest peak (after penalty)
                new_scale_id = np.argmax(np.amax(scores_, axis=(1, 2)))
                # update scaled sizes
                x_sz[o] = (1 - hp.scale_lr) * x_sz[
                    o] + hp.scale_lr * scaled_search_area[new_scale_id]
                target_w[o] = (1 - hp.scale_lr) * target_w[
                    o] + hp.scale_lr * scaled_target_w[new_scale_id]
                target_h[o] = (1 - hp.scale_lr) * target_h[
                    o] + hp.scale_lr * scaled_target_h[new_scale_id]
                # select response with new_scale_id
                score_ = scores_[new_scale_id, :, :]
                score_ = score_ - np.min(score_)
                score_ = score_ / np.sum(score_)
                # apply displacement penalty
                score_ = (1 - hp.window_influence
                          ) * score_ + hp.window_influence * penalty
                pos_x[o], pos_y[o] = _update_target_position(
                    pos_x[o], pos_y[o], score_, final_score_sz,
                    design.tot_stride, design.search_sz, hp.response_up,
                    x_sz[o])
                # convert <cx,cy,w,h> to <x,y,w,h> and save output
                #bboxes[i,:] = pos_x-target_w/2, pos_y-target_h/2, target_w, target_h
                bboxes[i][o, :] = pos_x[o] - target_w[o] / 2, pos_y[
                    o] - target_h[o] / 2, target_w[o], target_h[o]
                if hp.z_lr > 0:
                    new_templates_z_ = sess.run(
                        [templates_z],
                        feed_dict={
                            siam.pos_x_ph: pos_x[o],
                            siam.pos_y_ph: pos_y[o],
                            siam.z_sz_ph: z_sz[o],
                            image: image_
                        })
                    templates_z_[o] = (1 - hp.z_lr) * np.asarray(templates_z_[
                        o]) + hp.z_lr * np.asarray(new_templates_z_)

                # update template patch size
                z_sz[o] = (1 - hp.scale_lr) * z_sz[
                    o] + hp.scale_lr * scaled_exemplar[new_scale_id]

            if run.visualization:
                show_frame(image_, bboxes[i], 1)

        t_elapsed = time.time() - t_start
        speed = num_frames / t_elapsed

        # Finish off the filename queue coordinator.
        coord.request_stop()
        coord.join(threads)

        # from tensorflow.python.client import timeline
        # trace = timeline.Timeline(step_stats=run_metadata.step_stats)
        # trace_file = open('timeline-search.ctf.json', 'w')
        # trace_file.write(trace.generate_chrome_trace_format())

    plt.close('all')

    return bboxes, speed
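All examples end the per-frame step with _update_target_position, whose body is not shown in this listing. Below is a hedged reconstruction that is consistent with the arguments it receives and with the usual SiamFC displacement mapping (score-map pixel -> search-crop pixel -> frame pixel); treat it as an assumption about the helper, not its actual source.

import numpy as np

def _update_target_position(pos_x, pos_y, score, final_score_sz, tot_stride,
                            search_sz, response_up, x_sz):
    """Map the peak of the upsampled score map back to frame coordinates."""
    # peak location (row, col) in the final_score_sz x final_score_sz map
    p = np.asarray(np.unravel_index(np.argmax(score), np.shape(score)))
    # displacement from the map centre, in score-map pixels
    disp_in_area = p - float(final_score_sz - 1) / 2
    # undo the response upsampling and the network stride -> search-crop pixels
    disp_in_xcrop = disp_in_area * float(tot_stride) / response_up
    # rescale from the search crop to the original frame
    disp_in_frame = disp_in_xcrop * x_sz / search_sz
    # p is (row, col) = (y, x), so index 1 is the horizontal displacement
    return pos_x + disp_in_frame[1], pos_y + disp_in_frame[0]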