Example #1
def main(im, bbox):
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    siam = SiameseNet(env.root_pretrained, design.net)

    with Image.fromarray(im) as img:
        frame_sz = np.asarray(img.size)
        frame_sz[1], frame_sz[0] = frame_sz[0], frame_sz[1]

    im = Image.fromarray(im)

    torch.save(siam.state_dict(), '/home/nvidia/jlaplaza/siamfc_pytorch_test/siamfc_pretrained.pt')

    # gt, frame_name_list, _, _ = _init_video(env, evaluation, evaluation.video)
    pos_x, pos_y, target_w, target_h = region_to_bbox(bbox)

    print(target_w, target_h)
    # bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y, target_w, target_h, final_score_sz,
    #                         filename, image, templates_z, scores, evaluation.start_frame)

    tracker(hp, run, design, im, pos_x, pos_y, target_w, target_h, final_score_sz,
            siam, evaluation.start_frame)
Example #2
def evaluate():
	# avoid printing TF debugging information
	os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
	# TODO: allow parameters from command line or leave everything in json files?
	hp, evaluation, run, env, design = parse_arguments()
	# Set size for use with tf.image.resize_images with align_corners=True.
	# For example,
	#   [1 4 7] =>   [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
	# instead of
	# [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
	final_score_sz = hp.response_up * (design.score_sz - 1) + 1
	# build TF graph once for all
	gt, frame_name_list, _, _ = _init_video(env, evaluation, evaluation.video)
	
	frame_sz = [i for i in cv2.imread(frame_name_list[0]).shape]
	
	siamNet = siam.Siamese(batch_size = 1);
	image, z_crops, x_crops, templates_z, scores, loss, _, distance_to_gt, summary, templates_x, max_pos_x, max_pos_y = siamNet.build_tracking_graph_train(final_score_sz, design, env, hp, frame_sz)


	
	pos_x, pos_y, target_w, target_h = region_to_bbox(gt[evaluation.start_frame])
	bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y, target_w, target_h, final_score_sz,
		                    image, templates_z, scores, evaluation.start_frame,  path_ckpt = os.path.join(design.saver_folder, design.path_ckpt), siamNet = siamNet)
	_, precision, precision_auc, iou = _compile_results(gt, bboxes, evaluation.dist_threshold)
	
	print(evaluation.video + \
		  ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % precision +\
		  ' -- Precision AUC: ' + "%.2f" % precision_auc + \
		  ' -- IOU: ' + "%.2f" % iou + \
		  ' -- Speed: ' + "%.2f" % speed + ' --')
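
The comment block above relies on the relation final_score_sz = response_up * (score_sz - 1) + 1, which is how an align_corners=True resize grows a score map. A minimal numeric check of that relation, using response_up = 16 and score_sz = 17 purely as illustrative values (not necessarily what the repository's parameter files contain):

response_up, score_sz = 16, 17
final_score_sz = response_up * (score_sz - 1) + 1
assert final_score_sz == 257  # 16 upsampled steps per original interval, plus the endpoint
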
Example #3
def main():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] =>   [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
    # instead of
    # [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1

    # build the computational graph of Siamese fully-convolutional network
    siamNet = siam.Siamese(design.batch_size)
    # get tensors that will be used during training
    image, z_crops, x_crops, templates_z, scores, loss, train_step, distance_to_gt, summary = siamNet.build_tracking_graph_train(
        final_score_sz, design, env, hp)

    # read the tfrecord file holding all the training data
    data_reader = src.read_training_dataset.myReader(design.resize_width,
                                                     design.resize_height,
                                                     design.channel)
    batched_data = data_reader.read_tfrecord(os.path.join(
        env.tfrecord_path, env.tfrecord_filename),
                                             num_epochs=design.num_epochs,
                                             batch_size=design.batch_size)

    # run trainer
    trainer(hp, run, design, final_score_sz, batched_data, image, templates_z,
            scores, loss, train_step, distance_to_gt, z_crops, x_crops,
            siamNet, summary)
Example #4
def run_SiamFCpytorch(seq, rp, bSaveImage):
    hp, evaluation, run, env, design = parse_arguments()
    #final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    final_score_sz = 265
    siam = SiameseNet(env.root_pretrained, design.net)
    load_net(NET_PATH, siam)
    siam.cuda()

    frame_name_list = seq.s_frames
    init_rect = seq.init_rect
    x, y, width, height = init_rect  # OTB format

    init_bb = Rectangle(x - 1, y - 1, float(width), float(height))
    init_bb = convert_bbox_format(init_bb, 'center-based')

    bboxes, speed = tracker(hp, run, design, frame_name_list, init_bb.x,
                            init_bb.y, init_bb.width, init_bb.height,
                            final_score_sz, siam, evaluation.start_frame)

    trajectory = [
        Rectangle(val[0] + 1, val[1] + 1, val[2], val[3]) for val in bboxes
    ]
    result = dict()
    result['res'] = trajectory
    result['type'] = 'rect'
    result['fps'] = speed
    return result
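
The OTB handling above shifts the 1-indexed top-left rectangle to 0-indexed coordinates, converts it to a center-based box before tracking, and shifts back when reporting. A small sketch of that conversion, assuming a plain (x + w/2, y + h/2) center convention; the repository's convert_bbox_format may use a slightly different half-pixel convention:

from collections import namedtuple

Rectangle = namedtuple('Rectangle', ['x', 'y', 'width', 'height'])

def to_center_based(rect):
    # move the reference point from the top-left corner to the box center
    return Rectangle(rect.x + rect.width / 2, rect.y + rect.height / 2,
                     rect.width, rect.height)

x, y, width, height = 120, 80, 50, 40  # an OTB-style init_rect, 1-indexed
init_bb = to_center_based(Rectangle(x - 1, y - 1, float(width), float(height)))
print(init_bb)  # center near (144, 99); width and height unchanged
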
Example #5
def main():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] =>   [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
    # instead of
    # [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # build TF graph once for all
    #filename, image, templates_z, scores = siam.build_tracking_graph(final_score_sz, design, env)

    siamNet = siam.Siamese(design.batch_size)
    image, z_crops, x_crops, templates_z, scores, loss, train_step, distance_to_gt, summary, tz, max_pos_x, max_pos_y = siamNet.build_tracking_graph_train(
        final_score_sz, design, env, hp)

    batched_data = read_tfrecord(os.path.join(env.tfrecord_path,
                                              env.tfrecord_filename),
                                 num_epochs=design.num_epochs,
                                 batch_size=design.batch_size)

    trainer(hp, run, design, final_score_sz, image, templates_z, scores, loss,
            train_step, distance_to_gt, batched_data, z_crops, x_crops,
            siamNet, summary, tz, max_pos_x, max_pos_y)
Example #6
    def __init__(self):
        super(SiamMCF, self).__init__("SiamMCF")
        root_dir = path_config.SIAMMCF_ROOT_DIR
        self.hp, self.evaluation, self.env, self.design = parse_arguments(root_dir)
        self.final_score_sz = self.hp.response_up * (self.design.score_sz - 1) + 1
        # build TF graph once for all
        (
            self.filename,
            self.image,
            self.templates_x,
            self.templates_z,
            self.scores_list,
        ) = siam.build_tracking_graph(
            root_dir, self.final_score_sz, self.design, self.env, self.hp
        )
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        tf.global_variables_initializer().run(session=self.sess)
        vars_to_load = []
        for v in tf.global_variables():
            if "postnorm" not in v.name:
                vars_to_load.append(v)

        siam_ckpt_name = path_config.SIAMMCF_MODEL
        siam_saver = tf.train.Saver(vars_to_load)
        siam_saver.restore(self.sess, siam_ckpt_name)
Example #7
    def __init__(self, imagefile, region):
        #param
        self.hp, self.evaluation, self.run, self.env, self.design = parse_arguments(
        )

        self.final_score_sz = 273

        #init network
        self.siam = SiameseNet(self.env.root_pretrained, self.design.net)

        NET_PATH = '/home/lee/tracking/challenge/vot-toolkit/tracker/examples/python/pretrained/000100vggv1net1-5.weights'
        load_net(NET_PATH, self.siam)
        self.siam.cuda()

        #init bbox
        bbox = convert_bbox_format(region, 'center-based')

        self.pos_x, self.pos_y, self.target_w, self.target_h = bbox.x, bbox.y, bbox.width, bbox.height

        #init scale factor, penalty
        self.scale_factors = self.hp.scale_step**np.linspace(
            -np.ceil(self.hp.scale_num / 2), np.ceil(self.hp.scale_num / 2),
            self.hp.scale_num)
        hann_1d = np.expand_dims(np.hanning(self.final_score_sz), axis=0)
        self.penalty = np.transpose(hann_1d) * hann_1d
        self.penalty = self.penalty / np.sum(self.penalty)

        context = self.design.context * (self.target_w + self.target_h)
        self.z_sz = np.sqrt(
            np.prod((self.target_w + context) * (self.target_h + context)))
        self.x_sz = float(
            self.design.search_sz) / self.design.exemplar_sz * self.z_sz

        image_, self.templates_z_ = self.siam.get_template_z_new(
            self.pos_x, self.pos_y, self.z_sz, imagefile, self.design)
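
The cosine window built in the example above is used to penalize large displacements of the score-map peak. A standalone sketch of just that construction, with final_score_sz = 273 as in the example (numpy only):

import numpy as np

final_score_sz = 273
hann_1d = np.expand_dims(np.hanning(final_score_sz), axis=0)
penalty = np.transpose(hann_1d) * hann_1d   # outer product -> 2-D cosine window
penalty = penalty / np.sum(penalty)         # normalize so the weights sum to 1
assert penalty.shape == (final_score_sz, final_score_sz)
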
Example #8
    def __init__(self):
        hp, evaluation, run, env, design = parse_arguments()
        final_score_sz = hp.response_up * (design.score_sz - 1) + 1

        self.image_input = tf.placeholder(tf.float32, name='img_in', shape=(360, 640, 3))

        self.pos_x_ph = tf.placeholder(tf.float64, name='pos_x_ph', shape=(1,))
        self.pos_y_ph = tf.placeholder(tf.float64, name='pos_y_ph', shape=(1,))
        # size of the target (exemplar)
        self.z_sz_ph = tf.placeholder(tf.float64, name='z_sz_ph', shape=(1,))
        # search input after scaling by three different factors:
        #   the search input is resized at several scales so that, when the target's
        #   scale changes, the target in the search input stays as close as possible
        #   in size to the exemplar
        self.x_sz0_ph = tf.placeholder(tf.float64, name='x_sz0_ph', shape=(1,))
        self.x_sz1_ph = tf.placeholder(tf.float64, name='x_sz1_ph', shape=(1,))
        self.x_sz2_ph = tf.placeholder(tf.float64, name='x_sz2_ph', shape=(1,))

        # self.pos_x_ph = tf.placeholder(tf.float64, name='pos_x_ph', )
        # self.pos_y_ph = tf.placeholder(tf.float64, name='pos_y_ph', )
        # self.z_sz_ph = tf.placeholder(tf.float64, name='z_sz_ph', )
        # self.x_sz0_ph = tf.placeholder(tf.float64, name='x_sz0_ph', )
        # self.x_sz1_ph = tf.placeholder(tf.float64, name='x_sz1_ph', )
        # self.x_sz2_ph = tf.placeholder(tf.float64, name='x_sz2_ph', )
        
        self.template_x, self.templates_z, self.scores, \
        self.crop_x, self.crop_z, \
        self.padded_x, self.padded_z = _build_tracking_graph(self.image_input, final_score_sz, design, env,
                                                             self.pos_x_ph, self.pos_y_ph, self.z_sz_ph,
                                                             self.x_sz0_ph, self.x_sz1_ph, self.x_sz2_ph)
        
        self.scale_factors = hp.scale_step ** np.linspace(-np.ceil(hp.scale_num / 2), np.ceil(hp.scale_num / 2),
                                                          hp.scale_num)
        self.scale_factors = np.expand_dims(self.scale_factors, axis=-1)
        self.final_score_sz = hp.response_up * (design.score_sz - 1) + 1
        
        self.template_data = None
        """
        region format:
            center coordinates plus the target's width and height
        """
        self.last_pos_x = None
        self.last_pos_y = None
        self.target_w = 160.
        self.target_h = 160.
        
        # cosine window to penalize large displacements
        self.hann_1d = np.expand_dims(np.hanning(final_score_sz), axis=0)
        penalty = np.transpose(self.hann_1d) * self.hann_1d
        self.penalty = penalty / np.sum(penalty)
        
        self.context = design.context * (self.target_w + self.target_h)
        self.z_sz = np.sqrt(np.prod((self.target_w + self.context) * (self.target_h + self.context)))
        self.x_sz = float(design.search_sz) / design.exemplar_sz * self.z_sz
        
        self.hp = hp
        self.design = design
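
The last few lines of the example derive the exemplar (z) and search (x) crop sizes from the target size plus a context margin. A sketch of just that arithmetic with assumed parameters (context = 0.5, exemplar_sz = 127, search_sz = 255, and the 160x160 default target used above); the repository's JSON files may use different values:

import numpy as np

context_amount, exemplar_sz, search_sz = 0.5, 127, 255
target_w = target_h = 160.0
context = context_amount * (target_w + target_h)              # extra margin around the target
z_sz = np.sqrt((target_w + context) * (target_h + context))   # side of the exemplar crop in the frame
x_sz = float(search_sz) / exemplar_sz * z_sz                  # search crop scaled by the 255/127 ratio
print(round(z_sz, 1), round(x_sz, 1))                         # 320.0 642.5 under these assumptions
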
Example #9
    def ColdInit(self, imagepath, region):
        #Parse the arguments
        self.hp, self.evaluation, self.run, self.env, self.design = parse_arguments(
            mode='siamese')

        #Get first frame image and ground-truth
        self.region = region
        self.pos_x = region.x + region.width / 2
        self.pos_y = region.y + region.height / 2
        self.target_w = region.width
        self.target_h = region.height
        self.bbox = self.pos_x - self.target_w / 2, self.pos_y - self.target_h / 2, self.target_w, self.target_h

        #Calculate the size of final score (upscaled size of score matrix, where score matrix
        # is convolution of results of two branches of siamese network)
        self.final_score_sz = self.hp.response_up * (self.design.score_sz -
                                                     1) + 1

        #Initialize the COLOR network and load the weights
        self.color_params = self.InitColorNetwork()

        #Initialize the SIAMESE network and load the weights
        self.siam_params = self.InitSiamNetwork()

        #Calculate the scale factors
        self.scale_factors = self.hp.scale_step**np.linspace(
            -np.ceil(self.hp.scale_num / 2), np.ceil(self.hp.scale_num / 2),
            self.hp.scale_num)

        # cosine window to penalize large displacements
        hann_1d = np.expand_dims(np.hanning(self.final_score_sz), axis=0)
        penalty = np.transpose(hann_1d) * hann_1d
        self.penalty = penalty / np.sum(penalty)

        #Calculate search and target patch sizes
        context = self.design.context * (self.target_w + self.target_h)
        self.z_sz = np.sqrt(
            np.prod((self.target_w + context) * (self.target_h + context)))
        self.x_sz = float(
            self.design.search_sz) / self.design.exemplar_sz * self.z_sz

        #Load the colorization model
        self.LoadColorModel()

        #Extract Siam template
        image_, templates_z_ = self.ExtractSiamTemplate(imagepath)
        self.siam_ret = {"image_": image_, "templates_z_": templates_z_}

        #Extract Color template
        templates_z_, z_crops_ = self.ExtractColorTemplate(imagepath)
        self.color_ret = {"templates_z_": templates_z_, "z_crops_": z_crops_}

        return
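
The scale_factors line in ColdInit builds a small geometric scale pyramid centered on 1.0, which the tracker uses to probe the target at several sizes per frame. A minimal sketch of that computation with assumed hyperparameters (scale_step = 1.04, scale_num = 3; the real values live in the JSON parameter files):

import numpy as np

scale_step, scale_num = 1.04, 3
scale_factors = scale_step ** np.linspace(-np.ceil(scale_num / 2),
                                          np.ceil(scale_num / 2),
                                          scale_num)
print(scale_factors)  # one factor below 1, exactly 1.0, one factor above 1
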
Example #10
def main():
    # Avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

    # --- Parse arguments from JSON file ---
    hp, evaluation, run, env, design = parse_arguments()
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    image, templates_z, scores = siam.build_tracking_graph(
        final_score_sz, design, env)

    # --- Start Streaming from Video ---
    cap = cv2.VideoCapture(env.root_sequences + '/' + sys.argv[1] + '.mp4')
    ret, frame = cap.read()
    if (not ret):
        print "Error opening video sequence"

    # --- Save Video (Optional) ---
    vid_write = None
    if run.save_video:
        vid_write = cv2.VideoWriter(
            env.root_sequences + '/' + sys.argv[1] + '_out.avi',
            cv2.VideoWriter_fourcc(*'MJPG'), 25,
            (frame.shape[1], frame.shape[0]), True)

    # --- Define Initial Bounding Box ---
    BB = click_and_crop(frame, design.window_name)

    cv2.namedWindow(design.window_name)
    cv2.startWindowThread()
    cv2.setMouseCallback(design.window_name, BB.callback)

    cv2.imshow(design.window_name, frame)
    cv2.waitKey(0)

    while True:
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # ----- Define Initial Bounding Box Params & Template -----
    pos_x = int((BB.refPt[0][0] + BB.refPt[1][0]) / 2)  # Template Center
    pos_y = int((BB.refPt[0][1] + BB.refPt[1][1]) / 2)  # Template Center
    target_w = int(abs(BB.refPt[1][0] - BB.refPt[0][0]))  # Template Width / 2
    target_h = int(abs(BB.refPt[1][1] - BB.refPt[0][1]))  # Template Height / 2

    # ----- Begin Tracking -----
    tracker(hp, run, design, pos_x, pos_y, target_w, target_h, final_score_sz,
            templates_z, scores, cap, vid_write, frame)

    cap.release()
    cv2.destroyAllWindows()

    if run.save_video:
        vid_write.release()
Example #11
def main():
    #avoid printing TF debugging information(only show error log)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    hp, evaluation, run, env, design = parse_arguments()
    #build TF graph in siamese once for all
    #siam.init_create_net()
    filename, siam_net_z, loss, train_op = siam.make_siameseFC(env, design, hp)

    #iterate through all videos of evaluation.dataset
    if evaluation.video == 'all':
        #the path of folder of all videos
        train_data_folder = os.path.join(env.root_train_dataset,
                                         evaluation.dataset)
        videos_list = [v for v in os.listdir(train_data_folder)]
        videos_list.sort()
        num_v = len(videos_list)
        for i in range(num_v):
            gt, frame_name_list, frame_sz, n_frames = _init_train_video(
                env, evaluation, videos_list[i])
            start_frame = evaluation.start_frame
            #not sure
            #gt_=gt[start_frame:,:]
            gt_ = gt[start_frame:]
            frame_name_list_ = frame_name_list[start_frame:]
            num_frames = np.size(frame_name_list_)

            for j in range(num_frames - 1):
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[j])
                #train_siam_net(design,hp,frame_name_list,z_index,pos_x,pos_y,target_w,target_h,filename,siam_net_z,loss)
                train_siam_net(design, hp, frame_name_list, j, pos_x, pos_y,
                               target_w, target_h, filename, siam_net_z, loss,
                               train_op)

    else:
        gt, frame_name_list, _, _ = _init_train_video(env, evaluation,
                                                      evaluation.video)
        start_frame = evaluation.start_frame
        gt_ = gt[start_frame:]
        frame_name_list_ = frame_name_list[start_frame:]
        num_frames = np.size(frame_name_list_)

        train_siam_net(design, hp, frame_name_list, num_frames, gt, filename,
                       siam_net_z, loss, train_op)
        # for i in range(num_frames-1):
Example #12
def main(process, queue, box, video):
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] =>   [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
    # instead of
    # [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # build TF graph once for all
    image, templates_z, scores = siam.build_tracking_graph(final_score_sz, design, env)
    
    # read video stream
    # width = 640
    # height = 480
    # process1 = (
    #     ffmpeg
    #     .input('tcp://192.168.1.155:8300',vcodec='h264',r = 24,probesize=32,fflags="nobuffer",flags="low_delay",analyzeduration=1)
    #     .output('pipe:', format='rawvideo',pix_fmt="rgb24")
    #     .run_async(pipe_stdout=True)
    # )
    ## model 
    # model_path = './frozen_inference_graph.pb'
    # odapi = DetectorAPI(path_to_ckpt=model_path)
    # while True :
    # in_bytes = process1.stdout.read(width * height * 3)
    # if not in_bytes :
    #     print ("none")
    # video = (np.frombuffer(in_bytes, np.uint8).reshape([height, width, 3]))
    # video = cv2.cvtColor(video, cv2.COLOR_RGB2BGR)

    # read target from mat
    # box = odapi.processFrame(video)
    box[2] -= box[0]
    box[3] -= box[1]
    box[0] += box[2]/2
    box[1] += box[3]/2
    print ('box', box)
    pos_x, pos_y, target_w, target_h = box[0], box[1], box[2], box[3]
    tracker(hp, run, design, video, pos_x, pos_y, target_w, target_h, final_score_sz,
                            image, templates_z, scores, process, queue)
    print ('done')
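
The in-place box arithmetic above turns a detector-style [x1, y1, x2, y2] box into [center_x, center_y, width, height]. A tiny check of that conversion with made-up numbers:

box = [100, 50, 180, 170]   # x1, y1, x2, y2
box[2] -= box[0]            # width  = x2 - x1
box[3] -= box[1]            # height = y2 - y1
box[0] += box[2] / 2        # center_x
box[1] += box[3] / 2        # center_y
assert box == [140.0, 110.0, 80, 120]
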
Example #13
def main():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] =>   [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
    # instead of
    # [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # build TF graph once for all
    image, templates_z, scores = siam.build_tracking_graph_2(
        final_score_sz, design, env)

    # iterate through all videos of evaluation.dataset
    gt, frame_name_list, _, _ = _init_video(env, evaluation, evaluation.video)
    pos_x, pos_y, target_w, target_h = region_to_bbox(
        gt[evaluation.start_frame])
    track_cam(hp, run, design, final_score_sz, image, templates_z, scores,
              evaluation.start_frame)
Example #14
def main():

    _, _, _, env, design = parse_arguments()

    gpu_options = tf.GPUOptions(allow_growth=True,
                                per_process_gpu_memory_fraction=0.3)
    config = tf.ConfigProto(log_device_placement=False,
                            gpu_options=gpu_options)

    cam = cv2.VideoCapture('/home/yoonyoungcho/ext/frame%04d.jpg')
    for i in range(10):
        ret, image_ = cam.read()

    bbox_ = init_bbox(image_)

    tracker = SiamFCTracker(env, design)
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        tracker.initialize(sess, image_, bbox_)
        while True:
            ret, image_ = cam.read()
            if not ret:
                break

            start = time.time()
            bbox_ = tracker.update(sess, image_)
            fps = 1.0 / (time.time() - start)

            if bbox_ is None:
                bbox_ = init_bbox(image_)
                tracker.initialize(sess, image_, bbox_)
            else:
                x, y, w, h = map(int, bbox_)
                cv2.rectangle(image_, (x, y), (x + w, y + h), (255, 0, 0), 2)
                font = cv2.FONT_HERSHEY_SIMPLEX
                cv2.putText(image_, 'fps:%d' % fps, (0, 20), font, 0.5,
                            (0, 0, 255), 1)
                cv2.imshow('image', image_)
            if cv2.waitKey(10) == 27:
                break
Example #15
def main():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] =>   [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
    # instead of
    # [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    # the score map produced by the network is smaller than the original image, so it is
    # upsampled proportionally here so that every position in the image gets a corresponding score
    # (this final size is the search_sz in design)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # build TF graph once for all
    filename, image, templates_z, scores = siam.build_tracking_graph(
        final_score_sz, design, env, hp)

    # iterate through all videos of evaluation.dataset
    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        videos_list = [v for v in os.listdir(dataset_folder)]
        videos_list.sort()
        nv = np.size(videos_list)
        speed = np.zeros(nv * evaluation.n_subseq)
        precisions = np.zeros(nv * evaluation.n_subseq)
        precisions_auc = np.zeros(nv * evaluation.n_subseq)
        ious = np.zeros(nv * evaluation.n_subseq)
        lengths = np.zeros(nv * evaluation.n_subseq)
        for i in range(nv):
            if os.path.exists(os.path.join('data/result', videos_list[i])):
                continue
            gt, frame_name_list, frame_sz, n_frames, video_folder, equal = _init_video(
                env, evaluation, videos_list[i])
            if not equal:
                print('The .jpg and .xml is not equal in', video_folder)
                continue
            starts = np.rint(
                np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]
            # for j in range(evaluation.n_subseq):
            for j in range(1):
                start_frame = int(starts[j])
                gt_ = gt[start_frame:]
                frame_name_list_ = frame_name_list[start_frame:]
                # pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                pos_x, pos_y, target_w, target_h = xml_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                bboxes, speed[idx] = tracker(hp, run, design, frame_name_list_,
                                             pos_x, pos_y, target_w, target_h,
                                             final_score_sz, filename, image,
                                             templates_z, scores, start_frame)
                lengths[idx], precisions[idx], precisions_auc[idx], ious[idx] =\
                    _compile_results(gt_, bboxes, evaluation.dist_threshold)

                print str(i) + ' -- ' + videos_list[i] + \
                ' -- Precision: ' + "%.2f" % precisions[idx] + \
                ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] + \
                ' -- IOU: ' + "%.2f" % ious[idx] + \
                ' -- Speed: ' + "%.2f" % speed[idx] + ' --'
                print

                # Draw_Result(bboxes, frame_name_list_, gt_, env)

        tot_frames = np.sum(lengths)
        mean_precision = np.sum(precisions * lengths) / tot_frames
        mean_precision_auc = np.sum(precisions_auc * lengths) / tot_frames
        mean_iou = np.sum(ious * lengths) / tot_frames
        mean_speed = np.sum(speed * lengths) / tot_frames
        print
        print '-- Overall stats (averaged per frame) on ' + str(
            nv) + ' videos (' + str(tot_frames) + ' frames) --'
        print ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % mean_precision +\
              ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +\
              ' -- IOU: ' + "%.2f" % mean_iou +\
              ' -- Speed: ' + "%.2f" % mean_speed + ' --'
        print

    else:
        gt, frame_name_list, _, _, video_folder, equal = _init_video(
            env, evaluation, evaluation.video)
        if not equal:
            print("The .jpg and .xml is not equal in", video_folder)
            exit(0)
        pos_x, pos_y, target_w, target_h = xml_to_bbox(
            gt[evaluation.start_frame])
        bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y,
                                target_w, target_h, final_score_sz, filename,
                                image, templates_z, scores,
                                evaluation.start_frame)
        _, precision, precision_auc, iou = _compile_results(
            gt, bboxes, evaluation.dist_threshold)
        print evaluation.video + \
              ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % precision +\
              ' -- Precision AUC: ' + "%.2f" % precision_auc + \
              ' -- IOU: ' + "%.2f" % iou + \
              ' -- Speed: ' + "%.2f" % speed + ' --'
        print
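
The overall statistics printed in the 'all' branch above are frame-weighted: each subsequence's precision, AUC, IOU and speed are multiplied by its length before averaging, so longer runs count for more. A minimal sketch of that weighting with made-up numbers:

import numpy as np

precisions = np.array([0.90, 0.60])
lengths = np.array([300, 100])   # frames tracked per subsequence
mean_precision = np.sum(precisions * lengths) / np.sum(lengths)
print(mean_precision)            # 0.825, pulled toward the longer subsequence's score
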
Example #16
def runSiamfc(folderPath, fourcc, testWidth, testHeight, graph, scfg):
    print('Running Siamfc: ' + folderPath)
    frame_name_list = _init_video(folderPath)
    hp, evaluation, run, env, design = parse_arguments()
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    fp = open(os.path.join(folderPath, 'YoloBoxes.txt'))
    filename, image, templates_z, scores, graph1, scfg1 = siam.build_tracking_graph(
        final_score_sz, design, env)
    finalImages = []
    Allbboxes = []
    SiamfcVid = cv2.VideoWriter(join(folderPath, 'SiamfcVid.avi'), fourcc, 10,
                                (testWidth, testHeight))
    f = open(folderPath + "/SiamfcBoxes.txt", "w+")
    nucAngles = open(os.path.join(folderPath, "nuclearAngles.txt"), "r")
    print(refreshRate)
    for i in range(len(frame_name_list)):
        line = fp.readline()
        finalImages = []
        if (line == '\n'):
            continue
        elif (i % refreshRate == 0):
            boxes = line[:-1].split(':')
            boxNr = 0
            for j in Allbboxes:
                #label = j.label
                #pos_x = j.positions[len(j.positions)-1][0]
                #pos_y = j.positions[len(j.positions)-1][1]
                #target_w = j.positions[len(j.positions)-1][2]
                #target_h = j.positions[len(j.positions)-1][3]
                #bboxes, speed, finalImages = tracker(graph1, scfg1, hp, run, design, frame_name_list[i:i+refreshRate-1], pos_x, pos_y, target_w, target_h, final_score_sz,
                #                                    filename, image, templates_z, scores, label,0,colors[boxNr%len(colors)],0,refreshRate-1, finalImages, 0)
                #j.positions = np.concatenate((j.positions,bboxes),0)
                j.padafter(refreshRate)
            for j in boxes:
                box = j.split(',')
                label = box[0]
                box = map(int, box[1:])
                print(
                    'In folder %s Image %d has a box at %d,%d,%d,%d with label %s'
                    % (folderPath, i, box[0], box[1], box[2], box[3], label))
                pos_x = box[0]
                pos_y = box[1]
                target_w = box[2]
                target_h = box[3]
                print('Pos_x: %d, Pos_y:%d, width:%d, height:%d' %
                      (pos_x, pos_y, target_w, target_h))
                bboxes, speed, finalImages = tracker(
                    graph1, scfg1, hp, run, design,
                    frame_name_list[i:i + refreshRate - 1], pos_x, pos_y,
                    target_w, target_h, final_score_sz, filename, image,
                    templates_z, scores, label, 0, colors[boxNr % len(colors)],
                    0, refreshRate - 1, finalImages, 0)
                newBox = sfc_bbox(colors[boxNr % len(colors)], label, bboxes,
                                  0)
                newBox.padfront(i)
                Allbboxes.append(newBox)
                boxNr = boxNr + 1
                print(bboxes)
            fname = i
            probs = [0] * len(Allbboxes)
            #print(Allbboxes)
            if (liveFeed):
                try:
                    oldFolderPath = folderPath.split('_')
                    oldFolderPath[2] = str(int(oldFolderPath[2]) - 1)
                    underscore = '_'
                    oldFolderPath = underscore.join(oldFolderPath)
                except:
                    print("Could not load old probabilies")
                    oldFolderPath = ""
            else:
                oldFolderPath = ""
            if (os.path.isfile(oldFolderPath + "/SiamfcBoxes.txt")):
                print('Extracting old probs')
                probs = getOldProbs(oldFolderPath, Allbboxes,
                                    [(item.split(','))[0] for item in boxes])
                print(probs)
            for j in range(len(finalImages)):
                #print(probs)
                angle = int(nucAngles.readline())
                calcProbs(finalImages[j], angle, Allbboxes, i + j, f)
                cv2.circle(finalImages[j],
                           (int(float(1.0 - angle / 180.0) * testWidth),
                            int(testHeight / 2)), 10, (0, 0, 225), -1)
                SiamfcVid.write(finalImages[j])
                cv2.imwrite(frame_name_list[j][0:-4] + '_siamfc.png',
                            finalImages[j])
                fname = fname + 1
            #break
    f.close()
    SiamfcVid.release()
    return
Example #17
def main():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] =>   [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
    # instead of
    # [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # build TF graph once for all
    filename, image, templates_z, scores = siam.build_tracking_graph(final_score_sz, design, env)

    # iterate through all videos of evaluation.dataset
    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        videos_list = [v for v in os.listdir(dataset_folder)]
        videos_list.sort()
        nv = np.size(videos_list)
        speed = np.zeros(nv * evaluation.n_subseq)
        precisions = np.zeros(nv * evaluation.n_subseq)
        precisions_auc = np.zeros(nv * evaluation.n_subseq)
        ious = np.zeros(nv * evaluation.n_subseq)
        lengths = np.zeros(nv * evaluation.n_subseq)
        for i in range(nv):
            images_arr, gt, frame_name_list, frame_sz, n_frames = _init_video(env, evaluation, videos_list[i])
            starts = np.rint(np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]
            for j in range(evaluation.n_subseq):
                start_frame = int(starts[j])
                gt_ = gt[start_frame:, :]
                frame_name_list_ = frame_name_list[start_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                bboxes, speed[idx] = tracker(hp, run, design, frame_name_list_, pos_x, pos_y,
                                                                     target_w, target_h, final_score_sz, filename,
                                                                     image, templates_z, scores, start_frame)
                lengths[idx], precisions[idx], precisions_auc[idx], ious[idx] = compile_results(gt, bboxes, evaluation.dist_threshold)
                print(str(i) + ' -- ' + videos_list[i] + ' -- Precision: ' + "%.2f" % precisions[idx] + ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] + ' -- IOU: ' + "%.2f" % ious[idx] + ' -- Speed: ' + "%.2f" % speed[idx] + ' --')
        tot_frames = np.sum(lengths)
        mean_precision = np.sum(precisions * lengths) / tot_frames
        mean_precision_auc = np.sum(precisions_auc * lengths) / tot_frames
        mean_iou = np.sum(ious * lengths) / tot_frames
        mean_speed = np.sum(speed * lengths) / tot_frames
        print('-- Overall stats (averaged per frame) on ' + str(nv) + ' videos (' + str(tot_frames) + ' frames) --')
        print(' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % mean_precision + ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc + ' -- IOU: ' + "%.2f" % mean_iou + ' -- Speed: ' + "%.2f" % mean_speed + ' --')

    else:
        images_arr, gt, frame_name_list, _, _ = _init_video(env, evaluation, evaluation.video)
        pos_x, pos_y, target_w, target_h = region_to_bbox(gt[evaluation.start_frame]) 
      
        bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y, target_w, target_h, final_score_sz,
                                filename, image, templates_z, scores, evaluation.start_frame)        
                            
        num_frames = np.size(frame_name_list)
        bboxes_final = np.zeros((num_frames,4))

        lk_params = dict( winSize  = (5,5),
                  maxLevel = 2,
                  criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 1, 0.03))

        for i in range(1,len(images_arr)-1):
            # Create some random colors
            color = np.random.randint(0,255,(100,3))

            # Take first frame and find corners in it
            #ret, old_frame = cap.read()
            frame = images_arr[i+1]
            old_frame = images_arr[i]
            old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)
            p0 = np.zeros((1,1,2), dtype=np.float32)
            bbox_i = bboxes[i]
            c, r, w, h = int(bbox_i[0]), int(bbox_i[1]), int(bbox_i[2]), int(bbox_i[3])
            p0[0,0,0] = c
            p0[0,0,1] = r
            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            p1, st, err = cv2.calcOpticalFlowPyrLK(old_gray, frame_gray, p0, None, **lk_params)
            good_new = p1[st==1]
            bboxes_final[i,:] = p1[0][0][0], p1[0][0][1], w, h
            good_old = p0[st==1]   
            for new, old in zip(good_new, good_old):
                a, b = new.ravel()
                c, d = old.ravel()
            fig = plt.figure(1)
            ax = fig.add_subplot(111)
            r1 = patches.ConnectionPatch((a,b),(c,d),'data','data',arrowstyle="-|>") 
            r2 = patches.Circle((a,b),5,color='r')#((x,y), w, h, linewidth=2, edgecolor='r', fill=False)
            ax.imshow(np.uint8(frame))
            ax.add_patch(r2)
            ax.add_patch(r1)
            plt.ion()
            plt.show()
            plt.pause(0.001)
            plt.clf()
            old_gray = frame_gray.copy()
            p0 = good_new.reshape(-1,1,2)
        _, precision, precision_auc, iou = _compile_results(gt, bboxes_final, evaluation.dist_threshold)
        print(evaluation.video + ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % precision + ' -- Precision AUC: ' + "%.2f" % precision_auc + ' -- IOU: ' + "%.2f" % iou + ' -- Speed: ' + "%.2f" % speed + ' --')
Example #18
    def __init__(self, image_path, region):
        #Parse the arguments
        self.hp, self.evaluation, self.run, self.env, self.design = parse_arguments(
            mode='siamese')

        #Get first frame image and ground-truth
        self.region = region
        self.pos_x = region.x + region.width / 2
        self.pos_y = region.y + region.height / 2
        self.target_w = region.width
        self.target_h = region.height
        self.bbox = self.pos_x - self.target_w / 2, self.pos_y - self.target_h / 2, self.target_w, self.target_h

        #Calculate the size of final score (upscaled size of score matrix, where score matrix
        # is convolution of results of two branches of siamese network)
        self.final_score_sz = self.hp.response_up * (self.design.score_sz -
                                                     1) + 1

        #Initialize the network and load the weights
        self.filename, self.image, self.templates_z, \
        self.templates_x, self.scores, self.scores_original = siam.build_tracking_graph(self.final_score_sz, self.design, self.env)

        #Calculate the scale factors
        self.scale_factors = self.hp.scale_step**np.linspace(
            -np.ceil(self.hp.scale_num / 2), np.ceil(self.hp.scale_num / 2),
            self.hp.scale_num)

        # cosine window to penalize large displacements
        hann_1d = np.expand_dims(np.hanning(self.final_score_sz), axis=0)
        penalty = np.transpose(hann_1d) * hann_1d
        self.penalty = penalty / np.sum(penalty)

        #Calculate search and target patch sizes
        context = self.design.context * (self.target_w + self.target_h)
        self.z_sz = np.sqrt(
            np.prod((self.target_w + context) * (self.target_h + context)))
        self.x_sz = float(
            self.design.search_sz) / self.design.exemplar_sz * self.z_sz

        #Create a tensorflow session
        config = tf.ConfigProto()
        config.gpu_options.visible_device_list = "1"
        config.gpu_options.per_process_gpu_memory_fraction = 0.9
        self.sess = tf.Session(config=config)
        with self.sess.as_default():
            tf.global_variables_initializer().run()
            # Coordinate the loading of image files.
            self.coord = tf.train.Coordinator()
            self.threads = tf.train.start_queue_runners(coord=self.coord)

            self.run_opts = {}

            #Calculate the template for the given region
            image_, self.templates_z_ = self.sess.run(
                [self.image, self.templates_z],
                feed_dict={
                    siam.pos_x_ph: self.pos_x,
                    siam.pos_y_ph: self.pos_y,
                    siam.z_sz_ph: self.z_sz,
                    self.filename: image_path
                })

        return
Example #19
def main():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # Load hyperparameter (hp), evaluation, run, environment (env) and design parameters from
    # parameters/ directory.
    hp, evaluation, run, env, design = parse_arguments()
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] =>   [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
    # instead of
    # [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # build TF graph once for all
    filename, image, templates_z, scores = siam.build_tracking_graph(
        final_score_sz, design, env)

    # iterate through all videos of evaluation.dataset
    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        videos_list = [v for v in os.listdir(dataset_folder)]
        videos_list.sort()
        nv = np.size(videos_list)
        speed = np.zeros(nv * evaluation.n_subseq)
        precisions = np.zeros(nv * evaluation.n_subseq)
        precisions_auc = np.zeros(nv * evaluation.n_subseq)
        ious = np.zeros(nv * evaluation.n_subseq)
        lengths = np.zeros(nv * evaluation.n_subseq)
        for i in range(nv):
            gt, frame_name_list, frame_sz, n_frames = _init_video(
                env, evaluation, videos_list[i])
            starts = np.rint(
                np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]
            for j in range(evaluation.n_subseq):
                start_frame = int(starts[j])
                gt_ = gt[start_frame:, :]
                frame_name_list_ = frame_name_list[start_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                bboxes, speed[idx] = tracker(hp, run, design, frame_name_list_,
                                             pos_x, pos_y, target_w, target_h,
                                             final_score_sz, filename, image,
                                             templates_z, scores, start_frame)
                lengths[idx], precisions[idx], precisions_auc[idx], ious[
                    idx] = _compile_results(gt_, bboxes,
                                            evaluation.dist_threshold)
                print str(i) + ' -- ' + videos_list[i] + \
                ' -- Precision: ' + "%.2f" % precisions[idx] + \
                ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] + \
                ' -- IOU: ' + "%.2f" % ious[idx] + \
                ' -- Speed: ' + "%.2f" % speed[idx] + ' --'
                print

        tot_frames = np.sum(lengths)
        mean_precision = np.sum(precisions * lengths) / tot_frames
        mean_precision_auc = np.sum(precisions_auc * lengths) / tot_frames
        mean_iou = np.sum(ious * lengths) / tot_frames
        mean_speed = np.sum(speed * lengths) / tot_frames
        print '-- Overall stats (averaged per frame) on ' + str(
            nv) + ' videos (' + str(tot_frames) + ' frames) --'
        print ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % mean_precision +\
              ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +\
              ' -- IOU: ' + "%.2f" % mean_iou +\
              ' -- Speed: ' + "%.2f" % mean_speed + ' --'
        print

    else:
        gt, frame_name_list, _, _ = _init_video(env, evaluation,
                                                evaluation.video)
        pos_x, pos_y, target_w, target_h = region_to_bbox(
            gt[evaluation.start_frame])
        bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y,
                                target_w, target_h, final_score_sz, filename,
                                image, templates_z, scores,
                                evaluation.start_frame)
        _, precision, precision_auc, iou = _compile_results(
            gt, bboxes, evaluation.dist_threshold)
        print evaluation.video + \
              ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % precision +\
              ' -- Precision AUC: ' + "%.2f" % precision_auc + \
              ' -- IOU: ' + "%.2f" % iou + \
              ' -- Speed: ' + "%.2f" % speed + ' --'
        print
Example #20
def main():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments(mode="conv2")
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] =>   [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
    # instead of
    # [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * design.score_sz
    # build TF graph once for all
    image, templates_z, scores = siam.build_tracking_graph(
        final_score_sz, design, env)
    # iterate through all videos of evaluation.dataset
    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        videos_list = [v for v in os.listdir(dataset_folder)]
        videos_list.sort()
        # videos_list = videos_list[91:][:] #only use vot 2016
        nv = np.size(videos_list)
        speed = np.zeros(nv * evaluation.n_subseq)
        precisions = np.zeros(nv * evaluation.n_subseq)
        precisions_auc = np.zeros(nv * evaluation.n_subseq)
        ious = np.zeros(nv * evaluation.n_subseq)
        success_auc = np.zeros(nv * evaluation.n_subseq)
        lengths = np.zeros(nv * evaluation.n_subseq)
        for i in range(nv):
            # gt, frame_name_list, frame_sz, n_frames, img_mode = _init_video(env, evaluation, videos_list[i])
            gt, frame_name_list, frame_sz, n_frames, img_mode = _init_video_OTB(
                env, evaluation, videos_list[i])

            starts = np.rint(
                np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]
            for j in range(evaluation.n_subseq):
                start_frame = int(starts[j])
                gt_ = gt[start_frame:, :]
                frame_name_list_ = frame_name_list[start_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                bboxes, speed[idx] = tracker(hp, run, design, frame_name_list_,
                                             pos_x, pos_y, target_w, target_h,
                                             final_score_sz, image,
                                             templates_z, scores, start_frame)
                lengths[idx], precisions[idx], precisions_auc[idx], ious[
                    idx], success_auc[idx] = _compile_results(
                        gt_, bboxes, evaluation.dist_threshold)

                print str(i) + ' -- ' + videos_list[i] + \
                ' -- Precision: ' + "%.2f" % precisions[idx] + \
                ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] + \
                ' -- IOU: ' + "%.2f" % ious[idx] + \
                ' -- Success AUC: ' + "%.2f" % success_auc[idx] + \
                ' -- Speed: ' + "%.2f" % speed[idx] + ' --'
                print

        tot_frames = np.mean(lengths)
        mean_precision = np.mean(precisions)
        mean_precision_auc = np.mean(precisions_auc)
        mean_iou = np.mean(ious)
        mean_success_auc = np.mean(success_auc)
        mean_speed = np.mean(speed)

        print 'data set ' + evaluation.dataset + ' z_lr %f scale_step %f scale_penalty %f scale_lr %f window_influence %f' % (
            hp.z_lr, hp.scale_step, hp.scale_penalty, hp.scale_lr,
            hp.window_influence)
        print '-- Overall stats (averaged per frame) on ' + str(
            nv) + ' videos (' + str(tot_frames) + ' frames) --'
        print ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % mean_precision +\
              ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +\
              ' -- IOU: ' + "%.2f" % mean_iou +\
              ' -- Success AUC: ' + "%.2f" % mean_success_auc +\
              ' -- Speed: ' + "%.2f" % mean_speed + ' --'
        print

        with open('log_test.txt', 'a+') as f:
            f.write(time.asctime(time.localtime(time.time())) + '\r\n')
            f.write(
                'data set ' + evaluation.dataset +
                ' z_lr %f scale_step %f scale_penalty %f scale_lr %f window_influence %f \r\n'
                % (hp.z_lr, hp.scale_step, hp.scale_penalty, hp.scale_lr,
                   hp.window_influence))
            f.write('-- Overall stats (averaged per frame) on ' + str(nv) +
                    ' videos (' + str(tot_frames) + ' frames) --\r\n')
            f.write(' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % mean_precision + \
                    ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc + \
                    ' -- IOU: ' + "%.2f" % mean_iou + \
                    ' -- AUC: ' + "%.3f" % mean_success_auc + \
                    ' -- Speed: ' + "%.2f" % mean_speed + ' --\r\n')
            f.write('\r\n')

    else:
        gt, frame_name_list, _, _ = _init_video(env, evaluation,
                                                evaluation.video)
        pos_x, pos_y, target_w, target_h = region_to_bbox(
            gt[evaluation.start_frame])
        bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y,
                                target_w, target_h, final_score_sz, image,
                                templates_z, scores, evaluation.start_frame)
        _, precision, precision_auc, iou = _compile_results(
            gt, bboxes, evaluation.dist_threshold)
        print evaluation.video + \
              ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % precision +\
              ' -- Precision AUC: ' + "%.2f" % precision_auc + \
              ' -- IOU: ' + "%.2f" % iou + \
              ' -- Speed: ' + "%.2f" % speed + ' --'
        print
Example #21
def main():

    #Command Line Arguments
    parser = argparse.ArgumentParser(description="Run WIND Project")
    parser.add_argument('-c',
                        '--clear',
                        action='store_true',
                        default=False,
                        help='Delete old camera data')
    parser.add_argument('-ny',
                        '--noYolo',
                        action='store_true',
                        default=False,
                        help='Do not use Yolo')
    parser.add_argument('-ns',
                        '--noSiamfc',
                        action='store_true',
                        default=False,
                        help='Do not use Siamfc')
    parser.add_argument('-nv',
                        '--noVideo',
                        action='store_true',
                        default=False,
                        help='Do not generate video')
    parser.add_argument('-dl',
                        '--dataLocation',
                        choices=['fromFile', 'fromCamera'],
                        help='Use live camera or folder of images')
    parser.add_argument('-cn',
                        '--cameraNumber',
                        default=0,
                        help='Camera number to use')
    parser.add_argument('-ff', '--filesFolder', default="")
    parser.add_argument('-rf',
                        '--refreshRate',
                        default=10,
                        help='Refresh rate for siamfc')
    parser.add_argument('-t',
                        '--timeRecording',
                        default=3,
                        help='Seconds to record from camera')

    #Assign command line arguments to global variables
    global cameraNumber
    global doYolo
    global doSiamfc
    global genVideos
    global refreshRate
    global liveFeed
    args = parser.parse_args()
    cameraNumber = int(args.cameraNumber)
    doYolo = not args.noYolo
    doSiamfc = not args.noSiamfc
    genVideos = not args.noVideo
    refreshRate = int(args.refreshRate)
    liveFeed = args.dataLocation == 'fromCamera'
    VideoLength = int(args.timeRecording)

    #Command line argument error checking
    #Clearing out old data in the cameradata folder
    if (args.clear == True):
        for oldData in os.listdir('CameraData'):
            filep = os.path.join('CameraData', oldData)
            if (os.path.isfile(filep)):
                os.remove(filep)
                print('Deleted: ' + filep)
            elif os.path.isdir(filep):
                for reallyOldData in os.listdir(filep):
                    newfilep = os.path.join(filep, reallyOldData)
                    if (os.path.isfile(newfilep)):
                        os.remove(newfilep)
                os.rmdir(filep)
                print('Deleted: ' + filep + '/')

        print('Done Deleting')

    #Checking if file folder is valid
    if (args.dataLocation == 'fromFile'):
        if (os.path.isdir(args.filesFolder)):
            dirList = [
                os.path.join(args.filesFolder, d)
                for d in os.listdir(args.filesFolder)
                if os.path.isdir(os.path.join(args.filesFolder, d))
            ]
            dirList.sort()
            testImage = Image.open(
                os.path.join(dirList[0],
                             os.listdir(dirList[0])[0]))
            testWidth, testHeight = testImage.size
        else:
            if (args.filesFolder == ""):
                print("No folder locatoin was given")
            else:
                print(args.filesFolder + " is not a valid file location")
            return
    #Check if the camera is valid
    else:
        try:
            cam = cv2.VideoCapture(cameraNumber)
            ret, testImage = cam.read()
            testHeight, testWidth = testImage.shape[:2]
            cam.release()
        except:
            print("Camera number given is not valid or connected")
            return

    # Initialize object detector
    database.net = load_net(b"YoloConfig/yolov3-tiny.cfg",
                            b"YoloConfig/yolov3-tiny.weights", 0)
    database.meta = load_meta(b"YoloConfig/coco.data")

    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1

    # build TF graph once for all
    filename, image, templates_z, scores, graph, scfg = siam.build_tracking_graph(
        final_score_sz, design, env)
    #sFCgraph = siamfcGraph(filename, image, templates_z, scores)

    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    finalImages = []
    plt.xticks([]), plt.yticks([])
    YoloVid = cv2.VideoWriter('Yolov3Vid.avi', fourcc, 10,
                              (testWidth, testHeight))
    SiamfcVid = cv2.VideoWriter('SiamfcVid.avi', fourcc, 10,
                                (testWidth, testHeight))
    i = 0
    notDone = True

    now = datetime.datetime.now()
    if (args.dataLocation == 'fromCamera'):
        haha = threading.Thread(target=getImages,
                                args=('CameraData/', 10, VideoLength, database,
                                      now, graph, scfg))
        haha.start()
        haha.join()
        frame_name_list = _init_video('CameraData/%d_%d_%d/' %
                                      (now.hour, now.minute, now.second))
        while notDone and i < VideoLength and datetime.datetime.now(
        ) < now + datetime.timedelta(seconds=20 + VideoLength) and genVideos:
            try:
                if os.path.isdir(
                        os.path.join(
                            'CameraData', '%d_%d_%d' %
                            (now.hour, now.minute, now.second + i))):
                    showYoloResult(
                        os.path.join(
                            'CameraData', '%d_%d_%d' %
                            (now.hour, now.minute, now.second + i)), YoloVid,
                        False)
                    showSiamFCResult(
                        os.path.join(
                            'CameraData', '%d_%d_%d' %
                            (now.hour, now.minute, now.second + i)), SiamfcVid,
                        True)
                    i = i + 1
                else:
                    time.sleep(.2)
            except (KeyboardInterrupt, SystemExit):
                notDone = False
    else:
        for dirName in dirList:
            if (doYolo):
                runYolo(dirName, database, fourcc, testWidth, testHeight,
                        graph, scfg)
                if (genVideos):
                    showYoloResult(dirName, YoloVid, False)
            if (doSiamfc and dirName == dirList[0]):
                runSiamfc(dirName, fourcc, testWidth, testHeight, graph, scfg)
                if (genVideos):
                    showSiamFCResult(dirName, SiamfcVid, True)

    # haha.join()
    YoloVid.release()
    SiamfcVid.release()
    return
                    'z_target_h':
                    tf.train.Feature(float_list=tf.train.FloatList(
                        value=[z_target_h])),
                    'x_pos_x':
                    tf.train.Feature(float_list=tf.train.FloatList(
                        value=[x_pos_x])),
                    'x_pos_y':
                    tf.train.Feature(float_list=tf.train.FloatList(
                        value=[x_pos_y])),
                    'x_target_w':
                    tf.train.Feature(float_list=tf.train.FloatList(
                        value=[x_target_w])),
                    'x_target_h':
                    tf.train.Feature(float_list=tf.train.FloatList(
                        value=[x_target_h]))
                }))
            writer.write(example.SerializeToString())

    writer.close()
    print("Writer closed.")
    print(tfrecord_name + '.tfrecords' + " is written to " + output_directory)


if __name__ == "__main__":
    hp, evaluation, run, env, design = parse_arguments()
    transform2tfrecord("shuffled_data_list.txt",
                       "training_dataset",
                       "tfrecords",
                       resize_width=design.resize_width,
                       resize_height=design.resize_height)
Example #23
0
def main_camera():
    cam = cv2.VideoCapture(0)
    if not cam.isOpened():
        exit()

    bboxes = np.zeros((10, 4))

    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] =>   [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
    # instead of
    # [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # build TF graph once for all
    image, templates_z, scores = siam.build_tracking_graph_cam(
        final_score_sz, design, env)

    ret, frame = cam.read()
    print(frame.dtype)
    roi = get_roi(frame)
    pos_x, pos_y, target_w, target_h = convert_roi(roi[0][0], roi[0][1])
    # pos_x, pos_y, target_w, target_h = region_to_bbox(gt[evaluation.start_frame])

    scale_factors = hp.scale_step**np.linspace(-np.ceil(hp.scale_num / 2),
                                               np.ceil(hp.scale_num / 2),
                                               hp.scale_num)
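    # scale_factors is a small geometric pyramid: scale_num powers of scale_step with
    # exponents spread evenly between -ceil(scale_num / 2) and +ceil(scale_num / 2).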
    # cosine window to penalize large displacements
    hann_1d = np.expand_dims(np.hanning(final_score_sz), axis=0)
    penalty = np.transpose(hann_1d) * hann_1d
    penalty = penalty / np.sum(penalty)
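    # The normalized outer product of two 1-D Hann windows gives a 2-D cosine window,
    # blended into the score map later to down-weight large displacements.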

    context = design.context * (target_w + target_h)
    z_sz = np.sqrt(np.prod((target_w + context) * (target_h + context)))
    x_sz = float(design.search_sz) / design.exemplar_sz * z_sz
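    # z_sz is the exemplar crop side (target size plus a context margin); x_sz scales it
    # by search_sz / exemplar_sz so the search crop covers a proportionally larger area.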

    # thresholds to saturate patches shrinking/growing
    min_z = hp.scale_min * z_sz
    max_z = hp.scale_max * z_sz
    min_x = hp.scale_min * x_sz
    max_x = hp.scale_max * x_sz

    run_opts = {}

    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        tf.global_variables_initializer().run()
        # Coordinate the loading of image files.
        # coord = tf.train.Coordinator()
        # threads = tf.train.start_queue_runners(coord=coord)

        # save first frame position (from ground-truth)
        bboxes[
            0, :] = pos_x - target_w / 2, pos_y - target_h / 2, target_w, target_h

        # TODO: convert roi[0] to the silly siam format
        image_, templates_z_ = sess.run(
            [image, templates_z],
            feed_dict={
                siam.pos_x_ph: pos_x,
                siam.pos_y_ph: pos_y,
                siam.z_sz_ph: z_sz,
                image: frame
            })
        new_templates_z_ = templates_z_

        t_start = time.time()
        num_frames = 0

        # Get an image from the queue
        while True:
            scaled_exemplar = z_sz * scale_factors
            scaled_search_area = x_sz * scale_factors
            scaled_target_w = target_w * scale_factors
            scaled_target_h = target_h * scale_factors
            ret, frame = cam.read()
            num_frames += 1
            image_, scores_ = sess.run(
                [image, scores],
                feed_dict={
                    siam.pos_x_ph: pos_x,
                    siam.pos_y_ph: pos_y,
                    siam.x_sz0_ph: scaled_search_area[0],
                    siam.x_sz1_ph: scaled_search_area[1],
                    siam.x_sz2_ph: scaled_search_area[2],
                    templates_z: np.squeeze(templates_z_),
                    image: frame,
                },
                **run_opts)
            scores_ = np.squeeze(scores_)
            # penalize change of scale
            scores_[0, :, :] = hp.scale_penalty * scores_[0, :, :]
            scores_[2, :, :] = hp.scale_penalty * scores_[2, :, :]
            # find scale with highest peak (after penalty)
            new_scale_id = np.argmax(np.amax(scores_, axis=(1, 2)))
            # update scaled sizes
            x_sz = (1 - hp.scale_lr
                    ) * x_sz + hp.scale_lr * scaled_search_area[new_scale_id]
            target_w = (
                1 - hp.scale_lr
            ) * target_w + hp.scale_lr * scaled_target_w[new_scale_id]
            target_h = (
                1 - hp.scale_lr
            ) * target_h + hp.scale_lr * scaled_target_h[new_scale_id]
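            # search area and target size follow an exponential moving average with rate
            # scale_lr, which damps abrupt scale changes between frames.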
            # select response with new_scale_id
            score_ = scores_[new_scale_id, :, :]
            score_ = score_ - np.min(score_)
            score_ = score_ / np.sum(score_)
            # apply displacement penalty
            score_ = (1 - hp.window_influence
                      ) * score_ + hp.window_influence * penalty
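            # convex combination of the response map and the cosine window:
            # window_influence controls how strongly displacements from the previous
            # position are penalized before the new peak is picked.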
            pos_x, pos_y = _update_target_position(pos_x, pos_y, score_,
                                                   final_score_sz,
                                                   design.tot_stride,
                                                   design.search_sz,
                                                   hp.response_up, x_sz)
            # convert <cx,cy,w,h> to <x,y,w,h> and save output
            out = pos_x - target_w / 2, pos_y - target_h / 2, target_w, target_h
            # out = pos_x-target_w/2, pos_y-target_h/2, target_w, target_h
            # update the target representation with a rolling average

            if hp.z_lr > 0:
                new_templates_z_ = sess.run(
                    [templates_z],
                    feed_dict={
                        siam.pos_x_ph: pos_x,
                        siam.pos_y_ph: pos_y,
                        siam.z_sz_ph: z_sz,
                        image: image_
                    })

                templates_z_ = (1 - hp.z_lr) * np.asarray(
                    templates_z_) + hp.z_lr * np.asarray(new_templates_z_)
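                # the exemplar template is refreshed with a rolling average controlled by
                # z_lr, adapting to appearance changes without discarding the original.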

            # update template patch size
            z_sz = (1 - hp.scale_lr
                    ) * z_sz + hp.scale_lr * scaled_exemplar[new_scale_id]
            key = 0
            if run.visualization:
                key = show_frame(image_, out)

            t_elapsed = time.time() - t_start
            speed = num_frames / t_elapsed
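            # show_frame presumably returns the key pressed in the display window;
            # 120 is the ASCII code for 'x', used here to stop tracking.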
            if key == 120:
                print("Speed", speed)
                sess.close()
                cv2.destroyAllWindows()
                exit()
Example #24
0
def main():
    # Avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    # --- Parse arguments from JSON file ---
    hp, evaluation, run, env, design = parse_arguments()
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    image, templates_z, scores = siam.build_tracking_graph(
        final_score_sz, design, env)

    # --- Start Streaming from Live Video ---
    stream_path = "/home/hugogermain/stream.flv"
    cap = cv2.VideoCapture(stream_path)
    start_frame = cap.get(cv2.CAP_PROP_FRAME_COUNT)  # Start at last frame
    cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame - 15)
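    # The stream file keeps growing while it is written, so the capture is seeked a few
    # frames before CAP_PROP_FRAME_COUNT to read close to the most recent frame.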

    ret, frame = cap.read()
    if (not ret):
        print "Error opening video sequence"

    # --- Save Video (Optional) ---
    vid_write = cv2.VideoWriter(env.root_sequences + '/stream_out.avi',
                                cv2.VideoWriter_fourcc(*'MJPG'), 25,
                                (frame.shape[1], frame.shape[0]), True)

    # --- Initialize projection maps ---
    e2s = equirect2stereograph(-2.5, frame, 0, 0)

    # ===================================
    # --- Define Initial Bounding Box ---
    # ===================================

    BB = click_and_crop(e2s.project(frame), design.window_name)
    cv2.namedWindow(design.window_name)
    cv2.startWindowThread()
    cv2.setMouseCallback(design.window_name, BB.callback)
    cv2.imshow(design.window_name, e2s.project(frame))
    cv2.waitKey(1)

    while True:
        ret, frame = cap.read()
        cv2.waitKey(1)

        if ret:
            # --- Equirectangular to Stereographic Projection ---
            BB.img = e2s.project(frame)
            BB.refresh()
            # --- Reset to last frame to avoid cumulative lagging ---
            cap.release()
            cap = cv2.VideoCapture(stream_path)
            start_frame = cap.get(
                cv2.CAP_PROP_FRAME_COUNT)  # Start at last frame
            cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame - 10)
            cv2.waitKey(1)

            start_frame += 1

        else:
            # --- Reached end of file, wait for new frames ---
            cap.release()
            cap = cv2.VideoCapture(stream_path)
            cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame - 10)
            cv2.waitKey(1)

        # ---- Rotate Camera Viewpoint ---
        k = cv2.waitKey(33)
        if k == 119:  # w
            e2s.set_lat(e2s.lat + 10)
        if k == 115:  # s
            e2s.set_lat(e2s.lat - 10)
        if k == 100:  # d
            e2s.set_roll(e2s.roll + 10)
        if k == 97:  # a
            e2s.set_roll(e2s.roll - 10)

        # ---- Selection is done ----
        if k == 113:  # q
            break
        if BB.ready:
            break

    print("[INFO]: Bounding Box Selection: Done")

    # ----- Define Initial Bounding Box Params & Template -----
    pos_x = int((BB.refPt[0][0] + BB.refPt[1][0]) / 2)  # Template Center
    pos_y = int((BB.refPt[0][1] + BB.refPt[1][1]) / 2)  # Template Center
    target_w = int(abs(BB.refPt[1][0] - BB.refPt[0][0]))  # Template Width
    target_h = int(abs(BB.refPt[1][1] - BB.refPt[0][1]))  # Template Height

    # ===========================
    # ----- Begin Tracking -----
    # ===========================

    live_tracker(hp, run, design, pos_x, pos_y, target_w, target_h,
                 final_score_sz, templates_z, scores, cap, vid_write, frame,
                 stream_path, e2s)
    cap.release()
    cv2.destroyAllWindows()

    if run.save_video:
        vid_write.release()
def main():

    hp, evaluation, run, env, design = parse_arguments()
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    print("final_score_sz is:%d" % (final_score_sz))
    gt, frame_name_list, frame_sz, n_frames = _init_video(
        env, evaluation, videos_path)
    num_frames = np.size(frame_name_list)
    scale_factors = hp.scale_step**np.linspace(-np.ceil(hp.scale_num / 2),
                                               np.ceil(hp.scale_num / 2),
                                               hp.scale_num)
    # cosine window to penalize large displacements
    hann_1d = np.expand_dims(np.hanning(final_score_sz), axis=0)
    penalty = np.transpose(hann_1d) * hann_1d
    penalty = penalty / np.sum(penalty)

    pos_x, pos_y, target_w, target_h = region_to_bbox(gt[start_frame])

    context = design.context * (target_w + target_h)
    z_sz = np.sqrt(np.prod((target_w + context) * (target_h + context)))
    x_sz = float(design.search_sz) / design.exemplar_sz * z_sz
    scaled_exemplar = z_sz * scale_factors
    scaled_search_area = x_sz * scale_factors
    scaled_target_w = target_w * scale_factors
    scaled_target_h = target_h * scale_factors

    # thresholds to saturate patches shrinking/growing
    min_z = hp.scale_min * z_sz
    max_z = hp.scale_max * z_sz
    min_x = hp.scale_min * x_sz
    max_x = hp.scale_max * x_sz

    #search size
    x_sz0_ph = scaled_search_area[0]
    x_sz1_ph = scaled_search_area[1]
    x_sz2_ph = scaled_search_area[2]
    image = Image.open(frame_name_list[0])
    image.show()
    image = np.array(image)

    # used to pad the crops
    if design.pad_with_image_mean:
        avg_chan = np.mean(image, axis=(0, 1))
    else:
        avg_chan = None
    # pad the frame if necessary
    frame_padded_z, npad_z = pad_frame_numpy(image, frame_sz, pos_y, pos_x,
                                             z_sz, avg_chan)
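    # pad_frame_numpy pads the frame so crops centred on the target stay inside it,
    # filling with the per-channel mean when avg_chan is set and with the function's
    # default fill otherwise (assumption based on the pad_with_image_mean flag above).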
    # extract tensor of z_crops
    # print  type(design.exemplar_sz)
    z_crops = extract_crops_z_numpy(frame_padded_z, npad_z, pos_y, pos_x, z_sz,
                                    design.exemplar_sz)
    print('the shape of the img z_crops is: ' + str(np.shape(z_crops)))
    z_crops = np.squeeze(z_crops)
    img = Image.fromarray(z_crops.astype('uint8'), 'RGB')
    img.show()
    frame_padded_x, npad_x = pad_frame_numpy(image, frame_sz, pos_y, pos_x,
                                             x_sz2_ph, avg_chan)
    # extract tensor of x_crops (3 scales)
    x_crops = extract_crops_x_numpy(frame_padded_x, npad_x, pos_y, pos_x,
                                    x_sz0_ph, x_sz1_ph, x_sz2_ph,
                                    design.search_sz)
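    # x_crops is expected to stack the three scaled search crops (one per scale factor),
    # each resized to design.search_sz, which is why three images are shown below.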
    print('the shape of the img x_crops is: ' + str(np.shape(x_crops)))
    x_crops_1 = np.squeeze(x_crops[0, :, :])
    img_1 = Image.fromarray(x_crops_1.astype('uint8'), 'RGB')
    img_1.show()
    x_crops_2 = np.squeeze(x_crops[1, :, :])
    img_2 = Image.fromarray(x_crops_2.astype('uint8'), 'RGB')
    img_2.show()
    x_crops_3 = np.squeeze(x_crops[2, :, :])
    img_3 = Image.fromarray(x_crops_3.astype('uint8'), 'RGB')
    img_3.show()
def main():
    hp, evaluation, run, env, design = parse_arguments()
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    siam = SiameseNet(env.root_pretrained, design.net)
    if torch.cuda.is_available():
        siam = siam.cuda()

    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        video_list = [v for v in os.listdir(dataset_folder) if not v[0] == '.']
        video_list.sort()
        nv = np.size(video_list)
        speed = np.zeros(nv * evaluation.n_subseq)
        precisions = np.zeros(nv * evaluation.n_subseq)
        precision_auc = np.zeros(nv * evaluation.n_subseq)
        ious = np.zeros(nv * evaluation.n_subseq)
        lengths = np.zeros(nv * evaluation.n_subseq)
        for i in range(nv):
            print('video: %d' % (i + 1))
            gt, frame_name_list, frame_sz, n_frame = _init_video(
                env, evaluation, video_list[i])
            starts = np.rint(
                np.linspace(0, n_frame - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]
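            # n_subseq start frames are spread evenly over the video; each subsequence
            # runs from its start frame to the end and is evaluated independently.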
            for j in range(evaluation.n_subseq):
                starts_frame = int(starts[j])
                gt_ = gt[starts_frame:, :]
                frame_name_list_ = frame_name_list[starts_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                bboxes, speed[idx] = tracker(hp, run, design, frame_name_list_,
                                             pos_x, pos_y, target_w, target_h,
                                             final_score_sz, siam,
                                             starts_frame)
                lengths[idx], precisions[idx], precision_auc[idx], ious[
                    idx] = _compile_results(gt_, bboxes,
                                            evaluation.dist_threshold)
                print(str(i) + ' -- ' + video_list[i] + \
                      ' -- Precision: ' + "%.2f" %precisions[idx] + \
                      ' -- Precisions AUC: ' + "%.2f" %precision_auc[idx] + \
                      ' -- IOU: ' + "%.2f" % ious[idx] + \
                      '-- Speed: ' + "%.2f" % speed[idx] + ' --\n')
        tot_frames = np.sum(lengths)
        mean_precision = np.sum(precisions * lengths) / tot_frames
        mean_iou = np.sum(ious * lengths) / tot_frames
        mean_precision_auc = np.sum(precision_auc * lengths) / tot_frames
        mean_speed = np.sum(speed * lengths) / tot_frames
        print(' -- Overall stats (averaged per frame) on ' + str(nv) +
              ' videos (' + str(tot_frames) + ' frames) --')
        print(' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" %mean_precision +\
              ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +\
              ' -- IOU: ' + "%.2f" %mean_iou +\
              ' -- Speed: ' + "%.2f" %mean_speed + '-- \n')
    else:
        gt, frame_name_list, _, _ = _init_video(env, evaluation,
                                                evaluation.video)
        pos_x, pos_y, target_w, target_h = region_to_bbox(
            gt[evaluation.start_frame])
        bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y,
                                target_w, target_h, final_score_sz, siam,
                                evaluation.start_frame)
        _, precision, precision_auc, iou = _compile_results(
            gt, bboxes, evaluation.dist_threshold)
        print(evaluation.video + \
              ' -- precision ' + "(%d px)" %evaluation.dist_threshold + ': ' + "%.2f" % precision +\
              ' -- precision AUC: ' + "%.2f" %precision_auc + \
              ' -- IOU: ' + "%.2f" %iou +\
              '-- Speed: ' + "%.2f" %speed + ' -- \n')
Example #27
0
def main(argv):
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    hp, evaluation, env, design = parse_arguments(root_dir)
    cmd_args = parse_command_line_arguments()

    if 'otb13' in cmd_args.dataset_name:
        dataset_type = 'otb13'
    elif 'otb15' in cmd_args.dataset_name:
        dataset_type = 'otb15'
    elif 'vot16' in cmd_args.dataset_name:
        dataset_type = 'vot16'
    elif 'vot17' in cmd_args.dataset_name:
        dataset_type = 'vot17'

    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] =>   [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
    # instead of
    # [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # build TF graph once for all
    filename, image, templates_x, templates_z, scores_list =\
        siam.build_tracking_graph(
            root_dir, final_score_sz, design, env, hp)

    # iterate through all videos of dataset_name
    videos_folder = os.path.join(root_dir, env.root_dataset,
                                 cmd_args.dataset_name)
    videos_list = [
        v for v in os.listdir(videos_folder)
        if os.path.isdir(os.path.join(videos_folder, v))
    ]
    videos_list.sort()
    nv = np.size(videos_list)
    speed = np.zeros(nv * evaluation.n_subseq)
    precisions = np.zeros(nv * evaluation.n_subseq)
    precisions_auc = np.zeros(nv * evaluation.n_subseq)
    ious = np.zeros(nv * evaluation.n_subseq)
    lengths = np.zeros(nv * evaluation.n_subseq)
    successes = np.zeros(nv * evaluation.n_subseq)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        tf.global_variables_initializer().run()

        vars_to_load = []
        for v in tf.global_variables():
            if 'postnorm' not in v.name:
                vars_to_load.append(v)
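        # variables whose name contains 'postnorm' are skipped, presumably because they
        # are not stored in the siam_mcf checkpoint restored below.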

        siam_ckpt_name = 'pretrained/siam_mcf.ckpt-50000'
        siam_saver = tf.train.Saver(vars_to_load)
        siam_saver.restore(sess, siam_ckpt_name)

        for i in range(nv):
            gt, frame_name_list, frame_sz, n_frames = _init_video(
                videos_list[i], videos_folder, dataset_type)
            starts = np.rint(
                np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]
            for j in range(evaluation.n_subseq):
                start_frame = int(starts[j])
                gt_ = gt[start_frame:, :]
                frame_name_list_ = frame_name_list[start_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                bboxes, speed[idx] = track_one_sequence(hp,
                                                        design,
                                                        frame_name_list_,
                                                        pos_x,
                                                        pos_y,
                                                        target_w,
                                                        target_h,
                                                        final_score_sz,
                                                        filename,
                                                        image,
                                                        templates_x,
                                                        templates_z,
                                                        scores_list,
                                                        videos_list[i],
                                                        dataset_type,
                                                        sess,
                                                        cmd_args.visualize,
                                                        cmd_args.save_images,
                                                        cmd_args.save_bboxes,
                                                        vot_handle=None,
                                                        gt=gt_)
                (lengths[idx], precisions[idx], precisions_auc[idx], ious[idx],
                 successes[idx]) = _compile_results(gt_, bboxes,
                                                    evaluation.dist_threshold)
                print(
                    str(i) + ' -- ' + videos_list[i] + ' -- Precision: ' +
                    "%.2f" % precisions[idx] + ' -- Precisions AUC: ' +
                    "%.2f" % precisions_auc[idx] + ' -- IOU: ' +
                    "%.2f" % ious[idx] + ' -- [email protected]: ' +
                    "%.2f" % successes[idx] + ' -- Speed: ' +
                    "%.2f" % speed[idx] + ' --')

    tot_frames = np.sum(lengths)
    mean_precision = np.sum(precisions * lengths) / tot_frames
    mean_precision_auc = np.sum(precisions_auc * lengths) / tot_frames
    mean_iou = np.sum(ious * lengths) / tot_frames
    mean_speed = np.sum(speed * lengths) / tot_frames
    mean_success = np.sum(successes * lengths) / tot_frames
    print('-- Overall stats (averaged per frame) on ' + str(nv) + ' videos (' +
          str(tot_frames) + ' frames) --')
    print(' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' +
          '%.2f' % mean_precision + ' -- Precisions AUC: ' +
          "%.2f" % mean_precision_auc + ' -- IOU: ' + "%.2f" % mean_iou +
          ' -- Success: ' + "%.2f" % mean_success + ' -- Speed: ' +
          "%.2f" % mean_speed + ' --')
def evaluate():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] =>   [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
    # instead of
    # [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1

    # build the computational graph of Siamese fully-convolutional network
    siamNet = siam.Siamese(batch_size=1)
    # get tensors that will be used during tracking
    image, z_crops, x_crops, templates_z, scores, loss, _, distance_to_gt, summary = siamNet.build_tracking_graph_train(
        final_score_sz, design, env, hp)
    # iterate through all videos of evaluation.dataset
    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        videos_list = [v for v in os.listdir(dataset_folder)]
        videos_list.sort()
        nv = np.size(videos_list)
        speed = np.zeros(nv)
        precisions = np.zeros(nv)
        precisions_auc = np.zeros(nv)
        ious = np.zeros(nv)
        lengths = np.zeros(nv)
        for i in range(nv):
            gt, frame_name_list, frame_sz, n_frames = _init_video(
                env, evaluation, videos_list[i])

            gt_ = gt[0:, :]
            frame_name_list_ = frame_name_list[0:]
            pos_x, pos_y, target_w, target_h = region_to_bbox(
                gt_[0]
            )  # coordinate of gt is the bottom left point of the bbox
            idx = i
            bboxes, speed[idx] = tracker(hp,
                                         run,
                                         design,
                                         frame_name_list,
                                         pos_x,
                                         pos_y,
                                         target_w,
                                         target_h,
                                         final_score_sz,
                                         image,
                                         templates_z,
                                         scores,
                                         path_ckpt=os.path.join(
                                             design.saver_folder,
                                             design.path_ckpt),
                                         siamNet=siamNet)
            lengths[idx], precisions[idx], precisions_auc[idx], ious[
                idx] = _compile_results(gt_, bboxes, evaluation.dist_threshold)
            print(str(i) + ' -- ' + videos_list[i] + \
            ' -- Precision: ' + "%.2f" % precisions[idx] + \
            ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] + \
            ' -- IOU: ' + "%.2f" % ious[idx] + \
            ' -- Speed: ' + "%.2f" % speed[idx] + ' --')

        tot_frames = np.sum(lengths)
        mean_precision = np.sum(precisions * lengths) / tot_frames
        mean_precision_auc = np.sum(precisions_auc * lengths) / tot_frames
        mean_iou = np.sum(ious * lengths) / tot_frames
        mean_speed = np.sum(speed * lengths) / tot_frames
        print('-- Overall stats (averaged per frame) on ' + str(nv) +
              ' videos (' + str(tot_frames) + ' frames) --')
        print(' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % mean_precision +\
              ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +\
              ' -- IOU: ' + "%.2f" % mean_iou +\
              ' -- Speed: ' + "%.2f" % mean_speed + ' --')
    # evaluate only one video
    else:
        gt, frame_name_list, frame_sz, n_frames = _init_video(
            env, evaluation, evaluation.video)
        pos_x, pos_y, target_w, target_h = region_to_bbox(gt[0])
        bboxes, speed = tracker(hp,
                                run,
                                design,
                                frame_name_list,
                                pos_x,
                                pos_y,
                                target_w,
                                target_h,
                                final_score_sz,
                                image,
                                templates_z,
                                scores,
                                path_ckpt=os.path.join(design.saver_folder,
                                                       design.path_ckpt),
                                siamNet=siamNet)
        _, precision, precision_auc, iou = _compile_results(
            gt, bboxes, evaluation.dist_threshold)

        print(evaluation.video + \
           ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % precision +\
           ' -- Precision AUC: ' + "%.2f" % precision_auc + \
           ' -- IOU: ' + "%.2f" % iou + \
           ' -- Speed: ' + "%.2f" % speed + ' --')
        return precision, precision_auc, iou, speed
Example #29
0
    def __init__(self, imagepath, region):
        self.track_count = 0
        #Parameters
        self.exemplar_sz = 128
        self.search_sz = 256
        #Parse the arguments
        self.hp, self.evaluation, self.run, self.env, self.design = parse_arguments(
            mode='color')

        #Get first frame image and ground-truth
        self.region = region
        self.pos_x = region.x + region.width / 2
        self.pos_y = region.y + region.height / 2
        self.target_w = region.width
        self.target_h = region.height

        #Calculate the size of final score (upscaled size of score matrix, where score matrix
        # is convolution of results of two branches of siamese network)
        self.final_score_sz = self.hp.response_up * (self.design.score_sz - 1)

        #Calculate the scale factors
        self.scale_factors = self.hp.scale_step**np.linspace(
            -np.ceil(self.hp.scale_num / 2), np.ceil(self.hp.scale_num / 2),
            self.hp.scale_num)

        # cosine window to penalize large displacements
        hann_1d = np.expand_dims(np.hanning(self.final_score_sz), axis=0)
        penalty = np.transpose(hann_1d) * hann_1d
        self.penalty = penalty / np.sum(penalty)

        #Calculate search and target patch sizes
        context = self.design.context * (self.target_w + self.target_h)
        self.z_sz = np.sqrt(
            np.prod((self.target_w + context) * (self.target_h + context)))
        self.x_sz = float(
            self.design.search_sz) / self.design.exemplar_sz * self.z_sz

        #Initialize the network
        self.features_x, self.features_z, self.scores, self.z_crops, self.x_crops = self.InitNetwork(
        )

        latest_checkpoint = "/media/engin/63c43c7a-cb63-4c43-b70c-f3cb4d68762a/models/wbaek_colorization/model1_18022020/model.ckpt-56000"

        config1 = tf.ConfigProto()
        config1.gpu_options.visible_device_list = "1"
        config1.gpu_options.per_process_gpu_memory_fraction = 0.45

        #Load the model for search branch
        with self.graph_search.as_default():
            self.session_search = tf.Session(graph=self.graph_search,
                                             config=config1)
            saver = tf.train.Saver(tf.global_variables())
            saver.restore(self.session_search, latest_checkpoint)

        #Load the model for exemplar branch
        with self.graph_exemplar.as_default():
            self.session_exemplar = tf.Session(graph=self.graph_exemplar,
                                               config=config1)
            saver = tf.train.Saver(tf.global_variables())
            saver.restore(self.session_exemplar, latest_checkpoint)

        config2 = tf.ConfigProto()
        config2.gpu_options.visible_device_list = "1"
        config2.gpu_options.per_process_gpu_memory_fraction = 0.1
        #Create a session for matching branch
        with self.graph_match.as_default():
            self.session_match = tf.Session(graph=self.graph_match,
                                            config=config2)

        #Calculate the score for template
        # Run the template session
        with self.graph_exemplar.as_default():
            self.templates_z_, z_crops_ = self.session_exemplar.run(
                [self.features_z, self.z_crops],
                feed_dict={
                    self.exemplar_ph['filename_ph']: imagepath,
                    self.exemplar_ph['pos_x_ph']: self.pos_x,
                    self.exemplar_ph['pos_y_ph']: self.pos_y,
                    self.exemplar_ph['z_sz_ph']: self.z_sz
                })

        #Write the template image
        z_crops_image = Image.fromarray(
            np.reshape(z_crops_, (128, 128)).astype(np.uint8))
        z_crops_image.save("/home/engin/Documents/output/template.jpg")

        return
Example #30
0
def main():
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    siam = SiameseNet(env.root_pretrained, design.net)
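    # SiameseNet here appears to be the PyTorch implementation, loading pretrained
    # weights from env.root_pretrained for the architecture named by design.net.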

    # iterate through all videos of evaluation.dataset
    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        videos_list = [v for v in os.listdir(dataset_folder) if not v[0] == '.']
        videos_list.sort()
        nv = np.size(videos_list)
        speed = np.zeros(nv * evaluation.n_subseq)
        precisions = np.zeros(nv * evaluation.n_subseq)
        precisions_auc = np.zeros(nv * evaluation.n_subseq)
        ious = np.zeros(nv * evaluation.n_subseq)
        lengths = np.zeros(nv * evaluation.n_subseq)
        for i in range(nv): # -- Iterate through all videos 
            print('video: %d' % (i + 1))
            gt, frame_name_list, frame_sz, n_frames = _init_video(env, evaluation, videos_list[i])
            starts = np.rint(np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]
            for j in range(evaluation.n_subseq): # -- Iterate through a single video
                start_frame = int(starts[j])
                gt_ = gt[start_frame:, :]
                frame_name_list_ = frame_name_list[start_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                # bboxes, speed[idx] = tracker(hp, run, design, frame_name_list_, pos_x, pos_y,
                #                                                      target_w, target_h, final_score_sz, filename,
                #                                                      image, templates_z, scores, start_frame)
                bboxes, speed[idx] = tracker(hp, run, design, frame_name_list_, pos_x, pos_y,
                                             target_w, target_h, final_score_sz, siam, start_frame) # -- here is where tracker.py is called 
                lengths[idx], precisions[idx], precisions_auc[idx], ious[idx] = _compile_results(gt_, bboxes, evaluation.dist_threshold)
                print(str(i) + ' -- ' + videos_list[i] + \
                ' -- Precision: ' + "%.2f" % precisions[idx] + \
                ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] + \
                ' -- IOU: ' + "%.2f" % ious[idx] + \
                ' -- Speed: ' + "%.2f" % speed[idx] + ' --\n')

        tot_frames = np.sum(lengths)
        mean_precision = np.sum(precisions * lengths) / tot_frames
        mean_precision_auc = np.sum(precisions_auc * lengths) / tot_frames
        mean_iou = np.sum(ious * lengths) / tot_frames
        mean_speed = np.sum(speed * lengths) / tot_frames
        print('-- Overall stats (averaged per frame) on ' + str(nv) + ' videos (' + str(tot_frames) + ' frames) --')
        print(' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % mean_precision +\
              ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +\
              ' -- IOU: ' + "%.2f" % mean_iou +\
              ' -- Speed: ' + "%.2f" % mean_speed + ' --\n')

    else:
        gt, frame_name_list, _, _ = _init_video(env, evaluation, evaluation.video)
        pos_x, pos_y, target_w, target_h = region_to_bbox(gt[evaluation.start_frame])
        # bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y, target_w, target_h, final_score_sz,
        #                         filename, image, templates_z, scores, evaluation.start_frame)
        bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y, target_w, target_h, final_score_sz,
                                siam, evaluation.start_frame)
        _, precision, precision_auc, iou = _compile_results(gt, bboxes, evaluation.dist_threshold)
        print(evaluation.video + \
              ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % precision +\
              ' -- Precision AUC: ' + "%.2f" % precision_auc + \
              ' -- IOU: ' + "%.2f" % iou + \
              ' -- Speed: ' + "%.2f" % speed + ' --\n')