Example no. 1
0
def evaluate():
	# avoid printing TF debugging information
	os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
	# TODO: allow parameters from command line or leave everything in json files?
	hp, evaluation, run, env, design = parse_arguments()
	# Set size for use with tf.image.resize_images with align_corners=True.
	# For example,
	#   [1 4 7] =>   [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
	# instead of
	# [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
	final_score_sz = hp.response_up * (design.score_sz - 1) + 1
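	# For instance, with the values used elsewhere in these examples (design.score_sz = 33,
	# hp.response_up = 8), final_score_sz = 8 * (33 - 1) + 1 = 257.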
	# build TF graph once for all
	gt, frame_name_list, _, _ = _init_video(env, evaluation, evaluation.video)
	
	frame_sz = [i for i in cv2.imread(frame_name_list[0]).shape]
	
	siamNet = siam.Siamese(batch_size=1)
	image, z_crops, x_crops, templates_z, scores, loss, _, distance_to_gt, summary, templates_x, max_pos_x, max_pos_y = siamNet.build_tracking_graph_train(final_score_sz, design, env, hp, frame_sz)

	pos_x, pos_y, target_w, target_h = region_to_bbox(gt[evaluation.start_frame])
	bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y, target_w, target_h, final_score_sz,
		                    image, templates_z, scores, evaluation.start_frame,  path_ckpt = os.path.join(design.saver_folder, design.path_ckpt), siamNet = siamNet)
	_, precision, precision_auc, iou = _compile_results(gt, bboxes, evaluation.dist_threshold)
	
	print(evaluation.video + \
		  ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % precision +\
		  ' -- Precision AUC: ' + "%.2f" % precision_auc + \
		  ' -- IOU: ' + "%.2f" % iou + \
		  ' -- Speed: ' + "%.2f" % speed + ' --')
Example no. 2
0
def main(im, bbox):
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    siam = SiameseNet(env.root_pretrained, design.net)

    with Image.fromarray(im) as img:
        frame_sz = np.asarray(img.size)
        frame_sz[1], frame_sz[0] = frame_sz[0], frame_sz[1]

    im = Image.fromarray(im)

    torch.save(siam.state_dict(), '/home/nvidia/jlaplaza/siamfc_pytorch_test/siamfc_pretrained.pt')
        
    #gt, frame_name_list, _, _ = _init_video(env, evaluation, evaluation.video)
    pos_x, pos_y, target_w, target_h = region_to_bbox(bbox)
    
    print(target_w, target_h)
    # bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y, target_w, target_h, final_score_sz,
    #                         filename, image, templates_z, scores, evaluation.start_frame)

    tracker(hp, run, design, im, pos_x, pos_y, target_w, target_h, final_score_sz,
            siam, evaluation.start_frame)

    """    
def _compile_results(gt, bboxes, dist_threshold):
    l = np.size(bboxes, 0)
    gt4 = np.zeros((l, 4))
    new_distance = np.zeros(l)
    new_ious = np.zeros(l)
    n_thresholds = 50
    precisions_ths = np.zeros(n_thresholds)

    for i in range(l):
        gt4[i, :] = region_to_bbox(gt[i, :], center=False)
        new_distance[i] = _compute_distance(bboxes[i, :], gt4[i, :])
        new_ious[i] = _compute_iou(bboxes[i, :], gt4[i, :])

    # what's the percentage of frames in which the center displacement is below the given threshold? (OTB metric)
    precision = sum(new_distance < dist_threshold) / np.size(new_distance) * 100

    # find above result for many thresholds, then report the AUC
    thresholds = np.linspace(0, 25, n_thresholds + 1)
    thresholds = thresholds[-n_thresholds:]
    # reverse it so that higher values of precision go at the beginning
    thresholds = thresholds[::-1]
    for i in range(n_thresholds):
        precisions_ths[i] = sum(
            new_distance < thresholds[i]) / np.size(new_distance)
    precision_auc = np.trapz(precisions_ths)
    iou = np.mean(new_ious) * 100
    return l, precision, precision_auc, iou
Example no. 4
0
def _compile_results(gt, bboxes, dist_threshold):
	l = np.size(bboxes, 0)
	gt4 = np.zeros((l, 4))
	new_distances = np.zeros(l)
	new_ious = np.zeros(l)
	n_thresholds = 50
	precisions_ths = np.zeros(n_thresholds)

	for i in range(l):
		gt4[i, :] = region_to_bbox(gt[i, :], center=False)
		new_distances[i] = _compute_distance(bboxes[i, :], gt4[i, :])
		new_ious[i] = _compute_iou(bboxes[i, :], gt4[i, :])

	# what's the percentage of frames in which the center displacement is below the given threshold? (OTB metric)
	precision = sum(new_distances < dist_threshold)/np.size(new_distances) * 100

	# find above result for many thresholds, then report the AUC
	thresholds = np.linspace(0, 25, n_thresholds+1)
	thresholds = thresholds[-n_thresholds:]
	# reverse it so that higher values of precision go at the beginning
	thresholds = thresholds[::-1]
	for i in range(n_thresholds):
		precisions_ths[i] = sum(new_distances < thresholds[i])/np.size(new_distances)

	# integrate over the thresholds
	precision_auc = np.trapz(precisions_ths)    

	# per frame averaged intersection over union (OTB metric)
	iou = np.mean(new_ious) * 100

	return l, precision, precision_auc, iou
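# Hedged sketch (not shown in the original snippets): possible implementations of the
# _compute_distance and _compute_iou helpers that _compile_results relies on, assuming
# boxes in <x, y, w, h> corner form as produced by region_to_bbox(..., center=False).
import numpy as np

def _compute_distance_sketch(boxA, boxB):
	# Euclidean distance between the two box centers, in pixels.
	a = np.array([boxA[0] + boxA[2] / 2, boxA[1] + boxA[3] / 2])
	b = np.array([boxB[0] + boxB[2] / 2, boxB[1] + boxB[3] / 2])
	return np.linalg.norm(a - b)

def _compute_iou_sketch(boxA, boxB):
	# Intersection-over-union of two <x, y, w, h> boxes; 0 when they do not overlap.
	xA = max(boxA[0], boxB[0])
	yA = max(boxA[1], boxB[1])
	xB = min(boxA[0] + boxA[2], boxB[0] + boxB[2])
	yB = min(boxA[1] + boxA[3], boxB[1] + boxB[3])
	if xB <= xA or yB <= yA:
		return 0.0
	inter = (xB - xA) * (yB - yA)
	union = boxA[2] * boxA[3] + boxB[2] * boxB[3] - inter
	return inter / union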
def train_siam_net(design,hp,frame_name_list,num_frames,gt,filename,conv_W,conv_b,siam_net_z,loss,train_op):
    #-------------------------------------------------------------------------
    #index_z:the index of template in the frame_name_list
    #-------------------------------------------------------------------------
    
    with tf.Session() as sess:
        #tf.global_variables_initializer().run()
        sess.run(tf.global_variables_initializer())
        #tf.local_variables_initializer().run()
        #Coordinate the loading of image files
        coord=tf.train.Coordinator()
        threads=tf.train.start_queue_runners(coord=coord)
        
        #TB
        merged=tf.summary.merge_all()
        writer=tf.summary.FileWriter('/tmp/tensorlogs/siamtf',sess.graph)
        
        for i in range(0, num_frames - 1):
            pos_x, pos_y, target_w, target_h = region_to_bbox(gt[i])
            # add the context margin to get the sizes of the z and x crops
            t_sz = (target_w + target_h) * design.context_amount
            w_crop_z = target_w + t_sz
            h_crop_z = target_h + t_sz
            sz_z = np.sqrt(float(w_crop_z) * float(h_crop_z))
            sz_x = float(design.instacneSize) / float(design.exemplarSize) * sz_z
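            # Worked example (hedged; assumes the canonical SiamFC settings context_amount = 0.5,
            # exemplarSize = 127, instanceSize = 255): for a 60x40 target, t_sz = 0.5*(60+40) = 50,
            # so w_crop_z = 110, h_crop_z = 90, sz_z = sqrt(110*90) ~ 99.5 and sz_x = 255/127*sz_z ~ 199.8.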
            
            siam_net_z_ = sess.run([siam_net_z],feed_dict={
            #sess.run([train_op],feed_dict={
                                                       siam.pos_x:pos_x,
                                                       siam.pos_y:pos_y,
                                                       siam.z_size:sz_z,
                                                       filename:frame_name_list[i]})
        
            #t_start=time.time()
            #print('begin')
            #train the image which is the pair of siam_net_z
            result,train_op_=sess.run([merged,train_op],feed_dict={
                                              siam.pos_x:pos_x,
                                              siam.pos_y:pos_y,
                                              siam.z_size:sz_z,
                                              siam.x_size:float(sz_x),
                                              siam_net_z:siam_net_z_[0],
                                              filename:frame_name_list[i+1] })
            
            writer.add_summary(result,i)
        #print('loss end')
        #train --back propagation
        #tf.train.AdamOptimizer(hp.learning_rate).minimize(loss_)
           
        
        coord.request_stop()
        coord.join(threads)
Example no. 6
0
def _compile_results(gt, bboxes, dist_threshold):
    """ Computes the results for one sequence based on the tracking bounding
    boxes.

    Args:
      gt: Nx4 array: ground truth bounding boxes.
      bboxes: Nx4 array: predicted bounding boxes.
      dist_threshold: int: threshold in pixels to calculate the precision.

    Returns:
      int: number of boxes/frames in the sequence.
      float: precision of the results.
      float: precision AuC of the results.
      float: IoU of the results.
      float: success rate of the results.
    """
    l = np.size(bboxes, 0)
    gt4 = np.zeros((l, 4))
    new_distances = np.zeros(l)
    new_ious = np.zeros(l)
    n_thresholds = 50
    precisions_ths = np.zeros(n_thresholds)

    for i in range(l):
        gt4[i, :] = region_to_bbox(gt[i, :], center=False)
        new_distances[i] = _compute_distance(bboxes[i, :], gt4[i, :])
        new_ious[i] = _compute_iou(bboxes[i, :], gt4[i, :])

    # what's the percentage of frame in which center displacement is inferior
    # to given threshold? (OTB metric)
    precision = (sum(new_distances < dist_threshold) / np.size(new_distances) *
                 100)

    success = sum(new_ious > 0.5) / np.size(new_ious) * 100

    # find above result for many thresholds, then report the AUC
    thresholds = np.linspace(0, 25, n_thresholds + 1)
    thresholds = thresholds[-n_thresholds:]
    # reverse it so that higher values of precision go at the beginning
    thresholds = thresholds[::-1]
    for i in range(n_thresholds):
        precisions_ths[i] = (sum(new_distances < thresholds[i]) /
                             np.size(new_distances))

    # integrate over the thresholds
    precision_auc = np.trapz(precisions_ths)

    # per frame averaged intersection over union (OTB metric)
    iou = np.mean(new_ious) * 100

    return l, precision, precision_auc, iou, success
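# Hedged usage sketch (synthetic values, not from the original repo): calling the 5-value
# _compile_results above for one sequence. Assumes region_to_bbox, _compute_distance and
# _compute_iou are available in the same module.
def _demo_compile_results():
    demo_gt = np.array([[10, 10, 50, 80]] * 100, dtype=float)     # ground-truth <x, y, w, h> per frame
    demo_bb = np.array([[12, 11, 48, 78]] * 100, dtype=float)     # tracker output per frame
    n, precision, precision_auc, iou, success = _compile_results(demo_gt, demo_bb, dist_threshold=20)
    print('%d frames -- precision %.2f -- AUC %.2f -- IoU %.2f -- success %.2f' %
          (n, precision, precision_auc, iou, success))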
def main():
    #avoid printing TF debugging information(only show error log)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    hp, evaluation, run, env, design = parse_arguments()
    #build TF graph in siamese once for all
    #siam.init_create_net()
    filename, siam_net_z, loss, train_op = siam.make_siameseFC(env, design, hp)

    #iterate through all videos of evaluation.dataset
    if evaluation.video == 'all':
        #the path of folder of all videos
        train_data_folder = os.path.join(env.root_train_dataset,
                                         evaluation.dataset)
        videos_list = [v for v in os.listdir(train_data_folder)]
        videos_list.sort()
        num_v = len(videos_list)
        for i in range(num_v):
            gt, frame_name_list, frame_sz, n_frames = _init_train_video(
                env, evaluation, videos_list[i])
            start_frame = evaluation.start_frame
            #not sure
            #gt_=gt[start_frame:,:]
            gt_ = gt[start_frame:]
            frame_name_list_ = frame_name_list[start_frame:]
            num_frames = np.size(frame_name_list_)

            for j in range(num_frames - 1):
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[j])
                #train_siam_net(design,hp,frame_name_list,z_index,pos_x,pos_y,target_w,target_h,filename,siam_net_z,loss)
                train_siam_net(design, hp, frame_name_list, j, pos_x, pos_y,
                               target_w, target_h, filename, siam_net_z, loss,
                               train_op)

    else:
        gt, frame_name_list, _, _ = _init_train_video(env, evaluation,
                                                      evaluation.video)
        start_frame = evaluation.start_frame
        gt_ = gt[start_frame:]
        frame_name_list_ = frame_name_list[start_frame:]
        num_frames = np.size(frame_name_list_)

        train_siam_net(design, hp, frame_name_list, num_frames, gt, filename,
                       siam_net_z, loss, train_op)
        '''for i in range(num_frames-1):
Example no. 8
0
def main():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] =>   [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
    # instead of
    # [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # build TF graph once for all
    image, templates_z, scores = siam.build_tracking_graph_2(
        final_score_sz, design, env)

    # iterate through all videos of evaluation.dataset
    gt, frame_name_list, _, _ = _init_video(env, evaluation, evaluation.video)
    pos_x, pos_y, target_w, target_h = region_to_bbox(
        gt[evaluation.start_frame])
    track_cam(hp, run, design, final_score_sz, image, templates_z, scores,
              evaluation.start_frame)
def _compile_results(gt, bboxes, dist_threshold):
    l = np.size(bboxes, 0)
    #np.zeros(shape=(1,4),dtype=float, order='C')
    gt4 = np.zeros((l, 4))
    new_distances = np.zeros(l)
    new_ious = np.zeros(l)
    n_thresholds = 50
    precisions_ths = np.zeros(n_thresholds)

    for i in range(l):
        gt4[i, :] = region_to_bbox(gt[i, :], center=False)
        new_distances[i] = _compute_distance(bboxes[i, :], gt4[i, :])
        # compute the overlap (IoU) for this frame
        new_ious[i] = _compute_iou(bboxes[i, :], gt4[i, :])

    # what's the percentage of frames in which the center displacement is below the given threshold? (OTB metric)
    # sum(new_distances < dist_threshold) counts the frames whose distance is below the threshold
    precision = sum(
        new_distances < dist_threshold) / np.size(new_distances) * 100

    #find above result for many thresholds,then report the AUC
    thresholds = np.linspace(0, 25, n_thresholds + 1)
    # keep the last n_thresholds values (drop the first one)
    thresholds = thresholds[-n_thresholds:]
    # reverse it so that higher values of precision go at the beginning
    thresholds = thresholds[::-1]
    for i in range(n_thresholds):
        precisions_ths[i] = sum(
            new_distances < thresholds[i]) / np.size(new_distances)

    #integrate over the thresholds
    # AUC (Area Under Curve): the area under the precision-vs-threshold curve
    precision_auc = np.trapz(precisions_ths)

    # per frame averaged intersection over union (OTB metric)
    iou = np.mean(new_ious) * 100

    return l, precision, precision_auc, iou
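# Hedged sketch (assumption; the real helper lives in the repos' utils and is not shown here):
# region_to_bbox converts a ground-truth region into a bounding box. With center=True it returns
# <cx, cy, w, h>, with center=False it returns <x, y, w, h>. Polygon regions (8 values) are
# approximated here by their axis-aligned enclosing box.
import numpy as np

def region_to_bbox_sketch(region, center=True):
    region = np.asarray(region, dtype=float)
    if region.size == 4:
        x, y, w, h = region
    else:
        xs, ys = region[0::2], region[1::2]
        x, y = xs.min(), ys.min()
        w, h = xs.max() - x, ys.max() - y
    if center:
        return x + w / 2, y + h / 2, w, h
    return x, y, w, h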
Example no. 10
0
def initialize(self, image_file, box):
    pos_x, pos_y, target_w, target_h = region_to_bbox(box)
    self.tracker = SiamMcfTracker(
        self.design.context,
        self.design.exemplar_sz,
        self.design.search_sz,
        self.hp.scale_step,
        self.hp.scale_num,
        self.hp.scale_penalty,
        self.hp.scale_lr,
        self.hp.window_influence,
        self.design.tot_stride,
        self.hp.response_up,
        self.final_score_sz,
        pos_x,
        pos_y,
        target_w,
        target_h,
        image_file,
        self.sess,
        self.templates_z,
        self.filename,
    )
def evaluate():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] =>   [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
    # instead of
    # [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1

    # build the computational graph of Siamese fully-convolutional network
    siamNet = siam.Siamese(batch_size=1)
    # get tensors that will be used during tracking
    image, z_crops, x_crops, templates_z, scores, loss, _, distance_to_gt, summary = siamNet.build_tracking_graph_train(
        final_score_sz, design, env, hp)
    # iterate through all videos of evaluation.dataset
    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        videos_list = [v for v in os.listdir(dataset_folder)]
        videos_list.sort()
        nv = np.size(videos_list)
        speed = np.zeros(nv)
        precisions = np.zeros(nv)
        precisions_auc = np.zeros(nv)
        ious = np.zeros(nv)
        lengths = np.zeros(nv)
        for i in range(nv):
            gt, frame_name_list, frame_sz, n_frames = _init_video(
                env, evaluation, videos_list[i])

            gt_ = gt[0:, :]
            frame_name_list_ = frame_name_list[0:]
            pos_x, pos_y, target_w, target_h = region_to_bbox(
                gt_[0]
            )  # coordinate of gt is the bottom left point of the bbox
            idx = i
            bboxes, speed[idx] = tracker(hp,
                                         run,
                                         design,
                                         frame_name_list,
                                         pos_x,
                                         pos_y,
                                         target_w,
                                         target_h,
                                         final_score_sz,
                                         image,
                                         templates_z,
                                         scores,
                                         path_ckpt=os.path.join(
                                             design.saver_folder,
                                             design.path_ckpt),
                                         siamNet=siamNet)
            lengths[idx], precisions[idx], precisions_auc[idx], ious[
                idx] = _compile_results(gt_, bboxes, evaluation.dist_threshold)
            print(str(i) + ' -- ' + videos_list[i] + \
            ' -- Precision: ' + "%.2f" % precisions[idx] + \
            ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] + \
            ' -- IOU: ' + "%.2f" % ious[idx] + \
            ' -- Speed: ' + "%.2f" % speed[idx] + ' --')

        tot_frames = np.sum(lengths)
        mean_precision = np.sum(precisions * lengths) / tot_frames
        mean_precision_auc = np.sum(precisions_auc * lengths) / tot_frames
        mean_iou = np.sum(ious * lengths) / tot_frames
        mean_speed = np.sum(speed * lengths) / tot_frames
        print('-- Overall stats (averaged per frame) on ' + str(nv) +
              ' videos (' + str(tot_frames) + ' frames) --')
        print(' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % mean_precision +\
              ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +\
              ' -- IOU: ' + "%.2f" % mean_iou +\
              ' -- Speed: ' + "%.2f" % mean_speed + ' --')
    # evaluate only one video
    else:
        gt, frame_name_list, frame_sz, n_frames = _init_video(
            env, evaluation, evaluation.video)
        pos_x, pos_y, target_w, target_h = region_to_bbox(gt[0])
        bboxes, speed = tracker(hp,
                                run,
                                design,
                                frame_name_list,
                                pos_x,
                                pos_y,
                                target_w,
                                target_h,
                                final_score_sz,
                                image,
                                templates_z,
                                scores,
                                path_ckpt=os.path.join(design.saver_folder,
                                                       design.path_ckpt),
                                siamNet=siamNet)
        _, precision, precision_auc, iou = _compile_results(
            gt, bboxes, evaluation.dist_threshold)

        print(evaluation.video + \
           ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % precision +\
           ' -- Precision AUC: ' + "%.2f" % precision_auc + \
           ' -- IOU: ' + "%.2f" % iou + \
           ' -- Speed: ' + "%.2f" % speed + ' --')
        return precision, precision_auc, iou, speed
Example no. 12
0
def main():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] =>   [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
    # instead of
    # [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # build TF graph once for all
    filename, image, templates_z, scores = siam.build_tracking_graph(
        final_score_sz, design, env)

    # iterate through all videos of evaluation.dataset
    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        videos_list = [v for v in os.listdir(dataset_folder)]
        videos_list.sort()
        nv = np.size(videos_list)
        speed = np.zeros(nv * evaluation.n_subseq)
        precisions = np.zeros(nv * evaluation.n_subseq)
        precisions_auc = np.zeros(nv * evaluation.n_subseq)
        ious = np.zeros(nv * evaluation.n_subseq)
        lengths = np.zeros(nv * evaluation.n_subseq)
        for i in range(nv):
            gt, frame_name_list, frame_sz, n_frames = _init_video(
                env, evaluation, videos_list[i])
            starts = np.rint(
                np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]
            for j in range(evaluation.n_subseq):
                start_frame = int(starts[j])
                gt_ = gt[start_frame:, :]
                frame_name_list_ = frame_name_list[start_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                # Call Tracker for the selected sequence
                print("Tracking started!")
                bboxes, speed[idx] = tracker(hp, run, design, frame_name_list_,
                                             pos_x, pos_y, target_w, target_h,
                                             final_score_sz, filename, image,
                                             templates_z, scores, start_frame)
                lengths[idx], precisions[idx], precisions_auc[idx], ious[
                    idx] = _compile_results(gt_, bboxes,
                                            evaluation.dist_threshold)
                print( str(i) + ' -- ' + videos_list[i] + \
                ' -- Precision: ' + "%.2f" % precisions[idx] + \
                ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] + \
                ' -- IOU: ' + "%.2f" % ious[idx] + \
                ' -- Speed: ' + "%.2f" % speed[idx] + ' --' )

        tot_frames = np.sum(lengths)
        mean_precision = np.sum(precisions * lengths) / tot_frames
        mean_precision_auc = np.sum(precisions_auc * lengths) / tot_frames
        mean_iou = np.sum(ious * lengths) / tot_frames
        mean_speed = np.sum(speed * lengths) / tot_frames
        print('-- Overall stats (averaged per frame) on ' + str(nv) +
              ' videos (' + str(tot_frames) + ' frames) --')
        print( ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % mean_precision +\
              ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +\
              ' -- IOU: ' + "%.2f" % mean_iou +\
              ' -- Speed: ' + "%.2f" % mean_speed + ' --' )

    else:
        gt, frame_name_list, _, n_frames = _init_video(env, evaluation,
                                                       evaluation.video)
        #pos_x, pos_y, target_w, target_h = region_to_bbox(gt[evaluation.start_frame])

        # np.size(frame_name_list) = Amount of frames
        # ott = amount of Objects To Track
        ott = len(gt) if evaluation.multi_object else 1
        objects = np.zeros((ott, 4))
        for i in range(ott):
            objects[i, :] = region_to_bbox(gt[i])

        # Call Tracker for the selected sequence.
        print("Tracking started!")
        bboxes, speed = tracker(hp, run, design, frame_name_list, objects,
                                final_score_sz, filename, image, templates_z,
                                scores, evaluation.start_frame)

        if evaluation.multi_object:
            print('No Ground Truth available for multi object, just printing speed result....\n' + \
                  evaluation.video + \
                  ' -- Speed: ' + "%.2f" % speed + ' --' )
        else:
            _, precision, precision_auc, iou = _compile_results(
                gt, bboxes, evaluation.dist_threshold)
            print( evaluation.video + \
                  ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % precision +\
                  ' -- Precision AUC: ' + "%.2f" % precision_auc + \
                  ' -- IOU: ' + "%.2f" % iou + \
                  ' -- Speed: ' + "%.2f" % speed + ' --' )
    print("Tracking finished!")
Example no. 13
0
def main():
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    siam = SiameseNet(env.root_pretrained, design.net)

    # iterate through all videos of evaluation.dataset
    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        videos_list = [v for v in os.listdir(dataset_folder) if not v[0] == '.']
        videos_list.sort()
        nv = np.size(videos_list)
        speed = np.zeros(nv * evaluation.n_subseq)
        precisions = np.zeros(nv * evaluation.n_subseq)
        precisions_auc = np.zeros(nv * evaluation.n_subseq)
        ious = np.zeros(nv * evaluation.n_subseq)
        lengths = np.zeros(nv * evaluation.n_subseq)
        for i in range(nv): # -- Iterate through all videos 
            print('video: %d' % (i + 1))
            gt, frame_name_list, frame_sz, n_frames = _init_video(env, evaluation, videos_list[i])
            starts = np.rint(np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]
            for j in range(evaluation.n_subseq): # -- Iterate through a single video
                start_frame = int(starts[j])
                gt_ = gt[start_frame:, :]
                frame_name_list_ = frame_name_list[start_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                # bboxes, speed[idx] = tracker(hp, run, design, frame_name_list_, pos_x, pos_y,
                #                                                      target_w, target_h, final_score_sz, filename,
                #                                                      image, templates_z, scores, start_frame)
                bboxes, speed[idx] = tracker(hp, run, design, frame_name_list_, pos_x, pos_y,
                                             target_w, target_h, final_score_sz, siam, start_frame) # -- here is where tracker.py is called 
                lengths[idx], precisions[idx], precisions_auc[idx], ious[idx] = _compile_results(gt_, bboxes, evaluation.dist_threshold)
                print(str(i) + ' -- ' + videos_list[i] + \
                ' -- Precision: ' + "%.2f" % precisions[idx] + \
                ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] + \
                ' -- IOU: ' + "%.2f" % ious[idx] + \
                ' -- Speed: ' + "%.2f" % speed[idx] + ' --\n')

        tot_frames = np.sum(lengths)
        mean_precision = np.sum(precisions * lengths) / tot_frames
        mean_precision_auc = np.sum(precisions_auc * lengths) / tot_frames
        mean_iou = np.sum(ious * lengths) / tot_frames
        mean_speed = np.sum(speed * lengths) / tot_frames
        print('-- Overall stats (averaged per frame) on ' + str(nv) + ' videos (' + str(tot_frames) + ' frames) --')
        print(' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % mean_precision +\
              ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +\
              ' -- IOU: ' + "%.2f" % mean_iou +\
              ' -- Speed: ' + "%.2f" % mean_speed + ' --\n')

    else:
        gt, frame_name_list, _, _ = _init_video(env, evaluation, evaluation.video)
        pos_x, pos_y, target_w, target_h = region_to_bbox(gt[evaluation.start_frame])
        # bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y, target_w, target_h, final_score_sz,
        #                         filename, image, templates_z, scores, evaluation.start_frame)
        bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y, target_w, target_h, final_score_sz,
                                siam, evaluation.start_frame)
        _, precision, precision_auc, iou = _compile_results(gt, bboxes, evaluation.dist_threshold)
        print(evaluation.video + \
              ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % precision +\
              ' -- Precision AUC: ' + "%.2f" % precision_auc + \
              ' -- IOU: ' + "%.2f" % iou + \
              ' -- Speed: ' + "%.2f" % speed + ' --\n')
def main():
    #avoid printing TF debugging information
    # only show error logs
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    #TODO:allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()

    #gt_,frame_name_list_,_,_=_init_video(env,evaluation,evaluation.video)
    #pos_x,pos_y,target_w,target_h=region_to_bbox(gt_[0])
    #print('---target_w---'+"%d"%target_w+'--target_h---'+"%d"%target_h)
    #why?????????????
    #Set size for use with tf.image.resize_images with align_corners=True
    #For example:
    # [1,4,7]=>[1 2 3 4 5 6 7]  (length 3*(3-1)+1)
    #instead of
    #[1,4,7]=>[1 1 2 3 4 5 6 7 7](length 3*3)
    #Why hp.response_up???
    #design.score_sz=33
    #hp.response_up=8
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    #build TF graph once for all
    # filename, image, templates_z, scores are only graph nodes here (no values yet);
    # the real outputs are obtained by running them in a session (done inside tracker)
    #return filename, image, templates_z, scores_up
    filename, image, templates_z, scores = siam.build_tracking_graph(
        final_score_sz, design, env)

    #iterate through all videos of evaluation dataset
    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        # os.listdir(path): returns the files and folders under the given path
        videos_list = [v for v in os.listdir(dataset_folder)]
        videos_list.sort()
        nv = np.size(videos_list)
        speed = np.zeros(nv * evaluation.n_subseq)
        precisions = np.zeros(nv * evaluation.n_subseq)
        precisions_auc = np.zeros(nv * evaluation.n_subseq)
        ious = np.zeros(nv * evaluation.n_subseq)
        lengths = np.zeros(nv * evaluation.n_subseq)
        # iterate over the different video sequences
        for i in range(nv):
            #frame_name_list:each image of a video sequence
            gt, frame_name_list, frame_sz, n_frames = _init_video(
                env, evaluation, videos_list[i])
            # np.rint(): rounds floats to the nearest integer, keeping the float dtype
            #n_subseq=3
            starts = np.rint(
                np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            # split the sequence into n_subseq+1 points and keep the first n_subseq as start indices
            starts = starts[0:evaluation.n_subseq]
            for j in range(evaluation.n_subseq):
                start_frame = int(starts[j])
                # keep frames from start_frame onward (one of the n_subseq subsequences)
                gt_ = gt[start_frame:, :]
                frame_name_list_ = frame_name_list[start_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                #Update
                bboxes, speed[idx] = tracker(
                    hp,
                    run,
                    design,
                    env,
                    evaluation,
                    frame_name_list_,
                    pos_x,
                    pos_y,
                    #bboxes,speed[idx]=tracker(hp,run,design,frame_name_list_,pos_x,pos_y,
                    target_w,
                    target_h,
                    final_score_sz,
                    filename,
                    image,
                    templates_z,
                    scores,
                    start_frame)
                #gt_:ground truth
                #bboxes:the result of tracking
                lengths[idx], precisions[idx], precisions_auc[idx], ious[
                    idx] = _compile_results(gt_, bboxes,
                                            evaluation.dist_threshold)
                print(
                    str(i) + '--' + videos_list[i] + '--Precision: ' +
                    "%.2f" % precisions[idx] + '--Precisions AUC: ' +
                    "%.2f" % precisions_auc[idx] + '--IOU: ' +
                    "%.2f" % ious[idx] + '--Speed: ' + "%.2f" % speed[idx] +
                    '--')

    else:
        #evaluation.video='all'
        print(evaluation.video)
        gt, frame_name_list, _, _ = _init_video(env, evaluation,
                                                evaluation.video)
        #evaluation.start_frame=0
        pos_x, pos_y, target_w, target_h = region_to_bbox(
            gt[evaluation.start_frame])

        #Update
        #bboxes,speed=tracker(hp,run,design,frame_name_list,pos_x,pos_y,target_w,target_h,final_score_sz,
        bboxes, speed = tracker(hp, run, design, env, evaluation,
                                frame_name_list, pos_x, pos_y, target_w,
                                target_h, final_score_sz, filename, image,
                                templates_z, scores, evaluation.start_frame)
        _, precision, precisions_auc, iou = _compile_results(
            gt, bboxes, evaluation.dist_threshold)
        #print(evaluation.video+
        print(evaluation.video + '--Precision: ' +
              "(%d px)" % evaluation.dist_threshold + ': ' +
              "%.2f" % precision + '--Precisions AUC: ' +
              "%.2f" % precisions_auc + '--IOU: ' + "%.2f" % iou +
              '--Speed: ' + "%.2f" % speed + '--')
Example no. 15
0
def main(argv):
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    hp, evaluation, env, design = parse_arguments(root_dir)
    cmd_args = parse_command_line_arguments()

    if 'otb13' in cmd_args.dataset_name:
        dataset_type = 'otb13'
    elif 'otb15' in cmd_args.dataset_name:
        dataset_type = 'otb15'
    elif 'vot16' in cmd_args.dataset_name:
        dataset_type = 'vot16'
    elif 'vot17' in cmd_args.dataset_name:
        dataset_type = 'vot17'

    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] =>   [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
    # instead of
    # [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # build TF graph once for all
    filename, image, templates_x, templates_z, scores_list =\
        siam.build_tracking_graph(
            root_dir, final_score_sz, design, env, hp)

    # iterate through all videos of dataset_name
    videos_folder = os.path.join(root_dir, env.root_dataset,
                                 cmd_args.dataset_name)
    videos_list = [
        v for v in os.listdir(videos_folder)
        if os.path.isdir(os.path.join(videos_folder, v))
    ]
    videos_list.sort()
    nv = np.size(videos_list)
    speed = np.zeros(nv * evaluation.n_subseq)
    precisions = np.zeros(nv * evaluation.n_subseq)
    precisions_auc = np.zeros(nv * evaluation.n_subseq)
    ious = np.zeros(nv * evaluation.n_subseq)
    lengths = np.zeros(nv * evaluation.n_subseq)
    successes = np.zeros(nv * evaluation.n_subseq)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        tf.global_variables_initializer().run()

        vars_to_load = []
        for v in tf.global_variables():
            if 'postnorm' not in v.name:
                vars_to_load.append(v)

        siam_ckpt_name = 'pretrained/siam_mcf.ckpt-50000'
        siam_saver = tf.train.Saver(vars_to_load)
        siam_saver.restore(sess, siam_ckpt_name)

        for i in range(nv):
            gt, frame_name_list, frame_sz, n_frames = _init_video(
                videos_list[i], videos_folder, dataset_type)
            starts = np.rint(
                np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]
            for j in range(evaluation.n_subseq):
                start_frame = int(starts[j])
                gt_ = gt[start_frame:, :]
                frame_name_list_ = frame_name_list[start_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                bboxes, speed[idx] = track_one_sequence(hp,
                                                        design,
                                                        frame_name_list_,
                                                        pos_x,
                                                        pos_y,
                                                        target_w,
                                                        target_h,
                                                        final_score_sz,
                                                        filename,
                                                        image,
                                                        templates_x,
                                                        templates_z,
                                                        scores_list,
                                                        videos_list[i],
                                                        dataset_type,
                                                        sess,
                                                        cmd_args.visualize,
                                                        cmd_args.save_images,
                                                        cmd_args.save_bboxes,
                                                        vot_handle=None,
                                                        gt=gt_)
                (lengths[idx], precisions[idx], precisions_auc[idx], ious[idx],
                 successes[idx]) = _compile_results(gt_, bboxes,
                                                    evaluation.dist_threshold)
                print(
                    str(i) + ' -- ' + videos_list[i] + ' -- Precision: ' +
                    "%.2f" % precisions[idx] + ' -- Precisions AUC: ' +
                    "%.2f" % precisions_auc[idx] + ' -- IOU: ' +
                    "%.2f" % ious[idx] + ' -- [email protected]: ' +
                    "%.2f" % successes[idx] + ' -- Speed: ' +
                    "%.2f" % speed[idx] + ' --')

    tot_frames = np.sum(lengths)
    mean_precision = np.sum(precisions * lengths) / tot_frames
    mean_precision_auc = np.sum(precisions_auc * lengths) / tot_frames
    mean_iou = np.sum(ious * lengths) / tot_frames
    mean_speed = np.sum(speed * lengths) / tot_frames
    mean_success = np.sum(successes * lengths) / tot_frames
    print('-- Overall stats (averaged per frame) on ' + str(nv) + ' videos (' +
          str(tot_frames) + ' frames) --')
    print(' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' +
          '%.2f' % mean_precision + ' -- Precisions AUC: ' +
          "%.2f" % mean_precision_auc + ' -- IOU: ' + "%.2f" % mean_iou +
          ' -- Success@0.5: ' + "%.2f" % mean_success + ' -- Speed: ' +
          "%.2f" % mean_speed + ' --')
Example no. 16
0
def main():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments(mode="conv2")
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] =>   [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
    # instead of
    # [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * design.score_sz
    # build TF graph once for all
    image, templates_z, scores = siam.build_tracking_graph(
        final_score_sz, design, env)
    # iterate through all videos of evaluation.dataset
    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        videos_list = [v for v in os.listdir(dataset_folder)]
        videos_list.sort()
        # videos_list = videos_list[91:][:] #only use vot 2016
        nv = np.size(videos_list)
        speed = np.zeros(nv * evaluation.n_subseq)
        precisions = np.zeros(nv * evaluation.n_subseq)
        precisions_auc = np.zeros(nv * evaluation.n_subseq)
        ious = np.zeros(nv * evaluation.n_subseq)
        success_auc = np.zeros(nv * evaluation.n_subseq)
        lengths = np.zeros(nv * evaluation.n_subseq)
        for i in range(nv):
            # gt, frame_name_list, frame_sz, n_frames, img_mode = _init_video(env, evaluation, videos_list[i])
            gt, frame_name_list, frame_sz, n_frames, img_mode = _init_video_OTB(
                env, evaluation, videos_list[i])

            starts = np.rint(
                np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]
            for j in range(evaluation.n_subseq):
                start_frame = int(starts[j])
                gt_ = gt[start_frame:, :]
                frame_name_list_ = frame_name_list[start_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                bboxes, speed[idx] = tracker(hp, run, design, frame_name_list_,
                                             pos_x, pos_y, target_w, target_h,
                                             final_score_sz, image,
                                             templates_z, scores, start_frame)
                lengths[idx], precisions[idx], precisions_auc[idx], ious[
                    idx], success_auc[idx] = _compile_results(
                        gt_, bboxes, evaluation.dist_threshold)

                print str(i) + ' -- ' + videos_list[i] + \
                ' -- Precision: ' + "%.2f" % precisions[idx] + \
                ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] + \
                ' -- IOU: ' + "%.2f" % ious[idx] + \
                ' -- Success AUC: ' + "%.2f" % success_auc[idx] + \
                ' -- Speed: ' + "%.2f" % speed[idx] + ' --'
                print

        tot_frames = np.mean(lengths)
        mean_precision = np.mean(precisions)
        mean_precision_auc = np.mean(precisions_auc)
        mean_iou = np.mean(ious)
        mean_success_auc = np.mean(success_auc)
        mean_speed = np.mean(speed)

        print 'data set ' + evaluation.dataset + ' z_lr %f scale_step %f scale_penalty %f scale_lr %f window_influence %f' % (
            hp.z_lr, hp.scale_step, hp.scale_penalty, hp.scale_lr,
            hp.window_influence)
        print '-- Overall stats (averaged per frame) on ' + str(
            nv) + ' videos (' + str(tot_frames) + ' frames) --'
        print ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % mean_precision +\
              ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +\
              ' -- IOU: ' + "%.2f" % mean_iou +\
              ' -- Success AUC: ' + "%.2f" % mean_success_auc +\
              ' -- Speed: ' + "%.2f" % mean_speed + ' --'
        print

        with open('log_test.txt', 'a+') as f:
            f.write(time.asctime(time.localtime(time.time())) + '\r\n')
            f.write(
                'data set ' + evaluation.dataset +
                ' z_lr %f scale_step %f scale_penalty %f scale_lr %f window_influence %f \r\n'
                % (hp.z_lr, hp.scale_step, hp.scale_penalty, hp.scale_lr,
                   hp.window_influence))
            f.write('-- Overall stats (averaged per frame) on ' + str(nv) +
                    ' videos (' + str(tot_frames) + ' frames) --\r\n')
            f.write(' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % mean_precision + \
                    ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc + \
                    ' -- IOU: ' + "%.2f" % mean_iou + \
                    ' -- AUC: ' + "%.3f" % mean_success_auc + \
                    ' -- Speed: ' + "%.2f" % mean_speed + ' --\r\n')
            f.write('\r\n')

    else:
        gt, frame_name_list, _, _ = _init_video(env, evaluation,
                                                evaluation.video)
        pos_x, pos_y, target_w, target_h = region_to_bbox(
            gt[evaluation.start_frame])
        bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y,
                                target_w, target_h, final_score_sz,
                                image, templates_z, scores,
                                evaluation.start_frame)
        _, precision, precision_auc, iou, _ = _compile_results(
            gt, bboxes, evaluation.dist_threshold)
        print evaluation.video + \
              ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % precision +\
              ' -- Precision AUC: ' + "%.2f" % precision_auc + \
              ' -- IOU: ' + "%.2f" % iou + \
              ' -- Speed: ' + "%.2f" % speed + ' --'
        print
Example no. 17
0
def track_one_sequence(hp,
                       design,
                       frame_name_list,
                       pos_x,
                       pos_y,
                       target_w,
                       target_h,
                       final_score_sz,
                       filename,
                       image,
                       templates_x,
                       templates_z,
                       scores_list,
                       vid_name,
                       dataset_type,
                       sess,
                       visualize_results,
                       save_images,
                       save_bboxes,
                       vot_handle,
                       gt=None):
    """ Handles tracking for one whole sequence. Inputs are fed to the network
    and the results are collected and can be shown on the screen and saved to
    the disk.

    Args:
      hp: namespace: hyperparameters.
      design: namespace: design parameters.
      frame_name_list: string list: list of sorted image paths to be read.
      pos_x: int: horizontal center of the target.
      pos_y: int: vertical center of the target.
      target_w: int: target width.
      target_h: int: target height.
      final_score_sz: int: size of the score map after upsampling.
      filename: string tensor: placeholder for the image path to be read.
      image: 3D tensor: the image read from the path.
      templates_x: 4D tensor: instance features from one or more layers
        concatenated by channels. See siam_mcf_net.inference comments for more
        details.
      templates_z: 4D tensor: exemplar features from one or more layers
        concatenated by channels. See siam_mcf_net.inference comments for more
        details.
      scores_list: 5D tensor: batch of score heatmaps for each of the selected
        layers.
      vid_name: string: name of this sequence (only for saving purposes).
      dataset_type: string: name of this dataset (only for saving purposes).
      sess: an open tf.Session to execute the graph.
      visualize_results: boolean: whether to show the results on the screen.
      save_images: boolean: whether to save image results to the disk.
      save_bboxes: boolean: whether to save bounding boxes to the disk.
      vot_handle: vot handle for running the VOT toolkit.
      gt: Nx4 array: optional ground truth bounding boxes (only for
        visualization purposes).

    Returns:
      Nx4 array: the resulting bounding boxes from the tracking.
      float: the tracking speed in frames per second.
    """
    num_frames = np.size(frame_name_list)
    # stores tracker's output for evaluation
    bboxes = np.zeros((num_frames, 4))

    if save_images:
        res_dir = 'results/%s/frames/%s' % (
            dataset_type, vid_name)
        if not os.path.exists(res_dir):
            os.makedirs(res_dir)

    if save_bboxes:
        bb_res_dir = 'results/%s/bboxes' % (dataset_type)
        if not os.path.exists(bb_res_dir):
            os.makedirs(bb_res_dir)

    # save first frame position (from ground-truth)
    bboxes[0, :] = pos_x-target_w/2, pos_y-target_h/2, target_w, target_h

    if vot_handle is not None:
        frame_path = vot_handle.frame()
    else:
        frame_path = frame_name_list[0]

    tracker = SiamMcfTracker(
        design.context, design.exemplar_sz, design.search_sz, hp.scale_step,
        hp.scale_num, hp.scale_penalty, hp.scale_lr, hp.window_influence,
        design.tot_stride, hp.response_up, final_score_sz, pos_x, pos_y,
        target_w, target_h, frame_path, sess, templates_z, filename)

    t_start = time.time()

    # Get an image from the queue
    for i in range(1, num_frames):
        if vot_handle is not None:
            frame_path = vot_handle.frame()
        else:
            frame_path = frame_name_list[i]

        if save_images or visualize_results:
            image_ = sess.run(image, feed_dict={filename: frame_path})

        bbox = tracker.track(
            frame_path, sess, templates_z, templates_x, scores_list, filename)

        # convert <cx,cy,w,h> to <x,y,w,h> and save output
        bboxes[i, :] = bbox

        if vot_handle is not None:
            vot_rect = vot.Rectangle(bbox[0], bbox[1], bbox[2], bbox[3])
            vot_handle.report(vot_rect)

        if visualize_results:
            show_frame(image_, bboxes[i, :], 1)

        if save_images:
            out_img = Image.fromarray(image_.copy().astype(np.uint8))
            out_draw = ImageDraw.Draw(out_img)

            if gt is not None:
                gt_rect = np.array(region_to_bbox(gt[i, :], False)).astype(
                    np.int32)
                gt_rect[2:] = gt_rect[:2] + gt_rect[2:]

            rect = bboxes[i].copy()
            rect[2:] = rect[:2] + rect[2:]
            rect = rect.astype(np.int32)

            pillow_version = [int(x) for x in PIL.__version__.split('.')]
            if (pillow_version[0] > 5 or
                    (pillow_version[0] == 5 and pillow_version[1] >= 3)):
                if gt is not None:
                    out_draw.rectangle(
                        [tuple(gt_rect[:2]), tuple(gt_rect[2:])],
                        outline=(0, 0, 255),
                        width=2)
                out_draw.rectangle(
                    [tuple(rect[:2]), tuple(rect[2:])],
                    outline=(255, 0, 0),
                    width=3)
            else:
                if gt is not None:
                    out_draw.rectangle(
                        [tuple(gt_rect[:2]), tuple(gt_rect[2:])],
                        outline=(0, 0, 255))
                out_draw.rectangle(
                    [tuple(rect[:2]), tuple(rect[2:])],
                    outline=(255, 0, 0))

            out_img.save(os.path.join(res_dir, '%05d.jpg' % (i + 1)))

    t_elapsed = time.time() - t_start
    speed = num_frames/t_elapsed

    if save_bboxes:
        with open(os.path.join(bb_res_dir, vid_name+'.txt'), 'w') as f:
            for bb in bboxes:
                f.write('%.02f,%.02f,%.02f,%.02f\n' % tuple(bb))

    return bboxes, speed
def main():
    hp, evaluation, run, env, design = parse_arguments()
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    siam = SiameseNet(env.root_pretrained, design.net)
    if torch.cuda.is_available():
        siam = siam.cuda()

    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        video_list = [v for v in os.listdir(dataset_folder) if not v[0] == '.']
        video_list.sort()
        nv = np.size(video_list)
        speed = np.zeros(nv * evaluation.n_subseq)
        precisions = np.zeros(nv * evaluation.n_subseq)
        precision_auc = np.zeros(nv * evaluation.n_subseq)
        ious = np.zeros(nv * evaluation.n_subseq)
        lengths = np.zeros(nv * evaluation.n_subseq)
        for i in range(nv):
            print('video: %d' % (i + 1))
            gt, frame_name_list, frame_sz, n_frame = _init_video(
                env, evaluation, video_list[i])
            starts = np.rint(
                np.linspace(0, n_frame - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]
            for j in range(evaluation.n_subseq):
                starts_frame = int(starts[j])
                gt_ = gt[starts_frame:, :]
                frame_name_list_ = frame_name_list[starts_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                bboxes, speed[idx] = tracker(hp, run, design, frame_name_list_,
                                             pos_x, pos_y, target_w, target_h,
                                             final_score_sz, siam,
                                             starts_frame)
                lengths[idx], precisions[idx], precision_auc[idx], ious[
                    idx] = _compile_results(gt_, bboxes,
                                            evaluation.dist_threshold)
                print(str(i) + ' -- ' + video_list[i] + \
                      ' -- Precision: ' + "%.2f" %precisions[idx] + \
                      ' -- Precisions AUC: ' + "%.2f" %precision_auc[idx] + \
                      ' -- IOU: ' + "%.2f" % ious[idx] + \
                      '-- Speed: ' + "%.2f" % speed[idx] + ' --\n')
        tot_frames = np.sum(lengths)
        mean_precision = np.sum(precisions * lengths) / tot_frames
        mean_precision_auc = np.sum(precision_auc * lengths) / tot_frames
        mean_iou = np.sum(ious * lengths) / tot_frames
        mean_speed = np.sum(speed * lengths) / tot_frames
        print(' -- Overall stats (averaged per frame) on ' + str(nv) +
              ' videos (' + str(tot_frames) + ' frames) --')
        print(' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" %mean_precision +\
              ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +\
              ' -- IOU: ' + "%.2f" %mean_iou +\
              ' -- Speed: ' + "%.2f" %mean_speed + '-- \n')
    else:
        gt, frame_name_list, _, _ = _init_video(env, evaluation,
                                                evaluation.video)
        pos_x, pos_y, target_w, target_h = region_to_bbox(
            gt[evaluation.start_frame])
        bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y,
                                target_w, target_h, final_score_sz, siam,
                                evaluation.start_frame)
        _, precision, precision_auc, iou = _compile_results(
            gt, bboxes, evaluation.dist_threshold)
        print(evaluation.video + \
              ' -- precision ' + "(%d px)" %evaluation.dist_threshold + ': ' + "%.2f" % precision +\
              ' -- precision AUC: ' + "%.2f" %precision_auc + \
              ' -- IOU: ' + "%.2f" %iou +\
              '-- Speed: ' + "%.2f" %speed + ' -- \n')
def get_gt_bbox_cv(bbox):
    x, y, width, height = region_to_bbox(bbox, False)
    return (int(x), int(y)), (int(x + width), int(y + height))
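# Hedged usage sketch (not part of the original code): drawing the corner points returned by
# get_gt_bbox_cv with OpenCV. The frame path and ground-truth region arguments are placeholders.
def _draw_gt_box_demo(frame_path, gt_region):
    import cv2
    frame = cv2.imread(frame_path)
    pt1, pt2 = get_gt_bbox_cv(gt_region)
    cv2.rectangle(frame, pt1, pt2, (0, 255, 0), 2)  # green box, 2 px thick
    return frame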
def main():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # Load hyperparameter (hp), evaluation, run, environment (env) and design parameters from
    # parameters/ directory.
    hp, evaluation, run, env, design = parse_arguments()
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] =>   [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
    # instead of
    # [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # build TF graph once for all
    filename, image, templates_z, scores = siam.build_tracking_graph(
        final_score_sz, design, env)

    # iterate through all videos of evaluation.dataset
    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        videos_list = [v for v in os.listdir(dataset_folder)]
        videos_list.sort()
        nv = np.size(videos_list)
        speed = np.zeros(nv * evaluation.n_subseq)
        precisions = np.zeros(nv * evaluation.n_subseq)
        precisions_auc = np.zeros(nv * evaluation.n_subseq)
        ious = np.zeros(nv * evaluation.n_subseq)
        lengths = np.zeros(nv * evaluation.n_subseq)
        for i in range(nv):
            gt, frame_name_list, frame_sz, n_frames = _init_video(
                env, evaluation, videos_list[i])
            starts = np.rint(
                np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]
            for j in range(evaluation.n_subseq):
                start_frame = int(starts[j])
                gt_ = gt[start_frame:, :]
                frame_name_list_ = frame_name_list[start_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                bboxes, speed[idx] = tracker(hp, run, design, frame_name_list_,
                                             pos_x, pos_y, target_w, target_h,
                                             final_score_sz, filename, image,
                                             templates_z, scores, start_frame)
                lengths[idx], precisions[idx], precisions_auc[idx], ious[
                    idx] = _compile_results(gt_, bboxes,
                                            evaluation.dist_threshold)
                print(str(i) + ' -- ' + videos_list[i] +
                      ' -- Precision: ' + "%.2f" % precisions[idx] +
                      ' -- Precision AUC: ' + "%.2f" % precisions_auc[idx] +
                      ' -- IOU: ' + "%.2f" % ious[idx] +
                      ' -- Speed: ' + "%.2f" % speed[idx] + ' --')
                print()

        tot_frames = np.sum(lengths)
        mean_precision = np.sum(precisions * lengths) / tot_frames
        mean_precision_auc = np.sum(precisions_auc * lengths) / tot_frames
        mean_iou = np.sum(ious * lengths) / tot_frames
        mean_speed = np.sum(speed * lengths) / tot_frames
        print('-- Overall stats (averaged per frame) on ' + str(nv) +
              ' videos (' + str(tot_frames) + ' frames) --')
        print(' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % mean_precision +
              ' -- Precision AUC: ' + "%.2f" % mean_precision_auc +
              ' -- IOU: ' + "%.2f" % mean_iou +
              ' -- Speed: ' + "%.2f" % mean_speed + ' --')
        print()

    else:
        gt, frame_name_list, _, _ = _init_video(env, evaluation,
                                                evaluation.video)
        pos_x, pos_y, target_w, target_h = region_to_bbox(
            gt[evaluation.start_frame])
        bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y,
                                target_w, target_h, final_score_sz, filename,
                                image, templates_z, scores,
                                evaluation.start_frame)
        _, precision, precision_auc, iou = _compile_results(
            gt, bboxes, evaluation.dist_threshold)
        print(evaluation.video +
              ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % precision +
              ' -- Precision AUC: ' + "%.2f" % precision_auc +
              ' -- IOU: ' + "%.2f" % iou +
              ' -- Speed: ' + "%.2f" % speed + ' --')
        print()
def main():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    # hp, evaluation, run, env, design = parse_arguments()
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] =>   [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
    # instead of
    # [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    # final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # build TF graph once for all
    # filename, image, templates_z, templates_x, scores, scores_original = siam.build_tracking_graph(final_score_sz, design, env)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args_dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)

    # iterate through all videos of evaluation.dataset
    videos_list = list(dataset.videos.keys())
    videos_list.sort()
    nv = np.size(videos_list)
    tracker = None
    for i in range(nv):
        current_key = sorted(list(dataset.videos.keys()))[i]
        gt, frame_name_list, frame_sz, n_frames = _init_video(
            dataset, current_key)
        rect = region_to_bbox(gt[0], False)
        # tracker = SiamFCTracker(frame_name_list[0], vot.Rectangle(rect[0],rect[1],rect[2],rect[3]))
        # tracker = ColorizationTracker(frame_name_list[0], vot.Rectangle(rect[0], rect[1], rect[2], rect[3]))
        if tracker is None:
            tracker = HybridTracker(
                frame_name_list[0],
                vot.Rectangle(rect[0], rect[1], rect[2], rect[3]))
        else:
            tracker.HotInit(frame_name_list[0],
                            vot.Rectangle(rect[0], rect[1], rect[2], rect[3]))
        bboxes = []
        for k in range(n_frames):
            bbox, confidence = tracker.track(frame_name_list[k])
            bboxes.append(bbox)

        #Visualize
        if visualize:
            for frame_idx, (bbox, groundt,
                            frame_name) in enumerate(zip(bboxes, gt, frame_name_list)):
                image = cv2.imread(frame_name)
                bbox_pt1, bbox_pt2 = get_bbox_rect(bbox)
                bbox_gt1, bbox_gt2 = get_gt_bbox_cv(groundt)

                #Draw result
                cv2.rectangle(image, bbox_pt1, bbox_pt2, (0, 255, 0))
                #Draw ground truth
                cv2.rectangle(image, bbox_gt1, bbox_gt2, (0, 0, 0))
                cv2.imshow("Results:", image)
                if save_vis:
                    cv2.imwrite(os.path.join(vis_output,
                                             str(frame_idx) + ".jpg"), image)
                cv2.waitKey()

        #Reset the tracker, get prepared for next subset
        tracker.reset()

        # Write the results to disc for evaluation
        bboxes_n = []
        for bbox in bboxes:
            bboxes_n.append([bbox.x, bbox.y, bbox.width, bbox.height])
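        # Assumed VOT-style result format: the first line is a bare "1" marking the initialization
        # frame, followed by one "x, y, w, h" line per tracked frame.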
        bboxes_n[0] = [1]
        target_dir = os.path.join(result_output, current_key)
        if not os.path.exists(target_dir):
            os.mkdir(target_dir)
        results_file = current_key + "_" + "{:03d}".format(1) + ".txt"
        results_abs_file = os.path.join(target_dir, results_file)
        with open(results_abs_file, "w") as f:
            for bbox in bboxes_n:
                if len(bbox) == 1:
                    f.write('%d\n' % (bbox[0]))
                else:
                    f.write('%.2f, %.2f, %.2f, %.2f\n' %
                            (bbox[0], bbox[1], bbox[2], bbox[3]))
def main():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] =>   [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
    # instead of
    # [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # build TF graph once for all
    filename, image, templates_z, scores = siam.build_tracking_graph(final_score_sz, design, env)

    # iterate through all videos of evaluation.dataset
    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        videos_list = [v for v in os.listdir(dataset_folder)]
        videos_list.sort()
        nv = np.size(videos_list)
        speed = np.zeros(nv * evaluation.n_subseq)
        precisions = np.zeros(nv * evaluation.n_subseq)
        precisions_auc = np.zeros(nv * evaluation.n_subseq)
        ious = np.zeros(nv * evaluation.n_subseq)
        lengths = np.zeros(nv * evaluation.n_subseq)
        for i in range(nv):
            images_arr, gt, frame_name_list, frame_sz, n_frames = _init_video(env, evaluation, videos_list[i])
            starts = np.rint(np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]
            for j in range(evaluation.n_subseq):
                start_frame = int(starts[j])
                gt_ = gt[start_frame:, :]
                frame_name_list_ = frame_name_list[start_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                bboxes, speed[idx] = tracker(hp, run, design, frame_name_list_, pos_x, pos_y,
                                             target_w, target_h, final_score_sz, filename,
                                             image, templates_z, scores, start_frame)
                lengths[idx], precisions[idx], precisions_auc[idx], ious[idx] = _compile_results(gt_, bboxes, evaluation.dist_threshold)
                print(str(i) + ' -- ' + videos_list[i] + ' -- Precision: ' + "%.2f" % precisions[idx] + ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] + ' -- IOU: ' + "%.2f" % ious[idx] + ' -- Speed: ' + "%.2f" % speed[idx] + ' --')
        tot_frames = np.sum(lengths)
        mean_precision = np.sum(precisions * lengths) / tot_frames
        mean_precision_auc = np.sum(precisions_auc * lengths) / tot_frames
        mean_iou = np.sum(ious * lengths) / tot_frames
        mean_speed = np.sum(speed * lengths) / tot_frames
        print('-- Overall stats (averaged per frame) on ' + str(nv) + ' videos (' + str(tot_frames) + ' frames) --')
        print(' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % mean_precision + ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc + ' -- IOU: ' + "%.2f" % mean_iou + ' -- Speed: ' + "%.2f" % mean_speed + ' --')

    else:
        images_arr, gt, frame_name_list, _, _ = _init_video(env, evaluation, evaluation.video)
        pos_x, pos_y, target_w, target_h = region_to_bbox(gt[evaluation.start_frame]) 
      
        bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y, target_w, target_h, final_score_sz,
                                filename, image, templates_z, scores, evaluation.start_frame)        
                            
        num_frames = np.size(frame_name_list)
        bboxes_final = np.zeros((num_frames,4))

        lk_params = dict(winSize=(5, 5),
                         maxLevel=2,
                         criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 1, 0.03))
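        # Refine the SiamFC output with Lucas-Kanade optical flow: the box reference corner is
        # re-tracked from frame to frame with cv2.calcOpticalFlowPyrLK, keeping the SiamFC width/height.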

        for i in range(1,len(images_arr)-1):
            # Create some random colors
            color = np.random.randint(0,255,(100,3))

            # Take first frame and find corners in it
            #ret, old_frame = cap.read()
            frame = images_arr[i+1]
            old_frame = images_arr[i]
            old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)
            p0 = np.zeros((1,1,2), dtype=np.float32)
            bbox_i = bboxes[i]
            c, r, w, h = int(bbox_i[0]), int(bbox_i[1]), int(bbox_i[2]), int(bbox_i[3])
            p0[0,0,0] = c
            p0[0,0,1] = r
            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            p1, st, err = cv2.calcOpticalFlowPyrLK(old_gray, frame_gray, p0, None, **lk_params)
            good_new = p1[st==1]
            bboxes_final[i,:] = p1[0][0][0], p1[0][0][1], w, h
            good_old = p0[st==1]   
            for new, old in zip(good_new, good_old):
                a, b = new.ravel()
                c, d = old.ravel()
            fig = plt.figure(1)
            ax = fig.add_subplot(111)
            r1 = patches.ConnectionPatch((a,b),(c,d),'data','data',arrowstyle="-|>") 
            r2 = patches.Circle((a,b),5,color='r')#((x,y), w, h, linewidth=2, edgecolor='r', fill=False)
            ax.imshow(np.uint8(frame))
            ax.add_patch(r2)
            ax.add_patch(r1)
            plt.ion()
            plt.show()
            plt.pause(0.001)
            plt.clf()
            old_gray = frame_gray.copy()
            p0 = good_new.reshape(-1,1,2)
        _, precision, precision_auc, iou = _compile_results(gt, bboxes_final, evaluation.dist_threshold)
        print(evaluation.video + ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % precision + ' -- Precision AUC: ' + "%.2f" % precision_auc + ' -- IOU: ' + "%.2f" % iou + ' -- Speed: ' + "%.2f" % speed + ' --')
def cozmo_program(robot: cozmo.robot.Robot):
	
	global angle
	angle = 25.
	robot.set_head_angle(degrees(angle)).wait_for_completed()
	robot.set_lift_height(0.0).wait_for_completed()
	robot.camera.image_stream_enabled = True
	robot.camera.color_image_enabled = True
	robot.camera.enable_auto_exposure = True
	
	os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
	
	frame = os.path.join(directory, "current.jpeg")

	print("Starting Tensorflow...")
	
	with tf.Session() as sess:
		print("Session successfully started")
		model = load_model('modelv1.07-0.96.hdf5')		
		while True:
			global X, Y, W, H
			global result
			X = 245.
			Y = 165.
			W = 150.
			H = 150.
			
			gt = [X, Y, W, H]
			pos_x, pos_y, target_w, target_h = region_to_bbox(gt)
			frame = os.path.join(directory, "current.jpeg")
			result = 0
			dog_counter = 0
			cat_counter = 0
			background_counter = 0
			next_state = 0
			current_state = 0 #Background: 0, Cat:1, Dog:2
			while True:
				latest_img = robot.world.latest_image
				if latest_img is not None:
					pilImage = latest_img.raw_image
					pilImage.resize((640,480), Image.ANTIALIAS).save(os.path.join(directory, "current.jpeg"), "JPEG") 
				show_frame(np.asarray(Image.open(frame)), [900.,900.,900.,900.], 1)
				img = load_image(frame)
				[result, out_relu, global_average_pooling2d] = sess.run(
					[model.outputs, model.get_layer('out_relu').output,
					 model.get_layer('global_average_pooling2d').output],
					feed_dict={model.input.name: img})
				next_state = np.argmax(result)
				print('Arg max: ',next_state)
				
				# Initial Current State is Background
				if current_state == 0: 
					print('Background')
					if next_state == 1: # Detected a Cat
						current_state = 1   # Transition to Cat State
						background_counter = 0
						cat_counter = 1
						dog_counter = 0
					elif next_state == 2: # Detected a Dog
						current_state = 2   # Transition to Dog state
						background_counter = 0
						cat_counter = 0
						dog_counter = 1
				# Current State is Cat
				elif current_state == 1: 
					print('\t\t\t\t\t\tCat')
					if next_state == 0:   # Detected Background
						background_counter += 1
						if background_counter >= 6:  # Transition to Background only after at least 6 Background detections
							background_counter = 0
							current_state = 0
							cat_counter = 0
					elif next_state == 1: # Detected Cat itself
						cat_counter +=1
						if cat_counter >= 30:
							print('Cozmo sees a cat')
							dense = model.get_layer('dense').get_weights()
							weights = dense[0].T
							
							testing_counter = 0
							detected_centroid = 0
							xmin_avg = 0
							xmax_avg = 0
							ymin_avg = 0
							ymax_avg = 0
							frame_average = 2
							frame_count = 0
							while True:
								latest_img = robot.world.latest_image
								if latest_img is not None:
									pilImage = latest_img.raw_image
									pilImage.resize((640,480), Image.ANTIALIAS).save(os.path.join(directory, "current.jpeg"), "JPEG")
								img = load_image(frame)
								[result, out_relu, global_average_pooling2d] = sess.run(
									[model.outputs, model.get_layer('out_relu').output,
									 model.get_layer('global_average_pooling2d').output],
									feed_dict={model.input.name: img})
								
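								# Assumed interpretation: build a class-activation map by weighting the 7x7x1280 'out_relu'
								# features with the dense-layer weights of the predicted class, then threshold at 80% of its max.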
								kernels = out_relu.reshape(7,7,1280)
								final = np.dot(kernels,weights[result[0].argmax()])
								final1 = array_to_img(final.reshape(7,7,1))
								final1 = final1.resize((224,224), Image.ANTIALIAS)
								box = img_to_array(final1).reshape(224,224)
								#box = cv2.blur(box,(30,30))
								temp = (box > box.max()*.8) *1 
								
								temp_adjusted = np.ndarray(shape=np.shape(temp), dtype=np.dtype(np.uint8))
								temp_adjusted[:,:] = np.asarray(temp)*255
								contours, hierarchy = cv2.findContours(temp_adjusted, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[-2:]
								contours = np.array(contours)
								max_area = [0, 0]  # [contour index, number of contour points]
								for index, contour in enumerate(contours):
									if(max_area[1]< len(contour)):
										max_area = [index,len(contour)]
									
								contours_adjusted = contours[max_area[0]].squeeze(axis=1).T
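								# map the largest contour's extremes from the 224x224 activation map back to the 640x480 camera frame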
								
								xmin = contours_adjusted[0].min() * (640./224.)
								ymin = contours_adjusted[1].min() * (480./224.)
								xmax = contours_adjusted[0].max() * (640./224.)
								ymax = contours_adjusted[1].max() * (480./224.)
																
								if result[0].argmax() == 1:
									
									# Frame smoothing
									frame_count = frame_count + 1
									xmin_avg = xmin_avg + xmin
									xmax_avg = xmax_avg + xmax
									ymin_avg = ymin_avg + ymin
									ymax_avg = ymax_avg + ymax
									
									if frame_count % frame_average == 0:
										frame_count = 0
										xmin_avg = xmin_avg/frame_average
										xmax_avg = xmax_avg/frame_average
										ymin_avg = ymin_avg/frame_average
										ymax_avg = ymax_avg/frame_average
										
										print(xmin_avg, end=",")
										print(ymin_avg, end=",")
										print(xmax_avg, end=",")
										print(ymax_avg, end="\n")
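										# re-centre: turn the averaged box into the top-left corner of a fixed W x H window around its centre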
										ymin_avg = ymin_avg + (ymax_avg - ymin_avg)/2. - H/2.
										xmin_avg = xmin_avg + (xmax_avg - xmin_avg)/2. - W/2.
										print("150: ",xmin_avg, end=",")
										print("150: ",ymin_avg, end="\n")
										gt = [xmin_avg, ymin_avg, W, H]
										xmin_avg = 0
										xmax_avg = 0
										ymin_avg = 0
										ymax_avg = 0
										
										pos_x, pos_y, target_w, target_h = region_to_bbox(gt)
										bboxes = np.zeros((1, 4))
										#bboxes[0,:] = pos_x-target_w/2, pos_y-target_h/2, target_w, target_h
										bboxes[0,:] = pos_x-W/2, pos_y-H/2, W, H
										print(len(contours))
										testing_counter = testing_counter + 1
										print("Testing_counter: ",testing_counter)
										show_frame(np.asarray(Image.open(frame)), gt, 1)
										print("Cat is detected")								
									
										print("Starting the tracker ...")
										if (bboxes[0,1] + bboxes[0,3]/2) < (Y + H/2 - 40):
											print("Command: Raise the head")
											angle = angle + 0.5
											if angle > 44.5:
												angle = 44.5
										elif (bboxes[0,1] + bboxes[0,3]/2) > (Y + H/2 + 40):
											print("Command: Lower the head")
											angle = angle - 0.5
											if angle < 0:
												angle = 0
										else:
											pass
										
										set_head_angle_action = robot.set_head_angle(degrees(angle), max_speed=20, in_parallel=True)
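										# straight() and turn() (helpers defined elsewhere in this example) each return a (left, right)
										# wheel-speed pair; their contributions are summed, and a run of stopped frames counts as stable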
										
										if straight(bboxes[0,:])[0] != 0 and turn(bboxes[0,:])[0] != 0:
											robot.drive_wheel_motors(straight(bboxes[0,:])[0] + turn(bboxes[0,:])[0], straight(bboxes[0,:])[1] + turn(bboxes[0,:])[1])
											detected_centroid = 0
										elif straight(bboxes[0,:])[0] == 0 and turn(bboxes[0,:])[0] == 0:
											robot.stop_all_motors()
											detected_centroid = detected_centroid + 1
										elif straight(bboxes[0,:])[0] == 0:
											robot.drive_wheel_motors(turn(bboxes[0,:])[0], turn(bboxes[0,:])[1])
											detected_centroid = 0
										elif turn(bboxes[0,:])[0] == 0:
											robot.drive_wheel_motors(straight(bboxes[0,:])[0], straight(bboxes[0,:])[1])
											detected_centroid = 0
										else:
											robot.stop_all_motors()
											detected_centroid = detected_centroid + 1
										
										if detected_centroid > 20//frame_average:
											detected_centroid = 0
											print("Reached a stable state.........\t\t\t\t\t\t\t\t STABLE")
											
											# Go near the object
											
											set_head_angle_action.wait_for_completed()
											robot.abort_all_actions(log_abort_messages=True)
											robot.wait_for_all_actions_completed()
											robot.set_head_angle(degrees(0.5)).wait_for_completed()
											print("Robot's head angle: ",robot.head_angle)
											target_frame_count = 1
											while True:
												latest_img = None
												while latest_img is None:
													latest_img = robot.world.latest_image
												target_frame1 = latest_img.raw_image
												target_frame1 = target_frame1.resize((640,480), Image.ANTIALIAS)
												#target_frame1 = target_frame1.convert('L')
												target_frame1 = np.asarray(target_frame1)
												#orb1 = cv2.ORB_create(500)
												#kp1 = orb1.detect(target_frame1,None)
												#kp1, des1 = orb1.compute(target_frame1, kp1)
												#features_img1 = cv2.drawKeypoints(target_frame1, kp1, None, color=(255,0,0), flags=0)
												#plt.imsave("target_frame1_"+str(target_frame_count)+".jpeg",features_img1)
												plt.imsave("target_frame1_"+str(target_frame_count)+".jpeg",target_frame1)
											
												drive_straight_action = robot.drive_straight(distance=cozmo.util.distance_mm(distance_mm=10),speed=cozmo.util.speed_mmps(10), in_parallel=True)
												drive_straight_action.wait_for_completed()
												robot.set_head_angle(degrees(0.5)).wait_for_completed()
												print("Robot's head angle: ",robot.head_angle)
												latest_img = None
												while latest_img is None:
													latest_img = robot.world.latest_image
												target_frame2 = latest_img.raw_image
												target_frame2 = target_frame2.resize((640,480), Image.ANTIALIAS)
												#target_frame2 = target_frame2.convert('L')
												target_frame2 = np.asarray(target_frame2)
												#orb2 = cv2.ORB_create(500)
												#kp2 = orb2.detect(target_frame2,None)
												#kp2, des2 = orb2.compute(target_frame2, kp2)
												#features_img2 = cv2.drawKeypoints(target_frame2, kp2, None, color=(255,0,0), flags=0)
												#plt.imsave("target_frame2_"+str(target_frame_count)+".jpeg",features_img2)
												plt.imsave("target_frame2_"+str(target_frame_count)+".jpeg",target_frame2)
												target_frame_count = target_frame_count + 1
												'''
												matcher = cv2.DescriptorMatcher_create(cv2.DESCRIPTOR_MATCHER_BRUTEFORCE_HAMMING)
												matches = matcher.match(des1, des2, None)
												
												matches.sort(key=lambda x: x.distance, reverse=False)
												matches = matches[:10]
												imMatches = cv2.drawMatches(target_frame1, kp1, target_frame2, kp2, matches, None)
												cv2.imwrite("matches_tf1_tf2.jpg", imMatches)
												
												points1 = np.zeros((len(matches), 2), dtype=np.float32)
												points2 = np.zeros((len(matches), 2), dtype=np.float32)

												for i, match in enumerate(matches):
													points1[i, :] = kp1[match.queryIdx].pt
													points2[i, :] = kp2[match.trainIdx].pt
													print("Points1 [{}]: {}".format(i,points1[i][0]), points1[i][1],"\tPoints2: ",points2[i][0], points2[i][1]) 
												index = None
												dist1_x = []
												dist2_x = []
												for index in range(len(points1)):
													dist1_x.append((W/2.)-points1[index][0]) # Extract only the x-coordinate
													dist2_x.append((W/2.)-points2[index][0]) # Extract only the x-coordinate
																							
												fw_x = 1./((1./np.array(dist2_x)) - (1./np.array(dist1_x))) # Calculate the image plane to obj plane mapping in x direction
												
												pt1_x = []
												pt2_x = []
												for index in range(len(points1)):
													pt1_x.append(fw_x[index]/(W/2. - points1[index][0])) 
													pt2_x.append(fw_x[index]/(W/2. - points2[index][0]))
													print("Approx. distance[{}]: {}".format(index, pt1_x[index]))
												if len(pt2_x) < 10:
													break
												'''
											sys.exit(0)
											
					else:				   # Detected Dog
						dog_counter += 1
						if dog_counter >= 6:  # Transition to Dog only after at least 6 Dog detections
							cat_counter = 0
							current_state = 2
				# Current State is Dog
				elif current_state == 2:
					print('\t\t\t\t\t\t\t\t\t\t\t\tDog')
					if next_state == 0:	 # Detected Background
						background_counter += 1
						if background_counter >= 6:  # Transition to Background only after at least 6 Background detections
							background_counter = 0
							current_state = 0
							dog_counter = 0 
					elif next_state == 2:   # Detected Dog itself
						dog_counter +=1
						if dog_counter >= 30:
							print('Cozmo sees a Dog')
							robot.drive_wheels(-50, -50)
							time.sleep(3)
							robot.drive_wheels(70, -70)
							time.sleep(2.8)  
							robot.drive_wheels(0, 0)						
							break 
					else:				   # Detected Cat
						cat_counter += 1
						if cat_counter >= 6:  # Transition to Cat only after at least 6 Cat detections
							dog_counter = 0
							current_state = 1			
def main():

    hp, evaluation, run, env, design = parse_arguments()
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    print("final_score_sz is:%d" % (final_score_sz))
    gt, frame_name_list, frame_sz, n_frames = _init_video(
        env, evaluation, videos_path)
    num_frames = np.size(frame_name_list)
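    # geometric pyramid of hp.scale_num scale factors centred on 1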
    scale_factors = hp.scale_step**np.linspace(-np.ceil(hp.scale_num / 2),
                                               np.ceil(hp.scale_num / 2),
                                               hp.scale_num)
    # cosine window to penalize large displacements
    hann_1d = np.expand_dims(np.hanning(final_score_sz), axis=0)
    penalty = np.transpose(hann_1d) * hann_1d
    penalty = penalty / np.sum(penalty)

    pos_x, pos_y, target_w, target_h = region_to_bbox(gt[start_frame])

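    # SiamFC-style sizing: the exemplar z is a square whose area matches the context-padded target,
    # and the search region x is the exemplar scaled by search_sz / exemplar_sz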
    context = design.context * (target_w + target_h)
    z_sz = np.sqrt(np.prod((target_w + context) * (target_h + context)))
    x_sz = float(design.search_sz) / design.exemplar_sz * z_sz
    scaled_exemplar = z_sz * scale_factors
    scaled_search_area = x_sz * scale_factors
    scaled_target_w = target_w * scale_factors
    scaled_target_h = target_h * scale_factors

    # thresholds to saturate patches shrinking/growing
    min_z = hp.scale_min * z_sz
    max_z = hp.scale_max * z_sz
    min_x = hp.scale_min * x_sz
    max_x = hp.scale_max * x_sz

    #search size
    x_sz0_ph = scaled_search_area[0]
    x_sz1_ph = scaled_search_area[1]
    x_sz2_ph = scaled_search_area[2]
    image = Image.open(frame_name_list[0])
    image.show()
    image = np.array(image)

    # used to pad the crops
    if design.pad_with_image_mean:
        avg_chan = np.mean(image, axis=(0, 1))
    else:
        avg_chan = None
    # pad the frame if necessary, filling with avg_chan
    frame_padded_z, npad_z = pad_frame_numpy(image, frame_sz, pos_y, pos_x,
                                             z_sz, avg_chan)
    # extract tensor of z_crops
    # print  type(design.exemplar_sz)
    z_crops = extract_crops_z_numpy(frame_padded_z, npad_z, pos_y, pos_x, z_sz,
                                    design.exemplar_sz)
    print('the shape of the img z_crops is: ' + str(np.shape(z_crops)))
    z_crops = np.squeeze(z_crops)
    img = Image.fromarray(z_crops.astype('uint8'), 'RGB')
    img.show()
    frame_padded_x, npad_x = pad_frame_numpy(image, frame_sz, pos_y, pos_x,
                                             x_sz2_ph, avg_chan)
    # extract tensor of x_crops (3 scales)
    x_crops = extract_crops_x_numpy(frame_padded_x, npad_x, pos_y, pos_x,
                                    x_sz0_ph, x_sz1_ph, x_sz2_ph,
                                    design.search_sz)
    print('the shape of the img x_crops is: ' + str(np.shape(x_crops)))
    x_crops_1 = np.squeeze(x_crops[0, :, :])
    img_1 = Image.fromarray(x_crops_1.astype('uint8'), 'RGB')
    img_1.show()
    x_crops_2 = np.squeeze(x_crops[1, :, :])
    img_2 = Image.fromarray(x_crops_2.astype('uint8'), 'RGB')
    img_2.show()
    x_crops_3 = np.squeeze(x_crops[2, :, :])
    img_3 = Image.fromarray(x_crops_3.astype('uint8'), 'RGB')
    img_3.show()
Example no. 25
0
def main():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] =>   [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
    # instead of
    # [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # build TF graph once for all
    filename, image, templates_z, templates_x, scores, scores_original = siam.build_tracking_graph(
        final_score_sz, design, env)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args_dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)

    # iterate through all videos of evaluation.dataset
    videos_list = list(dataset.videos.keys())
    videos_list.sort()
    nv = np.size(videos_list)
    for i in range(nv):
        current_key = sorted(list(dataset.videos.keys()))[i]
        gt, frame_name_list, frame_sz, n_frames = _init_video(
            dataset, current_key)
        for j in range(1):
            start_frame = 0
            gt_ = gt[start_frame:, :]
            frame_name_list_ = frame_name_list[start_frame:]
            pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
            bboxes, _ = tracker(videos_list[i], hp, run, design,
                                frame_name_list_, pos_x, pos_y, target_w,
                                target_h, final_score_sz, filename, image,
                                templates_z, templates_x, scores,
                                scores_original, start_frame)

            #Visualize
            if visualize:
                for bbox, groundt, frame_name in zip(bboxes, gt_,
                                                     frame_name_list_):
                    image = cv2.imread(frame_name)
                    bbox_pt1, bbox_pt2 = get_bbox_cv(bbox)
                    bbox_gt1, bbox_gt2 = get_gt_bbox_cv(groundt)

                    #Draw result
                    cv2.rectangle(image, bbox_pt1, bbox_pt2, (0, 255, 0))
                    #Draw ground truth
                    cv2.rectangle(image, bbox_gt1, bbox_gt2, (0, 0, 0))
                    cv2.imshow("Results:", image)
                    cv2.waitKey()

            bboxes = bboxes.tolist()
            bboxes[0] = [1]
            target_dir = os.path.join(result_output, current_key)
            if not os.path.exists(target_dir):
                os.mkdir(target_dir)
            results_file = current_key + "_" + "{:03d}".format(1) + ".txt"
            results_abs_file = os.path.join(target_dir, results_file)
            with open(results_abs_file, "w") as f:
                for bbox in bboxes:
                    if len(bbox) == 1:
                        f.write('%d\n' % (bbox[0]))
                    else:
                        f.write('%.2f, %.2f, %.2f, %.2f\n' %
                                (bbox[0], bbox[1], bbox[2], bbox[3]))