Example #1
    def __init__(self):
        super(SiamMCF, self).__init__("SiamMCF")
        root_dir = path_config.SIAMMCF_ROOT_DIR
        self.hp, self.evaluation, self.env, self.design = parse_arguments(root_dir)
        self.final_score_sz = self.hp.response_up * (self.design.score_sz - 1) + 1
        # build TF graph once for all
        (
            self.filename,
            self.image,
            self.templates_x,
            self.templates_z,
            self.scores_list,
        ) = siam.build_tracking_graph(
            root_dir, self.final_score_sz, self.design, self.env, self.hp
        )
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        tf.global_variables_initializer().run(session=self.sess)
        vars_to_load = []
        for v in tf.global_variables():
            if "postnorm" not in v.name:
                vars_to_load.append(v)

        siam_ckpt_name = path_config.SIAMMCF_MODEL
        siam_saver = tf.train.Saver(vars_to_load)
        siam_saver.restore(self.sess, siam_ckpt_name)
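A minimal sketch of the two ideas above, assuming the TF1 API used throughout these examples; response_up=8 and score_sz=33 are illustrative SiamFC-style values, not taken from this repo's config. It shows the upsampled score-map size and restoring a checkpoint while skipping variables by name.

import tensorflow as tf

response_up, score_sz = 8, 33                    # assumed hyperparameters
final_score_sz = response_up * (score_sz - 1) + 1
print(final_score_sz)                            # 8 * 32 + 1 = 257

w = tf.Variable(tf.zeros([1]), name='conv1/w')   # stand-in graph variable
vars_to_load = [v for v in tf.global_variables()
                if 'postnorm' not in v.name]     # drop post-norm variables
saver = tf.train.Saver(vars_to_load)
# saver.restore(sess, ckpt_path)                 # ckpt_path is hypothetical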
Example #2
def main():
    # Avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

    # --- Parse arguments from JSON file ---
    hp, evaluation, run, env, design = parse_arguments()
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    image, templates_z, scores = siam.build_tracking_graph(
        final_score_sz, design, env)

    # --- Start Streaming from Video ---
    cap = cv2.VideoCapture(env.root_sequences + '/' + sys.argv[1] + '.mp4')
    ret, frame = cap.read()
    if not ret:
        print("Error opening video sequence")
        return

    # --- Save Video (Optional) ---
    if run.save_video:
        vid_write = cv2.VideoWriter(
            env.root_sequences + '/' + sys.argv[1] + '_out.avi',
            cv2.VideoWriter_fourcc(*'MJPG'), 25,
            (frame.shape[1], frame.shape[0]), True)
    else:
        vid_write = None

    # --- Define Initial Bounding Box ---
    BB = click_and_crop(frame, design.window_name)

    cv2.namedWindow(design.window_name)
    cv2.startWindowThread()
    cv2.setMouseCallback(design.window_name, BB.callback)

    cv2.imshow(design.window_name, frame)
    cv2.waitKey(0)

    while True:
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # ----- Define Initial Bounding Box Params & Template -----
    pos_x = int((BB.refPt[0][0] + BB.refPt[1][0]) / 2)  # Template center x
    pos_y = int((BB.refPt[0][1] + BB.refPt[1][1]) / 2)  # Template center y
    target_w = int(abs(BB.refPt[1][0] - BB.refPt[0][0]))  # Template width
    target_h = int(abs(BB.refPt[1][1] - BB.refPt[0][1]))  # Template height

    # ----- Begin Tracking -----
    tracker(hp, run, design, pos_x, pos_y, target_w, target_h, final_score_sz,
            templates_z, scores, cap, vid_write, frame)

    cap.release()
    cv2.destroyAllWindows()

    if run.save_video:
        vid_write.release()
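For reference, a standalone worked example of the corner-to-center conversion used above (the two click points are made up):

refPt = [(100, 50), (180, 130)]            # two clicked corners (hypothetical)
pos_x = (refPt[0][0] + refPt[1][0]) // 2   # 140: box center x
pos_y = (refPt[0][1] + refPt[1][1]) // 2   # 90:  box center y
target_w = abs(refPt[1][0] - refPt[0][0])  # 80:  full box width
target_h = abs(refPt[1][1] - refPt[0][1])  # 80:  full box height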
Example #3
    def InitSiamNetwork(self):
        #Initialize the network and load the weights
        # self.graph_siam = tf.Graph()
        # with self.graph_siam.as_default():
        filename, image, templates_z, \
        templates_x, scores, scores_original = siam.build_tracking_graph(self.final_score_sz, self.design, self.env)
        siam_params = {
            "filename": filename,
            "image": image,
            "templates_z": templates_z,
            "templates_x": templates_x,
            "scores": scores,
            "scores_original": scores_original
        }

        self.graph_siam = tf.get_default_graph()
        return siam_params
def main(process, queue, box, video):
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] =>   [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
    # instead of
    # [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # build TF graph once for all
    image, templates_z, scores = siam.build_tracking_graph(final_score_sz, design, env)
    
    # read radio
    # width = 640
    # height = 480
    # process1 = (
    #     ffmpeg
    #     .input('tcp://192.168.1.155:8300',vcodec='h264',r = 24,probesize=32,fflags="nobuffer",flags="low_delay",analyzeduration=1)
    #     .output('pipe:', format='rawvideo',pix_fmt="rgb24")
    #     .run_async(pipe_stdout=True)
    # )
    ## model 
    # model_path = './frozen_inference_graph.pb'
    # odapi = DetectorAPI(path_to_ckpt=model_path)
    # while True :
    # in_bytes = process1.stdout.read(width * height * 3)
    # if not in_bytes :
    #     print ("none")
    # video = (np.frombuffer(in_bytes, np.uint8).reshape([height, width, 3]))
    # video = cv2.cvtColor(video, cv2.COLOR_RGB2BGR)

    # read target from mat
    # box = odapi.processFrame(video)
    box[2] -= box[0]
    box[3] -= box[1]
    box[0] += box[2]/2
    box[1] += box[3]/2
    print ('box', box)
    pos_x, pos_y, target_w, target_h = box[0], box[1], box[2], box[3]
    tracker(hp, run, design, video, pos_x, pos_y, target_w, target_h, final_score_sz,
                            image, templates_z, scores, process, queue)
    print ('done')
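The in-place arithmetic above turns a [x1, y1, x2, y2] corner box into [center_x, center_y, w, h]; a standalone sketch with a made-up input box:

box = [40, 30, 120, 90]     # [x1, y1, x2, y2], hypothetical detection
box[2] -= box[0]            # w = x2 - x1 -> 80
box[3] -= box[1]            # h = y2 - y1 -> 60
box[0] += box[2] / 2        # center_x = x1 + w/2 -> 80.0
box[1] += box[3] / 2        # center_y = y1 + h/2 -> 60.0
print(box)                  # [80.0, 60.0, 80, 60]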
def main():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] =>   [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
    # instead of
    # [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # build TF graph once for all
    filename, image, templates_z, scores = siam.build_tracking_graph(final_score_sz, design, env)

    # iterate through all videos of evaluation.dataset
    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        videos_list = [v for v in os.listdir(dataset_folder)]
        videos_list.sort()
        nv = np.size(videos_list)
        speed = np.zeros(nv * evaluation.n_subseq)
        precisions = np.zeros(nv * evaluation.n_subseq)
        precisions_auc = np.zeros(nv * evaluation.n_subseq)
        ious = np.zeros(nv * evaluation.n_subseq)
        lengths = np.zeros(nv * evaluation.n_subseq)
        for i in range(nv):
            images_arr, gt, frame_name_list, frame_sz, n_frames = _init_video(env, evaluation, videos_list[i])
            starts = np.rint(np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]
            for j in range(evaluation.n_subseq):
                start_frame = int(starts[j])
                gt_ = gt[start_frame:, :]
                frame_name_list_ = frame_name_list[start_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                bboxes, speed[idx] = tracker(hp, run, design, frame_name_list_, pos_x, pos_y,
                                                                     target_w, target_h, final_score_sz, filename,
                                                                     image, templates_z, scores, start_frame)
                lengths[idx], precisions[idx], precisions_auc[idx], ious[idx] = compile_results(gt, bboxes, evaluation.dist_threshold)
                print(str(i) + ' -- ' + videos_list[i] + ' -- Precision: ' + "%.2f" % precisions[idx] + ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] + ' -- IOU: ' + "%.2f" % ious[idx] + ' -- Speed: ' + "%.2f" % speed[idx] + ' --')
        tot_frames = np.sum(lengths)
        mean_precision = np.sum(precisions * lengths) / tot_frames
        mean_precision_auc = np.sum(precisions_auc * lengths) / tot_frames
        mean_iou = np.sum(ious * lengths) / tot_frames
        mean_speed = np.sum(speed * lengths) / tot_frames
        print('-- Overall stats (averaged per frame) on ' + str(nv) + ' videos (' + str(tot_frames) + ' frames) --')
        print(' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % mean_precision + ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc + ' -- IOU: ' + "%.2f" % mean_iou + ' -- Speed: ' + "%.2f" % mean_speed + ' --')

    else:
        images_arr, gt, frame_name_list, _, _ = _init_video(env, evaluation, evaluation.video)
        pos_x, pos_y, target_w, target_h = region_to_bbox(gt[evaluation.start_frame]) 
      
        bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y, target_w, target_h, final_score_sz,
                                filename, image, templates_z, scores, evaluation.start_frame)        
                            
        num_frames = np.size(frame_name_list)
        bboxes_final = np.zeros((num_frames,4))

        lk_params = dict(winSize=(5, 5),
                         maxLevel=2,
                         criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 1, 0.03))

        for i in range(1,len(images_arr)-1):
            # Create some random colors
            color = np.random.randint(0,255,(100,3))

            # Take first frame and find corners in it
            #ret, old_frame = cap.read()
            frame = images_arr[i+1]
            old_frame = images_arr[i]
            old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)
            p0 = np.zeros((1,1,2), dtype=np.float32)
            bbox_i = bboxes[i]
            c, r, w, h = int(bbox_i[0]), int(bbox_i[1]), int(bbox_i[2]), int(bbox_i[3])
            p0[0,0,0] = c
            p0[0,0,1] = r
            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            p1, st, err = cv2.calcOpticalFlowPyrLK(old_gray, frame_gray, p0, None, **lk_params)
            good_new = p1[st==1]
            bboxes_final[i,:] = p1[0][0][0], p1[0][0][1], w, h
            good_old = p0[st==1]   
            for k, (new, old) in enumerate(zip(good_new, good_old)):
                a,b = new.ravel()
                c,d = old.ravel()
            fig = plt.figure(1)
            ax = fig.add_subplot(111)
            r1 = patches.ConnectionPatch((a,b),(c,d),'data','data',arrowstyle="-|>") 
            r2 = patches.Circle((a,b),5,color='r')#((x,y), w, h, linewidth=2, edgecolor='r', fill=False)
            ax.imshow(np.uint8(frame))
            ax.add_patch(r2)
            ax.add_patch(r1)
            plt.ion()
            plt.show()
            plt.pause(0.001)
            plt.clf()
            old_gray = frame_gray.copy()
            p0 = good_new.reshape(-1,1,2)
        _, precision, precision_auc, iou = _compile_results(gt, bboxes_final, evaluation.dist_threshold)
        print(evaluation.video + ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % precision + ' -- Precision AUC: ' + "%.2f" % precision_auc + ' -- IOU: ' + "%.2f" % iou + ' -- Speed: ' + "%.2f" % speed + ' --')
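The else branch above refines the tracker output by following a single point with pyramidal Lucas-Kanade flow; a self-contained sketch of the same cv2.calcOpticalFlowPyrLK call on two synthetic frames:

import cv2
import numpy as np

old_gray = np.zeros((100, 100), dtype=np.uint8)
frame_gray = np.zeros((100, 100), dtype=np.uint8)
cv2.circle(old_gray, (40, 40), 5, 255, -1)       # blob in the first frame
cv2.circle(frame_gray, (43, 41), 5, 255, -1)     # same blob, slightly moved

lk_params = dict(winSize=(15, 15), maxLevel=2,
                 criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))
p0 = np.array([[[40.0, 40.0]]], dtype=np.float32)  # points must be (N, 1, 2) float32
p1, st, err = cv2.calcOpticalFlowPyrLK(old_gray, frame_gray, p0, None, **lk_params)
print(p1[st == 1])                                 # new location, roughly (43, 41)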
def main():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # Load hyperparameter (hp), evaluation, run, environment (env) and design parameters from
    # parameters/ directory.
    hp, evaluation, run, env, design = parse_arguments()
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] =>   [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
    # instead of
    # [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # build TF graph once for all
    filename, image, templates_z, scores = siam.build_tracking_graph(
        final_score_sz, design, env)

    # iterate through all videos of evaluation.dataset
    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        videos_list = [v for v in os.listdir(dataset_folder)]
        videos_list.sort()
        nv = np.size(videos_list)
        speed = np.zeros(nv * evaluation.n_subseq)
        precisions = np.zeros(nv * evaluation.n_subseq)
        precisions_auc = np.zeros(nv * evaluation.n_subseq)
        ious = np.zeros(nv * evaluation.n_subseq)
        lengths = np.zeros(nv * evaluation.n_subseq)
        for i in range(nv):
            gt, frame_name_list, frame_sz, n_frames = _init_video(
                env, evaluation, videos_list[i])
            starts = np.rint(
                np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]
            for j in range(evaluation.n_subseq):
                start_frame = int(starts[j])
                gt_ = gt[start_frame:, :]
                frame_name_list_ = frame_name_list[start_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                bboxes, speed[idx] = tracker(hp, run, design, frame_name_list_,
                                             pos_x, pos_y, target_w, target_h,
                                             final_score_sz, filename, image,
                                             templates_z, scores, start_frame)
                lengths[idx], precisions[idx], precisions_auc[idx], ious[
                    idx] = _compile_results(gt_, bboxes,
                                            evaluation.dist_threshold)
                print(str(i) + ' -- ' + videos_list[i] +
                      ' -- Precision: ' + "%.2f" % precisions[idx] +
                      ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] +
                      ' -- IOU: ' + "%.2f" % ious[idx] +
                      ' -- Speed: ' + "%.2f" % speed[idx] + ' --')
                print()

        tot_frames = np.sum(lengths)
        mean_precision = np.sum(precisions * lengths) / tot_frames
        mean_precision_auc = np.sum(precisions_auc * lengths) / tot_frames
        mean_iou = np.sum(ious * lengths) / tot_frames
        mean_speed = np.sum(speed * lengths) / tot_frames
        print('-- Overall stats (averaged per frame) on ' + str(
            nv) + ' videos (' + str(tot_frames) + ' frames) --')
        print(' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % mean_precision +
              ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +
              ' -- IOU: ' + "%.2f" % mean_iou +
              ' -- Speed: ' + "%.2f" % mean_speed + ' --')
        print()

    else:
        gt, frame_name_list, _, _ = _init_video(env, evaluation,
                                                evaluation.video)
        pos_x, pos_y, target_w, target_h = region_to_bbox(
            gt[evaluation.start_frame])
        bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y,
                                target_w, target_h, final_score_sz, filename,
                                image, templates_z, scores,
                                evaluation.start_frame)
        _, precision, precision_auc, iou = _compile_results(
            gt, bboxes, evaluation.dist_threshold)
        print(evaluation.video +
              ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % precision +
              ' -- Precision AUC: ' + "%.2f" % precision_auc +
              ' -- IOU: ' + "%.2f" % iou +
              ' -- Speed: ' + "%.2f" % speed + ' --')
        print()
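Several of these mains split each video into evaluation.n_subseq subsequences; a worked example of the start-frame computation with made-up values:

import numpy as np

n_frames, n_subseq = 100, 3
starts = np.rint(np.linspace(0, n_frames - 1, n_subseq + 1))
starts = starts[0:n_subseq]
print(starts)   # [ 0. 33. 66.] -- each subsequence runs from its start to the last frame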
Example #7
    def __init__(self, image_path, region):
        #Parse the arguments
        self.hp, self.evaluation, self.run, self.env, self.design = parse_arguments(
            mode='siamese')

        #Get first frame image and ground-truth
        self.region = region
        self.pos_x = region.x + region.width / 2
        self.pos_y = region.y + region.height / 2
        self.target_w = region.width
        self.target_h = region.height
        self.bbox = self.pos_x - self.target_w / 2, self.pos_y - self.target_h / 2, self.target_w, self.target_h

        #Calculate the size of final score (upscaled size of score matrix, where score matrix
        # is convolution of results of two branches of siamese network)
        self.final_score_sz = self.hp.response_up * (self.design.score_sz -
                                                     1) + 1

        #Initialize the network and load the weights
        self.filename, self.image, self.templates_z, \
        self.templates_x, self.scores, self.scores_original = siam.build_tracking_graph(self.final_score_sz, self.design, self.env)

        #Calculate the scale factors
        self.scale_factors = self.hp.scale_step**np.linspace(
            -np.ceil(self.hp.scale_num / 2), np.ceil(self.hp.scale_num / 2),
            self.hp.scale_num)

        # cosine window to penalize large displacements
        hann_1d = np.expand_dims(np.hanning(self.final_score_sz), axis=0)
        penalty = np.transpose(hann_1d) * hann_1d
        self.penalty = penalty / np.sum(penalty)

        #Calculate search and target patch sizes
        context = self.design.context * (self.target_w + self.target_h)
        self.z_sz = np.sqrt(
            np.prod((self.target_w + context) * (self.target_h + context)))
        self.x_sz = float(
            self.design.search_sz) / self.design.exemplar_sz * self.z_sz

        #Create a tensorflow session
        config = tf.ConfigProto()
        config.gpu_options.visible_device_list = "1"
        config.gpu_options.per_process_gpu_memory_fraction = 0.9
        self.sess = tf.Session(config=config)
        with self.sess.as_default():
            tf.global_variables_initializer().run()
            # Coordinate the loading of image files.
            self.coord = tf.train.Coordinator()
            self.threads = tf.train.start_queue_runners(coord=self.coord)

            self.run_opts = {}

            #Calculate the template for the given region
            image_, self.templates_z_ = self.sess.run(
                [self.image, self.templates_z],
                feed_dict={
                    siam.pos_x_ph: self.pos_x,
                    siam.pos_y_ph: self.pos_y,
                    siam.z_sz_ph: self.z_sz,
                    self.filename: image_path
                })

        return
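The cosine (Hann) penalty built in this constructor down-weights large displacements in the score map; a standalone sketch (final_score_sz=257 assumes the common response_up=8, score_sz=33 setting):

import numpy as np

final_score_sz = 257                       # assumed: 8 * (33 - 1) + 1
hann_1d = np.expand_dims(np.hanning(final_score_sz), axis=0)
penalty = np.transpose(hann_1d) * hann_1d  # outer product -> 2-D window
penalty = penalty / np.sum(penalty)        # normalize so the weights sum to 1
print(penalty.shape)                       # (257, 257), peak at the center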
Example #8
def main():
    # Avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    # --- Parse arguments from JSON file ---
    hp, evaluation, run, env, design = parse_arguments()
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    image, templates_z, scores = siam.build_tracking_graph(
        final_score_sz, design, env)

    # --- Start Streaming from Live Video ---
    stream_path = "/home/hugogermain/stream.flv"
    cap = cv2.VideoCapture(stream_path)
    start_frame = cap.get(cv2.CAP_PROP_FRAME_COUNT)  # Start at last frame
    cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame - 15)

    ret, frame = cap.read()
    if not ret:
        print("Error opening video sequence")
        return

    # --- Save Video (Optional) ---
    vid_write = cv2.VideoWriter(env.root_sequences + '/stream_out.avi',
                                cv2.VideoWriter_fourcc(*'MJPG'), 25,
                                (frame.shape[1], frame.shape[0]), True)

    # --- Initialize projection maps ---
    e2s = equirect2stereograph(-2.5, frame, 0, 0)

    # ===================================
    # --- Define Initial Bounding Box ---
    # ===================================

    BB = click_and_crop(e2s.project(frame), design.window_name)
    cv2.namedWindow(design.window_name)
    cv2.startWindowThread()
    cv2.setMouseCallback(design.window_name, BB.callback)
    cv2.imshow(design.window_name, e2s.project(frame))
    cv2.waitKey(1)

    while True:
        ret, frame = cap.read()
        cv2.waitKey(1)

        if ret:
            # --- Equirectangular to Stereographic Projection ---
            BB.img = e2s.project(frame)
            BB.refresh()
            # --- Reset to last frame to avoid cumulative lagging ---
            cap.release()
            cap = cv2.VideoCapture(stream_path)
            start_frame = cap.get(
                cv2.CAP_PROP_FRAME_COUNT)  # Start at last frame
            cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame - 10)
            cv2.waitKey(1)

            start_frame += 1

        else:
            # --- Reached end of file, wait for new frames ---
            cap.release()
            cap = cv2.VideoCapture(stream_path)
            cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame - 10)
            cv2.waitKey(1)

        # ---- Rotate Camera Viewpoint ---
        k = cv2.waitKey(33)
        if k == 119:  # w
            e2s.set_lat(e2s.lat + 10)
        if k == 115:  # s
            e2s.set_lat(e2s.lat - 10)
        if k == 100:  # d
            e2s.set_roll(e2s.roll + 10)
        if k == 97:  # a
            e2s.set_roll(e2s.roll - 10)

        # ---- Selection is done ----
        if k == 113:  # q
            break
        if BB.ready:
            break

    print("[INFO]: Bounding Box Selection: Done")

    # ----- Define Initial Bounding Box Params & Template -----
    pos_x = int((BB.refPt[0][0] + BB.refPt[1][0]) / 2)  # Template center x
    pos_y = int((BB.refPt[0][1] + BB.refPt[1][1]) / 2)  # Template center y
    target_w = int(abs(BB.refPt[1][0] - BB.refPt[0][0]))  # Template width
    target_h = int(abs(BB.refPt[1][1] - BB.refPt[0][1]))  # Template height

    # ===========================
    # ----- Begin Tracking -----
    # ===========================

    live_tracker(hp, run, design, pos_x, pos_y, target_w, target_h,
                 final_score_sz, templates_z, scores, cap, vid_write, frame,
                 stream_path, e2s)
    cap.release()
    cv2.destroyAllWindows()

    if run.save_video:
        vid_write.release()
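The raw keycodes above (119, 115, 100, 97, 113) are ASCII values; writing them with ord() makes the mapping explicit. A small sketch of the same dispatch, with the rotation handlers stubbed out:

import cv2

k = cv2.waitKey(33) & 0xFF   # mask off the low byte; -1 (no key) becomes 255
if k == ord('w'):            # 119: tilt up
    pass
elif k == ord('s'):          # 115: tilt down
    pass
elif k == ord('d'):          # 100: roll right
    pass
elif k == ord('a'):          # 97:  roll left
    pass
elif k == ord('q'):          # 113: quit selection
    pass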
Example #9
def main(argv):
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    hp, evaluation, env, design = parse_arguments(root_dir)
    cmd_args = parse_command_line_arguments()

    if 'otb13' in cmd_args.dataset_name:
        dataset_type = 'otb13'
    elif 'otb15' in cmd_args.dataset_name:
        dataset_type = 'otb15'
    elif 'vot16' in cmd_args.dataset_name:
        dataset_type = 'vot16'
    elif 'vot17' in cmd_args.dataset_name:
        dataset_type = 'vot17'

    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] =>   [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
    # instead of
    # [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # build TF graph once for all
    filename, image, templates_x, templates_z, scores_list =\
        siam.build_tracking_graph(
            root_dir, final_score_sz, design, env, hp)

    # iterate through all videos of dataset_name
    videos_folder = os.path.join(root_dir, env.root_dataset,
                                 cmd_args.dataset_name)
    videos_list = [
        v for v in os.listdir(videos_folder)
        if os.path.isdir(os.path.join(videos_folder, v))
    ]
    videos_list.sort()
    nv = np.size(videos_list)
    speed = np.zeros(nv * evaluation.n_subseq)
    precisions = np.zeros(nv * evaluation.n_subseq)
    precisions_auc = np.zeros(nv * evaluation.n_subseq)
    ious = np.zeros(nv * evaluation.n_subseq)
    lengths = np.zeros(nv * evaluation.n_subseq)
    successes = np.zeros(nv * evaluation.n_subseq)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        tf.global_variables_initializer().run()

        vars_to_load = []
        for v in tf.global_variables():
            if 'postnorm' not in v.name:
                vars_to_load.append(v)

        siam_ckpt_name = 'pretrained/siam_mcf.ckpt-50000'
        siam_saver = tf.train.Saver(vars_to_load)
        siam_saver.restore(sess, siam_ckpt_name)

        for i in range(nv):
            gt, frame_name_list, frame_sz, n_frames = _init_video(
                videos_list[i], videos_folder, dataset_type)
            starts = np.rint(
                np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]
            for j in range(evaluation.n_subseq):
                start_frame = int(starts[j])
                gt_ = gt[start_frame:, :]
                frame_name_list_ = frame_name_list[start_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                bboxes, speed[idx] = track_one_sequence(hp,
                                                        design,
                                                        frame_name_list_,
                                                        pos_x,
                                                        pos_y,
                                                        target_w,
                                                        target_h,
                                                        final_score_sz,
                                                        filename,
                                                        image,
                                                        templates_x,
                                                        templates_z,
                                                        scores_list,
                                                        videos_list[i],
                                                        dataset_type,
                                                        sess,
                                                        cmd_args.visualize,
                                                        cmd_args.save_images,
                                                        cmd_args.save_bboxes,
                                                        vot_handle=None,
                                                        gt=gt_)
                (lengths[idx], precisions[idx], precisions_auc[idx], ious[idx],
                 successes[idx]) = _compile_results(gt_, bboxes,
                                                    evaluation.dist_threshold)
                print(
                    str(i) + ' -- ' + videos_list[i] + ' -- Precision: ' +
                    "%.2f" % precisions[idx] + ' -- Precisions AUC: ' +
                    "%.2f" % precisions_auc[idx] + ' -- IOU: ' +
                    "%.2f" % ious[idx] + ' -- [email protected]: ' +
                    "%.2f" % successes[idx] + ' -- Speed: ' +
                    "%.2f" % speed[idx] + ' --')

    tot_frames = np.sum(lengths)
    mean_precision = np.sum(precisions * lengths) / tot_frames
    mean_precision_auc = np.sum(precisions_auc * lengths) / tot_frames
    mean_iou = np.sum(ious * lengths) / tot_frames
    mean_speed = np.sum(speed * lengths) / tot_frames
    mean_success = np.sum(successes * lengths) / tot_frames
    print('-- Overall stats (averaged per frame) on ' + str(nv) + ' videos (' +
          str(tot_frames) + ' frames) --')
    print(' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' +
          '%.2f' % mean_precision + ' -- Precisions AUC: ' +
          "%.2f" % mean_precision_auc + ' -- IOU: ' + "%.2f" % mean_iou +
          ' -- [email protected]: ' + "%.2f" % mean_success + ' -- Speed: ' +
          "%.2f" % mean_speed + ' --')
def main():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] =>   [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
    # instead of
    # [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    # The raw score map is smaller than the input image, so it is upsampled
    # here so that every image position gets a corresponding score.
    # This final size corresponds to design.search_sz.
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # build TF graph once for all
    filename, image, templates_z, scores = siam.build_tracking_graph(
        final_score_sz, design, env, hp)

    # iterate through all videos of evaluation.dataset
    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        videos_list = [v for v in os.listdir(dataset_folder)]
        videos_list.sort()
        nv = np.size(videos_list)
        speed = np.zeros(nv * evaluation.n_subseq)
        precisions = np.zeros(nv * evaluation.n_subseq)
        precisions_auc = np.zeros(nv * evaluation.n_subseq)
        ious = np.zeros(nv * evaluation.n_subseq)
        lengths = np.zeros(nv * evaluation.n_subseq)
        for i in range(nv):
            if os.path.exists(os.path.join('data/result', videos_list[i])):
                continue
            gt, frame_name_list, frame_sz, n_frames, video_folder, equal = _init_video(
                env, evaluation, videos_list[i])
            if not equal:
                print('The numbers of .jpg and .xml files differ in', video_folder)
                continue
            starts = np.rint(
                np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]
            # for j in range(evaluation.n_subseq):
            for j in range(1):
                start_frame = int(starts[j])
                gt_ = gt[start_frame:]
                frame_name_list_ = frame_name_list[start_frame:]
                # pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                pos_x, pos_y, target_w, target_h = xml_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                bboxes, speed[idx] = tracker(hp, run, design, frame_name_list_,
                                             pos_x, pos_y, target_w, target_h,
                                             final_score_sz, filename, image,
                                             templates_z, scores, start_frame)
                lengths[idx], precisions[idx], precisions_auc[idx], ious[idx] =\
                    _compile_results(gt_, bboxes, evaluation.dist_threshold)

                print(str(i) + ' -- ' + videos_list[i] +
                      ' -- Precision: ' + "%.2f" % precisions[idx] +
                      ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] +
                      ' -- IOU: ' + "%.2f" % ious[idx] +
                      ' -- Speed: ' + "%.2f" % speed[idx] + ' --')
                print()

                # Draw_Result(bboxes, frame_name_list_, gt_, env)

        tot_frames = np.sum(lengths)
        mean_precision = np.sum(precisions * lengths) / tot_frames
        mean_precision_auc = np.sum(precisions_auc * lengths) / tot_frames
        mean_iou = np.sum(ious * lengths) / tot_frames
        mean_speed = np.sum(speed * lengths) / tot_frames
        print()
        print('-- Overall stats (averaged per frame) on ' + str(
            nv) + ' videos (' + str(tot_frames) + ' frames) --')
        print(' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % mean_precision +
              ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +
              ' -- IOU: ' + "%.2f" % mean_iou +
              ' -- Speed: ' + "%.2f" % mean_speed + ' --')
        print()

    else:
        gt, frame_name_list, _, _, video_folder, equal = _init_video(
            env, evaluation, evaluation.video)
        if not equal:
            print("The numbers of .jpg and .xml files differ in", video_folder)
            exit(0)
        pos_x, pos_y, target_w, target_h = xml_to_bbox(
            gt[evaluation.start_frame])
        bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y,
                                target_w, target_h, final_score_sz, filename,
                                image, templates_z, scores,
                                evaluation.start_frame)
        _, precision, precision_auc, iou = _compile_results(
            gt, bboxes, evaluation.dist_threshold)
        print(evaluation.video +
              ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % precision +
              ' -- Precision AUC: ' + "%.2f" % precision_auc +
              ' -- IOU: ' + "%.2f" % iou +
              ' -- Speed: ' + "%.2f" % speed + ' --')
        print()
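The overall stats in these mains are length-weighted so that every frame, not every subsequence, counts equally; a worked example with made-up per-subsequence numbers:

import numpy as np

precisions = np.array([0.90, 0.50])   # hypothetical per-subsequence precision
lengths = np.array([300.0, 100.0])    # frames in each subsequence
mean_precision = np.sum(precisions * lengths) / np.sum(lengths)
print(mean_precision)                 # 0.80, not the unweighted mean 0.70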
Example #11
def main():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments(mode="conv2")
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] =>   [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
    # instead of
    # [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * design.score_sz
    # build TF graph once for all
    image, templates_z, scores = siam.build_tracking_graph(
        final_score_sz, design, env)
    # iterate through all videos of evaluation.dataset
    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        videos_list = [v for v in os.listdir(dataset_folder)]
        videos_list.sort()
        # videos_list = videos_list[91:][:] #only use vot 2016
        nv = np.size(videos_list)
        speed = np.zeros(nv * evaluation.n_subseq)
        precisions = np.zeros(nv * evaluation.n_subseq)
        precisions_auc = np.zeros(nv * evaluation.n_subseq)
        ious = np.zeros(nv * evaluation.n_subseq)
        success_auc = np.zeros(nv * evaluation.n_subseq)
        lengths = np.zeros(nv * evaluation.n_subseq)
        for i in range(nv):
            # gt, frame_name_list, frame_sz, n_frames, img_mode = _init_video(env, evaluation, videos_list[i])
            gt, frame_name_list, frame_sz, n_frames, img_mode = _init_video_OTB(
                env, evaluation, videos_list[i])

            starts = np.rint(
                np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]
            for j in range(evaluation.n_subseq):
                start_frame = int(starts[j])
                gt_ = gt[start_frame:, :]
                frame_name_list_ = frame_name_list[start_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                bboxes, speed[idx] = tracker(hp, run, design, frame_name_list_,
                                             pos_x, pos_y, target_w, target_h,
                                             final_score_sz, image,
                                             templates_z, scores, start_frame)
                lengths[idx], precisions[idx], precisions_auc[idx], ious[
                    idx], success_auc[idx] = _compile_results(
                        gt_, bboxes, evaluation.dist_threshold)

                print(str(i) + ' -- ' + videos_list[i] +
                      ' -- Precision: ' + "%.2f" % precisions[idx] +
                      ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] +
                      ' -- IOU: ' + "%.2f" % ious[idx] +
                      ' -- Success AUC: ' + "%.2f" % success_auc[idx] +
                      ' -- Speed: ' + "%.2f" % speed[idx] + ' --')
                print()

        tot_frames = np.sum(lengths)
        mean_precision = np.mean(precisions)
        mean_precision_auc = np.mean(precisions_auc)
        mean_iou = np.mean(ious)
        mean_success_auc = np.mean(success_auc)
        mean_speed = np.mean(speed)

        print('data set ' + evaluation.dataset +
              ' z_lr %f scale_step %f scale_penalty %f scale_lr %f window_influence %f' % (
                  hp.z_lr, hp.scale_step, hp.scale_penalty, hp.scale_lr,
                  hp.window_influence))
        print('-- Overall stats (averaged per frame) on ' + str(
            nv) + ' videos (' + str(tot_frames) + ' frames) --')
        print(' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % mean_precision +
              ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +
              ' -- IOU: ' + "%.2f" % mean_iou +
              ' -- Success AUC: ' + "%.2f" % mean_success_auc +
              ' -- Speed: ' + "%.2f" % mean_speed + ' --')
        print()

        with open('log_test.txt', 'a+') as f:
            f.write(time.asctime(time.localtime(time.time())) + '\r\n')
            f.write(
                'data set ' + evaluation.dataset +
                ' z_lr %f scale_step %f scale_penalty %f scale_lr %f window_influence %f \r\n'
                % (hp.z_lr, hp.scale_step, hp.scale_penalty, hp.scale_lr,
                   hp.window_influence))
            f.write('-- Overall stats (averaged per frame) on ' + str(nv) +
                    ' videos (' + str(tot_frames) + ' frames) --\r\n')
            f.write(' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % mean_precision + \
                    ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc + \
                    ' -- IOU: ' + "%.2f" % mean_iou + \
                    ' -- AUC: ' + "%.3f" % mean_success_auc + \
                    ' -- Speed: ' + "%.2f" % mean_speed + ' --\r\n')
            f.write('\r\n')

    else:
        gt, frame_name_list, _, _ = _init_video(env, evaluation,
                                                evaluation.video)
        pos_x, pos_y, target_w, target_h = region_to_bbox(
            gt[evaluation.start_frame])
        bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y,
                                target_w, target_h, final_score_sz, image,
                                templates_z, scores, evaluation.start_frame)
        _, precision, precision_auc, iou, _ = _compile_results(
            gt, bboxes, evaluation.dist_threshold)
        print(evaluation.video +
              ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % precision +
              ' -- Precision AUC: ' + "%.2f" % precision_auc +
              ' -- IOU: ' + "%.2f" % iou +
              ' -- Speed: ' + "%.2f" % speed + ' --')
        print()
Example #12
def main():

    #Command Line Arguments
    parser = argparse.ArgumentParser(description="Run WIND Project")
    parser.add_argument('-c',
                        '--clear',
                        action='store_true',
                        default=False,
                        help='Delete old camera data')
    parser.add_argument('-ny',
                        '--noYolo',
                        action='store_true',
                        default=False,
                        help='Do not use Yolo')
    parser.add_argument('-ns',
                        '--noSiamfc',
                        action='store_true',
                        default=False,
                        help='Do not use Siamfc')
    parser.add_argument('-nv',
                        '--noVideo',
                        action='store_true',
                        default=False,
                        help='Do not generate video')
    parser.add_argument('-dl',
                        '--dataLocation',
                        choices=['fromFile', 'fromCamera'],
                        help='Use live camera or folder of images')
    parser.add_argument('-cn',
                        '--cameraNumber',
                        default=0,
                        help='Camera number to use')
    parser.add_argument('-ff', '--filesFolder', default="")
    parser.add_argument('-rf',
                        '--refreshRate',
                        default=10,
                        help='Refresh rate for siamfc')
    parser.add_argument('-t',
                        '--timeRecording',
                        default=3,
                        help='Seconds to record from camera')

    #Assign command line arguments to global variables
    global cameraNumber
    global doYolo
    global doSiamfc
    global genVideos
    global refreshRate
    global liveFeed
    args = parser.parse_args()
    cameraNumber = int(args.cameraNumber)
    doYolo = not args.noYolo
    doSiamfc = not args.noSiamfc
    genVideos = not args.noVideo
    refreshRate = int(args.refreshRate)
    liveFeed = args.dataLocation == 'fromCamera'
    VideoLength = int(args.timeRecording)

    #Command line argument error checking
    #Clearing out old data in the cameradata folder
    if (args.clear == True):
        for oldData in os.listdir('CameraData'):
            filep = os.path.join('CameraData', oldData)
            if (os.path.isfile(filep)):
                os.remove(filep)
                print('Deleted: ' + filep)
            elif os.path.isdir(filep):
                for reallyOldData in os.listdir(filep):
                    newfilep = os.path.join(filep, reallyOldData)
                    if (os.path.isfile(newfilep)):
                        os.remove(newfilep)
                os.rmdir(filep)
                print('Deleted: ' + filep + '/')

        print('Done Deleting')

    #Checking if file folder is valid
    if (args.dataLocation == 'fromFile'):
        if (os.path.isdir(args.filesFolder)):
            dirList = [
                os.path.join(args.filesFolder, d)
                for d in os.listdir(args.filesFolder)
                if os.path.isdir(os.path.join(args.filesFolder, d))
            ]
            dirList.sort()
            testImage = Image.open(
                os.path.join(dirList[0],
                             os.listdir(dirList[0])[0]))
            testWidth, testHeight = testImage.size
        else:
            if (args.filesFolder == ""):
                print("No folder location was given")
            else:
                print(args.filesFolder + " is not a valid file location")
            return
    #Check if the camera is valid
    else:
        try:
            cam = cv2.VideoCapture(cameraNumber)
            ret, testImage = cam.read()
            testHeight, testWidth = testImage.shape[:2]
            cam.release()
        except:
            print("Camera number given is not valid or connected")
            return

    # Initialize object detector
    database.net = load_net(b"YoloConfig/yolov3-tiny.cfg",
                            b"YoloConfig/yolov3-tiny.weights", 0)
    database.meta = load_meta(b"YoloConfig/coco.data")

    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1

    # build TF graph once for all
    filename, image, templates_z, scores, graph, scfg = siam.build_tracking_graph(
        final_score_sz, design, env)
    #sFCgraph = siamfcGraph(filename, image, templates_z, scores)

    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    finalImages = []
    plt.xticks([]), plt.yticks([])
    YoloVid = cv2.VideoWriter('Yolov3Vid.avi', fourcc, 10,
                              (testWidth, testHeight))
    SiamfcVid = cv2.VideoWriter('SiamfcVid.avi', fourcc, 10,
                                (testWidth, testHeight))
    i = 0
    notDone = True

    now = datetime.datetime.now()
    if (args.dataLocation == 'fromCamera'):
        haha = threading.Thread(target=getImages,
                                args=('CameraData/', 10, VideoLength, database,
                                      now, graph, scfg))
        haha.start()
        haha.join()
        frame_name_list = _init_video('CameraData/%d_%d_%d/' %
                                      (now.hour, now.minute, now.second))
        while (notDone and i < VideoLength and genVideos and
               datetime.datetime.now() < now + datetime.timedelta(seconds=20 + VideoLength)):
            try:
                if os.path.isdir(
                        os.path.join(
                            'CameraData', '%d_%d_%d' %
                            (now.hour, now.minute, now.second + i))):
                    showYoloResult(
                        os.path.join(
                            'CameraData', '%d_%d_%d' %
                            (now.hour, now.minute, now.second + i)), YoloVid,
                        False)
                    showSiamFCResult(
                        os.path.join(
                            'CameraData', '%d_%d_%d' %
                            (now.hour, now.minute, now.second + i)), SiamfcVid,
                        True)
                    i = i + 1
                else:
                    time.sleep(.2)
            except (KeyboardInterrupt, SystemExit):
                notDone = False
    else:
        for dirName in dirList:
            if (doYolo):
                runYolo(dirName, database, fourcc, testWidth, testHeight,
                        graph, scfg)
                if (genVideos):
                    showYoloResult(dirName, YoloVid, False)
            if (doSiamfc and dirName == dirList[0]):
                runSiamfc(dirName, fourcc, testWidth, testHeight, graph, scfg)
                if (genVideos):
                    showSiamFCResult(dirName, SiamfcVid, True)

    # haha.join()
    YoloVid.release()
    SiamfcVid.release()
    return
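Note that argparse returns strings unless a type is given, hence the int(...) casts above; an equivalent sketch that lets argparse do the conversion (only two of the flags are shown):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('-cn', '--cameraNumber', type=int, default=0,
                    help='Camera number to use')
parser.add_argument('-rf', '--refreshRate', type=int, default=10,
                    help='Refresh rate for siamfc')
args = parser.parse_args([])                 # empty list: fall back to defaults
print(args.cameraNumber, args.refreshRate)   # 0 10, already ints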
Example #13
def runSiamfc(folderPath, fourcc, testWidth, testHeight, graph, scfg):
    print('Running Siamfc: ' + folderPath)
    frame_name_list = _init_video(folderPath)
    hp, evaluation, run, env, design = parse_arguments()
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    fp = open(os.path.join(folderPath, 'YoloBoxes.txt'))
    filename, image, templates_z, scores, graph1, scfg1 = siam.build_tracking_graph(
        final_score_sz, design, env)
    finalImages = []
    Allbboxes = []
    SiamfcVid = cv2.VideoWriter(join(folderPath, 'SiamfcVid.avi'), fourcc, 10,
                                (testWidth, testHeight))
    f = open(folderPath + "/SiamfcBoxes.txt", "w+")
    nucAngles = open(os.path.join(folderPath, "nuclearAngles.txt"), "r")
    print(refreshRate)
    for i in range(len(frame_name_list)):
        line = fp.readline()
        finalImages = []
        if (line == '\n'):
            continue
        elif (i % refreshRate == 0):
            boxes = line[:-1].split(':')
            boxNr = 0
            for j in Allbboxes:
                #label = j.label
                #pos_x = j.positions[len(j.positions)-1][0]
                #pos_y = j.positions[len(j.positions)-1][1]
                #target_w = j.positions[len(j.positions)-1][2]
                #target_h = j.positions[len(j.positions)-1][3]
                #bboxes, speed, finalImages = tracker(graph1, scfg1, hp, run, design, frame_name_list[i:i+refreshRate-1], pos_x, pos_y, target_w, target_h, final_score_sz,
                #                                    filename, image, templates_z, scores, label,0,colors[boxNr%len(colors)],0,refreshRate-1, finalImages, 0)
                #j.positions = np.concatenate((j.positions,bboxes),0)
                j.padafter(refreshRate)
            for j in boxes:
                box = j.split(',')
                label = box[0]
                box = list(map(int, box[1:]))  # list() so the ints can be indexed below
                print(
                    'In folder %s Image %d has a box at %d,%d,%d,%d with label %s'
                    % (folderPath, i, box[0], box[1], box[2], box[3], label))
                pos_x = box[0]
                pos_y = box[1]
                target_w = box[2]
                target_h = box[3]
                print('Pos_x: %d, Pos_y:%d, width:%d, height:%d' %
                      (pos_x, pos_y, target_w, target_h))
                bboxes, speed, finalImages = tracker(
                    graph1, scfg1, hp, run, design,
                    frame_name_list[i:i + refreshRate - 1], pos_x, pos_y,
                    target_w, target_h, final_score_sz, filename, image,
                    templates_z, scores, label, 0, colors[boxNr % len(colors)],
                    0, refreshRate - 1, finalImages, 0)
                newBox = sfc_bbox(colors[boxNr % len(colors)], label, bboxes,
                                  0)
                newBox.padfront(i)
                Allbboxes.append(newBox)
                boxNr = boxNr + 1
                print(bboxes)
            fname = i
            probs = [0] * len(Allbboxes)
            #print(Allbboxes)
            if (liveFeed):
                try:
                    oldFolderPath = folderPath.split('_')
                    oldFolderPath[2] = str(int(oldFolderPath[2]) - 1)
                    underscore = '_'
                    oldFolderPath = underscore.join(oldFolderPath)
                except:
                    print("Could not load old probabilities")
                    oldFolderPath = ""
            else:
                oldFolderPath = ""
            if (os.path.isfile(oldFolderPath + "/SiamfcBoxes.txt")):
                print('Extracting old probs')
                probs = getOldProbs(oldFolderPath, Allbboxes,
                                    [(item.split(','))[0] for item in boxes])
                print(probs)
            for j in range(len(finalImages)):
                #print(probs)
                angle = int(nucAngles.readline())
                calcProbs(finalImages[j], angle, Allbboxes, i + j, f)
                cv2.circle(finalImages[j],
                           (int(float(1.0 - angle / 180.0) * testWidth),
                            int(testHeight / 2)), 10, (0, 0, 225), -1)
                SiamfcVid.write(finalImages[j])
                cv2.imwrite(frame_name_list[j][0:-4] + '_siamfc.png',
                            finalImages[j])
                fname = fname + 1
            #break
    f.close()
    SiamfcVid.release()
    return
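runSiamfc above assumes each line of YoloBoxes.txt holds ':'-separated boxes of the form 'label,x,y,w,h'; a minimal parser for one such line (the line content is made up):

line = 'person,40,30,80,60:car,200,150,120,90\n'   # hypothetical file line
for entry in line.strip().split(':'):
    fields = entry.split(',')
    label = fields[0]
    pos_x, pos_y, target_w, target_h = map(int, fields[1:])
    print(label, pos_x, pos_y, target_w, target_h)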
def main():
    # avoid printing TF debugging information (only error logs are shown)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    #TODO:allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()

    # gt_, frame_name_list_, _, _ = _init_video(env, evaluation, evaluation.video)
    # pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
    # print('---target_w---' + "%d" % target_w + '--target_h---' + "%d" % target_h)
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example:
    #   [1, 4, 7] => [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
    # instead of
    #   [1, 4, 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    # design.score_sz = 33, hp.response_up = 8
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # build TF graph once for all
    # filename, image, templates_z, scores are graph ops only; the actual
    # values are produced by running them in a session (done inside tracker)
    # returns: filename, image, templates_z, scores_up
    filename, image, templates_z, scores = siam.build_tracking_graph(
        final_score_sz, design, env)

    #iterate through all videos of evaluation dataset
    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        # os.listdir(path): returns the files and folders under the given path
        videos_list = [v for v in os.listdir(dataset_folder)]
        videos_list.sort()
        nv = np.size(videos_list)
        speed = np.zeros(nv * evaluation.n_subseq)
        precisions = np.zeros(nv * evaluation.n_subseq)
        precisions_auc = np.zeros(nv * evaluation.n_subseq)
        ious = np.zeros(nv * evaluation.n_subseq)
        lengths = np.zeros(nv * evaluation.n_subseq)
        # iterate over the different video sequences
        for i in range(nv):
            #frame_name_list:each image of a video sequence
            gt, frame_name_list, frame_sz, n_frames = _init_video(
                env, evaluation, videos_list[i])
            # np.rint(): round floats to the nearest integer, keeping float dtype
            # n_subseq = 3
            starts = np.rint(
                np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            # split into n_subseq+1 points; keep the first n_subseq as start frames
            starts = starts[0:evaluation.n_subseq]
            for j in range(evaluation.n_subseq):
                start_frame = int(starts[j])
                # start_frame: use frames from start_frame onward (one of the n_subseq splits)
                gt_ = gt[start_frame:, :]
                frame_name_list_ = frame_name_list[start_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                #Update
                bboxes, speed[idx] = tracker(
                    hp,
                    run,
                    design,
                    env,
                    evaluation,
                    frame_name_list_,
                    pos_x,
                    pos_y,
                    #bboxes,speed[idx]=tracker(hp,run,design,frame_name_list_,pos_x,pos_y,
                    target_w,
                    target_h,
                    final_score_sz,
                    filename,
                    image,
                    templates_z,
                    scores,
                    start_frame)
                #gt_:ground truth
                #bboxes:the result of tracking
                lengths[idx], precisions[idx], precisions_auc[idx], ious[
                    idx] = _compile_results(gt_, bboxes,
                                            evaluation.dist_threshold)
                print(
                    str(i) + '--' + videos_list[i] + '--Precision: ' +
                    "%.2f" % precisions[idx] + '--Precisions AUC: ' +
                    "%.2f" % precisions_auc[idx] + '--IOU: ' +
                    "%.2f" % ious[idx] + '--Speed: ' + "%.2f" % speed[idx] +
                    '--')

    else:
        #evaluation.video='all'
        print(evaluation.video)
        gt, frame_name_list, _, _ = _init_video(env, evaluation,
                                                evaluation.video)
        #evaluation.start_frame=0
        pos_x, pos_y, target_w, target_h = region_to_bbox(
            gt[evaluation.start_frame])

        #Update
        #bboxes,speed=tracker(hp,run,design,frame_name_list,pos_x,pos_y,target_w,target_h,final_score_sz,
        bboxes, speed = tracker(hp, run, design, env, evaluation,
                                frame_name_list, pos_x, pos_y, target_w,
                                target_h, final_score_sz, filename, image,
                                templates_z, scores, evaluation.start_frame)
        _, precision, precisions_auc, iou = _compile_results(
            gt, bboxes, evaluation.dist_threshold)
        #print(evaluation.video+
        print(evaluation.video + '--Precision ' +
              "(%d px)" % evaluation.dist_threshold + ': ' +
              "%.2f" % precision + '--Precisions AUC: ' +
              "%.2f" % precisions_auc + '--IOU: ' + "%.2f" % iou +
              '--Speed: ' + "%.2f" % speed + '--')
Example #15
def main():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] =>   [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
    # instead of
    # [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # build TF graph once for all
    filename, image, templates_z, scores = siam.build_tracking_graph(
        final_score_sz, design, env)

    # iterate through all videos of evaluation.dataset
    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        videos_list = [v for v in os.listdir(dataset_folder)]
        videos_list.sort()
        nv = np.size(videos_list)
        speed = np.zeros(nv * evaluation.n_subseq)
        precisions = np.zeros(nv * evaluation.n_subseq)
        precisions_auc = np.zeros(nv * evaluation.n_subseq)
        ious = np.zeros(nv * evaluation.n_subseq)
        lengths = np.zeros(nv * evaluation.n_subseq)
        for i in range(nv):
            gt, frame_name_list, frame_sz, n_frames = _init_video(
                env, evaluation, videos_list[i])
            starts = np.rint(
                np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]
            for j in range(evaluation.n_subseq):
                start_frame = int(starts[j])
                gt_ = gt[start_frame:, :]
                frame_name_list_ = frame_name_list[start_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                # Call Tracker for the selected sequence
                print("Tracking started!")
                bboxes, speed[idx] = tracker(hp, run, design, frame_name_list_,
                                             pos_x, pos_y, target_w, target_h,
                                             final_score_sz, filename, image,
                                             templates_z, scores, start_frame)
                lengths[idx], precisions[idx], precisions_auc[idx], ious[
                    idx] = _compile_results(gt_, bboxes,
                                            evaluation.dist_threshold)
                print( str(i) + ' -- ' + videos_list[i] + \
                ' -- Precision: ' + "%.2f" % precisions[idx] + \
                ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] + \
                ' -- IOU: ' + "%.2f" % ious[idx] + \
                ' -- Speed: ' + "%.2f" % speed[idx] + ' --' )

        tot_frames = np.sum(lengths)
        mean_precision = np.sum(precisions * lengths) / tot_frames
        mean_precision_auc = np.sum(precisions_auc * lengths) / tot_frames
        mean_iou = np.sum(ious * lengths) / tot_frames
        mean_speed = np.sum(speed * lengths) / tot_frames
        print('-- Overall stats (averaged per frame) on ' + str(nv) +
              ' videos (' + str(tot_frames) + ' frames) --')
        print( ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % mean_precision +\
              ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +\
              ' -- IOU: ' + "%.2f" % mean_iou +\
              ' -- Speed: ' + "%.2f" % mean_speed + ' --' )

    else:
        gt, frame_name_list, _, n_frames = _init_video(env, evaluation,
                                                       evaluation.video)
        #pos_x, pos_y, target_w, target_h = region_to_bbox(gt[evaluation.start_frame])

        # np.size(frame_name_list) = Amount of frames
        # ott = amount of Objects To Track
        ott = len(gt) if evaluation.multi_object else 1
        objects = np.zeros((ott, 4))
        for i in range(ott):
            objects[i, :] = region_to_bbox(gt[i])

        # Call Tracker for the selected sequence.
        print("Tracking started!")
        bboxes, speed = tracker(hp, run, design, frame_name_list, objects,
                                final_score_sz, filename, image, templates_z,
                                scores, evaluation.start_frame)

        if evaluation.multi_object:
            print('No Ground Truth available for multi object, just printing speed result....\n' + \
                  evaluation.video + \
                  ' -- Speed: ' + "%.2f" % speed + ' --' )
        else:
            _, precision, precision_auc, iou = _compile_results(
                gt, bboxes, evaluation.dist_threshold)
            print( evaluation.video + \
                  ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % precision +\
                  ' -- Precision AUC: ' + "%.2f" % precision_auc + \
                  ' -- IOU: ' + "%.2f" % iou + \
                  ' -- Speed: ' + "%.2f" % speed + ' --' )
    print("Tracking finished!")
Example #16
def main():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] =>   [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
    # instead of
    # [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # build TF graph once for all
    filename, image, templates_z, templates_x, scores, scores_original = siam.build_tracking_graph(
        final_score_sz, design, env)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args_dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)

    # iterate through all videos of evaluation.dataset
    videos_list = list(dataset.videos.keys())
    videos_list.sort()
    nv = np.size(videos_list)
    for i in range(nv):
        current_key = sorted(list(dataset.videos.keys()))[i]
        gt, frame_name_list, frame_sz, n_frames = _init_video(
            dataset, current_key)
        for j in range(1):
            start_frame = 0
            gt_ = gt[start_frame:, :]
            frame_name_list_ = frame_name_list[start_frame:]
            pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
            bboxes, _ = tracker(videos_list[i], hp, run, design,
                                frame_name_list_, pos_x, pos_y, target_w,
                                target_h, final_score_sz, filename, image,
                                templates_z, templates_x, scores,
                                scores_original, start_frame)

            #Visualize
            if visualize:
                for bbox, groundt, frame_name in zip(bboxes, gt_,
                                                     frame_name_list_):
                    image = cv2.imread(frame_name)
                    bbox_pt1, bbox_pt2 = get_bbox_cv(bbox)
                    bbox_gt1, bbox_gt2 = get_gt_bbox_cv(groundt)

                    #Draw result
                    cv2.rectangle(image, bbox_pt1, bbox_pt2, (0, 255, 0))
                    #Draw ground truth
                    cv2.rectangle(image, bbox_gt1, bbox_gt2, (0, 0, 0))
                    cv2.imshow("Results:", image)
                    cv2.waitKey()

            bboxes = bboxes.tolist()
            bboxes[0] = [1]
            target_dir = os.path.join(result_output, current_key)
            if not os.path.exists(target_dir):
                os.mkdir(target_dir)
            results_file = current_key + "_" + "{:03d}".format(1) + ".txt"
            results_abs_file = os.path.join(target_dir, results_file)
            with open(results_abs_file, "w") as f:
                for bbox in bboxes:
                    if len(bbox) == 1:
                        f.write('%d\n' % (bbox[0]))
                    else:
                        f.write('%.2f, %.2f, %.2f, %.2f\n' %
                                (bbox[0], bbox[1], bbox[2], bbox[3]))