import os
import json
import time
import csv
import argparse

import cv2
import imageio
import numpy as np

# Project imports -- module aliases assumed from the calls below
# (obj.Object_Detector, obj.Tracker, act.Action_Detector). Helper functions
# (visualize_results, visualize_detection_results, visualize_cams, ...) and
# constants (SHOW_CAMS, T, NUM_INPUT_FRAMES, ...) are defined elsewhere in the
# source repository.
import object_detection.object_detector as obj
import action_detection.action_detector as act


def test_local_video():
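    """Run the object detector on every other frame of a local video and
    write the visualized detections to an output MP4."""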
    main_folder_path = os.path.abspath(
        os.path.join(os.path.dirname(__file__), '..'))
    obj_detection_graph = os.path.join(
        main_folder_path,
        'object_detection/weights/batched_zoo/faster_rcnn_nas_coco_2018_01_28/batched_graph/frozen_inference_graph.pb'
    )

    print("Loading object detection model at %s" % obj_detection_graph)

    Obj_Detector = obj.Object_Detector(obj_detection_graph)

    test_vid_path = "chase1Person1View3Point0.mp4"
    print('Testing on %s' % test_vid_path)

    reader = imageio.get_reader(test_vid_path, 'ffmpeg')
    fps = reader.get_meta_data()['fps'] // 2

    out_vid_path = "chase1Person1View3Point0_out.mp4"
    writer = imageio.get_writer(out_vid_path, fps=fps)
    print("Writing output video on %s" % out_vid_path)

    frame_cnt = 0
    for test_img in reader:
        frame_cnt += 1
        if frame_cnt % 2 == 0:
            continue
        print("frame_cnt: %i" % frame_cnt)
        expanded_img = np.expand_dims(test_img, axis=0)
        detection_list = Obj_Detector.detect_objects_in_np(expanded_img)
        out_img = visualize_results(test_img, detection_list, display=False)
        writer.append_data(out_img)

    writer.close()


def test_cropping_tubes_local_video():
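    """Detect and track people in a local video; once 30 frames have been
    read, crop a spatio-temporal tube for each of the first three actors and
    write each tube to its own video plus a JSON file with its ROI."""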
    main_folder_path = os.path.abspath(
        os.path.join(os.path.dirname(__file__), '..'))
    # obj_detection_graph =  os.path.join(main_folder_path, 'object_detection/weights/batched_zoo/faster_rcnn_nas_coco_2018_01_28/batched_graph/frozen_inference_graph.pb')
    obj_detection_graph = os.path.join(
        main_folder_path,
        'object_detection/weights/batched_zoo/faster_rcnn_nas_lowproposals_coco_2018_01_28/batched_graph/frozen_inference_graph.pb'
    )

    print("Loading object detection model at %s" % obj_detection_graph)

    Obj_Detector = obj.Object_Detector(obj_detection_graph)
    Tracker = obj.Tracker()

    test_vid_path = "chase1Person1View3Point0.mp4"
    print('Testing on %s' % test_vid_path)

    reader = imageio.get_reader(test_vid_path, 'ffmpeg')
    fps = reader.get_meta_data()['fps'] // 2

    # out_vid_path = "chase1Person1View3Point0_out.mp4"
    # writer = imageio.get_writer(out_vid_path, fps=fps)
    # print("Writing output video on %s" %out_vid_path)
    actors = [0, 1, 2]
    writers = []
    for ii in actors:
        writer = imageio.get_writer("person_%i.mp4" % ii, fps=fps)
        print("Video writer set for person_%i" % ii)
        writers.append(writer)

    frame_cnt = 0
    for test_img in reader:
        frame_cnt += 1
        if frame_cnt % 2 == 0:
            continue
        print("frame_cnt: %i" % frame_cnt)
        expanded_img = np.expand_dims(test_img, axis=0)
        detection_list = Obj_Detector.detect_objects_in_np(expanded_img)
        detection_info = [info[0] for info in detection_list]
        Tracker.update_tracker(detection_info, test_img)
        if frame_cnt > 30:
            print("writing segments")
            for actor_no, writer in zip(actors, writers):
                tube, roi = Tracker.crop_person_tube(actor_no)
                for ii in range(tube.shape[0]):
                    writer.append_data(np.uint8(tube[ii]))
                writer.close()
                roi = [float("%.4f" % coord) for coord in roi]
                with open('person_%i_roi.json' % actor_no, 'w') as fp:
                    json.dump(roi, fp)
                print("Actor %i video and roi written" % actor_no)
            break


def run_obj_det_and_track_in_batches(frame_q, detection_q, det_vis_q,
                                     obj_batch_size, obj_gpu):
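    """Worker process: pull frames from frame_q in batches of obj_batch_size,
    run object detection and tracking, and push per-frame actor snapshots and
    ROIs onto detection_q (and det_vis_q for visualization)."""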
    # Workaround: TensorFlow must be imported inside this process; otherwise
    # the same GPUs cannot be used by both processes.
    import tensorflow as tf
    #os.environ['CUDA_VISIBLE_DEVICES'] = obj_gpu
    os.environ['CUDA_VISIBLE_DEVICES'] = ""
    main_folder = "./"

    obj_detection_graph = "./object_detection/weights/ssd_mobilenet_v2_coco_2018_03_29/frozen_inference_graph.pb"
    #obj_detection_graph = "./object_detection/weights/faster_rcnn_resnet101_coco_2018_01_28/frozen_inference_graph.pb"

    print("Loading object detection model at %s" % obj_detection_graph)

    obj_detector = obj.Object_Detector(obj_detection_graph)
    tracker = obj.Tracker(timesteps=T)
    while True:
        img_batch = []
        for _ in range(obj_batch_size):
            cur_img = frame_q.get()
            img_batch.append(cur_img)
        #expanded_img = np.expand_dims(cur_img, axis=0)
        expanded_img = np.stack(img_batch, axis=0)
        start_time = time.time()
        detection_list = obj_detector.detect_objects_in_np(expanded_img)
        end_time = time.time()
        print("%.3f second per image" %
              ((end_time - start_time) / float(obj_batch_size)))
        for ii in range(obj_batch_size):
            cur_img = img_batch[ii]
            detection_info = [info[ii] for info in detection_list]
            tracker.update_tracker(detection_info, cur_img)
            rois_np, temporal_rois_np = tracker.generate_all_rois()
            actors_snapshot = []
            for cur_actor in tracker.active_actors:
                act_id = cur_actor['actor_id']
                act_box = cur_actor['all_boxes'][-1][:]
                act_score = cur_actor['all_scores'][-1]
                actors_snapshot.append({
                    'actor_id': act_id,
                    'all_boxes': [act_box],
                    'all_scores': [act_score]
                })
            #print(len(actors_snapshot))
            #if actors_snapshot:
            #    detection_q.put([cur_img, actors_snapshot, rois_np, temporal_rois_np])
            #    det_vis_q.put([cur_img, actors_snapshot])
            detection_q.put(
                [cur_img, actors_snapshot, rois_np, temporal_rois_np])
            det_vis_q.put([cur_img, actors_snapshot])


def test_tracking_local_video():
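    """Run detection and tracking on every other frame of a local video and
    write the tracking visualization to an output MP4."""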
    main_folder_path = os.path.abspath(
        os.path.join(os.path.dirname(__file__), '..'))
    # obj_detection_graph =  os.path.join(main_folder_path, 'object_detection/weights/batched_zoo/faster_rcnn_nas_coco_2018_01_28/batched_graph/frozen_inference_graph.pb')
    obj_detection_graph = os.path.join(
        main_folder_path,
        'object_detection/weights/batched_zoo/faster_rcnn_nas_lowproposals_coco_2018_01_28/batched_graph/frozen_inference_graph.pb'
    )

    print("Loading object detection model at %s" % obj_detection_graph)

    Obj_Detector = obj.Object_Detector(obj_detection_graph)
    Tracker = obj.Tracker()

    #test_vid_path = "chase1Person1View3Point0.mp4"
    #test_vid_path = "VIRAT_S_000003_9_00.mp4"
    test_vid_path = "VIRAT_S_000101_1_00.mp4"
    print('Testing on %s' % test_vid_path)

    reader = imageio.get_reader(test_vid_path, 'ffmpeg')
    fps = reader.get_meta_data()['fps'] // 2

    #out_vid_path = "chase1Person1View3Point0_out.mp4"
    #out_vid_path = "VIRAT_S_000003_9_00_out.mp4"
    out_vid_path = "VIRAT_S_000101_1_00_out.mp4"
    writer = imageio.get_writer(out_vid_path, fps=fps)
    print("Writing output video on %s" % out_vid_path)

    frame_cnt = 0
    for test_img in reader:
        frame_cnt += 1
        if frame_cnt % 2 == 0:
            continue
        print("frame_cnt: %i" % frame_cnt)
        expanded_img = np.expand_dims(test_img, axis=0)
        detection_list = Obj_Detector.detect_objects_in_np(expanded_img)
        detection_info = [info[0] for info in detection_list]
        Tracker.update_tracker(detection_info, test_img)
        #print(Tracker.active_actors)
        out_img = visualize_results_from_tracking(test_img,
                                                  Tracker.active_actors,
                                                  Tracker.inactive_actors,
                                                  display=False)
        writer.append_data(out_img)

    writer.close()
    def __init__(self):
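        """Load the object detection graph, build the action detector with
        its tube-cropping and inference placeholders, and restore the action
        detector weights from the checkpoint."""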
        obj_detection_graph = os.path.join("object_detection", "weights",
                                           OBJ_DETECTION_MODEL,
                                           "frozen_inference_graph.pb")
        self.obj_detector = obj.Object_Detector(obj_detection_graph)

        self.act_detector = act.Action_Detector('soft_attn',
                                                timesteps=NUM_INPUT_FRAMES)
        crop_in_tubes = self.act_detector.crop_tubes_in_tf(
            [NUM_INPUT_FRAMES, HEIGHT, WIDTH, 3])
        (self.input_frames, self.temporal_rois,
         self.temporal_roi_batch_indices, self.cropped_frames) = crop_in_tubes
        self.rois, self.roi_batch_indices, self.pred_probs = (
            self.act_detector.define_inference_with_placeholders_noinput(
                self.cropped_frames))

        ckpt_path = os.path.join(MAIN_FOLDER, 'action_detection', 'weights',
                                 CKPT_NAME)
        self.act_detector.restore_model(ckpt_path)


def test_local_image():
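    """Run the object detector on a single local image and write the
    visualized detections to an output JPEG."""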

    main_folder_path = os.path.abspath(
        os.path.join(os.path.dirname(__file__), '..'))
    obj_detection_graph = os.path.join(
        main_folder_path,
        'object_detection/weights/batched_zoo/faster_rcnn_nas_coco_2018_01_28/batched_graph/frozen_inference_graph.pb'
    )

    print("Loading object detection model at %s" % obj_detection_graph)

    Obj_Detector = obj.Object_Detector(obj_detection_graph)

    test_img_path = 'chase.png'
    print('Testing on %s' % test_img_path)
    test_img = cv2.imread(test_img_path)
    expanded_img = np.expand_dims(test_img, axis=0)
    detection_list = Obj_Detector.detect_objects_in_np(expanded_img)
    out_img = visualize_results(test_img, detection_list, display=False)
    #import pdb;pdb.set_trace()
    out_img_path = 'chase_out.jpg'
    cv2.imwrite(out_img_path, out_img)
    print("Output image %s written!" % out_img_path)


Example #7

def main():
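    """End-to-end demo: detect and track people in the input video, run the
    action detector every `action_freq` frames on the tracked tubes, and
    either display the annotated frames or write them to an output video."""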
    parser = argparse.ArgumentParser()

    parser.add_argument('-v',
                        '--video_path',
                        type=str,
                        required=False,
                        default="")
    parser.add_argument('-d',
                        '--display',
                        type=str,
                        required=False,
                        default="True")

    args = parser.parse_args()
    display = (args.display == "True" or args.display == "true")

    #actor_to_display = 6 # for cams

    video_path = args.video_path
    basename = os.path.basename(video_path).split('.')[0]
    out_vid_path = "./output_videos/%s_output.mp4" % (
        basename if not SHOW_CAMS else basename +
        '_cams_actor_%.2d' % actor_to_display)
    #out_vid_path = './output_videos/testing.mp4'

    # video_path = "./tests/chase1Person1View3Point0.mp4"
    # out_vid_path = 'output.mp4'

    main_folder = './'

    # NAS

    obj_detection_model = 'ssd_mobilenet_v2_coco_2018_03_29'
    obj_detection_graph = os.path.join("object_detection", "weights",
                                       obj_detection_model,
                                       "frozen_inference_graph.pb")

    print("Loading object detection model at %s" % obj_detection_graph)

    obj_detector = obj.Object_Detector(obj_detection_graph)
    tracker = obj.Tracker()

    print("Reading video file %s" % video_path)
    reader = imageio.get_reader(video_path, 'ffmpeg')
    action_freq = 8
    # fps_divider = 1
    print('Running actions every %i frames' % action_freq)
    fps = reader.get_meta_data()['fps']  #// fps_divider
    W, H = reader.get_meta_data()['size']
    T = tracker.timesteps
    if not display:
        writer = imageio.get_writer(out_vid_path, fps=fps)
        print("Writing output to %s" % out_vid_path)

    # act_detector = act.Action_Detector('i3d_tail')
    # ckpt_name = 'model_ckpt_RGB_i3d_pooled_tail-4'
    act_detector = act.Action_Detector('soft_attn')
    #ckpt_name = 'model_ckpt_RGB_soft_attn-16'
    #ckpt_name = 'model_ckpt_soft_attn_ava-23'
    ckpt_name = 'model_ckpt_soft_attn_pooled_cosine_drop_ava-130'

    #input_frames, temporal_rois, temporal_roi_batch_indices, cropped_frames = act_detector.crop_tubes_in_tf([T,H,W,3])
    memory_size = act_detector.timesteps - action_freq
    updated_frames, temporal_rois, temporal_roi_batch_indices, cropped_frames = act_detector.crop_tubes_in_tf_with_memory(
        [T, H, W, 3], memory_size)
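    # The graph keeps the previous `memory_size` frames internally, so at each
    # step only the newest `action_freq` frames need to be fed (see the
    # feed_dict below).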

    rois, roi_batch_indices, pred_probs = act_detector.define_inference_with_placeholders_noinput(
        cropped_frames)

    ckpt_path = os.path.join(main_folder, 'action_detection', 'weights',
                             ckpt_name)
    act_detector.restore_model(ckpt_path)

    prob_dict = {}
    frame_cnt = 0
    for cur_img in reader:
        frame_cnt += 1
        #tracker.add_frame(cur_img)
        print("frame_cnt: %i" % frame_cnt)
        # Object Detection
        expanded_img = np.expand_dims(cur_img, axis=0)
        #expanded_img = np.tile(expanded_img, [10,1,1,1]) # test the speed
        t1 = time.time()
        detection_list = obj_detector.detect_objects_in_np(expanded_img)
        detection_info = [info[0] for info in detection_list]
        t2 = time.time()
        print('obj det %.2f seconds' % (t2 - t1))
        tracker.update_tracker(detection_info, cur_img)
        t3 = time.time()
        print('tracker %.2f seconds' % (t3 - t2))
        no_actors = len(tracker.active_actors)

        if tracker.active_actors and frame_cnt % action_freq == 0:
            probs = []

            cur_input_sequence = np.expand_dims(
                np.stack(tracker.frame_history[-action_freq:], axis=0), axis=0)

            rois_np, temporal_rois_np = tracker.generate_all_rois()
            if no_actors > 14:
                no_actors = 14
                rois_np = rois_np[:14]
                temporal_rois_np = temporal_rois_np[:14]

            #feed_dict = {input_frames:cur_input_sequence,
            feed_dict = {
                updated_frames: cur_input_sequence,  # only update the last action_freq frames
                temporal_rois: temporal_rois_np,
                temporal_roi_batch_indices: np.zeros(no_actors),
                rois: rois_np,
                roi_batch_indices: np.arange(no_actors)
            }
            run_dict = {'pred_probs': pred_probs}
            if SHOW_CAMS:
                run_dict['cropped_frames'] = cropped_frames
                #import pdb;pdb.set_trace()
                run_dict['final_i3d_feats'] = act_detector.act_graph.get_collection('final_i3d_feats')[0]
                #run_dict['cls_weights'] = [var for var in tf.global_variables() if var.name == "CLS_Logits/kernel:0"][0]
                run_dict['cls_weights'] = act_detector.act_graph.get_collection('variables')[-2]  # this is the kernel
            #import pdb;pdb.set_trace()
            out_dict = act_detector.session.run(run_dict, feed_dict=feed_dict)
            probs = out_dict['pred_probs']
            # associate probs with actor ids
            print_top_k = 5
            for bb in range(no_actors):
                act_probs = probs[bb]
                order = np.argsort(act_probs)[::-1]
                cur_actor_id = tracker.active_actors[bb]['actor_id']
                print("Person %i" % cur_actor_id)
                cur_results = []
                for pp in range(print_top_k):
                    print(
                        '\t %s: %.3f' %
                        (act.ACTION_STRINGS[order[pp]], act_probs[order[pp]]))
                    cur_results.append(
                        (act.ACTION_STRINGS[order[pp]], act_probs[order[pp]]))
                prob_dict[cur_actor_id] = cur_results

            t5 = time.time()
            print('action %.2f seconds' % (t5 - t3))
        # # Action detection
        # no_actors = len(tracker.active_actors)
        # #batch_np = np.zeros([no_actors, act_detector.timesteps] + act_detector.input_size + [3], np.uint8)
        # batch_list = []
        # rois_np = np.zeros([no_actors, 4])
        # batch_indices_np = np.array(range(no_actors))
        # for bb, actor_info in enumerate(tracker.active_actors):
        #     actor_no = actor_info['actor_id']
        #     tube, roi = tracker.crop_person_tube(actor_no)
        #     #batch_np[bb, :] = tube
        #     batch_list.append(tube)
        #     rois_np[bb]= roi
        #t4 = time.time(); print('cropping %.2f seconds' % (t4-t3))

        # if tracker.active_actors:
        #     batch_np = np.stack(batch_list, axis=0)
        #     max_batch_size = 10
        #     prob_list = []
        #     cur_index = 0
        #     while cur_index < no_actors:
        #         cur_batch = batch_np[cur_index:cur_index+max_batch_size]
        #         cur_roi = rois_np[cur_index:cur_index+max_batch_size]
        #         cur_indices = batch_indices_np[cur_index:cur_index+max_batch_size] - cur_index
        #         feed_dict = {input_seq:cur_batch, rois:cur_roi, roi_batch_indices:cur_indices}
        #         #t51 = time.time(); print('action before run %.2f seconds' % (t51-t4))
        #         cur_probs = act_detector.session.run(pred_probs, feed_dict=feed_dict)
        #         #t52 = time.time(); print('action after run %.2f seconds' % (t52-t51))
        #         prob_list.append(cur_probs)
        #         cur_index += max_batch_size
        #     probs = np.concatenate(prob_list, axis=0)

        #t5 = time.time(); print('action %.2f seconds' % (t5-t4))
        # Print top_k probs
        #out_img = visualize_detection_results(cur_img, tracker.active_actors, prob_dict)
        if frame_cnt > 16:
            out_img = visualize_detection_results(tracker.frame_history[-16],
                                                  tracker.active_actors,
                                                  prob_dict)
            if SHOW_CAMS:
                if tracker.active_actors:
                    actor_indices = [
                        ii for ii in range(no_actors)
                        if tracker.active_actors[ii]['actor_id'] ==
                        actor_to_display
                    ]
                    if actor_indices:
                        out_img = visualize_cams(out_img, cur_input_sequence,
                                                 out_dict, actor_indices[0])
                    else:
                        continue
                else:
                    continue
            if display:
                cv2.imshow('results', out_img[:, :, ::-1])
                cv2.waitKey(10)
            else:
                writer.append_data(out_img)

    if not display:
        writer.close()


Example #8

def main():
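    """Interactive variant of the demo: the user identifies the teacher among
    the tracked actors, action detection is run only on the teacher's tube,
    and the top predictions are logged to a CSV file alongside the output
    video."""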
    parser = argparse.ArgumentParser()

    parser.add_argument('-v', '--video_path', type=str, required=False, default="")
    parser.add_argument('-d', '--display', type=str, required=False, default="True")

    args = parser.parse_args()
    display = (args.display == "True" or args.display == "true")
    
    #actor_to_display = 6 # for cams

    video_path = args.video_path
    basename = os.path.basename(video_path).split('.')[0]
    out_vid_path = "./output_videos/%s_output.mp4" % (basename if not SHOW_CAMS else basename+'_cams_actor_%.2d' % actor_to_display)
    clf_out_path = "./clf_output/{}_output.csv".format(basename if not SHOW_CAMS else basename+'_cams_actor_{}'.format(actor_to_display))
    #out_vid_path = './output_videos/testing.mp4'

    # video_path = "./tests/chase1Person1View3Point0.mp4"
    # out_vid_path = 'output.mp4'

    main_folder = './'

    # NAS

    obj_detection_model = 'ssd_mobilenet_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03'
    obj_detection_graph = os.path.join("object_detection", "weights", obj_detection_model, "frozen_inference_graph.pb")



    print("Loading object detection model at %s" % obj_detection_graph)


    obj_detector = obj.Object_Detector(obj_detection_graph)
    tracker = obj.Tracker()

    


    print("Reading video file %s" % video_path)
    reader = imageio.get_reader(video_path, 'ffmpeg')
    action_freq = 8
    # fps_divider = 1
    print('Running actions every %i frames' % action_freq)
    fps = reader.get_meta_data()['fps'] #// fps_divider
    print("FPS: {}".format(fps))
    W, H = reader.get_meta_data()['size']
    T = tracker.timesteps
    #if not display:
    writer = imageio.get_writer(out_vid_path, fps=fps)
    csv_file = open(clf_out_path, 'w', newline='')
    csv_writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    csv_writer.writerow(['Time', 'Person', 'Action', 'Probability'])
    print("Writing output to %s" % out_vid_path)

    
    # act_detector = act.Action_Detector('i3d_tail')
    # ckpt_name = 'model_ckpt_RGB_i3d_pooled_tail-4'
    act_detector = act.Action_Detector('soft_attn')
    #ckpt_name = 'model_ckpt_RGB_soft_attn-16'
    #ckpt_name = 'model_ckpt_soft_attn_ava-23'
    ckpt_name = 'model_ckpt_soft_attn_pooled_cosine_drop_ava-130'

    #input_frames, temporal_rois, temporal_roi_batch_indices, cropped_frames = act_detector.crop_tubes_in_tf([T,H,W,3])
    memory_size = act_detector.timesteps - action_freq
    updated_frames, temporal_rois, temporal_roi_batch_indices, cropped_frames = act_detector.crop_tubes_in_tf_with_memory([T,H,W,3], memory_size)
    
    rois, roi_batch_indices, pred_probs = act_detector.define_inference_with_placeholders_noinput(cropped_frames)
    

    ckpt_path = os.path.join(main_folder, 'action_detection', 'weights', ckpt_name)
    act_detector.restore_model(ckpt_path)

    prob_dict = {}
    frame_cnt = 0

    # Tewan
    min_teacher_features = 3
    teacher_identified = 0
    #missed_frame_cnt = 0
    #max_age = 120
    #frame_skips = 60
    #next_frame = 0
    teacher_ids = []
    matched_id = None
    # Tewan

    for cur_img in reader:
        frame_cnt += 1

        #if frame_cnt < next_frame:
        #    continue

        # Detect objects and make predictions every 8 frames (0.3 seconds)
        #if frame_cnt % action_freq == 0:

        # Object Detection
        expanded_img = np.expand_dims(cur_img, axis=0)
        detection_list = obj_detector.detect_objects_in_np(expanded_img) 
        detection_info = [info[0] for info in detection_list]
        # Updates active actors in tracker
        tracker.update_tracker(detection_info, cur_img)
        no_actors = len(tracker.active_actors)

        """
        if no_actors == 0:
            missed_frame_cnt += 1

            if missed_frame_cnt >= max_age:
                tracker.update_tracker(detection_info, cur_img)
                no_actors = len(tracker.active_actors)
                teacher_identified = False
                tracker.set_invalid_track()
                missed_frame_cnt = 0

                print("Reset active actors. Current number: {}".format(no_actors))

        """

        if frame_cnt % action_freq == 0 and frame_cnt > 16:
            if no_actors == 0:
                print("No actor found.")
                continue

            video_time = round(frame_cnt / fps, 1)
            valid_actor_ids = [actor["actor_id"] for actor in tracker.active_actors]

            print("frame count: {}, video time: {}s".format(frame_cnt, video_time))
            probs = []

            cur_input_sequence = np.expand_dims(np.stack(tracker.frame_history[-action_freq:], axis=0), axis=0)

            rois_np, temporal_rois_np = tracker.generate_all_rois()

            if teacher_identified < min_teacher_features:
                prompt_img = visualize_detection_results(img_np=tracker.frame_history[-16],
                                                         active_actors=tracker.active_actors,
                                                         prob_dict=None)
                cv2.imshow('prompt_img', prompt_img[:,:,::-1])
                cv2.waitKey(500)
                teacher_present = False

                teacher_id = _prompt_user_input()

                if not _check_teacher_in_frame(teacher_id=teacher_id):
                    print("Teacher not in this frame. Continuing.")
                    cv2.destroyWindow("prompt_img")
                    pass

                else:
                    if _check_valid_teacher_id(teacher_id=teacher_id, valid_actor_ids=valid_actor_ids):
                        teacher_id = int(teacher_id)
                        teacher_identified += 1
                        teacher_present = True

                    else:
                        while not teacher_present:
                            print("Invalid ID.")
                            teacher_id = _prompt_user_input()

                            if not _check_teacher_in_frame(teacher_id=teacher_id):
                                print("Teacher not in this frame. Continuing.")
                                cv2.destroyWindow("prompt_img")
                                break

                            else:
                                if _check_valid_teacher_id(teacher_id=teacher_id, valid_actor_ids=valid_actor_ids):
                                    teacher_id = int(teacher_id)
                                    teacher_identified += 1
                                    teacher_present = True
                                else:
                                    pass

                # Move on to next frame if teacher not in current frame
                if not teacher_present:
                    continue
                cv2.destroyWindow("prompt_img")

                if teacher_id not in teacher_ids:
                    teacher_ids.append(teacher_id)
                    tracker.update_teacher_candidate_ids(teacher_candidate_id=teacher_id)
            else:
                tracker.set_valid_track()

            # Identify idx of teacher for ROI selection                
            roi_idx = None
            found_id = False
            for idx, actor_info in enumerate(tracker.active_actors):
                actor_id = actor_info["actor_id"]
                for i in range(len(teacher_ids)-1, -1, -1):
                    if actor_id == teacher_ids[i]:
                        roi_idx = idx
                        matched_id = actor_info["actor_id"]
                        found_id = True
                        break
                if found_id:
                    break

            # Identify ROI and temporal ROI using ROI idx 
            if roi_idx is not None:
                rois_np = rois_np[roi_idx]
                temporal_rois_np = temporal_rois_np[roi_idx]
                rois_np = np.expand_dims(rois_np, axis=0)
                temporal_rois_np = np.expand_dims(temporal_rois_np, axis=0)
                no_actors = 1
            # If teacher not found (i.e. roi_idx is None) in current frame, move on to next frame
            else:
                continue

            #max_actors = 5
            #if no_actors > max_actors:
            #    no_actors = max_actors
            #    rois_np = rois_np[:max_actors]
            #    temporal_rois_np = temporal_rois_np[:max_actors]

            # Note: since the action model is run on only one actor here (to
            # limit memory use), the attention map may not be fully utilized.
            feed_dict = {updated_frames: cur_input_sequence,  # only update the last action_freq frames
                         temporal_rois: temporal_rois_np,
                         temporal_roi_batch_indices: np.zeros(no_actors),
                         rois: rois_np,
                         roi_batch_indices: np.arange(no_actors)}
            run_dict = {'pred_probs': pred_probs}

            if SHOW_CAMS:
                run_dict['cropped_frames'] = cropped_frames
                run_dict['final_i3d_feats'] = act_detector.act_graph.get_collection('final_i3d_feats')[0]
                run_dict['cls_weights'] = act_detector.act_graph.get_collection('variables')[-2]  # this is the kernel

            out_dict = act_detector.session.run(run_dict, feed_dict=feed_dict)
            probs = out_dict['pred_probs']

            # associate probs with actor ids
            print_top_k = 5
            for bb in range(no_actors):
                #act_probs = probs[bb]
                #order = np.argsort(act_probs)[::-1]
                #cur_actor_id = tracker.active_actors[bb]['actor_id']
                act_probs = probs[bb]
                order = np.argsort(act_probs)[::-1]
                cur_actor_id = tracker.active_actors[roi_idx]["actor_id"]
                #print(cur_actor_id == actor_id)
                #print("Person %i" % cur_actor_id)
                #print("act_probs: {}".format(act_probs))
                #print("order: {}".format(order))
                #print("tracker.active_actors[bb]: {}".format(tracker.active_actors[bb]))
                cur_results = []
                for pp in range(print_top_k):
                    #print('\t %s: %.3f' % (act.ACTION_STRINGS[order[pp]], act_probs[order[pp]]))
                    cur_results.append((act.ACTION_STRINGS[order[pp]], act_probs[order[pp]]))
                    csv_writer.writerow([video_time, cur_actor_id, act.ACTION_STRINGS[order[pp]], act_probs[order[pp]]])

                prob_dict[cur_actor_id] = cur_results
        
        if frame_cnt > 16:
            out_img = visualize_detection_results(tracker.frame_history[-16],
                                                  tracker.active_actors,
                                                  prob_dict=prob_dict,
                                                  teacher_id=matched_id)
            if SHOW_CAMS:
                if tracker.active_actors:
                    actor_indices = [ii for ii in range(no_actors) if tracker.active_actors[ii]['actor_id'] == actor_to_display]
                    if actor_indices:
                        out_img = visualize_cams(out_img, cur_input_sequence, out_dict, actor_indices[0])
                    else:
                        continue
                else:
                    continue
            if display: 
                cv2.imshow('results', out_img[:,:,::-1])
                cv2.waitKey(10)

            writer.append_data(out_img)

    #if not display:
    writer.close()
    csv_file.close()