Example #1
0
class TrackerManager(object):
    def __init__(self, vid_list, init_time):
        self.init_time = init_time

        video_id, camera_id, max_frames, width, height = vid_list[1:]
        self.max_frames = max_frames

        self.tracker = Tracker(init_time, video_id, max_frames, camera_id,
                               width, height)

        self.postprocess_trans = get_postprocess_trans(height, width)

        self.prev_img = None
        self.n = 0

    def process_output(self, dets):
        dets = post_process(dets, self.postprocess_trans)[0]
        self.tracker.step(dets)
        self.n += 1

    def is_done(self):
        return self.n >= self.max_frames

    def finalize(self):
        self.tracker.finalize()
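A minimal driver sketch (not from the original source), assuming the project modules used above are importable and that run_detector is a hypothetical helper returning the raw per-frame detections expected by post_process:

def run_tracking(vid_list, run_detector, init_time):
    manager = TrackerManager(vid_list, init_time)
    while not manager.is_done():
        dets = run_detector()          # hypothetical: raw detections for the next frame
        manager.process_output(dets)   # post-processes dets and steps the tracker
    manager.finalize()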
Example #2
0
def export_measures(workspace_path: str, dataset: Dataset, tracker: Tracker,
                    overlaps: list, failures: list, times: list):

    # create per-sequence output structure
    speed = len(dataset.sequences) * [0]
    results = len(dataset.sequences) * [0]
    for i, sequence in enumerate(dataset.sequences):
        speed_fps = 1.0 / times[i]
        results[i] = {'sequence_name': sequence.name, 'sequence_length': sequence.length,
                      'overlap': overlaps[i], 'failures': failures[i], 'speed': speed_fps}
        speed[i] = speed_fps

    # average measures
    average_overlap = sum(overlaps) / len(dataset.sequences)
    total_failures = sum(failures)
    average_speed = sum(speed) / len(dataset.sequences)

    # final output structure with all information
    output = {'tracker_name': tracker.name(), 'results': results, 'average_overlap': average_overlap,
              'total_failures': total_failures, 'average_speed': average_speed, 'total_frames': dataset.number_frames}

    # create output directory and save output in json file
    output_dir = os.path.join(workspace_path, 'analysis', tracker.name())
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    file_path = os.path.join(output_dir, 'results.json')

    with open(file_path, 'w') as f:
        json.dump(output, f, indent=2)

    print_summary(output)

    return output
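For reference (not part of the original example), the results.json written above has roughly this shape; the values below are placeholders, not real measurements:

{
  "tracker_name": "my_tracker",
  "results": [
    {"sequence_name": "seq01", "sequence_length": 100,
     "overlap": 0.5, "failures": 2, "speed": 30.0}
  ],
  "average_overlap": 0.5,
  "total_failures": 2,
  "average_speed": 30.0,
  "total_frames": 100
}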
Example #3
0
    def __init__(self, opt):
        if opt.gpus[0] >= 0:
            opt.device = torch.device('cuda')
        else:
            opt.device = torch.device('cpu')

        print('Creating model...')
        self.model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt)
        self.model = load_model(self.model, opt.load_model, opt)
        self.model = self.model.to(opt.device)
        self.model.eval()
        # inp = (torch.ones([1, 3, 320, 320]).cuda(),
        #        torch.ones([1, 3, 320, 320]).cuda(),
        #        torch.ones([1, 1, 320, 320]).cuda())
        # pytorch_to_caffe.trans_net(self.model, inp, 'res18')
        # pytorch_to_caffe.save_prototxt('{}.prototxt'.format('res18'))
        # pytorch_to_caffe.save_caffemodel('{}.caffemodel'.format('res18'))

        self.opt = opt
        self.trained_dataset = get_dataset(opt.dataset)
        self.mean = np.array(self.trained_dataset.mean,
                             dtype=np.float32).reshape(1, 1, 3)
        self.std = np.array(self.trained_dataset.std,
                            dtype=np.float32).reshape(1, 1, 3)
        self.pause = not opt.no_pause
        self.rest_focal_length = self.trained_dataset.rest_focal_length \
            if self.opt.test_focal_length < 0 else self.opt.test_focal_length
        self.flip_idx = self.trained_dataset.flip_idx
        self.cnt = 0
        self.pre_images = None
        self.pre_image_ori = None
        self.tracker = Tracker(opt)
        self.debugger = Debugger(opt=opt, dataset=self.trained_dataset)
Example #4
0
    def __init__(self, opt):
        if opt.gpus[0] >= 0:
            opt.device = torch.device('cuda')
        else:
            opt.device = torch.device('cpu')

        print('Creating model...')
        self.model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt)
        self.model = load_model(self.model, opt.load_model, opt)
        self.model = self.model.to(opt.device)
        self.model.eval()

        self.opt = opt
        self.trained_dataset = get_dataset(opt.dataset)
        self.mean = np.array(self.trained_dataset.mean,
                             dtype=np.float32).reshape(1, 1, 3)
        self.std = np.array(self.trained_dataset.std,
                            dtype=np.float32).reshape(1, 1, 3)
        self.pause = not opt.no_pause
        self.rest_focal_length = self.trained_dataset.rest_focal_length \
          if self.opt.test_focal_length < 0 else self.opt.test_focal_length
        self.flip_idx = self.trained_dataset.flip_idx
        self.cnt = 0
        self.pre_images = None
        self.pre_image_ori = None
        self.tracker = Tracker(opt)
        self.debugger = Debugger(opt=opt, dataset=self.trained_dataset)
Example #5
0
def reset(person_waiter: WaitingForPerson, person_checker: CheckingPerson, tracker: Tracker,
          temp_checker: TemperatureChecker, looker: Looker):
    """
    Resets the instances to their initial state.
    """
    person_waiter.reset()
    person_checker.reset()
    temp_checker.reset()
    tracker.reset()
    looker.stop()
Example #6
0
    def __init__(self, opt):
        if opt.gpus[0] >= 0:
            opt.device = torch.device('cuda')
        else:
            opt.device = torch.device('cpu')

        print('Creating model...')
        self.model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt)
        self.model = load_model(self.model, opt.load_model, opt)
        self.model = self.model.to(opt.device)
        self.model.eval()

        self.opt = opt
        self.trained_dataset = get_dataset(opt.dataset)
        self.mean = np.array(self.trained_dataset.mean,
                             dtype=np.float32).reshape(1, 1, 3)
        self.std = np.array(self.trained_dataset.std,
                            dtype=np.float32).reshape(1, 1, 3)
        self.pause = not opt.no_pause
        self.rest_focal_length = self.trained_dataset.rest_focal_length \
          if self.opt.test_focal_length < 0 else self.opt.test_focal_length
        self.flip_idx = self.trained_dataset.flip_idx
        self.cnt = 0
        self.pre_images = None
        self.pre_image_ori = None
        self.tracker = Tracker(opt)
        self.debugger = Debugger(opt=opt, dataset=self.trained_dataset)

        self.motion = opt.motion
        if self.motion == 'transformer':
            import sys
            M3_PATH = '/u/jozhang/code/motion3d/'
            sys.path.insert(0, M3_PATH)
            from models.transformer import DPTransformer
            # motion = DPTransformer(2, 64, {'depth': 3, 'heads': 8, 'dim_head': 8, 'mlp_dim': 64, 'dropout': 0.})
            # trans_path = '/scratch/cluster/jozhang/logs/hydra/2021-01-30/15-36-54/models/ckpt-latest.dat'
            ckpt = torch.load(opt.transformer_load_path)
            self.transformer = ckpt['model'].cuda()
            print(
                f'Using transformer motion loaded from {opt.transformer_load_path}'
            )
        elif self.motion == 'zero':
            print('Using no motion model')
        elif self.motion == 'cttrack':
            print('Using cttrack motion model')
        else:
            assert False, f'Unrecognized motion model {self.motion}'

        self.negate_motion = opt.negate_motion
        if self.negate_motion:
            logging.warning('Motion is being negated! Are you sure?')

        self.all_pre_images = []
Example #7
0
    def __init__(self, vid_list, init_time):
        self.init_time = init_time

        video_id, camera_id, max_frames, width, height = vid_list[1:]
        self.max_frames = max_frames

        self.tracker = Tracker(init_time, video_id, max_frames, camera_id,
                               width, height)

        self.postprocess_trans = get_postprocess_trans(height, width)

        self.prev_img = None
        self.n = 0
Example #8
0
    def reset_tracking(self, opt):
        if self.dataset == "nuscenes":
            self.tracker = {}
            for class_name in NUSCENES_TRACKING_NAMES:
                self.tracker[class_name] = Tracker(opt,
                                                   self.model,
                                                   h=self.img_height,
                                                   w=self.img_width)
        else:
            self.tracker = Tracker(opt,
                                   self.model,
                                   h=self.img_height,
                                   w=self.img_width)
        self.pre_images = None
        self.pre_image_ori = None
Example #9
0
def main(params):
    config = vars(parser.parse_args())

    # env = gym.make(config['env'])
    env = make_env(config['env'])
    env.seed(seed)

    agent = PPO(env, cfg['agent'])
    tag = params['tag']

    # Initiate the tracker for stats
    tracker = Tracker(
        config['env'],  #env.unwrapped.spec.id,
        tag,
        seed,
        cfg['agent'],
        ['Epoch', 'Ep_Reward', 'Cost'])

    # Train the agent
    agent.train(tracker,
                n_episodes=config['epochs'],
                n_step=config['stepmax'],
                verbose=config['verbose'],
                params=cfg['agent'],
                hyperp=config)
Example #10
0
def train_ner_model(wordlists_path):
    master = combine_wordlists(PROJ_PATH + '/' + wordlists_path)
    train_valid_split = int(len(master) * 0.8)
    train_data = master[0:train_valid_split]
    valid_data = master[train_valid_split:]

    (train_sents, word_tokenizer), (train_chars, char_tokenizer), (
        train_concept, concept_tokenizer) = load_data(train_data)
    (valid_sents, _), (valid_chars, _), (valid_concept, _) = load_data(
        valid_data, word_tokenizer, concept_tokenizer)
    #(test_sents, _),  (test_chars, _), (test_concept, _) = load_data(test_data, word_tokenizer, concept_tokenizer)

    config = json.load(open(PROJ_PATH + '/src/model/config.json', 'r'))

    model_parameters = config['parameters']
    model_parameters['vocab_size'] = len(word_tokenizer.word_index)
    model_parameters['char_dim'] = len(char_tokenizer)
    model_parameters['output_dim'] = len(concept_tokenizer.word_index)

    model_info = config['model_info']
    tracker = Tracker(
        basedir=PROJ_PATH + '/models/',
        desc=model_info['model_description'],
        title=model_info['model_title'],
        enter_desc=False,
        name=model_info['username'],
    )

    model = BiLSTM_CRF(model_parameters)

    model.define_model(
        char_embedding_dim=model_parameters['char_embedding_dim'],
        word_embedding_dim=model_parameters['word_embedding_dim'],
        char_lstm_cell=model_parameters['char_lstm_cell'],
        lstm_cell=model_parameters['lstm_cell'],
    )

    model.generate_model_diagram(tracker.get_model_dir(), tracker.title)
    model.train(train_sents, train_chars, train_concept, valid_sents,
                valid_chars, valid_concept)
    model.save_model(tracker.get_model_dir(), tracker.title)

    # save tokenizers
    pickle.dump(word_tokenizer,
                open(tracker.get_model_dir() + '/word_tokenizer.ser', 'wb'))
    pickle.dump(char_tokenizer,
                open(tracker.get_model_dir() + '/char_tokenizer.ser', 'wb'))
    pickle.dump(concept_tokenizer,
                open(tracker.get_model_dir() + '/concept_tokenizer.ser', 'wb'))

    tracker.log()
Example #11
0
class VideoManager(object):
    def __init__(self, path, vid_list, model_loading_time):
        init_time = time.time() - model_loading_time
        self.init_time = init_time

        vid_filename = vid_list[0]
        video_path = os.path.join(path, vid_filename)
        self.cap = cv2.VideoCapture(video_path)

        video_id, camera_id, max_frames, width, height = vid_list[1:]
        self.max_frames = max_frames

        self.tracker = Tracker(init_time, video_id, max_frames, camera_id, width, height)

        self.preprocess_function = get_img_transform(height, width, new_size=512)
        self.postprocess_trans = get_postprocess_trans(height, width)

        region_mask = get_region_mask(camera_id, height, width)
        self.region_mask = np.where(region_mask, 255, 0).astype(np.uint8)

        self.prev_img = None
        self.n = 0

    def get_img(self):
        ret, frame = self.cap.read()
        frame = cv2.bitwise_and(frame, frame, mask=self.region_mask)
        img = self.preprocess_function(frame)
        img = torch.from_numpy(img).to(torch.device('cuda'))
        self.n += 1
        prev_img = self.prev_img if self.prev_img is not None else img
        self.prev_img = img

        return img, prev_img

    def process_output(self, dets):
        dets = post_process(dets, self.postprocess_trans)[0]
        self.tracker.step(dets)

    def is_done(self):
        return self.n >= self.max_frames

    def finalize(self):
        self.tracker.finalize()
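Illustrative only: one way VideoManager could be driven per frame, assuming a CenterTrack-style model and that the sigmoid_output/generic_decode helpers used in Example #18 are importable:

def run_video(path, vid_list, model, model_loading_time):
    vm = VideoManager(path, vid_list, model_loading_time)
    while not vm.is_done():
        img, prev_img = vm.get_img()
        with torch.no_grad():
            out = model(img, prev_img, None)[-1]        # assumed model signature, as in Example #18
            dets = generic_decode(sigmoid_output(out))
        for k in dets:
            dets[k] = dets[k].detach().cpu().numpy()    # move tensors to numpy before post-processing
        vm.process_output(dets)
    vm.finalize()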
Example #12
0
    def __init__(self, path, vid_list, model_loading_time):
        init_time = time.time() - model_loading_time
        self.init_time = init_time

        vid_filename = vid_list[0]
        video_path = os.path.join(path, vid_filename)
        self.cap = cv2.VideoCapture(video_path)

        video_id, camera_id, max_frames, width, height = vid_list[1:]
        self.max_frames = max_frames

        self.tracker = Tracker(init_time, video_id, max_frames, camera_id, width, height)

        self.preprocess_function = get_img_transform(height, width, new_size=512)
        self.postprocess_trans = get_postprocess_trans(height, width)

        region_mask = get_region_mask(camera_id, height, width)
        self.region_mask = np.where(region_mask, 255, 0).astype(np.uint8)

        self.prev_img = None
        self.n = 0
Example #13
0
def tracker_thread_fn(q_in,
                      init_time,
                      path,
                      debug=0,
                      new_thresh=0.4,
                      track_thresh=0.2):
    video_id, camera_id, max_frames, width, height = get_video_params(path)

    postprocess_trans = get_postprocess_trans(height, width)
    tracker = Tracker(init_time,
                      video_id,
                      max_frames,
                      camera_id,
                      width,
                      height,
                      new_thresh=new_thresh,
                      track_thresh=track_thresh)

    for i in range(max_frames):
        dets = q_in.get()
        get_time = time.time()

        for k in dets:
            dets[k] = dets[k].detach().cpu().numpy()

        dets = post_process(dets, postprocess_trans,
                            track_thresh=track_thresh)[0]
        tracker.step(dets)

        if debug > 0 and i % 100 == 99:
            frame_time = time.time() - init_time
            FPS = (i + 1) / frame_time
            print("At frame {} FPS {}".format(i + 1, FPS), file=sys.stderr)

    tracker.finalize()
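Illustrative only: wiring tracker_thread_fn to a detector thread through a queue, assuming the detector side pushes one dict of detection tensors per frame (video_path is a hypothetical variable):

import queue
import threading
import time

q = queue.Queue(maxsize=8)
worker = threading.Thread(target=tracker_thread_fn,
                          args=(q, time.time(), video_path),  # video_path: hypothetical
                          kwargs={'debug': 1})
worker.start()
# the detector loop is expected to call q.put(dets) exactly max_frames times
worker.join()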
Example #14
0
    def __init__(self, opt):
        if opt.gpus[0] >= 0:
            opt.device = torch.device("cuda")
        else:
            opt.device = torch.device("cpu")

        print("Creating model...")
        self.model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt)
        self.model = load_model(self.model, opt.load_model, opt)
        self.model = self.model.to(opt.device)
        self.model.eval()

        self.opt = opt
        self.trained_dataset = get_dataset(opt.dataset)
        self.mean = np.array(self.trained_dataset.mean,
                             dtype=np.float32).reshape(1, 1, 3)
        self.std = np.array(self.trained_dataset.std,
                            dtype=np.float32).reshape(1, 1, 3)
        #     self.pause = not opt.no_pause
        self.rest_focal_length = (self.trained_dataset.rest_focal_length
                                  if self.opt.test_focal_length < 0 else
                                  self.opt.test_focal_length)
        self.flip_idx = self.trained_dataset.flip_idx
        self.cnt = 0
        self.pre_images = None
        self.pre_image_ori = None
        self.dataset = opt.dataset
        if self.dataset == "nuscenes":
            self.tracker = {}
            for class_name in NUSCENES_TRACKING_NAMES:
                self.tracker[class_name] = Tracker(opt, self.model)
        else:
            self.tracker = Tracker(opt, self.model)
        self.debugger = Debugger(opt=opt, dataset=self.trained_dataset)
        self.img_height = 100
        self.img_width = 100
Example #15
0
    def __init__(self, name_, brush_, infoFile_):

        # Load the properties of the robot from file
        try:
            with open(infoFile_, 'r') as f:
                self._info = json.load(f)
        except ValueError:
            self._info = {}

        # Call parent constructor
        super(Robot, self).__init__(name_=name_, pos_=self._info["pos"], brush_=brush_)

        # Is the robot stopped
        self._stopped = False

        # Is the robot master
        self._isMaster = False

        # Current zoom level
        self._zoom = 1.0

        # Store all items which belong to the robot
        self._items = [self, ]

        # Associate a tracker to store the path (in m)
        # Tracker only manipulates (x,y) coordinates
        self._tracker = Tracker(self._info["pos"][:2])

        # Show the supervisor information on screen
        self._showSupervisors = True

        # Set envelope
        self._envelope = self._info["envelope"]

        # Cache the bounding rect
        xmin, ymin, xmax, ymax = self.getBounds()
        self._boundingRect = QtCore.QRectF(QtCore.QPointF(xmin, ymin), QtCore.QPointF(xmax, ymax))

        # Cache the shape
        points = [QtCore.QPointF(p[0], p[1]) for p in self._envelope]
        self._shape = QtGui.QPainterPath()
        self._shape.addPolygon(QtGui.QPolygonF(points))
Example #16
0
def main(params):
    config = vars(parser.parse_args())

    channel = EngineConfigurationChannel()
    unity_env = UnityEnvironment(file_name=None, side_channels=[channel])
    channel.set_configuration_parameters(time_scale=20.0)

    env = UnityToGymWrapper(unity_env)

    agent = DDQN(env, cfg['agent'])
    tag = 'DDQN'

    # Initiate the tracker for stats
    tracker = Tracker("TurtleBot3", tag, seed, cfg['agent'],
                      ['Epoch', 'Ep_Reward'])

    # Train the agent
    agent.train(tracker,
                n_episodes=config['epochs'],
                verbose=config['verbose'],
                params=cfg['agent'],
                hyperp=config)
Example #17
0
class Detector(object):
    def __init__(self, opt):
        if opt.gpus[0] >= 0:
            opt.device = torch.device("cuda")
        else:
            opt.device = torch.device("cpu")

        print("Creating model...")
        self.model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt)
        self.model = load_model(self.model, opt.load_model, opt)
        self.model = self.model.to(opt.device)
        self.model.eval()

        self.opt = opt
        self.trained_dataset = get_dataset(opt.dataset)
        self.mean = np.array(self.trained_dataset.mean,
                             dtype=np.float32).reshape(1, 1, 3)
        self.std = np.array(self.trained_dataset.std,
                            dtype=np.float32).reshape(1, 1, 3)
        #     self.pause = not opt.no_pause
        self.rest_focal_length = (self.trained_dataset.rest_focal_length
                                  if self.opt.test_focal_length < 0 else
                                  self.opt.test_focal_length)
        self.flip_idx = self.trained_dataset.flip_idx
        self.cnt = 0
        self.pre_images = None
        self.pre_image_ori = None
        self.dataset = opt.dataset
        if self.dataset == "nuscenes":
            self.tracker = {}
            for class_name in NUSCENES_TRACKING_NAMES:
                self.tracker[class_name] = Tracker(opt, self.model)
        else:
            self.tracker = Tracker(opt, self.model)
        self.debugger = Debugger(opt=opt, dataset=self.trained_dataset)
        self.img_height = 100
        self.img_width = 100

    def run(self, image_or_path_or_tensor, meta={}, image_info=None):
        load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0
        merge_time, track_time, tot_time, display_time = 0, 0, 0, 0
        self.debugger.clear()
        start_time = time.time()

        # read image
        pre_processed = False
        if isinstance(image_or_path_or_tensor, np.ndarray):
            image = image_or_path_or_tensor
        elif isinstance(image_or_path_or_tensor, str):
            image = cv2.imread(image_or_path_or_tensor)
        else:
            image = image_or_path_or_tensor["image"][0].numpy()
            pre_processed_images = image_or_path_or_tensor
            pre_processed = True

        loaded_time = time.time()
        load_time += loaded_time - start_time

        detections = []

        # for multi-scale testing
        for scale in self.opt.test_scales:
            scale_start_time = time.time()
            if not pre_processed:
                # not prefetch testing or demo
                images, meta = self.pre_process(image, scale, meta)
            else:
                # prefetch testing
                images = pre_processed_images["images"][scale][0]
                meta = pre_processed_images["meta"][scale]
                meta = {k: v.numpy()[0] for k, v in meta.items()}
                if "pre_dets" in pre_processed_images["meta"]:
                    meta["pre_dets"] = pre_processed_images["meta"]["pre_dets"]
                if "cur_dets" in pre_processed_images["meta"]:
                    meta["cur_dets"] = pre_processed_images["meta"]["cur_dets"]

            images = images.to(self.opt.device,
                               non_blocking=self.opt.non_block_test)

            # initializing tracker
            pre_hms, pre_inds = None, None

            pre_process_time = time.time()
            pre_time += pre_process_time - scale_start_time

            # run the network
            # output: the output feature maps, only used for visualizing
            # dets: output tensors after extracting peaks
            output, dets, forward_time, FeatureMaps = self.process(
                images, self.pre_images, pre_hms, pre_inds, return_time=True)
            net_time += forward_time - pre_process_time
            decode_time = time.time()
            dec_time += decode_time - forward_time

            # convert the cropped and 4x downsampled output coordinate system
            # back to the input image coordinate system
            result = self.post_process(dets, meta, scale)
            post_process_time = time.time()
            post_time += post_process_time - decode_time

            detections.append(result)
            if self.opt.debug >= 2:
                self.debug(
                    self.debugger,
                    images,
                    result,
                    output,
                    scale,
                    pre_images=self.pre_images
                    if not self.opt.no_pre_img else None,
                    pre_hms=pre_hms,
                )

        # merge multi-scale testing results
        results = self.merge_outputs(detections)
        torch.cuda.synchronize()
        end_time = time.time()
        merge_time += end_time - post_process_time

        # public detection mode in MOT challenge
        if self.opt.public_det:
            results = pre_processed_images["meta"]["cur_dets"]

        if self.dataset == "nuscenes":
            trans_matrix = np.array(image_info["trans_matrix"], np.float32)

            results_by_class = {}
            ddd_boxes_by_class = {}
            depths_by_class = {}
            ddd_boxes_by_class2 = {}
            ddd_org_boxes_by_class = {}
            ddd_box_submission1 = {}
            ddd_box_submission2 = {}
            for class_name in NUSCENES_TRACKING_NAMES:
                results_by_class[class_name] = []
                ddd_boxes_by_class2[class_name] = []
                ddd_boxes_by_class[class_name] = []
                depths_by_class[class_name] = []
                ddd_org_boxes_by_class[class_name] = []
                ddd_box_submission1[class_name] = []
                ddd_box_submission2[class_name] = []
            for det in results:
                cls_id = int(det["class"])
                class_name = nuscenes_class_name[cls_id - 1]
                if class_name not in NUSCENES_TRACKING_NAMES:
                    continue

                if det["score"] < 0.3:
                    continue
                if class_name == "pedestrian" and det["score"] < 0.35:
                    continue
                results_by_class[class_name].append(det["bbox"].tolist() +
                                                    [det["score"]])
                size = [
                    float(det["dim"][1]),
                    float(det["dim"][2]),
                    float(det["dim"][0]),
                ]
                rot_cam = Quaternion(axis=[0, 1, 0], angle=det["rot_y"])
                translation_submission1 = np.dot(
                    trans_matrix,
                    np.array(
                        [
                            det["loc"][0], det["loc"][1] - size[2],
                            det["loc"][2], 1
                        ],
                        np.float32,
                    ),
                ).copy()

                loc = np.array([det["loc"][0], det["loc"][1], det["loc"][2]],
                               np.float32)
                depths_by_class[class_name].append([float(det["loc"][2])])
                trans = [det["loc"][0], det["loc"][1], det["loc"][2]]

                ddd_org_boxes_by_class[class_name].append([
                    float(det["dim"][0]),
                    float(det["dim"][1]),
                    float(det["dim"][2])
                ] + trans + [det["rot_y"]])

                box = Box(loc, size, rot_cam, name="2", token="1")
                box.translate(np.array([0, -box.wlh[2] / 2, 0]))
                box.rotate(Quaternion(image_info["cs_record_rot"]))
                box.translate(np.array(image_info["cs_record_trans"]))
                box.rotate(Quaternion(image_info["pose_record_rot"]))
                box.translate(np.array(image_info["pose_record_trans"]))
                rotation = box.orientation
                rotation = [
                    float(rotation.w),
                    float(rotation.x),
                    float(rotation.y),
                    float(rotation.z),
                ]

                ddd_box_submission1[class_name].append([
                    float(translation_submission1[0]),
                    float(translation_submission1[1]),
                    float(translation_submission1[2]),
                ].copy() + size.copy() + rotation.copy())

                q = Quaternion(rotation)
                angle = q.angle if q.axis[2] > 0 else -q.angle

                ddd_boxes_by_class[class_name].append([
                    size[2],
                    size[0],
                    size[1],
                    box.center[0],
                    box.center[1],
                    box.center[2],
                    angle,
                ].copy())

            online_targets = []
            for class_name in NUSCENES_TRACKING_NAMES:
                if len(results_by_class[class_name]) > 0 and NMS:
                    boxess = torch.from_numpy(
                        np.array(results_by_class[class_name])[:, :4])
                    scoress = torch.from_numpy(
                        np.array(results_by_class[class_name])[:, -1])
                    if class_name == "bus" or class_name == "truck":
                        ovrlp = 0.7
                    else:
                        ovrlp = 0.8
                    keep, count = nms(boxess, scoress, overlap=ovrlp)

                    keep = keep.data.numpy().tolist()
                    keep = sorted(set(keep))
                    results_by_class[class_name] = np.array(
                        results_by_class[class_name])[keep]

                    ddd_boxes_by_class[class_name] = np.array(
                        ddd_boxes_by_class[class_name])[keep]
                    depths_by_class[class_name] = np.array(
                        depths_by_class[class_name])[keep]
                    ddd_org_boxes_by_class[class_name] = np.array(
                        ddd_org_boxes_by_class[class_name])[keep]
                    ddd_box_submission1[class_name] = np.array(
                        ddd_box_submission1[class_name])[keep]

                online_targets += self.tracker[class_name].update(
                    results_by_class[class_name],
                    FeatureMaps,
                    ddd_boxes=ddd_boxes_by_class[class_name],
                    depths_by_class=depths_by_class[class_name],
                    ddd_org_boxes=ddd_org_boxes_by_class[class_name],
                    submission=ddd_box_submission1[class_name],
                    classe=class_name,
                )

        else:

            online_targets = self.tracker.update(results, FeatureMaps)

        return online_targets

    def _transform_scale(self, image, scale=1):
        """
      Prepare input image in different testing modes.
        Currently support: fix short size/ center crop to a fixed size/
        keep original resolution but pad to a multiplication of 32
    """
        height, width = image.shape[0:2]
        new_height = int(height * scale)
        new_width = int(width * scale)
        if self.opt.fix_short > 0:
            if height < width:
                inp_height = self.opt.fix_short
                inp_width = (int(width / height * self.opt.fix_short) +
                             63) // 64 * 64
            else:
                inp_height = (int(height / width * self.opt.fix_short) +
                              63) // 64 * 64
                inp_width = self.opt.fix_short
            c = np.array([width / 2, height / 2], dtype=np.float32)
            s = np.array([width, height], dtype=np.float32)
        elif self.opt.fix_res:
            inp_height, inp_width = self.opt.input_h, self.opt.input_w
            c = np.array([new_width / 2.0, new_height / 2.0], dtype=np.float32)
            s = max(height, width) * 1.0
            # s = np.array([inp_width, inp_height], dtype=np.float32)
        else:
            inp_height = (new_height | self.opt.pad) + 1
            inp_width = (new_width | self.opt.pad) + 1
            c = np.array([new_width // 2, new_height // 2], dtype=np.float32)
            s = np.array([inp_width, inp_height], dtype=np.float32)
        resized_image = cv2.resize(image, (new_width, new_height))
        return resized_image, c, s, inp_width, inp_height, height, width

    def pre_process(self, image, scale, input_meta={}):
        """
    Crop, resize, and normalize image. Gather meta data for post processing
      and tracking.
    """
        resized_image, c, s, inp_width, inp_height, height, width = self._transform_scale(
            image)
        trans_input = get_affine_transform(c, s, 0, [inp_width, inp_height])
        out_height = inp_height // self.opt.down_ratio
        out_width = inp_width // self.opt.down_ratio
        trans_output = get_affine_transform(c, s, 0, [out_width, out_height])

        inp_image = cv2.warpAffine(resized_image,
                                   trans_input, (inp_width, inp_height),
                                   flags=cv2.INTER_LINEAR)
        inp_image = ((inp_image / 255.0 - self.mean) / self.std).astype(
            np.float32)

        images = inp_image.transpose(2, 0, 1).reshape(1, 3, inp_height,
                                                      inp_width)
        if self.opt.flip_test:
            images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
        images = torch.from_numpy(images)
        meta = {
            "calib":
            np.array(input_meta["calib"], dtype=np.float32) if "calib"
            in input_meta else self._get_default_calib(width, height)
        }
        meta.update({
            "c": c,
            "s": s,
            "height": height,
            "width": width,
            "out_height": out_height,
            "out_width": out_width,
            "inp_height": inp_height,
            "inp_width": inp_width,
            "trans_input": trans_input,
            "trans_output": trans_output,
        })
        if "pre_dets" in input_meta:
            meta["pre_dets"] = input_meta["pre_dets"]
        if "cur_dets" in input_meta:
            meta["cur_dets"] = input_meta["cur_dets"]
        return images, meta

    def _trans_bbox(self, bbox, trans, width, height):
        """
    Transform bounding boxes according to image crop.
    """
        bbox = np.array(copy.deepcopy(bbox), dtype=np.float32)
        bbox[:2] = affine_transform(bbox[:2], trans)
        bbox[2:] = affine_transform(bbox[2:], trans)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, width - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, height - 1)
        return bbox

    def _get_additional_inputs(self, dets, meta, with_hm=True):
        """
    Render input heatmap from previous trackings.
    """
        trans_input, trans_output = meta["trans_input"], meta["trans_output"]
        inp_width, inp_height = meta["inp_width"], meta["inp_height"]
        out_width, out_height = meta["out_width"], meta["out_height"]
        input_hm = np.zeros((1, inp_height, inp_width), dtype=np.float32)

        output_inds = []
        for det in dets:
            if det["score"] < self.opt.pre_thresh or det["active"] == 0:
                continue
            bbox = self._trans_bbox(det["bbox"], trans_input, inp_width,
                                    inp_height)
            bbox_out = self._trans_bbox(det["bbox"], trans_output, out_width,
                                        out_height)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                if with_hm:
                    draw_umich_gaussian(input_hm[0], ct_int, radius)
                ct_out = np.array(
                    [(bbox_out[0] + bbox_out[2]) / 2,
                     (bbox_out[1] + bbox_out[3]) / 2],
                    dtype=np.int32,
                )
                output_inds.append(ct_out[1] * out_width + ct_out[0])
        if with_hm:
            input_hm = input_hm[np.newaxis]
            if self.opt.flip_test:
                input_hm = np.concatenate((input_hm, input_hm[:, :, :, ::-1]),
                                          axis=0)
            input_hm = torch.from_numpy(input_hm).to(self.opt.device)
        output_inds = np.array(output_inds, np.int64).reshape(1, -1)
        output_inds = torch.from_numpy(output_inds).to(self.opt.device)
        return input_hm, output_inds

    def _get_default_calib(self, width, height):
        calib = np.array([
            [self.rest_focal_length, 0, width / 2, 0],
            [0, self.rest_focal_length, height / 2, 0],
            [0, 0, 1, 0],
        ])
        return calib

    def _sigmoid_output(self, output):
        if "hm" in output:
            output["hm"] = output["hm"].sigmoid_()
        if "hm_hp" in output:
            output["hm_hp"] = output["hm_hp"].sigmoid_()
        if "dep" in output:
            output["dep"] = 1.0 / (output["dep"].sigmoid() + 1e-6) - 1.0
            output["dep"] *= self.opt.depth_scale
        return output

    def _flip_output(self, output):
        average_flips = ["hm", "wh", "dep", "dim"]
        neg_average_flips = ["amodel_offset"]
        single_flips = [
            "ltrb",
            "nuscenes_att",
            "velocity",
            "ltrb_amodal",
            "reg",
            "hp_offset",
            "rot",
            "tracking",
            "pre_hm",
        ]
        for head in output:
            if head in average_flips:
                output[head] = (output[head][0:1] +
                                flip_tensor(output[head][1:2])) / 2
            if head in neg_average_flips:
                flipped_tensor = flip_tensor(output[head][1:2])
                flipped_tensor[:, 0::2] *= -1
                output[head] = (output[head][0:1] + flipped_tensor) / 2
            if head in single_flips:
                output[head] = output[head][0:1]
            if head == "hps":
                output["hps"] = (output["hps"][0:1] + flip_lr_off(
                    output["hps"][1:2], self.flip_idx)) / 2
            if head == "hm_hp":
                output["hm_hp"] = (output["hm_hp"][0:1] + flip_lr(
                    output["hm_hp"][1:2], self.flip_idx)) / 2

        return output

    def process(self,
                images,
                pre_images=None,
                pre_hms=None,
                pre_inds=None,
                return_time=False):
        with torch.no_grad():
            torch.cuda.synchronize()
            output, FeatureMaps = self.model(images, pre_images, pre_hms)
            output = output[-1]
            output = self._sigmoid_output(output)
            output.update({"pre_inds": pre_inds})
            if self.opt.flip_test:
                output = self._flip_output(output)
            torch.cuda.synchronize()
            forward_time = time.time()

            dets = generic_decode(output, K=self.opt.K, opt=self.opt)
            torch.cuda.synchronize()
            for k in dets:
                dets[k] = dets[k].detach().cpu().numpy()
        if return_time:
            return output, dets, forward_time, FeatureMaps
        else:
            return output, dets, FeatureMaps

    def post_process(self, dets, meta, scale=1):
        dets = generic_post_process(
            self.opt,
            dets,
            [meta["c"]],
            [meta["s"]],
            meta["out_height"],
            meta["out_width"],
            self.opt.num_classes,
            [meta["calib"]],
            meta["height"],
            meta["width"],
        )
        self.this_calib = meta["calib"]

        if scale != 1:
            for i in range(len(dets[0])):
                for k in ["bbox", "hps"]:
                    if k in dets[0][i]:
                        dets[0][i][k] = (np.array(dets[0][i][k], np.float32) /
                                         scale).tolist()
        return dets[0]

    def merge_outputs(self, detections):
        assert len(self.opt.test_scales) == 1, "multi_scale not supported!"
        results = []
        for i in range(len(detections[0])):
            if detections[0][i]["score"] > self.opt.out_thresh:
                results.append(detections[0][i])
        return results

    def debug(self,
              debugger,
              images,
              dets,
              output,
              scale=1,
              pre_images=None,
              pre_hms=None):
        img = images[0].detach().cpu().numpy().transpose(1, 2, 0)
        img = np.clip(((img * self.std + self.mean) * 255.0), 0,
                      255).astype(np.uint8)
        pred = debugger.gen_colormap(output["hm"][0].detach().cpu().numpy())
        debugger.add_blend_img(img, pred, "pred_hm")
        if "hm_hp" in output:
            pred = debugger.gen_colormap_hp(
                output["hm_hp"][0].detach().cpu().numpy())
            debugger.add_blend_img(img, pred, "pred_hmhp")

        if pre_images is not None:
            pre_img = pre_images[0].detach().cpu().numpy().transpose(1, 2, 0)
            pre_img = np.clip(((pre_img * self.std + self.mean) * 255.0), 0,
                              255).astype(np.uint8)
            debugger.add_img(pre_img, "pre_img")
            if pre_hms is not None:
                pre_hm = debugger.gen_colormap(
                    pre_hms[0].detach().cpu().numpy())
                debugger.add_blend_img(pre_img, pre_hm, "pre_hm")

    def show_results(self, debugger, image, results):
        debugger.add_img(image, img_id="generic")
        if self.opt.tracking:
            debugger.add_img(
                self.pre_image_ori
                if self.pre_image_ori is not None else image,
                img_id="previous",
            )
            self.pre_image_ori = image

        for j in range(len(results)):
            if results[j]["score"] > self.opt.vis_thresh:
                if "active" in results[j] and results[j]["active"] == 0:
                    continue
                item = results[j]
                if "bbox" in item:
                    sc = (item["score"] if self.opt.demo == "" or
                          not ("tracking_id" in item) else item["tracking_id"])
                    sc = item[
                        "tracking_id"] if self.opt.show_track_color else sc

                    debugger.add_coco_bbox(item["bbox"],
                                           item["class"] - 1,
                                           sc,
                                           img_id="generic")

                if "tracking" in item:
                    debugger.add_arrow(item["ct"],
                                       item["tracking"],
                                       img_id="generic")

                tracking_id = item["tracking_id"] if "tracking_id" in item else -1
                if ("tracking_id" in item and self.opt.demo == ""
                        and not self.opt.show_track_color):
                    debugger.add_tracking_id(item["ct"],
                                             item["tracking_id"],
                                             img_id="generic")

                if (item["class"] in [1, 2]) and "hps" in item:
                    debugger.add_coco_hp(item["hps"],
                                         tracking_id=tracking_id,
                                         img_id="generic")

        if (len(results) > 0 and "dep" in results[0] and "alpha" in results[0]
                and "dim" in results[0]):
            debugger.add_3d_detection(
                image if not self.opt.qualitative else cv2.resize(
                    debugger.imgs["pred_hm"],
                    (image.shape[1], image.shape[0])),
                False,
                results,
                self.this_calib,
                vis_thresh=self.opt.vis_thresh,
                img_id="ddd_pred",
            )
            debugger.add_bird_view(
                results,
                vis_thresh=self.opt.vis_thresh,
                img_id="bird_pred",
                cnt=self.cnt,
            )
            if self.opt.show_track_color and self.opt.debug == 4:
                del debugger.imgs["generic"], debugger.imgs["bird_pred"]

    def reset_tracking(self, opt):
        if self.dataset == "nuscenes":
            self.tracker = {}
            for class_name in NUSCENES_TRACKING_NAMES:
                self.tracker[class_name] = Tracker(opt,
                                                   self.model,
                                                   h=self.img_height,
                                                   w=self.img_width)
        else:
            self.tracker = Tracker(opt,
                                   self.model,
                                   h=self.img_height,
                                   w=self.img_width)
        self.pre_images = None
        self.pre_image_ori = None

    def update_public_detections(self, detections_file):

        self.det_file = pd.read_csv(detections_file, header=None, sep=" ")
        self.det_group = self.det_file.groupby(0)
        self.det_group_keys = self.det_group.indices.keys()
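Illustrative only: a minimal per-frame loop over the Detector class above, assuming opt comes from the project's opts() parser (as in Example #22) and frames_with_info is a hypothetical iterable of (BGR image array, image_info dict) pairs:

detector = Detector(opt)
for frame, frame_info in frames_with_info:
    online_targets = detector.run(frame, image_info=frame_info)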
Example #18
0
def run_single_video_serial(path, debug=0, full_precision=False):
    init_time = time.time()
    if debug >= 1:
        print("Starting for video: {}".format(path), file=sys.stderr)

    video_id, camera_id, max_frames, width, height = get_video_params(path)

    cap = cv2.VideoCapture(path)

    model = create_model()
    model = load_model(model, 'checkpoints/coco_tracking.pth')
    model.to(torch.device('cuda'))
    model.eval()

    tracker = Tracker(init_time,
                      video_id,
                      max_frames,
                      camera_id,
                      width,
                      height,
                      debug=debug)

    preprocess_function = get_img_transform(height, width, new_size=512)
    postprocess_trans = get_postprocess_trans(height, width)
    region_mask = get_region_mask(camera_id, height, width)
    region_mask = np.where(region_mask, 255, 0).astype(np.uint8)

    if debug > 2:
        cv2.imwrite("mask.png", region_mask)

    pre_img = None

    for i in range(max_frames):
        ret, frame = cap.read()
        if debug >= 2:
            cv2.imshow("Frame", frame)
            cv2.waitKey(1)
            tracker.frame = np.copy(frame)

        frame = cv2.bitwise_and(frame, frame, mask=region_mask)

        img = preprocess_function(frame)
        img = torch.from_numpy(img).to(torch.device('cuda'))

        if pre_img is None:
            pre_img = img

        with torch.no_grad():
            with torch.cuda.amp.autocast(enabled=not full_precision):
                out = model(img, pre_img, None)[-1]
                out = sigmoid_output(out)
                dets = generic_decode(out)

        pre_img = img

        for k in dets:
            dets[k] = dets[k].detach().cpu().numpy()

        dets = post_process(dets, postprocess_trans)[0]
        tracker.step(dets)

        if debug >= 1 and i % 100 == 99:
            frame_time = time.time() - init_time
            FPS = (i + 1) / frame_time
            print("At frame {} FPS {}".format(i + 1, FPS), file=sys.stderr)

    tracker.finalize()

    if debug >= 1:
        print("Finished video: {}".format(path), file=sys.stderr)
Example #19
0
def run(sequence_dir, detection_file, output_file, max_age, n_init, reid_thr,
        checkpoint_dir):
    """Run multi-target tracker on a particular sequence.

    Parameters
    ----------
    sequence_dir : str
        Path to the MOTChallenge sequence directory.
    detection_file : str
        Path to the detections file.
    output_file : str
        Path to the tracking output file. This file will contain the tracking
        results on completion.
    max_age : int
        Maximum number of consecutive missed frames before a track is deleted.
    n_init : int
        Number of consecutive detections before a new track is confirmed.
    reid_thr : float
        Threshold used for re-identification when associating detections
        with existing tracks.
    checkpoint_dir : str
        Directory of the model checkpoint used to encode detection features.

    """
    new_npy = encode_newfeat(detection_file, checkpoint_dir)
    seq_info = gather_sequence_info(sequence_dir, new_npy)
    tracker = Tracker(max_age=max_age, n_init=n_init, reid_thr=reid_thr)
    results = []
    if not osp.exists(
            os.path.join("warp_mat", "%s.npy" % seq_info["sequence_name"])):
        if not osp.exists("./warp_mat"):
            os.makedirs('./warp_mat')
        warp_matrix = np.array(WarpMatrix(seq_info).mat)
        output_filename = os.path.join("warp_mat",
                                       "%s.npy" % seq_info["sequence_name"])
        np.save(output_filename, warp_matrix, allow_pickle=False)
    else:
        warp_matrix = np.load(
            os.path.join("warp_mat", "%s.npy" % seq_info["sequence_name"]))

    def frame_callback(frame_idx):
        print("Processing %s" % seq_info["sequence_name"],
              "frame %05d" % frame_idx)

        # Load image and generate detections.
        detections = create_detections(seq_info["detections"],
                                       frame_idx,
                                       w_img=seq_info["image_size"][1],
                                       h_img=seq_info["image_size"][0])

        # Update tracker.
        tracker.predict(warp_matrix[frame_idx - 2])
        tracker.update(detections, seq_info["sequence_name"], frame_idx,
                       checkpoint_dir)

        # Store results.
        for track in tracker.tracks:
            if track.time_since_update >= 1:
                continue
            bbox = track.to_tlwh2()
            results.append([
                frame_idx, track.track_id, bbox[0], bbox[1], bbox[2], bbox[3]
            ])

    # Run tracker.
    frame_idx = seq_info["min_frame_idx"]
    while frame_idx <= seq_info["max_frame_idx"]:
        frame_callback(frame_idx)
        frame_idx += 1

    # Store results.
    output_path = os.path.dirname(output_file)
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    with open(output_file, 'w') as f:
        for row in results:
            print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1' %
                  (row[0], row[1], row[2], row[3], row[4], row[5]),
                  file=f)
Example #20
0
def video():
    """
    Main method of the program: reads the data streams and displays the video
    streams and status messages to the user.
    """
    global image_timestamp
    global thermal
    global normal
    global temp

    # Define the variable that remembers the current state: 'waiting', which
    # waits for a person to enter the frame, and 'person_detected', which
    # continuously checks whether the wearer's mask is worn correctly.
    current_state = 'waiting'

    looker = Looker()
    talker = Talker()
    tracker = Tracker(args['tracker'])
    detector = FaceAndMaskDetector(args['confidence'])
    temp_checker = TemperatureChecker()
    person_waiter = WaitingForPerson(tracker, detector, args['wait'])
    person_checker = CheckingPerson(tracker, talker, detector, temp_checker, args['value'], args['wait'],
                                    args['threshold'], args['state'], args['move'])

    while True:
        # Get current frames
        normal_wrapper.set(normal)
        curr_normal = normal_wrapper.get()
        temp_wrapper.set(temp)
        curr_temp = temp_wrapper.get()
        thermal_wrapper.set(thermal)
        curr_thermal = thermal_wrapper.get()

        # While in the 'waiting' state check if a person is in the frame
        if current_state == 'waiting':
            person_waiter.run_prediction(curr_normal)

        # If a person entered the frame, change the current state
        if person_waiter.person_in_frame():
            current_state = 'person_detected'

        # While in the 'person_detected' state check if the person is wearing
        # the mask properly.
        if current_state == 'person_detected':
            person_checker.check_person(curr_normal, curr_temp, looker, image_timestamp)
            if person_checker.mask_ok:
                print(f'{person_checker.temp_checker.get_temp()} C')
                sleep(3)
                person_checker.speak_temperature()
                reset(person_waiter, person_checker, tracker, temp_checker, looker)
                looker = Looker()
                current_state = 'waiting'
            elif person_checker.lost_tracking:
                reset(person_waiter, person_checker, tracker, temp_checker, looker)
                looker = Looker()
                current_state = 'waiting'

        frame = vstack((curr_normal, curr_thermal))

        # Display the concatenated current frame
        cv.imshow('Video stream', frame)

        # Exit if Q pressed
        if cv.waitKey(1) & 0xFF == ord('q'):
            break

    # Close the video stream, stop the thread that centers the camera on the
    # face, and exit the program.
    cv.destroyAllWindows()
    looker.stop()
    sys.exit(0)
Example #21
0
# assign unique color to each id
id_name = list(range(1000))
id_color = IdColor(id_name)

# assign session
tf.Graph().as_default()
sess = tf.Session(config=config)
keras.backend.set_session(sess)

# load tracker
tracker = Tracker(sess,
                  MEM_SIZE,
                  IMG_SIZE,
                  FEATURE_SIZE,
                  ori_height=height,
                  ori_width=width,
                  iou_threshold=0.3,
                  kl_threshold=0.6)
sess.run(tf.global_variables_initializer())
log_dir = '/home/msis_dasol/master_thesis/RAN/for_paper/VGG16_skip_connection/memsize_5'
tf_util.restore_from_dir(sess, os.path.join(log_dir, 'checkpoints'))

# load detector
yolov3 = YOLOv3(sess)
total_tracking_object = 0

var_sizes = [
    np.product(list(map(int, v.shape))) * v.dtype.size
    for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
]
Example #22
0
class CenterTrackOperator(erdos.Operator):
    def __init__(self, camera_stream, obstacle_tracking_stream, flags,
                 camera_setup):
        from dataset.dataset_factory import get_dataset
        from model.model import create_model, load_model
        from opts import opts
        from utils.tracker import Tracker

        camera_stream.add_callback(self.on_frame_msg,
                                   [obstacle_tracking_stream])
        self._flags = flags
        self._logger = erdos.utils.setup_logging(self.config.name,
                                                 self.config.log_file_name)
        self._csv_logger = erdos.utils.setup_csv_logging(
            self.config.name + '-csv', self.config.csv_log_file_name)
        self._camera_setup = camera_setup
        # TODO(ionel): Might have to filter labels when running with a coco
        # and a nuscenes model.
        num_classes = {
            'kitti_tracking': 3,
            'coco': 90,
            'mot': 1,
            'nuscenes': 10
        }
        # Other flags:
        # 1) --K ; max number of output objects.
        # 2) --fix_short ; resizes the height of the image to fix short, and
        # the width such the aspect ratio is maintained.
        # 3) --pre_hm ; pre heat map.
        # 4) --input_w; str(camera_setup.width)
        # 5) --input_h; str(camera_setup.height)
        args = [
            'tracking', '--load_model', flags.center_track_model_path,
            '--dataset', flags.center_track_model, '--test_focal_length',
            str(int(camera_setup.get_focal_length())), '--out_thresh',
            str(flags.obstacle_detection_min_score_threshold), '--pre_thresh',
            str(flags.obstacle_detection_min_score_threshold), '--new_thresh',
            str(flags.obstacle_detection_min_score_threshold),
            '--track_thresh',
            str(flags.obstacle_detection_min_score_threshold), '--max_age',
            str(flags.obstacle_track_max_age), '--num_classes',
            str(num_classes[flags.center_track_model]), '--tracking',
            '--hungarian'
        ]
        opt = opts().init(args)
        gpu = True
        if gpu:
            opt.device = torch.device('cuda')
        else:
            opt.device = torch.device('cpu')
        self.opt = opt
        self.model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt)
        self.model = load_model(self.model, opt.load_model, opt)
        self.model = self.model.to(self.opt.device)
        self.model.eval()

        self.trained_dataset = get_dataset(opt.dataset)
        self.mean = np.array(self.trained_dataset.mean,
                             dtype=np.float32).reshape(1, 1, 3)
        self.std = np.array(self.trained_dataset.std,
                            dtype=np.float32).reshape(1, 1, 3)
        self.rest_focal_length = self.trained_dataset.rest_focal_length \
            if self.opt.test_focal_length < 0 else self.opt.test_focal_length
        self.flip_idx = self.trained_dataset.flip_idx
        self.cnt = 0
        self.pre_images = None
        self.pre_image_ori = None
        self.tracker = Tracker(opt)

    @staticmethod
    def connect(camera_stream):
        obstacle_tracking_stream = erdos.WriteStream()
        return [obstacle_tracking_stream]

    @erdos.profile_method()
    def on_frame_msg(self, msg, obstacle_tracking_stream):
        """Invoked when a FrameMessage is received on the camera stream."""
        self._logger.debug('@{}: {} received frame'.format(
            msg.timestamp, self.config.name))
        assert msg.frame.encoding == 'BGR', 'Expects BGR frames'
        image_np = msg.frame.as_bgr_numpy_array()
        results = self.run_model(image_np)
        obstacles = []
        for res in results:
            track_id = res['tracking_id']
            bbox = res['bbox']
            score = res['score']
            label_id = res['class'] - 1
            if label_id > 80:
                continue
            label = self.trained_dataset.class_name[label_id]
            if label in ['Pedestrian', 'pedestrian']:
                label = 'person'
            elif label == 'Car':
                label = 'car'
            elif label == 'Cyclist':
                label = 'bicycle'
            if label in OBSTACLE_LABELS:
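                # Assumption: BoundingBox2D takes (x_min, x_max, y_min, y_max),
                # hence the reordering of CenterTrack's [x1, y1, x2, y2] box.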
                bounding_box_2D = BoundingBox2D(bbox[0], bbox[2], bbox[1],
                                                bbox[3])
                bounding_box_3D = None
                if 'dim' in res and 'loc' in res and 'rot_y' in res:
                    bounding_box_3D = BoundingBox3D.from_dimensions(
                        res['dim'], res['loc'], res['rot_y'])
                obstacles.append(
                    Obstacle(bounding_box_3D,
                             score,
                             label,
                             track_id,
                             bounding_box_2D=bounding_box_2D))
        obstacle_tracking_stream.send(
            ObstaclesMessage(msg.timestamp, obstacles, 0))
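    # Each entry of `results` handled above is a per-track dict from the tracker
    # step; the keys consumed here are 'tracking_id', 'bbox', 'score', 'class',
    # and, when the model predicts 3D attributes, 'dim', 'loc' and 'rot_y'.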

    def run_model(self, image_np, meta={}):
        images, meta = self.pre_process(image_np, meta)
        images = images.to(self.opt.device,
                           non_blocking=self.opt.non_block_test)
        pre_hms, pre_inds = None, None
        if self.pre_images is None:
            self.pre_images = images
            self.tracker.init_track(meta['pre_dets'] if 'pre_dets' in
                                    meta else [])
        if self.opt.pre_hm:
            pre_hms, pre_inds = self._get_additional_inputs(
                self.tracker.tracks, meta, with_hm=not self.opt.zero_pre_hm)

        output, dets = self.process(images, self.pre_images, pre_hms, pre_inds)
        detections = self.post_process(dets, meta)

        # Filter out detections below threshold.
        detections = [
            det for det in detections if det['score'] > self.opt.out_thresh
        ]
        torch.cuda.synchronize()
        public_det = meta['cur_dets'] if self.opt.public_det else None
        # Add tracking id to results.
        results = self.tracker.step(detections, public_det)
        self.pre_images = images
        return results

    def process(self, images, pre_images=None, pre_hms=None, pre_inds=None):
        from model.decode import generic_decode
        with torch.no_grad():
            torch.cuda.synchronize()
            output = self.model(images, pre_images, pre_hms)[-1]
            output = self._sigmoid_output(output)
            output.update({'pre_inds': pre_inds})
            if self.opt.flip_test:
                output = self._flip_output(output)
            torch.cuda.synchronize()
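            # generic_decode turns the raw output maps into the top-K (opt.K)
            # candidate detections, returned as a dict of tensors.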
            dets = generic_decode(output, K=self.opt.K, opt=self.opt)
            torch.cuda.synchronize()
            for k in dets:
                dets[k] = dets[k].detach().cpu().numpy()
        return output, dets

    def pre_process(self, image, input_meta={}):
        """
        Crop, resize, and normalize image. Gather meta data for post
        processing and tracking.
        """
        from utils.image import get_affine_transform
        resized_image, c, s, inp_width, inp_height, height, width = \
            self._transform_scale(image)
        trans_input = get_affine_transform(c, s, 0, [inp_width, inp_height])
        out_height = inp_height // self.opt.down_ratio
        out_width = inp_width // self.opt.down_ratio
        trans_output = get_affine_transform(c, s, 0, [out_width, out_height])

        inp_image = cv2.warpAffine(resized_image,
                                   trans_input, (inp_width, inp_height),
                                   flags=cv2.INTER_LINEAR)
        inp_image = ((inp_image / 255. - self.mean) / self.std).astype(
            np.float32)

        images = inp_image.transpose(2, 0, 1).reshape(1, 3, inp_height,
                                                      inp_width)
        if self.opt.flip_test:
            images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
        images = torch.from_numpy(images)
        meta = {
            'calib': np.array(input_meta['calib'], dtype=np.float32) if 'calib'
            in input_meta else self._get_default_calib(width, height)
        }
        meta.update({
            'c': c,
            's': s,
            'height': height,
            'width': width,
            'out_height': out_height,
            'out_width': out_width,
            'inp_height': inp_height,
            'inp_width': inp_width,
            'trans_input': trans_input,
            'trans_output': trans_output
        })
        if 'pre_dets' in input_meta:
            meta['pre_dets'] = input_meta['pre_dets']
        if 'cur_dets' in input_meta:
            meta['cur_dets'] = input_meta['cur_dets']
        return images, meta

    def _get_default_calib(self, width, height):
        calib = np.array([[self.rest_focal_length, 0, width / 2, 0],
                          [0, self.rest_focal_length, height / 2, 0],
                          [0, 0, 1, 0]])
        return calib

    def _transform_scale(self, image, scale=1):
        """
        Prepare input image in different testing modes.
        Currently supports: fix to a short side / center crop to a fixed size /
        keep the original resolution but pad to a multiple of 32.
        """
        height, width = image.shape[0:2]
        new_height = int(height * scale)
        new_width = int(width * scale)
        if self.opt.fix_short > 0:
            if height < width:
                inp_height = self.opt.fix_short
                inp_width = (int(width / height * self.opt.fix_short) +
                             63) // 64 * 64
            else:
                inp_height = (int(height / width * self.opt.fix_short) +
                              63) // 64 * 64
                inp_width = self.opt.fix_short
            c = np.array([width / 2, height / 2], dtype=np.float32)
            s = np.array([width, height], dtype=np.float32)
        elif self.opt.fix_res:
            inp_height, inp_width = self.opt.input_h, self.opt.input_w
            c = np.array([new_width / 2., new_height / 2.], dtype=np.float32)
            s = max(height, width) * 1.0
            # s = np.array([inp_width, inp_height], dtype=np.float32)
        else:
            inp_height = (new_height | self.opt.pad) + 1
            inp_width = (new_width | self.opt.pad) + 1
            c = np.array([new_width // 2, new_height // 2], dtype=np.float32)
            s = np.array([inp_width, inp_height], dtype=np.float32)
        resized_image = cv2.resize(image, (new_width, new_height))
        return resized_image, c, s, inp_width, inp_height, height, width
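    # Worked example for the default padding branch (assuming opt.pad == 31, the
    # usual pad-to-a-multiple-of-32 setting): a 1280x720 frame at scale 1 gives
    # inp_width = (1280 | 31) + 1 = 1312 and inp_height = (720 | 31) + 1 = 736.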

    def _sigmoid_output(self, output):
        if 'hm' in output:
            output['hm'] = output['hm'].sigmoid_()
        if 'hm_hp' in output:
            output['hm_hp'] = output['hm_hp'].sigmoid_()
        if 'dep' in output:
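            # Depth decoding (as in CenterNet/CenterTrack): the head regresses an
            # inverse-sigmoid depth, so metric depth is recovered as 1/sigmoid(x) - 1.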
            output['dep'] = 1. / (output['dep'].sigmoid() + 1e-6) - 1.
            output['dep'] *= self.opt.depth_scale
        return output

    def post_process(self, dets, meta):
        from utils.post_process import generic_post_process
        dets = generic_post_process(self.opt, dets, [meta['c']], [meta['s']],
                                    meta['out_height'], meta['out_width'],
                                    self.opt.num_classes, [meta['calib']],
                                    meta['height'], meta['width'])
        self.this_calib = meta['calib']
        return dets[0]
Example #23
0
class Detector(object):
    def __init__(self, opt):
        if opt.gpus[0] >= 0:
            opt.device = torch.device('cuda')
        else:
            opt.device = torch.device('cpu')

        print('Creating model...')
        self.model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt)
        self.model = load_model(self.model, opt.load_model, opt)
        self.model = self.model.to(opt.device)
        self.model.eval()

        self.opt = opt
        self.trained_dataset = get_dataset(opt.dataset)
        self.mean = np.array(self.trained_dataset.mean,
                             dtype=np.float32).reshape(1, 1, 3)
        self.std = np.array(self.trained_dataset.std,
                            dtype=np.float32).reshape(1, 1, 3)
        self.pause = not opt.no_pause
        self.rest_focal_length = self.trained_dataset.rest_focal_length \
          if self.opt.test_focal_length < 0 else self.opt.test_focal_length
        self.flip_idx = self.trained_dataset.flip_idx
        self.cnt = 0
        self.pre_images = None
        self.pre_image_ori = None
        self.tracker = Tracker(opt)
        self.debugger = Debugger(opt=opt, dataset=self.trained_dataset)

    def run(self, image_or_path_or_tensor, meta={}):
        load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0
        merge_time, track_time, tot_time, display_time = 0, 0, 0, 0
        self.debugger.clear()
        start_time = time.time()

        # read image
        pre_processed = False
        if isinstance(image_or_path_or_tensor, np.ndarray):
            image = image_or_path_or_tensor
        elif isinstance(image_or_path_or_tensor, str):
            image = cv2.imread(image_or_path_or_tensor)
        else:
            image = image_or_path_or_tensor['image'][0].numpy()
            pre_processed_images = image_or_path_or_tensor
            pre_processed = True

        loaded_time = time.time()
        load_time += (loaded_time - start_time)

        detections = []

        # for multi-scale testing
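        # (merge_outputs below asserts a single test scale, so this loop
        # effectively runs once.)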
        for scale in self.opt.test_scales:
            scale_start_time = time.time()
            if not pre_processed:
                # not prefetch testing or demo
                images, meta = self.pre_process(image, scale, meta)
            else:
                # prefetch testing
                images = pre_processed_images['images'][scale][0]
                meta = pre_processed_images['meta'][scale]
                meta = {k: v.numpy()[0] for k, v in meta.items()}
                if 'pre_dets' in pre_processed_images['meta']:
                    meta['pre_dets'] = pre_processed_images['meta']['pre_dets']
                if 'cur_dets' in pre_processed_images['meta']:
                    meta['cur_dets'] = pre_processed_images['meta']['cur_dets']

            images = images.to(self.opt.device,
                               non_blocking=self.opt.non_block_test)

            # initializing tracker
            pre_hms, pre_inds = None, None
            if self.opt.tracking:
                # initialize the first frame
                if self.pre_images is None:
                    print('Initialize tracking!')
                    self.pre_images = images
                    self.tracker.init_track(meta['pre_dets'] if 'pre_dets' in
                                            meta else [])
                if self.opt.pre_hm:
                    # render input heatmap from tracker status
                    # pre_inds is not used in the current version.
                    # We used pre_inds for learning an offset from previous image to
                    # the current image.
                    pre_hms, pre_inds = self._get_additional_inputs(
                        self.tracker.tracks,
                        meta,
                        with_hm=not self.opt.zero_pre_hm)

            pre_process_time = time.time()
            pre_time += pre_process_time - scale_start_time

            # run the network
            # output: the output feature maps, only used for visualizing
            # dets: output tensors after extracting peaks
            output, dets, forward_time = self.process(images,
                                                      self.pre_images,
                                                      pre_hms,
                                                      pre_inds,
                                                      return_time=True)
            net_time += forward_time - pre_process_time
            decode_time = time.time()
            dec_time += decode_time - forward_time

            # convert the cropped and 4x downsampled output coordinate system
            # back to the input image coordinate system
            result = self.post_process(dets, meta, scale)
            post_process_time = time.time()
            post_time += post_process_time - decode_time

            detections.append(result)
            if self.opt.debug >= 2:
                self.debug(self.debugger,
                           images,
                           result,
                           output,
                           scale,
                           pre_images=self.pre_images
                           if not self.opt.no_pre_img else None,
                           pre_hms=pre_hms)

        # merge multi-scale testing results
        results = self.merge_outputs(detections)
        if self.opt.gpus[0] >= 0:
            torch.cuda.synchronize()
        end_time = time.time()
        merge_time += end_time - post_process_time

        if self.opt.tracking:
            # public detection mode in MOT challenge
            public_det = meta['cur_dets'] if self.opt.public_det else None
            # add tracking id to results
            results = self.tracker.step(results, public_det)
            self.pre_images = images

        tracking_time = time.time()
        track_time += tracking_time - end_time
        tot_time += tracking_time - start_time

        if self.opt.debug >= 1:
            self.show_results(self.debugger, image, results)
        self.cnt += 1

        show_results_time = time.time()
        display_time += show_results_time - end_time

        # return results and run time
        ret = {
            'results': results,
            'tot': tot_time,
            'load': load_time,
            'pre': pre_time,
            'net': net_time,
            'dec': dec_time,
            'post': post_time,
            'merge': merge_time,
            'track': track_time,
            'display': display_time
        }
        if self.opt.save_video:
            try:
                # return debug image for saving video
                ret.update({'generic': self.debugger.imgs['generic']})
            except KeyError:
                pass
        return ret

    def _transform_scale(self, image, scale=1):
        '''
        Prepare input image in different testing modes.
        Currently supports: fix to a short side / center crop to a fixed size /
        keep the original resolution but pad to a multiple of 32.
        '''
        height, width = image.shape[0:2]
        new_height = int(height * scale)
        new_width = int(width * scale)
        if self.opt.fix_short > 0:
            if height < width:
                inp_height = self.opt.fix_short
                inp_width = (int(width / height * self.opt.fix_short) +
                             63) // 64 * 64
            else:
                inp_height = (int(height / width * self.opt.fix_short) +
                              63) // 64 * 64
                inp_width = self.opt.fix_short
            c = np.array([width / 2, height / 2], dtype=np.float32)
            s = np.array([width, height], dtype=np.float32)
        elif self.opt.fix_res:
            inp_height, inp_width = self.opt.input_h, self.opt.input_w
            c = np.array([new_width / 2., new_height / 2.], dtype=np.float32)
            s = max(height, width) * 1.0
            # s = np.array([inp_width, inp_height], dtype=np.float32)
        else:
            inp_height = (new_height | self.opt.pad) + 1
            inp_width = (new_width | self.opt.pad) + 1
            c = np.array([new_width // 2, new_height // 2], dtype=np.float32)
            s = np.array([inp_width, inp_height], dtype=np.float32)
        resized_image = cv2.resize(image, (new_width, new_height))
        return resized_image, c, s, inp_width, inp_height, height, width

    def pre_process(self, image, scale, input_meta={}):
        '''
        Crop, resize, and normalize image. Gather meta data for post processing
        and tracking.
        '''
        resized_image, c, s, inp_width, inp_height, height, width = \
          self._transform_scale(image)
        trans_input = get_affine_transform(c, s, 0, [inp_width, inp_height])
        out_height = inp_height // self.opt.down_ratio
        out_width = inp_width // self.opt.down_ratio
        trans_output = get_affine_transform(c, s, 0, [out_width, out_height])

        inp_image = cv2.warpAffine(resized_image,
                                   trans_input, (inp_width, inp_height),
                                   flags=cv2.INTER_LINEAR)
        inp_image = ((inp_image / 255. - self.mean) / self.std).astype(
            np.float32)

        images = inp_image.transpose(2, 0, 1).reshape(1, 3, inp_height,
                                                      inp_width)
        if self.opt.flip_test:
            images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
        images = torch.from_numpy(images)
        meta = {'calib': np.array(input_meta['calib'], dtype=np.float32) \
                 if 'calib' in input_meta else \
                 self._get_default_calib(width, height)}
        meta.update({
            'c': c,
            's': s,
            'height': height,
            'width': width,
            'out_height': out_height,
            'out_width': out_width,
            'inp_height': inp_height,
            'inp_width': inp_width,
            'trans_input': trans_input,
            'trans_output': trans_output
        })
        if 'pre_dets' in input_meta:
            meta['pre_dets'] = input_meta['pre_dets']
        if 'cur_dets' in input_meta:
            meta['cur_dets'] = input_meta['cur_dets']
        return images, meta

    def _trans_bbox(self, bbox, trans, width, height):
        '''
        Transform bounding boxes according to image crop.
        '''
        bbox = np.array(copy.deepcopy(bbox), dtype=np.float32)
        bbox[:2] = affine_transform(bbox[:2], trans)
        bbox[2:] = affine_transform(bbox[2:], trans)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, width - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, height - 1)
        return bbox

    def _get_additional_inputs(self, dets, meta, with_hm=True):
        '''
        Render the input heatmap from previous tracks.
        '''
        trans_input, trans_output = meta['trans_input'], meta['trans_output']
        inp_width, inp_height = meta['inp_width'], meta['inp_height']
        out_width, out_height = meta['out_width'], meta['out_height']
        input_hm = np.zeros((1, inp_height, inp_width), dtype=np.float32)
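        # input_hm is a single-channel heatmap at network-input resolution; a
        # Gaussian peak is rendered below for every track that survives the
        # pre_thresh / active filtering.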

        output_inds = []
        for det in dets:
            if det['score'] < self.opt.pre_thresh or det['active'] == 0:
                continue
            bbox = self._trans_bbox(det['bbox'], trans_input, inp_width,
                                    inp_height)
            bbox_out = self._trans_bbox(det['bbox'], trans_output, out_width,
                                        out_height)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if (h > 0 and w > 0):
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                if with_hm:
                    draw_umich_gaussian(input_hm[0], ct_int, radius)
                ct_out = np.array([(bbox_out[0] + bbox_out[2]) / 2,
                                   (bbox_out[1] + bbox_out[3]) / 2],
                                  dtype=np.int32)
                output_inds.append(ct_out[1] * out_width + ct_out[0])
        if with_hm:
            input_hm = input_hm[np.newaxis]
            if self.opt.flip_test:
                input_hm = np.concatenate((input_hm, input_hm[:, :, :, ::-1]),
                                          axis=0)
            input_hm = torch.from_numpy(input_hm).to(self.opt.device)
        output_inds = np.array(output_inds, np.int64).reshape(1, -1)
        output_inds = torch.from_numpy(output_inds).to(self.opt.device)
        return input_hm, output_inds

    def _get_default_calib(self, width, height):
        calib = np.array([[self.rest_focal_length, 0, width / 2, 0],
                          [0, self.rest_focal_length, height / 2, 0],
                          [0, 0, 1, 0]])
        return calib

    def _sigmoid_output(self, output):
        if 'hm' in output:
            output['hm'] = output['hm'].sigmoid_()
        if 'hm_hp' in output:
            output['hm_hp'] = output['hm_hp'].sigmoid_()
        if 'dep' in output:
            output['dep'] = 1. / (output['dep'].sigmoid() + 1e-6) - 1.
            output['dep'] *= self.opt.depth_scale
        return output

    def _flip_output(self, output):
        average_flips = ['hm', 'wh', 'dep', 'dim']  ##TODO consider tracking_wh
        neg_average_flips = ['amodel_offset']
        single_flips = [
            'ltrb', 'nuscenes_att', 'velocity', 'ltrb_amodal', 'reg',
            'hp_offset', 'rot', 'tracking', 'pre_hm'
        ]  ## TODO consider iou
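        # With flip_test enabled, batch index 0 holds the original image and
        # index 1 its horizontal flip (see the concatenation in pre_process);
        # the heads are merged back into a single prediction accordingly.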
        for head in output:
            if head in average_flips:
                output[head] = (output[head][0:1] +
                                flip_tensor(output[head][1:2])) / 2
            if head in neg_average_flips:
                flipped_tensor = flip_tensor(output[head][1:2])
                flipped_tensor[:, 0::2] *= -1
                output[head] = (output[head][0:1] + flipped_tensor) / 2
            if head in single_flips:
                output[head] = output[head][0:1]
            if head == 'hps':
                output['hps'] = (output['hps'][0:1] + flip_lr_off(
                    output['hps'][1:2], self.flip_idx)) / 2
            if head == 'hm_hp':
                output['hm_hp'] = (output['hm_hp'][0:1] + \
                  flip_lr(output['hm_hp'][1:2], self.flip_idx)) / 2

        return output

    def process(self,
                images,
                pre_images=None,
                pre_hms=None,
                pre_inds=None,
                return_time=False):
        with torch.no_grad():
            if self.opt.gpus[0] >= 0:
                torch.cuda.synchronize()
            output = self.model(images, pre_images, pre_hms)[-1]
            output = self._sigmoid_output(output)
            output.update({'pre_inds': pre_inds})
            if self.opt.flip_test:
                output = self._flip_output(output)
            if self.opt.gpus[0] >= 0:
                torch.cuda.synchronize()
            forward_time = time.time()

            dets = generic_decode(output, K=self.opt.K, opt=self.opt)
            if self.opt.gpus[0] >= 0:
                torch.cuda.synchronize()
            for k in dets:
                dets[k] = dets[k].detach().cpu().numpy()
        if return_time:
            return output, dets, forward_time
        else:
            return output, dets

    def post_process(self, dets, meta, scale=1):
        dets = generic_post_process(self.opt, dets, [meta['c']], [meta['s']],
                                    meta['out_height'], meta['out_width'],
                                    self.opt.num_classes, [meta['calib']],
                                    meta['height'], meta['width'])
        self.this_calib = meta['calib']

        if scale != 1:
            for i in range(len(dets[0])):
                for k in ['bbox', 'hps']:
                    if k in dets[0][i]:
                        dets[0][i][k] = (np.array(dets[0][i][k], np.float32) /
                                         scale).tolist()
        return dets[0]

    def merge_outputs(self, detections):
        assert len(self.opt.test_scales) == 1, 'multi_scale not supported!'
        results = []
        for i in range(len(detections[0])):
            if detections[0][i]['score'] > self.opt.out_thresh:
                results.append(detections[0][i])
        return results

    def debug(self,
              debugger,
              images,
              dets,
              output,
              scale=1,
              pre_images=None,
              pre_hms=None):
        img = images[0].detach().cpu().numpy().transpose(1, 2, 0)
        img = np.clip(((img * self.std + self.mean) * 255.), 0,
                      255).astype(np.uint8)
        pred = debugger.gen_colormap(output['hm'][0].detach().cpu().numpy())
        debugger.add_blend_img(img, pred, 'pred_hm')
        if 'hm_hp' in output:
            pred = debugger.gen_colormap_hp(
                output['hm_hp'][0].detach().cpu().numpy())
            debugger.add_blend_img(img, pred, 'pred_hmhp')

        if pre_images is not None:
            pre_img = pre_images[0].detach().cpu().numpy().transpose(1, 2, 0)
            pre_img = np.clip(((pre_img * self.std + self.mean) * 255.), 0,
                              255).astype(np.uint8)
            debugger.add_img(pre_img, 'pre_img')
            if pre_hms is not None:
                pre_hm = debugger.gen_colormap(
                    pre_hms[0].detach().cpu().numpy())
                debugger.add_blend_img(pre_img, pre_hm, 'pre_hm')

    def show_results(self, debugger, image, results):
        debugger.add_img(image, img_id='generic')
        # if self.opt.tracking:
        #   debugger.add_img(self.pre_image_ori if self.pre_image_ori is not None else image,
        #     img_id='previous')
        #   self.pre_image_ori = image

        for j in range(len(results)):
            if results[j]['score'] > self.opt.vis_thresh:
                if 'active' in results[j] and results[j]['active'] == 0:
                    continue
                item = results[j]
                if ('bbox' in item):
                    sc = item['score'] if self.opt.demo == '' or \
                      not ('tracking_id' in item) else item['tracking_id']
                    sc = item[
                        'tracking_id'] if self.opt.show_track_color else sc

                    debugger.add_coco_bbox(item['bbox'],
                                           item['class'] - 1,
                                           sc,
                                           img_id='generic')

                if 'tracking' in item:
                    debugger.add_arrow(item['ct'],
                                       item['tracking'],
                                       img_id='generic')

                tracking_id = item[
                    'tracking_id'] if 'tracking_id' in item else -1
                if 'tracking_id' in item and self.opt.demo == '' and \
                  not self.opt.show_track_color:
                    debugger.add_tracking_id(item['ct'],
                                             item['tracking_id'],
                                             img_id='generic')

                if (item['class'] in [1, 2]) and 'hps' in item:
                    debugger.add_coco_hp(item['hps'],
                                         tracking_id=tracking_id,
                                         img_id='generic')

        if len(results) > 0 and \
          'dep' in results[0] and 'alpha' in results[0] and 'dim' in results[0]:
            debugger.add_3d_detection(
                image if not self.opt.qualitative else cv2.resize(
                    debugger.imgs['pred_hm'],
                    (image.shape[1], image.shape[0])),
                False,
                results,
                self.this_calib,
                vis_thresh=self.opt.vis_thresh,
                img_id='ddd_pred')
            debugger.add_bird_view(results,
                                   vis_thresh=self.opt.vis_thresh,
                                   img_id='bird_pred',
                                   cnt=self.cnt)
            if self.opt.show_track_color and self.opt.debug == 4:
                del debugger.imgs['generic'], debugger.imgs['bird_pred']
        if 'ddd_pred' in debugger.imgs:
            debugger.imgs['generic'] = debugger.imgs['ddd_pred']
        if self.opt.debug == 4:
            debugger.save_all_imgs(self.opt.debug_dir,
                                   prefix='{}'.format(self.cnt))
        else:
            debugger.show_all_imgs(pause=self.pause)

    def reset_tracking(self):
        self.tracker.reset()
        self.pre_images = None
        self.pre_image_ori = None
Example #24
0
def main(opt):

    if opt.verbose:
        print("------------------------")
        print("RUNNING SET UP")
        print("------------------------")
    tf.logging.set_verbosity(40)
    random.seed(0)
    Tensor = torch.cuda.FloatTensor if opt.using_cuda else torch.FloatTensor
    os.makedirs(opt.output_folder, exist_ok=True)
    if opt.LSTM:
        opt.max_cosine_distance = 1
        lstm = CombiLSTM()
        checkpoint = torch.load(opt.lstm_model)
        lstm.load_state_dict(checkpoint['state_dict'])
        if opt.using_cuda:
            lstm.cuda()
        lstm.eval()
    else:
        lstm = None
    if opt.combine_features:
        combination_model = CombiNet()
        checkpoint = torch.load(opt.combo_model)
        combination_model.load_state_dict(checkpoint['state_dict'])
        if opt.using_cuda:
            combination_model.cuda()
        combination_model.eval()
    else:
        combination_model = None
    
    dataset = SequenceDataset(opt.sequence_folder, point_cloud=opt.point_cloud, omni=opt.omni)
    dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=opt.n_cpu, collate_fn = collate_fn)
    appearance_model = create_appearance_model(opt.appearance_model, opt.aligned_reid_ckpt, opt.resnet_reid_ckpt, opt.using_cuda)
    if opt.point_cloud:
        depth_model = create_depth_model(opt.depth_model, opt.depth_config_path)
    if opt.track_3d:
        tracker = Tracker_3d(appearance_model=appearance_model, cuda=opt.using_cuda, JPDA = opt.JPDA, m_best_sol=opt.m_best_sol,
                        max_age = opt.max_age, n_init=opt.n_init, assn_thresh=opt.assn_thresh,
                        matching_strategy=opt.matching_strategy,
                        gate_full_state=opt.gate_full_state,
                        kf_vel_params=(opt.pos_weight_3d, opt.pos_weight, opt.vel_weight, opt.theta_weight,
                                       opt.kf_process, opt.kf_2d_meas, opt.kf_3d_meas, opt.initial_uncertainty),
                        calib=dataset.calib,
                        dummy_node_cost_iou=opt.dummy_node_cost_iou,
                        dummy_node_cost_app=opt.dummy_node_cost_app,
                        nn_budget=opt.nn_budget,
                        use_imm=opt.use_imm,
                        uncertainty_limit=opt.uncertainty_limit,
                        gate_limit=opt.gate_limit,
                        omni=opt.omni)
    else:
        tracker = Tracker(appearance_model=appearance_model, cuda=opt.using_cuda, JPDA = opt.JPDA, m_best_sol=opt.m_best_sol,
                        max_age = opt.max_age, n_init=opt.n_init, assn_thresh=opt.assn_thresh,
                        matching_strategy=opt.matching_strategy,
                        kf_appearance_feature=opt.kf_appearance_feature,
                        gate_full_state=opt.gate_full_state,
                        kf_vel_params=(opt.pos_weight, opt.vel_weight, opt.kf_process, opt.kf_2d_meas, opt.initial_uncertainty),
                        kf_walk_params=(opt.pos_weight, opt.vel_weight, opt.kf_process, opt.kf_2d_meas, opt.initial_uncertainty),
                        calib=dataset.calib,
                        dummy_node_cost_iou=opt.dummy_node_cost_iou,
                        dummy_node_cost_app=opt.dummy_node_cost_app,
                        nn_budget=opt.nn_budget,
                        use_imm=opt.use_imm,
                        uncertainty_limit=opt.uncertainty_limit,
                        optical_flow=opt.optical_flow_initiation,
                        gate_limit=opt.gate_limit)

    results = []
    results_3d = []
    n_frames = len(dataloader)
    if opt.log_data:
        full_log = [{'tracks':[], 'detections':[], 'detections_3d':[]} for _ in range(n_frames)]
    det_matrix = None
    seq_name = os.path.split(opt.sequence_folder)[-1]

    frame_times = []
    if opt.verbose:
        print("------------------------")
        print("BEGINNING TRACKING OF SEQUENCE %s"%seq_name)
        print("------------------------")
    for frame_idx, img_path, input_img, point_cloud in tqdm(dataloader, ncols = 100, disable=not opt.verbose):
        # if frame_idx > 120:
        #     break
        # elif frame_idx < 98:
        #     continue

        if opt.log_data:
            full_log[frame_idx]['img_path'] = copy.copy(img_path)
        input_img = input_img.type(Tensor)
        if opt.reference:
            detections, object_ids, det_matrix = read_ground_truth_2d_detections(os.path.join(opt.sequence_folder,'det',opt.ref_det+'.txt'), frame_idx, det_matrix, threshold = 0, nms_threshold = opt.nms_thresh)
        elif opt.ground_truth:
            detections, object_ids, det_matrix = read_ground_truth_2d_detections(os.path.join(opt.sequence_folder,'gt','gt.txt'), frame_idx, det_matrix, nms_threshold = opt.nms_thresh)
        else:
            raise("Must specify ground truth or detections")

        # --- START OF TRACKING ---
        # start_time = time.time()
        if detections is None or len(detections)==0:
            tracker.predict()
            if opt.log_data:
                full_log[frame_idx]['predicted_tracks'] = copy.deepcopy(tracker.tracks)
            start_time = time.time()
            tracker.update(input_img, [])
        else:
            total_dets = len(detections)
            patches = get_image_patches(input_img, detections)
            appearance_features = generate_features_batched(appearance_model, patches, opt, object_ids)
            if opt.point_cloud:
                if not opt.omni:
                    point_cloud = point_cloud[point_cloud[:,2]>=0]
                if opt.fpointnet:
                    boxes_3d, valid_3d, _, scores_3d, depth_features = generate_detections_3d(depth_model, 
                                                                        detections, np.asarray(point_cloud), 
                                                                        dataset.calib, input_img.shape,
                                                                        peds='ped' in opt.ref_det or opt.omni)
                    depth_features = convert_depth_features(depth_features, valid_3d)
                else:
                    boxes_3d, valid_3d = read_ground_truth_3d_detections(os.path.join(opt.sequence_folder,'gt','3d_detections.txt'), frame_idx)
                    # No learned depth features in this branch; keep the name
                    # defined so combine_features below does not hit a NameError.
                    depth_features = None
                features, appearance_features = combine_features(appearance_features, depth_features, valid_3d, combination_model, depth_weight = opt.depth_weight)
                # boxes_3d = boxes_3d[valid_3d != -1] # Old and buggy way of handling missing box
                # detections = detections[valid_3d != -1]
                if np.any(valid_3d == -1):
                    compare_2d = True
                else:
                    compare_2d = False
                if len(boxes_3d) > 0:
                    detections_3d = []
                    for idx, box in enumerate(boxes_3d):
                        if valid_3d[idx] == -1:
                            detections_3d.append(None)
                        else:
                            detections_3d.append(np.array(box).astype(np.float32))
                else:
                    detections_3d = None
            else:
                appearance_features = [appearance_features[i] for i in range(total_dets)]
                features = [None]*len(appearance_features)
                compare_2d = True
                detections_3d = None
            detections = convert_detections(detections, features, appearance_features, detections_3d)
            tracker.predict()
            if opt.log_data:
                full_log[frame_idx]['predicted_tracks'] = copy.deepcopy(tracker.tracks)
            start_time = time.time()
            tracker.update(input_img, detections, compare_2d)

        # --- END OF TRACKING ---
        end_time = time.time()
        frame_times.append(end_time - start_time)


        if opt.log_data:
            full_tracks = copy.deepcopy(tracker.tracks)
            temp_tracks = []
            for track in full_tracks:
                bbox = track.to_tlwh(None)
                if not (bbox[0] < 0-10 or bbox[1] < 0-10 or bbox[0] + bbox[2] > input_img.shape[2]+10 or bbox[1] + bbox[3] > input_img.shape[1]+10):
                    temp_tracks.append(track)
            full_log[frame_idx]['tracks'] = temp_tracks
            full_log[frame_idx]['detections'] = copy.deepcopy(detections)

        for track in tracker.tracks:
            if opt.track_3d:
                bbox_3d = track.to_tlwh3d()
            # The 2D box is needed in both modes for the image-bounds check below.
            bbox = track.to_tlwh(None)
            if bbox[0] < 0-10 or bbox[1] < 0-10 or bbox[0] + bbox[2] > input_img.shape[2]+10 or bbox[1] + bbox[3] > input_img.shape[1]+10:
                continue
            bbox[0] = max(0,bbox[0]) # Frame adjustments
            bbox[1] = max(0,bbox[1])
            bbox[2] = min(bbox[0]+bbox[2], input_img.shape[2])-bbox[0]
            bbox[3] = min(bbox[1]+bbox[3], input_img.shape[1])-bbox[1]

            track_status = 1
            if not track.is_confirmed(): # or track.time_since_update > 0:
                if opt.near_online:
                    if not track.is_confirmed():
                        track_status = 0
                    else:
                        track_status = 2
                        continue
                else:
                    continue
            if opt.near_online:
                if opt.track_3d:
                    results_3d.append([frame_idx, track.track_id, bbox_3d[0], bbox_3d[1], bbox_3d[2], bbox_3d[3], bbox_3d[4], bbox_3d[5], bbox_3d[6], track_status])
                else:
                    results.append([frame_idx, track.track_id, bbox[0], bbox[1], bbox[2], bbox[3], track_status])

                if track_status == 1:  # promote earlier tentative (status 0) rows of this track
                    if opt.track_3d:
                        for row_i in range(len(results_3d)):
                            if results_3d[row_i][1] == track.track_id:
                                results_3d[row_i][9] = 1
                    else:
                        for row_i in range(len(results)):
                            if results[row_i][1] == track.track_id:
                                results[row_i][6] = 1
            else:
                if opt.track_3d:
                    results_3d.append([frame_idx, track.track_id, bbox_3d[0], bbox_3d[1], bbox_3d[2], bbox_3d[3], bbox_3d[4], bbox_3d[5], bbox_3d[6]])
                else:
                    results.append([frame_idx, track.track_id, bbox[0], bbox[1], bbox[2], bbox[3]])
                # if opt.point_cloud:

    frame_times = np.asarray(frame_times)
    if opt.verbose:
        print("------------------------")
        print("COMPLETED TRACKING, SAVING RESULTS")
        print("------------------------")
        print('\n\n','Total Tracking Time:',np.sum(frame_times),'Average Time Per Frame:',np.mean(frame_times))
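    # Output format note (an assumption): the rows written below look like
    # MOTChallenge-style CSV, i.e. frame, track id, box/state fields, followed
    # by a few placeholder columns.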

    if opt.track_3d:
        output_file_3d = os.path.join(opt.output_folder, seq_name+"_3d.txt")
        if len(results_3d) > 0:
            with open(output_file_3d, 'w+') as f:
                for row in results_3d:
                    if opt.near_online and row[9] != 1:
                        continue
                    print('%d,%d,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.4f,1,1,1,-1' % (
                        row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7], row[8]), file=f)
    else:
        output_file = os.path.join(opt.output_folder, seq_name+".txt")
        if len(results) > 0:
            with open(output_file, 'w+') as f:
                for row in results:
                    if opt.near_online and row[6] != 1:
                        continue
                    print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,1,1,-1' % (
                        row[0], row[1], row[2], row[3], row[4], row[5]), file=f)

    if opt.log_data:
        output_file = os.path.join(opt.output_folder, seq_name+".p")
        with open(output_file, 'wb') as f:
            pickle.dump(full_log, f)
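
# Illustrative only (hypothetical, not from this code): main(opt) would typically
# be driven by an argparse-style namespace, e.g.
#   parser = argparse.ArgumentParser()
#   parser.add_argument('--sequence_folder', type=str)
#   parser.add_argument('--output_folder', type=str)
#   parser.add_argument('--verbose', action='store_true')
#   ...
#   main(parser.parse_args())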
Example #25
0
    def __init__(self, camera_stream, obstacle_tracking_stream, flags,
                 camera_setup):
        from dataset.dataset_factory import get_dataset
        from model.model import create_model, load_model
        from opts import opts
        from utils.tracker import Tracker

        camera_stream.add_callback(self.on_frame_msg,
                                   [obstacle_tracking_stream])
        self._flags = flags
        self._logger = erdos.utils.setup_logging(self.config.name,
                                                 self.config.log_file_name)
        self._csv_logger = erdos.utils.setup_csv_logging(
            self.config.name + '-csv', self.config.csv_log_file_name)
        self._camera_setup = camera_setup
        # TODO(ionel): Might have to filter labels when running with a coco
        # and a nuscenes model.
        num_classes = {
            'kitti_tracking': 3,
            'coco': 90,
            'mot': 1,
            'nuscenes': 10
        }
        # Other flags:
        # 1) --K ; max number of output objects.
        # 2) --fix_short ; resizes the shorter image side to fix_short and
        # scales the other side so that the aspect ratio is maintained.
        # 3) --pre_hm ; render a heatmap of detections from the previous frame.
        # 4) --input_w; str(camera_setup.width)
        # 5) --input_h; str(camera_setup.height)
        args = [
            'tracking', '--load_model', flags.center_track_model_path,
            '--dataset', flags.center_track_model, '--test_focal_length',
            str(int(camera_setup.get_focal_length())), '--out_thresh',
            str(flags.obstacle_detection_min_score_threshold), '--pre_thresh',
            str(flags.obstacle_detection_min_score_threshold), '--new_thresh',
            str(flags.obstacle_detection_min_score_threshold),
            '--track_thresh',
            str(flags.obstacle_detection_min_score_threshold), '--max_age',
            str(flags.obstacle_track_max_age), '--num_classes',
            str(num_classes[flags.center_track_model]), '--tracking',
            '--hungarian'
        ]
        opt = opts().init(args)
        gpu = True
        if gpu:
            opt.device = torch.device('cuda')
        else:
            opt.device = torch.device('cpu')
        self.opt = opt
        self.model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt)
        self.model = load_model(self.model, opt.load_model, opt)
        self.model = self.model.to(self.opt.device)
        self.model.eval()

        self.trained_dataset = get_dataset(opt.dataset)
        self.mean = np.array(self.trained_dataset.mean,
                             dtype=np.float32).reshape(1, 1, 3)
        self.std = np.array(self.trained_dataset.std,
                            dtype=np.float32).reshape(1, 1, 3)
        self.rest_focal_length = self.trained_dataset.rest_focal_length \
            if self.opt.test_focal_length < 0 else self.opt.test_focal_length
        self.flip_idx = self.trained_dataset.flip_idx
        self.cnt = 0
        self.pre_images = None
        self.pre_image_ori = None
        self.tracker = Tracker(opt)