def compute_track(video):
    """Follow one object through ``video`` and record per-frame results.

    Each frame is run through ``im_detect`` and ``filter_proposals``.  Until
    a frame yields at least one proposal, placeholder zeros are recorded.
    The first frame with proposals seeds a ``KalmanFilter`` from the
    highest-scoring box.  On every later frame the filter is advanced in
    time, and it is corrected with a measurement only when the proposal most
    similar to the previous track box has affinity above ``AFF_THRESH``.

    Relies on ``sess``, ``net``, ``NMS_THRESH``, ``CONF_THRESH`` and
    ``AFF_THRESH`` being available from the enclosing scope.

    Args:
        video: Iterable of frames; each frame is passed as-is to
            ``im_detect`` and ``crop_and_resize``.

    Returns:
        A ``(track, proposals)`` tuple of lists with one entry per frame.
        A track entry is the concatenation of the box state, the score and
        the affinity (in that order); a proposals entry is the column-wise
        concatenation of all boxes, scores and affinities for that frame.
        Frames before the first detection hold ``np.zeros(6)`` and
        ``np.zeros((1, 6))`` respectively.
    """
    track, proposals = [], []
    tracking_started = False
    kalman_filter = None

    for frame in video:
        raw_scores, raw_bboxes = im_detect(sess, net, frame)
        bboxes, scores = filter_proposals(
            raw_scores,
            raw_bboxes,
            nms_thresh=NMS_THRESH,
            conf_thresh=CONF_THRESH,
        )
        affinities = np.zeros(scores.shape)
        num_candidates = bboxes.shape[0]

        if not tracking_started:
            if num_candidates == 0:
                # Nothing seen yet: record placeholders for this frame.
                track.append(np.zeros(6))
                proposals.append(np.zeros((1, 6)))
                continue

            # First detection: seed the filter with the top-scoring box.
            tracking_started = True
            top = np.argmax(scores)
            seed_box = bboxes[top]
            kalman_filter = KalmanFilter(seed_box)
            track.append(
                np.concatenate((bboxes[top], scores[top], affinities[top])))
            proposals.append(
                np.concatenate([bboxes, scores, affinities], axis=1))
            continue

        # Prediction step; the second return value is not used here.
        state, _ = kalman_filter.update_time()
        chosen_score = np.zeros(1)
        chosen_affinity = np.zeros(1)

        if num_candidates > 0:
            # Compare every candidate crop with the crop of the previous
            # track box (first four entries of the last track record).
            reference_crop = crop_and_resize(frame, track[-1][:4])
            candidate_crops = [
                crop_and_resize(frame, candidate) for candidate in bboxes
            ]
            raw_affinities = [
                get_affinity(reference_crop, crop) for crop in candidate_crops
            ]
            affinities = np.reshape(raw_affinities, scores.shape)

            best = np.argmax(affinities)
            best_affinity = np.max(affinities)
            if best_affinity > AFF_THRESH:
                # Confident match: correct the prediction with this box.
                state, _ = kalman_filter.update_measurement(bboxes[best][:4])
                chosen_score = scores[best]
                chosen_affinity = affinities[best]

        track.append(np.concatenate((state, chosen_score, chosen_affinity)))
        proposals.append(np.concatenate((bboxes, scores, affinities), axis=1))

    return track, proposals