import json
import multiprocessing
import pickle
import sys
from collections import defaultdict

import cv2
from tqdm import tqdm

# deep_sort helpers plus the project-local detector and tracker; the exact
# import paths below are assumed from the usual deep_sort repo layout.
from deep_sort import nn_matching
from tools import generate_detections as gdet
from detector import Detector
from tracker import Tracker


class Camera(multiprocessing.Process):
    def __init__(self, stream, queue, track_prefix="", simulation_file=None):
        super().__init__()
        self.queue = queue
        self.track_prefix = track_prefix
        self.simulation_file = simulation_file
        if simulation_file is None:
            # Deep SORT parameters
            max_cosine_distance = 0.5

            # Initialize the deep_sort appearance encoder and metric
            model_filename = 'networks/mars-small128.pb'
            encoder = gdet.create_box_encoder(model_filename, batch_size=1)
            metric = nn_matching.NearestNeighborDistanceMetric(
                "cosine", max_cosine_distance, None)

            # Initialize object detector and tracker
            video_width = int(stream.get(cv2.CAP_PROP_FRAME_WIDTH))
            video_height = int(stream.get(cv2.CAP_PROP_FRAME_HEIGHT))
            detector = Detector(video_width, video_height)
            self.tracker = Tracker(metric, detector, encoder)
            self.stream = stream

    def run(self):
        # Dispatch to live tracking or to replaying a pickled track file.
        if self.simulation_file is None:
            self.track()
        else:
            self.simulate()

    def track(self):
        frame_id = 0
        while True:
            ret, frame = self.stream.read()
            if not ret:
                break
            # consume() returns (annotated_frame, bbox_dict); only the
            # bounding boxes are needed here.
            _, tracks = self.tracker.consume(frame)
            self.queue.put((frame_id, {
                "%s%d" % (self.track_prefix, t): bbox
                for t, bbox in tracks.items()
            }))
            frame_id += 1
        # End-of-stream sentinel.
        self.queue.put((-1, None))

    def get_tracks(self):
        return self.queue.get()

    def simulate(self):
        with open(self.simulation_file, "rb") as f:
            tracks_dict = pickle.load(f)
        for frame_id, tracks in tracks_dict.items():
            self.queue.put((frame_id, {
                "%s%d" % (self.track_prefix, t): bbox
                for t, bbox in tracks.items()
            }))
        # Same end-of-stream sentinel as track().
        self.queue.put((-1, None))
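
# Usage sketch (illustrative, not part of the pipeline): spawn one Camera per
# video stream and drain its queue until the -1 sentinel arrives. The video
# path and track prefix are hypothetical, and the fork start method is assumed
# since the VideoCapture handle is created in the parent process.
def _example_consume_camera():
    stream = cv2.VideoCapture("videos/pen-1-left.mp4")  # hypothetical path
    queue = multiprocessing.Queue()
    cam = Camera(stream, queue, track_prefix="left_")
    cam.start()
    while True:
        frame_id, tracks = cam.get_tracks()
        if frame_id == -1:  # end-of-stream sentinel
            break
        print(frame_id, tracks)
    cam.join()
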
def annotate_video(video_path):
    video_prefix = video_path.split('.')[0]
    video_name = video_prefix.split('/')[-1]
    view = video_name.split('-')[-1]
    output_dict = {
        "videoFileName": video_name,
        "fullVideoFilePath": video_path,
        "stepSize": 0.1,
        "config": {
            "stepSize": 0.1,
            "playbackRate": 0.4,
            "imageMimeType": "image/jpeg",
            "imageExtension": ".jpg",
            "framesZipFilename": "extracted-frames.zip",
            "consoleLog": "0"
        },
        "objects": []
    }

    # Deep SORT parameters
    max_cosine_distance = 0.5
    nn_budget = None

    # Initialize the deep_sort appearance encoder and metric
    model_filename = 'networks/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)

    # Initialize object detector and tracker
    cap = cv2.VideoCapture(video_path)
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    video_fps = cap.get(cv2.CAP_PROP_FPS)
    det = Detector(w, h)
    deep_sort_tracker = Tracker(metric, det, encoder)

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(f"{video_prefix}-annotated.mp4", fourcc,
                          video_fps, (w, h))
    length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    total_frames = 0
    pbar = tqdm(total=length, file=sys.stdout)

    # Per-view mask that blanks out regions outside the pen; mask_map (defined
    # elsewhere in this module) maps a camera view name to its mask file.
    # cv2.imread returns None if the file is missing, so check before use.
    mask_filter = cv2.imread(f"./yolov4/masks/{mask_map[view]}-maskfilter.png")
    if mask_filter is not None and mask_filter.shape[:2] != (h, w):
        mask_filter = cv2.resize(mask_filter, (w, h))
    id_frames = defaultdict(list)

    # Read frames from the stream
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        # The tracker consumes a frame and spits out an annotated frame
        # plus the bounding boxes of the tracks it is following.
        if mask_filter is not None:
            frame = cv2.bitwise_and(frame, mask_filter)
        annotated_frame, bbox_dict = deep_sort_tracker.consume(frame)
        # Save the annotated frame
        # out.write(annotated_frame)
        pbar.update(1)
        for pig_id, (xmin, ymin, xmax, ymax) in bbox_dict.items():
            # The bbox is stored as centre coordinates plus width/height.
            x = int((xmin + xmax) / 2)
            y = int((ymin + ymax) / 2)
            width, height = int(xmax - xmin), int(ymax - ymin)
            id_frames[pig_id].append({
                "frameNumber": total_frames,
                "bbox": {"x": x, "y": y, "width": width, "height": height},
                "isGroundTruth": "1",
                "visible": "1",
                "behaviour": "other"
            })
        total_frames += 1
        if total_frames == 100:  # debugging cap: only the first 100 frames
            break

    pbar.close()
    out.release()
    cap.release()
    del det, deep_sort_tracker

    for pig_id, frames in id_frames.items():
        output_dict["objects"].append({"frames": frames, "id": pig_id})
    with open(f"{video_prefix}.json", "w") as f:
        json.dump(output_dict, f)
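
# Entry-point sketch (an assumed CLI shape, not confirmed by the repo):
# annotate every video passed on the command line, e.g.
#   python annotate.py videos/pen-1-left.mp4
if __name__ == "__main__":
    for path in sys.argv[1:]:
        annotate_video(path)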