def poll(self):
    """Re-scans directory (using current filter) and updates widget."""
    path = os.path.join(self.directory, self._current_filter_mask)
    print(f"Polling: {path}")
    files = glob.glob(path)
    files.sort()

    if not files:
        return

    if files != self.files:
        was_on_last_image = False
        if self.video is None:
            was_on_last_image = True
            self.show()
        elif self.state["frame_idx"] == self.video.last_frame_idx:
            was_on_last_image = True

        self.files = files
        self.video = Video.from_image_filenames(filenames=files)
        self.load_video(video=self.video)

        if was_on_last_image:
            self.state["frame_idx"] = self.video.last_frame_idx
        elif self.state["frame_idx"]:
            self.state["frame_idx"] = min(
                self.state["frame_idx"], self.video.last_frame_idx
            )
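# Usage sketch: poll() above is designed to be driven by a timer so the
# widget picks up new frame images as they land on disk. The widget class
# name, the qtpy import, and the interval are assumptions for illustration,
# not names confirmed by this file.
from qtpy import QtCore

widget = ImageDirectoryWidget(directory="frames/")  # hypothetical class name
timer = QtCore.QTimer()
timer.timeout.connect(widget.poll)
timer.start(1000)  # re-scan the directory once per second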
def read(cls, file: FileHandle, *args, **kwargs) -> Labels:
    filename = file.filename

    # Load data from the YAML file
    project_data = yaml.load(file.text, Loader=yaml.SafeLoader)

    # Create skeleton which we'll use for each video
    skeleton = Skeleton()
    skeleton.add_nodes(project_data["bodyparts"])

    # Get subdirectories of videos and labeled data
    root_dir = os.path.dirname(filename)
    videos_dir = os.path.join(root_dir, "videos")
    labeled_data_dir = os.path.join(root_dir, "labeled-data")
    with os.scandir(labeled_data_dir) as file_iterator:
        data_subdirs = [file.path for file in file_iterator if file.is_dir()]

    labeled_frames = []

    # Each subdirectory of labeled data corresponds to a video.
    # We'll go through each and import the labeled frames.
    for data_subdir in data_subdirs:
        csv_files = find_files_by_suffix(
            data_subdir, prefix="CollectedData", suffix=".csv"
        )

        if csv_files:
            csv_path = csv_files[0]

            # Try to find a full video corresponding to this subdir.
            # If the subdirectory is foo, we look for foo.mp4 in the videos dir.
            shortname = os.path.split(data_subdir)[-1]
            video_path = os.path.join(videos_dir, f"{shortname}.mp4")

            if os.path.exists(video_path):
                video = Video.from_filename(video_path)
            else:
                # When no video is found, the individual frame images
                # stored in the labeled data subdir will be used.
                print(
                    f"Unable to find {video_path} so using individual frame images."
                )
                video = None

            # Import the labeled frames
            labeled_frames.extend(
                LabelsDeepLabCutCsvAdaptor.read_frames(
                    FileHandle(csv_path), full_video=video, skeleton=skeleton
                )
            )
        else:
            print(f"No csv data file found in {data_subdir}")

    return Labels(labeled_frames=labeled_frames)
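# Usage sketch (the adaptor class name is an assumption; only FileHandle and
# Labels are taken from the code above): import a DeepLabCut project from
# its config file.
labels = LabelsDeepLabCutYamlAdaptor.read(FileHandle("dlc_project/config.yaml"))
print(f"Imported {len(labels.labeled_frames)} labeled frames")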
def show_datagen_preview(labels: Labels, config_info_list: List[ConfigFileInfo]):
    """
    Shows window(s) with preview images of training data for model configs.
    """
    labels_reader = LabelsReader.from_user_instances(labels)

    win_x = 300

    def show_win(
        results: dict,
        key: Text,
        head_name: Text,
        video: Video,
        scale_to_height=None,
    ):
        nonlocal win_x

        scale = None
        if scale_to_height:
            # results[key] shape: frames, height, width, channels
            overlay_height = results[key].shape[1]
            scale = scale_to_height // overlay_height

        if key == "confmap":
            win = demo_confmaps(results[key], video, scale=scale)
        elif key == "paf":
            win = demo_pafs(results[key], video, scale=scale, decimation=2)
        else:
            raise ValueError(f"Cannot show preview window for {key}")

        win.activateWindow()
        win.setWindowTitle(f"{head_name} {key}")
        win.resize(400, 400)
        win.move(win_x, 300)
        win_x += 420

    for cfg_info in config_info_list:
        results = make_datagen_results(labels_reader, cfg_info.config)

        if "image" in results:
            vid = Video.from_numpy(results["image"])

            if "confmap" in results:
                show_win(
                    results,
                    "confmap",
                    cfg_info.head_name,
                    vid,
                    scale_to_height=vid.height,
                )
            if "paf" in results:
                show_win(
                    results,
                    "paf",
                    cfg_info.head_name,
                    vid,
                    scale_to_height=vid.height,
                )
def demo_receptive_field():
    app = QtWidgets.QApplication([])

    video = Video.from_filename("tests/data/videos/centered_pair_small.mp4")

    win = ReceptiveFieldImageWidget()
    win.setImage(video.get_frame(0))
    win._set_field_size(50)

    win.show()
    app.exec_()
def make_video_for_image_list(cls, image_dir, filenames) -> Video:
    """Creates a Video object from frame images."""

    # The image filenames in the csv may not match where the user has them,
    # so we'll change the directory to match where the user has the csv.
    def fix_img_path(img_dir, img_filename):
        img_filename = img_filename.replace("\\", "/")
        img_filename = os.path.basename(img_filename)
        img_filename = os.path.join(img_dir, img_filename)
        return img_filename

    filenames = list(map(lambda f: fix_img_path(image_dir, f), filenames))

    return Video.from_image_filenames(filenames)
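# Worked example of fix_img_path above: with
#   img_dir = "/home/user/project/labeled-data/vid1"
#   img_filename = "C:\\data\\labeled-data\\vid1\\img001.png"
# the backslashes are normalized, only the basename is kept, and the result
# is "/home/user/project/labeled-data/vid1/img001.png", i.e. the image is
# re-rooted next to the csv.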
def read(
    cls,
    file: FileHandle,
    video_path: str,
    skeleton_path: str,
    *args,
    **kwargs,
) -> Labels:
    f = file.file

    video = Video.from_filename(video_path)

    skeleton_data = pd.read_csv(skeleton_path, header=0)
    skeleton = Skeleton()
    skeleton.add_nodes(skeleton_data["name"])
    nodes = skeleton.nodes

    for name, parent, swap in skeleton_data.itertuples(index=False, name=None):
        # pandas stores missing values in object columns as the np.nan
        # singleton, so this identity check catches rows without a parent.
        if parent is not np.nan:
            skeleton.add_edge(parent, name)

    lfs = []

    pose_matrix = f["pose"][:]

    track_count, frame_count, node_count, _ = pose_matrix.shape

    tracks = [Track(0, f"Track {i}") for i in range(track_count)]
    for frame_idx in range(frame_count):
        lf_instances = []
        for track_idx in range(track_count):
            points_array = pose_matrix[track_idx, frame_idx, :, :]
            points = dict()
            for p in range(len(points_array)):
                x, y, score = points_array[p]
                points[nodes[p]] = Point(x, y)  # TODO: score

            inst = Instance(
                skeleton=skeleton, track=tracks[track_idx], points=points
            )
            lf_instances.append(inst)
        lfs.append(
            LabeledFrame(video, frame_idx=frame_idx, instances=lf_instances)
        )

    return Labels(labeled_frames=lfs)
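# Sketch of the inputs the reader above expects, with illustrative file
# names: an HDF5 "pose" dataset shaped (tracks, frames, nodes, 3) holding
# x, y, score per node, and a skeleton csv with name/parent/swap columns.
import h5py
import numpy as np

with h5py.File("pose_example.h5", "w") as f:
    f["pose"] = np.random.rand(2, 100, 3, 3)  # 2 tracks, 100 frames, 3 nodes

with open("skeleton_example.csv", "w") as f:
    f.write("name,parent,swap\nhead,,\nthorax,head,\nabdomen,thorax,\n")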
def read(
    cls,
    file: FileHandle,
    video: Union[Video, str],
    *args,
    **kwargs,
) -> Labels:
    connect_adj_nodes = False

    if video is None:
        raise ValueError("Cannot read analysis hdf5 if no video specified.")

    if not isinstance(video, Video):
        video = Video.from_filename(video)

    f = file.file
    tracks_matrix = f["tracks"][:].T
    track_names_list = f["track_names"][:].T
    node_names_list = f["node_names"][:].T

    # shape: frames * nodes * 2 * tracks
    frame_count, node_count, _, track_count = tracks_matrix.shape

    tracks = [Track(0, track_name.decode()) for track_name in track_names_list]

    skeleton = Skeleton()
    last_node_name = None
    for node_name in node_names_list:
        node_name = node_name.decode()
        skeleton.add_node(node_name)
        if connect_adj_nodes and last_node_name:
            skeleton.add_edge(last_node_name, node_name)
        last_node_name = node_name

    frames = []
    for frame_idx in range(frame_count):
        instances = []
        for track_idx in range(track_count):
            points = tracks_matrix[frame_idx, ..., track_idx]
            if not np.all(np.isnan(points)):
                point_scores = np.ones(len(points))
                # Make everything a PredictedInstance since the usual use
                # case is to export predictions for analysis.
                instances.append(
                    PredictedInstance.from_arrays(
                        points=points,
                        point_confidences=point_scores,
                        skeleton=skeleton,
                        track=tracks[track_idx],
                        instance_score=1,
                    )
                )
        if instances:
            frames.append(
                LabeledFrame(
                    video=video, frame_idx=frame_idx, instances=instances
                )
            )

    return Labels(labeled_frames=frames)
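# Minimal sketch of an analysis HDF5 file the reader above would accept.
# The reader transposes on load, so "tracks" is stored as
# (tracks, 2, nodes, frames), alongside byte-string name lists. File name,
# values, and names here are illustrative, not a spec.
import h5py
import numpy as np

with h5py.File("analysis_example.h5", "w") as f:
    f["tracks"] = np.random.rand(1, 2, 3, 10)  # 1 track, xy, 3 nodes, 10 frames
    f["track_names"] = np.array([b"animal_0"])
    f["node_names"] = np.array([b"head", b"thorax", b"abdomen"])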
def from_json_data(
    cls, data: Union[str, dict], match_to: Optional["Labels"] = None
) -> "Labels":
    """
    Create instance of class from data in dictionary.

    Method is used by other methods that load from JSON.

    Args:
        data: Dictionary, deserialized from JSON.
        match_to: If given, we'll replace particular objects in the
            data dictionary with *matching* objects in the match_to
            :class:`Labels` object. This ensures that the newly
            instantiated :class:`Labels` can be merged without
            duplicate matching objects (e.g., :class:`Video` objects).

    Returns:
        A new :class:`Labels` object.
    """

    # Parse the json string if needed.
    if type(data) is str:
        dicts = json_loads(data)
    else:
        dicts = data

    # Don't break if the json doesn't include tracks.
    dicts["tracks"] = dicts.get("tracks", [])

    # First, deserialize the skeletons, videos, and nodes lists.
    # The labels reference these so we will need them while deserializing.
    nodes = cattr.structure(dicts["nodes"], List[Node])
    idx_to_node = {i: nodes[i] for i in range(len(nodes))}

    skeletons = Skeleton.make_cattr(idx_to_node).structure(
        dicts["skeletons"], List[Skeleton]
    )
    videos = Video.cattr().structure(dicts["videos"], List[Video])

    try:
        # First try unstructuring tuple (newer format)
        track_cattr = cattr.Converter(
            unstruct_strat=cattr.UnstructureStrategy.AS_TUPLE
        )
        tracks = track_cattr.structure(dicts["tracks"], List[Track])
    except Exception:
        # Then try unstructuring dict (older format)
        try:
            tracks = cattr.structure(dicts["tracks"], List[Track])
        except Exception:
            raise ValueError("Unable to load tracks as tuple or dict!")

    # If we're given a Labels object to match, use its objects when they match.
    if match_to is not None:
        for idx, sk in enumerate(skeletons):
            for old_sk in match_to.skeletons:
                if sk.matches(old_sk):
                    # Use nodes from the matched skeleton.
                    for (node, match_node) in zip(sk.nodes, old_sk.nodes):
                        node_idx = nodes.index(node)
                        nodes[node_idx] = match_node
                    # Use the skeleton from the match.
                    skeletons[idx] = old_sk
                    break
        for idx, vid in enumerate(videos):
            for old_vid in match_to.videos:
                # Try to match videos using either their current or source
                # filename, if available.
                old_vid_paths = [old_vid.filename]
                if getattr(old_vid.backend, "has_embedded_images", False):
                    old_vid_paths.append(old_vid.backend._source_video.filename)

                new_vid_paths = [vid.filename]
                if getattr(vid.backend, "has_embedded_images", False):
                    new_vid_paths.append(vid.backend._source_video.filename)

                is_match = False
                for old_vid_path in old_vid_paths:
                    for new_vid_path in new_vid_paths:
                        if old_vid_path == new_vid_path or weak_filename_match(
                            old_vid_path, new_vid_path
                        ):
                            is_match = True
                            videos[idx] = old_vid
                            break
                    if is_match:
                        break
                if is_match:
                    break

    suggestions = []
    if "suggestions" in dicts:
        suggestions_cattr = cattr.Converter()
        suggestions_cattr.register_structure_hook(
            Video, lambda x, type: videos[int(x)]
        )
        try:
            suggestions = suggestions_cattr.structure(
                dicts["suggestions"], List[SuggestionFrame]
            )
        except Exception as e:
            print("Error while loading suggestions (1)")
            print(e)

            try:
                # Convert old suggestion format to new format.
                # Old format: {video: list of frame indices}
                # New format: [SuggestionFrames]
                old_suggestions = suggestions_cattr.structure(
                    dicts["suggestions"], Dict[Video, List]
                )
                for video in old_suggestions.keys():
                    suggestions.extend(
                        [
                            SuggestionFrame(video, idx)
                            for idx in old_suggestions[video]
                        ]
                    )
            except Exception as e:
                print("Error while loading suggestions (2)")
                print(e)

    if "negative_anchors" in dicts:
        negative_anchors_cattr = cattr.Converter()
        negative_anchors_cattr.register_structure_hook(
            Video, lambda x, type: videos[int(x)]
        )
        negative_anchors = negative_anchors_cattr.structure(
            dicts["negative_anchors"], Dict[Video, List]
        )
    else:
        negative_anchors = dict()

    if "provenance" in dicts:
        provenance = dicts["provenance"]
    else:
        provenance = dict()

    # If there is actual labels data, get it.
    if "labels" in dicts:
        label_cattr = make_instance_cattr()
        label_cattr.register_structure_hook(
            Skeleton, lambda x, type: skeletons[int(x)]
        )
        label_cattr.register_structure_hook(
            Video, lambda x, type: videos[int(x)]
        )
        label_cattr.register_structure_hook(
            Node, lambda x, type: x if isinstance(x, Node) else nodes[int(x)]
        )
        label_cattr.register_structure_hook(
            Track, lambda x, type: None if x is None else tracks[int(x)]
        )

        labels = label_cattr.structure(dicts["labels"], List[LabeledFrame])
    else:
        labels = []

    return Labels(
        labeled_frames=labels,
        videos=videos,
        skeletons=skeletons,
        nodes=nodes,
        suggestions=suggestions,
        negative_anchors=negative_anchors,
        tracks=tracks,
        provenance=provenance,
    )
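# The two "tracks" encodings the try/except above distinguishes, shown as
# plain Python data. The dict field names are assumptions inferred from the
# positional Track(0, f"Track {i}") construction elsewhere in this file.
tracks_new = [[0, "female"], [0, "male"]]  # newer, tuple-encoded
tracks_old = [{"spawned_on": 0, "name": "female"}]  # older, dict-encoded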
def write(
    cls,
    filename: str,
    source_object: str,
    compress: Optional[bool] = None,
    save_frame_data: bool = False,
    frame_data_format: str = "png",
):
    """
    Save a Labels instance to a JSON format.

    Args:
        filename: The filename to save the data to.
        source_object: The labels dataset to save.
        compress: Whether the data should be zip compressed. If True, the
            JSON will be compressed using Python's shutil.make_archive
            command into a PKZIP zip file. If compress is True then
            filename will have a .zip appended to it.
        save_frame_data: Whether to save the image data for each frame.
            For each video in the dataset, all frames that have labels
            will be stored as an imgstore dataset.
            If save_frame_data is True then compress will be forced to
            True since the archive must contain both the JSON data and
            image data stored in ImgStores.
        frame_data_format: If save_frame_data is True, then this argument
            is used to set the data format to use when writing frame
            data to ImgStore objects. Supported formats should be:

            * 'pgm',
            * 'bmp',
            * 'ppm',
            * 'tif',
            * 'png',
            * 'jpg',
            * 'npy',
            * 'mjpeg/avi',
            * 'h264/mkv',
            * 'avc1/mp4'

            Note: 'h264/mkv' and 'avc1/mp4' require separate installation
            of these codecs on your system. They are excluded from SLEAP
            because of their GPL license.

    Returns:
        None
    """
    labels = source_object

    if compress is None:
        compress = filename.endswith(".zip")

    # Let's make a temporary directory to store the image frame data or
    # pre-compressed json in case we need it.
    with tempfile.TemporaryDirectory() as tmp_dir:

        # If we are saving frame data along with the datasets, we will
        # replace videos with new video objects that represent video data
        # from just the labeled frames.
        if save_frame_data:

            # Create a set of new Video objects with imgstore backends,
            # one for each of the videos. We will only include the labeled
            # frames though. We will then replace each video with this new
            # video.
            new_videos = labels.save_frame_data_imgstore(
                output_dir=tmp_dir, format=frame_data_format
            )

            # Make video paths relative
            for vid in new_videos:
                tmp_path = vid.filename
                # Get the parent dir of the JSON file.
                # Use "/" since this works on Windows and posix.
                img_store_dir = (
                    os.path.basename(os.path.split(tmp_path)[0])
                    + "/"
                    + os.path.basename(tmp_path)
                )
                # Change to relative path
                vid.backend.filename = img_store_dir

            # Convert to a dict, not JSON yet, because we need to patch up
            # the videos.
            d = labels.to_dict()
            d["videos"] = Video.cattr().unstructure(new_videos)

        else:
            d = labels.to_dict()

        # Set file format version
        d["format_id"] = cls.FORMAT_ID

        if compress or save_frame_data:

            # Ensure that filename ends with .json
            # (shutil will append .zip)
            filename = re.sub("(\\.json)?(\\.zip)?$", ".json", filename)

            # Write the json to the tmp directory; we will zip it up with
            # the frame data.
            full_out_filename = os.path.join(tmp_dir, os.path.basename(filename))
            json_dumps(d, full_out_filename)

            # Create the archive
            shutil.make_archive(base_name=filename, root_dir=tmp_dir, format="zip")

        # If the user doesn't want to compress, then just write the json
        # to the filename.
        else:
            json_dumps(d, filename)
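# Usage sketch: writing a zip archive that bundles the JSON with per-frame
# image data. The adaptor class name comes from the append path of the HDF5
# writer below (labels_json.LabelsJsonAdaptor); `labels` and the filename
# are illustrative.
LabelsJsonAdaptor.write("session.json.zip", labels, save_frame_data=True)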
def write(
    cls,
    filename: str,
    source_object: object,
    append: bool = False,
    save_frame_data: bool = False,
    frame_data_format: str = "png",
    all_labeled: bool = False,
    suggested: bool = False,
):
    labels = source_object

    # Delete the file if it exists; we want to start from scratch since
    # h5py truncation seems to not actually delete data from the file.
    # Don't do this if we are appending, of course.
    if os.path.exists(filename) and not append:
        os.unlink(filename)

    # Serialize all the metadata to JSON.
    d = labels.to_dict(skip_labels=True)

    if save_frame_data:
        new_videos = labels.save_frame_data_hdf5(
            filename,
            format=frame_data_format,
            user_labeled=True,
            all_labeled=all_labeled,
            suggested=suggested,
        )

        # Replace path to video file with "." (which indicates that the
        # video is in the same file as the HDF5 labels dataset).
        # Otherwise, the video paths will break if the HDF5 labels
        # dataset file is moved.
        for vid in new_videos:
            vid.backend.filename = "."

        d["videos"] = Video.cattr().unstructure(new_videos)

    with h5py.File(filename, "a") as f:

        # Add all the JSON metadata
        meta_group = f.require_group("metadata")

        meta_group.attrs["format_id"] = cls.FORMAT_ID

        # If we are appending and there already exists JSON metadata,
        # we need to read the JSON and append to the lists.
        if append and "json" in meta_group.attrs:
            old_labels = labels_json.LabelsJsonAdaptor.from_json_data(
                meta_group.attrs["json"].tostring().decode()
            )

            # A function to join two lists but only include new non-dupe
            # entries from the right hand list.
            def append_unique(old, new):
                unique = []
                for x in new:
                    try:
                        matches = [y.matches(x) for y in old]
                    except AttributeError:
                        matches = [x == y for y in old]

                    # If there were no matches, this is a unique object.
                    if sum(matches) == 0:
                        unique.append(x)
                    else:
                        # If we have an object that matches, replace the
                        # instance with the one from the new list. This will
                        # make sure objects on the Instances are the same as
                        # those in the Labels lists.
                        for i, match in enumerate(matches):
                            if match:
                                old[i] = x

                return old + unique

            # Append the lists
            labels.tracks = append_unique(old_labels.tracks, labels.tracks)
            labels.skeletons = append_unique(old_labels.skeletons, labels.skeletons)
            labels.videos = append_unique(old_labels.videos, labels.videos)
            labels.nodes = append_unique(old_labels.nodes, labels.nodes)

            # FIXME: Do something for suggestions and negative_anchors

            # Get the dict for JSON and save it over the old data
            d = labels.to_dict(skip_labels=True)

        if not append:
            # These items are stored in separate lists because the metadata
            # group got to be too big.
            for key in ("videos", "tracks", "suggestions"):
                # Convert for saving in hdf5 dataset
                data = [np.string_(json_dumps(item)) for item in d[key]]

                hdf5_key = f"{key}_json"

                # Save in its own dataset (e.g., videos_json)
                f.create_dataset(hdf5_key, data=data, maxshape=(None,))

                # Clear from dict since we don't want to save this in attribute
                d[key] = []

        # Output the dict to JSON
        meta_group.attrs["json"] = np.string_(json_dumps(d))

        # FIXME: We can probably construct these from attrs fields
        # We will store Instances and PredictedInstances in the same
        # table: instance_type=0 for Instance and instance_type=1 for
        # PredictedInstance; score will be ignored for Instances.
        instance_dtype = np.dtype(
            [
                ("instance_id", "i8"),
                ("instance_type", "u1"),
                ("frame_id", "u8"),
                ("skeleton", "u4"),
                ("track", "i4"),
                ("from_predicted", "i8"),
                ("score", "f4"),
                ("point_id_start", "u8"),
                ("point_id_end", "u8"),
            ]
        )
        frame_dtype = np.dtype(
            [
                ("frame_id", "u8"),
                ("video", "u4"),
                ("frame_idx", "u8"),
                ("instance_id_start", "u8"),
                ("instance_id_end", "u8"),
            ]
        )

        num_instances = len(labels.all_instances)
        max_skeleton_size = max([len(s.nodes) for s in labels.skeletons], default=0)

        # Initialize data arrays for serialization
        points = np.zeros(num_instances * max_skeleton_size, dtype=Point.dtype)
        pred_points = np.zeros(
            num_instances * max_skeleton_size, dtype=PredictedPoint.dtype
        )
        instances = np.zeros(num_instances, dtype=instance_dtype)
        frames = np.zeros(len(labels), dtype=frame_dtype)

        # Precompute some structures to make serialization faster
        skeleton_to_idx = {
            skeleton: labels.skeletons.index(skeleton)
            for skeleton in labels.skeletons
        }
        track_to_idx = {
            track: labels.tracks.index(track) for track in labels.tracks
        }
        track_to_idx[None] = -1
        video_to_idx = {
            video: labels.videos.index(video) for video in labels.videos
        }
        instance_type_to_idx = {Instance: 0, PredictedInstance: 1}

        # Each instance we create will have an index in the dataset; keep
        # track of these so we can quickly add from_predicted links on a
        # second pass.
        instance_to_idx = {}
        instances_with_from_predicted = []
        instances_from_predicted = []

        # If we are appending, we need to look inside to see what frame,
        # instance, and point ids we need to start from. This gives us
        # offsets to use.
        if append and "points" in f:
            point_id_offset = f["points"].shape[0]
            pred_point_id_offset = f["pred_points"].shape[0]
            instance_id_offset = f["instances"][-1]["instance_id"] + 1
            frame_id_offset = int(f["frames"][-1]["frame_id"]) + 1
        else:
            point_id_offset = 0
            pred_point_id_offset = 0
            instance_id_offset = 0
            frame_id_offset = 0

        point_id = 0
        pred_point_id = 0
        instance_id = 0

        for frame_id, label in enumerate(labels):
            frames[frame_id] = (
                frame_id + frame_id_offset,
                video_to_idx[label.video],
                label.frame_idx,
                instance_id + instance_id_offset,
                instance_id + instance_id_offset + len(label.instances),
            )
            for instance in label.instances:
                # Add this instance to the lookup structure we will need
                # for from_predicted links.
                instance_to_idx[instance] = instance_id

                parray = instance.get_points_array(copy=False, full=True)
                instance_type = type(instance)

                # Check whether we are working with a PredictedInstance or
                # an Instance.
                if instance_type is PredictedInstance:
                    score = instance.score
                    pid = pred_point_id + pred_point_id_offset
                else:
                    score = np.nan
                    pid = point_id + point_id_offset

                # Keep track of any from_predicted instance links; we will
                # insert the correct instance_id in the dataset after we
                # are done.
                if instance.from_predicted:
                    instances_with_from_predicted.append(instance_id)
                    instances_from_predicted.append(instance.from_predicted)

                # Copy all the data
                instances[instance_id] = (
                    instance_id + instance_id_offset,
                    instance_type_to_idx[instance_type],
                    frame_id,
                    skeleton_to_idx[instance.skeleton],
                    track_to_idx[instance.track],
                    -1,
                    score,
                    pid,
                    pid + len(parray),
                )

                # If these are predicted points, copy them to the predicted
                # point array; otherwise, use the normal point array.
                if type(parray) is PredictedPointArray:
                    pred_points[
                        pred_point_id : (pred_point_id + len(parray))
                    ] = parray
                    pred_point_id = pred_point_id + len(parray)
                else:
                    points[point_id : (point_id + len(parray))] = parray
                    point_id = point_id + len(parray)

                instance_id = instance_id + 1

        # Add from_predicted links
        for instance_id, from_predicted in zip(
            instances_with_from_predicted, instances_from_predicted
        ):
            try:
                instances[instance_id]["from_predicted"] = instance_to_idx[
                    from_predicted
                ]
            except KeyError:
                # If we haven't encountered the from_predicted instance yet,
                # then don't save the link. It's possible for a user to
                # create a regular instance from a predicted instance and
                # then delete all predicted instances from the file, but in
                # this case I don't think there's any reason to remember
                # which predicted instance the regular instance came from.
                pass

        # We pre-allocated our points array with the max possible size
        # considering the max skeleton size; drop any unused points.
        points = points[0:point_id]
        pred_points = pred_points[0:pred_point_id]

        # Create datasets if we need to
        if append and "points" in f:
            f["points"].resize((f["points"].shape[0] + points.shape[0]), axis=0)
            f["points"][-points.shape[0] :] = points
            f["pred_points"].resize(
                (f["pred_points"].shape[0] + pred_points.shape[0]), axis=0
            )
            f["pred_points"][-pred_points.shape[0] :] = pred_points
            f["instances"].resize(
                (f["instances"].shape[0] + instances.shape[0]), axis=0
            )
            f["instances"][-instances.shape[0] :] = instances
            f["frames"].resize((f["frames"].shape[0] + frames.shape[0]), axis=0)
            f["frames"][-frames.shape[0] :] = frames
        else:
            f.create_dataset(
                "points", data=points, maxshape=(None,), dtype=Point.dtype
            )
            f.create_dataset(
                "pred_points",
                data=pred_points,
                maxshape=(None,),
                dtype=PredictedPoint.dtype,
            )
            f.create_dataset(
                "instances", data=instances, maxshape=(None,), dtype=instance_dtype
            )
            f.create_dataset(
                "frames", data=frames, maxshape=(None,), dtype=frame_dtype
            )
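# Sketch of reading back the structured arrays written above, assuming a
# file produced by this writer (the .slp filename is illustrative): each
# instance row points into the flat point tables via its id range.
import h5py

with h5py.File("labels.slp", "r") as f:
    inst = f["instances"][0]
    start, end = inst["point_id_start"], inst["point_id_end"]
    table = "pred_points" if inst["instance_type"] == 1 else "points"
    pts = f[table][start:end]  # this instance's points, one row per node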
def read(
    cls,
    file: FileHandle,
    img_dir: str,
    use_missing_gui: bool = False,
    *args,
    **kwargs,
) -> Labels:
    dicts = file.json

    # Make skeletons from "categories"
    skeleton_map = dict()
    for category in dicts["categories"]:
        skeleton = Skeleton(name=category["name"])
        skeleton_id = category["id"]
        node_names = category["keypoints"]
        skeleton.add_nodes(node_names)

        try:
            for src_idx, dst_idx in category["skeleton"]:
                skeleton.add_edge(node_names[src_idx], node_names[dst_idx])
        except IndexError:
            # According to the COCO data format specifications[^1], the
            # edges are supposed to be 1-indexed. But in some of their own
            # datasets the edges are 0-indexed! So we try 0-indexing first
            # and fall back to 1-indexing when an index is out of range.
            # [1]: http://cocodataset.org/#format-data

            # Clear any edges we already created using 0-indexing
            skeleton.clear_edges()

            # Add edges
            for src_idx, dst_idx in category["skeleton"]:
                skeleton.add_edge(
                    node_names[src_idx - 1], node_names[dst_idx - 1]
                )

        skeleton_map[skeleton_id] = skeleton

    # Make videos from "images"

    # Remove images that aren't referenced in the annotations
    img_refs = [annotation["image_id"] for annotation in dicts["annotations"]]
    dicts["images"] = list(
        filter(lambda im: im["id"] in img_refs, dicts["images"])
    )

    # Key in JSON file should be "file_name", but sometimes it's "filename",
    # so we have to check both.
    img_filename_key = "file_name"
    if img_filename_key not in dicts["images"][0].keys():
        img_filename_key = "filename"

    # First add the img_dir to each image filename
    img_paths = [
        os.path.join(img_dir, image[img_filename_key])
        for image in dicts["images"]
    ]

    # See if there are any missing files
    img_missing = [not os.path.exists(path) for path in img_paths]

    if sum(img_missing):
        if use_missing_gui:
            okay = MissingFilesDialog(img_paths, img_missing).exec_()

            if not okay:
                return None
        else:
            raise FileNotFoundError(
                f"Images for COCO dataset could not be found in {img_dir}."
            )

    # Update the image paths (with img_dir or user selected path)
    for image, path in zip(dicts["images"], img_paths):
        image[img_filename_key] = path

    # Create the video objects for the image files
    image_video_map = dict()

    vid_id_video_map = dict()
    for image in dicts["images"]:
        image_id = image["id"]
        image_filename = image[img_filename_key]

        # Sometimes images have a vid_id which links multiple images
        # together as one video. If so, we'll use that as the video key.
        # But if there isn't a vid_id, we'll treat each image as a
        # distinct video and use the image id as the video id.
        vid_id = image.get("vid_id", image_id)

        if vid_id not in vid_id_video_map:
            kwargs = dict(filenames=[image_filename])
            for key in ("width", "height"):
                if key in image:
                    kwargs[key] = image[key]

            video = Video.from_image_filenames(**kwargs)
            vid_id_video_map[vid_id] = video
            frame_idx = 0
        else:
            video = vid_id_video_map[vid_id]
            frame_idx = video.num_frames
            video.backend.filenames.append(image_filename)

        image_video_map[image_id] = (video, frame_idx)

    # Make instances from "annotations"
    lf_map = dict()
    track_map = dict()
    for annotation in dicts["annotations"]:
        skeleton = skeleton_map[annotation["category_id"]]
        image_id = annotation["image_id"]
        video, frame_idx = image_video_map[image_id]
        keypoints = np.array(annotation["keypoints"], dtype="int").reshape(-1, 3)

        track = None
        if "track_id" in annotation:
            track_id = annotation["track_id"]
            if track_id not in track_map:
                track_map[track_id] = Track(frame_idx, str(track_id))
            track = track_map[track_id]

        points = dict()
        any_visible = False
        for i in range(len(keypoints)):
            node = skeleton.nodes[i]
            x, y, flag = keypoints[i]

            if flag == 0:
                # node not labeled for this instance
                continue

            is_visible = flag == 2
            any_visible = any_visible or is_visible
            points[node] = Point(x, y, is_visible)

        if points:
            # If none of the points had 2 as the "visible" flag, we'll
            # assume this is incorrect and just mark all as visible.
            if not any_visible:
                for point in points.values():
                    point.visible = True

            inst = Instance(skeleton=skeleton, points=points, track=track)

            if image_id not in lf_map:
                lf_map[image_id] = LabeledFrame(video, frame_idx)

            lf_map[image_id].insert(0, inst)

    return Labels(labeled_frames=list(lf_map.values()))
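# Minimal annotation fragment of the kind the loop above consumes. Per the
# COCO spec, each keypoint is an (x, y, flag) triplet where flag 0 means
# not labeled, 1 means labeled but not visible, and 2 means labeled and
# visible. Values here are illustrative.
annotation = {
    "image_id": 1,
    "category_id": 1,
    "keypoints": [100, 200, 2, 0, 0, 0, 150, 210, 1],
}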
def read(
    cls,
    file: FileHandle,
    gui: bool = True,
    *args,
    **kwargs,
):
    filename = file.filename

    mat_contents = sio.loadmat(filename)

    box_path = cls._unwrap_mat_scalar(mat_contents["boxPath"])

    # If the video file isn't found, try in the same dir as the mat file
    if not os.path.exists(box_path):
        file_dir = os.path.dirname(filename)
        box_path_name = box_path.split("\\")[-1]  # assume windows path
        box_path = os.path.join(file_dir, box_path_name)

    if not os.path.exists(box_path):
        if gui:
            video_paths = [box_path]
            missing = [True]
            okay = MissingFilesDialog(video_paths, missing).exec_()

            if not okay or missing[0]:
                return

            box_path = video_paths[0]
        else:
            # Ignore missing videos if not loading from gui
            box_path = ""

    if os.path.exists(box_path):
        vid = Video.from_hdf5(
            dataset="box", filename=box_path, input_format="channels_first"
        )
    else:
        vid = None

    nodes_ = mat_contents["skeleton"]["nodes"]
    edges_ = mat_contents["skeleton"]["edges"]
    points_ = mat_contents["positions"]

    edges_ = edges_ - 1  # convert matlab 1-indexing to python 0-indexing

    nodes = cls._unwrap_mat_array(nodes_)
    edges = cls._unwrap_mat_array(edges_)

    nodes = list(map(str, nodes))  # convert np.str_ to str

    sk = Skeleton(name=filename)
    sk.add_nodes(nodes)
    for edge in edges:
        sk.add_edge(source=nodes[edge[0]], destination=nodes[edge[1]])

    labeled_frames = []
    node_count, _, frame_count = points_.shape

    for i in range(frame_count):
        new_inst = Instance(skeleton=sk)
        for node_idx, node in enumerate(nodes):
            x = points_[node_idx][0][i]
            y = points_[node_idx][1][i]
            new_inst[node] = Point(x, y)
        if len(new_inst.points):
            new_frame = LabeledFrame(video=vid, frame_idx=i)
            new_frame.instances = (new_inst,)
            labeled_frames.append(new_frame)

    labels = Labels(labeled_frames=labeled_frames, videos=[vid], skeletons=[sk])

    return labels
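# The `positions` matrix consumed above is (nodes, 2, frames): for node n,
# points_[n][0] is the x series and points_[n][1] the y series across
# frames. Pulling one frame's pose as an (n_nodes, 2) array (illustrative
# data, not loaded from a real .mat file):
import numpy as np

points_ = np.random.rand(3, 2, 100)  # 3 nodes, xy, 100 frames
frame0 = points_[:, :, 0]  # shape (3, 2): per-node x, y at frame 0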