@classmethod
def read_headers(
    cls,
    file: format.filehandle.FileHandle,
    video_search: Union[Callable, List[Text], None] = None,
    match_to: Optional[Labels] = None,
):
    f = file.file

    # Extract the Labels JSON metadata and create a Labels object with just
    # this metadata.
    dicts = json_loads(
        f.require_group("metadata").attrs["json"].tostring().decode()
    )

    # These items are stored in separate lists because the metadata group got
    # to be too big.
    for key in ("videos", "tracks", "suggestions"):
        hdf5_key = f"{key}_json"
        if hdf5_key in f:
            items = [json_loads(item_json) for item_json in f[hdf5_key]]
            dicts[key] = items

    # Video path "." means the video is saved in the same file as the labels,
    # so replace these paths.
    for video_item in dicts["videos"]:
        if video_item["backend"]["filename"] == ".":
            video_item["backend"]["filename"] = file.filename

    # Use the video_search callback for finding videos with broken paths:

    # 1. Accept a single string as a video search path.
    if isinstance(video_search, str):
        video_search = [video_search]

    # 2. Accept a list of strings as video search paths.
    if hasattr(video_search, "__iter__"):
        # If the callback is an iterable, then we expect it to be a list of
        # strings and build a non-GUI callback with those as the search paths.
        search_paths = [
            # os.path.dirname(path) if os.path.isfile(path) else path
            path
            for path in video_search
        ]

        # Make the search function from the list of paths.
        video_search = Labels.make_video_callback(search_paths)

    # 3. Use the callback function (either given as an arg or built from paths).
    if callable(video_search):
        video_search(dicts["videos"])

    # Create the Labels object with the header data we've loaded.
    labels = labels_json.LabelsJsonAdaptor.from_json_data(dicts, match_to=match_to)

    return labels
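
# A hedged usage sketch for read_headers: reading just the Labels metadata
# (videos, skeletons, tracks, suggestions) from an HDF5-based labels file
# without touching the frame data. The adaptor class name ("LabelsV1Adaptor")
# and the import paths are assumptions based on how this classmethod is
# referenced here, not guarantees.
#
#     from sleap.io.format.filehandle import FileHandle   # assumed import path
#     from sleap.io.format.hdf5 import LabelsV1Adaptor    # assumed location
#
#     fh = FileHandle("labels.v001.slp")                  # illustrative path
#     labels = LabelsV1Adaptor.read_headers(
#         fh, video_search=["/data/videos"]  # search path for broken video paths
#     )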
def _load(self):
    if self.__tried_to_load:
        return
    self.__tried_to_load = True

    # Handle cases where the user feeds in h5.File objects instead of a filename.
    if isinstance(self.filename, h5.File):
        self.__file_h5 = self.filename
        self.filename = self.__file_h5.filename
    elif isinstance(self.filename, str):
        try:
            self.__file_h5 = h5.File(self.filename, "r")
        except OSError as ex:
            raise FileNotFoundError(
                f"Could not find HDF5 file {self.filename}"
            ) from ex
    else:
        self.__file_h5 = None

    # Handle the case when an h5.Dataset is passed in.
    if isinstance(self.dataset, h5.Dataset):
        self.__dataset_h5 = self.dataset
        self.__file_h5 = self.__dataset_h5.file
        self.dataset = self.__dataset_h5.name

    # File loaded and dataset name given, so load the dataset.
    elif isinstance(self.dataset, str) and (self.__file_h5 is not None):
        # dataset = "video0" passed:
        if self.dataset + "/video" in self.__file_h5:
            self.__dataset_h5 = self.__file_h5[self.dataset + "/video"]
            base_dataset_path = self.dataset
        else:
            # dataset = "video0/video" passed:
            self.__dataset_h5 = self.__file_h5[self.dataset]
            base_dataset_path = "/".join(self.dataset.split("/")[:-1])

        # Check for a frame_numbers dataset corresponding to the video.
        framenum_dataset = f"{base_dataset_path}/frame_numbers"
        if framenum_dataset in self.__file_h5:
            original_idx_lists = self.__file_h5[framenum_dataset]

            # Create a map from index in the original video to index in the
            # current video.
            for current_idx in range(len(original_idx_lists)):
                original_idx = original_idx_lists[current_idx]
                self.__original_to_current_frame_idx[original_idx] = current_idx

        source_video_group = f"{base_dataset_path}/source_video"
        if source_video_group in self.__file_h5:
            d = json_loads(
                self.__file_h5.require_group(source_video_group).attrs["json"]
            )
            self._source_video = Video.cattr().structure(d, Video)
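
# Illustrative sketch (all values made up) of the frame-index remapping that
# _load builds from the "frame_numbers" dataset: each row of the embedded
# video stores that frame's index in the original source video, and we invert
# the list so lookups by original index find the right stored row.
frame_numbers_example = [3, 10, 42]  # hypothetical contents of .../frame_numbers
original_to_current = {
    original_idx: current_idx
    for current_idx, original_idx in enumerate(frame_numbers_example)
}
assert original_to_current[10] == 1  # original frame 10 is stored at row 1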
@classmethod
def read(
    cls,
    file: FileHandle,
    video_search: Union[Callable, List[Text], None] = None,
    match_to: Optional[Labels] = None,
    *args,
    **kwargs,
) -> Labels:
    """
    Deserialize a JSON file as a new :class:`Labels` instance.

    Args:
        file: The file to load.
        video_search: A callback function which can modify video paths
            before we try to create the corresponding :class:`Video`
            objects. Usually you'll want to pass a callback created by
            :meth:`make_video_callback` or :meth:`make_gui_video_callback`.
            Alternately, if you pass a list of strings we'll construct a
            non-gui callback with those strings as the search paths.
        match_to: If given, we'll replace particular objects in the data
            dictionary with *matching* objects in the match_to
            :class:`Labels` object. This ensures that the newly
            instantiated :class:`Labels` can be merged without duplicate
            matching objects (e.g., :class:`Video` objects).

    Returns:
        A new :class:`Labels` object.
    """
    tmp_dir = None
    filename = file.filename

    # Check whether the file is a zipfile or not.
    if zipfile.is_zipfile(filename):

        # Make a tmpdir, located in the directory that the file exists in,
        # to unzip its contents.
        tmp_dir = os.path.join(
            os.path.dirname(filename),
            f"tmp_{os.getpid()}_{os.path.basename(filename)}",
        )
        if os.path.exists(tmp_dir):
            shutil.rmtree(tmp_dir, ignore_errors=True)
        try:
            os.mkdir(tmp_dir)
        except FileExistsError:
            pass

        # tmp_dir = tempfile.mkdtemp(dir=os.path.dirname(filename))

        try:
            # Register a cleanup routine that deletes the tmpdir on program
            # exit if something goes wrong. The True is for ignore_errors.
            atexit.register(shutil.rmtree, tmp_dir, True)

            # Uncompress the data into the directory.
            shutil.unpack_archive(filename, extract_dir=tmp_dir)

            # We can now open the JSON file, save the zip file and replace
            # file with the first JSON file we find in the archive.
            json_files = [
                os.path.join(tmp_dir, file)
                for file in os.listdir(tmp_dir)
                if file.endswith(".json")
            ]

            if len(json_files) == 0:
                raise ValueError(
                    f"No JSON file found inside {filename}. Are you sure "
                    "this is a valid sLEAP dataset?"
                )

            filename = json_files[0]

        except Exception:
            # If we had problems, delete the temp directory and re-raise the
            # exception.
            shutil.rmtree(tmp_dir, ignore_errors=True)
            raise

    # Open and parse the JSON in filename.
    with open(filename, "r") as file:
        # FIXME: Peek into the JSON to see if there is a version string.
        # We do this to tell apart old JSON data from leap_dev vs. the
        # newer format for sLEAP.
        json_str = file.read()
        dicts = json_loads(json_str)

        # If we have a version number, then it is the new sLEAP format.
        if "version" in dicts:

            # Cache the working directory.
            cwd = os.getcwd()

            # Replace local video paths (for imagestore).
            if tmp_dir:
                for vid in dicts["videos"]:
                    vid["backend"]["filename"] = os.path.join(
                        tmp_dir, vid["backend"]["filename"]
                    )

            # Use the video_search callback for finding videos with broken
            # paths:

            # 1. Accept a single string as a video search path.
            if isinstance(video_search, str):
                video_search = [video_search]

            # 2. Accept a list of strings as video search paths.
            if hasattr(video_search, "__iter__"):
                # If the callback is an iterable, then we expect it to be a
                # list of strings and build a non-GUI callback with those as
                # the search paths. When a path points to a file, use the
                # path of its parent directory.
                search_paths = [
                    os.path.dirname(path) if os.path.isfile(path) else path
                    for path in video_search
                ]

                # Make the search function from the list of paths.
                video_search = Labels.make_video_callback(search_paths)

            # 3. Use the callback function (either given as an arg or built
            # from paths).
            if callable(video_search):
                abort = video_search(dicts["videos"])
                if abort:
                    raise FileNotFoundError

            # Try to load the labels.
            try:
                labels = cls.from_json_data(dicts, match_to=match_to)
            except FileNotFoundError:
                # FIXME: We are loading a labels JSON that has references to
                # video files. Let's change directory to the dirname of the
                # JSON file so that relative paths will be resolved from this
                # directory. Maybe it would be better to feed the dataset
                # dirname all the way down to the Video object, but this
                # keeps the coupling between classes looser.
                if os.path.dirname(filename) != "":
                    os.chdir(os.path.dirname(filename))

                # Try again.
                labels = cls.from_json_data(dicts, match_to=match_to)
            except Exception:
                # Ok, we give up. Where the hell are these videos?!
                raise  # Re-raise.
            finally:
                os.chdir(cwd)  # Make sure to change back if we have problems.

            return labels
        else:
            frames = load_labels_json_old(data_path=filename, parsed_json=dicts)
            return Labels(frames)
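
# A hedged usage sketch for read(): loading a .json (or zipped .json) labels
# file with extra directories to search for moved videos. The adaptor class
# name ("LabelsJsonAdaptor") matches the one referenced by read_headers above;
# the import paths and file name are illustrative assumptions.
def _example_read_json_labels():  # hypothetical helper, illustrative only
    from sleap.io.format.filehandle import FileHandle      # assumed import path
    from sleap.io.format.labels_json import LabelsJsonAdaptor  # assumed location

    fh = FileHandle("dataset.json.zip")                    # illustrative path
    return LabelsJsonAdaptor.read(fh, video_search=["/data/videos"])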
@classmethod
def from_json_data(
    cls, data: Union[str, dict], match_to: Optional["Labels"] = None
) -> "Labels":
    """
    Create an instance of the class from data in a dictionary.

    This method is used by the other methods that load from JSON.

    Args:
        data: Dictionary, deserialized from JSON.
        match_to: If given, we'll replace particular objects in the data
            dictionary with *matching* objects in the match_to
            :class:`Labels` object. This ensures that the newly
            instantiated :class:`Labels` can be merged without duplicate
            matching objects (e.g., :class:`Video` objects).

    Returns:
        A new :class:`Labels` object.
    """
    # Parse the JSON string if needed.
    if isinstance(data, str):
        dicts = json_loads(data)
    else:
        dicts = data

    # Don't break if the JSON doesn't include tracks.
    dicts["tracks"] = dicts.get("tracks", [])

    # First, deserialize the skeletons, videos, and nodes lists.
    # The labels reference these, so we will need them while deserializing.
    nodes = cattr.structure(dicts["nodes"], List[Node])

    idx_to_node = {i: nodes[i] for i in range(len(nodes))}
    skeletons = Skeleton.make_cattr(idx_to_node).structure(
        dicts["skeletons"], List[Skeleton]
    )
    videos = Video.cattr().structure(dicts["videos"], List[Video])

    try:
        # First try unstructuring as tuples (newer format).
        track_cattr = cattr.Converter(
            unstruct_strat=cattr.UnstructureStrategy.AS_TUPLE
        )
        tracks = track_cattr.structure(dicts["tracks"], List[Track])
    except Exception:
        # Then try unstructuring as dicts (older format).
        try:
            tracks = cattr.structure(dicts["tracks"], List[Track])
        except Exception:
            raise ValueError("Unable to load tracks as tuple or dict!")

    # If we're given a Labels object to match, use its objects when they match.
    if match_to is not None:
        for idx, sk in enumerate(skeletons):
            for old_sk in match_to.skeletons:
                if sk.matches(old_sk):
                    # Use nodes from the matched skeleton.
                    for (node, match_node) in zip(sk.nodes, old_sk.nodes):
                        node_idx = nodes.index(node)
                        nodes[node_idx] = match_node
                    # Use the skeleton from the match.
                    skeletons[idx] = old_sk
                    break
        for idx, vid in enumerate(videos):
            for old_vid in match_to.videos:
                # Try to match videos using either their current or source
                # filename, if available.
                old_vid_paths = [old_vid.filename]
                if getattr(old_vid.backend, "has_embedded_images", False):
                    old_vid_paths.append(old_vid.backend._source_video.filename)

                new_vid_paths = [vid.filename]
                if getattr(vid.backend, "has_embedded_images", False):
                    new_vid_paths.append(vid.backend._source_video.filename)

                is_match = False
                for old_vid_path in old_vid_paths:
                    for new_vid_path in new_vid_paths:
                        if old_vid_path == new_vid_path or weak_filename_match(
                            old_vid_path, new_vid_path
                        ):
                            is_match = True
                            videos[idx] = old_vid
                            break
                    if is_match:
                        break
                if is_match:
                    break

    suggestions = []
    if "suggestions" in dicts:
        suggestions_cattr = cattr.Converter()
        suggestions_cattr.register_structure_hook(
            Video, lambda x, type: videos[int(x)]
        )
        try:
            suggestions = suggestions_cattr.structure(
                dicts["suggestions"], List[SuggestionFrame]
            )
        except Exception as e:
            print("Error while loading suggestions (1)")
            print(e)

            try:
                # Convert the old suggestion format to the new format.
                # Old format: {video: list of frame indices}
                # New format: [SuggestionFrame]
                old_suggestions = suggestions_cattr.structure(
                    dicts["suggestions"], Dict[Video, List]
                )
                for video in old_suggestions.keys():
                    suggestions.extend(
                        [
                            SuggestionFrame(video, idx)
                            for idx in old_suggestions[video]
                        ]
                    )
            except Exception as e:
                print("Error while loading suggestions (2)")
                print(e)

    if "negative_anchors" in dicts:
        negative_anchors_cattr = cattr.Converter()
        negative_anchors_cattr.register_structure_hook(
            Video, lambda x, type: videos[int(x)]
        )
        negative_anchors = negative_anchors_cattr.structure(
            dicts["negative_anchors"], Dict[Video, List]
        )
    else:
        negative_anchors = dict()

    provenance = dicts.get("provenance", dict())

    # If there is actual labels data, get it.
    if "labels" in dicts:
        label_cattr = make_instance_cattr()
        label_cattr.register_structure_hook(
            Skeleton, lambda x, type: skeletons[int(x)]
        )
        label_cattr.register_structure_hook(Video, lambda x, type: videos[int(x)])
        label_cattr.register_structure_hook(
            Node, lambda x, type: x if isinstance(x, Node) else nodes[int(x)]
        )
        label_cattr.register_structure_hook(
            Track, lambda x, type: None if x is None else tracks[int(x)]
        )

        labels = label_cattr.structure(dicts["labels"], List[LabeledFrame])
    else:
        labels = []

    return Labels(
        labeled_frames=labels,
        videos=videos,
        skeletons=skeletons,
        nodes=nodes,
        suggestions=suggestions,
        negative_anchors=negative_anchors,
        tracks=tracks,
        provenance=provenance,
    )
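
# A minimal, hand-written sketch of the payload from_json_data expects. All
# values are illustrative; the keys are the ones the method reads above. The
# key point is that "labels" entries refer to skeletons/videos/nodes/tracks by
# integer index into the top-level lists, which is why the structure hooks
# above resolve ints to objects.
example_dicts = {
    "version": "2.0.0",      # presence of "version" marks the new format
    "nodes": [{"name": "head"}, {"name": "thorax"}],
    "skeletons": [],         # serialized Skeleton dicts
    "videos": [],            # serialized Video dicts
    "tracks": [],            # tuples (new format) or dicts (old format)
    "suggestions": [],       # [SuggestionFrame] (new) or {video: [frame_idx]} (old)
    "negative_anchors": {},  # {video index: list of anchors}
    "labels": [],            # serialized LabeledFrame dicts
}
# labels = LabelsJsonAdaptor.from_json_data(example_dicts)  # hypothetical call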
@property
def json(self):
    """The loaded JSON dictionary (for a JSON file)."""
    if self._json is None:
        self._json = json_loads(self.text)
    return self._json
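
# The property above implements a lazy parse-and-cache: the first access
# parses self.text with json_loads and stores the result; later accesses
# return the cached dict. Hypothetical usage (FileHandle name assumed, as in
# the earlier sketches):
#
#     fh = FileHandle("labels.json")
#     version = fh.json.get("version")  # parsed once, cached afterwards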
def load_labels_json_old(
    data_path: str,
    parsed_json: dict = None,
    adjust_matlab_indexing: bool = True,
    fix_rel_paths: bool = True,
) -> List[LabeledFrame]:
    """
    Load predicted instances from Talmo's old JSON format.

    Args:
        data_path: The path to the JSON file.
        parsed_json: The parsed JSON, if already loaded, so we can save some
            time if it was already parsed.
        adjust_matlab_indexing: Whether to adjust indexing from MATLAB
            (1-based) to Python (0-based).
        fix_rel_paths: Whether to fix relative video paths by making them
            absolute.

    Returns:
        A list of new :class:`LabeledFrame` objects.
    """
    if parsed_json is None:
        with open(data_path, "r") as f:
            data = json_loads(f.read())
    else:
        data = parsed_json

    videos = pd.DataFrame(data["videos"])
    instances = pd.DataFrame(data["instances"])
    points = pd.DataFrame(data["points"])
    predicted_instances = pd.DataFrame(data["predicted_instances"])
    predicted_points = pd.DataFrame(data["predicted_points"])

    if adjust_matlab_indexing:
        instances.frameIdx -= 1
        points.frameIdx -= 1
        predicted_instances.frameIdx -= 1
        predicted_points.frameIdx -= 1

        points.node -= 1
        predicted_points.node -= 1

        points.x -= 1
        predicted_points.x -= 1

        points.y -= 1
        predicted_points.y -= 1

    skeleton = Skeleton()
    skeleton.add_nodes(data["skeleton"]["nodeNames"])

    edges = data["skeleton"]["edges"]
    if adjust_matlab_indexing:
        edges = np.array(edges) - 1
    for (src_idx, dst_idx) in edges:
        skeleton.add_edge(
            data["skeleton"]["nodeNames"][src_idx],
            data["skeleton"]["nodeNames"][dst_idx],
        )

    if fix_rel_paths:
        for i, row in videos.iterrows():
            p = row.filepath
            if not os.path.exists(p):
                p = os.path.join(os.path.dirname(data_path), p)
                if os.path.exists(p):
                    videos.at[i, "filepath"] = p

    # Make the video objects.
    video_objects = {}
    for i, row in videos.iterrows():
        if videos.at[i, "format"] == "media":
            vid = Video.from_media(videos.at[i, "filepath"])
        else:
            vid = Video.from_hdf5(
                filename=videos.at[i, "filepath"], dataset=videos.at[i, "dataset"]
            )

        video_objects[videos.at[i, "id"]] = vid

    # A function to get all the instances for a particular video frame.
    def get_frame_instances(video_id, frame_idx):
        """Returns a list of instances for the given video frame."""
        is_in_frame = (points["videoId"] == video_id) & (
            points["frameIdx"] == frame_idx
        )
        if not is_in_frame.any():
            return []

        instances = []
        frame_instance_ids = np.unique(points["instanceId"][is_in_frame])
        for i, instance_id in enumerate(frame_instance_ids):
            is_instance = is_in_frame & (points["instanceId"] == instance_id)
            instance_points = {
                data["skeleton"]["nodeNames"][n]: Point(x, y, visible=v)
                for x, y, n, v in zip(
                    *[points[k][is_instance] for k in ["x", "y", "node", "visible"]]
                )
            }

            instance = Instance(skeleton=skeleton, points=instance_points)
            instances.append(instance)

        return instances

    # Get the unique labeled frames and construct a list of LabeledFrame
    # objects for them.
    frame_keys = list(
        {
            (videoId, frameIdx)
            for videoId, frameIdx in zip(points["videoId"], points["frameIdx"])
        }
    )
    frame_keys.sort()
    labels = []
    for videoId, frameIdx in frame_keys:
        label = LabeledFrame(
            video=video_objects[videoId],
            frame_idx=frameIdx,
            instances=get_frame_instances(videoId, frameIdx),
        )
        labels.append(label)

    return labels
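
# A hedged usage sketch for the old-format loader: it returns a plain list of
# LabeledFrame objects, which read() above wraps in a Labels container. The
# file name is illustrative.
def _example_load_old_format():  # hypothetical helper, illustrative only
    frames = load_labels_json_old("old_leap_dataset.json", adjust_matlab_indexing=True)
    return Labels(frames)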