def read(cls, file: FileHandle, *args, **kwargs) -> Labels:
    filename = file.filename

    # Load data from the YAML file
    project_data = yaml.load(file.text, Loader=yaml.SafeLoader)

    # Create skeleton which we'll use for each video
    skeleton = Skeleton()
    skeleton.add_nodes(project_data["bodyparts"])

    # Get subdirectories of videos and labeled data
    root_dir = os.path.dirname(filename)
    videos_dir = os.path.join(root_dir, "videos")
    labeled_data_dir = os.path.join(root_dir, "labeled-data")

    with os.scandir(labeled_data_dir) as dir_iterator:
        data_subdirs = [entry.path for entry in dir_iterator if entry.is_dir()]

    labeled_frames = []

    # Each subdirectory of labeled data corresponds to a video.
    # We'll go through each and import the labeled frames.
    for data_subdir in data_subdirs:
        csv_files = find_files_by_suffix(
            data_subdir, prefix="CollectedData", suffix=".csv"
        )

        if csv_files:
            csv_path = csv_files[0]

            # Try to find a full video corresponding to this subdir.
            # If subdirectory is foo, we look for foo.mp4 in videos dir.
            shortname = os.path.split(data_subdir)[-1]
            video_path = os.path.join(videos_dir, f"{shortname}.mp4")

            if os.path.exists(video_path):
                video = Video.from_filename(video_path)
            else:
                # When no video is found, the individual frame images
                # stored in the labeled data subdir will be used.
                print(
                    f"Unable to find {video_path} so using individual frame images."
                )
                video = None

            # Import the labeled frames
            labeled_frames.extend(
                LabelsDeepLabCutCsvAdaptor.read_frames(
                    FileHandle(csv_path), full_video=video, skeleton=skeleton
                )
            )
        else:
            print(f"No csv data file found in {data_subdir}")

    return Labels(labeled_frames=labeled_frames)
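# A minimal usage sketch for the reader above, assuming it is a classmethod on
# an adaptor class, here called `LabelsDeepLabCutYamlAdaptor` by analogy with
# the `LabelsDeepLabCutCsvAdaptor` it delegates to. The class name and the
# config path are assumptions for illustration only.
if __name__ == "__main__":
    handle = FileHandle("dlc-project/config.yaml")  # hypothetical DLC project config
    labels = LabelsDeepLabCutYamlAdaptor.read(handle)
    print(f"Imported {len(labels)} labeled frames.")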
def test_inference_merging():
    skeleton = Skeleton()
    video = Video(backend=MediaVideo)
    lf_user_only = LabeledFrame(
        video=video, frame_idx=0, instances=[Instance(skeleton=skeleton)]
    )
    lf_pred_only = LabeledFrame(
        video=video, frame_idx=1, instances=[PredictedInstance(skeleton=skeleton)]
    )
    lf_both = LabeledFrame(
        video=video,
        frame_idx=2,
        instances=[
            Instance(skeleton=skeleton),
            PredictedInstance(skeleton=skeleton),
        ],
    )
    labels = Labels([lf_user_only, lf_pred_only, lf_both])

    task = runners.InferenceTask(
        trained_job_paths=None,
        inference_params=None,
        labels=labels,
        results=[
            LabeledFrame(
                video=labels.video,
                frame_idx=2,
                instances=[
                    PredictedInstance(skeleton=skeleton),
                    PredictedInstance(skeleton=skeleton),
                ],
            )
        ],
    )
    task.merge_results()

    assert len(labels) == 3
    assert labels[0].frame_idx == 0
    assert labels[0].has_user_instances
    assert labels[1].frame_idx == 1
    assert labels[1].has_predicted_instances
    assert labels[2].frame_idx == 2
    assert len(labels[2].user_instances) == 1
    assert len(labels[2].predicted_instances) == 2
def read(
    cls,
    file: FileHandle,
    video_path: str,
    skeleton_path: str,
    *args,
    **kwargs,
) -> Labels:
    f = file.file

    video = Video.from_filename(video_path)
    skeleton_data = pd.read_csv(skeleton_path, header=0)

    skeleton = Skeleton()
    skeleton.add_nodes(skeleton_data["name"])
    nodes = skeleton.nodes

    for name, parent, swap in skeleton_data.itertuples(index=False, name=None):
        # Root nodes have a missing (NaN) parent in the skeleton csv.
        if not pd.isnull(parent):
            skeleton.add_edge(parent, name)

    lfs = []

    pose_matrix = f["pose"][:]

    track_count, frame_count, node_count, _ = pose_matrix.shape

    tracks = [Track(0, f"Track {i}") for i in range(track_count)]

    for frame_idx in range(frame_count):
        lf_instances = []
        for track_idx in range(track_count):
            points_array = pose_matrix[track_idx, frame_idx, :, :]
            points = dict()
            for p in range(len(points_array)):
                x, y, score = points_array[p]
                points[nodes[p]] = Point(x, y)  # TODO: score

            inst = Instance(
                skeleton=skeleton, track=tracks[track_idx], points=points
            )
            lf_instances.append(inst)

        lfs.append(
            LabeledFrame(video, frame_idx=frame_idx, instances=lf_instances)
        )

    return Labels(labeled_frames=lfs)
def receptive_field_info_from_model_cfg(model_cfg: ModelConfig) -> dict:
    """Gets receptive field information given specific model configuration."""
    rf_info = dict(
        size=None,
        max_stride=None,
        down_blocks=None,
        convs_per_block=None,
        kernel_size=None,
    )
    try:
        model = Model.from_config(model_cfg, Skeleton())
    except ZeroDivisionError:
        # Unable to create model from these config parameters
        return rf_info

    if hasattr(model_cfg.backbone.which_oneof(), "max_stride"):
        rf_info["max_stride"] = model_cfg.backbone.which_oneof().max_stride

    if hasattr(model.backbone, "down_convs_per_block"):
        rf_info["convs_per_block"] = model.backbone.down_convs_per_block
    elif hasattr(model.backbone, "convs_per_block"):
        rf_info["convs_per_block"] = model.backbone.convs_per_block

    if hasattr(model.backbone, "kernel_size"):
        rf_info["kernel_size"] = model.backbone.kernel_size

    rf_info["down_blocks"] = model.backbone.down_blocks

    if (
        rf_info["down_blocks"]
        and rf_info["convs_per_block"]
        and rf_info["kernel_size"]
    ):
        rf_info["size"] = compute_rf(
            down_blocks=rf_info["down_blocks"],
            convs_per_block=rf_info["convs_per_block"],
            kernel_size=rf_info["kernel_size"],
        )

    return rf_info
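# A minimal sketch of the computation `compute_rf` is assumed to perform,
# using the standard receptive-field recurrence
# RF = 1 + sum_i (k_i - 1) * prod_{j<i} s_j over the encoder layers, and
# modeling each down block as `convs_per_block` stride-1 convolutions of
# kernel `kernel_size` followed by a 2x2, stride-2 pooling layer. The layer
# layout is an assumption, not the library's confirmed implementation.
def compute_rf_sketch(down_blocks: int, convs_per_block: int, kernel_size: int) -> int:
    # (kernel, stride) pairs for one down block: the convolutions, then pooling.
    block = [(kernel_size, 1)] * convs_per_block + [(2, 2)]
    layers = block * down_blocks

    rf = 1  # receptive field of a single input pixel
    jump = 1  # distance between adjacent features, in input pixels
    for kernel, stride in layers:
        rf += (kernel - 1) * jump
        jump *= stride
    return rf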
def read(
    cls,
    file: FileHandle,
    video: Union[Video, str],
    *args,
    **kwargs,
) -> Labels:
    connect_adj_nodes = False

    if video is None:
        raise ValueError("Cannot read analysis hdf5 if no video specified.")

    if not isinstance(video, Video):
        video = Video.from_filename(video)

    f = file.file
    tracks_matrix = f["tracks"][:].T
    track_names_list = f["track_names"][:].T
    node_names_list = f["node_names"][:].T

    # shape: frames * nodes * 2 * tracks
    frame_count, node_count, _, track_count = tracks_matrix.shape

    tracks = [Track(0, track_name.decode()) for track_name in track_names_list]

    skeleton = Skeleton()
    last_node_name = None
    for node_name in node_names_list:
        node_name = node_name.decode()
        skeleton.add_node(node_name)
        if connect_adj_nodes and last_node_name:
            skeleton.add_edge(last_node_name, node_name)
        last_node_name = node_name

    frames = []
    for frame_idx in range(frame_count):
        instances = []
        for track_idx in range(track_count):
            points = tracks_matrix[frame_idx, ..., track_idx]
            if not np.all(np.isnan(points)):
                point_scores = np.ones(len(points))
                # make everything a PredictedInstance since the usual use
                # case is to export predictions for analysis
                instances.append(
                    PredictedInstance.from_arrays(
                        points=points,
                        point_confidences=point_scores,
                        skeleton=skeleton,
                        track=tracks[track_idx],
                        instance_score=1,
                    )
                )
        if instances:
            frames.append(
                LabeledFrame(video=video, frame_idx=frame_idx, instances=instances)
            )

    return Labels(labeled_frames=frames)
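# A sketch of the on-disk layout the reader above expects, inferred from its
# transposes: `tracks` is stored as (tracks, 2, nodes, frames) so that the
# transposed view has shape (frames, nodes, 2, tracks), and the name datasets
# hold byte strings (they are decoded on read). The writer below is
# illustrative only, not the library's own export code.
import h5py
import numpy as np

with h5py.File("analysis_example.h5", "w") as f:
    n_tracks, n_nodes, n_frames = 2, 3, 5
    # Missing detections are NaN; the reader skips instances that are all NaN.
    tracks = np.full((n_tracks, 2, n_nodes, n_frames), np.nan)
    tracks[0, :, :, 0] = 10.0  # a single detection on frame 0, track 0
    f.create_dataset("tracks", data=tracks)
    f.create_dataset("track_names", data=[b"track_0", b"track_1"])
    f.create_dataset("node_names", data=[b"head", b"thorax", b"abdomen"])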
def read(
    cls,
    file: FileHandle,
    img_dir: str,
    use_missing_gui: bool = False,
    *args,
    **kwargs,
) -> Labels:
    dicts = file.json

    # Make skeletons from "categories"
    skeleton_map = dict()
    for category in dicts["categories"]:
        skeleton = Skeleton(name=category["name"])
        skeleton_id = category["id"]
        node_names = category["keypoints"]
        skeleton.add_nodes(node_names)

        try:
            for src_idx, dst_idx in category["skeleton"]:
                skeleton.add_edge(node_names[src_idx], node_names[dst_idx])
        except IndexError:
            # According to the COCO data format specifications [1], the edges
            # are supposed to be 1-indexed. But in some datasets the edges are
            # 0-indexed! So we try 0-indexing first and fall back to 1-indexing
            # if an edge index is out of range.
            # [1]: http://cocodataset.org/#format-data

            # Clear any edges we already created using 0-indexing
            skeleton.clear_edges()

            # Add edges
            for src_idx, dst_idx in category["skeleton"]:
                skeleton.add_edge(node_names[src_idx - 1], node_names[dst_idx - 1])

        skeleton_map[skeleton_id] = skeleton

    # Make videos from "images"

    # Remove images that aren't referenced in the annotations
    img_refs = [annotation["image_id"] for annotation in dicts["annotations"]]
    dicts["images"] = list(filter(lambda im: im["id"] in img_refs, dicts["images"]))

    # Key in JSON file should be "file_name", but sometimes it's "filename",
    # so we have to check both.
    img_filename_key = "file_name"
    if img_filename_key not in dicts["images"][0].keys():
        img_filename_key = "filename"

    # First add the img_dir to each image filename
    img_paths = [
        os.path.join(img_dir, image[img_filename_key]) for image in dicts["images"]
    ]

    # See if there are any missing files
    img_missing = [not os.path.exists(path) for path in img_paths]

    if sum(img_missing):
        if use_missing_gui:
            okay = MissingFilesDialog(img_paths, img_missing).exec_()
            if not okay:
                return None
        else:
            raise FileNotFoundError(
                f"Images for COCO dataset could not be found in {img_dir}."
            )

    # Update the image paths (with img_dir or user selected path)
    for image, path in zip(dicts["images"], img_paths):
        image[img_filename_key] = path

    # Create the video objects for the image files
    image_video_map = dict()

    vid_id_video_map = dict()
    for image in dicts["images"]:
        image_id = image["id"]
        image_filename = image[img_filename_key]

        # Sometimes images have a vid_id which links multiple images
        # together as one video. If so, we'll use that as the video key.
        # But if there isn't a vid_id, we'll treat each image as a
        # distinct video and use the image id as the video id.
        vid_id = image.get("vid_id", image_id)

        if vid_id not in vid_id_video_map:
            video_kwargs = dict(filenames=[image_filename])
            for key in ("width", "height"):
                if key in image:
                    video_kwargs[key] = image[key]

            video = Video.from_image_filenames(**video_kwargs)
            vid_id_video_map[vid_id] = video
            frame_idx = 0
        else:
            video = vid_id_video_map[vid_id]
            frame_idx = video.num_frames
            video.backend.filenames.append(image_filename)

        image_video_map[image_id] = (video, frame_idx)

    # Make instances from "annotations"
    lf_map = dict()
    track_map = dict()
    for annotation in dicts["annotations"]:
        skeleton = skeleton_map[annotation["category_id"]]
        image_id = annotation["image_id"]
        video, frame_idx = image_video_map[image_id]
        keypoints = np.array(annotation["keypoints"], dtype="int").reshape(-1, 3)

        track = None
        if "track_id" in annotation:
            track_id = annotation["track_id"]
            if track_id not in track_map:
                track_map[track_id] = Track(frame_idx, str(track_id))
            track = track_map[track_id]

        points = dict()
        any_visible = False
        for i in range(len(keypoints)):
            node = skeleton.nodes[i]
            x, y, flag = keypoints[i]

            if flag == 0:
                # node not labeled for this instance
                continue

            is_visible = flag == 2
            any_visible = any_visible or is_visible
            points[node] = Point(x, y, is_visible)

        if points:
            # If none of the points had 2 as the "visible" flag, we'll
            # assume this is incorrect and just mark all as visible.
            if not any_visible:
                for point in points.values():
                    point.visible = True

            inst = Instance(skeleton=skeleton, points=points, track=track)

            if image_id not in lf_map:
                lf_map[image_id] = LabeledFrame(video, frame_idx)

            lf_map[image_id].insert(0, inst)

    return Labels(labeled_frames=list(lf_map.values()))
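# A minimal example of the dict structure the COCO reader above walks. It
# follows the standard COCO keypoint convention of (x, y, v) triples with
# v=0 (not labeled), v=1 (labeled but not visible), and v=2 (labeled and
# visible); all ids, names, and coordinates here are illustrative only.
minimal_coco = {
    "categories": [
        {
            "id": 1,
            "name": "animal",
            "keypoints": ["head", "tail"],
            # 1-indexed per the spec; the reader falls back to this after
            # 0-indexing raises an IndexError.
            "skeleton": [[1, 2]],
        }
    ],
    "images": [
        {"id": 1, "file_name": "frame0001.png", "width": 640, "height": 480}
    ],
    "annotations": [
        {
            "image_id": 1,
            "category_id": 1,
            # head labeled and visible; tail labeled but occluded
            "keypoints": [100, 200, 2, 150, 250, 1],
        }
    ],
}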
def read(
    cls,
    file: FileHandle,
    gui: bool = True,
    *args,
    **kwargs,
):
    filename = file.filename

    mat_contents = sio.loadmat(filename)

    box_path = cls._unwrap_mat_scalar(mat_contents["boxPath"])

    # If the video file isn't found, try in the same dir as the mat file
    if not os.path.exists(box_path):
        file_dir = os.path.dirname(filename)
        box_path_name = box_path.split("\\")[-1]  # assume windows path
        box_path = os.path.join(file_dir, box_path_name)

    if not os.path.exists(box_path):
        if gui:
            video_paths = [box_path]
            missing = [True]
            okay = MissingFilesDialog(video_paths, missing).exec_()

            if not okay or missing[0]:
                return

            box_path = video_paths[0]
        else:
            # Ignore missing videos if not loading from gui
            box_path = ""

    if os.path.exists(box_path):
        vid = Video.from_hdf5(
            dataset="box", filename=box_path, input_format="channels_first"
        )
    else:
        vid = None

    nodes_ = mat_contents["skeleton"]["nodes"]
    edges_ = mat_contents["skeleton"]["edges"]
    points_ = mat_contents["positions"]

    edges_ = edges_ - 1  # convert matlab 1-indexing to python 0-indexing

    nodes = cls._unwrap_mat_array(nodes_)
    edges = cls._unwrap_mat_array(edges_)

    nodes = list(map(str, nodes))  # convert np.str_ to str

    sk = Skeleton(name=filename)
    sk.add_nodes(nodes)
    for edge in edges:
        sk.add_edge(source=nodes[edge[0]], destination=nodes[edge[1]])

    labeled_frames = []
    node_count, _, frame_count = points_.shape

    for i in range(frame_count):
        new_inst = Instance(skeleton=sk)
        for node_idx, node in enumerate(nodes):
            x = points_[node_idx][0][i]
            y = points_[node_idx][1][i]
            new_inst[node] = Point(x, y)
        if len(new_inst.points):
            new_frame = LabeledFrame(video=vid, frame_idx=i)
            new_frame.instances = (new_inst,)
            labeled_frames.append(new_frame)

    labels = Labels(labeled_frames=labeled_frames, videos=[vid], skeletons=[sk])

    return labels
def read_frames(
    cls,
    file: FileHandle,
    skeleton: Optional[Skeleton] = None,
    full_video: Optional[Video] = None,
    *args,
    **kwargs,
) -> List[LabeledFrame]:
    filename = file.filename

    # Read CSV file.
    data = pd.read_csv(filename, header=[1, 2])

    # Check if this is in the new multi-animal format.
    is_multianimal = data.columns[0][0] == "individuals"

    if is_multianimal:
        # Reload with additional header rows if using new format.
        data = pd.read_csv(filename, header=[1, 2, 3])

        # Pull out animal and node names from the columns.
        animal_names = []
        node_names = []
        for animal_name, node_name, _ in data.columns[1:][::2]:
            if animal_name not in animal_names:
                animal_names.append(animal_name)
            if node_name not in node_names:
                node_names.append(node_name)
    else:
        # Create the skeleton from the list of nodes in the csv file.
        # Note that DeepLabCut doesn't have edges, so these will need to be
        # added by user later.
        node_names = [n[0] for n in list(data)[1::2]]

    if skeleton is None:
        skeleton = Skeleton()
        skeleton.add_nodes(node_names)

    # Get list of all image filenames.
    img_files = data.iloc[:, 0]

    if full_video:
        video = full_video
        index_frames_by_original_index = True
    else:
        # Create the Video object
        img_dir = os.path.dirname(filename)
        video = cls.make_video_for_image_list(img_dir, img_files)

        # The frames in the video we created will be indexed from 0 to N
        # rather than having their index from the original source video.
        index_frames_by_original_index = False

    lfs = []
    for i in range(len(data)):
        # Figure out frame index to use.
        if index_frames_by_original_index:
            # Extract "0123" from "path/img0123.png" as original frame index.
            frame_idx_match = re.search("(?<=img)(\\d+)(?=\\.png)", img_files[i])

            if frame_idx_match is not None:
                frame_idx = int(frame_idx_match.group(0))
            else:
                raise ValueError(
                    f"Unable to determine frame index for image {img_files[i]}"
                )
        else:
            frame_idx = i

        instances = []
        if is_multianimal:
            for animal_name in animal_names:
                any_not_missing = False
                # Get points for each node.
                instance_points = dict()
                for node in node_names:
                    x, y = (
                        data[(animal_name, node, "x")][i],
                        data[(animal_name, node, "y")][i],
                    )
                    instance_points[node] = Point(x, y)
                    if not (np.isnan(x) and np.isnan(y)):
                        any_not_missing = True

                if any_not_missing:
                    # Create instance with points.
                    instances.append(
                        Instance(skeleton=skeleton, points=instance_points)
                    )
        else:
            # Get points for each node.
            instance_points = dict()
            for node in node_names:
                x, y = data[(node, "x")][i], data[(node, "y")][i]
                instance_points[node] = Point(x, y)

            # Create instance with points assuming there's a single instance
            # per frame.
            instances.append(Instance(skeleton=skeleton, points=instance_points))

        # Create LabeledFrame and add it to list.
        lfs.append(
            LabeledFrame(video=video, frame_idx=frame_idx, instances=instances)
        )

    return lfs
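# A runnable sketch of the multi-animal CSV layout the branch above assumes,
# based on its `header=[1, 2, 3]` read (row 0 is the scorer row) and the
# (individuals, bodyparts, coords) column tuples it indexes. All names and
# values here are illustrative only.
import io
import pandas as pd

csv_text = (
    "scorer,me,me,me,me\n"
    "individuals,animal1,animal1,animal2,animal2\n"
    "bodyparts,head,head,head,head\n"
    "coords,x,y,x,y\n"
    "labeled-data/vid/img0000.png,12.0,34.0,56.0,78.0\n"
)
data = pd.read_csv(io.StringIO(csv_text), header=[1, 2])
assert data.columns[0][0] == "individuals"  # multi-animal format detected
data = pd.read_csv(io.StringIO(csv_text), header=[1, 2, 3])
print(data[("animal1", "head", "x")][0])  # -> 12.0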
def read_frames(
    cls,
    file: FileHandle,
    skeleton: Optional[Skeleton] = None,
    full_video: Optional[Video] = None,
    *args,
    **kwargs,
) -> List[LabeledFrame]:
    filename = file.filename

    data = pd.read_csv(filename, header=[1, 2])

    # Create the skeleton from the list of nodes in the csv file.
    # Note that DeepLabCut doesn't have edges, so these will need to be
    # added by user later.
    node_names = [n[0] for n in list(data)[1::2]]

    if skeleton is None:
        skeleton = Skeleton()
        skeleton.add_nodes(node_names)

    img_files = data.iloc[:, 0]  # get list of all images

    if full_video:
        video = full_video
        index_frames_by_original_index = True
    else:
        # Create the Video object
        img_dir = os.path.dirname(filename)
        video = cls.make_video_for_image_list(img_dir, img_files)

        # The frames in the video we created will be indexed from 0 to N
        # rather than having their index from the original source video.
        index_frames_by_original_index = False

    frames = []
    for i in range(len(data)):
        # get points for each node
        instance_points = dict()
        for node in node_names:
            x, y = data[(node, "x")][i], data[(node, "y")][i]
            instance_points[node] = Point(x, y)

        # Create instance with points.
        # For DeepLabCut we're assuming there's a single instance per frame.
        instance = Instance(skeleton=skeleton, points=instance_points)

        if index_frames_by_original_index:
            # extract "0123" from "path/img0123.png" as original frame index
            frame_idx_match = re.search("(?<=img)(\\d+)(?=\\.png)", img_files[i])

            if frame_idx_match is not None:
                frame_idx = int(frame_idx_match.group(0))
            else:
                raise ValueError(
                    f"Unable to determine frame index for image {img_files[i]}"
                )
        else:
            frame_idx = i

        # create labeledframe and add it to list
        frames.append(
            LabeledFrame(video=video, frame_idx=frame_idx, instances=[instance])
        )

    return frames