def test_load_dataclass_dict_from_csv_throws(self):
    """
    Loading a keyed dict with ``list_per_key=False`` is expected to raise
    ``AssertionError`` for the rows saved here, which repeat values in the
    key field ``a`` ("a" twice, "c" three times).
    """
    dataclass_objs = [
        TestDataclass2("a", 1),
        TestDataclass2("a", 2),
        TestDataclass2("b", 3),
        TestDataclass2("c", 4),
        TestDataclass2("c", 4),
        TestDataclass2("c", 4),
    ]
    # Use the class *name* as the prefix: formatting the class object itself
    # yields "<class '...'>", which puts '<', '>' and quotes into the
    # directory name (not a valid path component on Windows).
    with tempfile.TemporaryDirectory(prefix=TestDataUtils.__name__) as tempdir:
        csv_file_name = Path(tempdir) / "data.csv"
        save_dataclass_objs_to_headered_csv(dataclass_objs, csv_file_name)

        with self.assertRaises(AssertionError):
            load_dataclass_dict_from_csv(
                csv_file_name, TestDataclass2, "a", list_per_key=False
            )
def test_load_dataclass_dict_from_csv_value_dict(self):
    """
    Round-trip four rows with distinct ``a`` values through a CSV file and
    load them with ``list_per_key=False``; the result is expected to have
    four entries, with the entry under "c" carrying ``b == 3``.
    """
    dataclass_objs = [
        TestDataclass2("a", 1),
        TestDataclass2("b", 2),
        TestDataclass2("c", 3),
        TestDataclass2("d", 4),
    ]
    # Use the class *name* as the prefix: formatting the class object itself
    # yields "<class '...'>", which puts '<', '>' and quotes into the
    # directory name (not a valid path component on Windows).
    with tempfile.TemporaryDirectory(prefix=TestDataUtils.__name__) as tempdir:
        csv_file_name = Path(tempdir) / "data.csv"
        save_dataclass_objs_to_headered_csv(dataclass_objs, csv_file_name)

        loaded = load_dataclass_dict_from_csv(
            csv_file_name, TestDataclass2, "a", list_per_key=False
        )

        self.assertEqual(len(loaded), 4)
        self.assertEqual(loaded["c"].b, 3)
def test_save_dataclass_objs_to_headered_csv(self):
    """
    Save three dataclass objects and check the raw file contents: a header
    line ``a,b`` followed by one comma-separated line per object, in order.
    """
    dataclass_objs = [
        TestDataclass2("a", 1),
        TestDataclass2("a", 2),
        TestDataclass2("b", 3),
    ]
    # Use the class *name* as the prefix: formatting the class object itself
    # yields "<class '...'>", which puts '<', '>' and quotes into the
    # directory name (not a valid path component on Windows).
    with tempfile.TemporaryDirectory(prefix=TestDataUtils.__name__) as tempdir:
        csv_file_name = Path(tempdir) / "data.csv"
        save_dataclass_objs_to_headered_csv(dataclass_objs, csv_file_name)

        with open(csv_file_name) as f:
            lines = f.readlines()

        # One list comparison covers both the line count and every line.
        self.assertEqual(lines, ["a,b\n", "a,1\n", "a,2\n", "b,3\n"])
def save_video_frame_info(video_frames: Dict[str, VideoFrameInfo],
                          file_name: str = None) -> str:
    """
    Write the values of a video frame dictionary to a headered csv file so
    they can be read back later.

    Args:
        video_frames (Dict[str, VideoFrameInfo]):
            Dictionary mapping video_ids to metadata about the location of
            their video frame files. Only the values are written.

        file_name (str):
            Location to save the file. When None, the file is written to
            "video_frame_metadata.csv" under the current working directory.

    Returns:
        The file name the video info was stored under.
    """
    if file_name is None:
        # No destination supplied: fall back to a fixed name in the cwd.
        file_name = f"{os.getcwd()}/video_frame_metadata.csv"

    frame_infos = list(video_frames.values())
    save_dataclass_objs_to_headered_csv(frame_infos, file_name)
    return file_name
def test_load_dataclass_dict_from_csv_list_dict(self):
    """
    Round-trip rows with repeated ``a`` values through a CSV file and load
    them with ``list_per_key=True``; each distinct ``a`` value is expected
    to map to the list of its rows in saved order.
    """
    dataclass_objs = [
        TestDataclass2("a", 1),
        TestDataclass2("a", 2),
        TestDataclass2("b", 3),
        TestDataclass2("c", 4),
        TestDataclass2("c", 4),
        TestDataclass2("c", 4),
    ]
    # Use the class *name* as the prefix: formatting the class object itself
    # yields "<class '...'>", which puts '<', '>' and quotes into the
    # directory name (not a valid path component on Windows).
    with tempfile.TemporaryDirectory(prefix=TestDataUtils.__name__) as tempdir:
        csv_file_name = Path(tempdir) / "data.csv"
        save_dataclass_objs_to_headered_csv(dataclass_objs, csv_file_name)

        loaded = load_dataclass_dict_from_csv(
            csv_file_name, TestDataclass2, "a", list_per_key=True
        )

        self.assertEqual(len(loaded), 3)
        self.assertEqual([row.b for row in loaded["a"]], [1, 2])
        self.assertEqual([row.b for row in loaded["b"]], [3])
        self.assertEqual([row.b for row in loaded["c"]], [4, 4, 4])
def test__len__(self, dataset_type):
    """
    Build a DomsevDataset from CSV/manifest files written into a temporary
    directory and check its video count, total activity count and length.

    Args:
        dataset_type: a ``VideoDatasetType`` member; selects whether the
            video manifest is produced by ``get_flat_video_frames`` or
            ``get_encoded_video_infos``.
    """
    # Use the class *name* as the prefix: formatting the class object itself
    # yields "<class '...'>", which puts '<', '>' and quotes into the
    # directory name (not a valid path component on Windows).
    with tempfile.TemporaryDirectory(prefix=TestDomsevDataset.__name__) as tempdir:
        tempdir = Path(tempdir)

        video_info_file = tempdir / "test_video_info.csv"
        save_dataclass_objs_to_headered_csv(
            list(MOCK_VIDEO_INFOS.values()), video_info_file
        )

        # Flatten the per-video activity lists into one list of rows.
        activity_file = tempdir / "activity_video_info.csv"
        activities = [
            activity
            for activity_list in self.ACTIVITIES_DATA.values()
            for activity in activity_list
        ]
        save_dataclass_objs_to_headered_csv(activities, activity_file)

        video_data_manifest_file_path = (
            tempdir / "video_data_manifest_file_path.json"
        )
        with ExitStack() as stack:
            if dataset_type == VideoDatasetType.Frame:
                video_data_dict = get_flat_video_frames(tempdir, "jpg")
            elif dataset_type == VideoDatasetType.EncodedVideo:
                video_data_dict = get_encoded_video_infos(tempdir, stack)

            save_dataclass_objs_to_headered_csv(
                list(video_data_dict.values()), video_data_manifest_file_path
            )

            video_ids = list(self.ACTIVITIES_DATA)
            # Stub sampler: always returns seven clips, two per video id
            # (index i maps to video_ids[i // 2]).
            dataset = DomsevDataset(
                video_data_manifest_file_path=str(video_data_manifest_file_path),
                video_info_file_path=str(video_info_file),
                activities_file_path=str(activity_file),
                dataset_type=dataset_type,
                clip_sampler=lambda x, y: [
                    VideoClipInfo(video_ids[i // 2], i * 2.0, i * 2.0 + 0.9)
                    for i in range(0, 7)
                ],
            )

            self.assertEqual(len(dataset._videos), 4)
            total_activities = [
                activity
                for video_activities in dataset._activities.values()
                for activity in video_activities
            ]
            self.assertEqual(len(total_activities), 6)
            self.assertEqual(len(dataset), 7)  # Num clips
def test__len__(self, dataset_type):
    """
    Build an EpicKitchenDataset from CSV/manifest files written into a
    temporary directory and check that its length equals the number of
    clips returned by the stub clip sampler.

    Args:
        dataset_type: a ``VideoDatasetType`` member; selects whether the
            video manifest is produced by ``get_flat_video_frames`` or
            ``get_encoded_video_infos``.
    """
    # Use the class *name* as the prefix: formatting the class object itself
    # yields "<class '...'>", which puts '<', '>' and quotes into the
    # directory name (not a valid path component on Windows).
    with tempfile.TemporaryDirectory(
            prefix=TestEpicKitchenDataset.__name__) as tempdir:
        tempdir = Path(tempdir)

        video_info_file = tempdir / "test_video_info.csv"
        save_dataclass_objs_to_headered_csv(
            list(MOCK_VIDEO_INFOS.values()), video_info_file)

        # Flatten the per-video action lists into one list of rows.
        action_file = tempdir / "action_video_info.csv"
        actions = [
            action
            for action_list in self.ACTIONS_DATAS.values()
            for action in action_list
        ]
        save_dataclass_objs_to_headered_csv(actions, action_file)

        video_data_manifest_file_path = (
            tempdir / "video_data_manifest_file_path.json")
        with ExitStack() as stack:
            if dataset_type == VideoDatasetType.Frame:
                video_data_dict = get_flat_video_frames(tempdir, "jpg")
            elif dataset_type == VideoDatasetType.EncodedVideo:
                video_data_dict = get_encoded_video_infos(tempdir, stack)

            save_dataclass_objs_to_headered_csv(
                list(video_data_dict.values()),
                video_data_manifest_file_path)

            # Stub sampler: always returns seven clips with ids "0".."6".
            dataset = EpicKitchenDataset(
                video_info_file_path=str(video_info_file),
                actions_file_path=str(action_file),
                clip_sampler=lambda x, y: [
                    VideoClipInfo(str(i), i * 2.0, i * 2.0 + 0.9)
                    for i in range(0, 7)
                ],
                video_data_manifest_file_path=str(
                    video_data_manifest_file_path),
                dataset_type=dataset_type,
            )

            self.assertEqual(len(dataset), 7)
def test__getitem__(self, dataset_type):
    """
    Build a DomsevDataset from CSV/manifest files written into a temporary
    directory, patch the video ``get_clip`` method, and check the
    activities, start/stop times and video id of the clips at index 1
    and index 2.

    Args:
        dataset_type: a ``VideoDatasetType`` member; selects how the video
            manifest is produced and which ``get_clip`` method is patched.
    """
    # Use the class *name* as the prefix: formatting the class object itself
    # yields "<class '...'>", which puts '<', '>' and quotes into the
    # directory name (not a valid path component on Windows).
    with tempfile.TemporaryDirectory(prefix=TestDomsevDataset.__name__) as tempdir:
        tempdir = Path(tempdir)

        video_info_file = tempdir / "test_video_info.csv"
        save_dataclass_objs_to_headered_csv(
            list(MOCK_VIDEO_INFOS.values()), video_info_file
        )

        # Flatten the per-video activity lists into one list of rows.
        activity_file = tempdir / "activity_video_info.csv"
        activities = [
            activity
            for activity_list in self.ACTIVITIES_DATA.values()
            for activity in activity_list
        ]
        save_dataclass_objs_to_headered_csv(activities, activity_file)

        video_data_manifest_file_path = (
            tempdir / "video_data_manifest_file_path.json"
        )
        with ExitStack() as stack:
            if dataset_type == VideoDatasetType.Frame:
                video_data_dict = get_flat_video_frames(tempdir, "jpg")
            elif dataset_type == VideoDatasetType.EncodedVideo:
                video_data_dict = get_encoded_video_infos(tempdir, stack)

            save_dataclass_objs_to_headered_csv(
                list(video_data_dict.values()), video_data_manifest_file_path
            )

            video_ids = list(self.ACTIVITIES_DATA)
            # Stub sampler: always returns seven clips, two per video id
            # (index i maps to video_ids[i // 2], starting at i * 2.0).
            dataset = DomsevDataset(
                video_data_manifest_file_path=str(video_data_manifest_file_path),
                video_info_file_path=str(video_info_file),
                activities_file_path=str(activity_file),
                dataset_type=dataset_type,
                clip_sampler=lambda x, y: [
                    VideoClipInfo(video_ids[i // 2], i * 2.0, i * 2.0 + 0.9)
                    for i in range(0, 7)
                ],
            )

            # Patch the get_clip of whichever video class this dataset
            # type is backed by, so clip loading returns canned data.
            if dataset_type == VideoDatasetType.Frame:
                get_clip_string = (
                    "pytorchvideo.data.frame_video.FrameVideo.get_clip"
                )
            else:
                get_clip_string = (
                    "pytorchvideo.data.encoded_video.EncodedVideo.get_clip"
                )

            with unittest.mock.patch(
                get_clip_string,
                return_value=({"video": torch.rand(3, 5, 10, 20), "audio": []}),
            ):
                # Index 1 -> video_ids[0], clip window [2.0, 2.9].
                clip_1 = dataset.__getitem__(1)
                expected_1 = self.ACTIVITIES_DATA[video_ids[0]]
                for i, a in enumerate(clip_1["activities"]):
                    self.assertEqual(a, expected_1[i])
                self.assertEqual(clip_1["start_time"], 2.0)
                self.assertEqual(clip_1["stop_time"], 2.9)
                self.assertEqual(clip_1["video_id"], MOCK_VIDEO_IDS[0])

                # Index 2 -> video_ids[1], clip window [4.0, 4.9].
                clip_2 = dataset.__getitem__(2)
                expected_2 = self.ACTIVITIES_DATA[video_ids[1]]
                for i, a in enumerate(clip_2["activities"]):
                    self.assertEqual(a, expected_2[i])
                self.assertEqual(clip_2["start_time"], 4.0)
                self.assertEqual(clip_2["stop_time"], 4.9)
                self.assertEqual(clip_2["video_id"], MOCK_VIDEO_IDS[1])