def _get_segment(path: str, segment_name: str) -> Segment:
    """Build a segment from the PNG images of one sub-directory.

    Arguments:
        path: The directory that contains the segment folder.
        segment_name: The name of the segment, which is also the name of the
            sub-directory of ``path`` that is searched for ``*.png`` files.

    Returns:
        A segment named ``segment_name`` holding one data per matched image,
        ordered by image path.

    """
    segment = Segment(segment_name)
    # ``glob`` yields matches in arbitrary, filesystem-dependent order; sort them so
    # the segment content is reproducible across machines and runs.
    for image_path in sorted(glob(os.path.join(path, segment_name, "*.png"))):
        segment.append(Data(image_path))
    return segment
def test_upload_segment_with_label(self, accesskey, url, tmp_path):
    """Upload a labeled segment into a draft and read it back through the client."""
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name = get_dataset_name()
    dataset_client = gas_client.create_dataset(dataset_name)
    # Everything after the dataset creation runs under try/finally so that the
    # remote dataset is removed even when an upload or an assertion fails.
    try:
        dataset_client.create_draft("draft-1")
        dataset_client.upload_catalog(Catalog.loads(CATALOG))

        segment = Segment("segment1")
        path = tmp_path / "sub"
        path.mkdir()
        for i in range(10):
            local_path = path / f"hello{i}.txt"
            local_path.write_text("CONTENT")
            data = Data(local_path=str(local_path))
            data.label = Label.loads(LABEL)
            segment.append(data)

        dataset_client.upload_segment(segment)

        # Re-read the segment through the dataset client and compare it with the local one.
        segment1 = Segment(name="segment1", client=dataset_client)
        assert len(segment1) == 10
        assert segment1[0].path == "hello0.txt"
        assert segment1[0].path == segment[0].target_remote_path
        assert segment1[0].label
        # todo: match the input and output label
    finally:
        gas_client.delete_dataset(dataset_name)
def test_data_in_draft(self, accesskey, url, tmp_path):
    """Check that uploaded data is readable after a commit and inside a following draft."""
    gas_client = GAS(access_key=accesskey, url=url)
    dataset_name = get_dataset_name()
    dataset_client = gas_client.create_dataset(dataset_name)
    # Everything after the dataset creation runs under try/finally so that the
    # remote dataset is removed even when an upload or an assertion fails.
    try:
        dataset_client.create_draft("draft-1")

        segment = Segment("segment1")
        path = tmp_path / "sub"
        path.mkdir()
        for i in range(10):
            local_path = path / f"hello{i}.txt"
            local_path.write_text("CONTENT")
            data = Data(local_path=str(local_path))
            segment.append(data)

        dataset_client.upload_segment(segment)
        dataset_client.commit("commit-1")

        # State right after the commit.
        segment1 = Segment(name="segment1", client=dataset_client)
        assert len(segment1) == 10
        assert segment1[0].get_url()
        assert segment1[0].path == segment[0].target_remote_path

        # The same data must still be visible from a draft created on top of the commit.
        dataset_client.create_draft("draft-2")
        segment1 = Segment(name="segment1", client=dataset_client)
        assert len(segment1) == 10
        assert segment1[0].get_url()
        assert segment1[0].path == segment[0].target_remote_path
    finally:
        gas_client.delete_dataset(dataset_name)
def _load_negative_segment(root_path: str) -> Segment:
    """Load the "negative" segment, whose images carry an empty list of 2D boxes.

    Arguments:
        root_path: The directory that contains the ``negatives/negativePics`` folder.

    Returns:
        A segment named "negative" holding one data per ``*.png`` file found,
        ordered by image path, each with ``label.box2d`` set to an empty list.

    """
    segment = Segment("negative")
    pattern = os.path.join(root_path, "negatives", "negativePics", "*.png")
    # ``glob`` yields matches in arbitrary, filesystem-dependent order; sort them so
    # the segment content is reproducible across machines and runs.
    for negative_image_path in sorted(glob(pattern)):
        data = Data(negative_image_path)
        # An explicit empty list records that the image has no box annotations.
        data.label.box2d = []
        segment.append(data)
    return segment
def test_sort(self):
    """Check that ``Segment.sort`` reorders the data by the given key."""
    train_segment = Segment("train")
    for file_name in ("file1", "file2"):
        train_segment.append(Data(file_name))

    # Insertion order is kept until the segment is sorted.
    first_before = train_segment[0]
    assert first_before.path == "file1"

    # A descending sort on the path moves "file2" to the front.
    train_segment.sort(key=lambda item: item.path, reverse=True)
    first_after = train_segment[0]
    assert first_after.path == "file2"
def test_upload_segment(self, mocker):
    """Check that ``upload_segment`` forwards the segment to ``_upload_segment``."""
    self.dataset_client._status.checkout(draft_number=1)

    local_segment = Segment(name="test1")
    for index in range(5):
        local_segment.append(Data(f"data{index}.png"))

    expected_client = SegmentClient(name="test1", data_client=self.dataset_client)
    # Replace the private worker so that only the delegation itself is exercised.
    patched_upload = mocker.patch(
        f"{dataset.__name__}.DatasetClient._upload_segment",
        return_value=expected_client,
    )

    returned_client = self.dataset_client.upload_segment(local_segment)
    assert returned_client.name == "test1"

    positional, named = patched_upload.call_args
    assert positional[0] == local_segment
    # No options were passed above, so these are the values forwarded by default.
    assert named["jobs"] == 1
    assert not named["skip_uploaded_files"]
    patched_upload.assert_called_once()
def test__upload_segment(self, mocker):
    """Check ``_upload_segment`` with and without skipping already uploaded files."""
    local_segment = Segment(name="test1")
    for index in range(5):
        local_segment.append(Data(f"data{index}.png"))

    remote_segment = SegmentClient(name="test1", data_client=self.dataset_client)
    patched_get_or_create = mocker.patch(
        f"{dataset.__name__}.DatasetClient.get_or_create_segment",
        return_value=remote_segment,
    )
    # The patched listing reports two of the five files as already present remotely.
    patched_list_paths = mocker.patch(
        f"{segment.__name__}.SegmentClient.list_data_paths",
        return_value=["data1.png", "data2.png"],
    )
    patched_multithread = mocker.patch(f"{dataset.__name__}.multithread_upload")

    # Skipping enabled: only the files missing remotely are handed to the uploader.
    with Tqdm(5, disable=False) as pbar:
        self.dataset_client._upload_segment(
            local_segment, skip_uploaded_files=True, pbar=pbar
        )
        patched_get_or_create.assert_called_once_with(local_segment.name)
        patched_list_paths.assert_called_once_with()

        positional, named = patched_multithread.call_args
        assert positional[0] == remote_segment._upload_or_import_data
        pending_paths = [item.path for item in positional[1]]
        assert pending_paths == ["data0.png", "data3.png", "data4.png"]
        assert named["callback"] == remote_segment._synchronize_upload_info
        assert named["jobs"] == 1
        assert named["pbar"] == pbar
        patched_multithread.assert_called_once()

    # Skipping disabled: every file of the segment is handed to the uploader.
    with Tqdm(5, disable=False) as pbar:
        self.dataset_client._upload_segment(
            local_segment, skip_uploaded_files=False, pbar=pbar
        )
        patched_get_or_create.assert_called_with(local_segment.name)
        patched_list_paths.assert_called_with()

        positional, named = patched_multithread.call_args
        assert positional[0] == remote_segment._upload_or_import_data
        pending_paths = [item.path for item in positional[1]]
        assert pending_paths == [f"data{i}.png" for i in range(5)]
        assert named["callback"] == remote_segment._synchronize_upload_info
        assert named["jobs"] == 1
        assert named["pbar"] == pbar
        patched_multithread.assert_called()
def _load_positive_segment(segment_name: str, segment_path: str) -> Segment:
    """Load one annotated segment from its frame annotation CSV file.

    Arguments:
        segment_name: The name of the segment. A name starting with "vid" gets its
            numeric suffix zero-padded to two digits ("vid0" becomes "vid00").
        segment_path: The directory searched for
            ``frameAnnotations-*/frameAnnotations.csv``.

    Returns:
        A segment with one data per run of consecutive rows sharing a "Filename",
        each data carrying the 2D boxes described by those rows.

    Raises:
        IndexError: When no annotation file is found under ``segment_path``.

    """
    if segment_name.startswith("vid"):
        # Pad zero for segment name to change "vid0" to "vid00"
        segment_name = f"{segment_name[:3]}{int(segment_name[3:]):02}"

    segment = Segment(segment_name)
    annotation_file = glob(
        os.path.join(segment_path, "frameAnnotations-*", "frameAnnotations.csv")
    )[0]
    # Image paths are resolved relative to the folder holding the annotation file.
    image_folder = os.path.dirname(annotation_file)
    pre_filename = ""

    # The csv module expects files opened with newline="" so that it can handle
    # line endings and newlines embedded in quoted fields by itself.
    with open(annotation_file, "r", encoding="utf-8", newline="") as fp:
        for annotation in csv.DictReader(fp, delimiter=";"):
            filename = annotation["Filename"]

            # A new data starts whenever the filename changes; rows of one image
            # are assumed to be adjacent in the file.
            if filename != pre_filename:
                data = Data(os.path.join(image_folder, filename))
                data.label.box2d = []
                segment.append(data)
                pre_filename = filename

            # A single CSV column stores both flags as "<occluded>,<on another road>".
            occluded, on_another_road = annotation["Occluded,On another road"].split(",", 1)
            data.label.box2d.append(
                LabeledBox2D(
                    int(annotation["Upper left corner X"]),
                    int(annotation["Upper left corner Y"]),
                    int(annotation["Lower right corner X"]),
                    int(annotation["Lower right corner Y"]),
                    category=annotation["Annotation tag"],
                    attributes={
                        "Occluded": bool(int(occluded)),
                        "On another road": bool(int(on_another_road)),
                        "Origin file": annotation["Origin file"],
                        "Origin frame number": int(annotation["Origin frame number"]),
                        "Origin track": annotation["Origin track"],
                        "Origin track frame number": int(
                            annotation["Origin track frame number"]
                        ),
                    },
                )
            )
    return segment
def _get_segment(
    segment_name: str,
    local_abspaths: Iterable[str],
    remote_path: str,
    is_recursive: bool,
) -> Segment:
    """Build a segment that pairs every local file with its target remote path.

    Arguments:
        segment_name: The name of the segment these data belong to.
        local_abspaths: Local absolute paths, each one either a file or a folder.
        remote_path: The remote object path, not necessarily ending with '/'.
        is_recursive: Whether folders may be copied recursively.

    Returns:
        A segment containing one data per local file, each with its target remote path.

    """
    segment = Segment(segment_name)
    for local_abspath in local_abspaths:
        if os.path.isdir(local_abspath):
            if not is_recursive:
                error("Local paths include directories, please use -r option")

            # Normalize first so a trailing separator does not yield an empty folder name.
            source_root = os.path.normpath(local_abspath)
            folder_name = os.path.basename(source_root)
            for current_dir, _, filenames in os.walk(source_root):
                # Files directly under the folder get no intermediate path component.
                if current_dir == source_root:
                    relpath = ""
                else:
                    relpath = os.path.relpath(current_dir, source_root)

                for filename in filenames:
                    local_file = os.path.join(current_dir, filename)
                    remote_target = PurePosixPath(
                        Path(remote_path, folder_name, relpath, filename)
                    )
                    segment.append(Data(local_file, target_remote_path=str(remote_target)))
        else:
            # Anything that is not a directory is placed directly under the remote path.
            remote_target = PurePosixPath(remote_path, os.path.basename(local_abspath))
            segment.append(Data(local_abspath, target_remote_path=str(remote_target)))

    return segment