def _get_data(image_path: str, annotation: Dict[str, Any]) -> Data:
    data = Data(image_path)
    keypoints = LabeledKeypoints2D()
    for x, y, v in chunked(annotation["keypoints"], 3):
        keypoints.append(Keypoint2D(x, y, v if v in (0, 1, 2) else 2))
    data.label.keypoints2d = [keypoints]
    return data
def _get_data(filename: str, beauty_score: str, image_path: str, label_path: str) -> Data:
    stem = os.path.splitext(os.path.basename(filename))[0]
    data = Data(os.path.join(image_path, filename))

    keypoints2d = LabeledKeypoints2D()
    keypoints2d.attributes = {"beauty_score": float(beauty_score)}
    keypoints2d.category = _CATEGORY_NAMES[stem[:2]]

    with open(os.path.join(label_path, f"{stem}.pts"), "rb") as fp:
        points = struct.unpack("i172f", fp.read())
    for x, y in chunked(islice(points, 1, None), 2):
        keypoints2d.append(Keypoint2D(float(x), float(y)))

    data.label.keypoints2d = [keypoints2d]
    return data
def _get_data(keypoints_info: List[str], image_path: str, parsing_path: str) -> Data:
    stem = os.path.splitext(keypoints_info[0])[0]
    data = Data(os.path.join(image_path, f"{stem}.jpg"))

    label = data.label
    label.semantic_mask = SemanticMask(os.path.join(parsing_path, f"{stem}.png"))

    keypoints = LabeledKeypoints2D()
    for x, y, v in chunked(islice(keypoints_info, 1, None), 3):
        keypoints.append(
            Keypoint2D(float(x), float(y), 1 - int(v)) if x.isnumeric() else Keypoint2D(0, 0, 0)
        )
    label.keypoints2d = [keypoints]
    return data
def upload_frame(self, frame: Frame, timestamp: Optional[float] = None) -> None:
    """Upload frame to the draft.

    Arguments:
        frame: The :class:`~tensorbay.dataset.frame.Frame` to upload.
        timestamp: The timestamp used to generate a frame id and sort frames
            when the frame carries no frame id of its own.

    Raises:
        FrameError: When lacking frame id or frame id conflicts.

    """
    self._status.check_authority_for_draft()

    if timestamp is None:
        try:
            frame_id = frame.frame_id
        except AttributeError as error:
            raise FrameError(
                "Lack frame id, please add frame id in frame or "
                "give timestamp to the function!"
            ) from error
    elif not hasattr(frame, "frame_id"):
        frame_id = from_timestamp(timestamp)
    else:
        raise FrameError("Frame id conflicts, please do not give timestamp to the function!")

    callback_bodies = []
    for sensor_name, data in frame.items():
        try:
            callback_body = data.get_callback_body()  # type: ignore[union-attr]
        except AttributeError:
            continue
        callback_body["frameId"] = frame_id.str
        callback_body["sensorName"] = sensor_name
        if isinstance(data, Data):
            self._upload_file(data)
            self._upload_mask_files(data.label)
            callback_bodies.append(callback_body)
        elif isinstance(data, AuthData):
            self._synchronize_import_info((callback_body,))

    for chunked_callback_bodies in chunked(callback_bodies, 50):
        self._synchronize_upload_info(chunked_callback_bodies)
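
# A minimal usage sketch (not part of the original module). It assumes
# ``segment_client`` is a fusion segment client bound to an open draft and
# that a sensor named "camera" is already attached to the segment; the local
# file path and timestamp are illustrative.
def _example_upload_frame(segment_client: "FusionSegmentClient") -> None:
    from tensorbay.dataset import Data, Frame

    frame = Frame()
    frame["camera"] = Data("local/000001.jpg")
    # The frame carries no frame id, so a timestamp is passed to generate one.
    segment_client.upload_frame(frame, timestamp=1632972000.0)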
def upload_label(self, data: Union[DataBase._Type, Iterable[DataBase._Type]]) -> None:
    """Upload label with Data object to the draft.

    Arguments:
        data: The data object which represents the local file to upload.

    """
    self._status.check_authority_for_draft()

    if not isinstance(data, Iterable):
        data = [data]

    for chunked_data in chunked(data, 128):
        for single_data in chunked_data:
            self._upload_mask_files(single_data.label)
        self._upload_multi_label(chunked_data)
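
# A minimal usage sketch (not part of the original module). It assumes
# ``segment_client`` is a segment client bound to an open draft and that
# "0000001.jpg" already exists in the remote segment; the category "cat" is
# illustrative.
def _example_upload_label(segment_client: "SegmentClient") -> None:
    from tensorbay.dataset import Data
    from tensorbay.label import Classification

    data = Data("0000001.jpg")
    data.label.classification = Classification("cat")
    # Only the labels are written to the draft; the file itself is not re-uploaded.
    segment_client.upload_label(data)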
def _get_keypoint2ds(label_path: str) -> Dict[str, LabeledKeypoints2D]:
    all_keypoint2ds = {}
    for file_path in (
        os.path.join(label_path, "loose_landmark_test.csv"),
        os.path.join(label_path, "loose_landmark_train.csv"),
    ):
        # The normal format of each line of the file is
        # NAME_ID,P1X,P1Y,P2X,P2Y,P3X,P3Y,P4X,P4Y,P5X,P5Y
        # "n000001/0001_01",75.81253,110.2077,103.1778,104.6074,...
        # "n000001/0002_01",194.9206,211.5826,278.5339,206.3202,...
        # "n000001/0003_01",80.4145,74.07401,111.7425,75.42367,...
        # ...
        with open(file_path, encoding="utf-8") as fp:
            for row in islice(csv.reader(fp), 1, None):
                name_id = row.pop(0).strip('"')
                all_keypoint2ds[name_id] = LabeledKeypoints2D(chunked(map(float, row), 2))
    return all_keypoint2ds
def _get_polygon_labels(
    annotations_dir: str, segment_name: str, polygon_attribute_names: Tuple[str, ...]
) -> DefaultDict[str, List[LabeledPolygon]]:
    label_path = os.path.join(
        annotations_dir, f"RarePlanes_{segment_name.capitalize()}_Coco_Annotations_tiled.json"
    )
    image_name_to_polygons: DefaultDict[str, List[LabeledPolygon]] = defaultdict(list)

    with open(label_path, encoding="utf-8") as fp:
        label_contents = json.load(fp)
    annotations, categories = label_contents["annotations"], label_contents["categories"]

    for annotation, category in zip(annotations, categories):
        attributes = {
            attribute: annotation[attribute] for attribute in polygon_attribute_names
        }
        attributes["canards"] = annotation["canards"] == "Yes"
        attributes["truncated"] = bool(annotation["truncated"])
        image_name_to_polygons[category["image_fname"]].append(
            LabeledPolygon(chunked(annotation["segmentation"][0], 2), attributes=attributes)
        )
    return image_name_to_polygons
def HKD(path: str) -> Dataset:
    """`HKD <http://vlm1.uta.edu/~srujana/HandPoseDataset/HK_Dataset.html>`_ dataset.

    The file structure should be like::

        <path>
            AnnotatedData_subject1/
                CropImages/
                    subject1_fingercount_cropframe_2.jpg
                    subject1_fingercount_cropframe_3.jpg
                    ...
                    subject1_fingercount_cropframe_210.jpg
                subject1_fingercount_2D_Annotations_cropped.csv
            AnnotatedData_subject2/
                CropImages/
                    subject2_fingercount_cropframe_2.jpg
                    subject2_fingercount_cropframe_3.jpg
                    ...
                    subject2_fingercount_cropframe_207.jpg
                subject2_fingercount_2D_Annotations_cropped.csv
            AnnotatedData_subject3/
                CropImages/
                    fingerappose_subject3_cropframe_2.jpg
                    fingerappose_subject3_cropframe_3.jpg
                    ...
                    fingerappose_subject3_cropframe_235.jpg
                fingerappose_subject3_2D_Annotations_cropped.csv
            AnnotatedData_subject4/
                CropImages/
                    subject4_cropframe_2.jpg
                    subject4_cropframe_3.jpg
                    ...
                    subject4_cropframe_147.jpg
                subject4_2D_Annotations_cropped.csv

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog.json"))

    for segment_name, (csv_name, image_name_template) in _SEGMENT_INFO.items():
        segment = dataset.create_segment(segment_name)
        segment_path = os.path.join(root_path, f"AnnotatedData_{segment_name}")
        csv_path = os.path.join(root_path, segment_path, csv_name)

        with open(csv_path, encoding="utf-8") as fp:
            # The csv files should be like::
            #     subject1_fingercount_2D_Annotations_cropped.csv
            #     2,4.523,28.569,136.8,181.37,154.63,80.348,130.86,57.322,...
            #     3,4.523,32.731,135.31,176.17,147.2,80.348,123.43,65.493,...
            #     4,-2.413,39.668,149.41,164.28,143.47,70.692,137.53,64.75,...
            #     5,-1.026,31.344,138.77,178.4,136.54,78.863,135.06,75.149,...
            #     ...
            for csv_line in csv.reader(fp):
                image_path = os.path.join(
                    segment_path, "CropImages", image_name_template.format(csv_line.pop(0))
                )
                data = Data(image_path)
                data.label.keypoints2d = [LabeledKeypoints2D(chunked(map(float, csv_line), 2))]
                segment.append(data)

    return dataset
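
# A minimal usage sketch (not part of the original module). It assumes the
# dataset has been extracted under the structure shown in the docstring above;
# the path and the printed fields are illustrative.
def _example_load_hkd() -> None:
    dataset = HKD("path/to/HKD")
    segment = dataset[0]
    keypoints2d = segment[0].label.keypoints2d[0]
    print(segment.name, len(keypoints2d))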
def move_data(
    self,
    source_remote_paths: Union[str, Iterable[str]],
    target_remote_paths: Union[None, str, Iterable[str]] = None,
    *,
    source_client: Optional["SegmentClient"] = None,
    strategy: str = "abort",
) -> None:
    """Move data to this segment, also used to rename data.

    Arguments:
        source_remote_paths: The source remote paths of the moved data.
        target_remote_paths: The target remote paths of the moved data.
            This argument is used to specify new remote paths of the moved data.
            If None, the remote path of the moved data will not be changed after copy.
        source_client: The source segment client of the moved data.
            This argument is used to specify where the moved data comes from
            when the moved data is from another segment.
            If None, the moved data comes from this segment.
        strategy: The strategy of handling the name conflict. There are three options:

            1. "abort": stop copying and raise exception;
            2. "override": the source data will override the origin data;
            3. "skip": keep the origin data.

    Raises:
        InvalidParamsError: When strategy is invalid.
        ValueError: When the type or the length of target_remote_paths is not equal
            with source_remote_paths.
            Or when the dataset_id and draft_number of source_client is not equal
            with the current segment client.

    """
    self._status.check_authority_for_draft()

    if strategy not in _STRATEGIES:
        raise InvalidParamsError(param_name="strategy", param_value=strategy)

    if not target_remote_paths:
        all_target_remote_paths = []
        all_source_remote_paths = (
            [source_remote_paths]
            if isinstance(source_remote_paths, str)
            else list(source_remote_paths)
        )
    elif isinstance(source_remote_paths, str) and isinstance(target_remote_paths, str):
        all_target_remote_paths = [target_remote_paths]
        all_source_remote_paths = [source_remote_paths]
    elif not isinstance(source_remote_paths, str) and not isinstance(target_remote_paths, str):
        all_target_remote_paths = list(target_remote_paths)
        all_source_remote_paths = list(source_remote_paths)
        if len(all_target_remote_paths) != len(all_source_remote_paths):
            raise ValueError(
                "To move the data, the length of target_remote_paths "
                "must be equal with source_remote_paths"
            )
    else:
        raise ValueError(
            "To move the data, the type of target_remote_paths "
            "must be equal with source_remote_paths"
        )

    source = {}
    if source_client:
        if (
            source_client.status.draft_number == self.status.draft_number
            and source_client._dataset_id  # pylint: disable=protected-access
            == self._dataset_id
        ):
            source["segmentName"] = source_client.name
        else:
            raise ValueError(
                "To move the data, the dataset_id and draft_number of source_client "
                "must be equal with the current segment client"
            )
    else:
        source["segmentName"] = self.name

    post_data: Dict[str, Any] = {
        "strategy": strategy,
        "source": source,
        "segmentName": self.name,
    }
    post_data.update(self._status.get_status_info())

    for targets, sources in zip_longest(
        chunked(all_target_remote_paths, 128), chunked(all_source_remote_paths, 128)
    ):
        if targets:
            post_data["remotePaths"] = targets
        post_data["source"]["remotePaths"] = sources

        self._client.open_api_do("POST", "data?multipleMove", self._dataset_id, json=post_data)
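
# A minimal usage sketch (not part of the original module). It assumes
# ``segment_client`` is a SegmentClient bound to an open draft; the remote
# paths are illustrative. Passing a target path renames the moved data.
def _example_move_data(segment_client: "SegmentClient") -> None:
    # Rename a single file within the same segment, keeping the origin data
    # if the target name already exists.
    segment_client.move_data("images/0001.jpg", "images/0001_renamed.jpg", strategy="skip")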
def LISATrafficLight(path: str) -> Dataset:
    """`LISA Traffic Light <https://www.kaggle.com/mbornoe/lisa-traffic-light-dataset>`_ dataset.

    The file structure should be like::

        <path>
            Annotations/Annotations/
                daySequence1/
                daySequence2/
                dayTrain/
                    dayClip1/
                    dayClip10/
                    ...
                    dayClip9/
                nightSequence1/
                nightSequence2/
                nightTrain/
                    nightClip1/
                    nightClip2/
                    ...
                    nightClip5/
            daySequence1/daySequence1/
            daySequence2/daySequence2/
            dayTrain/dayTrain/
                dayClip1/
                dayClip10/
                ...
                dayClip9/
            nightSequence1/nightSequence1/
            nightSequence2/nightSequence2/
            nightTrain/nightTrain/
                nightClip1/
                nightClip2/
                ...
                nightClip5/

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    Raises:
        FileStructureError: When frame number is discontinuous.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    annotation_path = os.path.join(root_path, "Annotations", "Annotations")

    dataset = Dataset(DATASET_NAME)
    dataset.notes.is_continuous = True
    dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog.json"))

    csv_paths = glob(os.path.join(annotation_path, "**", "*.csv"), recursive=True)

    for box_csv_path, bulb_csv_path in chunked(csv_paths, 2):
        segment = dataset.create_segment(_get_segment_name(box_csv_path))

        prefix = _get_path_prefix(annotation_path, box_csv_path)
        classification = _get_classification(prefix)

        filedir = os.path.join(root_path, prefix)
        image_paths = glob(os.path.join(filedir, "*.jpg"))

        # Check the frame_number from filename: "daySequence1--00345.jpg"
        if _get_frame_number(image_paths[-1]) + 1 != len(image_paths):
            raise FileStructureError(f"Discontinuous frame number in '{filedir}'")

        for image_path in image_paths:
            data = Data(image_path)
            data.label.box2d = []
            if classification:
                data.label.classification = Classification(classification)
            segment.append(data)

        _add_labels(segment, box_csv_path)
        _add_labels(segment, bulb_csv_path)

    return dataset
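
# A minimal usage sketch (not part of the original module). It assumes the
# Kaggle archive has been extracted under the structure shown in the docstring
# above; the path and the printed fields are illustrative.
def _example_load_lisa_traffic_light() -> None:
    dataset = LISATrafficLight("path/to/lisa-traffic-light-dataset")
    segment = dataset[0]
    data = segment[0]
    print(segment.name, data.label.classification, len(data.label.box2d))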