def get_dataset(
    dataset_slug: str,
    dataset_type: str,
    partition: Optional[str] = None,
    split: str = "default",
    split_type: str = "random",
    transform: Optional[List] = None,
) -> LocalDataset:
    """
    Creates and returns a local dataset of the requested type.

    Parameters
    ----------
    dataset_slug: str
        Slug of the dataset to retrieve
    dataset_type: str
        The type of dataset [classification, instance-segmentation, object-detection,
        semantic-segmentation]
    partition: str
        Selects one of the partitions [train, val, test, None]. (Default: None)
    split: str
        Selects the split that defines the percentages used. (Default: 'default')
    split_type: str
        Heuristic used to do the split [random, stratified]. (Default: 'random')
    transform : list[torchvision.transforms]
        List of PyTorch transforms

    Returns
    -------
    LocalDataset
        The dataset object matching ``dataset_type``, built from the locally
        pulled copy of the dataset.
    """
    # Dispatch table: dataset_type string -> dataset class.
    dataset_functions = {
        "classification": ClassificationDataset,
        "instance-segmentation": InstanceSegmentationDataset,
        "semantic-segmentation": SemanticSegmentationDataset,
        "object-detection": ObjectDetectionDataset,
    }
    dataset_function = dataset_functions.get(dataset_type)
    if not dataset_function:
        list_of_types = ", ".join(dataset_functions.keys())
        return _error(f"dataset_type needs to be one of '{list_of_types}'")

    identifier = DatasetIdentifier.parse(dataset_slug)
    client = _load_client(offline=True)

    # Search the locally pulled datasets of the team for a matching slug.
    for p in client.list_local_datasets(team_slug=identifier.team_slug):
        if identifier.dataset_slug == p.name:
            return dataset_function(
                dataset_path=p,
                partition=partition,
                split=split,
                split_type=split_type,
                release_name=identifier.version,
                transform=transform,
            )

    # Return the _error result for consistency with the invalid-type path above;
    # NOTE(review): presumably _error terminates/raises — confirm in its definition.
    return _error(
        f"Dataset '{identifier.dataset_slug}' does not exist locally. "
        f"Use 'darwin dataset remote' to see all the available datasets, "
        f"and 'darwin dataset pull' to pull them."
    )
def main() -> None:
    """Entry point: parse the command-line options and execute the request.

    Known failure modes are turned into user-facing messages via ``f._error``:
    Unauthorized (API key lacks permission), Unauthenticated (missing/invalid
    API key), InvalidTeam (team absent from the configuration), and
    requests.exceptions.ConnectionError (Darwin unreachable).
    """
    args, parser = Options().parse_args()
    try:
        _run(args, parser)
    except (Unauthorized, Unauthenticated, InvalidTeam, requests.exceptions.ConnectionError) as exc:
        # Dispatch on the exception type in the same order as a chain of
        # except-clauses would, so behavior is unchanged.
        if isinstance(exc, Unauthorized):
            f._error("Your API key is not authorized to do that action.")
        elif isinstance(exc, Unauthenticated):
            f._error("You need to specify a valid API key to do that action.")
        elif isinstance(exc, InvalidTeam):
            f._error("The team specified is not in the configuration, please authenticate first.")
        else:
            f._error("Darwin seems unreachable, please try again in a minute or contact support.")
def main():
    """Parse the command-line options and run the requested command,
    translating known API/configuration failures into user-facing messages."""
    args, parser = Options().parse_args()
    # Ordered (exception type, message) pairs — order mirrors the original
    # except-chain so subclass relationships resolve identically.
    handlers = (
        (Unauthorized, "Your API key is not authorized to do that action."),
        (Unauthenticated, "You need to specify a valid API key to do that action."),
        (InvalidTeam, "The team specified is not in the configuration, please authenticate first."),
        (requests.exceptions.ConnectionError, "Darwin seems unreachable, please try again in a minute or contact support."),
    )
    try:
        run(args, parser)
    except tuple(kind for kind, _ in handlers) as exc:
        for kind, message in handlers:
            if isinstance(exc, kind):
                f._error(message)
                break
def detectron2_register_dataset(
    dataset: str,
    release_name: Optional[str] = "latest",
    partition: Optional[str] = None,
    split: Optional[str] = "default",
    split_type: Optional[str] = "stratified",
    evaluator_type: Optional[str] = None,
) -> str:
    """Registers a local Darwin-formatted dataset in Detectron2.

    Parameters
    ----------
    dataset: str
        Dataset slug, or a path to a local dataset directory
    release_name: str
        Version of the dataset
    partition: str
        Selects one of the partitions [train, val, test]
    split: str
        Selects the split that defines the percentages used (use 'default' to
        select the default split)
    split_type: str
        Heuristic used to do the split [random, stratified]
    evaluator_type: str
        Evaluator to be used in the val and test sets

    Returns
    -------
    str
        The name under which the dataset was registered in Detectron2's
        DatasetCatalog.
    """
    # Detectron2 is an optional dependency: bail out with a message if absent.
    try:
        from detectron2.data import DatasetCatalog, MetadataCatalog
    except ImportError:
        print("Detectron2 not found.")
        sys.exit(1)
    from darwin.dataset.utils import get_annotations, get_classes

    dataset_path: Optional[Path] = None
    if os.path.isdir(dataset):
        # Direct path to a local dataset directory.
        dataset_path = Path(dataset)
    else:
        # Treat the argument as a dataset identifier and look it up locally.
        identifier = DatasetIdentifier.parse(dataset)
        if identifier.version:
            release_name = identifier.version
        client = _load_client(offline=True)
        for path in client.list_local_datasets(team_slug=identifier.team_slug):
            if identifier.dataset_slug == path.name:
                dataset_path = path
        if not dataset_path:
            _error(
                f"Dataset '{identifier.dataset_slug}' does not exist locally. "
                f"Use 'darwin dataset remote' to see all the available datasets, "
                f"and 'darwin dataset pull' to pull them."
            )

    catalog_name = f"darwin_{dataset_path.name}"
    if partition:
        catalog_name += f"_{partition}"

    classes = get_classes(dataset_path=dataset_path, release_name=release_name, annotation_type="polygon")

    DatasetCatalog.register(
        catalog_name,
        # partition is bound as a default argument so the lambda captures the
        # current value rather than a late-bound closure variable.
        lambda partition=partition: list(
            get_annotations(
                dataset_path,
                partition=partition,
                split=split,
                split_type=split_type,
                release_name=release_name,
                annotation_type="polygon",
                annotation_format="coco",
                ignore_inconsistent_examples=True,
            )
        ),
    )
    MetadataCatalog.get(catalog_name).set(thing_classes=classes)
    if evaluator_type:
        MetadataCatalog.get(catalog_name).set(evaluator_type=evaluator_type)
    return catalog_name