def _load_client(
    team_slug: Optional[str] = None,
    offline: bool = False,
    maybe_guest: bool = False,
    dataset_identifier: Optional[str] = None,
):
    """Build a client from the ``~/.darwin/config.yaml`` configuration file.

    Parameters
    ----------
    team_slug : Optional[str]
        Team slug forwarded to ``Client.from_config``. When it is empty and
        ``dataset_identifier`` is provided, the slug is taken from the parsed
        identifier instead.
    offline : bool
        Flag for using an offline client. It is accepted for interface
        compatibility but is not read anywhere in this function.
    maybe_guest : bool
        When the configuration is missing, return ``Client.from_guest()``
        instead of reporting an error.
    dataset_identifier : Optional[str]
        Dataset identifier string; only parsed to recover a team slug when
        ``team_slug`` was not supplied.

    Returns
    -------
    Client
        The client requested.

    Notes
    -----
    Failures are reported through ``_error``.
    NOTE(review): presumably ``_error`` terminates the process; if it ever
    returns, this function falls through and returns ``None``.
    """
    # Fall back to the team encoded in the dataset identifier.
    if not team_slug and dataset_identifier:
        parsed_identifier = DatasetIdentifier.parse(dataset_identifier)
        team_slug = parsed_identifier.team_slug

    try:
        return Client.from_config(
            Path.home() / ".darwin" / "config.yaml", team_slug=team_slug
        )
    except MissingConfig:
        # No configuration on disk: optionally degrade to a guest client.
        if maybe_guest:
            return Client.from_guest()
        _error("Authenticate first")
    except (InvalidLogin, Unauthenticated):
        # Both failures ask the user for the same remedy.
        _error("Please re-authenticate")
def run_demo(
    *,
    team_slug: Optional[str],
    dataset_slug: Optional[str] = None,
    datasets_dir: Optional[str] = None,
    api_key: Optional[str] = None,
    config_path: Optional[Path] = None,
):
    """Download a Darwin dataset to the file system and split it.

    Parameters
    ----------
    team_slug : Optional[str]
        Slug of the team to select.
    dataset_slug : Optional[str]
        This is the dataset name with everything lower-case, special
        characters removed and spaces replaced by dashes, e.g.,
        `bird-species`. This string is unique within a team.
    datasets_dir : Optional[str]
        Path where the client should be initialized from (aka the root path).
        Only forwarded to ``authenticate`` when ``api_key`` is given.
    api_key : Optional[str]
        API key to authenticate the client. When provided, ``authenticate``
        is called with it before the client is created.
    config_path : Optional[Path]
        Path to a configuration file which contains the authentication
        information to use. When omitted, ``Client.local`` is used instead.

    Returns
    -------
    splits : dict
        The value produced by ``split_dataset``: keys are the different
        splits (random, tags, ...) and values are the relative file names.
    """
    # Authenticate the new key if available.
    if api_key is not None:
        authenticate(api_key=api_key, default_team=True, datasets_dir=datasets_dir)

    # Get the client used to perform remote operations.
    if config_path is not None:
        client = Client.from_config(config_path=config_path)
    else:
        client = Client.local(team_slug=team_slug)

    # Resolve the remote dataset from its team/dataset slugs.
    dataset_identifier = DatasetIdentifier.from_slug(dataset_slug=dataset_slug, team_slug=team_slug)
    ds = client.get_remote_dataset(dataset_identifier=dataset_identifier)

    # Download the dataset on the local file system.
    ds.pull()

    # Split the dataset in train/val/test and hand the result back to the
    # caller; previously the splits were computed and then discarded.
    return split_dataset(dataset=ds)
def _load_client(team: Optional[str] = None, offline: bool = False):
    """Build a client from the ``~/.darwin/config.yaml`` configuration file.

    Parameters
    ----------
    team : Optional[str]
        Team slug forwarded to ``Client.from_config`` as ``team_slug``.
    offline : bool
        Flag for using an offline client. It is accepted for interface
        compatibility but is not read anywhere in this function.

    Returns
    -------
    Client
        The client requested.

    Notes
    -----
    Failures are reported through ``_error``.
    NOTE(review): presumably ``_error`` terminates the process; if it ever
    returns, this function falls through and returns ``None``.
    """
    try:
        return Client.from_config(
            Path.home() / ".darwin" / "config.yaml", team_slug=team
        )
    except MissingConfig:
        _error("Authenticate first")
    except (InvalidLogin, Unauthenticated):
        # Both failures ask the user for the same remedy.
        _error("Please re-authenticate")
def get_darwin_dataset(
    *,
    team_slug: Optional[str] = None,
    dataset_slug: Optional[str] = None,
    dataset_id: Optional[str] = None,
    projects_dir: Optional[str] = None,
    token: Optional[str] = None,
    config_path: Optional[Path] = None,
    email: Optional[str] = None,
    password: Optional[str] = None,
    val_percentage: Optional[float] = 0.1,
    test_percentage: Optional[float] = 0.2,
    force_resplit: Optional[bool] = False,
    split_seed: Optional[int] = 42
):
    """Download a Darwin dataset to the file system and split it.

    The caller can pick how the client authenticates and how the dataset
    split is configured.

    Parameters
    ----------
    team_slug : Optional[str]
        Slug of the team to select. When given, ``set_team`` is called on the
        client before the dataset is fetched.
    dataset_slug : Optional[str]
        This is the dataset name with everything lower-case, special
        characters removed and spaces replaced by dashes, e.g.,
        `bird-species`. This string is unique within a team.
    dataset_id : Optional[str]
        Dataset id forwarded to ``get_remote_dataset`` alongside the slug.
    projects_dir : Optional[str]
        Path where the client should be initialized from (aka the root path).
    token : Optional[str]
        Access token used to auth a specific request. It has a time span of
        roughly 8 minutes.
    config_path : Optional[Path]
        Path to a configuration file to use to create the client.
    email : Optional[str]
        Email of the Darwin user to use for the login.
    password : Optional[str]
        Password of the Darwin user to use for the login.
    val_percentage : Optional[float]
        Percentage of images used in the validation set.
    test_percentage : Optional[float]
        Percentage of images used in the test set.
    force_resplit : Optional[bool]
        Discard previous split and create a new one.
    split_seed : Optional[int]
        Fixed seed for random split creation.

    Returns
    -------
    splits : dict
        Keys are the different splits (random, tags, ...) and values are the
        relative file names.
    """
    # Authenticate the client. The priority of the cases is arbitrarily
    # chosen and should not actually matter: email/password, then token,
    # then config file, then the default client.
    has_credentials = email is not None and password is not None
    if has_credentials:
        darwin_client = Client.login(email=email, password=password, projects_dir=projects_dir)
    elif token is not None:
        darwin_client = Client.from_token(token=token, projects_dir=projects_dir)
    elif config_path is not None:
        darwin_client = Client.from_config(config_path=config_path)
    else:
        darwin_client = Client.default(projects_dir=projects_dir)

    # Select the desired team.
    if team_slug is not None:
        darwin_client.set_team(slug=team_slug)

    # Fetch the remote dataset and download its data to the file system.
    remote_dataset = darwin_client.get_remote_dataset(slug=dataset_slug, dataset_id=dataset_id)
    remote_dataset.pull()

    # Split the dataset with the requested parameters.
    split_options = {
        "val_percentage": val_percentage,
        "test_percentage": test_percentage,
        "force_resplit": force_resplit,
        "split_seed": split_seed,
    }
    return split_dataset(dataset=remote_dataset, **split_options)