Example #1
0
def _load_client(
    team_slug: Optional[str] = None,
    offline: bool = False,
    maybe_guest: bool = False,
    dataset_identifier: Optional[str] = None,
):
    """Build a client from the ``~/.darwin/config.yaml`` configuration file.

    Parameters
    ----------
    team_slug : Optional[str]
        Team slug forwarded to ``Client.from_config``. When it is falsy and
        ``dataset_identifier`` is provided, the slug is taken from the parsed
        identifier instead.
    offline : bool
        Accepted for interface compatibility; not read by this function.
    maybe_guest : bool
        If the configuration is missing, return a guest client rather than
        reporting an error.
    dataset_identifier : Optional[str]
        Identifier handed to ``DatasetIdentifier.parse`` to recover a team
        slug when none was given explicitly.

    Returns
    -------
    Client
        The client created from the configuration, or a guest client when the
        configuration is missing and ``maybe_guest`` is set.
    """
    if dataset_identifier and not team_slug:
        parsed_identifier = DatasetIdentifier.parse(dataset_identifier)
        team_slug = parsed_identifier.team_slug

    try:
        return Client.from_config(
            Path.home() / ".darwin" / "config.yaml", team_slug=team_slug
        )
    except MissingConfig:
        # No configuration on disk: fall back to a guest client only if allowed.
        if maybe_guest:
            return Client.from_guest()
        _error("Authenticate first")
    except (InvalidLogin, Unauthenticated):
        # Both failures are reported to the user with the same message.
        _error("Please re-authenticate")
Example #2
0
def run_demo(
    *,
    team_slug: Optional[str],
    dataset_slug: Optional[str] = None,
    datasets_dir: Optional[str] = None,
    api_key: Optional[str] = None,
    config_path: Optional[Path] = None,
):
    """
    Download a Darwin dataset on the file system and split it.

    Parameters
    ----------
    team_slug : Optional[str]
        Slug of the team to select
    dataset_slug : Optional[str]
        This is the dataset name with everything lower-case, special characters removed and
        spaces replaced by dashes, e.g., `bird-species`. This string is unique within a team
    datasets_dir : Optional[str]
        Path where the client should be initialized from (aka the root path).
        Only forwarded to ``authenticate`` when ``api_key`` is given
    api_key : Optional[str]
        API key to authenticate the client. When provided, ``authenticate`` is
        called with it before the client is created
    config_path : Optional[Path]
        Path to a configuration file which contains the authentication information to use.
        When omitted, the client is obtained from ``Client.local`` for ``team_slug``

    Returns
    -------
    splits : dict
        Keys are the different splits (random, tags, ...) and values are the relative file names
    """
    # Authenticate the new KEY if available
    if api_key is not None:
        authenticate(api_key=api_key,
                     default_team=True,
                     datasets_dir=datasets_dir)
    # Get the client used to perform remote operations
    if config_path is not None:
        client = Client.from_config(config_path=config_path)
    else:
        client = Client.local(team_slug=team_slug)
    # Create a dataset identifier
    dataset_identifier = DatasetIdentifier.from_slug(dataset_slug=dataset_slug,
                                                     team_slug=team_slug)
    # Get an object representing the remote dataset
    ds = client.get_remote_dataset(dataset_identifier=dataset_identifier)
    # Download the dataset on the local file system
    ds.pull()
    # Split the dataset in train/val/test and hand the result back to the
    # caller; previously the splits were computed and then discarded, so the
    # function returned None despite documenting a `splits` return value.
    return split_dataset(dataset=ds)
Example #3
0
def _load_client(team: Optional[str] = None, offline: bool = False):
    """Create a client from the ``~/.darwin/config.yaml`` configuration file.

    Parameters
    ----------
    team : Optional[str]
        Team slug forwarded to ``Client.from_config`` as ``team_slug``
    offline : bool
        Accepted for interface compatibility; not read by this function

    Returns
    -------
    Client
        The client built from the configuration file
    """
    try:
        config_file = Path.home() / ".darwin" / "config.yaml"
        return Client.from_config(config_file, team_slug=team)
    except MissingConfig:
        _error("Authenticate first")
    except (InvalidLogin, Unauthenticated):
        # Invalid and missing credentials are reported with the same message.
        _error("Please re-authenticate")
Example #4
0
def get_darwin_dataset(
        *,
        team_slug: Optional[str] = None,
        dataset_slug: Optional[str] = None,
        dataset_id: Optional[str] = None,
        projects_dir: Optional[str] = None,
        token: Optional[str] = None,
        config_path: Optional[Path] = None,
        email: Optional[str] = None,
        password: Optional[str] = None,
        val_percentage: Optional[float] = 0.1,
        test_percentage: Optional[float] = 0.2,
        force_resplit: Optional[bool] = False,
        split_seed: Optional[int] = 42
):
    """
    Pull a Darwin dataset to the file system and split it.

    The authentication method is picked from the arguments in this order:
    email and password (both required), then token, then configuration file,
    and finally the default client.

    Parameters
    ----------
    team_slug : str
        Slug of the team to select. When None, no team is set on the client
    dataset_slug : str
        This is the dataset name with everything lower-case, special characters removed and
        spaces replaced by dashes, e.g., `bird-species`. This string is unique within a team
    dataset_id : str
        Forwarded to ``client.get_remote_dataset`` together with ``dataset_slug``
    projects_dir : str
        Path where the client should be initialized from (aka the root path).
        Not used when the client is created from ``config_path``
    token : str
        Access token used to authenticate. Per the original notes it is
        short-lived (roughly 8 minutes)
    config_path : Path
        Path to a configuration file to use to create the client
    email : str
        Email of the Darwin user to use for the login
    password : str
        Password of the Darwin user to use for the login
    val_percentage : float
        Percentage of images used in the validation set
    test_percentage : float
        Percentage of images used in the test set
    force_resplit : bool
        Discard previous split and create a new one
    split_seed : int
        Fix seed for random split creation

    Returns
    -------
    splits : dict
        Keys are the different splits (random, tags, ...) and values are the relative file names
    """
    # Credentials only count when both halves are present; a lone email or
    # password falls through to the next authentication method.
    has_credentials = not (email is None or password is None)

    if has_credentials:
        client = Client.login(email=email, password=password, projects_dir=projects_dir)
    elif token is not None:
        client = Client.from_token(token=token, projects_dir=projects_dir)
    elif config_path is not None:
        client = Client.from_config(config_path=config_path)
    else:
        client = Client.default(projects_dir=projects_dir)

    # Switch team only when one was requested explicitly.
    if team_slug is not None:
        client.set_team(slug=team_slug)

    # Fetch the remote dataset handle and download its data locally.
    remote_dataset = client.get_remote_dataset(slug=dataset_slug, dataset_id=dataset_id)
    remote_dataset.pull()

    # Forward the split configuration unchanged.
    split_options = {
        "val_percentage": val_percentage,
        "test_percentage": test_percentage,
        "force_resplit": force_resplit,
        "split_seed": split_seed,
    }
    return split_dataset(dataset=remote_dataset, **split_options)