Example #1
0
def test_auth_api_access_token_property_no_refresh():
    """Check that reading ``access_token`` fails when the config has no refresh token."""
    client = RESTAPIClient(_REMOTE)

    # Test that we raise an error if there's no refresh token in the config
    # (there won't be if we just use the default config).
    with pytest.raises(AuthAPIError) as e:
        _ = client.access_token

    # The message check must live outside the ``with`` block: any statement
    # placed after the raising line inside the block is never executed.
    # ``e`` is pytest's ExceptionInfo wrapper; the exception itself is ``e.value``.
    assert "No refresh token found in the config" in str(e.value)
Example #2
0
def curl_c(remote, request_type, image, request_params, curl_args):
    """
    Query a Splitgraph REST API.

    This is a thin wrapper around curl that performs an HTTP request to Splitgraph Cloud to
    interact with a dataset using PostgREST (http://postgrest.org) or the Splitfile execution service.

    The actual invocation is:

    ```
    curl [API endpoint][request] -H [access_token] [extra curl args].
    ```

    The image must be of the form `namespace/repository:[hash_or_tag (default latest)]`.

    The actual request parameters depend on the request type:

      * For PostgREST: `/table?[postgrest request]` or empty to get the OpenAPI spec for this image.
        For a reference on how to perform Postgrest requests, see http://postgrest.org/en/latest/api.html.
      * For the Splitfile executor: a JSON array to be POSTed to the executor, e.g.
        `'{"command": "FROM some/repo IMPORT some_table AS alias", "tag": "new_tag"}'`.

    `--curl-args` allows to pass extra arguments to curl. Note that every argument must be prefixed
    with `--curl-args`, e.g. `--curl-args --cacert --curl-args /path/to/ca.pem`.
    """
    from splitgraph.config import CONFIG
    from splitgraph.cloud import RESTAPIClient, get_headers

    repository, hash_or_tag = image

    # Resolve the remote's settings and assemble the authenticated headers.
    config = CONFIG["remotes"][remote]
    access_token = RESTAPIClient(remote).access_token
    headers = get_headers()
    headers["Authorization"] = "Bearer " + access_token

    def image_url():
        # [query API]/[repository]/[hash or tag], shared by both request types.
        return config["SG_QUERY_API"] + "/%s/%s" % (str(repository), str(hash_or_tag))

    if request_type != "postgrest":
        # Anything other than "postgrest" is sent to the Splitfile endpoint:
        # the request parameters are POSTed as the JSON request body.
        full_request = image_url() + "/-/splitfile"
        curl_args = ["-X", "POST", "-d", request_params, *curl_args]
        headers["Content-Type"] = "application/json"
    else:
        # PostgREST: non-empty parameters are appended as a path, so make
        # sure they begin with a slash.
        if request_params and not request_params.startswith("/"):
            request_params = "/" + request_params
        full_request = image_url() + "/-/rest" + request_params

    # Every header becomes a `-H "Name: value"` argument pair.
    header_invocation = []
    for header in headers.items():
        header_invocation += ["-H", "%s: %s" % header]

    subprocess_args = ["curl", full_request, *header_invocation, *curl_args]

    logging.debug("Calling %s", " ".join(subprocess_args))
    subprocess.call(subprocess_args)
Example #3
0
def _do_version_check():
    """
    Ask the update remote for the latest sgr version and print an upgrade notice
    if it is newer than the running one.

    Prints nothing when the remote reports no version or when the reported
    version is not newer than `__version__`.
    """
    from splitgraph.cloud import RESTAPIClient
    from packaging.version import Version
    from splitgraph.config import CONFIG

    latest = RESTAPIClient(CONFIG["SG_UPDATE_REMOTE"]).get_latest_version()

    # Nothing to compare against.
    if not latest:
        return

    available = Version(latest)
    installed = Version(__version__)
    if not available > installed:
        return

    notice = (
        "You are using sgr version %s, however version %s is available."
        % (__version__, latest),
        "Consider upgrading by running sgr upgrade or pip install -U splitgraph.",
        "Disable this message by setting SG_UPDATE_FREQUENCY=0 in your .sgconfig.",
    )
    for line in notice:
        click.echo(line)
Example #4
0
def test_auth_api_access_token_property_expired():
    """
    Check that reading ``access_token`` with a stale token in the config POSTs the
    refresh token to the auth endpoint and writes the new access token to the config.
    """
    client = RESTAPIClient(_REMOTE)

    # strictly speaking, we should use freezegun or patch time here,
    # but by default AuthClient is supposed to refresh the token 30s
    # before it actually expires.
    issued_at = time.time()
    stale_token = _make_dummy_access_token(issued_at)
    fresh_token = _make_dummy_access_token(issued_at + 1800)
    refresh_token = "EEEEFFFFGGGGHHHH"

    def config_with(access_token):
        # Build a new config dict holding the given access token for _REMOTE.
        remote_section = {
            "SG_CLOUD_ACCESS_TOKEN": access_token,
            "SG_CLOUD_REFRESH_TOKEN": refresh_token,
        }
        return {"remotes": {_REMOTE: remote_section}, "SG_CONFIG_FILE": ".sgconfig"}

    def issue_access_token(request, uri, response_headers):
        # The client must send exactly the refresh token stored in the config.
        assert json.loads(request.body) == {"refresh_token": refresh_token}
        payload = json.dumps({"access_token": fresh_token})
        return [200, response_headers, payload]

    httpretty.register_uri(
        httpretty.HTTPretty.POST, _ENDPOINT + "/access_token", body=issue_access_token
    )

    with patch(
        "splitgraph.cloud.create_config_dict", return_value=config_with(stale_token)
    ), patch("splitgraph.cloud.overwrite_config") as oc:
        token = client.access_token

    # The config is expected to be saved once, with only the access token replaced.
    oc.assert_called_once_with(config_with(fresh_token), ".sgconfig")
Example #5
0
def load_c(remote, readme_dir, repositories_file, limit_repositories):
    """
    Load a Splitgraph catalog from a YAML file.

    This will load a repositories.yml file and the `readmes` subdirectory produced by
    `sgr cloud dump` back into a remote Splitgraph catalog.

    The format is an extension of the format accepted by `sgr cloud metadata` to include multiple
    repositories. README files are read from the `readmes` subdirectory.

    \b
    ```
    credentials:      # Optional credentials to access remote data sources
      my_bucket:
        plugin: csv
        data:
          s3_access_key: ...
          s3_secret_key: ...
    repositories:
    - namespace: my_username
      repository: repository
      metadata:
        readme: dataset-readme.md
        description: Dataset description (160 characters max).
        topics:
          - topic_1
          - topic_2
        sources:
          - anchor: Source
            href: https://www.splitgraph.com
            isCreator: true
            isSameAs: false
          - anchor: Source 2
            href: https://www.splitgraph.com
            isCreator: false
            isSameAs: true
        license: Public Domain
        extra_metadata:
          key_1:
            key_1_1: value_1_1
            key_1_2: value_1_2
          key_2:
            key_2_1: value_2_1
            key_2_2: value_2_2
      external:
        credential: my_bucket
        plugin: csv
        params:
          s3_bucket: my_bucket
        tables:
          table_1:
            schema:
            - name: column_1
              type: text
            - name: column_2
              type: integer
            options:
              s3_object: some/s3_key.csv
    ```
    """
    import yaml
    from splitgraph.cloud import GQLAPIClient
    from splitgraph.cloud import RESTAPIClient

    def full_name(repo):
        # "namespace/repository", used both for filtering and as the progress label.
        return f"{repo.namespace}/{repo.repository}"

    parsed = yaml.safe_load(repositories_file)
    repo_yaml = RepositoriesYAML.parse_obj(parsed)

    # Set up and load credential IDs from the remote to allow users to refer to them by ID
    # or by a name.
    rest_client = RESTAPIClient(remote)
    gql_client = GQLAPIClient(remote)
    declared_credentials = repo_yaml.credentials or {}
    credential_map = _build_credential_map(rest_client, credentials=declared_credentials)

    repositories = repo_yaml.repositories

    # An empty/missing filter means "load everything".
    if limit_repositories:
        repositories = [
            repo for repo in repositories if full_name(repo) in limit_repositories
        ]

    with tqdm(repositories) as progress:
        for repo in progress:
            progress.set_description(full_name(repo))

            # External data source settings go through the REST client...
            if repo.external:
                rest_client.upsert_external(
                    repo.namespace, repo.repository, repo.external, credential_map
                )

            # ...while metadata is prepared against readme_dir and sent via GraphQL.
            if repo.metadata:
                metadata = _prepare_metadata(repo.metadata, readme_basedir=readme_dir)
                gql_client.upsert_metadata(repo.namespace, repo.repository, metadata)
Example #6
0
def login_c(username, password, remote, overwrite, skip_inject):
    """Log into a Splitgraph registry with username/password.

    This will generate a new refresh token (to use the Splitgraph query API)
    and API keys to let sgr access the registry (if they don't already exist
    in the configuration file or if the actual username has changed).

    Note that if you already have generated an API key pair but it's not
    in the configuration file, this will generate a new pair instead of
    restoring the existing one, as the API secret is only stored in the configuration file.

    If you want to log in using an existing API key pair, use `sgr cloud login-api` instead.
    """
    from splitgraph.config import CONFIG
    from splitgraph.config.config import get_all_in_subsection
    from splitgraph.cloud import RESTAPIClient, get_token_claim, DEFAULT_REMOTES

    client = RESTAPIClient(remote)

    # Ask for the password interactively when it was not supplied.
    if not password:
        profile_url = _construct_user_profile_url(client.endpoint)
        password = click.prompt(
            text="Password (visit %s if you don't have it)" % profile_url,
            confirmation_prompt=False,
            hide_input=True,
        )

    access, refresh = client.get_refresh_token(username, password)

    # Extract namespace from the access token since we might have logged in with an e-mail.
    namespace = get_token_claim(access, "username")

    click.echo("Logged into %s as %s" % (remote, namespace))

    config_remote_params = get_all_in_subsection(CONFIG, "remotes", remote)

    # Start from the default parameters only if the remote has no settings in
    # the config yet; otherwise patch just the login-related keys.
    if config_remote_params:
        remote_params = {}
    else:
        remote_params = copy(DEFAULT_REMOTES.get(remote, {}))
    remote_params["SG_NAMESPACE"] = namespace
    remote_params["SG_CLOUD_REFRESH_TOKEN"] = refresh
    remote_params["SG_CLOUD_ACCESS_TOKEN"] = access

    config_patch = {
        "SG_REPO_LOOKUP": _update_repo_lookup(CONFIG, remote),
        "remotes": {remote: remote_params},
    }

    # Get new tokens in any case if we're logging in under a different username.
    try:
        previous_namespace = CONFIG["remotes"][remote]["SG_NAMESPACE"]
    except KeyError:
        username_changed = False
    else:
        username_changed = namespace != previous_namespace

    has_api_keys = (
        "SG_ENGINE_USER" in config_remote_params
        and "SG_ENGINE_PWD" in config_remote_params
    )
    if not has_api_keys or overwrite or username_changed:
        key, secret = client.create_machine_credentials(access, password)
        # remote_params is the same dict that config_patch["remotes"][remote] refers to.
        remote_params["SG_ENGINE_USER"] = key
        remote_params["SG_ENGINE_PWD"] = secret
        click.echo("Acquired new API keys")

    config_path = patch_and_save_config(CONFIG, config_patch)

    if not skip_inject:
        inject_config_into_engines(CONFIG["SG_ENGINE_PREFIX"], config_path)