Beispiel #1
0
def split_path(path: str) -> Tuple[str, str]:
    if not path.startswith("gs://"):
        raise Error(f"Invalid path: '{path}'")
    path = path[len("gs://"):]
    bucket, _, obj = path.partition("/")
    if bucket == "":
        raise Error(f"Invalid path: '{path}'")
    return bucket, obj
Beispiel #2
0
def split_url_https(path: str) -> Tuple[str, str, str]:
    parts = path[len("https://"):].split("/")
    if len(parts) < 2:
        raise Error(f"Invalid path: '{path}'")
    hostname = parts[0]
    container = parts[1]
    if not hostname.endswith(".blob.core.windows.net") or container == "":
        raise Error(f"Invalid path: '{path}'")
    obj = "/".join(parts[2:])
    account = hostname.split(".")[0]
    return account, container, obj
Beispiel #3
0
def load_credentials() -> Mapping[str, str]:
    if "AZURE_STORAGE_ACCOUNT_KEY" in os.environ:
        return dict(storageAccountKey=os.environ["AZURE_STORAGE_ACCOUNT_KEY"])

    if "AZURE_APPLICATION_CREDENTIALS" in os.environ:
        creds_path = os.environ["AZURE_APPLICATION_CREDENTIALS"]
        if not os.path.exists(creds_path):
            raise Error(
                f"Credentials not found at '{creds_path}' specified by environment variable 'AZURE_APPLICATION_CREDENTIALS'"
            )
        with open(creds_path) as f:
            return json.load(f)

    if "AZURE_CLIENT_ID" in os.environ:
        return dict(
            appId=os.environ["AZURE_CLIENT_ID"],
            password=os.environ["AZURE_CLIENT_SECRET"],
            tenant=os.environ["AZURE_TENANT_ID"],
        )

    # look for a refresh token in the az command line credentials
    # https://mikhail.io/2019/07/how-azure-cli-manages-access-tokens/
    default_creds_path = os.path.expanduser("~/.azure/accessTokens.json")
    if os.path.exists(default_creds_path):
        default_profile_path = os.path.expanduser("~/.azure/azureProfile.json")
        if not os.path.exists(default_profile_path):
            raise Error(f"Missing default profile path: '{default_profile_path}'")
        with open(default_profile_path, "rb") as f:
            # this file has a UTF-8 BOM
            profile = json.loads(f.read().decode("utf-8-sig"))
            subscriptions = [sub["id"] for sub in profile["subscriptions"]]

        with open(default_creds_path) as f:
            tokens = json.load(f)
            best_token = None
            for token in tokens:
                if best_token is None:
                    best_token = token
                else:
                    if token["expiresOn"] > best_token["expiresOn"]:
                        best_token = token
            if best_token is not None:
                token = copy.copy(best_token)
                token["subscriptions"] = subscriptions
                return token

    raise Error(
        """Azure credentials not found, please do one of the following:

1) Log in with 'az login', blobfile will use your default credentials to lookup your storage account key
2) Set the environment variable 'AZURE_STORAGE_ACCOUNT_KEY' to your storage account key which you can find by following this guide: https://docs.microsoft.com/en-us/azure/storage/common/storage-account-keys-manage
3) Create an account with 'az ad sp create-for-rbac --name <name>' and set the 'AZURE_APPLICATION_CREDENTIALS' environment variable to the path of the output from that command or individually set the 'AZURE_CLIENT_ID', 'AZURE_CLIENT_SECRET', and 'AZURE_TENANT_ID' environment variables"""
    )
Beispiel #4
0
def create_access_token_request(scopes: List[str]) -> Request:
    creds, err = load_credentials()
    if err is not None:
        raise Error(err)
    if "private_key" in creds:
        # looks like GCS does not support the no-oauth flow https://developers.google.com/identity/protocols/OAuth2ServiceAccount#jwt-auth
        return _create_token_request(creds["client_email"],
                                     creds["private_key"], scopes)
    elif "refresh_token" in creds:
        return _refresh_access_token_request(
            refresh_token=creds["refresh_token"],
            client_id=creds["client_id"],
            client_secret=creds["client_secret"],
        )
    else:
        raise Error("Credentials not recognized")
Beispiel #5
0
def split_url(path: str) -> Tuple[str, str, str]:
    if path.startswith("az://"):
        return split_url_az(path)
    elif path.startswith("https://"):
        return split_url_https(path)
    else:
        raise Error(f"Invalid path: '{path}'")
Beispiel #6
0
def split_url_az(path: str) -> Tuple[str, str, str]:
    parts = path[len("az://"):].split("/")
    if len(parts) < 2:
        raise Error(f"Invalid path: '{path}'")
    account = parts[0]
    container = parts[1]
    obj = "/".join(parts[2:])
    return account, container, obj
Beispiel #7
0
def generate_signed_url(
    bucket: str,
    name: str,
    expiration: float,
    method: str = "GET",
    params: Optional[Mapping[str, str]] = None,
    headers: Optional[Mapping[str, str]] = None,
) -> Tuple[str, Optional[float]]:
    if params is None:
        p = {}
    else:
        p = dict(params).copy()

    if headers is None:
        h = {}
    else:
        h = dict(headers).copy()

    # https://cloud.google.com/storage/docs/access-control/signing-urls-manually
    creds, err = load_credentials()
    if err is not None:
        raise Error(err)
    if "private_key" not in creds:
        raise Error(
            "Private key not found in credentials.  Please set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable to point to a JSON key for a service account to use this call"
        )

    if expiration > MAX_EXPIRATION:
        raise Error(
            f"Expiration can't be longer than {MAX_EXPIRATION} seconds.")

    escaped_object_name = urllib.parse.quote(name, safe="")
    canonical_uri = f"/{bucket}/{escaped_object_name}"

    datetime_now = datetime.datetime.utcnow()
    request_timestamp = datetime_now.strftime("%Y%m%dT%H%M%SZ")
    datestamp = datetime_now.strftime("%Y%m%d")

    credential_scope = f"{datestamp}/auto/storage/goog4_request"
    credential = f"{creds['client_email']}/{credential_scope}"
    h["host"] = "storage.googleapis.com"

    canonical_headers = ""
    ordered_headers = sorted(h.items())
    for k, v in ordered_headers:
        lower_k = str(k).lower()
        strip_v = str(v).lower()
        canonical_headers += f"{lower_k}:{strip_v}\n"

    signed_headers_parts = []
    for k, _ in ordered_headers:
        lower_k = str(k).lower()
        signed_headers_parts.append(lower_k)
    signed_headers = ";".join(signed_headers_parts)

    p["X-Goog-Algorithm"] = "GOOG4-RSA-SHA256"
    p["X-Goog-Credential"] = credential
    p["X-Goog-Date"] = request_timestamp
    p["X-Goog-Expires"] = str(expiration)
    p["X-Goog-SignedHeaders"] = signed_headers

    canonical_query_string_parts = []
    ordered_params = sorted(p.items())
    for k, v in ordered_params:
        encoded_k = urllib.parse.quote(str(k), safe="")
        encoded_v = urllib.parse.quote(str(v), safe="")
        canonical_query_string_parts.append(f"{encoded_k}={encoded_v}")
    canonical_query_string = "&".join(canonical_query_string_parts)

    canonical_request = "\n".join([
        method,
        canonical_uri,
        canonical_query_string,
        canonical_headers,
        signed_headers,
        "UNSIGNED-PAYLOAD",
    ])

    canonical_request_hash = hashlib.sha256(
        canonical_request.encode()).hexdigest()

    string_to_sign = "\n".join([
        "GOOG4-RSA-SHA256",
        request_timestamp,
        credential_scope,
        canonical_request_hash,
    ])

    signature = binascii.hexlify(
        _sign(creds["private_key"],
              string_to_sign.encode("utf8"))).decode("utf8")
    host_name = "https://storage.googleapis.com"
    signed_url = f"{host_name}{canonical_uri}?{canonical_query_string}&X-Goog-Signature={signature}"
    return signed_url, expiration
Beispiel #8
0
def load_credentials() -> Dict[str, Any]:
    # https://github.com/Azure/azure-sdk-for-python/tree/master/sdk/identity/azure-identity#environment-variables
    # AZURE_STORAGE_KEY seems to be the environment variable mentioned by the az cli
    # AZURE_STORAGE_ACCOUNT_KEY is mentioned elsewhere on the internet
    for varname in ["AZURE_STORAGE_KEY", "AZURE_STORAGE_ACCOUNT_KEY"]:
        if varname in os.environ:
            result = dict(storageAccountKey=os.environ[varname])
            if "AZURE_STORAGE_ACCOUNT" in os.environ:
                result["account"] = os.environ["AZURE_STORAGE_ACCOUNT"]
            return result

    if "AZURE_APPLICATION_CREDENTIALS" in os.environ:
        creds_path = os.environ["AZURE_APPLICATION_CREDENTIALS"]
        if not os.path.exists(creds_path):
            raise Error(
                f"Credentials not found at '{creds_path}' specified by environment variable 'AZURE_APPLICATION_CREDENTIALS'"
            )
        with open(creds_path) as f:
            return json.load(f)

    if "AZURE_CLIENT_ID" in os.environ:
        return dict(
            appId=os.environ["AZURE_CLIENT_ID"],
            password=os.environ["AZURE_CLIENT_SECRET"],
            tenant=os.environ["AZURE_TENANT_ID"],
        )

    if "AZURE_STORAGE_CONNECTION_STRING" in os.environ:
        connection_data = {}
        # technically this should be parsed according to the rules in https://www.connectionstrings.com/formating-rules-for-connection-strings/
        for part in os.environ["AZURE_STORAGE_CONNECTION_STRING"].split(";"):
            key, _, val = part.partition("=")
            connection_data[key.lower()] = val
        return dict(
            account=connection_data["accountname"],
            storageAccountKey=connection_data["accountkey"],
        )

    # look for a refresh token in the az command line credentials
    # https://mikhail.io/2019/07/how-azure-cli-manages-access-tokens/
    default_creds_path = os.path.expanduser("~/.azure/accessTokens.json")
    if os.path.exists(default_creds_path):
        default_profile_path = os.path.expanduser("~/.azure/azureProfile.json")
        if not os.path.exists(default_profile_path):
            raise Error(
                f"Missing default profile path: '{default_profile_path}'")
        with open(default_profile_path, "rb") as f:
            # this file has a UTF-8 BOM
            profile = json.loads(f.read().decode("utf-8-sig"))
        subscriptions = profile["subscriptions"]

        def key_fn(x: Mapping[str, Any]) -> bool:
            return x["isDefault"]

        subscriptions.sort(key=key_fn, reverse=True)
        subscription_ids = [sub["id"] for sub in subscriptions]

        with open(default_creds_path) as f:
            tokens = json.load(f)
            best_token = None
            for token in tokens:
                if best_token is None:
                    best_token = token
                else:
                    # expiresOn may be missing for tokens from service principals
                    if token.get("expiresOn", "") > best_token.get(
                            "expiresOn", ""):
                        best_token = token
            if best_token is not None:
                token = best_token.copy()
                token["subscription_ids"] = subscription_ids
                return token

    return {}
Beispiel #9
0
def load_credentials() -> Mapping[str, str]:
    # https://github.com/Azure/azure-sdk-for-python/tree/master/sdk/identity/azure-identity#environment-variables
    # AZURE_STORAGE_KEY seems to be the environment variable mentioned by the az cli
    # AZURE_STORAGE_ACCOUNT_KEY is mentioned elsewhere on the internet
    for varname in ["AZURE_STORAGE_KEY", "AZURE_STORAGE_ACCOUNT_KEY"]:
        if varname in os.environ:
            result = dict(storageAccountKey=os.environ[varname])
            if "AZURE_STORAGE_ACCOUNT" in os.environ:
                result["account"] = os.environ["AZURE_STORAGE_ACCOUNT"]
            return result

    if "AZURE_APPLICATION_CREDENTIALS" in os.environ:
        creds_path = os.environ["AZURE_APPLICATION_CREDENTIALS"]
        if not os.path.exists(creds_path):
            raise Error(
                f"Credentials not found at '{creds_path}' specified by environment variable 'AZURE_APPLICATION_CREDENTIALS'"
            )
        with open(creds_path) as f:
            return json.load(f)

    if "AZURE_CLIENT_ID" in os.environ:
        return dict(
            appId=os.environ["AZURE_CLIENT_ID"],
            password=os.environ["AZURE_CLIENT_SECRET"],
            tenant=os.environ["AZURE_TENANT_ID"],
        )

    if "AZURE_STORAGE_CONNECTION_STRING" in os.environ:
        connection_data = {}
        # technically this should be parsed according to the rules in https://www.connectionstrings.com/formating-rules-for-connection-strings/
        for part in os.environ["AZURE_STORAGE_CONNECTION_STRING"].split(";"):
            key, _, val = part.partition("=")
            connection_data[key.lower()] = val
        return dict(
            account=connection_data["accountname"],
            storageAccountKey=connection_data["accountkey"],
        )

    # look for a refresh token in the az command line credentials
    # https://mikhail.io/2019/07/how-azure-cli-manages-access-tokens/
    default_creds_path = os.path.expanduser("~/.azure/accessTokens.json")
    if os.path.exists(default_creds_path):
        default_profile_path = os.path.expanduser("~/.azure/azureProfile.json")
        if not os.path.exists(default_profile_path):
            raise Error(
                f"Missing default profile path: '{default_profile_path}'")
        with open(default_profile_path, "rb") as f:
            # this file has a UTF-8 BOM
            profile = json.loads(f.read().decode("utf-8-sig"))
            subscriptions = [sub["id"] for sub in profile["subscriptions"]]

        with open(default_creds_path) as f:
            tokens = json.load(f)
            best_token = None
            for token in tokens:
                if best_token is None:
                    best_token = token
                else:
                    if token["expiresOn"] > best_token["expiresOn"]:
                        best_token = token
            if best_token is not None:
                token = best_token.copy()
                token["subscriptions"] = subscriptions
                return token

    raise Error("""Azure credentials not found, please do one of the following:

1) Log in with 'az login', blobfile will use your default credentials to lookup your storage account key
2) Set the environment variable 'AZURE_STORAGE_KEY' to your storage account key which you can find by following this guide: https://docs.microsoft.com/en-us/azure/storage/common/storage-account-keys-manage
3) Create an account with 'az ad sp create-for-rbac --name <name>' and set the 'AZURE_APPLICATION_CREDENTIALS' environment variable to the path of the output from that command or individually set the 'AZURE_CLIENT_ID', 'AZURE_CLIENT_SECRET', and 'AZURE_TENANT_ID' environment variables"""
                )