def split_path(path: str) -> Tuple[str, str]:
    """Split a ``gs://bucket/object`` path into its bucket and object name.

    Args:
        path: a path that must start with ``gs://``.

    Returns:
        ``(bucket, obj)`` where ``obj`` is everything after the first ``/``
        that follows the bucket (an empty string when there is none).

    Raises:
        Error: if ``path`` does not start with ``gs://`` or the bucket part
            is empty.
    """
    scheme = "gs://"
    if not path.startswith(scheme):
        raise Error(f"Invalid path: '{path}'")
    bucket, _, obj = path[len(scheme):].partition("/")
    if bucket == "":
        # Report the path exactly as the caller supplied it; the remainder
        # after the scheme is kept in separate locals so the message is not
        # built from a truncated value.
        raise Error(f"Invalid path: '{path}'")
    return bucket, obj
def split_url_https(path: str) -> Tuple[str, str, str]:
    """Break an Azure blob ``https://`` URL into account, container and object.

    The text after the first ``len("https://")`` characters is read as
    ``<hostname>/<container>[/<object>]``. The scheme prefix itself is not
    checked here.

    Returns:
        ``(account, container, obj)`` where ``account`` is the part of the
        hostname before its first ``.`` and ``obj`` may be empty.

    Raises:
        Error: if there is no ``/`` after the hostname, the hostname does not
            end with ``.blob.core.windows.net``, or the container is empty.
    """
    remainder = path[len("https://"):]
    hostname, slash, tail = remainder.partition("/")
    if not slash:
        # no "/" at all, so there is no container segment
        raise Error(f"Invalid path: '{path}'")
    container, _, obj = tail.partition("/")
    if container == "" or not hostname.endswith(".blob.core.windows.net"):
        raise Error(f"Invalid path: '{path}'")
    account = hostname.partition(".")[0]
    return account, container, obj
def load_credentials() -> Mapping[str, str]:
    """Locate Azure credentials from the environment or the az CLI files.

    Sources are tried in this order and the first one present wins:

    1. ``AZURE_STORAGE_ACCOUNT_KEY`` -> ``{"storageAccountKey": <value>}``.
    2. ``AZURE_APPLICATION_CREDENTIALS`` -> the parsed JSON content of the
       file it names.
    3. ``AZURE_CLIENT_ID`` (read together with ``AZURE_CLIENT_SECRET`` and
       ``AZURE_TENANT_ID``) -> ``{"appId", "password", "tenant"}``.
    4. ``~/.azure/accessTokens.json`` -> a shallow copy of the entry with the
       greatest ``expiresOn`` value, with a ``"subscriptions"`` key added
       holding the subscription ids listed in ``~/.azure/azureProfile.json``.

    Returns:
        The credentials mapping described above.

    Raises:
        Error: if ``AZURE_APPLICATION_CREDENTIALS`` names a missing file, if
            the access token file exists but the profile file does not, or if
            no source yields credentials.
        KeyError: if ``AZURE_CLIENT_ID`` is set but ``AZURE_CLIENT_SECRET`` or
            ``AZURE_TENANT_ID`` is not.
    """
    if "AZURE_STORAGE_ACCOUNT_KEY" in os.environ:
        return dict(storageAccountKey=os.environ["AZURE_STORAGE_ACCOUNT_KEY"])

    if "AZURE_APPLICATION_CREDENTIALS" in os.environ:
        creds_path = os.environ["AZURE_APPLICATION_CREDENTIALS"]
        if not os.path.exists(creds_path):
            raise Error(
                f"Credentials not found at '{creds_path}' specified by environment variable 'AZURE_APPLICATION_CREDENTIALS'"
            )
        with open(creds_path) as f:
            return json.load(f)

    if "AZURE_CLIENT_ID" in os.environ:
        return dict(
            appId=os.environ["AZURE_CLIENT_ID"],
            password=os.environ["AZURE_CLIENT_SECRET"],
            tenant=os.environ["AZURE_TENANT_ID"],
        )

    # look for a refresh token in the az command line credentials
    # https://mikhail.io/2019/07/how-azure-cli-manages-access-tokens/
    default_creds_path = os.path.expanduser("~/.azure/accessTokens.json")
    if os.path.exists(default_creds_path):
        default_profile_path = os.path.expanduser("~/.azure/azureProfile.json")
        if not os.path.exists(default_profile_path):
            raise Error(f"Missing default profile path: '{default_profile_path}'")
        with open(default_profile_path, "rb") as f:
            # this file has a UTF-8 BOM
            profile = json.loads(f.read().decode("utf-8-sig"))
        subscriptions = [sub["id"] for sub in profile["subscriptions"]]

        with open(default_creds_path) as f:
            tokens = json.load(f)

        best_token = None
        for token in tokens:
            # An entry may lack "expiresOn"; treat a missing value as the
            # empty string (which sorts lowest) instead of raising KeyError.
            if best_token is None or token.get("expiresOn", "") > best_token.get(
                "expiresOn", ""
            ):
                best_token = token
        if best_token is not None:
            # copy so the added key does not mutate the parsed token list
            token = copy.copy(best_token)
            token["subscriptions"] = subscriptions
            return token

    raise Error(
        "Azure credentials not found, please do one of the following: "
        "1) Log in with 'az login', blobfile will use your default credentials to lookup your storage account key "
        "2) Set the environment variable 'AZURE_STORAGE_ACCOUNT_KEY' to your storage account key which you can find by following this guide: https://docs.microsoft.com/en-us/azure/storage/common/storage-account-keys-manage "
        "3) Create an account with 'az ad sp create-for-rbac --name <name>' and set the 'AZURE_APPLICATION_CREDENTIALS' environment variable to the path of the output from that command or individually set the 'AZURE_CLIENT_ID', 'AZURE_CLIENT_SECRET', and 'AZURE_TENANT_ID' environment variables"
    )
def create_access_token_request(scopes: List[str]) -> Request:
    """Build the request used to obtain an access token from loaded credentials.

    ``load_credentials()`` is unpacked here as a ``(credentials, error)`` pair.
    Credentials containing ``"private_key"`` are passed, together with
    ``scopes``, to ``_create_token_request``; credentials containing
    ``"refresh_token"`` are passed to ``_refresh_access_token_request``
    (``scopes`` is not used on that path).

    Raises:
        Error: if loading credentials reported an error, or the credentials
            contain neither ``"private_key"`` nor ``"refresh_token"``.
    """
    credentials, problem = load_credentials()
    if problem is not None:
        raise Error(problem)

    if "private_key" in credentials:
        # looks like GCS does not support the no-oauth flow https://developers.google.com/identity/protocols/OAuth2ServiceAccount#jwt-auth
        return _create_token_request(
            credentials["client_email"], credentials["private_key"], scopes
        )

    if "refresh_token" in credentials:
        return _refresh_access_token_request(
            refresh_token=credentials["refresh_token"],
            client_id=credentials["client_id"],
            client_secret=credentials["client_secret"],
        )

    raise Error("Credentials not recognized")
def split_url(path: str) -> Tuple[str, str, str]:
    """Split an Azure blob path given in ``az://`` or ``https://`` form.

    Delegates to ``split_url_az`` or ``split_url_https`` according to the
    scheme prefix and returns whatever that function returns.

    Raises:
        Error: if ``path`` starts with neither ``az://`` nor ``https://``.
    """
    if path.startswith("az://"):
        return split_url_az(path)
    if path.startswith("https://"):
        return split_url_https(path)
    # neither supported scheme matched
    raise Error(f"Invalid path: '{path}'")
def split_url_az(path: str) -> Tuple[str, str, str]:
    """Break an ``az://account/container/object`` path into its three parts.

    The first ``len("az://")`` characters are dropped without being checked.

    Returns:
        ``(account, container, obj)``; ``obj`` is everything after the second
        ``/`` and may be empty.

    Raises:
        Error: if nothing after the scheme contains a ``/``.
    """
    remainder = path[len("az://"):]
    account, slash, tail = remainder.partition("/")
    if not slash:
        # only an account was given; there is no container segment
        raise Error(f"Invalid path: '{path}'")
    container, _, obj = tail.partition("/")
    return account, container, obj
def generate_signed_url(
    bucket: str,
    name: str,
    expiration: float,
    method: str = "GET",
    params: Optional[Mapping[str, str]] = None,
    headers: Optional[Mapping[str, str]] = None,
) -> Tuple[str, Optional[float]]:
    """Build a ``GOOG4-RSA-SHA256`` signed URL for an object on storage.googleapis.com.

    Args:
        bucket: bucket name, placed verbatim in the URL path.
        name: object name; percent-encoded with no safe characters, so ``/``
            is escaped as well.
        expiration: value written to ``X-Goog-Expires``; must not exceed
            ``MAX_EXPIRATION``.
        method: HTTP method placed in the canonical request.
        params: extra query parameters to sign. The ``X-Goog-*`` signing
            parameters set by this function overwrite same-named entries.
        headers: extra headers to sign. Names are lowercased before sorting;
            ``host`` is always set to ``storage.googleapis.com``.

    Returns:
        ``(signed_url, expiration)``.

    Raises:
        Error: if loading credentials reported an error, the credentials have
            no ``"private_key"``, or ``expiration`` exceeds ``MAX_EXPIRATION``.
    """
    query = dict(params) if params is not None else {}

    # Normalize header names to lowercase *before* sorting: the canonical
    # request orders headers by their lowercase names, and sorting the
    # original-case names puts e.g. "X-Goog-Meta-Foo" ahead of "host".
    # NOTE(review): values are lowercased too, as the previous code did; the
    # canonical-request rules only call for lowercase header names -- confirm
    # before signing headers whose values are case-sensitive.
    normalized_headers = {}
    if headers is not None:
        for k, v in dict(headers).items():
            normalized_headers[str(k).lower()] = str(v).lower()
    normalized_headers["host"] = "storage.googleapis.com"

    # https://cloud.google.com/storage/docs/access-control/signing-urls-manually
    creds, err = load_credentials()
    if err is not None:
        raise Error(err)
    if "private_key" not in creds:
        raise Error(
            "Private key not found in credentials. Please set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable to point to a JSON key for a service account to use this call"
        )

    if expiration > MAX_EXPIRATION:
        raise Error(f"Expiration can't be longer than {MAX_EXPIRATION} seconds.")

    escaped_object_name = urllib.parse.quote(name, safe="")
    canonical_uri = f"/{bucket}/{escaped_object_name}"

    # Timezone-aware replacement for the deprecated utcnow(); the formatted
    # strings below are the same.
    datetime_now = datetime.datetime.now(datetime.timezone.utc)
    request_timestamp = datetime_now.strftime("%Y%m%dT%H%M%SZ")
    datestamp = datetime_now.strftime("%Y%m%d")

    credential_scope = f"{datestamp}/auto/storage/goog4_request"
    credential = f"{creds['client_email']}/{credential_scope}"

    ordered_headers = sorted(normalized_headers.items())
    # every header line, including the last, ends with a newline
    canonical_headers = "".join(f"{k}:{v}\n" for k, v in ordered_headers)
    signed_headers = ";".join(k for k, _ in ordered_headers)

    query["X-Goog-Algorithm"] = "GOOG4-RSA-SHA256"
    query["X-Goog-Credential"] = credential
    query["X-Goog-Date"] = request_timestamp
    query["X-Goog-Expires"] = str(expiration)
    query["X-Goog-SignedHeaders"] = signed_headers

    canonical_query_string = "&".join(
        "{}={}".format(
            urllib.parse.quote(str(k), safe=""),
            urllib.parse.quote(str(v), safe=""),
        )
        for k, v in sorted(query.items())
    )

    canonical_request = "\n".join([
        method,
        canonical_uri,
        canonical_query_string,
        canonical_headers,
        signed_headers,
        "UNSIGNED-PAYLOAD",
    ])
    canonical_request_hash = hashlib.sha256(
        canonical_request.encode()).hexdigest()

    string_to_sign = "\n".join([
        "GOOG4-RSA-SHA256",
        request_timestamp,
        credential_scope,
        canonical_request_hash,
    ])

    signature = binascii.hexlify(
        _sign(creds["private_key"], string_to_sign.encode("utf8"))).decode("utf8")

    host_name = "https://storage.googleapis.com"
    signed_url = f"{host_name}{canonical_uri}?{canonical_query_string}&X-Goog-Signature={signature}"
    return signed_url, expiration
def load_credentials() -> Dict[str, Any]:
    """Collect Azure credentials from the environment or the az CLI files.

    Sources are checked in order and the first one present is returned:

    1. ``AZURE_STORAGE_KEY``, then ``AZURE_STORAGE_ACCOUNT_KEY`` ->
       ``{"storageAccountKey": ...}``, plus ``"account"`` when
       ``AZURE_STORAGE_ACCOUNT`` is set.
    2. ``AZURE_APPLICATION_CREDENTIALS`` -> parsed JSON of the file it names.
    3. ``AZURE_CLIENT_ID`` (read with ``AZURE_CLIENT_SECRET`` and
       ``AZURE_TENANT_ID``) -> ``{"appId", "password", "tenant"}``.
    4. ``AZURE_STORAGE_CONNECTION_STRING`` -> ``{"account", "storageAccountKey"}``
       taken from its ``accountname`` / ``accountkey`` fields.
    5. ``~/.azure/accessTokens.json`` -> a copy of the entry with the greatest
       ``expiresOn``, with ``"subscription_ids"`` added (ids from
       ``~/.azure/azureProfile.json``, sorted by ``isDefault`` descending).

    Returns:
        The credentials dict, or ``{}`` when no source yields anything.

    Raises:
        Error: if ``AZURE_APPLICATION_CREDENTIALS`` names a missing file, or
            the access token file exists but the profile file does not.
    """
    env = os.environ

    # https://github.com/Azure/azure-sdk-for-python/tree/master/sdk/identity/azure-identity#environment-variables
    # AZURE_STORAGE_KEY seems to be the environment variable mentioned by the az cli
    # AZURE_STORAGE_ACCOUNT_KEY is mentioned elsewhere on the internet
    for varname in ("AZURE_STORAGE_KEY", "AZURE_STORAGE_ACCOUNT_KEY"):
        if varname not in env:
            continue
        found = {"storageAccountKey": env[varname]}
        if "AZURE_STORAGE_ACCOUNT" in env:
            found["account"] = env["AZURE_STORAGE_ACCOUNT"]
        return found

    if "AZURE_APPLICATION_CREDENTIALS" in env:
        creds_path = env["AZURE_APPLICATION_CREDENTIALS"]
        if not os.path.exists(creds_path):
            raise Error(
                f"Credentials not found at '{creds_path}' specified by environment variable 'AZURE_APPLICATION_CREDENTIALS'"
            )
        with open(creds_path) as creds_file:
            return json.load(creds_file)

    if "AZURE_CLIENT_ID" in env:
        return {
            "appId": env["AZURE_CLIENT_ID"],
            "password": env["AZURE_CLIENT_SECRET"],
            "tenant": env["AZURE_TENANT_ID"],
        }

    if "AZURE_STORAGE_CONNECTION_STRING" in env:
        # technically this should be parsed according to the rules in https://www.connectionstrings.com/formating-rules-for-connection-strings/
        fields = (
            part.partition("=")
            for part in env["AZURE_STORAGE_CONNECTION_STRING"].split(";")
        )
        connection_data = {key.lower(): val for key, _, val in fields}
        return {
            "account": connection_data["accountname"],
            "storageAccountKey": connection_data["accountkey"],
        }

    # look for a refresh token in the az command line credentials
    # https://mikhail.io/2019/07/how-azure-cli-manages-access-tokens/
    default_creds_path = os.path.expanduser("~/.azure/accessTokens.json")
    if not os.path.exists(default_creds_path):
        return {}

    default_profile_path = os.path.expanduser("~/.azure/azureProfile.json")
    if not os.path.exists(default_profile_path):
        raise Error(
            f"Missing default profile path: '{default_profile_path}'")
    with open(default_profile_path, "rb") as profile_file:
        # this file has a UTF-8 BOM
        profile = json.loads(profile_file.read().decode("utf-8-sig"))
    subscriptions = profile["subscriptions"]
    # entries flagged isDefault come first
    subscriptions.sort(key=lambda sub: sub["isDefault"], reverse=True)
    subscription_ids = [sub["id"] for sub in subscriptions]

    with open(default_creds_path) as tokens_file:
        tokens = json.load(tokens_file)

    newest = None
    for candidate in tokens:
        # expiresOn may be missing for tokens from service principals
        if newest is not None and not (
            candidate.get("expiresOn", "") > newest.get("expiresOn", "")
        ):
            continue
        newest = candidate

    if newest is None:
        return {}
    chosen = newest.copy()
    chosen["subscription_ids"] = subscription_ids
    return chosen
def load_credentials() -> Mapping[str, str]:
    """Locate Azure credentials from the environment or the az CLI files.

    Sources are tried in this order and the first one present wins:

    1. ``AZURE_STORAGE_KEY``, then ``AZURE_STORAGE_ACCOUNT_KEY`` ->
       ``{"storageAccountKey": ...}``, plus ``"account"`` when
       ``AZURE_STORAGE_ACCOUNT`` is set.
    2. ``AZURE_APPLICATION_CREDENTIALS`` -> parsed JSON of the file it names.
    3. ``AZURE_CLIENT_ID`` (read with ``AZURE_CLIENT_SECRET`` and
       ``AZURE_TENANT_ID``) -> ``{"appId", "password", "tenant"}``.
    4. ``AZURE_STORAGE_CONNECTION_STRING`` -> ``{"account", "storageAccountKey"}``
       taken from its ``accountname`` / ``accountkey`` fields.
    5. ``~/.azure/accessTokens.json`` -> a copy of the entry with the greatest
       ``expiresOn``, with a ``"subscriptions"`` key added holding the
       subscription ids listed in ``~/.azure/azureProfile.json``.

    Returns:
        The credentials mapping described above.

    Raises:
        Error: if ``AZURE_APPLICATION_CREDENTIALS`` names a missing file, if
            the access token file exists but the profile file does not, or if
            no source yields credentials.
        KeyError: if ``AZURE_CLIENT_ID`` is set without
            ``AZURE_CLIENT_SECRET`` / ``AZURE_TENANT_ID``, or the connection
            string lacks ``AccountName`` / ``AccountKey``.
    """
    # https://github.com/Azure/azure-sdk-for-python/tree/master/sdk/identity/azure-identity#environment-variables
    # AZURE_STORAGE_KEY seems to be the environment variable mentioned by the az cli
    # AZURE_STORAGE_ACCOUNT_KEY is mentioned elsewhere on the internet
    for varname in ["AZURE_STORAGE_KEY", "AZURE_STORAGE_ACCOUNT_KEY"]:
        if varname in os.environ:
            result = dict(storageAccountKey=os.environ[varname])
            if "AZURE_STORAGE_ACCOUNT" in os.environ:
                result["account"] = os.environ["AZURE_STORAGE_ACCOUNT"]
            return result

    if "AZURE_APPLICATION_CREDENTIALS" in os.environ:
        creds_path = os.environ["AZURE_APPLICATION_CREDENTIALS"]
        if not os.path.exists(creds_path):
            raise Error(
                f"Credentials not found at '{creds_path}' specified by environment variable 'AZURE_APPLICATION_CREDENTIALS'"
            )
        with open(creds_path) as f:
            return json.load(f)

    if "AZURE_CLIENT_ID" in os.environ:
        return dict(
            appId=os.environ["AZURE_CLIENT_ID"],
            password=os.environ["AZURE_CLIENT_SECRET"],
            tenant=os.environ["AZURE_TENANT_ID"],
        )

    if "AZURE_STORAGE_CONNECTION_STRING" in os.environ:
        connection_data = {}
        # technically this should be parsed according to the rules in https://www.connectionstrings.com/formating-rules-for-connection-strings/
        for part in os.environ["AZURE_STORAGE_CONNECTION_STRING"].split(";"):
            # split on the first "=" only, so "=" inside the value is kept
            key, _, val = part.partition("=")
            connection_data[key.lower()] = val
        return dict(
            account=connection_data["accountname"],
            storageAccountKey=connection_data["accountkey"],
        )

    # look for a refresh token in the az command line credentials
    # https://mikhail.io/2019/07/how-azure-cli-manages-access-tokens/
    default_creds_path = os.path.expanduser("~/.azure/accessTokens.json")
    if os.path.exists(default_creds_path):
        default_profile_path = os.path.expanduser("~/.azure/azureProfile.json")
        if not os.path.exists(default_profile_path):
            raise Error(
                f"Missing default profile path: '{default_profile_path}'")
        with open(default_profile_path, "rb") as f:
            # this file has a UTF-8 BOM
            profile = json.loads(f.read().decode("utf-8-sig"))
        subscriptions = [sub["id"] for sub in profile["subscriptions"]]

        with open(default_creds_path) as f:
            tokens = json.load(f)

        best_token = None
        for token in tokens:
            # An entry may lack "expiresOn"; treat a missing value as the
            # empty string (which sorts lowest) instead of raising KeyError.
            if best_token is None or token.get("expiresOn", "") > best_token.get(
                "expiresOn", ""
            ):
                best_token = token
        if best_token is not None:
            # copy so the added key does not mutate the parsed token list
            token = best_token.copy()
            token["subscriptions"] = subscriptions
            return token

    raise Error(
        "Azure credentials not found, please do one of the following: "
        "1) Log in with 'az login', blobfile will use your default credentials to lookup your storage account key "
        "2) Set the environment variable 'AZURE_STORAGE_KEY' to your storage account key which you can find by following this guide: https://docs.microsoft.com/en-us/azure/storage/common/storage-account-keys-manage "
        "3) Create an account with 'az ad sp create-for-rbac --name <name>' and set the 'AZURE_APPLICATION_CREDENTIALS' environment variable to the path of the output from that command or individually set the 'AZURE_CLIENT_ID', 'AZURE_CLIENT_SECRET', and 'AZURE_TENANT_ID' environment variables"
    )