Example #1
0
def _try_inject_s3_credentials(url):
    """
    Inject aws credentials into s3 url as s3://[aws_id]:[aws_key]:[bucket/][objectkey]

    If s3 url already contains secret key/id pairs, just return as is.
    """
    assert url.startswith('s3://')
    path = url[5:]
    # Check if the path already contains credentials
    tokens = path.split(':')
    # If there are two ':', its possible that we have already injected credentials
    if len(tokens) == 3:
        # Edge case: there are exactly two ':'s in the object key which is a false alarm.
        # We prevent this by checking that '/' is not in the assumed key and id.
        if ('/' not in tokens[0]) and ('/' not in tokens[1]):
            return url

    # S3 url does not contain secret key/id pair, query the environment variables
    (k, v) = _get_aws_credentials()
    return 's3://' + k + ':' + v + ':' + path
Example #2
0
def _try_inject_s3_credentials(url):
    """
    Inject aws credentials into s3 url as s3://[aws_id]:[aws_key]:[bucket/][objectkey]

    If s3 url already contains secret key/id pairs, just return as is.
    """
    assert url.startswith('s3://')
    path = url[5:]
    # Check if the path already contains credentials
    tokens = path.split(':')
    # If there are two ':', its possible that we have already injected credentials
    if len(tokens) == 3:
        # Edge case: there are exactly two ':'s in the object key which is a false alarm.
        # We prevent this by checking that '/' is not in the assumed key and id.
        if ('/' not in tokens[0]) and ('/' not in tokens[1]):
            return url

    # S3 url does not contain secret key/id pair, query the environment variables
    (k, v) = _get_aws_credentials()
    return 's3://' + k + ':' + v + ':' + path
def make_internal_url(url):
    """
    Takes a user input url string and translates into url relative to the server process.
    - URL to a local location begins with "local://" or has no "*://" modifier.
      If the server is local, returns the absolute path of the url.
      For example: "local:///tmp/foo" -> "/tmp/foo" and "./foo" -> os.path.abspath("./foo").
      If the server is not local, raise NotImplementedError.
    - URL to a server location begins with "remote://".
      Returns the absolute path after the "remote://" modifier.
      For example: "remote:///tmp/foo" -> "/tmp/foo".
    - URL to a s3 location begins with "s3://":
      Returns the s3 URL with credentials filled in using graphlab.aws.get_aws_credential().
      For example: "s3://mybucket/foo" -> "s3://$AWS_ACCESS_KEY_ID:$AWS_SECRET_ACCESS_KEY:mybucket/foo".
    - URL to other remote locations, e.g. http://, will remain as is.
    - Expands ~ to $HOME

    Parameters
    ----------
    string
        A URL (as described above).

    Raises
    ------
    ValueError
        If a bad url is provided.
    """
    if not url:
        raise ValueError('Invalid url: %s' % url)

    # The final file path on server.
    path_on_server = None

    # Try to split the url into (protocol, path).
    urlsplit = url.split("://")
    if len(urlsplit) == 2:
        protocol, path = urlsplit
        if not path:
            raise ValueError('Invalid url: %s' % url)
        if protocol in ['http', 'https']:
            # protocol is a remote url not on server, just return
            return url
        elif protocol == 'hdfs':
            if isinstance(_glconnect.get_server(), _server.LocalServer) and not _server._get_hadoop_class_path():
                raise ValueError("HDFS URL is not supported because Hadoop not found. Please make hadoop available from PATH or set the environment variable HADOOP_HOME and try again.")
            else:
                return url
        elif protocol == 's3':
            if len(path.split(":")) == 3:
            # s3 url already contains secret key/id pairs, just return
                return url
            else:
            # s3 url does not contain secret key/id pair, query the environment variables
                (k, v) = _get_aws_credentials()
                return 's3://' + k + ':' + v + ':' + path
        elif protocol == 'remote':
        # url for files on the server
            path_on_server = path
        elif protocol == 'local':
        # url for files on local client, check if we are connecting to local server
            if (isinstance(_glconnect.get_server(), _server.LocalServer)):
                path_on_server = path
            else:
                raise ValueError('Cannot use local URL when connecting to a remote server.')
        else:
            raise ValueError('Invalid url protocol %s. Supported url protocols are: remote://, local://, s3://, https:// and hdfs://' % protocol)
    elif len(urlsplit) == 1:
        # expand ~ to $HOME
        url = _os.path.expanduser(url)
        # url for files on local client, check if we are connecting to local server
        if (isinstance(_glconnect.get_server(), _server.LocalServer)):
            path_on_server = url
        else:
            raise ValueError('Cannot use local URL when connecting to a remote server.')
    else:
        raise ValueError('Invalid url: %s' % url)

    if path_on_server:
        return _os.path.abspath(_os.path.expanduser(path_on_server))
    else:
        raise ValueError('Invalid url: %s' % url)