Esempio n. 1
0
'''
Copyright (C) 2015 Dato, Inc.

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
'''
from graphlab.connect.server import _get_hadoop_class_path

if __name__ == '__main__':
    classpath = _get_hadoop_class_path()

    with open('classpath.sh', 'w') as f:
        f.write('export CLASSPATH=%s\n' % classpath)
Esempio n. 2
0
def make_internal_url(url):
    """
    Takes a user input url string and translates into url relative to the server process.
    - URL to a local location begins with "local://" or has no "*://" modifier.
      If the server is local, returns the absolute path of the url.
      For example: "local:///tmp/foo" -> "/tmp/foo" and "./foo" -> os.path.abspath("./foo").
      If the server is not local, raise NotImplementedError.
    - URL to a server location begins with "remote://".
      Returns the absolute path after the "remote://" modifier.
      For example: "remote:///tmp/foo" -> "/tmp/foo".
    - URL to a s3 location begins with "s3://":
      Returns the s3 URL with credentials filled in using graphlab.aws.get_aws_credential().
      For example: "s3://mybucket/foo" -> "s3://$AWS_ACCESS_KEY_ID:$AWS_SECRET_ACCESS_KEY:mybucket/foo".
    - URL to other remote locations, e.g. http://, will remain as is.
    - Expands ~ to $HOME

    Parameters
    ----------
    string
        A URL (as described above).

    Raises
    ------
    ValueError
        If a bad url is provided.
    """
    if not url:
        raise ValueError('Invalid url: %s' % url)

    # The final file path on server.
    path_on_server = None

    # Try to split the url into (protocol, path).
    urlsplit = url.split("://")
    if len(urlsplit) == 2:
        protocol, path = urlsplit
        if not path:
            raise ValueError('Invalid url: %s' % url)
        if protocol in ['http', 'https']:
            # protocol is a remote url not on server, just return
            return url
        elif protocol == 'hdfs':
            if isinstance(_glconnect.get_server(), _server.LocalServer) and not _server._get_hadoop_class_path():
                raise ValueError("HDFS URL is not supported because Hadoop not found. Please make hadoop available from PATH or set the environment variable HADOOP_HOME and try again.")
            else:
                return url
        elif protocol == 's3':
            if len(path.split(":")) == 3:
            # s3 url already contains secret key/id pairs, just return
                return url
            else:
            # s3 url does not contain secret key/id pair, query the environment variables
                (k, v) = _get_aws_credentials()
                return 's3://' + k + ':' + v + ':' + path
        elif protocol == 'remote':
        # url for files on the server
            path_on_server = path
        elif protocol == 'local':
        # url for files on local client, check if we are connecting to local server
            if (isinstance(_glconnect.get_server(), _server.LocalServer)):
                path_on_server = path
            else:
                raise ValueError('Cannot use local URL when connecting to a remote server.')
        else:
            raise ValueError('Invalid url protocol %s. Supported url protocols are: remote://, local://, s3://, https:// and hdfs://' % protocol)
    elif len(urlsplit) == 1:
        # expand ~ to $HOME
        url = _os.path.expanduser(url)
        # url for files on local client, check if we are connecting to local server
        if (isinstance(_glconnect.get_server(), _server.LocalServer)):
            path_on_server = url
        else:
            raise ValueError('Cannot use local URL when connecting to a remote server.')
    else:
        raise ValueError('Invalid url: %s' % url)

    if path_on_server:
        return _os.path.abspath(_os.path.expanduser(path_on_server))
    else:
        raise ValueError('Invalid url: %s' % url)