Esempio n. 1
0
def hdfs_connect(host='localhost', port=50070, protocol='webhdfs',
                 auth_mechanism='NOSASL', verify=True, **kwds):
    """
    Connect to HDFS

    Parameters
    ----------
    host : string, Host name of the HDFS NameNode
    port : int, NameNode's WebHDFS port (default 50070)
    protocol : {'webhdfs'}
        Accepted for API compatibility; this function does not consult it.
    auth_mechanism : string, Set to NOSASL or PLAIN for non-secure clusters.
        Set to GSSAPI or LDAP for Kerberos-secured clusters.
    verify : boolean, Set to False to turn off verifying SSL certificates.
        (default True)

    Other keywords are forwarded to hdfs library classes. If a ``session``
    keyword is supplied it is used as the HTTP session (its ``verify``
    attribute is overwritten with `verify`); otherwise a new
    ``requests.Session`` is created.

    Returns
    -------
    client : WebHDFS

    Raises
    ------
    IbisError
        If GSSAPI or LDAP is requested but requests-kerberos cannot be
        imported.
    """
    import requests

    # Build a Session only when the caller did not provide one. Using
    # ``kwds.setdefault('session', requests.Session())`` would construct (and
    # then discard) a fresh Session on every call, and would fail on an
    # explicit ``session=None``.
    session = kwds.get('session')
    if session is None:
        session = requests.Session()
        kwds['session'] = session
    session.verify = verify

    if auth_mechanism in ('GSSAPI', 'LDAP'):
        # Import solely to verify that the optional dependency is installed
        # before touching hdfs.ext.kerberos.
        try:
            import requests_kerberos  # noqa: F401
        except ImportError:
            raise IbisError(
                "Unable to import requests-kerberos, which is required for "
                "Kerberos HDFS support. Install it by executing `pip install "
                "requests-kerberos` or `pip install hdfs[kerberos]`.")
        from hdfs.ext.kerberos import KerberosClient
        # Kerberos-secured clusters are reached over SSL.
        url = 'https://{0}:{1}'.format(host, port)
        # Callers may override the mutual authentication policy.
        kwds.setdefault('mutual_auth', 'OPTIONAL')
        hdfs_client = KerberosClient(url, **kwds)
    else:
        from hdfs.client import InsecureClient
        url = 'http://{0}:{1}'.format(host, port)
        hdfs_client = InsecureClient(url, **kwds)
    return WebHDFS(hdfs_client)
Esempio n. 2
0
def hdfs_connect(host='localhost', port=50070, protocol='webhdfs',
                 use_kerberos=False, verify=True, **kwds):
    """
    Connect to HDFS

    Parameters
    ----------
    host : string
    port : int, default 50070 (webhdfs default)
    protocol : {'webhdfs'}
        Accepted for API compatibility; this function does not consult it.
    use_kerberos : boolean, default False
        When True, connect over HTTPS with a Kerberos-authenticated client;
        otherwise connect over plain HTTP with an insecure client.
    verify : boolean, default True
        Set to False to turn off verifying SSL certificates

    Other keywords are forwarded to hdfs library classes. For Kerberos
    connections ``mutual_auth`` defaults to 'OPTIONAL' unless supplied.

    Returns
    -------
    client : ibis HDFS client

    Raises
    ------
    IbisError
        If `use_kerberos` is True but requests-kerberos cannot be imported.
    """
    if use_kerberos:
        # Import solely to verify that the optional dependency is installed
        # before touching hdfs.ext.kerberos.
        try:
            import requests_kerberos  # noqa: F401
        except ImportError:
            raise IbisError(
                "Unable to import requests-kerberos, which is required for "
                "Kerberos HDFS support. Install it by executing `pip install "
                "requests-kerberos` or `pip install hdfs[kerberos]`.")
        from hdfs.ext.kerberos import KerberosClient
        # Kerberos-secured clusters are reached over SSL.
        url = 'https://{0}:{1}'.format(host, port)
        # setdefault rather than a hard-coded keyword: passing both
        # mutual_auth='OPTIONAL' and a caller-supplied mutual_auth in **kwds
        # would raise TypeError for a duplicate keyword argument.
        kwds.setdefault('mutual_auth', 'OPTIONAL')
        hdfs_client = KerberosClient(url, verify=verify, **kwds)
    else:
        from hdfs.client import InsecureClient
        url = 'http://{0}:{1}'.format(host, port)
        hdfs_client = InsecureClient(url, verify=verify, **kwds)
    return WebHDFS(hdfs_client)
Esempio n. 3
0
def hdfs_connect(host='localhost',
                 port=50070,
                 protocol='webhdfs',
                 use_https='default',
                 auth_mechanism='NOSASL',
                 verify=True,
                 session=None,
                 **kwds):
    """Open a WebHDFS connection and wrap it in a :class:`WebHDFS` client.

    Parameters
    ----------
    host : str
        Host name of the HDFS NameNode.
    port : int
        WebHDFS port of the NameNode.
    protocol : str
        Communication protocol; ``'webhdfs'`` is the only valid value.
    use_https : bool or ``'default'``
        Whether to talk to WebHDFS over HTTPS (true) or plain HTTP (false).
        With ``'default'``, HTTPS is chosen for the secure authentication
        mechanisms and HTTP otherwise.
    auth_mechanism : str
        ``'NOSASL'`` or ``'PLAIN'`` for non-secure clusters; ``'GSSAPI'`` or
        ``'LDAP'`` for Kerberos-secured clusters.
    verify : bool
        Pass :data:`False` to disable SSL certificate verification.
    session : Optional[requests.Session]
        A custom :class:`requests.Session`; a new one is created when
        omitted. Its ``verify`` attribute is set from `verify` either way.

    Notes
    -----
    Any remaining keywords are forwarded to the HDFS library client class.

    Returns
    -------
    WebHDFS

    Raises
    ------
    IbisError
        If a Kerberos mechanism is requested but ``requests-kerberos`` cannot
        be imported.

    """
    import requests

    http_session = requests.Session() if session is None else session
    http_session.verify = verify

    # Resolve the URL scheme once; only the 'default' case depends on
    # whether the cluster is Kerberos-secured.
    kerberized = auth_mechanism in ('GSSAPI', 'LDAP')
    if use_https == 'default':
        scheme = 'https' if kerberized else 'http'
    elif use_https:
        scheme = 'https'
    else:
        scheme = 'http'

    if not kerberized:
        from hdfs.client import InsecureClient

        endpoint = '{}://{}:{}'.format(scheme, host, port)
        return WebHDFS(
            InsecureClient(endpoint, session=http_session, **kwds))

    # Imported only to confirm the optional Kerberos dependency is present.
    try:
        import requests_kerberos  # noqa: F401
    except ImportError:
        raise IbisError(
            "Unable to import requests-kerberos, which is required for "
            "Kerberos HDFS support. Install it by executing `pip install "
            "requests-kerberos` or `pip install hdfs[kerberos]`.")
    from hdfs.ext.kerberos import KerberosClient

    endpoint = '{0}://{1}:{2}'.format(scheme, host, port)
    # Let callers override the mutual authentication policy.
    kwds.setdefault('mutual_auth', 'OPTIONAL')
    return WebHDFS(KerberosClient(endpoint, session=http_session, **kwds))
Esempio n. 4
0
 def setUpClass(cls):
     """Prepare class-level HDFS fixtures.

     Stores the module-level ``ENV`` on the class, connects an
     ``InsecureClient`` to ``ENV.hdfs_url``, wraps it in ``WebHDFS``, and
     creates a working directory under ``ENV.tmp_dir`` whose name comes from
     ``util.guid()`` (presumably a unique id -- confirm against ``util``).
     """
     env = ENV
     cls.ENV = env

     scratch_dir = pjoin(env.tmp_dir, util.guid())
     cls.tmp_dir = scratch_dir

     raw_client = InsecureClient(env.hdfs_url)
     cls.hdfs_client = raw_client

     wrapped = WebHDFS(raw_client)
     cls.hdfs = wrapped
     wrapped.mkdir(scratch_dir)