def hdfs_connect(host='localhost', port=50070, protocol='webhdfs',
                 auth_mechanism='NOSASL', verify=True, **kwds):
    """
    Connect to HDFS

    Parameters
    ----------
    host : string, Host name of the HDFS NameNode
    port : int, NameNode's WebHDFS port (default 50070)
    protocol : {'webhdfs'}
      Accepted for API compatibility; this function does not read it.
    auth_mechanism : string, Set to NOSASL or PLAIN for non-secure clusters.
      Set to GSSAPI or LDAP for Kerberos-secured clusters. GSSAPI and LDAP
      select the Kerberos client over https; any other value selects the
      insecure client over http.
    verify : boolean, Set to False to turn off verifying SSL certificates.
      (default True) The value is assigned to ``session.verify``.

    Other keywords are forwarded to hdfs library classes. A ``session``
    keyword, when given and not None, is used as the HTTP session (its
    ``verify`` attribute is overwritten); otherwise a new
    ``requests.Session`` is created.

    Returns
    -------
    client : WebHDFS

    Raises
    ------
    IbisError
      If Kerberos authentication is requested but ``requests_kerberos``
      cannot be imported.
    """
    import requests

    # Build a Session only when the caller did not supply a usable one.
    # (dict.setdefault would evaluate requests.Session() on every call and
    # would hand back an explicit session=None unchanged.)
    session = kwds.get('session')
    if session is None:
        session = requests.Session()
        kwds['session'] = session
    session.verify = verify

    if auth_mechanism in ('GSSAPI', 'LDAP'):
        try:
            # Imported only to check that Kerberos support is installed
            import requests_kerberos  # noqa: F401
        except ImportError:
            raise IbisError(
                "Unable to import requests-kerberos, which is required for "
                "Kerberos HDFS support. Install it by executing `pip install "
                "requests-kerberos` or `pip install hdfs[kerberos]`.")
        from hdfs.ext.kerberos import KerberosClient
        url = 'https://{0}:{1}'.format(host, port)  # note SSL
        # Caller-provided mutual_auth wins over the default
        kwds.setdefault('mutual_auth', 'OPTIONAL')
        hdfs_client = KerberosClient(url, **kwds)
    else:
        from hdfs.client import InsecureClient
        url = 'http://{0}:{1}'.format(host, port)
        hdfs_client = InsecureClient(url, **kwds)
    return WebHDFS(hdfs_client)
def hdfs_connect(host='localhost', port=50070, protocol='webhdfs',
                 use_kerberos=False, verify=True, **kwds):
    """
    Connect to HDFS

    Parameters
    ----------
    host : string
    port : int, default 50070 (webhdfs default)
    protocol : {'webhdfs'}
      Accepted for API compatibility; this function does not read it.
    use_kerberos : boolean, default False
      When true, connect with the Kerberos client over https; otherwise
      connect with the insecure client over http.
    verify : boolean, default True
      Set to False to turn off verifying SSL certificates

    Other keywords are forwarded to hdfs library classes. For Kerberos
    connections ``mutual_auth`` defaults to 'OPTIONAL' unless supplied.

    Returns
    -------
    client : ibis HDFS client

    Raises
    ------
    IbisError
      If ``use_kerberos`` is true but ``requests_kerberos`` cannot be
      imported.
    """
    if use_kerberos:
        try:
            # Imported only to check that Kerberos support is installed
            import requests_kerberos  # noqa: F401
        except ImportError:
            raise IbisError(
                "Unable to import requests-kerberos, which is required for "
                "Kerberos HDFS support. Install it by executing `pip install "
                "requests-kerberos` or `pip install hdfs[kerberos]`.")
        from hdfs.ext.kerberos import KerberosClient
        url = 'https://{0}:{1}'.format(host, port)  # note SSL
        # Use setdefault rather than a literal keyword so that a
        # caller-supplied mutual_auth overrides the default instead of
        # raising TypeError for a duplicated keyword argument.
        kwds.setdefault('mutual_auth', 'OPTIONAL')
        hdfs_client = KerberosClient(url, verify=verify, **kwds)
    else:
        from hdfs.client import InsecureClient
        url = 'http://{0}:{1}'.format(host, port)
        hdfs_client = InsecureClient(url, verify=verify, **kwds)
    return WebHDFS(hdfs_client)
def hdfs_connect(host='localhost', port=50070, protocol='webhdfs',
                 use_https='default', auth_mechanism='NOSASL', verify=True,
                 session=None, **kwds):
    """Build a WebHDFS client for the given NameNode.

    Parameters
    ----------
    host : str
        Host name of the HDFS NameNode.
    port : int
        WebHDFS port of the NameNode.
    protocol : str
        Documented as accepting only ``'webhdfs'``; not read by this
        function.
    use_https : bool or ``'default'``
        Truthy selects ``https``, falsy selects ``http``. The sentinel
        ``'default'`` picks ``https`` for GSSAPI/LDAP authentication and
        ``http`` otherwise.
    auth_mechanism : str
        ``'GSSAPI'`` or ``'LDAP'`` use the Kerberos client; any other
        value (e.g. NOSASL, PLAIN) uses the insecure client.
    verify : bool
        Assigned to ``session.verify``; :data:`False` turns off SSL
        certificate verification.
    session : Optional[requests.Session]
        Session to use; a new :class:`requests.Session` is created when
        this is None.

    Notes
    -----
    Remaining keywords are passed through to the HDFS library client. For
    the Kerberos client, ``mutual_auth`` defaults to ``'OPTIONAL'``.

    Returns
    -------
    WebHDFS

    Raises
    ------
    IbisError
        If Kerberos authentication is selected and ``requests_kerberos``
        cannot be imported.
    """
    import requests

    http_session = requests.Session() if session is None else session
    http_session.verify = verify

    kerberized = auth_mechanism in ('GSSAPI', 'LDAP')

    # Resolve the URL scheme once; 'default' follows the auth mechanism.
    if use_https == 'default':
        scheme = 'https' if kerberized else 'http'
    elif use_https:
        scheme = 'https'
    else:
        scheme = 'http'

    if not kerberized:
        from hdfs.client import InsecureClient
        endpoint = '{0}://{1}:{2}'.format(scheme, host, port)
        return WebHDFS(InsecureClient(endpoint, session=http_session, **kwds))

    try:
        # Availability probe only; the module itself is not used here.
        import requests_kerberos  # noqa: F401
    except ImportError:
        raise IbisError(
            "Unable to import requests-kerberos, which is required for "
            "Kerberos HDFS support. Install it by executing `pip install "
            "requests-kerberos` or `pip install hdfs[kerberos]`.")
    from hdfs.ext.kerberos import KerberosClient

    endpoint = '{0}://{1}:{2}'.format(scheme, host, port)
    kwds.setdefault('mutual_auth', 'OPTIONAL')
    return WebHDFS(KerberosClient(endpoint, session=http_session, **kwds))
def setUpClass(cls):
    """Prepare class-level HDFS fixtures.

    Stores the environment, a uniquely named scratch directory path, the
    raw insecure HDFS client and its WebHDFS wrapper on the class, then
    creates the scratch directory through the wrapper.
    """
    env = ENV
    cls.ENV = env

    # Unique per-run directory underneath the environment's tmp dir
    scratch_dir = pjoin(env.tmp_dir, util.guid())
    cls.tmp_dir = scratch_dir

    raw_client = InsecureClient(env.hdfs_url)
    cls.hdfs_client = raw_client

    wrapped = WebHDFS(raw_client)
    cls.hdfs = wrapped

    wrapped.mkdir(scratch_dir)