def load(data, udf, data_dir, overwrite):
    """Load Ibis test data and build/upload UDFs"""
    con = make_ibis_client(ENV)

    # Fail fast: validate the environment before any expensive work.
    if not can_write_to_hdfs(con):
        raise IbisError('Failed to write to HDFS; check your settings')
    if udf and not can_build_udfs():
        raise IbisError('Build environment does not support building UDFs')

    # Step 1: load the test data files, unless disabled.
    if data:
        load_impala_data(con, str(data_dir), overwrite)
    else:
        logger.info('Skipping Ibis test data load (--no-data)')

    # Step 2: build and upload the UDFs, unless disabled.
    if not udf:
        logger.info('Skipping UDF build/load (--no-udf)')
        return

    already_loaded = is_udf_loaded(con)
    logger.info('Attempting to build and load test UDFs')
    if already_loaded and not overwrite:
        logger.info('UDFs already loaded and not overwriting; moving on')
        return

    if already_loaded:
        logger.info('UDFs already loaded; attempting to overwrite')
    logger.info('Building UDFs')
    build_udfs()
    logger.info('Uploading UDFs')
    upload_udfs(con)
def _find_backend(self) -> BaseBackend: backends = self._find_backends() if not backends: default = config.options.default_backend if default is None: raise IbisError( 'Expression depends on no backends, and found no default') return default if len(backends) > 1: raise ValueError('Multiple backends found') return backends[0]
def hdfs_connect(host='localhost', port=50070, protocol='webhdfs',
                 use_https='default', auth_mechanism='NOSASL',
                 verify=True, session=None, **kwds):
    """Connect to HDFS.

    Parameters
    ----------
    host : str
        Host name of the HDFS NameNode
    port : int
        NameNode's WebHDFS port
    protocol : str,
        The protocol used to communicate with HDFS. The only valid value is
        ``'webhdfs'``.
    use_https : bool
        Connect to WebHDFS with HTTPS, otherwise plain HTTP. For secure
        authentication, the default for this is True, otherwise False.
    auth_mechanism : str
        Set to NOSASL or PLAIN for non-secure clusters.
        Set to GSSAPI or LDAP for Kerberos-secured clusters.
    verify : bool
        Set to :data:`False` to turn off verifying SSL certificates.
    session : Optional[requests.Session]
        A custom :class:`requests.Session` object.

    Notes
    -----
    Other keywords are forwarded to HDFS library classes.

    Returns
    -------
    WebHDFS
    """
    import requests

    if session is None:
        session = requests.Session()
    session.verify = verify

    # GSSAPI/LDAP imply a Kerberos-secured cluster.
    secure = auth_mechanism in ('GSSAPI', 'LDAP')

    # Secure clusters default to HTTPS, non-secure ones to plain HTTP;
    # an explicit use_https always wins. Computed once for both branches.
    if use_https == 'default':
        prefix = 'https' if secure else 'http'
    else:
        prefix = 'https' if use_https else 'http'

    url = '{}://{}:{}'.format(prefix, host, port)

    if secure:
        try:
            import requests_kerberos  # noqa: F401
        except ImportError as e:
            # Chain the ImportError so the original failure is preserved.
            raise IbisError(
                "Unable to import requests-kerberos, which is required for "
                "Kerberos HDFS support. Install it by executing `pip install "
                "requests-kerberos` or `pip install hdfs[kerberos]`.") from e

        from hdfs.ext.kerberos import KerberosClient

        # note SSL
        kwds.setdefault('mutual_auth', 'OPTIONAL')
        hdfs_client = KerberosClient(url, session=session, **kwds)
    else:
        from hdfs.client import InsecureClient

        hdfs_client = InsecureClient(url, session=session, **kwds)

    return WebHDFS(hdfs_client)
def _check_connected(self): if not self.is_connected: raise IbisError('Please first connect to a Kudu cluster ' 'with client.kudu.connect')