def get_query_server_config(name='beeswax', server=None):
    """Assemble the connection settings for a SQL query server.

    Args:
        name: backend identifier; 'impala' selects Impala, anything else
            falls through to Beeswax (i.e. HiveServer2).
        server: accepted for interface compatibility; unused here.

    Returns:
        dict with at least server_name/server_host/server_port/principal.
    """
    if name == 'impala':
        # Lazy import keeps the Impala app optional at module load time.
        from impala.conf import SERVER_HOST as IMPALA_SERVER_HOST, \
            SERVER_PORT as IMPALA_SERVER_PORT, IMPALA_PRINCIPAL, \
            IMPERSONATION_ENABLED, QUERYCACHE_ROWS, QUERY_TIMEOUT_S

        query_server = {
            'server_name': 'impala',
            'server_host': IMPALA_SERVER_HOST.get(),
            'server_port': IMPALA_SERVER_PORT.get(),
            'principal': IMPALA_PRINCIPAL.get(),
            'impersonation_enabled': IMPERSONATION_ENABLED.get(),
            'querycache_rows': QUERYCACHE_ROWS.get(),
            'QUERY_TIMEOUT_S': QUERY_TIMEOUT_S.get(),
        }
    else:
        kerberos_principal = hive_site.get_hiveserver2_kerberos_principal(HIVE_SERVER_HOST.get())

        query_server = {
            'server_name': 'beeswax',  # Aka HiveServer2 now
            'server_host': HIVE_SERVER_HOST.get(),
            'server_port': HIVE_SERVER_PORT.get(),
            'principal': kerberos_principal,
        }

    LOG.debug("Query Server: %s" % query_server)

    return query_server
def get_query_server_config(name='beeswax', server=None):
    """Return the settings dict used to open a session against a query server.

    'impala' targets Impala; any other name targets Beeswax/HiveServer2.
    `server` is accepted for interface compatibility and not used here.
    """
    if name == 'impala':
        # Import lazily so the Impala app remains optional.
        from impala.conf import SERVER_HOST as IMPALA_SERVER_HOST, \
            SERVER_PORT as IMPALA_SERVER_PORT, IMPALA_PRINCIPAL, \
            IMPERSONATION_ENABLED, QUERYCACHE_ROWS, QUERY_TIMEOUT_S

        query_server = {
            'server_name': 'impala',
            'server_host': IMPALA_SERVER_HOST.get(),
            'server_port': IMPALA_SERVER_PORT.get(),
            'principal': IMPALA_PRINCIPAL.get(),
            'impersonation_enabled': IMPERSONATION_ENABLED.get(),
            'querycache_rows': QUERYCACHE_ROWS.get(),
            'QUERY_TIMEOUT_S': QUERY_TIMEOUT_S.get(),
        }
    else:
        hive_host = HIVE_SERVER_HOST.get()

        query_server = {
            'server_name': 'beeswax',  # Aka HiveServer2 now
            'server_host': hive_host,
            'server_port': HIVE_SERVER_PORT.get(),
            'principal': hive_site.get_hiveserver2_kerberos_principal(hive_host),
        }

    LOG.debug("Query Server: %s" % query_server)

    return query_server
def get_query_server_config(name='beeswax', server=None):
    """Build the connection settings for a query server.

    For the Beeswax/HiveServer2 case this also derives the HTTP endpoint
    URL and the transport mode ('http' or 'socket') from hive-site.xml.
    """
    if name == 'impala':
        # Lazy import: the Impala app is optional.
        from impala.conf import SERVER_HOST as IMPALA_SERVER_HOST, \
            SERVER_PORT as IMPALA_SERVER_PORT, IMPALA_PRINCIPAL, \
            IMPERSONATION_ENABLED, QUERYCACHE_ROWS, QUERY_TIMEOUT_S

        query_server = {
            'server_name': 'impala',
            'server_host': IMPALA_SERVER_HOST.get(),
            'server_port': IMPALA_SERVER_PORT.get(),
            'principal': IMPALA_PRINCIPAL.get(),
            'impersonation_enabled': IMPERSONATION_ENABLED.get(),
            'querycache_rows': QUERYCACHE_ROWS.get(),
            'QUERY_TIMEOUT_S': QUERY_TIMEOUT_S.get(),
        }
    else:
        hive_host = HIVE_SERVER_HOST.get()
        http_url = '%(protocol)s://%(host)s:%(port)s/%(end_point)s' % {
            'protocol': 'https' if hiveserver2_use_ssl() else 'http',
            'host': hive_host,
            'port': hive_site.hiveserver2_thrift_http_port(),
            'end_point': hive_site.hiveserver2_thrift_http_path(),
        }

        query_server = {
            'server_name': 'beeswax',  # Aka HiveServer2 now
            'server_host': hive_host,
            'server_port': HIVE_SERVER_PORT.get(),
            'principal': hive_site.get_hiveserver2_kerberos_principal(hive_host),
            'http_url': http_url,
            'transport_mode': 'http' if hive_site.hiveserver2_transport_mode() == 'HTTP' else 'socket',
        }

    LOG.debug("Query Server: %s" % query_server)

    return query_server
def get_query_server_config(name='beeswax', server=None):
    """Build connection settings for Impala, Spark SQL, or Beeswax/HiveServer2.

    A Spark SQL server is configured like Hive, then its name/host/port are
    overlaid. The auth password is masked before the debug log line so
    credentials never reach the logs.
    """
    if name == 'impala':
        # Lazy import keeps the Impala app optional.
        from impala.conf import SERVER_HOST as IMPALA_SERVER_HOST, \
            SERVER_PORT as IMPALA_SERVER_PORT, IMPALA_PRINCIPAL, \
            IMPERSONATION_ENABLED, QUERYCACHE_ROWS, QUERY_TIMEOUT_S, \
            AUTH_USERNAME as IMPALA_AUTH_USERNAME, AUTH_PASSWORD as IMPALA_AUTH_PASSWORD

        query_server = {
            'server_name': 'impala',
            'server_host': IMPALA_SERVER_HOST.get(),
            'server_port': IMPALA_SERVER_PORT.get(),
            'principal': IMPALA_PRINCIPAL.get(),
            'impersonation_enabled': IMPERSONATION_ENABLED.get(),
            'querycache_rows': QUERYCACHE_ROWS.get(),
            'QUERY_TIMEOUT_S': QUERY_TIMEOUT_S.get(),
            'auth_username': IMPALA_AUTH_USERNAME.get(),
            'auth_password': IMPALA_AUTH_PASSWORD.get(),
        }
    else:
        hive_host = HIVE_SERVER_HOST.get()

        query_server = {
            'server_name': 'beeswax',  # Aka HiveServer2 now
            'server_host': hive_host,
            'server_port': HIVE_SERVER_PORT.get(),
            'principal': hive_site.get_hiveserver2_kerberos_principal(hive_host),
            'http_url': '%(protocol)s://%(host)s:%(port)s/%(end_point)s' % {
                'protocol': 'https' if hiveserver2_use_ssl() else 'http',
                'host': hive_host,
                'port': hive_site.hiveserver2_thrift_http_port(),
                'end_point': hive_site.hiveserver2_thrift_http_path(),
            },
            'transport_mode': 'http' if hive_site.hiveserver2_transport_mode() == 'HTTP' else 'socket',
            'auth_username': AUTH_USERNAME.get(),
            'auth_password': AUTH_PASSWORD.get(),
        }

    if name == 'sparksql':  # Spark SQL is almost the same as Hive
        from spark.conf import SQL_SERVER_HOST as SPARK_SERVER_HOST, SQL_SERVER_PORT as SPARK_SERVER_PORT

        query_server.update({
            'server_name': 'sparksql',
            'server_host': SPARK_SERVER_HOST.get(),
            'server_port': SPARK_SERVER_PORT.get(),
        })

    # Log whether a password was set, never the password itself.
    debug_query_server = query_server.copy()
    debug_query_server['auth_password_used'] = bool(debug_query_server.pop('auth_password'))
    LOG.debug("Query Server: %s" % debug_query_server)

    return query_server
def get_query_server_config(name="beeswax", server=None):
    """Return the settings dict for Impala, Spark SQL, or Beeswax (HiveServer2).

    The "auth_password" entry is replaced by a boolean flag before the
    debug log call so credentials are never written to the logs.
    """
    if name == "impala":
        # Imported lazily so the Impala app stays optional.
        from impala.conf import (
            SERVER_HOST as IMPALA_SERVER_HOST,
            SERVER_PORT as IMPALA_SERVER_PORT,
            IMPALA_PRINCIPAL,
            IMPERSONATION_ENABLED,
            QUERYCACHE_ROWS,
            QUERY_TIMEOUT_S,
            AUTH_USERNAME as IMPALA_AUTH_USERNAME,
            AUTH_PASSWORD as IMPALA_AUTH_PASSWORD,
        )

        query_server = {
            "server_name": "impala",
            "server_host": IMPALA_SERVER_HOST.get(),
            "server_port": IMPALA_SERVER_PORT.get(),
            "principal": IMPALA_PRINCIPAL.get(),
            "impersonation_enabled": IMPERSONATION_ENABLED.get(),
            "querycache_rows": QUERYCACHE_ROWS.get(),
            "QUERY_TIMEOUT_S": QUERY_TIMEOUT_S.get(),
            "auth_username": IMPALA_AUTH_USERNAME.get(),
            "auth_password": IMPALA_AUTH_PASSWORD.get(),
        }
    else:
        hive_host = HIVE_SERVER_HOST.get()
        http_url = "%(protocol)s://%(host)s:%(port)s/%(end_point)s" % {
            "protocol": "https" if hiveserver2_use_ssl() else "http",
            "host": hive_host,
            "port": hive_site.hiveserver2_thrift_http_port(),
            "end_point": hive_site.hiveserver2_thrift_http_path(),
        }

        query_server = {
            "server_name": "beeswax",  # Aka HiveServer2 now
            "server_host": hive_host,
            "server_port": HIVE_SERVER_PORT.get(),
            "principal": hive_site.get_hiveserver2_kerberos_principal(hive_host),
            "http_url": http_url,
            "transport_mode": "http" if hive_site.hiveserver2_transport_mode() == "HTTP" else "socket",
            "auth_username": AUTH_USERNAME.get(),
            "auth_password": AUTH_PASSWORD.get(),
        }

    if name == "sparksql":  # Spark SQL is almost the same as Hive
        from spark.conf import SQL_SERVER_HOST as SPARK_SERVER_HOST, SQL_SERVER_PORT as SPARK_SERVER_PORT

        query_server.update({
            "server_name": "sparksql",
            "server_host": SPARK_SERVER_HOST.get(),
            "server_port": SPARK_SERVER_PORT.get(),
        })

    debug_query_server = query_server.copy()
    debug_query_server["auth_password_used"] = bool(debug_query_server.pop("auth_password"))
    LOG.debug("Query Server: %s" % debug_query_server)

    return query_server
def get_query_server_config(name='beeswax', server=None):
    """Build connection settings for Impala, Spark SQL, or Beeswax/HiveServer2.

    Impala additionally carries query/session timeout knobs. The password
    entry is masked (replaced by a used/unused flag) before debug logging.
    """
    if name == 'impala':
        # Lazy import: the Impala app is optional.
        from impala.conf import SERVER_HOST as IMPALA_SERVER_HOST, \
            SERVER_PORT as IMPALA_SERVER_PORT, IMPALA_PRINCIPAL, \
            IMPERSONATION_ENABLED, QUERYCACHE_ROWS, QUERY_TIMEOUT_S, \
            AUTH_USERNAME as IMPALA_AUTH_USERNAME, AUTH_PASSWORD as IMPALA_AUTH_PASSWORD, \
            SESSION_TIMEOUT_S

        query_server = {
            'server_name': 'impala',
            'server_host': IMPALA_SERVER_HOST.get(),
            'server_port': IMPALA_SERVER_PORT.get(),
            'principal': IMPALA_PRINCIPAL.get(),
            'impersonation_enabled': IMPERSONATION_ENABLED.get(),
            'querycache_rows': QUERYCACHE_ROWS.get(),
            'QUERY_TIMEOUT_S': QUERY_TIMEOUT_S.get(),
            'SESSION_TIMEOUT_S': SESSION_TIMEOUT_S.get(),
            'auth_username': IMPALA_AUTH_USERNAME.get(),
            'auth_password': IMPALA_AUTH_PASSWORD.get(),
        }
    else:
        hive_host = HIVE_SERVER_HOST.get()

        query_server = {
            'server_name': 'beeswax',  # Aka HiveServer2 now
            'server_host': hive_host,
            'server_port': HIVE_SERVER_PORT.get(),
            'principal': hive_site.get_hiveserver2_kerberos_principal(hive_host),
            'http_url': '%(protocol)s://%(host)s:%(port)s/%(end_point)s' % {
                'protocol': 'https' if hiveserver2_use_ssl() else 'http',
                'host': hive_host,
                'port': hive_site.hiveserver2_thrift_http_port(),
                'end_point': hive_site.hiveserver2_thrift_http_path(),
            },
            'transport_mode': 'http' if hive_site.hiveserver2_transport_mode() == 'HTTP' else 'socket',
            'auth_username': AUTH_USERNAME.get(),
            'auth_password': AUTH_PASSWORD.get(),
        }

    if name == 'sparksql':  # Spark SQL is almost the same as Hive
        from spark.conf import SQL_SERVER_HOST as SPARK_SERVER_HOST, SQL_SERVER_PORT as SPARK_SERVER_PORT

        query_server.update({
            'server_name': 'sparksql',
            'server_host': SPARK_SERVER_HOST.get(),
            'server_port': SPARK_SERVER_PORT.get(),
        })

    # Never log the real password; only whether one was configured.
    debug_query_server = query_server.copy()
    debug_query_server['auth_password_used'] = bool(debug_query_server.pop('auth_password'))
    LOG.debug("Query Server: %s" % debug_query_server)

    return query_server
def load(self, django_user):
    """
    Upload data to HDFS home of user then load (aka move) it into the Hive
    table (in the Hive metastore in HDFS).

    Args:
        django_user: the user to impersonate for HDFS and query operations.

    Raises:
        InstallException: if the LOAD query times out or the query server
            reports an error.
    """
    LOAD_HQL = """
      LOAD DATA INPATH
      '%(filename)s' OVERWRITE INTO TABLE %(tablename)s
    """

    fs = cluster.get_hdfs()

    # Because Impala does not have impersonation on by default, we use a
    # public destination for the upload when it is disabled.
    use_public_destination = False
    if self.app_name == 'impala':
        from impala.conf import IMPERSONATION_ENABLED
        use_public_destination = not IMPERSONATION_ENABLED.get()

    if use_public_destination:
        tmp_public = '/tmp/public_hue_examples'
        fs.do_as_user(django_user, fs.mkdir, tmp_public, '0777')
        hdfs_root_destination = tmp_public
    else:
        # Bug fix: previously, Impala WITH impersonation enabled fell through
        # without assigning hdfs_root_destination (UnboundLocalError); it now
        # uses the user's home directory like the other apps.
        hdfs_root_destination = fs.do_as_user(django_user, fs.get_home_dir)

    hdfs_destination = os.path.join(hdfs_root_destination, self.name)

    LOG.info('Uploading local data %s to HDFS table "%s"' % (self.name, hdfs_destination))
    fs.do_as_user(django_user, fs.copyFromLocal, self._contents_file, hdfs_destination)

    LOG.info('Loading data into table "%s"' % (self.name,))
    hql = LOAD_HQL % {'tablename': self.name, 'filename': hdfs_destination}
    query = hql_query(hql)

    try:
        results = dbms.get(django_user, self.query_server).execute_and_wait(query)
        if not results:
            msg = _('Error loading table %(table)s: Operation timeout.') % {'table': self.name}
            LOG.error(msg)
            raise InstallException(msg)
    # Bug fix: 'except E, ex' is Python-2-only syntax; 'as' works on 2.6+ and 3.x.
    except QueryServerException as ex:
        msg = _('Error loading table %(table)s: %(error)s.') % {'table': self.name, 'error': ex}
        LOG.error(msg)
        raise InstallException(msg)
def get_query_server_config(name='beeswax', server=None):
    """Assemble connection settings for Impala, an RDBMS alias, or Beeswax.

    Args:
        name: 'impala', 'rdbms', or anything else for Beeswax/HiveServer2.
        server: for 'rdbms', the configured alias to use; when missing or
            unknown, the first configured alias is picked.

    Returns:
        dict of connection settings; empty dict when name == 'rdbms' and
        no RDBMS alias is configured.
    """
    if name == 'impala':
        # Lazy import keeps the Impala app optional.
        from impala.conf import SERVER_HOST as IMPALA_SERVER_HOST, \
            SERVER_PORT as IMPALA_SERVER_PORT, IMPALA_PRINCIPAL, IMPERSONATION_ENABLED

        query_server = {
            'server_name': 'impala',
            'server_host': IMPALA_SERVER_HOST.get(),
            'server_port': IMPALA_SERVER_PORT.get(),
            'principal': IMPALA_PRINCIPAL.get(),
            'impersonation_enabled': IMPERSONATION_ENABLED.get(),
        }
    elif name == 'rdbms':
        from rdbms.conf import RDBMS

        if not server or server not in RDBMS:
            # Fall back to the first configured alias, if any.
            # (list() makes the keys indexable on both Python 2 and 3.)
            keys = list(RDBMS.keys())
            name = keys[0] if keys else None
        else:
            name = server

        if name:
            query_server = {
                'server_name': RDBMS[name].ENGINE.get().split('.')[-1],
                'server_host': RDBMS[name].HOST.get(),
                'server_port': RDBMS[name].PORT.get(),
                'username': RDBMS[name].USER.get(),
                # Bug fix: the original dict literal listed 'password' twice;
                # duplicate keys silently override, so one entry was dead code.
                'password': RDBMS[name].PASSWORD.get(),
                'alias': name,
            }
        else:
            query_server = {}
    else:
        kerberos_principal = hive_site.get_hiveserver2_kerberos_principal(HIVE_SERVER_HOST.get())

        query_server = {
            'server_name': 'beeswax',  # Aka HiveServer2 now
            'server_host': HIVE_SERVER_HOST.get(),
            'server_port': HIVE_SERVER_PORT.get(),
            'principal': kerberos_principal,
        }

    LOG.debug("Query Server: %s" % query_server)

    return query_server
def _get_hdfs_root_destination(self, django_user, subdir=None):
    """Pick (and create when needed) the HDFS directory for sample uploads.

    Because Impala does not have impersonation on by default, a
    world-writable /tmp location is used in that case; otherwise the
    user's HDFS home directory is used.

    Args:
        django_user: user to impersonate for HDFS operations.
        subdir: optional sub-directory appended to the destination.

    Returns:
        The destination path as a string.
    """
    fs = cluster.get_hdfs()

    use_public_destination = False
    if self.app_name == 'impala':
        from impala.conf import IMPERSONATION_ENABLED
        use_public_destination = not IMPERSONATION_ENABLED.get()

    if use_public_destination:
        tmp_public = '/tmp/public_hue_examples'
        if subdir:
            tmp_public += '/%s' % subdir
        fs.do_as_user(django_user, fs.mkdir, tmp_public, '0777')
        hdfs_root_destination = tmp_public
    else:
        # Bug fix: previously, Impala WITH impersonation enabled matched
        # neither branch and hdfs_root_destination stayed unbound
        # (UnboundLocalError); it now uses the user's home directory.
        hdfs_root_destination = fs.do_as_user(django_user, fs.get_home_dir)
        if subdir:
            hdfs_root_destination += '/%s' % subdir
            fs.do_as_user(django_user, fs.mkdir, hdfs_root_destination, '0777')

    return hdfs_root_destination
def get_query_server_config(name="beeswax", server=None):
    """Return the settings dict for Impala or Beeswax (HiveServer2).

    The Beeswax entry also includes the Thrift-over-HTTP endpoint URL and
    the transport mode derived from hive-site.xml.
    """
    if name == "impala":
        # Imported lazily so the Impala app stays optional.
        from impala.conf import (
            SERVER_HOST as IMPALA_SERVER_HOST,
            SERVER_PORT as IMPALA_SERVER_PORT,
            IMPALA_PRINCIPAL,
            IMPERSONATION_ENABLED,
            QUERYCACHE_ROWS,
            QUERY_TIMEOUT_S,
        )

        query_server = {
            "server_name": "impala",
            "server_host": IMPALA_SERVER_HOST.get(),
            "server_port": IMPALA_SERVER_PORT.get(),
            "principal": IMPALA_PRINCIPAL.get(),
            "impersonation_enabled": IMPERSONATION_ENABLED.get(),
            "querycache_rows": QUERYCACHE_ROWS.get(),
            "QUERY_TIMEOUT_S": QUERY_TIMEOUT_S.get(),
        }
    else:
        hive_host = HIVE_SERVER_HOST.get()
        http_url = "%(protocol)s://%(host)s:%(port)s/%(end_point)s" % {
            "protocol": "https" if hiveserver2_use_ssl() else "http",
            "host": hive_host,
            "port": hive_site.hiveserver2_thrift_http_port(),
            "end_point": hive_site.hiveserver2_thrift_http_path(),
        }

        query_server = {
            "server_name": "beeswax",  # Aka HiveServer2 now
            "server_host": hive_host,
            "server_port": HIVE_SERVER_PORT.get(),
            "principal": hive_site.get_hiveserver2_kerberos_principal(hive_host),
            "http_url": http_url,
            "transport_mode": "http" if hive_site.hiveserver2_transport_mode() == "HTTP" else "socket",
        }

    LOG.debug("Query Server: %s" % query_server)

    return query_server
def load(self, django_user):
    """
    Upload data to HDFS home of user then load (aka move) it into the Hive
    table (in the Hive metastore in HDFS).

    Args:
        django_user: the user to impersonate for HDFS and query operations.

    Raises:
        InstallException: when the LOAD query times out or the query server
            reports an error.
    """
    LOAD_HQL = """
      LOAD DATA INPATH
      '%(filename)s' OVERWRITE INTO TABLE %(tablename)s
    """

    fs = cluster.get_hdfs()

    # Because Impala does not have impersonation on by default, we use a
    # public destination for the upload when it is disabled.
    use_public_destination = False
    if self.app_name == 'impala':
        from impala.conf import IMPERSONATION_ENABLED
        use_public_destination = not IMPERSONATION_ENABLED.get()

    if use_public_destination:
        tmp_public = '/tmp/public_hue_examples'
        fs.do_as_user(django_user, fs.mkdir, tmp_public, '0777')
        hdfs_root_destination = tmp_public
    else:
        # Bug fix: Impala with impersonation enabled previously left
        # hdfs_root_destination unassigned (UnboundLocalError); it now falls
        # back to the user's home directory like the other apps.
        hdfs_root_destination = fs.do_as_user(django_user, fs.get_home_dir)

    hdfs_destination = os.path.join(hdfs_root_destination, self.name)

    LOG.info('Uploading local data %s to HDFS table "%s"' % (self.name, hdfs_destination))
    fs.do_as_user(django_user, fs.copyFromLocal, self._contents_file, hdfs_destination)

    LOG.info('Loading data into table "%s"' % (self.name,))
    hql = LOAD_HQL % {'tablename': self.name, 'filename': hdfs_destination}
    query = hql_query(hql)

    try:
        results = dbms.get(django_user, self.query_server).execute_and_wait(query)
        if not results:
            msg = _('Error loading table %(table)s: Operation timeout.') % {'table': self.name}
            LOG.error(msg)
            raise InstallException(msg)
    # Bug fix: 'except E, ex' is Python-2-only syntax; 'as' works on 2.6+ and 3.x.
    except QueryServerException as ex:
        msg = _('Error loading table %(table)s: %(error)s.') % {'table': self.name, 'error': ex}
        LOG.error(msg)
        raise InstallException(msg)
def _get_hdfs_root_destination(self, django_user, subdir=None):
    """Return the HDFS directory that sample files should be uploaded to.

    Impala can support impersonation, so the user's home directory is used
    when it is enabled; otherwise (including all non-Impala apps) a
    world-writable public directory under /tmp is used.

    Args:
        django_user: user to impersonate for HDFS operations.
        subdir: optional sub-directory appended to the destination.
    """
    fs = cluster.get_hdfs()

    can_impersonate_hdfs = False
    if self.app_name == 'impala':
        # Impala can support impersonation, so use home instead of a public
        # destination for the upload.
        from impala.conf import IMPERSONATION_ENABLED
        can_impersonate_hdfs = IMPERSONATION_ENABLED.get()

    if can_impersonate_hdfs:
        destination = fs.do_as_user(django_user, fs.get_home_dir)
        if subdir:
            destination += '/%s' % subdir
            fs.do_as_user(django_user, fs.mkdir, destination, '0777')
    else:
        destination = '/tmp/public_hue_examples'
        if subdir:
            destination += '/%s' % subdir
        fs.do_as_user(django_user, fs.mkdir, destination, '0777')

    return destination