def get_query_server_config(name='beeswax', server=None, cluster=None):
  if cluster and cluster != CLUSTER_ID.get():
    cluster_config = Cluster(user=None).get_config(cluster)
  else:
    cluster_config = None

  if name == 'impala':
    from impala.dbms import get_query_server_config as impala_query_server_config
    query_server = impala_query_server_config(cluster_config=cluster_config)
  else:
    kerberos_principal = hive_site.get_hiveserver2_kerberos_principal(HIVE_SERVER_HOST.get())

    query_server = {
        'server_name': name if not cluster_config else '%s-%s' % (name, cluster_config.get('id')),
        'server_host': HIVE_SERVER_HOST.get() if not cluster_config else cluster_config.get('server_host'),
        'server_port': HIVE_SERVER_PORT.get(),
        'principal': kerberos_principal,
        'http_url': '%(protocol)s://%(host)s:%(port)s/%(end_point)s' % {
            'protocol': 'https' if hiveserver2_use_ssl() else 'http',
            'host': HIVE_SERVER_HOST.get(),
            'port': hive_site.hiveserver2_thrift_http_port(),
            'end_point': hive_site.hiveserver2_thrift_http_path()
        },
        'transport_mode': 'http' if hive_site.hiveserver2_transport_mode() == 'HTTP' else 'socket',
        'auth_username': AUTH_USERNAME.get(),
        'auth_password': AUTH_PASSWORD.get()
    }

  if name == 'sparksql':  # Spark SQL is almost the same as Hive
    from spark.conf import SQL_SERVER_HOST as SPARK_SERVER_HOST, SQL_SERVER_PORT as SPARK_SERVER_PORT

    query_server.update({
        'server_name': 'sparksql',
        'server_host': SPARK_SERVER_HOST.get(),
        'server_port': SPARK_SERVER_PORT.get()
    })

  debug_query_server = query_server.copy()
  debug_query_server['auth_password_used'] = bool(debug_query_server.pop('auth_password'))
  LOG.debug("Query Server: %s" % debug_query_server)

  return query_server

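
# --- Usage sketch (illustrative, not part of the original source) ---
# Shows how get_query_server_config() above is typically called; assumes Hue's
# beeswax/Impala settings are loaded. The cluster id below is a made-up example.
hive_config = get_query_server_config()                        # defaults to Hive ('beeswax')
impala_config = get_query_server_config(name='impala')         # delegated to impala.dbms
remote_config = get_query_server_config(cluster='example-id')  # hypothetical non-default cluster
# Each returned dict exposes at least 'server_name', 'server_host' and 'server_port';
# 'auth_password' is only popped from the copy used for debug logging.
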
def _get_beeswax_tables(self, database):
  beeswax_query_server = dbms.get(
      user=self.client.user,
      query_server=beeswax_query_server_config(
          name=Cluster(self.client.user).get_app_config().get_hive_metastore_interpreters()[0]
      )
  )
  return beeswax_query_server.get_tables(database=database)

def hue(request):
  apps = appmanager.get_apps_dict(request.user)
  current_app, other_apps, apps_list = _get_apps(request.user, '')
  default_cluster_index, default_cluster_interface = Cluster(request.user).get_list_interface_indexes()
  clusters = list(get_clusters().values())  # list() keeps the values indexable and JSON-serializable

  return render('hue.mako', request, {
    'apps': apps,
    'other_apps': other_apps,
    'is_s3_enabled': is_s3_enabled() and has_s3_access(request.user),
    'is_adls_enabled': is_adls_enabled() and has_adls_access(request.user),
    'is_ldap_setup': 'desktop.auth.backend.LdapBackend' in desktop.conf.AUTH.BACKEND.get(),
    'leaflet': {
      'layer': desktop.conf.LEAFLET_TILE_LAYER.get(),
      'attribution': desktop.conf.LEAFLET_TILE_LAYER_ATTRIBUTION.get(),
      'map_options': json.dumps(desktop.conf.LEAFLET_MAP_OPTIONS.get()),
      'layer_options': json.dumps(desktop.conf.LEAFLET_TILE_LAYER_OPTIONS.get()),
    },
    'is_demo': desktop.conf.DEMO_ENABLED.get(),
    'banner_message': get_banner_message(request),
    'user_preferences': dict((x.key, x.value) for x in UserPreferences.objects.filter(user=request.user)),
    'cluster': clusters[0]['type'] if clusters else None,
    'clusters_config_json': json.dumps(clusters),
    'default_cluster_index': default_cluster_index,
    'default_cluster_interface': default_cluster_interface
  })

def invalidate(self, database=None, table=None, flush_all=False):
  handle = None

  try:
    if flush_all or database is None:
      hql = "INVALIDATE METADATA"
      query = hql_query(hql, query_type=QUERY_TYPES[1])
      handle = self.execute_and_wait(query, timeout_sec=10.0)
    elif table is None:
      if not Cluster(self.client.user).get_app_config().get_hive_metastore_interpreters():
        raise PopupException(_("Hive and HMS not configured. Please do a full refresh"))
      diff_tables = self._get_different_tables(database)
      if len(diff_tables) > 10:
        raise PopupException(_("Too many tables (%d) to invalidate. Please do a full refresh") % len(diff_tables))
      else:
        for table in diff_tables:
          hql = "INVALIDATE METADATA `%s`.`%s`" % (database, table)
          query = hql_query(hql, query_type=QUERY_TYPES[1])
          handle = self.execute_and_wait(query, timeout_sec=10.0)
    else:
      hql = "INVALIDATE METADATA `%s`.`%s`" % (database, table)
      query = hql_query(hql, query_type=QUERY_TYPES[1])
      handle = self.execute_and_wait(query, timeout_sec=10.0)
  except QueryServerTimeoutException as e:
    # Allow timeout exceptions to propagate
    raise e
  except PopupException as e:
    raise e
  except Exception as e:
    msg = 'Failed to invalidate `%s`: %s' % (database or 'databases', e)
    raise QueryServerException(msg)
  finally:
    if handle:
      self.close(handle)

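
# --- Usage sketch (illustrative, not part of the original source) ---
# invalidate() above picks its scope from the arguments: no database means a global
# "INVALIDATE METADATA", a database without a table invalidates only the tables that
# differ from the Hive Metastore, and database plus table targets a single table.
# 'db' is a hypothetical ImpalaDbms instance, e.g. obtained via dbms.get().
db.invalidate(flush_all=True)                      # full metadata refresh
db.invalidate(database='web_logs')                 # per-database diff (at most 10 tables)
db.invalidate(database='web_logs', table='bikes')  # single table
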
def get(user, query_server=None):
  global DBMS_CACHE
  global DBMS_CACHE_LOCK

  if query_server is None:
    cluster_type = Cluster(user).get_type()
    if cluster_type == ANALYTIC_DB:
      kwargs = {'name': 'impala'}
    else:
      kwargs = {}
    query_server = get_query_server_config(**kwargs)

  DBMS_CACHE_LOCK.acquire()
  try:
    DBMS_CACHE.setdefault(user.username, {})

    if query_server['server_name'] not in DBMS_CACHE[user.username]:
      # Avoid circular dependency
      from beeswax.server.hive_server2_lib import HiveServerClientCompatible

      if query_server['server_name'] == 'impala':
        from impala.dbms import ImpalaDbms
        from impala.server import ImpalaServerClient
        DBMS_CACHE[user.username][query_server['server_name']] = ImpalaDbms(
            HiveServerClientCompatible(ImpalaServerClient(query_server, user)),
            QueryHistory.SERVER_TYPE[1][0]
        )
      else:
        from beeswax.server.hive_server2_lib import HiveServerClient
        DBMS_CACHE[user.username][query_server['server_name']] = HiveServer2Dbms(
            HiveServerClientCompatible(HiveServerClient(query_server, user)),
            QueryHistory.SERVER_TYPE[1][0]
        )

    return DBMS_CACHE[user.username][query_server['server_name']]
  finally:
    DBMS_CACHE_LOCK.release()

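
# --- Usage sketch (illustrative, not part of the original source) ---
# get() above returns a per-user, per-server cached Dbms wrapper. With no query_server
# argument it picks Impala on an ANALYTIC_DB cluster and the default Hive config
# otherwise. 'request.user' and the 'default' database are assumptions for the example.
from beeswax.server import dbms
from beeswax.server.dbms import get_query_server_config

db = dbms.get(request.user)  # cached HiveServer2Dbms or ImpalaDbms for this user
tables = db.get_tables(database='default')

# Passing an explicit config forces a specific backend:
impala_db = dbms.get(request.user, query_server=get_query_server_config(name='impala'))
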
def get_api(request, snippet):
  from notebook.connectors.dataeng import DataEngApi
  from notebook.connectors.hiveserver2 import HS2Api
  from notebook.connectors.jdbc import JdbcApi
  from notebook.connectors.rdbms import RdbmsApi
  from notebook.connectors.oozie_batch import OozieApi
  from notebook.connectors.solr import SolrApi
  from notebook.connectors.spark_shell import SparkApi
  from notebook.connectors.spark_batch import SparkBatchApi
  from notebook.connectors.text import TextApi

  if snippet.get('wasBatchExecuted'):
    return OozieApi(user=request.user, request=request)

  interpreter = [
      interpreter for interpreter in get_ordered_interpreters(request.user)
      if interpreter['type'] == snippet['type']
  ]
  if not interpreter:
    raise PopupException(_('Snippet type %(type)s is not configured in hue.ini') % snippet)

  interpreter = interpreter[0]
  interface = interpreter['interface']

  # Multi cluster
  cluster = Cluster(request.user)
  if cluster and cluster.get_type() == 'dataeng':
    interface = 'dataeng'

  if interface == 'hiveserver2':
    return HS2Api(user=request.user, request=request)
  elif interface == 'oozie':
    return OozieApi(user=request.user, request=request)
  elif interface == 'livy':
    return SparkApi(request.user)
  elif interface == 'livy-batch':
    return SparkBatchApi(request.user)
  elif interface == 'text' or interface == 'markdown':
    return TextApi(request.user)
  elif interface == 'rdbms':
    return RdbmsApi(request.user, interpreter=snippet['type'])
  elif interface == 'dataeng':
    return DataEngApi(user=request.user, request=request, cluster_name=cluster.get_interface())
  elif interface == 'jdbc':
    return JdbcApi(request.user, interpreter=interpreter)
  elif interface == 'solr':
    return SolrApi(request.user, interpreter=interpreter)
  elif interface == 'pig':
    return OozieApi(user=request.user, request=request)  # Backward compatibility until Hue 4
  else:
    raise PopupException(_('Notebook connector interface not recognized: %s') % interface)

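
# --- Usage sketch (illustrative, not part of the original source) ---
# get_api() above routes a notebook snippet to its connector based on the interpreter's
# 'interface' configured in hue.ini (overridden to 'dataeng' on a Data Eng cluster).
# 'request' is assumed to be an authenticated Django request; the snippet dicts are minimal.
snippet = {'type': 'hive', 'wasBatchExecuted': False}
api = get_api(request, snippet)              # -> HS2Api for a 'hiveserver2' interface

batch_snippet = {'type': 'hive', 'wasBatchExecuted': True}
batch_api = get_api(request, batch_snippet)  # -> OozieApi, batch execution path
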
def get_cluster_config(cluster=None):
  if cluster and cluster.get('id') != CLUSTER_ID.get():
    if 'altus:dataware:k8s' in cluster['id']:
      # TODO: getting list from left assist
      compute_end_point = cluster['compute_end_point'][0] if type(cluster['compute_end_point']) == list else cluster['compute_end_point']
      cluster_config = {'server_host': compute_end_point, 'name': cluster['name']}  # TODO: get port too
    else:
      cluster_config = Cluster(user=None).get_config(cluster['id'])  # Direct cluster
  else:
    cluster_config = None

  return cluster_config

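
# --- Usage sketch (illustrative, not part of the original source) ---
# Shape of the 'cluster' argument handled by get_cluster_config() above; the id, name
# and endpoint values are made up. An Altus k8s id yields a minimal host-only config,
# any other non-default id is resolved through Cluster(); the default cluster returns None.
k8s_cluster = {
  'id': 'altus:dataware:k8s:example',
  'name': 'k8s-warehouse',
  'compute_end_point': ['coordinator.example.com'],
}
config = get_cluster_config(k8s_cluster)  # {'server_host': 'coordinator.example.com', 'name': 'k8s-warehouse'}
default_config = get_cluster_config()     # None: nothing to override for the default cluster
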
def apps(self, filters):
  kwargs = {}

  if 'time' in filters:
    if filters['time']['time_unit'] == 'minutes':
      delta = timedelta(minutes=int(filters['time']['time_value']))
    elif filters['time']['time_unit'] == 'hours':
      delta = timedelta(hours=int(filters['time']['time_value']))
    else:
      delta = timedelta(days=int(filters['time']['time_value']))
    kwargs['creation_date_after'] = (datetime.today() - delta).strftime(DATE_FORMAT)

  # Could also come from filters
  cluster = Cluster(self.user)
  if cluster.get_type() == DATAENG:
    kwargs['cluster_crn'] = cluster.get_id()

  api = DataEng(self.user)
  jobs = api.list_jobs(**kwargs)

  return {
    'apps': [{
      'id': app['jobId'],
      'name': app['creationDate'],
      'status': app['status'],
      'apiStatus': self._api_status(app['status']),
      'type': app['jobType'],
      'user': '',
      'progress': 100,
      'duration': 10 * 3600,
      'submitted': app['creationDate']
    } for app in jobs['jobs']],
    'total': len(jobs['jobs'])  # count the returned jobs, not the keys of the response dict
  }

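
# --- Usage sketch (illustrative, not part of the original source) ---
# Shape of the 'filters' argument consumed by apps() above; only the 'time' filter is
# read and converted into a creation_date_after bound. 'jobs_api' is a hypothetical
# instance of the Data Eng job browser API class this method belongs to.
filters = {'time': {'time_unit': 'hours', 'time_value': 4}}  # jobs created in the last 4 hours
# response = jobs_api.apps(filters)
# response['apps'] is the list of normalized job dicts, response['total'] their count
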
def _default_interpreters(user):
  interpreters = []
  apps = appmanager.get_apps_dict(user)

  if 'hive' in apps:
    interpreters.append(('hive', {
      'name': 'Hive', 'interface': 'hiveserver2', 'options': {}
    }),)

  if 'impala' in apps:
    interpreters.append(('impala', {
      'name': 'Impala', 'interface': 'hiveserver2', 'options': {}
    }),)

  if 'pig' in apps:
    interpreters.append(('pig', {
      'name': 'Pig', 'interface': 'oozie', 'options': {}
    }))

  if 'oozie' in apps and 'jobsub' in apps:
    interpreters.extend((
      ('java', {
        'name': 'Java', 'interface': 'oozie', 'options': {}
      }),
      ('spark2', {
        'name': 'Spark', 'interface': 'oozie', 'options': {}
      }),
      ('mapreduce', {
        'name': 'MapReduce', 'interface': 'oozie', 'options': {}
      }),
      ('shell', {
        'name': 'Shell', 'interface': 'oozie', 'options': {}
      }),
      ('sqoop1', {
        'name': 'Sqoop 1', 'interface': 'oozie', 'options': {}
      }),
      ('distcp', {
        'name': 'Distcp', 'interface': 'oozie', 'options': {}
      }),
    ))

  from dashboard.conf import get_properties  # Cyclic dependency
  dashboards = get_properties()
  if dashboards.get('solr') and dashboards['solr']['analytics']:
    interpreters.append(('solr', {
      'name': 'Solr SQL', 'interface': 'solr', 'options': {}
    }),)

  from desktop.models import Cluster  # Cyclic dependency
  cluster = Cluster(user)
  if cluster and cluster.get_type() == 'dataeng':
    interpreters.append(('dataeng', {
      'name': 'DataEng', 'interface': 'dataeng', 'options': {}
    }))

  if 'spark' in apps:
    interpreters.extend((
      ('spark', {
        'name': 'Scala', 'interface': 'livy', 'options': {}
      }),
      ('pyspark', {
        'name': 'PySpark', 'interface': 'livy', 'options': {}
      }),
      ('r', {
        'name': 'R', 'interface': 'livy', 'options': {}
      }),
      ('jar', {
        'name': 'Spark Submit Jar', 'interface': 'livy-batch', 'options': {}
      }),
      ('py', {
        'name': 'Spark Submit Python', 'interface': 'livy-batch', 'options': {}
      }),
      ('text', {
        'name': 'Text', 'interface': 'text', 'options': {}
      }),
      ('markdown', {
        'name': 'Markdown', 'interface': 'text', 'options': {}
      })
    ))

  INTERPRETERS.set_for_testing(OrderedDict(interpreters))