def invalidate(self, database=None, table=None, flush_all=False):
    """Issue ``INVALIDATE METADATA`` statements for the requested scope.

    The scope is selected from the arguments:

    * ``flush_all`` is true or ``database`` is None: one global
      ``INVALIDATE METADATA``.
    * ``database`` is given and ``table`` is None: one statement per table
      returned by ``self._get_different_tables(database)``. This is refused
      when no Hive metastore interpreter is configured or when more than
      10 tables are returned.
    * both ``database`` and ``table`` are given: one statement for that table.

    Every statement is run through ``self.execute_and_wait`` with
    ``timeout_sec=10.0`` and every handle obtained is closed.

    Raises:
        QueryServerTimeoutException: propagated unchanged.
        PopupException: propagated unchanged.
        QueryServerException: wraps any other exception raised while
            preparing or running the statements.
    """
    handle = None

    try:
        if flush_all or database is None:
            statements = ["INVALIDATE METADATA"]
        elif table is None:
            if not Cluster(self.client.user).get_app_config().get_hive_metastore_interpreters():
                raise PopupException(_("Hive and HMS not configured. Please do a full refresh"))

            diff_tables = self._get_different_tables(database)
            if len(diff_tables) > 10:
                raise PopupException(
                    _("Too many tables (%d) to invalidate. Please do a full refresh") % len(diff_tables)
                )
            # A separate local name keeps the `table` argument intact.
            statements = [
                "INVALIDATE METADATA `%s`.`%s`" % (database, table_name)
                for table_name in diff_tables
            ]
        else:
            statements = ["INVALIDATE METADATA `%s`.`%s`" % (database, table)]

        for hql in statements:
            if handle:
                # Release the previous statement's handle before starting the
                # next one; previously only the last handle was ever closed.
                finished, handle = handle, None
                self.close(finished)
            query = hql_query(hql, query_type=QUERY_TYPES[1])
            handle = self.execute_and_wait(query, timeout_sec=10.0)
    except (QueryServerTimeoutException, PopupException):
        # Timeouts and user-facing errors reach the caller untouched.
        raise
    except Exception as e:
        msg = 'Failed to invalidate `%s`: %s' % (database or 'databases', e)
        raise QueryServerException(msg)
    finally:
        # Closes the handle of the last (or only) statement, if any.
        if handle:
            self.close(handle)
def call(self, fn, req, status=TStatusCode.SUCCESS_STATUS):
    """Send ``req`` through ``fn`` on the user's session and return the response.

    The session comes from ``Session.objects.get_session`` for the current
    user and server name; a new one is opened when none is found. Its handle
    is written to ``req.sessionHandle`` when that attribute exists and is None.

    If the server answers with ERROR_STATUS and an error message that looks
    like an invalid or expired session, a fresh session is opened and the
    request is re-sent once through the client method named by ``fn.attr``.

    Raises:
        QueryServerException: when ``status`` is not None and the final
            response status is not SUCCESS, SUCCESS_WITH_INFO or
            STILL_EXECUTING.
    """
    session = Session.objects.get_session(self.user, self.query_server['server_name'])
    if session is None:
        session = self.open_session(self.user)

    if hasattr(req, 'sessionHandle') and req.sessionHandle is None:
        req.sessionHandle = session.get_handle()

    res = fn(req)

    # Not supported currently in HS2 and Impala: TStatusCode.INVALID_HANDLE_STATUS
    session_lost = res.status.statusCode == TStatusCode.ERROR_STATUS and re.search(
        'Invalid SessionHandle|Invalid session|Client session expired',
        res.status.errorMessage or '',
        re.I
    )
    if session_lost:
        LOG.info('Retrying with a new session because for %s of %s' % (self.user, res))

        session = self.open_session(self.user)
        req.sessionHandle = session.get_handle()

        # Look the client method up again by name and replay the request
        retry_fn = getattr(self._client, fn.attr)
        res = retry_fn(req)

    accepted_codes = (
        TStatusCode.SUCCESS_STATUS,
        TStatusCode.SUCCESS_WITH_INFO_STATUS,
        TStatusCode.STILL_EXECUTING_STATUS,
    )
    if status is None or res.status.statusCode in accepted_codes:
        return res

    has_message = hasattr(res.status, 'errorMessage') and res.status.errorMessage
    message = res.status.errorMessage if has_message else ''
    raise QueryServerException(Exception('Bad status for request %s:\n%s' % (req, res)), message=message)
class ImpalaDbms(HiveServer2Dbms):
    """Impala-specific query helpers layered on top of HiveServer2Dbms."""

    @classmethod
    def get_nested_select(cls, database, table, column, nested=None):
        """
        Given a column or nested type, return the corresponding SELECT and FROM clauses in Impala's nested-type syntax

        ``nested`` is a '/'-separated path below ``column``. Path elements are
        appended to the SELECT part until one of the reserved words ``key``,
        ``value`` or ``item`` is met. At that point everything collected so far
        moves to the FROM part, and the reserved word is selected only when it
        is the last path element.

        Example: ``('db', 'tbl', 'col', 'item/foo')`` returns
        ``('foo', '`db`.`tbl`.`col`')``.

        Returns:
            tuple: ``(select_clause, from_clause)``; FROM tokens are
            backtick-quoted, SELECT tokens are joined with dots as-is.
        """
        select_tokens = [column]
        from_tokens = [database, table]

        if nested:
            nested_tokens = nested.strip('/').split('/')
            last_index = len(nested_tokens) - 1
            for index, token in enumerate(nested_tokens):
                if token not in ('key', 'value', 'item'):
                    select_tokens.append(token)
                    continue
                # Reserved keyword: what was selected so far becomes part of the FROM path
                from_tokens.extend(select_tokens)
                # A trailing reserved keyword is itself the selected expression; otherwise skip it
                select_tokens = [token] if index == last_index else []

        select_clause = '.'.join(select_tokens)
        from_clause = '.'.join('`%s`' % token.strip('`') for token in from_tokens)
        return select_clause, from_clause

    @classmethod
    def get_histogram_query(cls, database, table, column, nested=None):
        """Return a ``SELECT histogram(...) FROM ...`` statement for the column or nested path."""
        select_clause, from_clause = cls.get_nested_select(database, table, column, nested)
        return 'SELECT histogram(%s) FROM %s' % (select_clause, from_clause)

    def invalidate(self, database=None, table=None, flush_all=False):
        """Issue ``INVALIDATE METADATA`` globally, per differing table, or for one table.

        * ``flush_all`` true or ``database`` None: one global statement.
        * ``database`` given, ``table`` None: one statement per table returned
          by ``self._get_different_tables(database)``.
        * both given: one statement for that table.

        Each statement runs through ``self.execute_and_wait`` with
        ``timeout_sec=10.0``. Handles are closed with ``self.close``, which is
        assumed to be provided by the base class (TODO confirm).

        Raises:
            QueryServerTimeoutException: propagated unchanged.
            QueryServerException: wraps any other failure.
        """
        handle = None

        try:
            if flush_all or database is None:
                statements = ["INVALIDATE METADATA"]
            elif table is None:
                # A separate local name keeps the `table` argument intact.
                statements = [
                    "INVALIDATE METADATA `%s`.`%s`" % (database, table_name)
                    for table_name in self._get_different_tables(database)
                ]
            else:
                statements = ["INVALIDATE METADATA `%s`.`%s`" % (database, table)]

            for hql in statements:
                if handle:
                    # Release the previous statement's handle before running the next one
                    finished, handle = handle, None
                    self.close(finished)
                query = hql_query(hql, query_type=QUERY_TYPES[1])
                handle = self.execute_and_wait(query, timeout_sec=10.0)
        except QueryServerTimeoutException:
            # Allow timeout exceptions to propagate
            raise
        except Exception as e:
            msg = 'Failed to invalidate `%s`: %s' % (database or 'databases', e)
            raise QueryServerException(msg)
        finally:
            # Previously the handle was never closed on any path
            if handle:
                self.close(handle)
def __init__(self, table_results, table_schema, desc_results, desc_schema):
    """Store table and describe results, choosing columns or rows by Thrift version.

    When ``beeswax_conf.THRIFT_VERSION`` is 7 or higher, ``self.table`` is
    ``table_results.columns``. Otherwise it is the first entry of
    ``table_results.rows``, or ``''`` when that entry is falsy.

    Raises:
        QueryServerException: when the relevant columns/rows are empty.
    """
    columnar = beeswax_conf.THRIFT_VERSION.get() >= 7

    if columnar:
        columns = table_results.columns
        if not columns:
            raise QueryServerException('No table columns')
        self.table = columns
    else:
        # Deprecated row-based path. To remove in Hue 4.
        rows = table_results.rows
        if not rows:
            raise QueryServerException('No table rows')
        first_row = rows[0]
        self.table = first_row if first_row else ''

    self.table_schema = table_schema
    self.desc_results = desc_results
    self.desc_schema = desc_schema

    self.is_impala_only = False  # Aka Kudu
    self._details = None

    # Column descriptions derived from the describe results and their schema
    describe_schema = HiveServerTTableSchema(self.desc_results, self.desc_schema)
    self.describe = describe_schema.cols()
def refresh_table(self, database, table):
    """Run ``REFRESH `database`.`table``` and release the resulting handle.

    The statement is executed through ``self.execute_and_wait`` with
    ``timeout_sec=10.0``. The handle is closed with ``self.close``, assumed
    to be available on this class (TODO confirm).

    Raises:
        QueryServerException: wraps any failure; the message now includes
            the underlying error text.
    """
    handle = None

    try:
        hql = "REFRESH `%s`.`%s`" % (database, table)
        query = hql_query(hql, database, query_type=QUERY_TYPES[1])
        handle = self.execute_and_wait(query, timeout_sec=10.0)
    except Exception as e:
        # Keep the original prefix and append the cause, which used to be dropped
        msg = 'Failed to refresh `%s`.`%s`: %s' % (database, table, e)
        raise QueryServerException(msg)
    finally:
        # Previously the handle was never closed
        if handle:
            self.close(handle)
def open_session(self, user):
    """Open a Thrift session for ``user``, persist it as a ``Session`` and return it.

    Side effects: sets ``self.user`` and ``self.coordinator_host``, and
    creates a ``Session`` row. When the stored session has no properties,
    they are filled from ``self.get_configuration()`` and saved.

    Raises:
        QueryServerException: when the OpenSession response carries a
            status other than SUCCESS_STATUS.
    """
    self.user = user

    session_conf = {}
    kwargs = {
        'client_protocol': beeswax_conf.THRIFT_VERSION.get() - 1,
        # With SASL or LDAP the username comes from the authentication mechanism instead
        'username': user.username,
        'configuration': session_conf,
    }
    server_name = self.query_server['server_name']

    if self.impersonation_enabled:
        kwargs['username'] = DEFAULT_USER

        if server_name == 'impala':  # Only when Impala accepts it
            session_conf['impala.doas.user'] = user.username

    # Proxy user is sent regardless of the impersonation flag
    if server_name in ('beeswax', 'sparksql'):
        session_conf['hive.server2.proxy.user'] = user.username

    if server_name == 'impala' and self.query_server['SESSION_TIMEOUT_S'] > 0:
        session_conf['idle_session_timeout'] = str(self.query_server['SESSION_TIMEOUT_S'])

    LOG.info('Opening %s thrift session for user %s' % (self.query_server['server_name'], user.username))

    req = TOpenSessionReq(**kwargs)
    res = self._client.OpenSession(req)
    self.coordinator_host = self._client.get_coordinator_host()

    if res.status is not None and res.status.statusCode != TStatusCode.SUCCESS_STATUS:
        has_message = hasattr(res.status, 'errorMessage') and res.status.errorMessage
        message = res.status.errorMessage if has_message else ''
        raise QueryServerException(Exception('Bad status for request %s:\n%s' % (req, res)), message=message)

    sessionId = res.sessionHandle.sessionId
    LOG.info('Session %s opened' % repr(sessionId.guid))

    query_handle = HiveServerQueryHandle(secret=sessionId.secret, guid=sessionId.guid)
    encoded_status, encoded_guid = query_handle.get()

    session = Session.objects.create(
        owner=user,
        application=self.query_server['server_name'],
        status_code=res.status.statusCode,
        secret=encoded_status,
        guid=encoded_guid,
        server_protocol_version=res.serverProtocolVersion,
        properties=json.dumps(res.configuration)
    )

    # HS2 does not return properties in TOpenSessionResp
    if not session.get_properties():
        session.properties = json.dumps(self.get_configuration())
        session.save()

    return session
def open_session(self, user):
    """Open a Thrift session for ``user`` and return the persisted ``Session``.

    The request uses client protocol 4. When ``LDAP_PASSWORD`` is set, the
    configured LDAP username and password replace the request credentials.

    Raises:
        QueryServerException: when the OpenSession response carries a
            status other than SUCCESS_STATUS.
    """
    session_conf = {}
    kwargs = {
        'client_protocol': 4,  # TODO: support latest column protocol
        # With SASL the username comes from the authentication mechanism instead
        'username': user.username,
        'configuration': session_conf,
    }
    server_name = self.query_server['server_name']

    if self.impersonation_enabled:
        kwargs['username'] = '******'

        if server_name == 'impala':  # Only when Impala accepts it
            session_conf['impala.doas.user'] = user.username

    # Proxy user is sent regardless of the impersonation flag
    if server_name == 'beeswax':
        session_conf['hive.server2.proxy.user'] = user.username

    # HiveServer2 supports pass-through LDAP authentication.
    if LDAP_PASSWORD.get():
        kwargs['username'] = LDAP_USERNAME.get()
        kwargs['password'] = LDAP_PASSWORD.get()

    req = TOpenSessionReq(**kwargs)
    res = self._client.OpenSession(req)

    if res.status is not None and res.status.statusCode != TStatusCode.SUCCESS_STATUS:
        has_message = hasattr(res.status, 'errorMessage') and res.status.errorMessage
        message = res.status.errorMessage if has_message else ''
        raise QueryServerException(Exception('Bad status for request %s:\n%s' % (req, res)), message=message)

    sessionId = res.sessionHandle.sessionId
    LOG.info('Opening session %s' % sessionId)

    query_handle = HiveServerQueryHandle(secret=sessionId.secret, guid=sessionId.guid)
    encoded_status, encoded_guid = query_handle.get()

    return Session.objects.create(
        owner=user,
        application=self.query_server['server_name'],
        status_code=res.status.statusCode,
        secret=encoded_status,
        guid=encoded_guid,
        server_protocol_version=res.serverProtocolVersion
    )
def call_return_result_and_session(self, fn, req, status=TStatusCode.SUCCESS_STATUS, with_multiple_session=False):
    """Send ``req`` through ``fn`` and return ``(response, session)``.

    Session selection depends only on ``conf.MAX_NUMBER_OF_SESSIONS``: above 1
    the session comes from ``self._get_tez_session``, otherwise from
    ``Session.objects.get_session``. The ``with_multiple_session`` argument
    is accepted but not consulted. A new session is opened when none is found.

    If the server reports ERROR_STATUS with an invalid/expired-session
    message, the session is saved with INVALID_HANDLE_STATUS, a new one is
    opened and the request is re-sent once via the client method named by
    ``fn.attr``.

    Raises:
        QueryServerException: when ``status`` is not None and the final
            response status is not SUCCESS, SUCCESS_WITH_INFO or
            STILL_EXECUTING.
    """
    max_sessions = conf.MAX_NUMBER_OF_SESSIONS.get()

    if max_sessions > 1:
        session = self._get_tez_session(max_sessions)
    else:
        # Single session allowed: default behaviour, skip the multiple-session logic
        session = Session.objects.get_session(self.user, self.query_server['server_name'])

    if session is None:
        session = self.open_session(self.user)

    if hasattr(req, 'sessionHandle') and req.sessionHandle is None:
        req.sessionHandle = session.get_handle()

    res = fn(req)

    # Not supported currently in HS2 and Impala: TStatusCode.INVALID_HANDLE_STATUS
    session_lost = res.status.statusCode == TStatusCode.ERROR_STATUS and re.search(
        'Invalid SessionHandle|Invalid session|Client session expired',
        res.status.errorMessage or '',
        re.I
    )
    if session_lost:
        LOG.info('Retrying with a new session because for %s of %s' % (self.user, res))

        # Flag the stale session before replacing it
        session.status_code = TStatusCode.INVALID_HANDLE_STATUS
        session.save()

        session = self.open_session(self.user)
        req.sessionHandle = session.get_handle()

        # Look the client method up again by name and replay the request
        retry_fn = getattr(self._client, fn.attr)
        res = retry_fn(req)

    accepted_codes = (
        TStatusCode.SUCCESS_STATUS,
        TStatusCode.SUCCESS_WITH_INFO_STATUS,
        TStatusCode.STILL_EXECUTING_STATUS,
    )
    if status is None or res.status.statusCode in accepted_codes:
        return (res, session)

    has_message = hasattr(res.status, 'errorMessage') and res.status.errorMessage
    message = res.status.errorMessage if has_message else ''
    raise QueryServerException(Exception('Bad status for request %s:\n%s' % (req, res)), message=message)
def call_return_result_and_session(self, fn, req, status=TStatusCode.SUCCESS_STATUS, withMultipleSession=False):
    """Send ``req`` through ``fn`` and return ``(response, session)``.

    With ``withMultipleSession`` false, or when
    ``conf.MAX_NUMBER_OF_SESSIONS`` is 1, the user's single session is used.
    Otherwise up to ``2 + n_sessions`` sessions are fetched, and those whose
    guid appears in a submitted/running snippet among the last 40
    ``query-hive`` history documents are treated as busy. The first
    non-busy session is used. A new session is opened when none is available.

    If the server reports ERROR_STATUS with an invalid/expired-session
    message, the session is saved with INVALID_HANDLE_STATUS, a new one is
    opened and the request is re-sent once via the client method named by
    ``fn.attr``.

    Raises:
        Exception: when the number of busy sessions equals the configured
            maximum.
        QueryServerException: when ``status`` is not None and the final
            response status is not SUCCESS, SUCCESS_WITH_INFO or
            STILL_EXECUTING.
    """
    n_sessions = conf.MAX_NUMBER_OF_SESSIONS.get()

    # When a single session is allowed, avoid multiple session logic
    if n_sessions == 1:
        withMultipleSession = False

    session = None

    if not withMultipleSession:
        # Default behaviour: get one session
        session = Session.objects.get_session(self.user, self.query_server['server_name'])
    else:
        # Get 2 + n_sessions sessions and filter out the busy ones
        sessions = Session.objects.get_n_sessions(
            self.user, n=2 + n_sessions, application=self.query_server['server_name']
        )
        LOG.debug('%s sessions found' % len(sessions))

        if sessions:
            # Include trashed documents to keep the query lazy
            # and avoid retrieving all documents
            docs = Document2.objects.get_history(doc_type='query-hive', user=self.user, include_trashed=True)
            busy_sessions = set()
            active_states = (str(QueryHistory.STATE.submitted), str(QueryHistory.STATE.running))

            # Only check last 40 documents for performance
            for doc in docs[:40]:
                try:
                    snippet_data = json.loads(doc.data)['snippets'][0]
                except (KeyError, IndexError):
                    # data might not contain a 'snippets' field or it might be empty
                    LOG.warn('No snippets in Document2 object of type query-hive')
                    continue

                session_guid = snippet_data.get('result', {}).get('handle', {}).get('session_guid')
                # Must not be called `status`: that would overwrite the `status`
                # parameter and change or skip the response check further down.
                snippet_status = snippet_data.get('status')

                if snippet_status in active_states and session_guid is not None:
                    busy_sessions.add(session_guid)

            n_busy_sessions = 0
            available_sessions = []
            # A separate loop name so `session` only ever holds the chosen session
            for candidate in sessions:
                if candidate.guid not in busy_sessions:
                    available_sessions.append(candidate)
                else:
                    n_busy_sessions += 1

            # NOTE(review): strict equality is kept from the original; more than
            # n_sessions busy sessions would not trigger this - confirm intent.
            if n_busy_sessions == n_sessions:
                raise Exception('Too many open sessions. Stop a running query before starting a new one')

            # None here means no available session was found
            session = available_sessions[0] if available_sessions else None

    if session is None:
        session = self.open_session(self.user)

    if hasattr(req, 'sessionHandle') and req.sessionHandle is None:
        req.sessionHandle = session.get_handle()

    res = fn(req)

    # Not supported currently in HS2 and Impala: TStatusCode.INVALID_HANDLE_STATUS
    if res.status.statusCode == TStatusCode.ERROR_STATUS and \
            re.search('Invalid SessionHandle|Invalid session|Client session expired', res.status.errorMessage or '', re.I):
        LOG.info('Retrying with a new session because for %s of %s' % (self.user, res))

        # Flag the stale session before replacing it
        session.status_code = TStatusCode.INVALID_HANDLE_STATUS
        session.save()

        session = self.open_session(self.user)
        req.sessionHandle = session.get_handle()

        # Get back the name of the function to call
        res = getattr(self._client, fn.attr)(req)

    if status is not None and res.status.statusCode not in (
            TStatusCode.SUCCESS_STATUS,
            TStatusCode.SUCCESS_WITH_INFO_STATUS,
            TStatusCode.STILL_EXECUTING_STATUS):
        if hasattr(res.status, 'errorMessage') and res.status.errorMessage:
            message = res.status.errorMessage
        else:
            message = ''
        raise QueryServerException(Exception('Bad status for request %s:\n%s' % (req, res)), message=message)
    else:
        return (res, session)