def call(self, fn, req, status=TStatusCode.SUCCESS_STATUS):
    """Send ``req`` through ``fn`` on the user's session and return the response.

    This is a thin wrapper around ``call_return_result_and_session`` used in
    its default single-session mode. Sharing one implementation keeps the
    retry path identical for both entry points: the stale session is flagged
    with ``INVALID_HANDLE_STATUS`` before a new one is opened, and the
    'Client session expired' error is treated as a retryable condition.

    Args:
        fn: Thrift client callable to invoke with ``req``. It must expose an
            ``attr`` attribute naming the client method, which is used to
            re-issue the call after a session is reopened.
        req: Thrift request object. If it has a ``sessionHandle`` attribute
            that is ``None``, the handle of the selected session is filled in.
        status: Pass ``None`` to skip the status check and always return the
            response.

    Returns:
        The Thrift response object.

    Raises:
        QueryServerException: if ``status`` is not ``None`` and the response
            status is not SUCCESS, SUCCESS_WITH_INFO or STILL_EXECUTING.
    """
    res, _session = self.call_return_result_and_session(fn, req, status=status)
    return res
def open_session(self, user):
    """Open a Thrift session for ``user`` and store it as a ``Session`` record.

    Args:
        user: object exposing ``username``; it becomes the owner of the
            stored session.

    Returns:
        The object returned by ``Session.objects.create``.

    Raises:
        QueryServerException: if the OpenSession response carries a status
            other than ``SUCCESS_STATUS``.
    """
    protocol = beeswax_conf.THRIFT_VERSION.get() - 1
    server_name = self.query_server['server_name']

    # With SASL or LDAP the username is taken from the authentication
    # mechanism, since it depends on it.
    login = user.username
    session_conf = {}

    if self.impersonation_enabled:
        login = DEFAULT_USER
        # Only when Impala accepts it
        if server_name == 'impala':
            session_conf['impala.doas.user'] = user.username

    # All the time
    if server_name == 'beeswax':
        session_conf['hive.server2.proxy.user'] = user.username

    req = TOpenSessionReq(
        client_protocol=protocol,
        username=login,
        configuration=session_conf,
    )
    res = self._client.OpenSession(req)

    outcome = res.status
    if outcome is not None and outcome.statusCode not in (TStatusCode.SUCCESS_STATUS,):
        message = getattr(outcome, 'errorMessage', None) or ''
        raise QueryServerException(
            Exception('Bad status for request %s:\n%s' % (req, res)),
            message=message
        )

    session_id = res.sessionHandle.sessionId
    LOG.info('Opening session %s' % session_id)

    handle = HiveServerQueryHandle(secret=session_id.secret, guid=session_id.guid)
    encoded_secret, encoded_guid = handle.get()

    return Session.objects.create(
        owner=user,
        application=server_name,
        status_code=res.status.statusCode,
        secret=encoded_secret,
        guid=encoded_guid,
        server_protocol_version=res.serverProtocolVersion
    )
def call_return_result_and_session(self, fn, req, status=TStatusCode.SUCCESS_STATUS, withMultipleSession=False):
    """Send ``req`` through ``fn`` and return both the response and the session used.

    A session is selected (or opened when none is usable), its handle is put
    on the request if the request has an empty ``sessionHandle``, and the call
    is made. If the server reports an invalid or expired session, the session
    is flagged as invalid, a new one is opened and the call is re-issued once.

    Args:
        fn: Thrift client callable to invoke with ``req``. It must expose an
            ``attr`` attribute naming the client method, used for the retry.
        req: Thrift request object.
        status: Pass ``None`` to skip the status check and always return the
            response.
        withMultipleSession: when true (and more than one session is
            allowed), pick a session that is not referenced by a submitted or
            running query in the user's recent history.

    Returns:
        A ``(response, session)`` tuple.

    Raises:
        Exception: if the number of busy sessions equals the configured
            maximum.
        QueryServerException: if ``status`` is not ``None`` and the response
            status is not SUCCESS, SUCCESS_WITH_INFO or STILL_EXECUTING.
    """
    n_sessions = conf.MAX_NUMBER_OF_SESSIONS.get()

    # When a single session is allowed, avoid multiple session logic
    if n_sessions == 1:
        withMultipleSession = False

    session = None

    if not withMultipleSession:
        # Default behaviour: get one session
        session = Session.objects.get_session(self.user, self.query_server['server_name'])
    else:
        # Get 2 + n_sessions sessions and filter out the busy ones
        sessions = Session.objects.get_n_sessions(
            self.user,
            n=2 + n_sessions,
            application=self.query_server['server_name']
        )
        LOG.debug('%s sessions found' % len(sessions))

        if sessions:
            # Include trashed documents to keep the query lazy
            # and avoid retrieving all documents
            docs = Document2.objects.get_history(doc_type='query-hive', user=self.user, include_trashed=True)
            busy_sessions = set()
            busy_states = [str(QueryHistory.STATE.submitted), str(QueryHistory.STATE.running)]

            # Only check last 40 documents for performance
            for doc in docs[:40]:
                try:
                    snippet_data = json.loads(doc.data)['snippets'][0]
                except (KeyError, IndexError):
                    # data might not contain a 'snippets' field or it might be empty
                    LOG.warning('No snippets in Document2 object of type query-hive')
                    continue

                session_guid = snippet_data.get('result', {}).get('handle', {}).get('session_guid')
                # Kept in its own name: reusing ``status`` here would overwrite
                # the ``status`` argument that gates the final response check.
                snippet_status = snippet_data.get('status')

                if snippet_status in busy_states and session_guid is not None:
                    busy_sessions.add(session_guid)

            n_busy_sessions = 0
            available_sessions = []
            for candidate in sessions:
                if candidate.guid not in busy_sessions:
                    available_sessions.append(candidate)
                else:
                    n_busy_sessions += 1

            if n_busy_sessions == n_sessions:
                raise Exception('Too many open sessions. Stop a running query before starting a new one')

            # None here means no available session was found
            session = available_sessions[0] if available_sessions else None

    if session is None:
        session = self.open_session(self.user)

    if hasattr(req, 'sessionHandle') and req.sessionHandle is None:
        req.sessionHandle = session.get_handle()

    res = fn(req)

    # Not supported currently in HS2 and Impala: TStatusCode.INVALID_HANDLE_STATUS
    if res.status.statusCode == TStatusCode.ERROR_STATUS and \
            re.search('Invalid SessionHandle|Invalid session|Client session expired', res.status.errorMessage or '', re.I):
        LOG.info('Retrying with a new session because for %s of %s' % (self.user, res))
        # Flag the stale session before replacing it
        session.status_code = TStatusCode.INVALID_HANDLE_STATUS
        session.save()

        session = self.open_session(self.user)
        req.sessionHandle = session.get_handle()

        # Get back the name of the function to call
        res = getattr(self._client, fn.attr)(req)

    if status is not None and res.status.statusCode not in (
            TStatusCode.SUCCESS_STATUS,
            TStatusCode.SUCCESS_WITH_INFO_STATUS,
            TStatusCode.STILL_EXECUTING_STATUS):
        if hasattr(res.status, 'errorMessage') and res.status.errorMessage:
            message = res.status.errorMessage
        else:
            message = ''
        raise QueryServerException(Exception('Bad status for request %s:\n%s' % (req, res)), message=message)

    return (res, session)
def open_session(self, user):
    """Open a Thrift session for ``user`` and store it as a ``Session`` record.

    Side effects: sets ``self.user`` and ``self.coordinator_host``.

    Args:
        user: object exposing ``username``; it becomes the owner of the
            stored session.

    Returns:
        The created ``Session``. Its ``properties`` hold the JSON-encoded
        configuration from the OpenSession response, merged with
        ``self.get_configuration()`` when no properties came back or when the
        server is 'beeswax'.

    Raises:
        QueryServerException: if the OpenSession response carries a status
            other than ``SUCCESS_STATUS``.
    """
    self.user = user
    server_name = self.query_server['server_name']

    kwargs = {
        'client_protocol': beeswax_conf.THRIFT_VERSION.get() - 1,
        # With SASL or LDAP the username is taken from the authentication
        # mechanism, since it depends on it.
        'username': user.username,
        'configuration': {},
    }

    if self.impersonation_enabled:
        kwargs.update({'username': DEFAULT_USER})

        if server_name.startswith('impala'):  # Only when Impala accepts it
            kwargs['configuration'].update({'impala.doas.user': user.username})

    if server_name in ('beeswax', 'sparksql'):  # All the time
        kwargs['configuration'].update({'hive.server2.proxy.user': user.username})

    if server_name.startswith('impala') and self.query_server['SESSION_TIMEOUT_S'] > 0:
        kwargs['configuration'].update({'idle_session_timeout': str(self.query_server['SESSION_TIMEOUT_S'])})

    LOG.info('Opening %s thrift session for user %s' % (server_name, user.username))

    req = TOpenSessionReq(**kwargs)
    res = self._client.OpenSession(req)
    self.coordinator_host = self._client.get_coordinator_host()

    # Validate the response before touching its payload, so that a failed
    # OpenSession surfaces the server's error and not a secondary one.
    if res.status is not None and res.status.statusCode not in (TStatusCode.SUCCESS_STATUS,):
        if hasattr(res.status, 'errorMessage') and res.status.errorMessage:
            message = res.status.errorMessage
        else:
            message = ''
        raise QueryServerException(Exception('Bad status for request %s:\n%s' % (req, res)), message=message)

    if self.coordinator_host:
        # Guard for responses that carry no configuration map
        if res.configuration is None:
            res.configuration = {}
        res.configuration['coordinator_host'] = self.coordinator_host

    sessionId = res.sessionHandle.sessionId
    LOG.info('Session %s opened' % repr(sessionId.guid))

    encoded_status, encoded_guid = HiveServerQueryHandle(secret=sessionId.secret, guid=sessionId.guid).get()
    properties = json.dumps(res.configuration)

    session = Session.objects.create(
        owner=user,
        application=server_name,
        status_code=res.status.statusCode,
        secret=encoded_status,
        guid=encoded_guid,
        server_protocol_version=res.serverProtocolVersion,
        properties=properties
    )

    # HS2 does not return properties in TOpenSessionResp
    # TEZ returns properties, but we need the configuration to detect engine
    # ``or {}`` keeps the merge below safe if the stored value decodes to
    # something falsy that is not a dict (e.g. JSON null) - TODO confirm
    # against Session.get_properties.
    properties = session.get_properties() or {}
    if not properties or server_name == 'beeswax':
        configuration = self.get_configuration()
        properties.update(configuration)
        session.properties = json.dumps(properties)
        session.save()

    return session