Beispiel #1
0
    def call(self, fn, req, status=TStatusCode.SUCCESS_STATUS):
        """Run the Thrift call ``fn(req)`` inside the user's session.

        A stored session for ``self.user`` on the current query server is
        reused when one exists; otherwise a new one is opened. If the server
        answers that the session is invalid, a fresh session is opened and
        the call is issued one more time.

        Args:
            fn: Callable taking ``req``. It must expose an ``attr`` attribute
                naming the matching method on ``self._client``; that name is
                used for the retry.
            req: Request object. Its ``sessionHandle`` is filled in when the
                attribute exists and is ``None``.
            status: When ``None`` the final status check is skipped and the
                response is returned whatever its status code is.

        Returns:
            The response of the (possibly retried) call.

        Raises:
            QueryServerException: If ``status`` is not ``None`` and the
                response status code is not SUCCESS, SUCCESS_WITH_INFO or
                STILL_EXECUTING.
        """
        session = Session.objects.get_session(
            self.user, self.query_server['server_name'])
        if session is None:
            session = self.open_session(self.user)

        needs_handle = hasattr(req, 'sessionHandle') and req.sessionHandle is None
        if needs_handle:
            req.sessionHandle = session.get_handle()

        response = fn(req)

        # TStatusCode.INVALID_HANDLE_STATUS is not supported currently in HS2
        # and Impala, so an invalid session is recognised from the error text.
        session_is_stale = (
            response.status.statusCode == TStatusCode.ERROR_STATUS
            and re.search('Invalid SessionHandle|Invalid session',
                          response.status.errorMessage or '', re.I)
        )
        if session_is_stale:
            retry_notice = 'Retrying with a new session because for %s of %s' % (
                self.user, response)
            LOG.info(retry_notice)

            session = self.open_session(self.user)
            req.sessionHandle = session.get_handle()

            # fn.attr gives back the name of the client method to call again.
            client_method = getattr(self._client, fn.attr)
            response = client_method(req)

        if status is None:
            return response

        accepted_codes = (
            TStatusCode.SUCCESS_STATUS,
            TStatusCode.SUCCESS_WITH_INFO_STATUS,
            TStatusCode.STILL_EXECUTING_STATUS,
        )
        if response.status.statusCode in accepted_codes:
            return response

        # Fall back to an empty message when the server gave none.
        message = getattr(response.status, 'errorMessage', None) or ''
        raise QueryServerException(
            Exception('Bad status for request %s:\n%s' % (req, response)),
            message=message)
Beispiel #2
0
  def open_session(self, user):
    """Open a new session on the query server and persist it.

    Args:
      user: Owner of the session. ``user.username`` is sent either as the
        session username or, for impersonation, as the proxied user.

    Returns:
      The ``Session`` object created for the newly opened session.

    Raises:
      QueryServerException: If the server answers with a status other than
        SUCCESS_STATUS.
    """
    server_name = self.query_server['server_name']
    session_conf = {}
    kwargs = {
        'client_protocol': beeswax_conf.THRIFT_VERSION.get() - 1,
        # With SASL or LDAP the username is taken from the authentication
        # mechanism instead.
        'username': user.username,
        'configuration': session_conf,
    }

    if self.impersonation_enabled:
      kwargs['username'] = DEFAULT_USER

      if server_name == 'impala': # Only when Impala accepts it
        session_conf['impala.doas.user'] = user.username

    if server_name == 'beeswax': # All the time
      session_conf['hive.server2.proxy.user'] = user.username

    req = TOpenSessionReq(**kwargs)
    res = self._client.OpenSession(req)

    if res.status is not None and res.status.statusCode not in (TStatusCode.SUCCESS_STATUS,):
      # Fall back to an empty message when the server gave none.
      message = getattr(res.status, 'errorMessage', None) or ''
      raise QueryServerException(Exception('Bad status for request %s:\n%s' % (req, res)), message=message)

    sessionId = res.sessionHandle.sessionId
    # Log the guid only: the handle identifier also carries the session
    # secret, which must not be written to the logs.
    LOG.info('Opening session %s' % repr(sessionId.guid))

    # The first element returned is what gets stored as the session secret.
    encoded_secret, encoded_guid = HiveServerQueryHandle(secret=sessionId.secret, guid=sessionId.guid).get()

    return Session.objects.create(owner=user,
                                  application=server_name,
                                  status_code=res.status.statusCode,
                                  secret=encoded_secret,
                                  guid=encoded_guid,
                                  server_protocol_version=res.serverProtocolVersion)
Beispiel #3
0
  def call_return_result_and_session(self, fn, req, status=TStatusCode.SUCCESS_STATUS,
                                     withMultipleSession=False):
    """Run the Thrift call ``fn(req)`` and return the response with its session.

    A stored session is reused when possible. With ``withMultipleSession`` and
    more than one session allowed, sessions referenced by submitted or running
    queries in the user's recent ``query-hive`` history are treated as busy and
    skipped. If the server reports the session as invalid or expired, the
    session is flagged, a new one is opened and the call is issued once more.

    Args:
      fn: Callable taking ``req``. It must expose an ``attr`` attribute naming
        the matching method on ``self._client``; that name is used for the
        retry.
      req: Request object. Its ``sessionHandle`` is filled in when the
        attribute exists and is ``None``.
      status: When ``None`` the final status check is skipped and the response
        is returned whatever its status code is.
      withMultipleSession: Pick a non-busy session among several instead of
        the single default one. Ignored when only one session is allowed.

    Returns:
      A ``(response, session)`` tuple.

    Raises:
      Exception: If the number of busy sessions equals the configured maximum.
      QueryServerException: If ``status`` is not ``None`` and the response
        status code is not SUCCESS, SUCCESS_WITH_INFO or STILL_EXECUTING.
    """
    n_sessions = conf.MAX_NUMBER_OF_SESSIONS.get()

    # When a single session is allowed, avoid multiple session logic
    if n_sessions == 1:
      withMultipleSession = False

    session = None

    if not withMultipleSession:
      # Default behaviour: get one session
      session = Session.objects.get_session(self.user, self.query_server['server_name'])

    else:
      # Get 2 + n_sessions sessions and filter out the busy ones
      sessions = Session.objects.get_n_sessions(self.user, n=2 + n_sessions, application=self.query_server['server_name'])
      LOG.debug('%s sessions found' % len(sessions))
      if sessions:
        # Include trashed documents to keep the query lazy
        # and avoid retrieving all documents
        docs = Document2.objects.get_history(doc_type='query-hive', user=self.user, include_trashed=True)
        busy_states = (str(QueryHistory.STATE.submitted), str(QueryHistory.STATE.running))
        busy_sessions = set()

        # Only check last 40 documents for performance
        for doc in docs[:40]:
          try:
            snippet_data = json.loads(doc.data)['snippets'][0]
          except (KeyError, IndexError):
            # data might not contain a 'snippets' field or it might be empty
            LOG.warn('No snippets in Document2 object of type query-hive')
            continue
          session_guid = snippet_data.get('result', {}).get('handle', {}).get('session_guid')
          # Keep this in its own name: rebinding ``status`` here would
          # overwrite the caller's argument used by the final status check.
          snippet_status = snippet_data.get('status')

          if snippet_status in busy_states and session_guid is not None:
            busy_sessions.add(session_guid)

        available_sessions = [candidate for candidate in sessions if candidate.guid not in busy_sessions]
        n_busy_sessions = len(sessions) - len(available_sessions)

        if n_busy_sessions == n_sessions:
          raise Exception('Too many open sessions. Stop a running query before starting a new one')

        # ``session`` stays None when no available session was found
        if available_sessions:
          session = available_sessions[0]

    if session is None:
      session = self.open_session(self.user)

    if hasattr(req, 'sessionHandle') and req.sessionHandle is None:
      req.sessionHandle = session.get_handle()

    res = fn(req)

    # Not supported currently in HS2 and Impala: TStatusCode.INVALID_HANDLE_STATUS
    if res.status.statusCode == TStatusCode.ERROR_STATUS and \
        re.search('Invalid SessionHandle|Invalid session|Client session expired', res.status.errorMessage or '', re.I):
      LOG.info('Retrying with a new session because for %s of %s' % (self.user, res))
      # Flag the stored session as invalid before replacing it
      session.status_code = TStatusCode.INVALID_HANDLE_STATUS
      session.save()

      session = self.open_session(self.user)

      req.sessionHandle = session.get_handle()

      # Get back the name of the function to call
      res = getattr(self._client, fn.attr)(req)

    if status is not None and res.status.statusCode not in (
        TStatusCode.SUCCESS_STATUS, TStatusCode.SUCCESS_WITH_INFO_STATUS, TStatusCode.STILL_EXECUTING_STATUS):
      # Fall back to an empty message when the server gave none.
      message = getattr(res.status, 'errorMessage', None) or ''
      raise QueryServerException(Exception('Bad status for request %s:\n%s' % (req, res)), message=message)

    return (res, session)
Beispiel #4
0
  def open_session(self, user):
    """Open a new Thrift session on the query server and persist it.

    Also records ``user`` on ``self.user`` and the client's coordinator host
    on ``self.coordinator_host``.

    Args:
      user: Owner of the session. ``user.username`` is sent either as the
        session username or, for impersonation, as the proxied user.

    Returns:
      The ``Session`` object created for the newly opened session, with its
      properties completed from ``self.get_configuration()`` when the server
      returned none or the server is ``beeswax``.

    Raises:
      QueryServerException: If the server answers with a status other than
        SUCCESS_STATUS.
    """
    self.user = user

    server_name = self.query_server['server_name']
    is_impala = server_name.startswith('impala')
    session_conf = {}
    kwargs = {
        'client_protocol': beeswax_conf.THRIFT_VERSION.get() - 1,
        # With SASL or LDAP the username is taken from the authentication
        # mechanism instead.
        'username': user.username,
        'configuration': session_conf,
    }

    if self.impersonation_enabled:
      kwargs['username'] = DEFAULT_USER

      if is_impala: # Only when Impala accepts it
        session_conf['impala.doas.user'] = user.username

    if server_name in ('beeswax', 'sparksql'): # All the time
      session_conf['hive.server2.proxy.user'] = user.username

    if is_impala and self.query_server['SESSION_TIMEOUT_S'] > 0:
      session_conf['idle_session_timeout'] = str(self.query_server['SESSION_TIMEOUT_S'])

    LOG.info('Opening %s thrift session for user %s' % (server_name, user.username))

    req = TOpenSessionReq(**kwargs)
    res = self._client.OpenSession(req)
    self.coordinator_host = self._client.get_coordinator_host()

    # Check the status before touching the response payload, so a response
    # without a configuration map cannot mask the server error.
    if res.status is not None and res.status.statusCode not in (TStatusCode.SUCCESS_STATUS,):
      # Fall back to an empty message when the server gave none.
      message = getattr(res.status, 'errorMessage', None) or ''
      raise QueryServerException(Exception('Bad status for request %s:\n%s' % (req, res)), message=message)

    if self.coordinator_host:
      # The configuration map is optional in the response; create it when the
      # server did not send one.
      if res.configuration is None:
        res.configuration = {}
      res.configuration['coordinator_host'] = self.coordinator_host

    sessionId = res.sessionHandle.sessionId
    LOG.info('Session %s opened' % repr(sessionId.guid))

    # The first element returned is what gets stored as the session secret.
    encoded_secret, encoded_guid = HiveServerQueryHandle(secret=sessionId.secret, guid=sessionId.guid).get()

    session = Session.objects.create(
        owner=user,
        application=server_name,
        status_code=res.status.statusCode,
        secret=encoded_secret,
        guid=encoded_guid,
        server_protocol_version=res.serverProtocolVersion,
        properties=json.dumps(res.configuration)
    )

    # HS2 does not return properties in TOpenSessionResp
    # TEZ returns properties, but we need the configuration to detect engine
    # NOTE(review): assumes get_properties() returns a dict (possibly empty),
    # never None - confirm against the Session model.
    properties = session.get_properties()
    if not properties or server_name == 'beeswax':
      properties.update(self.get_configuration())
      session.properties = json.dumps(properties)
      session.save()

    return session