Beispiel #1
0
    def explain(self, notebook, snippet):
        """Ask the query server to explain the snippet's current statement.

        The statement is prepared into a query, the query's database is
        selected and ``db.explain`` is invoked on it.

        Raises:
            QueryError: when the query server raises QueryServerException.
        """
        db = self._get_db(snippet)
        response = self._get_current_statement(db, snippet)
        session = self._get_session(notebook, snippet['type'])

        query = self._prepare_hql_query(snippet, response.pop('statement'),
                                        session)

        try:
            db.use(query.database)

            # NOTE(review): the explanation is computed but not returned by
            # the visible code -- confirm whether a return was lost.
            explanation = db.explain(query)
        except QueryServerException as ex:  # 'as' form works on Python 2.6+ and 3
            raise QueryError(ex.message)
Beispiel #2
0
  def check_status(self, notebook, snippet):
    """Map the server-side operation state of the snippet to a notebook status.

    Returns a dict with 'status' set to 'running' while the operation is
    running or submitted, otherwise 'available'.

    Raises:
      QueryError: when the operation state is failed or expired.
    """
    db = self._get_db(snippet)
    handle = self._get_handle(snippet)

    operation = db.get_operation_status(handle)
    state_index = HiveServerQueryHistory.STATE_MAP[operation.operationState].index
    states = QueryHistory.STATE

    if state_index in (states.failed.index, states.expired.index):
      raise QueryError(operation.errorMessage)

    if state_index in (states.running.index, states.submitted.index):
      return {'status': 'running'}
    return {'status': 'available'}
Beispiel #3
0
  def execute(self, notebook, snippet):
    """Submit the snippet's statement as a Hive job through DataEng.

    Returns a handle dict holding the job id and crn of the first job in
    the submission response; 'has_result_set' is always False.

    Raises:
      QueryError: when the first job's status is not in RUNNING_STATES.
    """
    hql = snippet['statement']

    data_eng = DataEng(self.user)
    submission = data_eng.submit_hive_job(self.cluster_name, hql, params=None, job_xml=None)
    first_job = submission['jobs'][0]
    job_status = first_job['status']

    if job_status not in RUNNING_STATES:
      raise QueryError('Submission failure', handle=job_status)

    result = {}
    result['id'] = first_job['jobId']
    result['crn'] = first_job['crn']
    result['has_result_set'] = False
    return result
Beispiel #4
0
    def execute(self, notebook, snippet):
        """Send the snippet's current statement to the query server.

        The database of the prepared query is selected only for the first
        statement (``statement_id == 0``).

        Raises:
            QueryError: when the query server raises QueryServerException;
                the current statement dict is attached as ``handle``.
        """
        db = self._get_db(snippet)

        statement = self._get_current_statement(db, snippet)
        session = self._get_session(notebook, snippet['type'])

        query = self._prepare_hql_query(snippet, statement['statement'],
                                        session)

        try:
            if statement.get('statement_id') == 0:
                db.use(query.database)
            # NOTE(review): the handle is not used or returned by the visible
            # code -- confirm whether the remainder of the method was lost.
            handle = db.client.query(query, withMultipleSession=True)
        except QueryServerException as ex:  # 'as' form works on Python 2.6+ and 3
            raise QueryError(ex.message, handle=statement)
Beispiel #5
0
    def check_status(self, notebook, snippet):
        """Look up the snippet's job through DataEng and report its status.

        Returns {'status': 'running'} while the job status is one of
        RUNNING_STATES and {'status': 'available'} for any other status that
        is not a failure.

        Raises:
            QueryError: when the job status is 'failed' or 'terminated'.
        """
        job_id = snippet['result']['handle']['id']

        listing = DataEng(self.user).list_jobs(job_ids=[job_id])
        job_status = listing['jobs'][0]['status']

        if job_status in RUNNING_STATES:
            return {'status': 'running'}
        if job_status in ('failed', 'terminated'):
            raise QueryError(_('Job was %s') % job_status)
        return {'status': 'available'}
Beispiel #6
0
def _get_query_key(notebook, snippet):
    """Pick the key identifying the query for this notebook/snippet pair.

    With ENABLE_NOTEBOOK_2 off the notebook uuid is used. With it on, the
    snippet's executable id is preferred, then the history uuid of the first
    executable of its executor, and finally the notebook uuid.

    Raises:
        QueryError: when the selected key is empty or missing.
    """
    if not ENABLE_NOTEBOOK_2.get():
        key = notebook['uuid']
    elif snippet.get('executable'):
        key = snippet['executable']['id']
    elif snippet.get('executor'):
        first_executable = snippet['executor']['executables'][0]
        key = first_executable.get('history', {}).get('uuid')
    else:
        key = notebook['uuid']  # get_logs()

    if key:
        return key
    raise QueryError('Query Key Missing')
Beispiel #7
0
def _get_data(task_id):
  """Return (headers, rows) of the stored result for a task.

  Reads from the result cache when TASK_SERVER.RESULT_CACHE is enabled,
  otherwise opens the result in storage and wraps it in a csv reader whose
  first row is consumed as the headers.

  Raises:
    QueryError: when the cache is enabled but holds no entry for the key.
  """
  result_key = _result_key(task_id)

  if not TASK_SERVER.RESULT_CACHE.get():
    f = storage.open(result_key)
    # Python 2's csv module expects a byte-string delimiter.
    if sys.version_info[0] > 2:
      delimiter = ','
    else:
      delimiter = ','.encode('utf-8')
    rows = csv.reader(f, delimiter=delimiter)
    return next(rows, []), rows

  cached = caches[CACHES_CELERY_QUERY_RESULT_KEY].get(result_key)  # TODO check if expired
  if cached is None:
    raise QueryError('Cached results %s not found.' % result_key)

  if cached:  # TODO check size
    return cached[0], cached[1:]
  return [], []
Beispiel #8
0
 def decorator(*args, **kwargs):
     """Call the wrapped function and translate the errors it raises.

     Raises:
         AuthenticationRequired: for an OperationalError whose text contains '1045'.
         QueryExpired: for any other exception mentioning an invalid handle.
         QueryError: for every remaining non-OperationalError exception.
     """
     try:
         return func(*args, **kwargs)
     except OperationalError as e:
         message = str(e)
         if '1045' in message:  # 'Access denied' # MySQL
             raise AuthenticationRequired(message=message)
         # Bare raise re-raises the active exception with its original traceback.
         raise
     except Exception as e:
         message = force_unicode(e)
         if 'Invalid query handle' in message or 'Invalid OperationHandle' in message:
             raise QueryExpired(e)
         LOG.exception('Query Error')
         raise QueryError(message)
Beispiel #9
0
def query_error_handler(func):
    """Decorator translating query-server exceptions into notebook errors.

    The wrapper raises:
        OperationTimeout: for a StructuredException whose text contains 'timed out'.
        QueryExpired: for a QueryServerException mentioning an invalid handle.
        QueryError: for any other StructuredException or QueryServerException.
    """
    def decorator(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except StructuredException as e:
            message = force_unicode(str(e))
            if 'timed out' in message:
                raise OperationTimeout(e)
            else:
                raise QueryError(message)
        except QueryServerException as e:
            message = force_unicode(str(e))
            if 'Invalid query handle' in message or 'Invalid OperationHandle' in message:
                raise QueryExpired(e)
            else:
                raise QueryError(message)

    # Without this the decorated name would be rebound to None.
    return decorator
Beispiel #10
0
    def execute(self, notebook, snippet):
        """Run the snippet's statement through the Solr SQL API, synchronously.

        The collection comes from the connector options or the snippet's
        database; when it is unset or 'default', the first collection the API
        lists is used. Column headers are gathered in first-seen order across
        all returned docs.

        Raises:
            QueryError: when the trailing doc of the result set carries an
                'EXCEPTION' entry.
        """
        from search.conf import SOLR_URL

        api = NativeSolrApi(SOLR_URL.get(), self.user.username)

        collection = self.options.get('collection') or snippet.get('database')
        if not collection or collection == 'default':
            collection = api.collections2()[0]

        response = api.sql(collection, snippet['statement'])
        docs = response['result-set']['docs']

        # The last doc is not a row: EOF, RESPONSE_TIME, EXCEPTION
        trailer = docs.pop(-1)
        if trailer.get('EXCEPTION'):
            raise QueryError(trailer['EXCEPTION'])

        headers = []
        for doc in docs:
            for col in doc:
                if col not in headers:
                    headers.append(col)

        data = [[doc.get(col) for col in headers] for doc in docs]
        has_result_set = bool(data)

        if has_result_set:
            meta = [{'name': col, 'type': '', 'comment': ''} for col in headers]
        else:
            data = []
            meta = []

        result = {
            'has_more': False,
            'data': data,
            'meta': meta,
            'type': 'table'
        }

        return {
            'sync': True,
            'has_result_set': has_result_set,
            'modified_row_count': 0,
            'result': result,
            'statement_id': 0,
            'has_more_statements': False,
            'statements_count': 1
        }
Beispiel #11
0
    def execute(self, notebook, snippet):
        """Send the snippet's current statement to the query server.

        For the first statement (``statement_id == 0``) the query's database
        is selected, unless the query has no database or the statement starts
        with 'set'.

        Raises:
            QueryError: when the query server raises QueryServerException;
                the current statement dict is attached as ``handle``.
        """
        db = self._get_db(snippet, cluster=snippet.get('selectedCompute'))

        statement = self._get_current_statement(db, snippet)
        session = self._get_session(notebook, snippet['type'])

        query = self._prepare_hql_query(snippet, statement['statement'],
                                        session)

        try:
            if statement.get('statement_id') == 0:
                if query.database and not statement['statement'].lower(
                ).startswith('set'):
                    db.use(query.database)
            # NOTE(review): the handle is not used or returned by the visible
            # code -- confirm whether the remainder of the method was lost.
            handle = db.client.query(query, with_multiple_session=True)
        except QueryServerException as ex:  # 'as' form works on Python 2.6+ and 3
            raise QueryError(ex.message, handle=statement)
Beispiel #12
0
  def check_status(self, notebook, snippet):
    """Map the server-side operation state of the snippet to a notebook status.

    Returns a dict with 'status' set to 'running' while the operation is
    running or submitted, otherwise 'available'.

    Raises:
      QueryExpired: when the operation failed or expired and its error message
        mentions a transition from CANCELED to ERROR.
      QueryError: for any other failed or expired operation.
    """
    db = self._get_db(snippet)
    handle = self._get_handle(snippet)

    operation = db.get_operation_status(handle)
    state_index = HiveServerQueryHistory.STATE_MAP[operation.operationState].index
    states = QueryHistory.STATE

    if state_index in (states.failed.index, states.expired.index):
      error = operation.errorMessage
      if error and 'transition from CANCELED to ERROR' in error:  # Hive case on canceled query
        raise QueryExpired()
      raise QueryError(error)

    if state_index in (states.running.index, states.submitted.index):
      return {'status': 'running'}
    return {'status': 'available'}
Beispiel #13
0
  def check_status(self, notebook, snippet):
    """Derive the notebook status of the snippet's Oozie workflow.

    Returns {'status': 'available'} once the log API reports the job as done
    and the workflow is no longer running, {'status': 'running'} while the
    workflow runs, and {'status': 'failed'} otherwise.

    Raises:
      QueryError: when the finished workflow status is KILLED or FAILED.
    """
    job_id = snippet['result']['handle']['id']
    request = MockRequest(self.user, self.fs, self.jt)

    oozie_workflow = check_job_access_permission(request, job_id)
    # NOTE(review): self.jt is passed twice here -- confirm the first argument
    # is not meant to be self.fs.
    job_api = api.get(self.jt, self.jt, self.user)
    _logs, _workflow_actions, is_really_done = job_api.get_log(request, oozie_workflow)

    if is_really_done and not oozie_workflow.is_running():
      if oozie_workflow.status in ('KILLED', 'FAILED'):
        raise QueryError(_('The script failed to run and was stopped'))
      return {'status': 'available'}

    if oozie_workflow.is_running():
      return {'status': 'running'}
    return {'status': 'failed'}
Beispiel #14
0
    def fetch_result_size(self, notebook, snippet):
        """Return the row count, size and message of a finished result.

        Delegates to the Hive or Impala size helper depending on the snippet
        type and packs the returned triple into a dict with the keys 'rows',
        'size' and 'message'.

        Raises:
            QueryError: when the snippet status is not 'available'.
            OperationNotSupported: when the snippet type is neither 'hive'
                nor 'impala'.
        """
        if snippet.get('status') != 'available':
            raise QueryError(_('Result status is not available'))

        engine = snippet['type']
        if engine not in ('hive', 'impala'):
            raise OperationNotSupported(
                _('Cannot fetch result metadata for snippet type: %s') % engine)

        if engine == 'hive':
            measure = self._get_hive_result_size
        else:  # Impala
            measure = self._get_impala_result_size

        rows, size, message = measure(notebook, snippet)
        return {'rows': rows, 'size': size, 'message': message}
Beispiel #15
0
  def explain(self, notebook, snippet):
    """Explain the snippet's current statement.

    Returns a dict with 'status' 0, the textual explanation and the first
    statement of the prepared query.

    Raises:
      QueryError: when the query server raises QueryServerException.
    """
    db = self._get_db(snippet, interpreter=self.interpreter)
    current = self._get_current_statement(notebook, snippet)
    session = self._get_session(notebook, snippet['type'])

    hql = current.pop('statement')
    query = self._prepare_hql_query(snippet, hql, session)

    try:
      db.use(query.database)
      plan = db.explain(query)
    except QueryServerException as ex:
      raise QueryError(ex.message)

    result = {'status': 0}
    result['explanation'] = plan.textual
    result['statement'] = query.get_query_statement(0)
    return result
Beispiel #16
0
    def check_status(self, notebook, snippet):
        """Derive the notebook status of the snippet's Oozie workflow.

        A finished workflow is only reported as 'available' once its logs are
        present; until then it is still reported as 'running'.

        Raises:
            QueryError: when the finished workflow status is KILLED or FAILED.
        """
        job_id = snippet['result']['handle']['id']

        oozie_workflow = check_job_access_permission(self.request, job_id)
        logs, _workflow_actions, is_really_done = self._get_output(oozie_workflow)

        if is_really_done and not oozie_workflow.is_running():
            if oozie_workflow.status in ('KILLED', 'FAILED'):
                raise QueryError(_('The script failed to run and was stopped'))
            # Tricky case when the logs are being moved by YARN at job completion
            return {'status': 'available' if logs else 'running'}

        if oozie_workflow.is_running():
            return {'status': 'running'}
        return {'status': 'failed'}
Beispiel #17
0
 def get_sample_data(self, snippet, database=None, table=None, column=None,
                     is_async=False, operation=None):
     """Fetch sample data through the module-level _get_sample_data helper.

     Raises:
         QueryError: when the query server raises QueryServerException.
     """
     try:
         db = self._get_db(
             snippet, is_async=is_async, interpreter=self.interpreter)
         sample = _get_sample_data(
             db, database, table, column, is_async,
             operation=operation, cluster=self.interpreter)
     except QueryServerException as ex:
         raise QueryError(ex.message)
     return sample
Beispiel #18
0
    def execute(self, notebook, snippet):
        """Submit the snippet's current statement and describe the handle.

        For the first statement the query's database is selected first
        (unless there is none or the statement starts with 'set'); a session
        returned by that call replaces the one used for the query itself.

        Returns a dict describing the server handle, merged with the current
        statement dict.

        Raises:
            QueryError: when the query server raises QueryServerException;
                the current statement dict is attached as ``handle``.
        """
        db = self._get_db(snippet, interpreter=self.interpreter)

        statement = self._get_current_statement(notebook, snippet)
        session = self._get_session(notebook, snippet['type'])
        query = self._prepare_hql_query(snippet, statement['statement'], session)
        active_session = self._get_session_by_id(notebook, snippet['type'])

        try:
            # TODO: move this to client
            needs_use = (
                statement.get('statement_id') == 0
                and query.database
                and not statement['statement'].lower().startswith('set')
            )
            if needs_use:
                use_result = db.use(query.database, session=active_session)
                if use_result.session:
                    active_session = use_result.session
            handle = db.client.query(query, session=active_session)
        except QueryServerException as ex:
            raise QueryError(ex.message, handle=statement)

        # All good
        server_id, server_guid = handle.get()
        if sys.version_info[0] > 2:
            server_id, server_guid = server_id.decode('utf-8'), server_guid.decode('utf-8')

        response = {'secret': server_id, 'guid': server_guid}
        response['operation_type'] = handle.operation_type
        response['has_result_set'] = handle.has_result_set
        response['modified_row_count'] = handle.modified_row_count
        response['log_context'] = handle.log_context
        response['session_guid'] = handle.session_guid
        response['session_id'] = handle.session_id
        response['session_type'] = snippet['type']
        response.update(statement)

        return response
Beispiel #19
0
    def check_status(self, notebook, snippet):
        """Report the status of the snippet's Oozie job.

        A job that is neither running nor killed/failed is 'available' only
        once its log output yields results; with log output but no results it
        is still reported as 'running', and with no log output as 'failed'.

        Raises:
            QueryError: when the job status is KILLED or FAILED.
        """
        job_id = snippet['result']['handle']['id']
        oozie_job = check_job_access_permission(self.request, job_id)

        if oozie_job.is_running():
            return {'status': 'running'}
        if oozie_job.status in ('KILLED', 'FAILED'):
            raise QueryError(_('Job was %s') % oozie_job.status)

        # Check if job results are actually available, since YARN takes a while to move logs to JHS,
        log_output = self.get_log(notebook, snippet)
        if not log_output:
            return {'status': 'failed'}
        if self._get_results(log_output, snippet['type']):
            return {'status': 'available'}
        return {'status': 'running'}
Beispiel #20
0
  def create_session(self, lang='scala', properties=None):
    """Return the user's stored Spark session, or create and store a new one.

    A stored session is reused when _check_session accepts it. Otherwise a
    session is created through the API and polled (up to 120 times, sleeping
    one second after each poll) while its state is 'starting'.

    Raises:
      QueryError: when the session state is not 'idle' after polling.
    """
    api = self.get_api()

    cached = self._get_session_info_from_user()
    if cached and self._check_session(cached):
      return cached

    if not properties and USE_DEFAULT_CONFIGURATION.get():
      user_config = DefaultConfiguration.objects.get_configuration_for_user(app='spark', user=self.user)
      if user_config is not None:
        properties = user_config.properties_list

    props = self.get_livy_props(lang, properties)

    session_id = api.create_session(**props)['id']

    status = api.get_session(session_id)
    polls = 0
    while status['state'] == 'starting' and polls < 120:
      status = api.get_session(session_id)
      polls += 1
      time.sleep(1)

    if status['state'] != 'idle':
      if status['log']:
        info = '\n'.join(status['log'])
      else:
        info = 'timeout'
      raise QueryError(_('The Spark session is %s and could not be created in the cluster: %s') % (status['state'], info))

    new_session_info = {
        'type': lang,
        'id': session_id,
        'properties': self.to_properties(props)
    }
    self._set_session_info_to_user(new_session_info)

    return new_session_info
Beispiel #21
0
  def fetch_result(self, notebook, snippet, rows, start_over):
    """Fetch rows for the snippet's handle and format them as a table.

    Returns a dict with 'has_more', 'data', per-column 'meta' (name, type,
    comment) and 'type' set to 'table'.

    Raises:
      QueryExpired: when the server error mentions client inactivity or an
        invalid query handle and carries a non-empty message.
      QueryError: for any other QueryServerException.
    """
    db = self._get_db(snippet, interpreter=self.interpreter)
    handle = self._get_handle(snippet)

    try:
      results = db.fetch(handle, start_over=start_over, rows=rows)
    except QueryServerException as ex:
      looks_expired = re.search('(client inactivity)|(Invalid query handle)', str(ex))
      if looks_expired and ex.message:
        raise QueryExpired(message=ex.message)
      raise QueryError(ex)

    # No escaping...
    has_more = results.has_more
    data = results.rows()
    meta = []
    for column in results.data_table.cols():
      meta.append({
          'name': column.name,
          'type': column.type,
          'comment': column.comment
      })

    return {
        'has_more': has_more,
        'data': data,
        'meta': meta,
        'type': 'table'
    }
Beispiel #22
0
  def create_session(self, lang='scala', properties=None):
    """Create a Spark session through the Spark API and wait for it to be idle.

    When no properties are given, the user's default configuration is used if
    enabled and present, otherwise self.get_properties(). The session is
    polled (up to 120 times, sleeping one second after each poll) while its
    state is 'starting'.

    Raises:
      QueryError: when the session state is not 'idle' after polling.
    """
    if not properties:
      if USE_DEFAULT_CONFIGURATION.get():
        config = DefaultConfiguration.objects.get_configuration_for_user(app='spark', user=self.user)
      else:
        config = None
      properties = self.get_properties() if config is None else config.properties_list

    props = {}
    if properties is not None:
      props = dict((p['name'], p['value']) for p in properties)
    props['kind'] = lang

    api = get_spark_api(self.user)
    session_id = api.create_session(**props)['id']

    status = api.get_session(session_id)
    polls = 0
    while status['state'] == 'starting' and polls < 120:
      status = api.get_session(session_id)
      polls += 1
      time.sleep(1)

    if status['state'] != 'idle':
      if status['log']:
        info = '\n'.join(status['log'])
      else:
        info = 'timeout'
      raise QueryError(_('The Spark session could not be created in the cluster: %s') % info)

    return {
        'type': lang,
        'id': session_id,
        'properties': properties
    }
Beispiel #23
0
    def create_session(self, lang='scala', properties=None):
        """Create a Livy Spark session and wait for it to become idle.

        When no properties are given, the user's default configuration is used
        if enabled and present, otherwise self.get_properties(). List-typed
        properties and the Spark conf are normalized before the request is
        sent. The session is polled (up to 120 times, sleeping one second
        after each poll) while its state is 'starting'.

        Returns:
            dict with the session 'type', 'id' and the (un-normalized)
            'properties' that were used.

        Raises:
            QueryError: when the session state is not 'idle' after polling.
        """
        if not properties:
            config = None
            if USE_DEFAULT_CONFIGURATION.get():
                config = DefaultConfiguration.objects.get_configuration_for_user(
                    app='spark', user=self.user)

            if config is not None:
                properties = config.properties_list
            else:
                properties = self.get_properties()

        props = dict([(p['name'], p['value'])
                      for p in properties]) if properties is not None else {}

        # HUE-4761: Hue's session request is causing Livy to fail with "JsonMappingException: Can not deserialize
        # instance of scala.collection.immutable.List out of VALUE_STRING token" due to List type values
        # not being formed properly, they are quoted csv strings (without brackets) instead of proper List
        # types, this is for keys; archives, jars, files and pyFiles. The Mako frontend probably should be
        # modified to pass the values as Livy expects but for now we coerce these types to be Lists.
        # Issue only occurs when non-default values are used because the default path properly sets the
        # empty list '[]' for these four values.
        # Note also that Livy has a 90 second timeout for the session request to complete, this needs to
        # be increased for requests that take longer, for example when loading large archives.
        for key in ('archives', 'jars', 'files', 'pyFiles'):
            # Keys that are absent (or None) are left alone instead of raising.
            value = props.get(key)
            if value is not None and not isinstance(value, list):
                props[key] = value.split(",")
                LOG.debug("Check List type: %s was not a list", key)

        # Convert the conf list to a dict for Livy; skipped when there is no
        # conf or it is already a dict.
        conf_items = props.get('conf')
        if conf_items is not None and not isinstance(conf_items, dict):
            LOG.debug("Property Spark Conf kvp list from UI is: %s", conf_items)
            conf_dict = {}
            for i, kvp in enumerate(conf_items):
                conf_key = kvp.get('key')
                conf_value = kvp.get('value')
                LOG.debug("Property Spark Conf key %s = %s", i, conf_key)
                LOG.debug("Property Spark Conf value %s = %s", i, conf_value)
                conf_dict[conf_key] = conf_value
            props['conf'] = conf_dict
            LOG.debug("Property Spark Conf dictionary is: %s", conf_dict)

        props['kind'] = lang

        api = get_spark_api(self.user)

        response = api.create_session(**props)

        status = api.get_session(response['id'])
        count = 0

        while status['state'] == 'starting' and count < 120:
            status = api.get_session(response['id'])
            count += 1
            time.sleep(1)

        if status['state'] != 'idle':
            info = '\n'.join(status['log']) if status['log'] else 'timeout'
            raise QueryError(
                _('The Spark session could not be created in the cluster: %s')
                % info)

        return {'type': lang, 'id': response['id'], 'properties': properties}
Beispiel #24
0
 def decorator(*args, **kwargs):
     """Invoke the wrapped callable, re-raising any Exception as QueryError."""
     try:
         result = func(*args, **kwargs)
     except Exception as error:
         raise QueryError(force_unicode(str(error)))
     return result
Beispiel #25
0
    def fetch_result(self, notebook, snippet, rows, start_over):
        """Fetch the output of the snippet's cell from the session API.

        An 'ok' output is returned as a dict with 'data', 'images', 'meta'
        and 'type'. A Livy table payload is preferred; otherwise an
        'application/json' payload is used, and as a last resort the
        'text/plain' payload is returned as a single text cell. When
        ``start_over`` is false the data is emptied, since incremental
        fetching is not supported.

        Raises:
            SessionExpired: when the API error says the session was not found.
            QueryError: when the output status is 'error'.
        """
        api = self.get_api()
        session = _get_snippet_session(notebook, snippet)
        cell = snippet['result']['handle']['id']

        try:
            response = api.fetch_data(session['id'], cell)
        except Exception as e:
            message = force_unicode(str(e)).lower()
            # Raw string: '\d' in a plain literal is an invalid escape sequence.
            if re.search(r"session ('\d+' )?not found", message):
                raise SessionExpired(e)
            # Bare raise re-raises the active exception with its original traceback.
            raise

        content = response['output']

        if content['status'] == 'ok':
            data = content['data']
            images = []

            try:
                table = data['application/vnd.livy.table.v1+json']
            except KeyError:
                try:
                    images = [data['image/png']]
                except KeyError:
                    images = []
                if 'application/json' in data:
                    result = data['application/json']
                    data = result['data']
                    meta = [{
                        'name': field['name'],
                        'type': field['type'],
                        'comment': ''
                    } for field in result['schema']['fields']]
                    result_type = 'table'
                else:
                    data = [[data['text/plain']]]
                    meta = [{
                        'name': 'Header',
                        'type': 'STRING_TYPE',
                        'comment': ''
                    }]
                    result_type = 'text'
            else:
                data = table['data']
                meta = [{
                    'name': h['name'],
                    'type': h['type'],
                    'comment': ''
                } for h in table['headers']]
                result_type = 'table'

            # Non start_over not supported
            if not start_over:
                data = []

            return {
                'data': data,
                'images': images,
                'meta': meta,
                'type': result_type
            }
        elif content['status'] == 'error':
            tb = content.get('traceback', None)

            if not tb:
                # No traceback: fall back to the error name and value.
                msg = content.get('ename', 'unknown error')

                evalue = content.get('evalue')
                if evalue is not None:
                    msg = '%s: %s' % (msg, evalue)
            else:
                msg = ''.join(tb)

            raise QueryError(msg)
Beispiel #26
0
    def _create_engine(self):
        """Build the SQLAlchemy engine for this connector from self.options.

        Steps, in order: substitute ``${...}`` variables in the URL (USER
        defaults to the current username; session properties may override
        USER and supply PASSWORD), quote parts of ``awsathena+rest://`` URLs,
        insert the username after the driver name when impersonation is on,
        decode the JSON-encoded ``credentials_json`` / ``connect_args``
        options, then pass the remaining options to ``create_engine``.

        Note that ``self.options`` is modified in place when the JSON options
        are decoded.

        Raises:
            AuthenticationRequired: when the URL needs ``${PASSWORD}`` but the
                session properties supply neither user nor password.
            QueryError: when impersonation is on and no driver name can be
                extracted from the URL.
        """
        if '${' in self.options['url']:  # URL parameters substitution
            url_vars = {'USER': self.user.username}

            if '${PASSWORD}' in self.options['url']:
                auth_provided = False
                if 'session' in self.options:
                    for _prop in self.options['session']['properties']:
                        if _prop['name'] == 'user':
                            url_vars['USER'] = _prop['value']
                            auth_provided = True
                        if _prop['name'] == 'password':
                            url_vars['PASSWORD'] = _prop['value']
                            auth_provided = True

                if not auth_provided:
                    raise AuthenticationRequired(
                        message='Missing username and/or password')

            raw_url = Template(self.options['url'])
            url = raw_url.safe_substitute(**url_vars)
        else:
            url = self.options['url']

        if url.startswith('awsathena+rest://'):
            # NOTE(review): the fixed slices presumably cover the access key
            # and secret of the URL -- confirm against the expected format.
            url = url.replace(url[17:37], urllib_quote_plus(url[17:37]))
            url = url.replace(url[38:50], urllib_quote_plus(url[38:50]))
            s3_staging_dir = url.rsplit('s3_staging_dir=', 1)[1]
            url = url.replace(s3_staging_dir,
                              urllib_quote_plus(s3_staging_dir))

        if self.options.get('has_impersonation'):
            m = re.search(URL_PATTERN, url)
            # A non-matching URL must reach the QueryError below instead of
            # failing with AttributeError on None.
            driver_name = m.group('driver_name') if m else None

            if not driver_name:
                raise QueryError(
                    'Driver name of %(url)s could not be found and impersonation is turned on'
                    % {'url': url})

            url = url.replace(
                driver_name, '%(driver_name)s%(username)s@' % {
                    'driver_name': driver_name,
                    'username': self.user.username
                })

        if self.options.get('credentials_json'):
            self.options['credentials_info'] = json.loads(
                self.options.pop('credentials_json'))

        # Enables various SqlAlchemy args to be passed along for both Hive & Presto connectors
        # Refer to SqlAlchemy pyhive for more details
        if self.options.get('connect_args'):
            self.options['connect_args'] = json.loads(
                self.options.pop('connect_args'))

        # Strip the connector-only options before handing the rest to create_engine.
        options = self.options.copy()
        for connector_only in ('session', 'url', 'has_ssh',
                               'has_impersonation', 'ssh_server_host'):
            options.pop(connector_only, None)

        options['pool_pre_ping'] = not url.startswith(
            'phoenix://'
        )  # Should be moved to dialect when connectors always on

        return create_engine(url, **options)
Beispiel #27
0
 def get_sample_data(self, snippet, database=None, table=None, column=None):
   """Fetch sample data through the module-level _get_sample_data helper.

   Raises:
     QueryError: when the query server raises QueryServerException.
   """
   try:
     db = self._get_db(snippet)
     return _get_sample_data(db, database, table, column)
   except QueryServerException as ex:  # 'as' form works on Python 2.6+ and 3
     raise QueryError(ex.message)
Beispiel #28
0
          'meta': meta,
          'type': type
      }
    elif content['status'] == 'error':
      tb = content.get('traceback', None)

      if tb is None:
        msg = content.get('ename', 'unknown error')

        evalue = content.get('evalue')
        if evalue is not None:
          msg = '%s: %s' % (msg, evalue)
      else:
        msg = ''.join(tb)

      raise QueryError(msg)

  def download(self, notebook, snippet, format):
    """Download the snippet's cell result in the given format via spark_download.

    Raises:
      PopupException: wrapping any exception raised while preparing or
        running the download.
    """
    try:
      api = get_spark_api(self.user)
      session = _get_snippet_session(notebook, snippet)
      cell = snippet['result']['handle']['id']

      return spark_download(api, session['id'], cell, format)
    except Exception as e:  # 'as' form works on Python 2.6+ and 3
      raise PopupException(e)

  def cancel(self, notebook, snippet):
    """Call cancel on the Spark API with the id of the snippet's session."""
    spark_api = get_spark_api(self.user)
    session_id = _get_snippet_session(notebook, snippet)['id']
    spark_api.cancel(session_id)
Beispiel #29
0
                        database=None,
                        table=None,
                        column=None,
                        async=False,
                        operation=None):
        try:
            db = self._get_db(snippet, async, interpreter=self.interpreter)
            return _get_sample_data(db,
                                    database,
                                    table,
                                    column,
                                    async,
                                    operation=operation,
                                    cluster=self.interpreter)
        except QueryServerException as ex:
            raise QueryError(ex.message)

    @query_error_handler
    def explain(self, notebook, snippet):
        db = self._get_db(snippet, interpreter=self.interpreter)
        response = self._get_current_statement(notebook, snippet)
        session = self._get_session(notebook, snippet['type'])

        query = self._prepare_hql_query(snippet, response.pop('statement'),
                                        session)

        try:
            db.use(query.database)

            explanation = db.explain(query)
        except QueryServerException as ex:
Beispiel #30
0
 def send_exception(message):
   """Raise a QueryError built from the given message."""
   error = QueryError(message=message)
   raise error