Beispiel #1
0
def get_log(notebook,
            snippet,
            startFrom=None,
            size=None,
            postdict=None,
            user_id=None):
    """Return the log text written for the notebook's download task.

    The task is looked up with ``download_to_file.AsyncResult`` under
    ``notebook['uuid']``; the log is read from the file named by the
    ``log_path`` entry of the task's ``info``.

    Args:
        notebook: Mapping providing the task id under ``'uuid'``.
        snippet: Not used by this function.
        startFrom: Number of leading log lines to skip. A falsy value
            returns the whole file.
        size: Not used by this function.
        postdict: Not used by this function.
        user_id: Not used by this function.

    Returns:
        The log content as a string, or ``''`` when the task is
        ``'SUBMITTED'``, ranks below ``'PROGRESS'``, or is in an exception
        state and ``maybe_reraise()`` did not raise.

    Raises:
        QueryExpired: If the task state is ``states.PENDING``.
    """
    result = download_to_file.AsyncResult(notebook['uuid'])
    state = result.state
    if state == states.PENDING:
        raise QueryExpired()
    elif state == 'SUBMITTED' or states.state(state) < states.state(
            'PROGRESS'):
        return ''
    elif state in states.EXCEPTION_STATES:
        result.maybe_reraise()
        return ''

    info = result.info
    with open(info.get('log_path'), 'r') as f:
        if not startFrom:
            return f.read()
        # Join once instead of growing a string line by line, which is
        # quadratic in the worst case. ``index`` is zero-based, so keeping
        # ``index >= startFrom`` skips exactly ``startFrom`` lines.
        return ''.join(
            line for index, line in enumerate(f) if index >= startFrom)
Beispiel #2
0
    def check_status(self, notebook, snippet):
        """Translate the operation state of the snippet's handle to a status.

        Args:
            notebook: Not used by this method.
            snippet: Passed to ``self._get_db`` and ``self._get_handle``.

        Returns:
            ``{'status': 'running'}`` when the mapped state is running or
            submitted, otherwise ``{'status': 'available'}``.

        Raises:
            QueryExpired: If the state is failed/expired and the error
                message contains ``'transition from CANCELED to ERROR'``.
            QueryError: For every other failed/expired state.
        """
        db = self._get_db(snippet)
        handle = self._get_handle(snippet)
        operation = db.get_operation_status(handle)
        status = HiveServerQueryHistory.STATE_MAP[operation.operationState]

        known_states = QueryHistory.STATE
        if status.index in (known_states.failed.index,
                            known_states.expired.index):
            error = operation.errorMessage
            if not error:
                raise QueryError(error)
            if 'transition from CANCELED to ERROR' in error:
                # Hive case on canceled query
                raise QueryExpired()
            if re.search(
                    'Cannot validate serde: org.apache.hive.hcatalog.data.JsonSerDe',
                    str(error)):
                raise QueryError(
                    message=error +
                    _('. Is hive-hcatalog-core.jar registered?'))
            raise QueryError(error)

        if status.index in (known_states.running.index,
                            known_states.submitted.index):
            return {'status': 'running'}
        return {'status': 'available'}
Beispiel #3
0
def get_log(notebook, snippet, startFrom=None, size=None, postdict=None, user_id=None):
  """Return the stored log text for the notebook's download task.

  Returns '' when the task is 'SUBMITTED', ranks below 'PROGRESS', is in an
  exception state, or when TASK_SERVER.RESULT_CACHE.get() is truthy.
  Otherwise the log is read through storage.open(_log_key(notebook, snippet)),
  skipping the first `startFrom` lines when `startFrom` is truthy.

  `size`, `postdict` and `user_id` are not used by this function.

  Raises:
    QueryExpired: if the task state is states.PENDING.
  """
  task = download_to_file.AsyncResult(notebook['uuid'])
  task_state = task.state

  if task_state == states.PENDING:
    raise QueryExpired()
  if task_state == 'SUBMITTED' or states.state(task_state) < states.state('PROGRESS'):
    return ''
  if task_state in states.EXCEPTION_STATES:
    return ''
  if TASK_SERVER.RESULT_CACHE.get():
    return ''

  if not startFrom:
    with storage.open(_log_key(notebook, snippet), 'r') as log_file:
      return log_file.read()

  tail = string_io()
  with storage.open(_log_key(notebook, snippet), 'r') as log_file:
    # Line numbers are one-based; keep only those past the requested offset.
    for line_number, line in enumerate(log_file, 1):
      if line_number > startFrom:
        tail.write(line)
  return tail.getvalue()
Beispiel #4
0
    def fetch_result(self, notebook, snippet, rows, start_over):
        """Fetch a page of rows for the snippet's handle.

        Args:
            notebook: Not used by this method.
            snippet: Passed to ``self._get_db`` and ``self._get_handle``.
            rows: Forwarded to ``db.fetch`` as ``rows``.
            start_over: Forwarded to ``db.fetch`` as ``start_over``.

        Returns:
            A dict with ``has_more``, ``data``, ``meta`` (one
            name/type/comment dict per column) and ``type`` set to
            ``'table'``.

        Raises:
            QueryExpired: If the server error mentions client inactivity or
                an invalid query handle and carries a message.
            QueryError: For any other ``QueryServerException``.
        """
        db = self._get_db(snippet, interpreter=self.interpreter)
        handle = self._get_handle(snippet)

        try:
            results = db.fetch(handle, start_over=start_over, rows=rows)
        except QueryServerException as ex:
            expired = re.search('(client inactivity)|(Invalid query handle)',
                                str(ex))
            if expired and ex.message:
                raise QueryExpired(message=ex.message)
            raise QueryError(ex)

        # No escaping...
        has_more = results.has_more
        data = results.rows()
        meta = []
        for column in results.data_table.cols():
            meta.append({
                'name': column.name,
                'type': column.type,
                'comment': column.comment
            })

        return {
            'has_more': has_more,
            'data': data,
            'meta': meta,
            'type': 'table'
        }
Beispiel #5
0
    def check_status(self, notebook, snippet):
        """Translate the operation state of the snippet's handle to a status.

        Args:
            notebook: Not used by this method.
            snippet: Passed to ``self._get_db`` and ``self._get_handle``.

        Returns:
            A dict whose ``'status'`` is ``'running'`` (mapped state running
            or submitted) or ``'available'``. ``'has_result_set'`` is added
            when ``operation.hasResultSet`` is not ``None``.

        Raises:
            QueryExpired: If the state is failed/expired and the error
                message contains ``'transition from CANCELED to ERROR'``.
            QueryError: For every other failed/expired state.
        """
        db = self._get_db(snippet, interpreter=self.interpreter)
        handle = self._get_handle(snippet)
        operation = db.get_operation_status(handle)
        status = HiveServerQueryHistory.STATE_MAP[operation.operationState]

        known_states = QueryHistory.STATE
        if status.value in (known_states.failed.value,
                            known_states.expired.value):
            error = operation.errorMessage
            if not error:
                raise QueryError(error)
            if 'transition from CANCELED to ERROR' in error:
                # Hive case on canceled query
                raise QueryExpired()
            if re.search(
                    'Cannot validate serde: org.apache.hive.hcatalog.data.JsonSerDe',
                    str(error)):
                raise QueryError(
                    message=error +
                    _('. Is hive-hcatalog-core.jar registered?'))
            raise QueryError(error)

        if status.value in (known_states.running.value,
                            known_states.submitted.value):
            response = {'status': 'running'}
        else:
            response = {'status': 'available'}

        if operation.hasResultSet is not None:
            # HIVE-12442 - With LLAP & HIVE_CLI_SERVICE_PROTOCOL_V8,
            # hasResultSet can change after get_operation_status
            response['has_result_set'] = operation.hasResultSet

        return response
Beispiel #6
0
    def autocomplete(self,
                     snippet,
                     database=None,
                     table=None,
                     column=None,
                     nested=None,
                     operation=None):
        """Run ``_autocomplete`` for the snippet and normalise its error key.

        When the snippet has no ``'query'`` but its ``'source'`` is
        ``'query'``, ``database`` is used as a ``Document2`` id: the document
        is loaded, read access is checked for ``self.user``, the statement of
        its first snippet becomes the query, and ``database``/``table`` are
        reset to ``''``.

        Returns:
            The response from ``_autocomplete``; a truthy ``'error'`` entry
            is moved to ``'message'``.

        Raises:
            QueryExpired: If the error message contains ``'Read timed out'``.
        """
        db = self._get_db(snippet, interpreter=self.interpreter)

        query = snippet.get('query') or None
        if query is None and snippet.get('source') == 'query':
            document = Document2.objects.get(id=database)
            document.can_read_or_exception(self.user)
            notebook = Notebook(document=document).get_data()
            snippet = notebook['snippets'][0]
            statement = self._get_current_statement(notebook, snippet)
            query = statement['statement']
            database = ''
            table = ''

        resp = _autocomplete(db,
                             database,
                             table,
                             column,
                             nested,
                             query=query,
                             cluster=self.interpreter,
                             operation=operation)

        if resp.get('error'):
            message = resp.pop('error')
            resp['message'] = message
            if 'Read timed out' in message:
                raise QueryExpired(message)

        return resp
Beispiel #7
0
def get_jobs(notebook, snippet, logs, **kwargs):
    """Return the jobs of the snippet via the API behind the download task.

    Re-implemented to fetch the updated guid in download_to_file from the DB.

    Returns:
        ``[]`` when the task is ``'SUBMITTED'``, ranks below ``'PROGRESS'``,
        or is in an exception state and ``maybe_reraise()`` did not raise;
        otherwise whatever ``api.get_jobs(notebook, snippet, logs)`` returns.

    Raises:
        QueryExpired: If the task state is ``states.PENDING``.
    """
    task = download_to_file.AsyncResult(notebook['uuid'])
    current = task.state
    if current == states.PENDING:
        raise QueryExpired()
    if current == 'SUBMITTED' or states.state(current) < states.state(
            'PROGRESS'):
        return []
    if current in states.EXCEPTION_STATES:
        task.maybe_reraise()
        return []

    # Copy the handle so the task's stored info is not shared with the snippet.
    snippet['result']['handle'] = task.info.get('handle', {}).copy()

    api = get_api(_get_request(**kwargs), snippet)
    # Insidious problem (original author's note): each call in the hive api
    # transforms the guid/secret to binary form. get_log does the transform,
    # but not get_jobs. get_jobs is called after get_log so it is usually not
    # an issue. Our get_log implementation doesn't.
    # This is specific to impala, should be handled in hiveserver2.
    if hasattr(api, '_get_handle'):
        api._get_handle(snippet)
    return api.get_jobs(notebook, snippet, logs)
Beispiel #8
0
def close_statement(*args, **kwargs):
  """Schedule the asynchronous close of a statement and clean up its task.

  args[0] is the notebook and args[1] the snippet. When the task has reached
  at least 'PROGRESS' and is not in an exception state, the task's handle is
  copied onto the snippet and close_statement_async is queued. The task result
  is forgotten and _cleanup is called whenever the task is not pending.

  Returns:
    {'status': 0} when the close was queued, {'status': -1} otherwise.

  Raises:
    QueryExpired: if the task state is states.PENDING.
  """
  notebook, snippet = args[0], args[1]
  task = download_to_file.AsyncResult(_get_query_key(notebook, snippet))
  task_state = task.state

  if task_state == states.PENDING:
    raise QueryExpired()

  not_started = task_state == 'SUBMITTED' or states.state(task_state) < states.state('PROGRESS')
  if not_started or task_state in states.EXCEPTION_STATES:
    status = -1
  else:
    status = 0
    snippet['result']['handle'] = task.info.get('handle', {}).copy()
    close_statement_async.apply_async(
      args=args,
      kwargs=kwargs,
      task_id=_close_statement_async_id(notebook, snippet)
    )

  task.forget()
  _cleanup(notebook, snippet)

  return {'status': status}
Beispiel #9
0
 def decorator(*args, **kwargs):
     """Call ``func`` and convert any exception into a query exception.

     Returns:
         Whatever ``func(*args, **kwargs)`` returns.

     Raises:
         QueryExpired: If the error text mentions an invalid query handle or
             an invalid operation handle.
         QueryError: For every other exception, carrying the error text.
     """
     try:
         return func(*args, **kwargs)
     # ``except X as e`` is accepted by Python 2.6+ and Python 3; the
     # comma form is a SyntaxError on Python 3.
     except Exception as e:
         message = force_unicode(str(e))
         if 'Invalid query handle' in message or 'Invalid OperationHandle' in message:
             raise QueryExpired(e)
         else:
             raise QueryError(message)
Beispiel #10
0
 def decorator(*args, **kwargs):
     try:
         return func(*args, **kwargs)
     except Exception as e:
         message = force_unicode(e)
         if 'Invalid query handle' in message or 'Invalid OperationHandle' in message:
             raise QueryExpired(e)
         else:
             raise QueryError, message, sys.exc_info()[2]
Beispiel #11
0
def check_status(*args, **kwargs):
    """Report the status of the download task of the notebook in ``args[0]``.

    Returns:
        ``{'status': STATE_MAP[state]}`` for the task's current state.

    Raises:
        QueryExpired: If the task state is ``states.PENDING``.
    """
    task = download_to_file.AsyncResult(args[0]['uuid'])
    current = task.state

    if current == states.PENDING:
        raise QueryExpired()
    if current in states.EXCEPTION_STATES:
        # May raise; when it does not, the state is still mapped below.
        task.maybe_reraise()

    return {'status': STATE_MAP[current]}
Beispiel #12
0
def download(*args, **kwargs):
    """Wait for the download task of ``args[0]`` and stream its result file.

    Returns:
        ``export_csvxls.file_reader`` applied to the task's ``file_path``
        opened in binary mode.

    Raises:
        QueryExpired: If the task state is ``states.PENDING``.
    """
    task = download_to_file.AsyncResult(args[0]['uuid'])
    current = task.state

    if current == states.PENDING:
        raise QueryExpired()
    if current in states.EXCEPTION_STATES:
        task.maybe_reraise()

    # TODO: Start returning data even if we're not done
    task_info = task.wait()

    result_file = open(task_info['file_path'], 'rb')
    return export_csvxls.file_reader(result_file)
Beispiel #13
0
  def fetch_result(self, notebook, snippet, rows, start_over):
    """Fetch rows for the snippet's handle, translating server errors.

    Args:
      notebook: Not used by this method.
      snippet: Passed to self._get_db and self._get_handle.
      rows: Forwarded to db.fetch as `rows`.
      start_over: Forwarded to db.fetch as `start_over`.

    Returns:
      Nothing in the code shown here: the fetched results are only bound to
      a local name.

    Raises:
      QueryExpired: if the server error mentions client inactivity or an
        invalid query handle and carries a message.
      QueryError: for any other QueryServerException.
    """
    db = self._get_db(snippet)

    handle = self._get_handle(snippet)
    try:
      results = db.fetch(handle, start_over=start_over, rows=rows)
    # `except X as ex` is accepted by Python 2.6+ and Python 3; the comma
    # form is a SyntaxError on Python 3.
    except QueryServerException as ex:
      if re.search('(client inactivity)|(Invalid query handle)', str(ex)) and ex.message:
        raise QueryExpired(message=ex.message)
      else:
        raise QueryError(ex)
Beispiel #14
0
def fetch_result(notebook, snippet, rows, start_over, **kwargs):
    """Return the next page of rows from the download task's CSV file.

    The number of rows already served is read from, and written back to, the
    file named by the ``progress_path`` entry of the task's ``info``. The
    first CSV row is treated as the header; each header cell is split on
    ``'|'`` into a name and an optional type (``'STRING_TYPE'`` when absent).

    Args:
        notebook: Mapping providing the task id under ``'uuid'``.
        snippet: Not used by this function.
        rows: Maximum number of rows to return.
        start_over: When truthy, serve from the first data row instead of
            the stored progress.
        **kwargs: Not used by this function.

    Returns:
        A dict with ``has_more``, ``data``, ``meta`` and ``type``. The data
        and meta lists are empty when the task is not in ``states.SUCCESS``
        or ``'AVAILABLE'``.

    Raises:
        QueryExpired: If the task state is ``states.PENDING``.
    """
    result = download_to_file.AsyncResult(notebook['uuid'])
    state = result.state
    data = []
    cols = []
    results = {'has_more': False, 'data': data, 'meta': cols, 'type': 'table'}
    if state == states.PENDING:
        raise QueryExpired()
    elif state in states.EXCEPTION_STATES:
        result.maybe_reraise()
        return results
    elif state not in [states.SUCCESS, 'AVAILABLE']:
        return results

    info = result.info
    skip = 0
    if not start_over:
        with open(info.get('progress_path'), 'r') as f:
            skip = int(f.read())
    target = skip + rows

    count = 0
    with open(info.get('file_path'), 'r') as f:
        csv_reader = csv.reader(f, delimiter=','.encode('utf-8'))
        # An empty file has no header row; a bare next() would let
        # StopIteration escape to the caller.
        first = next(csv_reader, None)
        if first is not None:
            for col in first:
                split = col.split('|')
                cols.append({
                    'name': split[0],
                    'type': split[1] if len(split) > 1 else 'STRING_TYPE',
                    'comment': None
                })
            for row in csv_reader:
                count += 1
                if count <= skip:
                    continue
                data.append(row)
                if count >= target:
                    break

    with open(info.get('progress_path'), 'w') as f:
        f.write(str(count))

    results['has_more'] = count < info.get(
        'row_counter') or state == states.state('PROGRESS')

    return results
Beispiel #15
0
def fetch_result(notebook, snippet, rows, start_over, **kwargs):
    """Return the next page of rows produced by the snippet's download task.

    The count of rows already served is kept in ``caches[CACHES_CELERY_KEY]``
    under ``_fetch_progress_key(notebook, snippet)``. Headers and rows come
    from ``_get_data(task_id)``; each header is split on ``'|'`` into a name
    and an optional type (``'STRING_TYPE'`` when absent).

    Returns:
        A dict with ``has_more``, ``data``, ``meta`` and ``type``. The data
        and meta lists stay empty when the task is not in ``states.SUCCESS``
        or ``'AVAILABLE'``, or when its handle has no result set.

    Raises:
        QueryExpired: If the task state is ``states.PENDING``.
    """
    task_id = _get_query_key(notebook, snippet)
    task = download_to_file.AsyncResult(task_id)
    state = task.state
    page = []
    columns = []
    payload = {'has_more': False, 'data': page, 'meta': columns, 'type': 'table'}

    if state == states.PENDING:
        raise QueryExpired()
    if state in states.EXCEPTION_STATES:
        task.maybe_reraise()
        return payload
    if state not in [states.SUCCESS, 'AVAILABLE']:
        return payload

    info = task.info
    if start_over:
        already_served = 0
    else:
        already_served = caches[CACHES_CELERY_KEY].get(
            _fetch_progress_key(notebook, snippet), default=0)
    last_wanted = already_served + rows

    if not info.get('handle', {}).get('has_result_set', False):
        return payload

    csv.field_size_limit(sys.maxsize)
    headers, csv_reader = _get_data(task_id)

    for header in headers:
        parts = header.split('|')
        columns.append({
            'name': parts[0],
            'type': parts[1] if len(parts) > 1 else 'STRING_TYPE',
            'comment': None
        })

    seen = 0
    # TODO: seek(skip) or [skip:]
    for seen, row in enumerate(csv_reader, 1):
        if seen <= already_served:
            continue
        page.append(row)
        if seen >= last_wanted:
            break

    caches[CACHES_CELERY_KEY].set(_fetch_progress_key(notebook, snippet),
                                  seen,
                                  timeout=None)

    payload['has_more'] = seen < info.get(
        'row_counter') or state == states.state('PROGRESS')

    return payload
Beispiel #16
0
def fetch_result(notebook, snippet, rows, start_over, **kwargs):
    """Return the next page of rows from the notebook's stored CSV result.

    The count of rows already served is kept in ``caches[CACHES_CELERY_KEY]``
    under ``_fetch_progress_key(notebook)``. The CSV is opened through
    ``storage.open(_result_key(notebook))``; its first row is the header,
    each cell split on ``'|'`` into a name and an optional type
    (``'STRING_TYPE'`` when absent).

    Returns:
        A dict with ``has_more``, ``data``, ``meta`` and ``type``. The data
        and meta lists stay empty when the task is not in ``states.SUCCESS``
        or ``'AVAILABLE'``, or when its handle has no result set.

    Raises:
        QueryExpired: If the task state is ``states.PENDING``.
    """
    task = download_to_file.AsyncResult(notebook['uuid'])
    state = task.state
    page = []
    columns = []
    payload = {'has_more': False, 'data': page, 'meta': columns, 'type': 'table'}

    if state == states.PENDING:
        raise QueryExpired()
    if state in states.EXCEPTION_STATES:
        task.maybe_reraise()
        return payload
    if state not in [states.SUCCESS, 'AVAILABLE']:
        return payload

    info = task.info
    if start_over:
        already_served = 0
    else:
        already_served = caches[CACHES_CELERY_KEY].get(
            _fetch_progress_key(notebook), default=0)
    last_wanted = already_served + rows

    if not info.get('handle', {}).get('has_result_set', False):
        return payload

    csv.field_size_limit(sys.maxsize)
    seen = 0
    with storage.open(_result_key(notebook)) as result_file:
        reader = csv.reader(result_file, delimiter=','.encode('utf-8'))
        header = next(reader, None)
        if header:  # else no data to read
            for cell in header:
                parts = cell.split('|')
                columns.append({
                    'name': parts[0],
                    'type': parts[1] if len(parts) > 1 else 'STRING_TYPE',
                    'comment': None
                })
            for seen, row in enumerate(reader, 1):
                if seen <= already_served:
                    continue
                page.append(row)
                if seen >= last_wanted:
                    break

    caches[CACHES_CELERY_KEY].set(_fetch_progress_key(notebook),
                                  seen,
                                  timeout=None)

    payload['has_more'] = seen < info.get(
        'row_counter') or state == states.state('PROGRESS')

    return payload
Beispiel #17
0
def download(*args, **kwargs):
  """Wait for the download task of args[0] and stream its stored result.

  Returns:
    export_csvxls.file_reader applied to storage.open(_result_key(notebook))
    opened in binary mode.

  Raises:
    QueryExpired: if the task state is states.PENDING.
  """
  notebook = args[0]
  task = download_to_file.AsyncResult(notebook['uuid'])
  current = task.state

  if current == states.PENDING:
    raise QueryExpired()
  if current in states.EXCEPTION_STATES:
    task.maybe_reraise()

  # TODO: Start returning data even if we're not done
  task.wait()

  # TODO: Convert csv to excel if needed
  result_file = storage.open(_result_key(notebook), 'rb')
  return export_csvxls.file_reader(result_file)
Beispiel #18
0
def fetch_result_size(*args, **kwargs):
  """Return the row count recorded by the download task of args[0].

  Returns:
    {'rows': n} where n is the task info's 'row_counter' (0 when missing).
    {'rows': 0} when the task is 'SUBMITTED', ranks below 'PROGRESS', or is
    in an exception state and maybe_reraise() did not raise.

  Raises:
    QueryExpired: if the task state is states.PENDING.
  """
  notebook = args[0]
  result = download_to_file.AsyncResult(notebook['uuid'])
  # Read the state once and decide every branch on that snapshot; reading
  # result.state again could observe a different value between checks.
  state = result.state
  if state == states.PENDING:
    raise QueryExpired()
  elif state == 'SUBMITTED' or states.state(state) < states.state('PROGRESS'):
    return {'rows': 0}
  elif state in states.EXCEPTION_STATES:
    result.maybe_reraise()
    return {'rows': 0}

  info = result.info
  return {'rows': info.get('row_counter', 0)}
Beispiel #19
0
  def check_status(self, notebook, snippet):
    """Report the status of the snippet based on its registered connection.

    Returns:
      {'status': 'available'} when the handle has a result set, otherwise
      {'status': 'success'}.

    Raises:
      QueryExpired: if CONNECTIONS has no truthy entry for the handle's guid.
    """
    handle = snippet['result']['handle']
    if not CONNECTIONS.get(handle['guid']):
      raise QueryExpired()

    status = 'available' if handle['has_result_set'] else 'success'
    return {'status': status}
Beispiel #20
0
 def decorator(*args, **kwargs):
     """Call ``func`` and convert server exceptions into query exceptions.

     Returns:
         Whatever ``func(*args, **kwargs)`` returns.

     Raises:
         OperationTimeout: For a ``StructuredException`` whose text contains
             ``'timed out'``.
         QueryExpired: For a ``QueryServerException`` whose text mentions an
             invalid query handle or an invalid operation handle.
         QueryError: For any other caught exception, carrying its text.
     """
     try:
         return func(*args, **kwargs)
     except StructuredException as structured_error:
         text = force_unicode(str(structured_error))
         if 'timed out' not in text:
             raise QueryError(text)
         raise OperationTimeout(structured_error)
     except QueryServerException as server_error:
         text = force_unicode(str(server_error))
         handle_is_gone = 'Invalid query handle' in text or 'Invalid OperationHandle' in text
         if not handle_is_gone:
             raise QueryError(text)
         raise QueryExpired(server_error)
Beispiel #21
0
def progress(notebook, snippet, logs=None, **kwargs):
    """Return the progress of the snippet via the API behind the download task.

    Returns:
        ``1`` when the task is ``'SUBMITTED'``, ranks below ``'PROGRESS'`` or
        is in an exception state; otherwise whatever
        ``api.progress(notebook, snippet, logs=logs)`` returns.

    Raises:
        QueryExpired: If the task state is ``states.PENDING``.
    """
    task = download_to_file.AsyncResult(notebook['uuid'])
    current = task.state

    if current == states.PENDING:
        raise QueryExpired()

    not_started = current == 'SUBMITTED' or states.state(
        current) < states.state('PROGRESS')
    if not_started or current in states.EXCEPTION_STATES:
        return 1

    # Copy the handle so the task's stored info is not shared with the snippet.
    snippet['result']['handle'] = task.info.get('handle', {}).copy()
    api = get_api(_get_request(**kwargs), snippet)
    return api.progress(notebook, snippet, logs=logs)
Beispiel #22
0
 def decorator(*args, **kwargs):
     """Call ``func`` and convert failures into notebook exceptions.

     Returns:
         Whatever ``func(*args, **kwargs)`` returns.

     Raises:
         AuthenticationRequired: For an ``OperationalError`` whose text
             contains ``'1045'``.
         OperationalError: Re-raised unchanged when ``'1045'`` is absent.
         QueryExpired: For any other exception whose text mentions an invalid
             query handle or an invalid operation handle.
         QueryError: For every remaining exception, after logging it.
     """
     try:
         return func(*args, **kwargs)
     except OperationalError as db_error:
         text = str(db_error)
         if '1045' not in text:
             raise db_error
         # 'Access denied' # MySQL
         raise AuthenticationRequired(message=text)
     except Exception as error:
         text = force_unicode(error)
         if 'Invalid query handle' in text or 'Invalid OperationHandle' in text:
             raise QueryExpired(error)
         LOG.exception('Query Error')
         raise QueryError(text)
Beispiel #23
0
    def test_notebook_autocomplete(self):
        """A QueryExpired from autocomplete yields a bare ``{'status': 0}``."""
        with patch('notebook.api.get_api') as get_api:
            read_timeout = QueryExpired(
                "HTTPSConnectionPool(host='gethue.com', port=10001): Read timed out. (read timeout=120)"
            )
            failing_autocomplete = Mock(side_effect=read_timeout)
            get_api.return_value = Mock(autocomplete=failing_autocomplete)

            url = reverse('notebook:api_autocomplete_tables',
                          kwargs={'database': 'database'})
            payload = {'snippet': json.dumps({'type': 'hive'})}
            response = self.client.post(url, payload)

            data = json.loads(response.content)
            # We get back empty instead of failure with QueryExpired to
            # silence end user messages
            assert_equal(data, {'status': 0})
Beispiel #24
0
  def check_status(self, notebook, snippet):
    """Translate the operation state of the snippet's handle to a status.

    Returns:
      {'status': 'running'} when the mapped state is running or submitted,
      otherwise {'status': 'available'}.

    Raises:
      QueryExpired: if the state is failed/expired and the error message
        contains 'transition from CANCELED to ERROR'.
      QueryError: for every other failed/expired state.
    """
    db = self._get_db(snippet)
    handle = self._get_handle(snippet)
    operation = db.get_operation_status(handle)
    status = HiveServerQueryHistory.STATE_MAP[operation.operationState]

    known_states = QueryHistory.STATE
    if status.index in (known_states.failed.index, known_states.expired.index):
      error = operation.errorMessage
      if error and 'transition from CANCELED to ERROR' in error:
        # Hive case on canceled query
        raise QueryExpired()
      raise QueryError(error)

    if status.index in (known_states.running.index, known_states.submitted.index):
      return {'status': 'running'}
    return {'status': 'available'}
Beispiel #25
0
def get_jobs(notebook, snippet, logs, **kwargs):
  """Return the jobs of the snippet via the API behind the download task.

  Re-implementation to fetch the updated guid in download_to_file from the DB.

  Returns:
    [] when the task is 'SUBMITTED', ranks below 'PROGRESS' or is in an
    exception state; otherwise whatever api.get_jobs(notebook, snippet, logs)
    returns.

  Raises:
    QueryExpired: if the task state is states.PENDING.
  """
  task = download_to_file.AsyncResult(notebook['uuid'])
  current = task.state

  if current == states.PENDING:
    raise QueryExpired()

  not_started = current == 'SUBMITTED' or states.state(current) < states.state('PROGRESS')
  if not_started or current in states.EXCEPTION_STATES:
    return []

  # Copy the handle so the task's stored info is not shared with the snippet.
  snippet['result']['handle'] = task.info.get('handle', {}).copy()

  api = get_api(_get_request(**kwargs), snippet)
  return api.get_jobs(notebook, snippet, logs)
Beispiel #26
0
 def decorator(*args, **kwargs):
     """Call ``func`` and convert server exceptions into query exceptions.

     Returns:
         Whatever ``func(*args, **kwargs)`` returns, or ``None`` after a
         connection-type ``StructuredException`` triggered ``reset_ha()``.

     Raises:
         OperationTimeout: For a ``StructuredException`` whose text contains
             ``'timed out'``.
         QueryExpired: For a ``QueryServerException`` whose text mentions an
             invalid query handle or an invalid operation handle.
         QueryError: For any other caught exception, carrying its text.
     """
     try:
         return func(*args, **kwargs)
     except StructuredException as structured_error:
         text = force_unicode(str(structured_error))
         if 'timed out' in text:
             raise OperationTimeout(structured_error)
         unreachable_markers = (
             'Connection refused',
             'Name or service not known',
             'Could not connect to any',
         )
         if not any(marker in text for marker in unreachable_markers):
             raise QueryError(text)
         # Nothing is raised on this path, so the wrapper returns None.
         reset_ha()
     except QueryServerException as server_error:
         text = force_unicode(str(server_error))
         handle_is_gone = 'Invalid query handle' in text or 'Invalid OperationHandle' in text
         if not handle_is_gone:
             raise QueryError(text)
         raise QueryExpired(server_error)
Beispiel #27
0
    def fetch_result(self, notebook, snippet, rows, start_over):
        """Fetch up to ``rows`` rows from the snippet's registered connection.

        Args:
            notebook: Not used by this method.
            snippet: Provides the connection guid under
                ``['result']['handle']['guid']``.
            rows: Number of rows requested from ``fetchmany``.
            start_over: Not used by this method.

        Returns:
            A dict with ``has_more`` (true when at least ``rows`` rows came
            back), ``data``, ``meta`` and ``type`` set to ``'table'``.

        Raises:
            QueryExpired: If ``CONNECTIONS`` has no truthy entry for the guid.
        """
        handle = CONNECTIONS.get(snippet['result']['handle']['guid'])
        if not handle:
            raise QueryExpired()

        data = handle['result'].fetchmany(rows)
        meta = handle['meta']
        self._assign_types(data, meta)

        return {
            'has_more': bool(data) and len(data) >= rows,
            'data': data or [],
            'meta': meta or [],
            'type': 'table'
        }
Beispiel #28
0
  def check_status(self, notebook, snippet):
    """Report the status of the snippet based on its registered connection.

    Returns:
      {'status': 'running'} when the configured URL starts with 'presto://'
      and the connection's cursor reports a truthy poll(); otherwise
      {'status': 'available'} when the handle has a result set, else
      {'status': 'success'}.

    Raises:
      QueryExpired: if CONNECTIONS has no truthy entry for the handle's guid.
    """
    handle = snippet['result']['handle']
    connection = CONNECTIONS.get(handle['guid'])
    if not connection:
      raise QueryExpired()

    cursor = connection['result'].cursor
    still_polling = self.options['url'].startswith('presto://') and cursor and cursor.poll()
    if still_polling:
      status = 'running'
    elif handle['has_result_set']:
      status = 'available'
    else:
      status = 'success'

    return {'status': status}
Beispiel #29
0
def download(*args, **kwargs):
  """Wait for the download task of args[0] and return its CSV as a response.

  The response is a FileResponse over the task's 'file_path', served as an
  attachment named after the notebook uuid. A 'download-<snippet id>' cookie
  (snippet taken from args[1]) carries the task's 'truncated' flag and
  'row_counter' as JSON.

  Raises:
    QueryExpired: if the task state is states.PENDING.
  """
  task = download_to_file.AsyncResult(args[0]['uuid'])
  current = task.state

  if current == states.PENDING:
    raise QueryExpired()
  if current in states.EXCEPTION_STATES:
    task.maybe_reraise()

  info = task.wait()

  csv_file = open(info['file_path'], 'rb')
  content_type = FORMAT_TO_CONTENT_TYPE.get('csv', 'application/octet-stream')
  response = FileResponse(csv_file, content_type=content_type)
  # TODO: Add support for 3rd party (e.g. nginx file serving)
  response['Content-Disposition'] = 'attachment; filename="%s.%s"' % (args[0]['uuid'], 'csv')

  cookie_name = 'download-%s' % args[1]['id']
  cookie_value = json.dumps({
    'truncated': info.get('truncated', False),
    'row_counter': info.get('row_counter', 0)
  })
  response.set_cookie(cookie_name, cookie_value, max_age=DOWNLOAD_COOKIE_AGE)

  return response
Beispiel #30
0
def close_statement(*args, **kwargs):
  """Schedule the asynchronous close of a statement and remove its files.

  args[0] is the notebook and args[1] the snippet. When the task has reached
  at least 'PROGRESS' and is not in an exception state, the task's handle is
  put on the snippet, close_statement_async is queued, the task result is
  forgotten and the files at the info's 'file_path', 'log_path' and
  'progress_path' are removed.

  Returns:
    {'status': 0} when the close was queued, {'status': -1} when the task is
    'SUBMITTED', ranks below 'PROGRESS', or is in an exception state and
    maybe_reraise() did not raise.

  Raises:
    QueryExpired: if the task state is states.PENDING.
  """
  notebook = args[0]
  snippet = args[1]
  result = download_to_file.AsyncResult(notebook['uuid'])
  # Read the state once and decide every branch on that snapshot; reading
  # result.state again could observe a different value between checks.
  state = result.state
  if state == states.PENDING:
    raise QueryExpired()
  elif state == 'SUBMITTED' or states.state(state) < states.state('PROGRESS'):
    return {'status': -1}
  elif state in states.EXCEPTION_STATES:
    result.maybe_reraise()
    return {'status': -1}

  info = result.info
  snippet['result']['handle'] = info.get('handle', {})
  close_statement_async.apply_async(args=args, kwargs=kwargs, task_id=_close_statement_async_id(notebook))
  result.forget()
  os.remove(info.get('file_path'))
  os.remove(info.get('log_path'))
  os.remove(info.get('progress_path'))
  return {'status': 0}