Beispiel #1
0
def progress(notebook, snippet, logs=None, **kwargs):
    """Return the progress of the query behind the notebook's download task.

    The task result is looked up with ``notebook['uuid']`` as the task id.
    Once the task has reached at least the PROGRESS state, the handle stored
    in the task info is copied into ``snippet['result']['handle']`` and the
    call is delegated to the connector API's ``progress``.

    Returns:
        1 while the task is 'SUBMITTED' / ranked below PROGRESS, or when it
        is in an exception state; otherwise whatever ``api.progress`` returns.

    Raises:
        QueryExpired: when the task state is PENDING.
    """
    task = download_to_file.AsyncResult(notebook['uuid'])
    task_state = task.state

    if task_state == states.PENDING:
        raise QueryExpired()

    not_started = (
        task_state == 'SUBMITTED'
        or states.state(task_state) < states.state('PROGRESS')
    )
    if not_started or task_state in states.EXCEPTION_STATES:
        return 1

    # Work on a copy so that the handle kept in the task info is left alone.
    snippet['result']['handle'] = task.info.get('handle', {}).copy()
    api = get_api(_get_request(**kwargs), snippet)
    return api.progress(notebook, snippet, logs=logs)
Beispiel #2
0
def fetch_result_size(*args, **kwargs):
    """Return the number of rows written so far by the download task.

    ``args[0]`` is the notebook; its ``'uuid'`` is used as the task id.

    Returns:
        ``{'rows': 0}`` while the task is 'SUBMITTED' / ranked below PROGRESS
        or when it is in an exception state (after ``maybe_reraise``),
        otherwise ``{'rows': <row_counter from the task info, default 0>}``.

    Raises:
        QueryExpired: when the task state is PENDING.
    """
    task = download_to_file.AsyncResult(args[0]['uuid'])
    task_state = task.state

    if task_state == states.PENDING:
        raise QueryExpired()

    if task_state == 'SUBMITTED' or states.state(task_state) < states.state('PROGRESS'):
        return {'rows': 0}

    if task_state in states.EXCEPTION_STATES:
        # Surface the task's own exception if there is one to re-raise.
        task.maybe_reraise()
        return {'rows': 0}

    return {'rows': task.info.get('row_counter', 0)}
Beispiel #3
0
def query_error_handler(func):
  """Decorator translating query server errors into notebook exceptions.

  The wrapped callable is invoked unchanged. Errors are mapped as follows:

  * StructuredException whose message contains 'timed out' -> OperationTimeout
  * any other StructuredException -> QueryError
  * QueryServerException mentioning 'Invalid query handle' or
    'Invalid OperationHandle' -> QueryExpired
  * any other QueryServerException -> QueryError

  Returns:
      The wrapping function (with the metadata of ``func`` preserved).
  """
  # Local import keeps this block self-contained; functools is stdlib.
  import functools

  @functools.wraps(func)
  def decorator(*args, **kwargs):
    try:
      return func(*args, **kwargs)
    # `except X as e` is valid on Python 2.6+ and Python 3, unlike `except X, e`.
    except StructuredException as e:
      message = force_unicode(str(e))
      if 'timed out' in message:
        raise OperationTimeout(e)
      else:
        raise QueryError(message)
    except QueryServerException as e:
      message = force_unicode(str(e))
      if 'Invalid query handle' in message or 'Invalid OperationHandle' in message:
        raise QueryExpired(e)
      else:
        raise QueryError(message)

  # Without this return the decorated function would be replaced by None.
  return decorator
Beispiel #4
0
 def decorator(*args, **kwargs):
   """Call the wrapped function and translate database errors.

   OperationalError mentioning '1045' becomes AuthenticationRequired, any
   other OperationalError is re-raised as is. Every other exception becomes
   QueryExpired when it mentions an invalid handle, otherwise it is logged
   and re-raised as QueryError.
   """
   try:
     return func(*args, **kwargs)
   except OperationalError as op_error:
     text = str(op_error)
     if '1045' not in text:
       raise op_error
     # 1045 is the MySQL 'Access denied' error code.
     raise AuthenticationRequired(message=text)
   except Exception as error:
     text = force_unicode(error)
     expired_markers = ('Invalid query handle', 'Invalid OperationHandle')
     if any(marker in text for marker in expired_markers):
       raise QueryExpired(error)
     LOG.exception('Query Error')
     raise QueryError(text)
Beispiel #5
0
def fetch_result(notebook, snippet, rows, start_over, **kwargs):
  """Read the next page of rows from the CSV file written by the download task.

  The task is looked up with ``notebook['uuid']``. The first CSV line is the
  header, each cell being ``name|type``. The number of data rows already
  consumed is persisted in the file at ``progress_path`` so that the next
  call (with ``start_over`` false) resumes after them.

  Returns:
      A dict with keys 'has_more', 'data', 'meta' and 'type'. It is empty
      (no data, no meta) when the task is in an exception state or is not
      yet SUCCESS / 'AVAILABLE'.

  Raises:
      QueryExpired: when the task state is PENDING.
  """
  task = download_to_file.AsyncResult(notebook['uuid'])
  task_state = task.state
  data = []
  cols = []
  results = {'has_more': False, 'data': data, 'meta': cols, 'type': 'table'}

  if task_state == states.PENDING:
    raise QueryExpired()
  if task_state in states.EXCEPTION_STATES:
    task.maybe_reraise()
    return results
  if task_state not in [states.SUCCESS, 'AVAILABLE']:
    return results

  info = task.info

  # Number of data rows handed out by previous calls.
  if start_over:
    skip = 0
  else:
    with open(info.get('progress_path'), 'r') as progress_file:
      skip = int(progress_file.read())
  last_wanted = skip + rows

  with open(info.get('file_path'), 'r') as csv_file:
    reader = csv.reader(csv_file, delimiter=','.encode('utf-8'))

    # Header cells are encoded as "<name>|<type>".
    for header in next(reader):
      parts = header.split('|')
      cols.append({'name': parts[0], 'type': parts[1], 'comment': None})

    count = 0  # stays 0 when the file holds no data rows
    for count, row in enumerate(reader, 1):
      if count > skip:
        data.append(row)
        if count >= last_wanted:
          break

  # Remember how far we read for the next call.
  with open(info.get('progress_path'), 'w') as progress_file:
    progress_file.write(str(count))

  results['has_more'] = count < info.get('row_counter') or task_state == states.state('PROGRESS')

  return results
Beispiel #6
0
    def fetch_result(self, notebook, snippet, rows, start_over):
        """Fetch up to ``rows`` rows from the connection registered for the snippet.

        The connection is looked up in CONNECTIONS by the guid stored in
        ``snippet['result']['handle']``.

        Returns:
            A dict with 'has_more' (True when a non-empty batch of at least
            ``rows`` rows came back), 'data', 'meta' and 'type'.

        Raises:
            QueryExpired: when no connection is registered for the guid.
        """
        connection = CONNECTIONS.get(snippet['result']['handle']['guid'])
        if not connection:
            raise QueryExpired()

        batch = connection['result'].fetchmany(rows)
        columns = connection['meta']
        self._assign_types(batch, columns)

        return {
            'has_more': bool(batch) and len(batch) >= rows,
            'data': batch or [],
            'meta': columns or [],
            'type': 'table'
        }
Beispiel #7
0
    def test_notebook_autocomplete(self):
        """A QueryExpired raised by the connector's autocomplete is answered with status 0."""
        read_timeout = QueryExpired(
            "HTTPSConnectionPool(host='gethue.com', port=10001): Read timed out. (read timeout=120)"
        )

        with patch('notebook.api.get_api') as get_api:
            failing_api = Mock(autocomplete=Mock(side_effect=read_timeout))
            get_api.return_value = failing_api

            url = reverse('notebook:api_autocomplete_tables', kwargs={'database': 'database'})
            payload = {'snippet': json.dumps({'type': 'hive'})}
            response = self.client.post(url, payload)

            data = json.loads(response.content)
            # We get back empty instead of failure with QueryExpired to
            # silence end user messages.
            assert_equal(data, {'status': 0})
Beispiel #8
0
  def check_status(self, notebook, snippet):
    """Report whether the snippet's operation is still running or available.

    Returns:
        ``{'status': 'running'}`` for running / submitted operations,
        ``{'status': 'available'}`` otherwise.

    Raises:
        QueryExpired: failed / expired operation whose error message contains
            'transition from CANCELED to ERROR'.
        QueryError: any other failed / expired operation.
    """
    db = self._get_db(snippet)
    handle = self._get_handle(snippet)
    operation = db.get_operation_status(handle)

    state_index = HiveServerQueryHistory.STATE_MAP[operation.operationState].index
    known = QueryHistory.STATE

    if state_index in (known.failed.index, known.expired.index):
      error_message = operation.errorMessage
      # Hive case on canceled query
      if error_message and 'transition from CANCELED to ERROR' in error_message:
        raise QueryExpired()
      raise QueryError(error_message)

    if state_index in (known.running.index, known.submitted.index):
      return {'status': 'running'}
    return {'status': 'available'}
Beispiel #9
0
 def decorator(*args, **kwargs):
     """Call the wrapped function and translate query server errors.

     StructuredException: 'timed out' -> OperationTimeout; 'Could not connect
     to any' -> ``reset_ha()`` is called and None is returned; anything else
     -> QueryError. QueryServerException: invalid handle messages ->
     QueryExpired, anything else -> QueryError.
     """
     try:
         return func(*args, **kwargs)
     except StructuredException as structured_error:
         text = force_unicode(str(structured_error))
         if 'timed out' in text:
             raise OperationTimeout(structured_error)
         if 'Could not connect to any' not in text:
             raise QueryError(text)
         # NOTE(review): the error is swallowed here and the caller gets None.
         reset_ha()
         return None
     except QueryServerException as server_error:
         text = force_unicode(str(server_error))
         handle_is_gone = 'Invalid query handle' in text or 'Invalid OperationHandle' in text
         if handle_is_gone:
             raise QueryExpired(server_error)
         raise QueryError(text)
Beispiel #10
0
def get_jobs(notebook, snippet, logs, **kwargs):
  """Return the jobs of the query, using the handle stored by the download task.

  Re-implementation that fetches the updated guid recorded by
  ``download_to_file`` before delegating to the connector API's ``get_jobs``.

  Returns:
      ``[]`` while the task is 'SUBMITTED' / ranked below PROGRESS or when it
      is in an exception state; otherwise the result of ``api.get_jobs``.

  Raises:
      QueryExpired: when the task state is PENDING.
  """
  task = download_to_file.AsyncResult(notebook['uuid'])
  task_state = task.state

  if task_state == states.PENDING:
    raise QueryExpired()

  not_started = task_state == 'SUBMITTED' or states.state(task_state) < states.state('PROGRESS')
  if not_started or task_state in states.EXCEPTION_STATES:
    return []

  snippet['result']['handle'] = task.info.get('handle', {}).copy()

  api = get_api(_get_request(**kwargs), snippet)
  return api.get_jobs(notebook, snippet, logs)
Beispiel #11
0
  def check_status(self, notebook, snippet):
    """Return the status of the statement bound to the snippet's connection.

    Returns:
        ``{'status': 'running'}`` when the URL is a presto:// one and the
        cursor's ``poll()`` is truthy, ``{'status': 'available'}`` when the
        handle has a result set, ``{'status': 'success'}`` otherwise.

    Raises:
        QueryExpired: when no connection is registered for the handle's guid.
    """
    handle = snippet['result']['handle']
    connection = CONNECTIONS.get(handle['guid'])
    if not connection:
      raise QueryExpired()

    cursor = connection['result'].cursor
    is_presto = self.options['url'].startswith('presto://')

    if is_presto and cursor and cursor.poll():
      status = 'running'
    elif handle['has_result_set']:
      status = 'available'
    else:
      status = 'success'

    return {'status': status}
Beispiel #12
0
def download(*args, **kwargs):
  """Wait for the download task to finish and return a reader over its result.

  ``args[0]`` is the notebook; its ``'uuid'`` is both the task id and the
  input of ``_result_key`` used to locate the stored result.

  Raises:
      QueryExpired: when the task state is PENDING.
  """
  task_id = args[0]['uuid']
  task = download_to_file.AsyncResult(task_id)
  task_state = task.state

  if task_state == states.PENDING:
    raise QueryExpired()
  if task_state in states.EXCEPTION_STATES:
    task.maybe_reraise()

  # TODO: Start returning data even if we're not done
  task.wait()

  # TODO: Convert csv to excel if needed
  result_file = storage.open(_result_key(task_id), 'r')
  return export_csvxls.file_reader(result_file)
Beispiel #13
0
  def check_status(self, notebook, snippet):
    """Report whether the snippet's operation is still running or available.

    Returns:
        ``{'status': 'running'}`` for running / submitted operations,
        ``{'status': 'available'}`` otherwise.

    Raises:
        QueryExpired: failed / expired operation whose error message contains
            'transition from CANCELED to ERROR'.
        QueryError: any other failed / expired operation; a hint about
            hive-hcatalog-core.jar is appended when the JsonSerDe could not
            be validated.
    """
    db = self._get_db(snippet)
    handle = self._get_handle(snippet)
    operation = db.get_operation_status(handle)

    state_index = HiveServerQueryHistory.STATE_MAP[operation.operationState].index
    failed_like = (QueryHistory.STATE.failed.index, QueryHistory.STATE.expired.index)
    running_like = (QueryHistory.STATE.running.index, QueryHistory.STATE.submitted.index)

    if state_index in failed_like:
      error_message = operation.errorMessage
      if error_message:
        # Hive case on canceled query
        if 'transition from CANCELED to ERROR' in error_message:
          raise QueryExpired()
        if re.search('Cannot validate serde: org.apache.hive.hcatalog.data.JsonSerDe', str(error_message)):
          raise QueryError(message=error_message + _('. Is hive-hcatalog-core.jar registered?'))
      raise QueryError(error_message)

    return {'status': 'running' if state_index in running_like else 'available'}
Beispiel #14
0
def download(*args, **kwargs):
  """Wait for the download task and serve its CSV file as an attachment.

  ``args[0]`` is the notebook (its ``'uuid'`` is the task id and the file
  name), ``args[1]`` the snippet (its ``'id'`` names the download cookie).
  The cookie carries the 'truncated' and 'row_counter' values of the task.

  Raises:
      QueryExpired: when the task state is PENDING.
  """
  task = download_to_file.AsyncResult(args[0]['uuid'])
  task_state = task.state

  if task_state == states.PENDING:
    raise QueryExpired()
  if task_state in states.EXCEPTION_STATES:
    task.maybe_reraise()

  info = task.wait()

  csv_file = open(info['file_path'], 'rb')  # handed over to the response object
  content_type = FORMAT_TO_CONTENT_TYPE.get('csv', 'application/octet-stream')
  response = FileResponse(csv_file, content_type=content_type)

  # TODO: Add support for 3rd party (e.g. nginx file serving)
  response['Content-Disposition'] = 'attachment; filename="%s.%s"' % (args[0]['uuid'], 'csv')

  cookie_name = 'download-%s' % args[1]['id']
  cookie_value = json.dumps({
    'truncated': info.get('truncated', False),
    'row_counter': info.get('row_counter', 0)
  })
  response.set_cookie(cookie_name, cookie_value, max_age=DOWNLOAD_COOKIE_AGE)

  return response
Beispiel #15
0
def get_jobs(notebook, snippet, logs, **kwargs):
  """Return the jobs of the query, using the handle stored by the download task.

  Re-implementation that fetches the updated guid recorded by
  ``download_to_file`` before delegating to the connector API's ``get_jobs``.

  Returns:
      ``[]`` while the task is 'SUBMITTED' / ranked below PROGRESS or when it
      is in an exception state (after ``maybe_reraise``); otherwise the
      result of ``api.get_jobs``.

  Raises:
      QueryExpired: when the task state is PENDING.
  """
  result = download_to_file.AsyncResult(notebook['uuid'])
  # Read the state once: every branch below must judge the same snapshot,
  # and re-reading it may trigger another backend lookup.
  state = result.state
  if state == states.PENDING:
    raise QueryExpired()
  elif state == 'SUBMITTED' or states.state(state) < states.state('PROGRESS'):
    return []
  elif state in states.EXCEPTION_STATES:
    result.maybe_reraise()
    return []

  info = result.info
  # Copy the handle: the API calls below rewrite guid/secret in place, which
  # should not leak back into the task's stored info.
  snippet['result']['handle'] = info.get('handle', {}).copy()

  request = _get_request(**kwargs)
  api = get_api(request, snippet)
  # Insidious problem: each call in the hive api transforms the guid/secret
  # to binary form. get_log does the transform, but get_jobs does not.
  # get_jobs is normally called after get_log so this is usually not an
  # issue, but our get_log implementation does not do the transform.
  if hasattr(api, '_get_handle'): # This is specific to impala, should be handled in hiveserver2
    api._get_handle(snippet)
  return api.get_jobs(notebook, snippet, logs)
Beispiel #16
0
  def check_status(self, notebook, snippet):
    """Report the status of the snippet's operation.

    Returns:
        A dict whose 'status' is 'running' for running / submitted
        operations and 'available' otherwise. 'has_result_set' is added
        when the server reports a non-None ``hasResultSet``.

    Raises:
        QueryExpired: failed / expired operation whose error message contains
            'transition from CANCELED to ERROR'.
        QueryError: any other failed / expired operation; a hint about
            hive-hcatalog-core.jar is appended when the JsonSerDe could not
            be validated.
    """
    db = self._get_db(snippet, interpreter=self.interpreter)
    handle = self._get_handle(snippet)
    operation = db.get_operation_status(handle)

    state_value = HiveServerQueryHistory.STATE_MAP[operation.operationState].value
    failed_like = (QueryHistory.STATE.failed.value, QueryHistory.STATE.expired.value)
    running_like = (QueryHistory.STATE.running.value, QueryHistory.STATE.submitted.value)

    if state_value in failed_like:
      error_message = operation.errorMessage
      if error_message:
        # Hive case on canceled query
        if 'transition from CANCELED to ERROR' in error_message:
          raise QueryExpired()
        if re.search('Cannot validate serde: org.apache.hive.hcatalog.data.JsonSerDe', str(error_message)):
          raise QueryError(message=error_message + _('. Is hive-hcatalog-core.jar registered?'))
      raise QueryError(error_message)

    response = {'status': 'running' if state_value in running_like else 'available'}

    # HIVE-12442 - With LLAP & HIVE_CLI_SERVICE_PROTOCOL_V8, hasResultSet can
    # change after get_operation_status
    if operation.hasResultSet is not None:
      response['has_result_set'] = operation.hasResultSet

    return response
Beispiel #17
0
def close_statement(*args, **kwargs):
  """Schedule the closing of the statement and clean up the download task.

  ``args[0]`` is the notebook and ``args[1]`` the snippet. When the task has
  reached at least PROGRESS without failing, the stored handle is copied
  into the snippet and ``close_statement_async`` is queued. In every
  non-PENDING case the task result is forgotten and ``_cleanup`` is run.

  Returns:
      ``{'status': 0}`` when the asynchronous close was queued,
      ``{'status': -1}`` otherwise.

  Raises:
      QueryExpired: when the task state is PENDING.
  """
  notebook, snippet = args[0], args[1]
  task = download_to_file.AsyncResult(notebook['uuid'])
  task_state = task.state

  if task_state == states.PENDING:
    raise QueryExpired()

  not_started = task_state == 'SUBMITTED' or states.state(task_state) < states.state('PROGRESS')
  if not_started or task_state in states.EXCEPTION_STATES:
    status = -1
  else:
    status = 0
    snippet['result']['handle'] = task.info.get('handle', {}).copy()
    close_statement_async.apply_async(
        args=args,
        kwargs=kwargs,
        task_id=_close_statement_async_id(notebook)
    )

  task.forget()
  _cleanup(notebook)
  return {'status': status}
Beispiel #18
0
def close_statement(*args, **kwargs):
  """Schedule the closing of the statement and delete the task's files.

  ``args[0]`` is the notebook and ``args[1]`` the snippet. When the task has
  reached at least PROGRESS without failing, the stored handle is copied
  into the snippet, ``close_statement_async`` is queued, the task result is
  forgotten and the result, log and progress files are removed.

  Returns:
      ``{'status': 0}`` when the asynchronous close was queued,
      ``{'status': -1}`` while the task is 'SUBMITTED' / ranked below
      PROGRESS or when it is in an exception state.

  Raises:
      QueryExpired: when the task state is PENDING.
      OSError: propagated from ``os.remove`` when a file cannot be deleted.
  """
  notebook = args[0]
  snippet = args[1]
  result = download_to_file.AsyncResult(notebook['uuid'])
  # Read the state once: every branch below must judge the same snapshot,
  # and re-reading it may trigger another backend lookup.
  state = result.state
  if state == states.PENDING:
    raise QueryExpired()
  elif state == 'SUBMITTED' or states.state(state) < states.state('PROGRESS'):
    return {'status': -1}
  elif state in states.EXCEPTION_STATES:
    result.maybe_reraise()
    return {'status': -1}

  info = result.info
  # Copy the handle so later in-place changes to the snippet's handle do not
  # leak back into the task's stored info.
  snippet['result']['handle'] = info.get('handle', {}).copy()
  close_statement_async.apply_async(args=args, kwargs=kwargs, task_id=_close_statement_async_id(notebook))
  result.forget()

  # Remove the files produced by the download task.
  for path_key in ('file_path', 'log_path', 'progress_path'):
    os.remove(info.get(path_key))
  return {'status': 0}
Beispiel #19
0
  def autocomplete(self, snippet, database=None, table=None, column=None, nested=None):
    """Return autocomplete metadata for a database, table, column or query.

    When the snippet carries a 'query', that query is autocompleted. When
    its 'source' is 'query', ``database`` is used as the id of a saved
    document: the document's first snippet provides the statement and the
    database / table arguments are reset to ''.

    Returns:
        The response of ``_autocomplete``; an 'error' entry, if any, is
        moved to 'message'.

    Raises:
        QueryExpired: when the error message contains 'Read timed out'.
    """
    db = self._get_db(snippet, interpreter=self.interpreter)

    query = snippet.get('query') or None
    if query is None and snippet.get('source') == 'query':
      saved_doc = Document2.objects.get(id=database)
      saved_doc.can_read_or_exception(self.user)
      notebook = Notebook(document=saved_doc).get_data()
      snippet = notebook['snippets'][0]
      query = self._get_current_statement(notebook, snippet)['statement']
      database, table = '', ''

    resp = _autocomplete(db, database, table, column, nested, query=query, cluster=self.interpreter)

    if not resp.get('error'):
      return resp

    resp['message'] = resp.pop('error')
    if 'Read timed out' in resp['message']:
      raise QueryExpired(resp['message'])
    return resp
Beispiel #20
0
  def fetch_result(self, notebook, snippet, rows, start_over):
    """Fetch a page of results for the snippet's handle.

    Returns:
        A dict with 'has_more', 'data' (rows, not escaped), 'meta' (name,
        type and comment of every column) and 'type'.

    Raises:
        QueryExpired: on a QueryServerException that mentions client
            inactivity or an invalid query handle and has a message.
        QueryError: on any other QueryServerException.
    """
    db = self._get_db(snippet, interpreter=self.interpreter)
    handle = self._get_handle(snippet)

    try:
      results = db.fetch(handle, start_over=start_over, rows=rows)
    except QueryServerException as ex:
      handle_is_gone = re.search('(client inactivity)|(Invalid query handle)', str(ex)) and ex.message
      if handle_is_gone:
        raise QueryExpired(message=ex.message)
      raise QueryError(ex)

    # No escaping...
    has_more = results.has_more
    data = results.rows()
    meta = []
    for column in results.data_table.cols():
      meta.append({
        'name': column.name,
        'type': column.type,
        'comment': column.comment
      })

    return {'has_more': has_more, 'data': data, 'meta': meta, 'type': 'table'}