def progress(notebook, snippet, logs=None, **kwargs):
  """Return overall progress of the async download task.

  Before the task reaches PROGRESS (or if it errored) report 1; a PENDING
  (unknown/expired) task raises QueryExpired. Otherwise the snippet handle
  is refreshed from the task info and the concrete API is asked.
  """
  task = download_to_file.AsyncResult(notebook['uuid'])
  task_state = task.state

  if task_state == states.PENDING:
    raise QueryExpired()

  not_started = task_state == 'SUBMITTED' or states.state(task_state) < states.state('PROGRESS')
  if not_started or task_state in states.EXCEPTION_STATES:
    return 1

  # Copy so later mutations of the snippet do not leak into the task info.
  snippet['result']['handle'] = task.info.get('handle', {}).copy()
  request = _get_request(**kwargs)
  return get_api(request, snippet).progress(notebook, snippet, logs=logs)
def fetch_result_size(*args, **kwargs):
  """Return {'rows': n} with the number of rows the download task has fetched.

  PENDING tasks raise QueryExpired; tasks not yet in PROGRESS report 0 rows,
  and failed tasks re-raise their stored exception (falling back to 0 rows).
  """
  notebook = args[0]
  task = download_to_file.AsyncResult(notebook['uuid'])
  task_state = task.state

  if task_state == states.PENDING:
    raise QueryExpired()

  if task_state == 'SUBMITTED' or states.state(task_state) < states.state('PROGRESS'):
    return {'rows': 0}

  if task_state in states.EXCEPTION_STATES:
    task.maybe_reraise()
    return {'rows': 0}

  return {'rows': task.info.get('row_counter', 0)}
def query_error_handler(func):
  """Decorator translating backend exceptions into notebook query errors.

  StructuredException timeouts become OperationTimeout, invalid/expired
  server handles become QueryExpired, and everything else is wrapped in
  QueryError with a unicode-safe message.
  """
  def decorator(*args, **kwargs):
    try:
      return func(*args, **kwargs)
    except StructuredException as e:  # 'as' syntax: Python 2.6+/3 compatible
      message = force_unicode(str(e))
      if 'timed out' in message:
        raise OperationTimeout(e)
      else:
        raise QueryError(message)
    except QueryServerException as e:
      message = force_unicode(str(e))
      if 'Invalid query handle' in message or 'Invalid OperationHandle' in message:
        raise QueryExpired(e)
      else:
        raise QueryError(message)
  return decorator  # BUG FIX: the wrapper was defined but never returned
def decorator(*args, **kwargs):
  """Wrapper translating database/backend errors into notebook errors.

  MySQL error 1045 (access denied) becomes AuthenticationRequired; invalid
  or expired server handles become QueryExpired; anything else is logged
  and wrapped in QueryError. Other OperationalErrors are re-raised as-is.
  """
  try:
    return func(*args, **kwargs)
  except OperationalError as e:
    message = str(e)
    if '1045' in message:  # 'Access denied' # MySQL
      raise AuthenticationRequired(message=message)
    else:
      raise  # re-raise unchanged, preserving the original traceback
  except Exception as e:
    message = force_unicode(e)
    if 'Invalid query handle' in message or 'Invalid OperationHandle' in message:
      raise QueryExpired(e)
    else:
      LOG.exception('Query Error')
      raise QueryError(message)
def fetch_result(notebook, snippet, rows, start_over, **kwargs):
  """Read up to `rows` rows from the task's CSV result file.

  A side-car progress file stores how many rows have already been served so
  successive calls page through the file; `start_over` resets to the top.
  Raises QueryExpired when the task is unknown (PENDING), re-raises stored
  task exceptions, and returns an empty page while the task is still running.
  """
  result = download_to_file.AsyncResult(notebook['uuid'])
  state = result.state
  data = []
  cols = []
  results = {
      'has_more': False,
      'data': data,
      'meta': cols,
      'type': 'table'
  }
  if state == states.PENDING:
    raise QueryExpired()
  elif state in states.EXCEPTION_STATES:
    result.maybe_reraise()
    return results
  elif state not in [states.SUCCESS, 'AVAILABLE']:
    return results
  info = result.info
  skip = 0
  if not start_over:
    # Resume from the offset recorded by the previous fetch.
    with open(info.get('progress_path'), 'r') as f:
      skip = int(f.read())
  target = skip + rows
  with open(info.get('file_path'), 'r') as f:
    # NOTE(review): a bytes delimiter is Python 2 only; Python 3's csv.reader
    # requires a 1-character str — confirm the runtime this code targets.
    csv_reader = csv.reader(f, delimiter=','.encode('utf-8'))
    first = next(csv_reader)
    # Header cells are encoded as 'name|type'.
    for col in first:
      split = col.split('|')
      cols.append({'name': split[0], 'type': split[1], 'comment': None})
    count = 0
    for row in csv_reader:
      count += 1
      if count <= skip:
        continue
      data.append(row)
      if count >= target:
        break
  # Persist the new offset for the next page.
  with open(info.get('progress_path'), 'w') as f:
    f.write(str(count))
  # More data exists if we stopped short of the row counter, or the task is
  # still producing rows.
  results['has_more'] = count < info.get('row_counter') or state == states.state('PROGRESS')
  return results
def fetch_result(self, notebook, snippet, rows, start_over):
  """Fetch up to `rows` rows from the cached connection for this query.

  Raises QueryExpired when the connection for the handle's guid is gone.
  """
  guid = snippet['result']['handle']['guid']
  handle = CONNECTIONS.get(guid)
  if not handle:
    raise QueryExpired()

  data = handle['result'].fetchmany(rows)
  meta = handle['meta']
  self._assign_types(data, meta)

  return {
      # A full page means there may be more rows to fetch.
      'has_more': len(data) >= rows if data else False,
      'data': data or [],
      'meta': meta or [],
      'type': 'table'
  }
def test_notebook_autocomplete(self):
  """Autocomplete returns an empty status-0 payload when QueryExpired is raised."""
  with patch('notebook.api.get_api') as get_api:
    expired = QueryExpired(
        "HTTPSConnectionPool(host='gethue.com', port=10001): Read timed out. (read timeout=120)"
    )
    get_api.return_value = Mock(autocomplete=Mock(side_effect=expired))

    response = self.client.post(
        reverse('notebook:api_autocomplete_tables', kwargs={'database': 'database'}),
        {'snippet': json.dumps({'type': 'hive'})}
    )

    data = json.loads(response.content)
    # We get back empty instead of failure with QueryExpired to silence end user messages
    assert_equal(data, {'status': 0})
def check_status(self, notebook, snippet):
  """Poll HiveServer2 for the operation state of the snippet's query.

  Raises QueryExpired for the canceled-query case and QueryError for other
  failures; otherwise reports 'running' or 'available'.
  """
  db = self._get_db(snippet)
  handle = self._get_handle(snippet)
  operation = db.get_operation_status(handle)
  status = HiveServerQueryHistory.STATE_MAP[operation.operationState]

  if status.index in (QueryHistory.STATE.failed.index, QueryHistory.STATE.expired.index):
    # Hive reports canceled queries as a CANCELED -> ERROR transition.
    if operation.errorMessage and 'transition from CANCELED to ERROR' in operation.errorMessage:
      raise QueryExpired()
    raise QueryError(operation.errorMessage)

  still_active = status.index in (QueryHistory.STATE.running.index, QueryHistory.STATE.submitted.index)
  return {'status': 'running' if still_active else 'available'}
def decorator(*args, **kwargs):
  """Wrapper mapping server exceptions to notebook-level errors.

  Timeouts become OperationTimeout; connection failures trigger an HA reset
  (and intentionally do not re-raise, so the call returns None); invalid or
  expired handles become QueryExpired; everything else becomes QueryError.
  """
  try:
    return func(*args, **kwargs)
  except StructuredException as e:
    message = force_unicode(str(e))
    if 'timed out' in message:
      raise OperationTimeout(e)
    elif 'Could not connect to any' in message:
      # HA endpoint unreachable: reset the pool. Note: no raise here — the
      # exception is swallowed and the wrapper falls through to return None.
      reset_ha()
    else:
      raise QueryError(message)
  except QueryServerException as e:
    message = force_unicode(str(e))
    if 'Invalid query handle' in message or 'Invalid OperationHandle' in message:
      raise QueryExpired(e)
    else:
      raise QueryError(message)
def get_jobs(notebook, snippet, logs, **kwargs):
  # Re implementation to fetch updated guid in download_to_file from DB
  """List jobs for the task, refreshing the snippet handle from the task info."""
  task = download_to_file.AsyncResult(notebook['uuid'])
  task_state = task.state

  if task_state == states.PENDING:
    raise QueryExpired()
  if task_state == 'SUBMITTED' or states.state(task_state) < states.state('PROGRESS'):
    return []
  if task_state in states.EXCEPTION_STATES:
    return []

  snippet['result']['handle'] = task.info.get('handle', {}).copy()
  api = get_api(_get_request(**kwargs), snippet)
  return api.get_jobs(notebook, snippet, logs)
def check_status(self, notebook, snippet):
  """Derive snippet status from the cached connection's cursor state.

  Raises QueryExpired when the connection for the handle's guid is gone.
  """
  guid = snippet['result']['handle']['guid']
  connection = CONNECTIONS.get(guid)
  if not connection:
    raise QueryExpired()

  cursor = connection['result'].cursor
  if self.options['url'].startswith('presto://') and cursor and cursor.poll():
    # Presto exposes async progress via cursor.poll().
    status = 'running'
  elif snippet['result']['handle']['has_result_set']:
    status = 'available'
  else:
    status = 'success'

  return {'status': status}
def download(*args, **kwargs):
  """Stream the finished task's result from storage as a CSV/XLS reader."""
  notebook = args[0]
  task_id = notebook['uuid']
  task = download_to_file.AsyncResult(task_id)

  if task.state == states.PENDING:
    raise QueryExpired()
  elif task.state in states.EXCEPTION_STATES:
    task.maybe_reraise()

  task.wait()  # TODO: Start returning data even if we're not done

  # TODO: Convert csv to excel if needed
  return export_csvxls.file_reader(storage.open(_result_key(task_id), 'r'))
def check_status(self, notebook, snippet):
  """Poll HiveServer2 for the query state, translating known failure modes.

  Canceled queries raise QueryExpired; a JsonSerDe validation failure gets a
  hint about registering hive-hcatalog-core.jar; other failures raise
  QueryError. Otherwise reports 'running' or 'available'.
  """
  db = self._get_db(snippet)
  handle = self._get_handle(snippet)
  operation = db.get_operation_status(handle)
  status = HiveServerQueryHistory.STATE_MAP[operation.operationState]

  if status.index in (QueryHistory.STATE.failed.index, QueryHistory.STATE.expired.index):
    error_message = operation.errorMessage
    if error_message and 'transition from CANCELED to ERROR' in error_message:
      # Hive case on canceled query
      raise QueryExpired()
    elif error_message and re.search('Cannot validate serde: org.apache.hive.hcatalog.data.JsonSerDe', str(error_message)):
      raise QueryError(message=error_message + _('. Is hive-hcatalog-core.jar registered?'))
    else:
      raise QueryError(error_message)

  still_active = status.index in (QueryHistory.STATE.running.index, QueryHistory.STATE.submitted.index)
  return {'status': 'running' if still_active else 'available'}
def download(*args, **kwargs):
  """Serve the task's result file as a CSV attachment with a progress cookie."""
  notebook, snippet = args[0], args[1]
  task = download_to_file.AsyncResult(notebook['uuid'])

  if task.state == states.PENDING:
    raise QueryExpired()
  elif task.state in states.EXCEPTION_STATES:
    task.maybe_reraise()

  info = task.wait()

  content_type = FORMAT_TO_CONTENT_TYPE.get('csv', 'application/octet-stream')
  response = FileResponse(open(info['file_path'], 'rb'), content_type=content_type)
  response['Content-Disposition'] = 'attachment; filename="%s.%s"' % (notebook['uuid'], 'csv')  # TODO: Add support for 3rd party (e.g. nginx file serving)
  # Cookie lets the browser-side poller know the download completed and
  # whether the result was truncated.
  response.set_cookie(
      'download-%s' % snippet['id'],
      json.dumps({
          'truncated': info.get('truncated', False),
          'row_counter': info.get('row_counter', 0)
      }),
      max_age=DOWNLOAD_COOKIE_AGE
  )
  return response
def get_jobs(notebook, snippet, logs, **kwargs):
  # Re implement to fetch updated guid in download_to_file from DB
  """List jobs for the task, refreshing the handle and normalizing it for impala."""
  task = download_to_file.AsyncResult(notebook['uuid'])
  task_state = task.state

  if task_state == states.PENDING:
    raise QueryExpired()
  if task_state == 'SUBMITTED' or states.state(task.state) < states.state('PROGRESS'):
    return []
  if task_state in states.EXCEPTION_STATES:
    task.maybe_reraise()
    return []

  snippet['result']['handle'] = task.info.get('handle', {})
  api = get_api(_get_request(**kwargs), snippet)

  # Insidious problem: each call in the hive api transforms the guid/secret to
  # binary form. get_log does the transform, but not get_jobs; get_jobs is
  # called after get_log so it is usually not an issue — but our get_log
  # implementation doesn't.
  if hasattr(api, '_get_handle'):  # This is specific to impala, should be handled in hiveserver2
    api._get_handle(snippet)

  return api.get_jobs(notebook, snippet, logs)
def check_status(self, notebook, snippet):
  """Poll HiveServer2 for the query state; also surface has_result_set.

  Canceled queries raise QueryExpired; a JsonSerDe validation failure gets a
  hint about registering hive-hcatalog-core.jar; other failures raise
  QueryError.
  """
  db = self._get_db(snippet, interpreter=self.interpreter)
  handle = self._get_handle(snippet)
  operation = db.get_operation_status(handle)
  status = HiveServerQueryHistory.STATE_MAP[operation.operationState]

  if status.value in (QueryHistory.STATE.failed.value, QueryHistory.STATE.expired.value):
    error_message = operation.errorMessage
    if error_message and 'transition from CANCELED to ERROR' in error_message:
      # Hive case on canceled query
      raise QueryExpired()
    elif error_message and re.search('Cannot validate serde: org.apache.hive.hcatalog.data.JsonSerDe', str(error_message)):
      raise QueryError(message=error_message + _('. Is hive-hcatalog-core.jar registered?'))
    else:
      raise QueryError(error_message)

  still_active = status.value in (QueryHistory.STATE.running.value, QueryHistory.STATE.submitted.value)
  response = {'status': 'running' if still_active else 'available'}

  if operation.hasResultSet is not None:
    # HIVE-12442 - With LLAP & HIVE_CLI_SERVICE_PROTOCOL_V8, hasResultSet can change after get_operation_status
    response['has_result_set'] = operation.hasResultSet

  return response
def close_statement(*args, **kwargs):
  """Close the statement behind the task, then forget it and clean up.

  Returns {'status': 0} on success, {'status': -1} when the task never
  reached PROGRESS or failed; PENDING raises QueryExpired. Cleanup always
  runs regardless of status.
  """
  notebook, snippet = args[0], args[1]
  task = download_to_file.AsyncResult(notebook['uuid'])
  task_state = task.state

  if task_state == states.PENDING:
    raise QueryExpired()

  not_started = task_state == 'SUBMITTED' or states.state(task_state) < states.state('PROGRESS')
  if not_started or task_state in states.EXCEPTION_STATES:
    status = -1
  else:
    status = 0
    snippet['result']['handle'] = task.info.get('handle', {}).copy()
    close_statement_async.apply_async(args=args, kwargs=kwargs, task_id=_close_statement_async_id(notebook))

  task.forget()
  _cleanup(notebook)
  return {'status': status}
def close_statement(*args, **kwargs):
  """Close the statement and remove the task's result, log and progress files.

  Returns {'status': 0} on success, {'status': -1} when the task never
  reached PROGRESS or failed; PENDING raises QueryExpired.
  """
  notebook, snippet = args[0], args[1]
  task = download_to_file.AsyncResult(notebook['uuid'])
  task_state = task.state

  if task_state == states.PENDING:
    raise QueryExpired()
  if task_state == 'SUBMITTED' or states.state(task.state) < states.state('PROGRESS'):
    return {'status': -1}
  if task_state in states.EXCEPTION_STATES:
    task.maybe_reraise()
    return {'status': -1}

  info = task.info
  snippet['result']['handle'] = info.get('handle', {})
  close_statement_async.apply_async(args=args, kwargs=kwargs, task_id=_close_statement_async_id(notebook))
  task.forget()

  # Remove the on-disk artifacts produced by the download task.
  for path_key in ('file_path', 'log_path', 'progress_path'):
    os.remove(info.get(path_key))

  return {'status': 0}
def autocomplete(self, snippet, database=None, table=None, column=None, nested=None):
  """Autocomplete metadata, optionally scoped to a saved query document.

  When the snippet's source is 'query', `database` actually carries the
  Document2 id of the saved query whose current statement provides context.
  Read-timeout errors from the backend are surfaced as QueryExpired.
  """
  db = self._get_db(snippet, interpreter=self.interpreter)

  query = None
  if snippet.get('query'):
    query = snippet.get('query')
  elif snippet.get('source') == 'query':
    document = Document2.objects.get(id=database)
    document.can_read_or_exception(self.user)
    notebook = Notebook(document=document).get_data()
    snippet = notebook['snippets'][0]
    query = self._get_current_statement(notebook, snippet)['statement']
    database, table = '', ''

  resp = _autocomplete(db, database, table, column, nested, query=query, cluster=self.interpreter)

  if resp.get('error'):
    resp['message'] = resp.pop('error')
    if 'Read timed out' in resp['message']:
      raise QueryExpired(resp['message'])

  return resp
def fetch_result(self, notebook, snippet, rows, start_over):
  """Fetch a page of rows from HiveServer2 for the snippet's handle.

  Inactivity/invalid-handle server errors become QueryExpired; other server
  errors become QueryError.
  """
  db = self._get_db(snippet, interpreter=self.interpreter)
  handle = self._get_handle(snippet)
  try:
    results = db.fetch(handle, start_over=start_over, rows=rows)
  except QueryServerException as ex:
    if re.search('(client inactivity)|(Invalid query handle)', str(ex)) and ex.message:
      raise QueryExpired(message=ex.message)
    raise QueryError(ex)  # No escaping...

  meta = [
      {'name': column.name, 'type': column.type, 'comment': column.comment}
      for column in results.data_table.cols()
  ]
  return {
      'has_more': results.has_more,
      'data': results.rows(),
      'meta': meta,
      'type': 'table'
  }