def get_log(notebook, snippet, startFrom=None, size=None, postdict=None, user_id=None):
  """Return the log text written by the download_to_file task for this notebook.

  Raises QueryExpired when the task id is unknown (PENDING); returns '' while
  the task has not yet reached PROGRESS or is in an exception state.
  When ``startFrom`` is given, the first ``startFrom`` lines are skipped.
  """
  result = download_to_file.AsyncResult(notebook['uuid'])
  state = result.state
  if state == states.PENDING:
    raise QueryExpired()
  elif state == 'SUBMITTED' or states.state(state) < states.state('PROGRESS'):
    return ''
  elif state in states.EXCEPTION_STATES:
    result.maybe_reraise()
    return ''

  info = result.info
  if not startFrom:
    with open(info.get('log_path'), 'r') as f:
      return f.read()
  else:
    # Fixed: collect lines and join once — repeated `data += line` is
    # quadratic in the log size.
    lines = []
    with open(info.get('log_path'), 'r') as f:
      for count, line in enumerate(f, 1):
        if count <= startFrom:
          continue
        lines.append(line)
    return ''.join(lines)
def check_status(self, notebook, snippet):
  """Poll HiveServer2 for the state of the snippet's operation.

  Returns {'status': 'running'|'available'}; raises QueryExpired for a
  canceled-then-errored Hive query and QueryError for other failures.
  """
  response = {}
  db = self._get_db(snippet)
  handle = self._get_handle(snippet)
  operation = db.get_operation_status(handle)
  # Map the Thrift operation state onto Hue's QueryHistory state enum.
  status = HiveServerQueryHistory.STATE_MAP[operation.operationState]

  if status.index in (QueryHistory.STATE.failed.index, QueryHistory.STATE.expired.index):
    if operation.errorMessage and 'transition from CANCELED to ERROR' in operation.errorMessage:  # Hive case on canceled query
      raise QueryExpired()
    elif operation.errorMessage and re.search('Cannot validate serde: org.apache.hive.hcatalog.data.JsonSerDe', str(operation.errorMessage)):
      # Missing serde jar: append a hint for the user.
      raise QueryError(message=operation.errorMessage + _('. Is hive-hcatalog-core.jar registered?'))
    else:
      raise QueryError(operation.errorMessage)

  # Still running/submitted -> 'running'; any other non-terminal state means results are ready.
  response['status'] = 'running' if status.index in (QueryHistory.STATE.running.index, QueryHistory.STATE.submitted.index) else 'available'

  return response
def get_log(notebook, snippet, startFrom=None, size=None, postdict=None, user_id=None):
  """Return the query log kept in task-server storage, optionally skipping
  the first ``startFrom`` lines.

  Raises QueryExpired when the task id is unknown (PENDING); returns ''
  before PROGRESS, on exception states, or when the result cache is enabled
  (presumably logs are not persisted in that mode — TODO confirm).
  """
  result = download_to_file.AsyncResult(notebook['uuid'])
  state = result.state
  if state == states.PENDING:
    raise QueryExpired()
  elif state == 'SUBMITTED' or states.state(state) < states.state('PROGRESS'):
    return ''
  elif state in states.EXCEPTION_STATES:
    return ''

  if TASK_SERVER.RESULT_CACHE.get():
    return ''
  else:
    if not startFrom:
      with storage.open(_log_key(notebook, snippet), 'r') as f:
        return f.read()
    else:
      # Skip the first `startFrom` lines; buffer the rest in a StringIO
      # instead of concatenating strings.
      count = 0
      output = string_io()
      with storage.open(_log_key(notebook, snippet), 'r') as f:
        for line in f:
          count += 1
          if count <= startFrom:
            continue
          output.write(line)
      return output.getvalue()
def fetch_result(self, notebook, snippet, rows, start_over):
  """Fetch up to ``rows`` rows for the snippet's statement and shape them
  into the notebook result payload."""
  database = self._get_db(snippet, interpreter=self.interpreter)
  query_handle = self._get_handle(snippet)

  try:
    batch = database.fetch(query_handle, start_over=start_over, rows=rows)
  except QueryServerException as ex:
    text = str(ex)
    if re.search('(client inactivity)|(Invalid query handle)', text) and ex.message:
      raise QueryExpired(message=ex.message)
    raise QueryError(ex)

  # No escaping...
  meta = [
    {'name': column.name, 'type': column.type, 'comment': column.comment}
    for column in batch.data_table.cols()
  ]

  return {
    'has_more': batch.has_more,
    'data': batch.rows(),
    'meta': meta,
    'type': 'table'
  }
def check_status(self, notebook, snippet):
  """Poll HiveServer2 for the state of the snippet's operation.

  Returns {'status': ..., 'has_result_set': ...?}; raises QueryExpired for a
  canceled-then-errored Hive query and QueryError for other failures.
  """
  response = {}
  db = self._get_db(snippet, interpreter=self.interpreter)
  handle = self._get_handle(snippet)
  operation = db.get_operation_status(handle)
  # Map the Thrift operation state onto Hue's QueryHistory state enum.
  status = HiveServerQueryHistory.STATE_MAP[operation.operationState]

  if status.value in (QueryHistory.STATE.failed.value, QueryHistory.STATE.expired.value):
    if operation.errorMessage and 'transition from CANCELED to ERROR' in operation.errorMessage:  # Hive case on canceled query
      raise QueryExpired()
    elif operation.errorMessage and re.search('Cannot validate serde: org.apache.hive.hcatalog.data.JsonSerDe', str(operation.errorMessage)):
      # Missing serde jar: append a hint for the user.
      raise QueryError(message=operation.errorMessage + _('. Is hive-hcatalog-core.jar registered?'))
    else:
      raise QueryError(operation.errorMessage)

  # Still running/submitted -> 'running'; otherwise results are ready.
  response['status'] = 'running' if status.value in (QueryHistory.STATE.running.value, QueryHistory.STATE.submitted.value) else 'available'
  if operation.hasResultSet is not None:
    response['has_result_set'] = operation.hasResultSet  # HIVE-12442 - With LLAP & HIVE_CLI_SERVICE_PROTOCOL_V8, hasResultSet can change after get_operation_status

  return response
def autocomplete(self, snippet, database=None, table=None, column=None, nested=None, operation=None):
  """Return autocomplete metadata (databases / tables / columns) for the snippet.

  When snippet['source'] == 'query', ``database`` actually carries the id of a
  saved-query Document2 whose first snippet provides the statement context.
  Read-timeout errors are surfaced as QueryExpired; other backend errors are
  returned in resp['message'].
  """
  db = self._get_db(snippet, interpreter=self.interpreter)
  query = None

  if snippet.get('query'):
    query = snippet.get('query')
  elif snippet.get('source') == 'query':
    # 'database' is the saved query document id in this mode.
    document = Document2.objects.get(id=database)
    document.can_read_or_exception(self.user)
    notebook = Notebook(document=document).get_data()
    snippet = notebook['snippets'][0]
    query = self._get_current_statement(notebook, snippet)['statement']
    database, table = '', ''

  resp = _autocomplete(db, database, table, column, nested, query=query, cluster=self.interpreter, operation=operation)

  if resp.get('error'):
    # Rename 'error' to 'message' for the API response shape.
    resp['message'] = resp.pop('error')
    if 'Read timed out' in resp['message']:
      raise QueryExpired(resp['message'])

  return resp
def get_jobs(notebook, snippet, logs, **kwargs):
  """Re-implementation to fetch the updated guid in download_to_file from the DB.

  Returns [] while the task is not far enough along or failed; otherwise
  refreshes the snippet handle from the task info and delegates to the
  underlying API.
  """
  result = download_to_file.AsyncResult(notebook['uuid'])
  state = result.state
  if state == states.PENDING:
    raise QueryExpired()
  elif state == 'SUBMITTED' or states.state(state) < states.state('PROGRESS'):
    return []
  elif state in states.EXCEPTION_STATES:
    result.maybe_reraise()
    return []

  info = result.info
  snippet['result']['handle'] = info.get('handle', {}).copy()

  request = _get_request(**kwargs)
  api = get_api(request, snippet)
  # Insidious problem: each call in the hive api transforms the guid/secret to
  # binary form. get_log does the transform, but not get_jobs. get_jobs is
  # called after get_log so it's usually not an issue. Our get_log
  # implementation doesn't transform.
  if hasattr(api, '_get_handle'):  # This is specific to impala, should be handled in hiveserver2
    api._get_handle(snippet)
  return api.get_jobs(notebook, snippet, logs)
def close_statement(*args, **kwargs):
  """Close the statement behind this snippet and clean up its task state.

  Returns {'status': 0} when the close was scheduled, {'status': -1} when the
  task had not progressed far enough (or had failed); raises QueryExpired for
  an unknown task.
  """
  notebook, snippet = args[0], args[1]
  task_id = _get_query_key(notebook, snippet)
  result = download_to_file.AsyncResult(task_id)
  state = result.state

  if state == states.PENDING:
    raise QueryExpired()

  too_early = state == 'SUBMITTED' or states.state(state) < states.state('PROGRESS')
  failed = state in states.EXCEPTION_STATES
  status = -1 if (too_early or failed) else 0

  if status == 0:
    # Refresh the snippet handle before scheduling the async close.
    snippet['result']['handle'] = result.info.get('handle', {}).copy()
    close_statement_async.apply_async(args=args, kwargs=kwargs, task_id=_close_statement_async_id(notebook, snippet))

  result.forget()
  _cleanup(notebook, snippet)

  return {'status': status}
def decorator(*args, **kwargs):
  """Translate low-level exceptions from the wrapped call into notebook errors.

  Invalid/expired operation handles become QueryExpired; everything else
  becomes QueryError with a unicode message.
  """
  try:
    return func(*args, **kwargs)
  except Exception as e:  # Fixed: `except Exception, e` is Python 2-only syntax
    message = force_unicode(str(e))
    if 'Invalid query handle' in message or 'Invalid OperationHandle' in message:
      raise QueryExpired(e)
    else:
      raise QueryError(message)
def decorator(*args, **kwargs):
  """Translate low-level exceptions from the wrapped call into notebook errors.

  Invalid/expired operation handles become QueryExpired; everything else
  becomes QueryError carrying the original traceback.
  """
  try:
    return func(*args, **kwargs)
  except Exception as e:
    message = force_unicode(e)
    if 'Invalid query handle' in message or 'Invalid OperationHandle' in message:
      raise QueryExpired(e)
    else:
      # Fixed: `raise QueryError, message, sys.exc_info()[2]` is Python 2-only;
      # this is the Python 3 equivalent that preserves the original traceback.
      raise QueryError(message).with_traceback(sys.exc_info()[2])
def check_status(*args, **kwargs):
  """Map the download task's celery state onto a notebook status string."""
  notebook = args[0]
  async_result = download_to_file.AsyncResult(notebook['uuid'])
  task_state = async_result.state

  if task_state == states.PENDING:
    raise QueryExpired()
  if task_state in states.EXCEPTION_STATES:
    async_result.maybe_reraise()

  return {'status': STATE_MAP[task_state]}
def download(*args, **kwargs):
  """Return a csv/xls file reader over the finished download task's result file."""
  async_result = download_to_file.AsyncResult(args[0]['uuid'])
  task_state = async_result.state

  if task_state == states.PENDING:
    raise QueryExpired()
  if task_state in states.EXCEPTION_STATES:
    async_result.maybe_reraise()

  info = async_result.wait()  # TODO: Start returning data even if we're not done

  return export_csvxls.file_reader(open(info['file_path'], 'rb'))
def fetch_result(self, notebook, snippet, rows, start_over):
  """Fetch up to ``rows`` rows for the snippet's statement.

  Inactivity timeouts and invalid handles become QueryExpired; other server
  errors become QueryError.

  NOTE(review): ``results`` is never returned — the sibling fetch_result()
  builds a response dict from it; a return statement may have been lost.
  Behavior is kept as-is here.
  """
  db = self._get_db(snippet)
  handle = self._get_handle(snippet)
  try:
    results = db.fetch(handle, start_over=start_over, rows=rows)
  except QueryServerException as ex:  # Fixed: `except X, ex` is Python 2-only syntax
    if re.search('(client inactivity)|(Invalid query handle)', str(ex)) and ex.message:
      raise QueryExpired(message=ex.message)
    else:
      raise QueryError(ex)
def fetch_result(notebook, snippet, rows, start_over, **kwargs):
  """Fetch up to ``rows`` rows from the csv file written by download_to_file,
  resuming from the offset stored in the progress file unless ``start_over``.

  Returns a notebook result payload; raises QueryExpired for an unknown task.
  Header cells are encoded as 'name|type' (default type STRING_TYPE).
  """
  result = download_to_file.AsyncResult(notebook['uuid'])
  state = result.state
  data = []
  cols = []
  results = {'has_more': False, 'data': data, 'meta': cols, 'type': 'table'}

  if state == states.PENDING:
    raise QueryExpired()
  elif state in states.EXCEPTION_STATES:
    result.maybe_reraise()
    return results
  elif state not in [states.SUCCESS, 'AVAILABLE']:
    return results

  info = result.info
  skip = 0
  if not start_over:
    with open(info.get('progress_path'), 'r') as f:
      skip = int(f.read())
  target = skip + rows

  count = 0  # Fixed: initialize before the with-block so the progress write below is always safe
  with open(info.get('file_path'), 'r') as f:
    # Fixed: Python 3's csv.reader requires a str delimiter; the previous
    # ','.encode('utf-8') passed bytes, which raises TypeError.
    csv_reader = csv.reader(f, delimiter=',')
    # Fixed: guard against an empty file instead of raising StopIteration.
    first = next(csv_reader, None)
    if first is not None:
      for col in first:
        split = col.split('|')
        if len(split) > 1:
          cols.append({'name': split[0], 'type': split[1], 'comment': None})
        else:
          cols.append({'name': split[0], 'type': 'STRING_TYPE', 'comment': None})
      for row in csv_reader:
        count += 1
        if count <= skip:
          continue
        data.append(row)
        if count >= target:
          break

  with open(info.get('progress_path'), 'w') as f:
    f.write(str(count))

  results['has_more'] = count < info.get('row_counter') or state == states.state('PROGRESS')
  return results
def fetch_result(notebook, snippet, rows, start_over, **kwargs):
  """Fetch up to ``rows`` rows produced by the download_to_file task,
  resuming from the row offset cached per query unless ``start_over``.

  Returns a notebook result payload ({'has_more', 'data', 'meta', 'type'});
  raises QueryExpired for an unknown task.
  """
  task_id = _get_query_key(notebook, snippet)
  result = download_to_file.AsyncResult(task_id)
  state = result.state
  data = []
  cols = []
  results = {'has_more': False, 'data': data, 'meta': cols, 'type': 'table'}

  if state == states.PENDING:
    raise QueryExpired()
  elif state in states.EXCEPTION_STATES:
    result.maybe_reraise()
    return results
  elif state not in [states.SUCCESS, 'AVAILABLE']:
    return results

  info = result.info
  skip = 0
  if not start_over:
    # Resume where the previous fetch stopped (offset kept in the celery cache).
    skip = caches[CACHES_CELERY_KEY].get(_fetch_progress_key(notebook, snippet), default=0)
  target = skip + rows

  if info.get('handle', {}).get('has_result_set', False):
    csv.field_size_limit(sys.maxsize)  # result cells may exceed csv's default field size limit
    count = 0
    headers, csv_reader = _get_data(task_id)
    # Header cells are encoded as 'name|type'; default the type to STRING_TYPE.
    for col in headers:
      split = col.split('|')
      split_type = split[1] if len(split) > 1 else 'STRING_TYPE'
      cols.append({'name': split[0], 'type': split_type, 'comment': None})
    for row in csv_reader:
      count += 1
      if count <= skip:  # TODO: seek(skip) or [skip:]
        continue
      data.append(row)
      if count >= target:
        break
    # Persist the new offset for the next incremental fetch.
    caches[CACHES_CELERY_KEY].set(_fetch_progress_key(notebook, snippet), count, timeout=None)
    results['has_more'] = count < info.get('row_counter') or state == states.state('PROGRESS')

  return results
def fetch_result(notebook, snippet, rows, start_over, **kwargs):
  """Fetch up to ``rows`` rows from the stored result file of the
  download_to_file task, resuming from the cached offset unless ``start_over``.

  Returns a notebook result payload; raises QueryExpired for an unknown task.
  Header cells are encoded as 'name|type' (default type STRING_TYPE).
  """
  result = download_to_file.AsyncResult(notebook['uuid'])
  state = result.state
  data = []
  cols = []
  results = {'has_more': False, 'data': data, 'meta': cols, 'type': 'table'}

  if state == states.PENDING:
    raise QueryExpired()
  elif state in states.EXCEPTION_STATES:
    result.maybe_reraise()
    return results
  elif state not in [states.SUCCESS, 'AVAILABLE']:
    return results

  info = result.info
  skip = 0
  if not start_over:
    skip = caches[CACHES_CELERY_KEY].get(_fetch_progress_key(notebook), default=0)
  target = skip + rows

  if info.get('handle', {}).get('has_result_set', False):
    csv.field_size_limit(sys.maxsize)  # result cells may exceed csv's default field size limit
    count = 0
    with storage.open(_result_key(notebook)) as f:
      # Fixed: Python 3's csv.reader requires a str delimiter; the previous
      # ','.encode('utf-8') passed bytes, which raises TypeError.
      csv_reader = csv.reader(f, delimiter=',')
      first = next(csv_reader, None)
      if first:  # else no data to read
        for col in first:
          split = col.split('|')
          split_type = split[1] if len(split) > 1 else 'STRING_TYPE'
          cols.append({'name': split[0], 'type': split_type, 'comment': None})
        for row in csv_reader:
          count += 1
          if count <= skip:
            continue
          data.append(row)
          if count >= target:
            break
    # Persist the new offset for the next incremental fetch.
    caches[CACHES_CELERY_KEY].set(_fetch_progress_key(notebook), count, timeout=None)
    results['has_more'] = count < info.get('row_counter') or state == states.state('PROGRESS')

  return results
def download(*args, **kwargs):
  """Return a csv/xls file reader over the stored result file for this notebook."""
  notebook = args[0]
  async_result = download_to_file.AsyncResult(notebook['uuid'])
  task_state = async_result.state

  if task_state == states.PENDING:
    raise QueryExpired()
  if task_state in states.EXCEPTION_STATES:
    async_result.maybe_reraise()

  async_result.wait()  # TODO: Start returning data even if we're not done

  return export_csvxls.file_reader(storage.open(_result_key(notebook), 'rb'))  # TODO: Convert csv to excel if needed
def fetch_result_size(*args, **kwargs):
  """Return {'rows': N}, the row count recorded so far by the download task.

  Returns {'rows': 0} while the task is not far enough along or failed;
  raises QueryExpired for an unknown task.
  """
  notebook = args[0]
  result = download_to_file.AsyncResult(notebook['uuid'])
  state = result.state

  if state == states.PENDING:
    raise QueryExpired()
  # Fixed: compare the `state` snapshot taken above instead of re-reading
  # result.state — AsyncResult.state queries the backend on each access and
  # could observe a different value.
  elif state == 'SUBMITTED' or states.state(state) < states.state('PROGRESS'):
    return {'rows': 0}
  elif state in states.EXCEPTION_STATES:
    result.maybe_reraise()
    return {'rows': 0}

  info = result.info
  return {'rows': info.get('row_counter', 0)}
def check_status(self, notebook, snippet):
  """Report status for a query served from the cached connection pool.

  Raises QueryExpired when the handle's connection is no longer cached.
  """
  guid = snippet['result']['handle']['guid']
  connection = CONNECTIONS.get(guid)

  if not connection:
    raise QueryExpired()

  has_result_set = snippet['result']['handle']['has_result_set']
  return {'status': 'available' if has_result_set else 'success'}
def decorator(*args, **kwargs):
  """Translate transport/server exceptions from the wrapped call into
  notebook-level exceptions."""
  try:
    return func(*args, **kwargs)
  except StructuredException as e:
    text = force_unicode(str(e))
    if 'timed out' in text:
      raise OperationTimeout(e)
    raise QueryError(text)
  except QueryServerException as e:
    text = force_unicode(str(e))
    if 'Invalid query handle' in text or 'Invalid OperationHandle' in text:
      raise QueryExpired(e)
    raise QueryError(text)
def progress(notebook, snippet, logs=None, **kwargs):
  """Delegate progress reporting to the underlying API once the task has a handle.

  Returns 1 (done) while the task has not reached PROGRESS or failed; raises
  QueryExpired for an unknown task.
  """
  async_result = download_to_file.AsyncResult(notebook['uuid'])
  task_state = async_result.state

  if task_state == states.PENDING:
    raise QueryExpired()
  if task_state == 'SUBMITTED' or states.state(task_state) < states.state('PROGRESS'):
    return 1
  if task_state in states.EXCEPTION_STATES:
    return 1

  snippet['result']['handle'] = async_result.info.get('handle', {}).copy()

  request = _get_request(**kwargs)
  api = get_api(request, snippet)
  return api.progress(notebook, snippet, logs=logs)
def decorator(*args, **kwargs):
  """Map DB-API and generic errors from the wrapped call onto notebook exceptions."""
  try:
    return func(*args, **kwargs)
  except OperationalError as e:
    text = str(e)
    if '1045' in text:  # 'Access denied' # MySQL
      raise AuthenticationRequired(message=text)
    raise e
  except Exception as e:
    text = force_unicode(e)
    if 'Invalid query handle' in text or 'Invalid OperationHandle' in text:
      raise QueryExpired(e)
    LOG.exception('Query Error')
    raise QueryError(text)
def test_notebook_autocomplete(self):
  """A QueryExpired raised by autocomplete must be silenced by the API:
  the endpoint returns {'status': 0} with no error payload."""
  with patch('notebook.api.get_api') as get_api:
    # Simulate the backend timing out during autocomplete.
    get_api.return_value = Mock(autocomplete=Mock(side_effect=QueryExpired("HTTPSConnectionPool(host='gethue.com', port=10001): Read timed out. (read timeout=120)")))

    response = self.client.post(reverse('notebook:api_autocomplete_tables', kwargs={'database': 'database'}), {'snippet': json.dumps({'type': 'hive'})})

    data = json.loads(response.content)
    assert_equal(data, {'status': 0})  # We get back empty instead of failure with QueryExpired to silence end user messages
def check_status(self, notebook, snippet):
  """Poll HiveServer2 for the state of the snippet's operation.

  Returns {'status': 'running'|'available'}; raises QueryExpired for a
  canceled-then-errored Hive query and QueryError for other failures.
  """
  db = self._get_db(snippet)
  handle = self._get_handle(snippet)
  operation = db.get_operation_status(handle)
  status = HiveServerQueryHistory.STATE_MAP[operation.operationState]

  terminal = (QueryHistory.STATE.failed.index, QueryHistory.STATE.expired.index)
  if status.index in terminal:
    if operation.errorMessage and 'transition from CANCELED to ERROR' in operation.errorMessage:  # Hive case on canceled query
      raise QueryExpired()
    raise QueryError(operation.errorMessage)

  active = (QueryHistory.STATE.running.index, QueryHistory.STATE.submitted.index)
  return {'status': 'running' if status.index in active else 'available'}
def get_jobs(notebook, snippet, logs, **kwargs):
  """Re-implementation to fetch the updated guid in download_to_file from the DB."""
  async_result = download_to_file.AsyncResult(notebook['uuid'])
  task_state = async_result.state

  if task_state == states.PENDING:
    raise QueryExpired()
  if task_state == 'SUBMITTED' or states.state(task_state) < states.state('PROGRESS'):
    return []
  if task_state in states.EXCEPTION_STATES:
    return []

  snippet['result']['handle'] = async_result.info.get('handle', {}).copy()

  request = _get_request(**kwargs)
  api = get_api(request, snippet)
  return api.get_jobs(notebook, snippet, logs)
def decorator(*args, **kwargs):
  """Translate transport/server exceptions from the wrapped call into
  notebook-level exceptions, resetting HA state on connectivity failures."""
  try:
    return func(*args, **kwargs)
  except StructuredException as e:
    message = force_unicode(str(e))
    if 'timed out' in message:
      raise OperationTimeout(e)
    elif 'Connection refused' in message or 'Name or service not known' in message or 'Could not connect to any' in message:
      # NOTE(review): this branch only calls reset_ha() and then falls
      # through, so the decorated call returns None here — confirm that is
      # intended (e.g. reset_ha() raising on its own) rather than a
      # swallowed error.
      reset_ha()
    else:
      raise QueryError(message)
  except QueryServerException as e:
    message = force_unicode(str(e))
    if 'Invalid query handle' in message or 'Invalid OperationHandle' in message:
      raise QueryExpired(e)
    else:
      raise QueryError(message)
def fetch_result(self, notebook, snippet, rows, start_over):
  """Fetch up to ``rows`` rows from the cached connection for this handle.

  Raises QueryExpired when the handle's connection is no longer cached.
  """
  guid = snippet['result']['handle']['guid']
  handle = CONNECTIONS.get(guid)
  if not handle:
    raise QueryExpired()

  data = handle['result'].fetchmany(rows)
  meta = handle['meta']
  self._assign_types(data, meta)

  return {
    'has_more': data and len(data) >= rows or False,
    'data': data if data else [],
    'meta': meta if meta else [],
    'type': 'table'
  }
def check_status(self, notebook, snippet):
  """Report status for a query served from the cached connection pool.

  Presto connections are additionally polled via the cursor; raises
  QueryExpired when the handle's connection is no longer cached.
  """
  guid = snippet['result']['handle']['guid']
  connection = CONNECTIONS.get(guid)
  if not connection:
    raise QueryExpired()

  cursor = connection['result'].cursor
  if self.options['url'].startswith('presto://') and cursor and cursor.poll():
    status = 'running'
  elif snippet['result']['handle']['has_result_set']:
    status = 'available'
  else:
    status = 'success'

  return {'status': status}
def download(*args, **kwargs):
  """Stream the finished task's result file as a csv attachment.

  args[0] is the notebook dict (its uuid is both the task id and the
  download filename); args[1] is the snippet dict (its id names the
  metadata cookie). Raises QueryExpired for an unknown task.
  """
  result = download_to_file.AsyncResult(args[0]['uuid'])
  state = result.state
  if state == states.PENDING:
    raise QueryExpired()
  elif state in states.EXCEPTION_STATES:
    result.maybe_reraise()

  info = result.wait()  # TODO: Start returning data even if we're not done

  response = FileResponse(open(info['file_path'], 'rb'), content_type=FORMAT_TO_CONTENT_TYPE.get('csv', 'application/octet-stream'))
  response['Content-Disposition'] = 'attachment; filename="%s.%s"' % (args[0]['uuid'], 'csv')  #TODO: Add support for 3rd party (e.g. nginx file serving)
  # Expose truncation/row-count metadata to the frontend via a cookie.
  response.set_cookie(
    'download-%s' % args[1]['id'],
    json.dumps({
      'truncated': info.get('truncated', False),
      'row_counter': info.get('row_counter', 0)
    }),
    max_age=DOWNLOAD_COOKIE_AGE
  )
  return response
def close_statement(*args, **kwargs):
  """Close the statement behind this snippet and delete the task's on-disk files.

  args[0] is the notebook dict, args[1] the snippet dict. Returns
  {'status': 0} when the close was scheduled, {'status': -1} when the task
  had not progressed far enough or failed; raises QueryExpired for an
  unknown task.
  """
  notebook = args[0]
  snippet = args[1]
  result = download_to_file.AsyncResult(notebook['uuid'])
  state = result.state
  if state == states.PENDING:
    raise QueryExpired()
  elif state == 'SUBMITTED' or states.state(result.state) < states.state('PROGRESS'):
    return {'status': -1}
  elif state in states.EXCEPTION_STATES:
    result.maybe_reraise()
    return {'status': -1}

  info = result.info
  # Refresh the snippet handle before scheduling the async close.
  snippet['result']['handle'] = info.get('handle', {})

  close_statement_async.apply_async(args=args, kwargs=kwargs, task_id=_close_statement_async_id(notebook))
  result.forget()
  # NOTE(review): os.remove raises OSError if a file is already gone —
  # confirm these three paths are always present at this point.
  os.remove(info.get('file_path'))
  os.remove(info.get('log_path'))
  os.remove(info.get('progress_path'))
  return {'status': 0}