def explain(self, notebook, snippet):
    db = self._get_db(snippet)

    response = self._get_current_statement(db, snippet)
    session = self._get_session(notebook, snippet['type'])

    query = self._prepare_hql_query(snippet, response.pop('statement'), session)

    try:
        db.use(query.database)
        explanation = db.explain(query)
    except QueryServerException as ex:
        raise QueryError(ex.message)
def check_status(self, notebook, snippet):
    response = {}
    db = self._get_db(snippet)

    handle = self._get_handle(snippet)
    operation = db.get_operation_status(handle)
    status = HiveServerQueryHistory.STATE_MAP[operation.operationState]

    if status.index in (QueryHistory.STATE.failed.index, QueryHistory.STATE.expired.index):
        raise QueryError(operation.errorMessage)

    response['status'] = 'running' if status.index in (QueryHistory.STATE.running.index, QueryHistory.STATE.submitted.index) else 'available'

    return response
def execute(self, notebook, snippet):
    statement = snippet['statement']

    handle = DataEng(self.user).submit_hive_job(self.cluster_name, statement, params=None, job_xml=None)
    job = handle['jobs'][0]

    if job['status'] not in RUNNING_STATES:
        raise QueryError('Submission failure', handle=job['status'])

    return {
        'id': job['jobId'],
        'crn': job['crn'],
        'has_result_set': False,
    }
def execute(self, notebook, snippet):
    db = self._get_db(snippet)

    statement = self._get_current_statement(db, snippet)
    session = self._get_session(notebook, snippet['type'])

    query = self._prepare_hql_query(snippet, statement['statement'], session)

    try:
        if statement.get('statement_id') == 0:
            db.use(query.database)
        handle = db.client.query(query, withMultipleSession=True)
    except QueryServerException as ex:
        raise QueryError(ex.message, handle=statement)
def check_status(self, notebook, snippet):
    response = {'status': 'running'}

    job_id = snippet['result']['handle']['id']

    handle = DataEng(self.user).list_jobs(job_ids=[job_id])
    job = handle['jobs'][0]

    if job['status'] in RUNNING_STATES:
        return response
    elif job['status'] in ('failed', 'terminated'):
        raise QueryError(_('Job was %s') % job['status'])
    else:
        response['status'] = 'available'

    return response
def _get_query_key(notebook, snippet):
    if ENABLE_NOTEBOOK_2.get():
        if snippet.get('executable'):
            query_key = snippet['executable']['id']
        elif snippet.get('executor'):
            query_key = snippet['executor']['executables'][0].get('history', {}).get('uuid')
        else:
            query_key = notebook['uuid']  # get_logs()
    else:
        query_key = notebook['uuid']

    if not query_key:
        raise QueryError('Query Key Missing')
    else:
        return query_key
def _get_data(task_id):
    result_key = _result_key(task_id)

    if TASK_SERVER.RESULT_CACHE.get():
        csv_reader = caches[CACHES_CELERY_QUERY_RESULT_KEY].get(result_key)  # TODO: check if expired
        if csv_reader is None:
            raise QueryError('Cached results %s not found.' % result_key)
        headers = csv_reader[0] if csv_reader else []  # TODO: check size
        csv_reader = csv_reader[1:] if csv_reader else []
    else:
        f = storage.open(result_key)
        delimiter = ',' if sys.version_info[0] > 2 else ','.encode('utf-8')
        csv_reader = csv.reader(f, delimiter=delimiter)
        headers = next(csv_reader, [])

    return headers, csv_reader
def decorator(*args, **kwargs):
    try:
        return func(*args, **kwargs)
    except OperationalError as e:
        message = str(e)
        if '1045' in message:  # 'Access denied' # MySQL
            raise AuthenticationRequired(message=message)
        else:
            raise e
    except Exception as e:
        message = force_unicode(e)
        if 'Invalid query handle' in message or 'Invalid OperationHandle' in message:
            raise QueryExpired(e)
        else:
            LOG.exception('Query Error')
            raise QueryError(message)
def query_error_handler(func):
    def decorator(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except StructuredException as e:
            message = force_unicode(str(e))
            if 'timed out' in message:
                raise OperationTimeout(e)
            else:
                raise QueryError(message)
        except QueryServerException as e:
            message = force_unicode(str(e))
            if 'Invalid query handle' in message or 'Invalid OperationHandle' in message:
                raise QueryExpired(e)
            else:
                raise QueryError(message)
    return decorator
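# Hedged usage sketch (an assumption, not part of the source): query_error_handler
# is a decorator factory, so it is applied directly to connector API methods; a
# later fragment in this collection shows it used exactly this way on explain().
# The Api class and method body below are illustrative only.
class Api(object):

    @query_error_handler
    def check_status(self, notebook, snippet):
        # Any StructuredException or QueryServerException raised in here is
        # translated by the decorator into OperationTimeout, QueryExpired or
        # QueryError before reaching the caller.
        ...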
def execute(self, notebook, snippet):
    from search.conf import SOLR_URL

    api = NativeSolrApi(SOLR_URL.get(), self.user.username)

    collection = self.options.get('collection') or snippet.get('database')
    if not collection or collection == 'default':
        collection = api.collections2()[0]

    response = api.sql(collection, snippet['statement'])

    info = response['result-set']['docs'].pop(-1)  # EOF, RESPONSE_TIME, EXCEPTION
    if info.get('EXCEPTION'):
        raise QueryError(info['EXCEPTION'])

    headers = []
    for row in response['result-set']['docs']:
        for col in list(row.keys()):
            if col not in headers:
                headers.append(col)

    data = [[doc.get(col) for col in headers] for doc in response['result-set']['docs']]
    has_result_set = bool(data)

    return {
        'sync': True,
        'has_result_set': has_result_set,
        'modified_row_count': 0,
        'result': {
            'has_more': False,
            'data': data if has_result_set else [],
            'meta': [{'name': col, 'type': '', 'comment': ''} for col in headers] if has_result_set else [],
            'type': 'table'
        },
        'statement_id': 0,
        'has_more_statements': False,
        'statements_count': 1
    }
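# Hedged illustration (an assumption, not from the source) of the Solr SQL
# response shape execute() unpacks: the SQL handler streams result docs plus a
# final sentinel doc, which is why the code pops the last entry before reading
# rows. Field names and values here are made up.
response = {
    'result-set': {
        'docs': [
            {'id': '1', 'title': 'first'},
            {'id': '2', 'title': 'second'},
            {'EOF': True, 'RESPONSE_TIME': 7},  # sentinel; an 'EXCEPTION' key appears here on error
        ]
    }
}
info = response['result-set']['docs'].pop(-1)
assert not info.get('EXCEPTION')  # remaining docs are the actual rows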
def execute(self, notebook, snippet):
    db = self._get_db(snippet, cluster=snippet.get('selectedCompute'))

    statement = self._get_current_statement(db, snippet)
    session = self._get_session(notebook, snippet['type'])

    query = self._prepare_hql_query(snippet, statement['statement'], session)

    try:
        if statement.get('statement_id') == 0:
            if query.database and not statement['statement'].lower().startswith('set'):
                db.use(query.database)
        handle = db.client.query(query, with_multiple_session=True)
    except QueryServerException as ex:
        raise QueryError(ex.message, handle=statement)
def check_status(self, notebook, snippet):
    response = {}
    db = self._get_db(snippet)

    handle = self._get_handle(snippet)
    operation = db.get_operation_status(handle)
    status = HiveServerQueryHistory.STATE_MAP[operation.operationState]

    if status.index in (QueryHistory.STATE.failed.index, QueryHistory.STATE.expired.index):
        if operation.errorMessage and 'transition from CANCELED to ERROR' in operation.errorMessage:
            # Hive case on canceled query
            raise QueryExpired()
        else:
            raise QueryError(operation.errorMessage)

    response['status'] = 'running' if status.index in (QueryHistory.STATE.running.index, QueryHistory.STATE.submitted.index) else 'available'

    return response
def check_status(self, notebook, snippet):
    job_id = snippet['result']['handle']['id']

    request = MockRequest(self.user, self.fs, self.jt)
    oozie_workflow = check_job_access_permission(request, job_id)
    logs, workflow_actions, is_really_done = api.get(self.jt, self.jt, self.user).get_log(request, oozie_workflow)

    if is_really_done and not oozie_workflow.is_running():
        if oozie_workflow.status in ('KILLED', 'FAILED'):
            raise QueryError(_('The script failed to run and was stopped'))
        status = 'available'
    elif oozie_workflow.is_running():
        status = 'running'
    else:
        status = 'failed'

    return {'status': status}
def fetch_result_size(self, notebook, snippet):
    resp = {'rows': None, 'size': None, 'message': ''}

    if snippet.get('status') != 'available':
        raise QueryError(_('Result status is not available'))

    if snippet['type'] not in ('hive', 'impala'):
        raise OperationNotSupported(_('Cannot fetch result metadata for snippet type: %s') % snippet['type'])

    if snippet['type'] == 'hive':
        resp['rows'], resp['size'], resp['message'] = self._get_hive_result_size(notebook, snippet)
    else:  # Impala
        resp['rows'], resp['size'], resp['message'] = self._get_impala_result_size(notebook, snippet)

    return resp
def explain(self, notebook, snippet):
    db = self._get_db(snippet, interpreter=self.interpreter)

    response = self._get_current_statement(notebook, snippet)
    session = self._get_session(notebook, snippet['type'])

    query = self._prepare_hql_query(snippet, response.pop('statement'), session)

    try:
        db.use(query.database)
        explanation = db.explain(query)
    except QueryServerException as ex:
        raise QueryError(ex.message)

    return {
        'status': 0,
        'explanation': explanation.textual,
        'statement': query.get_query_statement(0),
    }
def check_status(self, notebook, snippet):
    job_id = snippet['result']['handle']['id']

    oozie_workflow = check_job_access_permission(self.request, job_id)
    logs, workflow_actions, is_really_done = self._get_output(oozie_workflow)

    if is_really_done and not oozie_workflow.is_running():
        if oozie_workflow.status in ('KILLED', 'FAILED'):
            raise QueryError(_('The script failed to run and was stopped'))
        if logs:
            status = 'available'
        else:
            status = 'running'  # Tricky case when the logs are being moved by YARN at job completion
    elif oozie_workflow.is_running():
        status = 'running'
    else:
        status = 'failed'

    return {'status': status}
def get_sample_data(self, snippet, database=None, table=None, column=None, is_async=False, operation=None):
    try:
        db = self._get_db(snippet, is_async=is_async, interpreter=self.interpreter)
        return _get_sample_data(db, database, table, column, is_async, operation=operation, cluster=self.interpreter)
    except QueryServerException as ex:
        raise QueryError(ex.message)
def execute(self, notebook, snippet):
    db = self._get_db(snippet, interpreter=self.interpreter)

    statement = self._get_current_statement(notebook, snippet)
    session = self._get_session(notebook, snippet['type'])

    query = self._prepare_hql_query(snippet, statement['statement'], session)
    _session = self._get_session_by_id(notebook, snippet['type'])

    try:
        if statement.get('statement_id') == 0:  # TODO: move this to client
            if query.database and not statement['statement'].lower().startswith('set'):
                result = db.use(query.database, session=_session)
                if result.session:
                    _session = result.session
        handle = db.client.query(query, session=_session)
    except QueryServerException as ex:
        raise QueryError(ex.message, handle=statement)

    # All good
    server_id, server_guid = handle.get()

    if sys.version_info[0] > 2:
        server_id = server_id.decode('utf-8')
        server_guid = server_guid.decode('utf-8')

    response = {
        'secret': server_id,
        'guid': server_guid,
        'operation_type': handle.operation_type,
        'has_result_set': handle.has_result_set,
        'modified_row_count': handle.modified_row_count,
        'log_context': handle.log_context,
        'session_guid': handle.session_guid,
        'session_id': handle.session_id,
        'session_type': snippet['type']
    }
    response.update(statement)

    return response
def check_status(self, notebook, snippet):
    response = {'status': 'running'}

    job_id = snippet['result']['handle']['id']
    oozie_job = check_job_access_permission(self.request, job_id)

    if oozie_job.is_running():
        return response
    elif oozie_job.status in ('KILLED', 'FAILED'):
        raise QueryError(_('Job was %s') % oozie_job.status)
    else:
        # Check if job results are actually available, since YARN takes a while to move logs to JHS
        log_output = self.get_log(notebook, snippet)
        if log_output:
            results = self._get_results(log_output, snippet['type'])
            if results:
                response['status'] = 'available'
            else:
                response['status'] = 'failed'

    return response
def create_session(self, lang='scala', properties=None):
    api = self.get_api()

    stored_session_info = self._get_session_info_from_user()
    if stored_session_info:
        session_present = self._check_session(stored_session_info)
        if session_present:
            return stored_session_info

    if not properties and USE_DEFAULT_CONFIGURATION.get():
        user_config = DefaultConfiguration.objects.get_configuration_for_user(app='spark', user=self.user)
        if user_config is not None:
            properties = user_config.properties_list

    props = self.get_livy_props(lang, properties)

    response = api.create_session(**props)

    status = api.get_session(response['id'])
    count = 0

    while status['state'] == 'starting' and count < 120:
        status = api.get_session(response['id'])
        count += 1
        time.sleep(1)

    if status['state'] != 'idle':
        info = '\n'.join(status['log']) if status['log'] else 'timeout'
        raise QueryError(_('The Spark session is %s and could not be created in the cluster: %s') % (status['state'], info))

    new_session_info = {
        'type': lang,
        'id': response['id'],
        'properties': self.to_properties(props)
    }
    self._set_session_info_to_user(new_session_info)

    return new_session_info
def fetch_result(self, notebook, snippet, rows, start_over):
    db = self._get_db(snippet, interpreter=self.interpreter)

    handle = self._get_handle(snippet)
    try:
        results = db.fetch(handle, start_over=start_over, rows=rows)
    except QueryServerException as ex:
        if re.search('(client inactivity)|(Invalid query handle)', str(ex)) and ex.message:
            raise QueryExpired(message=ex.message)
        else:
            raise QueryError(ex)

    # No escaping...
    return {
        'has_more': results.has_more,
        'data': results.rows(),
        'meta': [{
            'name': column.name,
            'type': column.type,
            'comment': column.comment
        } for column in results.data_table.cols()],
        'type': 'table'
    }
def create_session(self, lang='scala', properties=None):
    if not properties:
        config = None
        if USE_DEFAULT_CONFIGURATION.get():
            config = DefaultConfiguration.objects.get_configuration_for_user(app='spark', user=self.user)

        if config is not None:
            properties = config.properties_list
        else:
            properties = self.get_properties()

    props = dict([(p['name'], p['value']) for p in properties]) if properties is not None else {}
    props['kind'] = lang

    api = get_spark_api(self.user)

    response = api.create_session(**props)

    status = api.get_session(response['id'])
    count = 0

    while status['state'] == 'starting' and count < 120:
        status = api.get_session(response['id'])
        count += 1
        time.sleep(1)

    if status['state'] != 'idle':
        info = '\n'.join(status['log']) if status['log'] else 'timeout'
        raise QueryError(_('The Spark session could not be created in the cluster: %s') % info)

    return {
        'type': lang,
        'id': response['id'],
        'properties': properties
    }
def create_session(self, lang='scala', properties=None):
    if not properties:
        config = None
        if USE_DEFAULT_CONFIGURATION.get():
            config = DefaultConfiguration.objects.get_configuration_for_user(app='spark', user=self.user)

        if config is not None:
            properties = config.properties_list
        else:
            properties = self.get_properties()

    props = dict([(p['name'], p['value']) for p in properties]) if properties is not None else {}

    # HUE-4761: Hue's session request is causing Livy to fail with "JsonMappingException: Can not deserialize
    # instance of scala.collection.immutable.List out of VALUE_STRING token" due to List type values
    # not being formed properly, they are quoted csv strings (without brackets) instead of proper List
    # types, this is for keys: archives, jars, files and pyFiles. The Mako frontend probably should be
    # modified to pass the values as Livy expects but for now we coerce these types to be Lists.
    # Issue only occurs when non-default values are used because the default path properly sets the
    # empty list '[]' for these four values.
    # Note also that Livy has a 90 second timeout for the session request to complete, this needs to
    # be increased for requests that take longer, for example when loading large archives.
    tmparchives = props['archives']
    if type(tmparchives) is not list:
        props['archives'] = tmparchives.split(",")
        LOG.debug("Check List type: archives was not a list")

    tmpjars = props['jars']
    if type(tmpjars) is not list:
        props['jars'] = tmpjars.split(",")
        LOG.debug("Check List type: jars was not a list")

    tmpfiles = props['files']
    if type(tmpfiles) is not list:
        props['files'] = tmpfiles.split(",")
        LOG.debug("Check List type: files was not a list")

    tmppyFiles = props['pyFiles']
    if type(tmppyFiles) is not list:
        props['pyFiles'] = tmppyFiles.split(",")
        LOG.debug("Check List type: pyFiles was not a list")

    # Convert the conf list to a dict for Livy
    listitems = props['conf']
    LOG.debug("Property Spark Conf kvp list from UI is: " + str(listitems))
    confDict = {}
    for i in range(len(listitems)):
        kvp = listitems[i]
        LOG.debug("Property Spark Conf key " + str(i) + " = " + str(kvp.get('key')))
        LOG.debug("Property Spark Conf value " + str(i) + " = " + str(kvp.get('value')))
        confDict[kvp.get('key')] = kvp.get('value')
    props['conf'] = confDict
    LOG.debug("Property Spark Conf dictionary is: " + str(confDict))

    props['kind'] = lang

    api = get_spark_api(self.user)

    response = api.create_session(**props)

    status = api.get_session(response['id'])
    count = 0

    while status['state'] == 'starting' and count < 120:
        status = api.get_session(response['id'])
        count += 1
        time.sleep(1)

    if status['state'] != 'idle':
        info = '\n'.join(status['log']) if status['log'] else 'timeout'
        raise QueryError(_('The Spark session could not be created in the cluster: %s') % info)

    return {'type': lang, 'id': response['id'], 'properties': properties}
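# Hedged worked example (not from the source) of the HUE-4761 coercion above:
# quoted CSV strings from the UI become proper lists, and the conf key/value
# list becomes the flat dict shape Livy expects. Values are illustrative.
props = {
    'archives': 'hdfs:///tmp/a.zip,hdfs:///tmp/b.zip',  # quoted CSV string from the Mako UI
    'conf': [{'key': 'spark.executor.memory', 'value': '2g'}],
}
props['archives'] = props['archives'].split(',')
# -> ['hdfs:///tmp/a.zip', 'hdfs:///tmp/b.zip']
props['conf'] = {kvp.get('key'): kvp.get('value') for kvp in props['conf']}
# -> {'spark.executor.memory': '2g'}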
def decorator(*args, **kwargs):
    try:
        return func(*args, **kwargs)
    except Exception as e:
        message = force_unicode(str(e))
        raise QueryError(message)
def fetch_result(self, notebook, snippet, rows, start_over):
    api = self.get_api()
    session = _get_snippet_session(notebook, snippet)
    cell = snippet['result']['handle']['id']

    try:
        response = api.fetch_data(session['id'], cell)
    except Exception as e:
        message = force_unicode(str(e)).lower()
        if re.search(r"session ('\d+' )?not found", message):
            raise SessionExpired(e)
        else:
            raise e

    content = response['output']

    if content['status'] == 'ok':
        data = content['data']
        images = []

        try:
            table = data['application/vnd.livy.table.v1+json']
        except KeyError:
            try:
                images = [data['image/png']]
            except KeyError:
                images = []

            if 'application/json' in data:
                result = data['application/json']
                data = result['data']
                meta = [{'name': field['name'], 'type': field['type'], 'comment': ''} for field in result['schema']['fields']]
                type = 'table'
            else:
                data = [[data['text/plain']]]
                meta = [{'name': 'Header', 'type': 'STRING_TYPE', 'comment': ''}]
                type = 'text'
        else:
            data = table['data']
            headers = table['headers']
            meta = [{'name': h['name'], 'type': h['type'], 'comment': ''} for h in headers]
            type = 'table'

        # Non start_over not supported
        if not start_over:
            data = []

        return {'data': data, 'images': images, 'meta': meta, 'type': type}
    elif content['status'] == 'error':
        tb = content.get('traceback', None)

        if tb is None or not tb:
            msg = content.get('ename', 'unknown error')

            evalue = content.get('evalue')
            if evalue is not None:
                msg = '%s: %s' % (msg, evalue)
        else:
            msg = ''.join(tb)

        raise QueryError(msg)
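# Hedged illustration (an assumption, not from the source) of the Livy statement
# output payloads that fetch_result() dispatches on: which MIME key is present in
# content['data'] decides the branch taken above. Values are made up.
table_output = {
    'status': 'ok',
    'data': {
        'application/vnd.livy.table.v1+json': {
            'headers': [{'name': 'count', 'type': 'BIGINT_TYPE'}],
            'data': [[42]],
        }
    }
}
plain_output = {'status': 'ok', 'data': {'text/plain': '42'}}  # falls through to the 'text' branch
error_output = {'status': 'error', 'ename': 'AnalysisException', 'evalue': 'Table not found', 'traceback': []}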
def _create_engine(self):
    if '${' in self.options['url']:  # URL parameters substitution
        vars = {'USER': self.user.username}

        if '${PASSWORD}' in self.options['url']:
            auth_provided = False
            if 'session' in self.options:
                for _prop in self.options['session']['properties']:
                    if _prop['name'] == 'user':
                        vars['USER'] = _prop['value']
                        auth_provided = True
                    if _prop['name'] == 'password':
                        vars['PASSWORD'] = _prop['value']
                        auth_provided = True

            if not auth_provided:
                raise AuthenticationRequired(message='Missing username and/or password')

        raw_url = Template(self.options['url'])
        url = raw_url.safe_substitute(**vars)
    else:
        url = self.options['url']

    if url.startswith('awsathena+rest://'):
        url = url.replace(url[17:37], urllib_quote_plus(url[17:37]))
        url = url.replace(url[38:50], urllib_quote_plus(url[38:50]))
        s3_staging_dir = url.rsplit('s3_staging_dir=', 1)[1]
        url = url.replace(s3_staging_dir, urllib_quote_plus(s3_staging_dir))

    if self.options.get('has_impersonation'):
        m = re.search(URL_PATTERN, url)
        driver_name = m.group('driver_name')

        if not driver_name:
            raise QueryError('Driver name of %(url)s could not be found and impersonation is turned on' % {'url': url})

        url = url.replace(driver_name, '%(driver_name)s%(username)s@' % {'driver_name': driver_name, 'username': self.user.username})

    if self.options.get('credentials_json'):
        self.options['credentials_info'] = json.loads(self.options.pop('credentials_json'))

    # Enables various SqlAlchemy args to be passed along for both Hive & Presto connectors
    # Refer to SqlAlchemy pyhive for more details
    if self.options.get('connect_args'):
        self.options['connect_args'] = json.loads(self.options.pop('connect_args'))

    options = self.options.copy()
    options.pop('session', None)
    options.pop('url', None)
    options.pop('has_ssh', None)
    options.pop('has_impersonation', None)
    options.pop('ssh_server_host', None)

    options['pool_pre_ping'] = not url.startswith('phoenix://')  # Should be moved to dialect when connectors always on

    return create_engine(url, **options)
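# Hedged illustration (not from the source) of the ${...} substitution step above,
# using the standard library's string.Template.safe_substitute: placeholders with
# a matching key are replaced, and unknown placeholders are left intact instead
# of raising. The URL and credentials here are made up.
from string import Template

url = Template('mysql://${USER}:${PASSWORD}@localhost:3306/hue').safe_substitute(USER='hue', PASSWORD='secret')
# -> 'mysql://hue:secret@localhost:3306/hue'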
def get_sample_data(self, snippet, database=None, table=None, column=None):
    try:
        db = self._get_db(snippet)
        return _get_sample_data(db, database, table, column)
    except QueryServerException as ex:
        raise QueryError(ex.message)
            'meta': meta,
            'type': type
        }
    elif content['status'] == 'error':
        tb = content.get('traceback', None)

        if tb is None:
            msg = content.get('ename', 'unknown error')

            evalue = content.get('evalue')
            if evalue is not None:
                msg = '%s: %s' % (msg, evalue)
        else:
            msg = ''.join(tb)

        raise QueryError(msg)

def download(self, notebook, snippet, format):
    try:
        api = get_spark_api(self.user)
        session = _get_snippet_session(notebook, snippet)
        cell = snippet['result']['handle']['id']

        return spark_download(api, session['id'], cell, format)
    except Exception as e:
        raise PopupException(e)

def cancel(self, notebook, snippet):
    api = get_spark_api(self.user)
    session = _get_snippet_session(notebook, snippet)

    response = api.cancel(session['id'])
# NOTE: 'async' is a reserved word in Python 3.7+; a later revision of this
# method (shown earlier in this collection) renames it to 'is_async'.
def get_sample_data(self, snippet, database=None, table=None, column=None, async=False, operation=None):
    try:
        db = self._get_db(snippet, async, interpreter=self.interpreter)
        return _get_sample_data(db, database, table, column, async, operation=operation, cluster=self.interpreter)
    except QueryServerException as ex:
        raise QueryError(ex.message)

@query_error_handler
def explain(self, notebook, snippet):
    db = self._get_db(snippet, interpreter=self.interpreter)

    response = self._get_current_statement(notebook, snippet)
    session = self._get_session(notebook, snippet['type'])

    query = self._prepare_hql_query(snippet, response.pop('statement'), session)

    try:
        db.use(query.database)
        explanation = db.explain(query)
    except QueryServerException as ex:
        raise QueryError(ex.message)
def send_exception(message):
    raise QueryError(message=message)