def create_session(self, lang='scala', properties=None):
    properties = dict([(p['name'], p['value']) for p in properties]) if properties is not None else {}
    properties['kind'] = lang

    api = get_spark_api(self.user)
    response = api.create_session(**properties)

    status = api.get_session(response['id'])
    count = 0

    while status['state'] == 'starting' and count < 120:
        status = api.get_session(response['id'])
        count += 1
        time.sleep(1)

    if status['state'] != 'idle':
        raise QueryError('\n'.join(status['log']))

    return {
        'type': lang,
        'id': response['id'],
        'properties': []
    }
def create_session(self, lang="scala", properties=None): if properties is None: settings = { "executor_cores": 1, # Some props only in YARN mode "executor_count": 1, "executor_memory": "1G", "driver_cores": 1, "driver_memory": "1G", } api = get_spark_api(self.user) print "TODO: we should use the settings %s for creating the new sessions" % settings response = api.create_session(kind=lang) status = api.get_session(response["id"]) count = 0 while status["state"] == "starting" and count < 120: status = api.get_session(response["id"]) count += 1 time.sleep(1) if status["state"] != "idle": raise QueryError("\n".join(status["log"])) return {"type": lang, "id": response["id"], "properties": settings}
def execute(self, notebook, snippet):
    api = get_spark_api(self.user)

    if snippet['type'] == 'jar':
        properties = {
            'file': snippet['properties'].get('app_jar'),
            'className': snippet['properties'].get('class'),
            'args': snippet['properties'].get('arguments'),
        }
    elif snippet['type'] == 'py':
        properties = {
            'file': snippet['properties'].get('py_file'),
            'args': snippet['properties'].get('argument'),
        }
    else:
        properties = {
            'file': snippet['properties'].get('app_jar'),
            'className': snippet['properties'].get('class'),
            'args': snippet['properties'].get('arguments'),
            'pyFiles': snippet['properties'].get('py_file'),
            'files': snippet['properties'].get('files'),
            # driverMemory
            # driverCores
            # executorMemory
            # executorCores
            # archives
        }

    response = api.submit_batch(properties)

    return {
        'id': response['id'],
        'has_result_set': True,
        'properties': []
    }
def create_session(self, lang='scala', properties=None):
    if not properties:
        config = DefaultConfiguration.objects.get_configuration_for_user(app='spark', user=self.user)
        if config is not None:
            properties = config.properties_list
        else:
            properties = self.get_properties()

    props = dict([(p['name'], p['value']) for p in properties]) if properties is not None else {}
    props['kind'] = lang

    api = get_spark_api(self.user)
    response = api.create_session(**props)

    status = api.get_session(response['id'])
    count = 0

    while status['state'] == 'starting' and count < 120:
        status = api.get_session(response['id'])
        count += 1
        time.sleep(1)

    if status['state'] != 'idle':
        info = '\n'.join(status['log']) if status['log'] else 'timeout'
        raise QueryError(_('The Spark session could not be created in the cluster: %s') % info)

    return {
        'type': lang,
        'id': response['id'],
        'properties': properties
    }
def check_status(self, notebook, snippet):
    api = get_spark_api(self.user)
    state = api.get_batch_status(snippet['result']['handle']['id'])
    return {
        'status': state,
    }
def create_session(self, lang='scala', properties=None):
    props = dict([(p['name'], p['value']) for p in properties]) if properties is not None else {}
    props['kind'] = lang

    api = get_spark_api(self.user)
    response = api.create_session(**props)

    status = api.get_session(response['id'])
    count = 0

    while status['state'] == 'starting' and count < 120:
        status = api.get_session(response['id'])
        count += 1
        time.sleep(1)

    if status['state'] != 'idle':
        info = '\n'.join(status['log']) if status['log'] else 'timeout'
        raise QueryError(_('The Spark session could not be created in the cluster: %s') % info)

    return {
        'type': lang,
        'id': response['id'],
        'properties': properties
    }
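# A minimal sketch, assuming the list-of-dicts shape these create_session variants
# expect for `properties`; the property names shown (driverMemory, executorMemory)
# are illustrative assumptions, not a definitive list of what Livy accepts.
example_properties = [
    {'name': 'driverMemory', 'value': '1G'},
    {'name': 'executorMemory', 'value': '1G'},
]

# The dict comprehension above flattens this into keyword arguments for api.create_session():
props = dict([(p['name'], p['value']) for p in example_properties])
props['kind'] = 'scala'
# props == {'driverMemory': '1G', 'executorMemory': '1G', 'kind': 'scala'}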
def execute(self, notebook, snippet):
    api = get_spark_api(self.user)

    if snippet['type'] == 'jar':
        properties = {
            'file': snippet['properties'].get('app_jar'),
            'className': snippet['properties'].get('class'),
            'args': snippet['properties'].get('arguments'),
        }
    elif snippet['type'] == 'py':
        properties = {
            'file': snippet['properties'].get('py_file'),
            'args': snippet['properties'].get('argument'),
        }
    else:
        properties = {
            'file': snippet['properties'].get('app_jar'),
            'className': snippet['properties'].get('class'),
            'args': snippet['properties'].get('arguments'),
            'pyFiles': snippet['properties'].get('py_file'),
            'files': snippet['properties'].get('files'),
            # driverMemory
            # driverCores
            # executorMemory
            # executorCores
            # archives
        }

    response = api.submit_batch(properties)

    return {'id': response['id'], 'has_result_set': True, 'properties': []}
def execute(self, notebook, snippet):
    api = get_spark_api(self.user)

    properties = {
        'file': snippet['properties'].get('app_jar'),
        'className': snippet['properties'].get('class'),
        'args': [arg['value'] for arg in snippet['properties'].get('arguments')],
        'pyFiles': snippet['properties'].get('py_file'),
        # files
        # driverMemory
        # driverCores
        # executorMemory
        # executorCores
        # archives
    }

    response = api.submit_batch(properties)

    return {
        'id': response['id'],
        'has_result_set': True,
    }
def create_session(self, lang='scala', properties=None):
    properties = dict([(p['name'], p['value']) for p in properties]) if properties is not None else {}
    properties['kind'] = lang

    api = get_spark_api(self.user)
    response = api.create_session(**properties)

    status = api.get_session(response['id'])
    count = 0

    while status['state'] == 'starting' and count < 120:
        status = api.get_session(response['id'])
        count += 1
        time.sleep(1)

    if status['state'] != 'idle':
        info = '\n'.join(status['log']) if status['log'] else 'timeout'
        raise QueryError(_('The Spark session could not be created in the cluster: %s') % info)

    return {
        'type': lang,
        'id': response['id'],
        'properties': []
    }
def close_session(self, session):
    api = get_spark_api(self.user)

    if session['id'] is not None:
        api.close(session['id'])
        return {'session': session['id'], 'status': 0}
    else:
        return {'status': -1}
def close_statement(self, snippet): api = get_spark_api(self.user) session_id = snippet["result"]["handle"]["id"] if session_id is not None: api.close_batch(session_id) return {"session": session_id, "status": 0} else: return {"status": -1} # skipped
def download(self, notebook, snippet, format):
    try:
        api = get_spark_api(self.user)
        session = _get_snippet_session(notebook, snippet)
        cell = snippet['result']['handle']['id']

        return spark_download(api, session['id'], cell, format)
    except Exception as e:
        raise PopupException(e)
def close(self, notebook, snippet):
    api = get_spark_api(self.user)

    session = _get_snippet_session(notebook, snippet)
    if session["id"] is not None:
        api.close(session["id"])
        return {"session": session["id"], "status": 0}
    else:
        return {"status": -1}
def close_statement(self, snippet):
    api = get_spark_api(self.user)

    session_id = snippet['result']['handle']['id']
    if session_id is not None:
        api.close_batch(session_id)
        return {'session': session_id, 'status': 0}
    else:
        return {'status': -1}  # skipped
def close_session(self, session): api = get_spark_api(self.user) if session["id"] is not None: try: api.close(session["id"]) return {"session": session["id"], "status": 0} except RestException, e: if e.code == 404 or e.code == 500: # TODO remove the 500 raise SessionExpired(e)
def close_session(self, session):
    api = get_spark_api(self.user)

    if session['id'] is not None:
        try:
            api.close(session['id'])
            return {'session': session['id'], 'status': 0}
        except RestException as e:
            if e.code == 404 or e.code == 500:  # TODO remove the 500
                raise SessionExpired(e)
def close_session(self, session):
    api = get_spark_api(self.user)

    if session['id'] is not None:
        api.close(session['id'])
        return {
            'session': session['id'],
            'status': 0
        }
    else:
        return {'status': -1}
def close(self, notebook, snippet):
    api = get_spark_api(self.user)

    session = _get_snippet_session(notebook, snippet)
    if session['id'] is not None:
        api.close(session['id'])
        return {
            'session': session['id'],
            'status': 'closed'
        }
    else:
        return {'status': 'skipped'}
def close(self, snippet):
    api = get_spark_api(self.user)

    session_id = snippet['result']['handle']['id']
    if session_id is not None:
        api.close_batch(session_id)
        return {
            'session': session_id,
            'status': 'closed'
        }
    else:
        return {'status': 'skipped'}
def create_session(self, lang="scala"): api = get_spark_api(self.user) response = api.create_session(kind=lang) status = api.get_session(response["id"]) count = 0 while status["state"] == "starting" and count < 120: status = api.get_session(response["id"]) count += 1 time.sleep(1) return {"type": lang, "id": response["id"]}
def close_session(self, session):
    api = get_spark_api(self.user)

    if session['id'] is not None:
        try:
            api.close(session['id'])
            return {
                'session': session['id'],
                'status': 0
            }
        except RestException as e:
            if e.code == 404 or e.code == 500:  # TODO remove the 500
                raise SessionExpired(e)
def create_session(self, lang='scala'):
    api = get_spark_api(self.user)
    response = api.create_session(kind=lang)

    status = api.get_session(response['id'])
    count = 0

    while status['state'] == 'starting' and count < 120:
        status = api.get_session(response['id'])
        count += 1
        time.sleep(1)

    return {'type': lang, 'id': response['id']}
def fetch_result(self, notebook, snippet, rows, start_over):
    api = get_spark_api(self.user)
    session = _get_snippet_session(notebook, snippet)
    cell = snippet['result']['handle']['id']

    try:
        response = api.fetch_data(session['id'], cell)
    except Exception as e:
        message = force_unicode(str(e)).lower()
        if 'session not found' in message:
            raise SessionExpired(e)
        else:
            raise e
def check_status(self, notebook, snippet):
    api = get_spark_api(self.user)
    session = _get_snippet_session(notebook, snippet)
    cell = snippet["result"]["handle"]["id"]

    try:
        response = api.fetch_data(session["id"], cell)
        return {"status": response["state"]}
    except Exception as e:
        message = force_unicode(str(e)).lower()
        if "session not found" in message:
            raise SessionExpired(e)
        else:
            raise e
def create_session(self, lang='scala'):
    api = get_spark_api(self.user)
    response = api.create_session(kind=lang)

    status = api.get_session(response['id'])
    count = 0

    while status['state'] == 'starting' and count < 120:
        status = api.get_session(response['id'])
        count += 1
        time.sleep(1)

    return {
        'type': lang,
        'id': response['id']
    }
def execute(self, notebook, snippet):
    api = get_spark_api(self.user)
    session = _get_snippet_session(notebook, snippet)

    try:
        response = api.submit_statement(session['id'], snippet['statement'])
        return {
            'id': response['id'],
            'has_result_set': True,
        }
    except Exception as e:
        message = force_unicode(str(e)).lower()
        if 'session not found' in message or 'connection refused' in message or 'session is in state busy' in message:
            raise SessionExpired(e)
        else:
            raise e
def check_status(self, notebook, snippet):
    api = get_spark_api(self.user)
    session = _get_snippet_session(notebook, snippet)
    cell = snippet['result']['handle']['id']

    try:
        response = api.fetch_data(session['id'], cell)
        return {
            'status': response['state'],
        }
    except Exception as e:
        message = force_unicode(str(e)).lower()
        if 'session not found' in message:
            raise SessionExpired(e)
        else:
            raise e
def execute(self, notebook, snippet):
    api = get_spark_api(self.user)
    session = _get_snippet_session(notebook, snippet)

    try:
        response = api.submit_statement(session["id"], snippet["statement"])
        return {"id": response["id"], "has_result_set": True}
    except Exception as e:
        message = force_unicode(str(e)).lower()
        if (
            "session not found" in message
            or "connection refused" in message
            or "session is in state busy" in message
        ):
            raise SessionExpired(e)
        else:
            raise e
def execute(self, notebook, snippet):
    api = get_spark_api(self.user)

    properties = {
        "file": snippet["properties"].get("app_jar"),
        "className": snippet["properties"].get("class"),
        "args": [arg["value"] for arg in snippet["properties"].get("arguments")],
        "pyFiles": snippet["properties"].get("py_file"),
        # files
        # driverMemory
        # driverCores
        # executorMemory
        # executorCores
        # archives
    }

    response = api.submit_batch(properties)

    return {"id": response["id"], "has_result_set": True}
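# A hypothetical snippet payload for the batch-submit execute() variants above; the keys
# mirror the .get() calls in the code, but the concrete jar path and class name are made
# up for illustration only.
example_snippet = {
    'type': 'jar',
    'properties': {
        'app_jar': '/user/hue/spark-examples.jar',      # assumed HDFS path
        'class': 'org.apache.spark.examples.SparkPi',   # assumed main class
        'arguments': [{'value': '100'}],
        'py_file': None,
    },
}
# execute() would turn this into
#   {'file': '/user/hue/spark-examples.jar', 'className': 'org.apache.spark.examples.SparkPi',
#    'args': ['100'], 'pyFiles': None}
# before calling api.submit_batch().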
def download(self, notebook, snippet, format, user_agent=None, max_rows=None, store_data_type_in_header=False):
    try:
        api = get_spark_api(self.user)
        session = _get_snippet_session(notebook, snippet)
        cell = snippet['result']['handle']['id']

        return spark_download(api, session['id'], cell, format, user_agent=None)
    except Exception as e:
        raise PopupException(e)
def create_session(self, lang='scala'):
    api = get_spark_api(self.user)
    response = api.create_session(kind=lang)

    status = api.get_session(response['id'])
    count = 0

    while status['state'] == 'starting' and count < 120:
        status = api.get_session(response['id'])
        count += 1
        time.sleep(1)

    if status['state'] != 'idle':
        raise QueryError('\n'.join(status['log']))

    return {
        'type': lang,
        'id': response['id']
    }
def create_session(self, lang="scala", properties=None): properties = dict([(p["name"], p["value"]) for p in properties]) if properties is not None else {} properties["kind"] = lang api = get_spark_api(self.user) response = api.create_session(**properties) status = api.get_session(response["id"]) count = 0 while status["state"] == "starting" and count < 120: status = api.get_session(response["id"]) count += 1 time.sleep(1) if status["state"] != "idle": raise QueryError("\n".join(status["log"])) return {"type": lang, "id": response["id"], "properties": []}
def create_session(self, lang='scala', properties=None):
    if not properties:
        config = None
        if USE_DEFAULT_CONFIGURATION.get():
            config = DefaultConfiguration.objects.get_configuration_for_user(app='spark', user=self.user)

        if config is not None:
            properties = config.properties_list
        else:
            properties = self.get_properties()

    props = dict([(p['name'], p['value']) for p in properties]) if properties is not None else {}
    props['kind'] = lang

    api = get_spark_api(self.user)
    response = api.create_session(**props)

    status = api.get_session(response['id'])
    count = 0

    while status['state'] == 'starting' and count < 120:
        status = api.get_session(response['id'])
        count += 1
        time.sleep(1)

    if status['state'] != 'idle':
        info = '\n'.join(status['log']) if status['log'] else 'timeout'
        raise QueryError(_('The Spark session could not be created in the cluster: %s') % info)

    return {
        'type': lang,
        'id': response['id'],
        'properties': properties
    }
def cancel(self, notebook, snippet):
    api = get_spark_api(self.user)
    session = _get_snippet_session(notebook, snippet)
    response = api.cancel(session['id'])

    return {'status': 0}
def get_log(self, snippet):
    api = get_spark_api(self.user)
    response = api.get_batch(snippet['result']['handle']['id'])
    return '\n'.join(response['lines'])
def get_log(self, snippet, startFrom=0, size=None):
    api = get_spark_api(self.user)
    return api.get_batch_log(snippet['result']['handle']['id'], startFrom=startFrom, size=size)
def get_log(self, snippet, startFrom=0, size=None): api = get_spark_api(self.user) return api.get_batch_log(snippet["result"]["handle"]["id"], startFrom=startFrom, size=size)
def get_log(self, notebook, snippet, startFrom=0, size=None):
    api = get_spark_api(self.user)
    session = _get_snippet_session(notebook, snippet)
    return api.get_log(session['id'], startFrom=startFrom, size=size)
def create_session(self, lang='scala'):
    api = get_spark_api(self.user)
    response = api.create_session(lang=lang)
    return {'type': lang, 'id': response['id']}
def create_session(self, lang='scala', properties=None):
    if not properties:
        config = None
        if USE_DEFAULT_CONFIGURATION.get():
            config = DefaultConfiguration.objects.get_configuration_for_user(app='spark', user=self.user)

        if config is not None:
            properties = config.properties_list
        else:
            properties = self.get_properties()

    props = dict([(p['name'], p['value']) for p in properties]) if properties is not None else {}

    # HUE-4761: Hue's session request is causing Livy to fail with "JsonMappingException: Can not deserialize
    # instance of scala.collection.immutable.List out of VALUE_STRING token" due to List type values
    # not being formed properly, they are quoted csv strings (without brackets) instead of proper List
    # types, this is for keys; archives, jars, files and pyFiles. The Mako frontend probably should be
    # modified to pass the values as Livy expects but for now we coerce these types to be Lists.
    # Issue only occurs when non-default values are used because the default path properly sets the
    # empty list '[]' for these four values.
    # Note also that Livy has a 90 second timeout for the session request to complete, this needs to
    # be increased for requests that take longer, for example when loading large archives.
    tmparchives = props['archives']
    if type(tmparchives) is not list:
        props['archives'] = tmparchives.split(",")
        LOG.debug("Check List type: archives was not a list")

    tmpjars = props['jars']
    if type(tmpjars) is not list:
        props['jars'] = tmpjars.split(",")
        LOG.debug("Check List type: jars was not a list")

    tmpfiles = props['files']
    if type(tmpfiles) is not list:
        props['files'] = tmpfiles.split(",")
        LOG.debug("Check List type: files was not a list")

    tmppyFiles = props['pyFiles']
    if type(tmppyFiles) is not list:
        props['pyFiles'] = tmppyFiles.split(",")
        LOG.debug("Check List type: pyFiles was not a list")

    # Convert the conf list to a dict for Livy
    listitems = props['conf']
    LOG.debug("Property Spark Conf kvp list from UI is: " + str(listitems))
    confDict = {}
    for i in range(len(listitems)):
        kvp = listitems[i]
        LOG.debug("Property Spark Conf key " + str(i) + " = " + str(kvp.get('key')))
        LOG.debug("Property Spark Conf value " + str(i) + " = " + str(kvp.get('value')))
        confDict[kvp.get('key')] = kvp.get('value')
    props['conf'] = confDict
    LOG.debug("Property Spark Conf dictionary is: " + str(confDict))

    props['kind'] = lang

    api = get_spark_api(self.user)
    response = api.create_session(**props)

    status = api.get_session(response['id'])
    count = 0

    while status['state'] == 'starting' and count < 120:
        status = api.get_session(response['id'])
        count += 1
        time.sleep(1)

    if status['state'] != 'idle':
        info = '\n'.join(status['log']) if status['log'] else 'timeout'
        raise QueryError(_('The Spark session could not be created in the cluster: %s') % info)

    return {'type': lang, 'id': response['id'], 'properties': properties}
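# A standalone sketch of the HUE-4761 coercion performed above, assuming the same input
# shape (comma-separated strings for list-typed keys, a list of {'key', 'value'} pairs
# for conf). The helper name is hypothetical and exists only for illustration.
def _coerce_session_properties(props):
    # Comma-separated strings become proper lists for the keys Livy expects as lists.
    for key in ('archives', 'jars', 'files', 'pyFiles'):
        if not isinstance(props[key], list):
            props[key] = props[key].split(',')

    # The UI's key/value pair list becomes the dict Livy expects for conf.
    props['conf'] = dict((kvp.get('key'), kvp.get('value')) for kvp in props['conf'])
    return props

# Example:
#   _coerce_session_properties({'archives': [], 'jars': 'a.jar,b.jar', 'files': [], 'pyFiles': [],
#                               'conf': [{'key': 'spark.executor.instances', 'value': '2'}]})
#   -> {'archives': [], 'jars': ['a.jar', 'b.jar'], 'files': [], 'pyFiles': [],
#       'conf': {'spark.executor.instances': '2'}}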
def check_status(self, notebook, snippet):
    api = get_spark_api(self.user)
    state = api.get_batch_status(snippet["result"]["handle"]["id"])
    return {"status": state}
def fetch_result(self, notebook, snippet, rows, start_over):
    api = get_spark_api(self.user)
    session = _get_snippet_session(notebook, snippet)
    cell = snippet['result']['handle']['id']

    try:
        response = api.fetch_data(session['id'], cell)
    except Exception as e:
        message = force_unicode(str(e)).lower()
        if re.search(r"session ('\d+' )?not found", message):
            raise SessionExpired(e)
        else:
            raise e

    content = response['output']

    if content['status'] == 'ok':
        data = content['data']
        images = []

        try:
            table = data['application/vnd.livy.table.v1+json']
        except KeyError:
            try:
                images = [data['image/png']]
            except KeyError:
                images = []

            if 'application/json' in data:
                result = data['application/json']
                data = result['data']
                meta = [{'name': field['name'], 'type': field['type'], 'comment': ''} for field in result['schema']['fields']]
                type = 'table'
            else:
                data = [[data['text/plain']]]
                meta = [{'name': 'Header', 'type': 'STRING_TYPE', 'comment': ''}]
                type = 'text'
        else:
            data = table['data']
            headers = table['headers']
            meta = [{'name': h['name'], 'type': h['type'], 'comment': ''} for h in headers]
            type = 'table'

        # Non start_over not supported
        if not start_over:
            data = []

        return {
            'data': data,
            'images': images,
            'meta': meta,
            'type': type
        }
    elif content['status'] == 'error':
        tb = content.get('traceback', None)

        if tb is None or not tb:
            msg = content.get('ename', 'unknown error')

            evalue = content.get('evalue')
            if evalue is not None:
                msg = '%s: %s' % (msg, evalue)
        else:
            msg = ''.join(tb)

        raise QueryError(msg)
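# A rough end-to-end sketch of how the interactive-session methods above chain together.
# The method names come from the snippets in this section, but the notebook/snippet shapes,
# the polling status values, and the `spark_api` handle are assumptions for illustration,
# not the exact objects Hue passes around.
import time


def run_statement_sketch(spark_api, statement, lang='scala'):
    session = spark_api.create_session(lang=lang)   # start a Livy session
    notebook = {'sessions': [session]}               # _get_snippet_session() is assumed to look the session up here
    snippet = {'type': lang, 'statement': statement}

    handle = spark_api.execute(notebook, snippet)    # submit the statement to the session
    snippet['result'] = {'handle': handle}

    # Poll until Livy reports the cell finished (assumed terminal states).
    while spark_api.check_status(notebook, snippet)['status'] not in ('available', 'ok'):
        time.sleep(1)

    result = spark_api.fetch_result(notebook, snippet, rows=100, start_over=True)
    spark_api.close_session(session)
    return result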