def submit_application():
    """Submit the Spark application attached to the HTTP request.

    Optional form fields:
      * ``job_settings`` -- whitespace-separated extra spark-submit options.
      * ``job_args``     -- whitespace-separated arguments for the job itself.
      * ``main_class``   -- if present, the upload is treated as a JAR and
        submitted with ``--class``/``--jars``; otherwise it is submitted as a
        Python job via ``--py-files``.

    Returns the job id on success, or ``("Failed!", 500)`` on any error.
    """
    try:
        job_id = get_job_id_from_attached_file()
        job_settings = request.form.get('job_settings', "").split()
        job_args = request.form.get('job_args', "").split()
        main_class = request.form.get('main_class', None)
        spark_dir = utils.get_os_env('SPARK_DIR')
        import subprocess
        submit_bin = spark_dir + "/bin/spark-submit"
        job_file = "/tmp/" + job_id
        if main_class is not None:
            # JVM job: ship the uploaded JAR and name its entry class.
            cmd = [submit_bin, "--class", main_class,
                   "--master", get_master_uri(), "--jars", job_file]
        else:
            # Python job: distribute the uploaded file to the executors.
            cmd = [submit_bin, "--master", get_master_uri(),
                   "--py-files", job_file]
        job_watchers[job_id] = subprocess.Popen(
            cmd + job_settings + [job_file] + job_args,
            universal_newlines=True, stdout=subprocess.PIPE)
        return job_id
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # are no longer swallowed; any app-level error still maps to a 500.
        return "Failed!", 500
def submit_application():
    """Submit an uploaded Spark job (JAR or Python file) via spark-submit.

    Reads the optional ``job_settings``, ``job_args`` and ``main_class`` form
    fields; a non-empty ``main_class`` selects the JVM submission path
    (``--class``/``--jars``), otherwise the Python path (``--py-files``).

    Returns the job id, or ``("Failed!", 500)`` on any error.
    """
    try:
        job_id = get_job_id_from_attached_file()
        job_settings = request.form.get('job_settings', "").split()
        job_args = request.form.get('job_args', "").split()
        main_class = request.form.get('main_class', None)
        spark_dir = utils.get_os_env('SPARK_DIR')
        import subprocess
        job_file = "/tmp/" + job_id
        if main_class is not None:
            # Uploaded artifact is a JAR with an explicit entry class.
            head = [spark_dir + "/bin/spark-submit", "--class", main_class,
                    "--master", get_master_uri(), "--jars", job_file]
        else:
            # Uploaded artifact is a Python file.
            head = [spark_dir + "/bin/spark-submit",
                    "--master", get_master_uri(), "--py-files", job_file]
        job_watchers[job_id] = subprocess.Popen(
            head + job_settings + [job_file] + job_args,
            universal_newlines=True, stdout=subprocess.PIPE)
        return job_id
    except Exception:
        # Was a bare `except:`; Exception keeps interpreter-exit signals
        # (SystemExit, KeyboardInterrupt) propagating as they should.
        return "Failed!", 500
def create_worker_process(masterUri):
    """Launch the Spark worker daemon (when enabled) and tail its log."""
    global start_worker
    global log_watchers
    # Worker startup is opt-in; bail out early when it is disabled.
    if start_worker != 'true':
        return
    if utils.get_os_env('START_MASTER').lower() != 'true':
        # No local master daemon, so the worker may claim the well-known port.
        os.environ['SPARK_WORKER_PORT'] = "7077"
        logging.info("Spark master daemon not started, worker daemon will bind to port 7077.")
    else:
        logging.info("Spark master daemon running on port 7077, worker bind to random port.")
    startup_output = subprocess.check_output(
        [spark_dir + "/sbin/start-slave.sh", masterUri],
        universal_newlines=True)
    # The last whitespace-separated token of the script output is taken as
    # the worker's log-file path; stream it via `tail -f`.
    log_watchers['Worker'] = subprocess.Popen(
        ["tail", "-f", startup_output.rsplit(None, 1)[-1]])
def send_query():
    """Run the attached HiveQL file through beeline against the Thrift server.

    Optional form fields:
      * ``hive_vars`` / ``hive_confs`` -- whitespace-separated ``k=v`` pairs,
        forwarded as repeated ``--hivevar`` / ``--hiveconf`` options.
      * ``username`` -- defaults to ``<app-id>-<version>-<hostname>``.
      * ``password`` -- forwarded via ``-p`` only when non-empty.

    Returns the job id, or ``("Failed!", 500)`` on any error.
    """
    try:
        job_id = get_job_id_from_attached_file()
        # Empty-field guards are unnecessary: iterating "".split() is a no-op.
        arg_hivevar = []
        for hive_var in request.form.get('hive_vars', "").split():
            arg_hivevar += ['--hivevar', hive_var]
        arg_hiveconf = []
        for hive_conf in request.form.get('hive_confs', "").split():
            arg_hiveconf += ['--hiveconf', hive_conf]
        application_id = utils.get_os_env('APPLICATION_ID')
        application_version = utils.get_os_env('APPLICATION_VERSION')
        hostname = utils.get_os_env('HOSTNAME')
        default_username = application_id + "-" + application_version + "-" + hostname
        username = request.form.get('username', default_username)
        arg_username = ['-n', username]
        password = request.form.get('password', "")
        arg_password = []
        if password != "":
            # NOTE(review): passing the password on the command line exposes
            # it in the host process list; consider beeline's password-file
            # mechanism instead.
            arg_password = ['-p', password]
        import subprocess
        job_watchers[job_id] = subprocess.Popen(
            ["/opt/beeline/beeline", "-u", get_thrift_server_uri()]
            + arg_hivevar + arg_hiveconf + arg_username + arg_password
            + ["-f", "/tmp/" + job_id],
            universal_newlines=True, stdout=subprocess.PIPE)
        return job_id
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # are not swallowed.
        return "Failed!", 500
def send_query():
    """Execute the uploaded HiveQL script with beeline.

    Builds the beeline command from optional form fields (``hive_vars``,
    ``hive_confs``, ``username``, ``password``) and runs it against the
    Thrift server, registering the process in ``job_watchers``.

    Returns the job id, or ``("Failed!", 500)`` on any error.
    """
    try:
        job_id = get_job_id_from_attached_file()
        arg_hivevar = []
        # "".split() yields [], so no explicit emptiness check is needed.
        for var in request.form.get('hive_vars', "").split():
            arg_hivevar.extend(['--hivevar', var])
        arg_hiveconf = []
        for conf in request.form.get('hive_confs', "").split():
            arg_hiveconf.extend(['--hiveconf', conf])
        application_id = utils.get_os_env('APPLICATION_ID')
        application_version = utils.get_os_env('APPLICATION_VERSION')
        hostname = utils.get_os_env('HOSTNAME')
        default_username = application_id + "-" + application_version + "-" + hostname
        username = request.form.get('username', default_username)
        arg_username = ['-n', username]
        password = request.form.get('password', "")
        arg_password = []
        if password != "":
            # NOTE(review): CLI passwords are visible in the process list;
            # a beeline password file would be safer.
            arg_password = ['-p', password]
        import subprocess
        job_watchers[job_id] = subprocess.Popen(
            ["/opt/beeline/beeline", "-u", get_thrift_server_uri()]
            + arg_hivevar + arg_hiveconf + arg_username + arg_password
            + ["-f", "/tmp/" + job_id],
            universal_newlines=True, stdout=subprocess.PIPE)
        return job_id
    except Exception:
        # Bare `except:` replaced: do not trap interpreter-exit exceptions.
        return "Failed!", 500
def create_worker_process(masterUri):
    """Start the Spark worker daemon (if enabled) and tail its log file.

    masterUri -- the spark:// master URI handed to start-slave.sh.
    Reads the module-level `start_worker` flag and records the tail process
    in the module-level `log_watchers` dict under 'Worker'.
    """
    global start_worker
    global log_watchers
    if start_worker == 'true':
        if utils.get_os_env('START_MASTER').lower() != 'true':
            # No local master daemon, so the worker may claim port 7077 itself.
            os.environ['SPARK_WORKER_PORT'] = "7077"
            logging.info(
                "Spark master daemon not started, worker daemon will bind to port 7077."
            )
        else:
            logging.info(
                "Spark master daemon running on port 7077, worker bind to random port."
            )
        # start-slave.sh is expected to print the worker log path as the last
        # whitespace-separated token of its output -- TODO confirm for the
        # Spark version in use.
        worker_log = subprocess.check_output(
            [spark_dir + "/sbin/start-slave.sh", masterUri],
            universal_newlines=True)
        # Stream the worker log to this process's stdout via `tail -f`.
        log_watchers['Worker'] = subprocess.Popen(
            ["tail", "-f", worker_log.rsplit(None, 1)[-1]])
#!/usr/bin/env python3 import os import boto3 import logging import subprocess import utils from time import sleep logging.basicConfig(level=getattr(logging, 'INFO', None)) spark_dir = utils.get_os_env('SPARK_DIR') utils.set_ec2_identities() zk_conn_str = "" if utils.get_os_env('ZOOKEEPER_STACK_NAME') != "": zk_conn_str = utils.generate_zk_conn_str() os.environ['SPARK_DAEMON_JAVA_OPTS'] = "-Dspark.deploy.recoveryMode=ZOOKEEPER " \ "-Dspark.deploy.zookeeper.url=" + zk_conn_str logging.info("HA mode enabled with ZooKeeper connection string " + zk_conn_str) os.environ['ZOOKEEPER_CONN_STR'] = zk_conn_str if utils.get_os_env('HIVE_SITE_XML') != "": hive_site_xml = utils.get_os_env('HIVE_SITE_XML') path = hive_site_xml[5:] bucket = path[:path.find('/')] file_key = path[path.find('/')+1:] s3 = boto3.resource('s3') try:
return retval def get_job_output(job_id): if job_id in job_watchers: status = job_watchers[job_id].poll() if status is None: return "Job is still running, try again later.", 404 elif status == 0: return get_output_stream(job_watchers[job_id]), 200 else: return "Job failed!\n" + get_output_stream( job_watchers[job_id]), 500 else: return "ID ot found!", 404 logging.basicConfig(level=getattr(logging, 'INFO', None)) api_args = { 'auth_url': utils.get_os_env('AUTH_URL'), 'tokeninfo_url': utils.get_os_env('TOKENINFO_URL'), 'oauth2_scope': utils.get_os_env('OAUTH2_SCOPE') } webapp = connexion.App(__name__, port=8000, debug=True, server='gevent') webapp.add_api('swagger.yaml', arguments=api_args) application = webapp.app if __name__ == '__main__': webapp.run()
#!/usr/bin/env python3 import os from boto3.s3.transfer import S3Transfer import boto3 import logging import subprocess import utils from time import sleep logging.basicConfig(level=getattr(logging, 'INFO', None)) spark_dir = utils.get_os_env('SPARK_DIR') if utils.get_os_env('PYTHON_LIBS') != "": python_libs = utils.get_os_env('PYTHON_LIBS').split(',') subprocess.Popen(["pip3", "install", "--upgrade"] + python_libs) utils.set_ec2_identities() zk_conn_str = "" if utils.get_os_env('ZOOKEEPER_STACK_NAME') != "": zk_conn_str = utils.generate_zk_conn_str() os.environ['SPARK_DAEMON_JAVA_OPTS'] = "-Dspark.deploy.recoveryMode=ZOOKEEPER " \ "-Dspark.deploy.zookeeper.url=" + zk_conn_str logging.info("HA mode enabled with ZooKeeper connection string " + zk_conn_str) os.environ['ZOOKEEPER_CONN_STR'] = zk_conn_str if utils.get_os_env('HIVE_SITE_XML') != "":
def get_output_stream(proc):
    """Drain and return the captured stdout of *proc* as one string.

    Trailing whitespace is stripped per line and a single newline appended.
    """
    # "".join avoids the quadratic repeated-concatenation of the old loop.
    return "".join(line.rstrip() + "\n"
                   for line in iter(proc.stdout.readline, ''))


def get_job_output(job_id):
    """Return (body, HTTP status) for a previously submitted job.

    404 while the job is still running or for an unknown id, 200 with the
    captured stdout on success, 500 with the output appended on failure.
    """
    if job_id in job_watchers:
        status = job_watchers[job_id].poll()
        if status is None:
            # poll() returning None means the process has not exited yet.
            return "Job is still running, try again later.", 404
        elif status == 0:
            return get_output_stream(job_watchers[job_id]), 200
        else:
            return "Job failed!\n" + get_output_stream(job_watchers[job_id]), 500
    else:
        # Fixed user-facing typo: was "ID ot found!".
        return "ID not found!", 404


logging.basicConfig(level=getattr(logging, 'INFO', None))

# OAuth/auth endpoints are injected into the Swagger spec as arguments.
api_args = {'auth_url': utils.get_os_env('AUTH_URL'),
            'tokeninfo_url': utils.get_os_env('TOKENINFO_URL'),
            'oauth2_scope': utils.get_os_env('OAUTH2_SCOPE')}

webapp = connexion.App(__name__, port=8000, debug=True, server='gevent')
webapp.add_api('swagger.yaml', arguments=api_args)
application = webapp.app

if __name__ == '__main__':
    webapp.run()