import os
import socket
import subprocess
from random import randint

# Assumed import paths for the Hopsworks helpers used below.
from hops import constants, util
from hops import hdfs as hopsfs

# Module-level state: names of Flink sessions with a job server attached, and
# the endpoint of the most recently started job server.
clusters = []
jobserver_host = None
jobserver_port = None


def get_sdk_worker():
    """
    Get the path to the portability framework SDK worker script.

    Returns:
        the path to sdk_worker.sh
    """
    return os.path.join(util.get_flink_conf_dir(), "sdk_worker.sh")
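# Usage sketch (illustrative, not part of the library API): the script returned
# by get_sdk_worker() is what a portable Beam pipeline points its PROCESS
# environment at, so each Flink task manager can spawn SDK worker processes.
# Assumes apache_beam is installed and a job server is already reachable; the
# job endpoint below is a placeholder.
def _example_process_environment_options():
    import json
    from apache_beam.options.pipeline_options import PipelineOptions

    return PipelineOptions([
        "--runner=PortableRunner",
        "--job_endpoint=localhost:8099",  # placeholder job server endpoint
        "--environment_type=PROCESS",
        # Beam's PROCESS environment takes a JSON config whose "command" is
        # executed to start each SDK worker.
        "--environment_config=" + json.dumps({"command": get_sdk_worker()}),
    ])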
def start_beam_jobserver(flink_session_name,
                         artifacts_dir="Resources",
                         jobserver_jar=None,
                         sdk_worker_parallelism=1):
    """
    Start the Java Beam job server that connects to the Flink session cluster. The user needs to provide the
    name of the job that started the Flink session and, optionally, the artifacts dataset, the job server jar
    and the SDK worker parallelism.

    Args:
        :flink_session_name: Name of the job that runs the Flink session.
        :artifacts_dir: Dataset in which to store job artifacts (default: "Resources").
        :jobserver_jar: Path to the portability framework job server jar. Defaults to the
            beam-runners-flink-1.9-job-server-2.19.0.jar shipped with the Flink distribution.
        :sdk_worker_parallelism: Default parallelism for SDK worker processes (default: 1). This option is
            only applied when the pipeline option sdkWorkerParallelism is set to 0. If 0, worker parallelism
            is decided dynamically by the runner. See the sdkWorkerParallelism pipeline option in the
            Apache Beam docs.

    Returns:
        A dict with the keys jobserver_log, artifact_port, expansion_port, job_host, job_port and
        jobserver.pid.
    """
    if jobserver_jar is None:
        jobserver_jar = os.path.join(util.get_flink_conf_dir(),
                                     "beam-runners-flink-1.9-job-server-2.19.0.jar")
    # Get the Flink master URL of the session cluster from its latest ExecutionDTO.
    method = constants.HTTP_CONFIG.HTTP_GET
    resource_url = constants.DELIMITERS.SLASH_DELIMITER + \
                   constants.REST_CONFIG.HOPSWORKS_REST_RESOURCE + constants.DELIMITERS.SLASH_DELIMITER + \
                   constants.REST_CONFIG.HOPSWORKS_PROJECT_RESOURCE + constants.DELIMITERS.SLASH_DELIMITER + \
                   hopsfs.project_id() + constants.DELIMITERS.SLASH_DELIMITER + \
                   "jobs" + constants.DELIMITERS.SLASH_DELIMITER + \
                   flink_session_name + constants.DELIMITERS.SLASH_DELIMITER + \
                   "executions" + \
                   "?limit=1&offset=0&sort_by=submissionTime:desc"
    response = util.send_request(method, resource_url)
    response_object = response.json()
    flink_master_url = response_object['items'][0]['flinkMasterURL']
    # Pick random ports for the job server's gRPC endpoints.
    artifact_port = randint(10000, 65000)
    expansion_port = randint(10000, 65000)
    job_port = randint(10000, 65000)
    job_host = socket.getfqdn()
    log_base_path = ""
    if 'LOG_DIRS' in os.environ:
        log_base_path += os.environ['LOG_DIRS'] + "/"
    beam_jobserver_log = log_base_path + "beamjobserver-" + hopsfs.project_name().lower() + "-" + \
                         flink_session_name + "-" + str(job_port) + ".log"
    # Launch the job server, redirecting stdout and stderr into one log file.
    # The file is opened once; opening it twice in write mode would have the
    # two streams overwrite each other.
    with open(beam_jobserver_log, "wb") as out:
        jobserver = subprocess.Popen(["java",
                                      "-jar", jobserver_jar,
                                      "--artifacts-dir=%s" % (hopsfs.project_path() + artifacts_dir),
                                      "--flink-master-url=%s" % flink_master_url,
                                      "--artifact-port=%d" % artifact_port,
                                      "--expansion-port=%d" % expansion_port,
                                      "--job-host=%s" % job_host,
                                      "--job-port=%d" % job_port,
                                      "--sdk-worker-parallelism=%d" % sdk_worker_parallelism],
                                     stdout=out,
                                     stderr=subprocess.STDOUT,
                                     preexec_fn=util._on_executor_exit('SIGTERM'))
    global clusters
    clusters.append(flink_session_name)
    global jobserver_host
    jobserver_host = job_host
    global jobserver_port
    jobserver_port = job_port
    return {"jobserver_log": beam_jobserver_log,
            "artifact_port": artifact_port,
            "expansion_port": expansion_port,
            "job_host": job_host,
            "job_port": job_port,
            "jobserver.pid": jobserver.pid}
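# Usage sketch (illustrative, not part of the library API): start the job
# server against an existing Flink session, then aim a portable Beam pipeline
# at the returned endpoint. "flinksession" is a placeholder job name, and in
# practice you may need to wait briefly for the job server to come up before
# submitting.
def _example_run_pipeline_against_session():
    import json
    import apache_beam as beam
    from apache_beam.options.pipeline_options import PipelineOptions

    jobserver = start_beam_jobserver("flinksession")
    options = PipelineOptions([
        "--runner=PortableRunner",
        "--job_endpoint=%s:%d" % (jobserver["job_host"], jobserver["job_port"]),
        "--environment_type=PROCESS",
        "--environment_config=" + json.dumps({"command": get_sdk_worker()}),
    ])
    # The pipeline body is a stand-in; replace it with the real transforms.
    with beam.Pipeline(options=options) as p:
        (p | beam.Create([1, 2, 3])
           | beam.Map(print))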