Example #1
import os

from hops import util  # hops-util-py helper module (assumed import)


def get_sdk_worker():
    """
    Get the path to the portability framework SDK worker script.

    Returns:
        The path to sdk_worker.sh
    """
    return os.path.join(util.get_flink_conf_dir(), "sdk_worker.sh")
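The returned path can be wired into Beam's portable PROCESS environment, which launches SDK workers through a local command. A minimal sketch, assuming the function is exposed as hops.beam.get_sdk_worker (the module name is an assumption; --environment_type and --environment_config are standard Beam portability options):

import json

from hops import beam  # assumed module containing get_sdk_worker()

# Point Beam's PROCESS environment at the SDK worker script.
pipeline_args = [
    "--environment_type=PROCESS",
    "--environment_config=%s" % json.dumps({"command": beam.get_sdk_worker()}),
]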
Example #2
import os
import socket
import subprocess
from random import randint

from hops import constants, util, hdfs as hopsfs  # hops-util-py helper modules (assumed imports)


def start_beam_jobserver(flink_session_name,
                         artifacts_dir="Resources",
                         jobserver_jar=None,
                         sdk_worker_parallelism=1):
    """
    Start the Java Beam job server that connects to the flink session cluster. User needs to provide the
    job name that started the Flink session and optionally the worker parallelism.

    Args:
      :flink_session_name: Job name that runs the Flink session.
      :sdk_worker_parallelism: Default parallelism for SDK worker processes. This option is only applied when the
      pipeline option sdkWorkerParallelism is set to 0.Default is 1, If 0, worker parallelism will be dynamically
      decided by runner.See also: sdkWorkerParallelism Pipeline Option (default: 1). For further documentation,
      please refer to Apache Beam docs.
    Returns:
        artifact_port, expansion_port, job_host, job_port, jobserver.pid
    """
    if jobserver_jar is None:
        jobserver_jar = os.path.join(
            util.get_flink_conf_dir(),
            "beam-runners-flink-1.8-job-server-2.15.0.jar")
    # Get the Flink master URL of the session cluster from the latest execution (ExecutionDTO)
    method = constants.HTTP_CONFIG.HTTP_GET
    resource_url = constants.DELIMITERS.SLASH_DELIMITER + \
                   constants.REST_CONFIG.HOPSWORKS_REST_RESOURCE + constants.DELIMITERS.SLASH_DELIMITER + \
                   constants.REST_CONFIG.HOPSWORKS_PROJECT_RESOURCE + constants.DELIMITERS.SLASH_DELIMITER + \
                   hopsfs.project_id() + constants.DELIMITERS.SLASH_DELIMITER + \
                   "jobs" + constants.DELIMITERS.SLASH_DELIMITER + \
                   flink_session_name + constants.DELIMITERS.SLASH_DELIMITER + \
                   "executions" + \
                   "?limit=1&offset=0&sort_by=submissionTime:desc"
    response = util.send_request(method, resource_url)
    response_object = response.json()
    flink_master_url = response_object['items'][0]['flinkMasterURL']
    # Pick random high ports for the job server endpoints (not guaranteed to be free)
    artifact_port = randint(10000, 65000)
    expansion_port = randint(10000, 65000)
    job_port = randint(10000, 65000)
    job_host = socket.getfqdn()
    log_base_path = ""
    if 'LOG_DIRS' in os.environ:
        log_base_path += os.environ['LOG_DIRS'] + "/"

    beam_jobserver_log = log_base_path + "beamjobserver-" + hopsfs.project_name().lower() + "-" + flink_session_name + \
                          "-" + str(job_port) + ".log"
    # Start the job server, redirecting its stdout and stderr to the log file
    with open(beam_jobserver_log, "wb") as out:
        jobserver = subprocess.Popen(
            [
                "java", "-jar", jobserver_jar,
                "--artifacts-dir=%s" % (hopsfs.project_path() + artifacts_dir),
                "--flink-master-url=%s" % flink_master_url,
                "--artifact-port=%d" % artifact_port,
                "--expansion-port=%d" % expansion_port,
                "--job-host=%s" % job_host,
                "--job-port=%d" % job_port,
                "--sdk-worker-parallelism=%d" % sdk_worker_parallelism
            ],
            stdout=out,
            stderr=subprocess.STDOUT,
            preexec_fn=util._on_executor_exit('SIGTERM'))
    # Record the session and the job server endpoint in module-level state
    global clusters
    clusters.append(flink_session_name)
    global jobserver_host
    jobserver_host = job_host
    global jobserver_port
    jobserver_port = job_port
    return {
        "jobserver_log": beam_jobserver_log,
        "artifact_port": artifact_port,
        "expansion_port": expansion_port,
        "job_host": job_host,
        "job_port": job_port,
        "jobserver.pid": jobserver.pid
    }
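A usage sketch, assuming a Flink session job named "flinksession" is already running and that the function is exposed as hops.beam.start_beam_jobserver (both the session name and the module name are placeholders):

from hops import beam  # assumed module containing start_beam_jobserver()

# "flinksession" stands in for the name of a running Flink session job.
server = beam.start_beam_jobserver("flinksession", sdk_worker_parallelism=1)
print("Beam job server listening on %s:%d" % (server["job_host"], server["job_port"]))
print("Job server log: %s" % server["jobserver_log"])

Pipelines can then target the server through Beam's --job_endpoint pipeline option, passing the returned host and port as host:port.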
Example #3
import os
import socket
import subprocess
from random import randint

from hops import constants, util, hdfs as hopsfs  # hops-util-py helper modules (assumed imports)


def start_beam_jobserver(flink_session_name,
                         artifacts_dir="Resources",
                         jobserver_jar=None):
    """
    Start the Java Beam job server that connects to the flink session cluster. User needs to provide the
    job name that started the Flink session and optionally the worker parallelism.

    Args:
      :flink_session_name: Job name that runs the Flink session.
      :artifacts_dir: Default dataset to store artifacts.
      :jobserver_jar: Portability framework jar filename.
    Returns:
        artifact_port, expansion_port, job_host, job_port, jobserver.pid
    """
    if jobserver_jar is None:
        jobserver_jar = os.path.join(util.get_flink_conf_dir(), "beam-runners-flink-1.9-job-server-2.19.0.jar")
    # Get the Flink master URL of the session cluster from the latest execution (ExecutionDTO)
    method = constants.HTTP_CONFIG.HTTP_GET
    resource_url = constants.DELIMITERS.SLASH_DELIMITER + \
                   constants.REST_CONFIG.HOPSWORKS_REST_RESOURCE + constants.DELIMITERS.SLASH_DELIMITER + \
                   constants.REST_CONFIG.HOPSWORKS_PROJECT_RESOURCE + constants.DELIMITERS.SLASH_DELIMITER + \
                   hopsfs.project_id() + constants.DELIMITERS.SLASH_DELIMITER + \
                   "jobs" + constants.DELIMITERS.SLASH_DELIMITER + \
                   flink_session_name + constants.DELIMITERS.SLASH_DELIMITER + \
                   "executions" + \
                   "?limit=1&offset=0&sort_by=submissionTime:desc"
    response = util.send_request(method, resource_url)
    response_object = response.json()
    flink_master_url = response_object['items'][0]['flinkMasterURL']
    # Pick random high ports for the job server endpoints (not guaranteed to be free)
    artifact_port = randint(10000, 65000)
    expansion_port = randint(10000, 65000)
    job_port = randint(10000, 65000)
    job_host = socket.getfqdn()
    log_base_path = ""
    if 'LOG_DIRS' in os.environ:
        log_base_path += os.environ['LOG_DIRS'] + "/"

    beam_jobserver_log = log_base_path + "beamjobserver-" + hopsfs.project_name().lower() + "-" + flink_session_name + \
                          "-" + str(job_port) + ".log"
    # Start the job server, redirecting its stdout and stderr to the log file
    with open(beam_jobserver_log, "wb") as out:
        jobserver = subprocess.Popen(["java",
                                      "-jar", jobserver_jar,
                                      "--artifacts-dir=%s" % (hopsfs.project_path() + artifacts_dir),
                                      "--flink-master-url=%s" % flink_master_url,
                                      "--artifact-port=%d" % artifact_port,
                                      "--expansion-port=%d" % expansion_port,
                                      "--job-host=%s" % job_host,
                                      "--job-port=%d" % job_port],
                                     stdout=out,
                                     stderr=subprocess.STDOUT,
                                     preexec_fn=util._on_executor_exit('SIGTERM'))
    # Record the session and the job server endpoint in module-level state
    global clusters
    clusters.append(flink_session_name)
    global jobserver_host
    jobserver_host = job_host
    global jobserver_port
    jobserver_port = job_port
    return {"jobserver_log": beam_jobserver_log,
            "artifact_port": artifact_port,
            "expansion_port": expansion_port,
            "job_host": job_host,
            "job_port": job_port,
            "jobserver.pid": jobserver.pid}