Ejemplo n.º 1
0
def start_runner(runner_name):
    """
    Start the job registered under ``runner_name``.

    Thin wrapper that delegates to ``jobs.start_job``; according to the
    original author this submits an HTTP request to the Hopsworks REST API.

    Args:
        runner_name: Name of the job to start, passed through unchanged.

    Returns:
        Whatever ``jobs.start_job`` returns (described by the original author
        as the runner execution status).
    """
    execution_status = jobs.start_job(runner_name)
    return execution_status
Ejemplo n.º 2
0
if execution is None or execution['count'] == 0:
    # Runs only when `execution` is None or reports a count of 0.
    # Create Flink Hopsworks job and start it. This effectively creates a
    # Flink cluster to submit jobs to.
    job_config = {
        # The configuration type is written inline instead of being stored in
        # a variable named `type`, which would shadow the builtin `type` for
        # every statement that follows in this module.
        "type": "flinkJobConfiguration",
        "amQueue": "default",
        "jobmanager.heap.size": args.yarnjobManagerMemory,
        "amVCores": "1",
        "numberOfTaskManagers": args.task_managers,
        "taskmanager.heap.size": args.yarntaskManagerMemory,
        "taskmanager.numberOfTaskSlots": args.yarnslots
    }
    jobs.create_job(args.job, job_config)

    print("Waiting for flink cluster to start...")
    jobs.start_job(args.job)

    # Poll the execution with the highest id every 5 seconds until it reports
    # "RUNNING" or the 90 second budget is used up. The first query happens
    # immediately, before any sleep.
    wait = 90
    wait_count = 0
    while True:
        execution = jobs.get_executions(
            args.job, "?offset=0&limit=1&sort_by=id:desc")['items'][0]
        state = execution['state']
        if state == "RUNNING" or wait_count >= wait:
            break
        time.sleep(5)
        wait_count += 5

    if state != "RUNNING":
        # Only reports the failure; execution continues afterwards.
        print("Flink cluster did not start, check job logs for details")
    # NOTE(review): `json_file` is not defined in the visible code, and because
    # this line sits inside the `if`, `data` is only bound when this branch
    # runs -- confirm this statement really belongs here.
    data = json_file.read()

# Fill in the placeholders of the job configuration text, one at a time and
# in a fixed order.
dependency_zip = dependency + ".zip"
data = data.replace("{PROJECT_NAME}", project_name)
data = data.replace("{DATASET_NAME}", dataset_name)
data = data.replace("{APP_FILE}", ntpath.basename(app_file))
data = data.replace("{JOB_NAME}", job_name)
data = data.replace("{DEPENDENCY}", dependency_zip)

# From here on `data` holds the parsed JSON configuration, not raw text.
data = json.loads(data)

print("Jobs configuration:\n")
print(data)
print("===============================\n")

# Package the application folder as <dependency>.zip.
shutil.make_archive(dependency, "zip", app_folder)

# The URL argument is split on ":"; the first part is passed as the host and
# the second as the port.
hopsworks_url = args.hopsworks_url.split(":")
hopsworks_host = hopsworks_url[0]
hopsworks_port = hopsworks_url[1]
project.connect(project_name,
                hopsworks_host,
                port=hopsworks_port,
                api_key=args.apikey)
print("Connected to project: " + project_name)

# Upload the application file and the zipped dependency to the dataset.
app_path = os.path.join(app_folder, app_file)
dataset.upload(app_path, dataset_name)
dataset.upload(dependency_zip, dataset_name)
print("===============================\n")
print("Uploaded program to Hopsworks.")

# Register the job with the parsed configuration, then start it with the
# command-line arguments joined into a single space-separated string.
jobs.create_job(job_name, data)
job_arguments = " ".join(args.cmd)
jobs.start_job(job_name, job_arguments)
print("===============================\n")
print("Started job: " + job_name)