Esempio n. 1
0
def upload_request():
    """Queue an asynchronous upload towards the external transfer machine.

    Reads ``targetPath`` and ``sourcePath`` from the request form, checks
    each with ``validate_input`` (an empty string means "valid"), verifies
    ``targetPath`` with ``is_valid_dir``, creates a ``storage`` task and
    starts ``upload_task`` in a background thread.

    Returns:
        A Flask response tuple. 201 with ``task_id`` and ``task_url`` when
        the task was queued; 400 on any validation or task-creation
        failure (with the headers reported by ``is_valid_dir`` when the
        directory check fails).
    """
    system_addr = EXT_TRANSFER_MACHINE_INTERNAL
    system_name = EXT_TRANSFER_MACHINE_PUBLIC

    # path to save the file in the cluster
    targetPath = request.form.get("targetPath", None)
    v = validate_input(targetPath)
    if v != "":
        return jsonify(description="Failed to upload file",
                       error=f"'targetPath' {v}"), 400

    # path from the local filesystem
    sourcePath = request.form.get("sourcePath", None)
    v = validate_input(sourcePath)
    if v != "":
        return jsonify(description="Failed to upload file",
                       error=f"'sourcePath' {v}"), 400

    headers, ID = get_tracing_headers(request)

    # The directory check is performed on targetPath, so the error must
    # name targetPath (it previously reported "sourcePath error").
    check = is_valid_dir(targetPath, headers, system_name, system_addr)
    if not check["result"]:
        return jsonify(description="targetPath error"), 400, check["headers"]

    # obtain a new task from the Tasks microservice
    task_id = create_task(headers, service="storage")
    if task_id == -1:
        return jsonify(error="Error creating task"), 400

    try:
        # Mark the task as queued before the worker starts so the worker's
        # own status updates are never overwritten by this one.
        update_task(task_id, headers, async_task.QUEUED)

        aTask = threading.Thread(target=upload_task,
                                 name=ID,
                                 args=(headers, system_name, system_addr,
                                       targetPath, sourcePath, task_id))

        # keep a reference to the worker thread, indexed by task id
        storage_tasks[task_id] = aTask
        aTask.start()

        task_url = f"{KONG_URL}/tasks/{task_id}"
        return jsonify(success="Task created",
                       task_url=task_url,
                       task_id=task_id), 201

    except Exception as e:
        # Exception instances are not JSON-serializable; send the message.
        return jsonify(error=str(e)), 400
Esempio n. 2
0
def acct():
    """Queue an asynchronous ``sacct`` accounting query on a machine.

    The machine is taken from the ``X-Machine-Name`` header. Optional query
    parameters ``starttime``, ``endtime`` and ``jobs`` are turned into the
    matching ``sacct`` options; a start/end time that fails
    ``check_sacctTime`` is logged and ignored.

    Returns:
        A Flask response tuple. 200 with ``task_id`` and ``task_url`` when
        the task was queued; 400 for a missing/unknown machine, an
        unavailable machine or a task-creation failure; 404 when the
        connectivity test reports a permission problem.
    """
    # Local import: only needed to quote the user-supplied job list.
    import shlex

    auth_header = request.headers[AUTH_HEADER_NAME]
    try:
        system_name = request.headers["X-Machine-Name"]
    except KeyError:
        app.logger.error("No machinename given")
        return jsonify(description="No machine name given"), 400

    # public endpoints from Kong to users
    if system_name not in SYSTEMS_PUBLIC:
        header = {"X-Machine-Does-Not-Exists": "Machine does not exists"}
        return jsonify(description="Failed to retrieve account information", error="Machine does not exists"), 400, header

    # the internal address sits at the same index as the public name
    system_idx = SYSTEMS_PUBLIC.index(system_name)
    system_addr = SYS_INTERNALS[system_idx]

    # check if the machine is accessible by the user: run a no-op command
    resp = exec_remote_command(auth_header, system_name, system_addr, "true")

    if resp["error"] != 0:
        error_str = resp["msg"]
        if resp["error"] == -2:
            header = {"X-Machine-Not-Available": "Machine is not available"}
            return jsonify(description="Failed to retrieve account information"), 400, header
        if in_str(error_str, "Permission") or in_str(error_str, "OPENSSH"):
            header = {"X-Permission-Denied": "User does not have permissions to access machine or path"}
            return jsonify(description="Failed to retrieve account information"), 404, header
        # any other error is not treated as fatal here

    start_time_opt = ""
    end_time_opt = ""

    try:
        # optional --starttime=, only used when correctly encoded
        starttime = request.args.get("starttime", "")
        if starttime != "":
            if check_sacctTime(starttime):
                start_time_opt = f" --starttime={starttime} "
            else:
                app.logger.warning("starttime wrongly encoded")

        # optional --endtime=, only used when correctly encoded
        endtime = request.args.get("endtime", "")
        if endtime != "":
            if check_sacctTime(endtime):
                end_time_opt = f" --endtime={endtime} "
            else:
                app.logger.warning("endtime wrongly encoded")
    except Exception as e:
        # Exception instances are not JSON-serializable; send the message.
        data = jsonify(description="Failed to retrieve account information", error=str(e))
        return data, 400

    # optional parameter jobs=jobidA,jobidB,jobidC
    jobs_opt = ""
    jobs = request.args.get("jobs", "")
    if jobs != "":
        # The value comes straight from the query string and ends up in a
        # remote command line, so it is shell-quoted. A plain comma
        # separated list of ids is left unchanged by shlex.quote.
        jobs_opt = f" --jobs={shlex.quote(jobs)} "

    # sacct
    # -X so no step information is shown (ie: just jobname, not jobname.batch or jobname.0, etc)
    # --starttime={start_time_opt} starts accounting info
    # --endtime={end_time_opt} ends accounting info
    # --jobs={job1,job2,job3} list of jobs to be reported
    # --format: 0-jobid 1-partition 2-jobname 3-user 4-state
    #           5-start 6-cputime 7-end 8-NNodes 9-NodeList
    # --parsable2: fields delimited with "|", no trailing delimiter
    action = (f"sacct -X {start_time_opt} {end_time_opt} {jobs_opt} "
              "--format='jobid,partition,jobname,user,state,start,cputime,end,NNodes,NodeList' "
              "--noheader --parsable2")

    try:
        # obtain a new task from the Tasks microservice
        task_id = create_task(auth_header, service="compute")

        if task_id == -1:
            return jsonify(description="Failed to retrieve account information", error='Error creating task'), 400

        update_task(task_id, auth_header, async_task.QUEUED)

        # asynchronous task creation
        aTask = threading.Thread(target=acct_task,
                                 args=(auth_header, system_name, system_addr, action, task_id))
        aTask.start()

        task_url = f"{KONG_URL}/tasks/{task_id}"
        data = jsonify(success="Task created", task_id=task_id, task_url=task_url)
        return data, 200

    except Exception as e:
        data = jsonify(description="Failed to retrieve account information", error=str(e))
        return data, 400
Esempio n. 3
0
def cancel_job(jobid):
    """Queue an asynchronous ``scancel`` of a SLURM job.

    Args:
        jobid: Identifier of the job to cancel, as received from the
            caller. It is shell-quoted before being placed in the command.

    Returns:
        A Flask response tuple. 200 with ``task_id`` and ``task_url`` when
        the task was queued; 400 for a missing/unknown machine, an
        unavailable machine or a task-creation failure; 404 when the
        connectivity test reports a permission problem.
    """
    # Local import: only needed to quote the caller-supplied job id.
    import shlex

    auth_header = request.headers[AUTH_HEADER_NAME]

    try:
        system_name = request.headers["X-Machine-Name"]
    except KeyError:
        app.logger.error("No machinename given")
        return jsonify(description="No machine name given"), 400

    # public endpoints from Kong to users
    if system_name not in SYSTEMS_PUBLIC:
        header = {"X-Machine-Does-Not-Exists": "Machine does not exists"}
        return jsonify(description="Failed to delete job", error="Machine does not exists"), 400, header

    # the internal address sits at the same index as the public name
    system_idx = SYSTEMS_PUBLIC.index(system_name)
    system_addr = SYS_INTERNALS[system_idx]

    # check if the machine is accessible by the user: run a no-op command
    resp = exec_remote_command(auth_header, system_name, system_addr, "true")

    if resp["error"] != 0:
        error_str = resp["msg"]
        if resp["error"] == -2:
            header = {"X-Machine-Not-Available": "Machine is not available"}
            return jsonify(description="Failed to delete job"), 400, header
        if in_str(error_str, "Permission") or in_str(error_str, "OPENSSH"):
            header = {"X-Permission-Denied": "User does not have permissions to access machine or path"}
            return jsonify(description="Failed to delete job"), 404, header
        # any other error is not treated as fatal here

    app.logger.info(f"Cancel SLURM job={jobid} from {system_name} ({system_addr})")

    # scancel with verbose in order to show the error correctly.
    # jobid is caller-controlled, so it is quoted to keep it a single
    # shell word; a plain numeric id is left unchanged by shlex.quote.
    action = f"scancel -v {shlex.quote(str(jobid))}"

    try:
        # obtain a new task from the Tasks microservice
        task_id = create_task(auth_header, service="compute")

        if task_id == -1:
            return jsonify(description="Failed to delete job", error='Error creating task'), 400

        # Mark the task as queued BEFORE starting the worker; doing it
        # afterwards could overwrite a status the worker already set.
        update_task(task_id, auth_header, async_task.QUEUED)

        # asynchronous task creation
        aTask = threading.Thread(target=cancel_job_task,
                                 args=(auth_header, system_name, system_addr, action, task_id))
        aTask.start()

        task_url = f"{KONG_URL}/tasks/{task_id}"
        data = jsonify(success="Task created", task_id=task_id, task_url=task_url)
        return data, 200

    except Exception as e:
        # Exception instances are not JSON-serializable; send the message.
        data = jsonify(description="Failed to delete job", error=str(e))
        return data, 400
Esempio n. 4
0
def list_job(jobid):
    """Queue an asynchronous ``squeue`` query for a single SLURM job.

    Args:
        jobid: Job identifier; rejected with 400 unless ``is_jobid``
            accepts it.

    Returns:
        A Flask response tuple. 200 with ``task_id`` and ``task_url`` when
        the task was queued; 400 for a missing/unknown machine, an invalid
        job id, an unavailable machine or a task-creation failure; 404
        when the connectivity test reports a permission problem.
    """
    auth_header = request.headers[AUTH_HEADER_NAME]

    try:
        system_name = request.headers["X-Machine-Name"]
    except KeyError:
        app.logger.error("No machinename given")
        return jsonify(description="No machine name given"), 400

    # public endpoints from Kong to users
    if system_name not in SYSTEMS_PUBLIC:
        header = {"X-Machine-Does-Not-Exists": "Machine does not exists"}
        return jsonify(description="Failed to retrieve job information", error="Machine does not exists"), 400, header

    # check if jobid is a valid jobid for SLURM
    if not is_jobid(jobid):
        return jsonify(description="Failed to retrieve job information", error=f"{jobid} is not a valid job ID"), 400

    # the internal address sits at the same index as the public name
    system_idx = SYSTEMS_PUBLIC.index(system_name)
    system_addr = SYS_INTERNALS[system_idx]

    # check if the machine is accessible by the user: run a no-op command
    resp = exec_remote_command(auth_header, system_name, system_addr, "true")

    if resp["error"] != 0:
        error_str = resp["msg"]
        if resp["error"] == -2:
            header = {"X-Machine-Not-Available": "Machine is not available"}
            return jsonify(description="Failed to retrieve job information"), 400, header
        if in_str(error_str, "Permission") or in_str(error_str, "OPENSSH"):
            header = {"X-Permission-Denied": "User does not have permissions to access machine or path"}
            return jsonify(description="Failed to retrieve job information"), 404, header
        # any other error is not treated as fatal here

    username = get_username(auth_header)
    app.logger.info(f"Getting SLURM information of job={jobid} from {system_name} ({system_addr})")

    # squeue format fields, "|"-separated:
    #   %i job id, %P partition, %j job name, %u user, %T job state,
    #   %M time used, %S start time, %L time left,
    #   %D number of nodes, %R reason / node list
    action = f"squeue -u {username} --format='%i|%P|%j|%u|%T|%M|%S|%L|%D|%R' --noheader -j {jobid}"

    try:
        # obtain a new task from the Tasks microservice
        task_id = create_task(auth_header, service="compute")

        if task_id == -1:
            return jsonify(description="Failed to retrieve job information", error='Error creating task'), 400

        update_task(task_id, auth_header, async_task.QUEUED)

        # asynchronous task creation; the trailing 1, 1 are passed in the
        # positions list_jobs uses for pageSize and pageNumber
        aTask = threading.Thread(target=list_job_task,
                                 args=(auth_header, system_name, system_addr, action, task_id, 1, 1))
        aTask.start()

        task_url = f"{KONG_URL}/tasks/{task_id}"
        data = jsonify(success="Task created", task_id=task_id, task_url=task_url)
        return data, 200

    except Exception as e:
        # Exception instances are not JSON-serializable; send the message.
        data = jsonify(description="Failed to retrieve job information", error=str(e))
        return data, 400
Esempio n. 5
0
def list_jobs():
    """Queue an asynchronous ``squeue`` query for the caller's SLURM jobs.

    Optional query parameters:
        jobs: Comma-separated job ids; each must satisfy ``is_jobid``.
        pageSize, pageNumber: Pagination values, honoured only when both
            are supplied and parse as integers. ``pageSize`` falls back to
            25 unless it is one of 10, 25, 50 or 100. The defaults are
            ``pageNumber=0`` and ``pageSize=25``.

    Returns:
        A Flask response tuple. 200 with ``task_id`` and ``task_url`` when
        the task was queued; 400 for a missing/unknown machine, a
        malformed job list, an unavailable machine or a task-creation
        failure; 404 when the connectivity test reports a permission
        problem.
    """
    auth_header = request.headers[AUTH_HEADER_NAME]

    try:
        system_name = request.headers["X-Machine-Name"]
    except KeyError:
        app.logger.error("No machinename given")
        return jsonify(description="No machine name given"), 400

    # public endpoints from Kong to users
    if system_name not in SYSTEMS_PUBLIC:
        header = {"X-Machine-Does-Not-Exists": "Machine does not exists"}
        return jsonify(description="Failed to retrieve jobs information", error="Machine does not exists"), 400, header

    # the internal address sits at the same index as the public name
    system_idx = SYSTEMS_PUBLIC.index(system_name)
    system_addr = SYS_INTERNALS[system_idx]

    # check if the machine is accessible by the user: run a no-op command
    resp = exec_remote_command(auth_header, system_name, system_addr, "true")

    if resp["error"] != 0:
        error_str = resp["msg"]
        if resp["error"] == -2:
            header = {"X-Machine-Not-Available": "Machine is not available"}
            return jsonify(description="Failed to retrieve jobs information"), 400, header
        if in_str(error_str, "Permission") or in_str(error_str, "OPENSSH"):
            header = {"X-Permission-Denied": "User does not have permissions to access machine or path"}
            return jsonify(description="Failed to retrieve jobs information"), 404, header
        # any other error is not treated as fatal here

    username = get_username(auth_header)

    app.logger.info(f"Getting SLURM information of jobs from {system_name} ({system_addr})")

    # job list, comma separated
    jobs = request.args.get("jobs", None)
    pageSize = request.args.get("pageSize", None)
    pageNumber = request.args.get("pageNumber", None)

    if pageSize is not None and pageNumber is not None:
        try:
            pageNumber = int(pageNumber)
            pageSize = int(pageSize)

            if pageSize not in [10, 25, 50, 100]:
                pageSize = 25

        except ValueError:
            pageNumber = 0
            pageSize = 25
            app.logger.error("Wrong pageNumber and/or pageSize")
    else:
        # if not both set, use the defaults
        pageNumber = 0
        pageSize = 25

    # by default empty
    job_list = ""
    if jobs is not None:
        try:
            # every element must be a non-empty, valid job id
            job_aux_list = jobs.split(",")
            if '' in job_aux_list:
                return jsonify(error="Jobs list wrong format", description="Failed to retrieve job information"), 400

            for jobid in job_aux_list:
                if not is_jobid(jobid):
                    return jsonify(error=f"{jobid} is not a valid job ID", description="Failed to retrieve job information"), 400

            job_list = f"--job={jobs}"
        except Exception:
            # narrowed from a bare except so that interpreter-exit signals
            # (KeyboardInterrupt, SystemExit) are no longer swallowed
            return jsonify(error="Jobs list wrong format", description="Failed to retrieve job information"), 400

    # squeue format fields, "|"-separated:
    #   %i job id, %P partition, %j job name, %u user, %T job state,
    #   %M time used, %S start time, %L time left,
    #   %D number of nodes, %R reason / node list
    action = f"squeue -u {username} {job_list} --format='%i|%P|%j|%u|%T|%M|%S|%L|%D|%R' --noheader"

    try:
        task_id = create_task(auth_header, service="compute")

        if task_id == -1:
            return jsonify(description="Failed to retrieve job information", error='Error creating task'), 400

        update_task(task_id, auth_header, async_task.QUEUED)

        # asynchronous task creation
        aTask = threading.Thread(target=list_job_task,
                                 args=(auth_header, system_name, system_addr, action, task_id, pageSize, pageNumber))
        aTask.start()

        task_url = f"{KONG_URL}/tasks/{task_id}"
        data = jsonify(success="Task created", task_id=task_id, task_url=task_url)
        return data, 200

    except Exception as e:
        # Exception instances are not JSON-serializable; send the message.
        data = jsonify(description="Failed to retrieve job information", error=str(e))
        return data, 400
Esempio n. 6
0
def submit_job_path():
    """Queue an asynchronous job submission for a batch file already on the machine.

    The machine is taken from the ``X-Machine-Name`` header and the batch
    file path from the ``targetPath`` form field, which must pass
    ``is_valid_file``. The job directory handed to the worker is
    ``targetPath`` without its last path component.

    Returns:
        A Flask response tuple. 201 with ``task_id`` and ``task_url`` when
        the task was queued; 400 for a missing/unknown machine, a missing,
        empty or invalid ``targetPath``, an unavailable machine or a
        task-creation failure; 404 when the connectivity test reports a
        permission problem.
    """
    auth_header = request.headers[AUTH_HEADER_NAME]

    try:
        system_name = request.headers["X-Machine-Name"]
    except KeyError:
        app.logger.error("No machinename given")
        return jsonify(description="Failed to submit job", error="No machine name given"), 400

    # public endpoints from Kong to users
    if system_name not in SYSTEMS_PUBLIC:
        header = {"X-Machine-Does-Not-Exists": "Machine does not exists"}
        return jsonify(description="Failed to submit job", error="Machine does not exists"), 400, header

    # the internal address sits at the same index as the public name
    system_idx = SYSTEMS_PUBLIC.index(system_name)
    system_addr = SYS_INTERNALS[system_idx]

    # check if the machine is accessible by the user: run a no-op command
    resp = exec_remote_command(auth_header, system_name, system_addr, "true")

    if resp["error"] != 0:
        error_str = resp["msg"]
        if resp["error"] == -2:
            header = {"X-Machine-Not-Available": "Machine is not available"}
            return jsonify(description="Failed to submit job"), 400, header
        if in_str(error_str, "Permission") or in_str(error_str, "OPENSSH"):
            header = {"X-Permission-Denied": "User does not have permissions to access machine or path"}
            return jsonify(description="Failed to submit job"), 404, header
        # any other error is not treated as fatal here

    # A missing field yields None; a present-but-blank field yields "".
    targetPath = request.form.get("targetPath")

    if targetPath is None:
        data = jsonify(description="Failed to submit job", error="'targetPath' parameter not set in request")
        return data, 400

    if targetPath == "":
        data = jsonify(description="Failed to submit job", error="'targetPath' parameter value is empty")
        return data, 400

    # checks if targetPath is a valid path for this user in this machine
    check = is_valid_file(targetPath, auth_header, system_name, system_addr)

    if not check["result"]:
        return jsonify(description="Failed to submit job"), 400, check["headers"]

    # creates the async task related to the job submission
    task_id = create_task(auth_header, service="compute")
    if task_id == -1:
        return jsonify(description="Failed to submit job", error='Error creating task'), 400

    # Drop the last "/"-separated component (the file name):
    #   "/home/testuser/test/sbatch.sh" -> ['', 'home', 'testuser', 'test']
    job_dir_splitted = targetPath.split("/")[:-1]
    # If what remains still ends in an empty component, drop that as well.
    # The emptiness guard avoids an IndexError when targetPath has no "/".
    if job_dir_splitted and job_dir_splitted[-1] == "":
        job_dir_splitted = job_dir_splitted[:-1]

    job_dir = "/".join(job_dir_splitted)

    try:
        # Mark the task as queued BEFORE starting the worker; doing it
        # afterwards could overwrite a status the worker already set.
        # NOTE(review): this is the only compute call that passes TASKS_URL
        # as a fourth argument to update_task - confirm it is intended.
        update_task(task_id, auth_header, async_task.QUEUED, TASKS_URL)

        # asynchronous task creation
        aTask = threading.Thread(target=submit_job_path_task,
                                 args=(auth_header, system_name, system_addr, targetPath, job_dir, task_id))
        aTask.start()

        task_url = f"{KONG_URL}/tasks/{task_id}"
        data = jsonify(success="Task created", task_id=task_id, task_url=task_url)
        return data, 201

    except Exception as e:
        # Exception instances are not JSON-serializable; send the message.
        data = jsonify(description="Failed to submit job", error=str(e))
        return data, 400
Esempio n. 7
0
def submit_job_upload():
    """Queue an asynchronous job submission from an uploaded batch file.

    The machine is taken from the ``X-Machine-Name`` header and the batch
    script from the ``file`` part of the multipart request. The job
    directory handed to the worker is
    ``<COMPUTE_BASE_FS entry>/<username>/firecrest/<task_id>``.

    Returns:
        A Flask response tuple. 201 with ``task_id`` and ``task_url`` when
        the task was queued; 400 for a missing/unknown machine, a missing
        or unnamed file, an unavailable machine or a task-creation
        failure; 404 when the connectivity test reports a permission
        problem; 413 when the upload raises ``RequestEntityTooLarge``.
    """
    auth_header = request.headers[AUTH_HEADER_NAME]

    try:
        system_name = request.headers["X-Machine-Name"]
    except KeyError:
        app.logger.error("No machinename given")
        return jsonify(description="No machine name given"), 400

    # public endpoints from Kong to users
    if system_name not in SYSTEMS_PUBLIC:
        header = {"X-Machine-Does-Not-Exists": "Machine does not exists"}
        return jsonify(description="Failed to submit job file", error="Machine does not exists"), 400, header

    # the internal address sits at the same index as the public name
    system_idx = SYSTEMS_PUBLIC.index(system_name)
    system_addr = SYS_INTERNALS[system_idx]

    # check if the machine is accessible by the user: run a no-op command
    resp = exec_remote_command(auth_header, system_name, system_addr, "true")

    if resp["error"] != 0:
        error_str = resp["msg"]
        if resp["error"] == -2:
            header = {"X-Machine-Not-Available": "Machine is not available"}
            return jsonify(description="Failed to submit job file"), 400, header
        if in_str(error_str, "Permission") or in_str(error_str, "OPENSSH"):
            header = {"X-Permission-Denied": "User does not have permissions to access machine or path"}
            return jsonify(description="Failed to submit job file"), 404, header
        # any other error is not treated as fatal here

    job_base_fs = COMPUTE_BASE_FS[system_idx]

    try:
        # check if the post request has the file part
        if 'file' not in request.files:
            app.logger.error('No batch file part')
            error = jsonify(description="Failed to submit job file", error='No batch file part')
            return error, 400

        upload = request.files['file']
        job_file = {'filename': secure_filename(upload.filename), 'content': upload.read()}

        # if the user does not select a file, the browser also
        # submits an empty part without a filename
        if job_file['filename'] == '':
            app.logger.error('No batch file selected')
            error = jsonify(description="Failed to submit job file", error='No batch file selected')
            return error, 400

    except RequestEntityTooLarge as re:
        app.logger.error(re.description)
        data = jsonify(description="Failed to submit job file", error=f"File is bigger than {MAX_FILE_SIZE} MB")
        return data, 413
    except Exception as e:
        # Exception instances are not JSON-serializable; send the message.
        data = jsonify(description="Failed to submit job file", error=str(e))
        return data, 400

    task_id = create_task(auth_header, service="compute")
    if task_id == -1:
        return jsonify(description="Failed to submit job file", error='Error creating task'), 400

    username = get_username(auth_header)

    # per-task directory, named after the task id
    job_dir = f"{job_base_fs}/{username}/firecrest/{task_id}"

    app.logger.info(f"Job dir: {job_dir}")

    try:
        # Mark the task as queued BEFORE starting the worker; doing it
        # afterwards could overwrite a status the worker already set.
        update_task(task_id, auth_header, async_task.QUEUED)

        # asynchronous task creation
        aTask = threading.Thread(target=submit_job_task,
                                 args=(auth_header, system_name, system_addr, job_file, job_dir, task_id))
        aTask.start()

        task_url = f"{KONG_URL}/tasks/{task_id}"
        data = jsonify(success="Task created", task_id=task_id, task_url=task_url)
        return data, 201

    except Exception as e:
        data = jsonify(description="Failed to submit job", error=str(e))
        return data, 400
Esempio n. 8
0
def submit_job_path():
    """Queue an asynchronous job submission for a batch file already on the machine.

    The machine is taken from the ``X-Machine-Name`` header. The form field
    ``targetPath`` (required) and ``account`` (optional) are checked with
    ``validate_input`` (an empty string means "valid"); ``targetPath`` must
    also pass ``is_valid_file``. The job directory handed to the worker is
    ``targetPath`` without its last path component.

    Returns:
        A Flask response tuple. 201 with ``task_id`` and ``task_url`` when
        the task was queued; 400 for a missing/unknown machine, invalid
        input, an unavailable machine or a task-creation failure; 404 when
        the connectivity test reports a permission problem.
    """
    try:
        system_name = request.headers["X-Machine-Name"]
    except KeyError:
        app.logger.error("No machinename given")
        return jsonify(description="Failed to submit job",
                       error="No machine name given"), 400

    # public endpoints from Kong to users
    if system_name not in SYSTEMS_PUBLIC:
        header = {"X-Machine-Does-Not-Exists": "Machine does not exists"}
        return jsonify(description="Failed to submit job",
                       error="Machine does not exists"), 400, header

    # the per-machine settings sit at the same index as the public name
    system_idx = SYSTEMS_PUBLIC.index(system_name)
    system_addr = SYS_INTERNALS[system_idx]
    use_plugin = USE_SPANK_PLUGIN[system_idx]

    targetPath = request.form.get("targetPath", None)
    v = validate_input(targetPath)
    if v != "":
        return jsonify(description="Failed to submit job",
                       error=f"'targetPath' {v}"), 400

    # optional "account" parameter, validated only when supplied
    account = request.form.get("account", None)
    if account is not None:
        v = validate_input(account)
        if v != "":
            return jsonify(description="Invalid account",
                           error=f"'account' {v}"), 400

    headers, ID = get_tracing_headers(request)

    # check if the machine is accessible by the user: run a no-op command
    resp = exec_remote_command(headers, system_name, system_addr,
                               f"ID={ID} true")

    if resp["error"] != 0:
        error_str = resp["msg"]
        if resp["error"] == -2:
            header = {"X-Machine-Not-Available": "Machine is not available"}
            return jsonify(description="Failed to submit job"), 400, header
        if in_str(error_str, "Permission") or in_str(error_str, "OPENSSH"):
            header = {
                "X-Permission-Denied":
                "User does not have permissions to access machine or path"
            }
            return jsonify(description="Failed to submit job"), 404, header
        # any other error is not treated as fatal here

    # checks if targetPath is a valid path for this user in this machine
    check = is_valid_file(targetPath, headers, system_name, system_addr)

    if not check["result"]:
        return jsonify(
            description="Failed to submit job"), 400, check["headers"]

    # creates the async task related to the job submission
    task_id = create_task(headers, service="compute")
    if task_id == -1:
        return jsonify(description="Failed to submit job",
                       error='Error creating task'), 400

    # Drop the last "/"-separated component (the file name):
    #   "/home/testuser/test/sbatch.sh" -> ['', 'home', 'testuser', 'test']
    job_dir_splitted = targetPath.split("/")[:-1]
    # If what remains still ends in an empty component, drop that as well.
    # The emptiness guard avoids an IndexError when targetPath has no "/".
    if job_dir_splitted and job_dir_splitted[-1] == "":
        job_dir_splitted = job_dir_splitted[:-1]

    job_dir = "/".join(job_dir_splitted)

    try:
        # Mark the task as queued BEFORE starting the worker; doing it
        # afterwards could overwrite a status the worker already set.
        # NOTE(review): TASKS_URL is passed as a fourth argument to
        # update_task here - confirm against its signature.
        update_task(task_id, headers, async_task.QUEUED, TASKS_URL)

        # asynchronous task creation
        aTask = threading.Thread(target=submit_job_path_task,
                                 name=ID,
                                 args=(headers, system_name, system_addr,
                                       targetPath, job_dir, account,
                                       use_plugin, task_id))
        aTask.start()

        task_url = f"{KONG_URL}/tasks/{task_id}"
        data = jsonify(success="Task created",
                       task_id=task_id,
                       task_url=task_url)
        return data, 201

    except Exception as e:
        # Exception instances are not JSON-serializable; send the message.
        data = jsonify(description="Failed to submit job", error=str(e))
        return data, 400