Example #1
    def _wait_for_child_processes(self):
        output, error = process_utils.run_command_return_output('pgrep -P ' + str(self._process_id), self._logger)
        if error is not None:
            self._logger.error("[SandboxAgent] wait_for_child_processes: Failed to get children process ids: %s", str(error))
            return

        children_pids = set(output.split())
        self._logger.info("[SandboxAgent] wait_for_child_processes: Parent pid: %s, Children pids: %s", str(self._process_id), str(children_pids))

        for jrh_process in self._javarequesthandler_process_list:
            if str(jrh_process.pid) in children_pids:
                children_pids.remove(str(jrh_process.pid))
                self._logger.info("[SandboxAgent] wait_for_child_processes: Not waiting on JavaRequestHandler pid: %s", str(jrh_process.pid))

        ## find the fluent-bit pid; the launcher pid may not match the running daemon
        output, error = process_utils.run_command_return_output('ps --no-headers -o pid -C fluent-bit', self._logger)
        if error is None:
            fbpid = output.strip()
            if fbpid in children_pids:
                children_pids.remove(fbpid)
                self._logger.info("[SandboxAgent] wait_for_child_processes: Not waiting on fluent-bit pid: %s", fbpid)

        if self._queue_service_process is not None:
            if str(self._queue_service_process.pid) in children_pids:
                children_pids.remove(str(self._queue_service_process.pid))
                self._logger.info("[SandboxAgent] wait_for_child_processes: Not waiting on queue service pid: %s", str(self._queue_service_process.pid))

        if self._frontend_process is not None:
            if str(self._frontend_process.pid) in children_pids:
                children_pids.remove(str(self._frontend_process.pid))
                self._logger.info("[SandboxAgent] wait_for_child_processes: Not waiting on frontend pid: %s", str(self._frontend_process.pid))

        if not children_pids:
            self._logger.info("[SandboxAgent] wait_for_child_processes: No remaining pids to wait for")
            return

        while True:
            try:
                cpid, status = os.waitpid(-1, 0)
                self._logger.info("[SandboxAgent] wait_for_child_processes: Status changed for pid: %s, Status: %s", str(cpid), str(status))
                if str(cpid) not in children_pids:
                    # not a pid we track (e.g., a JavaRequestHandler); keep waiting
                    continue
                children_pids.remove(str(cpid))
                if not children_pids:
                    self._logger.info("[SandboxAgent] wait_for_child_processes: No remaining pids to wait for")
                    break
            except ChildProcessError:
                # no children left at all; stop instead of spinning in the loop
                break
            except Exception as exc:
                self._logger.error('[SandboxAgent] wait_for_child_processes: %s', str(exc))
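
A minimal standalone sketch of the same reaping pattern, assuming a POSIX system (os.waitpid(-1, 0) is not available on Windows); the sleep commands and the tracked pid set are illustrative only:

import os
import subprocess

# spawn a few children and track their pids, as the agent does above
children = [subprocess.Popen(["sleep", str(i + 1)]) for i in range(3)]
pending = {child.pid for child in children}

while pending:
    try:
        # block until any child changes state; returns (pid, status)
        cpid, status = os.waitpid(-1, 0)
    except ChildProcessError:
        break  # no children left to wait for; stop instead of spinning
    pending.discard(cpid)
    print("reaped pid", cpid, "status", status)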
Example #2
    def set_child_process(self, which, process, command_args_map):
        pid = process.pid
        if which == "qs":
            self._queue_service_process = process
        elif which == "fe":
            self._frontend_process = process
        elif which == "fb":
            self._fluentbit_process = process
            # the Popen pid may belong to a launcher; find the daemon's actual pid
            output, error = process_utils.run_command_return_output('ps --no-headers -o pid -C fluent-bit', self._logger)
            if error is None and output.strip() != "":
                fbpid = int(output.strip())
                self._fluentbit_actual_pid = fbpid
                pid = fbpid
            else:
                self._logger.error("[SandboxAgent] set_child_process: could not find the actual fluent-bit pid; keeping pid: %s", str(pid))

        # store command and args
        self._child_process_command_args_map[pid] = command_args_map
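
The 'ps -C' lookup above can be factored into a small helper. This is a hypothetical sketch (find_pid_by_command is not part of the codebase) that assumes a Linux procps-style ps and returns the first matching pid:

import subprocess

def find_pid_by_command(command_name):
    # '-C' selects processes by executable name; '--no-headers -o pid' prints bare pids
    result = subprocess.run(["ps", "--no-headers", "-o", "pid", "-C", command_name],
                            capture_output=True, text=True, check=False)
    first_line = result.stdout.strip().split("\n")[0].strip()
    return int(first_line) if first_line else None

print(find_pid_by_command("fluent-bit"))  # None if no such process is running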
Example #3
    def get_all_children_pids(self):
        children_pids = []
        for p in self._functionworker_process_map.values():
            children_pids.append(p.pid)

        for jrhp in self._javarequesthandler_process_list:
            children_pids.append(jrhp.pid)

        children_pids.append(self._queue_service_process.pid)
        children_pids.append(self._frontend_process.pid)

        # looks like this pid does not match the actual process; perhaps because it also spawns another process?
        #children_pids.append(self._fluentbit_process.pid)
        ## find the actual fluent-bit pid
        output, error = process_utils.run_command_return_output('ps --no-headers -o pid -C fluent-bit', self._logger)
        if error is None and output.strip() != "":
            fbpid = int(output.strip())
            self._fluentbit_actual_pid = fbpid
            children_pids.append(fbpid)

        return children_pids
Example #4
    def process_deployment_info(self):
        has_error = False
        errmsg = ""

        if self._deployment_info is not None and self._deployment_info != "":
            try:
                self._deployment_info = json.loads(self._deployment_info)
                self._logger.debug("Deployment info: %s", json.dumps(self._deployment_info))
            except Exception as exc:
                errmsg = "Could not parse deployment info: " + str(exc)
                self._logger.error(errmsg)
                has_error = True
                return has_error, errmsg
        else:
            errmsg = "Empty deployment info."
            has_error = True
            return has_error, errmsg

        if "workflow" not in self._deployment_info or "resources" not in self._deployment_info:
            errmsg = "Incomplete deployment info: " + json.dumps(self._deployment_info)
            self._logger.error(errmsg)
            has_error = True
            return has_error, errmsg

        # get workflow info
        workflow_info = self._deployment_info["workflow"]
        sid = workflow_info["sandboxId"]
        if sid != self._sandboxid:
            self._logger.warning("Workflow info sandboxId does not match the provided sandbox id (%s <-> %s)", sid, self._sandboxid)
        wid = workflow_info["workflowId"]
        if wid != self._workflowid:
            self._logger.warning("Workflow info workflowId does not match the provided workflow id (%s <-> %s)", wid, self._workflowid)
        wf_type = workflow_info["workflowType"]

        usertoken = ''
        if "usertoken" in workflow_info:
            usertoken = workflow_info["usertoken"]
        os.environ["USERTOKEN"] = usertoken

        # get workflow json, parse workflow json and init params
        workflow_json = self._global_data_layer_client.get(workflow_info["json_ref"])
        if workflow_json is None or workflow_json == "":
            has_error = True
            errmsg = "Empty workflow description."
            return has_error, errmsg

        try:
            workflow_json = base64.b64decode(workflow_json).decode()
        except Exception as exc:
            has_error = True
            errmsg = "Invalid value for workflow json: " + str(exc)
            return has_error, errmsg

        self._workflow = Workflow(self._userid, sid, wid, wf_type, workflow_json, self._logger)

        has_error = self._workflow.has_error()
        if has_error:
            errmsg = "Problem in workflow description: " + str(workflow_json)
            self._logger.error(errmsg)
            return has_error, errmsg

        # get workflow nodes
        workflow_nodes = self._workflow.getWorkflowNodeMap()

        # get resources info and find functions
        resource_map = {}
        resource_info_map = self._deployment_info["resources"]

        if any(resource_info_map[res_name]["runtime"].lower().startswith("java") for res_name in resource_info_map):
            # run setup_maven.sh to update the proxy settings at runtime
            # (i.e., the sandbox image may have been built on a machine with a proxy, or vice versa)
            cmd_maven_proxy_initer = "/opt/mfn/JavaRequestHandler/setup_maven.sh"
            self._logger.info("Updating maven proxy settings...")
            error, _ = process_utils.run_command(cmd_maven_proxy_initer, self._logger, wait_output=True)
            if error is not None:
                has_error = True
                errmsg = "Could not reinitialize maven proxy settings: " + error
                return has_error, errmsg
            self._logger.info("Finished updating maven proxy settings.")

        # for pip installable dependencies for python functions
        req_map = {}
        t_start_download = time.time()
        # store functions in local filesystem
        for resource_name in resource_info_map:
            resource_info = resource_info_map[resource_name]
            resource_info["runtime"] = resource_info["runtime"].lower()

            if resource_info["type"] == "code":
                error, resource_dirpath = self._retrieve_and_store_function_code(resource_name, resource_info)
            else:
                error, resource_dirpath = self._retrieve_and_store_function_zip(resource_name, resource_info)

            if error is not None:
                errmsg = "Could not retrieve and store function: " + resource_name + " " + error
                self._logger.error(errmsg)
                has_error = True
                return has_error, errmsg

            # these requirements can now be also for java maven dependencies
            resource_id = resource_info["id"]
            greq = self._global_data_layer_client.get("grain_requirements_" + resource_id)
            mvndeps = None
            if greq is not None and greq != "":
                greq = base64.b64decode(greq).decode()
                if resource_info["runtime"].find("python") == 0:
                    # get function requirements and put it into a map
                    lines = greq.strip().split("\n")
                    for line in lines:
                        req_map[line] = True
                elif resource_info["runtime"].find("java") == 0:
                    mvndeps = greq

            # get function environment variables
            env_var_list = []
            genv = self._global_data_layer_client.get("grain_environment_variables_" + resource_id)
            if genv is not None and genv != "":
                genv = base64.b64decode(genv).decode()
                lines = genv.split("\n")
                env_var_list = lines

            resource = {}
            resource["name"] = resource_name
            resource["dirpath"] = resource_dirpath
            resource["runtime"] = resource_info["runtime"]
            resource["env_var_list"] = env_var_list
            resource_map[resource_name] = resource

            # compile the java sources
            if resource["runtime"].find("java") == 0:
                # even if it was just a single java file
                # or a jar file uploaded with source files
                # or a jar file with just class files,
                # the following function will
                # 1. download maven dependencies (if there is a pom.xml in the jar or was separately uploaded)
                # 2. compile the source files if any
                error = self._compile_java_resources_if_necessary(resource, mvndeps)

                if error is not None:
                    errmsg = "Could not compile Java function resources: " + resource_name + " " + error
                    self._logger.error(errmsg)
                    has_error = True
                    return has_error, errmsg

        total_time_download = (time.time() - t_start_download) * 1000.0
        self._logger.info("Download time for all function code: %s (ms)", str(total_time_download))

        t_start_requirements = time.time()
        # this list will only contain pip installable dependencies;
        # java maven dependencies will be handled while compiling the java resources
        sbox_req_list = list(req_map.keys())

        # install sandbox requirements
        req = workflow_info["sandbox_requirements"]
        req["requirements"] = sbox_req_list
        error = self._install_sandbox_requirements(req)
        if error is not None:
            errmsg = "Could not install sandbox requirements. " + str(error)
            self._logger.error(errmsg)
            has_error = True
            return has_error, errmsg

        total_time_requirements = (time.time() - t_start_requirements) * 1000.0
        self._logger.info("Requirements install time: %s (ms)", str(total_time_requirements))

        t_start_storage = time.time()
        # initialize local data layer space for user and workflow
        self._initialize_data_layer_storage()
        total_time_storage = (time.time() - t_start_storage) * 1000.0
        self._logger.info("Storage initialization time: %s (ms)", str(total_time_storage))

        self._local_queue_client = LocalQueueClient(connect=self._queue)

        self._local_queue_client.addTopic(self._workflow.getWorkflowExitTopic())

        t_start_launch = time.time()
        # accumulate all java worker params into one
        # later, we'll launch a single JVM to handle all java functions
        if SINGLE_JVM_FOR_FUNCTIONS:
            single_jvm_worker_params = {}
            any_java_function = False

        total_time_state = 0.0
        for function_topic in workflow_nodes:
            wf_node = workflow_nodes[function_topic]
            resource_name = wf_node.get_resource_name()

            t_start_state = time.time()
            if resource_name == "":
                # this is an ASL state without a resource (i.e., function) attached to it
                error, resource = state_utils.create_dummy_resource_for_asl_state(wf_node)
                if error is not None:
                    errmsg = "Could not create non-resource state. " + str(error)
                    self._logger.error(errmsg)
                    has_error = True
                    return has_error, errmsg
            else:
                resource = resource_map[resource_name]

            error, state = state_utils.create_state(wf_node, resource, self._logger)
            if error is not None:
                errmsg = "Could not create state: " + str(error)
                self._logger.error(errmsg)
                has_error = True
                return has_error, errmsg

            total_time_state += (time.time() - t_start_state) * 1000.0

            self._local_queue_client.addTopic(function_topic)

            # compile worker parameters
            worker_params = self._populate_worker_params(function_topic, wf_node, state)
            # store worker parameters as a local file
            params_filename = state["dirpath"] + "worker_params.json"

            with open(params_filename, "w") as paramsf:
                json.dump(worker_params, paramsf, indent=4)

            if state["resource_runtime"].find("java") != -1:
                java_worker_params = {}
                java_worker_params["functionPath"] = worker_params["ffolder"]
                java_worker_params["functionName"] = worker_params["fname"]
                java_worker_params["serverSocketFilename"] = "/tmp/java_handler_" + worker_params["functionstatename"] + ".uds"

                if SINGLE_JVM_FOR_FUNCTIONS:
                    any_java_function = True
                    single_jvm_worker_params[worker_params["functionstatename"]] = java_worker_params
                else:
                    java_params_filename = state["dirpath"] + "java_worker_params.json"
                    with open(java_params_filename, "w") as javaparamsf:
                        json.dump(java_worker_params, javaparamsf, indent=4)

            # launch function workers with the params parsed from workflow info
            error = self._start_function_worker(worker_params, state["resource_runtime"], state["resource_env_var_list"])

            if error is not None:
                errmsg = "Problem launching function worker for: " + worker_params["fname"]
                self._logger.error(errmsg)
                has_error = True
                return has_error, errmsg

            # add the new function worker to the local list
            self._workflow.addLocalFunction(function_topic)

        # all function workers have been launched; update them with locally running functions
        # prepare update message to be used by all
        local_functions = self._workflow.getWorkflowLocalFunctions()
        lqcm_update = self._prepare_update_for_locally_running(local_functions)
        for function_topic in workflow_nodes:
            self._update_function_worker(function_topic, lqcm_update)

        if SINGLE_JVM_FOR_FUNCTIONS:
            if any_java_function:
                single_jvm_params_filename = "/opt/mfn/workflow/states/single_jvm_worker_params.json"
                with open(single_jvm_params_filename, "w") as jvmparamsf:
                    json.dump(single_jvm_worker_params, jvmparamsf, indent=4)

                self._logger.info("Launching a single JavaRequestHandler for all Java states...")
                cmdjavahandler = "java -jar /opt/mfn/JavaRequestHandler/target/javaworker.jar "
                cmdjavahandler += single_jvm_params_filename

                error, process = process_utils.run_command(cmdjavahandler, self._logger, wait_until="Waiting for requests on:")
                if error is not None:
                    errmsg = "Problem launching JavaRequestHandler for Java states: " + error
                    self._logger.error(errmsg)
                    has_error = True
                    return has_error, errmsg
                else:
                    self._javarequesthandler_process_list.append(process)

        self._logger.info("State creation for all function workers: %s (ms)", str(total_time_state))

        total_time_launch = (time.time() - t_start_launch) * 1000.0
        self._logger.info("Launch time for all function workers: %s (ms)", str(total_time_launch))

        if not has_error:
            # check whether all function workers have launched successfully
            # give some time for function workers to come up
            cmd = "pgrep -P " + str(self._process_id) + " -a"
            output, error = process_utils.run_command_return_output(cmd, self._logger)
            if error is not None:
                self._logger.error("[SandboxAgent] check health of function workers: failed to get FunctionWorker processes: %s", str(error))
                has_error = True
                errmsg = "Could not get FunctionWorker processes."

        if not has_error:
            fwlines = set(output.split("\n"))
            fwpids = []
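            # each line of pgrep '-a' output is '<pid> <full command line>'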
            for line in fwlines:
                if "FunctionWorker.py" in line:
                    pid = line.split(" ")[0]
                    fwpids.append(pid)

            if str(self._fluentbit_process.pid) in fwpids:
                fwpids.remove(str(self._fluentbit_process.pid))

            self._logger.info("Observed FunctionWorker processes: %s, expected: %s", str(len(fwpids)), str(len(self._functionworker_process_map)))
            #self._logger.info(str(fwpids) + " " + str(self._functionworker_process_map))

            if len(fwpids) != len(self._functionworker_process_map):
                has_error = True
                errmsg = "One or more function workers could not be launched:\n"

                for state_name in self._functionworker_process_map:
                    fwp = self._functionworker_process_map[state_name]
                    if str(fwp.pid) not in fwpids:  # fwpids holds strings parsed from pgrep output
                        errmsg += state_name + "\n"

        self._global_data_layer_client.shutdown()

        return has_error, errmsg
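
A standalone sketch of the health check above, assuming Linux pgrep; it lists this process's children and keeps the pids of lines mentioning FunctionWorker.py. Note that pgrep prints pids as text, which is why the tracked Popen pids must be compared as strings (the fix in the loop above):

import os
import subprocess

result = subprocess.run(["pgrep", "-P", str(os.getpid()), "-a"],
                        capture_output=True, text=True, check=False)
fwpids = [line.split(" ", 1)[0]
          for line in result.stdout.splitlines()
          if "FunctionWorker.py" in line]
print(fwpids)  # e.g. ['1234', '1240'] -- strings, not ints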
Example #5
    def _compile_java_resources_if_necessary(self, resource, mvndeps):
        error = None

        cmdmkdir = "mkdir -p " + resource["dirpath"] + "target/classes"

        self._logger.info("Preparing for compilation of Java function resources: %s", resource["name"])
        error, _ = process_utils.run_command(cmdmkdir, self._logger, wait_output=True)
        if error is not None:
            error = "Could not create target directory for resource: " + resource["name"] + " " + error
            self._logger.error(error)
            return error

        #cmdjavac = "javac -classpath /opt/mfn/JavaRequestHandler/mfnapi.jar -d " + resource["dirpath"] + "target/classes "
        #cmdjavac += resource["dirpath"] + resource["name"] + ".java"

        cmdfind = "find " + resource["dirpath"] + " -name *.java"
        output, error = process_utils.run_command_return_output(cmdfind, self._logger)
        if error is not None:
            self._logger.error("[SandboxAgent] could not search for any Java sources: %s", str(error))
            error = "Could not search for any Java sources: " + resource["name"] + " " + str(error)
            return error
        source_files = set(output.split("\n"))
        source_files = ' '.join(source_files).strip()
        should_compile = False
        if source_files != "":
            should_compile = True
            self._logger.info("Found following Java sources: %s", str(source_files))
        else:
            self._logger.info("No java sources to compile.")

        # check for pom.xml or the uploaded requirements; if present:
        if mvndeps is not None and not os.path.exists(resource["dirpath"] + "pom.xml"):
            # write the content of mvndeps into the pom.xml
            self._logger.info("Writing maven build file: %spom.xml", resource["dirpath"])
            with open(resource["dirpath"] + "pom.xml", "w") as fpom:
                fpom.write(mvndeps)

        # we either had a pom.xml file in the archive or non-empty mvndeps from uploaded requirements, which we wrote as the pom.xml file
        # regardless, if there is a pom file, then resolve and copy maven dependencies
        if os.path.exists(resource["dirpath"] + "pom.xml"):
            cmdmvn = "mvn -Duser.home=/tmp -DskipTests -gs /opt/mfn/JavaRequestHandler/maven/sandbox-mvn-settings.xml -f " + resource["dirpath"]
            cmdmvn += " dependency:copy-dependencies -DoutputDirectory=" + resource["dirpath"] + "target/classes"

            self._logger.info("Copying maven dependencies for Java function: %s", resource["name"])
            error, _ = process_utils.run_command(cmdmvn, self._logger, wait_output=True)
            if error is not None:
                error = "Could not copy maven dependencies: " + resource["name"] + " " + error
                self._logger.error(error)
                return error
            self._logger.info("Finished copying dependencies for Java function: %s", resource["name"])

        if should_compile:
            cmdjavac = "javac -classpath /opt/mfn/JavaRequestHandler/mfnapi.jar:"
            cmdjavac += resource["dirpath"] + "target/classes/* "
            cmdjavac += "-d " +  resource["dirpath"] + "target/classes " + source_files

            self._logger.info("Compiling Java function resources: %s", resource["name"])
            self._logger.info(cmdjavac)
            error, _ = process_utils.run_command(cmdjavac, self._logger, wait_output=True)
            if error is not None:
                error = "Could not compile resource: " + resource["name"] + " " + error
                self._logger.error(error)
                return error
            self._logger.info("Finished compiling Java function resources: %s", resource["name"])

        return error
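
A minimal sketch of the compile step, under the same layout assumptions as the code above (maven dependencies already copied into <dirpath>target/classes, mfnapi.jar at the path the agent uses); compile_java_sources and the example paths are hypothetical:

import subprocess

def compile_java_sources(dirpath, source_files):
    # jars in target/classes are picked up via the classpath wildcard
    classpath = "/opt/mfn/JavaRequestHandler/mfnapi.jar:" + dirpath + "target/classes/*"
    cmd = ["javac", "-classpath", classpath, "-d", dirpath + "target/classes"] + source_files
    result = subprocess.run(cmd, capture_output=True, text=True, check=False)
    return None if result.returncode == 0 else result.stderr

error = compile_java_sources("/tmp/myresource/", ["/tmp/myresource/HelloWorld.java"])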