# Standard-library imports used below. The project-local helpers referenced in
# this class (DataLayerClient, LocalQueueClient, LocalQueueClientMessage,
# Workflow, process_utils, state_utils, and the SINGLE_JVM_FOR_FUNCTIONS flag)
# are assumed to be provided by the surrounding sandbox agent package.
import base64
import errno
import json
import os
import sys
import time
class Deployment:

    def __init__(self, deployment_info, hostname, userid, sandboxid, workflowid, workflowname, queue, datalayer, logger, external_endpoint, internal_endpoint, management_endpoints):
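        """Track the runtime state of a single sandbox deployment: the parsed workflow,
        the function worker, JavaRequestHandler, queue service, frontend and fluent-bit
        child processes, and the data layer and local queue clients used to manage them."""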
        self._logger = logger
        self._deployment_info = deployment_info
        self._hostname = hostname
        self._userid = userid
        self._sandboxid = sandboxid
        self._workflowid = workflowid
        self._workflowname = workflowname
        self._queue = queue
        self._datalayer = datalayer
        self._external_endpoint = external_endpoint
        self._internal_endpoint = internal_endpoint
        self._management_endpoints = management_endpoints

        self._python_version = sys.version_info

        self._storage_userid = self._userid.replace("@", "AT")
        self._storage_userid = self._storage_userid.replace("-", "_").replace(".", "_")

        self._process_id = os.getpid()

        self._functionworker_process_map = {}
        self._javarequesthandler_process_list = []
        self._queue_service_process = None
        self._frontend_process = None
        self._fluentbit_process = None
        # will probably be updated later to the actual fluent-bit pid
        self._fluentbit_actual_pid = -1

        self._child_process_command_args_map = {}

        # to be declared later when parsing the deployment info
        self._workflow = None

        self._global_data_layer_client = DataLayerClient(locality=1, suid=self._storage_userid, connect=self._datalayer)

        self._local_queue_client = None

    def get_workflow(self):
        return self._workflow

    def set_child_process(self, which, process, command_args_map):
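        """Remember a child process handle ("qs": queue service, "fe": frontend,
        "fb": fluent-bit) together with the command and arguments used to start it,
        so that it can later be identified and potentially relaunched."""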
        pid = process.pid
        if which == "qs":
            self._queue_service_process = process
        elif which == "fe":
            self._frontend_process = process
        elif which == "fb":
            self._fluentbit_process = process
            output, error = process_utils.run_command_return_output('ps --no-headers -o pid -C fluent-bit', self._logger)
            fbpid = int(output.strip())
            self._fluentbit_actual_pid = fbpid
            pid = fbpid

        # store command and args
        self._child_process_command_args_map[pid] = command_args_map

    def get_all_children_pids(self):
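        """Return the pids of all child processes: function workers, JavaRequestHandlers,
        the queue service, the frontend and the actual fluent-bit process (looked up via ps)."""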
        children_pids = []
        for state in self._functionworker_process_map:
            p = self._functionworker_process_map[state]
            children_pids.append(p.pid)

        for jrhp in self._javarequesthandler_process_list:
            children_pids.append(jrhp.pid)

        children_pids.append(self._queue_service_process.pid)
        children_pids.append(self._frontend_process.pid)

        # the stored fluent-bit process pid does not match the actual process
        # (the launcher spawns another process), so look up the real fluent-bit pid instead
        #children_pids.append(self._fluentbit_process.pid)
        output, error = process_utils.run_command_return_output('ps --no-headers -o pid -C fluent-bit', self._logger)
        fbpid = int(output.strip())
        self._fluentbit_actual_pid = fbpid
        children_pids.append(fbpid)

        return children_pids

    def check_child_process(self):
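        """Reap one child process with waitpid(WNOHANG) and report whether it exited
        or was killed. Returns (changed, pid, failed_process_name)."""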
        pid, status = os.waitpid(-1, os.WNOHANG|os.WUNTRACED|os.WCONTINUED)
        failed_process_name = ""
        # with WNOHANG, waitpid() returns (0, 0) when no child has changed state
        if pid == 0:
            return False, pid, failed_process_name
        if os.WIFCONTINUED(status) or os.WIFSTOPPED(status):
            return False, pid, failed_process_name
        if os.WIFSIGNALED(status) or os.WIFEXITED(status):
            self._logger.error("Process with pid: " + str(pid) + " stopped.")
            if pid == self._fluentbit_actual_pid:
                failed_process_name = "Fluent-bit"
            elif pid == self._queue_service_process.pid:
                failed_process_name = "Queue service"
            elif pid == self._frontend_process.pid:
                failed_process_name = "Frontend"
            else:
                for jrhp in self._javarequesthandler_process_list:
                    if pid == jrhp.pid:
                        failed_process_name = "Java request handler"
                        break
                for state_name in self._functionworker_process_map:
                    process = self._functionworker_process_map[state_name]
                    if pid == process.pid:
                        failed_process_name = "Function worker (" + state_name + ")"
                        del self._functionworker_process_map[state_name]
                        break

            self._logger.error("Failed process name: " + failed_process_name)

        if os.path.exists('/var/run/secrets/kubernetes.io'):
            return True, pid, failed_process_name
        else:
            # TODO: try to relaunch some of the processes (FWs, fluentbit, frontend)
            self._logger.info(self._child_process_command_args_map[pid])
            return True, pid, failed_process_name

    def shutdown(self):
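        """Gracefully shut down the workflow: send a 'stop' message to every function
        worker topic, wait for the workers to exit, terminate the JavaRequestHandler
        processes and close the local queue client."""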
        shutdown_message = {}
        shutdown_message["action"] = "stop"

        lqcm_shutdown = LocalQueueClientMessage(key="0l", value=json.dumps(shutdown_message))

        workflow_nodes = self._workflow.getWorkflowNodeMap()
        for function_topic in workflow_nodes:
            ack = self._local_queue_client.addMessage(function_topic, lqcm_shutdown, True)
            while not ack:
                ack = self._local_queue_client.addMessage(function_topic, lqcm_shutdown, True)

        self._logger.info("Waiting for function workers to shutdown")
        self._wait_for_child_processes()

        for jrh_process in self._javarequesthandler_process_list:
            process_utils.terminate_and_wait_child(jrh_process, "JavaRequestHandler", 5, self._logger)

        self._local_queue_client.shutdown()

    def force_shutdown(self):
        # called when the queue service has crashed and we need to shut down the function workers
        for state in self._functionworker_process_map:
            p = self._functionworker_process_map[state]
            process_utils.terminate_and_wait_child(p, "FunctionWorker", 5, self._logger)

        for jrh_process in self._javarequesthandler_process_list:
            process_utils.terminate_and_wait_child(jrh_process, "JavaRequestHandler", 5, self._logger)

        self._local_queue_client.shutdown()

    def _wait_for_child_processes(self):
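        """Wait for the remaining child processes (i.e., the function workers) to exit,
        excluding the JavaRequestHandlers, fluent-bit, the queue service and the frontend."""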
        output, error = process_utils.run_command_return_output('pgrep -P ' + str(self._process_id), self._logger)
        if error is not None:
            self._logger.error("[SandboxAgent] wait_for_child_processes: Failed to get children process ids: %s", str(error))
            return

        children_pids = set(output.split())
        self._logger.info("[SandboxAgent] wait_for_child_processes: Parent pid: %s, Children pids: %s", str(self._process_id), str(children_pids))

        for jrh_process in self._javarequesthandler_process_list:
            if str(jrh_process.pid) in children_pids:
                children_pids.remove(str(jrh_process.pid))
                self._logger.info("[SandboxAgent] wait_for_child_processes: Not waiting on JavaRequestHandler pid: %s", str(jrh_process.pid))

        ## find fluentbit PID
        output, error = process_utils.run_command_return_output('ps --no-headers -o pid -C fluent-bit', self._logger)
        fbpid = output.strip()
        if fbpid in children_pids:
            children_pids.remove(fbpid)
            self._logger.info("[SandboxAgent] wait_for_child_processes: Not waiting on fluent-bit pid: %s", fbpid)

        if self._queue_service_process is not None:
            if str(self._queue_service_process.pid) in children_pids:
                children_pids.remove(str(self._queue_service_process.pid))
                self._logger.info("[SandboxAgent] wait_for_child_processes: Not waiting on queue service pid: %s", str(self._queue_service_process.pid))

        if self._frontend_process is not None:
            if str(self._frontend_process.pid) in children_pids:
                children_pids.remove(str(self._frontend_process.pid))
                self._logger.info("[SandboxAgent] wait_for_child_processes: Not waiting on frontend pid: %s", str(self._frontend_process.pid))

        if not children_pids:
            self._logger.info("[SandboxAgent] wait_for_child_processes: No remaining pids to wait for")
            return

        while True:
            try:
                cpid, status = os.waitpid(-1, 0)
                self._logger.info("[SandboxAgent] wait_for_child_processes: Status changed for pid: %s, Status: %s", str(cpid), str(status))
                if str(cpid) not in children_pids:
                    #print('wait_for_child_processes: ' + str(cpid) + "Not found in children_pids")
                    continue
                children_pids.remove(str(cpid))
                if not children_pids:
                    self._logger.info("[SandboxAgent] wait_for_child_processes: No remaining pids to wait for")
                    break
            except Exception as exc:
                self._logger.error('[SandboxAgent] wait_for_child_processes: %s', str(exc))

    def _start_python_function_worker(self, worker_params, env_var_list):
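        """Launch a FunctionWorker.py process for one state with a customized
        LD_LIBRARY_PATH and the user-provided environment variables; stdout/stderr
        are redirected to a per-state log file."""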
        error = None
        function_name = worker_params["fname"]
        state_name = worker_params["functionstatename"]
        custom_env = os.environ.copy()
        old_ld_library_path = ""
        if "LD_LIBRARY_PATH" in custom_env:
            old_ld_library_path = custom_env["LD_LIBRARY_PATH"]
        custom_env["LD_LIBRARY_PATH"] = "/opt/mfn/workflow/states/" + state_name + "/" + function_name + ":/opt/mfn/workflow/states/" + state_name + "/" + function_name + "/lib"

        if old_ld_library_path != "":
            custom_env["LD_LIBRARY_PATH"] = custom_env["LD_LIBRARY_PATH"] + ":" + old_ld_library_path

        #custom_env["PYTHONPATH"] = "/opt/mfn/workflow/states/" + state_name + "/" + function_name

        for env_var in env_var_list:
            idx = env_var.find("=")
            if idx == -1:
                continue
            env_var_key = env_var[0:idx]
            env_var_value = env_var[idx+1:]
            custom_env[env_var_key] = env_var_value

        #self._logger.info("environment variables (after user env vars): %s", str(custom_env))

        if self._python_version >= (3, ):
            cmd = "python3 "
        else:
            cmd = "python "
        cmd = cmd + "/opt/mfn/FunctionWorker/python/FunctionWorker.py"
        cmd = cmd + " " + '\"/opt/mfn/workflow/states/%s/worker_params.json\"' % state_name # state_name can contain whitespace

        filename = '/opt/mfn/logs/function_' + state_name + '.log'
        log_handle = open(filename, 'a')

        # store the command and arguments in case the process fails and needs to be restarted
        command_args_map = {}
        command_args_map["command"] = cmd
        command_args_map["custom_env"] = custom_env
        command_args_map["log_filename"] = filename

        #self._logger.info("Starting function worker: " + state_name + "  with stdout/stderr redirected to: " + filename)
        error, process = process_utils.run_command(cmd, self._logger, custom_env=custom_env, process_log_handle=log_handle)
        if error is None:
            self._functionworker_process_map[state_name] = process
            self._child_process_command_args_map[process.pid] = command_args_map
            self._logger.info("Started function worker: %s, pid: %s, with stdout/stderr redirected to: %s", state_name, str(process.pid), filename)
        return error

    def _start_function_worker(self, worker_params, runtime, env_var_list):
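        """Start the worker for one state depending on its runtime: python states get
        a python function worker; java states additionally get a JavaRequestHandler
        (either one per state, or a single shared JVM launched later)."""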
        error = None

        if runtime.find("python") != -1:
            error = self._start_python_function_worker(worker_params, env_var_list)
        elif runtime.find("java") != -1:
            # TODO: environment/JVM variables need to be utilized by the java request handler, not by the function worker

            if SINGLE_JVM_FOR_FUNCTIONS:
                # _XXX_: we'll launch the single JVM handling all java functions later
                error = self._start_python_function_worker(worker_params, env_var_list)
            else:
                # if a jar was uploaded, its contents have already been extracted as if it were a zip archive
                # start the java request handler for this java state;
                # the parameters have already been written to a json file in the state directory
                self._logger.info("Launching JavaRequestHandler for state: %s", worker_params["functionstatename"])
                cmdjavahandler = "java -jar /opt/mfn/JavaRequestHandler/target/javaworker.jar "
                cmdjavahandler += "/opt/mfn/workflow/states/" + worker_params["functionstatename"] + "/java_worker_params.json"

                error, process = process_utils.run_command(cmdjavahandler, self._logger, wait_until="Waiting for requests on:")
                if error is not None:
                    error = "Could not launch JavaRequestHandler: " + worker_params["fname"] + " " + error
                    self._logger.error(error)
                else:
                    self._javarequesthandler_process_list.append(process)
                    error = self._start_python_function_worker(worker_params, env_var_list)
        else:
            error = "Unsupported function runtime: " + runtime

        return error

    def _prepare_update_for_locally_running(self, local_functions):
        update = {}
        update["action"] = "update-local-functions"
        update["localFunctions"] = local_functions
        update = json.dumps(update)

        lqcm_update = LocalQueueClientMessage(key="0l", value=update)

        return lqcm_update

    def _update_function_worker(self, topic, lqcm_update):
        ack = self._local_queue_client.addMessage(topic, lqcm_update, True)
        while not ack:
            ack = self._local_queue_client.addMessage(topic, lqcm_update, True)

    def _update_remaining_function_workers(self, excluded_function_topic, lqcm_update=None):
        local_functions = self._workflow.getWorkflowLocalFunctions()
        if lqcm_update is None:
            lqcm_update = self._prepare_update_for_locally_running(local_functions)

        for locally_running_ft in local_functions:
            if locally_running_ft == excluded_function_topic:
                continue
            self._update_function_worker(locally_running_ft, lqcm_update)

    def stop_function_worker(self, function_topic):
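        """Stop a single function worker: remove it from the locally running functions,
        update the remaining workers, and send a 'stop' message to its topic."""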
        # remove from locally running functions
        self._workflow.removeLocalFunction(function_topic)

        # first, update locally running functions with remaining functions
        self._update_remaining_function_workers(function_topic)

        # send stop message to function worker's queue
        stop = {}
        stop["action"] = "stop"
        stop = json.dumps(stop)
        lqcm_update = LocalQueueClientMessage(key="0l", value=stop)
        self._update_function_worker(function_topic, lqcm_update)

    def _install_sandbox_requirements(self, parameters):
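        """Install the sandbox-wide python requirements with pip (the only supported
        installer), passing along any additional installer options (e.g., proxy settings)."""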
        error = None
        installer = parameters["installer"]
        requirements = parameters["requirements"]
        additional_installer_options = {}
        if "additional_installer_options" in parameters:
            additional_installer_options = parameters["additional_installer_options"]

        if requirements:
            # TODO: other installers (e.g., apt-get)?
            if installer == "pip":
                # launch 'pip install' with any parameters related to proxy etc.
                # store requirements into /opt/mfn/requirements.txt
                reqfname = "/opt/mfn/requirements.txt"
                with open(reqfname, "w+") as reqf:
                    for req in requirements:
                        reqf.write(req + "\n")

                # modify command to add additional installer options
                if self._python_version >= (3, ):
                    cmd = "python3 "
                else:
                    cmd = "python "
                cmd = cmd + "-m pip install --user"
                cmd += " --no-compile --no-clean"
                for opt in additional_installer_options:
                    cmd = cmd + " " + opt + " " + additional_installer_options[opt]

                cmd = cmd + " -r " + reqfname

                # launch 'pip install [additional_options] -r /opt/mfn/requirements.txt
                error, _ = process_utils.run_command(cmd, self._logger, wait_output=True)

            else:
                error = "Unsupported installer: " + installer

        return error

    def _retrieve_and_store_function_code(self, resource_name, resource_info):
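        """Fetch a single-file function resource from the global data layer, base64-decode
        it and store it under /opt/mfn/code/resources/. Returns (error, resource_dirpath)."""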
        error = None

        rpath = "/opt/mfn/code/resources/" + resource_name + "/"
        fpath = rpath + resource_name

        if resource_info["runtime"].find("python") != -1:
            fpath = fpath + ".py"
        elif resource_info["runtime"].find("java") != -1:
            fpath = fpath + ".java"
        else:
            error = "Unsupported runtime: " + resource_info["runtime"]
            return (error, None)

        if not os.path.exists(os.path.dirname(fpath)):
            try:
                os.makedirs(os.path.dirname(fpath))
            except OSError as err:
                if err.errno != errno.EEXIST:
                    error = err
                    return (error, None)

        resource_code = self._global_data_layer_client.get(resource_info["ref"])

        if resource_code is None:
            error = "Empty function code."
            return (error, None)

        try:
            resource_code = base64.b64decode(resource_code).decode()
        except Exception as exc:
            error = "Invalid value for code: " + str(exc)
            self._logger.error(error)
            return (error, None)

        with open(fpath, "w") as funcf:
            funcf.write(resource_code)

        return (error, rpath)

    def _retrieve_and_store_function_zip(self, resource_name, resource_info):
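        """Fetch a zip resource that was uploaded as base64 chunks, reassemble and decode it,
        extract it, make bundled ELF libraries executable and (for python) overwrite the entry
        file with the latest resource code. Returns (error, extracted_dirpath)."""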
        error = None

        zipref = resource_info["ref"]
        num_chunks_str = self._global_data_layer_client.get(zipref)

        try:
            num_chunks = int(num_chunks_str)
        except Exception as exc:
            error = "Invalid value for key " + zipref + "; expected number of chunks: " + str(exc)
            self._logger.error(error)
            return (error, None)

        zip_content = ""
        ind = zipref.find("num_chunks_")
        gid = zipref[ind+11:]
        pref = zipref[0:ind] + gid + "_chunk_"
        for i in range(num_chunks):
            chunkref = pref + str(i)
            chunk = self._global_data_layer_client.get(chunkref)
            if chunk is None:
                error = "Empty zip chunk."
                return (error, None)

            zip_content = zip_content + chunk

        old_len = len(zip_content)
        rem = old_len % 4
        if rem > 0:
            num_pad = 4 - rem
            for i in range(num_pad):
                zip_content = zip_content + "="

        try:
            decodedzip = base64.b64decode(zip_content)
        except Exception as exc:
            error = "Invalid value for assembled chunks; couldn't decode base64: " + str(exc)
            self._logger.error(error)
            return (error, None)

        runtime = resource_info["runtime"]

        # 1. store zip file
        zipfname = "/opt/mfn/code/zips/" + resource_name + ".zip"
        if not os.path.exists(os.path.dirname(zipfname)):
            try:
                os.makedirs(os.path.dirname(zipfname))
            except OSError as err:
                if err.errno != errno.EEXIST:
                    error = err
                    return (error, None)

        with open(zipfname, "wb") as zipfile:
            zipfile.write(decodedzip)

        gextractedpath = "/opt/mfn/code/resources/" + resource_name + "/"
        # 2. extract zip file
        if not os.path.exists(os.path.dirname(gextractedpath)):
            try:
                os.makedirs(os.path.dirname(gextractedpath))
            except OSError as err:
                if err.errno != errno.EEXIST:
                    error = err
                    return (error, None)

        cmdunzip = "unzip " + zipfname + " -d " + gextractedpath
        error, _ = process_utils.run_command(cmdunzip, self._logger, wait_output=True)

        if error is not None:
            error = "Could not extract zip file: " + resource_name + " " + error
            self._logger.error(error)
            return (error, None)

        # 3. need to set executable permissions for the extracted libs
        cmdperm = "sh -c \"find " + gextractedpath + "| xargs -I {} file {}"
        cmdperm = cmdperm + "| grep ELF" + "| grep -v grep"
        cmdperm = cmdperm + "| awk -F ':' '{print $1}'"
        cmdperm = cmdperm + "| xargs -I {} chmod +x {}\""

        error, _ = process_utils.run_command(cmdperm, self._logger, wait_output=True)

        if error is not None:
            error = "Could not set lib permissions: " + resource_name + " " + error
            self._logger.error(error)
            return (error, None)

        if runtime.find("python") != -1:
            fpath = gextractedpath + resource_name
            fpath = fpath + ".py"

            resource_code = self._global_data_layer_client.get("grain_source_" + resource_info["id"])
            if resource_code is not None and resource_code != "":
                try:
                    resource_code = base64.b64decode(resource_code).decode()
                except Exception as exc:
                    error = "Invalid value for function code: " + str(exc)
                    self._logger.error(error)
                    return (error, None)

                self._logger.info("Overwriting zip resource file with the updated resource code...")
                with open(fpath, "w") as funcf:
                    funcf.write(resource_code)

        elif runtime.find("java") != -1:
            # TODO: try to retrieve the updated resource?
            # To do that, we'd need to know the actual state name (i.e., in the workflow description),
            # which (for now) has to be the same as the Java class.
            # This class name can differ from the resource name
            # (e.g., one jar containing multiple classes with handle functions, such that each function is used as a separate state)
            # that means, we'd need to do the code update just at the beginning of when we create the state and also the compilation,
            # but before copying the resource to each state's separate location
            # TODO: double check whether this is also the case for python
            pass

        else:
            error = "Unsupported runtime: " + resource_info["runtime"]
            return (error, None)

        return (error, gextractedpath)

    def _initialize_data_layer_storage(self):
        # each data layer client will automatically create the local keyspace and tables
        # upon instantiation

        # mfn internal tables
        local_dlc = DataLayerClient(locality=0, for_mfn=True, sid=self._sandboxid, wid=self._workflowid, connect=self._datalayer, init_tables=True)
        local_dlc.shutdown()

        # user storage tables
        local_dlc = DataLayerClient(locality=0, suid=self._storage_userid, connect=self._datalayer, init_tables=True)
        local_dlc.shutdown()

        # workflow private tables
        local_dlc = DataLayerClient(locality=0, is_wf_private=True, sid=self._sandboxid, wid=self._workflowid, connect=self._datalayer, init_tables=True)
        local_dlc.shutdown()

        # for global access, (re)create; it's okay because the operations are idempotent
        # user storage is created by management service
        # mfn internal tables
        global_dlc = DataLayerClient(locality=1, for_mfn=True, sid=self._sandboxid, wid=self._workflowid, connect=self._datalayer, init_tables=True)
        global_dlc.shutdown()

        # workflow private tables
        global_dlc = DataLayerClient(locality=1, is_wf_private=True, sid=self._sandboxid, wid=self._workflowid, connect=self._datalayer, init_tables=True)
        global_dlc.shutdown()

    def _populate_worker_params(self, function_topic, wf_node, state):
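        """Assemble the parameter dictionary passed to a function worker: identifiers,
        endpoints, resource paths and the per-state workflow information (next states,
        state type, session settings, checkpointing)."""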
        worker_params = {}
        worker_params["userid"] = self._userid
        worker_params["storageuserid"] = self._storage_userid
        worker_params["sandboxid"] = self._sandboxid
        worker_params["workflowid"] = self._workflowid
        worker_params["workflowname"] = self._workflowname
        worker_params["ffolder"] = state["resource_dirpath"]
        worker_params["fpath"] = state["resource_filepath"]
        worker_params["fname"] = state["resource_filename"]
        worker_params["fruntime"] = state["resource_runtime"]
        worker_params["ftopic"] = function_topic
        worker_params["hostname"] = self._hostname
        worker_params["queue"] = self._queue
        worker_params["datalayer"] = self._datalayer
        worker_params["externalendpoint"] = self._external_endpoint
        worker_params["internalendpoint"] = self._internal_endpoint
        worker_params["managementendpoints"] = self._management_endpoints
        worker_params["fnext"] = wf_node.getNextMap()
        worker_params["fpotnext"] = wf_node.getPotentialNextMap()
        worker_params["functionstatetype"] = wf_node.getGWFType()
        worker_params["functionstatename"] = wf_node.getGWFStateName()
        worker_params["functionstateinfo"] = wf_node.getGWFStateInfo()
        worker_params["workflowfunctionlist"] = self._workflow.getWorkflowFunctionMap()
        worker_params["workflowexit"] = self._workflow.getWorkflowExitPoint()
        worker_params["sessionworkflow"] = self._workflow.is_session_workflow()
        worker_params["sessionfunction"] = wf_node.is_session_function()
        worker_params["sessionfunctionparameters"] = wf_node.get_session_function_parameters()
        worker_params["shouldcheckpoint"] = self._workflow.are_checkpoints_enabled()

        return worker_params

    def _compile_java_resources_if_necessary(self, resource, mvndeps):
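        """Prepare a java resource: create the target/classes directory, resolve maven
        dependencies if a pom.xml exists (or was uploaded as requirements), and compile
        any .java sources found in the resource directory."""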
        error = None

        cmdmkdir = "mkdir -p " + resource["dirpath"] + "target/classes"

        self._logger.info("Preparing for compilation of Java function resources: %s", resource["name"])
        error, _ = process_utils.run_command(cmdmkdir, self._logger, wait_output=True)
        if error is not None:
            error = "Could not create target directory for resource: " + resource["name"] + " " + error
            self._logger.error(error)
            return error

        #cmdjavac = "javac -classpath /opt/mfn/JavaRequestHandler/mfnapi.jar -d " + resource["dirpath"] + "target/classes "
        #cmdjavac += resource["dirpath"] + resource["name"] + ".java"

        cmdfind = "find " + resource["dirpath"] + " -name *.java"
        output, error = process_utils.run_command_return_output(cmdfind, self._logger)
        if error is not None:
            self._logger.error("[SandboxAgent] could not search for any Java sources: %s", str(error))
            error = "Could not search for any Java sources: " + resource["name"] + " " + str(error)
            return error
        source_files = set(output.split("\n"))
        source_files = ' '.join(source_files).strip()
        should_compile = False
        if source_files != "":
            should_compile = True
            self._logger.info("Found following Java sources: %s", str(source_files))
        else:
            self._logger.info("No java sources to compile.")

        # check for a pom.xml or uploaded maven requirements; if requirements were uploaded
        # and there is no pom.xml yet, write them as the pom.xml
        if mvndeps is not None and not os.path.exists(resource["dirpath"] + "pom.xml"):
            # write the content of mvndeps into the pom.xml
            self._logger.info("Writing maven build file: %spom.xml", resource["dirpath"])
            with open(resource["dirpath"] + "pom.xml", "w") as fpom:
                fpom.write(mvndeps)

        # we either had a pom.xml file in the archive or non-empty mvndeps from uploaded requirements, which we wrote as the pom.xml file
        # regardless, if there is a pom file, then resolve and copy maven dependencies
        if os.path.exists(resource["dirpath"] + "pom.xml"):
            cmdmvn = "mvn -Duser.home=/tmp -DskipTests -gs /opt/mfn/JavaRequestHandler/maven/sandbox-mvn-settings.xml -f " + resource["dirpath"]
            cmdmvn += " dependency:copy-dependencies -DoutputDirectory=" + resource["dirpath"] + "target/classes"

            self._logger.info("Copying maven dependencies for Java function: %s", resource["name"])
            error, _ = process_utils.run_command(cmdmvn, self._logger, wait_output=True)
            if error is not None:
                error = "Could not copy maven dependencies: " + resource["name"] + " " + error
                self._logger.error(error)
                return error
            self._logger.info("Finished copying dependencies for Java function: %s", resource["name"])

        if should_compile:
            cmdjavac = "javac -classpath /opt/mfn/JavaRequestHandler/mfnapi.jar:"
            cmdjavac += resource["dirpath"] + "target/classes/* "
            cmdjavac += "-d " +  resource["dirpath"] + "target/classes " + source_files

            self._logger.info("Compiling Java function resources: %s", resource["name"])
            self._logger.info(cmdjavac)
            error, _ = process_utils.run_command(cmdjavac, self._logger, wait_output=True)
            if error is not None:
                error = "Could not compile resource: " + resource["name"] + " " + error
                self._logger.error(error)
                return error
            self._logger.info("Finished compiling Java function resources: %s", resource["name"])

        return error

    def process_deployment_info(self):
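        """Parse the deployment info, download and prepare all function resources, install
        sandbox requirements, initialize the data layer storage, and launch and verify a
        function worker per workflow state. Returns (has_error, errmsg)."""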
        has_error = False
        errmsg = ""

        if self._deployment_info is not None and self._deployment_info != "":
            try:
                self._deployment_info = json.loads(self._deployment_info)
                self._logger.debug("Deployment info: %s", json.dumps(self._deployment_info))
            except Exception as exc:
                errmsg = "Could not parse deployment info: " + str(exc)
                self._logger.error(errmsg)
                has_error = True
                return has_error, errmsg
        else:
            errmsg = "Empty deployment info."
            has_error = True
            return has_error, errmsg

        if "workflow" not in self._deployment_info or "resources" not in self._deployment_info:
            errmsg = "Incomplete deployment info: " + json.dumps(self._deployment_info)
            self._logger.error(errmsg)
            has_error = True
            return has_error, errmsg

        # get workflow info
        workflow_info = self._deployment_info["workflow"]
        sid = workflow_info["sandboxId"]
        if sid != self._sandboxid:
            warnmsg = "WARN: workflow info sandboxid doesn't match provided sandboxid ("+sid+" <-> "+workflow_info["sandboxId"]+")"
            self._logger.info(warnmsg)
        wid = workflow_info["workflowId"]
        if wid != self._workflowid:
            warnmsg = "WARN: workflow info workflowid doesn't match provided workflowid ("+wid+" <-> "+workflow_info["workflowId"]+")"
            print(warnmsg)
        wf_type = workflow_info["workflowType"]

        usertoken = ''
        if "usertoken" in workflow_info:
            usertoken = workflow_info["usertoken"]
        os.environ["USERTOKEN"] = usertoken

        # get workflow json, parse workflow json and init params
        workflow_json = self._global_data_layer_client.get(workflow_info["json_ref"])
        if workflow_json is None or workflow_json == "":
            has_error = True
            errmsg = "Empty workflow description."
            return has_error, errmsg

        try:
            workflow_json = base64.b64decode(workflow_json).decode()
        except Exception as exc:
            has_error = True
            errmsg = "Invalid value for workflow json: " + str(exc)
            return has_error, errmsg

        self._workflow = Workflow(self._userid, sid, wid, wf_type, workflow_json, self._logger)

        has_error = self._workflow.has_error()
        if has_error:
            errmsg = "Problem in workflow description: " + str(workflow_json)
            self._logger.error(errmsg)
            return has_error, errmsg

        # get workflow nodes
        workflow_nodes = self._workflow.getWorkflowNodeMap()

        # get resources info and find functions
        resource_map = {}
        resource_info_map = self._deployment_info["resources"]

        if any(resource_info_map[res_name]["runtime"] == "Java" for res_name in resource_info_map):
            # run setup_maven.sh to update the proxy settings at runtime
            # (i.e., the sandbox image may have been built on a machine with a proxy, or vice versa)
            cmd_maven_proxy_initer = "/opt/mfn/JavaRequestHandler/./setup_maven.sh"
            self._logger.info("Updating maven proxy settings...")
            error, _ = process_utils.run_command(cmd_maven_proxy_initer, self._logger, wait_output=True)
            if error is not None:
                has_error = True
                errmsg = "Could not reinitialize maven proxy settings: " + error
                return has_error, errmsg
            self._logger.info("Finished updating maven proxy settings.")

        # for pip installable dependencies for python functions
        req_map = {}
        t_start_download = time.time()
        # store functions in local filesystem
        for resource_name in resource_info_map:
            resource_info = resource_info_map[resource_name]
            resource_info["runtime"] = resource_info["runtime"].lower()

            if resource_info["type"] == "code":
                error, resource_dirpath = self._retrieve_and_store_function_code(resource_name, resource_info)
            else:
                error, resource_dirpath = self._retrieve_and_store_function_zip(resource_name, resource_info)

            if error is not None:
                errmsg = "Could not retrieve and store function: " + resource_name + " " + error
                self._logger.error(errmsg)
                has_error = True
                return has_error, errmsg

            # these requirements can now also be java maven dependencies
            resource_id = resource_info["id"]
            greq = self._global_data_layer_client.get("grain_requirements_" + resource_id)
            mvndeps = None
            if greq is not None and greq != "":
                greq = base64.b64decode(greq).decode()
                if resource_info["runtime"].find("python") == 0:
                    # get function requirements and put it into a map
                    lines = greq.strip().split("\n")
                    for line in lines:
                        req_map[line] = True
                elif resource_info["runtime"].find("java") == 0:
                    mvndeps = greq

            # get function environment variables
            env_var_list = []
            genv = self._global_data_layer_client.get("grain_environment_variables_" + resource_id)
            if genv is not None and genv != "":
                genv = base64.b64decode(genv).decode()
                lines = genv.split("\n")
                env_var_list = lines

            resource = {}
            resource["name"] = resource_name
            resource["dirpath"] = resource_dirpath
            resource["runtime"] = resource_info["runtime"]
            resource["env_var_list"] = env_var_list
            resource_map[resource_name] = resource

            # compile the java sources
            if resource["runtime"].find("java") == 0:
                # even if it was just a single java file
                # or a jar file uploaded with source files
                # or a jar file with just class files,
                # the following function will
                # 1. download maven dependencies (if there is a pom.xml in the jar or was separately uploaded)
                # 2. compile the source files if any
                error = self._compile_java_resources_if_necessary(resource, mvndeps)

                if error is not None:
                    errmsg = "Could not compile Java function resources: " + resource_name + " " + error
                    self._logger.error(errmsg)
                    has_error = True
                    return has_error, errmsg

        total_time_download = (time.time() - t_start_download) * 1000.0
        self._logger.info("Download time for all function code: %s (ms)", str(total_time_download))

        t_start_requirements = time.time()
        # this list will only contain pip installable dependencies
        # java maven dependencies will be handled while compiling the java resources
        sbox_req_list = []
        for req_line in req_map:
            sbox_req_list.append(req_line)

        # install sandbox requirements
        req = workflow_info["sandbox_requirements"]
        req["requirements"] = sbox_req_list
        error = self._install_sandbox_requirements(req)
        if error is not None:
            errmsg = "Could not install sandbox requirements. " + str(error)
            self._logger.error(errmsg)
            has_error = True
            return has_error, errmsg

        total_time_requirements = (time.time() - t_start_requirements) * 1000.0
        self._logger.info("Requirements install time: %s (ms)", str(total_time_requirements))

        t_start_storage = time.time()
        # initialize local data layer space for user and workflow
        self._initialize_data_layer_storage()
        total_time_storage = (time.time() - t_start_storage) * 1000.0
        self._logger.info("Storage initialization time: %s (ms)", str(total_time_storage))

        self._local_queue_client = LocalQueueClient(connect=self._queue)

        self._local_queue_client.addTopic(self._workflow.getWorkflowExitTopic())

        t_start_launch = time.time()
        # accumulate all java worker params into one
        # later, we'll launch a single JVM to handle all java functions
        if SINGLE_JVM_FOR_FUNCTIONS:
            single_jvm_worker_params = {}
            any_java_function = False

        total_time_state = 0.0
        for function_topic in workflow_nodes:
            wf_node = workflow_nodes[function_topic]
            resource_name = wf_node.get_resource_name()

            t_start_state = time.time()
            if resource_name == "":
                # this is an ASL state without a resource (i.e., function) attached to it
                error, resource = state_utils.create_dummy_resource_for_asl_state(wf_node)
                if error is not None:
                    errmsg = "Could not create non-resource state. " + str(error)
                    self._logger.error(errmsg)
                    has_error = True
                    return has_error, errmsg
            else:
                resource = resource_map[resource_name]

            error, state = state_utils.create_state(wf_node, resource, self._logger)
            if error is not None:
                errmsg = "Could not create state: " + str(error)
                self._logger.error(errmsg)
                has_error = True
                return has_error, errmsg

            total_time_state += (time.time() - t_start_state) * 1000.0

            self._local_queue_client.addTopic(function_topic)

            # compile worker parameters
            worker_params = self._populate_worker_params(function_topic, wf_node, state)
            # store worker parameters as a local file
            params_filename = state["dirpath"] + "worker_params.json"

            with open(params_filename, "w") as paramsf:
                json.dump(worker_params, paramsf, indent=4)

            if state["resource_runtime"].find("java") != -1:
                java_worker_params = {}
                java_worker_params["functionPath"] = worker_params["ffolder"]
                java_worker_params["functionName"] = worker_params["fname"]
                java_worker_params["serverSocketFilename"] = "/tmp/java_handler_" + worker_params["functionstatename"] + ".uds"

                if SINGLE_JVM_FOR_FUNCTIONS:
                    any_java_function = True
                    single_jvm_worker_params[worker_params["functionstatename"]] = java_worker_params
                else:
                    java_params_filename = state["dirpath"] + "java_worker_params.json"
                    with open(java_params_filename, "w") as javaparamsf:
                        json.dump(java_worker_params, javaparamsf, indent=4)

            # launch function workers with the params parsed from workflow info
            error = self._start_function_worker(worker_params, state["resource_runtime"], state["resource_env_var_list"])

            if error is not None:
                errmsg = "Problem launching function worker for: " + worker_params["fname"]
                self._logger.error(errmsg)
                has_error = True
                return has_error, errmsg

            # add the new function worker to the local list
            self._workflow.addLocalFunction(function_topic)

        # all function workers have been launched; update them with locally running functions
        # prepare update message to be used by all
        local_functions = self._workflow.getWorkflowLocalFunctions()
        lqcm_update = self._prepare_update_for_locally_running(local_functions)
        for function_topic in workflow_nodes:
            self._update_function_worker(function_topic, lqcm_update)

        if SINGLE_JVM_FOR_FUNCTIONS:
            if any_java_function:
                single_jvm_params_filename = "/opt/mfn/workflow/states/single_jvm_worker_params.json"
                with open(single_jvm_params_filename, "w") as jvmparamsf:
                    json.dump(single_jvm_worker_params, jvmparamsf, indent=4)

                self._logger.info("Launching a single JavaRequestHandler for all Java states...")
                cmdjavahandler = "java -jar /opt/mfn/JavaRequestHandler/target/javaworker.jar "
                cmdjavahandler += single_jvm_params_filename

                error, process = process_utils.run_command(cmdjavahandler, self._logger, wait_until="Waiting for requests on:")
                if error is not None:
                    errmsg = "Problem launching JavaRequestHandler for Java states: " + error
                    self._logger.error(errmsg)
                    has_error = True
                    return has_error, errmsg
                else:
                    self._javarequesthandler_process_list.append(process)

        self._logger.info("State creation for all function workers: %s (ms)", str(total_time_state))

        total_time_launch = (time.time() - t_start_launch) * 1000.0
        self._logger.info("Launch time for all function workers: %s (ms)", str(total_time_launch))

        if not has_error:
            # check whether all function workers have launched successfully
            # give some time for function workers to come up
            cmd = "pgrep -P " + str(self._process_id) + " -a"
            output, error = process_utils.run_command_return_output(cmd, self._logger)
            if error is not None:
                self._logger.error("[SandboxAgent] check health of function workers: failed to get FunctionWorker processes: %s", str(error))
                has_error = True
                errmsg = "Could not get FunctionWorker processes."

        if not has_error:
            fwlines = set(output.split("\n"))
            fwpids = []
            for line in fwlines:
                if "FunctionWorker.py" in line:
                    pid = line.split(" ")[0]
                    fwpids.append(pid)

            if str(self._fluentbit_process.pid) in fwpids:
                fwpids.remove(str(self._fluentbit_process.pid))

            self._logger.info(str(len(fwpids)) + " " + str(len(self._functionworker_process_map)))
            #self._logger.info(str(fwpids) + " " + str(self._functionworker_process_map))

            if len(fwpids) != len(self._functionworker_process_map):
                has_error = True
                errmsg = "One or more function workers could not be launched:\n"

                for state_name in self._functionworker_process_map:
                    fwp = self._functionworker_process_map[state_name]
                    if str(fwp.pid) not in fwpids:
                        errmsg += state_name + "\n"

        self._global_data_layer_client.shutdown()

        return has_error, errmsg