def compss_worker(tracing, task_id, storage_conf, params):
    """ Worker main method (invoked from __main__).

    Sets the PyCOMPSs context to WORKER, delegates the execution to
    execute_task and returns only the exit code of its result.

    :param tracing: Tracing boolean
    :param task_id: Task identifier (concatenated to "Task " to build the
                    process name, so it must be a string)
    :param storage_conf: Storage configuration file
    :param params: Parameters following the common order of the workers
    :return: Exit code
    """
    # The logger is handed to execute_task unconditionally, so it must be
    # bound even when the interpreter strips `if __debug__:` blocks (-O).
    logger = logging.getLogger('pycompss.worker.gat.worker')
    if __debug__:
        logger.debug("Starting Worker")

    # Set the binding in worker mode
    import pycompss.util.context as context
    context.set_pycompss_context(context.WORKER)

    result = execute_task("Task " + task_id,
                          storage_conf,
                          params,
                          tracing,
                          logger,
                          None)
    # Result contains:
    # exit_code, new_types, new_values, timed_out, except_msg
    exit_code, _, _, _, _ = result

    if __debug__:
        logger.debug("Finishing Worker")

    return exit_code
def compss_worker(tracing, task_id, storage_conf, params, log_json):
    # type: (bool, str, str, list, str) -> int
    """ Worker main method (invoked from __main__).

    Sets the PyCOMPSs context to WORKER, delegates the execution to
    execute_task and returns only the exit code of its result.

    :param tracing: Tracing boolean
    :param task_id: Task identifier
    :param storage_conf: Storage configuration file
    :param params: Parameters following the common order of the workers
    :param log_json: Logger configuration file.
    :return: Exit code
    """
    # The logger is handed to execute_task unconditionally, so it must be
    # bound even when the interpreter strips `if __debug__:` blocks (-O).
    logger = logging.getLogger('pycompss.worker.gat.worker')
    if __debug__:
        logger.debug("Starting Worker")

    # Set the binding in worker mode
    import pycompss.util.context as context
    context.set_pycompss_context(context.WORKER)

    result = execute_task("".join(("Task ", task_id)),
                          storage_conf,
                          params,
                          tracing,
                          logger,
                          log_json,
                          (),
                          False,
                          dict(),
                          None,
                          None)
    # Result contains:
    # exit_code, new_types, new_values, timed_out, except_msg = result
    exit_code, _, _, _, _ = result

    if __debug__:
        logger.debug("Finishing Worker")

    return exit_code
def process_task(current_line,      # type: str
                 process_name,      # type: str
                 logger,            # type: ...
                 log_json,          # type: str
                 logger_handlers,   # type: ...
                 logger_level,      # type: int
                 logger_formatter   # type: ...
                 ):
    # type: (...) -> (str, str)
    """ Process command received from the current_line.

    For an EXECUTE_TASK_TAG command the task is executed with the logger
    and stdout/stderr redirected to the job out/err files, and the message
    reporting the outcome is built. Any other command is answered with an
    END_TASK message carrying exit value 7.

    :param current_line: Current command (line) to process.
    :param process_name: Process name for logger messages.
    :param logger: Logger.
    :param log_json: Logger configuration file.
    :param logger_handlers: Logger handlers.
    :param logger_level: Logger level.
    :param logger_formatter: Logger formatter.
    :return: exit_value and message.
    """
    # Process properties
    stdout = sys.stdout
    stderr = sys.stderr
    job_id = None

    if __debug__:
        logger.debug("[PYTHON EXECUTOR] [%s] Received message: %s" %
                     (str(process_name), str(current_line)))

    current_line = current_line.split()
    if current_line[0] == EXECUTE_TASK_TAG:
        # The last element is the number of collection parameters; each of
        # them contributes four elements (name + three layout integers)
        # placed right before it.
        num_collection_params = int(current_line[-1])
        if num_collection_params > 0:
            collections_layouts = dict()
            raw_layouts = current_line[((num_collection_params * -4) - 1):-1]
            for i in range(num_collection_params):
                param = raw_layouts[i * 4]
                layout = [int(raw_layouts[(i * 4) + 1]),
                          int(raw_layouts[(i * 4) + 2]),
                          int(raw_layouts[(i * 4) + 3])]
                collections_layouts[param] = layout
        else:
            collections_layouts = None

        # Remove the last elements: cpu and gpu bindings and collection params
        current_line = current_line[0:-3]

        # task jobId command
        job_id = current_line[1]
        job_out = current_line[2]
        job_err = current_line[3]
        # current_line[4] = <boolean> = tracing
        # current_line[5] = <integer> = task id
        # current_line[6] = <boolean> = debug
        # current_line[7] = <string>  = storage conf.
        # current_line[8] = <string>  = operation type (e.g. METHOD)
        # current_line[9] = <string>  = module
        # current_line[10]= <string>  = method
        # current_line[11]= <string>  = time out
        # current_line[12]= <integer> = Number of slaves (worker nodes)==#nodes
        # <<list of slave nodes>>
        # current_line[12 + #nodes] = <integer> = computing units
        # current_line[13 + #nodes] = <boolean> = has target
        # current_line[14 + #nodes] = <string>  = has return (always 'null')
        # current_line[15 + #nodes] = <integer> = Number of parameters
        # <<list of parameters>>
        #       !---> type, stream, prefix , value

        if __debug__:
            logger.debug("[PYTHON EXECUTOR] [%s] Received task with id: %s" %
                         (str(process_name), str(job_id)))
            logger.debug("[PYTHON EXECUTOR] [%s] - TASK CMD: %s" %
                         (str(process_name), str(current_line)))

        # Swap logger from stream handler to file handler
        # All task output will be redirected to job.out/err
        for log_handler in logger_handlers:
            logger.removeHandler(log_handler)
        out_file_handler = logging.FileHandler(job_out)
        out_file_handler.setLevel(logger_level)
        out_file_handler.setFormatter(logger_formatter)
        err_file_handler = logging.FileHandler(job_err)
        err_file_handler.setLevel("ERROR")
        err_file_handler.setFormatter(logger_formatter)
        logger.addHandler(out_file_handler)
        logger.addHandler(err_file_handler)

        if __debug__:
            logger.debug("Received task in process: %s" % str(process_name))
            logger.debug(" - TASK CMD: %s" % str(current_line))

        try:
            # Setup out/err wrappers. The context manager closes the job
            # files and the finally clause restores the original streams
            # even when the task execution raises.
            with open(job_out, 'a') as out, open(job_err, 'a') as err:
                sys.stdout = out
                sys.stderr = err
                try:
                    # Setup process environment
                    # NOTE(review): the field layout documented above
                    # places the node count at index 12; confirm that
                    # index 11 is the intended one here.
                    cn = int(current_line[11])
                    cn_names = ','.join(current_line[12:12 + cn])
                    os.environ["COMPSS_NUM_NODES"] = str(cn)
                    os.environ["COMPSS_HOSTNAMES"] = cn_names
                    if __debug__:
                        logger.debug("Process environment:")
                        logger.debug("\t - Number of nodes: %s" % (str(cn)))
                        logger.debug("\t - Hostnames: %s" % str(cn_names))

                    # Execute task
                    storage_conf = "null"
                    tracing = False
                    python_mpi = True
                    result = execute_task(process_name,
                                          storage_conf,
                                          current_line[9:],
                                          tracing,
                                          logger,
                                          log_json,
                                          (job_out, job_err),
                                          python_mpi,
                                          collections_layouts)
                    exit_value, new_types, new_values, time_out, except_msg = result  # noqa: E501
                finally:
                    # Restore out/err wrappers
                    sys.stdout = stdout
                    sys.stderr = stderr
                    sys.stdout.flush()
                    sys.stderr.flush()

            # To reduce if necessary:
            # global_exit_value = MPI.COMM_WORLD.reduce(exit_value,
            #                                           op=MPI.SUM,
            #                                           root=0)
            # message = ""

            # if MPI.COMM_WORLD.rank == 0 and global_exit_value == 0:
            if exit_value == 0:
                # Task has finished without exceptions
                # endTask jobId exitValue message
                params = build_return_params_message(new_types, new_values)
                message = " ".join((END_TASK_TAG,
                                    str(job_id),
                                    str(exit_value),
                                    str(params) + "\n"))
            elif exit_value == 2:
                # Task has finished with a COMPSs Exception
                # compssExceptionTask jobId exitValue message
                except_msg = except_msg.replace(" ", "_")
                message = " ".join((COMPSS_EXCEPTION_TAG,
                                    str(job_id),
                                    str(except_msg) + "\n"))
                if __debug__:
                    logger.debug("%s - COMPSS EXCEPTION TASK MESSAGE: %s" %
                                 (str(process_name), str(except_msg)))
            else:
                # elif MPI.COMM_WORLD.rank == 0 and global_exit_value != 0:
                # An exception has been raised in task
                message = " ".join((END_TASK_TAG,
                                    str(job_id),
                                    str(exit_value) + "\n"))

            if __debug__:
                logger.debug("%s - END TASK MESSAGE: %s" %
                             (str(process_name), str(message)))
            # The return message is:
            #
            # TaskResult ==> jobId exitValue D List<Object>
            #
            # Where List<Object> has D * 2 length:
            # D = #parameters == #task_parameters +
            #                    (has_target ? 1 : 0) +
            #                    #returns
            # And contains a pair of elements per parameter:
            #     - Parameter new type.
            #     - Parameter new value:
            #         - 'null' if it is NOT a PSCO
            #         - PSCOId (String) if is a PSCO
            # Example:
            #     4 null 9 null 12 <pscoid>
            #
            # The order of the elements is: parameters + self + returns
            #
            # This is sent through the pipe with the END_TASK message.
            # If the task had an object or file as parameter and the worker
            # returns the id, the runtime can change the type (and locations)
            # to a EXTERNAL_OBJ_T.

        except Exception as e:
            logger.exception("%s - Exception %s" % (str(process_name),
                                                    str(e)))
            exit_value = 7
            message = " ".join((END_TASK_TAG,
                                str(job_id),
                                str(exit_value) + "\n"))

        # Clean environment variables
        if __debug__:
            logger.debug("Cleaning environment.")
        # pop() instead of del: the variable is missing when the failure
        # happened before the environment was set up.
        os.environ.pop('COMPSS_HOSTNAMES', None)

        # Restore loggers
        if __debug__:
            logger.debug("Restoring loggers.")
        logger.removeHandler(out_file_handler)
        logger.removeHandler(err_file_handler)
        # Release the per-task log files; removeHandler does not close them.
        out_file_handler.close()
        err_file_handler.close()
        for handler in logger_handlers:
            logger.addHandler(handler)

        if __debug__:
            logger.debug("[PYTHON EXECUTOR] [%s] Finished task with id: %s" %
                         (str(process_name), str(job_id)))
    else:
        if __debug__:
            logger.debug("[PYTHON EXECUTOR] [%s] Unexpected message: %s" %
                         (str(process_name), str(current_line)))
        exit_value = 7
        message = " ".join((END_TASK_TAG,
                            str(job_id),
                            str(exit_value) + "\n"))

    return exit_value, message
def process_task(current_line, process_name, pipe, queue, tracing,
                 logger, logger_handlers, logger_level, logger_formatter,
                 storage_conf, storage_loggers, storage_loggers_handlers):
    """ Process command received from the runtime through a pipe.

    Handles three commands: EXECUTE_TASK_TAG (runs the task with the loggers
    and stdout/stderr redirected to the job files and writes the outcome to
    the pipe), PING_TAG (answers PONG_TAG) and QUIT_TAG (returns False).
    Any other command raises an Exception.

    :param current_line: Current command (line) to process
    :param process_name: Process name for logger messages
    :param pipe: Pipe where to write the result
    :param queue: Queue where to drop the process exceptions
    :param tracing: Tracing
    :param logger: Logger
    :param logger_handlers: Logger handlers
    :param logger_level: Logger level
    :param logger_formatter: Logger formatter
    :param storage_conf: Storage configuration
    :param storage_loggers: Storage loggers
    :param storage_loggers_handlers: Storage loggers handlers
    :return: <Boolean> True if processed successfully, False otherwise.
    :raise Exception: If the received command is not recognised.
    """
    stdout = sys.stdout
    stderr = sys.stderr
    affinity_ok = True

    if __debug__:
        logger.debug(HEADER + "[%s] Received message: %s" %
                     (str(process_name), str(current_line)))

    current_line = current_line.split()
    if current_line[0] == EXECUTE_TASK_TAG:
        # CPU binding
        cpus = current_line[-3]
        if cpus != "-":
            affinity_ok = bind_cpus(cpus, process_name, logger)

        # GPU binding
        gpus = current_line[-2]
        if gpus != "-":
            bind_gpus(gpus, process_name, logger)

        # Remove the last elements: cpu and gpu bindings
        current_line = current_line[0:-3]

        # task jobId command
        job_id = current_line[1]
        job_out = current_line[2]
        job_err = current_line[3]
        # current_line[4] = <boolean> = tracing
        # current_line[5] = <integer> = task id
        # current_line[6] = <boolean> = debug
        # current_line[7] = <string>  = storage conf.
        # current_line[8] = <string>  = operation type (e.g. METHOD)
        # current_line[9] = <string>  = module
        # current_line[10]= <string>  = method
        # current_line[11]= <string>  = time out
        # current_line[12]= <integer> = Number of slaves (worker nodes)==#nodes
        # <<list of slave nodes>>
        # current_line[12 + #nodes] = <integer> = computing units
        # current_line[13 + #nodes] = <boolean> = has target
        # current_line[14 + #nodes] = <string>  = has return (always 'null')
        # current_line[15 + #nodes] = <integer> = Number of parameters
        # <<list of parameters>>
        #       !---> type, stream, prefix , value

        if __debug__:
            logger.debug(HEADER + "[%s] Received task with id: %s" %
                         (str(process_name), str(job_id)))
            logger.debug(HEADER + "[%s] - TASK CMD: %s" %
                         (str(process_name), str(current_line)))

        # Swap logger from stream handler to file handler
        # All task output will be redirected to job.out/err
        for log_handler in logger_handlers:
            logger.removeHandler(log_handler)
        for storage_logger in storage_loggers:
            # Iterate over a copy: removeHandler mutates the handlers list
            # and iterating it directly would skip every other handler.
            for log_handler in list(storage_logger.handlers):
                storage_logger.removeHandler(log_handler)
        out_file_handler = logging.FileHandler(job_out)
        out_file_handler.setLevel(logger_level)
        out_file_handler.setFormatter(logger_formatter)
        err_file_handler = logging.FileHandler(job_err)
        err_file_handler.setLevel("ERROR")
        err_file_handler.setFormatter(logger_formatter)
        logger.addHandler(out_file_handler)
        logger.addHandler(err_file_handler)
        for storage_logger in storage_loggers:
            storage_logger.addHandler(out_file_handler)
            storage_logger.addHandler(err_file_handler)

        if __debug__:
            logger.debug("Received task in process: %s" % str(process_name))
            logger.debug(" - TASK CMD: %s" % str(current_line))

        try:
            # Setup out/err wrappers. The context manager closes the job
            # files and the finally clause restores the original streams
            # even when the task execution raises.
            with open(job_out, 'a') as out, open(job_err, 'a') as err:
                sys.stdout = out
                sys.stderr = err
                try:
                    # Check thread affinity
                    if not affinity_ok:
                        err.write(
                            "WARNING: This task is going to be executed with default thread affinity %s" %  # noqa: E501
                            thread_affinity.getaffinity())

                    # Setup process environment
                    cn = int(current_line[12])
                    cn_names = ','.join(current_line[13:13 + cn])
                    cu = current_line[13 + cn]
                    os.environ["COMPSS_NUM_NODES"] = str(cn)
                    os.environ["COMPSS_HOSTNAMES"] = cn_names
                    os.environ["COMPSS_NUM_THREADS"] = cu
                    os.environ["OMP_NUM_THREADS"] = cu
                    if __debug__:
                        logger.debug("Process environment:")
                        logger.debug("\t - Number of nodes: %s" % (str(cn)))
                        logger.debug("\t - Hostnames: %s" % str(cn_names))
                        logger.debug("\t - Number of threads: %s" % (str(cu)))

                    # Execute task
                    from pycompss.worker.commons.worker import execute_task
                    result = execute_task(process_name,
                                          storage_conf,
                                          current_line[9:],
                                          tracing,
                                          logger)
                    # result[3] (timed out flag) is not used here
                    exit_value = result[0]
                    new_types = result[1]
                    new_values = result[2]
                    except_msg = result[4]
                finally:
                    # Restore out/err wrappers
                    sys.stdout = stdout
                    sys.stderr = stderr
                    sys.stdout.flush()
                    sys.stderr.flush()

            if exit_value == 0:
                # Task has finished without exceptions
                # endTask jobId exitValue message
                params = build_return_params_message(new_types, new_values)
                message = END_TASK_TAG + " " + str(job_id)
                message += " " + str(exit_value) + " " + str(params) + "\n"
                if __debug__:
                    logger.debug("%s - Pipe %s END TASK MESSAGE: %s" %
                                 (str(process_name),
                                  str(pipe.output_pipe),
                                  str(message)))
            elif exit_value == 2:
                # Task has finished with a COMPSs Exception
                # compssExceptionTask jobId exitValue message
                except_msg = except_msg.replace(" ", "_")
                message = COMPSS_EXCEPTION_TAG + " " + str(job_id)
                message += " " + str(except_msg) + "\n"
                if __debug__:
                    logger.debug(
                        "%s - Pipe %s COMPSS EXCEPTION TASK MESSAGE: %s" %
                        (str(process_name),
                         str(pipe.output_pipe),
                         str(except_msg)))
            else:
                # An exception other than COMPSsException has been raised
                # within the task
                message = END_TASK_TAG + " " + str(job_id)
                message += " " + str(exit_value) + "\n"
                if __debug__:
                    logger.debug("%s - Pipe %s END TASK MESSAGE: %s" %
                                 (str(process_name),
                                  str(pipe.output_pipe),
                                  str(message)))
            # The return message is:
            #
            # TaskResult ==> jobId exitValue D List<Object>
            #
            # Where List<Object> has D * 2 length:
            # D = #parameters == #task_parameters +
            #                    (has_target ? 1 : 0) +
            #                    #returns
            # And contains a pair of elements per parameter:
            #     - Parameter new type.
            #     - Parameter new value:
            #         - 'null' if it is NOT a PSCO
            #         - PSCOId (String) if is a PSCO
            # Example:
            #     4 null 9 null 12 <pscoid>
            #
            # The order of the elements is: parameters + self + returns
            #
            # This is sent through the pipe with the END_TASK message.
            # If the task had an object or file as parameter and the worker
            # returns the id, the runtime can change the type (and locations)
            # to a EXTERNAL_OBJ_T.
            pipe.write(message)

        except Exception as e:
            logger.exception("%s - Exception %s" % (str(process_name),
                                                    str(e)))
            if queue:
                queue.put("EXCEPTION")

        # Clean environment variables
        if __debug__:
            logger.debug("Cleaning environment.")
        # pop() instead of del: a variable is missing when the binding or
        # the environment setup did not get to define it.
        if cpus != "-":
            os.environ.pop('COMPSS_BINDED_CPUS', None)
        if gpus != "-":
            os.environ.pop('COMPSS_BINDED_GPUS', None)
            os.environ.pop('CUDA_VISIBLE_DEVICES', None)
            os.environ.pop('GPU_DEVICE_ORDINAL', None)
        os.environ.pop('COMPSS_HOSTNAMES', None)

        # Restore loggers
        if __debug__:
            logger.debug("Restoring loggers.")
        logger.removeHandler(out_file_handler)
        logger.removeHandler(err_file_handler)
        for handler in logger_handlers:
            logger.addHandler(handler)
        for i, storage_logger in enumerate(storage_loggers):
            storage_logger.removeHandler(out_file_handler)
            storage_logger.removeHandler(err_file_handler)
            for handler in storage_loggers_handlers[i]:
                storage_logger.addHandler(handler)
        # Release the per-task log files once no logger references them;
        # removeHandler does not close them.
        out_file_handler.close()
        err_file_handler.close()

        if __debug__:
            logger.debug(HEADER + "[%s] Finished task with id: %s" %
                         (str(process_name), str(job_id)))

    elif current_line[0] == PING_TAG:
        pipe.write(PONG_TAG)

    elif current_line[0] == QUIT_TAG:
        # Received quit message -> Suicide
        if __debug__:
            logger.debug(HEADER + "[%s] Received quit." % str(process_name))
        return False

    else:
        if __debug__:
            logger.debug(HEADER + "[%s] Unexpected message: %s" %
                         (str(process_name), str(current_line)))
        raise Exception("Unexpected message: %s" % str(current_line))

    return True
def main():
    # type: (...) -> int
    """ Run a single task inside the container and report its exit value.

    The function description is read from the command line arguments:
    function file path, function name, has-target flag, return type,
    number of returns, number of parameters and the parameters themselves.

    :return: Exit value
    """
    if __debug__:
        # Log initialisation
        LOGGER.debug("Initialising Python worker inside the container...")
        # Parse arguments
        LOGGER.debug("Parsing Python function and arguments...")

    # TODO: Enhance the received parameters from ContainerInvoker.java
    argv = sys.argv
    func_file_path = str(argv[1])
    func_name = str(argv[2])
    has_target = str(argv[3]).lower() == "true"
    return_type = str(argv[4])
    return_length = int(argv[5])
    num_params = int(argv[6])
    func_params = argv[7:]

    # Fixed values: number of slaves (0), time out (0), computing units (1)
    execute_task_params = [func_file_path, func_name, 0, 0, 1,
                           has_target, return_type, return_length,
                           num_params]
    execute_task_params.extend(func_params)

    if __debug__:
        parsed_fields = (("- File: ", func_file_path),
                         ("- Function: ", func_name),
                         ("- HasTarget: ", has_target),
                         ("- ReturnType: ", return_type),
                         ("- Num Returns: ", return_length),
                         ("- Num Parameters: ", num_params),
                         ("- Parameters: ", func_params))
        for label, value in parsed_fields:
            LOGGER.debug(label + str(value))
        LOGGER.debug("DONE Parsing Python function and arguments")
        # Process task
        LOGGER.debug("Processing task...")

    context.set_pycompss_context(context.WORKER)
    # Positional arguments after LOGGER: logger configuration (None),
    # log files (None), python MPI (False), collections layouts (None)
    result = execute_task("ContainerInvoker",
                          "null",
                          execute_task_params,
                          False,
                          LOGGER,
                          None,
                          None,
                          False,
                          None)
    # The ignored result is time out
    exit_value, new_types, new_values, _, except_msg = result

    if __debug__:
        LOGGER.debug("DONE Processing task")
        # Process results
        LOGGER.debug("Processing results...")
        LOGGER.debug("Task exit value = " + str(exit_value))

    succeeded = exit_value == 0
    if succeeded:
        # Task has finished without exceptions
        if __debug__:
            LOGGER.debug("Building return parameters...")
            LOGGER.debug("New Types: " + str(new_types))
            LOGGER.debug("New Values: " + str(new_values))
        build_return_params_message(new_types, new_values)
        if __debug__:
            LOGGER.debug("DONE Building return parameters")
    elif exit_value == 2:
        # Task has finished with a COMPSs Exception
        if __debug__:
            LOGGER.debug("Registered COMPSs Exception: %s" %
                         str(except_msg.replace(" ", "_")))
    else:
        # An exception has been raised in task
        if __debug__:
            LOGGER.debug("Registered Exception in task execution %s" %
                         str(except_msg.replace(" ", "_")))

    # Return
    if succeeded:
        LOGGER.debug("Task execution finished SUCCESSFULLY!")
    else:
        LOGGER.debug(
            "ERROR: Task execution finished with non-zero exit value (%s != 0)" % str(exit_value))  # noqa: E501
    return exit_value
def process_task(current_line,             # type: list
                 process_name,             # type: str
                 pipe,                     # type: Pipe
                 queue,                    # type: ...
                 tracing,                  # type: bool
                 logger,                   # type: ...
                 logger_handlers,          # type: list
                 logger_level,             # type: int
                 logger_formatter,         # type: ...
                 storage_conf,             # type: str
                 storage_loggers,          # type: list
                 storage_loggers_handlers  # type: list
                 ):
    # type: (...) -> bool
    """ Process command received from the runtime through a pipe.

    Binds the requested CPUs/GPUs, redirects the loggers to the job out/err
    files, executes the task, restores the loggers and finally writes the
    outcome message to the pipe. If anything raises during the execution,
    "EXCEPTION" is put in the queue (when given) and False is returned
    without restoring the loggers nor writing to the pipe.

    :param current_line: Current command (line) to process.
    :param process_name: Process name for logger messages.
    :param pipe: Pipe where to write the result.
    :param queue: Queue where to drop the process exceptions.
    :param tracing: Tracing.
    :param logger: Logger.
    :param logger_handlers: Logger handlers.
    :param logger_level: Logger level.
    :param logger_formatter: Logger formatter.
    :param storage_conf: Storage configuration.
    :param storage_loggers: Storage loggers.
    :param storage_loggers_handlers: Storage loggers handlers.
    :return: True if processed successfully, False otherwise.
    """
    affinity_ok = True

    # CPU binding
    cpus = current_line[-3]
    if cpus != "-" and THREAD_AFFINITY:
        affinity_ok = bind_cpus(cpus, process_name, logger)

    # GPU binding
    gpus = current_line[-2]
    if gpus != "-":
        bind_gpus(gpus, process_name, logger)

    # Remove the last elements: cpu and gpu bindings
    current_line = current_line[0:-3]

    # task jobId command
    job_id, job_out, job_err = current_line[1:4]  # 4th is not taken
    # current_line[4] = <boolean> = tracing
    # current_line[5] = <integer> = task id
    # current_line[6] = <boolean> = debug
    # current_line[7] = <string>  = storage conf.
    # current_line[8] = <string>  = operation type (e.g. METHOD)
    # current_line[9] = <string>  = module
    # current_line[10]= <string>  = method
    # current_line[11]= <string>  = time out
    # current_line[12]= <integer> = Number of slaves (worker nodes)==#nodes
    # <<list of slave nodes>>
    # current_line[12 + #nodes] = <integer> = computing units
    # current_line[13 + #nodes] = <boolean> = has target
    # current_line[14 + #nodes] = <string>  = has return (always 'null')
    # current_line[15 + #nodes] = <integer> = Number of parameters
    # <<list of parameters>>
    #       !---> type, stream, prefix , value

    if __debug__:
        logger.debug(HEADER + "[%s] Received task with id: %s" %
                     (str(process_name), str(job_id)))
        logger.debug(HEADER + "[%s] - TASK CMD: %s" %
                     (str(process_name), str(current_line)))

    # Swap logger from stream handler to file handler
    # All task output will be redirected to job.out/err
    for log_handler in logger_handlers:
        logger.removeHandler(log_handler)
    for storage_logger in storage_loggers:
        # Iterate over a copy: removeHandler mutates the handlers list and
        # iterating it directly would skip every other handler.
        for log_handler in list(storage_logger.handlers):
            storage_logger.removeHandler(log_handler)
    out_file_handler = logging.FileHandler(job_out)
    out_file_handler.setLevel(logger_level)
    out_file_handler.setFormatter(logger_formatter)
    err_file_handler = logging.FileHandler(job_err)
    err_file_handler.setLevel("ERROR")
    err_file_handler.setFormatter(logger_formatter)
    logger.addHandler(out_file_handler)
    logger.addHandler(err_file_handler)
    for storage_logger in storage_loggers:
        storage_logger.addHandler(out_file_handler)
        storage_logger.addHandler(err_file_handler)

    if __debug__:
        logger.debug("Received task in process: %s" % str(process_name))
        logger.debug(" - TASK CMD: %s" % str(current_line))

    try:
        # Check thread affinity
        if not affinity_ok and THREAD_AFFINITY:
            logger.warning(
                "This task is going to be executed with default thread affinity %s" %  # noqa: E501
                thread_affinity.getaffinity())

        # Setup process environment
        cn = int(current_line[12])
        cn_names = ','.join(current_line[13:13 + cn])
        cu = current_line[13 + cn]
        if __debug__:
            logger.debug("Process environment:")
            logger.debug("\t - Number of nodes: %s" % (str(cn)))
            logger.debug("\t - Hostnames: %s" % str(cn_names))
            logger.debug("\t - Number of threads: %s" % (str(cu)))
        setup_environment(cn, cn_names, cu)

        # Execute task
        result = execute_task(process_name,
                              storage_conf,
                              current_line[9:],
                              tracing,
                              logger,
                              (job_out, job_err))
        # The ignored result is the timed out flag
        exit_value, new_types, new_values, _, except_msg = result

        if exit_value == 0:
            # Task has finished without exceptions
            # endTask jobId exitValue message
            message = build_successful_message(new_types, new_values, job_id, exit_value)  # noqa: E501
            if __debug__:
                logger.debug(
                    "%s - Pipe %s END TASK MESSAGE: %s" %
                    (str(process_name),
                     str(pipe.output_pipe),
                     str(message)))
        elif exit_value == 2:
            # Task has finished with a COMPSs Exception
            # compssExceptionTask jobId exitValue message
            except_msg, message = build_compss_exception_message(except_msg, job_id)  # noqa: E501
            if __debug__:
                logger.debug("%s - Pipe %s COMPSS EXCEPTION TASK MESSAGE: %s" %
                             (str(process_name),
                              str(pipe.output_pipe),
                              str(except_msg)))
        else:
            # An exception other than COMPSsException has been raised
            # within the task
            message = build_exception_message(job_id, exit_value)
            if __debug__:
                logger.debug(
                    "%s - Pipe %s END TASK MESSAGE: %s" %
                    (str(process_name),
                     str(pipe.output_pipe),
                     str(message)))

        # The return message is:
        #
        # TaskResult ==> jobId exitValue D List<Object>
        #
        # Where List<Object> has D * 2 length:
        # D = #parameters == #task_parameters +
        #                    (has_target ? 1 : 0) +
        #                    #returns
        # And contains a pair of elements per parameter:
        #     - Parameter new type.
        #     - Parameter new value:
        #         - 'null' if it is NOT a PSCO
        #         - PSCOId (String) if is a PSCO
        # Example:
        #     4 null 9 null 12 <pscoid>
        #
        # The order of the elements is: parameters + self + returns
        #
        # This is sent through the pipe with the END_TASK message.
        # If the task had an object or file as parameter and the worker
        # returns the id, the runtime can change the type (and locations)
        # to a EXTERNAL_OBJ_T.

    except Exception as e:
        logger.exception("%s - Exception %s" % (str(process_name),
                                                str(e)))
        if queue:
            queue.put("EXCEPTION")

        # Stop the worker process
        return False

    # Clean environment variables
    if __debug__:
        logger.debug("Cleaning environment.")
    clean_environment(cpus, gpus)

    # Restore loggers
    if __debug__:
        logger.debug("Restoring loggers.")
    logger.removeHandler(out_file_handler)
    logger.removeHandler(err_file_handler)
    for handler in logger_handlers:
        logger.addHandler(handler)
    for i, storage_logger in enumerate(storage_loggers):
        storage_logger.removeHandler(out_file_handler)
        storage_logger.removeHandler(err_file_handler)
        for handler in storage_loggers_handlers[i]:
            storage_logger.addHandler(handler)
    # Release the per-task log files once no logger references them;
    # removeHandler does not close them.
    out_file_handler.close()
    err_file_handler.close()

    if __debug__:
        logger.debug(HEADER + "[%s] Finished task with id: %s" %
                     (str(process_name), str(job_id)))

    # Notify the runtime that the task has finished
    pipe.write(message)

    return True
def main():
    # type: () -> int
    """ Main method to process the task execution.

    Reads the task description from the command line arguments (function
    file path, function name, log level, tracing flag, has-target flag,
    return type, number of returns, number of parameters and parameters),
    initialises the logging, executes the task and logs its outcome.

    :return: Exit value
    """
    # Parse arguments
    # TODO: Enhance the received parameters from ContainerInvoker.java
    func_file_path = str(sys.argv[1])
    func_name = str(sys.argv[2])
    num_slaves = 0
    timeout = 0
    cus = 1
    log_level = sys.argv[3]
    tracing = sys.argv[4] == 'true'
    has_target = str(sys.argv[5]).lower() == "true"
    return_type = str(sys.argv[6])
    return_length = int(sys.argv[7])
    num_params = int(sys.argv[8])
    func_params = sys.argv[9:]

    # Log initialisation
    # Load log level configuration file
    worker_path = os.path.dirname(os.path.realpath(__file__))
    if log_level == "true" or log_level == "debug":
        # Debug
        log_json = "".join((worker_path,
                            "/log/logging_container_worker_debug.json"))
    elif log_level == "info" or log_level == "off":
        # Info or no debug
        log_json = "".join((worker_path,
                            "/log/logging_container_worker_off.json"))
    else:
        # Default
        log_json = "".join((worker_path,
                            "/log/logging_container_worker.json"))
    init_logging_worker(log_json, tracing)
    # The logger is passed to execute_task and used by the unguarded debug
    # calls at the end, so it must be bound even when the interpreter
    # strips `if __debug__:` blocks (-O).
    logger = logging.getLogger('pycompss.worker.container.container_worker')  # noqa: E501
    if __debug__:
        logger.debug("Initialising Python worker inside the container...")

    task_params = [func_file_path, func_name, num_slaves,
                   timeout, cus, has_target, return_type,
                   return_length, num_params]  # type: typing.List[typing.Any]
    execute_task_params = task_params + func_params

    if __debug__:
        logger.debug("- File: " + str(func_file_path))
        logger.debug("- Function: " + str(func_name))
        logger.debug("- HasTarget: " + str(has_target))
        logger.debug("- ReturnType: " + str(return_type))
        logger.debug("- Num Returns: " + str(return_length))
        logger.debug("- Num Parameters: " + str(num_params))
        logger.debug("- Parameters: " + str(func_params))
        logger.debug("DONE Parsing Python function and arguments")

    # Process task
    if __debug__:
        logger.debug("Processing task...")

    process_name = "ContainerInvoker"
    storage_conf = "null"
    # NOTE(review): the tracing flag parsed from the arguments is only used
    # for the logging initialisation; the task itself always runs with
    # tracing disabled - confirm this is intended.
    tracing = False
    log_files = ()
    python_mpi = False
    collections_layouts = None  # type: typing.Optional[dict]
    context.set_pycompss_context(context.WORKER)
    # NOTE(review): the logger configuration is passed as the literal
    # string "None" instead of log_json - confirm against execute_task.
    result = execute_task(process_name,
                          storage_conf,
                          execute_task_params,
                          tracing,
                          logger,
                          "None",
                          log_files,            # noqa
                          python_mpi,
                          collections_layouts   # noqa
                          )
    # The ignored result is time out
    exit_value, new_types, new_values, _, except_msg = result

    if __debug__:
        logger.debug("DONE Processing task")

    # Process results
    if __debug__:
        logger.debug("Processing results...")
        logger.debug("Task exit value = " + str(exit_value))

    if exit_value == 0:
        # Task has finished without exceptions
        if __debug__:
            logger.debug("Building return parameters...")
            logger.debug("New Types: " + str(new_types))
            logger.debug("New Values: " + str(new_values))
        build_return_params_message(new_types, new_values)
        if __debug__:
            logger.debug("DONE Building return parameters")
    elif exit_value == 2:
        # Task has finished with a COMPSs Exception
        if __debug__:
            except_msg = except_msg.replace(" ", "_")
            logger.debug("Registered COMPSs Exception: %s" %
                         str(except_msg))
    else:
        # An exception has been raised in task
        if __debug__:
            except_msg = except_msg.replace(" ", "_")
            logger.debug("Registered Exception in task execution %s" %
                         str(except_msg))

    # Return
    if exit_value != 0:
        logger.debug("ERROR: Task execution finished with non-zero exit value (%s != 0)" % str(exit_value))  # noqa: E501
    else:
        logger.debug("Task execution finished SUCCESSFULLY!")
    return exit_value
def process_task(
    current_line,              # type: list
    process_name,              # type: str
    pipe,                      # type: Pipe
    queue,                     # type: typing.Optional[Queue]
    tracing,                   # type: bool
    logger,                    # type: typing.Any
    logger_cfg,                # type: str
    logger_handlers,           # type: list
    logger_level,              # type: int
    logger_formatter,          # type: typing.Any
    storage_conf,              # type: str
    storage_loggers,           # type: list
    storage_loggers_handlers,  # type: list
    cache_queue,               # type: typing.Optional[Queue]
    cache_ids,                 # type: typing.Any
    cache_profiler,            # type: bool
):
    # type: (...) -> bool
    """ Process command received from the runtime through a pipe.

    The function binds CPUs/GPUs as requested by the last elements of the
    command, redirects the worker and storage loggers to the job out/err
    files, executes the task, builds the result message, restores the
    loggers and finally writes the message into the pipe.

    :param current_line: Current command (line) to process.
    :param process_name: Process name for logger messages.
    :param pipe: Pipe where to write the result.
    :param queue: Queue where to drop the process exceptions.
    :param tracing: Tracing.
    :param logger: Logger.
    :param logger_cfg: Logger configuration file
    :param logger_handlers: Logger handlers.
    :param logger_level: Logger level.
    :param logger_formatter: Logger formatter.
    :param storage_conf: Storage configuration.
    :param storage_loggers: Storage loggers.
    :param storage_loggers_handlers: Storage loggers handlers (one list of
                                     handlers per storage logger, same order).
    :param cache_queue: Cache tracker communication queue.
    :param cache_ids: Cache proxy dictionary (read-only).
    :param cache_profiler: Cache profiler
    :return: True if processed successfully, False otherwise.
    """
    with event_worker(PROCESS_TASK_EVENT):
        affinity_event_emit = False
        binded_cpus = False
        binded_gpus = False

        # CPU binding
        cpus = current_line[-3]
        if cpus != "-" and THREAD_AFFINITY:
            # The cpu affinity event is already emitted in Java.
            # Instead of emitting what we receive, we are emitting what we
            # check after setting the affinity.
            binded_cpus = bind_cpus(cpus, process_name, logger)

        # GPU binding
        gpus = current_line[-2]
        if gpus != "-":
            emit_manual_event(int(gpus) + 1, inside=True, gpu_affinity=True)
            bind_gpus(gpus, process_name, logger)
            binded_gpus = True

        # Remove the last elements: cpu and gpu bindings
        current_line = current_line[0:-3]

        # task jobId command
        job_id, job_out, job_err = current_line[1:4]  # 4th is not taken
        # current_line[4] = <boolean> = tracing
        # current_line[5] = <integer> = task id
        # current_line[6] = <boolean> = debug
        # current_line[7] = <string>  = storage conf.
        # current_line[8] = <string>  = operation type (e.g. METHOD)
        # current_line[9] = <string>  = module
        # current_line[10]= <string>  = method
        # current_line[11]= <string>  = time out
        # current_line[12]= <integer> = Number of slaves (worker nodes)==#nodes
        # <<list of slave nodes>>
        # current_line[12 + #nodes] = <integer> = computing units
        # current_line[13 + #nodes] = <boolean> = has target
        # current_line[14 + #nodes] = <string>  = has return (always "null")
        # current_line[15 + #nodes] = <integer> = Number of parameters
        # <<list of parameters>>
        #       !---> type, stream, prefix , value

        if __debug__:
            logger.debug(HEADER + "[%s] Received task with id: %s" %
                         (str(process_name), str(job_id)))
            logger.debug(HEADER + "[%s] - TASK CMD: %s" %
                         (str(process_name), str(current_line)))

        # Swap logger from stream handler to file handler
        # All task output will be redirected to job.out/err
        for log_handler in logger_handlers:
            logger.removeHandler(log_handler)
        for storage_logger in storage_loggers:
            # Iterate over a copy: removeHandler mutates the handlers list,
            # and removing while iterating it would skip every other handler.
            for log_handler in list(storage_logger.handlers):
                storage_logger.removeHandler(log_handler)
        out_file_handler = logging.FileHandler(job_out)
        out_file_handler.setLevel(logger_level)
        out_file_handler.setFormatter(logger_formatter)
        err_file_handler = logging.FileHandler(job_err)
        err_file_handler.setLevel("ERROR")
        err_file_handler.setFormatter(logger_formatter)
        logger.addHandler(out_file_handler)
        logger.addHandler(err_file_handler)
        for storage_logger in storage_loggers:
            storage_logger.addHandler(out_file_handler)
            storage_logger.addHandler(err_file_handler)

        if __debug__:
            # From now onwards the log is in the job out and err files
            logger.debug("-" * 100)
            logger.debug("Received task in process: %s" % str(process_name))
            logger.debug("TASK CMD: %s" % str(current_line))

        try:
            # Check thread affinity
            if THREAD_AFFINITY:
                # The cpu affinity can be long if multiple cores have been
                # assigned. To avoid issues, we get just the first id.
                real_affinity = thread_affinity.getaffinity()
                cpus = str(real_affinity[0])
                num_cpus = len(real_affinity)
                emit_manual_event(int(cpus) + 1,
                                  inside=True, cpu_affinity=True)
                emit_manual_event(int(num_cpus),
                                  inside=True, cpu_number=True)
                affinity_event_emit = True
                if not binded_cpus:
                    logger.warning(
                        "This task is going to be executed with default thread affinity %s" %  # noqa: E501
                        str(real_affinity))

            # Setup process environment
            cn = int(current_line[12])
            cn_names = ",".join(current_line[13:13 + cn])
            cu = current_line[13 + cn]
            if __debug__:
                logger.debug("Process environment:")
                logger.debug("\t - Number of nodes: %s" % (str(cn)))
                logger.debug("\t - Hostnames: %s" % str(cn_names))
                logger.debug("\t - Number of threads: %s" % (str(cu)))
            setup_environment(cn, cn_names, cu)

            # Execute task
            result = execute_task(process_name,
                                  storage_conf,
                                  current_line[9:],
                                  tracing,
                                  logger,
                                  logger_cfg,
                                  (job_out, job_err),
                                  False,
                                  None,
                                  cache_queue,
                                  cache_ids,
                                  cache_profiler)
            # The ignored variable is timed_out
            exit_value, new_types, new_values, _, except_msg = result

            if exit_value == 0:
                # Task has finished without exceptions
                # endTask jobId exitValue message
                message = build_successful_message(new_types, new_values, job_id, exit_value)  # noqa: E501
                if __debug__:
                    logger.debug("%s - Pipe %s END TASK MESSAGE: %s" %
                                 (str(process_name),
                                  str(pipe.output_pipe),
                                  str(message)))
            elif exit_value == 2:
                # Task has finished with a COMPSs Exception
                # compssExceptionTask jobId exitValue message
                except_msg, message = build_compss_exception_message(except_msg, job_id)  # noqa: E501
                if __debug__:
                    logger.debug(
                        "%s - Pipe %s COMPSS EXCEPTION TASK MESSAGE: %s" %
                        (str(process_name),
                         str(pipe.output_pipe),
                         str(except_msg)))
            else:
                # An exception other than COMPSsException has been raised
                # within the task
                message = build_exception_message(job_id, exit_value)
                if __debug__:
                    logger.debug("%s - Pipe %s END TASK MESSAGE: %s" %
                                 (str(process_name),
                                  str(pipe.output_pipe),
                                  str(message)))

            # The return message is:
            #
            # TaskResult ==> jobId exitValue D List<Object>
            #
            # Where List<Object> has D * 2 length:
            # D = #parameters == #task_parameters +
            #                    (has_target ? 1 : 0) +
            #                    #returns
            # And contains a pair of elements per parameter:
            #     - Parameter new type.
            #     - Parameter new value:
            #         - "null" if it is NOT a PSCO
            #         - PSCOId (String) if is a PSCO
            # Example:
            #     4 null 9 null 12 <pscoid>
            #
            # The order of the elements is: parameters + self + returns
            #
            # This is sent through the pipe with the END_TASK message.
            # If the task had an object or file as parameter and the worker
            # returns the id, the runtime can change the type (and locations)
            # to a EXTERNAL_OBJ_T.

        except Exception as e:
            logger.exception("%s - Exception %s" % (str(process_name),
                                                    str(e)))
            if queue:
                queue.put("EXCEPTION")

            # Stop the worker process
            return False

        # Clean environment variables
        if __debug__:
            logger.debug("Cleaning environment.")
        clean_environment(binded_cpus, binded_gpus)
        if affinity_event_emit:
            emit_manual_event(0, inside=True, cpu_affinity=True)
            emit_manual_event(0, inside=True, cpu_number=True)
        if binded_gpus:
            emit_manual_event(0, inside=True, gpu_affinity=True)

        # Restore loggers
        if __debug__:
            logger.debug("Restoring loggers.")
            logger.debug("-" * 100)
            # No more logs in job out and err files

        # Restore worker log
        logger.removeHandler(out_file_handler)
        logger.removeHandler(err_file_handler)
        logger.handlers = []
        for handler in logger_handlers:
            logger.addHandler(handler)
        for position, storage_logger in enumerate(storage_loggers):
            storage_logger.removeHandler(out_file_handler)
            storage_logger.removeHandler(err_file_handler)
            storage_logger.handlers = []
            for handler in storage_loggers_handlers[position]:
                storage_logger.addHandler(handler)

        # The job file handlers are no longer attached to any logger:
        # close them so their file descriptors are released (and flushed)
        # now instead of whenever they happen to be garbage collected.
        out_file_handler.close()
        err_file_handler.close()

        if __debug__:
            logger.debug(HEADER + "[%s] Finished task with id: %s" %
                         (str(process_name), str(job_id)))

        # Notify the runtime that the task has finished
        pipe.write(message)

        return True