Code example #1
File: mpi_piper_worker.py  Project: bsc-wdc/compss
def compss_persistent_executor(config):
    # type: (PiperWorkerConfiguration) -> None
    """ Persistent executor main function.

    Retrieves the initial configuration and spawns the worker processes.

    :param config: Piper Worker Configuration description.
    :return: None
    """
    COMM.gather(str(os.getpid()), root=0)

    # Catch SIGTERM sent by bindings_piper
    signal.signal(signal.SIGTERM, shutdown_handler)
    # Catch SIGUSR2 to work around strange behaviour with mpi4py
    signal.signal(signal.SIGUSR2, user_signal_handler)

    # Set the binding in worker mode
    import pycompss.util.context as context
    context.set_pycompss_context(context.WORKER)

    persistent_storage = (config.storage_conf != "null")

    logger, logger_cfg, storage_loggers, _ = load_loggers(
        config.debug, persistent_storage)

    cache_profiler = False
    if config.cache_profiler.lower() == 'true':
        cache_profiler = True

    if persistent_storage:
        # Initialize storage
        with event_worker(INIT_STORAGE_AT_WORKER_EVENT):
            from storage.api import initWorker as initStorageAtWorker  # noqa
            initStorageAtWorker(config_file_path=config.storage_conf)

    process_name = "".join(("Rank-", str(RANK)))
    conf = ExecutorConf(config.debug, get_temporary_directory(), TRACING,
                        config.storage_conf, logger, logger_cfg,
                        persistent_storage, storage_loggers,
                        config.stream_backend, config.stream_master_name,
                        config.stream_master_port, CACHE_IDS, CACHE_QUEUE,
                        cache_profiler)
    executor(None, process_name, config.pipes[RANK - 1], conf)

    if persistent_storage:
        # Finish storage
        if __debug__:
            logger.debug(HEADER + "Stopping persistent storage")
        with event_worker(FINISH_STORAGE_AT_WORKER_EVENT):
            from storage.api import finishWorker as finishStorageAtWorker  # noqa
            finishStorageAtWorker()
Code example #2
File: persistent.py  Project: bsc-wdc/compss
def stop_storage(logger):
    # type: (typing.Any) -> None
    """ Stops the persistent storage.

    This function emits the event in the worker.

    :param logger: Logger where to log the messages.
    :return: None
    """
    with event_worker(STOP_STORAGE_EVENT):
        __stop_storage__(logger)
Code example #3
File: persistent.py  Project: bsc-wdc/compss
def init_storage(storage_conf, logger):  # noqa
    # type: (str, typing.Any) -> bool
    """ Call to init storage.

    This function emits the event in the worker.

    :param storage_conf: Storage configuration file.
    :param logger: Logger where to log the messages.
    :return: True if initialized, False otherwise.
    """
    with event_worker(INIT_STORAGE_EVENT):
        return __init_storage__(storage_conf, logger)
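The two wrappers above only add the worker tracing event around the real start/stop calls. A minimal usage sketch follows (the logger setup and the configuration path are made up for illustration; init_storage and stop_storage are the functions listed above):

import logging

logger = logging.getLogger("piper_worker")
storage_conf = "/tmp/storage.cfg"  # hypothetical storage configuration file
if storage_conf != "null" and init_storage(storage_conf, logger):
    try:
        pass  # run the tasks that rely on the persistent storage backend
    finally:
        stop_storage(logger)
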
Code example #4
def process_quit(logger, process_name):  # noqa
    # type: (typing.Any, str) -> bool
    """ Process quit message.

    Response: False.

    :param logger: Logger.
    :param process_name: Process name.
    :return: Always false.
    """
    with event_worker(PROCESS_QUIT_EVENT):
        if __debug__:
            logger.debug(HEADER + "[%s] Received quit." % str(process_name))
        return False
Code example #5
def process_ping(pipe, logger, process_name):  # noqa
    # type: (Pipe, typing.Any, str) -> bool
    """ Process ping message.

    Response: Pong.

    :param pipe: Where to write the ping response.
    :param logger: Logger.
    :param process_name: Process name.
    :return: True if success. False otherwise.
    """
    with event_worker(PROCESS_PING_EVENT):
        if __debug__:
            logger.debug(HEADER + "[%s] Received ping." % str(process_name))
        try:
            pipe.write(PONG_TAG)
        except Exception:  # noqa
            return False
        return True
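The ping handler only acknowledges liveness: it writes PONG_TAG back through the pipe and reports success. Below is a standalone sketch of that behaviour, rewritten without the COMPSs event machinery so it runs on its own (the InMemoryPipe class and the literal "pong" value are assumptions for the example):

PONG_TAG = "pong"  # placeholder; the real value comes from the COMPSs constants

class InMemoryPipe(object):
    """Tiny stand-in that records what would be written back to the runtime."""
    def __init__(self):
        self.written = []

    def write(self, message):
        self.written.append(message)

def ping(pipe):
    # Mirrors the body of process_ping: answer with PONG_TAG and report
    # failure if the pipe cannot be written.
    try:
        pipe.write(PONG_TAG)
    except Exception:
        return False
    return True

pipe = InMemoryPipe()
assert ping(pipe) is True
assert pipe.written == [PONG_TAG]
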
Code example #6
def main():
    # type: () -> None
    """ GAT worker main code.

    Executes the task provided by parameters.

    :return: None
    """
    # Emit sync event if tracing is enabled
    tracing = sys.argv[1] == 'true'
    task_id = int(sys.argv[2])
    log_level = sys.argv[3]
    storage_conf = sys.argv[4]
    stream_backend = sys.argv[5]
    stream_master_name = sys.argv[6]
    stream_master_port = sys.argv[7]
    # Next: method_type = sys.argv[8]
    params = sys.argv[9:]
    # Next parameters:
    # class_name = sys.argv[10]
    # method_name = sys.argv[11]
    # num_slaves = sys.argv[12]
    # i = 13 + num_slaves
    # slaves = sys.argv[12..i]
    # numCus = sys.argv[i+1]
    # has_target = sys.argv[i+2] == 'true'
    # num_params = int(sys.argv[i+3])
    # params = sys.argv[i+4..]

    if log_level == "true" or log_level == "debug":
        print("Tracing = " + str(tracing))
        print("Task id = " + str(task_id))
        print("Log level = " + str(log_level))
        print("Storage conf = " + str(storage_conf))

    persistent_storage = False
    if storage_conf != "null":
        persistent_storage = True

    streaming = False
    if stream_backend not in [None, "null", "NONE"]:
        streaming = True

    with trace_multiprocessing_worker() if tracing else dummy_context():

        if streaming:
            # Start streaming
            DistroStreamClientHandler.init_and_start(
                master_ip=stream_master_name, master_port=stream_master_port)

        # Load log level configuration file
        worker_path = os.path.dirname(os.path.realpath(__file__))
        if log_level == "true" or log_level == "debug":
            # Debug
            log_json = "".join(
                (worker_path, "/../../../log/logging_gat_worker_debug.json"))
        elif log_level == "info" or log_level == "off":
            # Info or no debug
            log_json = "".join(
                (worker_path, "/../../../log/logging_gat_worker_off.json"))
        else:
            # Default
            log_json = "".join(
                (worker_path, "/../../../log/logging_gat_worker.json"))
        init_logging_worker(log_json, tracing)

        if persistent_storage:
            # Initialize storage
            with event_worker(INIT_STORAGE_AT_WORKER_EVENT):
                from storage.api import initWorker as initStorageAtWorker  # noqa
                initStorageAtWorker(config_file_path=storage_conf)

        # Init worker
        exit_code = compss_worker(tracing, str(task_id), storage_conf, params,
                                  log_json)

        if streaming:
            # Finish streaming
            DistroStreamClientHandler.set_stop()

        if persistent_storage:
            # Finish storage
            with event_worker(FINISH_STORAGE_AT_WORKER_EVENT):
                from storage.api import finishWorker as finishStorageAtWorker  # noqa
                finishStorageAtWorker()

    if exit_code == 1:
        exit(1)
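The first eight positional arguments have fixed meanings; everything from sys.argv[9] onwards is the task description that compss_worker unpacks. A synthetic argument vector matching that fixed layout (all values are made up; the real invocation is produced by the COMPSs runtime):

import sys

sys.argv = ["gat_worker.py",  # script name (placeholder)
            "false",          # argv[1]: tracing
            "3",              # argv[2]: task id
            "debug",          # argv[3]: log level
            "null",           # argv[4]: storage conf ("null" = no persistent storage)
            "null",           # argv[5]: stream backend
            "localhost",      # argv[6]: stream master name
            "49049",          # argv[7]: stream master port
            "METHOD"]         # argv[8]: method type; task parameters would follow
tracing = sys.argv[1] == "true"
task_id = int(sys.argv[2])
persistent_storage = sys.argv[4] != "null"
assert (tracing, task_id, persistent_storage) == (False, 3, False)
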
Code example #7
def compss_persistent_worker(config):
    # type: (PiperWorkerConfiguration) -> None
    """ Persistent worker main function.

    Retrieves the initial configuration and spawns the worker processes.

    :param config: Piper Worker Configuration description.
    :return: None
    """
    global CACHE
    global CACHE_PROCESS

    # Catch SIGTERM sent by bindings_piper
    signal.signal(signal.SIGTERM, shutdown_handler)

    # Set the binding in worker mode
    context.set_pycompss_context(context.WORKER)

    persistent_storage = (config.storage_conf != 'null')

    logger, logger_cfg, storage_loggers, log_dir = load_loggers(config.debug, persistent_storage)

    if __debug__:
        logger.debug(HEADER + "piper_worker.py wake up")
        config.print_on_logger(logger)

    if persistent_storage:
        # Initialize storage
        logger.debug(HEADER + "Starting persistent storage")
        with event_worker(INIT_STORAGE_AT_WORKER_EVENT):
            from storage.api import initWorker as initStorageAtWorker  # noqa
            initStorageAtWorker(config_file_path=config.storage_conf)

    # Create new processes
    queues = []

    cache_profiler = False
    if config.cache_profiler.lower() == 'true':
        cache_profiler = True

    # Setup cache
    if is_cache_enabled(str(config.cache)):
        # Deploy the necessary processes
        CACHE = True
        cache_params = start_cache(logger, str(config.cache), cache_profiler, log_dir)
    else:
        # No cache
        CACHE = False
        cache_params = (None, None, None, None)  # type: ignore
    smm, CACHE_PROCESS, cache_queue, cache_ids = cache_params

    # Create new executor processes
    conf = ExecutorConf(config.debug,
                        get_temporary_directory(),
                        TRACING,
                        config.storage_conf,
                        logger,
                        logger_cfg,
                        persistent_storage,
                        storage_loggers,
                        config.stream_backend,
                        config.stream_master_name,
                        config.stream_master_port,
                        cache_ids,
                        cache_queue,
                        cache_profiler)

    for i in range(0, config.tasks_x_node):
        if __debug__:
            logger.debug(HEADER + "Launching process " + str(i))
        process_name = "".join(("Process-", str(i)))
        pid, queue = create_executor_process(process_name, conf, config.pipes[i])
        queues.append(queue)

    # Read command from control pipe
    alive = True
    process_counter = config.tasks_x_node
    control_pipe = config.control_pipe  # type: typing.Any
    while alive:
        command = control_pipe.read_command(retry_period=1)
        if command != "":
            line = command.split()

            if line[0] == ADD_EXECUTOR_TAG:
                process_name = "".join(("Process-", str(process_counter)))
                process_counter = process_counter + 1
                in_pipe = line[1]
                out_pipe = line[2]
                pipe = Pipe(in_pipe, out_pipe)
                pid, queue = create_executor_process(process_name, conf, pipe)
                queues.append(queue)
                control_pipe.write(" ".join((ADDED_EXECUTOR_TAG,
                                             out_pipe,
                                             in_pipe,
                                             str(pid))))

            elif line[0] == QUERY_EXECUTOR_ID_TAG:
                in_pipe = line[1]
                out_pipe = line[2]
                proc = PROCESSES.get(in_pipe)  # type: typing.Any
                pid = proc.pid
                control_pipe.write(" ".join((REPLY_EXECUTOR_ID_TAG,
                                             out_pipe,
                                             in_pipe,
                                             str(pid))))

            elif line[0] == CANCEL_TASK_TAG:
                in_pipe = line[1]
                cancel_proc = PROCESSES.get(in_pipe)  # type: typing.Any
                cancel_pid = cancel_proc.pid
                if __debug__:
                    logger.debug(HEADER + "Signaling process with PID " +
                                 str(cancel_pid) + " to cancel a task")
                os.kill(cancel_pid, signal.SIGUSR2)  # NOSONAR cancellation produced by COMPSs

            elif line[0] == REMOVE_EXECUTOR_TAG:
                in_pipe = line[1]
                out_pipe = line[2]
                proc = PROCESSES.pop(in_pipe, None)
                if proc:
                    if proc.is_alive():
                        logger.warning(HEADER + "Forcing terminate on: " +
                                       proc.name)
                        proc.terminate()
                    proc.join()
                control_pipe.write(" ".join((REMOVED_EXECUTOR_TAG,
                                             out_pipe,
                                             in_pipe)))

            elif line[0] == PING_TAG:
                control_pipe.write(PONG_TAG)

            elif line[0] == QUIT_TAG:
                alive = False

    # Wait for all threads
    for proc in PROCESSES.values():
        proc.join()

    # Check if there is any exception message from the threads
    for i in range(0, config.tasks_x_node):
        if not queues[i].empty():
            logger.error(HEADER + "Exception in threads queue: " +
                         str(queues[i].get()))

    for queue in queues:
        queue.close()
        queue.join_thread()

    if CACHE:
        stop_cache(smm, cache_queue, cache_profiler, CACHE_PROCESS)  # noqa

    if persistent_storage:
        # Finish storage
        if __debug__:
            logger.debug(HEADER + "Stopping persistent storage")
        with event_worker(FINISH_STORAGE_AT_WORKER_EVENT):
            from storage.api import finishWorker as finishStorageAtWorker  # noqa
            finishStorageAtWorker()

    if __debug__:
        logger.debug(HEADER + "Finished")

    control_pipe.write(QUIT_TAG)
    control_pipe.close()
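The control loop above implements a small line-oriented protocol: each command starts with a tag, followed by the pipe paths (and, in replies, the executor PID). A sketch of one ADD_EXECUTOR exchange; the literal tag strings, pipe paths and PID are placeholders, the real values come from the *_TAG constants and from the runtime:

ADD_EXECUTOR_TAG = "addExecutor"      # placeholder literal
ADDED_EXECUTOR_TAG = "addedExecutor"  # placeholder literal

# Request written by the runtime on the control pipe:
request = ADD_EXECUTOR_TAG + " /tmp/pipe_in_7 /tmp/pipe_out_7"
line = request.split()
assert line[0] == ADD_EXECUTOR_TAG
in_pipe, out_pipe = line[1], line[2]

# The worker spawns a new executor bound to that pipe pair and answers with
# ADDED_EXECUTOR_TAG <out_pipe> <in_pipe> <pid>:
pid = 12345  # made-up PID of the freshly created executor process
reply = " ".join((ADDED_EXECUTOR_TAG, out_pipe, in_pipe, str(pid)))
print(reply)  # addedExecutor /tmp/pipe_out_7 /tmp/pipe_in_7 12345
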
Code example #8
File: mpi_executor.py  Project: bsc-wdc/compss
def process_task(
    current_line,  # type: str
    process_name,  # type: str
    logger,  # type: typing.Any
    log_json,  # type: str
    logger_handlers,  # type: typing.Any
    logger_level,  # type: int
    logger_formatter  # type: typing.Any
):  # type: (...) -> typing.Tuple[int, str]
    """ Process command received from the current_line.

    :param current_line: Current command (line) to process.
    :param process_name: Process name for logger messages.
    :param logger: Logger.
    :param log_json: Logger configuration file.
    :param logger_handlers: Logger handlers.
    :param logger_level: Logger level.
    :param logger_formatter: Logger formatter.
    :return: exit_value and message.
    """
    with event_worker(PROCESS_TASK_EVENT):
        # Process properties
        stdout = sys.stdout
        stderr = sys.stderr
        job_id = None

        if __debug__:
            logger.debug("[PYTHON EXECUTOR] [%s] Received message: %s" %
                         (str(process_name), str(current_line)))

        splitted_current_line = current_line.split()
        if splitted_current_line[0] == EXECUTE_TASK_TAG:
            num_collection_params = int(splitted_current_line[-1])
            collections_layouts = dict()
            if num_collection_params > 0:
                raw_layouts = splitted_current_line[(
                    (num_collection_params * -4) - 1):-1]
                for i in range(num_collection_params):
                    param = raw_layouts[i * 4]
                    layout = [
                        int(raw_layouts[(i * 4) + 1]),
                        int(raw_layouts[(i * 4) + 2]),
                        int(raw_layouts[(i * 4) + 3])
                    ]
                    collections_layouts[param] = layout

            # Remove the last elements: cpu and gpu bindings and collection params
            current_line_filtered = splitted_current_line[0:-3]

            # task jobId command
            job_id = current_line_filtered[1]
            job_out = current_line_filtered[2]
            job_err = current_line_filtered[3]
            # current_line_filtered[4] = <boolean> = tracing
            # current_line_filtered[5] = <integer> = task id
            # current_line_filtered[6] = <boolean> = debug
            # current_line_filtered[7] = <string>  = storage conf.
            # current_line_filtered[8] = <string>  = operation type (e.g. METHOD)
            # current_line_filtered[9] = <string>  = module
            # current_line_filtered[10]= <string>  = method
            # current_line_filtered[11]= <string>  = time out
            # current_line_filtered[12]= <integer> = Number of slaves (worker nodes)==#nodes
            # <<list of slave nodes>>
            # current_line_filtered[12 + #nodes] = <integer> = computing units
            # current_line_filtered[13 + #nodes] = <boolean> = has target
            # current_line_filtered[14 + #nodes] = <string>  = has return (always "null")
            # current_line_filtered[15 + #nodes] = <integer> = Number of parameters
            # <<list of parameters>>
            #       !---> type, stream, prefix , value

            if __debug__:
                logger.debug(
                    "[PYTHON EXECUTOR] [%s] Received task with id: %s" %
                    (str(process_name), str(job_id)))
                logger.debug("[PYTHON EXECUTOR] [%s] - TASK CMD: %s" %
                             (str(process_name), str(current_line_filtered)))

            # Swap logger from stream handler to file handler
            # All task output will be redirected to job.out/err
            for log_handler in logger_handlers:
                logger.removeHandler(log_handler)

            out_file_handler = logging.FileHandler(job_out)
            out_file_handler.setLevel(logger_level)
            out_file_handler.setFormatter(logger_formatter)
            err_file_handler = logging.FileHandler(job_err)
            err_file_handler.setLevel("ERROR")
            err_file_handler.setFormatter(logger_formatter)
            logger.addHandler(out_file_handler)
            logger.addHandler(err_file_handler)

            if __debug__:
                logger.debug("Received task in process: %s" %
                             str(process_name))
                logger.debug(" - TASK CMD: %s" % str(current_line_filtered))

            try:
                # Setup out/err wrappers
                out = open(job_out, "a")
                err = open(job_err, "a")
                sys.stdout = out
                sys.stderr = err

                # Setup process environment
                cn = int(current_line_filtered[12])
                cn_names = ",".join(current_line_filtered[13:13 + cn])
                cu = int(current_line_filtered[13 + cn])
                os.environ["COMPSS_NUM_NODES"] = str(cn)
                os.environ["COMPSS_HOSTNAMES"] = cn_names
                os.environ["COMPSS_NUM_THREADS"] = str(cu)
                os.environ["OMP_NUM_THREADS"] = str(cu)
                if __debug__:
                    logger.debug("Process environment:")
                    logger.debug("\t - Number of nodes: %s" % (str(cn)))
                    logger.debug("\t - Hostnames: %s" % str(cn_names))
                    logger.debug("\t - Number of threads: %s" % (str(cu)))

                # Execute task
                storage_conf = "null"
                tracing = False
                python_mpi = True
                result = execute_task(process_name, storage_conf,
                                      current_line_filtered[9:], tracing,
                                      logger, log_json, (job_out, job_err),
                                      python_mpi, collections_layouts, None,
                                      None)
                exit_value, new_types, new_values, time_out, except_msg = result

                # Restore out/err wrappers
                sys.stdout = stdout
                sys.stderr = stderr
                sys.stdout.flush()
                sys.stderr.flush()
                out.close()
                err.close()

                # To reduce if necessary:
                # global_exit_value = MPI.COMM_WORLD.reduce(exit_value,
                #                                           op=MPI.SUM,
                #                                           root=0)
                # message = ""

                # if MPI.COMM_WORLD.rank == 0 and global_exit_value == 0:
                if exit_value == 0:
                    # Task has finished without exceptions
                    # endTask jobId exitValue message
                    params = build_return_params_message(new_types, new_values)
                    message = " ".join((END_TASK_TAG, str(job_id),
                                        str(exit_value), str(params) + "\n"))
                elif exit_value == 2:
                    # Task has finished with a COMPSs Exception
                    # compssExceptionTask jobId exitValue message
                    except_msg = except_msg.replace(" ", "_")
                    message = " ".join((COMPSS_EXCEPTION_TAG, str(job_id),
                                        str(except_msg) + "\n"))
                    if __debug__:
                        logger.debug("%s - COMPSS EXCEPTION TASK MESSAGE: %s" %
                                     (str(process_name), str(except_msg)))
                else:
                    # elif MPI.COMM_WORLD.rank == 0 and global_exit_value != 0:
                    # An exception has been raised in task
                    message = " ".join(
                        (END_TASK_TAG, str(job_id), str(exit_value) + "\n"))

                if __debug__:
                    logger.debug("%s - END TASK MESSAGE: %s" %
                                 (str(process_name), str(message)))
                # The return message is:
                #
                # TaskResult ==> jobId exitValue D List<Object>
                #
                # Where List<Object> has D * 2 length:
                # D = #parameters == #task_parameters +
                #                    (has_target ? 1 : 0) +
                #                    #returns
                # And contains a pair of elements per parameter:
                #     - Parameter new type.
                #     - Parameter new value:
                #         - "null" if it is NOT a PSCO
                #         - PSCOId (String) if is a PSCO
                # Example:
                #     4 null 9 null 12 <pscoid>
                #
                # The order of the elements is: parameters + self + returns
                #
                # This is sent through the pipe with the END_TASK message.
                # If the task had an object or file as parameter and the worker
                # returns the id, the runtime can change the type (and locations)
                # to a EXTERNAL_OBJ_T.

            except Exception as e:
                logger.exception("%s - Exception %s" %
                                 (str(process_name), str(e)))
                exit_value = 7
                message = " ".join(
                    (END_TASK_TAG, str(job_id), str(exit_value) + "\n"))

            # Clean environment variables
            if __debug__:
                logger.debug("Cleaning environment.")

            del os.environ["COMPSS_HOSTNAMES"]

            # Restore loggers
            if __debug__:
                logger.debug("Restoring loggers.")
            logger.removeHandler(out_file_handler)
            logger.removeHandler(err_file_handler)
            for handler in logger_handlers:
                logger.addHandler(handler)

            if __debug__:
                logger.debug(
                    "[PYTHON EXECUTOR] [%s] Finished task with id: %s" %
                    (str(process_name), str(job_id)))
            # return SUCCESS_SIG,
            #        "{0} -- Task Ended Successfully!".format(str(process_name))

        else:
            if __debug__:
                logger.debug("[PYTHON EXECUTOR] [%s] Unexpected message: %s" %
                             (str(process_name), str(current_line)))
            exit_value = 7
            message = " ".join(
                (END_TASK_TAG, str(job_id), str(exit_value) + "\n"))

        return exit_value, message
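The layout parsing at the top of process_task walks the command tail backwards: the last token is the number of collection parameters, and each collection contributes four preceding tokens (the parameter name plus three layout integers, read here as block count/length/stride; that naming is an assumption). A self-contained sketch on a synthetic command:

# Synthetic EXECUTE_TASK tail; "<other-fields>" stands in for the job fields
# that are not relevant to layout parsing.
line = ("EXECUTE_TASK 42 job42.out job42.err <other-fields> "
        "colA 2 8 1 colB 4 16 2 2").split()

num_collection_params = int(line[-1])
collections_layouts = {}
if num_collection_params > 0:
    raw_layouts = line[(num_collection_params * -4) - 1:-1]
    for i in range(num_collection_params):
        param = raw_layouts[i * 4]
        collections_layouts[param] = [int(raw_layouts[i * 4 + 1]),
                                      int(raw_layouts[i * 4 + 2]),
                                      int(raw_layouts[i * 4 + 3])]

print(collections_layouts)  # {'colA': [2, 8, 1], 'colB': [4, 16, 2]}
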
Code example #9
def process_task(
        current_line,  # type: list
        process_name,  # type: str
        pipe,  # type: Pipe
        queue,  # type: typing.Optional[Queue]
        tracing,  # type: bool
        logger,  # type: typing.Any
        logger_cfg,  # type: str
        logger_handlers,  # type: list
        logger_level,  # type: int
        logger_formatter,  # type: typing.Any
        storage_conf,  # type: str
        storage_loggers,  # type: list
        storage_loggers_handlers,  # type: list
        cache_queue,  # type: typing.Optional[Queue]
        cache_ids,  # type: typing.Any
        cache_profiler,  # type: bool
):  # type: (...) -> bool
    """ Process command received from the runtime through a pipe.

    :param current_line: Current command (line) to process.
    :param process_name: Process name for logger messages.
    :param pipe: Pipe where to write the result.
    :param queue: Queue where to drop the process exceptions.
    :param tracing: Tracing.
    :param logger: Logger.
    :param logger_cfg: Logger configuration file
    :param logger_handlers: Logger handlers.
    :param logger_level: Logger level.
    :param logger_formatter: Logger formatter.
    :param storage_conf: Storage configuration.
    :param storage_loggers: Storage loggers.
    :param storage_loggers_handlers: Storage loggers handlers.
    :param cache_queue: Cache tracker communication queue.
    :param cache_ids: Cache proxy dictionary (read-only).
    :param cache_profiler: Cache profiler
    :return: True if processed successfully, False otherwise.
    """
    with event_worker(PROCESS_TASK_EVENT):
        affinity_event_emit = False
        binded_cpus = False
        binded_gpus = False

        # CPU binding
        cpus = current_line[-3]
        if cpus != "-" and THREAD_AFFINITY:
            # The cpu affinity event is already emitted in Java.
        # Instead of emitting what we receive, we are emitting what we check
            # after setting the affinity.
            binded_cpus = bind_cpus(cpus, process_name, logger)

        # GPU binding
        gpus = current_line[-2]
        if gpus != "-":
            emit_manual_event(int(gpus) + 1, inside=True, gpu_affinity=True)
            bind_gpus(gpus, process_name, logger)
            binded_gpus = True

        # Remove the last elements: cpu and gpu bindings
        current_line = current_line[0:-3]

        # task jobId command
        job_id, job_out, job_err = current_line[1:4]  # 4th is not taken
        # current_line[4] = <boolean> = tracing
        # current_line[5] = <integer> = task id
        # current_line[6] = <boolean> = debug
        # current_line[7] = <string>  = storage conf.
        # current_line[8] = <string>  = operation type (e.g. METHOD)
        # current_line[9] = <string>  = module
        # current_line[10]= <string>  = method
        # current_line[11]= <string>  = time out
        # current_line[12]= <integer> = Number of slaves (worker nodes)==#nodes
        # <<list of slave nodes>>
        # current_line[12 + #nodes] = <integer> = computing units
        # current_line[13 + #nodes] = <boolean> = has target
        # current_line[14 + #nodes] = <string>  = has return (always "null")
        # current_line[15 + #nodes] = <integer> = Number of parameters
        # <<list of parameters>>
        #       !---> type, stream, prefix , value

        if __debug__:
            logger.debug(HEADER + "[%s] Received task with id: %s" %
                         (str(process_name), str(job_id)))
            logger.debug(HEADER + "[%s] - TASK CMD: %s" %
                         (str(process_name), str(current_line)))

        # Swap logger from stream handler to file handler
        # All task output will be redirected to job.out/err
        for log_handler in logger_handlers:
            logger.removeHandler(log_handler)
        for storage_logger in storage_loggers:
            for log_handler in storage_logger.handlers:
                storage_logger.removeHandler(log_handler)
        out_file_handler = logging.FileHandler(job_out)
        out_file_handler.setLevel(logger_level)
        out_file_handler.setFormatter(logger_formatter)
        err_file_handler = logging.FileHandler(job_err)
        err_file_handler.setLevel("ERROR")
        err_file_handler.setFormatter(logger_formatter)
        logger.addHandler(out_file_handler)
        logger.addHandler(err_file_handler)
        for storage_logger in storage_loggers:
            storage_logger.addHandler(out_file_handler)
            storage_logger.addHandler(err_file_handler)

        if __debug__:
            # From now onwards the log is in the job out and err files
            logger.debug("-" * 100)
            logger.debug("Received task in process: %s" % str(process_name))
            logger.debug("TASK CMD: %s" % str(current_line))

        try:
            # Check thread affinity
            if THREAD_AFFINITY:
                # The cpu affinity can be long if multiple cores have been
                # assigned. To avoid issues, we get just the first id.
                real_affinity = thread_affinity.getaffinity()
                cpus = str(real_affinity[0])
                num_cpus = len(real_affinity)
                emit_manual_event(int(cpus) + 1,
                                  inside=True,
                                  cpu_affinity=True)
                emit_manual_event(int(num_cpus), inside=True, cpu_number=True)
                affinity_event_emit = True
                if not binded_cpus:
                    logger.warning(
                        "This task is going to be executed with default thread affinity %s"
                        %  # noqa: E501
                        str(real_affinity))

            # Setup process environment
            cn = int(current_line[12])
            cn_names = ",".join(current_line[13:13 + cn])
            cu = current_line[13 + cn]
            if __debug__:
                logger.debug("Process environment:")
                logger.debug("\t - Number of nodes: %s" % (str(cn)))
                logger.debug("\t - Hostnames: %s" % str(cn_names))
                logger.debug("\t - Number of threads: %s" % (str(cu)))
            setup_environment(cn, cn_names, cu)

            # Execute task
            result = execute_task(process_name, storage_conf, current_line[9:],
                                  tracing, logger, logger_cfg,
                                  (job_out, job_err), False, None, cache_queue,
                                  cache_ids, cache_profiler)
            # The ignored variable is timed_out
            exit_value, new_types, new_values, _, except_msg = result

            if exit_value == 0:
                # Task has finished without exceptions
                # endTask jobId exitValue message
                message = build_successful_message(new_types, new_values,
                                                   job_id,
                                                   exit_value)  # noqa: E501
                if __debug__:
                    logger.debug("%s - Pipe %s END TASK MESSAGE: %s" %
                                 (str(process_name), str(
                                     pipe.output_pipe), str(message)))
            elif exit_value == 2:
                # Task has finished with a COMPSs Exception
                # compssExceptionTask jobId exitValue message
                except_msg, message = build_compss_exception_message(
                    except_msg, job_id)  # noqa: E501
                if __debug__:
                    logger.debug(
                        "%s - Pipe %s COMPSS EXCEPTION TASK MESSAGE: %s" %
                        (str(process_name), str(
                            pipe.output_pipe), str(except_msg)))
            else:
                # An exception other than COMPSsException has been raised
                # within the task
                message = build_exception_message(job_id, exit_value)
                if __debug__:
                    logger.debug("%s - Pipe %s END TASK MESSAGE: %s" %
                                 (str(process_name), str(
                                     pipe.output_pipe), str(message)))

            # The return message is:
            #
            # TaskResult ==> jobId exitValue D List<Object>
            #
            # Where List<Object> has D * 2 length:
            # D = #parameters == #task_parameters +
            #                    (has_target ? 1 : 0) +
            #                    #returns
            # And contains a pair of elements per parameter:
            #     - Parameter new type.
            #     - Parameter new value:
            #         - "null" if it is NOT a PSCO
            #         - PSCOId (String) if is a PSCO
            # Example:
            #     4 null 9 null 12 <pscoid>
            #
            # The order of the elements is: parameters + self + returns
            #
            # This is sent through the pipe with the END_TASK message.
            # If the task had an object or file as parameter and the worker
            # returns the id, the runtime can change the type (and locations)
            # to a EXTERNAL_OBJ_T.

        except Exception as e:
            logger.exception("%s - Exception %s" % (str(process_name), str(e)))
            if queue:
                queue.put("EXCEPTION")

            # Stop the worker process
            return False

        # Clean environment variables
        if __debug__:
            logger.debug("Cleaning environment.")
        clean_environment(binded_cpus, binded_gpus)
        if affinity_event_emit:
            emit_manual_event(0, inside=True, cpu_affinity=True)
            emit_manual_event(0, inside=True, cpu_number=True)
        if binded_gpus:
            emit_manual_event(0, inside=True, gpu_affinity=True)

        # Restore loggers
        if __debug__:
            logger.debug("Restoring loggers.")
            logger.debug("-" * 100)
            # No more logs in job out and err files
        # Restore worker log
        logger.removeHandler(out_file_handler)
        logger.removeHandler(err_file_handler)
        logger.handlers = []
        for handler in logger_handlers:
            logger.addHandler(handler)
        i = 0
        for storage_logger in storage_loggers:
            storage_logger.removeHandler(out_file_handler)
            storage_logger.removeHandler(err_file_handler)
            storage_logger.handlers = []
            for handler in storage_loggers_handlers[i]:
                storage_logger.addHandler(handler)
            i += 1
        if __debug__:
            logger.debug(HEADER + "[%s] Finished task with id: %s" %
                         (str(process_name), str(job_id)))

        # Notify the runtime that the task has finished
        pipe.write(message)

        return True
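Both executors answer the runtime with the "TaskResult" line documented in the comments above: job id, exit value, the pair count D, and one (new type, new value) pair per parameter, where the value is "null" unless the parameter became a PSCO. A sketch of that framing as described by the comments (the "endTask" literal is taken from them; the job id, types and PSCO id are made up):

new_types = [4, 9, 12]                      # new parameter types
new_values = ["null", "null", "<pscoid>"]   # "null" unless the param is a PSCO
d = len(new_types)
payload = " ".join(str(x) for pair in zip(new_types, new_values) for x in pair)
# The real message is newline-terminated before being written to the pipe.
message = " ".join(("endTask", "42", "0", str(d), payload))
print(message)  # endTask 42 0 3 4 null 9 null 12 <pscoid>
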
Code example #10
def executor(queue, process_name, pipe, conf):
    # type: (typing.Union[None, Queue], str, Pipe, typing.Any) -> None
    """Thread main body - Overrides Threading run method.

    Iterates over the input pipe in order to receive tasks (with their
    parameters) and process them.
    Notifies the runtime when each task has finished with the
    corresponding output value.
    Finishes when the "quit" message is received.

    :param queue: Queue where to put exception messages.
    :param process_name: Process name (Thread-X, where X is the thread id).
    :param pipe: Pipe to receive and send messages from/to the runtime.
    :param conf: configuration of the executor.
    :return: None
    """
    try:
        # Replace Python Worker's SIGTERM handler.
        signal.signal(signal.SIGTERM, shutdown_handler)

        if len(conf.logger.handlers) == 0:
            # Logger has not been inherited correctly. This happens on macOS.
            set_temporary_directory(conf.tmp_dir, create_tmpdir=False)
            # Reload logger
            conf.logger, conf.logger_cfg, conf.storage_loggers, _ = \
                load_loggers(conf.debug, conf.persistent_storage)
            # Set the binding in worker mode too
            context.set_pycompss_context(context.WORKER)
        logger = conf.logger

        tracing = conf.tracing
        storage_conf = conf.storage_conf
        storage_loggers = conf.storage_loggers

        # Get a copy of the necessary information from the logger to
        # re-establish after each task
        logger_handlers = copy.copy(logger.handlers)
        logger_level = logger.getEffectiveLevel()
        logger_formatter = logging.Formatter(
            logger_handlers[0].formatter._fmt)  # noqa
        storage_loggers_handlers = []
        for storage_logger in storage_loggers:
            storage_loggers_handlers.append(copy.copy(storage_logger.handlers))

        # Establish link with the binding-commons to enable task nesting
        if __debug__:
            logger.debug(HEADER +
                         "Establishing link with runtime in process " +
                         str(process_name))  # noqa: E501
        COMPSs.load_runtime(external_process=False, _logger=logger)
        COMPSs.set_pipes(pipe.output_pipe, pipe.input_pipe)

        if storage_conf != "null":
            try:
                from storage.api import initWorkerPostFork  # noqa
                with event_worker(INIT_WORKER_POSTFORK_EVENT):
                    initWorkerPostFork()
            except (ImportError, AttributeError):
                if __debug__:
                    logger.info(
                        HEADER +
                        "[%s] Could not find initWorkerPostFork storage call. Ignoring it."
                        %  # noqa: E501
                        str(process_name))

        # Start the streaming backend if necessary
        streaming = False
        if conf.stream_backend not in [None, "null", "NONE"]:
            streaming = True

        if streaming:
            # Initialize streaming
            logger.debug(HEADER + "Starting streaming for process " +
                         str(process_name))
            try:
                DistroStreamClientHandler.init_and_start(
                    master_ip=conf.stream_master_ip,
                    master_port=conf.stream_master_port)
            except Exception as e:
                logger.error(e)
                raise e

        # Connect to Shared memory manager
        if conf.cache_queue:
            load_shared_memory_manager()

        # Process properties
        alive = True

        if __debug__:
            logger.debug(HEADER + "[%s] Starting process" % str(process_name))

        # MAIN EXECUTOR LOOP
        while alive:
            # Runtime -> pipe - Read command from pipe
            command = COMPSs.read_pipes()
            if command != "":
                if __debug__:
                    logger.debug(HEADER + "[%s] Received command %s" %
                                 (str(process_name), str(command)))
                # Process the command
                alive = process_message(
                    command, process_name, pipe, queue, tracing, logger,
                    conf.logger_cfg, logger_handlers, logger_level,
                    logger_formatter, storage_conf, storage_loggers,
                    storage_loggers_handlers, conf.cache_queue, conf.cache_ids,
                    conf.cache_profiler)
        # Stop storage
        if storage_conf != "null":
            try:
                from storage.api import finishWorkerPostFork  # noqa
                with event_worker(FINISH_WORKER_POSTFORK_EVENT):
                    finishWorkerPostFork()
            except (ImportError, AttributeError):
                if __debug__:
                    logger.info(
                        HEADER +
                        "[%s] Could not find finishWorkerPostFork storage call. Ignoring it."
                        %  # noqa: E501
                        str(process_name))

        # Stop streaming
        if streaming:
            logger.debug(HEADER + "Stopping streaming for process " +
                         str(process_name))
            DistroStreamClientHandler.set_stop()

        sys.stdout.flush()
        sys.stderr.flush()
        if __debug__:
            logger.debug(HEADER + "[%s] Exiting process " % str(process_name))
        pipe.write(QUIT_TAG)
        pipe.close()
    except Exception as e:
        logger.error(e)
        raise e