Exemple #1
0
def compss_worker(tracing, task_id, storage_conf, params):
    """
    Worker main method (invoked from __main__).

    Switches the PyCOMPSs context to worker mode, runs the task through
    ``execute_task`` and returns only the exit code of its result.

    :param tracing: Tracing boolean
    :param task_id: Task identifier
    :param storage_conf: Storage configuration file
    :param params: Parameters following the common order of the workers
    :return: Exit code
    """
    # The logger is handed to execute_task unconditionally, so it has to be
    # bound even when Python runs with -O (where __debug__ is False).
    logger = logging.getLogger('pycompss.worker.gat.worker')
    if __debug__:
        logger.debug("Starting Worker")

    # Set the binding in worker mode
    import pycompss.util.context as context
    context.set_pycompss_context(context.WORKER)

    result = execute_task("Task " + task_id, storage_conf, params, tracing,
                          logger, None)
    # Result contains:
    # exit_code, new_types, new_values, timed_out, except_msg
    # Only the exit code is needed here.
    exit_code, _, _, _, _ = result

    if __debug__:
        logger.debug("Finishing Worker")

    return exit_code
Exemple #2
0
def compss_worker(tracing, task_id, storage_conf, params, log_json):
    # type: (bool, str, str, list, str) -> int
    """ Worker main method (invoked from __main__).

    Switches the PyCOMPSs context to worker mode, runs the task through
    ``execute_task`` and returns only the exit code of its result.

    :param tracing: Tracing boolean
    :param task_id: Task identifier
    :param storage_conf: Storage configuration file
    :param params: Parameters following the common order of the workers
    :param log_json: Logger configuration file.
    :return: Exit code
    """
    # The logger is handed to execute_task unconditionally, so it has to be
    # bound even when Python runs with -O (where __debug__ is False).
    logger = logging.getLogger('pycompss.worker.gat.worker')
    if __debug__:
        logger.debug("Starting Worker")

    # Set the binding in worker mode
    import pycompss.util.context as context
    context.set_pycompss_context(context.WORKER)

    task_name = "".join(("Task ", task_id))
    result = execute_task(task_name, storage_conf, params, tracing, logger,
                          log_json, (), False, dict(), None, None)
    # Result contains:
    # exit_code, new_types, new_values, timed_out, except_msg = result
    exit_code, _, _, _, _ = result

    if __debug__:
        logger.debug("Finishing Worker")

    return exit_code
Exemple #3
0
def process_task(
        current_line,  # type: str
        process_name,  # type: str
        logger,  # type: ...
        log_json,  # type: str
        logger_handlers,  # type: ...
        logger_level,  # type: int
        logger_formatter  # type: ...
):
    # type: (...) -> (str, str)
    """ Process command received from the current_line.

    For an EXECUTE_TASK command the logger is temporarily redirected to the
    job out/err files, stdout/stderr are redirected to those same files while
    the task runs, and an END_TASK / COMPSS_EXCEPTION message is built from
    the task result. Any other (or empty) command yields exit value 7.

    :param current_line: Current command (line) to process.
    :param process_name: Process name for logger messages.
    :param logger: Logger.
    :param log_json: Logger configuration file.
    :param logger_handlers: Logger handlers.
    :param logger_level: Logger level.
    :param logger_formatter: Logger formatter.
    :return: exit_value and message.
    """
    # Keep the original streams so they can be restored after the task
    stdout = sys.stdout
    stderr = sys.stderr
    job_id = None

    if __debug__:
        logger.debug("[PYTHON EXECUTOR] [%s] Received message: %s" %
                     (str(process_name), str(current_line)))

    current_line = current_line.split()
    # An empty line has no tag; treat it as an unexpected message instead of
    # failing with IndexError.
    if current_line and current_line[0] == EXECUTE_TASK_TAG:
        # The last token is the number of collection parameters; each one is
        # described by 4 tokens (name + 3 integers) placed right before it.
        num_collection_params = int(current_line[-1])
        if num_collection_params > 0:
            collections_layouts = dict()
            raw_layouts = current_line[((num_collection_params * -4) - 1):-1]
            for i in range(num_collection_params):
                param = raw_layouts[i * 4]
                layout = [
                    int(raw_layouts[(i * 4) + 1]),
                    int(raw_layouts[(i * 4) + 2]),
                    int(raw_layouts[(i * 4) + 3])
                ]
                collections_layouts[param] = layout
        else:
            collections_layouts = None

        # Remove the last elements: cpu and gpu bindings and collection params
        current_line = current_line[0:-3]

        # task jobId command
        job_id = current_line[1]
        job_out = current_line[2]
        job_err = current_line[3]
        # current_line[4] = <boolean> = tracing
        # current_line[5] = <integer> = task id
        # current_line[6] = <boolean> = debug
        # current_line[7] = <string>  = storage conf.
        # current_line[8] = <string>  = operation type (e.g. METHOD)
        # current_line[9] = <string>  = module
        # current_line[10]= <string>  = method
        # current_line[11]= <string>  = time out
        # current_line[12]= <integer> = Number of slaves (worker nodes)==#nodes
        # <<list of slave nodes>>
        # current_line[12 + #nodes] = <integer> = computing units
        # current_line[13 + #nodes] = <boolean> = has target
        # current_line[14 + #nodes] = <string>  = has return (always 'null')
        # current_line[15 + #nodes] = <integer> = Number of parameters
        # <<list of parameters>>
        #       !---> type, stream, prefix , value

        if __debug__:
            logger.debug("[PYTHON EXECUTOR] [%s] Received task with id: %s" %
                         (str(process_name), str(job_id)))
            logger.debug("[PYTHON EXECUTOR] [%s] - TASK CMD: %s" %
                         (str(process_name), str(current_line)))

        # Swap logger from stream handler to file handler
        # All task output will be redirected to job.out/err
        for log_handler in logger_handlers:
            logger.removeHandler(log_handler)

        out_file_handler = logging.FileHandler(job_out)
        out_file_handler.setLevel(logger_level)
        out_file_handler.setFormatter(logger_formatter)
        err_file_handler = logging.FileHandler(job_err)
        err_file_handler.setLevel("ERROR")
        err_file_handler.setFormatter(logger_formatter)
        logger.addHandler(out_file_handler)
        logger.addHandler(err_file_handler)

        if __debug__:
            logger.debug("Received task in process: %s" % str(process_name))
            logger.debug(" - TASK CMD: %s" % str(current_line))

        try:
            # Setup out/err wrappers. The context manager closes the job
            # files even when the task raises.
            with open(job_out, 'a') as out, open(job_err, 'a') as err:
                sys.stdout = out
                sys.stderr = err
                try:
                    # Setup process environment.
                    # Indices follow the layout documented above: [11] is the
                    # time out, [12] the number of nodes, followed by the
                    # node names.
                    cn = int(current_line[12])
                    cn_names = ','.join(current_line[13:13 + cn])
                    os.environ["COMPSS_NUM_NODES"] = str(cn)
                    os.environ["COMPSS_HOSTNAMES"] = cn_names
                    if __debug__:
                        logger.debug("Process environment:")
                        logger.debug("\t - Number of nodes: %s" % (str(cn)))
                        logger.debug("\t - Hostnames: %s" % str(cn_names))

                    # Execute task
                    storage_conf = "null"
                    tracing = False
                    python_mpi = True
                    result = execute_task(process_name, storage_conf,
                                          current_line[9:], tracing, logger,
                                          log_json, (job_out, job_err),
                                          python_mpi, collections_layouts)
                finally:
                    # Restore out/err wrappers, whatever happened in the task
                    sys.stdout = stdout
                    sys.stderr = stderr
                    sys.stdout.flush()
                    sys.stderr.flush()

            # The ignored element is the time out flag
            exit_value, new_types, new_values, _, except_msg = result

            # To reduce if necessary:
            # global_exit_value = MPI.COMM_WORLD.reduce(exit_value,
            #                                           op=MPI.SUM,
            #                                           root=0)
            # message = ""

            # if MPI.COMM_WORLD.rank == 0 and global_exit_value == 0:
            if exit_value == 0:
                # Task has finished without exceptions
                # endTask jobId exitValue message
                params = build_return_params_message(new_types, new_values)
                message = " ".join((END_TASK_TAG, str(job_id), str(exit_value),
                                    str(params) + "\n"))
            elif exit_value == 2:
                # Task has finished with a COMPSs Exception
                # compssExceptionTask jobId exitValue message
                except_msg = except_msg.replace(" ", "_")
                message = " ".join((COMPSS_EXCEPTION_TAG, str(job_id),
                                    str(except_msg) + "\n"))
                if __debug__:
                    logger.debug("%s - COMPSS EXCEPTION TASK MESSAGE: %s" %
                                 (str(process_name), str(except_msg)))
            else:
                # elif MPI.COMM_WORLD.rank == 0 and global_exit_value != 0:
                # An exception has been raised in task
                message = " ".join(
                    (END_TASK_TAG, str(job_id), str(exit_value) + "\n"))

            if __debug__:
                logger.debug("%s - END TASK MESSAGE: %s" %
                             (str(process_name), str(message)))
            # The return message is:
            #
            # TaskResult ==> jobId exitValue D List<Object>
            #
            # Where List<Object> has D * 2 length:
            # D = #parameters == #task_parameters +
            #                    (has_target ? 1 : 0) +
            #                    #returns
            # And contains a pair of elements per parameter:
            #     - Parameter new type.
            #     - Parameter new value:
            #         - 'null' if it is NOT a PSCO
            #         - PSCOId (String) if is a PSCO
            # Example:
            #     4 null 9 null 12 <pscoid>
            #
            # The order of the elements is: parameters + self + returns
            #
            # This is sent through the pipe with the END_TASK message.
            # If the task had an object or file as parameter and the worker
            # returns the id, the runtime can change the type (and locations)
            # to a EXTERNAL_OBJ_T.

        except Exception as e:
            logger.exception("%s - Exception %s" % (str(process_name), str(e)))
            exit_value = 7
            message = " ".join(
                (END_TASK_TAG, str(job_id), str(exit_value) + "\n"))

        # Clean environment variables
        if __debug__:
            logger.debug("Cleaning environment.")

        # The variable may be missing if the task failed before it was set
        os.environ.pop('COMPSS_HOSTNAMES', None)

        # Restore loggers
        if __debug__:
            logger.debug("Restoring loggers.")
        logger.removeHandler(out_file_handler)
        logger.removeHandler(err_file_handler)
        # Release the job files held by the per-task handlers
        out_file_handler.close()
        err_file_handler.close()
        for handler in logger_handlers:
            logger.addHandler(handler)

        if __debug__:
            logger.debug("[PYTHON EXECUTOR] [%s] Finished task with id: %s" %
                         (str(process_name), str(job_id)))
        # return SUCCESS_SIG,
        #        "{0} -- Task Ended Successfully!".format(str(process_name))

    else:
        if __debug__:
            logger.debug("[PYTHON EXECUTOR] [%s] Unexpected message: %s" %
                         (str(process_name), str(current_line)))
        exit_value = 7
        message = " ".join((END_TASK_TAG, str(job_id), str(exit_value) + "\n"))

    return exit_value, message
Exemple #4
0
def process_task(current_line, process_name, pipe, queue, tracing, logger,
                 logger_handlers, logger_level, logger_formatter, storage_conf,
                 storage_loggers, storage_loggers_handlers):
    """
    Process command received from the runtime through a pipe.

    Handles three commands: EXECUTE_TASK (binds CPUs/GPUs, redirects logging
    and stdout/stderr to the job files, runs the task and writes the result
    message to the pipe), PING (answers with PONG) and QUIT (returns False).
    Any other command raises an Exception.

    :param current_line: Current command (line) to process
    :param process_name: Process name for logger messages
    :param pipe: Pipe where to write the result
    :param queue: Queue where to drop the process exceptions
    :param tracing: Tracing
    :param logger: Logger
    :param logger_handlers: Logger handlers
    :param logger_level: Logger level
    :param logger_formatter: Logger formatter
    :param storage_conf: Storage configuration
    :param storage_loggers: Storage loggers
    :param storage_loggers_handlers: Storage loggers handlers
    :return: <Boolean> False if a quit message was received, True otherwise.
    :raises Exception: If the command tag is not recognised.
    """
    # Keep the original streams so they can be restored after the task
    stdout = sys.stdout
    stderr = sys.stderr

    affinity_ok = True

    if __debug__:
        logger.debug(HEADER + "[%s] Received message: %s" %
                     (str(process_name), str(current_line)))
    current_line = current_line.split()
    if current_line[0] == EXECUTE_TASK_TAG:
        # CPU binding
        cpus = current_line[-3]
        if cpus != "-":
            affinity_ok = bind_cpus(cpus, process_name, logger)

        # GPU binding
        gpus = current_line[-2]
        if gpus != "-":
            bind_gpus(gpus, process_name, logger)

        # Remove the last elements: cpu and gpu bindings
        current_line = current_line[0:-3]

        # task jobId command
        job_id = current_line[1]
        job_out = current_line[2]
        job_err = current_line[3]
        # current_line[4] = <boolean> = tracing
        # current_line[5] = <integer> = task id
        # current_line[6] = <boolean> = debug
        # current_line[7] = <string>  = storage conf.
        # current_line[8] = <string>  = operation type (e.g. METHOD)
        # current_line[9] = <string>  = module
        # current_line[10]= <string>  = method
        # current_line[11]= <string>  = time out
        # current_line[12]= <integer> = Number of slaves (worker nodes)==#nodes
        # <<list of slave nodes>>
        # current_line[12 + #nodes] = <integer> = computing units
        # current_line[13 + #nodes] = <boolean> = has target
        # current_line[14 + #nodes] = <string>  = has return (always 'null')
        # current_line[15 + #nodes] = <integer> = Number of parameters
        # <<list of parameters>>
        #       !---> type, stream, prefix , value

        if __debug__:
            logger.debug(HEADER + "[%s] Received task with id: %s" %
                         (str(process_name), str(job_id)))
            logger.debug(HEADER + "[%s] - TASK CMD: %s" %
                         (str(process_name), str(current_line)))

        # Swap logger from stream handler to file handler
        # All task output will be redirected to job.out/err
        for log_handler in logger_handlers:
            logger.removeHandler(log_handler)
        for storage_logger in storage_loggers:
            # Iterate over a copy: removeHandler mutates the handlers list,
            # which would otherwise make the loop skip handlers.
            for log_handler in list(storage_logger.handlers):
                storage_logger.removeHandler(log_handler)
        out_file_handler = logging.FileHandler(job_out)
        out_file_handler.setLevel(logger_level)
        out_file_handler.setFormatter(logger_formatter)
        err_file_handler = logging.FileHandler(job_err)
        err_file_handler.setLevel("ERROR")
        err_file_handler.setFormatter(logger_formatter)
        logger.addHandler(out_file_handler)
        logger.addHandler(err_file_handler)
        for storage_logger in storage_loggers:
            storage_logger.addHandler(out_file_handler)
            storage_logger.addHandler(err_file_handler)

        if __debug__:
            logger.debug("Received task in process: %s" % str(process_name))
            logger.debug(" - TASK CMD: %s" % str(current_line))

        try:
            # Setup out/err wrappers. The context manager closes the job
            # files even when the task raises.
            with open(job_out, 'a') as out, open(job_err, 'a') as err:
                sys.stdout = out
                sys.stderr = err
                try:
                    # Check thread affinity
                    if not affinity_ok:
                        err.write(
                            "WARNING: This task is going to be executed with default thread affinity %s"  # noqa: E501
                            % thread_affinity.getaffinity())

                    # Setup process environment
                    cn = int(current_line[12])
                    cn_names = ','.join(current_line[13:13 + cn])
                    cu = current_line[13 + cn]
                    os.environ["COMPSS_NUM_NODES"] = str(cn)
                    os.environ["COMPSS_HOSTNAMES"] = cn_names
                    os.environ["COMPSS_NUM_THREADS"] = cu
                    os.environ["OMP_NUM_THREADS"] = cu
                    if __debug__:
                        logger.debug("Process environment:")
                        logger.debug("\t - Number of nodes: %s" % (str(cn)))
                        logger.debug("\t - Hostnames: %s" % str(cn_names))
                        logger.debug("\t - Number of threads: %s" % (str(cu)))

                    # Execute task
                    from pycompss.worker.commons.worker import execute_task
                    result = execute_task(process_name, storage_conf,
                                          current_line[9:], tracing, logger)
                finally:
                    # Restore out/err wrappers, whatever happened in the task
                    sys.stdout = stdout
                    sys.stderr = stderr
                    sys.stdout.flush()
                    sys.stderr.flush()

            # result[3] is the timed out flag, which is not used here
            exit_value = result[0]
            new_types = result[1]
            new_values = result[2]
            except_msg = result[4]

            if exit_value == 0:
                # Task has finished without exceptions
                # endTask jobId exitValue message
                params = build_return_params_message(new_types, new_values)
                message = END_TASK_TAG + " " + str(job_id)
                message += " " + str(exit_value) + " " + str(params) + "\n"
                if __debug__:
                    logger.debug("%s - Pipe %s END TASK MESSAGE: %s" %
                                 (str(process_name), str(
                                     pipe.output_pipe), str(message)))
            elif exit_value == 2:
                # Task has finished with a COMPSs Exception
                # compssExceptionTask jobId exitValue message

                except_msg = except_msg.replace(" ", "_")
                message = COMPSS_EXCEPTION_TAG + " " + str(job_id)
                message += " " + str(except_msg) + "\n"
                if __debug__:
                    logger.debug(
                        "%s - Pipe %s COMPSS EXCEPTION TASK MESSAGE: %s" %
                        (str(process_name), str(
                            pipe.output_pipe), str(except_msg)))
            else:
                # An exception other than COMPSsException has been raised
                # within the task
                message = END_TASK_TAG + " " + str(job_id)
                message += " " + str(exit_value) + "\n"

                if __debug__:
                    logger.debug("%s - Pipe %s END TASK MESSAGE: %s" %
                                 (str(process_name), str(
                                     pipe.output_pipe), str(message)))

            # The return message is:
            #
            # TaskResult ==> jobId exitValue D List<Object>
            #
            # Where List<Object> has D * 2 length:
            # D = #parameters == #task_parameters +
            #                    (has_target ? 1 : 0) +
            #                    #returns
            # And contains a pair of elements per parameter:
            #     - Parameter new type.
            #     - Parameter new value:
            #         - 'null' if it is NOT a PSCO
            #         - PSCOId (String) if is a PSCO
            # Example:
            #     4 null 9 null 12 <pscoid>
            #
            # The order of the elements is: parameters + self + returns
            #
            # This is sent through the pipe with the END_TASK message.
            # If the task had an object or file as parameter and the worker
            # returns the id, the runtime can change the type (and locations)
            # to a EXTERNAL_OBJ_T.
            pipe.write(message)

        except Exception as e:
            logger.exception("%s - Exception %s" % (str(process_name), str(e)))
            if queue:
                queue.put("EXCEPTION")

        # Clean environment variables.
        # pop() with a default is used because a variable may be missing when
        # the binding or the task failed before it was set.
        if __debug__:
            logger.debug("Cleaning environment.")
        if cpus != "-":
            os.environ.pop('COMPSS_BINDED_CPUS', None)
        if gpus != "-":
            os.environ.pop('COMPSS_BINDED_GPUS', None)
            os.environ.pop('CUDA_VISIBLE_DEVICES', None)
            os.environ.pop('GPU_DEVICE_ORDINAL', None)
        os.environ.pop('COMPSS_HOSTNAMES', None)

        # Restore loggers
        if __debug__:
            logger.debug("Restoring loggers.")
        logger.removeHandler(out_file_handler)
        logger.removeHandler(err_file_handler)
        for handler in logger_handlers:
            logger.addHandler(handler)
        for i, storage_logger in enumerate(storage_loggers):
            storage_logger.removeHandler(out_file_handler)
            storage_logger.removeHandler(err_file_handler)
            for handler in storage_loggers_handlers[i]:
                storage_logger.addHandler(handler)
        # Release the job files once no logger references the handlers
        out_file_handler.close()
        err_file_handler.close()
        if __debug__:
            logger.debug(HEADER + "[%s] Finished task with id: %s" %
                         (str(process_name), str(job_id)))

    elif current_line[0] == PING_TAG:
        pipe.write(PONG_TAG)

    elif current_line[0] == QUIT_TAG:
        # Received quit message -> Suicide
        if __debug__:
            logger.debug(HEADER + "[%s] Received quit." % str(process_name))
        return False

    else:
        if __debug__:
            logger.debug(HEADER + "[%s] Unexpected message: %s" %
                         (str(process_name), str(current_line)))
        raise Exception("Unexpected message: %s" % str(current_line))
    return True
Exemple #5
0
def main():
    # type: (...) -> int
    """ Run one task inside the container and report how it finished.

    The function description is read from ``sys.argv`` (file path, function
    name, has-target flag, return type, number of returns, number of
    parameters and the parameters themselves), handed to ``execute_task`` in
    worker context, and the resulting exit value is returned.

    :return: Exit value
    """
    if __debug__:
        LOGGER.debug("Initialising Python worker inside the container...")
        LOGGER.debug("Parsing Python function and arguments...")

    # TODO: Enhance the received parameters from ContainerInvoker.java
    argv = sys.argv
    func_file_path = str(argv[1])
    func_name = str(argv[2])
    # These three values are not received as arguments; fixed values are used
    num_slaves = 0
    timeout = 0
    cus = 1
    has_target = str(argv[3]).lower() == "true"
    return_type = str(argv[4])
    return_length = int(argv[5])
    num_params = int(argv[6])
    func_params = argv[7:]

    task_header = [
        func_file_path, func_name, num_slaves, timeout, cus, has_target,
        return_type, return_length, num_params
    ]
    execute_task_params = task_header + func_params

    if __debug__:
        parsed_fields = (
            ("- File: ", func_file_path),
            ("- Function: ", func_name),
            ("- HasTarget: ", has_target),
            ("- ReturnType: ", return_type),
            ("- Num Returns: ", return_length),
            ("- Num Parameters: ", num_params),
            ("- Parameters: ", func_params),
        )
        for label, value in parsed_fields:
            LOGGER.debug(label + str(value))
        LOGGER.debug("DONE Parsing Python function and arguments")
        LOGGER.debug("Processing task...")

    # Run the task in worker context
    context.set_pycompss_context(context.WORKER)
    result = execute_task(
        "ContainerInvoker",  # process name
        "null",              # storage configuration
        execute_task_params,
        False,               # tracing
        LOGGER,
        None,
        None,                # log files  # noqa
        False,               # python mpi
        None                 # collections layouts  # noqa
    )
    # The ignored result is time out
    exit_value, new_types, new_values, _, except_msg = result

    if __debug__:
        LOGGER.debug("DONE Processing task")
        LOGGER.debug("Processing results...")
        LOGGER.debug("Task exit value = " + str(exit_value))

    if exit_value == 0:
        # Task has finished without exceptions
        if __debug__:
            LOGGER.debug("Building return parameters...")
            LOGGER.debug("New Types: " + str(new_types))
            LOGGER.debug("New Values: " + str(new_values))
        build_return_params_message(new_types, new_values)
        if __debug__:
            LOGGER.debug("DONE Building return parameters")
    elif __debug__:
        # The exception message is only used for debug logging
        except_msg = except_msg.replace(" ", "_")
        if exit_value == 2:
            # Task has finished with a COMPSs Exception
            LOGGER.debug("Registered COMPSs Exception: %s" % str(except_msg))
        else:
            # An exception has been raised in task
            LOGGER.debug("Registered Exception in task execution %s" %
                         str(except_msg))

    # Return
    if exit_value != 0:
        LOGGER.debug(
            "ERROR: Task execution finished with non-zero exit value (%s != 0)"
            % str(exit_value))  # noqa: E501
    else:
        LOGGER.debug("Task execution finished SUCCESSFULLY!")
    return exit_value
Exemple #6
0
def process_task(
        current_line,  # type: list
        process_name,  # type: str
        pipe,  # type: Pipe
        queue,  # type: ...
        tracing,  # type: bool
        logger,  # type: ...
        logger_handlers,  # type: list
        logger_level,  # type: int
        logger_formatter,  # type: ...
        storage_conf,  # type: str
        storage_loggers,  # type: list
        storage_loggers_handlers  # type: list
):
    # type: (...) -> bool
    """ Process command received from the runtime through a pipe.

    Binds CPUs/GPUs as requested by the command, redirects the logger and
    the storage loggers to the job out/err files, runs the task, restores
    the loggers and finally writes the result message to the pipe.

    If anything raises while the task is being set up or executed, the
    exception is logged, "EXCEPTION" is put on the queue (when there is one)
    and False is returned right away, before the environment cleanup and the
    logger restoration.

    :param current_line: Current command (line) to process.
    :param process_name: Process name for logger messages.
    :param pipe: Pipe where to write the result.
    :param queue: Queue where to drop the process exceptions.
    :param tracing: Tracing.
    :param logger: Logger.
    :param logger_handlers: Logger handlers.
    :param logger_level: Logger level.
    :param logger_formatter: Logger formatter.
    :param storage_conf: Storage configuration.
    :param storage_loggers: Storage loggers.
    :param storage_loggers_handlers: Storage loggers handlers.
    :return: True if processed successfully, False otherwise.
    """
    affinity_ok = True

    # CPU binding
    cpus = current_line[-3]
    if cpus != "-" and THREAD_AFFINITY:
        affinity_ok = bind_cpus(cpus, process_name, logger)

    # GPU binding
    gpus = current_line[-2]
    if gpus != "-":
        bind_gpus(gpus, process_name, logger)

    # Remove the last elements: cpu and gpu bindings
    current_line = current_line[0:-3]

    # task jobId command
    job_id, job_out, job_err = current_line[1:4]  # 4th is not taken
    # current_line[4] = <boolean> = tracing
    # current_line[5] = <integer> = task id
    # current_line[6] = <boolean> = debug
    # current_line[7] = <string>  = storage conf.
    # current_line[8] = <string>  = operation type (e.g. METHOD)
    # current_line[9] = <string>  = module
    # current_line[10]= <string>  = method
    # current_line[11]= <string>  = time out
    # current_line[12]= <integer> = Number of slaves (worker nodes)==#nodes
    # <<list of slave nodes>>
    # current_line[12 + #nodes] = <integer> = computing units
    # current_line[13 + #nodes] = <boolean> = has target
    # current_line[14 + #nodes] = <string>  = has return (always 'null')
    # current_line[15 + #nodes] = <integer> = Number of parameters
    # <<list of parameters>>
    #       !---> type, stream, prefix , value

    if __debug__:
        logger.debug(HEADER + "[%s] Received task with id: %s" %
                     (str(process_name), str(job_id)))
        logger.debug(HEADER + "[%s] - TASK CMD: %s" %
                     (str(process_name), str(current_line)))

    # Swap logger from stream handler to file handler
    # All task output will be redirected to job.out/err
    for log_handler in logger_handlers:
        logger.removeHandler(log_handler)
    for storage_logger in storage_loggers:
        # Iterate over a copy: removeHandler mutates the handlers list, which
        # would otherwise make the loop skip handlers.
        for log_handler in list(storage_logger.handlers):
            storage_logger.removeHandler(log_handler)
    out_file_handler = logging.FileHandler(job_out)
    out_file_handler.setLevel(logger_level)
    out_file_handler.setFormatter(logger_formatter)
    err_file_handler = logging.FileHandler(job_err)
    err_file_handler.setLevel("ERROR")
    err_file_handler.setFormatter(logger_formatter)
    logger.addHandler(out_file_handler)
    logger.addHandler(err_file_handler)
    for storage_logger in storage_loggers:
        storage_logger.addHandler(out_file_handler)
        storage_logger.addHandler(err_file_handler)

    if __debug__:
        logger.debug("Received task in process: %s" % str(process_name))
        logger.debug(" - TASK CMD: %s" % str(current_line))

    try:
        # Check thread affinity
        if not affinity_ok and THREAD_AFFINITY:
            logger.warning(
                "This task is going to be executed with default thread affinity %s"  # noqa: E501
                % thread_affinity.getaffinity())

        # Setup process environment
        cn = int(current_line[12])
        cn_names = ','.join(current_line[13:13 + cn])
        cu = current_line[13 + cn]
        if __debug__:
            logger.debug("Process environment:")
            logger.debug("\t - Number of nodes: %s" % (str(cn)))
            logger.debug("\t - Hostnames: %s" % str(cn_names))
            logger.debug("\t - Number of threads: %s" % (str(cu)))
        setup_environment(cn, cn_names, cu)

        # Execute task
        result = execute_task(process_name, storage_conf, current_line[9:],
                              tracing, logger, (job_out, job_err))
        # The ignored element is the timed out flag
        exit_value, new_types, new_values, _, except_msg = result

        if exit_value == 0:
            # Task has finished without exceptions
            # endTask jobId exitValue message
            message = build_successful_message(new_types, new_values, job_id,
                                               exit_value)  # noqa: E501
            if __debug__:
                logger.debug(
                    "%s - Pipe %s END TASK MESSAGE: %s" %
                    (str(process_name), str(pipe.output_pipe), str(message)))
        elif exit_value == 2:
            # Task has finished with a COMPSs Exception
            # compssExceptionTask jobId exitValue message
            except_msg, message = build_compss_exception_message(
                except_msg, job_id)  # noqa: E501
            if __debug__:
                logger.debug("%s - Pipe %s COMPSS EXCEPTION TASK MESSAGE: %s" %
                             (str(process_name), str(
                                 pipe.output_pipe), str(except_msg)))
        else:
            # An exception other than COMPSsException has been raised
            # within the task
            message = build_exception_message(job_id, exit_value)
            if __debug__:
                logger.debug(
                    "%s - Pipe %s END TASK MESSAGE: %s" %
                    (str(process_name), str(pipe.output_pipe), str(message)))

        # The return message is:
        #
        # TaskResult ==> jobId exitValue D List<Object>
        #
        # Where List<Object> has D * 2 length:
        # D = #parameters == #task_parameters +
        #                    (has_target ? 1 : 0) +
        #                    #returns
        # And contains a pair of elements per parameter:
        #     - Parameter new type.
        #     - Parameter new value:
        #         - 'null' if it is NOT a PSCO
        #         - PSCOId (String) if is a PSCO
        # Example:
        #     4 null 9 null 12 <pscoid>
        #
        # The order of the elements is: parameters + self + returns
        #
        # This is sent through the pipe with the END_TASK message.
        # If the task had an object or file as parameter and the worker
        # returns the id, the runtime can change the type (and locations)
        # to a EXTERNAL_OBJ_T.

    except Exception as e:
        logger.exception("%s - Exception %s" % (str(process_name), str(e)))
        if queue:
            queue.put("EXCEPTION")

        # Stop the worker process
        return False

    # Clean environment variables
    if __debug__:
        logger.debug("Cleaning environment.")
    clean_environment(cpus, gpus)

    # Restore loggers
    if __debug__:
        logger.debug("Restoring loggers.")
    logger.removeHandler(out_file_handler)
    logger.removeHandler(err_file_handler)
    for handler in logger_handlers:
        logger.addHandler(handler)
    for i, storage_logger in enumerate(storage_loggers):
        storage_logger.removeHandler(out_file_handler)
        storage_logger.removeHandler(err_file_handler)
        for handler in storage_loggers_handlers[i]:
            storage_logger.addHandler(handler)
    # Release the job files once no logger references the handlers
    out_file_handler.close()
    err_file_handler.close()
    if __debug__:
        logger.debug(HEADER + "[%s] Finished task with id: %s" %
                     (str(process_name), str(job_id)))

    # Notify the runtime that the task has finished
    pipe.write(message)

    return True
Exemple #7
0
def main():
    # type: () -> int
    """ Main method to process the task execution.

    Reads the task description from sys.argv, initialises the worker logging,
    runs the task through execute_task and, when the task succeeds, builds
    the return parameters message.

    Expected positional arguments (sys.argv):
        1: function file path
        2: function name
        3: log level ("true"/"debug", "info"/"off" or anything else)
        4: tracing ("true" enables it for the logging initialisation)
        5: has target ("true"/"false", case insensitive)
        6: return type
        7: return length
        8: number of parameters
        9...: function parameters

    :return: Exit value
    """

    # Parse arguments
    # TODO: Enhance the received parameters from ContainerInvoker.java
    func_file_path = str(sys.argv[1])
    func_name = str(sys.argv[2])
    num_slaves = 0
    timeout = 0
    cus = 1
    log_level = sys.argv[3]
    tracing = sys.argv[4] == 'true'
    has_target = str(sys.argv[5]).lower() == "true"
    return_type = str(sys.argv[6])
    return_length = int(sys.argv[7])
    num_params = int(sys.argv[8])
    func_params = sys.argv[9:]

    # Log initialisation
    # Load log level configuration file
    worker_path = os.path.dirname(os.path.realpath(__file__))
    if log_level in ("true", "debug"):
        # Debug
        log_json = "".join((worker_path,
                            "/log/logging_container_worker_debug.json"))
    elif log_level in ("info", "off"):
        # Info or no debug
        log_json = "".join((worker_path,
                            "/log/logging_container_worker_off.json"))
    else:
        # Default
        log_json = "".join((worker_path,
                            "/log/logging_container_worker.json"))
    init_logging_worker(log_json, tracing)
    # The logger must exist even when __debug__ is False (python -O):
    # it is handed to execute_task and used by the final status messages.
    logger = logging.getLogger('pycompss.worker.container.container_worker')  # noqa: E501
    if __debug__:
        logger.debug("Initialising Python worker inside the container...")

    task_params = [func_file_path, func_name, num_slaves,
                   timeout, cus, has_target, return_type,
                   return_length, num_params]  # type: typing.List[typing.Any]
    execute_task_params = task_params + func_params

    if __debug__:
        logger.debug("- File: " + str(func_file_path))
        logger.debug("- Function: " + str(func_name))
        logger.debug("- HasTarget: " + str(has_target))
        logger.debug("- ReturnType: " + str(return_type))
        logger.debug("- Num Returns: " + str(return_length))
        logger.debug("- Num Parameters: " + str(num_params))
        logger.debug("- Parameters: " + str(func_params))
        logger.debug("DONE Parsing Python function and arguments")

    # Process task
    if __debug__:
        logger.debug("Processing task...")

    process_name = "ContainerInvoker"
    storage_conf = "null"
    # Tracing is forced off for the task execution itself, regardless of the
    # flag parsed above (which was only used to initialise the logging).
    tracing = False
    log_files = ()
    python_mpi = False
    collections_layouts = None  # type: typing.Optional[dict]
    context.set_pycompss_context(context.WORKER)
    result = execute_task(process_name,
                          storage_conf,
                          execute_task_params,
                          tracing,
                          logger,
                          "None",
                          log_files,           # noqa
                          python_mpi,
                          collections_layouts  # noqa
                          )
    # The ignored result is time out
    exit_value, new_types, new_values, _, except_msg = result

    if __debug__:
        logger.debug("DONE Processing task")

    # Process results
    if __debug__:
        logger.debug("Processing results...")
        logger.debug("Task exit value = " + str(exit_value))

    if exit_value == 0:
        # Task has finished without exceptions
        if __debug__:
            logger.debug("Building return parameters...")
            logger.debug("New Types: " + str(new_types))
            logger.debug("New Values: " + str(new_values))
        build_return_params_message(new_types, new_values)
        if __debug__:
            logger.debug("DONE Building return parameters")
    elif exit_value == 2:
        # Task has finished with a COMPSs Exception
        if __debug__:
            except_msg = except_msg.replace(" ", "_")
            logger.debug("Registered COMPSs Exception: %s" %
                         str(except_msg))
    else:
        # An exception has been raised in task
        if __debug__:
            except_msg = except_msg.replace(" ", "_")
            logger.debug("Registered Exception in task execution %s" %
                         str(except_msg))

    # Return
    if exit_value != 0:
        logger.debug("ERROR: Task execution finished with non-zero exit value (%s != 0)" % str(exit_value))  # noqa: E501
    else:
        logger.debug("Task execution finished SUCCESSFULLY!")
    return exit_value
Exemple #8
0
def process_task(
        current_line,  # type: list
        process_name,  # type: str
        pipe,  # type: Pipe
        queue,  # type: typing.Optional[Queue]
        tracing,  # type: bool
        logger,  # type: typing.Any
        logger_cfg,  # type: str
        logger_handlers,  # type: list
        logger_level,  # type: int
        logger_formatter,  # type: typing.Any
        storage_conf,  # type: str
        storage_loggers,  # type: list
        storage_loggers_handlers,  # type: list
        cache_queue,  # type: typing.Optional[Queue]
        cache_ids,  # type: typing.Any
        cache_profiler,  # type: bool
):  # type: (...) -> bool
    """ Process command received from the runtime through a pipe.

    Steps performed:
        1. Bind the requested CPUs/GPUs (taken from the tail of the command).
        2. Redirect the worker and storage loggers to the job out/err files.
        3. Set up the process environment and execute the task.
        4. Build the result message according to the task exit value.
        5. Clean the environment, restore the original logger handlers and
           write the result message to the pipe.

    If anything raises while executing the task or building the message, the
    exception is logged, "EXCEPTION" is put in the queue (when there is one)
    and False is returned without restoring the loggers.

    :param current_line: Current command (line) to process.
    :param process_name: Process name for logger messages.
    :param pipe: Pipe where to write the result.
    :param queue: Queue where to drop the process exceptions.
    :param tracing: Tracing.
    :param logger: Logger.
    :param logger_cfg: Logger configuration file
    :param logger_handlers: Logger handlers.
    :param logger_level: Logger level.
    :param logger_formatter: Logger formatter.
    :param storage_conf: Storage configuration.
    :param storage_loggers: Storage loggers.
    :param storage_loggers_handlers: Storage loggers handlers.
    :param cache_queue: Cache tracker communication queue.
    :param cache_ids: Cache proxy dictionary (read-only).
    :param cache_profiler: Cache profiler
    :return: True if processed successfully, False otherwise.
    """
    with event_worker(PROCESS_TASK_EVENT):
        affinity_event_emit = False
        binded_cpus = False
        binded_gpus = False

        # CPU binding
        cpus = current_line[-3]
        if cpus != "-" and THREAD_AFFINITY:
            # The cpu affinity event is already emitted in Java.
            # Instead of emitting what we receive, we are emitting what we
            # check after setting the affinity.
            binded_cpus = bind_cpus(cpus, process_name, logger)

        # GPU binding
        gpus = current_line[-2]
        if gpus != "-":
            emit_manual_event(int(gpus) + 1, inside=True, gpu_affinity=True)
            bind_gpus(gpus, process_name, logger)
            binded_gpus = True

        # Drop the three trailing elements (they include the cpu and gpu
        # bindings read above)
        current_line = current_line[0:-3]

        # task jobId command
        job_id, job_out, job_err = current_line[1:4]  # 4th is not taken
        # current_line[4] = <boolean> = tracing
        # current_line[5] = <integer> = task id
        # current_line[6] = <boolean> = debug
        # current_line[7] = <string>  = storage conf.
        # current_line[8] = <string>  = operation type (e.g. METHOD)
        # current_line[9] = <string>  = module
        # current_line[10]= <string>  = method
        # current_line[11]= <string>  = time out
        # current_line[12]= <integer> = Number of slaves (worker nodes)==#nodes
        # <<list of slave nodes>>
        # current_line[12 + #nodes] = <integer> = computing units
        # current_line[13 + #nodes] = <boolean> = has target
        # current_line[14 + #nodes] = <string>  = has return (always "null")
        # current_line[15 + #nodes] = <integer> = Number of parameters
        # <<list of parameters>>
        #       !---> type, stream, prefix , value

        if __debug__:
            logger.debug(HEADER + "[%s] Received task with id: %s" %
                         (str(process_name), str(job_id)))
            logger.debug(HEADER + "[%s] - TASK CMD: %s" %
                         (str(process_name), str(current_line)))

        # Swap logger from stream handler to file handler
        # All task output will be redirected to job.out/err
        for log_handler in logger_handlers:
            logger.removeHandler(log_handler)
        for storage_logger in storage_loggers:
            # Iterate over a snapshot: removeHandler mutates the very same
            # handlers list, and iterating it directly skips every other
            # handler (leaving it attached while the task runs).
            for log_handler in list(storage_logger.handlers):
                storage_logger.removeHandler(log_handler)
        out_file_handler = logging.FileHandler(job_out)
        out_file_handler.setLevel(logger_level)
        out_file_handler.setFormatter(logger_formatter)
        err_file_handler = logging.FileHandler(job_err)
        err_file_handler.setLevel("ERROR")
        err_file_handler.setFormatter(logger_formatter)
        logger.addHandler(out_file_handler)
        logger.addHandler(err_file_handler)
        for storage_logger in storage_loggers:
            storage_logger.addHandler(out_file_handler)
            storage_logger.addHandler(err_file_handler)

        if __debug__:
            # From now onwards the log is in the job out and err files
            logger.debug("-" * 100)
            logger.debug("Received task in process: %s" % str(process_name))
            logger.debug("TASK CMD: %s" % str(current_line))

        try:
            # Check thread affinity
            if THREAD_AFFINITY:
                # The cpu affinity can be long if multiple cores have been
                # assigned. To avoid issues, we get just the first id.
                real_affinity = thread_affinity.getaffinity()
                cpus = str(real_affinity[0])
                num_cpus = len(real_affinity)
                emit_manual_event(int(cpus) + 1,
                                  inside=True,
                                  cpu_affinity=True)
                emit_manual_event(int(num_cpus), inside=True, cpu_number=True)
                affinity_event_emit = True
                if not binded_cpus:
                    logger.warning(
                        "This task is going to be executed with default thread affinity %s" %  # noqa: E501
                        str(real_affinity))

            # Setup process environment
            cn = int(current_line[12])
            cn_names = ",".join(current_line[13:13 + cn])
            cu = current_line[13 + cn]
            if __debug__:
                logger.debug("Process environment:")
                logger.debug("\t - Number of nodes: %s" % (str(cn)))
                logger.debug("\t - Hostnames: %s" % str(cn_names))
                logger.debug("\t - Number of threads: %s" % (str(cu)))
            setup_environment(cn, cn_names, cu)

            # Execute task
            result = execute_task(process_name, storage_conf, current_line[9:],
                                  tracing, logger, logger_cfg,
                                  (job_out, job_err), False, None, cache_queue,
                                  cache_ids, cache_profiler)
            # The ignored variable is timed_out
            exit_value, new_types, new_values, _, except_msg = result

            if exit_value == 0:
                # Task has finished without exceptions
                # endTask jobId exitValue message
                message = build_successful_message(new_types, new_values,
                                                   job_id,
                                                   exit_value)  # noqa: E501
                if __debug__:
                    logger.debug("%s - Pipe %s END TASK MESSAGE: %s" %
                                 (str(process_name), str(
                                     pipe.output_pipe), str(message)))
            elif exit_value == 2:
                # Task has finished with a COMPSs Exception
                # compssExceptionTask jobId exitValue message
                except_msg, message = build_compss_exception_message(
                    except_msg, job_id)  # noqa: E501
                if __debug__:
                    logger.debug(
                        "%s - Pipe %s COMPSS EXCEPTION TASK MESSAGE: %s" %
                        (str(process_name), str(
                            pipe.output_pipe), str(except_msg)))
            else:
                # An exception other than COMPSsException has been raised
                # within the task
                message = build_exception_message(job_id, exit_value)
                if __debug__:
                    logger.debug("%s - Pipe %s END TASK MESSAGE: %s" %
                                 (str(process_name), str(
                                     pipe.output_pipe), str(message)))

            # The return message is:
            #
            # TaskResult ==> jobId exitValue D List<Object>
            #
            # Where List<Object> has D * 2 length:
            # D = #parameters == #task_parameters +
            #                    (has_target ? 1 : 0) +
            #                    #returns
            # And contains a pair of elements per parameter:
            #     - Parameter new type.
            #     - Parameter new value:
            #         - "null" if it is NOT a PSCO
            #         - PSCOId (String) if is a PSCO
            # Example:
            #     4 null 9 null 12 <pscoid>
            #
            # The order of the elements is: parameters + self + returns
            #
            # This is sent through the pipe with the END_TASK message.
            # If the task had an object or file as parameter and the worker
            # returns the id, the runtime can change the type (and locations)
            # to a EXTERNAL_OBJ_T.

        except Exception as e:
            logger.exception("%s - Exception %s" % (str(process_name), str(e)))
            if queue:
                queue.put("EXCEPTION")

            # Stop the worker process
            return False

        # Clean environment variables
        if __debug__:
            logger.debug("Cleaning environment.")
        clean_environment(binded_cpus, binded_gpus)
        if affinity_event_emit:
            emit_manual_event(0, inside=True, cpu_affinity=True)
            emit_manual_event(0, inside=True, cpu_number=True)
        if binded_gpus:
            emit_manual_event(0, inside=True, gpu_affinity=True)

        # Restore loggers
        if __debug__:
            logger.debug("Restoring loggers.")
            logger.debug("-" * 100)
            # No more logs in job out and err files
        # Restore worker log
        logger.removeHandler(out_file_handler)
        logger.removeHandler(err_file_handler)
        logger.handlers = []
        for handler in logger_handlers:
            logger.addHandler(handler)
        # storage_loggers_handlers[i] holds the original handlers of
        # storage_loggers[i]
        for i, storage_logger in enumerate(storage_loggers):
            storage_logger.removeHandler(out_file_handler)
            storage_logger.removeHandler(err_file_handler)
            storage_logger.handlers = []
            for handler in storage_loggers_handlers[i]:
                storage_logger.addHandler(handler)
        # The job file handlers are no longer attached to any logger:
        # close them explicitly so that their file descriptors are released
        # right away instead of whenever they get garbage collected.
        out_file_handler.close()
        err_file_handler.close()
        if __debug__:
            logger.debug(HEADER + "[%s] Finished task with id: %s" %
                         (str(process_name), str(job_id)))

        # Notify the runtime that the task has finished
        pipe.write(message)

        return True